
	.importzp vt0, vt1
	.importzp mt0, mt1, mt2, mt3, mt4, mt5, mt6, mt7
	.importzp mb0, mb1, mb2, mb3, mb4, mb5, mb6, mb7
	.importzp mx0, mx1, mx2, mx3
	.importzp _v0, _v1, _a0, _a1
	.importzp dx0

	; table addresses (fixed locations written by _divide_6502_setup)
	;   sh_table: nine consecutive 256-byte pages; page k holds (index >> k)
	;             for k = 0..8
	;   r0_table: 256-byte copy of r0_template, indexed by divisor
	;   t0_table: 256 bytes, indexed by divisor; each entry is the high
	;             address byte of the sh_table page used for that divisor
	sh_table = $b000
	r0_table = $b900
	t0_table = $ba00

	.segment "CODE"

; Reciprocal table: one byte per divisor 0..255 (256 entries in total).
; _divide_6502_setup copies it to r0_table, where _divide_6502_u8 indexes it
; by divisor. A zero entry makes _divide_6502_u8 skip its multiply step; the
; zero entries sit at indices 0, 1, 2, 4, 8, 16, 32, 64 and 128.
; NOTE(review): the non-zero values look like floor(256 * (2^L - d) / d) + 1
; with L = ceil(log2(d)) (matches d = 3, 5, 6, 7, 9 and 255) - confirm against
; whatever generated this table.
r0_template:
	.byte $00,$00,$00,$56,$00,$9a,$56,$25,$00,$c8,$9a,$75,$56,$3c,$25,$12
	.byte $00,$e2,$c8,$b0,$9a,$87,$75,$65,$56,$48,$3c,$30,$25,$1b,$12,$09
	.byte $00,$f1,$e2,$d5,$c8,$bb,$b0,$a5,$9a,$90,$87,$7e,$75,$6d,$65,$5d
	.byte $56,$4f,$48,$42,$3c,$36,$30,$2a,$25,$20,$1b,$16,$12,$0d,$09,$05
	.byte $00,$f9,$f1,$ea,$e2,$db,$d5,$ce,$c8,$c1,$bb,$b5,$b0,$aa,$a5,$9f
	.byte $9a,$95,$90,$8b,$87,$82,$7e,$79,$75,$71,$6d,$69,$65,$61,$5d,$59
	.byte $56,$52,$4f,$4b,$48,$45,$42,$3f,$3c,$39,$36,$33,$30,$2d,$2a,$28
	.byte $25,$22,$20,$1d,$1b,$19,$16,$14,$12,$0f,$0d,$0b,$09,$07,$05,$03
	.byte $00,$fd,$f9,$f5,$f1,$ed,$ea,$e6,$e2,$df,$db,$d8,$d5,$d1,$ce,$cb
	.byte $c8,$c4,$c1,$be,$bb,$b8,$b5,$b3,$b0,$ad,$aa,$a7,$a5,$a2,$9f,$9d
	.byte $9a,$98,$95,$93,$90,$8e,$8b,$89,$87,$84,$82,$80,$7e,$7b,$79,$77
	.byte $75,$73,$71,$6f,$6d,$6b,$69,$67,$65,$63,$61,$5f,$5d,$5b,$59,$58
	.byte $56,$54,$52,$51,$4f,$4d,$4b,$4a,$48,$47,$45,$43,$42,$40,$3f,$3d
	.byte $3c,$3a,$39,$37,$36,$34,$33,$31,$30,$2f,$2d,$2c,$2a,$29,$28,$26
	.byte $25,$24,$22,$21,$20,$1f,$1d,$1c,$1b,$1a,$19,$17,$16,$15,$14,$13
	.byte $12,$10,$0f,$0e,$0d,$0c,$0b,$0a,$09,$08,$07,$06,$05,$04,$03,$02

	.P02
	.export _divide_6502_setup

	; Build the lookup tables used by _divide_6502_u8.
	;   r0_table    <- copy of r0_template (256 bytes)
	;   t0_table[d] <- high address byte of the sh_table page for divisor d
	;   sh_table    <- nine pages; page k holds (index >> k), k = 0..8
	;   dx0         <- $0000; _divide_6502_u8 later overwrites dx0 + 1 with
	;                  a page byte taken from t0_table
	; Takes no inputs. Uses A, X and Y.

	; A page byte is formed by OR-ing a shift count (0..8) into the high
	; byte of sh_table, and dx0 + 0 is left at zero, so sh_table has to
	; start on a $1000 boundary for the OR to act as an addition.
	.assert (sh_table & $0fff) = 0, error, "sh_table must start on a $1000 boundary"

_divide_6502_setup:
	; copy the reciprocal table to its fixed address
	ldy #0
@copy:
	lda r0_template,y
	sta r0_table,y
	iny
	bne @copy

	; divisor 0 selects page 8, whose entries (index >> 8) are all zero
	lda #8
	ora #>sh_table
	sta t0_table

	; divisors 1..255: page = high byte of sh_table | floor(log2(divisor))
	ldy #1
@next_divisor:
	tya
	ldx #$ff
@count:
	inx			; X = number of leading zero bits so far
	asl
	bcc @count		; stop once the top set bit reaches carry
	txa
	eor #$07		; 7 - leading zeros = index of the top set bit
	ora #>sh_table
	sta t0_table,y
	iny
	bne @next_divisor

	; fill the shift pages: page k, entry y = y >> k
	ldy #0
@next_value:
	tya
	sta sh_table+(0*256),y
	lsr
	sta sh_table+(1*256),y
	lsr
	sta sh_table+(2*256),y
	lsr
	sta sh_table+(3*256),y
	lsr
	sta sh_table+(4*256),y
	lsr
	sta sh_table+(5*256),y
	lsr
	sta sh_table+(6*256),y
	lsr
	sta sh_table+(7*256),y
	lsr
	sta sh_table+(8*256),y
	iny
	bne @next_value

	; clear the shift pointer; its low byte must stay 0 so that
	; (dx0),y addresses entry y of the selected page
	lda #0
	sta dx0 + 0
	sta dx0 + 1

	; return
	rts

	.P02
	.export _divide_6502_u32

	; 32-bit unsigned shift-and-subtract division, one quotient bit per pass.
	;   in:  _a0 = dividend, _a1 = divisor (4 bytes each, low byte first)
	;   out: _v0 = quotient, _v1 = remainder (4 bytes each, low byte first)
	; Uses A and X; vt0 + 0 .. vt0 + 2 serve as scratch. Y is not touched.
	; A zero divisor is not trapped: every trial subtraction succeeds, so
	; the quotient comes back as all ones and the remainder equals the
	; dividend.
_divide_6502_u32:
	; one pass per dividend bit
	ldx #32

	; working copy of the dividend; quotient bits replace it from bit 0 up
	lda _a0 + 0
	sta _v0 + 0
	lda _a0 + 1
	sta _v0 + 1
	lda _a0 + 2
	sta _v0 + 2
	lda _a0 + 3
	sta _v0 + 3

	; remainder starts out empty
	lda #0
	sta _v1 + 0
	sta _v1 + 1
	sta _v1 + 2
	sta _v1 + 3

@next_bit:
	; move the top dividend bit into the bottom of the remainder
	asl _v0 + 0
	rol _v0 + 1
	rol _v0 + 2
	rol _v0 + 3
	rol _v1 + 0
	rol _v1 + 1
	rol _v1 + 2
	rol _v1 + 3

	; trial subtraction remainder - divisor: the low three result bytes
	; are parked in vt0, the top byte stays in A
	lda _v1 + 0
	sec
	sbc _a1 + 0
	sta vt0 + 0
	lda _v1 + 1
	sbc _a1 + 1
	sta vt0 + 1
	lda _v1 + 2
	sbc _a1 + 2
	sta vt0 + 2
	lda _v1 + 3
	sbc _a1 + 3
	bcc @keep		; borrow: divisor does not fit, keep remainder

	; commit the difference as the new remainder
	sta _v1 + 3
	lda vt0 + 1
	sta _v1 + 1
	lda vt0 + 0
	sta _v1 + 0
	lda vt0 + 2
	sta _v1 + 2

	; bit 0 of the quotient is 0 after the asl, so inc sets it
	inc _v0 + 0

@keep:
	dex
	bne @next_bit

	rts

	.P02
	.export _divide_6502_u16

	; 16-bit unsigned shift-and-subtract division, one quotient bit per pass.
	;   in:  _a0 = dividend, _a1 = divisor (2 bytes each, low byte first)
	;   out: _v0 = quotient, _v1 = remainder (2 bytes each, low byte first)
	; Uses A and X; vt0 + 0 serves as scratch. Y is not touched.
	; A zero divisor is not trapped: every trial subtraction succeeds, so
	; the quotient comes back as $ffff and the remainder equals the dividend.
_divide_6502_u16:
	; one pass per dividend bit
	ldx #16

	; working copy of the dividend; quotient bits replace it from bit 0 up
	lda _a0 + 0
	sta _v0 + 0
	lda _a0 + 1
	sta _v0 + 1

	; remainder starts out empty
	lda #0
	sta _v1 + 0
	sta _v1 + 1

@next_bit:
	; move the top dividend bit into the bottom of the remainder
	asl _v0 + 0
	rol _v0 + 1
	rol _v1 + 0
	rol _v1 + 1

	; trial subtraction remainder - divisor: the low result byte is
	; parked in vt0, the high byte stays in A
	lda _v1 + 0
	sec
	sbc _a1 + 0
	sta vt0 + 0
	lda _v1 + 1
	sbc _a1 + 1
	bcc @keep		; borrow: divisor does not fit, keep remainder

	; commit the difference as the new remainder
	sta _v1 + 1
	lda vt0 + 0
	sta _v1 + 0

	; bit 0 of the quotient is 0 after the asl, so inc sets it
	inc _v0 + 0

@keep:
	dex
	bne @next_bit

	rts

	.P02
	.export _divide_6502_u8_slow

	; 8-bit unsigned shift-and-subtract division, one quotient bit per pass.
	;   in:  _a0 + 0 = dividend, _a1 + 0 = divisor
	;   out: _v0 + 0 = quotient, _v1 + 0 = remainder
	; Uses A and X. Y is not touched.
	; A zero divisor is not trapped: every trial subtraction succeeds, so
	; the quotient comes back as $ff and the remainder equals the dividend.
_divide_6502_u8_slow:
	; one pass per dividend bit
	ldx #8

	; working copy of the dividend; quotient bits replace it from bit 0 up
	lda _a0 + 0
	sta _v0 + 0

	; remainder starts out empty
	lda #0
	sta _v1 + 0

@next_bit:
	; move the top dividend bit into the bottom of the remainder
	asl _v0 + 0
	rol _v1 + 0

	; trial subtraction remainder - divisor, result left in A
	lda _v1 + 0
	sec
	sbc _a1 + 0
	bcc @keep		; borrow: divisor does not fit, keep remainder

	; commit the difference as the new remainder
	sta _v1 + 0

	; bit 0 of the quotient is 0 after the asl, so inc sets it
	inc _v0 + 0

@keep:
	dex
	bne @next_bit

	rts

	.P02
	.export _divide_6502_u8

	; Table-driven 8-bit unsigned division with no loop.
	;   in:  _a0 + 0 = dividend, _a1 + 0 = divisor
	;   out: _v0 + 0 = quotient, _v1 + 0 = remainder
	; Relies on r0_table, t0_table, sh_table and the zero low byte of dx0
	; as written by _divide_6502_setup. Uses A, X and Y; overwrites
	; dx0 + 1 and the first byte of mx0 and mx2 (plus mx1 and mx3 when
	; the multiply step runs).
	; NOTE(review): only the first byte of each mx pointer is written here.
	; The second byte is presumably set up elsewhere so that (mxN),y reads
	; multiplication tables - confirm against the code that owns them.
	; Divisor 0 selects the all-zero shift page, so the quotient is 0; the
	; remainder then depends on what the mx tables return for Y = 0.
_divide_6502_u8:
	; v0 = a0 / a1, v1 = a0 % a1

	; compute quotient: A = dividend, X = divisor
	lda _a0 + 0
	ldx _a1 + 0

	; grab shift count: t0_table holds the high byte of the sh_table page
	; for this divisor, which becomes the high byte of the dx0 pointer
	ldy t0_table,x
	sty dx0 + 1

	; grab reciprocal; a zero entry (divisors 0, 1 and the powers of 2)
	; skips the multiply and shifts the dividend directly
	ldy r0_table,x
	beq b ; power of 2

	; low pointer bytes: mx0/mx1 <- dividend, mx2/mx3 <- ~dividend
	sta mx0
	sta mx1
	eor #$ff
	sta mx2
	sta mx3

	; two-byte table difference indexed by the reciprocal in Y; the first
	; result is discarded and only its borrow feeds the second subtraction
	; (presumably the low and high bytes of dividend * reciprocal - confirm)
	sec
	lda (mx0),y
	sbc (mx2),y
	lda (mx1),y
	sbc (mx3),y

	; add the dividend and halve; ror pulls the carry of the addition back
	; in as bit 7, so the 9-bit sum is halved without losing its top bit
	clc
	adc _a0 + 0
	ror

	; shift using table lookup: entry A of the page selected above
	b:
	tay
	lda (dx0),y

	sta _v0 + 0

	; compute remainder: Y = divisor, low pointer bytes mx0 <- quotient
	; and mx2 <- ~quotient
	ldy _a1 + 0

	sta mx0
	eor #$ff
	sta mx2

	; table difference indexed by the divisor
	; (presumably the low byte of quotient * divisor - confirm)
	sec
	lda (mx0),y
	sbc (mx2),y

	; remainder = dividend - A, computed as dividend + ~A + 1
	sec
	eor #$ff
	adc _a0 + 0

	sta _v1 + 0

	rts
