Są tam same LDA, JSR, LDY, LDA (xx),Y, JSR... Ale spoko, wieczorem wkleję fmult - tak jako ciekawostkę...
------
Poniżej kod fdiv (dzielenie zmiennoprzecinkowe). Jest to na razie rekordzista w zwalnianiu - szczytowa prędkość to 46.7 ops/s. Od razu mówię, że nie uważam tego za coś finalnego...
Zaktualizowałem też wyniki we wcześniejszej tabeli.
Co do CC65, to nie jest tak źle, jak myślałem - po zgrubnej analizie kodu asm zauważyłem, że kompilator ten robi parę optymalizacji - np. skraca warunki: jeśli po sprawdzeniu części wiadomo, że cały warunek już będzie fałszywy, to dalej nie liczy. Niestety, jak widać, od cholery tu JSR-ów, więc to takie średnio miłe.
; ---------------------------------------------------------------
; void __near__ fdiv (struct __float*, struct __float*, struct __float*)
; ---------------------------------------------------------------
;
; Compiler-generated 6502 code (ca65 syntax) for a software floating-point
; divide. All three parameters are pointers taken from the C software stack;
; the routine leaves through "jmp incsp6", i.e. it drops 6 bytes of arguments.
;
; Naming used in the comments below:
;   p1 = first parameter  (the record that is written; the result)
;   p2 = second parameter (its 32-bit value is copied to _dw_z1)
;   p3 = third parameter  (its 32-bit value is copied to _dw_bm)
; NOTE(review): presumably p1 = p2 / p3 - confirm against the C source.
;
; Record layout as accessed here (field meanings are assumptions - TODO confirm):
;   byte 0      : presumably the biased exponent (bias $80, see _r0exp below)
;   byte 1      : presumably a sign flag (written as 1 when p2[1] == p3[1], else 0)
;   bytes 2..3  : more significant 16-bit word of the 32-bit value
;   bytes 4..5  : less significant 16-bit word of the 32-bit value
; The same word order is used by the statics _dw_z1, _dw_bm, _dw_z:
; label+0/+1 is the upper word, label+2/+3 the lower word (the shift and
; compare code below carries bits between them in that direction).
;
; Statics written: _dw_z1, _dw_bm, _dw_z, _r0exp, _zexp, _loop_cnt; zero page
; ptr1 and sreg are used as scratch pointers.
;
; The runtime helpers (ldaxysp, ldaxidx, pushwysp, tosicmp, tossubax, ...) are
; not defined in this file. Their behaviour as described in the comments is
; the presumed cc65 runtime behaviour - verify against the runtime sources.
; In particular, stack offsets in Y shift by 2 after every 16-bit push, which
; is why the same Y value addresses different parameters in different places.
.segment "CODE"
.proc _fdiv: near
.segment "CODE"
; _dw_z1 (upper word) = word at p2 offsets 2..3
ldy #$03
jsr ldaxysp
ldy #$03
jsr ldaxidx
sta _dw_z1
stx _dw_z1+1
; _dw_z1+2 (lower word) = word at p2 offsets 4..5
ldy #$03
jsr ldaxysp
ldy #$05
jsr ldaxidx
sta _dw_z1+2
stx _dw_z1+2+1
; _dw_bm (upper word) = word at p3 offsets 2..3
jsr ldax0sp
ldy #$03
jsr ldaxidx
sta _dw_bm
stx _dw_bm+1
; _dw_bm+2 (lower word) = word at p3 offsets 4..5
jsr ldax0sp
ldy #$05
jsr ldaxidx
sta _dw_bm+2
stx _dw_bm+2+1
; Push byte 0 of p2, zero-extended to 16 bits.
ldy #$03
jsr ldaxysp
ldy #$00
sta ptr1
stx ptr1+1
lda (ptr1),y
jsr pusha0
; One word is now pushed, so Y=3 presumably addresses p3 here:
; AX = p3[0] - $80, then _r0exp = low byte of (p2[0] - AX),
; i.e. p2[0] - p3[0] + $80.
ldy #$03
jsr ldaxysp
ldy #$00
sta ptr1
stx ptr1+1
ldx #$00
lda (ptr1),y
ldy #$80
jsr decaxy
jsr tossubax
sta _r0exp
; Compare _r0exp (pushed) against p2[0].
lda _r0exp
jsr pusha0
ldy #$05
jsr ldaxysp
ldy #$00
sta ptr1
stx ptr1+1
ldx #$00
lda (ptr1),y
jsr tosicmp
; Continue at L0533 when the compare reports "less" or "equal"
; (presumably _r0exp <= p2[0]); otherwise produce special result $FD.
; NOTE(review): looks like an exponent range check - confirm intent.
bcc L0533
beq L0533
; _fsetspec(p1, $FD) and return.
ldy #$07
jsr pushwysp
lda #$FD
jsr pusha
jsr _fsetspec
jmp incsp6
; If any of the four bytes of _dw_bm is non-zero, go to the main path at L0539
; (jne is presumably the ca65 long-branch macro).
L0533: lda _dw_bm
ora _dw_bm+1
jne L0539
lda _dw_bm+2
ora _dw_bm+2+1
jne L0539
; _dw_bm is all zero. If p3[0] is non-zero, handle it at L053F.
jsr ldax0sp
ldy #$00
sta ptr1
stx ptr1+1
lda (ptr1),y
bne L053F
; p3 has zero value words and zero byte 0. Check whether p2 is the same:
; any non-zero word (offsets 2..3, 4..5) or non-zero byte 0 -> L0541.
ldy #$03
jsr ldaxysp
ldy #$03
jsr ldaxidx
cpx #$00
bne L0541
cmp #$00
bne L0541
ldy #$03
jsr ldaxysp
ldy #$05
jsr ldaxidx
cpx #$00
bne L0541
cmp #$00
bne L0541
ldy #$03
jsr ldaxysp
ldy #$00
sta ptr1
stx ptr1+1
lda (ptr1),y
bne L0541
; Both p2 and p3 are all-zero in the checked fields:
; _fsetspec(p1, $FE) and return.
ldy #$07
jsr pushwysp
lda #$FE
jsr pusha
jsr _fsetspec
jmp incsp6
; p2 non-zero, p3 zero: _fsetspec(p1, $FF), then copy byte 1 of p2 into p1.
L0541: ldy #$07
jsr pushwysp
lda #$FF
jsr pusha
jsr _fsetspec
; Push p1, load p2[1], store it through the pushed pointer.
; NOTE(review): the store index relies on Y still being 1 after ldaidx -
; confirm that the runtime helper preserves Y.
ldy #$07
jsr pushwysp
ldy #$05
jsr ldaxysp
ldy #$01
jsr ldaidx
jsr staspidx
jmp incsp6
; _dw_bm is zero but p3[0] is not. Call _fisspec(p3, $FF); a zero return
; goes to L0551.
L053F: jsr pushw0sp
lda #$FF
jsr pusha
jsr _fisspec
tax
beq L0551
; Call _fisspec(p2, $FF); a zero return goes to L0551.
ldy #$05
jsr pushwysp
lda #$FF
jsr pusha
jsr _fisspec
tax
beq L0551
; Both _fisspec calls returned non-zero: _fsetspec(p1, $FE) and return.
ldy #$07
jsr pushwysp
lda #$FE
jsr pusha
jsr _fsetspec
jmp incsp6
; Otherwise call _fcopy with p1 and (after the first push shifts the offsets)
; presumably p3, then return.
L0551: ldy #$07
jsr pushwysp
ldy #$05
jsr pushwysp
jsr _fcopy
jmp incsp6
; Main path (_dw_bm non-zero). If p2[0] is zero, or _dw_z1 is all zero,
; write a zero result at L0562; otherwise run the division at L0561.
L0539: ldy #$03
jsr ldaxysp
ldy #$00
sta ptr1
stx ptr1+1
lda (ptr1),y
beq L0562
lda _dw_z1
ora _dw_z1+1
bne L0561
lda _dw_z1+2
ora _dw_z1+2+1
bne L0561
; Zero result: push p1 twice, then store AX = 0 into p1 offsets 4..5 and 2..3
; (each staxspidx presumably pops one pointer).
L0562: ldy #$07
jsr pushwysp
ldy #$09
jsr pushwysp
ldx #$00
txa
ldy #$04
jsr staxspidx
ldy #$02
jsr staxspidx
; p1[0] = 0
ldy #$05
jsr ldaxysp
sta sreg
stx sreg+1
lda #$00
tay
sta (sreg),y
; p1[1] = 1, stored by the shared tail at L0A13, which also returns.
ldy #$05
jsr ldaxysp
sta sreg
stx sreg+1
lda #$01
tay
jmp L0A13
; Division set-up. Y is still 0 from the "ldy #$00" at L0539, so this clears
; all four bytes of the quotient _dw_z. The loop counter starts at $20 (32).
L0561: tya
sta _dw_z+2
sta _dw_z+2+1
sta _dw_z
sta _dw_z+1
lda #$20
; ---- loop head: A holds the new counter value; exit to L0577 when it is 0.
L0A21: sta _loop_cnt
lda _loop_cnt
jeq L0577
; While the counter is >= $11 the upper quotient word is not shifted.
lda _loop_cnt
cmp #$11
bcs L0A26
; Counter <= $10: shift the upper quotient word left by one, bringing in the
; top bit (bit 7 of _dw_z+3) of the lower word as the new bit 0.
lda _dw_z+2+1
and #$80
beq L057E
lda _dw_z
ldx _dw_z+1
jsr shlax1
ora #$01
jmp L0A1D
L057E: lda _dw_z
ldx _dw_z+1
jsr shlax1
L0A1D: sta _dw_z
stx _dw_z+1
; Shift the lower quotient word left by one (always).
L0A26: lda _dw_z+2
ldx _dw_z+2+1
jsr shlax1
sta _dw_z+2
stx _dw_z+2+1
; 32-bit compare of _dw_z1 against _dw_bm, upper words first.
; X is set to $FF when _dw_z1 < _dw_bm, to $00 otherwise.
lda _dw_z1
ldx _dw_z1+1
jsr pushax
lda _dw_bm
ldx _dw_bm+1
jsr tosicmp
bcs L058C
ldx #$FF
jmp L05A5
; Upper word not less: repeat the compare to tell "greater" from "equal".
L058C: lda _dw_z1
ldx _dw_z1+1
jsr pushax
lda _dw_bm
ldx _dw_bm+1
jsr tosicmp
bcc L0592
bne L059E
; Upper words equal: the lower words decide.
L0592: lda _dw_z1+2
ldx _dw_z1+2+1
jsr pushax
lda _dw_bm+2
ldx _dw_bm+2+1
jsr tosicmp
bcs L0598
ldx #$FF
jmp L05A5
; Lower word not less. The compare result below is not branched on; the call
; still pops the pushed word.
L0598: lda _dw_z1+2
ldx _dw_z1+2+1
jsr pushax
lda _dw_bm+2
ldx _dw_bm+2+1
jsr tosicmp
L059E: ldx #$00
; X = $00 -> subtract at L0588; X = $FF -> store the lower quotient word back
; unchanged (new quotient bit stays 0).
L05A5: txa
bpl L0588
lda _dw_z+2
ldx _dw_z+2+1
jmp L0A1E
; _dw_z1 >= _dw_bm: compute _dw_z1 -= _dw_bm as two 16-bit halves.
; First check whether the lower-word subtraction needs a borrow.
L0588: lda _dw_z1+2
ldx _dw_z1+2+1
jsr pushax
lda _dw_bm+2
ldx _dw_bm+2+1
jsr tosicmp
bcs L05A9
; Borrow case: lower word = ($FFFF - bm.lo) + z1.lo + 1,
; upper word = z1.hi - bm.hi - 1.
ldx #$FF
txa
jsr pushaFF
lda _dw_bm+2
ldx _dw_bm+2+1
jsr tossubax
clc
adc _dw_z1+2
pha
txa
adc _dw_z1+2+1
tax
pla
jsr incax1
sta _dw_z1+2
stx _dw_z1+2+1
lda _dw_z1
ldx _dw_z1+1
jsr pushax
lda _dw_bm
ldx _dw_bm+1
jsr tossubax
jsr decax1
jmp L0A1F
; No-borrow case: plain 16-bit subtraction of each half.
L05A9: lda _dw_z1+2
ldx _dw_z1+2+1
jsr pushax
lda _dw_bm+2
ldx _dw_bm+2+1
jsr tossubax
sta _dw_z1+2
stx _dw_z1+2+1
lda _dw_z1
ldx _dw_z1+1
jsr pushax
lda _dw_bm
ldx _dw_bm+1
jsr tossubax
; Store the new upper word of _dw_z1 and set bit 0 of the quotient.
L0A1F: sta _dw_z1
stx _dw_z1+1
lda _dw_z+2
ldx _dw_z+2+1
ora #$01
L0A1E: sta _dw_z+2
stx _dw_z+2+1
; Shift _dw_bm right by one. When the counter is <= $10 only the lower word
; is shifted (the upper word is not touched on that path).
lda _loop_cnt
cmp #$11
bcs L05B6
lda _dw_bm+2
ldx _dw_bm+2+1
jsr shrax1
sta _dw_bm+2
stx _dw_bm+2+1
jmp L05BA
; Counter >= $11: full 32-bit right shift; bit 0 of the upper word becomes
; bit 15 of the lower word.
L05B6: lda _dw_bm
and #$01
beq L05BB
lda _dw_bm+2
ldx _dw_bm+2+1
jsr shrax1
pha
txa
ora #$80
tax
pla
jmp L0A20
L05BB: lda _dw_bm+2
ldx _dw_bm+2+1
jsr shrax1
L0A20: sta _dw_bm+2
stx _dw_bm+2+1
lda _dw_bm
ldx _dw_bm+1
jsr shrax1
sta _dw_bm
stx _dw_bm+1
; Decrement the counter; the loop head stores it and tests for zero.
L05BA: lda _loop_cnt
sec
sbc #$01
jmp L0A21
; ---- after the loop: normalise the quotient.
; A zero quotient skips normalisation.
L0577: lda _dw_z
ora _dw_z+1
bne L05CB
lda _dw_z+2
ora _dw_z+2+1
beq L05CC
; Normalisation loop: stop when bit 7 of _dw_z+1 (top bit of the upper word)
; is set, or when _r0exp has reached 0. bnegax presumably yields non-zero
; only when AX == 0, so "beq" is taken once the top bit is set.
L05CB: lda _dw_z+1
and #$80
tax
lda #$00
jsr bnegax
beq L05CC
lda _r0exp
beq L05CC
; Shift the 32-bit quotient left by one (top bit of the lower word moves into
; bit 0 of the upper word) and decrement _r0exp.
lda _dw_z+2+1
and #$80
beq L05D2
lda _dw_z
ldx _dw_z+1
jsr shlax1
ora #$01
jmp L0A22
L05D2: lda _dw_z
ldx _dw_z+1
jsr shlax1
L0A22: sta _dw_z
stx _dw_z+1
lda _dw_z+2
ldx _dw_z+2+1
jsr shlax1
sta _dw_z+2
stx _dw_z+2+1
lda _r0exp
sec
sbc #$01
sta _r0exp
jmp L05CB
; ---- write the result record.
; p1 offsets 2..3 = upper quotient word.
L05CC: ldy #$05
jsr ldaxysp
sta ptr1
stx ptr1+1
lda _dw_z
ldx _dw_z+1
ldy #$02
sta (ptr1),y
iny
txa
sta (ptr1),y
; p1 offsets 4..5 = lower quotient word.
ldy #$05
jsr ldaxysp
sta ptr1
stx ptr1+1
lda _dw_z+2
ldx _dw_z+2+1
ldy #$04
sta (ptr1),y
iny
txa
sta (ptr1),y
; Compare byte 1 of p2 with byte 1 of p3 (the second Y=3 load happens after a
; push, so it presumably addresses p3).
ldy #$03
jsr ldaxysp
ldy #$01
jsr ldaidx
jsr pushax
ldy #$03
jsr ldaxysp
ldy #$01
jsr ldaidx
jsr tosicmp
beq L05E1
; Bytes differ: p1[1] = 0.
ldy #$05
jsr ldaxysp
sta sreg
stx sreg+1
lda #$00
jmp L0A23
; Bytes equal: p1[1] = 1.
L05E1: ldy #$05
jsr ldaxysp
sta sreg
stx sreg+1
lda #$01
L0A23: ldy #$01
sta (sreg),y
; _zexp = 0 when the quotient is zero (A is 0 from the ORA at that point),
; otherwise _zexp = _r0exp.
lda _dw_z
ora _dw_z+1
bne L05E8
lda _dw_z+2
ora _dw_z+2+1
beq L0A1B
L05E8: lda _r0exp
L0A1B: sta _zexp
; p1[0] = _zexp
ldy #$05
jsr ldaxysp
sta sreg
stx sreg+1
lda _zexp
ldy #$00
; Shared store-and-return tail: write A to (sreg),Y and drop the 6 bytes of
; arguments.
L0A13: sta (sreg),y
jmp incsp6
.endproc
I'm not so bad, once you get to know me.