kod memcpy CC65 jest mocno optymalizowany, korzystam z niego w MP (MadPascal)
; Ullrich von Bassewitz, 2003-08-20
; Performance increase (about 20%) by
; Christian Krueger, 2009-09-13
.proc @moveu ; assert Y = 0
ptr1 = edx
ptr2 = ecx
ptr3 = eax
stx @sp
ldy #0
ldx ptr3+1 ; Get high byte of n
beq L2 ; Jump if zero
L1: .rept 2 ; Unroll this a bit to make it faster...
lda (ptr1),Y ; copy a byte
sta (ptr2),Y
iny
.endr
bne L1
inc ptr1+1
inc ptr2+1
dex ; Next 256 byte block
bne L1 ; Repeat if any
; the following section could be 10% faster if we were able to copy
; back to front - unfortunately we are forced to copy strict from
; low to high since this function is also used for
; memmove and blocks could be overlapping!
; {
L2: ; assert Y = 0
ldx ptr3 ; Get the low byte of n
beq done ; something to copy
L3: lda (ptr1),Y ; copy a byte
sta (ptr2),Y
iny
dex
bne L3
; }
done ldx #0
@sp equ *-1
rts
.endp
@move .proc (.word ptr1, ptr2, ptr3) .var
ptr1 = edx
ptr2 = ecx
ptr3 = eax
src = ptr1
dst = ptr2
cnt = ptr3
cpw ptr2 ptr1
scs
jmp @moveu
stx @sp
; Copy downwards. Adjust the pointers to the end of the memory regions.
lda ptr1+1
add ptr3+1
sta ptr1+1
lda ptr2+1
add ptr3+1
sta ptr2+1
; handle fractions of a page size first
ldy ptr3 ; count, low byte
bne @entry ; something to copy?
beq PageSizeCopy ; here like bra...
@copyByte:
lda (ptr1),y
sta (ptr2),y
@entry:
dey
bne @copyByte
lda (ptr1),y ; copy remaining byte
sta (ptr2),y
PageSizeCopy: ; assert Y = 0
ldx ptr3+1 ; number of pages
beq done ; none? -> done
@initBase:
dec ptr1+1 ; adjust base...
dec ptr2+1
dey ; in entry case: 0 -> FF
lda (ptr1),y ; need to copy this 'intro byte'
sta (ptr2),y ; to 'land' later on Y=0! (as a result of the '.repeat'-block!)
dey ; FF ->FE
@copyBytes:
.rept 2 ; Unroll this a bit to make it faster...
lda (ptr1),y
sta (ptr2),y
dey
.endr
@copyEntry: ; in entry case: 0 -> FF
bne @copyBytes
lda (ptr1),y ; Y = 0, copy last byte
sta (ptr2),y
dex ; one page to copy less
bne @initBase ; still a page to copy?
done ldx #0
@sp equ *-1
rts
.endp
*- TeBe/Madteam
3x Atari 130XE, SDX, CPU 65816, 2x VBXE, 2x IDE Plus rev. C