inc hl
ld d,(hl)
1 = d + 8 + 8 + 4
- 2 = d + 8 + 8 + 8
\ No newline at end of file
+ 2 = d + 8 + 8 + 8
+
+Structure member get:
+ Normally fetch pair
+ Then add pair and constant with result in hl
+
+ ld l,c ; 4
+ ld h,b ; 4
+ inc hl .. ; 6 = 8 + 6n
+or
+ ld l,c ; 4
+ ld h,b ; 4
+ ld a,#0x06 ; 7
+ add a,c ; 4
+ ld l,a ; 4
+ ld a,#0x00 ; 7
+ adc a,b ; 4
+ ld h,a ; 4 = 38
+alt: (only when result=hl and left, right = pair, const)
+ ld hl,#const ; 10
+ add hl,pair ; 11 = 21
+
+So (1) is best for n <= 2, (2) is just bad, (3) is good n > 2
+
+How about:
+ pair = pair + constant:
+1:
+ ld a,#0x08 ; 7
+ add a,c ; 4
+ ld c,a ; 4
+ ld a,#0x00 ; 7
+ adc a,b ; 4
+ ld b,a ; 4 = 30
+2:
+ ld hl,#const ; 10
+ add hl,pair ; 11
+ ld c,l ; 4
+ ld b,h ; 4 = 29
+One cycle. If I cache HL later it will throw away the advantage. Choose 1.
+
+PlusIncr on pairs:
+1:
+ inc pair ; 6 = 6n
+2:
+ ld a,#0x04 ; 7
+ add a,c ; 4
+ ld c,a ; 4
+ ld a,#0x00 ; 7
+ adc a,b ; 4
+ ld b,a ; 4 = 30
+So n <= 5 (1) is better.
+
+Frame pointer:
+It's nice to use HL as the temp register, but what if I used it as the
+frame pointer instead of ix?
+
+Instead of:
+ ld e,5(ix) ; 19
+ ld d,6(ix) ; 19 = 38
+
+ ld hl,#5 ; 10
+ add hl,sp ; 11
+ ld e,(hl) ; 7
+ inc hl ; 6
+ ld d,(hl) ; 7 = 41
+
+Things get better when you access the same set over again, as you get rid
+of the setup. But they get worse when both ops are on the stack/in
+direct space. Easiest this way for now. iy may benefit...
+
+cmpGt:
+ ld l,#0x80 ; 7
+ ld a,-1(ix) ; 19
+ xor a,#0x80 ; 7
+ ld h,a ; 4
+ ld a,#0x02 ; 7
+ sub a,-2(ix) ; 19
+ ld a,l ; 4
+ sbc a,h ; 4 = 71
+
+vs
+ ld hl,0x8002 ; 10
+ ld a,-2(ix) ; 19
+ xor 0x80 ; 7
+ ld d,a ; 4
+ ld e,-1(ix) ; 19
+ sbc hl,de ; 15 = 74
+
+Why is there the whole xor thing going on?
+
+cmpGt using sub:
+ left right l-r c expect
+ 0 0 0 0 false
+ -1 0 -1 0 false
+ 1 0 1 0 true
+ 0 -1 1 1 true
+ 0 1 -1 1 false
+
+With top most bits xored
+ 80h 80h 0 0 false
+ 7fh 80h FFh 1 false
+ 81h 80h 01h 0 true
+ 80h 7fh 01h 0 true
+ 80h 81h FFh 1 false
+
+r-l instead - ah.
+ 80h 80h 0 0 false
+ 7fh 80h 01h 0 false
+ 81h 80h FFh 1 true
+ 80h 7fh FFh 1 true
+ 80h 81h 01h 0 false
+
+How about using the sign bit and no XOR on r-l?
+ 0 0 0 false
+ FFh 0 01h false
+ 01h 0 FFh true
+ 0 FFh FFh true
+ 0 01h 01h false - works
+
+cmpEq:
+ ld hl,#nn ; 10
+ ld c,(ix+-5) ; 19
+ ld b,(ix+-4) ; 19
+ or a ; 4
+ sbc hl,bc ; 15
+ jp nz,l19 ; 10 = 77
+
+ ld a,-82(ix) ; 19
+ cp a,#0x01 ; 7
+ jp nz,00129$ ; 10
+ ld a,-81(ix) ; 19
+ or a,a ; 4
+ jp nz,00129$ ; 10 = 69
+
+Add:
+ ld a,c ; 4
+ add a,#0x04 ; 7
+ ld -4(ix),a ; 19
+ ld a,b ; 4
+ adc a,#0x00 ; 7
+ ld -3(ix),a ; 19 = 60
+vs
+ ld hl,#4 ; 10
+ add hl,bc ; 11
+ ld -4(ix),l ; 19
+ ld -3(ix),h ; 19 = 59
+
+Same argument as above - not worth the extra cycle.
+
+Pending optimisations:
+ iTemp1 = @iTemp2
+ iTemp3 = iTemp1
+
+ iTemp4 = something in direct space
+ ...
+ push iTemp4
+
+Swaps:
+ ld hl,bc ; 8
+ ld bc,de ; 8
+ ld de,hl ; 8
+
+vs
+ push bc ; 11
+ ld bc,de ; 8
+ pop de ; 11
+
+Swaps 2:
+ ld a,h
+ ld h,b
+ ld b,a
+ ld a,l
+ ld l,c
+ ld c,a ; 6*4 = 24
+
+Cleaning up the arguments to a call:
+ ld iy,#n ; 14
+ add iy,sp ; 15
+ ld sp,iy ; 10 = 39
+
+ pop af ; 5/byte
+
+
+So for 8 bytes and above use the first form.
+
+Pointer assign:
+ ld hl,bc ; 4+4
+ ld e,(hl) ; 7
+ inc hl ; 6
+ ld d,(hl) ; 7
+
+vs:
+ ld a,(bc) ; 7
+ ld e,a ; 4
+ inc bc ; 6
+ ld a,(bc) ; 7
+ ld d,a ; 4
+
+Same cost. Not worth it, although it does free up HL.
+
+Shift left signed on HL
+ sla l
+ rl h ; 8+8 = 16
+
+ add hl,hl ; 11