X-Git-Url: https://git.gag.com/?a=blobdiff_plain;f=doc%2Fchoices.txt;h=1442eca452e5bed8306897b5062f5c87fce84d0b;hb=40de0984b032dbbf90dddb74d1a26a8d136a71ae;hp=080b0cf24a948918414914c1b83e3e6974a78d43;hpb=a408f6fc31450c42a558611971841af32c4edff4;p=fw%2Fsdcc

diff --git a/doc/choices.txt b/doc/choices.txt
index 080b0cf2..1442eca4 100644
--- a/doc/choices.txt
+++ b/doc/choices.txt
@@ -35,4 +35,208 @@ On stack word push
 	 inc  hl
 	 ld   d,(hl)
    1 = d + 8 + 8 + 4
-   2 = d + 8 + 8 + 8
\ No newline at end of file
+   2 = d + 8 + 8 + 8
+
+Structure member get:
+   Normally fetch pair
+   Then add pair and constant with result in hl
+
+   ld	l,c	; 4
+   ld	h,b	; 4
+   inc  hl ..	; 6	= 8 + 6n
+or
+   ld	l,c	; 4
+   ld	h,b	; 4
+   ld	a,#0x06	; 7
+   add	a,c	; 4
+   ld	l,a	; 4
+   ld	a,#0x00 ; 7
+   adc	a,b	; 4
+   ld	h,a	; 4	= 38
+alt: (only when result=hl and left, right = pair, const)
+   ld	   hl,#const	; 10
+   add	   hl,pair	; 11	= 21
+
+So (1) is best for n <= 2, (2) is just bad, (3) is good for n > 2
+
+How about:
+    pair = pair + constant:
+1:
+    ld	a,#0x08	; 7
+    add	a,c	; 4
+    ld	c,a	; 4
+    ld	a,#0x00	; 7
+    adc	a,b	; 4
+    ld	b,a	; 4	= 30
+2:
+	ld	hl,#const	; 10
+	add	hl,pair		; 11
+	ld	c,l		; 4
+	ld	b,h		; 4	= 29
+One cycle.  If I cache HL later it will throw away the advantage.  Choose 1.
+
+PlusIncr on pairs:
+1:
+	 inc	pair		; 6 	= 6n
+2:
+	ld	a,#0x04		; 7
+	add	a,c		; 4
+	ld	c,a		; 4
+	ld	a,#0x00		; 7
+	adc	a,b		; 4
+	ld	b,a		; 4 	= 30
+So n <= 5 (1) is better.
+
+Frame pointer:
+It's nice to use HL as the temp register, but what if I used it as the
+frame pointer instead of ix?
+
+Instead of:
+	ld	e,5(ix)		; 19
+	ld	d,6(ix)		; 19	= 38
+
+	ld	hl,#5		; 10
+	add	hl,sp		; 11
+	ld	e,(hl)		; 7
+	inc	hl		; 6
+	ld	d,(hl)		; 7	= 41
+
+Things get better when you access the same set over, as you get rid
+of the setup.  But they get worse when both ops are on the stack/in
+direct space.  Easiest this way for now.  iy may benefit...
+
+cmpGt:
+	ld	l,#0x80		; 7
+	ld	a,-1(ix)        ; 19
+	xor	a,#0x80		; 7
+	ld	h,a		; 4
+	ld	a,#0x02		; 7
+	sub	a,-2(ix)	; 19
+	ld	a,l		; 4
+	sbc	a,h		; 4 = 71
+
+vs
+        ld      hl,0x8002	; 10
+        ld      a,-2(ix)	; 19
+        xor     0x80		; 7
+        ld      d,a		; 4
+        ld      e,-1(ix)	; 19
+        sbc     hl,de		; 15 = 74
+
+Why is there the whole xor thing going on?
+
+cmpGt using sub:
+        left    right   l-r  	c       expect
+        0       0       0       0       false
+        -1      0       -1	0       false
+        1       0       1       0       true
+        0       -1      1       1       true
+        0       1       -1      1       false
+
+With top most bits xored
+        80h     80h     0       0	false
+        7fh     80h     FFh     1       false
+        81h     80h     01h     0       true
+        80h     7fh     01h     0       true
+        80h     81h     FFh     1       false
+
+r-l instead - ah.
+        80h     80h     0       0	false
+        7fh     80h     01h     0       false
+        81h     80h     FFh     1       true
+        80h     7fh     FFh     1       true
+        80h     81h     01h     0       false
+
+How about using the sign bit and no XOR on r-l?
+        0       0        0              false
+        FFh     0   	 01h            false
+        01h     0        FFh            true
+        0       FFh      FFh            true
+        0       01h      01h            false  - works
+
+cmpEq:
+        ld      hl,#nn		; 10
+	ld	c,(ix+-5)	; 19
+	ld	b,(ix+-4)	; 19
+	or	a		; 4
+	sbc	hl,bc		; 15
+	jp	nz,l19          ; 10 = 77
+
+	ld	a,-82(ix)	; 19
+	cp	a,#0x01		; 7
+	jp	nz,00129$	; 10
+	ld	a,-81(ix)	; 19
+	or	a,a		; 4
+	jp	nz,00129$	; 10 = 69
+
+Add:
+	ld	a,c             ; 4
+	add	a,#0x04         ; 7
+	ld	-4(ix),a        ; 19
+	ld	a,b             ; 4
+	adc	a,#0x00         ; 7
+	ld	-3(ix),a        ; 19 = 60
+vs
+        ld      hl,#4           ; 10
+        add     hl,bc           ; 11
+        ld      -4(ix),l        ; 19
+        ld      -3(ix),h        ; 19 = 59
+
+Same argument as above - not worth the extra cycle.
+
+Pending optimisations:
+        iTemp1 = @iTemp2
+        iTemp3 = iTemp1
+
+        iTemp4 = something in direct space
+        ...
+        push    iTemp4
+
+Swaps:
+        ld      hl,bc           ; 8
+        ld      bc,de           ; 8
+        ld      de,hl           ; 8
+
+vs
+        push    bc		; 11
+        ld      bc,de           ; 8
+        pop     de              ; 10
+
+Swaps 2:
+        ld      a,h
+        ld      h,b
+        ld      b,a
+        ld      a,l
+        ld      l,c
+        ld      c,a             ; 6*4 = 24
+
+Cleaning up the arguments to a call:
+         ld     iy,#n           ; 14
+         add    iy,sp           ; 15
+         ld     sp,iy           ; 10 = 39
+
+         pop    af              ; 5/byte
+
+
+So for 8 bytes and above use the first form.
+
+Pointer assign:
+        ld      hl,bc           ; 4+4
+        ld      e,(hl)          ; 7
+        inc     hl              ; 6
+        ld      d,(hl)          ; 7
+
+vs:
+        ld      a,(bc)          ; 7
+        ld      e,a             ; 4
+        inc     bc              ; 6
+        ld      a,(bc)          ; 7
+        ld      d,a             ; 4
+
+Same cost.  Not worth it, although it does free up HL.
+
+Shift left signed on HL
+      sla  l
+      rl   h                    ; 8+8 = 16
+
+      add  hl,hl                ; 11