#define _SDCC_MANGLES_SUPPORT_FUNS 1
#define _SDCC_Z80_STYLE_LIB_OPT 1
+/* The following are disabled to make the dhrystone test more authentic.
+ */
+#define _SDCC_PORT_PROVIDES_MEMCPY 1
+#define _SDCC_PORT_PROVIDES_STRCMP 1
+/* Register allocator is as good as hand coded asm. Cool. */
+#define _SDCC_PORT_PROVIDES_STRCPY 0
+
#endif
#include "string.h"
#include <sdcc-lib.h>
+#if !_SDCC_PORT_PROVIDES_MEMCPY
+
#define NULL (void *)0
#pragma NOINDUCTION
return(ret);
#endif
}
+#endif
#include "string.h"
#include <sdcc-lib.h>
+#if !_SDCC_PORT_PROVIDES_STRCMP
+
#define NULL (void *)0
int strcmp (
return( ret );
#endif
}
+
+#endif
+
#include "string.h"
#include <sdcc-lib.h>
+#if !_SDCC_PORT_PROVIDES_STRCPY
+
#define NULL (void *)0
char _generic *strcpy (
return d;
#endif
}
+
+#endif
SCC = $(TOPDIR)/bin/sdcc -mz80
SAS = $(TOPDIR)/bin/as-z80
-OBJ = div.o mul.o putchar.o printf.o shift.o stubs.o # asm_strings.o string.s
+OBJ = div.o mul.o putchar.o printf.o shift.o stubs.o \
+ asm_strings.o
+
LIB = z80.lib
CC = $(SCC)
AS = $(SAS)
+ASFLAGS = -plosgff
+
CFLAGS = -I../../include -I.
all: $(LIB) crt0.o
.c.o:
$(CC) $(CFLAGS) -c $<
+.s.o:
+ $(AS) $(ASFLAGS) $@ $<
+
_dummy:
clean:
-------------------
Z80 specific routines.
+
+Notes:
+* Cost of ld r,(ix+n): 19
+* Cost of ld r,(hl); inc hl: 7+6 = 13 and you don't have to pop ix
+
\ No newline at end of file
;; Why - because I want a better dhrystone score :)
+ ;; strcpy is disabled as the C version is almost as good.
+ ;; Just the setup and return is slower.
+ .if 0
; char *strcpy(char *dest, const char *source)
_strcpy::
- push de
- push ix
- ld ix,#0
- add ix,sp
- ld l,6(ix)
- ld h,7(ix)
- ld e,8(ix)
- ld d,9(ix)
-
- push hl
-1$:
- ld a,(de)
- ld (hl),a
- inc hl
- inc de
- or a,a
- jr nz,1$
-
- pop hl
- pop ix
- pop de
- ret
+ ;; Fall through to the correct type
+__strcpy_rrf_s::
+ ;; NOTE(review): the effect of this rst 0x08 call with A=5 is not
+ ;; visible in this file - confirm against the runtime before relying on it.
+ ld a,#5
+ rst 0x08
+__strcpy_rrx_s::
+ ;; In: dest pointer at 2(sp), source pointer at 4(sp).
+ ;; Out: HL = dest. Uses A, BC, DE, HL.
+ ld hl,#2
+ add hl,sp
+ ld e,(hl)
+ inc hl
+ ld d,(hl) ; DE = dest
+ inc hl
+ ld c,(hl)
+ inc hl
+ ld b,(hl) ; BC = source
+ ;; Setup the return value: strcpy returns dest, which lives in DE.
+ ld l,e
+ ld h,d
+1$:
+ ld a,(bc)
+ ld (de),a
+ ;; Advance both pointers. 16 bit inc leaves the flags alone, and
+ ;; the terminator test below sets them from A anyway.
+ inc bc
+ inc de
+ or a
+ jp nz,1$ ; loop until the terminating zero has been copied
+ ret
+ ;; Notes on strcpy styles:
+ ;; *de = *hl; hl++; de++; or a; ret z; jp - slower as jp is
+ ;; same cost as conditional jump, so condition on ret is more expensive.
+ ;; *de = *bc; bc++; de++; or a, jp nz - OK
+ ;; Can't use LDI as need to check for end of string.
+ ;; Above also matches the z88dk version.
+ .endif
+
; void *memcpy(void *dest, const void *source, int count)
-_memcpy::
- push de
- push bc
- push ix
- ld ix,#0
- add ix,sp
- ld l,8(ix)
- ld h,9(ix)
- ld e,10(ix)
- ld d,11(ix)
- ld c,12(ix)
- ld b,13(ix)
+_memcpy::
+ ;; Fall through to correct type
+__memcpy_rrf_s::
+ ;; NOTE(review): the effect of this rst 0x08 call with A=5 is not
+ ;; visible in this file - confirm against the runtime before relying on it.
+ ld a,#5
+ rst 0x08
+__memcpy_rrx_s::
+ ;; In: dest at 2(sp), source at 4(sp), count at 6(sp).
+ ;; Out: HL = dest, as the C prototype requires.
+ ;; Using LDIR
+ ;; LDIR: do; *DE = *HL; DE++; HL++; BC--; while BC != 0
+
+ ;; All registers are already saved.
+ ld hl,#2
+ add hl,sp
+ ld e,(hl)
+ inc hl
+ ld d,(hl) ; DE = dest
+ inc hl
+ ld a,(hl) ; A = low byte of source
+ inc hl
+ ld b,(hl) ; B = high byte of source (parked)
+ inc hl
+ ld c,(hl) ; C = low byte of count
+ inc hl
+ ld h,(hl) ; H = high byte of count (parked)
+ ld l,a
+ ld a,h
+ ld h,b ; HL = source
+ ld b,a ; BC = count, A = high byte of count
- inc b
- inc c
- push hl
+ ;; Keep dest for the return value: LDIR advances DE and HL, so
+ ;; neither holds dest afterwards. push leaves A and flags intact.
+ push de
+ ;; Pending: could optimise this check to occur earlier.
+ ;; LDIR with BC == 0 would copy 64k, so skip it for a zero count.
+ or c
+ jr z,3$
- jr 2$
-1$:
- ld a,(de)
- ld (hl),a
- inc de
- inc hl
-2$:
- dec c
- jr nz,1$
- dec b
- jr nz,1$
-
- pop hl
- pop ix
- pop bc
- pop de
- ret
+ ldir
+3$:
+ pop hl ; HL = dest
+ ret
; int strcmp(const char *s1, const char *s2)
_strcmp::
- push de
- push ix
- ld ix,#0
- add ix,sp
- ld e,6(ix)
- ld d,7(ix)
- ld l,8(ix)
- ld h,9(ix)
+ ;; Fall through to the correct type
+__strcmp_rrf_s::
+ ;; NOTE(review): the effect of this rst 0x08 call with A=5 is not
+ ;; visible in this file - confirm against the runtime before relying on it.
+ ld a,#5
+ rst 0x08
+__strcmp_rrx_s::
+ ;; In: s1 at 2(sp), s2 at 4(sp).
+ ;; Out: HL = 0 if equal, HL > 0 if s1 > s2, HL < 0 if s1 < s2,
+ ;; comparing the first differing bytes as unsigned values.
+ ld hl,#2
+ add hl,sp
+
+ ld e,(hl)
+ inc hl
+ ld d,(hl) ; DE = s1
+ inc hl
+ ld a,(hl)
+ inc hl
+ ld h,(hl)
+ ld l,a ; HL = s2
+
+1$:
+ ld a,(de)
+ sub (hl) ; A = *s1 - *s2, carry set when *s1 < *s2 (unsigned)
+
+ ;; Normally not taken, so use a jr (12/7) instead of jp (10)
+ jr nz,2$
- jr 1$
-2$:
- ld a,(de)
- sub (hl)
- jr nz,4$
- ;; A == 0
- cp (hl)
- jr z,3$
-1$:
- inc de
- inc hl
- jr 2$
+ ;; A == 0, so this sets Z (and clears carry) only at the terminator
+ cp (hl)
-3$:
- ld hl,#0
- jr 5$
-4$:
- ld hl,#1
- jr nc,5$
- ld hl,#-1
-5$:
- pop ix
- pop de
- ret
-
\ No newline at end of file
+ inc de
+ inc hl
+ ;; Normally taken. Flag from the cp above (16 bit inc leaves flags alone).
+ jp nz,1$
+2$:
+ ;; Build the result from the borrow, not from bit 7 of the difference:
+ ;; an 8 bit difference has the wrong sign when the bytes differ by
+ ;; 128 or more. Carry is still live here - from the sub on a mismatch,
+ ;; or cleared by the cp when both strings ended together.
+ ld l,a ; L = difference (0 only when the strings are equal)
+ sbc a ; A = 0x00 if no borrow, 0xFF if *s1 < *s2
+ ld h,a
+ ret
+
+
;; Register used: AF,BC,DE,HL
.mul16:
.mulu16:
- LD HL,#0x00 ; Product = 0
- LD A,#15 ; Count = bit length - 1
- ;; Shift-and-add algorithm
- ;; If MSB of multiplier is 1, add multiplicand to partial product
- ;; Shift partial product, multiplier left 1 bit
-.mlp:
- SLA E ; Shift multiplier left 1 bit
- RL D
- jp NC,.mlp1 ; Jump if MSB of multiplier = 0
- ADD HL,BC ; Add multiplicand to partial product
-.mlp1:
- ADD HL,HL ; Shift partial product left
- DEC A
- jp NZ,.mlp ; Continue until count = 0
- ;; Add multiplicand one last time if MSB of multiplier is 1
- BIT 7,D ; Get MSB of multiplier
- JR Z,.mend ; Exit if MSB of multiplier is 0
- ADD HL,BC ; Add multiplicand to product
-.mend:
- ; HL = result
- ret
+ ld hl,#0 ; HL = running product; ends as the low 16 bits of BC * DE
+ ld a,b ; A = high byte of the multiplier held in BC
+ ; ld c,c
+ ld b,#16 ; B = djnz loop count: one pass per multiplier bit
+ ;; Optimise for the case when this side has 8 bits of data or
+ ;; less. This is often the case with support address calls.
+ or a ; is the high byte of the multiplier zero?
+ jp nz,1$ ; no: run all 16 passes over A:C
+
+ ld b,#8 ; yes: only the 8 bits of the low byte matter
+ ld a,c ; so shift them out through A directly
+1$:
+ ;; Taken from z88dk, which originally borrowed from the
+ ;; Spectrum rom.
+ add hl,hl ; shift the partial product left one bit
+ rl c ; shift A:C left; the bit leaving C enters A below
+ rla ;DLE 27/11/98
+ jr nc,2$ ; multiplier bit was 0: nothing to add
+ add hl,de ; multiplier bit was 1: add the multiplicand in DE
+2$:
+ djnz 1$ ; repeat for the remaining multiplier bits
+ ret ; product is returned in HL
/*-------------------------------------------------------------------------
gen.c - Z80 specific code generator.
+
+ Michael Hope <michaelh@juju.net.nz> 2000
+ Based on the mcs51 generator -
+ Sandeep Dutta . sandeep.dutta@usa.net (1998)
+ and - Jean-Louis VERN.jlvern@writeme.com (1999)
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by the
+ Free Software Foundation; either version 2, or (at your option) any
+ later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+ In other words, you are welcome to use, share and improve this program.
+ You are forbidden to forbid anyone else to use, share and improve
+ what you give them. Help stamp out software-hoarding!
+
+-------------------------------------------------------------------------*/
+
+/*
Benchmarks on dhry.c 2.1 with 32766 loops and a 10ms clock:
ticks dhry size
Base with asm strcpy / strcmp / memcpy: 23198 141 1A14
5. Optimised strcmp further 21660 151 228C
6. Optimised memcpy by unrolling 20885 157 2201
7. After turning loop induction on 19862 165 236D
+ 8. Same as 7 but with more info
+ 9. With asm optimised strings 17030 192 2223
- Michael Hope <michaelh@juju.net.nz> 2000
- Based on the mcs51 generator -
- Sandeep Dutta . sandeep.dutta@usa.net (1998)
- and - Jean-Louis VERN.jlvern@writeme.com (1999)
-
- This program is free software; you can redistribute it and/or modify it
- under the terms of the GNU General Public License as published by the
- Free Software Foundation; either version 2, or (at your option) any
- later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
-
- In other words, you are welcome to use, share and improve this program.
- You are forbidden to forbid anyone else to use, share and improve
- what you give them. Help stamp out software-hoarding!
-
--------------------------------------------------------------------------*/
+ 10 and below are with asm strings off.
+
+ Apparent advantage of turning on regparams:
+ 1. Cost of push
+ Decent case is push of a constant
+ - ld hl,#n; push hl: (10+11)*nargs
+ 2. Cost of pull from stack
+ Using asm with ld hl, etc
+ - ld hl,#2; add hl,sp; (ld bc,(hl); hl+=2)*nargs
+ 10+11+(7+6+7+6)*nargs
+ 3. Cost of fixing stack
+ - pop hl*nargs
+ 10*nargs
+
+ So cost is (10+11+7+6+7+6+10)*nargs+10+11
+ = 57*nargs+21
+ = 135 for mul, div, strcmp, strcpy (nargs = 2)
+ Saving of (98298+32766+32766+32766)*135 = 26540460
+ At 192 d/s for 682411768t, speed up to 200. Hmm.
+*/
#include <stdio.h>
#include <stdlib.h>
_memcpy 80800956 10.16
_strcmp 97216722 12.22
; 795663339 t-states
+
+-- 8
+; Function total-ticks total-calls ticks-per-call total-percent
+0000 71 0 0 0.00
+_main 122823011 1 122823011 15.45
+_Proc_1 69267324 32766 2114 8.71
+_Proc_2 16514064 32766 504 2.08
+_Proc_3 13368528 32766 408 1.68
+_Proc_4 11009376 32766 336 1.38
+_Proc_5 4914900 32766 150 0.62
+_Proc_6 18840450 32766 575 2.37
+_Func_1 13466826 98298 137 1.69
+_Func_3 5308092 32766 162 0.67
+_Proc_7 22313646 98298 227 2.81
+_Proc_8 83553300 32766 2550 10.51
+_Func_2 32438340 32766 990 4.08
+_strcpy 46497792 32768 1419 5.85
+__mulsint_rrf_s 88861392 98298 904 11.18
+__divsint_rrf_s 67760088 32766 2068 8.52
+__rlulong_rrf_s 13056 128 102 0.00
+__divulong 52549 2 26274 0.01
+_memcpy 80800956 32766 2466 10.16
+_strcmp 97216722 32766 2967 12.23
+; 795020510 t-states
+; So the lib functions except mul, div etc take 28% of the time.
+
+-- 9
+; Function total-ticks total-calls ticks-per-call total-percent
+0000 71 0 0 0.00
+_main 122864113 1 122864113 18.00
+_Proc_1 96102678 32766 2933 14.08
+_Proc_2 16514064 32766 504 2.42
+_Proc_3 13368528 32766 408 1.96
+_Proc_4 11009376 32766 336 1.61
+_Proc_5 4914900 32766 150 0.72
+_Proc_6 18840450 32766 575 2.76
+_Func_1 13466826 98298 137 1.97
+_Func_3 5308092 32766 162 0.78
+_Proc_7 22313646 98298 227 3.27
+_Proc_8 83553300 32766 2550 12.24
+_Func_2 32438340 32766 990 4.75
+_strcpy 46497792 32768 1419 6.81
+__mulsint_rrf_s 88861392 98298 904 13.02
+__divsint_rrf_s 67760088 32766 2068 9.93
+__rlulong_rrf_s 13056 128 102 0.00
+__divulong 52213 2 26106 0.01
+__memcpy_rrf_s 3702558 32766 113 0.54
+__strcmp_rrf_s 34830258 32766 1063 5.10
+; 682411768 t-states