From c4b55ee40ddb79b816c42b1567dc9b5858153b66 Mon Sep 17 00:00:00 2001 From: michaelh Date: Sun, 26 Aug 2001 00:49:03 +0000 Subject: [PATCH] Added support for gsinit packing. git-svn-id: https://sdcc.svn.sourceforge.net/svnroot/sdcc/trunk/sdcc@1171 4a8a32a2-be11-0410-ad9d-d568d2c75423 --- device/lib/z80/crt0.s | 51 ++++++ src/z80/gen.c | 245 ++++++++++++++++++++++++++ src/z80/main.c | 2 +- src/z80/profile.txt | 23 +++ support/regression/tests/staticinit.c | 70 ++++++++ 5 files changed, 390 insertions(+), 1 deletion(-) diff --git a/device/lib/z80/crt0.s b/device/lib/z80/crt0.s index c05ae1d1..23843568 100644 --- a/device/lib/z80/crt0.s +++ b/device/lib/z80/crt0.s @@ -63,6 +63,57 @@ _exit:: halt jr 1$ + ;; Special RLE decoder used for initialising global data. The destination address and the encoded data follow the call inline. +__initrleblock:: + ;; Pop the return address, which points at the inline data + pop hl + ;; Save registers + push bc + push de + + ;; Pull the destination address out + ld c,(hl) + inc hl + ld b,(hl) + inc hl +1$: + ;; Fetch the length byte of the next run or block + ld e,(hl) + inc hl + ;; Negative (bit 7 set) means a run + bit 7,e + jp z,2$ + ;; Code for expanding a run: the next byte is stored repeatedly while e counts up to zero + ld a,(hl) + inc hl +3$: + ld (bc),a + inc bc + inc e + jp nz,3$ + jp 1$ +2$: + ;; Zero means end of the encoded data + xor a + or e + jp z,4$ + ;; Code for expanding a block: the next e bytes are copied straight through +5$: + ld a,(hl) + inc hl + ld (bc),a + inc bc + dec e + jp nz,5$ + jp 1$ +4$: + pop de + pop bc + + ;; Push the return address, now just past the encoded data, back onto the stack + push hl + ret + .area _GSINIT gsinit:: diff --git a/src/z80/gen.c b/src/z80/gen.c index d5009fd0..0482186c 100644 --- a/src/z80/gen.c +++ b/src/z80/gen.c @@ -28,6 +28,7 @@ 4. Optimised strcmp fun 21999 149 2294 5. Optimised strcmp further 21660 151 228C 6. Optimised memcpy by unroling 20885 157 2201 + 7. After turning loop induction on 19862 165 236D Michael Hope 2000 Based on the mcs51 generator - @@ -5566,6 +5567,246 @@ genReceive (iCode * ic) freeAsmop (IC_RESULT (ic), NULL, ic); } +enum + { + /** Maximum number of bytes to emit per .db line. */ + DBEMIT_MAX_RUN = 8 + }; + +/** Context for the byte output chunker, which groups bytes into .db lines of at most DBEMIT_MAX_RUN bytes. 
*/ +typedef struct +{ + unsigned char buffer[DBEMIT_MAX_RUN]; + int pos; +} DBEMITCTX; + + +/** Flushes a byte chunker by writing out everything in the buffer as + a single .db line and resetting it. Does nothing if the buffer is empty. +*/ +static void +_dbFlush(DBEMITCTX *self) +{ + char line[256]; + + if (self->pos > 0) + { + int i; + sprintf(line, ".db 0x%02X", self->buffer[0]); + + for (i = 1; i < self->pos; i++) + { + sprintf(line + strlen(line), ", 0x%02X", self->buffer[i]); + } + emit2(line); + } + self->pos = 0; +} + +/** Write out another byte, buffering until a full line of + DBEMIT_MAX_RUN bytes has been collected. +*/ +static void +_dbEmit(DBEMITCTX *self, int c) +{ + if (self->pos == DBEMIT_MAX_RUN) + { + _dbFlush(self); + } + self->buffer[self->pos++] = c; +} + +/** Context for a simple run length encoder. */ +typedef struct +{ + unsigned last; + unsigned char buffer[128]; + int pos; + /** Number of consecutive occurrences of last seen but not yet written out. */ + int runLen; +} RLECTX; + +enum + { + RLE_CHANGE_COST = 4, /* A run must be longer than this to be written out as a run. */ + RLE_MAX_BLOCK = 127 /* Upper limit on the length of a single block or run. */ + }; + +/** Flush the buffer of a run length encoder by writing out the block of + literal data that it currently contains, prefixed by its length. Runs are written by _rleAppend. +*/ +static void +_rleCommit(RLECTX *self) +{ + int i; + if (self->pos != 0) + { + DBEMITCTX db; + memset(&db, 0, sizeof(db)); + + emit2(".db %u", self->pos); + + for (i = 0; i < self->pos; i++) + { + _dbEmit(&db, self->buffer[i]); + } + _dbFlush(&db); + } + /* Reset the literal buffer. */ + self->pos = 0; +} + +/* Encoder design: + Can get either a run or a block of random stuff. + Only want to change state if a good run comes in or a run ends. + Detecting run end is easy. + Initial state? + + Say initial state is in run, len zero, last zero. Then if you get a + few zeros then something else then a short run will be output. + Seems OK. While in run mode, keep counting. While in random mode, + keep a count of the run. If run hits margin, output all up to run, + restart, enter run mode. +*/ + +/** Add another byte into the run length encoder, flushing as + required. 
The run length encoder uses the Amiga IFF style, where + a block is prefixed by its run length. A positive length means + the next n bytes pass straight through. A negative length means + that the next byte is repeated -n times. A zero terminates the + chunks. +*/ +static void +_rleAppend(RLECTX *self, int c) +{ + int i; + + if (c != self->last) + { + /* The run has stopped. See if it is worthwhile writing it out + as a run. Note that the random data comes in as runs of + length one. + */ + if (self->runLen > RLE_CHANGE_COST) + { + /* Yes, worthwhile. */ + /* Commit whatever was in the buffer. */ + _rleCommit(self); + emit2(".db -%u,0x%02X", self->runLen, self->last); + } + else + { + /* Not worthwhile. Append to the end of the random list. */ + for (i = 0; i < self->runLen; i++) + { + if (self->pos >= RLE_MAX_BLOCK) + { + /* The literal block is full. Commit it before appending more. */ + _rleCommit(self); + } + self->buffer[self->pos++] = self->last; + } + } + self->runLen = 1; + self->last = c; + } + else + { + if (self->runLen >= RLE_MAX_BLOCK) + { + /* The run has reached the maximum length. Commit whatever was in the buffer, then write the run out and start counting again. */ + _rleCommit(self); + + emit2 (".db -%u,0x%02X", self->runLen, self->last); + self->runLen = 0; + } + self->runLen++; + } +} + +static void +_rleFlush(RLECTX *self) /* Writes out any pending run and literal data, then resets the encoder. */ +{ + _rleAppend(self, -1); /* -1 differs from every byte value, so the pending run is forced out. */ + _rleCommit(self); + self->pos = 0; + self->last = 0; + self->runLen = 0; +} + +/** genArrayInit - Special code for initialising an array with constant + data. Emits a call to __initrleblock followed by the destination address and the run length encoded initialiser bytes. +*/ +static void +genArrayInit (iCode * ic) +{ + literalList *iLoop; + int ix; + int elementSize = 0, eIndex, i; + unsigned val, lastVal; /* lastVal is assigned below but never read. */ + sym_link *type; + RLECTX rle; + + memset(&rle, 0, sizeof(rle)); + + aopOp (IC_LEFT(ic), ic, FALSE, FALSE); + + if (AOP_TYPE(IC_LEFT(ic)) == AOP_IMMD) + { + /* Emit the support function call and the destination address. 
*/ + emit2("call __initrleblock"); + emit2(".dw %s", aopGetWord (AOP(IC_LEFT(ic)), 0)); + } + else + { + wassertl (0, "Unexpected operand to genArrayInit.\n"); + } + + type = operandType(IC_LEFT(ic)); + + if (type && type->next) + { + elementSize = getSize(type->next); + } + else + { + wassertl (0, "Can't determine element size in genArrayInit."); + } + + iLoop = IC_ARRAYILIST(ic); + lastVal = (unsigned)-1; + + /* Feed all the bytes into the run length encoder which will handle + the actual output. + This works well for mixed char data, and for random int and long + data. + */ + while (iLoop) + { + ix = iLoop->count; + + if (ix != 0) + { + for (i = 0; i < ix; i++) + { + for (eIndex = 0; eIndex < elementSize; eIndex++) + { + val = (((int)iLoop->literalValue) >> (eIndex * 8)) & 0xff; /* Extract byte eIndex of the element, least significant first. */ + _rleAppend(&rle, val); + } + } + } + + iLoop = iLoop->next; + } + + _rleFlush(&rle); + /* Mark the end of the encoded data with a zero length. */ + emit2(".db 0"); + + freeAsmop (IC_LEFT(ic), NULL, ic); +} + /*-----------------------------------------------------------------*/ /* genZ80Code - generate code for Z80 based controllers */ /*-----------------------------------------------------------------*/ @@ -5837,6 +6078,10 @@ genZ80Code (iCode * lic) addSet (&_G.sendSet, ic); break; + case ARRAYINIT: + genArrayInit(ic); + break; + default: ic = ic; /* piCode(ic,stdout); */ diff --git a/src/z80/main.c b/src/z80/main.c index 7784f38f..f60636d7 100644 --- a/src/z80/main.c +++ b/src/z80/main.c @@ -531,7 +531,7 @@ PORT z80_port = 1, /* transform >= to ! < */ 1, /* transform != to !(a == b) */ 0, /* leave == */ - FALSE, /* No array initializer support. */ + TRUE, /* Array initializer support. 
*/ PORT_MAGIC }; diff --git a/src/z80/profile.txt b/src/z80/profile.txt index 5efbb699..3f076e3b 100644 --- a/src/z80/profile.txt +++ b/src/z80/profile.txt @@ -89,3 +89,26 @@ __divulong 52549 0.01 _memcpy 80800956 9.66 _strcmp 97216722 11.62 ; 836567814 t-states + +-- 7 +0000 71 0.00 +_main 122822995 15.44 +_Proc_1 69889878 8.78 +_Proc_2 16514064 2.08 +_Proc_3 13368528 1.68 +_Proc_4 11009376 1.38 +_Proc_5 4914900 0.62 +_Proc_6 18840450 2.37 +_Func_1 13466826 1.69 +_Func_3 5308092 0.67 +_Proc_7 22313646 2.80 +_Proc_8 83553300 10.50 +_Func_2 32438340 4.08 +_strcpy 46497792 5.84 +__mulsint_rrf_s 88861392 11.17 +__divsint_rrf_s 67760088 8.52 +__rlulong_rrf_s 13056 0.00 +__divulong 72840 0.01 +_memcpy 80800956 10.16 +_strcmp 97216722 12.22 +; 795663339 t-states diff --git a/support/regression/tests/staticinit.c b/support/regression/tests/staticinit.c index 794b729e..75875320 100644 --- a/support/regression/tests/staticinit.c +++ b/support/regression/tests/staticinit.c @@ -42,3 +42,73 @@ static {type} smallSparseZero[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 }; +static {type} smallSparseZeroTail[] = { /* NOTE(review): not referenced by any test visible in this patch. */ + 1, 2, 3 +}; + +static void +testSmallSparseZero(void) +{ + ASSERT(smallSparseZero[0] == 0); + ASSERT(smallSparseZero[1] == 0); + ASSERT(smallSparseZero[2] == 0); + ASSERT(smallSparseZero[3] == 0); + ASSERT(smallSparseZero[4] == 0); + ASSERT(smallSparseZero[5] == 0); + ASSERT(smallSparseZero[6] == 0); + ASSERT(smallSparseZero[7] == 0); + ASSERT(smallSparseZero[8] == 0); +} + +static {type} largeMixed[] = { + 1, 2, 3, 4, 5, 6, 7, /* indices 0-6 */ + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, /* 8*12 = 96+7 = -102 */ + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 
1, 1, 1, 1, 1, + 3, 4, 5, 6, 3, 4, 5, 6, /* 8*17 = 136+7 = 143, the first index of the 3, 4, 5, 6 pattern */ + 3, 4, 5, 6, 3, 4, 5, 6, + 3, 4, 5, 6, 3, 4, 5, 6, + 3, 4, 5, 6, 3, 4, 5, 6, + 3, 4, 5, 6, 3, 4, 5, 6, + 3, 4, 5, 6, 3, 4, 5, 6, + 3, 4, 5, 6, 3, 4, 5, 6, + 3, 4, 5, 6, 3, 4, 5, 6, + 3, 4, 5, 6, 3, 4, 5, 6, + 3, 4, 5, 6, 3, 4, 5, 6, + 3, 4, 5, 6, 3, 4, 5, 6, + 3, 4, 5, 6, 3, 4, 5, 6, + 3, 4, 5, 6, 3, 4, 5, 6, + 3, 4, 5, 6, 3, 4, 5, 6, + 3, 4, 5, 6, 3, 4, 5, 6, + 3, 4, 5, 6, 3, 4, 5, 6, + 3, 4, 5, 6, 3, 4, 5, 6 +}; + +static void +testLargeMixed(void) +{ + ASSERT(largeMixed[0] == 1); + ASSERT(largeMixed[1] == 2); + ASSERT(largeMixed[7] == 1); + ASSERT(largeMixed[102] == 1); + ASSERT(largeMixed[143] == 3); + ASSERT(largeMixed[143+8] == 3); + ASSERT(largeMixed[143+16] == 3); + ASSERT(largeMixed[143+1] == 4); + ASSERT(largeMixed[143+8+1] == 4); + ASSERT(largeMixed[143+16+1] == 4); +} -- 2.30.2