From f7bf07dfdad260c1f219064957ef08fb480bf20f Mon Sep 17 00:00:00 2001 From: Keith Packard Date: Sat, 23 Jun 2012 02:12:58 -0700 Subject: [PATCH] altos: optimize Viterbi implementation Minimize data usage, make data arrays static Signed-off-by: Keith Packard --- src/core/ao_fec.h | 5 +- src/core/ao_fec_tx.c | 2 +- src/core/ao_viterbi.c | 149 +++++++++++++++++++++++------------------- 3 files changed, 86 insertions(+), 70 deletions(-) diff --git a/src/core/ao_fec.h b/src/core/ao_fec.h index 985352dd..d4f64b74 100644 --- a/src/core/ao_fec.h +++ b/src/core/ao_fec.h @@ -42,7 +42,7 @@ ao_fec_prepare(uint8_t *in, uint8_t len, uint8_t *out); * must be 'len' bytes long. 'out' and 'in' can be * the same array */ -uint8_t +void ao_fec_whiten(uint8_t *in, uint8_t len, uint8_t *out); /* @@ -57,13 +57,12 @@ ao_fec_encode(uint8_t *in, uint8_t len, uint8_t *out); uint8_t ao_fec_interleave(uint8_t *in, uint8_t len, uint8_t *out); - /* * Decode data. 'in' is one byte per bit, soft decision * 'out' must be len/8 bytes long */ uint8_t -ao_fec_decode(uint8_t *in, int in_len, uint8_t *out); +ao_fec_decode(uint8_t *in, uint16_t in_len, uint8_t *out); #endif /* _AO_FEC_H_ */ diff --git a/src/core/ao_fec_tx.c b/src/core/ao_fec_tx.c index bf32c506..c5f410b8 100644 --- a/src/core/ao_fec_tx.c +++ b/src/core/ao_fec_tx.c @@ -83,7 +83,7 @@ static const uint8_t whiten[] = { #include "ao_whiten.h" }; -uint8_t +void ao_fec_whiten(uint8_t *in, uint8_t len, uint8_t *out) { const uint8_t *w = whiten; diff --git a/src/core/ao_viterbi.c b/src/core/ao_viterbi.c index df95e979..1b1784b3 100644 --- a/src/core/ao_viterbi.c +++ b/src/core/ao_viterbi.c @@ -18,37 +18,26 @@ #include #include -/* - * 'input' is 8-bits per symbol soft decision data - * 'len' is output byte length - */ - struct ao_soft_sym { uint8_t a, b; }; -static const struct ao_soft_sym ao_fec_decode_table[16] = { +#define NUM_STATE 8 +#define NUM_HIST 8 +#define MOD_HIST(b) ((b) & 7) + +static const struct ao_soft_sym ao_fec_decode_table[NUM_STATE][2] = { /* next 0 1 state */ - { 0x00, 0x00 }, { 0xff, 0xff }, /* 000 */ - { 0x00, 0xff }, { 0xff, 0x00 }, /* 001 */ - { 0xff, 0xff }, { 0x00, 0x00 }, /* 010 */ - { 0xff, 0x00 }, { 0x00, 0xff }, /* 011 */ - { 0xff, 0xff }, { 0x00, 0x00 }, /* 100 */ - { 0xff, 0x00 }, { 0x00, 0xff }, /* 101 */ - { 0x00, 0x00 }, { 0xff, 0xff }, /* 110 */ - { 0x00, 0xff }, { 0xff, 0x00 } /* 111 */ + { { 0x00, 0x00 }, { 0xff, 0xff } } , /* 000 */ + { { 0x00, 0xff }, { 0xff, 0x00 } }, /* 001 */ + { { 0xff, 0xff }, { 0x00, 0x00 } }, /* 010 */ + { { 0xff, 0x00 }, { 0x00, 0xff } }, /* 011 */ + { { 0xff, 0xff }, { 0x00, 0x00 } }, /* 100 */ + { { 0xff, 0x00 }, { 0x00, 0xff } }, /* 101 */ + { { 0x00, 0x00 }, { 0xff, 0xff } }, /* 110 */ + { { 0x00, 0xff }, { 0xff, 0x00 } } /* 111 */ }; -struct ao_soft_sym -ao_soft_sym(uint8_t bits) -{ - struct ao_soft_sym s; - - s.a = ((bits & 2) >> 1) * 0xff; - s.b = (bits & 1) * 0xff; - return s; -} - static inline uint8_t ao_next_state(uint8_t state, uint8_t bit) { @@ -63,71 +52,93 @@ ao_cost(struct ao_soft_sym a, struct ao_soft_sym b) return ao_abs(a.a - b.a) + ao_abs(a.b - b.b); } -#define NUM_STATE 8 +/* + * 'in' is 8-bits per symbol soft decision data + * 'len' is input byte length. 'out' must be + * 'len'/16 bytes long + */ uint8_t -ao_fec_decode(uint8_t *in, int len, uint8_t *out) +ao_fec_decode(uint8_t *in, uint16_t len, uint8_t *out) { - uint16_t cost[2][NUM_STATE], min_cost; - uint8_t prev[len/2 + 1][NUM_STATE]; - uint16_t prev_bits[2][NUM_STATE]; - int i, b, dump; - uint8_t p, n; - uint8_t state = 0, min_state; + static uint16_t cost[2][NUM_STATE]; /* path cost */ + static uint16_t bits[2][NUM_STATE]; /* save bits to quickly output them */ + uint16_t i; /* input byte index */ + uint16_t b; /* encoded symbol index (bytes/2) */ + uint16_t o; /* output bit index */ + uint8_t p; /* previous cost/bits index */ + uint8_t n; /* next cost/bits index */ + uint8_t state; /* state index */ + uint8_t bit; /* original encoded bit index */ p = 0; for (state = 0; state < NUM_STATE; state++) { cost[0][state] = 0xffff; - prev_bits[0][state] = 0; + bits[0][state] = 0; } cost[0][0] = 0; - min_state = 0; - min_cost = 0; - dump = 0; + o = 0; for (i = 0; i < len; i += 2) { b = i/2; n = p ^ 1; struct ao_soft_sym s = { .a = in[i], .b = in[i+1] }; + /* Reset next costs to 'impossibly high' values so that + * the first path through this state is cheaper than this + */ for (state = 0; state < NUM_STATE; state++) cost[n][state] = 0xffff; + /* Compute path costs and accumulate output bit path + * for each state and encoded bit value + */ for (state = 0; state < NUM_STATE; state++) { - int zero_cost = ao_cost(s, ao_fec_decode_table[state * 2 + 0]); - int one_cost = ao_cost(s, ao_fec_decode_table[state * 2 + 1]); - - uint8_t zero_state = ao_next_state(state, 0); - uint8_t one_state = ao_next_state(state, 1); - - zero_cost += cost[p][state]; - one_cost += cost[p][state]; - if (zero_cost < cost[n][zero_state]) { - prev[b+1][zero_state] = state; - cost[n][zero_state] = zero_cost; - prev_bits[n][zero_state] = (prev_bits[p][state] << 1) | (state & 1); - } - - if (one_cost < cost[n][one_state]) { - prev[b+1][one_state] = state; - cost[n][one_state] = one_cost; - prev_bits[n][one_state] = (prev_bits[p][state] << 1) | (state & 1); + for (bit = 0; bit < 2; bit++) { + int bit_cost = cost[p][state] + ao_cost(s, ao_fec_decode_table[state][bit]); + uint8_t bit_state = ao_next_state(state, bit); + + /* Only track the minimal cost to reach + * this state; the best path can never + * go through the higher cost paths as + * total path cost is cumulative + */ + if (bit_cost < cost[n][bit_state]) { + cost[n][bit_state] = bit_cost; + bits[n][bit_state] = (bits[p][state] << 1) | (state & 1); + } } } #if 0 printf ("bit %3d symbol %2x %2x:", i/2, s.a, s.b); for (state = 0; state < NUM_STATE; state++) { - printf (" %5d(%04x)", cost[n][state], prev_bits[n][state]); + printf (" %5d(%04x)", cost[n][state], bits[n][state]); } printf ("\n"); #endif p = n; - b++; - if ((b - dump) > 16 || i + 2 >= len) { - uint8_t dist = b - (dump + 9); - uint8_t rev; + /* A loop is needed to handle the last output byte. It + * won't have a full NUM_HIST bits of future data to + * perform full error correction, but we might as well + * give the best possible answer anyways. + */ + while ((b - o) >= (8 + NUM_HIST) || (i + 2 >= len && b > o)) { + + /* Compute number of bits to the end of the + * last full byte of data. This is generally + * NUM_HIST, unless we've reached + * the end of the input, in which case + * it will be seven. + */ + int8_t dist = b - (o + 8); /* distance to last ready-for-writing bit */ + uint16_t min_cost; /* lowest cost */ + uint8_t min_state; /* lowest cost state */ + + /* Find the best fit at the current point + * of the decode. + */ min_cost = cost[p][0]; min_state = 0; for (state = 1; state < NUM_STATE; state++) { @@ -136,18 +147,24 @@ ao_fec_decode(uint8_t *in, int len, uint8_t *out) min_state = state; } } - for (rev = 0; rev < dist; rev++) { - min_state = prev[b][min_state]; - b--; + + /* The very last byte of data has the very last bit + * of data left in the state value; just smash the + * bits value in place and reset the 'dist' from + * -1 to 0 so that the full byte is read out + */ + if (dist < 0) { + bits[p][min_state] = (bits[p][min_state] << 1) | (min_state & 1); + dist = 0; } + #if 0 printf ("\tbit %3d min_cost %5d old bit %3d old_state %x bits %02x\n", - i/2, min_cost, b-1, min_state, (prev_bits[p][min_state] >> dist) & 0xff); + i/2, min_cost, o + 8, min_state, (bits[p][min_state] >> dist) & 0xff); #endif - out[dump/8] = prev_bits[p][min_state] >> dist; - dump = b - 1; + out[o >> 3] = bits[p][min_state] >> dist; + o += 8; } - } return len/16; } -- 2.30.2