altos: optimize Viterbi implementation
author Keith Packard <keithp@keithp.com>
Sat, 23 Jun 2012 09:12:58 +0000 (02:12 -0700)
committer Keith Packard <keithp@keithp.com>
Sat, 23 Jun 2012 09:12:58 +0000 (02:12 -0700)
Minimize data usage, make data arrays static

Signed-off-by: Keith Packard <keithp@keithp.com>
src/core/ao_fec.h
src/core/ao_fec_tx.c
src/core/ao_viterbi.c

index 985352ddf9b845638a6c53686ee1b6fdf34e65d4..d4f64b740ac85e4a5d48f7be1932f5f3cb03380d 100644 (file)
@@ -42,7 +42,7 @@ ao_fec_prepare(uint8_t *in, uint8_t len, uint8_t *out);
  * must be 'len' bytes long. 'out' and 'in' can be
  * the same array
  */
-uint8_t
+void
 ao_fec_whiten(uint8_t *in, uint8_t len, uint8_t *out);
 
 /*
@@ -57,13 +57,12 @@ ao_fec_encode(uint8_t *in, uint8_t len, uint8_t *out);
 uint8_t
 ao_fec_interleave(uint8_t *in, uint8_t len, uint8_t *out);
 
-
 /*
  * Decode data. 'in' is one byte per bit, soft decision
  * 'out' must be len/8 bytes long
  */
 
 uint8_t
-ao_fec_decode(uint8_t *in, int in_len, uint8_t *out);
+ao_fec_decode(uint8_t *in, uint16_t in_len, uint8_t *out);
 
 #endif /* _AO_FEC_H_ */
index bf32c506a90f54410dc051aa388da42b2282b429..c5f410b887ab7cc0b50ffea5df7ced23830c7bc5 100644 (file)
@@ -83,7 +83,7 @@ static const uint8_t whiten[] = {
 #include "ao_whiten.h"
 };
 
-uint8_t
+void
 ao_fec_whiten(uint8_t *in, uint8_t len, uint8_t *out)
 {
        const uint8_t   *w = whiten;
index df95e979e25993039c897e192606cfb31d5a7af7..1b1784b3d3b3e6abbc22939134b734200d5a9516 100644 (file)
 #include <ao_fec.h>
 #include <stdio.h>
 
-/*
- * 'input' is 8-bits per symbol soft decision data
- * 'len' is output byte length
- */
-
 struct ao_soft_sym {
        uint8_t a, b;
 };
 
-static const struct ao_soft_sym ao_fec_decode_table[16] = {
+#define NUM_STATE      8
+#define NUM_HIST       8
+#define MOD_HIST(b)    ((b) & 7)
+
+static const struct ao_soft_sym ao_fec_decode_table[NUM_STATE][2] = {
 /* next        0              1                 state */
-       { 0x00, 0x00 }, { 0xff, 0xff }, /* 000 */
-       { 0x00, 0xff }, { 0xff, 0x00 }, /* 001 */
-       { 0xff, 0xff }, { 0x00, 0x00 }, /* 010 */
-       { 0xff, 0x00 }, { 0x00, 0xff }, /* 011 */
-       { 0xff, 0xff }, { 0x00, 0x00 }, /* 100 */
-       { 0xff, 0x00 }, { 0x00, 0xff }, /* 101 */
-       { 0x00, 0x00 }, { 0xff, 0xff }, /* 110 */
-       { 0x00, 0xff }, { 0xff, 0x00 }  /* 111 */
+       { { 0x00, 0x00 }, { 0xff, 0xff } } ,    /* 000 */
+       { { 0x00, 0xff }, { 0xff, 0x00 } },     /* 001 */
+       { { 0xff, 0xff }, { 0x00, 0x00 } },     /* 010 */
+       { { 0xff, 0x00 }, { 0x00, 0xff } },     /* 011 */
+       { { 0xff, 0xff }, { 0x00, 0x00 } },     /* 100 */
+       { { 0xff, 0x00 }, { 0x00, 0xff } },     /* 101 */
+       { { 0x00, 0x00 }, { 0xff, 0xff } },     /* 110 */
+       { { 0x00, 0xff }, { 0xff, 0x00 } }      /* 111 */
 };
 
-struct ao_soft_sym
-ao_soft_sym(uint8_t bits)
-{
-       struct ao_soft_sym      s;
-
-       s.a = ((bits & 2) >> 1) * 0xff;
-       s.b = (bits & 1) * 0xff;
-       return s;
-}
-
 static inline uint8_t
 ao_next_state(uint8_t state, uint8_t bit)
 {
@@ -63,71 +52,93 @@ ao_cost(struct ao_soft_sym a, struct ao_soft_sym b)
        return ao_abs(a.a - b.a) + ao_abs(a.b - b.b);
 }
 
-#define NUM_STATE      8
+/*
+ * 'in' is 8-bits per symbol soft decision data
+ * 'len' is input byte length. 'out' must be
+ * 'len'/16 bytes long
+ */
 
 uint8_t
-ao_fec_decode(uint8_t *in, int len, uint8_t *out)
+ao_fec_decode(uint8_t *in, uint16_t len, uint8_t *out)
 {
-       uint16_t        cost[2][NUM_STATE], min_cost;
-       uint8_t         prev[len/2 + 1][NUM_STATE];
-       uint16_t        prev_bits[2][NUM_STATE];
-       int             i, b, dump;
-       uint8_t         p, n;
-       uint8_t         state = 0, min_state;
+       static uint16_t cost[2][NUM_STATE];             /* path cost */
+       static uint16_t bits[2][NUM_STATE];             /* save bits to quickly output them */
+       uint16_t        i;                              /* input byte index */
+       uint16_t        b;                              /* encoded symbol index (bytes/2) */
+       uint16_t        o;                              /* output bit index */
+       uint8_t         p;                              /* previous cost/bits index */
+       uint8_t         n;                              /* next cost/bits index */
+       uint8_t         state;                          /* state index */
+       uint8_t         bit;                            /* original encoded bit index */
 
        p = 0;
        for (state = 0; state < NUM_STATE; state++) {
                cost[0][state] = 0xffff;
-               prev_bits[0][state] = 0;
+               bits[0][state] = 0;
        }
        cost[0][0] = 0;
 
-       min_state = 0;
-       min_cost = 0;
-       dump = 0;
+       o = 0;
        for (i = 0; i < len; i += 2) {
                b = i/2;
                n = p ^ 1;
                struct ao_soft_sym s = { .a = in[i], .b = in[i+1] };
 
+               /* Reset next costs to 'impossibly high' values so that
+                * the first path through this state is cheaper than this
+                */
                for (state = 0; state < NUM_STATE; state++)
                        cost[n][state] = 0xffff;
 
+               /* Compute path costs and accumulate output bit path
+                * for each state and encoded bit value
+                */
                for (state = 0; state < NUM_STATE; state++) {
-                       int     zero_cost = ao_cost(s, ao_fec_decode_table[state * 2 + 0]);
-                       int     one_cost = ao_cost(s, ao_fec_decode_table[state * 2 + 1]);
-
-                       uint8_t zero_state = ao_next_state(state, 0);
-                       uint8_t one_state = ao_next_state(state, 1);
-
-                       zero_cost += cost[p][state];
-                       one_cost += cost[p][state];
-                       if (zero_cost < cost[n][zero_state]) {
-                               prev[b+1][zero_state] = state;
-                               cost[n][zero_state] = zero_cost;
-                               prev_bits[n][zero_state] = (prev_bits[p][state] << 1) | (state & 1);
-                       }
-
-                       if (one_cost < cost[n][one_state]) {
-                               prev[b+1][one_state] = state;
-                               cost[n][one_state] = one_cost;
-                               prev_bits[n][one_state] = (prev_bits[p][state] << 1) | (state & 1);
+                       for (bit = 0; bit < 2; bit++) {
+                               int     bit_cost = cost[p][state] + ao_cost(s, ao_fec_decode_table[state][bit]);
+                               uint8_t bit_state = ao_next_state(state, bit);
+
+                               /* Only track the minimal cost to reach
+                                * this state; the best path can never
+                                * go through the higher cost paths as
+                                * total path cost is cumulative
+                                */
+                               if (bit_cost < cost[n][bit_state]) {
+                                       cost[n][bit_state] = bit_cost;
+                                       bits[n][bit_state] = (bits[p][state] << 1) | (state & 1);
+                               }
                        }
                }
 
 #if 0
                printf ("bit %3d symbol %2x %2x:", i/2, s.a, s.b);
                for (state = 0; state < NUM_STATE; state++) {
-                       printf (" %5d(%04x)", cost[n][state], prev_bits[n][state]);
+                       printf (" %5d(%04x)", cost[n][state], bits[n][state]);
                }
                printf ("\n");
 #endif
                p = n;
 
-               b++;
-               if ((b - dump) > 16 || i + 2 >= len) {
-                       uint8_t dist = b - (dump + 9);
-                       uint8_t rev;
+               /* A loop is needed to handle the last output byte. It
+                * won't have a full NUM_HIST bits of future data to
+                * perform full error correction, but we might as well
+                * give the best possible answer anyways.
+                */
+               while ((b - o) >= (8 + NUM_HIST) || (i + 2 >= len && b > o)) {
+
+                       /* Compute number of bits to the end of the
+                        * last full byte of data. This is generally
+                        * NUM_HIST, unless we've reached
+                        * the end of the input, in which case
+                        * it will be seven.
+                        */
+                       int8_t          dist = b - (o + 8);     /* distance to last ready-for-writing bit */
+                       uint16_t        min_cost;               /* lowest cost */
+                       uint8_t         min_state;              /* lowest cost state */
+
+                       /* Find the best fit at the current point
+                        * of the decode.
+                        */
                        min_cost = cost[p][0];
                        min_state = 0;
                        for (state = 1; state < NUM_STATE; state++) {
@@ -136,18 +147,24 @@ ao_fec_decode(uint8_t *in, int len, uint8_t *out)
                                        min_state = state;
                                }
                        }
-                       for (rev = 0; rev < dist; rev++) {
-                               min_state = prev[b][min_state];
-                               b--;
+
+                       /* The very last byte of data has the very last bit
+                        * of data left in the state value; just smash the
+                        * bits value in place and reset the 'dist' from
+                        * -1 to 0 so that the full byte is read out
+                        */
+                       if (dist < 0) {
+                               bits[p][min_state] = (bits[p][min_state] << 1) | (min_state & 1);
+                               dist = 0;
                        }
+
 #if 0
                        printf ("\tbit %3d min_cost %5d old bit %3d old_state %x bits %02x\n",
-                               i/2, min_cost, b-1, min_state, (prev_bits[p][min_state] >> dist) & 0xff);
+                               i/2, min_cost, o + 8, min_state, (bits[p][min_state] >> dist) & 0xff);
 #endif
-                       out[dump/8] = prev_bits[p][min_state] >> dist;
-                       dump = b - 1;
+                       out[o >> 3] = bits[p][min_state] >> dist;
+                       o += 8;
                }
-
        }
        return len/16;
 }