X-Git-Url: https://git.gag.com/?a=blobdiff_plain;f=src%2Fcore%2Fao_fec_rx.c;h=c4f5559a11f8ea79b07d069e99ee60a2208fe58f;hb=e0a0a747624c2df66ca4a73b5a0de014ea204dca;hp=69e9c1f5b7da393f9757e9b88b6448a1cf95b31d;hpb=eab18714ed9eabbcef0ff81b07427da042a58ccc;p=fw%2Faltos diff --git a/src/core/ao_fec_rx.c b/src/core/ao_fec_rx.c index 69e9c1f5..c4f5559a 100644 --- a/src/core/ao_fec_rx.c +++ b/src/core/ao_fec_rx.c @@ -18,6 +18,16 @@ #include #include +#ifdef TELEMEGA +#include +#endif + +#if AO_PROFILE +#include + +uint32_t ao_fec_decode_start, ao_fec_decode_end; +#endif + /* * byte order repeats through 3 2 1 0 * @@ -40,34 +50,37 @@ * 18/19 10/11 08/09 00/01 */ +static const uint8_t ao_interleave_order[] = { + 0x1e, 0x16, 0x0e, 0x06, + 0x1c, 0x14, 0x0c, 0x04, + 0x1a, 0x12, 0x0a, 0x02, + 0x18, 0x10, 0x08, 0x00 +}; + static inline uint16_t ao_interleave_index(uint16_t i) { - uint8_t l = i & 0x1e; - uint16_t h = i & ~0x1e; - uint8_t o = 0x1e ^ (((l >> 2) & 0x6) | ((l << 2) & 0x18)); - return h | o; + return (i & ~0x1e) | ao_interleave_order[(i & 0x1e) >> 1]; } -struct ao_soft_sym { - uint8_t a, b; -}; - #define NUM_STATE 8 -#define NUM_HIST 8 -#define MOD_HIST(b) ((b) & 7) - -#define V_0 0xc0 -#define V_1 0x40 - -static const struct ao_soft_sym ao_fec_decode_table[NUM_STATE][2] = { -/* next 0 1 state */ - { { V_0, V_0 }, { V_1, V_1 } } , /* 000 */ - { { V_0, V_1 }, { V_1, V_0 } }, /* 001 */ - { { V_1, V_1 }, { V_0, V_0 } }, /* 010 */ - { { V_1, V_0 }, { V_0, V_1 } }, /* 011 */ - { { V_1, V_1 }, { V_0, V_0 } }, /* 100 */ - { { V_1, V_0 }, { V_0, V_1 } }, /* 101 */ - { { V_0, V_0 }, { V_1, V_1 } }, /* 110 */ - { { V_0, V_1 }, { V_1, V_0 } } /* 111 */ +#define NUM_HIST 24 + +typedef uint32_t bits_t; + +#define V_0 0xff +#define V_1 0x00 + +/* + * These are just the 'zero' states; the 'one' states mirror them + */ +static const uint8_t ao_fec_decode_table[NUM_STATE*2] = { + V_0, V_0, /* 000 */ + V_0, V_1, /* 001 */ + V_1, V_1, /* 010 */ + V_1, V_0, /* 011 */ + V_1, V_1, /* 100 */ + V_1, V_0, /* 101 */ + V_0, V_0, /* 110 */ + V_0, V_1 /* 111 */ }; static inline uint8_t @@ -76,14 +89,6 @@ ao_next_state(uint8_t state, uint8_t bit) return ((state << 1) | bit) & 0x7; } -static inline uint16_t ao_abs(int16_t x) { return x < 0 ? -x : x; } - -static inline uint16_t -ao_cost(struct ao_soft_sym a, struct ao_soft_sym b) -{ - return ao_abs(a.a - b.a) + ao_abs(a.b - b.b); -} - /* * 'in' is 8-bits per symbol soft decision data * 'len' is input byte length. 'out' must be @@ -91,25 +96,29 @@ ao_cost(struct ao_soft_sym a, struct ao_soft_sym b) */ uint8_t -ao_fec_decode(uint8_t *in, uint16_t len, uint8_t *out, uint8_t out_len, uint16_t (*callback)()) +ao_fec_decode(const uint8_t *in, uint16_t len, uint8_t *out, uint8_t out_len, uint16_t (*callback)(void)) { - static uint16_t cost[2][NUM_STATE]; /* path cost */ - static uint16_t bits[2][NUM_STATE]; /* save bits to quickly output them */ + static uint32_t cost[2][NUM_STATE]; /* path cost */ + static bits_t bits[2][NUM_STATE]; /* save bits to quickly output them */ + uint16_t i; /* input byte index */ uint16_t b; /* encoded symbol index (bytes/2) */ uint16_t o; /* output bit index */ uint8_t p; /* previous cost/bits index */ uint8_t n; /* next cost/bits index */ uint8_t state; /* state index */ - uint8_t bit; /* original encoded bit index */ const uint8_t *whiten = ao_fec_whiten_table; uint16_t interleave; /* input byte array index */ - struct ao_soft_sym s; /* input symbol pair */ + uint8_t s0, s1; uint16_t avail; + uint16_t crc = AO_FEC_CRC_INIT; +#if AO_PROFILE + uint32_t start_tick; +#endif p = 0; for (state = 0; state < NUM_STATE; state++) { - cost[0][state] = 0xffff; + cost[0][state] = 0x7fffffff; bits[0][state] = 0; } cost[0][0] = 0; @@ -119,6 +128,14 @@ ao_fec_decode(uint8_t *in, uint16_t len, uint8_t *out, uint8_t out_len, uint16_t else avail = len; +#if AO_PROFILE + if (!avail) { + avail = callback(); + if (!avail) + return 0; + } + start_tick = ao_profile_tick(); +#endif o = 0; for (i = 0; i < len; i += 2) { b = i/2; @@ -127,46 +144,105 @@ ao_fec_decode(uint8_t *in, uint16_t len, uint8_t *out, uint8_t out_len, uint16_t if (!avail) { avail = callback(); if (!avail) - break; + return 0; } /* Fetch one pair of input bytes, de-interleaving * the input. */ interleave = ao_interleave_index(i); - s.a = in[interleave]; - s.b = in[interleave+1]; + s0 = in[interleave]; + s1 = in[interleave+1]; - /* Reset next costs to 'impossibly high' values so that - * the first path through this state is cheaper than this - */ - for (state = 0; state < NUM_STATE; state++) - cost[n][state] = 0xffff; + avail -= 2; /* Compute path costs and accumulate output bit path - * for each state and encoded bit value + * for each state and encoded bit value. Unrolling + * this loop is worth about > 30% performance boost. + * Decoding 76-byte remote access packets is reduced + * from 14.700ms to 9.3ms. Redoing the loop to + * directly compare the two pasts for each future state + * reduces this down to 5.7ms */ - for (state = 0; state < NUM_STATE; state++) { - for (bit = 0; bit < 2; bit++) { - int bit_cost = cost[p][state] + ao_cost(s, ao_fec_decode_table[state][bit]); - uint8_t bit_state = ao_next_state(state, bit); - - /* Only track the minimal cost to reach - * this state; the best path can never - * go through the higher cost paths as - * total path cost is cumulative - */ - if (bit_cost < cost[n][bit_state]) { - cost[n][bit_state] = bit_cost; - bits[n][bit_state] = (bits[p][state] << 1) | (state & 1); - } - } + + /* Ok, of course this is tricky, it's optimized. + * + * First, it's important to realize that we have 8 + * states representing the combinations of the three + * most recent bits from the encoder. Flipping any + * of these three bits flips both output bits. + * + * 'state<<1' represents the target state for a new + * bit value of 0. '(state<<1)+1' represents the + * target state for a new bit value of 1. + * + * 'state' is the previous state with an oldest bit + * value of 0. 'state + 4' is the previous state with + * an oldest bit value of 1. These two states will + * either lead to 'state<<1' or '(state<<1)+1', depending + * on whether the next encoded bit was a zero or a one. + * + * m0 and m1 are the cost of coming to 'state<<1' from + * one of the two possible previous states 'state' and + * 'state + 4'. + * + * Because we know the expected values of each + * received bit are flipped between these two previous + * states: + * + * bitcost(state+4) = 510 - bitcost(state) + * + * With those two total costs in hand, we then pick + * the lower as the cost of the 'state<<1', and compute + * the path of bits leading to that state. + * + * Then, do the same for '(state<<1) + 1'. This time, + * instead of computing the m0 and m1 values from + * scratch, because the only difference is that we're + * expecting a one bit instead of a zero bit, we just + * flip the bitcost values around to match the + * expected transmitted bits with some tricky + * arithmetic which is equivalent to: + * + * m0 = cost[p][state] + (510 - bitcost); + * m1 = cost[p][state+4] + bitcost + * + * Then, the lowest cost and bit trace of the new state + * is saved. + */ + +#define DO_STATE(state) { \ + uint32_t bitcost; \ + \ + uint32_t m0; \ + uint32_t m1; \ + uint32_t bit; \ + \ + bitcost = ((uint32_t) (s0 ^ ao_fec_decode_table[(state<<1)]) + \ + (uint32_t) (s1 ^ ao_fec_decode_table[(state<<1)|1])); \ + \ + m0 = cost[p][state] + bitcost; \ + m1 = cost[p][state+4] + (510 - bitcost); \ + bit = m0 > m1; \ + cost[n][state<<1] = bit ? m1 : m0; \ + bits[n][state<<1] = (bits[p][state + (bit<<2)] << 1) | (state&1); \ + \ + m0 -= (bitcost+bitcost-510); \ + m1 += (bitcost+bitcost-510); \ + bit = m0 > m1; \ + cost[n][(state<<1)+1] = bit ? m1 : m0; \ + bits[n][(state<<1)+1] = (bits[p][state + (bit<<2)] << 1) | (state&1); \ } + DO_STATE(0); + DO_STATE(1); + DO_STATE(2); + DO_STATE(3); + #if 0 - printf ("bit %3d symbol %2x %2x:", i/2, s.a, s.b); + printf ("bit %3d symbol %2x %2x:", i/2, s0, s1); for (state = 0; state < NUM_STATE; state++) { - printf (" %5d(%04x)", cost[n][state], bits[n][state]); + printf (" %8u(%08x)", cost[n][state], bits[n][state]); } printf ("\n"); #endif @@ -186,8 +262,9 @@ ao_fec_decode(uint8_t *in, uint16_t len, uint8_t *out, uint8_t out_len, uint16_t * it will be seven. */ int8_t dist = b - (o + 8); /* distance to last ready-for-writing bit */ - uint16_t min_cost; /* lowest cost */ + uint32_t min_cost; /* lowest cost */ uint8_t min_state; /* lowest cost state */ + uint8_t byte; /* Find the best fit at the current point * of the decode. @@ -215,12 +292,27 @@ ao_fec_decode(uint8_t *in, uint16_t len, uint8_t *out, uint8_t out_len, uint16_t printf ("\tbit %3d min_cost %5d old bit %3d old_state %x bits %02x whiten %0x\n", i/2, min_cost, o + 8, min_state, (bits[p][min_state] >> dist) & 0xff, *whiten); #endif - if (out_len) { - *out++ = (bits[p][min_state] >> dist) ^ *whiten++; - --out_len; + byte = (bits[p][min_state] >> dist) ^ *whiten++; + *out++ = byte; + if (out_len > 2) + crc = ao_fec_crc_byte(byte, crc); + + if (!--out_len) { + if ((out[-2] == (uint8_t) (crc >> 8)) && + out[-1] == (uint8_t) crc) + out[-1] = AO_FEC_DECODE_CRC_OK; + else + out[-1] = 0; + out[-2] = 0; + goto done; } o += 8; } } - return len/16; +done: +#if AO_PROFILE + ao_fec_decode_start = start_tick; + ao_fec_decode_end = ao_profile_tick(); +#endif + return 1; }