cost[n][state] = 0x7fffffff;
/* Compute path costs and accumulate output bit path
- * for each state and encoded bit value
+ * for each state and encoded bit value. Unrolling
+ * this loop gives a performance boost of more than 30%:
+ * decoding a 76-byte remote access packet drops
+ * from 14.7ms to 9.3ms.
*/
- for (state = 0; state < NUM_STATE; state++) {
- uint32_t bitcost = ((uint32_t) (s0 ^ ao_fec_decode_table[(state<<1)]) +
- (uint32_t) (s1 ^ ao_fec_decode_table[(state<<1)+1]));
- {
- uint32_t cost0 = cost[p][state] + bitcost;
- uint8_t state0 = ao_next_state(state, 0);
-
- if (cost0 < cost[n][state0]) {
- cost[n][state0] = cost0;
- bits[n][state0] = (bits[p][state] << 1) | (state & 1);
- }
- }
- {
- uint32_t cost1 = cost[p][state] + 510 - bitcost;
- uint8_t state1 = ao_next_state(state, 1);
-
- if (cost1 < cost[n][state1]) {
- cost[n][state1] = cost1;
- bits[n][state1] = (bits[p][state] << 1) | (state & 1);
- }
- }
+#define DO_STATE(state) { \
+ uint32_t bitcost = ((uint32_t) (s0 ^ ao_fec_decode_table[(state<<1)]) + \
+ (uint32_t) (s1 ^ ao_fec_decode_table[(state<<1)+1])); \
+ { \
+ uint32_t cost0 = cost[p][state] + bitcost; \
+ uint8_t state0 = ao_next_state(state, 0); \
+ \
+ if (cost0 < cost[n][state0]) { \
+ cost[n][state0] = cost0; \
+ bits[n][state0] = (bits[p][state] << 1) | (state & 1); \
+ } \
+ } \
+ { \
+ uint32_t cost1 = cost[p][state] + 510 - bitcost; \
+ uint8_t state1 = ao_next_state(state, 1); \
+ \
+ if (cost1 < cost[n][state1]) { \
+ cost[n][state1] = cost1; \
+ bits[n][state1] = (bits[p][state] << 1) | (state & 1); \
+ } \
+ } \
}
+ DO_STATE(0);
+ DO_STATE(1);
+ DO_STATE(2);
+ DO_STATE(3);
+ DO_STATE(4);
+ DO_STATE(5);
+ DO_STATE(6);
+ DO_STATE(7);
+
#if 0
printf ("bit %3d symbol %2x %2x:", i/2, s0, s1);
for (state = 0; state < NUM_STATE; state++) {
int8_t dist = b - (o + 8); /* distance to last ready-for-writing bit */
uint32_t min_cost; /* lowest cost */
uint8_t min_state; /* lowest cost state */
+ uint8_t byte;
/* Find the best fit at the current point
* of the decode.
printf ("\tbit %3d min_cost %5d old bit %3d old_state %x bits %02x whiten %0x\n",
i/2, min_cost, o + 8, min_state, (bits[p][min_state] >> dist) & 0xff, *whiten);
#endif
- if (out_len) {
- uint8_t byte = (bits[p][min_state] >> dist) ^ *whiten++;
+ byte = (bits[p][min_state] >> dist) ^ *whiten++;
+ *out++ = byte;
+ if (out_len > 2)
+ crc = ao_fec_crc_byte(byte, crc);
- if (out_len > 2) {
- crc = ao_fec_crc_byte(byte, crc);
- *out++ = byte;
- } else {
- *out++ = byte ^ (crc >> 8);
- crc <<= 8;
- }
- --out_len;
+ if (!--out_len) {
+ if ((out[-2] == (uint8_t) (crc >> 8)) &&
+ out[-1] == (uint8_t) crc)
+ out[-1] = AO_FEC_DECODE_CRC_OK;
+ else
+ out[-1] = 0;
+ out[-2] = 0;
+ goto done;
}
o += 8;
}
}
+done:
#if AO_PROFILE
ao_fec_decode_start = start_tick;
ao_fec_decode_end = ao_profile_tick();
#endif
- return len/16;
+ return 1;
}