+
+ /* This is a bit painful.
+ * We need this loop to be precisely timed, which
+ * means knowing exactly how many instructions will
+ * be executed for each bit. It's easy to do that by
+ * compiling the C code and looking at the output,
+ * but we need this code to work even if the compiler
+ * changes. So, we hand-code the whole loop in assembly.
+ */
+
+ asm volatile (
+ " ldi %[b], 10\n" // loop value
+ "loop:\n"
+ " in %[v], %[port]\n" // read current value
+ " andi %[v], %[led_mask]\n" // mask to clear LED bit
+ " mov %[bit], %[w_lo]\n" // get current data byte
+ " andi %[bit], 0x01\n" // get current data bit
+#if AO_LED_SERIAL >= 1
+ " add %[bit],%[bit]\n" // shift by one
+#else
+ " nop\n"
+#endif
+#if AO_LED_SERIAL >= 2
+ " add %[bit],%[bit]\n" // shift by one
+#else
+ " nop\n"
+#endif
+#if AO_LED_SERIAL >= 3
+ " add %[bit],%[bit]\n" // shift by one
+#else
+ " nop\n"
+#endif
+#if AO_LED_SERIAL >= 4
+ " add %[bit],%[bit]\n" // shift by one
+#else
+ " nop\n"
+#endif
+#if AO_LED_SERIAL >= 5
+ " add %[bit],%[bit]\n" // shift by one
+#else
+ " nop\n"
+#endif
+#if AO_LED_SERIAL >= 6
+ " add %[bit],%[bit]\n" // shift by one
+#else
+ " nop\n"
+#endif
+#if AO_LED_SERIAL >= 7
+ " add %[bit],%[bit]\n" // shift by one
+#else
+ " nop\n"
+#endif
+ " or %[v], %[bit]\n" // add to register
+ " out %[port], %[v]\n" // write current value
+ " lsr %[w_hi]\n" // shift data
+ " ror %[w_lo]\n" // ...
+ " nop\n"
+ " nop\n"
+ " nop\n"
+ " nop\n"
+ " nop\n"
+
+ " nop\n"
+ " nop\n"
+ " nop\n"
+ " subi %[b], 1\n" // decrement bit count
+ " brne loop\n" // jump back to top
+ : [v] "=&r" (v),
+ [bit] "=&r" (bit),
+ [b] "=&r" (b),
+ [w_lo] "+r" (w_lo),
+ [w_hi] "+r" (w_hi)
+ : [port] "I" (_SFR_IO_ADDR(LED_PORT)),
+ [led_mask] "M" ((~(1 << AO_LED_SERIAL)) & 0xff)
+ );
+
+#if 0
+ /*
+ * Here's the equivalent C code to document
+ * what the above assembly code does
+ */