altos: Replace C code attiny async output with inline asm
[fw/altos] / src / attiny / ao_async.c
index 3556f54c00d511932baabad43a3d358311233d0f..9f7fd6d761d8072c7ac665b880d347319aaa47f7 100644 (file)
@@ -40,11 +40,75 @@ ao_async_byte(uint8_t byte)
 {
        uint8_t         b;
        uint16_t        w;
+       uint8_t         v;
+       uint8_t         bit;
+       uint8_t         w_hi, w_lo;
 
        /*    start           data           stop */
        w = (0x000 << 0) | (byte << 1) | (0x001 << 9);
 
+       w_hi = w >> 8;
+       w_lo = w;
+
        ao_arch_block_interrupts();
+
+#if AO_LED_SERIAL != 4
+#error "expect AO_LED_SERIAL to be 4"
+#endif
+
+       /* Ok, this is a bit painful.
+        * We need this loop to be precisely timed, which
+        * means knowing exactly how many instructions will
+        * be executed for each bit. It's easy to do that by
+        * compiling the C code and looking at the output,
+        * but we need this code to work even if the compiler
+        * changes. So, just hand-code the whole thing.
+        */
+
+       asm volatile (
+               "       ldi     %[b], 10\n"             // bit count: start + 8 data + stop
+               "1:\n"                                  // numeric local label, safe if emitted twice
+               "       in      %[v], %[port]\n"        // read current port value
+               "       andi    %[v], %[led_mask]\n"    // clear the LED bit
+               "       mov     %[bit], %[w_lo]\n"      // copy low data byte
+               "       andi    %[bit], 0x01\n"         // isolate the next bit to send
+               "       swap    %[bit]\n"               // swap nibbles: bit 0 -> bit 4 (AO_LED_SERIAL is 4)
+               "       andi    %[bit], 0xf0\n"         // keep only the high nibble
+               "       or      %[v], %[bit]\n"         // merge data bit into port value
+               "       out     %[port], %[v]\n"        // write the new port value
+               "       lsr     %[w_hi]\n"              // shift the 16-bit frame right by one:
+               "       ror     %[w_lo]\n"              //  bit shifted out of w_hi enters w_lo
+               "       nop\n"                          // 13 nops pad each pass of the loop;
+               "       nop\n"                          // do not add or remove instructions,
+               "       nop\n"                          // the loop length sets the bit time
+               "       nop\n"
+               "       nop\n"
+
+               "       nop\n"
+               "       nop\n"
+               "       nop\n"
+               "       nop\n"
+               "       nop\n"
+
+               "       nop\n"
+               "       nop\n"
+               "       nop\n"
+               "       subi    %[b], 1\n"              // decrement bit count
+               "       brne    1b\n"                   // more bits: jump back to top
+               : [v]        "=&d" (v),                 // "d" (r16..r31): andi needs an upper register
+                 [bit]      "=&d" (bit),               // "d": operand of andi
+                 [b]        "=&d" (b),                 // "d": operand of ldi and subi
+                 [w_lo]     "+r" (w_lo),
+                 [w_hi]     "+r" (w_hi)
+               : [port]     "I"  (_SFR_IO_ADDR(LED_PORT)),
+                 [led_mask] "M"  ((~(1 << AO_LED_SERIAL)) & 0xff)
+               : "cc");                                // andi/lsr/ror/subi change the status flags
+
+#if 0
+       /*
+        * Here's the equivalent C code to document
+        * what the above assembly code does
+        */
        for (b = 0; b < 10; b++) {
                uint8_t v = LED_PORT & ~(1 << AO_LED_SERIAL);
                v |= (w & 1) << AO_LED_SERIAL;
@@ -54,6 +118,7 @@ ao_async_byte(uint8_t byte)
                /* Carefully timed to hit around 9600 baud */
                asm volatile ("nop");
                asm volatile ("nop");
+               asm volatile ("nop");
 
                asm volatile ("nop");
                asm volatile ("nop");
@@ -67,5 +132,6 @@ ao_async_byte(uint8_t byte)
                asm volatile ("nop");
                asm volatile ("nop");
        }
+#endif
        ao_arch_release_interrupts();
 }