altos: Use all 16 bits of setup packet len when limiting reply len
[fw/altos] / src / stm / ao_usb_stm.c
index 586f1e3326df53d20d28302d1c7b3b08a9c09a8d..b00390ec0318b14901a68156ad29597df5b08f2f 100644 (file)
 
 #define USB_DEBUG      0
 #define USB_DEBUG_DATA 0
+#define USB_ECHO       0
+
+#ifndef USE_USB_STDIO
+#define USE_USB_STDIO  1
+#endif
+
+#if USE_USB_STDIO
+#define AO_USB_OUT_SLEEP_ADDR  (&ao_stdin_ready)
+#else
+#define AO_USB_OUT_SLEEP_ADDR  (&ao_usb_out_avail)
+#endif
 
 #if USB_DEBUG
 #define debug(format, args...) printf(format, ## args);
@@ -34,8 +45,6 @@
 #define debug_data(format, args...)
 #endif
 
-struct ao_task ao_usb_task;
-
 struct ao_usb_setup {
        uint8_t         dir_type_recip;
        uint8_t         request;
@@ -45,34 +54,68 @@ struct ao_usb_setup {
 } ao_usb_setup;
 
 static uint8_t         ao_usb_ep0_state;
-static const uint8_t * ao_usb_ep0_in_data;
-static uint8_t         ao_usb_ep0_in_len;
-static uint8_t ao_usb_ep0_in_pending;
+
+/* Pending EP0 IN data */
+static const uint8_t   *ao_usb_ep0_in_data;    /* Remaining data */
+static uint8_t                 ao_usb_ep0_in_len;      /* Remaining amount */
+
+/* Temp buffer for small EP0 IN data */
 static uint8_t ao_usb_ep0_in_buf[2];
-static uint8_t         ao_usb_ep0_out_len;
+
+/* Pending EP0 OUT data */
 static uint8_t *ao_usb_ep0_out_data;
+static uint8_t         ao_usb_ep0_out_len;
+
+/*
+ * Objects allocated in special USB memory
+ */
+
+/* Buffer description tables */
 static union stm_usb_bdt       *ao_usb_bdt;
+/* USB address of end of allocated storage */
 static uint16_t        ao_usb_sram_addr;
+
+/* Pointers to the EP0 tx/rx buffers in USB memory */
+static uint32_t        *ao_usb_ep0_tx_buffer;
+static uint32_t        *ao_usb_ep0_rx_buffer;
+
+/* Pointers to the bulk data tx/rx buffers in USB memory */
+static uint32_t        *ao_usb_in_tx_buffer;
+static uint32_t        *ao_usb_out_rx_buffer;
+
+/* System ram shadow of USB buffer; writing individual bytes is
+ * too much of a pain (sigh) */
 static uint8_t ao_usb_tx_buffer[AO_USB_IN_SIZE];
 static uint8_t ao_usb_tx_count;
+
 static uint8_t ao_usb_rx_buffer[AO_USB_OUT_SIZE];
 static uint8_t ao_usb_rx_count, ao_usb_rx_pos;
 
-#define AO_USB_INT_EPR 1
-#define AO_USB_OUT_EPR 2
-#define AO_USB_IN_EPR  3
-
 /*
- * Pointers into the USB packet buffer area
+ * End point register indices
  */
-static uint32_t        *ao_usb_ep0_tx_buffer;
-static uint32_t        *ao_usb_ep0_rx_buffer;
 
-static uint32_t        *ao_usb_in_tx_buffer;
-static uint32_t        *ao_usb_out_rx_buffer;
+#define AO_USB_CONTROL_EPR     0
+#define AO_USB_INT_EPR         1
+#define AO_USB_OUT_EPR         2
+#define AO_USB_IN_EPR          3
 
+/* Marks when we don't need to send another IN packet.
+ * This is the case only when the last IN packet was not full;
+ * after a full packet the host expects to keep seeing packets,
+ * so a zero-length packet is sent as required.
+ */
 static uint8_t ao_usb_in_flushed;
+
+/* Marks when we have delivered an IN packet to the hardware
+ * and it has not been received yet. ao_sleep on this address
+ * to wait for it to be delivered.
+ */
 static uint8_t ao_usb_in_pending;
+
+/* Marks when an OUT packet has been received by the hardware
+ * but not pulled to the shadow buffer.
+ */
 static uint8_t ao_usb_out_avail;
 static uint8_t ao_usb_running;
 static uint8_t ao_usb_configuration;
@@ -139,10 +182,134 @@ ao_usb_set_address(uint8_t address)
        ao_usb_address_pending = 0;
 }
 
+/*
+ * Write these values to preserve register contents under HW changes
+ */
+
+#define STM_USB_EPR_INVARIANT  ((1 << STM_USB_EPR_CTR_RX) |            \
+                                (STM_USB_EPR_DTOG_RX_WRITE_INVARIANT << STM_USB_EPR_DTOG_RX) | \
+                                (STM_USB_EPR_STAT_RX_WRITE_INVARIANT << STM_USB_EPR_STAT_RX) | \
+                                (1 << STM_USB_EPR_CTR_TX) |            \
+                                (STM_USB_EPR_DTOG_TX_WRITE_INVARIANT << STM_USB_EPR_DTOG_TX) | \
+                                (STM_USB_EPR_STAT_TX_WRITE_INVARIANT << STM_USB_EPR_STAT_TX))
+
+#define STM_USB_EPR_INVARIANT_MASK     ((1 << STM_USB_EPR_CTR_RX) |    \
+                                        (STM_USB_EPR_DTOG_RX_MASK << STM_USB_EPR_DTOG_RX) | \
+                                        (STM_USB_EPR_STAT_RX_MASK << STM_USB_EPR_STAT_RX) | \
+                                        (1 << STM_USB_EPR_CTR_TX) |    \
+                                        (STM_USB_EPR_DTOG_TX_MASK << STM_USB_EPR_DTOG_TX) | \
+                                        (STM_USB_EPR_STAT_TX_MASK << STM_USB_EPR_STAT_TX))
+
+/*
+ * These bits are purely under sw control, so preserve them in the
+ * register by re-writing what was read
+ */
+#define STM_USB_EPR_PRESERVE_MASK      ((STM_USB_EPR_EP_TYPE_MASK << STM_USB_EPR_EP_TYPE) | \
+                                        (1 << STM_USB_EPR_EP_KIND) |   \
+                                        (STM_USB_EPR_EA_MASK << STM_USB_EPR_EA))
+
+#define TX_DBG 0
+#define RX_DBG 0
+
+#if TX_DBG
+#define _tx_dbg0(msg) _dbg(__LINE__,msg,0)
+#define _tx_dbg1(msg,value) _dbg(__LINE__,msg,value)
+#else
+#define _tx_dbg0(msg)
+#define _tx_dbg1(msg,value)
+#endif
+
+#if RX_DBG
+#define _rx_dbg0(msg) _dbg(__LINE__,msg,0)
+#define _rx_dbg1(msg,value) _dbg(__LINE__,msg,value)
+#else
+#define _rx_dbg0(msg)
+#define _rx_dbg1(msg,value)
+#endif
+
+#if TX_DBG || RX_DBG
+static void _dbg(int line, char *msg, uint32_t value);
+#endif
+
+/*
+ * Set the state of the specified endpoint register to a new
+ * value. This is tricky because the bits toggle where the new
+ * value is one, and we need to write invariant values in other
+ * spots of the register. This hardware is strange...
+ */
+static void
+_ao_usb_set_stat_tx(int ep, uint32_t stat_tx)
+{
+       uint32_t        epr_write, epr_old;
+
+       _tx_dbg1("set_stat_tx top", stat_tx);
+       epr_old = epr_write = stm_usb.epr[ep];
+       epr_write &= STM_USB_EPR_PRESERVE_MASK;
+       epr_write |= STM_USB_EPR_INVARIANT;
+       epr_write |= set_toggle(epr_old,
+                             STM_USB_EPR_STAT_TX_MASK << STM_USB_EPR_STAT_TX,
+                             stat_tx << STM_USB_EPR_STAT_TX);
+       stm_usb.epr[ep] = epr_write;
+       _tx_dbg1("set_stat_tx bottom", epr_write);
+}
+
+static void
+ao_usb_set_stat_tx(int ep, uint32_t stat_tx)
+{
+       ao_arch_block_interrupts();
+       _ao_usb_set_stat_tx(ep, stat_tx);
+       ao_arch_release_interrupts();
+}
+
+static void
+_ao_usb_set_stat_rx(int ep, uint32_t stat_rx) {
+       uint32_t        epr_write, epr_old;
+
+       epr_write = epr_old = stm_usb.epr[ep];
+       epr_write &= STM_USB_EPR_PRESERVE_MASK;
+       epr_write |= STM_USB_EPR_INVARIANT;
+       epr_write |= set_toggle(epr_old,
+                             STM_USB_EPR_STAT_RX_MASK << STM_USB_EPR_STAT_RX,
+                             stat_rx << STM_USB_EPR_STAT_RX);
+       stm_usb.epr[ep] = epr_write;
+}
+
+static void
+ao_usb_set_stat_rx(int ep, uint32_t stat_rx) {
+       ao_arch_block_interrupts();
+       _ao_usb_set_stat_rx(ep, stat_rx);
+       ao_arch_release_interrupts();
+}
+
 /*
  * Set just endpoint 0, for use during startup
  */
 
+static void
+ao_usb_init_ep(uint8_t ep, uint32_t addr, uint32_t type, uint32_t stat_rx, uint32_t stat_tx)
+{
+       uint32_t                epr;
+       ao_arch_block_interrupts();
+       epr = stm_usb.epr[ep];
+       epr = ((0 << STM_USB_EPR_CTR_RX) |
+              (epr & (1 << STM_USB_EPR_DTOG_RX)) |
+              set_toggle(epr,
+                         (STM_USB_EPR_STAT_RX_MASK << STM_USB_EPR_STAT_RX),
+                         (stat_rx << STM_USB_EPR_STAT_RX)) |
+              (type << STM_USB_EPR_EP_TYPE) |
+              (0 << STM_USB_EPR_EP_KIND) |
+              (0 << STM_USB_EPR_CTR_TX) |
+              (epr & (1 << STM_USB_EPR_DTOG_TX)) |
+              set_toggle(epr,
+                         (STM_USB_EPR_STAT_TX_MASK << STM_USB_EPR_STAT_TX),
+                         (stat_tx << STM_USB_EPR_STAT_TX)) |
+              (addr << STM_USB_EPR_EA));
+       stm_usb.epr[ep] = epr;
+       ao_arch_release_interrupts();
+       debug ("writing epr[%d] 0x%08x wrote 0x%08x\n",
+              ep, epr, stm_usb.epr[ep]);
+}
+
 static void
 ao_usb_set_ep0(void)
 {
@@ -170,44 +337,17 @@ ao_usb_set_ep0(void)
        ao_usb_ep0_rx_buffer = ao_usb_packet_buffer_addr(ao_usb_sram_addr);
        ao_usb_sram_addr += AO_USB_CONTROL_SIZE;
 
-       cli();
-       epr = stm_usb.epr[0];
-       epr = ((STM_USB_EPR_CTR_RX_WRITE_INVARIANT << STM_USB_EPR_CTR_RX) |
-              (STM_USB_EPR_DTOG_RX_WRITE_INVARIANT << STM_USB_EPR_DTOG_RX) |
-              set_toggle(epr,
-                         (STM_USB_EPR_STAT_RX_MASK << STM_USB_EPR_STAT_RX),
-                         (STM_USB_EPR_STAT_RX_VALID << STM_USB_EPR_STAT_RX)) |
-              (STM_USB_EPR_EP_TYPE_CONTROL << STM_USB_EPR_EP_TYPE) |
-              (0 << STM_USB_EPR_EP_KIND) |
-              (STM_USB_CTR_TX_WRITE_INVARIANT << STM_USB_EPR_CTR_TX) |
-              (STM_USB_EPR_DTOG_TX_WRITE_INVARIANT << STM_USB_EPR_DTOG_TX) |
-              set_toggle(epr,
-                         (STM_USB_EPR_STAT_TX_MASK << STM_USB_EPR_STAT_TX),
-                         (STM_USB_EPR_STAT_TX_NAK << STM_USB_EPR_STAT_TX)) |
-              (AO_USB_CONTROL_EP << STM_USB_EPR_EA));
-       stm_usb.epr[0] = epr;
-       sei();
-       debug ("epr 0 now %x\n", stm_usb.epr[0]);
+       ao_usb_init_ep(AO_USB_CONTROL_EPR, AO_USB_CONTROL_EP,
+                      STM_USB_EPR_EP_TYPE_CONTROL,
+                      STM_USB_EPR_STAT_RX_VALID,
+                      STM_USB_EPR_STAT_TX_NAK);
 
        /* Clear all of the other endpoints */
        for (e = 1; e < 8; e++) {
-               cli();
-               epr = stm_usb.epr[e];
-               epr = ((STM_USB_EPR_CTR_RX_WRITE_INVARIANT << STM_USB_EPR_CTR_RX) |
-                      (STM_USB_EPR_DTOG_RX_WRITE_INVARIANT << STM_USB_EPR_DTOG_RX) |
-                      set_toggle(epr,
-                                 (STM_USB_EPR_STAT_RX_MASK << STM_USB_EPR_STAT_RX),
-                                 (STM_USB_EPR_STAT_RX_DISABLED << STM_USB_EPR_STAT_RX)) |
-                      (STM_USB_EPR_EP_TYPE_CONTROL << STM_USB_EPR_EP_TYPE) |
-                      (0 << STM_USB_EPR_EP_KIND) |
-                      (STM_USB_CTR_TX_WRITE_INVARIANT << STM_USB_EPR_CTR_TX) |
-                      (STM_USB_EPR_DTOG_TX_WRITE_INVARIANT << STM_USB_EPR_DTOG_TX) |
-                      set_toggle(epr,
-                                 (STM_USB_EPR_STAT_TX_MASK << STM_USB_EPR_STAT_TX),
-                                 (STM_USB_EPR_STAT_TX_DISABLED << STM_USB_EPR_STAT_TX)) |
-                      (0 << STM_USB_EPR_EA));
-               stm_usb.epr[e] = epr;
-               sei();
+               ao_usb_init_ep(e, 0,
+                              STM_USB_EPR_EP_TYPE_CONTROL,
+                              STM_USB_EPR_STAT_RX_DISABLED,
+                              STM_USB_EPR_STAT_TX_DISABLED);
        }
 
        ao_usb_set_address(0);
@@ -226,25 +366,11 @@ ao_usb_set_configuration(void)
        ao_usb_in_tx_buffer = ao_usb_packet_buffer_addr(ao_usb_sram_addr);
        ao_usb_sram_addr += AO_USB_INT_SIZE;
 
-       cli();
-       epr = stm_usb.epr[AO_USB_INT_EPR];
-       epr = ((0 << STM_USB_EPR_CTR_RX) |
-              (epr & (1 << STM_USB_EPR_DTOG_RX)) |
-              set_toggle(epr,
-                         (STM_USB_EPR_STAT_RX_MASK << STM_USB_EPR_STAT_RX),
-                         (STM_USB_EPR_STAT_RX_DISABLED << STM_USB_EPR_STAT_RX)) |
-              (STM_USB_EPR_EP_TYPE_CONTROL << STM_USB_EPR_EP_TYPE) |
-              (0 << STM_USB_EPR_EP_KIND) |
-              (0 << STM_USB_EPR_CTR_TX) |
-              (epr & (1 << STM_USB_EPR_DTOG_TX)) |
-              set_toggle(epr,
-                         (STM_USB_EPR_STAT_TX_MASK << STM_USB_EPR_STAT_TX),
-                         (STM_USB_EPR_STAT_TX_NAK << STM_USB_EPR_STAT_TX)) |
-              (AO_USB_INT_EP << STM_USB_EPR_EA));
-       stm_usb.epr[AO_USB_INT_EPR] = epr;
-       sei();
-       debug ("writing epr[%d] 0x%08x wrote 0x%08x\n",
-              AO_USB_INT_EPR, epr, stm_usb.epr[AO_USB_INT_EPR]);
+       ao_usb_init_ep(AO_USB_INT_EPR,
+                      AO_USB_INT_EP,
+                      STM_USB_EPR_EP_TYPE_INTERRUPT,
+                      STM_USB_EPR_STAT_RX_DISABLED,
+                      STM_USB_EPR_STAT_TX_NAK);
 
        /* Set up the OUT end point */
        ao_usb_bdt[AO_USB_OUT_EPR].single.addr_rx = ao_usb_sram_addr;
@@ -253,193 +379,33 @@ ao_usb_set_configuration(void)
        ao_usb_out_rx_buffer = ao_usb_packet_buffer_addr(ao_usb_sram_addr);
        ao_usb_sram_addr += AO_USB_OUT_SIZE;
 
-       cli();
-       epr = stm_usb.epr[AO_USB_OUT_EPR];
-       epr = ((0 << STM_USB_EPR_CTR_RX) |
-              (epr & (1 <<  STM_USB_EPR_DTOG_RX)) |
-              set_toggle(epr,
-                         (STM_USB_EPR_STAT_RX_MASK << STM_USB_EPR_STAT_RX),
-                         (STM_USB_EPR_STAT_RX_VALID << STM_USB_EPR_STAT_RX)) |
-              (STM_USB_EPR_EP_TYPE_CONTROL << STM_USB_EPR_EP_TYPE) |
-              (0 << STM_USB_EPR_EP_KIND) |
-              (0 << STM_USB_EPR_CTR_TX) |
-              (epr & (1 << STM_USB_EPR_DTOG_TX)) |
-              set_toggle(epr,
-                         (STM_USB_EPR_STAT_TX_MASK << STM_USB_EPR_STAT_TX),
-                         (STM_USB_EPR_STAT_TX_DISABLED << STM_USB_EPR_STAT_TX)) |
-              (AO_USB_OUT_EP << STM_USB_EPR_EA));
-       stm_usb.epr[AO_USB_OUT_EPR] = epr;
-       sei();
-       debug ("writing epr[%d] 0x%08x wrote 0x%08x\n",
-              AO_USB_OUT_EPR, epr, stm_usb.epr[AO_USB_OUT_EPR]);
-       
+       ao_usb_init_ep(AO_USB_OUT_EPR,
+                      AO_USB_OUT_EP,
+                      STM_USB_EPR_EP_TYPE_BULK,
+                      STM_USB_EPR_STAT_RX_VALID,
+                      STM_USB_EPR_STAT_TX_DISABLED);
+
        /* Set up the IN end point */
        ao_usb_bdt[AO_USB_IN_EPR].single.addr_tx = ao_usb_sram_addr;
        ao_usb_bdt[AO_USB_IN_EPR].single.count_tx = 0;
        ao_usb_in_tx_buffer = ao_usb_packet_buffer_addr(ao_usb_sram_addr);
        ao_usb_sram_addr += AO_USB_IN_SIZE;
 
-       cli();
-       epr = stm_usb.epr[AO_USB_IN_EPR];
-       epr = ((0 << STM_USB_EPR_CTR_RX) |
-              (epr & (1 << STM_USB_EPR_DTOG_RX)) |
-              set_toggle(epr,
-                         (STM_USB_EPR_STAT_RX_MASK << STM_USB_EPR_STAT_RX),
-                         (STM_USB_EPR_STAT_RX_DISABLED << STM_USB_EPR_STAT_RX)) |
-              (STM_USB_EPR_EP_TYPE_CONTROL << STM_USB_EPR_EP_TYPE) |
-              (0 << STM_USB_EPR_EP_KIND) |
-              (0 << STM_USB_EPR_CTR_TX) |
-              (epr & (1 << STM_USB_EPR_DTOG_TX)) |
-              set_toggle(epr,
-                         (STM_USB_EPR_STAT_TX_MASK << STM_USB_EPR_STAT_TX),
-                         (STM_USB_EPR_STAT_TX_NAK << STM_USB_EPR_STAT_TX)) |
-              (AO_USB_IN_EP << STM_USB_EPR_EA));
-       stm_usb.epr[AO_USB_IN_EPR] = epr;
-       sei();
-       debug ("writing epr[%d] 0x%08x wrote 0x%08x\n",
-              AO_USB_IN_EPR, epr, stm_usb.epr[AO_USB_IN_EPR]);
+       ao_usb_init_ep(AO_USB_IN_EPR,
+                      AO_USB_IN_EP,
+                      STM_USB_EPR_EP_TYPE_BULK,
+                      STM_USB_EPR_STAT_RX_DISABLED,
+                      STM_USB_EPR_STAT_TX_NAK);
+
        ao_usb_running = 1;
 }
 
 static uint16_t        control_count;
+static uint16_t int_count;
 static uint16_t        in_count;
 static uint16_t        out_count;
 static uint16_t        reset_count;
 
-/*
- * Write these values to preserve register contents under HW changes
- */
-
-#define STM_USB_EPR_INVARIANT  ((1 << STM_USB_EPR_CTR_RX) |            \
-                                (STM_USB_EPR_DTOG_RX_WRITE_INVARIANT << STM_USB_EPR_DTOG_RX) | \
-                                (STM_USB_EPR_STAT_RX_WRITE_INVARIANT << STM_USB_EPR_STAT_RX) | \
-                                (1 << STM_USB_EPR_CTR_TX) |            \
-                                (STM_USB_EPR_DTOG_TX_WRITE_INVARIANT << STM_USB_EPR_DTOG_TX) | \
-                                (STM_USB_EPR_STAT_TX_WRITE_INVARIANT << STM_USB_EPR_STAT_TX))
-
-#define STM_USB_EPR_INVARIANT_MASK     ((1 << STM_USB_EPR_CTR_RX) |    \
-                                        (STM_USB_EPR_DTOG_RX_MASK << STM_USB_EPR_DTOG_RX) | \
-                                        (STM_USB_EPR_STAT_RX_MASK << STM_USB_EPR_STAT_RX) | \
-                                        (1 << STM_USB_EPR_CTR_TX) |    \
-                                        (STM_USB_EPR_DTOG_TX_MASK << STM_USB_EPR_DTOG_TX) | \
-                                        (STM_USB_EPR_STAT_TX_MASK << STM_USB_EPR_STAT_TX))
-
-/*
- * These bits are purely under sw control, so preserve them in the
- * register by re-writing what was read
- */
-#define STM_USB_EPR_PRESERVE_MASK      ((STM_USB_EPR_EP_TYPE_MASK << STM_USB_EPR_EP_TYPE) | \
-                                        (1 << STM_USB_EPR_EP_KIND) |   \
-                                        (STM_USB_EPR_EA_MASK << STM_USB_EPR_EA))
-
-void
-stm_usb_lp_isr(void)
-{
-       uint32_t        istr = stm_usb.istr;
-
-       if (istr & (1 << STM_USB_ISTR_CTR)) {
-               uint8_t         ep = istr & STM_USB_ISTR_EP_ID_MASK;
-               uint32_t        epr, epr_write;
-
-               /* Preserve the SW write bits, don't mess with most HW writable bits,
-                * clear the CTR_RX and CTR_TX bits
-                */
-               epr = stm_usb.epr[ep];
-               epr_write = epr;
-               epr_write &= STM_USB_EPR_PRESERVE_MASK;
-               epr_write |= STM_USB_EPR_INVARIANT;
-               epr_write &= ~(1 << STM_USB_EPR_CTR_RX);
-               epr_write &= ~(1 << STM_USB_EPR_CTR_TX);
-               stm_usb.epr[ep] = epr_write;
-
-               switch (ep) {
-               case 0:
-                       ++control_count;
-                       if (ao_usb_epr_ctr_rx(epr)) {
-                               if (ao_usb_epr_setup(epr))
-                                       ao_usb_ep0_receive |= AO_USB_EP0_GOT_SETUP;
-                               else
-                                       ao_usb_ep0_receive |= AO_USB_EP0_GOT_RX_DATA;
-                       }
-                       if (ao_usb_epr_ctr_tx(epr))
-                               ao_usb_ep0_receive |= AO_USB_EP0_GOT_TX_ACK;
-                       ao_wakeup(&ao_usb_ep0_receive);
-                       break;
-               case AO_USB_OUT_EPR:
-                       ++out_count;
-                       if (ao_usb_epr_ctr_rx(epr)) {
-                               ao_usb_out_avail = 1;
-                               ao_wakeup(&ao_stdin_ready);
-                       }
-                       break;
-               case AO_USB_IN_EPR:
-                       ++in_count;
-                       if (ao_usb_epr_ctr_tx(epr)) {
-                               ao_usb_in_pending = 0;
-                               ao_wakeup(&ao_usb_in_pending);
-                       }
-                       break;
-               }
-               return;
-       }
-
-       if (istr & (1 << STM_USB_ISTR_RESET)) {
-               ++reset_count;
-               stm_usb.istr &= ~(1 << STM_USB_ISTR_RESET);
-               ao_usb_ep0_receive |= AO_USB_EP0_GOT_RESET;
-               ao_wakeup(&ao_usb_ep0_receive);
-       }
-}
-
-void
-stm_usb_hp_isr(void)
-{
-       stm_usb_lp_isr();
-}
-
-void
-stm_usb_fs_wkup(void)
-{
-       /* USB wakeup, just clear the bit for now */
-       stm_usb.istr &= ~(1 << STM_USB_ISTR_WKUP);
-}
-
-static struct ao_usb_line_coding ao_usb_line_coding = {115200, 0, 0, 8};
-
-/* Walk through the list of descriptors and find a match
- */
-static void
-ao_usb_get_descriptor(uint16_t value)
-{
-       const uint8_t           *descriptor;
-       uint8_t         type = value >> 8;
-       uint8_t         index = value;
-
-       descriptor = ao_usb_descriptors;
-       while (descriptor[0] != 0) {
-               if (descriptor[1] == type && index-- == 0) {
-                       if (type == AO_USB_DESC_CONFIGURATION)
-                               ao_usb_ep0_in_len = descriptor[2];
-                       else
-                               ao_usb_ep0_in_len = descriptor[0];
-                       ao_usb_ep0_in_data = descriptor;
-                       break;
-               }
-               descriptor += descriptor[0];
-       }
-}
-
-static void
-ao_usb_ep0_set_in_pending(uint8_t in_pending)
-{
-       ao_usb_ep0_in_pending = in_pending;
-
-#if 0
-       if (in_pending)
-               ueienx_0 = ((1 << RXSTPE) | (1 << RXOUTE) | (1 << TXINE));      /* Enable IN interrupt */
-#endif
-}
-
 /* The USB memory holds 16 bit values on 32 bit boundaries
  * and must be accessed only in 32 bit units. Sigh.
  */
@@ -525,28 +491,6 @@ ao_usb_read(uint8_t *dst, uint32_t *base, uint16_t offset, uint16_t bytes)
        }
 }
 
-static inline void
-ao_usb_set_stat_tx(int ep, uint32_t stat_tx) {
-       uint32_t        epr_write, epr_old, epr_new, epr_want;
-
-       cli();
-       epr_write = epr_old = stm_usb.epr[ep];
-       epr_write &= STM_USB_EPR_PRESERVE_MASK;
-       epr_write |= STM_USB_EPR_INVARIANT;
-       epr_write |= set_toggle(epr_old,
-                             STM_USB_EPR_STAT_TX_MASK << STM_USB_EPR_STAT_TX,
-                             stat_tx << STM_USB_EPR_STAT_TX);
-       stm_usb.epr[ep] = epr_write;
-       epr_new = stm_usb.epr[ep];
-       sei();
-       epr_want = (epr_old & ~(STM_USB_EPR_STAT_TX_MASK << STM_USB_EPR_STAT_TX)) |
-               (stat_tx << STM_USB_EPR_STAT_TX);
-       if (epr_new != epr_want) {
-               debug ("**** set_stat_tx to %x. old %08x want %08x write %08x new %08x\n",
-                      stat_tx, epr_old, epr_want, epr_write, epr_new);
-       }
-}
-
 /* Send an IN data packet */
 static void
 ao_usb_ep0_flush(void)
@@ -556,52 +500,27 @@ ao_usb_ep0_flush(void)
        /* Check to see if the endpoint is still busy */
        if (ao_usb_epr_stat_tx(stm_usb.epr[0]) == STM_USB_EPR_STAT_TX_VALID) {
                debug("EP0 not accepting IN data\n");
-               ao_usb_ep0_set_in_pending(1);
-       } else {
-               this_len = ao_usb_ep0_in_len;
-               if (this_len > AO_USB_CONTROL_SIZE)
-                       this_len = AO_USB_CONTROL_SIZE;
-
-               ao_usb_ep0_in_len -= this_len;
-
-               /* Set IN interrupt enable */
-               if (ao_usb_ep0_in_len == 0 && this_len != AO_USB_CONTROL_SIZE)
-                       ao_usb_ep0_set_in_pending(0);
-               else
-                       ao_usb_ep0_set_in_pending(1);
-
-               debug_data ("Flush EP0 len %d:", this_len);
-               ao_usb_write(ao_usb_ep0_in_data, ao_usb_ep0_tx_buffer, 0, this_len);
-               debug_data ("\n");
-               ao_usb_ep0_in_data += this_len;
-
-               /* Mark the endpoint as TX valid to send the packet */
-               ao_usb_bdt[0].single.count_tx = this_len;
-               ao_usb_set_stat_tx(0, STM_USB_EPR_STAT_TX_VALID);
-               debug ("queue tx. epr 0 now %08x\n", stm_usb.epr[0]);
+               return;
        }
-}
 
-static inline void
-ao_usb_set_stat_rx(int ep, uint32_t stat_rx) {
-       uint32_t        epr_write, epr_old, epr_new, epr_want;
+       this_len = ao_usb_ep0_in_len;
+       if (this_len > AO_USB_CONTROL_SIZE)
+               this_len = AO_USB_CONTROL_SIZE;
 
-       cli();
-       epr_write = epr_old = stm_usb.epr[ep];
-       epr_write &= STM_USB_EPR_PRESERVE_MASK;
-       epr_write |= STM_USB_EPR_INVARIANT;
-       epr_write |= set_toggle(epr_old,
-                             STM_USB_EPR_STAT_RX_MASK << STM_USB_EPR_STAT_RX,
-                             stat_rx << STM_USB_EPR_STAT_RX);
-       stm_usb.epr[ep] = epr_write;
-       epr_new = stm_usb.epr[ep];
-       sei();
-       epr_want = (epr_old & ~(STM_USB_EPR_STAT_RX_MASK << STM_USB_EPR_STAT_RX)) |
-               (stat_rx << STM_USB_EPR_STAT_RX);
-       if (epr_new != epr_want) {
-               debug ("**** set_stat_rx to %x. old %08x want %08x write %08x new %08x\n",
-                      stat_rx, epr_old, epr_want, epr_write, epr_new);
-       }
+       if (this_len < AO_USB_CONTROL_SIZE)
+               ao_usb_ep0_state = AO_USB_EP0_IDLE;
+
+       ao_usb_ep0_in_len -= this_len;
+
+       debug_data ("Flush EP0 len %d:", this_len);
+       ao_usb_write(ao_usb_ep0_in_data, ao_usb_ep0_tx_buffer, 0, this_len);
+       debug_data ("\n");
+       ao_usb_ep0_in_data += this_len;
+
+       /* Mark the endpoint as TX valid to send the packet */
+       ao_usb_bdt[AO_USB_CONTROL_EPR].single.count_tx = this_len;
+       ao_usb_set_stat_tx(AO_USB_CONTROL_EPR, STM_USB_EPR_STAT_TX_VALID);
+       debug ("queue tx. epr 0 now %08x\n", stm_usb.epr[AO_USB_CONTROL_EPR]);
 }
 
 /* Read data from the ep0 OUT fifo */
@@ -624,39 +543,87 @@ ao_usb_ep0_fill(void)
        ao_usb_set_stat_rx(0, STM_USB_EPR_STAT_RX_VALID);
 }
 
-void
-ao_usb_ep0_queue_byte(uint8_t a)
+static void
+ao_usb_ep0_in_reset(void)
+{
+       ao_usb_ep0_in_data = ao_usb_ep0_in_buf;
+       ao_usb_ep0_in_len = 0;
+}
+
+static void
+ao_usb_ep0_in_queue_byte(uint8_t a)
+{
+       if (ao_usb_ep0_in_len < sizeof (ao_usb_ep0_in_buf))
+               ao_usb_ep0_in_buf[ao_usb_ep0_in_len++] = a;
+}
+
+static void
+ao_usb_ep0_in_set(const uint8_t *data, uint8_t len)
+{
+       ao_usb_ep0_in_data = data;
+       ao_usb_ep0_in_len = len;
+}
+
+static void
+ao_usb_ep0_out_set(uint8_t *data, uint8_t len)
+{
+       ao_usb_ep0_out_data = data;
+       ao_usb_ep0_out_len = len;
+}
+
+static void
+ao_usb_ep0_in_start(uint16_t max)
+{
+       /* Don't send more than asked for */
+       if (ao_usb_ep0_in_len > max)
+               ao_usb_ep0_in_len = max;
+       ao_usb_ep0_flush();
+}
+
+static struct ao_usb_line_coding ao_usb_line_coding = {115200, 0, 0, 8};
+
+/* Walk through the list of descriptors and find a match
+ */
+static void
+ao_usb_get_descriptor(uint16_t value)
 {
-       ao_usb_ep0_in_buf[ao_usb_ep0_in_len++] = a;
+       const uint8_t           *descriptor;
+       uint8_t         type = value >> 8;
+       uint8_t         index = value;
+
+       descriptor = ao_usb_descriptors;
+       while (descriptor[0] != 0) {
+               if (descriptor[1] == type && index-- == 0) {
+                       uint8_t len;
+                       if (type == AO_USB_DESC_CONFIGURATION)
+                               len = descriptor[2];
+                       else
+                               len = descriptor[0];
+                       ao_usb_ep0_in_set(descriptor, len);
+                       break;
+               }
+               descriptor += descriptor[0];
+       }
 }
 
 static void
 ao_usb_ep0_setup(void)
 {
        /* Pull the setup packet out of the fifo */
-       ao_usb_ep0_out_data = (uint8_t *) &ao_usb_setup;
-       ao_usb_ep0_out_len = 8;
+       ao_usb_ep0_out_set((uint8_t *) &ao_usb_setup, 8);
        ao_usb_ep0_fill();
        if (ao_usb_ep0_out_len != 0) {
                debug ("invalid setup packet length\n");
                return;
        }
 
-       /* Figure out how to ACK the setup packet */
-       if (ao_usb_setup.dir_type_recip & AO_USB_DIR_IN) {
-               if (ao_usb_setup.length)
-                       ao_usb_ep0_state = AO_USB_EP0_DATA_IN;
-               else
-                       ao_usb_ep0_state = AO_USB_EP0_IDLE;
-       } else {
-               if (ao_usb_setup.length)
-                       ao_usb_ep0_state = AO_USB_EP0_DATA_OUT;
-               else
-                       ao_usb_ep0_state = AO_USB_EP0_IDLE;
-       }
+       if ((ao_usb_setup.dir_type_recip & AO_USB_DIR_IN) || ao_usb_setup.length == 0)
+               ao_usb_ep0_state = AO_USB_EP0_DATA_IN;
+       else
+               ao_usb_ep0_state = AO_USB_EP0_DATA_OUT;
+
+       ao_usb_ep0_in_reset();
 
-       ao_usb_ep0_in_data = ao_usb_ep0_in_buf;
-       ao_usb_ep0_in_len = 0;
        switch(ao_usb_setup.dir_type_recip & AO_USB_SETUP_TYPE_MASK) {
        case AO_USB_TYPE_STANDARD:
                debug ("Standard setup packet\n");
@@ -666,8 +633,8 @@ ao_usb_ep0_setup(void)
                        switch(ao_usb_setup.request) {
                        case AO_USB_REQ_GET_STATUS:
                                debug ("get status\n");
-                               ao_usb_ep0_queue_byte(0);
-                               ao_usb_ep0_queue_byte(0);
+                               ao_usb_ep0_in_queue_byte(0);
+                               ao_usb_ep0_in_queue_byte(0);
                                break;
                        case AO_USB_REQ_SET_ADDRESS:
                                debug ("set address %d\n", ao_usb_setup.value);
@@ -680,7 +647,7 @@ ao_usb_ep0_setup(void)
                                break;
                        case AO_USB_REQ_GET_CONFIGURATION:
                                debug ("get configuration %d\n", ao_usb_configuration);
-                               ao_usb_ep0_queue_byte(ao_usb_configuration);
+                               ao_usb_ep0_in_queue_byte(ao_usb_configuration);
                                break;
                        case AO_USB_REQ_SET_CONFIGURATION:
                                ao_usb_configuration = ao_usb_setup.value;
@@ -693,11 +660,11 @@ ao_usb_ep0_setup(void)
                        debug ("Interface setup packet\n");
                        switch(ao_usb_setup.request) {
                        case AO_USB_REQ_GET_STATUS:
-                               ao_usb_ep0_queue_byte(0);
-                               ao_usb_ep0_queue_byte(0);
+                               ao_usb_ep0_in_queue_byte(0);
+                               ao_usb_ep0_in_queue_byte(0);
                                break;
                        case AO_USB_REQ_GET_INTERFACE:
-                               ao_usb_ep0_queue_byte(0);
+                               ao_usb_ep0_in_queue_byte(0);
                                break;
                        case AO_USB_REQ_SET_INTERFACE:
                                break;
@@ -707,8 +674,8 @@ ao_usb_ep0_setup(void)
                        debug ("Endpoint setup packet\n");
                        switch(ao_usb_setup.request) {
                        case AO_USB_REQ_GET_STATUS:
-                               ao_usb_ep0_queue_byte(0);
-                               ao_usb_ep0_queue_byte(0);
+                               ao_usb_ep0_in_queue_byte(0);
+                               ao_usb_ep0_in_queue_byte(0);
                                break;
                        }
                        break;
@@ -719,98 +686,173 @@ ao_usb_ep0_setup(void)
                switch (ao_usb_setup.request) {
                case AO_USB_SET_LINE_CODING:
                        debug ("set line coding\n");
-                       ao_usb_ep0_out_len = 7;
-                       ao_usb_ep0_out_data = (uint8_t *) &ao_usb_line_coding;
+                       ao_usb_ep0_out_set((uint8_t *) &ao_usb_line_coding, 7);
                        break;
                case AO_USB_GET_LINE_CODING:
                        debug ("get line coding\n");
-                       ao_usb_ep0_in_len = 7;
-                       ao_usb_ep0_in_data = (uint8_t *) &ao_usb_line_coding;
+                       ao_usb_ep0_in_set((const uint8_t *) &ao_usb_line_coding, 7);
                        break;
                case AO_USB_SET_CONTROL_LINE_STATE:
                        break;
                }
                break;
        }
-       if (ao_usb_ep0_state != AO_USB_EP0_DATA_OUT) {
-               if (ao_usb_setup.length < ao_usb_ep0_in_len)
-                       ao_usb_ep0_in_len = ao_usb_setup.length;
-               ao_usb_ep0_flush();
-       }
+
+       /* If we're not waiting to receive data from the host,
+        * queue an IN response
+        */
+       if (ao_usb_ep0_state == AO_USB_EP0_DATA_IN)
+               ao_usb_ep0_in_start(ao_usb_setup.length);
 }
 
-/* End point 0 receives all of the control messages. */
 static void
-ao_usb_ep0(void)
+ao_usb_ep0_handle(uint8_t receive)
 {
-       uint8_t intx, udint;
-
-       debug ("usb task started\n");
-       ao_usb_ep0_state = AO_USB_EP0_IDLE;
-       for (;;) {
-               uint8_t receive;
-               ao_arch_critical(
-                       while (!(receive = ao_usb_ep0_receive))
-                               ao_sleep(&ao_usb_ep0_receive);
-                       ao_usb_ep0_receive = 0;
-                       );
-               
-               if (receive & AO_USB_EP0_GOT_RESET) {
-                       debug ("\treset\n");
-                       ao_usb_set_ep0();
-                       continue;
-               }
-               if (receive & AO_USB_EP0_GOT_SETUP) {
-                       debug ("\tsetup\n");
-                       ao_usb_ep0_setup();
-               }
-               if (receive & AO_USB_EP0_GOT_RX_DATA) {
-                       debug ("\tgot rx data\n");
+       ao_usb_ep0_receive = 0;
+       if (receive & AO_USB_EP0_GOT_RESET) {
+               debug ("\treset\n");
+               ao_usb_set_ep0();
+               return;
+       }
+       if (receive & AO_USB_EP0_GOT_SETUP) {
+               debug ("\tsetup\n");
+               ao_usb_ep0_setup();
+       }
+       if (receive & AO_USB_EP0_GOT_RX_DATA) {
+               debug ("\tgot rx data\n");
+               if (ao_usb_ep0_state == AO_USB_EP0_DATA_OUT) {
                        ao_usb_ep0_fill();
-                       ao_usb_ep0_set_in_pending(1);
+                       if (ao_usb_ep0_out_len == 0) {
+                               ao_usb_ep0_state = AO_USB_EP0_DATA_IN;
+                               ao_usb_ep0_in_start(0);
+                       }
                }
-               if (receive & AO_USB_EP0_GOT_TX_ACK) {
-                       debug ("\tgot tx ack\n");
+       }
+       if (receive & AO_USB_EP0_GOT_TX_ACK) {
+               debug ("\tgot tx ack\n");
+
+               /* Wait until the IN packet is received from addr 0
+                * before assigning our local address
+                */
+               if (ao_usb_address_pending)
+                       ao_usb_set_address(ao_usb_address);
+               if (ao_usb_ep0_state == AO_USB_EP0_DATA_IN)
                        ao_usb_ep0_flush();
-                       if (ao_usb_address_pending) {
-                               ao_usb_set_address(ao_usb_address);
-                               ao_usb_set_configuration();
+       }
+}
+
+void
+stm_usb_lp_isr(void)
+{
+       uint32_t        istr = stm_usb.istr;
+
+       if (istr & (1 << STM_USB_ISTR_CTR)) {
+               uint8_t         ep = istr & STM_USB_ISTR_EP_ID_MASK;
+               uint32_t        epr, epr_write;
+
+               /* Preserve the SW write bits, don't mess with most HW writable bits,
+                * clear the CTR_RX and CTR_TX bits
+                */
+               epr = stm_usb.epr[ep];
+               epr_write = epr;
+               epr_write &= STM_USB_EPR_PRESERVE_MASK;
+               epr_write |= STM_USB_EPR_INVARIANT;
+               epr_write &= ~(1 << STM_USB_EPR_CTR_RX);
+               epr_write &= ~(1 << STM_USB_EPR_CTR_TX);
+               stm_usb.epr[ep] = epr_write;
+
+               switch (ep) {
+               case 0:
+                       ++control_count;
+                       if (ao_usb_epr_ctr_rx(epr)) {
+                               if (ao_usb_epr_setup(epr))
+                                       ao_usb_ep0_receive |= AO_USB_EP0_GOT_SETUP;
+                               else
+                                       ao_usb_ep0_receive |= AO_USB_EP0_GOT_RX_DATA;
+                       }
+                       if (ao_usb_epr_ctr_tx(epr))
+                               ao_usb_ep0_receive |= AO_USB_EP0_GOT_TX_ACK;
+                       ao_usb_ep0_handle(ao_usb_ep0_receive);
+                       break;
+               case AO_USB_OUT_EPR:
+                       ++out_count;
+                       if (ao_usb_epr_ctr_rx(epr)) {
+                               _rx_dbg1("RX ISR", epr);
+                               ao_usb_out_avail = 1;
+                               _rx_dbg0("out avail set");
+                               ao_wakeup(AO_USB_OUT_SLEEP_ADDR);
+                               _rx_dbg0("stdin awoken");
+                       }
+                       break;
+               case AO_USB_IN_EPR:
+                       ++in_count;
+                       _tx_dbg1("TX ISR", epr);
+                       if (ao_usb_epr_ctr_tx(epr)) {
+                               ao_usb_in_pending = 0;
+                               ao_wakeup(&ao_usb_in_pending);
                        }
+                       break;
+               case AO_USB_INT_EPR:
+                       ++int_count;
+                       if (ao_usb_epr_ctr_tx(epr))
+                               _ao_usb_set_stat_tx(AO_USB_INT_EPR, STM_USB_EPR_STAT_TX_NAK);
+                       break;
                }
+               return;
+       }
+
+       if (istr & (1 << STM_USB_ISTR_RESET)) {
+               ++reset_count;
+               stm_usb.istr = (uint16_t) ~(1 << STM_USB_ISTR_RESET);   /* write-0-to-clear flags: direct write, not RMW, so flags raised meanwhile are not lost */
+               ao_usb_ep0_receive |= AO_USB_EP0_GOT_RESET;
+               ao_usb_ep0_handle(ao_usb_ep0_receive);
        }
 }
 
+void
+stm_usb_fs_wkup(void)
+{
+       /* USB wakeup: just clear WKUP. ISTR flags clear on write-0, so write the mask directly (no RMW) */
+       stm_usb.istr = (uint16_t) ~(1 << STM_USB_ISTR_WKUP);
+}
+
 /* Queue the current IN buffer for transmission */
 static void
-ao_usb_in_send(void)
+_ao_usb_in_send(void)
 {
+       _tx_dbg0("in_send start");
        debug ("send %d\n", ao_usb_tx_count);
+       while (ao_usb_in_pending)
+               ao_sleep(&ao_usb_in_pending);
+       ao_usb_in_pending = 1;
+       if (ao_usb_tx_count != AO_USB_IN_SIZE)
+               ao_usb_in_flushed = 1;
        ao_usb_write(ao_usb_tx_buffer, ao_usb_in_tx_buffer, 0, ao_usb_tx_count);
        ao_usb_bdt[AO_USB_IN_EPR].single.count_tx = ao_usb_tx_count;
-       ao_usb_set_stat_tx(AO_USB_IN_EPR, STM_USB_EPR_STAT_TX_VALID);
-       ao_usb_in_pending = 1;
        ao_usb_tx_count = 0;
+       _ao_usb_set_stat_tx(AO_USB_IN_EPR, STM_USB_EPR_STAT_TX_VALID);
+       _tx_dbg0("in_send end");
 }
 
-/* Wait for a free IN buffer */
+/* Wait for a free IN buffer. Interrupts are blocked */
 static void
-ao_usb_in_wait(void)
+_ao_usb_in_wait(void)
 {
        for (;;) {
                /* Check if the current buffer is writable */
                if (ao_usb_tx_count < AO_USB_IN_SIZE)
                        break;
 
-               cli();
+               _tx_dbg0("in_wait top");
                /* Wait for an IN buffer to be ready */
                while (ao_usb_in_pending)
                        ao_sleep(&ao_usb_in_pending);
-               sei();
+               _tx_dbg0("in_wait bottom");
        }
 }
 
 void
-ao_usb_flush(void) __critical
+ao_usb_flush(void)
 {
        if (!ao_usb_running)
                return;
@@ -822,40 +864,45 @@ ao_usb_flush(void) __critical
         * packet was full, in which case we now
         * want to send an empty packet
         */
-       if (!ao_usb_in_flushed) {
-               ao_usb_in_flushed = 1;
-               cli();
-               /* Wait for an IN buffer to be ready */
-               while (ao_usb_in_pending)
-                       ao_sleep(&ao_usb_in_pending);
-               sei();
-               ao_usb_in_send();
+       ao_arch_block_interrupts();
+       while (!ao_usb_in_flushed) {
+               _tx_dbg0("flush top");
+               _ao_usb_in_send();
+               _tx_dbg0("flush end");
        }
+       ao_arch_release_interrupts();
 }
 
 void
-ao_usb_putchar(char c) __critical __reentrant
+ao_usb_putchar(char c)
 {
        if (!ao_usb_running)
                return;
 
-       ao_usb_in_wait();
+       ao_arch_block_interrupts();
+       _ao_usb_in_wait();
 
+       ao_usb_in_flushed = 0;
        ao_usb_tx_buffer[ao_usb_tx_count++] = (uint8_t) c;
 
        /* Send the packet when full */
-       if (ao_usb_tx_count == AO_USB_IN_SIZE)
-               ao_usb_in_send();
-       ao_usb_in_flushed = 0;
+       if (ao_usb_tx_count == AO_USB_IN_SIZE) {
+               _tx_dbg0("putchar full");
+               _ao_usb_in_send();
+               _tx_dbg0("putchar flushed");
+       }
+       ao_arch_release_interrupts();
 }
 
 static void
-ao_usb_out_recv(void)
+_ao_usb_out_recv(void)
 {
+       _rx_dbg0("out_recv top");
        ao_usb_out_avail = 0;
 
        ao_usb_rx_count = ao_usb_bdt[AO_USB_OUT_EPR].single.count_rx & STM_USB_BDT_COUNT_RX_COUNT_RX_MASK;
 
+       _rx_dbg1("out_recv count", ao_usb_rx_count);
        debug ("recv %d\n", ao_usb_rx_count);
        debug_data("Fill OUT len %d:", ao_usb_rx_count);
        ao_usb_read(ao_usb_rx_buffer, ao_usb_out_rx_buffer, 0, ao_usb_rx_count);
@@ -863,13 +910,13 @@ ao_usb_out_recv(void)
        ao_usb_rx_pos = 0;
 
        /* ACK the packet */
-       ao_usb_set_stat_rx(AO_USB_OUT_EPR, STM_USB_EPR_STAT_RX_VALID);
+       _ao_usb_set_stat_rx(AO_USB_OUT_EPR, STM_USB_EPR_STAT_RX_VALID);
 }
 
-static char
+int
 _ao_usb_pollchar(void)
 {
-       char c;
+       uint8_t c;
 
        if (!ao_usb_running)
                return AO_READ_AGAIN;
@@ -878,10 +925,13 @@ _ao_usb_pollchar(void)
                if (ao_usb_rx_pos != ao_usb_rx_count)
                        break;
 
+               _rx_dbg0("poll check");
                /* Check to see if a packet has arrived */
-               if (!ao_usb_out_avail)
+               if (!ao_usb_out_avail) {
+                       _rx_dbg0("poll none");
                        return AO_READ_AGAIN;
-               ao_usb_out_recv();
+               }
+               _ao_usb_out_recv();
        }
 
        /* Pull a character out of the fifo */
@@ -890,30 +940,21 @@ _ao_usb_pollchar(void)
 }
 
 char
-ao_usb_pollchar(void)
-{
-       char    c;
-       cli();
-       c = _ao_usb_pollchar();
-       sei();
-       return c;
-}
-
-char
-ao_usb_getchar(void) __critical
+ao_usb_getchar(void)
 {
-       char    c;
+       int     c;
 
-       cli();
+       ao_arch_block_interrupts();
        while ((c = _ao_usb_pollchar()) == AO_READ_AGAIN)
-               ao_sleep(&ao_stdin_ready);
-       sei();
+               ao_sleep(AO_USB_OUT_SLEEP_ADDR);
+       ao_arch_release_interrupts();
        return c;
 }
 
 void
 ao_usb_disable(void)
 {
+       ao_arch_block_interrupts();
        stm_usb.cntr = (1 << STM_USB_CNTR_FRES);
        stm_usb.istr = 0;
 
@@ -925,12 +966,13 @@ ao_usb_disable(void)
 
        /* Disable the interface */
        stm_rcc.apb1enr &+ ~(1 << STM_RCC_APB1ENR_USBEN);
+       ao_arch_release_interrupts();
 }
 
 void
 ao_usb_enable(void)
 {
-       uint16_t        tick;
+       int     t;
 
        /* Enable SYSCFG */
        stm_rcc.apb2enr |= (1 << STM_RCC_APB2ENR_SYSCFGEN);
@@ -947,6 +989,8 @@ ao_usb_enable(void)
         * pulled low and doesn't work at all
         */
 
+       ao_arch_block_interrupts();
+
        /* Route interrupts */
        stm_nvic_set_priority(STM_ISR_USB_LP_POS, 3);
        stm_nvic_set_enable(STM_ISR_USB_LP_POS);
@@ -960,7 +1004,7 @@ ao_usb_enable(void)
 
        /* Clear any spurious interrupts */
        stm_usb.istr = 0;
-       
+
        debug ("ao_usb_enable\n");
 
        /* Enable interrupts */
@@ -978,11 +1022,15 @@ ao_usb_enable(void)
                        (0 << STM_USB_CNTR_PDWN) |
                        (0 << STM_USB_CNTR_FRES));
 
+       ao_arch_release_interrupts();
+
+       for (t = 0; t < 1000; t++)
+               ao_arch_nop();
        /* Enable USB pull-up */
        stm_syscfg.pmc |= (1 << STM_SYSCFG_PMC_USB_PU);
 }
 
-#if USB_DEBUG
+#if USB_ECHO
 struct ao_task ao_usb_echo_task;
 
 static void
@@ -998,17 +1046,19 @@ ao_usb_echo(void)
 }
 #endif
 
+#if USB_DEBUG
 static void
 ao_usb_irq(void)
 {
-       printf ("control: %d out: %d in: %d reset: %d\n",
-               control_count, out_count, in_count, reset_count);
+       printf ("control: %d out: %d in: %d int: %d reset: %d\n",
+               control_count, out_count, in_count, int_count, reset_count);
 }
 
 __code struct ao_cmds ao_usb_cmds[] = {
        { ao_usb_irq, "I\0Show USB interrupt counts" },
        { 0, NULL }
 };
+#endif
 
 void
 ao_usb_init(void)
@@ -1016,12 +1066,69 @@ ao_usb_init(void)
        ao_usb_enable();
 
        debug ("ao_usb_init\n");
-       ao_add_task(&ao_usb_task, ao_usb_ep0, "usb");
-#if USB_DEBUG
+       ao_usb_ep0_state = AO_USB_EP0_IDLE;
+#if USB_ECHO
        ao_add_task(&ao_usb_echo_task, ao_usb_echo, "usb echo");
 #endif
+#if USB_DEBUG
        ao_cmd_register(&ao_usb_cmds[0]);
-#if !USB_DEBUG
-       ao_add_stdio(ao_usb_pollchar, ao_usb_putchar, ao_usb_flush);
 #endif
+#if !USB_ECHO
+#if USE_USB_STDIO
+       ao_add_stdio(_ao_usb_pollchar, ao_usb_putchar, ao_usb_flush);
+#endif
+#endif
+}
+
+#if TX_DBG || RX_DBG
+
+struct ao_usb_dbg {
+       int             line;
+       char            *msg;
+       uint32_t        value;
+       uint32_t        primask;
+#if TX_DBG
+       uint16_t        in_count;
+       uint32_t        in_epr;
+       uint32_t        in_pending;
+       uint32_t        tx_count;
+       uint32_t        in_flushed;
+#endif
+#if RX_DBG
+       uint8_t         rx_count;
+       uint8_t         rx_pos;
+       uint8_t         out_avail;
+       uint32_t        out_epr;
+#endif
+};
+
+#define NUM_USB_DBG    128
+
+static struct ao_usb_dbg dbg[NUM_USB_DBG];
+static int dbg_i;
+
+static void _dbg(int line, char *msg, uint32_t value)
+{
+       uint32_t        primask;
+       dbg[dbg_i].line = line;
+       dbg[dbg_i].msg = msg;
+       dbg[dbg_i].value = value;
+       asm("mrs %0,primask" : "=&r" (primask));
+       dbg[dbg_i].primask = primask;
+#if TX_DBG
+       dbg[dbg_i].in_count = in_count;
+       dbg[dbg_i].in_epr = stm_usb.epr[AO_USB_IN_EPR];
+       dbg[dbg_i].in_pending = ao_usb_in_pending;
+       dbg[dbg_i].tx_count = ao_usb_tx_count;
+       dbg[dbg_i].in_flushed = ao_usb_in_flushed;
+#endif
+#if RX_DBG
+       dbg[dbg_i].rx_count = ao_usb_rx_count;
+       dbg[dbg_i].rx_pos = ao_usb_rx_pos;
+       dbg[dbg_i].out_avail = ao_usb_out_avail;
+       dbg[dbg_i].out_epr = stm_usb.epr[AO_USB_OUT_EPR];
+#endif
+       if (++dbg_i == NUM_USB_DBG)
+               dbg_i = 0;
 }
+#endif