stlink: add none 32bit memory read/write functions
[fw/openocd] / src / target / xscale.c
index f5aada507d54557826091ab95ff865f4ef228b33..484cdceafdf523fa3d57d68300ffd2819ed298e7 100644 (file)
@@ -158,10 +158,9 @@ static int xscale_verify_pointer(struct command_context *cmd_ctx,
        return ERROR_OK;
 }
 
-static int xscale_jtag_set_instr(struct jtag_tap *tap, uint32_t new_instr)
+static int xscale_jtag_set_instr(struct jtag_tap *tap, uint32_t new_instr, tap_state_t end_state)
 {
-       if (tap == NULL)
-               return ERROR_FAIL;
+       assert (tap != NULL);
 
        if (buf_get_u32(tap->cur_instr, 0, tap->ir_length) != new_instr)
        {
@@ -171,9 +170,9 @@ static int xscale_jtag_set_instr(struct jtag_tap *tap, uint32_t new_instr)
                memset(&field, 0, sizeof field);
                field.num_bits = tap->ir_length;
                field.out_value = scratch;
-               buf_set_u32(field.out_value, 0, field.num_bits, new_instr);
+               buf_set_u32(scratch, 0, field.num_bits, new_instr);
 
-               jtag_add_ir_scan(tap, &field, jtag_get_end_state());
+               jtag_add_ir_scan(tap, &field, end_state);
        }
 
        return ERROR_OK;
@@ -191,9 +190,9 @@ static int xscale_read_dcsr(struct target *target)
        uint8_t field2_check_value = 0x0;
        uint8_t field2_check_mask = 0x1;
 
-       jtag_set_end_state(TAP_DRPAUSE);
        xscale_jtag_set_instr(target->tap,
-               XSCALE_SELDCSR << xscale->xscale_variant);
+               XSCALE_SELDCSR << xscale->xscale_variant,
+               TAP_DRPAUSE);
 
        buf_set_u32(&field0, 1, 1, xscale->hold_rst);
        buf_set_u32(&field0, 2, 1, xscale->external_debug_break);
@@ -234,8 +233,6 @@ static int xscale_read_dcsr(struct target *target)
        fields[1].out_value = xscale->reg_cache->reg_list[XSCALE_DCSR].value;
        fields[1].in_value = NULL;
 
-       jtag_set_end_state(TAP_IDLE);
-
        jtag_add_dr_scan(target->tap, 3, fields, TAP_DRPAUSE);
 
        /* DANGER!!! this must be here. It will make sure that the arguments
@@ -247,13 +244,13 @@ static int xscale_read_dcsr(struct target *target)
 static void xscale_getbuf(jtag_callback_data_t arg)
 {
        uint8_t *in = (uint8_t *)arg;
-       *((uint32_t *)in) = buf_get_u32(in, 0, 32);
+       *((uint32_t *)arg) = buf_get_u32(in, 0, 32);
 }
 
 static int xscale_receive(struct target *target, uint32_t *buffer, int num_words)
 {
        if (num_words == 0)
-               return ERROR_INVALID_ARGUMENTS;
+               return ERROR_COMMAND_SYNTAX_ERROR;
 
        struct xscale_common *xscale = target_to_xscale(target);
        int retval = ERROR_OK;
@@ -276,18 +273,22 @@ static int xscale_receive(struct target *target, uint32_t *buffer, int num_words
        memset(&fields, 0, sizeof fields);
 
        fields[0].num_bits = 3;
+       uint8_t tmp;
+       fields[0].in_value = &tmp;
        fields[0].check_value = &field0_check_value;
        fields[0].check_mask = &field0_check_mask;
 
        fields[1].num_bits = 32;
 
        fields[2].num_bits = 1;
+       uint8_t tmp2;
+       fields[2].in_value = &tmp2;
        fields[2].check_value = &field2_check_value;
        fields[2].check_mask = &field2_check_mask;
 
-       jtag_set_end_state(TAP_IDLE);
        xscale_jtag_set_instr(target->tap,
-               XSCALE_DBGTX << xscale->xscale_variant);
+               XSCALE_DBGTX << xscale->xscale_variant,
+               TAP_IDLE);
        jtag_add_runtest(1, TAP_IDLE); /* ensures that we're in the TAP_IDLE state as the above could be a no-op */
 
        /* repeat until all words have been collected */
@@ -320,7 +321,7 @@ static int xscale_receive(struct target *target, uint32_t *buffer, int num_words
                /* examine results */
                for (i = words_done; i < num_words; i++)
                {
-                       if (!(field0[0] & 1))
+                       if (!(field0[i] & 1))
                        {
                                /* move backwards if necessary */
                                int j;
@@ -367,10 +368,9 @@ static int xscale_read_tx(struct target *target, int consume)
        uint8_t field2_check_value = 0x0;
        uint8_t field2_check_mask = 0x1;
 
-       jtag_set_end_state(TAP_IDLE);
-
        xscale_jtag_set_instr(target->tap,
-               XSCALE_DBGTX << xscale->xscale_variant);
+               XSCALE_DBGTX << xscale->xscale_variant,
+               TAP_IDLE);
 
        path[0] = TAP_DRSELECT;
        path[1] = TAP_DRCAPTURE;
@@ -463,10 +463,9 @@ static int xscale_write_rx(struct target *target)
        uint8_t field2_check_value = 0x0;
        uint8_t field2_check_mask = 0x1;
 
-       jtag_set_end_state(TAP_IDLE);
-
        xscale_jtag_set_instr(target->tap,
-               XSCALE_DBGRX << xscale->xscale_variant);
+               XSCALE_DBGRX << xscale->xscale_variant,
+               TAP_IDLE);
 
        memset(&fields, 0, sizeof fields);
 
@@ -533,7 +532,7 @@ static int xscale_write_rx(struct target *target)
 }
 
 /* send count elements of size byte to the debug handler */
-static int xscale_send(struct target *target, uint8_t *buffer, int count, int size)
+static int xscale_send(struct target *target, const uint8_t *buffer, int count, int size)
 {
        struct xscale_common *xscale = target_to_xscale(target);
        uint32_t t[3];
@@ -541,10 +540,9 @@ static int xscale_send(struct target *target, uint8_t *buffer, int count, int si
        int retval;
        int done_count = 0;
 
-       jtag_set_end_state(TAP_IDLE);
-
        xscale_jtag_set_instr(target->tap,
-               XSCALE_DBGRX << xscale->xscale_variant);
+               XSCALE_DBGRX << xscale->xscale_variant,
+               TAP_IDLE);
 
        bits[0]=3;
        t[0]=0;
@@ -579,7 +577,7 @@ static int xscale_send(struct target *target, uint8_t *buffer, int count, int si
                        break;
                default:
                        LOG_ERROR("BUG: size neither 4, 2 nor 1");
-                       return ERROR_INVALID_ARGUMENTS;
+                       return ERROR_COMMAND_SYNTAX_ERROR;
                }
                jtag_add_dr_out(target->tap,
                                3,
@@ -624,9 +622,9 @@ static int xscale_write_dcsr(struct target *target, int hold_rst, int ext_dbg_br
        if (ext_dbg_brk != -1)
                xscale->external_debug_break = ext_dbg_brk;
 
-       jtag_set_end_state(TAP_IDLE);
        xscale_jtag_set_instr(target->tap,
-               XSCALE_SELDCSR << xscale->xscale_variant);
+               XSCALE_SELDCSR << xscale->xscale_variant,
+               TAP_IDLE);
 
        buf_set_u32(&field0, 1, 1, xscale->hold_rst);
        buf_set_u32(&field0, 2, 1, xscale->external_debug_break);
@@ -686,9 +684,9 @@ static int xscale_load_ic(struct target *target, uint32_t va, uint32_t buffer[8]
        LOG_DEBUG("loading miniIC at 0x%8.8" PRIx32 "", va);
 
        /* LDIC into IR */
-       jtag_set_end_state(TAP_IDLE);
        xscale_jtag_set_instr(target->tap,
-               XSCALE_LDIC << xscale->xscale_variant);
+               XSCALE_LDIC << xscale->xscale_variant,
+               TAP_IDLE);
 
        /* CMD is b011 to load a cacheline into the Mini ICache.
         * Loading into the main ICache is deprecated, and unused.
@@ -737,9 +735,9 @@ static int xscale_invalidate_ic_line(struct target *target, uint32_t va)
        uint8_t cmd;
        struct scan_field fields[2];
 
-       jtag_set_end_state(TAP_IDLE);
        xscale_jtag_set_instr(target->tap,
-               XSCALE_LDIC << xscale->xscale_variant);
+               XSCALE_LDIC << xscale->xscale_variant,
+               TAP_IDLE);
 
        /* CMD for invalidate IC line b000, bits [6:4] b000 */
        buf_set_u32(&cmd, 0, 6, 0x0);
@@ -842,7 +840,7 @@ static int xscale_arch_state(struct target *target)
        if (armv4_5->common_magic != ARM_COMMON_MAGIC)
        {
                LOG_ERROR("BUG: called for a non-ARMv4/5 target");
-               return ERROR_INVALID_ARGUMENTS;
+               return ERROR_COMMAND_SYNTAX_ERROR;
        }
 
        arm_arch_state(target);
@@ -901,7 +899,7 @@ static int xscale_debug_entry(struct target *target)
        struct arm *armv4_5 = &xscale->armv4_5_common;
        uint32_t pc;
        uint32_t buffer[10];
-       int i;
+       unsigned i;
        int retval;
        uint32_t moe;
 
@@ -970,6 +968,11 @@ static int xscale_debug_entry(struct target *target)
                r->valid = true;
        }
 
+       /* mark xscale regs invalid to ensure they are retrieved from the
+        * debug handler if requested  */
+       for (i = 0; i < xscale->reg_cache->num_regs; i++)
+          xscale->reg_cache->reg_list[i].valid = 0;
+
        /* examine debug reason */
        xscale_read_dcsr(target);
        moe = buf_get_u32(xscale->reg_cache->reg_list[XSCALE_DCSR].value, 2, 3);
@@ -1045,21 +1048,20 @@ static int xscale_debug_entry(struct target *target)
        xscale->armv4_5_mmu.armv4_5_cache.i_cache_enabled = (xscale->cp15_control_reg & 0x1000U) ? 1 : 0;
 
        /* tracing enabled, read collected trace data */
-       if (xscale->trace.buffer_enabled)
+       if (xscale->trace.mode != XSCALE_TRACE_DISABLED)
        {
                xscale_read_trace(target);
-               xscale->trace.buffer_fill--;
 
-               /* resume if we're still collecting trace data */
-               if ((xscale->arch_debug_reason == XSCALE_DBG_REASON_TB_FULL)
-                       && (xscale->trace.buffer_fill > 0))
+               /* Resume if entered debug due to buffer fill and we're still collecting
+                * trace data.  Note that a debug exception due to trace buffer full
+                * can only happen in fill mode. */
+               if (xscale->arch_debug_reason == XSCALE_DBG_REASON_TB_FULL)
                {
+                 if (--xscale->trace.fill_counter > 0)
                        xscale_resume(target, 1, 0x0, 1, 0);
                }
-               else
-               {
-                       xscale->trace.buffer_enabled = 0;
-               }
+               else    /* entered debug for other reason; reset counter */
+                 xscale->trace.fill_counter = 0;
        }
 
        return ERROR_OK;
@@ -1163,12 +1165,25 @@ static void xscale_enable_breakpoints(struct target *target)
        }
 }
 
+static void xscale_free_trace_data(struct xscale_common *xscale)
+{
+   struct xscale_trace_data *td = xscale->trace.data;
+   while (td)
+   {
+         struct xscale_trace_data *next_td = td->next;
+         if (td->entries)
+                free(td->entries);
+         free(td);
+         td = next_td;
+   }
+   xscale->trace.data = NULL;
+}
+
 static int xscale_resume(struct target *target, int current,
                uint32_t address, int handle_breakpoints, int debug_execution)
 {
        struct xscale_common *xscale = target_to_xscale(target);
        struct arm *armv4_5 = &xscale->armv4_5_common;
-       struct breakpoint *breakpoint = target->breakpoints;
        uint32_t current_pc;
        int retval;
        int i;
@@ -1206,11 +1221,13 @@ static int xscale_resume(struct target *target, int current,
        /* the front-end may request us not to handle breakpoints */
        if (handle_breakpoints)
        {
+               struct breakpoint *breakpoint;
                breakpoint = breakpoint_find(target,
                                buf_get_u32(armv4_5->pc->value, 0, 32));
                if (breakpoint != NULL)
                {
                        uint32_t next_pc;
+                       enum trace_mode saved_trace_mode;
 
                        /* there's a breakpoint at the current PC, we have to step over it */
                        LOG_DEBUG("unset breakpoint at 0x%8.8" PRIx32 "", breakpoint->address);
@@ -1229,16 +1246,11 @@ static int xscale_resume(struct target *target, int current,
 
                        /* restore banked registers */
                        retval = xscale_restore_banked(target);
+                       if (retval != ERROR_OK)
+                               return retval;
 
-                       /* send resume request (command 0x30 or 0x31)
-                        * clean the trace buffer if it is to be enabled (0x62) */
-                       if (xscale->trace.buffer_enabled)
-                       {
-                               xscale_send_u32(target, 0x62);
-                               xscale_send_u32(target, 0x31);
-                       }
-                       else
-                               xscale_send_u32(target, 0x30);
+                       /* send resume request */
+                       xscale_send_u32(target, 0x30);
 
                        /* send CPSR */
                        xscale_send_u32(target,
@@ -1259,9 +1271,16 @@ static int xscale_resume(struct target *target, int current,
                        LOG_DEBUG("writing PC with value 0x%8.8" PRIx32,
                                        buf_get_u32(armv4_5->pc->value, 0, 32));
 
+                       /* disable trace data collection in xscale_debug_entry() */
+                       saved_trace_mode = xscale->trace.mode;
+                       xscale->trace.mode = XSCALE_TRACE_DISABLED;
+
                        /* wait for and process debug entry */
                        xscale_debug_entry(target);
 
+                       /* re-enable trace buffer, if enabled previously */
+                       xscale->trace.mode = saved_trace_mode;
+
                        LOG_DEBUG("disable single-step");
                        xscale_disable_single_step(target);
 
@@ -1276,11 +1295,26 @@ static int xscale_resume(struct target *target, int current,
 
        /* restore banked registers */
        retval = xscale_restore_banked(target);
+       if (retval != ERROR_OK)
+               return retval;
 
        /* send resume request (command 0x30 or 0x31)
         * clean the trace buffer if it is to be enabled (0x62) */
-       if (xscale->trace.buffer_enabled)
+       if (xscale->trace.mode != XSCALE_TRACE_DISABLED)
        {
+               if (xscale->trace.mode == XSCALE_TRACE_FILL)
+               {
+                  /* If trace enabled in fill mode and starting collection of new set
+                       * of buffers, initialize buffer counter and free previous buffers */
+                  if (xscale->trace.fill_counter == 0)
+                  {
+                         xscale->trace.fill_counter = xscale->trace.buffer_fill;
+                         xscale_free_trace_data(xscale);
+                  }
+               }
+               else     /* wrap mode; free previous buffer */
+                  xscale_free_trace_data(xscale);
+
                xscale_send_u32(target, 0x62);
                xscale_send_u32(target, 0x31);
        }
@@ -1356,7 +1390,7 @@ static int xscale_step_inner(struct target *target, int current,
 
        /* send resume request (command 0x30 or 0x31)
         * clean the trace buffer if it is to be enabled (0x62) */
-       if (xscale->trace.buffer_enabled)
+       if (xscale->trace.mode != XSCALE_TRACE_DISABLED)
        {
                if ((retval = xscale_send_u32(target, 0x62)) != ERROR_OK)
                        return retval;
@@ -1436,6 +1470,7 @@ static int xscale_step(struct target *target, int current,
                if ((retval = arm_simulate_step(target, NULL)) != ERROR_OK)
                        return retval;
                current_pc = buf_get_u32(armv4_5->pc->value, 0, 32);
+               LOG_DEBUG("current pc %" PRIx32, current_pc);
 
                target->debug_reason = DBG_REASON_SINGLESTEP;
                target_call_event_callbacks(target, TARGET_EVENT_HALTED);
@@ -1454,6 +1489,8 @@ static int xscale_step(struct target *target, int current,
        }
 
        retval = xscale_step_inner(target, current, address, handle_breakpoints);
+       if (retval != ERROR_OK)
+               return retval;
 
        if (breakpoint)
        {
@@ -1476,9 +1513,9 @@ static int xscale_assert_reset(struct target *target)
        /* select DCSR instruction (set endstate to R-T-I to ensure we don't
         * end up in T-L-R, which would reset JTAG
         */
-       jtag_set_end_state(TAP_IDLE);
        xscale_jtag_set_instr(target->tap,
-               XSCALE_SELDCSR << xscale->xscale_variant);
+               XSCALE_SELDCSR << xscale->xscale_variant,
+               TAP_IDLE);
 
        /* set Hold reset, Halt mode and Trap Reset */
        buf_set_u32(xscale->reg_cache->reg_list[XSCALE_DCSR].value, 30, 1, 0x1);
@@ -1486,7 +1523,7 @@ static int xscale_assert_reset(struct target *target)
        xscale_write_dcsr(target, 1, 0);
 
        /* select BYPASS, because having DCSR selected caused problems on the PXA27x */
-       xscale_jtag_set_instr(target->tap, ~0);
+       xscale_jtag_set_instr(target->tap, ~0, TAP_IDLE);
        jtag_execute_queue();
 
        /* assert reset */
@@ -1533,6 +1570,9 @@ static int xscale_deassert_reset(struct target *target)
                breakpoint = breakpoint->next;
        }
 
+       xscale->trace.mode = XSCALE_TRACE_DISABLED;
+       xscale_free_trace_data(xscale);
+
        register_cache_invalidate(xscale->armv4_5_common.core_cache);
 
        /* FIXME mark hardware watchpoints got unset too.  Also,
@@ -1831,7 +1871,7 @@ static int xscale_read_memory(struct target *target, uint32_t address,
 
        /* sanitize arguments */
        if (((size != 4) && (size != 2) && (size != 1)) || (count == 0) || !(buffer))
-               return ERROR_INVALID_ARGUMENTS;
+               return ERROR_COMMAND_SYNTAX_ERROR;
 
        if (((size == 4) && (address & 0x3u)) || ((size == 2) && (address & 0x1u)))
                return ERROR_TARGET_UNALIGNED_ACCESS;
@@ -1871,7 +1911,7 @@ static int xscale_read_memory(struct target *target, uint32_t address,
                                break;
                        default:
                                LOG_ERROR("invalid read size");
-                               return ERROR_INVALID_ARGUMENTS;
+                               return ERROR_COMMAND_SYNTAX_ERROR;
                }
        }
 
@@ -1908,7 +1948,7 @@ static int xscale_read_phys_memory(struct target *target, uint32_t address,
 }
 
 static int xscale_write_memory(struct target *target, uint32_t address,
-               uint32_t size, uint32_t count, uint8_t *buffer)
+               uint32_t size, uint32_t count, const uint8_t *buffer)
 {
        struct xscale_common *xscale = target_to_xscale(target);
        int retval;
@@ -1923,7 +1963,7 @@ static int xscale_write_memory(struct target *target, uint32_t address,
 
        /* sanitize arguments */
        if (((size != 4) && (size != 2) && (size != 1)) || (count == 0) || !(buffer))
-               return ERROR_INVALID_ARGUMENTS;
+               return ERROR_COMMAND_SYNTAX_ERROR;
 
        if (((size == 4) && (address & 0x3u)) || ((size == 2) && (address & 0x1u)))
                return ERROR_TARGET_UNALIGNED_ACCESS;
@@ -1979,6 +2019,7 @@ static int xscale_write_memory(struct target *target, uint32_t address,
                if ((retval = xscale_send_u32(target, 0x60)) != ERROR_OK)
                        return retval;
 
+               LOG_ERROR("data abort writing memory");
                return ERROR_TARGET_DATA_ABORT;
        }
 
@@ -1986,13 +2027,13 @@ static int xscale_write_memory(struct target *target, uint32_t address,
 }
 
 static int xscale_write_phys_memory(struct target *target, uint32_t address,
-               uint32_t size, uint32_t count, uint8_t *buffer)
+               uint32_t size, uint32_t count, const uint8_t *buffer)
 {
        struct xscale_common *xscale = target_to_xscale(target);
 
        /* with MMU inactive, there are only physical addresses */
        if (!xscale->armv4_5_mmu.mmu_enabled)
-               return xscale_read_memory(target, address, size, count, buffer);
+               return xscale_write_memory(target, address, size, count, buffer);
 
        /** \todo: provide a non-stub implementation of this routine. */
        LOG_ERROR("%s: %s is not implemented.  Disable MMU?",
@@ -2001,30 +2042,38 @@ static int xscale_write_phys_memory(struct target *target, uint32_t address,
 }
 
 static int xscale_bulk_write_memory(struct target *target, uint32_t address,
-               uint32_t count, uint8_t *buffer)
+               uint32_t count, const uint8_t *buffer)
 {
        return xscale_write_memory(target, address, 4, count, buffer);
 }
 
-static uint32_t xscale_get_ttb(struct target *target)
+static int xscale_get_ttb(struct target *target, uint32_t *result)
 {
        struct xscale_common *xscale = target_to_xscale(target);
        uint32_t ttb;
+       int retval;
 
-       xscale_get_reg(&xscale->reg_cache->reg_list[XSCALE_TTB]);
+       retval = xscale_get_reg(&xscale->reg_cache->reg_list[XSCALE_TTB]);
+       if (retval != ERROR_OK)
+               return retval;
        ttb = buf_get_u32(xscale->reg_cache->reg_list[XSCALE_TTB].value, 0, 32);
 
-       return ttb;
+       *result = ttb;
+
+       return ERROR_OK;
 }
 
-static void xscale_disable_mmu_caches(struct target *target, int mmu,
+static int xscale_disable_mmu_caches(struct target *target, int mmu,
                int d_u_cache, int i_cache)
 {
        struct xscale_common *xscale = target_to_xscale(target);
        uint32_t cp15_control;
+       int retval;
 
        /* read cp15 control register */
-       xscale_get_reg(&xscale->reg_cache->reg_list[XSCALE_CTRL]);
+       retval = xscale_get_reg(&xscale->reg_cache->reg_list[XSCALE_CTRL]);
+       if (retval !=ERROR_OK)
+               return retval;
        cp15_control = buf_get_u32(xscale->reg_cache->reg_list[XSCALE_CTRL].value, 0, 32);
 
        if (mmu)
@@ -2033,11 +2082,17 @@ static void xscale_disable_mmu_caches(struct target *target, int mmu,
        if (d_u_cache)
        {
                /* clean DCache */
-               xscale_send_u32(target, 0x50);
-               xscale_send_u32(target, xscale->cache_clean_address);
+               retval = xscale_send_u32(target, 0x50);
+               if (retval !=ERROR_OK)
+                       return retval;
+               retval = xscale_send_u32(target, xscale->cache_clean_address);
+               if (retval !=ERROR_OK)
+                       return retval;
 
                /* invalidate DCache */
-               xscale_send_u32(target, 0x51);
+               retval = xscale_send_u32(target, 0x51);
+               if (retval !=ERROR_OK)
+                       return retval;
 
                cp15_control &= ~0x4U;
        }
@@ -2045,25 +2100,33 @@ static void xscale_disable_mmu_caches(struct target *target, int mmu,
        if (i_cache)
        {
                /* invalidate ICache */
-               xscale_send_u32(target, 0x52);
+               retval = xscale_send_u32(target, 0x52);
+               if (retval !=ERROR_OK)
+                       return retval;
                cp15_control &= ~0x1000U;
        }
 
        /* write new cp15 control register */
-       xscale_set_reg_u32(&xscale->reg_cache->reg_list[XSCALE_CTRL], cp15_control);
+       retval = xscale_set_reg_u32(&xscale->reg_cache->reg_list[XSCALE_CTRL], cp15_control);
+       if (retval !=ERROR_OK)
+               return retval;
 
        /* execute cpwait to ensure outstanding operations complete */
-       xscale_send_u32(target, 0x53);
+       retval = xscale_send_u32(target, 0x53);
+       return retval;
 }
 
-static void xscale_enable_mmu_caches(struct target *target, int mmu,
+static int xscale_enable_mmu_caches(struct target *target, int mmu,
                int d_u_cache, int i_cache)
 {
        struct xscale_common *xscale = target_to_xscale(target);
        uint32_t cp15_control;
+       int retval;
 
        /* read cp15 control register */
-       xscale_get_reg(&xscale->reg_cache->reg_list[XSCALE_CTRL]);
+       retval = xscale_get_reg(&xscale->reg_cache->reg_list[XSCALE_CTRL]);
+       if (retval !=ERROR_OK)
+               return retval;
        cp15_control = buf_get_u32(xscale->reg_cache->reg_list[XSCALE_CTRL].value, 0, 32);
 
        if (mmu)
@@ -2076,10 +2139,13 @@ static void xscale_enable_mmu_caches(struct target *target, int mmu,
                cp15_control |= 0x1000U;
 
        /* write new cp15 control register */
-       xscale_set_reg_u32(&xscale->reg_cache->reg_list[XSCALE_CTRL], cp15_control);
+       retval = xscale_set_reg_u32(&xscale->reg_cache->reg_list[XSCALE_CTRL], cp15_control);
+       if (retval !=ERROR_OK)
+               return retval;
 
        /* execute cpwait to ensure outstanding operations complete */
-       xscale_send_u32(target, 0x53);
+       retval = xscale_send_u32(target, 0x53);
+       return retval;
 }
 
 static int xscale_set_breakpoint(struct target *target,
@@ -2116,9 +2182,9 @@ static int xscale_set_breakpoint(struct target *target,
                        breakpoint->set = 2;    /* breakpoint set on second breakpoint register */
                }
                else
-               {
+               {       /* bug: availability previously verified in xscale_add_breakpoint() */
                        LOG_ERROR("BUG: no hardware comparator available");
-                       return ERROR_OK;
+                       return ERROR_TARGET_RESOURCE_NOT_AVAILABLE;
                }
        }
        else if (breakpoint->type == BKPT_SOFT)
@@ -2130,7 +2196,7 @@ static int xscale_set_breakpoint(struct target *target,
                        {
                                return retval;
                        }
-                       /* write the original instruction in target endianness (arm7_9->arm_bkpt is host endian) */
+                       /* write the bkpt instruction in target endianness (arm7_9->arm_bkpt is host endian) */
                        if ((retval = target_write_u32(target, breakpoint->address, xscale->arm_bkpt)) != ERROR_OK)
                        {
                                return retval;
@@ -2143,13 +2209,18 @@ static int xscale_set_breakpoint(struct target *target,
                        {
                                return retval;
                        }
-                       /* write the original instruction in target endianness (arm7_9->arm_bkpt is host endian) */
-                       if ((retval = target_write_u32(target, breakpoint->address, xscale->thumb_bkpt)) != ERROR_OK)
+                       /* write the bkpt instruction in target endianness (arm7_9->arm_bkpt is host endian) */
+                       if ((retval = target_write_u16(target, breakpoint->address, xscale->thumb_bkpt)) != ERROR_OK)
                        {
                                return retval;
                        }
                }
                breakpoint->set = 1;
+
+               xscale_send_u32(target, 0x50);   /* clean dcache */
+               xscale_send_u32(target, xscale->cache_clean_address);
+               xscale_send_u32(target, 0x51);   /* invalidate dcache */
+               xscale_send_u32(target, 0x52);   /* invalidate icache and flush fetch buffers */
        }
 
        return ERROR_OK;
@@ -2162,13 +2233,13 @@ static int xscale_add_breakpoint(struct target *target,
 
        if ((breakpoint->type == BKPT_HARD) && (xscale->ibcr_available < 1))
        {
-               LOG_INFO("no breakpoint unit available for hardware breakpoint");
+               LOG_ERROR("no breakpoint unit available for hardware breakpoint");
                return ERROR_TARGET_RESOURCE_NOT_AVAILABLE;
        }
 
        if ((breakpoint->length != 2) && (breakpoint->length != 4))
        {
-               LOG_INFO("only breakpoints of two (Thumb) or four (ARM) bytes length supported");
+               LOG_ERROR("only breakpoints of two (Thumb) or four (ARM) bytes length supported");
                return ERROR_TARGET_RESOURCE_NOT_AVAILABLE;
        }
 
@@ -2177,7 +2248,7 @@ static int xscale_add_breakpoint(struct target *target,
                xscale->ibcr_available--;
        }
 
-       return ERROR_OK;
+       return xscale_set_breakpoint(target, breakpoint);
 }
 
 static int xscale_unset_breakpoint(struct target *target,
@@ -2230,6 +2301,11 @@ static int xscale_unset_breakpoint(struct target *target,
                        }
                }
                breakpoint->set = 0;
+
+               xscale_send_u32(target, 0x50);   /* clean dcache */
+               xscale_send_u32(target, xscale->cache_clean_address);
+               xscale_send_u32(target, 0x51);   /* invalidate dcache */
+               xscale_send_u32(target, 0x52);   /* invalidate icache and flush fetch buffers */
        }
 
        return ERROR_OK;
@@ -2241,7 +2317,7 @@ static int xscale_remove_breakpoint(struct target *target, struct breakpoint *br
 
        if (target->state != TARGET_HALTED)
        {
-               LOG_WARNING("target not halted");
+               LOG_ERROR("target not halted");
                return ERROR_TARGET_NOT_HALTED;
        }
 
@@ -2260,18 +2336,16 @@ static int xscale_set_watchpoint(struct target *target,
                struct watchpoint *watchpoint)
 {
        struct xscale_common *xscale = target_to_xscale(target);
-       uint8_t enable = 0;
+       uint32_t enable = 0;
        struct reg *dbcon = &xscale->reg_cache->reg_list[XSCALE_DBCON];
        uint32_t dbcon_value = buf_get_u32(dbcon->value, 0, 32);
 
        if (target->state != TARGET_HALTED)
        {
-               LOG_WARNING("target not halted");
+               LOG_ERROR("target not halted");
                return ERROR_TARGET_NOT_HALTED;
        }
 
-       xscale_get_reg(dbcon);
-
        switch (watchpoint->rw)
        {
                case WPT_READ:
@@ -2287,6 +2361,24 @@ static int xscale_set_watchpoint(struct target *target,
                        LOG_ERROR("BUG: watchpoint->rw neither read, write nor access");
        }
 
+       /* For watchpoint across more than one word, both DBR registers must
+          be enlisted, with the second used as a mask. */
+       if (watchpoint->length > 4)
+       {
+          if (xscale->dbr0_used || xscale->dbr1_used) 
+          {
+                 LOG_ERROR("BUG: sufficient hardware comparators unavailable");
+                 return ERROR_TARGET_RESOURCE_NOT_AVAILABLE;
+          }
+
+          /* Write mask value to DBR1, based on the length argument.
+               * Address bits ignored by the comparator are those set in mask. */
+          xscale_set_reg_u32(&xscale->reg_cache->reg_list[XSCALE_DBR1],
+                                                 watchpoint->length - 1);
+          xscale->dbr1_used = 1;
+          enable |= 0x100;                     /* DBCON[M] */
+       }
+
        if (!xscale->dbr0_used)
        {
                xscale_set_reg_u32(&xscale->reg_cache->reg_list[XSCALE_DBR0], watchpoint->address);
@@ -2306,7 +2398,7 @@ static int xscale_set_watchpoint(struct target *target,
        else
        {
                LOG_ERROR("BUG: no hardware comparator available");
-               return ERROR_OK;
+               return ERROR_TARGET_RESOURCE_NOT_AVAILABLE;
        }
 
        return ERROR_OK;
@@ -2319,16 +2411,44 @@ static int xscale_add_watchpoint(struct target *target,
 
        if (xscale->dbr_available < 1)
        {
-               return ERROR_TARGET_RESOURCE_NOT_AVAILABLE;
+          LOG_ERROR("no more watchpoint registers available");
+          return ERROR_TARGET_RESOURCE_NOT_AVAILABLE;
        }
 
-       if ((watchpoint->length != 1) && (watchpoint->length != 2) && (watchpoint->length != 4))
+       if (watchpoint->value)
+          LOG_WARNING("xscale does not support value, mask arguments; ignoring");
+
+       /* check that length is a power of two */
+       for (uint32_t len = watchpoint->length; len != 1; len /= 2)
        {
-               return ERROR_TARGET_RESOURCE_NOT_AVAILABLE;
+          if (len % 2)
+          {
+                 LOG_ERROR("xscale requires that watchpoint length is a power of two");
+                 return ERROR_COMMAND_ARGUMENT_INVALID;
+          }
        }
 
-       xscale->dbr_available--;
+       if (watchpoint->length == 4) /* single word watchpoint */
+       {
+          xscale->dbr_available--; /* one DBR reg used */
+          return ERROR_OK;
+       }
 
+       /* watchpoints across multiple words require both DBR registers */
+       if (xscale->dbr_available < 2)
+       {
+          LOG_ERROR("insufficient watchpoint registers available");
+          return ERROR_TARGET_RESOURCE_NOT_AVAILABLE;
+       }
+       
+       if (watchpoint->length > watchpoint->address)
+       {
+          LOG_ERROR("xscale does not support watchpoints with length "
+                                "greater than address");
+          return ERROR_COMMAND_ARGUMENT_INVALID;
+       }
+          
+       xscale->dbr_available = 0;
        return ERROR_OK;
 }
 
@@ -2353,7 +2473,14 @@ static int xscale_unset_watchpoint(struct target *target,
 
        if (watchpoint->set == 1)
        {
-               dbcon_value &= ~0x3;
+          if (watchpoint->length > 4)
+          {
+                 dbcon_value &= ~0x103; /* clear DBCON[M] as well */
+                 xscale->dbr1_used = 0; /* DBR1 was used for mask */
+          }
+          else
+                 dbcon_value &= ~0x3;
+
                xscale_set_reg_u32(dbcon, dbcon_value);
                xscale->dbr0_used = 0;
        }
@@ -2374,7 +2501,7 @@ static int xscale_remove_watchpoint(struct target *target, struct watchpoint *wa
 
        if (target->state != TARGET_HALTED)
        {
-               LOG_WARNING("target not halted");
+               LOG_ERROR("target not halted");
                return ERROR_TARGET_NOT_HALTED;
        }
 
@@ -2383,6 +2510,9 @@ static int xscale_remove_watchpoint(struct target *target, struct watchpoint *wa
                xscale_unset_watchpoint(target, watchpoint);
        }
 
+       if (watchpoint->length > 4)
+          xscale->dbr_available++;     /* both DBR regs now available */
+       
        xscale->dbr_available++;
 
        return ERROR_OK;
@@ -2509,6 +2639,7 @@ static int xscale_read_trace(struct target *target)
        uint32_t trace_buffer[258];
        int is_address[256];
        int i, j;
+       unsigned int num_checkpoints = 0;
 
        if (target->state != TARGET_HALTED)
        {
@@ -2525,23 +2656,27 @@ static int xscale_read_trace(struct target *target)
        /* parse buffer backwards to identify address entries */
        for (i = 255; i >= 0; i--)
        {
+               /* also count number of checkpointed entries */
+               if ((trace_buffer[i] & 0xe0) == 0xc0)
+                       num_checkpoints++;
+
                is_address[i] = 0;
                if (((trace_buffer[i] & 0xf0) == 0x90) ||
                        ((trace_buffer[i] & 0xf0) == 0xd0))
                {
-                       if (i >= 3)
+                       if (i > 0)
                                is_address[--i] = 1;
-                       if (i >= 2)
+                       if (i > 0)
                                is_address[--i] = 1;
-                       if (i >= 1)
+                       if (i > 0)
                                is_address[--i] = 1;
-                       if (i >= 0)
+                       if (i > 0)
                                is_address[--i] = 1;
                }
        }
 
 
-       /* search first non-zero entry */
+       /* search first non-zero entry that is not part of an address */
        for (j = 0; (j < 256) && (trace_buffer[j] == 0) && (!is_address[j]); j++)
                ;
 
@@ -2551,6 +2686,22 @@ static int xscale_read_trace(struct target *target)
                return ERROR_XSCALE_NO_TRACE_DATA;
        }
 
+       /* account for possible partial address at buffer start (wrap mode only) */
+       if (is_address[0])
+       {       /* first entry is address; complete set of 4? */
+               i = 1;
+               while (i < 4)
+                       if (!is_address[i++])
+                               break;
+               if (i < 4)
+                       j += i;   /* partial address; can't use it */
+       }
+
+       /* if first valid entry is indirect branch, can't use that either (no address) */
+       if (((trace_buffer[j] & 0xf0) == 0x90) || ((trace_buffer[j] & 0xf0) == 0xd0))
+               j++;
+
+       /* walk linked list to terminating entry */
        for (trace_data_p = &xscale->trace.data; *trace_data_p; trace_data_p = &(*trace_data_p)->next)
                ;
 
@@ -2562,6 +2713,7 @@ static int xscale_read_trace(struct target *target)
                        buf_get_u32(armv4_5->pc->value, 0, 32);
        (*trace_data_p)->entries = malloc(sizeof(struct xscale_trace_entry) * (256 - j));
        (*trace_data_p)->depth = 256 - j;
+       (*trace_data_p)->num_checkpoints = num_checkpoints;
 
        for (i = j; i < 256; i++)
        {
@@ -2575,10 +2727,10 @@ static int xscale_read_trace(struct target *target)
        return ERROR_OK;
 }
 
-static int xscale_read_instruction(struct target *target,
-               struct arm_instruction *instruction)
+static int xscale_read_instruction(struct target *target, uint32_t pc,
+                                                                  struct arm_instruction *instruction)
 {
-       struct xscale_common *xscale = target_to_xscale(target);
+       struct xscale_common *const xscale = target_to_xscale(target);
        int i;
        int section = -1;
        size_t size_read;
@@ -2591,8 +2743,8 @@ static int xscale_read_instruction(struct target *target,
        /* search for the section the current instruction belongs to */
        for (i = 0; i < xscale->trace.image->num_sections; i++)
        {
-               if ((xscale->trace.image->sections[i].base_address <= xscale->trace.current_pc) &&
-                       (xscale->trace.image->sections[i].base_address + xscale->trace.image->sections[i].size > xscale->trace.current_pc))
+               if ((xscale->trace.image->sections[i].base_address <= pc) &&
+                       (xscale->trace.image->sections[i].base_address + xscale->trace.image->sections[i].size > pc))
                {
                        section = i;
                        break;
@@ -2609,27 +2761,27 @@ static int xscale_read_instruction(struct target *target,
        {
                uint8_t buf[4];
                if ((retval = image_read_section(xscale->trace.image, section,
-                       xscale->trace.current_pc - xscale->trace.image->sections[section].base_address,
+                       pc - xscale->trace.image->sections[section].base_address,
                        4, buf, &size_read)) != ERROR_OK)
                {
-                       LOG_ERROR("error while reading instruction: %i", retval);
+                       LOG_ERROR("error while reading instruction");
                        return ERROR_TRACE_INSTRUCTION_UNAVAILABLE;
                }
                opcode = target_buffer_get_u32(target, buf);
-               arm_evaluate_opcode(opcode, xscale->trace.current_pc, instruction);
+               arm_evaluate_opcode(opcode, pc, instruction);
        }
        else if (xscale->trace.core_state == ARM_STATE_THUMB)
        {
                uint8_t buf[2];
                if ((retval = image_read_section(xscale->trace.image, section,
-                       xscale->trace.current_pc - xscale->trace.image->sections[section].base_address,
+                       pc - xscale->trace.image->sections[section].base_address,
                        2, buf, &size_read)) != ERROR_OK)
                {
-                       LOG_ERROR("error while reading instruction: %i", retval);
+                       LOG_ERROR("error while reading instruction");
                        return ERROR_TRACE_INSTRUCTION_UNAVAILABLE;
                }
                opcode = target_buffer_get_u16(target, buf);
-               thumb_evaluate_opcode(opcode, xscale->trace.current_pc, instruction);
+               thumb_evaluate_opcode(opcode, pc, instruction);
        }
        else
        {
@@ -2640,207 +2792,234 @@ static int xscale_read_instruction(struct target *target,
        return ERROR_OK;
 }
 
-static int xscale_branch_address(struct xscale_trace_data *trace_data,
-               int i, uint32_t *target)
+/* Extract address encoded into trace data. 
+ * Write result to address referenced by argument 'target', or 0 if incomplete.  */
+static inline void xscale_branch_address(struct xscale_trace_data *trace_data,
+                                        int i, uint32_t *target)
 {
        /* if there are less than four entries prior to the indirect branch message
         * we can't extract the address */
        if (i < 4)
-       {
-               return -1;
-       }
-
-       *target = (trace_data->entries[i-1].data) | (trace_data->entries[i-2].data << 8) |
-                               (trace_data->entries[i-3].data << 16) | (trace_data->entries[i-4].data << 24);
+               *target = 0;
+       else
+               *target = (trace_data->entries[i-1].data) | (trace_data->entries[i-2].data << 8) |
+                       (trace_data->entries[i-3].data << 16) | (trace_data->entries[i-4].data << 24);
+}
 
-       return 0;
+static inline void xscale_display_instruction(struct target *target, uint32_t pc,
+                                                                                         struct arm_instruction *instruction,
+                                                                                         struct command_context *cmd_ctx)
+{
+   int retval = xscale_read_instruction(target, pc, instruction);
+   if (retval == ERROR_OK)
+         command_print(cmd_ctx, "%s", instruction->text);
+   else
+         command_print(cmd_ctx, "0x%8.8" PRIx32 "\t<not found in image>", pc);
 }
 
 static int xscale_analyze_trace(struct target *target, struct command_context *cmd_ctx)
 {
-       struct xscale_common *xscale = target_to_xscale(target);
-       int next_pc_ok = 0;
-       uint32_t next_pc = 0x0;
-       struct xscale_trace_data *trace_data = xscale->trace.data;
-       int retval;
-
-       while (trace_data)
-       {
-               int i, chkpt;
-               int rollover;
-               int branch;
-               int exception;
-               xscale->trace.core_state = ARM_STATE_ARM;
-
-               chkpt = 0;
-               rollover = 0;
+   struct xscale_common *xscale = target_to_xscale(target);
+   struct xscale_trace_data *trace_data = xscale->trace.data;
+   int i, retval;
+   uint32_t breakpoint_pc;
+   struct arm_instruction instruction;
+   uint32_t current_pc = 0;  /* initialized when address determined */
+       
+   if (!xscale->trace.image)
+         LOG_WARNING("No trace image loaded; use 'xscale trace_image'");
+
+   /* loop for each trace buffer that was loaded from target */
+   while (trace_data)
+   {
+         int chkpt = 0;  /* incremented as checkpointed entries found */
+         int j;
+
+         /* FIXME: set this to correct mode when trace buffer is first enabled */
+         xscale->trace.core_state = ARM_STATE_ARM;
+
+         /* loop for each entry in this trace buffer */
+         for (i = 0; i < trace_data->depth; i++)
+         {
+                int exception = 0;
+                uint32_t chkpt_reg = 0x0;
+                uint32_t branch_target = 0;
+                int count;
+
+                /* trace entry type is upper nybble of 'message byte' */
+                int trace_msg_type = (trace_data->entries[i].data & 0xf0) >> 4;
+
+                /* Target addresses of indirect branches are written into buffer
+                 * before the message byte representing the branch. Skip past it */
+                if (trace_data->entries[i].type == XSCALE_TRACE_ADDRESS)
+                       continue;
 
-               for (i = 0; i < trace_data->depth; i++)
-               {
-                       next_pc_ok = 0;
-                       branch = 0;
-                       exception = 0;
+                switch (trace_msg_type)
+                {
+                       case 0:         /* Exceptions */
+                       case 1:
+                       case 2:
+                       case 3:
+                       case 4:
+                       case 5:
+                       case 6:
+                       case 7:
+                          exception = (trace_data->entries[i].data & 0x70) >> 4;
+
+                          /* FIXME: vector table may be at ffff0000 */
+                          branch_target = (trace_data->entries[i].data & 0xf0) >> 2;
+                          break;
+
+                       case 8:         /* Direct Branch */
+                          break;
+
+                       case 9:         /* Indirect Branch */
+                          xscale_branch_address(trace_data, i, &branch_target);
+                          break;
+
+                       case 13:           /* Checkpointed Indirect Branch */
+                          xscale_branch_address(trace_data, i, &branch_target);
+                          if ((trace_data->num_checkpoints == 2) && (chkpt == 0))
+                                 chkpt_reg = trace_data->chkpt1; /* 2 chkpts, this is oldest */
+                          else
+                                 chkpt_reg = trace_data->chkpt0; /* 1 chkpt, or 2 and newest */
+
+                          chkpt++;
+                          break;
+
+                       case 12:           /* Checkpointed Direct Branch */
+                          if ((trace_data->num_checkpoints == 2) && (chkpt == 0))
+                                 chkpt_reg = trace_data->chkpt1; /* 2 chkpts, this is oldest */
+                          else
+                                 chkpt_reg = trace_data->chkpt0; /* 1 chkpt, or 2 and newest */
+
+                          /* if no current_pc, checkpoint will be starting point */
+                          if (current_pc == 0)
+                                 branch_target = chkpt_reg;
+
+                          chkpt++;
+                          break;
+
+                       case 15:        /* Roll-over */
+                          break;
+
+                       default:        /* Reserved */
+                          LOG_WARNING("trace is suspect: invalid trace message byte");
+                          continue;
+
+                }
+
+                /* If we don't have the current_pc yet, but we did get the branch target
+                 * (either from the trace buffer on indirect branch, or from a checkpoint reg),
+                 * then we can start displaying instructions at the next iteration, with
+                 * branch_target as the starting point.
+                 */
+                if (current_pc == 0)
+                {
+                       current_pc = branch_target; /* remains 0 unless branch_target obtained */
+                       continue;
+                }
+
+                /* We have current_pc.  Read and display the instructions from the image.
+                 * First, display count instructions (lower nybble of message byte). */
+                count = trace_data->entries[i].data & 0x0f;
+                for (j = 0; j < count; j++)
+                {
+                       xscale_display_instruction(target, current_pc, &instruction, cmd_ctx);
+                       current_pc += xscale->trace.core_state == ARM_STATE_ARM ? 4 : 2;
+                }
+
+                /* An additional instruction is implicitly added to count for
+                 * rollover and some exceptions: undef, swi, prefetch abort. */
+                if ((trace_msg_type == 15) || (exception > 0 && exception < 4))
+                {
+                       xscale_display_instruction(target, current_pc, &instruction, cmd_ctx);
+                       current_pc += xscale->trace.core_state == ARM_STATE_ARM ? 4 : 2;
+                }
+
+                if (trace_msg_type == 15) /* rollover */
+                       continue;
 
-                       if (trace_data->entries[i].type == XSCALE_TRACE_ADDRESS)
-                               continue;
+                if (exception)
+                {
+                       command_print(cmd_ctx, "--- exception %i ---", exception);
+                       continue;
+                }
+                       
+                /* not exception or rollover; next instruction is a branch and is
+                 * not included in the count */
+                xscale_display_instruction(target, current_pc, &instruction, cmd_ctx);
+
+                /* for direct branches, extract branch destination from instruction */
+                if ((trace_msg_type == 8) || (trace_msg_type == 12))
+                {
+                       retval = xscale_read_instruction(target, current_pc, &instruction);
+                       if (retval == ERROR_OK)
+                          current_pc = instruction.info.b_bl_bx_blx.target_address;
+                       else
+                          current_pc = 0;      /* branch destination unknown */
 
-                       switch ((trace_data->entries[i].data & 0xf0) >> 4)
+                       /* direct branch w/ checkpoint; can also get from checkpoint reg */
+                       if (trace_msg_type == 12)
                        {
-                               case 0:         /* Exceptions */
-                               case 1:
-                               case 2:
-                               case 3:
-                               case 4:
-                               case 5:
-                               case 6:
-                               case 7:
-                                       exception = (trace_data->entries[i].data & 0x70) >> 4;
-                                       next_pc_ok = 1;
-                                       next_pc = (trace_data->entries[i].data & 0xf0) >> 2;
-                                       command_print(cmd_ctx, "--- exception %i ---", (trace_data->entries[i].data & 0xf0) >> 4);
-                                       break;
-                               case 8:         /* Direct Branch */
-                                       branch = 1;
-                                       break;
-                               case 9:         /* Indirect Branch */
-                                       branch = 1;
-                                       if (xscale_branch_address(trace_data, i, &next_pc) == 0)
-                                       {
-                                               next_pc_ok = 1;
-                                       }
-                                       break;
-                               case 13:        /* Checkpointed Indirect Branch */
-                                       if (xscale_branch_address(trace_data, i, &next_pc) == 0)
-                                       {
-                                               next_pc_ok = 1;
-                                               if (((chkpt == 0) && (next_pc != trace_data->chkpt0))
-                                                       || ((chkpt == 1) && (next_pc != trace_data->chkpt1)))
-                                                       LOG_WARNING("checkpointed indirect branch target address doesn't match checkpoint");
-                                       }
-                                       /* explicit fall-through */
-                               case 12:        /* Checkpointed Direct Branch */
-                                       branch = 1;
-                                       if (chkpt == 0)
-                                       {
-                                               next_pc_ok = 1;
-                                               next_pc = trace_data->chkpt0;
-                                               chkpt++;
-                                       }
-                                       else if (chkpt == 1)
-                                       {
-                                               next_pc_ok = 1;
-                                               next_pc = trace_data->chkpt0;
-                                               chkpt++;
-                                       }
-                                       else
-                                       {
-                                               LOG_WARNING("more than two checkpointed branches encountered");
-                                       }
-                                       break;
-                               case 15:        /* Roll-over */
-                                       rollover++;
-                                       continue;
-                               default:        /* Reserved */
-                                       command_print(cmd_ctx, "--- reserved trace message ---");
-                                       LOG_ERROR("BUG: trace message %i is reserved", (trace_data->entries[i].data & 0xf0) >> 4);
-                                       return ERROR_OK;
+                          if (current_pc == 0)
+                                 current_pc = chkpt_reg;
+                          else if (current_pc != chkpt_reg)  /* sanity check */
+                                 LOG_WARNING("trace is suspect: checkpoint register "
+                                                         "inconsistent with adddress from image");
                        }
 
-                       if (xscale->trace.pc_ok)
-                       {
-                               int executed = (trace_data->entries[i].data & 0xf) + rollover * 16;
-                               struct arm_instruction instruction;
+                       if (current_pc == 0)
+                          command_print(cmd_ctx, "address unknown");
 
-                               if ((exception == 6) || (exception == 7))
-                               {
-                                       /* IRQ or FIQ exception, no instruction executed */
-                                       executed -= 1;
-                               }
+                       continue;
+                }
 
-                               while (executed-- >= 0)
-                               {
-                                       if ((retval = xscale_read_instruction(target, &instruction)) != ERROR_OK)
-                                       {
-                                               /* can't continue tracing with no image available */
-                                               if (retval == ERROR_TRACE_IMAGE_UNAVAILABLE)
-                                               {
-                                                       return retval;
-                                               }
-                                               else if (retval == ERROR_TRACE_INSTRUCTION_UNAVAILABLE)
-                                               {
-                                                       /* TODO: handle incomplete images */
-                                               }
-                                       }
-
-                                       /* a precise abort on a load to the PC is included in the incremental
-                                        * word count, other instructions causing data aborts are not included
-                                        */
-                                       if ((executed == 0) && (exception == 4)
-                                               && ((instruction.type >= ARM_LDR) && (instruction.type <= ARM_LDM)))
-                                       {
-                                               if ((instruction.type == ARM_LDM)
-                                                       && ((instruction.info.load_store_multiple.register_list & 0x8000) == 0))
-                                               {
-                                                       executed--;
-                                               }
-                                               else if (((instruction.type >= ARM_LDR) && (instruction.type <= ARM_LDRSH))
-                                                       && (instruction.info.load_store.Rd != 15))
-                                               {
-                                                       executed--;
-                                               }
-                                       }
-
-                                       /* only the last instruction executed
-                                        * (the one that caused the control flow change)
-                                        * could be a taken branch
-                                        */
-                                       if (((executed == -1) && (branch == 1)) &&
-                                               (((instruction.type == ARM_B) ||
-                                                       (instruction.type == ARM_BL) ||
-                                                       (instruction.type == ARM_BLX)) &&
-                                                       (instruction.info.b_bl_bx_blx.target_address != 0xffffffff)))
-                                       {
-                                               xscale->trace.current_pc = instruction.info.b_bl_bx_blx.target_address;
-                                       }
-                                       else
-                                       {
-                                               xscale->trace.current_pc += (xscale->trace.core_state == ARM_STATE_ARM) ? 4 : 2;
-                                       }
-                                       command_print(cmd_ctx, "%s", instruction.text);
-                               }
+                /* indirect branch; the branch destination was read from trace buffer */
+                if ((trace_msg_type == 9) || (trace_msg_type == 13))
+                {
+                       current_pc = branch_target;
 
-                               rollover = 0;
-                       }
+                       /* sanity check (checkpoint reg is redundant) */
+                       if ((trace_msg_type == 13) && (chkpt_reg != branch_target))
+                          LOG_WARNING("trace is suspect: checkpoint register "
+                                                  "inconsistent with address from trace buffer");
+                }
 
-                       if (next_pc_ok)
-                       {
-                               xscale->trace.current_pc = next_pc;
-                               xscale->trace.pc_ok = 1;
-                       }
-               }
+         } /* END: for (i = 0; i < trace_data->depth; i++) */
 
-               for (; xscale->trace.current_pc < trace_data->last_instruction; xscale->trace.current_pc += (xscale->trace.core_state == ARM_STATE_ARM) ? 4 : 2)
-               {
-                       struct arm_instruction instruction;
-                       if ((retval = xscale_read_instruction(target, &instruction)) != ERROR_OK)
-                       {
-                               /* can't continue tracing with no image available */
-                               if (retval == ERROR_TRACE_IMAGE_UNAVAILABLE)
-                               {
-                                       return retval;
-                               }
-                               else if (retval == ERROR_TRACE_INSTRUCTION_UNAVAILABLE)
-                               {
-                                       /* TODO: handle incomplete images */
-                               }
-                       }
-                       command_print(cmd_ctx, "%s", instruction.text);
-               }
+         breakpoint_pc = trace_data->last_instruction; /* used below */
+         trace_data = trace_data->next;
 
-               trace_data = trace_data->next;
-       }
+   } /* END: while (trace_data) */
 
-       return ERROR_OK;
+   /* Finally... display all instructions up to the value of the pc when the
+       * debug break occurred (saved when trace data was collected from target).
+       * This is necessary because the trace only records execution branches and 16
+       * consecutive instructions (rollovers), so last few typically missed.
+       */
+   if (current_pc == 0)
+         return ERROR_OK;   /* current_pc was never found */
+
+   /* how many instructions remaining? */
+   int gap_count = (breakpoint_pc - current_pc) /
+         (xscale->trace.core_state == ARM_STATE_ARM ? 4 : 2);
+
+   /* should never be negative or over 16, but verify */
+   if (gap_count < 0 || gap_count > 16)
+   {
+         LOG_WARNING("trace is suspect: excessive gap at end of trace");
+         return ERROR_OK;  /* bail; large number or negative value no good */
+   }
+
+   /* display remaining instructions */
+   for (i = 0; i < gap_count; i++)
+   {
+         xscale_display_instruction(target, current_pc, &instruction, cmd_ctx);
+         current_pc += xscale->trace.core_state == ARM_STATE_ARM ? 4 : 2;
+   }
+
+   return ERROR_OK;
 }
 
 static const struct reg_arch_type xscale_reg_type = {
@@ -2981,11 +3160,11 @@ static int xscale_init_arch_info(struct target *target,
 
        xscale->vector_catch = 0x1;
 
-       xscale->trace.capture_status = TRACE_IDLE;
        xscale->trace.data = NULL;
        xscale->trace.image = NULL;
-       xscale->trace.buffer_enabled = 0;
+       xscale->trace.mode = XSCALE_TRACE_DISABLED;
        xscale->trace.buffer_fill = 0;
+       xscale->trace.fill_counter = 0;
 
        /* prepare ARMv4/5 specific information */
        armv4_5->arch_info = xscale;
@@ -3033,8 +3212,7 @@ COMMAND_HANDLER(xscale_handle_debug_handler_command)
 
        if (CMD_ARGC < 2)
        {
-               LOG_ERROR("'xscale debug_handler <target#> <address>' command takes two required operands");
-               return ERROR_OK;
+               return ERROR_COMMAND_SYNTAX_ERROR;
        }
 
        if ((target = get_target(CMD_ARGV[0])) == NULL)
@@ -3118,21 +3296,18 @@ static int xscale_virt2phys(struct target *target,
                uint32_t virtual, uint32_t *physical)
 {
        struct xscale_common *xscale = target_to_xscale(target);
-       int type;
        uint32_t cb;
-       int domain;
-       uint32_t ap;
 
        if (xscale->common_magic != XSCALE_COMMON_MAGIC) {
                LOG_ERROR(xscale_not);
                return ERROR_TARGET_INVALID;
        }
 
-       uint32_t ret = armv4_5_mmu_translate_va(target, &xscale->armv4_5_mmu, virtual, &type, &cb, &domain, &ap);
-       if (type == -1)
-       {
-               return ret;
-       }
+       uint32_t ret;
+       int retval = armv4_5_mmu_translate_va(target, &xscale->armv4_5_mmu,
+                       virtual, &cb, &ret);
+       if (retval != ERROR_OK)
+               return retval;
        *physical = ret;
        return ERROR_OK;
 }
@@ -3204,14 +3379,19 @@ COMMAND_HANDLER(xscale_handle_idcache_command)
        {
                bool enable;
                COMMAND_PARSE_ENABLE(CMD_ARGV[0], enable);
-               if (enable)
-                       xscale_enable_mmu_caches(target, 1, 0, 0);
-               else
-                       xscale_disable_mmu_caches(target, 1, 0, 0);
-               if (icache)
+               if (icache) {
                        xscale->armv4_5_mmu.armv4_5_cache.i_cache_enabled = enable;
-               else
+                       if (enable)
+                               xscale_enable_mmu_caches(target, 0, 0, 1);
+                       else
+                               xscale_disable_mmu_caches(target, 0, 0, 1);
+               } else {
                        xscale->armv4_5_mmu.armv4_5_cache.d_u_cache_enabled = enable;
+                       if (enable)
+                               xscale_enable_mmu_caches(target, 0, 1, 0);
+                       else
+                               xscale_disable_mmu_caches(target, 0, 1, 0);
+               }
        }
 
        bool enabled = icache ?
@@ -3235,7 +3415,7 @@ COMMAND_HANDLER(xscale_handle_vector_catch_command)
 
        if (CMD_ARGC < 1)
        {
-               command_print(CMD_CTX, "usage: xscale vector_catch [mask]");
+           return ERROR_COMMAND_SYNTAX_ERROR;
        }
        else
        {
@@ -3302,7 +3482,7 @@ COMMAND_HANDLER(xscale_handle_vector_table_command)
        }
 
        if (err)
-               command_print(CMD_CTX, "usage: xscale vector_table <high|low> <index> <code>");
+               return ERROR_COMMAND_SYNTAX_ERROR;
 
        return ERROR_OK;
 }
@@ -3312,7 +3492,6 @@ COMMAND_HANDLER(xscale_handle_trace_buffer_command)
 {
        struct target *target = get_current_target(CMD_CTX);
        struct xscale_common *xscale = target_to_xscale(target);
-       struct arm *armv4_5 = &xscale->armv4_5_common;
        uint32_t dcsr_value;
        int retval;
 
@@ -3326,61 +3505,54 @@ COMMAND_HANDLER(xscale_handle_trace_buffer_command)
                return ERROR_OK;
        }
 
-       if ((CMD_ARGC >= 1) && (strcmp("enable", CMD_ARGV[0]) == 0))
-       {
-               struct xscale_trace_data *td, *next_td;
-               xscale->trace.buffer_enabled = 1;
-
-               /* free old trace data */
-               td = xscale->trace.data;
-               while (td)
-               {
-                       next_td = td->next;
-
-                       if (td->entries)
-                               free(td->entries);
-                       free(td);
-                       td = next_td;
-               }
-               xscale->trace.data = NULL;
-       }
-       else if ((CMD_ARGC >= 1) && (strcmp("disable", CMD_ARGV[0]) == 0))
-       {
-               xscale->trace.buffer_enabled = 0;
-       }
-
-       if ((CMD_ARGC >= 2) && (strcmp("fill", CMD_ARGV[1]) == 0))
-       {
-               uint32_t fill = 1;
-               if (CMD_ARGC >= 3)
-                       COMMAND_PARSE_NUMBER(u32, CMD_ARGV[2], fill);
-               xscale->trace.buffer_fill = fill;
-       }
-       else if ((CMD_ARGC >= 2) && (strcmp("wrap", CMD_ARGV[1]) == 0))
-       {
-               xscale->trace.buffer_fill = -1;
-       }
-
-       if (xscale->trace.buffer_enabled)
+       if (CMD_ARGC >= 1)
        {
-               /* if we enable the trace buffer in fill-once
-                * mode we know the address of the first instruction */
-               xscale->trace.pc_ok = 1;
-               xscale->trace.current_pc =
-                               buf_get_u32(armv4_5->pc->value, 0, 32);
+          if (strcmp("enable", CMD_ARGV[0]) == 0)
+                 xscale->trace.mode = XSCALE_TRACE_WRAP; /* default */
+          else if (strcmp("disable", CMD_ARGV[0]) == 0)
+                 xscale->trace.mode = XSCALE_TRACE_DISABLED;
+          else
+                 return ERROR_COMMAND_SYNTAX_ERROR;
+       }
+
+       if (CMD_ARGC >= 2 && xscale->trace.mode != XSCALE_TRACE_DISABLED)
+       {
+          if (strcmp("fill", CMD_ARGV[1]) == 0)
+          {
+                 int buffcount = 1;                    /* default */
+                 if (CMD_ARGC >= 3)
+                        COMMAND_PARSE_NUMBER(int, CMD_ARGV[2], buffcount);
+                 if (buffcount < 1)                    /* invalid */
+                 {
+                        command_print(CMD_CTX, "fill buffer count must be > 0");
+                        xscale->trace.mode = XSCALE_TRACE_DISABLED;
+                        return ERROR_COMMAND_SYNTAX_ERROR;
+                 }
+                 xscale->trace.buffer_fill = buffcount;
+                 xscale->trace.mode = XSCALE_TRACE_FILL;
+          }
+          else if (strcmp("wrap", CMD_ARGV[1]) == 0)
+                 xscale->trace.mode = XSCALE_TRACE_WRAP;
+          else
+          {
+                 xscale->trace.mode = XSCALE_TRACE_DISABLED;
+                 return ERROR_COMMAND_SYNTAX_ERROR;
+          }
+       }
+       
+       if (xscale->trace.mode != XSCALE_TRACE_DISABLED)
+       {
+          char fill_string[12];
+          sprintf(fill_string, "fill %" PRId32, xscale->trace.buffer_fill); 
+          command_print(CMD_CTX, "trace buffer enabled (%s)",
+                                        (xscale->trace.mode == XSCALE_TRACE_FILL)
+                                        ? fill_string : "wrap");
        }
        else
-       {
-               /* otherwise the address is unknown, and we have no known good PC */
-               xscale->trace.pc_ok = 0;
-       }
-
-       command_print(CMD_CTX, "trace buffer %s (%s)",
-               (xscale->trace.buffer_enabled) ? "enabled" : "disabled",
-               (xscale->trace.buffer_fill > 0) ? "fill" : "wrap");
-
+          command_print(CMD_CTX, "trace buffer disabled");
+          
        dcsr_value = buf_get_u32(xscale->reg_cache->reg_list[XSCALE_DCSR].value, 0, 32);
-       if (xscale->trace.buffer_fill >= 0)
+       if (xscale->trace.mode == XSCALE_TRACE_FILL)
                xscale_write_dcsr_sw(target, (dcsr_value & 0xfffffffc) | 2);
        else
                xscale_write_dcsr_sw(target, dcsr_value & 0xfffffffc);
@@ -3396,8 +3568,7 @@ COMMAND_HANDLER(xscale_handle_trace_image_command)
 
        if (CMD_ARGC < 1)
        {
-               command_print(CMD_CTX, "usage: xscale trace_image <file> [base address] [type]");
-               return ERROR_OK;
+               return ERROR_COMMAND_SYNTAX_ERROR;
        }
 
        retval = xscale_verify_pointer(CMD_CTX, xscale);
@@ -3456,8 +3627,7 @@ COMMAND_HANDLER(xscale_handle_dump_trace_command)
 
        if (CMD_ARGC < 1)
        {
-               command_print(CMD_CTX, "usage: xscale dump_trace <file>");
-               return ERROR_OK;
+               return ERROR_COMMAND_SYNTAX_ERROR;
        }
 
        trace_data = xscale->trace.data;
@@ -3557,7 +3727,7 @@ COMMAND_HANDLER(xscale_handle_cp15)
                        break;
                default:
                        command_print(CMD_CTX, "invalid register number");
-                       return ERROR_INVALID_ARGUMENTS;
+                       return ERROR_COMMAND_SYNTAX_ERROR;
                }
                reg = &xscale->reg_cache->reg_list[reg_no];
 
@@ -3590,7 +3760,7 @@ COMMAND_HANDLER(xscale_handle_cp15)
        }
        else
        {
-               command_print(CMD_CTX, "usage: cp15 [register]<, [value]>");
+           return ERROR_COMMAND_SYNTAX_ERROR;
        }
 
        return ERROR_OK;
@@ -3648,7 +3818,7 @@ static const struct command_registration xscale_exec_command_handlers[] = {
                .mode = COMMAND_EXEC,
                .help = "display trace buffer status, enable or disable "
                        "tracing, and optionally reconfigure trace mode",
-               .usage = "['enable'|'disable' ['fill' number|'wrap']]",
+               .usage = "['enable'|'disable' ['fill' [number]|'wrap']]",
        },
        {
                .name = "dump_trace",
@@ -3686,7 +3856,7 @@ static const struct command_registration xscale_any_command_handlers[] = {
                .handler = xscale_handle_debug_handler_command,
                .mode = COMMAND_ANY,
                .help = "Change address used for debug handler.",
-               .usage = "target address",
+               .usage = "<target> <address>",
        },
        {
                .name = "cache_clean_address",