target/cortex_m: faster reading of all CPU registers
authorTomas Vanek <vanekt@fbl.cz>
Tue, 5 Feb 2019 08:39:11 +0000 (09:39 +0100)
committerTomas Vanek <vanekt@fbl.cz>
Thu, 18 Nov 2021 21:09:34 +0000 (21:09 +0000)
Without the change cortex_m_debug_entry() reads all registers
calling cortex_m_load_core_reg_u32() for each register with
a poor usage of JTAG/SWD queue.
It is time consuming, especially on an USB FS based adapter.
Moreover if target_request debugmsgs are enabled, DCB_DCRDR
is saved and restored on each register read.

This change introduces cortex_m_fast_read_all_regs()
which queues all register reads and a single dap_run() transaction
does all work.

cortex_m_fast_read_all_regs() reads all registers unconditionally
regardless register cache is valid or not. This is a difference
from the original cortex_m_debug_entry() code.

cortex_m_debug_entry times from -d3 log, Cortex-M4F and CMSIS-DAP
(Kinetis K28F-FRDM kit)

target_request |                time [ms]
debugmsgs      | without the change | with the change
---------------+--------------------+-----------------
disable        |        186         |       27
enable         |        232         |       29

Added checking of DHCSR.S_REGRDY flag. If "not ready" is seen,
cortex_m->slow_register_read is set and fallback to the old
register read method cortex_m_slow_read_all_regs() is used
instead of cortex_m_fast_read_all_regs().

Change-Id: I0665d94b97ede217394640871dc451ec93410254
Signed-off-by: Tomas Vanek <vanekt@fbl.cz>
Reviewed-on: https://review.openocd.org/c/openocd/+/5321
Tested-by: jenkins
Reviewed-by: Andreas Fritiofson <andreas.fritiofson@gmail.com>
Reviewed-by: Antonio Borneo <borneo.antonio@gmail.com>
src/target/armv7m.c
src/target/armv7m.h
src/target/cortex_m.c
src/target/cortex_m.h

index ffc8ca875db0675a477fd11142d8ec207efaf41a..0a51ad4d60c3c2b69d0700a5ac0e1c41aef46133 100644 (file)
@@ -251,7 +251,7 @@ static int armv7m_set_core_reg(struct reg *reg, uint8_t *buf)
        return ERROR_OK;
 }
 
-static uint32_t armv7m_map_id_to_regsel(unsigned int arm_reg_id)
+uint32_t armv7m_map_id_to_regsel(unsigned int arm_reg_id)
 {
        switch (arm_reg_id) {
        case ARMV7M_R0 ... ARMV7M_R14:
@@ -289,7 +289,7 @@ static uint32_t armv7m_map_id_to_regsel(unsigned int arm_reg_id)
        }
 }
 
-static bool armv7m_map_reg_packing(unsigned int arm_reg_id,
+bool armv7m_map_reg_packing(unsigned int arm_reg_id,
                                        unsigned int *reg32_id, uint32_t *offset)
 {
 
index 2816a91452404d3c63b22bdcd079932dc3cfb6d3..d33e574928b6e071c18df1645965210b0ed41f1f 100644 (file)
@@ -309,6 +309,11 @@ int armv7m_invalidate_core_regs(struct target *target);
 
 int armv7m_restore_context(struct target *target);
 
+uint32_t armv7m_map_id_to_regsel(unsigned int arm_reg_id);
+
+bool armv7m_map_reg_packing(unsigned int arm_reg_id,
+               unsigned int *reg32_id, uint32_t *offset);
+
 int armv7m_checksum_memory(struct target *target,
                target_addr_t address, uint32_t count, uint32_t *checksum);
 int armv7m_blank_check_memory(struct target *target,
index ba7bc17b55c0c0a79b6c3c42efa800409139206c..82d5eff80055c7a5ccd5b54a15d452436454a8a9 100644 (file)
@@ -202,6 +202,163 @@ static int cortex_m_load_core_reg_u32(struct target *target,
        return retval;
 }
 
+static int cortex_m_slow_read_all_regs(struct target *target)
+{
+       struct armv7m_common *armv7m = target_to_armv7m(target);
+       const unsigned int num_regs = armv7m->arm.core_cache->num_regs;
+
+       for (unsigned int reg_id = 0; reg_id < num_regs; reg_id++) {
+               struct reg *r = &armv7m->arm.core_cache->reg_list[reg_id];
+               if (r->exist) {
+                       int retval = armv7m->arm.read_core_reg(target, r, reg_id, ARM_MODE_ANY);
+                       if (retval != ERROR_OK)
+                               return retval;
+               }
+       }
+       return ERROR_OK;
+}
+
+static int cortex_m_queue_reg_read(struct target *target, uint32_t regsel,
+               uint32_t *reg_value, uint32_t *dhcsr)
+{
+       struct armv7m_common *armv7m = target_to_armv7m(target);
+       int retval;
+
+       retval = mem_ap_write_u32(armv7m->debug_ap, DCB_DCRSR, regsel);
+       if (retval != ERROR_OK)
+               return retval;
+
+       retval = mem_ap_read_u32(armv7m->debug_ap, DCB_DHCSR, dhcsr);
+       if (retval != ERROR_OK)
+               return retval;
+
+       return mem_ap_read_u32(armv7m->debug_ap, DCB_DCRDR, reg_value);
+}
+
+static int cortex_m_fast_read_all_regs(struct target *target)
+{
+       struct cortex_m_common *cortex_m = target_to_cm(target);
+       struct armv7m_common *armv7m = target_to_armv7m(target);
+       int retval;
+       uint32_t dcrdr;
+
+       /* because the DCB_DCRDR is used for the emulated dcc channel
+        * we have to save/restore the DCB_DCRDR when used */
+       if (target->dbg_msg_enabled) {
+               retval = mem_ap_read_u32(armv7m->debug_ap, DCB_DCRDR, &dcrdr);
+               if (retval != ERROR_OK)
+                       return retval;
+       }
+
+       const unsigned int num_regs = armv7m->arm.core_cache->num_regs;
+       const unsigned int n_r32 = ARMV7M_LAST_REG - ARMV7M_CORE_FIRST_REG + 1
+                                                          + ARMV7M_FPU_LAST_REG - ARMV7M_FPU_FIRST_REG + 1;
+       /* we need one 32-bit word for each register except FP D0..D15, which
+        * need two words */
+       uint32_t r_vals[n_r32];
+       uint32_t dhcsr[n_r32];
+
+       unsigned int wi = 0; /* write index to r_vals and dhcsr arrays */
+       unsigned int reg_id; /* register index in the reg_list, ARMV7M_R0... */
+       for (reg_id = 0; reg_id < num_regs; reg_id++) {
+               struct reg *r = &armv7m->arm.core_cache->reg_list[reg_id];
+               if (!r->exist)
+                       continue;       /* skip non existent registers */
+
+               if (r->size <= 8) {
+                       /* Any 8-bit or shorter register is unpacked from a 32-bit
+                        * container register. Skip it now. */
+                       continue;
+               }
+
+               uint32_t regsel = armv7m_map_id_to_regsel(reg_id);
+               retval = cortex_m_queue_reg_read(target, regsel, &r_vals[wi],
+                                                                                &dhcsr[wi]);
+               if (retval != ERROR_OK)
+                       return retval;
+               wi++;
+
+               assert(r->size == 32 || r->size == 64);
+               if (r->size == 32)
+                       continue;       /* done with 32-bit register */
+
+               assert(reg_id >= ARMV7M_FPU_FIRST_REG && reg_id <= ARMV7M_FPU_LAST_REG);
+               /* the odd part of FP register (S1, S3...) */
+               retval = cortex_m_queue_reg_read(target, regsel + 1, &r_vals[wi],
+                                                                                        &dhcsr[wi]);
+               if (retval != ERROR_OK)
+                       return retval;
+               wi++;
+       }
+
+       assert(wi <= n_r32);
+
+       retval = dap_run(armv7m->debug_ap->dap);
+       if (retval != ERROR_OK)
+               return retval;
+
+       if (target->dbg_msg_enabled) {
+               /* restore DCB_DCRDR - this needs to be in a separate
+                * transaction otherwise the emulated DCC channel breaks */
+               retval = mem_ap_write_atomic_u32(armv7m->debug_ap, DCB_DCRDR, dcrdr);
+               if (retval != ERROR_OK)
+                       return retval;
+       }
+
+       bool not_ready = false;
+       for (unsigned int i = 0; i < wi; i++) {
+               if ((dhcsr[i] & S_REGRDY) == 0) {
+                       not_ready = true;
+                       LOG_DEBUG("Register %u was not ready during fast read", i);
+               }
+               cortex_m_cumulate_dhcsr_sticky(cortex_m, dhcsr[i]);
+       }
+
+       if (not_ready) {
+               /* Any register was not ready,
+                * fall back to slow read with S_REGRDY polling */
+               return ERROR_TIMEOUT_REACHED;
+       }
+
+       LOG_DEBUG("read %u 32-bit registers", wi);
+
+       unsigned int ri = 0; /* read index from r_vals array */
+       for (reg_id = 0; reg_id < num_regs; reg_id++) {
+               struct reg *r = &armv7m->arm.core_cache->reg_list[reg_id];
+               if (!r->exist)
+                       continue;       /* skip non existent registers */
+
+               r->dirty = false;
+
+               unsigned int reg32_id;
+               uint32_t offset;
+               if (armv7m_map_reg_packing(reg_id, &reg32_id, &offset)) {
+                       /* Unpack a partial register from 32-bit container register */
+                       struct reg *r32 = &armv7m->arm.core_cache->reg_list[reg32_id];
+
+                       /* The container register ought to precede all regs unpacked
+                        * from it in the reg_list. So the value should be ready
+                        * to unpack */
+                       assert(r32->valid);
+                       buf_cpy(r32->value + offset, r->value, r->size);
+
+               } else {
+                       assert(r->size == 32 || r->size == 64);
+                       buf_set_u32(r->value, 0, 32, r_vals[ri++]);
+
+                       if (r->size == 64) {
+                               assert(reg_id >= ARMV7M_FPU_FIRST_REG && reg_id <= ARMV7M_FPU_LAST_REG);
+                               /* the odd part of FP register (S1, S3...) */
+                               buf_set_u32(r->value + 4, 0, 32, r_vals[ri++]);
+                       }
+               }
+               r->valid = true;
+       }
+       assert(ri == wi);
+
+       return retval;
+}
+
 static int cortex_m_store_core_reg_u32(struct target *target,
                uint32_t regsel, uint32_t value)
 {
@@ -610,7 +767,6 @@ static int cortex_m_examine_exception_reason(struct target *target)
 
 static int cortex_m_debug_entry(struct target *target)
 {
-       int i;
        uint32_t xPSR;
        int retval;
        struct cortex_m_common *cortex_m = target_to_cm(target);
@@ -646,16 +802,21 @@ static int cortex_m_debug_entry(struct target *target)
                secure_state = (dscsr & DSCSR_CDS) == DSCSR_CDS;
        }
 
-       /* Examine target state and mode
-        * First load register accessible through core debug port */
-       int num_regs = arm->core_cache->num_regs;
-
-       for (i = 0; i < num_regs; i++) {
-               r = &armv7m->arm.core_cache->reg_list[i];
-               if (r->exist && !r->valid)
-                       arm->read_core_reg(target, r, i, ARM_MODE_ANY);
+       /* Load all registers to arm.core_cache */
+       if (!cortex_m->slow_register_read) {
+               retval = cortex_m_fast_read_all_regs(target);
+               if (retval == ERROR_TIMEOUT_REACHED) {
+                       cortex_m->slow_register_read = true;
+                       LOG_DEBUG("Switched to slow register read");
+               }
        }
 
+       if (cortex_m->slow_register_read)
+               retval = cortex_m_slow_read_all_regs(target);
+
+       if (retval != ERROR_OK)
+               return retval;
+
        r = arm->cpsr;
        xPSR = buf_get_u32(r->value, 0, 32);
 
index a5dfbf856aef0aea68f5922373622d3881b13a56..57ef1e7e84a5bfd97ae1a775527069defec2f4a6 100644 (file)
@@ -238,6 +238,8 @@ struct cortex_m_common {
        const struct cortex_m_part_info *core_info;
        struct armv7m_common armv7m;
 
+       bool slow_register_read;        /* A register has not been ready, poll S_REGRDY */
+
        int apsel;
 
        /* Whether this target has the erratum that makes C_MASKINTS not apply to