aarch64: improve debug output
[fw/openocd] / src / target / aarch64.c
index e215206b468293ef1f42886d2b4ff9964360b0ec..f0ac32c12a357c87a192b9041f22d771d5c04807 100644 (file)
@@ -27,6 +27,7 @@
 #include "target_request.h"
 #include "target_type.h"
 #include "armv8_opcodes.h"
+#include "armv8_cache.h"
 #include <helper/time_support.h>
 
 static int aarch64_poll(struct target *target);
@@ -45,8 +46,6 @@ static int aarch64_virt2phys(struct target *target,
        target_addr_t virt, target_addr_t *phys);
 static int aarch64_read_apb_ap_memory(struct target *target,
        uint64_t address, uint32_t size, uint32_t count, uint8_t *buffer);
-static int aarch64_instr_write_data_r0(struct arm_dpm *dpm,
-       uint32_t opcode, uint32_t data);
 
 static int aarch64_restore_system_control_reg(struct target *target)
 {
@@ -89,7 +88,10 @@ static int aarch64_restore_system_control_reg(struct target *target)
                                        return retval;
                        break;
                        default:
-                               LOG_DEBUG("unknow cpu state 0x%x" PRIx32, armv8->arm.core_state);
+                               retval = armv8->arm.mcr(target, 15, 0, 0, 1, 0, aarch64->system_control_reg);
+                               if (retval != ERROR_OK)
+                                       return retval;
+                               break;
                        }
        }
        return retval;
@@ -211,19 +213,6 @@ static int aarch64_init_debug_access(struct target *target)
 
        LOG_DEBUG(" ");
 
-       /* Unlocking the debug registers for modification
-        * The debugport might be uninitialised so try twice */
-       retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                            armv8->debug_base + CPUV8_DBG_LOCKACCESS, 0xC5ACCE55);
-       if (retval != ERROR_OK) {
-               /* try again */
-               retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                            armv8->debug_base + CPUV8_DBG_LOCKACCESS, 0xC5ACCE55);
-               if (retval == ERROR_OK)
-                       LOG_USER("Locking debug access failed on first, but succeeded on second try.");
-       }
-       if (retval != ERROR_OK)
-               return retval;
        /* Clear Sticky Power Down status Bit in PRSR to enable access to
           the registers in the Core Power Domain */
        retval = mem_ap_read_atomic_u32(armv8->debug_ap,
@@ -231,67 +220,35 @@ static int aarch64_init_debug_access(struct target *target)
        if (retval != ERROR_OK)
                return retval;
 
-       /* Enabling of instruction execution in debug mode is done in debug_entry code */
-
-       /* Resync breakpoint registers */
-
-       /* Since this is likely called from init or reset, update target state information*/
-       return aarch64_poll(target);
-}
-
-/* To reduce needless round-trips, pass in a pointer to the current
- * DSCR value.  Initialize it to zero if you just need to know the
- * value on return from this function; or DSCR_ITE if you
- * happen to know that no instruction is pending.
- */
-static int aarch64_exec_opcode(struct target *target,
-       uint32_t opcode, uint32_t *dscr_p)
-{
-       uint32_t dscr;
-       int retval;
-       struct armv8_common *armv8 = target_to_armv8(target);
-       dscr = dscr_p ? *dscr_p : 0;
-
-       LOG_DEBUG("exec opcode 0x%08" PRIx32, opcode);
-
-       /* Wait for InstrCompl bit to be set */
-       long long then = timeval_ms();
-       while ((dscr & DSCR_ITE) == 0) {
-               retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                               armv8->debug_base + CPUV8_DBG_DSCR, &dscr);
-               if (retval != ERROR_OK) {
-                       LOG_ERROR("Could not read DSCR register, opcode = 0x%08" PRIx32, opcode);
-                       return retval;
-               }
-               if (timeval_ms() > then + 1000) {
-                       LOG_ERROR("Timeout waiting for aarch64_exec_opcode");
-                       return ERROR_FAIL;
-               }
-       }
+       /*
+        * Static CTI configuration:
+        * Channel 0 -> trigger outputs HALT request to PE
+        * Channel 1 -> trigger outputs Resume request to PE
+        * Gate all channel trigger events from entering the CTM
+        */
 
-       retval = mem_ap_write_u32(armv8->debug_ap,
-                       armv8->debug_base + CPUV8_DBG_ITR, opcode);
+       /* Enable CTI */
+       retval = mem_ap_write_atomic_u32(armv8->debug_ap,
+                       armv8->cti_base + CTI_CTR, 1);
+       /* By default, gate all channel triggers to and from the CTM */
+       if (retval == ERROR_OK)
+               retval = mem_ap_write_atomic_u32(armv8->debug_ap,
+                               armv8->cti_base + CTI_GATE, 0);
+       /* output halt requests to PE on channel 0 trigger */
+       if (retval == ERROR_OK)
+               retval = mem_ap_write_atomic_u32(armv8->debug_ap,
+                               armv8->cti_base + CTI_OUTEN0, CTI_CHNL(0));
+       /* output restart requests to PE on channel 1 trigger */
+       if (retval == ERROR_OK)
+               retval = mem_ap_write_atomic_u32(armv8->debug_ap,
+                               armv8->cti_base + CTI_OUTEN1, CTI_CHNL(1));
        if (retval != ERROR_OK)
                return retval;
 
-       then = timeval_ms();
-       do {
-               retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                               armv8->debug_base + CPUV8_DBG_DSCR, &dscr);
-               if (retval != ERROR_OK) {
-                       LOG_ERROR("Could not read DSCR register");
-                       return retval;
-               }
-               if (timeval_ms() > then + 1000) {
-                       LOG_ERROR("Timeout waiting for aarch64_exec_opcode");
-                       return ERROR_FAIL;
-               }
-       } while ((dscr & DSCR_ITE) == 0);       /* Wait for InstrCompl bit to be set */
-
-       if (dscr_p)
-               *dscr_p = dscr;
+       /* TODO: resync breakpoint registers (not implemented yet) */
 
-       return retval;
+       /* Since this is likely called from init or reset, update target state information */
+       return aarch64_poll(target);
 }
 
 /* Write to memory mapped registers directly with no cache or mmu handling */
@@ -307,463 +264,43 @@ static int aarch64_dap_write_memap_register_u32(struct target *target,
        return retval;
 }
 
-/*
- * AARCH64 implementation of Debug Programmer's Model
- *
- * NOTE the invariant:  these routines return with DSCR_ITE set,
- * so there's no need to poll for it before executing an instruction.
- *
- * NOTE that in several of these cases the "stall" mode might be useful.
- * It'd let us queue a few operations together... prepare/finish might
- * be the places to enable/disable that mode.
- */
-
-static inline struct aarch64_common *dpm_to_a8(struct arm_dpm *dpm)
-{
-       return container_of(dpm, struct aarch64_common, armv8_common.dpm);
-}
-
-static int aarch64_write_dcc(struct armv8_common *armv8, uint32_t data)
-{
-       LOG_DEBUG("write DCC 0x%08" PRIx32, data);
-       return mem_ap_write_u32(armv8->debug_ap,
-                               armv8->debug_base + CPUV8_DBG_DTRRX, data);
-}
-
-static int aarch64_write_dcc_64(struct armv8_common *armv8, uint64_t data)
-{
-       int ret;
-       LOG_DEBUG("write DCC Low word0x%08" PRIx32, (unsigned)data);
-       LOG_DEBUG("write DCC High word 0x%08" PRIx32, (unsigned)(data >> 32));
-       ret = mem_ap_write_u32(armv8->debug_ap,
-                              armv8->debug_base + CPUV8_DBG_DTRRX, data);
-       ret += mem_ap_write_u32(armv8->debug_ap,
-                               armv8->debug_base + CPUV8_DBG_DTRTX, data >> 32);
-       return ret;
-}
-
-static int aarch64_read_dcc(struct armv8_common *armv8, uint32_t *data,
-       uint32_t *dscr_p)
-{
-       uint32_t dscr = DSCR_ITE;
-       int retval;
-
-       if (dscr_p)
-               dscr = *dscr_p;
-
-       /* Wait for DTRRXfull */
-       long long then = timeval_ms();
-       while ((dscr & DSCR_DTR_TX_FULL) == 0) {
-               retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                               armv8->debug_base + CPUV8_DBG_DSCR,
-                               &dscr);
-               if (retval != ERROR_OK)
-                       return retval;
-               if (timeval_ms() > then + 1000) {
-                       LOG_ERROR("Timeout waiting for read dcc");
-                       return ERROR_FAIL;
-               }
-       }
-
-       retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                                           armv8->debug_base + CPUV8_DBG_DTRTX,
-                                           data);
-       if (retval != ERROR_OK)
-               return retval;
-       LOG_DEBUG("read DCC 0x%08" PRIx32, *data);
-
-       if (dscr_p)
-               *dscr_p = dscr;
-
-       return retval;
-}
-
-static int aarch64_read_dcc_64(struct armv8_common *armv8, uint64_t *data,
-       uint32_t *dscr_p)
+static int aarch64_dpm_setup(struct aarch64_common *a8, uint64_t debug)
 {
-       uint32_t dscr = DSCR_ITE;
-       uint32_t higher;
+       struct arm_dpm *dpm = &a8->armv8_common.dpm;
        int retval;
 
-       if (dscr_p)
-               dscr = *dscr_p;
-
-       /* Wait for DTRRXfull */
-       long long then = timeval_ms();
-       while ((dscr & DSCR_DTR_TX_FULL) == 0) {
-               retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                               armv8->debug_base + CPUV8_DBG_DSCR,
-                               &dscr);
-               if (retval != ERROR_OK)
-                       return retval;
-               if (timeval_ms() > then + 1000) {
-                       LOG_ERROR("Timeout waiting for read dcc");
-                       return ERROR_FAIL;
-               }
-       }
-
-       retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                                           armv8->debug_base + CPUV8_DBG_DTRTX,
-                                           (uint32_t *)data);
-       if (retval != ERROR_OK)
-               return retval;
-
-       retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                                           armv8->debug_base + CPUV8_DBG_DTRRX,
-                                           &higher);
-       if (retval != ERROR_OK)
-               return retval;
-
-       *data = *(uint32_t *)data | (uint64_t)higher << 32;
-       LOG_DEBUG("read DCC 0x%16.16" PRIx64, *data);
+       dpm->arm = &a8->armv8_common.arm;
+       dpm->didr = debug;
 
-       if (dscr_p)
-               *dscr_p = dscr;
+       retval = armv8_dpm_setup(dpm);
+       if (retval == ERROR_OK)
+               retval = armv8_dpm_initialize(dpm);
 
        return retval;
 }
 
-static int aarch64_dpm_prepare(struct arm_dpm *dpm)
+static int aarch64_set_dscr_bits(struct target *target, unsigned long bit_mask, unsigned long value)
 {
-       struct aarch64_common *a8 = dpm_to_a8(dpm);
+       struct armv8_common *armv8 = target_to_armv8(target);
        uint32_t dscr;
-       int retval;
-
-       /* set up invariant:  INSTR_COMP is set after ever DPM operation */
-       long long then = timeval_ms();
-       for (;; ) {
-               retval = mem_ap_read_atomic_u32(a8->armv8_common.debug_ap,
-                               a8->armv8_common.debug_base + CPUV8_DBG_DSCR,
-                               &dscr);
-               if (retval != ERROR_OK)
-                       return retval;
-               if ((dscr & DSCR_ITE) != 0)
-                       break;
-               if (timeval_ms() > then + 1000) {
-                       LOG_ERROR("Timeout waiting for dpm prepare");
-                       return ERROR_FAIL;
-               }
-       }
 
-       /* this "should never happen" ... */
-       if (dscr & DSCR_DTR_RX_FULL) {
-               LOG_ERROR("DSCR_DTR_RX_FULL, dscr 0x%08" PRIx32, dscr);
-               /* Clear DCCRX */
-               retval = mem_ap_read_u32(a8->armv8_common.debug_ap,
-                       a8->armv8_common.debug_base + CPUV8_DBG_DTRRX, &dscr);
-               if (retval != ERROR_OK)
-                       return retval;
-
-               /* Clear sticky error */
-               retval = mem_ap_write_u32(a8->armv8_common.debug_ap,
-                       a8->armv8_common.debug_base + CPUV8_DBG_DRCR, DRCR_CSE);
-               if (retval != ERROR_OK)
-                       return retval;
-       }
-
-       return retval;
-}
-
-static int aarch64_dpm_finish(struct arm_dpm *dpm)
-{
-       /* REVISIT what could be done here? */
-       return ERROR_OK;
-}
-
-static int aarch64_instr_execute(struct arm_dpm *dpm,
-       uint32_t opcode)
-{
-       struct aarch64_common *a8 = dpm_to_a8(dpm);
-       uint32_t dscr = DSCR_ITE;
-
-       return aarch64_exec_opcode(
-                       a8->armv8_common.arm.target,
-                       opcode,
-                       &dscr);
-}
-
-static int aarch64_instr_write_data_dcc(struct arm_dpm *dpm,
-       uint32_t opcode, uint32_t data)
-{
-       struct aarch64_common *a8 = dpm_to_a8(dpm);
-       int retval;
-       uint32_t dscr = DSCR_ITE;
-
-       retval = aarch64_write_dcc(&a8->armv8_common, data);
-       if (retval != ERROR_OK)
-               return retval;
-
-       return aarch64_exec_opcode(
-                       a8->armv8_common.arm.target,
-                       opcode,
-                       &dscr);
-}
-
-static int aarch64_instr_write_data_dcc_64(struct arm_dpm *dpm,
-       uint32_t opcode, uint64_t data)
-{
-       struct aarch64_common *a8 = dpm_to_a8(dpm);
-       int retval;
-       uint32_t dscr = DSCR_ITE;
-
-       retval = aarch64_write_dcc_64(&a8->armv8_common, data);
-       if (retval != ERROR_OK)
-               return retval;
-
-       return aarch64_exec_opcode(
-                       a8->armv8_common.arm.target,
-                       opcode,
-                       &dscr);
-}
-
-static int aarch64_instr_write_data_r0(struct arm_dpm *dpm,
-       uint32_t opcode, uint32_t data)
-{
-       struct aarch64_common *a8 = dpm_to_a8(dpm);
-       uint32_t dscr = DSCR_ITE;
-       int retval;
-
-       retval = aarch64_write_dcc(&a8->armv8_common, data);
-       if (retval != ERROR_OK)
-               return retval;
-
-       retval = aarch64_exec_opcode(
-                       a8->armv8_common.arm.target,
-                       ARMV8_MRS(SYSTEM_DBG_DTRRX_EL0, 0),
-                       &dscr);
-       if (retval != ERROR_OK)
-               return retval;
-
-       /* then the opcode, taking data from R0 */
-       retval = aarch64_exec_opcode(
-                       a8->armv8_common.arm.target,
-                       opcode,
-                       &dscr);
-
-       return retval;
-}
-
-static int aarch64_instr_write_data_r0_64(struct arm_dpm *dpm,
-       uint32_t opcode, uint64_t data)
-{
-       struct aarch64_common *a8 = dpm_to_a8(dpm);
-       uint32_t dscr = DSCR_ITE;
-       int retval;
-
-       retval = aarch64_write_dcc_64(&a8->armv8_common, data);
-       if (retval != ERROR_OK)
-               return retval;
-
-       retval = aarch64_exec_opcode(
-                       a8->armv8_common.arm.target,
-                       ARMV8_MRS(SYSTEM_DBG_DBGDTR_EL0, 0),
-                       &dscr);
-       if (retval != ERROR_OK)
-               return retval;
-
-       /* then the opcode, taking data from R0 */
-       retval = aarch64_exec_opcode(
-                       a8->armv8_common.arm.target,
-                       opcode,
-                       &dscr);
-
-       return retval;
-}
-
-static int aarch64_instr_cpsr_sync(struct arm_dpm *dpm)
-{
-       struct target *target = dpm->arm->target;
-       uint32_t dscr = DSCR_ITE;
-
-       /* "Prefetch flush" after modifying execution status in CPSR */
-       return aarch64_exec_opcode(target,
-                       DSB_SY,
-                       &dscr);
-}
-
-static int aarch64_instr_read_data_dcc(struct arm_dpm *dpm,
-       uint32_t opcode, uint32_t *data)
-{
-       struct aarch64_common *a8 = dpm_to_a8(dpm);
-       int retval;
-       uint32_t dscr = DSCR_ITE;
-
-       /* the opcode, writing data to DCC */
-       retval = aarch64_exec_opcode(
-                       a8->armv8_common.arm.target,
-                       opcode,
-                       &dscr);
-       if (retval != ERROR_OK)
-               return retval;
-
-       return aarch64_read_dcc(&a8->armv8_common, data, &dscr);
-}
-
-static int aarch64_instr_read_data_dcc_64(struct arm_dpm *dpm,
-       uint32_t opcode, uint64_t *data)
-{
-       struct aarch64_common *a8 = dpm_to_a8(dpm);
-       int retval;
-       uint32_t dscr = DSCR_ITE;
-
-       /* the opcode, writing data to DCC */
-       retval = aarch64_exec_opcode(
-                       a8->armv8_common.arm.target,
-                       opcode,
-                       &dscr);
-       if (retval != ERROR_OK)
-               return retval;
-
-       return aarch64_read_dcc_64(&a8->armv8_common, data, &dscr);
-}
-
-static int aarch64_instr_read_data_r0(struct arm_dpm *dpm,
-       uint32_t opcode, uint32_t *data)
-{
-       struct aarch64_common *a8 = dpm_to_a8(dpm);
-       uint32_t dscr = DSCR_ITE;
-       int retval;
-
-       /* the opcode, writing data to R0 */
-       retval = aarch64_exec_opcode(
-                       a8->armv8_common.arm.target,
-                       opcode,
-                       &dscr);
-       if (retval != ERROR_OK)
-               return retval;
-
-       /* write R0 to DCC */
-       retval = aarch64_exec_opcode(
-                       a8->armv8_common.arm.target,
-                       ARMV8_MSR_GP(SYSTEM_DBG_DTRTX_EL0, 0),  /* msr dbgdtr_el0, x0 */
-                       &dscr);
-       if (retval != ERROR_OK)
-               return retval;
-
-       return aarch64_read_dcc(&a8->armv8_common, data, &dscr);
-}
-
-static int aarch64_instr_read_data_r0_64(struct arm_dpm *dpm,
-       uint32_t opcode, uint64_t *data)
-{
-       struct aarch64_common *a8 = dpm_to_a8(dpm);
-       uint32_t dscr = DSCR_ITE;
-       int retval;
-
-       /* the opcode, writing data to R0 */
-       retval = aarch64_exec_opcode(
-                       a8->armv8_common.arm.target,
-                       opcode,
-                       &dscr);
-       if (retval != ERROR_OK)
-               return retval;
-
-       /* write R0 to DCC */
-       retval = aarch64_exec_opcode(
-                       a8->armv8_common.arm.target,
-                       ARMV8_MSR_GP(SYSTEM_DBG_DBGDTR_EL0, 0),  /* msr dbgdtr_el0, x0 */
-                       &dscr);
-       if (retval != ERROR_OK)
+       /* Read DSCR */
+       int retval = mem_ap_read_atomic_u32(armv8->debug_ap,
+                       armv8->debug_base + CPUV8_DBG_DSCR, &dscr);
+       if (ERROR_OK != retval)
                return retval;
 
-       return aarch64_read_dcc_64(&a8->armv8_common, data, &dscr);
-}
-
-static int aarch64_bpwp_enable(struct arm_dpm *dpm, unsigned index_t,
-       uint32_t addr, uint32_t control)
-{
-       struct aarch64_common *a8 = dpm_to_a8(dpm);
-       uint32_t vr = a8->armv8_common.debug_base;
-       uint32_t cr = a8->armv8_common.debug_base;
-       int retval;
+       /* clear bitfield */
+       dscr &= ~bit_mask;
+       /* put new value */
+       dscr |= value & bit_mask;
 
-       switch (index_t) {
-               case 0 ... 15:  /* breakpoints */
-                       vr += CPUV8_DBG_BVR_BASE;
-                       cr += CPUV8_DBG_BCR_BASE;
-                       break;
-               case 16 ... 31: /* watchpoints */
-                       vr += CPUV8_DBG_WVR_BASE;
-                       cr += CPUV8_DBG_WCR_BASE;
-                       index_t -= 16;
-                       break;
-               default:
-                       return ERROR_FAIL;
-       }
-       vr += 16 * index_t;
-       cr += 16 * index_t;
-
-       LOG_DEBUG("A8: bpwp enable, vr %08x cr %08x",
-               (unsigned) vr, (unsigned) cr);
-
-       retval = aarch64_dap_write_memap_register_u32(dpm->arm->target,
-                       vr, addr);
-       if (retval != ERROR_OK)
-               return retval;
-       retval = aarch64_dap_write_memap_register_u32(dpm->arm->target,
-                       cr, control);
+       /* write new DSCR */
+       retval = mem_ap_write_atomic_u32(armv8->debug_ap,
+                       armv8->debug_base + CPUV8_DBG_DSCR, dscr);
        return retval;
 }
 
-static int aarch64_bpwp_disable(struct arm_dpm *dpm, unsigned index_t)
-{
-       struct aarch64_common *a = dpm_to_a8(dpm);
-       uint32_t cr;
-
-       switch (index_t) {
-               case 0 ... 15:
-                       cr = a->armv8_common.debug_base + CPUV8_DBG_BCR_BASE;
-                       break;
-               case 16 ... 31:
-                       cr = a->armv8_common.debug_base + CPUV8_DBG_WCR_BASE;
-                       index_t -= 16;
-                       break;
-               default:
-                       return ERROR_FAIL;
-       }
-       cr += 16 * index_t;
-
-       LOG_DEBUG("A: bpwp disable, cr %08x", (unsigned) cr);
-
-       /* clear control register */
-       return aarch64_dap_write_memap_register_u32(dpm->arm->target, cr, 0);
-
-}
-
-static int aarch64_dpm_setup(struct aarch64_common *a8, uint64_t debug)
-{
-       struct arm_dpm *dpm = &a8->armv8_common.dpm;
-       int retval;
-
-       dpm->arm = &a8->armv8_common.arm;
-       dpm->didr = debug;
-
-       dpm->prepare = aarch64_dpm_prepare;
-       dpm->finish = aarch64_dpm_finish;
-
-       dpm->instr_execute = aarch64_instr_execute;
-       dpm->instr_write_data_dcc = aarch64_instr_write_data_dcc;
-       dpm->instr_write_data_dcc_64 = aarch64_instr_write_data_dcc_64;
-       dpm->instr_write_data_r0 = aarch64_instr_write_data_r0;
-       dpm->instr_write_data_r0_64 = aarch64_instr_write_data_r0_64;
-       dpm->instr_cpsr_sync = aarch64_instr_cpsr_sync;
-
-       dpm->instr_read_data_dcc = aarch64_instr_read_data_dcc;
-       dpm->instr_read_data_dcc_64 = aarch64_instr_read_data_dcc_64;
-       dpm->instr_read_data_r0 = aarch64_instr_read_data_r0;
-       dpm->instr_read_data_r0_64 = aarch64_instr_read_data_r0_64;
-
-       dpm->arm_reg_current = armv8_reg_current;
-
-       dpm->bpwp_enable = aarch64_bpwp_enable;
-       dpm->bpwp_disable = aarch64_bpwp_disable;
-
-       retval = armv8_dpm_setup(dpm);
-       if (retval == ERROR_OK)
-               retval = armv8_dpm_initialize(dpm);
-
-       return retval;
-}
 static struct target *get_aarch64(struct target *target, int32_t coreid)
 {
        struct target_list *head;
@@ -782,16 +319,30 @@ static int aarch64_halt(struct target *target);
 
 static int aarch64_halt_smp(struct target *target)
 {
-       int retval = 0;
-       struct target_list *head;
-       struct target *curr;
-       head = target->head;
+       int retval = ERROR_OK;
+       struct target_list *head = target->head;
+
        while (head != (struct target_list *)NULL) {
-               curr = head->target;
-               if ((curr != target) && (curr->state != TARGET_HALTED))
-                       retval += aarch64_halt(curr);
+               struct target *curr = head->target;
+               struct armv8_common *armv8 = target_to_armv8(curr);
+
+               /* open the gate for channel 0 to let HALT requests pass to the CTM */
+               if (curr->smp) {
+                       retval = mem_ap_write_atomic_u32(armv8->debug_ap,
+                                       armv8->cti_base + CTI_GATE, CTI_CHNL(0));
+                       if (retval == ERROR_OK)
+                               retval = aarch64_set_dscr_bits(curr, DSCR_HDE, DSCR_HDE);
+               }
+               if (retval != ERROR_OK)
+                       break;
+
                head = head->next;
        }
+
+       /* halt the target PE */
+       if (retval == ERROR_OK)
+               retval = aarch64_halt(target);
+
        return retval;
 }
 
@@ -838,7 +389,7 @@ static int aarch64_poll(struct target *target)
        if (DSCR_RUN_MODE(dscr) == 0x3) {
                if (prev_target_state != TARGET_HALTED) {
                        /* We have a halting debug event */
-                       LOG_DEBUG("Target halted");
+                       LOG_DEBUG("Target %s halted", target_name(target));
                        target->state = TARGET_HALTED;
                        if ((prev_target_state == TARGET_RUNNING)
                                || (prev_target_state == TARGET_UNKNOWN)
@@ -882,51 +433,19 @@ static int aarch64_halt(struct target *target)
        uint32_t dscr;
        struct armv8_common *armv8 = target_to_armv8(target);
 
-       /* enable CTI*/
-       retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                       armv8->cti_base + CTI_CTR, 1);
-       if (retval != ERROR_OK)
-               return retval;
-
-       retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                       armv8->cti_base + CTI_GATE, 3);
-       if (retval != ERROR_OK)
-               return retval;
-
-       retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                       armv8->cti_base + CTI_OUTEN0, 1);
-       if (retval != ERROR_OK)
-               return retval;
-
-       retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                       armv8->cti_base + CTI_OUTEN1, 2);
-       if (retval != ERROR_OK)
-               return retval;
-
        /*
         * add HDE in halting debug mode
         */
-       retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + CPUV8_DBG_DSCR, &dscr);
-       if (retval != ERROR_OK)
-               return retval;
-
-       retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + CPUV8_DBG_DSCR, dscr | DSCR_HDE);
-       if (retval != ERROR_OK)
-               return retval;
-
-       retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                       armv8->cti_base + CTI_APPPULSE, 1);
+       retval = aarch64_set_dscr_bits(target, DSCR_HDE, DSCR_HDE);
        if (retval != ERROR_OK)
                return retval;
 
+       /* trigger an event on channel 0, this outputs a halt request to the PE */
        retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                       armv8->cti_base + CTI_INACK, 1);
+                       armv8->cti_base + CTI_APPPULSE, CTI_CHNL(0));
        if (retval != ERROR_OK)
                return retval;
 
-
        long long then = timeval_ms();
        for (;; ) {
                retval = mem_ap_read_atomic_u32(armv8->debug_ap,
@@ -985,44 +504,21 @@ static int aarch64_internal_restore(struct target *target, int current,
                        LOG_ERROR("How do I resume into Jazelle state??");
                        return ERROR_FAIL;
        }
-       LOG_DEBUG("resume pc = 0x%16" PRIx64, resume_pc);
+       LOG_DEBUG("resume pc = 0x%016" PRIx64, resume_pc);
        buf_set_u64(arm->pc->value, 0, 64, resume_pc);
        arm->pc->dirty = 1;
        arm->pc->valid = 1;
-       dpmv8_modeswitch(&armv8->dpm, ARM_MODE_ANY);
 
        /* called it now before restoring context because it uses cpu
         * register r0 for restoring system control register */
        retval = aarch64_restore_system_control_reg(target);
-       if (retval != ERROR_OK)
-               return retval;
-       retval = aarch64_restore_context(target, handle_breakpoints);
-       if (retval != ERROR_OK)
-               return retval;
-       target->debug_reason = DBG_REASON_NOTHALTED;
-       target->state = TARGET_RUNNING;
-
-       /* registers are now invalid */
-       register_cache_invalidate(arm->core_cache);
-
-#if 0
-       /* the front-end may request us not to handle breakpoints */
-       if (handle_breakpoints) {
-               /* Single step past breakpoint at current address */
-               breakpoint = breakpoint_find(target, resume_pc);
-               if (breakpoint) {
-                       LOG_DEBUG("unset breakpoint at 0x%8.8x", breakpoint->address);
-                       cortex_m3_unset_breakpoint(target, breakpoint);
-                       cortex_m3_single_step_core(target);
-                       cortex_m3_set_breakpoint(target, breakpoint);
-               }
-       }
-#endif
+       if (retval == ERROR_OK)
+               retval = aarch64_restore_context(target, handle_breakpoints);
 
        return retval;
 }
 
-static int aarch64_internal_restart(struct target *target)
+static int aarch64_internal_restart(struct target *target, bool slave_pe)
 {
        struct armv8_common *armv8 = target_to_armv8(target);
        struct arm *arm = &armv8->arm;
@@ -1042,24 +538,43 @@ static int aarch64_internal_restart(struct target *target)
                return retval;
 
        if ((dscr & DSCR_ITE) == 0)
-               LOG_ERROR("DSCR InstrCompl must be set before leaving debug!");
+               LOG_ERROR("DSCR.ITE must be set before leaving debug!");
+       if ((dscr & DSCR_ERR) != 0)
+               LOG_ERROR("DSCR.ERR must be cleared before leaving debug!");
 
+       /* make sure to acknowledge the halt event before resuming */
        retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                       armv8->cti_base + CTI_APPPULSE, 2);
+                       armv8->cti_base + CTI_INACK, CTI_TRIG(HALT));
+
+       /*
+        * open the CTI gate for channel 1 so that the restart events
+        * get passed along to all PEs
+        */
+       if (retval == ERROR_OK)
+               retval = mem_ap_write_atomic_u32(armv8->debug_ap,
+                               armv8->cti_base + CTI_GATE, CTI_CHNL(1));
        if (retval != ERROR_OK)
                return retval;
 
-       long long then = timeval_ms();
-       for (;; ) {
-               retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                               armv8->debug_base + CPUV8_DBG_DSCR, &dscr);
+       if (!slave_pe) {
+               /* trigger an event on channel 1, generates a restart request to the PE */
+               retval = mem_ap_write_atomic_u32(armv8->debug_ap,
+                               armv8->cti_base + CTI_APPPULSE, CTI_CHNL(1));
                if (retval != ERROR_OK)
                        return retval;
-               if ((dscr & DSCR_HDE) != 0)
-                       break;
-               if (timeval_ms() > then + 1000) {
-                       LOG_ERROR("Timeout waiting for resume");
-                       return ERROR_FAIL;
+
+               long long then = timeval_ms();
+               for (;; ) {
+                       retval = mem_ap_read_atomic_u32(armv8->debug_ap,
+                                       armv8->debug_base + CPUV8_DBG_DSCR, &dscr);
+                       if (retval != ERROR_OK)
+                               return retval;
+                       if ((dscr & DSCR_HDE) != 0)
+                               break;
+                       if (timeval_ms() > then + 1000) {
+                               LOG_ERROR("Timeout waiting for resume");
+                               return ERROR_FAIL;
+                       }
                }
        }
 
@@ -1068,6 +583,7 @@ static int aarch64_internal_restart(struct target *target)
 
        /* registers are now invalid */
        register_cache_invalidate(arm->core_cache);
+       register_cache_invalidate(arm->core_cache->next);
 
        return ERROR_OK;
 }
@@ -1085,7 +601,7 @@ static int aarch64_restore_smp(struct target *target, int handle_breakpoints)
                        /*  resume current address , not in step mode */
                        retval += aarch64_internal_restore(curr, 1, &address,
                                        handle_breakpoints, 0);
-                       retval += aarch64_internal_restart(curr);
+                       retval += aarch64_internal_restart(curr, true);
                }
                head = head->next;
 
@@ -1116,7 +632,7 @@ static int aarch64_resume(struct target *target, int current,
                if (retval != ERROR_OK)
                        return retval;
        }
-       aarch64_internal_restart(target);
+       aarch64_internal_restart(target, false);
 
        if (!debug_execution) {
                target->state = TARGET_RUNNING;
@@ -1136,22 +652,29 @@ static int aarch64_debug_entry(struct target *target)
        int retval = ERROR_OK;
        struct aarch64_common *aarch64 = target_to_aarch64(target);
        struct armv8_common *armv8 = target_to_armv8(target);
+       struct arm_dpm *dpm = &armv8->dpm;
+       enum arm_state core_state;
 
-       LOG_DEBUG("dscr = 0x%08" PRIx32, aarch64->cpudbg_dscr);
+       LOG_DEBUG("%s dscr = 0x%08" PRIx32, target_name(target), aarch64->cpudbg_dscr);
 
-       /* REVISIT see A8 TRM 12.11.4 steps 2..3 -- make sure that any
-        * imprecise data aborts get discarded by issuing a Data
-        * Synchronization Barrier:  ARMV4_5_MCR(15, 0, 0, 7, 10, 4).
-        */
+       dpm->dscr = aarch64->cpudbg_dscr;
+       core_state = armv8_dpm_get_core_state(dpm);
+       armv8_select_opcodes(armv8, core_state == ARM_STATE_AARCH64);
+       armv8_select_reg_access(armv8, core_state == ARM_STATE_AARCH64);
 
        /* make sure to clear all sticky errors */
        retval = mem_ap_write_atomic_u32(armv8->debug_ap,
                        armv8->debug_base + CPUV8_DBG_DRCR, DRCR_CSE);
+
+       /* discard async exceptions */
+       if (retval == ERROR_OK)
+               retval = dpm->instr_cpsr_sync(dpm);
+
        if (retval != ERROR_OK)
                return retval;
 
        /* Examine debug reason */
-       armv8_dpm_report_dscr(&armv8->dpm, aarch64->cpudbg_dscr);
+       armv8_dpm_report_dscr(dpm, aarch64->cpudbg_dscr);
 
        /* save address of instruction that triggered the watchpoint? */
        if (target->debug_reason == DBG_REASON_WATCHPOINT) {
@@ -1176,11 +699,8 @@ static int aarch64_debug_entry(struct target *target)
 
        retval = armv8_dpm_read_current_registers(&armv8->dpm);
 
-       if (armv8->post_debug_entry) {
+       if (retval == ERROR_OK && armv8->post_debug_entry)
                retval = armv8->post_debug_entry(target);
-               if (retval != ERROR_OK)
-                       return retval;
-       }
 
        return retval;
 }
@@ -1191,10 +711,10 @@ static int aarch64_post_debug_entry(struct target *target)
        struct armv8_common *armv8 = &aarch64->armv8_common;
        int retval;
 
-       mem_ap_write_atomic_u32(armv8->debug_ap,
-                                   armv8->debug_base + CPUV8_DBG_DRCR, 1<<2);
        switch (armv8->arm.core_mode) {
                case ARMV8_64_EL0T:
+                       armv8_dpm_modeswitch(&armv8->dpm, ARMV8_64_EL1H);
+                       /* fall through */
                case ARMV8_64_EL1T:
                case ARMV8_64_EL1H:
                        retval = armv8->arm.mrs(target, 3, /*op 0*/
@@ -1222,14 +742,27 @@ static int aarch64_post_debug_entry(struct target *target)
                        if (retval != ERROR_OK)
                                return retval;
                break;
+
+               case ARM_MODE_SVC:
+                       retval = armv8->arm.mrc(target, 15, 0, 0, 1, 0, &aarch64->system_control_reg);
+                       if (retval != ERROR_OK)
+                               return retval;
+                       break;
+
                default:
-                       LOG_DEBUG("unknow cpu state 0x%x" PRIx32, armv8->arm.core_state);
+                       LOG_INFO("cannot read system control register in this mode");
+                       break;
        }
+
+       armv8_dpm_modeswitch(&armv8->dpm, ARM_MODE_ANY);
+
        LOG_DEBUG("System_register: %8.8" PRIx32, aarch64->system_control_reg);
        aarch64->system_control_reg_curr = aarch64->system_control_reg;
 
-       if (armv8->armv8_mmu.armv8_cache.ctype == -1)
-               armv8_identify_cache(target);
+       if (armv8->armv8_mmu.armv8_cache.info == -1) {
+               armv8_identify_cache(armv8);
+               armv8_read_mpidr(armv8);
+       }
 
        armv8->armv8_mmu.mmu_enabled =
                        (aarch64->system_control_reg & 0x1U) ? 1 : 0;
@@ -1246,7 +779,7 @@ static int aarch64_step(struct target *target, int current, target_addr_t addres
 {
        struct armv8_common *armv8 = target_to_armv8(target);
        int retval;
-       uint32_t tmp;
+       uint32_t edecr;
 
        if (target->state != TARGET_HALTED) {
                LOG_WARNING("target not halted");
@@ -1254,25 +787,31 @@ static int aarch64_step(struct target *target, int current, target_addr_t addres
        }
 
        retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + CPUV8_DBG_EDECR, &tmp);
+                       armv8->debug_base + CPUV8_DBG_EDECR, &edecr);
        if (retval != ERROR_OK)
                return retval;
 
+       /* make sure EDECR.SS is not set when restoring the register */
+       edecr &= ~0x4;
+
+       /* set EDECR.SS to enter hardware step mode */
        retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + CPUV8_DBG_EDECR, (tmp|0x4));
+                       armv8->debug_base + CPUV8_DBG_EDECR, (edecr|0x4));
        if (retval != ERROR_OK)
                return retval;
 
-       target->debug_reason = DBG_REASON_SINGLESTEP;
-       retval = aarch64_resume(target, 1, address, 0, 0);
+       /* disable interrupts while stepping */
+       retval = aarch64_set_dscr_bits(target, 0x3 << 22, 0x3 << 22);
+       if (retval != ERROR_OK)
+               return retval;
+
+       /* resume the target */
+       retval = aarch64_resume(target, current, address, 0, 0);
        if (retval != ERROR_OK)
                return retval;
 
        long long then = timeval_ms();
        while (target->state != TARGET_HALTED) {
-               mem_ap_read_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + CPUV8_DBG_EDESR, &tmp);
-               LOG_DEBUG("DESR = %#x", tmp);
                retval = aarch64_poll(target);
                if (retval != ERROR_OK)
                        return retval;
@@ -1282,14 +821,16 @@ static int aarch64_step(struct target *target, int current, target_addr_t addres
                }
        }
 
+       /* restore EDECR */
        retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + CPUV8_DBG_EDECR, (tmp&(~0x4)));
+                       armv8->debug_base + CPUV8_DBG_EDECR, edecr);
        if (retval != ERROR_OK)
                return retval;
 
-       target_call_event_callbacks(target, TARGET_EVENT_HALTED);
-       if (target->state == TARGET_HALTED)
-               LOG_DEBUG("target stepped");
+       /* restore interrupts */
+       retval = aarch64_set_dscr_bits(target, 0x3 << 22, 0);
+       if (retval != ERROR_OK)
+               return retval;
 
        return ERROR_OK;
 }
@@ -1298,13 +839,12 @@ static int aarch64_restore_context(struct target *target, bool bpwp)
 {
        struct armv8_common *armv8 = target_to_armv8(target);
 
-       LOG_DEBUG(" ");
+       LOG_DEBUG("%s", target_name(target));
 
        if (armv8->pre_restore_context)
                armv8->pre_restore_context(target);
 
        return armv8_dpm_write_dirty_registers(&armv8->dpm, bpwp);
-
 }
 
 /*
@@ -1322,7 +862,6 @@ static int aarch64_set_breakpoint(struct target *target,
        struct aarch64_common *aarch64 = target_to_aarch64(target);
        struct armv8_common *armv8 = &aarch64->armv8_common;
        struct aarch64_brp *brp_list = aarch64->brp_list;
-       uint32_t dscr;
 
        if (breakpoint->set) {
                LOG_WARNING("breakpoint already set");
@@ -1371,27 +910,38 @@ static int aarch64_set_breakpoint(struct target *target,
 
        } else if (breakpoint->type == BKPT_SOFT) {
                uint8_t code[4];
-               buf_set_u32(code, 0, 32, ARMV8_BKPT(0x11));
+
+               buf_set_u32(code, 0, 32, ARMV8_HLT(0x11));
                retval = target_read_memory(target,
                                breakpoint->address & 0xFFFFFFFFFFFFFFFE,
                                breakpoint->length, 1,
                                breakpoint->orig_instr);
                if (retval != ERROR_OK)
                        return retval;
+
+               armv8_cache_d_inner_flush_virt(armv8,
+                               breakpoint->address & 0xFFFFFFFFFFFFFFFE,
+                               breakpoint->length);
+
                retval = target_write_memory(target,
                                breakpoint->address & 0xFFFFFFFFFFFFFFFE,
                                breakpoint->length, 1, code);
                if (retval != ERROR_OK)
                        return retval;
+
+               armv8_cache_d_inner_flush_virt(armv8,
+                               breakpoint->address & 0xFFFFFFFFFFFFFFFE,
+                               breakpoint->length);
+
+               armv8_cache_i_inner_inval_virt(armv8,
+                               breakpoint->address & 0xFFFFFFFFFFFFFFFE,
+                               breakpoint->length);
+
                breakpoint->set = 0x11; /* Any nice value but 0 */
        }
 
-       retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                                       armv8->debug_base + CPUV8_DBG_DSCR, &dscr);
        /* Ensure that halting debug mode is enable */
-       dscr = dscr | DSCR_HDE;
-       retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                                        armv8->debug_base + CPUV8_DBG_DSCR, dscr);
+       retval = aarch64_set_dscr_bits(target, DSCR_HDE, DSCR_HDE);
        if (retval != ERROR_OK) {
                LOG_DEBUG("Failed to set DSCR.HDE");
                return retval;
@@ -1638,6 +1188,11 @@ static int aarch64_unset_breakpoint(struct target *target, struct breakpoint *br
                }
        } else {
                /* restore original instruction (kept in target endianness) */
+
+               armv8_cache_d_inner_flush_virt(armv8,
+                               breakpoint->address & 0xFFFFFFFFFFFFFFFE,
+                               breakpoint->length);
+
                if (breakpoint->length == 4) {
                        retval = target_write_memory(target,
                                        breakpoint->address & 0xFFFFFFFFFFFFFFFE,
@@ -1651,6 +1206,14 @@ static int aarch64_unset_breakpoint(struct target *target, struct breakpoint *br
                        if (retval != ERROR_OK)
                                return retval;
                }
+
+               armv8_cache_d_inner_flush_virt(armv8,
+                               breakpoint->address & 0xFFFFFFFFFFFFFFFE,
+                               breakpoint->length);
+
+               armv8_cache_i_inner_inval_virt(armv8,
+                               breakpoint->address & 0xFFFFFFFFFFFFFFFE,
+                               breakpoint->length);
        }
        breakpoint->set = 0;
 
@@ -1793,6 +1356,7 @@ static int aarch64_write_apb_ap_memory(struct target *target,
        /* write memory through APB-AP */
        int retval = ERROR_COMMAND_SYNTAX_ERROR;
        struct armv8_common *armv8 = target_to_armv8(target);
+       struct arm_dpm *dpm = &armv8->dpm;
        struct arm *arm = &armv8->arm;
        int total_bytes = count * size;
        int total_u32;
@@ -1802,8 +1366,9 @@ static int aarch64_write_apb_ap_memory(struct target *target,
        uint32_t dscr;
        uint8_t *tmp_buff = NULL;
 
-       LOG_DEBUG("Writing APB-AP memory address 0x%" PRIx64 " size %"  PRIu32 " count%"  PRIu32,
+       LOG_DEBUG("Writing APB-AP memory address 0x%" PRIx64 " size %"  PRIu32 " count %"  PRIu32,
                          address, size, count);
+
        if (target->state != TARGET_HALTED) {
                LOG_WARNING("target not halted");
                return ERROR_TARGET_NOT_HALTED;
@@ -1822,13 +1387,6 @@ static int aarch64_write_apb_ap_memory(struct target *target,
        reg = armv8_reg_current(arm, 0);
        reg->dirty = true;
 
-       /*  clear any abort  */
-       retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + CPUV8_DBG_DRCR, DRCR_CSE);
-       if (retval != ERROR_OK)
-               return retval;
-
-
        /* This algorithm comes from DDI0487A.g, chapter J9.1 */
 
        /* The algorithm only copies 32 bit words, so the buffer
@@ -1877,17 +1435,15 @@ static int aarch64_write_apb_ap_memory(struct target *target,
        if (arm->core_state == ARM_STATE_AARCH64) {
                /* Write X0 with value 'address' using write procedure */
                /* Step 1.a+b - Write the address for read access into DBGDTR_EL0 */
-               retval += aarch64_write_dcc_64(armv8, address & ~0x3ULL);
                /* Step 1.c   - Copy value from DTR to R0 using instruction mrs DBGDTR_EL0, x0 */
-               retval += aarch64_exec_opcode(target,
-                               ARMV8_MRS(SYSTEM_DBG_DBGDTR_EL0, 0), &dscr);
+               retval = dpm->instr_write_data_dcc_64(dpm,
+                               ARMV8_MRS(SYSTEM_DBG_DBGDTR_EL0, 0), address & ~0x3ULL);
        } else {
                /* Write R0 with value 'address' using write procedure */
                /* Step 1.a+b - Write the address for read access into DBGDTRRX */
-               retval += aarch64_write_dcc(armv8, address & ~0x3ULL);
                /* Step 1.c   - Copy value from DTR to R0 using instruction mrc DBGDTRTXint, r0 */
-               retval += aarch64_exec_opcode(target,
-                               T32_FMTITR(ARMV4_5_MRC(14, 0, 0, 0, 5, 0)), &dscr);
+               retval = dpm->instr_write_data_dcc(dpm,
+                               ARMV4_5_MRC(14, 0, 0, 0, 5, 0), address & ~0x3ULL);
 
        }
        /* Step 1.d   - Change DCC to memory mode */
@@ -1916,11 +1472,12 @@ static int aarch64_write_apb_ap_memory(struct target *target,
                                armv8->debug_base + CPUV8_DBG_DSCR, &dscr);
        if (retval != ERROR_OK)
                goto error_free_buff_w;
+
+       dpm->dscr = dscr;
        if (dscr & (DSCR_ERR | DSCR_SYS_ERROR_PEND)) {
                /* Abort occurred - clear it and exit */
                LOG_ERROR("abort occurred - dscr = 0x%08" PRIx32, dscr);
-               mem_ap_write_atomic_u32(armv8->debug_ap,
-                                       armv8->debug_base + CPUV8_DBG_DRCR, 1<<2);
+               armv8_dpm_handle_exception(dpm);
                goto error_free_buff_w;
        }
 
@@ -1948,6 +1505,7 @@ static int aarch64_read_apb_ap_memory(struct target *target,
        /* read memory through APB-AP */
        int retval = ERROR_COMMAND_SYNTAX_ERROR;
        struct armv8_common *armv8 = target_to_armv8(target);
+       struct arm_dpm *dpm = &armv8->dpm;
        struct arm *arm = &armv8->arm;
        int total_bytes = count * size;
        int total_u32;
@@ -1959,8 +1517,9 @@ static int aarch64_read_apb_ap_memory(struct target *target,
        uint8_t *u8buf_ptr;
        uint32_t value;
 
-       LOG_DEBUG("Reading APB-AP memory address 0x%" TARGET_PRIxADDR " size %" PRIu32 " count%"  PRIu32,
+       LOG_DEBUG("Reading APB-AP memory address 0x%" TARGET_PRIxADDR " size %" PRIu32 " count %"  PRIu32,
                          address, size, count);
+
        if (target->state != TARGET_HALTED) {
                LOG_WARNING("target not halted");
                return ERROR_TARGET_NOT_HALTED;
@@ -1978,12 +1537,6 @@ static int aarch64_read_apb_ap_memory(struct target *target,
        reg = armv8_reg_current(arm, 0);
        reg->dirty = true;
 
-       /*      clear any abort  */
-       retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                               armv8->debug_base + CPUV8_DBG_DRCR, DRCR_CSE);
-       if (retval != ERROR_OK)
-               goto error_free_buff_r;
-
        /* Read DSCR */
        retval = mem_ap_read_atomic_u32(armv8->debug_ap,
                                armv8->debug_base + CPUV8_DBG_DSCR, &dscr);
@@ -1998,11 +1551,11 @@ static int aarch64_read_apb_ap_memory(struct target *target,
        if (arm->core_state == ARM_STATE_AARCH64) {
                /* Write X0 with value 'address' using write procedure */
                /* Step 1.a+b - Write the address for read access into DBGDTR_EL0 */
-               retval += aarch64_write_dcc_64(armv8, address & ~0x3ULL);
                /* Step 1.c   - Copy value from DTR to R0 using instruction mrs DBGDTR_EL0, x0 */
-               retval += aarch64_exec_opcode(target, ARMV8_MRS(SYSTEM_DBG_DBGDTR_EL0, 0), &dscr);
+               retval += dpm->instr_write_data_dcc_64(dpm,
+                               ARMV8_MRS(SYSTEM_DBG_DBGDTR_EL0, 0), address & ~0x3ULL);
                /* Step 1.d - Dummy operation to ensure EDSCR.Txfull == 1 */
-               retval += aarch64_exec_opcode(target, ARMV8_MSR_GP(SYSTEM_DBG_DBGDTR_EL0, 0), &dscr);
+               retval += dpm->instr_execute(dpm, ARMV8_MSR_GP(SYSTEM_DBG_DBGDTR_EL0, 0));
                /* Step 1.e - Change DCC to memory mode */
                dscr = dscr | DSCR_MA;
                retval +=  mem_ap_write_atomic_u32(armv8->debug_ap,
@@ -2013,13 +1566,11 @@ static int aarch64_read_apb_ap_memory(struct target *target,
        } else {
                /* Write R0 with value 'address' using write procedure */
                /* Step 1.a+b - Write the address for read access into DBGDTRRXint */
-               retval += aarch64_write_dcc(armv8, address & ~0x3ULL);
                /* Step 1.c   - Copy value from DTR to R0 using instruction mrc DBGDTRTXint, r0 */
-               retval += aarch64_exec_opcode(target,
-                               T32_FMTITR(ARMV4_5_MRC(14, 0, 0, 0, 5, 0)), &dscr);
+               retval += dpm->instr_write_data_dcc(dpm,
+                               ARMV4_5_MRC(14, 0, 0, 0, 5, 0), address & ~0x3ULL);
                /* Step 1.d - Dummy operation to ensure EDSCR.Txfull == 1 */
-               retval += aarch64_exec_opcode(target,
-                               T32_FMTITR(ARMV4_5_MCR(14, 0, 0, 0, 5, 0)), &dscr);
+               retval += dpm->instr_execute(dpm, ARMV4_5_MCR(14, 0, 0, 0, 5, 0));
                /* Step 1.e - Change DCC to memory mode */
                dscr = dscr | DSCR_MA;
                retval +=  mem_ap_write_atomic_u32(armv8->debug_ap,
@@ -2079,11 +1630,13 @@ static int aarch64_read_apb_ap_memory(struct target *target,
                                armv8->debug_base + CPUV8_DBG_DSCR, &dscr);
        if (retval != ERROR_OK)
                goto error_free_buff_r;
+
+       dpm->dscr = dscr;
+
        if (dscr & (DSCR_ERR | DSCR_SYS_ERROR_PEND)) {
                /* Abort occurred - clear it and exit */
                LOG_ERROR("abort occurred - dscr = 0x%08" PRIx32, dscr);
-               mem_ap_write_atomic_u32(armv8->debug_ap,
-                                       armv8->debug_base + CPUV8_DBG_DRCR, DRCR_CSE);
+               armv8_dpm_handle_exception(dpm);
                goto error_free_buff_r;
        }
 
@@ -2113,26 +1666,16 @@ static int aarch64_read_phys_memory(struct target *target,
        target_addr_t address, uint32_t size,
        uint32_t count, uint8_t *buffer)
 {
-       struct armv8_common *armv8 = target_to_armv8(target);
        int retval = ERROR_COMMAND_SYNTAX_ERROR;
-       struct adiv5_dap *swjdp = armv8->arm.dap;
-       uint8_t apsel = swjdp->apsel;
        LOG_DEBUG("Reading memory at real address 0x%" TARGET_PRIxADDR "; size %" PRId32 "; count %" PRId32,
                address, size, count);
 
        if (count && buffer) {
-
-               if (armv8->memory_ap_available && (apsel == armv8->memory_ap->ap_num)) {
-
-                       /* read memory through AHB-AP */
-                       retval = mem_ap_read_buf(armv8->memory_ap, buffer, size, count, address);
-               } else {
-                       /* read memory through APB-AP */
-                       retval = aarch64_mmu_modify(target, 0);
-                       if (retval != ERROR_OK)
-                               return retval;
-                       retval = aarch64_read_apb_ap_memory(target, address, size, count, buffer);
-               }
+               /* read memory through APB-AP */
+               retval = aarch64_mmu_modify(target, 0);
+               if (retval != ERROR_OK)
+                       return retval;
+               retval = aarch64_read_apb_ap_memory(target, address, size, count, buffer);
        }
        return retval;
 }
@@ -2141,11 +1684,7 @@ static int aarch64_read_memory(struct target *target, target_addr_t address,
        uint32_t size, uint32_t count, uint8_t *buffer)
 {
        int mmu_enabled = 0;
-       target_addr_t virt, phys;
        int retval;
-       struct armv8_common *armv8 = target_to_armv8(target);
-       struct adiv5_dap *swjdp = armv8->arm.dap;
-       uint8_t apsel = swjdp->apsel;
 
        /* aarch64 handles unaligned memory access */
        LOG_DEBUG("Reading memory at address 0x%" TARGET_PRIxADDR "; size %" PRId32 "; count %" PRId32, address,
@@ -2156,116 +1695,33 @@ static int aarch64_read_memory(struct target *target, target_addr_t address,
        if (retval != ERROR_OK)
                return retval;
 
-       if (armv8->memory_ap_available && (apsel == armv8->memory_ap->ap_num)) {
-               if (mmu_enabled) {
-                       virt = address;
-                       retval = aarch64_virt2phys(target, virt, &phys);
-                       if (retval != ERROR_OK)
-                               return retval;
-
-                       LOG_DEBUG("Reading at virtual address. Translating v:0x%" TARGET_PRIxADDR " to r:0x%" TARGET_PRIxADDR,
-                                 virt, phys);
-                       address = phys;
-               }
-               retval = aarch64_read_phys_memory(target, address, size, count,
-                                                 buffer);
-       } else {
-               if (mmu_enabled) {
-                       retval = aarch64_check_address(target, address);
-                       if (retval != ERROR_OK)
-                               return retval;
-                       /* enable MMU as we could have disabled it for phys
-                          access */
-                       retval = aarch64_mmu_modify(target, 1);
-                       if (retval != ERROR_OK)
-                               return retval;
-               }
-               retval = aarch64_read_apb_ap_memory(target, address, size,
-                                                   count, buffer);
+       if (mmu_enabled) {
+               retval = aarch64_check_address(target, address);
+               if (retval != ERROR_OK)
+                       return retval;
+               /* enable MMU as we could have disabled it for phys access */
+               retval = aarch64_mmu_modify(target, 1);
+               if (retval != ERROR_OK)
+                       return retval;
        }
-       return retval;
+       return aarch64_read_apb_ap_memory(target, address, size, count, buffer);
 }
 
 static int aarch64_write_phys_memory(struct target *target,
        target_addr_t address, uint32_t size,
        uint32_t count, const uint8_t *buffer)
 {
-       struct armv8_common *armv8 = target_to_armv8(target);
-       struct adiv5_dap *swjdp = armv8->arm.dap;
        int retval = ERROR_COMMAND_SYNTAX_ERROR;
-       uint8_t apsel = swjdp->apsel;
 
        LOG_DEBUG("Writing memory to real address 0x%" TARGET_PRIxADDR "; size %" PRId32 "; count %" PRId32, address,
                size, count);
 
        if (count && buffer) {
-
-               if (armv8->memory_ap_available && (apsel == armv8->memory_ap->ap_num)) {
-
-                       /* write memory through AHB-AP */
-                       retval = mem_ap_write_buf(armv8->memory_ap, buffer, size, count, address);
-               } else {
-
-                       /* write memory through APB-AP */
-                       retval = aarch64_mmu_modify(target, 0);
-                       if (retval != ERROR_OK)
-                               return retval;
-                       return aarch64_write_apb_ap_memory(target, address, size, count, buffer);
-               }
-       }
-
-       /* REVISIT this op is generic ARMv7-A/R stuff */
-       if (retval == ERROR_OK && target->state == TARGET_HALTED) {
-               struct arm_dpm *dpm = armv8->arm.dpm;
-
-               retval = dpm->prepare(dpm);
+               /* write memory through APB-AP */
+               retval = aarch64_mmu_modify(target, 0);
                if (retval != ERROR_OK)
                        return retval;
-
-               /* The Cache handling will NOT work with MMU active, the
-                * wrong addresses will be invalidated!
-                *
-                * For both ICache and DCache, walk all cache lines in the
-                * address range. Cortex-A has fixed 64 byte line length.
-                *
-                * REVISIT per ARMv7, these may trigger watchpoints ...
-                */
-
-               /* invalidate I-Cache */
-               if (armv8->armv8_mmu.armv8_cache.i_cache_enabled) {
-                       /* ICIMVAU - Invalidate Cache single entry
-                        * with MVA to PoU
-                        *      MCR p15, 0, r0, c7, c5, 1
-                        */
-                       for (uint32_t cacheline = 0;
-                               cacheline < size * count;
-                               cacheline += 64) {
-                               retval = dpm->instr_write_data_r0(dpm,
-                                               ARMV8_MSR_GP(SYSTEM_ICIVAU, 0),
-                                               address + cacheline);
-                               if (retval != ERROR_OK)
-                                       return retval;
-                       }
-               }
-
-               /* invalidate D-Cache */
-               if (armv8->armv8_mmu.armv8_cache.d_u_cache_enabled) {
-                       /* DCIMVAC - Invalidate data Cache line
-                        * with MVA to PoC
-                        *      MCR p15, 0, r0, c7, c6, 1
-                        */
-                       for (uint32_t cacheline = 0;
-                               cacheline < size * count;
-                               cacheline += 64) {
-                               retval = dpm->instr_write_data_r0(dpm,
-                                               ARMV8_MSR_GP(SYSTEM_DCCVAU, 0),
-                                               address + cacheline);
-                               if (retval != ERROR_OK)
-                                       return retval;
-                       }
-               }
-
-               /* (void) */ dpm->finish(dpm);
+               return aarch64_write_apb_ap_memory(target, address, size, count, buffer);
        }
 
        return retval;
@@ -2275,11 +1731,7 @@ static int aarch64_write_memory(struct target *target, target_addr_t address,
        uint32_t size, uint32_t count, const uint8_t *buffer)
 {
        int mmu_enabled = 0;
-       target_addr_t virt, phys;
        int retval;
-       struct armv8_common *armv8 = target_to_armv8(target);
-       struct adiv5_dap *swjdp = armv8->arm.dap;
-       uint8_t apsel = swjdp->apsel;
 
        /* aarch64 handles unaligned memory access */
        LOG_DEBUG("Writing memory at address 0x%" TARGET_PRIxADDR "; size %" PRId32
@@ -2290,34 +1742,16 @@ static int aarch64_write_memory(struct target *target, target_addr_t address,
        if (retval != ERROR_OK)
                return retval;
 
-       if (armv8->memory_ap_available && (apsel == armv8->memory_ap->ap_num)) {
-               LOG_DEBUG("Writing memory to address 0x%" TARGET_PRIxADDR "; size %"
-                         PRId32 "; count %" PRId32, address, size, count);
-               if (mmu_enabled) {
-                       virt = address;
-                       retval = aarch64_virt2phys(target, virt, &phys);
-                       if (retval != ERROR_OK)
-                               return retval;
-
-                       LOG_DEBUG("Writing to virtual address. Translating v:0x%"
-                                 TARGET_PRIxADDR " to r:0x%" TARGET_PRIxADDR, virt, phys);
-                       address = phys;
-               }
-               retval = aarch64_write_phys_memory(target, address, size,
-                               count, buffer);
-       } else {
-               if (mmu_enabled) {
-                       retval = aarch64_check_address(target, address);
-                       if (retval != ERROR_OK)
-                               return retval;
-                       /* enable MMU as we could have disabled it for phys access */
-                       retval = aarch64_mmu_modify(target, 1);
-                       if (retval != ERROR_OK)
-                               return retval;
-               }
-               retval = aarch64_write_apb_ap_memory(target, address, size, count, buffer);
+       if (mmu_enabled) {
+               retval = aarch64_check_address(target, address);
+               if (retval != ERROR_OK)
+                       return retval;
+               /* enable MMU as we could have disabled it for phys access */
+               retval = aarch64_mmu_modify(target, 1);
+               if (retval != ERROR_OK)
+                       return retval;
        }
-       return retval;
+       return aarch64_write_apb_ap_memory(target, address, size, count, buffer);
 }
 
 static int aarch64_handle_target_request(void *priv)
@@ -2386,20 +1820,6 @@ static int aarch64_examine_first(struct target *target)
 
        armv8->debug_ap->memaccess_tck = 80;
 
-       /* Search for the AHB-AB */
-       armv8->memory_ap_available = false;
-       retval = dap_find_ap(swjdp, AP_TYPE_AHB_AP, &armv8->memory_ap);
-       if (retval == ERROR_OK) {
-               retval = mem_ap_init(armv8->memory_ap);
-               if (retval == ERROR_OK)
-                       armv8->memory_ap_available = true;
-       }
-       if (retval != ERROR_OK) {
-               /* AHB-AP not found or unavailable - use the CPU */
-               LOG_DEBUG("No AHB-AP available for memory access");
-       }
-
-
        if (!target->dbgbase_set) {
                uint32_t dbgbase;
                /* Get ROM Table base */
@@ -2472,12 +1892,6 @@ static int aarch64_examine_first(struct target *target)
        } else
                armv8->cti_base = target->ctibase;
 
-       retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                       armv8->cti_base + CTI_UNLOCK , 0xC5ACCE55);
-       if (retval != ERROR_OK)
-               return retval;
-
-
        armv8->arm.core_type = ARM_MODE_MON;
        retval = aarch64_dpm_setup(aarch64, debug);
        if (retval != ERROR_OK)
@@ -2590,22 +2004,7 @@ static int aarch64_mmu(struct target *target, int *enabled)
 static int aarch64_virt2phys(struct target *target, target_addr_t virt,
                             target_addr_t *phys)
 {
-       int retval = ERROR_FAIL;
-       struct armv8_common *armv8 = target_to_armv8(target);
-       struct adiv5_dap *swjdp = armv8->arm.dap;
-       uint8_t apsel = swjdp->apsel;
-       if (armv8->memory_ap_available && (apsel == armv8->memory_ap->ap_num)) {
-               uint32_t ret;
-               retval = armv8_mmu_translate_va(target,
-                               virt, &ret);
-               if (retval != ERROR_OK)
-                       goto done;
-               *phys = ret;
-       } else {
-               LOG_ERROR("AAR64 processor not support translate va to pa");
-       }
-done:
-       return retval;
+       return armv8_mmu_translate_va_pa(target, virt, phys, 1);
 }
 
 COMMAND_HANDLER(aarch64_handle_cache_info_command)