aarch64: improve debug output
[fw/openocd] / src / target / aarch64.c
index 78d19731e757a65790fd6d5f736dbca5de7f8c4f..f0ac32c12a357c87a192b9041f22d771d5c04807 100644 (file)
@@ -26,7 +26,8 @@
 #include "register.h"
 #include "target_request.h"
 #include "target_type.h"
-#include "arm_opcodes.h"
+#include "armv8_opcodes.h"
+#include "armv8_cache.h"
 #include <helper/time_support.h>
 
 static int aarch64_poll(struct target *target);
@@ -43,10 +44,8 @@ static int aarch64_unset_breakpoint(struct target *target,
 static int aarch64_mmu(struct target *target, int *enabled);
 static int aarch64_virt2phys(struct target *target,
        target_addr_t virt, target_addr_t *phys);
-static int aarch64_read_apb_ab_memory(struct target *target,
+static int aarch64_read_apb_ap_memory(struct target *target,
        uint64_t address, uint32_t size, uint32_t count, uint8_t *buffer);
-static int aarch64_instr_write_data_r0(struct arm_dpm *dpm,
-       uint32_t opcode, uint32_t data);
 
 static int aarch64_restore_system_control_reg(struct target *target)
 {
@@ -57,11 +56,44 @@ static int aarch64_restore_system_control_reg(struct target *target)
 
        if (aarch64->system_control_reg != aarch64->system_control_reg_curr) {
                aarch64->system_control_reg_curr = aarch64->system_control_reg;
-               retval = aarch64_instr_write_data_r0(armv8->arm.dpm,
-                                                    0xd5181000,
-                                                    aarch64->system_control_reg);
+               /* LOG_INFO("cp15_control_reg: %8.8" PRIx32, cortex_v8->cp15_control_reg); */
+
+               switch (armv8->arm.core_mode) {
+                       case ARMV8_64_EL0T:
+                       case ARMV8_64_EL1T:
+                       case ARMV8_64_EL1H:
+                               retval = armv8->arm.msr(target, 3, /*op 0*/
+                                               0, 1,   /* op1, op2 */
+                                               0, 0,   /* CRn, CRm */
+                                               aarch64->system_control_reg);
+                               if (retval != ERROR_OK)
+                                       return retval;
+                       break;
+                       case ARMV8_64_EL2T:
+                       case ARMV8_64_EL2H:
+                               retval = armv8->arm.msr(target, 3, /*op 0*/
+                                               4, 1,   /* op1, op2 */
+                                               0, 0,   /* CRn, CRm */
+                                               aarch64->system_control_reg);
+                               if (retval != ERROR_OK)
+                                       return retval;
+                       break;
+                       case ARMV8_64_EL3H:
+                       case ARMV8_64_EL3T:
+                               retval = armv8->arm.msr(target, 3, /*op 0*/
+                                               6, 1,   /* op1, op2 */
+                                               0, 0,   /* CRn, CRm */
+                                               aarch64->system_control_reg);
+                               if (retval != ERROR_OK)
+                                       return retval;
+                       break;
+                       default:
+                               retval = armv8->arm.mcr(target, 15, 0, 0, 1, 0, aarch64->system_control_reg);
+                               if (retval != ERROR_OK)
+                                       return retval;
+                               break;
+                       }
        }
-
        return retval;
 }
 
@@ -82,16 +114,45 @@ static int aarch64_mmu_modify(struct target *target, int enable)
        int retval = ERROR_OK;
 
        if (enable) {
-               /*  if mmu enabled at target stop and mmu not enable */
+               /*      if mmu enabled at target stop and mmu not enable */
                if (!(aarch64->system_control_reg & 0x1U)) {
                        LOG_ERROR("trying to enable mmu on target stopped with mmu disable");
                        return ERROR_FAIL;
                }
                if (!(aarch64->system_control_reg_curr & 0x1U)) {
                        aarch64->system_control_reg_curr |= 0x1U;
-                       retval = aarch64_instr_write_data_r0(armv8->arm.dpm,
-                                                            0xd5181000,
-                                                            aarch64->system_control_reg_curr);
+                       switch (armv8->arm.core_mode) {
+                               case ARMV8_64_EL0T:
+                               case ARMV8_64_EL1T:
+                               case ARMV8_64_EL1H:
+                                       retval = armv8->arm.msr(target, 3, /*op 0*/
+                                                       0, 0,   /* op1, op2 */
+                                                       1, 0,   /* CRn, CRm */
+                                                       aarch64->system_control_reg_curr);
+                                       if (retval != ERROR_OK)
+                                               return retval;
+                               break;
+                               case ARMV8_64_EL2T:
+                               case ARMV8_64_EL2H:
+                                       retval = armv8->arm.msr(target, 3, /*op 0*/
+                                                       4, 0,   /* op1, op2 */
+                                                       1, 0,   /* CRn, CRm */
+                                                       aarch64->system_control_reg_curr);
+                                       if (retval != ERROR_OK)
+                                               return retval;
+                               break;
+                               case ARMV8_64_EL3H:
+                               case ARMV8_64_EL3T:
+                                       retval = armv8->arm.msr(target, 3, /*op 0*/
+                                                       6, 0,   /* op1, op2 */
+                                                       1, 0,   /* CRn, CRm */
+                                                       aarch64->system_control_reg_curr);
+                                       if (retval != ERROR_OK)
+                                               return retval;
+                               break;
+                               default:
+                                       LOG_DEBUG("unknow cpu state 0x%x" PRIx32, armv8->arm.core_state);
+                       }
                }
        } else {
                if (aarch64->system_control_reg_curr & 0x4U) {
@@ -103,9 +164,39 @@ static int aarch64_mmu_modify(struct target *target, int enable)
                }
                if ((aarch64->system_control_reg_curr & 0x1U)) {
                        aarch64->system_control_reg_curr &= ~0x1U;
-                       retval = aarch64_instr_write_data_r0(armv8->arm.dpm,
-                                                            0xd5181000,
-                                                            aarch64->system_control_reg_curr);
+                       switch (armv8->arm.core_mode) {
+                               case ARMV8_64_EL0T:
+                               case ARMV8_64_EL1T:
+                               case ARMV8_64_EL1H:
+                                       retval = armv8->arm.msr(target, 3, /*op 0*/
+                                                       0, 0,   /* op1, op2 */
+                                                       1, 0,   /* CRn, CRm */
+                                                       aarch64->system_control_reg_curr);
+                                       if (retval != ERROR_OK)
+                                               return retval;
+                                       break;
+                               case ARMV8_64_EL2T:
+                               case ARMV8_64_EL2H:
+                                       retval = armv8->arm.msr(target, 3, /*op 0*/
+                                                       4, 0,   /* op1, op2 */
+                                                       1, 0,   /* CRn, CRm */
+                                                       aarch64->system_control_reg_curr);
+                                       if (retval != ERROR_OK)
+                                               return retval;
+                                       break;
+                               case ARMV8_64_EL3H:
+                               case ARMV8_64_EL3T:
+                                       retval = armv8->arm.msr(target, 3, /*op 0*/
+                                                       6, 0,   /* op1, op2 */
+                                                       1, 0,   /* CRn, CRm */
+                                                       aarch64->system_control_reg_curr);
+                                       if (retval != ERROR_OK)
+                                               return retval;
+                                       break;
+                               default:
+                                       LOG_DEBUG("unknow cpu state 0x%x" PRIx32, armv8->arm.core_state);
+                                       break;
+                       }
                }
        }
        return retval;
@@ -122,87 +213,42 @@ static int aarch64_init_debug_access(struct target *target)
 
        LOG_DEBUG(" ");
 
-       /* Unlocking the debug registers for modification
-        * The debugport might be uninitialised so try twice */
-       retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                            armv8->debug_base + CPUDBG_LOCKACCESS, 0xC5ACCE55);
-       if (retval != ERROR_OK) {
-               /* try again */
-               retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                            armv8->debug_base + CPUDBG_LOCKACCESS, 0xC5ACCE55);
-               if (retval == ERROR_OK)
-                       LOG_USER("Locking debug access failed on first, but succeeded on second try.");
-       }
-       if (retval != ERROR_OK)
-               return retval;
        /* Clear Sticky Power Down status Bit in PRSR to enable access to
           the registers in the Core Power Domain */
        retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + CPUDBG_PRSR, &dummy);
+                       armv8->debug_base + CPUV8_DBG_PRSR, &dummy);
        if (retval != ERROR_OK)
                return retval;
 
-       /* Enabling of instruction execution in debug mode is done in debug_entry code */
-
-       /* Resync breakpoint registers */
-
-       /* Since this is likely called from init or reset, update target state information*/
-       return aarch64_poll(target);
-}
-
-/* To reduce needless round-trips, pass in a pointer to the current
- * DSCR value.  Initialize it to zero if you just need to know the
- * value on return from this function; or DSCR_INSTR_COMP if you
- * happen to know that no instruction is pending.
- */
-static int aarch64_exec_opcode(struct target *target,
-       uint32_t opcode, uint32_t *dscr_p)
-{
-       uint32_t dscr;
-       int retval;
-       struct armv8_common *armv8 = target_to_armv8(target);
-       dscr = dscr_p ? *dscr_p : 0;
-
-       LOG_DEBUG("exec opcode 0x%08" PRIx32, opcode);
-
-       /* Wait for InstrCompl bit to be set */
-       long long then = timeval_ms();
-       while ((dscr & DSCR_INSTR_COMP) == 0) {
-               retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                               armv8->debug_base + CPUDBG_DSCR, &dscr);
-               if (retval != ERROR_OK) {
-                       LOG_ERROR("Could not read DSCR register, opcode = 0x%08" PRIx32, opcode);
-                       return retval;
-               }
-               if (timeval_ms() > then + 1000) {
-                       LOG_ERROR("Timeout waiting for aarch64_exec_opcode");
-                       return ERROR_FAIL;
-               }
-       }
+       /*
+        * Static CTI configuration:
+        * Channel 0 -> trigger outputs HALT request to PE
+        * Channel 1 -> trigger outputs Resume request to PE
+        * Gate all channel trigger events from entering the CTM
+        */
 
-       retval = mem_ap_write_u32(armv8->debug_ap,
-                       armv8->debug_base + CPUDBG_ITR, opcode);
+       /* Enable CTI */
+       retval = mem_ap_write_atomic_u32(armv8->debug_ap,
+                       armv8->cti_base + CTI_CTR, 1);
+       /* By default, gate all channel triggers to and from the CTM */
+       if (retval == ERROR_OK)
+               retval = mem_ap_write_atomic_u32(armv8->debug_ap,
+                               armv8->cti_base + CTI_GATE, 0);
+       /* output halt requests to PE on channel 0 trigger */
+       if (retval == ERROR_OK)
+               retval = mem_ap_write_atomic_u32(armv8->debug_ap,
+                               armv8->cti_base + CTI_OUTEN0, CTI_CHNL(0));
+       /* output restart requests to PE on channel 1 trigger */
+       if (retval == ERROR_OK)
+               retval = mem_ap_write_atomic_u32(armv8->debug_ap,
+                               armv8->cti_base + CTI_OUTEN1, CTI_CHNL(1));
        if (retval != ERROR_OK)
                return retval;
 
-       then = timeval_ms();
-       do {
-               retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                               armv8->debug_base + CPUDBG_DSCR, &dscr);
-               if (retval != ERROR_OK) {
-                       LOG_ERROR("Could not read DSCR register");
-                       return retval;
-               }
-               if (timeval_ms() > then + 1000) {
-                       LOG_ERROR("Timeout waiting for aarch64_exec_opcode");
-                       return ERROR_FAIL;
-               }
-       } while ((dscr & DSCR_INSTR_COMP) == 0);        /* Wait for InstrCompl bit to be set */
-
-       if (dscr_p)
-               *dscr_p = dscr;
+       /* Resync breakpoint registers */
 
-       return retval;
+       /* Since this is likely called from init or reset, update target state information*/
+       return aarch64_poll(target);
 }
 
 /* Write to memory mapped registers directly with no cache or mmu handling */
@@ -218,448 +264,43 @@ static int aarch64_dap_write_memap_register_u32(struct target *target,
        return retval;
 }
 
-/*
- * AARCH64 implementation of Debug Programmer's Model
- *
- * NOTE the invariant:  these routines return with DSCR_INSTR_COMP set,
- * so there's no need to poll for it before executing an instruction.
- *
- * NOTE that in several of these cases the "stall" mode might be useful.
- * It'd let us queue a few operations together... prepare/finish might
- * be the places to enable/disable that mode.
- */
-
-static inline struct aarch64_common *dpm_to_a8(struct arm_dpm *dpm)
-{
-       return container_of(dpm, struct aarch64_common, armv8_common.dpm);
-}
-
-static int aarch64_write_dcc(struct aarch64_common *a8, uint32_t data)
-{
-       LOG_DEBUG("write DCC 0x%08" PRIx32, data);
-       return mem_ap_write_u32(a8->armv8_common.debug_ap,
-                               a8->armv8_common.debug_base + CPUDBG_DTRRX, data);
-}
-
-static int aarch64_write_dcc_64(struct aarch64_common *a8, uint64_t data)
-{
-       int ret;
-       LOG_DEBUG("write DCC 0x%08" PRIx32, (unsigned)data);
-       LOG_DEBUG("write DCC 0x%08" PRIx32, (unsigned)(data >> 32));
-       ret = mem_ap_write_u32(a8->armv8_common.debug_ap,
-                              a8->armv8_common.debug_base + CPUDBG_DTRRX, data);
-       ret += mem_ap_write_u32(a8->armv8_common.debug_ap,
-                               a8->armv8_common.debug_base + CPUDBG_DTRTX, data >> 32);
-       return ret;
-}
-
-static int aarch64_read_dcc(struct aarch64_common *a8, uint32_t *data,
-       uint32_t *dscr_p)
-{
-       uint32_t dscr = DSCR_INSTR_COMP;
-       int retval;
-
-       if (dscr_p)
-               dscr = *dscr_p;
-
-       /* Wait for DTRRXfull */
-       long long then = timeval_ms();
-       while ((dscr & DSCR_DTR_TX_FULL) == 0) {
-               retval = mem_ap_read_atomic_u32(a8->armv8_common.debug_ap,
-                               a8->armv8_common.debug_base + CPUDBG_DSCR,
-                               &dscr);
-               if (retval != ERROR_OK)
-                       return retval;
-               if (timeval_ms() > then + 1000) {
-                       LOG_ERROR("Timeout waiting for read dcc");
-                       return ERROR_FAIL;
-               }
-       }
-
-       retval = mem_ap_read_atomic_u32(a8->armv8_common.debug_ap,
-                                           a8->armv8_common.debug_base + CPUDBG_DTRTX,
-                                           data);
-       if (retval != ERROR_OK)
-               return retval;
-       LOG_DEBUG("read DCC 0x%08" PRIx32, *data);
-
-       if (dscr_p)
-               *dscr_p = dscr;
-
-       return retval;
-}
-static int aarch64_read_dcc_64(struct aarch64_common *a8, uint64_t *data,
-       uint32_t *dscr_p)
+static int aarch64_dpm_setup(struct aarch64_common *a8, uint64_t debug)
 {
-       uint32_t dscr = DSCR_INSTR_COMP;
-       uint32_t higher;
+       struct arm_dpm *dpm = &a8->armv8_common.dpm;
        int retval;
 
-       if (dscr_p)
-               dscr = *dscr_p;
-
-       /* Wait for DTRRXfull */
-       long long then = timeval_ms();
-       while ((dscr & DSCR_DTR_TX_FULL) == 0) {
-               retval = mem_ap_read_atomic_u32(a8->armv8_common.debug_ap,
-                               a8->armv8_common.debug_base + CPUDBG_DSCR,
-                               &dscr);
-               if (retval != ERROR_OK)
-                       return retval;
-               if (timeval_ms() > then + 1000) {
-                       LOG_ERROR("Timeout waiting for read dcc");
-                       return ERROR_FAIL;
-               }
-       }
-
-       retval = mem_ap_read_atomic_u32(a8->armv8_common.debug_ap,
-                                           a8->armv8_common.debug_base + CPUDBG_DTRTX,
-                                           (uint32_t *)data);
-       if (retval != ERROR_OK)
-               return retval;
-
-       retval = mem_ap_read_atomic_u32(a8->armv8_common.debug_ap,
-                                           a8->armv8_common.debug_base + CPUDBG_DTRRX,
-                                           &higher);
-       if (retval != ERROR_OK)
-               return retval;
-
-       *data = *(uint32_t *)data | (uint64_t)higher << 32;
-       LOG_DEBUG("read DCC 0x%16.16" PRIx64, *data);
+       dpm->arm = &a8->armv8_common.arm;
+       dpm->didr = debug;
 
-       if (dscr_p)
-               *dscr_p = dscr;
+       retval = armv8_dpm_setup(dpm);
+       if (retval == ERROR_OK)
+               retval = armv8_dpm_initialize(dpm);
 
        return retval;
 }
 
-static int aarch64_dpm_prepare(struct arm_dpm *dpm)
+static int aarch64_set_dscr_bits(struct target *target, unsigned long bit_mask, unsigned long value)
 {
-       struct aarch64_common *a8 = dpm_to_a8(dpm);
+       struct armv8_common *armv8 = target_to_armv8(target);
        uint32_t dscr;
-       int retval;
-
-       /* set up invariant:  INSTR_COMP is set after ever DPM operation */
-       long long then = timeval_ms();
-       for (;; ) {
-               retval = mem_ap_read_atomic_u32(a8->armv8_common.debug_ap,
-                               a8->armv8_common.debug_base + CPUDBG_DSCR,
-                               &dscr);
-               if (retval != ERROR_OK)
-                       return retval;
-               if ((dscr & DSCR_INSTR_COMP) != 0)
-                       break;
-               if (timeval_ms() > then + 1000) {
-                       LOG_ERROR("Timeout waiting for dpm prepare");
-                       return ERROR_FAIL;
-               }
-       }
-
-       /* this "should never happen" ... */
-       if (dscr & DSCR_DTR_RX_FULL) {
-               LOG_ERROR("DSCR_DTR_RX_FULL, dscr 0x%08" PRIx32, dscr);
-               /* Clear DCCRX */
-               retval = aarch64_exec_opcode(
-                               a8->armv8_common.arm.target,
-                               0xd5130400,
-                               &dscr);
-               if (retval != ERROR_OK)
-                       return retval;
-       }
-
-       return retval;
-}
-
-static int aarch64_dpm_finish(struct arm_dpm *dpm)
-{
-       /* REVISIT what could be done here? */
-       return ERROR_OK;
-}
-
-static int aarch64_instr_write_data_dcc(struct arm_dpm *dpm,
-       uint32_t opcode, uint32_t data)
-{
-       struct aarch64_common *a8 = dpm_to_a8(dpm);
-       int retval;
-       uint32_t dscr = DSCR_INSTR_COMP;
-
-       retval = aarch64_write_dcc(a8, data);
-       if (retval != ERROR_OK)
-               return retval;
-
-       return aarch64_exec_opcode(
-                       a8->armv8_common.arm.target,
-                       opcode,
-                       &dscr);
-}
-
-static int aarch64_instr_write_data_dcc_64(struct arm_dpm *dpm,
-       uint32_t opcode, uint64_t data)
-{
-       struct aarch64_common *a8 = dpm_to_a8(dpm);
-       int retval;
-       uint32_t dscr = DSCR_INSTR_COMP;
-
-       retval = aarch64_write_dcc_64(a8, data);
-       if (retval != ERROR_OK)
-               return retval;
-
-       return aarch64_exec_opcode(
-                       a8->armv8_common.arm.target,
-                       opcode,
-                       &dscr);
-}
-
-static int aarch64_instr_write_data_r0(struct arm_dpm *dpm,
-       uint32_t opcode, uint32_t data)
-{
-       struct aarch64_common *a8 = dpm_to_a8(dpm);
-       uint32_t dscr = DSCR_INSTR_COMP;
-       int retval;
-
-       retval = aarch64_write_dcc(a8, data);
-       if (retval != ERROR_OK)
-               return retval;
-
-       retval = aarch64_exec_opcode(
-                       a8->armv8_common.arm.target,
-                       0xd5330500,
-                       &dscr);
-       if (retval != ERROR_OK)
-               return retval;
-
-       /* then the opcode, taking data from R0 */
-       retval = aarch64_exec_opcode(
-                       a8->armv8_common.arm.target,
-                       opcode,
-                       &dscr);
-
-       return retval;
-}
-
-static int aarch64_instr_write_data_r0_64(struct arm_dpm *dpm,
-       uint32_t opcode, uint64_t data)
-{
-       struct aarch64_common *a8 = dpm_to_a8(dpm);
-       uint32_t dscr = DSCR_INSTR_COMP;
-       int retval;
-
-       retval = aarch64_write_dcc_64(a8, data);
-       if (retval != ERROR_OK)
-               return retval;
-
-       retval = aarch64_exec_opcode(
-                       a8->armv8_common.arm.target,
-                       0xd5330400,
-                       &dscr);
-       if (retval != ERROR_OK)
-               return retval;
-
-       /* then the opcode, taking data from R0 */
-       retval = aarch64_exec_opcode(
-                       a8->armv8_common.arm.target,
-                       opcode,
-                       &dscr);
-
-       return retval;
-}
-
-static int aarch64_instr_cpsr_sync(struct arm_dpm *dpm)
-{
-       struct target *target = dpm->arm->target;
-       uint32_t dscr = DSCR_INSTR_COMP;
-
-       /* "Prefetch flush" after modifying execution status in CPSR */
-       return aarch64_exec_opcode(target,
-                       ARMV4_5_MCR(15, 0, 0, 7, 5, 4),
-                       &dscr);
-}
-
-static int aarch64_instr_read_data_dcc(struct arm_dpm *dpm,
-       uint32_t opcode, uint32_t *data)
-{
-       struct aarch64_common *a8 = dpm_to_a8(dpm);
-       int retval;
-       uint32_t dscr = DSCR_INSTR_COMP;
 
-       /* the opcode, writing data to DCC */
-       retval = aarch64_exec_opcode(
-                       a8->armv8_common.arm.target,
-                       opcode,
-                       &dscr);
-       if (retval != ERROR_OK)
-               return retval;
-
-       return aarch64_read_dcc(a8, data, &dscr);
-}
-
-static int aarch64_instr_read_data_dcc_64(struct arm_dpm *dpm,
-       uint32_t opcode, uint64_t *data)
-{
-       struct aarch64_common *a8 = dpm_to_a8(dpm);
-       int retval;
-       uint32_t dscr = DSCR_INSTR_COMP;
-
-       /* the opcode, writing data to DCC */
-       retval = aarch64_exec_opcode(
-                       a8->armv8_common.arm.target,
-                       opcode,
-                       &dscr);
-       if (retval != ERROR_OK)
-               return retval;
-
-       return aarch64_read_dcc_64(a8, data, &dscr);
-}
-
-static int aarch64_instr_read_data_r0(struct arm_dpm *dpm,
-       uint32_t opcode, uint32_t *data)
-{
-       struct aarch64_common *a8 = dpm_to_a8(dpm);
-       uint32_t dscr = DSCR_INSTR_COMP;
-       int retval;
-
-       /* the opcode, writing data to R0 */
-       retval = aarch64_exec_opcode(
-                       a8->armv8_common.arm.target,
-                       opcode,
-                       &dscr);
-       if (retval != ERROR_OK)
-               return retval;
-
-       /* write R0 to DCC */
-       retval = aarch64_exec_opcode(
-                       a8->armv8_common.arm.target,
-                       0xd5130400,  /* msr dbgdtr_el0, x0 */
-                       &dscr);
-       if (retval != ERROR_OK)
-               return retval;
-
-       return aarch64_read_dcc(a8, data, &dscr);
-}
-
-static int aarch64_instr_read_data_r0_64(struct arm_dpm *dpm,
-       uint32_t opcode, uint64_t *data)
-{
-       struct aarch64_common *a8 = dpm_to_a8(dpm);
-       uint32_t dscr = DSCR_INSTR_COMP;
-       int retval;
-
-       /* the opcode, writing data to R0 */
-       retval = aarch64_exec_opcode(
-                       a8->armv8_common.arm.target,
-                       opcode,
-                       &dscr);
-       if (retval != ERROR_OK)
-               return retval;
-
-       /* write R0 to DCC */
-       retval = aarch64_exec_opcode(
-                       a8->armv8_common.arm.target,
-                       0xd5130400,  /* msr dbgdtr_el0, x0 */
-                       &dscr);
-       if (retval != ERROR_OK)
+       /* Read DSCR */
+       int retval = mem_ap_read_atomic_u32(armv8->debug_ap,
+                       armv8->debug_base + CPUV8_DBG_DSCR, &dscr);
+       if (ERROR_OK != retval)
                return retval;
 
-       return aarch64_read_dcc_64(a8, data, &dscr);
-}
-
-static int aarch64_bpwp_enable(struct arm_dpm *dpm, unsigned index_t,
-       uint32_t addr, uint32_t control)
-{
-       struct aarch64_common *a8 = dpm_to_a8(dpm);
-       uint32_t vr = a8->armv8_common.debug_base;
-       uint32_t cr = a8->armv8_common.debug_base;
-       int retval;
-
-       switch (index_t) {
-               case 0 ... 15:  /* breakpoints */
-                       vr += CPUDBG_BVR_BASE;
-                       cr += CPUDBG_BCR_BASE;
-                       break;
-               case 16 ... 31: /* watchpoints */
-                       vr += CPUDBG_WVR_BASE;
-                       cr += CPUDBG_WCR_BASE;
-                       index_t -= 16;
-                       break;
-               default:
-                       return ERROR_FAIL;
-       }
-       vr += 4 * index_t;
-       cr += 4 * index_t;
-
-       LOG_DEBUG("A8: bpwp enable, vr %08x cr %08x",
-               (unsigned) vr, (unsigned) cr);
+       /* clear bitfield */
+       dscr &= ~bit_mask;
+       /* put new value */
+       dscr |= value & bit_mask;
 
-       retval = aarch64_dap_write_memap_register_u32(dpm->arm->target,
-                       vr, addr);
-       if (retval != ERROR_OK)
-               return retval;
-       retval = aarch64_dap_write_memap_register_u32(dpm->arm->target,
-                       cr, control);
+       /* write new DSCR */
+       retval = mem_ap_write_atomic_u32(armv8->debug_ap,
+                       armv8->debug_base + CPUV8_DBG_DSCR, dscr);
        return retval;
 }
 
-static int aarch64_bpwp_disable(struct arm_dpm *dpm, unsigned index_t)
-{
-       return ERROR_OK;
-
-#if 0
-       struct aarch64_common *a8 = dpm_to_a8(dpm);
-       uint32_t cr;
-
-       switch (index_t) {
-               case 0 ... 15:
-                       cr = a8->armv8_common.debug_base + CPUDBG_BCR_BASE;
-                       break;
-               case 16 ... 31:
-                       cr = a8->armv8_common.debug_base + CPUDBG_WCR_BASE;
-                       index_t -= 16;
-                       break;
-               default:
-                       return ERROR_FAIL;
-       }
-       cr += 4 * index_t;
-
-       LOG_DEBUG("A8: bpwp disable, cr %08x", (unsigned) cr);
-
-       /* clear control register */
-       return aarch64_dap_write_memap_register_u32(dpm->arm->target, cr, 0);
-#endif
-}
-
-static int aarch64_dpm_setup(struct aarch64_common *a8, uint32_t debug)
-{
-       struct arm_dpm *dpm = &a8->armv8_common.dpm;
-       int retval;
-
-       dpm->arm = &a8->armv8_common.arm;
-       dpm->didr = debug;
-
-       dpm->prepare = aarch64_dpm_prepare;
-       dpm->finish = aarch64_dpm_finish;
-
-       dpm->instr_write_data_dcc = aarch64_instr_write_data_dcc;
-       dpm->instr_write_data_dcc_64 = aarch64_instr_write_data_dcc_64;
-       dpm->instr_write_data_r0 = aarch64_instr_write_data_r0;
-       dpm->instr_write_data_r0_64 = aarch64_instr_write_data_r0_64;
-       dpm->instr_cpsr_sync = aarch64_instr_cpsr_sync;
-
-       dpm->instr_read_data_dcc = aarch64_instr_read_data_dcc;
-       dpm->instr_read_data_dcc_64 = aarch64_instr_read_data_dcc_64;
-       dpm->instr_read_data_r0 = aarch64_instr_read_data_r0;
-       dpm->instr_read_data_r0_64 = aarch64_instr_read_data_r0_64;
-
-       dpm->arm_reg_current = armv8_reg_current;
-
-       dpm->bpwp_enable = aarch64_bpwp_enable;
-       dpm->bpwp_disable = aarch64_bpwp_disable;
-
-       retval = arm_dpm_setup(dpm);
-       if (retval == ERROR_OK)
-               retval = arm_dpm_initialize(dpm);
-
-       return retval;
-}
 static struct target *get_aarch64(struct target *target, int32_t coreid)
 {
        struct target_list *head;
@@ -678,16 +319,30 @@ static int aarch64_halt(struct target *target);
 
 static int aarch64_halt_smp(struct target *target)
 {
-       int retval = 0;
-       struct target_list *head;
-       struct target *curr;
-       head = target->head;
+       int retval = ERROR_OK;
+       struct target_list *head = target->head;
+
        while (head != (struct target_list *)NULL) {
-               curr = head->target;
-               if ((curr != target) && (curr->state != TARGET_HALTED))
-                       retval += aarch64_halt(curr);
+               struct target *curr = head->target;
+               struct armv8_common *armv8 = target_to_armv8(curr);
+
+               /* open the gate for channel 0 to let HALT requests pass to the CTM */
+               if (curr->smp) {
+                       retval = mem_ap_write_atomic_u32(armv8->debug_ap,
+                                       armv8->cti_base + CTI_GATE, CTI_CHNL(0));
+                       if (retval == ERROR_OK)
+                               retval = aarch64_set_dscr_bits(curr, DSCR_HDE, DSCR_HDE);
+               }
+               if (retval != ERROR_OK)
+                       break;
+
                head = head->next;
        }
+
+       /* halt the target PE */
+       if (retval == ERROR_OK)
+               retval = aarch64_halt(target);
+
        return retval;
 }
 
@@ -726,15 +381,15 @@ static int aarch64_poll(struct target *target)
                return retval;
        }
        retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + CPUDBG_DSCR, &dscr);
+                       armv8->debug_base + CPUV8_DBG_DSCR, &dscr);
        if (retval != ERROR_OK)
                return retval;
        aarch64->cpudbg_dscr = dscr;
 
-       if (DSCR_RUN_MODE(dscr) == (DSCR_CORE_HALTED | DSCR_CORE_RESTARTED)) {
+       if (DSCR_RUN_MODE(dscr) == 0x3) {
                if (prev_target_state != TARGET_HALTED) {
                        /* We have a halting debug event */
-                       LOG_DEBUG("Target halted");
+                       LOG_DEBUG("Target %s halted", target_name(target));
                        target->state = TARGET_HALTED;
                        if ((prev_target_state == TARGET_RUNNING)
                                || (prev_target_state == TARGET_UNKNOWN)
@@ -766,12 +421,8 @@ static int aarch64_poll(struct target *target)
                                        TARGET_EVENT_DEBUG_HALTED);
                        }
                }
-       } else if (DSCR_RUN_MODE(dscr) == DSCR_CORE_RESTARTED)
+       } else
                target->state = TARGET_RUNNING;
-       else {
-               LOG_DEBUG("Unknown target state dscr = 0x%08" PRIx32, dscr);
-               target->state = TARGET_UNKNOWN;
-       }
 
        return retval;
 }
@@ -782,68 +433,26 @@ static int aarch64_halt(struct target *target)
        uint32_t dscr;
        struct armv8_common *armv8 = target_to_armv8(target);
 
-       retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + 0x10000 + 0, &dscr);
-       retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + 0x10000 + 0, 1);
-       retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + 0x10000 + 0, &dscr);
-
-       retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + 0x10000 + 0x140, &dscr);
-       retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + 0x10000 + 0x140, 6);
-       retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + 0x10000 + 0x140, &dscr);
-
-       retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + 0x10000 + 0xa0, &dscr);
-       retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + 0x10000 + 0xa0, 5);
-       retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + 0x10000 + 0xa0, &dscr);
-
-       retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + 0x10000 + 0xa4, &dscr);
-       retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + 0x10000 + 0xa4, 2);
-       retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + 0x10000 + 0xa4, &dscr);
-
-       retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + 0x10000 + 0x20, &dscr);
-       retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + 0x10000 + 0x20, 4);
-       retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + 0x10000 + 0x20, &dscr);
-
        /*
-        * enter halting debug mode
+        * add HDE in halting debug mode
         */
-       retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + CPUDBG_DSCR, &dscr);
+       retval = aarch64_set_dscr_bits(target, DSCR_HDE, DSCR_HDE);
        if (retval != ERROR_OK)
                return retval;
 
-#      /* STATUS */
-       retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + 0x10000 + 0x134, &dscr);
-
-       retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + 0x10000 + 0x1c, &dscr);
+       /* trigger an event on channel 0, this outputs a halt request to the PE */
        retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + 0x10000 + 0x1c, 1);
-       retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + 0x10000 + 0x1c, &dscr);
-
+                       armv8->cti_base + CTI_APPPULSE, CTI_CHNL(0));
+       if (retval != ERROR_OK)
+               return retval;
 
        long long then = timeval_ms();
        for (;; ) {
                retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                               armv8->debug_base + CPUDBG_DSCR, &dscr);
+                               armv8->debug_base + CPUV8_DBG_DSCR, &dscr);
                if (retval != ERROR_OK)
                        return retval;
-               if ((dscr & DSCR_CORE_HALTED) != 0)
+               if ((dscr & DSCRV8_HALT_MASK) != 0)
                        break;
                if (timeval_ms() > then + 1000) {
                        LOG_ERROR("Timeout waiting for halt");
@@ -879,6 +488,8 @@ static int aarch64_internal_restore(struct target *target, int current,
         */
        switch (arm->core_state) {
                case ARM_STATE_ARM:
+                       resume_pc &= 0xFFFFFFFC;
+                       break;
                case ARM_STATE_AARCH64:
                        resume_pc &= 0xFFFFFFFFFFFFFFFC;
                        break;
@@ -893,46 +504,21 @@ static int aarch64_internal_restore(struct target *target, int current,
                        LOG_ERROR("How do I resume into Jazelle state??");
                        return ERROR_FAIL;
        }
-       LOG_DEBUG("resume pc = 0x%16" PRIx64, resume_pc);
+       LOG_DEBUG("resume pc = 0x%016" PRIx64, resume_pc);
        buf_set_u64(arm->pc->value, 0, 64, resume_pc);
        arm->pc->dirty = 1;
        arm->pc->valid = 1;
-#if 0
-       /* restore dpm_mode at system halt */
-       dpm_modeswitch(&armv8->dpm, ARM_MODE_ANY);
-#endif
+
        /* called it now before restoring context because it uses cpu
         * register r0 for restoring system control register */
        retval = aarch64_restore_system_control_reg(target);
-       if (retval != ERROR_OK)
-               return retval;
-       retval = aarch64_restore_context(target, handle_breakpoints);
-       if (retval != ERROR_OK)
-               return retval;
-       target->debug_reason = DBG_REASON_NOTHALTED;
-       target->state = TARGET_RUNNING;
-
-       /* registers are now invalid */
-       register_cache_invalidate(arm->core_cache);
-
-#if 0
-       /* the front-end may request us not to handle breakpoints */
-       if (handle_breakpoints) {
-               /* Single step past breakpoint at current address */
-               breakpoint = breakpoint_find(target, resume_pc);
-               if (breakpoint) {
-                       LOG_DEBUG("unset breakpoint at 0x%8.8x", breakpoint->address);
-                       cortex_m3_unset_breakpoint(target, breakpoint);
-                       cortex_m3_single_step_core(target);
-                       cortex_m3_set_breakpoint(target, breakpoint);
-               }
-       }
-#endif
+       if (retval == ERROR_OK)
+               retval = aarch64_restore_context(target, handle_breakpoints);
 
        return retval;
 }
 
-static int aarch64_internal_restart(struct target *target)
+static int aarch64_internal_restart(struct target *target, bool slave_pe)
 {
        struct armv8_common *armv8 = target_to_armv8(target);
        struct arm *arm = &armv8->arm;
@@ -947,45 +533,48 @@ static int aarch64_internal_restart(struct target *target)
         */
 
        retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + CPUDBG_DSCR, &dscr);
-       if (retval != ERROR_OK)
-               return retval;
-
-       if ((dscr & DSCR_INSTR_COMP) == 0)
-               LOG_ERROR("DSCR InstrCompl must be set before leaving debug!");
-
-       retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + CPUDBG_DSCR, dscr & ~DSCR_ITR_EN);
+                       armv8->debug_base + CPUV8_DBG_DSCR, &dscr);
        if (retval != ERROR_OK)
                return retval;
 
-       retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + CPUDBG_DRCR, DRCR_RESTART |
-                       DRCR_CLEAR_EXCEPTIONS);
-       if (retval != ERROR_OK)
-               return retval;
+       if ((dscr & DSCR_ITE) == 0)
+               LOG_ERROR("DSCR.ITE must be set before leaving debug!");
+       if ((dscr & DSCR_ERR) != 0)
+               LOG_ERROR("DSCR.ERR must be cleared before leaving debug!");
 
+       /* make sure to acknowledge the halt event before resuming */
        retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + 0x10000 + 0x10, 1);
-       if (retval != ERROR_OK)
-               return retval;
+                       armv8->cti_base + CTI_INACK, CTI_TRIG(HALT));
 
-       retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + 0x10000 + 0x1c, 2);
+       /*
+        * open the CTI gate for channel 1 so that the restart events
+        * get passed along to all PEs
+        */
+       if (retval == ERROR_OK)
+               retval = mem_ap_write_atomic_u32(armv8->debug_ap,
+                               armv8->cti_base + CTI_GATE, CTI_CHNL(1));
        if (retval != ERROR_OK)
                return retval;
 
-       long long then = timeval_ms();
-       for (;; ) {
-               retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                               armv8->debug_base + CPUDBG_DSCR, &dscr);
+       if (!slave_pe) {
+               /* trigger an event on channel 1, generates a restart request to the PE */
+               retval = mem_ap_write_atomic_u32(armv8->debug_ap,
+                               armv8->cti_base + CTI_APPPULSE, CTI_CHNL(1));
                if (retval != ERROR_OK)
                        return retval;
-               if ((dscr & DSCR_CORE_RESTARTED) != 0)
-                       break;
-               if (timeval_ms() > then + 1000) {
-                       LOG_ERROR("Timeout waiting for resume");
-                       return ERROR_FAIL;
+
+               long long then = timeval_ms();
+               for (;; ) {
+                       retval = mem_ap_read_atomic_u32(armv8->debug_ap,
+                                       armv8->debug_base + CPUV8_DBG_DSCR, &dscr);
+                       if (retval != ERROR_OK)
+                               return retval;
+                       if ((dscr & DSCR_HDE) != 0)
+                               break;
+                       if (timeval_ms() > then + 1000) {
+                               LOG_ERROR("Timeout waiting for resume");
+                               return ERROR_FAIL;
+                       }
                }
        }
 
@@ -994,6 +583,7 @@ static int aarch64_internal_restart(struct target *target)
 
        /* registers are now invalid */
        register_cache_invalidate(arm->core_cache);
+       register_cache_invalidate(arm->core_cache->next);
 
        return ERROR_OK;
 }
@@ -1011,7 +601,7 @@ static int aarch64_restore_smp(struct target *target, int handle_breakpoints)
                        /*  resume current address , not in step mode */
                        retval += aarch64_internal_restore(curr, 1, &address,
                                        handle_breakpoints, 0);
-                       retval += aarch64_internal_restart(curr);
+                       retval += aarch64_internal_restart(curr, true);
                }
                head = head->next;
 
@@ -1042,16 +632,16 @@ static int aarch64_resume(struct target *target, int current,
                if (retval != ERROR_OK)
                        return retval;
        }
-       aarch64_internal_restart(target);
+       aarch64_internal_restart(target, false);
 
        if (!debug_execution) {
                target->state = TARGET_RUNNING;
                target_call_event_callbacks(target, TARGET_EVENT_RESUMED);
-               LOG_DEBUG("target resumed at 0x%" PRIu64, addr);
+               LOG_DEBUG("target resumed at 0x%" PRIx64, addr);
        } else {
                target->state = TARGET_DEBUG_RUNNING;
                target_call_event_callbacks(target, TARGET_EVENT_DEBUG_RESUMED);
-               LOG_DEBUG("target debug resumed at 0x%" PRIu64, addr);
+               LOG_DEBUG("target debug resumed at 0x%" PRIx64, addr);
        }
 
        return ERROR_OK;
@@ -1059,58 +649,58 @@ static int aarch64_resume(struct target *target, int current,
 
 static int aarch64_debug_entry(struct target *target)
 {
-       uint32_t dscr;
        int retval = ERROR_OK;
        struct aarch64_common *aarch64 = target_to_aarch64(target);
        struct armv8_common *armv8 = target_to_armv8(target);
-       uint32_t tmp;
+       struct arm_dpm *dpm = &armv8->dpm;
+       enum arm_state core_state;
 
-       LOG_DEBUG("dscr = 0x%08" PRIx32, aarch64->cpudbg_dscr);
+       LOG_DEBUG("%s dscr = 0x%08" PRIx32, target_name(target), aarch64->cpudbg_dscr);
 
-       /* REVISIT surely we should not re-read DSCR !! */
-       retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + CPUDBG_DSCR, &dscr);
-       if (retval != ERROR_OK)
-               return retval;
+       dpm->dscr = aarch64->cpudbg_dscr;
+       core_state = armv8_dpm_get_core_state(dpm);
+       armv8_select_opcodes(armv8, core_state == ARM_STATE_AARCH64);
+       armv8_select_reg_access(armv8, core_state == ARM_STATE_AARCH64);
 
-       /* REVISIT see A8 TRM 12.11.4 steps 2..3 -- make sure that any
-        * imprecise data aborts get discarded by issuing a Data
-        * Synchronization Barrier:  ARMV4_5_MCR(15, 0, 0, 7, 10, 4).
-        */
-
-       /* Enable the ITR execution once we are in debug mode */
-       dscr |= DSCR_ITR_EN;
+       /* make sure to clear all sticky errors */
        retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + CPUDBG_DSCR, dscr);
+                       armv8->debug_base + CPUV8_DBG_DRCR, DRCR_CSE);
+
+       /* discard async exceptions */
+       if (retval == ERROR_OK)
+               retval = dpm->instr_cpsr_sync(dpm);
+
        if (retval != ERROR_OK)
                return retval;
 
        /* Examine debug reason */
-       arm_dpm_report_dscr(&armv8->dpm, aarch64->cpudbg_dscr);
-       mem_ap_read_atomic_u32(armv8->debug_ap,
-                                  armv8->debug_base + CPUDBG_DESR, &tmp);
-       if ((tmp & 0x7) == 0x4)
-               target->debug_reason = DBG_REASON_SINGLESTEP;
+       armv8_dpm_report_dscr(dpm, aarch64->cpudbg_dscr);
 
        /* save address of instruction that triggered the watchpoint? */
        if (target->debug_reason == DBG_REASON_WATCHPOINT) {
-               uint32_t wfar;
+               uint32_t tmp;
+               uint64_t wfar = 0;
 
                retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                               armv8->debug_base + CPUDBG_WFAR,
-                               &wfar);
+                               armv8->debug_base + CPUV8_DBG_WFAR1,
+                               &tmp);
+               if (retval != ERROR_OK)
+                       return retval;
+               wfar = tmp;
+               wfar = (wfar << 32);
+               retval = mem_ap_read_atomic_u32(armv8->debug_ap,
+                               armv8->debug_base + CPUV8_DBG_WFAR0,
+                               &tmp);
                if (retval != ERROR_OK)
                        return retval;
-               arm_dpm_report_wfar(&armv8->dpm, wfar);
+               wfar |= tmp;
+               armv8_dpm_report_wfar(&armv8->dpm, wfar);
        }
 
-       retval = arm_dpm_read_current_registers_64(&armv8->dpm);
+       retval = armv8_dpm_read_current_registers(&armv8->dpm);
 
-       if (armv8->post_debug_entry) {
+       if (retval == ERROR_OK && armv8->post_debug_entry)
                retval = armv8->post_debug_entry(target);
-               if (retval != ERROR_OK)
-                       return retval;
-       }
 
        return retval;
 }
@@ -1119,31 +709,68 @@ static int aarch64_post_debug_entry(struct target *target)
 {
        struct aarch64_common *aarch64 = target_to_aarch64(target);
        struct armv8_common *armv8 = &aarch64->armv8_common;
-       struct armv8_mmu_common *armv8_mmu = &armv8->armv8_mmu;
-       uint32_t sctlr_el1 = 0;
        int retval;
 
-       mem_ap_write_atomic_u32(armv8->debug_ap,
-                               armv8->debug_base + CPUDBG_DRCR, 1<<2);
-       retval = aarch64_instr_read_data_r0(armv8->arm.dpm,
-                                           0xd5381000, &sctlr_el1);
-       if (retval != ERROR_OK)
-               return retval;
+       switch (armv8->arm.core_mode) {
+               case ARMV8_64_EL0T:
+                       armv8_dpm_modeswitch(&armv8->dpm, ARMV8_64_EL1H);
+                       /* fall through */
+               case ARMV8_64_EL1T:
+               case ARMV8_64_EL1H:
+                       retval = armv8->arm.mrs(target, 3, /*op 0*/
+                                       0, 0,   /* op1, op2 */
+                                       1, 0,   /* CRn, CRm */
+                                       &aarch64->system_control_reg);
+                       if (retval != ERROR_OK)
+                               return retval;
+               break;
+               case ARMV8_64_EL2T:
+               case ARMV8_64_EL2H:
+                       retval = armv8->arm.mrs(target, 3, /*op 0*/
+                                       4, 0,   /* op1, op2 */
+                                       1, 0,   /* CRn, CRm */
+                                       &aarch64->system_control_reg);
+                       if (retval != ERROR_OK)
+                               return retval;
+               break;
+               case ARMV8_64_EL3H:
+               case ARMV8_64_EL3T:
+                       retval = armv8->arm.mrs(target, 3, /*op 0*/
+                                       6, 0,   /* op1, op2 */
+                                       1, 0,   /* CRn, CRm */
+                                       &aarch64->system_control_reg);
+                       if (retval != ERROR_OK)
+                               return retval;
+               break;
+
+               case ARM_MODE_SVC:
+                       retval = armv8->arm.mrc(target, 15, 0, 0, 1, 0, &aarch64->system_control_reg);
+                       if (retval != ERROR_OK)
+                               return retval;
+                       break;
 
-       LOG_DEBUG("sctlr_el1 = %#8.8x", sctlr_el1);
-       aarch64->system_control_reg = sctlr_el1;
-       aarch64->system_control_reg_curr = sctlr_el1;
-       aarch64->curr_mode = armv8->arm.core_mode;
+               default:
+                       LOG_INFO("cannot read system control register in this mode");
+                       break;
+       }
 
-       armv8_mmu->mmu_enabled = sctlr_el1 & 0x1U ? 1 : 0;
-       armv8_mmu->armv8_cache.d_u_cache_enabled = sctlr_el1 & 0x4U ? 1 : 0;
-       armv8_mmu->armv8_cache.i_cache_enabled = sctlr_el1 & 0x1000U ? 1 : 0;
+       armv8_dpm_modeswitch(&armv8->dpm, ARM_MODE_ANY);
 
-#if 0
-       if (armv8->armv8_mmu.armv8_cache.ctype == -1)
-               armv8_identify_cache(target);
-#endif
+       LOG_DEBUG("System_register: %8.8" PRIx32, aarch64->system_control_reg);
+       aarch64->system_control_reg_curr = aarch64->system_control_reg;
+
+       if (armv8->armv8_mmu.armv8_cache.info == -1) {
+               armv8_identify_cache(armv8);
+               armv8_read_mpidr(armv8);
+       }
 
+       armv8->armv8_mmu.mmu_enabled =
+                       (aarch64->system_control_reg & 0x1U) ? 1 : 0;
+       armv8->armv8_mmu.armv8_cache.d_u_cache_enabled =
+               (aarch64->system_control_reg & 0x4U) ? 1 : 0;
+       armv8->armv8_mmu.armv8_cache.i_cache_enabled =
+               (aarch64->system_control_reg & 0x1000U) ? 1 : 0;
+       aarch64->curr_mode = armv8->arm.core_mode;
        return ERROR_OK;
 }
 
@@ -1152,7 +779,7 @@ static int aarch64_step(struct target *target, int current, target_addr_t addres
 {
        struct armv8_common *armv8 = target_to_armv8(target);
        int retval;
-       uint32_t tmp;
+       uint32_t edecr;
 
        if (target->state != TARGET_HALTED) {
                LOG_WARNING("target not halted");
@@ -1160,25 +787,31 @@ static int aarch64_step(struct target *target, int current, target_addr_t addres
        }
 
        retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + CPUDBG_DECR, &tmp);
+                       armv8->debug_base + CPUV8_DBG_EDECR, &edecr);
        if (retval != ERROR_OK)
                return retval;
 
+       /* make sure EDECR.SS is not set when restoring the register */
+       edecr &= ~0x4;
+
+       /* set EDECR.SS to enter hardware step mode */
        retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + CPUDBG_DECR, (tmp|0x4));
+                       armv8->debug_base + CPUV8_DBG_EDECR, (edecr|0x4));
        if (retval != ERROR_OK)
                return retval;
 
-       target->debug_reason = DBG_REASON_SINGLESTEP;
-       retval = aarch64_resume(target, 1, address, 0, 0);
+       /* disable interrupts while stepping */
+       retval = aarch64_set_dscr_bits(target, 0x3 << 22, 0x3 << 22);
+       if (retval != ERROR_OK)
+               return ERROR_OK;
+
+       /* resume the target */
+       retval = aarch64_resume(target, current, address, 0, 0);
        if (retval != ERROR_OK)
                return retval;
 
        long long then = timeval_ms();
        while (target->state != TARGET_HALTED) {
-               mem_ap_read_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + CPUDBG_DESR, &tmp);
-               LOG_DEBUG("DESR = %#x", tmp);
                retval = aarch64_poll(target);
                if (retval != ERROR_OK)
                        return retval;
@@ -1188,14 +821,16 @@ static int aarch64_step(struct target *target, int current, target_addr_t addres
                }
        }
 
+       /* restore EDECR */
        retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + CPUDBG_DECR, (tmp&(~0x4)));
+                       armv8->debug_base + CPUV8_DBG_EDECR, edecr);
        if (retval != ERROR_OK)
                return retval;
 
-       target_call_event_callbacks(target, TARGET_EVENT_HALTED);
-       if (target->state == TARGET_HALTED)
-               LOG_DEBUG("target stepped");
+       /* restore interrupts */
+       retval = aarch64_set_dscr_bits(target, 0x3 << 22, 0);
+       if (retval != ERROR_OK)
+               return ERROR_OK;
 
        return ERROR_OK;
 }
@@ -1204,14 +839,12 @@ static int aarch64_restore_context(struct target *target, bool bpwp)
 {
        struct armv8_common *armv8 = target_to_armv8(target);
 
-       LOG_DEBUG(" ");
+       LOG_DEBUG("%s", target_name(target));
 
        if (armv8->pre_restore_context)
                armv8->pre_restore_context(target);
 
-       return arm_dpm_write_dirty_registers(&armv8->dpm, bpwp);
-
-       return ERROR_OK;
+       return armv8_dpm_write_dirty_registers(&armv8->dpm, bpwp);
 }
 
 /*
@@ -1229,7 +862,6 @@ static int aarch64_set_breakpoint(struct target *target,
        struct aarch64_common *aarch64 = target_to_aarch64(target);
        struct armv8_common *armv8 = &aarch64->armv8_common;
        struct aarch64_brp *brp_list = aarch64->brp_list;
-       uint32_t dscr;
 
        if (breakpoint->set) {
                LOG_WARNING("breakpoint already set");
@@ -1257,18 +889,18 @@ static int aarch64_set_breakpoint(struct target *target,
                bpt_value = brp_list[brp_i].value;
 
                retval = aarch64_dap_write_memap_register_u32(target, armv8->debug_base
-                               + CPUDBG_BVR_BASE + 16 * brp_list[brp_i].BRPn,
+                               + CPUV8_DBG_BVR_BASE + 16 * brp_list[brp_i].BRPn,
                                (uint32_t)(bpt_value & 0xFFFFFFFF));
                if (retval != ERROR_OK)
                        return retval;
                retval = aarch64_dap_write_memap_register_u32(target, armv8->debug_base
-                               + CPUDBG_BVR_BASE + 4 + 16 * brp_list[brp_i].BRPn,
+                               + CPUV8_DBG_BVR_BASE + 4 + 16 * brp_list[brp_i].BRPn,
                                (uint32_t)(bpt_value >> 32));
                if (retval != ERROR_OK)
                        return retval;
 
                retval = aarch64_dap_write_memap_register_u32(target, armv8->debug_base
-                               + CPUDBG_BCR_BASE + 16 * brp_list[brp_i].BRPn,
+                               + CPUV8_DBG_BCR_BASE + 16 * brp_list[brp_i].BRPn,
                                brp_list[brp_i].control);
                if (retval != ERROR_OK)
                        return retval;
@@ -1278,28 +910,38 @@ static int aarch64_set_breakpoint(struct target *target,
 
        } else if (breakpoint->type == BKPT_SOFT) {
                uint8_t code[4];
-               buf_set_u32(code, 0, 32, 0xD4400000);
 
+               buf_set_u32(code, 0, 32, ARMV8_HLT(0x11));
                retval = target_read_memory(target,
                                breakpoint->address & 0xFFFFFFFFFFFFFFFE,
                                breakpoint->length, 1,
                                breakpoint->orig_instr);
                if (retval != ERROR_OK)
                        return retval;
+
+               armv8_cache_d_inner_flush_virt(armv8,
+                               breakpoint->address & 0xFFFFFFFFFFFFFFFE,
+                               breakpoint->length);
+
                retval = target_write_memory(target,
                                breakpoint->address & 0xFFFFFFFFFFFFFFFE,
                                breakpoint->length, 1, code);
                if (retval != ERROR_OK)
                        return retval;
+
+               armv8_cache_d_inner_flush_virt(armv8,
+                               breakpoint->address & 0xFFFFFFFFFFFFFFFE,
+                               breakpoint->length);
+
+               armv8_cache_i_inner_inval_virt(armv8,
+                               breakpoint->address & 0xFFFFFFFFFFFFFFFE,
+                               breakpoint->length);
+
                breakpoint->set = 0x11; /* Any nice value but 0 */
        }
 
-       retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                                       armv8->debug_base + CPUDBG_DSCR, &dscr);
        /* Ensure that halting debug mode is enable */
-       dscr = dscr | DSCR_HALT_DBG_MODE;
-       retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                                        armv8->debug_base + CPUDBG_DSCR, dscr);
+       retval = aarch64_set_dscr_bits(target, DSCR_HDE, DSCR_HDE);
        if (retval != ERROR_OK) {
                LOG_DEBUG("Failed to set DSCR.HDE");
                return retval;
@@ -1335,18 +977,19 @@ static int aarch64_set_context_breakpoint(struct target *target,
 
        breakpoint->set = brp_i + 1;
        control = ((matchmode & 0x7) << 20)
+               | (1 << 13)
                | (byte_addr_select << 5)
                | (3 << 1) | 1;
        brp_list[brp_i].used = 1;
        brp_list[brp_i].value = (breakpoint->asid);
        brp_list[brp_i].control = control;
        retval = aarch64_dap_write_memap_register_u32(target, armv8->debug_base
-                       + CPUDBG_BVR_BASE + 4 * brp_list[brp_i].BRPn,
+                       + CPUV8_DBG_BVR_BASE + 16 * brp_list[brp_i].BRPn,
                        brp_list[brp_i].value);
        if (retval != ERROR_OK)
                return retval;
        retval = aarch64_dap_write_memap_register_u32(target, armv8->debug_base
-                       + CPUDBG_BCR_BASE + 4 * brp_list[brp_i].BRPn,
+                       + CPUV8_DBG_BCR_BASE + 16 * brp_list[brp_i].BRPn,
                        brp_list[brp_i].control);
        if (retval != ERROR_OK)
                return retval;
@@ -1407,30 +1050,36 @@ static int aarch64_set_hybrid_breakpoint(struct target *target, struct breakpoin
        brp_list[brp_1].value = (breakpoint->asid);
        brp_list[brp_1].control = control_CTX;
        retval = aarch64_dap_write_memap_register_u32(target, armv8->debug_base
-                       + CPUDBG_BVR_BASE + 4 * brp_list[brp_1].BRPn,
+                       + CPUV8_DBG_BVR_BASE + 16 * brp_list[brp_1].BRPn,
                        brp_list[brp_1].value);
        if (retval != ERROR_OK)
                return retval;
        retval = aarch64_dap_write_memap_register_u32(target, armv8->debug_base
-                       + CPUDBG_BCR_BASE + 4 * brp_list[brp_1].BRPn,
+                       + CPUV8_DBG_BCR_BASE + 16 * brp_list[brp_1].BRPn,
                        brp_list[brp_1].control);
        if (retval != ERROR_OK)
                return retval;
 
        control_IVA = ((IVA_machmode & 0x7) << 20)
                | (brp_1 << 16)
+               | (1 << 13)
                | (IVA_byte_addr_select << 5)
                | (3 << 1) | 1;
        brp_list[brp_2].used = 1;
-       brp_list[brp_2].value = (breakpoint->address & 0xFFFFFFFC);
+       brp_list[brp_2].value = breakpoint->address & 0xFFFFFFFFFFFFFFFC;
        brp_list[brp_2].control = control_IVA;
        retval = aarch64_dap_write_memap_register_u32(target, armv8->debug_base
-                       + CPUDBG_BVR_BASE + 4 * brp_list[brp_2].BRPn,
-                       brp_list[brp_2].value);
+                       + CPUV8_DBG_BVR_BASE + 16 * brp_list[brp_2].BRPn,
+                       brp_list[brp_2].value & 0xFFFFFFFF);
+       if (retval != ERROR_OK)
+               return retval;
+       retval = aarch64_dap_write_memap_register_u32(target, armv8->debug_base
+                       + CPUV8_DBG_BVR_BASE + 4 + 16 * brp_list[brp_2].BRPn,
+                       brp_list[brp_2].value >> 32);
        if (retval != ERROR_OK)
                return retval;
        retval = aarch64_dap_write_memap_register_u32(target, armv8->debug_base
-                       + CPUDBG_BCR_BASE + 4 * brp_list[brp_2].BRPn,
+                       + CPUV8_DBG_BCR_BASE + 16 * brp_list[brp_2].BRPn,
                        brp_list[brp_2].control);
        if (retval != ERROR_OK)
                return retval;
@@ -1464,10 +1113,20 @@ static int aarch64_unset_breakpoint(struct target *target, struct breakpoint *br
                        brp_list[brp_i].value = 0;
                        brp_list[brp_i].control = 0;
                        retval = aarch64_dap_write_memap_register_u32(target, armv8->debug_base
-                                       + CPUDBG_BCR_BASE + 16 * brp_list[brp_i].BRPn,
+                                       + CPUV8_DBG_BCR_BASE + 16 * brp_list[brp_i].BRPn,
                                        brp_list[brp_i].control);
                        if (retval != ERROR_OK)
                                return retval;
+                       retval = aarch64_dap_write_memap_register_u32(target, armv8->debug_base
+                                       + CPUV8_DBG_BVR_BASE + 16 * brp_list[brp_i].BRPn,
+                                       (uint32_t)brp_list[brp_i].value);
+                       if (retval != ERROR_OK)
+                               return retval;
+                       retval = aarch64_dap_write_memap_register_u32(target, armv8->debug_base
+                                       + CPUV8_DBG_BVR_BASE + 4 + 16 * brp_list[brp_i].BRPn,
+                                       (uint32_t)brp_list[brp_i].value);
+                       if (retval != ERROR_OK)
+                               return retval;
                        if ((brp_j < 0) || (brp_j >= aarch64->brp_num)) {
                                LOG_DEBUG("Invalid BRP number in breakpoint");
                                return ERROR_OK;
@@ -1478,10 +1137,21 @@ static int aarch64_unset_breakpoint(struct target *target, struct breakpoint *br
                        brp_list[brp_j].value = 0;
                        brp_list[brp_j].control = 0;
                        retval = aarch64_dap_write_memap_register_u32(target, armv8->debug_base
-                                       + CPUDBG_BCR_BASE + 16 * brp_list[brp_j].BRPn,
+                                       + CPUV8_DBG_BCR_BASE + 16 * brp_list[brp_j].BRPn,
                                        brp_list[brp_j].control);
                        if (retval != ERROR_OK)
                                return retval;
+                       retval = aarch64_dap_write_memap_register_u32(target, armv8->debug_base
+                                       + CPUV8_DBG_BVR_BASE + 16 * brp_list[brp_j].BRPn,
+                                       (uint32_t)brp_list[brp_j].value);
+                       if (retval != ERROR_OK)
+                               return retval;
+                       retval = aarch64_dap_write_memap_register_u32(target, armv8->debug_base
+                                       + CPUV8_DBG_BVR_BASE + 4 + 16 * brp_list[brp_j].BRPn,
+                                       (uint32_t)brp_list[brp_j].value);
+                       if (retval != ERROR_OK)
+                               return retval;
+
                        breakpoint->linked_BRP = 0;
                        breakpoint->set = 0;
                        return ERROR_OK;
@@ -1498,20 +1168,31 @@ static int aarch64_unset_breakpoint(struct target *target, struct breakpoint *br
                        brp_list[brp_i].value = 0;
                        brp_list[brp_i].control = 0;
                        retval = aarch64_dap_write_memap_register_u32(target, armv8->debug_base
-                                       + CPUDBG_BCR_BASE + 4 * brp_list[brp_i].BRPn,
+                                       + CPUV8_DBG_BCR_BASE + 16 * brp_list[brp_i].BRPn,
                                        brp_list[brp_i].control);
                        if (retval != ERROR_OK)
                                return retval;
                        retval = aarch64_dap_write_memap_register_u32(target, armv8->debug_base
-                                       + CPUDBG_BVR_BASE + 4 * brp_list[brp_i].BRPn,
+                                       + CPUV8_DBG_BVR_BASE + 16 * brp_list[brp_i].BRPn,
                                        brp_list[brp_i].value);
                        if (retval != ERROR_OK)
                                return retval;
+
+                       retval = aarch64_dap_write_memap_register_u32(target, armv8->debug_base
+                                       + CPUV8_DBG_BVR_BASE + 4 + 16 * brp_list[brp_i].BRPn,
+                                       (uint32_t)brp_list[brp_i].value);
+                       if (retval != ERROR_OK)
+                               return retval;
                        breakpoint->set = 0;
                        return ERROR_OK;
                }
        } else {
                /* restore original instruction (kept in target endianness) */
+
+               armv8_cache_d_inner_flush_virt(armv8,
+                               breakpoint->address & 0xFFFFFFFFFFFFFFFE,
+                               breakpoint->length);
+
                if (breakpoint->length == 4) {
                        retval = target_write_memory(target,
                                        breakpoint->address & 0xFFFFFFFFFFFFFFFE,
@@ -1525,6 +1206,14 @@ static int aarch64_unset_breakpoint(struct target *target, struct breakpoint *br
                        if (retval != ERROR_OK)
                                return retval;
                }
+
+               armv8_cache_d_inner_flush_virt(armv8,
+                               breakpoint->address & 0xFFFFFFFFFFFFFFFE,
+                               breakpoint->length);
+
+               armv8_cache_i_inner_inval_virt(armv8,
+                               breakpoint->address & 0xFFFFFFFFFFFFFFFE,
+                               breakpoint->length);
        }
        breakpoint->set = 0;
 
@@ -1660,13 +1349,14 @@ static int aarch64_deassert_reset(struct target *target)
        return ERROR_OK;
 }
 
-static int aarch64_write_apb_ab_memory(struct target *target,
+static int aarch64_write_apb_ap_memory(struct target *target,
        uint64_t address, uint32_t size,
        uint32_t count, const uint8_t *buffer)
 {
        /* write memory through APB-AP */
        int retval = ERROR_COMMAND_SYNTAX_ERROR;
        struct armv8_common *armv8 = target_to_armv8(target);
+       struct arm_dpm *dpm = &armv8->dpm;
        struct arm *arm = &armv8->arm;
        int total_bytes = count * size;
        int total_u32;
@@ -1675,10 +1365,10 @@ static int aarch64_write_apb_ab_memory(struct target *target,
        struct reg *reg;
        uint32_t dscr;
        uint8_t *tmp_buff = NULL;
-       uint32_t i = 0;
 
-       LOG_DEBUG("Writing APB-AP memory address 0x%" PRIx64 " size %"  PRIu32 " count%"  PRIu32,
+       LOG_DEBUG("Writing APB-AP memory address 0x%" PRIx64 " size %"  PRIu32 " count %"  PRIu32,
                          address, size, count);
+
        if (target->state != TARGET_HALTED) {
                LOG_WARNING("target not halted");
                return ERROR_TARGET_NOT_HALTED;
@@ -1697,16 +1387,7 @@ static int aarch64_write_apb_ab_memory(struct target *target,
        reg = armv8_reg_current(arm, 0);
        reg->dirty = true;
 
-       /*  clear any abort  */
-       retval = mem_ap_write_atomic_u32(armv8->debug_ap, armv8->debug_base + CPUDBG_DRCR, 1<<2);
-       if (retval != ERROR_OK)
-               return retval;
-
-       /* This algorithm comes from either :
-        * Cortex-A8 TRM Example 12-25
-        * Cortex-R4 TRM Example 11-26
-        * (slight differences)
-        */
+       /* This algorithm comes from DDI0487A.g, chapter J9.1 */
 
        /* The algorithm only copies 32 bit words, so the buffer
         * should be expanded to include the words at either end.
@@ -1719,7 +1400,7 @@ static int aarch64_write_apb_ab_memory(struct target *target,
                /* First bytes not aligned - read the 32 bit word to avoid corrupting
                 * the other bytes in the word.
                 */
-               retval = aarch64_read_apb_ab_memory(target, (address & ~0x3), 4, 1, tmp_buff);
+               retval = aarch64_read_apb_ap_memory(target, (address & ~0x3), 4, 1, tmp_buff);
                if (retval != ERROR_OK)
                        goto error_free_buff_w;
        }
@@ -1730,7 +1411,7 @@ static int aarch64_write_apb_ab_memory(struct target *target,
 
                /* Read the last word to avoid corruption during 32 bit write */
                int mem_offset = (total_u32-1) * 4;
-               retval = aarch64_read_apb_ab_memory(target, (address & ~0x3) + mem_offset, 4, 1, &tmp_buff[mem_offset]);
+               retval = aarch64_read_apb_ap_memory(target, (address & ~0x3) + mem_offset, 4, 1, &tmp_buff[mem_offset]);
                if (retval != ERROR_OK)
                        goto error_free_buff_w;
        }
@@ -1742,56 +1423,61 @@ static int aarch64_write_apb_ab_memory(struct target *target,
 
        /* Read DSCR */
        retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + CPUDBG_DSCR, &dscr);
+                       armv8->debug_base + CPUV8_DBG_DSCR, &dscr);
        if (retval != ERROR_OK)
                goto error_free_buff_w;
 
-       /* Set DTR mode to Normal*/
-       dscr = (dscr & ~DSCR_EXT_DCC_MASK) | DSCR_EXT_DCC_NON_BLOCKING;
+       /* Set Normal access mode  */
+       dscr = (dscr & ~DSCR_MA);
        retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + CPUDBG_DSCR, dscr);
-       if (retval != ERROR_OK)
-               goto error_free_buff_w;
+                       armv8->debug_base + CPUV8_DBG_DSCR, dscr);
+
+       if (arm->core_state == ARM_STATE_AARCH64) {
+               /* Write X0 with value 'address' using write procedure */
+               /* Step 1.a+b - Write the address for read access into DBGDTR_EL0 */
+               /* Step 1.c   - Copy value from DTR to R0 using instruction mrs DBGDTR_EL0, x0 */
+               retval = dpm->instr_write_data_dcc_64(dpm,
+                               ARMV8_MRS(SYSTEM_DBG_DBGDTR_EL0, 0), address & ~0x3ULL);
+       } else {
+               /* Write R0 with value 'address' using write procedure */
+               /* Step 1.a+b - Write the address for read access into DBGDTRRX */
+               /* Step 1.c   - Copy value from DTR to R0 using instruction mrc DBGDTRTXint, r0 */
+               dpm->instr_write_data_dcc(dpm,
+                               ARMV4_5_MRC(14, 0, 0, 0, 5, 0), address & ~0x3ULL);
 
-       if (size > 4) {
-               LOG_WARNING("reading size >4 bytes not yet supported");
-               goto error_unset_dtr_w;
        }
-
-       retval = aarch64_instr_write_data_dcc_64(arm->dpm, 0xd5330401, address+4);
+       /* Step 1.d   - Change DCC to memory mode */
+       dscr = dscr | DSCR_MA;
+       retval +=  mem_ap_write_atomic_u32(armv8->debug_ap,
+                       armv8->debug_base + CPUV8_DBG_DSCR, dscr);
        if (retval != ERROR_OK)
                goto error_unset_dtr_w;
 
-       dscr = DSCR_INSTR_COMP;
-       while (i < count * size) {
-               uint32_t val;
-
-               memcpy(&val, &buffer[i], size);
-               retval = aarch64_instr_write_data_dcc(arm->dpm, 0xd5330500, val);
-               if (retval != ERROR_OK)
-                       goto error_unset_dtr_w;
-
-               retval = aarch64_exec_opcode(target, 0xb81fc020, &dscr);
-               if (retval != ERROR_OK)
-                       goto error_unset_dtr_w;
 
-               retval = aarch64_exec_opcode(target, 0x91001021, &dscr);
-               if (retval != ERROR_OK)
-                       goto error_unset_dtr_w;
+       /* Step 2.a   - Do the write */
+       retval = mem_ap_write_buf_noincr(armv8->debug_ap,
+                                       tmp_buff, 4, total_u32, armv8->debug_base + CPUV8_DBG_DTRRX);
+       if (retval != ERROR_OK)
+               goto error_unset_dtr_w;
 
-               i += 4;
-       }
+       /* Step 3.a   - Switch DTR mode back to Normal mode */
+       dscr = (dscr & ~DSCR_MA);
+       retval = mem_ap_write_atomic_u32(armv8->debug_ap,
+                               armv8->debug_base + CPUV8_DBG_DSCR, dscr);
+       if (retval != ERROR_OK)
+               goto error_unset_dtr_w;
 
        /* Check for sticky abort flags in the DSCR */
        retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                               armv8->debug_base + CPUDBG_DSCR, &dscr);
+                               armv8->debug_base + CPUV8_DBG_DSCR, &dscr);
        if (retval != ERROR_OK)
                goto error_free_buff_w;
-       if (dscr & (DSCR_STICKY_ABORT_PRECISE | DSCR_STICKY_ABORT_IMPRECISE)) {
+
+       dpm->dscr = dscr;
+       if (dscr & (DSCR_ERR | DSCR_SYS_ERROR_PEND)) {
                /* Abort occurred - clear it and exit */
                LOG_ERROR("abort occurred - dscr = 0x%08" PRIx32, dscr);
-               mem_ap_write_atomic_u32(armv8->debug_ap,
-                                       armv8->debug_base + CPUDBG_DRCR, 1<<2);
+               armv8_dpm_handle_exception(dpm);
                goto error_free_buff_w;
        }
 
@@ -1802,103 +1488,177 @@ static int aarch64_write_apb_ab_memory(struct target *target,
 error_unset_dtr_w:
        /* Unset DTR mode */
        mem_ap_read_atomic_u32(armv8->debug_ap,
-                               armv8->debug_base + CPUDBG_DSCR, &dscr);
-       dscr = (dscr & ~DSCR_EXT_DCC_MASK) | DSCR_EXT_DCC_NON_BLOCKING;
+                               armv8->debug_base + CPUV8_DBG_DSCR, &dscr);
+       dscr = (dscr & ~DSCR_MA);
        mem_ap_write_atomic_u32(armv8->debug_ap,
-                               armv8->debug_base + CPUDBG_DSCR, dscr);
+                               armv8->debug_base + CPUV8_DBG_DSCR, dscr);
 error_free_buff_w:
        LOG_ERROR("error");
        free(tmp_buff);
        return ERROR_FAIL;
 }
 
-static int aarch64_read_apb_ab_memory(struct target *target,
+static int aarch64_read_apb_ap_memory(struct target *target,
        target_addr_t address, uint32_t size,
        uint32_t count, uint8_t *buffer)
 {
        /* read memory through APB-AP */
-
        int retval = ERROR_COMMAND_SYNTAX_ERROR;
        struct armv8_common *armv8 = target_to_armv8(target);
+       struct arm_dpm *dpm = &armv8->dpm;
        struct arm *arm = &armv8->arm;
+       int total_bytes = count * size;
+       int total_u32;
+       int start_byte = address & 0x3;
+       int end_byte   = (address + total_bytes) & 0x3;
        struct reg *reg;
-       uint32_t dscr, val;
+       uint32_t dscr;
        uint8_t *tmp_buff = NULL;
-       uint32_t i = 0;
+       uint8_t *u8buf_ptr;
+       uint32_t value;
 
-       LOG_DEBUG("Reading APB-AP memory address 0x%" TARGET_PRIxADDR " size %" PRIu32 " count%"  PRIu32,
+       LOG_DEBUG("Reading APB-AP memory address 0x%" TARGET_PRIxADDR " size %" PRIu32 " count %"  PRIu32,
                          address, size, count);
+
        if (target->state != TARGET_HALTED) {
                LOG_WARNING("target not halted");
                return ERROR_TARGET_NOT_HALTED;
        }
 
-       /* Mark register R0 as dirty, as it will be used
+       total_u32 = DIV_ROUND_UP((address & 3) + total_bytes, 4);
+       /* Mark register X0, X1 as dirty, as it will be used
         * for transferring the data.
         * It will be restored automatically when exiting
         * debug mode
         */
-       reg = armv8_reg_current(arm, 0);
+       reg = armv8_reg_current(arm, 1);
        reg->dirty = true;
 
-       /*  clear any abort  */
-       retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-               armv8->debug_base + CPUDBG_DRCR, 1<<2);
-       if (retval != ERROR_OK)
-               goto error_free_buff_r;
+       reg = armv8_reg_current(arm, 0);
+       reg->dirty = true;
 
+       /* Read DSCR */
        retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + CPUDBG_DSCR, &dscr);
+                               armv8->debug_base + CPUV8_DBG_DSCR, &dscr);
+
+       /* This algorithm comes from DDI0487A.g, chapter J9.1 */
+
+       /* Set Normal access mode  */
+       dscr = (dscr & ~DSCR_MA);
+       retval +=  mem_ap_write_atomic_u32(armv8->debug_ap,
+                       armv8->debug_base + CPUV8_DBG_DSCR, dscr);
+
+       if (arm->core_state == ARM_STATE_AARCH64) {
+               /* Write X0 with value 'address' using write procedure */
+               /* Step 1.a+b - Write the address for read access into DBGDTR_EL0 */
+               /* Step 1.c   - Copy value from DTR to R0 using instruction mrs DBGDTR_EL0, x0 */
+               retval += dpm->instr_write_data_dcc_64(dpm,
+                               ARMV8_MRS(SYSTEM_DBG_DBGDTR_EL0, 0), address & ~0x3ULL);
+               /* Step 1.d - Dummy operation to ensure EDSCR.Txfull == 1 */
+               retval += dpm->instr_execute(dpm, ARMV8_MSR_GP(SYSTEM_DBG_DBGDTR_EL0, 0));
+               /* Step 1.e - Change DCC to memory mode */
+               dscr = dscr | DSCR_MA;
+               retval +=  mem_ap_write_atomic_u32(armv8->debug_ap,
+                               armv8->debug_base + CPUV8_DBG_DSCR, dscr);
+               /* Step 1.f - read DBGDTRTX and discard the value */
+               retval += mem_ap_read_atomic_u32(armv8->debug_ap,
+                               armv8->debug_base + CPUV8_DBG_DTRTX, &value);
+       } else {
+               /* Write R0 with value 'address' using write procedure */
+               /* Step 1.a+b - Write the address for read access into DBGDTRRXint */
+               /* Step 1.c   - Copy value from DTR to R0 using instruction mrc DBGDTRTXint, r0 */
+               retval += dpm->instr_write_data_dcc(dpm,
+                               ARMV4_5_MRC(14, 0, 0, 0, 5, 0), address & ~0x3ULL);
+               /* Step 1.d - Dummy operation to ensure EDSCR.Txfull == 1 */
+               retval += dpm->instr_execute(dpm, ARMV4_5_MCR(14, 0, 0, 0, 5, 0));
+               /* Step 1.e - Change DCC to memory mode */
+               dscr = dscr | DSCR_MA;
+               retval +=  mem_ap_write_atomic_u32(armv8->debug_ap,
+                               armv8->debug_base + CPUV8_DBG_DSCR, dscr);
+               /* Step 1.f - read DBGDTRTX and discard the value */
+               retval += mem_ap_read_atomic_u32(armv8->debug_ap,
+                               armv8->debug_base + CPUV8_DBG_DTRTX, &value);
+
+       }
        if (retval != ERROR_OK)
                goto error_unset_dtr_r;
 
-       if (size > 4) {
-               LOG_WARNING("reading size >4 bytes not yet supported");
-               goto error_unset_dtr_r;
-       }
+       /* Optimize the read as much as we can, either way we read in a single pass  */
+       if ((start_byte) || (end_byte)) {
+               /* The algorithm only copies 32 bit words, so the buffer
+                * should be expanded to include the words at either end.
+                * The first and last words will be read into a temp buffer
+                * to avoid corruption
+                */
+               tmp_buff = malloc(total_u32 * 4);
+               if (!tmp_buff)
+                       goto error_unset_dtr_r;
 
-       while (i < count * size) {
+               /* use the tmp buffer to read the entire data */
+               u8buf_ptr = tmp_buff;
+       } else
+               /* address and read length are aligned so read directly into the passed buffer */
+               u8buf_ptr = buffer;
 
-               retval = aarch64_instr_write_data_dcc_64(arm->dpm, 0xd5330400, address+4);
-               if (retval != ERROR_OK)
-                       goto error_unset_dtr_r;
-               retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + CPUDBG_DSCR, &dscr);
+       /* Read the data - Each read of the DTRTX register causes the instruction to be reissued
+        * Abort flags are sticky, so can be read at end of transactions
+        *
+        * This data is read in aligned to 32 bit boundary.
+        */
 
-               dscr = DSCR_INSTR_COMP;
-               retval = aarch64_exec_opcode(target, 0xb85fc000, &dscr);
-               if (retval != ERROR_OK)
+       /* Step 2.a - Loop n-1 times, each read of DBGDTRTX reads the data from [X0] and
+        * increments X0 by 4. */
+       retval = mem_ap_read_buf_noincr(armv8->debug_ap, u8buf_ptr, 4, total_u32-1,
+                                                                       armv8->debug_base + CPUV8_DBG_DTRTX);
+       if (retval != ERROR_OK)
                        goto error_unset_dtr_r;
-               retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + CPUDBG_DSCR, &dscr);
 
-               retval = aarch64_instr_read_data_dcc(arm->dpm, 0xd5130400, &val);
-               if (retval != ERROR_OK)
-                       goto error_unset_dtr_r;
-               memcpy(&buffer[i], &val, size);
-               i += 4;
-               address += 4;
+       /* Step 3.a - set DTR access mode back to Normal mode   */
+       dscr = (dscr & ~DSCR_MA);
+       retval =  mem_ap_write_atomic_u32(armv8->debug_ap,
+                                       armv8->debug_base + CPUV8_DBG_DSCR, dscr);
+       if (retval != ERROR_OK)
+               goto error_free_buff_r;
+
+       /* Step 3.b - read DBGDTRTX for the final value */
+       retval = mem_ap_read_atomic_u32(armv8->debug_ap,
+                       armv8->debug_base + CPUV8_DBG_DTRTX, &value);
+       memcpy(u8buf_ptr + (total_u32-1) * 4, &value, 4);
+
+       /* Check for sticky abort flags in the DSCR */
+       retval = mem_ap_read_atomic_u32(armv8->debug_ap,
+                               armv8->debug_base + CPUV8_DBG_DSCR, &dscr);
+       if (retval != ERROR_OK)
+               goto error_free_buff_r;
+
+       dpm->dscr = dscr;
+
+       if (dscr & (DSCR_ERR | DSCR_SYS_ERROR_PEND)) {
+               /* Abort occurred - clear it and exit */
+               LOG_ERROR("abort occurred - dscr = 0x%08" PRIx32, dscr);
+               armv8_dpm_handle_exception(dpm);
+               goto error_free_buff_r;
        }
 
-       /* Clear any sticky error */
-       mem_ap_write_atomic_u32(armv8->debug_ap,
-               armv8->debug_base + CPUDBG_DRCR, 1<<2);
+       /* check if we need to copy aligned data by applying any shift necessary */
+       if (tmp_buff) {
+               memcpy(buffer, tmp_buff + start_byte, total_bytes);
+               free(tmp_buff);
+       }
 
        /* Done */
        return ERROR_OK;
 
 error_unset_dtr_r:
-       LOG_WARNING("DSCR = 0x%" PRIx32, dscr);
-       /* Todo: Unset DTR mode */
-
+       /* Unset DTR mode */
+       mem_ap_read_atomic_u32(armv8->debug_ap,
+                               armv8->debug_base + CPUV8_DBG_DSCR, &dscr);
+       dscr = (dscr & ~DSCR_MA);
+       mem_ap_write_atomic_u32(armv8->debug_ap,
+                               armv8->debug_base + CPUV8_DBG_DSCR, dscr);
 error_free_buff_r:
        LOG_ERROR("error");
        free(tmp_buff);
-
-       /* Clear any sticky error */
-       mem_ap_write_atomic_u32(armv8->debug_ap,
-               armv8->debug_base + CPUDBG_DRCR, 1<<2);
-
        return ERROR_FAIL;
 }
 
@@ -1906,26 +1666,16 @@ static int aarch64_read_phys_memory(struct target *target,
        target_addr_t address, uint32_t size,
        uint32_t count, uint8_t *buffer)
 {
-       struct armv8_common *armv8 = target_to_armv8(target);
        int retval = ERROR_COMMAND_SYNTAX_ERROR;
-       struct adiv5_dap *swjdp = armv8->arm.dap;
-       uint8_t apsel = swjdp->apsel;
        LOG_DEBUG("Reading memory at real address 0x%" TARGET_PRIxADDR "; size %" PRId32 "; count %" PRId32,
                address, size, count);
 
        if (count && buffer) {
-
-               if (armv8->memory_ap_available && (apsel == armv8->memory_ap->ap_num)) {
-
-                       /* read memory through AHB-AP */
-                       retval = mem_ap_read_buf(armv8->memory_ap, buffer, size, count, address);
-               } else {
-                       /* read memory through APB-AP */
-                       retval = aarch64_mmu_modify(target, 0);
-                       if (retval != ERROR_OK)
-                               return retval;
-                       retval = aarch64_read_apb_ab_memory(target, address, size, count, buffer);
-               }
+               /* read memory through APB-AP */
+               retval = aarch64_mmu_modify(target, 0);
+               if (retval != ERROR_OK)
+                       return retval;
+               retval = aarch64_read_apb_ap_memory(target, address, size, count, buffer);
        }
        return retval;
 }
@@ -1934,136 +1684,44 @@ static int aarch64_read_memory(struct target *target, target_addr_t address,
        uint32_t size, uint32_t count, uint8_t *buffer)
 {
        int mmu_enabled = 0;
-       target_addr_t virt, phys;
        int retval;
-       struct armv8_common *armv8 = target_to_armv8(target);
-       struct adiv5_dap *swjdp = armv8->arm.dap;
-       uint8_t apsel = swjdp->apsel;
 
        /* aarch64 handles unaligned memory access */
        LOG_DEBUG("Reading memory at address 0x%" TARGET_PRIxADDR "; size %" PRId32 "; count %" PRId32, address,
                size, count);
 
        /* determine if MMU was enabled on target stop */
-       if (!armv8->is_armv7r) {
-               retval = aarch64_mmu(target, &mmu_enabled);
+       retval = aarch64_mmu(target, &mmu_enabled);
+       if (retval != ERROR_OK)
+               return retval;
+
+       if (mmu_enabled) {
+               retval = aarch64_check_address(target, address);
+               if (retval != ERROR_OK)
+                       return retval;
+               /* enable MMU as we could have disabled it for phys access */
+               retval = aarch64_mmu_modify(target, 1);
                if (retval != ERROR_OK)
                        return retval;
        }
-
-       if (armv8->memory_ap_available && (apsel == armv8->memory_ap->ap_num)) {
-               if (mmu_enabled) {
-                       virt = address;
-                       retval = aarch64_virt2phys(target, virt, &phys);
-                       if (retval != ERROR_OK)
-                               return retval;
-
-                       LOG_DEBUG("Reading at virtual address. Translating v:0x%" TARGET_PRIxADDR " to r:0x%" TARGET_PRIxADDR,
-                                 virt, phys);
-                       address = phys;
-               }
-               retval = aarch64_read_phys_memory(target, address, size, count,
-                                                 buffer);
-       } else {
-               if (mmu_enabled) {
-                       retval = aarch64_check_address(target, address);
-                       if (retval != ERROR_OK)
-                               return retval;
-                       /* enable MMU as we could have disabled it for phys
-                          access */
-                       retval = aarch64_mmu_modify(target, 1);
-                       if (retval != ERROR_OK)
-                               return retval;
-               }
-               retval = aarch64_read_apb_ab_memory(target, address, size,
-                                                   count, buffer);
-       }
-       return retval;
+       return aarch64_read_apb_ap_memory(target, address, size, count, buffer);
 }
 
 static int aarch64_write_phys_memory(struct target *target,
        target_addr_t address, uint32_t size,
        uint32_t count, const uint8_t *buffer)
 {
-       struct armv8_common *armv8 = target_to_armv8(target);
-       struct adiv5_dap *swjdp = armv8->arm.dap;
        int retval = ERROR_COMMAND_SYNTAX_ERROR;
-       uint8_t apsel = swjdp->apsel;
 
        LOG_DEBUG("Writing memory to real address 0x%" TARGET_PRIxADDR "; size %" PRId32 "; count %" PRId32, address,
                size, count);
 
        if (count && buffer) {
-
-               if (armv8->memory_ap_available && (apsel == armv8->memory_ap->ap_num)) {
-
-                       /* write memory through AHB-AP */
-                       retval = mem_ap_write_buf(armv8->memory_ap, buffer, size, count, address);
-               } else {
-
-                       /* write memory through APB-AP */
-                       if (!armv8->is_armv7r) {
-                               retval = aarch64_mmu_modify(target, 0);
-                               if (retval != ERROR_OK)
-                                       return retval;
-                       }
-                       return aarch64_write_apb_ab_memory(target, address, size, count, buffer);
-               }
-       }
-
-
-       /* REVISIT this op is generic ARMv7-A/R stuff */
-       if (retval == ERROR_OK && target->state == TARGET_HALTED) {
-               struct arm_dpm *dpm = armv8->arm.dpm;
-
-               retval = dpm->prepare(dpm);
+               /* write memory through APB-AP */
+               retval = aarch64_mmu_modify(target, 0);
                if (retval != ERROR_OK)
                        return retval;
-
-               /* The Cache handling will NOT work with MMU active, the
-                * wrong addresses will be invalidated!
-                *
-                * For both ICache and DCache, walk all cache lines in the
-                * address range. Cortex-A8 has fixed 64 byte line length.
-                *
-                * REVISIT per ARMv7, these may trigger watchpoints ...
-                */
-
-               /* invalidate I-Cache */
-               if (armv8->armv8_mmu.armv8_cache.i_cache_enabled) {
-                       /* ICIMVAU - Invalidate Cache single entry
-                        * with MVA to PoU
-                        *      MCR p15, 0, r0, c7, c5, 1
-                        */
-                       for (uint32_t cacheline = address;
-                               cacheline < address + size * count;
-                               cacheline += 64) {
-                               retval = dpm->instr_write_data_r0(dpm,
-                                               ARMV4_5_MCR(15, 0, 0, 7, 5, 1),
-                                               cacheline);
-                               if (retval != ERROR_OK)
-                                       return retval;
-                       }
-               }
-
-               /* invalidate D-Cache */
-               if (armv8->armv8_mmu.armv8_cache.d_u_cache_enabled) {
-                       /* DCIMVAC - Invalidate data Cache line
-                        * with MVA to PoC
-                        *      MCR p15, 0, r0, c7, c6, 1
-                        */
-                       for (uint32_t cacheline = address;
-                               cacheline < address + size * count;
-                               cacheline += 64) {
-                               retval = dpm->instr_write_data_r0(dpm,
-                                               ARMV4_5_MCR(15, 0, 0, 7, 6, 1),
-                                               cacheline);
-                               if (retval != ERROR_OK)
-                                       return retval;
-                       }
-               }
-
-               /* (void) */ dpm->finish(dpm);
+               return aarch64_write_apb_ap_memory(target, address, size, count, buffer);
        }
 
        return retval;
@@ -2073,51 +1731,27 @@ static int aarch64_write_memory(struct target *target, target_addr_t address,
        uint32_t size, uint32_t count, const uint8_t *buffer)
 {
        int mmu_enabled = 0;
-       target_addr_t virt, phys;
        int retval;
-       struct armv8_common *armv8 = target_to_armv8(target);
-       struct adiv5_dap *swjdp = armv8->arm.dap;
-       uint8_t apsel = swjdp->apsel;
 
        /* aarch64 handles unaligned memory access */
        LOG_DEBUG("Writing memory at address 0x%" TARGET_PRIxADDR "; size %" PRId32
                  "; count %" PRId32, address, size, count);
 
        /* determine if MMU was enabled on target stop */
-       if (!armv8->is_armv7r) {
-               retval = aarch64_mmu(target, &mmu_enabled);
+       retval = aarch64_mmu(target, &mmu_enabled);
+       if (retval != ERROR_OK)
+               return retval;
+
+       if (mmu_enabled) {
+               retval = aarch64_check_address(target, address);
+               if (retval != ERROR_OK)
+                       return retval;
+               /* enable MMU as we could have disabled it for phys access */
+               retval = aarch64_mmu_modify(target, 1);
                if (retval != ERROR_OK)
                        return retval;
        }
-
-       if (armv8->memory_ap_available && (apsel == armv8->memory_ap->ap_num)) {
-               LOG_DEBUG("Writing memory to address 0x%" TARGET_PRIxADDR "; size %"
-                         PRId32 "; count %" PRId32, address, size, count);
-               if (mmu_enabled) {
-                       virt = address;
-                       retval = aarch64_virt2phys(target, virt, &phys);
-                       if (retval != ERROR_OK)
-                               return retval;
-
-                       LOG_DEBUG("Writing to virtual address. Translating v:0x%"
-                                 TARGET_PRIxADDR " to r:0x%" TARGET_PRIxADDR, virt, phys);
-                       address = phys;
-               }
-               retval = aarch64_write_phys_memory(target, address, size,
-                               count, buffer);
-       } else {
-               if (mmu_enabled) {
-                       retval = aarch64_check_address(target, address);
-                       if (retval != ERROR_OK)
-                               return retval;
-                       /* enable MMU as we could have disabled it for phys access */
-                       retval = aarch64_mmu_modify(target, 1);
-                       if (retval != ERROR_OK)
-                               return retval;
-               }
-               retval = aarch64_write_apb_ab_memory(target, address, size, count, buffer);
-       }
-       return retval;
+       return aarch64_write_apb_ap_memory(target, address, size, count, buffer);
 }
 
 static int aarch64_handle_target_request(void *priv)
@@ -2135,16 +1769,16 @@ static int aarch64_handle_target_request(void *priv)
                uint32_t request;
                uint32_t dscr;
                retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                               armv8->debug_base + CPUDBG_DSCR, &dscr);
+                               armv8->debug_base + CPUV8_DBG_DSCR, &dscr);
 
                /* check if we have data */
                while ((dscr & DSCR_DTR_TX_FULL) && (retval == ERROR_OK)) {
                        retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                                       armv8->debug_base + CPUDBG_DTRTX, &request);
+                                       armv8->debug_base + CPUV8_DBG_DTRTX, &request);
                        if (retval == ERROR_OK) {
                                target_request(target, request);
                                retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                                               armv8->debug_base + CPUDBG_DSCR, &dscr);
+                                               armv8->debug_base + CPUV8_DBG_DSCR, &dscr);
                        }
                }
        }
@@ -2157,9 +1791,12 @@ static int aarch64_examine_first(struct target *target)
        struct aarch64_common *aarch64 = target_to_aarch64(target);
        struct armv8_common *armv8 = &aarch64->armv8_common;
        struct adiv5_dap *swjdp = armv8->arm.dap;
-       int retval = ERROR_OK;
-       uint32_t pfr, debug, ctypr, ttypr, cpuid;
        int i;
+       int retval = ERROR_OK;
+       uint64_t debug, ttypr;
+       uint32_t cpuid;
+       uint32_t tmp0, tmp1;
+       debug = ttypr = cpuid = 0;
 
        /* We do one extra read to ensure DAP is configured,
         * we call ahbap_debugport_init(swjdp) instead
@@ -2183,20 +1820,6 @@ static int aarch64_examine_first(struct target *target)
 
        armv8->debug_ap->memaccess_tck = 80;
 
-       /* Search for the AHB-AB */
-       armv8->memory_ap_available = false;
-       retval = dap_find_ap(swjdp, AP_TYPE_AHB_AP, &armv8->memory_ap);
-       if (retval == ERROR_OK) {
-               retval = mem_ap_init(armv8->memory_ap);
-               if (retval == ERROR_OK)
-                       armv8->memory_ap_available = true;
-       }
-       if (retval != ERROR_OK) {
-               /* AHB-AP not found or unavailable - use the CPU */
-               LOG_DEBUG("No AHB-AP available for memory access");
-       }
-
-
        if (!target->dbgbase_set) {
                uint32_t dbgbase;
                /* Get ROM Table base */
@@ -2210,85 +1833,73 @@ static int aarch64_examine_first(struct target *target)
                                &armv8->debug_base, &coreidx);
                if (retval != ERROR_OK)
                        return retval;
-               LOG_DEBUG("Detected core %" PRId32 " dbgbase: %08" PRIx32,
-                         coreidx, armv8->debug_base);
+               LOG_DEBUG("Detected core %" PRId32 " dbgbase: %08" PRIx32
+                               " apid: %08" PRIx32, coreidx, armv8->debug_base, apid);
        } else
                armv8->debug_base = target->dbgbase;
 
        retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + 0x300, 0);
+                       armv8->debug_base + CPUV8_DBG_LOCKACCESS, 0xC5ACCE55);
        if (retval != ERROR_OK) {
-               LOG_DEBUG("Examine %s failed", "oslock");
+               LOG_DEBUG("LOCK debug access fail");
                return retval;
        }
 
-       retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + 0x88, &cpuid);
-       LOG_DEBUG("0x88 = %x", cpuid);
-
-       retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + 0x314, &cpuid);
-       LOG_DEBUG("0x314 = %x", cpuid);
-
-       retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + 0x310, &cpuid);
-       LOG_DEBUG("0x310 = %x", cpuid);
-       if (retval != ERROR_OK)
-               return retval;
-
-       retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + CPUDBG_CPUID, &cpuid);
+       retval = mem_ap_write_atomic_u32(armv8->debug_ap,
+                       armv8->debug_base + CPUV8_DBG_OSLAR, 0);
        if (retval != ERROR_OK) {
-               LOG_DEBUG("Examine %s failed", "CPUID");
+               LOG_DEBUG("Examine %s failed", "oslock");
                return retval;
        }
 
        retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + CPUDBG_CTYPR, &ctypr);
+                       armv8->debug_base + CPUV8_DBG_MAINID0, &cpuid);
        if (retval != ERROR_OK) {
-               LOG_DEBUG("Examine %s failed", "CTYPR");
+               LOG_DEBUG("Examine %s failed", "CPUID");
                return retval;
        }
 
        retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + CPUDBG_TTYPR, &ttypr);
+                       armv8->debug_base + CPUV8_DBG_MEMFEATURE0, &tmp0);
+       retval += mem_ap_read_atomic_u32(armv8->debug_ap,
+                       armv8->debug_base + CPUV8_DBG_MEMFEATURE0 + 4, &tmp1);
        if (retval != ERROR_OK) {
-               LOG_DEBUG("Examine %s failed", "TTYPR");
+               LOG_DEBUG("Examine %s failed", "Memory Model Type");
                return retval;
        }
+       ttypr |= tmp1;
+       ttypr = (ttypr << 32) | tmp0;
 
        retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + ID_AA64PFR0_EL1, &pfr);
-       if (retval != ERROR_OK) {
-               LOG_DEBUG("Examine %s failed", "ID_AA64DFR0_EL1");
-               return retval;
-       }
-       retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + ID_AA64DFR0_EL1, &debug);
+                       armv8->debug_base + CPUV8_DBG_DBGFEATURE0, &tmp0);
+       retval += mem_ap_read_atomic_u32(armv8->debug_ap,
+                       armv8->debug_base + CPUV8_DBG_DBGFEATURE0 + 4, &tmp1);
        if (retval != ERROR_OK) {
                LOG_DEBUG("Examine %s failed", "ID_AA64DFR0_EL1");
                return retval;
        }
+       debug |= tmp1;
+       debug = (debug << 32) | tmp0;
 
        LOG_DEBUG("cpuid = 0x%08" PRIx32, cpuid);
-       LOG_DEBUG("ctypr = 0x%08" PRIx32, ctypr);
-       LOG_DEBUG("ttypr = 0x%08" PRIx32, ttypr);
-       LOG_DEBUG("ID_AA64PFR0_EL1 = 0x%08" PRIx32, pfr);
-       LOG_DEBUG("ID_AA64DFR0_EL1 = 0x%08" PRIx32, debug);
+       LOG_DEBUG("ttypr = 0x%08" PRIx64, ttypr);
+       LOG_DEBUG("debug = 0x%08" PRIx64, debug);
+
+       if (target->ctibase == 0) {
+               /* assume a v8 rom table layout */
+               armv8->cti_base = target->ctibase = armv8->debug_base + 0x10000;
+               LOG_INFO("Target ctibase is not set, assuming 0x%0" PRIx32, target->ctibase);
+       } else
+               armv8->cti_base = target->ctibase;
 
        armv8->arm.core_type = ARM_MODE_MON;
-       armv8->arm.core_state = ARM_STATE_AARCH64;
        retval = aarch64_dpm_setup(aarch64, debug);
        if (retval != ERROR_OK)
                return retval;
 
        /* Setup Breakpoint Register Pairs */
-       aarch64->brp_num = ((debug >> 12) & 0x0F) + 1;
-       aarch64->brp_num_context = ((debug >> 28) & 0x0F) + 1;
-
-       /* hack - no context bpt support yet */
-       aarch64->brp_num_context = 0;
-
+       aarch64->brp_num = (uint32_t)((debug >> 12) & 0x0F) + 1;
+       aarch64->brp_num_context = (uint32_t)((debug >> 28) & 0x0F) + 1;
        aarch64->brp_num_available = aarch64->brp_num;
        aarch64->brp_list = calloc(aarch64->brp_num, sizeof(struct aarch64_brp));
        for (i = 0; i < aarch64->brp_num; i++) {
@@ -2376,8 +1987,6 @@ static int aarch64_target_create(struct target *target, Jim_Interp *interp)
 {
        struct aarch64_common *aarch64 = calloc(1, sizeof(struct aarch64_common));
 
-       aarch64->armv8_common.is_armv7r = false;
-
        return aarch64_init_arch_info(target, aarch64, target->tap);
 }
 
@@ -2395,26 +2004,7 @@ static int aarch64_mmu(struct target *target, int *enabled)
 static int aarch64_virt2phys(struct target *target, target_addr_t virt,
                             target_addr_t *phys)
 {
-       int retval = ERROR_FAIL;
-       struct armv8_common *armv8 = target_to_armv8(target);
-       struct adiv5_dap *swjdp = armv8->arm.dap;
-       uint8_t apsel = swjdp->apsel;
-       if (armv8->memory_ap_available && (apsel == armv8->memory_ap->ap_num)) {
-               uint32_t ret;
-               retval = armv8_mmu_translate_va(target,
-                               virt, &ret);
-               if (retval != ERROR_OK)
-                       goto done;
-               *phys = ret;
-       } else {/*  use this method if armv8->memory_ap not selected
-                *  mmu must be enable in order to get a correct translation */
-               retval = aarch64_mmu_modify(target, 1);
-               if (retval != ERROR_OK)
-                       goto done;
-               retval = armv8_mmu_translate_va_pa(target, virt,  phys, 1);
-       }
-done:
-       return retval;
+       return armv8_mmu_translate_va_pa(target, virt, phys, 1);
 }
 
 COMMAND_HANDLER(aarch64_handle_cache_info_command)