target: make local symbols static
[fw/openocd] / src / target / aarch64.c
index 05abe6b717c5ab5350723609384b31226002a8bd..8b0cf3b631d9d74096a01fb59bab62349b1a4f79 100644 (file)
@@ -1,20 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
 /***************************************************************************
  *   Copyright (C) 2015 by David Ung                                       *
  *                                                                         *
- *   This program is free software; you can redistribute it and/or modify  *
- *   it under the terms of the GNU General Public License as published by  *
- *   the Free Software Foundation; either version 2 of the License, or     *
- *   (at your option) any later version.                                   *
- *                                                                         *
- *   This program is distributed in the hope that it will be useful,       *
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
- *   GNU General Public License for more details.                          *
- *                                                                         *
- *   You should have received a copy of the GNU General Public License     *
- *   along with this program; if not, write to the                         *
- *   Free Software Foundation, Inc.,                                       *
- *                                                                         *
  ***************************************************************************/
 
 #ifdef HAVE_CONFIG_H
 
 #include "breakpoints.h"
 #include "aarch64.h"
+#include "a64_disassembler.h"
 #include "register.h"
 #include "target_request.h"
 #include "target_type.h"
 #include "armv8_opcodes.h"
 #include "armv8_cache.h"
+#include "arm_coresight.h"
+#include "arm_semihosting.h"
+#include "jtag/interface.h"
+#include "smp.h"
 #include <helper/time_support.h>
 
+enum restart_mode {
+       RESTART_LAZY,
+       RESTART_SYNC,
+};
+
+enum halt_mode {
+       HALT_LAZY,
+       HALT_SYNC,
+};
+
+struct aarch64_private_config {
+       struct adiv5_private_config adiv5_config;
+       struct arm_cti *cti;
+};
+
 static int aarch64_poll(struct target *target);
 static int aarch64_debug_entry(struct target *target);
 static int aarch64_restore_context(struct target *target, bool bpwp);
@@ -44,12 +52,14 @@ static int aarch64_unset_breakpoint(struct target *target,
 static int aarch64_mmu(struct target *target, int *enabled);
 static int aarch64_virt2phys(struct target *target,
        target_addr_t virt, target_addr_t *phys);
-static int aarch64_read_apb_ap_memory(struct target *target,
+static int aarch64_read_cpu_memory(struct target *target,
        uint64_t address, uint32_t size, uint32_t count, uint8_t *buffer);
 
 static int aarch64_restore_system_control_reg(struct target *target)
 {
+       enum arm_mode target_mode = ARM_MODE_ANY;
        int retval = ERROR_OK;
+       uint32_t instr;
 
        struct aarch64_common *aarch64 = target_to_aarch64(target);
        struct armv8_common *armv8 = target_to_armv8(target);
@@ -59,51 +69,52 @@ static int aarch64_restore_system_control_reg(struct target *target)
                /* LOG_INFO("cp15_control_reg: %8.8" PRIx32, cortex_v8->cp15_control_reg); */
 
                switch (armv8->arm.core_mode) {
-                       case ARMV8_64_EL0T:
-                       case ARMV8_64_EL1T:
-                       case ARMV8_64_EL1H:
-                               retval = armv8->arm.msr(target, 3, /*op 0*/
-                                               0, 1,   /* op1, op2 */
-                                               0, 0,   /* CRn, CRm */
-                                               aarch64->system_control_reg);
-                               if (retval != ERROR_OK)
-                                       return retval;
+               case ARMV8_64_EL0T:
+                       target_mode = ARMV8_64_EL1H;
+                       /* fall through */
+               case ARMV8_64_EL1T:
+               case ARMV8_64_EL1H:
+                       instr = ARMV8_MSR_GP(SYSTEM_SCTLR_EL1, 0);
                        break;
-                       case ARMV8_64_EL2T:
-                       case ARMV8_64_EL2H:
-                               retval = armv8->arm.msr(target, 3, /*op 0*/
-                                               4, 1,   /* op1, op2 */
-                                               0, 0,   /* CRn, CRm */
-                                               aarch64->system_control_reg);
-                               if (retval != ERROR_OK)
-                                       return retval;
+               case ARMV8_64_EL2T:
+               case ARMV8_64_EL2H:
+                       instr = ARMV8_MSR_GP(SYSTEM_SCTLR_EL2, 0);
                        break;
-                       case ARMV8_64_EL3H:
-                       case ARMV8_64_EL3T:
-                               retval = armv8->arm.msr(target, 3, /*op 0*/
-                                               6, 1,   /* op1, op2 */
-                                               0, 0,   /* CRn, CRm */
-                                               aarch64->system_control_reg);
-                               if (retval != ERROR_OK)
-                                       return retval;
+               case ARMV8_64_EL3H:
+               case ARMV8_64_EL3T:
+                       instr = ARMV8_MSR_GP(SYSTEM_SCTLR_EL3, 0);
                        break;
-                       default:
-                               retval = armv8->arm.mcr(target, 15, 0, 0, 1, 0, aarch64->system_control_reg);
-                               if (retval != ERROR_OK)
-                                       return retval;
-                               break;
-                       }
+
+               case ARM_MODE_SVC:
+               case ARM_MODE_ABT:
+               case ARM_MODE_FIQ:
+               case ARM_MODE_IRQ:
+               case ARM_MODE_HYP:
+               case ARM_MODE_UND:
+               case ARM_MODE_SYS:
+                       instr = ARMV4_5_MCR(15, 0, 0, 1, 0, 0);
+                       break;
+
+               default:
+                       LOG_ERROR("cannot read system control register in this mode: (%s : 0x%x)",
+                                       armv8_mode_name(armv8->arm.core_mode), armv8->arm.core_mode);
+                       return ERROR_FAIL;
+               }
+
+               if (target_mode != ARM_MODE_ANY)
+                       armv8_dpm_modeswitch(&armv8->dpm, target_mode);
+
+               retval = armv8->dpm.instr_write_data_r0(&armv8->dpm, instr, aarch64->system_control_reg);
+               if (retval != ERROR_OK)
+                       return retval;
+
+               if (target_mode != ARM_MODE_ANY)
+                       armv8_dpm_modeswitch(&armv8->dpm, ARM_MODE_ANY);
        }
+
        return retval;
 }
 
-/*  check address before aarch64_apb read write access with mmu on
- *  remove apb predictible data abort */
-static int aarch64_check_address(struct target *target, uint32_t address)
-{
-       /* TODO */
-       return ERROR_OK;
-}
 /*  modify system_control_reg in order to enable or disable mmu for :
  *  - virt2phys address conversion
  *  - read or write memory in phys or virt address */
@@ -112,6 +123,8 @@ static int aarch64_mmu_modify(struct target *target, int enable)
        struct aarch64_common *aarch64 = target_to_aarch64(target);
        struct armv8_common *armv8 = &aarch64->armv8_common;
        int retval = ERROR_OK;
+       enum arm_mode target_mode = ARM_MODE_ANY;
+       uint32_t instr = 0;
 
        if (enable) {
                /*      if mmu enabled at target stop and mmu not enable */
@@ -119,86 +132,61 @@ static int aarch64_mmu_modify(struct target *target, int enable)
                        LOG_ERROR("trying to enable mmu on target stopped with mmu disable");
                        return ERROR_FAIL;
                }
-               if (!(aarch64->system_control_reg_curr & 0x1U)) {
+               if (!(aarch64->system_control_reg_curr & 0x1U))
                        aarch64->system_control_reg_curr |= 0x1U;
-                       switch (armv8->arm.core_mode) {
-                               case ARMV8_64_EL0T:
-                               case ARMV8_64_EL1T:
-                               case ARMV8_64_EL1H:
-                                       retval = armv8->arm.msr(target, 3, /*op 0*/
-                                                       0, 0,   /* op1, op2 */
-                                                       1, 0,   /* CRn, CRm */
-                                                       aarch64->system_control_reg_curr);
-                                       if (retval != ERROR_OK)
-                                               return retval;
-                               break;
-                               case ARMV8_64_EL2T:
-                               case ARMV8_64_EL2H:
-                                       retval = armv8->arm.msr(target, 3, /*op 0*/
-                                                       4, 0,   /* op1, op2 */
-                                                       1, 0,   /* CRn, CRm */
-                                                       aarch64->system_control_reg_curr);
-                                       if (retval != ERROR_OK)
-                                               return retval;
-                               break;
-                               case ARMV8_64_EL3H:
-                               case ARMV8_64_EL3T:
-                                       retval = armv8->arm.msr(target, 3, /*op 0*/
-                                                       6, 0,   /* op1, op2 */
-                                                       1, 0,   /* CRn, CRm */
-                                                       aarch64->system_control_reg_curr);
-                                       if (retval != ERROR_OK)
-                                               return retval;
-                               break;
-                               default:
-                                       LOG_DEBUG("unknow cpu state 0x%x" PRIx32, armv8->arm.core_state);
-                       }
-               }
        } else {
                if (aarch64->system_control_reg_curr & 0x4U) {
                        /*  data cache is active */
                        aarch64->system_control_reg_curr &= ~0x4U;
-                       /* flush data cache armv7 function to be called */
+                       /* flush data cache armv8 function to be called */
                        if (armv8->armv8_mmu.armv8_cache.flush_all_data_cache)
                                armv8->armv8_mmu.armv8_cache.flush_all_data_cache(target);
                }
                if ((aarch64->system_control_reg_curr & 0x1U)) {
                        aarch64->system_control_reg_curr &= ~0x1U;
-                       switch (armv8->arm.core_mode) {
-                               case ARMV8_64_EL0T:
-                               case ARMV8_64_EL1T:
-                               case ARMV8_64_EL1H:
-                                       retval = armv8->arm.msr(target, 3, /*op 0*/
-                                                       0, 0,   /* op1, op2 */
-                                                       1, 0,   /* CRn, CRm */
-                                                       aarch64->system_control_reg_curr);
-                                       if (retval != ERROR_OK)
-                                               return retval;
-                                       break;
-                               case ARMV8_64_EL2T:
-                               case ARMV8_64_EL2H:
-                                       retval = armv8->arm.msr(target, 3, /*op 0*/
-                                                       4, 0,   /* op1, op2 */
-                                                       1, 0,   /* CRn, CRm */
-                                                       aarch64->system_control_reg_curr);
-                                       if (retval != ERROR_OK)
-                                               return retval;
-                                       break;
-                               case ARMV8_64_EL3H:
-                               case ARMV8_64_EL3T:
-                                       retval = armv8->arm.msr(target, 3, /*op 0*/
-                                                       6, 0,   /* op1, op2 */
-                                                       1, 0,   /* CRn, CRm */
-                                                       aarch64->system_control_reg_curr);
-                                       if (retval != ERROR_OK)
-                                               return retval;
-                                       break;
-                               default:
-                                       LOG_DEBUG("unknow cpu state 0x%x" PRIx32, armv8->arm.core_state);
-                                       break;
-                       }
                }
        }
+
+       switch (armv8->arm.core_mode) {
+       case ARMV8_64_EL0T:
+               target_mode = ARMV8_64_EL1H;
+               /* fall through */
+       case ARMV8_64_EL1T:
+       case ARMV8_64_EL1H:
+               instr = ARMV8_MSR_GP(SYSTEM_SCTLR_EL1, 0);
+               break;
+       case ARMV8_64_EL2T:
+       case ARMV8_64_EL2H:
+               instr = ARMV8_MSR_GP(SYSTEM_SCTLR_EL2, 0);
+               break;
+       case ARMV8_64_EL3H:
+       case ARMV8_64_EL3T:
+               instr = ARMV8_MSR_GP(SYSTEM_SCTLR_EL3, 0);
+               break;
+
+       case ARM_MODE_SVC:
+       case ARM_MODE_ABT:
+       case ARM_MODE_FIQ:
+       case ARM_MODE_IRQ:
+       case ARM_MODE_HYP:
+       case ARM_MODE_UND:
+       case ARM_MODE_SYS:
+               instr = ARMV4_5_MCR(15, 0, 0, 1, 0, 0);
+               break;
+
+       default:
+               LOG_DEBUG("unknown cpu state 0x%x", armv8->arm.core_mode);
+               break;
+       }
+       if (target_mode != ARM_MODE_ANY)
+               armv8_dpm_modeswitch(&armv8->dpm, target_mode);
+
+       retval = armv8->dpm.instr_write_data_r0(&armv8->dpm, instr,
+                               aarch64->system_control_reg_curr);
+
+       if (target_mode != ARM_MODE_ANY)
+               armv8_dpm_modeswitch(&armv8->dpm, ARM_MODE_ANY);
+
        return retval;
 }
 
@@ -211,7 +199,14 @@ static int aarch64_init_debug_access(struct target *target)
        int retval;
        uint32_t dummy;
 
-       LOG_DEBUG(" ");
+       LOG_DEBUG("%s", target_name(target));
+
+       retval = mem_ap_write_atomic_u32(armv8->debug_ap,
+                       armv8->debug_base + CPUV8_DBG_OSLAR, 0);
+       if (retval != ERROR_OK) {
+               LOG_DEBUG("Examine %s failed", "oslock");
+               return retval;
+       }
 
        /* Clear Sticky Power Down status Bit in PRSR to enable access to
           the registers in the Core Power Domain */
@@ -228,32 +223,27 @@ static int aarch64_init_debug_access(struct target *target)
         */
 
        /* Enable CTI */
-       retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                       armv8->cti_base + CTI_CTR, 1);
-       /* By default, gate all channel triggers to and from the CTM */
+       retval = arm_cti_enable(armv8->cti, true);
+       /* By default, gate all channel events to and from the CTM */
        if (retval == ERROR_OK)
-               retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                               armv8->cti_base + CTI_GATE, 0);
-       /* output halt requests to PE on channel 0 trigger */
+               retval = arm_cti_write_reg(armv8->cti, CTI_GATE, 0);
+       /* output halt requests to PE on channel 0 event */
        if (retval == ERROR_OK)
-               retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                               armv8->cti_base + CTI_OUTEN0, CTI_CHNL(0));
-       /* output restart requests to PE on channel 1 trigger */
+               retval = arm_cti_write_reg(armv8->cti, CTI_OUTEN0, CTI_CHNL(0));
+       /* output restart requests to PE on channel 1 event */
        if (retval == ERROR_OK)
-               retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                               armv8->cti_base + CTI_OUTEN1, CTI_CHNL(1));
+               retval = arm_cti_write_reg(armv8->cti, CTI_OUTEN1, CTI_CHNL(1));
        if (retval != ERROR_OK)
                return retval;
 
        /* Resync breakpoint registers */
 
-       /* Since this is likely called from init or reset, update target state information*/
-       return aarch64_poll(target);
+       return ERROR_OK;
 }
 
 /* Write to memory mapped registers directly with no cache or mmu handling */
 static int aarch64_dap_write_memap_register_u32(struct target *target,
-       uint32_t address,
+       target_addr_t address,
        uint32_t value)
 {
        int retval;
@@ -279,121 +269,278 @@ static int aarch64_dpm_setup(struct aarch64_common *a8, uint64_t debug)
        return retval;
 }
 
-static struct target *get_aarch64(struct target *target, int32_t coreid)
+static int aarch64_set_dscr_bits(struct target *target, unsigned long bit_mask, unsigned long value)
+{
+       struct armv8_common *armv8 = target_to_armv8(target);
+       return armv8_set_dbgreg_bits(armv8, CPUV8_DBG_DSCR, bit_mask, value);
+}
+
+static int aarch64_check_state_one(struct target *target,
+               uint32_t mask, uint32_t val, int *p_result, uint32_t *p_prsr)
 {
-       struct target_list *head;
-       struct target *curr;
+       struct armv8_common *armv8 = target_to_armv8(target);
+       uint32_t prsr;
+       int retval;
 
-       head = target->head;
-       while (head != (struct target_list *)NULL) {
-               curr = head->target;
-               if ((curr->coreid == coreid) && (curr->state == TARGET_HALTED))
-                       return curr;
-               head = head->next;
+       retval = mem_ap_read_atomic_u32(armv8->debug_ap,
+                       armv8->debug_base + CPUV8_DBG_PRSR, &prsr);
+       if (retval != ERROR_OK)
+               return retval;
+
+       if (p_prsr)
+               *p_prsr = prsr;
+
+       if (p_result)
+               *p_result = (prsr & mask) == (val & mask);
+
+       return ERROR_OK;
+}
+
+static int aarch64_wait_halt_one(struct target *target)
+{
+       int retval = ERROR_OK;
+       uint32_t prsr;
+
+       int64_t then = timeval_ms();
+       for (;;) {
+               int halted;
+
+               retval = aarch64_check_state_one(target, PRSR_HALT, PRSR_HALT, &halted, &prsr);
+               if (retval != ERROR_OK || halted)
+                       break;
+
+               if (timeval_ms() > then + 1000) {
+                       retval = ERROR_TARGET_TIMEOUT;
+                       LOG_DEBUG("target %s timeout, prsr=0x%08"PRIx32, target_name(target), prsr);
+                       break;
+               }
        }
-       return target;
+       return retval;
 }
-static int aarch64_halt(struct target *target);
 
-static int aarch64_halt_smp(struct target *target)
+static int aarch64_prepare_halt_smp(struct target *target, bool exc_target, struct target **p_first)
 {
        int retval = ERROR_OK;
-       struct target_list *head = target->head;
+       struct target_list *head;
+       struct target *first = NULL;
+
+       LOG_DEBUG("target %s exc %i", target_name(target), exc_target);
 
-       while (head != (struct target_list *)NULL) {
+       foreach_smp_target(head, target->smp_targets) {
                struct target *curr = head->target;
                struct armv8_common *armv8 = target_to_armv8(curr);
 
+               if (exc_target && curr == target)
+                       continue;
+               if (!target_was_examined(curr))
+                       continue;
+               if (curr->state != TARGET_RUNNING)
+                       continue;
+
+               /* HACK: mark this target as prepared for halting */
+               curr->debug_reason = DBG_REASON_DBGRQ;
+
                /* open the gate for channel 0 to let HALT requests pass to the CTM */
-               if (curr->smp)
-                       retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                                       armv8->cti_base + CTI_GATE, CTI_CHNL(0));
+               retval = arm_cti_ungate_channel(armv8->cti, 0);
+               if (retval == ERROR_OK)
+                       retval = aarch64_set_dscr_bits(curr, DSCR_HDE, DSCR_HDE);
                if (retval != ERROR_OK)
                        break;
 
-               head = head->next;
+               LOG_DEBUG("target %s prepared", target_name(curr));
+
+               if (!first)
+                       first = curr;
+       }
+
+       if (p_first) {
+               if (exc_target && first)
+                       *p_first = first;
+               else
+                       *p_first = target;
+       }
+
+       return retval;
+}
+
+static int aarch64_halt_one(struct target *target, enum halt_mode mode)
+{
+       int retval = ERROR_OK;
+       struct armv8_common *armv8 = target_to_armv8(target);
+
+       LOG_DEBUG("%s", target_name(target));
+
+       /* allow Halting Debug Mode */
+       retval = aarch64_set_dscr_bits(target, DSCR_HDE, DSCR_HDE);
+       if (retval != ERROR_OK)
+               return retval;
+
+       /* trigger an event on channel 0, this outputs a halt request to the PE */
+       retval = arm_cti_pulse_channel(armv8->cti, 0);
+       if (retval != ERROR_OK)
+               return retval;
+
+       if (mode == HALT_SYNC) {
+               retval = aarch64_wait_halt_one(target);
+               if (retval != ERROR_OK) {
+                       if (retval == ERROR_TARGET_TIMEOUT)
+                               LOG_ERROR("Timeout waiting for target %s halt", target_name(target));
+                       return retval;
+               }
        }
 
+       return ERROR_OK;
+}
+
+static int aarch64_halt_smp(struct target *target, bool exc_target)
+{
+       struct target *next = target;
+       int retval;
+
+       /* prepare halt on all PEs of the group */
+       retval = aarch64_prepare_halt_smp(target, exc_target, &next);
+
+       if (exc_target && next == target)
+               return retval;
+
        /* halt the target PE */
        if (retval == ERROR_OK)
-               retval = aarch64_halt(target);
+               retval = aarch64_halt_one(next, HALT_LAZY);
+
+       if (retval != ERROR_OK)
+               return retval;
+
+       /* wait for all PEs to halt */
+       int64_t then = timeval_ms();
+       for (;;) {
+               bool all_halted = true;
+               struct target_list *head;
+               struct target *curr;
+
+               foreach_smp_target(head, target->smp_targets) {
+                       int halted;
+
+                       curr = head->target;
+
+                       if (!target_was_examined(curr))
+                               continue;
+
+                       retval = aarch64_check_state_one(curr, PRSR_HALT, PRSR_HALT, &halted, NULL);
+                       if (retval != ERROR_OK || !halted) {
+                               all_halted = false;
+                               break;
+                       }
+               }
+
+               if (all_halted)
+                       break;
+
+               if (timeval_ms() > then + 1000) {
+                       retval = ERROR_TARGET_TIMEOUT;
+                       break;
+               }
+
+               /*
+                * HACK: on Hi6220 there are 8 cores organized in 2 clusters
+                * and it looks like the CTI's are not connected by a common
+                * trigger matrix. It seems that we need to halt one core in each
+                * cluster explicitly. So if we find that a core has not halted
+                * yet, we trigger an explicit halt for the second cluster.
+                */
+               retval = aarch64_halt_one(curr, HALT_LAZY);
+               if (retval != ERROR_OK)
+                       break;
+       }
 
        return retval;
 }
 
-static int update_halt_gdb(struct target *target)
+static int update_halt_gdb(struct target *target, enum target_debug_reason debug_reason)
 {
-       int retval = 0;
-       if (target->gdb_service && target->gdb_service->core[0] == -1) {
-               target->gdb_service->target = target;
-               target->gdb_service->core[0] = target->coreid;
-               retval += aarch64_halt_smp(target);
+       struct target *gdb_target = NULL;
+       struct target_list *head;
+       struct target *curr;
+
+       if (debug_reason == DBG_REASON_NOTHALTED) {
+               LOG_DEBUG("Halting remaining targets in SMP group");
+               aarch64_halt_smp(target, true);
        }
-       return retval;
+
+       /* poll all targets in the group, but skip the target that serves GDB */
+       foreach_smp_target(head, target->smp_targets) {
+               curr = head->target;
+               /* skip calling context */
+               if (curr == target)
+                       continue;
+               if (!target_was_examined(curr))
+                       continue;
+               /* skip targets that were already halted */
+               if (curr->state == TARGET_HALTED)
+                       continue;
+               /* remember the gdb_service->target */
+               if (curr->gdb_service)
+                       gdb_target = curr->gdb_service->target;
+               /* skip it */
+               if (curr == gdb_target)
+                       continue;
+
+               /* avoid recursion in aarch64_poll() */
+               curr->smp = 0;
+               aarch64_poll(curr);
+               curr->smp = 1;
+       }
+
+       /* after all targets were updated, poll the gdb serving target */
+       if (gdb_target && gdb_target != target)
+               aarch64_poll(gdb_target);
+
+       return ERROR_OK;
 }
 
 /*
- * Cortex-A8 Run control
+ * Aarch64 Run control
  */
 
 static int aarch64_poll(struct target *target)
 {
+       enum target_state prev_target_state;
        int retval = ERROR_OK;
-       uint32_t dscr;
-       struct aarch64_common *aarch64 = target_to_aarch64(target);
-       struct armv8_common *armv8 = &aarch64->armv8_common;
-       enum target_state prev_target_state = target->state;
-       /*  toggle to another core is done by gdb as follow */
-       /*  maint packet J core_id */
-       /*  continue */
-       /*  the next polling trigger an halt event sent to gdb */
-       if ((target->state == TARGET_HALTED) && (target->smp) &&
-               (target->gdb_service) &&
-               (target->gdb_service->target == NULL)) {
-               target->gdb_service->target =
-                       get_aarch64(target, target->gdb_service->core[1]);
-               target_call_event_callbacks(target, TARGET_EVENT_HALTED);
-               return retval;
-       }
-       retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + CPUV8_DBG_DSCR, &dscr);
+       int halted;
+
+       retval = aarch64_check_state_one(target,
+                               PRSR_HALT, PRSR_HALT, &halted, NULL);
        if (retval != ERROR_OK)
                return retval;
-       aarch64->cpudbg_dscr = dscr;
 
-       if (DSCR_RUN_MODE(dscr) == 0x3) {
+       if (halted) {
+               prev_target_state = target->state;
                if (prev_target_state != TARGET_HALTED) {
+                       enum target_debug_reason debug_reason = target->debug_reason;
+
                        /* We have a halting debug event */
-                       LOG_DEBUG("Target halted");
                        target->state = TARGET_HALTED;
-                       if ((prev_target_state == TARGET_RUNNING)
-                               || (prev_target_state == TARGET_UNKNOWN)
-                               || (prev_target_state == TARGET_RESET)) {
-                               retval = aarch64_debug_entry(target);
-                               if (retval != ERROR_OK)
-                                       return retval;
-                               if (target->smp) {
-                                       retval = update_halt_gdb(target);
-                                       if (retval != ERROR_OK)
-                                               return retval;
-                               }
-                               target_call_event_callbacks(target,
-                                       TARGET_EVENT_HALTED);
-                       }
-                       if (prev_target_state == TARGET_DEBUG_RUNNING) {
-                               LOG_DEBUG(" ");
+                       LOG_DEBUG("Target %s halted", target_name(target));
+                       retval = aarch64_debug_entry(target);
+                       if (retval != ERROR_OK)
+                               return retval;
 
-                               retval = aarch64_debug_entry(target);
-                               if (retval != ERROR_OK)
-                                       return retval;
-                               if (target->smp) {
-                                       retval = update_halt_gdb(target);
-                                       if (retval != ERROR_OK)
-                                               return retval;
-                               }
+                       if (target->smp)
+                               update_halt_gdb(target, debug_reason);
+
+                       if (arm_semihosting(target, &retval) != 0)
+                               return retval;
 
-                               target_call_event_callbacks(target,
-                                       TARGET_EVENT_DEBUG_HALTED);
+                       switch (prev_target_state) {
+                       case TARGET_RUNNING:
+                       case TARGET_UNKNOWN:
+                       case TARGET_RESET:
+                               target_call_event_callbacks(target, TARGET_EVENT_HALTED);
+                               break;
+                       case TARGET_DEBUG_RUNNING:
+                               target_call_event_callbacks(target, TARGET_EVENT_DEBUG_HALTED);
+                               break;
+                       default:
+                               break;
                        }
                }
        } else
@@ -404,47 +551,16 @@ static int aarch64_poll(struct target *target)
 
 static int aarch64_halt(struct target *target)
 {
-       int retval = ERROR_OK;
-       uint32_t dscr;
        struct armv8_common *armv8 = target_to_armv8(target);
+       armv8->last_run_control_op = ARMV8_RUNCONTROL_HALT;
 
-       /*
-        * add HDE in halting debug mode
-        */
-       retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + CPUV8_DBG_DSCR, &dscr);
-       if (retval == ERROR_OK)
-               retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                               armv8->debug_base + CPUV8_DBG_DSCR, dscr | DSCR_HDE);
-       if (retval != ERROR_OK)
-               return retval;
-
-       /* trigger an event on channel 0, this outputs a halt request to the PE */
-       retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                       armv8->cti_base + CTI_APPPULSE, CTI_CHNL(0));
-       if (retval != ERROR_OK)
-               return retval;
-
-       long long then = timeval_ms();
-       for (;; ) {
-               retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                               armv8->debug_base + CPUV8_DBG_DSCR, &dscr);
-               if (retval != ERROR_OK)
-                       return retval;
-               if ((dscr & DSCRV8_HALT_MASK) != 0)
-                       break;
-               if (timeval_ms() > then + 1000) {
-                       LOG_ERROR("Timeout waiting for halt");
-                       return ERROR_FAIL;
-               }
-       }
-
-       target->debug_reason = DBG_REASON_DBGRQ;
+       if (target->smp)
+               return aarch64_halt_smp(target, false);
 
-       return ERROR_OK;
+       return aarch64_halt_one(target, HALT_SYNC);
 }
 
-static int aarch64_internal_restore(struct target *target, int current,
+static int aarch64_restore_one(struct target *target, int current,
        uint64_t *address, int handle_breakpoints, int debug_execution)
 {
        struct armv8_common *armv8 = target_to_armv8(target);
@@ -452,6 +568,8 @@ static int aarch64_internal_restore(struct target *target, int current,
        int retval;
        uint64_t resume_pc;
 
+       LOG_DEBUG("%s", target_name(target));
+
        if (!debug_execution)
                target_free_all_working_areas(target);
 
@@ -483,42 +601,33 @@ static int aarch64_internal_restore(struct target *target, int current,
                        LOG_ERROR("How do I resume into Jazelle state??");
                        return ERROR_FAIL;
        }
-       LOG_DEBUG("resume pc = 0x%16" PRIx64, resume_pc);
+       LOG_DEBUG("resume pc = 0x%016" PRIx64, resume_pc);
        buf_set_u64(arm->pc->value, 0, 64, resume_pc);
-       arm->pc->dirty = 1;
-       arm->pc->valid = 1;
-       dpmv8_modeswitch(&armv8->dpm, ARM_MODE_ANY);
+       arm->pc->dirty = true;
+       arm->pc->valid = true;
 
        /* called it now before restoring context because it uses cpu
         * register r0 for restoring system control register */
        retval = aarch64_restore_system_control_reg(target);
-       if (retval != ERROR_OK)
-               return retval;
-       retval = aarch64_restore_context(target, handle_breakpoints);
-       if (retval != ERROR_OK)
-               return retval;
-       target->debug_reason = DBG_REASON_NOTHALTED;
-       target->state = TARGET_RUNNING;
-
-       /* registers are now invalid */
-       register_cache_invalidate(arm->core_cache);
+       if (retval == ERROR_OK)
+               retval = aarch64_restore_context(target, handle_breakpoints);
 
        return retval;
 }
 
-static int aarch64_internal_restart(struct target *target, bool slave_pe)
+/**
+ * prepare single target for restart
+ *
+ *
+ */
+static int aarch64_prepare_restart_one(struct target *target)
 {
        struct armv8_common *armv8 = target_to_armv8(target);
-       struct arm *arm = &armv8->arm;
        int retval;
        uint32_t dscr;
-       /*
-        * * Restart core and wait for it to be started.  Clear ITRen and sticky
-        * * exception flags: see ARMv7 ARM, C5.9.
-        *
-        * REVISIT: for single stepping, we probably want to
-        * disable IRQs by default, with optional override...
-        */
+       uint32_t tmp;
+
+       LOG_DEBUG("%s", target_name(target));
 
        retval = mem_ap_read_atomic_u32(armv8->debug_ap,
                        armv8->debug_base + CPUV8_DBG_DSCR, &dscr);
@@ -526,71 +635,204 @@ static int aarch64_internal_restart(struct target *target, bool slave_pe)
                return retval;
 
        if ((dscr & DSCR_ITE) == 0)
-               LOG_ERROR("DSCR InstrCompl must be set before leaving debug!");
-
-       /* make sure to acknowledge the halt event before resuming */
-       retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                       armv8->cti_base + CTI_INACK, CTI_TRIG(HALT));
+               LOG_ERROR("DSCR.ITE must be set before leaving debug!");
+       if ((dscr & DSCR_ERR) != 0)
+               LOG_ERROR("DSCR.ERR must be cleared before leaving debug!");
 
+       /* acknowledge a pending CTI halt event */
+       retval = arm_cti_ack_events(armv8->cti, CTI_TRIG(HALT));
        /*
         * open the CTI gate for channel 1 so that the restart events
-        * get passed along to all PEs
+        * get passed along to all PEs. Also close gate for channel 0
+        * to isolate the PE from halt events.
         */
        if (retval == ERROR_OK)
-               retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                               armv8->cti_base + CTI_GATE, CTI_CHNL(1));
-       if (retval != ERROR_OK)
-               return retval;
+               retval = arm_cti_ungate_channel(armv8->cti, 1);
+       if (retval == ERROR_OK)
+               retval = arm_cti_gate_channel(armv8->cti, 0);
 
-       if (!slave_pe) {
-               /* trigger an event on channel 1, generates a restart request to the PE */
+       /* make sure that DSCR.HDE is set */
+       if (retval == ERROR_OK) {
+               dscr |= DSCR_HDE;
                retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                               armv8->cti_base + CTI_APPPULSE, CTI_CHNL(1));
-               if (retval != ERROR_OK)
-                       return retval;
+                               armv8->debug_base + CPUV8_DBG_DSCR, dscr);
+       }
 
-               long long then = timeval_ms();
-               for (;; ) {
-                       retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                                       armv8->debug_base + CPUV8_DBG_DSCR, &dscr);
-                       if (retval != ERROR_OK)
-                               return retval;
-                       if ((dscr & DSCR_HDE) != 0)
+       if (retval == ERROR_OK) {
+               /* clear sticky bits in PRSR, SDR is now 0 */
+               retval = mem_ap_read_atomic_u32(armv8->debug_ap,
+                               armv8->debug_base + CPUV8_DBG_PRSR, &tmp);
+       }
+
+       return retval;
+}
+
+static int aarch64_do_restart_one(struct target *target, enum restart_mode mode)
+{
+       struct armv8_common *armv8 = target_to_armv8(target);
+       int retval;
+
+       LOG_DEBUG("%s", target_name(target));
+
+       /* trigger an event on channel 1, generates a restart request to the PE */
+       retval = arm_cti_pulse_channel(armv8->cti, 1);
+       if (retval != ERROR_OK)
+               return retval;
+
+       if (mode == RESTART_SYNC) {
+               int64_t then = timeval_ms();
+               for (;;) {
+                       int resumed;
+                       /*
+                        * if PRSR.SDR is set now, the target did restart, even
+                        * if it's now already halted again (e.g. due to breakpoint)
+                        */
+                       retval = aarch64_check_state_one(target,
+                                               PRSR_SDR, PRSR_SDR, &resumed, NULL);
+                       if (retval != ERROR_OK || resumed)
                                break;
+
                        if (timeval_ms() > then + 1000) {
-                               LOG_ERROR("Timeout waiting for resume");
-                               return ERROR_FAIL;
+                               LOG_ERROR("%s: Timeout waiting for resume"PRIx32, target_name(target));
+                               retval = ERROR_TARGET_TIMEOUT;
+                               break;
                        }
                }
        }
 
+       if (retval != ERROR_OK)
+               return retval;
+
        target->debug_reason = DBG_REASON_NOTHALTED;
        target->state = TARGET_RUNNING;
 
-       /* registers are now invalid */
-       register_cache_invalidate(arm->core_cache);
-
        return ERROR_OK;
 }
 
-static int aarch64_restore_smp(struct target *target, int handle_breakpoints)
+static int aarch64_restart_one(struct target *target, enum restart_mode mode)
 {
-       int retval = 0;
+       int retval;
+
+       LOG_DEBUG("%s", target_name(target));
+
+       retval = aarch64_prepare_restart_one(target);
+       if (retval == ERROR_OK)
+               retval = aarch64_do_restart_one(target, mode);
+
+       return retval;
+}
+
+/*
+ * prepare all but the current target for restart
+ */
+static int aarch64_prep_restart_smp(struct target *target, int handle_breakpoints, struct target **p_first)
+{
+       int retval = ERROR_OK;
        struct target_list *head;
-       struct target *curr;
+       struct target *first = NULL;
        uint64_t address;
-       head = target->head;
-       while (head != (struct target_list *)NULL) {
-               curr = head->target;
-               if ((curr != target) && (curr->state != TARGET_RUNNING)) {
-                       /*  resume current address , not in step mode */
-                       retval += aarch64_internal_restore(curr, 1, &address,
-                                       handle_breakpoints, 0);
-                       retval += aarch64_internal_restart(curr, true);
+
+       foreach_smp_target(head, target->smp_targets) {
+               struct target *curr = head->target;
+
+               /* skip calling target */
+               if (curr == target)
+                       continue;
+               if (!target_was_examined(curr))
+                       continue;
+               if (curr->state != TARGET_HALTED)
+                       continue;
+
+               /*  resume at current address, not in step mode */
+               retval = aarch64_restore_one(curr, 1, &address, handle_breakpoints, 0);
+               if (retval == ERROR_OK)
+                       retval = aarch64_prepare_restart_one(curr);
+               if (retval != ERROR_OK) {
+                       LOG_ERROR("failed to restore target %s", target_name(curr));
+                       break;
                }
-               head = head->next;
+               /* remember the first valid target in the group */
+               if (!first)
+                       first = curr;
+       }
+
+       if (p_first)
+               *p_first = first;
+
+       return retval;
+}
+
 
+static int aarch64_step_restart_smp(struct target *target)
+{
+       int retval = ERROR_OK;
+       struct target_list *head;
+       struct target *first = NULL;
+
+       LOG_DEBUG("%s", target_name(target));
+
+       retval = aarch64_prep_restart_smp(target, 0, &first);
+       if (retval != ERROR_OK)
+               return retval;
+
+       if (first)
+               retval = aarch64_do_restart_one(first, RESTART_LAZY);
+       if (retval != ERROR_OK) {
+               LOG_DEBUG("error restarting target %s", target_name(first));
+               return retval;
        }
+
+       int64_t then = timeval_ms();
+       for (;;) {
+               struct target *curr = target;
+               bool all_resumed = true;
+
+               foreach_smp_target(head, target->smp_targets) {
+                       uint32_t prsr;
+                       int resumed;
+
+                       curr = head->target;
+
+                       if (curr == target)
+                               continue;
+
+                       if (!target_was_examined(curr))
+                               continue;
+
+                       retval = aarch64_check_state_one(curr,
+                                       PRSR_SDR, PRSR_SDR, &resumed, &prsr);
+                       if (retval != ERROR_OK || (!resumed && (prsr & PRSR_HALT))) {
+                               all_resumed = false;
+                               break;
+                       }
+
+                       if (curr->state != TARGET_RUNNING) {
+                               curr->state = TARGET_RUNNING;
+                               curr->debug_reason = DBG_REASON_NOTHALTED;
+                               target_call_event_callbacks(curr, TARGET_EVENT_RESUMED);
+                       }
+               }
+
+               if (all_resumed)
+                       break;
+
+               if (timeval_ms() > then + 1000) {
+                       LOG_ERROR("%s: timeout waiting for target resume", __func__);
+                       retval = ERROR_TARGET_TIMEOUT;
+                       break;
+               }
+               /*
+                * HACK: on Hi6220 there are 8 cores organized in 2 clusters
+                * and it looks like the CTI's are not connected by a common
+                * trigger matrix. It seems that we need to halt one core in each
+                * cluster explicitly. So if we find that a core has not halted
+                * yet, we trigger an explicit resume for the second cluster.
+                */
+               retval = aarch64_do_restart_one(curr, RESTART_LAZY);
+               if (retval != ERROR_OK)
+                       break;
+}
+
        return retval;
 }
 
@@ -600,24 +842,89 @@ static int aarch64_resume(struct target *target, int current,
        int retval = 0;
        uint64_t addr = address;
 
-       /* dummy resume for smp toggle in order to reduce gdb impact  */
-       if ((target->smp) && (target->gdb_service->core[1] != -1)) {
-               /*   simulate a start and halt of target */
-               target->gdb_service->target = NULL;
-               target->gdb_service->core[0] = target->gdb_service->core[1];
-               /*  fake resume at next poll we play the  target core[1], see poll*/
-               target_call_event_callbacks(target, TARGET_EVENT_RESUMED);
-               return 0;
-       }
-       aarch64_internal_restore(target, current, &addr, handle_breakpoints,
-                                debug_execution);
+       struct armv8_common *armv8 = target_to_armv8(target);
+       armv8->last_run_control_op = ARMV8_RUNCONTROL_RESUME;
+
+       if (target->state != TARGET_HALTED)
+               return ERROR_TARGET_NOT_HALTED;
+
+       /*
+        * If this target is part of a SMP group, prepare the others
+        * targets for resuming. This involves restoring the complete
+        * target register context and setting up CTI gates to accept
+        * resume events from the trigger matrix.
+        */
        if (target->smp) {
-               target->gdb_service->core[0] = -1;
-               retval = aarch64_restore_smp(target, handle_breakpoints);
+               retval = aarch64_prep_restart_smp(target, handle_breakpoints, NULL);
                if (retval != ERROR_OK)
                        return retval;
        }
-       aarch64_internal_restart(target, false);
+
+       /* all targets prepared, restore and restart the current target */
+       retval = aarch64_restore_one(target, current, &addr, handle_breakpoints,
+                                debug_execution);
+       if (retval == ERROR_OK)
+               retval = aarch64_restart_one(target, RESTART_SYNC);
+       if (retval != ERROR_OK)
+               return retval;
+
+       if (target->smp) {
+               int64_t then = timeval_ms();
+               for (;;) {
+                       struct target *curr = target;
+                       struct target_list *head;
+                       bool all_resumed = true;
+
+                       foreach_smp_target(head, target->smp_targets) {
+                               uint32_t prsr;
+                               int resumed;
+
+                               curr = head->target;
+                               if (curr == target)
+                                       continue;
+                               if (!target_was_examined(curr))
+                                       continue;
+
+                               retval = aarch64_check_state_one(curr,
+                                               PRSR_SDR, PRSR_SDR, &resumed, &prsr);
+                               if (retval != ERROR_OK || (!resumed && (prsr & PRSR_HALT))) {
+                                       all_resumed = false;
+                                       break;
+                               }
+
+                               if (curr->state != TARGET_RUNNING) {
+                                       curr->state = TARGET_RUNNING;
+                                       curr->debug_reason = DBG_REASON_NOTHALTED;
+                                       target_call_event_callbacks(curr, TARGET_EVENT_RESUMED);
+                               }
+                       }
+
+                       if (all_resumed)
+                               break;
+
+                       if (timeval_ms() > then + 1000) {
+                               LOG_ERROR("%s: timeout waiting for target %s to resume", __func__, target_name(curr));
+                               retval = ERROR_TARGET_TIMEOUT;
+                               break;
+                       }
+
+                       /*
+                        * HACK: on Hi6220 there are 8 cores organized in 2 clusters
+                        * and it looks like the CTI's are not connected by a common
+                        * trigger matrix. It seems that we need to halt one core in each
+                        * cluster explicitly. So if we find that a core has not halted
+                        * yet, we trigger an explicit resume for the second cluster.
+                        */
+                       retval = aarch64_do_restart_one(curr, RESTART_LAZY);
+                       if (retval != ERROR_OK)
+                               break;
+               }
+       }
+
+       if (retval != ERROR_OK)
+               return retval;
+
+       target->debug_reason = DBG_REASON_NOTHALTED;
 
        if (!debug_execution) {
                target->state = TARGET_RUNNING;
@@ -635,53 +942,68 @@ static int aarch64_resume(struct target *target, int current,
 static int aarch64_debug_entry(struct target *target)
 {
        int retval = ERROR_OK;
-       struct aarch64_common *aarch64 = target_to_aarch64(target);
        struct armv8_common *armv8 = target_to_armv8(target);
-
-       LOG_DEBUG("dscr = 0x%08" PRIx32, aarch64->cpudbg_dscr);
-
-       /* REVISIT see A8 TRM 12.11.4 steps 2..3 -- make sure that any
-        * imprecise data aborts get discarded by issuing a Data
-        * Synchronization Barrier:  ARMV4_5_MCR(15, 0, 0, 7, 10, 4).
-        */
+       struct arm_dpm *dpm = &armv8->dpm;
+       enum arm_state core_state;
+       uint32_t dscr;
 
        /* make sure to clear all sticky errors */
        retval = mem_ap_write_atomic_u32(armv8->debug_ap,
                        armv8->debug_base + CPUV8_DBG_DRCR, DRCR_CSE);
+       if (retval == ERROR_OK)
+               retval = mem_ap_read_atomic_u32(armv8->debug_ap,
+                               armv8->debug_base + CPUV8_DBG_DSCR, &dscr);
+       if (retval == ERROR_OK)
+               retval = arm_cti_ack_events(armv8->cti, CTI_TRIG(HALT));
+
+       if (retval != ERROR_OK)
+               return retval;
+
+       LOG_DEBUG("%s dscr = 0x%08" PRIx32, target_name(target), dscr);
+
+       dpm->dscr = dscr;
+       core_state = armv8_dpm_get_core_state(dpm);
+       armv8_select_opcodes(armv8, core_state == ARM_STATE_AARCH64);
+       armv8_select_reg_access(armv8, core_state == ARM_STATE_AARCH64);
+
+       /* close the CTI gate for all events */
+       if (retval == ERROR_OK)
+               retval = arm_cti_write_reg(armv8->cti, CTI_GATE, 0);
+       /* discard async exceptions */
+       if (retval == ERROR_OK)
+               retval = dpm->instr_cpsr_sync(dpm);
        if (retval != ERROR_OK)
                return retval;
 
        /* Examine debug reason */
-       armv8_dpm_report_dscr(&armv8->dpm, aarch64->cpudbg_dscr);
+       armv8_dpm_report_dscr(dpm, dscr);
 
-       /* save address of instruction that triggered the watchpoint? */
+       /* save the memory address that triggered the watchpoint */
        if (target->debug_reason == DBG_REASON_WATCHPOINT) {
                uint32_t tmp;
-               uint64_t wfar = 0;
 
                retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                               armv8->debug_base + CPUV8_DBG_WFAR1,
-                               &tmp);
-               if (retval != ERROR_OK)
-                       return retval;
-               wfar = tmp;
-               wfar = (wfar << 32);
-               retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                               armv8->debug_base + CPUV8_DBG_WFAR0,
-                               &tmp);
+                               armv8->debug_base + CPUV8_DBG_EDWAR0, &tmp);
                if (retval != ERROR_OK)
                        return retval;
-               wfar |= tmp;
-               armv8_dpm_report_wfar(&armv8->dpm, wfar);
+               target_addr_t edwar = tmp;
+
+               /* EDWAR[63:32] has unknown content in aarch32 state */
+               if (core_state == ARM_STATE_AARCH64) {
+                       retval = mem_ap_read_atomic_u32(armv8->debug_ap,
+                                       armv8->debug_base + CPUV8_DBG_EDWAR1, &tmp);
+                       if (retval != ERROR_OK)
+                               return retval;
+                       edwar |= ((target_addr_t)tmp) << 32;
+               }
+
+               armv8->dpm.wp_addr = edwar;
        }
 
        retval = armv8_dpm_read_current_registers(&armv8->dpm);
 
-       if (armv8->post_debug_entry) {
+       if (retval == ERROR_OK && armv8->post_debug_entry)
                retval = armv8->post_debug_entry(target);
-               if (retval != ERROR_OK)
-                       return retval;
-       }
 
        return retval;
 }
@@ -691,47 +1013,52 @@ static int aarch64_post_debug_entry(struct target *target)
        struct aarch64_common *aarch64 = target_to_aarch64(target);
        struct armv8_common *armv8 = &aarch64->armv8_common;
        int retval;
-
-       /* clear sticky errors */
-       mem_ap_write_atomic_u32(armv8->debug_ap,
-                                   armv8->debug_base + CPUV8_DBG_DRCR, DRCR_CSE);
+       enum arm_mode target_mode = ARM_MODE_ANY;
+       uint32_t instr;
 
        switch (armv8->arm.core_mode) {
-               case ARMV8_64_EL0T:
-               case ARMV8_64_EL1T:
-               case ARMV8_64_EL1H:
-                       retval = armv8->arm.mrs(target, 3, /*op 0*/
-                                       0, 0,   /* op1, op2 */
-                                       1, 0,   /* CRn, CRm */
-                                       &aarch64->system_control_reg);
-                       if (retval != ERROR_OK)
-                               return retval;
+       case ARMV8_64_EL0T:
+               target_mode = ARMV8_64_EL1H;
+               /* fall through */
+       case ARMV8_64_EL1T:
+       case ARMV8_64_EL1H:
+               instr = ARMV8_MRS(SYSTEM_SCTLR_EL1, 0);
                break;
-               case ARMV8_64_EL2T:
-               case ARMV8_64_EL2H:
-                       retval = armv8->arm.mrs(target, 3, /*op 0*/
-                                       4, 0,   /* op1, op2 */
-                                       1, 0,   /* CRn, CRm */
-                                       &aarch64->system_control_reg);
-                       if (retval != ERROR_OK)
-                               return retval;
+       case ARMV8_64_EL2T:
+       case ARMV8_64_EL2H:
+               instr = ARMV8_MRS(SYSTEM_SCTLR_EL2, 0);
                break;
-               case ARMV8_64_EL3H:
-               case ARMV8_64_EL3T:
-                       retval = armv8->arm.mrs(target, 3, /*op 0*/
-                                       6, 0,   /* op1, op2 */
-                                       1, 0,   /* CRn, CRm */
-                                       &aarch64->system_control_reg);
-                       if (retval != ERROR_OK)
-                               return retval;
+       case ARMV8_64_EL3H:
+       case ARMV8_64_EL3T:
+               instr = ARMV8_MRS(SYSTEM_SCTLR_EL3, 0);
                break;
-               default:
-                       retval = armv8->arm.mrc(target, 15, 0, 0, 1, 0, &aarch64->system_control_reg);
-                       if (retval != ERROR_OK)
-                               return retval;
-                       break;
+
+       case ARM_MODE_SVC:
+       case ARM_MODE_ABT:
+       case ARM_MODE_FIQ:
+       case ARM_MODE_IRQ:
+       case ARM_MODE_HYP:
+       case ARM_MODE_UND:
+       case ARM_MODE_SYS:
+               instr = ARMV4_5_MRC(15, 0, 0, 1, 0, 0);
+               break;
+
+       default:
+               LOG_ERROR("cannot read system control register in this mode: (%s : 0x%x)",
+                               armv8_mode_name(armv8->arm.core_mode), armv8->arm.core_mode);
+               return ERROR_FAIL;
        }
 
+       if (target_mode != ARM_MODE_ANY)
+               armv8_dpm_modeswitch(&armv8->dpm, target_mode);
+
+       retval = armv8->dpm.instr_read_data_r0(&armv8->dpm, instr, &aarch64->system_control_reg);
+       if (retval != ERROR_OK)
+               return retval;
+
+       if (target_mode != ARM_MODE_ANY)
+               armv8_dpm_modeswitch(&armv8->dpm, ARM_MODE_ANY);
+
        LOG_DEBUG("System_register: %8.8" PRIx32, aarch64->system_control_reg);
        aarch64->system_control_reg_curr = aarch64->system_control_reg;
 
@@ -746,39 +1073,23 @@ static int aarch64_post_debug_entry(struct target *target)
                (aarch64->system_control_reg & 0x4U) ? 1 : 0;
        armv8->armv8_mmu.armv8_cache.i_cache_enabled =
                (aarch64->system_control_reg & 0x1000U) ? 1 : 0;
-       aarch64->curr_mode = armv8->arm.core_mode;
        return ERROR_OK;
 }
 
-static int aarch64_set_dscr_bits(struct target *target, unsigned long bit_mask, unsigned long value)
-{
-       struct armv8_common *armv8 = target_to_armv8(target);
-       uint32_t dscr;
-
-       /* Read DSCR */
-       int retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + CPUV8_DBG_DSCR, &dscr);
-       if (ERROR_OK != retval)
-               return retval;
-
-       /* clear bitfield */
-       dscr &= ~bit_mask;
-       /* put new value */
-       dscr |= value & bit_mask;
-
-       /* write new DSCR */
-       retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + CPUV8_DBG_DSCR, dscr);
-       return retval;
-}
-
+/*
+ * single-step a target
+ */
 static int aarch64_step(struct target *target, int current, target_addr_t address,
        int handle_breakpoints)
 {
        struct armv8_common *armv8 = target_to_armv8(target);
+       struct aarch64_common *aarch64 = target_to_aarch64(target);
+       int saved_retval = ERROR_OK;
        int retval;
        uint32_t edecr;
 
+       armv8->last_run_control_op = ARMV8_RUNCONTROL_STEP;
+
        if (target->state != TARGET_HALTED) {
                LOG_WARNING("target not halted");
                return ERROR_TARGET_NOT_HALTED;
@@ -786,39 +1097,76 @@ static int aarch64_step(struct target *target, int current, target_addr_t addres
 
        retval = mem_ap_read_atomic_u32(armv8->debug_ap,
                        armv8->debug_base + CPUV8_DBG_EDECR, &edecr);
-       if (retval != ERROR_OK)
-               return retval;
-
        /* make sure EDECR.SS is not set when restoring the register */
-       edecr &= ~0x4;
 
-       /* set EDECR.SS to enter hardware step mode */
-       retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + CPUV8_DBG_EDECR, (edecr|0x4));
+       if (retval == ERROR_OK) {
+               edecr &= ~0x4;
+               /* set EDECR.SS to enter hardware step mode */
+               retval = mem_ap_write_atomic_u32(armv8->debug_ap,
+                               armv8->debug_base + CPUV8_DBG_EDECR, (edecr|0x4));
+       }
+       /* disable interrupts while stepping */
+       if (retval == ERROR_OK && aarch64->isrmasking_mode == AARCH64_ISRMASK_ON)
+               retval = aarch64_set_dscr_bits(target, 0x3 << 22, 0x3 << 22);
+       /* bail out if stepping setup has failed */
        if (retval != ERROR_OK)
                return retval;
 
-       /* disable interrupts while stepping */
-       retval = aarch64_set_dscr_bits(target, 0x3 << 22, 0x3 << 22);
-       if (retval != ERROR_OK)
-               return ERROR_OK;
+       if (target->smp && (current == 1)) {
+               /*
+                * isolate current target so that it doesn't get resumed
+                * together with the others
+                */
+               retval = arm_cti_gate_channel(armv8->cti, 1);
+               /* resume all other targets in the group */
+               if (retval == ERROR_OK)
+                       retval = aarch64_step_restart_smp(target);
+               if (retval != ERROR_OK) {
+                       LOG_ERROR("Failed to restart non-stepping targets in SMP group");
+                       return retval;
+               }
+               LOG_DEBUG("Restarted all non-stepping targets in SMP group");
+       }
+
+       /* all other targets running, restore and restart the current target */
+       retval = aarch64_restore_one(target, current, &address, 0, 0);
+       if (retval == ERROR_OK)
+               retval = aarch64_restart_one(target, RESTART_LAZY);
 
-       /* resume the target */
-       retval = aarch64_resume(target, current, address, 0, 0);
        if (retval != ERROR_OK)
                return retval;
 
-       long long then = timeval_ms();
-       while (target->state != TARGET_HALTED) {
-               retval = aarch64_poll(target);
-               if (retval != ERROR_OK)
-                       return retval;
-               if (timeval_ms() > then + 1000) {
-                       LOG_ERROR("timeout waiting for target halt");
-                       return ERROR_FAIL;
+       LOG_DEBUG("target step-resumed at 0x%" PRIx64, address);
+       if (!handle_breakpoints)
+               target_call_event_callbacks(target, TARGET_EVENT_RESUMED);
+
+       int64_t then = timeval_ms();
+       for (;;) {
+               int stepped;
+               uint32_t prsr;
+
+               retval = aarch64_check_state_one(target,
+                                       PRSR_SDR|PRSR_HALT, PRSR_SDR|PRSR_HALT, &stepped, &prsr);
+               if (retval != ERROR_OK || stepped)
+                       break;
+
+               if (timeval_ms() > then + 100) {
+                       LOG_ERROR("timeout waiting for target %s halt after step",
+                                       target_name(target));
+                       retval = ERROR_TARGET_TIMEOUT;
+                       break;
                }
        }
 
+       /*
+        * At least on one SoC (Renesas R8A7795) stepping over a WFI instruction
+        * causes a timeout. The core takes the step but doesn't complete it and so
+        * debug state is never entered. However, you can manually halt the core
+        * as an external debug even is also a WFI wakeup event.
+        */
+       if (retval == ERROR_TARGET_TIMEOUT)
+               saved_retval = aarch64_halt_one(target, HALT_SYNC);
+
        /* restore EDECR */
        retval = mem_ap_write_atomic_u32(armv8->debug_ap,
                        armv8->debug_base + CPUV8_DBG_EDECR, edecr);
@@ -826,9 +1174,14 @@ static int aarch64_step(struct target *target, int current, target_addr_t addres
                return retval;
 
        /* restore interrupts */
-       retval = aarch64_set_dscr_bits(target, 0x3 << 22, 0);
-       if (retval != ERROR_OK)
-               return ERROR_OK;
+       if (aarch64->isrmasking_mode == AARCH64_ISRMASK_ON) {
+               retval = aarch64_set_dscr_bits(target, 0x3 << 22, 0);
+               if (retval != ERROR_OK)
+                       return ERROR_OK;
+       }
+
+       if (saved_retval != ERROR_OK)
+               return saved_retval;
 
        return ERROR_OK;
 }
@@ -836,14 +1189,23 @@ static int aarch64_step(struct target *target, int current, target_addr_t addres
 static int aarch64_restore_context(struct target *target, bool bpwp)
 {
        struct armv8_common *armv8 = target_to_armv8(target);
+       struct arm *arm = &armv8->arm;
 
-       LOG_DEBUG(" ");
+       int retval;
+
+       LOG_DEBUG("%s", target_name(target));
 
        if (armv8->pre_restore_context)
                armv8->pre_restore_context(target);
 
-       return armv8_dpm_write_dirty_registers(&armv8->dpm, bpwp);
+       retval = armv8_dpm_write_dirty_registers(&armv8->dpm, bpwp);
+       if (retval == ERROR_OK) {
+               /* registers are now invalid */
+               register_cache_invalidate(arm->core_cache);
+               register_cache_invalidate(arm->core_cache->next);
+       }
 
+       return retval;
 }
 
 /*
@@ -861,9 +1223,8 @@ static int aarch64_set_breakpoint(struct target *target,
        struct aarch64_common *aarch64 = target_to_aarch64(target);
        struct armv8_common *armv8 = &aarch64->armv8_common;
        struct aarch64_brp *brp_list = aarch64->brp_list;
-       uint32_t dscr;
 
-       if (breakpoint->set) {
+       if (breakpoint->is_set) {
                LOG_WARNING("breakpoint already set");
                return ERROR_OK;
        }
@@ -876,7 +1237,7 @@ static int aarch64_set_breakpoint(struct target *target,
                        LOG_ERROR("ERROR Can not find free Breakpoint Register Pair");
                        return ERROR_TARGET_RESOURCE_NOT_AVAILABLE;
                }
-               breakpoint->set = brp_i + 1;
+               breakpoint_hw_set(breakpoint, brp_i);
                if (breakpoint->length == 2)
                        byte_addr_select = (3 << (breakpoint->address & 0x02));
                control = ((matchmode & 0x7) << 20)
@@ -889,18 +1250,18 @@ static int aarch64_set_breakpoint(struct target *target,
                bpt_value = brp_list[brp_i].value;
 
                retval = aarch64_dap_write_memap_register_u32(target, armv8->debug_base
-                               + CPUV8_DBG_BVR_BASE + 16 * brp_list[brp_i].BRPn,
+                               + CPUV8_DBG_BVR_BASE + 16 * brp_list[brp_i].brpn,
                                (uint32_t)(bpt_value & 0xFFFFFFFF));
                if (retval != ERROR_OK)
                        return retval;
                retval = aarch64_dap_write_memap_register_u32(target, armv8->debug_base
-                               + CPUV8_DBG_BVR_BASE + 4 + 16 * brp_list[brp_i].BRPn,
+                               + CPUV8_DBG_BVR_BASE + 4 + 16 * brp_list[brp_i].brpn,
                                (uint32_t)(bpt_value >> 32));
                if (retval != ERROR_OK)
                        return retval;
 
                retval = aarch64_dap_write_memap_register_u32(target, armv8->debug_base
-                               + CPUV8_DBG_BCR_BASE + 16 * brp_list[brp_i].BRPn,
+                               + CPUV8_DBG_BCR_BASE + 16 * brp_list[brp_i].brpn,
                                brp_list[brp_i].control);
                if (retval != ERROR_OK)
                        return retval;
@@ -909,9 +1270,32 @@ static int aarch64_set_breakpoint(struct target *target,
                        brp_list[brp_i].value);
 
        } else if (breakpoint->type == BKPT_SOFT) {
+               uint32_t opcode;
                uint8_t code[4];
 
-               buf_set_u32(code, 0, 32, ARMV8_HLT(0x11));
+               if (armv8_dpm_get_core_state(&armv8->dpm) == ARM_STATE_AARCH64) {
+                       opcode = ARMV8_HLT(11);
+
+                       if (breakpoint->length != 4)
+                               LOG_ERROR("bug: breakpoint length should be 4 in AArch64 mode");
+               } else {
+                       /**
+                        * core_state is ARM_STATE_ARM
+                        * in that case the opcode depends on breakpoint length:
+                        *  - if length == 4 => A32 opcode
+                        *  - if length == 2 => T32 opcode
+                        *  - if length == 3 => T32 opcode (refer to gdb doc : ARM-Breakpoint-Kinds)
+                        *    in that case the length should be changed from 3 to 4 bytes
+                        **/
+                       opcode = (breakpoint->length == 4) ? ARMV8_HLT_A1(11) :
+                                       (uint32_t) (ARMV8_HLT_T1(11) | ARMV8_HLT_T1(11) << 16);
+
+                       if (breakpoint->length == 3)
+                               breakpoint->length = 4;
+               }
+
+               buf_set_u32(code, 0, 32, opcode);
+
                retval = target_read_memory(target,
                                breakpoint->address & 0xFFFFFFFFFFFFFFFE,
                                breakpoint->length, 1,
@@ -937,15 +1321,11 @@ static int aarch64_set_breakpoint(struct target *target,
                                breakpoint->address & 0xFFFFFFFFFFFFFFFE,
                                breakpoint->length);
 
-               breakpoint->set = 0x11; /* Any nice value but 0 */
+               breakpoint->is_set = true;
        }
 
-       retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                                       armv8->debug_base + CPUV8_DBG_DSCR, &dscr);
        /* Ensure that halting debug mode is enable */
-       dscr = dscr | DSCR_HDE;
-       retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                                        armv8->debug_base + CPUV8_DBG_DSCR, dscr);
+       retval = aarch64_set_dscr_bits(target, DSCR_HDE, DSCR_HDE);
        if (retval != ERROR_OK) {
                LOG_DEBUG("Failed to set DSCR.HDE");
                return retval;
@@ -965,7 +1345,7 @@ static int aarch64_set_context_breakpoint(struct target *target,
        struct armv8_common *armv8 = &aarch64->armv8_common;
        struct aarch64_brp *brp_list = aarch64->brp_list;
 
-       if (breakpoint->set) {
+       if (breakpoint->is_set) {
                LOG_WARNING("breakpoint already set");
                return retval;
        }
@@ -979,7 +1359,7 @@ static int aarch64_set_context_breakpoint(struct target *target,
                return ERROR_FAIL;
        }
 
-       breakpoint->set = brp_i + 1;
+       breakpoint_hw_set(breakpoint, brp_i);
        control = ((matchmode & 0x7) << 20)
                | (1 << 13)
                | (byte_addr_select << 5)
@@ -988,12 +1368,12 @@ static int aarch64_set_context_breakpoint(struct target *target,
        brp_list[brp_i].value = (breakpoint->asid);
        brp_list[brp_i].control = control;
        retval = aarch64_dap_write_memap_register_u32(target, armv8->debug_base
-                       + CPUV8_DBG_BVR_BASE + 16 * brp_list[brp_i].BRPn,
+                       + CPUV8_DBG_BVR_BASE + 16 * brp_list[brp_i].brpn,
                        brp_list[brp_i].value);
        if (retval != ERROR_OK)
                return retval;
        retval = aarch64_dap_write_memap_register_u32(target, armv8->debug_base
-                       + CPUV8_DBG_BCR_BASE + 16 * brp_list[brp_i].BRPn,
+                       + CPUV8_DBG_BCR_BASE + 16 * brp_list[brp_i].brpn,
                        brp_list[brp_i].control);
        if (retval != ERROR_OK)
                return retval;
@@ -1009,16 +1389,16 @@ static int aarch64_set_hybrid_breakpoint(struct target *target, struct breakpoin
        int retval = ERROR_FAIL;
        int brp_1 = 0;  /* holds the contextID pair */
        int brp_2 = 0;  /* holds the IVA pair */
-       uint32_t control_CTX, control_IVA;
-       uint8_t CTX_byte_addr_select = 0x0F;
-       uint8_t IVA_byte_addr_select = 0x0F;
-       uint8_t CTX_machmode = 0x03;
-       uint8_t IVA_machmode = 0x01;
+       uint32_t control_ctx, control_iva;
+       uint8_t ctx_byte_addr_select = 0x0F;
+       uint8_t iva_byte_addr_select = 0x0F;
+       uint8_t ctx_machmode = 0x03;
+       uint8_t iva_machmode = 0x01;
        struct aarch64_common *aarch64 = target_to_aarch64(target);
        struct armv8_common *armv8 = &aarch64->armv8_common;
        struct aarch64_brp *brp_list = aarch64->brp_list;
 
-       if (breakpoint->set) {
+       if (breakpoint->is_set) {
                LOG_WARNING("breakpoint already set");
                return retval;
        }
@@ -1027,7 +1407,7 @@ static int aarch64_set_hybrid_breakpoint(struct target *target, struct breakpoin
                (brp_list[brp_1].type != BRP_CONTEXT)) && (brp_1 < aarch64->brp_num))
                brp_1++;
 
-       printf("brp(CTX) found num: %d\n", brp_1);
+       LOG_DEBUG("brp(CTX) found num: %d", brp_1);
        if (brp_1 >= aarch64->brp_num) {
                LOG_ERROR("ERROR Can not find free Breakpoint Register Pair");
                return ERROR_FAIL;
@@ -1037,53 +1417,53 @@ static int aarch64_set_hybrid_breakpoint(struct target *target, struct breakpoin
                (brp_list[brp_2].type != BRP_NORMAL)) && (brp_2 < aarch64->brp_num))
                brp_2++;
 
-       printf("brp(IVA) found num: %d\n", brp_2);
+       LOG_DEBUG("brp(IVA) found num: %d", brp_2);
        if (brp_2 >= aarch64->brp_num) {
                LOG_ERROR("ERROR Can not find free Breakpoint Register Pair");
                return ERROR_FAIL;
        }
 
-       breakpoint->set = brp_1 + 1;
-       breakpoint->linked_BRP = brp_2;
-       control_CTX = ((CTX_machmode & 0x7) << 20)
+       breakpoint_hw_set(breakpoint, brp_1);
+       breakpoint->linked_brp = brp_2;
+       control_ctx = ((ctx_machmode & 0x7) << 20)
                | (brp_2 << 16)
                | (0 << 14)
-               | (CTX_byte_addr_select << 5)
+               | (ctx_byte_addr_select << 5)
                | (3 << 1) | 1;
        brp_list[brp_1].used = 1;
        brp_list[brp_1].value = (breakpoint->asid);
-       brp_list[brp_1].control = control_CTX;
+       brp_list[brp_1].control = control_ctx;
        retval = aarch64_dap_write_memap_register_u32(target, armv8->debug_base
-                       + CPUV8_DBG_BVR_BASE + 16 * brp_list[brp_1].BRPn,
+                       + CPUV8_DBG_BVR_BASE + 16 * brp_list[brp_1].brpn,
                        brp_list[brp_1].value);
        if (retval != ERROR_OK)
                return retval;
        retval = aarch64_dap_write_memap_register_u32(target, armv8->debug_base
-                       + CPUV8_DBG_BCR_BASE + 16 * brp_list[brp_1].BRPn,
+                       + CPUV8_DBG_BCR_BASE + 16 * brp_list[brp_1].brpn,
                        brp_list[brp_1].control);
        if (retval != ERROR_OK)
                return retval;
 
-       control_IVA = ((IVA_machmode & 0x7) << 20)
+       control_iva = ((iva_machmode & 0x7) << 20)
                | (brp_1 << 16)
                | (1 << 13)
-               | (IVA_byte_addr_select << 5)
+               | (iva_byte_addr_select << 5)
                | (3 << 1) | 1;
        brp_list[brp_2].used = 1;
        brp_list[brp_2].value = breakpoint->address & 0xFFFFFFFFFFFFFFFC;
-       brp_list[brp_2].control = control_IVA;
+       brp_list[brp_2].control = control_iva;
        retval = aarch64_dap_write_memap_register_u32(target, armv8->debug_base
-                       + CPUV8_DBG_BVR_BASE + 16 * brp_list[brp_2].BRPn,
+                       + CPUV8_DBG_BVR_BASE + 16 * brp_list[brp_2].brpn,
                        brp_list[brp_2].value & 0xFFFFFFFF);
        if (retval != ERROR_OK)
                return retval;
        retval = aarch64_dap_write_memap_register_u32(target, armv8->debug_base
-                       + CPUV8_DBG_BVR_BASE + 4 + 16 * brp_list[brp_2].BRPn,
+                       + CPUV8_DBG_BVR_BASE + 4 + 16 * brp_list[brp_2].brpn,
                        brp_list[brp_2].value >> 32);
        if (retval != ERROR_OK)
                return retval;
        retval = aarch64_dap_write_memap_register_u32(target, armv8->debug_base
-                       + CPUV8_DBG_BCR_BASE + 16 * brp_list[brp_2].BRPn,
+                       + CPUV8_DBG_BCR_BASE + 16 * brp_list[brp_2].brpn,
                        brp_list[brp_2].control);
        if (retval != ERROR_OK)
                return retval;
@@ -1098,16 +1478,16 @@ static int aarch64_unset_breakpoint(struct target *target, struct breakpoint *br
        struct armv8_common *armv8 = &aarch64->armv8_common;
        struct aarch64_brp *brp_list = aarch64->brp_list;
 
-       if (!breakpoint->set) {
+       if (!breakpoint->is_set) {
                LOG_WARNING("breakpoint not set");
                return ERROR_OK;
        }
 
        if (breakpoint->type == BKPT_HARD) {
                if ((breakpoint->address != 0) && (breakpoint->asid != 0)) {
-                       int brp_i = breakpoint->set - 1;
-                       int brp_j = breakpoint->linked_BRP;
-                       if ((brp_i < 0) || (brp_i >= aarch64->brp_num)) {
+                       int brp_i = breakpoint->number;
+                       int brp_j = breakpoint->linked_brp;
+                       if (brp_i >= aarch64->brp_num) {
                                LOG_DEBUG("Invalid BRP number in breakpoint");
                                return ERROR_OK;
                        }
@@ -1117,17 +1497,17 @@ static int aarch64_unset_breakpoint(struct target *target, struct breakpoint *br
                        brp_list[brp_i].value = 0;
                        brp_list[brp_i].control = 0;
                        retval = aarch64_dap_write_memap_register_u32(target, armv8->debug_base
-                                       + CPUV8_DBG_BCR_BASE + 16 * brp_list[brp_i].BRPn,
+                                       + CPUV8_DBG_BCR_BASE + 16 * brp_list[brp_i].brpn,
                                        brp_list[brp_i].control);
                        if (retval != ERROR_OK)
                                return retval;
                        retval = aarch64_dap_write_memap_register_u32(target, armv8->debug_base
-                                       + CPUV8_DBG_BVR_BASE + 16 * brp_list[brp_i].BRPn,
+                                       + CPUV8_DBG_BVR_BASE + 16 * brp_list[brp_i].brpn,
                                        (uint32_t)brp_list[brp_i].value);
                        if (retval != ERROR_OK)
                                return retval;
                        retval = aarch64_dap_write_memap_register_u32(target, armv8->debug_base
-                                       + CPUV8_DBG_BVR_BASE + 4 + 16 * brp_list[brp_i].BRPn,
+                                       + CPUV8_DBG_BVR_BASE + 4 + 16 * brp_list[brp_i].brpn,
                                        (uint32_t)brp_list[brp_i].value);
                        if (retval != ERROR_OK)
                                return retval;
@@ -1141,28 +1521,28 @@ static int aarch64_unset_breakpoint(struct target *target, struct breakpoint *br
                        brp_list[brp_j].value = 0;
                        brp_list[brp_j].control = 0;
                        retval = aarch64_dap_write_memap_register_u32(target, armv8->debug_base
-                                       + CPUV8_DBG_BCR_BASE + 16 * brp_list[brp_j].BRPn,
+                                       + CPUV8_DBG_BCR_BASE + 16 * brp_list[brp_j].brpn,
                                        brp_list[brp_j].control);
                        if (retval != ERROR_OK)
                                return retval;
                        retval = aarch64_dap_write_memap_register_u32(target, armv8->debug_base
-                                       + CPUV8_DBG_BVR_BASE + 16 * brp_list[brp_j].BRPn,
+                                       + CPUV8_DBG_BVR_BASE + 16 * brp_list[brp_j].brpn,
                                        (uint32_t)brp_list[brp_j].value);
                        if (retval != ERROR_OK)
                                return retval;
                        retval = aarch64_dap_write_memap_register_u32(target, armv8->debug_base
-                                       + CPUV8_DBG_BVR_BASE + 4 + 16 * brp_list[brp_j].BRPn,
+                                       + CPUV8_DBG_BVR_BASE + 4 + 16 * brp_list[brp_j].brpn,
                                        (uint32_t)brp_list[brp_j].value);
                        if (retval != ERROR_OK)
                                return retval;
 
-                       breakpoint->linked_BRP = 0;
-                       breakpoint->set = 0;
+                       breakpoint->linked_brp = 0;
+                       breakpoint->is_set = false;
                        return ERROR_OK;
 
                } else {
-                       int brp_i = breakpoint->set - 1;
-                       if ((brp_i < 0) || (brp_i >= aarch64->brp_num)) {
+                       int brp_i = breakpoint->number;
+                       if (brp_i >= aarch64->brp_num) {
                                LOG_DEBUG("Invalid BRP number in breakpoint");
                                return ERROR_OK;
                        }
@@ -1172,22 +1552,22 @@ static int aarch64_unset_breakpoint(struct target *target, struct breakpoint *br
                        brp_list[brp_i].value = 0;
                        brp_list[brp_i].control = 0;
                        retval = aarch64_dap_write_memap_register_u32(target, armv8->debug_base
-                                       + CPUV8_DBG_BCR_BASE + 16 * brp_list[brp_i].BRPn,
+                                       + CPUV8_DBG_BCR_BASE + 16 * brp_list[brp_i].brpn,
                                        brp_list[brp_i].control);
                        if (retval != ERROR_OK)
                                return retval;
                        retval = aarch64_dap_write_memap_register_u32(target, armv8->debug_base
-                                       + CPUV8_DBG_BVR_BASE + 16 * brp_list[brp_i].BRPn,
+                                       + CPUV8_DBG_BVR_BASE + 16 * brp_list[brp_i].brpn,
                                        brp_list[brp_i].value);
                        if (retval != ERROR_OK)
                                return retval;
 
                        retval = aarch64_dap_write_memap_register_u32(target, armv8->debug_base
-                                       + CPUV8_DBG_BVR_BASE + 4 + 16 * brp_list[brp_i].BRPn,
+                                       + CPUV8_DBG_BVR_BASE + 4 + 16 * brp_list[brp_i].brpn,
                                        (uint32_t)brp_list[brp_i].value);
                        if (retval != ERROR_OK)
                                return retval;
-                       breakpoint->set = 0;
+                       breakpoint->is_set = false;
                        return ERROR_OK;
                }
        } else {
@@ -1219,7 +1599,7 @@ static int aarch64_unset_breakpoint(struct target *target, struct breakpoint *br
                                breakpoint->address & 0xFFFFFFFFFFFFFFFE,
                                breakpoint->length);
        }
-       breakpoint->set = 0;
+       breakpoint->is_set = false;
 
        return ERROR_OK;
 }
@@ -1272,7 +1652,6 @@ static int aarch64_add_hybrid_breakpoint(struct target *target,
        return aarch64_set_hybrid_breakpoint(target, breakpoint);       /* ??? */
 }
 
-
 static int aarch64_remove_breakpoint(struct target *target, struct breakpoint *breakpoint)
 {
        struct aarch64_common *aarch64 = target_to_aarch64(target);
@@ -1285,7 +1664,7 @@ static int aarch64_remove_breakpoint(struct target *target, struct breakpoint *b
        }
 #endif
 
-       if (breakpoint->set) {
+       if (breakpoint->is_set) {
                aarch64_unset_breakpoint(target, breakpoint);
                if (breakpoint->type == BKPT_HARD)
                        aarch64->brp_num_available++;
@@ -1294,33 +1673,305 @@ static int aarch64_remove_breakpoint(struct target *target, struct breakpoint *b
        return ERROR_OK;
 }
 
-/*
- * Cortex-A8 Reset functions
- */
-
-static int aarch64_assert_reset(struct target *target)
+/* Setup hardware Watchpoint Register Pair */
+static int aarch64_set_watchpoint(struct target *target,
+       struct watchpoint *watchpoint)
 {
-       struct armv8_common *armv8 = target_to_armv8(target);
+       int retval;
+       int wp_i = 0;
+       uint32_t control, offset, length;
+       struct aarch64_common *aarch64 = target_to_aarch64(target);
+       struct armv8_common *armv8 = &aarch64->armv8_common;
+       struct aarch64_brp *wp_list = aarch64->wp_list;
 
-       LOG_DEBUG(" ");
+       if (watchpoint->is_set) {
+               LOG_WARNING("watchpoint already set");
+               return ERROR_OK;
+       }
+
+       while (wp_list[wp_i].used && (wp_i < aarch64->wp_num))
+               wp_i++;
+       if (wp_i >= aarch64->wp_num) {
+               LOG_ERROR("ERROR Can not find free Watchpoint Register Pair");
+               return ERROR_TARGET_RESOURCE_NOT_AVAILABLE;
+       }
+
+       control = (1 << 0)      /* enable */
+               | (3 << 1)      /* both user and privileged access */
+               | (1 << 13);    /* higher mode control */
+
+       switch (watchpoint->rw) {
+       case WPT_READ:
+               control |= 1 << 3;
+               break;
+       case WPT_WRITE:
+               control |= 2 << 3;
+               break;
+       case WPT_ACCESS:
+               control |= 3 << 3;
+               break;
+       }
+
+       /* Match up to 8 bytes. */
+       offset = watchpoint->address & 7;
+       length = watchpoint->length;
+       if (offset + length > sizeof(uint64_t)) {
+               length = sizeof(uint64_t) - offset;
+               LOG_WARNING("Adjust watchpoint match inside 8-byte boundary");
+       }
+       for (; length > 0; offset++, length--)
+               control |= (1 << offset) << 5;
+
+       wp_list[wp_i].value = watchpoint->address & 0xFFFFFFFFFFFFFFF8ULL;
+       wp_list[wp_i].control = control;
+
+       retval = aarch64_dap_write_memap_register_u32(target, armv8->debug_base
+                       + CPUV8_DBG_WVR_BASE + 16 * wp_list[wp_i].brpn,
+                       (uint32_t)(wp_list[wp_i].value & 0xFFFFFFFF));
+       if (retval != ERROR_OK)
+               return retval;
+       retval = aarch64_dap_write_memap_register_u32(target, armv8->debug_base
+                       + CPUV8_DBG_WVR_BASE + 4 + 16 * wp_list[wp_i].brpn,
+                       (uint32_t)(wp_list[wp_i].value >> 32));
+       if (retval != ERROR_OK)
+               return retval;
+
+       retval = aarch64_dap_write_memap_register_u32(target, armv8->debug_base
+                       + CPUV8_DBG_WCR_BASE + 16 * wp_list[wp_i].brpn,
+                       control);
+       if (retval != ERROR_OK)
+               return retval;
+       LOG_DEBUG("wp %i control 0x%0" PRIx32 " value 0x%" TARGET_PRIxADDR, wp_i,
+               wp_list[wp_i].control, wp_list[wp_i].value);
+
+       /* Ensure that halting debug mode is enable */
+       retval = aarch64_set_dscr_bits(target, DSCR_HDE, DSCR_HDE);
+       if (retval != ERROR_OK) {
+               LOG_DEBUG("Failed to set DSCR.HDE");
+               return retval;
+       }
+
+       wp_list[wp_i].used = 1;
+       watchpoint_set(watchpoint, wp_i);
+
+       return ERROR_OK;
+}
+
+/* Clear hardware Watchpoint Register Pair */
+static int aarch64_unset_watchpoint(struct target *target,
+       struct watchpoint *watchpoint)
+{
+       int retval;
+       struct aarch64_common *aarch64 = target_to_aarch64(target);
+       struct armv8_common *armv8 = &aarch64->armv8_common;
+       struct aarch64_brp *wp_list = aarch64->wp_list;
+
+       if (!watchpoint->is_set) {
+               LOG_WARNING("watchpoint not set");
+               return ERROR_OK;
+       }
+
+       int wp_i = watchpoint->number;
+       if (wp_i >= aarch64->wp_num) {
+               LOG_DEBUG("Invalid WP number in watchpoint");
+               return ERROR_OK;
+       }
+       LOG_DEBUG("rwp %i control 0x%0" PRIx32 " value 0x%0" PRIx64, wp_i,
+               wp_list[wp_i].control, wp_list[wp_i].value);
+       wp_list[wp_i].used = 0;
+       wp_list[wp_i].value = 0;
+       wp_list[wp_i].control = 0;
+       retval = aarch64_dap_write_memap_register_u32(target, armv8->debug_base
+                       + CPUV8_DBG_WCR_BASE + 16 * wp_list[wp_i].brpn,
+                       wp_list[wp_i].control);
+       if (retval != ERROR_OK)
+               return retval;
+       retval = aarch64_dap_write_memap_register_u32(target, armv8->debug_base
+                       + CPUV8_DBG_WVR_BASE + 16 * wp_list[wp_i].brpn,
+                       wp_list[wp_i].value);
+       if (retval != ERROR_OK)
+               return retval;
+
+       retval = aarch64_dap_write_memap_register_u32(target, armv8->debug_base
+                       + CPUV8_DBG_WVR_BASE + 4 + 16 * wp_list[wp_i].brpn,
+                       (uint32_t)wp_list[wp_i].value);
+       if (retval != ERROR_OK)
+               return retval;
+       watchpoint->is_set = false;
+
+       return ERROR_OK;
+}
+
+static int aarch64_add_watchpoint(struct target *target,
+       struct watchpoint *watchpoint)
+{
+       int retval;
+       struct aarch64_common *aarch64 = target_to_aarch64(target);
+
+       if (aarch64->wp_num_available < 1) {
+               LOG_INFO("no hardware watchpoint available");
+               return ERROR_TARGET_RESOURCE_NOT_AVAILABLE;
+       }
+
+       retval = aarch64_set_watchpoint(target, watchpoint);
+       if (retval == ERROR_OK)
+               aarch64->wp_num_available--;
+
+       return retval;
+}
+
+static int aarch64_remove_watchpoint(struct target *target,
+       struct watchpoint *watchpoint)
+{
+       struct aarch64_common *aarch64 = target_to_aarch64(target);
+
+       if (watchpoint->is_set) {
+               aarch64_unset_watchpoint(target, watchpoint);
+               aarch64->wp_num_available++;
+       }
+
+       return ERROR_OK;
+}
+
+/**
+ * find out which watchpoint hits
+ * get exception address and compare the address to watchpoints
+ */
+static int aarch64_hit_watchpoint(struct target *target,
+       struct watchpoint **hit_watchpoint)
+{
+       if (target->debug_reason != DBG_REASON_WATCHPOINT)
+               return ERROR_FAIL;
+
+       struct armv8_common *armv8 = target_to_armv8(target);
 
-       /* FIXME when halt is requested, make it work somehow... */
+       target_addr_t exception_address;
+       struct watchpoint *wp;
+
+       exception_address = armv8->dpm.wp_addr;
+
+       if (exception_address == 0xFFFFFFFF)
+               return ERROR_FAIL;
+
+       for (wp = target->watchpoints; wp; wp = wp->next)
+               if (exception_address >= wp->address && exception_address < (wp->address + wp->length)) {
+                       *hit_watchpoint = wp;
+                       return ERROR_OK;
+               }
+
+       return ERROR_FAIL;
+}
+
+/*
+ * Cortex-A8 Reset functions
+ */
+
+static int aarch64_enable_reset_catch(struct target *target, bool enable)
+{
+       struct armv8_common *armv8 = target_to_armv8(target);
+       uint32_t edecr;
+       int retval;
+
+       retval = mem_ap_read_atomic_u32(armv8->debug_ap,
+                       armv8->debug_base + CPUV8_DBG_EDECR, &edecr);
+       LOG_DEBUG("EDECR = 0x%08" PRIx32 ", enable=%d", edecr, enable);
+       if (retval != ERROR_OK)
+               return retval;
+
+       if (enable)
+               edecr |= ECR_RCE;
+       else
+               edecr &= ~ECR_RCE;
+
+       return mem_ap_write_atomic_u32(armv8->debug_ap,
+                       armv8->debug_base + CPUV8_DBG_EDECR, edecr);
+}
+
+static int aarch64_clear_reset_catch(struct target *target)
+{
+       struct armv8_common *armv8 = target_to_armv8(target);
+       uint32_t edesr;
+       int retval;
+       bool was_triggered;
+
+       /* check if Reset Catch debug event triggered as expected */
+       retval = mem_ap_read_atomic_u32(armv8->debug_ap,
+               armv8->debug_base + CPUV8_DBG_EDESR, &edesr);
+       if (retval != ERROR_OK)
+               return retval;
+
+       was_triggered = !!(edesr & ESR_RC);
+       LOG_DEBUG("Reset Catch debug event %s",
+                       was_triggered ? "triggered" : "NOT triggered!");
+
+       if (was_triggered) {
+               /* clear pending Reset Catch debug event */
+               edesr &= ~ESR_RC;
+               retval = mem_ap_write_atomic_u32(armv8->debug_ap,
+                       armv8->debug_base + CPUV8_DBG_EDESR, edesr);
+               if (retval != ERROR_OK)
+                       return retval;
+       }
+
+       return ERROR_OK;
+}
+
+static int aarch64_assert_reset(struct target *target)
+{
+       struct armv8_common *armv8 = target_to_armv8(target);
+       enum reset_types reset_config = jtag_get_reset_config();
+       int retval;
+
+       LOG_DEBUG(" ");
 
        /* Issue some kind of warm reset. */
        if (target_has_event_action(target, TARGET_EVENT_RESET_ASSERT))
                target_handle_event(target, TARGET_EVENT_RESET_ASSERT);
-       else if (jtag_get_reset_config() & RESET_HAS_SRST) {
+       else if (reset_config & RESET_HAS_SRST) {
+               bool srst_asserted = false;
+
+               if (target->reset_halt && !(reset_config & RESET_SRST_PULLS_TRST)) {
+                       if (target_was_examined(target)) {
+
+                               if (reset_config & RESET_SRST_NO_GATING) {
+                                       /*
+                                        * SRST needs to be asserted *before* Reset Catch
+                                        * debug event can be set up.
+                                        */
+                                       adapter_assert_reset();
+                                       srst_asserted = true;
+                               }
+
+                               /* make sure to clear all sticky errors */
+                               mem_ap_write_atomic_u32(armv8->debug_ap,
+                                               armv8->debug_base + CPUV8_DBG_DRCR, DRCR_CSE);
+
+                               /* set up Reset Catch debug event to halt the CPU after reset */
+                               retval = aarch64_enable_reset_catch(target, true);
+                               if (retval != ERROR_OK)
+                                       LOG_WARNING("%s: Error enabling Reset Catch debug event; the CPU will not halt immediately after reset!",
+                                                       target_name(target));
+                       } else {
+                               LOG_WARNING("%s: Target not examined, will not halt immediately after reset!",
+                                               target_name(target));
+                       }
+               }
+
                /* REVISIT handle "pulls" cases, if there's
                 * hardware that needs them to work.
                 */
-               jtag_add_reset(0, 1);
+               if (!srst_asserted)
+                       adapter_assert_reset();
        } else {
                LOG_ERROR("%s: how to reset?", target_name(target));
                return ERROR_FAIL;
        }
 
        /* registers are now invalid */
-       register_cache_invalidate(armv8->arm.core_cache);
+       if (target_was_examined(target)) {
+               register_cache_invalidate(armv8->arm.core_cache);
+               register_cache_invalidate(armv8->arm.core_cache->next);
+       }
 
        target->state = TARGET_RESET;
 
@@ -1334,180 +1985,356 @@ static int aarch64_deassert_reset(struct target *target)
        LOG_DEBUG(" ");
 
        /* be certain SRST is off */
-       jtag_add_reset(0, 0);
+       adapter_deassert_reset();
+
+       if (!target_was_examined(target))
+               return ERROR_OK;
+
+       retval = aarch64_init_debug_access(target);
+       if (retval != ERROR_OK)
+               return retval;
 
        retval = aarch64_poll(target);
        if (retval != ERROR_OK)
                return retval;
 
        if (target->reset_halt) {
+               /* clear pending Reset Catch debug event */
+               retval = aarch64_clear_reset_catch(target);
+               if (retval != ERROR_OK)
+                       LOG_WARNING("%s: Clearing Reset Catch debug event failed",
+                                       target_name(target));
+
+               /* disable Reset Catch debug event */
+               retval = aarch64_enable_reset_catch(target, false);
+               if (retval != ERROR_OK)
+                       LOG_WARNING("%s: Disabling Reset Catch debug event failed",
+                                       target_name(target));
+
                if (target->state != TARGET_HALTED) {
                        LOG_WARNING("%s: ran after reset and before halt ...",
                                target_name(target));
-                       retval = target_halt(target);
-                       if (retval != ERROR_OK)
-                               return retval;
+                       if (target_was_examined(target)) {
+                               retval = aarch64_halt_one(target, HALT_LAZY);
+                               if (retval != ERROR_OK)
+                                       return retval;
+                       } else {
+                               target->state = TARGET_UNKNOWN;
+                       }
                }
        }
 
        return ERROR_OK;
 }
 
-static int aarch64_write_apb_ap_memory(struct target *target,
-       uint64_t address, uint32_t size,
-       uint32_t count, const uint8_t *buffer)
+static int aarch64_write_cpu_memory_slow(struct target *target,
+       uint32_t size, uint32_t count, const uint8_t *buffer, uint32_t *dscr)
 {
-       /* write memory through APB-AP */
-       int retval = ERROR_COMMAND_SYNTAX_ERROR;
        struct armv8_common *armv8 = target_to_armv8(target);
        struct arm_dpm *dpm = &armv8->dpm;
        struct arm *arm = &armv8->arm;
-       int total_bytes = count * size;
-       int total_u32;
-       int start_byte = address & 0x3;
-       int end_byte   = (address + total_bytes) & 0x3;
-       struct reg *reg;
-       uint32_t dscr;
-       uint8_t *tmp_buff = NULL;
+       int retval;
 
-       LOG_DEBUG("Writing APB-AP memory address 0x%" PRIx64 " size %"  PRIu32 " count%"  PRIu32,
-                         address, size, count);
-       if (target->state != TARGET_HALTED) {
-               LOG_WARNING("target not halted");
-               return ERROR_TARGET_NOT_HALTED;
+       armv8_reg_current(arm, 1)->dirty = true;
+
+       /* change DCC to normal mode if necessary */
+       if (*dscr & DSCR_MA) {
+               *dscr &= ~DSCR_MA;
+               retval =  mem_ap_write_atomic_u32(armv8->debug_ap,
+                               armv8->debug_base + CPUV8_DBG_DSCR, *dscr);
+               if (retval != ERROR_OK)
+                       return retval;
        }
 
-       total_u32 = DIV_ROUND_UP((address & 3) + total_bytes, 4);
+       while (count) {
+               uint32_t data, opcode;
 
-       /* Mark register R0 as dirty, as it will be used
-        * for transferring the data.
-        * It will be restored automatically when exiting
-        * debug mode
-        */
-       reg = armv8_reg_current(arm, 1);
-       reg->dirty = true;
+               /* write the data to store into DTRRX */
+               if (size == 1)
+                       data = *buffer;
+               else if (size == 2)
+                       data = target_buffer_get_u16(target, buffer);
+               else
+                       data = target_buffer_get_u32(target, buffer);
+               retval = mem_ap_write_atomic_u32(armv8->debug_ap,
+                               armv8->debug_base + CPUV8_DBG_DTRRX, data);
+               if (retval != ERROR_OK)
+                       return retval;
+
+               if (arm->core_state == ARM_STATE_AARCH64)
+                       retval = dpm->instr_execute(dpm, ARMV8_MRS(SYSTEM_DBG_DTRRX_EL0, 1));
+               else
+                       retval = dpm->instr_execute(dpm, ARMV4_5_MRC(14, 0, 1, 0, 5, 0));
+               if (retval != ERROR_OK)
+                       return retval;
 
-       reg = armv8_reg_current(arm, 0);
-       reg->dirty = true;
+               if (size == 1)
+                       opcode = armv8_opcode(armv8, ARMV8_OPC_STRB_IP);
+               else if (size == 2)
+                       opcode = armv8_opcode(armv8, ARMV8_OPC_STRH_IP);
+               else
+                       opcode = armv8_opcode(armv8, ARMV8_OPC_STRW_IP);
+               retval = dpm->instr_execute(dpm, opcode);
+               if (retval != ERROR_OK)
+                       return retval;
 
-       /*  clear any abort  */
-       retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + CPUV8_DBG_DRCR, DRCR_CSE);
+               /* Advance */
+               buffer += size;
+               --count;
+       }
+
+       return ERROR_OK;
+}
+
+static int aarch64_write_cpu_memory_fast(struct target *target,
+       uint32_t count, const uint8_t *buffer, uint32_t *dscr)
+{
+       struct armv8_common *armv8 = target_to_armv8(target);
+       struct arm *arm = &armv8->arm;
+       int retval;
+
+       armv8_reg_current(arm, 1)->dirty = true;
+
+       /* Step 1.d   - Change DCC to memory mode */
+       *dscr |= DSCR_MA;
+       retval =  mem_ap_write_atomic_u32(armv8->debug_ap,
+                       armv8->debug_base + CPUV8_DBG_DSCR, *dscr);
        if (retval != ERROR_OK)
                return retval;
 
 
-       /* This algorithm comes from DDI0487A.g, chapter J9.1 */
+       /* Step 2.a   - Do the write */
+       retval = mem_ap_write_buf_noincr(armv8->debug_ap,
+                                       buffer, 4, count, armv8->debug_base + CPUV8_DBG_DTRRX);
+       if (retval != ERROR_OK)
+               return retval;
 
-       /* The algorithm only copies 32 bit words, so the buffer
-        * should be expanded to include the words at either end.
-        * The first and last words will be read first to avoid
-        * corruption if needed.
-        */
-       tmp_buff = malloc(total_u32 * 4);
+       /* Step 3.a   - Switch DTR mode back to Normal mode */
+       *dscr &= ~DSCR_MA;
+       retval = mem_ap_write_atomic_u32(armv8->debug_ap,
+                               armv8->debug_base + CPUV8_DBG_DSCR, *dscr);
+       if (retval != ERROR_OK)
+               return retval;
 
-       if ((start_byte != 0) && (total_u32 > 1)) {
-               /* First bytes not aligned - read the 32 bit word to avoid corrupting
-                * the other bytes in the word.
-                */
-               retval = aarch64_read_apb_ap_memory(target, (address & ~0x3), 4, 1, tmp_buff);
-               if (retval != ERROR_OK)
-                       goto error_free_buff_w;
-       }
+       return ERROR_OK;
+}
 
-       /* If end of write is not aligned, or the write is less than 4 bytes */
-       if ((end_byte != 0) ||
-               ((total_u32 == 1) && (total_bytes != 4))) {
+static int aarch64_write_cpu_memory(struct target *target,
+       uint64_t address, uint32_t size,
+       uint32_t count, const uint8_t *buffer)
+{
+       /* write memory through APB-AP */
+       int retval = ERROR_COMMAND_SYNTAX_ERROR;
+       struct armv8_common *armv8 = target_to_armv8(target);
+       struct arm_dpm *dpm = &armv8->dpm;
+       struct arm *arm = &armv8->arm;
+       uint32_t dscr;
 
-               /* Read the last word to avoid corruption during 32 bit write */
-               int mem_offset = (total_u32-1) * 4;
-               retval = aarch64_read_apb_ap_memory(target, (address & ~0x3) + mem_offset, 4, 1, &tmp_buff[mem_offset]);
-               if (retval != ERROR_OK)
-                       goto error_free_buff_w;
+       if (target->state != TARGET_HALTED) {
+               LOG_WARNING("target not halted");
+               return ERROR_TARGET_NOT_HALTED;
        }
 
-       /* Copy the write buffer over the top of the temporary buffer */
-       memcpy(&tmp_buff[start_byte], buffer, total_bytes);
+       /* Mark register X0 as dirty, as it will be used
+        * for transferring the data.
+        * It will be restored automatically when exiting
+        * debug mode
+        */
+       armv8_reg_current(arm, 0)->dirty = true;
 
-       /* We now have a 32 bit aligned buffer that can be written */
+       /* This algorithm comes from DDI0487A.g, chapter J9.1 */
 
        /* Read DSCR */
        retval = mem_ap_read_atomic_u32(armv8->debug_ap,
                        armv8->debug_base + CPUV8_DBG_DSCR, &dscr);
        if (retval != ERROR_OK)
-               goto error_free_buff_w;
+               return retval;
 
        /* Set Normal access mode  */
        dscr = (dscr & ~DSCR_MA);
        retval = mem_ap_write_atomic_u32(armv8->debug_ap,
                        armv8->debug_base + CPUV8_DBG_DSCR, dscr);
+       if (retval != ERROR_OK)
+               return retval;
 
        if (arm->core_state == ARM_STATE_AARCH64) {
                /* Write X0 with value 'address' using write procedure */
                /* Step 1.a+b - Write the address for read access into DBGDTR_EL0 */
                /* Step 1.c   - Copy value from DTR to R0 using instruction mrs DBGDTR_EL0, x0 */
                retval = dpm->instr_write_data_dcc_64(dpm,
-                               ARMV8_MRS(SYSTEM_DBG_DBGDTR_EL0, 0), address & ~0x3ULL);
+                               ARMV8_MRS(SYSTEM_DBG_DBGDTR_EL0, 0), address);
        } else {
                /* Write R0 with value 'address' using write procedure */
                /* Step 1.a+b - Write the address for read access into DBGDTRRX */
                /* Step 1.c   - Copy value from DTR to R0 using instruction mrc DBGDTRTXint, r0 */
-               dpm->instr_write_data_dcc(dpm,
-                               T32_FMTITR(ARMV4_5_MRC(14, 0, 0, 0, 5, 0)), address & ~0x3ULL);
-
+               retval = dpm->instr_write_data_dcc(dpm,
+                               ARMV4_5_MRC(14, 0, 0, 0, 5, 0), address);
        }
-       /* Step 1.d   - Change DCC to memory mode */
-       dscr = dscr | DSCR_MA;
-       retval +=  mem_ap_write_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + CPUV8_DBG_DSCR, dscr);
-       if (retval != ERROR_OK)
-               goto error_unset_dtr_w;
-
 
-       /* Step 2.a   - Do the write */
-       retval = mem_ap_write_buf_noincr(armv8->debug_ap,
-                                       tmp_buff, 4, total_u32, armv8->debug_base + CPUV8_DBG_DTRRX);
        if (retval != ERROR_OK)
-               goto error_unset_dtr_w;
+               return retval;
 
-       /* Step 3.a   - Switch DTR mode back to Normal mode */
-       dscr = (dscr & ~DSCR_MA);
-       retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                               armv8->debug_base + CPUV8_DBG_DSCR, dscr);
-       if (retval != ERROR_OK)
-               goto error_unset_dtr_w;
+       if (size == 4 && (address % 4) == 0)
+               retval = aarch64_write_cpu_memory_fast(target, count, buffer, &dscr);
+       else
+               retval = aarch64_write_cpu_memory_slow(target, size, count, buffer, &dscr);
+
+       if (retval != ERROR_OK) {
+               /* Unset DTR mode */
+               mem_ap_read_atomic_u32(armv8->debug_ap,
+                                       armv8->debug_base + CPUV8_DBG_DSCR, &dscr);
+               dscr &= ~DSCR_MA;
+               mem_ap_write_atomic_u32(armv8->debug_ap,
+                                       armv8->debug_base + CPUV8_DBG_DSCR, dscr);
+       }
 
        /* Check for sticky abort flags in the DSCR */
        retval = mem_ap_read_atomic_u32(armv8->debug_ap,
                                armv8->debug_base + CPUV8_DBG_DSCR, &dscr);
        if (retval != ERROR_OK)
-               goto error_free_buff_w;
+               return retval;
+
+       dpm->dscr = dscr;
        if (dscr & (DSCR_ERR | DSCR_SYS_ERROR_PEND)) {
                /* Abort occurred - clear it and exit */
                LOG_ERROR("abort occurred - dscr = 0x%08" PRIx32, dscr);
-               mem_ap_write_atomic_u32(armv8->debug_ap,
-                                       armv8->debug_base + CPUV8_DBG_DRCR, 1<<2);
-               goto error_free_buff_w;
+               armv8_dpm_handle_exception(dpm, true);
+               return ERROR_FAIL;
+       }
+
+       /* Done */
+       return ERROR_OK;
+}
+
+static int aarch64_read_cpu_memory_slow(struct target *target,
+       uint32_t size, uint32_t count, uint8_t *buffer, uint32_t *dscr)
+{
+       struct armv8_common *armv8 = target_to_armv8(target);
+       struct arm_dpm *dpm = &armv8->dpm;
+       struct arm *arm = &armv8->arm;
+       int retval;
+
+       armv8_reg_current(arm, 1)->dirty = true;
+
+       /* change DCC to normal mode (if necessary) */
+       if (*dscr & DSCR_MA) {
+               *dscr &= DSCR_MA;
+               retval =  mem_ap_write_atomic_u32(armv8->debug_ap,
+                               armv8->debug_base + CPUV8_DBG_DSCR, *dscr);
+               if (retval != ERROR_OK)
+                       return retval;
+       }
+
+       while (count) {
+               uint32_t opcode, data;
+
+               if (size == 1)
+                       opcode = armv8_opcode(armv8, ARMV8_OPC_LDRB_IP);
+               else if (size == 2)
+                       opcode = armv8_opcode(armv8, ARMV8_OPC_LDRH_IP);
+               else
+                       opcode = armv8_opcode(armv8, ARMV8_OPC_LDRW_IP);
+               retval = dpm->instr_execute(dpm, opcode);
+               if (retval != ERROR_OK)
+                       return retval;
+
+               if (arm->core_state == ARM_STATE_AARCH64)
+                       retval = dpm->instr_execute(dpm, ARMV8_MSR_GP(SYSTEM_DBG_DTRTX_EL0, 1));
+               else
+                       retval = dpm->instr_execute(dpm, ARMV4_5_MCR(14, 0, 1, 0, 5, 0));
+               if (retval != ERROR_OK)
+                       return retval;
+
+               retval = mem_ap_read_atomic_u32(armv8->debug_ap,
+                               armv8->debug_base + CPUV8_DBG_DTRTX, &data);
+               if (retval != ERROR_OK)
+                       return retval;
+
+               if (size == 1)
+                       *buffer = (uint8_t)data;
+               else if (size == 2)
+                       target_buffer_set_u16(target, buffer, (uint16_t)data);
+               else
+                       target_buffer_set_u32(target, buffer, data);
+
+               /* Advance */
+               buffer += size;
+               --count;
+       }
+
+       return ERROR_OK;
+}
+
+static int aarch64_read_cpu_memory_fast(struct target *target,
+       uint32_t count, uint8_t *buffer, uint32_t *dscr)
+{
+       struct armv8_common *armv8 = target_to_armv8(target);
+       struct arm_dpm *dpm = &armv8->dpm;
+       struct arm *arm = &armv8->arm;
+       int retval;
+       uint32_t value;
+
+       /* Mark X1 as dirty */
+       armv8_reg_current(arm, 1)->dirty = true;
+
+       if (arm->core_state == ARM_STATE_AARCH64) {
+               /* Step 1.d - Dummy operation to ensure EDSCR.Txfull == 1 */
+               retval = dpm->instr_execute(dpm, ARMV8_MSR_GP(SYSTEM_DBG_DBGDTR_EL0, 0));
+       } else {
+               /* Step 1.d - Dummy operation to ensure EDSCR.Txfull == 1 */
+               retval = dpm->instr_execute(dpm, ARMV4_5_MCR(14, 0, 0, 0, 5, 0));
+       }
+
+       if (retval != ERROR_OK)
+               return retval;
+
+       /* Step 1.e - Change DCC to memory mode */
+       *dscr |= DSCR_MA;
+       retval =  mem_ap_write_atomic_u32(armv8->debug_ap,
+                       armv8->debug_base + CPUV8_DBG_DSCR, *dscr);
+       if (retval != ERROR_OK)
+               return retval;
+
+       /* Step 1.f - read DBGDTRTX and discard the value */
+       retval = mem_ap_read_atomic_u32(armv8->debug_ap,
+                       armv8->debug_base + CPUV8_DBG_DTRTX, &value);
+       if (retval != ERROR_OK)
+               return retval;
+
+       count--;
+       /* Read the data - Each read of the DTRTX register causes the instruction to be reissued
+        * Abort flags are sticky, so can be read at end of transactions
+        *
+        * This data is read in aligned to 32 bit boundary.
+        */
+
+       if (count) {
+               /* Step 2.a - Loop n-1 times, each read of DBGDTRTX reads the data from [X0] and
+                * increments X0 by 4. */
+               retval = mem_ap_read_buf_noincr(armv8->debug_ap, buffer, 4, count,
+                                                                       armv8->debug_base + CPUV8_DBG_DTRTX);
+               if (retval != ERROR_OK)
+                       return retval;
        }
 
-       /* Done */
-       free(tmp_buff);
-       return ERROR_OK;
+       /* Step 3.a - set DTR access mode back to Normal mode   */
+       *dscr &= ~DSCR_MA;
+       retval =  mem_ap_write_atomic_u32(armv8->debug_ap,
+                                       armv8->debug_base + CPUV8_DBG_DSCR, *dscr);
+       if (retval != ERROR_OK)
+               return retval;
+
+       /* Step 3.b - read DBGDTRTX for the final value */
+       retval = mem_ap_read_atomic_u32(armv8->debug_ap,
+                       armv8->debug_base + CPUV8_DBG_DTRTX, &value);
+       if (retval != ERROR_OK)
+               return retval;
 
-error_unset_dtr_w:
-       /* Unset DTR mode */
-       mem_ap_read_atomic_u32(armv8->debug_ap,
-                               armv8->debug_base + CPUV8_DBG_DSCR, &dscr);
-       dscr = (dscr & ~DSCR_MA);
-       mem_ap_write_atomic_u32(armv8->debug_ap,
-                               armv8->debug_base + CPUV8_DBG_DSCR, dscr);
-error_free_buff_w:
-       LOG_ERROR("error");
-       free(tmp_buff);
-       return ERROR_FAIL;
+       target_buffer_set_u32(target, buffer + count * 4, value);
+       return retval;
 }
 
-static int aarch64_read_apb_ap_memory(struct target *target,
+static int aarch64_read_cpu_memory(struct target *target,
        target_addr_t address, uint32_t size,
        uint32_t count, uint8_t *buffer)
 {
@@ -1516,162 +2343,86 @@ static int aarch64_read_apb_ap_memory(struct target *target,
        struct armv8_common *armv8 = target_to_armv8(target);
        struct arm_dpm *dpm = &armv8->dpm;
        struct arm *arm = &armv8->arm;
-       int total_bytes = count * size;
-       int total_u32;
-       int start_byte = address & 0x3;
-       int end_byte   = (address + total_bytes) & 0x3;
-       struct reg *reg;
        uint32_t dscr;
-       uint8_t *tmp_buff = NULL;
-       uint8_t *u8buf_ptr;
-       uint32_t value;
 
-       LOG_DEBUG("Reading APB-AP memory address 0x%" TARGET_PRIxADDR " size %" PRIu32 " count%"  PRIu32,
-                         address, size, count);
+       LOG_DEBUG("Reading CPU memory address 0x%016" PRIx64 " size %" PRIu32 " count %" PRIu32,
+                       address, size, count);
+
        if (target->state != TARGET_HALTED) {
                LOG_WARNING("target not halted");
                return ERROR_TARGET_NOT_HALTED;
        }
 
-       total_u32 = DIV_ROUND_UP((address & 3) + total_bytes, 4);
-       /* Mark register X0, X1 as dirty, as it will be used
+       /* Mark register X0 as dirty, as it will be used
         * for transferring the data.
         * It will be restored automatically when exiting
         * debug mode
         */
-       reg = armv8_reg_current(arm, 1);
-       reg->dirty = true;
-
-       reg = armv8_reg_current(arm, 0);
-       reg->dirty = true;
-
-       /*      clear any abort  */
-       retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                               armv8->debug_base + CPUV8_DBG_DRCR, DRCR_CSE);
-       if (retval != ERROR_OK)
-               goto error_free_buff_r;
+       armv8_reg_current(arm, 0)->dirty = true;
 
        /* Read DSCR */
        retval = mem_ap_read_atomic_u32(armv8->debug_ap,
                                armv8->debug_base + CPUV8_DBG_DSCR, &dscr);
+       if (retval != ERROR_OK)
+               return retval;
 
        /* This algorithm comes from DDI0487A.g, chapter J9.1 */
 
        /* Set Normal access mode  */
-       dscr = (dscr & ~DSCR_MA);
-       retval +=  mem_ap_write_atomic_u32(armv8->debug_ap,
+       dscr &= ~DSCR_MA;
+       retval =  mem_ap_write_atomic_u32(armv8->debug_ap,
                        armv8->debug_base + CPUV8_DBG_DSCR, dscr);
+       if (retval != ERROR_OK)
+               return retval;
 
        if (arm->core_state == ARM_STATE_AARCH64) {
                /* Write X0 with value 'address' using write procedure */
                /* Step 1.a+b - Write the address for read access into DBGDTR_EL0 */
                /* Step 1.c   - Copy value from DTR to R0 using instruction mrs DBGDTR_EL0, x0 */
-               retval += dpm->instr_write_data_dcc_64(dpm,
-                               ARMV8_MRS(SYSTEM_DBG_DBGDTR_EL0, 0), address & ~0x3ULL);
-               /* Step 1.d - Dummy operation to ensure EDSCR.Txfull == 1 */
-               retval += dpm->instr_execute(dpm, ARMV8_MSR_GP(SYSTEM_DBG_DBGDTR_EL0, 0));
-               /* Step 1.e - Change DCC to memory mode */
-               dscr = dscr | DSCR_MA;
-               retval +=  mem_ap_write_atomic_u32(armv8->debug_ap,
-                               armv8->debug_base + CPUV8_DBG_DSCR, dscr);
-               /* Step 1.f - read DBGDTRTX and discard the value */
-               retval += mem_ap_read_atomic_u32(armv8->debug_ap,
-                               armv8->debug_base + CPUV8_DBG_DTRTX, &value);
+               retval = dpm->instr_write_data_dcc_64(dpm,
+                               ARMV8_MRS(SYSTEM_DBG_DBGDTR_EL0, 0), address);
        } else {
                /* Write R0 with value 'address' using write procedure */
                /* Step 1.a+b - Write the address for read access into DBGDTRRXint */
                /* Step 1.c   - Copy value from DTR to R0 using instruction mrc DBGDTRTXint, r0 */
-               retval += dpm->instr_write_data_dcc(dpm,
-                               T32_FMTITR(ARMV4_5_MRC(14, 0, 0, 0, 5, 0)), address & ~0x3ULL);
-               /* Step 1.d - Dummy operation to ensure EDSCR.Txfull == 1 */
-               retval += dpm->instr_execute(dpm, T32_FMTITR(ARMV4_5_MCR(14, 0, 0, 0, 5, 0)));
-               /* Step 1.e - Change DCC to memory mode */
-               dscr = dscr | DSCR_MA;
-               retval +=  mem_ap_write_atomic_u32(armv8->debug_ap,
-                               armv8->debug_base + CPUV8_DBG_DSCR, dscr);
-               /* Step 1.f - read DBGDTRTX and discard the value */
-               retval += mem_ap_read_atomic_u32(armv8->debug_ap,
-                               armv8->debug_base + CPUV8_DBG_DTRTX, &value);
-
+               retval = dpm->instr_write_data_dcc(dpm,
+                               ARMV4_5_MRC(14, 0, 0, 0, 5, 0), address);
        }
-       if (retval != ERROR_OK)
-               goto error_unset_dtr_r;
-
-       /* Optimize the read as much as we can, either way we read in a single pass  */
-       if ((start_byte) || (end_byte)) {
-               /* The algorithm only copies 32 bit words, so the buffer
-                * should be expanded to include the words at either end.
-                * The first and last words will be read into a temp buffer
-                * to avoid corruption
-                */
-               tmp_buff = malloc(total_u32 * 4);
-               if (!tmp_buff)
-                       goto error_unset_dtr_r;
 
-               /* use the tmp buffer to read the entire data */
-               u8buf_ptr = tmp_buff;
-       } else
-               /* address and read length are aligned so read directly into the passed buffer */
-               u8buf_ptr = buffer;
-
-       /* Read the data - Each read of the DTRTX register causes the instruction to be reissued
-        * Abort flags are sticky, so can be read at end of transactions
-        *
-        * This data is read in aligned to 32 bit boundary.
-        */
-
-       /* Step 2.a - Loop n-1 times, each read of DBGDTRTX reads the data from [X0] and
-        * increments X0 by 4. */
-       retval = mem_ap_read_buf_noincr(armv8->debug_ap, u8buf_ptr, 4, total_u32-1,
-                                                                       armv8->debug_base + CPUV8_DBG_DTRTX);
        if (retval != ERROR_OK)
-                       goto error_unset_dtr_r;
+               return retval;
 
-       /* Step 3.a - set DTR access mode back to Normal mode   */
-       dscr = (dscr & ~DSCR_MA);
-       retval =  mem_ap_write_atomic_u32(armv8->debug_ap,
+       if (size == 4 && (address % 4) == 0)
+               retval = aarch64_read_cpu_memory_fast(target, count, buffer, &dscr);
+       else
+               retval = aarch64_read_cpu_memory_slow(target, size, count, buffer, &dscr);
+
+       if (dscr & DSCR_MA) {
+               dscr &= ~DSCR_MA;
+               mem_ap_write_atomic_u32(armv8->debug_ap,
                                        armv8->debug_base + CPUV8_DBG_DSCR, dscr);
-       if (retval != ERROR_OK)
-               goto error_free_buff_r;
+       }
 
-       /* Step 3.b - read DBGDTRTX for the final value */
-       retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + CPUV8_DBG_DTRTX, &value);
-       memcpy(u8buf_ptr + (total_u32-1) * 4, &value, 4);
+       if (retval != ERROR_OK)
+               return retval;
 
        /* Check for sticky abort flags in the DSCR */
        retval = mem_ap_read_atomic_u32(armv8->debug_ap,
                                armv8->debug_base + CPUV8_DBG_DSCR, &dscr);
        if (retval != ERROR_OK)
-               goto error_free_buff_r;
+               return retval;
+
+       dpm->dscr = dscr;
+
        if (dscr & (DSCR_ERR | DSCR_SYS_ERROR_PEND)) {
                /* Abort occurred - clear it and exit */
                LOG_ERROR("abort occurred - dscr = 0x%08" PRIx32, dscr);
-               mem_ap_write_atomic_u32(armv8->debug_ap,
-                                       armv8->debug_base + CPUV8_DBG_DRCR, DRCR_CSE);
-               goto error_free_buff_r;
-       }
-
-       /* check if we need to copy aligned data by applying any shift necessary */
-       if (tmp_buff) {
-               memcpy(buffer, tmp_buff + start_byte, total_bytes);
-               free(tmp_buff);
+               armv8_dpm_handle_exception(dpm, true);
+               return ERROR_FAIL;
        }
 
        /* Done */
        return ERROR_OK;
-
-error_unset_dtr_r:
-       /* Unset DTR mode */
-       mem_ap_read_atomic_u32(armv8->debug_ap,
-                               armv8->debug_base + CPUV8_DBG_DSCR, &dscr);
-       dscr = (dscr & ~DSCR_MA);
-       mem_ap_write_atomic_u32(armv8->debug_ap,
-                               armv8->debug_base + CPUV8_DBG_DSCR, dscr);
-error_free_buff_r:
-       LOG_ERROR("error");
-       free(tmp_buff);
-       return ERROR_FAIL;
 }
 
 static int aarch64_read_phys_memory(struct target *target,
@@ -1679,15 +2430,13 @@ static int aarch64_read_phys_memory(struct target *target,
        uint32_t count, uint8_t *buffer)
 {
        int retval = ERROR_COMMAND_SYNTAX_ERROR;
-       LOG_DEBUG("Reading memory at real address 0x%" TARGET_PRIxADDR "; size %" PRId32 "; count %" PRId32,
-               address, size, count);
 
        if (count && buffer) {
                /* read memory through APB-AP */
                retval = aarch64_mmu_modify(target, 0);
                if (retval != ERROR_OK)
                        return retval;
-               retval = aarch64_read_apb_ap_memory(target, address, size, count, buffer);
+               retval = aarch64_read_cpu_memory(target, address, size, count, buffer);
        }
        return retval;
 }
@@ -1698,25 +2447,18 @@ static int aarch64_read_memory(struct target *target, target_addr_t address,
        int mmu_enabled = 0;
        int retval;
 
-       /* aarch64 handles unaligned memory access */
-       LOG_DEBUG("Reading memory at address 0x%" TARGET_PRIxADDR "; size %" PRId32 "; count %" PRId32, address,
-               size, count);
-
        /* determine if MMU was enabled on target stop */
        retval = aarch64_mmu(target, &mmu_enabled);
        if (retval != ERROR_OK)
                return retval;
 
        if (mmu_enabled) {
-               retval = aarch64_check_address(target, address);
-               if (retval != ERROR_OK)
-                       return retval;
                /* enable MMU as we could have disabled it for phys access */
                retval = aarch64_mmu_modify(target, 1);
                if (retval != ERROR_OK)
                        return retval;
        }
-       return aarch64_read_apb_ap_memory(target, address, size, count, buffer);
+       return aarch64_read_cpu_memory(target, address, size, count, buffer);
 }
 
 static int aarch64_write_phys_memory(struct target *target,
@@ -1725,15 +2467,12 @@ static int aarch64_write_phys_memory(struct target *target,
 {
        int retval = ERROR_COMMAND_SYNTAX_ERROR;
 
-       LOG_DEBUG("Writing memory to real address 0x%" TARGET_PRIxADDR "; size %" PRId32 "; count %" PRId32, address,
-               size, count);
-
        if (count && buffer) {
                /* write memory through APB-AP */
                retval = aarch64_mmu_modify(target, 0);
                if (retval != ERROR_OK)
                        return retval;
-               return aarch64_write_apb_ap_memory(target, address, size, count, buffer);
+               return aarch64_write_cpu_memory(target, address, size, count, buffer);
        }
 
        return retval;
@@ -1745,25 +2484,18 @@ static int aarch64_write_memory(struct target *target, target_addr_t address,
        int mmu_enabled = 0;
        int retval;
 
-       /* aarch64 handles unaligned memory access */
-       LOG_DEBUG("Writing memory at address 0x%" TARGET_PRIxADDR "; size %" PRId32
-                 "; count %" PRId32, address, size, count);
-
        /* determine if MMU was enabled on target stop */
        retval = aarch64_mmu(target, &mmu_enabled);
        if (retval != ERROR_OK)
                return retval;
 
        if (mmu_enabled) {
-               retval = aarch64_check_address(target, address);
-               if (retval != ERROR_OK)
-                       return retval;
                /* enable MMU as we could have disabled it for phys access */
                retval = aarch64_mmu_modify(target, 1);
                if (retval != ERROR_OK)
                        return retval;
        }
-       return aarch64_write_apb_ap_memory(target, address, size, count, buffer);
+       return aarch64_write_cpu_memory(target, address, size, count, buffer);
 }
 
 static int aarch64_handle_target_request(void *priv)
@@ -1803,25 +2535,35 @@ static int aarch64_examine_first(struct target *target)
        struct aarch64_common *aarch64 = target_to_aarch64(target);
        struct armv8_common *armv8 = &aarch64->armv8_common;
        struct adiv5_dap *swjdp = armv8->arm.dap;
+       struct aarch64_private_config *pc = target->private_config;
        int i;
        int retval = ERROR_OK;
        uint64_t debug, ttypr;
        uint32_t cpuid;
-       uint32_t tmp0, tmp1;
+       uint32_t tmp0, tmp1, tmp2, tmp3;
        debug = ttypr = cpuid = 0;
 
-       /* We do one extra read to ensure DAP is configured,
-        * we call ahbap_debugport_init(swjdp) instead
-        */
-       retval = dap_dp_init(swjdp);
-       if (retval != ERROR_OK)
-               return retval;
+       if (!pc)
+               return ERROR_FAIL;
 
-       /* Search for the APB-AB - it is needed for access to debug registers */
-       retval = dap_find_ap(swjdp, AP_TYPE_APB_AP, &armv8->debug_ap);
-       if (retval != ERROR_OK) {
-               LOG_ERROR("Could not find APB-AP for debug access");
-               return retval;
+       if (armv8->debug_ap) {
+               dap_put_ap(armv8->debug_ap);
+               armv8->debug_ap = NULL;
+       }
+
+       if (pc->adiv5_config.ap_num == DP_APSEL_INVALID) {
+               /* Search for the APB-AB */
+               retval = dap_find_get_ap(swjdp, AP_TYPE_APB_AP, &armv8->debug_ap);
+               if (retval != ERROR_OK) {
+                       LOG_ERROR("Could not find APB-AP for debug access");
+                       return retval;
+               }
+       } else {
+               armv8->debug_ap = dap_get_ap(swjdp, pc->adiv5_config.ap_num);
+               if (!armv8->debug_ap) {
+                       LOG_ERROR("Cannot get AP");
+                       return ERROR_FAIL;
+               }
        }
 
        retval = mem_ap_init(armv8->debug_ap);
@@ -1830,33 +2572,19 @@ static int aarch64_examine_first(struct target *target)
                return retval;
        }
 
-       armv8->debug_ap->memaccess_tck = 80;
+       armv8->debug_ap->memaccess_tck = 10;
 
        if (!target->dbgbase_set) {
-               uint32_t dbgbase;
-               /* Get ROM Table base */
-               uint32_t apid;
-               int32_t coreidx = target->coreid;
-               retval = dap_get_debugbase(armv8->debug_ap, &dbgbase, &apid);
-               if (retval != ERROR_OK)
-                       return retval;
-               /* Lookup 0x15 -- Processor DAP */
-               retval = dap_lookup_cs_component(armv8->debug_ap, dbgbase, 0x15,
-                               &armv8->debug_base, &coreidx);
+               /* Lookup Processor DAP */
+               retval = dap_lookup_cs_component(armv8->debug_ap, ARM_CS_C9_DEVTYPE_CORE_DEBUG,
+                               &armv8->debug_base, target->coreid);
                if (retval != ERROR_OK)
                        return retval;
-               LOG_DEBUG("Detected core %" PRId32 " dbgbase: %08" PRIx32
-                               " apid: %08" PRIx32, coreidx, armv8->debug_base, apid);
+               LOG_DEBUG("Detected core %" PRId32 " dbgbase: " TARGET_ADDR_FMT,
+                               target->coreid, armv8->debug_base);
        } else
                armv8->debug_base = target->dbgbase;
 
-       retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + CPUV8_DBG_LOCKACCESS, 0xC5ACCE55);
-       if (retval != ERROR_OK) {
-               LOG_DEBUG("LOCK debug access fail");
-               return retval;
-       }
-
        retval = mem_ap_write_atomic_u32(armv8->debug_ap,
                        armv8->debug_base + CPUV8_DBG_OSLAR, 0);
        if (retval != ERROR_OK) {
@@ -1864,47 +2592,52 @@ static int aarch64_examine_first(struct target *target)
                return retval;
        }
 
-       retval = mem_ap_read_atomic_u32(armv8->debug_ap,
+       retval = mem_ap_read_u32(armv8->debug_ap,
                        armv8->debug_base + CPUV8_DBG_MAINID0, &cpuid);
        if (retval != ERROR_OK) {
                LOG_DEBUG("Examine %s failed", "CPUID");
                return retval;
        }
 
-       retval = mem_ap_read_atomic_u32(armv8->debug_ap,
+       retval = mem_ap_read_u32(armv8->debug_ap,
                        armv8->debug_base + CPUV8_DBG_MEMFEATURE0, &tmp0);
-       retval += mem_ap_read_atomic_u32(armv8->debug_ap,
+       retval += mem_ap_read_u32(armv8->debug_ap,
                        armv8->debug_base + CPUV8_DBG_MEMFEATURE0 + 4, &tmp1);
        if (retval != ERROR_OK) {
                LOG_DEBUG("Examine %s failed", "Memory Model Type");
                return retval;
        }
-       ttypr |= tmp1;
-       ttypr = (ttypr << 32) | tmp0;
-
-       retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + CPUV8_DBG_DBGFEATURE0, &tmp0);
-       retval += mem_ap_read_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + CPUV8_DBG_DBGFEATURE0 + 4, &tmp1);
+       retval = mem_ap_read_u32(armv8->debug_ap,
+                       armv8->debug_base + CPUV8_DBG_DBGFEATURE0, &tmp2);
+       retval += mem_ap_read_u32(armv8->debug_ap,
+                       armv8->debug_base + CPUV8_DBG_DBGFEATURE0 + 4, &tmp3);
        if (retval != ERROR_OK) {
                LOG_DEBUG("Examine %s failed", "ID_AA64DFR0_EL1");
                return retval;
        }
-       debug |= tmp1;
-       debug = (debug << 32) | tmp0;
+
+       retval = dap_run(armv8->debug_ap->dap);
+       if (retval != ERROR_OK) {
+               LOG_ERROR("%s: examination failed\n", target_name(target));
+               return retval;
+       }
+
+       ttypr |= tmp1;
+       ttypr = (ttypr << 32) | tmp0;
+       debug |= tmp3;
+       debug = (debug << 32) | tmp2;
 
        LOG_DEBUG("cpuid = 0x%08" PRIx32, cpuid);
        LOG_DEBUG("ttypr = 0x%08" PRIx64, ttypr);
        LOG_DEBUG("debug = 0x%08" PRIx64, debug);
 
-       if (target->ctibase == 0) {
-               /* assume a v8 rom table layout */
-               armv8->cti_base = target->ctibase = armv8->debug_base + 0x10000;
-               LOG_INFO("Target ctibase is not set, assuming 0x%0" PRIx32, target->ctibase);
-       } else
-               armv8->cti_base = target->ctibase;
+       if (!pc->cti) {
+               LOG_TARGET_ERROR(target, "CTI not specified");
+               return ERROR_FAIL;
+       }
+
+       armv8->cti = pc->cti;
 
-       armv8->arm.core_type = ARM_MODE_MON;
        retval = aarch64_dpm_setup(aarch64, debug);
        if (retval != ERROR_OK)
                return retval;
@@ -1922,11 +2655,27 @@ static int aarch64_examine_first(struct target *target)
                        aarch64->brp_list[i].type = BRP_CONTEXT;
                aarch64->brp_list[i].value = 0;
                aarch64->brp_list[i].control = 0;
-               aarch64->brp_list[i].BRPn = i;
+               aarch64->brp_list[i].brpn = i;
+       }
+
+       /* Setup Watchpoint Register Pairs */
+       aarch64->wp_num = (uint32_t)((debug >> 20) & 0x0F) + 1;
+       aarch64->wp_num_available = aarch64->wp_num;
+       aarch64->wp_list = calloc(aarch64->wp_num, sizeof(struct aarch64_brp));
+       for (i = 0; i < aarch64->wp_num; i++) {
+               aarch64->wp_list[i].used = 0;
+               aarch64->wp_list[i].type = BRP_NORMAL;
+               aarch64->wp_list[i].value = 0;
+               aarch64->wp_list[i].control = 0;
+               aarch64->wp_list[i].brpn = i;
        }
 
-       LOG_DEBUG("Configured %i hw breakpoints", aarch64->brp_num);
+       LOG_DEBUG("Configured %i hw breakpoints, %i watchpoints",
+               aarch64->brp_num, aarch64->wp_num);
 
+       target->state = TARGET_UNKNOWN;
+       target->debug_reason = DBG_REASON_NOTHALTED;
+       aarch64->isrmasking_mode = AARCH64_ISRMASK_ON;
        target_set_examined(target);
        return ERROR_OK;
 }
@@ -1954,58 +2703,70 @@ static int aarch64_init_target(struct command_context *cmd_ctx,
        struct target *target)
 {
        /* examine_first() does a bunch of this */
+       arm_semihosting_init(target);
        return ERROR_OK;
 }
 
 static int aarch64_init_arch_info(struct target *target,
-       struct aarch64_common *aarch64, struct jtag_tap *tap)
+       struct aarch64_common *aarch64, struct adiv5_dap *dap)
 {
        struct armv8_common *armv8 = &aarch64->armv8_common;
-       struct adiv5_dap *dap = armv8->arm.dap;
-
-       armv8->arm.dap = dap;
 
        /* Setup struct aarch64_common */
        aarch64->common_magic = AARCH64_COMMON_MAGIC;
-       /*  tap has no dap initialized */
-       if (!tap->dap) {
-               tap->dap = dap_init();
-
-               /* Leave (only) generic DAP stuff for debugport_init() */
-               tap->dap->tap = tap;
-       }
-
-       armv8->arm.dap = tap->dap;
-
-       aarch64->fast_reg_read = 0;
+       armv8->arm.dap = dap;
 
        /* register arch-specific functions */
        armv8->examine_debug_reason = NULL;
-
        armv8->post_debug_entry = aarch64_post_debug_entry;
-
        armv8->pre_restore_context = NULL;
-
        armv8->armv8_mmu.read_physical_memory = aarch64_read_phys_memory;
 
-       /* REVISIT v7a setup should be in a v7a-specific routine */
        armv8_init_arch_info(target, armv8);
-       target_register_timer_callback(aarch64_handle_target_request, 1, 1, target);
+       target_register_timer_callback(aarch64_handle_target_request, 1,
+               TARGET_TIMER_TYPE_PERIODIC, target);
 
        return ERROR_OK;
 }
 
 static int aarch64_target_create(struct target *target, Jim_Interp *interp)
 {
-       struct aarch64_common *aarch64 = calloc(1, sizeof(struct aarch64_common));
+       struct aarch64_private_config *pc = target->private_config;
+       struct aarch64_common *aarch64;
+
+       if (adiv5_verify_config(&pc->adiv5_config) != ERROR_OK)
+               return ERROR_FAIL;
+
+       aarch64 = calloc(1, sizeof(struct aarch64_common));
+       if (!aarch64) {
+               LOG_ERROR("Out of memory");
+               return ERROR_FAIL;
+       }
+
+       return aarch64_init_arch_info(target, aarch64, pc->adiv5_config.dap);
+}
+
+static void aarch64_deinit_target(struct target *target)
+{
+       struct aarch64_common *aarch64 = target_to_aarch64(target);
+       struct armv8_common *armv8 = &aarch64->armv8_common;
+       struct arm_dpm *dpm = &armv8->dpm;
 
-       return aarch64_init_arch_info(target, aarch64, target->tap);
+       if (armv8->debug_ap)
+               dap_put_ap(armv8->debug_ap);
+
+       armv8_free_reg_cache(target);
+       free(aarch64->brp_list);
+       free(dpm->dbp);
+       free(dpm->dwp);
+       free(target->private_config);
+       free(aarch64);
 }
 
 static int aarch64_mmu(struct target *target, int *enabled)
 {
        if (target->state != TARGET_HALTED) {
-               LOG_ERROR("%s: target not halted", __func__);
+               LOG_ERROR("%s: target %s not halted", __func__, target_name(target));
                return ERROR_TARGET_INVALID;
        }
 
@@ -2016,7 +2777,101 @@ static int aarch64_mmu(struct target *target, int *enabled)
 static int aarch64_virt2phys(struct target *target, target_addr_t virt,
                             target_addr_t *phys)
 {
-       return armv8_mmu_translate_va(target, virt, phys);
+       return armv8_mmu_translate_va_pa(target, virt, phys, 1);
+}
+
+/*
+ * private target configuration items
+ */
+enum aarch64_cfg_param {
+       CFG_CTI,
+};
+
+static const struct jim_nvp nvp_config_opts[] = {
+       { .name = "-cti", .value = CFG_CTI },
+       { .name = NULL, .value = -1 }
+};
+
+static int aarch64_jim_configure(struct target *target, struct jim_getopt_info *goi)
+{
+       struct aarch64_private_config *pc;
+       struct jim_nvp *n;
+       int e;
+
+       pc = (struct aarch64_private_config *)target->private_config;
+       if (!pc) {
+                       pc = calloc(1, sizeof(struct aarch64_private_config));
+                       pc->adiv5_config.ap_num = DP_APSEL_INVALID;
+                       target->private_config = pc;
+       }
+
+       /*
+        * Call adiv5_jim_configure() to parse the common DAP options
+        * It will return JIM_CONTINUE if it didn't find any known
+        * options, JIM_OK if it correctly parsed the topmost option
+        * and JIM_ERR if an error occurred during parameter evaluation.
+        * For JIM_CONTINUE, we check our own params.
+        *
+        * adiv5_jim_configure() assumes 'private_config' to point to
+        * 'struct adiv5_private_config'. Override 'private_config'!
+        */
+       target->private_config = &pc->adiv5_config;
+       e = adiv5_jim_configure(target, goi);
+       target->private_config = pc;
+       if (e != JIM_CONTINUE)
+               return e;
+
+       /* parse config or cget options ... */
+       if (goi->argc > 0) {
+               Jim_SetEmptyResult(goi->interp);
+
+               /* check first if topmost item is for us */
+               e = jim_nvp_name2value_obj(goi->interp, nvp_config_opts,
+                               goi->argv[0], &n);
+               if (e != JIM_OK)
+                       return JIM_CONTINUE;
+
+               e = jim_getopt_obj(goi, NULL);
+               if (e != JIM_OK)
+                       return e;
+
+               switch (n->value) {
+               case CFG_CTI: {
+                       if (goi->isconfigure) {
+                               Jim_Obj *o_cti;
+                               struct arm_cti *cti;
+                               e = jim_getopt_obj(goi, &o_cti);
+                               if (e != JIM_OK)
+                                       return e;
+                               cti = cti_instance_by_jim_obj(goi->interp, o_cti);
+                               if (!cti) {
+                                       Jim_SetResultString(goi->interp, "CTI name invalid!", -1);
+                                       return JIM_ERR;
+                               }
+                               pc->cti = cti;
+                       } else {
+                               if (goi->argc != 0) {
+                                       Jim_WrongNumArgs(goi->interp,
+                                                       goi->argc, goi->argv,
+                                                       "NO PARAMS");
+                                       return JIM_ERR;
+                               }
+
+                               if (!pc || !pc->cti) {
+                                       Jim_SetResultString(goi->interp, "CTI not configured", -1);
+                                       return JIM_ERR;
+                               }
+                               Jim_SetResultString(goi->interp, arm_cti_name(pc->cti), -1);
+                       }
+                       break;
+               }
+
+               default:
+                       return JIM_CONTINUE;
+               }
+       }
+
+       return JIM_OK;
 }
 
 COMMAND_HANDLER(aarch64_handle_cache_info_command)
@@ -2024,11 +2879,10 @@ COMMAND_HANDLER(aarch64_handle_cache_info_command)
        struct target *target = get_current_target(CMD_CTX);
        struct armv8_common *armv8 = target_to_armv8(target);
 
-       return armv8_handle_cache_info_command(CMD_CTX,
+       return armv8_handle_cache_info_command(CMD,
                        &armv8->armv8_mmu.armv8_cache);
 }
 
-
 COMMAND_HANDLER(aarch64_handle_dbginit_command)
 {
        struct target *target = get_current_target(CMD_CTX);
@@ -2039,62 +2893,204 @@ COMMAND_HANDLER(aarch64_handle_dbginit_command)
 
        return aarch64_init_debug_access(target);
 }
-COMMAND_HANDLER(aarch64_handle_smp_off_command)
+
+COMMAND_HANDLER(aarch64_handle_disassemble_command)
 {
        struct target *target = get_current_target(CMD_CTX);
-       /* check target is an smp target */
-       struct target_list *head;
-       struct target *curr;
-       head = target->head;
-       target->smp = 0;
-       if (head != (struct target_list *)NULL) {
-               while (head != (struct target_list *)NULL) {
-                       curr = head->target;
-                       curr->smp = 0;
-                       head = head->next;
-               }
-               /*  fixes the target display to the debugger */
-               target->gdb_service->target = target;
+
+       if (!target) {
+               LOG_ERROR("No target selected");
+               return ERROR_FAIL;
        }
-       return ERROR_OK;
+
+       struct aarch64_common *aarch64 = target_to_aarch64(target);
+
+       if (aarch64->common_magic != AARCH64_COMMON_MAGIC) {
+               command_print(CMD, "current target isn't an AArch64");
+               return ERROR_FAIL;
+       }
+
+       int count = 1;
+       target_addr_t address;
+
+       switch (CMD_ARGC) {
+               case 2:
+                       COMMAND_PARSE_NUMBER(int, CMD_ARGV[1], count);
+               /* FALL THROUGH */
+               case 1:
+                       COMMAND_PARSE_ADDRESS(CMD_ARGV[0], address);
+                       break;
+               default:
+                       return ERROR_COMMAND_SYNTAX_ERROR;
+       }
+
+       return a64_disassemble(CMD, target, address, count);
 }
 
-COMMAND_HANDLER(aarch64_handle_smp_on_command)
+COMMAND_HANDLER(aarch64_mask_interrupts_command)
 {
        struct target *target = get_current_target(CMD_CTX);
-       struct target_list *head;
-       struct target *curr;
-       head = target->head;
-       if (head != (struct target_list *)NULL) {
-               target->smp = 1;
-               while (head != (struct target_list *)NULL) {
-                       curr = head->target;
-                       curr->smp = 1;
-                       head = head->next;
+       struct aarch64_common *aarch64 = target_to_aarch64(target);
+
+       static const struct jim_nvp nvp_maskisr_modes[] = {
+               { .name = "off", .value = AARCH64_ISRMASK_OFF },
+               { .name = "on", .value = AARCH64_ISRMASK_ON },
+               { .name = NULL, .value = -1 },
+       };
+       const struct jim_nvp *n;
+
+       if (CMD_ARGC > 0) {
+               n = jim_nvp_name2value_simple(nvp_maskisr_modes, CMD_ARGV[0]);
+               if (!n->name) {
+                       LOG_ERROR("Unknown parameter: %s - should be off or on", CMD_ARGV[0]);
+                       return ERROR_COMMAND_SYNTAX_ERROR;
                }
+
+               aarch64->isrmasking_mode = n->value;
        }
+
+       n = jim_nvp_value2name_simple(nvp_maskisr_modes, aarch64->isrmasking_mode);
+       command_print(CMD, "aarch64 interrupt mask %s", n->name);
+
        return ERROR_OK;
 }
 
-COMMAND_HANDLER(aarch64_handle_smp_gdb_command)
+static int jim_mcrmrc(Jim_Interp *interp, int argc, Jim_Obj * const *argv)
 {
-       struct target *target = get_current_target(CMD_CTX);
-       int retval = ERROR_OK;
-       struct target_list *head;
-       head = target->head;
-       if (head != (struct target_list *)NULL) {
-               if (CMD_ARGC == 1) {
-                       int coreid = 0;
-                       COMMAND_PARSE_NUMBER(int, CMD_ARGV[0], coreid);
-                       if (ERROR_OK != retval)
-                               return retval;
-                       target->gdb_service->core[1] = coreid;
+       struct command *c = jim_to_command(interp);
+       struct command_context *context;
+       struct target *target;
+       struct arm *arm;
+       int retval;
+       bool is_mcr = false;
+       int arg_cnt = 0;
 
-               }
-               command_print(CMD_CTX, "gdb coreid  %" PRId32 " -> %" PRId32, target->gdb_service->core[0]
-                       , target->gdb_service->core[1]);
+       if (!strcmp(c->name, "mcr")) {
+               is_mcr = true;
+               arg_cnt = 7;
+       } else {
+               arg_cnt = 6;
        }
-       return ERROR_OK;
+
+       context = current_command_context(interp);
+       assert(context);
+
+       target = get_current_target(context);
+       if (!target) {
+               LOG_ERROR("%s: no current target", __func__);
+               return JIM_ERR;
+       }
+       if (!target_was_examined(target)) {
+               LOG_ERROR("%s: not yet examined", target_name(target));
+               return JIM_ERR;
+       }
+
+       arm = target_to_arm(target);
+       if (!is_arm(arm)) {
+               LOG_ERROR("%s: not an ARM", target_name(target));
+               return JIM_ERR;
+       }
+
+       if (target->state != TARGET_HALTED)
+               return ERROR_TARGET_NOT_HALTED;
+
+       if (arm->core_state == ARM_STATE_AARCH64) {
+               LOG_ERROR("%s: not 32-bit arm target", target_name(target));
+               return JIM_ERR;
+       }
+
+       if (argc != arg_cnt) {
+               LOG_ERROR("%s: wrong number of arguments", __func__);
+               return JIM_ERR;
+       }
+
+       int cpnum;
+       uint32_t op1;
+       uint32_t op2;
+       uint32_t crn;
+       uint32_t crm;
+       uint32_t value;
+       long l;
+
+       /* NOTE:  parameter sequence matches ARM instruction set usage:
+        *      MCR     pNUM, op1, rX, CRn, CRm, op2    ; write CP from rX
+        *      MRC     pNUM, op1, rX, CRn, CRm, op2    ; read CP into rX
+        * The "rX" is necessarily omitted; it uses Tcl mechanisms.
+        */
+       retval = Jim_GetLong(interp, argv[1], &l);
+       if (retval != JIM_OK)
+               return retval;
+       if (l & ~0xf) {
+               LOG_ERROR("%s: %s %d out of range", __func__,
+                       "coprocessor", (int) l);
+               return JIM_ERR;
+       }
+       cpnum = l;
+
+       retval = Jim_GetLong(interp, argv[2], &l);
+       if (retval != JIM_OK)
+               return retval;
+       if (l & ~0x7) {
+               LOG_ERROR("%s: %s %d out of range", __func__,
+                       "op1", (int) l);
+               return JIM_ERR;
+       }
+       op1 = l;
+
+       retval = Jim_GetLong(interp, argv[3], &l);
+       if (retval != JIM_OK)
+               return retval;
+       if (l & ~0xf) {
+               LOG_ERROR("%s: %s %d out of range", __func__,
+                       "CRn", (int) l);
+               return JIM_ERR;
+       }
+       crn = l;
+
+       retval = Jim_GetLong(interp, argv[4], &l);
+       if (retval != JIM_OK)
+               return retval;
+       if (l & ~0xf) {
+               LOG_ERROR("%s: %s %d out of range", __func__,
+                       "CRm", (int) l);
+               return JIM_ERR;
+       }
+       crm = l;
+
+       retval = Jim_GetLong(interp, argv[5], &l);
+       if (retval != JIM_OK)
+               return retval;
+       if (l & ~0x7) {
+               LOG_ERROR("%s: %s %d out of range", __func__,
+                       "op2", (int) l);
+               return JIM_ERR;
+       }
+       op2 = l;
+
+       value = 0;
+
+       if (is_mcr == true) {
+               retval = Jim_GetLong(interp, argv[6], &l);
+               if (retval != JIM_OK)
+                       return retval;
+               value = l;
+
+               /* NOTE: parameters reordered! */
+               /* ARMV4_5_MCR(cpnum, op1, 0, crn, crm, op2) */
+               retval = arm->mcr(target, cpnum, op1, op2, crn, crm, value);
+               if (retval != ERROR_OK)
+                       return JIM_ERR;
+       } else {
+               /* NOTE: parameters reordered! */
+               /* ARMV4_5_MRC(cpnum, op1, 0, crn, crm, op2) */
+               retval = arm->mrc(target, cpnum, op1, op2, crn, crm, &value);
+               if (retval != ERROR_OK)
+                       return JIM_ERR;
+
+               Jim_SetResult(interp, Jim_NewIntObj(interp, value));
+       }
+
+       return JIM_OK;
 }
 
 static const struct command_registration aarch64_exec_command_handlers[] = {
@@ -2112,41 +3108,57 @@ static const struct command_registration aarch64_exec_command_handlers[] = {
                .help = "Initialize core debug",
                .usage = "",
        },
-       {       .name = "smp_off",
-               .handler = aarch64_handle_smp_off_command,
+       {
+               .name = "disassemble",
+               .handler = aarch64_handle_disassemble_command,
                .mode = COMMAND_EXEC,
-               .help = "Stop smp handling",
-               .usage = "",
+               .help = "Disassemble instructions",
+               .usage = "address [count]",
        },
        {
-               .name = "smp_on",
-               .handler = aarch64_handle_smp_on_command,
+               .name = "maskisr",
+               .handler = aarch64_mask_interrupts_command,
+               .mode = COMMAND_ANY,
+               .help = "mask aarch64 interrupts during single-step",
+               .usage = "['on'|'off']",
+       },
+       {
+               .name = "mcr",
                .mode = COMMAND_EXEC,
-               .help = "Restart smp handling",
-               .usage = "",
+               .jim_handler = jim_mcrmrc,
+               .help = "write coprocessor register",
+               .usage = "cpnum op1 CRn CRm op2 value",
        },
        {
-               .name = "smp_gdb",
-               .handler = aarch64_handle_smp_gdb_command,
+               .name = "mrc",
                .mode = COMMAND_EXEC,
-               .help = "display/fix current core played to gdb",
-               .usage = "",
+               .jim_handler = jim_mcrmrc,
+               .help = "read coprocessor register",
+               .usage = "cpnum op1 CRn CRm op2",
+       },
+       {
+               .chain = smp_command_handlers,
        },
 
 
        COMMAND_REGISTRATION_DONE
 };
+
 static const struct command_registration aarch64_command_handlers[] = {
        {
-               .chain = arm_command_handlers,
+               .name = "arm",
+               .mode = COMMAND_ANY,
+               .help = "ARM Command Group",
+               .usage = "",
+               .chain = semihosting_common_handlers
        },
        {
                .chain = armv8_command_handlers,
        },
        {
-               .name = "cortex_a",
+               .name = "aarch64",
                .mode = COMMAND_ANY,
-               .help = "Cortex-A command group",
+               .help = "Aarch64 command group",
                .usage = "",
                .chain = aarch64_exec_command_handlers,
        },
@@ -2167,26 +3179,25 @@ struct target_type aarch64_target = {
        .deassert_reset = aarch64_deassert_reset,
 
        /* REVISIT allow exporting VFP3 registers ... */
+       .get_gdb_arch = armv8_get_gdb_arch,
        .get_gdb_reg_list = armv8_get_gdb_reg_list,
 
        .read_memory = aarch64_read_memory,
        .write_memory = aarch64_write_memory,
 
-       .checksum_memory = arm_checksum_memory,
-       .blank_check_memory = arm_blank_check_memory,
-
-       .run_algorithm = armv4_5_run_algorithm,
-
        .add_breakpoint = aarch64_add_breakpoint,
        .add_context_breakpoint = aarch64_add_context_breakpoint,
        .add_hybrid_breakpoint = aarch64_add_hybrid_breakpoint,
        .remove_breakpoint = aarch64_remove_breakpoint,
-       .add_watchpoint = NULL,
-       .remove_watchpoint = NULL,
+       .add_watchpoint = aarch64_add_watchpoint,
+       .remove_watchpoint = aarch64_remove_watchpoint,
+       .hit_watchpoint = aarch64_hit_watchpoint,
 
        .commands = aarch64_command_handlers,
        .target_create = aarch64_target_create,
+       .target_jim_configure = aarch64_jim_configure,
        .init_target = aarch64_init_target,
+       .deinit_target = aarch64_deinit_target,
        .examine = aarch64_examine,
 
        .read_phys_memory = aarch64_read_phys_memory,