openocd: remove CamelCase symbols *xPSR*
[fw/openocd] / src / target / armv8_dpm.c
index c3d5ec4def8b716966cf2f7e4d2b0ceac5927085..3ea8fa9640ae84327cd6802a01093d1e8432806c 100644 (file)
@@ -1,16 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
 /*
  * Copyright (C) 2009 by David Brownell
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
  */
 
 #ifdef HAVE_CONFIG_H
 #include "target_type.h"
 #include "armv8_opcodes.h"
 
+#include "helper/time_support.h"
+
+/*
+ * T32 ITR format: swap the two 16-bit halves of a 32-bit instruction word.
+ * The argument is parenthesized so that expressions built from lower
+ * precedence operators (e.g. "a | b") are masked as a whole.
+ */
+#define T32_FMTITR(instr) ((((instr) & 0x0000FFFF) << 16) | (((instr) & 0xFFFF0000) >> 16))
 
 /**
  * @file
  * to minimize needless differences in debug behavior between cores.
  */
 
+/**
+ * Derive the core's execution state from the cached EDSCR value, so that
+ * CPSR does not have to be retrieved.
+ *
+ * As a side effect the exception level found in that value is stored in
+ * dpm->last_el.
+ */
+enum arm_state armv8_dpm_get_core_state(struct arm_dpm *dpm)
+{
+       int cur_el = (dpm->dscr >> 8) & 0x3;
+       int rw_bits = (dpm->dscr >> 10) & 0xF;
+
+       dpm->last_el = cur_el;
+
+       /* In Debug state, each bit gives the current Execution state of each EL */
+       if (((rw_bits >> cur_el) & 1) == 0)
+               return ARM_STATE_ARM;
+
+       return ARM_STATE_AARCH64;
+}
+
 /*----------------------------------------------------------------------*/
 
-/*
- * Coprocessor support
- */
+/* Post a single 32-bit word to the DTRRX debug register via the debug AP. */
+static int dpmv8_write_dcc(struct armv8_common *armv8, uint32_t data)
+{
+       int retval = mem_ap_write_u32(armv8->debug_ap,
+                       armv8->debug_base + CPUV8_DBG_DTRRX, data);
+
+       return retval;
+}
 
-/* Read coprocessor */
-static int dpmv8_mrc(struct target *target, int cpnum,
-       uint32_t op1, uint32_t op2, uint32_t CRn, uint32_t CRm,
-       uint32_t *value)
+static int dpmv8_write_dcc_64(struct armv8_common *armv8, uint64_t data)
 {
-       struct arm *arm = target_to_arm(target);
-       struct arm_dpm *dpm = arm->dpm;
+       int ret;        /* the 64-bit value goes out as two 32-bit AP writes: DTRRX, then DTRTX */
+       ret = mem_ap_write_u32(armv8->debug_ap,
+                              armv8->debug_base + CPUV8_DBG_DTRRX, data);      /* data is passed unshifted; presumably narrowed to its low 32 bits by the u32 accessor */
+       if (ret == ERROR_OK)
+               ret = mem_ap_write_u32(armv8->debug_ap,
+                               armv8->debug_base + CPUV8_DBG_DTRTX, data >> 32);       /* upper half, only attempted when the first write succeeded */
+       return ret;     /* status of the last write that was attempted */
+}
+
+static int dpmv8_read_dcc(struct armv8_common *armv8, uint32_t *data,
+       uint32_t *dscr_p)
+{      /* Read one 32-bit word from DTRTX once DSCR reports DSCR_DTR_TX_FULL. */
+       uint32_t dscr = DSCR_ITE;       /* starting value when the caller supplies no DSCR snapshot */
        int retval;
 
-       retval = dpm->prepare(dpm);
+       if (dscr_p)
+               dscr = *dscr_p;
+
+       /* Wait for DSCR_DTR_TX_FULL: poll DSCR until the bit is set, and give
+        * up once timeval_ms() has advanced by more than 1000 */
+       long long then = timeval_ms();
+       while ((dscr & DSCR_DTR_TX_FULL) == 0) {
+               retval = mem_ap_read_atomic_u32(armv8->debug_ap,
+                               armv8->debug_base + CPUV8_DBG_DSCR,
+                               &dscr);
+               if (retval != ERROR_OK)
+                       return retval;
+               if (timeval_ms() > then + 1000) {
+                       LOG_ERROR("Timeout waiting for read dcc");
+                       return ERROR_FAIL;
+               }
+       }
+
+       retval = mem_ap_read_atomic_u32(armv8->debug_ap,
+                                           armv8->debug_base + CPUV8_DBG_DTRTX,
+                                           data);
        if (retval != ERROR_OK)
                return retval;
 
-       LOG_DEBUG("MRC p%d, %d, r0, c%d, c%d, %d", cpnum,
-               (int) op1, (int) CRn,
-               (int) CRm, (int) op2);
+       if (dscr_p)
+               *dscr_p = dscr; /* hand the last DSCR value seen back to the caller */
 
-       /* read coprocessor register into R0; return via DCC */
-       retval = dpm->instr_read_data_r0(dpm,
-                       T32_FMTITR(ARMV4_5_MRC(cpnum, op1, 0, CRn, CRm, op2)),
-                       value);
+       return retval;
+}
+
+/*
+ * Read a 64-bit value from the DCC.
+ *
+ * Polls DSCR until DSCR_DTR_TX_FULL is set (failing once timeval_ms() has
+ * advanced by more than 1000), then reads the low word from DTRTX and the
+ * high word from DTRRX.
+ *
+ * @param armv8   core whose debug AP / debug base are used
+ * @param data    receives the assembled value; only written on success
+ * @param dscr_p  optional in/out DSCR snapshot; may be NULL
+ * @return ERROR_OK, the status of a failed AP read, or ERROR_FAIL on timeout
+ */
+static int dpmv8_read_dcc_64(struct armv8_common *armv8, uint64_t *data,
+       uint32_t *dscr_p)
+{
+       uint32_t dscr = DSCR_ITE;
+       uint32_t lower = 0;
+       uint32_t higher = 0;
+       int retval;
+
+       if (dscr_p)
+               dscr = *dscr_p;
+
+       /* Wait for DSCR_DTR_TX_FULL */
+       long long then = timeval_ms();
+       while ((dscr & DSCR_DTR_TX_FULL) == 0) {
+               retval = mem_ap_read_atomic_u32(armv8->debug_ap,
+                               armv8->debug_base + CPUV8_DBG_DSCR,
+                               &dscr);
+               if (retval != ERROR_OK)
+                       return retval;
+               if (timeval_ms() > then + 1000) {
+                       LOG_ERROR("Timeout waiting for DTR_TX_FULL, dscr = 0x%08" PRIx32, dscr);
+                       return ERROR_FAIL;
+               }
+       }
+
+       /*
+        * Read both halves into real uint32_t objects. Accessing *data
+        * through a uint32_t pointer would break the aliasing rules and
+        * would leave *data half-written if the second read failed.
+        */
+       retval = mem_ap_read_atomic_u32(armv8->debug_ap,
+                                           armv8->debug_base + CPUV8_DBG_DTRTX,
+                                           &lower);
+       if (retval != ERROR_OK)
+               return retval;
+
+       retval = mem_ap_read_atomic_u32(armv8->debug_ap,
+                                           armv8->debug_base + CPUV8_DBG_DTRRX,
+                                           &higher);
+       if (retval != ERROR_OK)
+               return retval;
+
+       *data = (uint64_t)lower | ((uint64_t)higher << 32);
+
+       if (dscr_p)
+               *dscr_p = dscr;
 
-       /* (void) */ dpm->finish(dpm);
        return retval;
 }
 
-static int dpmv8_mcr(struct target *target, int cpnum,
-       uint32_t op1, uint32_t op2, uint32_t CRn, uint32_t CRm,
-       uint32_t value)
+/*
+ * Get the core ready for a sequence of DPM operations.
+ *
+ * Polls DSCR until DSCR_ITE is set (failing once timeval_ms() has advanced
+ * by more than 1000), caches the value read in dpm->dscr, and drains DTRRX
+ * if DSCR_DTR_RX_FULL is unexpectedly set.
+ *
+ * @return ERROR_OK, the status of a failed AP read, or ERROR_FAIL on timeout
+ */
+static int dpmv8_dpm_prepare(struct arm_dpm *dpm)
 {
-       struct arm *arm = target_to_arm(target);
-       struct arm_dpm *dpm = arm->dpm;
+       struct armv8_common *armv8 = dpm->arm->arch_info;
+       uint32_t dscr;
        int retval;
 
-       retval = dpm->prepare(dpm);
+       /* set up invariant:  ITE is set after every DPM operation */
+       long long then = timeval_ms();
+       for (;; ) {
+               retval = mem_ap_read_atomic_u32(armv8->debug_ap,
+                               armv8->debug_base + CPUV8_DBG_DSCR,
+                               &dscr);
+               if (retval != ERROR_OK)
+                       return retval;
+               if ((dscr & DSCR_ITE) != 0)
+                       break;
+               if (timeval_ms() > then + 1000) {
+                       LOG_ERROR("Timeout waiting for dpm prepare");
+                       return ERROR_FAIL;
+               }
+       }
+
+       /* update the stored copy of dscr */
+       dpm->dscr = dscr;
+
+       /* this "should never happen" ... */
+       if (dscr & DSCR_DTR_RX_FULL) {
+               uint32_t stale;
+
+               LOG_ERROR("DSCR_DTR_RX_FULL, dscr 0x%08" PRIx32, dscr);
+               /*
+                * Clear DCCRX by reading it and throwing the value away.
+                * The atomic accessor is used, like for every other read in
+                * this file, so the transfer is finished before 'stale'
+                * goes out of scope.
+                * NOTE(review): mem_ap_read_u32() is presumably only queued
+                * and would store through the pointer later -- confirm
+                * against the ADIv5 layer.
+                */
+               retval = mem_ap_read_atomic_u32(armv8->debug_ap,
+                       armv8->debug_base + CPUV8_DBG_DTRRX, &stale);
+               if (retval != ERROR_OK)
+                       return retval;
+       }
+
+       return retval;
+}
+
+/* Nothing needs to be undone at the moment, so this always reports success. */
+static int dpmv8_dpm_finish(struct arm_dpm *dpm)
+{
+       (void)dpm;
+
+       /* REVISIT what could be done here? */
+       return ERROR_OK;
+}
+
+static int dpmv8_exec_opcode(struct arm_dpm *dpm,
+       uint32_t opcode, uint32_t *p_dscr)
+{      /* Write one opcode to the ITR register and wait for DSCR_ITE to be set again. */
+       struct armv8_common *armv8 = dpm->arm->arch_info;
+       uint32_t dscr = dpm->dscr;      /* cached DSCR, unless the caller passes its own snapshot */
+       int retval;
+
+       if (p_dscr)
+               dscr = *p_dscr;
+
+       /* Wait for InstrCompl bit to be set (DSCR_ITE); no DSCR read is issued
+        * at all when the starting snapshot already has the bit set */
+       long long then = timeval_ms();
+       while ((dscr & DSCR_ITE) == 0) {
+               retval = mem_ap_read_atomic_u32(armv8->debug_ap,
+                               armv8->debug_base + CPUV8_DBG_DSCR, &dscr);
+               if (retval != ERROR_OK) {
+                       LOG_ERROR("Could not read DSCR register, opcode = 0x%08" PRIx32, opcode);
+                       return retval;
+               }
+               if (timeval_ms() > then + 1000) {
+                       LOG_ERROR("Timeout waiting for aarch64_exec_opcode");
+                       return ERROR_FAIL;
+               }
+       }
+
+       if (armv8_dpm_get_core_state(dpm) != ARM_STATE_AARCH64)
+               opcode = T32_FMTITR(opcode);    /* outside AArch64 state the two halfwords are swapped for the ITR */
+
+       retval = mem_ap_write_u32(armv8->debug_ap,
+                       armv8->debug_base + CPUV8_DBG_ITR, opcode);
        if (retval != ERROR_OK)
                return retval;
 
-       LOG_DEBUG("MCR p%d, %d, r0, c%d, c%d, %d", cpnum,
-               (int) op1, (int) CRn,
-               (int) CRm, (int) op2);
+       then = timeval_ms();    /* restart the timeout window for the completion poll */
+       do {
+               retval = mem_ap_read_atomic_u32(armv8->debug_ap,
+                               armv8->debug_base + CPUV8_DBG_DSCR, &dscr);
+               if (retval != ERROR_OK) {
+                       LOG_ERROR("Could not read DSCR register");
+                       return retval;
+               }
+               if (timeval_ms() > then + 1000) {
+                       LOG_ERROR("Timeout waiting for aarch64_exec_opcode");
+                       return ERROR_FAIL;
+               }
+       } while ((dscr & DSCR_ITE) == 0);       /* Wait for InstrCompl bit to be set */
 
-       /* read DCC into r0; then write coprocessor register from R0 */
-       retval = dpm->instr_write_data_r0(dpm,
-                       T32_FMTITR(ARMV4_5_MCR(cpnum, op1, 0, CRn, CRm, op2)),
-                       value);
+       /* update dscr and el after each command execution */
+       dpm->dscr = dscr;
+       if (dpm->last_el != ((dscr >> 8) & 3))
+               LOG_DEBUG("EL %i -> %" PRIu32, dpm->last_el, (dscr >> 8) & 3);
+       dpm->last_el = (dscr >> 8) & 3;
+
+       if (dscr & DSCR_ERR) {  /* the error flag turns an otherwise successful run into ERROR_FAIL */
+               LOG_ERROR("Opcode 0x%08" PRIx32 ", DSCR.ERR=1, DSCR.EL=%i", opcode, dpm->last_el);
+               armv8_dpm_handle_exception(dpm, true);
+               retval = ERROR_FAIL;
+       }
 
-       /* (void) */ dpm->finish(dpm);
+       if (p_dscr)
+               *p_dscr = dscr; /* report the final DSCR value to the caller */
+
+       return retval;
+}
+
+/* Run a single opcode; no caller-side DSCR snapshot is passed in or returned. */
+static int dpmv8_instr_execute(struct arm_dpm *dpm, uint32_t opcode)
+{
+       int retval = dpmv8_exec_opcode(dpm, opcode, NULL);
+
+       return retval;
+}
+
+/* Put a 32-bit value into the DCC, then run the opcode that picks it up. */
+static int dpmv8_instr_write_data_dcc(struct arm_dpm *dpm,
+       uint32_t opcode, uint32_t data)
+{
+       struct armv8_common *armv8 = dpm->arm->arch_info;
+       int retval = dpmv8_write_dcc(armv8, data);
+
+       /* the opcode is only issued when the DCC write went through */
+       if (retval == ERROR_OK)
+               retval = dpmv8_exec_opcode(dpm, opcode, NULL);
+
+       return retval;
+}
+
+/* Put a 64-bit value into the DCC, then run the opcode that picks it up. */
+static int dpmv8_instr_write_data_dcc_64(struct arm_dpm *dpm,
+       uint32_t opcode, uint64_t data)
+{
+       struct armv8_common *armv8 = dpm->arm->arch_info;
+       int retval = dpmv8_write_dcc_64(armv8, data);
+
+       /* the opcode is only issued when both DCC writes went through */
+       if (retval == ERROR_OK)
+               retval = dpmv8_exec_opcode(dpm, opcode, NULL);
+
+       return retval;
+}
+
+/*
+ * Hand a 32-bit value to an opcode through R0: the value is written to the
+ * DCC, moved on with the READ_REG_DTRRX opcode, and then the requested
+ * opcode is executed. A private DSCR snapshot is threaded through both
+ * executions.
+ */
+static int dpmv8_instr_write_data_r0(struct arm_dpm *dpm,
+       uint32_t opcode, uint32_t data)
+{
+       struct armv8_common *armv8 = dpm->arm->arch_info;
+       uint32_t local_dscr = DSCR_ITE;
+       int retval;
+
+       retval = dpmv8_write_dcc(armv8, data);
+
+       if (retval == ERROR_OK)
+               retval = dpmv8_exec_opcode(dpm,
+                               armv8_opcode(armv8, READ_REG_DTRRX), &local_dscr);
+
+       /* then the opcode, taking data from R0 */
+       if (retval == ERROR_OK)
+               retval = dpmv8_exec_opcode(dpm, opcode, &local_dscr);
+
+       return retval;
+}
+
+/*
+ * 64-bit flavour of dpmv8_instr_write_data_r0(). When the core is not in
+ * AArch64 state the call is forwarded to the 32-bit routine instead.
+ */
+static int dpmv8_instr_write_data_r0_64(struct arm_dpm *dpm,
+       uint32_t opcode, uint64_t data)
+{
+       struct armv8_common *armv8 = dpm->arm->arch_info;
+       int retval;
+
+       if (dpm->arm->core_state != ARM_STATE_AARCH64)
+               return dpmv8_instr_write_data_r0(dpm, opcode, data);
+
+       /* stage the value in the DCC ... */
+       retval = dpmv8_write_dcc_64(armv8, data);
+       if (retval != ERROR_OK)
+               return retval;
+
+       /* ... transfer data from DCC to R0 ... */
+       retval = dpmv8_exec_opcode(dpm, ARMV8_MRS(SYSTEM_DBG_DBGDTR_EL0, 0), &dpm->dscr);
+       if (retval != ERROR_OK)
+               return retval;
+
+       /* ... then the opcode, taking data from R0 */
+       return dpmv8_exec_opcode(dpm, opcode, &dpm->dscr);
+}
+
+/*
+ * Execute the DSB_SY opcode followed by the ISB_SY opcode after the
+ * execution status in CPSR has been modified.
+ *
+ * @return ERROR_OK, or the status of the first of the two opcodes that fails
+ */
+static int dpmv8_instr_cpsr_sync(struct arm_dpm *dpm)
+{
+       int retval;
+       struct armv8_common *armv8 = dpm->arm->arch_info;
+
+       /* "Prefetch flush" after modifying execution status in CPSR */
+       retval = dpmv8_exec_opcode(dpm, armv8_opcode(armv8, ARMV8_OPC_DSB_SY), &dpm->dscr);
+       if (retval == ERROR_OK) {
+               /* keep the ISB status too; a failure here must not be reported as success */
+               retval = dpmv8_exec_opcode(dpm, armv8_opcode(armv8, ARMV8_OPC_ISB_SY), &dpm->dscr);
+       }
        return retval;
 }
 
-static int dpmv8_mrs(struct target *target, uint32_t op0,
-       uint32_t op1, uint32_t op2, uint32_t CRn, uint32_t CRm,
+/* Run an opcode that writes its result to the DCC, then fetch that 32-bit word. */
+static int dpmv8_instr_read_data_dcc(struct arm_dpm *dpm,
+       uint32_t opcode, uint32_t *data)
+{
+       struct armv8_common *armv8 = dpm->arm->arch_info;
+       int retval = dpmv8_exec_opcode(dpm, opcode, &dpm->dscr);
+
+       /* the DCC is only read when the opcode ran successfully */
+       if (retval == ERROR_OK)
+               retval = dpmv8_read_dcc(armv8, data, &dpm->dscr);
+
+       return retval;
+}
+
+/* Run an opcode that writes its result to the DCC, then fetch that 64-bit value. */
+static int dpmv8_instr_read_data_dcc_64(struct arm_dpm *dpm,
+       uint32_t opcode, uint64_t *data)
+{
+       struct armv8_common *armv8 = dpm->arm->arch_info;
+       int retval = dpmv8_exec_opcode(dpm, opcode, &dpm->dscr);
+
+       /* the DCC is only read when the opcode ran successfully */
+       if (retval == ERROR_OK)
+               retval = dpmv8_read_dcc_64(armv8, data, &dpm->dscr);
+
+       return retval;
+}
+
+/*
+ * Run an opcode that leaves its result in R0, copy R0 to the DCC with the
+ * WRITE_REG_DTRTX opcode, and read the 32-bit word back from the DCC.
+ * Each step is skipped once an earlier one has failed.
+ */
+static int dpmv8_instr_read_data_r0(struct arm_dpm *dpm,
+       uint32_t opcode, uint32_t *data)
+{
+       struct armv8_common *armv8 = dpm->arm->arch_info;
+       int retval;
+
+       /* the opcode, writing data to R0 */
+       retval = dpmv8_exec_opcode(dpm, opcode, &dpm->dscr);
+
+       /* write R0 to DCC */
+       if (retval == ERROR_OK)
+               retval = dpmv8_exec_opcode(dpm,
+                               armv8_opcode(armv8, WRITE_REG_DTRTX), &dpm->dscr);
+
+       if (retval == ERROR_OK)
+               retval = dpmv8_read_dcc(armv8, data, &dpm->dscr);
+
+       return retval;
+}
+
+/*
+ * 64-bit flavour of dpmv8_instr_read_data_r0(). When the core is not in
+ * AArch64 state the 32-bit routine is used and its result is widened.
+ */
+static int dpmv8_instr_read_data_r0_64(struct arm_dpm *dpm,
+       uint32_t opcode, uint64_t *data)
+{
+       struct armv8_common *armv8 = dpm->arm->arch_info;
+       int retval;
+
+       if (dpm->arm->core_state == ARM_STATE_AARCH64) {
+               /* the opcode, writing data to R0 */
+               retval = dpmv8_exec_opcode(dpm, opcode, &dpm->dscr);
+
+               /* write R0 to DCC */
+               if (retval == ERROR_OK)
+                       retval = dpmv8_exec_opcode(dpm,
+                                       ARMV8_MSR_GP(SYSTEM_DBG_DBGDTR_EL0, 0), &dpm->dscr);
+
+               if (retval == ERROR_OK)
+                       retval = dpmv8_read_dcc_64(armv8, data, &dpm->dscr);
+
+               return retval;
+       }
+
+       uint32_t word;
+
+       retval = dpmv8_instr_read_data_r0(dpm, opcode, &word);
+       /* *data is left untouched when the 32-bit read fails */
+       if (retval == ERROR_OK)
+               *data = word;
+
+       return retval;
+}
+
+#if 0  /* dpmv8_bpwp_enable() is compiled out */
+static int dpmv8_bpwp_enable(struct arm_dpm *dpm, unsigned index_t,
+       target_addr_t addr, uint32_t control)
+{      /* Program the value and control registers of one breakpoint/watchpoint slot. */
+       struct armv8_common *armv8 = dpm->arm->arch_info;
+       uint32_t vr = armv8->debug_base;        /* becomes the address of the value register */
+       uint32_t cr = armv8->debug_base;        /* becomes the address of the control register */
+       int retval;
+
+       switch (index_t) {
+               case 0 ... 15:  /* breakpoints */
+                       vr += CPUV8_DBG_BVR_BASE;
+                       cr += CPUV8_DBG_BCR_BASE;
+                       break;
+               case 16 ... 31: /* watchpoints */
+                       vr += CPUV8_DBG_WVR_BASE;
+                       cr += CPUV8_DBG_WCR_BASE;
+                       index_t -= 16;  /* watchpoint slots are numbered from 0 again */
+                       break;
+               default:
+                       return ERROR_FAIL;
+       }
+       vr += 16 * index_t;     /* consecutive slots are 16 bytes apart */
+       cr += 16 * index_t;
+
+       LOG_DEBUG("A8: bpwp enable, vr %08x cr %08x",
+               (unsigned) vr, (unsigned) cr);
+
+       retval = mem_ap_write_atomic_u32(armv8->debug_ap, vr, addr);    /* value register first ... */
+       if (retval != ERROR_OK)
+               return retval;
+       return mem_ap_write_atomic_u32(armv8->debug_ap, cr, control);   /* ... then the control register */
+}
+#endif /* 0 */
+
+/*
+ * Disable one breakpoint (index 0..15) or watchpoint (index 16..31) slot
+ * by writing zero to its control register. Any other index is rejected
+ * with ERROR_FAIL.
+ */
+static int dpmv8_bpwp_disable(struct arm_dpm *dpm, unsigned index_t)
+{
+       struct armv8_common *armv8 = dpm->arm->arch_info;
+       uint32_t cr;
+
+       if (index_t < 16) {
+               cr = armv8->debug_base + CPUV8_DBG_BCR_BASE;
+       } else if (index_t < 32) {
+               cr = armv8->debug_base + CPUV8_DBG_WCR_BASE;
+               index_t -= 16;
+       } else {
+               return ERROR_FAIL;
+       }
+
+       /* consecutive control registers are 16 bytes apart */
+       cr += 16 * index_t;
+
+       LOG_DEBUG("A: bpwp disable, cr %08x", (unsigned) cr);
+
+       /* clear control register */
+       return mem_ap_write_atomic_u32(armv8->debug_ap, cr, 0);
+}
+
+/*
+ * Coprocessor support
+ */
+
+/* Read coprocessor:
+ * builds an MRC instruction for the given coprocessor register and runs it
+ * through the DPM's instr_read_data_r0 hook, bracketed by the prepare and
+ * finish hooks. The value read is stored in *value; the status of the read
+ * (or of a failed prepare) is returned. */
+static int dpmv8_mrc(struct target *target, int cpnum,
+       uint32_t op1, uint32_t op2, uint32_t crn, uint32_t crm,
        uint32_t *value)
 {
        struct arm *arm = target_to_arm(target);
        struct arm_dpm *dpm = arm->dpm;
        int retval;
-       uint32_t op_code;
 
        retval = dpm->prepare(dpm);
        if (retval != ERROR_OK)
                return retval;
-       op_code = ((op0 & 0x3) << 19 | (op1 & 0x7) << 16 | (CRn & 0xF) << 12 |\
-                               (CRm & 0xF) << 8 | (op2 & 0x7) << 5);
-       op_code >>= 5;
-       LOG_DEBUG("MRS p%d, %d, r0, c%d, c%d, %d", (int)op0,
-               (int) op1, (int) CRn,
-               (int) CRm, (int) op2);
+
+       LOG_DEBUG("MRC p%d, %d, r0, c%d, c%d, %d", cpnum,
+               (int) op1, (int) crn,
+               (int) crm, (int) op2);
+
        /* read coprocessor register into R0; return via DCC */
        retval = dpm->instr_read_data_r0(dpm,
-                       ARMV8_MRS(op_code, 0),
+                       ARMV4_5_MRC(cpnum, op1, 0, crn, crm, op2),      /* third argument 0: the transfer register is R0 */
                        value);
 
        /* (void) */ dpm->finish(dpm);
        return retval;
 }
 
-static int dpmv8_msr(struct target *target, uint32_t op0,
-       uint32_t op1, uint32_t op2, uint32_t CRn, uint32_t CRm,
+static int dpmv8_mcr(struct target *target, int cpnum,
+       uint32_t op1, uint32_t op2, uint32_t crn, uint32_t crm,
        uint32_t value)
 {
        struct arm *arm = target_to_arm(target);
        struct arm_dpm *dpm = arm->dpm;
        int retval;
-       uint32_t op_code;
 
        retval = dpm->prepare(dpm);
        if (retval != ERROR_OK)
                return retval;
 
-       op_code = ((op0 & 0x3) << 19 | (op1 & 0x7) << 16 | (CRn & 0xF) << 12 |\
-                               (CRm & 0xF) << 8 | (op2 & 0x7) << 5);
-       op_code >>= 5;
-       LOG_DEBUG("MSR p%d, %d, r0, c%d, c%d, %d", (int)op0,
-               (int) op1, (int) CRn,
-               (int) CRm, (int) op2);
+       LOG_DEBUG("MCR p%d, %d, r0, c%d, c%d, %d", cpnum,
+               (int) op1, (int) crn,
+               (int) crm, (int) op2);
 
        /* read DCC into r0; then write coprocessor register from R0 */
        retval = dpm->instr_write_data_r0(dpm,
-                       ARMV8_MSR_GP(op_code, 0),
+                       ARMV4_5_MCR(cpnum, op1, 0, crn, crm, op2),
                        value);
 
        /* (void) */ dpm->finish(dpm);
@@ -159,265 +536,193 @@ static int dpmv8_msr(struct target *target, uint32_t op0,
  * Register access utilities
  */
 
-/* Toggles between recorded core mode (USR, SVC, etc) and a temporary one.
- * Routines *must* restore the original mode before returning!!
- */
-int dpmv8_modeswitch(struct arm_dpm *dpm, enum arm_mode mode)
+/*
+ * Move the core to the exception level implied by a mode value.
+ *
+ * With mode == ARM_MODE_ANY the mode is taken from the cached CPSR
+ * register value, i.e. the previously recorded mode is restored;
+ * otherwise the given mode is used.
+ *
+ * The target EL is derived from the low five bits for the AArch32 modes
+ * listed below, and from bits [3:2] for everything else. A higher EL is
+ * entered with the DCPS opcode; a lower (or equal) EL is approached with
+ * DRPS, after which the opcode and register access tables are re-selected
+ * for the resulting core state.
+ *
+ * @param dpm   debug programmer's model of the core
+ * @param mode  mode to switch to, or ARM_MODE_ANY to restore
+ * @return ERROR_OK, ERROR_FAIL for an invalid target EL, or the status of
+ *         a failed instruction execution
+ */
+int armv8_dpm_modeswitch(struct arm_dpm *dpm, enum arm_mode mode)
 {
        struct armv8_common *armv8 = (struct armv8_common *)dpm->arm->arch_info;
-       int retval;
+       int retval = ERROR_OK;
+       unsigned int target_el;
+       enum arm_state core_state;
        uint32_t cpsr;
 
        /* restore previous mode */
-       if (mode == ARM_MODE_ANY)
+       if (mode == ARM_MODE_ANY) {
                cpsr = buf_get_u32(dpm->arm->cpsr->value, 0, 32);
 
-       /* else force to the specified mode */
-       else
-               cpsr = mode >> 4;
+               LOG_DEBUG("restoring mode, cpsr = 0x%08"PRIx32, cpsr);
 
-       switch ((cpsr & 0xC) >> 2) {
-                       case SYSTEM_CUREL_EL1:
-                               retval = dpm->instr_execute(dpm, ARMV8_DCPS1(11));
-                               if (retval != ERROR_OK)
-                                       return retval;
-                               break;
-                       case SYSTEM_CUREL_EL2:
-                               retval = dpm->instr_execute(dpm, ARMV8_DCPS2(11));
-                               if (retval != ERROR_OK)
-                                       return retval;
-                               break;
-                       break;
-                       case SYSTEM_CUREL_EL3:
-                               retval = dpm->instr_execute(dpm, ARMV8_DCPS3(11));
-                               if (retval != ERROR_OK)
-                                       return retval;
-                               break;
-                       break;
-                       default:
-                               LOG_DEBUG("unknow mode 0x%x", (unsigned) ((cpsr & 0xC) >> 2));
-                               break;
+       } else {
+               LOG_DEBUG("setting mode 0x%x", mode);
+               cpsr = mode;
        }
 
-
-       retval = dpm->instr_write_data_r0(dpm, armv8_opcode(armv8, WRITE_REG_DSPSR), cpsr);
-       if (retval != ERROR_OK)
-               return retval;
-
-       if (dpm->instr_cpsr_sync)
-               retval = dpm->instr_cpsr_sync(dpm);
-
-       return retval;
-}
-
-static int dpmv8_read_reg32(struct arm_dpm *dpm, struct reg *r, unsigned regnum)
-{
-       uint32_t value;
-       int retval = ERROR_FAIL;
-       bool valid = true;
-
-       switch (regnum) {
-       case 0 ... 14:
-               /* return via DCC:  "MCR p14, 0, Rnum, c0, c5, 0" */
-               retval = dpm->instr_read_data_dcc(dpm,
-                       T32_FMTITR(ARMV4_5_MCR(14, 0, regnum, 0, 5, 0)),
-                       &value);
+       switch (cpsr & 0x1f) {
+       /* aarch32 modes */
+       case ARM_MODE_USR:
+               target_el = 0;
                break;
-               case ARMV8_R31:
-                       retval = dpm->instr_read_data_dcc(dpm,
-                               T32_FMTITR(ARMV4_5_MCR(14, 0, 13, 0, 5, 0)),
-                               &value);
-                               break;
-               case ARMV8_PC:
-                       retval = dpm->instr_read_data_r0(dpm,
-                               T32_FMTITR(ARMV8_MRC_DLR(0)),
-                               &value);
-                       break;
-               case ARMV8_xPSR:
-                       retval = dpm->instr_read_data_r0(dpm,
-                               T32_FMTITR(ARMV8_MRC_DSPSR(0)),
-                               &value);
-                       break;
-               default:
-                       LOG_DEBUG("READ: %s ignored", r->name);
-                       retval = ERROR_OK;
-                       value = 0xFFFFFFFF;
-                       valid = false;
-                       break;
+       case ARM_MODE_SVC:
+       case ARM_MODE_ABT:
+       case ARM_MODE_IRQ:
+       case ARM_MODE_FIQ:
+       case ARM_MODE_SYS:
+               target_el = 1;
+               break;
+       /*
+        * TODO: handle ARM_MODE_HYP
+        * case ARM_MODE_HYP:
+        *      target_el = 2;
+        *      break;
+        */
+       case ARM_MODE_MON:
+               target_el = 3;
+               break;
+       /* aarch64 modes */
+       default:
+               /* the EL is taken from bits [3:2] of the mode value */
+               target_el = (cpsr >> 2) & 3;
        }
 
-       if (retval == ERROR_OK) {
-               r->valid = valid;
-               r->dirty = false;
-               buf_set_u64(r->value, 0, 32, value);
-               LOG_DEBUG("READ: %s, %8.8x", r->name, (unsigned) value);
+       if (target_el > SYSTEM_CUREL_EL3) {
+               /* target_el is unsigned, so it is printed with %u */
+               LOG_ERROR("%s: Invalid target exception level %u", __func__, target_el);
+               return ERROR_FAIL;
        }
-       return retval;
-}
 
-static int dpmv8_write_reg32(struct arm_dpm *dpm, struct reg *r, unsigned regnum)
-{
-       int retval;
-       uint64_t value = buf_get_u64(r->value, 0, 32);
+       LOG_DEBUG("target_el = %u, last_el = %i", target_el, dpm->last_el);
+       if (target_el > dpm->last_el) {
+               /* going up: the target EL is OR-ed into the DCPS opcode */
+               retval = dpm->instr_execute(dpm,
+                               armv8_opcode(armv8, ARMV8_OPC_DCPS) | target_el);
 
-       switch (regnum) {
-               case 0 ... 14:
-                       /* load register from DCC:  "MRC p14, 0, Rnum, c0, c5, 0" */
-                       retval = dpm->instr_write_data_dcc(dpm,
-                                       T32_FMTITR(ARMV4_5_MRC(14, 0, regnum, 0, 5, 0)), value);
-                       break;
-               case ARMV8_PC:/* PC
-                        * read r0 from DCC; then "MOV pc, r0" */
-                       retval = dpm->instr_write_data_r0(dpm,
-                                       T32_FMTITR(ARMV8_MCR_DLR(0)), value);
-                       break;
-               case ARMV8_xPSR: /* CPSR */
-                       /* read r0 from DCC, then "MCR r0, DSPSR" */
-                       retval = dpm->instr_write_data_r0(dpm,
-                                       T32_FMTITR(ARMV8_MCR_DSPSR(0)), value);
-                       break;
-               default:
-                       retval = ERROR_OK;
-                       LOG_DEBUG("WRITE: %s ignored", r->name);
-                       break;
-       }
+               /* DCPS clobbers registers just like an exception taken */
+               armv8_dpm_handle_exception(dpm, false);
+       } else {
+               core_state = armv8_dpm_get_core_state(dpm);
+               if (core_state != ARM_STATE_AARCH64) {
+                       /* cannot do DRPS/ERET when already in EL0 */
+                       if (dpm->last_el != 0) {
+                               /* load SPSR with the desired mode and execute DRPS */
+                               LOG_DEBUG("SPSR = 0x%08"PRIx32, cpsr);
+                               retval = dpm->instr_write_data_r0(dpm,
+                                               ARMV8_MSR_GP_XPSR_T1(1, 0, 15), cpsr);
+                               if (retval == ERROR_OK)
+                                       retval = dpm->instr_execute(dpm, armv8_opcode(armv8, ARMV8_OPC_DRPS));
+                       }
+               } else {
+                       /*
+                        * need to execute multiple DRPS instructions until target_el
+                        * is reached
+                        */
+                       while (retval == ERROR_OK && dpm->last_el != target_el) {
+                               unsigned int cur_el = dpm->last_el;
+                               retval = dpm->instr_execute(dpm, armv8_opcode(armv8, ARMV8_OPC_DRPS));
+                               /* stop if DRPS did not change the recorded EL */
+                               if (cur_el == dpm->last_el) {
+                                       LOG_INFO("Cannot reach EL %u, SPSR corrupted?", target_el);
+                                       break;
+                               }
+                       }
+               }
 
-       if (retval == ERROR_OK) {
-               r->dirty = false;
-               LOG_DEBUG("WRITE: %s, %8.8x", r->name, (unsigned) value);
+               /* On executing DRPS, DSPSR and DLR become UNKNOWN, mark them as dirty */
+               dpm->arm->cpsr->dirty = true;
+               dpm->arm->pc->dirty = true;
+
+               /*
+                * re-evaluate the core state, we might be in Aarch32 state now
+                * we rely on dpm->dscr being up-to-date
+                */
+               core_state = armv8_dpm_get_core_state(dpm);
+               armv8_select_opcodes(armv8, core_state == ARM_STATE_AARCH64);
+               armv8_select_reg_access(armv8, core_state == ARM_STATE_AARCH64);
        }
 
        return retval;
 }
 
-/* just read the register -- rely on the core mode being right */
+/*
+ * Common register read, relies on armv8_select_reg_access() having been called.
+ *
+ * Registers of up to 64 bits are fetched with armv8->read_reg_u64(), wider
+ * ones of up to 128 bits with armv8->read_reg_u128() as a low and a high
+ * 64-bit half. On success the value is stored in r->value and the register
+ * is marked valid and not dirty. Larger sizes are not handled: the initial
+ * ERROR_FAIL is returned. Every failure is logged.
+ */
 static int dpmv8_read_reg(struct arm_dpm *dpm, struct reg *r, unsigned regnum)
 {
-       uint32_t value;
-       uint64_t value_64;
+       struct armv8_common *armv8 = dpm->arm->arch_info;
        int retval = ERROR_FAIL;
 
-       switch (regnum) {
-               case 0 ... 30:
-                       retval = dpm->instr_read_data_dcc_64(dpm,
-                               ARMV8_MSR_GP(SYSTEM_DBG_DBGDTR_EL0, regnum),
-                               &value_64);
-                       break;
-               case ARMV8_R31:
-                       retval = dpm->instr_read_data_r0_64(dpm,
-                               ARMV8_MOVFSP_64(0),
-                               &value_64);
-                       break;
-               case ARMV8_PC:
-                       retval = dpm->instr_read_data_r0_64(dpm,
-                               ARMV8_MRS_DLR(0),
-                               &value_64);
-                       break;
-               case ARMV8_xPSR:
-                       retval = dpm->instr_read_data_r0(dpm,
-                               ARMV8_MRS_DSPSR(0),
-                               &value);
-                       break;
-               default:
-                       LOG_DEBUG("READ: %s fail", r->name);
-                       break;
-       }
-
-       if (retval == ERROR_OK) {
-               r->valid = true;
-               r->dirty = false;
-               if (r->size == 64) {
-                       buf_set_u64(r->value, 0, 64, value_64);
-                       LOG_DEBUG("READ: %s, %16.8llx", r->name, (unsigned long long) value_64);
-               } else {
-                       buf_set_u32(r->value, 0, 32, value);
-                       LOG_DEBUG("READ: %s, %8.8x", r->name, (unsigned) value);
+       if (r->size <= 64) {
+               uint64_t value_64;
+               retval = armv8->read_reg_u64(armv8, regnum, &value_64);
+
+               if (retval == ERROR_OK) {
+                       r->valid = true;
+                       r->dirty = false;
+                       buf_set_u64(r->value, 0, r->size, value_64);    /* only r->size bits are stored */
+                       if (r->size == 64)
+                               LOG_DEBUG("READ: %s, %16.8llx", r->name, (unsigned long long) value_64);
+                       else
+                               LOG_DEBUG("READ: %s, %8.8x", r->name, (unsigned int) value_64);
                }
-       }
-       return retval;
-}
+       } else if (r->size <= 128) {
+               uint64_t lvalue = 0, hvalue = 0;
+               retval = armv8->read_reg_u128(armv8, regnum, &lvalue, &hvalue);
 
-/* just write the register -- rely on the core mode being right */
-static int dpmv8_write_reg(struct arm_dpm *dpm, struct reg *r, unsigned regnum)
-{
-       int retval = ERROR_FAIL;
-       uint32_t value = 0xFFFFFFFF;
-       uint64_t value_64 = 0xFFFFFFFFFFFFFFFF;
-
-       switch (regnum) {
-               case 0 ... 30:
-                       value_64 = buf_get_u64(r->value, 0, 64);
-                       retval = dpm->instr_write_data_dcc_64(dpm,
-                               ARMV8_MRS(SYSTEM_DBG_DBGDTR_EL0, regnum),
-                               value_64);
-                       break;
-               case ARMV8_R31:
-                       value_64 = buf_get_u64(r->value, 0, 64);
-                       retval = dpm->instr_write_data_r0_64(dpm,
-                               ARMV8_MOVTSP_64(0),
-                               value_64);
-                       break;
-               case ARMV8_PC:
-                       value_64 = buf_get_u64(r->value, 0, 64);
-                       retval = dpm->instr_write_data_r0_64(dpm,
-                               ARMV8_MSR_DLR(0),
-                               value_64);
-                       break;
-               case ARMV8_xPSR:
-                       value = buf_get_u32(r->value, 0, 32);
-                       retval = dpm->instr_write_data_r0(dpm,
-                               ARMV8_MSR_DSPSR(0),
-                               value);
-                       break;
-               default:
-                       LOG_DEBUG("write: %s fail", r->name);
-                       break;
-       }
+               if (retval == ERROR_OK) {
+                       r->valid = true;
+                       r->dirty = false;
 
+                       buf_set_u64(r->value, 0, 64, lvalue);   /* low half at the start of the buffer */
+                       buf_set_u64(r->value + 8, 0, r->size - 64, hvalue);     /* remaining bits at offset 8 */
 
-       if (retval == ERROR_OK) {
-               r->dirty = false;
-               if (r->size == 64)
-                       LOG_DEBUG("WRITE: %s, %16.8llx", r->name, (unsigned long long) value_64);
-               else
-                       LOG_DEBUG("WRITE: %s, %8.8x", r->name, (unsigned) value);
+                       LOG_DEBUG("READ: %s, lvalue=%16.8llx", r->name, (unsigned long long) lvalue);
+                       LOG_DEBUG("READ: %s, hvalue=%16.8llx", r->name, (unsigned long long) hvalue);
+               }
        }
 
+       if (retval != ERROR_OK)
+               LOG_ERROR("Failed to read %s register", r->name);
+
        return retval;
 }
 
-static inline enum arm_state dpm_get_core_state(uint32_t dscr)
+/*
+ * Common register write, relies on armv8_select_reg_access() having been called.
+ */
+static int dpmv8_write_reg(struct arm_dpm *dpm, struct reg *r, unsigned regnum)
 {
-       int el = (dscr >> 8) & 0x3;
-       int rw = (dscr >> 10) & 0xF;
+       struct armv8_common *armv8 = dpm->arm->arch_info;
+       int retval = ERROR_FAIL;
 
-       LOG_DEBUG("EL:%i, RW:0x%x", el, rw);
+       if (r->size <= 64) {
+               uint64_t value_64;
 
-       /* DSCR.RW = 0b1111 - all EL are using AArch64 state */
-       if (rw == 0xF)
-               return ARM_STATE_AARCH64;
+               value_64 = buf_get_u64(r->value, 0, r->size);
+               retval = armv8->write_reg_u64(armv8, regnum, value_64);
 
-       /* DSCR.RW = 0b1110 - all EL > 0 are using AArch64 state */
-       if (rw == 0xE && el > 0)
-               return ARM_STATE_AARCH64;
+               if (retval == ERROR_OK) {
+                       r->dirty = false;
+                       if (r->size == 64)
+                               LOG_DEBUG("WRITE: %s, %16.8llx", r->name, (unsigned long long)value_64);
+                       else
+                               LOG_DEBUG("WRITE: %s, %8.8x", r->name, (unsigned int)value_64);
+               }
+       } else if (r->size <= 128) {
+               uint64_t lvalue, hvalue;
 
-       /* DSCR.RW = 0b110x - all EL > 1 are using Aarch64 state */
-       if ((rw & 0xE) == 0xC && el > 1)
-               return ARM_STATE_AARCH64;
+               lvalue = buf_get_u64(r->value, 0, 64);
+               hvalue = buf_get_u64(r->value + 8, 0, r->size - 64);
+               retval = armv8->write_reg_u128(armv8, regnum, lvalue, hvalue);
 
-       /* DSCR.RW = 0b10xx - all EL > 2 are using Aarch64 state */
-       if ((rw & 0xC) == 0x8 && el > 2)
-               return ARM_STATE_AARCH64;
+               if (retval == ERROR_OK) {
+                       r->dirty = false;
+
+                       LOG_DEBUG("WRITE: %s, lvalue=%16.8llx", r->name, (unsigned long long) lvalue);
+                       LOG_DEBUG("WRITE: %s, hvalue=%16.8llx", r->name, (unsigned long long) hvalue);
+               }
+       }
 
-       /* DSCR.RW = 0b0xxx - all EL are using AArch32 state */
-       if ((rw & 0x8) == 0)
-               return ARM_STATE_ARM;
+       if (retval != ERROR_OK)
+               LOG_ERROR("Failed to write %s register", r->name);
 
-       return ARM_STATE_ARM;
+       return retval;
 }
 
 /**
- * Read basic registers of the the current context:  R0 to R15, and CPSR;
+ * Read basic registers of the current context:  R0 to R15, and CPSR;
  * sets the core mode (such as USR or IRQ) and state (such as ARM or Thumb).
  * In normal operation this is called on entry to halting debug state,
  * possibly after some other operations supporting restore of debug state
@@ -427,60 +732,75 @@ int armv8_dpm_read_current_registers(struct arm_dpm *dpm)
 {
        struct arm *arm = dpm->arm;
        struct armv8_common *armv8 = (struct armv8_common *)arm->arch_info;
-       enum arm_state core_state;
+       struct reg_cache *cache;
+       struct reg *r;
        uint32_t cpsr;
-
        int retval;
-       struct reg *r;
 
        retval = dpm->prepare(dpm);
        if (retval != ERROR_OK)
                return retval;
 
-       core_state = dpm_get_core_state(dpm->dscr);
-
-       armv8_select_opcodes(armv8, core_state);
+       cache = arm->core_cache;
 
        /* read R0 first (it's used for scratch), then CPSR */
-       r = arm->core_cache->reg_list + 0;
+       r = cache->reg_list + ARMV8_R0;
        if (!r->valid) {
-               retval = core_state == ARM_STATE_AARCH64 ?
-                                       dpmv8_read_reg(dpm, r, 0) : dpmv8_read_reg32(dpm, r, 0);
+               retval = dpmv8_read_reg(dpm, r, ARMV8_R0);
                if (retval != ERROR_OK)
                        goto fail;
        }
        r->dirty = true;
 
+       /* read R1, too, it will be clobbered during memory access */
+       r = cache->reg_list + ARMV8_R1;
+       if (!r->valid) {
+               retval = dpmv8_read_reg(dpm, r, ARMV8_R1);
+               if (retval != ERROR_OK)
+                       goto fail;
+       }
+
        /* read cpsr to r0 and get it back */
-       retval = dpm->instr_read_data_r0(dpm, armv8_opcode(armv8, READ_REG_DSPSR), &cpsr);
+       retval = dpm->instr_read_data_r0(dpm,
+                       armv8_opcode(armv8, READ_REG_DSPSR), &cpsr);
        if (retval != ERROR_OK)
                goto fail;
 
-       /* update core mode and state, plus shadow mapping for R8..R14 */
+       /* update core mode and state */
        armv8_set_cpsr(arm, cpsr);
 
-       /* REVISIT we can probably avoid reading R1..R14, saving time... */
-       for (unsigned i = 1; i < arm->core_cache->num_regs ; i++) {
+       for (unsigned int i = ARMV8_PC; i < cache->num_regs ; i++) {
+               struct arm_reg *arm_reg;
+
                r = armv8_reg_current(arm, i);
-               if (r->valid)
+               if (!r->exist || r->valid)
+                       continue;
+
+               /* Skip reading FP-SIMD registers */
+               if (r->number >= ARMV8_V0 && r->number <= ARMV8_FPCR)
+                       continue;
+
+               /*
+                * Only read registers that are available from the
+                * current EL (or core mode).
+                */
+               arm_reg = r->arch_info;
+               if (arm_reg->mode != ARM_MODE_ANY &&
+                               dpm->last_el != armv8_curel_from_core_mode(arm_reg->mode))
                        continue;
 
-               retval = core_state == ARM_STATE_AARCH64 ?
-                                       dpmv8_read_reg(dpm, r, i) : dpmv8_read_reg32(dpm, r, i);
+               /* Special case: ARM_MODE_SYS has no SPSR at EL1 */
+               if (r->number == ARMV8_SPSR_EL1 && arm->core_mode == ARM_MODE_SYS)
+                       continue;
 
+               retval = dpmv8_read_reg(dpm, r, i);
                if (retval != ERROR_OK)
                        goto fail;
-       }
 
-       /* NOTE: SPSR ignored (if it's even relevant). */
-
-       /* REVISIT the debugger can trigger various exceptions.  See the
-        * ARMv7A architecture spec, section C5.7, for more info about
-        * what defenses are needed; v6 debug has the most issues.
-        */
+       }
 
 fail:
-       /* (void) */ dpm->finish(dpm);
+       dpm->finish(dpm);
        return retval;
 }
 
@@ -489,7 +809,7 @@ fail:
  * or running debugger code.
  */
 static int dpmv8_maybe_update_bpwp(struct arm_dpm *dpm, bool bpwp,
-       struct dpm_bpwp *xp, int *set_p)
+       struct dpm_bpwp *xp, bool *set_p)
 {
        int retval = ERROR_OK;
        bool disable;
@@ -545,7 +865,6 @@ int armv8_dpm_write_dirty_registers(struct arm_dpm *dpm, bool bpwp)
        struct arm *arm = dpm->arm;
        struct reg_cache *cache = arm->core_cache;
        int retval;
-       bool is_aarch64 = arm->core_state == ARM_STATE_AARCH64;
 
        retval = dpm->prepare(dpm);
        if (retval != ERROR_OK)
@@ -564,7 +883,7 @@ int armv8_dpm_write_dirty_registers(struct arm_dpm *dpm, bool bpwp)
                        struct breakpoint *bp = dbp->bp;
 
                        retval = dpmv8_maybe_update_bpwp(dpm, bpwp, &dbp->bpwp,
-                                       bp ? &bp->set : NULL);
+                                       bp ? &bp->is_set : NULL);
                        if (retval != ERROR_OK)
                                goto done;
                }
@@ -576,7 +895,7 @@ int armv8_dpm_write_dirty_registers(struct arm_dpm *dpm, bool bpwp)
                struct watchpoint *wp = dwp->wp;
 
                retval = dpmv8_maybe_update_bpwp(dpm, bpwp, &dwp->bpwp,
-                               wp ? &wp->set : NULL);
+                               wp ? &wp->is_set : NULL);
                if (retval != ERROR_OK)
                        goto done;
        }
@@ -585,59 +904,51 @@ int armv8_dpm_write_dirty_registers(struct arm_dpm *dpm, bool bpwp)
         * be queued, and need (efficient/batched) flushing later.
         */
 
-       /* Scan the registers until we find one that's both dirty and
-        * eligible for flushing.  Flush that and everything else that
-        * shares the same core mode setting.  Typically this won't
-        * actually find anything to do...
-        */
+       /* Restore original core mode and state */
+       retval = armv8_dpm_modeswitch(dpm, ARM_MODE_ANY);
+       if (retval != ERROR_OK)
+               goto done;
 
        /* check everything except our scratch register R0 */
        for (unsigned i = 1; i < cache->num_regs; i++) {
                struct arm_reg *r;
-               unsigned regnum;
 
-               /* also skip PC, CPSR, and non-dirty */
-               if (i == (arm->core_cache->num_regs - 2))
+               /* skip non-existent */
+               if (!cache->reg_list[i].exist)
                        continue;
-               if (arm->cpsr == cache->reg_list + i)
+               /* skip PC and CPSR */
+               if (i == ARMV8_PC || i == ARMV8_XPSR)
                        continue;
+               /* skip invalid */
+               if (!cache->reg_list[i].valid)
+                       continue;
+               /* skip non-dirty */
                if (!cache->reg_list[i].dirty)
                        continue;
 
+               /* skip all registers not on the current EL */
                r = cache->reg_list[i].arch_info;
-               regnum = r->num;
+               if (r->mode != ARM_MODE_ANY &&
+                               dpm->last_el != armv8_curel_from_core_mode(r->mode))
+                       continue;
 
-               retval = is_aarch64 ? dpmv8_write_reg(dpm, &cache->reg_list[i], regnum)
-                               : dpmv8_write_reg32(dpm, &cache->reg_list[i], regnum);
+               retval = dpmv8_write_reg(dpm, &cache->reg_list[i], i);
                if (retval != ERROR_OK)
-                       goto done;
+                       break;
        }
 
-
-       /* Restore original CPSR ... assuming either that we changed it,
-        * or it's dirty.  Must write PC to ensure the return address is
-        * defined, and must not write it before CPSR.
-        */
-       retval = dpmv8_modeswitch(dpm, ARM_MODE_ANY);
-       if (retval != ERROR_OK)
-               goto done;
-       arm->cpsr->dirty = false;
-
-       retval = is_aarch64 ? dpmv8_write_reg(dpm, arm->pc, (arm->core_cache->num_regs - 2))
-                       : dpmv8_write_reg32(dpm, arm->pc, (arm->core_cache->num_regs - 2));
-       if (retval != ERROR_OK)
-               goto done;
-       arm->pc->dirty = false;
-
+       /* flush CPSR and PC */
+       if (retval == ERROR_OK)
+               retval = dpmv8_write_reg(dpm, &cache->reg_list[ARMV8_XPSR], ARMV8_XPSR);
+       if (retval == ERROR_OK)
+               retval = dpmv8_write_reg(dpm, &cache->reg_list[ARMV8_PC], ARMV8_PC);
        /* flush R0 -- it's *very* dirty by now */
-       retval = is_aarch64 ? dpmv8_write_reg(dpm, &cache->reg_list[0], 0)
-                       : dpmv8_write_reg32(dpm, &cache->reg_list[0], 0);
-       if (retval != ERROR_OK)
-               goto done;
-       cache->reg_list[0].dirty = false;
-
-       /* (void) */ dpm->finish(dpm);
+       if (retval == ERROR_OK)
+               retval = dpmv8_write_reg(dpm, &cache->reg_list[0], 0);
+       if (retval == ERROR_OK)
+               dpm->instr_cpsr_sync(dpm);
 done:
+       dpm->finish(dpm);
        return retval;
 }
 
@@ -655,19 +966,18 @@ static int armv8_dpm_read_core_reg(struct target *target, struct reg *r,
        int retval;
        int max = arm->core_cache->num_regs;
 
-       if (regnum < 0 || regnum > max)
+       if (regnum < 0 || regnum >= max)
                return ERROR_COMMAND_SYNTAX_ERROR;
 
-       /* REVISIT what happens if we try to read SPSR in a core mode
+       /*
+        * REVISIT what happens if we try to read SPSR in a core mode
         * which has no such register?
         */
-
        retval = dpm->prepare(dpm);
        if (retval != ERROR_OK)
                return retval;
 
-       retval = arm->core_state == ARM_STATE_AARCH64 ?
-                       dpmv8_read_reg(dpm, r, regnum) : dpmv8_read_reg32(dpm, r, regnum);
+       retval = dpmv8_read_reg(dpm, r, regnum);
        if (retval != ERROR_OK)
                goto fail;
 
@@ -695,12 +1005,11 @@ static int armv8_dpm_write_core_reg(struct target *target, struct reg *r,
        if (retval != ERROR_OK)
                return retval;
 
-       retval = arm->core_state == ARM_STATE_AARCH64 ?
-                       dpmv8_write_reg(dpm, r, regnum) : dpmv8_write_reg32(dpm, r, regnum);
+       retval = dpmv8_write_reg(dpm, r, regnum);
 
        /* always clean up, regardless of error */
+       dpm->finish(dpm);
 
-       /* (void) */ dpm->finish(dpm);
        return retval;
 }
 
@@ -732,7 +1041,7 @@ static int armv8_dpm_full_context(struct target *target)
                for (unsigned i = 0; i < cache->num_regs; i++) {
                        struct arm_reg *r;
 
-                       if (cache->reg_list[i].valid)
+                       if (!cache->reg_list[i].exist || cache->reg_list[i].valid)
                                continue;
                        r = cache->reg_list[i].arch_info;
 
@@ -746,9 +1055,9 @@ static int armv8_dpm_full_context(struct target *target)
                                 * in FIQ mode we need to patch mode.
                                 */
                                if (mode != ARM_MODE_ANY)
-                                       retval = dpmv8_modeswitch(dpm, mode);
+                                       retval = armv8_dpm_modeswitch(dpm, mode);
                                else
-                                       retval = dpmv8_modeswitch(dpm, ARM_MODE_USR);
+                                       retval = armv8_dpm_modeswitch(dpm, ARM_MODE_USR);
 
                                if (retval != ERROR_OK)
                                        goto done;
@@ -766,7 +1075,7 @@ static int armv8_dpm_full_context(struct target *target)
 
        } while (did_read);
 
-       retval = dpmv8_modeswitch(dpm, ARM_MODE_ANY);
+       retval = armv8_dpm_modeswitch(dpm, ARM_MODE_ANY);
        /* (void) */ dpm->finish(dpm);
 done:
        return retval;
@@ -964,25 +1273,64 @@ static int dpmv8_remove_watchpoint(struct target *target, struct watchpoint *wp)
        return retval;
 }
 
-void armv8_dpm_report_wfar(struct arm_dpm *dpm, uint64_t addr)
+/*
+ * Handle exceptions taken in debug state. This happens mostly for memory
+ * accesses that violated a MMU policy. Taking an exception while in debug
+ * state clobbers certain state registers on the target exception level.
+ * Just mark those registers dirty so that they get restored on resume.
+ * This works for both AArch32 and AArch64 states.
+ *
+ * This function must not perform any actions that trigger another exception
+ * or a recursion will happen.
+ */
+void armv8_dpm_handle_exception(struct arm_dpm *dpm, bool do_restore)
 {
-       switch (dpm->arm->core_state) {
-               case ARM_STATE_ARM:
-               case ARM_STATE_AARCH64:
-                       addr -= 8;
-                       break;
-               case ARM_STATE_THUMB:
-               case ARM_STATE_THUMB_EE:
-                       addr -= 4;
-                       break;
-               case ARM_STATE_JAZELLE:
-                       /* ?? */
-                       break;
-               default:
-                       LOG_DEBUG("Unknow core_state");
-                       break;
+       struct armv8_common *armv8 = dpm->arm->arch_info;
+       struct reg_cache *cache = dpm->arm->core_cache;
+       enum arm_state core_state;
+       uint64_t dlr;
+       uint32_t dspsr;
+       unsigned int el;
+
+       static const int clobbered_regs_by_el[3][5] = {
+               { ARMV8_PC, ARMV8_XPSR, ARMV8_ELR_EL1, ARMV8_ESR_EL1, ARMV8_SPSR_EL1 },
+               { ARMV8_PC, ARMV8_XPSR, ARMV8_ELR_EL2, ARMV8_ESR_EL2, ARMV8_SPSR_EL2 },
+               { ARMV8_PC, ARMV8_XPSR, ARMV8_ELR_EL3, ARMV8_ESR_EL3, ARMV8_SPSR_EL3 },
+       };
+
+       el = (dpm->dscr >> 8) & 3;
+
+       /* safety check, must not happen since EL0 cannot be a target for an exception */
+       if (el < SYSTEM_CUREL_EL1 || el > SYSTEM_CUREL_EL3) {
+               LOG_ERROR("%s: EL %i is invalid, DSCR corrupted?", __func__, el);
+               return;
        }
-       dpm->wp_pc = addr;
+
+       /* Clear sticky error */
+       mem_ap_write_u32(armv8->debug_ap,
+               armv8->debug_base + CPUV8_DBG_DRCR, DRCR_CSE);
+
+       armv8->read_reg_u64(armv8, ARMV8_XPSR, &dlr);
+       dspsr = dlr;
+       armv8->read_reg_u64(armv8, ARMV8_PC, &dlr);
+
+       LOG_DEBUG("Exception taken to EL %i, DLR=0x%016"PRIx64" DSPSR=0x%08"PRIx32,
+                       el, dlr, dspsr);
+
+       /* mark all clobbered registers as dirty */
+       for (int i = 0; i < 5; i++)
+               cache->reg_list[clobbered_regs_by_el[el-1][i]].dirty = true;
+
+       /*
+        * re-evaluate the core state, as we might be in AArch64 state now;
+        * this relies on dpm->dscr being up-to-date
+        */
+       core_state = armv8_dpm_get_core_state(dpm);
+       armv8_select_opcodes(armv8, core_state == ARM_STATE_AARCH64);
+       armv8_select_reg_access(armv8, core_state == ARM_STATE_AARCH64);
+
+       if (do_restore)
+               armv8_dpm_modeswitch(dpm, ARM_MODE_ANY);
 }
 
 /*----------------------------------------------------------------------*/
@@ -996,6 +1344,7 @@ void armv8_dpm_report_dscr(struct arm_dpm *dpm, uint32_t dscr)
        struct target *target = dpm->arm->target;
 
        dpm->dscr = dscr;
+       dpm->last_el = (dscr >> 8) & 3;
 
        /* Examine debug reason */
        switch (DSCR_ENTRY(dscr)) {
@@ -1012,13 +1361,15 @@ void armv8_dpm_report_dscr(struct arm_dpm *dpm, uint32_t dscr)
                case DSCRV8_ENTRY_BKPT: /* SW BKPT (?) */
                case DSCRV8_ENTRY_RESET_CATCH:  /* Reset catch */
                case DSCRV8_ENTRY_OS_UNLOCK:  /*OS unlock catch*/
-               case DSCRV8_ENTRY_EXCEPTION_CATCH:  /*exception catch*/
                case DSCRV8_ENTRY_SW_ACCESS_DBG: /*SW access dbg register*/
                        target->debug_reason = DBG_REASON_BREAKPOINT;
                        break;
                case DSCRV8_ENTRY_WATCHPOINT:   /* asynch watchpoint */
                        target->debug_reason = DBG_REASON_WATCHPOINT;
                        break;
+               case DSCRV8_ENTRY_EXCEPTION_CATCH:  /*exception catch*/
+                       target->debug_reason = DBG_REASON_EXC_CATCH;
+                       break;
                default:
                        target->debug_reason = DBG_REASON_UNDEFINED;
                        break;
@@ -1050,7 +1401,7 @@ int armv8_dpm_setup(struct arm_dpm *dpm)
        arm->read_core_reg = armv8_dpm_read_core_reg;
        arm->write_core_reg = armv8_dpm_write_core_reg;
 
-       if (arm->core_cache == NULL) {
+       if (!arm->core_cache) {
                cache = armv8_build_reg_cache(target);
                if (!cache)
                        return ERROR_FAIL;
@@ -1059,8 +1410,27 @@ int armv8_dpm_setup(struct arm_dpm *dpm)
        /* coprocessor access setup */
        arm->mrc = dpmv8_mrc;
        arm->mcr = dpmv8_mcr;
-       arm->mrs = dpmv8_mrs;
-       arm->msr = dpmv8_msr;
+
+       dpm->prepare = dpmv8_dpm_prepare;
+       dpm->finish = dpmv8_dpm_finish;
+
+       dpm->instr_execute = dpmv8_instr_execute;
+       dpm->instr_write_data_dcc = dpmv8_instr_write_data_dcc;
+       dpm->instr_write_data_dcc_64 = dpmv8_instr_write_data_dcc_64;
+       dpm->instr_write_data_r0 = dpmv8_instr_write_data_r0;
+       dpm->instr_write_data_r0_64 = dpmv8_instr_write_data_r0_64;
+       dpm->instr_cpsr_sync = dpmv8_instr_cpsr_sync;
+
+       dpm->instr_read_data_dcc = dpmv8_instr_read_data_dcc;
+       dpm->instr_read_data_dcc_64 = dpmv8_instr_read_data_dcc_64;
+       dpm->instr_read_data_r0 = dpmv8_instr_read_data_r0;
+       dpm->instr_read_data_r0_64 = dpmv8_instr_read_data_r0_64;
+
+       dpm->arm_reg_current = armv8_reg_current;
+
+/*     dpm->bpwp_enable = dpmv8_bpwp_enable; */
+       dpm->bpwp_disable = dpmv8_bpwp_disable;
+
        /* breakpoint setup -- optional until it works everywhere */
        if (!target->type->add_breakpoint) {
                target->type->add_breakpoint = dpmv8_add_breakpoint;
@@ -1068,16 +1438,18 @@ int armv8_dpm_setup(struct arm_dpm *dpm)
        }
 
        /* watchpoint setup */
-       target->type->add_watchpoint = dpmv8_add_watchpoint;
-       target->type->remove_watchpoint = dpmv8_remove_watchpoint;
+       if (!target->type->add_watchpoint) {
+               target->type->add_watchpoint = dpmv8_add_watchpoint;
+               target->type->remove_watchpoint = dpmv8_remove_watchpoint;
+       }
 
        /* FIXME add vector catch support */
 
        dpm->nbp = 1 + ((dpm->didr >> 12) & 0xf);
-       dpm->dbp = calloc(dpm->nbp, sizeof *dpm->dbp);
+       dpm->dbp = calloc(dpm->nbp, sizeof(*dpm->dbp));
 
        dpm->nwp = 1 + ((dpm->didr >> 20) & 0xf);
-       dpm->dwp = calloc(dpm->nwp, sizeof *dpm->dwp);
+       dpm->dwp = calloc(dpm->nwp, sizeof(*dpm->dwp));
 
        if (!dpm->dbp || !dpm->dwp) {
                free(dpm->dbp);