target: remove handle_profile_command memory leak
[fw/openocd] / src / target / target.c
index ab35d6abe1903663958f39b19e966c0f69cb7ceb..b868383d1bed017654b2dfcceb232060293c0fe4 100644 (file)
@@ -72,6 +72,8 @@ static int target_get_gdb_fileio_info_default(struct target *target,
                struct gdb_fileio_info *fileio_info);
 static int target_gdb_fileio_end_default(struct target *target, int retcode,
                int fileio_errno, bool ctrl_c);
+static int target_profiling_default(struct target *target, uint32_t *samples,
+               uint32_t max_num_samples, uint32_t *num_samples, uint32_t seconds);
 
 /* targets */
 extern struct target_type arm7tdmi_target;
@@ -85,7 +87,7 @@ extern struct target_type fa526_target;
 extern struct target_type feroceon_target;
 extern struct target_type dragonite_target;
 extern struct target_type xscale_target;
-extern struct target_type cortexm3_target;
+extern struct target_type cortexm_target;
 extern struct target_type cortexa8_target;
 extern struct target_type cortexr4_target;
 extern struct target_type arm11_target;
@@ -99,6 +101,8 @@ extern struct target_type hla_target;
 extern struct target_type nds32_v2_target;
 extern struct target_type nds32_v3_target;
 extern struct target_type nds32_v3m_target;
+extern struct target_type or1k_target;
+extern struct target_type quark_x10xx_target;
 
 static struct target_type *target_types[] = {
        &arm7tdmi_target,
@@ -112,7 +116,7 @@ static struct target_type *target_types[] = {
        &feroceon_target,
        &dragonite_target,
        &xscale_target,
-       &cortexm3_target,
+       &cortexm_target,
        &cortexa8_target,
        &cortexr4_target,
        &arm11_target,
@@ -126,6 +130,8 @@ static struct target_type *target_types[] = {
        &nds32_v2_target,
        &nds32_v3_target,
        &nds32_v3m_target,
+       &or1k_target,
+       &quark_x10xx_target,
        NULL,
 };
 
@@ -290,6 +296,15 @@ static int new_target_number(void)
        return x + 1;
 }
 
+/* read a uint64_t from a buffer in target memory endianness */
+uint64_t target_buffer_get_u64(struct target *target, const uint8_t *buffer)
+{
+       if (target->endianness == TARGET_LITTLE_ENDIAN)
+               return le_to_h_u64(buffer);
+       else
+               return be_to_h_u64(buffer);
+}
+
 /* read a uint32_t from a buffer in target memory endianness */
 uint32_t target_buffer_get_u32(struct target *target, const uint8_t *buffer)
 {
@@ -323,6 +338,15 @@ static uint8_t target_buffer_get_u8(struct target *target, const uint8_t *buffer
        return *buffer & 0x0ff;
 }
 
+/* write a uint64_t to a buffer in target memory endianness */
+void target_buffer_set_u64(struct target *target, uint8_t *buffer, uint64_t value)
+{
+       if (target->endianness == TARGET_LITTLE_ENDIAN)
+               h_u64_to_le(buffer, value);
+       else
+               h_u64_to_be(buffer, value);
+}
+
 /* write a uint32_t to a buffer in target memory endianness */
 void target_buffer_set_u32(struct target *target, uint8_t *buffer, uint32_t value)
 {
@@ -356,6 +380,14 @@ static void target_buffer_set_u8(struct target *target, uint8_t *buffer, uint8_t
        *buffer = value;
 }
 
+/* write a uint64_t array to a buffer in target memory endianness */
+void target_buffer_get_u64_array(struct target *target, const uint8_t *buffer, uint32_t count, uint64_t *dstbuf)
+{
+       uint32_t i;
+       for (i = 0; i < count; i++)
+               dstbuf[i] = target_buffer_get_u64(target, &buffer[i * 8]);
+}
+
 /* write a uint32_t array to a buffer in target memory endianness */
 void target_buffer_get_u32_array(struct target *target, const uint8_t *buffer, uint32_t count, uint32_t *dstbuf)
 {
@@ -372,8 +404,16 @@ void target_buffer_get_u16_array(struct target *target, const uint8_t *buffer, u
                dstbuf[i] = target_buffer_get_u16(target, &buffer[i * 2]);
 }
 
+/* write a uint64_t array to a buffer in target memory endianness */
+void target_buffer_set_u64_array(struct target *target, uint8_t *buffer, uint32_t count, const uint64_t *srcbuf)
+{
+       uint32_t i;
+       for (i = 0; i < count; i++)
+               target_buffer_set_u64(target, &buffer[i * 8], srcbuf[i]);
+}
+
 /* write a uint32_t array to a buffer in target memory endianness */
-void target_buffer_set_u32_array(struct target *target, uint8_t *buffer, uint32_t count, uint32_t *srcbuf)
+void target_buffer_set_u32_array(struct target *target, uint8_t *buffer, uint32_t count, const uint32_t *srcbuf)
 {
        uint32_t i;
        for (i = 0; i < count; i++)
@@ -381,7 +421,7 @@ void target_buffer_set_u32_array(struct target *target, uint8_t *buffer, uint32_
 }
 
 /* write a uint16_t array to a buffer in target memory endianness */
-void target_buffer_set_u16_array(struct target *target, uint8_t *buffer, uint32_t count, uint16_t *srcbuf)
+void target_buffer_set_u16_array(struct target *target, uint8_t *buffer, uint32_t count, const uint16_t *srcbuf)
 {
        uint32_t i;
        for (i = 0; i < count; i++)
@@ -813,7 +853,7 @@ done:
  */
 
 int target_run_flash_async_algorithm(struct target *target,
-               uint8_t *buffer, uint32_t count, int block_size,
+               const uint8_t *buffer, uint32_t count, int block_size,
                int num_mem_params, struct mem_param *mem_params,
                int num_reg_params, struct reg_param *reg_params,
                uint32_t buffer_start, uint32_t buffer_size,
@@ -1084,6 +1124,17 @@ int target_gdb_fileio_end(struct target *target, int retcode, int fileio_errno,
        return target->type->gdb_fileio_end(target, retcode, fileio_errno, ctrl_c);
 }
 
+int target_profiling(struct target *target, uint32_t *samples,
+                       uint32_t max_num_samples, uint32_t *num_samples, uint32_t seconds)
+{
+       if (target->state != TARGET_HALTED) {
+               LOG_WARNING("target %s is not halted", target->cmd_name);
+               return ERROR_TARGET_NOT_HALTED;
+       }
+       return target->type->profiling(target, samples, max_num_samples,
+                       num_samples, seconds);
+}
+
 /**
  * Reset the @c examined flag for the given target.
  * Pure paranoia -- targets are zeroed on allocation.
@@ -1173,6 +1224,9 @@ static int target_init_one(struct command_context *cmd_ctx,
        if (target->type->gdb_fileio_end == NULL)
                target->type->gdb_fileio_end = target_gdb_fileio_end_default;
 
+       if (target->type->profiling == NULL)
+               target->type->profiling = target_profiling_default;
+
        return ERROR_OK;
 }
 
@@ -1307,7 +1361,7 @@ int target_unregister_event_callback(int (*callback)(struct target *target,
        return ERROR_OK;
 }
 
-static int target_unregister_timer_callback(int (*callback)(void *priv), void *priv)
+int target_unregister_timer_callback(int (*callback)(void *priv), void *priv)
 {
        struct target_timer_callback **p = &target_timer_callbacks;
        struct target_timer_callback *c = target_timer_callbacks;
@@ -1738,6 +1792,55 @@ static int target_gdb_fileio_end_default(struct target *target,
        return ERROR_OK;
 }
 
+static int target_profiling_default(struct target *target, uint32_t *samples,
+               uint32_t max_num_samples, uint32_t *num_samples, uint32_t seconds)
+{
+       struct timeval timeout, now;
+
+       gettimeofday(&timeout, NULL);
+       timeval_add_time(&timeout, seconds, 0);
+
+       LOG_INFO("Starting profiling. Halting and resuming the"
+                       " target as often as we can...");
+
+       uint32_t sample_count = 0;
+       /* hopefully it is safe to cache! We want to stop/restart as quickly as possible. */
+       struct reg *reg = register_get_by_name(target->reg_cache, "pc", 1);
+
+       int retval = ERROR_OK;
+       for (;;) {
+               target_poll(target);
+               if (target->state == TARGET_HALTED) {
+                       uint32_t t = *((uint32_t *)reg->value);
+                       samples[sample_count++] = t;
+                       /* current pc, addr = 0, do not handle breakpoints, not debugging */
+                       retval = target_resume(target, 1, 0, 0, 0);
+                       target_poll(target);
+                       alive_sleep(10); /* sleep 10ms, i.e. <100 samples/second. */
+               } else if (target->state == TARGET_RUNNING) {
+                       /* We want to quickly sample the PC. */
+                       retval = target_halt(target);
+               } else {
+                       LOG_INFO("Target not halted or running");
+                       retval = ERROR_OK;
+                       break;
+               }
+
+               if (retval != ERROR_OK)
+                       break;
+
+               gettimeofday(&now, NULL);
+               if ((sample_count >= max_num_samples) ||
+                       ((now.tv_sec >= timeout.tv_sec) && (now.tv_usec >= timeout.tv_usec))) {
+                       LOG_INFO("Profiling completed. %" PRIu32 " samples.", sample_count);
+                       break;
+               }
+       }
+
+       *num_samples = sample_count;
+       return retval;
+}
+
 /* Single aligned words are guaranteed to use 16 or 32 bit access
  * mode respectively, otherwise data is handled as quickly as
  * possible
@@ -1916,6 +2019,30 @@ int target_blank_check_memory(struct target *target, uint32_t address, uint32_t
        return retval;
 }
 
+int target_read_u64(struct target *target, uint64_t address, uint64_t *value)
+{
+       uint8_t value_buf[8];
+       if (!target_was_examined(target)) {
+               LOG_ERROR("Target not examined yet");
+               return ERROR_FAIL;
+       }
+
+       int retval = target_read_memory(target, address, 8, 1, value_buf);
+
+       if (retval == ERROR_OK) {
+               *value = target_buffer_get_u64(target, value_buf);
+               LOG_DEBUG("address: 0x%" PRIx64 ", value: 0x%16.16" PRIx64 "",
+                                 address,
+                                 *value);
+       } else {
+               *value = 0x0;
+               LOG_DEBUG("address: 0x%" PRIx64 " failed",
+                                 address);
+       }
+
+       return retval;
+}
+
 int target_read_u32(struct target *target, uint32_t address, uint32_t *value)
 {
        uint8_t value_buf[4];
@@ -1966,12 +2093,13 @@ int target_read_u16(struct target *target, uint32_t address, uint16_t *value)
 
 int target_read_u8(struct target *target, uint32_t address, uint8_t *value)
 {
-       int retval = target_read_memory(target, address, 1, 1, value);
        if (!target_was_examined(target)) {
                LOG_ERROR("Target not examined yet");
                return ERROR_FAIL;
        }
 
+       int retval = target_read_memory(target, address, 1, 1, value);
+
        if (retval == ERROR_OK) {
                LOG_DEBUG("address: 0x%8.8" PRIx32 ", value: 0x%2.2x",
                                  address,
@@ -1985,6 +2113,27 @@ int target_read_u8(struct target *target, uint32_t address, uint8_t *value)
        return retval;
 }
 
+int target_write_u64(struct target *target, uint64_t address, uint64_t value)
+{
+       int retval;
+       uint8_t value_buf[8];
+       if (!target_was_examined(target)) {
+               LOG_ERROR("Target not examined yet");
+               return ERROR_FAIL;
+       }
+
+       LOG_DEBUG("address: 0x%" PRIx64 ", value: 0x%16.16" PRIx64 "",
+                         address,
+                         value);
+
+       target_buffer_set_u64(target, value_buf, value);
+       retval = target_write_memory(target, address, 8, 1, value_buf);
+       if (retval != ERROR_OK)
+               LOG_DEBUG("failed: %i", retval);
+
+       return retval;
+}
+
 int target_write_u32(struct target *target, uint32_t address, uint32_t value)
 {
        int retval;
@@ -2230,6 +2379,10 @@ static int handle_target(void *priv)
        for (struct target *target = all_targets;
                        is_jtag_poll_safe() && target;
                        target = target->next) {
+
+               if (!target_was_examined(target))
+                       continue;
+
                if (!target->tap->enabled)
                        continue;
 
@@ -2679,12 +2832,6 @@ COMMAND_HANDLER(handle_md_command)
 typedef int (*target_write_fn)(struct target *target,
                uint32_t address, uint32_t size, uint32_t count, const uint8_t *buffer);
 
-static int target_write_memory_fast(struct target *target,
-               uint32_t address, uint32_t size, uint32_t count, const uint8_t *buffer)
-{
-       return target_write_buffer(target, address, size * count, buffer);
-}
-
 static int target_fill_mem(struct target *target,
                uint32_t address,
                target_write_fn fn,
@@ -2749,7 +2896,7 @@ COMMAND_HANDLER(handle_mw_command)
                CMD_ARGV++;
                fn = target_write_phys_memory;
        } else
-               fn = target_write_memory_fast;
+               fn = target_write_memory;
        if ((CMD_ARGC < 2) || (CMD_ARGC > 3))
                return ERROR_COMMAND_SYNTAX_ERROR;
 
@@ -3361,8 +3508,11 @@ static void writeString(FILE *f, char *s)
        writeData(f, s, strlen(s));
 }
 
+typedef unsigned char UNIT[2];  /* unit of profiling */
+
 /* Dump a gmon.out histogram file. */
-static void writeGmon(uint32_t *samples, uint32_t sampleNum, const char *filename)
+static void write_gmon(uint32_t *samples, uint32_t sampleNum, const char *filename,
+               bool with_range, uint32_t start_address, uint32_t end_address)
 {
        uint32_t i;
        FILE *f = fopen(filename, "w");
@@ -3378,18 +3528,25 @@ static void writeGmon(uint32_t *samples, uint32_t sampleNum, const char *filenam
        writeData(f, &zero, 1);
 
        /* figure out bucket size */
-       uint32_t min = samples[0];
-       uint32_t max = samples[0];
-       for (i = 0; i < sampleNum; i++) {
-               if (min > samples[i])
-                       min = samples[i];
-               if (max < samples[i])
-                       max = samples[i];
-       }
+       uint32_t min;
+       uint32_t max;
+       if (with_range) {
+               min = start_address;
+               max = end_address;
+       } else {
+               min = samples[0];
+               max = samples[0];
+               for (i = 0; i < sampleNum; i++) {
+                       if (min > samples[i])
+                               min = samples[i];
+                       if (max < samples[i])
+                               max = samples[i];
+               }
 
-       /* max should be (largest sample + 1)
-        * Refer to binutils/gprof/hist.c (find_histogram_for_pc) */
-       max++;
+               /* max should be (largest sample + 1)
+                * Refer to binutils/gprof/hist.c (find_histogram_for_pc) */
+               max++;
+       }
 
        int addressSpace = max - min;
        assert(addressSpace >= 2);
@@ -3397,7 +3554,7 @@ static void writeGmon(uint32_t *samples, uint32_t sampleNum, const char *filenam
        /* FIXME: What is the reasonable number of buckets?
         * The profiling result will be more accurate if there are enough buckets. */
        static const uint32_t maxBuckets = 128 * 1024; /* maximum buckets. */
-       uint32_t numBuckets = addressSpace;
+       uint32_t numBuckets = addressSpace / sizeof(UNIT);
        if (numBuckets > maxBuckets)
                numBuckets = maxBuckets;
        int *buckets = malloc(sizeof(int) * numBuckets);
@@ -3408,6 +3565,10 @@ static void writeGmon(uint32_t *samples, uint32_t sampleNum, const char *filenam
        memset(buckets, 0, sizeof(int) * numBuckets);
        for (i = 0; i < sampleNum; i++) {
                uint32_t address = samples[i];
+
+               if ((address < min) || (max <= address))
+                       continue;
+
                long long a = address - min;
                long long b = numBuckets;
                long long c = addressSpace;
@@ -3451,84 +3612,70 @@ static void writeGmon(uint32_t *samples, uint32_t sampleNum, const char *filenam
 COMMAND_HANDLER(handle_profile_command)
 {
        struct target *target = get_current_target(CMD_CTX);
-       struct timeval timeout, now;
 
-       gettimeofday(&timeout, NULL);
-       if (CMD_ARGC != 2)
+       if ((CMD_ARGC != 2) && (CMD_ARGC != 4))
                return ERROR_COMMAND_SYNTAX_ERROR;
-       unsigned offset;
-       COMMAND_PARSE_NUMBER(uint, CMD_ARGV[0], offset);
 
-       timeval_add_time(&timeout, offset, 0);
+       const uint32_t MAX_PROFILE_SAMPLE_NUM = 10000;
+       uint32_t offset;
+       uint32_t num_of_sampels;
+       int retval = ERROR_OK;
+
+       COMMAND_PARSE_NUMBER(u32, CMD_ARGV[0], offset);
+
+       uint32_t *samples = malloc(sizeof(uint32_t) * MAX_PROFILE_SAMPLE_NUM);
+       if (samples == NULL) {
+               LOG_ERROR("No memory to store samples.");
+               return ERROR_FAIL;
+       }
 
        /**
-        * @todo: Some cores let us sample the PC without the
+        * Some cores let us sample the PC without the
         * annoying halt/resume step; for example, ARMv7 PCSR.
         * Provide a way to use that more efficient mechanism.
         */
+       retval = target_profiling(target, samples, MAX_PROFILE_SAMPLE_NUM,
+                               &num_of_sampels, offset);
+       if (retval != ERROR_OK) {
+               free(samples);
+               return retval;
+       }
 
-       command_print(CMD_CTX, "Starting profiling. Halting and resuming the target as often as we can...");
-
-       static const int maxSample = 10000;
-       uint32_t *samples = malloc(sizeof(uint32_t)*maxSample);
-       if (samples == NULL)
-               return ERROR_OK;
-
-       int numSamples = 0;
-       /* hopefully it is safe to cache! We want to stop/restart as quickly as possible. */
-       struct reg *reg = register_get_by_name(target->reg_cache, "pc", 1);
+       assert(num_of_sampels <= MAX_PROFILE_SAMPLE_NUM);
 
-       int retval = ERROR_OK;
-       for (;;) {
-               target_poll(target);
-               if (target->state == TARGET_HALTED) {
-                       uint32_t t = *((uint32_t *)reg->value);
-                       samples[numSamples++] = t;
-                       /* current pc, addr = 0, do not handle breakpoints, not debugging */
-                       retval = target_resume(target, 1, 0, 0, 0);
-                       target_poll(target);
-                       alive_sleep(10); /* sleep 10ms, i.e. <100 samples/second. */
-               } else if (target->state == TARGET_RUNNING) {
-                       /* We want to quickly sample the PC. */
-                       retval = target_halt(target);
-                       if (retval != ERROR_OK) {
-                               free(samples);
-                               return retval;
-                       }
-               } else {
-                       command_print(CMD_CTX, "Target not halted or running");
-                       retval = ERROR_OK;
-                       break;
+       retval = target_poll(target);
+       if (retval != ERROR_OK) {
+               free(samples);
+               return retval;
+       }
+       if (target->state == TARGET_RUNNING) {
+               retval = target_halt(target);
+               if (retval != ERROR_OK) {
+                       free(samples);
+                       return retval;
                }
-               if (retval != ERROR_OK)
-                       break;
+       }
 
-               gettimeofday(&now, NULL);
-               if ((numSamples >= maxSample) || ((now.tv_sec >= timeout.tv_sec)
-                               && (now.tv_usec >= timeout.tv_usec))) {
-                       command_print(CMD_CTX, "Profiling completed. %d samples.", numSamples);
-                       retval = target_poll(target);
-                       if (retval != ERROR_OK) {
-                               free(samples);
-                               return retval;
-                       }
-                       if (target->state == TARGET_HALTED) {
-                               /* current pc, addr = 0, do not handle
-                                * breakpoints, not debugging */
-                               target_resume(target, 1, 0, 0, 0);
-                       }
-                       retval = target_poll(target);
-                       if (retval != ERROR_OK) {
-                               free(samples);
-                               return retval;
-                       }
-                       writeGmon(samples, numSamples, CMD_ARGV[1]);
-                       command_print(CMD_CTX, "Wrote %s", CMD_ARGV[1]);
-                       break;
-               }
+       retval = target_poll(target);
+       if (retval != ERROR_OK) {
+               free(samples);
+               return retval;
        }
-       free(samples);
 
+       uint32_t start_address = 0;
+       uint32_t end_address = 0;
+       bool with_range = false;
+       if (CMD_ARGC == 4) {
+               with_range = true;
+               COMMAND_PARSE_NUMBER(u32, CMD_ARGV[2], start_address);
+               COMMAND_PARSE_NUMBER(u32, CMD_ARGV[3], end_address);
+       }
+
+       write_gmon(samples, num_of_sampels, CMD_ARGV[1],
+                       with_range, start_address, end_address);
+       command_print(CMD_CTX, "Wrote %s", CMD_ARGV[1]);
+
+       free(samples);
        return retval;
 }
 
@@ -3706,6 +3853,7 @@ static int target_mem2array(Jim_Interp *interp, struct target *target, int argc,
                                new_int_array_element(interp, varname, n, v);
                        }
                        len -= count;
+                       addr += count * width;
                }
        }
 
@@ -3899,6 +4047,7 @@ static int target_array2mem(Jim_Interp *interp, struct target *target,
                        e = JIM_ERR;
                        break;
                }
+               addr += count * width;
        }
 
        free(buffer);
@@ -4199,11 +4348,10 @@ no_params:
                                                                                   n->name);
                                        return JIM_ERR;
                                }
-                               if (target->variant)
-                                       free((void *)(target->variant));
                                e = Jim_GetOpt_String(goi, &cp, NULL);
                                if (e != JIM_OK)
                                        return e;
+                               free(target->variant);
                                target->variant = strdup(cp);
                        } else {
                                if (goi->argc != 0)
@@ -4311,7 +4459,7 @@ static int jim_target_mw(Jim_Interp *interp, int argc, Jim_Obj *const *argv)
        }
 
        target_write_fn fn;
-       fn = target_write_memory_fast;
+       fn = target_write_memory;
 
        int e;
        if (strcmp(Jim_GetString(argv[1], NULL), "phys") == 0) {
@@ -5378,7 +5526,7 @@ COMMAND_HANDLER(handle_fast_load_image_command)
                        fastload[i].data = malloc(length);
                        if (fastload[i].data == NULL) {
                                free(buffer);
-                               command_print(CMD_CTX, "error allocating buffer for section (%d bytes)",
+                               command_print(CMD_CTX, "error allocating buffer for section (%" PRIu32 " bytes)",
                                                          length);
                                retval = ERROR_FAIL;
                                break;
@@ -5501,6 +5649,198 @@ COMMAND_HANDLER(handle_ps_command)
        }
 }
 
+static void binprint(struct command_context *cmd_ctx, const char *text, const uint8_t *buf, int size)
+{
+       if (text != NULL)
+               command_print_sameline(cmd_ctx, "%s", text);
+       for (int i = 0; i < size; i++)
+               command_print_sameline(cmd_ctx, " %02x", buf[i]);
+       command_print(cmd_ctx, " ");
+}
+
+COMMAND_HANDLER(handle_test_mem_access_command)
+{
+       struct target *target = get_current_target(CMD_CTX);
+       uint32_t test_size;
+       int retval = ERROR_OK;
+
+       if (target->state != TARGET_HALTED) {
+               LOG_INFO("target not halted !!");
+               return ERROR_FAIL;
+       }
+
+       if (CMD_ARGC != 1)
+               return ERROR_COMMAND_SYNTAX_ERROR;
+
+       COMMAND_PARSE_NUMBER(u32, CMD_ARGV[0], test_size);
+
+       /* Test reads */
+       size_t num_bytes = test_size + 4;
+
+       struct working_area *wa = NULL;
+       retval = target_alloc_working_area(target, num_bytes, &wa);
+       if (retval != ERROR_OK) {
+               LOG_ERROR("Not enough working area");
+               return ERROR_FAIL;
+       }
+
+       uint8_t *test_pattern = malloc(num_bytes);
+
+       for (size_t i = 0; i < num_bytes; i++)
+               test_pattern[i] = rand();
+
+       retval = target_write_memory(target, wa->address, 1, num_bytes, test_pattern);
+       if (retval != ERROR_OK) {
+               LOG_ERROR("Test pattern write failed");
+               goto out;
+       }
+
+       for (int host_offset = 0; host_offset <= 1; host_offset++) {
+               for (int size = 1; size <= 4; size *= 2) {
+                       for (int offset = 0; offset < 4; offset++) {
+                               uint32_t count = test_size / size;
+                               size_t host_bufsiz = (count + 2) * size + host_offset;
+                               uint8_t *read_ref = malloc(host_bufsiz);
+                               uint8_t *read_buf = malloc(host_bufsiz);
+
+                               for (size_t i = 0; i < host_bufsiz; i++) {
+                                       read_ref[i] = rand();
+                                       read_buf[i] = read_ref[i];
+                               }
+                               command_print_sameline(CMD_CTX,
+                                               "Test read %d x %d @ %d to %saligned buffer: ", count,
+                                               size, offset, host_offset ? "un" : "");
+
+                               struct duration bench;
+                               duration_start(&bench);
+
+                               retval = target_read_memory(target, wa->address + offset, size, count,
+                                               read_buf + size + host_offset);
+
+                               duration_measure(&bench);
+
+                               if (retval == ERROR_TARGET_UNALIGNED_ACCESS) {
+                                       command_print(CMD_CTX, "Unsupported alignment");
+                                       goto next;
+                               } else if (retval != ERROR_OK) {
+                                       command_print(CMD_CTX, "Memory read failed");
+                                       goto next;
+                               }
+
+                               /* replay on host */
+                               memcpy(read_ref + size + host_offset, test_pattern + offset, count * size);
+
+                               /* check result */
+                               int result = memcmp(read_ref, read_buf, host_bufsiz);
+                               if (result == 0) {
+                                       command_print(CMD_CTX, "Pass in %fs (%0.3f KiB/s)",
+                                                       duration_elapsed(&bench),
+                                                       duration_kbps(&bench, count * size));
+                               } else {
+                                       command_print(CMD_CTX, "Compare failed");
+                                       binprint(CMD_CTX, "ref:", read_ref, host_bufsiz);
+                                       binprint(CMD_CTX, "buf:", read_buf, host_bufsiz);
+                               }
+next:
+                               free(read_ref);
+                               free(read_buf);
+                       }
+               }
+       }
+
+out:
+       free(test_pattern);
+
+       if (wa != NULL)
+               target_free_working_area(target, wa);
+
+       /* Test writes */
+       num_bytes = test_size + 4 + 4 + 4;
+
+       retval = target_alloc_working_area(target, num_bytes, &wa);
+       if (retval != ERROR_OK) {
+               LOG_ERROR("Not enough working area");
+               return ERROR_FAIL;
+       }
+
+       test_pattern = malloc(num_bytes);
+
+       for (size_t i = 0; i < num_bytes; i++)
+               test_pattern[i] = rand();
+
+       for (int host_offset = 0; host_offset <= 1; host_offset++) {
+               for (int size = 1; size <= 4; size *= 2) {
+                       for (int offset = 0; offset < 4; offset++) {
+                               uint32_t count = test_size / size;
+                               size_t host_bufsiz = count * size + host_offset;
+                               uint8_t *read_ref = malloc(num_bytes);
+                               uint8_t *read_buf = malloc(num_bytes);
+                               uint8_t *write_buf = malloc(host_bufsiz);
+
+                               for (size_t i = 0; i < host_bufsiz; i++)
+                                       write_buf[i] = rand();
+                               command_print_sameline(CMD_CTX,
+                                               "Test write %d x %d @ %d from %saligned buffer: ", count,
+                                               size, offset, host_offset ? "un" : "");
+
+                               retval = target_write_memory(target, wa->address, 1, num_bytes, test_pattern);
+                               if (retval != ERROR_OK) {
+                                       command_print(CMD_CTX, "Test pattern write failed");
+                                       goto nextw;
+                               }
+
+                               /* replay on host */
+                               memcpy(read_ref, test_pattern, num_bytes);
+                               memcpy(read_ref + size + offset, write_buf + host_offset, count * size);
+
+                               struct duration bench;
+                               duration_start(&bench);
+
+                               retval = target_write_memory(target, wa->address + size + offset, size, count,
+                                               write_buf + host_offset);
+
+                               duration_measure(&bench);
+
+                               if (retval == ERROR_TARGET_UNALIGNED_ACCESS) {
+                                       command_print(CMD_CTX, "Unsupported alignment");
+                                       goto nextw;
+                               } else if (retval != ERROR_OK) {
+                                       command_print(CMD_CTX, "Memory write failed");
+                                       goto nextw;
+                               }
+
+                               /* read back */
+                               retval = target_read_memory(target, wa->address, 1, num_bytes, read_buf);
+                               if (retval != ERROR_OK) {
+                                       command_print(CMD_CTX, "Test pattern write failed");
+                                       goto nextw;
+                               }
+
+                               /* check result */
+                               int result = memcmp(read_ref, read_buf, num_bytes);
+                               if (result == 0) {
+                                       command_print(CMD_CTX, "Pass in %fs (%0.3f KiB/s)",
+                                                       duration_elapsed(&bench),
+                                                       duration_kbps(&bench, count * size));
+                               } else {
+                                       command_print(CMD_CTX, "Compare failed");
+                                       binprint(CMD_CTX, "ref:", read_ref, num_bytes);
+                                       binprint(CMD_CTX, "buf:", read_buf, num_bytes);
+                               }
+nextw:
+                               free(read_ref);
+                               free(read_buf);
+                       }
+               }
+       }
+
+       free(test_pattern);
+
+       if (wa != NULL)
+               target_free_working_area(target, wa);
+       return retval;
+}
+
 static const struct command_registration target_exec_command_handlers[] = {
        {
                .name = "fast_load_image",
@@ -5523,7 +5863,7 @@ static const struct command_registration target_exec_command_handlers[] = {
                .name = "profile",
                .handler = handle_profile_command,
                .mode = COMMAND_EXEC,
-               .usage = "seconds filename",
+               .usage = "seconds filename [start end]",
                .help = "profiling samples the CPU PC",
        },
        /** @todo don't register virt2phys() unless target supports it */
@@ -5538,9 +5878,9 @@ static const struct command_registration target_exec_command_handlers[] = {
                .name = "reg",
                .handler = handle_reg_command,
                .mode = COMMAND_EXEC,
-               .help = "display or set a register; with no arguments, "
-                       "displays all registers and their values",
-               .usage = "[(register_name|register_number) [value]]",
+               .help = "display (reread from target with \"force\") or set a register; "
+                       "with no arguments, displays all registers and their values",
+               .usage = "[(register_number|register_name) [(value|'force')]]",
        },
        {
                .name = "poll",
@@ -5720,6 +6060,13 @@ static const struct command_registration target_exec_command_handlers[] = {
                .help = "list all tasks ",
                .usage = " ",
        },
+       {
+               .name = "test_mem_access",
+               .handler = handle_test_mem_access_command,
+               .mode = COMMAND_EXEC,
+               .help = "Test the target's memory access functions",
+               .usage = "size",
+       },
 
        COMMAND_REGISTRATION_DONE
 };