zy1000: clean up jtag_add_xx_scan fn's
[fw/openocd] / src / jtag / zy1000 / zy1000.c
index 33947332bb563a5dfd1fda94e0f0c97d320c74c3..6f75e59bcc76e0c21f31b9525b577c06fbfcc06c 100644 (file)
@@ -1,5 +1,5 @@
 /***************************************************************************
- *   Copyright (C) 2007-2008 by Øyvind Harboe                              *
+ *   Copyright (C) 2007-2010 by Øyvind Harboe                              *
  *                                                                         *
  *   This program is free software; you can redistribute it and/or modify  *
  *   it under the terms of the GNU General Public License as published by  *
  *   Free Software Foundation, Inc.,                                       *
  *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.             *
  ***************************************************************************/
+
+/* This file supports the zy1000 debugger: http://www.zylin.com/zy1000.html
+ *
+ * The zy1000 is a standalone debugger that has a web interface and
+ * requires no drivers on the developer host as all communication
+ * is via TCP/IP. The zy1000 gets its performance (~400-700kBytes/s
+ * DCC downloads @ 16MHz target) as it has an FPGA to hardware
+ * accelerate the JTAG commands, while offering *very* low latency
+ * between OpenOCD and the FPGA registers.
+ *
+ * The disadvantage of the zy1000 is that it has a feeble CPU compared to
+ * a PC(ca. 50-500 DMIPS depending on how one counts it), whereas a PC
+ * is on the order of 10000 DMIPS(i.e. at a factor of 20-200).
+ *
+ * The zy1000 revc hardware is using an Altera Nios CPU, whereas the
+ * revb is using ARM7 + Xilinx.
+ *
+ * See Zylin web pages or contact Zylin for more information.
+ *
+ * The reason this code is in OpenOCD rather than OpenOCD linked with the
+ * ZY1000 code is that OpenOCD is the long road towards getting
+ * libopenocd into place. libopenocd will support both low performance,
+ * low latency systems(embedded) and high performance high latency
+ * systems(PCs).
+ */
 #ifdef HAVE_CONFIG_H
 #include "config.h"
 #endif
 
-#include "embeddedice.h"
-#include "minidriver.h"
-#include "interface.h"
+#include <target/embeddedice.h>
+#include <jtag/minidriver.h>
+#include <jtag/interface.h>
+#include "zy1000_version.h"
 
 #include <cyg/hal/hal_io.h>             // low level i/o
 #include <cyg/hal/hal_diag.h>
 
+#include <time.h>
+
+#ifdef CYGPKG_HAL_NIOS2
+#include <cyg/hal/io.h>
+#include <cyg/firmwareutil/firmwareutil.h>
+#endif
 
-#define ZYLIN_VERSION "1.52"
+#define ZYLIN_VERSION GIT_ZY1000_VERSION
 #define ZYLIN_DATE __DATE__
 #define ZYLIN_TIME __TIME__
-#define ZYLIN_OPENOCD "$Revision$"
-#define ZYLIN_OPENOCD_VERSION "Zylin JTAG ZY1000 " ZYLIN_VERSION " " ZYLIN_DATE " " ZYLIN_TIME
-
-/* low level command set
- */
-void zy1000_reset(int trst, int srst);
-
-
-int zy1000_speed(int speed);
-int zy1000_register_commands(struct command_context_s *cmd_ctx);
-int zy1000_init(void);
-int zy1000_quit(void);
+#define ZYLIN_OPENOCD GIT_OPENOCD_VERSION
+#define ZYLIN_OPENOCD_VERSION "ZY1000 " ZYLIN_VERSION " " ZYLIN_DATE
 
-/* interface commands */
-int zy1000_handle_zy1000_port_command(struct command_context_s *cmd_ctx, char *cmd, char **args, int argc);
 
 static int zy1000_khz(int khz, int *jtag_speed)
 {
@@ -78,8 +98,8 @@ static bool readPowerDropout(void)
 {
        cyg_uint32 state;
        // sample and clear power dropout
-       HAL_WRITE_UINT32(ZY1000_JTAG_BASE + 0x10, 0x80);
-       HAL_READ_UINT32(ZY1000_JTAG_BASE + 0x10, state);
+       ZY1000_POKE(ZY1000_JTAG_BASE + 0x10, 0x80);
+       ZY1000_PEEK(ZY1000_JTAG_BASE + 0x10, state);
        bool powerDropout;
        powerDropout = (state & 0x80) != 0;
        return powerDropout;
@@ -90,8 +110,8 @@ static bool readSRST(void)
 {
        cyg_uint32 state;
        // sample and clear SRST sensing
-       HAL_WRITE_UINT32(ZY1000_JTAG_BASE + 0x10, 0x00000040);
-       HAL_READ_UINT32(ZY1000_JTAG_BASE + 0x10, state);
+       ZY1000_POKE(ZY1000_JTAG_BASE + 0x10, 0x00000040);
+       ZY1000_PEEK(ZY1000_JTAG_BASE + 0x10, state);
        bool srstAsserted;
        srstAsserted = (state & 0x40) != 0;
        return srstAsserted;
@@ -109,24 +129,16 @@ static int zy1000_power_dropout(int *dropout)
        return ERROR_OK;
 }
 
-
-jtag_interface_t zy1000_interface =
-{
-       .name = "ZY1000",
-       .execute_queue = NULL,
-       .speed = zy1000_speed,
-       .register_commands = zy1000_register_commands,
-       .init = zy1000_init,
-       .quit = zy1000_quit,
-       .khz = zy1000_khz,
-       .speed_div = zy1000_speed_div,
-       .power_dropout = zy1000_power_dropout,
-       .srst_asserted = zy1000_srst_asserted,
-};
-
 void zy1000_reset(int trst, int srst)
 {
        LOG_DEBUG("zy1000 trst=%d, srst=%d", trst, srst);
+
+       /* flush the JTAG FIFO. Not flushing the queue before messing with
+        * reset has such interesting bugs as causing hard to reproduce
+        * RCLK bugs as RCLK will stop responding when TRST is asserted
+        */
+       waitIdle();
+
        if (!srst)
        {
                ZY1000_POKE(ZY1000_JTAG_BASE + 0x14, 0x00000001);
@@ -151,7 +163,6 @@ void zy1000_reset(int trst, int srst)
 
        if (trst||(srst && (jtag_get_reset_config() & RESET_SRST_PULLS_TRST)))
        {
-               waitIdle();
                /* we're now in the RESET state until trst is deasserted */
                ZY1000_POKE(ZY1000_JTAG_BASE + 0x20, TAP_RESET);
        } else
@@ -188,6 +199,9 @@ void zy1000_reset(int trst, int srst)
 
 int zy1000_speed(int speed)
 {
+       /* flush JTAG master FIFO before setting speed */
+       waitIdle();
+
        if (speed == 0)
        {
                /*0 means RCLK*/
@@ -218,38 +232,30 @@ static void setPower(bool power)
        savePower = power;
        if (power)
        {
-               HAL_WRITE_UINT32(ZY1000_JTAG_BASE + 0x14, 0x8);
+               ZY1000_POKE(ZY1000_JTAG_BASE + 0x14, 0x8);
        } else
        {
-               HAL_WRITE_UINT32(ZY1000_JTAG_BASE + 0x10, 0x8);
+               ZY1000_POKE(ZY1000_JTAG_BASE + 0x10, 0x8);
        }
 }
 
-int handle_power_command(struct command_context_s *cmd_ctx, char *cmd, char **args, int argc)
+COMMAND_HANDLER(handle_power_command)
 {
-       if (argc > 1)
+       switch (CMD_ARGC)
        {
-               return ERROR_INVALID_ARGUMENTS;
+       case 1: {
+               bool enable;
+               COMMAND_PARSE_ON_OFF(CMD_ARGV[0], enable);
+               setPower(enable);
+               // fall through
        }
-
-       if (argc == 1)
-       {
-               if (strcmp(args[0], "on") == 0)
-               {
-                       setPower(1);
-               }
-               else if (strcmp(args[0], "off") == 0)
-               {
-                       setPower(0);
-               } else
-               {
-                       command_print(cmd_ctx, "arg is \"on\" or \"off\"");
-                       return ERROR_INVALID_ARGUMENTS;
-               }
+       case 0:
+               LOG_INFO("Target power %s", savePower ? "on" : "off");
+               break;
+       default:
+               return ERROR_INVALID_ARGUMENTS;
        }
 
-       command_print(cmd_ctx, "Target power %s", savePower ? "on" : "off");
-
        return ERROR_OK;
 }
 
@@ -257,9 +263,8 @@ int handle_power_command(struct command_context_s *cmd_ctx, char *cmd, char **ar
 /* Give TELNET a way to find out what version this is */
 static int jim_zy1000_version(Jim_Interp *interp, int argc, Jim_Obj *const *argv)
 {
-       if ((argc < 1) || (argc > 2))
+       if ((argc < 1) || (argc > 3))
                return JIM_ERR;
-       char buff[128];
        const char *version_str = NULL;
 
        if (argc == 1)
@@ -268,12 +273,12 @@ static int jim_zy1000_version(Jim_Interp *interp, int argc, Jim_Obj *const *argv
        } else
        {
                const char *str = Jim_GetString(argv[1], NULL);
+               const char *str2 = NULL;
+               if (argc > 2)
+                       str2 = Jim_GetString(argv[2], NULL);
                if (strcmp("openocd", str) == 0)
                {
-                       int revision;
-                       revision = atol(ZYLIN_OPENOCD + strlen("XRevision: "));
-                       sprintf(buff, "%d", revision);
-                       version_str = buff;
+                       version_str = ZYLIN_OPENOCD;
                }
                else if (strcmp("zy1000", str) == 0)
                {
@@ -283,6 +288,41 @@ static int jim_zy1000_version(Jim_Interp *interp, int argc, Jim_Obj *const *argv
                {
                        version_str = ZYLIN_DATE;
                }
+               else if (strcmp("time", str) == 0)
+               {
+                       version_str = ZYLIN_TIME;
+               }
+               else if (strcmp("pcb", str) == 0)
+               {
+#ifdef CYGPKG_HAL_NIOS2
+                       version_str="c";
+#else
+                       version_str="b";
+#endif
+               }
+#ifdef CYGPKG_HAL_NIOS2
+               else if (strcmp("fpga", str) == 0)
+               {
+
+                       /* return a list of 32 bit integers to describe the expected
+                        * and actual FPGA
+                        */
+                       static char *fpga_id = "0x12345678 0x12345678 0x12345678 0x12345678";
+                       cyg_uint32 id, timestamp;
+                       HAL_READ_UINT32(SYSID_BASE, id);
+                       HAL_READ_UINT32(SYSID_BASE+4, timestamp);
+                       sprintf(fpga_id, "0x%08x 0x%08x 0x%08x 0x%08x", id, timestamp, SYSID_ID, SYSID_TIMESTAMP);
+                       version_str = fpga_id;
+                       if ((argc>2) && (strcmp("time", str2) == 0))
+                       {
+                           time_t last_mod = timestamp;
+                           char * t = ctime (&last_mod) ;
+                           t[strlen(t)-1] = 0;
+                           version_str = t;
+                       }
+               }
+#endif
+
                else
                {
                        return JIM_ERR;
@@ -295,6 +335,61 @@ static int jim_zy1000_version(Jim_Interp *interp, int argc, Jim_Obj *const *argv
 }
 
 
+#ifdef CYGPKG_HAL_NIOS2
+
+
+/* Bundles an opaque data pointer with a pointer to a cyg_upgrade_info
+ * descriptor.
+ * NOTE(review): not referenced in the visible part of this file -- confirm
+ * whether it is still needed.
+ */
+struct info_forward
+{
+       void *data;
+       struct cyg_upgrade_info *upgraded_file;
+};
+
+/* Reporting callback installed as the last member of firmware_info.
+ *
+ * Formats the printf-style message (format, args) into a freshly
+ * allocated string, emits it through LOG_USER_N and releases the string.
+ * The data argument is not used here.
+ */
+static void report_info(void *data, const char * format, va_list args)
+{
+       char *s = alloc_vprintf(format, args);
+
+       /* alloc_vprintf() hands back an allocated buffer; if it could not
+        * produce one there is nothing to print, and passing NULL to a "%s"
+        * conversion would be undefined behavior */
+       if (s == NULL)
+               return;
+
+       LOG_USER_N("%s", s);
+       free(s);
+}
+
+/* Upgrade descriptor passed to cyg_firmware_upgrade() by
+ * jim_zy1000_writefirmware(); its .file member is also the path the
+ * uploaded image is written to before the upgrade is started.
+ *
+ * NOTE(review): the initializer is positional, so the meaning of each
+ * value depends on the member order of struct cyg_upgrade_info, which is
+ * not visible here -- confirm against the firmwareutil header. The
+ * "0x1f00000 - 0x0300000" entry presumably is a length derived from an
+ * end and a start offset -- TODO confirm.
+ */
+struct cyg_upgrade_info firmware_info =
+{
+               (cyg_uint8 *)0x84000000,
+               "/ram/firmware.phi",
+               "Firmware",
+               0x0300000,
+               0x1f00000 -
+               0x0300000,
+               "ZylinNiosFirmware\n",
+               report_info,
+};
+
+/* Jim handler registered as "updatezy1000firmware".
+ *
+ * Writes the single string argument (argv[1]) to the file named by
+ * firmware_info.file and then calls cyg_firmware_upgrade() with
+ * firmware_info.
+ *
+ * Returns JIM_OK on success. Returns JIM_ERR when the argument count is
+ * wrong, the file cannot be opened, fewer bytes than requested were
+ * written, or cyg_firmware_upgrade() returns zero.
+ */
+static int jim_zy1000_writefirmware(Jim_Interp *interp, int argc, Jim_Obj *const *argv)
+{
+       if (argc != 2)
+               return JIM_ERR;
+
+       int length;
+       const char *str = Jim_GetString(argv[1], &length);
+
+       /* open() reports failure with a negative value; 0 is a valid
+        * descriptor and must neither be rejected nor leaked */
+       int tmpFile = open(firmware_info.file, O_RDWR | O_CREAT | O_TRUNC);
+       if (tmpFile < 0)
+       {
+               return JIM_ERR;
+       }
+
+       /* a short write counts as failure; close the file either way */
+       bool success = write(tmpFile, str, length) == length;
+       close(tmpFile);
+       if (!success)
+               return JIM_ERR;
+
+       if (!cyg_firmware_upgrade(NULL, firmware_info))
+               return JIM_ERR;
+
+       return JIM_OK;
+}
+#endif
+
 static int
 zylinjtag_Jim_Command_powerstatus(Jim_Interp *interp,
                                                                   int argc,
@@ -314,19 +409,6 @@ zylinjtag_Jim_Command_powerstatus(Jim_Interp *interp,
        return JIM_OK;
 }
 
-int zy1000_register_commands(struct command_context_s *cmd_ctx)
-{
-       register_command(cmd_ctx, NULL, "power", handle_power_command, COMMAND_ANY,
-                       "power <on/off> - turn power switch to target on/off. No arguments - print status.");
-
-       Jim_CreateCommand(interp, "zy1000_version", jim_zy1000_version, NULL, NULL);
-
-
-       Jim_CreateCommand(interp, "powerstatus", zylinjtag_Jim_Command_powerstatus, NULL, NULL);
-
-       return ERROR_OK;
-}
-
 
 
 
@@ -410,134 +492,103 @@ static void shiftValueInnerFlip(const tap_state_t state, const tap_state_t endSt
 }
 #endif
 
-extern int jtag_check_value(uint8_t *captured, void *priv);
-
-static __inline void scanFields(int num_fields, const scan_field_t *fields, tap_state_t shiftState, tap_state_t end_state)
+// here we shuffle N bits out/in
+static __inline void scanBits(const uint8_t *out_value, uint8_t *in_value, int num_bits, bool pause, tap_state_t shiftState, tap_state_t end_state)
 {
-       int i;
-       int j;
-       int k;
-
-       for (i = 0; i < num_fields; i++)
+       tap_state_t pause_state = shiftState;
+       for (int j = 0; j < num_bits; j += 32)
        {
-               cyg_uint32 value;
-
-               uint8_t *inBuffer = NULL;
-
-
-               // figure out where to store the input data
-               int num_bits = fields[i].num_bits;
-               if (fields[i].in_value != NULL)
+               int k = num_bits - j;
+               if (k > 32)
                {
-                       inBuffer = fields[i].in_value;
+                       k = 32;
+                       /* we have more to shift out */
+               } else if (pause)
+               {
+                       /* this was the last to shift out this time */
+                       pause_state = end_state;
                }
 
-               // here we shuffle N bits out/in
-               j = 0;
-               while (j < num_bits)
+               // we have (num_bits + 7)/8 bytes of bits to toggle out.
+               // bits are pushed out LSB to MSB
+               cyg_uint32 value;
+               value = 0;
+               if (out_value != NULL)
                {
-                       tap_state_t pause_state;
-                       int l;
-                       k = num_bits-j;
-                       pause_state = (shiftState == TAP_DRSHIFT)?TAP_DRSHIFT:TAP_IRSHIFT;
-                       if (k > 32)
+                       for (int l = 0; l < k; l += 8)
                        {
-                               k = 32;
-                               /* we have more to shift out */
-                       } else if (i == num_fields-1)
-                       {
-                               /* this was the last to shift out this time */
-                               pause_state = end_state;
-                       }
-
-                       // we have (num_bits + 7)/8 bytes of bits to toggle out.
-                       // bits are pushed out LSB to MSB
-                       value = 0;
-                       if (fields[i].out_value != NULL)
-                       {
-                               for (l = 0; l < k; l += 8)
-                               {
-                                       value|=fields[i].out_value[(j + l)/8]<<l;
-                               }
+                               value|=out_value[(j + l)/8]<<l;
                        }
-                       /* mask away unused bits for easier debugging */
+               }
+               /* mask away unused bits for easier debugging */
+               if (k < 32)
+               {
                        value&=~(((uint32_t)0xffffffff) << k);
+               } else
+               {
+                       /* Shifting by >= 32 is not defined by the C standard
+                        * and will in fact shift by &0x1f bits on nios */
+               }
+
+               shiftValueInner(shiftState, pause_state, k, value);
 
-                       shiftValueInner(shiftState, pause_state, k, value);
+               if (in_value != NULL)
+               {
+                       // data in, LSB to MSB
+                       value = getShiftValue();
+                       // we're shifting in data to MSB, shift data to be aligned for returning the value
+                       value >>= 32-k;
 
-                       if (inBuffer != NULL)
+                       for (int l = 0; l < k; l += 8)
                        {
-                               // data in, LSB to MSB
-                               value = getShiftValue();
-                               // we're shifting in data to MSB, shift data to be aligned for returning the value
-                               value >>= 32-k;
-
-                               for (l = 0; l < k; l += 8)
-                               {
-                                       inBuffer[(j + l)/8]=(value >> l)&0xff;
-                               }
+                               in_value[(j + l)/8]=(value >> l)&0xff;
                        }
-                       j += k;
                }
        }
 }
 
-int interface_jtag_set_end_state(tap_state_t state)
+static __inline void scanFields(int num_fields, const struct scan_field *fields, tap_state_t shiftState, tap_state_t end_state)
 {
-       return ERROR_OK;
+       for (int i = 0; i < num_fields; i++)
+       {
+               scanBits(fields[i].out_value,
+                               fields[i].in_value,
+                               fields[i].num_bits,
+                               (i == num_fields-1),
+                               shiftState,
+                               end_state);
+       }
 }
 
-
-int interface_jtag_add_ir_scan(int num_fields, const scan_field_t *fields, tap_state_t state)
+int interface_jtag_add_ir_scan(struct jtag_tap *active, const struct scan_field *fields, tap_state_t state)
 {
-
-       int j;
        int scan_size = 0;
-       jtag_tap_t *tap, *nextTap;
+       struct jtag_tap *tap, *nextTap;
+       tap_state_t pause_state = TAP_IRSHIFT;
+
        for (tap = jtag_tap_next_enabled(NULL); tap!= NULL; tap = nextTap)
        {
                nextTap = jtag_tap_next_enabled(tap);
-               tap_state_t end_state;
-               if (nextTap == NULL)
-               {
-                       end_state = state;
-               } else
+               if (nextTap==NULL)
                {
-                       end_state = TAP_IRSHIFT;
+                       pause_state = state;
                }
-
-               int found = 0;
-
                scan_size = tap->ir_length;
 
                /* search the list */
-               for (j = 0; j < num_fields; j++)
+               if (tap == active)
                {
-                       if (tap == fields[j].tap)
-                       {
-                               found = 1;
-
-                               scanFields(1, fields + j, TAP_IRSHIFT, end_state);
-                               /* update device information */
-                               buf_cpy(fields[j].out_value, tap->cur_instr, scan_size);
-
-                               tap->bypass = 0;
-                               break;
-                       }
-               }
+                       scanFields(1, fields, TAP_IRSHIFT, pause_state);
+                       /* update device information */
+                       buf_cpy(fields[0].out_value, tap->cur_instr, scan_size);
 
-               if (!found)
+                       tap->bypass = 0;
+               } else
                {
                        /* if a device isn't listed, set it to BYPASS */
-                       uint8_t ones[]={0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff};
+                       assert(scan_size <= 32);
+                       shiftValueInner(TAP_IRSHIFT, pause_state, scan_size, 0xffffffff);
 
-                       scan_field_t tmp;
-                       memset(&tmp, 0, sizeof(tmp));
-                       tmp.out_value = ones;
-                       tmp.num_bits = scan_size;
-                       scanFields(1, &tmp, TAP_IRSHIFT, end_state);
-                       /* update device information */
-                       buf_cpy(tmp.out_value, tap->cur_instr, scan_size);
                        tap->bypass = 1;
                }
        }
@@ -549,66 +600,46 @@ int interface_jtag_add_ir_scan(int num_fields, const scan_field_t *fields, tap_s
 
 
 
-int interface_jtag_add_plain_ir_scan(int num_fields, const scan_field_t *fields, tap_state_t state)
+int interface_jtag_add_plain_ir_scan(int num_bits, const uint8_t *out_bits, uint8_t *in_bits, tap_state_t state)
 {
-       scanFields(num_fields, fields, TAP_IRSHIFT, state);
-
+       scanBits(out_bits, in_bits, num_bits, true, TAP_IRSHIFT, state);
        return ERROR_OK;
 }
 
-/*extern jtag_command_t **jtag_get_last_command_p(void);*/
-
-int interface_jtag_add_dr_scan(int num_fields, const scan_field_t *fields, tap_state_t state)
+int interface_jtag_add_dr_scan(struct jtag_tap *active, int num_fields, const struct scan_field *fields, tap_state_t state)
 {
-
-       int j;
-       jtag_tap_t *tap, *nextTap;
+       struct jtag_tap *tap, *nextTap;
+       tap_state_t pause_state = TAP_DRSHIFT;
        for (tap = jtag_tap_next_enabled(NULL); tap!= NULL; tap = nextTap)
        {
                nextTap = jtag_tap_next_enabled(tap);
-               int found = 0;
-               tap_state_t end_state;
-               if (nextTap == NULL)
-               {
-                       end_state = state;
-               } else
+               if (nextTap==NULL)
                {
-                       end_state = TAP_DRSHIFT;
+                       pause_state = state;
                }
 
-               for (j = 0; j < num_fields; j++)
-               {
-                       if (tap == fields[j].tap)
-                       {
-                               found = 1;
-
-                               scanFields(1, fields + j, TAP_DRSHIFT, end_state);
-                       }
-               }
-               if (!found)
+               /* Find a range of fields to write to this tap */
+               if (tap == active)
                {
-                       scan_field_t tmp;
-                       /* program the scan field to 1 bit length, and ignore it's value */
-                       tmp.num_bits = 1;
-                       tmp.out_value = NULL;
-                       tmp.in_value = NULL;
+                       assert(!tap->bypass);
 
-                       scanFields(1, &tmp, TAP_DRSHIFT, end_state);
-               }
-               else
+                       scanFields(num_fields, fields, TAP_DRSHIFT, pause_state);
+               } else
                {
+                       /* Shift out a 0 for disabled tap's */
+                       assert(tap->bypass);
+                       shiftValueInner(TAP_DRSHIFT, pause_state, 1, 0);
                }
        }
        return ERROR_OK;
 }
 
-int interface_jtag_add_plain_dr_scan(int num_fields, const scan_field_t *fields, tap_state_t state)
+int interface_jtag_add_plain_dr_scan(int num_bits, const uint8_t *out_bits, uint8_t *in_bits, tap_state_t state)
 {
-       scanFields(num_fields, fields, TAP_DRSHIFT, state);
+       scanBits(out_bits, in_bits, num_bits, true, TAP_DRSHIFT, state);
        return ERROR_OK;
 }
 
-
 int interface_jtag_add_tlr()
 {
        setCurrentState(TAP_RESET);
@@ -616,11 +647,6 @@ int interface_jtag_add_tlr()
 }
 
 
-
-
-extern int jtag_nsrst_delay;
-extern int jtag_ntrst_delay;
-
 int interface_jtag_add_reset(int req_trst, int req_srst)
 {
        zy1000_reset(req_trst, req_srst);
@@ -665,7 +691,6 @@ static int zy1000_jtag_add_clocks(int num_cycles, tap_state_t state, tap_state_t
        ZY1000_POKE(ZY1000_JTAG_BASE + 0x20, state);
 #endif
 
-
        return ERROR_OK;
 }
 
@@ -685,18 +710,52 @@ int interface_jtag_add_sleep(uint32_t us)
        return ERROR_OK;
 }
 
+/* Drive num_bits TMS values by hand, one per write to JTAG register 0x28.
+ *
+ * Bit i of the sequence is bit (i % 8) of seq[i / 8], i.e. bytes are
+ * consumed in order and each byte LSB first. Afterwards, unless state is
+ * TAP_INVALID, state is written to JTAG register 0x20.
+ *
+ * Always returns ERROR_OK.
+ */
+int interface_add_tms_seq(unsigned num_bits, const uint8_t *seq, enum tap_state state)
+{
+       /*wait for the fifo to be empty*/
+       waitIdle();
+
+       for (unsigned bit = 0; bit < num_bits; bit++)
+       {
+               const int tms = (seq[bit / 8] >> (bit % 8)) & 1;
+
+               waitIdle();
+               ZY1000_POKE(ZY1000_JTAG_BASE + 0x28, tms);
+       }
+
+       waitIdle();
+
+       /* TAP_INVALID would be normal if we are switching to SWD mode;
+        * nothing is written to register 0x20 in that case */
+       if (state == TAP_INVALID)
+               return ERROR_OK;
+
+       ZY1000_POKE(ZY1000_JTAG_BASE + 0x20, state);
+       return ERROR_OK;
+}
+
 int interface_jtag_add_pathmove(int num_states, const tap_state_t *path)
 {
        int state_count;
        int tms = 0;
 
-       /*wait for the fifo to be empty*/
-       waitIdle();
-
        state_count = 0;
 
        tap_state_t cur_state = cmd_queue_cur_state;
 
+       uint8_t seq[16];
+       memset(seq, 0, sizeof(seq));
+       assert(num_states < (int)((sizeof(seq) * 8)));
+
        while (num_states)
        {
                if (tap_state_transition(cur_state, false) == path[state_count])
@@ -713,49 +772,53 @@ int interface_jtag_add_pathmove(int num_states, const tap_state_t *path)
                        exit(-1);
                }
 
-               waitIdle();
-               ZY1000_POKE(ZY1000_JTAG_BASE + 0x28,  tms);
+               seq[state_count/8] = seq[state_count/8] | (tms << (state_count % 8));
 
                cur_state = path[state_count];
                state_count++;
                num_states--;
        }
 
-       waitIdle();
-       ZY1000_POKE(ZY1000_JTAG_BASE + 0x20,  cur_state);
-       return ERROR_OK;
+       return interface_add_tms_seq(state_count, seq, cur_state);
 }
 
-
-
-void embeddedice_write_dcc(jtag_tap_t *tap, int reg_addr, uint8_t *buffer, int little, int count)
+static void jtag_pre_post_bits(struct jtag_tap *tap, int *pre, int *post)
 {
-//     static int const reg_addr = 0x5;
-       tap_state_t end_state = jtag_get_end_state();
-       if (jtag_tap_next_enabled(jtag_tap_next_enabled(NULL)) == NULL)
+       /* bypass bits before and after */
+       int pre_bits = 0;
+       int post_bits = 0;
+
+       bool found = false;
+       struct jtag_tap *cur_tap, *nextTap;
+       for (cur_tap = jtag_tap_next_enabled(NULL); cur_tap!= NULL; cur_tap = nextTap)
        {
-               /* better performance via code duplication */
-               if (little)
+               nextTap = jtag_tap_next_enabled(cur_tap);
+               if (cur_tap == tap)
                {
-                       int i;
-                       for (i = 0; i < count; i++)
-                       {
-                               shiftValueInner(TAP_DRSHIFT, TAP_DRSHIFT, 32, fast_target_buffer_get_u32(buffer, 1));
-                               shiftValueInner(TAP_DRSHIFT, end_state, 6, reg_addr | (1 << 5));
-                               buffer += 4;
-                       }
+                       found = true;
                } else
                {
-                       int i;
-                       for (i = 0; i < count; i++)
+                       if (found)
                        {
-                               shiftValueInner(TAP_DRSHIFT, TAP_DRSHIFT, 32, fast_target_buffer_get_u32(buffer, 0));
-                               shiftValueInner(TAP_DRSHIFT, end_state, 6, reg_addr | (1 << 5));
-                               buffer += 4;
+                               post_bits++;
+                       } else
+                       {
+                               pre_bits++;
                        }
                }
        }
-       else
+       *pre = pre_bits;
+       *post = post_bits;
+}
+
+void embeddedice_write_dcc(struct jtag_tap *tap, int reg_addr, uint8_t *buffer, int little, int count)
+{
+
+       int pre_bits;
+       int post_bits;
+       jtag_pre_post_bits(tap, &pre_bits, &post_bits);
+
+       if (pre_bits + post_bits + 6 > 32)
        {
                int i;
                for (i = 0; i < count; i++)
@@ -763,7 +826,178 @@ void embeddedice_write_dcc(jtag_tap_t *tap, int reg_addr, uint8_t *buffer, int l
                        embeddedice_write_reg_inner(tap, reg_addr, fast_target_buffer_get_u32(buffer, little));
                        buffer += 4;
                }
+       } else
+       {
+               tap_state_t end_state = TAP_IDLE;
+               tap_state_t shift_end_state = TAP_DRSHIFT;
+               if (post_bits == 0)
+                       shift_end_state = end_state;
+
+               shiftValueInner(TAP_DRSHIFT, TAP_DRSHIFT, pre_bits, 0);
+               int i;
+               for (i = 0; i < count - 1; i++)
+               {
+                       /* Fewer pokes means we get to use the FIFO more efficiently */
+                       shiftValueInner(TAP_DRSHIFT, TAP_DRSHIFT, 32, fast_target_buffer_get_u32(buffer, little));
+                       shiftValueInner(TAP_DRSHIFT, shift_end_state, 6 + post_bits + pre_bits, (reg_addr | (1 << 5)));
+                       buffer += 4;
+               }
+               shiftValueInner(TAP_DRSHIFT, TAP_DRSHIFT, 32, fast_target_buffer_get_u32(buffer, little));
+               shiftValueInner(TAP_DRSHIFT, shift_end_state, 6, reg_addr | (1 << 5));
+               shiftValueInner(shift_end_state, end_state, post_bits, 0);
        }
 }
 
 
+
+/* ZY1000 implementation of the ARM11 "data to core, no ack" transfer.
+ *
+ * Sends count 32 bit words read from data (byte-wise, least significant
+ * byte first, so data may be unaligned) to tap. Every word except the
+ * last is shifted out directly through the FPGA: pre_bits zero bits,
+ * the 32 data bits, then post_bits zero bits ending in TAP_DRPAUSE,
+ * followed by a hand-clocked TMS sequence that leads back to
+ * TAP_DRSHIFT. The last word goes through jtag_add_dr_out() with
+ * TAP_IDLE as end state.
+ *
+ * The opcode argument is not used in this implementation.
+ *
+ * Returns the result of jtag_execute_queue().
+ */
+int arm11_run_instr_data_to_core_noack_inner(struct jtag_tap * tap, uint32_t opcode, uint32_t * data, size_t count)
+{
+#if 0
+       int arm11_run_instr_data_to_core_noack_inner_default(struct jtag_tap * tap, uint32_t opcode, uint32_t * data, size_t count);
+       return arm11_run_instr_data_to_core_noack_inner_default(tap, opcode, data, count);
+#else
+       static const int bits[] = {32, 2};
+       uint32_t values[] = {0, 0};
+
+       /* Nothing to send. Without this guard the "--count" below would
+        * wrap around (count is unsigned) and run far past the buffer. */
+       if (count == 0)
+               return jtag_execute_queue();
+
+       /* FIX!!!!!! the target_write_memory() API started this nasty problem
+        * with unaligned uint32_t * pointers... */
+       const uint8_t *t = (const uint8_t *)data;
+
+       /* bypass bits before and after; jtag_pre_post_bits() already walks
+        * the whole chain, so the taps must not be counted a second time
+        * here */
+       int pre_bits;
+       int post_bits;
+       jtag_pre_post_bits(tap, &pre_bits, &post_bits);
+
+       /* the 2 bits that follow the 32 bit word (see bits[] above) */
+       post_bits+=2;
+
+       while (--count > 0)
+       {
+               shiftValueInner(TAP_DRSHIFT, TAP_DRSHIFT, pre_bits, 0);
+
+               /* assemble the word byte by byte; the casts keep the shifts
+                * in unsigned arithmetic so bit 31 can be set safely */
+               uint32_t value;
+               value = *t++;
+               value |= ((uint32_t)*t++ << 8);
+               value |= ((uint32_t)*t++ << 16);
+               value |= ((uint32_t)*t++ << 24);
+
+               shiftValueInner(TAP_DRSHIFT, TAP_DRSHIFT, 32, value);
+               /* minimum 2 bits */
+               shiftValueInner(TAP_DRSHIFT, TAP_DRPAUSE, post_bits, 0);
+
+#if 1
+               /* copy & paste from arm11_dbgtap.c */
+               //TAP_DREXIT2, TAP_DRUPDATE, TAP_IDLE, TAP_IDLE, TAP_IDLE, TAP_DRSELECT, TAP_DRCAPTURE, TAP_DRSHIFT
+
+               waitIdle();
+               ZY1000_POKE(ZY1000_JTAG_BASE + 0x28, 1);
+               ZY1000_POKE(ZY1000_JTAG_BASE + 0x28, 1);
+               ZY1000_POKE(ZY1000_JTAG_BASE + 0x28, 0);
+               ZY1000_POKE(ZY1000_JTAG_BASE + 0x28, 0);
+               ZY1000_POKE(ZY1000_JTAG_BASE + 0x28, 0);
+               ZY1000_POKE(ZY1000_JTAG_BASE + 0x28, 1);
+               ZY1000_POKE(ZY1000_JTAG_BASE + 0x28, 0);
+               ZY1000_POKE(ZY1000_JTAG_BASE + 0x28, 0);
+               /* we don't have to wait for the queue to empty here. waitIdle();        */
+               ZY1000_POKE(ZY1000_JTAG_BASE + 0x20, TAP_DRSHIFT);
+#else
+               static const tap_state_t arm11_MOVE_DRPAUSE_IDLE_DRPAUSE_with_delay[] =
+               {
+                       TAP_DREXIT2, TAP_DRUPDATE, TAP_IDLE, TAP_IDLE, TAP_IDLE, TAP_DRSELECT, TAP_DRCAPTURE, TAP_DRSHIFT
+               };
+
+               jtag_add_pathmove(ARRAY_SIZE(arm11_MOVE_DRPAUSE_IDLE_DRPAUSE_with_delay),
+                       arm11_MOVE_DRPAUSE_IDLE_DRPAUSE_with_delay);
+#endif
+       }
+
+       values[0] = *t++;
+       values[0] |= ((uint32_t)*t++ << 8);
+       values[0] |= ((uint32_t)*t++ << 16);
+       values[0] |= ((uint32_t)*t++ << 24);
+
+       /* This will happen on the last iteration updating the current tap state
+        * so we don't have to track it during the common code path */
+       jtag_add_dr_out(tap,
+               2,
+               bits,
+               values,
+               TAP_IDLE);
+
+       return jtag_execute_queue();
+#endif
+}
+
+
+/* Command table referenced by zy1000_interface.commands.
+ *
+ * "power" uses a regular command handler, the remaining entries are Jim
+ * handlers. The firmware update command only exists in builds where
+ * CYGPKG_HAL_NIOS2 is defined. The table ends with
+ * COMMAND_REGISTRATION_DONE.
+ */
+static const struct command_registration zy1000_commands[] = {
+       {
+               .name = "power",
+               .handler = handle_power_command,
+               .mode = COMMAND_ANY,
+               .help = "Turn power switch to target on/off. "
+                       "With no arguments, prints status.",
+               /* closing quote after off was missing in the usage text */
+               .usage = "('on'|'off')",
+       },
+       {
+               .name = "zy1000_version",
+               .mode = COMMAND_ANY,
+               .jim_handler = jim_zy1000_version,
+               .help = "Print version info for zy1000.",
+               .usage = "['openocd'|'zy1000'|'date'|'time'|'pcb'|'fpga']",
+       },
+       {
+               .name = "powerstatus",
+               .mode = COMMAND_ANY,
+               .jim_handler = zylinjtag_Jim_Command_powerstatus,
+               .help = "Returns power status of target",
+       },
+#ifdef CYGPKG_HAL_NIOS2
+       {
+               .name = "updatezy1000firmware",
+               .mode = COMMAND_ANY,
+               .jim_handler = jim_zy1000_writefirmware,
+               .help = "writes firmware to flash",
+               /* .usage = "some_string", */
+       },
+#endif
+       COMMAND_REGISTRATION_DONE
+};
+
+
+
+/* Interface descriptor for the ZY1000: names the adapter, declares
+ * DEBUG_CAP_TMS_SEQ as supported and wires up the command table and the
+ * callbacks defined in this file.
+ */
+struct jtag_interface zy1000_interface =
+{
+       .name = "ZY1000",
+       .commands = zy1000_commands,
+       .supported = DEBUG_CAP_TMS_SEQ,
+
+       /* start-up and shutdown */
+       .init = zy1000_init,
+       .quit = zy1000_quit,
+
+       /* clock speed handling */
+       .speed = zy1000_speed,
+       .khz = zy1000_khz,
+       .speed_div = zy1000_speed_div,
+
+       /* target power and SRST sensing */
+       .power_dropout = zy1000_power_dropout,
+       .srst_asserted = zy1000_srst_asserted,
+
+       /* no queue executor is registered */
+       .execute_queue = NULL,
+};
+