lpc32xx: Flash driver
[fw/openocd] / src / jtag / zy1000 / zy1000.c
index f578058c25ad64fb26c51ba8ac32b02997030c4b..69fa4dc8e296c7876785eb59768488bf969ad9a9 100644 (file)
@@ -62,6 +62,9 @@
 #ifdef CYGPKG_HAL_NIOS2
 #include <cyg/hal/io.h>
 #include <cyg/firmwareutil/firmwareutil.h>
+#define ZYLIN_KHZ 60000
+#else
+#define ZYLIN_KHZ 64000
 #endif
 
 #define ZYLIN_VERSION GIT_ZY1000_VERSION
 #define ZYLIN_OPENOCD GIT_OPENOCD_VERSION
 #define ZYLIN_OPENOCD_VERSION "ZY1000 " ZYLIN_VERSION " " ZYLIN_DATE
 
+#else
+/* Assume we're connecting to a revc w/60MHz clock. */
+#define ZYLIN_KHZ 60000
 #endif
 
+
+/* The software needs to check if it's in RCLK mode or not */
+static bool zy1000_rclk = false;
+
 static int zy1000_khz(int khz, int *jtag_speed)
 {
        if (khz == 0)
@@ -80,7 +90,33 @@ static int zy1000_khz(int khz, int *jtag_speed)
        }
        else
        {
-               *jtag_speed = 64000/khz;
+               int speed;
+               /* Round the divisor up to the nearest even value (examples assume a 64000 kHz base clock).
+                *
+                * E.g. 16000kHz
+                * (64000 + 15999) / 16000 = 4
+                * (4 + 1) / 2 = 2
+                * 2 * 2 = 4
+                *
+                * 64000 / 4 = 16000
+                *
+                * E.g. 15999
+                * (64000 + 15998) / 15999 = 5
+                * (5 + 1) / 2 = 3
+                * 3 * 2 = 6
+                *
+                * 64000 / 6 = 10666
+                *
+                */
+               speed = (ZYLIN_KHZ + (khz -1)) / khz;
+               speed = (speed + 1 ) / 2;
+               speed *= 2;
+               if (speed > 8190)
+               {
+                       /* maximum divisor */
+                       speed = 8190;
+               }
+               *jtag_speed = speed;
        }
        return ERROR_OK;
 }
@@ -93,7 +129,7 @@ static int zy1000_speed_div(int speed, int *khz)
        }
        else
        {
-               *khz = 64000/speed;
+               *khz = ZYLIN_KHZ / speed;
        }
 
        return ERROR_OK;
@@ -222,24 +258,30 @@ int zy1000_speed(int speed)
        /* flush JTAG master FIFO before setting speed */
        waitIdle();
 
+       zy1000_rclk = false;
+
        if (speed == 0)
        {
                /*0 means RCLK*/
-               speed = 0;
                ZY1000_POKE(ZY1000_JTAG_BASE + 0x10, 0x100);
+               zy1000_rclk = true;
                LOG_DEBUG("jtag_speed using RCLK");
        }
        else
        {
                if (speed > 8190 || speed < 2)
                {
-                       LOG_USER("valid ZY1000 jtag_speed=[8190,2]. Divisor is 64MHz / even values between 8190-2, i.e. min 7814Hz, max 32MHz");
+                       LOG_USER("valid ZY1000 jtag_speed=[8190,2]. With divisor is %dkHz / even values between 8190-2, i.e. min %dHz, max %dMHz",
+                                       ZYLIN_KHZ, (ZYLIN_KHZ * 1000) / 8190, ZYLIN_KHZ / (2 * 1000));
                        return ERROR_INVALID_ARGUMENTS;
                }
 
-               LOG_USER("jtag_speed %d => JTAG clk=%f", speed, 64.0/(float)speed);
+               int khz;
+               speed &= ~1;
+               zy1000_speed_div(speed, &khz);
+               LOG_USER("jtag_speed %d => JTAG clk=%d kHz", speed, khz);
                ZY1000_POKE(ZY1000_JTAG_BASE + 0x14, 0x100);
-               ZY1000_POKE(ZY1000_JTAG_BASE + 0x1c, speed&~1);
+               ZY1000_POKE(ZY1000_JTAG_BASE + 0x1c, speed);
        }
        return ERROR_OK;
 }
@@ -279,7 +321,7 @@ COMMAND_HANDLER(handle_power_command)
        return ERROR_OK;
 }
 
-#if !BUILD_ECOSBOARD
+#if !BUILD_ZY1000_MASTER
 static char *tcp_server = "notspecified";
 static int jim_zy1000_server(Jim_Interp *interp, int argc, Jim_Obj *const *argv)
 {
@@ -456,17 +498,32 @@ int interface_jtag_execute_queue(void)
        uint32_t empty;
 
        waitIdle();
-       ZY1000_PEEK(ZY1000_JTAG_BASE + 0x10, empty);
-       /* clear JTAG error register */
-       ZY1000_POKE(ZY1000_JTAG_BASE + 0x14, 0x400);
 
-       if ((empty&0x400) != 0)
+       /* We must make sure that all data read back has been written to its destination
+        * memory before we return from this function.
+        */
+       zy1000_flush_readqueue();
+
+       /* and handle any callbacks... */
+       zy1000_flush_callbackqueue();
+
+       if (zy1000_rclk)
        {
-               LOG_WARNING("RCLK timeout");
-               /* the error is informative only as we don't want to break the firmware if there
-                * is a false positive.
+               /* Only check for errors when using RCLK; skipping the check
+                * otherwise speeds up JTAG over TCP/IP
                 */
-//             return ERROR_FAIL;
+               ZY1000_PEEK(ZY1000_JTAG_BASE + 0x10, empty);
+               /* clear JTAG error register */
+               ZY1000_POKE(ZY1000_JTAG_BASE + 0x14, 0x400);
+
+               if ((empty&0x400) != 0)
+               {
+                       LOG_WARNING("RCLK timeout");
+                       /* the error is informative only as we don't want to break the firmware if there
+                        * is a false positive.
+                        */
+       //              return ERROR_FAIL;
+               }
        }
        return ERROR_OK;
 }
@@ -474,41 +531,10 @@ int interface_jtag_execute_queue(void)
 
 
 
-
-static uint32_t getShiftValue(void)
-{
-       uint32_t value;
-       waitIdle();
-       ZY1000_PEEK(ZY1000_JTAG_BASE + 0xc, value);
-       VERBOSE(LOG_INFO("getShiftValue %08x", value));
-       return value;
-}
-#if 0
-static uint32_t getShiftValueFlip(void)
-{
-       uint32_t value;
-       waitIdle();
-       ZY1000_PEEK(ZY1000_JTAG_BASE + 0x18, value);
-       VERBOSE(LOG_INFO("getShiftValue %08x (flipped)", value));
-       return value;
-}
-#endif
-
-#if 0
-static void shiftValueInnerFlip(const tap_state_t state, const tap_state_t endState, int repeat, uint32_t value)
-{
-       VERBOSE(LOG_INFO("shiftValueInner %s %s %d %08x (flipped)", tap_state_name(state), tap_state_name(endState), repeat, value));
-       uint32_t a,b;
-       a = state;
-       b = endState;
-       ZY1000_POKE(ZY1000_JTAG_BASE + 0xc, value);
-       ZY1000_POKE(ZY1000_JTAG_BASE + 0x8, (1 << 15) | (repeat << 8) | (a << 4) | b);
-       VERBOSE(getShiftValueFlip());
-}
-#endif
+static void writeShiftValue(uint8_t *data, int bits);
 
 // here we shuffle N bits out/in
-static __inline void scanBits(const uint8_t *out_value, uint8_t *in_value, int num_bits, bool pause, tap_state_t shiftState, tap_state_t end_state)
+static __inline void scanBits(const uint8_t *out_value, uint8_t *in_value, int num_bits, bool pause_now, tap_state_t shiftState, tap_state_t end_state)
 {
        tap_state_t pause_state = shiftState;
        for (int j = 0; j < num_bits; j += 32)
@@ -518,7 +544,7 @@ static __inline void scanBits(const uint8_t *out_value, uint8_t *in_value, int n
                {
                        k = 32;
                        /* we have more to shift out */
-               } else if (pause)
+               } else if (pause_now)
                {
                        /* this was the last to shift out this time */
                        pause_state = end_state;
@@ -549,15 +575,7 @@ static __inline void scanBits(const uint8_t *out_value, uint8_t *in_value, int n
 
                if (in_value != NULL)
                {
-                       // data in, LSB to MSB
-                       value = getShiftValue();
-                       // we're shifting in data to MSB, shift data to be aligned for returning the value
-                       value >>= 32-k;
-
-                       for (int l = 0; l < k; l += 8)
-                       {
-                               in_value[(j + l)/8]=(value >> l)&0xff;
-                       }
+                       writeShiftValue(in_value + (j/8), k);
                }
        }
 }
@@ -820,14 +838,35 @@ static void jtag_pre_post_bits(struct jtag_tap *tap, int *pre, int *post)
        *post = post_bits;
 }
 
+/*
+       static const int embeddedice_num_bits[] = {32, 6};
+       uint32_t values[2];
+
+       values[0] = value;
+       values[1] = (1 << 5) | reg_addr;
+
+       jtag_add_dr_out(tap,
+                       2,
+                       embeddedice_num_bits,
+                       values,
+                       TAP_IDLE);
+*/
+
 void embeddedice_write_dcc(struct jtag_tap *tap, int reg_addr, uint8_t *buffer, int little, int count)
 {
-
+#if 0
+       int i;
+       for (i = 0; i < count; i++)
+       {
+               embeddedice_write_reg_inner(tap, reg_addr, fast_target_buffer_get_u32(buffer, little));
+               buffer += 4;
+       }
+#else
        int pre_bits;
        int post_bits;
        jtag_pre_post_bits(tap, &pre_bits, &post_bits);
 
-       if (pre_bits + post_bits + 6 > 32)
+       if ((pre_bits > 32) || (post_bits + 6 > 32))
        {
                int i;
                for (i = 0; i < count; i++)
@@ -837,119 +876,120 @@ void embeddedice_write_dcc(struct jtag_tap *tap, int reg_addr, uint8_t *buffer,
                }
        } else
        {
-               shiftValueInner(TAP_DRSHIFT, TAP_DRSHIFT, pre_bits, 0);
                int i;
-               for (i = 0; i < count - 1; i++)
+               for (i = 0; i < count; i++)
                {
                        /* Fewer pokes means we get to use the FIFO more efficiently */
+                       shiftValueInner(TAP_DRSHIFT, TAP_DRSHIFT, pre_bits, 0);
                        shiftValueInner(TAP_DRSHIFT, TAP_DRSHIFT, 32, fast_target_buffer_get_u32(buffer, little));
-                       shiftValueInner(TAP_DRSHIFT, TAP_IDLE, 6 + post_bits + pre_bits, (reg_addr | (1 << 5)));
+                       /* Danger! here we need to exit into the TAP_IDLE state to make
+                        * DCC pick up this value.
+                        */
+                       shiftValueInner(TAP_DRSHIFT, TAP_IDLE, 6 + post_bits, (reg_addr | (1 << 5)));
                        buffer += 4;
                }
-               shiftValueInner(TAP_DRSHIFT, TAP_DRSHIFT, 32, fast_target_buffer_get_u32(buffer, little));
-               shiftValueInner(TAP_DRSHIFT, TAP_IDLE, 6 + post_bits, (reg_addr | (1 << 5)));
        }
+#endif
 }
 
 
 
 int arm11_run_instr_data_to_core_noack_inner(struct jtag_tap * tap, uint32_t opcode, uint32_t * data, size_t count)
 {
-#if 0
-       int arm11_run_instr_data_to_core_noack_inner_default(struct jtag_tap * tap, uint32_t opcode, uint32_t * data, size_t count);
-       return arm11_run_instr_data_to_core_noack_inner_default(tap, opcode, data, count);
-#else
-       static const int bits[] = {32, 2};
-       uint32_t values[] = {0, 0};
-
-       /* FIX!!!!!! the target_write_memory() API started this nasty problem
-        * with unaligned uint32_t * pointers... */
-       const uint8_t *t = (const uint8_t *)data;
-
-
        /* bypass bits before and after */
        int pre_bits;
        int post_bits;
        jtag_pre_post_bits(tap, &pre_bits, &post_bits);
-
-       bool found = false;
-       struct jtag_tap *cur_tap, *nextTap;
-       for (cur_tap = jtag_tap_next_enabled(NULL); cur_tap!= NULL; cur_tap = nextTap)
-       {
-               nextTap = jtag_tap_next_enabled(cur_tap);
-               if (cur_tap == tap)
-               {
-                       found = true;
-               } else
-               {
-                       if (found)
-                       {
-                               post_bits++;
-                       } else
-                       {
-                               pre_bits++;
-                       }
-               }
-       }
-
        post_bits+=2;
 
-
-       while (--count > 0)
+       if ((pre_bits > 32) || (post_bits > 32))
        {
-               shiftValueInner(TAP_DRSHIFT, TAP_DRSHIFT, pre_bits, 0);
-
-               uint32_t value;
-               value = *t++;
-               value |= (*t++<<8);
-               value |= (*t++<<16);
-               value |= (*t++<<24);
+               int arm11_run_instr_data_to_core_noack_inner_default(struct jtag_tap *, uint32_t, uint32_t *, size_t);
+               return arm11_run_instr_data_to_core_noack_inner_default(tap, opcode, data, count);
+       } else
+       {
+               static const int bits[] = {32, 2};
+               uint32_t values[] = {0, 0};
 
-               shiftValueInner(TAP_DRSHIFT, TAP_DRSHIFT, 32, value);
-               /* minimum 2 bits */
-               shiftValueInner(TAP_DRSHIFT, TAP_DRPAUSE, post_bits, 0);
+               /* FIX!!!!!! the target_write_memory() API started this nasty problem
+                * with unaligned uint32_t * pointers... */
+               const uint8_t *t = (const uint8_t *)data;
 
+               while (--count > 0)
+               {
 #if 1
-               /* copy & paste from arm11_dbgtap.c */
-               //TAP_DREXIT2, TAP_DRUPDATE, TAP_IDLE, TAP_IDLE, TAP_IDLE, TAP_DRSELECT, TAP_DRCAPTURE, TAP_DRSHIFT
-
-               waitIdle();
-               ZY1000_POKE(ZY1000_JTAG_BASE + 0x28, 1);
-               ZY1000_POKE(ZY1000_JTAG_BASE + 0x28, 1);
-               ZY1000_POKE(ZY1000_JTAG_BASE + 0x28, 0);
-               ZY1000_POKE(ZY1000_JTAG_BASE + 0x28, 0);
-               ZY1000_POKE(ZY1000_JTAG_BASE + 0x28, 0);
-               ZY1000_POKE(ZY1000_JTAG_BASE + 0x28, 1);
-               ZY1000_POKE(ZY1000_JTAG_BASE + 0x28, 0);
-               ZY1000_POKE(ZY1000_JTAG_BASE + 0x28, 0);
-               /* we don't have to wait for the queue to empty here. waitIdle();        */
-               ZY1000_POKE(ZY1000_JTAG_BASE + 0x20, TAP_DRSHIFT);
+                       /* Danger! This code doesn't update cmd_queue_cur_state, so
+                        * invoking jtag_add_pathmove() before jtag_add_dr_out() after
+                        * this loop would fail!
+                        */
+                       shiftValueInner(TAP_DRSHIFT, TAP_DRSHIFT, pre_bits, 0);
+
+                       uint32_t value;
+                       value = *t++;
+                       value |= (*t++<<8);
+                       value |= (*t++<<16);
+                       value |= (*t++<<24);
+
+                       shiftValueInner(TAP_DRSHIFT, TAP_DRSHIFT, 32, value);
+                       /* minimum 2 bits */
+                       shiftValueInner(TAP_DRSHIFT, TAP_DRPAUSE, post_bits, 0);
+
+                       /* copy & paste from arm11_dbgtap.c */
+                       //TAP_DREXIT2, TAP_DRUPDATE, TAP_IDLE, TAP_IDLE, TAP_IDLE, TAP_DRSELECT, TAP_DRCAPTURE, TAP_DRSHIFT
+                       /* KLUDGE! we have to flush the fifo or the Nios CPU locks up.
+                        * This is probably a bug in the Avalon bus(cross clocking bridge?)
+                        * or in the jtag registers module.
+                        */
+                       waitIdle();
+                       ZY1000_POKE(ZY1000_JTAG_BASE + 0x28, 1);
+                       ZY1000_POKE(ZY1000_JTAG_BASE + 0x28, 1);
+                       ZY1000_POKE(ZY1000_JTAG_BASE + 0x28, 0);
+                       ZY1000_POKE(ZY1000_JTAG_BASE + 0x28, 0);
+                       ZY1000_POKE(ZY1000_JTAG_BASE + 0x28, 0);
+                       ZY1000_POKE(ZY1000_JTAG_BASE + 0x28, 1);
+                       ZY1000_POKE(ZY1000_JTAG_BASE + 0x28, 0);
+                       ZY1000_POKE(ZY1000_JTAG_BASE + 0x28, 0);
+                       /* we don't have to wait for the queue to empty here */
+                       ZY1000_POKE(ZY1000_JTAG_BASE + 0x20, TAP_DRSHIFT);
+                       waitIdle();
 #else
-               static const tap_state_t arm11_MOVE_DRPAUSE_IDLE_DRPAUSE_with_delay[] =
-               {
-                       TAP_DREXIT2, TAP_DRUPDATE, TAP_IDLE, TAP_IDLE, TAP_IDLE, TAP_DRSELECT, TAP_DRCAPTURE, TAP_DRSHIFT
-               };
-
-               jtag_add_pathmove(ARRAY_SIZE(arm11_MOVE_DRPAUSE_IDLE_DRPAUSE_with_delay),
-                       arm11_MOVE_DRPAUSE_IDLE_DRPAUSE_with_delay);
+                       static const tap_state_t arm11_MOVE_DRPAUSE_IDLE_DRPAUSE_with_delay[] =
+                       {
+                               TAP_DREXIT2, TAP_DRUPDATE, TAP_IDLE, TAP_IDLE, TAP_IDLE, TAP_DRSELECT, TAP_DRCAPTURE, TAP_DRSHIFT
+                       };
+
+                       values[0] = *t++;
+                       values[0] |= (*t++<<8);
+                       values[0] |= (*t++<<16);
+                       values[0] |= (*t++<<24);
+
+                       jtag_add_dr_out(tap,
+                               2,
+                               bits,
+                               values,
+                               TAP_IDLE);
+
+                       jtag_add_pathmove(ARRAY_SIZE(arm11_MOVE_DRPAUSE_IDLE_DRPAUSE_with_delay),
+                               arm11_MOVE_DRPAUSE_IDLE_DRPAUSE_with_delay);
 #endif
-       }
+               }
 
-       values[0] = *t++;
-       values[0] |= (*t++<<8);
-       values[0] |= (*t++<<16);
-       values[0] |= (*t++<<24);
+               values[0] = *t++;
+               values[0] |= (*t++<<8);
+               values[0] |= (*t++<<16);
+               values[0] |= (*t++<<24);
 
-       /* This will happen on the last iteration updating the current tap state
-        * so we don't have to track it during the common code path */
-       jtag_add_dr_out(tap,
-               2,
-               bits,
-               values,
-               TAP_IDLE);
+               /* This will happen on the last iteration updating cmd_queue_cur_state
+                * so we don't have to track it during the common code path
+                */
+               jtag_add_dr_out(tap,
+                       2,
+                       bits,
+                       values,
+                       TAP_IDLE);
 
-       return jtag_execute_queue();
-#endif
+               return jtag_execute_queue();
+       }
 }
 
 
@@ -962,6 +1002,7 @@ static const struct command_registration zy1000_commands[] = {
                        "With no arguments, prints status.",
                .usage = "('on'|'off)",
        },
+#if BUILD_ZY1000_MASTER
 #if BUILD_ECOSBOARD
        {
                .name = "zy1000_version",
@@ -970,6 +1011,7 @@ static const struct command_registration zy1000_commands[] = {
                .help = "Print version info for zy1000.",
                .usage = "['openocd'|'zy1000'|'date'|'time'|'pcb'|'fpga']",
        },
+#endif
 #else
        {
                .name = "zy1000_server",
@@ -998,6 +1040,7 @@ static const struct command_registration zy1000_commands[] = {
 };
 
 
+#if !BUILD_ZY1000_MASTER || BUILD_ECOSBOARD
 static int tcp_ip = -1;
 
 /* Write large packets if we can */
@@ -1066,16 +1109,18 @@ static bool readLong(uint32_t *out_data)
        *out_data = data;
        return true;
 }
+#endif
 
 enum ZY1000_CMD
 {
        ZY1000_CMD_POKE = 0x0,
        ZY1000_CMD_PEEK = 0x8,
        ZY1000_CMD_SLEEP = 0x1,
+       ZY1000_CMD_WAITIDLE = 2
 };
 
 
-#if !BUILD_ECOSBOARD
+#if !BUILD_ZY1000_MASTER
 
 #include <sys/socket.h> /* for socket(), connect(), send(), and recv() */
 #include <arpa/inet.h>  /* for sockaddr_in and inet_addr() */
@@ -1132,9 +1177,26 @@ void zy1000_tcpout(uint32_t address, uint32_t data)
        }
 }
 
+/* By sending the wait to the server, we avoid a readback
+ * of status. Radically improves performance for this operation
+ * with long ping times.
+ */
+void waitIdle(void)
+{
+       tcpip_open();
+       if (!writeLong((ZY1000_CMD_WAITIDLE << 24)))
+       {
+               fprintf(stderr, "Could not write to zy1000 server\n");
+               exit(-1);
+       }
+}
+
 uint32_t zy1000_tcpin(uint32_t address)
 {
        tcpip_open();
+
+       zy1000_flush_readqueue();
+
        uint32_t data;
        if (!writeLong((ZY1000_CMD_PEEK << 24) | address)||
                        !readLong(&data))
@@ -1157,6 +1219,148 @@ int interface_jtag_add_sleep(uint32_t us)
        return ERROR_OK;
 }
 
+/* queue a readback */
+#define readqueue_size 16384
+static struct
+{
+       uint8_t *dest;
+       int bits;
+} readqueue[readqueue_size];
+
+static int readqueue_pos = 0;
+
+/* Flush the readqueue: read all the data we are expecting
+ * and store each value at its final destination.
+ */
+void zy1000_flush_readqueue(void)
+{
+       if (readqueue_pos == 0)
+       {
+               /* simplifies debugging by allowing easy breakpoints when there
+                * is something to do. */
+               return;
+       }
+       int i;
+       tcpip_open();
+       for (i = 0; i < readqueue_pos; i++)
+       {
+               uint32_t value;
+               if (!readLong(&value))
+               {
+                       fprintf(stderr, "Could not read from zy1000 server\n");
+                       exit(-1);
+               }
+
+               uint8_t *in_value = readqueue[i].dest;
+               int k = readqueue[i].bits;
+
+               // we're shifting in data to MSB, shift data to be aligned for returning the value
+               value >>= 32-k;
+
+               for (int l = 0; l < k; l += 8)
+               {
+                       in_value[l/8]=(value >> l)&0xff;
+               }
+       }
+       readqueue_pos = 0;
+}
+
+/* By queuing the callbacks we avoid flushing the
+read queue until jtag_execute_queue(). This can
+reduce latency dramatically for cases where
+callbacks are used extensively.
+*/
+#define callbackqueue_size 128
+static struct callbackentry
+{
+       jtag_callback_t callback;
+       jtag_callback_data_t data0;
+       jtag_callback_data_t data1;
+       jtag_callback_data_t data2;
+       jtag_callback_data_t data3;
+} callbackqueue[callbackqueue_size];
+
+static int callbackqueue_pos = 0;
+
+void zy1000_jtag_add_callback4(jtag_callback_t callback, jtag_callback_data_t data0, jtag_callback_data_t data1, jtag_callback_data_t data2, jtag_callback_data_t data3)
+{
+       if (callbackqueue_pos >= callbackqueue_size)
+       {
+               zy1000_flush_callbackqueue();
+       }
+
+       callbackqueue[callbackqueue_pos].callback = callback;
+       callbackqueue[callbackqueue_pos].data0 = data0;
+       callbackqueue[callbackqueue_pos].data1 = data1;
+       callbackqueue[callbackqueue_pos].data2 = data2;
+       callbackqueue[callbackqueue_pos].data3 = data3;
+       callbackqueue_pos++;
+}
+
+static int zy1000_jtag_convert_to_callback4(jtag_callback_data_t data0, jtag_callback_data_t data1, jtag_callback_data_t data2, jtag_callback_data_t data3)
+{
+       ((jtag_callback1_t)data1)(data0);
+       return ERROR_OK;
+}
+
+void zy1000_jtag_add_callback(jtag_callback1_t callback, jtag_callback_data_t data0)
+{
+       zy1000_jtag_add_callback4(zy1000_jtag_convert_to_callback4, data0, (jtag_callback_data_t)callback, 0, 0);
+}
+
+void zy1000_flush_callbackqueue(void)
+{
+       /* we have to flush the read queue so we have access to
+        the data the callbacks will use 
+       */
+       zy1000_flush_readqueue();
+       int i;
+       for (i = 0; i < callbackqueue_pos; i++)
+       {
+               struct callbackentry *entry = &callbackqueue[i];
+               jtag_set_error(entry->callback(entry->data0, entry->data1, entry->data2, entry->data3));
+       }
+       callbackqueue_pos = 0;
+}
+
+static void writeShiftValue(uint8_t *data, int bits)
+{
+       waitIdle();
+
+       if (!writeLong((ZY1000_CMD_PEEK << 24) | (ZY1000_JTAG_BASE + 0xc)))
+       {
+               fprintf(stderr, "Could not read from zy1000 server\n");
+               exit(-1);
+       }
+
+       if (readqueue_pos >= readqueue_size)
+       {
+               zy1000_flush_readqueue();
+       }
+
+       readqueue[readqueue_pos].dest = data;
+       readqueue[readqueue_pos].bits = bits;
+       readqueue_pos++;
+}
+
+#else
+
+static void writeShiftValue(uint8_t *data, int bits)
+{
+       uint32_t value;
+       waitIdle();
+       ZY1000_PEEK(ZY1000_JTAG_BASE + 0xc, value);
+       VERBOSE(LOG_INFO("getShiftValue %08x", value));
+
+       // data in, LSB to MSB
+       // we're shifting in data to MSB, shift data to be aligned for returning the value
+       value >>= 32 - bits;
+
+       for (int l = 0; l < bits; l += 8)
+       {
+               data[l/8]=(value >> l)&0xff;
+       }
+}
 
 #endif
 
@@ -1206,6 +1410,11 @@ static void tcpipserver(void)
                                jtag_sleep(data);
                                break;
                        }
+                       case ZY1000_CMD_WAITIDLE:
+                       {
+                               waitIdle();
+                               break;
+                       }
                        default:
                                return;
                }
@@ -1361,21 +1570,49 @@ static void watchdog_server(cyg_addrword_t data)
 }
 #endif
 
+#endif
+
+#if BUILD_ZY1000_MASTER
 int interface_jtag_add_sleep(uint32_t us)
 {
        jtag_sleep(us);
        return ERROR_OK;
 }
-
 #endif
 
+#if BUILD_ZY1000_MASTER && !BUILD_ECOSBOARD
+volatile void *zy1000_jtag_master;
+#include <sys/mman.h>
+#endif
 
 int zy1000_init(void)
 {
 #if BUILD_ECOSBOARD
        LOG_USER("%s", ZYLIN_OPENOCD_VERSION);
+#elif BUILD_ZY1000_MASTER
+       int fd;
+       if((fd = open("/dev/mem", O_RDWR | O_SYNC)) == -1)
+       {
+               LOG_ERROR("No access to /dev/mem");
+               return ERROR_FAIL;
+       }
+#ifndef REGISTERS_BASE
+#define REGISTERS_BASE 0x9002000
+#define REGISTERS_SPAN 128
+#endif
+    
+    zy1000_jtag_master = mmap(0, REGISTERS_SPAN, PROT_READ | PROT_WRITE, MAP_SHARED, fd, REGISTERS_BASE);
+    
+    if(zy1000_jtag_master == (void *) -1) 
+    {
+           close(fd);
+               LOG_ERROR("No access to /dev/mem");
+               return ERROR_FAIL;
+    } 
 #endif
 
+
+
        ZY1000_POKE(ZY1000_JTAG_BASE + 0x10, 0x30); // Turn on LED1 & LED2
 
        setPower(true); // on by default
@@ -1383,7 +1620,11 @@ int zy1000_init(void)
 
         /* deassert resets. Important to avoid infinite loop waiting for SRST to deassert */
        zy1000_reset(0, 0);
-       zy1000_speed(jtag_get_speed());
+       int jtag_speed_var;
+       int retval = jtag_get_speed(&jtag_speed_var);
+       if (retval != ERROR_OK)
+               return retval;
+       zy1000_speed(jtag_speed_var);
 
 
 #if BUILD_ECOSBOARD
@@ -1418,4 +1659,3 @@ struct jtag_interface zy1000_interface =
        .power_dropout = zy1000_power_dropout,
        .srst_asserted = zy1000_srst_asserted,
 };
-