1 # SPDX-License-Identifier: GPL-2.0-or-later
4 # Copyright (C) 2008 Lou Deluxe
5 # lou.openocd012@fixit.nospammail.net
10 m4_dnl Setup and hold times depend on SHIFTER_PRESCALER
11 m4_define(`SETUP_DELAY_CYCLES', m4_eval(`('SHIFTER_PRESCALER` + 1) / 2'))
12 m4_define(`HOLD_DELAY_CYCLES', m4_eval(`'SHIFTER_PRESCALER` / 2'))
14 m4_dnl Some macros to make nybble handling a little easier
15 m4_define(`m4_high_nybble', `m4_eval(`(($1) >> 4) & 0xf')')
16 m4_define(`m4_low_nybble', `m4_eval(`($1) & 0xf')')
18 m4_dnl A macro to generate a number of NOPs depending on the argument
19 m4_define(`m4_0_to_5_nops', `m4_ifelse(m4_eval(`($1) >= 1'), 1, ` NOP
20 'm4_ifelse(m4_eval(`($1) >= 2'), 1, ` NOP
21 'm4_ifelse(m4_eval(`($1) >= 3'), 1, ` NOP
22 'm4_ifelse(m4_eval(`($1) >= 4'), 1, ` NOP
23 'm4_ifelse(m4_eval(`($1) >= 5'), 1, ` NOP
27 m4_dnl Some macros to facilitate bit-banging delays.
28 m4_dnl There are three of them: one for self-contained delays, and two for delays which must be split between a setup part and a loop part to keep from disturbing A at delay time.
29 m4_dnl The argument passed to any of them is the number of cycles which the delay should consume.
31 m4_dnl This one is self-contained.
34 `; delay (m4_eval($1) cycles)'
35 `m4_ifelse(m4_eval(`('$1`) < 6'), 1,
38 m4_ifelse(m4_eval(`(('$1`) - 3) % 2'), 1, ` NOP')
39 A.H = m4_high_nybble(`(('$1`) - 3) / 2')
40 A.L = m4_low_nybble(`(('$1`) - 3) / 2')
47 m4_dnl These are the setup and loop parts of the split delay.
48 m4_dnl The argument passed to both must match for the result to make sense.
49 m4_dnl The setup does not count toward the delay. It takes 3 cycles when a loop is used and none when NOPs are used.
51 m4_define(`m4_delay_setup',
52 `; delay setup (m4_eval($1) cycles)'
53 `m4_ifelse(m4_eval(`('$1`) < 6'), 0, ` '
54 A.H = m4_high_nybble(`('$1`) / 2')
55 A.L = m4_low_nybble(`('$1`) / 2')
59 m4_define(`m4_delay_loop',
60 `; delay loop (m4_eval($1) cycles)'
61 `m4_ifelse(m4_eval(`('$1`) < 6'), 1,
64 m4_ifelse(m4_eval(`('$1`) % 2'), 1, ` NOP')
69 m4_dnl These are utility macros for use with delays. Specifically, there is code below which needs some predictability in code size for relative jumps to reach. The m4_delay macro generates an extra NOP when an even delay is needed, and the m4_delay_loop macro generates an extra NOP when an odd delay is needed. Using this for the argument to the respective macro rounds up the argument so that the extra NOP will not be generated. There is also logic built in to cancel the rounding when the result is small enough that a loop would not be generated.
m4_dnl m4_delay_loop_round_up(cycles): for cycles >= 6, yields cycles rounded up to the next even number; smaller values are passed through unchanged because no loop is generated for them.
m4_dnl The argument is parenthesized in the comparison, as in the delay macros, so that a compound expression is compared as a whole.
71 m4_define(`m4_delay_loop_round_up', `m4_ifelse(m4_eval(`('$1`) < 6'), 1, $1, m4_eval(`(('$1`) + 1) / 2 * 2'))')
m4_dnl m4_delay_round_up(cycles): for cycles >= 6, yields cycles rounded up to the next odd number; smaller values are passed through unchanged because no loop is generated for them.
m4_dnl The argument is parenthesized in the comparison, as in the delay macros, so that a compound expression is compared as a whole.
72 m4_define(`m4_delay_round_up', `m4_ifelse(m4_eval(`('$1`) < 6'), 1, $1, m4_eval(`(('$1`) / 2 * 2) + 1'))')
77 ;------------------------------------------------------------------------------
79 ; This is at address 0x00 in case of empty LUT entries
82 ;------------------------------------------------------------------------------
83 ; Command interpreter. It lives at address 0x01 because it is branched to a lot: X is already kept at 1 for other purposes, so X can also serve as the branch target.
; It dispatches on the most significant nybble of the command byte through the lookup table.
85 ; Assumes ADR_BUFFER0 points to the next command byte
86 ; Stores the current command byte in CMP01
91 CMP01 = A ; store the current command for later; the opcode handlers read its low bits back from CMP01
93 EXCHANGE ; put MSN (the opcode nybble) into LSN
94 A.H = 0xc ; table index is now 0xc0 + opcode nybble; lookup table at 0x1550 + 0xc0 = 0x1610
96 ; branch to address in lookup table
101 ;------------------------------------------------------------------------------
102 ; LUT indexed by the high nybble of the command byte (entry cN handles commands whose high nybble is N)
104 ;LUT; c0 opcode_error
105 ;LUT; c1 opcode_shift_tdi_andor_tms_bytes
106 ;LUT; c2 opcode_shift_tdi_andor_tms_bytes
107 ;LUT; c3 opcode_shift_tdi_andor_tms_bytes
108 ;LUT; c4 opcode_shift_tdo_bytes
109 ;LUT; c5 opcode_error
110 ;LUT; c6 opcode_shift_tdio_bytes
111 ;LUT; c7 opcode_error
112 ;LUT; c8 opcode_shift_tms_tdi_bit_pair
113 ;LUT; c9 opcode_shift_tms_bits
114 ;LUT; ca opcode_error
115 ;LUT; cb opcode_error
116 ;LUT; cc opcode_error
117 ;LUT; cd opcode_error
118 ;LUT; ce opcode_shift_tdio_bits
122 ;------------------------------------------------------------------------------
123 ; USB/buffer handling
; Acknowledges the previous input buffer, switches to the other input buffer,
; sets up the output buffer, then spins until a command buffer is available
; and goes to the command interpreter.
126 ;ENTRY; download entry_download
130 ; pointer to completion flag
135 A = OR_MPEG ; buffer indicator from previous iteration
136 <Y> = A ; either indicator will have bit 0 set
137 BSET 1 ; was buffer 1 previously current?
138 ; A.H = 0 ; already zero from OR_MPEG
139 JP opcode_next_buffer_0
141 opcode_next_buffer_1:
142 A.L = 0x1 ; ack buffer 0
144 ; A.H = 0x0 ; already zero from BUFFER_MNGT
145 A.L = 0x3 ; Input buffer 1 = 0x1850 (0x0300)
148 opcode_next_buffer_0:
149 A.L = 0x2 ; ack buffer 1
152 A = X ; Input buffer 0 = 0x1650 (0x0100)
155 OR_MPEG = A ; store the buffer indicator for the next iteration
158 BUFFER_MNGT = A ; finish acking previous buffer
163 A.H = 0x4 ; Output buffer = 0x1590 (0x0040)
167 X = A ; comparison value for the spin loop below
169 ; pointer to status in shared memory
170 DECY ; setting to 0 above and decrementing here saves a byte
172 ; wait until a command buffer is available
173 A = BUFFER_MNGT ; spin while neither bit 2 nor bit 3 is set
174 CP A<X ; comparing against X is slightly faster and smaller than ANDing and comparing the result, and it reuses the nybble-swapped 0x40 from the output buffer setup.
176 <Y> = A ; update status once done spinning
178 ; restore X, since we used it
179 ; A.H = 0 ; high nybble of BUFFER_MNGT will always be 0 the way we use it
183 ; go to command interpreter
187 ;;------------------------------------------------------------------------------
191 ; ; Ack buffer 0 in download mode
198 ;------------------------------------------------------------------------------
; Handler for LUT entries c1, c2 and c3: shifts whole bytes out on TDI and/or TMS.
; For each byte, TDI and TMS are each loaded with either a supplied byte or zero,
; then both shifters are started as nearly simultaneously as possible.
; NOTE(review): presumably the high-nybble bits select which of TDI and TMS take
; supplied bytes - confirm against the full routine.
199 :opcode_shift_tdi_andor_tms_bytes
202 A = CMP01 ; bits 3..0 contain the number of bytes to shift - 1
208 CMP01 = A ; from here on only the bits in the high nybble are of interest
210 opcode_shift_tdi_andor_tms_bytes__loop:
212 ; set tdi to supplied byte or zero
223 ; set tms to supplied byte or zero
232 ; run both shifters as nearly simultaneously as possible
241 JP opcode_shift_tdi_andor_tms_bytes__loop
247 ;------------------------------------------------------------------------------
; Handler for LUT entry c4: shifts in whole bytes and puts each shifted byte
; into the output buffer, looping once per byte.
248 :opcode_shift_tdo_bytes
251 A = CMP01 ; bits 3..0 contain the number of bytes to shift - 1
255 opcode_shift_tdo_bytes__loop:
263 ; put shifted byte into output buffer
269 JP opcode_shift_tdo_bytes__loop
275 ;------------------------------------------------------------------------------
; Handler for LUT entry c6: shifts whole bytes by running the shared bit-shifting
; subroutine once per byte with a fixed count of 8 bits.
; Uses CMP10 as the byte loop counter, CMP11 as the bit count handed to the
; subroutine, and CMP00 as the return address the subroutine goes back to.
276 :opcode_shift_tdio_bytes
279 A = CMP01 ; bits 3..0 contain the number of bytes to shift - 1
281 CMP10 = A ; byte loop counter
283 A.H = opcode_shift_tdio_bytes__sub_return
284 A.L = opcode_shift_tdio_bytes__sub_return
285 CMP00 = A ; return address the subroutine will go back to
287 opcode_shift_tdio_bytes__loop:
290 CMP11 = A ; always use 8 bits
292 JP sub_shift_tdio_bits
293 opcode_shift_tdio_bytes__sub_return:
295 A = CMP10 ; byte loop counter
300 JP opcode_shift_tdio_bytes__loop
303 ;DR_MPEG = A ; return TCK low, as str912 reset halt seems to require it
307 ;------------------------------------------------------------------------------
; Handler for LUT entry ce: shifts 1 to 8 bits through the shared bit-shifting
; subroutine. Command bits 2..0 hold the bit count - 1 and bit 3 requests TMS=1.
; Uses CMP11 as the bit loop counter and CMP00 as the return address.
308 :opcode_shift_tdio_bits
311 A = CMP01 ; bits 2..0 contain the number of bits to shift - 1
313 BCLR 3 ; set TMS=1 if bit 3 was set
314 CMP11 = A ; bit loop counter
316 A.H = opcode_shift_tdio_bits__sub_return
317 A.L = opcode_shift_tdio_bits__sub_return
318 CMP00 = A ; return address the subroutine will go back to
320 JP sub_shift_tdio_bits
323 JP sub_shift_tdio_bits
324 opcode_shift_tdio_bits__sub_return:
327 ;DR_MPEG = A ; return TCK low, as str912 reset halt seems to require it
331 ;------------------------------------------------------------------------------
; Shared bit-shifting subroutine, reached by the handlers through a jump to sub_shift_tdio_bits.
; Inputs: CMP11 = bit loop counter, CMP00 = return address, next input buffer byte = bits to shift.
; TDI bits leave MASK and TDO bits enter MASK through the carry, one bit per loop pass.
335 A = DATA_BUFFER0 ; get byte from input buffer
337 MASK = A ; put it in MASK where bit routine will use it
339 :sub_shift_tdio_bits__loop
340 m4_delay_setup(m4_delay_loop_round_up(SETUP_DELAY_CYCLES - 1))
342 A = MASK ; shift TDO into and TDI out of MASK via carry
347 A.L = 0x2 ; TCK=0, TDI=1
350 A.L = 0x0 ; TCK=0, TDI=0
353 m4_delay_loop(m4_delay_loop_round_up(SETUP_DELAY_CYCLES - 1))
358 A = DR_MPEG ; set carry bit to TDO
364 m4_delay(HOLD_DELAY_CYCLES - 10)
366 A = CMP11 ; bit loop counter
367 Y = A ; use Y to avoid corrupting carry bit with subtract
371 JP :sub_shift_tdio_bits__loop
373 ; shift last TDO bit into result
379 A = CMP00 ; return to caller through the address the handler stored in CMP00
383 ;------------------------------------------------------------------------------
; Handler for LUT entry c8: sets the TMS line manually, then shifts a single TDI
; bit through the shared bit-shifting subroutine. A command bit says whether the
; resulting TDO bit is returned.
384 :opcode_shift_tms_tdi_bit_pair
387 ; set TMS line manually
388 A = CMP01 ; bits 3..0 contain TDI and TMS bits and whether to return TDO
395 ; stuff command buffer with bitmap of single TDI bit
406 CMP11 = A ; bit loop counter (only doing one bit)
408 A.H = opcode_shift_tms_tdi_bit_pair__sub_return
409 A.L = opcode_shift_tms_tdi_bit_pair__sub_return
410 CMP00 = A ; return address the subroutine will go back to
412 ; jump this way due to relative jump range issues
413 A.H = sub_shift_tdio_bits
414 A.L = sub_shift_tdio_bits
416 opcode_shift_tms_tdi_bit_pair__sub_return:
419 BSET 3 ; bit says whether to return TDO
421 ADR_BUFFER1 -= X ; subroutine returns it, so undo that
424 DR_MPEG = A ; return TCK low, as str912 reset halt seems to require it
428 ;------------------------------------------------------------------------------
; Handler for LUT entry c9: shifts bits of one input buffer byte out on TMS,
; with TDI held at 0. MASK holds the byte being shifted and CMP11 the bit loop counter.
429 :opcode_shift_tms_bits
432 A = CMP01 ; bits 3..0 contain the number of bits to shift - 1 (only 1-8 bits are valid; there is no checking, other values just operate improperly)
434 CMP11 = A ; bit loop counter
436 A = DATA_BUFFER0 ; get byte from input buffer
438 MASK = A ; the byte to be shifted
440 :opcode_shift_tms_bits__loop
441 m4_delay_setup(SETUP_DELAY_CYCLES - 1)
443 A = MASK ; shift TMS out of MASK via carry
448 A.L = 0x1 ; TCK=0, TDI=0, TMS=1
451 A.L = 0x0 ; TCK=0, TDI=0, TMS=0
455 m4_delay_loop(SETUP_DELAY_CYCLES - 1)
460 m4_delay(HOLD_DELAY_CYCLES - 10)
462 A = CMP11 ; bit loop counter
467 JP :opcode_shift_tms_bits__loop
470 DR_MPEG = A ; return TCK low, as str912 reset halt seems to require it