1 \ S\" implementation for pForth
3 \ Copied from ANS reference implementation at:
4 \ http://www.forth200x.org/escaped-strings.html
6 \ The code was not modified except for the use of private{ }private
8 \ Added November 2021 by Phil Burk
17 \ *G Add character C to the contents of address C-ADDR.
21 : addchar \ char string --
22 \ *G Add the character to the end of the counted string.
27 : append \ c-addr u $dest --
28 \ *G Add the string described by C-ADDR U to the counted string at
29 \ ** $DEST. The strings must not overlap.
31 tuck r@ count + swap cmove \ add source to end
32 r> c+! \ add length to count
35 : extract2H \ c-addr len -- c-addr' len' u
36 \ *G Extract a two-digit hex number in the given base from the
37 \ ** start of the string, returning the remaining string
38 \ ** and the converted number.
40 0 0 2over drop 2 >number 2drop drop
45 create EscapeTable \ -- addr
46 \ *G Table of translations for \a..\z.
48 8 c, \ \b BS (Backspace)
51 27 c, \ \e ESC (Escape)
52 12 c, \ \f FF (Form feed)
58 10 c, \ \l LF (Line feed)
60 10 c, \ \n (Unices only)
63 char " c, \ \q " (Double quote)
64 13 c, \ \r CR (Carriage Return)
66 9 c, \ \t HT (horizontal tab}
68 11 c, \ \v VT (vertical tab)
72 0 c, \ \z NUL (no character)
74 create CRLF$ \ -- addr ; CR/LF as counted string
77 : addEscape \ c-addr len dest -- c-addr' len'
78 \ *G Add an escape sequence to the counted string at dest,
79 \ ** returning the remaining string.
80 over 0= \ zero length check
82 >r \ -- caddr len ; R: -- dest
83 over c@ [char] x = if \ hex number?
84 1 /string extract2H r> addchar exit
86 over c@ [char] m = if \ CR/LF pair
87 1 /string 13 r@ addchar 10 r> addchar exit
89 over c@ [char] n = if \ CR/LF pair? (Windows/DOS only)
90 1 /string crlf$ count r> append exit
92 over c@ [char] a [char] z 1+ within if
93 over c@ [char] a - EscapeTable + c@ r> addchar
100 : parse\" \ c-addr len dest -- c-addr' len'
101 \ *G Parses a string up to an unescaped '"', translating '\'
102 \ ** escapes to characters. The translated string is a
103 \ ** counted string at *\i{dest}.
104 \ ** The supported escapes (case sensitive) are:
106 \ *D \b BS (backspace)
107 \ *D \e ESC (not in C99)
108 \ *D \f FF (form feed)
109 \ *D \l LF (ASCII 10)
110 \ *D \m CR/LF pair - for HTML etc.
111 \ *D \n newline - CRLF for Windows/DOS, LF for Unices
113 \ *D \r CR (ASCII 13)
116 \ *D \z NUL (ASCII 0)
118 \ *D \xAB Two char Hex numerical character value
119 \ *D \\ backslash itself
120 \ *D \ before any other character represents that character
121 dup >r 0 swap c! \ zero destination
122 begin \ -- caddr len ; R: -- dest
125 over c@ [char] " <> \ check for terminator
127 over c@ [char] \ = if \ deal with escapes
128 1 /string r@ addEscape
129 else \ normal character
130 over c@ r@ addchar 1 /string
133 dup \ step over terminating "
138 create pocket \ -- addr
139 \ *G A tempory buffer to hold processed string.
140 \ This would normally be an internal system buffer.
142 s" /COUNTED-STRING" environment? 0= [if] 256 [then]
145 : readEscaped \ "ccc" -- c-addr
146 \ *G Parses an escaped string from the input stream according to
147 \ ** the rules of *\fo{parse\"} above, returning the address
148 \ ** of the translated counted string in *\fo{POCKET}.
149 source >in @ /string tuck \ -- len caddr len
157 : S\" \ "string" -- caddr u
158 \ *G As *\fo{S"}, but translates escaped characters using
159 \ ** *\fo{parse\"} above.
160 readEscaped count state @
161 if postpone sliteral then
164 privatize \ hide the internal words