git.gag.com Git - debian/pforth/blob - fth/slashqt.fth

   1 \ S\" implementation for pForth
   2 \
   3 \ Copied from ANS reference implementation at:
   4 \    http://www.forth200x.org/escaped-strings.html
   5 \
   6 \ The code was not modified except for the use of private{ }private
   7 \
   8 \ Added November 2021 by Phil Burk
   9
  10 ANEW TASK-SLASHQT.FTH
  11
  12 private{
  13
  14 decimal
  15
  16 : c+!           \ c c-addr --
  17 \ *G Add character C to the contents of address C-ADDR.
  18   tuck c@ + swap c!
  19 ;
  20
  21 : addchar       \ char string --
  22 \ *G Add the character to the end of the counted string.
  23   tuck count + c!
  24   1 swap c+!
  25 ;
  26
  27 : append        \ c-addr u $dest --
  28 \ *G Add the string described by C-ADDR U to the counted string at
  29 \ ** $DEST. The strings must not overlap.
  30   >r
  31   tuck  r@ count +  swap cmove          \ add source to end
  32   r> c+!                                \ add length to count
  33 ;
  34
  35 : extract2H     \ c-addr len -- c-addr' len' u
  36 \ *G Extract a two-digit hex number in the given base from the
  37 \ ** start of the string, returning the remaining string
  38 \ ** and the converted number.
  39   base @ >r  hex
  40   0 0 2over drop 2 >number 2drop drop
  41   >r  2 /string  r>
  42   r> base !
  43 ;
  44
  45 create EscapeTable      \ -- addr
  46 \ *G Table of translations for \a..\z.
  47         7 c,    \ \a BEL (Alert)
  48         8 c,    \ \b BS  (Backspace)
  49    char c c,    \ \c
  50    char d c,    \ \d
  51        27 c,    \ \e ESC (Escape)
  52        12 c,    \ \f FF  (Form feed)
  53    char g c,    \ \g
  54    char h c,    \ \h
  55    char i c,    \ \i
  56    char j c,    \ \j
  57    char k c,    \ \k
  58        10 c,    \ \l LF  (Line feed)
  59    char m c,    \ \m
  60        10 c,    \ \n (Unices only)
  61    char o c,    \ \o
  62    char p c,    \ \p
  63    char " c,    \ \q "   (Double quote)
  64        13 c,    \ \r CR  (Carriage Return)
  65    char s c,    \ \s
  66         9 c,    \ \t HT  (horizontal tab}
  67    char u c,    \ \u
  68        11 c,    \ \v VT  (vertical tab)
  69    char w c,    \ \w
  70    char x c,    \ \x
  71    char y c,    \ \y
  72         0 c,    \ \z NUL (no character)
  73
  74 create CRLF$    \ -- addr ; CR/LF as counted string
  75   2 c,  13 c,  10 c,
  76
  77 : addEscape     \ c-addr len dest -- c-addr' len'
  78 \ *G Add an escape sequence to the counted string at dest,
  79 \ ** returning the remaining string.
  80   over 0=                               \ zero length check
  81   if  drop  exit  then
  82   >r                                    \ -- caddr len ; R: -- dest
  83   over c@ [char] x = if                 \ hex number?
  84     1 /string extract2H r> addchar  exit
  85   then
  86   over c@ [char] m = if                 \ CR/LF pair
  87     1 /string  13 r@ addchar  10 r> addchar  exit
  88   then
  89   over c@ [char] n = if                 \ CR/LF pair? (Windows/DOS only)
  90     1 /string  crlf$ count r> append  exit
  91   then
  92   over c@ [char] a [char] z 1+ within if
  93     over c@ [char] a - EscapeTable + c@  r> addchar
  94   else
  95     over c@ r> addchar
  96   then
  97   1 /string
  98 ;
  99
 100 : parse\"       \ c-addr len dest -- c-addr' len'
 101 \ *G Parses a string up to an unescaped '"', translating '\'
 102 \ ** escapes to characters. The translated string is a
 103 \ ** counted string at *\i{dest}.
 104 \ ** The supported escapes (case sensitive) are:
 105 \ *D \a      BEL          (alert)
 106 \ *D \b      BS           (backspace)
 107 \ *D \e      ESC (not in C99)
 108 \ *D \f      FF           (form feed)
 109 \ *D \l      LF (ASCII 10)
 110 \ *D \m      CR/LF pair - for HTML etc.
 111 \ *D \n      newline - CRLF for Windows/DOS, LF for Unices
 112 \ *D \q      double-quote
 113 \ *D \r      CR (ASCII 13)
 114 \ *D \t      HT (tab)
 115 \ *D \v      VT
 116 \ *D \z      NUL (ASCII 0)
 117 \ *D \"      double-quote
 118 \ *D \xAB    Two char Hex numerical character value
 119 \ *D \\      backslash itself
 120 \ *D \       before any other character represents that character
 121   dup >r  0 swap c!                     \ zero destination
 122   begin                                 \ -- caddr len ; R: -- dest
 123     dup
 124    while
 125     over c@ [char] " <>                 \ check for terminator
 126    while
 127     over c@ [char] \ = if               \ deal with escapes
 128       1 /string r@ addEscape
 129     else                                \ normal character
 130       over c@ r@ addchar  1 /string
 131     then
 132   repeat then
 133   dup                                   \ step over terminating "
 134   if 1 /string  then
 135   r> drop
 136 ;
 137
 138 create pocket  \ -- addr
 139 \ *G A tempory buffer to hold processed string.
 140 \    This would normally be an internal system buffer.
 141
 142 s" /COUNTED-STRING" environment? 0= [if] 256 [then]
 143 1 chars + allot
 144
 145 : readEscaped   \ "ccc" -- c-addr
 146 \ *G Parses an escaped string from the input stream according to
 147 \ ** the rules of *\fo{parse\"} above, returning the address
 148 \ ** of the translated counted string in *\fo{POCKET}.
 149   source >in @ /string tuck             \ -- len caddr len
 150   pocket parse\" nip
 151   - >in +!
 152   pocket
 153 ;
 154
 155 }private
 156
 157 : S\"           \ "string" -- caddr u
 158 \ *G As *\fo{S"}, but translates escaped characters using
 159 \ ** *\fo{parse\"} above.
 160   readEscaped count  state @
 161   if  postpone sliteral  then
 162 ; IMMEDIATE
 163
 164 privatize   \ hide the internal words
 165