1 /* wordsplit - a word splitter
2 Copyright (C) 2009-2014, 2016 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify it
5 under the terms of the GNU General Public License as published by the
6 Free Software Foundation; either version 3 of the License, or (at your
7 option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License along
15 with this program. If not, see <http://www.gnu.org/licenses/>.
17 Written by Sergey Poznyakoff
35 # define gettext(msgid) msgid
37 #define _(msgid) gettext (msgid)
38 #define N_(msgid) msgid
40 #include <wordsplit.h>
/* Locale-independent (ASCII only) character classification helpers.

   ISPUNCT and ISXDIGIT are implemented with strchr.  Because strchr
   treats the terminating NUL as part of the searched string, a plain
   strchr test reports a match for c == 0; both macros therefore reject
   NUL explicitly.  Note that ISXDIGIT matches only the letters a-f/A-F;
   decimal digits are covered by ISDIGIT.

   ISDELIM tests C against the delimiter set of the wordsplit structure WS.
   ISPRINT accepts the range ' ' .. 127 inclusive.

   All macros may evaluate their argument more than once: do not pass
   expressions with side effects.  */
#define ISWS(c) ((c)==' '||(c)=='\t'||(c)=='\n')
#define ISDELIM(ws,c) \
  (strchr ((ws)->ws_delim, (c)) != NULL)
#define ISPUNCT(c) \
  ((c) != 0 && strchr("!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",(c))!=NULL)
#define ISUPPER(c) ('A' <= ((unsigned) (c)) && ((unsigned) (c)) <= 'Z')
#define ISLOWER(c) ('a' <= ((unsigned) (c)) && ((unsigned) (c)) <= 'z')
#define ISALPHA(c) (ISUPPER(c) || ISLOWER(c))
#define ISDIGIT(c) ('0' <= ((unsigned) (c)) && ((unsigned) (c)) <= '9')
#define ISXDIGIT(c) ((c) != 0 && strchr("abcdefABCDEF", (c))!=NULL)
#define ISALNUM(c) (ISALPHA(c) || ISDIGIT(c))
#define ISPRINT(c) (' ' <= ((unsigned) (c)) && ((unsigned) (c)) <= 127)
/* Sizing constants for the word vector, used by alloc_space:
   ALLOC_INIT is the minimum size of the first allocation,
   ALLOC_INCR the minimum growth step of a reallocation.  */
54 #define ALLOC_INIT 128
55 #define ALLOC_INCR 128
/* Out-of-memory hook that wordsplit_init installs in ws_alloc_die when
   the caller did not set WRDSF_ALLOC_DIE.  Reports "memory exhausted"
   through the ws_error hook of WSP.  */
58 _wsplt_alloc_die (struct wordsplit *wsp)
60 wsp->ws_error (_("memory exhausted"));
/* printf-style error reporter that wordsplit_init installs in ws_error
   when the caller did not set WRDSF_ERROR.  Formats FMT and its
   arguments to stderr.  */
64 static void __WORDSPLIT_ATTRIBUTE_FORMAT ((__printf__, 1, 2))
65 _wsplt_error (const char *fmt, ...)
70 vfprintf (stderr, fmt, ap);
/* Forward declaration: _wsplt_nomem uses it ahead of its definition.  */
75 static void wordsplit_free_nodes (struct wordsplit *);
/* Record an out-of-memory failure in WSP.
   Sets ws_errno to WRDSE_NOSPACE, invokes the ws_alloc_die hook when
   WRDSF_ENOMEMABRT is set, prints the error when WRDSF_SHOWERR is set,
   and releases the pending node list with wordsplit_free_nodes.
   NOTE(review): the WRDSF_REUSE test guards additional cleanup -
   presumably the word vector is freed only when the structure is not
   being reused; confirm.  Callers return this function's result as their
   own status.  */
78 _wsplt_nomem (struct wordsplit *wsp)
81 wsp->ws_errno = WRDSE_NOSPACE;
82 if (wsp->ws_flags & WRDSF_ENOMEMABRT)
83 wsp->ws_alloc_die (wsp);
84 if (wsp->ws_flags & WRDSF_SHOWERR)
85 wordsplit_perror (wsp);
86 if (!(wsp->ws_flags & WRDSF_REUSE))
88 wordsplit_free_nodes (wsp);
/* Reset the per-run state of WSP before a new split.
   With WRDSF_REUSE the words of the previous run are freed unless
   WRDSF_APPEND asks to keep them.  ws_wordv is reset to NULL
   (presumably only on the non-reuse path - confirm) and the node list
   is emptied.  */
93 wordsplit_init0 (struct wordsplit *wsp)
95 if (wsp->ws_flags & WRDSF_REUSE)
97 if (!(wsp->ws_flags & WRDSF_APPEND))
98 wordsplit_free_words (wsp);
102 wsp->ws_wordv = NULL;
108 wsp->ws_head = wsp->ws_tail = NULL;
/* Prepare WSP for splitting INPUT according to FLAGS.

   - Stores FLAGS in ws_flags.
   - Installs the default ws_alloc_die / ws_error hooks unless the caller
     supplied its own (WRDSF_ALLOC_DIE / WRDSF_ERROR).
   - Variable expansion requested (WRDSF_NOVAR not set) without any
     variable source (neither WRDSF_ENV nor WRDSF_GETVAR) is a usage
     error: ws_errno becomes WRDSE_USAGE and is returned.
   - Command substitution is not implemented: unless WRDSF_NOCMD is set,
     ws_errno becomes WRDSE_NOSUPP and is returned.
   - With WRDSF_SHOWDBG but no caller-supplied debug hook (WRDSF_DEBUG),
     ws_debug falls back to ws_error or to _wsplt_error; WRDSF_SHOWDBG is
     cleared when no hook can be chosen (presumably - confirm the exact
     branch).
   - Fills in defaults for fields whose flag is absent: delimiters
     " \t\n", no comment characters, no closure.
   - Finishes with wordsplit_init0.

   Errors are also printed via wordsplit_perror when WRDSF_SHOWERR is
   set.  */
112 wordsplit_init (struct wordsplit *wsp, const char *input, size_t len,
115 wsp->ws_flags = flags;
117 if (!(wsp->ws_flags & WRDSF_ALLOC_DIE))
118 wsp->ws_alloc_die = _wsplt_alloc_die;
119 if (!(wsp->ws_flags & WRDSF_ERROR))
120 wsp->ws_error = _wsplt_error;
122 if (!(wsp->ws_flags & WRDSF_NOVAR)
123 && !(wsp->ws_flags & (WRDSF_ENV | WRDSF_GETVAR)))
126 wsp->ws_errno = WRDSE_USAGE;
127 if (wsp->ws_flags & WRDSF_SHOWERR)
128 wordsplit_perror (wsp);
129 return wsp->ws_errno;
132 if (!(wsp->ws_flags & WRDSF_NOCMD))
135 wsp->ws_errno = WRDSE_NOSUPP;
136 if (wsp->ws_flags & WRDSF_SHOWERR)
137 wordsplit_perror (wsp);
138 return wsp->ws_errno;
141 if (wsp->ws_flags & WRDSF_SHOWDBG)
143 if (!(wsp->ws_flags & WRDSF_DEBUG))
145 if (wsp->ws_flags & WRDSF_ERROR)
146 wsp->ws_debug = wsp->ws_error;
147 else if (wsp->ws_flags & WRDSF_SHOWERR)
148 wsp->ws_debug = _wsplt_error;
150 wsp->ws_flags &= ~WRDSF_SHOWDBG;
154 wsp->ws_input = input;
157 if (!(wsp->ws_flags & WRDSF_DOOFFS))
160 if (!(wsp->ws_flags & WRDSF_DELIM))
161 wsp->ws_delim = " \t\n";
163 if (!(wsp->ws_flags & WRDSF_COMMENT))
164 wsp->ws_comment = NULL;
166 if (!(wsp->ws_flags & WRDSF_CLOSURE))
167 wsp->ws_closure = NULL;
171 wordsplit_init0 (wsp);
177 alloc_space (struct wordsplit *wsp, size_t count)
179 size_t offs = (wsp->ws_flags & WRDSF_DOOFFS) ? wsp->ws_offs : 0;
183 if (wsp->ws_wordv == NULL)
185 newalloc = offs + count > ALLOC_INIT ? count : ALLOC_INIT;
186 ptr = calloc (newalloc, sizeof (ptr[0]));
188 else if (wsp->ws_wordn < offs + wsp->ws_wordc + count)
190 newalloc = offs + wsp->ws_wordc +
191 (count > ALLOC_INCR ? count : ALLOC_INCR);
192 ptr = realloc (wsp->ws_wordv, newalloc * sizeof (ptr[0]));
199 wsp->ws_wordn = newalloc;
203 return _wsplt_nomem (wsp);
/* Bits kept in the `flags' member of struct wordsplit_node.
   _WSNF_EMPTYOK is different: it is only a request bit passed to
   wordsplit_add_segm, which masks it out before storing the flags.  */
208 /* Node state flags */
209 #define _WSNF_NULL 0x01 /* null node (a noop) */
210 #define _WSNF_WORD 0x02 /* node contains word in v.word */
211 #define _WSNF_QUOTE 0x04 /* text is quoted */
212 #define _WSNF_NOEXPAND 0x08 /* text is not subject to expansion */
213 #define _WSNF_JOIN 0x10 /* node must be joined with the next node */
214 #define _WSNF_SEXP 0x20 /* is a sed expression */
216 #define _WSNF_EMPTYOK 0x0100 /* special flag indicating that
217 wordsplit_add_segm must add the
218 segment even if it is empty */
/* Element of the doubly-linked list of word fragments built while
   splitting.  A node carries its text in one of two forms, selected by
   _WSNF_WORD in `flags': a string in v.word, or a slice of ws_input
   delimited by v.segm.beg and v.segm.end (end is exclusive, see
   wsnode_len).  */
220 struct wordsplit_node
222 struct wordsplit_node *prev; /* Previous element */
223 struct wordsplit_node *next; /* Next element */
224 unsigned flags; /* Node flags */
229 size_t beg; /* Start of word in ws_input */
230 size_t end; /* End of word in ws_input */
/* Build a short symbolic rendering of node FLAGS for debug output
   (used by wordsplit_dump_nodes).  One position is produced per tested
   flag, in the order: WORD/NULL, QUOTE, NOEXPAND, JOIN, SEXP.
   The text is assembled in a static buffer, so the function is not
   reentrant and each call overwrites the previous result.  */
237 wsnode_flagstr (int flags)
239 static char retbuf[6];
242 if (flags & _WSNF_WORD)
244 else if (flags & _WSNF_NULL)
248 if (flags & _WSNF_QUOTE)
252 if (flags & _WSNF_NOEXPAND)
256 if (flags & _WSNF_JOIN)
260 if (flags & _WSNF_SEXP)
/* Return a pointer to the text of node P.  Null and word nodes are
   handled first (presumably yielding an empty string and v.word
   respectively - confirm); for segment nodes the result points into
   ws_input at v.segm.beg.  A segment is NOT NUL-terminated at its end:
   always pair this with wsnode_len.  */
269 wsnode_ptr (struct wordsplit *wsp, struct wordsplit_node *p)
271 if (p->flags & _WSNF_NULL)
273 else if (p->flags & _WSNF_WORD)
276 return wsp->ws_input + p->v.segm.beg;
/* Return the length in bytes of the text of node P: strlen of v.word
   for word nodes, end - beg for segment nodes.  Null nodes are tested
   first (presumably reported as length 0 - confirm).  */
280 wsnode_len (struct wordsplit_node *p)
282 if (p->flags & _WSNF_NULL)
284 else if (p->flags & _WSNF_WORD)
285 return strlen (p->v.word);
287 return p->v.segm.end - p->v.segm.beg;
/* Allocate a zero-filled node.  On allocation failure the error is
   recorded with _wsplt_nomem and its (non-zero) status returned.
   Presumably the new node is stored in *PNODE and 0 is returned on
   success - callers test the result for non-zero.  */
291 wsnode_new (struct wordsplit *wsp, struct wordsplit_node **pnode)
293 struct wordsplit_node *node = calloc (1, sizeof (*node));
295 return _wsplt_nomem (wsp);
/* Release node P.  The _WSNF_WORD test singles out nodes that carry a
   string in v.word (presumably that string is freed along with the
   node - confirm).  */
301 wsnode_free (struct wordsplit_node *p)
303 if (p->flags & _WSNF_WORD)
/* Append NODE at the tail of the node list of WSP: its `prev' is set to
   the current tail and the tail's `next' is linked to it.  */
309 wsnode_append (struct wordsplit *wsp, struct wordsplit_node *node)
312 node->prev = wsp->ws_tail;
314 wsp->ws_tail->next = node;
/* Unlink NODE from the node list of WSP without freeing it.
   Neighbours are re-linked and ws_head / ws_tail are updated when NODE
   was the first / last element.  The predecessor may lose its _WSNF_JOIN
   flag (presumably when NODE had no successor, so that nothing is left
   to join with - confirm).  NODE's own links are cleared.  */
321 wsnode_remove (struct wordsplit *wsp, struct wordsplit_node *node)
323 struct wordsplit_node *p;
328 p->next = node->next;
330 p->flags &= ~_WSNF_JOIN;
333 wsp->ws_head = node->next;
337 p->prev = node->prev;
339 wsp->ws_tail = node->prev;
341 node->next = node->prev = NULL;
/* Insert NODE into the list of WSP next to ANCHOR.
   One branch makes NODE the only element of the list (presumably when
   the list is empty).  Insertion before ANCHOR is expressed as an
   insertion after ANCHOR's predecessor via a recursive call with
   BEFORE == 0.  Callers in this file pass BEFORE == 0 to insert after
   the anchor.  */
345 wsnode_insert (struct wordsplit *wsp, struct wordsplit_node *node,
346 struct wordsplit_node *anchor, int before)
350 node->next = node->prev = NULL;
351 wsp->ws_head = wsp->ws_tail = node;
356 wsnode_insert (wsp, node, anchor->prev, 0);
367 struct wordsplit_node *p;
/* Append to the node list a segment node that refers to the slice of
   ws_input from BEG up to (not including) END.
   Empty slices (END == BEG) are skipped unless FLG contains
   _WSNF_EMPTYOK.  The stored flags are FLG without _WSNF_WORD and
   _WSNF_EMPTYOK.  The node is created with wsnode_new; its status is
   kept in rc.  */
381 wordsplit_add_segm (struct wordsplit *wsp, size_t beg, size_t end, int flg)
383 struct wordsplit_node *node;
386 if (end == beg && !(flg & _WSNF_EMPTYOK))
388 rc = wsnode_new (wsp, &node);
391 node->flags = flg & ~(_WSNF_WORD | _WSNF_EMPTYOK);
392 node->v.segm.beg = beg;
393 node->v.segm.end = end;
394 wsnode_append (wsp, node);
/* Dispose of the whole node list of WSP.  The successor of each node is
   saved before the node is processed, so the walk survives the node's
   release; ws_head and ws_tail are reset to NULL afterwards.  */
399 wordsplit_free_nodes (struct wordsplit *wsp)
401 struct wordsplit_node *p;
403 for (p = wsp->ws_head; p;)
405 struct wordsplit_node *next = p->next;
409 wsp->ws_head = wsp->ws_tail = NULL;
/* Debugging aid: print every node of WSP's list through the ws_debug
   hook.  Each line shows the node index, its address, the flags in hex
   and in symbolic form (wsnode_flagstr), and the node text - v.word for
   word nodes, otherwise the referenced slice of ws_input printed with a
   precision of end - beg.  */
413 wordsplit_dump_nodes (struct wordsplit *wsp)
415 struct wordsplit_node *p;
418 for (p = wsp->ws_head, n = 0; p; p = p->next, n++)
420 if (p->flags & _WSNF_WORD)
421 wsp->ws_debug ("%4d: %p: %#04x (%s):%s;",
422 n, p, p->flags, wsnode_flagstr (p->flags), p->v.word);
424 wsp->ws_debug ("%4d: %p: %#04x (%s):%.*s;",
425 n, p, p->flags, wsnode_flagstr (p->flags),
426 (int) (p->v.segm.end - p->v.segm.beg),
427 wsp->ws_input + p->v.segm.beg);
/* Merge NODE and the nodes chained to it by _WSNF_JOIN into one word.
   First pass: walk the run of joinable nodes (plus the node that ends
   it) and sum their lengths.  A buffer of len + 1 bytes is then
   allocated (failure is reported via _wsplt_nomem) and the node texts
   are copied into it in list order; nodes absorbed along the way are
   unlinked with wsnode_remove.  Finally NODE loses _WSNF_JOIN and
   becomes a word node (_WSNF_WORD); the test on _WSNF_WORD just before
   presumably releases a string NODE already owned - confirm.  */
432 coalesce_segment (struct wordsplit *wsp, struct wordsplit_node *node)
434 struct wordsplit_node *p, *end;
439 for (p = node; p && (p->flags & _WSNF_JOIN); p = p->next)
441 len += wsnode_len (p);
443 len += wsnode_len (p);
446 buf = malloc (len + 1);
448 return _wsplt_nomem (wsp);
452 for (stop = 0; !stop;)
454 struct wordsplit_node *next = p->next;
455 const char *str = wsnode_ptr (wsp, p);
456 size_t slen = wsnode_len (p);
458 memcpy (cur, str, slen);
462 wsnode_remove (wsp, p);
471 node->flags &= ~_WSNF_JOIN;
473 if (node->flags & _WSNF_WORD)
476 node->flags |= _WSNF_WORD;
/* Remove quoting (backslash escapes) from the text of every node.
   The unquoting routine is wordsplit_c_unquote_copy with
   WRDSF_CESCAPES, otherwise wordsplit_sh_unquote_copy; with
   WRDSF_ESCAPE, wordsplit_general_unquote_copy is used instead.
   With WRDSF_QUOTE, a node is unquoted only if it is not marked
   _WSNF_NOEXPAND.  Segment nodes are first converted into word nodes
   holding a private copy (slen + 1 bytes) of their text, so that the
   result can be written into v.word.  Allocation failure is reported
   via _wsplt_nomem.  */
482 wsnode_quoteremoval (struct wordsplit *wsp)
484 struct wordsplit_node *p;
485 void (*uqfn) (char *, const char *, size_t) =
486 (wsp->ws_flags & WRDSF_CESCAPES) ?
487 wordsplit_c_unquote_copy : wordsplit_sh_unquote_copy;
489 for (p = wsp->ws_head; p; p = p->next)
491 const char *str = wsnode_ptr (wsp, p);
492 size_t slen = wsnode_len (p);
495 if (wsp->ws_flags & WRDSF_QUOTE)
497 unquote = !(p->flags & _WSNF_NOEXPAND);
504 if (!(p->flags & _WSNF_WORD))
506 char *newstr = malloc (slen + 1);
508 return _wsplt_nomem (wsp);
509 memcpy (newstr, str, slen);
512 p->flags |= _WSNF_WORD;
515 if (wsp->ws_flags & WRDSF_ESCAPE)
516 wordsplit_general_unquote_copy (p->v.word, str, slen,
519 uqfn (p->v.word, str, slen);
/* Walk the node list and run coalesce_segment on every node that
   carries _WSNF_JOIN; a non-zero result from coalesce_segment is
   treated as failure.  */
526 wsnode_coalesce (struct wordsplit *wsp)
528 struct wordsplit_node *p;
530 for (p = wsp->ws_head; p; p = p->next)
532 if (p->flags & _WSNF_JOIN)
533 if (coalesce_segment (wsp, p))
/* Convert the node list into the final word vector.
   A first loop walks the list (presumably counting the nodes into n);
   alloc_space then reserves n + 1 slots, the extra one being for the
   terminating NULL.  A second loop copies the text of each node into a
   freshly allocated string of slen + 1 bytes and stores it at
   ws_wordv[ws_offs + ws_wordc].  The pointer is stored before it is
   checked, and allocation failure is reported via _wsplt_nomem.  The
   vector is terminated with NULL.  */
540 wordsplit_finish (struct wordsplit *wsp)
542 struct wordsplit_node *p;
547 for (p = wsp->ws_head; p; p = p->next)
550 if (alloc_space (wsp, n + 1))
553 for (p = wsp->ws_head; p; p = p->next)
555 const char *str = wsnode_ptr (wsp, p);
556 size_t slen = wsnode_len (p);
557 char *newstr = malloc (slen + 1);
559 /* Assign newstr first, even if it is NULL. This way
560 wordsplit_free will work even if we return
562 wsp->ws_wordv[wsp->ws_offs + wsp->ws_wordc] = newstr;
564 return _wsplt_nomem (wsp);
565 memcpy (newstr, str, slen);
571 wsp->ws_wordv[wsp->ws_offs + wsp->ws_wordc] = NULL;
576 /* Variable expansion */
/* Create a new node holding LEN bytes of NODE's text starting at offset
   BEG, and insert it after *PTAIL.
   If NODE is a word node the bytes are copied into a new string of
   LEN + 1 bytes and the new node becomes a word node; otherwise the new
   node refers to the corresponding sub-range of NODE's segment of
   ws_input.  FLG is OR-ed into the new node's flags.
   Allocation failures are reported via wsnode_new / _wsplt_nomem.  */
578 node_split_prefix (struct wordsplit *wsp,
579 struct wordsplit_node **ptail,
580 struct wordsplit_node *node,
581 size_t beg, size_t len, int flg)
583 struct wordsplit_node *newnode;
587 if (wsnode_new (wsp, &newnode))
589 wsnode_insert (wsp, newnode, *ptail, 0);
590 if (node->flags & _WSNF_WORD)
592 const char *str = wsnode_ptr (wsp, node);
593 char *newstr = malloc (len + 1);
595 return _wsplt_nomem (wsp);
596 memcpy (newstr, str + beg, len);
598 newnode->flags = _WSNF_WORD;
599 newnode->v.word = newstr;
603 newnode->v.segm.beg = node->v.segm.beg + beg;
604 newnode->v.segm.end = newnode->v.segm.beg + len;
606 newnode->flags |= flg;
/* Look for the curly brace that closes a variable reference in STR,
   scanning from index I up to LEN with a small state machine that
   tracks single- and double-quoted stretches (st_squote / st_dquote).
   The caller (expvar) treats a non-zero result as "no closing brace";
   presumably the brace position is stored in *POFF on success -
   confirm.  */
612 find_closing_cbrace (const char *str, size_t i, size_t len, size_t * poff)
615 { st_init, st_squote, st_dquote } state = st_init;
655 else if (str[i] == '"')
/* Look up the variable NAME (LEN bytes, not necessarily
   NUL-terminated) in the environment attached to WSP.  Nothing is
   looked up unless WRDSF_ENV is set.

   Two layouts of ws_env are supported:
   - WRDSF_ENV_KV: names and values alternate (name, value, name, ...).
     A name matches when it has exactly LEN bytes equal to NAME; the
     following element is returned.  The scan stops at a NULL name or a
     NULL value.
   - otherwise: the usual "NAME=VALUE" strings.  An entry matches when
     its first LEN bytes equal NAME and are followed by '='.  */
664 wordsplit_find_env (struct wordsplit *wsp, const char *name, size_t len)
668 if (!(wsp->ws_flags & WRDSF_ENV))
671 if (wsp->ws_flags & WRDSF_ENV_KV)
673 /* A key-value pair environment */
674 for (i = 0; wsp->ws_env[i]; i++)
676 size_t elen = strlen (wsp->ws_env[i]);
677 if (elen == len && memcmp (wsp->ws_env[i], name, elen) == 0)
678 return wsp->ws_env[i + 1];
679 /* Skip the value. Break the loop if it is NULL. */
681 if (wsp->ws_env[i] == NULL)
687 /* Usual (A=B) environment. */
688 for (i = 0; wsp->ws_env[i]; i++)
691 const char *var = wsp->ws_env[i];
693 for (j = 0; j < len; j++)
694 if (name[j] != var[j])
696 if (j == len && var[j] == '=')
/* Expand one variable reference.  STR points just past the introducing
   character (`start' is STR - 1), LEN is the number of bytes available.
   New nodes are inserted after *PTAIL; FLG is merged into their flags.

   Recognized forms:
   - NAME, where NAME starts with a letter or '_' and continues with
     alphanumerics or '_';
   - {NAME} and {NAME:DEFAULT}; the closing brace of the latter is found
     with find_closing_cbrace.  An unbalanced brace sets ws_errno to
     WRDSE_CBRACE.
   - anything else: a two-character word consisting of '$' and STR[0] is
     emitted literally.

   The value is taken from wordsplit_find_env (a private copy is made
   with strdup), else from the ws_getvar callback if WRDSF_GETVAR is
   set.  If the variable is undefined: WRDSF_UNDEF makes it an error
   (WRDSE_UNDEF); otherwise WRDSF_WARNUNDEF emits a warning and
   WRDSF_KEEPUNDEF is consulted.

   Result nodes:
   - value, quoted context (_WSNF_QUOTE in FLG): one word node marked
     _WSNF_NOEXPAND holding a copy of the value;
   - empty value: a null node;
   - otherwise the value is split again with wordsplit() using the same
     delimiters and WRDSF_NOVAR | WRDSF_NOCMD | WRDSF_DELIM | WRDSF_WS;
     each resulting word becomes a word node, and only the last one
     keeps _WSNF_JOIN from FLG;
   - no value with WRDSF_KEEPUNDEF: the reference text itself
     (from `start' through *PEND) is copied into a _WSNF_NOEXPAND word;
   - no value otherwise: a null node.

   NOTE(review): the strdup'ed value does not appear to be released on
   the early error returns (nested wordsplit failure, _wsplt_nomem) -
   check for a leak.  Ownership of the string returned by ws_getvar is
   not evident from this function either; confirm against the API
   contract.  */
704 expvar (struct wordsplit *wsp, const char *str, size_t len,
705 struct wordsplit_node **ptail, const char **pend, int flg)
708 const char *defstr = NULL;
711 struct wordsplit_node *newnode;
712 const char *start = str - 1;
714 if (ISALPHA (str[0]) || str[0] == '_')
716 for (i = 1; i < len; i++)
717 if (!(ISALNUM (str[i]) || str[i] == '_'))
721 else if (str[0] == '{')
725 for (i = 1; i < len; i++)
726 if (str[i] == '}' || str[i] == ':')
732 defstr = str + i + 1;
733 if (find_closing_cbrace (str, i + 1, len, &j))
735 wsp->ws_errno = WRDSE_CBRACE;
740 else if (str[i] == '}')
747 wsp->ws_errno = WRDSE_CBRACE;
753 if (wsnode_new (wsp, &newnode))
755 wsnode_insert (wsp, newnode, *ptail, 0);
757 newnode->flags = _WSNF_WORD | flg;
758 newnode->v.word = malloc (3);
759 if (!newnode->v.word)
760 return _wsplt_nomem (wsp);
761 newnode->v.word[0] = '$';
762 newnode->v.word[1] = str[0];
763 newnode->v.word[2] = 0;
768 /* Actually expand the variable */
769 /* str - start of the variable name
771 defstr - default replacement str */
773 vptr = wordsplit_find_env (wsp, str, i);
776 value = strdup (vptr);
778 return _wsplt_nomem (wsp);
780 else if (wsp->ws_flags & WRDSF_GETVAR)
781 value = wsp->ws_getvar (str, i, wsp->ws_closure);
782 else if (wsp->ws_flags & WRDSF_UNDEF)
784 wsp->ws_errno = WRDSE_UNDEF;
785 if (wsp->ws_flags & WRDSF_SHOWERR)
786 wordsplit_perror (wsp);
791 if (wsp->ws_flags & WRDSF_WARNUNDEF)
792 wsp->ws_error (_("warning: undefined variable `%.*s'"), (int) i, str);
793 if (wsp->ws_flags & WRDSF_KEEPUNDEF)
799 /* FIXME: handle defstr */
804 if (flg & _WSNF_QUOTE)
806 if (wsnode_new (wsp, &newnode))
808 wsnode_insert (wsp, newnode, *ptail, 0);
810 newnode->flags = _WSNF_WORD | _WSNF_NOEXPAND | flg;
811 newnode->v.word = strdup (value);
812 if (!newnode->v.word)
813 return _wsplt_nomem (wsp);
815 else if (*value == 0)
817 /* Empty string is a special case */
818 if (wsnode_new (wsp, &newnode))
820 wsnode_insert (wsp, newnode, *ptail, 0);
822 newnode->flags = _WSNF_NULL;
829 ws.ws_delim = wsp->ws_delim;
830 if (wordsplit (value, &ws,
831 WRDSF_NOVAR | WRDSF_NOCMD | WRDSF_DELIM | WRDSF_WS))
833 wordsplit_free (&ws);
836 for (i = 0; i < ws.ws_wordc; i++)
838 if (wsnode_new (wsp, &newnode))
840 wsnode_insert (wsp, newnode, *ptail, 0);
842 newnode->flags = _WSNF_WORD |
844 (i + 1 < ws.ws_wordc ? (flg & ~_WSNF_JOIN) : flg);
845 newnode->v.word = strdup (ws.ws_wordv[i]);
846 if (!newnode->v.word)
847 return _wsplt_nomem (wsp);
849 wordsplit_free (&ws);
852 else if (wsp->ws_flags & WRDSF_KEEPUNDEF)
854 size_t size = *pend - start + 1;
856 if (wsnode_new (wsp, &newnode))
858 wsnode_insert (wsp, newnode, *ptail, 0);
860 newnode->flags = _WSNF_WORD | _WSNF_NOEXPAND | flg;
861 newnode->v.word = malloc (size + 1);
862 if (!newnode->v.word)
863 return _wsplt_nomem (wsp);
864 memcpy (newnode->v.word, start, size);
865 newnode->v.word[size] = 0;
869 if (wsnode_new (wsp, &newnode))
871 wsnode_insert (wsp, newnode, *ptail, 0);
873 newnode->flags = _WSNF_NULL;
/* Expand the variable references found in the text of NODE.
   The text is scanned from start to end.  Literal text preceding a
   reference is split off into its own node with node_split_prefix and
   marked _WSNF_JOIN so that it stays glued to what follows; the
   reference itself is handed to expvar, which receives NODE's
   _WSNF_JOIN / _WSNF_QUOTE flags and advances the scan pointer.
   Text remaining after the last reference is split off as well,
   inheriting NODE's _WSNF_JOIN flag.  `tail' tracks the node after
   which new nodes are inserted.  NODE is unlinked from the list with
   wsnode_remove at the end (presumably only when it was actually
   replaced - confirm).  */
879 node_expand_vars (struct wordsplit *wsp, struct wordsplit_node *node)
881 const char *str = wsnode_ptr (wsp, node);
882 size_t slen = wsnode_len (node);
883 const char *end = str + slen;
886 struct wordsplit_node *tail = node;
888 for (p = str; p < end; p++)
900 tail->flags |= _WSNF_JOIN;
901 if (node_split_prefix (wsp, &tail, node, off, n, _WSNF_JOIN))
904 if (expvar (wsp, p, slen - n, &tail, &p,
905 node->flags & (_WSNF_JOIN | _WSNF_QUOTE)))
914 tail->flags |= _WSNF_JOIN;
915 if (node_split_prefix (wsp, &tail, node, off, p - str,
916 node->flags & _WSNF_JOIN))
921 wsnode_remove (wsp, node);
/* Drop every node flagged _WSNF_NULL from the list of WSP.  The
   successor is saved before a node is unlinked, so the walk continues
   safely after wsnode_remove.  */
927 /* Remove NULL lists */
929 wsnode_nullelim (struct wordsplit *wsp)
931 struct wordsplit_node *p;
933 for (p = wsp->ws_head; p;)
935 struct wordsplit_node *next = p->next;
936 if (p->flags & _WSNF_NULL)
938 wsnode_remove (wsp, p);
/* Variable expansion pass: run node_expand_vars on every node that is
   not marked _WSNF_NOEXPAND, then remove the null nodes left behind.
   The successor is fetched before expansion because node_expand_vars
   may replace the current node.  */
946 wordsplit_varexp (struct wordsplit *wsp)
948 struct wordsplit_node *p;
950 for (p = wsp->ws_head; p;)
952 struct wordsplit_node *next = p->next;
953 if (!(p->flags & _WSNF_NOEXPAND))
954 if (node_expand_vars (wsp, p))
959 wsnode_nullelim (wsp);
/* Whitespace here means the characters accepted by ISWS (space, tab,
   newline).  Quoted nodes (_WSNF_QUOTE) are tested separately
   (presumably skipped - confirm).  Trimming only moves the segment
   bounds; a segment that becomes empty is flagged _WSNF_NULL and then
   removed by wsnode_nullelim.  */
963 /* Strip off any leading and trailing whitespace. This function is called
964 right after the initial scanning, therefore it assumes that every
965 node in the list is a text reference node. */
967 wordsplit_trimws (struct wordsplit *wsp)
969 struct wordsplit_node *p;
971 for (p = wsp->ws_head; p; p = p->next)
975 if (p->flags & _WSNF_QUOTE)
978 /* Skip leading whitespace: */
979 for (n = p->v.segm.beg; n < p->v.segm.end && ISWS (wsp->ws_input[n]);
983 /* Trim trailing whitespace */
984 for (n = p->v.segm.end;
985 n > p->v.segm.beg && ISWS (wsp->ws_input[n - 1]); n--);
987 if (p->v.segm.beg == p->v.segm.end)
988 p->flags |= _WSNF_NULL;
991 wsnode_nullelim (wsp);
/* Skip over a sed-style substitution expression in COMMAND starting at
   index I (LEN is the total length).  An expression has the form
   's', a punctuation delimiter, and further parts separated by the same
   delimiter; backslash escapes are recognized while scanning.  Several
   expressions may follow one another separated by ';'.  scan_word uses
   the result as the index at which scanning continues.  The scanner is
   a small state machine; NOTE(review): its exact state transitions
   should be checked against the full function body.  */
995 skip_sed_expr (const char *command, size_t i, size_t len)
1003 if (command[i] == ';')
1005 if (!(command[i] == 's' && i + 3 < len && ISPUNCT (command[i + 1])))
1008 delim = command[++i];
1010 for (i++; i < len; i++)
1014 if (command[i] == delim || !ISALNUM (command[i]))
1017 else if (command[i] == '\\')
1019 else if (command[i] == delim)
1023 while (state == 3 && i < len && command[i] == ';');
/* Compute the position at which the next word scan should start,
   beginning at ws_endp.
   With WRDSF_SQUEEZE_DELIMS runs of delimiters are collapsed: when
   delimiters are also returned as words (WRDSF_RETURN_DELIMS) only
   repetitions of the same delimiter character are skipped, otherwise
   any run of delimiter characters is.  Without squeezing, the
   WRDSF_RETURN_DELIMS test decides whether the single delimiter at
   `start' is stepped over (presumably it is skipped only when
   delimiters are not returned - confirm).  */
1028 skip_delim (struct wordsplit *wsp)
1030 size_t start = wsp->ws_endp;
1031 if (wsp->ws_flags & WRDSF_SQUEEZE_DELIMS)
1033 if ((wsp->ws_flags & WRDSF_RETURN_DELIMS) &&
1034 ISDELIM (wsp, wsp->ws_input[start]))
1036 int delim = wsp->ws_input[start];
1039 while (start < wsp->ws_len && delim == wsp->ws_input[start]);
1045 while (start < wsp->ws_len && ISDELIM (wsp, wsp->ws_input[start]));
1050 if (!(wsp->ws_flags & WRDSF_RETURN_DELIMS))
/* Scan a quoted string.  START is the index of the opening quote in
   ws_input; the same character closes the string.  Inside double
   quotes a backslash is given special treatment (presumably it makes
   the scan step over the following character - confirm).
   If the closing quote is found, the text between the quotes is added
   as a segment flagged _WSNF_QUOTE | _WSNF_EMPTYOK, so that an empty
   quoted string still produces a word; _WSNF_NOEXPAND is added in one
   case (presumably for single quotes - confirm).
   If the input ends first, ws_endp is set to START, ws_errno to
   WRDSE_QUOTE, and the error is printed when WRDSF_SHOWERR is set.  */
1061 scan_qstring (struct wordsplit *wsp, size_t start, size_t * end)
1064 const char *command = wsp->ws_input;
1065 size_t len = wsp->ws_len;
1066 char q = command[start];
1068 for (j = start + 1; j < len && command[j] != q; j++)
1069 if (q == '"' && command[j] == '\\')
1071 if (j < len && command[j] == q)
1073 int flags = _WSNF_QUOTE | _WSNF_EMPTYOK;
1075 flags |= _WSNF_NOEXPAND;
1076 if (wordsplit_add_segm (wsp, start + 1, j, flags))
1082 wsp->ws_endp = start;
1083 wsp->ws_errno = WRDSE_QUOTE;
1084 if (wsp->ws_flags & WRDSF_SHOWERR)
1085 wordsplit_perror (wsp);
/* Scan one word of ws_input beginning at START and add the
   corresponding segment(s) to the node list.

   - WRDSE_EOF is recorded in ws_errno on one early path (presumably
     when START is at or beyond the end of input - confirm).
   - With WRDSF_SED_EXPR, a word starting with 's' followed by
     punctuation is consumed as a whole by skip_sed_expr.
   - Otherwise, if the word does not start with a delimiter, characters
     are examined one by one:
       * a comment character (ws_comment) ends the word; the rest of the
         line up to '\n' is scanned over and the text collected so far
         is added as a segment;
       * with WRDSF_QUOTE a backslash is handled specially;
       * a quote character enabled by WRDSF_SQUOTE / WRDSF_DQUOTE splits
         the word: the text before it is added as a _WSNF_JOIN segment
         and the quoted part is scanned by scan_qstring;
       * a delimiter ends the word.
   - If the word starts with a delimiter, WRDSF_RETURN_DELIMS and
     WRDSF_SQUEEZE_DELIMS decide how it is treated; without squeezing
     _WSNF_EMPTYOK is set so that an empty field is preserved.

   The final segment is added with the accumulated flags; when pieces
   were joined, the current tail is marked _WSNF_JOIN first.  The
   WRDSF_INCREMENTAL test at the end selects the status returned to
   wordsplit_process_list, which keeps scanning while the result is
   _WRDS_OK.  */
1092 scan_word (struct wordsplit *wsp, size_t start)
1094 size_t len = wsp->ws_len;
1095 const char *command = wsp->ws_input;
1096 const char *comment = wsp->ws_comment;
1104 wsp->ws_errno = WRDSE_EOF;
1110 if (wsp->ws_flags & WRDSF_SED_EXPR
1111 && command[i] == 's' && i + 3 < len && ISPUNCT (command[i + 1]))
1114 i = skip_sed_expr (command, i, len);
1116 else if (!ISDELIM (wsp, command[i]))
1120 if (comment && strchr (comment, command[i]) != NULL)
1123 for (j = i + 1; j < len && command[j] != '\n'; j++)
1125 if (wordsplit_add_segm (wsp, start, i, 0))
1131 if (wsp->ws_flags & WRDSF_QUOTE)
1133 if (command[i] == '\\')
1141 if (((wsp->ws_flags & WRDSF_SQUOTE) && command[i] == '\'') ||
1142 ((wsp->ws_flags & WRDSF_DQUOTE) && command[i] == '"'))
1144 if (join && wsp->ws_tail)
1145 wsp->ws_tail->flags |= _WSNF_JOIN;
1146 if (wordsplit_add_segm (wsp, start, i, _WSNF_JOIN))
1148 if (scan_qstring (wsp, i, &i))
1155 if (ISDELIM (wsp, command[i]))
1161 else if (wsp->ws_flags & WRDSF_RETURN_DELIMS)
1165 else if (!(wsp->ws_flags & WRDSF_SQUEEZE_DELIMS))
1166 flags |= _WSNF_EMPTYOK;
1168 if (join && i > start && wsp->ws_tail)
1169 wsp->ws_tail->flags |= _WSNF_JOIN;
1170 if (wordsplit_add_segm (wsp, start, i, flags))
1173 if (wsp->ws_flags & WRDSF_INCREMENTAL)
/* C escape translation table, laid out as consecutive pairs: the
   letter that follows the backslash, then the character it stands for
   (e.g. 'n' followed by newline).  Walked two bytes at a time by
   wordsplit_c_unquote_char and wordsplit_c_quote_char.  */
1178 static char quote_transtab[] = "\\\\\"\"a\ab\bf\fn\nr\rt\tv\v";
/* Translate the escape letter C (the character following a backslash)
   using quote_transtab, which is scanned pair by pair from the
   beginning.  Presumably the second member of the matching pair is
   returned, and C itself when there is no match - confirm.  */
1181 wordsplit_c_unquote_char (int c)
1185 for (p = quote_transtab; *p; p += 2)
/* Reverse lookup in quote_transtab: the table is scanned backwards,
   starting at the last translated character and stepping over one pair
   at a time.  wordsplit_c_quoted_length treats a result of -1 as "C has
   no symbolic escape".  */
1194 wordsplit_c_quote_char (int c)
1198 for (p = quote_transtab + sizeof (quote_transtab) - 2;
1199 p > quote_transtab; p -= 2)
1208 (ISDIGIT(c) ? c - '0' : (ISXDIGIT(c) ? toupper(c) - 'A' + 10 : 255 ))
/* Convert up to CNT characters at SRC to a number in the given BASE.
   Conversion stops at the first byte that is non-ASCII (> 127) or
   whose digit value, as computed by to_num, is not below BASE.
   wordsplit_c_unquote_copy uses the result as the number of characters
   consumed; presumably the accumulated value is stored in *PVAL -
   confirm.  */
1211 xtonum (int *pval, const char *src, int base, int cnt)
1215 for (i = 0, val = 0; i < cnt; i++, src++)
1217 int n = *(unsigned char *) src;
1218 if (n > 127 || (n = to_num (n)) >= base)
1220 val = val * base + n;
/* Compute the length STR would have after C-style quoting (the
   counterpart of wordsplit_c_quote_copy).  Characters are classified
   as: space or double quote; printable characters other than tab and
   backslash (presumably counted as themselves); characters that have a
   symbolic escape according to wordsplit_c_quote_char; everything
   else.  QUOTE_HEX selects the escape style for the last group (see
   wordsplit_c_quote_copy).  NOTE(review): *QUOTE is presumably set
   when the string needs to be enclosed in quotes - confirm.  */
1227 wordsplit_c_quoted_length (const char *str, int quote_hex, int *quote)
1234 if (strchr (" \"", *str))
1239 else if (*str == '"')
1241 else if (*str != '\t' && *str != '\\' && ISPRINT (*str))
1247 if (wordsplit_c_quote_char (*str) != -1)
/* Copy the first N bytes of SRC to DST, dropping each backslash that
   precedes a character listed in ESCAPABLE.  Any other backslash is
   copied verbatim.  DST is always NUL-terminated and must have room for
   N + 1 bytes.

   A backslash is treated as an escape only if the escaped character
   lies within the first N bytes: SRC may be a slice of a larger buffer,
   so SRC[N] must never be examined or copied.  */
void
wordsplit_general_unquote_copy (char *dst, const char *src, size_t n,
				const char *escapable)
{
  size_t i = 0;

  while (i < n)
    {
      if (src[i] == '\\' && i + 1 < n && strchr (escapable, src[i + 1]))
	i++;			/* Drop the backslash, keep what follows */
      *dst++ = src[i++];
    }
  *dst = 0;
}
/* Shell-style unquoting of the first N bytes of SRC into DST; selected
   by wsnode_quoteremoval when WRDSF_CESCAPES is not set.  Judging by
   its caller, DST must have room for N + 1 bytes - confirm.  */
1272 wordsplit_sh_unquote_copy (char *dst, const char *src, size_t n)
/* C-style unquoting of the first N bytes of SRC into DST; selected by
   wsnode_quoteremoval when WRDSF_CESCAPES is set.  Recognizes
   hexadecimal escapes introduced by 'x' or 'X' (converted by xtonum in
   base 16), octal escapes of up to three digits (xtonum in base 8), and
   symbolic escapes translated by wordsplit_c_unquote_char.  */
1286 wordsplit_c_unquote_copy (char *dst, const char *src, size_t n)
1296 if (src[i] == 'x' || src[i] == 'X')
1305 int off = xtonum (&c, src + i + 1,
1319 else if ((unsigned char) src[i] < 128 && ISDIGIT (src[i]))
1328 int off = xtonum (&c, src + i, 8, 3);
1342 *dst++ = wordsplit_c_unquote_char (src[i++]);
/* Copy SRC to DST applying C-style quoting.  Printable characters
   other than tab and backslash take one branch (presumably copied
   unchanged - confirm).  Other characters are written either as a
   three-character "%XX" hexadecimal escape, or as a symbolic escape
   when wordsplit_c_quote_char knows one, or else as three octal digits;
   QUOTE_HEX presumably selects the "%XX" form - confirm.  Formatted
   escapes are built in a temporary buffer and exactly three bytes are
   copied from it.  */
1351 wordsplit_c_quote_copy (char *dst, const char *src, int quote_hex)
1360 else if (*src != '\t' && *src != '\\' && ISPRINT (*src))
1368 snprintf (tmp, sizeof tmp, "%%%02X", *(unsigned char *) src);
1369 memcpy (dst, tmp, 3);
1374 int c = wordsplit_c_quote_char (*src);
1380 snprintf (tmp, sizeof tmp, "%03o", *(unsigned char *) src);
1381 memcpy (dst, tmp, 3);
/* Run the splitting pipeline on ws_input starting at offset START.

   1. Scanning: with WRDSF_NOSPLIT the whole remaining input becomes one
      quoted segment; otherwise scan_word / skip_delim are called in
      turn while scan_word returns _WRDS_OK.  The tail node is then
      stripped of _WSNF_JOIN, and a scan error (_WRDS_ERR) aborts with
      ws_errno.
   2. With WRDSF_WS, leading and trailing whitespace is trimmed from
      the nodes.
   3. Unless WRDSF_NOVAR is set, variables are expanded; on failure the
      node list is freed and ws_errno returned.
   4. Quote removal (wsnode_quoteremoval).
   5. Coalescing of joined nodes (wsnode_coalesce).

   With WRDSF_SHOWDBG the node list is dumped after each stage.
   Returns ws_errno.  */
1390 wordsplit_process_list (struct wordsplit *wsp, size_t start)
1392 if (wsp->ws_flags & WRDSF_NOSPLIT)
1394 /* Treat entire input as a quoted argument */
1395 if (wordsplit_add_segm (wsp, start, wsp->ws_len, _WSNF_QUOTE))
1396 return wsp->ws_errno;
1402 while ((rc = scan_word (wsp, start)) == _WRDS_OK)
1403 start = skip_delim (wsp);
1404 /* Make sure tail element is not joinable */
1406 wsp->ws_tail->flags &= ~_WSNF_JOIN;
1407 if (rc == _WRDS_ERR)
1408 return wsp->ws_errno;
1411 if (wsp->ws_flags & WRDSF_SHOWDBG)
1413 wsp->ws_debug ("Initial list:");
1414 wordsplit_dump_nodes (wsp);
1417 if (wsp->ws_flags & WRDSF_WS)
1419 /* Trim leading and trailing whitespace */
1420 wordsplit_trimws (wsp);
1421 if (wsp->ws_flags & WRDSF_SHOWDBG)
1423 wsp->ws_debug ("After WS trimming:");
1424 wordsplit_dump_nodes (wsp);
1428 /* Expand variables (FIXME: & commands) */
1429 if (!(wsp->ws_flags & WRDSF_NOVAR))
1431 if (wordsplit_varexp (wsp))
1433 wordsplit_free_nodes (wsp);
1434 return wsp->ws_errno;
1436 if (wsp->ws_flags & WRDSF_SHOWDBG)
1438 wsp->ws_debug ("Expanded list:");
1439 wordsplit_dump_nodes (wsp);
1445 if (wsnode_quoteremoval (wsp))
1447 if (wsp->ws_flags & WRDSF_SHOWDBG)
1449 wsp->ws_debug ("After quote removal:");
1450 wordsplit_dump_nodes (wsp);
1453 if (wsnode_coalesce (wsp))
1456 if (wsp->ws_flags & WRDSF_SHOWDBG)
1458 wsp->ws_debug ("Coalesced list:");
1459 wordsplit_dump_nodes (wsp);
1463 return wsp->ws_errno;
/* Split the first LENGTH bytes of COMMAND into words, storing the
   result in WSP.

   Two code paths are distinguished by a test on WRDSF_INCREMENTAL:
   - a continuation path, which resumes from ws_endp in the input
     already stored in WSP: delimiters are skipped; if the input is
     exhausted ws_errno is set to WRDSE_NOINPUT and returned; otherwise
     WRDSF_REUSE is forced and the per-run state is reset with
     wordsplit_init0;
   - an initialization path, which sets WSP up with wordsplit_init.
   NOTE(review): which path is taken for which flag value (and what
   role a null COMMAND plays) should be confirmed against the full
   body.

   The input is then processed by wordsplit_process_list.  In
   incremental mode processing is restarted while it produced no nodes
   and input remains.  Finally the node list is converted into the word
   vector (wordsplit_finish) and freed.  Returns ws_errno.  */
1467 wordsplit_len (const char *command, size_t length, struct wordsplit *wsp,
1477 if (!(flags & WRDSF_INCREMENTAL))
1480 start = skip_delim (wsp);
1481 if (wsp->ws_endp == wsp->ws_len)
1483 wsp->ws_errno = WRDSE_NOINPUT;
1484 if (wsp->ws_flags & WRDSF_SHOWERR)
1485 wordsplit_perror (wsp);
1486 return wsp->ws_errno;
1489 cmdptr = wsp->ws_input + wsp->ws_endp;
1490 cmdlen = wsp->ws_len - wsp->ws_endp;
1491 wsp->ws_flags |= WRDSF_REUSE;
1492 wordsplit_init0 (wsp);
1499 rc = wordsplit_init (wsp, cmdptr, cmdlen, flags);
1504 if (wsp->ws_flags & WRDSF_SHOWDBG)
1505 wsp->ws_debug ("Input:%.*s;", (int) cmdlen, cmdptr);
1507 rc = wordsplit_process_list (wsp, start);
1508 if (rc == 0 && (flags & WRDSF_INCREMENTAL))
1510 while (!wsp->ws_head && wsp->ws_endp < wsp->ws_len)
1512 start = skip_delim (wsp);
1513 if (wsp->ws_flags & WRDSF_SHOWDBG)
1515 cmdptr = wsp->ws_input + wsp->ws_endp;
1516 cmdlen = wsp->ws_len - wsp->ws_endp;
1517 wsp->ws_debug ("Restart:%.*s;", (int) cmdlen, cmdptr);
1519 rc = wordsplit_process_list (wsp, start);
1526 wordsplit_free_nodes (wsp);
1529 wordsplit_finish (wsp);
1530 wordsplit_free_nodes (wsp);
1531 return wsp->ws_errno;
/* Convenience wrapper around wordsplit_len for NUL-terminated input:
   the length is taken with strlen, or 0 when COMMAND is NULL.  */
1535 wordsplit (const char *command, struct wordsplit *ws, int flags)
1537 return wordsplit_len (command, command ? strlen (command) : 0, ws,
/* Release the words stored in WS, i.e. the ws_wordc entries of
   ws_wordv that follow the ws_offs reserved slots.  Each slot is reset
   to NULL; the vector itself is kept (see wordsplit_free).  */
1542 wordsplit_free_words (struct wordsplit *ws)
1546 for (i = 0; i < ws->ws_wordc; i++)
1548 char *p = ws->ws_wordv[ws->ws_offs + i];
1552 ws->ws_wordv[ws->ws_offs + i] = NULL;
/* Release everything WS owns as a result of splitting: the words
   (wordsplit_free_words) and the word vector itself.  ws_wordv is reset
   to NULL afterwards.  */
1559 wordsplit_free (struct wordsplit *ws)
1561 wordsplit_free_words (ws);
1562 free (ws->ws_wordv);
1563 ws->ws_wordv = NULL;
/* Report the error recorded in ws_errno through the ws_error hook.
   Each known code maps to a translatable message; the quote error also
   shows the offending character (the one at ws_endp in ws_input) and
   its position.  Unrecognized codes are reported as "unknown error".  */
1567 wordsplit_perror (struct wordsplit *wsp)
1569 switch (wsp->ws_errno)
1572 wsp->ws_error (_("no error"));
1576 wsp->ws_error (_("missing closing %c (start near #%lu)"),
1577 wsp->ws_input[wsp->ws_endp],
1578 (unsigned long) wsp->ws_endp);
1582 wsp->ws_error (_("memory exhausted"));
1586 wsp->ws_error (_("command substitution is not yet supported"));
1589 wsp->ws_error (_("invalid wordsplit usage"));
1593 wsp->ws_error (_("unbalanced curly brace"));
1597 wsp->ws_error (_("undefined variable"));
1601 wsp->ws_error (_("input exhausted"));
1605 wsp->ws_error (_("unknown error"));
/* Table of error messages indexed by ws_errno (see
   wordsplit_strerror), and the number of entries in it.  The strings
   are only marked for translation with N_(); they are not translated
   here.  */
1609 const char *_wordsplit_errstr[] = {
1611 N_("missing closing quote"),
1612 N_("memory exhausted"),
1613 N_("command substitution is not yet supported"),
1614 N_("invalid wordsplit usage"),
1615 N_("unbalanced curly brace"),
1616 N_("undefined variable"),
1617 N_("input exhausted")
1619 int _wordsplit_nerrs =
1620 sizeof (_wordsplit_errstr) / sizeof (_wordsplit_errstr[0]);
1623 wordsplit_strerror (struct wordsplit *ws)
1625 if (ws->ws_errno < _wordsplit_nerrs)
1626 return _wordsplit_errstr[ws->ws_errno];
1627 return N_("unknown error");