1 /* quotearg.c - quote arguments for output
3 Copyright (C) 1998, 1999, 2000, 2001, 2002, 2004, 2005, 2006, 2007,
4 2008 Free Software Foundation, Inc.
6 This program is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 /* Written by Paul Eggert <eggert@twinsun.com> */
/* Shorthand: look MSGID up through gettext at the point of use.  */
37 #define _(msgid) gettext (msgid)
/* Expands to MSGID unchanged, with no lookup at the point of use.  */
38 #define N_(msgid) msgid
41 /* Disable multibyte processing entirely. Since MB_CUR_MAX is 1, the
42 other macros are defined only for documentation and to satisfy C syntax. */
/* Single-byte replacement for the multibyte conversion state type.  */
47 # define mbstate_t int
/* Single-byte replacement: stores the byte *S into *PWC and evaluates
   to 1 when that byte is nonzero, 0 otherwise.  N and PS are unused.  */
48 # define mbrtowc(pwc, s, n, ps) ((*(pwc) = *(s)) != 0)
/* Single-byte replacement: defers to isprint on WC converted to
   unsigned char.  */
49 # define iswprint(wc) isprint ((unsigned char) (wc))
/* Fallback used when mbsinit is neither a macro nor flagged available
   by HAVE_MBSINIT: the replacement always evaluates to 1.  */
53 #if !defined mbsinit && !HAVE_MBSINIT
54 # define mbsinit(ps) 1
/* All-ones value of size_t, i.e. the largest size_t.  */
58 # define SIZE_MAX ((size_t) -1)
/* Number of bits in an int; used to index the quote_these_too bit
   vector.  */
61 #define INT_BITS (sizeof (int) * CHAR_BIT)
/* Settings that control how an argument is quoted: a style plus a
   per-character override bit vector.  */
63 struct quoting_options
65 /* Basic quoting style. */
66 enum quoting_style style;
68 /* Additional flags. Bitwise combination of enum quoting_flags. */
71 /* Quote the characters indicated by this bit vector even if the
72 quoting style would not normally require them to be quoted.
   One bit per unsigned char value, packed INT_BITS bits to each
   array element: character UC lives in element UC / INT_BITS at bit
   UC % INT_BITS.  */
73 unsigned int quote_these_too[(UCHAR_MAX / INT_BITS) + 1];
76 /* Names of quoting styles. */
/* Both the array elements and the strings they point to are const.
   NOTE(review): presumably entry K names the style held at entry K of
   quoting_style_vals; the initializer is not visible here -- confirm.  */
77 char const *const quoting_style_args[] =
90 /* Correspondences to quoting style names. */
/* NOTE(review): presumably kept in the same order as
   quoting_style_args so that equal indexes pair a name with its
   style -- confirm against that table.  */
91 enum quoting_style const quoting_style_vals[] =
93 literal_quoting_style,
95 shell_always_quoting_style,
97 c_maybe_quoting_style,
100 clocale_quoting_style
103 /* The default quoting options. */
/* Has static storage duration, so it starts out zero-initialized.  The
   accessors in this file fall back to it whenever they are handed a
   null options pointer.  */
104 static struct quoting_options default_quoting_options;
106 /* Allocate a new set of quoting options, with contents initially identical
107 to O if O is not null, or to the default if O is null.
108 It is the caller's responsibility to free the result. */
109 struct quoting_options *
110 clone_quoting_options (struct quoting_options *o)
/* The source of the copy is O when given, default_quoting_options
   otherwise; xmemdup produces the duplicate.  */
113 struct quoting_options *p = xmemdup (o ? o : &default_quoting_options,
119 /* Get the value of O's quoting style. If O is null, use the default. */
121 get_quoting_style (struct quoting_options *o)
/* Read the style member of O, or of default_quoting_options when O is
   null.  */
123 return (o ? o : &default_quoting_options)->style;
126 /* In O (or in the default if O is null),
127 set the value of the quoting style to S. */
129 set_quoting_style (struct quoting_options *o, enum quoting_style s)
/* Overwrite the style member in place; with a null O this changes the
   file-wide default_quoting_options.  */
131 (o ? o : &default_quoting_options)->style = s;
134 /* In O (or in the default if O is null),
135 set the value of the quoting options for character C to I.
136 Return the old value. Currently, the only values defined for I are
137 0 (the default) and 1 (which means to quote the character even if
138 it would not otherwise be quoted). */
140 set_char_quoting (struct quoting_options *o, char c, int i)
/* Work with the unsigned value of C so that it can index the bit
   vector.  */
142 unsigned char uc = c;
/* Element of quote_these_too that holds the bit for UC.  */
144 (o ? o : &default_quoting_options)->quote_these_too + uc / INT_BITS;
/* Position of the bit for UC inside that element.  */
145 int shift = uc % INT_BITS;
/* Previous value of the bit.  It is kept unsigned so that the shift
   below is done in unsigned arithmetic: SHIFT can be INT_BITS - 1, and
   shifting a signed 1 into the sign bit is undefined behavior.  */
146 unsigned int r = (*p >> shift) & 1;
/* Flip the stored bit exactly when the requested value differs from
   the previous one.  */
147 *p ^= ((i & 1) ^ r) << shift;
151 /* In O (or in the default if O is null),
152 set the value of the quoting options flag to I, which can be a
153 bitwise combination of enum quoting_flags, or 0 for default
154 behavior. Return the old value. */
156 set_quoting_flags (struct quoting_options *o, int i)
/* Redirect O to the file-wide defaults; per the contract above this
   applies when the caller passed a null O.  */
160 o = &default_quoting_options;
166 /* Return quoting options for STYLE, with no extra quoting. */
167 static struct quoting_options
168 quoting_options_from_style (enum quoting_style style)
/* Built in a local object; the return type is the struct itself, not
   a pointer.  */
170 struct quoting_options o;
/* Clear every bit of the per-character vector so that no character is
   forced to be quoted beyond what the style requires.  */
173 memset (o.quote_these_too, 0, sizeof o.quote_these_too);
177 /* MSGID approximates a quotation mark. Return its translation if it
178 has one; otherwise, return either it or "\"", depending on S. */
180 gettext_quote (char const *msgid, enum quoting_style s)
/* Look MSGID up in the message catalog.  */
182 char const *translation = _(msgid);
/* Pointer comparison: getting MSGID itself back is taken to mean that
   no translation exists.  Only clocale_quoting_style takes this
   branch.  */
183 if (translation == msgid && s == clocale_quoting_style)
188 /* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
189 argument ARG (of size ARGSIZE), using QUOTING_STYLE, FLAGS, and
190 QUOTE_THESE_TOO to control quoting.
191 Terminate the output with a null character, and return the written
192 size of the output, not counting the terminating null.
193 If BUFFERSIZE is too small to store the output string, return the
194 value that would have been returned had BUFFERSIZE been large enough.
195 If ARGSIZE is SIZE_MAX, use the string length of the argument for ARGSIZE.
197 This function acts like quotearg_buffer (BUFFER, BUFFERSIZE, ARG,
198 ARGSIZE, O), except it breaks O into its component pieces and is
199 not careful about errno. */
202 quotearg_buffer_restyled (char *buffer, size_t buffersize,
203 char const *arg, size_t argsize,
204 enum quoting_style quoting_style, int flags,
205 unsigned int const *quote_these_too)
209 char const *quote_string = 0;
210 size_t quote_string_len = 0;
211 bool backslash_escapes = false;
212 bool unibyte_locale = MB_CUR_MAX == 1;
213 bool elide_outer_quotes = (flags & QA_ELIDE_OUTER_QUOTES) != 0;
218 if (len < buffersize) \
224 switch (quoting_style)
226 case c_maybe_quoting_style:
227 quoting_style = c_quoting_style;
228 elide_outer_quotes = true;
230 case c_quoting_style:
231 if (!elide_outer_quotes)
233 backslash_escapes = true;
235 quote_string_len = 1;
238 case escape_quoting_style:
239 backslash_escapes = true;
240 elide_outer_quotes = false;
243 case locale_quoting_style:
244 case clocale_quoting_style:
247 Get translations for open and closing quotation marks.
249 The message catalog should translate "`" to a left
250 quotation mark suitable for the locale, and similarly for
251 "'". If the catalog has no translation,
252 locale_quoting_style quotes `like this', and
253 clocale_quoting_style quotes "like this".
255 For example, an American English Unicode locale should
256 translate "`" to U+201C (LEFT DOUBLE QUOTATION MARK), and
257 should translate "'" to U+201D (RIGHT DOUBLE QUOTATION
258 MARK). A British English Unicode locale should instead
259 translate these to U+2018 (LEFT SINGLE QUOTATION MARK) and
260 U+2019 (RIGHT SINGLE QUOTATION MARK), respectively.
262 If you don't know what to put here, please see
263 <http://en.wikipedia.org/wiki/Quotation_mark#Glyphs>
264 and use glyphs suitable for your language. */
266 char const *left = gettext_quote (N_("`"), quoting_style);
267 char const *right = gettext_quote (N_("'"), quoting_style);
268 if (!elide_outer_quotes)
269 for (quote_string = left; *quote_string; quote_string++)
270 STORE (*quote_string);
271 backslash_escapes = true;
272 quote_string = right;
273 quote_string_len = strlen (quote_string);
277 case shell_quoting_style:
278 quoting_style = shell_always_quoting_style;
279 elide_outer_quotes = true;
281 case shell_always_quoting_style:
282 if (!elide_outer_quotes)
285 quote_string_len = 1;
288 case literal_quoting_style:
289 elide_outer_quotes = false;
296 for (i = 0; ! (argsize == SIZE_MAX ? arg[i] == '\0' : i == argsize); i++)
301 if (backslash_escapes
303 && i + quote_string_len <= argsize
304 && memcmp (arg + i, quote_string, quote_string_len) == 0)
306 if (elide_outer_quotes)
307 goto force_outer_quoting_style;
315 if (backslash_escapes)
317 if (elide_outer_quotes)
318 goto force_outer_quoting_style;
320 if (i + 1 < argsize && '0' <= arg[i + 1] && arg[i + 1] <= '9')
327 else if (flags & QA_ELIDE_NULL_BYTES)
332 switch (quoting_style)
334 case shell_always_quoting_style:
335 if (elide_outer_quotes)
336 goto force_outer_quoting_style;
339 case c_quoting_style:
340 if ((flags & QA_SPLIT_TRIGRAPHS)
341 && i + 2 < argsize && arg[i + 1] == '?')
345 case '(': case ')': case '-': case '/':
346 case '<': case '=': case '>':
347 /* Escape the second '?' in what would otherwise be
349 if (elide_outer_quotes)
350 goto force_outer_quoting_style;
369 case '\a': esc = 'a'; goto c_escape;
370 case '\b': esc = 'b'; goto c_escape;
371 case '\f': esc = 'f'; goto c_escape;
372 case '\n': esc = 'n'; goto c_and_shell_escape;
373 case '\r': esc = 'r'; goto c_and_shell_escape;
374 case '\t': esc = 't'; goto c_and_shell_escape;
375 case '\v': esc = 'v'; goto c_escape;
377 /* No need to escape the escape if we are trying to elide
378 outer quotes and nothing else is problematic. */
379 if (backslash_escapes && elide_outer_quotes && quote_string_len)
383 if (quoting_style == shell_always_quoting_style
384 && elide_outer_quotes)
385 goto force_outer_quoting_style;
388 if (backslash_escapes)
395 case '{': case '}': /* sometimes special if isolated */
396 if (! (argsize == SIZE_MAX ? arg[1] == '\0' : argsize == 1))
404 case '!': /* special in bash */
405 case '"': case '$': case '&':
406 case '(': case ')': case '*': case ';':
408 case '=': /* sometimes special in 0th or (with "set -k") later args */
410 case '^': /* special in old /bin/sh, e.g. SunOS 4.1.4 */
412 /* A shell special character. In theory, '$' and '`' could
413 be the first bytes of multibyte characters, which means
414 we should check them with mbrtowc, but in practice this
415 doesn't happen so it's not worth worrying about. */
416 if (quoting_style == shell_always_quoting_style
417 && elide_outer_quotes)
418 goto force_outer_quoting_style;
422 if (quoting_style == shell_always_quoting_style)
424 if (elide_outer_quotes)
425 goto force_outer_quoting_style;
432 case '%': case '+': case ',': case '-': case '.': case '/':
433 case '0': case '1': case '2': case '3': case '4': case '5':
434 case '6': case '7': case '8': case '9': case ':':
435 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
436 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
437 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
438 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
439 case 'Y': case 'Z': case ']': case '_': case 'a': case 'b':
440 case 'c': case 'd': case 'e': case 'f': case 'g': case 'h':
441 case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
442 case 'o': case 'p': case 'q': case 'r': case 's': case 't':
443 case 'u': case 'v': case 'w': case 'x': case 'y': case 'z':
444 /* These characters don't cause problems, no matter what the
445 quoting style is. They cannot start multibyte sequences. */
449 /* If we have a multibyte sequence, copy it until we reach
450 its end, find an error, or come back to the initial shift
451 state. For C-like styles, if the sequence has
452 unprintable characters, escape the whole sequence, since
453 we can't easily escape single characters within it. */
455 /* Length of multibyte sequence found so far. */
463 printable = isprint (c) != 0;
468 memset (&mbstate, 0, sizeof mbstate);
472 if (argsize == SIZE_MAX)
473 argsize = strlen (arg);
478 size_t bytes = mbrtowc (&w, &arg[i + m],
479 argsize - (i + m), &mbstate);
482 else if (bytes == (size_t) -1)
487 else if (bytes == (size_t) -2)
490 while (i + m < argsize && arg[i + m])
496 /* Work around a bug with older shells that "see" a '\'
497 that is really the 2nd byte of a multibyte character.
498 In practice the problem is limited to ASCII
499 chars >= '@' that are shell special chars. */
500 if ('[' == 0x5b && elide_outer_quotes
501 && quoting_style == shell_always_quoting_style)
504 for (j = 1; j < bytes; j++)
505 switch (arg[i + m + j])
507 case '[': case '\\': case '^':
509 goto force_outer_quoting_style;
521 while (! mbsinit (&mbstate));
524 if (1 < m || (backslash_escapes && ! printable))
526 /* Output a multibyte sequence, or an escaped
527 unprintable unibyte character. */
532 if (backslash_escapes && ! printable)
534 if (elide_outer_quotes)
535 goto force_outer_quoting_style;
537 STORE ('0' + (c >> 6));
538 STORE ('0' + ((c >> 3) & 7));
552 if (! ((backslash_escapes || elide_outer_quotes)
554 && quote_these_too[c / INT_BITS] & (1 << (c % INT_BITS))))
558 if (elide_outer_quotes)
559 goto force_outer_quoting_style;
566 if (len == 0 && quoting_style == shell_always_quoting_style
567 && elide_outer_quotes)
568 goto force_outer_quoting_style;
570 if (quote_string && !elide_outer_quotes)
571 for (; *quote_string; quote_string++)
572 STORE (*quote_string);
574 if (len < buffersize)
578 force_outer_quoting_style:
579 /* Don't reuse quote_these_too, since the addition of outer quotes
580 sufficiently quotes the specified characters. */
581 return quotearg_buffer_restyled (buffer, buffersize, arg, argsize,
583 flags & ~QA_ELIDE_OUTER_QUOTES, NULL);
586 /* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
587 argument ARG (of size ARGSIZE), using O to control quoting.
588 If O is null, use the default.
589 Terminate the output with a null character, and return the written
590 size of the output, not counting the terminating null.
591 If BUFFERSIZE is too small to store the output string, return the
592 value that would have been returned had BUFFERSIZE been large enough.
593 If ARGSIZE is SIZE_MAX, use the string length of the argument for ARGSIZE. */
/* Public entry point: resolves the options pointer, then delegates to
   quotearg_buffer_restyled.  */
596 quotearg_buffer (char *buffer, size_t buffersize,
597 char const *arg, size_t argsize,
598 struct quoting_options const *o)
/* A null O selects default_quoting_options.  */
600 struct quoting_options const *p = o ? o : &default_quoting_options;
/* Pass the individual option fields on; R receives the length of the
   quoted text.  */
602 size_t r = quotearg_buffer_restyled (buffer, buffersize, arg, argsize,
603 p->style, p->flags, p->quote_these_too);
608 /* Equivalent to quotearg_alloc_mem (ARG, ARGSIZE, NULL, O). */
610 quotearg_alloc (char const *arg, size_t argsize,
611 struct quoting_options const *o)
/* No size is requested, so quotearg_alloc_mem is told to store none.  */
613 return quotearg_alloc_mem (arg, argsize, NULL, o);
616 /* Like quotearg_buffer (..., ARG, ARGSIZE, O), except return newly
617 allocated storage containing the quoted string, and store the
618 resulting size into *SIZE, if non-NULL. The result can contain
619 embedded null bytes only if ARGSIZE is not SIZE_MAX, SIZE is not
620 NULL, and set_quoting_flags has not set the null byte elision flag. */
/* Quotes in two passes: one to measure, one to fill a buffer of the
   measured size.  */
623 quotearg_alloc_mem (char const *arg, size_t argsize, size_t *size,
624 struct quoting_options const *o)
/* A null O selects default_quoting_options.  */
626 struct quoting_options const *p = o ? o : &default_quoting_options;
628 /* Elide embedded null bytes if we can't return a size. */
629 int flags = p->flags | (size ? 0 : QA_ELIDE_NULL_BYTES);
/* Measuring pass: with a null buffer of size 0 nothing is stored and
   the return value is the length the quoted text needs; the + 1 makes
   room for the terminating null.  */
630 size_t bufsize = quotearg_buffer_restyled (0, 0, arg, argsize, p->style,
631 flags, p->quote_these_too) + 1;
632 char *buf = xcharalloc (bufsize);
/* Filling pass: same arguments, now writing into BUF.  */
633 quotearg_buffer_restyled (buf, bufsize, arg, argsize, p->style, flags,
641 /* A storage slot with size and pointer to a value. */
648 /* Preallocate a slot 0 buffer, so that the caller can always quote
649 one small component of a "memory exhausted" message in slot 0. */
650 static char slot0[256];
/* Number of slots in the current vector; starts at 1 for slot 0.  */
651 static unsigned int nslots = 1;
/* Static one-element vector describing slot0: its size, then its
   value pointer.  */
652 static struct slotvec slotvec0 = {sizeof slot0, slot0};
/* The slot vector currently in use; initially the static one.  */
653 static struct slotvec *slotvec = &slotvec0;
/* NOTE(review): the header of the enclosing function is not visible
   here (presumably quotearg_free -- confirm).  The visible statements
   walk the slot vector and restore the static slot 0 descriptor.  */
658 struct slotvec *sv = slotvec;
/* Slots from index 1 upward are visited; slot 0 is treated separately
   below.  */
660 for (i = 1; i < nslots; i++)
/* Slot 0 needs attention only when its value no longer points at the
   static slot0 buffer.  */
662 if (sv[0].val != slot0)
/* Point the static descriptor back at the preallocated buffer.  */
665 slotvec0.size = sizeof slot0;
666 slotvec0.val = slot0;
676 /* Use storage slot N to return a quoted version of argument ARG.
677 ARG is of size ARGSIZE, but if that is SIZE_MAX, ARG is a
678 null-terminated string.
679 OPTIONS specifies the quoting options.
680 The returned value points to static storage that can be
681 reused by the next call to this function with the same value of N.
682 N must be nonnegative. N is deliberately declared with type "int"
683 to allow for future extensions (using negative values). */
685 quotearg_n_options (int n, char const *arg, size_t argsize,
686 struct quoting_options const *options)
691 struct slotvec *sv = slotvec;
698 /* FIXME: technically, the type of n1 should be `unsigned int',
699 but that evokes an unsuppressible warning from gcc-4.0.1 and
700 older. If gcc ever provides an option to suppress that warning,
701 revert to the original type, so that the test in xalloc_oversized
702 is once again performed only at compile time. */
/* True while the vector in use is still the static slotvec0.  */
704 bool preallocated = (sv == &slotvec0);
/* NOTE(review): presumably rejects an n1 whose byte count n1 * sizeof *sv
   would overflow -- confirm against the xalloc_oversized definition.  */
706 if (xalloc_oversized (n1, sizeof *sv))
/* The static slotvec0 is never handed to xrealloc: NULL is passed in
   its place, and the pointer that comes back becomes the vector.  */
709 slotvec = sv = xrealloc (preallocated ? NULL : sv, n1 * sizeof *sv);
/* Zero the descriptors of the slots just added, from index nslots up
   to n1.  */
712 memset (sv + nslots, 0, (n1 - nslots) * sizeof *sv);
717 size_t size = sv[n].size;
718 char *val = sv[n].val;
719 /* Elide embedded null bytes since we don't return a size. */
720 int flags = options->flags | QA_ELIDE_NULL_BYTES;
/* First attempt uses the buffer the slot already has; QSIZE is the
   length the quoted text needs.  */
721 size_t qsize = quotearg_buffer_restyled (val, size, arg, argsize,
722 options->style, flags,
723 options->quote_these_too);
/* Record a size that fits the quoted text plus its terminating null.  */
727 sv[n].size = size = qsize + 1;
730 sv[n].val = val = xcharalloc (size);
/* Quote again, this time into the buffer just allocated.  */
731 quotearg_buffer_restyled (val, size, arg, argsize, options->style,
732 flags, options->quote_these_too);
/* Quote the null-terminated string ARG into slot N with the default
   options; SIZE_MAX tells quotearg_n_options to use the string length.  */
741 quotearg_n (int n, char const *arg)
743 return quotearg_n_options (n, arg, SIZE_MAX, &default_quoting_options);
/* Quote the ARGSIZE bytes at ARG into slot N with the default options.  */
747 quotearg_n_mem (int n, char const *arg, size_t argsize)
749 return quotearg_n_options (n, arg, argsize, &default_quoting_options);
/* Same as quotearg_n with slot 0.  */
753 quotearg (char const *arg)
755 return quotearg_n (0, arg);
/* Same as quotearg_n_mem with slot 0.  */
759 quotearg_mem (char const *arg, size_t argsize)
761 return quotearg_n_mem (0, arg, argsize);
/* Quote the null-terminated string ARG into slot N using style S with
   no extra per-character quoting.  */
765 quotearg_n_style (int n, enum quoting_style s, char const *arg)
/* Options for S are built in a local object that is only read during
   the call below.  */
767 struct quoting_options const o = quoting_options_from_style (s);
768 return quotearg_n_options (n, arg, SIZE_MAX, &o);
/* Quote the ARGSIZE bytes at ARG into slot N using style S with no
   extra per-character quoting.  */
772 quotearg_n_style_mem (int n, enum quoting_style s,
773 char const *arg, size_t argsize)
/* Options for S are built in a local object that is only read during
   the call below.  */
775 struct quoting_options const o = quoting_options_from_style (s);
776 return quotearg_n_options (n, arg, argsize, &o);
/* Same as quotearg_n_style with slot 0.  */
780 quotearg_style (enum quoting_style s, char const *arg)
782 return quotearg_n_style (0, s, arg);
/* Same as quotearg_n_style_mem with slot 0.  */
786 quotearg_style_mem (enum quoting_style s, char const *arg, size_t argsize)
788 return quotearg_n_style_mem (0, s, arg, argsize);
/* Quote the ARGSIZE bytes at ARG into slot 0 using the default
   options, with the character CH additionally forced to be quoted.  */
792 quotearg_char_mem (char const *arg, size_t argsize, char ch)
794 struct quoting_options options;
/* Work on a copy so that default_quoting_options itself is left
   unchanged.  */
795 options = default_quoting_options;
/* Value 1 requests quoting of CH even where the style would not.  */
796 set_char_quoting (&options, ch, 1);
797 return quotearg_n_options (0, arg, argsize, &options);
/* Same as quotearg_char_mem for a null-terminated ARG: SIZE_MAX stands
   for the string length.  */
801 quotearg_char (char const *arg, char ch)
803 return quotearg_char_mem (arg, SIZE_MAX, ch);
/* Same as quotearg_char with the colon character as the extra
   character to quote.  */
807 quotearg_colon (char const *arg)
809 return quotearg_char (arg, ':');
/* Same as quotearg_char_mem with the colon character as the extra
   character to quote.  */
813 quotearg_colon_mem (char const *arg, size_t argsize)
815 return quotearg_char_mem (arg, argsize, ':');