Imported Upstream version 2.5.1
[debian/amanda] / regex-src / split.c
1 #include <amanda.h>
2 #include <stdio.h>
3 #include <string.h>
4
/*
 - split - divide a string into fields, like awk split()
 = int split(char *string, char *fields[], int nfields, char *sep);
 *
 * The string is split IN PLACE: separators that end a stored field are
 * overwritten with '\0' and fields[] receives pointers into string.
 *
 * sep selects the splitting mode:
 *   ""    white space: fields are separated by runs of blanks/tabs, and
 *         leading and trailing white space is ignored
 *   "c"   every single occurrence of c separates two fields
 *   "ab"  (two or more characters) any run of those characters separates
 *         two fields
 *
 * At most nfields pointers are stored.  When there are more fields than
 * that, the last stored field keeps the unsplit remainder of the string
 * and the extra fields are only counted.  When nfields <= 0 nothing is
 * stored and the string is left untouched; the fields are only counted.
 *
 * Returns the number of fields found, which may exceed nfields.
 */
int                             /* number of fields, including overflow */
split(
    char *      string,
    char *      fields[],       /* list is not NULL-terminated */
    int         nfields,        /* number of entries available in fields[] */
    char *      sep)            /* "" white, "c" single char, "ab" [ab]+ */
{
        register char *p = string;
        register char c;                        /* latest character */
        register char sepc = sep[0];
        register char sepc2;
        register int fn;
        register char **fp = fields;
        register char *sepp;
        register int trimtrail;

        /* white space */
        if (sepc == '\0') {
                /* skip leading white space */
                while ((c = *p++) == ' ' || c == '\t')
                        continue;
                p--;
                trimtrail = 1;
                sep = " \t";    /* note, code below knows this is 2 long */
                sepc = ' ';
        } else
                trimtrail = 0;
        sepc2 = sep[1];         /* now we can safely pick this up */

        /* catch empties */
        if (*p == '\0')
                return(0);

        /*
         * No room to store anything: only count the fields.  The paths
         * below store the first pointer before looking at nfields, so this
         * case must be handled before reaching them.
         */
        if (nfields <= 0) {
                fn = 1;
                if (sepc2 == '\0') {
                        /* single separator: each occurrence starts a field */
                        while ((c = *p++) != '\0')
                                if (c == sepc)
                                        fn++;
                        return(fn);
                }
                for (;;) {
                        /* scan over the current field */
                        while ((c = *p) != '\0' && strchr(sep, c) == NULL)
                                p++;
                        if (c == '\0')
                                return(fn);
                        /* skip the run of separators that ended it */
                        while ((c = *p) != '\0' && strchr(sep, c) != NULL)
                                p++;
                        /* white-space mode ignores trailing separators */
                        if (c == '\0' && trimtrail)
                                return(fn);
                        fn++;
                }
                /* not reached */
        }

        /* single separator */
        if (sepc2 == '\0') {
                fn = nfields;           /* counts free slots downwards */
                for (;;) {
                        *fp++ = p;
                        fn--;
                        if (fn == 0)
                                break;
                        while ((c = *p++) != sepc)
                                if (c == '\0')
                                        return(nfields - fn);
                        *(p-1) = '\0';
                }
                /* we have overflowed the fields vector -- just count them */
                fn = nfields;
                for (;;) {
                        while ((c = *p++) != sepc)
                                if (c == '\0')
                                        return(fn);
                        fn++;
                }
                /* not reached */
        }

        /* two separators */
        if (sep[2] == '\0') {
                fn = nfields;           /* counts free slots downwards */
                for (;;) {
                        *fp++ = p;
                        fn--;
                        while ((c = *p++) != sepc && c != sepc2)
                                if (c == '\0') {
                                        /* an empty last field is trailing white space */
                                        if (trimtrail && **(fp-1) == '\0')
                                                fn++;
                                        return(nfields - fn);
                                }
                        if (fn == 0)
                                break;
                        *(p-1) = '\0';
                        /* swallow the rest of the separator run */
                        while ((c = *p++) == sepc || c == sepc2)
                                continue;
                        p--;
                }
                /* we have overflowed the fields vector -- just count them */
                fn = nfields;
                while (c != '\0') {
                        while ((c = *p++) == sepc || c == sepc2)
                                continue;
                        p--;
                        fn++;
                        while ((c = *p++) != '\0' && c != sepc && c != sepc2)
                                continue;
                }
                /* might have to trim trailing white space */
                if (trimtrail) {
                        p--;
                        while ((c = *--p) == sepc || c == sepc2)
                                continue;
                        p++;
                        if (*p != '\0') {
                                /* only the last stored field may be cut */
                                if (fn == nfields+1)
                                        *p = '\0';
                                fn--;
                        }
                }
                return(fn);
        }

        /* n separators */
        fn = 0;
        for (;;) {
                if (fn < nfields)
                        *fp++ = p;
                fn++;
                /* scan to the separator ending this field */
                for (;;) {
                        c = *p++;
                        if (c == '\0')
                                return(fn);
                        sepp = sep;
                        while ((sepc = *sepp++) != '\0' && sepc != c)
                                continue;
                        if (sepc != '\0')       /* it was a separator */
                                break;
                }
                if (fn < nfields)
                        *(p-1) = '\0';
                /* swallow the rest of the separator run */
                for (;;) {
                        c = *p++;
                        sepp = sep;
                        while ((sepc = *sepp++) != '\0' && sepc != c)
                                continue;
                        if (sepc == '\0')       /* it wasn't a separator */
                                break;
                }
                p--;
        }

        /* not reached */
}
140
141 #ifdef TEST_SPLIT
142
143
/*
 * test program
 * pgm                  runs regression
 * pgm sep              splits stdin lines by sep
 * pgm str sep          splits str by sep
 * pgm str sep n        splits str by sep n times
 * pgm str sep n x      copies str n times without calling split()
 *                      (any fifth argument selects this; presumably a
 *                      baseline for timing the previous form)
 */
int dosplit(char *string, char *seps);
int regress(void);

int
main(
    int         argc,
    char *      argv[])
{
        char buf[512];
        register int n;
#       define  MNF     10
        char *fields[MNF];

        if (argc > 4)
                for (n = atoi(argv[3]); n > 0; n--) {
                        strncpy(buf, argv[1], sizeof(buf)-1);
                        buf[sizeof(buf)-1] = '\0';
                }
        else if (argc > 3)
                for (n = atoi(argv[3]); n > 0; n--) {
                        /* split() modifies its input, so start from a fresh copy */
                        strncpy(buf, argv[1], sizeof(buf)-1);
                        buf[sizeof(buf)-1] = '\0';
                        (void) split(buf, fields, MNF, argv[2]);
                }
        else if (argc > 2)
                dosplit(argv[1], argv[2]);
        else if (argc > 1)
                while (fgets(buf, (int)sizeof(buf), stdin) != NULL) {
                        /*
                         * Stomp the newline, if there is one.  A line that
                         * filled the buffer or a final line without '\n'
                         * has none, and must not lose its last character.
                         */
                        buf[strcspn(buf, "\n")] = '\0';
                        dosplit(buf, argv[1]);
                }
        else
                regress();

        return(0);
}
184
int print(int nf, int nfp, char *fields[]);

/*
 * dosplit - split string by seps into at most NF stored fields and print
 * the result.  The string is modified in place by split().
 * Returns the field count reported by split().
 */
int
dosplit(
    char *      string,
    char *      seps)
{
#       define  NF      5
        char *fields[NF];
        register int nf;

        nf = split(string, fields, NF, seps);
        print(nf, NF, fields);
        return(nf);
}
197
/*
 * print - show the result of a split() call on stdout
 *
 * nf is the field count split() returned, nfp the capacity of fields[].
 * Prints the count, then the stored fields (at most nfp of them) quoted
 * and comma-separated.  The line is always newline-terminated, including
 * when nf is 0 or when nf exceeds nfp.
 * Returns the number of fields actually printed.
 */
int
print(
    int nf,
    int nfp,
    char *fields[])
{
        register int fn;
        register int bound;

        /* only the first nfp entries of fields[] were ever stored */
        bound = (nf > nfp) ? nfp : nf;
        if (bound < 0)
                bound = 0;
        printf("%d:\t", nf);
        for (fn = 0; fn < bound; fn++)
                printf("%s\"%s\"", (fn > 0) ? ", " : "", fields[fn]);
        printf("\n");
        return(bound);
}
212
#define RNF     5               /* some table entries know this */

/*
 * Regression table: each row is one split() call made with room for RNF
 * fields.  The row with a NULL str ends the table.
 */
struct {
        char *str;              /* string to split */
        char *seps;             /* separator argument */
        int nf;                 /* expected return value */
        char *fi[RNF];          /* expected stored fields */
} tests[] = {
        /* single separator */
        { "",                   " ",    0,      { "" } },
        { " ",                  " ",    2,      { "", "" } },
        { "x",                  " ",    1,      { "x" } },
        { "xy",                 " ",    1,      { "xy" } },
        { "x y",                " ",    2,      { "x", "y" } },
        { "abc def  g ",        " ",    5,      { "abc", "def", "", "g", "" } },
        { "  a bcd",            " ",    4,      { "", "", "a", "bcd" } },
        { "a b c d e f",        " ",    6,      { "a", "b", "c", "d", "e f" } },
        { " a b c d ",          " ",    6,      { "", "a", "b", "c", "d " } },

        /* two separators */
        { "",                   " _",   0,      { "" } },
        { " ",                  " _",   2,      { "", "" } },
        { "x",                  " _",   1,      { "x" } },
        { "x y",                " _",   2,      { "x", "y" } },
        { "ab _ cd",            " _",   2,      { "ab", "cd" } },
        { " a_b  c ",           " _",   5,      { "", "a", "b", "c", "" } },
        { "a b c_d e f",        " _",   6,      { "a", "b", "c", "d", "e f" } },
        { " a b c d ",          " _",   6,      { "", "a", "b", "c", "d " } },

        /* three separators */
        { "",                   " _~",  0,      { "" } },
        { " ",                  " _~",  2,      { "", "" } },
        { "x",                  " _~",  1,      { "x" } },
        { "x y",                " _~",  2,      { "x", "y" } },
        { "ab _~ cd",           " _~",  2,      { "ab", "cd" } },
        { " a_b  c~",           " _~",  5,      { "", "a", "b", "c", "" } },
        { "a b_c d~e f",        " _~",  6,      { "a", "b", "c", "d", "e f" } },
        { "~a b c d ",          " _~",  6,      { "", "a", "b", "c", "d " } },

        /* four separators */
        { "",                   " _~-", 0,      { "" } },
        { " ",                  " _~-", 2,      { "", "" } },
        { "x",                  " _~-", 1,      { "x" } },
        { "x y",                " _~-", 2,      { "x", "y" } },
        { "ab _~- cd",          " _~-", 2,      { "ab", "cd" } },
        { " a_b  c~",           " _~-", 5,      { "", "a", "b", "c", "" } },
        { "a b_c-d~e f",        " _~-", 6,      { "a", "b", "c", "d", "e f" } },
        { "~a-b c d ",          " _~-", 6,      { "", "a", "b", "c", "d " } },

        /* the same separator given twice */
        { "",                   "  ",   0,      { "" } },
        { " ",                  "  ",   2,      { "", "" } },
        { "x",                  "  ",   1,      { "x" } },
        { "xy",                 "  ",   1,      { "xy" } },
        { "x y",                "  ",   2,      { "x", "y" } },
        { "abc def  g ",        "  ",   4,      { "abc", "def", "g", "" } },
        { "  a bcd",            "  ",   3,      { "", "a", "bcd" } },
        { "a b c d e f",        "  ",   6,      { "a", "b", "c", "d", "e f" } },
        { " a b c d ",          "  ",   6,      { "", "a", "b", "c", "d " } },

        /* empty separator string: white space */
        { "",                   "",     0,      { "" } },
        { " ",                  "",     0,      { "" } },
        { "x",                  "",     1,      { "x" } },
        { "xy",                 "",     1,      { "xy" } },
        { "x y",                "",     2,      { "x", "y" } },
        { "abc def  g ",        "",     3,      { "abc", "def", "g" } },
        { "\t a bcd",           "",     2,      { "a", "bcd" } },
        { "  a \tb\t c ",       "",     3,      { "a", "b", "c" } },
        { "a b c d e ",         "",     5,      { "a", "b", "c", "d", "e" } },
        { "a b\tc d e f",       "",     6,      { "a", "b", "c", "d", "e f" } },
        { " a b c d e f ",      "",     6,      { "a", "b", "c", "d", "e f " } },

        /* end marker */
        { NULL,                 NULL,   0,      { NULL } },
};
281
282 int
283 regress(void)
284 {
285         char buf[512];
286         register int n;
287         char *fields[RNF+1];
288         register int nf;
289         register int i;
290         register int printit;
291         register char *f;
292
293         for (n = 0; tests[n].str != NULL; n++) {
294                 strncpy(buf, tests[n].str, SIZEOF(buf)-1);
295                 buf[SIZEOF(buf)-1] = '\0';
296                 fields[RNF] = NULL;
297                 nf = split(buf, fields, RNF, tests[n].seps);
298                 printit = 0;
299                 if (nf != tests[n].nf) {
300                         printf("split `%s' by `%s' gave %d fields, not %d\n",
301                                 tests[n].str, tests[n].seps, nf, tests[n].nf);
302                         printit = 1;
303                 } else if (fields[RNF] != NULL) {
304                         printf("split() went beyond array end\n");
305                         printit = 1;
306                 } else {
307                         for (i = 0; i < nf && i < RNF; i++) {
308                                 f = fields[i];
309                                 if (f == NULL)
310                                         f = "(NULL)";
311                                 if (strcmp(f, tests[n].fi[i]) != 0) {
312                                         printf("split `%s' by `%s', field %d is `%s', not `%s'\n",
313                                                 tests[n].str, tests[n].seps,
314                                                 i, fields[i], tests[n].fi[i]);
315                                         printit = 1;
316                                 }
317                         }
318                 }
319                 if (printit)
320                         print(nf, RNF, fields);
321         }
322 }
323 #endif