Imported Upstream version 2.4.4p3
[debian/amanda] / regex-src / split.c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
3
/*
 - split - divide a string into fields, like awk split()
 = int split(char *string, char *fields[], int nfields, char *sep);
 *
 * The string is split in place: the separator that ends a stored field is
 * overwritten with '\0' and fields[] receives pointers into string.
 *
 * sep selects the splitting rule:
 *   ""    fields are separated by runs of blanks and tabs; leading and
 *         trailing white space is ignored
 *   "c"   every occurrence of c ends a field, so adjacent separators
 *         produce empty fields
 *   "ab"  (two or more characters) any run of characters from sep is one
 *         separator; a leading or trailing run produces an empty field
 *
 * At most nfields pointers are stored.  When the string holds more fields
 * than that, the last stored field is left unsplit (it keeps the rest of
 * the string) and the remaining fields are only counted.  In white-space
 * mode, if nothing but white space follows the last stored field, that
 * white space is trimmed off it.
 *
 * With nfields <= 0 the fields are only counted: nothing is stored in
 * fields[] and string is not modified.
 *
 * Returns the total number of fields found, which may exceed nfields;
 * 0 for a string that is empty (or, in white-space mode, all white space).
 * Neither string nor sep may be NULL.
 */
int                             /* number of fields, including overflow */
split(char *string, char *fields[], int nfields, char *sep)
{
        char *p = string;
        char *tail = NULL;      /* separator that ended the last stored field */
        int fn = 0;             /* fields seen so far */
        int trimtrail = 0;      /* white-space mode: trailing blanks are no field */
        int runs;               /* nonzero: a run of separators counts once */

        /* white space */
        if (sep[0] == '\0') {
                sep = " \t";
                trimtrail = 1;
                p += strspn(p, sep);    /* leading white space is ignored */
        }
        runs = (sep[1] != '\0');

        /* catch empties */
        if (*p == '\0')
                return(0);

        for (;;) {
                char *start = p;

                /* counting up (not down from nfields) keeps nfields <= 0 safe */
                if (fn < nfields)
                        fields[fn] = start;
                fn++;

                p += strcspn(p, sep);   /* find the end of this field */
                if (*p == '\0') {
                        /*
                         * An empty last field in white-space mode is just
                         * trailing white space, not a field.
                         */
                        if (trimtrail && *start == '\0') {
                                fn--;
                                /*
                                 * If that white space directly follows the
                                 * last stored field, cut it off that field.
                                 */
                                if (fn == nfields && tail != NULL)
                                        *tail = '\0';
                        }
                        return(fn);
                }

                if (fn < nfields)
                        *p = '\0';      /* terminate the stored field */
                else if (fn == nfields)
                        tail = p;       /* last stored field stays unsplit */
                p++;
                if (runs)
                        p += strspn(p, sep);
        }
}
139
140 #ifdef TEST_SPLIT
141
142
/*
 * test program
 * pgm                  runs regression
 * pgm sep              splits stdin lines by sep
 * pgm str sep          splits str by sep
 * pgm str sep n        splits str by sep n times
 * pgm str sep n x      only copies str n times, without splitting
 *                      (NOTE(review): presumably the baseline for timing
 *                      the previous form -- confirm)
 */
int dosplit(char *string, char *seps);
int regress(void);

int
main(int argc, char *argv[])
{
        char buf[512];
        int n;
#       define  MNF     10
        char *fields[MNF];

        if (argc > 4) {
                for (n = atoi(argv[3]); n > 0; n--) {
                        strncpy(buf, argv[1], sizeof(buf)-1);
                        buf[sizeof(buf)-1] = '\0';
                }
        } else if (argc > 3) {
                for (n = atoi(argv[3]); n > 0; n--) {
                        /* split() modifies its input, so recopy each time */
                        strncpy(buf, argv[1], sizeof(buf)-1);
                        buf[sizeof(buf)-1] = '\0';
                        (void) split(buf, fields, MNF, argv[2]);
                }
        } else if (argc > 2) {
                dosplit(argv[1], argv[2]);
        } else if (argc > 1) {
                while (fgets(buf, sizeof(buf), stdin) != NULL) {
                        /*
                         * Stomp the newline if there is one.  strcspn()
                         * leaves the text alone for an unterminated or
                         * over-long line and cannot index before buf[0]
                         * when fgets() hands back an empty string.
                         */
                        buf[strcspn(buf, "\n")] = '\0';
                        dosplit(buf, argv[1]);
                }
        } else {
                regress();
        }

        exit(0);
}
183
int print(int nf, int nfp, char *fields[]);

/*
 * dosplit - split one string by seps and print the result
 *
 * string is modified in place by split().  At most NF fields are kept;
 * print() is told both the total count and that limit.
 * Always returns 0; the int return type matches the historical
 * implicit-int declaration of this function.
 */
int
dosplit(char *string, char *seps)
{
#       define  NF      5
        char *fields[NF];
        int nf;

        nf = split(string, fields, NF, seps);
        print(nf, NF, fields);
        return(0);
}
195
/*
 * print - show the outcome of one split() call on stdout
 *
 * nf is the field count split() returned, nfp the number of entries that
 * are valid in fields[].  The count is printed first, then the first
 * min(nf, nfp) fields, quoted and comma-separated.  The line is always
 * newline-terminated: when more fields were found than fit in fields[]
 * the list ends in "...", and when there are no fields only the count is
 * shown.
 * Always returns 0; the int return type matches the historical
 * implicit-int declaration of this function.
 */
int
print(int nf, int nfp, char *fields[])
{
        int fn;
        int bound = (nf > nfp) ? nfp : nf;

        printf("%d:\t", nf);
        for (fn = 0; fn < bound; fn++)
                printf("\"%s\"%s", fields[fn], (fn+1 < nf) ? ", " : "\n");

        /* the loop only ends the line when it printed the very last field */
        if (bound < nf)
                fputs("...\n", stdout);
        else if (bound <= 0)
                putchar('\n');
        return(0);
}
209
#define RNF     5               /* some table entries know this */

/*
 * Regression table: each entry gives an input string, the separator
 * argument, the field count split() is expected to return when given
 * room for RNF fields, and the expected text of the stored fields.
 * An entry with a NULL str ends the table.
 */
struct {
        char *str;              /* string to split */
        char *seps;             /* separator argument */
        int nf;                 /* expected return value */
        char *fi[RNF];          /* expected stored fields */
} tests[] = {
        /* a single separator character */
        { "",              " ",    0, { "" } },
        { " ",             " ",    2, { "", "" } },
        { "x",             " ",    1, { "x" } },
        { "xy",            " ",    1, { "xy" } },
        { "x y",           " ",    2, { "x", "y" } },
        { "abc def  g ",   " ",    5, { "abc", "def", "", "g", "" } },
        { "  a bcd",       " ",    4, { "", "", "a", "bcd" } },
        { "a b c d e f",   " ",    6, { "a", "b", "c", "d", "e f" } },
        { " a b c d ",     " ",    6, { "", "a", "b", "c", "d " } },

        /* two separator characters */
        { "",              " _",   0, { "" } },
        { " ",             " _",   2, { "", "" } },
        { "x",             " _",   1, { "x" } },
        { "x y",           " _",   2, { "x", "y" } },
        { "ab _ cd",       " _",   2, { "ab", "cd" } },
        { " a_b  c ",      " _",   5, { "", "a", "b", "c", "" } },
        { "a b c_d e f",   " _",   6, { "a", "b", "c", "d", "e f" } },
        { " a b c d ",     " _",   6, { "", "a", "b", "c", "d " } },

        /* three separator characters */
        { "",              " _~",  0, { "" } },
        { " ",             " _~",  2, { "", "" } },
        { "x",             " _~",  1, { "x" } },
        { "x y",           " _~",  2, { "x", "y" } },
        { "ab _~ cd",      " _~",  2, { "ab", "cd" } },
        { " a_b  c~",      " _~",  5, { "", "a", "b", "c", "" } },
        { "a b_c d~e f",   " _~",  6, { "a", "b", "c", "d", "e f" } },
        { "~a b c d ",     " _~",  6, { "", "a", "b", "c", "d " } },

        /* four separator characters */
        { "",              " _~-", 0, { "" } },
        { " ",             " _~-", 2, { "", "" } },
        { "x",             " _~-", 1, { "x" } },
        { "x y",           " _~-", 2, { "x", "y" } },
        { "ab _~- cd",     " _~-", 2, { "ab", "cd" } },
        { " a_b  c~",      " _~-", 5, { "", "a", "b", "c", "" } },
        { "a b_c-d~e f",   " _~-", 6, { "a", "b", "c", "d", "e f" } },
        { "~a-b c d ",     " _~-", 6, { "", "a", "b", "c", "d " } },

        /* the same separator character given twice */
        { "",              "  ",   0, { "" } },
        { " ",             "  ",   2, { "", "" } },
        { "x",             "  ",   1, { "x" } },
        { "xy",            "  ",   1, { "xy" } },
        { "x y",           "  ",   2, { "x", "y" } },
        { "abc def  g ",   "  ",   4, { "abc", "def", "g", "" } },
        { "  a bcd",       "  ",   3, { "", "a", "bcd" } },
        { "a b c d e f",   "  ",   6, { "a", "b", "c", "d", "e f" } },
        { " a b c d ",     "  ",   6, { "", "a", "b", "c", "d " } },

        /* empty separator string: white-space splitting */
        { "",              "",     0, { "" } },
        { " ",             "",     0, { "" } },
        { "x",             "",     1, { "x" } },
        { "xy",            "",     1, { "xy" } },
        { "x y",           "",     2, { "x", "y" } },
        { "abc def  g ",   "",     3, { "abc", "def", "g" } },
        { "\t a bcd",      "",     2, { "a", "bcd" } },
        { "  a \tb\t c ",  "",     3, { "a", "b", "c" } },
        { "a b c d e ",    "",     5, { "a", "b", "c", "d", "e" } },
        { "a b\tc d e f",  "",     6, { "a", "b", "c", "d", "e f" } },
        { " a b c d e f ", "",     6, { "a", "b", "c", "d", "e f " } },

        /* end marker */
        { NULL,            NULL,   0, { NULL } },
};
278
279 regress()
280 {
281         char buf[512];
282         register int n;
283         char *fields[RNF+1];
284         register int nf;
285         register int i;
286         register int printit;
287         register char *f;
288
289         for (n = 0; tests[n].str != NULL; n++) {
290                 strncpy(buf, tests[n].str, sizeof(buf)-1);
291                 buf[sizeof(buf)-1] = '\0';
292                 fields[RNF] = NULL;
293                 nf = split(buf, fields, RNF, tests[n].seps);
294                 printit = 0;
295                 if (nf != tests[n].nf) {
296                         printf("split `%s' by `%s' gave %d fields, not %d\n",
297                                 tests[n].str, tests[n].seps, nf, tests[n].nf);
298                         printit = 1;
299                 } else if (fields[RNF] != NULL) {
300                         printf("split() went beyond array end\n");
301                         printit = 1;
302                 } else {
303                         for (i = 0; i < nf && i < RNF; i++) {
304                                 f = fields[i];
305                                 if (f == NULL)
306                                         f = "(NULL)";
307                                 if (strcmp(f, tests[n].fi[i]) != 0) {
308                                         printf("split `%s' by `%s', field %d is `%s', not `%s'\n",
309                                                 tests[n].str, tests[n].seps,
310                                                 i, fields[i], tests[n].fi[i]);
311                                         printit = 1;
312                                 }
313                         }
314                 }
315                 if (printit)
316                         print(nf, RNF, fields);
317         }
318 }
319 #endif