1 /* Extended regular expression matching and search library.
2 Copyright (C) 2002-2015 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU General Public
8 License as published by the Free Software Foundation; either
9 version 3 of the License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 General Public License for more details.
16 You should have received a copy of the GNU General Public
17 License along with the GNU C Library; if not, see
18 <http://www.gnu.org/licenses/>. */
20 static void re_string_construct_common (const char *str, Idx len,
22 RE_TRANSLATE_TYPE trans, bool icase,
23 const re_dfa_t *dfa) internal_function;
24 static re_dfastate_t *create_ci_newstate (const re_dfa_t *dfa,
25 const re_node_set *nodes,
26 re_hashval_t hash) internal_function;
27 static re_dfastate_t *create_cd_newstate (const re_dfa_t *dfa,
28 const re_node_set *nodes,
30 re_hashval_t hash) internal_function;
32 /* Functions for string operation. */
34 /* This function allocate the buffers. It is necessary to call
35 re_string_reconstruct before using the object. */
38 internal_function __attribute_warn_unused_result__
39 re_string_allocate (re_string_t *pstr, const char *str, Idx len, Idx init_len,
40 RE_TRANSLATE_TYPE trans, bool icase, const re_dfa_t *dfa)
45 /* Ensure at least one character fits into the buffers. */
46 if (init_len < dfa->mb_cur_max)
47 init_len = dfa->mb_cur_max;
48 init_buf_len = (len + 1 < init_len) ? len + 1: init_len;
49 re_string_construct_common (str, len, pstr, trans, icase, dfa);
51 ret = re_string_realloc_buffers (pstr, init_buf_len);
52 if (BE (ret != REG_NOERROR, 0))
55 pstr->word_char = dfa->word_char;
56 pstr->word_ops_used = dfa->word_ops_used;
57 pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str;
58 pstr->valid_len = (pstr->mbs_allocated || dfa->mb_cur_max > 1) ? 0 : len;
59 pstr->valid_raw_len = pstr->valid_len;
63 /* This function allocate the buffers, and initialize them. */
66 internal_function __attribute_warn_unused_result__
67 re_string_construct (re_string_t *pstr, const char *str, Idx len,
68 RE_TRANSLATE_TYPE trans, bool icase, const re_dfa_t *dfa)
71 memset (pstr, '\0', sizeof (re_string_t));
72 re_string_construct_common (str, len, pstr, trans, icase, dfa);
76 ret = re_string_realloc_buffers (pstr, len + 1);
77 if (BE (ret != REG_NOERROR, 0))
80 pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str;
85 if (dfa->mb_cur_max > 1)
89 ret = build_wcs_upper_buffer (pstr);
90 if (BE (ret != REG_NOERROR, 0))
92 if (pstr->valid_raw_len >= len)
94 if (pstr->bufs_len > pstr->valid_len + dfa->mb_cur_max)
96 ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2);
97 if (BE (ret != REG_NOERROR, 0))
102 #endif /* RE_ENABLE_I18N */
103 build_upper_buffer (pstr);
107 #ifdef RE_ENABLE_I18N
108 if (dfa->mb_cur_max > 1)
109 build_wcs_buffer (pstr);
111 #endif /* RE_ENABLE_I18N */
114 re_string_translate_buffer (pstr);
117 pstr->valid_len = pstr->bufs_len;
118 pstr->valid_raw_len = pstr->bufs_len;
126 /* Helper functions for re_string_allocate, and re_string_construct. */
129 internal_function __attribute_warn_unused_result__
130 re_string_realloc_buffers (re_string_t *pstr, Idx new_buf_len)
132 #ifdef RE_ENABLE_I18N
133 if (pstr->mb_cur_max > 1)
137 /* Avoid overflow in realloc. */
138 const size_t max_object_size = MAX (sizeof (wint_t), sizeof (Idx));
139 if (BE (MIN (IDX_MAX, SIZE_MAX / max_object_size) < new_buf_len, 0))
142 new_wcs = re_realloc (pstr->wcs, wint_t, new_buf_len);
143 if (BE (new_wcs == NULL, 0))
146 if (pstr->offsets != NULL)
148 Idx *new_offsets = re_realloc (pstr->offsets, Idx, new_buf_len);
149 if (BE (new_offsets == NULL, 0))
151 pstr->offsets = new_offsets;
154 #endif /* RE_ENABLE_I18N */
155 if (pstr->mbs_allocated)
157 unsigned char *new_mbs = re_realloc (pstr->mbs, unsigned char,
159 if (BE (new_mbs == NULL, 0))
163 pstr->bufs_len = new_buf_len;
170 re_string_construct_common (const char *str, Idx len, re_string_t *pstr,
171 RE_TRANSLATE_TYPE trans, bool icase,
174 pstr->raw_mbs = (const unsigned char *) str;
179 pstr->mbs_allocated = (trans != NULL || icase);
180 pstr->mb_cur_max = dfa->mb_cur_max;
181 pstr->is_utf8 = dfa->is_utf8;
182 pstr->map_notascii = dfa->map_notascii;
183 pstr->stop = pstr->len;
184 pstr->raw_stop = pstr->stop;
187 #ifdef RE_ENABLE_I18N
189 /* Build wide character buffer PSTR->WCS.
190 If the byte sequence of the string are:
191 <mb1>(0), <mb1>(1), <mb2>(0), <mb2>(1), <sb3>
192 Then wide character buffer will be:
193 <wc1> , WEOF , <wc2> , WEOF , <wc3>
194 We use WEOF for padding, they indicate that the position isn't
195 a first byte of a multibyte character.
197 Note that this function assumes PSTR->VALID_LEN elements are already
198 built and starts from PSTR->VALID_LEN. */
202 build_wcs_buffer (re_string_t *pstr)
205 unsigned char buf[MB_LEN_MAX];
206 assert (MB_LEN_MAX >= pstr->mb_cur_max);
208 unsigned char buf[64];
211 Idx byte_idx, end_idx, remain_len;
214 /* Build the buffers from pstr->valid_len to either pstr->len or
216 end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
217 for (byte_idx = pstr->valid_len; byte_idx < end_idx;)
222 remain_len = end_idx - byte_idx;
223 prev_st = pstr->cur_state;
224 /* Apply the translation if we need. */
225 if (BE (pstr->trans != NULL, 0))
229 for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i)
231 ch = pstr->raw_mbs [pstr->raw_mbs_idx + byte_idx + i];
232 buf[i] = pstr->mbs[byte_idx + i] = pstr->trans[ch];
234 p = (const char *) buf;
237 p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx;
238 mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state);
239 if (BE (mbclen == (size_t) -1 || mbclen == 0
240 || (mbclen == (size_t) -2 && pstr->bufs_len >= pstr->len), 0))
242 /* We treat these cases as a singlebyte character. */
244 wc = (wchar_t) pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
245 if (BE (pstr->trans != NULL, 0))
246 wc = pstr->trans[wc];
247 pstr->cur_state = prev_st;
249 else if (BE (mbclen == (size_t) -2, 0))
251 /* The buffer doesn't have enough space, finish to build. */
252 pstr->cur_state = prev_st;
256 /* Write wide character and padding. */
257 pstr->wcs[byte_idx++] = wc;
258 /* Write paddings. */
259 for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
260 pstr->wcs[byte_idx++] = WEOF;
262 pstr->valid_len = byte_idx;
263 pstr->valid_raw_len = byte_idx;
266 /* Build wide character buffer PSTR->WCS like build_wcs_buffer,
267 but for REG_ICASE. */
270 internal_function __attribute_warn_unused_result__
271 build_wcs_upper_buffer (re_string_t *pstr)
274 Idx src_idx, byte_idx, end_idx, remain_len;
277 char buf[MB_LEN_MAX];
278 assert (MB_LEN_MAX >= pstr->mb_cur_max);
283 byte_idx = pstr->valid_len;
284 end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
286 /* The following optimization assumes that ASCII characters can be
287 mapped to wide characters with a simple cast. */
288 if (! pstr->map_notascii && pstr->trans == NULL && !pstr->offsets_needed)
290 while (byte_idx < end_idx)
294 if (isascii (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx])
295 && mbsinit (&pstr->cur_state))
297 /* In case of a singlebyte character. */
299 = toupper (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]);
300 /* The next step uses the assumption that wchar_t is encoded
301 ASCII-safe: all ASCII values can be converted like this. */
302 pstr->wcs[byte_idx] = (wchar_t) pstr->mbs[byte_idx];
307 remain_len = end_idx - byte_idx;
308 prev_st = pstr->cur_state;
309 mbclen = __mbrtowc (&wc,
310 ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx
311 + byte_idx), remain_len, &pstr->cur_state);
312 if (BE (mbclen < (size_t) -2, 1))
314 wchar_t wcu = towupper (wc);
319 mbcdlen = wcrtomb (buf, wcu, &prev_st);
320 if (BE (mbclen == mbcdlen, 1))
321 memcpy (pstr->mbs + byte_idx, buf, mbclen);
329 memcpy (pstr->mbs + byte_idx,
330 pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx, mbclen);
331 pstr->wcs[byte_idx++] = wcu;
332 /* Write paddings. */
333 for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
334 pstr->wcs[byte_idx++] = WEOF;
336 else if (mbclen == (size_t) -1 || mbclen == 0
337 || (mbclen == (size_t) -2 && pstr->bufs_len >= pstr->len))
339 /* It is an invalid character, an incomplete character
340 at the end of the string, or '\0'. Just use the byte. */
341 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
342 pstr->mbs[byte_idx] = ch;
343 /* And also cast it to wide char. */
344 pstr->wcs[byte_idx++] = (wchar_t) ch;
345 if (BE (mbclen == (size_t) -1, 0))
346 pstr->cur_state = prev_st;
350 /* The buffer doesn't have enough space, finish to build. */
351 pstr->cur_state = prev_st;
355 pstr->valid_len = byte_idx;
356 pstr->valid_raw_len = byte_idx;
360 for (src_idx = pstr->valid_raw_len; byte_idx < end_idx;)
365 remain_len = end_idx - byte_idx;
366 prev_st = pstr->cur_state;
367 if (BE (pstr->trans != NULL, 0))
371 for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i)
373 ch = pstr->raw_mbs [pstr->raw_mbs_idx + src_idx + i];
374 buf[i] = pstr->trans[ch];
376 p = (const char *) buf;
379 p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + src_idx;
380 mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state);
381 if (BE (mbclen < (size_t) -2, 1))
383 wchar_t wcu = towupper (wc);
388 mbcdlen = wcrtomb ((char *) buf, wcu, &prev_st);
389 if (BE (mbclen == mbcdlen, 1))
390 memcpy (pstr->mbs + byte_idx, buf, mbclen);
391 else if (mbcdlen != (size_t) -1)
395 if (byte_idx + mbcdlen > pstr->bufs_len)
397 pstr->cur_state = prev_st;
401 if (pstr->offsets == NULL)
403 pstr->offsets = re_malloc (Idx, pstr->bufs_len);
405 if (pstr->offsets == NULL)
408 if (!pstr->offsets_needed)
410 for (i = 0; i < (size_t) byte_idx; ++i)
411 pstr->offsets[i] = i;
412 pstr->offsets_needed = 1;
415 memcpy (pstr->mbs + byte_idx, buf, mbcdlen);
416 pstr->wcs[byte_idx] = wcu;
417 pstr->offsets[byte_idx] = src_idx;
418 for (i = 1; i < mbcdlen; ++i)
420 pstr->offsets[byte_idx + i]
421 = src_idx + (i < mbclen ? i : mbclen - 1);
422 pstr->wcs[byte_idx + i] = WEOF;
424 pstr->len += mbcdlen - mbclen;
425 if (pstr->raw_stop > src_idx)
426 pstr->stop += mbcdlen - mbclen;
427 end_idx = (pstr->bufs_len > pstr->len)
428 ? pstr->len : pstr->bufs_len;
434 memcpy (pstr->mbs + byte_idx, p, mbclen);
437 memcpy (pstr->mbs + byte_idx, p, mbclen);
439 if (BE (pstr->offsets_needed != 0, 0))
442 for (i = 0; i < mbclen; ++i)
443 pstr->offsets[byte_idx + i] = src_idx + i;
447 pstr->wcs[byte_idx++] = wcu;
448 /* Write paddings. */
449 for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
450 pstr->wcs[byte_idx++] = WEOF;
452 else if (mbclen == (size_t) -1 || mbclen == 0
453 || (mbclen == (size_t) -2 && pstr->bufs_len >= pstr->len))
455 /* It is an invalid character or '\0'. Just use the byte. */
456 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + src_idx];
458 if (BE (pstr->trans != NULL, 0))
459 ch = pstr->trans [ch];
460 pstr->mbs[byte_idx] = ch;
462 if (BE (pstr->offsets_needed != 0, 0))
463 pstr->offsets[byte_idx] = src_idx;
466 /* And also cast it to wide char. */
467 pstr->wcs[byte_idx++] = (wchar_t) ch;
468 if (BE (mbclen == (size_t) -1, 0))
469 pstr->cur_state = prev_st;
473 /* The buffer doesn't have enough space, finish to build. */
474 pstr->cur_state = prev_st;
478 pstr->valid_len = byte_idx;
479 pstr->valid_raw_len = src_idx;
483 /* Skip characters until the index becomes greater than NEW_RAW_IDX.
488 re_string_skip_chars (re_string_t *pstr, Idx new_raw_idx, wint_t *last_wc)
495 /* Skip the characters which are not necessary to check. */
496 for (rawbuf_idx = pstr->raw_mbs_idx + pstr->valid_raw_len;
497 rawbuf_idx < new_raw_idx;)
500 Idx remain_len = pstr->raw_len - rawbuf_idx;
501 prev_st = pstr->cur_state;
502 mbclen = __mbrtowc (&wc2, (const char *) pstr->raw_mbs + rawbuf_idx,
503 remain_len, &pstr->cur_state);
504 if (BE (mbclen == (size_t) -2 || mbclen == (size_t) -1 || mbclen == 0, 0))
506 /* We treat these cases as a single byte character. */
507 if (mbclen == 0 || remain_len == 0)
510 wc = *(unsigned char *) (pstr->raw_mbs + rawbuf_idx);
512 pstr->cur_state = prev_st;
516 /* Then proceed the next character. */
517 rawbuf_idx += mbclen;
522 #endif /* RE_ENABLE_I18N */
524 /* Build the buffer PSTR->MBS, and apply the translation if we need.
525 This function is used in case of REG_ICASE. */
529 build_upper_buffer (re_string_t *pstr)
531 Idx char_idx, end_idx;
532 end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
534 for (char_idx = pstr->valid_len; char_idx < end_idx; ++char_idx)
536 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + char_idx];
537 if (BE (pstr->trans != NULL, 0))
538 ch = pstr->trans[ch];
539 pstr->mbs[char_idx] = toupper (ch);
541 pstr->valid_len = char_idx;
542 pstr->valid_raw_len = char_idx;
545 /* Apply TRANS to the buffer in PSTR. */
549 re_string_translate_buffer (re_string_t *pstr)
551 Idx buf_idx, end_idx;
552 end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
554 for (buf_idx = pstr->valid_len; buf_idx < end_idx; ++buf_idx)
556 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + buf_idx];
557 pstr->mbs[buf_idx] = pstr->trans[ch];
560 pstr->valid_len = buf_idx;
561 pstr->valid_raw_len = buf_idx;
564 /* This function re-construct the buffers.
565 Concretely, convert to wide character in case of pstr->mb_cur_max > 1,
566 convert to upper case in case of REG_ICASE, apply translation. */
569 internal_function __attribute_warn_unused_result__
570 re_string_reconstruct (re_string_t *pstr, Idx idx, int eflags)
574 if (BE (pstr->raw_mbs_idx <= idx, 0))
575 offset = idx - pstr->raw_mbs_idx;
579 #ifdef RE_ENABLE_I18N
580 if (pstr->mb_cur_max > 1)
581 memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
582 #endif /* RE_ENABLE_I18N */
583 pstr->len = pstr->raw_len;
584 pstr->stop = pstr->raw_stop;
586 pstr->raw_mbs_idx = 0;
587 pstr->valid_raw_len = 0;
588 pstr->offsets_needed = 0;
589 pstr->tip_context = ((eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
590 : CONTEXT_NEWLINE | CONTEXT_BEGBUF);
591 if (!pstr->mbs_allocated)
592 pstr->mbs = (unsigned char *) pstr->raw_mbs;
596 if (BE (offset != 0, 1))
598 /* Should the already checked characters be kept? */
599 if (BE (offset < pstr->valid_raw_len, 1))
601 /* Yes, move them to the front of the buffer. */
602 #ifdef RE_ENABLE_I18N
603 if (BE (pstr->offsets_needed, 0))
605 Idx low = 0, high = pstr->valid_len, mid;
608 mid = (high + low) / 2;
609 if (pstr->offsets[mid] > offset)
611 else if (pstr->offsets[mid] < offset)
617 if (pstr->offsets[mid] < offset)
619 pstr->tip_context = re_string_context_at (pstr, mid - 1,
621 /* This can be quite complicated, so handle specially
622 only the common and easy case where the character with
623 different length representation of lower and upper
624 case is present at or after offset. */
625 if (pstr->valid_len > offset
626 && mid == offset && pstr->offsets[mid] == offset)
628 memmove (pstr->wcs, pstr->wcs + offset,
629 (pstr->valid_len - offset) * sizeof (wint_t));
630 memmove (pstr->mbs, pstr->mbs + offset, pstr->valid_len - offset);
631 pstr->valid_len -= offset;
632 pstr->valid_raw_len -= offset;
633 for (low = 0; low < pstr->valid_len; low++)
634 pstr->offsets[low] = pstr->offsets[low + offset] - offset;
638 /* Otherwise, just find out how long the partial multibyte
639 character at offset is and fill it with WEOF/255. */
640 pstr->len = pstr->raw_len - idx + offset;
641 pstr->stop = pstr->raw_stop - idx + offset;
642 pstr->offsets_needed = 0;
643 while (mid > 0 && pstr->offsets[mid - 1] == offset)
645 while (mid < pstr->valid_len)
646 if (pstr->wcs[mid] != WEOF)
650 if (mid == pstr->valid_len)
654 pstr->valid_len = pstr->offsets[mid] - offset;
657 for (low = 0; low < pstr->valid_len; ++low)
658 pstr->wcs[low] = WEOF;
659 memset (pstr->mbs, 255, pstr->valid_len);
662 pstr->valid_raw_len = pstr->valid_len;
668 pstr->tip_context = re_string_context_at (pstr, offset - 1,
670 #ifdef RE_ENABLE_I18N
671 if (pstr->mb_cur_max > 1)
672 memmove (pstr->wcs, pstr->wcs + offset,
673 (pstr->valid_len - offset) * sizeof (wint_t));
674 #endif /* RE_ENABLE_I18N */
675 if (BE (pstr->mbs_allocated, 0))
676 memmove (pstr->mbs, pstr->mbs + offset,
677 pstr->valid_len - offset);
678 pstr->valid_len -= offset;
679 pstr->valid_raw_len -= offset;
680 #if defined DEBUG && DEBUG
681 assert (pstr->valid_len > 0);
687 #ifdef RE_ENABLE_I18N
688 /* No, skip all characters until IDX. */
689 Idx prev_valid_len = pstr->valid_len;
691 if (BE (pstr->offsets_needed, 0))
693 pstr->len = pstr->raw_len - idx + offset;
694 pstr->stop = pstr->raw_stop - idx + offset;
695 pstr->offsets_needed = 0;
699 #ifdef RE_ENABLE_I18N
700 if (pstr->mb_cur_max > 1)
707 const unsigned char *raw, *p, *end;
709 /* Special case UTF-8. Multi-byte chars start with any
710 byte other than 0x80 - 0xbf. */
711 raw = pstr->raw_mbs + pstr->raw_mbs_idx;
712 end = raw + (offset - pstr->mb_cur_max);
713 if (end < pstr->raw_mbs)
715 p = raw + offset - 1;
717 /* We know the wchar_t encoding is UCS4, so for the simple
718 case, ASCII characters, skip the conversion step. */
719 if (isascii (*p) && BE (pstr->trans == NULL, 1))
721 memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
722 /* pstr->valid_len = 0; */
727 for (; p >= end; --p)
728 if ((*p & 0xc0) != 0x80)
732 Idx mlen = raw + pstr->len - p;
733 unsigned char buf[6];
736 const unsigned char *pp = p;
737 if (BE (pstr->trans != NULL, 0))
739 int i = mlen < 6 ? mlen : 6;
741 buf[i] = pstr->trans[p[i]];
744 /* XXX Don't use mbrtowc, we know which conversion
745 to use (UTF-8 -> UCS4). */
746 memset (&cur_state, 0, sizeof (cur_state));
747 mbclen = __mbrtowc (&wc2, (const char *) pp, mlen,
749 if (raw + offset - p <= mbclen
750 && mbclen < (size_t) -2)
752 memset (&pstr->cur_state, '\0',
754 pstr->valid_len = mbclen - (raw + offset - p);
762 pstr->valid_len = re_string_skip_chars (pstr, idx, &wc) - idx;
765 = re_string_context_at (pstr, prev_valid_len - 1, eflags);
767 pstr->tip_context = ((BE (pstr->word_ops_used != 0, 0)
768 && IS_WIDE_WORD_CHAR (wc))
770 : ((IS_WIDE_NEWLINE (wc)
771 && pstr->newline_anchor)
772 ? CONTEXT_NEWLINE : 0));
773 if (BE (pstr->valid_len, 0))
775 for (wcs_idx = 0; wcs_idx < pstr->valid_len; ++wcs_idx)
776 pstr->wcs[wcs_idx] = WEOF;
777 if (pstr->mbs_allocated)
778 memset (pstr->mbs, 255, pstr->valid_len);
780 pstr->valid_raw_len = pstr->valid_len;
783 #endif /* RE_ENABLE_I18N */
785 int c = pstr->raw_mbs[pstr->raw_mbs_idx + offset - 1];
786 pstr->valid_raw_len = 0;
789 pstr->tip_context = (bitset_contain (pstr->word_char, c)
791 : ((IS_NEWLINE (c) && pstr->newline_anchor)
792 ? CONTEXT_NEWLINE : 0));
795 if (!BE (pstr->mbs_allocated, 0))
798 pstr->raw_mbs_idx = idx;
800 pstr->stop -= offset;
802 /* Then build the buffers. */
803 #ifdef RE_ENABLE_I18N
804 if (pstr->mb_cur_max > 1)
808 reg_errcode_t ret = build_wcs_upper_buffer (pstr);
809 if (BE (ret != REG_NOERROR, 0))
813 build_wcs_buffer (pstr);
816 #endif /* RE_ENABLE_I18N */
817 if (BE (pstr->mbs_allocated, 0))
820 build_upper_buffer (pstr);
821 else if (pstr->trans != NULL)
822 re_string_translate_buffer (pstr);
825 pstr->valid_len = pstr->len;
832 internal_function __attribute__ ((pure))
833 re_string_peek_byte_case (const re_string_t *pstr, Idx idx)
838 /* Handle the common (easiest) cases first. */
839 if (BE (!pstr->mbs_allocated, 1))
840 return re_string_peek_byte (pstr, idx);
842 #ifdef RE_ENABLE_I18N
843 if (pstr->mb_cur_max > 1
844 && ! re_string_is_single_byte_char (pstr, pstr->cur_idx + idx))
845 return re_string_peek_byte (pstr, idx);
848 off = pstr->cur_idx + idx;
849 #ifdef RE_ENABLE_I18N
850 if (pstr->offsets_needed)
851 off = pstr->offsets[off];
854 ch = pstr->raw_mbs[pstr->raw_mbs_idx + off];
856 #ifdef RE_ENABLE_I18N
857 /* Ensure that e.g. for tr_TR.UTF-8 BACKSLASH DOTLESS SMALL LETTER I
858 this function returns CAPITAL LETTER I instead of first byte of
859 DOTLESS SMALL LETTER I. The latter would confuse the parser,
860 since peek_byte_case doesn't advance cur_idx in any way. */
861 if (pstr->offsets_needed && !isascii (ch))
862 return re_string_peek_byte (pstr, idx);
870 re_string_fetch_byte_case (re_string_t *pstr)
872 if (BE (!pstr->mbs_allocated, 1))
873 return re_string_fetch_byte (pstr);
875 #ifdef RE_ENABLE_I18N
876 if (pstr->offsets_needed)
881 /* For tr_TR.UTF-8 [[:islower:]] there is
882 [[: CAPITAL LETTER I WITH DOT lower:]] in mbs. Skip
883 in that case the whole multi-byte character and return
884 the original letter. On the other side, with
885 [[: DOTLESS SMALL LETTER I return [[:I, as doing
886 anything else would complicate things too much. */
888 if (!re_string_first_byte (pstr, pstr->cur_idx))
889 return re_string_fetch_byte (pstr);
891 off = pstr->offsets[pstr->cur_idx];
892 ch = pstr->raw_mbs[pstr->raw_mbs_idx + off];
895 return re_string_fetch_byte (pstr);
897 re_string_skip_bytes (pstr,
898 re_string_char_size_at (pstr, pstr->cur_idx));
903 return pstr->raw_mbs[pstr->raw_mbs_idx + pstr->cur_idx++];
908 re_string_destruct (re_string_t *pstr)
910 #ifdef RE_ENABLE_I18N
912 re_free (pstr->offsets);
913 #endif /* RE_ENABLE_I18N */
914 if (pstr->mbs_allocated)
918 /* Return the context at IDX in INPUT. */
922 re_string_context_at (const re_string_t *input, Idx idx, int eflags)
925 if (BE (! REG_VALID_INDEX (idx), 0))
926 /* In this case, we use the value stored in input->tip_context,
927 since we can't know the character in input->mbs[-1] here. */
928 return input->tip_context;
929 if (BE (idx == input->len, 0))
930 return ((eflags & REG_NOTEOL) ? CONTEXT_ENDBUF
931 : CONTEXT_NEWLINE | CONTEXT_ENDBUF);
932 #ifdef RE_ENABLE_I18N
933 if (input->mb_cur_max > 1)
937 while(input->wcs[wc_idx] == WEOF)
939 #if defined DEBUG && DEBUG
940 /* It must not happen. */
941 assert (REG_VALID_INDEX (wc_idx));
944 if (! REG_VALID_INDEX (wc_idx))
945 return input->tip_context;
947 wc = input->wcs[wc_idx];
948 if (BE (input->word_ops_used != 0, 0) && IS_WIDE_WORD_CHAR (wc))
950 return (IS_WIDE_NEWLINE (wc) && input->newline_anchor
951 ? CONTEXT_NEWLINE : 0);
956 c = re_string_byte_at (input, idx);
957 if (bitset_contain (input->word_char, c))
959 return IS_NEWLINE (c) && input->newline_anchor ? CONTEXT_NEWLINE : 0;
963 /* Functions for set operation. */
966 internal_function __attribute_warn_unused_result__
967 re_node_set_alloc (re_node_set *set, Idx size)
971 set->elems = re_malloc (Idx, size);
972 if (BE (set->elems == NULL, 0) && (MALLOC_0_IS_NONNULL || size != 0))
978 internal_function __attribute_warn_unused_result__
979 re_node_set_init_1 (re_node_set *set, Idx elem)
983 set->elems = re_malloc (Idx, 1);
984 if (BE (set->elems == NULL, 0))
986 set->alloc = set->nelem = 0;
989 set->elems[0] = elem;
994 internal_function __attribute_warn_unused_result__
995 re_node_set_init_2 (re_node_set *set, Idx elem1, Idx elem2)
998 set->elems = re_malloc (Idx, 2);
999 if (BE (set->elems == NULL, 0))
1004 set->elems[0] = elem1;
1011 set->elems[0] = elem1;
1012 set->elems[1] = elem2;
1016 set->elems[0] = elem2;
1017 set->elems[1] = elem1;
1023 static reg_errcode_t
1024 internal_function __attribute_warn_unused_result__
1025 re_node_set_init_copy (re_node_set *dest, const re_node_set *src)
1027 dest->nelem = src->nelem;
1030 dest->alloc = dest->nelem;
1031 dest->elems = re_malloc (Idx, dest->alloc);
1032 if (BE (dest->elems == NULL, 0))
1034 dest->alloc = dest->nelem = 0;
1037 memcpy (dest->elems, src->elems, src->nelem * sizeof (Idx));
1040 re_node_set_init_empty (dest);
1044 /* Calculate the intersection of the sets SRC1 and SRC2. And merge it to
1045 DEST. Return value indicate the error code or REG_NOERROR if succeeded.
1046 Note: We assume dest->elems is NULL, when dest->alloc is 0. */
1048 static reg_errcode_t
1049 internal_function __attribute_warn_unused_result__
1050 re_node_set_add_intersect (re_node_set *dest, const re_node_set *src1,
1051 const re_node_set *src2)
1053 Idx i1, i2, is, id, delta, sbase;
1054 if (src1->nelem == 0 || src2->nelem == 0)
1057 /* We need dest->nelem + 2 * elems_in_intersection; this is a
1058 conservative estimate. */
1059 if (src1->nelem + src2->nelem + dest->nelem > dest->alloc)
1061 Idx new_alloc = src1->nelem + src2->nelem + dest->alloc;
1062 Idx *new_elems = re_realloc (dest->elems, Idx, new_alloc);
1063 if (BE (new_elems == NULL, 0))
1065 dest->elems = new_elems;
1066 dest->alloc = new_alloc;
1069 /* Find the items in the intersection of SRC1 and SRC2, and copy
1070 into the top of DEST those that are not already in DEST itself. */
1071 sbase = dest->nelem + src1->nelem + src2->nelem;
1072 i1 = src1->nelem - 1;
1073 i2 = src2->nelem - 1;
1074 id = dest->nelem - 1;
1077 if (src1->elems[i1] == src2->elems[i2])
1079 /* Try to find the item in DEST. Maybe we could binary search? */
1080 while (REG_VALID_INDEX (id) && dest->elems[id] > src1->elems[i1])
1083 if (! REG_VALID_INDEX (id) || dest->elems[id] != src1->elems[i1])
1084 dest->elems[--sbase] = src1->elems[i1];
1086 if (! REG_VALID_INDEX (--i1) || ! REG_VALID_INDEX (--i2))
1090 /* Lower the highest of the two items. */
1091 else if (src1->elems[i1] < src2->elems[i2])
1093 if (! REG_VALID_INDEX (--i2))
1098 if (! REG_VALID_INDEX (--i1))
1103 id = dest->nelem - 1;
1104 is = dest->nelem + src1->nelem + src2->nelem - 1;
1105 delta = is - sbase + 1;
1107 /* Now copy. When DELTA becomes zero, the remaining
1108 DEST elements are already in place; this is more or
1109 less the same loop that is in re_node_set_merge. */
1110 dest->nelem += delta;
1111 if (delta > 0 && REG_VALID_INDEX (id))
1114 if (dest->elems[is] > dest->elems[id])
1116 /* Copy from the top. */
1117 dest->elems[id + delta--] = dest->elems[is--];
1123 /* Slide from the bottom. */
1124 dest->elems[id + delta] = dest->elems[id];
1125 if (! REG_VALID_INDEX (--id))
1130 /* Copy remaining SRC elements. */
1131 memcpy (dest->elems, dest->elems + sbase, delta * sizeof (Idx));
1136 /* Calculate the union set of the sets SRC1 and SRC2. And store it to
1137 DEST. Return value indicate the error code or REG_NOERROR if succeeded. */
1139 static reg_errcode_t
1140 internal_function __attribute_warn_unused_result__
1141 re_node_set_init_union (re_node_set *dest, const re_node_set *src1,
1142 const re_node_set *src2)
1145 if (src1 != NULL && src1->nelem > 0 && src2 != NULL && src2->nelem > 0)
1147 dest->alloc = src1->nelem + src2->nelem;
1148 dest->elems = re_malloc (Idx, dest->alloc);
1149 if (BE (dest->elems == NULL, 0))
1154 if (src1 != NULL && src1->nelem > 0)
1155 return re_node_set_init_copy (dest, src1);
1156 else if (src2 != NULL && src2->nelem > 0)
1157 return re_node_set_init_copy (dest, src2);
1159 re_node_set_init_empty (dest);
1162 for (i1 = i2 = id = 0 ; i1 < src1->nelem && i2 < src2->nelem ;)
1164 if (src1->elems[i1] > src2->elems[i2])
1166 dest->elems[id++] = src2->elems[i2++];
1169 if (src1->elems[i1] == src2->elems[i2])
1171 dest->elems[id++] = src1->elems[i1++];
1173 if (i1 < src1->nelem)
1175 memcpy (dest->elems + id, src1->elems + i1,
1176 (src1->nelem - i1) * sizeof (Idx));
1177 id += src1->nelem - i1;
1179 else if (i2 < src2->nelem)
1181 memcpy (dest->elems + id, src2->elems + i2,
1182 (src2->nelem - i2) * sizeof (Idx));
1183 id += src2->nelem - i2;
1189 /* Calculate the union set of the sets DEST and SRC. And store it to
1190 DEST. Return value indicate the error code or REG_NOERROR if succeeded. */
1192 static reg_errcode_t
1193 internal_function __attribute_warn_unused_result__
1194 re_node_set_merge (re_node_set *dest, const re_node_set *src)
1196 Idx is, id, sbase, delta;
1197 if (src == NULL || src->nelem == 0)
1199 if (dest->alloc < 2 * src->nelem + dest->nelem)
1201 Idx new_alloc = 2 * (src->nelem + dest->alloc);
1202 Idx *new_buffer = re_realloc (dest->elems, Idx, new_alloc);
1203 if (BE (new_buffer == NULL, 0))
1205 dest->elems = new_buffer;
1206 dest->alloc = new_alloc;
1209 if (BE (dest->nelem == 0, 0))
1211 dest->nelem = src->nelem;
1212 memcpy (dest->elems, src->elems, src->nelem * sizeof (Idx));
1216 /* Copy into the top of DEST the items of SRC that are not
1217 found in DEST. Maybe we could binary search in DEST? */
1218 for (sbase = dest->nelem + 2 * src->nelem,
1219 is = src->nelem - 1, id = dest->nelem - 1;
1220 REG_VALID_INDEX (is) && REG_VALID_INDEX (id); )
1222 if (dest->elems[id] == src->elems[is])
1224 else if (dest->elems[id] < src->elems[is])
1225 dest->elems[--sbase] = src->elems[is--];
1226 else /* if (dest->elems[id] > src->elems[is]) */
1230 if (REG_VALID_INDEX (is))
1232 /* If DEST is exhausted, the remaining items of SRC must be unique. */
1234 memcpy (dest->elems + sbase, src->elems, (is + 1) * sizeof (Idx));
1237 id = dest->nelem - 1;
1238 is = dest->nelem + 2 * src->nelem - 1;
1239 delta = is - sbase + 1;
1243 /* Now copy. When DELTA becomes zero, the remaining
1244 DEST elements are already in place. */
1245 dest->nelem += delta;
1248 if (dest->elems[is] > dest->elems[id])
1250 /* Copy from the top. */
1251 dest->elems[id + delta--] = dest->elems[is--];
1257 /* Slide from the bottom. */
1258 dest->elems[id + delta] = dest->elems[id];
1259 if (! REG_VALID_INDEX (--id))
1261 /* Copy remaining SRC elements. */
1262 memcpy (dest->elems, dest->elems + sbase,
1263 delta * sizeof (Idx));
1272 /* Insert the new element ELEM to the re_node_set* SET.
1273 SET should not already have ELEM.
1274 Return true if successful. */
1277 internal_function __attribute_warn_unused_result__
1278 re_node_set_insert (re_node_set *set, Idx elem)
1281 /* In case the set is empty. */
1282 if (set->alloc == 0)
1283 return BE (re_node_set_init_1 (set, elem) == REG_NOERROR, 1);
1285 if (BE (set->nelem, 0) == 0)
1287 /* We already guaranteed above that set->alloc != 0. */
1288 set->elems[0] = elem;
1293 /* Realloc if we need. */
1294 if (set->alloc == set->nelem)
1297 set->alloc = set->alloc * 2;
1298 new_elems = re_realloc (set->elems, Idx, set->alloc);
1299 if (BE (new_elems == NULL, 0))
1301 set->elems = new_elems;
1304 /* Move the elements which follows the new element. Test the
1305 first element separately to skip a check in the inner loop. */
1306 if (elem < set->elems[0])
1309 for (idx = set->nelem; idx > 0; idx--)
1310 set->elems[idx] = set->elems[idx - 1];
1314 for (idx = set->nelem; set->elems[idx - 1] > elem; idx--)
1315 set->elems[idx] = set->elems[idx - 1];
1318 /* Insert the new element. */
1319 set->elems[idx] = elem;
1324 /* Insert the new element ELEM to the re_node_set* SET.
1325 SET should not already have any element greater than or equal to ELEM.
1326 Return true if successful. */
1329 internal_function __attribute_warn_unused_result__
1330 re_node_set_insert_last (re_node_set *set, Idx elem)
1332 /* Realloc if we need. */
1333 if (set->alloc == set->nelem)
1336 set->alloc = (set->alloc + 1) * 2;
1337 new_elems = re_realloc (set->elems, Idx, set->alloc);
1338 if (BE (new_elems == NULL, 0))
1340 set->elems = new_elems;
1343 /* Insert the new element. */
1344 set->elems[set->nelem++] = elem;
1348 /* Compare two node sets SET1 and SET2.
1349 Return true if SET1 and SET2 are equivalent. */
1352 internal_function __attribute__ ((pure))
1353 re_node_set_compare (const re_node_set *set1, const re_node_set *set2)
1356 if (set1 == NULL || set2 == NULL || set1->nelem != set2->nelem)
1358 for (i = set1->nelem ; REG_VALID_INDEX (--i) ; )
1359 if (set1->elems[i] != set2->elems[i])
1364 /* Return (idx + 1) if SET contains the element ELEM, return 0 otherwise. */
1367 internal_function __attribute__ ((pure))
1368 re_node_set_contains (const re_node_set *set, Idx elem)
1370 __re_size_t idx, right, mid;
1371 if (! REG_VALID_NONZERO_INDEX (set->nelem))
1374 /* Binary search the element. */
1376 right = set->nelem - 1;
1379 mid = (idx + right) / 2;
1380 if (set->elems[mid] < elem)
1385 return set->elems[idx] == elem ? idx + 1 : 0;
1390 re_node_set_remove_at (re_node_set *set, Idx idx)
1392 if (idx < 0 || idx >= set->nelem)
1395 for (; idx < set->nelem; idx++)
1396 set->elems[idx] = set->elems[idx + 1];
1400 /* Add the token TOKEN to dfa->nodes, and return the index of the token.
1401 Or return REG_MISSING if an error occurred. */
1405 re_dfa_add_node (re_dfa_t *dfa, re_token_t token)
1407 if (BE (dfa->nodes_len >= dfa->nodes_alloc, 0))
1409 size_t new_nodes_alloc = dfa->nodes_alloc * 2;
1410 Idx *new_nexts, *new_indices;
1411 re_node_set *new_edests, *new_eclosures;
1412 re_token_t *new_nodes;
1414 /* Avoid overflows in realloc. */
1415 const size_t max_object_size = MAX (sizeof (re_token_t),
1416 MAX (sizeof (re_node_set),
1418 if (BE (MIN (IDX_MAX, SIZE_MAX / max_object_size) < new_nodes_alloc, 0))
1421 new_nodes = re_realloc (dfa->nodes, re_token_t, new_nodes_alloc);
1422 if (BE (new_nodes == NULL, 0))
1424 dfa->nodes = new_nodes;
1425 new_nexts = re_realloc (dfa->nexts, Idx, new_nodes_alloc);
1426 new_indices = re_realloc (dfa->org_indices, Idx, new_nodes_alloc);
1427 new_edests = re_realloc (dfa->edests, re_node_set, new_nodes_alloc);
1428 new_eclosures = re_realloc (dfa->eclosures, re_node_set, new_nodes_alloc);
1429 if (BE (new_nexts == NULL || new_indices == NULL
1430 || new_edests == NULL || new_eclosures == NULL, 0))
1432 dfa->nexts = new_nexts;
1433 dfa->org_indices = new_indices;
1434 dfa->edests = new_edests;
1435 dfa->eclosures = new_eclosures;
1436 dfa->nodes_alloc = new_nodes_alloc;
1438 dfa->nodes[dfa->nodes_len] = token;
1439 dfa->nodes[dfa->nodes_len].constraint = 0;
1440 #ifdef RE_ENABLE_I18N
1441 dfa->nodes[dfa->nodes_len].accept_mb =
1442 ((token.type == OP_PERIOD && dfa->mb_cur_max > 1)
1443 || token.type == COMPLEX_BRACKET);
1445 dfa->nexts[dfa->nodes_len] = REG_MISSING;
1446 re_node_set_init_empty (dfa->edests + dfa->nodes_len);
1447 re_node_set_init_empty (dfa->eclosures + dfa->nodes_len);
1448 return dfa->nodes_len++;
1453 calc_state_hash (const re_node_set *nodes, unsigned int context)
1455 re_hashval_t hash = nodes->nelem + context;
1457 for (i = 0 ; i < nodes->nelem ; i++)
1458 hash += nodes->elems[i];
1462 /* Search for the state whose node_set is equivalent to NODES.
1463 Return the pointer to the state, if we found it in the DFA.
1464 Otherwise create the new one and return it. In case of an error
1465 return NULL and set the error code in ERR.
1466 Note: - We assume NULL as the invalid state, then it is possible that
1467 return value is NULL and ERR is REG_NOERROR.
1468 - We never return non-NULL value in case of any errors, it is for
1471 static re_dfastate_t *
1472 internal_function __attribute_warn_unused_result__
1473 re_acquire_state (reg_errcode_t *err, const re_dfa_t *dfa,
1474 const re_node_set *nodes)
1477 re_dfastate_t *new_state;
1478 struct re_state_table_entry *spot;
1481 /* Suppress bogus uninitialized-variable warnings. */
1484 if (BE (nodes->nelem == 0, 0))
1489 hash = calc_state_hash (nodes, 0);
1490 spot = dfa->state_table + (hash & dfa->state_hash_mask);
1492 for (i = 0 ; i < spot->num ; i++)
1494 re_dfastate_t *state = spot->array[i];
1495 if (hash != state->hash)
1497 if (re_node_set_compare (&state->nodes, nodes))
1501 /* There are no appropriate state in the dfa, create the new one. */
1502 new_state = create_ci_newstate (dfa, nodes, hash);
1503 if (BE (new_state == NULL, 0))
1509 /* Search for the state whose node_set is equivalent to NODES and
1510 whose context is equivalent to CONTEXT.
1511 Return the pointer to the state, if we found it in the DFA.
1512 Otherwise create the new one and return it. In case of an error
1513 return NULL and set the error code in ERR.
1514 Note: - We assume NULL as the invalid state, then it is possible that
1515 return value is NULL and ERR is REG_NOERROR.
1516 - We never return non-NULL value in case of any errors, it is for
1519 static re_dfastate_t *
1520 internal_function __attribute_warn_unused_result__
1521 re_acquire_state_context (reg_errcode_t *err, const re_dfa_t *dfa,
1522 const re_node_set *nodes, unsigned int context)
1525 re_dfastate_t *new_state;
1526 struct re_state_table_entry *spot;
1529 /* Suppress bogus uninitialized-variable warnings. */
1532 if (nodes->nelem == 0)
1537 hash = calc_state_hash (nodes, context);
1538 spot = dfa->state_table + (hash & dfa->state_hash_mask);
1540 for (i = 0 ; i < spot->num ; i++)
1542 re_dfastate_t *state = spot->array[i];
1543 if (state->hash == hash
1544 && state->context == context
1545 && re_node_set_compare (state->entrance_nodes, nodes))
1548 /* There are no appropriate state in 'dfa', create the new one. */
1549 new_state = create_cd_newstate (dfa, nodes, context, hash);
1550 if (BE (new_state == NULL, 0))
1556 /* Finish initialization of the new state NEWSTATE, and using its hash value
1557 HASH put in the appropriate bucket of DFA's state table. Return value
1558 indicates the error code if failed. */
1560 static reg_errcode_t
1561 __attribute_warn_unused_result__
1562 register_state (const re_dfa_t *dfa, re_dfastate_t *newstate,
1565 struct re_state_table_entry *spot;
1569 newstate->hash = hash;
1570 err = re_node_set_alloc (&newstate->non_eps_nodes, newstate->nodes.nelem);
1571 if (BE (err != REG_NOERROR, 0))
1573 for (i = 0; i < newstate->nodes.nelem; i++)
1575 Idx elem = newstate->nodes.elems[i];
1576 if (!IS_EPSILON_NODE (dfa->nodes[elem].type))
1577 if (! re_node_set_insert_last (&newstate->non_eps_nodes, elem))
1581 spot = dfa->state_table + (hash & dfa->state_hash_mask);
1582 if (BE (spot->alloc <= spot->num, 0))
1584 Idx new_alloc = 2 * spot->num + 2;
1585 re_dfastate_t **new_array = re_realloc (spot->array, re_dfastate_t *,
1587 if (BE (new_array == NULL, 0))
1589 spot->array = new_array;
1590 spot->alloc = new_alloc;
1592 spot->array[spot->num++] = newstate;
1597 free_state (re_dfastate_t *state)
1599 re_node_set_free (&state->non_eps_nodes);
1600 re_node_set_free (&state->inveclosure);
1601 if (state->entrance_nodes != &state->nodes)
1603 re_node_set_free (state->entrance_nodes);
1604 re_free (state->entrance_nodes);
1606 re_node_set_free (&state->nodes);
1607 re_free (state->word_trtable);
1608 re_free (state->trtable);
1612 /* Create the new state which is independent of contexts.
1613 Return the new state if succeeded, otherwise return NULL. */
1615 static re_dfastate_t *
1616 internal_function __attribute_warn_unused_result__
1617 create_ci_newstate (const re_dfa_t *dfa, const re_node_set *nodes,
1622 re_dfastate_t *newstate;
1624 newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1);
1625 if (BE (newstate == NULL, 0))
1627 err = re_node_set_init_copy (&newstate->nodes, nodes);
1628 if (BE (err != REG_NOERROR, 0))
1634 newstate->entrance_nodes = &newstate->nodes;
1635 for (i = 0 ; i < nodes->nelem ; i++)
1637 re_token_t *node = dfa->nodes + nodes->elems[i];
1638 re_token_type_t type = node->type;
1639 if (type == CHARACTER && !node->constraint)
1641 #ifdef RE_ENABLE_I18N
1642 newstate->accept_mb |= node->accept_mb;
1643 #endif /* RE_ENABLE_I18N */
1645 /* If the state has the halt node, the state is a halt state. */
1646 if (type == END_OF_RE)
1648 else if (type == OP_BACK_REF)
1649 newstate->has_backref = 1;
1650 else if (type == ANCHOR || node->constraint)
1651 newstate->has_constraint = 1;
1653 err = register_state (dfa, newstate, hash);
1654 if (BE (err != REG_NOERROR, 0))
1656 free_state (newstate);
1662 /* Create the new state which is depend on the context CONTEXT.
1663 Return the new state if succeeded, otherwise return NULL. */
1665 static re_dfastate_t *
1666 internal_function __attribute_warn_unused_result__
1667 create_cd_newstate (const re_dfa_t *dfa, const re_node_set *nodes,
1668 unsigned int context, re_hashval_t hash)
1670 Idx i, nctx_nodes = 0;
1672 re_dfastate_t *newstate;
1674 newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1);
1675 if (BE (newstate == NULL, 0))
1677 err = re_node_set_init_copy (&newstate->nodes, nodes);
1678 if (BE (err != REG_NOERROR, 0))
1684 newstate->context = context;
1685 newstate->entrance_nodes = &newstate->nodes;
1687 for (i = 0 ; i < nodes->nelem ; i++)
1689 re_token_t *node = dfa->nodes + nodes->elems[i];
1690 re_token_type_t type = node->type;
1691 unsigned int constraint = node->constraint;
1693 if (type == CHARACTER && !constraint)
1695 #ifdef RE_ENABLE_I18N
1696 newstate->accept_mb |= node->accept_mb;
1697 #endif /* RE_ENABLE_I18N */
1699 /* If the state has the halt node, the state is a halt state. */
1700 if (type == END_OF_RE)
1702 else if (type == OP_BACK_REF)
1703 newstate->has_backref = 1;
1707 if (newstate->entrance_nodes == &newstate->nodes)
1709 newstate->entrance_nodes = re_malloc (re_node_set, 1);
1710 if (BE (newstate->entrance_nodes == NULL, 0))
1712 free_state (newstate);
1715 if (re_node_set_init_copy (newstate->entrance_nodes, nodes)
1719 newstate->has_constraint = 1;
1722 if (NOT_SATISFY_PREV_CONSTRAINT (constraint,context))
1724 re_node_set_remove_at (&newstate->nodes, i - nctx_nodes);
1729 err = register_state (dfa, newstate, hash);
1730 if (BE (err != REG_NOERROR, 0))
1732 free_state (newstate);