1 /* -*- buffer-read-only: t -*- vi: set ro: */
2 /* DO NOT EDIT! GENERATED AUTOMATICALLY! */
3 /* Extended regular expression matching and search library.
4 Copyright (C) 2002-2014 Free Software Foundation, Inc.
5 This file is part of the GNU C Library.
6 Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
8 The GNU C Library is free software; you can redistribute it and/or
9 modify it under the terms of the GNU General Public
10 License as published by the Free Software Foundation; either
11 version 3 of the License, or (at your option) any later version.
13 The GNU C Library is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 General Public License for more details.
18 You should have received a copy of the GNU General Public
19 License along with the GNU C Library; if not, see
20 <http://www.gnu.org/licenses/>. */
22 static void re_string_construct_common (const char *str, Idx len,
24 RE_TRANSLATE_TYPE trans, bool icase,
25 const re_dfa_t *dfa) internal_function;
26 static re_dfastate_t *create_ci_newstate (const re_dfa_t *dfa,
27 const re_node_set *nodes,
28 re_hashval_t hash) internal_function;
29 static re_dfastate_t *create_cd_newstate (const re_dfa_t *dfa,
30 const re_node_set *nodes,
32 re_hashval_t hash) internal_function;
34 /* Functions for string operation. */
36 /* This function allocate the buffers. It is necessary to call
37 re_string_reconstruct before using the object. */
40 internal_function __attribute_warn_unused_result__
41 re_string_allocate (re_string_t *pstr, const char *str, Idx len, Idx init_len,
42 RE_TRANSLATE_TYPE trans, bool icase, const re_dfa_t *dfa)
47 /* Ensure at least one character fits into the buffers. */
48 if (init_len < dfa->mb_cur_max)
49 init_len = dfa->mb_cur_max;
50 init_buf_len = (len + 1 < init_len) ? len + 1: init_len;
51 re_string_construct_common (str, len, pstr, trans, icase, dfa);
53 ret = re_string_realloc_buffers (pstr, init_buf_len);
54 if (BE (ret != REG_NOERROR, 0))
57 pstr->word_char = dfa->word_char;
58 pstr->word_ops_used = dfa->word_ops_used;
59 pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str;
60 pstr->valid_len = (pstr->mbs_allocated || dfa->mb_cur_max > 1) ? 0 : len;
61 pstr->valid_raw_len = pstr->valid_len;
65 /* This function allocate the buffers, and initialize them. */
68 internal_function __attribute_warn_unused_result__
69 re_string_construct (re_string_t *pstr, const char *str, Idx len,
70 RE_TRANSLATE_TYPE trans, bool icase, const re_dfa_t *dfa)
73 memset (pstr, '\0', sizeof (re_string_t));
74 re_string_construct_common (str, len, pstr, trans, icase, dfa);
78 ret = re_string_realloc_buffers (pstr, len + 1);
79 if (BE (ret != REG_NOERROR, 0))
82 pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str;
87 if (dfa->mb_cur_max > 1)
91 ret = build_wcs_upper_buffer (pstr);
92 if (BE (ret != REG_NOERROR, 0))
94 if (pstr->valid_raw_len >= len)
96 if (pstr->bufs_len > pstr->valid_len + dfa->mb_cur_max)
98 ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2);
99 if (BE (ret != REG_NOERROR, 0))
104 #endif /* RE_ENABLE_I18N */
105 build_upper_buffer (pstr);
109 #ifdef RE_ENABLE_I18N
110 if (dfa->mb_cur_max > 1)
111 build_wcs_buffer (pstr);
113 #endif /* RE_ENABLE_I18N */
116 re_string_translate_buffer (pstr);
119 pstr->valid_len = pstr->bufs_len;
120 pstr->valid_raw_len = pstr->bufs_len;
128 /* Helper functions for re_string_allocate, and re_string_construct. */
131 internal_function __attribute_warn_unused_result__
132 re_string_realloc_buffers (re_string_t *pstr, Idx new_buf_len)
134 #ifdef RE_ENABLE_I18N
135 if (pstr->mb_cur_max > 1)
139 /* Avoid overflow in realloc. */
140 const size_t max_object_size = MAX (sizeof (wint_t), sizeof (Idx));
141 if (BE (MIN (IDX_MAX, SIZE_MAX / max_object_size) < new_buf_len, 0))
144 new_wcs = re_realloc (pstr->wcs, wint_t, new_buf_len);
145 if (BE (new_wcs == NULL, 0))
148 if (pstr->offsets != NULL)
150 Idx *new_offsets = re_realloc (pstr->offsets, Idx, new_buf_len);
151 if (BE (new_offsets == NULL, 0))
153 pstr->offsets = new_offsets;
156 #endif /* RE_ENABLE_I18N */
157 if (pstr->mbs_allocated)
159 unsigned char *new_mbs = re_realloc (pstr->mbs, unsigned char,
161 if (BE (new_mbs == NULL, 0))
165 pstr->bufs_len = new_buf_len;
172 re_string_construct_common (const char *str, Idx len, re_string_t *pstr,
173 RE_TRANSLATE_TYPE trans, bool icase,
176 pstr->raw_mbs = (const unsigned char *) str;
181 pstr->mbs_allocated = (trans != NULL || icase);
182 pstr->mb_cur_max = dfa->mb_cur_max;
183 pstr->is_utf8 = dfa->is_utf8;
184 pstr->map_notascii = dfa->map_notascii;
185 pstr->stop = pstr->len;
186 pstr->raw_stop = pstr->stop;
189 #ifdef RE_ENABLE_I18N
191 /* Build wide character buffer PSTR->WCS.
192 If the byte sequence of the string are:
193 <mb1>(0), <mb1>(1), <mb2>(0), <mb2>(1), <sb3>
194 Then wide character buffer will be:
195 <wc1> , WEOF , <wc2> , WEOF , <wc3>
196 We use WEOF for padding, they indicate that the position isn't
197 a first byte of a multibyte character.
199 Note that this function assumes PSTR->VALID_LEN elements are already
200 built and starts from PSTR->VALID_LEN. */
204 build_wcs_buffer (re_string_t *pstr)
207 unsigned char buf[MB_LEN_MAX];
208 assert (MB_LEN_MAX >= pstr->mb_cur_max);
210 unsigned char buf[64];
213 Idx byte_idx, end_idx, remain_len;
216 /* Build the buffers from pstr->valid_len to either pstr->len or
218 end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
219 for (byte_idx = pstr->valid_len; byte_idx < end_idx;)
224 remain_len = end_idx - byte_idx;
225 prev_st = pstr->cur_state;
226 /* Apply the translation if we need. */
227 if (BE (pstr->trans != NULL, 0))
231 for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i)
233 ch = pstr->raw_mbs [pstr->raw_mbs_idx + byte_idx + i];
234 buf[i] = pstr->mbs[byte_idx + i] = pstr->trans[ch];
236 p = (const char *) buf;
239 p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx;
240 mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state);
241 if (BE (mbclen == (size_t) -1 || mbclen == 0
242 || (mbclen == (size_t) -2 && pstr->bufs_len >= pstr->len), 0))
244 /* We treat these cases as a singlebyte character. */
246 wc = (wchar_t) pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
247 if (BE (pstr->trans != NULL, 0))
248 wc = pstr->trans[wc];
249 pstr->cur_state = prev_st;
251 else if (BE (mbclen == (size_t) -2, 0))
253 /* The buffer doesn't have enough space, finish to build. */
254 pstr->cur_state = prev_st;
258 /* Write wide character and padding. */
259 pstr->wcs[byte_idx++] = wc;
260 /* Write paddings. */
261 for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
262 pstr->wcs[byte_idx++] = WEOF;
264 pstr->valid_len = byte_idx;
265 pstr->valid_raw_len = byte_idx;
268 /* Build wide character buffer PSTR->WCS like build_wcs_buffer,
269 but for REG_ICASE. */
272 internal_function __attribute_warn_unused_result__
273 build_wcs_upper_buffer (re_string_t *pstr)
276 Idx src_idx, byte_idx, end_idx, remain_len;
279 char buf[MB_LEN_MAX];
280 assert (MB_LEN_MAX >= pstr->mb_cur_max);
285 byte_idx = pstr->valid_len;
286 end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
288 /* The following optimization assumes that ASCII characters can be
289 mapped to wide characters with a simple cast. */
290 if (! pstr->map_notascii && pstr->trans == NULL && !pstr->offsets_needed)
292 while (byte_idx < end_idx)
296 if (isascii (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx])
297 && mbsinit (&pstr->cur_state))
299 /* In case of a singlebyte character. */
301 = toupper (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]);
302 /* The next step uses the assumption that wchar_t is encoded
303 ASCII-safe: all ASCII values can be converted like this. */
304 pstr->wcs[byte_idx] = (wchar_t) pstr->mbs[byte_idx];
309 remain_len = end_idx - byte_idx;
310 prev_st = pstr->cur_state;
311 mbclen = __mbrtowc (&wc,
312 ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx
313 + byte_idx), remain_len, &pstr->cur_state);
314 if (BE (mbclen < (size_t) -2, 1))
316 wchar_t wcu = towupper (wc);
321 mbcdlen = wcrtomb (buf, wcu, &prev_st);
322 if (BE (mbclen == mbcdlen, 1))
323 memcpy (pstr->mbs + byte_idx, buf, mbclen);
331 memcpy (pstr->mbs + byte_idx,
332 pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx, mbclen);
333 pstr->wcs[byte_idx++] = wcu;
334 /* Write paddings. */
335 for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
336 pstr->wcs[byte_idx++] = WEOF;
338 else if (mbclen == (size_t) -1 || mbclen == 0
339 || (mbclen == (size_t) -2 && pstr->bufs_len >= pstr->len))
341 /* It is an invalid character, an incomplete character
342 at the end of the string, or '\0'. Just use the byte. */
343 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
344 pstr->mbs[byte_idx] = ch;
345 /* And also cast it to wide char. */
346 pstr->wcs[byte_idx++] = (wchar_t) ch;
347 if (BE (mbclen == (size_t) -1, 0))
348 pstr->cur_state = prev_st;
352 /* The buffer doesn't have enough space, finish to build. */
353 pstr->cur_state = prev_st;
357 pstr->valid_len = byte_idx;
358 pstr->valid_raw_len = byte_idx;
362 for (src_idx = pstr->valid_raw_len; byte_idx < end_idx;)
367 remain_len = end_idx - byte_idx;
368 prev_st = pstr->cur_state;
369 if (BE (pstr->trans != NULL, 0))
373 for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i)
375 ch = pstr->raw_mbs [pstr->raw_mbs_idx + src_idx + i];
376 buf[i] = pstr->trans[ch];
378 p = (const char *) buf;
381 p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + src_idx;
382 mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state);
383 if (BE (mbclen < (size_t) -2, 1))
385 wchar_t wcu = towupper (wc);
390 mbcdlen = wcrtomb ((char *) buf, wcu, &prev_st);
391 if (BE (mbclen == mbcdlen, 1))
392 memcpy (pstr->mbs + byte_idx, buf, mbclen);
393 else if (mbcdlen != (size_t) -1)
397 if (byte_idx + mbcdlen > pstr->bufs_len)
399 pstr->cur_state = prev_st;
403 if (pstr->offsets == NULL)
405 pstr->offsets = re_malloc (Idx, pstr->bufs_len);
407 if (pstr->offsets == NULL)
410 if (!pstr->offsets_needed)
412 for (i = 0; i < (size_t) byte_idx; ++i)
413 pstr->offsets[i] = i;
414 pstr->offsets_needed = 1;
417 memcpy (pstr->mbs + byte_idx, buf, mbcdlen);
418 pstr->wcs[byte_idx] = wcu;
419 pstr->offsets[byte_idx] = src_idx;
420 for (i = 1; i < mbcdlen; ++i)
422 pstr->offsets[byte_idx + i]
423 = src_idx + (i < mbclen ? i : mbclen - 1);
424 pstr->wcs[byte_idx + i] = WEOF;
426 pstr->len += mbcdlen - mbclen;
427 if (pstr->raw_stop > src_idx)
428 pstr->stop += mbcdlen - mbclen;
429 end_idx = (pstr->bufs_len > pstr->len)
430 ? pstr->len : pstr->bufs_len;
436 memcpy (pstr->mbs + byte_idx, p, mbclen);
439 memcpy (pstr->mbs + byte_idx, p, mbclen);
441 if (BE (pstr->offsets_needed != 0, 0))
444 for (i = 0; i < mbclen; ++i)
445 pstr->offsets[byte_idx + i] = src_idx + i;
449 pstr->wcs[byte_idx++] = wcu;
450 /* Write paddings. */
451 for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
452 pstr->wcs[byte_idx++] = WEOF;
454 else if (mbclen == (size_t) -1 || mbclen == 0
455 || (mbclen == (size_t) -2 && pstr->bufs_len >= pstr->len))
457 /* It is an invalid character or '\0'. Just use the byte. */
458 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + src_idx];
460 if (BE (pstr->trans != NULL, 0))
461 ch = pstr->trans [ch];
462 pstr->mbs[byte_idx] = ch;
464 if (BE (pstr->offsets_needed != 0, 0))
465 pstr->offsets[byte_idx] = src_idx;
468 /* And also cast it to wide char. */
469 pstr->wcs[byte_idx++] = (wchar_t) ch;
470 if (BE (mbclen == (size_t) -1, 0))
471 pstr->cur_state = prev_st;
475 /* The buffer doesn't have enough space, finish to build. */
476 pstr->cur_state = prev_st;
480 pstr->valid_len = byte_idx;
481 pstr->valid_raw_len = src_idx;
485 /* Skip characters until the index becomes greater than NEW_RAW_IDX.
490 re_string_skip_chars (re_string_t *pstr, Idx new_raw_idx, wint_t *last_wc)
497 /* Skip the characters which are not necessary to check. */
498 for (rawbuf_idx = pstr->raw_mbs_idx + pstr->valid_raw_len;
499 rawbuf_idx < new_raw_idx;)
502 Idx remain_len = pstr->raw_len - rawbuf_idx;
503 prev_st = pstr->cur_state;
504 mbclen = __mbrtowc (&wc2, (const char *) pstr->raw_mbs + rawbuf_idx,
505 remain_len, &pstr->cur_state);
506 if (BE (mbclen == (size_t) -2 || mbclen == (size_t) -1 || mbclen == 0, 0))
508 /* We treat these cases as a single byte character. */
509 if (mbclen == 0 || remain_len == 0)
512 wc = *(unsigned char *) (pstr->raw_mbs + rawbuf_idx);
514 pstr->cur_state = prev_st;
518 /* Then proceed the next character. */
519 rawbuf_idx += mbclen;
524 #endif /* RE_ENABLE_I18N */
526 /* Build the buffer PSTR->MBS, and apply the translation if we need.
527 This function is used in case of REG_ICASE. */
531 build_upper_buffer (re_string_t *pstr)
533 Idx char_idx, end_idx;
534 end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
536 for (char_idx = pstr->valid_len; char_idx < end_idx; ++char_idx)
538 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + char_idx];
539 if (BE (pstr->trans != NULL, 0))
540 ch = pstr->trans[ch];
541 pstr->mbs[char_idx] = toupper (ch);
543 pstr->valid_len = char_idx;
544 pstr->valid_raw_len = char_idx;
547 /* Apply TRANS to the buffer in PSTR. */
551 re_string_translate_buffer (re_string_t *pstr)
553 Idx buf_idx, end_idx;
554 end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
556 for (buf_idx = pstr->valid_len; buf_idx < end_idx; ++buf_idx)
558 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + buf_idx];
559 pstr->mbs[buf_idx] = pstr->trans[ch];
562 pstr->valid_len = buf_idx;
563 pstr->valid_raw_len = buf_idx;
566 /* This function re-construct the buffers.
567 Concretely, convert to wide character in case of pstr->mb_cur_max > 1,
568 convert to upper case in case of REG_ICASE, apply translation. */
571 internal_function __attribute_warn_unused_result__
572 re_string_reconstruct (re_string_t *pstr, Idx idx, int eflags)
576 if (BE (pstr->raw_mbs_idx <= idx, 0))
577 offset = idx - pstr->raw_mbs_idx;
581 #ifdef RE_ENABLE_I18N
582 if (pstr->mb_cur_max > 1)
583 memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
584 #endif /* RE_ENABLE_I18N */
585 pstr->len = pstr->raw_len;
586 pstr->stop = pstr->raw_stop;
588 pstr->raw_mbs_idx = 0;
589 pstr->valid_raw_len = 0;
590 pstr->offsets_needed = 0;
591 pstr->tip_context = ((eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
592 : CONTEXT_NEWLINE | CONTEXT_BEGBUF);
593 if (!pstr->mbs_allocated)
594 pstr->mbs = (unsigned char *) pstr->raw_mbs;
598 if (BE (offset != 0, 1))
600 /* Should the already checked characters be kept? */
601 if (BE (offset < pstr->valid_raw_len, 1))
603 /* Yes, move them to the front of the buffer. */
604 #ifdef RE_ENABLE_I18N
605 if (BE (pstr->offsets_needed, 0))
607 Idx low = 0, high = pstr->valid_len, mid;
610 mid = (high + low) / 2;
611 if (pstr->offsets[mid] > offset)
613 else if (pstr->offsets[mid] < offset)
619 if (pstr->offsets[mid] < offset)
621 pstr->tip_context = re_string_context_at (pstr, mid - 1,
623 /* This can be quite complicated, so handle specially
624 only the common and easy case where the character with
625 different length representation of lower and upper
626 case is present at or after offset. */
627 if (pstr->valid_len > offset
628 && mid == offset && pstr->offsets[mid] == offset)
630 memmove (pstr->wcs, pstr->wcs + offset,
631 (pstr->valid_len - offset) * sizeof (wint_t));
632 memmove (pstr->mbs, pstr->mbs + offset, pstr->valid_len - offset);
633 pstr->valid_len -= offset;
634 pstr->valid_raw_len -= offset;
635 for (low = 0; low < pstr->valid_len; low++)
636 pstr->offsets[low] = pstr->offsets[low + offset] - offset;
640 /* Otherwise, just find out how long the partial multibyte
641 character at offset is and fill it with WEOF/255. */
642 pstr->len = pstr->raw_len - idx + offset;
643 pstr->stop = pstr->raw_stop - idx + offset;
644 pstr->offsets_needed = 0;
645 while (mid > 0 && pstr->offsets[mid - 1] == offset)
647 while (mid < pstr->valid_len)
648 if (pstr->wcs[mid] != WEOF)
652 if (mid == pstr->valid_len)
656 pstr->valid_len = pstr->offsets[mid] - offset;
659 for (low = 0; low < pstr->valid_len; ++low)
660 pstr->wcs[low] = WEOF;
661 memset (pstr->mbs, 255, pstr->valid_len);
664 pstr->valid_raw_len = pstr->valid_len;
670 pstr->tip_context = re_string_context_at (pstr, offset - 1,
672 #ifdef RE_ENABLE_I18N
673 if (pstr->mb_cur_max > 1)
674 memmove (pstr->wcs, pstr->wcs + offset,
675 (pstr->valid_len - offset) * sizeof (wint_t));
676 #endif /* RE_ENABLE_I18N */
677 if (BE (pstr->mbs_allocated, 0))
678 memmove (pstr->mbs, pstr->mbs + offset,
679 pstr->valid_len - offset);
680 pstr->valid_len -= offset;
681 pstr->valid_raw_len -= offset;
683 assert (pstr->valid_len > 0);
689 #ifdef RE_ENABLE_I18N
690 /* No, skip all characters until IDX. */
691 Idx prev_valid_len = pstr->valid_len;
693 if (BE (pstr->offsets_needed, 0))
695 pstr->len = pstr->raw_len - idx + offset;
696 pstr->stop = pstr->raw_stop - idx + offset;
697 pstr->offsets_needed = 0;
701 #ifdef RE_ENABLE_I18N
702 if (pstr->mb_cur_max > 1)
709 const unsigned char *raw, *p, *end;
711 /* Special case UTF-8. Multi-byte chars start with any
712 byte other than 0x80 - 0xbf. */
713 raw = pstr->raw_mbs + pstr->raw_mbs_idx;
714 end = raw + (offset - pstr->mb_cur_max);
715 if (end < pstr->raw_mbs)
717 p = raw + offset - 1;
719 /* We know the wchar_t encoding is UCS4, so for the simple
720 case, ASCII characters, skip the conversion step. */
721 if (isascii (*p) && BE (pstr->trans == NULL, 1))
723 memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
724 /* pstr->valid_len = 0; */
729 for (; p >= end; --p)
730 if ((*p & 0xc0) != 0x80)
734 Idx mlen = raw + pstr->len - p;
735 unsigned char buf[6];
738 const unsigned char *pp = p;
739 if (BE (pstr->trans != NULL, 0))
741 int i = mlen < 6 ? mlen : 6;
743 buf[i] = pstr->trans[p[i]];
746 /* XXX Don't use mbrtowc, we know which conversion
747 to use (UTF-8 -> UCS4). */
748 memset (&cur_state, 0, sizeof (cur_state));
749 mbclen = __mbrtowc (&wc2, (const char *) pp, mlen,
751 if (raw + offset - p <= mbclen
752 && mbclen < (size_t) -2)
754 memset (&pstr->cur_state, '\0',
756 pstr->valid_len = mbclen - (raw + offset - p);
764 pstr->valid_len = re_string_skip_chars (pstr, idx, &wc) - idx;
767 = re_string_context_at (pstr, prev_valid_len - 1, eflags);
769 pstr->tip_context = ((BE (pstr->word_ops_used != 0, 0)
770 && IS_WIDE_WORD_CHAR (wc))
772 : ((IS_WIDE_NEWLINE (wc)
773 && pstr->newline_anchor)
774 ? CONTEXT_NEWLINE : 0));
775 if (BE (pstr->valid_len, 0))
777 for (wcs_idx = 0; wcs_idx < pstr->valid_len; ++wcs_idx)
778 pstr->wcs[wcs_idx] = WEOF;
779 if (pstr->mbs_allocated)
780 memset (pstr->mbs, 255, pstr->valid_len);
782 pstr->valid_raw_len = pstr->valid_len;
785 #endif /* RE_ENABLE_I18N */
787 int c = pstr->raw_mbs[pstr->raw_mbs_idx + offset - 1];
788 pstr->valid_raw_len = 0;
791 pstr->tip_context = (bitset_contain (pstr->word_char, c)
793 : ((IS_NEWLINE (c) && pstr->newline_anchor)
794 ? CONTEXT_NEWLINE : 0));
797 if (!BE (pstr->mbs_allocated, 0))
800 pstr->raw_mbs_idx = idx;
802 pstr->stop -= offset;
804 /* Then build the buffers. */
805 #ifdef RE_ENABLE_I18N
806 if (pstr->mb_cur_max > 1)
810 reg_errcode_t ret = build_wcs_upper_buffer (pstr);
811 if (BE (ret != REG_NOERROR, 0))
815 build_wcs_buffer (pstr);
818 #endif /* RE_ENABLE_I18N */
819 if (BE (pstr->mbs_allocated, 0))
822 build_upper_buffer (pstr);
823 else if (pstr->trans != NULL)
824 re_string_translate_buffer (pstr);
827 pstr->valid_len = pstr->len;
834 internal_function __attribute__ ((pure))
835 re_string_peek_byte_case (const re_string_t *pstr, Idx idx)
840 /* Handle the common (easiest) cases first. */
841 if (BE (!pstr->mbs_allocated, 1))
842 return re_string_peek_byte (pstr, idx);
844 #ifdef RE_ENABLE_I18N
845 if (pstr->mb_cur_max > 1
846 && ! re_string_is_single_byte_char (pstr, pstr->cur_idx + idx))
847 return re_string_peek_byte (pstr, idx);
850 off = pstr->cur_idx + idx;
851 #ifdef RE_ENABLE_I18N
852 if (pstr->offsets_needed)
853 off = pstr->offsets[off];
856 ch = pstr->raw_mbs[pstr->raw_mbs_idx + off];
858 #ifdef RE_ENABLE_I18N
859 /* Ensure that e.g. for tr_TR.UTF-8 BACKSLASH DOTLESS SMALL LETTER I
860 this function returns CAPITAL LETTER I instead of first byte of
861 DOTLESS SMALL LETTER I. The latter would confuse the parser,
862 since peek_byte_case doesn't advance cur_idx in any way. */
863 if (pstr->offsets_needed && !isascii (ch))
864 return re_string_peek_byte (pstr, idx);
872 re_string_fetch_byte_case (re_string_t *pstr)
874 if (BE (!pstr->mbs_allocated, 1))
875 return re_string_fetch_byte (pstr);
877 #ifdef RE_ENABLE_I18N
878 if (pstr->offsets_needed)
883 /* For tr_TR.UTF-8 [[:islower:]] there is
884 [[: CAPITAL LETTER I WITH DOT lower:]] in mbs. Skip
885 in that case the whole multi-byte character and return
886 the original letter. On the other side, with
887 [[: DOTLESS SMALL LETTER I return [[:I, as doing
888 anything else would complicate things too much. */
890 if (!re_string_first_byte (pstr, pstr->cur_idx))
891 return re_string_fetch_byte (pstr);
893 off = pstr->offsets[pstr->cur_idx];
894 ch = pstr->raw_mbs[pstr->raw_mbs_idx + off];
897 return re_string_fetch_byte (pstr);
899 re_string_skip_bytes (pstr,
900 re_string_char_size_at (pstr, pstr->cur_idx));
905 return pstr->raw_mbs[pstr->raw_mbs_idx + pstr->cur_idx++];
910 re_string_destruct (re_string_t *pstr)
912 #ifdef RE_ENABLE_I18N
914 re_free (pstr->offsets);
915 #endif /* RE_ENABLE_I18N */
916 if (pstr->mbs_allocated)
920 /* Return the context at IDX in INPUT. */
924 re_string_context_at (const re_string_t *input, Idx idx, int eflags)
927 if (BE (! REG_VALID_INDEX (idx), 0))
928 /* In this case, we use the value stored in input->tip_context,
929 since we can't know the character in input->mbs[-1] here. */
930 return input->tip_context;
931 if (BE (idx == input->len, 0))
932 return ((eflags & REG_NOTEOL) ? CONTEXT_ENDBUF
933 : CONTEXT_NEWLINE | CONTEXT_ENDBUF);
934 #ifdef RE_ENABLE_I18N
935 if (input->mb_cur_max > 1)
939 while(input->wcs[wc_idx] == WEOF)
942 /* It must not happen. */
943 assert (REG_VALID_INDEX (wc_idx));
946 if (! REG_VALID_INDEX (wc_idx))
947 return input->tip_context;
949 wc = input->wcs[wc_idx];
950 if (BE (input->word_ops_used != 0, 0) && IS_WIDE_WORD_CHAR (wc))
952 return (IS_WIDE_NEWLINE (wc) && input->newline_anchor
953 ? CONTEXT_NEWLINE : 0);
958 c = re_string_byte_at (input, idx);
959 if (bitset_contain (input->word_char, c))
961 return IS_NEWLINE (c) && input->newline_anchor ? CONTEXT_NEWLINE : 0;
965 /* Functions for set operation. */
968 internal_function __attribute_warn_unused_result__
969 re_node_set_alloc (re_node_set *set, Idx size)
973 set->elems = re_malloc (Idx, size);
974 if (BE (set->elems == NULL, 0) && (MALLOC_0_IS_NONNULL || size != 0))
980 internal_function __attribute_warn_unused_result__
981 re_node_set_init_1 (re_node_set *set, Idx elem)
985 set->elems = re_malloc (Idx, 1);
986 if (BE (set->elems == NULL, 0))
988 set->alloc = set->nelem = 0;
991 set->elems[0] = elem;
996 internal_function __attribute_warn_unused_result__
997 re_node_set_init_2 (re_node_set *set, Idx elem1, Idx elem2)
1000 set->elems = re_malloc (Idx, 2);
1001 if (BE (set->elems == NULL, 0))
1006 set->elems[0] = elem1;
1013 set->elems[0] = elem1;
1014 set->elems[1] = elem2;
1018 set->elems[0] = elem2;
1019 set->elems[1] = elem1;
1025 static reg_errcode_t
1026 internal_function __attribute_warn_unused_result__
1027 re_node_set_init_copy (re_node_set *dest, const re_node_set *src)
1029 dest->nelem = src->nelem;
1032 dest->alloc = dest->nelem;
1033 dest->elems = re_malloc (Idx, dest->alloc);
1034 if (BE (dest->elems == NULL, 0))
1036 dest->alloc = dest->nelem = 0;
1039 memcpy (dest->elems, src->elems, src->nelem * sizeof (Idx));
1042 re_node_set_init_empty (dest);
1046 /* Calculate the intersection of the sets SRC1 and SRC2. And merge it to
1047 DEST. Return value indicate the error code or REG_NOERROR if succeeded.
1048 Note: We assume dest->elems is NULL, when dest->alloc is 0. */
1050 static reg_errcode_t
1051 internal_function __attribute_warn_unused_result__
1052 re_node_set_add_intersect (re_node_set *dest, const re_node_set *src1,
1053 const re_node_set *src2)
1055 Idx i1, i2, is, id, delta, sbase;
1056 if (src1->nelem == 0 || src2->nelem == 0)
1059 /* We need dest->nelem + 2 * elems_in_intersection; this is a
1060 conservative estimate. */
1061 if (src1->nelem + src2->nelem + dest->nelem > dest->alloc)
1063 Idx new_alloc = src1->nelem + src2->nelem + dest->alloc;
1064 Idx *new_elems = re_realloc (dest->elems, Idx, new_alloc);
1065 if (BE (new_elems == NULL, 0))
1067 dest->elems = new_elems;
1068 dest->alloc = new_alloc;
1071 /* Find the items in the intersection of SRC1 and SRC2, and copy
1072 into the top of DEST those that are not already in DEST itself. */
1073 sbase = dest->nelem + src1->nelem + src2->nelem;
1074 i1 = src1->nelem - 1;
1075 i2 = src2->nelem - 1;
1076 id = dest->nelem - 1;
1079 if (src1->elems[i1] == src2->elems[i2])
1081 /* Try to find the item in DEST. Maybe we could binary search? */
1082 while (REG_VALID_INDEX (id) && dest->elems[id] > src1->elems[i1])
1085 if (! REG_VALID_INDEX (id) || dest->elems[id] != src1->elems[i1])
1086 dest->elems[--sbase] = src1->elems[i1];
1088 if (! REG_VALID_INDEX (--i1) || ! REG_VALID_INDEX (--i2))
1092 /* Lower the highest of the two items. */
1093 else if (src1->elems[i1] < src2->elems[i2])
1095 if (! REG_VALID_INDEX (--i2))
1100 if (! REG_VALID_INDEX (--i1))
1105 id = dest->nelem - 1;
1106 is = dest->nelem + src1->nelem + src2->nelem - 1;
1107 delta = is - sbase + 1;
1109 /* Now copy. When DELTA becomes zero, the remaining
1110 DEST elements are already in place; this is more or
1111 less the same loop that is in re_node_set_merge. */
1112 dest->nelem += delta;
1113 if (delta > 0 && REG_VALID_INDEX (id))
1116 if (dest->elems[is] > dest->elems[id])
1118 /* Copy from the top. */
1119 dest->elems[id + delta--] = dest->elems[is--];
1125 /* Slide from the bottom. */
1126 dest->elems[id + delta] = dest->elems[id];
1127 if (! REG_VALID_INDEX (--id))
1132 /* Copy remaining SRC elements. */
1133 memcpy (dest->elems, dest->elems + sbase, delta * sizeof (Idx));
1138 /* Calculate the union set of the sets SRC1 and SRC2. And store it to
1139 DEST. Return value indicate the error code or REG_NOERROR if succeeded. */
1141 static reg_errcode_t
1142 internal_function __attribute_warn_unused_result__
1143 re_node_set_init_union (re_node_set *dest, const re_node_set *src1,
1144 const re_node_set *src2)
1147 if (src1 != NULL && src1->nelem > 0 && src2 != NULL && src2->nelem > 0)
1149 dest->alloc = src1->nelem + src2->nelem;
1150 dest->elems = re_malloc (Idx, dest->alloc);
1151 if (BE (dest->elems == NULL, 0))
1156 if (src1 != NULL && src1->nelem > 0)
1157 return re_node_set_init_copy (dest, src1);
1158 else if (src2 != NULL && src2->nelem > 0)
1159 return re_node_set_init_copy (dest, src2);
1161 re_node_set_init_empty (dest);
1164 for (i1 = i2 = id = 0 ; i1 < src1->nelem && i2 < src2->nelem ;)
1166 if (src1->elems[i1] > src2->elems[i2])
1168 dest->elems[id++] = src2->elems[i2++];
1171 if (src1->elems[i1] == src2->elems[i2])
1173 dest->elems[id++] = src1->elems[i1++];
1175 if (i1 < src1->nelem)
1177 memcpy (dest->elems + id, src1->elems + i1,
1178 (src1->nelem - i1) * sizeof (Idx));
1179 id += src1->nelem - i1;
1181 else if (i2 < src2->nelem)
1183 memcpy (dest->elems + id, src2->elems + i2,
1184 (src2->nelem - i2) * sizeof (Idx));
1185 id += src2->nelem - i2;
1191 /* Calculate the union set of the sets DEST and SRC. And store it to
1192 DEST. Return value indicate the error code or REG_NOERROR if succeeded. */
1194 static reg_errcode_t
1195 internal_function __attribute_warn_unused_result__
1196 re_node_set_merge (re_node_set *dest, const re_node_set *src)
1198 Idx is, id, sbase, delta;
1199 if (src == NULL || src->nelem == 0)
1201 if (dest->alloc < 2 * src->nelem + dest->nelem)
1203 Idx new_alloc = 2 * (src->nelem + dest->alloc);
1204 Idx *new_buffer = re_realloc (dest->elems, Idx, new_alloc);
1205 if (BE (new_buffer == NULL, 0))
1207 dest->elems = new_buffer;
1208 dest->alloc = new_alloc;
1211 if (BE (dest->nelem == 0, 0))
1213 dest->nelem = src->nelem;
1214 memcpy (dest->elems, src->elems, src->nelem * sizeof (Idx));
1218 /* Copy into the top of DEST the items of SRC that are not
1219 found in DEST. Maybe we could binary search in DEST? */
1220 for (sbase = dest->nelem + 2 * src->nelem,
1221 is = src->nelem - 1, id = dest->nelem - 1;
1222 REG_VALID_INDEX (is) && REG_VALID_INDEX (id); )
1224 if (dest->elems[id] == src->elems[is])
1226 else if (dest->elems[id] < src->elems[is])
1227 dest->elems[--sbase] = src->elems[is--];
1228 else /* if (dest->elems[id] > src->elems[is]) */
1232 if (REG_VALID_INDEX (is))
1234 /* If DEST is exhausted, the remaining items of SRC must be unique. */
1236 memcpy (dest->elems + sbase, src->elems, (is + 1) * sizeof (Idx));
1239 id = dest->nelem - 1;
1240 is = dest->nelem + 2 * src->nelem - 1;
1241 delta = is - sbase + 1;
1245 /* Now copy. When DELTA becomes zero, the remaining
1246 DEST elements are already in place. */
1247 dest->nelem += delta;
1250 if (dest->elems[is] > dest->elems[id])
1252 /* Copy from the top. */
1253 dest->elems[id + delta--] = dest->elems[is--];
1259 /* Slide from the bottom. */
1260 dest->elems[id + delta] = dest->elems[id];
1261 if (! REG_VALID_INDEX (--id))
1263 /* Copy remaining SRC elements. */
1264 memcpy (dest->elems, dest->elems + sbase,
1265 delta * sizeof (Idx));
1274 /* Insert the new element ELEM to the re_node_set* SET.
1275 SET should not already have ELEM.
1276 Return true if successful. */
1279 internal_function __attribute_warn_unused_result__
1280 re_node_set_insert (re_node_set *set, Idx elem)
1283 /* In case the set is empty. */
1284 if (set->alloc == 0)
1285 return BE (re_node_set_init_1 (set, elem) == REG_NOERROR, 1);
1287 if (BE (set->nelem, 0) == 0)
1289 /* We already guaranteed above that set->alloc != 0. */
1290 set->elems[0] = elem;
1295 /* Realloc if we need. */
1296 if (set->alloc == set->nelem)
1299 set->alloc = set->alloc * 2;
1300 new_elems = re_realloc (set->elems, Idx, set->alloc);
1301 if (BE (new_elems == NULL, 0))
1303 set->elems = new_elems;
1306 /* Move the elements which follows the new element. Test the
1307 first element separately to skip a check in the inner loop. */
1308 if (elem < set->elems[0])
1311 for (idx = set->nelem; idx > 0; idx--)
1312 set->elems[idx] = set->elems[idx - 1];
1316 for (idx = set->nelem; set->elems[idx - 1] > elem; idx--)
1317 set->elems[idx] = set->elems[idx - 1];
1320 /* Insert the new element. */
1321 set->elems[idx] = elem;
1326 /* Insert the new element ELEM to the re_node_set* SET.
1327 SET should not already have any element greater than or equal to ELEM.
1328 Return true if successful. */
1331 internal_function __attribute_warn_unused_result__
1332 re_node_set_insert_last (re_node_set *set, Idx elem)
1334 /* Realloc if we need. */
1335 if (set->alloc == set->nelem)
1338 set->alloc = (set->alloc + 1) * 2;
1339 new_elems = re_realloc (set->elems, Idx, set->alloc);
1340 if (BE (new_elems == NULL, 0))
1342 set->elems = new_elems;
1345 /* Insert the new element. */
1346 set->elems[set->nelem++] = elem;
1350 /* Compare two node sets SET1 and SET2.
1351 Return true if SET1 and SET2 are equivalent. */
1354 internal_function __attribute__ ((pure))
1355 re_node_set_compare (const re_node_set *set1, const re_node_set *set2)
1358 if (set1 == NULL || set2 == NULL || set1->nelem != set2->nelem)
1360 for (i = set1->nelem ; REG_VALID_INDEX (--i) ; )
1361 if (set1->elems[i] != set2->elems[i])
1366 /* Return (idx + 1) if SET contains the element ELEM, return 0 otherwise. */
1369 internal_function __attribute__ ((pure))
1370 re_node_set_contains (const re_node_set *set, Idx elem)
1372 __re_size_t idx, right, mid;
1373 if (! REG_VALID_NONZERO_INDEX (set->nelem))
1376 /* Binary search the element. */
1378 right = set->nelem - 1;
1381 mid = (idx + right) / 2;
1382 if (set->elems[mid] < elem)
1387 return set->elems[idx] == elem ? idx + 1 : 0;
1392 re_node_set_remove_at (re_node_set *set, Idx idx)
1394 if (idx < 0 || idx >= set->nelem)
1397 for (; idx < set->nelem; idx++)
1398 set->elems[idx] = set->elems[idx + 1];
1402 /* Add the token TOKEN to dfa->nodes, and return the index of the token.
1403 Or return REG_MISSING if an error occurred. */
1407 re_dfa_add_node (re_dfa_t *dfa, re_token_t token)
1409 if (BE (dfa->nodes_len >= dfa->nodes_alloc, 0))
1411 size_t new_nodes_alloc = dfa->nodes_alloc * 2;
1412 Idx *new_nexts, *new_indices;
1413 re_node_set *new_edests, *new_eclosures;
1414 re_token_t *new_nodes;
1416 /* Avoid overflows in realloc. */
1417 const size_t max_object_size = MAX (sizeof (re_token_t),
1418 MAX (sizeof (re_node_set),
1420 if (BE (MIN (IDX_MAX, SIZE_MAX / max_object_size) < new_nodes_alloc, 0))
1423 new_nodes = re_realloc (dfa->nodes, re_token_t, new_nodes_alloc);
1424 if (BE (new_nodes == NULL, 0))
1426 dfa->nodes = new_nodes;
1427 new_nexts = re_realloc (dfa->nexts, Idx, new_nodes_alloc);
1428 new_indices = re_realloc (dfa->org_indices, Idx, new_nodes_alloc);
1429 new_edests = re_realloc (dfa->edests, re_node_set, new_nodes_alloc);
1430 new_eclosures = re_realloc (dfa->eclosures, re_node_set, new_nodes_alloc);
1431 if (BE (new_nexts == NULL || new_indices == NULL
1432 || new_edests == NULL || new_eclosures == NULL, 0))
1434 dfa->nexts = new_nexts;
1435 dfa->org_indices = new_indices;
1436 dfa->edests = new_edests;
1437 dfa->eclosures = new_eclosures;
1438 dfa->nodes_alloc = new_nodes_alloc;
1440 dfa->nodes[dfa->nodes_len] = token;
1441 dfa->nodes[dfa->nodes_len].constraint = 0;
1442 #ifdef RE_ENABLE_I18N
1443 dfa->nodes[dfa->nodes_len].accept_mb =
1444 ((token.type == OP_PERIOD && dfa->mb_cur_max > 1)
1445 || token.type == COMPLEX_BRACKET);
1447 dfa->nexts[dfa->nodes_len] = REG_MISSING;
1448 re_node_set_init_empty (dfa->edests + dfa->nodes_len);
1449 re_node_set_init_empty (dfa->eclosures + dfa->nodes_len);
1450 return dfa->nodes_len++;
1455 calc_state_hash (const re_node_set *nodes, unsigned int context)
1457 re_hashval_t hash = nodes->nelem + context;
1459 for (i = 0 ; i < nodes->nelem ; i++)
1460 hash += nodes->elems[i];
1464 /* Search for the state whose node_set is equivalent to NODES.
1465 Return the pointer to the state, if we found it in the DFA.
1466 Otherwise create the new one and return it. In case of an error
1467 return NULL and set the error code in ERR.
1468 Note: - We assume NULL as the invalid state, then it is possible that
1469 return value is NULL and ERR is REG_NOERROR.
1470 - We never return non-NULL value in case of any errors, it is for
1473 static re_dfastate_t *
1474 internal_function __attribute_warn_unused_result__
1475 re_acquire_state (reg_errcode_t *err, const re_dfa_t *dfa,
1476 const re_node_set *nodes)
1479 re_dfastate_t *new_state;
1480 struct re_state_table_entry *spot;
1483 /* Suppress bogus uninitialized-variable warnings. */
1486 if (BE (nodes->nelem == 0, 0))
1491 hash = calc_state_hash (nodes, 0);
1492 spot = dfa->state_table + (hash & dfa->state_hash_mask);
1494 for (i = 0 ; i < spot->num ; i++)
1496 re_dfastate_t *state = spot->array[i];
1497 if (hash != state->hash)
1499 if (re_node_set_compare (&state->nodes, nodes))
1503 /* There are no appropriate state in the dfa, create the new one. */
1504 new_state = create_ci_newstate (dfa, nodes, hash);
1505 if (BE (new_state == NULL, 0))
1511 /* Search for the state whose node_set is equivalent to NODES and
1512 whose context is equivalent to CONTEXT.
1513 Return the pointer to the state, if we found it in the DFA.
1514 Otherwise create the new one and return it. In case of an error
1515 return NULL and set the error code in ERR.
1516 Note: - We assume NULL as the invalid state, then it is possible that
1517 return value is NULL and ERR is REG_NOERROR.
1518 - We never return non-NULL value in case of any errors, it is for
1521 static re_dfastate_t *
1522 internal_function __attribute_warn_unused_result__
1523 re_acquire_state_context (reg_errcode_t *err, const re_dfa_t *dfa,
1524 const re_node_set *nodes, unsigned int context)
1527 re_dfastate_t *new_state;
1528 struct re_state_table_entry *spot;
1531 /* Suppress bogus uninitialized-variable warnings. */
1534 if (nodes->nelem == 0)
1539 hash = calc_state_hash (nodes, context);
1540 spot = dfa->state_table + (hash & dfa->state_hash_mask);
1542 for (i = 0 ; i < spot->num ; i++)
1544 re_dfastate_t *state = spot->array[i];
1545 if (state->hash == hash
1546 && state->context == context
1547 && re_node_set_compare (state->entrance_nodes, nodes))
1550 /* There are no appropriate state in 'dfa', create the new one. */
1551 new_state = create_cd_newstate (dfa, nodes, context, hash);
1552 if (BE (new_state == NULL, 0))
1558 /* Finish initialization of the new state NEWSTATE, and using its hash value
1559 HASH put in the appropriate bucket of DFA's state table. Return value
1560 indicates the error code if failed. */
1562 static reg_errcode_t
1563 __attribute_warn_unused_result__
1564 register_state (const re_dfa_t *dfa, re_dfastate_t *newstate,
1567 struct re_state_table_entry *spot;
1571 newstate->hash = hash;
1572 err = re_node_set_alloc (&newstate->non_eps_nodes, newstate->nodes.nelem);
1573 if (BE (err != REG_NOERROR, 0))
1575 for (i = 0; i < newstate->nodes.nelem; i++)
1577 Idx elem = newstate->nodes.elems[i];
1578 if (!IS_EPSILON_NODE (dfa->nodes[elem].type))
1579 if (! re_node_set_insert_last (&newstate->non_eps_nodes, elem))
1583 spot = dfa->state_table + (hash & dfa->state_hash_mask);
1584 if (BE (spot->alloc <= spot->num, 0))
1586 Idx new_alloc = 2 * spot->num + 2;
1587 re_dfastate_t **new_array = re_realloc (spot->array, re_dfastate_t *,
1589 if (BE (new_array == NULL, 0))
1591 spot->array = new_array;
1592 spot->alloc = new_alloc;
1594 spot->array[spot->num++] = newstate;
1599 free_state (re_dfastate_t *state)
1601 re_node_set_free (&state->non_eps_nodes);
1602 re_node_set_free (&state->inveclosure);
1603 if (state->entrance_nodes != &state->nodes)
1605 re_node_set_free (state->entrance_nodes);
1606 re_free (state->entrance_nodes);
1608 re_node_set_free (&state->nodes);
1609 re_free (state->word_trtable);
1610 re_free (state->trtable);
1614 /* Create the new state which is independent of contexts.
1615 Return the new state if succeeded, otherwise return NULL. */
1617 static re_dfastate_t *
1618 internal_function __attribute_warn_unused_result__
1619 create_ci_newstate (const re_dfa_t *dfa, const re_node_set *nodes,
1624 re_dfastate_t *newstate;
1626 newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1);
1627 if (BE (newstate == NULL, 0))
1629 err = re_node_set_init_copy (&newstate->nodes, nodes);
1630 if (BE (err != REG_NOERROR, 0))
1636 newstate->entrance_nodes = &newstate->nodes;
1637 for (i = 0 ; i < nodes->nelem ; i++)
1639 re_token_t *node = dfa->nodes + nodes->elems[i];
1640 re_token_type_t type = node->type;
1641 if (type == CHARACTER && !node->constraint)
1643 #ifdef RE_ENABLE_I18N
1644 newstate->accept_mb |= node->accept_mb;
1645 #endif /* RE_ENABLE_I18N */
1647 /* If the state has the halt node, the state is a halt state. */
1648 if (type == END_OF_RE)
1650 else if (type == OP_BACK_REF)
1651 newstate->has_backref = 1;
1652 else if (type == ANCHOR || node->constraint)
1653 newstate->has_constraint = 1;
1655 err = register_state (dfa, newstate, hash);
1656 if (BE (err != REG_NOERROR, 0))
1658 free_state (newstate);
1664 /* Create the new state which is depend on the context CONTEXT.
1665 Return the new state if succeeded, otherwise return NULL. */
1667 static re_dfastate_t *
1668 internal_function __attribute_warn_unused_result__
1669 create_cd_newstate (const re_dfa_t *dfa, const re_node_set *nodes,
1670 unsigned int context, re_hashval_t hash)
1672 Idx i, nctx_nodes = 0;
1674 re_dfastate_t *newstate;
1676 newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1);
1677 if (BE (newstate == NULL, 0))
1679 err = re_node_set_init_copy (&newstate->nodes, nodes);
1680 if (BE (err != REG_NOERROR, 0))
1686 newstate->context = context;
1687 newstate->entrance_nodes = &newstate->nodes;
1689 for (i = 0 ; i < nodes->nelem ; i++)
1691 re_token_t *node = dfa->nodes + nodes->elems[i];
1692 re_token_type_t type = node->type;
1693 unsigned int constraint = node->constraint;
1695 if (type == CHARACTER && !constraint)
1697 #ifdef RE_ENABLE_I18N
1698 newstate->accept_mb |= node->accept_mb;
1699 #endif /* RE_ENABLE_I18N */
1701 /* If the state has the halt node, the state is a halt state. */
1702 if (type == END_OF_RE)
1704 else if (type == OP_BACK_REF)
1705 newstate->has_backref = 1;
1709 if (newstate->entrance_nodes == &newstate->nodes)
1711 newstate->entrance_nodes = re_malloc (re_node_set, 1);
1712 if (BE (newstate->entrance_nodes == NULL, 0))
1714 free_state (newstate);
1717 if (re_node_set_init_copy (newstate->entrance_nodes, nodes)
1721 newstate->has_constraint = 1;
1724 if (NOT_SATISFY_PREV_CONSTRAINT (constraint,context))
1726 re_node_set_remove_at (&newstate->nodes, i - nctx_nodes);
1731 err = register_state (dfa, newstate, hash);
1732 if (BE (err != REG_NOERROR, 0))
1734 free_state (newstate);