1 /* -*- buffer-read-only: t -*- vi: set ro: */
2 /* DO NOT EDIT! GENERATED AUTOMATICALLY! */
3 /* Extended regular expression matching and search library.
4 Copyright (C) 2002-2011 Free Software Foundation, Inc.
5 This file is part of the GNU C Library.
6 Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License along
19 with this program; if not, write to the Free Software Foundation,
20 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
22 static void re_string_construct_common (const char *str, Idx len,
24 RE_TRANSLATE_TYPE trans, bool icase,
25 const re_dfa_t *dfa) internal_function;
26 static re_dfastate_t *create_ci_newstate (const re_dfa_t *dfa,
27 const re_node_set *nodes,
28 re_hashval_t hash) internal_function;
29 static re_dfastate_t *create_cd_newstate (const re_dfa_t *dfa,
30 const re_node_set *nodes,
32 re_hashval_t hash) internal_function;
34 /* Functions for string operation. */
36 /* This function allocate the buffers. It is necessary to call
37 re_string_reconstruct before using the object. */
40 internal_function __attribute_warn_unused_result__
41 re_string_allocate (re_string_t *pstr, const char *str, Idx len, Idx init_len,
42 RE_TRANSLATE_TYPE trans, bool icase, const re_dfa_t *dfa)
47 /* Ensure at least one character fits into the buffers. */
48 if (init_len < dfa->mb_cur_max)
49 init_len = dfa->mb_cur_max;
50 init_buf_len = (len + 1 < init_len) ? len + 1: init_len;
51 re_string_construct_common (str, len, pstr, trans, icase, dfa);
53 ret = re_string_realloc_buffers (pstr, init_buf_len);
54 if (BE (ret != REG_NOERROR, 0))
57 pstr->word_char = dfa->word_char;
58 pstr->word_ops_used = dfa->word_ops_used;
59 pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str;
60 pstr->valid_len = (pstr->mbs_allocated || dfa->mb_cur_max > 1) ? 0 : len;
61 pstr->valid_raw_len = pstr->valid_len;
65 /* This function allocate the buffers, and initialize them. */
68 internal_function __attribute_warn_unused_result__
69 re_string_construct (re_string_t *pstr, const char *str, Idx len,
70 RE_TRANSLATE_TYPE trans, bool icase, const re_dfa_t *dfa)
73 memset (pstr, '\0', sizeof (re_string_t));
74 re_string_construct_common (str, len, pstr, trans, icase, dfa);
78 ret = re_string_realloc_buffers (pstr, len + 1);
79 if (BE (ret != REG_NOERROR, 0))
82 pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str;
87 if (dfa->mb_cur_max > 1)
91 ret = build_wcs_upper_buffer (pstr);
92 if (BE (ret != REG_NOERROR, 0))
94 if (pstr->valid_raw_len >= len)
96 if (pstr->bufs_len > pstr->valid_len + dfa->mb_cur_max)
98 ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2);
99 if (BE (ret != REG_NOERROR, 0))
104 #endif /* RE_ENABLE_I18N */
105 build_upper_buffer (pstr);
109 #ifdef RE_ENABLE_I18N
110 if (dfa->mb_cur_max > 1)
111 build_wcs_buffer (pstr);
113 #endif /* RE_ENABLE_I18N */
116 re_string_translate_buffer (pstr);
119 pstr->valid_len = pstr->bufs_len;
120 pstr->valid_raw_len = pstr->bufs_len;
128 /* Helper functions for re_string_allocate, and re_string_construct. */
131 internal_function __attribute_warn_unused_result__
132 re_string_realloc_buffers (re_string_t *pstr, Idx new_buf_len)
134 #ifdef RE_ENABLE_I18N
135 if (pstr->mb_cur_max > 1)
139 /* Avoid overflow. */
140 size_t max_object_size = MAX (sizeof (wint_t), sizeof (Idx));
141 if (BE (SIZE_MAX / max_object_size < new_buf_len, 0))
144 new_wcs = re_realloc (pstr->wcs, wint_t, new_buf_len);
145 if (BE (new_wcs == NULL, 0))
148 if (pstr->offsets != NULL)
150 Idx *new_offsets = re_realloc (pstr->offsets, Idx, new_buf_len);
151 if (BE (new_offsets == NULL, 0))
153 pstr->offsets = new_offsets;
156 #endif /* RE_ENABLE_I18N */
157 if (pstr->mbs_allocated)
159 unsigned char *new_mbs = re_realloc (pstr->mbs, unsigned char,
161 if (BE (new_mbs == NULL, 0))
165 pstr->bufs_len = new_buf_len;
172 re_string_construct_common (const char *str, Idx len, re_string_t *pstr,
173 RE_TRANSLATE_TYPE trans, bool icase,
176 pstr->raw_mbs = (const unsigned char *) str;
181 pstr->mbs_allocated = (trans != NULL || icase);
182 pstr->mb_cur_max = dfa->mb_cur_max;
183 pstr->is_utf8 = dfa->is_utf8;
184 pstr->map_notascii = dfa->map_notascii;
185 pstr->stop = pstr->len;
186 pstr->raw_stop = pstr->stop;
189 #ifdef RE_ENABLE_I18N
191 /* Build wide character buffer PSTR->WCS.
192 If the byte sequence of the string are:
193 <mb1>(0), <mb1>(1), <mb2>(0), <mb2>(1), <sb3>
194 Then wide character buffer will be:
195 <wc1> , WEOF , <wc2> , WEOF , <wc3>
196 We use WEOF for padding, they indicate that the position isn't
197 a first byte of a multibyte character.
199 Note that this function assumes PSTR->VALID_LEN elements are already
200 built and starts from PSTR->VALID_LEN. */
204 build_wcs_buffer (re_string_t *pstr)
207 unsigned char buf[MB_LEN_MAX];
208 assert (MB_LEN_MAX >= pstr->mb_cur_max);
210 unsigned char buf[64];
213 Idx byte_idx, end_idx, remain_len;
216 /* Build the buffers from pstr->valid_len to either pstr->len or
218 end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
219 for (byte_idx = pstr->valid_len; byte_idx < end_idx;)
224 remain_len = end_idx - byte_idx;
225 prev_st = pstr->cur_state;
226 /* Apply the translation if we need. */
227 if (BE (pstr->trans != NULL, 0))
231 for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i)
233 ch = pstr->raw_mbs [pstr->raw_mbs_idx + byte_idx + i];
234 buf[i] = pstr->mbs[byte_idx + i] = pstr->trans[ch];
236 p = (const char *) buf;
239 p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx;
240 mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state);
241 if (BE (mbclen == (size_t) -2, 0))
243 /* The buffer doesn't have enough space, finish to build. */
244 pstr->cur_state = prev_st;
247 else if (BE (mbclen == (size_t) -1 || mbclen == 0, 0))
249 /* We treat these cases as a singlebyte character. */
251 wc = (wchar_t) pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
252 if (BE (pstr->trans != NULL, 0))
253 wc = pstr->trans[wc];
254 pstr->cur_state = prev_st;
257 /* Write wide character and padding. */
258 pstr->wcs[byte_idx++] = wc;
259 /* Write paddings. */
260 for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
261 pstr->wcs[byte_idx++] = WEOF;
263 pstr->valid_len = byte_idx;
264 pstr->valid_raw_len = byte_idx;
267 /* Build wide character buffer PSTR->WCS like build_wcs_buffer,
268 but for REG_ICASE. */
271 internal_function __attribute_warn_unused_result__
272 build_wcs_upper_buffer (re_string_t *pstr)
275 Idx src_idx, byte_idx, end_idx, remain_len;
278 char buf[MB_LEN_MAX];
279 assert (MB_LEN_MAX >= pstr->mb_cur_max);
284 byte_idx = pstr->valid_len;
285 end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
287 /* The following optimization assumes that ASCII characters can be
288 mapped to wide characters with a simple cast. */
289 if (! pstr->map_notascii && pstr->trans == NULL && !pstr->offsets_needed)
291 while (byte_idx < end_idx)
295 if (isascii (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx])
296 && mbsinit (&pstr->cur_state))
298 /* In case of a singlebyte character. */
300 = toupper (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]);
301 /* The next step uses the assumption that wchar_t is encoded
302 ASCII-safe: all ASCII values can be converted like this. */
303 pstr->wcs[byte_idx] = (wchar_t) pstr->mbs[byte_idx];
308 remain_len = end_idx - byte_idx;
309 prev_st = pstr->cur_state;
310 mbclen = __mbrtowc (&wc,
311 ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx
312 + byte_idx), remain_len, &pstr->cur_state);
313 if (BE (mbclen < (size_t) -2, 1))
321 mbcdlen = wcrtomb (buf, wcu, &prev_st);
322 if (BE (mbclen == mbcdlen, 1))
323 memcpy (pstr->mbs + byte_idx, buf, mbclen);
331 memcpy (pstr->mbs + byte_idx,
332 pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx, mbclen);
333 pstr->wcs[byte_idx++] = wcu;
334 /* Write paddings. */
335 for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
336 pstr->wcs[byte_idx++] = WEOF;
338 else if (mbclen == (size_t) -1 || mbclen == 0)
340 /* It is an invalid character or '\0'. Just use the byte. */
341 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
342 pstr->mbs[byte_idx] = ch;
343 /* And also cast it to wide char. */
344 pstr->wcs[byte_idx++] = (wchar_t) ch;
345 if (BE (mbclen == (size_t) -1, 0))
346 pstr->cur_state = prev_st;
350 /* The buffer doesn't have enough space, finish to build. */
351 pstr->cur_state = prev_st;
355 pstr->valid_len = byte_idx;
356 pstr->valid_raw_len = byte_idx;
360 for (src_idx = pstr->valid_raw_len; byte_idx < end_idx;)
365 remain_len = end_idx - byte_idx;
366 prev_st = pstr->cur_state;
367 if (BE (pstr->trans != NULL, 0))
371 for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i)
373 ch = pstr->raw_mbs [pstr->raw_mbs_idx + src_idx + i];
374 buf[i] = pstr->trans[ch];
376 p = (const char *) buf;
379 p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + src_idx;
380 mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state);
381 if (BE (mbclen < (size_t) -2, 1))
389 mbcdlen = wcrtomb ((char *) buf, wcu, &prev_st);
390 if (BE (mbclen == mbcdlen, 1))
391 memcpy (pstr->mbs + byte_idx, buf, mbclen);
392 else if (mbcdlen != (size_t) -1)
396 if (byte_idx + mbcdlen > pstr->bufs_len)
398 pstr->cur_state = prev_st;
402 if (pstr->offsets == NULL)
404 pstr->offsets = re_malloc (Idx, pstr->bufs_len);
406 if (pstr->offsets == NULL)
409 if (!pstr->offsets_needed)
411 for (i = 0; i < (size_t) byte_idx; ++i)
412 pstr->offsets[i] = i;
413 pstr->offsets_needed = 1;
416 memcpy (pstr->mbs + byte_idx, buf, mbcdlen);
417 pstr->wcs[byte_idx] = wcu;
418 pstr->offsets[byte_idx] = src_idx;
419 for (i = 1; i < mbcdlen; ++i)
421 pstr->offsets[byte_idx + i]
422 = src_idx + (i < mbclen ? i : mbclen - 1);
423 pstr->wcs[byte_idx + i] = WEOF;
425 pstr->len += mbcdlen - mbclen;
426 if (pstr->raw_stop > src_idx)
427 pstr->stop += mbcdlen - mbclen;
428 end_idx = (pstr->bufs_len > pstr->len)
429 ? pstr->len : pstr->bufs_len;
435 memcpy (pstr->mbs + byte_idx, p, mbclen);
438 memcpy (pstr->mbs + byte_idx, p, mbclen);
440 if (BE (pstr->offsets_needed != 0, 0))
443 for (i = 0; i < mbclen; ++i)
444 pstr->offsets[byte_idx + i] = src_idx + i;
448 pstr->wcs[byte_idx++] = wcu;
449 /* Write paddings. */
450 for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
451 pstr->wcs[byte_idx++] = WEOF;
453 else if (mbclen == (size_t) -1 || mbclen == 0)
455 /* It is an invalid character or '\0'. Just use the byte. */
456 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + src_idx];
458 if (BE (pstr->trans != NULL, 0))
459 ch = pstr->trans [ch];
460 pstr->mbs[byte_idx] = ch;
462 if (BE (pstr->offsets_needed != 0, 0))
463 pstr->offsets[byte_idx] = src_idx;
466 /* And also cast it to wide char. */
467 pstr->wcs[byte_idx++] = (wchar_t) ch;
468 if (BE (mbclen == (size_t) -1, 0))
469 pstr->cur_state = prev_st;
473 /* The buffer doesn't have enough space, finish to build. */
474 pstr->cur_state = prev_st;
478 pstr->valid_len = byte_idx;
479 pstr->valid_raw_len = src_idx;
483 /* Skip characters until the index becomes greater than NEW_RAW_IDX.
488 re_string_skip_chars (re_string_t *pstr, Idx new_raw_idx, wint_t *last_wc)
495 /* Skip the characters which are not necessary to check. */
496 for (rawbuf_idx = pstr->raw_mbs_idx + pstr->valid_raw_len;
497 rawbuf_idx < new_raw_idx;)
501 remain_len = pstr->len - rawbuf_idx;
502 prev_st = pstr->cur_state;
503 mbclen = __mbrtowc (&wc2, (const char *) pstr->raw_mbs + rawbuf_idx,
504 remain_len, &pstr->cur_state);
505 if (BE (mbclen == (size_t) -2 || mbclen == (size_t) -1 || mbclen == 0, 0))
507 /* We treat these cases as a single byte character. */
508 if (mbclen == 0 || remain_len == 0)
511 wc = *(unsigned char *) (pstr->raw_mbs + rawbuf_idx);
513 pstr->cur_state = prev_st;
517 /* Then proceed the next character. */
518 rawbuf_idx += mbclen;
523 #endif /* RE_ENABLE_I18N */
525 /* Build the buffer PSTR->MBS, and apply the translation if we need.
526 This function is used in case of REG_ICASE. */
530 build_upper_buffer (re_string_t *pstr)
532 Idx char_idx, end_idx;
533 end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
535 for (char_idx = pstr->valid_len; char_idx < end_idx; ++char_idx)
537 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + char_idx];
538 if (BE (pstr->trans != NULL, 0))
539 ch = pstr->trans[ch];
541 pstr->mbs[char_idx] = toupper (ch);
543 pstr->mbs[char_idx] = ch;
545 pstr->valid_len = char_idx;
546 pstr->valid_raw_len = char_idx;
549 /* Apply TRANS to the buffer in PSTR. */
553 re_string_translate_buffer (re_string_t *pstr)
555 Idx buf_idx, end_idx;
556 end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
558 for (buf_idx = pstr->valid_len; buf_idx < end_idx; ++buf_idx)
560 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + buf_idx];
561 pstr->mbs[buf_idx] = pstr->trans[ch];
564 pstr->valid_len = buf_idx;
565 pstr->valid_raw_len = buf_idx;
568 /* This function re-construct the buffers.
569 Concretely, convert to wide character in case of pstr->mb_cur_max > 1,
570 convert to upper case in case of REG_ICASE, apply translation. */
573 internal_function __attribute_warn_unused_result__
574 re_string_reconstruct (re_string_t *pstr, Idx idx, int eflags)
578 if (BE (pstr->raw_mbs_idx <= idx, 0))
579 offset = idx - pstr->raw_mbs_idx;
583 #ifdef RE_ENABLE_I18N
584 if (pstr->mb_cur_max > 1)
585 memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
586 #endif /* RE_ENABLE_I18N */
587 pstr->len = pstr->raw_len;
588 pstr->stop = pstr->raw_stop;
590 pstr->raw_mbs_idx = 0;
591 pstr->valid_raw_len = 0;
592 pstr->offsets_needed = 0;
593 pstr->tip_context = ((eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
594 : CONTEXT_NEWLINE | CONTEXT_BEGBUF);
595 if (!pstr->mbs_allocated)
596 pstr->mbs = (unsigned char *) pstr->raw_mbs;
600 if (BE (offset != 0, 1))
602 /* Should the already checked characters be kept? */
603 if (BE (offset < pstr->valid_raw_len, 1))
605 /* Yes, move them to the front of the buffer. */
606 #ifdef RE_ENABLE_I18N
607 if (BE (pstr->offsets_needed, 0))
609 Idx low = 0, high = pstr->valid_len, mid;
612 mid = (high + low) / 2;
613 if (pstr->offsets[mid] > offset)
615 else if (pstr->offsets[mid] < offset)
621 if (pstr->offsets[mid] < offset)
623 pstr->tip_context = re_string_context_at (pstr, mid - 1,
625 /* This can be quite complicated, so handle specially
626 only the common and easy case where the character with
627 different length representation of lower and upper
628 case is present at or after offset. */
629 if (pstr->valid_len > offset
630 && mid == offset && pstr->offsets[mid] == offset)
632 memmove (pstr->wcs, pstr->wcs + offset,
633 (pstr->valid_len - offset) * sizeof (wint_t));
634 memmove (pstr->mbs, pstr->mbs + offset, pstr->valid_len - offset);
635 pstr->valid_len -= offset;
636 pstr->valid_raw_len -= offset;
637 for (low = 0; low < pstr->valid_len; low++)
638 pstr->offsets[low] = pstr->offsets[low + offset] - offset;
642 /* Otherwise, just find out how long the partial multibyte
643 character at offset is and fill it with WEOF/255. */
644 pstr->len = pstr->raw_len - idx + offset;
645 pstr->stop = pstr->raw_stop - idx + offset;
646 pstr->offsets_needed = 0;
647 while (mid > 0 && pstr->offsets[mid - 1] == offset)
649 while (mid < pstr->valid_len)
650 if (pstr->wcs[mid] != WEOF)
654 if (mid == pstr->valid_len)
658 pstr->valid_len = pstr->offsets[mid] - offset;
661 for (low = 0; low < pstr->valid_len; ++low)
662 pstr->wcs[low] = WEOF;
663 memset (pstr->mbs, 255, pstr->valid_len);
666 pstr->valid_raw_len = pstr->valid_len;
672 pstr->tip_context = re_string_context_at (pstr, offset - 1,
674 #ifdef RE_ENABLE_I18N
675 if (pstr->mb_cur_max > 1)
676 memmove (pstr->wcs, pstr->wcs + offset,
677 (pstr->valid_len - offset) * sizeof (wint_t));
678 #endif /* RE_ENABLE_I18N */
679 if (BE (pstr->mbs_allocated, 0))
680 memmove (pstr->mbs, pstr->mbs + offset,
681 pstr->valid_len - offset);
682 pstr->valid_len -= offset;
683 pstr->valid_raw_len -= offset;
685 assert (pstr->valid_len > 0);
691 #ifdef RE_ENABLE_I18N
692 /* No, skip all characters until IDX. */
693 Idx prev_valid_len = pstr->valid_len;
695 if (BE (pstr->offsets_needed, 0))
697 pstr->len = pstr->raw_len - idx + offset;
698 pstr->stop = pstr->raw_stop - idx + offset;
699 pstr->offsets_needed = 0;
703 #ifdef RE_ENABLE_I18N
704 if (pstr->mb_cur_max > 1)
711 const unsigned char *raw, *p, *end;
713 /* Special case UTF-8. Multi-byte chars start with any
714 byte other than 0x80 - 0xbf. */
715 raw = pstr->raw_mbs + pstr->raw_mbs_idx;
716 end = raw + (offset - pstr->mb_cur_max);
717 if (end < pstr->raw_mbs)
719 p = raw + offset - 1;
721 /* We know the wchar_t encoding is UCS4, so for the simple
722 case, ASCII characters, skip the conversion step. */
723 if (isascii (*p) && BE (pstr->trans == NULL, 1))
725 memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
726 /* pstr->valid_len = 0; */
731 for (; p >= end; --p)
732 if ((*p & 0xc0) != 0x80)
736 Idx mlen = raw + pstr->len - p;
739 #if 0 /* dead code: buf is set but never used */
740 unsigned char buf[6];
741 if (BE (pstr->trans != NULL, 0))
743 int i = mlen < 6 ? mlen : 6;
745 buf[i] = pstr->trans[p[i]];
748 /* XXX Don't use mbrtowc, we know which conversion
749 to use (UTF-8 -> UCS4). */
750 memset (&cur_state, 0, sizeof (cur_state));
751 mbclen = __mbrtowc (&wc2, (const char *) p, mlen,
753 if (raw + offset - p <= mbclen
754 && mbclen < (size_t) -2)
756 memset (&pstr->cur_state, '\0',
758 pstr->valid_len = mbclen - (raw + offset - p);
766 pstr->valid_len = re_string_skip_chars (pstr, idx, &wc) - idx;
769 = re_string_context_at (pstr, prev_valid_len - 1, eflags);
771 pstr->tip_context = ((BE (pstr->word_ops_used != 0, 0)
772 && IS_WIDE_WORD_CHAR (wc))
774 : ((IS_WIDE_NEWLINE (wc)
775 && pstr->newline_anchor)
776 ? CONTEXT_NEWLINE : 0));
777 if (BE (pstr->valid_len, 0))
779 for (wcs_idx = 0; wcs_idx < pstr->valid_len; ++wcs_idx)
780 pstr->wcs[wcs_idx] = WEOF;
781 if (pstr->mbs_allocated)
782 memset (pstr->mbs, 255, pstr->valid_len);
784 pstr->valid_raw_len = pstr->valid_len;
787 #endif /* RE_ENABLE_I18N */
789 int c = pstr->raw_mbs[pstr->raw_mbs_idx + offset - 1];
790 pstr->valid_raw_len = 0;
793 pstr->tip_context = (bitset_contain (pstr->word_char, c)
795 : ((IS_NEWLINE (c) && pstr->newline_anchor)
796 ? CONTEXT_NEWLINE : 0));
799 if (!BE (pstr->mbs_allocated, 0))
802 pstr->raw_mbs_idx = idx;
804 pstr->stop -= offset;
806 /* Then build the buffers. */
807 #ifdef RE_ENABLE_I18N
808 if (pstr->mb_cur_max > 1)
812 reg_errcode_t ret = build_wcs_upper_buffer (pstr);
813 if (BE (ret != REG_NOERROR, 0))
817 build_wcs_buffer (pstr);
820 #endif /* RE_ENABLE_I18N */
821 if (BE (pstr->mbs_allocated, 0))
824 build_upper_buffer (pstr);
825 else if (pstr->trans != NULL)
826 re_string_translate_buffer (pstr);
829 pstr->valid_len = pstr->len;
836 internal_function __attribute ((pure))
837 re_string_peek_byte_case (const re_string_t *pstr, Idx idx)
842 /* Handle the common (easiest) cases first. */
843 if (BE (!pstr->mbs_allocated, 1))
844 return re_string_peek_byte (pstr, idx);
846 #ifdef RE_ENABLE_I18N
847 if (pstr->mb_cur_max > 1
848 && ! re_string_is_single_byte_char (pstr, pstr->cur_idx + idx))
849 return re_string_peek_byte (pstr, idx);
852 off = pstr->cur_idx + idx;
853 #ifdef RE_ENABLE_I18N
854 if (pstr->offsets_needed)
855 off = pstr->offsets[off];
858 ch = pstr->raw_mbs[pstr->raw_mbs_idx + off];
860 #ifdef RE_ENABLE_I18N
861 /* Ensure that e.g. for tr_TR.UTF-8 BACKSLASH DOTLESS SMALL LETTER I
862 this function returns CAPITAL LETTER I instead of first byte of
863 DOTLESS SMALL LETTER I. The latter would confuse the parser,
864 since peek_byte_case doesn't advance cur_idx in any way. */
865 if (pstr->offsets_needed && !isascii (ch))
866 return re_string_peek_byte (pstr, idx);
873 internal_function __attribute ((pure))
874 re_string_fetch_byte_case (re_string_t *pstr)
876 if (BE (!pstr->mbs_allocated, 1))
877 return re_string_fetch_byte (pstr);
879 #ifdef RE_ENABLE_I18N
880 if (pstr->offsets_needed)
885 /* For tr_TR.UTF-8 [[:islower:]] there is
886 [[: CAPITAL LETTER I WITH DOT lower:]] in mbs. Skip
887 in that case the whole multi-byte character and return
888 the original letter. On the other side, with
889 [[: DOTLESS SMALL LETTER I return [[:I, as doing
890 anything else would complicate things too much. */
892 if (!re_string_first_byte (pstr, pstr->cur_idx))
893 return re_string_fetch_byte (pstr);
895 off = pstr->offsets[pstr->cur_idx];
896 ch = pstr->raw_mbs[pstr->raw_mbs_idx + off];
899 return re_string_fetch_byte (pstr);
901 re_string_skip_bytes (pstr,
902 re_string_char_size_at (pstr, pstr->cur_idx));
907 return pstr->raw_mbs[pstr->raw_mbs_idx + pstr->cur_idx++];
912 re_string_destruct (re_string_t *pstr)
914 #ifdef RE_ENABLE_I18N
916 re_free (pstr->offsets);
917 #endif /* RE_ENABLE_I18N */
918 if (pstr->mbs_allocated)
922 /* Return the context at IDX in INPUT. */
926 re_string_context_at (const re_string_t *input, Idx idx, int eflags)
929 if (BE (! REG_VALID_INDEX (idx), 0))
930 /* In this case, we use the value stored in input->tip_context,
931 since we can't know the character in input->mbs[-1] here. */
932 return input->tip_context;
933 if (BE (idx == input->len, 0))
934 return ((eflags & REG_NOTEOL) ? CONTEXT_ENDBUF
935 : CONTEXT_NEWLINE | CONTEXT_ENDBUF);
936 #ifdef RE_ENABLE_I18N
937 if (input->mb_cur_max > 1)
941 while(input->wcs[wc_idx] == WEOF)
944 /* It must not happen. */
945 assert (REG_VALID_INDEX (wc_idx));
948 if (! REG_VALID_INDEX (wc_idx))
949 return input->tip_context;
951 wc = input->wcs[wc_idx];
952 if (BE (input->word_ops_used != 0, 0) && IS_WIDE_WORD_CHAR (wc))
954 return (IS_WIDE_NEWLINE (wc) && input->newline_anchor
955 ? CONTEXT_NEWLINE : 0);
960 c = re_string_byte_at (input, idx);
961 if (bitset_contain (input->word_char, c))
963 return IS_NEWLINE (c) && input->newline_anchor ? CONTEXT_NEWLINE : 0;
967 /* Functions for set operation. */
970 internal_function __attribute_warn_unused_result__
971 re_node_set_alloc (re_node_set *set, Idx size)
975 set->elems = re_malloc (Idx, size);
976 if (BE (set->elems == NULL, 0))
982 internal_function __attribute_warn_unused_result__
983 re_node_set_init_1 (re_node_set *set, Idx elem)
987 set->elems = re_malloc (Idx, 1);
988 if (BE (set->elems == NULL, 0))
990 set->alloc = set->nelem = 0;
993 set->elems[0] = elem;
998 internal_function __attribute_warn_unused_result__
999 re_node_set_init_2 (re_node_set *set, Idx elem1, Idx elem2)
1002 set->elems = re_malloc (Idx, 2);
1003 if (BE (set->elems == NULL, 0))
1008 set->elems[0] = elem1;
1015 set->elems[0] = elem1;
1016 set->elems[1] = elem2;
1020 set->elems[0] = elem2;
1021 set->elems[1] = elem1;
1027 static reg_errcode_t
1028 internal_function __attribute_warn_unused_result__
1029 re_node_set_init_copy (re_node_set *dest, const re_node_set *src)
1031 dest->nelem = src->nelem;
1034 dest->alloc = dest->nelem;
1035 dest->elems = re_malloc (Idx, dest->alloc);
1036 if (BE (dest->elems == NULL, 0))
1038 dest->alloc = dest->nelem = 0;
1041 memcpy (dest->elems, src->elems, src->nelem * sizeof (Idx));
1044 re_node_set_init_empty (dest);
1048 /* Calculate the intersection of the sets SRC1 and SRC2. And merge it to
1049 DEST. Return value indicate the error code or REG_NOERROR if succeeded.
1050 Note: We assume dest->elems is NULL, when dest->alloc is 0. */
1052 static reg_errcode_t
1053 internal_function __attribute_warn_unused_result__
1054 re_node_set_add_intersect (re_node_set *dest, const re_node_set *src1,
1055 const re_node_set *src2)
1057 Idx i1, i2, is, id, delta, sbase;
1058 if (src1->nelem == 0 || src2->nelem == 0)
1061 /* We need dest->nelem + 2 * elems_in_intersection; this is a
1062 conservative estimate. */
1063 if (src1->nelem + src2->nelem + dest->nelem > dest->alloc)
1065 Idx new_alloc = src1->nelem + src2->nelem + dest->alloc;
1066 Idx *new_elems = re_realloc (dest->elems, Idx, new_alloc);
1067 if (BE (new_elems == NULL, 0))
1069 dest->elems = new_elems;
1070 dest->alloc = new_alloc;
1073 /* Find the items in the intersection of SRC1 and SRC2, and copy
1074 into the top of DEST those that are not already in DEST itself. */
1075 sbase = dest->nelem + src1->nelem + src2->nelem;
1076 i1 = src1->nelem - 1;
1077 i2 = src2->nelem - 1;
1078 id = dest->nelem - 1;
1081 if (src1->elems[i1] == src2->elems[i2])
1083 /* Try to find the item in DEST. Maybe we could binary search? */
1084 while (REG_VALID_INDEX (id) && dest->elems[id] > src1->elems[i1])
1087 if (! REG_VALID_INDEX (id) || dest->elems[id] != src1->elems[i1])
1088 dest->elems[--sbase] = src1->elems[i1];
1090 if (! REG_VALID_INDEX (--i1) || ! REG_VALID_INDEX (--i2))
1094 /* Lower the highest of the two items. */
1095 else if (src1->elems[i1] < src2->elems[i2])
1097 if (! REG_VALID_INDEX (--i2))
1102 if (! REG_VALID_INDEX (--i1))
1107 id = dest->nelem - 1;
1108 is = dest->nelem + src1->nelem + src2->nelem - 1;
1109 delta = is - sbase + 1;
1111 /* Now copy. When DELTA becomes zero, the remaining
1112 DEST elements are already in place; this is more or
1113 less the same loop that is in re_node_set_merge. */
1114 dest->nelem += delta;
1115 if (delta > 0 && REG_VALID_INDEX (id))
1118 if (dest->elems[is] > dest->elems[id])
1120 /* Copy from the top. */
1121 dest->elems[id + delta--] = dest->elems[is--];
1127 /* Slide from the bottom. */
1128 dest->elems[id + delta] = dest->elems[id];
1129 if (! REG_VALID_INDEX (--id))
1134 /* Copy remaining SRC elements. */
1135 memcpy (dest->elems, dest->elems + sbase, delta * sizeof (Idx));
1140 /* Calculate the union set of the sets SRC1 and SRC2. And store it to
1141 DEST. Return value indicate the error code or REG_NOERROR if succeeded. */
1143 static reg_errcode_t
1144 internal_function __attribute_warn_unused_result__
1145 re_node_set_init_union (re_node_set *dest, const re_node_set *src1,
1146 const re_node_set *src2)
1149 if (src1 != NULL && src1->nelem > 0 && src2 != NULL && src2->nelem > 0)
1151 dest->alloc = src1->nelem + src2->nelem;
1152 dest->elems = re_malloc (Idx, dest->alloc);
1153 if (BE (dest->elems == NULL, 0))
1158 if (src1 != NULL && src1->nelem > 0)
1159 return re_node_set_init_copy (dest, src1);
1160 else if (src2 != NULL && src2->nelem > 0)
1161 return re_node_set_init_copy (dest, src2);
1163 re_node_set_init_empty (dest);
1166 for (i1 = i2 = id = 0 ; i1 < src1->nelem && i2 < src2->nelem ;)
1168 if (src1->elems[i1] > src2->elems[i2])
1170 dest->elems[id++] = src2->elems[i2++];
1173 if (src1->elems[i1] == src2->elems[i2])
1175 dest->elems[id++] = src1->elems[i1++];
1177 if (i1 < src1->nelem)
1179 memcpy (dest->elems + id, src1->elems + i1,
1180 (src1->nelem - i1) * sizeof (Idx));
1181 id += src1->nelem - i1;
1183 else if (i2 < src2->nelem)
1185 memcpy (dest->elems + id, src2->elems + i2,
1186 (src2->nelem - i2) * sizeof (Idx));
1187 id += src2->nelem - i2;
1193 /* Calculate the union set of the sets DEST and SRC. And store it to
1194 DEST. Return value indicate the error code or REG_NOERROR if succeeded. */
1196 static reg_errcode_t
1197 internal_function __attribute_warn_unused_result__
1198 re_node_set_merge (re_node_set *dest, const re_node_set *src)
1200 Idx is, id, sbase, delta;
1201 if (src == NULL || src->nelem == 0)
1203 if (dest->alloc < 2 * src->nelem + dest->nelem)
1205 Idx new_alloc = 2 * (src->nelem + dest->alloc);
1206 Idx *new_buffer = re_realloc (dest->elems, Idx, new_alloc);
1207 if (BE (new_buffer == NULL, 0))
1209 dest->elems = new_buffer;
1210 dest->alloc = new_alloc;
1213 if (BE (dest->nelem == 0, 0))
1215 dest->nelem = src->nelem;
1216 memcpy (dest->elems, src->elems, src->nelem * sizeof (Idx));
1220 /* Copy into the top of DEST the items of SRC that are not
1221 found in DEST. Maybe we could binary search in DEST? */
1222 for (sbase = dest->nelem + 2 * src->nelem,
1223 is = src->nelem - 1, id = dest->nelem - 1;
1224 REG_VALID_INDEX (is) && REG_VALID_INDEX (id); )
1226 if (dest->elems[id] == src->elems[is])
1228 else if (dest->elems[id] < src->elems[is])
1229 dest->elems[--sbase] = src->elems[is--];
1230 else /* if (dest->elems[id] > src->elems[is]) */
1234 if (REG_VALID_INDEX (is))
1236 /* If DEST is exhausted, the remaining items of SRC must be unique. */
1238 memcpy (dest->elems + sbase, src->elems, (is + 1) * sizeof (Idx));
1241 id = dest->nelem - 1;
1242 is = dest->nelem + 2 * src->nelem - 1;
1243 delta = is - sbase + 1;
1247 /* Now copy. When DELTA becomes zero, the remaining
1248 DEST elements are already in place. */
1249 dest->nelem += delta;
1252 if (dest->elems[is] > dest->elems[id])
1254 /* Copy from the top. */
1255 dest->elems[id + delta--] = dest->elems[is--];
1261 /* Slide from the bottom. */
1262 dest->elems[id + delta] = dest->elems[id];
1263 if (! REG_VALID_INDEX (--id))
1265 /* Copy remaining SRC elements. */
1266 memcpy (dest->elems, dest->elems + sbase,
1267 delta * sizeof (Idx));
1276 /* Insert the new element ELEM to the re_node_set* SET.
1277 SET should not already have ELEM.
1278 Return true if successful. */
1281 internal_function __attribute_warn_unused_result__
1282 re_node_set_insert (re_node_set *set, Idx elem)
1285 /* In case the set is empty. */
1286 if (set->alloc == 0)
1287 return BE (re_node_set_init_1 (set, elem) == REG_NOERROR, 1);
1289 if (BE (set->nelem, 0) == 0)
1291 /* We already guaranteed above that set->alloc != 0. */
1292 set->elems[0] = elem;
1297 /* Realloc if we need. */
1298 if (set->alloc == set->nelem)
1301 set->alloc = set->alloc * 2;
1302 new_elems = re_realloc (set->elems, Idx, set->alloc);
1303 if (BE (new_elems == NULL, 0))
1305 set->elems = new_elems;
1308 /* Move the elements which follows the new element. Test the
1309 first element separately to skip a check in the inner loop. */
1310 if (elem < set->elems[0])
1313 for (idx = set->nelem; idx > 0; idx--)
1314 set->elems[idx] = set->elems[idx - 1];
1318 for (idx = set->nelem; set->elems[idx - 1] > elem; idx--)
1319 set->elems[idx] = set->elems[idx - 1];
1322 /* Insert the new element. */
1323 set->elems[idx] = elem;
1328 /* Insert the new element ELEM to the re_node_set* SET.
1329 SET should not already have any element greater than or equal to ELEM.
1330 Return true if successful. */
1333 internal_function __attribute_warn_unused_result__
1334 re_node_set_insert_last (re_node_set *set, Idx elem)
1336 /* Realloc if we need. */
1337 if (set->alloc == set->nelem)
1340 set->alloc = (set->alloc + 1) * 2;
1341 new_elems = re_realloc (set->elems, Idx, set->alloc);
1342 if (BE (new_elems == NULL, 0))
1344 set->elems = new_elems;
1347 /* Insert the new element. */
1348 set->elems[set->nelem++] = elem;
1352 /* Compare two node sets SET1 and SET2.
1353 Return true if SET1 and SET2 are equivalent. */
1356 internal_function __attribute ((pure))
1357 re_node_set_compare (const re_node_set *set1, const re_node_set *set2)
1360 if (set1 == NULL || set2 == NULL || set1->nelem != set2->nelem)
1362 for (i = set1->nelem ; REG_VALID_INDEX (--i) ; )
1363 if (set1->elems[i] != set2->elems[i])
1368 /* Return (idx + 1) if SET contains the element ELEM, return 0 otherwise. */
1371 internal_function __attribute ((pure))
1372 re_node_set_contains (const re_node_set *set, Idx elem)
1374 __re_size_t idx, right, mid;
1375 if (! REG_VALID_NONZERO_INDEX (set->nelem))
1378 /* Binary search the element. */
1380 right = set->nelem - 1;
1383 mid = (idx + right) / 2;
1384 if (set->elems[mid] < elem)
1389 return set->elems[idx] == elem ? idx + 1 : 0;
1394 re_node_set_remove_at (re_node_set *set, Idx idx)
1396 if (idx < 0 || idx >= set->nelem)
1399 for (; idx < set->nelem; idx++)
1400 set->elems[idx] = set->elems[idx + 1];
1404 /* Add the token TOKEN to dfa->nodes, and return the index of the token.
1405 Or return REG_MISSING if an error occurred. */
1409 re_dfa_add_node (re_dfa_t *dfa, re_token_t token)
1411 if (BE (dfa->nodes_len >= dfa->nodes_alloc, 0))
1413 size_t new_nodes_alloc = dfa->nodes_alloc * 2;
1414 Idx *new_nexts, *new_indices;
1415 re_node_set *new_edests, *new_eclosures;
1416 re_token_t *new_nodes;
1417 size_t max_object_size =
1418 MAX (sizeof (re_token_t),
1419 MAX (sizeof (re_node_set),
1422 /* Avoid overflows. */
1423 if (BE (SIZE_MAX / 2 / max_object_size < dfa->nodes_alloc, 0))
1426 new_nodes = re_realloc (dfa->nodes, re_token_t, new_nodes_alloc);
1427 if (BE (new_nodes == NULL, 0))
1429 dfa->nodes = new_nodes;
1430 new_nexts = re_realloc (dfa->nexts, Idx, new_nodes_alloc);
1431 new_indices = re_realloc (dfa->org_indices, Idx, new_nodes_alloc);
1432 new_edests = re_realloc (dfa->edests, re_node_set, new_nodes_alloc);
1433 new_eclosures = re_realloc (dfa->eclosures, re_node_set, new_nodes_alloc);
1434 if (BE (new_nexts == NULL || new_indices == NULL
1435 || new_edests == NULL || new_eclosures == NULL, 0))
1437 dfa->nexts = new_nexts;
1438 dfa->org_indices = new_indices;
1439 dfa->edests = new_edests;
1440 dfa->eclosures = new_eclosures;
1441 dfa->nodes_alloc = new_nodes_alloc;
1443 dfa->nodes[dfa->nodes_len] = token;
1444 dfa->nodes[dfa->nodes_len].constraint = 0;
1445 #ifdef RE_ENABLE_I18N
1447 int type = token.type;
1448 dfa->nodes[dfa->nodes_len].accept_mb =
1449 (type == OP_PERIOD && dfa->mb_cur_max > 1) || type == COMPLEX_BRACKET;
1452 dfa->nexts[dfa->nodes_len] = REG_MISSING;
1453 re_node_set_init_empty (dfa->edests + dfa->nodes_len);
1454 re_node_set_init_empty (dfa->eclosures + dfa->nodes_len);
1455 return dfa->nodes_len++;
1458 static inline re_hashval_t
1460 calc_state_hash (const re_node_set *nodes, unsigned int context)
1462 re_hashval_t hash = nodes->nelem + context;
1464 for (i = 0 ; i < nodes->nelem ; i++)
1465 hash += nodes->elems[i];
1469 /* Search for the state whose node_set is equivalent to NODES.
1470 Return the pointer to the state, if we found it in the DFA.
1471 Otherwise create the new one and return it. In case of an error
1472 return NULL and set the error code in ERR.
1473 Note: - We assume NULL as the invalid state, then it is possible that
1474 return value is NULL and ERR is REG_NOERROR.
1475 - We never return non-NULL value in case of any errors, it is for
1478 static re_dfastate_t *
1479 internal_function __attribute_warn_unused_result__
1480 re_acquire_state (reg_errcode_t *err, const re_dfa_t *dfa,
1481 const re_node_set *nodes)
1484 re_dfastate_t *new_state;
1485 struct re_state_table_entry *spot;
1488 /* Suppress bogus uninitialized-variable warnings. */
1491 if (BE (nodes->nelem == 0, 0))
1496 hash = calc_state_hash (nodes, 0);
1497 spot = dfa->state_table + (hash & dfa->state_hash_mask);
1499 for (i = 0 ; i < spot->num ; i++)
1501 re_dfastate_t *state = spot->array[i];
1502 if (hash != state->hash)
1504 if (re_node_set_compare (&state->nodes, nodes))
1508 /* There are no appropriate state in the dfa, create the new one. */
1509 new_state = create_ci_newstate (dfa, nodes, hash);
1510 if (BE (new_state == NULL, 0))
1516 /* Search for the state whose node_set is equivalent to NODES and
1517 whose context is equivalent to CONTEXT.
1518 Return the pointer to the state, if we found it in the DFA.
1519 Otherwise create the new one and return it. In case of an error
1520 return NULL and set the error code in ERR.
1521 Note: - We assume NULL as the invalid state, then it is possible that
1522 return value is NULL and ERR is REG_NOERROR.
1523 - We never return non-NULL value in case of any errors, it is for
1526 static re_dfastate_t *
1527 internal_function __attribute_warn_unused_result__
1528 re_acquire_state_context (reg_errcode_t *err, const re_dfa_t *dfa,
1529 const re_node_set *nodes, unsigned int context)
1532 re_dfastate_t *new_state;
1533 struct re_state_table_entry *spot;
1536 /* Suppress bogus uninitialized-variable warnings. */
1539 if (nodes->nelem == 0)
1544 hash = calc_state_hash (nodes, context);
1545 spot = dfa->state_table + (hash & dfa->state_hash_mask);
1547 for (i = 0 ; i < spot->num ; i++)
1549 re_dfastate_t *state = spot->array[i];
1550 if (state->hash == hash
1551 && state->context == context
1552 && re_node_set_compare (state->entrance_nodes, nodes))
1555 /* There are no appropriate state in `dfa', create the new one. */
1556 new_state = create_cd_newstate (dfa, nodes, context, hash);
1557 if (BE (new_state == NULL, 0))
1563 /* Finish initialization of the new state NEWSTATE, and using its hash value
1564 HASH put in the appropriate bucket of DFA's state table. Return value
1565 indicates the error code if failed. */
1567 static reg_errcode_t
1568 __attribute_warn_unused_result__
1569 register_state (const re_dfa_t *dfa, re_dfastate_t *newstate,
1572 struct re_state_table_entry *spot;
1576 newstate->hash = hash;
1577 err = re_node_set_alloc (&newstate->non_eps_nodes, newstate->nodes.nelem);
1578 if (BE (err != REG_NOERROR, 0))
1580 for (i = 0; i < newstate->nodes.nelem; i++)
1582 Idx elem = newstate->nodes.elems[i];
1583 if (!IS_EPSILON_NODE (dfa->nodes[elem].type))
1584 if (BE (! re_node_set_insert_last (&newstate->non_eps_nodes, elem), 0))
1588 spot = dfa->state_table + (hash & dfa->state_hash_mask);
1589 if (BE (spot->alloc <= spot->num, 0))
1591 Idx new_alloc = 2 * spot->num + 2;
1592 re_dfastate_t **new_array = re_realloc (spot->array, re_dfastate_t *,
1594 if (BE (new_array == NULL, 0))
1596 spot->array = new_array;
1597 spot->alloc = new_alloc;
1599 spot->array[spot->num++] = newstate;
1604 free_state (re_dfastate_t *state)
1606 re_node_set_free (&state->non_eps_nodes);
1607 re_node_set_free (&state->inveclosure);
1608 if (state->entrance_nodes != &state->nodes)
1610 re_node_set_free (state->entrance_nodes);
1611 re_free (state->entrance_nodes);
1613 re_node_set_free (&state->nodes);
1614 re_free (state->word_trtable);
1615 re_free (state->trtable);
1619 /* Create the new state which is independ of contexts.
1620 Return the new state if succeeded, otherwise return NULL. */
1622 static re_dfastate_t *
1623 internal_function __attribute_warn_unused_result__
1624 create_ci_newstate (const re_dfa_t *dfa, const re_node_set *nodes,
1629 re_dfastate_t *newstate;
1631 newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1);
1632 if (BE (newstate == NULL, 0))
1634 err = re_node_set_init_copy (&newstate->nodes, nodes);
1635 if (BE (err != REG_NOERROR, 0))
1641 newstate->entrance_nodes = &newstate->nodes;
1642 for (i = 0 ; i < nodes->nelem ; i++)
1644 re_token_t *node = dfa->nodes + nodes->elems[i];
1645 re_token_type_t type = node->type;
1646 if (type == CHARACTER && !node->constraint)
1648 #ifdef RE_ENABLE_I18N
1649 newstate->accept_mb |= node->accept_mb;
1650 #endif /* RE_ENABLE_I18N */
1652 /* If the state has the halt node, the state is a halt state. */
1653 if (type == END_OF_RE)
1655 else if (type == OP_BACK_REF)
1656 newstate->has_backref = 1;
1657 else if (type == ANCHOR || node->constraint)
1658 newstate->has_constraint = 1;
1660 err = register_state (dfa, newstate, hash);
1661 if (BE (err != REG_NOERROR, 0))
1663 free_state (newstate);
1669 /* Create the new state which is depend on the context CONTEXT.
1670 Return the new state if succeeded, otherwise return NULL. */
1672 static re_dfastate_t *
1673 internal_function __attribute_warn_unused_result__
1674 create_cd_newstate (const re_dfa_t *dfa, const re_node_set *nodes,
1675 unsigned int context, re_hashval_t hash)
1677 Idx i, nctx_nodes = 0;
1679 re_dfastate_t *newstate;
1681 newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1);
1682 if (BE (newstate == NULL, 0))
1684 err = re_node_set_init_copy (&newstate->nodes, nodes);
1685 if (BE (err != REG_NOERROR, 0))
1691 newstate->context = context;
1692 newstate->entrance_nodes = &newstate->nodes;
1694 for (i = 0 ; i < nodes->nelem ; i++)
1696 re_token_t *node = dfa->nodes + nodes->elems[i];
1697 re_token_type_t type = node->type;
1698 unsigned int constraint = node->constraint;
1700 if (type == CHARACTER && !constraint)
1702 #ifdef RE_ENABLE_I18N
1703 newstate->accept_mb |= node->accept_mb;
1704 #endif /* RE_ENABLE_I18N */
1706 /* If the state has the halt node, the state is a halt state. */
1707 if (type == END_OF_RE)
1709 else if (type == OP_BACK_REF)
1710 newstate->has_backref = 1;
1714 if (newstate->entrance_nodes == &newstate->nodes)
1716 newstate->entrance_nodes = re_malloc (re_node_set, 1);
1717 if (BE (newstate->entrance_nodes == NULL, 0))
1719 free_state (newstate);
1722 if (re_node_set_init_copy (newstate->entrance_nodes, nodes)
1726 newstate->has_constraint = 1;
1729 if (NOT_SATISFY_PREV_CONSTRAINT (constraint,context))
1731 re_node_set_remove_at (&newstate->nodes, i - nctx_nodes);
1736 err = register_state (dfa, newstate, hash);
1737 if (BE (err != REG_NOERROR, 0))
1739 free_state (newstate);