git.gag.com Git - debian/tar/blob - gnu/regexec.c

   1 /* -*- buffer-read-only: t -*- vi: set ro: */
   2 /* DO NOT EDIT! GENERATED AUTOMATICALLY! */
   3 /* Extended regular expression matching and search library.
   4    Copyright (C) 2002-2011 Free Software Foundation, Inc.
   5    This file is part of the GNU C Library.
   6    Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
   7
   8    This program is free software; you can redistribute it and/or modify
   9    it under the terms of the GNU General Public License as published by
  10    the Free Software Foundation; either version 3, or (at your option)
  11    any later version.
  12
  13    This program is distributed in the hope that it will be useful,
  14    but WITHOUT ANY WARRANTY; without even the implied warranty of
  15    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16    GNU General Public License for more details.
  17
  18    You should have received a copy of the GNU General Public License along
  19    with this program; if not, write to the Free Software Foundation,
  20    Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
  21
  22 static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags,
  23                                      Idx n) internal_function;
  24 static void match_ctx_clean (re_match_context_t *mctx) internal_function;
  25 static void match_ctx_free (re_match_context_t *cache) internal_function;
  26 static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, Idx node,
  27                                           Idx str_idx, Idx from, Idx to)
  28      internal_function;
  29 static Idx search_cur_bkref_entry (const re_match_context_t *mctx, Idx str_idx)
  30      internal_function;
  31 static reg_errcode_t match_ctx_add_subtop (re_match_context_t *mctx, Idx node,
  32                                            Idx str_idx) internal_function;
  33 static re_sub_match_last_t * match_ctx_add_sublast (re_sub_match_top_t *subtop,
  34                                                     Idx node, Idx str_idx)
  35      internal_function;
  36 static void sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
  37                            re_dfastate_t **limited_sts, Idx last_node,
  38                            Idx last_str_idx)
  39      internal_function;
  40 static reg_errcode_t re_search_internal (const regex_t *preg,
  41                                          const char *string, Idx length,
  42                                          Idx start, Idx last_start, Idx stop,
  43                                          size_t nmatch, regmatch_t pmatch[],
  44                                          int eflags) internal_function;
  45 static regoff_t re_search_2_stub (struct re_pattern_buffer *bufp,
  46                                   const char *string1, Idx length1,
  47                                   const char *string2, Idx length2,
  48                                   Idx start, regoff_t range,
  49                                   struct re_registers *regs,
  50                                   Idx stop, bool ret_len) internal_function;
  51 static regoff_t re_search_stub (struct re_pattern_buffer *bufp,
  52                                 const char *string, Idx length, Idx start,
  53                                 regoff_t range, Idx stop,
  54                                 struct re_registers *regs,
  55                                 bool ret_len) internal_function;
  56 static unsigned int re_copy_regs (struct re_registers *regs, regmatch_t *pmatch,
  57                                   Idx nregs, int regs_allocated)
  58      internal_function;
  59 static reg_errcode_t prune_impossible_nodes (re_match_context_t *mctx)
  60      internal_function;
  61 static Idx check_matching (re_match_context_t *mctx, bool fl_longest_match,
  62                            Idx *p_match_first) internal_function;
  63 static Idx check_halt_state_context (const re_match_context_t *mctx,
  64                                      const re_dfastate_t *state, Idx idx)
  65      internal_function;
  66 static void update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
  67                          regmatch_t *prev_idx_match, Idx cur_node,
  68                          Idx cur_idx, Idx nmatch) internal_function;
  69 static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs,
  70                                       Idx str_idx, Idx dest_node, Idx nregs,
  71                                       regmatch_t *regs,
  72                                       re_node_set *eps_via_nodes)
  73      internal_function;
  74 static reg_errcode_t set_regs (const regex_t *preg,
  75                                const re_match_context_t *mctx,
  76                                size_t nmatch, regmatch_t *pmatch,
  77                                bool fl_backtrack) internal_function;
  78 static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs)
  79      internal_function;
  80
  81 #ifdef RE_ENABLE_I18N
  82 static int sift_states_iter_mb (const re_match_context_t *mctx,
  83                                 re_sift_context_t *sctx,
  84                                 Idx node_idx, Idx str_idx, Idx max_str_idx)
  85      internal_function;
  86 #endif /* RE_ENABLE_I18N */
  87 static reg_errcode_t sift_states_backward (const re_match_context_t *mctx,
  88                                            re_sift_context_t *sctx)
  89      internal_function;
  90 static reg_errcode_t build_sifted_states (const re_match_context_t *mctx,
  91                                           re_sift_context_t *sctx, Idx str_idx,
  92                                           re_node_set *cur_dest)
  93      internal_function;
  94 static reg_errcode_t update_cur_sifted_state (const re_match_context_t *mctx,
  95                                               re_sift_context_t *sctx,
  96                                               Idx str_idx,
  97                                               re_node_set *dest_nodes)
  98      internal_function;
  99 static reg_errcode_t add_epsilon_src_nodes (const re_dfa_t *dfa,
 100                                             re_node_set *dest_nodes,
 101                                             const re_node_set *candidates)
 102      internal_function;
 103 static bool check_dst_limits (const re_match_context_t *mctx,
 104                               const re_node_set *limits,
 105                               Idx dst_node, Idx dst_idx, Idx src_node,
 106                               Idx src_idx) internal_function;
 107 static int check_dst_limits_calc_pos_1 (const re_match_context_t *mctx,
 108                                         int boundaries, Idx subexp_idx,
 109                                         Idx from_node, Idx bkref_idx)
 110      internal_function;
 111 static int check_dst_limits_calc_pos (const re_match_context_t *mctx,
 112                                       Idx limit, Idx subexp_idx,
 113                                       Idx node, Idx str_idx,
 114                                       Idx bkref_idx) internal_function;
 115 static reg_errcode_t check_subexp_limits (const re_dfa_t *dfa,
 116                                           re_node_set *dest_nodes,
 117                                           const re_node_set *candidates,
 118                                           re_node_set *limits,
 119                                           struct re_backref_cache_entry *bkref_ents,
 120                                           Idx str_idx) internal_function;
 121 static reg_errcode_t sift_states_bkref (const re_match_context_t *mctx,
 122                                         re_sift_context_t *sctx,
 123                                         Idx str_idx, const re_node_set *candidates)
 124      internal_function;
 125 static reg_errcode_t merge_state_array (const re_dfa_t *dfa,
 126                                         re_dfastate_t **dst,
 127                                         re_dfastate_t **src, Idx num)
 128      internal_function;
 129 static re_dfastate_t *find_recover_state (reg_errcode_t *err,
 130                                          re_match_context_t *mctx) internal_function;
 131 static re_dfastate_t *transit_state (reg_errcode_t *err,
 132                                      re_match_context_t *mctx,
 133                                      re_dfastate_t *state) internal_function;
 134 static re_dfastate_t *merge_state_with_log (reg_errcode_t *err,
 135                                             re_match_context_t *mctx,
 136                                             re_dfastate_t *next_state)
 137      internal_function;
 138 static reg_errcode_t check_subexp_matching_top (re_match_context_t *mctx,
 139                                                 re_node_set *cur_nodes,
 140                                                 Idx str_idx) internal_function;
 141 #if 0
 142 static re_dfastate_t *transit_state_sb (reg_errcode_t *err,
 143                                         re_match_context_t *mctx,
 144                                         re_dfastate_t *pstate)
 145      internal_function;
 146 #endif
 147 #ifdef RE_ENABLE_I18N
 148 static reg_errcode_t transit_state_mb (re_match_context_t *mctx,
 149                                        re_dfastate_t *pstate)
 150      internal_function;
 151 #endif /* RE_ENABLE_I18N */
 152 static reg_errcode_t transit_state_bkref (re_match_context_t *mctx,
 153                                           const re_node_set *nodes)
 154      internal_function;
 155 static reg_errcode_t get_subexp (re_match_context_t *mctx,
 156                                  Idx bkref_node, Idx bkref_str_idx)
 157      internal_function;
 158 static reg_errcode_t get_subexp_sub (re_match_context_t *mctx,
 159                                      const re_sub_match_top_t *sub_top,
 160                                      re_sub_match_last_t *sub_last,
 161                                      Idx bkref_node, Idx bkref_str)
 162      internal_function;
 163 static Idx find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
 164                              Idx subexp_idx, int type) internal_function;
 165 static reg_errcode_t check_arrival (re_match_context_t *mctx,
 166                                     state_array_t *path, Idx top_node,
 167                                     Idx top_str, Idx last_node, Idx last_str,
 168                                     int type) internal_function;
 169 static reg_errcode_t check_arrival_add_next_nodes (re_match_context_t *mctx,
 170                                                    Idx str_idx,
 171                                                    re_node_set *cur_nodes,
 172                                                    re_node_set *next_nodes)
 173      internal_function;
 174 static reg_errcode_t check_arrival_expand_ecl (const re_dfa_t *dfa,
 175                                                re_node_set *cur_nodes,
 176                                                Idx ex_subexp, int type)
 177      internal_function;
 178 static reg_errcode_t check_arrival_expand_ecl_sub (const re_dfa_t *dfa,
 179                                                    re_node_set *dst_nodes,
 180                                                    Idx target, Idx ex_subexp,
 181                                                    int type) internal_function;
 182 static reg_errcode_t expand_bkref_cache (re_match_context_t *mctx,
 183                                          re_node_set *cur_nodes, Idx cur_str,
 184                                          Idx subexp_num, int type)
 185      internal_function;
 186 static bool build_trtable (const re_dfa_t *dfa,
 187                            re_dfastate_t *state) internal_function;
 188 #ifdef RE_ENABLE_I18N
 189 static int check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx,
 190                                     const re_string_t *input, Idx idx)
 191      internal_function;
 192 # ifdef _LIBC
 193 static unsigned int find_collation_sequence_value (const unsigned char *mbs,
 194                                                    size_t name_len)
 195      internal_function;
 196 # endif /* _LIBC */
 197 #endif /* RE_ENABLE_I18N */
 198 static Idx group_nodes_into_DFAstates (const re_dfa_t *dfa,
 199                                        const re_dfastate_t *state,
 200                                        re_node_set *states_node,
 201                                        bitset_t *states_ch) internal_function;
 202 static bool check_node_accept (const re_match_context_t *mctx,
 203                                const re_token_t *node, Idx idx)
 204      internal_function;
 205 static reg_errcode_t extend_buffers (re_match_context_t *mctx)
 206      internal_function;
 207 \f
 208 /* Entry point for POSIX code.  */
 209
 210 /* regexec searches for a given pattern, specified by PREG, in the
 211    string STRING.
 212
 213    If NMATCH is zero or REG_NOSUB was set in the cflags argument to
 214    `regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
 215    least NMATCH elements, and we set them to the offsets of the
 216    corresponding matched substrings.
 217
 218    EFLAGS specifies `execution flags' which affect matching: if
 219    REG_NOTBOL is set, then ^ does not match at the beginning of the
 220    string; if REG_NOTEOL is set, then $ does not match at the end.
 221
 222    We return 0 if we find a match and REG_NOMATCH if not.  */
 223
 224 int
 225 regexec (preg, string, nmatch, pmatch, eflags)
 226     const regex_t *_Restrict_ preg;
 227     const char *_Restrict_ string;
 228     size_t nmatch;
 229     regmatch_t pmatch[_Restrict_arr_];
 230     int eflags;
 231 {
 232   reg_errcode_t err;
 233   Idx start, length;
 234 #ifdef _LIBC
 235   re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
 236 #endif
 237
 238   if (eflags & ~(REG_NOTBOL | REG_NOTEOL | REG_STARTEND))
 239     return REG_BADPAT;
 240
 241   if (eflags & REG_STARTEND)
 242     {
 243       start = pmatch[0].rm_so;
 244       length = pmatch[0].rm_eo;
 245     }
 246   else
 247     {
 248       start = 0;
 249       length = strlen (string);
 250     }
 251
 252   __libc_lock_lock (dfa->lock);
 253   if (preg->no_sub)
 254     err = re_search_internal (preg, string, length, start, length,
 255                               length, 0, NULL, eflags);
 256   else
 257     err = re_search_internal (preg, string, length, start, length,
 258                               length, nmatch, pmatch, eflags);
 259   __libc_lock_unlock (dfa->lock);
 260   return err != REG_NOERROR;
 261 }
 262
 263 #ifdef _LIBC
 264 # include <shlib-compat.h>
 265 versioned_symbol (libc, __regexec, regexec, GLIBC_2_3_4);
 266
 267 # if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4)
 268 __typeof__ (__regexec) __compat_regexec;
 269
 270 int
 271 attribute_compat_text_section
 272 __compat_regexec (const regex_t *_Restrict_ preg,
 273                   const char *_Restrict_ string, size_t nmatch,
 274                   regmatch_t pmatch[], int eflags)
 275 {
 276   return regexec (preg, string, nmatch, pmatch,
 277                   eflags & (REG_NOTBOL | REG_NOTEOL));
 278 }
 279 compat_symbol (libc, __compat_regexec, regexec, GLIBC_2_0);
 280 # endif
 281 #endif
 282
 283 /* Entry points for GNU code.  */
 284
 285 /* re_match, re_search, re_match_2, re_search_2
 286
 287    The former two functions operate on STRING with length LENGTH,
 288    while the later two operate on concatenation of STRING1 and STRING2
 289    with lengths LENGTH1 and LENGTH2, respectively.
 290
 291    re_match() matches the compiled pattern in BUFP against the string,
 292    starting at index START.
 293
 294    re_search() first tries matching at index START, then it tries to match
 295    starting from index START + 1, and so on.  The last start position tried
 296    is START + RANGE.  (Thus RANGE = 0 forces re_search to operate the same
 297    way as re_match().)
 298
 299    The parameter STOP of re_{match,search}_2 specifies that no match exceeding
 300    the first STOP characters of the concatenation of the strings should be
 301    concerned.
 302
 303    If REGS is not NULL, and BUFP->no_sub is not set, the offsets of the match
 304    and all groups is stored in REGS.  (For the "_2" variants, the offsets are
 305    computed relative to the concatenation, not relative to the individual
 306    strings.)
 307
 308    On success, re_match* functions return the length of the match, re_search*
 309    return the position of the start of the match.  Return value -1 means no
 310    match was found and -2 indicates an internal error.  */
 311
 312 regoff_t
 313 re_match (bufp, string, length, start, regs)
 314     struct re_pattern_buffer *bufp;
 315     const char *string;
 316     Idx length, start;
 317     struct re_registers *regs;
 318 {
 319   return re_search_stub (bufp, string, length, start, 0, length, regs, true);
 320 }
 321 #ifdef _LIBC
 322 weak_alias (__re_match, re_match)
 323 #endif
 324
 325 regoff_t
 326 re_search (bufp, string, length, start, range, regs)
 327     struct re_pattern_buffer *bufp;
 328     const char *string;
 329     Idx length, start;
 330     regoff_t range;
 331     struct re_registers *regs;
 332 {
 333   return re_search_stub (bufp, string, length, start, range, length, regs,
 334                          false);
 335 }
 336 #ifdef _LIBC
 337 weak_alias (__re_search, re_search)
 338 #endif
 339
 340 regoff_t
 341 re_match_2 (bufp, string1, length1, string2, length2, start, regs, stop)
 342     struct re_pattern_buffer *bufp;
 343     const char *string1, *string2;
 344     Idx length1, length2, start, stop;
 345     struct re_registers *regs;
 346 {
 347   return re_search_2_stub (bufp, string1, length1, string2, length2,
 348                            start, 0, regs, stop, true);
 349 }
 350 #ifdef _LIBC
 351 weak_alias (__re_match_2, re_match_2)
 352 #endif
 353
 354 regoff_t
 355 re_search_2 (bufp, string1, length1, string2, length2, start, range, regs, stop)
 356     struct re_pattern_buffer *bufp;
 357     const char *string1, *string2;
 358     Idx length1, length2, start, stop;
 359     regoff_t range;
 360     struct re_registers *regs;
 361 {
 362   return re_search_2_stub (bufp, string1, length1, string2, length2,
 363                            start, range, regs, stop, false);
 364 }
 365 #ifdef _LIBC
 366 weak_alias (__re_search_2, re_search_2)
 367 #endif
 368
 369 static regoff_t
 370 internal_function
 371 re_search_2_stub (struct re_pattern_buffer *bufp,
 372                   const char *string1, Idx length1,
 373                   const char *string2, Idx length2,
 374                   Idx start, regoff_t range, struct re_registers *regs,
 375                   Idx stop, bool ret_len)
 376 {
 377   const char *str;
 378   regoff_t rval;
 379   Idx len = length1 + length2;
 380   char *s = NULL;
 381
 382   if (BE (length1 < 0 || length2 < 0 || stop < 0 || len < length1, 0))
 383     return -2;
 384
 385   /* Concatenate the strings.  */
 386   if (length2 > 0)
 387     if (length1 > 0)
 388       {
 389         s = re_malloc (char, len);
 390
 391         if (BE (s == NULL, 0))
 392           return -2;
 393 #ifdef _LIBC
 394         memcpy (__mempcpy (s, string1, length1), string2, length2);
 395 #else
 396         memcpy (s, string1, length1);
 397         memcpy (s + length1, string2, length2);
 398 #endif
 399         str = s;
 400       }
 401     else
 402       str = string2;
 403   else
 404     str = string1;
 405
 406   rval = re_search_stub (bufp, str, len, start, range, stop, regs,
 407                          ret_len);
 408   re_free (s);
 409   return rval;
 410 }
 411
 412 /* The parameters have the same meaning as those of re_search.
 413    Additional parameters:
 414    If RET_LEN is true the length of the match is returned (re_match style);
 415    otherwise the position of the match is returned.  */
 416
 417 static regoff_t
 418 internal_function
 419 re_search_stub (struct re_pattern_buffer *bufp,
 420                 const char *string, Idx length,
 421                 Idx start, regoff_t range, Idx stop, struct re_registers *regs,
 422                 bool ret_len)
 423 {
 424   reg_errcode_t result;
 425   regmatch_t *pmatch;
 426   Idx nregs;
 427   regoff_t rval;
 428   int eflags = 0;
 429 #ifdef _LIBC
 430   re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;
 431 #endif
 432   Idx last_start = start + range;
 433
 434   /* Check for out-of-range.  */
 435   if (BE (start < 0 || start > length, 0))
 436     return -1;
 437   if (BE (length < last_start || (0 <= range && last_start < start), 0))
 438     last_start = length;
 439   else if (BE (last_start < 0 || (range < 0 && start <= last_start), 0))
 440     last_start = 0;
 441
 442   __libc_lock_lock (dfa->lock);
 443
 444   eflags |= (bufp->not_bol) ? REG_NOTBOL : 0;
 445   eflags |= (bufp->not_eol) ? REG_NOTEOL : 0;
 446
 447   /* Compile fastmap if we haven't yet.  */
 448   if (start < last_start && bufp->fastmap != NULL && !bufp->fastmap_accurate)
 449     re_compile_fastmap (bufp);
 450
 451   if (BE (bufp->no_sub, 0))
 452     regs = NULL;
 453
 454   /* We need at least 1 register.  */
 455   if (regs == NULL)
 456     nregs = 1;
 457   else if (BE (bufp->regs_allocated == REGS_FIXED
 458                && regs->num_regs <= bufp->re_nsub, 0))
 459     {
 460       nregs = regs->num_regs;
 461       if (BE (nregs < 1, 0))
 462         {
 463           /* Nothing can be copied to regs.  */
 464           regs = NULL;
 465           nregs = 1;
 466         }
 467     }
 468   else
 469     nregs = bufp->re_nsub + 1;
 470   pmatch = re_malloc (regmatch_t, nregs);
 471   if (BE (pmatch == NULL, 0))
 472     {
 473       rval = -2;
 474       goto out;
 475     }
 476
 477   result = re_search_internal (bufp, string, length, start, last_start, stop,
 478                                nregs, pmatch, eflags);
 479
 480   rval = 0;
 481
 482   /* I hope we needn't fill ther regs with -1's when no match was found.  */
 483   if (result != REG_NOERROR)
 484     rval = -1;
 485   else if (regs != NULL)
 486     {
 487       /* If caller wants register contents data back, copy them.  */
 488       bufp->regs_allocated = re_copy_regs (regs, pmatch, nregs,
 489                                            bufp->regs_allocated);
 490       if (BE (bufp->regs_allocated == REGS_UNALLOCATED, 0))
 491         rval = -2;
 492     }
 493
 494   if (BE (rval == 0, 1))
 495     {
 496       if (ret_len)
 497         {
 498           assert (pmatch[0].rm_so == start);
 499           rval = pmatch[0].rm_eo - start;
 500         }
 501       else
 502         rval = pmatch[0].rm_so;
 503     }
 504   re_free (pmatch);
 505  out:
 506   __libc_lock_unlock (dfa->lock);
 507   return rval;
 508 }
 509
 510 static unsigned int
 511 internal_function
 512 re_copy_regs (struct re_registers *regs, regmatch_t *pmatch, Idx nregs,
 513               int regs_allocated)
 514 {
 515   int rval = REGS_REALLOCATE;
 516   Idx i;
 517   Idx need_regs = nregs + 1;
 518   /* We need one extra element beyond `num_regs' for the `-1' marker GNU code
 519      uses.  */
 520
 521   /* Have the register data arrays been allocated?  */
 522   if (regs_allocated == REGS_UNALLOCATED)
 523     { /* No.  So allocate them with malloc.  */
 524       regs->start = re_malloc (regoff_t, need_regs);
 525       if (BE (regs->start == NULL, 0))
 526         return REGS_UNALLOCATED;
 527       regs->end = re_malloc (regoff_t, need_regs);
 528       if (BE (regs->end == NULL, 0))
 529         {
 530           re_free (regs->start);
 531           return REGS_UNALLOCATED;
 532         }
 533       regs->num_regs = need_regs;
 534     }
 535   else if (regs_allocated == REGS_REALLOCATE)
 536     { /* Yes.  If we need more elements than were already
 537          allocated, reallocate them.  If we need fewer, just
 538          leave it alone.  */
 539       if (BE (need_regs > regs->num_regs, 0))
 540         {
 541           regoff_t *new_start = re_realloc (regs->start, regoff_t, need_regs);
 542           regoff_t *new_end;
 543           if (BE (new_start == NULL, 0))
 544             return REGS_UNALLOCATED;
 545           new_end = re_realloc (regs->end, regoff_t, need_regs);
 546           if (BE (new_end == NULL, 0))
 547             {
 548               re_free (new_start);
 549               return REGS_UNALLOCATED;
 550             }
 551           regs->start = new_start;
 552           regs->end = new_end;
 553           regs->num_regs = need_regs;
 554         }
 555     }
 556   else
 557     {
 558       assert (regs_allocated == REGS_FIXED);
 559       /* This function may not be called with REGS_FIXED and nregs too big.  */
 560       assert (regs->num_regs >= nregs);
 561       rval = REGS_FIXED;
 562     }
 563
 564   /* Copy the regs.  */
 565   for (i = 0; i < nregs; ++i)
 566     {
 567       regs->start[i] = pmatch[i].rm_so;
 568       regs->end[i] = pmatch[i].rm_eo;
 569     }
 570   for ( ; i < regs->num_regs; ++i)
 571     regs->start[i] = regs->end[i] = -1;
 572
 573   return rval;
 574 }
 575
 576 /* Set REGS to hold NUM_REGS registers, storing them in STARTS and
 577    ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
 578    this memory for recording register information.  STARTS and ENDS
 579    must be allocated using the malloc library routine, and must each
 580    be at least NUM_REGS * sizeof (regoff_t) bytes long.
 581
 582    If NUM_REGS == 0, then subsequent matches should allocate their own
 583    register data.
 584
 585    Unless this function is called, the first search or match using
 586    PATTERN_BUFFER will allocate its own register data, without
 587    freeing the old data.  */
 588
 589 void
 590 re_set_registers (bufp, regs, num_regs, starts, ends)
 591     struct re_pattern_buffer *bufp;
 592     struct re_registers *regs;
 593     __re_size_t num_regs;
 594     regoff_t *starts, *ends;
 595 {
 596   if (num_regs)
 597     {
 598       bufp->regs_allocated = REGS_REALLOCATE;
 599       regs->num_regs = num_regs;
 600       regs->start = starts;
 601       regs->end = ends;
 602     }
 603   else
 604     {
 605       bufp->regs_allocated = REGS_UNALLOCATED;
 606       regs->num_regs = 0;
 607       regs->start = regs->end = NULL;
 608     }
 609 }
 610 #ifdef _LIBC
 611 weak_alias (__re_set_registers, re_set_registers)
 612 #endif
 613 \f
 614 /* Entry points compatible with 4.2 BSD regex library.  We don't define
 615    them unless specifically requested.  */
 616
 617 #if defined _REGEX_RE_COMP || defined _LIBC
 618 int
 619 # ifdef _LIBC
 620 weak_function
 621 # endif
 622 re_exec (s)
 623      const char *s;
 624 {
 625   return 0 == regexec (&re_comp_buf, s, 0, NULL, 0);
 626 }
 627 #endif /* _REGEX_RE_COMP */
 628 \f
 629 /* Internal entry point.  */
 630
 631 /* Searches for a compiled pattern PREG in the string STRING, whose
 632    length is LENGTH.  NMATCH, PMATCH, and EFLAGS have the same
 633    meaning as with regexec.  LAST_START is START + RANGE, where
 634    START and RANGE have the same meaning as with re_search.
 635    Return REG_NOERROR if we find a match, and REG_NOMATCH if not,
 636    otherwise return the error code.
 637    Note: We assume front end functions already check ranges.
 638    (0 <= LAST_START && LAST_START <= LENGTH)  */
 639
 640 static reg_errcode_t
 641 internal_function __attribute_warn_unused_result__
 642 re_search_internal (const regex_t *preg,
 643                     const char *string, Idx length,
 644                     Idx start, Idx last_start, Idx stop,
 645                     size_t nmatch, regmatch_t pmatch[],
 646                     int eflags)
 647 {
 648   reg_errcode_t err;
 649   const re_dfa_t *dfa = (const re_dfa_t *) preg->buffer;
 650   Idx left_lim, right_lim;
 651   int incr;
 652   bool fl_longest_match;
 653   int match_kind;
 654   Idx match_first;
 655   Idx match_last = REG_MISSING;
 656   Idx extra_nmatch;
 657   bool sb;
 658   int ch;
 659 #if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)
 660   re_match_context_t mctx = { .dfa = dfa };
 661 #else
 662   re_match_context_t mctx;
 663 #endif
 664   char *fastmap = ((preg->fastmap != NULL && preg->fastmap_accurate
 665                     && start != last_start && !preg->can_be_null)
 666                    ? preg->fastmap : NULL);
 667   RE_TRANSLATE_TYPE t = preg->translate;
 668
 669 #if !(defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L))
 670   memset (&mctx, '\0', sizeof (re_match_context_t));
 671   mctx.dfa = dfa;
 672 #endif
 673
 674   extra_nmatch = (nmatch > preg->re_nsub) ? nmatch - (preg->re_nsub + 1) : 0;
 675   nmatch -= extra_nmatch;
 676
 677   /* Check if the DFA haven't been compiled.  */
 678   if (BE (preg->used == 0 || dfa->init_state == NULL
 679           || dfa->init_state_word == NULL || dfa->init_state_nl == NULL
 680           || dfa->init_state_begbuf == NULL, 0))
 681     return REG_NOMATCH;
 682
 683 #ifdef DEBUG
 684   /* We assume front-end functions already check them.  */
 685   assert (0 <= last_start && last_start <= length);
 686 #endif
 687
 688   /* If initial states with non-begbuf contexts have no elements,
 689      the regex must be anchored.  If preg->newline_anchor is set,
 690      we'll never use init_state_nl, so do not check it.  */
 691   if (dfa->init_state->nodes.nelem == 0
 692       && dfa->init_state_word->nodes.nelem == 0
 693       && (dfa->init_state_nl->nodes.nelem == 0
 694           || !preg->newline_anchor))
 695     {
 696       if (start != 0 && last_start != 0)
 697         return REG_NOMATCH;
 698       start = last_start = 0;
 699     }
 700
 701   /* We must check the longest matching, if nmatch > 0.  */
 702   fl_longest_match = (nmatch != 0 || dfa->nbackref);
 703
 704   err = re_string_allocate (&mctx.input, string, length, dfa->nodes_len + 1,
 705                             preg->translate, (preg->syntax & RE_ICASE) != 0,
 706                             dfa);
 707   if (BE (err != REG_NOERROR, 0))
 708     goto free_return;
 709   mctx.input.stop = stop;
 710   mctx.input.raw_stop = stop;
 711   mctx.input.newline_anchor = preg->newline_anchor;
 712
 713   err = match_ctx_init (&mctx, eflags, dfa->nbackref * 2);
 714   if (BE (err != REG_NOERROR, 0))
 715     goto free_return;
 716
 717   /* We will log all the DFA states through which the dfa pass,
 718      if nmatch > 1, or this dfa has "multibyte node", which is a
 719      back-reference or a node which can accept multibyte character or
 720      multi character collating element.  */
 721   if (nmatch > 1 || dfa->has_mb_node)
 722     {
 723       /* Avoid overflow.  */
 724       if (BE (SIZE_MAX / sizeof (re_dfastate_t *) <= mctx.input.bufs_len, 0))
 725         {
 726           err = REG_ESPACE;
 727           goto free_return;
 728         }
 729
 730       mctx.state_log = re_malloc (re_dfastate_t *, mctx.input.bufs_len + 1);
 731       if (BE (mctx.state_log == NULL, 0))
 732         {
 733           err = REG_ESPACE;
 734           goto free_return;
 735         }
 736     }
 737   else
 738     mctx.state_log = NULL;
 739
 740   match_first = start;
 741   mctx.input.tip_context = (eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
 742                            : CONTEXT_NEWLINE | CONTEXT_BEGBUF;
 743
 744   /* Check incrementally whether of not the input string match.  */
 745   incr = (last_start < start) ? -1 : 1;
 746   left_lim = (last_start < start) ? last_start : start;
 747   right_lim = (last_start < start) ? start : last_start;
 748   sb = dfa->mb_cur_max == 1;
 749   match_kind =
 750     (fastmap
 751      ? ((sb || !(preg->syntax & RE_ICASE || t) ? 4 : 0)
 752         | (start <= last_start ? 2 : 0)
 753         | (t != NULL ? 1 : 0))
 754      : 8);
 755
 756   for (;; match_first += incr)
 757     {
 758       err = REG_NOMATCH;
 759       if (match_first < left_lim || right_lim < match_first)
 760         goto free_return;
 761
 762       /* Advance as rapidly as possible through the string, until we
 763          find a plausible place to start matching.  This may be done
 764          with varying efficiency, so there are various possibilities:
 765          only the most common of them are specialized, in order to
 766          save on code size.  We use a switch statement for speed.  */
 767       switch (match_kind)
 768         {
 769         case 8:
 770           /* No fastmap.  */
 771           break;
 772
 773         case 7:
 774           /* Fastmap with single-byte translation, match forward.  */
 775           while (BE (match_first < right_lim, 1)
 776                  && !fastmap[t[(unsigned char) string[match_first]]])
 777             ++match_first;
 778           goto forward_match_found_start_or_reached_end;
 779
 780         case 6:
 781           /* Fastmap without translation, match forward.  */
 782           while (BE (match_first < right_lim, 1)
 783                  && !fastmap[(unsigned char) string[match_first]])
 784             ++match_first;
 785
 786         forward_match_found_start_or_reached_end:
 787           if (BE (match_first == right_lim, 0))
 788             {
 789               ch = match_first >= length
 790                        ? 0 : (unsigned char) string[match_first];
 791               if (!fastmap[t ? t[ch] : ch])
 792                 goto free_return;
 793             }
 794           break;
 795
 796         case 4:
 797         case 5:
 798           /* Fastmap without multi-byte translation, match backwards.  */
 799           while (match_first >= left_lim)
 800             {
 801               ch = match_first >= length
 802                        ? 0 : (unsigned char) string[match_first];
 803               if (fastmap[t ? t[ch] : ch])
 804                 break;
 805               --match_first;
 806             }
 807           if (match_first < left_lim)
 808             goto free_return;
 809           break;
 810
 811         default:
 812           /* In this case, we can't determine easily the current byte,
 813              since it might be a component byte of a multibyte
 814              character.  Then we use the constructed buffer instead.  */
 815           for (;;)
 816             {
 817               /* If MATCH_FIRST is out of the valid range, reconstruct the
 818                  buffers.  */
 819               __re_size_t offset = match_first - mctx.input.raw_mbs_idx;
 820               if (BE (offset >= (__re_size_t) mctx.input.valid_raw_len, 0))
 821                 {
 822                   err = re_string_reconstruct (&mctx.input, match_first,
 823                                                eflags);
 824                   if (BE (err != REG_NOERROR, 0))
 825                     goto free_return;
 826
 827                   offset = match_first - mctx.input.raw_mbs_idx;
 828                 }
 829               /* If MATCH_FIRST is out of the buffer, leave it as '\0'.
 830                  Note that MATCH_FIRST must not be smaller than 0.  */
 831               ch = (match_first >= length
 832                     ? 0 : re_string_byte_at (&mctx.input, offset));
 833               if (fastmap[ch])
 834                 break;
 835               match_first += incr;
 836               if (match_first < left_lim || match_first > right_lim)
 837                 {
 838                   err = REG_NOMATCH;
 839                   goto free_return;
 840                 }
 841             }
 842           break;
 843         }
 844
 845       /* Reconstruct the buffers so that the matcher can assume that
 846          the matching starts from the beginning of the buffer.  */
 847       err = re_string_reconstruct (&mctx.input, match_first, eflags);
 848       if (BE (err != REG_NOERROR, 0))
 849         goto free_return;
 850
 851 #ifdef RE_ENABLE_I18N
 852      /* Don't consider this char as a possible match start if it part,
 853         yet isn't the head, of a multibyte character.  */
 854       if (!sb && !re_string_first_byte (&mctx.input, 0))
 855         continue;
 856 #endif
 857
 858       /* It seems to be appropriate one, then use the matcher.  */
 859       /* We assume that the matching starts from 0.  */
 860       mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0;
 861       match_last = check_matching (&mctx, fl_longest_match,
 862                                    start <= last_start ? &match_first : NULL);
 863       if (match_last != REG_MISSING)
 864         {
 865           if (BE (match_last == REG_ERROR, 0))
 866             {
 867               err = REG_ESPACE;
 868               goto free_return;
 869             }
 870           else
 871             {
 872               mctx.match_last = match_last;
 873               if ((!preg->no_sub && nmatch > 1) || dfa->nbackref)
 874                 {
 875                   re_dfastate_t *pstate = mctx.state_log[match_last];
 876                   mctx.last_node = check_halt_state_context (&mctx, pstate,
 877                                                              match_last);
 878                 }
 879               if ((!preg->no_sub && nmatch > 1 && dfa->has_plural_match)
 880                   || dfa->nbackref)
 881                 {
 882                   err = prune_impossible_nodes (&mctx);
 883                   if (err == REG_NOERROR)
 884                     break;
 885                   if (BE (err != REG_NOMATCH, 0))
 886                     goto free_return;
 887                   match_last = REG_MISSING;
 888                 }
 889               else
 890                 break; /* We found a match.  */
 891             }
 892         }
 893
 894       match_ctx_clean (&mctx);
 895     }
 896
 897 #ifdef DEBUG
 898   assert (match_last != REG_MISSING);
 899   assert (err == REG_NOERROR);
 900 #endif
 901
 902   /* Set pmatch[] if we need.  */
 903   if (nmatch > 0)
 904     {
 905       Idx reg_idx;
 906
 907       /* Initialize registers.  */
 908       for (reg_idx = 1; reg_idx < nmatch; ++reg_idx)
 909         pmatch[reg_idx].rm_so = pmatch[reg_idx].rm_eo = -1;
 910
 911       /* Set the points where matching start/end.  */
 912       pmatch[0].rm_so = 0;
 913       pmatch[0].rm_eo = mctx.match_last;
 914       /* FIXME: This function should fail if mctx.match_last exceeds
 915          the maximum possible regoff_t value.  We need a new error
 916          code REG_OVERFLOW.  */
 917
 918       if (!preg->no_sub && nmatch > 1)
 919         {
 920           err = set_regs (preg, &mctx, nmatch, pmatch,
 921                           dfa->has_plural_match && dfa->nbackref > 0);
 922           if (BE (err != REG_NOERROR, 0))
 923             goto free_return;
 924         }
 925
 926       /* At last, add the offset to the each registers, since we slided
 927          the buffers so that we could assume that the matching starts
 928          from 0.  */
 929       for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
 930         if (pmatch[reg_idx].rm_so != -1)
 931           {
 932 #ifdef RE_ENABLE_I18N
 933             if (BE (mctx.input.offsets_needed != 0, 0))
 934               {
 935                 pmatch[reg_idx].rm_so =
 936                   (pmatch[reg_idx].rm_so == mctx.input.valid_len
 937                    ? mctx.input.valid_raw_len
 938                    : mctx.input.offsets[pmatch[reg_idx].rm_so]);
 939                 pmatch[reg_idx].rm_eo =
 940                   (pmatch[reg_idx].rm_eo == mctx.input.valid_len
 941                    ? mctx.input.valid_raw_len
 942                    : mctx.input.offsets[pmatch[reg_idx].rm_eo]);
 943               }
 944 #else
 945             assert (mctx.input.offsets_needed == 0);
 946 #endif
 947             pmatch[reg_idx].rm_so += match_first;
 948             pmatch[reg_idx].rm_eo += match_first;
 949           }
 950       for (reg_idx = 0; reg_idx < extra_nmatch; ++reg_idx)
 951         {
 952           pmatch[nmatch + reg_idx].rm_so = -1;
 953           pmatch[nmatch + reg_idx].rm_eo = -1;
 954         }
 955
 956       if (dfa->subexp_map)
 957         for (reg_idx = 0; reg_idx + 1 < nmatch; reg_idx++)
 958           if (dfa->subexp_map[reg_idx] != reg_idx)
 959             {
 960               pmatch[reg_idx + 1].rm_so
 961                 = pmatch[dfa->subexp_map[reg_idx] + 1].rm_so;
 962               pmatch[reg_idx + 1].rm_eo
 963                 = pmatch[dfa->subexp_map[reg_idx] + 1].rm_eo;
 964             }
 965     }
 966
 967  free_return:
 968   re_free (mctx.state_log);
 969   if (dfa->nbackref)
 970     match_ctx_free (&mctx);
 971   re_string_destruct (&mctx.input);
 972   return err;
 973 }
 974
 975 static reg_errcode_t
 976 internal_function __attribute_warn_unused_result__
 977 prune_impossible_nodes (re_match_context_t *mctx)
 978 {
 979   const re_dfa_t *const dfa = mctx->dfa;
 980   Idx halt_node, match_last;
 981   reg_errcode_t ret;
 982   re_dfastate_t **sifted_states;
 983   re_dfastate_t **lim_states = NULL;
 984   re_sift_context_t sctx;
 985 #ifdef DEBUG
 986   assert (mctx->state_log != NULL);
 987 #endif
 988   match_last = mctx->match_last;
 989   halt_node = mctx->last_node;
 990
 991   /* Avoid overflow.  */
 992   if (BE (SIZE_MAX / sizeof (re_dfastate_t *) <= match_last, 0))
 993     return REG_ESPACE;
 994
 995   sifted_states = re_malloc (re_dfastate_t *, match_last + 1);
 996   if (BE (sifted_states == NULL, 0))
 997     {
 998       ret = REG_ESPACE;
 999       goto free_return;
1000     }
1001   if (dfa->nbackref)
1002     {
1003       lim_states = re_malloc (re_dfastate_t *, match_last + 1);
1004       if (BE (lim_states == NULL, 0))
1005         {
1006           ret = REG_ESPACE;
1007           goto free_return;
1008         }
1009       while (1)
1010         {
1011           memset (lim_states, '\0',
1012                   sizeof (re_dfastate_t *) * (match_last + 1));
1013           sift_ctx_init (&sctx, sifted_states, lim_states, halt_node,
1014                          match_last);
1015           ret = sift_states_backward (mctx, &sctx);
1016           re_node_set_free (&sctx.limits);
1017           if (BE (ret != REG_NOERROR, 0))
1018               goto free_return;
1019           if (sifted_states[0] != NULL || lim_states[0] != NULL)
1020             break;
1021           do
1022             {
1023               --match_last;
1024               if (! REG_VALID_INDEX (match_last))
1025                 {
1026                   ret = REG_NOMATCH;
1027                   goto free_return;
1028                 }
1029             } while (mctx->state_log[match_last] == NULL
1030                      || !mctx->state_log[match_last]->halt);
1031           halt_node = check_halt_state_context (mctx,
1032                                                 mctx->state_log[match_last],
1033                                                 match_last);
1034         }
1035       ret = merge_state_array (dfa, sifted_states, lim_states,
1036                                match_last + 1);
1037       re_free (lim_states);
1038       lim_states = NULL;
1039       if (BE (ret != REG_NOERROR, 0))
1040         goto free_return;
1041     }
1042   else
1043     {
1044       sift_ctx_init (&sctx, sifted_states, lim_states, halt_node, match_last);
1045       ret = sift_states_backward (mctx, &sctx);
1046       re_node_set_free (&sctx.limits);
1047       if (BE (ret != REG_NOERROR, 0))
1048         goto free_return;
1049       if (sifted_states[0] == NULL)
1050         {
1051           ret = REG_NOMATCH;
1052           goto free_return;
1053         }
1054     }
1055   re_free (mctx->state_log);
1056   mctx->state_log = sifted_states;
1057   sifted_states = NULL;
1058   mctx->last_node = halt_node;
1059   mctx->match_last = match_last;
1060   ret = REG_NOERROR;
1061  free_return:
1062   re_free (sifted_states);
1063   re_free (lim_states);
1064   return ret;
1065 }
1066
1067 /* Acquire an initial state and return it.
1068    We must select appropriate initial state depending on the context,
1069    since initial states may have constraints like "\<", "^", etc..  */
1070
1071 static inline re_dfastate_t *
1072 __attribute ((always_inline)) internal_function
1073 acquire_init_state_context (reg_errcode_t *err, const re_match_context_t *mctx,
1074                             Idx idx)
1075 {
1076   const re_dfa_t *const dfa = mctx->dfa;
1077   if (dfa->init_state->has_constraint)
1078     {
1079       unsigned int context;
1080       context = re_string_context_at (&mctx->input, idx - 1, mctx->eflags);
1081       if (IS_WORD_CONTEXT (context))
1082         return dfa->init_state_word;
1083       else if (IS_ORDINARY_CONTEXT (context))
1084         return dfa->init_state;
1085       else if (IS_BEGBUF_CONTEXT (context) && IS_NEWLINE_CONTEXT (context))
1086         return dfa->init_state_begbuf;
1087       else if (IS_NEWLINE_CONTEXT (context))
1088         return dfa->init_state_nl;
1089       else if (IS_BEGBUF_CONTEXT (context))
1090         {
1091           /* It is relatively rare case, then calculate on demand.  */
1092           return re_acquire_state_context (err, dfa,
1093                                            dfa->init_state->entrance_nodes,
1094                                            context);
1095         }
1096       else
1097         /* Must not happen?  */
1098         return dfa->init_state;
1099     }
1100   else
1101     return dfa->init_state;
1102 }
1103
1104 /* Check whether the regular expression match input string INPUT or not,
1105    and return the index where the matching end.  Return REG_MISSING if
1106    there is no match, and return REG_ERROR in case of an error.
1107    FL_LONGEST_MATCH means we want the POSIX longest matching.
1108    If P_MATCH_FIRST is not NULL, and the match fails, it is set to the
1109    next place where we may want to try matching.
1110    Note that the matcher assume that the maching starts from the current
1111    index of the buffer.  */
1112
1113 static Idx
1114 internal_function __attribute_warn_unused_result__
1115 check_matching (re_match_context_t *mctx, bool fl_longest_match,
1116                 Idx *p_match_first)
1117 {
1118   const re_dfa_t *const dfa = mctx->dfa;
1119   reg_errcode_t err;
1120   Idx match = 0;
1121   Idx match_last = REG_MISSING;
1122   Idx cur_str_idx = re_string_cur_idx (&mctx->input);
1123   re_dfastate_t *cur_state;
1124   bool at_init_state = p_match_first != NULL;
1125   Idx next_start_idx = cur_str_idx;
1126
1127   err = REG_NOERROR;
1128   cur_state = acquire_init_state_context (&err, mctx, cur_str_idx);
1129   /* An initial state must not be NULL (invalid).  */
1130   if (BE (cur_state == NULL, 0))
1131     {
1132       assert (err == REG_ESPACE);
1133       return REG_ERROR;
1134     }
1135
1136   if (mctx->state_log != NULL)
1137     {
1138       mctx->state_log[cur_str_idx] = cur_state;
1139
1140       /* Check OP_OPEN_SUBEXP in the initial state in case that we use them
1141          later.  E.g. Processing back references.  */
1142       if (BE (dfa->nbackref, 0))
1143         {
1144           at_init_state = false;
1145           err = check_subexp_matching_top (mctx, &cur_state->nodes, 0);
1146           if (BE (err != REG_NOERROR, 0))
1147             return err;
1148
1149           if (cur_state->has_backref)
1150             {
1151               err = transit_state_bkref (mctx, &cur_state->nodes);
1152               if (BE (err != REG_NOERROR, 0))
1153                 return err;
1154             }
1155         }
1156     }
1157
1158   /* If the RE accepts NULL string.  */
1159   if (BE (cur_state->halt, 0))
1160     {
1161       if (!cur_state->has_constraint
1162           || check_halt_state_context (mctx, cur_state, cur_str_idx))
1163         {
1164           if (!fl_longest_match)
1165             return cur_str_idx;
1166           else
1167             {
1168               match_last = cur_str_idx;
1169               match = 1;
1170             }
1171         }
1172     }
1173
1174   while (!re_string_eoi (&mctx->input))
1175     {
1176       re_dfastate_t *old_state = cur_state;
1177       Idx next_char_idx = re_string_cur_idx (&mctx->input) + 1;
1178
1179       if (BE (next_char_idx >= mctx->input.bufs_len, 0)
1180           || (BE (next_char_idx >= mctx->input.valid_len, 0)
1181               && mctx->input.valid_len < mctx->input.len))
1182         {
1183           err = extend_buffers (mctx);
1184           if (BE (err != REG_NOERROR, 0))
1185             {
1186               assert (err == REG_ESPACE);
1187               return REG_ERROR;
1188             }
1189         }
1190
1191       cur_state = transit_state (&err, mctx, cur_state);
1192       if (mctx->state_log != NULL)
1193         cur_state = merge_state_with_log (&err, mctx, cur_state);
1194
1195       if (cur_state == NULL)
1196         {
1197           /* Reached the invalid state or an error.  Try to recover a valid
1198              state using the state log, if available and if we have not
1199              already found a valid (even if not the longest) match.  */
1200           if (BE (err != REG_NOERROR, 0))
1201             return REG_ERROR;
1202
1203           if (mctx->state_log == NULL
1204               || (match && !fl_longest_match)
1205               || (cur_state = find_recover_state (&err, mctx)) == NULL)
1206             break;
1207         }
1208
1209       if (BE (at_init_state, 0))
1210         {
1211           if (old_state == cur_state)
1212             next_start_idx = next_char_idx;
1213           else
1214             at_init_state = false;
1215         }
1216
1217       if (cur_state->halt)
1218         {
1219           /* Reached a halt state.
1220              Check the halt state can satisfy the current context.  */
1221           if (!cur_state->has_constraint
1222               || check_halt_state_context (mctx, cur_state,
1223                                            re_string_cur_idx (&mctx->input)))
1224             {
1225               /* We found an appropriate halt state.  */
1226               match_last = re_string_cur_idx (&mctx->input);
1227               match = 1;
1228
1229               /* We found a match, do not modify match_first below.  */
1230               p_match_first = NULL;
1231               if (!fl_longest_match)
1232                 break;
1233             }
1234         }
1235     }
1236
1237   if (p_match_first)
1238     *p_match_first += next_start_idx;
1239
1240   return match_last;
1241 }
1242
1243 /* Check NODE match the current context.  */
1244
1245 static bool
1246 internal_function
1247 check_halt_node_context (const re_dfa_t *dfa, Idx node, unsigned int context)
1248 {
1249   re_token_type_t type = dfa->nodes[node].type;
1250   unsigned int constraint = dfa->nodes[node].constraint;
1251   if (type != END_OF_RE)
1252     return false;
1253   if (!constraint)
1254     return true;
1255   if (NOT_SATISFY_NEXT_CONSTRAINT (constraint, context))
1256     return false;
1257   return true;
1258 }
1259
1260 /* Check the halt state STATE match the current context.
1261    Return 0 if not match, if the node, STATE has, is a halt node and
1262    match the context, return the node.  */
1263
1264 static Idx
1265 internal_function
1266 check_halt_state_context (const re_match_context_t *mctx,
1267                           const re_dfastate_t *state, Idx idx)
1268 {
1269   Idx i;
1270   unsigned int context;
1271 #ifdef DEBUG
1272   assert (state->halt);
1273 #endif
1274   context = re_string_context_at (&mctx->input, idx, mctx->eflags);
1275   for (i = 0; i < state->nodes.nelem; ++i)
1276     if (check_halt_node_context (mctx->dfa, state->nodes.elems[i], context))
1277       return state->nodes.elems[i];
1278   return 0;
1279 }
1280
1281 /* Compute the next node to which "NFA" transit from NODE("NFA" is a NFA
1282    corresponding to the DFA).
1283    Return the destination node, and update EPS_VIA_NODES;
1284    return REG_MISSING in case of errors.  */
1285
1286 static Idx
1287 internal_function
1288 proceed_next_node (const re_match_context_t *mctx, Idx nregs, regmatch_t *regs,
1289                    Idx *pidx, Idx node, re_node_set *eps_via_nodes,
1290                    struct re_fail_stack_t *fs)
1291 {
1292   const re_dfa_t *const dfa = mctx->dfa;
1293   Idx i;
1294   bool ok;
1295   if (IS_EPSILON_NODE (dfa->nodes[node].type))
1296     {
1297       re_node_set *cur_nodes = &mctx->state_log[*pidx]->nodes;
1298       re_node_set *edests = &dfa->edests[node];
1299       Idx dest_node;
1300       ok = re_node_set_insert (eps_via_nodes, node);
1301       if (BE (! ok, 0))
1302         return REG_ERROR;
1303       /* Pick up a valid destination, or return REG_MISSING if none
1304          is found.  */
1305       for (dest_node = REG_MISSING, i = 0; i < edests->nelem; ++i)
1306         {
1307           Idx candidate = edests->elems[i];
1308           if (!re_node_set_contains (cur_nodes, candidate))
1309             continue;
1310           if (dest_node == REG_MISSING)
1311             dest_node = candidate;
1312
1313           else
1314             {
1315               /* In order to avoid infinite loop like "(a*)*", return the second
1316                  epsilon-transition if the first was already considered.  */
1317               if (re_node_set_contains (eps_via_nodes, dest_node))
1318                 return candidate;
1319
1320               /* Otherwise, push the second epsilon-transition on the fail stack.  */
1321               else if (fs != NULL
1322                        && push_fail_stack (fs, *pidx, candidate, nregs, regs,
1323                                            eps_via_nodes))
1324                 return REG_ERROR;
1325
1326               /* We know we are going to exit.  */
1327               break;
1328             }
1329         }
1330       return dest_node;
1331     }
1332   else
1333     {
1334       Idx naccepted = 0;
1335       re_token_type_t type = dfa->nodes[node].type;
1336
1337 #ifdef RE_ENABLE_I18N
1338       if (dfa->nodes[node].accept_mb)
1339         naccepted = check_node_accept_bytes (dfa, node, &mctx->input, *pidx);
1340       else
1341 #endif /* RE_ENABLE_I18N */
1342       if (type == OP_BACK_REF)
1343         {
1344           Idx subexp_idx = dfa->nodes[node].opr.idx + 1;
1345           naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so;
1346           if (fs != NULL)
1347             {
1348               if (regs[subexp_idx].rm_so == -1 || regs[subexp_idx].rm_eo == -1)
1349                 return REG_MISSING;
1350               else if (naccepted)
1351                 {
1352                   char *buf = (char *) re_string_get_buffer (&mctx->input);
1353                   if (memcmp (buf + regs[subexp_idx].rm_so, buf + *pidx,
1354                               naccepted) != 0)
1355                     return REG_MISSING;
1356                 }
1357             }
1358
1359           if (naccepted == 0)
1360             {
1361               Idx dest_node;
1362               ok = re_node_set_insert (eps_via_nodes, node);
1363               if (BE (! ok, 0))
1364                 return REG_ERROR;
1365               dest_node = dfa->edests[node].elems[0];
1366               if (re_node_set_contains (&mctx->state_log[*pidx]->nodes,
1367                                         dest_node))
1368                 return dest_node;
1369             }
1370         }
1371
1372       if (naccepted != 0
1373           || check_node_accept (mctx, dfa->nodes + node, *pidx))
1374         {
1375           Idx dest_node = dfa->nexts[node];
1376           *pidx = (naccepted == 0) ? *pidx + 1 : *pidx + naccepted;
1377           if (fs && (*pidx > mctx->match_last || mctx->state_log[*pidx] == NULL
1378                      || !re_node_set_contains (&mctx->state_log[*pidx]->nodes,
1379                                                dest_node)))
1380             return REG_MISSING;
1381           re_node_set_empty (eps_via_nodes);
1382           return dest_node;
1383         }
1384     }
1385   return REG_MISSING;
1386 }
1387
1388 static reg_errcode_t
1389 internal_function __attribute_warn_unused_result__
1390 push_fail_stack (struct re_fail_stack_t *fs, Idx str_idx, Idx dest_node,
1391                  Idx nregs, regmatch_t *regs, re_node_set *eps_via_nodes)
1392 {
1393   reg_errcode_t err;
1394   Idx num = fs->num++;
1395   if (fs->num == fs->alloc)
1396     {
1397       struct re_fail_stack_ent_t *new_array;
1398       new_array = realloc (fs->stack, (sizeof (struct re_fail_stack_ent_t)
1399                                        * fs->alloc * 2));
1400       if (new_array == NULL)
1401         return REG_ESPACE;
1402       fs->alloc *= 2;
1403       fs->stack = new_array;
1404     }
1405   fs->stack[num].idx = str_idx;
1406   fs->stack[num].node = dest_node;
1407   fs->stack[num].regs = re_malloc (regmatch_t, nregs);
1408   if (fs->stack[num].regs == NULL)
1409     return REG_ESPACE;
1410   memcpy (fs->stack[num].regs, regs, sizeof (regmatch_t) * nregs);
1411   err = re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes);
1412   return err;
1413 }
1414
1415 static Idx
1416 internal_function
1417 pop_fail_stack (struct re_fail_stack_t *fs, Idx *pidx, Idx nregs,
1418                 regmatch_t *regs, re_node_set *eps_via_nodes)
1419 {
1420   Idx num = --fs->num;
1421   assert (REG_VALID_INDEX (num));
1422   *pidx = fs->stack[num].idx;
1423   memcpy (regs, fs->stack[num].regs, sizeof (regmatch_t) * nregs);
1424   re_node_set_free (eps_via_nodes);
1425   re_free (fs->stack[num].regs);
1426   *eps_via_nodes = fs->stack[num].eps_via_nodes;
1427   return fs->stack[num].node;
1428 }
1429
1430 /* Set the positions where the subexpressions are starts/ends to registers
1431    PMATCH.
1432    Note: We assume that pmatch[0] is already set, and
1433    pmatch[i].rm_so == pmatch[i].rm_eo == -1 for 0 < i < nmatch.  */
1434
1435 static reg_errcode_t
1436 internal_function __attribute_warn_unused_result__
1437 set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch,
1438           regmatch_t *pmatch, bool fl_backtrack)
1439 {
1440   const re_dfa_t *dfa = (const re_dfa_t *) preg->buffer;
1441   Idx idx, cur_node;
1442   re_node_set eps_via_nodes;
1443   struct re_fail_stack_t *fs;
1444   struct re_fail_stack_t fs_body = { 0, 2, NULL };
1445   regmatch_t *prev_idx_match;
1446   bool prev_idx_match_malloced = false;
1447
1448 #ifdef DEBUG
1449   assert (nmatch > 1);
1450   assert (mctx->state_log != NULL);
1451 #endif
1452   if (fl_backtrack)
1453     {
1454       fs = &fs_body;
1455       fs->stack = re_malloc (struct re_fail_stack_ent_t, fs->alloc);
1456       if (fs->stack == NULL)
1457         return REG_ESPACE;
1458     }
1459   else
1460     fs = NULL;
1461
1462   cur_node = dfa->init_node;
1463   re_node_set_init_empty (&eps_via_nodes);
1464
1465   if (__libc_use_alloca (nmatch * sizeof (regmatch_t)))
1466     prev_idx_match = (regmatch_t *) alloca (nmatch * sizeof (regmatch_t));
1467   else
1468     {
1469       prev_idx_match = re_malloc (regmatch_t, nmatch);
1470       if (prev_idx_match == NULL)
1471         {
1472           free_fail_stack_return (fs);
1473           return REG_ESPACE;
1474         }
1475       prev_idx_match_malloced = true;
1476     }
1477   memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
1478
1479   for (idx = pmatch[0].rm_so; idx <= pmatch[0].rm_eo ;)
1480     {
1481       update_regs (dfa, pmatch, prev_idx_match, cur_node, idx, nmatch);
1482
1483       if (idx == pmatch[0].rm_eo && cur_node == mctx->last_node)
1484         {
1485           Idx reg_idx;
1486           if (fs)
1487             {
1488               for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
1489                 if (pmatch[reg_idx].rm_so > -1 && pmatch[reg_idx].rm_eo == -1)
1490                   break;
1491               if (reg_idx == nmatch)
1492                 {
1493                   re_node_set_free (&eps_via_nodes);
1494                   if (prev_idx_match_malloced)
1495                     re_free (prev_idx_match);
1496                   return free_fail_stack_return (fs);
1497                 }
1498               cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
1499                                          &eps_via_nodes);
1500             }
1501           else
1502             {
1503               re_node_set_free (&eps_via_nodes);
1504               if (prev_idx_match_malloced)
1505                 re_free (prev_idx_match);
1506               return REG_NOERROR;
1507             }
1508         }
1509
1510       /* Proceed to next node.  */
1511       cur_node = proceed_next_node (mctx, nmatch, pmatch, &idx, cur_node,
1512                                     &eps_via_nodes, fs);
1513
1514       if (BE (! REG_VALID_INDEX (cur_node), 0))
1515         {
1516           if (BE (cur_node == REG_ERROR, 0))
1517             {
1518               re_node_set_free (&eps_via_nodes);
1519               if (prev_idx_match_malloced)
1520                 re_free (prev_idx_match);
1521               free_fail_stack_return (fs);
1522               return REG_ESPACE;
1523             }
1524           if (fs)
1525             cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
1526                                        &eps_via_nodes);
1527           else
1528             {
1529               re_node_set_free (&eps_via_nodes);
1530               if (prev_idx_match_malloced)
1531                 re_free (prev_idx_match);
1532               return REG_NOMATCH;
1533             }
1534         }
1535     }
1536   re_node_set_free (&eps_via_nodes);
1537   if (prev_idx_match_malloced)
1538     re_free (prev_idx_match);
1539   return free_fail_stack_return (fs);
1540 }
1541
1542 static reg_errcode_t
1543 internal_function
1544 free_fail_stack_return (struct re_fail_stack_t *fs)
1545 {
1546   if (fs)
1547     {
1548       Idx fs_idx;
1549       for (fs_idx = 0; fs_idx < fs->num; ++fs_idx)
1550         {
1551           re_node_set_free (&fs->stack[fs_idx].eps_via_nodes);
1552           re_free (fs->stack[fs_idx].regs);
1553         }
1554       re_free (fs->stack);
1555     }
1556   return REG_NOERROR;
1557 }
1558
1559 static void
1560 internal_function
1561 update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
1562              regmatch_t *prev_idx_match, Idx cur_node, Idx cur_idx, Idx nmatch)
1563 {
1564   int type = dfa->nodes[cur_node].type;
1565   if (type == OP_OPEN_SUBEXP)
1566     {
1567       Idx reg_num = dfa->nodes[cur_node].opr.idx + 1;
1568
1569       /* We are at the first node of this sub expression.  */
1570       if (reg_num < nmatch)
1571         {
1572           pmatch[reg_num].rm_so = cur_idx;
1573           pmatch[reg_num].rm_eo = -1;
1574         }
1575     }
1576   else if (type == OP_CLOSE_SUBEXP)
1577     {
1578       Idx reg_num = dfa->nodes[cur_node].opr.idx + 1;
1579       if (reg_num < nmatch)
1580         {
1581           /* We are at the last node of this sub expression.  */
1582           if (pmatch[reg_num].rm_so < cur_idx)
1583             {
1584               pmatch[reg_num].rm_eo = cur_idx;
1585               /* This is a non-empty match or we are not inside an optional
1586                  subexpression.  Accept this right away.  */
1587               memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
1588             }
1589           else
1590             {
1591               if (dfa->nodes[cur_node].opt_subexp
1592                   && prev_idx_match[reg_num].rm_so != -1)
1593                 /* We transited through an empty match for an optional
1594                    subexpression, like (a?)*, and this is not the subexp's
1595                    first match.  Copy back the old content of the registers
1596                    so that matches of an inner subexpression are undone as
1597                    well, like in ((a?))*.  */
1598                 memcpy (pmatch, prev_idx_match, sizeof (regmatch_t) * nmatch);
1599               else
1600                 /* We completed a subexpression, but it may be part of
1601                    an optional one, so do not update PREV_IDX_MATCH.  */
1602                 pmatch[reg_num].rm_eo = cur_idx;
1603             }
1604         }
1605     }
1606 }
1607
1608 /* This function checks the STATE_LOG from the SCTX->last_str_idx to 0
1609    and sift the nodes in each states according to the following rules.
1610    Updated state_log will be wrote to STATE_LOG.
1611
1612    Rules: We throw away the Node `a' in the STATE_LOG[STR_IDX] if...
1613      1. When STR_IDX == MATCH_LAST(the last index in the state_log):
1614         If `a' isn't the LAST_NODE and `a' can't epsilon transit to
1615         the LAST_NODE, we throw away the node `a'.
1616      2. When 0 <= STR_IDX < MATCH_LAST and `a' accepts
1617         string `s' and transit to `b':
1618         i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw
1619            away the node `a'.
1620         ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is
1621             thrown away, we throw away the node `a'.
1622      3. When 0 <= STR_IDX < MATCH_LAST and 'a' epsilon transit to 'b':
1623         i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the
1624            node `a'.
1625         ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is thrown away,
1626             we throw away the node `a'.  */
1627
1628 #define STATE_NODE_CONTAINS(state,node) \
1629   ((state) != NULL && re_node_set_contains (&(state)->nodes, node))
1630
1631 static reg_errcode_t
1632 internal_function
1633 sift_states_backward (const re_match_context_t *mctx, re_sift_context_t *sctx)
1634 {
1635   reg_errcode_t err;
1636   int null_cnt = 0;
1637   Idx str_idx = sctx->last_str_idx;
1638   re_node_set cur_dest;
1639
1640 #ifdef DEBUG
1641   assert (mctx->state_log != NULL && mctx->state_log[str_idx] != NULL);
1642 #endif
1643
1644   /* Build sifted state_log[str_idx].  It has the nodes which can epsilon
1645      transit to the last_node and the last_node itself.  */
1646   err = re_node_set_init_1 (&cur_dest, sctx->last_node);
1647   if (BE (err != REG_NOERROR, 0))
1648     return err;
1649   err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
1650   if (BE (err != REG_NOERROR, 0))
1651     goto free_return;
1652
1653   /* Then check each states in the state_log.  */
1654   while (str_idx > 0)
1655     {
1656       /* Update counters.  */
1657       null_cnt = (sctx->sifted_states[str_idx] == NULL) ? null_cnt + 1 : 0;
1658       if (null_cnt > mctx->max_mb_elem_len)
1659         {
1660           memset (sctx->sifted_states, '\0',
1661                   sizeof (re_dfastate_t *) * str_idx);
1662           re_node_set_free (&cur_dest);
1663           return REG_NOERROR;
1664         }
1665       re_node_set_empty (&cur_dest);
1666       --str_idx;
1667
1668       if (mctx->state_log[str_idx])
1669         {
1670           err = build_sifted_states (mctx, sctx, str_idx, &cur_dest);
1671           if (BE (err != REG_NOERROR, 0))
1672             goto free_return;
1673         }
1674
1675       /* Add all the nodes which satisfy the following conditions:
1676          - It can epsilon transit to a node in CUR_DEST.
1677          - It is in CUR_SRC.
1678          And update state_log.  */
1679       err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
1680       if (BE (err != REG_NOERROR, 0))
1681         goto free_return;
1682     }
1683   err = REG_NOERROR;
1684  free_return:
1685   re_node_set_free (&cur_dest);
1686   return err;
1687 }
1688
1689 static reg_errcode_t
1690 internal_function __attribute_warn_unused_result__
1691 build_sifted_states (const re_match_context_t *mctx, re_sift_context_t *sctx,
1692                      Idx str_idx, re_node_set *cur_dest)
1693 {
1694   const re_dfa_t *const dfa = mctx->dfa;
1695   const re_node_set *cur_src = &mctx->state_log[str_idx]->non_eps_nodes;
1696   Idx i;
1697
1698   /* Then build the next sifted state.
1699      We build the next sifted state on `cur_dest', and update
1700      `sifted_states[str_idx]' with `cur_dest'.
1701      Note:
1702      `cur_dest' is the sifted state from `state_log[str_idx + 1]'.
1703      `cur_src' points the node_set of the old `state_log[str_idx]'
1704      (with the epsilon nodes pre-filtered out).  */
1705   for (i = 0; i < cur_src->nelem; i++)
1706     {
1707       Idx prev_node = cur_src->elems[i];
1708       int naccepted = 0;
1709       bool ok;
1710
1711 #ifdef DEBUG
1712       re_token_type_t type = dfa->nodes[prev_node].type;
1713       assert (!IS_EPSILON_NODE (type));
1714 #endif
1715 #ifdef RE_ENABLE_I18N
1716       /* If the node may accept `multi byte'.  */
1717       if (dfa->nodes[prev_node].accept_mb)
1718         naccepted = sift_states_iter_mb (mctx, sctx, prev_node,
1719                                          str_idx, sctx->last_str_idx);
1720 #endif /* RE_ENABLE_I18N */
1721
1722       /* We don't check backreferences here.
1723          See update_cur_sifted_state().  */
1724       if (!naccepted
1725           && check_node_accept (mctx, dfa->nodes + prev_node, str_idx)
1726           && STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + 1],
1727                                   dfa->nexts[prev_node]))
1728         naccepted = 1;
1729
1730       if (naccepted == 0)
1731         continue;
1732
1733       if (sctx->limits.nelem)
1734         {
1735           Idx to_idx = str_idx + naccepted;
1736           if (check_dst_limits (mctx, &sctx->limits,
1737                                 dfa->nexts[prev_node], to_idx,
1738                                 prev_node, str_idx))
1739             continue;
1740         }
1741       ok = re_node_set_insert (cur_dest, prev_node);
1742       if (BE (! ok, 0))
1743         return REG_ESPACE;
1744     }
1745
1746   return REG_NOERROR;
1747 }
1748
1749 /* Helper functions.  */
1750
1751 static reg_errcode_t
1752 internal_function
1753 clean_state_log_if_needed (re_match_context_t *mctx, Idx next_state_log_idx)
1754 {
1755   Idx top = mctx->state_log_top;
1756
1757   if (next_state_log_idx >= mctx->input.bufs_len
1758       || (next_state_log_idx >= mctx->input.valid_len
1759           && mctx->input.valid_len < mctx->input.len))
1760     {
1761       reg_errcode_t err;
1762       err = extend_buffers (mctx);
1763       if (BE (err != REG_NOERROR, 0))
1764         return err;
1765     }
1766
1767   if (top < next_state_log_idx)
1768     {
1769       memset (mctx->state_log + top + 1, '\0',
1770               sizeof (re_dfastate_t *) * (next_state_log_idx - top));
1771       mctx->state_log_top = next_state_log_idx;
1772     }
1773   return REG_NOERROR;
1774 }
1775
1776 static reg_errcode_t
1777 internal_function
1778 merge_state_array (const re_dfa_t *dfa, re_dfastate_t **dst,
1779                    re_dfastate_t **src, Idx num)
1780 {
1781   Idx st_idx;
1782   reg_errcode_t err;
1783   for (st_idx = 0; st_idx < num; ++st_idx)
1784     {
1785       if (dst[st_idx] == NULL)
1786         dst[st_idx] = src[st_idx];
1787       else if (src[st_idx] != NULL)
1788         {
1789           re_node_set merged_set;
1790           err = re_node_set_init_union (&merged_set, &dst[st_idx]->nodes,
1791                                         &src[st_idx]->nodes);
1792           if (BE (err != REG_NOERROR, 0))
1793             return err;
1794           dst[st_idx] = re_acquire_state (&err, dfa, &merged_set);
1795           re_node_set_free (&merged_set);
1796           if (BE (err != REG_NOERROR, 0))
1797             return err;
1798         }
1799     }
1800   return REG_NOERROR;
1801 }
1802
1803 static reg_errcode_t
1804 internal_function
1805 update_cur_sifted_state (const re_match_context_t *mctx,
1806                          re_sift_context_t *sctx, Idx str_idx,
1807                          re_node_set *dest_nodes)
1808 {
1809   const re_dfa_t *const dfa = mctx->dfa;
1810   reg_errcode_t err = REG_NOERROR;
1811   const re_node_set *candidates;
1812   candidates = ((mctx->state_log[str_idx] == NULL) ? NULL
1813                 : &mctx->state_log[str_idx]->nodes);
1814
1815   if (dest_nodes->nelem == 0)
1816     sctx->sifted_states[str_idx] = NULL;
1817   else
1818     {
1819       if (candidates)
1820         {
1821           /* At first, add the nodes which can epsilon transit to a node in
1822              DEST_NODE.  */
1823           err = add_epsilon_src_nodes (dfa, dest_nodes, candidates);
1824           if (BE (err != REG_NOERROR, 0))
1825             return err;
1826
1827           /* Then, check the limitations in the current sift_context.  */
1828           if (sctx->limits.nelem)
1829             {
1830               err = check_subexp_limits (dfa, dest_nodes, candidates, &sctx->limits,
1831                                          mctx->bkref_ents, str_idx);
1832               if (BE (err != REG_NOERROR, 0))
1833                 return err;
1834             }
1835         }
1836
1837       sctx->sifted_states[str_idx] = re_acquire_state (&err, dfa, dest_nodes);
1838       if (BE (err != REG_NOERROR, 0))
1839         return err;
1840     }
1841
1842   if (candidates && mctx->state_log[str_idx]->has_backref)
1843     {
1844       err = sift_states_bkref (mctx, sctx, str_idx, candidates);
1845       if (BE (err != REG_NOERROR, 0))
1846         return err;
1847     }
1848   return REG_NOERROR;
1849 }
1850
1851 static reg_errcode_t
1852 internal_function __attribute_warn_unused_result__
1853 add_epsilon_src_nodes (const re_dfa_t *dfa, re_node_set *dest_nodes,
1854                        const re_node_set *candidates)
1855 {
1856   reg_errcode_t err = REG_NOERROR;
1857   Idx i;
1858
1859   re_dfastate_t *state = re_acquire_state (&err, dfa, dest_nodes);
1860   if (BE (err != REG_NOERROR, 0))
1861     return err;
1862
1863   if (!state->inveclosure.alloc)
1864     {
1865       err = re_node_set_alloc (&state->inveclosure, dest_nodes->nelem);
1866       if (BE (err != REG_NOERROR, 0))
1867         return REG_ESPACE;
1868       for (i = 0; i < dest_nodes->nelem; i++)
1869         {
1870           err = re_node_set_merge (&state->inveclosure,
1871                                    dfa->inveclosures + dest_nodes->elems[i]);
1872           if (BE (err != REG_NOERROR, 0))
1873             return REG_ESPACE;
1874         }
1875     }
1876   return re_node_set_add_intersect (dest_nodes, candidates,
1877                                     &state->inveclosure);
1878 }
1879
1880 static reg_errcode_t
1881 internal_function
1882 sub_epsilon_src_nodes (const re_dfa_t *dfa, Idx node, re_node_set *dest_nodes,
1883                        const re_node_set *candidates)
1884 {
1885     Idx ecl_idx;
1886     reg_errcode_t err;
1887     re_node_set *inv_eclosure = dfa->inveclosures + node;
1888     re_node_set except_nodes;
1889     re_node_set_init_empty (&except_nodes);
1890     for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
1891       {
1892         Idx cur_node = inv_eclosure->elems[ecl_idx];
1893         if (cur_node == node)
1894           continue;
1895         if (IS_EPSILON_NODE (dfa->nodes[cur_node].type))
1896           {
1897             Idx edst1 = dfa->edests[cur_node].elems[0];
1898             Idx edst2 = ((dfa->edests[cur_node].nelem > 1)
1899                          ? dfa->edests[cur_node].elems[1] : REG_MISSING);
1900             if ((!re_node_set_contains (inv_eclosure, edst1)
1901                  && re_node_set_contains (dest_nodes, edst1))
1902                 || (REG_VALID_NONZERO_INDEX (edst2)
1903                     && !re_node_set_contains (inv_eclosure, edst2)
1904                     && re_node_set_contains (dest_nodes, edst2)))
1905               {
1906                 err = re_node_set_add_intersect (&except_nodes, candidates,
1907                                                  dfa->inveclosures + cur_node);
1908                 if (BE (err != REG_NOERROR, 0))
1909                   {
1910                     re_node_set_free (&except_nodes);
1911                     return err;
1912                   }
1913               }
1914           }
1915       }
1916     for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
1917       {
1918         Idx cur_node = inv_eclosure->elems[ecl_idx];
1919         if (!re_node_set_contains (&except_nodes, cur_node))
1920           {
1921             Idx idx = re_node_set_contains (dest_nodes, cur_node) - 1;
1922             re_node_set_remove_at (dest_nodes, idx);
1923           }
1924       }
1925     re_node_set_free (&except_nodes);
1926     return REG_NOERROR;
1927 }
1928
1929 static bool
1930 internal_function
1931 check_dst_limits (const re_match_context_t *mctx, const re_node_set *limits,
1932                   Idx dst_node, Idx dst_idx, Idx src_node, Idx src_idx)
1933 {
1934   const re_dfa_t *const dfa = mctx->dfa;
1935   Idx lim_idx, src_pos, dst_pos;
1936
1937   Idx dst_bkref_idx = search_cur_bkref_entry (mctx, dst_idx);
1938   Idx src_bkref_idx = search_cur_bkref_entry (mctx, src_idx);
1939   for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
1940     {
1941       Idx subexp_idx;
1942       struct re_backref_cache_entry *ent;
1943       ent = mctx->bkref_ents + limits->elems[lim_idx];
1944       subexp_idx = dfa->nodes[ent->node].opr.idx;
1945
1946       dst_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
1947                                            subexp_idx, dst_node, dst_idx,
1948                                            dst_bkref_idx);
1949       src_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
1950                                            subexp_idx, src_node, src_idx,
1951                                            src_bkref_idx);
1952
1953       /* In case of:
1954          <src> <dst> ( <subexp> )
1955          ( <subexp> ) <src> <dst>
1956          ( <subexp1> <src> <subexp2> <dst> <subexp3> )  */
1957       if (src_pos == dst_pos)
1958         continue; /* This is unrelated limitation.  */
1959       else
1960         return true;
1961     }
1962   return false;
1963 }
1964
1965 static int
1966 internal_function
1967 check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, int boundaries,
1968                              Idx subexp_idx, Idx from_node, Idx bkref_idx)
1969 {
1970   const re_dfa_t *const dfa = mctx->dfa;
1971   const re_node_set *eclosures = dfa->eclosures + from_node;
1972   Idx node_idx;
1973
1974   /* Else, we are on the boundary: examine the nodes on the epsilon
1975      closure.  */
1976   for (node_idx = 0; node_idx < eclosures->nelem; ++node_idx)
1977     {
1978       Idx node = eclosures->elems[node_idx];
1979       switch (dfa->nodes[node].type)
1980         {
1981         case OP_BACK_REF:
1982           if (bkref_idx != REG_MISSING)
1983             {
1984               struct re_backref_cache_entry *ent = mctx->bkref_ents + bkref_idx;
1985               do
1986                 {
1987                   Idx dst;
1988                   int cpos;
1989
1990                   if (ent->node != node)
1991                     continue;
1992
1993                   if (subexp_idx < BITSET_WORD_BITS
1994                       && !(ent->eps_reachable_subexps_map
1995                            & ((bitset_word_t) 1 << subexp_idx)))
1996                     continue;
1997
1998                   /* Recurse trying to reach the OP_OPEN_SUBEXP and
1999                      OP_CLOSE_SUBEXP cases below.  But, if the
2000                      destination node is the same node as the source
2001                      node, don't recurse because it would cause an
2002                      infinite loop: a regex that exhibits this behavior
2003                      is ()\1*\1*  */
2004                   dst = dfa->edests[node].elems[0];
2005                   if (dst == from_node)
2006                     {
2007                       if (boundaries & 1)
2008                         return -1;
2009                       else /* if (boundaries & 2) */
2010                         return 0;
2011                     }
2012
2013                   cpos =
2014                     check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
2015                                                  dst, bkref_idx);
2016                   if (cpos == -1 /* && (boundaries & 1) */)
2017                     return -1;
2018                   if (cpos == 0 && (boundaries & 2))
2019                     return 0;
2020
2021                   if (subexp_idx < BITSET_WORD_BITS)
2022                     ent->eps_reachable_subexps_map
2023                       &= ~((bitset_word_t) 1 << subexp_idx);
2024                 }
2025               while (ent++->more);
2026             }
2027           break;
2028
2029         case OP_OPEN_SUBEXP:
2030           if ((boundaries & 1) && subexp_idx == dfa->nodes[node].opr.idx)
2031             return -1;
2032           break;
2033
2034         case OP_CLOSE_SUBEXP:
2035           if ((boundaries & 2) && subexp_idx == dfa->nodes[node].opr.idx)
2036             return 0;
2037           break;
2038
2039         default:
2040             break;
2041         }
2042     }
2043
2044   return (boundaries & 2) ? 1 : 0;
2045 }
2046
2047 static int
2048 internal_function
2049 check_dst_limits_calc_pos (const re_match_context_t *mctx, Idx limit,
2050                            Idx subexp_idx, Idx from_node, Idx str_idx,
2051                            Idx bkref_idx)
2052 {
2053   struct re_backref_cache_entry *lim = mctx->bkref_ents + limit;
2054   int boundaries;
2055
2056   /* If we are outside the range of the subexpression, return -1 or 1.  */
2057   if (str_idx < lim->subexp_from)
2058     return -1;
2059
2060   if (lim->subexp_to < str_idx)
2061     return 1;
2062
2063   /* If we are within the subexpression, return 0.  */
2064   boundaries = (str_idx == lim->subexp_from);
2065   boundaries |= (str_idx == lim->subexp_to) << 1;
2066   if (boundaries == 0)
2067     return 0;
2068
2069   /* Else, examine epsilon closure.  */
2070   return check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
2071                                       from_node, bkref_idx);
2072 }
2073
2074 /* Check the limitations of sub expressions LIMITS, and remove the nodes
2075    which are against limitations from DEST_NODES. */
2076
2077 static reg_errcode_t
2078 internal_function
2079 check_subexp_limits (const re_dfa_t *dfa, re_node_set *dest_nodes,
2080                      const re_node_set *candidates, re_node_set *limits,
2081                      struct re_backref_cache_entry *bkref_ents, Idx str_idx)
2082 {
2083   reg_errcode_t err;
2084   Idx node_idx, lim_idx;
2085
2086   for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
2087     {
2088       Idx subexp_idx;
2089       struct re_backref_cache_entry *ent;
2090       ent = bkref_ents + limits->elems[lim_idx];
2091
2092       if (str_idx <= ent->subexp_from || ent->str_idx < str_idx)
2093         continue; /* This is unrelated limitation.  */
2094
2095       subexp_idx = dfa->nodes[ent->node].opr.idx;
2096       if (ent->subexp_to == str_idx)
2097         {
2098           Idx ops_node = REG_MISSING;
2099           Idx cls_node = REG_MISSING;
2100           for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2101             {
2102               Idx node = dest_nodes->elems[node_idx];
2103               re_token_type_t type = dfa->nodes[node].type;
2104               if (type == OP_OPEN_SUBEXP
2105                   && subexp_idx == dfa->nodes[node].opr.idx)
2106                 ops_node = node;
2107               else if (type == OP_CLOSE_SUBEXP
2108                        && subexp_idx == dfa->nodes[node].opr.idx)
2109                 cls_node = node;
2110             }
2111
2112           /* Check the limitation of the open subexpression.  */
2113           /* Note that (ent->subexp_to = str_idx != ent->subexp_from).  */
2114           if (REG_VALID_INDEX (ops_node))
2115             {
2116               err = sub_epsilon_src_nodes (dfa, ops_node, dest_nodes,
2117                                            candidates);
2118               if (BE (err != REG_NOERROR, 0))
2119                 return err;
2120             }
2121
2122           /* Check the limitation of the close subexpression.  */
2123           if (REG_VALID_INDEX (cls_node))
2124             for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2125               {
2126                 Idx node = dest_nodes->elems[node_idx];
2127                 if (!re_node_set_contains (dfa->inveclosures + node,
2128                                            cls_node)
2129                     && !re_node_set_contains (dfa->eclosures + node,
2130                                               cls_node))
2131                   {
2132                     /* It is against this limitation.
2133                        Remove it form the current sifted state.  */
2134                     err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
2135                                                  candidates);
2136                     if (BE (err != REG_NOERROR, 0))
2137                       return err;
2138                     --node_idx;
2139                   }
2140               }
2141         }
2142       else /* (ent->subexp_to != str_idx)  */
2143         {
2144           for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2145             {
2146               Idx node = dest_nodes->elems[node_idx];
2147               re_token_type_t type = dfa->nodes[node].type;
2148               if (type == OP_CLOSE_SUBEXP || type == OP_OPEN_SUBEXP)
2149                 {
2150                   if (subexp_idx != dfa->nodes[node].opr.idx)
2151                     continue;
2152                   /* It is against this limitation.
2153                      Remove it form the current sifted state.  */
2154                   err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
2155                                                candidates);
2156                   if (BE (err != REG_NOERROR, 0))
2157                     return err;
2158                 }
2159             }
2160         }
2161     }
2162   return REG_NOERROR;
2163 }
2164
2165 static reg_errcode_t
2166 internal_function __attribute_warn_unused_result__
2167 sift_states_bkref (const re_match_context_t *mctx, re_sift_context_t *sctx,
2168                    Idx str_idx, const re_node_set *candidates)
2169 {
2170   const re_dfa_t *const dfa = mctx->dfa;
2171   reg_errcode_t err;
2172   Idx node_idx, node;
2173   re_sift_context_t local_sctx;
2174   Idx first_idx = search_cur_bkref_entry (mctx, str_idx);
2175
2176   if (first_idx == REG_MISSING)
2177     return REG_NOERROR;
2178
2179   local_sctx.sifted_states = NULL; /* Mark that it hasn't been initialized.  */
2180
2181   for (node_idx = 0; node_idx < candidates->nelem; ++node_idx)
2182     {
2183       Idx enabled_idx;
2184       re_token_type_t type;
2185       struct re_backref_cache_entry *entry;
2186       node = candidates->elems[node_idx];
2187       type = dfa->nodes[node].type;
2188       /* Avoid infinite loop for the REs like "()\1+".  */
2189       if (node == sctx->last_node && str_idx == sctx->last_str_idx)
2190         continue;
2191       if (type != OP_BACK_REF)
2192         continue;
2193
2194       entry = mctx->bkref_ents + first_idx;
2195       enabled_idx = first_idx;
2196       do
2197         {
2198           Idx subexp_len;
2199           Idx to_idx;
2200           Idx dst_node;
2201           bool ok;
2202           re_dfastate_t *cur_state;
2203
2204           if (entry->node != node)
2205             continue;
2206           subexp_len = entry->subexp_to - entry->subexp_from;
2207           to_idx = str_idx + subexp_len;
2208           dst_node = (subexp_len ? dfa->nexts[node]
2209                       : dfa->edests[node].elems[0]);
2210
2211           if (to_idx > sctx->last_str_idx
2212               || sctx->sifted_states[to_idx] == NULL
2213               || !STATE_NODE_CONTAINS (sctx->sifted_states[to_idx], dst_node)
2214               || check_dst_limits (mctx, &sctx->limits, node,
2215                                    str_idx, dst_node, to_idx))
2216             continue;
2217
2218           if (local_sctx.sifted_states == NULL)
2219             {
2220               local_sctx = *sctx;
2221               err = re_node_set_init_copy (&local_sctx.limits, &sctx->limits);
2222               if (BE (err != REG_NOERROR, 0))
2223                 goto free_return;
2224             }
2225           local_sctx.last_node = node;
2226           local_sctx.last_str_idx = str_idx;
2227           ok = re_node_set_insert (&local_sctx.limits, enabled_idx);
2228           if (BE (! ok, 0))
2229             {
2230               err = REG_ESPACE;
2231               goto free_return;
2232             }
2233           cur_state = local_sctx.sifted_states[str_idx];
2234           err = sift_states_backward (mctx, &local_sctx);
2235           if (BE (err != REG_NOERROR, 0))
2236             goto free_return;
2237           if (sctx->limited_states != NULL)
2238             {
2239               err = merge_state_array (dfa, sctx->limited_states,
2240                                        local_sctx.sifted_states,
2241                                        str_idx + 1);
2242               if (BE (err != REG_NOERROR, 0))
2243                 goto free_return;
2244             }
2245           local_sctx.sifted_states[str_idx] = cur_state;
2246           re_node_set_remove (&local_sctx.limits, enabled_idx);
2247
2248           /* mctx->bkref_ents may have changed, reload the pointer.  */
2249           entry = mctx->bkref_ents + enabled_idx;
2250         }
2251       while (enabled_idx++, entry++->more);
2252     }
2253   err = REG_NOERROR;
2254  free_return:
2255   if (local_sctx.sifted_states != NULL)
2256     {
2257       re_node_set_free (&local_sctx.limits);
2258     }
2259
2260   return err;
2261 }
2262
2263
2264 #ifdef RE_ENABLE_I18N
2265 static int
2266 internal_function
2267 sift_states_iter_mb (const re_match_context_t *mctx, re_sift_context_t *sctx,
2268                      Idx node_idx, Idx str_idx, Idx max_str_idx)
2269 {
2270   const re_dfa_t *const dfa = mctx->dfa;
2271   int naccepted;
2272   /* Check the node can accept `multi byte'.  */
2273   naccepted = check_node_accept_bytes (dfa, node_idx, &mctx->input, str_idx);
2274   if (naccepted > 0 && str_idx + naccepted <= max_str_idx &&
2275       !STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + naccepted],
2276                             dfa->nexts[node_idx]))
2277     /* The node can't accept the `multi byte', or the
2278        destination was already thrown away, then the node
2279        could't accept the current input `multi byte'.   */
2280     naccepted = 0;
2281   /* Otherwise, it is sure that the node could accept
2282      `naccepted' bytes input.  */
2283   return naccepted;
2284 }
2285 #endif /* RE_ENABLE_I18N */
2286
2287 \f
2288 /* Functions for state transition.  */
2289
2290 /* Return the next state to which the current state STATE will transit by
2291    accepting the current input byte, and update STATE_LOG if necessary.
2292    If STATE can accept a multibyte char/collating element/back reference
2293    update the destination of STATE_LOG.  */
2294
2295 static re_dfastate_t *
2296 internal_function __attribute_warn_unused_result__
2297 transit_state (reg_errcode_t *err, re_match_context_t *mctx,
2298                re_dfastate_t *state)
2299 {
2300   re_dfastate_t **trtable;
2301   unsigned char ch;
2302
2303 #ifdef RE_ENABLE_I18N
2304   /* If the current state can accept multibyte.  */
2305   if (BE (state->accept_mb, 0))
2306     {
2307       *err = transit_state_mb (mctx, state);
2308       if (BE (*err != REG_NOERROR, 0))
2309         return NULL;
2310     }
2311 #endif /* RE_ENABLE_I18N */
2312
2313   /* Then decide the next state with the single byte.  */
2314 #if 0
2315   if (0)
2316     /* don't use transition table  */
2317     return transit_state_sb (err, mctx, state);
2318 #endif
2319
2320   /* Use transition table  */
2321   ch = re_string_fetch_byte (&mctx->input);
2322   for (;;)
2323     {
2324       trtable = state->trtable;
2325       if (BE (trtable != NULL, 1))
2326         return trtable[ch];
2327
2328       trtable = state->word_trtable;
2329       if (BE (trtable != NULL, 1))
2330         {
2331           unsigned int context;
2332           context
2333             = re_string_context_at (&mctx->input,
2334                                     re_string_cur_idx (&mctx->input) - 1,
2335                                     mctx->eflags);
2336           if (IS_WORD_CONTEXT (context))
2337             return trtable[ch + SBC_MAX];
2338           else
2339             return trtable[ch];
2340         }
2341
2342       if (!build_trtable (mctx->dfa, state))
2343         {
2344           *err = REG_ESPACE;
2345           return NULL;
2346         }
2347
2348       /* Retry, we now have a transition table.  */
2349     }
2350 }
2351
2352 /* Update the state_log if we need */
2353 static re_dfastate_t *
2354 internal_function
2355 merge_state_with_log (reg_errcode_t *err, re_match_context_t *mctx,
2356                       re_dfastate_t *next_state)
2357 {
2358   const re_dfa_t *const dfa = mctx->dfa;
2359   Idx cur_idx = re_string_cur_idx (&mctx->input);
2360
2361   if (cur_idx > mctx->state_log_top)
2362     {
2363       mctx->state_log[cur_idx] = next_state;
2364       mctx->state_log_top = cur_idx;
2365     }
2366   else if (mctx->state_log[cur_idx] == 0)
2367     {
2368       mctx->state_log[cur_idx] = next_state;
2369     }
2370   else
2371     {
2372       re_dfastate_t *pstate;
2373       unsigned int context;
2374       re_node_set next_nodes, *log_nodes, *table_nodes = NULL;
2375       /* If (state_log[cur_idx] != 0), it implies that cur_idx is
2376          the destination of a multibyte char/collating element/
2377          back reference.  Then the next state is the union set of
2378          these destinations and the results of the transition table.  */
2379       pstate = mctx->state_log[cur_idx];
2380       log_nodes = pstate->entrance_nodes;
2381       if (next_state != NULL)
2382         {
2383           table_nodes = next_state->entrance_nodes;
2384           *err = re_node_set_init_union (&next_nodes, table_nodes,
2385                                              log_nodes);
2386           if (BE (*err != REG_NOERROR, 0))
2387             return NULL;
2388         }
2389       else
2390         next_nodes = *log_nodes;
2391       /* Note: We already add the nodes of the initial state,
2392          then we don't need to add them here.  */
2393
2394       context = re_string_context_at (&mctx->input,
2395                                       re_string_cur_idx (&mctx->input) - 1,
2396                                       mctx->eflags);
2397       next_state = mctx->state_log[cur_idx]
2398         = re_acquire_state_context (err, dfa, &next_nodes, context);
2399       /* We don't need to check errors here, since the return value of
2400          this function is next_state and ERR is already set.  */
2401
2402       if (table_nodes != NULL)
2403         re_node_set_free (&next_nodes);
2404     }
2405
2406   if (BE (dfa->nbackref, 0) && next_state != NULL)
2407     {
2408       /* Check OP_OPEN_SUBEXP in the current state in case that we use them
2409          later.  We must check them here, since the back references in the
2410          next state might use them.  */
2411       *err = check_subexp_matching_top (mctx, &next_state->nodes,
2412                                         cur_idx);
2413       if (BE (*err != REG_NOERROR, 0))
2414         return NULL;
2415
2416       /* If the next state has back references.  */
2417       if (next_state->has_backref)
2418         {
2419           *err = transit_state_bkref (mctx, &next_state->nodes);
2420           if (BE (*err != REG_NOERROR, 0))
2421             return NULL;
2422           next_state = mctx->state_log[cur_idx];
2423         }
2424     }
2425
2426   return next_state;
2427 }
2428
2429 /* Skip bytes in the input that correspond to part of a
2430    multi-byte match, then look in the log for a state
2431    from which to restart matching.  */
2432 static re_dfastate_t *
2433 internal_function
2434 find_recover_state (reg_errcode_t *err, re_match_context_t *mctx)
2435 {
2436   re_dfastate_t *cur_state;
2437   do
2438     {
2439       Idx max = mctx->state_log_top;
2440       Idx cur_str_idx = re_string_cur_idx (&mctx->input);
2441
2442       do
2443         {
2444           if (++cur_str_idx > max)
2445             return NULL;
2446           re_string_skip_bytes (&mctx->input, 1);
2447         }
2448       while (mctx->state_log[cur_str_idx] == NULL);
2449
2450       cur_state = merge_state_with_log (err, mctx, NULL);
2451     }
2452   while (*err == REG_NOERROR && cur_state == NULL);
2453   return cur_state;
2454 }
2455
2456 /* Helper functions for transit_state.  */
2457
2458 /* From the node set CUR_NODES, pick up the nodes whose types are
2459    OP_OPEN_SUBEXP and which have corresponding back references in the regular
2460    expression. And register them to use them later for evaluating the
2461    correspoding back references.  */
2462
2463 static reg_errcode_t
2464 internal_function
2465 check_subexp_matching_top (re_match_context_t *mctx, re_node_set *cur_nodes,
2466                            Idx str_idx)
2467 {
2468   const re_dfa_t *const dfa = mctx->dfa;
2469   Idx node_idx;
2470   reg_errcode_t err;
2471
2472   /* TODO: This isn't efficient.
2473            Because there might be more than one nodes whose types are
2474            OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
2475            nodes.
2476            E.g. RE: (a){2}  */
2477   for (node_idx = 0; node_idx < cur_nodes->nelem; ++node_idx)
2478     {
2479       Idx node = cur_nodes->elems[node_idx];
2480       if (dfa->nodes[node].type == OP_OPEN_SUBEXP
2481           && dfa->nodes[node].opr.idx < BITSET_WORD_BITS
2482           && (dfa->used_bkref_map
2483               & ((bitset_word_t) 1 << dfa->nodes[node].opr.idx)))
2484         {
2485           err = match_ctx_add_subtop (mctx, node, str_idx);
2486           if (BE (err != REG_NOERROR, 0))
2487             return err;
2488         }
2489     }
2490   return REG_NOERROR;
2491 }
2492
2493 #if 0
2494 /* Return the next state to which the current state STATE will transit by
2495    accepting the current input byte.  */
2496
2497 static re_dfastate_t *
2498 transit_state_sb (reg_errcode_t *err, re_match_context_t *mctx,
2499                   re_dfastate_t *state)
2500 {
2501   const re_dfa_t *const dfa = mctx->dfa;
2502   re_node_set next_nodes;
2503   re_dfastate_t *next_state;
2504   Idx node_cnt, cur_str_idx = re_string_cur_idx (&mctx->input);
2505   unsigned int context;
2506
2507   *err = re_node_set_alloc (&next_nodes, state->nodes.nelem + 1);
2508   if (BE (*err != REG_NOERROR, 0))
2509     return NULL;
2510   for (node_cnt = 0; node_cnt < state->nodes.nelem; ++node_cnt)
2511     {
2512       Idx cur_node = state->nodes.elems[node_cnt];
2513       if (check_node_accept (mctx, dfa->nodes + cur_node, cur_str_idx))
2514         {
2515           *err = re_node_set_merge (&next_nodes,
2516                                     dfa->eclosures + dfa->nexts[cur_node]);
2517           if (BE (*err != REG_NOERROR, 0))
2518             {
2519               re_node_set_free (&next_nodes);
2520               return NULL;
2521             }
2522         }
2523     }
2524   context = re_string_context_at (&mctx->input, cur_str_idx, mctx->eflags);
2525   next_state = re_acquire_state_context (err, dfa, &next_nodes, context);
2526   /* We don't need to check errors here, since the return value of
2527      this function is next_state and ERR is already set.  */
2528
2529   re_node_set_free (&next_nodes);
2530   re_string_skip_bytes (&mctx->input, 1);
2531   return next_state;
2532 }
2533 #endif
2534
2535 #ifdef RE_ENABLE_I18N
2536 static reg_errcode_t
2537 internal_function
2538 transit_state_mb (re_match_context_t *mctx, re_dfastate_t *pstate)
2539 {
2540   const re_dfa_t *const dfa = mctx->dfa;
2541   reg_errcode_t err;
2542   Idx i;
2543
2544   for (i = 0; i < pstate->nodes.nelem; ++i)
2545     {
2546       re_node_set dest_nodes, *new_nodes;
2547       Idx cur_node_idx = pstate->nodes.elems[i];
2548       int naccepted;
2549       Idx dest_idx;
2550       unsigned int context;
2551       re_dfastate_t *dest_state;
2552
2553       if (!dfa->nodes[cur_node_idx].accept_mb)
2554         continue;
2555
2556       if (dfa->nodes[cur_node_idx].constraint)
2557         {
2558           context = re_string_context_at (&mctx->input,
2559                                           re_string_cur_idx (&mctx->input),
2560                                           mctx->eflags);
2561           if (NOT_SATISFY_NEXT_CONSTRAINT (dfa->nodes[cur_node_idx].constraint,
2562                                            context))
2563             continue;
2564         }
2565
2566       /* How many bytes the node can accept?  */
2567       naccepted = check_node_accept_bytes (dfa, cur_node_idx, &mctx->input,
2568                                            re_string_cur_idx (&mctx->input));
2569       if (naccepted == 0)
2570         continue;
2571
2572       /* The node can accepts `naccepted' bytes.  */
2573       dest_idx = re_string_cur_idx (&mctx->input) + naccepted;
2574       mctx->max_mb_elem_len = ((mctx->max_mb_elem_len < naccepted) ? naccepted
2575                                : mctx->max_mb_elem_len);
2576       err = clean_state_log_if_needed (mctx, dest_idx);
2577       if (BE (err != REG_NOERROR, 0))
2578         return err;
2579 #ifdef DEBUG
2580       assert (dfa->nexts[cur_node_idx] != REG_MISSING);
2581 #endif
2582       new_nodes = dfa->eclosures + dfa->nexts[cur_node_idx];
2583
2584       dest_state = mctx->state_log[dest_idx];
2585       if (dest_state == NULL)
2586         dest_nodes = *new_nodes;
2587       else
2588         {
2589           err = re_node_set_init_union (&dest_nodes,
2590                                         dest_state->entrance_nodes, new_nodes);
2591           if (BE (err != REG_NOERROR, 0))
2592             return err;
2593         }
2594       context = re_string_context_at (&mctx->input, dest_idx - 1,
2595                                       mctx->eflags);
2596       mctx->state_log[dest_idx]
2597         = re_acquire_state_context (&err, dfa, &dest_nodes, context);
2598       if (dest_state != NULL)
2599         re_node_set_free (&dest_nodes);
2600       if (BE (mctx->state_log[dest_idx] == NULL && err != REG_NOERROR, 0))
2601         return err;
2602     }
2603   return REG_NOERROR;
2604 }
2605 #endif /* RE_ENABLE_I18N */
2606
2607 static reg_errcode_t
2608 internal_function
2609 transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes)
2610 {
2611   const re_dfa_t *const dfa = mctx->dfa;
2612   reg_errcode_t err;
2613   Idx i;
2614   Idx cur_str_idx = re_string_cur_idx (&mctx->input);
2615
2616   for (i = 0; i < nodes->nelem; ++i)
2617     {
2618       Idx dest_str_idx, prev_nelem, bkc_idx;
2619       Idx node_idx = nodes->elems[i];
2620       unsigned int context;
2621       const re_token_t *node = dfa->nodes + node_idx;
2622       re_node_set *new_dest_nodes;
2623
2624       /* Check whether `node' is a backreference or not.  */
2625       if (node->type != OP_BACK_REF)
2626         continue;
2627
2628       if (node->constraint)
2629         {
2630           context = re_string_context_at (&mctx->input, cur_str_idx,
2631                                           mctx->eflags);
2632           if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
2633             continue;
2634         }
2635
2636       /* `node' is a backreference.
2637          Check the substring which the substring matched.  */
2638       bkc_idx = mctx->nbkref_ents;
2639       err = get_subexp (mctx, node_idx, cur_str_idx);
2640       if (BE (err != REG_NOERROR, 0))
2641         goto free_return;
2642
2643       /* And add the epsilon closures (which is `new_dest_nodes') of
2644          the backreference to appropriate state_log.  */
2645 #ifdef DEBUG
2646       assert (dfa->nexts[node_idx] != REG_MISSING);
2647 #endif
2648       for (; bkc_idx < mctx->nbkref_ents; ++bkc_idx)
2649         {
2650           Idx subexp_len;
2651           re_dfastate_t *dest_state;
2652           struct re_backref_cache_entry *bkref_ent;
2653           bkref_ent = mctx->bkref_ents + bkc_idx;
2654           if (bkref_ent->node != node_idx || bkref_ent->str_idx != cur_str_idx)
2655             continue;
2656           subexp_len = bkref_ent->subexp_to - bkref_ent->subexp_from;
2657           new_dest_nodes = (subexp_len == 0
2658                             ? dfa->eclosures + dfa->edests[node_idx].elems[0]
2659                             : dfa->eclosures + dfa->nexts[node_idx]);
2660           dest_str_idx = (cur_str_idx + bkref_ent->subexp_to
2661                           - bkref_ent->subexp_from);
2662           context = re_string_context_at (&mctx->input, dest_str_idx - 1,
2663                                           mctx->eflags);
2664           dest_state = mctx->state_log[dest_str_idx];
2665           prev_nelem = ((mctx->state_log[cur_str_idx] == NULL) ? 0
2666                         : mctx->state_log[cur_str_idx]->nodes.nelem);
2667           /* Add `new_dest_node' to state_log.  */
2668           if (dest_state == NULL)
2669             {
2670               mctx->state_log[dest_str_idx]
2671                 = re_acquire_state_context (&err, dfa, new_dest_nodes,
2672                                             context);
2673               if (BE (mctx->state_log[dest_str_idx] == NULL
2674                       && err != REG_NOERROR, 0))
2675                 goto free_return;
2676             }
2677           else
2678             {
2679               re_node_set dest_nodes;
2680               err = re_node_set_init_union (&dest_nodes,
2681                                             dest_state->entrance_nodes,
2682                                             new_dest_nodes);
2683               if (BE (err != REG_NOERROR, 0))
2684                 {
2685                   re_node_set_free (&dest_nodes);
2686                   goto free_return;
2687                 }
2688               mctx->state_log[dest_str_idx]
2689                 = re_acquire_state_context (&err, dfa, &dest_nodes, context);
2690               re_node_set_free (&dest_nodes);
2691               if (BE (mctx->state_log[dest_str_idx] == NULL
2692                       && err != REG_NOERROR, 0))
2693                 goto free_return;
2694             }
2695           /* We need to check recursively if the backreference can epsilon
2696              transit.  */
2697           if (subexp_len == 0
2698               && mctx->state_log[cur_str_idx]->nodes.nelem > prev_nelem)
2699             {
2700               err = check_subexp_matching_top (mctx, new_dest_nodes,
2701                                                cur_str_idx);
2702               if (BE (err != REG_NOERROR, 0))
2703                 goto free_return;
2704               err = transit_state_bkref (mctx, new_dest_nodes);
2705               if (BE (err != REG_NOERROR, 0))
2706                 goto free_return;
2707             }
2708         }
2709     }
2710   err = REG_NOERROR;
2711  free_return:
2712   return err;
2713 }
2714
2715 /* Enumerate all the candidates which the backreference BKREF_NODE can match
2716    at BKREF_STR_IDX, and register them by match_ctx_add_entry().
2717    Note that we might collect inappropriate candidates here.
2718    However, the cost of checking them strictly here is too high, then we
2719    delay these checking for prune_impossible_nodes().  */
2720
2721 static reg_errcode_t
2722 internal_function __attribute_warn_unused_result__
2723 get_subexp (re_match_context_t *mctx, Idx bkref_node, Idx bkref_str_idx)
2724 {
2725   const re_dfa_t *const dfa = mctx->dfa;
2726   Idx subexp_num, sub_top_idx;
2727   const char *buf = (const char *) re_string_get_buffer (&mctx->input);
2728   /* Return if we have already checked BKREF_NODE at BKREF_STR_IDX.  */
2729   Idx cache_idx = search_cur_bkref_entry (mctx, bkref_str_idx);
2730   if (cache_idx != REG_MISSING)
2731     {
2732       const struct re_backref_cache_entry *entry
2733         = mctx->bkref_ents + cache_idx;
2734       do
2735         if (entry->node == bkref_node)
2736           return REG_NOERROR; /* We already checked it.  */
2737       while (entry++->more);
2738     }
2739
2740   subexp_num = dfa->nodes[bkref_node].opr.idx;
2741
2742   /* For each sub expression  */
2743   for (sub_top_idx = 0; sub_top_idx < mctx->nsub_tops; ++sub_top_idx)
2744     {
2745       reg_errcode_t err;
2746       re_sub_match_top_t *sub_top = mctx->sub_tops[sub_top_idx];
2747       re_sub_match_last_t *sub_last;
2748       Idx sub_last_idx, sl_str, bkref_str_off;
2749
2750       if (dfa->nodes[sub_top->node].opr.idx != subexp_num)
2751         continue; /* It isn't related.  */
2752
2753       sl_str = sub_top->str_idx;
2754       bkref_str_off = bkref_str_idx;
2755       /* At first, check the last node of sub expressions we already
2756          evaluated.  */
2757       for (sub_last_idx = 0; sub_last_idx < sub_top->nlasts; ++sub_last_idx)
2758         {
2759           regoff_t sl_str_diff;
2760           sub_last = sub_top->lasts[sub_last_idx];
2761           sl_str_diff = sub_last->str_idx - sl_str;
2762           /* The matched string by the sub expression match with the substring
2763              at the back reference?  */
2764           if (sl_str_diff > 0)
2765             {
2766               if (BE (bkref_str_off + sl_str_diff > mctx->input.valid_len, 0))
2767                 {
2768                   /* Not enough chars for a successful match.  */
2769                   if (bkref_str_off + sl_str_diff > mctx->input.len)
2770                     break;
2771
2772                   err = clean_state_log_if_needed (mctx,
2773                                                    bkref_str_off
2774                                                    + sl_str_diff);
2775                   if (BE (err != REG_NOERROR, 0))
2776                     return err;
2777                   buf = (const char *) re_string_get_buffer (&mctx->input);
2778                 }
2779               if (memcmp (buf + bkref_str_off, buf + sl_str, sl_str_diff) != 0)
2780                 /* We don't need to search this sub expression any more.  */
2781                 break;
2782             }
2783           bkref_str_off += sl_str_diff;
2784           sl_str += sl_str_diff;
2785           err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
2786                                 bkref_str_idx);
2787
2788           /* Reload buf, since the preceding call might have reallocated
2789              the buffer.  */
2790           buf = (const char *) re_string_get_buffer (&mctx->input);
2791
2792           if (err == REG_NOMATCH)
2793             continue;
2794           if (BE (err != REG_NOERROR, 0))
2795             return err;
2796         }
2797
2798       if (sub_last_idx < sub_top->nlasts)
2799         continue;
2800       if (sub_last_idx > 0)
2801         ++sl_str;
2802       /* Then, search for the other last nodes of the sub expression.  */
2803       for (; sl_str <= bkref_str_idx; ++sl_str)
2804         {
2805           Idx cls_node;
2806           regoff_t sl_str_off;
2807           const re_node_set *nodes;
2808           sl_str_off = sl_str - sub_top->str_idx;
2809           /* The matched string by the sub expression match with the substring
2810              at the back reference?  */
2811           if (sl_str_off > 0)
2812             {
2813               if (BE (bkref_str_off >= mctx->input.valid_len, 0))
2814                 {
2815                   /* If we are at the end of the input, we cannot match.  */
2816                   if (bkref_str_off >= mctx->input.len)
2817                     break;
2818
2819                   err = extend_buffers (mctx);
2820                   if (BE (err != REG_NOERROR, 0))
2821                     return err;
2822
2823                   buf = (const char *) re_string_get_buffer (&mctx->input);
2824                 }
2825               if (buf [bkref_str_off++] != buf[sl_str - 1])
2826                 break; /* We don't need to search this sub expression
2827                           any more.  */
2828             }
2829           if (mctx->state_log[sl_str] == NULL)
2830             continue;
2831           /* Does this state have a ')' of the sub expression?  */
2832           nodes = &mctx->state_log[sl_str]->nodes;
2833           cls_node = find_subexp_node (dfa, nodes, subexp_num,
2834                                        OP_CLOSE_SUBEXP);
2835           if (cls_node == REG_MISSING)
2836             continue; /* No.  */
2837           if (sub_top->path == NULL)
2838             {
2839               sub_top->path = calloc (sizeof (state_array_t),
2840                                       sl_str - sub_top->str_idx + 1);
2841               if (sub_top->path == NULL)
2842                 return REG_ESPACE;
2843             }
2844           /* Can the OP_OPEN_SUBEXP node arrive the OP_CLOSE_SUBEXP node
2845              in the current context?  */
2846           err = check_arrival (mctx, sub_top->path, sub_top->node,
2847                                sub_top->str_idx, cls_node, sl_str,
2848                                OP_CLOSE_SUBEXP);
2849           if (err == REG_NOMATCH)
2850               continue;
2851           if (BE (err != REG_NOERROR, 0))
2852               return err;
2853           sub_last = match_ctx_add_sublast (sub_top, cls_node, sl_str);
2854           if (BE (sub_last == NULL, 0))
2855             return REG_ESPACE;
2856           err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
2857                                 bkref_str_idx);
2858           if (err == REG_NOMATCH)
2859             continue;
2860         }
2861     }
2862   return REG_NOERROR;
2863 }
2864
2865 /* Helper functions for get_subexp().  */
2866
2867 /* Check SUB_LAST can arrive to the back reference BKREF_NODE at BKREF_STR.
2868    If it can arrive, register the sub expression expressed with SUB_TOP
2869    and SUB_LAST.  */
2870
2871 static reg_errcode_t
2872 internal_function
2873 get_subexp_sub (re_match_context_t *mctx, const re_sub_match_top_t *sub_top,
2874                 re_sub_match_last_t *sub_last, Idx bkref_node, Idx bkref_str)
2875 {
2876   reg_errcode_t err;
2877   Idx to_idx;
2878   /* Can the subexpression arrive the back reference?  */
2879   err = check_arrival (mctx, &sub_last->path, sub_last->node,
2880                        sub_last->str_idx, bkref_node, bkref_str,
2881                        OP_OPEN_SUBEXP);
2882   if (err != REG_NOERROR)
2883     return err;
2884   err = match_ctx_add_entry (mctx, bkref_node, bkref_str, sub_top->str_idx,
2885                              sub_last->str_idx);
2886   if (BE (err != REG_NOERROR, 0))
2887     return err;
2888   to_idx = bkref_str + sub_last->str_idx - sub_top->str_idx;
2889   return clean_state_log_if_needed (mctx, to_idx);
2890 }
2891
2892 /* Find the first node which is '(' or ')' and whose index is SUBEXP_IDX.
2893    Search '(' if FL_OPEN, or search ')' otherwise.
2894    TODO: This function isn't efficient...
2895          Because there might be more than one nodes whose types are
2896          OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
2897          nodes.
2898          E.g. RE: (a){2}  */
2899
2900 static Idx
2901 internal_function
2902 find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
2903                   Idx subexp_idx, int type)
2904 {
2905   Idx cls_idx;
2906   for (cls_idx = 0; cls_idx < nodes->nelem; ++cls_idx)
2907     {
2908       Idx cls_node = nodes->elems[cls_idx];
2909       const re_token_t *node = dfa->nodes + cls_node;
2910       if (node->type == type
2911           && node->opr.idx == subexp_idx)
2912         return cls_node;
2913     }
2914   return REG_MISSING;
2915 }
2916
2917 /* Check whether the node TOP_NODE at TOP_STR can arrive to the node
2918    LAST_NODE at LAST_STR.  We record the path onto PATH since it will be
2919    heavily reused.
2920    Return REG_NOERROR if it can arrive, or REG_NOMATCH otherwise.  */
2921
2922 static reg_errcode_t
2923 internal_function __attribute_warn_unused_result__
2924 check_arrival (re_match_context_t *mctx, state_array_t *path, Idx top_node,
2925                Idx top_str, Idx last_node, Idx last_str, int type)
2926 {
2927   const re_dfa_t *const dfa = mctx->dfa;
2928   reg_errcode_t err = REG_NOERROR;
2929   Idx subexp_num, backup_cur_idx, str_idx, null_cnt;
2930   re_dfastate_t *cur_state = NULL;
2931   re_node_set *cur_nodes, next_nodes;
2932   re_dfastate_t **backup_state_log;
2933   unsigned int context;
2934
2935   subexp_num = dfa->nodes[top_node].opr.idx;
2936   /* Extend the buffer if we need.  */
2937   if (BE (path->alloc < last_str + mctx->max_mb_elem_len + 1, 0))
2938     {
2939       re_dfastate_t **new_array;
2940       Idx old_alloc = path->alloc;
2941       Idx new_alloc = old_alloc + last_str + mctx->max_mb_elem_len + 1;
2942       if (BE (new_alloc < old_alloc, 0)
2943           || BE (SIZE_MAX / sizeof (re_dfastate_t *) < new_alloc, 0))
2944         return REG_ESPACE;
2945       new_array = re_realloc (path->array, re_dfastate_t *, new_alloc);
2946       if (BE (new_array == NULL, 0))
2947         return REG_ESPACE;
2948       path->array = new_array;
2949       path->alloc = new_alloc;
2950       memset (new_array + old_alloc, '\0',
2951               sizeof (re_dfastate_t *) * (path->alloc - old_alloc));
2952     }
2953
2954   str_idx = path->next_idx ? path->next_idx : top_str;
2955
2956   /* Temporary modify MCTX.  */
2957   backup_state_log = mctx->state_log;
2958   backup_cur_idx = mctx->input.cur_idx;
2959   mctx->state_log = path->array;
2960   mctx->input.cur_idx = str_idx;
2961
2962   /* Setup initial node set.  */
2963   context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
2964   if (str_idx == top_str)
2965     {
2966       err = re_node_set_init_1 (&next_nodes, top_node);
2967       if (BE (err != REG_NOERROR, 0))
2968         return err;
2969       err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
2970       if (BE (err != REG_NOERROR, 0))
2971         {
2972           re_node_set_free (&next_nodes);
2973           return err;
2974         }
2975     }
2976   else
2977     {
2978       cur_state = mctx->state_log[str_idx];
2979       if (cur_state && cur_state->has_backref)
2980         {
2981           err = re_node_set_init_copy (&next_nodes, &cur_state->nodes);
2982           if (BE (err != REG_NOERROR, 0))
2983             return err;
2984         }
2985       else
2986         re_node_set_init_empty (&next_nodes);
2987     }
2988   if (str_idx == top_str || (cur_state && cur_state->has_backref))
2989     {
2990       if (next_nodes.nelem)
2991         {
2992           err = expand_bkref_cache (mctx, &next_nodes, str_idx,
2993                                     subexp_num, type);
2994           if (BE (err != REG_NOERROR, 0))
2995             {
2996               re_node_set_free (&next_nodes);
2997               return err;
2998             }
2999         }
3000       cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
3001       if (BE (cur_state == NULL && err != REG_NOERROR, 0))
3002         {
3003           re_node_set_free (&next_nodes);
3004           return err;
3005         }
3006       mctx->state_log[str_idx] = cur_state;
3007     }
3008
3009   for (null_cnt = 0; str_idx < last_str && null_cnt <= mctx->max_mb_elem_len;)
3010     {
3011       re_node_set_empty (&next_nodes);
3012       if (mctx->state_log[str_idx + 1])
3013         {
3014           err = re_node_set_merge (&next_nodes,
3015                                    &mctx->state_log[str_idx + 1]->nodes);
3016           if (BE (err != REG_NOERROR, 0))
3017             {
3018               re_node_set_free (&next_nodes);
3019               return err;
3020             }
3021         }
3022       if (cur_state)
3023         {
3024           err = check_arrival_add_next_nodes (mctx, str_idx,
3025                                               &cur_state->non_eps_nodes,
3026                                               &next_nodes);
3027           if (BE (err != REG_NOERROR, 0))
3028             {
3029               re_node_set_free (&next_nodes);
3030               return err;
3031             }
3032         }
3033       ++str_idx;
3034       if (next_nodes.nelem)
3035         {
3036           err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
3037           if (BE (err != REG_NOERROR, 0))
3038             {
3039               re_node_set_free (&next_nodes);
3040               return err;
3041             }
3042           err = expand_bkref_cache (mctx, &next_nodes, str_idx,
3043                                     subexp_num, type);
3044           if (BE (err != REG_NOERROR, 0))
3045             {
3046               re_node_set_free (&next_nodes);
3047               return err;
3048             }
3049         }
3050       context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
3051       cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
3052       if (BE (cur_state == NULL && err != REG_NOERROR, 0))
3053         {
3054           re_node_set_free (&next_nodes);
3055           return err;
3056         }
3057       mctx->state_log[str_idx] = cur_state;
3058       null_cnt = cur_state == NULL ? null_cnt + 1 : 0;
3059     }
3060   re_node_set_free (&next_nodes);
3061   cur_nodes = (mctx->state_log[last_str] == NULL ? NULL
3062                : &mctx->state_log[last_str]->nodes);
3063   path->next_idx = str_idx;
3064
3065   /* Fix MCTX.  */
3066   mctx->state_log = backup_state_log;
3067   mctx->input.cur_idx = backup_cur_idx;
3068
3069   /* Then check the current node set has the node LAST_NODE.  */
3070   if (cur_nodes != NULL && re_node_set_contains (cur_nodes, last_node))
3071     return REG_NOERROR;
3072
3073   return REG_NOMATCH;
3074 }
3075
3076 /* Helper functions for check_arrival.  */
3077
3078 /* Calculate the destination nodes of CUR_NODES at STR_IDX, and append them
3079    to NEXT_NODES.
3080    TODO: This function is similar to the functions transit_state*(),
3081          however this function has many additional works.
3082          Can't we unify them?  */
3083
3084 static reg_errcode_t
3085 internal_function __attribute_warn_unused_result__
3086 check_arrival_add_next_nodes (re_match_context_t *mctx, Idx str_idx,
3087                               re_node_set *cur_nodes, re_node_set *next_nodes)
3088 {
3089   const re_dfa_t *const dfa = mctx->dfa;
3090   bool ok;
3091   Idx cur_idx;
3092 #ifdef RE_ENABLE_I18N
3093   reg_errcode_t err = REG_NOERROR;
3094 #endif
3095   re_node_set union_set;
3096   re_node_set_init_empty (&union_set);
3097   for (cur_idx = 0; cur_idx < cur_nodes->nelem; ++cur_idx)
3098     {
3099       int naccepted = 0;
3100       Idx cur_node = cur_nodes->elems[cur_idx];
3101 #ifdef DEBUG
3102       re_token_type_t type = dfa->nodes[cur_node].type;
3103       assert (!IS_EPSILON_NODE (type));
3104 #endif
3105 #ifdef RE_ENABLE_I18N
3106       /* If the node may accept `multi byte'.  */
3107       if (dfa->nodes[cur_node].accept_mb)
3108         {
3109           naccepted = check_node_accept_bytes (dfa, cur_node, &mctx->input,
3110                                                str_idx);
3111           if (naccepted > 1)
3112             {
3113               re_dfastate_t *dest_state;
3114               Idx next_node = dfa->nexts[cur_node];
3115               Idx next_idx = str_idx + naccepted;
3116               dest_state = mctx->state_log[next_idx];
3117               re_node_set_empty (&union_set);
3118               if (dest_state)
3119                 {
3120                   err = re_node_set_merge (&union_set, &dest_state->nodes);
3121                   if (BE (err != REG_NOERROR, 0))
3122                     {
3123                       re_node_set_free (&union_set);
3124                       return err;
3125                     }
3126                 }
3127               ok = re_node_set_insert (&union_set, next_node);
3128               if (BE (! ok, 0))
3129                 {
3130                   re_node_set_free (&union_set);
3131                   return REG_ESPACE;
3132                 }
3133               mctx->state_log[next_idx] = re_acquire_state (&err, dfa,
3134                                                             &union_set);
3135               if (BE (mctx->state_log[next_idx] == NULL
3136                       && err != REG_NOERROR, 0))
3137                 {
3138                   re_node_set_free (&union_set);
3139                   return err;
3140                 }
3141             }
3142         }
3143 #endif /* RE_ENABLE_I18N */
3144       if (naccepted
3145           || check_node_accept (mctx, dfa->nodes + cur_node, str_idx))
3146         {
3147           ok = re_node_set_insert (next_nodes, dfa->nexts[cur_node]);
3148           if (BE (! ok, 0))
3149             {
3150               re_node_set_free (&union_set);
3151               return REG_ESPACE;
3152             }
3153         }
3154     }
3155   re_node_set_free (&union_set);
3156   return REG_NOERROR;
3157 }
3158
3159 /* For all the nodes in CUR_NODES, add the epsilon closures of them to
3160    CUR_NODES, however exclude the nodes which are:
3161     - inside the sub expression whose number is EX_SUBEXP, if FL_OPEN.
3162     - out of the sub expression whose number is EX_SUBEXP, if !FL_OPEN.
3163 */
3164
3165 static reg_errcode_t
3166 internal_function
3167 check_arrival_expand_ecl (const re_dfa_t *dfa, re_node_set *cur_nodes,
3168                           Idx ex_subexp, int type)
3169 {
3170   reg_errcode_t err;
3171   Idx idx, outside_node;
3172   re_node_set new_nodes;
3173 #ifdef DEBUG
3174   assert (cur_nodes->nelem);
3175 #endif
3176   err = re_node_set_alloc (&new_nodes, cur_nodes->nelem);
3177   if (BE (err != REG_NOERROR, 0))
3178     return err;
3179   /* Create a new node set NEW_NODES with the nodes which are epsilon
3180      closures of the node in CUR_NODES.  */
3181
3182   for (idx = 0; idx < cur_nodes->nelem; ++idx)
3183     {
3184       Idx cur_node = cur_nodes->elems[idx];
3185       const re_node_set *eclosure = dfa->eclosures + cur_node;
3186       outside_node = find_subexp_node (dfa, eclosure, ex_subexp, type);
3187       if (outside_node == REG_MISSING)
3188         {
3189           /* There are no problematic nodes, just merge them.  */
3190           err = re_node_set_merge (&new_nodes, eclosure);
3191           if (BE (err != REG_NOERROR, 0))
3192             {
3193               re_node_set_free (&new_nodes);
3194               return err;
3195             }
3196         }
3197       else
3198         {
3199           /* There are problematic nodes, re-calculate incrementally.  */
3200           err = check_arrival_expand_ecl_sub (dfa, &new_nodes, cur_node,
3201                                               ex_subexp, type);
3202           if (BE (err != REG_NOERROR, 0))
3203             {
3204               re_node_set_free (&new_nodes);
3205               return err;
3206             }
3207         }
3208     }
3209   re_node_set_free (cur_nodes);
3210   *cur_nodes = new_nodes;
3211   return REG_NOERROR;
3212 }
3213
3214 /* Helper function for check_arrival_expand_ecl.
3215    Check incrementally the epsilon closure of TARGET, and if it isn't
3216    problematic append it to DST_NODES.  */
3217
3218 static reg_errcode_t
3219 internal_function __attribute_warn_unused_result__
3220 check_arrival_expand_ecl_sub (const re_dfa_t *dfa, re_node_set *dst_nodes,
3221                               Idx target, Idx ex_subexp, int type)
3222 {
3223   Idx cur_node;
3224   for (cur_node = target; !re_node_set_contains (dst_nodes, cur_node);)
3225     {
3226       bool ok;
3227
3228       if (dfa->nodes[cur_node].type == type
3229           && dfa->nodes[cur_node].opr.idx == ex_subexp)
3230         {
3231           if (type == OP_CLOSE_SUBEXP)
3232             {
3233               ok = re_node_set_insert (dst_nodes, cur_node);
3234               if (BE (! ok, 0))
3235                 return REG_ESPACE;
3236             }
3237           break;
3238         }
3239       ok = re_node_set_insert (dst_nodes, cur_node);
3240       if (BE (! ok, 0))
3241         return REG_ESPACE;
3242       if (dfa->edests[cur_node].nelem == 0)
3243         break;
3244       if (dfa->edests[cur_node].nelem == 2)
3245         {
3246           reg_errcode_t err;
3247           err = check_arrival_expand_ecl_sub (dfa, dst_nodes,
3248                                               dfa->edests[cur_node].elems[1],
3249                                               ex_subexp, type);
3250           if (BE (err != REG_NOERROR, 0))
3251             return err;
3252         }
3253       cur_node = dfa->edests[cur_node].elems[0];
3254     }
3255   return REG_NOERROR;
3256 }
3257
3258
3259 /* For all the back references in the current state, calculate the
3260    destination of the back references by the appropriate entry
3261    in MCTX->BKREF_ENTS.  */
3262
3263 static reg_errcode_t
3264 internal_function __attribute_warn_unused_result__
3265 expand_bkref_cache (re_match_context_t *mctx, re_node_set *cur_nodes,
3266                     Idx cur_str, Idx subexp_num, int type)
3267 {
3268   const re_dfa_t *const dfa = mctx->dfa;
3269   reg_errcode_t err;
3270   Idx cache_idx_start = search_cur_bkref_entry (mctx, cur_str);
3271   struct re_backref_cache_entry *ent;
3272
3273   if (cache_idx_start == REG_MISSING)
3274     return REG_NOERROR;
3275
3276  restart:
3277   ent = mctx->bkref_ents + cache_idx_start;
3278   do
3279     {
3280       Idx to_idx, next_node;
3281
3282       /* Is this entry ENT is appropriate?  */
3283       if (!re_node_set_contains (cur_nodes, ent->node))
3284         continue; /* No.  */
3285
3286       to_idx = cur_str + ent->subexp_to - ent->subexp_from;
3287       /* Calculate the destination of the back reference, and append it
3288          to MCTX->STATE_LOG.  */
3289       if (to_idx == cur_str)
3290         {
3291           /* The backreference did epsilon transit, we must re-check all the
3292              node in the current state.  */
3293           re_node_set new_dests;
3294           reg_errcode_t err2, err3;
3295           next_node = dfa->edests[ent->node].elems[0];
3296           if (re_node_set_contains (cur_nodes, next_node))
3297             continue;
3298           err = re_node_set_init_1 (&new_dests, next_node);
3299           err2 = check_arrival_expand_ecl (dfa, &new_dests, subexp_num, type);
3300           err3 = re_node_set_merge (cur_nodes, &new_dests);
3301           re_node_set_free (&new_dests);
3302           if (BE (err != REG_NOERROR || err2 != REG_NOERROR
3303                   || err3 != REG_NOERROR, 0))
3304             {
3305               err = (err != REG_NOERROR ? err
3306                      : (err2 != REG_NOERROR ? err2 : err3));
3307               return err;
3308             }
3309           /* TODO: It is still inefficient...  */
3310           goto restart;
3311         }
3312       else
3313         {
3314           re_node_set union_set;
3315           next_node = dfa->nexts[ent->node];
3316           if (mctx->state_log[to_idx])
3317             {
3318               bool ok;
3319               if (re_node_set_contains (&mctx->state_log[to_idx]->nodes,
3320                                         next_node))
3321                 continue;
3322               err = re_node_set_init_copy (&union_set,
3323                                            &mctx->state_log[to_idx]->nodes);
3324               ok = re_node_set_insert (&union_set, next_node);
3325               if (BE (err != REG_NOERROR || ! ok, 0))
3326                 {
3327                   re_node_set_free (&union_set);
3328                   err = err != REG_NOERROR ? err : REG_ESPACE;
3329                   return err;
3330                 }
3331             }
3332           else
3333             {
3334               err = re_node_set_init_1 (&union_set, next_node);
3335               if (BE (err != REG_NOERROR, 0))
3336                 return err;
3337             }
3338           mctx->state_log[to_idx] = re_acquire_state (&err, dfa, &union_set);
3339           re_node_set_free (&union_set);
3340           if (BE (mctx->state_log[to_idx] == NULL
3341                   && err != REG_NOERROR, 0))
3342             return err;
3343         }
3344     }
3345   while (ent++->more);
3346   return REG_NOERROR;
3347 }
3348
3349 /* Build transition table for the state.
3350    Return true if successful.  */
3351
3352 static bool
3353 internal_function
3354 build_trtable (const re_dfa_t *dfa, re_dfastate_t *state)
3355 {
3356   reg_errcode_t err;
3357   Idx i, j;
3358   int ch;
3359   bool need_word_trtable = false;
3360   bitset_word_t elem, mask;
3361   bool dests_node_malloced = false;
3362   bool dest_states_malloced = false;
3363   Idx ndests; /* Number of the destination states from `state'.  */
3364   re_dfastate_t **trtable;
3365   re_dfastate_t **dest_states = NULL, **dest_states_word, **dest_states_nl;
3366   re_node_set follows, *dests_node;
3367   bitset_t *dests_ch;
3368   bitset_t acceptable;
3369
3370   struct dests_alloc
3371   {
3372     re_node_set dests_node[SBC_MAX];
3373     bitset_t dests_ch[SBC_MAX];
3374   } *dests_alloc;
3375
3376   /* We build DFA states which corresponds to the destination nodes
3377      from `state'.  `dests_node[i]' represents the nodes which i-th
3378      destination state contains, and `dests_ch[i]' represents the
3379      characters which i-th destination state accepts.  */
3380   if (__libc_use_alloca (sizeof (struct dests_alloc)))
3381     dests_alloc = (struct dests_alloc *) alloca (sizeof (struct dests_alloc));
3382   else
3383     {
3384       dests_alloc = re_malloc (struct dests_alloc, 1);
3385       if (BE (dests_alloc == NULL, 0))
3386         return false;
3387       dests_node_malloced = true;
3388     }
3389   dests_node = dests_alloc->dests_node;
3390   dests_ch = dests_alloc->dests_ch;
3391
3392   /* Initialize transiton table.  */
3393   state->word_trtable = state->trtable = NULL;
3394
3395   /* At first, group all nodes belonging to `state' into several
3396      destinations.  */
3397   ndests = group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch);
3398   if (BE (! REG_VALID_NONZERO_INDEX (ndests), 0))
3399     {
3400       if (dests_node_malloced)
3401         free (dests_alloc);
3402       if (ndests == 0)
3403         {
3404           state->trtable = (re_dfastate_t **)
3405             calloc (sizeof (re_dfastate_t *), SBC_MAX);
3406           if (BE (state->trtable == NULL, 0))
3407             return false;
3408           return true;
3409         }
3410       return false;
3411     }
3412
3413   err = re_node_set_alloc (&follows, ndests + 1);
3414   if (BE (err != REG_NOERROR, 0))
3415     goto out_free;
3416
3417   /* Avoid arithmetic overflow in size calculation.  */
3418   if (BE ((((SIZE_MAX - (sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX)
3419             / (3 * sizeof (re_dfastate_t *)))
3420            < ndests),
3421           0))
3422     goto out_free;
3423
3424   if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX
3425                          + ndests * 3 * sizeof (re_dfastate_t *)))
3426     dest_states = (re_dfastate_t **)
3427       alloca (ndests * 3 * sizeof (re_dfastate_t *));
3428   else
3429     {
3430       dest_states = (re_dfastate_t **)
3431         malloc (ndests * 3 * sizeof (re_dfastate_t *));
3432       if (BE (dest_states == NULL, 0))
3433         {
3434 out_free:
3435           if (dest_states_malloced)
3436             free (dest_states);
3437           re_node_set_free (&follows);
3438           for (i = 0; i < ndests; ++i)
3439             re_node_set_free (dests_node + i);
3440           if (dests_node_malloced)
3441             free (dests_alloc);
3442           return false;
3443         }
3444       dest_states_malloced = true;
3445     }
3446   dest_states_word = dest_states + ndests;
3447   dest_states_nl = dest_states_word + ndests;
3448   bitset_empty (acceptable);
3449
3450   /* Then build the states for all destinations.  */
3451   for (i = 0; i < ndests; ++i)
3452     {
3453       Idx next_node;
3454       re_node_set_empty (&follows);
3455       /* Merge the follows of this destination states.  */
3456       for (j = 0; j < dests_node[i].nelem; ++j)
3457         {
3458           next_node = dfa->nexts[dests_node[i].elems[j]];
3459           if (next_node != REG_MISSING)
3460             {
3461               err = re_node_set_merge (&follows, dfa->eclosures + next_node);
3462               if (BE (err != REG_NOERROR, 0))
3463                 goto out_free;
3464             }
3465         }
3466       dest_states[i] = re_acquire_state_context (&err, dfa, &follows, 0);
3467       if (BE (dest_states[i] == NULL && err != REG_NOERROR, 0))
3468         goto out_free;
3469       /* If the new state has context constraint,
3470          build appropriate states for these contexts.  */
3471       if (dest_states[i]->has_constraint)
3472         {
3473           dest_states_word[i] = re_acquire_state_context (&err, dfa, &follows,
3474                                                           CONTEXT_WORD);
3475           if (BE (dest_states_word[i] == NULL && err != REG_NOERROR, 0))
3476             goto out_free;
3477
3478           if (dest_states[i] != dest_states_word[i] && dfa->mb_cur_max > 1)
3479             need_word_trtable = true;
3480
3481           dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows,
3482                                                         CONTEXT_NEWLINE);
3483           if (BE (dest_states_nl[i] == NULL && err != REG_NOERROR, 0))
3484             goto out_free;
3485         }
3486       else
3487         {
3488           dest_states_word[i] = dest_states[i];
3489           dest_states_nl[i] = dest_states[i];
3490         }
3491       bitset_merge (acceptable, dests_ch[i]);
3492     }
3493
3494   if (!BE (need_word_trtable, 0))
3495     {
3496       /* We don't care about whether the following character is a word
3497          character, or we are in a single-byte character set so we can
3498          discern by looking at the character code: allocate a
3499          256-entry transition table.  */
3500       trtable = state->trtable =
3501         (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), SBC_MAX);
3502       if (BE (trtable == NULL, 0))
3503         goto out_free;
3504
3505       /* For all characters ch...:  */
3506       for (i = 0; i < BITSET_WORDS; ++i)
3507         for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
3508              elem;
3509              mask <<= 1, elem >>= 1, ++ch)
3510           if (BE (elem & 1, 0))
3511             {
3512               /* There must be exactly one destination which accepts
3513                  character ch.  See group_nodes_into_DFAstates.  */
3514               for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
3515                 ;
3516
3517               /* j-th destination accepts the word character ch.  */
3518               if (dfa->word_char[i] & mask)
3519                 trtable[ch] = dest_states_word[j];
3520               else
3521                 trtable[ch] = dest_states[j];
3522             }
3523     }
3524   else
3525     {
3526       /* We care about whether the following character is a word
3527          character, and we are in a multi-byte character set: discern
3528          by looking at the character code: build two 256-entry
3529          transition tables, one starting at trtable[0] and one
3530          starting at trtable[SBC_MAX].  */
3531       trtable = state->word_trtable =
3532         (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), 2 * SBC_MAX);
3533       if (BE (trtable == NULL, 0))
3534         goto out_free;
3535
3536       /* For all characters ch...:  */
3537       for (i = 0; i < BITSET_WORDS; ++i)
3538         for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
3539              elem;
3540              mask <<= 1, elem >>= 1, ++ch)
3541           if (BE (elem & 1, 0))
3542             {
3543               /* There must be exactly one destination which accepts
3544                  character ch.  See group_nodes_into_DFAstates.  */
3545               for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
3546                 ;
3547
3548               /* j-th destination accepts the word character ch.  */
3549               trtable[ch] = dest_states[j];
3550               trtable[ch + SBC_MAX] = dest_states_word[j];
3551             }
3552     }
3553
3554   /* new line */
3555   if (bitset_contain (acceptable, NEWLINE_CHAR))
3556     {
3557       /* The current state accepts newline character.  */
3558       for (j = 0; j < ndests; ++j)
3559         if (bitset_contain (dests_ch[j], NEWLINE_CHAR))
3560           {
3561             /* k-th destination accepts newline character.  */
3562             trtable[NEWLINE_CHAR] = dest_states_nl[j];
3563             if (need_word_trtable)
3564               trtable[NEWLINE_CHAR + SBC_MAX] = dest_states_nl[j];
3565             /* There must be only one destination which accepts
3566                newline.  See group_nodes_into_DFAstates.  */
3567             break;
3568           }
3569     }
3570
3571   if (dest_states_malloced)
3572     free (dest_states);
3573
3574   re_node_set_free (&follows);
3575   for (i = 0; i < ndests; ++i)
3576     re_node_set_free (dests_node + i);
3577
3578   if (dests_node_malloced)
3579     free (dests_alloc);
3580
3581   return true;
3582 }
3583
3584 /* Group all nodes belonging to STATE into several destinations.
3585    Then for all destinations, set the nodes belonging to the destination
3586    to DESTS_NODE[i] and set the characters accepted by the destination
3587    to DEST_CH[i].  This function return the number of destinations.  */
3588
3589 static Idx
3590 internal_function
3591 group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state,
3592                             re_node_set *dests_node, bitset_t *dests_ch)
3593 {
3594   reg_errcode_t err;
3595   bool ok;
3596   Idx i, j, k;
3597   Idx ndests; /* Number of the destinations from `state'.  */
3598   bitset_t accepts; /* Characters a node can accept.  */
3599   const re_node_set *cur_nodes = &state->nodes;
3600   bitset_empty (accepts);
3601   ndests = 0;
3602
3603   /* For all the nodes belonging to `state',  */
3604   for (i = 0; i < cur_nodes->nelem; ++i)
3605     {
3606       re_token_t *node = &dfa->nodes[cur_nodes->elems[i]];
3607       re_token_type_t type = node->type;
3608       unsigned int constraint = node->constraint;
3609
3610       /* Enumerate all single byte character this node can accept.  */
3611       if (type == CHARACTER)
3612         bitset_set (accepts, node->opr.c);
3613       else if (type == SIMPLE_BRACKET)
3614         {
3615           bitset_merge (accepts, node->opr.sbcset);
3616         }
3617       else if (type == OP_PERIOD)
3618         {
3619 #ifdef RE_ENABLE_I18N
3620           if (dfa->mb_cur_max > 1)
3621             bitset_merge (accepts, dfa->sb_char);
3622           else
3623 #endif
3624             bitset_set_all (accepts);
3625           if (!(dfa->syntax & RE_DOT_NEWLINE))
3626             bitset_clear (accepts, '\n');
3627           if (dfa->syntax & RE_DOT_NOT_NULL)
3628             bitset_clear (accepts, '\0');
3629         }
3630 #ifdef RE_ENABLE_I18N
3631       else if (type == OP_UTF8_PERIOD)
3632         {
3633           if (ASCII_CHARS % BITSET_WORD_BITS == 0)
3634             memset (accepts, -1, ASCII_CHARS / CHAR_BIT);
3635           else
3636             bitset_merge (accepts, utf8_sb_map);
3637           if (!(dfa->syntax & RE_DOT_NEWLINE))
3638             bitset_clear (accepts, '\n');
3639           if (dfa->syntax & RE_DOT_NOT_NULL)
3640             bitset_clear (accepts, '\0');
3641         }
3642 #endif
3643       else
3644         continue;
3645
3646       /* Check the `accepts' and sift the characters which are not
3647          match it the context.  */
3648       if (constraint)
3649         {
3650           if (constraint & NEXT_NEWLINE_CONSTRAINT)
3651             {
3652               bool accepts_newline = bitset_contain (accepts, NEWLINE_CHAR);
3653               bitset_empty (accepts);
3654               if (accepts_newline)
3655                 bitset_set (accepts, NEWLINE_CHAR);
3656               else
3657                 continue;
3658             }
3659           if (constraint & NEXT_ENDBUF_CONSTRAINT)
3660             {
3661               bitset_empty (accepts);
3662               continue;
3663             }
3664
3665           if (constraint & NEXT_WORD_CONSTRAINT)
3666             {
3667               bitset_word_t any_set = 0;
3668               if (type == CHARACTER && !node->word_char)
3669                 {
3670                   bitset_empty (accepts);
3671                   continue;
3672                 }
3673 #ifdef RE_ENABLE_I18N
3674               if (dfa->mb_cur_max > 1)
3675                 for (j = 0; j < BITSET_WORDS; ++j)
3676                   any_set |= (accepts[j] &= (dfa->word_char[j] | ~dfa->sb_char[j]));
3677               else
3678 #endif
3679                 for (j = 0; j < BITSET_WORDS; ++j)
3680                   any_set |= (accepts[j] &= dfa->word_char[j]);
3681               if (!any_set)
3682                 continue;
3683             }
3684           if (constraint & NEXT_NOTWORD_CONSTRAINT)
3685             {
3686               bitset_word_t any_set = 0;
3687               if (type == CHARACTER && node->word_char)
3688                 {
3689                   bitset_empty (accepts);
3690                   continue;
3691                 }
3692 #ifdef RE_ENABLE_I18N
3693               if (dfa->mb_cur_max > 1)
3694                 for (j = 0; j < BITSET_WORDS; ++j)
3695                   any_set |= (accepts[j] &= ~(dfa->word_char[j] & dfa->sb_char[j]));
3696               else
3697 #endif
3698                 for (j = 0; j < BITSET_WORDS; ++j)
3699                   any_set |= (accepts[j] &= ~dfa->word_char[j]);
3700               if (!any_set)
3701                 continue;
3702             }
3703         }
3704
3705       /* Then divide `accepts' into DFA states, or create a new
3706          state.  Above, we make sure that accepts is not empty.  */
3707       for (j = 0; j < ndests; ++j)
3708         {
3709           bitset_t intersec; /* Intersection sets, see below.  */
3710           bitset_t remains;
3711           /* Flags, see below.  */
3712           bitset_word_t has_intersec, not_subset, not_consumed;
3713
3714           /* Optimization, skip if this state doesn't accept the character.  */
3715           if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c))
3716             continue;
3717
3718           /* Enumerate the intersection set of this state and `accepts'.  */
3719           has_intersec = 0;
3720           for (k = 0; k < BITSET_WORDS; ++k)
3721             has_intersec |= intersec[k] = accepts[k] & dests_ch[j][k];
3722           /* And skip if the intersection set is empty.  */
3723           if (!has_intersec)
3724             continue;
3725
3726           /* Then check if this state is a subset of `accepts'.  */
3727           not_subset = not_consumed = 0;
3728           for (k = 0; k < BITSET_WORDS; ++k)
3729             {
3730               not_subset |= remains[k] = ~accepts[k] & dests_ch[j][k];
3731               not_consumed |= accepts[k] = accepts[k] & ~dests_ch[j][k];
3732             }
3733
3734           /* If this state isn't a subset of `accepts', create a
3735              new group state, which has the `remains'. */
3736           if (not_subset)
3737             {
3738               bitset_copy (dests_ch[ndests], remains);
3739               bitset_copy (dests_ch[j], intersec);
3740               err = re_node_set_init_copy (dests_node + ndests, &dests_node[j]);
3741               if (BE (err != REG_NOERROR, 0))
3742                 goto error_return;
3743               ++ndests;
3744             }
3745
3746           /* Put the position in the current group. */
3747           ok = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]);
3748           if (BE (! ok, 0))
3749             goto error_return;
3750
3751           /* If all characters are consumed, go to next node. */
3752           if (!not_consumed)
3753             break;
3754         }
3755       /* Some characters remain, create a new group. */
3756       if (j == ndests)
3757         {
3758           bitset_copy (dests_ch[ndests], accepts);
3759           err = re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]);
3760           if (BE (err != REG_NOERROR, 0))
3761             goto error_return;
3762           ++ndests;
3763           bitset_empty (accepts);
3764         }
3765     }
3766   return ndests;
3767  error_return:
3768   for (j = 0; j < ndests; ++j)
3769     re_node_set_free (dests_node + j);
3770   return REG_MISSING;
3771 }
3772
3773 #ifdef RE_ENABLE_I18N
3774 /* Check how many bytes the node `dfa->nodes[node_idx]' accepts.
3775    Return the number of the bytes the node accepts.
3776    STR_IDX is the current index of the input string.
3777
3778    This function handles the nodes which can accept one character, or
3779    one collating element like '.', '[a-z]', opposite to the other nodes
3780    can only accept one byte.  */
3781
3782 static int
3783 internal_function
3784 check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx,
3785                          const re_string_t *input, Idx str_idx)
3786 {
3787   const re_token_t *node = dfa->nodes + node_idx;
3788   int char_len, elem_len;
3789   Idx i;
3790
3791   if (BE (node->type == OP_UTF8_PERIOD, 0))
3792     {
3793       unsigned char c = re_string_byte_at (input, str_idx), d;
3794       if (BE (c < 0xc2, 1))
3795         return 0;
3796
3797       if (str_idx + 2 > input->len)
3798         return 0;
3799
3800       d = re_string_byte_at (input, str_idx + 1);
3801       if (c < 0xe0)
3802         return (d < 0x80 || d > 0xbf) ? 0 : 2;
3803       else if (c < 0xf0)
3804         {
3805           char_len = 3;
3806           if (c == 0xe0 && d < 0xa0)
3807             return 0;
3808         }
3809       else if (c < 0xf8)
3810         {
3811           char_len = 4;
3812           if (c == 0xf0 && d < 0x90)
3813             return 0;
3814         }
3815       else if (c < 0xfc)
3816         {
3817           char_len = 5;
3818           if (c == 0xf8 && d < 0x88)
3819             return 0;
3820         }
3821       else if (c < 0xfe)
3822         {
3823           char_len = 6;
3824           if (c == 0xfc && d < 0x84)
3825             return 0;
3826         }
3827       else
3828         return 0;
3829
3830       if (str_idx + char_len > input->len)
3831         return 0;
3832
3833       for (i = 1; i < char_len; ++i)
3834         {
3835           d = re_string_byte_at (input, str_idx + i);
3836           if (d < 0x80 || d > 0xbf)
3837             return 0;
3838         }
3839       return char_len;
3840     }
3841
3842   char_len = re_string_char_size_at (input, str_idx);
3843   if (node->type == OP_PERIOD)
3844     {
3845       if (char_len <= 1)
3846         return 0;
3847       /* FIXME: I don't think this if is needed, as both '\n'
3848          and '\0' are char_len == 1.  */
3849       /* '.' accepts any one character except the following two cases.  */
3850       if ((!(dfa->syntax & RE_DOT_NEWLINE) &&
3851            re_string_byte_at (input, str_idx) == '\n') ||
3852           ((dfa->syntax & RE_DOT_NOT_NULL) &&
3853            re_string_byte_at (input, str_idx) == '\0'))
3854         return 0;
3855       return char_len;
3856     }
3857
3858   elem_len = re_string_elem_size_at (input, str_idx);
3859   if ((elem_len <= 1 && char_len <= 1) || char_len == 0)
3860     return 0;
3861
3862   if (node->type == COMPLEX_BRACKET)
3863     {
3864       const re_charset_t *cset = node->opr.mbcset;
3865 # ifdef _LIBC
3866       const unsigned char *pin
3867         = ((const unsigned char *) re_string_get_buffer (input) + str_idx);
3868       Idx j;
3869       uint32_t nrules;
3870 # endif /* _LIBC */
3871       int match_len = 0;
3872       wchar_t wc = ((cset->nranges || cset->nchar_classes || cset->nmbchars)
3873                     ? re_string_wchar_at (input, str_idx) : 0);
3874
3875       /* match with multibyte character?  */
3876       for (i = 0; i < cset->nmbchars; ++i)
3877         if (wc == cset->mbchars[i])
3878           {
3879             match_len = char_len;
3880             goto check_node_accept_bytes_match;
3881           }
3882       /* match with character_class?  */
3883       for (i = 0; i < cset->nchar_classes; ++i)
3884         {
3885           wctype_t wt = cset->char_classes[i];
3886           if (__iswctype (wc, wt))
3887             {
3888               match_len = char_len;
3889               goto check_node_accept_bytes_match;
3890             }
3891         }
3892
3893 # ifdef _LIBC
3894       nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3895       if (nrules != 0)
3896         {
3897           unsigned int in_collseq = 0;
3898           const int32_t *table, *indirect;
3899           const unsigned char *weights, *extra;
3900           const char *collseqwc;
3901           int32_t idx;
3902           /* This #include defines a local function!  */
3903 #  include <locale/weight.h>
3904
3905           /* match with collating_symbol?  */
3906           if (cset->ncoll_syms)
3907             extra = (const unsigned char *)
3908               _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
3909           for (i = 0; i < cset->ncoll_syms; ++i)
3910             {
3911               const unsigned char *coll_sym = extra + cset->coll_syms[i];
3912               /* Compare the length of input collating element and
3913                  the length of current collating element.  */
3914               if (*coll_sym != elem_len)
3915                 continue;
3916               /* Compare each bytes.  */
3917               for (j = 0; j < *coll_sym; j++)
3918                 if (pin[j] != coll_sym[1 + j])
3919                   break;
3920               if (j == *coll_sym)
3921                 {
3922                   /* Match if every bytes is equal.  */
3923                   match_len = j;
3924                   goto check_node_accept_bytes_match;
3925                 }
3926             }
3927
3928           if (cset->nranges)
3929             {
3930               if (elem_len <= char_len)
3931                 {
3932                   collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
3933                   in_collseq = __collseq_table_lookup (collseqwc, wc);
3934                 }
3935               else
3936                 in_collseq = find_collation_sequence_value (pin, elem_len);
3937             }
3938           /* match with range expression?  */
3939           for (i = 0; i < cset->nranges; ++i)
3940             if (cset->range_starts[i] <= in_collseq
3941                 && in_collseq <= cset->range_ends[i])
3942               {
3943                 match_len = elem_len;
3944                 goto check_node_accept_bytes_match;
3945               }
3946
3947           /* match with equivalence_class?  */
3948           if (cset->nequiv_classes)
3949             {
3950               const unsigned char *cp = pin;
3951               table = (const int32_t *)
3952                 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
3953               weights = (const unsigned char *)
3954                 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
3955               extra = (const unsigned char *)
3956                 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
3957               indirect = (const int32_t *)
3958                 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
3959               int32_t idx = findidx (&cp);
3960               if (idx > 0)
3961                 for (i = 0; i < cset->nequiv_classes; ++i)
3962                   {
3963                     int32_t equiv_class_idx = cset->equiv_classes[i];
3964                     size_t weight_len = weights[idx & 0xffffff];
3965                     if (weight_len == weights[equiv_class_idx & 0xffffff]
3966                         && (idx >> 24) == (equiv_class_idx >> 24))
3967                       {
3968                         Idx cnt = 0;
3969
3970                         idx &= 0xffffff;
3971                         equiv_class_idx &= 0xffffff;
3972
3973                         while (cnt <= weight_len
3974                                && (weights[equiv_class_idx + 1 + cnt]
3975                                    == weights[idx + 1 + cnt]))
3976                           ++cnt;
3977                         if (cnt > weight_len)
3978                           {
3979                             match_len = elem_len;
3980                             goto check_node_accept_bytes_match;
3981                           }
3982                       }
3983                   }
3984             }
3985         }
3986       else
3987 # endif /* _LIBC */
3988         {
3989           /* match with range expression?  */
3990 #if __GNUC__ >= 2 && ! (__STDC_VERSION__ < 199901L && __STRICT_ANSI__)
3991           wchar_t cmp_buf[] = {L'\0', L'\0', wc, L'\0', L'\0', L'\0'};
3992 #else
3993           wchar_t cmp_buf[] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'};
3994           cmp_buf[2] = wc;
3995 #endif
3996           for (i = 0; i < cset->nranges; ++i)
3997             {
3998               cmp_buf[0] = cset->range_starts[i];
3999               cmp_buf[4] = cset->range_ends[i];
4000               if (wcscoll (cmp_buf, cmp_buf + 2) <= 0
4001                   && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0)
4002                 {
4003                   match_len = char_len;
4004                   goto check_node_accept_bytes_match;
4005                 }
4006             }
4007         }
4008     check_node_accept_bytes_match:
4009       if (!cset->non_match)
4010         return match_len;
4011       else
4012         {
4013           if (match_len > 0)
4014             return 0;
4015           else
4016             return (elem_len > char_len) ? elem_len : char_len;
4017         }
4018     }
4019   return 0;
4020 }
4021
4022 # ifdef _LIBC
4023 static unsigned int
4024 internal_function
4025 find_collation_sequence_value (const unsigned char *mbs, size_t mbs_len)
4026 {
4027   uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
4028   if (nrules == 0)
4029     {
4030       if (mbs_len == 1)
4031         {
4032           /* No valid character.  Match it as a single byte character.  */
4033           const unsigned char *collseq = (const unsigned char *)
4034             _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
4035           return collseq[mbs[0]];
4036         }
4037       return UINT_MAX;
4038     }
4039   else
4040     {
4041       int32_t idx;
4042       const unsigned char *extra = (const unsigned char *)
4043         _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
4044       int32_t extrasize = (const unsigned char *)
4045         _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB + 1) - extra;
4046
4047       for (idx = 0; idx < extrasize;)
4048         {
4049           int mbs_cnt;
4050           bool found = false;
4051           int32_t elem_mbs_len;
4052           /* Skip the name of collating element name.  */
4053           idx = idx + extra[idx] + 1;
4054           elem_mbs_len = extra[idx++];
4055           if (mbs_len == elem_mbs_len)
4056             {
4057               for (mbs_cnt = 0; mbs_cnt < elem_mbs_len; ++mbs_cnt)
4058                 if (extra[idx + mbs_cnt] != mbs[mbs_cnt])
4059                   break;
4060               if (mbs_cnt == elem_mbs_len)
4061                 /* Found the entry.  */
4062                 found = true;
4063             }
4064           /* Skip the byte sequence of the collating element.  */
4065           idx += elem_mbs_len;
4066           /* Adjust for the alignment.  */
4067           idx = (idx + 3) & ~3;
4068           /* Skip the collation sequence value.  */
4069           idx += sizeof (uint32_t);
4070           /* Skip the wide char sequence of the collating element.  */
4071           idx = idx + sizeof (uint32_t) * (extra[idx] + 1);
4072           /* If we found the entry, return the sequence value.  */
4073           if (found)
4074             return *(uint32_t *) (extra + idx);
4075           /* Skip the collation sequence value.  */
4076           idx += sizeof (uint32_t);
4077         }
4078       return UINT_MAX;
4079     }
4080 }
4081 # endif /* _LIBC */
4082 #endif /* RE_ENABLE_I18N */
4083
4084 /* Check whether the node accepts the byte which is IDX-th
4085    byte of the INPUT.  */
4086
4087 static bool
4088 internal_function
4089 check_node_accept (const re_match_context_t *mctx, const re_token_t *node,
4090                    Idx idx)
4091 {
4092   unsigned char ch;
4093   ch = re_string_byte_at (&mctx->input, idx);
4094   switch (node->type)
4095     {
4096     case CHARACTER:
4097       if (node->opr.c != ch)
4098         return false;
4099       break;
4100
4101     case SIMPLE_BRACKET:
4102       if (!bitset_contain (node->opr.sbcset, ch))
4103         return false;
4104       break;
4105
4106 #ifdef RE_ENABLE_I18N
4107     case OP_UTF8_PERIOD:
4108       if (ch >= ASCII_CHARS)
4109         return false;
4110       /* FALLTHROUGH */
4111 #endif
4112     case OP_PERIOD:
4113       if ((ch == '\n' && !(mctx->dfa->syntax & RE_DOT_NEWLINE))
4114           || (ch == '\0' && (mctx->dfa->syntax & RE_DOT_NOT_NULL)))
4115         return false;
4116       break;
4117
4118     default:
4119       return false;
4120     }
4121
4122   if (node->constraint)
4123     {
4124       /* The node has constraints.  Check whether the current context
4125          satisfies the constraints.  */
4126       unsigned int context = re_string_context_at (&mctx->input, idx,
4127                                                    mctx->eflags);
4128       if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
4129         return false;
4130     }
4131
4132   return true;
4133 }
4134
4135 /* Extend the buffers, if the buffers have run out.  */
4136
4137 static reg_errcode_t
4138 internal_function __attribute_warn_unused_result__
4139 extend_buffers (re_match_context_t *mctx)
4140 {
4141   reg_errcode_t ret;
4142   re_string_t *pstr = &mctx->input;
4143
4144   /* Avoid overflow.  */
4145   if (BE (SIZE_MAX / 2 / sizeof (re_dfastate_t *) <= pstr->bufs_len, 0))
4146     return REG_ESPACE;
4147
4148   /* Double the lengthes of the buffers.  */
4149   ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2);
4150   if (BE (ret != REG_NOERROR, 0))
4151     return ret;
4152
4153   if (mctx->state_log != NULL)
4154     {
4155       /* And double the length of state_log.  */
4156       /* XXX We have no indication of the size of this buffer.  If this
4157          allocation fail we have no indication that the state_log array
4158          does not have the right size.  */
4159       re_dfastate_t **new_array = re_realloc (mctx->state_log, re_dfastate_t *,
4160                                               pstr->bufs_len + 1);
4161       if (BE (new_array == NULL, 0))
4162         return REG_ESPACE;
4163       mctx->state_log = new_array;
4164     }
4165
4166   /* Then reconstruct the buffers.  */
4167   if (pstr->icase)
4168     {
4169 #ifdef RE_ENABLE_I18N
4170       if (pstr->mb_cur_max > 1)
4171         {
4172           ret = build_wcs_upper_buffer (pstr);
4173           if (BE (ret != REG_NOERROR, 0))
4174             return ret;
4175         }
4176       else
4177 #endif /* RE_ENABLE_I18N  */
4178         build_upper_buffer (pstr);
4179     }
4180   else
4181     {
4182 #ifdef RE_ENABLE_I18N
4183       if (pstr->mb_cur_max > 1)
4184         build_wcs_buffer (pstr);
4185       else
4186 #endif /* RE_ENABLE_I18N  */
4187         {
4188           if (pstr->trans != NULL)
4189             re_string_translate_buffer (pstr);
4190         }
4191     }
4192   return REG_NOERROR;
4193 }
4194
4195 \f
4196 /* Functions for matching context.  */
4197
4198 /* Initialize MCTX.  */
4199
4200 static reg_errcode_t
4201 internal_function __attribute_warn_unused_result__
4202 match_ctx_init (re_match_context_t *mctx, int eflags, Idx n)
4203 {
4204   mctx->eflags = eflags;
4205   mctx->match_last = REG_MISSING;
4206   if (n > 0)
4207     {
4208       /* Avoid overflow.  */
4209       size_t max_object_size =
4210         MAX (sizeof (struct re_backref_cache_entry),
4211              sizeof (re_sub_match_top_t *));
4212       if (BE (SIZE_MAX / max_object_size < n, 0))
4213         return REG_ESPACE;
4214
4215       mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n);
4216       mctx->sub_tops = re_malloc (re_sub_match_top_t *, n);
4217       if (BE (mctx->bkref_ents == NULL || mctx->sub_tops == NULL, 0))
4218         return REG_ESPACE;
4219     }
4220   /* Already zero-ed by the caller.
4221      else
4222        mctx->bkref_ents = NULL;
4223      mctx->nbkref_ents = 0;
4224      mctx->nsub_tops = 0;  */
4225   mctx->abkref_ents = n;
4226   mctx->max_mb_elem_len = 1;
4227   mctx->asub_tops = n;
4228   return REG_NOERROR;
4229 }
4230
4231 /* Clean the entries which depend on the current input in MCTX.
4232    This function must be invoked when the matcher changes the start index
4233    of the input, or changes the input string.  */
4234
4235 static void
4236 internal_function
4237 match_ctx_clean (re_match_context_t *mctx)
4238 {
4239   Idx st_idx;
4240   for (st_idx = 0; st_idx < mctx->nsub_tops; ++st_idx)
4241     {
4242       Idx sl_idx;
4243       re_sub_match_top_t *top = mctx->sub_tops[st_idx];
4244       for (sl_idx = 0; sl_idx < top->nlasts; ++sl_idx)
4245         {
4246           re_sub_match_last_t *last = top->lasts[sl_idx];
4247           re_free (last->path.array);
4248           re_free (last);
4249         }
4250       re_free (top->lasts);
4251       if (top->path)
4252         {
4253           re_free (top->path->array);
4254           re_free (top->path);
4255         }
4256       free (top);
4257     }
4258
4259   mctx->nsub_tops = 0;
4260   mctx->nbkref_ents = 0;
4261 }
4262
4263 /* Free all the memory associated with MCTX.  */
4264
4265 static void
4266 internal_function
4267 match_ctx_free (re_match_context_t *mctx)
4268 {
4269   /* First, free all the memory associated with MCTX->SUB_TOPS.  */
4270   match_ctx_clean (mctx);
4271   re_free (mctx->sub_tops);
4272   re_free (mctx->bkref_ents);
4273 }
4274
4275 /* Add a new backreference entry to MCTX.
4276    Note that we assume that caller never call this function with duplicate
4277    entry, and call with STR_IDX which isn't smaller than any existing entry.
4278 */
4279
4280 static reg_errcode_t
4281 internal_function __attribute_warn_unused_result__
4282 match_ctx_add_entry (re_match_context_t *mctx, Idx node, Idx str_idx, Idx from,
4283                      Idx to)
4284 {
4285   if (mctx->nbkref_ents >= mctx->abkref_ents)
4286     {
4287       struct re_backref_cache_entry* new_entry;
4288       new_entry = re_realloc (mctx->bkref_ents, struct re_backref_cache_entry,
4289                               mctx->abkref_ents * 2);
4290       if (BE (new_entry == NULL, 0))
4291         {
4292           re_free (mctx->bkref_ents);
4293           return REG_ESPACE;
4294         }
4295       mctx->bkref_ents = new_entry;
4296       memset (mctx->bkref_ents + mctx->nbkref_ents, '\0',
4297               sizeof (struct re_backref_cache_entry) * mctx->abkref_ents);
4298       mctx->abkref_ents *= 2;
4299     }
4300   if (mctx->nbkref_ents > 0
4301       && mctx->bkref_ents[mctx->nbkref_ents - 1].str_idx == str_idx)
4302     mctx->bkref_ents[mctx->nbkref_ents - 1].more = 1;
4303
4304   mctx->bkref_ents[mctx->nbkref_ents].node = node;
4305   mctx->bkref_ents[mctx->nbkref_ents].str_idx = str_idx;
4306   mctx->bkref_ents[mctx->nbkref_ents].subexp_from = from;
4307   mctx->bkref_ents[mctx->nbkref_ents].subexp_to = to;
4308
4309   /* This is a cache that saves negative results of check_dst_limits_calc_pos.
4310      If bit N is clear, means that this entry won't epsilon-transition to
4311      an OP_OPEN_SUBEXP or OP_CLOSE_SUBEXP for the N+1-th subexpression.  If
4312      it is set, check_dst_limits_calc_pos_1 will recurse and try to find one
4313      such node.
4314
4315      A backreference does not epsilon-transition unless it is empty, so set
4316      to all zeros if FROM != TO.  */
4317   mctx->bkref_ents[mctx->nbkref_ents].eps_reachable_subexps_map
4318     = (from == to ? -1 : 0);
4319
4320   mctx->bkref_ents[mctx->nbkref_ents++].more = 0;
4321   if (mctx->max_mb_elem_len < to - from)
4322     mctx->max_mb_elem_len = to - from;
4323   return REG_NOERROR;
4324 }
4325
4326 /* Return the first entry with the same str_idx, or REG_MISSING if none is
4327    found.  Note that MCTX->BKREF_ENTS is already sorted by MCTX->STR_IDX.  */
4328
4329 static Idx
4330 internal_function
4331 search_cur_bkref_entry (const re_match_context_t *mctx, Idx str_idx)
4332 {
4333   Idx left, right, mid, last;
4334   last = right = mctx->nbkref_ents;
4335   for (left = 0; left < right;)
4336     {
4337       mid = (left + right) / 2;
4338       if (mctx->bkref_ents[mid].str_idx < str_idx)
4339         left = mid + 1;
4340       else
4341         right = mid;
4342     }
4343   if (left < last && mctx->bkref_ents[left].str_idx == str_idx)
4344     return left;
4345   else
4346     return REG_MISSING;
4347 }
4348
4349 /* Register the node NODE, whose type is OP_OPEN_SUBEXP, and which matches
4350    at STR_IDX.  */
4351
4352 static reg_errcode_t
4353 internal_function __attribute_warn_unused_result__
4354 match_ctx_add_subtop (re_match_context_t *mctx, Idx node, Idx str_idx)
4355 {
4356 #ifdef DEBUG
4357   assert (mctx->sub_tops != NULL);
4358   assert (mctx->asub_tops > 0);
4359 #endif
4360   if (BE (mctx->nsub_tops == mctx->asub_tops, 0))
4361     {
4362       Idx new_asub_tops = mctx->asub_tops * 2;
4363       re_sub_match_top_t **new_array = re_realloc (mctx->sub_tops,
4364                                                    re_sub_match_top_t *,
4365                                                    new_asub_tops);
4366       if (BE (new_array == NULL, 0))
4367         return REG_ESPACE;
4368       mctx->sub_tops = new_array;
4369       mctx->asub_tops = new_asub_tops;
4370     }
4371   mctx->sub_tops[mctx->nsub_tops] = calloc (1, sizeof (re_sub_match_top_t));
4372   if (BE (mctx->sub_tops[mctx->nsub_tops] == NULL, 0))
4373     return REG_ESPACE;
4374   mctx->sub_tops[mctx->nsub_tops]->node = node;
4375   mctx->sub_tops[mctx->nsub_tops++]->str_idx = str_idx;
4376   return REG_NOERROR;
4377 }
4378
4379 /* Register the node NODE, whose type is OP_CLOSE_SUBEXP, and which matches
4380    at STR_IDX, whose corresponding OP_OPEN_SUBEXP is SUB_TOP.  */
4381
4382 static re_sub_match_last_t *
4383 internal_function
4384 match_ctx_add_sublast (re_sub_match_top_t *subtop, Idx node, Idx str_idx)
4385 {
4386   re_sub_match_last_t *new_entry;
4387   if (BE (subtop->nlasts == subtop->alasts, 0))
4388     {
4389       Idx new_alasts = 2 * subtop->alasts + 1;
4390       re_sub_match_last_t **new_array = re_realloc (subtop->lasts,
4391                                                     re_sub_match_last_t *,
4392                                                     new_alasts);
4393       if (BE (new_array == NULL, 0))
4394         return NULL;
4395       subtop->lasts = new_array;
4396       subtop->alasts = new_alasts;
4397     }
4398   new_entry = calloc (1, sizeof (re_sub_match_last_t));
4399   if (BE (new_entry != NULL, 1))
4400     {
4401       subtop->lasts[subtop->nlasts] = new_entry;
4402       new_entry->node = node;
4403       new_entry->str_idx = str_idx;
4404       ++subtop->nlasts;
4405     }
4406   return new_entry;
4407 }
4408
4409 static void
4410 internal_function
4411 sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
4412                re_dfastate_t **limited_sts, Idx last_node, Idx last_str_idx)
4413 {
4414   sctx->sifted_states = sifted_sts;
4415   sctx->limited_states = limited_sts;
4416   sctx->last_node = last_node;
4417   sctx->last_str_idx = last_str_idx;
4418   re_node_set_init_empty (&sctx->limits);
4419 }