LCOV - PostgreSQL - src/backend/regex/regc

LCOV - code coverage report

Current view:	top level - src/backend/regex - regc_nfa.c (source / functions)		Hit	Total	Coverage
Test:	PostgreSQL	Lines:	927	1134	81.7 %
Date:	2017-09-29 15:12:54	Functions:	53	55	96.4 %
Legend:	Lines: hit not hit

          Line data    Source code

       1             : /*
       2             :  * NFA utilities.
       3             :  * This file is #included by regcomp.c.
       4             :  *
       5             :  * Copyright (c) 1998, 1999 Henry Spencer.  All rights reserved.
       6             :  *
       7             :  * Development of this software was funded, in part, by Cray Research Inc.,
       8             :  * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
       9             :  * Corporation, none of whom are responsible for the results.  The author
      10             :  * thanks all of them.
      11             :  *
      12             :  * Redistribution and use in source and binary forms -- with or without
      13             :  * modification -- are permitted for any purpose, provided that
      14             :  * redistributions in source form retain this entire copyright notice and
      15             :  * indicate the origin and nature of any modifications.
      16             :  *
      17             :  * I'd appreciate being given credit for this package in the documentation
      18             :  * of software which uses it, but that is not a requirement.
      19             :  *
      20             :  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
      21             :  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
      22             :  * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL
      23             :  * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
      24             :  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
      25             :  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
      26             :  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
      27             :  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
      28             :  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
      29             :  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
      30             :  *
      31             :  * src/backend/regex/regc_nfa.c
      32             :  *
      33             :  *
      34             :  * One or two things that technically ought to be in here
      35             :  * are actually in color.c, thanks to some incestuous relationships in
      36             :  * the color chains.
      37             :  */
      38             : 
      39             : #define NISERR()    VISERR(nfa->v)
      40             : #define NERR(e)     VERR(nfa->v, (e))
      41             : 
      42             : 
      43             : /*
      44             :  * newnfa - set up an NFA
      45             :  */
      46             : static struct nfa *             /* the NFA, or NULL */
      47        1785 : newnfa(struct vars *v,
      48             :        struct colormap *cm,
      49             :        struct nfa *parent)      /* NULL if primary NFA */
      50             : {
      51             :     struct nfa *nfa;
      52             : 
      53        1785 :     nfa = (struct nfa *) MALLOC(sizeof(struct nfa));
      54        1785 :     if (nfa == NULL)
      55             :     {
      56           0 :         ERR(REG_ESPACE);
      57           0 :         return NULL;
      58             :     }
      59             : 
      60        1785 :     nfa->states = NULL;
      61        1785 :     nfa->slast = NULL;
      62        1785 :     nfa->free = NULL;
      63        1785 :     nfa->nstates = 0;
      64        1785 :     nfa->cm = cm;
      65        1785 :     nfa->v = v;
      66        1785 :     nfa->bos[0] = nfa->bos[1] = COLORLESS;
      67        1785 :     nfa->eos[0] = nfa->eos[1] = COLORLESS;
      68        1785 :     nfa->parent = parent;        /* Precedes newfstate so parent is valid. */
      69        1785 :     nfa->post = newfstate(nfa, '@'); /* number 0 */
      70        1785 :     nfa->pre = newfstate(nfa, '>'); /* number 1 */
      71             : 
      72        1785 :     nfa->init = newstate(nfa);   /* may become invalid later */
      73        1785 :     nfa->final = newstate(nfa);
      74        1785 :     if (ISERR())
      75             :     {
      76           0 :         freenfa(nfa);
      77           0 :         return NULL;
      78             :     }
      79        1785 :     rainbow(nfa, nfa->cm, PLAIN, COLORLESS, nfa->pre, nfa->init);
      80        1785 :     newarc(nfa, '^', 1, nfa->pre, nfa->init);
      81        1785 :     newarc(nfa, '^', 0, nfa->pre, nfa->init);
      82        1785 :     rainbow(nfa, nfa->cm, PLAIN, COLORLESS, nfa->final, nfa->post);
      83        1785 :     newarc(nfa, '$', 1, nfa->final, nfa->post);
      84        1785 :     newarc(nfa, '$', 0, nfa->final, nfa->post);
      85             : 
      86        1785 :     if (ISERR())
      87             :     {
      88           0 :         freenfa(nfa);
      89           0 :         return NULL;
      90             :     }
      91        1785 :     return nfa;
      92             : }
      93             : 
      94             : /*
      95             :  * freenfa - free an entire NFA
      96             :  */
      97             : static void
      98        1785 : freenfa(struct nfa *nfa)
      99             : {
     100             :     struct state *s;
     101             : 
     102       30390 :     while ((s = nfa->states) != NULL)
     103             :     {
     104       26820 :         s->nins = s->nouts = 0; /* don't worry about arcs */
     105       26820 :         freestate(nfa, s);
     106             :     }
     107       73698 :     while ((s = nfa->free) != NULL)
     108             :     {
     109       70128 :         nfa->free = s->next;
     110       70128 :         destroystate(nfa, s);
     111             :     }
     112             : 
     113        1785 :     nfa->slast = NULL;
     114        1785 :     nfa->nstates = -1;
     115        1785 :     nfa->pre = NULL;
     116        1785 :     nfa->post = NULL;
     117        1785 :     FREE(nfa);
     118        1785 : }
     119             : 
     120             : /*
     121             :  * newstate - allocate an NFA state, with zero flag value
     122             :  */
     123             : static struct state *           /* NULL on error */
     124       75658 : newstate(struct nfa *nfa)
     125             : {
     126             :     struct state *s;
     127             : 
     128             :     /*
     129             :      * This is a handy place to check for operation cancel during regex
     130             :      * compilation, since no code path will go very long without making a new
     131             :      * state or arc.
     132             :      */
     133       75658 :     if (CANCEL_REQUESTED(nfa->v->re))
     134             :     {
     135           0 :         NERR(REG_CANCEL);
     136           0 :         return NULL;
     137             :     }
     138             : 
     139       75658 :     if (nfa->free != NULL)
     140             :     {
     141        5530 :         s = nfa->free;
     142        5530 :         nfa->free = s->next;
     143             :     }
     144             :     else
     145             :     {
     146       70128 :         if (nfa->v->spaceused >= REG_MAX_COMPILE_SPACE)
     147             :         {
     148           0 :             NERR(REG_ETOOBIG);
     149           0 :             return NULL;
     150             :         }
     151       70128 :         s = (struct state *) MALLOC(sizeof(struct state));
     152       70128 :         if (s == NULL)
     153             :         {
     154           0 :             NERR(REG_ESPACE);
     155           0 :             return NULL;
     156             :         }
     157       70128 :         nfa->v->spaceused += sizeof(struct state);
     158       70128 :         s->oas.next = NULL;
     159       70128 :         s->free = NULL;
     160       70128 :         s->noas = 0;
     161             :     }
     162             : 
     163       75658 :     assert(nfa->nstates >= 0);
     164       75658 :     s->no = nfa->nstates++;
     165       75658 :     s->flag = 0;
     166       75658 :     if (nfa->states == NULL)
     167        1785 :         nfa->states = s;
     168       75658 :     s->nins = 0;
     169       75658 :     s->ins = NULL;
     170       75658 :     s->nouts = 0;
     171       75658 :     s->outs = NULL;
     172       75658 :     s->tmp = NULL;
     173       75658 :     s->next = NULL;
     174       75658 :     if (nfa->slast != NULL)
     175             :     {
     176       73873 :         assert(nfa->slast->next == NULL);
     177       73873 :         nfa->slast->next = s;
     178             :     }
     179       75658 :     s->prev = nfa->slast;
     180       75658 :     nfa->slast = s;
     181       75658 :     return s;
     182             : }
     183             : 
     184             : /*
     185             :  * newfstate - allocate an NFA state with a specified flag value
     186             :  */
     187             : static struct state *           /* NULL on error */
     188        3570 : newfstate(struct nfa *nfa, int flag)
     189             : {
     190             :     struct state *s;
     191             : 
     192        3570 :     s = newstate(nfa);
     193        3570 :     if (s != NULL)
     194        3570 :         s->flag = (char) flag;
     195        3570 :     return s;
     196             : }
     197             : 
     198             : /*
     199             :  * dropstate - delete a state's inarcs and outarcs and free it
     200             :  */
     201             : static void
     202       48807 : dropstate(struct nfa *nfa,
     203             :           struct state *s)
     204             : {
     205             :     struct arc *a;
     206             : 
     207      109572 :     while ((a = s->ins) != NULL)
     208       11958 :         freearc(nfa, a);
     209      132252 :     while ((a = s->outs) != NULL)
     210       34638 :         freearc(nfa, a);
     211       48807 :     freestate(nfa, s);
     212       48807 : }
     213             : 
     214             : /*
     215             :  * freestate - free a state, which has no in-arcs or out-arcs
     216             :  */
     217             : static void
     218       75658 : freestate(struct nfa *nfa,
     219             :           struct state *s)
     220             : {
     221       75658 :     assert(s != NULL);
     222       75658 :     assert(s->nins == 0 && s->nouts == 0);
     223             : 
     224       75658 :     s->no = FREESTATE;
     225       75658 :     s->flag = 0;
     226       75658 :     if (s->next != NULL)
     227       72514 :         s->next->prev = s->prev;
     228             :     else
     229             :     {
     230        3144 :         assert(s == nfa->slast);
     231        3144 :         nfa->slast = s->prev;
     232             :     }
     233       75658 :     if (s->prev != NULL)
     234       48838 :         s->prev->next = s->next;
     235             :     else
     236             :     {
     237       26820 :         assert(s == nfa->states);
     238       26820 :         nfa->states = s->next;
     239             :     }
     240       75658 :     s->prev = NULL;
     241       75658 :     s->next = nfa->free;      /* don't delete it, put it on the free list */
     242       75658 :     nfa->free = s;
     243       75658 : }
     244             : 
     245             : /*
     246             :  * destroystate - really get rid of an already-freed state
     247             :  */
     248             : static void
     249       70128 : destroystate(struct nfa *nfa,
     250             :              struct state *s)
     251             : {
     252             :     struct arcbatch *ab;
     253             :     struct arcbatch *abnext;
     254             : 
     255       70128 :     assert(s->no == FREESTATE);
     256      271170 :     for (ab = s->oas.next; ab != NULL; ab = abnext)
     257             :     {
     258      201042 :         abnext = ab->next;
     259      201042 :         FREE(ab);
     260      201042 :         nfa->v->spaceused -= sizeof(struct arcbatch);
     261             :     }
     262       70128 :     s->ins = NULL;
     263       70128 :     s->outs = NULL;
     264       70128 :     s->next = NULL;
     265       70128 :     FREE(s);
     266       70128 :     nfa->v->spaceused -= sizeof(struct state);
     267       70128 : }
     268             : 
     269             : /*
     270             :  * newarc - set up a new arc within an NFA
     271             :  *
     272             :  * This function checks to make sure that no duplicate arcs are created.
     273             :  * In general we never want duplicates.
     274             :  */
     275             : static void
     276      256137 : newarc(struct nfa *nfa,
     277             :        int t,
     278             :        color co,
     279             :        struct state *from,
     280             :        struct state *to)
     281             : {
     282             :     struct arc *a;
     283             : 
     284      256137 :     assert(from != NULL && to != NULL);
     285             : 
     286             :     /*
     287             :      * This is a handy place to check for operation cancel during regex
     288             :      * compilation, since no code path will go very long without making a new
     289             :      * state or arc.
     290             :      */
     291      256137 :     if (CANCEL_REQUESTED(nfa->v->re))
     292             :     {
     293           0 :         NERR(REG_CANCEL);
     294           0 :         return;
     295             :     }
     296             : 
     297             :     /* check for duplicate arc, using whichever chain is shorter */
     298      256137 :     if (from->nouts <= to->nins)
     299             :     {
     300      572339 :         for (a = from->outs; a != NULL; a = a->outchain)
     301      399681 :             if (a->to == to && a->co == co && a->type == t)
     302        4069 :                 return;
     303             :     }
     304             :     else
     305             :     {
     306      379424 :         for (a = to->ins; a != NULL; a = a->inchain)
     307      305358 :             if (a->from == from && a->co == co && a->type == t)
     308        5344 :                 return;
     309             :     }
     310             : 
     311             :     /* no dup, so create the arc */
     312      246724 :     createarc(nfa, t, co, from, to);
     313             : }
     314             : 
     315             : /*
     316             :  * createarc - create a new arc within an NFA
     317             :  *
     318             :  * This function must *only* be used after verifying that there is no existing
     319             :  * identical arc (same type/color/from/to).
     320             :  */
     321             : static void
     322     2260459 : createarc(struct nfa *nfa,
     323             :           int t,
     324             :           color co,
     325             :           struct state *from,
     326             :           struct state *to)
     327             : {
     328             :     struct arc *a;
     329             : 
     330             :     /* the arc is physically allocated within its from-state */
     331     2260459 :     a = allocarc(nfa, from);
     332     2260459 :     if (NISERR())
     333     2260768 :         return;
     334     2260150 :     assert(a != NULL);
     335             : 
     336     2260150 :     a->type = t;
     337     2260150 :     a->co = co;
     338     2260150 :     a->to = to;
     339     2260150 :     a->from = from;
     340             : 
     341             :     /*
     342             :      * Put the new arc on the beginning, not the end, of the chains; it's
     343             :      * simpler here, and freearc() is the same cost either way.  See also the
     344             :      * logic in moveins() and its cohorts, as well as fixempties().
     345             :      */
     346     2260150 :     a->inchain = to->ins;
     347     2260150 :     a->inchainRev = NULL;
     348     2260150 :     if (to->ins)
     349     2177998 :         to->ins->inchainRev = a;
     350     2260150 :     to->ins = a;
     351     2260150 :     a->outchain = from->outs;
     352     2260150 :     a->outchainRev = NULL;
     353     2260150 :     if (from->outs)
     354     2183031 :         from->outs->outchainRev = a;
     355     2260150 :     from->outs = a;
     356             : 
     357     2260150 :     from->nouts++;
     358     2260150 :     to->nins++;
     359             : 
     360     2260150 :     if (COLORED(a) && nfa->parent == NULL)
     361       31329 :         colorchain(nfa->cm, a);
     362             : }
     363             : 
     364             : /*
     365             :  * allocarc - allocate a new out-arc within a state
     366             :  */
     367             : static struct arc *             /* NULL for failure */
     368     2260459 : allocarc(struct nfa *nfa,
     369             :          struct state *s)
     370             : {
     371             :     struct arc *a;
     372             : 
     373             :     /* shortcut */
     374     2260459 :     if (s->free == NULL && s->noas < ABSIZE)
     375             :     {
     376      167029 :         a = &s->oas.a[s->noas];
     377      167029 :         s->noas++;
     378      167029 :         return a;
     379             :     }
     380             : 
     381             :     /* if none at hand, get more */
     382     2093430 :     if (s->free == NULL)
     383             :     {
     384             :         struct arcbatch *newAb;
     385             :         int         i;
     386             : 
     387      201073 :         if (nfa->v->spaceused >= REG_MAX_COMPILE_SPACE)
     388             :         {
     389          31 :             NERR(REG_ETOOBIG);
     390          31 :             return NULL;
     391             :         }
     392      201042 :         newAb = (struct arcbatch *) MALLOC(sizeof(struct arcbatch));
     393      201042 :         if (newAb == NULL)
     394             :         {
     395           0 :             NERR(REG_ESPACE);
     396           0 :             return NULL;
     397             :         }
     398      201042 :         nfa->v->spaceused += sizeof(struct arcbatch);
     399      201042 :         newAb->next = s->oas.next;
     400      201042 :         s->oas.next = newAb;
     401             : 
     402     2211462 :         for (i = 0; i < ABSIZE; i++)
     403             :         {
     404     2010420 :             newAb->a[i].type = 0;
     405     2010420 :             newAb->a[i].freechain = &newAb->a[i + 1];
     406             :         }
     407      201042 :         newAb->a[ABSIZE - 1].freechain = NULL;
     408      201042 :         s->free = &newAb->a[0];
     409             :     }
     410     2093399 :     assert(s->free != NULL);
     411             : 
     412     2093399 :     a = s->free;
     413     2093399 :     s->free = a->freechain;
     414     2093399 :     return a;
     415             : }
     416             : 
     417             : /*
     418             :  * freearc - free an arc
     419             :  */
     420             : static void
     421      212234 : freearc(struct nfa *nfa,
     422             :         struct arc *victim)
     423             : {
     424      212234 :     struct state *from = victim->from;
     425      212234 :     struct state *to = victim->to;
     426             :     struct arc *predecessor;
     427             : 
     428      212234 :     assert(victim->type != 0);
     429             : 
     430             :     /* take it off color chain if necessary */
     431      212234 :     if (COLORED(victim) && nfa->parent == NULL)
     432       22517 :         uncolorchain(nfa->cm, victim);
     433             : 
     434             :     /* take it off source's out-chain */
     435      212234 :     assert(from != NULL);
     436      212234 :     predecessor = victim->outchainRev;
     437      212234 :     if (predecessor == NULL)
     438             :     {
     439       99911 :         assert(from->outs == victim);
     440       99911 :         from->outs = victim->outchain;
     441             :     }
     442             :     else
     443             :     {
     444      112323 :         assert(predecessor->outchain == victim);
     445      112323 :         predecessor->outchain = victim->outchain;
     446             :     }
     447      212234 :     if (victim->outchain != NULL)
     448             :     {
     449      115596 :         assert(victim->outchain->outchainRev == victim);
     450      115596 :         victim->outchain->outchainRev = predecessor;
     451             :     }
     452      212234 :     from->nouts--;
     453             : 
     454             :     /* take it off target's in-chain */
     455      212234 :     assert(to != NULL);
     456      212234 :     predecessor = victim->inchainRev;
     457      212234 :     if (predecessor == NULL)
     458             :     {
     459      111438 :         assert(to->ins == victim);
     460      111438 :         to->ins = victim->inchain;
     461             :     }
     462             :     else
     463             :     {
     464      100796 :         assert(predecessor->inchain == victim);
     465      100796 :         predecessor->inchain = victim->inchain;
     466             :     }
     467      212234 :     if (victim->inchain != NULL)
     468             :     {
     469      113589 :         assert(victim->inchain->inchainRev == victim);
     470      113589 :         victim->inchain->inchainRev = predecessor;
     471             :     }
     472      212234 :     to->nins--;
     473             : 
     474             :     /* clean up and place on from-state's free list */
     475      212234 :     victim->type = 0;
     476      212234 :     victim->from = NULL;     /* precautions... */
     477      212234 :     victim->to = NULL;
     478      212234 :     victim->inchain = NULL;
     479      212234 :     victim->inchainRev = NULL;
     480      212234 :     victim->outchain = NULL;
     481      212234 :     victim->outchainRev = NULL;
     482      212234 :     victim->freechain = from->free;
     483      212234 :     from->free = victim;
     484      212234 : }
     485             : 
     486             : /*
     487             :  * changearctarget - flip an arc to have a different to state
     488             :  *
     489             :  * Caller must have verified that there is no pre-existing duplicate arc.
     490             :  *
     491             :  * Note that because we store arcs in their from state, we can't easily have
     492             :  * a similar changearcsource function.
     493             :  */
     494             : static void
     495           0 : changearctarget(struct arc *a, struct state *newto)
     496             : {
     497           0 :     struct state *oldto = a->to;
     498             :     struct arc *predecessor;
     499             : 
     500           0 :     assert(oldto != newto);
     501             : 
     502             :     /* take it off old target's in-chain */
     503           0 :     assert(oldto != NULL);
     504           0 :     predecessor = a->inchainRev;
     505           0 :     if (predecessor == NULL)
     506             :     {
     507           0 :         assert(oldto->ins == a);
     508           0 :         oldto->ins = a->inchain;
     509             :     }
     510             :     else
     511             :     {
     512           0 :         assert(predecessor->inchain == a);
     513           0 :         predecessor->inchain = a->inchain;
     514             :     }
     515           0 :     if (a->inchain != NULL)
     516             :     {
     517           0 :         assert(a->inchain->inchainRev == a);
     518           0 :         a->inchain->inchainRev = predecessor;
     519             :     }
     520           0 :     oldto->nins--;
     521             : 
     522           0 :     a->to = newto;
     523             : 
     524             :     /* prepend it to new target's in-chain */
     525           0 :     a->inchain = newto->ins;
     526           0 :     a->inchainRev = NULL;
     527           0 :     if (newto->ins)
     528           0 :         newto->ins->inchainRev = a;
     529           0 :     newto->ins = a;
     530           0 :     newto->nins++;
     531           0 : }
     532             : 
     533             : /*
     534             :  * hasnonemptyout - Does state have a non-EMPTY out arc?
     535             :  */
     536             : static int
     537       21197 : hasnonemptyout(struct state *s)
     538             : {
     539             :     struct arc *a;
     540             : 
     541       30104 :     for (a = s->outs; a != NULL; a = a->outchain)
     542             :     {
     543       27570 :         if (a->type != EMPTY)
     544       18663 :             return 1;
     545             :     }
     546        2534 :     return 0;
     547             : }
     548             : 
     549             : /*
     550             :  * findarc - find arc, if any, from given source with given type and color
     551             :  * If there is more than one such arc, the result is random.
     552             :  */
     553             : static struct arc *
     554          71 : findarc(struct state *s,
     555             :         int type,
     556             :         color co)
     557             : {
     558             :     struct arc *a;
     559             : 
     560         166 :     for (a = s->outs; a != NULL; a = a->outchain)
     561         134 :         if (a->type == type && a->co == co)
     562          39 :             return a;
     563          32 :     return NULL;
     564             : }
     565             : 
     566             : /*
     567             :  * cparc - allocate a new arc within an NFA, copying details from old one
     568             :  */
     569             : static void
     570      201586 : cparc(struct nfa *nfa,
     571             :       struct arc *oa,
     572             :       struct state *from,
     573             :       struct state *to)
     574             : {
     575      201586 :     newarc(nfa, oa->type, oa->co, from, to);
     576      201586 : }
     577             : 
     578             : /*
     579             :  * sortins - sort the in arcs of a state by from/color/type
     580             :  */
     581             : static void
     582        5645 : sortins(struct nfa *nfa,
     583             :         struct state *s)
     584             : {
     585             :     struct arc **sortarray;
     586             :     struct arc *a;
     587        5645 :     int         n = s->nins;
     588             :     int         i;
     589             : 
     590        5645 :     if (n <= 1)
     591         152 :         return;                 /* nothing to do */
     592             :     /* make an array of arc pointers ... */
     593        5493 :     sortarray = (struct arc **) MALLOC(n * sizeof(struct arc *));
     594        5493 :     if (sortarray == NULL)
     595             :     {
     596           0 :         NERR(REG_ESPACE);
     597           0 :         return;
     598             :     }
     599        5493 :     i = 0;
     600       26185 :     for (a = s->ins; a != NULL; a = a->inchain)
     601       20692 :         sortarray[i++] = a;
     602        5493 :     assert(i == n);
     603             :     /* ... sort the array */
     604        5493 :     qsort(sortarray, n, sizeof(struct arc *), sortins_cmp);
     605             :     /* ... and rebuild arc list in order */
     606             :     /* it seems worth special-casing first and last items to simplify loop */
     607        5493 :     a = sortarray[0];
     608        5493 :     s->ins = a;
     609        5493 :     a->inchain = sortarray[1];
     610        5493 :     a->inchainRev = NULL;
     611       15199 :     for (i = 1; i < n - 1; i++)
     612             :     {
     613        9706 :         a = sortarray[i];
     614        9706 :         a->inchain = sortarray[i + 1];
     615        9706 :         a->inchainRev = sortarray[i - 1];
     616             :     }
     617        5493 :     a = sortarray[i];
     618        5493 :     a->inchain = NULL;
     619        5493 :     a->inchainRev = sortarray[i - 1];
     620        5493 :     FREE(sortarray);
     621             : }
     622             : 
     623             : static int
     624    10191167 : sortins_cmp(const void *a, const void *b)
     625             : {
     626    10191167 :     const struct arc *aa = *((const struct arc *const *) a);
     627    10191167 :     const struct arc *bb = *((const struct arc *const *) b);
     628             : 
     629             :     /* we check the fields in the order they are most likely to be different */
     630    10191167 :     if (aa->from->no < bb->from->no)
     631     8032822 :         return -1;
     632     2158345 :     if (aa->from->no > bb->from->no)
     633     2033526 :         return 1;
     634      124819 :     if (aa->co < bb->co)
     635       70818 :         return -1;
     636       54001 :     if (aa->co > bb->co)
     637       38646 :         return 1;
     638       15355 :     if (aa->type < bb->type)
     639        9144 :         return -1;
     640        6211 :     if (aa->type > bb->type)
     641        4624 :         return 1;
     642        1587 :     return 0;
     643             : }
     644             : 
     645             : /*
     646             :  * sortouts - sort the out arcs of a state by to/color/type
     647             :  */
     648             : static void
     649          32 : sortouts(struct nfa *nfa,
     650             :          struct state *s)
     651             : {
     652             :     struct arc **sortarray;
     653             :     struct arc *a;
     654          32 :     int         n = s->nouts;
     655             :     int         i;
     656             : 
     657          32 :     if (n <= 1)
     658          16 :         return;                 /* nothing to do */
     659             :     /* make an array of arc pointers ... */
     660          16 :     sortarray = (struct arc **) MALLOC(n * sizeof(struct arc *));
     661          16 :     if (sortarray == NULL)
     662             :     {
     663           0 :         NERR(REG_ESPACE);
     664           0 :         return;
     665             :     }
     666          16 :     i = 0;
     667         544 :     for (a = s->outs; a != NULL; a = a->outchain)
     668         528 :         sortarray[i++] = a;
     669          16 :     assert(i == n);
     670             :     /* ... sort the array */
     671          16 :     qsort(sortarray, n, sizeof(struct arc *), sortouts_cmp);
     672             :     /* ... and rebuild arc list in order */
     673             :     /* it seems worth special-casing first and last items to simplify loop */
     674          16 :     a = sortarray[0];
     675          16 :     s->outs = a;
     676          16 :     a->outchain = sortarray[1];
     677          16 :     a->outchainRev = NULL;
     678         512 :     for (i = 1; i < n - 1; i++)
     679             :     {
     680         496 :         a = sortarray[i];
     681         496 :         a->outchain = sortarray[i + 1];
     682         496 :         a->outchainRev = sortarray[i - 1];
     683             :     }
     684          16 :     a = sortarray[i];
     685          16 :     a->outchain = NULL;
     686          16 :     a->outchainRev = sortarray[i - 1];
     687          16 :     FREE(sortarray);
     688             : }
     689             : 
     690             : static int
     691        3408 : sortouts_cmp(const void *a, const void *b)
     692             : {
     693        3408 :     const struct arc *aa = *((const struct arc *const *) a);
     694        3408 :     const struct arc *bb = *((const struct arc *const *) b);
     695             : 
     696             :     /* we check the fields in the order they are most likely to be different */
     697        3408 :     if (aa->to->no < bb->to->no)
     698        2144 :         return -1;
     699        1264 :     if (aa->to->no > bb->to->no)
     700        1264 :         return 1;
     701           0 :     if (aa->co < bb->co)
     702           0 :         return -1;
     703           0 :     if (aa->co > bb->co)
     704           0 :         return 1;
     705           0 :     if (aa->type < bb->type)
     706           0 :         return -1;
     707           0 :     if (aa->type > bb->type)
     708           0 :         return 1;
     709           0 :     return 0;
     710             : }
     711             : 
     712             : /*
     713             :  * Common decision logic about whether to use arc-by-arc operations or
     714             :  * sort/merge.  If there's just a few source arcs we cannot recoup the
     715             :  * cost of sorting the destination arc list, no matter how large it is.
     716             :  * Otherwise, limit the number of arc-by-arc comparisons to about 1000
     717             :  * (a somewhat arbitrary choice, but the breakeven point would probably
     718             :  * be machine dependent anyway).
     719             :  */
     720             : #define BULK_ARC_OP_USE_SORT(nsrcarcs, ndestarcs) \
     721             :     ((nsrcarcs) < 4 ? 0 : ((nsrcarcs) > 32 || (ndestarcs) > 32))
     722             : 
     723             : /*
     724             :  * moveins - move all in arcs of a state to another state
     725             :  *
     726             :  * You might think this could be done better by just updating the
     727             :  * existing arcs, and you would be right if it weren't for the need
     728             :  * for duplicate suppression, which makes it easier to just make new
     729             :  * ones to exploit the suppression built into newarc.
     730             :  *
     731             :  * However, if we have a whole lot of arcs to deal with, retail duplicate
     732             :  * checks become too slow.  In that case we proceed by sorting and merging
     733             :  * the arc lists, and then we can indeed just update the arcs in-place.
     734             :  */
     735             : static void
     736       44169 : moveins(struct nfa *nfa,
     737             :         struct state *oldState,
     738             :         struct state *newState)
     739             : {
     740       44169 :     assert(oldState != newState);
     741             : 
     742       44169 :     if (!BULK_ARC_OP_USE_SORT(oldState->nins, newState->nins))
     743       44126 :     {
     744             :         /* With not too many arcs, just do them one at a time */
     745             :         struct arc *a;
     746             : 
     747      156107 :         while ((a = oldState->ins) != NULL)
     748             :         {
     749       67855 :             cparc(nfa, a, a->from, newState);
     750       67855 :             freearc(nfa, a);
     751             :         }
     752             :     }
     753             :     else
     754             :     {
     755             :         /*
     756             :          * With many arcs, use a sort-merge approach.  Note changearctarget()
     757             :          * will put the arc onto the front of newState's chain, so it does not
     758             :          * break our walk through the sorted part of the chain.
     759             :          */
     760             :         struct arc *oa;
     761             :         struct arc *na;
     762             : 
     763             :         /*
     764             :          * Because we bypass newarc() in this code path, we'd better include a
     765             :          * cancel check.
     766             :          */
     767          43 :         if (CANCEL_REQUESTED(nfa->v->re))
     768             :         {
     769           0 :             NERR(REG_CANCEL);
     770           0 :             return;
     771             :         }
     772             : 
     773          43 :         sortins(nfa, oldState);
     774          43 :         sortins(nfa, newState);
     775          43 :         if (NISERR())
     776           0 :             return;             /* might have failed to sort */
     777          43 :         oa = oldState->ins;
     778          43 :         na = newState->ins;
     779        1830 :         while (oa != NULL && na != NULL)
     780             :         {
     781        1744 :             struct arc *a = oa;
     782             : 
     783        1744 :             switch (sortins_cmp(&oa, &na))
     784             :             {
     785             :                 case -1:
     786             :                     /* newState does not have anything matching oa */
     787           0 :                     oa = oa->inchain;
     788             : 
     789             :                     /*
     790             :                      * Rather than doing createarc+freearc, we can just unlink
     791             :                      * and relink the existing arc struct.
     792             :                      */
     793           0 :                     changearctarget(a, newState);
     794           0 :                     break;
     795             :                 case 0:
     796             :                     /* match, advance in both lists */
     797         254 :                     oa = oa->inchain;
     798         254 :                     na = na->inchain;
     799             :                     /* ... and drop duplicate arc from oldState */
     800         254 :                     freearc(nfa, a);
     801         254 :                     break;
     802             :                 case +1:
     803             :                     /* advance only na; oa might have a match later */
     804        1490 :                     na = na->inchain;
     805        1490 :                     break;
     806             :                 default:
     807           0 :                     assert(NOTREACHED);
     808             :             }
     809             :         }
     810          86 :         while (oa != NULL)
     811             :         {
     812             :             /* newState does not have anything matching oa */
     813           0 :             struct arc *a = oa;
     814             : 
     815           0 :             oa = oa->inchain;
     816           0 :             changearctarget(a, newState);
     817             :         }
     818             :     }
     819             : 
     820       44169 :     assert(oldState->nins == 0);
     821       44169 :     assert(oldState->ins == NULL);
     822             : }
     823             : 
     824             : /*
     825             :  * copyins - copy in arcs of a state to another state
     826             :  */
     827             : static void
     828        2618 : copyins(struct nfa *nfa,
     829             :         struct state *oldState,
     830             :         struct state *newState)
     831             : {
     832        2618 :     assert(oldState != newState);
     833             : 
     834        2618 :     if (!BULK_ARC_OP_USE_SORT(oldState->nins, newState->nins))
     835        2466 :     {
     836             :         /* With not too many arcs, just do them one at a time */
     837             :         struct arc *a;
     838             : 
     839       26208 :         for (a = oldState->ins; a != NULL; a = a->inchain)
     840       23742 :             cparc(nfa, a, a->from, newState);
     841             :     }
     842             :     else
     843             :     {
     844             :         /*
     845             :          * With many arcs, use a sort-merge approach.  Note that createarc()
     846             :          * will put new arcs onto the front of newState's chain, so it does
     847             :          * not break our walk through the sorted part of the chain.
     848             :          */
     849             :         struct arc *oa;
     850             :         struct arc *na;
     851             : 
     852             :         /*
     853             :          * Because we bypass newarc() in this code path, we'd better include a
     854             :          * cancel check.
     855             :          */
     856         152 :         if (CANCEL_REQUESTED(nfa->v->re))
     857             :         {
     858           0 :             NERR(REG_CANCEL);
     859           0 :             return;
     860             :         }
     861             : 
     862         152 :         sortins(nfa, oldState);
     863         152 :         sortins(nfa, newState);
     864         152 :         if (NISERR())
     865           0 :             return;             /* might have failed to sort */
     866         152 :         oa = oldState->ins;
     867         152 :         na = newState->ins;
     868         304 :         while (oa != NULL && na != NULL)
     869             :         {
     870           0 :             struct arc *a = oa;
     871             : 
     872           0 :             switch (sortins_cmp(&oa, &na))
     873             :             {
     874             :                 case -1:
     875             :                     /* newState does not have anything matching oa */
     876           0 :                     oa = oa->inchain;
     877           0 :                     createarc(nfa, a->type, a->co, a->from, newState);
     878           0 :                     break;
     879             :                 case 0:
     880             :                     /* match, advance in both lists */
     881           0 :                     oa = oa->inchain;
     882           0 :                     na = na->inchain;
     883           0 :                     break;
     884             :                 case +1:
     885             :                     /* advance only na; oa might have a match later */
     886           0 :                     na = na->inchain;
     887           0 :                     break;
     888             :                 default:
     889           0 :                     assert(NOTREACHED);
     890             :             }
     891             :         }
     892        6374 :         while (oa != NULL)
     893             :         {
     894             :             /* newState does not have anything matching oa */
     895        6070 :             struct arc *a = oa;
     896             : 
     897        6070 :             oa = oa->inchain;
     898        6070 :             createarc(nfa, a->type, a->co, a->from, newState);
     899             :         }
     900             :     }
     901             : }
     902             : 
     903             : /*
     904             :  * mergeins - merge a list of inarcs into a state
     905             :  *
     906             :  * This is much like copyins, but the source arcs are listed in an array,
     907             :  * and are not guaranteed unique.  It's okay to clobber the array contents.
     908             :  */
     909             : static void
     910       22221 : mergeins(struct nfa *nfa,
     911             :          struct state *s,
     912             :          struct arc **arcarray,
     913             :          int arccount)
     914             : {
     915             :     struct arc *na;
     916             :     int         i;
     917             :     int         j;
     918             : 
     919       22221 :     if (arccount <= 0)
     920       33932 :         return;
     921             : 
     922             :     /*
     923             :      * Because we bypass newarc() in this code path, we'd better include a
     924             :      * cancel check.
     925             :      */
     926        5255 :     if (CANCEL_REQUESTED(nfa->v->re))
     927             :     {
     928           0 :         NERR(REG_CANCEL);
     929           0 :         return;
     930             :     }
     931             : 
     932             :     /* Sort existing inarcs as well as proposed new ones */
     933        5255 :     sortins(nfa, s);
     934        5255 :     if (NISERR())
     935           0 :         return;                 /* might have failed to sort */
     936             : 
     937        5255 :     qsort(arcarray, arccount, sizeof(struct arc *), sortins_cmp);
     938             : 
     939             :     /*
     940             :      * arcarray very likely includes dups, so we must eliminate them.  (This
     941             :      * could be folded into the next loop, but it's not worth the trouble.)
     942             :      */
     943        5255 :     j = 0;
     944     2007774 :     for (i = 1; i < arccount; i++)
     945             :     {
     946     2002519 :         switch (sortins_cmp(&arcarray[j], &arcarray[i]))
     947             :         {
     948             :             case -1:
     949             :                 /* non-dup */
     950     2001891 :                 arcarray[++j] = arcarray[i];
     951     2001891 :                 break;
     952             :             case 0:
     953             :                 /* dup */
     954         628 :                 break;
     955             :             default:
     956             :                 /* trouble */
     957           0 :                 assert(NOTREACHED);
     958             :         }
     959             :     }
     960        5255 :     arccount = j + 1;
     961             : 
     962             :     /*
     963             :      * Now merge into s' inchain.  Note that createarc() will put new arcs
     964             :      * onto the front of s's chain, so it does not break our walk through the
     965             :      * sorted part of the chain.
     966             :      */
     967        5255 :     i = 0;
     968        5255 :     na = s->ins;
     969     2015615 :     while (i < arccount && na != NULL)
     970             :     {
     971     2005105 :         struct arc *a = arcarray[i];
     972             : 
     973     2005105 :         switch (sortins_cmp(&a, &na))
     974             :         {
     975             :             case -1:
     976             :                 /* s does not have anything matching a */
     977     1997462 :                 createarc(nfa, a->type, a->co, a->from, s);
     978     1997462 :                 i++;
     979     1997462 :                 break;
     980             :             case 0:
     981             :                 /* match, advance in both lists */
     982           9 :                 i++;
     983           9 :                 na = na->inchain;
     984           9 :                 break;
     985             :             case +1:
     986             :                 /* advance only na; array might have a match later */
     987        7634 :                 na = na->inchain;
     988        7634 :                 break;
     989             :             default:
     990           0 :                 assert(NOTREACHED);
     991             :         }
     992             :     }
     993       20185 :     while (i < arccount)
     994             :     {
     995             :         /* s does not have anything matching a */
     996        9675 :         struct arc *a = arcarray[i];
     997             : 
     998        9675 :         createarc(nfa, a->type, a->co, a->from, s);
     999        9675 :         i++;
    1000             :     }
    1001             : }
    1002             : 
    1003             : /*
    1004             :  * moveouts - move all out arcs of a state to another state
    1005             :  */
    1006             : static void
    1007       11707 : moveouts(struct nfa *nfa,
    1008             :          struct state *oldState,
    1009             :          struct state *newState)
    1010             : {
    1011       11707 :     assert(oldState != newState);
    1012             : 
    1013       11707 :     if (!BULK_ARC_OP_USE_SORT(oldState->nouts, newState->nouts))
    1014       11707 :     {
    1015             :         /* With not too many arcs, just do them one at a time */
    1016             :         struct arc *a;
    1017             : 
    1018       37850 :         while ((a = oldState->outs) != NULL)
    1019             :         {
    1020       14436 :             cparc(nfa, a, newState, a->to);
    1021       14436 :             freearc(nfa, a);
    1022             :         }
    1023             :     }
    1024             :     else
    1025             :     {
    1026             :         /*
    1027             :          * With many arcs, use a sort-merge approach.  Note that createarc()
    1028             :          * will put new arcs onto the front of newState's chain, so it does
    1029             :          * not break our walk through the sorted part of the chain.
    1030             :          */
    1031             :         struct arc *oa;
    1032             :         struct arc *na;
    1033             : 
    1034             :         /*
    1035             :          * Because we bypass newarc() in this code path, we'd better include a
    1036             :          * cancel check.
    1037             :          */
    1038           0 :         if (CANCEL_REQUESTED(nfa->v->re))
    1039             :         {
    1040           0 :             NERR(REG_CANCEL);
    1041           0 :             return;
    1042             :         }
    1043             : 
    1044           0 :         sortouts(nfa, oldState);
    1045           0 :         sortouts(nfa, newState);
    1046           0 :         if (NISERR())
    1047           0 :             return;             /* might have failed to sort */
    1048           0 :         oa = oldState->outs;
    1049           0 :         na = newState->outs;
    1050           0 :         while (oa != NULL && na != NULL)
    1051             :         {
    1052           0 :             struct arc *a = oa;
    1053             : 
    1054           0 :             switch (sortouts_cmp(&oa, &na))
    1055             :             {
    1056             :                 case -1:
    1057             :                     /* newState does not have anything matching oa */
    1058           0 :                     oa = oa->outchain;
    1059           0 :                     createarc(nfa, a->type, a->co, newState, a->to);
    1060           0 :                     freearc(nfa, a);
    1061           0 :                     break;
    1062             :                 case 0:
    1063             :                     /* match, advance in both lists */
    1064           0 :                     oa = oa->outchain;
    1065           0 :                     na = na->outchain;
    1066             :                     /* ... and drop duplicate arc from oldState */
    1067           0 :                     freearc(nfa, a);
    1068           0 :                     break;
    1069             :                 case +1:
    1070             :                     /* advance only na; oa might have a match later */
    1071           0 :                     na = na->outchain;
    1072           0 :                     break;
    1073             :                 default:
    1074           0 :                     assert(NOTREACHED);
    1075             :             }
    1076             :         }
    1077           0 :         while (oa != NULL)
    1078             :         {
    1079             :             /* newState does not have anything matching oa */
    1080           0 :             struct arc *a = oa;
    1081             : 
    1082           0 :             oa = oa->outchain;
    1083           0 :             createarc(nfa, a->type, a->co, newState, a->to);
    1084           0 :             freearc(nfa, a);
    1085             :         }
    1086             :     }
    1087             : 
    1088       11707 :     assert(oldState->nouts == 0);
    1089       11707 :     assert(oldState->outs == NULL);
    1090             : }
    1091             : 
    1092             : /*
    1093             :  * copyouts - copy out arcs of a state to another state
    1094             :  */
    1095             : static void
    1096        2875 : copyouts(struct nfa *nfa,
    1097             :          struct state *oldState,
    1098             :          struct state *newState)
    1099             : {
    1100        2875 :     assert(oldState != newState);
    1101             : 
    1102        2875 :     if (!BULK_ARC_OP_USE_SORT(oldState->nouts, newState->nouts))
    1103        2859 :     {
    1104             :         /* With not too many arcs, just do them one at a time */
    1105             :         struct arc *a;
    1106             : 
    1107       14716 :         for (a = oldState->outs; a != NULL; a = a->outchain)
    1108       11857 :             cparc(nfa, a, newState, a->to);
    1109             :     }
    1110             :     else
    1111             :     {
    1112             :         /*
    1113             :          * With many arcs, use a sort-merge approach.  Note that createarc()
    1114             :          * will put new arcs onto the front of newState's chain, so it does
    1115             :          * not break our walk through the sorted part of the chain.
    1116             :          */
    1117             :         struct arc *oa;
    1118             :         struct arc *na;
    1119             : 
    1120             :         /*
    1121             :          * Because we bypass newarc() in this code path, we'd better include a
    1122             :          * cancel check.
    1123             :          */
    1124          16 :         if (CANCEL_REQUESTED(nfa->v->re))
    1125             :         {
    1126           0 :             NERR(REG_CANCEL);
    1127           0 :             return;
    1128             :         }
    1129             : 
    1130          16 :         sortouts(nfa, oldState);
    1131          16 :         sortouts(nfa, newState);
    1132          16 :         if (NISERR())
    1133           0 :             return;             /* might have failed to sort */
    1134          16 :         oa = oldState->outs;
    1135          16 :         na = newState->outs;
    1136          32 :         while (oa != NULL && na != NULL)
    1137             :         {
    1138           0 :             struct arc *a = oa;
    1139             : 
    1140           0 :             switch (sortouts_cmp(&oa, &na))
    1141             :             {
    1142             :                 case -1:
    1143             :                     /* newState does not have anything matching oa */
    1144           0 :                     oa = oa->outchain;
    1145           0 :                     createarc(nfa, a->type, a->co, newState, a->to);
    1146           0 :                     break;
    1147             :                 case 0:
    1148             :                     /* match, advance in both lists */
    1149           0 :                     oa = oa->outchain;
    1150           0 :                     na = na->outchain;
    1151           0 :                     break;
    1152             :                 case +1:
    1153             :                     /* advance only na; oa might have a match later */
    1154           0 :                     na = na->outchain;
    1155           0 :                     break;
    1156             :                 default:
    1157           0 :                     assert(NOTREACHED);
    1158             :             }
    1159             :         }
    1160         560 :         while (oa != NULL)
    1161             :         {
    1162             :             /* newState does not have anything matching oa */
    1163         528 :             struct arc *a = oa;
    1164             : 
    1165         528 :             oa = oa->outchain;
    1166         528 :             createarc(nfa, a->type, a->co, newState, a->to);
    1167             :         }
    1168             :     }
    1169             : }
    1170             : 
    1171             : /*
    1172             :  * cloneouts - copy out arcs of a state to another state pair, modifying type
    1173             :  */
    1174             : static void
    1175          18 : cloneouts(struct nfa *nfa,
    1176             :           struct state *old,
    1177             :           struct state *from,
    1178             :           struct state *to,
    1179             :           int type)
    1180             : {
    1181             :     struct arc *a;
    1182             : 
    1183          18 :     assert(old != from);
    1184             : 
    1185          54 :     for (a = old->outs; a != NULL; a = a->outchain)
    1186          36 :         newarc(nfa, type, a->co, from, to);
    1187          18 : }
    1188             : 
    1189             : /*
    1190             :  * delsub - delete a sub-NFA, updating subre pointers if necessary
    1191             :  *
    1192             :  * This uses a recursive traversal of the sub-NFA, marking already-seen
    1193             :  * states using their tmp pointer.
    1194             :  */
    1195             : static void
    1196           9 : delsub(struct nfa *nfa,
    1197             :        struct state *lp,        /* the sub-NFA goes from here... */
    1198             :        struct state *rp)        /* ...to here, *not* inclusive */
    1199             : {
    1200           9 :     assert(lp != rp);
    1201             : 
    1202           9 :     rp->tmp = rp;                /* mark end */
    1203             : 
    1204           9 :     deltraverse(nfa, lp, lp);
    1205           9 :     if (NISERR())
    1206           9 :         return;                 /* asserts might not hold after failure */
    1207           9 :     assert(lp->nouts == 0 && rp->nins == 0);  /* did the job */
    1208           9 :     assert(lp->no != FREESTATE && rp->no != FREESTATE); /* no more */
    1209             : 
    1210           9 :     rp->tmp = NULL;              /* unmark end */
    1211           9 :     lp->tmp = NULL;              /* and begin, marked by deltraverse */
    1212             : }
    1213             : 
    1214             : /*
    1215             :  * deltraverse - the recursive heart of delsub
    1216             :  * This routine's basic job is to destroy all out-arcs of the state.
    1217             :  */
    1218             : static void
    1219          18 : deltraverse(struct nfa *nfa,
    1220             :             struct state *leftend,
    1221             :             struct state *s)
    1222             : {
    1223             :     struct arc *a;
    1224             :     struct state *to;
    1225             : 
    1226             :     /* Since this is recursive, it could be driven to stack overflow */
    1227          18 :     if (STACK_TOO_DEEP(nfa->v->re))
    1228             :     {
    1229           0 :         NERR(REG_ETOOBIG);
    1230           0 :         return;
    1231             :     }
    1232             : 
    1233          18 :     if (s->nouts == 0)
    1234           9 :         return;                 /* nothing to do */
    1235           9 :     if (s->tmp != NULL)
    1236           0 :         return;                 /* already in progress */
    1237             : 
    1238           9 :     s->tmp = s;                  /* mark as in progress */
    1239             : 
    1240          27 :     while ((a = s->outs) != NULL)
    1241             :     {
    1242           9 :         to = a->to;
    1243           9 :         deltraverse(nfa, leftend, to);
    1244           9 :         if (NISERR())
    1245           0 :             return;             /* asserts might not hold after failure */
    1246           9 :         assert(to->nouts == 0 || to->tmp != NULL);
    1247           9 :         freearc(nfa, a);
    1248           9 :         if (to->nins == 0 && to->tmp == NULL)
    1249             :         {
    1250           0 :             assert(to->nouts == 0);
    1251           0 :             freestate(nfa, to);
    1252             :         }
    1253             :     }
    1254             : 
    1255           9 :     assert(s->no != FREESTATE); /* we're still here */
    1256           9 :     assert(s == leftend || s->nins != 0);    /* and still reachable */
    1257           9 :     assert(s->nouts == 0);       /* but have no outarcs */
    1258             : 
    1259           9 :     s->tmp = NULL;               /* we're done here */
    1260             : }
    1261             : 
    1262             : /*
    1263             :  * dupnfa - duplicate sub-NFA
    1264             :  *
    1265             :  * Another recursive traversal, this time using tmp to point to duplicates
    1266             :  * as well as mark already-seen states.  (You knew there was a reason why
    1267             :  * it's a state pointer, didn't you? :-))
    1268             :  */
    1269             : static void
    1270        1589 : dupnfa(struct nfa *nfa,
    1271             :        struct state *start,     /* duplicate of subNFA starting here */
    1272             :        struct state *stop,      /* and stopping here */
    1273             :        struct state *from,      /* stringing duplicate from here */
    1274             :        struct state *to)        /* to here */
    1275             : {
    1276        1589 :     if (start == stop)
    1277             :     {
    1278          68 :         newarc(nfa, EMPTY, 0, from, to);
    1279        1657 :         return;
    1280             :     }
    1281             : 
    1282        1521 :     stop->tmp = to;
    1283        1521 :     duptraverse(nfa, start, from);
    1284             :     /* done, except for clearing out the tmp pointers */
    1285             : 
    1286        1521 :     stop->tmp = NULL;
    1287        1521 :     cleartraverse(nfa, start);
    1288             : }
    1289             : 
    1290             : /*
    1291             :  * duptraverse - recursive heart of dupnfa
    1292             :  */
    1293             : static void
    1294       66157 : duptraverse(struct nfa *nfa,
    1295             :             struct state *s,
    1296             :             struct state *stmp) /* s's duplicate, or NULL */
    1297             : {
    1298             :     struct arc *a;
    1299             : 
    1300             :     /* Since this is recursive, it could be driven to stack overflow */
    1301       66157 :     if (STACK_TOO_DEEP(nfa->v->re))
    1302             :     {
    1303           0 :         NERR(REG_ETOOBIG);
    1304           0 :         return;
    1305             :     }
    1306             : 
    1307       66157 :     if (s->tmp != NULL)
    1308       12646 :         return;                 /* already done */
    1309             : 
    1310       53511 :     s->tmp = (stmp == NULL) ? newstate(nfa) : stmp;
    1311       53511 :     if (s->tmp == NULL)
    1312             :     {
    1313           0 :         assert(NISERR());
    1314           0 :         return;
    1315             :     }
    1316             : 
    1317      118147 :     for (a = s->outs; a != NULL && !NISERR(); a = a->outchain)
    1318             :     {
    1319       64636 :         duptraverse(nfa, a->to, (struct state *) NULL);
    1320       64636 :         if (NISERR())
    1321           0 :             break;
    1322       64636 :         assert(a->to->tmp != NULL);
    1323       64636 :         cparc(nfa, a, s->tmp, a->to->tmp);
    1324             :     }
    1325             : }
    1326             : 
    1327             : /*
    1328             :  * cleartraverse - recursive cleanup for algorithms that leave tmp ptrs set
    1329             :  */
    1330             : static void
    1331      219497 : cleartraverse(struct nfa *nfa,
    1332             :               struct state *s)
    1333             : {
    1334             :     struct arc *a;
    1335             : 
    1336             :     /* Since this is recursive, it could be driven to stack overflow */
    1337      219497 :     if (STACK_TOO_DEEP(nfa->v->re))
    1338             :     {
    1339           0 :         NERR(REG_ETOOBIG);
    1340           0 :         return;
    1341             :     }
    1342             : 
    1343      219497 :     if (s->tmp == NULL)
    1344       85214 :         return;
    1345      134283 :     s->tmp = NULL;
    1346             : 
    1347      348702 :     for (a = s->outs; a != NULL; a = a->outchain)
    1348      214419 :         cleartraverse(nfa, a->to);
    1349             : }
    1350             : 
    1351             : /*
    1352             :  * single_color_transition - does getting from s1 to s2 cross one PLAIN arc?
    1353             :  *
    1354             :  * If traversing from s1 to s2 requires a single PLAIN match (possibly of any
    1355             :  * of a set of colors), return a state whose outarc list contains only PLAIN
    1356             :  * arcs of those color(s).  Otherwise return NULL.
    1357             :  *
    1358             :  * This is used before optimizing the NFA, so there may be EMPTY arcs, which
    1359             :  * we should ignore; the possibility of an EMPTY is why the result state could
    1360             :  * be different from s1.
    1361             :  *
    1362             :  * It's worth troubling to handle multiple parallel PLAIN arcs here because a
    1363             :  * bracket construct such as [abc] might yield either one or several parallel
    1364             :  * PLAIN arcs depending on earlier atoms in the expression.  We'd rather that
    1365             :  * that implementation detail not create user-visible performance differences.
    1366             :  */
    1367             : static struct state *
    1368          25 : single_color_transition(struct state *s1, struct state *s2)
    1369             : {
    1370             :     struct arc *a;
    1371             : 
    1372             :     /* Ignore leading EMPTY arc, if any */
    1373          25 :     if (s1->nouts == 1 && s1->outs->type == EMPTY)
    1374          25 :         s1 = s1->outs->to;
    1375             :     /* Likewise for any trailing EMPTY arc */
    1376          25 :     if (s2->nins == 1 && s2->ins->type == EMPTY)
    1377          25 :         s2 = s2->ins->from;
    1378             :     /* Perhaps we could have a single-state loop in between, if so reject */
    1379          25 :     if (s1 == s2)
    1380           0 :         return NULL;
    1381             :     /* s1 must have at least one outarc... */
    1382          25 :     if (s1->outs == NULL)
    1383           0 :         return NULL;
    1384             :     /* ... and they must all be PLAIN arcs to s2 */
    1385          45 :     for (a = s1->outs; a != NULL; a = a->outchain)
    1386             :     {
    1387          27 :         if (a->type != PLAIN || a->to != s2)
    1388           7 :             return NULL;
    1389             :     }
    1390             :     /* OK, return s1 as the possessor of the relevant outarcs */
    1391          18 :     return s1;
    1392             : }
    1393             : 
    1394             : /*
    1395             :  * specialcolors - fill in special colors for an NFA
    1396             :  */
    1397             : static void
    1398        1780 : specialcolors(struct nfa *nfa)
    1399             : {
    1400             :     /* false colors for BOS, BOL, EOS, EOL */
    1401        1780 :     if (nfa->parent == NULL)
    1402             :     {
    1403         248 :         nfa->bos[0] = pseudocolor(nfa->cm);
    1404         248 :         nfa->bos[1] = pseudocolor(nfa->cm);
    1405         248 :         nfa->eos[0] = pseudocolor(nfa->cm);
    1406         248 :         nfa->eos[1] = pseudocolor(nfa->cm);
    1407             :     }
    1408             :     else
    1409             :     {
    1410        1532 :         assert(nfa->parent->bos[0] != COLORLESS);
    1411        1532 :         nfa->bos[0] = nfa->parent->bos[0];
    1412        1532 :         assert(nfa->parent->bos[1] != COLORLESS);
    1413        1532 :         nfa->bos[1] = nfa->parent->bos[1];
    1414        1532 :         assert(nfa->parent->eos[0] != COLORLESS);
    1415        1532 :         nfa->eos[0] = nfa->parent->eos[0];
    1416        1532 :         assert(nfa->parent->eos[1] != COLORLESS);
    1417        1532 :         nfa->eos[1] = nfa->parent->eos[1];
    1418             :     }
    1419        1780 : }
    1420             : 
    1421             : /*
    1422             :  * optimize - optimize an NFA
    1423             :  *
    1424             :  * The main goal of this function is not so much "optimization" (though it
    1425             :  * does try to get rid of useless NFA states) as reducing the NFA to a form
    1426             :  * the regex executor can handle.  The executor, and indeed the cNFA format
    1427             :  * that is its input, can only handle PLAIN and LACON arcs.  The output of
    1428             :  * the regex parser also includes EMPTY (do-nothing) arcs, as well as
    1429             :  * ^, $, AHEAD, and BEHIND constraint arcs, which we must get rid of here.
    1430             :  * We first get rid of EMPTY arcs and then deal with the constraint arcs.
    1431             :  * The hardest part of either job is to get rid of circular loops of the
    1432             :  * target arc type.  We would have to do that in any case, though, as such a
    1433             :  * loop would otherwise allow the executor to cycle through the loop endlessly
    1434             :  * without making any progress in the input string.
    1435             :  */
    1436             : static long                     /* re_info bits */
    1437        1779 : optimize(struct nfa *nfa,
    1438             :          FILE *f)               /* for debug output; NULL none */
    1439             : {
    1440             : #ifdef REG_DEBUG
    1441             :     int         verbose = (f != NULL) ? 1 : 0;
    1442             : 
    1443             :     if (verbose)
    1444             :         fprintf(f, "\ninitial cleanup:\n");
    1445             : #endif
    1446        1779 :     cleanup(nfa);               /* may simplify situation */
    1447             : #ifdef REG_DEBUG
    1448             :     if (verbose)
    1449             :         dumpnfa(nfa, f);
    1450             :     if (verbose)
    1451             :         fprintf(f, "\nempties:\n");
    1452             : #endif
    1453        1779 :     fixempties(nfa, f);         /* get rid of EMPTY arcs */
    1454             : #ifdef REG_DEBUG
    1455             :     if (verbose)
    1456             :         fprintf(f, "\nconstraints:\n");
    1457             : #endif
    1458        1779 :     fixconstraintloops(nfa, f); /* get rid of constraint loops */
    1459        1779 :     pullback(nfa, f);           /* pull back constraints backward */
    1460        1779 :     pushfwd(nfa, f);            /* push fwd constraints forward */
    1461             : #ifdef REG_DEBUG
    1462             :     if (verbose)
    1463             :         fprintf(f, "\nfinal cleanup:\n");
    1464             : #endif
    1465        1779 :     cleanup(nfa);               /* final tidying */
    1466             : #ifdef REG_DEBUG
    1467             :     if (verbose)
    1468             :         dumpnfa(nfa, f);
    1469             : #endif
    1470        1779 :     return analyze(nfa);        /* and analysis */
    1471             : }
    1472             : 
    1473             : /*
    1474             :  * pullback - pull back constraints backward to eliminate them
    1475             :  */
    1476             : static void
    1477        2377 : pullback(struct nfa *nfa,
    1478             :          FILE *f)               /* for debug output; NULL none */
    1479             : {
    1480             :     struct state *s;
    1481             :     struct state *nexts;
    1482             :     struct arc *a;
    1483             :     struct arc *nexta;
    1484             :     struct state *intermediates;
    1485             :     int         progress;
    1486             : 
    1487             :     /* find and pull until there are no more */
    1488             :     do
    1489             :     {
    1490        2377 :         progress = 0;
    1491       35177 :         for (s = nfa->states; s != NULL && !NISERR(); s = nexts)
    1492             :         {
    1493       32800 :             nexts = s->next;
    1494       32800 :             intermediates = NULL;
    1495      127265 :             for (a = s->outs; a != NULL && !NISERR(); a = nexta)
    1496             :             {
    1497       94465 :                 nexta = a->outchain;
    1498       94465 :                 if (a->type == '^' || a->type == BEHIND)
    1499       10582 :                     if (pull(nfa, a, &intermediates))
    1500        3973 :                         progress = 1;
    1501             :             }
    1502             :             /* clear tmp fields of intermediate states created here */
    1503       65931 :             while (intermediates != NULL)
    1504             :             {
    1505         331 :                 struct state *ns = intermediates->tmp;
    1506             : 
    1507         331 :                 intermediates->tmp = NULL;
    1508         331 :                 intermediates = ns;
    1509             :             }
    1510             :             /* if s is now useless, get rid of it */
    1511       32800 :             if ((s->nins == 0 || s->nouts == 0) && !s->flag)
    1512        3684 :                 dropstate(nfa, s);
    1513             :         }
    1514        2377 :         if (progress && f != NULL)
    1515           0 :             dumpnfa(nfa, f);
    1516        2377 :     } while (progress && !NISERR());
    1517        1779 :     if (NISERR())
    1518        1780 :         return;
    1519             : 
    1520             :     /*
    1521             :      * Any ^ constraints we were able to pull to the start state can now be
    1522             :      * replaced by PLAIN arcs referencing the BOS or BOL colors.  There should
    1523             :      * be no other ^ or BEHIND arcs left in the NFA, though we do not check
    1524             :      * that here (compact() will fail if so).
    1525             :      */
    1526       18140 :     for (a = nfa->pre->outs; a != NULL; a = nexta)
    1527             :     {
    1528       16362 :         nexta = a->outchain;
    1529       16362 :         if (a->type == '^')
    1530             :         {
    1531        5016 :             assert(a->co == 0 || a->co == 1);
    1532        5016 :             newarc(nfa, PLAIN, nfa->bos[a->co], a->from, a->to);
    1533        5016 :             freearc(nfa, a);
    1534             :         }
    1535             :     }
    1536             : }
    1537             : 
    1538             : /*
    1539             :  * pull - pull a back constraint backward past its source state
    1540             :  *
    1541             :  * Returns 1 if successful (which it always is unless the source is the
    1542             :  * start state or we have an internal error), 0 if nothing happened.
    1543             :  *
    1544             :  * A significant property of this function is that it deletes no pre-existing
    1545             :  * states, and no outarcs of the constraint's from state other than the given
    1546             :  * constraint arc.  This makes the loops in pullback() safe, at the cost that
    1547             :  * we may leave useless states behind.  Therefore, we leave it to pullback()
    1548             :  * to delete such states.
    1549             :  *
    1550             :  * If the from state has multiple back-constraint outarcs, and/or multiple
    1551             :  * compatible constraint inarcs, we only need to create one new intermediate
    1552             :  * state per combination of predecessor and successor states.  *intermediates
    1553             :  * points to a list of such intermediate states for this from state (chained
    1554             :  * through their tmp fields).
    1555             :  */
    1556             : static int
    1557       10582 : pull(struct nfa *nfa,
    1558             :      struct arc *con,
    1559             :      struct state **intermediates)
    1560             : {
    1561       10582 :     struct state *from = con->from;
    1562       10582 :     struct state *to = con->to;
    1563             :     struct arc *a;
    1564             :     struct arc *nexta;
    1565             :     struct state *s;
    1566             : 
    1567       10582 :     assert(from != to);         /* should have gotten rid of this earlier */
    1568       10582 :     if (from->flag)              /* can't pull back beyond start */
    1569        6609 :         return 0;
    1570        3973 :     if (from->nins == 0)
    1571             :     {                           /* unreachable */
    1572         573 :         freearc(nfa, con);
    1573         573 :         return 1;
    1574             :     }
    1575             : 
    1576             :     /*
    1577             :      * First, clone from state if necessary to avoid other outarcs.  This may
    1578             :      * seem wasteful, but it simplifies the logic, and we'll get rid of the
    1579             :      * clone state again at the bottom.
    1580             :      */
    1581        3400 :     if (from->nouts > 1)
    1582             :     {
    1583        2618 :         s = newstate(nfa);
    1584        2618 :         if (NISERR())
    1585           0 :             return 0;
    1586        2618 :         copyins(nfa, from, s);  /* duplicate inarcs */
    1587        2618 :         cparc(nfa, con, s, to); /* move constraint arc */
    1588        2618 :         freearc(nfa, con);
    1589        2618 :         if (NISERR())
    1590           0 :             return 0;
    1591        2618 :         from = s;
    1592        2618 :         con = from->outs;
    1593             :     }
    1594        3400 :     assert(from->nouts == 1);
    1595             : 
    1596             :     /* propagate the constraint into the from state's inarcs */
    1597       42288 :     for (a = from->ins; a != NULL && !NISERR(); a = nexta)
    1598             :     {
    1599       38888 :         nexta = a->inchain;
    1600       38888 :         switch (combine(con, a))
    1601             :         {
    1602             :             case INCOMPATIBLE:  /* destroy the arc */
    1603       32198 :                 freearc(nfa, a);
    1604       32198 :                 break;
    1605             :             case SATISFIED:     /* no action needed */
    1606        3686 :                 break;
    1607             :             case COMPATIBLE:    /* swap the two arcs, more or less */
    1608             :                 /* need an intermediate state, but might have one already */
    1609        3639 :                 for (s = *intermediates; s != NULL; s = s->tmp)
    1610             :                 {
    1611        3308 :                     assert(s->nins > 0 && s->nouts > 0);
    1612        3308 :                     if (s->ins->from == a->from && s->outs->to == to)
    1613        2673 :                         break;
    1614             :                 }
    1615        3004 :                 if (s == NULL)
    1616             :                 {
    1617         331 :                     s = newstate(nfa);
    1618         331 :                     if (NISERR())
    1619           0 :                         return 0;
    1620         331 :                     s->tmp = *intermediates;
    1621         331 :                     *intermediates = s;
    1622             :                 }
    1623        3004 :                 cparc(nfa, con, a->from, s);
    1624        3004 :                 cparc(nfa, a, s, to);
    1625        3004 :                 freearc(nfa, a);
    1626        3004 :                 break;
    1627             :             default:
    1628           0 :                 assert(NOTREACHED);
    1629             :                 break;
    1630             :         }
    1631             :     }
    1632             : 
    1633             :     /* remaining inarcs, if any, incorporate the constraint */
    1634        3400 :     moveins(nfa, from, to);
    1635        3400 :     freearc(nfa, con);
    1636             :     /* from state is now useless, but we leave it to pullback() to clean up */
    1637        3400 :     return 1;
    1638             : }
    1639             : 
    1640             : /*
    1641             :  * pushfwd - push forward constraints forward to eliminate them
    1642             :  */
    1643             : static void
    1644        2437 : pushfwd(struct nfa *nfa,
    1645             :         FILE *f)                /* for debug output; NULL none */
    1646             : {
    1647             :     struct state *s;
    1648             :     struct state *nexts;
    1649             :     struct arc *a;
    1650             :     struct arc *nexta;
    1651             :     struct state *intermediates;
    1652             :     int         progress;
    1653             : 
    1654             :     /* find and push until there are no more */
    1655             :     do
    1656             :     {
    1657        2437 :         progress = 0;
    1658       32579 :         for (s = nfa->states; s != NULL && !NISERR(); s = nexts)
    1659             :         {
    1660       30142 :             nexts = s->next;
    1661       30142 :             intermediates = NULL;
    1662      108580 :             for (a = s->ins; a != NULL && !NISERR(); a = nexta)
    1663             :             {
    1664       78438 :                 nexta = a->inchain;
    1665       78438 :                 if (a->type == '$' || a->type == AHEAD)
    1666        8130 :                     if (push(nfa, a, &intermediates))
    1667        3582 :                         progress = 1;
    1668             :             }
    1669             :             /* clear tmp fields of intermediate states created here */
    1670       60284 :             while (intermediates != NULL)
    1671             :             {
    1672           0 :                 struct state *ns = intermediates->tmp;
    1673             : 
    1674           0 :                 intermediates->tmp = NULL;
    1675           0 :                 intermediates = ns;
    1676             :             }
    1677             :             /* if s is now useless, get rid of it */
    1678       30142 :             if ((s->nins == 0 || s->nouts == 0) && !s->flag)
    1679        3895 :                 dropstate(nfa, s);
    1680             :         }
    1681        2437 :         if (progress && f != NULL)
    1682           0 :             dumpnfa(nfa, f);
    1683        2437 :     } while (progress && !NISERR());
    1684        1779 :     if (NISERR())
    1685        1780 :         return;
    1686             : 
    1687             :     /*
    1688             :      * Any $ constraints we were able to push to the post state can now be
    1689             :      * replaced by PLAIN arcs referencing the EOS or EOL colors.  There should
    1690             :      * be no other $ or AHEAD arcs left in the NFA, though we do not check
    1691             :      * that here (compact() will fail if so).
    1692             :      */
    1693       12602 :     for (a = nfa->post->ins; a != NULL; a = nexta)
    1694             :     {
    1695       10824 :         nexta = a->inchain;
    1696       10824 :         if (a->type == '$')
    1697             :         {
    1698        3140 :             assert(a->co == 0 || a->co == 1);
    1699        3140 :             newarc(nfa, PLAIN, nfa->eos[a->co], a->from, a->to);
    1700        3140 :             freearc(nfa, a);
    1701             :         }
    1702             :     }
    1703             : }
    1704             : 
    1705             : /*
    1706             :  * push - push a forward constraint forward past its destination state
    1707             :  *
    1708             :  * Returns 1 if successful (which it always is unless the destination is the
    1709             :  * post state or we have an internal error), 0 if nothing happened.
    1710             :  *
    1711             :  * A significant property of this function is that it deletes no pre-existing
    1712             :  * states, and no inarcs of the constraint's to state other than the given
    1713             :  * constraint arc.  This makes the loops in pushfwd() safe, at the cost that
    1714             :  * we may leave useless states behind.  Therefore, we leave it to pushfwd()
    1715             :  * to delete such states.
    1716             :  *
    1717             :  * If the to state has multiple forward-constraint inarcs, and/or multiple
    1718             :  * compatible constraint outarcs, we only need to create one new intermediate
    1719             :  * state per combination of predecessor and successor states.  *intermediates
    1720             :  * points to a list of such intermediate states for this to state (chained
    1721             :  * through their tmp fields).
    1722             :  */
    1723             : static int
    1724        8130 : push(struct nfa *nfa,
    1725             :      struct arc *con,
    1726             :      struct state **intermediates)
    1727             : {
    1728        8130 :     struct state *from = con->from;
    1729        8130 :     struct state *to = con->to;
    1730             :     struct arc *a;
    1731             :     struct arc *nexta;
    1732             :     struct state *s;
    1733             : 
    1734        8130 :     assert(to != from);         /* should have gotten rid of this earlier */
    1735        8130 :     if (to->flag)                /* can't push forward beyond end */
    1736        4548 :         return 0;
    1737        3582 :     if (to->nouts == 0)
    1738             :     {                           /* dead end */
    1739          67 :         freearc(nfa, con);
    1740          67 :         return 1;
    1741             :     }
    1742             : 
    1743             :     /*
    1744             :      * First, clone to state if necessary to avoid other inarcs.  This may
    1745             :      * seem wasteful, but it simplifies the logic, and we'll get rid of the
    1746             :      * clone state again at the bottom.
    1747             :      */
    1748        3515 :     if (to->nins > 1)
    1749             :     {
    1750        2820 :         s = newstate(nfa);
    1751        2820 :         if (NISERR())
    1752           0 :             return 0;
    1753        2820 :         copyouts(nfa, to, s);   /* duplicate outarcs */
    1754        2820 :         cparc(nfa, con, from, s);   /* move constraint arc */
    1755        2820 :         freearc(nfa, con);
    1756        2820 :         if (NISERR())
    1757           0 :             return 0;
    1758        2820 :         to = s;
    1759        2820 :         con = to->ins;
    1760             :     }
    1761        3515 :     assert(to->nins == 1);
    1762             : 
    1763             :     /* propagate the constraint into the to state's outarcs */
    1764       21842 :     for (a = to->outs; a != NULL && !NISERR(); a = nexta)
    1765             :     {
    1766       18327 :         nexta = a->outchain;
    1767       18327 :         switch (combine(con, a))
    1768             :         {
    1769             :             case INCOMPATIBLE:  /* destroy the arc */
    1770       16274 :                 freearc(nfa, a);
    1771       16274 :                 break;
    1772             :             case SATISFIED:     /* no action needed */
    1773        2053 :                 break;
    1774             :             case COMPATIBLE:    /* swap the two arcs, more or less */
    1775             :                 /* need an intermediate state, but might have one already */
    1776           0 :                 for (s = *intermediates; s != NULL; s = s->tmp)
    1777             :                 {
    1778           0 :                     assert(s->nins > 0 && s->nouts > 0);
    1779           0 :                     if (s->ins->from == from && s->outs->to == a->to)
    1780           0 :                         break;
    1781             :                 }
    1782           0 :                 if (s == NULL)
    1783             :                 {
    1784           0 :                     s = newstate(nfa);
    1785           0 :                     if (NISERR())
    1786           0 :                         return 0;
    1787           0 :                     s->tmp = *intermediates;
    1788           0 :                     *intermediates = s;
    1789             :                 }
    1790           0 :                 cparc(nfa, con, s, a->to);
    1791           0 :                 cparc(nfa, a, from, s);
    1792           0 :                 freearc(nfa, a);
    1793           0 :                 break;
    1794             :             default:
    1795           0 :                 assert(NOTREACHED);
    1796             :                 break;
    1797             :         }
    1798             :     }
    1799             : 
    1800             :     /* remaining outarcs, if any, incorporate the constraint */
    1801        3515 :     moveouts(nfa, to, from);
    1802        3515 :     freearc(nfa, con);
    1803             :     /* to state is now useless, but we leave it to pushfwd() to clean up */
    1804        3515 :     return 1;
    1805             : }
    1806             : 
    1807             : /*
    1808             :  * combine - constraint lands on an arc, what happens?
    1809             :  *
    1810             :  * #def INCOMPATIBLE    1   // destroys arc
    1811             :  * #def SATISFIED       2   // constraint satisfied
    1812             :  * #def COMPATIBLE      3   // compatible but not satisfied yet
    1813             :  */
    1814             : static int
    1815       57215 : combine(struct arc *con,
    1816             :         struct arc *a)
    1817             : {
    1818             : #define  CA(ct,at)   (((ct)<<CHAR_BIT) | (at))
    1819             : 
    1820       57215 :     switch (CA(con->type, a->type))
    1821             :     {
    1822             :         case CA('^', PLAIN):    /* newlines are handled separately */
    1823             :         case CA('$', PLAIN):
    1824       44562 :             return INCOMPATIBLE;
    1825             :             break;
    1826             :         case CA(AHEAD, PLAIN):  /* color constraints meet colors */
    1827             :         case CA(BEHIND, PLAIN):
    1828        1281 :             if (con->co == a->co)
    1829         270 :                 return SATISFIED;
    1830        1011 :             return INCOMPATIBLE;
    1831             :             break;
    1832             :         case CA('^', '^'):      /* collision, similar constraints */
    1833             :         case CA('$', '$'):
    1834             :         case CA(AHEAD, AHEAD):
    1835             :         case CA(BEHIND, BEHIND):
    1836        7790 :             if (con->co == a->co) /* true duplication */
    1837        5469 :                 return SATISFIED;
    1838        2321 :             return INCOMPATIBLE;
    1839             :             break;
    1840             :         case CA('^', BEHIND):   /* collision, dissimilar constraints */
    1841             :         case CA(BEHIND, '^'):
    1842             :         case CA('$', AHEAD):
    1843             :         case CA(AHEAD, '$'):
    1844         578 :             return INCOMPATIBLE;
    1845             :             break;
    1846             :         case CA('^', '$'):      /* constraints passing each other */
    1847             :         case CA('^', AHEAD):
    1848             :         case CA(BEHIND, '$'):
    1849             :         case CA(BEHIND, AHEAD):
    1850             :         case CA('$', '^'):
    1851             :         case CA('$', BEHIND):
    1852             :         case CA(AHEAD, '^'):
    1853             :         case CA(AHEAD, BEHIND):
    1854             :         case CA('^', LACON):
    1855             :         case CA(BEHIND, LACON):
    1856             :         case CA('$', LACON):
    1857             :         case CA(AHEAD, LACON):
    1858        3004 :             return COMPATIBLE;
    1859             :             break;
    1860             :     }
    1861           0 :     assert(NOTREACHED);
    1862             :     return INCOMPATIBLE;        /* for benefit of blind compilers */
    1863             : }
    1864             : 
    1865             : /*
    1866             :  * fixempties - get rid of EMPTY arcs
    1867             :  */
    1868             : static void
    1869        1779 : fixempties(struct nfa *nfa,
    1870             :            FILE *f)             /* for debug output; NULL none */
    1871             : {
    1872             :     struct state *s;
    1873             :     struct state *s2;
    1874             :     struct state *nexts;
    1875             :     struct arc *a;
    1876             :     struct arc *nexta;
    1877             :     int         totalinarcs;
    1878             :     struct arc **inarcsorig;
    1879             :     struct arc **arcarray;
    1880             :     int         arccount;
    1881             :     int         prevnins;
    1882             :     int         nskip;
    1883             : 
    1884             :     /*
    1885             :      * First, get rid of any states whose sole out-arc is an EMPTY, since
    1886             :      * they're basically just aliases for their successor.  The parsing
    1887             :      * algorithm creates enough of these that it's worth special-casing this.
    1888             :      */
    1889       64886 :     for (s = nfa->states; s != NULL && !NISERR(); s = nexts)
    1890             :     {
    1891       63107 :         nexts = s->next;
    1892       63107 :         if (s->flag || s->nouts != 1)
    1893       15715 :             continue;
    1894       47392 :         a = s->outs;
    1895       47392 :         assert(a != NULL && a->outchain == NULL);
    1896       47392 :         if (a->type != EMPTY)
    1897       14890 :             continue;
    1898       32502 :         if (s != a->to)
    1899       32502 :             moveins(nfa, s, a->to);
    1900       32502 :         dropstate(nfa, s);
    1901             :     }
    1902             : 
    1903             :     /*
    1904             :      * Similarly, get rid of any state with a single EMPTY in-arc, by folding
    1905             :      * it into its predecessor.
    1906             :      */
    1907       32384 :     for (s = nfa->states; s != NULL && !NISERR(); s = nexts)
    1908             :     {
    1909       30605 :         nexts = s->next;
    1910             :         /* while we're at it, ensure tmp fields are clear for next step */
    1911       30605 :         assert(s->tmp == NULL);
    1912       30605 :         if (s->flag || s->nins != 1)
    1913       12633 :             continue;
    1914       17972 :         a = s->ins;
    1915       17972 :         assert(a != NULL && a->inchain == NULL);
    1916       17972 :         if (a->type != EMPTY)
    1917       13136 :             continue;
    1918        4836 :         if (s != a->from)
    1919        4836 :             moveouts(nfa, s, a->from);
    1920        4836 :         dropstate(nfa, s);
    1921             :     }
    1922             : 
    1923        1779 :     if (NISERR())
    1924           0 :         return;
    1925             : 
    1926             :     /*
    1927             :      * For each remaining NFA state, find all other states from which it is
    1928             :      * reachable by a chain of one or more EMPTY arcs.  Then generate new arcs
    1929             :      * that eliminate the need for each such chain.
    1930             :      *
    1931             :      * We could replace a chain of EMPTY arcs that leads from a "from" state
    1932             :      * to a "to" state either by pushing non-EMPTY arcs forward (linking
    1933             :      * directly from "from"'s predecessors to "to") or by pulling them back
    1934             :      * (linking directly from "from" to "to"'s successors).  We choose to
    1935             :      * always do the former; this choice is somewhat arbitrary, but the
    1936             :      * approach below requires that we uniformly do one or the other.
    1937             :      *
    1938             :      * Suppose we have a chain of N successive EMPTY arcs (where N can easily
    1939             :      * approach the size of the NFA).  All of the intermediate states must
    1940             :      * have additional inarcs and outarcs, else they'd have been removed by
    1941             :      * the steps above.  Assuming their inarcs are mostly not empties, we will
    1942             :      * add O(N^2) arcs to the NFA, since a non-EMPTY inarc leading to any one
    1943             :      * state in the chain must be duplicated to lead to all its successor
    1944             :      * states as well.  So there is no hope of doing less than O(N^2) work;
    1945             :      * however, we should endeavor to keep the big-O cost from being even
    1946             :      * worse than that, which it can easily become without care.  In
    1947             :      * particular, suppose we were to copy all S1's inarcs forward to S2, and
    1948             :      * then also to S3, and then later we consider pushing S2's inarcs forward
    1949             :      * to S3.  If we include the arcs already copied from S1 in that, we'd be
    1950             :      * doing O(N^3) work.  (The duplicate-arc elimination built into newarc()
    1951             :      * and its cohorts would get rid of the extra arcs, but not without cost.)
    1952             :      *
    1953             :      * We can avoid this cost by treating only arcs that existed at the start
    1954             :      * of this phase as candidates to be pushed forward.  To identify those,
    1955             :      * we remember the first inarc each state had to start with.  We rely on
    1956             :      * the fact that newarc() and friends put new arcs on the front of their
    1957             :      * to-states' inchains, and that this phase never deletes arcs, so that
    1958             :      * the original arcs must be the last arcs in their to-states' inchains.
    1959             :      *
    1960             :      * So the process here is that, for each state in the NFA, we gather up
    1961             :      * all non-EMPTY inarcs of states that can reach the target state via
    1962             :      * EMPTY arcs.  We then sort, de-duplicate, and merge these arcs into the
    1963             :      * target state's inchain.  (We can safely use sort-merge for this as long
    1964             :      * as we update each state's original-arcs pointer after we add arcs to
    1965             :      * it; the sort step of mergeins probably changed the order of the old
    1966             :      * arcs.)
    1967             :      *
    1968             :      * Another refinement worth making is that, because we only add non-EMPTY
    1969             :      * arcs during this phase, and all added arcs have the same from-state as
    1970             :      * the non-EMPTY arc they were cloned from, we know ahead of time that any
    1971             :      * states having only EMPTY outarcs will be useless for lack of outarcs
    1972             :      * after we drop the EMPTY arcs.  (They cannot gain non-EMPTY outarcs if
    1973             :      * they had none to start with.)  So we need not bother to update the
    1974             :      * inchains of such states at all.
    1975             :      */
    1976             : 
    1977             :     /* Remember the states' first original inarcs */
    1978             :     /* ... and while at it, count how many old inarcs there are altogether */
    1979        1779 :     inarcsorig = (struct arc **) MALLOC(nfa->nstates * sizeof(struct arc *));
    1980        1779 :     if (inarcsorig == NULL)
    1981             :     {
    1982           0 :         NERR(REG_ESPACE);
    1983           0 :         return;
    1984             :     }
    1985        1779 :     totalinarcs = 0;
    1986       27548 :     for (s = nfa->states; s != NULL; s = s->next)
    1987             :     {
    1988       25769 :         inarcsorig[s->no] = s->ins;
    1989       25769 :         totalinarcs += s->nins;
    1990             :     }
    1991             : 
    1992             :     /*
    1993             :      * Create a workspace for accumulating the inarcs to be added to the
    1994             :      * current target state.  totalinarcs is probably a considerable
    1995             :      * overestimate of the space needed, but the NFA is unlikely to be large
    1996             :      * enough at this point to make it worth being smarter.
    1997             :      */
    1998        1779 :     arcarray = (struct arc **) MALLOC(totalinarcs * sizeof(struct arc *));
    1999        1779 :     if (arcarray == NULL)
    2000             :     {
    2001           0 :         NERR(REG_ESPACE);
    2002           0 :         FREE(inarcsorig);
    2003           0 :         return;
    2004             :     }
    2005             : 
    2006             :     /* And iterate over the target states */
    2007       26534 :     for (s = nfa->states; s != NULL && !NISERR(); s = s->next)
    2008             :     {
    2009             :         /* Ignore target states without non-EMPTY outarcs, per note above */
    2010       24755 :         if (!s->flag && !hasnonemptyout(s))
    2011        2534 :             continue;
    2012             : 
    2013             :         /* Find predecessor states and accumulate their original inarcs */
    2014       22221 :         arccount = 0;
    2015     2022239 :         for (s2 = emptyreachable(nfa, s, s, inarcsorig); s2 != s; s2 = nexts)
    2016             :         {
    2017             :             /* Add s2's original inarcs to arcarray[], but ignore empties */
    2018     6024400 :             for (a = inarcsorig[s2->no]; a != NULL; a = a->inchain)
    2019             :             {
    2020     4024382 :                 if (a->type != EMPTY)
    2021     2007774 :                     arcarray[arccount++] = a;
    2022             :             }
    2023             : 
    2024             :             /* Reset the tmp fields as we walk back */
    2025     2000018 :             nexts = s2->tmp;
    2026     2000018 :             s2->tmp = NULL;
    2027             :         }
    2028       22221 :         s->tmp = NULL;
    2029       22221 :         assert(arccount <= totalinarcs);
    2030             : 
    2031             :         /* Remember how many original inarcs this state has */
    2032       22221 :         prevnins = s->nins;
    2033             : 
    2034             :         /* Add non-duplicate inarcs to target state */
    2035       22221 :         mergeins(nfa, s, arcarray, arccount);
    2036             : 
    2037             :         /* Now we must update the state's inarcsorig pointer */
    2038       22221 :         nskip = s->nins - prevnins;
    2039       22221 :         a = s->ins;
    2040     2051270 :         while (nskip-- > 0)
    2041     2006828 :             a = a->inchain;
    2042       22221 :         inarcsorig[s->no] = a;
    2043             :     }
    2044             : 
    2045        1779 :     FREE(arcarray);
    2046        1779 :     FREE(inarcsorig);
    2047             : 
    2048        1779 :     if (NISERR())
    2049           1 :         return;
    2050             : 
    2051             :     /*
    2052             :      * Now remove all the EMPTY arcs, since we don't need them anymore.
    2053             :      */
    2054       24545 :     for (s = nfa->states; s != NULL; s = s->next)
    2055             :     {
    2056      104535 :         for (a = s->outs; a != NULL; a = nexta)
    2057             :         {
    2058       81768 :             nexta = a->outchain;
    2059       81768 :             if (a->type == EMPTY)
    2060        8775 :                 freearc(nfa, a);
    2061             :         }
    2062             :     }
    2063             : 
    2064             :     /*
    2065             :      * And remove any states that have become useless.  (This cleanup is not
    2066             :      * very thorough, and would be even less so if we tried to combine it with
    2067             :      * the previous step; but cleanup() will take care of anything we miss.)
    2068             :      */
    2069       24545 :     for (s = nfa->states; s != NULL; s = nexts)
    2070             :     {
    2071       22767 :         nexts = s->next;
    2072       22767 :         if ((s->nins == 0 || s->nouts == 0) && !s->flag)
    2073        2534 :             dropstate(nfa, s);
    2074             :     }
    2075             : 
    2076        1778 :     if (f != NULL)
    2077           0 :         dumpnfa(nfa, f);
    2078             : }
    2079             : 
    2080             : /*
    2081             :  * emptyreachable - recursively find all states that can reach s by EMPTY arcs
    2082             :  *
    2083             :  * The return value is the last such state found.  Its tmp field links back
    2084             :  * to the next-to-last such state, and so on back to s, so that all these
    2085             :  * states can be located without searching the whole NFA.
    2086             :  *
    2087             :  * Since this is only used in fixempties(), we pass in the inarcsorig[] array
    2088             :  * maintained by that function.  This lets us skip over all new inarcs, which
    2089             :  * are certainly not EMPTY arcs.
    2090             :  *
    2091             :  * The maximum recursion depth here is equal to the length of the longest
    2092             :  * loop-free chain of EMPTY arcs, which is surely no more than the size of
    2093             :  * the NFA ... but that could still be enough to cause trouble.
    2094             :  */
    2095             : static struct state *
    2096     2022239 : emptyreachable(struct nfa *nfa,
    2097             :                struct state *s,
    2098             :                struct state *lastfound,
    2099             :                struct arc **inarcsorig)
    2100             : {
    2101             :     struct arc *a;
    2102             : 
    2103             :     /* Since this is recursive, it could be driven to stack overflow */
    2104     2022239 :     if (STACK_TOO_DEEP(nfa->v->re))
    2105             :     {
    2106           0 :         NERR(REG_ETOOBIG);
    2107           0 :         return lastfound;
    2108             :     }
    2109             : 
    2110     2022239 :     s->tmp = lastfound;
    2111     2022239 :     lastfound = s;
    2112     6102589 :     for (a = inarcsorig[s->no]; a != NULL; a = a->inchain)
    2113             :     {
    2114     4080350 :         if (a->type == EMPTY && a->from->tmp == NULL)
    2115     2000018 :             lastfound = emptyreachable(nfa, a->from, lastfound, inarcsorig);
    2116             :     }
    2117     2022239 :     return lastfound;
    2118             : }
    2119             : 
    2120             : /*
    2121             :  * isconstraintarc - detect whether an arc is of a constraint type
    2122             :  */
    2123             : static inline int
    2124      181085 : isconstraintarc(struct arc *a)
    2125             : {
    2126      181085 :     switch (a->type)
    2127             :     {
    2128             :         case '^':
    2129             :         case '$':
    2130             :         case BEHIND:
    2131             :         case AHEAD:
    2132             :         case LACON:
    2133       44107 :             return 1;
    2134             :     }
    2135      136978 :     return 0;
    2136             : }
    2137             : 
    2138             : /*
    2139             :  * hasconstraintout - does state have a constraint out arc?
    2140             :  */
    2141             : static int
    2142        6806 : hasconstraintout(struct state *s)
    2143             : {
    2144             :     struct arc *a;
    2145             : 
    2146       17006 :     for (a = s->outs; a != NULL; a = a->outchain)
    2147             :     {
    2148       13257 :         if (isconstraintarc(a))
    2149        3057 :             return 1;
    2150             :     }
    2151        3749 :     return 0;
    2152             : }
    2153             : 
    2154             : /*
    2155             :  * fixconstraintloops - get rid of loops containing only constraint arcs
    2156             :  *
    2157             :  * A loop of states that contains only constraint arcs is useless, since
    2158             :  * passing around the loop represents no forward progress.  Moreover, it
    2159             :  * would cause infinite looping in pullback/pushfwd, so we need to get rid
    2160             :  * of such loops before doing that.
    2161             :  */
    2162             : static void
    2163        1779 : fixconstraintloops(struct nfa *nfa,
    2164             :                    FILE *f)     /* for debug output; NULL none */
    2165             : {
    2166             :     struct state *s;
    2167             :     struct state *nexts;
    2168             :     struct arc *a;
    2169             :     struct arc *nexta;
    2170             :     int         hasconstraints;
    2171             : 
    2172             :     /*
    2173             :      * In the trivial case of a state that loops to itself, we can just drop
    2174             :      * the constraint arc altogether.  This is worth special-casing because
    2175             :      * such loops are far more common than loops containing multiple states.
    2176             :      * While we're at it, note whether any constraint arcs survive.
    2177             :      */
    2178        1779 :     hasconstraints = 0;
    2179       22012 :     for (s = nfa->states; s != NULL && !NISERR(); s = nexts)
    2180             :     {
    2181       20233 :         nexts = s->next;
    2182             :         /* while we're at it, ensure tmp fields are clear for next step */
    2183       20233 :         assert(s->tmp == NULL);
    2184       90740 :         for (a = s->outs; a != NULL && !NISERR(); a = nexta)
    2185             :         {
    2186       70507 :             nexta = a->outchain;
    2187       70507 :             if (isconstraintarc(a))
    2188             :             {
    2189       12942 :                 if (a->to == s)
    2190         177 :                     freearc(nfa, a);
    2191             :                 else
    2192       12765 :                     hasconstraints = 1;
    2193             :             }
    2194             :         }
    2195             :         /* If we removed all the outarcs, the state is useless. */
    2196       20233 :         if (s->nouts == 0 && !s->flag)
    2197           0 :             dropstate(nfa, s);
    2198             :     }
    2199             : 
    2200             :     /* Nothing to do if no remaining constraint arcs */
    2201        1779 :     if (NISERR() || !hasconstraints)
    2202           1 :         return;
    2203             : 
    2204             :     /*
    2205             :      * Starting from each remaining NFA state, search outwards for a
    2206             :      * constraint loop.  If we find a loop, break the loop, then start the
    2207             :      * search over.  (We could possibly retain some state from the first scan,
    2208             :      * but it would complicate things greatly, and multi-state constraint
    2209             :      * loops are rare enough that it's not worth optimizing the case.)
    2210             :      */
    2211             : restart:
    2212       24082 :     for (s = nfa->states; s != NULL && !NISERR(); s = s->next)
    2213             :     {
    2214       22304 :         if (findconstraintloop(nfa, s))
    2215         130 :             goto restart;
    2216             :     }
    2217             : 
    2218        1778 :     if (NISERR())
    2219           0 :         return;
    2220             : 
    2221             :     /*
    2222             :      * Now remove any states that have become useless.  (This cleanup is not
    2223             :      * very thorough, and would be even less so if we tried to combine it with
    2224             :      * the previous step; but cleanup() will take care of anything we miss.)
    2225             :      *
    2226             :      * Because findconstraintloop intentionally doesn't reset all tmp fields,
    2227             :      * we have to clear them after it's done.  This is a convenient place to
    2228             :      * do that, too.
    2229             :      */
    2230       22278 :     for (s = nfa->states; s != NULL; s = nexts)
    2231             :     {
    2232       20500 :         nexts = s->next;
    2233       20500 :         s->tmp = NULL;
    2234       20500 :         if ((s->nins == 0 || s->nouts == 0) && !s->flag)
    2235          75 :             dropstate(nfa, s);
    2236             :     }
    2237             : 
    2238        1778 :     if (f != NULL)
    2239           0 :         dumpnfa(nfa, f);
    2240             : }
    2241             : 
    2242             : /*
    2243             :  * findconstraintloop - recursively find a loop of constraint arcs
    2244             :  *
    2245             :  * If we find a loop, break it by calling breakconstraintloop(), then
    2246             :  * return 1; otherwise return 0.
    2247             :  *
    2248             :  * State tmp fields are guaranteed all NULL on a success return, because
    2249             :  * breakconstraintloop does that.  After a failure return, any state that
    2250             :  * is known not to be part of a loop is marked with s->tmp == s; this allows
    2251             :  * us not to have to re-prove that fact on later calls.  (This convention is
    2252             :  * workable because we already eliminated single-state loops.)
    2253             :  *
    2254             :  * Note that the found loop doesn't necessarily include the first state we
    2255             :  * are called on.  Any loop reachable from that state will do.
    2256             :  *
    2257             :  * The maximum recursion depth here is one more than the length of the longest
    2258             :  * loop-free chain of constraint arcs, which is surely no more than the size
    2259             :  * of the NFA ... but that could still be enough to cause trouble.
    2260             :  */
    2261             : static int
    2262       43107 : findconstraintloop(struct nfa *nfa, struct state *s)
    2263             : {
    2264             :     struct arc *a;
    2265             : 
    2266             :     /* Since this is recursive, it could be driven to stack overflow */
    2267       43107 :     if (STACK_TOO_DEEP(nfa->v->re))
    2268             :     {
    2269           0 :         NERR(REG_ETOOBIG);
    2270           0 :         return 1;               /* to exit as quickly as possible */
    2271             :     }
    2272             : 
    2273       43107 :     if (s->tmp != NULL)
    2274             :     {
    2275             :         /* Already proven uninteresting? */
    2276       18978 :         if (s->tmp == s)
    2277       18848 :             return 0;
    2278             :         /* Found a loop involving s */
    2279         130 :         breakconstraintloop(nfa, s);
    2280             :         /* The tmp fields have been cleaned up by breakconstraintloop */
    2281         130 :         return 1;
    2282             :     }
    2283      111863 :     for (a = s->outs; a != NULL; a = a->outchain)
    2284             :     {
    2285       88148 :         if (isconstraintarc(a))
    2286             :         {
    2287       20803 :             struct state *sto = a->to;
    2288             : 
    2289       20803 :             assert(sto != s);
    2290       20803 :             s->tmp = sto;
    2291       20803 :             if (findconstraintloop(nfa, sto))
    2292         414 :                 return 1;
    2293             :         }
    2294             :     }
    2295             : 
    2296             :     /*
    2297             :      * If we get here, no constraint loop exists leading out from s.  Mark it
    2298             :      * with s->tmp == s so we need not rediscover that fact again later.
    2299             :      */
    2300       23715 :     s->tmp = s;
    2301       23715 :     return 0;
    2302             : }
    2303             : 
    2304             : /*
    2305             :  * breakconstraintloop - break a loop of constraint arcs
    2306             :  *
    2307             :  * sinitial is any one member state of the loop.  Each loop member's tmp
    2308             :  * field links to its successor within the loop.  (Note that this function
    2309             :  * will reset all the tmp fields to NULL.)
    2310             :  *
    2311             :  * We can break the loop by, for any one state S1 in the loop, cloning its
    2312             :  * loop successor state S2 (and possibly following states), and then moving
    2313             :  * all S1->S2 constraint arcs to point to the cloned S2.  The cloned S2 should
    2314             :  * copy any non-constraint outarcs of S2.  Constraint outarcs should be
    2315             :  * dropped if they point back to S1, else they need to be copied as arcs to
    2316             :  * similarly cloned states S3, S4, etc.  In general, each cloned state copies
    2317             :  * non-constraint outarcs, drops constraint outarcs that would lead to itself
    2318             :  * or any earlier cloned state, and sends other constraint outarcs to newly
    2319             :  * cloned states.  No cloned state will have any inarcs that aren't constraint
    2320             :  * arcs or do not lead from S1 or earlier-cloned states.  It's okay to drop
    2321             :  * constraint back-arcs since they would not take us to any state we've not
    2322             :  * already been in; therefore, no new constraint loop is created.  In this way
    2323             :  * we generate a modified NFA that can still represent every useful state
    2324             :  * sequence, but not sequences that represent state loops with no consumption
    2325             :  * of input data.  Note that the set of cloned states will certainly include
    2326             :  * all of the loop member states other than S1, and it may also include
    2327             :  * non-loop states that are reachable from S2 via constraint arcs.  This is
    2328             :  * important because there is no guarantee that findconstraintloop found a
    2329             :  * maximal loop (and searching for one would be NP-hard, so don't try).
    2330             :  * Frequently the "non-loop states" are actually part of a larger loop that
    2331             :  * we didn't notice, and indeed there may be several overlapping loops.
    2332             :  * This technique ensures convergence in such cases, while considering only
    2333             :  * the originally-found loop does not.
    2334             :  *
    2335             :  * If there is only one S1->S2 constraint arc, then that constraint is
    2336             :  * certainly satisfied when we enter any of the clone states.  This means that
    2337             :  * in the common case where many of the constraint arcs are identically
    2338             :  * labeled, we can merge together clone states linked by a similarly-labeled
    2339             :  * constraint: if we can get to the first one we can certainly get to the
    2340             :  * second, so there's no need to distinguish.  This greatly reduces the number
    2341             :  * of new states needed, so we preferentially break the given loop at a state
    2342             :  * pair where this is true.
    2343             :  *
    2344             :  * Furthermore, it's fairly common to find that a cloned successor state has
    2345             :  * no outarcs, especially if we're a bit aggressive about removing unnecessary
    2346             :  * outarcs.  If that happens, then there is simply not any interesting state
    2347             :  * that can be reached through the predecessor's loop arcs, which means we can
    2348             :  * break the loop just by removing those loop arcs, with no new states added.
    2349             :  */
    2350             : static void
    2351         130 : breakconstraintloop(struct nfa *nfa, struct state *sinitial)
    2352             : {
    2353             :     struct state *s;
    2354             :     struct state *shead;
    2355             :     struct state *stail;
    2356             :     struct state *sclone;
    2357             :     struct state *nexts;
    2358             :     struct arc *refarc;
    2359             :     struct arc *a;
    2360             :     struct arc *nexta;
    2361             : 
    2362             :     /*
    2363             :      * Start by identifying which loop step we want to break at.
    2364             :      * Preferentially this is one with only one constraint arc.  (XXX are
    2365             :      * there any other secondary heuristics we want to use here?)  Set refarc
    2366             :      * to point to the selected lone constraint arc, if there is one.
    2367             :      */
    2368         130 :     refarc = NULL;
    2369         130 :     s = sinitial;
    2370             :     do
    2371             :     {
    2372         296 :         nexts = s->tmp;
    2373         296 :         assert(nexts != s);     /* should not see any one-element loops */
    2374         296 :         if (refarc == NULL)
    2375             :         {
    2376         170 :             int         narcs = 0;
    2377             : 
    2378        1591 :             for (a = s->outs; a != NULL; a = a->outchain)
    2379             :             {
    2380        1421 :                 if (a->to == nexts && isconstraintarc(a))
    2381             :                 {
    2382         325 :                     refarc = a;
    2383         325 :                     narcs++;
    2384             :                 }
    2385             :             }
    2386         170 :             assert(narcs > 0);
    2387         170 :             if (narcs > 1)
    2388          58 :                 refarc = NULL;  /* multiple constraint arcs here, no good */
    2389             :         }
    2390         296 :         s = nexts;
    2391         296 :     } while (s != sinitial);
    2392             : 
    2393         130 :     if (refarc)
    2394             :     {
    2395             :         /* break at the refarc */
    2396         112 :         shead = refarc->from;
    2397         112 :         stail = refarc->to;
    2398         112 :         assert(stail == shead->tmp);
    2399             :     }
    2400             :     else
    2401             :     {
    2402             :         /* for lack of a better idea, break after sinitial */
    2403          18 :         shead = sinitial;
    2404          18 :         stail = sinitial->tmp;
    2405             :     }
    2406             : 
    2407             :     /*
    2408             :      * Reset the tmp fields so that we can use them for local storage in
    2409             :      * clonesuccessorstates.  (findconstraintloop won't mind, since it's just
    2410             :      * going to abandon its search anyway.)
    2411             :      */
    2412       14592 :     for (s = nfa->states; s != NULL; s = s->next)
    2413       14462 :         s->tmp = NULL;
    2414             : 
    2415             :     /*
    2416             :      * Recursively build clone state(s) as needed.
    2417             :      */
    2418         130 :     sclone = newstate(nfa);
    2419         130 :     if (sclone == NULL)
    2420             :     {
    2421           0 :         assert(NISERR());
    2422           0 :         return;
    2423             :     }
    2424             : 
    2425         130 :     clonesuccessorstates(nfa, stail, sclone, shead, refarc,
    2426             :                          NULL, NULL, nfa->nstates);
    2427             : 
    2428         130 :     if (NISERR())
    2429           0 :         return;
    2430             : 
    2431             :     /*
    2432             :      * It's possible that sclone has no outarcs at all, in which case it's
    2433             :      * useless.  (We don't try extremely hard to get rid of useless states
    2434             :      * here, but this is an easy and fairly common case.)
    2435             :      */
    2436         130 :     if (sclone->nouts == 0)
    2437             :     {
    2438          23 :         freestate(nfa, sclone);
    2439          23 :         sclone = NULL;
    2440             :     }
    2441             : 
    2442             :     /*
    2443             :      * Move shead's constraint-loop arcs to point to sclone, or just drop them
    2444             :      * if we discovered we don't need sclone.
    2445             :      */
    2446        1325 :     for (a = shead->outs; a != NULL; a = nexta)
    2447             :     {
    2448        1195 :         nexta = a->outchain;
    2449        1195 :         if (a->to == stail && isconstraintarc(a))
    2450             :         {
    2451         174 :             if (sclone)
    2452         143 :                 cparc(nfa, a, shead, sclone);
    2453         174 :             freearc(nfa, a);
    2454         174 :             if (NISERR())
    2455           0 :                 break;
    2456             :         }
    2457             :     }
    2458             : }
    2459             : 
    2460             : /*
    2461             :  * clonesuccessorstates - create a tree of constraint-arc successor states
    2462             :  *
    2463             :  * ssource is the state to be cloned, and sclone is the state to copy its
    2464             :  * outarcs into.  sclone's inarcs, if any, should already be set up.
    2465             :  *
    2466             :  * spredecessor is the original predecessor state that we are trying to build
    2467             :  * successors for (it may not be the immediate predecessor of ssource).
    2468             :  * refarc, if not NULL, is the original constraint arc that is known to have
    2469             :  * been traversed out of spredecessor to reach the successor(s).
    2470             :  *
    2471             :  * For each cloned successor state, we transiently create a "donemap" that is
    2472             :  * a boolean array showing which source states we've already visited for this
    2473             :  * clone state.  This prevents infinite recursion as well as useless repeat
    2474             :  * visits to the same state subtree (which can add up fast, since typical NFAs
    2475             :  * have multiple redundant arc pathways).  Each donemap is a char array
    2476             :  * indexed by state number.  The donemaps are all of the same size "nstates",
    2477             :  * which is nfa->nstates as of the start of the recursion.  This is enough to
    2478             :  * have entries for all pre-existing states, but *not* entries for clone
    2479             :  * states created during the recursion.  That's okay since we have no need to
    2480             :  * mark those.
    2481             :  *
    2482             :  * curdonemap is NULL when recursing to a new sclone state, or sclone's
    2483             :  * donemap when we are recursing without having created a new state (which we
    2484             :  * do when we decide we can merge a successor state into the current clone
    2485             :  * state).  outerdonemap is NULL at the top level and otherwise the parent
    2486             :  * clone state's donemap.
    2487             :  *
    2488             :  * The successor states we create and fill here form a strict tree structure,
    2489             :  * with each state having exactly one predecessor, except that the toplevel
    2490             :  * state has no inarcs as yet (breakconstraintloop will add its inarcs from
    2491             :  * spredecessor after we're done).  Thus, we can examine sclone's inarcs back
    2492             :  * to the root, plus refarc if any, to identify the set of constraints already
    2493             :  * known valid at the current point.  This allows us to avoid generating extra
    2494             :  * successor states.
    2495             :  */
    2496             : static void
    2497        1094 : clonesuccessorstates(struct nfa *nfa,
    2498             :                      struct state *ssource,
    2499             :                      struct state *sclone,
    2500             :                      struct state *spredecessor,
    2501             :                      struct arc *refarc,
    2502             :                      char *curdonemap,
    2503             :                      char *outerdonemap,
    2504             :                      int nstates)
    2505             : {
    2506             :     char       *donemap;
    2507             :     struct arc *a;
    2508             : 
    2509             :     /* Since this is recursive, it could be driven to stack overflow */
    2510        1094 :     if (STACK_TOO_DEEP(nfa->v->re))
    2511             :     {
    2512           0 :         NERR(REG_ETOOBIG);
    2513           0 :         return;
    2514             :     }
    2515             : 
    2516             :     /* If this state hasn't already got a donemap, create one */
    2517        1094 :     donemap = curdonemap;
    2518        1094 :     if (donemap == NULL)
    2519             :     {
    2520         290 :         donemap = (char *) MALLOC(nstates * sizeof(char));
    2521         290 :         if (donemap == NULL)
    2522             :         {
    2523           0 :             NERR(REG_ESPACE);
    2524           0 :             return;
    2525             :         }
    2526             : 
    2527         290 :         if (outerdonemap != NULL)
    2528             :         {
    2529             :             /*
    2530             :              * Not at outermost recursion level, so copy the outer level's
    2531             :              * donemap; this ensures that we see states in process of being
    2532             :              * visited at outer levels, or already merged into predecessor
    2533             :              * states, as ones we shouldn't traverse back to.
    2534             :              */
    2535         160 :             memcpy(donemap, outerdonemap, nstates * sizeof(char));
    2536             :         }
    2537             :         else
    2538             :         {
    2539             :             /* At outermost level, only spredecessor is off-limits */
    2540         130 :             memset(donemap, 0, nstates * sizeof(char));
    2541         130 :             assert(spredecessor->no < nstates);
    2542         130 :             donemap[spredecessor->no] = 1;
    2543             :         }
    2544             :     }
    2545             : 
    2546             :     /* Mark ssource as visited in the donemap */
    2547        1094 :     assert(ssource->no < nstates);
    2548        1094 :     assert(donemap[ssource->no] == 0);
    2549        1094 :     donemap[ssource->no] = 1;
    2550             : 
    2551             :     /*
    2552             :      * We proceed by first cloning all of ssource's outarcs, creating new
    2553             :      * clone states as needed but not doing more with them than that.  Then in
    2554             :      * a second pass, recurse to process the child clone states.  This allows
    2555             :      * us to have only one child clone state per reachable source state, even
    2556             :      * when there are multiple outarcs leading to the same state.  Also, when
    2557             :      * we do visit a child state, its set of inarcs is known exactly, which
    2558             :      * makes it safe to apply the constraint-is-already-checked optimization.
    2559             :      * Also, this ensures that we've merged all the states we can into the
    2560             :      * current clone before we recurse to any children, thus possibly saving
    2561             :      * them from making extra images of those states.
    2562             :      *
    2563             :      * While this function runs, child clone states of the current state are
    2564             :      * marked by setting their tmp fields to point to the original state they
    2565             :      * were cloned from.  This makes it possible to detect multiple outarcs
    2566             :      * leading to the same state, and also makes it easy to distinguish clone
    2567             :      * states from original states (which will have tmp == NULL).
    2568             :      */
    2569        9768 :     for (a = ssource->outs; a != NULL && !NISERR(); a = a->outchain)
    2570             :     {
    2571        8674 :         struct state *sto = a->to;
    2572             : 
    2573             :         /*
    2574             :          * We do not consider cloning successor states that have no constraint
    2575             :          * outarcs; just link to them as-is.  They cannot be part of a
    2576             :          * constraint loop so there is no need to make copies.  In particular,
    2577             :          * this rule keeps us from trying to clone the post state, which would
    2578             :          * be a bad idea.
    2579             :          */
    2580        8674 :         if (isconstraintarc(a) && hasconstraintout(sto))
    2581        1325 :         {
    2582             :             struct state *prevclone;
    2583             :             int         canmerge;
    2584             :             struct arc *a2;
    2585             : 
    2586             :             /*
    2587             :              * Back-link constraint arcs must not be followed.  Nor is there a
    2588             :              * need to revisit states previously merged into this clone.
    2589             :              */
    2590        3057 :             assert(sto->no < nstates);
    2591        3057 :             if (donemap[sto->no] != 0)
    2592        1732 :                 continue;
    2593             : 
    2594             :             /*
    2595             :              * Check whether we already have a child clone state for this
    2596             :              * source state.
    2597             :              */
    2598        1325 :             prevclone = NULL;
    2599       12309 :             for (a2 = sclone->outs; a2 != NULL; a2 = a2->outchain)
    2600             :             {
    2601       11345 :                 if (a2->to->tmp == sto)
    2602             :                 {
    2603         361 :                     prevclone = a2->to;
    2604         361 :                     break;
    2605             :                 }
    2606             :             }
    2607             : 
    2608             :             /*
    2609             :              * If this arc is labeled the same as refarc, or the same as any
    2610             :              * arc we must have traversed to get to sclone, then no additional
    2611             :              * constraints need to be met to get to sto, so we should just
    2612             :              * merge its outarcs into sclone.
    2613             :              */
    2614        1325 :             if (refarc && a->type == refarc->type && a->co == refarc->co)
    2615         804 :                 canmerge = 1;
    2616             :             else
    2617             :             {
    2618             :                 struct state *s;
    2619             : 
    2620         521 :                 canmerge = 0;
    2621        2437 :                 for (s = sclone; s->ins; s = s->ins->from)
    2622             :                 {
    2623        1921 :                     if (s->nins == 1 &&
    2624          10 :                         a->type == s->ins->type && a->co == s->ins->co)
    2625             :                     {
    2626           0 :                         canmerge = 1;
    2627           0 :                         break;
    2628             :                     }
    2629             :                 }
    2630             :             }
    2631             : 
    2632        1325 :             if (canmerge)
    2633             :             {
    2634             :                 /*
    2635             :                  * We can merge into sclone.  If we previously made a child
    2636             :                  * clone state, drop it; there's no need to visit it.  (This
    2637             :                  * can happen if ssource has multiple pathways to sto, and we
    2638             :                  * only just now found one that is provably a no-op.)
    2639             :                  */
    2640         804 :                 if (prevclone)
    2641           0 :                     dropstate(nfa, prevclone);  /* kills our outarc, too */
    2642             : 
    2643             :                 /* Recurse to merge sto's outarcs into sclone */
    2644         804 :                 clonesuccessorstates(nfa,
    2645             :                                      sto,
    2646             :                                      sclone,
    2647             :                                      spredecessor,
    2648             :                                      refarc,
    2649             :                                      donemap,
    2650             :                                      outerdonemap,
    2651             :                                      nstates);
    2652             :                 /* sto should now be marked as previously visited */
    2653         804 :                 assert(NISERR() || donemap[sto->no] == 1);
    2654             :             }
    2655         521 :             else if (prevclone)
    2656             :             {
    2657             :                 /*
    2658             :                  * We already have a clone state for this successor, so just
    2659             :                  * make another arc to it.
    2660             :                  */
    2661         361 :                 cparc(nfa, a, sclone, prevclone);
    2662             :             }
    2663             :             else
    2664             :             {
    2665             :                 /*
    2666             :                  * We need to create a new successor clone state.
    2667             :                  */
    2668             :                 struct state *stoclone;
    2669             : 
    2670         160 :                 stoclone = newstate(nfa);
    2671         160 :                 if (stoclone == NULL)
    2672             :                 {
    2673           0 :                     assert(NISERR());
    2674           0 :                     break;
    2675             :                 }
    2676             :                 /* Mark it as to what it's a clone of */
    2677         160 :                 stoclone->tmp = sto;
    2678             :                 /* ... and add the outarc leading to it */
    2679         160 :                 cparc(nfa, a, sclone, stoclone);
    2680             :             }
    2681             :         }
    2682             :         else
    2683             :         {
    2684             :             /*
    2685             :              * Non-constraint outarcs just get copied to sclone, as do outarcs
    2686             :              * leading to states with no constraint outarc.
    2687             :              */
    2688        5617 :             cparc(nfa, a, sclone, sto);
    2689             :         }
    2690             :     }
    2691             : 
    2692             :     /*
    2693             :      * If we are at outer level for this clone state, recurse to all its child
    2694             :      * clone states, clearing their tmp fields as we go.  (If we're not
    2695             :      * outermost for sclone, leave this to be done by the outer call level.)
    2696             :      * Note that if we have multiple outarcs leading to the same clone state,
    2697             :      * it will only be recursed-to once.
    2698             :      */
    2699        1094 :     if (curdonemap == NULL)
    2700             :     {
    2701        3261 :         for (a = sclone->outs; a != NULL && !NISERR(); a = a->outchain)
    2702             :         {
    2703        2971 :             struct state *stoclone = a->to;
    2704        2971 :             struct state *sto = stoclone->tmp;
    2705             : 
    2706        2971 :             if (sto != NULL)
    2707             :             {
    2708         160 :                 stoclone->tmp = NULL;
    2709         160 :                 clonesuccessorstates(nfa,
    2710             :                                      sto,
    2711             :                                      stoclone,
    2712             :                                      spredecessor,
    2713             :                                      refarc,
    2714             :                                      NULL,
    2715             :                                      donemap,
    2716             :                                      nstates);
    2717             :             }
    2718             :         }
    2719             : 
    2720             :         /* Don't forget to free sclone's donemap when done with it */
    2721         290 :         FREE(donemap);
    2722             :     }
    2723             : }
    2724             : 
    2725             : /*
    2726             :  * cleanup - clean up NFA after optimizations
    2727             :  */
    2728             : static void
    2729        3558 : cleanup(struct nfa *nfa)
    2730             : {
    2731             :     struct state *s;
    2732             :     struct state *nexts;
    2733             :     int         n;
    2734             : 
    2735        3558 :     if (NISERR())
    2736        3559 :         return;
    2737             : 
    2738             :     /* clear out unreachable or dead-end states */
    2739             :     /* use pre to mark reachable, then post to mark can-reach-post */
    2740        3557 :     markreachable(nfa, nfa->pre, (struct state *) NULL, nfa->pre);
    2741        3557 :     markcanreach(nfa, nfa->post, nfa->pre, nfa->post);
    2742       85611 :     for (s = nfa->states; s != NULL && !NISERR(); s = nexts)
    2743             :     {
    2744       82054 :         nexts = s->next;
    2745       82054 :         if (s->tmp != nfa->post && !s->flag)
    2746        1273 :             dropstate(nfa, s);
    2747             :     }
    2748        3557 :     assert(NISERR() || nfa->post->nins == 0 || nfa->post->tmp == nfa->post);
    2749        3557 :     cleartraverse(nfa, nfa->pre);
    2750        3557 :     assert(NISERR() || nfa->post->nins == 0 || nfa->post->tmp == NULL);
    2751             :     /* the nins==0 (final unreachable) case will be caught later */
    2752             : 
    2753             :     /* renumber surviving states */
    2754        3557 :     n = 0;
    2755       84338 :     for (s = nfa->states; s != NULL; s = s->next)
    2756       80781 :         s->no = n++;
    2757        3557 :     nfa->nstates = n;
    2758             : }
    2759             : 
    2760             : /*
    2761             :  * markreachable - recursive marking of reachable states
    2762             :  */
    2763             : static void
    2764      153405 : markreachable(struct nfa *nfa,
    2765             :               struct state *s,
    2766             :               struct state *okay,   /* consider only states with this mark */
    2767             :               struct state *mark)   /* the value to mark with */
    2768             : {
    2769             :     struct arc *a;
    2770             : 
    2771             :     /* Since this is recursive, it could be driven to stack overflow */
    2772      153405 :     if (STACK_TOO_DEEP(nfa->v->re))
    2773             :     {
    2774           0 :         NERR(REG_ETOOBIG);
    2775           0 :         return;
    2776             :     }
    2777             : 
    2778      153405 :     if (s->tmp != okay)
    2779       72618 :         return;
    2780       80787 :     s->tmp = mark;
    2781             : 
    2782      230635 :     for (a = s->outs; a != NULL; a = a->outchain)
    2783      149848 :         markreachable(nfa, a->to, okay, mark);
    2784             : }
    2785             : 
    2786             : /*
    2787             :  * markcanreach - recursive marking of states which can reach here
    2788             :  */
    2789             : static void
    2790      153851 : markcanreach(struct nfa *nfa,
    2791             :              struct state *s,
    2792             :              struct state *okay,    /* consider only states with this mark */
    2793             :              struct state *mark)    /* the value to mark with */
    2794             : {
    2795             :     struct arc *a;
    2796             : 
    2797             :     /* Since this is recursive, it could be driven to stack overflow */
    2798      153851 :     if (STACK_TOO_DEEP(nfa->v->re))
    2799             :     {
    2800           0 :         NERR(REG_ETOOBIG);
    2801           0 :         return;
    2802             :     }
    2803             : 
    2804      153851 :     if (s->tmp != okay)
    2805       73088 :         return;
    2806       80763 :     s->tmp = mark;
    2807             : 
    2808      231057 :     for (a = s->ins; a != NULL; a = a->inchain)
    2809      150294 :         markcanreach(nfa, a->from, okay, mark);
    2810             : }
    2811             : 
    2812             : /*
    2813             :  * analyze - ascertain potentially-useful facts about an optimized NFA
    2814             :  */
    2815             : static long                     /* re_info bits to be ORed in */
    2816        1779 : analyze(struct nfa *nfa)
    2817             : {
    2818             :     struct arc *a;
    2819             :     struct arc *aa;
    2820             : 
    2821        1779 :     if (NISERR())
    2822           1 :         return 0;
    2823             : 
    2824        1778 :     if (nfa->pre->outs == NULL)
    2825           9 :         return REG_UIMPOSSIBLE;
    2826        9248 :     for (a = nfa->pre->outs; a != NULL; a = a->outchain)
    2827       22067 :         for (aa = a->to->outs; aa != NULL; aa = aa->outchain)
    2828       14588 :             if (aa->to == nfa->post)
    2829         696 :                 return REG_UEMPTYMATCH;
    2830        1073 :     return 0;
    2831             : }
    2832             : 
    2833             : /*
    2834             :  * compact - construct the compact representation of an NFA
    2835             :  */
    2836             : static void
    2837        1778 : compact(struct nfa *nfa,
    2838             :         struct cnfa *cnfa)
    2839             : {
    2840             :     struct state *s;
    2841             :     struct arc *a;
    2842             :     size_t      nstates;
    2843             :     size_t      narcs;
    2844             :     struct carc *ca;
    2845             :     struct carc *first;
    2846             : 
    2847        1778 :     assert(!NISERR());
    2848             : 
    2849        1778 :     nstates = 0;
    2850        1778 :     narcs = 0;
    2851       19507 :     for (s = nfa->states; s != NULL; s = s->next)
    2852             :     {
    2853       17729 :         nstates++;
    2854       17729 :         narcs += s->nouts + 1;   /* need one extra for endmarker */
    2855             :     }
    2856             : 
    2857        1778 :     cnfa->stflags = (char *) MALLOC(nstates * sizeof(char));
    2858        1778 :     cnfa->states = (struct carc **) MALLOC(nstates * sizeof(struct carc *));
    2859        1778 :     cnfa->arcs = (struct carc *) MALLOC(narcs * sizeof(struct carc));
    2860        1778 :     if (cnfa->stflags == NULL || cnfa->states == NULL || cnfa->arcs == NULL)
    2861             :     {
    2862           0 :         if (cnfa->stflags != NULL)
    2863           0 :             FREE(cnfa->stflags);
    2864           0 :         if (cnfa->states != NULL)
    2865           0 :             FREE(cnfa->states);
    2866           0 :         if (cnfa->arcs != NULL)
    2867           0 :             FREE(cnfa->arcs);
    2868           0 :         NERR(REG_ESPACE);
    2869        1778 :         return;
    2870             :     }
    2871        1778 :     cnfa->nstates = nstates;
    2872        1778 :     cnfa->pre = nfa->pre->no;
    2873        1778 :     cnfa->post = nfa->post->no;
    2874        1778 :     cnfa->bos[0] = nfa->bos[0];
    2875        1778 :     cnfa->bos[1] = nfa->bos[1];
    2876        1778 :     cnfa->eos[0] = nfa->eos[0];
    2877        1778 :     cnfa->eos[1] = nfa->eos[1];
    2878        1778 :     cnfa->ncolors = maxcolor(nfa->cm) + 1;
    2879        1778 :     cnfa->flags = 0;
    2880             : 
    2881        1778 :     ca = cnfa->arcs;
    2882       19507 :     for (s = nfa->states; s != NULL; s = s->next)
    2883             :     {
    2884       17729 :         assert((size_t) s->no < nstates);
    2885       17729 :         cnfa->stflags[s->no] = 0;
    2886       17729 :         cnfa->states[s->no] = ca;
    2887       17729 :         first = ca;
    2888       67788 :         for (a = s->outs; a != NULL; a = a->outchain)
    2889       50059 :             switch (a->type)
    2890             :             {
    2891             :                 case PLAIN:
    2892       50026 :                     ca->co = a->co;
    2893       50026 :                     ca->to = a->to->no;
    2894       50026 :                     ca++;
    2895       50026 :                     break;
    2896             :                 case LACON:
    2897          33 :                     assert(s->no != cnfa->pre);
    2898          33 :                     ca->co = (color) (cnfa->ncolors + a->co);
    2899          33 :                     ca->to = a->to->no;
    2900          33 :                     ca++;
    2901          33 :                     cnfa->flags |= HASLACONS;
    2902          33 :                     break;
    2903             :                 default:
    2904           0 :                     NERR(REG_ASSERT);
    2905           0 :                     break;
    2906             :             }
    2907       17729 :         carcsort(first, ca - first);
    2908       17729 :         ca->co = COLORLESS;
    2909       17729 :         ca->to = 0;
    2910       17729 :         ca++;
    2911             :     }
    2912        1778 :     assert(ca == &cnfa->arcs[narcs]);
    2913        1778 :     assert(cnfa->nstates != 0);
    2914             : 
    2915             :     /* mark no-progress states */
    2916       16567 :     for (a = nfa->pre->outs; a != NULL; a = a->outchain)
    2917       14789 :         cnfa->stflags[a->to->no] = CNFA_NOPROGRESS;
    2918        1778 :     cnfa->stflags[nfa->pre->no] = CNFA_NOPROGRESS;
    2919             : }
    2920             : 
    2921             : /*
    2922             :  * carcsort - sort compacted-NFA arcs by color
    2923             :  */
    2924             : static void
    2925       17729 : carcsort(struct carc *first, size_t n)
    2926             : {
    2927       17729 :     if (n > 1)
    2928        4584 :         qsort(first, n, sizeof(struct carc), carc_cmp);
    2929       17729 : }
    2930             : 
    2931             : static int
    2932      128631 : carc_cmp(const void *a, const void *b)
    2933             : {
    2934      128631 :     const struct carc *aa = (const struct carc *) a;
    2935      128631 :     const struct carc *bb = (const struct carc *) b;
    2936             : 
    2937      128631 :     if (aa->co < bb->co)
    2938       26960 :         return -1;
    2939      101671 :     if (aa->co > bb->co)
    2940       40563 :         return +1;
    2941       61108 :     if (aa->to < bb->to)
    2942       32762 :         return -1;
    2943       28346 :     if (aa->to > bb->to)
    2944       28346 :         return +1;
    2945           0 :     return 0;
    2946             : }
    2947             : 
    2948             : /*
    2949             :  * freecnfa - free a compacted NFA
    2950             :  */
    2951             : static void
    2952         149 : freecnfa(struct cnfa *cnfa)
    2953             : {
    2954         149 :     assert(cnfa->nstates != 0); /* not empty already */
    2955         149 :     cnfa->nstates = 0;
    2956         149 :     FREE(cnfa->stflags);
    2957         149 :     FREE(cnfa->states);
    2958         149 :     FREE(cnfa->arcs);
    2959         149 : }
    2960             : 
    2961             : /*
    2962             :  * dumpnfa - dump an NFA in human-readable form
    2963             :  */
    2964             : static void
    2965           0 : dumpnfa(struct nfa *nfa,
    2966             :         FILE *f)
    2967             : {
    2968             : #ifdef REG_DEBUG
    2969             :     struct state *s;
    2970             :     int         nstates = 0;
    2971             :     int         narcs = 0;
    2972             : 
    2973             :     fprintf(f, "pre %d, post %d", nfa->pre->no, nfa->post->no);
    2974             :     if (nfa->bos[0] != COLORLESS)
    2975             :         fprintf(f, ", bos [%ld]", (long) nfa->bos[0]);
    2976             :     if (nfa->bos[1] != COLORLESS)
    2977             :         fprintf(f, ", bol [%ld]", (long) nfa->bos[1]);
    2978             :     if (nfa->eos[0] != COLORLESS)
    2979             :         fprintf(f, ", eos [%ld]", (long) nfa->eos[0]);
    2980             :     if (nfa->eos[1] != COLORLESS)
    2981             :         fprintf(f, ", eol [%ld]", (long) nfa->eos[1]);
    2982             :     fprintf(f, "\n");
    2983             :     for (s = nfa->states; s != NULL; s = s->next)
    2984             :     {
    2985             :         dumpstate(s, f);
    2986             :         nstates++;
    2987             :         narcs += s->nouts;
    2988             :     }
    2989             :     fprintf(f, "total of %d states, %d arcs\n", nstates, narcs);
    2990             :     if (nfa->parent == NULL)
    2991             :         dumpcolors(nfa->cm, f);
    2992             :     fflush(f);
    2993             : #endif
    2994           0 : }
    2995             : 
    2996             : #ifdef REG_DEBUG                /* subordinates of dumpnfa */
    2997             : 
    2998             : /*
    2999             :  * dumpstate - dump an NFA state in human-readable form
    3000             :  */
    3001             : static void
    3002             : dumpstate(struct state *s,
    3003             :           FILE *f)
    3004             : {
    3005             :     struct arc *a;
    3006             : 
    3007             :     fprintf(f, "%d%s%c", s->no, (s->tmp != NULL) ? "T" : "",
    3008             :             (s->flag) ? s->flag : '.');
    3009             :     if (s->prev != NULL && s->prev->next != s)
    3010             :         fprintf(f, "\tstate chain bad\n");
    3011             :     if (s->nouts == 0)
    3012             :         fprintf(f, "\tno out arcs\n");
    3013             :     else
    3014             :         dumparcs(s, f);
    3015             :     fflush(f);
    3016             :     for (a = s->ins; a != NULL; a = a->inchain)
    3017             :     {
    3018             :         if (a->to != s)
    3019             :             fprintf(f, "\tlink from %d to %d on %d's in-chain\n",
    3020             :                     a->from->no, a->to->no, s->no);
    3021             :     }
    3022             : }
    3023             : 
    3024             : /*
    3025             :  * dumparcs - dump out-arcs in human-readable form
    3026             :  */
    3027             : static void
    3028             : dumparcs(struct state *s,
    3029             :          FILE *f)
    3030             : {
    3031             :     int         pos;
    3032             :     struct arc *a;
    3033             : 
    3034             :     /* printing oldest arcs first is usually clearer */
    3035             :     a = s->outs;
    3036             :     assert(a != NULL);
    3037             :     while (a->outchain != NULL)
    3038             :         a = a->outchain;
    3039             :     pos = 1;
    3040             :     do
    3041             :     {
    3042             :         dumparc(a, s, f);
    3043             :         if (pos == 5)
    3044             :         {
    3045             :             fprintf(f, "\n");
    3046             :             pos = 1;
    3047             :         }
    3048             :         else
    3049             :             pos++;
    3050             :         a = a->outchainRev;
    3051             :     } while (a != NULL);
    3052             :     if (pos != 1)
    3053             :         fprintf(f, "\n");
    3054             : }
    3055             : 
    3056             : /*
    3057             :  * dumparc - dump one outarc in readable form, including prefixing tab
    3058             :  */
    3059             : static void
    3060             : dumparc(struct arc *a,
    3061             :         struct state *s,
    3062             :         FILE *f)
    3063             : {
    3064             :     struct arc *aa;
    3065             :     struct arcbatch *ab;
    3066             : 
    3067             :     fprintf(f, "\t");
    3068             :     switch (a->type)
    3069             :     {
    3070             :         case PLAIN:
    3071             :             fprintf(f, "[%ld]", (long) a->co);
    3072             :             break;
    3073             :         case AHEAD:
    3074             :             fprintf(f, ">%ld>", (long) a->co);
    3075             :             break;
    3076             :         case BEHIND:
    3077             :             fprintf(f, "<%ld<", (long) a->co);
    3078             :             break;
    3079             :         case LACON:
    3080             :             fprintf(f, ":%ld:", (long) a->co);
    3081             :             break;
    3082             :         case '^':
    3083             :         case '$':
    3084             :             fprintf(f, "%c%d", a->type, (int) a->co);
    3085             :             break;
    3086             :         case EMPTY:
    3087             :             break;
    3088             :         default:
    3089             :             fprintf(f, "0x%x/0%lo", a->type, (long) a->co);
    3090             :             break;
    3091             :     }
    3092             :     if (a->from != s)
    3093             :         fprintf(f, "?%d?", a->from->no);
    3094             :     for (ab = &a->from->oas; ab != NULL; ab = ab->next)
    3095             :     {
    3096             :         for (aa = &ab->a[0]; aa < &ab->a[ABSIZE]; aa++)
    3097             :             if (aa == a)
    3098             :                 break;          /* NOTE BREAK OUT */
    3099             :         if (aa < &ab->a[ABSIZE])  /* propagate break */
    3100             :             break;              /* NOTE BREAK OUT */
    3101             :     }
    3102             :     if (ab == NULL)
    3103             :         fprintf(f, "?!?");        /* not in allocated space */
    3104             :     fprintf(f, "->");
    3105             :     if (a->to == NULL)
    3106             :     {
    3107             :         fprintf(f, "NULL");
    3108             :         return;
    3109             :     }
    3110             :     fprintf(f, "%d", a->to->no);
    3111             :     for (aa = a->to->ins; aa != NULL; aa = aa->inchain)
    3112             :         if (aa == a)
    3113             :             break;              /* NOTE BREAK OUT */
    3114             :     if (aa == NULL)
    3115             :         fprintf(f, "?!?");        /* missing from in-chain */
    3116             : }
    3117             : #endif                          /* REG_DEBUG */
    3118             : 
    3119             : /*
    3120             :  * dumpcnfa - dump a compacted NFA in human-readable form
    3121             :  */
    3122             : #ifdef REG_DEBUG
    3123             : static void
    3124             : dumpcnfa(struct cnfa *cnfa,
    3125             :          FILE *f)
    3126             : {
    3127             :     int         st;
    3128             : 
    3129             :     fprintf(f, "pre %d, post %d", cnfa->pre, cnfa->post);
    3130             :     if (cnfa->bos[0] != COLORLESS)
    3131             :         fprintf(f, ", bos [%ld]", (long) cnfa->bos[0]);
    3132             :     if (cnfa->bos[1] != COLORLESS)
    3133             :         fprintf(f, ", bol [%ld]", (long) cnfa->bos[1]);
    3134             :     if (cnfa->eos[0] != COLORLESS)
    3135             :         fprintf(f, ", eos [%ld]", (long) cnfa->eos[0]);
    3136             :     if (cnfa->eos[1] != COLORLESS)
    3137             :         fprintf(f, ", eol [%ld]", (long) cnfa->eos[1]);
    3138             :     if (cnfa->flags & HASLACONS)
    3139             :         fprintf(f, ", haslacons");
    3140             :     fprintf(f, "\n");
    3141             :     for (st = 0; st < cnfa->nstates; st++)
    3142             :         dumpcstate(st, cnfa, f);
    3143             :     fflush(f);
    3144             : }
    3145             : #endif
    3146             : 
    3147             : #ifdef REG_DEBUG                /* subordinates of dumpcnfa */
    3148             : 
    3149             : /*
    3150             :  * dumpcstate - dump a compacted-NFA state in human-readable form
    3151             :  */
    3152             : static void
    3153             : dumpcstate(int st,
    3154             :            struct cnfa *cnfa,
    3155             :            FILE *f)
    3156             : {
    3157             :     struct carc *ca;
    3158             :     int         pos;
    3159             : 
    3160             :     fprintf(f, "%d%s", st, (cnfa->stflags[st] & CNFA_NOPROGRESS) ? ":" : ".");
    3161             :     pos = 1;
    3162             :     for (ca = cnfa->states[st]; ca->co != COLORLESS; ca++)
    3163             :     {
    3164             :         if (ca->co < cnfa->ncolors)
    3165             :             fprintf(f, "\t[%ld]->%d", (long) ca->co, ca->to);
    3166             :         else
    3167             :             fprintf(f, "\t:%ld:->%d", (long) (ca->co - cnfa->ncolors), ca->to);
    3168             :         if (pos == 5)
    3169             :         {
    3170             :             fprintf(f, "\n");
    3171             :             pos = 1;
    3172             :         }
    3173             :         else
    3174             :             pos++;
    3175             :     }
    3176             :     if (ca == cnfa->states[st] || pos != 1)
    3177             :         fprintf(f, "\n");
    3178             :     fflush(f);
    3179             : }
    3180             : 
    3181             : #endif                          /* REG_DEBUG */

Generated by: LCOV version 1.11