LTP GCOV extension - code coverage report
Current view: directory - regex - engine.c
Test: PHP Code Coverage
Date: 2007-04-10 Instrumented lines: 405
Code covered: 0.0 % Executed lines: 0
Legend: not executed executed

       1                 : /*
       2                 :  * The matching engine and friends.  This file is #included by regexec.c
       3                 :  * after suitable #defines of a variety of macros used herein, so that
       4                 :  * different state representations can be used without duplicating masses
       5                 :  * of code.
       6                 :  */
       7                 : 
       8                 : #ifdef SNAMES
       9                 : #define matcher smatcher
      10                 : #define fast    sfast
      11                 : #define slow    sslow
      12                 : #define dissect sdissect
      13                 : #define backref sbackref
      14                 : #define step    sstep
      15                 : #define print   sprint
      16                 : #define at      sat
      17                 : #define match   smat
      18                 : #endif
      19                 : #ifdef LNAMES
      20                 : #define matcher lmatcher
      21                 : #define fast    lfast
      22                 : #define slow    lslow
      23                 : #define dissect ldissect
      24                 : #define backref lbackref
      25                 : #define step    lstep
      26                 : #define print   lprint
      27                 : #define at      lat
      28                 : #define match   lmat
      29                 : #endif
      30                 : 
      31                 : /* another structure passed up and down to avoid zillions of parameters */
      32                 : struct match {
      33                 :         struct re_guts *g;
      34                 :         int eflags;
      35                 :         regmatch_t *pmatch;     /* [nsub+1] (0 element unused) */
      36                 :         unsigned char *offp;            /* offsets work from here */
      37                 :         unsigned char *beginp;          /* start of string -- virtual NUL precedes */
      38                 :         unsigned char *endp;            /* end of string -- virtual NUL here */
      39                 :         unsigned char *coldp;           /* can be no match starting before here */
      40                 :         unsigned char **lastpos;                /* [nplus+1] */
      41                 :         STATEVARS;
      42                 :         states st;              /* current states */
      43                 :         states fresh;           /* states for a fresh start */
      44                 :         states tmp;             /* temporary */
      45                 :         states empty;           /* empty set of states */
      46                 : };
      47                 : 
      48                 : #include "engine.ih"
      49                 : 
      50                 : #ifdef REDEBUG
      51                 : #define SP(t, s, c)     print(m, t, s, c, stdout)
      52                 : #define AT(t, p1, p2, s1, s2)   at(m, t, p1, p2, s1, s2)
      53                 : #define NOTE(str)       { if (m->eflags&REG_TRACE) printf("=%s\n", (str)); }
      54                 : #else
      55                 : #define SP(t, s, c)     /* nothing */
      56                 : #define AT(t, p1, p2, s1, s2)   /* nothing */
      57                 : #define NOTE(s) /* nothing */
      58                 : #endif
      59                 : 
      60                 : /*
      61                 :  - matcher - the actual matching engine
      62                 :  == static int matcher(register struct re_guts *g, char *string, \
      63                 :  ==     size_t nmatch, regmatch_t pmatch[], int eflags);
      64                 :  */
      65                 : static int                      /* 0 success, REG_NOMATCH failure */
      66                 : matcher(g, string, nmatch, pmatch, eflags)
      67                 : register struct re_guts *g;
      68                 : unsigned char *string;
      69                 : size_t nmatch;
      70                 : regmatch_t pmatch[];
      71                 : int eflags;
      72               0 : {
      73                 :         register unsigned char *endp;
      74                 :         register size_t i;
      75                 :         struct match mv;
      76               0 :         register struct match *m = &mv;
      77                 :         register unsigned char *dp;
      78               0 :         const register sopno gf = g->firststate+1;   /* +1 for OEND */
      79               0 :         const register sopno gl = g->laststate;
      80                 :         unsigned char *start;
      81                 :         unsigned char *stop;
      82                 : 
      83                 :         /* simplify the situation where possible */
      84               0 :         if (g->cflags&REG_NOSUB)
      85               0 :                 nmatch = 0;
      86               0 :         if (eflags&REG_STARTEND) {
      87               0 :                 start = string + pmatch[0].rm_so;
      88               0 :                 stop = string + pmatch[0].rm_eo;
      89                 :         } else {
      90               0 :                 start = string;
      91               0 :                 stop = start + strlen(start);
      92                 :         }
      93               0 :         if (stop < start)
      94               0 :                 return(REG_INVARG);
      95                 : 
      96                 :         /* prescreening; this does wonders for this rather slow code */
      97               0 :         if (g->must != NULL) {
      98               0 :                 for (dp = start; dp < stop; dp++)
      99               0 :                         if (*dp == g->must[0] && stop - dp >= g->mlen &&
     100                 :                                 memcmp(dp, g->must, (size_t)g->mlen) == 0)
     101               0 :                                 break;
     102               0 :                 if (dp == stop)         /* we didn't find g->must */
     103               0 :                         return(REG_NOMATCH);
     104                 :         }
     105                 : 
     106                 :         /* match struct setup */
     107               0 :         m->g = g;
     108               0 :         m->eflags = eflags;
     109               0 :         m->pmatch = NULL;
     110               0 :         m->lastpos = NULL;
     111               0 :         m->offp = string;
     112               0 :         m->beginp = start;
     113               0 :         m->endp = stop;
     114               0 :         STATESETUP(m, 4);
     115               0 :         SETUP(m->st);
     116               0 :         SETUP(m->fresh);
     117               0 :         SETUP(m->tmp);
     118               0 :         SETUP(m->empty);
     119               0 :         CLEAR(m->empty);
     120                 : 
     121                 :         /* this loop does only one repetition except for backrefs */
     122                 :         for (;;) {
     123               0 :                 endp = fast(m, start, stop, gf, gl);
     124               0 :                 if (endp == NULL) {             /* a miss */
     125               0 :                         STATETEARDOWN(m);
     126               0 :                         return(REG_NOMATCH);
     127                 :                 }
     128               0 :                 if (nmatch == 0 && !g->backrefs)
     129               0 :                         break;          /* no further info needed */
     130                 : 
     131                 :                 /* where? */
     132                 :                 assert(m->coldp != NULL);
     133                 :                 for (;;) {
     134                 :                         NOTE("finding start");
     135               0 :                         endp = slow(m, m->coldp, stop, gf, gl);
     136               0 :                         if (endp != NULL)
     137               0 :                                 break;
     138                 :                         assert(m->coldp < m->endp);
     139               0 :                         m->coldp++;
     140               0 :                 }
     141               0 :                 if (nmatch == 1 && !g->backrefs)
     142               0 :                         break;          /* no further info needed */
     143                 : 
     144                 :                 /* oh my, he wants the subexpressions... */
     145               0 :                 if (m->pmatch == NULL)
     146               0 :                         m->pmatch = (regmatch_t *)malloc((m->g->nsub + 1) *
     147                 :                                                         sizeof(regmatch_t));
     148               0 :                 if (m->pmatch == NULL) {
     149               0 :                         STATETEARDOWN(m);
     150               0 :                         return(REG_ESPACE);
     151                 :                 }
     152               0 :                 for (i = 1; i <= m->g->nsub; i++)
     153               0 :                         m->pmatch[i].rm_so = m->pmatch[i].rm_eo = -1;
     154               0 :                 if (!g->backrefs && !(m->eflags&REG_BACKR)) {
     155                 :                         NOTE("dissecting");
     156               0 :                         dp = dissect(m, m->coldp, endp, gf, gl);
     157                 :                 } else {
     158               0 :                         if (g->nplus > 0 && m->lastpos == NULL)
     159               0 :                                 m->lastpos = (unsigned char **)malloc((g->nplus+1) *
     160                 :                                                         sizeof(unsigned char *));
     161               0 :                         if (g->nplus > 0 && m->lastpos == NULL) {
     162               0 :                                 free((char *)m->pmatch);
     163               0 :                                 STATETEARDOWN(m);
     164               0 :                                 return(REG_ESPACE);
     165                 :                         }
     166                 :                         NOTE("backref dissect");
     167               0 :                         dp = backref(m, m->coldp, endp, gf, gl, (sopno)0);
     168                 :                 }
     169               0 :                 if (dp != NULL)
     170               0 :                         break;
     171                 : 
     172                 :                 /* uh-oh... we couldn't find a subexpression-level match */
     173                 :                 assert(g->backrefs); /* must be back references doing it */
     174                 :                 assert(g->nplus == 0 || m->lastpos != NULL);
     175                 :                 for (;;) {
     176               0 :                         if (dp != NULL || endp <= m->coldp)
     177                 :                                 break;          /* defeat */
     178                 :                         NOTE("backoff");
     179               0 :                         endp = slow(m, m->coldp, endp-1, gf, gl);
     180               0 :                         if (endp == NULL)
     181               0 :                                 break;          /* defeat */
     182                 :                         /* try it on a shorter possibility */
     183                 : #ifndef NDEBUG
     184                 :                         for (i = 1; i <= m->g->nsub; i++) {
     185                 :                                 assert(m->pmatch[i].rm_so == -1);
     186                 :                                 assert(m->pmatch[i].rm_eo == -1);
     187                 :                         }
     188                 : #endif
     189                 :                         NOTE("backoff dissect");
     190               0 :                         dp = backref(m, m->coldp, endp, gf, gl, (sopno)0);
     191               0 :                 }
     192                 :                 assert(dp == NULL || dp == endp);
     193               0 :                 if (dp != NULL)         /* found a shorter one */
     194               0 :                         break;
     195                 : 
     196                 :                 /* despite initial appearances, there is no match here */
     197                 :                 NOTE("false alarm");
     198               0 :                 start = m->coldp + 1;        /* recycle starting later */
     199                 :                 assert(start <= stop);
     200               0 :         }
     201                 : 
     202                 :         /* fill in the details if requested */
     203               0 :         if (nmatch > 0) {
     204               0 :                 pmatch[0].rm_so = m->coldp - m->offp;
     205               0 :                 pmatch[0].rm_eo = endp - m->offp;
     206                 :         }
     207               0 :         if (nmatch > 1) {
     208                 :                 assert(m->pmatch != NULL);
     209               0 :                 for (i = 1; i < nmatch; i++)
     210               0 :                         if (i <= m->g->nsub)
     211               0 :                                 pmatch[i] = m->pmatch[i];
     212                 :                         else {
     213               0 :                                 pmatch[i].rm_so = -1;
     214               0 :                                 pmatch[i].rm_eo = -1;
     215                 :                         }
     216                 :         }
     217                 : 
     218               0 :         if (m->pmatch != NULL)
     219               0 :                 free((char *)m->pmatch);
     220               0 :         if (m->lastpos != NULL)
     221               0 :                 free((char *)m->lastpos);
     222               0 :         STATETEARDOWN(m);
     223               0 :         return(0);
     224                 : }
     225                 : 
     226                 : /*
     227                 :  - dissect - figure out what matched what, no back references
     228                 :  == static unsigned char *dissect(register struct match *m, unsigned char *start, \
     229                 :  ==     unsigned char *stop, sopno startst, sopno stopst);
     230                 :  */
     231                 : static unsigned char *                  /* == stop (success) always */
     232                 : dissect(m, start, stop, startst, stopst)
     233                 : register struct match *m;
     234                 : unsigned char *start;
     235                 : unsigned char *stop;
     236                 : sopno startst;
     237                 : sopno stopst;
     238               0 : {
     239                 :         register int i;
     240                 :         register sopno ss;      /* start sop of current subRE */
     241                 :         register sopno es;      /* end sop of current subRE */
     242                 :         register unsigned char *sp;     /* start of string matched by it */
     243                 :         register unsigned char *stp;    /* string matched by it cannot pass here */
     244                 :         register unsigned char *rest;   /* start of rest of string */
     245                 :         register unsigned char *tail;   /* string unmatched by rest of RE */
     246                 :         register sopno ssub;    /* start sop of subsubRE */
     247                 :         register sopno esub;    /* end sop of subsubRE */
     248                 :         register unsigned char *ssp;    /* start of string matched by subsubRE */
     249                 :         register unsigned char *sep;    /* end of string matched by subsubRE */
     250                 :         register unsigned char *oldssp; /* previous ssp */
     251                 :         register unsigned char *dp;
     252                 : 
     253                 :         AT("diss", start, stop, startst, stopst);
     254               0 :         sp = start;
     255               0 :         for (ss = startst; ss < stopst; ss = es) {
     256                 :                 /* identify end of subRE */
     257               0 :                 es = ss;
     258               0 :                 switch (OP(m->g->strip[es])) {
     259                 :                 case OPLUS_:
     260                 :                 case OQUEST_:
     261               0 :                         es += OPND(m->g->strip[es]);
     262               0 :                         break;
     263                 :                 case OCH_:
     264               0 :                         while (OP(m->g->strip[es]) != O_CH)
     265               0 :                                 es += OPND(m->g->strip[es]);
     266                 :                         break;
     267                 :                 }
     268               0 :                 es++;
     269                 : 
     270                 :                 /* figure out what it matched */
     271               0 :                 switch (OP(m->g->strip[ss])) {
     272                 :                 case OEND:
     273                 :                         assert(PHP_REGEX_NOPE);
     274               0 :                         break;
     275                 :                 case OCHAR:
     276               0 :                         sp++;
     277               0 :                         break;
     278                 :                 case OBOL:
     279                 :                 case OEOL:
     280                 :                 case OBOW:
     281                 :                 case OEOW:
     282               0 :                         break;
     283                 :                 case OANY:
     284                 :                 case OANYOF:
     285               0 :                         sp++;
     286               0 :                         break;
     287                 :                 case OBACK_:
     288                 :                 case O_BACK:
     289                 :                         assert(PHP_REGEX_NOPE);
     290               0 :                         break;
     291                 :                 /* cases where length of match is hard to find */
     292                 :                 case OQUEST_:
     293               0 :                         stp = stop;
     294                 :                         for (;;) {
     295                 :                                 /* how long could this one be? */
     296               0 :                                 rest = slow(m, sp, stp, ss, es);
     297                 :                                 assert(rest != NULL);   /* it did match */
     298                 :                                 /* could the rest match the rest? */
     299               0 :                                 tail = slow(m, rest, stop, es, stopst);
     300               0 :                                 if (tail == stop)
     301               0 :                                         break;          /* yes! */
     302                 :                                 /* no -- try a shorter match for this one */
     303               0 :                                 stp = rest - 1;
     304                 :                                 assert(stp >= sp);   /* it did work */
     305               0 :                         }
     306               0 :                         ssub = ss + 1;
     307               0 :                         esub = es - 1;
     308                 :                         /* did innards match? */
     309               0 :                         if (slow(m, sp, rest, ssub, esub) != NULL) {
     310               0 :                                 dp = dissect(m, sp, rest, ssub, esub);
     311                 :                                 assert(dp == rest);
     312                 :                         } else          /* no */
     313                 :                                 assert(sp == rest);
     314               0 :                         sp = rest;
     315               0 :                         break;
     316                 :                 case OPLUS_:
     317               0 :                         stp = stop;
     318                 :                         for (;;) {
     319                 :                                 /* how long could this one be? */
     320               0 :                                 rest = slow(m, sp, stp, ss, es);
     321                 :                                 assert(rest != NULL);   /* it did match */
     322                 :                                 /* could the rest match the rest? */
     323               0 :                                 tail = slow(m, rest, stop, es, stopst);
     324               0 :                                 if (tail == stop)
     325               0 :                                         break;          /* yes! */
     326                 :                                 /* no -- try a shorter match for this one */
     327               0 :                                 stp = rest - 1;
     328                 :                                 assert(stp >= sp);   /* it did work */
     329               0 :                         }
     330               0 :                         ssub = ss + 1;
     331               0 :                         esub = es - 1;
     332               0 :                         ssp = sp;
     333               0 :                         oldssp = ssp;
     334                 :                         for (;;) {      /* find last match of innards */
     335               0 :                                 sep = slow(m, ssp, rest, ssub, esub);
     336               0 :                                 if (sep == NULL || sep == ssp)
     337                 :                                         break;  /* failed or matched null */
     338               0 :                                 oldssp = ssp;   /* on to next try */
     339               0 :                                 ssp = sep;
     340               0 :                         }
     341               0 :                         if (sep == NULL) {
     342                 :                                 /* last successful match */
     343               0 :                                 sep = ssp;
     344               0 :                                 ssp = oldssp;
     345                 :                         }
     346                 :                         assert(sep == rest);    /* must exhaust substring */
     347                 :                         assert(slow(m, ssp, sep, ssub, esub) == rest);
     348               0 :                         dp = dissect(m, ssp, sep, ssub, esub);
     349                 :                         assert(dp == sep);
     350               0 :                         sp = rest;
     351               0 :                         break;
     352                 :                 case OCH_:
     353               0 :                         stp = stop;
     354                 :                         for (;;) {
     355                 :                                 /* how long could this one be? */
     356               0 :                                 rest = slow(m, sp, stp, ss, es);
     357                 :                                 assert(rest != NULL);   /* it did match */
     358                 :                                 /* could the rest match the rest? */
     359               0 :                                 tail = slow(m, rest, stop, es, stopst);
     360               0 :                                 if (tail == stop)
     361               0 :                                         break;          /* yes! */
     362                 :                                 /* no -- try a shorter match for this one */
     363               0 :                                 stp = rest - 1;
     364                 :                                 assert(stp >= sp);   /* it did work */
     365               0 :                         }
     366               0 :                         ssub = ss + 1;
     367               0 :                         esub = ss + OPND(m->g->strip[ss]) - 1;
     368                 :                         assert(OP(m->g->strip[esub]) == OOR1);
     369                 :                         for (;;) {      /* find first matching branch */
     370               0 :                                 if (slow(m, sp, rest, ssub, esub) == rest)
     371               0 :                                         break;  /* it matched all of it */
     372                 :                                 /* that one missed, try next one */
     373                 :                                 assert(OP(m->g->strip[esub]) == OOR1);
     374               0 :                                 esub++;
     375                 :                                 assert(OP(m->g->strip[esub]) == OOR2);
     376               0 :                                 ssub = esub + 1;
     377               0 :                                 esub += OPND(m->g->strip[esub]);
     378               0 :                                 if (OP(m->g->strip[esub]) == OOR2)
     379               0 :                                         esub--;
     380                 :                                 else
     381                 :                                         assert(OP(m->g->strip[esub]) == O_CH);
     382               0 :                         }
     383               0 :                         dp = dissect(m, sp, rest, ssub, esub);
     384                 :                         assert(dp == rest);
     385               0 :                         sp = rest;
     386               0 :                         break;
     387                 :                 case O_PLUS:
     388                 :                 case O_QUEST:
     389                 :                 case OOR1:
     390                 :                 case OOR2:
     391                 :                 case O_CH:
     392                 :                         assert(PHP_REGEX_NOPE);
     393               0 :                         break;
     394                 :                 case OLPAREN:
     395               0 :                         i = OPND(m->g->strip[ss]);
     396                 :                         assert(0 < i && i <= m->g->nsub);
     397               0 :                         m->pmatch[i].rm_so = sp - m->offp;
     398               0 :                         break;
     399                 :                 case ORPAREN:
     400               0 :                         i = OPND(m->g->strip[ss]);
     401                 :                         assert(0 < i && i <= m->g->nsub);
     402               0 :                         m->pmatch[i].rm_eo = sp - m->offp;
     403                 :                         break;
     404                 :                 default:                /* uh oh */
     405                 :                         assert(PHP_REGEX_NOPE);
     406                 :                         break;
     407                 :                 }
     408                 :         }
     409                 : 
     410                 :         assert(sp == stop);
     411               0 :         return(sp);
     412                 : }
     413                 : 
     414                 : /*
     415                 :  - backref - figure out what matched what, figuring in back references
     416                 :  == static unsigned char *backref(register struct match *m, unsigned char *start, \
     417                 :  ==     unsigned char *stop, sopno startst, sopno stopst, sopno lev);
     418                 :  */
     419                 : static unsigned char *                  /* == stop (success) or NULL (failure) */
     420                 : backref(m, start, stop, startst, stopst, lev)
     421                 : register struct match *m;
     422                 : unsigned char *start;
     423                 : unsigned char *stop;
     424                 : sopno startst;
     425                 : sopno stopst;
     426                 : sopno lev;                      /* PLUS nesting level */
     427               0 : {
     428                 :         register int i;
     429                 :         register sopno ss;      /* start sop of current subRE */
     430                 :         register unsigned char *sp;     /* start of string matched by it */
     431                 :         register sopno ssub;    /* start sop of subsubRE */
     432                 :         register sopno esub;    /* end sop of subsubRE */
     433                 :         register unsigned char *ssp;    /* start of string matched by subsubRE */
     434                 :         register unsigned char *dp;
     435                 :         register size_t len;
     436                 :         register int hard;
     437                 :         register sop s;
     438                 :         register regoff_t offsave;
     439                 :         register cset *cs;
     440                 : 
     441                 :         AT("back", start, stop, startst, stopst);
     442               0 :         sp = start;
     443                 : 
     444                 :         /* get as far as we can with easy stuff */
     445               0 :         hard = 0;
     446               0 :         for (ss = startst; !hard && ss < stopst; ss++)
     447               0 :                 switch (OP(s = m->g->strip[ss])) {
     448                 :                 case OCHAR:
     449               0 :                         if (sp == stop || *sp++ != (unsigned char)OPND(s))
     450               0 :                                 return(NULL);
     451               0 :                         break;
     452                 :                 case OANY:
     453               0 :                         if (sp == stop)
     454               0 :                                 return(NULL);
     455               0 :                         sp++;
     456               0 :                         break;
     457                 :                 case OANYOF:
     458               0 :                         cs = &m->g->sets[OPND(s)];
     459               0 :                         if (sp == stop || !CHIN(cs, *sp++))
     460               0 :                                 return(NULL);
     461               0 :                         break;
     462                 :                 case OBOL:
     463               0 :                         if ( (sp == m->beginp && !(m->eflags&REG_NOTBOL)) ||
     464                 :                                         (sp < m->endp && *(sp-1) == '\n' &&
     465                 :                                                 (m->g->cflags&REG_NEWLINE)) )
     466                 :                                 { /* yes */ }
     467                 :                         else
     468               0 :                                 return(NULL);
     469               0 :                         break;
     470                 :                 case OEOL:
     471               0 :                         if ( (sp == m->endp && !(m->eflags&REG_NOTEOL)) ||
     472                 :                                         (sp < m->endp && *sp == '\n' &&
     473                 :                                                 (m->g->cflags&REG_NEWLINE)) )
     474                 :                                 { /* yes */ }
     475                 :                         else
     476               0 :                                 return(NULL);
     477               0 :                         break;
     478                 :                 case OBOW:
     479               0 :                         if (( (sp == m->beginp && !(m->eflags&REG_NOTBOL)) ||
     480                 :                                         (sp < m->endp && *(sp-1) == '\n' &&
     481                 :                                                 (m->g->cflags&REG_NEWLINE)) ||
     482                 :                                         (sp > m->beginp &&
     483                 :                                                         !ISWORD(*(sp-1))) ) &&
     484                 :                                         (sp < m->endp && ISWORD(*sp)) )
     485                 :                                 { /* yes */ }
     486                 :                         else
     487               0 :                                 return(NULL);
     488               0 :                         break;
     489                 :                 case OEOW:
     490               0 :                         if (( (sp == m->endp && !(m->eflags&REG_NOTEOL)) ||
     491                 :                                         (sp < m->endp && *sp == '\n' &&
     492                 :                                                 (m->g->cflags&REG_NEWLINE)) ||
     493                 :                                         (sp < m->endp && !ISWORD(*sp)) ) &&
     494                 :                                         (sp > m->beginp && ISWORD(*(sp-1))) )
     495                 :                                 { /* yes */ }
     496                 :                         else
     497               0 :                                 return(NULL);
     498               0 :                         break;
     499                 :                 case O_QUEST:
     500               0 :                         break;
     501                 :                 case OOR1:      /* matches null but needs to skip */
     502               0 :                         ss++;
     503               0 :                         s = m->g->strip[ss];
     504                 :                         do {
     505                 :                                 assert(OP(s) == OOR2);
     506               0 :                                 ss += OPND(s);
     507               0 :                         } while (OP(s = m->g->strip[ss]) != O_CH);
     508                 :                         /* note that the ss++ gets us past the O_CH */
     509               0 :                         break;
     510                 :                 default:        /* have to make a choice */
     511               0 :                         hard = 1;
     512                 :                         break;
     513                 :                 }
     514               0 :         if (!hard) {            /* that was it! */
     515               0 :                 if (sp != stop)
     516               0 :                         return(NULL);
     517               0 :                 return(sp);
     518                 :         }
     519               0 :         ss--;                   /* adjust for the for's final increment */
     520                 : 
     521                 :         /* the hard stuff */
     522                 :         AT("hard", sp, stop, ss, stopst);
     523               0 :         s = m->g->strip[ss];
     524               0 :         switch (OP(s)) {
     525                 :         case OBACK_:            /* the vilest depths */
     526               0 :                 i = OPND(s);
     527                 :                 assert(0 < i && i <= m->g->nsub);
     528               0 :                 if (m->pmatch[i].rm_eo == -1)
     529               0 :                         return(NULL);
     530                 :                 assert(m->pmatch[i].rm_so != -1);
     531               0 :                 len = m->pmatch[i].rm_eo - m->pmatch[i].rm_so;
     532                 :                 assert(stop - m->beginp >= len);
     533               0 :                 if (sp > stop - len)
     534               0 :                         return(NULL);   /* not enough left to match */
     535               0 :                 ssp = m->offp + m->pmatch[i].rm_so;
     536               0 :                 if (memcmp(sp, ssp, len) != 0)
     537               0 :                         return(NULL);
     538               0 :                 while (m->g->strip[ss] != SOP(O_BACK, i))
     539               0 :                         ss++;
     540               0 :                 return(backref(m, sp+len, stop, ss+1, stopst, lev));
     541                 :                 break;
     542                 :         case OQUEST_:           /* to null or not */
     543               0 :                 dp = backref(m, sp, stop, ss+1, stopst, lev);
     544               0 :                 if (dp != NULL)
     545               0 :                         return(dp);     /* not */
     546               0 :                 return(backref(m, sp, stop, ss+OPND(s)+1, stopst, lev));
     547                 :                 break;
     548                 :         case OPLUS_:
     549                 :                 assert(m->lastpos != NULL);
     550                 :                 assert(lev+1 <= m->g->nplus);
     551               0 :                 m->lastpos[lev+1] = sp;
     552               0 :                 return(backref(m, sp, stop, ss+1, stopst, lev+1));
     553                 :                 break;
     554                 :         case O_PLUS:
     555               0 :                 if (sp == m->lastpos[lev])   /* last pass matched null */
     556               0 :                         return(backref(m, sp, stop, ss+1, stopst, lev-1));
     557                 :                 /* try another pass */
     558               0 :                 m->lastpos[lev] = sp;
     559               0 :                 dp = backref(m, sp, stop, ss-OPND(s)+1, stopst, lev);
     560               0 :                 if (dp == NULL)
     561               0 :                         return(backref(m, sp, stop, ss+1, stopst, lev-1));
     562                 :                 else
     563               0 :                         return(dp);
     564                 :                 break;
     565                 :         case OCH_:              /* find the right one, if any */
     566               0 :                 ssub = ss + 1;
     567               0 :                 esub = ss + OPND(s) - 1;
     568                 :                 assert(OP(m->g->strip[esub]) == OOR1);
     569                 :                 for (;;) {      /* find first matching branch */
     570               0 :                         dp = backref(m, sp, stop, ssub, esub, lev);
     571               0 :                         if (dp != NULL)
     572               0 :                                 return(dp);
     573                 :                         /* that one missed, try next one */
     574               0 :                         if (OP(m->g->strip[esub]) == O_CH)
     575               0 :                                 return(NULL);   /* there is none */
     576               0 :                         esub++;
     577                 :                         assert(OP(m->g->strip[esub]) == OOR2);
     578               0 :                         ssub = esub + 1;
     579               0 :                         esub += OPND(m->g->strip[esub]);
     580               0 :                         if (OP(m->g->strip[esub]) == OOR2)
     581               0 :                                 esub--;
     582                 :                         else
     583                 :                                 assert(OP(m->g->strip[esub]) == O_CH);
     584               0 :                 }
     585                 :                 break;
     586                 :         case OLPAREN:           /* must undo assignment if rest fails */
     587               0 :                 i = OPND(s);
     588                 :                 assert(0 < i && i <= m->g->nsub);
     589               0 :                 offsave = m->pmatch[i].rm_so;
     590               0 :                 m->pmatch[i].rm_so = sp - m->offp;
     591               0 :                 dp = backref(m, sp, stop, ss+1, stopst, lev);
     592               0 :                 if (dp != NULL)
     593               0 :                         return(dp);
     594               0 :                 m->pmatch[i].rm_so = offsave;
     595               0 :                 return(NULL);
     596                 :                 break;
     597                 :         case ORPAREN:           /* must undo assignment if rest fails */
     598               0 :                 i = OPND(s);
     599                 :                 assert(0 < i && i <= m->g->nsub);
     600               0 :                 offsave = m->pmatch[i].rm_eo;
     601               0 :                 m->pmatch[i].rm_eo = sp - m->offp;
     602               0 :                 dp = backref(m, sp, stop, ss+1, stopst, lev);
     603               0 :                 if (dp != NULL)
     604               0 :                         return(dp);
     605               0 :                 m->pmatch[i].rm_eo = offsave;
     606               0 :                 return(NULL);
     607                 :                 break;
     608                 :         default:                /* uh oh */
     609                 :                 assert(PHP_REGEX_NOPE);
     610                 :                 break;
     611                 :         }
     612                 : 
     613                 :         /* "can't happen" */
     614                 :         assert(PHP_REGEX_NOPE);
     615                 :         /* NOTREACHED */
     616               0 :         return((unsigned char *)NULL);  /* dummy */
     617                 : }
     618                 : 
     619                 : /*
     620                 :  - fast - step through the string at top speed
     621                 :  == static unsigned char *fast(register struct match *m, unsigned char *start, \
     622                 :  ==     unsigned char *stop, sopno startst, sopno stopst);
     623                 :  */
     624                 : static unsigned char *                  /* where tentative match ended, or NULL */
     625                 : fast(m, start, stop, startst, stopst)
     626                 : register struct match *m;
     627                 : unsigned char *start;
     628                 : unsigned char *stop;
     629                 : sopno startst;
     630                 : sopno stopst;
     631               0 : {
     632               0 :         register states st = m->st;
     633               0 :         register states fresh = m->fresh;
     634               0 :         register states tmp = m->tmp;
     635               0 :         register unsigned char *p = start;
     636               0 :         register int c = (start == m->beginp) ? OUT : *(start-1);
     637                 :         register int lastc;     /* previous c */
     638                 :         register int flagch;
     639                 :         register int i;
     640                 :         register unsigned char *coldp;  /* last p after which no match was underway */
     641                 : 
     642               0 :         CLEAR(st);
     643               0 :         SET1(st, startst);
     644               0 :         st = step(m->g, startst, stopst, st, NOTHING, st);
     645               0 :         ASSIGN(fresh, st);
     646                 :         SP("start", st, *p);
     647               0 :         coldp = NULL;
     648                 :         for (;;) {
     649                 :                 /* next character */
     650               0 :                 lastc = c;
     651               0 :                 c = (p == m->endp) ? OUT : *p;
     652               0 :                 if (EQ(st, fresh))
     653               0 :                         coldp = p;
     654                 : 
     655                 :                 /* is there an EOL and/or BOL between lastc and c? */
     656               0 :                 flagch = '\0';
     657               0 :                 i = 0;
     658               0 :                 if ( (lastc == '\n' && m->g->cflags&REG_NEWLINE) ||
     659                 :                                 (lastc == OUT && !(m->eflags&REG_NOTBOL)) ) {
     660               0 :                         flagch = BOL;
     661               0 :                         i = m->g->nbol;
     662                 :                 }
     663               0 :                 if ( (c == '\n' && m->g->cflags&REG_NEWLINE) ||
     664                 :                                 (c == OUT && !(m->eflags&REG_NOTEOL)) ) {
     665               0 :                         flagch = (flagch == BOL) ? BOLEOL : EOL;
     666               0 :                         i += m->g->neol;
     667                 :                 }
     668               0 :                 if (i != 0) {
     669               0 :                         for (; i > 0; i--)
     670               0 :                                 st = step(m->g, startst, stopst, st, flagch, st);
     671                 :                         SP("boleol", st, c);
     672                 :                 }
     673                 : 
     674                 :                 /* how about a word boundary? */
     675               0 :                 if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) &&
     676                 :                                         (c != OUT && ISWORD(c)) ) {
     677               0 :                         flagch = BOW;
     678                 :                 }
     679               0 :                 if ( (lastc != OUT && ISWORD(lastc)) &&
     680                 :                                 (flagch == EOL || (c != OUT && !ISWORD(c))) ) {
     681               0 :                         flagch = EOW;
     682                 :                 }
     683               0 :                 if (flagch == BOW || flagch == EOW) {
     684               0 :                         st = step(m->g, startst, stopst, st, flagch, st);
     685                 :                         SP("boweow", st, c);
     686                 :                 }
     687                 : 
     688                 :                 /* are we done? */
     689               0 :                 if (ISSET(st, stopst) || p == stop)
     690                 :                         break;          /* NOTE BREAK OUT */
     691                 : 
     692                 :                 /* no, we must deal with this character */
     693               0 :                 ASSIGN(tmp, st);
     694               0 :                 ASSIGN(st, fresh);
     695                 :                 assert(c != OUT);
     696               0 :                 st = step(m->g, startst, stopst, tmp, c, st);
     697                 :                 SP("aft", st, c);
     698                 :                 assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
     699               0 :                 p++;
     700               0 :         }
     701                 : 
     702                 :         assert(coldp != NULL);
     703               0 :         m->coldp = coldp;
     704               0 :         if (ISSET(st, stopst))
     705               0 :                 return(p+1);
     706                 :         else
     707               0 :                 return(NULL);
     708                 : }
     709                 : 
     710                 : /*
     711                 :  - slow - step through the string more deliberately
     712                 :  == static unsigned char *slow(register struct match *m, unsigned char *start, \
     713                 :  ==     unsigned char *stop, sopno startst, sopno stopst);
     714                 :  */
     715                 : static unsigned char *                  /* where it ended */
     716                 : slow(m, start, stop, startst, stopst)
     717                 : register struct match *m;
     718                 : unsigned char *start;
     719                 : unsigned char *stop;
     720                 : sopno startst;
     721                 : sopno stopst;
     722               0 : {
     723               0 :         register states st = m->st;
     724               0 :         register states empty = m->empty;
     725               0 :         register states tmp = m->tmp;
     726               0 :         register unsigned char *p = start;
     727               0 :         register int c = (start == m->beginp) ? OUT : *(start-1);
     728                 :         register int lastc;     /* previous c */
     729                 :         register int flagch;
     730                 :         register int i;
     731                 :         register unsigned char *matchp; /* last p at which a match ended */
     732                 : 
     733                 :         AT("slow", start, stop, startst, stopst);
     734               0 :         CLEAR(st);
     735               0 :         SET1(st, startst);
     736                 :         SP("sstart", st, *p);
     737               0 :         st = step(m->g, startst, stopst, st, NOTHING, st);
     738               0 :         matchp = NULL;
     739                 :         for (;;) {
     740                 :                 /* next character */
     741               0 :                 lastc = c;
     742               0 :                 c = (p == m->endp) ? OUT : *p;
     743                 : 
     744                 :                 /* is there an EOL and/or BOL between lastc and c? */
     745               0 :                 flagch = '\0';
     746               0 :                 i = 0;
     747               0 :                 if ( (lastc == '\n' && m->g->cflags&REG_NEWLINE) ||
     748                 :                                 (lastc == OUT && !(m->eflags&REG_NOTBOL)) ) {
     749               0 :                         flagch = BOL;
     750               0 :                         i = m->g->nbol;
     751                 :                 }
     752               0 :                 if ( (c == '\n' && m->g->cflags&REG_NEWLINE) ||
     753                 :                                 (c == OUT && !(m->eflags&REG_NOTEOL)) ) {
     754               0 :                         flagch = (flagch == BOL) ? BOLEOL : EOL;
     755               0 :                         i += m->g->neol;
     756                 :                 }
     757               0 :                 if (i != 0) {
     758               0 :                         for (; i > 0; i--)
     759               0 :                                 st = step(m->g, startst, stopst, st, flagch, st);
     760                 :                         SP("sboleol", st, c);
     761                 :                 }
     762                 : 
     763                 :                 /* how about a word boundary? */
     764               0 :                 if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) &&
     765                 :                                         (c != OUT && ISWORD(c)) ) {
     766               0 :                         flagch = BOW;
     767                 :                 }
     768               0 :                 if ( (lastc != OUT && ISWORD(lastc)) &&
     769                 :                                 (flagch == EOL || (c != OUT && !ISWORD(c))) ) {
     770               0 :                         flagch = EOW;
     771                 :                 }
     772               0 :                 if (flagch == BOW || flagch == EOW) {
     773               0 :                         st = step(m->g, startst, stopst, st, flagch, st);
     774                 :                         SP("sboweow", st, c);
     775                 :                 }
     776                 : 
     777                 :                 /* are we done? */
     778               0 :                 if (ISSET(st, stopst))
     779               0 :                         matchp = p;
     780               0 :                 if (EQ(st, empty) || p == stop)
     781                 :                         break;          /* NOTE BREAK OUT */
     782                 : 
     783                 :                 /* no, we must deal with this character */
     784               0 :                 ASSIGN(tmp, st);
     785               0 :                 ASSIGN(st, empty);
     786                 :                 assert(c != OUT);
     787               0 :                 st = step(m->g, startst, stopst, tmp, c, st);
     788                 :                 SP("saft", st, c);
     789                 :                 assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
     790               0 :                 p++;
     791               0 :         }
     792                 : 
     793               0 :         return(matchp);
     794                 : }
     795                 : 
     796                 : 
     797                 : /*
     798                 :  - step - map set of states reachable before char to set reachable after
     799                 :  == static states step(register struct re_guts *g, sopno start, sopno stop, \
     800                 :  ==     register states bef, int ch, register states aft);
     801                 :  == #define     BOL     (OUT+1)
     802                 :  == #define     EOL     (BOL+1)
     803                 :  == #define     BOLEOL  (BOL+2)
     804                 :  == #define     NOTHING (BOL+3)
     805                 :  == #define     BOW     (BOL+4)
     806                 :  == #define     EOW     (BOL+5)
     807                 :  == #define     CODEMAX (BOL+5)         // highest code used
     808                 :  == #define     NONCHAR(c)      ((c) > UCHAR_MAX)
     809                 :  == #define     NNONCHAR        (CODEMAX-UCHAR_MAX)
     810                 :  */
     811                 : static states
     812                 : step(g, start, stop, bef, ch, aft)
     813                 : register struct re_guts *g;
     814                 : sopno start;                    /* start state within strip */
     815                 : sopno stop;                     /* state after stop state within strip */
     816                 : register states bef;            /* states reachable before */
     817                 : int ch;                         /* character or NONCHAR code */
     818                 : register states aft;            /* states already known reachable after */
     819               0 : {
     820                 :         register cset *cs;
     821                 :         register sop s;
     822                 :         register sopno pc;
     823                 :         register onestate here;         /* note, macros know this name */
     824                 :         register sopno look;
     825                 :         register long i;
     826                 : 
     827               0 :         for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) {
     828               0 :                 s = g->strip[pc];
     829               0 :                 switch (OP(s)) {
     830                 :                 case OEND:
     831                 :                         assert(pc == stop-1);
     832               0 :                         break;
     833                 :                 case OCHAR:
     834                 :                         /* only characters can match */
     835                 :                         assert(!NONCHAR(ch) || ch != (unsigned char)OPND(s));
     836               0 :                         if (ch == (unsigned char)OPND(s))
     837               0 :                                 FWD(aft, bef, 1);
     838               0 :                         break;
     839                 :                 case OBOL:
     840               0 :                         if (ch == BOL || ch == BOLEOL)
     841               0 :                                 FWD(aft, bef, 1);
     842               0 :                         break;
     843                 :                 case OEOL:
     844               0 :                         if (ch == EOL || ch == BOLEOL)
     845               0 :                                 FWD(aft, bef, 1);
     846               0 :                         break;
     847                 :                 case OBOW:
     848               0 :                         if (ch == BOW)
     849               0 :                                 FWD(aft, bef, 1);
     850               0 :                         break;
     851                 :                 case OEOW:
     852               0 :                         if (ch == EOW)
     853               0 :                                 FWD(aft, bef, 1);
     854               0 :                         break;
     855                 :                 case OANY:
     856               0 :                         if (!NONCHAR(ch))
     857               0 :                                 FWD(aft, bef, 1);
     858               0 :                         break;
     859                 :                 case OANYOF:
     860               0 :                         cs = &g->sets[OPND(s)];
     861               0 :                         if (!NONCHAR(ch) && CHIN(cs, ch))
     862               0 :                                 FWD(aft, bef, 1);
     863               0 :                         break;
     864                 :                 case OBACK_:            /* ignored here */
     865                 :                 case O_BACK:
     866               0 :                         FWD(aft, aft, 1);
     867               0 :                         break;
     868                 :                 case OPLUS_:            /* forward, this is just an empty */
     869               0 :                         FWD(aft, aft, 1);
     870               0 :                         break;
     871                 :                 case O_PLUS:            /* both forward and back */
     872               0 :                         FWD(aft, aft, 1);
     873               0 :                         i = ISSETBACK(aft, OPND(s));
     874               0 :                         BACK(aft, aft, OPND(s));
     875               0 :                         if (!i && ISSETBACK(aft, OPND(s))) {
     876                 :                                 /* oho, must reconsider loop body */
     877               0 :                                 pc -= OPND(s) + 1;
     878               0 :                                 INIT(here, pc);
     879                 :                         }
     880               0 :                         break;
     881                 :                 case OQUEST_:           /* two branches, both forward */
     882               0 :                         FWD(aft, aft, 1);
     883               0 :                         FWD(aft, aft, OPND(s));
     884               0 :                         break;
     885                 :                 case O_QUEST:           /* just an empty */
     886               0 :                         FWD(aft, aft, 1);
     887               0 :                         break;
     888                 :                 case OLPAREN:           /* not significant here */
     889                 :                 case ORPAREN:
     890               0 :                         FWD(aft, aft, 1);
     891               0 :                         break;
     892                 :                 case OCH_:              /* mark the first two branches */
     893               0 :                         FWD(aft, aft, 1);
     894                 :                         assert(OP(g->strip[pc+OPND(s)]) == OOR2);
     895               0 :                         FWD(aft, aft, OPND(s));
     896               0 :                         break;
     897                 :                 case OOR1:              /* done a branch, find the O_CH */
     898               0 :                         if (ISSTATEIN(aft, here)) {
     899               0 :                                 for (look = 1;
     900               0 :                                                 OP(s = g->strip[pc+look]) != O_CH;
     901               0 :                                                 look += OPND(s))
     902                 :                                         assert(OP(s) == OOR2);
     903               0 :                                 FWD(aft, aft, look);
     904                 :                         }
     905               0 :                         break;
     906                 :                 case OOR2:              /* propagate OCH_'s marking */
     907               0 :                         FWD(aft, aft, 1);
     908               0 :                         if (OP(g->strip[pc+OPND(s)]) != O_CH) {
     909                 :                                 assert(OP(g->strip[pc+OPND(s)]) == OOR2);
     910               0 :                                 FWD(aft, aft, OPND(s));
     911                 :                         }
     912               0 :                         break;
     913                 :                 case O_CH:              /* just empty */
     914               0 :                         FWD(aft, aft, 1);
     915                 :                         break;
     916                 :                 default:                /* ooooops... */
     917                 :                         assert(PHP_REGEX_NOPE);
     918                 :                         break;
     919                 :                 }
     920                 :         }
     921                 : 
     922               0 :         return(aft);
     923                 : }
     924                 : 
     925                 : #ifdef REDEBUG
     926                 : /*
     927                 :  - print - print a set of states
     928                 :  == #ifdef REDEBUG
     929                 :  == static void print(struct match *m, unsigned char *caption, states st, \
     930                 :  ==     int ch, FILE *d);
     931                 :  == #endif
     932                 :  */
     933                 : static void
     934                 : print(m, caption, st, ch, d)
     935                 : struct match *m;
     936                 : unsigned char *caption;
     937                 : states st;
     938                 : int ch;
     939                 : FILE *d;
     940                 : {
     941                 :         register struct re_guts *g = m->g;
     942                 :         register int i;
     943                 :         register int first = 1;
     944                 : 
     945                 :         if (!(m->eflags&REG_TRACE))
     946                 :                 return;
     947                 : 
     948                 :         fprintf(d, "%s", caption);
     949                 :         if (ch != '\0')
     950                 :                 fprintf(d, " %s", pchar(ch));
     951                 :         for (i = 0; i < g->nstates; i++)
     952                 :                 if (ISSET(st, i)) {
     953                 :                         fprintf(d, "%s%d", (first) ? "\t" : ", ", i);
     954                 :                         first = 0;
     955                 :                 }
     956                 :         fprintf(d, "\n");
     957                 : }
     958                 : 
     959                 : /* 
     960                 :  - at - print current situation
     961                 :  == #ifdef REDEBUG
     962                 :  == static void at(struct match *m, unsigned char *title, unsigned char *start, unsigned char *stop, \
     963                 :  ==                                             sopno startst, sopno stopst);
     964                 :  == #endif
     965                 :  */
     966                 : static void
     967                 : at(m, title, start, stop, startst, stopst)
     968                 : struct match *m;
     969                 : unsigned char *title;
     970                 : unsigned char *start;
     971                 : unsigned char *stop;
     972                 : sopno startst;
     973                 : sopno stopst;
     974                 : {
     975                 :         if (!(m->eflags&REG_TRACE))
     976                 :                 return;
     977                 : 
     978                 :         printf("%s %s-", title, pchar(*start));
     979                 :         printf("%s ", pchar(*stop));
     980                 :         printf("%ld-%ld\n", (long)startst, (long)stopst);
     981                 : }
     982                 : 
     983                 : #ifndef PCHARDONE
     984                 : #define PCHARDONE       /* never again */
     985                 : /*
     986                 :  - pchar - make a character printable
     987                 :  == #ifdef REDEBUG
     988                 :  == static unsigned char *pchar(int ch);
     989                 :  == #endif
     990                 :  *
     991                 :  * Is this identical to regchar() over in debug.c?  Well, yes.  But a
     992                 :  * duplicate here avoids having a debugging-capable regexec.o tied to
     993                 :  * a matching debug.o, and this is convenient.  It all disappears in
     994                 :  * the non-debug compilation anyway, so it doesn't matter much.
     995                 :  */
     996                 : static unsigned char *                  /* -> representation */
     997                 : pchar(ch)
     998                 : int ch;
     999                 : {
    1000                 :         static unsigned char pbuf[10];
    1001                 : 
    1002                 :         if (isprint(ch) || ch == ' ')
    1003                 :                 sprintf(pbuf, "%c", ch);
    1004                 :         else
    1005                 :                 sprintf(pbuf, "\\%o", ch);
    1006                 :         return(pbuf);
    1007                 : }
    1008                 : #endif
    1009                 : #endif
    1010                 : 
    1011                 : #undef  matcher
    1012                 : #undef  fast
    1013                 : #undef  slow
    1014                 : #undef  dissect
    1015                 : #undef  backref
    1016                 : #undef  step
    1017                 : #undef  print
    1018                 : #undef  at
    1019                 : #undef  match

Generated by: LTP GCOV extension version 1.5