LTP GCOV extension - code coverage report
Current view: directory - regex - regcomp.c
Test: PHP Code Coverage
Date: 2007-04-10 Instrumented lines: 676
Code covered: 0.0 % Executed lines: 0
Legend: not executed executed

       1                 : #include <sys/types.h>
       2                 : #include <stdio.h>
       3                 : #include <string.h>
       4                 : #include <ctype.h>
       5                 : #include <limits.h>
       6                 : #include <stdlib.h>
       7                 : 
       8                 : #define POSIX_MISTAKE
       9                 : 
      10                 : #include "utils.h"
      11                 : #include "regex.h"
      12                 : #include "regex2.h"
      13                 : 
      14                 : #include "cclass.h"
      15                 : #include "cname.h"
      16                 : 
      17                 : /*
      18                 :  * parse structure, passed up and down to avoid global variables and
      19                 :  * other clumsinesses
      20                 :  */
      21                 : struct parse {
      22                 :         unsigned char *next;            /* next character in RE */
      23                 :         unsigned char *end;             /* end of string (-> NUL normally) */
      24                 :         int error;              /* has an error been seen? */
      25                 :         sop *strip;             /* malloced strip */
      26                 :         sopno ssize;            /* malloced strip size (allocated) */
      27                 :         sopno slen;             /* malloced strip length (used) */
      28                 :         int ncsalloc;           /* number of csets allocated */
      29                 :         struct re_guts *g;
      30                 : #       define  NPAREN  10      /* we need to remember () 1-9 for back refs */
      31                 :         sopno pbegin[NPAREN];   /* -> ( ([0] unused) */
      32                 :         sopno pend[NPAREN];     /* -> ) ([0] unused) */
      33                 : };
      34                 : 
      35                 : #include "regcomp.ih"
      36                 : 
      37                 : static unsigned char nuls[10];          /* place to point scanner in event of error */
      38                 : 
      39                 : /*
      40                 :  * macros for use with parse structure
      41                 :  * BEWARE:  these know that the parse structure is named `p' !!!
      42                 :  */
      43                 : #define PEEK()  (*p->next)
      44                 : #define PEEK2() (*(p->next+1))
      45                 : #define MORE()  (p->next < p->end)
      46                 : #define MORE2() (p->next+1 < p->end)
      47                 : #define SEE(c)  (MORE() && PEEK() == (c))
      48                 : #define SEETWO(a, b)    (MORE() && MORE2() && PEEK() == (a) && PEEK2() == (b))
      49                 : #define EAT(c)  ((SEE(c)) ? (NEXT(), 1) : 0)
      50                 : #define EATTWO(a, b)    ((SEETWO(a, b)) ? (NEXT2(), 1) : 0)
      51                 : #define NEXT()  (p->next++)
      52                 : #define NEXT2() (p->next += 2)
      53                 : #define NEXTn(n)        (p->next += (n))
      54                 : #define GETNEXT()       (*p->next++)
      55                 : #define SETERROR(e)     seterr(p, (e))
      56                 : #define REQUIRE(co, e)  (void) ((co) || SETERROR(e))
      57                 : #define MUSTSEE(c, e)   (REQUIRE(MORE() && PEEK() == (c), e))
      58                 : #define MUSTEAT(c, e)   (REQUIRE(MORE() && GETNEXT() == (c), e))
      59                 : #define MUSTNOTSEE(c, e)        (REQUIRE(!MORE() || PEEK() != (c), e))
      60                 : #define EMIT(op, sopnd) doemit(p, (sop)(op), (size_t)(sopnd))
      61                 : #define INSERT(op, pos) doinsert(p, (sop)(op), HERE()-(pos)+1, pos)
      62                 : #define AHEAD(pos)              dofwd(p, pos, HERE()-(pos))
      63                 : #define ASTERN(sop, pos)        EMIT(sop, HERE()-pos)
      64                 : #define HERE()          (p->slen)
      65                 : #define THERE()         (p->slen - 1)
      66                 : #define THERETHERE()    (p->slen - 2)
      67                 : #define DROP(n) (p->slen -= (n))
      68                 : 
      69                 : #ifndef NDEBUG
      70                 : static int never = 0;           /* for use in asserts; shuts lint up */
      71                 : #else
      72                 : #define never   0               /* some <assert.h>s have bugs too */
      73                 : #endif
      74                 : 
      75                 : /*
      76                 :  - regcomp - interface for parser and compilation
      77                 :  = API_EXPORT(int) regcomp(regex_t *, const char *, int);
      78                 :  = #define      REG_BASIC       0000
      79                 :  = #define      REG_EXTENDED    0001
      80                 :  = #define      REG_ICASE       0002
      81                 :  = #define      REG_NOSUB       0004
      82                 :  = #define      REG_NEWLINE     0010
      83                 :  = #define      REG_NOSPEC      0020
      84                 :  = #define      REG_PEND        0040
      85                 :  = #define      REG_DUMP        0200
      86                 :  */
      87                 : API_EXPORT(int)                 /* 0 success, otherwise REG_something */
      88                 : regcomp(preg, pattern, cflags)
      89                 : regex_t *preg;
      90                 : const char *pattern;
      91                 : int cflags;
      92               0 : {
      93                 :         struct parse pa;
      94                 :         register struct re_guts *g;
      95               0 :         register struct parse *p = &pa;
      96                 :         register int i;
      97                 :         register size_t len;
      98                 : #ifdef REDEBUG
      99                 : #       define  GOODFLAGS(f)    (f)
     100                 : #else
     101                 : #       define  GOODFLAGS(f)    ((f)&~REG_DUMP)
     102                 : #endif
     103                 : 
     104               0 :         cflags = GOODFLAGS(cflags);
     105               0 :         if ((cflags&REG_EXTENDED) && (cflags&REG_NOSPEC))
     106               0 :                 return(REG_INVARG);
     107                 : 
     108               0 :         if (cflags&REG_PEND) {
     109               0 :                 if (preg->re_endp < pattern)
     110               0 :                         return(REG_INVARG);
     111               0 :                 len = preg->re_endp - pattern;
     112                 :         } else
     113               0 :                 len = strlen((char *)pattern);
     114                 : 
     115                 :         /* do the mallocs early so failure handling is easy */
     116               0 :         g = (struct re_guts *)malloc(sizeof(struct re_guts) +
     117                 :                                                         (NC-1)*sizeof(cat_t));
     118               0 :         if (g == NULL)
     119               0 :                 return(REG_ESPACE);
     120               0 :         p->ssize = len/(size_t)2*(size_t)3 + (size_t)1;      /* ugh */
     121               0 :         p->strip = (sop *)malloc(p->ssize * sizeof(sop));
     122               0 :         p->slen = 0;
     123               0 :         if (p->strip == NULL) {
     124               0 :                 free((char *)g);
     125               0 :                 return(REG_ESPACE);
     126                 :         }
     127                 : 
     128                 :         /* set things up */
     129               0 :         p->g = g;
     130               0 :         p->next = (unsigned char *)pattern;  /* convenience; we do not modify it */
     131               0 :         p->end = p->next + len;
     132               0 :         p->error = 0;
     133               0 :         p->ncsalloc = 0;
     134               0 :         for (i = 0; i < NPAREN; i++) {
     135               0 :                 p->pbegin[i] = 0;
     136               0 :                 p->pend[i] = 0;
     137                 :         }
     138               0 :         g->csetsize = NC;
     139               0 :         g->sets = NULL;
     140               0 :         g->setbits = NULL;
     141               0 :         g->ncsets = 0;
     142               0 :         g->cflags = cflags;
     143               0 :         g->iflags = 0;
     144               0 :         g->nbol = 0;
     145               0 :         g->neol = 0;
     146               0 :         g->must = NULL;
     147               0 :         g->mlen = 0;
     148               0 :         g->nsub = 0;
     149               0 :         g->ncategories = 1;  /* category 0 is "everything else" */
     150               0 :         g->categories = &g->catspace[0];
     151               0 :         (void) memset((char *)g->catspace, 0, NC*sizeof(cat_t));
     152               0 :         g->backrefs = 0;
     153                 : 
     154                 :         /* do it */
     155               0 :         EMIT(OEND, 0);
     156               0 :         g->firststate = THERE();
     157               0 :         if (cflags&REG_EXTENDED)
     158               0 :                 p_ere(p, OUT);
     159               0 :         else if (cflags&REG_NOSPEC)
     160               0 :                 p_str(p);
     161                 :         else
     162               0 :                 p_bre(p, OUT, OUT);
     163               0 :         EMIT(OEND, 0);
     164               0 :         g->laststate = THERE();
     165                 : 
     166                 :         /* tidy up loose ends and fill things in */
     167               0 :         categorize(p, g);
     168               0 :         stripsnug(p, g);
     169               0 :         findmust(p, g);
     170               0 :         g->nplus = pluscount(p, g);
     171               0 :         g->magic = MAGIC2;
     172               0 :         preg->re_nsub = g->nsub;
     173               0 :         preg->re_g = g;
     174               0 :         preg->re_magic = MAGIC1;
     175                 : #ifndef REDEBUG
     176                 :         /* not debugging, so can't rely on the assert() in regexec() */
     177               0 :         if (g->iflags&BAD)
     178               0 :                 SETERROR(REG_ASSERT);
     179                 : #endif
     180                 : 
     181                 :         /* win or lose, we're done */
     182               0 :         if (p->error != 0)   /* lose */
     183               0 :                 regfree(preg);
     184               0 :         return(p->error);
     185                 : }
     186                 : 
     187                 : /*
     188                 :  - p_ere - ERE parser top level, concatenation and alternation
     189                 :  == static void p_ere(register struct parse *p, int stop);
     190                 :  */
     191                 : static void
     192                 : p_ere(p, stop)
     193                 : register struct parse *p;
     194                 : int stop;                       /* character this ERE should end at */
     195               0 : {
     196                 :         register unsigned char c;
     197               0 :         register sopno prevback = 0;
     198               0 :         register sopno prevfwd = 0;
     199                 :         register sopno conc;
     200               0 :         register int first = 1;         /* is this the first alternative? */
     201                 : 
     202                 :         for (;;) {
     203                 :                 /* do a bunch of concatenated expressions */
     204               0 :                 conc = HERE();
     205               0 :                 while (MORE() && (c = PEEK()) != '|' && c != stop)
     206               0 :                         p_ere_exp(p);
     207               0 :                 (void) REQUIRE(HERE() != conc, REG_EMPTY);      /* require nonempty */
     208                 : 
     209               0 :                 if (!EAT('|'))
     210               0 :                         break;          /* NOTE BREAK OUT */
     211                 : 
     212               0 :                 if (first) {
     213               0 :                         INSERT(OCH_, conc);     /* offset is wrong */
     214               0 :                         prevfwd = conc;
     215               0 :                         prevback = conc;
     216               0 :                         first = 0;
     217                 :                 }
     218               0 :                 ASTERN(OOR1, prevback);
     219               0 :                 prevback = THERE();
     220               0 :                 AHEAD(prevfwd);                 /* fix previous offset */
     221               0 :                 prevfwd = HERE();
     222               0 :                 EMIT(OOR2, 0);                  /* offset is very wrong */
     223               0 :         }
     224                 : 
     225               0 :         if (!first) {           /* tail-end fixups */
     226               0 :                 AHEAD(prevfwd);
     227               0 :                 ASTERN(O_CH, prevback);
     228                 :         }
     229                 : 
     230                 :         assert(!MORE() || SEE(stop));
     231               0 : }
     232                 : 
     233                 : /*
     234                 :  - p_ere_exp - parse one subERE, an atom possibly followed by a repetition op
     235                 :  == static void p_ere_exp(register struct parse *p);
     236                 :  */
     237                 : static void
     238                 : p_ere_exp(p)
     239                 : register struct parse *p;
     240               0 : {
     241                 :         register unsigned char c;
     242                 :         register sopno pos;
     243                 :         register int count;
     244                 :         register int count2;
     245                 :         register sopno subno;
     246               0 :         int wascaret = 0;
     247                 : 
     248                 :         assert(MORE());         /* caller should have ensured this */
     249               0 :         c = GETNEXT();
     250                 : 
     251               0 :         pos = HERE();
     252               0 :         switch (c) {
     253                 :         case '(':
     254               0 :                 REQUIRE(MORE(), REG_EPAREN);
     255               0 :                 p->g->nsub++;
     256               0 :                 subno = p->g->nsub;
     257               0 :                 if (subno < NPAREN)
     258               0 :                         p->pbegin[subno] = HERE();
     259               0 :                 EMIT(OLPAREN, subno);
     260               0 :                 if (!SEE(')'))
     261               0 :                         p_ere(p, ')');
     262               0 :                 if (subno < NPAREN) {
     263               0 :                         p->pend[subno] = HERE();
     264                 :                         assert(p->pend[subno] != 0);
     265                 :                 }
     266               0 :                 EMIT(ORPAREN, subno);
     267               0 :                 MUSTEAT(')', REG_EPAREN);
     268               0 :                 break;
     269                 : #ifndef POSIX_MISTAKE
     270                 :         case ')':               /* happens only if no current unmatched ( */
     271                 :                 /*
     272                 :                  * You may ask, why the ifndef?  Because I didn't notice
     273                 :                  * this until slightly too late for 1003.2, and none of the
     274                 :                  * other 1003.2 regular-expression reviewers noticed it at
     275                 :                  * all.  So an unmatched ) is legal POSIX, at least until
     276                 :                  * we can get it fixed.
     277                 :                  */
     278                 :                 SETERROR(REG_EPAREN);
     279                 :                 break;
     280                 : #endif
     281                 :         case '^':
     282               0 :                 EMIT(OBOL, 0);
     283               0 :                 p->g->iflags |= USEBOL;
     284               0 :                 p->g->nbol++;
     285               0 :                 wascaret = 1;
     286               0 :                 break;
     287                 :         case '$':
     288               0 :                 EMIT(OEOL, 0);
     289               0 :                 p->g->iflags |= USEEOL;
     290               0 :                 p->g->neol++;
     291               0 :                 break;
     292                 :         case '|':
     293               0 :                 SETERROR(REG_EMPTY);
     294               0 :                 break;
     295                 :         case '*':
     296                 :         case '+':
     297                 :         case '?':
     298               0 :                 SETERROR(REG_BADRPT);
     299               0 :                 break;
     300                 :         case '.':
     301               0 :                 if (p->g->cflags&REG_NEWLINE)
     302               0 :                         nonnewline(p);
     303                 :                 else
     304               0 :                         EMIT(OANY, 0);
     305               0 :                 break;
     306                 :         case '[':
     307               0 :                 p_bracket(p);
     308               0 :                 break;
     309                 :         case '\\':
     310               0 :                 REQUIRE(MORE(), REG_EESCAPE);
     311               0 :                 c = GETNEXT();
     312               0 :                 ordinary(p, c);
     313               0 :                 break;
     314                 :         case '{':               /* okay as ordinary except if digit follows */
     315               0 :                 REQUIRE(!MORE() || !isdigit(PEEK()), REG_BADRPT);
     316                 :                 /* FALLTHROUGH */
     317                 :         default:
     318               0 :                 ordinary(p, c);
     319                 :                 break;
     320                 :         }
     321                 : 
     322               0 :         if (!MORE())
     323               0 :                 return;
     324               0 :         c = PEEK();
     325                 :         /* we call { a repetition if followed by a digit */
     326               0 :         if (!( c == '*' || c == '+' || c == '?' ||
     327                 :                                 (c == '{' && MORE2() && isdigit(PEEK2())) ))
     328               0 :                 return;         /* no repetition, we're done */
     329               0 :         NEXT();
     330                 : 
     331               0 :         REQUIRE(!wascaret, REG_BADRPT);
     332               0 :         switch (c) {
     333                 :         case '*':       /* implemented as +? */
     334                 :                 /* this case does not require the (y|) trick, noKLUDGE */
     335               0 :                 INSERT(OPLUS_, pos);
     336               0 :                 ASTERN(O_PLUS, pos);
     337               0 :                 INSERT(OQUEST_, pos);
     338               0 :                 ASTERN(O_QUEST, pos);
     339               0 :                 break;
     340                 :         case '+':
     341               0 :                 INSERT(OPLUS_, pos);
     342               0 :                 ASTERN(O_PLUS, pos);
     343               0 :                 break;
     344                 :         case '?':
     345                 :                 /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
     346               0 :                 INSERT(OCH_, pos);              /* offset slightly wrong */
     347               0 :                 ASTERN(OOR1, pos);              /* this one's right */
     348               0 :                 AHEAD(pos);                     /* fix the OCH_ */
     349               0 :                 EMIT(OOR2, 0);                  /* offset very wrong... */
     350               0 :                 AHEAD(THERE());                 /* ...so fix it */
     351               0 :                 ASTERN(O_CH, THERETHERE());
     352               0 :                 break;
     353                 :         case '{':
     354               0 :                 count = p_count(p);
     355               0 :                 if (EAT(',')) {
     356               0 :                         if (isdigit(PEEK())) {
     357               0 :                                 count2 = p_count(p);
     358               0 :                                 REQUIRE(count <= count2, REG_BADBR);
     359                 :                         } else          /* single number with comma */
     360               0 :                                 count2 = INFINITY;
     361                 :                 } else          /* just a single number */
     362               0 :                         count2 = count;
     363               0 :                 repeat(p, pos, count, count2);
     364               0 :                 if (!EAT('}')) {        /* error heuristics */
     365               0 :                         while (MORE() && PEEK() != '}')
     366               0 :                                 NEXT();
     367               0 :                         REQUIRE(MORE(), REG_EBRACE);
     368               0 :                         SETERROR(REG_BADBR);
     369                 :                 }
     370                 :                 break;
     371                 :         }
     372                 : 
     373               0 :         if (!MORE())
     374               0 :                 return;
     375               0 :         c = PEEK();
     376               0 :         if (!( c == '*' || c == '+' || c == '?' ||
     377                 :                                 (c == '{' && MORE2() && isdigit(PEEK2())) ) )
     378               0 :                 return;
     379               0 :         SETERROR(REG_BADRPT);
     380                 : }
     381                 : 
     382                 : /*
     383                 :  - p_str - string (no metacharacters) "parser"
     384                 :  == static void p_str(register struct parse *p);
     385                 :  */
     386                 : static void
     387                 : p_str(p)
     388                 : register struct parse *p;
     389               0 : {
     390               0 :         REQUIRE(MORE(), REG_EMPTY);
     391               0 :         while (MORE())
     392               0 :                 ordinary(p, GETNEXT());
     393               0 : }
     394                 : 
     395                 : /*
     396                 :  - p_bre - BRE parser top level, anchoring and concatenation
     397                 :  == static void p_bre(register struct parse *p, register int end1, \
     398                 :  ==     register int end2);
     399                 :  * Giving end1 as OUT essentially eliminates the end1/end2 check.
     400                 :  *
     401                 :  * This implementation is a bit of a kludge, in that a trailing $ is first
     402                 :  * taken as an ordinary character and then revised to be an anchor.  The
     403                 :  * only undesirable side effect is that '$' gets included as a character
     404                 :  * category in such cases.  This is fairly harmless; not worth fixing.
     405                 :  * The amount of lookahead needed to avoid this kludge is excessive.
     406                 :  */
     407                 : static void
     408                 : p_bre(p, end1, end2)
     409                 : register struct parse *p;
     410                 : register int end1;              /* first terminating character */
     411                 : register int end2;              /* second terminating character */
     412               0 : {
     413               0 :         register sopno start = HERE();
     414               0 :         register int first = 1;                 /* first subexpression? */
     415               0 :         register int wasdollar = 0;
     416                 : 
     417               0 :         if (EAT('^')) {
     418               0 :                 EMIT(OBOL, 0);
     419               0 :                 p->g->iflags |= USEBOL;
     420               0 :                 p->g->nbol++;
     421                 :         }
     422               0 :         while (MORE() && !SEETWO(end1, end2)) {
     423               0 :                 wasdollar = p_simp_re(p, first);
     424               0 :                 first = 0;
     425                 :         }
     426               0 :         if (wasdollar) {        /* oops, that was a trailing anchor */
     427               0 :                 DROP(1);
     428               0 :                 EMIT(OEOL, 0);
     429               0 :                 p->g->iflags |= USEEOL;
     430               0 :                 p->g->neol++;
     431                 :         }
     432                 : 
     433               0 :         REQUIRE(HERE() != start, REG_EMPTY);    /* require nonempty */
     434               0 : }
     435                 : 
     436                 : /*
     437                 :  - p_simp_re - parse a simple RE, an atom possibly followed by a repetition
     438                 :  == static int p_simp_re(register struct parse *p, int starordinary);
     439                 :  */
     440                 : static int                      /* was the simple RE an unbackslashed $? */
     441                 : p_simp_re(p, starordinary)
     442                 : register struct parse *p;
     443                 : int starordinary;               /* is a leading * an ordinary character? */
     444               0 : {
     445                 :         register int c;
     446                 :         register int count;
     447                 :         register int count2;
     448                 :         register sopno pos;
     449                 :         register int i;
     450                 :         register sopno subno;
     451                 : #       define  BACKSL  (1<<CHAR_BIT)
     452                 : 
     453               0 :         pos = HERE();           /* repetion op, if any, covers from here */
     454                 : 
     455                 :         assert(MORE());         /* caller should have ensured this */
     456               0 :         c = GETNEXT();
     457               0 :         if (c == '\\') {
     458               0 :                 REQUIRE(MORE(), REG_EESCAPE);
     459               0 :                 c = BACKSL | (unsigned char)GETNEXT();
     460                 :         }
     461               0 :         switch (c) {
     462                 :         case '.':
     463               0 :                 if (p->g->cflags&REG_NEWLINE)
     464               0 :                         nonnewline(p);
     465                 :                 else
     466               0 :                         EMIT(OANY, 0);
     467               0 :                 break;
     468                 :         case '[':
     469               0 :                 p_bracket(p);
     470               0 :                 break;
     471                 :         case BACKSL|'{':
     472               0 :                 SETERROR(REG_BADRPT);
     473               0 :                 break;
     474                 :         case BACKSL|'(':
     475               0 :                 p->g->nsub++;
     476               0 :                 subno = p->g->nsub;
     477               0 :                 if (subno < NPAREN)
     478               0 :                         p->pbegin[subno] = HERE();
     479               0 :                 EMIT(OLPAREN, subno);
     480                 :                 /* the MORE here is an error heuristic */
     481               0 :                 if (MORE() && !SEETWO('\\', ')'))
     482               0 :                         p_bre(p, '\\', ')');
     483               0 :                 if (subno < NPAREN) {
     484               0 :                         p->pend[subno] = HERE();
     485                 :                         assert(p->pend[subno] != 0);
     486                 :                 }
     487               0 :                 EMIT(ORPAREN, subno);
     488               0 :                 REQUIRE(EATTWO('\\', ')'), REG_EPAREN);
     489               0 :                 break;
     490                 :         case BACKSL|')':        /* should not get here -- must be user */
     491                 :         case BACKSL|'}':
     492               0 :                 SETERROR(REG_EPAREN);
     493               0 :                 break;
     494                 :         case BACKSL|'1':
     495                 :         case BACKSL|'2':
     496                 :         case BACKSL|'3':
     497                 :         case BACKSL|'4':
     498                 :         case BACKSL|'5':
     499                 :         case BACKSL|'6':
     500                 :         case BACKSL|'7':
     501                 :         case BACKSL|'8':
     502                 :         case BACKSL|'9':
     503               0 :                 i = (c&~BACKSL) - '0';
     504                 :                 assert(i < NPAREN);
     505               0 :                 if (p->pend[i] != 0) {
     506                 :                         assert(i <= p->g->nsub);
     507               0 :                         EMIT(OBACK_, i);
     508                 :                         assert(p->pbegin[i] != 0);
     509                 :                         assert(OP(p->strip[p->pbegin[i]]) == OLPAREN);
     510                 :                         assert(OP(p->strip[p->pend[i]]) == ORPAREN);
     511               0 :                         (void) dupl(p, p->pbegin[i]+1, p->pend[i]);
     512               0 :                         EMIT(O_BACK, i);
     513                 :                 } else
     514               0 :                         SETERROR(REG_ESUBREG);
     515               0 :                 p->g->backrefs = 1;
     516               0 :                 break;
     517                 :         case '*':
     518               0 :                 REQUIRE(starordinary, REG_BADRPT);
     519                 :                 /* FALLTHROUGH */
     520                 :         default:
     521               0 :                 ordinary(p, (unsigned char)c);  /* takes off BACKSL, if any */
     522                 :                 break;
     523                 :         }
     524                 : 
     525               0 :         if (EAT('*')) {         /* implemented as +? */
     526                 :                 /* this case does not require the (y|) trick, noKLUDGE */
     527               0 :                 INSERT(OPLUS_, pos);
     528               0 :                 ASTERN(O_PLUS, pos);
     529               0 :                 INSERT(OQUEST_, pos);
     530               0 :                 ASTERN(O_QUEST, pos);
     531               0 :         } else if (EATTWO('\\', '{')) {
     532               0 :                 count = p_count(p);
     533               0 :                 if (EAT(',')) {
     534               0 :                         if (MORE() && isdigit(PEEK())) {
     535               0 :                                 count2 = p_count(p);
     536               0 :                                 REQUIRE(count <= count2, REG_BADBR);
     537                 :                         } else          /* single number with comma */
     538               0 :                                 count2 = INFINITY;
     539                 :                 } else          /* just a single number */
     540               0 :                         count2 = count;
     541               0 :                 repeat(p, pos, count, count2);
     542               0 :                 if (!EATTWO('\\', '}')) {       /* error heuristics */
     543               0 :                         while (MORE() && !SEETWO('\\', '}'))
     544               0 :                                 NEXT();
     545               0 :                         REQUIRE(MORE(), REG_EBRACE);
     546               0 :                         SETERROR(REG_BADBR);
     547                 :                 }
     548               0 :         } else if (c == (unsigned char)'$')     /* $ (but not \$) ends it */
     549               0 :                 return(1);
     550                 : 
     551               0 :         return(0);
     552                 : }
     553                 : 
     554                 : /*
     555                 :  - p_count - parse a repetition count
     556                 :  == static int p_count(register struct parse *p);
     557                 :  */
     558                 : static int                      /* the value */
     559                 : p_count(p)
     560                 : register struct parse *p;
     561               0 : {
     562               0 :         register int count = 0;
     563               0 :         register int ndigits = 0;
     564                 : 
     565               0 :         while (MORE() && isdigit(PEEK()) && count <= DUPMAX) {
     566               0 :                 count = count*10 + (GETNEXT() - '0');
     567               0 :                 ndigits++;
     568                 :         }
     569                 : 
     570               0 :         REQUIRE(ndigits > 0 && count <= DUPMAX, REG_BADBR);
     571               0 :         return(count);
     572                 : }
     573                 : 
     574                 : /*
     575                 :  - p_bracket - parse a bracketed character list
     576                 :  == static void p_bracket(register struct parse *p);
     577                 :  *
     578                 :  * Note a significant property of this code:  if the allocset() did SETERROR,
     579                 :  * no set operations are done.
     580                 :  */
     581                 : static void
     582                 : p_bracket(p)
     583                 : register struct parse *p;
     584               0 : {
     585               0 :         register cset *cs = allocset(p);
     586               0 :         register int invert = 0;
     587                 : 
     588                 :         /* Dept of Truly Sickening Special-Case Kludges */
     589               0 :         if (p->next + 5 < p->end && strncmp(p->next, "[:<:]]", 6) == 0) {
     590               0 :                 EMIT(OBOW, 0);
     591               0 :                 NEXTn(6);
     592               0 :                 return;
     593                 :         }
     594               0 :         if (p->next + 5 < p->end && strncmp(p->next, "[:>:]]", 6) == 0) {
     595               0 :                 EMIT(OEOW, 0);
     596               0 :                 NEXTn(6);
     597               0 :                 return;
     598                 :         }
     599                 : 
     600               0 :         if (EAT('^'))
     601               0 :                 invert++;       /* make note to invert set at end */
     602               0 :         if (EAT(']'))
     603               0 :                 CHadd(cs, ']');
     604               0 :         else if (EAT('-'))
     605               0 :                 CHadd(cs, '-');
     606               0 :         while (MORE() && PEEK() != ']' && !SEETWO('-', ']'))
     607               0 :                 p_b_term(p, cs);
     608               0 :         if (EAT('-'))
     609               0 :                 CHadd(cs, '-');
     610               0 :         MUSTEAT(']', REG_EBRACK);
     611                 : 
     612               0 :         if (p->error != 0)   /* don't mess things up further */
     613               0 :                 return;
     614                 : 
     615               0 :         if (p->g->cflags&REG_ICASE) {
     616                 :                 register int i;
     617                 :                 register int ci;
     618                 : 
     619               0 :                 for (i = p->g->csetsize - 1; i >= 0; i--)
     620               0 :                         if (CHIN(cs, i) && isalpha(i)) {
     621               0 :                                 ci = othercase(i);
     622               0 :                                 if (ci != i)
     623               0 :                                         CHadd(cs, ci);
     624                 :                         }
     625               0 :                 if (cs->multis != NULL)
     626               0 :                         mccase(p, cs);
     627                 :         }
     628               0 :         if (invert) {
     629                 :                 register int i;
     630                 : 
     631               0 :                 for (i = p->g->csetsize - 1; i >= 0; i--)
     632               0 :                         if (CHIN(cs, i))
     633               0 :                                 CHsub(cs, i);
     634                 :                         else
     635               0 :                                 CHadd(cs, i);
     636               0 :                 if (p->g->cflags&REG_NEWLINE)
     637               0 :                         CHsub(cs, '\n');
     638               0 :                 if (cs->multis != NULL)
     639               0 :                         mcinvert(p, cs);
     640                 :         }
     641                 : 
     642                 :         assert(cs->multis == NULL);          /* xxx */
     643                 : 
     644               0 :         if (nch(p, cs) == 1) {          /* optimize singleton sets */
     645               0 :                 ordinary(p, firstch(p, cs));
     646               0 :                 freeset(p, cs);
     647                 :         } else
     648               0 :                 EMIT(OANYOF, freezeset(p, cs));
     649                 : }
     650                 : 
     651                 : /*
     652                 :  - p_b_term - parse one term of a bracketed character list
     653                 :  == static void p_b_term(register struct parse *p, register cset *cs);
     654                 :  */
     655                 : static void
     656                 : p_b_term(p, cs)
     657                 : register struct parse *p;
     658                 : register cset *cs;
     659               0 : {
     660                 :         register unsigned char c;
     661                 :         register unsigned char start, finish;
     662                 :         register int i;
     663                 : 
     664                 :         /* classify what we've got */
     665               0 :         switch ((MORE()) ? PEEK() : '\0') {
     666                 :         case '[':
     667               0 :                 c = (MORE2()) ? PEEK2() : '\0';
     668               0 :                 break;
     669                 :         case '-':
     670               0 :                 SETERROR(REG_ERANGE);
     671               0 :                 return;                 /* NOTE RETURN */
     672                 :                 break;
     673                 :         default:
     674               0 :                 c = '\0';
     675                 :                 break;
     676                 :         }
     677                 : 
     678               0 :         switch (c) {
     679                 :         case ':':               /* character class */
     680               0 :                 NEXT2();
     681               0 :                 REQUIRE(MORE(), REG_EBRACK);
     682               0 :                 c = PEEK();
     683               0 :                 REQUIRE(c != '-' && c != ']', REG_ECTYPE);
     684               0 :                 p_b_cclass(p, cs);
     685               0 :                 REQUIRE(MORE(), REG_EBRACK);
     686               0 :                 REQUIRE(EATTWO(':', ']'), REG_ECTYPE);
     687               0 :                 break;
     688                 :         case '=':               /* equivalence class */
     689               0 :                 NEXT2();
     690               0 :                 REQUIRE(MORE(), REG_EBRACK);
     691               0 :                 c = PEEK();
     692               0 :                 REQUIRE(c != '-' && c != ']', REG_ECOLLATE);
     693               0 :                 p_b_eclass(p, cs);
     694               0 :                 REQUIRE(MORE(), REG_EBRACK);
     695               0 :                 REQUIRE(EATTWO('=', ']'), REG_ECOLLATE);
     696               0 :                 break;
     697                 :         default:                /* symbol, ordinary character, or range */
     698                 : /* xxx revision needed for multichar stuff */
     699               0 :                 start = p_b_symbol(p);
     700               0 :                 if (SEE('-') && MORE2() && PEEK2() != ']') {
     701                 :                         /* range */
     702               0 :                         NEXT();
     703               0 :                         if (EAT('-'))
     704               0 :                                 finish = '-';
     705                 :                         else
     706               0 :                                 finish = p_b_symbol(p);
     707                 :                 } else
     708               0 :                         finish = start;
     709                 : /* xxx what about signed chars here... */
     710               0 :                 REQUIRE(start <= finish, REG_ERANGE);
     711               0 :                 for (i = start; i <= finish; i++)
     712               0 :                         CHadd(cs, i);
     713                 :                 break;
     714                 :         }
     715                 : }
     716                 : 
     717                 : /*
     718                 :  - p_b_cclass - parse a character-class name and deal with it
     719                 :  == static void p_b_cclass(register struct parse *p, register cset *cs);
     720                 :  */
     721                 : static void
     722                 : p_b_cclass(p, cs)
     723                 : register struct parse *p;
     724                 : register cset *cs;
     725               0 : {
     726               0 :         register unsigned char *sp = p->next;
     727                 :         register struct cclass *cp;
     728                 :         register size_t len;
     729                 :         register unsigned char *u;
     730                 :         register unsigned char c;
     731                 : 
     732               0 :         while (MORE() && isalpha(PEEK()))
     733               0 :                 NEXT();
     734               0 :         len = p->next - sp;
     735               0 :         for (cp = cclasses; cp->name != NULL; cp++)
     736               0 :                 if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0')
     737               0 :                         break;
     738               0 :         if (cp->name == NULL) {
     739                 :                 /* oops, didn't find it */
     740               0 :                 SETERROR(REG_ECTYPE);
     741               0 :                 return;
     742                 :         }
     743                 : 
     744               0 :         u = cp->chars;
     745               0 :         while ((c = *u++) != '\0')
     746               0 :                 CHadd(cs, c);
     747               0 :         for (u = cp->multis; *u != '\0'; u += strlen(u) + 1)
     748               0 :                 MCadd(p, cs, u);
     749                 : }
     750                 : 
     751                 : /*
     752                 :  - p_b_eclass - parse an equivalence-class name and deal with it
     753                 :  == static void p_b_eclass(register struct parse *p, register cset *cs);
     754                 :  *
     755                 :  * This implementation is incomplete. xxx
     756                 :  */
     757                 : static void
     758                 : p_b_eclass(p, cs)
     759                 : register struct parse *p;
     760                 : register cset *cs;
     761               0 : {
     762                 :         register unsigned char c;
     763                 : 
     764               0 :         c = p_b_coll_elem(p, '=');
     765               0 :         CHadd(cs, c);
     766               0 : }
     767                 : 
     768                 : /*
     769                 :  - p_b_symbol - parse a character or [..]ed multicharacter collating symbol
     770                 :  == static char p_b_symbol(register struct parse *p);
     771                 :  */
     772                 : static unsigned char                    /* value of symbol */
     773                 : p_b_symbol(p)
     774                 : register struct parse *p;
     775               0 : {
     776                 :         register unsigned char value;
     777                 : 
     778               0 :         REQUIRE(MORE(), REG_EBRACK);
     779               0 :         if (!EATTWO('[', '.'))
     780               0 :                 return(GETNEXT());
     781                 : 
     782                 :         /* collating symbol */
     783               0 :         value = p_b_coll_elem(p, '.');
     784               0 :         REQUIRE(EATTWO('.', ']'), REG_ECOLLATE);
     785               0 :         return(value);
     786                 : }
     787                 : 
     788                 : /*
     789                 :  - p_b_coll_elem - parse a collating-element name and look it up
     790                 :  == static char p_b_coll_elem(register struct parse *p, int endc);
     791                 :  */
     792                 : static unsigned char                    /* value of collating element */
     793                 : p_b_coll_elem(p, endc)
     794                 : register struct parse *p;
     795                 : int endc;                       /* name ended by endc,']' */
     796               0 : {
     797               0 :         register unsigned char *sp = p->next;
     798                 :         register struct cname *cp;
     799                 :         register int len;
     800                 : 
     801               0 :         while (MORE() && !SEETWO(endc, ']'))
     802               0 :                 NEXT();
     803               0 :         if (!MORE()) {
     804               0 :                 SETERROR(REG_EBRACK);
     805               0 :                 return(0);
     806                 :         }
     807               0 :         len = p->next - sp;
     808               0 :         for (cp = cnames; cp->name != NULL; cp++)
     809               0 :                 if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0')
     810               0 :                         return(cp->code);    /* known name */
     811               0 :         if (len == 1)
     812               0 :                 return(*sp);    /* single character */
     813               0 :         SETERROR(REG_ECOLLATE);                 /* neither */
     814               0 :         return(0);
     815                 : }
     816                 : 
     817                 : /*
     818                 :  - othercase - return the case counterpart of an alphabetic
     819                 :  == static char othercase(int ch);
     820                 :  */
     821                 : static unsigned char                    /* if no counterpart, return ch */
     822                 : othercase(ch)
     823                 : int ch;
     824               0 : {
     825                 :         assert(isalpha(ch));
     826               0 :         if (isupper(ch))
     827               0 :                 return(tolower(ch));
     828               0 :         else if (islower(ch))
     829               0 :                 return(toupper(ch));
     830                 :         else                    /* peculiar, but could happen */
     831               0 :                 return(ch);
     832                 : }
     833                 : 
     834                 : /*
     835                 :  - bothcases - emit a dualcase version of a two-case character
     836                 :  == static void bothcases(register struct parse *p, int ch);
     837                 :  *
     838                 :  * Boy, is this implementation ever a kludge...
     839                 :  */
     840                 : static void
     841                 : bothcases(p, ch)
     842                 : register struct parse *p;
     843                 : int ch;
     844               0 : {
     845               0 :         register unsigned char *oldnext = p->next;
     846               0 :         register unsigned char *oldend = p->end;
     847                 :         unsigned char bracket[3];
     848                 : 
     849                 :         assert(othercase(ch) != ch);    /* p_bracket() would recurse */
     850               0 :         p->next = bracket;
     851               0 :         p->end = bracket+2;
     852               0 :         bracket[0] = ch;
     853               0 :         bracket[1] = ']';
     854               0 :         bracket[2] = '\0';
     855               0 :         p_bracket(p);
     856                 :         assert(p->next == bracket+2);
     857               0 :         p->next = oldnext;
     858               0 :         p->end = oldend;
     859               0 : }
     860                 : 
     861                 : /*
     862                 :  - ordinary - emit an ordinary character
     863                 :  == static void ordinary(register struct parse *p, register int ch);
     864                 :  */
     865                 : static void
     866                 : ordinary(p, ch)
     867                 : register struct parse *p;
     868                 : register int ch;
     869               0 : {
     870               0 :         register cat_t *cap = p->g->categories;
     871                 : 
     872               0 :         if ((p->g->cflags&REG_ICASE) && isalpha(ch) && othercase(ch) != ch)
     873               0 :                 bothcases(p, ch);
     874                 :         else {
     875               0 :                 EMIT(OCHAR, (unsigned char)ch);
     876               0 :                 if (cap[ch] == 0)
     877               0 :                         cap[ch] = p->g->ncategories++;
     878                 :         }
     879               0 : }
     880                 : 
     881                 : /*
     882                 :  - nonnewline - emit REG_NEWLINE version of OANY
     883                 :  == static void nonnewline(register struct parse *p);
     884                 :  *
     885                 :  * Boy, is this implementation ever a kludge...
     886                 :  */
     887                 : static void
     888                 : nonnewline(p)
     889                 : register struct parse *p;
     890               0 : {
     891               0 :         register unsigned char *oldnext = p->next;
     892               0 :         register unsigned char *oldend = p->end;
     893                 :         unsigned char bracket[4];
     894                 : 
     895               0 :         p->next = bracket;
     896               0 :         p->end = bracket+3;
     897               0 :         bracket[0] = '^';
     898               0 :         bracket[1] = '\n';
     899               0 :         bracket[2] = ']';
     900               0 :         bracket[3] = '\0';
     901               0 :         p_bracket(p);
     902                 :         assert(p->next == bracket+3);
     903               0 :         p->next = oldnext;
     904               0 :         p->end = oldend;
     905               0 : }
     906                 : 
     907                 : /*
     908                 :  - repeat - generate code for a bounded repetition, recursively if needed
     909                 :  == static void repeat(register struct parse *p, sopno start, int from, int to);
     910                 :  */
     911                 : static void
     912                 : repeat(p, start, from, to)
     913                 : register struct parse *p;
     914                 : sopno start;                    /* operand from here to end of strip */
     915                 : int from;                       /* repeated from this number */
     916                 : int to;                         /* to this number of times (maybe INFINITY) */
     917               0 : {
     918               0 :         register sopno finish = HERE();
     919                 : #       define  N       2
     920                 : #       define  INF     3
     921                 : #       define  REP(f, t)       ((f)*8 + (t))
     922                 : #       define  MAP(n)  (((n) <= 1) ? (n) : ((n) == INFINITY) ? INF : N)
     923                 :         register sopno copy;
     924                 : 
     925               0 :         if (p->error != 0)   /* head off possible runaway recursion */
     926               0 :                 return;
     927                 : 
     928                 :         assert(from <= to);
     929                 : 
     930               0 :         switch (REP(MAP(from), MAP(to))) {
     931                 :         case REP(0, 0):                 /* must be user doing this */
     932               0 :                 DROP(finish-start);     /* drop the operand */
     933               0 :                 break;
     934                 :         case REP(0, 1):                 /* as x{1,1}? */
     935                 :         case REP(0, N):                 /* as x{1,n}? */
     936                 :         case REP(0, INF):               /* as x{1,}? */
     937                 :                 /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
     938               0 :                 INSERT(OCH_, start);            /* offset is wrong... */
     939               0 :                 repeat(p, start+1, 1, to);
     940               0 :                 ASTERN(OOR1, start);
     941               0 :                 AHEAD(start);                   /* ... fix it */
     942               0 :                 EMIT(OOR2, 0);
     943               0 :                 AHEAD(THERE());
     944               0 :                 ASTERN(O_CH, THERETHERE());
     945               0 :                 break;
     946                 :         case REP(1, 1):                 /* trivial case */
     947                 :                 /* done */
     948               0 :                 break;
     949                 :         case REP(1, N):                 /* as x?x{1,n-1} */
     950                 :                 /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
     951               0 :                 INSERT(OCH_, start);
     952               0 :                 ASTERN(OOR1, start);
     953               0 :                 AHEAD(start);
     954               0 :                 EMIT(OOR2, 0);                  /* offset very wrong... */
     955               0 :                 AHEAD(THERE());                 /* ...so fix it */
     956               0 :                 ASTERN(O_CH, THERETHERE());
     957               0 :                 copy = dupl(p, start+1, finish+1);
     958                 :                 assert(copy == finish+4);
     959               0 :                 repeat(p, copy, 1, to-1);
     960               0 :                 break;
     961                 :         case REP(1, INF):               /* as x+ */
     962               0 :                 INSERT(OPLUS_, start);
     963               0 :                 ASTERN(O_PLUS, start);
     964               0 :                 break;
     965                 :         case REP(N, N):                 /* as xx{m-1,n-1} */
     966               0 :                 copy = dupl(p, start, finish);
     967               0 :                 repeat(p, copy, from-1, to-1);
     968               0 :                 break;
     969                 :         case REP(N, INF):               /* as xx{n-1,INF} */
     970               0 :                 copy = dupl(p, start, finish);
     971               0 :                 repeat(p, copy, from-1, to);
     972               0 :                 break;
     973                 :         default:                        /* "can't happen" */
     974               0 :                 SETERROR(REG_ASSERT);   /* just in case */
     975                 :                 break;
     976                 :         }
     977                 : }
     978                 : 
     979                 : /*
     980                 :  - seterr - set an error condition
     981                 :  == static int seterr(register struct parse *p, int e);
     982                 :  */
     983                 : static int                      /* useless but makes type checking happy */
     984                 : seterr(p, e)
     985                 : register struct parse *p;
     986                 : int e;
     987               0 : {
     988               0 :         if (p->error == 0)   /* keep earliest error condition */
     989               0 :                 p->error = e;
     990               0 :         p->next = nuls;              /* try to bring things to a halt */
     991               0 :         p->end = nuls;
     992               0 :         return(0);              /* make the return value well-defined */
     993                 : }
     994                 : 
     995                 : /*
     996                 :  - allocset - allocate a set of characters for []
     997                 :  == static cset *allocset(register struct parse *p);
     998                 :  */
     999                 : static cset *
    1000                 : allocset(p)
    1001                 : register struct parse *p;
    1002               0 : {
    1003               0 :         register int no = p->g->ncsets++;
    1004                 :         register size_t nc;
    1005                 :         register size_t nbytes;
    1006                 :         register cset *cs;
    1007               0 :         register size_t css = (size_t)p->g->csetsize;
    1008                 :         register int i;
    1009                 : 
    1010               0 :         if (no >= p->ncsalloc) {  /* need another column of space */
    1011               0 :                 p->ncsalloc += CHAR_BIT;
    1012               0 :                 nc = p->ncsalloc;
    1013                 :                 assert(nc % CHAR_BIT == 0);
    1014               0 :                 nbytes = nc / CHAR_BIT * css;
    1015               0 :                 if (p->g->sets == NULL)
    1016               0 :                         p->g->sets = (cset *)malloc(nc * sizeof(cset));
    1017                 :                 else
    1018               0 :                         p->g->sets = (cset *)realloc((unsigned char *)p->g->sets,
    1019                 :                                                         nc * sizeof(cset));
    1020               0 :                 if (p->g->setbits == NULL)
    1021               0 :                         p->g->setbits = (uch *)malloc(nbytes);
    1022                 :                 else {
    1023               0 :                         p->g->setbits = (uch *)realloc((unsigned char *)p->g->setbits,
    1024                 :                                                                 nbytes);
    1025                 :                         /* xxx this isn't right if setbits is now NULL */
    1026               0 :                         for (i = 0; i < no; i++)
    1027               0 :                                 p->g->sets[i].ptr = p->g->setbits + css*(i/CHAR_BIT);
    1028                 :                 }
    1029               0 :                 if (p->g->sets != NULL && p->g->setbits != NULL)
    1030               0 :                         (void) memset((unsigned char *)p->g->setbits + (nbytes - css),
    1031                 :                                                                 0, css);
    1032                 :                 else {
    1033               0 :                         no = 0;
    1034               0 :                         SETERROR(REG_ESPACE);
    1035                 :                         /* caller's responsibility not to do set ops */
    1036                 :                 }
    1037                 :         }
    1038                 : 
    1039                 :         assert(p->g->sets != NULL);       /* xxx */
    1040               0 :         cs = &p->g->sets[no];
    1041               0 :         cs->ptr = p->g->setbits + css*((no)/CHAR_BIT);
    1042               0 :         cs->mask = 1 << ((no) % CHAR_BIT);
    1043               0 :         cs->hash = 0;
    1044               0 :         cs->smultis = 0;
    1045               0 :         cs->multis = NULL;
    1046                 : 
    1047               0 :         return(cs);
    1048                 : }
    1049                 : 
    1050                 : /*
    1051                 :  - freeset - free a now-unused set
    1052                 :  == static void freeset(register struct parse *p, register cset *cs);
    1053                 :  */
    1054                 : static void
    1055                 : freeset(p, cs)
    1056                 : register struct parse *p;
    1057                 : register cset *cs;
    1058               0 : {
    1059                 :         register size_t i;
    1060               0 :         register cset *top = &p->g->sets[p->g->ncsets];
    1061               0 :         register size_t css = (size_t)p->g->csetsize;
    1062                 : 
    1063               0 :         for (i = 0; i < css; i++)
    1064               0 :                 CHsub(cs, i);
    1065               0 :         if (cs == top-1)        /* recover only the easy case */
    1066               0 :                 p->g->ncsets--;
    1067               0 : }
    1068                 : 
    1069                 : /*
    1070                 :  - freezeset - final processing on a set of characters
    1071                 :  == static int freezeset(register struct parse *p, register cset *cs);
    1072                 :  *
    1073                 :  * The main task here is merging identical sets.  This is usually a waste
    1074                 :  * of time (although the hash code minimizes the overhead), but can win
    1075                 :  * big if REG_ICASE is being used.  REG_ICASE, by the way, is why the hash
    1076                 :  * is done using addition rather than xor -- all ASCII [aA] sets xor to
    1077                 :  * the same value!
    1078                 :  */
    1079                 : static int                      /* set number */
    1080                 : freezeset(p, cs)
    1081                 : register struct parse *p;
    1082                 : register cset *cs;
    1083               0 : {
    1084               0 :         register uch h = cs->hash;
    1085                 :         register size_t i;
    1086               0 :         register cset *top = &p->g->sets[p->g->ncsets];
    1087                 :         register cset *cs2;
    1088               0 :         register size_t css = (size_t)p->g->csetsize;
    1089                 : 
    1090                 :         /* look for an earlier one which is the same */
    1091               0 :         for (cs2 = &p->g->sets[0]; cs2 < top; cs2++)
    1092               0 :                 if (cs2->hash == h && cs2 != cs) {
    1093                 :                         /* maybe */
    1094               0 :                         for (i = 0; i < css; i++)
    1095               0 :                                 if (!!CHIN(cs2, i) != !!CHIN(cs, i))
    1096               0 :                                         break;          /* no */
    1097               0 :                         if (i == css)
    1098               0 :                                 break;                  /* yes */
    1099                 :                 }
    1100                 : 
    1101               0 :         if (cs2 < top) {     /* found one */
    1102               0 :                 freeset(p, cs);
    1103               0 :                 cs = cs2;
    1104                 :         }
    1105                 : 
    1106               0 :         return((int)(cs - p->g->sets));
    1107                 : }
    1108                 : 
    1109                 : /*
    1110                 :  - firstch - return first character in a set (which must have at least one)
    1111                 :  == static int firstch(register struct parse *p, register cset *cs);
    1112                 :  */
    1113                 : static int                      /* character; there is no "none" value */
    1114                 : firstch(p, cs)
    1115                 : register struct parse *p;
    1116                 : register cset *cs;
    1117               0 : {
    1118                 :         register size_t i;
    1119               0 :         register size_t css = (size_t)p->g->csetsize;
    1120                 : 
    1121               0 :         for (i = 0; i < css; i++)
    1122               0 :                 if (CHIN(cs, i))
    1123               0 :                         return((unsigned char)i);
    1124                 :         assert(never);
    1125               0 :         return(0);              /* arbitrary */
    1126                 : }
    1127                 : 
    1128                 : /*
    1129                 :  - nch - number of characters in a set
    1130                 :  == static int nch(register struct parse *p, register cset *cs);
    1131                 :  */
    1132                 : static int
    1133                 : nch(p, cs)
    1134                 : register struct parse *p;
    1135                 : register cset *cs;
    1136               0 : {
    1137                 :         register size_t i;
    1138               0 :         register size_t css = (size_t)p->g->csetsize;
    1139               0 :         register int n = 0;
    1140                 : 
    1141               0 :         for (i = 0; i < css; i++)
    1142               0 :                 if (CHIN(cs, i))
    1143               0 :                         n++;
    1144               0 :         return(n);
    1145                 : }
    1146                 : 
    1147                 : /*
    1148                 :  - mcadd - add a collating element to a cset
    1149                 :  == static void mcadd(register struct parse *p, register cset *cs, \
    1150                 :  ==     register char *cp);
    1151                 :  */
    1152                 : static void
    1153                 : mcadd(p, cs, cp)
    1154                 : register struct parse *p;
    1155                 : register cset *cs;
    1156                 : register unsigned char *cp;
    1157               0 : {
    1158               0 :         register size_t oldend = cs->smultis;
    1159                 : 
    1160               0 :         cs->smultis += strlen(cp) + 1;
    1161               0 :         if (cs->multis == NULL)
    1162               0 :                 cs->multis = malloc(cs->smultis);
    1163                 :         else
    1164               0 :                 cs->multis = realloc(cs->multis, cs->smultis);
    1165               0 :         if (cs->multis == NULL) {
    1166               0 :                 SETERROR(REG_ESPACE);
    1167               0 :                 return;
    1168                 :         }
    1169                 : 
    1170               0 :         (void) strcpy(cs->multis + oldend - 1, cp);
    1171               0 :         cs->multis[cs->smultis - 1] = '\0';
    1172                 : }
    1173                 : 
    1174                 : #if 0
    1175                 : /*
    1176                 :  - mcsub - subtract a collating element from a cset
    1177                 :  == static void mcsub(register cset *cs, register unsigned char *cp);
    1178                 :  */
    1179                 : static void
    1180                 : mcsub(cs, cp)
    1181                 : register unsigned cset *cs;
    1182                 : register unsigned char *cp;
    1183                 : {
    1184                 :         register unsigned char *fp = mcfind(cs, cp);
    1185                 :         register size_t len = strlen(fp);
    1186                 : 
    1187                 :         assert(fp != NULL);
    1188                 :         (void) memmove(fp, fp + len + 1,
    1189                 :                                 cs->smultis - (fp + len + 1 - cs->multis));
    1190                 :         cs->smultis -= len;
    1191                 : 
    1192                 :         if (cs->smultis == 0) {
    1193                 :                 free(cs->multis);
    1194                 :                 cs->multis = NULL;
    1195                 :                 return;
    1196                 :         }
    1197                 : 
    1198                 :         cs->multis = realloc(cs->multis, cs->smultis);
    1199                 :         assert(cs->multis != NULL);
    1200                 : }
    1201                 : 
    1202                 : /*
    1203                 :  - mcin - is a collating element in a cset?
    1204                 :  == static int mcin(register cset *cs, register unsigned char *cp);
    1205                 :  */
    1206                 : static int
    1207                 : mcin(cs, cp)
    1208                 : register cset *cs;
    1209                 : register unsigned char *cp;
    1210                 : {
    1211                 :         return(mcfind(cs, cp) != NULL);
    1212                 : }
    1213                 : 
    1214                 : 
    1215                 : /*
    1216                 :  - mcfind - find a collating element in a cset
    1217                 :  == static unsigned char *mcfind(register cset *cs, register unsigned char *cp);
    1218                 :  */
    1219                 : static unsigned char *
    1220                 : mcfind(cs, cp)
    1221                 : register cset *cs;
    1222                 : register unsigned char *cp;
    1223                 : {
    1224                 :         register unsigned char *p;
    1225                 : 
    1226                 :         if (cs->multis == NULL)
    1227                 :                 return(NULL);
    1228                 :         for (p = cs->multis; *p != '\0'; p += strlen(p) + 1)
    1229                 :                 if (strcmp(cp, p) == 0)
    1230                 :                         return(p);
    1231                 :         return(NULL);
    1232                 : }
    1233                 : #endif
    1234                 : 
    1235                 : /*
    1236                 :  - mcinvert - invert the list of collating elements in a cset
    1237                 :  == static void mcinvert(register struct parse *p, register cset *cs);
    1238                 :  *
    1239                 :  * This would have to know the set of possibilities.  Implementation
    1240                 :  * is deferred.
    1241                 :  */
    1242                 : static void
    1243                 : mcinvert(p, cs)
    1244                 : register struct parse *p;
    1245                 : register cset *cs;
    1246               0 : {
    1247                 :         assert(cs->multis == NULL);  /* xxx */
    1248               0 : }
    1249                 : 
    1250                 : /*
    1251                 :  - mccase - add case counterparts of the list of collating elements in a cset
    1252                 :  == static void mccase(register struct parse *p, register cset *cs);
    1253                 :  *
    1254                 :  * This would have to know the set of possibilities.  Implementation
    1255                 :  * is deferred.
    1256                 :  */
    1257                 : static void
    1258                 : mccase(p, cs)
    1259                 : register struct parse *p;
    1260                 : register cset *cs;
    1261               0 : {
    1262                 :         assert(cs->multis == NULL);  /* xxx */
    1263               0 : }
    1264                 : 
    1265                 : /*
    1266                 :  - isinsets - is this character in any sets?
    1267                 :  == static int isinsets(register struct re_guts *g, int c);
    1268                 :  */
    1269                 : static int                      /* predicate */
    1270                 : isinsets(g, c)
    1271                 : register struct re_guts *g;
    1272                 : int c;
    1273               0 : {
    1274                 :         register uch *col;
    1275                 :         register int i;
    1276               0 :         register int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT;
    1277               0 :         register unsigned uc = (unsigned char)c;
    1278                 : 
    1279               0 :         for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize)
    1280               0 :                 if (col[uc] != 0)
    1281               0 :                         return(1);
    1282               0 :         return(0);
    1283                 : }
    1284                 : 
    1285                 : /*
    1286                 :  - samesets - are these two characters in exactly the same sets?
    1287                 :  == static int samesets(register struct re_guts *g, int c1, int c2);
    1288                 :  */
    1289                 : static int                      /* predicate */
    1290                 : samesets(g, c1, c2)
    1291                 : register struct re_guts *g;
    1292                 : int c1;
    1293                 : int c2;
    1294               0 : {
    1295                 :         register uch *col;
    1296                 :         register int i;
    1297               0 :         register int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT;
    1298               0 :         register unsigned uc1 = (unsigned char)c1;
    1299               0 :         register unsigned uc2 = (unsigned char)c2;
    1300                 : 
    1301               0 :         for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize)
    1302               0 :                 if (col[uc1] != col[uc2])
    1303               0 :                         return(0);
    1304               0 :         return(1);
    1305                 : }
    1306                 : 
    1307                 : /*
    1308                 :  - categorize - sort out character categories
    1309                 :  == static void categorize(struct parse *p, register struct re_guts *g);
    1310                 :  */
    1311                 : static void
    1312                 : categorize(p, g)
    1313                 : struct parse *p;
    1314                 : register struct re_guts *g;
    1315               0 : {
    1316               0 :         register cat_t *cats = g->categories;
    1317                 :         register int c;
    1318                 :         register int c2;
    1319                 :         register cat_t cat;
    1320                 : 
    1321                 :         /* avoid making error situations worse */
    1322               0 :         if (p->error != 0)
    1323               0 :                 return;
    1324                 : 
    1325               0 :         for (c = 0; c <= UCHAR_MAX; c++)
    1326               0 :                 if (cats[c] == 0 && isinsets(g, c)) {
    1327               0 :                         cat = g->ncategories++;
    1328               0 :                         cats[c] = cat;
    1329               0 :                         for (c2 = c+1; c2 <= UCHAR_MAX; c2++)
    1330               0 :                                 if (cats[c2] == 0 && samesets(g, c, c2))
    1331               0 :                                         cats[c2] = cat;
    1332                 :                 }
    1333                 : }
    1334                 : 
    1335                 : /*
    1336                 :  - dupl - emit a duplicate of a bunch of sops
    1337                 :  == static sopno dupl(register struct parse *p, sopno start, sopno finish);
    1338                 :  */
    1339                 : static sopno                    /* start of duplicate */
    1340                 : dupl(p, start, finish)
    1341                 : register struct parse *p;
    1342                 : sopno start;                    /* from here */
    1343                 : sopno finish;                   /* to this less one */
    1344               0 : {
    1345               0 :         register sopno ret = HERE();
    1346               0 :         register sopno len = finish - start;
    1347                 : 
    1348                 :         assert(finish >= start);
    1349               0 :         if (len == 0)
    1350               0 :                 return(ret);
    1351               0 :         enlarge(p, p->ssize + len);  /* this many unexpected additions */
    1352                 :         assert(p->ssize >= p->slen + len);
    1353               0 :         (void) memcpy((char *)(p->strip + p->slen),
    1354                 :                 (char *)(p->strip + start), (size_t)len*sizeof(sop));
    1355               0 :         p->slen += len;
    1356               0 :         return(ret);
    1357                 : }
    1358                 : 
    1359                 : /*
    1360                 :  - doemit - emit a strip operator
    1361                 :  == static void doemit(register struct parse *p, sop op, size_t opnd);
    1362                 :  *
    1363                 :  * It might seem better to implement this as a macro with a function as
    1364                 :  * hard-case backup, but it's just too big and messy unless there are
    1365                 :  * some changes to the data structures.  Maybe later.
    1366                 :  */
    1367                 : static void
    1368                 : doemit(p, op, opnd)
    1369                 : register struct parse *p;
    1370                 : sop op;
    1371                 : size_t opnd;
    1372               0 : {
    1373                 :         /* avoid making error situations worse */
    1374               0 :         if (p->error != 0)
    1375               0 :                 return;
    1376                 : 
    1377                 :         /* deal with oversize operands ("can't happen", more or less) */
    1378                 :         assert(opnd < 1<<OPSHIFT);
    1379                 : 
    1380                 :         /* deal with undersized strip */
    1381               0 :         if (p->slen >= p->ssize)
    1382               0 :                 enlarge(p, (p->ssize+1) / 2 * 3);    /* +50% */
    1383                 :         assert(p->slen < p->ssize);
    1384                 : 
    1385                 :         /* finally, it's all reduced to the easy case */
    1386               0 :         p->strip[p->slen++] = SOP(op, opnd);
    1387                 : }
    1388                 : 
    1389                 : /*
    1390                 :  - doinsert - insert a sop into the strip
    1391                 :  == static void doinsert(register struct parse *p, sop op, size_t opnd, sopno pos);
    1392                 :  */
    1393                 : static void
    1394                 : doinsert(p, op, opnd, pos)
    1395                 : register struct parse *p;
    1396                 : sop op;
    1397                 : size_t opnd;
    1398                 : sopno pos;
    1399               0 : {
    1400                 :         register sopno sn;
    1401                 :         register sop s;
    1402                 :         register int i;
    1403                 : 
    1404                 :         /* avoid making error situations worse */
    1405               0 :         if (p->error != 0)
    1406               0 :                 return;
    1407                 : 
    1408               0 :         sn = HERE();
    1409               0 :         EMIT(op, opnd);         /* do checks, ensure space */
    1410                 :         assert(HERE() == sn+1);
    1411               0 :         s = p->strip[sn];
    1412                 : 
    1413                 :         /* adjust paren pointers */
    1414                 :         assert(pos > 0);
    1415               0 :         for (i = 1; i < NPAREN; i++) {
    1416               0 :                 if (p->pbegin[i] >= pos) {
    1417               0 :                         p->pbegin[i]++;
    1418                 :                 }
    1419               0 :                 if (p->pend[i] >= pos) {
    1420               0 :                         p->pend[i]++;
    1421                 :                 }
    1422                 :         }
    1423                 : 
    1424               0 :         memmove((char *)&p->strip[pos+1], (char *)&p->strip[pos],
    1425                 :                                                 (HERE()-pos-1)*sizeof(sop));
    1426               0 :         p->strip[pos] = s;
    1427                 : }
    1428                 : 
    1429                 : /*
    1430                 :  - dofwd - complete a forward reference
    1431                 :  == static void dofwd(register struct parse *p, sopno pos, sop value);
    1432                 :  */
    1433                 : static void
    1434                 : dofwd(p, pos, value)
    1435                 : register struct parse *p;
    1436                 : register sopno pos;
    1437                 : sop value;
    1438               0 : {
    1439                 :         /* avoid making error situations worse */
    1440               0 :         if (p->error != 0)
    1441               0 :                 return;
    1442                 : 
    1443                 :         assert(value < 1<<OPSHIFT);
    1444               0 :         p->strip[pos] = OP(p->strip[pos]) | value;
    1445                 : }
    1446                 : 
    1447                 : /*
    1448                 :  - enlarge - enlarge the strip
    1449                 :  == static void enlarge(register struct parse *p, sopno size);
    1450                 :  */
    1451                 : static void
    1452                 : enlarge(p, size)
    1453                 : register struct parse *p;
    1454                 : register sopno size;
    1455               0 : {
    1456                 :         register sop *sp;
    1457                 : 
    1458               0 :         if (p->ssize >= size)
    1459               0 :                 return;
    1460                 : 
    1461               0 :         sp = (sop *)realloc(p->strip, size*sizeof(sop));
    1462               0 :         if (sp == NULL) {
    1463               0 :                 SETERROR(REG_ESPACE);
    1464               0 :                 return;
    1465                 :         }
    1466               0 :         p->strip = sp;
    1467               0 :         p->ssize = size;
    1468                 : }
    1469                 : 
    1470                 : /*
    1471                 :  - stripsnug - compact the strip
    1472                 :  == static void stripsnug(register struct parse *p, register struct re_guts *g);
    1473                 :  */
    1474                 : static void
    1475                 : stripsnug(p, g)
    1476                 : register struct parse *p;
    1477                 : register struct re_guts *g;
    1478               0 : {
    1479               0 :         g->nstates = p->slen;
    1480               0 :         g->strip = (sop *)realloc((unsigned char *)p->strip, p->slen * sizeof(sop));
    1481               0 :         if (g->strip == NULL) {
    1482               0 :                 SETERROR(REG_ESPACE);
    1483               0 :                 g->strip = p->strip;
    1484                 :         }
    1485               0 : }
    1486                 : 
    1487                 : /*
    1488                 :  - findmust - fill in must and mlen with longest mandatory literal string
    1489                 :  == static void findmust(register struct parse *p, register struct re_guts *g);
    1490                 :  *
    1491                 :  * This algorithm could do fancy things like analyzing the operands of |
    1492                 :  * for common subsequences.  Someday.  This code is simple and finds most
    1493                 :  * of the interesting cases.
    1494                 :  *
    1495                 :  * Note that must and mlen got initialized during setup.
    1496                 :  */
    1497                 : static void
    1498                 : findmust(p, g)
    1499                 : struct parse *p;
    1500                 : register struct re_guts *g;
    1501               0 : {
    1502                 :         register sop *scan;
    1503               0 :         sop *start = NULL;
    1504               0 :         register sop *newstart = NULL;
    1505                 :         register sopno newlen;
    1506                 :         register sop s;
    1507                 :         register unsigned char *cp;
    1508                 :         register sopno i;
    1509                 : 
    1510                 :         /* avoid making error situations worse */
    1511               0 :         if (p->error != 0)
    1512               0 :                 return;
    1513                 : 
    1514                 :         /* find the longest OCHAR sequence in strip */
    1515               0 :         newlen = 0;
    1516               0 :         scan = g->strip + 1;
    1517                 :         do {
    1518               0 :                 s = *scan++;
    1519               0 :                 switch (OP(s)) {
    1520                 :                 case OCHAR:             /* sequence member */
    1521               0 :                         if (newlen == 0)                /* new sequence */
    1522               0 :                                 newstart = scan - 1;
    1523               0 :                         newlen++;
    1524               0 :                         break;
    1525                 :                 case OPLUS_:            /* things that don't break one */
    1526                 :                 case OLPAREN:
    1527                 :                 case ORPAREN:
    1528               0 :                         break;
    1529                 :                 case OQUEST_:           /* things that must be skipped */
    1530                 :                 case OCH_:
    1531               0 :                         scan--;
    1532                 :                         do {
    1533               0 :                                 scan += OPND(s);
    1534               0 :                                 s = *scan;
    1535                 :                                 /* assert() interferes w debug printouts */
    1536               0 :                                 if (OP(s) != O_QUEST && OP(s) != O_CH &&
    1537                 :                                                         OP(s) != OOR2) {
    1538               0 :                                         g->iflags |= BAD;
    1539               0 :                                         return;
    1540                 :                                 }
    1541               0 :                         } while (OP(s) != O_QUEST && OP(s) != O_CH);
    1542                 :                         /* fallthrough */
    1543                 :                 default:                /* things that break a sequence */
    1544               0 :                         if (newlen > g->mlen) {           /* ends one */
    1545               0 :                                 start = newstart;
    1546               0 :                                 g->mlen = newlen;
    1547                 :                         }
    1548               0 :                         newlen = 0;
    1549                 :                         break;
    1550                 :                 }
    1551               0 :         } while (OP(s) != OEND);
    1552                 : 
    1553               0 :         if (g->mlen == 0)            /* there isn't one */
    1554               0 :                 return;
    1555                 : 
    1556               0 :         if (!start) {
    1557               0 :                 g->mlen = 0;
    1558               0 :                 return;
    1559                 :         }
    1560                 : 
    1561                 :         /* turn it into a character string */
    1562               0 :         g->must = malloc((size_t)g->mlen + 1);
    1563               0 :         if (g->must == NULL) {               /* argh; just forget it */
    1564               0 :                 g->mlen = 0;
    1565               0 :                 return;
    1566                 :         }
    1567               0 :         cp = g->must;
    1568               0 :         scan = start;
    1569               0 :         for (i = g->mlen; i > 0; i--) {
    1570               0 :                 while (OP(s = *scan++) != OCHAR)
    1571                 :                         continue;
    1572                 :                 assert(cp < g->must + g->mlen);
    1573               0 :                 *cp++ = (unsigned char)OPND(s);
    1574                 :         }
    1575                 :         assert(cp == g->must + g->mlen);
    1576               0 :         *cp++ = '\0';           /* just on general principles */
    1577                 : }
    1578                 : 
    1579                 : /*
    1580                 :  - pluscount - count + nesting
    1581                 :  == static sopno pluscount(register struct parse *p, register struct re_guts *g);
    1582                 :  */
    1583                 : static sopno                    /* nesting depth */
    1584                 : pluscount(p, g)
    1585                 : struct parse *p;
    1586                 : register struct re_guts *g;
    1587               0 : {
    1588                 :         register sop *scan;
    1589                 :         register sop s;
    1590               0 :         register sopno plusnest = 0;
    1591               0 :         register sopno maxnest = 0;
    1592                 : 
    1593               0 :         if (p->error != 0)
    1594               0 :                 return(0);      /* there may not be an OEND */
    1595                 : 
    1596               0 :         scan = g->strip + 1;
    1597                 :         do {
    1598               0 :                 s = *scan++;
    1599               0 :                 switch (OP(s)) {
    1600                 :                 case OPLUS_:
    1601               0 :                         plusnest++;
    1602               0 :                         break;
    1603                 :                 case O_PLUS:
    1604               0 :                         if (plusnest > maxnest)
    1605               0 :                                 maxnest = plusnest;
    1606               0 :                         plusnest--;
    1607                 :                         break;
    1608                 :                 }
    1609               0 :         } while (OP(s) != OEND);
    1610               0 :         if (plusnest != 0)
    1611               0 :                 g->iflags |= BAD;
    1612               0 :         return(maxnest);
    1613                 : }

Generated by: LTP GCOV extension version 1.5