/*
 * internals of regex_t
 */
#define MAGIC1  ((('r'^0200)<<8) | 'e')

/*
 * The internal representation is a *strip*, a sequence of
 * operators ending with an endmarker.  (Some terminology etc. is a
 * historical relic of earlier versions which used multiple strips.)
 * Certain oddities in the representation are there to permit running
 * the machinery backwards; in particular, any deviation from sequential
 * flow must be marked at both its source and its destination.  Some
 * fine points:
 *
 * - OPLUS_ and O_PLUS are *inside* the loop they create.
 * - OQUEST_ and O_QUEST are *outside* the bypass they create.
 * - OCH_ and O_CH are *outside* the multi-way branch they create, while
 *   OOR1 and OOR2 are respectively the end and the beginning of one of
 *   the branches.  Note that there is an implicit OOR2 following OCH_
 *   and an implicit OOR1 preceding O_CH.
 *
 * In state representations, an operator's bit is on to signify a state
 * immediately *preceding* "execution" of that operator.
 */
typedef long sop;               /* strip operator */
typedef long sopno;
typedef unsigned char uch;

/*
 * A sop packs an operator code and an operand into one value:
 * the operator occupies the bits selected by OPRMASK (bits 26..30),
 * the operand the low 26 bits selected by OPDMASK.
 */
#define OPRMASK 0x7c000000
#define OPDMASK 0x03ffffff
#define OPSHIFT (26)
#define OP(n)   ((n)&OPRMASK)           /* operator part of a sop */
#define OPND(n) ((n)&OPDMASK)           /* operand part of a sop */
#define SOP(op, opnd)   ((op)|(opnd))   /* build a sop from its parts */

/* operators                       meaning      operand                 */
/*                                              (back, fwd are offsets) */
#define OEND    (1<<OPSHIFT)    /* endmarker    -                       */
#define OCHAR   (2<<OPSHIFT)    /* character    unsigned char           */
#define OBOL    (3<<OPSHIFT)    /* left anchor  -                       */
#define OEOL    (4<<OPSHIFT)    /* right anchor -                       */
#define OANY    (5<<OPSHIFT)    /* .            -                       */
#define OANYOF  (6<<OPSHIFT)    /* [...]        set number              */
#define OBACK_  (7<<OPSHIFT)    /* begin \d     paren number            */
#define O_BACK  (8<<OPSHIFT)    /* end \d       paren number            */
#define OPLUS_  (9<<OPSHIFT)    /* + prefix     fwd to suffix           */
#define O_PLUS  (10<<OPSHIFT)   /* + suffix     back to prefix          */
#define OQUEST_ (11<<OPSHIFT)   /* ? prefix     fwd to suffix           */
#define O_QUEST (12<<OPSHIFT)   /* ? suffix     back to prefix          */
#define OLPAREN (13<<OPSHIFT)   /* (            fwd to )                */
#define ORPAREN (14<<OPSHIFT)   /* )            back to (               */
#define OCH_    (15<<OPSHIFT)   /* begin choice fwd to OOR2             */
#define OOR1    (16<<OPSHIFT)   /* | pt. 1      back to OOR1 or OCH_    */
#define OOR2    (17<<OPSHIFT)   /* | pt. 2      fwd to OOR2 or O_CH     */
#define O_CH    (18<<OPSHIFT)   /* end choice   back to OOR1            */
#define OBOW    (19<<OPSHIFT)   /* begin word   -                       */
#define OEOW    (20<<OPSHIFT)   /* end word     -                       */

/*
 * Structure for [] character-set representation.  Character sets are
 * done as bit vectors, grouped 8 to a byte vector for compactness.
 * The individual set therefore has both a pointer to the byte vector
 * and a mask to pick out the relevant bit of each byte.  A hash code
 * simplifies testing whether two sets could be identical.
 *
 * This will get trickier for multicharacter collating elements.  As
 * preliminary hooks for dealing with such things, we also carry along
 * a string of multi-character elements, and decide the size of the
 * vectors at run time.
 */
typedef struct {
        uch *ptr;               /* -> uch [csetsize] */
        uch mask;               /* bit within array */
        uch hash;               /* hash code */
        size_t smultis;
        char *multis;           /* -> char[smulti]  ab\0cd\0ef\0\0 */
} cset;

/*
 * Note that CHadd and CHsub are unsafe (they evaluate their arguments
 * more than once), and CHIN doesn't yield 0/1.
 *
 * All three index the vector through (uch) so that a character with the
 * high bit set -- negative when held in a signed char -- is stored in,
 * removed from, and looked up in the same in-range slot.
 */
#define CHadd(cs, c)    ((cs)->ptr[(uch)(c)] |= (cs)->mask, (cs)->hash = (uch)((cs)->hash + (c)))
#define CHsub(cs, c)    ((cs)->ptr[(uch)(c)] &= (uch)~(cs)->mask, (cs)->hash = (uch)((cs)->hash - (c)))
#define CHIN(cs, c)     ((cs)->ptr[(uch)(c)] & (cs)->mask)
#define MCadd(p, cs, cp)        mcadd(p, cs, cp)        /* regcomp() internal fns */
#define MCsub(p, cs, cp)        mcsub(p, cs, cp)
#define MCin(p, cs, cp) mcin(p, cs, cp)
#define NC      (CHAR_MAX - CHAR_MIN + 1)

/* stuff for character categories */
typedef unsigned char cat_t;

/*
 * main compiled-expression structure
 */
struct re_guts {
        int magic;
#               define  MAGIC2  ((('R'^0200)<<8)|'E')
        sop *strip;             /* malloced area for strip */
        int csetsize;           /* number of bits in a cset vector */
        int ncsets;             /* number of csets in use */
        cset *sets;             /* -> cset [ncsets] */
        uch *setbits;           /* -> uch[csetsize][ncsets/CHAR_BIT] */
        int cflags;             /* copy of regcomp() cflags argument */
        sopno nstates;          /* = number of sops */
        sopno firststate;       /* the initial OEND (normally 0) */
        sopno laststate;        /* the final OEND */
        int iflags;             /* internal flags */
#               define  USEBOL  01      /* used ^ */
#               define  USEEOL  02      /* used $ */
#               define  BAD     04      /* something wrong */
        int nbol;               /* number of ^ used */
        int neol;               /* number of $ used */
        int ncategories;        /* how many character categories */
        cat_t *categories;      /* ->catspace[-CHAR_MIN] */
        char *must;             /* match must contain this string */
        int mlen;               /* length of must */
        size_t nsub;            /* copy of re_nsub */
        int backrefs;           /* does it use back references? */
        sopno nplus;            /* how deep does it nest +s? */
        /* catspace must be last */
        cat_t catspace[1];      /* actually [NC] */
};

/* misc utilities */
#define OUT     (CHAR_MAX+1)    /* a non-character value */
/*
 * Word-constituent test.  The argument is evaluated twice.  It is cast
 * to unsigned char before reaching isalnum() because the <ctype.h>
 * functions are only defined for values representable as unsigned char
 * (or EOF); a negative plain char would otherwise be undefined behavior.
 */
#define ISWORD(c)       (isalnum((unsigned char)(c)) || (c) == '_')

/* switch off assertions (if not already off) if no REDEBUG */
#ifndef REDEBUG
#ifndef NDEBUG
#define NDEBUG  /* no assertions please */
#endif
#endif