Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
att
GitHub Repository: att/ast
Path: blob/master/src/lib/libast/regex/regdecomp.c
1810 views
1
/***********************************************************************
2
* *
3
* This software is part of the ast package *
4
* Copyright (c) 1985-2011 AT&T Intellectual Property *
5
* and is licensed under the *
6
* Eclipse Public License, Version 1.0 *
7
* by AT&T Intellectual Property *
8
* *
9
* A copy of the License is available at *
10
* http://www.eclipse.org/org/documents/epl-v10.html *
11
* (with md5 checksum b35adb5213ca9657e911e9befb180842) *
12
* *
13
* Information and Software Systems Research *
14
* AT&T Research *
15
* Florham Park NJ *
16
* *
17
* Glenn Fowler <[email protected]> *
18
* David Korn <[email protected]> *
19
* Phong Vo <[email protected]> *
20
* *
21
***********************************************************************/
22
#pragma prototyped
23
24
/*
25
* posix regex decompiler
26
*/
27
28
#include "reglib.h"
29
30
/*
 * ismeta(c,t,e,d): non-zero when the magic table has a row for c whose
 * entry at column (t)+(e) is at least T_META, or when c equals d.
 * In this file t is the pattern type, e is 0 or 1 and d is the delimiter.
 *
 * The whole expansion is parenthesized so that the macro behaves as a
 * single operand: without the outer parentheses `!ismeta(...)' negated
 * only the state.magic[c] test (and then indexed that null pointer), and
 * `x && ismeta(...)' let the delimiter comparison escape the `&&'.
 *
 * meta(f,c,t,e,d): write c to stream f, preceded by a backslash when
 * ismeta(c,t,e,d) holds. c is evaluated more than once, so it must not
 * have side effects.
 */

#undef	ismeta
#define ismeta(c,t,e,d)	((state.magic[c] && state.magic[c][(t)+(e)] >= T_META) || (c) == (d))
#define meta(f,c,t,e,d)	do { if (ismeta(c,t,e,d)) sfputc(f, '\\'); sfputc(f, c); } while (0)
33
34
static void
35
detrie(Trie_node_t* x, Sfio_t* sp, char* b, char* p, char* e, int delimiter)
36
{
37
register Trie_node_t* y;
38
char* o;
39
int k;
40
41
o = p;
42
k = 1;
43
do
44
{
45
if (k)
46
{
47
o = p;
48
if (p < e)
49
*p++ = x->c;
50
}
51
sfputc(sp, x->c);
52
for (y = x->sib; y; y = y->sib)
53
{
54
sfputc(sp, '|');
55
sfputc(sp, '<');
56
sfwrite(sp, b, p - b);
57
sfputc(sp, '>');
58
detrie(y, sp, b, p, e, delimiter);
59
}
60
if (x->end && x->son)
61
{
62
sfputc(sp, '|');
63
sfputc(sp, '{');
64
sfwrite(sp, b, p - b);
65
sfputc(sp, '}');
66
p = o;
67
}
68
} while (x = x->son);
69
}
70
71
static int
72
decomp(register Rex_t* e, Sfio_t* sp, int type, int delimiter, regflags_t flags)
73
{
74
Rex_t* q;
75
unsigned char* s;
76
unsigned char* t;
77
int c;
78
int m;
79
int cb;
80
int cd;
81
int cr;
82
int ib;
83
int ie;
84
int nb;
85
int ne;
86
unsigned char ic[2*UCHAR_MAX];
87
unsigned char nc[2*UCHAR_MAX];
88
89
do
90
{
91
switch (e->type)
92
{
93
case REX_ALT:
94
if (decomp(e->re.group.expr.binary.left, sp, type, delimiter, flags))
95
return 1;
96
sfputc(sp, '|');
97
if (e->re.group.expr.binary.right && decomp(e->re.group.expr.binary.right, sp, type, delimiter, flags))
98
return 1;
99
break;
100
case REX_BACK:
101
sfprintf(sp, "\\%d", e->lo);
102
break;
103
case REX_BEG:
104
if (type < SRE)
105
sfputc(sp, '^');
106
break;
107
case REX_END:
108
if (type < SRE)
109
sfputc(sp, '$');
110
break;
111
case REX_WBEG:
112
meta(sp, '<', type, 1, delimiter);
113
break;
114
case REX_WEND:
115
meta(sp, '<', type, 1, delimiter);
116
break;
117
case REX_WORD:
118
sfprintf(sp, "\\w");
119
break;
120
case REX_CLASS:
121
case REX_COLL_CLASS:
122
case REX_ONECHAR:
123
case REX_DOT:
124
case REX_REP:
125
if (type >= SRE)
126
{
127
c = ')';
128
if (e->hi == RE_DUP_INF)
129
{
130
if (!e->lo)
131
sfputc(sp, '*');
132
else if (e->lo == 1)
133
sfputc(sp, '+');
134
else
135
sfprintf(sp, "{%d,}", e->lo);
136
}
137
else if (e->hi != 1)
138
sfprintf(sp, "{%d,%d}", e->lo, e->hi);
139
else if (e->lo == 0)
140
sfputc(sp, '?');
141
else
142
c = 0;
143
}
144
switch (e->type)
145
{
146
case REX_REP:
147
if (decomp(e->re.group.expr.rex, sp, type, delimiter, flags))
148
return 1;
149
break;
150
case REX_CLASS:
151
sfputc(sp, '[');
152
nb = ne = ib = ie = -2;
153
cb = cd = cr = 0;
154
s = nc;
155
t = ic;
156
for (m = 0; m <= UCHAR_MAX; m++)
157
if (settst(e->re.charclass, m))
158
{
159
if (m == ']')
160
cb = 1;
161
else if (m == '-')
162
cr = 1;
163
else if (m == delimiter)
164
cd = 1;
165
else if (nb < 0)
166
ne = nb = m;
167
else if (ne == (m - 1))
168
ne = m;
169
else
170
{
171
if (ne == nb)
172
*s++ = ne;
173
else
174
{
175
*s++ = nb;
176
*s++ = '-';
177
*s++ = ne;
178
}
179
ne = nb = m;
180
}
181
}
182
else
183
{
184
if (m == ']')
185
cb = -1;
186
else if (m == '-')
187
cr = -1;
188
else if (m == delimiter)
189
cd = -1;
190
else if (ib < 0)
191
ie = ib = m;
192
else if (ie == (m - 1))
193
ie = m;
194
else
195
{
196
if (ie == ib)
197
*t++ = ie;
198
else
199
{
200
*t++ = ib;
201
*t++ = '-';
202
*t++ = ie;
203
}
204
ie = ib = m;
205
}
206
}
207
if (nb >= 0)
208
{
209
*s++ = nb;
210
if (ne != nb)
211
{
212
*s++ = '-';
213
*s++ = ne;
214
}
215
}
216
if (ib >= 0)
217
{
218
*t++ = ib;
219
if (ie != ib)
220
{
221
*t++ = '-';
222
*t++ = ie;
223
}
224
}
225
if ((t - ic + 1) < (s - nc + (nc[0] == '^')))
226
{
227
sfputc(sp, '^');
228
if (cb < 0)
229
sfputc(sp, ']');
230
if (cr < 0)
231
sfputc(sp, '-');
232
if (cd < 0 && delimiter > 0)
233
{
234
if (flags & REG_ESCAPE)
235
sfputc(sp, '\\');
236
sfputc(sp, delimiter);
237
}
238
sfwrite(sp, ic, t - ic);
239
}
240
else
241
{
242
if (cb > 0)
243
sfputc(sp, ']');
244
if (cr > 0)
245
sfputc(sp, '-');
246
if (cd > 0 && delimiter > 0)
247
{
248
if (flags & REG_ESCAPE)
249
sfputc(sp, '\\');
250
sfputc(sp, delimiter);
251
}
252
if (nc[0] == '^')
253
{
254
sfwrite(sp, nc + 1, s - nc - 1);
255
sfputc(sp, '^');
256
}
257
else
258
sfwrite(sp, nc, s - nc);
259
}
260
sfputc(sp, ']');
261
break;
262
case REX_COLL_CLASS:
263
break;
264
case REX_ONECHAR:
265
meta(sp, e->re.onechar, type, 0, delimiter);
266
break;
267
case REX_DOT:
268
sfputc(sp, '.');
269
break;
270
}
271
if (type < SRE)
272
{
273
if (e->hi == RE_DUP_INF)
274
{
275
if (!e->lo)
276
sfputc(sp, '*');
277
else if (e->lo == 1 && ismeta('+', type, 0, delimiter))
278
meta(sp, '+', type, 1, delimiter);
279
else
280
{
281
meta(sp, '{', type, 1, delimiter);
282
sfprintf(sp, "%d,", e->lo);
283
meta(sp, '}', type, 1, delimiter);
284
}
285
}
286
else if (e->hi != 1 || e->lo == 0 && !ismeta('?', type, 0, delimiter))
287
{
288
meta(sp, '{', type, 1, delimiter);
289
sfprintf(sp, "%d,%d", e->lo, e->hi);
290
meta(sp, '}', type, 1, delimiter);
291
}
292
else if (e->lo == 0)
293
meta(sp, '?', type, 1, delimiter);
294
}
295
else if (c)
296
sfputc(sp, c);
297
break;
298
case REX_STRING:
299
case REX_KMP:
300
t = (s = e->re.string.base) + e->re.string.size;
301
while (s < t)
302
{
303
c = *s++;
304
meta(sp, c, type, 0, delimiter);
305
}
306
break;
307
case REX_TRIE:
308
ib = 0;
309
for (c = 0; c <= UCHAR_MAX; c++)
310
if (e->re.trie.root[c])
311
{
312
char pfx[1024];
313
314
if (ib)
315
sfputc(sp, '|');
316
else
317
ib = 1;
318
detrie(e->re.trie.root[c], sp, pfx, pfx, &pfx[sizeof(pfx)], delimiter);
319
}
320
break;
321
case REX_NEG:
322
if (type >= SRE)
323
sfprintf(sp, "!(");
324
if (decomp(e->re.group.expr.rex, sp, type, delimiter, flags))
325
return 1;
326
if (type >= SRE)
327
sfputc(sp, ')');
328
else
329
sfputc(sp, '!');
330
break;
331
case REX_CONJ:
332
if (decomp(e->re.group.expr.binary.left, sp, type, delimiter, flags))
333
return 1;
334
sfputc(sp, '&');
335
if (decomp(e->re.group.expr.binary.right, sp, type, delimiter, flags))
336
return 1;
337
break;
338
case REX_GROUP:
339
if (type >= SRE)
340
sfputc(sp, '@');
341
meta(sp, '(', type, 1, delimiter);
342
if (decomp(e->re.group.expr.rex, sp, type, delimiter, flags))
343
return 1;
344
meta(sp, ')', type, 1, delimiter);
345
break;
346
case REX_GROUP_AHEAD:
347
case REX_GROUP_AHEAD_NOT:
348
case REX_GROUP_BEHIND:
349
case REX_GROUP_BEHIND_NOT:
350
meta(sp, '(', type, 1, delimiter);
351
sfputc(sp, '?');
352
if (decomp(e->re.group.expr.rex, sp, type, delimiter, flags))
353
return 1;
354
meta(sp, ')', type, 1, delimiter);
355
break;
356
case REX_GROUP_COND:
357
meta(sp, '(', type, 1, delimiter);
358
sfputc(sp, '?');
359
if (e->re.group.expr.binary.left && decomp(e->re.group.expr.binary.left, sp, type, delimiter, flags))
360
return 1;
361
if (q = e->re.group.expr.binary.right)
362
{
363
sfputc(sp, ':');
364
if (q->re.group.expr.binary.left && decomp(q->re.group.expr.binary.left, sp, type, delimiter, flags))
365
return 1;
366
sfputc(sp, ':');
367
if (q->re.group.expr.binary.right && decomp(q->re.group.expr.binary.right, sp, type, delimiter, flags))
368
return 1;
369
}
370
meta(sp, ')', type, 1, delimiter);
371
break;
372
case REX_GROUP_CUT:
373
meta(sp, '(', type, 1, delimiter);
374
sfputc(sp, '?');
375
if (decomp(e->re.group.expr.rex, sp, type, delimiter, flags))
376
return 1;
377
meta(sp, ')', type, 1, delimiter);
378
break;
379
case REX_BM:
380
break;
381
default:
382
sfprintf(sp, "<ERROR:REX_%d>", e->type);
383
break;
384
}
385
} while (e = e->next);
386
return 0;
387
}
388
389
/*
390
* reconstruct pattern from compiled re p into sp
391
*/
392
393
size_t
394
regdecomp(regex_t* p, regflags_t flags, char* buf, size_t n)
395
{
396
Sfio_t* sp;
397
char* s;
398
int type;
399
int delimiter;
400
size_t r;
401
402
if (!(sp = sfstropen()))
403
return 0;
404
if (flags == (regflags_t)~0)
405
flags = p->env->flags;
406
switch (flags & (REG_AUGMENTED|REG_EXTENDED|REG_SHELL))
407
{
408
case 0:
409
type = BRE;
410
break;
411
case REG_AUGMENTED:
412
case REG_AUGMENTED|REG_EXTENDED:
413
type = ARE;
414
break;
415
case REG_EXTENDED:
416
type = ERE;
417
break;
418
case REG_SHELL:
419
type = SRE;
420
break;
421
default:
422
type = KRE;
423
break;
424
}
425
if (flags & REG_DELIMITED)
426
{
427
delimiter = '/';
428
sfputc(sp, delimiter);
429
}
430
else
431
delimiter = -1;
432
if (decomp(p->env->rex, sp, type, delimiter, flags))
433
r = 0;
434
else
435
{
436
if (delimiter > 0)
437
sfputc(sp, delimiter);
438
if ((r = sfstrtell(sp) + 1) <= n)
439
{
440
if (!(s = sfstruse(sp)))
441
r = 0;
442
else
443
memcpy(buf, s, r);
444
}
445
}
446
sfstrclose(sp);
447
return r;
448
}
449
450