Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
att
GitHub Repository: att/ast
Path: blob/master/src/lib/libast/regex/regclass.c
1810 views
1
/***********************************************************************
2
* *
3
* This software is part of the ast package *
4
* Copyright (c) 1985-2011 AT&T Intellectual Property *
5
* and is licensed under the *
6
* Eclipse Public License, Version 1.0 *
7
* by AT&T Intellectual Property *
8
* *
9
* A copy of the License is available at *
10
* http://www.eclipse.org/org/documents/epl-v10.html *
11
* (with md5 checksum b35adb5213ca9657e911e9befb180842) *
12
* *
13
* Information and Software Systems Research *
14
* AT&T Research *
15
* Florham Park NJ *
16
* *
17
* Glenn Fowler <[email protected]> *
18
* David Korn <[email protected]> *
19
* Phong Vo <[email protected]> *
20
* *
21
***********************************************************************/
22
#pragma prototyped
23
/*
24
* RE character class support
25
*/
26
27
#include "reglib.h"
28
29
struct Ctype_s; typedef struct Ctype_s Ctype_t;
30
31
struct Ctype_s
32
{
33
const char* name;
34
size_t size;
35
regclass_t ctype;
36
Ctype_t* next;
37
#if _lib_wctype
38
wctype_t wtype;
39
#endif
40
};
41
42
static Ctype_t* ctypes;
43
44
/*
 * this stuff gets around the posix failure to define isblank,
 * the fact that the ctype functions are macros,
 * and any local extensions that may not even have functions or macros
 */
49
50
#if _need_iswblank
51
52
/*
 * iswblank() stand-in built on wctype("blank")
 *
 * returns non-zero if wc is in the "blank" class of the current
 * LC_CTYPE locale, 0 otherwise (including when the locale has no
 * "blank" class and wctype() yields 0)
 *
 * the descriptor is looked up on every call instead of being cached
 * in a static: a wctype() result is only guaranteed valid until
 * setlocale() changes LC_CTYPE, so a cached value could go stale
 */

int
_reg_iswblank(wint_t wc)
{
	return iswctype(wc, wctype("blank"));
}
65
66
#endif
67
68
/*
 * int-taking adapters over the <wctype.h> classifiers
 * real functions are needed so their addresses can be stored in
 * ctype[] and handed back by regclass() / classfun()
 * the Not* and *word variants return exactly 0 or 1
 */

static int
Isalnum(int c)
{
	return iswalnum((wint_t)c);
}

static int
Isalpha(int c)
{
	return iswalpha((wint_t)c);
}

static int
Isblank(int c)
{
	return iswblank((wint_t)c);
}

static int
Iscntrl(int c)
{
	return iswcntrl((wint_t)c);
}

static int
Isdigit(int c)
{
	return iswdigit((wint_t)c);
}

static int
Notdigit(int c)
{
	return !iswdigit((wint_t)c);
}

static int
Isgraph(int c)
{
	return iswgraph((wint_t)c);
}

static int
Islower(int c)
{
	return iswlower((wint_t)c);
}

static int
Isprint(int c)
{
	return iswprint((wint_t)c);
}

static int
Ispunct(int c)
{
	return iswpunct((wint_t)c);
}

static int
Isspace(int c)
{
	return iswspace((wint_t)c);
}

static int
Notspace(int c)
{
	return !iswspace((wint_t)c);
}

static int
Isupper(int c)
{
	return iswupper((wint_t)c);
}

/* word character: alphanumeric or underscore */
static int
Isword(int c)
{
	return c == '_' || iswalnum((wint_t)c);
}

/* exact complement of Isword() */
static int
Notword(int c)
{
	return !(c == '_' || iswalnum((wint_t)c));
}

static int
Isxdigit(int c)
{
	return iswxdigit((wint_t)c);
}
84
85
#if _lib_wctype
86
87
/*
 * one predicate per dynamic slot in ctype[]
 * declared here because the table takes their addresses and the
 * definitions, which index the table, must follow it
 */

static int	Is_wc_1(int c);
static int	Is_wc_2(int c);
static int	Is_wc_3(int c);
static int	Is_wc_4(int c);
static int	Is_wc_5(int c);
static int	Is_wc_6(int c);
static int	Is_wc_7(int c);
static int	Is_wc_8(int c);
static int	Is_wc_9(int c);
static int	Is_wc_10(int c);
static int	Is_wc_11(int c);
static int	Is_wc_12(int c);
static int	Is_wc_13(int c);
static int	Is_wc_14(int c);
static int	Is_wc_15(int c);
static int	Is_wc_16(int c);
103
104
#endif
105
106
#define SZ(s) s,(sizeof(s)-1)
107
108
static Ctype_t ctype[] =
109
{
110
{ SZ("alnum"), Isalnum },
111
{ SZ("alpha"), Isalpha },
112
{ SZ("blank"), Isblank },
113
{ SZ("cntrl"), Iscntrl },
114
{ SZ("digit"), Isdigit },
115
{ SZ("graph"), Isgraph },
116
{ SZ("lower"), Islower },
117
{ SZ("print"), Isprint },
118
{ SZ("punct"), Ispunct },
119
{ SZ("space"), Isspace },
120
{ SZ("upper"), Isupper },
121
{ SZ("word"), Isword },
122
{ SZ("xdigit"),Isxdigit},
123
124
#define CTYPES 13
125
126
#if _lib_wctype
127
{ 0, 0, Is_wc_1 },
128
{ 0, 0, Is_wc_2 },
129
{ 0, 0, Is_wc_3 },
130
{ 0, 0, Is_wc_4 },
131
{ 0, 0, Is_wc_5 },
132
{ 0, 0, Is_wc_6 },
133
{ 0, 0, Is_wc_7 },
134
{ 0, 0, Is_wc_8 },
135
{ 0, 0, Is_wc_9 },
136
{ 0, 0, Is_wc_10 },
137
{ 0, 0, Is_wc_11 },
138
{ 0, 0, Is_wc_12 },
139
{ 0, 0, Is_wc_13 },
140
{ 0, 0, Is_wc_14 },
141
{ 0, 0, Is_wc_15 },
142
{ 0, 0, Is_wc_16 },
143
144
#define WTYPES 16
145
146
#else
147
148
#define WTYPES 0
149
150
#endif
151
};
152
153
#if _lib_wctype
154
155
/*
 * test c against the wctype() descriptor stored in dynamic slot
 * number slot (0 based) of ctype[]
 */

static int
is_wc_slot(int c, int slot)
{
	return iswctype(c, ctype[CTYPES + slot].wtype);
}

/*
 * Is_wc_N is the predicate for dynamic slot N-1
 */

static int	Is_wc_1(int c)	{ return is_wc_slot(c, 0); }
static int	Is_wc_2(int c)	{ return is_wc_slot(c, 1); }
static int	Is_wc_3(int c)	{ return is_wc_slot(c, 2); }
static int	Is_wc_4(int c)	{ return is_wc_slot(c, 3); }
static int	Is_wc_5(int c)	{ return is_wc_slot(c, 4); }
static int	Is_wc_6(int c)	{ return is_wc_slot(c, 5); }
static int	Is_wc_7(int c)	{ return is_wc_slot(c, 6); }
static int	Is_wc_8(int c)	{ return is_wc_slot(c, 7); }
static int	Is_wc_9(int c)	{ return is_wc_slot(c, 8); }
static int	Is_wc_10(int c)	{ return is_wc_slot(c, 9); }
static int	Is_wc_11(int c)	{ return is_wc_slot(c, 10); }
static int	Is_wc_12(int c)	{ return is_wc_slot(c, 11); }
static int	Is_wc_13(int c)	{ return is_wc_slot(c, 12); }
static int	Is_wc_14(int c)	{ return is_wc_slot(c, 13); }
static int	Is_wc_15(int c)	{ return is_wc_slot(c, 14); }
static int	Is_wc_16(int c)	{ return is_wc_slot(c, 15); }
171
172
#endif
173
174
/*
175
* return pointer to ctype function for :class:] in s
176
* s points to the first char after the initial [
177
* dynamic wctype classes are locale-specific
178
* dynamic entry locale is punned in Ctype_t.next
179
* the search does a lazy (one entry at a time) flush on locale mismatch
180
* if e!=0 it points to next char in s
181
* 0 returned on error
182
*/
183
184
regclass_t
185
regclass(const char* s, char** e)
186
{
187
register Ctype_t* cp;
188
register int c;
189
register size_t n;
190
register const char* t;
191
Ctype_t* lc;
192
Ctype_t* xp;
193
Ctype_t* zp;
194
195
if (!(c = *s++))
196
return 0;
197
for (t = s; *t && (*t != c || *(t + 1) != ']'); t++);
198
if (*t != c || !(n = t - s))
199
return 0;
200
for (cp = ctypes; cp; cp = cp->next)
201
if (n == cp->size && strneq(s, cp->name, n))
202
goto found;
203
xp = zp = 0;
204
lc = (Ctype_t*)setlocale(LC_CTYPE, NiL);
205
for (cp = ctype; cp < &ctype[elementsof(ctype)]; cp++)
206
{
207
#if _lib_wctype
208
if (!zp)
209
{
210
if (!cp->size)
211
zp = cp;
212
else if (!xp && cp->next && cp->next != lc)
213
xp = cp;
214
}
215
#endif
216
if (n == cp->size && strneq(s, cp->name, n) && (!cp->next || cp->next == lc))
217
goto found;
218
}
219
#if _lib_wctype
220
if (!(cp = zp))
221
{
222
if (!(cp = xp))
223
return 0;
224
cp->size = 0;
225
if (!streq(cp->name, s))
226
{
227
free((char*)cp->name);
228
cp->name = 0;
229
}
230
}
231
if (!cp->name)
232
{
233
if (!(cp->name = (const char*)memdup(s, n + 1)))
234
return 0;
235
*((char*)cp->name + n) = 0;
236
}
237
/* mvs.390 needs the (char*) cast -- barf */
238
if (!(cp->wtype = wctype((char*)cp->name)))
239
{
240
free((char*)cp->name);
241
cp->name = 0;
242
return 0;
243
}
244
cp->size = n;
245
cp->next = lc;
246
#endif
247
found:
248
if (e)
249
*e = (char*)t + 2;
250
return cp->ctype;
251
}
252
253
/*
254
* associate the ctype function fun with name
255
*/
256
257
int
258
regaddclass(const char* name, regclass_t fun)
259
{
260
register Ctype_t* cp;
261
register Ctype_t* np;
262
register size_t n;
263
264
n = strlen(name);
265
for (cp = ctypes; cp; cp = cp->next)
266
if (cp->size == n && strneq(name, cp->name, n))
267
{
268
cp->ctype = fun;
269
return 0;
270
}
271
if (!(np = newof(0, Ctype_t, 1, n + 1)))
272
return REG_ESPACE;
273
np->size = n;
274
np->name = strcpy((char*)(np + 1), name);
275
np->ctype = fun;
276
np->next = ctypes;
277
ctypes = np;
278
return 0;
279
}
280
281
/*
282
* return pointer to ctype function for token
283
*/
284
285
regclass_t
286
classfun(int type)
287
{
288
switch (type)
289
{
290
case T_ALNUM: return Isword;
291
case T_ALNUM_NOT: return Notword;
292
case T_DIGIT: return Isdigit;
293
case T_DIGIT_NOT: return Notdigit;
294
case T_SPACE: return Isspace;
295
case T_SPACE_NOT: return Notspace;
296
}
297
return 0;
298
}
299
300