Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
att
GitHub Repository: att/ast
Path: blob/master/src/lib/libpp/ppfsm.c
1808 views
1
/***********************************************************************
*                                                                      *
*               This software is part of the ast package               *
*          Copyright (c) 1986-2011 AT&T Intellectual Property          *
*                      and is licensed under the                       *
*                 Eclipse Public License, Version 1.0                  *
*                    by AT&T Intellectual Property                     *
*                                                                      *
*                A copy of the License is available at                 *
*          http://www.eclipse.org/org/documents/epl-v10.html           *
*         (with md5 checksum b35adb5213ca9657e911e9befb180842)         *
*                                                                      *
*              Information and Software Systems Research               *
*                            AT&T Research                             *
*                           Florham Park NJ                            *
*                                                                      *
*                 Glenn Fowler <gsf@research.att.com>                  *
*                                                                      *
***********************************************************************/
20
#pragma prototyped
21
/*
 * Glenn Fowler
 * AT&T Research
 *
 * preprocessor and proto lexical analyzer fsm
 * define PROTOMAIN for standalone proto
 */
28
29
#include "pplib.h"
30
#include "ppfsm.h"
31
32
/*
 * lexical FSM encoding
 * derived from a standalone ansi cpp by Dennis Ritchie
 * modified for libpp by Glenn Fowler
 *
 *   fsm[] is initialized from fsminit[].  The encoding is blown out into
 *   fsm[] for time efficiency.  When in state `state', and one of the
 *   characters in `ch' arrives, enter `nextstate'.  States >= TERMINAL are
 *   either final, or at least require special action.  In fsminit[] there
 *   is a line for each <state,charset,nextstate>.  Early entries are
 *   overwritten by later ones.  C_XXX is the universal set and should
 *   always be first.  Some of the fsminit[] entries are templates for
 *   groups of states.  The OP entries trigger the state copies.  States
 *   above TERMINAL are represented in fsm[] as negative values.  S_TOK and
 *   S_TOKB encode the resulting token type in the upper bits.  These actions
 *   differ in that S_TOKB has a lookahead char.
 *
 *   fsm[] has three start states:
 *
 *	PROTO	proto (ANSI -> K&R,C++,ANSI)
 *	QUICK	standalone ppcpp()
 *	TOKEN	tokenizing pplex()
 *
 *   If the next state remains the same then the fsm[] transition value is 0.
 *   MAX+1 is a power of 2 so that fsm[state][EOF==MAX+1] actually accesses
 *   fsm[state+1][0] which is ~S_EOB for all states.  This preserves the
 *   power of 2 fsm[] row size for efficient array indexing.  Thanks to
 *   D. G. Korn for the last two observations.  The pseudo non-terminal state
 *   fsm[TERMINAL][state+1] is used to differentiate EOB from EOF.
 *
 *   The bit layout is:
 *
 *	TERM	arg	SPLICE	next
 *	15	14-8	7	6-0
 */
67
68
/*
 * character class codes used in fsminit[].ch[]
 *
 * NOTE: these must be `control' characters for all native codesets
 *	 currently ok for {ascii,ebcdic1,ebcdic2,ebcdic3}
 *
 * ppfsm(FSM_INIT) expands C_LET, C_HEX, C_DEC and C_OCT through the
 * let, hex, dec and oct strings, C_XXX to every character 0..MAX,
 * and uses C_EOF (and C_XXX) to set fsm[TERMINAL][state+1]
 */

#define C_DEC		001
#define C_EOF		002
#define C_HEX		003
#define C_LET		021
#define C_OCT		022
#define C_XXX		023

/*
 * an fsminit[] row with state==OP is an operation rather than a
 * transition: nextstate is COPY or END
 */

#define OP		(-1)
#define END		0
#define COPY		1

/*
 * copy state f to state t
 *
 * MAX+1 shorts are copied starting at column 1, so the copy runs
 * through fsm[t+1][0]; the matching fsm[TERMINAL][state+1] slot is
 * copied too
 */

#define copy(t,f)	(memcpy(&fsm[(t)][1],&fsm[(f)][1],(MAX+1)*sizeof(short)),fsm[TERMINAL][(t)+1]=fsm[TERMINAL][(f)+1])
85
86
/*
 * one fsminit[] row: <state,charset,nextstate>
 *
 * the members are initialized positionally by the fsminit[] table,
 * so their order and types must not change
 */

struct fsminit				/* fsm initialization row	*/
{
	int		state;		/* if in this state		*/
	unsigned char	ch[4];		/* and see one of these		*/
	int		nextstate;	/* enter this state if <TERMINAL*/
};
92
93
static struct fsminit fsminit[] =
94
{
95
/* proto start state */
96
{ PROTO, { C_XXX }, S_CHR, },
97
{ PROTO, { C_EOF }, S_EOF, },
98
{ PROTO, { C_DEC }, BAD1, },
99
{ PROTO, { '.' }, DOT, },
100
{ PROTO, { C_LET }, NID, },
101
{ PROTO, { 'L' }, LIT, },
102
{ PROTO, { 'd', 'e', 'f', 'i' }, RES1, },
103
{ PROTO, { 'r', 's', 't', 'v' }, RES1, },
104
{ PROTO, { 'w', 'N' }, RES1, },
105
{ PROTO, { '"', '\'' }, S_LITBEG, },
106
{ PROTO, { '/' }, COM1, },
107
{ PROTO, { '\n' }, S_NL, },
108
{ PROTO, { ' ','\t','\f','\v' }, WS1, },
109
110
/* proto {do,else,extern,for,if,inline,return,static,typedef,va_start,void,while,NoN} */
111
{ RES1, { C_XXX }, S_MACRO, },
112
{ RES1, { C_LET, C_DEC }, NID, },
113
{ RES1, { 'a' }, RES1a, },
114
{ RES1, { 'e' }, RES1e, },
115
{ RES1, { 'f' }, RES1f, },
116
{ RES1, { 'h' }, RES1h, },
117
{ RES1, { 'l' }, RES1l, },
118
{ RES1, { 'n' }, RES1n, },
119
{ RES1, { 'o' }, RES1o, },
120
{ RES1, { 't' }, RES1t, },
121
{ RES1, { 'x' }, RES1x, },
122
{ RES1, { 'y' }, RES1y, },
123
124
/* proto reserved {va_start} */
125
{ RES1a, { C_XXX }, S_RESERVED, },
126
{ RES1a, { C_LET, C_DEC }, NID, },
127
{ RES1a, { '_','s','t','a' }, RES1a, },
128
{ RES1a, { 'r' }, RES1a, },
129
130
/* proto reserved {return} */
131
{ RES1e, { C_XXX }, S_RESERVED, },
132
{ RES1e, { C_LET, C_DEC }, NID, },
133
{ RES1e, { 't','u','r','n' }, RES1e, },
134
135
/* proto reserved {if} */
136
{ RES1f, { C_XXX }, S_RESERVED, },
137
{ RES1f, { C_LET, C_DEC }, NID, },
138
139
/* proto reserved {while} */
140
{ RES1h, { C_XXX }, S_RESERVED, },
141
{ RES1h, { C_LET, C_DEC }, NID, },
142
{ RES1h, { 'i','l','e' }, RES1h, },
143
144
/* proto reserved {else} */
145
{ RES1l, { C_XXX }, S_RESERVED, },
146
{ RES1l, { C_LET, C_DEC }, NID, },
147
{ RES1l, { 's','e' }, RES1l, },
148
149
/* proto reserved {inline} */
150
{ RES1n, { C_XXX }, S_RESERVED, },
151
{ RES1n, { C_LET, C_DEC }, NID, },
152
{ RES1n, { 'l','i','n','e' }, RES1n, },
153
154
/* proto reserved {do,for,void} */
155
{ RES1o, { C_XXX }, S_RESERVED, },
156
{ RES1o, { C_LET, C_DEC }, NID, },
157
{ RES1o, { 'r','i','d','N' }, RES1o, },
158
159
/* proto reserved {static} */
160
{ RES1t, { C_XXX }, S_RESERVED, },
161
{ RES1t, { C_LET, C_DEC }, NID, },
162
{ RES1t, { 'a','t','i','c' }, RES1t, },
163
164
/* proto reserved {extern} */
165
{ RES1x, { C_XXX }, S_RESERVED, },
166
{ RES1x, { C_LET, C_DEC }, NID, },
167
{ RES1x, { 't','e','r','n' }, RES1x, },
168
169
/* proto reserved {typedef} */
170
{ RES1y, { C_XXX }, S_RESERVED, },
171
{ RES1y, { C_LET, C_DEC }, NID, },
172
{ RES1y, { 'p','e','d','f' }, RES1y, },
173
174
/* saw /, perhaps start of comment */
175
{ COM1, { C_XXX }, S_CHRB, },
176
{ COM1, { '*' }, COM2, },
177
#if PROTOMAIN
178
{ COM1, { '/' }, COM5, },
179
#endif
180
181
/* saw / *, start of comment */
182
{ COM2, { C_XXX }, COM2, },
183
{ COM2, { '\n', C_EOF }, S_COMMENT, },
184
{ COM2, { '/' }, COM4, },
185
{ COM2, { '*' }, COM3, },
186
{ COM2, { '#', ';', ')' }, QUAL(COM2), },
187
188
/* saw the * possibly ending a comment */
189
{ COM3, { C_XXX }, COM2, },
190
{ COM3, { '\n', C_EOF }, S_COMMENT, },
191
{ COM3, { '#', ';', ')' }, QUAL(COM2), },
192
{ COM3, { '*' }, COM3, },
193
{ COM3, { '/' }, S_COMMENT, },
194
195
/* saw / in / * comment, possible malformed nest */
196
{ COM4, { C_XXX }, COM2, },
197
{ COM4, { '*', '\n', C_EOF }, S_COMMENT, },
198
{ COM4, { '/' }, COM4, },
199
200
/* saw / /, start of comment */
201
{ COM5, { C_XXX }, COM5, },
202
{ COM5, { '\n', C_EOF }, S_COMMENT, },
203
{ COM5, { '/' }, COM6, },
204
{ COM5, { '*' }, COM7, },
205
206
/* saw / in / / comment, possible malformed nest */
207
{ COM6, { C_XXX }, COM5, },
208
{ COM6, { '*', '\n', C_EOF }, S_COMMENT, },
209
{ COM6, { '/' }, COM6, },
210
211
/* saw * in / /, possible malformed nest */
212
{ COM7, { C_XXX }, COM5, },
213
{ COM7, { '\n', C_EOF }, S_COMMENT, },
214
{ COM7, { '*' }, COM7, },
215
{ COM7, { '/' }, S_COMMENT, },
216
217
/* normal identifier -- always a macro candidate */
218
{ NID, { C_XXX }, S_MACRO, },
219
{ NID, { C_LET, C_DEC }, NID, },
220
221
/* saw ., operator or dbl constant */
222
{ DOT, { C_XXX }, S_CHRB, },
223
{ DOT, { '.' }, DOT2, },
224
{ DOT, { C_DEC }, BAD1, },
225
226
/* saw .., possible ... */
227
{ DOT2, { C_XXX }, BACK(T_INVALID), },
228
{ DOT2, { '.' }, KEEP(T_VARIADIC), },
229
230
/* saw L (possible start of normal wide literal) */
231
{ LIT, { C_XXX }, S_MACRO, },
232
{ LIT, { C_LET, C_DEC }, NID, },
233
{ LIT, { '"', '\'' }, QUAL(LIT1), },
234
235
/* saw " or ' beginning literal */
236
{ LIT1, { C_XXX }, LIT1, },
237
{ LIT1, { '"', '\'' }, S_LITEND, },
238
{ LIT1, { '\n', C_EOF }, S_LITEND, },
239
{ LIT1, { '\\' }, LIT2, },
240
241
/* saw \ in literal */
242
{ LIT2, { C_XXX }, S_LITESC, },
243
{ LIT2, { '\n', C_EOF }, S_LITEND, },
244
245
/* eat malformed numeric constant */
246
{ BAD1, { C_XXX }, BACK(T_INVALID), },
247
{ BAD1, { C_LET, C_DEC, '.' }, BAD1, },
248
{ BAD1, { 'e', 'E' }, BAD2, },
249
250
/* eat malformed numeric fraction|exponent */
251
{ BAD2, { C_XXX }, BACK(T_INVALID), },
252
{ BAD2, { C_LET, C_DEC, '.' }, BAD1, },
253
{ BAD2, { '+', '-' }, BAD1, },
254
255
/* saw white space, eat it up */
256
{ WS1, { C_XXX }, S_WS, },
257
{ WS1, { ' ', '\t' }, WS1, },
258
{ WS1, { '\f', '\v' }, S_VS, },
259
260
#if !PROTOMAIN
261
262
/* quick template */
263
{ QUICK, { C_XXX }, QTOK, },
264
{ QUICK, { C_EOF, MARK }, S_CHRB, },
265
{ QUICK, { C_LET, C_DEC }, QID, },
266
{ QUICK, { 'L' }, LIT0, },
267
{ QUICK, { '"', '\'' }, S_LITBEG, },
268
{ QUICK, { '/' }, S_CHRB, },
269
{ QUICK, { '*' }, QCOM, },
270
{ QUICK, { '#' }, SHARP1, },
271
{ QUICK, { '\n' }, S_NL, },
272
{ QUICK, { '\f', '\v' }, S_VS, },
273
274
/* copy QUICK to QUICK+1 through MAC0+1 */
275
{ OP, {QUICK,QUICK+1,MAC0+1}, COPY, },
276
277
/* quick start state */
278
{ QUICK, { C_EOF }, S_EOF, },
279
{ QUICK, { C_DEC }, QNUM, },
280
{ QUICK, { MARK }, QTOK, },
281
{ QUICK, { '/' }, COM1, },
282
{ QUICK, { ' ', '\t' }, QUICK, },
283
284
/* grab non-macro tokens */
285
{ QTOK, { C_DEC }, QNUM, },
286
287
/* grab numeric and invalid tokens */
288
{ QNUM, { C_LET, C_DEC, '.' }, QNUM, },
289
{ QNUM, { 'e', 'E' }, QEXP, },
290
291
/* grab exponent token */
292
{ QEXP, { C_LET, C_DEC, '.' }, QNUM, },
293
{ QEXP, { '+', '-' }, QNUM, },
294
295
/* saw *, grab possible bad comment terminator */
296
{ QCOM, { C_DEC }, QNUM, },
297
{ QCOM, { '/' }, S_COMMENT, },
298
299
/* saw L (possible start of wide string or first macro char) */
300
{ MAC0, { 'L' }, QID, },
301
{ MAC0, { '"', '\'' }, QUAL(LIT1), },
302
303
/* macro candidate template */
304
{ MAC0+1, { 'L' }, QID, },
305
306
/* copy MAC0+1 to MAC0+2 through MACN */
307
{ OP, {MAC0+1,MAC0+2,MACN}, COPY },
308
309
/* saw L (possible start of wide string or macro L) */
310
{ HIT0, { C_XXX }, S_MACRO, },
311
{ HIT0, { C_LET, C_DEC }, QID, },
312
{ HIT0, { '"', '\'' }, QUAL(LIT1), },
313
314
/* macro hit template */
315
{ HIT0+1, { C_XXX }, S_MACRO, },
316
{ HIT0+1, { C_LET, C_DEC }, QID, },
317
318
/* copy HIT0+1 to HIT0+2 through HITN */
319
{ OP, {HIT0+1,HIT0+2,HITN}, COPY },
320
321
/* saw L (possible start of wide literal) */
322
{ LIT0, { C_XXX }, S_MACRO, },
323
{ LIT0, { C_LET, C_DEC }, QID, },
324
{ LIT0, { '"', '\'' }, QUAL(LIT1), },
325
326
/* (!PROTOMAIN COM1) saw /, perhaps start of comment or /= */
327
{ COM1, { '=' }, KEEP(T_DIVEQ), },
328
329
/* normal start state */
330
{ TOKEN, { C_XXX }, S_HUH, },
331
{ TOKEN, { C_EOF }, S_EOF, },
332
{ TOKEN, { C_DEC }, DEC1, },
333
{ TOKEN, { '0' }, OCT1, },
334
{ TOKEN, { '.' }, DOT1, },
335
{ TOKEN, { C_LET }, NID, },
336
{ TOKEN, { 'L' }, LIT, },
337
{ TOKEN, { '"', '\'', '<' }, S_LITBEG, },
338
{ TOKEN, { '/' }, COM1, },
339
{ TOKEN, { '\n' }, S_NL, },
340
{ TOKEN, { ' ', '\t' }, WS1, },
341
{ TOKEN, { '\f', '\v' }, S_VS, },
342
{ TOKEN, { '#' }, SHARP1, },
343
{ TOKEN, { ':' }, COLON1, },
344
{ TOKEN, { '%' }, PCT1, },
345
{ TOKEN, { '&' }, AND1, },
346
{ TOKEN, { '*' }, STAR1, },
347
{ TOKEN, { '+' }, PLUS1, },
348
{ TOKEN, { '-' }, MINUS1, },
349
{ TOKEN, { '=' }, EQ1, },
350
{ TOKEN, { '!' }, NOT1, },
351
{ TOKEN, { '>' }, GT1, },
352
{ TOKEN, { '^' }, CIRC1, },
353
{ TOKEN, { '|' }, OR1, },
354
{ TOKEN, { '(', ')', '[', ']' }, S_CHR, },
355
{ TOKEN, { '{', '}', ',', ';' }, S_CHR, },
356
{ TOKEN, { '~', '?' }, S_CHR, },
357
358
/* saw 0, possible oct|hex|dec|dbl constant */
359
{ OCT1, { C_XXX }, BACK(T_DECIMAL), },
360
{ OCT1, { C_LET, C_DEC }, BAD1, },
361
{ OCT1, { C_OCT }, OCT2, },
362
{ OCT1, { 'e', 'E' }, DBL2, },
363
{ OCT1, { 'l', 'L', 'u', 'U' }, QUAL(DEC2), },
364
{ OCT1, { 'x', 'X' }, HEX1, },
365
{ OCT1, { '.' }, DBL1, },
366
367
/* saw 0<oct>, oct constant */
368
{ OCT2, { C_XXX }, BACK(T_OCTAL), },
369
{ OCT2, { C_LET, C_DEC }, BAD1, },
370
{ OCT2, { C_OCT }, OCT2, },
371
{ OCT2, { 'e', 'E' }, DBL2, },
372
{ OCT2, { 'l', 'L', 'u', 'U' }, QUAL(OCT3), },
373
{ OCT2, { '.' }, DBL1, },
374
375
/* oct constant qualifier */
376
{ OCT3, { C_XXX }, BACK(T_OCTAL), },
377
{ OCT3, { C_LET, C_DEC, '.' }, BAD1, },
378
{ OCT3, { 'l', 'L', 'u', 'U' }, QUAL(OCT3), },
379
380
/* saw 0 [xX], hex constant */
381
{ HEX1, { C_XXX }, BACK(T_HEXADECIMAL), },
382
{ HEX1, { C_LET }, BAD1, },
383
{ HEX1, { C_HEX }, HEX1, },
384
{ HEX1, { 'e', 'E' }, HEX3, },
385
{ HEX1, { 'l', 'L', 'u', 'U' }, QUAL(HEX2), },
386
{ HEX1, { '.' }, HEX4, },
387
{ HEX1, { 'p', 'P' }, HEX5, },
388
389
/* hex constant qualifier */
390
{ HEX2, { C_XXX }, BACK(T_HEXADECIMAL), },
391
{ HEX2, { C_LET, C_DEC, '.' }, BAD1, },
392
{ HEX2, { 'l', 'L', 'u', 'U' }, QUAL(HEX2), },
393
394
/* hex [eE][-+] botch */
395
{ HEX3, { C_XXX }, BACK(T_HEXADECIMAL), },
396
{ HEX3, { C_LET, '.', '-', '+'},BAD1, },
397
{ HEX3, { C_HEX }, HEX1, },
398
{ HEX3, { 'e', 'E' }, HEX3, },
399
{ HEX3, { 'l', 'L', 'u', 'U' }, QUAL(HEX2), },
400
401
/* hex dbl fraction */
402
{ HEX4, { C_XXX }, BACK(T_HEXDOUBLE), },
403
{ HEX4, { C_LET, '.' }, BAD1, },
404
{ HEX4, { C_HEX }, HEX4, },
405
{ HEX4, { 'p', 'P' }, HEX5, },
406
{ HEX4, { 'f', 'F', 'l', 'L' }, QUAL(HEX8), },
407
408
/* optional hex dbl exponent sign */
409
{ HEX5, { C_XXX }, BACK(T_INVALID), },
410
{ HEX5, { C_LET, '.' }, BAD1, },
411
{ HEX5, { '+', '-' }, HEX6, },
412
{ HEX5, { C_DEC }, HEX7, },
413
414
/* mandatory hex dbl exponent first digit */
415
{ HEX6, { C_XXX }, BACK(T_INVALID), },
416
{ HEX6, { C_LET, '.' }, BAD1, },
417
{ HEX6, { C_DEC }, HEX7, },
418
419
/* hex dbl exponent digits */
420
{ HEX7, { C_XXX }, BACK(T_HEXDOUBLE), },
421
{ HEX7, { C_LET, '.' }, BAD1, },
422
{ HEX7, { C_DEC }, HEX7, },
423
{ HEX7, { 'f', 'F', 'l', 'L' }, QUAL(HEX8), },
424
425
/* hex dbl constant qualifier */
426
{ HEX8, { C_XXX }, BACK(T_HEXDOUBLE), },
427
{ HEX8, { C_LET, '.' }, BAD1, },
428
{ HEX8, { 'f', 'F', 'l', 'L' }, QUAL(HEX8), },
429
430
/* saw <dec>, dec constant */
431
{ DEC1, { C_XXX }, BACK(T_DECIMAL), },
432
{ DEC1, { C_LET }, BAD1, },
433
{ DEC1, { C_DEC }, DEC1, },
434
{ DEC1, { 'e', 'E' }, DBL2, },
435
{ DEC1, { 'l', 'L', 'u', 'U' }, QUAL(DEC2), },
436
{ DEC1, { '.' }, DBL1, },
437
438
/* dec constant qualifier */
439
{ DEC2, { C_XXX }, BACK(T_DECIMAL), },
440
{ DEC2, { C_LET, C_DEC }, BAD1, },
441
{ DEC2, { 'l', 'L', 'u', 'U' }, QUAL(DEC2), },
442
443
/* saw ., operator or dbl constant */
444
{ DOT1, { C_XXX }, S_CHRB, },
445
{ DOT1, { '.' }, DOT2, },
446
{ DOT1, { C_DEC }, DBL1, },
447
448
/* dbl fraction */
449
{ DBL1, { C_XXX }, BACK(T_DOUBLE), },
450
{ DBL1, { C_LET, '.' }, BAD1, },
451
{ DBL1, { C_DEC }, DBL1, },
452
{ DBL1, { 'e', 'E' }, DBL2, },
453
{ DBL1, { 'f', 'F', 'l', 'L' }, QUAL(DBL5), },
454
455
/* optional dbl exponent sign */
456
{ DBL2, { C_XXX }, BACK(T_INVALID), },
457
{ DBL2, { C_LET, '.' }, BAD1, },
458
{ DBL2, { '+', '-' }, DBL3, },
459
{ DBL2, { C_DEC }, DBL4, },
460
461
/* mandatory dbl exponent first digit */
462
{ DBL3, { C_XXX }, BACK(T_INVALID), },
463
{ DBL3, { C_LET, '.' }, BAD1, },
464
{ DBL3, { C_DEC }, DBL4, },
465
466
/* dbl exponent digits */
467
{ DBL4, { C_XXX }, BACK(T_DOUBLE), },
468
{ DBL4, { C_LET, '.' }, BAD1, },
469
{ DBL4, { C_DEC }, DBL4, },
470
{ DBL4, { 'f', 'F', 'l', 'L' }, QUAL(DBL5), },
471
472
/* dbl constant qualifier */
473
{ DBL5, { C_XXX }, BACK(T_DOUBLE), },
474
{ DBL5, { C_LET, '.' }, BAD1, },
475
{ DBL5, { 'f', 'F', 'l', 'L' }, QUAL(DBL5), },
476
477
/* saw < starting include header */
478
{ HDR1, { C_XXX }, HDR1, },
479
{ HDR1, { '>', '\n', C_EOF }, S_LITEND, },
480
481
/* saw <binop><space> expecting = */
482
{ BIN1, { C_XXX }, S_HUH, },
483
{ BIN1, { ' ', '\t' }, BIN1, },
484
485
/* 2-char ops */
486
487
{ SHARP1, { C_XXX }, S_SHARP, },
488
489
{ PCT1, { C_XXX }, S_CHRB, },
490
{ PCT1, { '=' }, KEEP(T_MODEQ), },
491
492
{ AND1, { C_XXX }, S_CHRB, },
493
{ AND1, { '=' }, KEEP(T_ANDEQ), },
494
{ AND1, { '&' }, KEEP(T_ANDAND), },
495
496
{ STAR1, { C_XXX }, S_CHRB, },
497
{ STAR1, { '=' }, KEEP(T_MPYEQ), },
498
{ STAR1, { '/' }, S_COMMENT, },
499
500
{ PLUS1, { C_XXX }, S_CHRB, },
501
{ PLUS1, { '=' }, KEEP(T_ADDEQ), },
502
{ PLUS1, { '+' }, KEEP(T_ADDADD), },
503
504
{ MINUS1, { C_XXX }, S_CHRB, },
505
{ MINUS1, { '=' }, KEEP(T_SUBEQ), },
506
{ MINUS1, { '-' }, KEEP(T_SUBSUB), },
507
{ MINUS1, { '>' }, KEEP(T_PTRMEM), },
508
509
{ COLON1, { C_XXX }, S_CHRB, },
510
{ COLON1, { '=', '>' }, S_HUH, },
511
512
{ LT1, { C_XXX }, S_CHRB, },
513
{ LT1, { '=' }, KEEP(T_LE), },
514
{ LT1, { '<' }, LSH1, },
515
516
{ EQ1, { C_XXX }, S_CHRB, },
517
{ EQ1, { '=' }, KEEP(T_EQ), },
518
519
{ NOT1, { C_XXX }, S_CHRB, },
520
{ NOT1, { '=' }, KEEP(T_NE), },
521
522
{ GT1, { C_XXX }, S_CHRB, },
523
{ GT1, { '=' }, KEEP(T_GE), },
524
{ GT1, { '>' }, RSH1, },
525
526
{ CIRC1, { C_XXX }, S_CHRB, },
527
{ CIRC1, { '=' }, KEEP(T_XOREQ), },
528
529
{ OR1, { C_XXX }, S_CHRB, },
530
{ OR1, { '=' }, KEEP(T_OREQ), },
531
{ OR1, { '|' }, KEEP(T_OROR), },
532
533
/* 3-char ops */
534
535
{ ARROW1, { C_XXX }, BACK(T_PTRMEM), },
536
{ ARROW1, { '*' }, KEEP(T_PTRMEMREF), },
537
538
{ LSH1, { C_XXX }, BACK(T_LSHIFT), },
539
{ LSH1, { '=' }, KEEP(T_LSHIFTEQ), },
540
541
{ RSH1, { C_XXX }, BACK(T_RSHIFT), },
542
{ RSH1, { '=' }, KEEP(T_RSHIFTEQ), },
543
544
#endif
545
546
/* end */
547
{ OP, { 0 }, END, }
548
};
549
550
short fsm[TERMINAL+1][MAX+1];
551
552
char trigraph[MAX+1];
553
554
#if PROTOMAIN
555
static char spl[] = { '\\', '\r', 0 };
556
static char aln[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_$@";
557
#else
558
static char spl[] = { MARK, '?', '\\', '\r', CC_sub, 0 };
559
static char aln[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_";
560
#endif
561
static char* let = &aln[10];
562
static char hex[] = "fedcbaFEDCBA9876543210";
563
static char* dec = &hex[12];
564
static char* oct = &hex[14];
565
566
/*
567
* runtime FSM modifications
568
* ppfsm(FSM_INIT,0) must be called first
569
*/
570
571
void
572
ppfsm(int op, register char* s)
573
{
574
register int c;
575
register int n;
576
register int i;
577
register short* rp;
578
register struct fsminit* fp;
579
#if !PROTOMAIN
580
char* t;
581
int x;
582
#endif
583
584
switch (op)
585
{
586
587
#if !PROTOMAIN
588
589
case FSM_IDADD:
590
while (c = *s++)
591
if (!ppisid(c))
592
{
593
if (fsm[TOKEN][c] == ~S_HUH)
594
{
595
setid(c);
596
for (i = 0; i < TERMINAL; i++)
597
fsm[i][c] = IDSTATE(fsm[i]['_']);
598
}
599
else error(2, "%c: cannot add to identifier set", c);
600
}
601
break;
602
603
case FSM_IDDEL:
604
while (c = *s++)
605
if (ppisid(c))
606
{
607
clrid(c);
608
for (i = 0; i < TERMINAL; i++)
609
fsm[i][c] = ~S_HUH;
610
}
611
break;
612
613
#endif
614
615
case FSM_INIT:
616
for (fp = fsminit;; fp++)
617
{
618
if ((n = fp->nextstate) >= TERMINAL) n = ~n;
619
if (fp->state == OP)
620
{
621
#if !PROTOMAIN
622
switch (n)
623
{
624
case COPY:
625
c = fp->ch[0];
626
n = fp->ch[2];
627
for (i = fp->ch[1]; i <= n; i++)
628
copy(i, c);
629
continue;
630
default:
631
break;
632
}
633
#endif
634
break;
635
}
636
rp = fsm[fp->state];
637
for (i = 0; i < sizeof(fp->ch) && (c = fp->ch[i]); i++)
638
{
639
switch (c)
640
{
641
case C_XXX:
642
for (c = 0; c <= MAX; c++)
643
rp[c] = n;
644
/*FALLTHROUGH*/
645
646
case C_EOF:
647
fsm[TERMINAL][fp->state+1] = n < 0 ? ~n : n;
648
continue;
649
650
case C_LET:
651
s = let;
652
break;
653
654
case C_HEX:
655
s = hex;
656
break;
657
658
case C_DEC:
659
s = dec;
660
break;
661
662
case C_OCT:
663
s = oct;
664
break;
665
666
default:
667
rp[c] = n;
668
continue;
669
}
670
while (c = *s++)
671
rp[c] = n;
672
}
673
}
674
675
/*
676
* install splice special cases
677
* and same non-terminal transitions
678
*/
679
680
for (i = 0; i < TERMINAL; i++)
681
{
682
rp = fsm[i];
683
s = spl;
684
while (c = *s++)
685
if (c != MARK || !INCOMMENT(rp))
686
{
687
if (rp[c] >= 0) rp[c] = ~rp[c];
688
rp[c] &= ~SPLICE;
689
}
690
rp[EOB] = ~S_EOB;
691
for (c = 0; c <= MAX; c++)
692
if (rp[c] == i)
693
rp[c] = 0;
694
}
695
fsm[TERMINAL][0] = ~S_EOB;
696
697
#if !PROTOMAIN
698
699
/*
700
* default character types
701
*/
702
703
s = let;
704
while (c = *s++)
705
setid(c);
706
s = dec;
707
while (c = *s++)
708
setdig(c);
709
s = spl;
710
do setsplice(c = *s++); while (c);
711
712
/*
713
* trigraph map
714
*/
715
716
trigraph['='] = '#';
717
trigraph['('] = '[';
718
trigraph['/'] = '\\';
719
trigraph[')'] = ']';
720
trigraph['\''] = '^';
721
trigraph['<'] = '{';
722
trigraph['!'] = '|';
723
trigraph['>'] = '}';
724
trigraph['-'] = '~';
725
#endif
726
break;
727
728
#if !PROTOMAIN
729
730
case FSM_PLUSPLUS:
731
if (pp.option & PLUSPLUS)
732
{
733
fsm[COLON1][':'] = ~KEEP(T_SCOPE);
734
fsm[DOT1]['*'] = ~KEEP(T_DOTREF);
735
fsm[MINUS1]['>'] = ARROW1;
736
fsm[COM1]['/'] = COM5;
737
t = "%<:";
738
for (i = 0; i < TERMINAL; i++)
739
{
740
rp = fsm[i];
741
if (!INCOMMENT(rp) && !INQUOTE(rp))
742
{
743
s = t;
744
while (c = *s++)
745
{
746
if (rp[c] > 0) rp[c] = ~rp[c];
747
else if (!rp[c]) rp[c] = ~i;
748
rp[c] &= ~SPLICE;
749
}
750
}
751
}
752
s = t;
753
while (c = *s++) setsplice(c);
754
}
755
else
756
{
757
fsm[COLON1][':'] = ~S_CHRB;
758
fsm[DOT1]['*'] = ~S_CHRB;
759
fsm[MINUS1]['>'] = ~KEEP(T_PTRMEM);
760
fsm[COM1]['/'] = (pp.option & PLUSCOMMENT) ? COM5 : ~S_CHRB;
761
}
762
break;
763
764
#if COMPATIBLE
765
766
case FSM_COMPATIBILITY:
767
if (pp.state & COMPATIBILITY)
768
{
769
fsm[HEX1]['e'] = HEX1;
770
fsm[HEX1]['E'] = HEX1;
771
fsm[QNUM]['e'] = QNUM;
772
fsm[QNUM]['E'] = QNUM;
773
fsm[QNUM]['u'] = ~QUAL(QNUM);
774
fsm[QNUM]['U'] = ~QUAL(QNUM);
775
}
776
else
777
{
778
fsm[HEX1]['e'] = HEX3;
779
fsm[HEX1]['E'] = HEX3;
780
fsm[QNUM]['e'] = QEXP;
781
fsm[QNUM]['E'] = QEXP;
782
fsm[QNUM]['u'] = QNUM;
783
fsm[QNUM]['U'] = QNUM;
784
}
785
break;
786
787
#endif
788
789
case FSM_QUOTADD:
790
while (c = *s++)
791
if (fsm[TOKEN][c] == ~S_HUH)
792
for (i = 0; i < TERMINAL; i++)
793
fsm[i][c] = fsm[i]['"'];
794
else error(2, "%c: cannot add to quote set", c);
795
break;
796
797
case FSM_QUOTDEL:
798
while (c = *s++)
799
if (c != '"' && fsm[TOKEN][c] == fsm[TOKEN]['"'])
800
for (i = 0; i < TERMINAL; i++)
801
fsm[i][c] = fsm[i]['_'];
802
break;
803
804
case FSM_OPSPACE:
805
n = s ? BIN1 : ~S_CHRB;
806
fsm[COM1][' '] = fsm[COM1]['\t'] = n;
807
fsm[AND1][' '] = fsm[AND1]['\t'] = n;
808
fsm[STAR1][' '] = fsm[STAR1]['\t'] = n;
809
fsm[PCT1][' '] = fsm[PCT1]['\t'] = n;
810
fsm[PLUS1][' '] = fsm[PLUS1]['\t'] = n;
811
fsm[MINUS1][' '] = fsm[MINUS1]['\t'] = n;
812
fsm[CIRC1][' '] = fsm[CIRC1]['\t'] = n;
813
fsm[OR1][' '] = fsm[OR1]['\t'] = n;
814
fsm[LSH1][' '] = fsm[LSH1]['\t'] = s ? BIN1 : ~BACK(T_LSHIFT);
815
fsm[RSH1][' '] = fsm[RSH1]['\t'] = s ? BIN1 : ~BACK(T_RSHIFT);
816
break;
817
818
case FSM_MACRO:
819
if (pp.truncate && strlen(s) >= pp.truncate)
820
{
821
x = s[pp.truncate];
822
s[pp.truncate] = 0;
823
}
824
else x = -1;
825
i = MAC0 + ((c = *s++) != 'L');
826
if ((n = fsm[QUICK][c]) != (i + NMAC))
827
{
828
n = i;
829
if (!*s) n += NMAC;
830
}
831
if (fsm[QUICK][c] != n)
832
fsm[QUICK][c] = fsm[QCOM][c] = fsm[QTOK][c] = n;
833
if (c = *s++)
834
{
835
for (;;)
836
{
837
if ((i = n) < HIT0)
838
{
839
if (n < MACN) n++;
840
if (!*s)
841
{
842
n += NMAC;
843
break;
844
}
845
if (fsm[i][c] < HIT0)
846
fsm[i][c] = n;
847
if (fsm[i + NMAC][c] < HIT0)
848
fsm[i + NMAC][c] = n;
849
}
850
else
851
{
852
if (n < HITN) n++;
853
if (!*s) break;
854
if (fsm[i][c] < HIT0)
855
{
856
n -= NMAC;
857
fsm[i][c] = n;
858
}
859
}
860
c = *s++;
861
}
862
if (x >= 0)
863
{
864
*s = x;
865
for (n = CHAR_MIN; n <= CHAR_MAX; n++)
866
if (ppisidig(n))
867
fsm[HITN][n] = HITN;
868
n = HITN;
869
}
870
if (fsm[i][c] < n)
871
fsm[i][c] = n;
872
if (i < HIT0 && fsm[i + NMAC][c] < n)
873
fsm[i + NMAC][c] = n;
874
}
875
break;
876
877
#endif
878
879
}
880
}
881
882
#if !PROTOMAIN
883
884
/*
885
* file buffer refill
886
* c is current input char
887
*/
888
889
void
890
refill(register int c)
891
{
892
if (pp.in->flags & IN_eof)
893
{
894
pp.in->nextchr--;
895
c = 0;
896
}
897
else
898
{
899
*((pp.in->nextchr = pp.in->buffer + PPBAKSIZ) - 1) = c;
900
c =
901
#if PROTOTYPE
902
(pp.in->flags & IN_prototype) ? pppread(pp.in->nextchr) :
903
#endif
904
read(pp.in->fd, pp.in->nextchr, PPBUFSIZ);
905
}
906
if (c > 0)
907
{
908
if (pp.in->nextchr[c - 1] == '\n') pp.in->flags |= IN_newline;
909
else pp.in->flags &= ~IN_newline;
910
#if PROTOTYPE
911
if (!(pp.in->flags & IN_prototype))
912
#endif
913
if (c < PPBUFSIZ && (pp.in->flags & IN_regular))
914
{
915
pp.in->flags |= IN_eof;
916
close(pp.in->fd);
917
pp.in->fd = -1;
918
}
919
}
920
else
921
{
922
if (c < 0)
923
{
924
error(ERROR_SYSTEM|3, "read error");
925
c = 0;
926
}
927
else if ((pp.in->flags ^ pp.in->prev->flags) & IN_c)
928
{
929
static char ket[] = { 0, '}', '\n', 0 };
930
931
pp.in->flags ^= IN_c;
932
pp.in->nextchr = ket + 1;
933
c = 2;
934
}
935
pp.in->flags |= IN_eof;
936
}
937
#if CHECKPOINT
938
pp.in->buflen = c;
939
#endif
940
pp.in->nextchr[c] = 0;
941
debug((-7, "refill(\"%s\") = %d = \"%-.*s%s\"", error_info.file, c, (c > 32 ? 32 : c), pp.in->nextchr, c > 32 ? "..." : ""));
942
if (pp.test & 0x0080)
943
sfprintf(sfstderr, "===== refill(\"%s\") = %d =====\n%s\n===== eob(\"%s\") =====\n", error_info.file, c, pp.in->nextchr, error_info.file);
944
}
945
946
#endif
947
948