Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
wine-mirror
GitHub Repository: wine-mirror/wine
Path: blob/master/tools/wmc/mcl.c
4389 views
1
/*
2
* Wine Message Compiler lexical scanner
3
*
4
* Copyright 2000 Bertho A. Stultiens (BS)
5
*
6
* This library is free software; you can redistribute it and/or
7
* modify it under the terms of the GNU Lesser General Public
8
* License as published by the Free Software Foundation; either
9
* version 2.1 of the License, or (at your option) any later version.
10
*
11
* This library is distributed in the hope that it will be useful,
12
* but WITHOUT ANY WARRANTY; without even the implied warranty of
13
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14
* Lesser General Public License for more details.
15
*
16
* You should have received a copy of the GNU Lesser General Public
17
* License along with this library; if not, write to the Free Software
18
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
19
*/
20
21
#include "config.h"
22
23
#include <stdio.h>
24
#include <stdlib.h>
25
#include <ctype.h>
26
#include <assert.h>
27
#include <string.h>
28
29
#include "wmc.h"
30
#include "utils.h"
31
#include "lang.h"
32
33
#include "mcy.tab.h"
34
35
/*
36
* Keywords are case insensitive. All normal input is treated as
37
* being in codepage iso-8859-1 for ascii input files (unicode
38
* page 0) and as equivalent unicode if unicode input is selected.
39
* All normal input, which is not part of a message text, is
40
* enforced to be unicode page 0. Otherwise an error will be
41
* generated. The normal file data should only be ASCII because
42
* that is the basic definition of the grammar.
43
*
44
* Byteorder or unicode input is determined automatically by
45
* reading the first 8 bytes and checking them against unicode
46
* page 0 byteorder (hibyte must be 0).
47
* -- FIXME --
48
* Alternatively, the input is checked against a special byte
49
* sequence to identify the file.
50
* -- FIXME --
51
*
52
*
53
 * Keywords:
 *	Codepages
 *	Facility
 *	FacilityNames
 *	Language
 *	LanguageNames
 *	MessageId
 *	MessageIdTypedef
 *	OutputBase
 *	Severity
 *	SeverityNames
 *	SymbolicName
 *
 * Default added identifiers for classes:
 *	SeverityNames:
 *		Success		= 0x0
 *		Informational	= 0x1
 *		Warning		= 0x2
 *		Error		= 0x3
 *	FacilityNames:
 *		System		= 0x0FF
 *		Application	= 0xFFF
 *		Dxgi		= 0x87A
 *		Null		= 0x000
 *	LanguageNames:
 *		English		= 0x409 (codepage 437, alias msg00001)
73
*
74
* The 'Codepages' keyword is a wmc extension.
75
*/
76
77
/*
 * Zero-terminated WCHAR spellings of the built-in names that mcy_lex()
 * registers in the token table on its first call.
 */

/* Keywords */
static const WCHAR ustr_codepages[]        = {'C','o','d','e','p','a','g','e','s',0};
static const WCHAR ustr_facility[]         = {'F','a','c','i','l','i','t','y',0};
static const WCHAR ustr_facilitynames[]    = {'F','a','c','i','l','i','t','y','N','a','m','e','s',0};
static const WCHAR ustr_language[]         = {'L','a','n','g','u','a','g','e',0};
static const WCHAR ustr_languagenames[]    = {'L','a','n','g','u','a','g','e','N','a','m','e','s',0};
static const WCHAR ustr_messageid[]        = {'M','e','s','s','a','g','e','I','d',0};
static const WCHAR ustr_messageidtypedef[] = {'M','e','s','s','a','g','e','I','d','T','y','p','e','d','e','f',0};
static const WCHAR ustr_outputbase[]       = {'O','u','t','p','u','t','B','a','s','e',0};
static const WCHAR ustr_severity[]         = {'S','e','v','e','r','i','t','y',0};
static const WCHAR ustr_severitynames[]    = {'S','e','v','e','r','i','t','y','N','a','m','e','s',0};
static const WCHAR ustr_symbolicname[]     = {'S','y','m','b','o','l','i','c','N','a','m','e',0};

/* Default severity names */
static const WCHAR ustr_success[]          = {'S','u','c','c','e','s','s',0};
static const WCHAR ustr_informational[]    = {'I','n','f','o','r','m','a','t','i','o','n','a','l',0};
static const WCHAR ustr_warning[]          = {'W','a','r','n','i','n','g',0};
static const WCHAR ustr_error[]            = {'E','r','r','o','r',0};

/* Default facility names */
static const WCHAR ustr_system[]           = {'S','y','s','t','e','m',0};
static const WCHAR ustr_application[]      = {'A','p','p','l','i','c','a','t','i','o','n',0};
static const WCHAR ustr_dxgi[]             = {'D','x','g','i',0};
static const WCHAR ustr_null[]             = {'N','u','l','l',0};

/* Default language name and its alias */
static const WCHAR ustr_english[]          = {'E','n','g','l','i','s','h',0};
static const WCHAR ustr_msg00001[]         = {'m','s','g','0','0','0','0','1',0};
98
/*
99
* This table is to beat any form of "expression building" to check for
100
* correct filename characters. It is also used for ident checks.
101
* FIXME: use it more consistently.
102
*/
103
104
/* Character class bits; each char_table[] entry is an OR of these. */
#define CH_SHORTNAME	0x01
#define CH_LONGNAME	0x02
#define CH_IDENT	0x04
#define CH_NUMBER	0x08
/*#define CH_WILDCARD	0x10*/
/*#define CH_DOT	0x20*/
#define CH_PUNCT	0x40
#define CH_INVALID	0x80

/*
 * Class bits for every unicode page 0 character, indexed by character value.
 * The element type is unsigned char: several entries have bit 7 set
 * (CH_INVALID), which does not fit in a plain char where char is signed.
 */
static const unsigned char char_table[256] = {
    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, /* 0x00 - 0x07 */
    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, /* 0x08 - 0x0F */
    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, /* 0x10 - 0x17 */
    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, /* 0x18 - 0x1F */
    0x80, 0x03, 0x80, 0x03, 0x03, 0x03, 0x03, 0x03, /* 0x20 - 0x27 " !"#$%&'" */
    0x43, 0x43, 0x10, 0x80, 0x03, 0x03, 0x22, 0x80, /* 0x28 - 0x2F "()*+,-./" */
    0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, /* 0x30 - 0x37 "01234567" */
    0x0b, 0x0b, 0xc0, 0x80, 0x80, 0x80, 0x80, 0x10, /* 0x38 - 0x3F "89:;<=>?" */
    0x03, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, /* 0x40 - 0x47 "@ABCDEFG" */
    0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, /* 0x48 - 0x4F "HIJKLMNO" */
    0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, /* 0x50 - 0x57 "PQRSTUVW" */
    0x07, 0x07, 0x07, 0x80, 0x80, 0x80, 0x80, 0x07, /* 0x58 - 0x5F "XYZ[\]^_" */
    0x03, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, /* 0x60 - 0x67 "`abcdefg" */
    0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, /* 0x68 - 0x6F "hijklmno" */
    0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, /* 0x70 - 0x77 "pqrstuvw" */
    0x07, 0x07, 0x07, 0x03, 0x80, 0x03, 0x03, 0x80, /* 0x78 - 0x7F "xyz{|}~ " */
    0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0x80 - 0x87 */
    0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0x88 - 0x8F */
    0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0x90 - 0x97 */
    0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0x98 - 0x9F */
    0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xA0 - 0xA7 */
    0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xA8 - 0xAF */
    0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xB0 - 0xB7 */
    0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xB8 - 0xBF */
    0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xC0 - 0xC7 */
    0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xC8 - 0xCF */
    0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xD0 - 0xD7 */
    0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xD8 - 0xDF */
    0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xE0 - 0xE7 */
    0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xE8 - 0xEF */
    0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xF0 - 0xF7 */
    0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x80, /* 0xF8 - 0xFF */
};
147
148
/*
 * Return non-zero when ch has no bits set above the low byte, i.e. it lies
 * in 0x00..0xff. Negative values such as EOF yield 0.
 */
static int isisochar(int ch)
{
    return (ch & ~0xff) == 0;
}
152
153
/* Codepage handed to codepage_to_unicode() when ASCII-mode input is converted */
static int codepage;

/* Select the codepage used for subsequent input conversion. */
void set_codepage(int cp)
{
    codepage = cp;
}
159
160
/*
161
* Input functions
162
*/
163
#define INPUTBUFFER_SIZE 2048 /* Must be larger than 4 and approx. large enough to hold a line */
164
165
static int nungetstack = 0;
166
static int allocungetstack = 0;
167
static char *ungetstack = NULL;
168
static int ninputbuffer = 0;
169
static WCHAR inputbuffer[INPUTBUFFER_SIZE];
170
171
/*
172
* Fill the input buffer with *one* line of input.
173
* The line is '\n' terminated so that scanning
174
* messages with translation works as expected
175
* (otherwise we cannot pre-translate because the
176
* language is first known one line before the
177
* actual message).
178
*/
179
static int fill_inputbuffer(void)
180
{
181
static enum input_mode { INPUT_UNKNOWN, INPUT_ASCII, INPUT_UTF8, INPUT_UNICODE } mode;
182
static int swapped;
183
static unsigned char utf8_bom[3] = { 0xef, 0xbb, 0xbf };
184
WCHAR *wbuf;
185
int i, pos = 0, len = 0;
186
char buffer[INPUTBUFFER_SIZE];
187
188
if (mode == INPUT_UNKNOWN)
189
{
190
len = fread( buffer, 1, 8, yyin );
191
wbuf = (WCHAR *)buffer;
192
if (len >= 3 && !memcmp( buffer, utf8_bom, 3 ))
193
{
194
mode = INPUT_UTF8;
195
memmove( buffer, buffer + 3, len - 3 );
196
len -= 3;
197
}
198
else if (len == 8)
199
{
200
if (wbuf[0] == 0xfeff || wbuf[0] == 0xfffe)
201
{
202
mode = INPUT_UNICODE;
203
pos = 1;
204
swapped = (wbuf[0] == 0xfffe);
205
}
206
else if (!((wbuf[0] | wbuf[1] | wbuf[2] | wbuf[3]) & 0xff00))
207
{
208
mode = INPUT_UNICODE;
209
}
210
else if (!((wbuf[0] | wbuf[1] | wbuf[2] | wbuf[3]) & 0x00ff))
211
{
212
mode = INPUT_UNICODE;
213
swapped = 1;
214
}
215
}
216
217
if (mode == INPUT_UNICODE)
218
{
219
len = 4 - pos;
220
memcpy( inputbuffer, wbuf + pos, len * sizeof(WCHAR) );
221
}
222
else if (mode == INPUT_UNKNOWN) mode = unicodein ? INPUT_UTF8 : INPUT_ASCII;
223
}
224
225
switch (mode)
226
{
227
case INPUT_ASCII:
228
if (!fgets( buffer + len, sizeof(buffer) - len, yyin )) break;
229
wbuf = codepage_to_unicode( codepage, buffer, strlen(buffer), &ninputbuffer );
230
memcpy( inputbuffer, wbuf, ninputbuffer * sizeof(WCHAR) );
231
free( wbuf );
232
return 1;
233
case INPUT_UTF8:
234
if (!fgets( buffer + len, sizeof(buffer) - len, yyin )) break;
235
wbuf = utf8_to_unicode( buffer, strlen(buffer), &ninputbuffer );
236
memcpy( inputbuffer, wbuf, ninputbuffer * sizeof(WCHAR) );
237
free( wbuf );
238
return 1;
239
case INPUT_UNICODE:
240
len += fread( inputbuffer + len, sizeof(WCHAR), INPUTBUFFER_SIZE - len, yyin );
241
if (!len) break;
242
if (swapped) for (i = 0; i < len; i++) inputbuffer[i] = (inputbuffer[i] << 8) | (inputbuffer[i] >> 8);
243
ninputbuffer = len;
244
return 1;
245
case INPUT_UNKNOWN:
246
break;
247
}
248
if (ferror(yyin)) xyyerror( "Fatal: reading input failed\n" );
249
return 0;
250
}
251
252
static int get_unichar(void)
253
{
254
static WCHAR *b = NULL;
255
char_number++;
256
257
if(nungetstack)
258
return ungetstack[--nungetstack];
259
260
if(!ninputbuffer)
261
{
262
if(!fill_inputbuffer())
263
return EOF;
264
b = inputbuffer;
265
}
266
267
ninputbuffer--;
268
return *b++;
269
}
270
271
static void unget_unichar(int ch)
272
{
273
if(ch == EOF)
274
return;
275
276
char_number--;
277
278
if(nungetstack == allocungetstack)
279
{
280
allocungetstack += 32;
281
ungetstack = xrealloc(ungetstack, allocungetstack * sizeof(*ungetstack));
282
}
283
284
ungetstack[nungetstack++] = (WCHAR)ch;
285
}
286
287
288
/*
289
* Normal character stack.
290
* Used for number scanning.
291
*/
292
static int ncharstack = 0;	/* characters currently on the stack */
static int alloccharstack = 0;	/* allocated capacity */
static char *charstack = NULL;

/* Forget the stack contents; the allocation is kept for reuse. */
static void empty_char_stack(void)
{
    ncharstack = 0;
}

/* Append ch (narrowed to char), growing the storage 32 bytes at a time. */
static void push_char(int ch)
{
    if(alloccharstack == ncharstack)
    {
        alloccharstack += 32;
        charstack = xrealloc(charstack, alloccharstack * sizeof(*charstack));
    }
    charstack[ncharstack] = (char)ch;
    ncharstack++;
}

/* Return the most recently pushed character as 0..255, or 0 when empty. */
static int tos_char_stack(void)
{
    return ncharstack ? (int)(charstack[ncharstack-1] & 0xff) : 0;
}

/* Return the start of the stack storage (NULL if nothing was ever pushed). */
static char *get_char_stack(void)
{
    return charstack;
}
323
324
/*
325
* Unicode character stack.
326
* Used for general scanner.
327
*/
328
static int nunicharstack = 0;
329
static int allocunicharstack = 0;
330
static WCHAR *unicharstack = NULL;
331
332
static void empty_unichar_stack(void)
333
{
334
nunicharstack = 0;
335
}
336
337
static void push_unichar(int ch)
338
{
339
if(nunicharstack == allocunicharstack)
340
{
341
allocunicharstack += 128;
342
unicharstack = xrealloc(unicharstack, allocunicharstack * sizeof(*unicharstack));
343
}
344
unicharstack[nunicharstack++] = (WCHAR)ch;
345
}
346
347
#if 0
348
static int tos_unichar_stack(void)
349
{
350
if(!nunicharstack)
351
return 0;
352
else
353
return (int)(unicharstack[nunicharstack-1] & 0xffff);
354
}
355
#endif
356
357
static WCHAR *get_unichar_stack(void)
358
{
359
return unicharstack;
360
}
361
362
/*
363
* Number scanner
364
*
365
* state | ch | next state
366
* ------+-----------------+--------------------------
367
* 0 | [0] | 1
368
* 0 | [1-9] | 4
369
* 0 | . | error (should never occur)
370
* 1 | [xX] | 2
371
* 1 | [0-7] | 3
372
* 1 | [89a-wyzA-WYZ_] | error invalid digit
373
* 1 | . | return 0
374
* 2 | [0-9a-fA-F] | 2
375
* 2 | [g-zG-Z_] | error invalid hex digit
376
* 2 | . | return (hex-number) if TOS != [xX] else error
377
* 3 | [0-7] | 3
378
* 3 | [89a-zA-Z_] | error invalid octal digit
379
* 3 | . | return (octal-number)
380
* 4 | [0-9] | 4
381
* 4 | [a-zA-Z_] | error invalid decimal digit
382
* 4 | . | return (decimal-number)
383
*
384
* All non-identifier characters [^a-zA-Z_0-9] terminate the scan
385
* and return the value. This is not entirely correct, but close
386
* enough (should check punctuators as trailing context, but the
387
* char_table is not adapted to that and it is questionable whether
388
* it is worth the trouble).
389
* All non-iso-8859-1 characters are an error.
390
*/
391
static int scan_number(int ch)
392
{
393
int state = 0;
394
int base = 10;
395
empty_char_stack();
396
397
while(1)
398
{
399
if(!isisochar(ch))
400
xyyerror("Invalid digit\n");
401
402
switch(state)
403
{
404
case 0:
405
if(isdigit(ch))
406
{
407
push_char(ch);
408
if(ch == '0')
409
state = 1;
410
else
411
state = 4;
412
}
413
else
414
internal_error(__FILE__, __LINE__, "Non-digit in first number-scanner state\n");
415
break;
416
case 1:
417
if(ch == 'x' || ch == 'X')
418
{
419
push_char(ch);
420
state = 2;
421
}
422
else if(ch >= '0' && ch <= '7')
423
{
424
push_char(ch);
425
state = 3;
426
}
427
else if(isalpha(ch) || ch == '_')
428
xyyerror("Invalid number digit\n");
429
else
430
{
431
unget_unichar(ch);
432
mcy_lval.num = 0;
433
return tNUMBER;
434
}
435
break;
436
case 2:
437
if(isxdigit(ch))
438
push_char(ch);
439
else if(isalpha(ch) || ch == '_' || !isxdigit(tos_char_stack()))
440
xyyerror("Invalid hex digit\n");
441
else
442
{
443
base = 16;
444
goto finish;
445
}
446
break;
447
case 3:
448
if(ch >= '0' && ch <= '7')
449
push_char(ch);
450
else if(isalnum(ch) || ch == '_')
451
xyyerror("Invalid octal digit\n");
452
else
453
{
454
base = 8;
455
goto finish;
456
}
457
break;
458
case 4:
459
if(isdigit(ch))
460
push_char(ch);
461
else if(isalnum(ch) || ch == '_')
462
xyyerror("Invalid decimal digit\n");
463
else
464
{
465
base = 10;
466
goto finish;
467
}
468
break;
469
default:
470
internal_error(__FILE__, __LINE__, "Invalid state in number-scanner\n");
471
}
472
ch = get_unichar();
473
}
474
finish:
475
unget_unichar(ch);
476
push_char(0);
477
mcy_lval.num = strtoul(get_char_stack(), NULL, base);
478
return tNUMBER;
479
}
480
481
static void newline(void)
482
{
483
line_number++;
484
char_number = 1;
485
}
486
487
static int unisort(const void *p1, const void *p2)
488
{
489
return unistricmp(((const struct token *)p1)->name, ((const struct token *)p2)->name);
490
}
491
492
static struct token *tokentable = NULL;
493
static int ntokentable = 0;
494
495
struct token *lookup_token(const WCHAR *s)
496
{
497
struct token tok;
498
499
tok.name = s;
500
return (struct token *)bsearch(&tok, tokentable, ntokentable, sizeof(*tokentable), unisort);
501
}
502
503
void add_token(enum tok_enum type, const WCHAR *name, int tok, int cp, const WCHAR *alias, int fix)
504
{
505
ntokentable++;
506
tokentable = xrealloc(tokentable, ntokentable * sizeof(*tokentable));
507
tokentable[ntokentable-1].type = type;
508
tokentable[ntokentable-1].name = name;
509
tokentable[ntokentable-1].token = tok;
510
tokentable[ntokentable-1].codepage = cp;
511
tokentable[ntokentable-1].alias = alias;
512
tokentable[ntokentable-1].fixed = fix;
513
qsort(tokentable, ntokentable, sizeof(*tokentable), unisort);
514
}
515
516
void get_tokentable(struct token **tab, int *len)
517
{
518
assert(tab != NULL);
519
assert(len != NULL);
520
*tab = tokentable;
521
*len = ntokentable;
522
}
523
524
/*
525
* The scanner
526
*
527
*/
528
int mcy_lex(void)
529
{
530
static const WCHAR ustr_dot1[] = { '.', '\n', 0 };
531
static const WCHAR ustr_dot2[] = { '.', '\r', '\n', 0 };
532
static int isinit = 0;
533
int ch;
534
535
if(!isinit)
536
{
537
isinit++;
538
set_codepage(WMC_DEFAULT_CODEPAGE);
539
add_token(tok_keyword, ustr_codepages, tCODEPAGE, 0, NULL, 0);
540
add_token(tok_keyword, ustr_facility, tFACILITY, 0, NULL, 1);
541
add_token(tok_keyword, ustr_facilitynames, tFACNAMES, 0, NULL, 1);
542
add_token(tok_keyword, ustr_language, tLANGUAGE, 0, NULL, 1);
543
add_token(tok_keyword, ustr_languagenames, tLANNAMES, 0, NULL, 1);
544
add_token(tok_keyword, ustr_messageid, tMSGID, 0, NULL, 1);
545
add_token(tok_keyword, ustr_messageidtypedef, tTYPEDEF, 0, NULL, 1);
546
add_token(tok_keyword, ustr_outputbase, tBASE, 0, NULL, 1);
547
add_token(tok_keyword, ustr_severity, tSEVERITY, 0, NULL, 1);
548
add_token(tok_keyword, ustr_severitynames, tSEVNAMES, 0, NULL, 1);
549
add_token(tok_keyword, ustr_symbolicname, tSYMNAME, 0, NULL, 1);
550
add_token(tok_severity, ustr_error, 0x03, 0, NULL, 0);
551
add_token(tok_severity, ustr_warning, 0x02, 0, NULL, 0);
552
add_token(tok_severity, ustr_informational, 0x01, 0, NULL, 0);
553
add_token(tok_severity, ustr_success, 0x00, 0, NULL, 0);
554
add_token(tok_facility, ustr_application, 0xFFF, 0, NULL, 0);
555
add_token(tok_facility, ustr_system, 0x0FF, 0, NULL, 0);
556
add_token(tok_facility, ustr_dxgi, 0x87a, 0, NULL, 0);
557
add_token(tok_facility, ustr_null, 0x000, 0, NULL, 0);
558
add_token(tok_language, ustr_english, 0x409, 437, ustr_msg00001, 0);
559
}
560
561
empty_unichar_stack();
562
563
while(1)
564
{
565
if(want_line)
566
{
567
while((ch = get_unichar()) != '\n')
568
{
569
if(ch == EOF)
570
xyyerror("Unexpected EOF\n");
571
push_unichar(ch);
572
}
573
newline();
574
push_unichar(ch);
575
push_unichar(0);
576
if(!unistrcmp(ustr_dot1, get_unichar_stack()) || !unistrcmp(ustr_dot2, get_unichar_stack()))
577
{
578
want_line = 0;
579
/* Reset the codepage to our default after each message */
580
set_codepage(WMC_DEFAULT_CODEPAGE);
581
return tMSGEND;
582
}
583
mcy_lval.str = xunistrdup(get_unichar_stack());
584
return tLINE;
585
}
586
587
ch = get_unichar();
588
589
if(ch == EOF)
590
return EOF;
591
592
if(ch == '\n')
593
{
594
newline();
595
if(want_nl)
596
{
597
want_nl = 0;
598
return tNL;
599
}
600
continue;
601
}
602
603
if(isisochar(ch))
604
{
605
if(want_file)
606
{
607
int n = 0;
608
while(n < 8 && isisochar(ch))
609
{
610
int t = char_table[ch];
611
if((t & CH_PUNCT) || !(t & CH_SHORTNAME))
612
break;
613
614
push_unichar(ch);
615
n++;
616
ch = get_unichar();
617
}
618
unget_unichar(ch);
619
push_unichar(0);
620
want_file = 0;
621
mcy_lval.str = xunistrdup(get_unichar_stack());
622
return tFILE;
623
}
624
625
if(char_table[ch] & CH_IDENT)
626
{
627
struct token *tok;
628
while(isisochar(ch) && (char_table[ch] & (CH_IDENT|CH_NUMBER)))
629
{
630
push_unichar(ch);
631
ch = get_unichar();
632
}
633
unget_unichar(ch);
634
push_unichar(0);
635
if(!(tok = lookup_token(get_unichar_stack())))
636
{
637
mcy_lval.str = xunistrdup(get_unichar_stack());
638
return tIDENT;
639
}
640
switch(tok->type)
641
{
642
case tok_keyword:
643
return tok->token;
644
645
case tok_language:
646
codepage = tok->codepage;
647
/* Fall through */
648
case tok_severity:
649
case tok_facility:
650
mcy_lval.tok = tok;
651
return tTOKEN;
652
653
default:
654
internal_error(__FILE__, __LINE__, "Invalid token type encountered\n");
655
}
656
}
657
658
if(isspace(ch)) /* Ignore space */
659
continue;
660
661
if(isdigit(ch))
662
return scan_number(ch);
663
}
664
665
switch(ch)
666
{
667
case ':':
668
case '=':
669
case '+':
670
case '(':
671
case ')':
672
return ch;
673
case ';':
674
while(ch != '\n' && ch != EOF)
675
{
676
push_unichar(ch);
677
ch = get_unichar();
678
}
679
newline();
680
push_unichar(ch); /* Include the newline */
681
push_unichar(0);
682
mcy_lval.str = xunistrdup(get_unichar_stack());
683
return tCOMMENT;
684
default:
685
xyyerror("Invalid character '%c' (0x%04x)\n", isisochar(ch) && isprint(ch) ? ch : '.', ch);
686
}
687
}
688
}
689
690