Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
stenzek
GitHub Repository: stenzek/duckstation
Path: blob/master/dep/reshadefx/src/effect_lexer.cpp
4246 views
1
/*
2
* Copyright (C) 2014 Patrick Mours
3
* SPDX-License-Identifier: BSD-3-Clause
4
*/
5
6
#include "effect_lexer.hpp"
7
#include <cassert>
8
#include <string_view>
9
#include <unordered_map> // Used for static lookup tables
10
11
using namespace reshadefx;
12
13
// Character classes used by the 's_type_lookup' table below. The enumerator
// values are representative printable characters, so the table doubles as an
// identity mapping for single-character punctuation.
enum token_type
{
	DIGIT = '0', // Any decimal digit 0-9
	IDENT = 'A', // Any character that may appear in an identifier (letters and underscore)
	SPACE = ' ', // Any horizontal whitespace character
};
19
20
// Lookup table which translates a given char to a token type
// Index 0 (the NUL terminator) maps to 0xFF, which 'lexer::lex' treats as the
// end-of-file marker. Horizontal whitespace maps to SPACE, digits to DIGIT,
// letters and '_' to IDENT, and punctuation characters map to themselves.
// Entries beyond index 126 are zero-initialized (treated as unknown).
static const unsigned int s_type_lookup[256] = {
	0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, SPACE,
	'\n', SPACE, SPACE, SPACE, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	0x00, 0x00, SPACE, '!', '"', '#', '$', '%', '&', '\'',
	'(', ')', '*', '+', ',', '-', '.', '/', DIGIT, DIGIT,
	DIGIT, DIGIT, DIGIT, DIGIT, DIGIT, DIGIT, DIGIT, DIGIT, ':', ';',
	'<', '=', '>', '?', '@', IDENT, IDENT, IDENT, IDENT, IDENT,
	IDENT, IDENT, IDENT, IDENT, IDENT, IDENT, IDENT, IDENT, IDENT, IDENT,
	IDENT, IDENT, IDENT, IDENT, IDENT, IDENT, IDENT, IDENT, IDENT, IDENT,
	IDENT, '[', '\\', ']', '^', IDENT, 0x00, IDENT, IDENT, IDENT,
	IDENT, IDENT, IDENT, IDENT, IDENT, IDENT, IDENT, IDENT, IDENT, IDENT,
	IDENT, IDENT, IDENT, IDENT, IDENT, IDENT, IDENT, IDENT, IDENT, IDENT,
	IDENT, IDENT, IDENT, '{', '|', '}', '~', 0x00, 0x00, 0x00,
};
36
37
// Lookup tables which translate a given string literal to a token and backwards
38
static const std::unordered_map<tokenid, std::string_view> s_token_lookup = {
39
{ tokenid::end_of_file, "end of file" },
40
{ tokenid::exclaim, "!" },
41
{ tokenid::hash, "#" },
42
{ tokenid::dollar, "$" },
43
{ tokenid::percent, "%" },
44
{ tokenid::ampersand, "&" },
45
{ tokenid::parenthesis_open, "(" },
46
{ tokenid::parenthesis_close, ")" },
47
{ tokenid::star, "*" },
48
{ tokenid::plus, "+" },
49
{ tokenid::comma, "," },
50
{ tokenid::minus, "-" },
51
{ tokenid::dot, "." },
52
{ tokenid::slash, "/" },
53
{ tokenid::colon, ":" },
54
{ tokenid::semicolon, ";" },
55
{ tokenid::less, "<" },
56
{ tokenid::equal, "=" },
57
{ tokenid::greater, ">" },
58
{ tokenid::question, "?" },
59
{ tokenid::at, "@" },
60
{ tokenid::bracket_open, "[" },
61
{ tokenid::backslash, "\\" },
62
{ tokenid::bracket_close, "]" },
63
{ tokenid::caret, "^" },
64
{ tokenid::brace_open, "{" },
65
{ tokenid::pipe, "|" },
66
{ tokenid::brace_close, "}" },
67
{ tokenid::tilde, "~" },
68
{ tokenid::exclaim_equal, "!=" },
69
{ tokenid::percent_equal, "%=" },
70
{ tokenid::ampersand_ampersand, "&&" },
71
{ tokenid::ampersand_equal, "&=" },
72
{ tokenid::star_equal, "*=" },
73
{ tokenid::plus_plus, "++" },
74
{ tokenid::plus_equal, "+=" },
75
{ tokenid::minus_minus, "--" },
76
{ tokenid::minus_equal, "-=" },
77
{ tokenid::arrow, "->" },
78
{ tokenid::ellipsis, "..." },
79
{ tokenid::slash_equal, "|=" },
80
{ tokenid::colon_colon, "::" },
81
{ tokenid::less_less_equal, "<<=" },
82
{ tokenid::less_less, "<<" },
83
{ tokenid::less_equal, "<=" },
84
{ tokenid::equal_equal, "==" },
85
{ tokenid::greater_greater_equal, ">>=" },
86
{ tokenid::greater_greater, ">>" },
87
{ tokenid::greater_equal, ">=" },
88
{ tokenid::caret_equal, "^=" },
89
{ tokenid::pipe_equal, "|=" },
90
{ tokenid::pipe_pipe, "||" },
91
{ tokenid::identifier, "identifier" },
92
{ tokenid::reserved, "reserved word" },
93
{ tokenid::true_literal, "true" },
94
{ tokenid::false_literal, "false" },
95
{ tokenid::int_literal, "integral literal" },
96
{ tokenid::uint_literal, "integral literal" },
97
{ tokenid::float_literal, "floating point literal" },
98
{ tokenid::double_literal, "floating point literal" },
99
{ tokenid::string_literal, "string literal" },
100
{ tokenid::namespace_, "namespace" },
101
{ tokenid::struct_, "struct" },
102
{ tokenid::technique, "technique" },
103
{ tokenid::pass, "pass" },
104
{ tokenid::for_, "for" },
105
{ tokenid::while_, "while" },
106
{ tokenid::do_, "do" },
107
{ tokenid::if_, "if" },
108
{ tokenid::else_, "else" },
109
{ tokenid::switch_, "switch" },
110
{ tokenid::case_, "case" },
111
{ tokenid::default_, "default" },
112
{ tokenid::break_, "break" },
113
{ tokenid::continue_, "continue" },
114
{ tokenid::return_, "return" },
115
{ tokenid::discard_, "discard" },
116
{ tokenid::extern_, "extern" },
117
{ tokenid::static_, "static" },
118
{ tokenid::uniform_, "uniform" },
119
{ tokenid::volatile_, "volatile" },
120
{ tokenid::precise, "precise" },
121
{ tokenid::groupshared, "groupshared" },
122
{ tokenid::in, "in" },
123
{ tokenid::out, "out" },
124
{ tokenid::inout, "inout" },
125
{ tokenid::const_, "const" },
126
{ tokenid::linear, "linear" },
127
{ tokenid::noperspective, "noperspective" },
128
{ tokenid::centroid, "centroid" },
129
{ tokenid::nointerpolation, "nointerpolation" },
130
{ tokenid::void_, "void" },
131
{ tokenid::bool_, "bool" },
132
{ tokenid::bool2, "bool2" },
133
{ tokenid::bool3, "bool3" },
134
{ tokenid::bool4, "bool4" },
135
{ tokenid::bool2x2, "bool2x2" },
136
{ tokenid::bool2x3, "bool2x3" },
137
{ tokenid::bool2x4, "bool2x4" },
138
{ tokenid::bool3x2, "bool3x2" },
139
{ tokenid::bool3x3, "bool3x3" },
140
{ tokenid::bool3x4, "bool3x4" },
141
{ tokenid::bool4x2, "bool4x2" },
142
{ tokenid::bool4x3, "bool4x3" },
143
{ tokenid::bool4x4, "bool4x4" },
144
{ tokenid::int_, "int" },
145
{ tokenid::int2, "int2" },
146
{ tokenid::int3, "int3" },
147
{ tokenid::int4, "int4" },
148
{ tokenid::int2x2, "int2x2" },
149
{ tokenid::int2x3, "int2x3" },
150
{ tokenid::int2x4, "int2x4" },
151
{ tokenid::int3x2, "int3x2" },
152
{ tokenid::int3x3, "int3x3" },
153
{ tokenid::int3x4, "int3x4" },
154
{ tokenid::int4x2, "int4x2" },
155
{ tokenid::int4x3, "int4x3" },
156
{ tokenid::int4x4, "int4x4" },
157
{ tokenid::min16int, "min16int" },
158
{ tokenid::min16int2, "min16int2" },
159
{ tokenid::min16int3, "min16int3" },
160
{ tokenid::min16int4, "min16int4" },
161
{ tokenid::uint_, "uint" },
162
{ tokenid::uint2, "uint2" },
163
{ tokenid::uint3, "uint3" },
164
{ tokenid::uint4, "uint4" },
165
{ tokenid::uint2x2, "uint2x2" },
166
{ tokenid::uint2x3, "uint2x3" },
167
{ tokenid::uint2x4, "uint2x4" },
168
{ tokenid::uint3x2, "uint3x2" },
169
{ tokenid::uint3x3, "uint3x3" },
170
{ tokenid::uint3x4, "uint3x4" },
171
{ tokenid::uint4x2, "uint4x2" },
172
{ tokenid::uint4x3, "uint4x3" },
173
{ tokenid::uint4x4, "uint4x4" },
174
{ tokenid::min16uint, "min16uint" },
175
{ tokenid::min16uint2, "min16uint2" },
176
{ tokenid::min16uint3, "min16uint3" },
177
{ tokenid::min16uint4, "min16uint4" },
178
{ tokenid::float_, "float" },
179
{ tokenid::float2, "float2" },
180
{ tokenid::float3, "float3" },
181
{ tokenid::float4, "float4" },
182
{ tokenid::float2x2, "float2x2" },
183
{ tokenid::float2x3, "float2x3" },
184
{ tokenid::float2x4, "float2x4" },
185
{ tokenid::float3x2, "float3x2" },
186
{ tokenid::float3x3, "float3x3" },
187
{ tokenid::float3x4, "float3x4" },
188
{ tokenid::float4x2, "float4x2" },
189
{ tokenid::float4x3, "float4x3" },
190
{ tokenid::float4x4, "float4x4" },
191
{ tokenid::min16float, "min16float" },
192
{ tokenid::min16float2, "min16float2" },
193
{ tokenid::min16float3, "min16float3" },
194
{ tokenid::min16float4, "min16float4" },
195
{ tokenid::vector, "vector" },
196
{ tokenid::matrix, "matrix" },
197
{ tokenid::string_, "string" },
198
{ tokenid::texture1d, "texture1D" },
199
{ tokenid::texture2d, "texture2D" },
200
{ tokenid::texture3d, "texture3D" },
201
{ tokenid::sampler1d, "sampler1D" },
202
{ tokenid::sampler2d, "sampler2D" },
203
{ tokenid::sampler3d, "sampler3D" },
204
{ tokenid::storage1d, "storage1D" },
205
{ tokenid::storage2d, "storage2D" },
206
{ tokenid::storage3d, "storage3D" },
207
};
208
// Maps source text keywords to their token identifiers.
// Reserved C/C++/HLSL words that ReShade FX does not support map to
// 'tokenid::reserved' so the parser can report them distinctly.
// Note: Nx1 matrix names (e.g. "bool2x1") alias their vector equivalents, and
// the bare "sampler"/"storage"/"texture" names default to their 2D variants.
static const std::unordered_map<std::string_view, tokenid> s_keyword_lookup = {
	{ "asm", tokenid::reserved },
	{ "asm_fragment", tokenid::reserved },
	{ "auto", tokenid::reserved },
	{ "bool", tokenid::bool_ },
	{ "bool2", tokenid::bool2 },
	{ "bool2x1", tokenid::bool2 },
	{ "bool2x2", tokenid::bool2x2 },
	{ "bool2x3", tokenid::bool2x3 },
	{ "bool2x4", tokenid::bool2x4 },
	{ "bool3", tokenid::bool3 },
	{ "bool3x1", tokenid::bool3 },
	{ "bool3x2", tokenid::bool3x2 },
	{ "bool3x3", tokenid::bool3x3 },
	{ "bool3x4", tokenid::bool3x4 },
	{ "bool4", tokenid::bool4 },
	{ "bool4x1", tokenid::bool4 },
	{ "bool4x2", tokenid::bool4x2 },
	{ "bool4x3", tokenid::bool4x3 },
	{ "bool4x4", tokenid::bool4x4 },
	{ "break", tokenid::break_ },
	{ "case", tokenid::case_ },
	{ "cast", tokenid::reserved },
	{ "catch", tokenid::reserved },
	{ "centroid", tokenid::reserved },
	{ "char", tokenid::reserved },
	{ "class", tokenid::reserved },
	{ "column_major", tokenid::reserved },
	{ "compile", tokenid::reserved },
	{ "const", tokenid::const_ },
	{ "const_cast", tokenid::reserved },
	{ "continue", tokenid::continue_ },
	{ "default", tokenid::default_ },
	{ "delete", tokenid::reserved },
	{ "discard", tokenid::discard_ },
	{ "do", tokenid::do_ },
	{ "double", tokenid::reserved },
	{ "dword", tokenid::uint_ }, // HLSL 'dword' types are treated as unsigned integers
	{ "dword2", tokenid::uint2 },
	{ "dword2x1", tokenid::uint2 },
	{ "dword2x2", tokenid::uint2x2 },
	{ "dword2x3", tokenid::uint2x3 },
	{ "dword2x4", tokenid::uint2x4 },
	{ "dword3", tokenid::uint3, },
	{ "dword3x1", tokenid::uint3 },
	{ "dword3x2", tokenid::uint3x2 },
	{ "dword3x3", tokenid::uint3x3 },
	{ "dword3x4", tokenid::uint3x4 },
	{ "dword4", tokenid::uint4 },
	{ "dword4x1", tokenid::uint4 },
	{ "dword4x2", tokenid::uint4x2 },
	{ "dword4x3", tokenid::uint4x3 },
	{ "dword4x4", tokenid::uint4x4 },
	{ "dynamic_cast", tokenid::reserved },
	{ "else", tokenid::else_ },
	{ "enum", tokenid::reserved },
	{ "explicit", tokenid::reserved },
	{ "extern", tokenid::extern_ },
	{ "external", tokenid::reserved },
	{ "false", tokenid::false_literal },
	{ "FALSE", tokenid::false_literal },
	{ "float", tokenid::float_ },
	{ "float2", tokenid::float2 },
	{ "float2x1", tokenid::float2 },
	{ "float2x2", tokenid::float2x2 },
	{ "float2x3", tokenid::float2x3 },
	{ "float2x4", tokenid::float2x4 },
	{ "float3", tokenid::float3 },
	{ "float3x1", tokenid::float3 },
	{ "float3x2", tokenid::float3x2 },
	{ "float3x3", tokenid::float3x3 },
	{ "float3x4", tokenid::float3x4 },
	{ "float4", tokenid::float4 },
	{ "float4x1", tokenid::float4 },
	{ "float4x2", tokenid::float4x2 },
	{ "float4x3", tokenid::float4x3 },
	{ "float4x4", tokenid::float4x4 },
	{ "for", tokenid::for_ },
	{ "foreach", tokenid::reserved },
	{ "friend", tokenid::reserved },
	{ "globallycoherent", tokenid::reserved },
	{ "goto", tokenid::reserved },
	{ "groupshared", tokenid::groupshared },
	{ "half", tokenid::reserved },
	{ "half2", tokenid::reserved },
	{ "half2x1", tokenid::reserved },
	{ "half2x2", tokenid::reserved },
	{ "half2x3", tokenid::reserved },
	{ "half2x4", tokenid::reserved },
	{ "half3", tokenid::reserved },
	{ "half3x1", tokenid::reserved },
	{ "half3x2", tokenid::reserved },
	{ "half3x3", tokenid::reserved },
	{ "half3x4", tokenid::reserved },
	{ "half4", tokenid::reserved },
	{ "half4x1", tokenid::reserved },
	{ "half4x2", tokenid::reserved },
	{ "half4x3", tokenid::reserved },
	{ "half4x4", tokenid::reserved },
	{ "if", tokenid::if_ },
	{ "in", tokenid::in },
	{ "inline", tokenid::reserved },
	{ "inout", tokenid::inout },
	{ "int", tokenid::int_ },
	{ "int2", tokenid::int2 },
	{ "int2x1", tokenid::int2 },
	{ "int2x2", tokenid::int2x2 },
	{ "int2x3", tokenid::int2x3 },
	{ "int2x4", tokenid::int2x4 },
	{ "int3", tokenid::int3 },
	{ "int3x1", tokenid::int3 },
	{ "int3x2", tokenid::int3x2 },
	{ "int3x3", tokenid::int3x3 },
	{ "int3x4", tokenid::int3x4 },
	{ "int4", tokenid::int4 },
	{ "int4x1", tokenid::int4 },
	{ "int4x2", tokenid::int4x2 },
	{ "int4x3", tokenid::int4x3 },
	{ "int4x4", tokenid::int4x4 },
	{ "interface", tokenid::reserved },
	{ "linear", tokenid::linear },
	{ "long", tokenid::reserved },
	{ "matrix", tokenid::matrix },
	{ "min16float", tokenid::min16float },
	{ "min16float2", tokenid::min16float2 },
	{ "min16float3", tokenid::min16float3 },
	{ "min16float4", tokenid::min16float4 },
	{ "min16int", tokenid::min16int },
	{ "min16int2", tokenid::min16int2 },
	{ "min16int3", tokenid::min16int3 },
	{ "min16int4", tokenid::min16int4 },
	{ "min16uint", tokenid::min16uint },
	{ "min16uint2", tokenid::min16uint2 },
	{ "min16uint3", tokenid::min16uint3 },
	{ "min16uint4", tokenid::min16uint4 },
	{ "mutable", tokenid::reserved },
	{ "namespace", tokenid::namespace_ },
	{ "new", tokenid::reserved },
	{ "noinline", tokenid::reserved },
	{ "nointerpolation", tokenid::nointerpolation },
	{ "noperspective", tokenid::noperspective },
	{ "operator", tokenid::reserved },
	{ "out", tokenid::out },
	{ "packed", tokenid::reserved },
	{ "packoffset", tokenid::reserved },
	{ "pass", tokenid::pass },
	{ "precise", tokenid::precise },
	{ "private", tokenid::reserved },
	{ "protected", tokenid::reserved },
	{ "public", tokenid::reserved },
	{ "register", tokenid::reserved },
	{ "reinterpret_cast", tokenid::reserved },
	{ "restrict", tokenid::reserved },
	{ "return", tokenid::return_ },
	{ "row_major", tokenid::reserved },
	{ "sample", tokenid::reserved },
	{ "sampler", tokenid::sampler2d },
	{ "sampler1D", tokenid::sampler1d },
	{ "sampler1DArray", tokenid::reserved },
	{ "sampler2D", tokenid::sampler2d },
	{ "sampler2DArray", tokenid::reserved },
	{ "sampler2DMS", tokenid::reserved },
	{ "sampler2DMSArray", tokenid::reserved },
	{ "sampler3D", tokenid::sampler3d },
	{ "sampler_state", tokenid::reserved },
	{ "samplerCube", tokenid::reserved },
	{ "samplerCubeArray", tokenid::reserved },
	{ "samplerCUBE", tokenid::reserved },
	{ "samplerRect", tokenid::reserved },
	{ "samplerRECT", tokenid::reserved },
	{ "SamplerState", tokenid::reserved },
	{ "storage", tokenid::storage2d },
	{ "storage1D", tokenid::storage1d },
	{ "storage2D", tokenid::storage2d },
	{ "storage3D", tokenid::storage3d },
	{ "shared", tokenid::reserved },
	{ "short", tokenid::reserved },
	{ "signed", tokenid::reserved },
	{ "sizeof", tokenid::reserved },
	{ "snorm", tokenid::reserved },
	{ "static", tokenid::static_ },
	{ "static_cast", tokenid::reserved },
	{ "string", tokenid::string_ },
	{ "struct", tokenid::struct_ },
	{ "switch", tokenid::switch_ },
	{ "technique", tokenid::technique },
	{ "template", tokenid::reserved },
	{ "texture", tokenid::texture2d },
	{ "Texture1D", tokenid::reserved },
	{ "texture1D", tokenid::texture1d },
	{ "Texture1DArray", tokenid::reserved },
	{ "Texture2D", tokenid::reserved },
	{ "texture2D", tokenid::texture2d },
	{ "Texture2DArray", tokenid::reserved },
	{ "Texture2DMS", tokenid::reserved },
	{ "Texture2DMSArray", tokenid::reserved },
	{ "Texture3D", tokenid::reserved },
	{ "texture3D", tokenid::texture3d },
	{ "textureCUBE", tokenid::reserved },
	{ "TextureCube", tokenid::reserved },
	{ "TextureCubeArray", tokenid::reserved },
	{ "textureRECT", tokenid::reserved },
	{ "this", tokenid::reserved },
	{ "true", tokenid::true_literal },
	{ "TRUE", tokenid::true_literal },
	{ "try", tokenid::reserved },
	{ "typedef", tokenid::reserved },
	{ "uint", tokenid::uint_ },
	{ "uint2", tokenid::uint2 },
	{ "uint2x1", tokenid::uint2 },
	{ "uint2x2", tokenid::uint2x2 },
	{ "uint2x3", tokenid::uint2x3 },
	{ "uint2x4", tokenid::uint2x4 },
	{ "uint3", tokenid::uint3 },
	{ "uint3x1", tokenid::uint3 },
	{ "uint3x2", tokenid::uint3x2 },
	{ "uint3x3", tokenid::uint3x3 },
	{ "uint3x4", tokenid::uint3x4 },
	{ "uint4", tokenid::uint4 },
	{ "uint4x1", tokenid::uint4 },
	{ "uint4x2", tokenid::uint4x2 },
	{ "uint4x3", tokenid::uint4x3 },
	{ "uint4x4", tokenid::uint4x4 },
	{ "uniform", tokenid::uniform_ },
	{ "union", tokenid::reserved },
	{ "unorm", tokenid::reserved },
	{ "unsigned", tokenid::reserved },
	{ "using", tokenid::reserved },
	{ "vector", tokenid::vector },
	{ "virtual", tokenid::reserved },
	{ "void", tokenid::void_ },
	{ "volatile", tokenid::volatile_ },
	{ "while", tokenid::while_ }
};
442
// Maps preprocessor directive names (the identifier following '#') to their
// token identifiers. The "line" directive is absent because it receives
// special handling in 'lexer::parse_pp_directive'.
static const std::unordered_map<std::string_view, tokenid> s_pp_directive_lookup = {
	{ "define", tokenid::hash_def },
	{ "undef", tokenid::hash_undef },
	{ "if", tokenid::hash_if },
	{ "ifdef", tokenid::hash_ifdef },
	{ "ifndef", tokenid::hash_ifndef },
	{ "else", tokenid::hash_else },
	{ "elif", tokenid::hash_elif },
	{ "endif", tokenid::hash_endif },
	{ "error", tokenid::hash_error },
	{ "warning", tokenid::hash_warning },
	{ "pragma", tokenid::hash_pragma },
	{ "include", tokenid::hash_include },
};
456
457
// Returns whether 'c' is an octal digit ('0' through '7')
static bool is_octal_digit(char c)
{
	return c >= '0' && c <= '7';
}
461
// Returns whether 'c' is a decimal digit ('0' through '9')
static bool is_decimal_digit(char c)
{
	return c >= '0' && c <= '9';
}
465
static bool is_hexadecimal_digit(char c)
466
{
467
return is_decimal_digit(c) || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F');
468
}
469
470
static bool is_digit(char c, int radix)
471
{
472
switch (radix)
473
{
474
case 8:
475
return is_octal_digit(c);
476
case 10:
477
return is_decimal_digit(c);
478
case 16:
479
return is_hexadecimal_digit(c);
480
}
481
482
return false;
483
}
484
// Reinterprets the base-8 digits of 'n' as base-10 digits, e.g. 83 (which is
// 0o123) becomes 123. Used by 'parse_numeric_literal' when a literal that
// started out octal turns out to contain a decimal point.
// Fixed: the previous implementation reversed the digits into a temporary and
// then reversed them back, which dropped trailing zero digits (e.g. for input
// 8, i.e. octal "10", it returned 1 instead of 10).
static long long octal_to_decimal(long long n)
{
	long long result = 0;
	long long place = 1;

	// Peel off octal digits from least to most significant and re-add each at
	// the matching decimal place value
	while (n != 0)
	{
		result += (n & 7) * place;
		place *= 10;
		n >>= 3;
	}

	return result;
}
504
505
std::string reshadefx::token::id_to_name(tokenid id)
506
{
507
const auto it = s_token_lookup.find(id);
508
if (it != s_token_lookup.end())
509
return std::string(it->second);
510
return "unknown";
511
}
512
513
// Reads the next token from the input stream and advances past it.
// Whitespace, comments and preprocessor directives may be skipped or returned
// as tokens depending on the '_ignore_*' configuration flags.
reshadefx::token reshadefx::lexer::lex()
{
	// Column 1 (or 0) means nothing has been consumed on this line yet, which
	// matters for '#' (preprocessor directives must start a line)
	bool is_at_line_begin = _cur_location.column <= 1;

	token tok;
next_token:
	// Reset token data
	tok.location = _cur_location;
	tok.offset = input_offset();
	tok.length = 1;
	tok.literal_as_double = 0;
	tok.literal_as_string.clear();

	assert(_cur <= _end);

	// Do a character type lookup for the current character
	switch (s_type_lookup[uint8_t(*_cur)])
	{
	case 0xFF: // EOF (the NUL terminator maps to 0xFF in the lookup table)
		tok.id = tokenid::end_of_file;
		return tok;
	case SPACE:
		skip_space();
		// Collapse whitespace at the start of a line or before a line feed even when whitespace tokens are requested
		if (_ignore_whitespace || is_at_line_begin || *_cur == '\n')
			goto next_token;
		tok.id = tokenid::space;
		tok.length = input_offset() - tok.offset;
		return tok;
	case '\n':
		_cur++;
		_cur_location.line++;
		_cur_location.column = 1;
		is_at_line_begin = true;
		if (_ignore_whitespace)
			goto next_token;
		tok.id = tokenid::end_of_line;
		return tok;
	case DIGIT:
		parse_numeric_literal(tok);
		break;
	case IDENT:
		parse_identifier(tok);
		break;
	case '!':
		if (_cur[1] == '=')
			tok.id = tokenid::exclaim_equal,
			tok.length = 2;
		else
			tok.id = tokenid::exclaim;
		break;
	case '"':
		parse_string_literal(tok, _escape_string_literals);
		break;
	case '#':
		if (is_at_line_begin)
		{
			// Unrecognized or ignored directives are consumed up to the end of the line
			if (!parse_pp_directive(tok) || _ignore_pp_directives)
			{
				skip_to_next_line();
				goto next_token;
			}
		} // These braces are important so the 'else' is matched to the right 'if' statement
		else
			tok.id = tokenid::hash;
		break;
	case '$':
		tok.id = tokenid::dollar;
		break;
	case '%':
		if (_cur[1] == '=')
			tok.id = tokenid::percent_equal,
			tok.length = 2;
		else
			tok.id = tokenid::percent;
		break;
	case '&':
		if (_cur[1] == '&')
			tok.id = tokenid::ampersand_ampersand,
			tok.length = 2;
		else if (_cur[1] == '=')
			tok.id = tokenid::ampersand_equal,
			tok.length = 2;
		else
			tok.id = tokenid::ampersand;
		break;
	case '(':
		tok.id = tokenid::parenthesis_open;
		break;
	case ')':
		tok.id = tokenid::parenthesis_close;
		break;
	case '*':
		if (_cur[1] == '=')
			tok.id = tokenid::star_equal,
			tok.length = 2;
		else
			tok.id = tokenid::star;
		break;
	case '+':
		if (_cur[1] == '+')
			tok.id = tokenid::plus_plus,
			tok.length = 2;
		else if (_cur[1] == '=')
			tok.id = tokenid::plus_equal,
			tok.length = 2;
		else
			tok.id = tokenid::plus;
		break;
	case ',':
		tok.id = tokenid::comma;
		break;
	case '-':
		if (_cur[1] == '-')
			tok.id = tokenid::minus_minus,
			tok.length = 2;
		else if (_cur[1] == '=')
			tok.id = tokenid::minus_equal,
			tok.length = 2;
		else if (_cur[1] == '>')
			tok.id = tokenid::arrow,
			tok.length = 2;
		else
			tok.id = tokenid::minus;
		break;
	case '.':
		// A dot immediately followed by a digit starts a floating point literal (e.g. ".5")
		if (s_type_lookup[uint8_t(_cur[1])] == DIGIT)
			parse_numeric_literal(tok);
		else if (_cur[1] == '.' && _cur[2] == '.')
			tok.id = tokenid::ellipsis,
			tok.length = 3;
		else
			tok.id = tokenid::dot;
		break;
	case '/':
		if (_cur[1] == '/')
		{
			skip_to_next_line();
			if (_ignore_comments)
				goto next_token;
			tok.id = tokenid::single_line_comment;
			tok.length = input_offset() - tok.offset;
			return tok;
		}
		else if (_cur[1] == '*')
		{
			// Consume everything up to and including the closing "*/", tracking line numbers along the way
			while (_cur < _end)
			{
				if (*_cur == '\n')
				{
					_cur_location.line++;
					_cur_location.column = 1;
				}
				else if (_cur[0] == '*' && _cur[1] == '/')
				{
					skip(2);
					break;
				}
				skip(1);
			}
			if (_ignore_comments)
				goto next_token;
			tok.id = tokenid::multi_line_comment;
			tok.length = input_offset() - tok.offset;
			return tok;
		}
		else if (_cur[1] == '=')
			tok.id = tokenid::slash_equal,
			tok.length = 2;
		else
			tok.id = tokenid::slash;
		break;
	case ':':
		if (_cur[1] == ':')
			tok.id = tokenid::colon_colon,
			tok.length = 2;
		else
			tok.id = tokenid::colon;
		break;
	case ';':
		tok.id = tokenid::semicolon;
		break;
	case '<':
		if (_cur[1] == '<')
			if (_cur[2] == '=')
				tok.id = tokenid::less_less_equal,
				tok.length = 3;
			else
				tok.id = tokenid::less_less,
				tok.length = 2;
		else if (_cur[1] == '=')
			tok.id = tokenid::less_equal,
			tok.length = 2;
		else
			tok.id = tokenid::less;
		break;
	case '=':
		if (_cur[1] == '=')
			tok.id = tokenid::equal_equal,
			tok.length = 2;
		else
			tok.id = tokenid::equal;
		break;
	case '>':
		if (_cur[1] == '>')
			if (_cur[2] == '=')
				tok.id = tokenid::greater_greater_equal,
				tok.length = 3;
			else
				tok.id = tokenid::greater_greater,
				tok.length = 2;
		else if (_cur[1] == '=')
			tok.id = tokenid::greater_equal,
			tok.length = 2;
		else
			tok.id = tokenid::greater;
		break;
	case '?':
		tok.id = tokenid::question;
		break;
	case '@':
		tok.id = tokenid::at;
		break;
	case '[':
		tok.id = tokenid::bracket_open;
		break;
	case '\\':
		if (_cur[1] == '\n' || (_cur[1] == '\r' && _cur[2] == '\n'))
		{
			// Skip to next line if current line ends with a backslash
			skip_space();
			if (_ignore_whitespace)
				goto next_token;
			tok.id = tokenid::space;
			tok.length = input_offset() - tok.offset;
			return tok;
		}
		tok.id = tokenid::backslash;
		break;
	case ']':
		tok.id = tokenid::bracket_close;
		break;
	case '^':
		if (_cur[1] == '=')
			tok.id = tokenid::caret_equal,
			tok.length = 2;
		else
			tok.id = tokenid::caret;
		break;
	case '{':
		tok.id = tokenid::brace_open;
		break;
	case '|':
		if (_cur[1] == '=')
			tok.id = tokenid::pipe_equal,
			tok.length = 2;
		else if (_cur[1] == '|')
			tok.id = tokenid::pipe_pipe,
			tok.length = 2;
		else
			tok.id = tokenid::pipe;
		break;
	case '}':
		tok.id = tokenid::brace_close;
		break;
	case '~':
		tok.id = tokenid::tilde;
		break;
	default:
		tok.id = tokenid::unknown;
		break;
	}

	// Advance past the token text (parse_* helpers set 'tok.length' but do not move the input pointer)
	skip(tok.length);

	return tok;
}
789
790
void reshadefx::lexer::skip(size_t length)
791
{
792
_cur += length;
793
_cur_location.column += static_cast<unsigned int>(length);
794
}
795
// Skips over consecutive whitespace characters until a non-whitespace
// character (or the end of input) is reached.
void reshadefx::lexer::skip_space()
{
	while (_cur < _end)
	{
		// A backslash at the end of a line is a line continuation and is consumed like whitespace as well
		if (_cur[0] == '\\' && (_cur[1] == '\n' || (_cur[1] == '\r' && _cur[2] == '\n')))
		{
			skip(_cur[1] == '\r' ? 3 : 2);
			_cur_location.line++;
			_cur_location.column = 1;
			continue;
		}

		if (s_type_lookup[uint8_t(*_cur)] == SPACE)
			skip(1);
		else
			break;
	}
}
814
void reshadefx::lexer::skip_to_next_line()
815
{
816
// Skip each character until a new line feed is found
817
while (*_cur != '\n' && _cur < _end)
818
{
819
#if 0
820
if (_cur[0] == '\\' && (_cur[1] == '\n' || (_cur[1] == '\r' && _cur[2] == '\n')))
821
{
822
skip(_cur[1] == '\r' ? 3 : 2);
823
_cur_location.line++;
824
_cur_location.column = 1;
825
continue;
826
}
827
#endif
828
829
skip(1);
830
}
831
}
832
833
// Moves the read position back to the given byte offset into the input
// string. The offset must be within bounds; the source location members are
// intentionally left untouched by this.
void reshadefx::lexer::reset_to_offset(size_t offset)
{
	assert(offset < _input.size());
	_cur = _input.data() + offset;
}
838
839
// Reads an identifier token starting at the current position and fills in
// 'tok'. If keyword recognition is enabled, identifiers matching an entry in
// 's_keyword_lookup' are promoted to the corresponding keyword token.
// Note that this does not advance the input pointer; the caller does that
// based on 'tok.length'.
void reshadefx::lexer::parse_identifier(token &tok) const
{
	auto *const first = _cur;
	auto *last = first;

	// Advance past every character that may legally appear in an identifier
	for (unsigned int type = s_type_lookup[uint8_t(*last)];
		type == IDENT || type == DIGIT;
		type = s_type_lookup[uint8_t(*last)])
		last++;

	tok.id = tokenid::identifier;
	tok.offset = input_offset();
	tok.length = last - first;
	tok.literal_as_string.assign(first, last);

	if (_ignore_keywords)
		return;

	const auto it = s_keyword_lookup.find(tok.literal_as_string);
	if (it != s_keyword_lookup.end())
		tok.id = it->second;
}
859
// Parses a preprocessor directive starting at a '#' character.
// Returns true when 'tok' contains a directive token to hand to the caller,
// or false for the "#line" directive, which is consumed here (updating the
// current source location) and produces no token.
bool reshadefx::lexer::parse_pp_directive(token &tok)
{
	skip(1); // Skip the '#'
	skip_space(); // Skip any space between the '#' and directive
	parse_identifier(tok);

	if (const auto it = s_pp_directive_lookup.find(tok.literal_as_string);
		it != s_pp_directive_lookup.end())
	{
		tok.id = it->second;
		return true;
	}
	else if (!_ignore_line_directives && tok.literal_as_string == "line") // The #line directive needs special handling
	{
		skip(tok.length); // The 'parse_identifier' does not update the pointer to the current character, so do that now
		skip_space();
		parse_numeric_literal(tok);
		skip(tok.length);

		_cur_location.line = tok.literal_as_int;

		// Need to subtract one since the line containing #line does not count into the statistics
		if (_cur_location.line != 0)
			_cur_location.line--;

		skip_space();

		// Check if this #line directive has an file name attached to it
		if (_cur[0] == '"')
		{
			token temptok;
			// Parse without escape handling, since this is a file path
			parse_string_literal(temptok, false);

			_cur_location.source = std::move(temptok.literal_as_string);
		}

		// Do not return the #line directive as token to the caller
		return false;
	}

	// Unrecognized directive; let the caller decide what to do with it
	tok.id = tokenid::hash_unknown;

	return true;
}
903
// Parses a double-quoted string literal starting at the current position.
// The decoded contents are stored in 'tok.literal_as_string' and 'tok.length'
// covers the literal including both quotes. When 'escape' is true, C-style
// escape sequences (octal, hex and the usual single-character escapes) are
// interpreted; unknown escapes keep the escaped character verbatim.
// Fixed: lowercase hexadecimal escape digits were decoded incorrectly — the
// old expression "c - 55 - 32 * (c & 0x20)" subtracts 32 * 32 = 1024 for
// lowercase characters instead of 32, so e.g. "\xa" did not yield 0x0A.
void reshadefx::lexer::parse_string_literal(token &tok, bool escape)
{
	auto *const begin = _cur, *end = begin + 1; // Skip first quote character right away

	for (auto c = *end; c != '"'; c = *++end)
	{
		if (c == '\n' || end >= _end)
		{
			// Line feed reached, the string literal is done (technically this should be an error, but the lexer does not report errors, so ignore it)
			end--;
			if (end[0] == '\r') end--;
			break;
		}

		if (c == '\r')
		{
			// Silently ignore carriage return characters
			continue;
		}

		if (unsigned int n = (end[1] == '\r' && end + 2 < _end) ? 2 : 1;
			c == '\\' && end[n] == '\n')
		{
			// Escape character found at end of line, the string literal continues on to the next line
			end += n;
			_cur_location.line++;
			continue;
		}

		// Handle escape sequences
		if (c == '\\' && escape)
		{
			unsigned int n = 0;

			// Any character following the '\' is not parsed as usual, so increment pointer here (this makes sure '\"' does not abort the outer loop as well)
			switch (c = *++end)
			{
			case '0':
			case '1':
			case '2':
			case '3':
			case '4':
			case '5':
			case '6':
			case '7':
				// Octal escape: up to three octal digits (bounds checked before dereferencing)
				for (unsigned int i = 0; i < 3 && end < _end && is_octal_digit(*end); i++)
				{
					c = *end++;
					n = (n << 3) | (c - '0');
				}
				// For simplicity the number is limited to what fits in a single character
				c = n & 0xFF;
				// The octal parsing loop above incremented one past the escape sequence, so step back
				end--;
				break;
			case 'a':
				c = '\a';
				break;
			case 'b':
				c = '\b';
				break;
			case 'f':
				c = '\f';
				break;
			case 'n':
				c = '\n';
				break;
			case 'r':
				c = '\r';
				break;
			case 't':
				c = '\t';
				break;
			case 'v':
				c = '\v';
				break;
			case 'x':
				// Hexadecimal escape: consume all following hex digits
				if (is_hexadecimal_digit(*++end))
				{
					while (end < _end && is_hexadecimal_digit(*end))
					{
						c = *end++;
						// Fold letters to lowercase and convert to their numeric value ('a'/'A' -> 10 ... 'f'/'F' -> 15)
						n = (n << 4) | (is_decimal_digit(c) ? (c - '0') : (((c | 0x20) - 'a') + 10));
					}

					// For simplicity the number is limited to what fits in a single character
					c = n & 0xFF;
				}
				// The hexadecimal parsing loop and check above incremented one past the escape sequence, so step back
				end--;
				break;
			}
		}

		tok.literal_as_string += c;
	}

	tok.id = tokenid::string_literal;
	tok.length = end - begin + 1;

	// Free up unused memory
	tok.literal_as_string.shrink_to_fit();
}
1006
// Parses an integer or floating point literal starting at the current
// position and stores its value and type in 'tok'. Supports octal ("0...")
// and hexadecimal ("0x...") prefixes, decimal points, exponents ("e+5") and
// the 'f'/'l'/'u' type suffixes. Does not advance the input pointer; the
// caller does that based on 'tok.length'.
void reshadefx::lexer::parse_numeric_literal(token &tok) const
{
	// This routine handles both integer and floating point numbers
	auto *const begin = _cur, *end = _cur;
	// 'mantissa_size' counts accepted digits, 'decimal_location' records where
	// the decimal point sat within them (-1 while none has been seen)
	int mantissa_size = 0, decimal_location = -1, radix = 10;
	long long fraction = 0, exponent = 0;

	// If a literal starts with '0' it is either an octal or hexadecimal ('0x') value
	if (begin[0] == '0')
	{
		if (begin[1] == 'x' || begin[1] == 'X')
		{
			end = begin + 2;
			radix = 16;
		}
		else
		{
			radix = 8;
		}
	}

	// Accumulate up to 19 mantissa digits (what safely fits in a long long)
	for (; mantissa_size <= 18; mantissa_size++, end++)
	{
		auto c = *end;

		if (is_decimal_digit(c))
		{
			c -= '0';

			// Digits 8 and 9 are invalid in octal literals
			if (c >= radix)
				break;
		}
		else if (radix == 16)
		{
			// Hexadecimal values can contain the letters A to F
			if (c >= 'A' && c <= 'F')
				c -= 'A' - 10;
			else if (c >= 'a' && c <= 'f')
				c -= 'a' - 10;
			else
				break;
		}
		else
		{
			if (c != '.' || decimal_location >= 0)
				break;

			// Found a decimal character, as such convert current values
			if (radix == 8)
			{
				// The digits accumulated as octal so far were really decimal (e.g. "023.5")
				radix = 10;
				fraction = octal_to_decimal(fraction);
			}

			decimal_location = mantissa_size;
			continue;
		}

		fraction *= radix;
		fraction += c;
	}

	// Ignore additional digits that cannot affect the value
	while (is_digit(*end, radix))
		end++;

	// If a decimal character was found, this is a floating point value, otherwise an integer one
	if (decimal_location < 0)
	{
		tok.id = tokenid::int_literal;
		decimal_location = mantissa_size;
	}
	else
	{
		tok.id = tokenid::float_literal;
		// The decimal point itself was counted as a mantissa position above
		mantissa_size -= 1;
	}

	// Literals can be followed by an exponent
	if (*end == 'E' || *end == 'e')
	{
		auto tmp = end + 1;
		const bool negative = *tmp == '-';

		if (negative || *tmp == '+')
			tmp++;

		if (is_decimal_digit(*tmp))
		{
			end = tmp;

			tok.id = tokenid::float_literal;

			do {
				exponent *= 10;
				exponent += (*end++) - '0';
			} while (is_decimal_digit(*end));

			if (negative)
				exponent = -exponent;
		}
	}

	// Various suffixes force specific literal types
	if (*end == 'F' || *end == 'f')
	{
		end++; // Consume the suffix
		tok.id = tokenid::float_literal;
	}
	else if (*end == 'L' || *end == 'l')
	{
		end++; // Consume the suffix
		tok.id = tokenid::double_literal;
	}
	else if (tok.id == tokenid::int_literal && (*end == 'U' || *end == 'u')) // The 'u' suffix is only valid on integers and needs to be ignored otherwise
	{
		end++; // Consume the suffix
		tok.id = tokenid::uint_literal;
	}

	if (tok.id == tokenid::float_literal || tok.id == tokenid::double_literal)
	{
		// Shift the exponent by the number of fractional mantissa digits
		exponent += decimal_location - mantissa_size;

		const bool exponent_negative = exponent < 0;

		if (exponent_negative)
			exponent = -exponent;

		// Limit exponent
		if (exponent > 511)
			exponent = 511;

		// Quick exponent calculation via binary decomposition of the exponent
		double e = 1.0;
		const double powers_of_10[] = {
			10.,
			100.,
			1.0e4,
			1.0e8,
			1.0e16,
			1.0e32,
			1.0e64,
			1.0e128,
			1.0e256
		};

		for (auto d = powers_of_10; exponent != 0; exponent >>= 1, d++)
			if (exponent & 1)
				e *= *d;

		if (tok.id == tokenid::float_literal)
			tok.literal_as_float = exponent_negative ? fraction / static_cast<float>(e) : fraction * static_cast<float>(e);
		else
			tok.literal_as_double = exponent_negative ? fraction / e : fraction * e;
	}
	else
	{
		// Limit the maximum value to what fits into our token structure
		tok.literal_as_uint = static_cast<unsigned int>(fraction & 0xFFFFFFFF);
	}

	tok.length = end - begin;
}
1170
1171