Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
stenzek
GitHub Repository: stenzek/duckstation
Path: blob/master/dep/reshadefx/src/effect_lexer.cpp
4246 views
1
/*
2
* Copyright (C) 2014 Patrick Mours
3
* SPDX-License-Identifier: BSD-3-Clause
4
*/
5
6
#include "effect_lexer.hpp"
7
#include <cassert>
8
#include <string_view>
9
#include <unordered_map> // Used for static lookup tables
10
11
using namespace reshadefx;
12
13
// Character classes used by the 's_type_lookup' table below. The enumerator
// values are representative printable characters, so the table doubles as an
// identity mapping for single-character punctuation.
enum token_type
{
	DIGIT = '0', // Any decimal digit 0-9
	IDENT = 'A', // Any character that may appear in an identifier (letters and underscore)
	SPACE = ' ', // Any horizontal whitespace character
};
19
20
// Lookup table which translates a given char to a token type
// Index 0 (the NUL terminator) maps to 0xFF, which 'lexer::lex' treats as the
// end-of-file marker. Horizontal whitespace maps to SPACE, digits to DIGIT,
// letters and '_' to IDENT, and punctuation characters map to themselves.
// Entries beyond index 126 are zero-initialized (treated as unknown).
static const unsigned int s_type_lookup[256] = {
	0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, SPACE,
	'\n', SPACE, SPACE, SPACE, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	0x00, 0x00, SPACE, '!', '"', '#', '$', '%', '&', '\'',
	'(', ')', '*', '+', ',', '-', '.', '/', DIGIT, DIGIT,
	DIGIT, DIGIT, DIGIT, DIGIT, DIGIT, DIGIT, DIGIT, DIGIT, ':', ';',
	'<', '=', '>', '?', '@', IDENT, IDENT, IDENT, IDENT, IDENT,
	IDENT, IDENT, IDENT, IDENT, IDENT, IDENT, IDENT, IDENT, IDENT, IDENT,
	IDENT, IDENT, IDENT, IDENT, IDENT, IDENT, IDENT, IDENT, IDENT, IDENT,
	IDENT, '[', '\\', ']', '^', IDENT, 0x00, IDENT, IDENT, IDENT,
	IDENT, IDENT, IDENT, IDENT, IDENT, IDENT, IDENT, IDENT, IDENT, IDENT,
	IDENT, IDENT, IDENT, IDENT, IDENT, IDENT, IDENT, IDENT, IDENT, IDENT,
	IDENT, IDENT, IDENT, '{', '|', '}', '~', 0x00, 0x00, 0x00,
};
36
37
// Lookup tables which translate a given string literal to a token and backwards
38
static const std::unordered_map<tokenid, std::string_view> s_token_lookup = {
39
{ tokenid::end_of_file, "end of file" },
40
{ tokenid::exclaim, "!" },
41
{ tokenid::hash, "#" },
42
{ tokenid::dollar, "$" },
43
{ tokenid::percent, "%" },
44
{ tokenid::ampersand, "&" },
45
{ tokenid::parenthesis_open, "(" },
46
{ tokenid::parenthesis_close, ")" },
47
{ tokenid::star, "*" },
48
{ tokenid::plus, "+" },
49
{ tokenid::comma, "," },
50
{ tokenid::minus, "-" },
51
{ tokenid::dot, "." },
52
{ tokenid::slash, "/" },
53
{ tokenid::colon, ":" },
54
{ tokenid::semicolon, ";" },
55
{ tokenid::less, "<" },
56
{ tokenid::equal, "=" },
57
{ tokenid::greater, ">" },
58
{ tokenid::question, "?" },
59
{ tokenid::at, "@" },
60
{ tokenid::bracket_open, "[" },
61
{ tokenid::backslash, "\\" },
62
{ tokenid::bracket_close, "]" },
63
{ tokenid::caret, "^" },
64
{ tokenid::brace_open, "{" },
65
{ tokenid::pipe, "|" },
66
{ tokenid::brace_close, "}" },
67
{ tokenid::tilde, "~" },
68
{ tokenid::exclaim_equal, "!=" },
69
{ tokenid::percent_equal, "%=" },
70
{ tokenid::ampersand_ampersand, "&&" },
71
{ tokenid::ampersand_equal, "&=" },
72
{ tokenid::star_equal, "*=" },
73
{ tokenid::plus_plus, "++" },
74
{ tokenid::plus_equal, "+=" },
75
{ tokenid::minus_minus, "--" },
76
{ tokenid::minus_equal, "-=" },
77
{ tokenid::arrow, "->" },
78
{ tokenid::ellipsis, "..." },
79
{ tokenid::slash_equal, "|=" },
80
{ tokenid::colon_colon, "::" },
81
{ tokenid::less_less_equal, "<<=" },
82
{ tokenid::less_less, "<<" },
83
{ tokenid::less_equal, "<=" },
84
{ tokenid::equal_equal, "==" },
85
{ tokenid::greater_greater_equal, ">>=" },
86
{ tokenid::greater_greater, ">>" },
87
{ tokenid::greater_equal, ">=" },
88
{ tokenid::caret_equal, "^=" },
89
{ tokenid::pipe_equal, "|=" },
90
{ tokenid::pipe_pipe, "||" },
91
{ tokenid::identifier, "identifier" },
92
{ tokenid::reserved, "reserved word" },
93
{ tokenid::true_literal, "true" },
94
{ tokenid::false_literal, "false" },
95
{ tokenid::int_literal, "integral literal" },
96
{ tokenid::uint_literal, "integral literal" },
97
{ tokenid::float_literal, "floating point literal" },
98
{ tokenid::double_literal, "floating point literal" },
99
{ tokenid::string_literal, "string literal" },
100
{ tokenid::namespace_, "namespace" },
101
{ tokenid::struct_, "struct" },
102
{ tokenid::technique, "technique" },
103
{ tokenid::pass, "pass" },
104
{ tokenid::for_, "for" },
105
{ tokenid::while_, "while" },
106
{ tokenid::do_, "do" },
107
{ tokenid::if_, "if" },
108
{ tokenid::else_, "else" },
109
{ tokenid::switch_, "switch" },
110
{ tokenid::case_, "case" },
111
{ tokenid::default_, "default" },
112
{ tokenid::break_, "break" },
113
{ tokenid::continue_, "continue" },
114
{ tokenid::return_, "return" },
115
{ tokenid::discard_, "discard" },
116
{ tokenid::extern_, "extern" },
117
{ tokenid::static_, "static" },
118
{ tokenid::uniform_, "uniform" },
119
{ tokenid::volatile_, "volatile" },
120
{ tokenid::precise, "precise" },
121
{ tokenid::groupshared, "groupshared" },
122
{ tokenid::in, "in" },
123
{ tokenid::out, "out" },
124
{ tokenid::inout, "inout" },
125
{ tokenid::const_, "const" },
126
{ tokenid::linear, "linear" },
127
{ tokenid::noperspective, "noperspective" },
128
{ tokenid::centroid, "centroid" },
129
{ tokenid::nointerpolation, "nointerpolation" },
130
{ tokenid::void_, "void" },
131
{ tokenid::bool_, "bool" },
132
{ tokenid::bool2, "bool2" },
133
{ tokenid::bool3, "bool3" },
134
{ tokenid::bool4, "bool4" },
135
{ tokenid::bool2x2, "bool2x2" },
136
{ tokenid::bool2x3, "bool2x3" },
137
{ tokenid::bool2x4, "bool2x4" },
138
{ tokenid::bool3x2, "bool3x2" },
139
{ tokenid::bool3x3, "bool3x3" },
140
{ tokenid::bool3x4, "bool3x4" },
141
{ tokenid::bool4x2, "bool4x2" },
142
{ tokenid::bool4x3, "bool4x3" },
143
{ tokenid::bool4x4, "bool4x4" },
144
{ tokenid::int_, "int" },
145
{ tokenid::int2, "int2" },
146
{ tokenid::int3, "int3" },
147
{ tokenid::int4, "int4" },
148
{ tokenid::int2x2, "int2x2" },
149
{ tokenid::int2x3, "int2x3" },
150
{ tokenid::int2x4, "int2x4" },
151
{ tokenid::int3x2, "int3x2" },
152
{ tokenid::int3x3, "int3x3" },
153
{ tokenid::int3x4, "int3x4" },
154
{ tokenid::int4x2, "int4x2" },
155
{ tokenid::int4x3, "int4x3" },
156
{ tokenid::int4x4, "int4x4" },
157
{ tokenid::min16int, "min16int" },
158
{ tokenid::min16int2, "min16int2" },
159
{ tokenid::min16int3, "min16int3" },
160
{ tokenid::min16int4, "min16int4" },
161
{ tokenid::uint_, "uint" },
162
{ tokenid::uint2, "uint2" },
163
{ tokenid::uint3, "uint3" },
164
{ tokenid::uint4, "uint4" },
165
{ tokenid::uint2x2, "uint2x2" },
166
{ tokenid::uint2x3, "uint2x3" },
167
{ tokenid::uint2x4, "uint2x4" },
168
{ tokenid::uint3x2, "uint3x2" },
169
{ tokenid::uint3x3, "uint3x3" },
170
{ tokenid::uint3x4, "uint3x4" },
171
{ tokenid::uint4x2, "uint4x2" },
172
{ tokenid::uint4x3, "uint4x3" },
173
{ tokenid::uint4x4, "uint4x4" },
174
{ tokenid::min16uint, "min16uint" },
175
{ tokenid::min16uint2, "min16uint2" },
176
{ tokenid::min16uint3, "min16uint3" },
177
{ tokenid::min16uint4, "min16uint4" },
178
{ tokenid::float_, "float" },
179
{ tokenid::float2, "float2" },
180
{ tokenid::float3, "float3" },
181
{ tokenid::float4, "float4" },
182
{ tokenid::float2x2, "float2x2" },
183
{ tokenid::float2x3, "float2x3" },
184
{ tokenid::float2x4, "float2x4" },
185
{ tokenid::float3x2, "float3x2" },
186
{ tokenid::float3x3, "float3x3" },
187
{ tokenid::float3x4, "float3x4" },
188
{ tokenid::float4x2, "float4x2" },
189
{ tokenid::float4x3, "float4x3" },
190
{ tokenid::float4x4, "float4x4" },
191
{ tokenid::min16float, "min16float" },
192
{ tokenid::min16float2, "min16float2" },
193
{ tokenid::min16float3, "min16float3" },
194
{ tokenid::min16float4, "min16float4" },
195
{ tokenid::vector, "vector" },
196
{ tokenid::matrix, "matrix" },
197
{ tokenid::string_, "string" },
198
{ tokenid::texture1d, "texture1D" },
199
{ tokenid::texture2d, "texture2D" },
200
{ tokenid::texture3d, "texture3D" },
201
{ tokenid::sampler1d, "sampler1D" },
202
{ tokenid::sampler2d, "sampler2D" },
203
{ tokenid::sampler3d, "sampler3D" },
204
{ tokenid::storage1d, "storage1D" },
205
{ tokenid::storage2d, "storage2D" },
206
{ tokenid::storage3d, "storage3D" },
207
};
208
// Maps source text keywords to their token identifiers.
// Reserved C/C++/HLSL words that ReShade FX does not support map to
// 'tokenid::reserved' so the parser can report them distinctly.
// Note: Nx1 matrix names (e.g. "bool2x1") alias their vector equivalents, and
// the bare "sampler"/"storage"/"texture" names default to their 2D variants.
static const std::unordered_map<std::string_view, tokenid> s_keyword_lookup = {
	{ "asm", tokenid::reserved },
	{ "asm_fragment", tokenid::reserved },
	{ "auto", tokenid::reserved },
	{ "bool", tokenid::bool_ },
	{ "bool2", tokenid::bool2 },
	{ "bool2x1", tokenid::bool2 },
	{ "bool2x2", tokenid::bool2x2 },
	{ "bool2x3", tokenid::bool2x3 },
	{ "bool2x4", tokenid::bool2x4 },
	{ "bool3", tokenid::bool3 },
	{ "bool3x1", tokenid::bool3 },
	{ "bool3x2", tokenid::bool3x2 },
	{ "bool3x3", tokenid::bool3x3 },
	{ "bool3x4", tokenid::bool3x4 },
	{ "bool4", tokenid::bool4 },
	{ "bool4x1", tokenid::bool4 },
	{ "bool4x2", tokenid::bool4x2 },
	{ "bool4x3", tokenid::bool4x3 },
	{ "bool4x4", tokenid::bool4x4 },
	{ "break", tokenid::break_ },
	{ "case", tokenid::case_ },
	{ "cast", tokenid::reserved },
	{ "catch", tokenid::reserved },
	{ "centroid", tokenid::reserved },
	{ "char", tokenid::reserved },
	{ "class", tokenid::reserved },
	{ "column_major", tokenid::reserved },
	{ "compile", tokenid::reserved },
	{ "const", tokenid::const_ },
	{ "const_cast", tokenid::reserved },
	{ "continue", tokenid::continue_ },
	{ "default", tokenid::default_ },
	{ "delete", tokenid::reserved },
	{ "discard", tokenid::discard_ },
	{ "do", tokenid::do_ },
	{ "double", tokenid::reserved },
	{ "dword", tokenid::uint_ }, // HLSL 'dword' types are treated as unsigned integers
	{ "dword2", tokenid::uint2 },
	{ "dword2x1", tokenid::uint2 },
	{ "dword2x2", tokenid::uint2x2 },
	{ "dword2x3", tokenid::uint2x3 },
	{ "dword2x4", tokenid::uint2x4 },
	{ "dword3", tokenid::uint3, },
	{ "dword3x1", tokenid::uint3 },
	{ "dword3x2", tokenid::uint3x2 },
	{ "dword3x3", tokenid::uint3x3 },
	{ "dword3x4", tokenid::uint3x4 },
	{ "dword4", tokenid::uint4 },
	{ "dword4x1", tokenid::uint4 },
	{ "dword4x2", tokenid::uint4x2 },
	{ "dword4x3", tokenid::uint4x3 },
	{ "dword4x4", tokenid::uint4x4 },
	{ "dynamic_cast", tokenid::reserved },
	{ "else", tokenid::else_ },
	{ "enum", tokenid::reserved },
	{ "explicit", tokenid::reserved },
	{ "extern", tokenid::extern_ },
	{ "external", tokenid::reserved },
	{ "false", tokenid::false_literal },
	{ "FALSE", tokenid::false_literal },
	{ "float", tokenid::float_ },
	{ "float2", tokenid::float2 },
	{ "float2x1", tokenid::float2 },
	{ "float2x2", tokenid::float2x2 },
	{ "float2x3", tokenid::float2x3 },
	{ "float2x4", tokenid::float2x4 },
	{ "float3", tokenid::float3 },
	{ "float3x1", tokenid::float3 },
	{ "float3x2", tokenid::float3x2 },
	{ "float3x3", tokenid::float3x3 },
	{ "float3x4", tokenid::float3x4 },
	{ "float4", tokenid::float4 },
	{ "float4x1", tokenid::float4 },
	{ "float4x2", tokenid::float4x2 },
	{ "float4x3", tokenid::float4x3 },
	{ "float4x4", tokenid::float4x4 },
	{ "for", tokenid::for_ },
	{ "foreach", tokenid::reserved },
	{ "friend", tokenid::reserved },
	{ "globallycoherent", tokenid::reserved },
	{ "goto", tokenid::reserved },
	{ "groupshared", tokenid::groupshared },
	{ "half", tokenid::reserved },
	{ "half2", tokenid::reserved },
	{ "half2x1", tokenid::reserved },
	{ "half2x2", tokenid::reserved },
	{ "half2x3", tokenid::reserved },
	{ "half2x4", tokenid::reserved },
	{ "half3", tokenid::reserved },
	{ "half3x1", tokenid::reserved },
	{ "half3x2", tokenid::reserved },
	{ "half3x3", tokenid::reserved },
	{ "half3x4", tokenid::reserved },
	{ "half4", tokenid::reserved },
	{ "half4x1", tokenid::reserved },
	{ "half4x2", tokenid::reserved },
	{ "half4x3", tokenid::reserved },
	{ "half4x4", tokenid::reserved },
	{ "if", tokenid::if_ },
	{ "in", tokenid::in },
	{ "inline", tokenid::reserved },
	{ "inout", tokenid::inout },
	{ "int", tokenid::int_ },
	{ "int2", tokenid::int2 },
	{ "int2x1", tokenid::int2 },
	{ "int2x2", tokenid::int2x2 },
	{ "int2x3", tokenid::int2x3 },
	{ "int2x4", tokenid::int2x4 },
	{ "int3", tokenid::int3 },
	{ "int3x1", tokenid::int3 },
	{ "int3x2", tokenid::int3x2 },
	{ "int3x3", tokenid::int3x3 },
	{ "int3x4", tokenid::int3x4 },
	{ "int4", tokenid::int4 },
	{ "int4x1", tokenid::int4 },
	{ "int4x2", tokenid::int4x2 },
	{ "int4x3", tokenid::int4x3 },
	{ "int4x4", tokenid::int4x4 },
	{ "interface", tokenid::reserved },
	{ "linear", tokenid::linear },
	{ "long", tokenid::reserved },
	{ "matrix", tokenid::matrix },
	{ "min16float", tokenid::min16float },
	{ "min16float2", tokenid::min16float2 },
	{ "min16float3", tokenid::min16float3 },
	{ "min16float4", tokenid::min16float4 },
	{ "min16int", tokenid::min16int },
	{ "min16int2", tokenid::min16int2 },
	{ "min16int3", tokenid::min16int3 },
	{ "min16int4", tokenid::min16int4 },
	{ "min16uint", tokenid::min16uint },
	{ "min16uint2", tokenid::min16uint2 },
	{ "min16uint3", tokenid::min16uint3 },
	{ "min16uint4", tokenid::min16uint4 },
	{ "mutable", tokenid::reserved },
	{ "namespace", tokenid::namespace_ },
	{ "new", tokenid::reserved },
	{ "noinline", tokenid::reserved },
	{ "nointerpolation", tokenid::nointerpolation },
	{ "noperspective", tokenid::noperspective },
	{ "operator", tokenid::reserved },
	{ "out", tokenid::out },
	{ "packed", tokenid::reserved },
	{ "packoffset", tokenid::reserved },
	{ "pass", tokenid::pass },
	{ "precise", tokenid::precise },
	{ "private", tokenid::reserved },
	{ "protected", tokenid::reserved },
	{ "public", tokenid::reserved },
	{ "register", tokenid::reserved },
	{ "reinterpret_cast", tokenid::reserved },
	{ "restrict", tokenid::reserved },
	{ "return", tokenid::return_ },
	{ "row_major", tokenid::reserved },
	{ "sample", tokenid::reserved },
	{ "sampler", tokenid::sampler2d },
	{ "sampler1D", tokenid::sampler1d },
	{ "sampler1DArray", tokenid::reserved },
	{ "sampler2D", tokenid::sampler2d },
	{ "sampler2DArray", tokenid::reserved },
	{ "sampler2DMS", tokenid::reserved },
	{ "sampler2DMSArray", tokenid::reserved },
	{ "sampler3D", tokenid::sampler3d },
	{ "sampler_state", tokenid::reserved },
	{ "samplerCube", tokenid::reserved },
	{ "samplerCubeArray", tokenid::reserved },
	{ "samplerCUBE", tokenid::reserved },
	{ "samplerRect", tokenid::reserved },
	{ "samplerRECT", tokenid::reserved },
	{ "SamplerState", tokenid::reserved },
	{ "storage", tokenid::storage2d },
	{ "storage1D", tokenid::storage1d },
	{ "storage2D", tokenid::storage2d },
	{ "storage3D", tokenid::storage3d },
	{ "shared", tokenid::reserved },
	{ "short", tokenid::reserved },
	{ "signed", tokenid::reserved },
	{ "sizeof", tokenid::reserved },
	{ "snorm", tokenid::reserved },
	{ "static", tokenid::static_ },
	{ "static_cast", tokenid::reserved },
	{ "string", tokenid::string_ },
	{ "struct", tokenid::struct_ },
	{ "switch", tokenid::switch_ },
	{ "technique", tokenid::technique },
	{ "template", tokenid::reserved },
	{ "texture", tokenid::texture2d },
	{ "Texture1D", tokenid::reserved },
	{ "texture1D", tokenid::texture1d },
	{ "Texture1DArray", tokenid::reserved },
	{ "Texture2D", tokenid::reserved },
	{ "texture2D", tokenid::texture2d },
	{ "Texture2DArray", tokenid::reserved },
	{ "Texture2DMS", tokenid::reserved },
	{ "Texture2DMSArray", tokenid::reserved },
	{ "Texture3D", tokenid::reserved },
	{ "texture3D", tokenid::texture3d },
	{ "textureCUBE", tokenid::reserved },
	{ "TextureCube", tokenid::reserved },
	{ "TextureCubeArray", tokenid::reserved },
	{ "textureRECT", tokenid::reserved },
	{ "this", tokenid::reserved },
	{ "true", tokenid::true_literal },
	{ "TRUE", tokenid::true_literal },
	{ "try", tokenid::reserved },
	{ "typedef", tokenid::reserved },
	{ "uint", tokenid::uint_ },
	{ "uint2", tokenid::uint2 },
	{ "uint2x1", tokenid::uint2 },
	{ "uint2x2", tokenid::uint2x2 },
	{ "uint2x3", tokenid::uint2x3 },
	{ "uint2x4", tokenid::uint2x4 },
	{ "uint3", tokenid::uint3 },
	{ "uint3x1", tokenid::uint3 },
	{ "uint3x2", tokenid::uint3x2 },
	{ "uint3x3", tokenid::uint3x3 },
	{ "uint3x4", tokenid::uint3x4 },
	{ "uint4", tokenid::uint4 },
	{ "uint4x1", tokenid::uint4 },
	{ "uint4x2", tokenid::uint4x2 },
	{ "uint4x3", tokenid::uint4x3 },
	{ "uint4x4", tokenid::uint4x4 },
	{ "uniform", tokenid::uniform_ },
	{ "union", tokenid::reserved },
	{ "unorm", tokenid::reserved },
	{ "unsigned", tokenid::reserved },
	{ "using", tokenid::reserved },
	{ "vector", tokenid::vector },
	{ "virtual", tokenid::reserved },
	{ "void", tokenid::void_ },
	{ "volatile", tokenid::volatile_ },
	{ "while", tokenid::while_ }
};
442
// Maps preprocessor directive names (the identifier following '#') to their
// token identifiers. The "line" directive is absent because it receives
// special handling in 'lexer::parse_pp_directive'.
static const std::unordered_map<std::string_view, tokenid> s_pp_directive_lookup = {
	{ "define", tokenid::hash_def },
	{ "undef", tokenid::hash_undef },
	{ "if", tokenid::hash_if },
	{ "ifdef", tokenid::hash_ifdef },
	{ "ifndef", tokenid::hash_ifndef },
	{ "else", tokenid::hash_else },
	{ "elif", tokenid::hash_elif },
	{ "endif", tokenid::hash_endif },
	{ "error", tokenid::hash_error },
	{ "warning", tokenid::hash_warning },
	{ "pragma", tokenid::hash_pragma },
	{ "include", tokenid::hash_include },
};
456
457
// Returns whether 'c' is an octal digit ('0' through '7')
static bool is_octal_digit(char c)
{
	return c >= '0' && c <= '7';
}
461
// Returns whether 'c' is a decimal digit ('0' through '9')
static bool is_decimal_digit(char c)
{
	return c >= '0' && c <= '9';
}
465
static bool is_hexadecimal_digit(char c)
466
{
467
return is_decimal_digit(c) || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F');
468
}
469
470
static bool is_digit(char c, int radix)
471
{
472
switch (radix)
473
{
474
case 8:
475
return is_octal_digit(c);
476
case 10:
477
return is_decimal_digit(c);
478
case 16:
479
return is_hexadecimal_digit(c);
480
}
481
482
return false;
483
}
484
// Reinterprets the base-8 digits of 'n' as base-10 digits, e.g. 83 (which is
// 0o123) becomes 123. Used by 'parse_numeric_literal' when a literal that
// started out octal turns out to contain a decimal point.
// Fixed: the previous implementation reversed the digits into a temporary and
// then reversed them back, which dropped trailing zero digits (e.g. for input
// 8, i.e. octal "10", it returned 1 instead of 10).
static long long octal_to_decimal(long long n)
{
	long long result = 0;
	long long place = 1;

	// Peel off octal digits from least to most significant and re-add each at
	// the matching decimal place value
	while (n != 0)
	{
		result += (n & 7) * place;
		place *= 10;
		n >>= 3;
	}

	return result;
}
504
505
std::string reshadefx::token::id_to_name(tokenid id)
506
{
507
const auto it = s_token_lookup.find(id);
508
if (it != s_token_lookup.end())
509
return std::string(it->second);
510
return "unknown";
511
}
512
513
// Reads the next token from the input stream and advances past it.
// Whitespace, comments and preprocessor directives may be skipped or returned
// as tokens depending on the '_ignore_*' configuration flags.
reshadefx::token reshadefx::lexer::lex()
{
	// Column 1 (or 0) means nothing has been consumed on this line yet, which
	// matters for '#' (preprocessor directives must start a line)
	bool is_at_line_begin = _cur_location.column <= 1;

	token tok;
next_token:
	// Reset token data
	tok.location = _cur_location;
	tok.offset = input_offset();
	tok.length = 1;
	tok.literal_as_double = 0;
	tok.literal_as_string.clear();

	assert(_cur <= _end);

	// Do a character type lookup for the current character
	switch (s_type_lookup[uint8_t(*_cur)])
	{
	case 0xFF: // EOF (the NUL terminator maps to 0xFF in the lookup table)
		tok.id = tokenid::end_of_file;
		return tok;
	case SPACE:
		skip_space();
		// Collapse whitespace at the start of a line or before a line feed even when whitespace tokens are requested
		if (_ignore_whitespace || is_at_line_begin || *_cur == '\n')
			goto next_token;
		tok.id = tokenid::space;
		tok.length = input_offset() - tok.offset;
		return tok;
	case '\n':
		_cur++;
		_cur_location.line++;
		_cur_location.column = 1;
		is_at_line_begin = true;
		if (_ignore_whitespace)
			goto next_token;
		tok.id = tokenid::end_of_line;
		return tok;
	case DIGIT:
		parse_numeric_literal(tok);
		break;
	case IDENT:
		parse_identifier(tok);
		break;
	case '!':
		if (_cur[1] == '=')
			tok.id = tokenid::exclaim_equal,
			tok.length = 2;
		else
			tok.id = tokenid::exclaim;
		break;
	case '"':
		parse_string_literal(tok, _escape_string_literals);
		break;
	case '#':
		if (is_at_line_begin)
		{
			// Unrecognized or ignored directives are consumed up to the end of the line
			if (!parse_pp_directive(tok) || _ignore_pp_directives)
			{
				skip_to_next_line();
				goto next_token;
			}
		} // These braces are important so the 'else' is matched to the right 'if' statement
		else
			tok.id = tokenid::hash;
		break;
	case '$':
		tok.id = tokenid::dollar;
		break;
	case '%':
		if (_cur[1] == '=')
			tok.id = tokenid::percent_equal,
			tok.length = 2;
		else
			tok.id = tokenid::percent;
		break;
	case '&':
		if (_cur[1] == '&')
			tok.id = tokenid::ampersand_ampersand,
			tok.length = 2;
		else if (_cur[1] == '=')
			tok.id = tokenid::ampersand_equal,
			tok.length = 2;
		else
			tok.id = tokenid::ampersand;
		break;
	case '(':
		tok.id = tokenid::parenthesis_open;
		break;
	case ')':
		tok.id = tokenid::parenthesis_close;
		break;
	case '*':
		if (_cur[1] == '=')
			tok.id = tokenid::star_equal,
			tok.length = 2;
		else
			tok.id = tokenid::star;
		break;
	case '+':
		if (_cur[1] == '+')
			tok.id = tokenid::plus_plus,
			tok.length = 2;
		else if (_cur[1] == '=')
			tok.id = tokenid::plus_equal,
			tok.length = 2;
		else
			tok.id = tokenid::plus;
		break;
	case ',':
		tok.id = tokenid::comma;
		break;
	case '-':
		if (_cur[1] == '-')
			tok.id = tokenid::minus_minus,
			tok.length = 2;
		else if (_cur[1] == '=')
			tok.id = tokenid::minus_equal,
			tok.length = 2;
		else if (_cur[1] == '>')
			tok.id = tokenid::arrow,
			tok.length = 2;
		else
			tok.id = tokenid::minus;
		break;
	case '.':
		// A dot immediately followed by a digit starts a floating point literal (e.g. ".5")
		if (s_type_lookup[uint8_t(_cur[1])] == DIGIT)
			parse_numeric_literal(tok);
		else if (_cur[1] == '.' && _cur[2] == '.')
			tok.id = tokenid::ellipsis,
			tok.length = 3;
		else
			tok.id = tokenid::dot;
		break;
	case '/':
		if (_cur[1] == '/')
		{
			skip_to_next_line();
			if (_ignore_comments)
				goto next_token;
			tok.id = tokenid::single_line_comment;
			tok.length = input_offset() - tok.offset;
			return tok;
		}
		else if (_cur[1] == '*')
		{
			// Consume everything up to and including the closing "*/", tracking line numbers along the way
			while (_cur < _end)
			{
				if (*_cur == '\n')
				{
					_cur_location.line++;
					_cur_location.column = 1;
				}
				else if (_cur[0] == '*' && _cur[1] == '/')
				{
					skip(2);
					break;
				}
				skip(1);
			}
			if (_ignore_comments)
				goto next_token;
			tok.id = tokenid::multi_line_comment;
			tok.length = input_offset() - tok.offset;
			return tok;
		}
		else if (_cur[1] == '=')
			tok.id = tokenid::slash_equal,
			tok.length = 2;
		else
			tok.id = tokenid::slash;
		break;
	case ':':
		if (_cur[1] == ':')
			tok.id = tokenid::colon_colon,
			tok.length = 2;
		else
			tok.id = tokenid::colon;
		break;
	case ';':
		tok.id = tokenid::semicolon;
		break;
	case '<':
		if (_cur[1] == '<')
			if (_cur[2] == '=')
				tok.id = tokenid::less_less_equal,
				tok.length = 3;
			else
				tok.id = tokenid::less_less,
				tok.length = 2;
		else if (_cur[1] == '=')
			tok.id = tokenid::less_equal,
			tok.length = 2;
		else
			tok.id = tokenid::less;
		break;
	case '=':
		if (_cur[1] == '=')
			tok.id = tokenid::equal_equal,
			tok.length = 2;
		else
			tok.id = tokenid::equal;
		break;
	case '>':
		if (_cur[1] == '>')
			if (_cur[2] == '=')
				tok.id = tokenid::greater_greater_equal,
				tok.length = 3;
			else
				tok.id = tokenid::greater_greater,
				tok.length = 2;
		else if (_cur[1] == '=')
			tok.id = tokenid::greater_equal,
			tok.length = 2;
		else
			tok.id = tokenid::greater;
		break;
	case '?':
		tok.id = tokenid::question;
		break;
	case '@':
		tok.id = tokenid::at;
		break;
	case '[':
		tok.id = tokenid::bracket_open;
		break;
	case '\\':
		if (_cur[1] == '\n' || (_cur[1] == '\r' && _cur[2] == '\n'))
		{
			// Skip to next line if current line ends with a backslash
			skip_space();
			if (_ignore_whitespace)
				goto next_token;
			tok.id = tokenid::space;
			tok.length = input_offset() - tok.offset;
			return tok;
		}
		tok.id = tokenid::backslash;
		break;
	case ']':
		tok.id = tokenid::bracket_close;
		break;
	case '^':
		if (_cur[1] == '=')
			tok.id = tokenid::caret_equal,
			tok.length = 2;
		else
			tok.id = tokenid::caret;
		break;
	case '{':
		tok.id = tokenid::brace_open;
		break;
	case '|':
		if (_cur[1] == '=')
			tok.id = tokenid::pipe_equal,
			tok.length = 2;
		else if (_cur[1] == '|')
			tok.id = tokenid::pipe_pipe,
			tok.length = 2;
		else
			tok.id = tokenid::pipe;
		break;
	case '}':
		tok.id = tokenid::brace_close;
		break;
	case '~':
		tok.id = tokenid::tilde;
		break;
	default:
		tok.id = tokenid::unknown;
		break;
	}

	// Advance past the token text (parse_* helpers set 'tok.length' but do not move the input pointer)
	skip(tok.length);

	return tok;
}
789
790
void reshadefx::lexer::skip(size_t length)
791
{
792
_cur += length;
793
_cur_location.column += static_cast<unsigned int>(length);
794
}
795
// Skips over consecutive whitespace characters until a non-whitespace
// character (or the end of input) is reached.
void reshadefx::lexer::skip_space()
{
	while (_cur < _end)
	{
		// A backslash at the end of a line is a line continuation and is consumed like whitespace as well
		if (_cur[0] == '\\' && (_cur[1] == '\n' || (_cur[1] == '\r' && _cur[2] == '\n')))
		{
			skip(_cur[1] == '\r' ? 3 : 2);
			_cur_location.line++;
			_cur_location.column = 1;
			continue;
		}

		if (s_type_lookup[uint8_t(*_cur)] == SPACE)
			skip(1);
		else
			break;
	}
}
814
void reshadefx::lexer::skip_to_next_line()
815
{
816
// Skip each character until a new line feed is found
817
while (*_cur != '\n' && _cur < _end)
818
{
819
#if 0
820
if (_cur[0] == '\\' && (_cur[1] == '\n' || (_cur[1] == '\r' && _cur[2] == '\n')))
821
{
822
skip(_cur[1] == '\r' ? 3 : 2);
823
_cur_location.line++;
824
_cur_location.column = 1;
825
continue;
826
}
827
#endif
828
829
skip(1);
830
}
831
}
832
833
// Moves the read position back to the given byte offset into the input
// string. The offset must be within bounds; the source location members are
// intentionally left untouched by this.
void reshadefx::lexer::reset_to_offset(size_t offset)
{
	assert(offset < _input.size());
	_cur = _input.data() + offset;
}
838
839
// Reads an identifier token starting at the current position and fills in
// 'tok'. If keyword recognition is enabled, identifiers matching an entry in
// 's_keyword_lookup' are promoted to the corresponding keyword token.
// Note that this does not advance the input pointer; the caller does that
// based on 'tok.length'.
void reshadefx::lexer::parse_identifier(token &tok) const
{
	auto *const first = _cur;
	auto *last = first;

	// Advance past every character that may legally appear in an identifier
	for (unsigned int type = s_type_lookup[uint8_t(*last)];
		type == IDENT || type == DIGIT;
		type = s_type_lookup[uint8_t(*last)])
		last++;

	tok.id = tokenid::identifier;
	tok.offset = input_offset();
	tok.length = last - first;
	tok.literal_as_string.assign(first, last);

	if (_ignore_keywords)
		return;

	const auto it = s_keyword_lookup.find(tok.literal_as_string);
	if (it != s_keyword_lookup.end())
		tok.id = it->second;
}
859
// Parses a preprocessor directive starting at a '#' character.
// Returns true when 'tok' contains a directive token to hand to the caller,
// or false for the "#line" directive, which is consumed here (updating the
// current source location) and produces no token.
bool reshadefx::lexer::parse_pp_directive(token &tok)
{
	skip(1); // Skip the '#'
	skip_space(); // Skip any space between the '#' and directive
	parse_identifier(tok);

	if (const auto it = s_pp_directive_lookup.find(tok.literal_as_string);
		it != s_pp_directive_lookup.end())
	{
		tok.id = it->second;
		return true;
	}
	else if (!_ignore_line_directives && tok.literal_as_string == "line") // The #line directive needs special handling
	{
		skip(tok.length); // The 'parse_identifier' does not update the pointer to the current character, so do that now
		skip_space();
		parse_numeric_literal(tok);
		skip(tok.length);

		_cur_location.line = tok.literal_as_int;

		// Need to subtract one since the line containing #line does not count into the statistics
		if (_cur_location.line != 0)
			_cur_location.line--;

		skip_space();

		// Check if this #line directive has an file name attached to it
		if (_cur[0] == '"')
		{
			token temptok;
			// Parse without escape handling, since this is a file path
			parse_string_literal(temptok, false);

			_cur_location.source = std::move(temptok.literal_as_string);
		}

		// Do not return the #line directive as token to the caller
		return false;
	}

	// Unrecognized directive; let the caller decide what to do with it
	tok.id = tokenid::hash_unknown;

	return true;
}
903
// Parses a double-quoted string literal starting at the current position.
// The decoded contents are stored in 'tok.literal_as_string' and 'tok.length'
// covers the literal including both quotes. When 'escape' is true, C-style
// escape sequences (octal, hex and the usual single-character escapes) are
// interpreted; unknown escapes keep the escaped character verbatim.
// Fixed: lowercase hexadecimal escape digits were decoded incorrectly — the
// old expression "c - 55 - 32 * (c & 0x20)" subtracts 32 * 32 = 1024 for
// lowercase characters instead of 32, so e.g. "\xa" did not yield 0x0A.
void reshadefx::lexer::parse_string_literal(token &tok, bool escape)
{
	auto *const begin = _cur, *end = begin + 1; // Skip first quote character right away

	for (auto c = *end; c != '"'; c = *++end)
	{
		if (c == '\n' || end >= _end)
		{
			// Line feed reached, the string literal is done (technically this should be an error, but the lexer does not report errors, so ignore it)
			end--;
			if (end[0] == '\r') end--;
			break;
		}

		if (c == '\r')
		{
			// Silently ignore carriage return characters
			continue;
		}

		if (unsigned int n = (end[1] == '\r' && end + 2 < _end) ? 2 : 1;
			c == '\\' && end[n] == '\n')
		{
			// Escape character found at end of line, the string literal continues on to the next line
			end += n;
			_cur_location.line++;
			continue;
		}

		// Handle escape sequences
		if (c == '\\' && escape)
		{
			unsigned int n = 0;

			// Any character following the '\' is not parsed as usual, so increment pointer here (this makes sure '\"' does not abort the outer loop as well)
			switch (c = *++end)
			{
			case '0':
			case '1':
			case '2':
			case '3':
			case '4':
			case '5':
			case '6':
			case '7':
				// Octal escape: up to three octal digits (bounds checked before dereferencing)
				for (unsigned int i = 0; i < 3 && end < _end && is_octal_digit(*end); i++)
				{
					c = *end++;
					n = (n << 3) | (c - '0');
				}
				// For simplicity the number is limited to what fits in a single character
				c = n & 0xFF;
				// The octal parsing loop above incremented one past the escape sequence, so step back
				end--;
				break;
			case 'a':
				c = '\a';
				break;
			case 'b':
				c = '\b';
				break;
			case 'f':
				c = '\f';
				break;
			case 'n':
				c = '\n';
				break;
			case 'r':
				c = '\r';
				break;
			case 't':
				c = '\t';
				break;
			case 'v':
				c = '\v';
				break;
			case 'x':
				// Hexadecimal escape: consume all following hex digits
				if (is_hexadecimal_digit(*++end))
				{
					while (end < _end && is_hexadecimal_digit(*end))
					{
						c = *end++;
						// Fold letters to lowercase and convert to their numeric value ('a'/'A' -> 10 ... 'f'/'F' -> 15)
						n = (n << 4) | (is_decimal_digit(c) ? (c - '0') : (((c | 0x20) - 'a') + 10));
					}

					// For simplicity the number is limited to what fits in a single character
					c = n & 0xFF;
				}
				// The hexadecimal parsing loop and check above incremented one past the escape sequence, so step back
				end--;
				break;
			}
		}

		tok.literal_as_string += c;
	}

	tok.id = tokenid::string_literal;
	tok.length = end - begin + 1;

	// Free up unused memory
	tok.literal_as_string.shrink_to_fit();
}
1006
// Parses an integer or floating point literal starting at the current
// position and stores its value and type in 'tok'. Supports octal ("0...")
// and hexadecimal ("0x...") prefixes, decimal points, exponents ("e+5") and
// the 'f'/'l'/'u' type suffixes. Does not advance the input pointer; the
// caller does that based on 'tok.length'.
void reshadefx::lexer::parse_numeric_literal(token &tok) const
{
	// This routine handles both integer and floating point numbers
	auto *const begin = _cur, *end = _cur;
	// 'mantissa_size' counts accepted digits, 'decimal_location' records where
	// the decimal point sat within them (-1 while none has been seen)
	int mantissa_size = 0, decimal_location = -1, radix = 10;
	long long fraction = 0, exponent = 0;

	// If a literal starts with '0' it is either an octal or hexadecimal ('0x') value
	if (begin[0] == '0')
	{
		if (begin[1] == 'x' || begin[1] == 'X')
		{
			end = begin + 2;
			radix = 16;
		}
		else
		{
			radix = 8;
		}
	}

	// Accumulate up to 19 mantissa digits (what safely fits in a long long)
	for (; mantissa_size <= 18; mantissa_size++, end++)
	{
		auto c = *end;

		if (is_decimal_digit(c))
		{
			c -= '0';

			// Digits 8 and 9 are invalid in octal literals
			if (c >= radix)
				break;
		}
		else if (radix == 16)
		{
			// Hexadecimal values can contain the letters A to F
			if (c >= 'A' && c <= 'F')
				c -= 'A' - 10;
			else if (c >= 'a' && c <= 'f')
				c -= 'a' - 10;
			else
				break;
		}
		else
		{
			if (c != '.' || decimal_location >= 0)
				break;

			// Found a decimal character, as such convert current values
			if (radix == 8)
			{
				// The digits accumulated as octal so far were really decimal (e.g. "023.5")
				radix = 10;
				fraction = octal_to_decimal(fraction);
			}

			decimal_location = mantissa_size;
			continue;
		}

		fraction *= radix;
		fraction += c;
	}

	// Ignore additional digits that cannot affect the value
	while (is_digit(*end, radix))
		end++;

	// If a decimal character was found, this is a floating point value, otherwise an integer one
	if (decimal_location < 0)
	{
		tok.id = tokenid::int_literal;
		decimal_location = mantissa_size;
	}
	else
	{
		tok.id = tokenid::float_literal;
		// The decimal point itself was counted as a mantissa position above
		mantissa_size -= 1;
	}

	// Literals can be followed by an exponent
	if (*end == 'E' || *end == 'e')
	{
		auto tmp = end + 1;
		const bool negative = *tmp == '-';

		if (negative || *tmp == '+')
			tmp++;

		if (is_decimal_digit(*tmp))
		{
			end = tmp;

			tok.id = tokenid::float_literal;

			do {
				exponent *= 10;
				exponent += (*end++) - '0';
			} while (is_decimal_digit(*end));

			if (negative)
				exponent = -exponent;
		}
	}

	// Various suffixes force specific literal types
	if (*end == 'F' || *end == 'f')
	{
		end++; // Consume the suffix
		tok.id = tokenid::float_literal;
	}
	else if (*end == 'L' || *end == 'l')
	{
		end++; // Consume the suffix
		tok.id = tokenid::double_literal;
	}
	else if (tok.id == tokenid::int_literal && (*end == 'U' || *end == 'u')) // The 'u' suffix is only valid on integers and needs to be ignored otherwise
	{
		end++; // Consume the suffix
		tok.id = tokenid::uint_literal;
	}

	if (tok.id == tokenid::float_literal || tok.id == tokenid::double_literal)
	{
		// Shift the exponent by the number of fractional mantissa digits
		exponent += decimal_location - mantissa_size;

		const bool exponent_negative = exponent < 0;

		if (exponent_negative)
			exponent = -exponent;

		// Limit exponent
		if (exponent > 511)
			exponent = 511;

		// Quick exponent calculation via binary decomposition of the exponent
		double e = 1.0;
		const double powers_of_10[] = {
			10.,
			100.,
			1.0e4,
			1.0e8,
			1.0e16,
			1.0e32,
			1.0e64,
			1.0e128,
			1.0e256
		};

		for (auto d = powers_of_10; exponent != 0; exponent >>= 1, d++)
			if (exponent & 1)
				e *= *d;

		if (tok.id == tokenid::float_literal)
			tok.literal_as_float = exponent_negative ? fraction / static_cast<float>(e) : fraction * static_cast<float>(e);
		else
			tok.literal_as_double = exponent_negative ? fraction / e : fraction * e;
	}
	else
	{
		// Limit the maximum value to what fits into our token structure
		tok.literal_as_uint = static_cast<unsigned int>(fraction & 0xFFFFFFFF);
	}

	tok.length = end - begin;
}
1170
1171