CoCalc -- effect_preprocessor.cpp

GitHub Repository: stenzek/duckstation
Path: blob/master/dep/reshadefx/src/effect_preprocessor.cpp
⁴²⁴⁶ views
1
/*
2
 * Copyright (C) 2014 Patrick Mours
3
 * SPDX-License-Identifier: BSD-3-Clause
4
 */
5

6
#include "effect_lexer.hpp"
7
#include "effect_preprocessor.hpp"
8
#include <cstdio> // fclose, fopen, fread, fseek
9
#include <cassert>
10
#include <algorithm> // std::find_if
11

12
#if !defined(_WIN32)
// Outside of Windows the native narrow path encoding is assumed to be UTF-8 already, so the
// UTF-8 specific filesystem helpers are simply mapped onto their plain counterparts:
//   std::filesystem::u8path(p)         ->  std::filesystem::path(p)
//   std::filesystem::path::u8string()  ->  std::filesystem::path::string()
#define u8path(p) path(p)
#define u8string() string()
#endif // !defined(_WIN32)
17

18
// Operators understood by the '#if' expression evaluator.
// The numeric values are used as indices into 's_precedence_lookup', so the order must not change.
enum op_type
{
	op_none = -1,          // Not an operator

	op_or = 0,             // a || b
	op_and = 1,            // a && b
	op_bitor = 2,          // a | b
	op_bitxor = 3,         // a ^ b
	op_bitand = 4,         // a & b
	op_not_equal = 5,      // a != b
	op_equal = 6,          // a == b
	op_less = 7,           // a < b
	op_greater = 8,        // a > b
	op_less_equal = 9,     // a <= b
	op_greater_equal = 10, // a >= b
	op_leftshift = 11,     // a << b
	op_rightshift = 12,    // a >> b
	op_add = 13,           // a + b
	op_subtract = 14,      // a - b
	op_modulo = 15,        // a % b
	op_divide = 16,        // a / b
	op_multiply = 17,      // a * b
	op_plus = 18,          // +a (unary)
	op_negate = 19,        // -a (unary)
	op_not = 20,           // !a
	op_bitnot = 21,        // ~a
	op_parentheses = 22    // Marker pushed onto the operator stack for an opening parenthesis
};
46

47
// Special character values used as markers by the macro replacement machinery.
// NOTE(review): the code consuming these markers is outside of this section - presumably they are
// embedded into macro replacement lists; confirm against 'create_macro_replacement_list'.
enum macro_replacement
{
	macro_replacement_start = '\x00',
	macro_replacement_argument = '\xFD',
	macro_replacement_stringize = '\xFE',
	macro_replacement_concat = '\xFF',
};
54

55
// Precedence of every operator, indexed by its 'op_type' value.
// An operator with a larger number binds tighter than one with a smaller number.
static const int s_precedence_lookup[] = {
	0,  // op_or
	1,  // op_and
	2,  // op_bitor
	3,  // op_bitxor
	4,  // op_bitand
	5,  // op_not_equal
	6,  // op_equal
	7,  // op_less
	7,  // op_greater
	7,  // op_less_equal
	7,  // op_greater_equal
	8,  // op_leftshift
	8,  // op_rightshift
	9,  // op_add
	9,  // op_subtract
	10, // op_modulo
	10, // op_divide
	10, // op_multiply
	11, // op_plus
	11, // op_negate
	11, // op_not
	11  // op_bitnot
};
63

64
static bool read_file(const std::string &path, std::string &file_data, reshadefx::preprocessor::include_read_file_callback &cb)
65
{
66
	if (!cb(path, file_data))
67
		return false;
68

69
	// Append a new line feed to the end of the input string to avoid issues with parsing
70
	file_data.push_back('\n');
71

72
	// Remove BOM (0xefbbbf means 0xfeff)
73
	if (file_data.size() >= 3 &&
74
		static_cast<unsigned char>(file_data[0]) == 0xef &&
75
		static_cast<unsigned char>(file_data[1]) == 0xbb &&
76
		static_cast<unsigned char>(file_data[2]) == 0xbf)
77
		file_data.erase(0, 3);
78

79
	return true;
80
}
81

82
// Default file reading callback: loads the whole file at 'path' into 'data'.
// 'path' is interpreted as UTF-8, matching the 'u8path'/'u8string' conversions used for include paths.
// Returns true only if the file could be opened, its size determined and all of its bytes read.
// When false is returned the contents of 'data' are unspecified.
bool reshadefx::preprocessor::stdfs_read_file_callback(const std::string &path, std::string &data)
{
	const std::filesystem::path fspath = std::filesystem::u8path(path);
#ifndef _WIN32
	FILE *const file = fopen(fspath.c_str(), "rb");
#else
	FILE *const file = _wfsopen(fspath.generic_wstring().c_str(), L"rb", SH_DENYWR);
#endif
	if (file == nullptr)
		return false;

	// Determine the file size by seeking to the end ('ftell' reports -1 on failure, which must not be used as a size)
	long file_size = -1;
	if (fseek(file, 0, SEEK_END) == 0)
		file_size = ftell(file);

	if (file_size < 0 || fseek(file, 0, SEEK_SET) != 0)
	{
		fclose(file);
		return false;
	}

	// Make room for the file contents before reading into the string buffer
	data.resize(static_cast<size_t>(file_size));

	const size_t file_size_read = fread(data.data(), 1, data.size(), file);

	// No longer need to have a handle open to the file, since all data was read, so can safely close it
	fclose(file);

	return file_size_read == data.size();
}
108

109
// Default file existence callback: reports whether 'path' (interpreted as UTF-8) refers to an existing file system entry.
// Uses the non-throwing overload of 'std::filesystem::exists', so a file system error simply yields false
// instead of an exception escaping from the preprocessor.
bool reshadefx::preprocessor::stdfs_file_exists_callback(const std::string &path)
{
	std::error_code ec;
	return std::filesystem::exists(std::filesystem::u8path(path), ec);
}
113

114
// Returns 's' wrapped in double quotes, with a backslash inserted in front of every occurrence of 'ESCAPE_CHAR'.
template <char ESCAPE_CHAR = '\\'>
static std::string escape_string(std::string s)
{
	std::string quoted;
	quoted.reserve(s.size() + 2);

	quoted += '\"';
	for (const char ch : s)
	{
		if (ch == ESCAPE_CHAR)
			quoted += '\\';
		quoted += ch;
	}
	quoted += '\"';

	return quoted;
}
121

122
// Creates a preprocessor that accesses include files through the default 'stdfs_*' callbacks
// until different ones are installed via 'set_include_callbacks'.
reshadefx::preprocessor::preprocessor() :
	_file_exists_cb(stdfs_file_exists_callback),
	_read_file_cb(stdfs_read_file_callback)
{
}
127
// Nothing to release by hand, the members clean up after themselves
reshadefx::preprocessor::~preprocessor() = default;
130

131
void reshadefx::preprocessor::set_include_callbacks(include_file_exists_callback file_exists,
132
                                                    include_read_file_callback read_file)
133
{
134
	_file_exists_cb = file_exists;
135
	_read_file_cb = read_file;
136
}
137

138
void reshadefx::preprocessor::add_include_path(const std::string &path)
139
{
140
	assert(!path.empty());
141
	_include_paths.push_back(std::filesystem::path(path));
142
}
143
bool reshadefx::preprocessor::add_macro_definition(const std::string &name, const macro &macro)
144
{
145
	assert(!name.empty());
146
	return _macros.emplace(name, macro).second;
147
}
148

149
bool reshadefx::preprocessor::append_file(const std::string &path)
150
{
151
	std::string source_code;
152
	if (!read_file(path, source_code, _read_file_cb))
153
		return false;
154

155
	return append_string(std::move(source_code), path);
156
}
157
bool reshadefx::preprocessor::append_string(std::string source_code, const std::string &path /* = std::string() */)
158
{
159
	// Enforce all input strings to end with a line feed
160
	assert(!source_code.empty() && source_code.back() == '\n');
161

162
	// Only consider new errors added below for the success of this call
163
	const size_t errors_offset = _errors.length();
164

165
	// Give this push a name, so that lexer location starts at a new line
166
	// This is necessary in case this string starts with a preprocessor directive, since the lexer only reports those as such if they appear at the beginning of a new line
167
	// But without a name, the lexer location is set to the last token location, which most likely will not be at the start of the line
168
	push(std::move(source_code), path.empty() ? "unknown" : path);
169
	parse();
170

171
	return _errors.find(": preprocessor error: ", errors_offset) == std::string::npos;
172
}
173

174
std::vector<std::filesystem::path> reshadefx::preprocessor::included_files() const
175
{
176
	std::vector<std::filesystem::path> files;
177
	files.reserve(_file_cache.size());
178
	for (const std::pair<std::string, std::string> &cache_entry : _file_cache)
179
		files.push_back(std::filesystem::u8path(cache_entry.first));
180
	return files;
181
}
182
std::vector<std::pair<std::string, std::string>> reshadefx::preprocessor::used_macro_definitions() const
183
{
184
	std::vector<std::pair<std::string, std::string>> defines;
185
	defines.reserve(_used_macros.size());
186
	for (const std::string &name : _used_macros)
187
		if (const auto it = _macros.find(name);
188
			// Do not include function-like macros, since they are more likely to contain a complex replacement list
189
			it != _macros.end() && !it->second.is_function_like)
190
			defines.emplace_back(name, it->second.replacement_list);
191
	return defines;
192
}
193

194
void reshadefx::preprocessor::error(const location &location, const std::string &message)
195
{
196
	_errors += location.source;
197
	_errors += '(' + std::to_string(location.line) + ", " + std::to_string(location.column) + ')';
198
	_errors += ": preprocessor error: ";
199
	_errors += message;
200
	_errors += '\n';
201
}
202
void reshadefx::preprocessor::warning(const location &location, const std::string &message)
203
{
204
	_errors += location.source;
205
	_errors += '(' + std::to_string(location.line) + ", " + std::to_string(location.column) + ')';
206
	_errors += ": preprocessor warning: ";
207
	_errors += message;
208
	_errors += '\n';
209
}
210

211
void reshadefx::preprocessor::push(std::string input, const std::string &name)
212
{
213
	location start_location = !name.empty() ?
214
		// Start at the beginning of the file when pushing a new file
215
		location(name, 1) :
216
		// Start with last known token location when pushing an unnamed string
217
		_token.location;
218

219
	input_level level = { name };
220
	level.lexer.reset(new lexer(
221
		std::move(input),
222
		true  /* ignore_comments */,
223
		false /* ignore_whitespace */,
224
		false /* ignore_pp_directives */,
225
		false /* ignore_line_directives */,
226
		true  /* ignore_keywords */,
227
		false /* escape_string_literals */,
228
		start_location));
229
	level.next_token.id = tokenid::unknown;
230
	level.next_token.location = start_location; // This is used in 'consume' to initialize the output location
231

232
	// Inherit hidden macros from parent
233
	if (!_input_stack.empty())
234
		level.hidden_macros = _input_stack.back().hidden_macros;
235

236
	_input_stack.push_back(std::move(level));
237
	_next_input_index = _input_stack.size() - 1;
238

239
	// Advance into the input stack to update next token
240
	consume();
241
}
242

243
// Checks whether the upcoming token is 'tokid', without consuming it.
// Once the input stack is empty, only a query for the end of file succeeds.
bool reshadefx::preprocessor::peek(tokenid tokid) const
{
	if (!_input_stack.empty())
		return _input_stack[_next_input_index].next_token == tokid;

	return tokid == tokenid::end_of_file;
}
250
void reshadefx::preprocessor::consume()
251
{
252
	_current_input_index = _next_input_index;
253

254
	if (_input_stack.empty())
255
	{
256
		// End of input has been reached already (this can happen when the input text is not terminated with a new line)
257
		assert(_current_input_index == 0);
258
		return;
259
	}
260

261
	// Clear out input stack, now that the current token is overwritten
262
	while (_input_stack.size() > (_current_input_index + 1))
263
		_input_stack.pop_back();
264

265
	// Update location information after switching input levels
266
	input_level &input = _input_stack[_current_input_index];
267
	if (!input.name.empty() && input.name != _output_location.source)
268
	{
269
		_output += "#line " + std::to_string(input.next_token.location.line) + " \"" + input.name + "\"\n";
270
		// Line number is increased before checking against next token in 'tokenid::end_of_line' handling in 'parse' function below, so compensate for that here
271
		_output_location.line = input.next_token.location.line - 1;
272
		_output_location.source = input.name;
273
	}
274

275
	// Set current token
276
	_token = std::move(input.next_token);
277
	_current_token_raw_data = input.lexer->input_string().substr(_token.offset, _token.length);
278

279
	// Get the next token
280
	input.next_token = input.lexer->lex();
281

282
	// Verify string literals (since the lexer cannot throw errors itself)
283
	if (_token == tokenid::string_literal && _current_token_raw_data.back() != '\"')
284
		error(_token.location, "unterminated string literal");
285

286
	// Pop input level if lexical analysis has reached the end of it
287
	// This ensures the EOF token is not consumed until the very last file
288
	while (peek(tokenid::end_of_file))
289
	{
290
		// Remove any unterminated blocks from the stack
291
		for (; !_if_stack.empty() && _if_stack.back().input_index >= _next_input_index; _if_stack.pop_back())
292
			error(_if_stack.back().pp_token.location, "unterminated #if");
293

294
		if (_next_input_index == 0)
295
		{
296
			// End of input has been reached, so cannot pop further and this is the last token
297
			_input_stack.pop_back();
298
			return;
299
		}
300
		else
301
		{
302
			_next_input_index -= 1;
303
		}
304
	}
305
}
306
// Consumes tokens up to and including the next 'tokid' token, stopping early when the end of file is reached.
void reshadefx::preprocessor::consume_until(tokenid tokid)
{
	for (;;)
	{
		if (accept(tokid))
			return; // Found and consumed the requested token

		if (peek(tokenid::end_of_file))
			return; // Nothing left to skip

		consume();
	}
}
313

314
// Consumes the upcoming token if it is 'tokid' and returns whether it did.
// With 'ignore_whitespace' set, any space tokens in front of it are consumed first (even if the check then fails).
bool reshadefx::preprocessor::accept(tokenid tokid, bool ignore_whitespace)
{
	if (ignore_whitespace)
		while (peek(tokenid::space))
			consume();

	if (!peek(tokid))
		return false;

	consume();
	return true;
}
332
// Like 'accept', but reports a syntax error when the upcoming token is not 'tokid'.
// Once all input is exhausted, expecting the end of a line or file counts as success and nothing is reported.
bool reshadefx::preprocessor::expect(tokenid tokid)
{
	if (accept(tokid))
		return true;

	if (_input_stack.empty())
		return tokid == tokenid::end_of_line || tokid == tokenid::end_of_file;

	const input_level &input = _input_stack[_next_input_index];

	// Report the offending token relative to the source that is currently written to the output
	token actual_token = input.next_token;
	actual_token.location.source = _output_location.source;

	if (actual_token == tokenid::end_of_line)
	{
		error(actual_token.location, "syntax error: unexpected new line");
	}
	else
	{
		const std::string token_text = input.lexer->input_string().substr(actual_token.offset, actual_token.length);
		error(actual_token.location, "syntax error: unexpected token '" + token_text + '\'');
	}

	return false;
}
353

354
void reshadefx::preprocessor::parse()
355
{
356
	std::string line;
357

358
	// Consume all tokens in the input
359
	while (!peek(tokenid::end_of_file))
360
	{
361
		consume();
362

363
		_recursion_count = 0;
364

365
		const bool skip = !_if_stack.empty() && _if_stack.back().skipping;
366

367
		switch (_token)
368
		{
369
		case tokenid::hash_if:
370
			parse_if();
371
			if (!skip && !expect(tokenid::end_of_line))
372
				consume_until(tokenid::end_of_line);
373
			continue;
374
		case tokenid::hash_ifdef:
375
			parse_ifdef();
376
			if (!skip && !expect(tokenid::end_of_line))
377
				consume_until(tokenid::end_of_line);
378
			continue;
379
		case tokenid::hash_ifndef:
380
			parse_ifndef();
381
			if (!skip && !expect(tokenid::end_of_line))
382
				consume_until(tokenid::end_of_line);
383
			continue;
384
		case tokenid::hash_else:
385
			parse_else();
386
			if (!skip && !expect(tokenid::end_of_line))
387
				consume_until(tokenid::end_of_line);
388
			continue;
389
		case tokenid::hash_elif:
390
			parse_elif();
391
			if (!skip && !expect(tokenid::end_of_line))
392
				consume_until(tokenid::end_of_line);
393
			continue;
394
		case tokenid::hash_endif:
395
			parse_endif();
396
			if (!skip && !expect(tokenid::end_of_line))
397
				consume_until(tokenid::end_of_line);
398
			continue;
399
		default:
400
			// All other tokens are handled below
401
			break;
402
		}
403

404
		if (skip)
405
			// Ignore token since the current section is disabled
406
			continue;
407

408
		switch (_token)
409
		{
410
		case tokenid::hash_def:
411
			parse_def();
412
			if (!expect(tokenid::end_of_line))
413
				consume_until(tokenid::end_of_line);
414
			continue;
415
		case tokenid::hash_undef:
416
			parse_undef();
417
			if (!expect(tokenid::end_of_line))
418
				consume_until(tokenid::end_of_line);
419
			continue;
420
		case tokenid::hash_error:
421
			parse_error();
422
			if (!expect(tokenid::end_of_line))
423
				consume_until(tokenid::end_of_line);
424
			continue;
425
		case tokenid::hash_warning:
426
			parse_warning();
427
			if (!expect(tokenid::end_of_line))
428
				consume_until(tokenid::end_of_line);
429
			continue;
430
		case tokenid::hash_pragma:
431
			parse_pragma();
432
			if (!expect(tokenid::end_of_line))
433
				consume_until(tokenid::end_of_line);
434
			continue;
435
		case tokenid::hash_include:
436
			parse_include();
437
			continue;
438
		case tokenid::hash_unknown:
439
			// Standalone "#" is valid and should be ignored
440
			if (_token.length != 0)
441
				error(_token.location, "unrecognized preprocessing directive '" + _token.literal_as_string + '\'');
442
			if (!expect(tokenid::end_of_line))
443
				consume_until(tokenid::end_of_line);
444
			continue;
445
		case tokenid::end_of_line:
446
			if (line.empty())
447
				continue; // Do not append empty lines to output, instead emit "#line" statements
448
			_output_location.line++;
449
			if (_token.location.line != _output_location.line)
450
			{
451
				_output += "#line " + std::to_string(_token.location.line) + '\n';
452
				_output_location.line = _token.location.line;
453
			}
454
			_output += line;
455
			_output += '\n';
456
			line.clear();
457
			continue;
458
		case tokenid::identifier:
459
			if (evaluate_identifier_as_macro())
460
				continue;
461
			[[fallthrough]];
462
		default:
463
			line += _current_token_raw_data;
464
			break;
465
		}
466
	}
467

468
	// Append the last line after the EOF token was reached to the output
469
	_output += line;
470
	_output += '\n';
471
}
472

473
void reshadefx::preprocessor::parse_def()
474
{
475
	if (!expect(tokenid::identifier))
476
		return;
477
	if (_token.literal_as_string == "defined")
478
		return warning(_token.location, "macro name 'defined' is reserved");
479

480
	macro m;
481
	const location location = std::move(_token.location);
482
	const std::string macro_name = std::move(_token.literal_as_string);
483

484
	// Only create function-like macro if the parenthesis follows the macro name without any whitespace between
485
	if (accept(tokenid::parenthesis_open, false))
486
	{
487
		m.is_function_like = true;
488

489
		while (accept(tokenid::identifier))
490
		{
491
			m.parameters.push_back(_token.literal_as_string);
492

493
			if (!accept(tokenid::comma))
494
				break;
495
		}
496

497
		if (accept(tokenid::ellipsis))
498
			m.is_variadic = true;
499

500
		if (!expect(tokenid::parenthesis_close))
501
			return;
502
	}
503

504
	create_macro_replacement_list(m);
505

506
	if (!add_macro_definition(macro_name, m))
507
		return error(location, "redefinition of '" + macro_name + "'");
508
}
509
void reshadefx::preprocessor::parse_undef()
510
{
511
	if (!expect(tokenid::identifier))
512
		return;
513
	if (_token.literal_as_string == "defined")
514
		return warning(_token.location, "macro name 'defined' is reserved");
515

516
	_macros.erase(_token.literal_as_string);
517
}
518

519
void reshadefx::preprocessor::parse_if()
520
{
521
	if_level level;
522
	level.pp_token = _token;
523
	level.input_index = _current_input_index;
524

525
	const bool parent_skipping = !_if_stack.empty() && _if_stack.back().skipping;
526
	if (parent_skipping)
527
	{
528
		level.value = false;
529
		level.skipping = true;
530
	}
531
	else
532
	{
533
		// Evaluate expression after updating 'pp_token', so that it points at the beginning # token
534
		level.value = evaluate_expression();
535
		level.skipping = !level.value;
536
	}
537

538
	_if_stack.push_back(std::move(level));
539
}
540
void reshadefx::preprocessor::parse_ifdef()
541
{
542
	if_level level;
543
	level.pp_token = _token;
544
	level.input_index = _current_input_index;
545

546
	if (!expect(tokenid::identifier))
547
		return;
548

549
	const bool parent_skipping = !_if_stack.empty() && _if_stack.back().skipping;
550
	if (parent_skipping)
551
	{
552
		level.value = false;
553
		level.skipping = true;
554
	}
555
	else
556
	{
557
		level.value = is_defined(_token.literal_as_string);
558
		level.skipping = !level.value;
559

560
		// Only add to used macro list if this #ifdef is active and the macro was not defined before
561
		if (const auto it = _macros.find(_token.literal_as_string); it == _macros.end() || it->second.is_predefined)
562
			_used_macros.emplace(_token.literal_as_string);
563
	}
564

565
	_if_stack.push_back(std::move(level));
566
}
567
void reshadefx::preprocessor::parse_ifndef()
568
{
569
	if_level level;
570
	level.pp_token = _token;
571
	level.input_index = _current_input_index;
572

573
	if (!expect(tokenid::identifier))
574
		return;
575

576
	const bool parent_skipping = !_if_stack.empty() && _if_stack.back().skipping;
577
	if (parent_skipping)
578
	{
579
		level.value = false;
580
		level.skipping = true;
581
	}
582
	else
583
	{
584
		level.value = !is_defined(_token.literal_as_string);
585
		level.skipping = !level.value;
586

587
		// Only add to used macro list if this #ifndef is active and the macro was not defined before
588
		if (const auto it = _macros.find(_token.literal_as_string); it == _macros.end() || it->second.is_predefined)
589
			_used_macros.emplace(_token.literal_as_string);
590
	}
591

592
	_if_stack.push_back(std::move(level));
593
}
594
void reshadefx::preprocessor::parse_elif()
595
{
596
	if (_if_stack.empty())
597
		return error(_token.location, "missing #if for #elif");
598

599
	if_level &level = _if_stack.back();
600
	if (level.pp_token == tokenid::hash_else)
601
		return error(_token.location, "#elif is not allowed after #else");
602

603
	// Update 'pp_token' before evaluating expression, so that it points at the beginning # token
604
	level.pp_token = _token;
605
	level.input_index = _current_input_index;
606

607
	const bool parent_skipping = _if_stack.size() > 1 && _if_stack[_if_stack.size() - 2].skipping;
608
	if (parent_skipping)
609
	{
610
		level.value = false;
611
		level.skipping = true;
612
	}
613
	else
614
	{
615
		const bool condition_result = evaluate_expression();
616
		level.skipping = level.value || !condition_result;
617

618
		if (!level.value)
619
			level.value = condition_result;
620
	}
621
}
622
void reshadefx::preprocessor::parse_else()
623
{
624
	if (_if_stack.empty())
625
		return error(_token.location, "missing #if for #else");
626

627
	if_level &level = _if_stack.back();
628
	if (level.pp_token == tokenid::hash_else)
629
		return error(_token.location, "#else is not allowed after #else");
630

631
	level.pp_token = _token;
632
	level.input_index = _current_input_index;
633

634
	const bool parent_skipping = _if_stack.size() > 1 && _if_stack[_if_stack.size() - 2].skipping;
635
	if (parent_skipping)
636
	{
637
		level.value = false;
638
		level.skipping = true;
639
	}
640
	else
641
	{
642
		level.skipping = parent_skipping || level.value;
643

644
		if (!level.value)
645
			level.value = true;
646
	}
647
}
648
void reshadefx::preprocessor::parse_endif()
649
{
650
	if (_if_stack.empty())
651
		return error(_token.location, "missing #if for #endif");
652

653
	_if_stack.pop_back();
654
}
655

656
void reshadefx::preprocessor::parse_error()
657
{
658
	const location keyword_location = std::move(_token.location);
659

660
	if (!expect(tokenid::string_literal))
661
		return;
662

663
	error(keyword_location, _token.literal_as_string);
664
}
665
void reshadefx::preprocessor::parse_warning()
666
{
667
	const location keyword_location = std::move(_token.location);
668

669
	if (!expect(tokenid::string_literal))
670
		return;
671

672
	warning(keyword_location, _token.literal_as_string);
673
}
674

675
void reshadefx::preprocessor::parse_pragma()
676
{
677
	const location keyword_location = std::move(_token.location);
678

679
	if (!expect(tokenid::identifier))
680
		return;
681

682
	std::string pragma = std::move(_token.literal_as_string);
683
	std::string pragma_args;
684

685
	// Ignore whitespace preceding the argument list
686
	accept(tokenid::space);
687

688
	while (!peek(tokenid::end_of_line) && !peek(tokenid::end_of_file))
689
	{
690
		consume();
691

692
		if (_token == tokenid::identifier && evaluate_identifier_as_macro())
693
			continue;
694

695
		// Collapse all whitespace down to a single space
696
		if (_token == tokenid::space)
697
			pragma_args += ' ';
698
		else
699
			pragma_args += _current_token_raw_data;
700
	}
701

702
	if (pragma == "once")
703
	{
704
		// Clear file contents, so that future include statements simply push an empty string instead of these file contents again
705
		if (const auto it = _file_cache.find(_output_location.source); it != _file_cache.end())
706
			it->second.clear();
707
		return;
708
	}
709

710
	if (pragma == "warning" || pragma == "reshade")
711
	{
712
		_used_pragmas.emplace_back(std::move(pragma), std::move(pragma_args));
713
		return;
714
	}
715

716
	warning(keyword_location, "unknown pragma ignored");
717
}
718

719
void reshadefx::preprocessor::parse_include()
720
{
721
	const location keyword_location = std::move(_token.location);
722

723
	while (accept(tokenid::identifier))
724
	{
725
		if (!evaluate_identifier_as_macro())
726
		{
727
			error(_token.location, "syntax error: unexpected identifier in #include");
728
			consume_until(tokenid::end_of_line);
729
			return;
730
		}
731
	}
732

733
	if (!expect(tokenid::string_literal))
734
	{
735
		consume_until(tokenid::end_of_line);
736
		return;
737
	}
738

739
	std::filesystem::path file_name = std::filesystem::u8path(_token.literal_as_string);
740
	std::filesystem::path file_path = std::filesystem::u8path(_output_location.source);
741
	file_path.replace_filename(file_name);
742

743
	if (!_file_exists_cb(file_path.u8string()))
744
		for (const std::filesystem::path &include_path : _include_paths)
745
			if (_file_exists_cb((file_path = include_path / file_name).u8string()))
746
				break;
747

748
	const std::string file_path_string = file_path.u8string();
749

750
	// Detect recursive include and abort to avoid infinite loop
751
	if (std::find_if(_input_stack.begin(), _input_stack.end(),
752
			[&file_path_string](const input_level &level) { return level.name == file_path_string; }) != _input_stack.end())
753
		return error(_token.location, "recursive #include");
754

755
	std::string input;
756
	if (const auto it = _file_cache.find(file_path_string); it != _file_cache.end())
757
	{
758
		input = it->second;
759
	}
760
	else
761
	{
762
		if (!read_file(file_path_string, input, _read_file_cb))
763
			return error(keyword_location, "could not open included file '" + file_name.u8string() + '\'');
764

765
		_file_cache.emplace(file_path_string, input);
766
	}
767

768
	// Skip end of line character following the include statement before pushing, so that the line number is already pointing to the next line when popping out of it again
769
	if (!expect(tokenid::end_of_line))
770
		consume_until(tokenid::end_of_line);
771

772
	// Clear out input stack before pushing include, so that hidden macros do not bleed into the include
773
	while (_input_stack.size() > (_next_input_index + 1))
774
		_input_stack.pop_back();
775

776
	push(std::move(input), file_path_string);
777
}
778

779
bool reshadefx::preprocessor::evaluate_expression()
780
{
781
	struct rpn_token
782
	{
783
		int value;
784
		bool is_op;
785
	};
786

787
	size_t rpn_index = 0;
788
	size_t stack_index = 0;
789
	const size_t STACK_SIZE = 128;
790
	rpn_token rpn[STACK_SIZE];
791
	int stack[STACK_SIZE];
792

793
	// Keep track of previous token to figure out data type of expression
794
	tokenid previous_token = _token;
795

796
	// Run shunting-yard algorithm
797
	while (!peek(tokenid::end_of_line) && !peek(tokenid::end_of_file))
798
	{
799
		if (stack_index >= STACK_SIZE || rpn_index >= STACK_SIZE)
800
			return error(_token.location, "expression evaluator ran out of stack space"), false;
801

802
		consume();
803

804
		auto op = op_none;
805
		bool left_associative = true;
806
		bool parenthesis_matched = false;
807

808
		switch (_token)
809
		{
810
		case tokenid::space:
811
			continue;
812
		case tokenid::exclaim:
813
			op = op_not;
814
			left_associative = false;
815
			break;
816
		case tokenid::percent:
817
			op = op_modulo;
818
			break;
819
		case tokenid::ampersand:
820
			op = op_bitand;
821
			break;
822
		case tokenid::star:
823
			op = op_multiply;
824
			break;
825
		case tokenid::plus:
826
			left_associative =
827
				previous_token == tokenid::int_literal ||
828
				previous_token == tokenid::uint_literal ||
829
				previous_token == tokenid::identifier ||
830
				previous_token == tokenid::parenthesis_close;
831
			op = left_associative ? op_add : op_plus;
832
			break;
833
		case tokenid::minus:
834
			left_associative =
835
				previous_token == tokenid::int_literal ||
836
				previous_token == tokenid::uint_literal ||
837
				previous_token == tokenid::identifier ||
838
				previous_token == tokenid::parenthesis_close;
839
			op = left_associative ? op_subtract : op_negate;
840
			break;
841
		case tokenid::slash:
842
			op = op_divide;
843
			break;
844
		case tokenid::less:
845
			op = op_less;
846
			break;
847
		case tokenid::greater:
848
			op = op_greater;
849
			break;
850
		case tokenid::caret:
851
			op = op_bitxor;
852
			break;
853
		case tokenid::pipe:
854
			op = op_bitor;
855
			break;
856
		case tokenid::tilde:
857
			op = op_bitnot;
858
			left_associative = false;
859
			break;
860
		case tokenid::exclaim_equal:
861
			op = op_not_equal;
862
			break;
863
		case tokenid::ampersand_ampersand:
864
			op = op_and;
865
			break;
866
		case tokenid::less_less:
867
			op = op_leftshift;
868
			break;
869
		case tokenid::less_equal:
870
			op = op_less_equal;
871
			break;
872
		case tokenid::equal_equal:
873
			op = op_equal;
874
			break;
875
		case tokenid::greater_greater:
876
			op = op_rightshift;
877
			break;
878
		case tokenid::greater_equal:
879
			op = op_greater_equal;
880
			break;
881
		case tokenid::pipe_pipe:
882
			op = op_or;
883
			break;
884
		default:
885
			// This is not an operator token
886
			break;
887
		}
888

889
		switch (_token)
890
		{
891
		case tokenid::parenthesis_open:
892
			stack[stack_index++] = op_parentheses;
893
			break;
894
		case tokenid::parenthesis_close:
895
			parenthesis_matched = false;
896
			while (stack_index > 0)
897
			{
898
				const int op2 = stack[--stack_index];
899
				if (op2 == op_parentheses)
900
				{
901
					parenthesis_matched = true;
902
					break;
903
				}
904

905
				rpn[rpn_index++] = { op2, true };
906
			}
907

908
			if (!parenthesis_matched)
909
				return error(_token.location, "unmatched ')'"), false;
910
			break;
911
		case tokenid::identifier:
912
			if (evaluate_identifier_as_macro())
913
				continue;
914

915
			if (_token.literal_as_string == "exists")
916
			{
917
				const bool has_parentheses = accept(tokenid::parenthesis_open);
918

919
				while (accept(tokenid::identifier))
920
				{
921
					if (!evaluate_identifier_as_macro())
922
					{
923
						error(_token.location, "syntax error: unexpected identifier after 'exists'");
924
						return false;
925
					}
926
				}
927

928
				if (!expect(tokenid::string_literal))
929
					return false;
930

931
				std::filesystem::path file_name = std::filesystem::u8path(_token.literal_as_string);
932
				std::filesystem::path file_path = std::filesystem::u8path(_output_location.source);
933
				file_path.replace_filename(file_name);
934

935
				if (has_parentheses && !expect(tokenid::parenthesis_close))
936
					return false;
937

938
				if (!_file_exists_cb(file_path.u8string()))
939
					for (const std::filesystem::path &include_path : _include_paths)
940
						if (_file_exists_cb((file_path = include_path / file_name).u8string()))
941
							break;
942

943
				rpn[rpn_index++] = { _file_exists_cb(file_path.u8string()) ? 1 : 0, false };
944
				continue;
945
			}
946
			if (_token.literal_as_string == "defined")
947
			{
948
				const bool has_parentheses = accept(tokenid::parenthesis_open);
949

950
				if (!expect(tokenid::identifier))
951
					return false;
952

953
				const std::string macro_name = std::move(_token.literal_as_string);
954

955
				if (has_parentheses && !expect(tokenid::parenthesis_close))
956
					return false;
957

958
				rpn[rpn_index++] = { is_defined(macro_name) ? 1 : 0, false };
959
				continue;
960
			}
961

962
			// An identifier that cannot be replaced with a number becomes zero
963
			rpn[rpn_index++] = { 0, false };
964
			break;
965
		case tokenid::int_literal:
966
		case tokenid::uint_literal:
967
			rpn[rpn_index++] = { _token.literal_as_int, false };
968
			break;
969
		default:
970
			if (op == op_none)
971
				return error(_token.location, "invalid expression"), false;
972

973
			while (stack_index > 0)
974
			{
975
				const int prev_op = stack[stack_index - 1];
976
				if (prev_op == op_parentheses)
977
					break;
978

979
				if (left_associative ?
980
					(s_precedence_lookup[op] > s_precedence_lookup[prev_op]) :
981
					(s_precedence_lookup[op] >= s_precedence_lookup[prev_op]))
982
					break;
983

984
				stack_index--;
985
				rpn[rpn_index++] = { prev_op, true };
986
			}
987

988
			stack[stack_index++] = op;
989
			break;
990
		}
991

992
		previous_token = _token;
993
	}
994

995
	while (stack_index > 0)
996
	{
997
		const int op = stack[--stack_index];
998
		if (op == op_parentheses)
999
			return error(_token.location, "unmatched ')'"), false;
1000

1001
		rpn[rpn_index++] = { op, true };
1002
	}
1003

1004
#define UNARY_OPERATION(op) { \
1005
	if (stack_index < 1) \
1006
		return error(_token.location, "invalid expression"), 0; \
1007
	stack[stack_index - 1] = op stack[stack_index - 1]; \
1008
	}
1009
#define BINARY_OPERATION(op) { \
1010
	if (stack_index < 2) \
1011
		return error(_token.location, "invalid expression"), 0; \
1012
	stack[stack_index - 2] = stack[stack_index - 2] op stack[stack_index - 1]; \
1013
	stack_index--; \
1014
	}
1015

1016
	// Evaluate reverse polish notation output
1017
	for (rpn_token *token = rpn; rpn_index--; token++)
1018
	{
1019
		if (token->is_op)
1020
		{
1021
			switch (token->value)
1022
			{
1023
			case op_or:
1024
				BINARY_OPERATION(||);
1025
				break;
1026
			case op_and:
1027
				BINARY_OPERATION(&&);
1028
				break;
1029
			case op_bitor:
1030
				BINARY_OPERATION(|);
1031
				break;
1032
			case op_bitxor:
1033
				BINARY_OPERATION(^);
1034
				break;
1035
			case op_bitand:
1036
				BINARY_OPERATION(&);
1037
				break;
1038
			case op_not_equal:
1039
				BINARY_OPERATION(!=);
1040
				break;
1041
			case op_equal:
1042
				BINARY_OPERATION(==);
1043
				break;
1044
			case op_less:
1045
				BINARY_OPERATION(<);
1046
				break;
1047
			case op_greater:
1048
				BINARY_OPERATION(>);
1049
				break;
1050
			case op_less_equal:
1051
				BINARY_OPERATION(<=);
1052
				break;
1053
			case op_greater_equal:
1054
				BINARY_OPERATION(>=);
1055
				break;
1056
			case op_leftshift:
1057
				BINARY_OPERATION(<<);
1058
				break;
1059
			case op_rightshift:
1060
				BINARY_OPERATION(>>);
1061
				break;
1062
			case op_add:
1063
				BINARY_OPERATION(+);
1064
				break;
1065
			case op_subtract:
1066
				BINARY_OPERATION(-);
1067
				break;
1068
			case op_modulo:
1069
				if (stack[stack_index - 1] == 0)
1070
					return error(_token.location, "right operand of '%' is zero"), 0;
1071
				BINARY_OPERATION(%);
1072
				break;
1073
			case op_divide:
1074
				if (stack[stack_index - 1] == 0)
1075
					return error(_token.location, "division by zero"), 0;
1076
				BINARY_OPERATION(/);
1077
				break;
1078
			case op_multiply:
1079
				BINARY_OPERATION(*);
1080
				break;
1081
			case op_plus:
1082
				UNARY_OPERATION(+);
1083
				break;
1084
			case op_negate:
1085
				UNARY_OPERATION(-);
1086
				break;
1087
			case op_not:
1088
				UNARY_OPERATION(!);
1089
				break;
1090
			case op_bitnot:
1091
				UNARY_OPERATION(~);
1092
				break;
1093
			}
1094
		}
1095
		else
1096
		{
1097
			stack[stack_index++] = token->value;
1098
		}
1099
	}
1100

1101
	if (stack_index != 1)
1102
		return error(_token.location, "invalid expression"), false;
1103

1104
	return stack[0] != 0;
1105
}
1106

1107
bool reshadefx::preprocessor::evaluate_identifier_as_macro()
1108
{
1109
	if (_token.literal_as_string == "__LINE__")
1110
	{
1111
		push(std::to_string(_token.location.line));
1112
		return true;
1113
	}
1114
	if (_token.literal_as_string == "__FILE__")
1115
	{
1116
		push(escape_string(_token.location.source));
1117
		return true;
1118
	}
1119
	if (_token.literal_as_string == "__FILE_STEM__")
1120
	{
1121
		const std::filesystem::path file_stem = std::filesystem::u8path(_token.location.source).stem();
1122
		push(escape_string(file_stem.u8string()));
1123
		return true;
1124
	}
1125
	if (_token.literal_as_string == "__FILE_STEM_HASH__")
1126
	{
1127
		const std::filesystem::path file_stem = std::filesystem::u8path(_token.location.source).stem();
1128
		push(std::to_string(std::hash<std::string>()(file_stem.u8string()) & 0xFFFFFFFF));
1129
		return true;
1130
	}
1131
	if (_token.literal_as_string == "__FILE_NAME__")
1132
	{
1133
		const std::filesystem::path file_name = std::filesystem::u8path(_token.location.source).filename();
1134
		push(escape_string(file_name.u8string()));
1135
		return true;
1136
	}
1137
	if (_token.literal_as_string == "__FILE_NAME_HASH__")
1138
	{
1139
		const std::filesystem::path file_name = std::filesystem::u8path(_token.location.source).filename();
1140
		push(std::to_string(std::hash<std::string>()(file_name.u8string()) & 0xFFFFFFFF));
1141
		return true;
1142
	}
1143

1144
	const auto it = _macros.find(_token.literal_as_string);
1145
	if (it == _macros.end())
1146
		return false;
1147

1148
	if (!_input_stack.empty())
1149
	{
1150
		const std::unordered_set<std::string> &hidden_macros = _input_stack[_current_input_index].hidden_macros;
1151
		if (hidden_macros.find(_token.literal_as_string) != hidden_macros.end())
1152
			return false;
1153
	}
1154

1155
	const location macro_location = _token.location;
1156
	if (_recursion_count++ >= 256)
1157
		return error(macro_location, "macro recursion too high"), false;
1158

1159
	std::vector<std::string> arguments;
1160
	if (it->second.is_function_like)
1161
	{
1162
		if (!accept(tokenid::parenthesis_open))
1163
			return false; // Function like macro used without arguments, handle that like a normal identifier instead
1164

1165
		while (true)
1166
		{
1167
			int parentheses_level = 0;
1168
			std::string argument;
1169

1170
			// Ignore whitespace preceding the argument
1171
			accept(tokenid::space);
1172

1173
			if (accept(tokenid::parenthesis_close))
1174
				break; // Special case for when there are no arguments
1175

1176
			while (true)
1177
			{
1178
				if (peek(tokenid::end_of_file))
1179
					return error(macro_location, "unexpected end of file in macro expansion"), false;
1180

1181
				// Consume all tokens of the argument
1182
				consume();
1183

1184
				if (_token == tokenid::comma && parentheses_level == 0 && !(it->second.is_variadic && arguments.size() == it->second.parameters.size()))
1185
					break; // Comma marks end of an argument (unless this is the last argument in a variadic macro invocation)
1186
				if (_token == tokenid::parenthesis_open)
1187
					parentheses_level++;
1188
				if (_token == tokenid::parenthesis_close && --parentheses_level < 0)
1189
					break;
1190

1191
				// Collapse all whitespace down to a single space
1192
				if (_token == tokenid::space)
1193
					argument += ' ';
1194
				else
1195
					argument += _current_token_raw_data;
1196
			}
1197

1198
			// Trim whitespace following the argument
1199
			if (argument.size() && argument.back() == ' ')
1200
				argument.pop_back();
1201

1202
			arguments.push_back(std::move(argument));
1203

1204
			if (parentheses_level < 0)
1205
				break;
1206
		}
1207
	}
1208

1209
	expand_macro(it->first, it->second, arguments);
1210

1211
	return true;
1212
}
1213

1214
bool reshadefx::preprocessor::is_defined(const std::string &name) const
1215
{
1216
	return _macros.find(name) != _macros.end() ||
1217
		// Check built-in macros as well
1218
		name == "__LINE__" ||
1219
		name == "__FILE__" ||
1220
		name == "__FILE_NAME__" ||
1221
		name == "__FILE_STEM__";
1222
}
1223

1224
void reshadefx::preprocessor::expand_macro(const std::string &name, const macro &macro, const std::vector<std::string> &arguments)
1225
{
1226
	if (macro.replacement_list.empty())
1227
		return;
1228

1229
	// Verify argument count for function-like macros
1230
	if (arguments.size() < macro.parameters.size())
1231
		return warning(_token.location, "not enough arguments for function-like macro invocation '" + name + "'");
1232
	if (arguments.size() > macro.parameters.size() && !macro.is_variadic)
1233
		return warning(_token.location, "too many arguments for function-like macro invocation '" + name + "'");
1234

1235
	std::string input;
1236
	input.reserve(macro.replacement_list.size());
1237

1238
	for (size_t offset = 0; offset < macro.replacement_list.size(); ++offset)
1239
	{
1240
		if (macro.replacement_list[offset] != macro_replacement_start)
1241
		{
1242
			input += macro.replacement_list[offset];
1243
			continue;
1244
		}
1245

1246
		// This is a special replacement sequence
1247
		const char type = macro.replacement_list[++offset];
1248
		const char index = macro.replacement_list[++offset];
1249
		if (static_cast<size_t>(index) >= arguments.size())
1250
		{
1251
			if (macro.is_variadic)
1252
			{
1253
				// The concatenation operator has a special meaning when placed between a comma and a variable argument, deleting the preceding comma
1254
				if (type == macro_replacement_concat && input.back() == ',')
1255
					input.pop_back();
1256
				if (type == macro_replacement_stringize)
1257
					input += "\"\"";
1258
			}
1259
			continue;
1260
		}
1261

1262
		switch (type)
1263
		{
1264
		case macro_replacement_argument:
1265
			// Argument prescan
1266
			push(arguments[index] + static_cast<char>(macro_replacement_argument));
1267
			while (true)
1268
			{
1269
				// Consume all tokens of the argument (until the end marker is reached)
1270
				consume();
1271

1272
				if (_token == tokenid::unknown) // 'macro_replacement_argument' is 'tokenid::unknown'
1273
					break;
1274
				if (_token == tokenid::identifier && evaluate_identifier_as_macro())
1275
					continue;
1276

1277
				input += _current_token_raw_data;
1278
			}
1279
			assert(_current_token_raw_data[0] == macro_replacement_argument);
1280
			break;
1281
		case macro_replacement_concat:
1282
			input += arguments[index];
1283
			break;
1284
		case macro_replacement_stringize:
1285
			// Adds backslashes to escape quotes
1286
			input += escape_string<'\"'>(arguments[index]);
1287
			break;
1288
		}
1289
	}
1290

1291
	push(std::move(input));
1292

1293
	// Avoid expanding macros again that are referencing themselves
1294
	_input_stack[_current_input_index].hidden_macros.insert(name);
1295
}
1296

1297
// Reads the remainder of the current line and stores it in 'macro.replacement_list'.
// References to macro parameters are encoded as three byte sequences (start marker, replacement type, parameter index),
// everything else is copied as raw token text with whitespace collapsed.
void reshadefx::preprocessor::create_macro_replacement_list(macro &macro)
{
	// Since the number of parameters is encoded in the string, it may not exceed the available size of a char
	if (macro.parameters.size() >= std::numeric_limits<unsigned char>::max())
	{
		error(_token.location, "too many macro parameters");
		return;
	}

	std::string &list = macro.replacement_list;

	// Resolves an identifier to its parameter index ('__VA_ARGS__' of a variadic macro maps to the index following the named parameters)
	const auto find_parameter = [&](const std::string &identifier, char &index) {
		const auto pos = std::find(macro.parameters.begin(), macro.parameters.end(), identifier);
		if (pos == macro.parameters.end() && !(macro.is_variadic && identifier == "__VA_ARGS__"))
			return false;
		index = static_cast<char>(std::distance(macro.parameters.begin(), pos));
		return true;
	};
	// Appends a special replacement sequence to the list
	const auto append_sequence = [&list](char type, char index) {
		list += static_cast<char>(macro_replacement_start);
		list += type;
		list += index;
	};

	// Ignore whitespace preceding the replacement list
	accept(tokenid::space);

	// Set after a ## operator, so that a directly following parameter is not macro expanded
	bool concat_pending = false;

	while (!peek(tokenid::end_of_line) && !peek(tokenid::end_of_file))
	{
		consume();

		if (_token == tokenid::space)
		{
			// Collapse all whitespace down to a single space (this does not reset a pending concatenation)
			list += ' ';
			continue;
		}

		if (_token == tokenid::hash)
		{
			if (accept(tokenid::hash, false))
			{
				// This is a ## token concatenation operator
				if (list.empty())
				{
					error(_token.location, "## cannot appear at start of macro expansion");
					return;
				}
				if (peek(tokenid::end_of_line))
				{
					error(_token.location, "## cannot appear at end of macro expansion");
					return;
				}

				// Remove any whitespace preceding or following the concatenation operator (so "a ## b" becomes "ab")
				if (list.back() == ' ')
					list.pop_back();
				accept(tokenid::space);

				// Disable macro expansion for any argument preceding or following the ## token concatenation operator
				if (const size_t length = list.size();
					length > 2 && list[length - 2] == macro_replacement_argument)
					list[length - 2] = static_cast<char>(macro_replacement_concat);
				concat_pending = true;
				continue;
			}

			if (macro.is_function_like)
			{
				// This is a # stringize operator, which has to be applied to a parameter
				if (!expect(tokenid::identifier))
					return;

				char index = 0;
				if (!find_parameter(_token.literal_as_string, index))
				{
					error(_token.location, "# must be followed by parameter name");
					return;
				}

				append_sequence(static_cast<char>(macro_replacement_stringize), index);
				concat_pending = false;
				continue;
			}

			// A single # in an object-like macro is copied like any other token below
		}
		else if (_token == tokenid::minus)
		{
			// Special case to handle things like "#define NUM -1\n -NUM", which would otherwise result in "--1", making parsing fail
			if (list.empty())
				list += ' ';
		}
		else if (_token == tokenid::identifier)
		{
			char index = 0;
			if (find_parameter(_token.literal_as_string, index))
			{
				append_sequence(static_cast<char>(concat_pending ? macro_replacement_concat : macro_replacement_argument), index);
				concat_pending = false;
				continue;
			}
		}

		// Token needs no special handling, so its raw data is added to the macro
		list += _current_token_raw_data;
		concat_pending = false;
	}

	// Trim whitespace following the replacement list
	if (!list.empty() && list.back() == ' ')
		list.pop_back();
}
1383

1384
Product

Resources

Company