Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/kyua/utils/text/templates.cpp
48199 views
1
// Copyright 2012 The Kyua Authors.
2
// All rights reserved.
3
//
4
// Redistribution and use in source and binary forms, with or without
5
// modification, are permitted provided that the following conditions are
6
// met:
7
//
8
// * Redistributions of source code must retain the above copyright
9
// notice, this list of conditions and the following disclaimer.
10
// * Redistributions in binary form must reproduce the above copyright
11
// notice, this list of conditions and the following disclaimer in the
12
// documentation and/or other materials provided with the distribution.
13
// * Neither the name of Google Inc. nor the names of its contributors
14
// may be used to endorse or promote products derived from this software
15
// without specific prior written permission.
16
//
17
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
29
#include "utils/text/templates.hpp"
30
31
#include <algorithm>
32
#include <fstream>
33
#include <sstream>
34
#include <stack>
35
36
#include "utils/format/macros.hpp"
37
#include "utils/fs/path.hpp"
38
#include "utils/noncopyable.hpp"
39
#include "utils/sanity.hpp"
40
#include "utils/text/exceptions.hpp"
41
#include "utils/text/operations.ipp"
42
43
namespace text = utils::text;
44
45
46
namespace {
47
48
49
/// Definition of a template statement.
50
///
51
/// A template statement is a particular line in the input file that is
52
/// preceeded by a template marker. This class provides a high-level
53
/// representation of the contents of such statement and a mechanism to parse
54
/// the textual line into this high-level representation.
55
class statement_def {
56
public:
57
/// Types of the known statements.
58
enum statement_type {
59
/// Alternative clause of a conditional.
60
///
61
/// Takes no arguments.
62
type_else,
63
64
/// End of conditional marker.
65
///
66
/// Takes no arguments.
67
type_endif,
68
69
/// End of loop marker.
70
///
71
/// Takes no arguments.
72
type_endloop,
73
74
/// Beginning of a conditional.
75
///
76
/// Takes a single argument, which denotes the name of the variable or
77
/// vector to check for existence. This is the only expression
78
/// supported.
79
type_if,
80
81
/// Beginning of a loop over all the elements of a vector.
82
///
83
/// Takes two arguments: the name of the vector over which to iterate
84
/// and the name of the iterator to later index this vector.
85
type_loop,
86
};
87
88
private:
89
/// Internal data describing the structure of a particular statement type.
90
struct type_descriptor {
91
/// The native type of the statement.
92
statement_type type;
93
94
/// The expected number of arguments.
95
unsigned int n_arguments;
96
97
/// Constructs a new type descriptor.
98
///
99
/// \param type_ The native type of the statement.
100
/// \param n_arguments_ The expected number of arguments.
101
type_descriptor(const statement_type type_,
102
const unsigned int n_arguments_)
103
: type(type_), n_arguments(n_arguments_)
104
{
105
}
106
};
107
108
/// Mapping of statement type names to their definitions.
109
typedef std::map< std::string, type_descriptor > types_map;
110
111
/// Description of the different statement types.
112
///
113
/// This static map is initialized once and reused later for any statement
114
/// lookup. Unfortunately, we cannot perform this initialization in a
115
/// static manner without C++11.
116
static types_map _types;
117
118
/// Generates a new types definition map.
119
///
120
/// \return A new types definition map, to be assigned to _types.
121
static types_map
122
generate_types_map(void)
123
{
124
// If you change this, please edit the comments in the enum above.
125
types_map types;
126
types.insert(types_map::value_type(
127
"else", type_descriptor(type_else, 0)));
128
types.insert(types_map::value_type(
129
"endif", type_descriptor(type_endif, 0)));
130
types.insert(types_map::value_type(
131
"endloop", type_descriptor(type_endloop, 0)));
132
types.insert(types_map::value_type(
133
"if", type_descriptor(type_if, 1)));
134
types.insert(types_map::value_type(
135
"loop", type_descriptor(type_loop, 2)));
136
return types;
137
}
138
139
public:
140
/// The type of the statement.
141
statement_type type;
142
143
/// The arguments to the statement, in textual form.
144
const std::vector< std::string > arguments;
145
146
/// Creates a new statement.
147
///
148
/// \param type_ The type of the statement.
149
/// \param arguments_ The arguments to the statement.
150
statement_def(const statement_type& type_,
151
const std::vector< std::string >& arguments_) :
152
type(type_), arguments(arguments_)
153
{
154
#if !defined(NDEBUG)
155
for (types_map::const_iterator iter = _types.begin();
156
iter != _types.end(); ++iter) {
157
const type_descriptor& descriptor = (*iter).second;
158
if (descriptor.type == type_) {
159
PRE(descriptor.n_arguments == arguments_.size());
160
return;
161
}
162
}
163
UNREACHABLE;
164
#endif
165
}
166
167
/// Parses a statement.
168
///
169
/// \param line The textual representation of the statement without any
170
/// prefix.
171
///
172
/// \return The parsed statement.
173
///
174
/// \throw text::syntax_error If the statement is not correctly defined.
175
static statement_def
176
parse(const std::string& line)
177
{
178
if (_types.empty())
179
_types = generate_types_map();
180
181
const std::vector< std::string > words = text::split(line, ' ');
182
if (words.empty())
183
throw text::syntax_error("Empty statement");
184
185
const types_map::const_iterator iter = _types.find(words[0]);
186
if (iter == _types.end())
187
throw text::syntax_error(F("Unknown statement '%s'") % words[0]);
188
const type_descriptor& descriptor = (*iter).second;
189
190
if (words.size() - 1 != descriptor.n_arguments)
191
throw text::syntax_error(F("Invalid number of arguments for "
192
"statement '%s'") % words[0]);
193
194
std::vector< std::string > new_arguments;
195
new_arguments.resize(words.size() - 1);
196
std::copy(words.begin() + 1, words.end(), new_arguments.begin());
197
198
return statement_def(descriptor.type, new_arguments);
199
}
200
};
201
202
203
statement_def::types_map statement_def::_types;
204
205
206
/// Bookkeeping data for one loop of the template.
///
/// Groups together the parameters of a loop statement and the stream position
/// recorded for it.
struct loop_def {
    /// The name of the vector over which this loop is iterating.
    std::string vector;

    /// The name of the iterator defined by this loop.
    std::string iterator;

    /// Position in the input to which to rewind to on looping.
    ///
    /// This position points to the line after the loop statement, not the loop
    /// itself.  Keeping it here lets the loop be repeated without having to
    /// re-process the loop statement.
    std::istream::pos_type position;

    /// Constructs a new loop definition.
    ///
    /// \param vector_ The name of the vector (first argument).
    /// \param iterator_ The name of the iterator (second argument).
    /// \param position_ Position of the next line after the loop statement.
    loop_def(const std::string& vector_,
             const std::string& iterator_,
             const std::istream::pos_type position_)
        : vector(vector_),
          iterator(iterator_),
          position(position_)
    {
    }
};
234
235
236
/// Stateful class to instantiate the templates in an input stream.
237
///
238
/// The goal of this parser is to scan the input once and not buffer anything in
239
/// memory. The only exception are loops: loops are reinterpreted on every
240
/// iteration from the same input file by rewidining the stream to the
241
/// appropriate position.
242
class templates_parser : utils::noncopyable {
243
/// The templates to apply.
244
///
245
/// Note that this is not const because the parser has to have write access
246
/// to the templates. In particular, it needs to be able to define the
247
/// iterators as regular variables.
248
text::templates_def _templates;
249
250
/// Prefix that marks a line as a statement.
251
const std::string _prefix;
252
253
/// Delimiter to surround an expression instantiation.
254
const std::string _delimiter;
255
256
/// Whether to skip incoming lines or not.
257
///
258
/// The top of the stack is true whenever we encounter a conditional that
259
/// evaluates to false or a loop that does not have any iterations left.
260
/// Under these circumstances, we need to continue scanning the input stream
261
/// until we find the matching closing endif or endloop construct.
262
///
263
/// This is a stack rather than a plain boolean to allow us deal with
264
/// if-else clauses.
265
std::stack< bool > _skip;
266
267
/// Current count of nested conditionals.
268
unsigned int _if_level;
269
270
/// Level of the top-most conditional that evaluated to false.
271
unsigned int _exit_if_level;
272
273
/// Current count of nested loops.
274
unsigned int _loop_level;
275
276
/// Level of the top-most loop that does not have any iterations left.
277
unsigned int _exit_loop_level;
278
279
/// Information about all the nested loops up to the current point.
280
std::stack< loop_def > _loops;
281
282
/// Checks if a line is a statement or not.
283
///
284
/// \param line The line to validate.
285
///
286
/// \return True if the line looks like a statement, which is determined by
287
/// checking if the line starts by the predefined prefix.
288
bool
289
is_statement(const std::string& line)
290
{
291
return ((line.length() >= _prefix.length() &&
292
line.substr(0, _prefix.length()) == _prefix) &&
293
(line.length() < _delimiter.length() ||
294
line.substr(0, _delimiter.length()) != _delimiter));
295
}
296
297
/// Parses a given statement line into a statement definition.
298
///
299
/// \param line The line to validate; it must be a valid statement.
300
///
301
/// \return The parsed statement.
302
///
303
/// \throw text::syntax_error If the input is not a valid statement.
304
statement_def
305
parse_statement(const std::string& line)
306
{
307
PRE(is_statement(line));
308
return statement_def::parse(line.substr(_prefix.length()));
309
}
310
311
/// Processes a line from the input when not in skip mode.
312
///
313
/// \param line The line to be processed.
314
/// \param input The input stream from which the line was read. The current
315
/// position in the stream must be after the line being processed.
316
/// \param output The output stream into which to write the results.
317
///
318
/// \throw text::syntax_error If the input is not valid.
319
void
320
handle_normal(const std::string& line, std::istream& input,
321
std::ostream& output)
322
{
323
if (!is_statement(line)) {
324
// Fast path. Mostly to avoid an indentation level for the big
325
// chunk of code below.
326
output << line << '\n';
327
return;
328
}
329
330
const statement_def statement = parse_statement(line);
331
332
switch (statement.type) {
333
case statement_def::type_else:
334
_skip.top() = !_skip.top();
335
break;
336
337
case statement_def::type_endif:
338
_if_level--;
339
break;
340
341
case statement_def::type_endloop: {
342
PRE(_loops.size() == _loop_level);
343
loop_def& loop = _loops.top();
344
345
const std::size_t next_index = 1 + text::to_type< std::size_t >(
346
_templates.get_variable(loop.iterator));
347
348
if (next_index < _templates.get_vector(loop.vector).size()) {
349
_templates.add_variable(loop.iterator, F("%s") % next_index);
350
input.seekg(loop.position);
351
} else {
352
_loop_level--;
353
_loops.pop();
354
_templates.remove_variable(loop.iterator);
355
}
356
} break;
357
358
case statement_def::type_if: {
359
_if_level++;
360
const std::string value = _templates.evaluate(
361
statement.arguments[0]);
362
if (value.empty() || value == "0" || value == "false") {
363
_exit_if_level = _if_level;
364
_skip.push(true);
365
} else {
366
_skip.push(false);
367
}
368
} break;
369
370
case statement_def::type_loop: {
371
_loop_level++;
372
373
const loop_def loop(statement.arguments[0], statement.arguments[1],
374
input.tellg());
375
if (_templates.get_vector(loop.vector).empty()) {
376
_exit_loop_level = _loop_level;
377
_skip.push(true);
378
} else {
379
_templates.add_variable(loop.iterator, "0");
380
_loops.push(loop);
381
_skip.push(false);
382
}
383
} break;
384
}
385
}
386
387
/// Processes a line from the input when in skip mode.
388
///
389
/// \param line The line to be processed.
390
///
391
/// \throw text::syntax_error If the input is not valid.
392
void
393
handle_skip(const std::string& line)
394
{
395
PRE(_skip.top());
396
397
if (!is_statement(line))
398
return;
399
400
const statement_def statement = parse_statement(line);
401
switch (statement.type) {
402
case statement_def::type_else:
403
if (_exit_if_level == _if_level)
404
_skip.top() = !_skip.top();
405
break;
406
407
case statement_def::type_endif:
408
INV(_if_level >= _exit_if_level);
409
if (_if_level == _exit_if_level)
410
_skip.top() = false;
411
_if_level--;
412
_skip.pop();
413
break;
414
415
case statement_def::type_endloop:
416
INV(_loop_level >= _exit_loop_level);
417
if (_loop_level == _exit_loop_level)
418
_skip.top() = false;
419
_loop_level--;
420
_skip.pop();
421
break;
422
423
case statement_def::type_if:
424
_if_level++;
425
_skip.push(true);
426
break;
427
428
case statement_def::type_loop:
429
_loop_level++;
430
_skip.push(true);
431
break;
432
433
default:
434
break;
435
}
436
}
437
438
/// Evaluates expressions on a given input line.
439
///
440
/// An expression is surrounded by _delimiter on both sides. We scan the
441
/// string from left to right finding any expressions that may appear, yank
442
/// them out and call templates_def::evaluate() to get their value.
443
///
444
/// Lonely or unbalanced appearances of _delimiter on the input line are
445
/// not considered an error, given that the user may actually want to supply
446
/// that character sequence without being interpreted as a template.
447
///
448
/// \param in_line The input line from which to evaluate expressions.
449
///
450
/// \return The evaluated line.
451
///
452
/// \throw text::syntax_error If the expressions in the line are malformed.
453
std::string
454
evaluate(const std::string& in_line)
455
{
456
std::string out_line;
457
458
std::string::size_type last_pos = 0;
459
while (last_pos != std::string::npos) {
460
const std::string::size_type open_pos = in_line.find(
461
_delimiter, last_pos);
462
if (open_pos == std::string::npos) {
463
out_line += in_line.substr(last_pos);
464
last_pos = std::string::npos;
465
} else {
466
const std::string::size_type close_pos = in_line.find(
467
_delimiter, open_pos + _delimiter.length());
468
if (close_pos == std::string::npos) {
469
out_line += in_line.substr(last_pos);
470
last_pos = std::string::npos;
471
} else {
472
out_line += in_line.substr(last_pos, open_pos - last_pos);
473
out_line += _templates.evaluate(in_line.substr(
474
open_pos + _delimiter.length(),
475
close_pos - open_pos - _delimiter.length()));
476
last_pos = close_pos + _delimiter.length();
477
}
478
}
479
}
480
481
return out_line;
482
}
483
484
public:
485
/// Constructs a new template parser.
486
///
487
/// \param templates_ The templates to apply to the processed file.
488
/// \param prefix_ The prefix that identifies lines as statements.
489
/// \param delimiter_ Delimiter to surround a variable instantiation.
490
templates_parser(const text::templates_def& templates_,
491
const std::string& prefix_,
492
const std::string& delimiter_) :
493
_templates(templates_),
494
_prefix(prefix_),
495
_delimiter(delimiter_),
496
_if_level(0),
497
_exit_if_level(0),
498
_loop_level(0),
499
_exit_loop_level(0)
500
{
501
}
502
503
/// Applies the templates to a given input.
504
///
505
/// \param input The stream to which to apply the templates.
506
/// \param output The stream into which to write the results.
507
///
508
/// \throw text::syntax_error If the input is not valid. Note that the
509
/// is not guaranteed to be unmodified on exit if an error is
510
/// encountered.
511
void
512
instantiate(std::istream& input, std::ostream& output)
513
{
514
std::string line;
515
while (std::getline(input, line).good()) {
516
if (!_skip.empty() && _skip.top())
517
handle_skip(line);
518
else
519
handle_normal(evaluate(line), input, output);
520
}
521
}
522
};
523
524
525
} // anonymous namespace
526
527
528
/// Constructs an empty templates definition.
529
text::templates_def::templates_def(void)
530
{
531
}
532
533
534
/// Sets a string variable in the templates.
535
///
536
/// If the variable already exists, its value is replaced. This behavior is
537
/// required to implement iterators, but client code should really not be
538
/// redefining variables.
539
///
540
/// \pre The variable must not already exist as a vector.
541
///
542
/// \param name The name of the variable to set.
543
/// \param value The value to set the given variable to.
544
void
545
text::templates_def::add_variable(const std::string& name,
546
const std::string& value)
547
{
548
PRE(_vectors.find(name) == _vectors.end());
549
_variables[name] = value;
550
}
551
552
553
/// Unsets a string variable from the templates.
554
///
555
/// Client code has no reason to use this. This is only required to implement
556
/// proper scoping of loop iterators.
557
///
558
/// \pre The variable must exist.
559
///
560
/// \param name The name of the variable to remove from the templates.
561
void
562
text::templates_def::remove_variable(const std::string& name)
563
{
564
PRE(_variables.find(name) != _variables.end());
565
_variables.erase(_variables.find(name));
566
}
567
568
569
/// Creates a new vector in the templates.
570
///
571
/// If the vector already exists, it is cleared. Client code should really not
572
/// be redefining variables.
573
///
574
/// \pre The vector must not already exist as a variable.
575
///
576
/// \param name The name of the vector to set.
577
void
578
text::templates_def::add_vector(const std::string& name)
579
{
580
PRE(_variables.find(name) == _variables.end());
581
_vectors[name] = strings_vector();
582
}
583
584
585
/// Adds a value to an existing vector in the templates.
586
///
587
/// \pre name The vector must exist.
588
///
589
/// \param name The name of the vector to append the value to.
590
/// \param value The textual value to append to the vector.
591
void
592
text::templates_def::add_to_vector(const std::string& name,
593
const std::string& value)
594
{
595
PRE(_variables.find(name) == _variables.end());
596
PRE(_vectors.find(name) != _vectors.end());
597
_vectors[name].push_back(value);
598
}
599
600
601
/// Checks whether a given identifier exists as a variable or a vector.
602
///
603
/// This is used to implement the evaluation of conditions in if clauses.
604
///
605
/// \param name The name of the variable or vector.
606
///
607
/// \return True if the given name exists as a variable or a vector; false
608
/// otherwise.
609
bool
610
text::templates_def::exists(const std::string& name) const
611
{
612
return (_variables.find(name) != _variables.end() ||
613
_vectors.find(name) != _vectors.end());
614
}
615
616
617
/// Gets the value of a variable.
618
///
619
/// \param name The name of the variable.
620
///
621
/// \return The value of the requested variable.
622
///
623
/// \throw text::syntax_error If the variable does not exist.
624
const std::string&
625
text::templates_def::get_variable(const std::string& name) const
626
{
627
const variables_map::const_iterator iter = _variables.find(name);
628
if (iter == _variables.end())
629
throw text::syntax_error(F("Unknown variable '%s'") % name);
630
return (*iter).second;
631
}
632
633
634
/// Gets a vector.
635
///
636
/// \param name The name of the vector.
637
///
638
/// \return A reference to the requested vector.
639
///
640
/// \throw text::syntax_error If the vector does not exist.
641
const text::templates_def::strings_vector&
642
text::templates_def::get_vector(const std::string& name) const
643
{
644
const vectors_map::const_iterator iter = _vectors.find(name);
645
if (iter == _vectors.end())
646
throw text::syntax_error(F("Unknown vector '%s'") % name);
647
return (*iter).second;
648
}
649
650
651
/// Indexes a vector and gets the value.
652
///
653
/// \param name The name of the vector to index.
654
/// \param index_name The name of a variable representing the index to use.
655
/// This must be convertible to a natural.
656
///
657
/// \return The value of the vector at the given index.
658
///
659
/// \throw text::syntax_error If the vector does not existor if the index is out
660
/// of range.
661
const std::string&
662
text::templates_def::get_vector(const std::string& name,
663
const std::string& index_name) const
664
{
665
const strings_vector& vector = get_vector(name);
666
const std::string& index_str = get_variable(index_name);
667
668
std::size_t index;
669
try {
670
index = text::to_type< std::size_t >(index_str);
671
} catch (const text::syntax_error& e) {
672
throw text::syntax_error(F("Index '%s' not an integer, value '%s'") %
673
index_name % index_str);
674
}
675
if (index >= vector.size())
676
throw text::syntax_error(F("Index '%s' out of range at position '%s'") %
677
index_name % index);
678
679
return vector[index];
680
}
681
682
683
/// Evaluates a expression using these templates.
684
///
685
/// An expression is a query on the current templates to fetch a particular
686
/// value. The value is always returned as a string, as this is how templates
687
/// are internally stored.
688
///
689
/// \param expression The expression to evaluate. This should not include any
690
/// of the delimiters used in the user input, as otherwise the expression
691
/// will not be evaluated properly.
692
///
693
/// \return The result of the expression evaluation as a string.
694
///
695
/// \throw text::syntax_error If there is any problem while evaluating the
696
/// expression.
697
std::string
698
text::templates_def::evaluate(const std::string& expression) const
699
{
700
const std::string::size_type paren_open = expression.find('(');
701
if (paren_open == std::string::npos) {
702
return get_variable(expression);
703
} else {
704
const std::string::size_type paren_close = expression.find(
705
')', paren_open);
706
if (paren_close == std::string::npos)
707
throw text::syntax_error(F("Expected ')' in expression '%s')") %
708
expression);
709
if (paren_close != expression.length() - 1)
710
throw text::syntax_error(F("Unexpected text found after ')' in "
711
"expression '%s'") % expression);
712
713
const std::string arg0 = expression.substr(0, paren_open);
714
const std::string arg1 = expression.substr(
715
paren_open + 1, paren_close - paren_open - 1);
716
if (arg0 == "defined") {
717
return exists(arg1) ? "true" : "false";
718
} else if (arg0 == "length") {
719
return F("%s") % get_vector(arg1).size();
720
} else {
721
return get_vector(arg0, arg1);
722
}
723
}
724
}
725
726
727
/// Applies a set of templates to an input stream.
728
///
729
/// \param templates The templates to use.
730
/// \param input The input to process.
731
/// \param output The stream to which to write the processed text.
732
///
733
/// \throw text::syntax_error If there is any problem processing the input.
734
void
735
text::instantiate(const templates_def& templates,
736
std::istream& input, std::ostream& output)
737
{
738
templates_parser parser(templates, "%", "%%");
739
parser.instantiate(input, output);
740
}
741
742
743
/// Applies a set of templates to an input file and writes an output file.
744
///
745
/// \param templates The templates to use.
746
/// \param input_file The path to the input to process.
747
/// \param output_file The path to the file into which to write the output.
748
///
749
/// \throw text::error If the input or output files cannot be opened.
750
/// \throw text::syntax_error If there is any problem processing the input.
751
void
752
text::instantiate(const templates_def& templates,
753
const fs::path& input_file, const fs::path& output_file)
754
{
755
std::ifstream input(input_file.c_str());
756
if (!input)
757
throw text::error(F("Failed to open %s for read") % input_file);
758
759
std::ofstream output(output_file.c_str());
760
if (!output)
761
throw text::error(F("Failed to open %s for write") % output_file);
762
763
instantiate(templates, input, output);
764
}
765
766