Path: blob/main/contrib/kyua/utils/text/templates.cpp
48199 views
// Copyright 2012 The Kyua Authors.1// All rights reserved.2//3// Redistribution and use in source and binary forms, with or without4// modification, are permitted provided that the following conditions are5// met:6//7// * Redistributions of source code must retain the above copyright8// notice, this list of conditions and the following disclaimer.9// * Redistributions in binary form must reproduce the above copyright10// notice, this list of conditions and the following disclaimer in the11// documentation and/or other materials provided with the distribution.12// * Neither the name of Google Inc. nor the names of its contributors13// may be used to endorse or promote products derived from this software14// without specific prior written permission.15//16// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS17// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT18// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR19// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT20// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,21// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT22// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,23// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY24// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT25// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE26// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.2728#include "utils/text/templates.hpp"2930#include <algorithm>31#include <fstream>32#include <sstream>33#include <stack>3435#include "utils/format/macros.hpp"36#include "utils/fs/path.hpp"37#include "utils/noncopyable.hpp"38#include "utils/sanity.hpp"39#include "utils/text/exceptions.hpp"40#include "utils/text/operations.ipp"4142namespace text = utils::text;434445namespace {464748/// Definition of a template statement.49///50/// A template statement is a particular line in the input file that is51/// preceeded by a template marker. This class provides a high-level52/// representation of the contents of such statement and a mechanism to parse53/// the textual line into this high-level representation.54class statement_def {55public:56/// Types of the known statements.57enum statement_type {58/// Alternative clause of a conditional.59///60/// Takes no arguments.61type_else,6263/// End of conditional marker.64///65/// Takes no arguments.66type_endif,6768/// End of loop marker.69///70/// Takes no arguments.71type_endloop,7273/// Beginning of a conditional.74///75/// Takes a single argument, which denotes the name of the variable or76/// vector to check for existence. This is the only expression77/// supported.78type_if,7980/// Beginning of a loop over all the elements of a vector.81///82/// Takes two arguments: the name of the vector over which to iterate83/// and the name of the iterator to later index this vector.84type_loop,85};8687private:88/// Internal data describing the structure of a particular statement type.89struct type_descriptor {90/// The native type of the statement.91statement_type type;9293/// The expected number of arguments.94unsigned int n_arguments;9596/// Constructs a new type descriptor.97///98/// \param type_ The native type of the statement.99/// \param n_arguments_ The expected number of arguments.100type_descriptor(const statement_type type_,101const unsigned int n_arguments_)102: type(type_), n_arguments(n_arguments_)103{104}105};106107/// Mapping of statement type names to their definitions.108typedef std::map< std::string, type_descriptor > types_map;109110/// Description of the different statement types.111///112/// This static map is initialized once and reused later for any statement113/// lookup. Unfortunately, we cannot perform this initialization in a114/// static manner without C++11.115static types_map _types;116117/// Generates a new types definition map.118///119/// \return A new types definition map, to be assigned to _types.120static types_map121generate_types_map(void)122{123// If you change this, please edit the comments in the enum above.124types_map types;125types.insert(types_map::value_type(126"else", type_descriptor(type_else, 0)));127types.insert(types_map::value_type(128"endif", type_descriptor(type_endif, 0)));129types.insert(types_map::value_type(130"endloop", type_descriptor(type_endloop, 0)));131types.insert(types_map::value_type(132"if", type_descriptor(type_if, 1)));133types.insert(types_map::value_type(134"loop", type_descriptor(type_loop, 2)));135return types;136}137138public:139/// The type of the statement.140statement_type type;141142/// The arguments to the statement, in textual form.143const std::vector< std::string > arguments;144145/// Creates a new statement.146///147/// \param type_ The type of the statement.148/// \param arguments_ The arguments to the statement.149statement_def(const statement_type& type_,150const std::vector< std::string >& arguments_) :151type(type_), arguments(arguments_)152{153#if !defined(NDEBUG)154for (types_map::const_iterator iter = _types.begin();155iter != _types.end(); ++iter) {156const type_descriptor& descriptor = (*iter).second;157if (descriptor.type == type_) {158PRE(descriptor.n_arguments == arguments_.size());159return;160}161}162UNREACHABLE;163#endif164}165166/// Parses a statement.167///168/// \param line The textual representation of the statement without any169/// prefix.170///171/// \return The parsed statement.172///173/// \throw text::syntax_error If the statement is not correctly defined.174static statement_def175parse(const std::string& line)176{177if (_types.empty())178_types = generate_types_map();179180const std::vector< std::string > words = text::split(line, ' ');181if (words.empty())182throw text::syntax_error("Empty statement");183184const types_map::const_iterator iter = _types.find(words[0]);185if (iter == _types.end())186throw text::syntax_error(F("Unknown statement '%s'") % words[0]);187const type_descriptor& descriptor = (*iter).second;188189if (words.size() - 1 != descriptor.n_arguments)190throw text::syntax_error(F("Invalid number of arguments for "191"statement '%s'") % words[0]);192193std::vector< std::string > new_arguments;194new_arguments.resize(words.size() - 1);195std::copy(words.begin() + 1, words.end(), new_arguments.begin());196197return statement_def(descriptor.type, new_arguments);198}199};200201202statement_def::types_map statement_def::_types;203204205/// Definition of a loop.206///207/// This simple structure is used to keep track of the parameters of a loop.208struct loop_def {209/// The name of the vector over which this loop is iterating.210std::string vector;211212/// The name of the iterator defined by this loop.213std::string iterator;214215/// Position in the input to which to rewind to on looping.216///217/// This position points to the line after the loop statement, not the loop218/// itself. This is one of the reasons why we have this structure, so that219/// we can maintain the data about the loop without having to re-process it.220std::istream::pos_type position;221222/// Constructs a new loop definition.223///224/// \param vector_ The name of the vector (first argument).225/// \param iterator_ The name of the iterator (second argumnet).226/// \param position_ Position of the next line after the loop statement.227loop_def(const std::string& vector_, const std::string& iterator_,228const std::istream::pos_type position_) :229vector(vector_), iterator(iterator_), position(position_)230{231}232};233234235/// Stateful class to instantiate the templates in an input stream.236///237/// The goal of this parser is to scan the input once and not buffer anything in238/// memory. The only exception are loops: loops are reinterpreted on every239/// iteration from the same input file by rewidining the stream to the240/// appropriate position.241class templates_parser : utils::noncopyable {242/// The templates to apply.243///244/// Note that this is not const because the parser has to have write access245/// to the templates. In particular, it needs to be able to define the246/// iterators as regular variables.247text::templates_def _templates;248249/// Prefix that marks a line as a statement.250const std::string _prefix;251252/// Delimiter to surround an expression instantiation.253const std::string _delimiter;254255/// Whether to skip incoming lines or not.256///257/// The top of the stack is true whenever we encounter a conditional that258/// evaluates to false or a loop that does not have any iterations left.259/// Under these circumstances, we need to continue scanning the input stream260/// until we find the matching closing endif or endloop construct.261///262/// This is a stack rather than a plain boolean to allow us deal with263/// if-else clauses.264std::stack< bool > _skip;265266/// Current count of nested conditionals.267unsigned int _if_level;268269/// Level of the top-most conditional that evaluated to false.270unsigned int _exit_if_level;271272/// Current count of nested loops.273unsigned int _loop_level;274275/// Level of the top-most loop that does not have any iterations left.276unsigned int _exit_loop_level;277278/// Information about all the nested loops up to the current point.279std::stack< loop_def > _loops;280281/// Checks if a line is a statement or not.282///283/// \param line The line to validate.284///285/// \return True if the line looks like a statement, which is determined by286/// checking if the line starts by the predefined prefix.287bool288is_statement(const std::string& line)289{290return ((line.length() >= _prefix.length() &&291line.substr(0, _prefix.length()) == _prefix) &&292(line.length() < _delimiter.length() ||293line.substr(0, _delimiter.length()) != _delimiter));294}295296/// Parses a given statement line into a statement definition.297///298/// \param line The line to validate; it must be a valid statement.299///300/// \return The parsed statement.301///302/// \throw text::syntax_error If the input is not a valid statement.303statement_def304parse_statement(const std::string& line)305{306PRE(is_statement(line));307return statement_def::parse(line.substr(_prefix.length()));308}309310/// Processes a line from the input when not in skip mode.311///312/// \param line The line to be processed.313/// \param input The input stream from which the line was read. The current314/// position in the stream must be after the line being processed.315/// \param output The output stream into which to write the results.316///317/// \throw text::syntax_error If the input is not valid.318void319handle_normal(const std::string& line, std::istream& input,320std::ostream& output)321{322if (!is_statement(line)) {323// Fast path. Mostly to avoid an indentation level for the big324// chunk of code below.325output << line << '\n';326return;327}328329const statement_def statement = parse_statement(line);330331switch (statement.type) {332case statement_def::type_else:333_skip.top() = !_skip.top();334break;335336case statement_def::type_endif:337_if_level--;338break;339340case statement_def::type_endloop: {341PRE(_loops.size() == _loop_level);342loop_def& loop = _loops.top();343344const std::size_t next_index = 1 + text::to_type< std::size_t >(345_templates.get_variable(loop.iterator));346347if (next_index < _templates.get_vector(loop.vector).size()) {348_templates.add_variable(loop.iterator, F("%s") % next_index);349input.seekg(loop.position);350} else {351_loop_level--;352_loops.pop();353_templates.remove_variable(loop.iterator);354}355} break;356357case statement_def::type_if: {358_if_level++;359const std::string value = _templates.evaluate(360statement.arguments[0]);361if (value.empty() || value == "0" || value == "false") {362_exit_if_level = _if_level;363_skip.push(true);364} else {365_skip.push(false);366}367} break;368369case statement_def::type_loop: {370_loop_level++;371372const loop_def loop(statement.arguments[0], statement.arguments[1],373input.tellg());374if (_templates.get_vector(loop.vector).empty()) {375_exit_loop_level = _loop_level;376_skip.push(true);377} else {378_templates.add_variable(loop.iterator, "0");379_loops.push(loop);380_skip.push(false);381}382} break;383}384}385386/// Processes a line from the input when in skip mode.387///388/// \param line The line to be processed.389///390/// \throw text::syntax_error If the input is not valid.391void392handle_skip(const std::string& line)393{394PRE(_skip.top());395396if (!is_statement(line))397return;398399const statement_def statement = parse_statement(line);400switch (statement.type) {401case statement_def::type_else:402if (_exit_if_level == _if_level)403_skip.top() = !_skip.top();404break;405406case statement_def::type_endif:407INV(_if_level >= _exit_if_level);408if (_if_level == _exit_if_level)409_skip.top() = false;410_if_level--;411_skip.pop();412break;413414case statement_def::type_endloop:415INV(_loop_level >= _exit_loop_level);416if (_loop_level == _exit_loop_level)417_skip.top() = false;418_loop_level--;419_skip.pop();420break;421422case statement_def::type_if:423_if_level++;424_skip.push(true);425break;426427case statement_def::type_loop:428_loop_level++;429_skip.push(true);430break;431432default:433break;434}435}436437/// Evaluates expressions on a given input line.438///439/// An expression is surrounded by _delimiter on both sides. We scan the440/// string from left to right finding any expressions that may appear, yank441/// them out and call templates_def::evaluate() to get their value.442///443/// Lonely or unbalanced appearances of _delimiter on the input line are444/// not considered an error, given that the user may actually want to supply445/// that character sequence without being interpreted as a template.446///447/// \param in_line The input line from which to evaluate expressions.448///449/// \return The evaluated line.450///451/// \throw text::syntax_error If the expressions in the line are malformed.452std::string453evaluate(const std::string& in_line)454{455std::string out_line;456457std::string::size_type last_pos = 0;458while (last_pos != std::string::npos) {459const std::string::size_type open_pos = in_line.find(460_delimiter, last_pos);461if (open_pos == std::string::npos) {462out_line += in_line.substr(last_pos);463last_pos = std::string::npos;464} else {465const std::string::size_type close_pos = in_line.find(466_delimiter, open_pos + _delimiter.length());467if (close_pos == std::string::npos) {468out_line += in_line.substr(last_pos);469last_pos = std::string::npos;470} else {471out_line += in_line.substr(last_pos, open_pos - last_pos);472out_line += _templates.evaluate(in_line.substr(473open_pos + _delimiter.length(),474close_pos - open_pos - _delimiter.length()));475last_pos = close_pos + _delimiter.length();476}477}478}479480return out_line;481}482483public:484/// Constructs a new template parser.485///486/// \param templates_ The templates to apply to the processed file.487/// \param prefix_ The prefix that identifies lines as statements.488/// \param delimiter_ Delimiter to surround a variable instantiation.489templates_parser(const text::templates_def& templates_,490const std::string& prefix_,491const std::string& delimiter_) :492_templates(templates_),493_prefix(prefix_),494_delimiter(delimiter_),495_if_level(0),496_exit_if_level(0),497_loop_level(0),498_exit_loop_level(0)499{500}501502/// Applies the templates to a given input.503///504/// \param input The stream to which to apply the templates.505/// \param output The stream into which to write the results.506///507/// \throw text::syntax_error If the input is not valid. Note that the508/// is not guaranteed to be unmodified on exit if an error is509/// encountered.510void511instantiate(std::istream& input, std::ostream& output)512{513std::string line;514while (std::getline(input, line).good()) {515if (!_skip.empty() && _skip.top())516handle_skip(line);517else518handle_normal(evaluate(line), input, output);519}520}521};522523524} // anonymous namespace525526527/// Constructs an empty templates definition.528text::templates_def::templates_def(void)529{530}531532533/// Sets a string variable in the templates.534///535/// If the variable already exists, its value is replaced. This behavior is536/// required to implement iterators, but client code should really not be537/// redefining variables.538///539/// \pre The variable must not already exist as a vector.540///541/// \param name The name of the variable to set.542/// \param value The value to set the given variable to.543void544text::templates_def::add_variable(const std::string& name,545const std::string& value)546{547PRE(_vectors.find(name) == _vectors.end());548_variables[name] = value;549}550551552/// Unsets a string variable from the templates.553///554/// Client code has no reason to use this. This is only required to implement555/// proper scoping of loop iterators.556///557/// \pre The variable must exist.558///559/// \param name The name of the variable to remove from the templates.560void561text::templates_def::remove_variable(const std::string& name)562{563PRE(_variables.find(name) != _variables.end());564_variables.erase(_variables.find(name));565}566567568/// Creates a new vector in the templates.569///570/// If the vector already exists, it is cleared. Client code should really not571/// be redefining variables.572///573/// \pre The vector must not already exist as a variable.574///575/// \param name The name of the vector to set.576void577text::templates_def::add_vector(const std::string& name)578{579PRE(_variables.find(name) == _variables.end());580_vectors[name] = strings_vector();581}582583584/// Adds a value to an existing vector in the templates.585///586/// \pre name The vector must exist.587///588/// \param name The name of the vector to append the value to.589/// \param value The textual value to append to the vector.590void591text::templates_def::add_to_vector(const std::string& name,592const std::string& value)593{594PRE(_variables.find(name) == _variables.end());595PRE(_vectors.find(name) != _vectors.end());596_vectors[name].push_back(value);597}598599600/// Checks whether a given identifier exists as a variable or a vector.601///602/// This is used to implement the evaluation of conditions in if clauses.603///604/// \param name The name of the variable or vector.605///606/// \return True if the given name exists as a variable or a vector; false607/// otherwise.608bool609text::templates_def::exists(const std::string& name) const610{611return (_variables.find(name) != _variables.end() ||612_vectors.find(name) != _vectors.end());613}614615616/// Gets the value of a variable.617///618/// \param name The name of the variable.619///620/// \return The value of the requested variable.621///622/// \throw text::syntax_error If the variable does not exist.623const std::string&624text::templates_def::get_variable(const std::string& name) const625{626const variables_map::const_iterator iter = _variables.find(name);627if (iter == _variables.end())628throw text::syntax_error(F("Unknown variable '%s'") % name);629return (*iter).second;630}631632633/// Gets a vector.634///635/// \param name The name of the vector.636///637/// \return A reference to the requested vector.638///639/// \throw text::syntax_error If the vector does not exist.640const text::templates_def::strings_vector&641text::templates_def::get_vector(const std::string& name) const642{643const vectors_map::const_iterator iter = _vectors.find(name);644if (iter == _vectors.end())645throw text::syntax_error(F("Unknown vector '%s'") % name);646return (*iter).second;647}648649650/// Indexes a vector and gets the value.651///652/// \param name The name of the vector to index.653/// \param index_name The name of a variable representing the index to use.654/// This must be convertible to a natural.655///656/// \return The value of the vector at the given index.657///658/// \throw text::syntax_error If the vector does not existor if the index is out659/// of range.660const std::string&661text::templates_def::get_vector(const std::string& name,662const std::string& index_name) const663{664const strings_vector& vector = get_vector(name);665const std::string& index_str = get_variable(index_name);666667std::size_t index;668try {669index = text::to_type< std::size_t >(index_str);670} catch (const text::syntax_error& e) {671throw text::syntax_error(F("Index '%s' not an integer, value '%s'") %672index_name % index_str);673}674if (index >= vector.size())675throw text::syntax_error(F("Index '%s' out of range at position '%s'") %676index_name % index);677678return vector[index];679}680681682/// Evaluates a expression using these templates.683///684/// An expression is a query on the current templates to fetch a particular685/// value. The value is always returned as a string, as this is how templates686/// are internally stored.687///688/// \param expression The expression to evaluate. This should not include any689/// of the delimiters used in the user input, as otherwise the expression690/// will not be evaluated properly.691///692/// \return The result of the expression evaluation as a string.693///694/// \throw text::syntax_error If there is any problem while evaluating the695/// expression.696std::string697text::templates_def::evaluate(const std::string& expression) const698{699const std::string::size_type paren_open = expression.find('(');700if (paren_open == std::string::npos) {701return get_variable(expression);702} else {703const std::string::size_type paren_close = expression.find(704')', paren_open);705if (paren_close == std::string::npos)706throw text::syntax_error(F("Expected ')' in expression '%s')") %707expression);708if (paren_close != expression.length() - 1)709throw text::syntax_error(F("Unexpected text found after ')' in "710"expression '%s'") % expression);711712const std::string arg0 = expression.substr(0, paren_open);713const std::string arg1 = expression.substr(714paren_open + 1, paren_close - paren_open - 1);715if (arg0 == "defined") {716return exists(arg1) ? "true" : "false";717} else if (arg0 == "length") {718return F("%s") % get_vector(arg1).size();719} else {720return get_vector(arg0, arg1);721}722}723}724725726/// Applies a set of templates to an input stream.727///728/// \param templates The templates to use.729/// \param input The input to process.730/// \param output The stream to which to write the processed text.731///732/// \throw text::syntax_error If there is any problem processing the input.733void734text::instantiate(const templates_def& templates,735std::istream& input, std::ostream& output)736{737templates_parser parser(templates, "%", "%%");738parser.instantiate(input, output);739}740741742/// Applies a set of templates to an input file and writes an output file.743///744/// \param templates The templates to use.745/// \param input_file The path to the input to process.746/// \param output_file The path to the file into which to write the output.747///748/// \throw text::error If the input or output files cannot be opened.749/// \throw text::syntax_error If there is any problem processing the input.750void751text::instantiate(const templates_def& templates,752const fs::path& input_file, const fs::path& output_file)753{754std::ifstream input(input_file.c_str());755if (!input)756throw text::error(F("Failed to open %s for read") % input_file);757758std::ofstream output(output_file.c_str());759if (!output)760throw text::error(F("Failed to open %s for write") % output_file);761762instantiate(templates, input, output);763}764765766