Path: blob/main/contrib/kyua/utils/text/operations.cpp
48180 views
// Copyright 2012 The Kyua Authors.1// All rights reserved.2//3// Redistribution and use in source and binary forms, with or without4// modification, are permitted provided that the following conditions are5// met:6//7// * Redistributions of source code must retain the above copyright8// notice, this list of conditions and the following disclaimer.9// * Redistributions in binary form must reproduce the above copyright10// notice, this list of conditions and the following disclaimer in the11// documentation and/or other materials provided with the distribution.12// * Neither the name of Google Inc. nor the names of its contributors13// may be used to endorse or promote products derived from this software14// without specific prior written permission.15//16// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS17// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT18// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR19// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT20// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,21// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT22// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,23// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY24// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT25// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE26// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.2728#include "utils/text/operations.ipp"2930#include <sstream>3132#include "utils/format/macros.hpp"33#include "utils/sanity.hpp"3435namespace text = utils::text;363738/// Replaces XML special characters from an input string.39///40/// The list of XML special characters is specified here:41/// http://www.w3.org/TR/xml11/#charsets42///43/// \param in The input to quote.44///45/// \return A quoted string without any XML special characters.46std::string47text::escape_xml(const std::string& in)48{49std::ostringstream quoted;5051for (std::string::const_iterator it = in.begin();52it != in.end(); ++it) {53unsigned char c = (unsigned char)*it;54if (c == '"') {55quoted << """;56} else if (c == '&') {57quoted << "&";58} else if (c == '<') {59quoted << "<";60} else if (c == '>') {61quoted << ">";62} else if (c == '\'') {63quoted << "'";64} else if ((c >= 0x01 && c <= 0x08) ||65(c >= 0x0B && c <= 0x0C) ||66(c >= 0x0E && c <= 0x1F) ||67(c >= 0x7F && c <= 0x84) ||68(c >= 0x86 && c <= 0x9F)) {69// for RestrictedChar characters, escape them70// as '&#[decimal ASCII value];'71// so that in the XML file we will see the escaped72// character.73quoted << "&#" << static_cast< std::string::size_type >(*it)74<< ";";75} else {76quoted << *it;77}78}79return quoted.str();80}818283/// Surrounds a string with quotes, escaping the quote itself if needed.84///85/// \param text The string to quote.86/// \param quote The quote character to use.87///88/// \return The quoted string.89std::string90text::quote(const std::string& text, const char quote)91{92std::ostringstream quoted;93quoted << quote;9495std::string::size_type start_pos = 0;96std::string::size_type last_pos = text.find(quote);97while (last_pos != std::string::npos) {98quoted << text.substr(start_pos, last_pos - start_pos) << '\\';99start_pos = last_pos;100last_pos = text.find(quote, start_pos + 1);101}102quoted << text.substr(start_pos);103104quoted << quote;105return quoted.str();106}107108109/// Fills a paragraph to the specified length.110///111/// This preserves any sequence of spaces in the input and any possible112/// newlines. Sequences of spaces may be split in half (and thus one space is113/// lost), but the rest of the spaces will be preserved as either trailing or114/// leading spaces.115///116/// \param input The string to refill.117/// \param target_width The width to refill the paragraph to.118///119/// \return The refilled paragraph as a sequence of independent lines.120std::vector< std::string >121text::refill(const std::string& input, const std::size_t target_width)122{123std::vector< std::string > output;124125std::string::size_type start = 0;126while (start < input.length()) {127std::string::size_type width;128if (start + target_width >= input.length())129width = input.length() - start;130else {131if (input[start + target_width] == ' ') {132width = target_width;133} else {134const std::string::size_type pos = input.find_last_of(135" ", start + target_width - 1);136if (pos == std::string::npos || pos < start + 1) {137width = input.find_first_of(" ", start + target_width);138if (width == std::string::npos)139width = input.length() - start;140else141width -= start;142} else {143width = pos - start;144}145}146}147INV(width != std::string::npos);148INV(start + width <= input.length());149INV(input[start + width] == ' ' || input[start + width] == '\0');150output.push_back(input.substr(start, width));151152start += width + 1;153}154155if (input.empty()) {156INV(output.empty());157output.push_back("");158}159160return output;161}162163164/// Fills a paragraph to the specified length.165///166/// See the documentation for refill() for additional details.167///168/// \param input The string to refill.169/// \param target_width The width to refill the paragraph to.170///171/// \return The refilled paragraph as a string with embedded newlines.172std::string173text::refill_as_string(const std::string& input, const std::size_t target_width)174{175return join(refill(input, target_width), "\n");176}177178179/// Replaces all occurrences of a substring in a string.180///181/// \param input The string in which to perform the replacement.182/// \param search The pattern to be replaced.183/// \param replacement The substring to replace search with.184///185/// \return A copy of input with the replacements performed.186std::string187text::replace_all(const std::string& input, const std::string& search,188const std::string& replacement)189{190std::string output;191192std::string::size_type pos, lastpos = 0;193while ((pos = input.find(search, lastpos)) != std::string::npos) {194output += input.substr(lastpos, pos - lastpos);195output += replacement;196lastpos = pos + search.length();197}198output += input.substr(lastpos);199200return output;201}202203204/// Splits a string into different components.205///206/// \param str The string to split.207/// \param delimiter The separator to use to split the words.208///209/// \return The different words in the input string as split by the provided210/// delimiter.211std::vector< std::string >212text::split(const std::string& str, const char delimiter)213{214std::vector< std::string > words;215if (!str.empty()) {216std::string::size_type pos = str.find(delimiter);217words.push_back(str.substr(0, pos));218while (pos != std::string::npos) {219++pos;220const std::string::size_type next = str.find(delimiter, pos);221words.push_back(str.substr(pos, next - pos));222pos = next;223}224}225return words;226}227228229/// Converts a string to a boolean.230///231/// \param str The string to convert.232///233/// \return The converted string, if the input string was valid.234///235/// \throw std::value_error If the input string does not represent a valid236/// boolean value.237template<>238bool239text::to_type(const std::string& str)240{241if (str == "true")242return true;243else if (str == "false")244return false;245else246throw value_error(F("Invalid boolean value '%s'") % str);247}248249250/// Identity function for to_type, for genericity purposes.251///252/// \param str The string to convert.253///254/// \return The input string.255template<>256std::string257text::to_type(const std::string& str)258{259return str;260}261262263