Path: blob/main/contrib/llvm-project/libcxx/include/__format/write_escaped.h
35260 views
// -*- C++ -*-1//===----------------------------------------------------------------------===//2//3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.4// See https://llvm.org/LICENSE.txt for license information.5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception6//7//===----------------------------------------------------------------------===//89#ifndef _LIBCPP___FORMAT_WRITE_ESCAPED_H10#define _LIBCPP___FORMAT_WRITE_ESCAPED_H1112#include <__algorithm/ranges_copy.h>13#include <__algorithm/ranges_for_each.h>14#include <__charconv/to_chars_integral.h>15#include <__charconv/to_chars_result.h>16#include <__chrono/statically_widen.h>17#include <__format/escaped_output_table.h>18#include <__format/formatter_output.h>19#include <__format/parser_std_format_spec.h>20#include <__format/unicode.h>21#include <__iterator/back_insert_iterator.h>22#include <__memory/addressof.h>23#include <__system_error/errc.h>24#include <__type_traits/make_unsigned.h>25#include <__utility/move.h>26#include <string_view>2728#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)29# pragma GCC system_header30#endif3132_LIBCPP_PUSH_MACROS33#include <__undef_macros>3435_LIBCPP_BEGIN_NAMESPACE_STD3637namespace __formatter {3839#if _LIBCPP_STD_VER >= 204041/// Writes a string using format's width estimation algorithm.42///43/// \note When \c _LIBCPP_HAS_NO_UNICODE is defined the function assumes the44/// input is ASCII.45template <class _CharT>46_LIBCPP_HIDE_FROM_ABI auto47__write_string(basic_string_view<_CharT> __str,48output_iterator<const _CharT&> auto __out_it,49__format_spec::__parsed_specifications<_CharT> __specs) -> decltype(__out_it) {50if (!__specs.__has_precision())51return __formatter::__write_string_no_precision(__str, std::move(__out_it), __specs);5253int __size = __formatter::__truncate(__str, __specs.__precision_);5455return __formatter::__write(__str.begin(), __str.end(), std::move(__out_it), __specs, __size);56}5758#endif // _LIBCPP_STD_VER >= 2059#if _LIBCPP_STD_VER >= 236061struct __nul_terminator {};6263template <class _CharT>64_LIBCPP_HIDE_FROM_ABI bool operator==(const _CharT* __cstr, __nul_terminator) {65return *__cstr == _CharT('\0');66}6768template <class _CharT>69_LIBCPP_HIDE_FROM_ABI void70__write_escaped_code_unit(basic_string<_CharT>& __str, char32_t __value, const _CharT* __prefix) {71back_insert_iterator __out_it{__str};72std::ranges::copy(__prefix, __nul_terminator{}, __out_it);7374char __buffer[8];75to_chars_result __r = std::to_chars(std::begin(__buffer), std::end(__buffer), __value, 16);76_LIBCPP_ASSERT_INTERNAL(__r.ec == errc(0), "Internal buffer too small");77std::ranges::copy(std::begin(__buffer), __r.ptr, __out_it);7879__str += _CharT('}');80}8182// [format.string.escaped]/2.2.1.283// ...84// then the sequence \u{hex-digit-sequence} is appended to E, where85// hex-digit-sequence is the shortest hexadecimal representation of C using86// lower-case hexadecimal digits.87template <class _CharT>88_LIBCPP_HIDE_FROM_ABI void __write_well_formed_escaped_code_unit(basic_string<_CharT>& __str, char32_t __value) {89__formatter::__write_escaped_code_unit(__str, __value, _LIBCPP_STATICALLY_WIDEN(_CharT, "\\u{"));90}9192// [format.string.escaped]/2.2.393// Otherwise (X is a sequence of ill-formed code units), each code unit U is94// appended to E in order as the sequence \x{hex-digit-sequence}, where95// hex-digit-sequence is the shortest hexadecimal representation of U using96// lower-case hexadecimal digits.97template <class _CharT>98_LIBCPP_HIDE_FROM_ABI void __write_escape_ill_formed_code_unit(basic_string<_CharT>& __str, char32_t __value) {99__formatter::__write_escaped_code_unit(__str, __value, _LIBCPP_STATICALLY_WIDEN(_CharT, "\\x{"));100}101102template <class _CharT>103[[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool104__is_escaped_sequence_written(basic_string<_CharT>& __str, bool __last_escaped, char32_t __value) {105# ifdef _LIBCPP_HAS_NO_UNICODE106// For ASCII assume everything above 127 is printable.107if (__value > 127)108return false;109# endif110111// [format.string.escaped]/2.2.1.2.1112// CE is UTF-8, UTF-16, or UTF-32 and C corresponds to a Unicode scalar113// value whose Unicode property General_Category has a value in the groups114// Separator (Z) or Other (C), as described by UAX #44 of the Unicode Standard,115if (!__escaped_output_table::__needs_escape(__value))116// [format.string.escaped]/2.2.1.2.2117// CE is UTF-8, UTF-16, or UTF-32 and C corresponds to a Unicode scalar118// value with the Unicode property Grapheme_Extend=Yes as described by UAX119// #44 of the Unicode Standard and C is not immediately preceded in S by a120// character P appended to E without translation to an escape sequence,121if (!__last_escaped || __extended_grapheme_custer_property_boundary::__get_property(__value) !=122__extended_grapheme_custer_property_boundary::__property::__Extend)123return false;124125__formatter::__write_well_formed_escaped_code_unit(__str, __value);126return true;127}128129template <class _CharT>130[[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr char32_t __to_char32(_CharT __value) {131return static_cast<make_unsigned_t<_CharT>>(__value);132}133134enum class __escape_quotation_mark { __apostrophe, __double_quote };135136// [format.string.escaped]/2137template <class _CharT>138[[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool __is_escaped_sequence_written(139basic_string<_CharT>& __str, char32_t __value, bool __last_escaped, __escape_quotation_mark __mark) {140// 2.2.1.1 - Mapped character in [tab:format.escape.sequences]141switch (__value) {142case _CharT('\t'):143__str += _LIBCPP_STATICALLY_WIDEN(_CharT, "\\t");144return true;145case _CharT('\n'):146__str += _LIBCPP_STATICALLY_WIDEN(_CharT, "\\n");147return true;148case _CharT('\r'):149__str += _LIBCPP_STATICALLY_WIDEN(_CharT, "\\r");150return true;151case _CharT('\''):152if (__mark == __escape_quotation_mark::__apostrophe)153__str += _LIBCPP_STATICALLY_WIDEN(_CharT, R"(\')");154else155__str += __value;156return true;157case _CharT('"'):158if (__mark == __escape_quotation_mark::__double_quote)159__str += _LIBCPP_STATICALLY_WIDEN(_CharT, R"(\")");160else161__str += __value;162return true;163case _CharT('\\'):164__str += _LIBCPP_STATICALLY_WIDEN(_CharT, R"(\\)");165return true;166167// 2.2.1.2 - Space168case _CharT(' '):169__str += __value;170return true;171}172173// 2.2.2174// Otherwise, if X is a shift sequence, the effect on E and further175// decoding of S is unspecified.176// For now shift sequences are ignored and treated as Unicode. Other parts177// of the format library do the same. It's unknown how ostream treats them.178// TODO FMT determine what to do with shift sequences.179180// 2.2.1.2.1 and 2.2.1.2.2 - Escape181return __formatter::__is_escaped_sequence_written(__str, __last_escaped, __formatter::__to_char32(__value));182}183184template <class _CharT>185_LIBCPP_HIDE_FROM_ABI void186__escape(basic_string<_CharT>& __str, basic_string_view<_CharT> __values, __escape_quotation_mark __mark) {187__unicode::__code_point_view<_CharT> __view{__values.begin(), __values.end()};188189// When the first code unit has the property Grapheme_Extend=Yes it needs to190// be escaped. This happens when the previous code unit was also escaped.191bool __escape = true;192while (!__view.__at_end()) {193auto __first = __view.__position();194typename __unicode::__consume_result __result = __view.__consume();195if (__result.__status == __unicode::__consume_result::__ok) {196__escape = __formatter::__is_escaped_sequence_written(__str, __result.__code_point, __escape, __mark);197if (!__escape)198// 2.2.1.3 - Add the character199ranges::copy(__first, __view.__position(), std::back_insert_iterator(__str));200} else {201// 2.2.3 sequence of ill-formed code units202ranges::for_each(__first, __view.__position(), [&](_CharT __value) {203__formatter::__write_escape_ill_formed_code_unit(__str, __formatter::__to_char32(__value));204});205}206}207}208209template <class _CharT>210_LIBCPP_HIDE_FROM_ABI auto211__format_escaped_char(_CharT __value,212output_iterator<const _CharT&> auto __out_it,213__format_spec::__parsed_specifications<_CharT> __specs) -> decltype(__out_it) {214basic_string<_CharT> __str;215__str += _CharT('\'');216__formatter::__escape(__str, basic_string_view{std::addressof(__value), 1}, __escape_quotation_mark::__apostrophe);217__str += _CharT('\'');218return __formatter::__write(__str.data(), __str.data() + __str.size(), std::move(__out_it), __specs, __str.size());219}220221template <class _CharT>222_LIBCPP_HIDE_FROM_ABI auto223__format_escaped_string(basic_string_view<_CharT> __values,224output_iterator<const _CharT&> auto __out_it,225__format_spec::__parsed_specifications<_CharT> __specs) -> decltype(__out_it) {226basic_string<_CharT> __str;227__str += _CharT('"');228__formatter::__escape(__str, __values, __escape_quotation_mark::__double_quote);229__str += _CharT('"');230return __formatter::__write_string(basic_string_view{__str}, std::move(__out_it), __specs);231}232233#endif // _LIBCPP_STD_VER >= 23234235} // namespace __formatter236237_LIBCPP_END_NAMESPACE_STD238239_LIBCPP_POP_MACROS240241#endif // _LIBCPP___FORMAT_WRITE_ESCAPED_H242243244