Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/libcxx/include/__format/write_escaped.h
35260 views
1
// -*- C++ -*-
2
//===----------------------------------------------------------------------===//
3
//
4
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5
// See https://llvm.org/LICENSE.txt for license information.
6
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7
//
8
//===----------------------------------------------------------------------===//
9
10
#ifndef _LIBCPP___FORMAT_WRITE_ESCAPED_H
11
#define _LIBCPP___FORMAT_WRITE_ESCAPED_H
12
13
#include <__algorithm/ranges_copy.h>
14
#include <__algorithm/ranges_for_each.h>
15
#include <__charconv/to_chars_integral.h>
16
#include <__charconv/to_chars_result.h>
17
#include <__chrono/statically_widen.h>
18
#include <__format/escaped_output_table.h>
19
#include <__format/formatter_output.h>
20
#include <__format/parser_std_format_spec.h>
21
#include <__format/unicode.h>
22
#include <__iterator/back_insert_iterator.h>
23
#include <__memory/addressof.h>
24
#include <__system_error/errc.h>
25
#include <__type_traits/make_unsigned.h>
26
#include <__utility/move.h>
27
#include <string_view>
28
29
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
30
# pragma GCC system_header
31
#endif
32
33
_LIBCPP_PUSH_MACROS
34
#include <__undef_macros>
35
36
_LIBCPP_BEGIN_NAMESPACE_STD
37
38
namespace __formatter {
39
40
#if _LIBCPP_STD_VER >= 20
41
42
/// Writes a string using format's width estimation algorithm.
43
///
44
/// \note When \c _LIBCPP_HAS_NO_UNICODE is defined the function assumes the
45
/// input is ASCII.
46
template <class _CharT>
47
_LIBCPP_HIDE_FROM_ABI auto
48
__write_string(basic_string_view<_CharT> __str,
49
output_iterator<const _CharT&> auto __out_it,
50
__format_spec::__parsed_specifications<_CharT> __specs) -> decltype(__out_it) {
51
if (!__specs.__has_precision())
52
return __formatter::__write_string_no_precision(__str, std::move(__out_it), __specs);
53
54
int __size = __formatter::__truncate(__str, __specs.__precision_);
55
56
return __formatter::__write(__str.begin(), __str.end(), std::move(__out_it), __specs, __size);
57
}
58
59
#endif // _LIBCPP_STD_VER >= 20
60
#if _LIBCPP_STD_VER >= 23
61
62
// Empty tag type used as the sentinel when iterating over a nul-terminated
// character array; it is compared against a const _CharT* by the operator==
// overload in this header.
struct __nul_terminator {};
63
64
template <class _CharT>
65
_LIBCPP_HIDE_FROM_ABI bool operator==(const _CharT* __cstr, __nul_terminator) {
66
return *__cstr == _CharT('\0');
67
}
68
69
template <class _CharT>
70
_LIBCPP_HIDE_FROM_ABI void
71
__write_escaped_code_unit(basic_string<_CharT>& __str, char32_t __value, const _CharT* __prefix) {
72
back_insert_iterator __out_it{__str};
73
std::ranges::copy(__prefix, __nul_terminator{}, __out_it);
74
75
char __buffer[8];
76
to_chars_result __r = std::to_chars(std::begin(__buffer), std::end(__buffer), __value, 16);
77
_LIBCPP_ASSERT_INTERNAL(__r.ec == errc(0), "Internal buffer too small");
78
std::ranges::copy(std::begin(__buffer), __r.ptr, __out_it);
79
80
__str += _CharT('}');
81
}
82
83
// [format.string.escaped]/2.2.1.2
84
// ...
85
// then the sequence \u{hex-digit-sequence} is appended to E, where
86
// hex-digit-sequence is the shortest hexadecimal representation of C using
87
// lower-case hexadecimal digits.
88
template <class _CharT>
89
_LIBCPP_HIDE_FROM_ABI void __write_well_formed_escaped_code_unit(basic_string<_CharT>& __str, char32_t __value) {
90
__formatter::__write_escaped_code_unit(__str, __value, _LIBCPP_STATICALLY_WIDEN(_CharT, "\\u{"));
91
}
92
93
// [format.string.escaped]/2.2.3
94
// Otherwise (X is a sequence of ill-formed code units), each code unit U is
95
// appended to E in order as the sequence \x{hex-digit-sequence}, where
96
// hex-digit-sequence is the shortest hexadecimal representation of U using
97
// lower-case hexadecimal digits.
98
template <class _CharT>
99
_LIBCPP_HIDE_FROM_ABI void __write_escape_ill_formed_code_unit(basic_string<_CharT>& __str, char32_t __value) {
100
__formatter::__write_escaped_code_unit(__str, __value, _LIBCPP_STATICALLY_WIDEN(_CharT, "\\x{"));
101
}
102
103
template <class _CharT>
104
[[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool
105
__is_escaped_sequence_written(basic_string<_CharT>& __str, bool __last_escaped, char32_t __value) {
106
# ifdef _LIBCPP_HAS_NO_UNICODE
107
// For ASCII assume everything above 127 is printable.
108
if (__value > 127)
109
return false;
110
# endif
111
112
// [format.string.escaped]/2.2.1.2.1
113
// CE is UTF-8, UTF-16, or UTF-32 and C corresponds to a Unicode scalar
114
// value whose Unicode property General_Category has a value in the groups
115
// Separator (Z) or Other (C), as described by UAX #44 of the Unicode Standard,
116
if (!__escaped_output_table::__needs_escape(__value))
117
// [format.string.escaped]/2.2.1.2.2
118
// CE is UTF-8, UTF-16, or UTF-32 and C corresponds to a Unicode scalar
119
// value with the Unicode property Grapheme_Extend=Yes as described by UAX
120
// #44 of the Unicode Standard and C is not immediately preceded in S by a
121
// character P appended to E without translation to an escape sequence,
122
if (!__last_escaped || __extended_grapheme_custer_property_boundary::__get_property(__value) !=
123
__extended_grapheme_custer_property_boundary::__property::__Extend)
124
return false;
125
126
__formatter::__write_well_formed_escaped_code_unit(__str, __value);
127
return true;
128
}
129
130
template <class _CharT>
131
[[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr char32_t __to_char32(_CharT __value) {
132
return static_cast<make_unsigned_t<_CharT>>(__value);
133
}
134
135
// Selects which quotation mark has to be escaped in the output:
// __apostrophe is passed by __format_escaped_char (output delimited by '),
// __double_quote by __format_escaped_string (output delimited by "). The
// other quotation mark is appended verbatim.
enum class __escape_quotation_mark { __apostrophe, __double_quote };
136
137
// [format.string.escaped]/2
138
template <class _CharT>
139
[[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool __is_escaped_sequence_written(
140
basic_string<_CharT>& __str, char32_t __value, bool __last_escaped, __escape_quotation_mark __mark) {
141
// 2.2.1.1 - Mapped character in [tab:format.escape.sequences]
142
switch (__value) {
143
case _CharT('\t'):
144
__str += _LIBCPP_STATICALLY_WIDEN(_CharT, "\\t");
145
return true;
146
case _CharT('\n'):
147
__str += _LIBCPP_STATICALLY_WIDEN(_CharT, "\\n");
148
return true;
149
case _CharT('\r'):
150
__str += _LIBCPP_STATICALLY_WIDEN(_CharT, "\\r");
151
return true;
152
case _CharT('\''):
153
if (__mark == __escape_quotation_mark::__apostrophe)
154
__str += _LIBCPP_STATICALLY_WIDEN(_CharT, R"(\')");
155
else
156
__str += __value;
157
return true;
158
case _CharT('"'):
159
if (__mark == __escape_quotation_mark::__double_quote)
160
__str += _LIBCPP_STATICALLY_WIDEN(_CharT, R"(\")");
161
else
162
__str += __value;
163
return true;
164
case _CharT('\\'):
165
__str += _LIBCPP_STATICALLY_WIDEN(_CharT, R"(\\)");
166
return true;
167
168
// 2.2.1.2 - Space
169
case _CharT(' '):
170
__str += __value;
171
return true;
172
}
173
174
// 2.2.2
175
// Otherwise, if X is a shift sequence, the effect on E and further
176
// decoding of S is unspecified.
177
// For now shift sequences are ignored and treated as Unicode. Other parts
178
// of the format library do the same. It's unknown how ostream treats them.
179
// TODO FMT determine what to do with shift sequences.
180
181
// 2.2.1.2.1 and 2.2.1.2.2 - Escape
182
return __formatter::__is_escaped_sequence_written(__str, __last_escaped, __formatter::__to_char32(__value));
183
}
184
185
template <class _CharT>
186
_LIBCPP_HIDE_FROM_ABI void
187
__escape(basic_string<_CharT>& __str, basic_string_view<_CharT> __values, __escape_quotation_mark __mark) {
188
__unicode::__code_point_view<_CharT> __view{__values.begin(), __values.end()};
189
190
// When the first code unit has the property Grapheme_Extend=Yes it needs to
191
// be escaped. This happens when the previous code unit was also escaped.
192
bool __escape = true;
193
while (!__view.__at_end()) {
194
auto __first = __view.__position();
195
typename __unicode::__consume_result __result = __view.__consume();
196
if (__result.__status == __unicode::__consume_result::__ok) {
197
__escape = __formatter::__is_escaped_sequence_written(__str, __result.__code_point, __escape, __mark);
198
if (!__escape)
199
// 2.2.1.3 - Add the character
200
ranges::copy(__first, __view.__position(), std::back_insert_iterator(__str));
201
} else {
202
// 2.2.3 sequence of ill-formed code units
203
ranges::for_each(__first, __view.__position(), [&](_CharT __value) {
204
__formatter::__write_escape_ill_formed_code_unit(__str, __formatter::__to_char32(__value));
205
});
206
}
207
}
208
}
209
210
template <class _CharT>
211
_LIBCPP_HIDE_FROM_ABI auto
212
__format_escaped_char(_CharT __value,
213
output_iterator<const _CharT&> auto __out_it,
214
__format_spec::__parsed_specifications<_CharT> __specs) -> decltype(__out_it) {
215
basic_string<_CharT> __str;
216
__str += _CharT('\'');
217
__formatter::__escape(__str, basic_string_view{std::addressof(__value), 1}, __escape_quotation_mark::__apostrophe);
218
__str += _CharT('\'');
219
return __formatter::__write(__str.data(), __str.data() + __str.size(), std::move(__out_it), __specs, __str.size());
220
}
221
222
template <class _CharT>
223
_LIBCPP_HIDE_FROM_ABI auto
224
__format_escaped_string(basic_string_view<_CharT> __values,
225
output_iterator<const _CharT&> auto __out_it,
226
__format_spec::__parsed_specifications<_CharT> __specs) -> decltype(__out_it) {
227
basic_string<_CharT> __str;
228
__str += _CharT('"');
229
__formatter::__escape(__str, __values, __escape_quotation_mark::__double_quote);
230
__str += _CharT('"');
231
return __formatter::__write_string(basic_string_view{__str}, std::move(__out_it), __specs);
232
}
233
234
#endif // _LIBCPP_STD_VER >= 23
235
236
} // namespace __formatter
237
238
_LIBCPP_END_NAMESPACE_STD
239
240
_LIBCPP_POP_MACROS
241
242
#endif // _LIBCPP___FORMAT_WRITE_ESCAPED_H
243
244