Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/kyua/utils/text/regex.cpp
48178 views
1
// Copyright 2014 The Kyua Authors.
2
// All rights reserved.
3
//
4
// Redistribution and use in source and binary forms, with or without
5
// modification, are permitted provided that the following conditions are
6
// met:
7
//
8
// * Redistributions of source code must retain the above copyright
9
// notice, this list of conditions and the following disclaimer.
10
// * Redistributions in binary form must reproduce the above copyright
11
// notice, this list of conditions and the following disclaimer in the
12
// documentation and/or other materials provided with the distribution.
13
// * Neither the name of Google Inc. nor the names of its contributors
14
// may be used to endorse or promote products derived from this software
15
// without specific prior written permission.
16
//
17
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
29
#include "utils/text/regex.hpp"
30
31
extern "C" {
32
#include <sys/types.h>
33
34
#include <regex.h>
35
}
36
37
#include "utils/auto_array.ipp"
38
#include "utils/defs.hpp"
39
#include "utils/format/macros.hpp"
40
#include "utils/noncopyable.hpp"
41
#include "utils/sanity.hpp"
42
#include "utils/text/exceptions.hpp"
43
44
namespace text = utils::text;
45
46
47
namespace {
48
49
50
static void throw_regex_error(const int, const ::regex_t*, const std::string&)
51
UTILS_NORETURN;
52
53
54
/// Constructs and raises a regex_error.
55
///
56
/// \param error The error code returned by regcomp(3) or regexec(3).
57
/// \param preg The native regex object that caused this error.
58
/// \param prefix Error message prefix string.
59
///
60
/// \throw regex_error The constructed exception.
61
static void
62
throw_regex_error(const int error, const ::regex_t* preg,
63
const std::string& prefix)
64
{
65
char buffer[1024];
66
67
// TODO(jmmv): Would be nice to handle the case where the message does
68
// not fit in the temporary buffer.
69
(void)::regerror(error, preg, buffer, sizeof(buffer));
70
71
throw text::regex_error(F("%s: %s") % prefix % buffer);
72
}
73
74
75
} // anonymous namespace
76
77
78
/// Internal implementation for regex_matches.
79
struct utils::text::regex_matches::impl : utils::noncopyable {
80
/// String on which we are matching.
81
///
82
/// In theory, we could take a reference here instead of a copy, and make
83
/// it a requirement for the caller to ensure that the lifecycle of the
84
/// input string outlasts the lifecycle of the regex_matches. However, that
85
/// contract is very easy to break with hardcoded strings (as we do in
86
/// tests). Just go for the safer case here.
87
const std::string _string;
88
89
/// Maximum number of matching groups we expect, including the full match.
90
///
91
/// In other words, this is the size of the _matches array.
92
const std::size_t _nmatches;
93
94
/// Native regular expression match representation.
95
utils::auto_array< ::regmatch_t > _matches;
96
97
/// Constructor.
98
///
99
/// This executes the regex on the given string and sets up the internal
100
/// class state based on the results.
101
///
102
/// \param preg The native regex object.
103
/// \param str The string on which to execute the regex.
104
/// \param ngroups Number of capture groups in the regex. This is an upper
105
/// bound and may be greater than the actual matches.
106
///
107
/// \throw regex_error If the call to regexec(3) fails.
108
impl(const ::regex_t* preg, const std::string& str,
109
const std::size_t ngroups) :
110
_string(str),
111
_nmatches(ngroups + 1),
112
_matches(new ::regmatch_t[_nmatches])
113
{
114
const int error = ::regexec(preg, _string.c_str(), _nmatches,
115
_matches.get(), 0);
116
if (error == REG_NOMATCH) {
117
_matches.reset(NULL);
118
} else if (error != 0) {
119
throw_regex_error(error, preg,
120
F("regexec on '%s' failed") % _string);
121
}
122
}
123
124
/// Destructor.
125
~impl(void)
126
{
127
}
128
};
129
130
131
/// Constructor.
132
///
133
/// \param pimpl Constructed implementation of the object.
134
text::regex_matches::regex_matches(std::shared_ptr< impl > pimpl) :
135
_pimpl(pimpl)
136
{
137
}
138
139
140
/// Destructor.
141
text::regex_matches::~regex_matches(void)
142
{
143
}
144
145
146
/// Returns the number of matches in this object.
147
///
148
/// Note that this does not correspond to the number of groups provided at
149
/// construction time. The returned value here accounts for only the returned
150
/// valid matches.
151
///
152
/// \return Number of matches, including the full match.
153
std::size_t
154
text::regex_matches::count(void) const
155
{
156
std::size_t total = 0;
157
if (_pimpl->_matches.get() != NULL) {
158
for (std::size_t i = 0; i < _pimpl->_nmatches; ++i) {
159
if (_pimpl->_matches[i].rm_so != -1)
160
++total;
161
}
162
INV(total <= _pimpl->_nmatches);
163
}
164
return total;
165
}
166
167
168
/// Gets a match.
169
///
170
/// \param index Number of the match to get. Index 0 always contains the match
171
/// of the whole regex.
172
///
173
/// \pre There regex must have matched the input string.
174
/// \pre index must be lower than count().
175
///
176
/// \return The textual match.
177
std::string
178
text::regex_matches::get(const std::size_t index) const
179
{
180
PRE(*this);
181
PRE(index < count());
182
183
const ::regmatch_t* match = &_pimpl->_matches[index];
184
185
return std::string(_pimpl->_string.c_str() + match->rm_so,
186
match->rm_eo - match->rm_so);
187
}
188
189
190
/// Checks if there are any matches.
191
///
192
/// \return True if the object contains one or more matches; false otherwise.
193
text::regex_matches::operator bool(void) const
194
{
195
return _pimpl->_matches.get() != NULL;
196
}
197
198
199
/// Internal implementation for regex.
200
struct utils::text::regex::impl : utils::noncopyable {
201
/// Native regular expression representation.
202
::regex_t _preg;
203
204
/// Number of capture groups in the regular expression. This is an upper
205
/// bound and does NOT include the default full string match.
206
std::size_t _ngroups;
207
208
/// Constructor.
209
///
210
/// This compiles the given regular expression.
211
///
212
/// \param regex_ The regular expression to compile.
213
/// \param ngroups Number of capture groups in the regular expression. This
214
/// is an upper bound and does NOT include the default full string
215
/// match.
216
/// \param ignore_case Whether to ignore case during matching.
217
///
218
/// \throw regex_error If the call to regcomp(3) fails.
219
impl(const std::string& regex_, const std::size_t ngroups,
220
const bool ignore_case) :
221
_ngroups(ngroups)
222
{
223
const int flags = REG_EXTENDED | (ignore_case ? REG_ICASE : 0);
224
const int error = ::regcomp(&_preg, regex_.c_str(), flags);
225
if (error != 0)
226
throw_regex_error(error, &_preg, F("regcomp on '%s' failed")
227
% regex_);
228
}
229
230
/// Destructor.
231
~impl(void)
232
{
233
::regfree(&_preg);
234
}
235
};
236
237
238
/// Constructor.
239
///
240
/// \param pimpl Constructed implementation of the object.
241
text::regex::regex(std::shared_ptr< impl > pimpl) : _pimpl(pimpl)
242
{
243
}
244
245
246
/// Destructor.
247
text::regex::~regex(void)
248
{
249
}
250
251
252
/// Compiles a new regular expression.
253
///
254
/// \param regex_ The regular expression to compile.
255
/// \param ngroups Number of capture groups in the regular expression. This is
256
/// an upper bound and does NOT include the default full string match.
257
/// \param ignore_case Whether to ignore case during matching.
258
///
259
/// \return A new regular expression, ready to match strings.
260
///
261
/// \throw regex_error If the regular expression is invalid and cannot be
262
/// compiled.
263
text::regex
264
text::regex::compile(const std::string& regex_, const std::size_t ngroups,
265
const bool ignore_case)
266
{
267
return regex(std::shared_ptr< impl >(new impl(regex_, ngroups,
268
ignore_case)));
269
}
270
271
272
/// Matches the regular expression against a string.
273
///
274
/// \param str String to match the regular expression against.
275
///
276
/// \return A new regex_matches object with the results of the match.
277
text::regex_matches
278
text::regex::match(const std::string& str) const
279
{
280
std::shared_ptr< regex_matches::impl > pimpl(new regex_matches::impl(
281
&_pimpl->_preg, str, _pimpl->_ngroups));
282
return regex_matches(pimpl);
283
}
284
285
286
/// Compiles and matches a regular expression once.
287
///
288
/// This is syntactic sugar to simplify the instantiation of a new regex object
289
/// and its subsequent match on a string.
290
///
291
/// \param regex_ The regular expression to compile and match.
292
/// \param str String to match the regular expression against.
293
/// \param ngroups Number of capture groups in the regular expression.
294
/// \param ignore_case Whether to ignore case during matching.
295
///
296
/// \return A new regex_matches object with the results of the match.
297
text::regex_matches
298
text::match_regex(const std::string& regex_, const std::string& str,
299
const std::size_t ngroups, const bool ignore_case)
300
{
301
return regex::compile(regex_, ngroups, ignore_case).match(str);
302
}
303
304