Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Roblox
GitHub Repository: Roblox/luau
Path: blob/master/Common/src/StringUtils.cpp
2725 views
1
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
2
#include "Luau/StringUtils.h"
3
4
#include "Luau/Common.h"
5
6
#include <array>
7
#include <vector>
8
#include <string>
9
#include <string.h>
10
#include <stdint.h>
11
12
namespace Luau
13
{
14
15
void vformatAppend(std::string& ret, const char* fmt, va_list args)
16
{
17
va_list argscopy;
18
va_copy(argscopy, args);
19
#ifdef _MSC_VER
20
int actualSize = _vscprintf(fmt, argscopy);
21
#else
22
int actualSize = vsnprintf(NULL, 0, fmt, argscopy);
23
#endif
24
va_end(argscopy);
25
26
if (actualSize <= 0)
27
return;
28
29
size_t sz = ret.size();
30
ret.resize(sz + actualSize);
31
vsnprintf(&ret[0] + sz, actualSize + 1, fmt, args);
32
}
33
34
std::string format(const char* fmt, ...)
35
{
36
std::string result;
37
va_list args;
38
va_start(args, fmt);
39
vformatAppend(result, fmt, args);
40
va_end(args);
41
return result;
42
}
43
44
void formatAppend(std::string& str, const char* fmt, ...)
45
{
46
va_list args;
47
va_start(args, fmt);
48
vformatAppend(str, fmt, args);
49
va_end(args);
50
}
51
52
std::string vformat(const char* fmt, va_list args)
53
{
54
std::string ret;
55
vformatAppend(ret, fmt, args);
56
return ret;
57
}
58
59
template<typename String>
60
static std::string joinImpl(const std::vector<String>& segments, std::string_view delimiter)
61
{
62
if (segments.empty())
63
return "";
64
65
size_t len = (segments.size() - 1) * delimiter.size();
66
for (const auto& sv : segments)
67
len += sv.size();
68
69
std::string result;
70
result.resize(len);
71
char* dest = const_cast<char*>(result.data()); // This const_cast is only necessary until C++17
72
73
auto it = segments.begin();
74
memcpy(dest, it->data(), it->size());
75
dest += it->size();
76
++it;
77
for (; it != segments.end(); ++it)
78
{
79
memcpy(dest, delimiter.data(), delimiter.size());
80
dest += delimiter.size();
81
memcpy(dest, it->data(), it->size());
82
dest += it->size();
83
}
84
85
LUAU_ASSERT(dest == result.data() + len);
86
87
return result;
88
}
89
90
std::string join(const std::vector<std::string_view>& segments, std::string_view delimiter)
91
{
92
return joinImpl(segments, delimiter);
93
}
94
95
std::string join(const std::vector<std::string>& segments, std::string_view delimiter)
96
{
97
return joinImpl(segments, delimiter);
98
}
99
100
std::vector<std::string_view> split(std::string_view s, char delimiter)
101
{
102
std::vector<std::string_view> result;
103
104
while (!s.empty())
105
{
106
auto index = s.find(delimiter);
107
if (index == std::string::npos)
108
{
109
result.push_back(s);
110
break;
111
}
112
result.push_back(s.substr(0, index));
113
s.remove_prefix(index + 1);
114
}
115
116
return result;
117
}
118
119
size_t editDistance(std::string_view a, std::string_view b)
120
{
121
// When there are matching prefix and suffix, they end up computing as zero cost, effectively making it no-op. We drop these characters.
122
while (!a.empty() && !b.empty() && a.front() == b.front())
123
{
124
a.remove_prefix(1);
125
b.remove_prefix(1);
126
}
127
128
while (!a.empty() && !b.empty() && a.back() == b.back())
129
{
130
a.remove_suffix(1);
131
b.remove_suffix(1);
132
}
133
134
// Since we know the edit distance is the difference of the length of A and B discounting the matching prefixes and suffixes,
135
// it is therefore pointless to run the rest of this function to find that out. We immediately infer this size and return it.
136
if (a.empty())
137
return b.size();
138
if (b.empty())
139
return a.size();
140
141
size_t maxDistance = a.size() + b.size();
142
143
std::vector<size_t> distances((a.size() + 2) * (b.size() + 2), 0);
144
auto getPos = [b](size_t x, size_t y) -> size_t
145
{
146
return (x * (b.size() + 2)) + y;
147
};
148
149
distances[0] = maxDistance;
150
151
for (size_t x = 0; x <= a.size(); ++x)
152
{
153
distances[getPos(x + 1, 0)] = maxDistance;
154
distances[getPos(x + 1, 1)] = x;
155
}
156
157
for (size_t y = 0; y <= b.size(); ++y)
158
{
159
distances[getPos(0, y + 1)] = maxDistance;
160
distances[getPos(1, y + 1)] = y;
161
}
162
163
std::array<size_t, 256> seenCharToRow;
164
seenCharToRow.fill(0);
165
166
for (size_t x = 1; x <= a.size(); ++x)
167
{
168
size_t lastMatchedY = 0;
169
170
for (size_t y = 1; y <= b.size(); ++y)
171
{
172
// The value of b[N] can be negative with unicode characters
173
unsigned char bSeenCharIndex = static_cast<unsigned char>(b[y - 1]);
174
size_t x1 = seenCharToRow[bSeenCharIndex];
175
size_t y1 = lastMatchedY;
176
177
size_t cost = 1;
178
if (a[x - 1] == b[y - 1])
179
{
180
cost = 0;
181
lastMatchedY = y;
182
}
183
184
size_t transposition = distances[getPos(x1, y1)] + (x - x1 - 1) + 1 + (y - y1 - 1);
185
size_t substitution = distances[getPos(x, y)] + cost;
186
size_t insertion = distances[getPos(x, y + 1)] + 1;
187
size_t deletion = distances[getPos(x + 1, y)] + 1;
188
189
// It's more performant to use std::min(size_t, size_t) rather than the initializer_list overload.
190
// Until proven otherwise, please do not change this.
191
distances[getPos(x + 1, y + 1)] = std::min(std::min(insertion, deletion), std::min(substitution, transposition));
192
}
193
194
// The value of a[N] can be negative with unicode characters
195
unsigned char aSeenCharIndex = static_cast<unsigned char>(a[x - 1]);
196
seenCharToRow[aSeenCharIndex] = x;
197
}
198
199
return distances[getPos(a.size() + 1, b.size() + 1)];
200
}
201
202
bool startsWith(std::string_view haystack, std::string_view needle)
203
{
204
// ::starts_with is C++20
205
return haystack.size() >= needle.size() && haystack.substr(0, needle.size()) == needle;
206
}
207
208
bool equalsLower(std::string_view lhs, std::string_view rhs)
209
{
210
if (lhs.size() != rhs.size())
211
return false;
212
213
for (size_t i = 0; i < lhs.size(); ++i)
214
if (tolower(uint8_t(lhs[i])) != tolower(uint8_t(rhs[i])))
215
return false;
216
217
return true;
218
}
219
220
size_t hashRange(const char* data, size_t size)
221
{
222
// FNV-1a
223
uint32_t hash = 2166136261;
224
225
for (size_t i = 0; i < size; ++i)
226
{
227
hash ^= uint8_t(data[i]);
228
hash *= 16777619;
229
}
230
231
return hash;
232
}
233
234
bool isIdentifier(std::string_view s)
235
{
236
return (s.find_first_not_of("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ01234567890_") == std::string::npos);
237
}
238
239
std::string escape(std::string_view s, bool escapeForInterpString)
240
{
241
std::string r;
242
r.reserve(s.size() + 50); // arbitrary number to guess how many characters we'll be inserting
243
244
for (uint8_t c : s)
245
{
246
if (c >= ' ' && c != '\\' && c != '\'' && c != '\"' && c != '`' && c != '{')
247
r += c;
248
else
249
{
250
r += '\\';
251
252
if (escapeForInterpString && (c == '`' || c == '{'))
253
{
254
r += c;
255
continue;
256
}
257
258
switch (c)
259
{
260
case '\a':
261
r += 'a';
262
break;
263
case '\b':
264
r += 'b';
265
break;
266
case '\f':
267
r += 'f';
268
break;
269
case '\n':
270
r += 'n';
271
break;
272
case '\r':
273
r += 'r';
274
break;
275
case '\t':
276
r += 't';
277
break;
278
case '\v':
279
r += 'v';
280
break;
281
case '\'':
282
r += '\'';
283
break;
284
case '\"':
285
r += '\"';
286
break;
287
case '\\':
288
r += '\\';
289
break;
290
default:
291
Luau::formatAppend(r, "%03u", c);
292
}
293
}
294
}
295
296
return r;
297
}
298
299
static bool isWhitespace(char c)
300
{
301
return c == ' ' || c == '\n' || c == '\r' || c == '\t';
302
}
303
304
std::string_view strip(std::string_view s)
305
{
306
while (!s.empty() && isWhitespace(s.front()))
307
s.remove_prefix(1);
308
309
while (!s.empty() && isWhitespace(s.back()))
310
s.remove_suffix(1);
311
312
return s;
313
}
314
315
} // namespace Luau
316
317