Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/lldb/source/Utility/Args.cpp
39587 views
1
//===-- Args.cpp ----------------------------------------------------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
9
#include "lldb/Utility/Args.h"
10
#include "lldb/Utility/FileSpec.h"
11
#include "lldb/Utility/Stream.h"
12
#include "lldb/Utility/StringList.h"
13
#include "llvm/ADT/StringSwitch.h"
14
15
using namespace lldb;
16
using namespace lldb_private;
17
18
// A helper function for argument parsing.
19
// Parses the initial part of the first argument using normal double quote
20
// rules: backslash escapes the double quote and itself. The parsed string is
21
// appended to the second argument. The function returns the unparsed portion
22
// of the string, starting at the closing quote.
23
static llvm::StringRef ParseDoubleQuotes(llvm::StringRef quoted,
24
std::string &result) {
25
// Inside double quotes, '\' and '"' are special.
26
static const char *k_escapable_characters = "\"\\";
27
while (true) {
28
// Skip over regular characters and append them.
29
size_t regular = quoted.find_first_of(k_escapable_characters);
30
result += quoted.substr(0, regular);
31
quoted = quoted.substr(regular);
32
33
// If we have reached the end of string or the closing quote, we're done.
34
if (quoted.empty() || quoted.front() == '"')
35
break;
36
37
// We have found a backslash.
38
quoted = quoted.drop_front();
39
40
if (quoted.empty()) {
41
// A lone backslash at the end of string, let's just append it.
42
result += '\\';
43
break;
44
}
45
46
// If the character after the backslash is not an allowed escapable
47
// character, we leave the character sequence untouched.
48
if (strchr(k_escapable_characters, quoted.front()) == nullptr)
49
result += '\\';
50
51
result += quoted.front();
52
quoted = quoted.drop_front();
53
}
54
55
return quoted;
56
}
57
58
// Counts the entries of a null-terminated argument vector. The terminating
// null pointer is not counted, and a null `argv` counts as zero entries.
static size_t ArgvToArgc(const char **argv) {
  size_t count = 0;
  if (argv != nullptr) {
    for (const char **cursor = argv; *cursor != nullptr; ++cursor)
      ++count;
  }
  return count;
}
66
67
// Trims all whitespace that can separate command line arguments from the left
68
// side of the string.
69
static llvm::StringRef ltrimForArgs(llvm::StringRef str) {
70
static const char *k_space_separators = " \t";
71
return str.ltrim(k_space_separators);
72
}
73
74
// A helper function for SetCommandString. Parses a single argument from the
75
// command string, processing quotes and backslashes in a shell-like manner.
76
// The function returns a tuple consisting of the parsed argument, the quote
77
// char used, and the unparsed portion of the string starting at the first
78
// unqouted, unescaped whitespace character.
79
static std::tuple<std::string, char, llvm::StringRef>
80
ParseSingleArgument(llvm::StringRef command) {
81
// Argument can be split into multiple discontiguous pieces, for example:
82
// "Hello ""World"
83
// this would result in a single argument "Hello World" (without the quotes)
84
// since the quotes would be removed and there is not space between the
85
// strings.
86
std::string arg;
87
88
// Since we can have multiple quotes that form a single command in a command
89
// like: "Hello "world'!' (which will make a single argument "Hello world!")
90
// we remember the first quote character we encounter and use that for the
91
// quote character.
92
char first_quote_char = '\0';
93
94
bool arg_complete = false;
95
do {
96
// Skip over regular characters and append them.
97
size_t regular = command.find_first_of(" \t\r\"'`\\");
98
arg += command.substr(0, regular);
99
command = command.substr(regular);
100
101
if (command.empty())
102
break;
103
104
char special = command.front();
105
command = command.drop_front();
106
switch (special) {
107
case '\\':
108
if (command.empty()) {
109
arg += '\\';
110
break;
111
}
112
113
// If the character after the backslash is not an allowed escapable
114
// character, we leave the character sequence untouched.
115
if (strchr(" \t\\'\"`", command.front()) == nullptr)
116
arg += '\\';
117
118
arg += command.front();
119
command = command.drop_front();
120
121
break;
122
123
case ' ':
124
case '\t':
125
case '\r':
126
// We are not inside any quotes, we just found a space after an argument.
127
// We are done.
128
arg_complete = true;
129
break;
130
131
case '"':
132
case '\'':
133
case '`':
134
// We found the start of a quote scope.
135
if (first_quote_char == '\0')
136
first_quote_char = special;
137
138
if (special == '"')
139
command = ParseDoubleQuotes(command, arg);
140
else {
141
// For single quotes, we simply skip ahead to the matching quote
142
// character (or the end of the string).
143
size_t quoted = command.find(special);
144
arg += command.substr(0, quoted);
145
command = command.substr(quoted);
146
}
147
148
// If we found a closing quote, skip it.
149
if (!command.empty())
150
command = command.drop_front();
151
152
break;
153
}
154
} while (!arg_complete);
155
156
return std::make_tuple(arg, first_quote_char, command);
157
}
158
159
// Builds an entry that owns a NUL-terminated copy of `str` and remembers the
// quote character it was written with.
Args::ArgEntry::ArgEntry(llvm::StringRef str, char quote) : quote(quote) {
  const size_t length = str.size();
  ptr.reset(new char[length + 1]);

  // Never hand memcpy a null source pointer, even for a zero-length copy.
  const char *source = str.data();
  if (source == nullptr)
    source = "";

  ::memcpy(data(), source, length);
  ptr[length] = 0;
}
166
167
// Args constructor
168
// Constructs the argument list by parsing `command` as a command line.
Args::Args(llvm::StringRef command) {
  SetCommandString(command);
}
169
170
// Copy constructor; delegates to the copy assignment operator.
Args::Args(const Args &rhs) {
  operator=(rhs);
}
171
172
// Constructs the argument list from a StringList, appending one argument per
// string in iteration order.
Args::Args(const StringList &list) : Args() {
  for (const std::string &item : list) {
    AppendArgument(item);
  }
}
176
177
// Constructs the argument list from an array of strings, appending one
// argument per element in order.
Args::Args(llvm::ArrayRef<llvm::StringRef> args) : Args() {
  for (llvm::StringRef item : args) {
    AppendArgument(item);
  }
}
181
182
// Copy assignment: replaces the contents with deep copies of the entries of
// `rhs` and rebuilds the null-terminated pointer vector to match.
Args &Args::operator=(const Args &rhs) {
  // Self-assignment must be a no-op. Without this guard the Clear() below
  // would empty rhs.m_entries (the same object) before anything is copied,
  // silently dropping every argument.
  if (this == &rhs)
    return *this;

  // Clear() empties m_entries and leaves m_argv holding only the terminator;
  // drop that terminator too so the vector can be rebuilt from scratch.
  Clear();
  m_argv.clear();

  for (const auto &entry : rhs.m_entries) {
    m_entries.emplace_back(entry.ref(), entry.quote);
    m_argv.push_back(m_entries.back().data());
  }
  m_argv.push_back(nullptr);
  return *this;
}
194
195
// Destructor
196
// Uses the compiler-generated destructor.
Args::~Args() = default;
197
198
// Writes every argument to `s`, one per indented line, as
// `<label>[<index>]="<value>"`, followed by a `<label>[<count>]=NULL` line
// and an end-of-line. Does nothing when `label_name` is null.
void Args::Dump(Stream &s, const char *label_name) const {
  if (label_name == nullptr)
    return;

  int index = 0;
  for (const auto &entry : m_entries) {
    s.Indent();
    s.Format("{0}[{1}]=\"{2}\"\n", label_name, index, entry.ref());
    ++index;
  }
  s.Format("{0}[{1}]=NULL\n", label_name, index);
  s.EOL();
}
210
211
// Rebuilds a command line in `command`: the arguments are joined with single
// spaces, and an argument that has a quote character recorded is wrapped in
// that character. Returns true if there is at least one argument.
bool Args::GetCommandString(std::string &command) const {
  command.clear();

  bool is_first = true;
  for (const auto &entry : m_entries) {
    if (!is_first)
      command += ' ';
    is_first = false;

    if (entry.quote != '\0') {
      command += entry.quote;
      command += entry.ref();
      command += entry.quote;
    } else {
      command += entry.ref();
    }
  }

  return !m_entries.empty();
}
227
228
// Rebuilds a command line in `command`, joining the arguments with single
// spaces and surrounding each argument that has a quote character recorded
// with that character. Returns true if there is at least one argument.
bool Args::GetQuotedCommandString(std::string &command) const {
  command.clear();

  const size_t entry_count = m_entries.size();
  for (size_t i = 0; i != entry_count; ++i) {
    const auto &entry = m_entries[i];
    if (i != 0)
      command += ' ';

    const char quote = entry.quote;
    if (quote)
      command += quote;
    command += entry.ref();
    if (quote)
      command += quote;
  }

  return entry_count != 0;
}
246
247
// Replaces the current arguments with the ones parsed from `command`.
// Arguments are separated by spaces/tabs; quoting and escaping follow
// ParseSingleArgument.
void Args::SetCommandString(llvm::StringRef command) {
  // Clear() leaves the null terminator in m_argv; remove it so it can be
  // re-added after the parsed arguments.
  Clear();
  m_argv.clear();

  std::string arg;
  char quote;
  for (command = ltrimForArgs(command); !command.empty();
       command = ltrimForArgs(command)) {
    std::tie(arg, quote, command) = ParseSingleArgument(command);
    m_entries.emplace_back(arg, quote);
    m_argv.push_back(m_entries.back().data());
  }
  m_argv.push_back(nullptr);
}
262
263
const char *Args::GetArgumentAtIndex(size_t idx) const {
264
if (idx < m_argv.size())
265
return m_argv[idx];
266
return nullptr;
267
}
268
269
// Returns the null-terminated argument vector, or nullptr when there are no
// arguments at all.
char **Args::GetArgumentVector() {
  assert(!m_argv.empty());
  // TODO: functions like execve and posix_spawnp exhibit undefined behavior
  // when argv or envp is null, so returning nullptr here is actually wrong.
  // However, other code in LLDB depends on it being null, and it has behaved
  // this way for some time, so it is left alone until someone has the time to
  // fix the callers.
  if (m_argv.size() > 1)
    return m_argv.data();
  return nullptr;
}
278
279
// Const view of the null-terminated argument vector; nullptr when there are
// no arguments at all.
const char **Args::GetConstArgumentVector() const {
  assert(!m_argv.empty());
  if (m_argv.size() > 1)
    return const_cast<const char **>(m_argv.data());
  return nullptr;
}
284
285
void Args::Shift() {
286
// Don't pop the last NULL terminator from the argv array
287
if (m_entries.empty())
288
return;
289
m_argv.erase(m_argv.begin());
290
m_entries.erase(m_entries.begin());
291
}
292
293
// Inserts `arg_str` (with quote character `quote_char`) in front of all
// existing arguments.
void Args::Unshift(llvm::StringRef arg_str, char quote_char) {
  const size_t front_index = 0;
  InsertArgumentAtIndex(front_index, arg_str, quote_char);
}
296
297
void Args::AppendArguments(const Args &rhs) {
298
assert(m_argv.size() == m_entries.size() + 1);
299
assert(m_argv.back() == nullptr);
300
m_argv.pop_back();
301
for (auto &entry : rhs.m_entries) {
302
m_entries.emplace_back(entry.ref(), entry.quote);
303
m_argv.push_back(m_entries.back().data());
304
}
305
m_argv.push_back(nullptr);
306
}
307
308
void Args::AppendArguments(const char **argv) {
309
size_t argc = ArgvToArgc(argv);
310
311
assert(m_argv.size() == m_entries.size() + 1);
312
assert(m_argv.back() == nullptr);
313
m_argv.pop_back();
314
for (auto arg : llvm::ArrayRef(argv, argc)) {
315
m_entries.emplace_back(arg, '\0');
316
m_argv.push_back(m_entries.back().data());
317
}
318
319
m_argv.push_back(nullptr);
320
}
321
322
// Adds `arg_str` (with quote character `quote_char`) after the last existing
// argument.
void Args::AppendArgument(llvm::StringRef arg_str, char quote_char) {
  const size_t end_index = GetArgumentCount();
  InsertArgumentAtIndex(end_index, arg_str, quote_char);
}
325
326
void Args::InsertArgumentAtIndex(size_t idx, llvm::StringRef arg_str,
327
char quote_char) {
328
assert(m_argv.size() == m_entries.size() + 1);
329
assert(m_argv.back() == nullptr);
330
331
if (idx > m_entries.size())
332
return;
333
m_entries.emplace(m_entries.begin() + idx, arg_str, quote_char);
334
m_argv.insert(m_argv.begin() + idx, m_entries[idx].data());
335
}
336
337
void Args::ReplaceArgumentAtIndex(size_t idx, llvm::StringRef arg_str,
338
char quote_char) {
339
assert(m_argv.size() == m_entries.size() + 1);
340
assert(m_argv.back() == nullptr);
341
342
if (idx >= m_entries.size())
343
return;
344
345
m_entries[idx] = ArgEntry(arg_str, quote_char);
346
m_argv[idx] = m_entries[idx].data();
347
}
348
349
void Args::DeleteArgumentAtIndex(size_t idx) {
350
if (idx >= m_entries.size())
351
return;
352
353
m_argv.erase(m_argv.begin() + idx);
354
m_entries.erase(m_entries.begin() + idx);
355
}
356
357
void Args::SetArguments(size_t argc, const char **argv) {
358
Clear();
359
360
auto args = llvm::ArrayRef(argv, argc);
361
m_entries.resize(argc);
362
m_argv.resize(argc + 1);
363
for (size_t i = 0; i < args.size(); ++i) {
364
char quote =
365
((args[i][0] == '\'') || (args[i][0] == '"') || (args[i][0] == '`'))
366
? args[i][0]
367
: '\0';
368
369
m_entries[i] = ArgEntry(args[i], quote);
370
m_argv[i] = m_entries[i].data();
371
}
372
}
373
374
void Args::SetArguments(const char **argv) {
375
SetArguments(ArgvToArgc(argv), argv);
376
}
377
378
void Args::Clear() {
379
m_entries.clear();
380
m_argv.clear();
381
m_argv.push_back(nullptr);
382
}
383
384
std::string Args::GetShellSafeArgument(const FileSpec &shell,
385
llvm::StringRef unsafe_arg) {
386
struct ShellDescriptor {
387
llvm::StringRef m_basename;
388
llvm::StringRef m_escapables;
389
};
390
391
static ShellDescriptor g_Shells[] = {{"bash", " '\"<>()&;"},
392
{"fish", " '\"<>()&\\|;"},
393
{"tcsh", " '\"<>()&;"},
394
{"zsh", " '\"<>()&;\\|"},
395
{"sh", " '\"<>()&;"}};
396
397
// safe minimal set
398
llvm::StringRef escapables = " '\"";
399
400
auto basename = shell.GetFilename().GetStringRef();
401
if (!basename.empty()) {
402
for (const auto &Shell : g_Shells) {
403
if (Shell.m_basename == basename) {
404
escapables = Shell.m_escapables;
405
break;
406
}
407
}
408
}
409
410
std::string safe_arg;
411
safe_arg.reserve(unsafe_arg.size());
412
// Add a \ before every character that needs to be escaped.
413
for (char c : unsafe_arg) {
414
if (escapables.contains(c))
415
safe_arg.push_back('\\');
416
safe_arg.push_back(c);
417
}
418
return safe_arg;
419
}
420
421
// Maps an encoding name ("uint", "sint", "ieee754", "vector") to the matching
// lldb::Encoding value. The match is exact; any other string yields
// `fail_value`.
lldb::Encoding Args::StringToEncoding(llvm::StringRef s,
                                      lldb::Encoding fail_value) {
  if (s == "uint")
    return eEncodingUint;
  if (s == "sint")
    return eEncodingSint;
  if (s == "ieee754")
    return eEncodingIEEE754;
  if (s == "vector")
    return eEncodingVector;
  return fail_value;
}
430
431
// Maps a generic register name ("pc", "sp", "fp", "ra"/"lr", "flags",
// "arg1".."arg8", "tp") to its LLDB_REGNUM_GENERIC_* constant. The match is
// exact; an empty or unrecognized name yields LLDB_INVALID_REGNUM.
uint32_t Args::StringToGenericRegister(llvm::StringRef s) {
  if (s.empty())
    return LLDB_INVALID_REGNUM;

  return llvm::StringSwitch<uint32_t>(s)
      .Case("pc", LLDB_REGNUM_GENERIC_PC)
      .Case("sp", LLDB_REGNUM_GENERIC_SP)
      .Case("fp", LLDB_REGNUM_GENERIC_FP)
      .Cases("ra", "lr", LLDB_REGNUM_GENERIC_RA)
      .Case("flags", LLDB_REGNUM_GENERIC_FLAGS)
      .Case("arg1", LLDB_REGNUM_GENERIC_ARG1)
      .Case("arg2", LLDB_REGNUM_GENERIC_ARG2)
      .Case("arg3", LLDB_REGNUM_GENERIC_ARG3)
      .Case("arg4", LLDB_REGNUM_GENERIC_ARG4)
      .Case("arg5", LLDB_REGNUM_GENERIC_ARG5)
      .Case("arg6", LLDB_REGNUM_GENERIC_ARG6)
      .Case("arg7", LLDB_REGNUM_GENERIC_ARG7)
      .Case("arg8", LLDB_REGNUM_GENERIC_ARG8)
      .Case("tp", LLDB_REGNUM_GENERIC_TP)
      .Default(LLDB_INVALID_REGNUM);
}
452
453
// Converts the backslash escape sequences in the C string `src` into the
// characters they denote and stores the result in `dst` (which is cleared
// first). A null `src` yields an empty `dst`.
//
// Recognized sequences:
//   \a \b \f \n \r \t \v   the usual control characters
//   \\ \' \"               the literal character
//   \0 plus up to three further octal digits; the byte is emitted only if the
//                          value fits in 8 bits, otherwise nothing is emitted
//   \x plus one or two hex digits; "\x" not followed by a hex digit yields
//                          a plain 'x'
//   \<any other char>      that character, without the backslash
// A lone backslash at the very end of `src` is copied through unchanged.
void Args::EncodeEscapeSequences(const char *src, std::string &dst) {
  dst.clear();
  if (!src)
    return;

  for (const char *p = src; *p != '\0'; ++p) {
    // Copy the run of characters up to the next backslash verbatim.
    size_t non_special_chars = ::strcspn(p, "\\");
    if (non_special_chars > 0) {
      dst.append(p, non_special_chars);
      p += non_special_chars;
      if (*p == '\0')
        break;
    }

    if (*p != '\\')
      continue;

    ++p; // skip the slash

    if (*p == '\0') {
      // A lone backslash at the end of the string: append it as is and stop.
      // Falling through to the switch would append a NUL byte and let the
      // loop increment step past the terminator.
      dst.append(1, '\\');
      break;
    }

    switch (*p) {
    case 'a':
      dst.append(1, '\a');
      break;
    case 'b':
      dst.append(1, '\b');
      break;
    case 'f':
      dst.append(1, '\f');
      break;
    case 'n':
      dst.append(1, '\n');
      break;
    case 'r':
      dst.append(1, '\r');
      break;
    case 't':
      dst.append(1, '\t');
      break;
    case 'v':
      dst.append(1, '\v');
      break;
    case '\\':
      dst.append(1, '\\');
      break;
    case '\'':
      dst.append(1, '\'');
      break;
    case '"':
      dst.append(1, '"');
      break;
    case '0': {
      // Holds the initial zero, up to 3 more octal digits, and a
      // terminating NUL.
      char oct_str[5] = {'\0', '\0', '\0', '\0', '\0'};

      int i;
      for (i = 0; i < 4 && (p[i] >= '0' && p[i] <= '7'); ++i)
        oct_str[i] = p[i];

      // The main loop's increment consumes the last octal character, so
      // advance by one less than the number of characters read (i >= 1
      // because p[0] is '0').
      p += i - 1;
      unsigned long octal_value = ::strtoul(oct_str, nullptr, 8);
      if (octal_value <= UINT8_MAX)
        dst.append(1, static_cast<char>(octal_value));
    } break;

    case 'x':
      // Hex number with one or two digits. The casts to unsigned char keep
      // isxdigit well-defined for bytes with the high bit set.
      if (isxdigit(static_cast<unsigned char>(p[1]))) {
        ++p; // Skip the 'x'

        // Two hex chars plus a NUL terminator.
        char hex_str[3] = {*p, '\0', '\0'};
        if (isxdigit(static_cast<unsigned char>(p[1]))) {
          ++p; // Skip the first of the two hex chars
          hex_str[1] = *p;
        }

        unsigned long hex_value = strtoul(hex_str, nullptr, 16);
        if (hex_value <= UINT8_MAX)
          dst.append(1, static_cast<char>(hex_value));
      } else {
        dst.append(1, 'x');
      }
      break;

    default:
      // Desensitize any other character by emitting just what came after
      // the '\'.
      dst.append(1, *p);
      break;
    }
  }
}
551
552
void Args::ExpandEscapedCharacters(const char *src, std::string &dst) {
553
dst.clear();
554
if (src) {
555
for (const char *p = src; *p != '\0'; ++p) {
556
if (llvm::isPrint(*p))
557
dst.append(1, *p);
558
else {
559
switch (*p) {
560
case '\a':
561
dst.append("\\a");
562
break;
563
case '\b':
564
dst.append("\\b");
565
break;
566
case '\f':
567
dst.append("\\f");
568
break;
569
case '\n':
570
dst.append("\\n");
571
break;
572
case '\r':
573
dst.append("\\r");
574
break;
575
case '\t':
576
dst.append("\\t");
577
break;
578
case '\v':
579
dst.append("\\v");
580
break;
581
case '\'':
582
dst.append("\\'");
583
break;
584
case '"':
585
dst.append("\\\"");
586
break;
587
case '\\':
588
dst.append("\\\\");
589
break;
590
default: {
591
// Just encode as octal
592
dst.append("\\0");
593
char octal_str[32];
594
snprintf(octal_str, sizeof(octal_str), "%o", *p);
595
dst.append(octal_str);
596
} break;
597
}
598
}
599
}
600
}
601
}
602
603
// Returns `arg` with a backslash inserted in front of every character that
// needs escaping in the given quoting context:
//   '\0' (unquoted)  space, tab, backslash and the three quote characters
//   '"'              $, ", ` and backslash
//   '\'' or '`'      nothing; the argument is returned unchanged
// Any other `quote_char` trips an assertion and returns `arg` unchanged.
std::string Args::EscapeLLDBCommandArgument(const std::string &arg,
                                            char quote_char) {
  if (quote_char == '`' || quote_char == '\'')
    return arg;

  const char *chars_to_escape = nullptr;
  if (quote_char == '\0') {
    chars_to_escape = " \t\\'\"`";
  } else if (quote_char == '"') {
    chars_to_escape = "$\"`\\";
  } else {
    assert(false && "Unhandled quote character");
    return arg;
  }

  std::string escaped;
  escaped.reserve(arg.size());
  for (const char c : arg) {
    if (::strchr(chars_to_escape, c) != nullptr)
      escaped += '\\';
    escaped += c;
  }
  return escaped;
}
630
631
// Constructs the object by splitting `arg_string` into an option part and a
// raw suffix; see SetFromString.
OptionsWithRaw::OptionsWithRaw(llvm::StringRef arg_string) {
  this->SetFromString(arg_string);
}
634
635
// Splits `arg_string` into leading arguments and a raw suffix.
//
// If the string (after leading spaces/tabs) starts with '-' and contains an
// unquoted "--" argument, everything before that delimiter is parsed into
// m_args (and recorded in m_arg_string), the text up to and including the
// delimiter and the separator consumed after it is recorded in
// m_arg_string_with_delimiter, and the rest becomes m_suffix. In every other
// case the whole, untrimmed input becomes m_suffix and nothing else is
// touched.
void OptionsWithRaw::SetFromString(llvm::StringRef arg_string) {
  const llvm::StringRef original_args = arg_string;

  llvm::StringRef remaining = ltrimForArgs(arg_string);

  // Without a leading dash there are no options, only a raw part.
  if (!remaining.starts_with("-")) {
    m_suffix = original_args.str();
    return;
  }

  std::string arg;
  char quote;
  while (!remaining.empty()) {
    // Offset in the original string at which the next argument begins.
    const std::size_t arg_begin = original_args.size() - remaining.size();

    // Parse the next argument from the remaining string.
    std::tie(arg, quote, remaining) = ParseSingleArgument(remaining);

    // An unquoted '--' marks the start of the raw suffix.
    Args::ArgEntry entry(arg, quote);
    if (!entry.IsQuoted() && arg == "--") {
      // What is left is the raw suffix; what came before the delimiter needs
      // to be interpreted as arguments.
      m_has_args = true;
      m_suffix = remaining.str();

      // Offset in the original string just past what was consumed for '--'.
      const std::size_t delimiter_end =
          original_args.size() - remaining.size();

      // Parse the part known to contain all the arguments as proper
      // arguments.
      const llvm::StringRef prefix = original_args.take_front(arg_begin);
      m_args = Args(prefix);
      m_arg_string = prefix;

      // Also record the arguments together with the delimiter.
      m_arg_string_with_delimiter = original_args.take_front(delimiter_end);

      // The rest of the string became the raw suffix, so we are done.
      return;
    }

    remaining = ltrimForArgs(remaining);
  }

  // No suffix delimiter was found: the whole string is the raw suffix.
  m_suffix = original_args.str();
}
691
692