Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Kitware
GitHub Repository: Kitware/CMake
Path: blob/master/Utilities/cmjsoncpp/src/lib_json/json_reader.cpp
3158 views
1
// Copyright 2007-2011 Baptiste Lepilleur and The JsonCpp Authors
2
// Copyright (C) 2016 InfoTeCS JSC. All rights reserved.
3
// Distributed under MIT license, or public domain if desired and
4
// recognized in your jurisdiction.
5
// See file LICENSE for detail or copy at http://jsoncpp.sourceforge.net/LICENSE
6
7
#if !defined(JSON_IS_AMALGAMATION)
8
#include "json_tool.h"
9
#include <json/assertions.h>
10
#include <json/reader.h>
11
#include <json/value.h>
12
#endif // if !defined(JSON_IS_AMALGAMATION)
13
#include <algorithm>
14
#include <cassert>
15
#include <cmath>
16
#include <cstring>
17
#include <iostream>
18
#include <istream>
19
#include <limits>
20
#include <memory>
21
#include <set>
22
#include <sstream>
23
#include <utility>
24
25
#include <cstdio>
26
#if __cplusplus >= 201103L
27
28
#if !defined(sscanf)
29
#define sscanf std::sscanf
30
#endif
31
32
#endif //__cplusplus
33
34
#if defined(_MSC_VER)
35
#if !defined(_CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES)
36
#define _CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES 1
37
#endif //_CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES
38
#endif //_MSC_VER
39
40
#if defined(_MSC_VER)
41
// Disable warning about strdup being deprecated.
42
#pragma warning(disable : 4996)
43
#endif
44
45
// Define JSONCPP_DEPRECATED_STACK_LIMIT as an appropriate integer at compile
// time to change the nesting limit enforced by the deprecated Reader.
#if !defined(JSONCPP_DEPRECATED_STACK_LIMIT)
#define JSONCPP_DEPRECATED_STACK_LIMIT 1000
#endif

// Maximum size of Reader::nodes_ tolerated by Reader::readValue().
static size_t const stackLimit_g = JSONCPP_DEPRECATED_STACK_LIMIT;
53
54
namespace Json {
55
56
#if __cplusplus >= 201103L || (defined(_CPPLIB_VER) && _CPPLIB_VER >= 520)
57
using CharReaderPtr = std::unique_ptr<CharReader>;
58
#else
59
using CharReaderPtr = std::auto_ptr<CharReader>;
60
#endif
61
62
// Implementation of class Features
63
// ////////////////////////////////
64
65
// Default constructor; the member defaults come from the class declaration,
// which is not part of this file.
Features::Features() = default;
66
67
// Returns a value-initialized Features object (the defaults of the class).
Features Features::all() { return {}; }
68
69
// Returns the feature set for strict parsing: comments, dropped null
// placeholders and numeric keys are switched off, and strictRoot_ is switched
// on (parse() then rejects a root that is neither an array nor an object).
Features Features::strictMode() {
  Features strict;
  strict.allowNumericKeys_ = false;
  strict.allowDroppedNullPlaceholders_ = false;
  strict.strictRoot_ = true;
  strict.allowComments_ = false;
  return strict;
}
77
78
// Implementation of class Reader
79
// ////////////////////////////////
80
81
// Reports whether the half-open range [begin, end) contains a '\n' or '\r'.
bool Reader::containsNewLine(Reader::Location begin, Reader::Location end) {
  for (Reader::Location it = begin; it != end; ++it) {
    if (*it == '\n' || *it == '\r')
      return true;
  }
  return false;
}
84
85
// Class Reader
86
// //////////////////////////////////////////////////////////////////
87
88
// Constructs a reader configured with Features::all().
Reader::Reader() : features_(Features::all()) {}
89
90
// Constructs a reader that uses a copy of the given feature set.
Reader::Reader(const Features& features) : features_(features) {}
91
92
bool Reader::parse(const std::string& document, Value& root,
93
bool collectComments) {
94
document_.assign(document.begin(), document.end());
95
const char* begin = document_.c_str();
96
const char* end = begin + document_.length();
97
return parse(begin, end, root, collectComments);
98
}
99
100
bool Reader::parse(std::istream& is, Value& root, bool collectComments) {
101
// std::istream_iterator<char> begin(is);
102
// std::istream_iterator<char> end;
103
// Those would allow streamed input from a file, if parse() were a
104
// template function.
105
106
// Since String is reference-counted, this at least does not
107
// create an extra copy.
108
String doc(std::istreambuf_iterator<char>(is), {});
109
return parse(doc.data(), doc.data() + doc.size(), root, collectComments);
110
}
111
112
bool Reader::parse(const char* beginDoc, const char* endDoc, Value& root,
113
bool collectComments) {
114
if (!features_.allowComments_) {
115
collectComments = false;
116
}
117
118
begin_ = beginDoc;
119
end_ = endDoc;
120
collectComments_ = collectComments;
121
current_ = begin_;
122
lastValueEnd_ = nullptr;
123
lastValue_ = nullptr;
124
commentsBefore_.clear();
125
errors_.clear();
126
while (!nodes_.empty())
127
nodes_.pop();
128
nodes_.push(&root);
129
130
bool successful = readValue();
131
Token token;
132
readTokenSkippingComments(token);
133
if (collectComments_ && !commentsBefore_.empty())
134
root.setComment(commentsBefore_, commentAfter);
135
if (features_.strictRoot_) {
136
if (!root.isArray() && !root.isObject()) {
137
// Set error location to start of doc, ideally should be first token found
138
// in doc
139
token.type_ = tokenError;
140
token.start_ = beginDoc;
141
token.end_ = endDoc;
142
addError(
143
"A valid JSON document must be either an array or an object value.",
144
token);
145
return false;
146
}
147
}
148
return successful;
149
}
150
151
bool Reader::readValue() {
152
// readValue() may call itself only if it calls readObject() or ReadArray().
153
// These methods execute nodes_.push() just before and nodes_.pop)() just
154
// after calling readValue(). parse() executes one nodes_.push(), so > instead
155
// of >=.
156
if (nodes_.size() > stackLimit_g)
157
throwRuntimeError("Exceeded stackLimit in readValue().");
158
159
Token token;
160
readTokenSkippingComments(token);
161
bool successful = true;
162
163
if (collectComments_ && !commentsBefore_.empty()) {
164
currentValue().setComment(commentsBefore_, commentBefore);
165
commentsBefore_.clear();
166
}
167
168
switch (token.type_) {
169
case tokenObjectBegin:
170
successful = readObject(token);
171
currentValue().setOffsetLimit(current_ - begin_);
172
break;
173
case tokenArrayBegin:
174
successful = readArray(token);
175
currentValue().setOffsetLimit(current_ - begin_);
176
break;
177
case tokenNumber:
178
successful = decodeNumber(token);
179
break;
180
case tokenString:
181
successful = decodeString(token);
182
break;
183
case tokenTrue: {
184
Value v(true);
185
currentValue().swapPayload(v);
186
currentValue().setOffsetStart(token.start_ - begin_);
187
currentValue().setOffsetLimit(token.end_ - begin_);
188
} break;
189
case tokenFalse: {
190
Value v(false);
191
currentValue().swapPayload(v);
192
currentValue().setOffsetStart(token.start_ - begin_);
193
currentValue().setOffsetLimit(token.end_ - begin_);
194
} break;
195
case tokenNull: {
196
Value v;
197
currentValue().swapPayload(v);
198
currentValue().setOffsetStart(token.start_ - begin_);
199
currentValue().setOffsetLimit(token.end_ - begin_);
200
} break;
201
case tokenArraySeparator:
202
case tokenObjectEnd:
203
case tokenArrayEnd:
204
if (features_.allowDroppedNullPlaceholders_) {
205
// "Un-read" the current token and mark the current value as a null
206
// token.
207
current_--;
208
Value v;
209
currentValue().swapPayload(v);
210
currentValue().setOffsetStart(current_ - begin_ - 1);
211
currentValue().setOffsetLimit(current_ - begin_);
212
break;
213
} // Else, fall through...
214
default:
215
currentValue().setOffsetStart(token.start_ - begin_);
216
currentValue().setOffsetLimit(token.end_ - begin_);
217
return addError("Syntax error: value, object or array expected.", token);
218
}
219
220
if (collectComments_) {
221
lastValueEnd_ = current_;
222
lastValue_ = &currentValue();
223
}
224
225
return successful;
226
}
227
228
// Reads the next token into `token`. When comments are allowed, comment
// tokens are read past until a non-comment token (or a failure) shows up.
// Returns the status of the last readToken() call.
bool Reader::readTokenSkippingComments(Token& token) {
  for (;;) {
    const bool ok = readToken(token);
    if (!ok || !features_.allowComments_ || token.type_ != tokenComment)
      return ok;
  }
}
237
238
// Scans one token starting at the next non-space character.
//
// On return token.start_/token.end_ delimit the scanned text and token.type_
// holds its kind, or tokenError when scanning failed. A NUL character (which
// is also what getNextChar() yields at the end of input) is reported as
// tokenEndOfStream.
//
// Returns false exactly when the token is tokenError.
bool Reader::readToken(Token& token) {
  skipSpaces();
  token.start_ = current_;

  const Char c = getNextChar();
  TokenType kind = tokenError;
  bool ok = true;

  switch (c) {
  // Single-character punctuation.
  case '{':
    kind = tokenObjectBegin;
    break;
  case '}':
    kind = tokenObjectEnd;
    break;
  case '[':
    kind = tokenArrayBegin;
    break;
  case ']':
    kind = tokenArrayEnd;
    break;
  case ',':
    kind = tokenArraySeparator;
    break;
  case ':':
    kind = tokenMemberSeparator;
    break;
  case 0:
    kind = tokenEndOfStream;
    break;
  // Tokens that need a dedicated scanner.
  case '"':
    kind = tokenString;
    ok = readString();
    break;
  case '/':
    kind = tokenComment;
    ok = readComment();
    break;
  // Keywords: the first letter is consumed, the remainder must match.
  case 't':
    kind = tokenTrue;
    ok = match("rue", 3);
    break;
  case 'f':
    kind = tokenFalse;
    ok = match("alse", 4);
    break;
  case 'n':
    kind = tokenNull;
    ok = match("ull", 3);
    break;
  case '-':
    kind = tokenNumber;
    readNumber();
    break;
  default:
    if (c >= '0' && c <= '9') {
      kind = tokenNumber;
      readNumber();
    } else {
      ok = false;
    }
    break;
  }

  token.type_ = ok ? kind : tokenError;
  token.end_ = current_;
  return ok;
}
308
309
void Reader::skipSpaces() {
310
while (current_ != end_) {
311
Char c = *current_;
312
if (c == ' ' || c == '\t' || c == '\r' || c == '\n')
313
++current_;
314
else
315
break;
316
}
317
}
318
319
bool Reader::match(const Char* pattern, int patternLength) {
320
if (end_ - current_ < patternLength)
321
return false;
322
int index = patternLength;
323
while (index--)
324
if (current_[index] != pattern[index])
325
return false;
326
current_ += patternLength;
327
return true;
328
}
329
330
bool Reader::readComment() {
331
Location commentBegin = current_ - 1;
332
Char c = getNextChar();
333
bool successful = false;
334
if (c == '*')
335
successful = readCStyleComment();
336
else if (c == '/')
337
successful = readCppStyleComment();
338
if (!successful)
339
return false;
340
341
if (collectComments_) {
342
CommentPlacement placement = commentBefore;
343
if (lastValueEnd_ && !containsNewLine(lastValueEnd_, commentBegin)) {
344
if (c != '*' || !containsNewLine(commentBegin, current_))
345
placement = commentAfterOnSameLine;
346
}
347
348
addComment(commentBegin, current_, placement);
349
}
350
return true;
351
}
352
353
// Returns a copy of [begin, end) in which every "\r\n" pair and every lone
// '\r' has been replaced by a single '\n'.
String Reader::normalizeEOL(Reader::Location begin, Reader::Location end) {
  String normalized;
  normalized.reserve(static_cast<size_t>(end - begin));
  for (Reader::Location it = begin; it != end;) {
    const char c = *it++;
    if (c != '\r') {
      normalized += c;
      continue;
    }
    // DOS EOL: swallow the '\n' that follows the '\r'.
    if (it != end && *it == '\n')
      ++it;
    // Both DOS and old Mac line endings become '\n'.
    normalized += '\n';
  }
  return normalized;
}
371
372
void Reader::addComment(Location begin, Location end,
373
CommentPlacement placement) {
374
assert(collectComments_);
375
const String& normalized = normalizeEOL(begin, end);
376
if (placement == commentAfterOnSameLine) {
377
assert(lastValue_ != nullptr);
378
lastValue_->setComment(normalized, placement);
379
} else {
380
commentsBefore_ += normalized;
381
}
382
}
383
384
bool Reader::readCStyleComment() {
385
while ((current_ + 1) < end_) {
386
Char c = getNextChar();
387
if (c == '*' && *current_ == '/')
388
break;
389
}
390
return getNextChar() == '/';
391
}
392
393
bool Reader::readCppStyleComment() {
394
while (current_ != end_) {
395
Char c = getNextChar();
396
if (c == '\n')
397
break;
398
if (c == '\r') {
399
// Consume DOS EOL. It will be normalized in addComment.
400
if (current_ != end_ && *current_ == '\n')
401
getNextChar();
402
// Break on Moc OS 9 EOL.
403
break;
404
}
405
}
406
return true;
407
}
408
409
void Reader::readNumber() {
410
Location p = current_;
411
char c = '0'; // stopgap for already consumed character
412
// integral part
413
while (c >= '0' && c <= '9')
414
c = (current_ = p) < end_ ? *p++ : '\0';
415
// fractional part
416
if (c == '.') {
417
c = (current_ = p) < end_ ? *p++ : '\0';
418
while (c >= '0' && c <= '9')
419
c = (current_ = p) < end_ ? *p++ : '\0';
420
}
421
// exponential part
422
if (c == 'e' || c == 'E') {
423
c = (current_ = p) < end_ ? *p++ : '\0';
424
if (c == '+' || c == '-')
425
c = (current_ = p) < end_ ? *p++ : '\0';
426
while (c >= '0' && c <= '9')
427
c = (current_ = p) < end_ ? *p++ : '\0';
428
}
429
}
430
431
bool Reader::readString() {
432
Char c = '\0';
433
while (current_ != end_) {
434
c = getNextChar();
435
if (c == '\\')
436
getNextChar();
437
else if (c == '"')
438
break;
439
}
440
return c == '"';
441
}
442
443
// Parses the members of an object whose '{' is `token` into currentValue().
//
// Member names are string tokens, or number tokens when allowNumericKeys_ is
// set. On any error the function records it (unless a nested call already
// did), skips ahead to the next '}' and returns false.
bool Reader::readObject(Token& token) {
  Token keyToken;
  String key;
  Value init(objectValue);
  currentValue().swapPayload(init);
  currentValue().setOffsetStart(token.start_ - begin_);

  for (;;) {
    if (!readTokenSkippingComments(keyToken))
      break;
    // '}' is accepted here only while no member name is pending.
    if (keyToken.type_ == tokenObjectEnd && key.empty()) // empty object
      return true;
    key.clear();

    const bool numericKey =
        keyToken.type_ == tokenNumber && features_.allowNumericKeys_;
    if (keyToken.type_ == tokenString) {
      if (!decodeString(keyToken, key))
        return recoverFromError(tokenObjectEnd);
    } else if (numericKey) {
      Value numberName;
      if (!decodeNumber(keyToken, numberName))
        return recoverFromError(tokenObjectEnd);
      key = numberName.asString();
    } else {
      break;
    }

    Token colon;
    const bool gotColon =
        readToken(colon) && colon.type_ == tokenMemberSeparator;
    if (!gotColon) {
      return addErrorAndRecover("Missing ':' after object member name", colon,
                                tokenObjectEnd);
    }

    // Parse the member value with the member as the current node.
    Value& member = currentValue()[key];
    nodes_.push(&member);
    const bool valueOk = readValue();
    nodes_.pop();
    if (!valueOk) // error already set
      return recoverFromError(tokenObjectEnd);

    Token separator;
    const bool gotSeparator = readTokenSkippingComments(separator) &&
                              (separator.type_ == tokenObjectEnd ||
                               separator.type_ == tokenArraySeparator);
    if (!gotSeparator) {
      return addErrorAndRecover("Missing ',' or '}' in object declaration",
                                separator, tokenObjectEnd);
    }
    if (separator.type_ == tokenObjectEnd)
      return true;
  }

  return addErrorAndRecover("Missing '}' or object member name", keyToken,
                            tokenObjectEnd);
}
489
490
// Parses the elements of an array whose '[' is `token` into currentValue().
// On any error the function records it (unless a nested call already did),
// skips ahead to the next ']' and returns false.
bool Reader::readArray(Token& token) {
  Value init(arrayValue);
  currentValue().swapPayload(init);
  currentValue().setOffsetStart(token.start_ - begin_);

  skipSpaces();
  if (current_ != end_ && *current_ == ']') { // empty array
    Token endArray;
    readToken(endArray);
    return true;
  }

  for (int index = 0;; ++index) {
    // Parse the element with the element as the current node.
    Value& element = currentValue()[index];
    nodes_.push(&element);
    const bool elementOk = readValue();
    nodes_.pop();
    if (!elementOk) // error already set
      return recoverFromError(tokenArrayEnd);

    Token next;
    // Accept Comment after last item in the array.
    const bool tokenOk = readTokenSkippingComments(next);
    const bool isSeparatorOrEnd =
        next.type_ == tokenArraySeparator || next.type_ == tokenArrayEnd;
    if (!tokenOk || !isSeparatorOrEnd) {
      return addErrorAndRecover("Missing ',' or ']' in array declaration",
                                next, tokenArrayEnd);
    }
    if (next.type_ == tokenArrayEnd)
      return true;
  }
}
524
525
// Decodes a number token and, on success, stores it in currentValue()
// together with the token's offsets. Returns false when decoding failed.
bool Reader::decodeNumber(Token& token) {
  Value decoded;
  const bool ok = decodeNumber(token, decoded);
  if (ok) {
    currentValue().swapPayload(decoded);
    currentValue().setOffsetStart(token.start_ - begin_);
    currentValue().setOffsetLimit(token.end_ - begin_);
  }
  return ok;
}
534
535
// Decodes the text of a number token into `decoded`.
//
// The text is first tried as an integer. As soon as a non-digit character is
// met, or the magnitude would exceed the largest supported integer, the whole
// token is handed to decodeDouble() instead and its result is returned.
bool Reader::decodeNumber(Token& token, Value& decoded) {
  Location cursor = token.start_;
  const bool negative = (*cursor == '-');
  if (negative)
    ++cursor;

  // TODO: Help the compiler do the div and mod at compile time or get rid of
  // them.
  const Value::LargestUInt limit =
      negative ? Value::LargestUInt(Value::maxLargestInt) + 1
               : Value::maxLargestUInt;
  const Value::LargestUInt threshold = limit / 10;
  const Value::LargestUInt lastDigitLimit = limit % 10;

  Value::LargestUInt magnitude = 0;
  while (cursor < token.end_) {
    const Char c = *cursor++;
    if (c < '0' || c > '9')
      return decodeDouble(token, decoded);
    const auto digit = static_cast<Value::UInt>(c - '0');
    if (magnitude >= threshold) {
      // The magnitude has reached limit / 10 (rounded down). Appending one
      // more digit is only safe when the magnitude sits exactly on the
      // threshold, this is the final digit, and the digit fits into the
      // remainder limit % 10. Anything else is decoded as a double to avoid
      // overflow.
      const bool fits = magnitude == threshold && cursor == token.end_ &&
                        digit <= lastDigitLimit;
      if (!fits)
        return decodeDouble(token, decoded);
    }
    magnitude = magnitude * 10 + digit;
  }

  if (negative) {
    if (magnitude == limit)
      decoded = Value::minLargestInt;
    else
      decoded = -Value::LargestInt(magnitude);
  } else if (magnitude <= Value::LargestUInt(Value::maxInt)) {
    decoded = Value::LargestInt(magnitude);
  } else {
    decoded = magnitude;
  }
  return true;
}
577
578
// Decodes a token as a double and, on success, stores it in currentValue()
// together with the token's offsets. Returns false when decoding failed.
bool Reader::decodeDouble(Token& token) {
  Value decoded;
  const bool ok = decodeDouble(token, decoded);
  if (ok) {
    currentValue().swapPayload(decoded);
    currentValue().setOffsetStart(token.start_ - begin_);
    currentValue().setOffsetLimit(token.end_ - begin_);
  }
  return ok;
}
587
588
// Decodes the text of `token` as a double via stream extraction.
//
// When extraction fails but left the largest/lowest finite double (or an
// infinity) in the target, the result is taken as +/- infinity. Any other
// failure records a "not a number" error and returns false.
bool Reader::decodeDouble(Token& token, Value& decoded) {
  const String text(token.start_, token.end_);
  IStringStream stream(text);
  double number = 0;
  stream >> number;
  if (stream.fail()) {
    if (number == std::numeric_limits<double>::max()) {
      number = std::numeric_limits<double>::infinity();
    } else if (number == std::numeric_limits<double>::lowest()) {
      number = -std::numeric_limits<double>::infinity();
    } else if (!std::isinf(number)) {
      return addError("'" + text + "' is not a number.", token);
    }
  }
  decoded = number;
  return true;
}
603
604
// Decodes a string token and, on success, stores it in currentValue()
// together with the token's offsets. Returns false when decoding failed.
bool Reader::decodeString(Token& token) {
  String text;
  const bool ok = decodeString(token, text);
  if (ok) {
    Value decoded(text);
    currentValue().swapPayload(decoded);
    currentValue().setOffsetStart(token.start_ - begin_);
    currentValue().setOffsetLimit(token.end_ - begin_);
  }
  return ok;
}
614
615
// Appends the unescaped content of a string token to `decoded`.
//
// The token is expected to include its surrounding quotes; they are skipped.
// Returns false (after recording an error) on an empty or unknown escape
// sequence or on a malformed \u escape.
bool Reader::decodeString(Token& token, String& decoded) {
  decoded.reserve(static_cast<size_t>(token.end_ - token.start_ - 2));
  Location cursor = token.start_ + 1;  // skip '"'
  const Location stop = token.end_ - 1; // do not include '"'

  while (cursor != stop) {
    const Char c = *cursor++;
    if (c == '"')
      break;
    if (c != '\\') {
      decoded += c;
      continue;
    }

    if (cursor == stop)
      return addError("Empty escape sequence in string", token, cursor);
    const Char escape = *cursor++;

    if (escape == 'u') {
      unsigned int unicode;
      if (!decodeUnicodeCodePoint(token, cursor, stop, unicode))
        return false;
      decoded += codePointToUTF8(unicode);
      continue;
    }

    // Single-character escapes map onto exactly one output character.
    Char unescaped;
    switch (escape) {
    case '"':
      unescaped = '"';
      break;
    case '/':
      unescaped = '/';
      break;
    case '\\':
      unescaped = '\\';
      break;
    case 'b':
      unescaped = '\b';
      break;
    case 'f':
      unescaped = '\f';
      break;
    case 'n':
      unescaped = '\n';
      break;
    case 'r':
      unescaped = '\r';
      break;
    case 't':
      unescaped = '\t';
      break;
    default:
      return addError("Bad escape sequence in string", token, cursor);
    }
    decoded += unescaped;
  }
  return true;
}
667
668
// Decodes the four hex digits of a \u escape at `current` into `unicode`.
// When they form a high surrogate (0xD800..0xDBFF), a second "\uXXXX" escape
// must follow and both halves are combined into one code point.
//
// `current` is advanced past everything consumed. Returns false (after
// recording an error) on malformed input.
// NOTE(review): the second escape is not checked to lie in 0xDC00..0xDFFF;
// only its low ten bits are used.
bool Reader::decodeUnicodeCodePoint(Token& token, Location& current,
                                    Location end, unsigned int& unicode) {
  if (!decodeUnicodeEscapeSequence(token, current, end, unicode))
    return false;

  const bool isHighSurrogate = unicode >= 0xD800 && unicode <= 0xDBFF;
  if (!isHighSurrogate)
    return true;

  // surrogate pairs
  if (end - current < 6)
    return addError(
        "additional six characters expected to parse unicode surrogate pair.",
        token, current);

  // The 'u' is only examined (and consumed) when the backslash matched.
  const bool sawBackslash = (*(current++) == '\\');
  const bool sawEscapeStart = sawBackslash && (*(current++) == 'u');
  if (!sawEscapeStart)
    return addError("expecting another \\u token to begin the second half of "
                    "a unicode surrogate pair",
                    token, current);

  unsigned int surrogatePair;
  if (!decodeUnicodeEscapeSequence(token, current, end, surrogatePair))
    return false;
  unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (surrogatePair & 0x3FF);
  return true;
}
692
693
bool Reader::decodeUnicodeEscapeSequence(Token& token, Location& current,
694
Location end,
695
unsigned int& ret_unicode) {
696
if (end - current < 4)
697
return addError(
698
"Bad unicode escape sequence in string: four digits expected.", token,
699
current);
700
int unicode = 0;
701
for (int index = 0; index < 4; ++index) {
702
Char c = *current++;
703
unicode *= 16;
704
if (c >= '0' && c <= '9')
705
unicode += c - '0';
706
else if (c >= 'a' && c <= 'f')
707
unicode += c - 'a' + 10;
708
else if (c >= 'A' && c <= 'F')
709
unicode += c - 'A' + 10;
710
else
711
return addError(
712
"Bad unicode escape sequence in string: hexadecimal digit expected.",
713
token, current);
714
}
715
ret_unicode = static_cast<unsigned int>(unicode);
716
return true;
717
}
718
719
bool Reader::addError(const String& message, Token& token, Location extra) {
720
ErrorInfo info;
721
info.token_ = token;
722
info.message_ = message;
723
info.extra_ = extra;
724
errors_.push_back(info);
725
return false;
726
}
727
728
// Skips tokens until one of type `skipUntilToken` or the end of the stream
// has been read. Errors raised while skipping are discarded, so errors_
// keeps the size it had on entry.
// Always returns false so callers can write `return recoverFromError(...)`.
bool Reader::recoverFromError(TokenType skipUntilToken) {
  const size_t keptErrors = errors_.size();
  Token skipped;
  do {
    if (!readToken(skipped))
      errors_.resize(keptErrors); // discard errors caused by recovery
  } while (skipped.type_ != skipUntilToken &&
           skipped.type_ != tokenEndOfStream);
  errors_.resize(keptErrors);
  return false;
}
740
741
bool Reader::addErrorAndRecover(const String& message, Token& token,
742
TokenType skipUntilToken) {
743
addError(message, token);
744
return recoverFromError(skipUntilToken);
745
}
746
747
// Returns the value on top of nodes_, i.e. the one currently being filled in.
// nodes_ must not be empty.
Value& Reader::currentValue() { return *(nodes_.top()); }
748
749
// Returns the character at current_ and steps past it, or 0 (without moving)
// when the end of the input has been reached.
Reader::Char Reader::getNextChar() {
  return current_ == end_ ? Char(0) : *current_++;
}
754
755
// Translates `location` into a 1-based line and column by counting the line
// breaks between begin_ and `location`. "\r\n", a lone '\r' and a lone '\n'
// each count as one line break.
void Reader::getLocationLineAndColumn(Location location, int& line,
                                      int& column) const {
  line = 0;
  Location lineStart = begin_;
  for (Location it = begin_; it < location && it != end_;) {
    const Char c = *it++;
    if (c != '\r' && c != '\n')
      continue;
    // Treat "\r\n" as a single line break.
    if (c == '\r' && it != end_ && *it == '\n')
      ++it;
    lineStart = it;
    ++line;
  }
  // column & line start at 1
  column = int(location - lineStart) + 1;
  ++line;
}
776
777
// Formats `location` as the text "Line <n>, Column <m>".
String Reader::getLocationLineAndColumn(Location location) const {
  int line;
  int column;
  getLocationLineAndColumn(location, line, column);

  // Room for the fixed text plus two generously sized integers.
  char text[18 + 16 + 16 + 1];
  jsoncpp_snprintf(text, sizeof(text), "Line %d, Column %d", line, column);
  return String(text);
}
784
785
// Deprecated. Preserved for backward compatibility
786
String Reader::getFormatedErrorMessages() const {
787
return getFormattedErrorMessages();
788
}
789
790
// Renders every recorded error as text: a "* Line x, Column y" header, the
// message on the next line and, when the error carries an extra location, a
// "See ... for detail." line.
String Reader::getFormattedErrorMessages() const {
  String report;
  for (auto it = errors_.begin(); it != errors_.end(); ++it) {
    report += "* ";
    report += getLocationLineAndColumn(it->token_.start_);
    report += "\n";

    report += " ";
    report += it->message_;
    report += "\n";

    if (it->extra_) {
      report += "See ";
      report += getLocationLineAndColumn(it->extra_);
      report += " for detail.\n";
    }
  }
  return report;
}
802
803
std::vector<Reader::StructuredError> Reader::getStructuredErrors() const {
804
std::vector<Reader::StructuredError> allErrors;
805
for (const auto& error : errors_) {
806
Reader::StructuredError structured;
807
structured.offset_start = error.token_.start_ - begin_;
808
structured.offset_limit = error.token_.end_ - begin_;
809
structured.message = error.message_;
810
allErrors.push_back(structured);
811
}
812
return allErrors;
813
}
814
815
bool Reader::pushError(const Value& value, const String& message) {
816
ptrdiff_t const length = end_ - begin_;
817
if (value.getOffsetStart() > length || value.getOffsetLimit() > length)
818
return false;
819
Token token;
820
token.type_ = tokenError;
821
token.start_ = begin_ + value.getOffsetStart();
822
token.end_ = begin_ + value.getOffsetLimit();
823
ErrorInfo info;
824
info.token_ = token;
825
info.message_ = message;
826
info.extra_ = nullptr;
827
errors_.push_back(info);
828
return true;
829
}
830
831
bool Reader::pushError(const Value& value, const String& message,
832
const Value& extra) {
833
ptrdiff_t const length = end_ - begin_;
834
if (value.getOffsetStart() > length || value.getOffsetLimit() > length ||
835
extra.getOffsetLimit() > length)
836
return false;
837
Token token;
838
token.type_ = tokenError;
839
token.start_ = begin_ + value.getOffsetStart();
840
token.end_ = begin_ + value.getOffsetLimit();
841
ErrorInfo info;
842
info.token_ = token;
843
info.message_ = message;
844
info.extra_ = begin_ + extra.getOffsetStart();
845
errors_.push_back(info);
846
return true;
847
}
848
849
// Returns true when no errors are currently recorded.
bool Reader::good() const { return errors_.empty(); }
850
851
// Originally copied from the Features class (now deprecated), used internally
852
// for features implementation.
853
// Feature switches consumed by OurReader.
//
// Every member carries an in-class initializer, so a default-constructed
// object is fully defined (everything off, stack limit 0) instead of holding
// indeterminate values. These are the same values value-initialization
// produced before.
class OurFeatures {
public:
  // Returns a value-initialized feature set.
  static OurFeatures all();
  bool allowComments_ = false;
  bool allowTrailingCommas_ = false;
  bool strictRoot_ = false;
  bool allowDroppedNullPlaceholders_ = false;
  bool allowNumericKeys_ = false;
  bool allowSingleQuotes_ = false;
  bool failIfExtra_ = false;
  bool rejectDupKeys_ = false;
  bool allowSpecialFloats_ = false;
  bool skipBom_ = false;
  // Largest nodes_ depth OurReader::readValue() accepts before it throws.
  size_t stackLimit_ = 0;
}; // OurFeatures
868
869
// Returns a value-initialized OurFeatures object.
OurFeatures OurFeatures::all() { return {}; }
870
871
// Implementation of class OurReader
872
// ////////////////////////////////
873
874
// Originally copied from the Reader class (now deprecated), used internally
875
// for implementing JSON reading.
876
class OurReader {
877
public:
878
using Char = char;
879
using Location = const Char*;
880
881
explicit OurReader(OurFeatures const& features);
882
bool parse(const char* beginDoc, const char* endDoc, Value& root,
883
bool collectComments = true);
884
String getFormattedErrorMessages() const;
885
std::vector<CharReader::StructuredError> getStructuredErrors() const;
886
887
private:
888
OurReader(OurReader const&); // no impl
889
void operator=(OurReader const&); // no impl
890
891
enum TokenType {
892
tokenEndOfStream = 0,
893
tokenObjectBegin,
894
tokenObjectEnd,
895
tokenArrayBegin,
896
tokenArrayEnd,
897
tokenString,
898
tokenNumber,
899
tokenTrue,
900
tokenFalse,
901
tokenNull,
902
tokenNaN,
903
tokenPosInf,
904
tokenNegInf,
905
tokenArraySeparator,
906
tokenMemberSeparator,
907
tokenComment,
908
tokenError
909
};
910
911
class Token {
912
public:
913
TokenType type_;
914
Location start_;
915
Location end_;
916
};
917
918
class ErrorInfo {
919
public:
920
Token token_;
921
String message_;
922
Location extra_;
923
};
924
925
using Errors = std::deque<ErrorInfo>;
926
927
bool readToken(Token& token);
928
bool readTokenSkippingComments(Token& token);
929
void skipSpaces();
930
void skipBom(bool skipBom);
931
bool match(const Char* pattern, int patternLength);
932
bool readComment();
933
bool readCStyleComment(bool* containsNewLineResult);
934
bool readCppStyleComment();
935
bool readString();
936
bool readStringSingleQuote();
937
bool readNumber(bool checkInf);
938
bool readValue();
939
bool readObject(Token& token);
940
bool readArray(Token& token);
941
bool decodeNumber(Token& token);
942
bool decodeNumber(Token& token, Value& decoded);
943
bool decodeString(Token& token);
944
bool decodeString(Token& token, String& decoded);
945
bool decodeDouble(Token& token);
946
bool decodeDouble(Token& token, Value& decoded);
947
bool decodeUnicodeCodePoint(Token& token, Location& current, Location end,
948
unsigned int& unicode);
949
bool decodeUnicodeEscapeSequence(Token& token, Location& current,
950
Location end, unsigned int& unicode);
951
bool addError(const String& message, Token& token, Location extra = nullptr);
952
bool recoverFromError(TokenType skipUntilToken);
953
bool addErrorAndRecover(const String& message, Token& token,
954
TokenType skipUntilToken);
955
void skipUntilSpace();
956
Value& currentValue();
957
Char getNextChar();
958
void getLocationLineAndColumn(Location location, int& line,
959
int& column) const;
960
String getLocationLineAndColumn(Location location) const;
961
void addComment(Location begin, Location end, CommentPlacement placement);
962
963
static String normalizeEOL(Location begin, Location end);
964
static bool containsNewLine(Location begin, Location end);
965
966
using Nodes = std::stack<Value*>;
967
968
Nodes nodes_{};
969
Errors errors_{};
970
String document_{};
971
Location begin_ = nullptr;
972
Location end_ = nullptr;
973
Location current_ = nullptr;
974
Location lastValueEnd_ = nullptr;
975
Value* lastValue_ = nullptr;
976
bool lastValueHasAComment_ = false;
977
String commentsBefore_{};
978
979
OurFeatures const features_;
980
bool collectComments_ = false;
981
}; // OurReader
982
983
// complete copy of Reader impl, for OurReader
984
985
// Reports whether the half-open range [begin, end) contains a '\n' or '\r'.
bool OurReader::containsNewLine(OurReader::Location begin,
                                OurReader::Location end) {
  const OurReader::Location hit = std::find_if(
      begin, end, [](char ch) { return ch == '\n' || ch == '\r'; });
  return hit != end;
}
989
990
// Constructs a reader that uses a copy of the given feature set.
OurReader::OurReader(OurFeatures const& features) : features_(features) {}
991
992
bool OurReader::parse(const char* beginDoc, const char* endDoc, Value& root,
993
bool collectComments) {
994
if (!features_.allowComments_) {
995
collectComments = false;
996
}
997
998
begin_ = beginDoc;
999
end_ = endDoc;
1000
collectComments_ = collectComments;
1001
current_ = begin_;
1002
lastValueEnd_ = nullptr;
1003
lastValue_ = nullptr;
1004
commentsBefore_.clear();
1005
errors_.clear();
1006
while (!nodes_.empty())
1007
nodes_.pop();
1008
nodes_.push(&root);
1009
1010
// skip byte order mark if it exists at the beginning of the UTF-8 text.
1011
skipBom(features_.skipBom_);
1012
bool successful = readValue();
1013
nodes_.pop();
1014
Token token;
1015
readTokenSkippingComments(token);
1016
if (features_.failIfExtra_ && (token.type_ != tokenEndOfStream)) {
1017
addError("Extra non-whitespace after JSON value.", token);
1018
return false;
1019
}
1020
if (collectComments_ && !commentsBefore_.empty())
1021
root.setComment(commentsBefore_, commentAfter);
1022
if (features_.strictRoot_) {
1023
if (!root.isArray() && !root.isObject()) {
1024
// Set error location to start of doc, ideally should be first token found
1025
// in doc
1026
token.type_ = tokenError;
1027
token.start_ = beginDoc;
1028
token.end_ = endDoc;
1029
addError(
1030
"A valid JSON document must be either an array or an object value.",
1031
token);
1032
return false;
1033
}
1034
}
1035
return successful;
1036
}
1037
1038
bool OurReader::readValue() {
1039
// To preserve the old behaviour we cast size_t to int.
1040
if (nodes_.size() > features_.stackLimit_)
1041
throwRuntimeError("Exceeded stackLimit in readValue().");
1042
Token token;
1043
readTokenSkippingComments(token);
1044
bool successful = true;
1045
1046
if (collectComments_ && !commentsBefore_.empty()) {
1047
currentValue().setComment(commentsBefore_, commentBefore);
1048
commentsBefore_.clear();
1049
}
1050
1051
switch (token.type_) {
1052
case tokenObjectBegin:
1053
successful = readObject(token);
1054
currentValue().setOffsetLimit(current_ - begin_);
1055
break;
1056
case tokenArrayBegin:
1057
successful = readArray(token);
1058
currentValue().setOffsetLimit(current_ - begin_);
1059
break;
1060
case tokenNumber:
1061
successful = decodeNumber(token);
1062
break;
1063
case tokenString:
1064
successful = decodeString(token);
1065
break;
1066
case tokenTrue: {
1067
Value v(true);
1068
currentValue().swapPayload(v);
1069
currentValue().setOffsetStart(token.start_ - begin_);
1070
currentValue().setOffsetLimit(token.end_ - begin_);
1071
} break;
1072
case tokenFalse: {
1073
Value v(false);
1074
currentValue().swapPayload(v);
1075
currentValue().setOffsetStart(token.start_ - begin_);
1076
currentValue().setOffsetLimit(token.end_ - begin_);
1077
} break;
1078
case tokenNull: {
1079
Value v;
1080
currentValue().swapPayload(v);
1081
currentValue().setOffsetStart(token.start_ - begin_);
1082
currentValue().setOffsetLimit(token.end_ - begin_);
1083
} break;
1084
case tokenNaN: {
1085
Value v(std::numeric_limits<double>::quiet_NaN());
1086
currentValue().swapPayload(v);
1087
currentValue().setOffsetStart(token.start_ - begin_);
1088
currentValue().setOffsetLimit(token.end_ - begin_);
1089
} break;
1090
case tokenPosInf: {
1091
Value v(std::numeric_limits<double>::infinity());
1092
currentValue().swapPayload(v);
1093
currentValue().setOffsetStart(token.start_ - begin_);
1094
currentValue().setOffsetLimit(token.end_ - begin_);
1095
} break;
1096
case tokenNegInf: {
1097
Value v(-std::numeric_limits<double>::infinity());
1098
currentValue().swapPayload(v);
1099
currentValue().setOffsetStart(token.start_ - begin_);
1100
currentValue().setOffsetLimit(token.end_ - begin_);
1101
} break;
1102
case tokenArraySeparator:
1103
case tokenObjectEnd:
1104
case tokenArrayEnd:
1105
if (features_.allowDroppedNullPlaceholders_) {
1106
// "Un-read" the current token and mark the current value as a null
1107
// token.
1108
current_--;
1109
Value v;
1110
currentValue().swapPayload(v);
1111
currentValue().setOffsetStart(current_ - begin_ - 1);
1112
currentValue().setOffsetLimit(current_ - begin_);
1113
break;
1114
} // else, fall through ...
1115
default:
1116
currentValue().setOffsetStart(token.start_ - begin_);
1117
currentValue().setOffsetLimit(token.end_ - begin_);
1118
return addError("Syntax error: value, object or array expected.", token);
1119
}
1120
1121
if (collectComments_) {
1122
lastValueEnd_ = current_;
1123
lastValueHasAComment_ = false;
1124
lastValue_ = &currentValue();
1125
}
1126
1127
return successful;
1128
}
1129
1130
// Reads the next token into `token`. When comments are allowed, comment
// tokens are consumed and dropped until a non-comment token is read or
// readToken() fails. Returns the result of the last readToken() call.
bool OurReader::readTokenSkippingComments(Token& token) {
  bool ok = readToken(token);
  if (!features_.allowComments_)
    return ok;
  while (ok && token.type_ == tokenComment)
    ok = readToken(token);
  return ok;
}
1139
1140
// Scans a single lexical token, starting at the next non-whitespace
// character. On return token.start_/token.end_ delimit the consumed text and
// token.type_ holds the token kind (tokenError if scanning failed).
// Returns true when a well-formed token was read.
bool OurReader::readToken(Token& token) {
  skipSpaces();
  token.start_ = current_;
  const Char first = getNextChar();
  bool ok = true;
  switch (first) {
  // --- structural characters -------------------------------------------
  case '{':
    token.type_ = tokenObjectBegin;
    break;
  case '}':
    token.type_ = tokenObjectEnd;
    break;
  case '[':
    token.type_ = tokenArrayBegin;
    break;
  case ']':
    token.type_ = tokenArrayEnd;
    break;
  case ',':
    token.type_ = tokenArraySeparator;
    break;
  case ':':
    token.type_ = tokenMemberSeparator;
    break;
  case 0:
    // getNextChar() returns 0 once the input is exhausted.
    token.type_ = tokenEndOfStream;
    break;

  // --- strings and comments --------------------------------------------
  case '"':
    token.type_ = tokenString;
    ok = readString();
    break;
  case '\'':
    // Single-quoted strings are only a token when the feature is enabled;
    // otherwise this is a failure case.
    ok = features_.allowSingleQuotes_;
    if (ok) {
      token.type_ = tokenString;
      ok = readStringSingleQuote();
    }
    break;
  case '/':
    token.type_ = tokenComment;
    ok = readComment();
    break;

  // --- numbers -----------------------------------------------------------
  case '0':
  case '1':
  case '2':
  case '3':
  case '4':
  case '5':
  case '6':
  case '7':
  case '8':
  case '9':
    token.type_ = tokenNumber;
    readNumber(false);
    break;
  case '-':
  case '+':
    // A sign introduces either a number or, when readNumber() reports an
    // 'I' right after the sign, a signed "Infinity" (special floats only).
    if (readNumber(true)) {
      token.type_ = tokenNumber;
    } else {
      token.type_ = (first == '-') ? tokenNegInf : tokenPosInf;
      ok = features_.allowSpecialFloats_ && match("nfinity", 7);
    }
    break;

  // --- literals ----------------------------------------------------------
  case 't':
    token.type_ = tokenTrue;
    ok = match("rue", 3);
    break;
  case 'f':
    token.type_ = tokenFalse;
    ok = match("alse", 4);
    break;
  case 'n':
    token.type_ = tokenNull;
    ok = match("ull", 3);
    break;
  case 'N':
    ok = features_.allowSpecialFloats_;
    if (ok) {
      token.type_ = tokenNaN;
      ok = match("aN", 2);
    }
    break;
  case 'I':
    ok = features_.allowSpecialFloats_;
    if (ok) {
      token.type_ = tokenPosInf;
      ok = match("nfinity", 7);
    }
    break;

  default:
    ok = false;
    break;
  }
  if (!ok)
    token.type_ = tokenError;
  token.end_ = current_;
  return ok;
}
1250
1251
void OurReader::skipSpaces() {
1252
while (current_ != end_) {
1253
Char c = *current_;
1254
if (c == ' ' || c == '\t' || c == '\r' || c == '\n')
1255
++current_;
1256
else
1257
break;
1258
}
1259
}
1260
1261
void OurReader::skipBom(bool skipBom) {
1262
// The default behavior is to skip BOM.
1263
if (skipBom) {
1264
if ((end_ - begin_) >= 3 && strncmp(begin_, "\xEF\xBB\xBF", 3) == 0) {
1265
begin_ += 3;
1266
current_ = begin_;
1267
}
1268
}
1269
}
1270
1271
bool OurReader::match(const Char* pattern, int patternLength) {
1272
if (end_ - current_ < patternLength)
1273
return false;
1274
int index = patternLength;
1275
while (index--)
1276
if (current_[index] != pattern[index])
1277
return false;
1278
current_ += patternLength;
1279
return true;
1280
}
1281
1282
bool OurReader::readComment() {
1283
const Location commentBegin = current_ - 1;
1284
const Char c = getNextChar();
1285
bool successful = false;
1286
bool cStyleWithEmbeddedNewline = false;
1287
1288
const bool isCStyleComment = (c == '*');
1289
const bool isCppStyleComment = (c == '/');
1290
if (isCStyleComment) {
1291
successful = readCStyleComment(&cStyleWithEmbeddedNewline);
1292
} else if (isCppStyleComment) {
1293
successful = readCppStyleComment();
1294
}
1295
1296
if (!successful)
1297
return false;
1298
1299
if (collectComments_) {
1300
CommentPlacement placement = commentBefore;
1301
1302
if (!lastValueHasAComment_) {
1303
if (lastValueEnd_ && !containsNewLine(lastValueEnd_, commentBegin)) {
1304
if (isCppStyleComment || !cStyleWithEmbeddedNewline) {
1305
placement = commentAfterOnSameLine;
1306
lastValueHasAComment_ = true;
1307
}
1308
}
1309
}
1310
1311
addComment(commentBegin, current_, placement);
1312
}
1313
return true;
1314
}
1315
1316
// Returns a copy of [begin, end) in which every "\r\n" pair and every lone
// '\r' has been replaced by a single '\n'.
String OurReader::normalizeEOL(OurReader::Location begin,
                               OurReader::Location end) {
  String result;
  result.reserve(static_cast<size_t>(end - begin));
  for (OurReader::Location it = begin; it != end;) {
    const char ch = *it++;
    if (ch != '\r') {
      result += ch;
      continue;
    }
    // '\r' followed by '\n' is a DOS line ending: swallow the '\n' too.
    if (it != end && *it == '\n')
      ++it;
    // A lone '\r' (old Mac line ending) is converted as well.
    result += '\n';
  }
  return result;
}
1335
1336
void OurReader::addComment(Location begin, Location end,
1337
CommentPlacement placement) {
1338
assert(collectComments_);
1339
const String& normalized = normalizeEOL(begin, end);
1340
if (placement == commentAfterOnSameLine) {
1341
assert(lastValue_ != nullptr);
1342
lastValue_->setComment(normalized, placement);
1343
} else {
1344
commentsBefore_ += normalized;
1345
}
1346
}
1347
1348
bool OurReader::readCStyleComment(bool* containsNewLineResult) {
1349
*containsNewLineResult = false;
1350
1351
while ((current_ + 1) < end_) {
1352
Char c = getNextChar();
1353
if (c == '*' && *current_ == '/')
1354
break;
1355
if (c == '\n')
1356
*containsNewLineResult = true;
1357
}
1358
1359
return getNextChar() == '/';
1360
}
1361
1362
bool OurReader::readCppStyleComment() {
1363
while (current_ != end_) {
1364
Char c = getNextChar();
1365
if (c == '\n')
1366
break;
1367
if (c == '\r') {
1368
// Consume DOS EOL. It will be normalized in addComment.
1369
if (current_ != end_ && *current_ == '\n')
1370
getNextChar();
1371
// Break on Moc OS 9 EOL.
1372
break;
1373
}
1374
}
1375
return true;
1376
}
1377
1378
bool OurReader::readNumber(bool checkInf) {
1379
Location p = current_;
1380
if (checkInf && p != end_ && *p == 'I') {
1381
current_ = ++p;
1382
return false;
1383
}
1384
char c = '0'; // stopgap for already consumed character
1385
// integral part
1386
while (c >= '0' && c <= '9')
1387
c = (current_ = p) < end_ ? *p++ : '\0';
1388
// fractional part
1389
if (c == '.') {
1390
c = (current_ = p) < end_ ? *p++ : '\0';
1391
while (c >= '0' && c <= '9')
1392
c = (current_ = p) < end_ ? *p++ : '\0';
1393
}
1394
// exponential part
1395
if (c == 'e' || c == 'E') {
1396
c = (current_ = p) < end_ ? *p++ : '\0';
1397
if (c == '+' || c == '-')
1398
c = (current_ = p) < end_ ? *p++ : '\0';
1399
while (c >= '0' && c <= '9')
1400
c = (current_ = p) < end_ ? *p++ : '\0';
1401
}
1402
return true;
1403
}
1404
bool OurReader::readString() {
1405
Char c = 0;
1406
while (current_ != end_) {
1407
c = getNextChar();
1408
if (c == '\\')
1409
getNextChar();
1410
else if (c == '"')
1411
break;
1412
}
1413
return c == '"';
1414
}
1415
1416
bool OurReader::readStringSingleQuote() {
1417
Char c = 0;
1418
while (current_ != end_) {
1419
c = getNextChar();
1420
if (c == '\\')
1421
getNextChar();
1422
else if (c == '\'')
1423
break;
1424
}
1425
return c == '\'';
1426
}
1427
1428
// Parses an object whose opening '{' is `token`, storing the members in the
// current value.
//
// Member names are string tokens or, when allowNumericKeys_ is set, number
// tokens converted to their string form. Duplicate names are rejected when
// rejectDupKeys_ is set. A '}' directly after a ',' is accepted only when
// allowTrailingCommas_ is set.
//
// Returns true when the closing '}' was consumed. On failure an error is
// recorded (unless already set by a nested call), input is skipped up to
// the next '}' or end of stream, and false is returned.
// Throws via throwRuntimeError() if a key is 2^30 characters or longer.
bool OurReader::readObject(Token& token) {
  Token tokenName;
  String name;
  Value init(objectValue);
  currentValue().swapPayload(init);
  currentValue().setOffsetStart(token.start_ - begin_);

  // Tracks whether a member has been read yet. The key text itself cannot
  // serve as this flag: an empty key ("") is a legal member name, and using
  // name.empty() would let `{"":1,}` through even when trailing commas are
  // disabled.
  bool isFirstMember = true;
  while (readTokenSkippingComments(tokenName)) {
    if (tokenName.type_ == tokenObjectEnd &&
        (isFirstMember ||
         features_.allowTrailingCommas_)) // empty object or trailing comma
      return true;
    isFirstMember = false;

    name.clear();
    if (tokenName.type_ == tokenString) {
      if (!decodeString(tokenName, name))
        return recoverFromError(tokenObjectEnd);
    } else if (tokenName.type_ == tokenNumber && features_.allowNumericKeys_) {
      Value numberName;
      if (!decodeNumber(tokenName, numberName))
        return recoverFromError(tokenObjectEnd);
      name = numberName.asString();
    } else {
      break; // neither '}' nor a usable member name
    }
    if (name.length() >= (1U << 30))
      throwRuntimeError("keylength >= 2^30");
    if (features_.rejectDupKeys_ && currentValue().isMember(name)) {
      String msg = "Duplicate key: '" + name + "'";
      return addErrorAndRecover(msg, tokenName, tokenObjectEnd);
    }

    Token colon;
    if (!readToken(colon) || colon.type_ != tokenMemberSeparator) {
      return addErrorAndRecover("Missing ':' after object member name", colon,
                                tokenObjectEnd);
    }

    // Parse the member value in place, with it as the current node.
    Value& value = currentValue()[name];
    nodes_.push(&value);
    bool ok = readValue();
    nodes_.pop();
    if (!ok) // error already set
      return recoverFromError(tokenObjectEnd);

    Token comma;
    if (!readTokenSkippingComments(comma) ||
        (comma.type_ != tokenObjectEnd && comma.type_ != tokenArraySeparator)) {
      return addErrorAndRecover("Missing ',' or '}' in object declaration",
                                comma, tokenObjectEnd);
    }
    if (comma.type_ == tokenObjectEnd)
      return true;
  }
  return addErrorAndRecover("Missing '}' or object member name", tokenName,
                            tokenObjectEnd);
}
1482
1483
// Parses an array whose opening '[' is `token`, appending the elements to
// the current value. Returns true when the closing ']' was consumed; on
// failure an error is recorded (unless already set by a nested call), input
// is skipped up to the next ']' or end of stream, and false is returned.
bool OurReader::readArray(Token& token) {
  Value init(arrayValue);
  currentValue().swapPayload(init);
  currentValue().setOffsetStart(token.start_ - begin_);

  // After at least one element, a ']' seen before the next value closes the
  // array only when trailing commas are allowed and dropped null
  // placeholders are not.
  const bool closeAfterComma = features_.allowTrailingCommas_ &&
                               !features_.allowDroppedNullPlaceholders_;

  int nextIndex = 0;
  for (;;) {
    skipSpaces();
    const bool atClosingBracket = current_ != end_ && *current_ == ']';
    if (atClosingBracket && (nextIndex == 0 || closeAfterComma)) {
      // empty array or trailing comma: consume the ']'
      Token endArray;
      readToken(endArray);
      return true;
    }

    // Parse the element in place, with it as the current node.
    Value& element = currentValue()[nextIndex];
    ++nextIndex;
    nodes_.push(&element);
    const bool elementOk = readValue();
    nodes_.pop();
    if (!elementOk) // error already set
      return recoverFromError(tokenArrayEnd);

    // Accept a comment after the last item in the array.
    Token currentToken;
    const bool tokenOk = readTokenSkippingComments(currentToken);
    if (!tokenOk || (currentToken.type_ != tokenArraySeparator &&
                     currentToken.type_ != tokenArrayEnd)) {
      return addErrorAndRecover("Missing ',' or ']' in array declaration",
                                currentToken, tokenArrayEnd);
    }
    if (currentToken.type_ == tokenArrayEnd)
      return true;
  }
}
1521
1522
// Decodes the number token and, on success, stores it in the current value
// together with the token's start/limit offsets. Returns false (leaving the
// current value untouched) if decoding fails.
bool OurReader::decodeNumber(Token& token) {
  Value number;
  const bool ok = decodeNumber(token, number);
  if (ok) {
    Value& target = currentValue();
    target.swapPayload(number);
    target.setOffsetStart(token.start_ - begin_);
    target.setOffsetLimit(token.end_ - begin_);
  }
  return ok;
}
1531
1532
// Decodes the text of a number token into `decoded`.
//
// The token is first parsed as an integer. If it contains any non-digit
// character (fraction, exponent, leading '+'), or its magnitude does not
// fit the largest supported integer type, decoding is delegated to
// decodeDouble(). A token consisting of a sign with no digits is likewise
// delegated, so that it is reported as "not a number" instead of silently
// becoming 0.
//
// Returns true on success; false if decodeDouble() rejected the text (an
// error has then been recorded).
bool OurReader::decodeNumber(Token& token, Value& decoded) {
  // Attempts to parse the number as an integer. If the number is
  // larger than the maximum supported value of an integer then
  // we decode the number as a double.
  Location current = token.start_;
  const bool isNegative = *current == '-';
  if (isNegative) {
    ++current;
  }

  // readNumber() accepts a '-' that is not followed by any digit. Without
  // this check the digit loop below would not run and the token would
  // decode as integer 0.
  if (current == token.end_) {
    return decodeDouble(token, decoded);
  }

  // We assume we can represent the largest and smallest integer types as
  // unsigned integers with separate sign. This is only true if they can fit
  // into an unsigned integer.
  static_assert(Value::maxLargestInt <= Value::maxLargestUInt,
                "Int must be smaller than UInt");

  // We need to convert minLargestInt into a positive number. The easiest way
  // to do this conversion is to assume our "threshold" value of minLargestInt
  // divided by 10 can fit in maxLargestInt when absolute valued. This should
  // be a safe assumption.
  static_assert(Value::minLargestInt <= -Value::maxLargestInt,
                "The absolute value of minLargestInt must be greater than or "
                "equal to maxLargestInt");
  static_assert(Value::minLargestInt / 10 >= -Value::maxLargestInt,
                "The absolute value of minLargestInt must be only 1 magnitude "
                "larger than maxLargest Int");

  static constexpr Value::LargestUInt positive_threshold =
      Value::maxLargestUInt / 10;
  static constexpr Value::UInt positive_last_digit = Value::maxLargestUInt % 10;

  // For the negative values, we have to be more careful. Since typically
  // -Value::minLargestInt will cause an overflow, we first divide by 10 and
  // then take the inverse. This assumes that minLargestInt is only a single
  // power of 10 different in magnitude, which we check above. For the last
  // digit, we take the modulus before negating for the same reason.
  static constexpr auto negative_threshold =
      Value::LargestUInt(-(Value::minLargestInt / 10));
  static constexpr auto negative_last_digit =
      Value::UInt(-(Value::minLargestInt % 10));

  const Value::LargestUInt threshold =
      isNegative ? negative_threshold : positive_threshold;
  const Value::UInt max_last_digit =
      isNegative ? negative_last_digit : positive_last_digit;

  Value::LargestUInt value = 0;
  while (current < token.end_) {
    Char c = *current++;
    if (c < '0' || c > '9')
      return decodeDouble(token, decoded);

    const auto digit(static_cast<Value::UInt>(c - '0'));
    if (value >= threshold) {
      // We've hit or exceeded the max value divided by 10 (rounded down). If
      // a) we've only just touched the limit, meaning value == threshold,
      // b) this is the last digit, or
      // c) it's small enough to fit in that rounding delta, we're okay.
      // Otherwise treat this number as a double to avoid overflow.
      if (value > threshold || current != token.end_ ||
          digit > max_last_digit) {
        return decodeDouble(token, decoded);
      }
    }
    value = value * 10 + digit;
  }

  if (isNegative) {
    // We use the same magnitude assumption here, just in case.
    const auto last_digit = static_cast<Value::UInt>(value % 10);
    decoded = -Value::LargestInt(value / 10) * 10 - last_digit;
  } else if (value <= Value::LargestUInt(Value::maxLargestInt)) {
    decoded = Value::LargestInt(value);
  } else {
    decoded = value;
  }

  return true;
}
1611
1612
// Decodes the token as a double and, on success, stores it in the current
// value together with the token's start/limit offsets. Returns false
// (leaving the current value untouched) if decoding fails.
bool OurReader::decodeDouble(Token& token) {
  Value number;
  const bool ok = decodeDouble(token, number);
  if (ok) {
    Value& target = currentValue();
    target.swapPayload(number);
    target.setOffsetStart(token.start_ - begin_);
    target.setOffsetLimit(token.end_ - begin_);
  }
  return ok;
}
1621
1622
// Parses the token text as a double via stream extraction and stores the
// result in `decoded`. If extraction fails but left the largest or lowest
// finite double (or an infinity) in the result, the value is stored as the
// matching infinity; any other failure records a "is not a number" error
// and returns false.
bool OurReader::decodeDouble(Token& token, Value& decoded) {
  using Limits = std::numeric_limits<double>;

  const String text(token.start_, token.end_);
  IStringStream stream(text);
  double number = 0;
  if (!(stream >> number)) {
    if (number == Limits::max()) {
      number = Limits::infinity();
    } else if (number == Limits::lowest()) {
      number = -Limits::infinity();
    } else if (!std::isinf(number)) {
      return addError("'" + text + "' is not a number.", token);
    }
  }
  decoded = number;
  return true;
}
1637
1638
// Decodes the string token and, on success, stores it in the current value
// together with the token's start/limit offsets. Returns false (leaving the
// current value untouched) if decoding fails.
bool OurReader::decodeString(Token& token) {
  String text;
  if (!decodeString(token, text))
    return false;

  Value decoded(text);
  Value& target = currentValue();
  target.swapPayload(decoded);
  target.setOffsetStart(token.start_ - begin_);
  target.setOffsetLimit(token.end_ - begin_);
  return true;
}
1648
1649
// Appends the unescaped content of a quoted string token to `decoded`.
// The first and last characters of the token (the quotes) are skipped.
// Returns false, after recording an error, on an empty or unknown escape
// sequence or a malformed \u escape.
bool OurReader::decodeString(Token& token, String& decoded) {
  decoded.reserve(static_cast<size_t>(token.end_ - token.start_ - 2));
  Location cursor = token.start_ + 1;     // skip the opening quote
  const Location stop = token.end_ - 1;   // exclude the closing quote
  while (cursor != stop) {
    const Char ch = *cursor++;
    if (ch == '"')
      break;
    if (ch != '\\') {
      decoded += ch; // ordinary character
      continue;
    }

    if (cursor == stop)
      return addError("Empty escape sequence in string", token, cursor);

    const Char escape = *cursor++;
    switch (escape) {
    case '"':
    case '/':
    case '\\':
      // These escapes stand for the character itself.
      decoded += escape;
      break;
    case 'b':
      decoded += '\b';
      break;
    case 'f':
      decoded += '\f';
      break;
    case 'n':
      decoded += '\n';
      break;
    case 'r':
      decoded += '\r';
      break;
    case 't':
      decoded += '\t';
      break;
    case 'u': {
      unsigned int codePoint;
      if (!decodeUnicodeCodePoint(token, cursor, stop, codePoint))
        return false;
      decoded += codePointToUTF8(codePoint);
    } break;
    default:
      return addError("Bad escape sequence in string", token, cursor);
    }
  }
  return true;
}
1701
1702
// Decodes the four hex digits following "\u" at `current` into `unicode`.
// If they form a high surrogate (0xD800..0xDBFF), a second "\uXXXX" escape
// must follow and the two are combined into one code point. The value of
// the second escape is not range-checked here; only its low 10 bits are
// used. `current` is advanced past everything consumed.
// Returns false, after recording an error, on malformed input.
bool OurReader::decodeUnicodeCodePoint(Token& token, Location& current,
                                       Location end, unsigned int& unicode) {
  unicode = 0; // Convince clang-analyzer that this is initialized before use.
  if (!decodeUnicodeEscapeSequence(token, current, end, unicode))
    return false;

  const bool isHighSurrogate = unicode >= 0xD800 && unicode <= 0xDBFF;
  if (!isHighSurrogate)
    return true;

  // surrogate pairs: need room for a full "\uXXXX"
  if (end - current < 6)
    return addError(
        "additional six characters expected to parse unicode surrogate pair.",
        token, current);

  // Consume the "\u" prefix. `current` stops right after the first
  // character that fails to match.
  const bool hasPrefix = *(current++) == '\\' && *(current++) == 'u';
  if (!hasPrefix)
    return addError("expecting another \\u token to begin the second half of "
                    "a unicode surrogate pair",
                    token, current);

  unsigned int secondHalf;
  if (!decodeUnicodeEscapeSequence(token, current, end, secondHalf))
    return false;
  unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (secondHalf & 0x3FF);
  return true;
}
1727
1728
bool OurReader::decodeUnicodeEscapeSequence(Token& token, Location& current,
1729
Location end,
1730
unsigned int& ret_unicode) {
1731
if (end - current < 4)
1732
return addError(
1733
"Bad unicode escape sequence in string: four digits expected.", token,
1734
current);
1735
int unicode = 0;
1736
for (int index = 0; index < 4; ++index) {
1737
Char c = *current++;
1738
unicode *= 16;
1739
if (c >= '0' && c <= '9')
1740
unicode += c - '0';
1741
else if (c >= 'a' && c <= 'f')
1742
unicode += c - 'a' + 10;
1743
else if (c >= 'A' && c <= 'F')
1744
unicode += c - 'A' + 10;
1745
else
1746
return addError(
1747
"Bad unicode escape sequence in string: hexadecimal digit expected.",
1748
token, current);
1749
}
1750
ret_unicode = static_cast<unsigned int>(unicode);
1751
return true;
1752
}
1753
1754
bool OurReader::addError(const String& message, Token& token, Location extra) {
1755
ErrorInfo info;
1756
info.token_ = token;
1757
info.message_ = message;
1758
info.extra_ = extra;
1759
errors_.push_back(info);
1760
return false;
1761
}
1762
1763
// Skips tokens until one of type `skipUntilToken` or the end of the stream
// has been read. Errors recorded while skipping are discarded, so errors_
// keeps the size it had on entry. Always returns false.
bool OurReader::recoverFromError(TokenType skipUntilToken) {
  const size_t errorsToKeep = errors_.size();
  Token skipped;
  do {
    if (!readToken(skipped))
      errors_.resize(errorsToKeep); // discard errors caused by recovery
  } while (skipped.type_ != skipUntilToken &&
           skipped.type_ != tokenEndOfStream);
  errors_.resize(errorsToKeep);
  return false;
}
1775
1776
bool OurReader::addErrorAndRecover(const String& message, Token& token,
1777
TokenType skipUntilToken) {
1778
addError(message, token);
1779
return recoverFromError(skipUntilToken);
1780
}
1781
1782
// Returns the value on top of the node stack, i.e. the one currently being
// filled in by the parser.
Value& OurReader::currentValue() {
  Value* const top = nodes_.top();
  return *top;
}
1783
1784
// Returns the character at current_ and advances past it, or returns 0
// without advancing when the end of the input has been reached.
OurReader::Char OurReader::getNextChar() {
  return (current_ == end_) ? Char(0) : *current_++;
}
1789
1790
// Computes the 1-based line and column of `location` within the document.
// "\n", "\r\n" and a lone "\r" each count as one line break.
void OurReader::getLocationLineAndColumn(Location location, int& line,
                                         int& column) const {
  Location scan = begin_;
  Location lineStart = begin_;
  int lineBreaks = 0;
  while (scan < location && scan != end_) {
    const Char ch = *scan++;
    if (ch != '\r' && ch != '\n')
      continue;
    // Treat "\r\n" as a single line break.
    if (ch == '\r' && scan != end_ && *scan == '\n')
      ++scan;
    lineStart = scan;
    ++lineBreaks;
  }
  // column & line start at 1
  column = int(location - lineStart) + 1;
  line = lineBreaks + 1;
}
1811
1812
// Formats the position of `location` as "Line <n>, Column <m>"
// (both 1-based).
String OurReader::getLocationLineAndColumn(Location location) const {
  int lineNumber;
  int columnNumber;
  getLocationLineAndColumn(location, lineNumber, columnNumber);

  char text[18 + 16 + 16 + 1];
  jsoncpp_snprintf(text, sizeof(text), "Line %d, Column %d", lineNumber,
                   columnNumber);
  return text;
}
1819
1820
// Builds a human-readable report of all recorded errors. Each error yields
// a "* Line x, Column y" line, a line with the message, and, when an extra
// location was recorded, a "See ... for detail." line.
String OurReader::getFormattedErrorMessages() const {
  String report;
  for (const auto& entry : errors_) {
    report += "* ";
    report += getLocationLineAndColumn(entry.token_.start_);
    report += "\n";

    report += " ";
    report += entry.message_;
    report += "\n";

    if (entry.extra_) {
      report += "See ";
      report += getLocationLineAndColumn(entry.extra_);
      report += " for detail.\n";
    }
  }
  return report;
}
1832
1833
std::vector<CharReader::StructuredError>
1834
OurReader::getStructuredErrors() const {
1835
std::vector<CharReader::StructuredError> allErrors;
1836
for (const auto& error : errors_) {
1837
CharReader::StructuredError structured;
1838
structured.offset_start = error.token_.start_ - begin_;
1839
structured.offset_limit = error.token_.end_ - begin_;
1840
structured.message = error.message_;
1841
allErrors.push_back(structured);
1842
}
1843
return allErrors;
1844
}
1845
1846
class OurCharReader : public CharReader {
1847
1848
public:
1849
OurCharReader(bool collectComments, OurFeatures const& features)
1850
: CharReader(
1851
std::unique_ptr<OurImpl>(new OurImpl(collectComments, features))) {}
1852
1853
protected:
1854
class OurImpl : public Impl {
1855
public:
1856
OurImpl(bool collectComments, OurFeatures const& features)
1857
: collectComments_(collectComments), reader_(features) {}
1858
1859
bool parse(char const* beginDoc, char const* endDoc, Value* root,
1860
String* errs) override {
1861
bool ok = reader_.parse(beginDoc, endDoc, *root, collectComments_);
1862
if (errs) {
1863
*errs = reader_.getFormattedErrorMessages();
1864
}
1865
return ok;
1866
}
1867
1868
std::vector<CharReader::StructuredError>
1869
getStructuredErrors() const override {
1870
return reader_.getStructuredErrors();
1871
}
1872
1873
private:
1874
bool const collectComments_;
1875
OurReader reader_;
1876
};
1877
};
1878
1879
// A new builder starts out with the settings written by setDefaults().
CharReaderBuilder::CharReaderBuilder() {
  setDefaults(&settings_);
}
1880
// Compiler-generated destruction, defined out of line in this file.
CharReaderBuilder::~CharReaderBuilder() = default;
1881
// Creates a reader configured from settings_. The result is allocated with
// `new` and returned as a raw pointer.
CharReader* CharReaderBuilder::newCharReader() const {
  // Reads one boolean setting by key.
  const auto flag = [this](const char* key) {
    return settings_[key].asBool();
  };

  const bool collectComments = flag("collectComments");

  OurFeatures features = OurFeatures::all();
  features.allowComments_ = flag("allowComments");
  features.allowTrailingCommas_ = flag("allowTrailingCommas");
  features.strictRoot_ = flag("strictRoot");
  features.allowDroppedNullPlaceholders_ = flag("allowDroppedNullPlaceholders");
  features.allowNumericKeys_ = flag("allowNumericKeys");
  features.allowSingleQuotes_ = flag("allowSingleQuotes");

  // Stack limit is always a size_t, so we get this as an unsigned int
  // regardless of whether 64-bit integer support is enabled.
  features.stackLimit_ = static_cast<size_t>(settings_["stackLimit"].asUInt());

  features.failIfExtra_ = flag("failIfExtra");
  features.rejectDupKeys_ = flag("rejectDupKeys");
  features.allowSpecialFloats_ = flag("allowSpecialFloats");
  features.skipBom_ = flag("skipBom");

  return new OurCharReader(collectComments, features);
}
1901
1902
// Checks that every key in settings_ is a recognized setting name.
//
// invalid: optional output. When non-null, every unrecognized key is copied
//          into it with its value, and the result is whether it ended up
//          empty. When null, the function returns false at the first
//          unrecognized key.
// Returns true when no unrecognized key was found.
bool CharReaderBuilder::validate(Json::Value* invalid) const {
  // Allocated once with `new` and never deleted.
  // NOTE(review): presumably this avoids destruction-order problems at
  // program exit; confirm.
  static const auto& valid_keys = *new std::set<String>{
      "collectComments",
      "allowComments",
      "allowTrailingCommas",
      "strictRoot",
      "allowDroppedNullPlaceholders",
      "allowNumericKeys",
      "allowSingleQuotes",
      "stackLimit",
      "failIfExtra",
      "rejectDupKeys",
      "allowSpecialFloats",
      "skipBom",
  };
  for (auto it = settings_.begin(); it != settings_.end(); ++it) {
    const auto key = it.name();
    if (valid_keys.count(key) != 0)
      continue;
    if (!invalid)
      return false;
    (*invalid)[key] = *it;
  }
  return !invalid || invalid->empty();
}
1928
1929
// Gives mutable access to the setting stored under `key` in settings_.
Value& CharReaderBuilder::operator[](const String& key) {
  Value& setting = settings_[key];
  return setting;
}
1932
// static
1933
// Writes the strict-mode configuration into *settings: every extension is
// switched off, and strictRoot, failIfExtra and rejectDupKeys are enabled.
// "collectComments" is not written by this function.
void CharReaderBuilder::strictMode(Json::Value* settings) {
  Json::Value& config = *settings;
  //! [CharReaderBuilderStrictMode]
  config["allowComments"] = false;
  config["allowTrailingCommas"] = false;
  config["strictRoot"] = true;
  config["allowDroppedNullPlaceholders"] = false;
  config["allowNumericKeys"] = false;
  config["allowSingleQuotes"] = false;
  config["stackLimit"] = 1000;
  config["failIfExtra"] = true;
  config["rejectDupKeys"] = true;
  config["allowSpecialFloats"] = false;
  config["skipBom"] = true;
  //! [CharReaderBuilderStrictMode]
}
1948
// static
1949
// Writes the default configuration into *settings: comments are allowed and
// collected, trailing commas are allowed, the BOM is skipped, and the other
// boolean settings are off.
void CharReaderBuilder::setDefaults(Json::Value* settings) {
  Json::Value& config = *settings;
  //! [CharReaderBuilderDefaults]
  config["collectComments"] = true;
  config["allowComments"] = true;
  config["allowTrailingCommas"] = true;
  config["strictRoot"] = false;
  config["allowDroppedNullPlaceholders"] = false;
  config["allowNumericKeys"] = false;
  config["allowSingleQuotes"] = false;
  config["stackLimit"] = 1000;
  config["failIfExtra"] = false;
  config["rejectDupKeys"] = false;
  config["allowSpecialFloats"] = false;
  config["skipBom"] = true;
  //! [CharReaderBuilderDefaults]
}
1965
// static
1966
// Writes the ECMA-404 configuration into *settings: every extension is
// switched off and failIfExtra is enabled, while strictRoot, rejectDupKeys
// and skipBom are disabled. "collectComments" is not written by this
// function.
void CharReaderBuilder::ecma404Mode(Json::Value* settings) {
  Json::Value& config = *settings;
  //! [CharReaderBuilderECMA404Mode]
  config["allowComments"] = false;
  config["allowTrailingCommas"] = false;
  config["strictRoot"] = false;
  config["allowDroppedNullPlaceholders"] = false;
  config["allowNumericKeys"] = false;
  config["allowSingleQuotes"] = false;
  config["stackLimit"] = 1000;
  config["failIfExtra"] = true;
  config["rejectDupKeys"] = false;
  config["allowSpecialFloats"] = false;
  config["skipBom"] = false;
  //! [CharReaderBuilderECMA404Mode]
}
1981
1982
std::vector<CharReader::StructuredError>
1983
CharReader::getStructuredErrors() const {
1984
return _impl->getStructuredErrors();
1985
}
1986
1987
bool CharReader::parse(char const* beginDoc, char const* endDoc, Value* root,
1988
String* errs) {
1989
return _impl->parse(beginDoc, endDoc, root, errs);
1990
}
1991
1992
//////////////////////////////////
1993
// global functions
1994
1995
bool parseFromStream(CharReader::Factory const& fact, IStream& sin, Value* root,
1996
String* errs) {
1997
OStringStream ssin;
1998
ssin << sin.rdbuf();
1999
String doc = std::move(ssin).str();
2000
char const* begin = doc.data();
2001
char const* end = begin + doc.size();
2002
// Note that we do not actually need a null-terminator.
2003
CharReaderPtr const reader(fact.newCharReader());
2004
return reader->parse(begin, end, root, errs);
2005
}
2006
2007
// Parses the whole stream into `root` using a default-configured
// CharReaderBuilder. On parse failure the formatted error text is passed to
// throwRuntimeError(). Returns `sin`.
IStream& operator>>(IStream& sin, Value& root) {
  CharReaderBuilder builder;
  String errorText;
  if (!parseFromStream(builder, sin, &root, &errorText))
    throwRuntimeError(errorText);
  return sin;
}
2016
2017
} // namespace Json
2018
2019