CoCalc -- json_reader.cpp

GitHub Repository: Kitware/CMake
Path: blob/master/Utilities/cmjsoncpp/src/lib_json/json_reader.cpp
³¹⁵⁸ views
1
// Copyright 2007-2011 Baptiste Lepilleur and The JsonCpp Authors
2
// Copyright (C) 2016 InfoTeCS JSC. All rights reserved.
3
// Distributed under MIT license, or public domain if desired and
4
// recognized in your jurisdiction.
5
// See file LICENSE for detail or copy at http://jsoncpp.sourceforge.net/LICENSE
6

7
#if !defined(JSON_IS_AMALGAMATION)
8
#include "json_tool.h"
9
#include <json/assertions.h>
10
#include <json/reader.h>
11
#include <json/value.h>
12
#endif // if !defined(JSON_IS_AMALGAMATION)
13
#include <algorithm>
14
#include <cassert>
15
#include <cmath>
16
#include <cstring>
17
#include <iostream>
18
#include <istream>
19
#include <limits>
20
#include <memory>
21
#include <set>
22
#include <sstream>
23
#include <utility>
24

25
#include <cstdio>
26
#if __cplusplus >= 201103L
27

28
#if !defined(sscanf)
29
#define sscanf std::sscanf
30
#endif
31

32
#endif //__cplusplus
33

34
#if defined(_MSC_VER)
35
#if !defined(_CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES)
36
#define _CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES 1
37
#endif //_CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES
38
#endif //_MSC_VER
39

40
#if defined(_MSC_VER)
41
// Disable warning about strdup being deprecated.
42
#pragma warning(disable : 4996)
43
#endif
44

45
// Define JSONCPP_DEPRECATED_STACK_LIMIT as an appropriate integer at compile
46
// time to change the stack limit
47
#if !defined(JSONCPP_DEPRECATED_STACK_LIMIT)
48
#define JSONCPP_DEPRECATED_STACK_LIMIT 1000
49
#endif
50

51
static size_t const stackLimit_g =
52
    JSONCPP_DEPRECATED_STACK_LIMIT; // see readValue()
53

54
namespace Json {
55

56
#if __cplusplus >= 201103L || (defined(_CPPLIB_VER) && _CPPLIB_VER >= 520)
57
using CharReaderPtr = std::unique_ptr<CharReader>;
58
#else
59
using CharReaderPtr = std::auto_ptr<CharReader>;
60
#endif
61

62
// Implementation of class Features
63
// ////////////////////////////////
64

65
// Compiler-generated default constructor; the initial member values come from
// the class definition, which is not part of this file.
Features::Features() = default;
66

67
// Returns a default-constructed Features object.
Features Features::all() { return {}; }
68

69
// Builds the strict feature set: the root must be an array or object, and
// comments, dropped-null placeholders and numeric keys are all rejected.
Features Features::strictMode() {
  Features strict;
  strict.strictRoot_ = true;
  strict.allowComments_ = false;
  strict.allowDroppedNullPlaceholders_ = false;
  strict.allowNumericKeys_ = false;
  return strict;
}
77

78
// Implementation of class Reader
79
// ////////////////////////////////
80

81
// Reports whether the range [begin, end) holds a '\n' or '\r' character.
bool Reader::containsNewLine(Reader::Location begin, Reader::Location end) {
  const auto isLineBreak = [](char ch) { return ch == '\n' || ch == '\r'; };
  return std::find_if(begin, end, isLineBreak) != end;
}
84

85
// Class Reader
86
// //////////////////////////////////////////////////////////////////
87

88
// Constructs a reader configured with the feature set from Features::all().
Reader::Reader() : features_(Features::all()) {}
89

90
// Constructs a reader that uses a copy of the caller-supplied feature set.
Reader::Reader(const Features& features) : features_(features) {}
91

92
bool Reader::parse(const std::string& document, Value& root,
93
                   bool collectComments) {
94
  document_.assign(document.begin(), document.end());
95
  const char* begin = document_.c_str();
96
  const char* end = begin + document_.length();
97
  return parse(begin, end, root, collectComments);
98
}
99

100
bool Reader::parse(std::istream& is, Value& root, bool collectComments) {
101
  // std::istream_iterator<char> begin(is);
102
  // std::istream_iterator<char> end;
103
  // Those would allow streamed input from a file, if parse() were a
104
  // template function.
105

106
  // Since String is reference-counted, this at least does not
107
  // create an extra copy.
108
  String doc(std::istreambuf_iterator<char>(is), {});
109
  return parse(doc.data(), doc.data() + doc.size(), root, collectComments);
110
}
111

112
bool Reader::parse(const char* beginDoc, const char* endDoc, Value& root,
113
                   bool collectComments) {
114
  if (!features_.allowComments_) {
115
    collectComments = false;
116
  }
117

118
  begin_ = beginDoc;
119
  end_ = endDoc;
120
  collectComments_ = collectComments;
121
  current_ = begin_;
122
  lastValueEnd_ = nullptr;
123
  lastValue_ = nullptr;
124
  commentsBefore_.clear();
125
  errors_.clear();
126
  while (!nodes_.empty())
127
    nodes_.pop();
128
  nodes_.push(&root);
129

130
  bool successful = readValue();
131
  Token token;
132
  readTokenSkippingComments(token);
133
  if (collectComments_ && !commentsBefore_.empty())
134
    root.setComment(commentsBefore_, commentAfter);
135
  if (features_.strictRoot_) {
136
    if (!root.isArray() && !root.isObject()) {
137
      // Set error location to start of doc, ideally should be first token found
138
      // in doc
139
      token.type_ = tokenError;
140
      token.start_ = beginDoc;
141
      token.end_ = endDoc;
142
      addError(
143
          "A valid JSON document must be either an array or an object value.",
144
          token);
145
      return false;
146
    }
147
  }
148
  return successful;
149
}
150

151
bool Reader::readValue() {
152
  // readValue() may call itself only if it calls readObject() or ReadArray().
153
  // These methods execute nodes_.push() just before and nodes_.pop)() just
154
  // after calling readValue(). parse() executes one nodes_.push(), so > instead
155
  // of >=.
156
  if (nodes_.size() > stackLimit_g)
157
    throwRuntimeError("Exceeded stackLimit in readValue().");
158

159
  Token token;
160
  readTokenSkippingComments(token);
161
  bool successful = true;
162

163
  if (collectComments_ && !commentsBefore_.empty()) {
164
    currentValue().setComment(commentsBefore_, commentBefore);
165
    commentsBefore_.clear();
166
  }
167

168
  switch (token.type_) {
169
  case tokenObjectBegin:
170
    successful = readObject(token);
171
    currentValue().setOffsetLimit(current_ - begin_);
172
    break;
173
  case tokenArrayBegin:
174
    successful = readArray(token);
175
    currentValue().setOffsetLimit(current_ - begin_);
176
    break;
177
  case tokenNumber:
178
    successful = decodeNumber(token);
179
    break;
180
  case tokenString:
181
    successful = decodeString(token);
182
    break;
183
  case tokenTrue: {
184
    Value v(true);
185
    currentValue().swapPayload(v);
186
    currentValue().setOffsetStart(token.start_ - begin_);
187
    currentValue().setOffsetLimit(token.end_ - begin_);
188
  } break;
189
  case tokenFalse: {
190
    Value v(false);
191
    currentValue().swapPayload(v);
192
    currentValue().setOffsetStart(token.start_ - begin_);
193
    currentValue().setOffsetLimit(token.end_ - begin_);
194
  } break;
195
  case tokenNull: {
196
    Value v;
197
    currentValue().swapPayload(v);
198
    currentValue().setOffsetStart(token.start_ - begin_);
199
    currentValue().setOffsetLimit(token.end_ - begin_);
200
  } break;
201
  case tokenArraySeparator:
202
  case tokenObjectEnd:
203
  case tokenArrayEnd:
204
    if (features_.allowDroppedNullPlaceholders_) {
205
      // "Un-read" the current token and mark the current value as a null
206
      // token.
207
      current_--;
208
      Value v;
209
      currentValue().swapPayload(v);
210
      currentValue().setOffsetStart(current_ - begin_ - 1);
211
      currentValue().setOffsetLimit(current_ - begin_);
212
      break;
213
    } // Else, fall through...
214
  default:
215
    currentValue().setOffsetStart(token.start_ - begin_);
216
    currentValue().setOffsetLimit(token.end_ - begin_);
217
    return addError("Syntax error: value, object or array expected.", token);
218
  }
219

220
  if (collectComments_) {
221
    lastValueEnd_ = current_;
222
    lastValue_ = &currentValue();
223
  }
224

225
  return successful;
226
}
227

228
// Reads the next token; when comments are allowed, comment tokens are read
// and discarded until a non-comment token or a read failure is reached.
// Returns the status of the last readToken() call.
bool Reader::readTokenSkippingComments(Token& token) {
  for (;;) {
    const bool ok = readToken(token);
    const bool skippable =
        features_.allowComments_ && ok && token.type_ == tokenComment;
    if (!skippable)
      return ok;
  }
}
237

238
// Scans one token starting at the cursor (after skipping whitespace).
//
// On return token.start_/token.end_ delimit the consumed text and
// token.type_ names its kind. Returns false, with type tokenError, when the
// text does not form a valid token.
bool Reader::readToken(Token& token) {
  skipSpaces();
  token.start_ = current_;
  const Char first = getNextChar();
  bool recognized = true;
  switch (first) {
  // Structural single-character tokens.
  case '{':
    token.type_ = tokenObjectBegin;
    break;
  case '}':
    token.type_ = tokenObjectEnd;
    break;
  case '[':
    token.type_ = tokenArrayBegin;
    break;
  case ']':
    token.type_ = tokenArrayEnd;
    break;
  case ',':
    token.type_ = tokenArraySeparator;
    break;
  case ':':
    token.type_ = tokenMemberSeparator;
    break;
  // Tokens whose remainder is scanned by a helper.
  case '"':
    token.type_ = tokenString;
    recognized = readString();
    break;
  case '/':
    token.type_ = tokenComment;
    recognized = readComment();
    break;
  case '-':
  case '0':
  case '1':
  case '2':
  case '3':
  case '4':
  case '5':
  case '6':
  case '7':
  case '8':
  case '9':
    token.type_ = tokenNumber;
    readNumber();
    break;
  // Keyword literals: the first letter is consumed, match the rest.
  case 't':
    token.type_ = tokenTrue;
    recognized = match("rue", 3);
    break;
  case 'f':
    token.type_ = tokenFalse;
    recognized = match("alse", 4);
    break;
  case 'n':
    token.type_ = tokenNull;
    recognized = match("ull", 3);
    break;
  // getNextChar() returns 0 at end of input (a literal NUL byte lands here
  // as well).
  case 0:
    token.type_ = tokenEndOfStream;
    break;
  default:
    recognized = false;
    break;
  }
  if (!recognized)
    token.type_ = tokenError;
  token.end_ = current_;
  return recognized;
}
308

309
void Reader::skipSpaces() {
310
  while (current_ != end_) {
311
    Char c = *current_;
312
    if (c == ' ' || c == '\t' || c == '\r' || c == '\n')
313
      ++current_;
314
    else
315
      break;
316
  }
317
}
318

319
bool Reader::match(const Char* pattern, int patternLength) {
320
  if (end_ - current_ < patternLength)
321
    return false;
322
  int index = patternLength;
323
  while (index--)
324
    if (current_[index] != pattern[index])
325
      return false;
326
  current_ += patternLength;
327
  return true;
328
}
329

330
bool Reader::readComment() {
331
  Location commentBegin = current_ - 1;
332
  Char c = getNextChar();
333
  bool successful = false;
334
  if (c == '*')
335
    successful = readCStyleComment();
336
  else if (c == '/')
337
    successful = readCppStyleComment();
338
  if (!successful)
339
    return false;
340

341
  if (collectComments_) {
342
    CommentPlacement placement = commentBefore;
343
    if (lastValueEnd_ && !containsNewLine(lastValueEnd_, commentBegin)) {
344
      if (c != '*' || !containsNewLine(commentBegin, current_))
345
        placement = commentAfterOnSameLine;
346
    }
347

348
    addComment(commentBegin, current_, placement);
349
  }
350
  return true;
351
}
352

353
// Returns a copy of [begin, end) in which every "\r\n" pair and every lone
// '\r' has been replaced by a single '\n'.
String Reader::normalizeEOL(Reader::Location begin, Reader::Location end) {
  String normalized;
  normalized.reserve(static_cast<size_t>(end - begin));
  for (Reader::Location it = begin; it != end;) {
    const char c = *it++;
    if (c != '\r') {
      normalized += c;
      continue;
    }
    // DOS EOL: swallow the '\n' that follows the '\r'.
    if (it != end && *it == '\n')
      ++it;
    // Both DOS and lone-CR line ends become '\n'.
    normalized += '\n';
  }
  return normalized;
}
371

372
void Reader::addComment(Location begin, Location end,
373
                        CommentPlacement placement) {
374
  assert(collectComments_);
375
  const String& normalized = normalizeEOL(begin, end);
376
  if (placement == commentAfterOnSameLine) {
377
    assert(lastValue_ != nullptr);
378
    lastValue_->setComment(normalized, placement);
379
  } else {
380
    commentsBefore_ += normalized;
381
  }
382
}
383

384
bool Reader::readCStyleComment() {
385
  while ((current_ + 1) < end_) {
386
    Char c = getNextChar();
387
    if (c == '*' && *current_ == '/')
388
      break;
389
  }
390
  return getNextChar() == '/';
391
}
392

393
bool Reader::readCppStyleComment() {
394
  while (current_ != end_) {
395
    Char c = getNextChar();
396
    if (c == '\n')
397
      break;
398
    if (c == '\r') {
399
      // Consume DOS EOL. It will be normalized in addComment.
400
      if (current_ != end_ && *current_ == '\n')
401
        getNextChar();
402
      // Break on Moc OS 9 EOL.
403
      break;
404
    }
405
  }
406
  return true;
407
}
408

409
void Reader::readNumber() {
410
  Location p = current_;
411
  char c = '0'; // stopgap for already consumed character
412
  // integral part
413
  while (c >= '0' && c <= '9')
414
    c = (current_ = p) < end_ ? *p++ : '\0';
415
  // fractional part
416
  if (c == '.') {
417
    c = (current_ = p) < end_ ? *p++ : '\0';
418
    while (c >= '0' && c <= '9')
419
      c = (current_ = p) < end_ ? *p++ : '\0';
420
  }
421
  // exponential part
422
  if (c == 'e' || c == 'E') {
423
    c = (current_ = p) < end_ ? *p++ : '\0';
424
    if (c == '+' || c == '-')
425
      c = (current_ = p) < end_ ? *p++ : '\0';
426
    while (c >= '0' && c <= '9')
427
      c = (current_ = p) < end_ ? *p++ : '\0';
428
  }
429
}
430

431
bool Reader::readString() {
432
  Char c = '\0';
433
  while (current_ != end_) {
434
    c = getNextChar();
435
    if (c == '\\')
436
      getNextChar();
437
    else if (c == '"')
438
      break;
439
  }
440
  return c == '"';
441
}
442

443
// Parses the members of an object whose '{' is described by token and stores
// them in currentValue().
//
// Returns true when the closing '}' is reached. On any error the problem is
// recorded, the input is skipped up to the next '}' (or end of stream) and
// false is returned.
bool Reader::readObject(Token& token) {
  Value init(objectValue);
  currentValue().swapPayload(init);
  currentValue().setOffsetStart(token.start_ - begin_);

  Token tokenName;
  String name;
  while (readTokenSkippingComments(tokenName)) {
    // '}' while no member name is pending closes the object.
    if (tokenName.type_ == tokenObjectEnd && name.empty())
      return true;
    name.clear();

    const bool isStringKey = tokenName.type_ == tokenString;
    const bool isNumericKey =
        tokenName.type_ == tokenNumber && features_.allowNumericKeys_;
    if (isStringKey) {
      if (!decodeString(tokenName, name))
        return recoverFromError(tokenObjectEnd);
    } else if (isNumericKey) {
      Value numberName;
      if (!decodeNumber(tokenName, numberName))
        return recoverFromError(tokenObjectEnd);
      name = numberName.asString();
    } else {
      break; // not a member name: reported after the loop
    }

    Token colon;
    if (!readToken(colon) || colon.type_ != tokenMemberSeparator) {
      return addErrorAndRecover("Missing ':' after object member name", colon,
                                tokenObjectEnd);
    }

    // Parse the member value with its slot on top of the node stack.
    Value& member = currentValue()[name];
    nodes_.push(&member);
    const bool valueOk = readValue();
    nodes_.pop();
    if (!valueOk) // error already set
      return recoverFromError(tokenObjectEnd);

    Token separator;
    const bool separatorOk = readTokenSkippingComments(separator);
    if (!separatorOk || (separator.type_ != tokenObjectEnd &&
                         separator.type_ != tokenArraySeparator)) {
      return addErrorAndRecover("Missing ',' or '}' in object declaration",
                                separator, tokenObjectEnd);
    }
    if (separator.type_ == tokenObjectEnd)
      return true;
  }
  return addErrorAndRecover("Missing '}' or object member name", tokenName,
                            tokenObjectEnd);
}
489

490
// Parses the elements of an array whose '[' is described by token and stores
// them in currentValue().
//
// Returns true when the closing ']' is reached. On any error the input is
// skipped up to the next ']' (or end of stream) and false is returned.
bool Reader::readArray(Token& token) {
  Value init(arrayValue);
  currentValue().swapPayload(init);
  currentValue().setOffsetStart(token.start_ - begin_);

  skipSpaces();
  if (current_ != end_ && *current_ == ']') {
    // Empty array: just consume the closing bracket.
    Token endArray;
    readToken(endArray);
    return true;
  }

  for (int index = 0;; ++index) {
    // Parse the element with its slot on top of the node stack.
    Value& element = currentValue()[index];
    nodes_.push(&element);
    const bool elementOk = readValue();
    nodes_.pop();
    if (!elementOk) // error already set
      return recoverFromError(tokenArrayEnd);

    // Accept Comment after last item in the array.
    Token separator;
    const bool separatorOk = readTokenSkippingComments(separator);
    if (!separatorOk || (separator.type_ != tokenArraySeparator &&
                         separator.type_ != tokenArrayEnd)) {
      return addErrorAndRecover("Missing ',' or ']' in array declaration",
                                separator, tokenArrayEnd);
    }
    if (separator.type_ == tokenArrayEnd)
      return true;
  }
}
524

525
// Decodes a number token into currentValue() and records its source offsets.
// Returns false, leaving currentValue() untouched, when decoding fails.
bool Reader::decodeNumber(Token& token) {
  Value number;
  const bool ok = decodeNumber(token, number);
  if (ok) {
    currentValue().swapPayload(number);
    currentValue().setOffsetStart(token.start_ - begin_);
    currentValue().setOffsetLimit(token.end_ - begin_);
  }
  return ok;
}
534

535
// Decodes a number token into decoded.
//
// The text is first read as an integer. If a non-digit character appears, or
// the magnitude would exceed the largest supported integer, decoding is
// delegated to decodeDouble(). Returns true on success.
bool Reader::decodeNumber(Token& token, Value& decoded) {
  Location cursor = token.start_;
  const bool negative = (*cursor == '-');
  if (negative)
    ++cursor;

  // Largest magnitude representable for this sign.
  // TODO: Help the compiler do the div and mod at compile time or get rid of
  // them.
  const Value::LargestUInt limit =
      negative ? Value::LargestUInt(Value::maxLargestInt) + 1
               : Value::maxLargestUInt;
  const Value::LargestUInt limitOverTen = limit / 10;
  const Value::LargestUInt lastDigitMax = limit % 10;

  Value::LargestUInt magnitude = 0;
  while (cursor < token.end_) {
    const Char c = *cursor++;
    if (c < '0' || c > '9')
      return decodeDouble(token, decoded);
    const Value::UInt digit = static_cast<Value::UInt>(c - '0');
    if (magnitude >= limitOverTen) {
      // Appending a digit is only safe when the magnitude is exactly at
      // limit / 10, this is the last digit, and the digit does not exceed
      // limit % 10. Anything else is decoded as a double to avoid overflow.
      const bool stillFits = magnitude == limitOverTen &&
                             cursor == token.end_ && digit <= lastDigitMax;
      if (!stillFits)
        return decodeDouble(token, decoded);
    }
    magnitude = magnitude * 10 + digit;
  }

  if (negative) {
    if (magnitude == limit)
      decoded = Value::minLargestInt;
    else
      decoded = -Value::LargestInt(magnitude);
  } else {
    if (magnitude <= Value::LargestUInt(Value::maxInt))
      decoded = Value::LargestInt(magnitude);
    else
      decoded = magnitude;
  }
  return true;
}
577

578
// Decodes a floating-point token into currentValue() and records its source
// offsets. Returns false, leaving currentValue() untouched, on failure.
bool Reader::decodeDouble(Token& token) {
  Value number;
  const bool ok = decodeDouble(token, number);
  if (ok) {
    currentValue().swapPayload(number);
    currentValue().setOffsetStart(token.start_ - begin_);
    currentValue().setOffsetLimit(token.end_ - begin_);
  }
  return ok;
}
587

588
// Decodes the token text as a double through stream extraction.
//
// If extraction fails but left the value at the largest or lowest finite
// double, the result is mapped to +/- infinity. An infinite result is also
// accepted. Any other failure records "is not a number" and returns false.
bool Reader::decodeDouble(Token& token, Value& decoded) {
  const String text(token.start_, token.end_);
  IStringStream stream(text);
  double parsed = 0;
  if (!(stream >> parsed)) {
    if (parsed == std::numeric_limits<double>::max())
      parsed = std::numeric_limits<double>::infinity();
    else if (parsed == std::numeric_limits<double>::lowest())
      parsed = -std::numeric_limits<double>::infinity();
    else if (!std::isinf(parsed))
      return addError("'" + text + "' is not a number.", token);
  }
  decoded = parsed;
  return true;
}
603

604
// Decodes a string token into currentValue() and records its source offsets.
// Returns false, leaving currentValue() untouched, when decoding fails.
bool Reader::decodeString(Token& token) {
  String text;
  if (!decodeString(token, text))
    return false;
  Value stringValue(text);
  currentValue().swapPayload(stringValue);
  currentValue().setOffsetStart(token.start_ - begin_);
  currentValue().setOffsetLimit(token.end_ - begin_);
  return true;
}
614

615
// Appends the unescaped content of a quoted string token to decoded.
//
// The surrounding quotes are skipped and backslash escapes are translated.
// Returns false after recording an error for an empty, unknown or malformed
// escape sequence.
bool Reader::decodeString(Token& token, String& decoded) {
  decoded.reserve(static_cast<size_t>(token.end_ - token.start_ - 2));
  Location current = token.start_ + 1; // skip '"'
  Location end = token.end_ - 1;       // do not include '"'
  while (current != end) {
    const Char c = *current++;
    if (c == '"')
      break;
    if (c != '\\') {
      decoded += c;
      continue;
    }

    if (current == end)
      return addError("Empty escape sequence in string", token, current);
    const Char escape = *current++;
    switch (escape) {
    // Escapes that stand for the escaped character itself.
    case '"':
    case '/':
    case '\\':
      decoded += escape;
      break;
    case 'b':
      decoded += '\b';
      break;
    case 'f':
      decoded += '\f';
      break;
    case 'n':
      decoded += '\n';
      break;
    case 'r':
      decoded += '\r';
      break;
    case 't':
      decoded += '\t';
      break;
    case 'u': {
      unsigned int codePoint;
      if (!decodeUnicodeCodePoint(token, current, end, codePoint))
        return false;
      decoded += codePointToUTF8(codePoint);
      break;
    }
    default:
      return addError("Bad escape sequence in string", token, current);
    }
  }
  return true;
}
667

668
// Decodes the code point of a "\u" escape; current points just past the 'u'.
//
// When the first escape is a high surrogate (U+D800..U+DBFF), a second
// "\uXXXX" escape must follow and must be a low surrogate (U+DC00..U+DFFF);
// the two are combined into one code point. A second escape outside the
// low-surrogate range is reported as an error rather than being combined
// into a meaningless value.
//
// On success unicode holds the code point and current is advanced past the
// consumed escapes. Returns false after recording an error otherwise.
bool Reader::decodeUnicodeCodePoint(Token& token, Location& current,
                                    Location end, unsigned int& unicode) {
  if (!decodeUnicodeEscapeSequence(token, current, end, unicode))
    return false;
  if (unicode < 0xD800 || unicode > 0xDBFF)
    return true; // not a high surrogate: nothing to combine

  // surrogate pairs
  if (end - current < 6)
    return addError(
        "additional six characters expected to parse unicode surrogate pair.",
        token, current);
  if (*(current++) != '\\' || *(current++) != 'u')
    return addError("expecting another \\u token to begin the second half of "
                    "a unicode surrogate pair",
                    token, current);

  unsigned int lowSurrogate;
  if (!decodeUnicodeEscapeSequence(token, current, end, lowSurrogate))
    return false;
  if (lowSurrogate < 0xDC00 || lowSurrogate > 0xDFFF)
    return addError("expecting a low surrogate (\\uDC00-\\uDFFF) as the second "
                    "half of a unicode surrogate pair",
                    token, current);

  unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (lowSurrogate & 0x3FF);
  return true;
}
692

693
bool Reader::decodeUnicodeEscapeSequence(Token& token, Location& current,
694
                                         Location end,
695
                                         unsigned int& ret_unicode) {
696
  if (end - current < 4)
697
    return addError(
698
        "Bad unicode escape sequence in string: four digits expected.", token,
699
        current);
700
  int unicode = 0;
701
  for (int index = 0; index < 4; ++index) {
702
    Char c = *current++;
703
    unicode *= 16;
704
    if (c >= '0' && c <= '9')
705
      unicode += c - '0';
706
    else if (c >= 'a' && c <= 'f')
707
      unicode += c - 'a' + 10;
708
    else if (c >= 'A' && c <= 'F')
709
      unicode += c - 'A' + 10;
710
    else
711
      return addError(
712
          "Bad unicode escape sequence in string: hexadecimal digit expected.",
713
          token, current);
714
  }
715
  ret_unicode = static_cast<unsigned int>(unicode);
716
  return true;
717
}
718

719
bool Reader::addError(const String& message, Token& token, Location extra) {
720
  ErrorInfo info;
721
  info.token_ = token;
722
  info.message_ = message;
723
  info.extra_ = extra;
724
  errors_.push_back(info);
725
  return false;
726
}
727

728
// Skips tokens until one of type skipUntilToken, or the end of the stream, is
// read. Errors raised while skipping are dropped so that only those recorded
// before the call remain. Always returns false.
bool Reader::recoverFromError(TokenType skipUntilToken) {
  size_t const errorCount = errors_.size();
  Token skipped;
  do {
    if (!readToken(skipped))
      errors_.resize(errorCount); // discard errors caused by recovery
  } while (skipped.type_ != skipUntilToken &&
           skipped.type_ != tokenEndOfStream);
  errors_.resize(errorCount);
  return false;
}
740

741
bool Reader::addErrorAndRecover(const String& message, Token& token,
742
                                TokenType skipUntilToken) {
743
  addError(message, token);
744
  return recoverFromError(skipUntilToken);
745
}
746

747
// Returns the Value on top of the node stack, i.e. the one being filled in.
// The stack must not be empty.
Value& Reader::currentValue() { return *(nodes_.top()); }
748

749
// Returns the character at the cursor and advances past it, or returns 0
// without advancing when the cursor is already at the end of the input.
Reader::Char Reader::getNextChar() {
  if (current_ != end_)
    return *current_++;
  return 0;
}
754

755
// Computes the 1-based line and column of location within the document.
// '\n', '\r' and "\r\n" each count as a single line break.
void Reader::getLocationLineAndColumn(Location location, int& line,
                                      int& column) const {
  Location cursor = begin_;
  Location lineStart = cursor;
  int lineBreaks = 0;
  while (cursor < location && cursor != end_) {
    const Char c = *cursor++;
    if (c != '\r' && c != '\n')
      continue;
    // Treat "\r\n" as one break by also consuming the '\n'.
    if (c == '\r' && cursor != end_ && *cursor == '\n')
      ++cursor;
    lineStart = cursor;
    ++lineBreaks;
  }
  // column & line start at 1
  column = int(location - lineStart) + 1;
  line = lineBreaks + 1;
}
776

777
// Formats the position of location as "Line <n>, Column <m>" (both 1-based).
String Reader::getLocationLineAndColumn(Location location) const {
  int line = 0;
  int column = 0;
  getLocationLineAndColumn(location, line, column);
  // Fixed-size scratch buffer; its size is passed to jsoncpp_snprintf.
  char text[18 + 16 + 16 + 1];
  jsoncpp_snprintf(text, sizeof(text), "Line %d, Column %d", line, column);
  return String(text);
}
784

785
// Deprecated. Preserved for backward compatibility
786
String Reader::getFormatedErrorMessages() const {
787
  return getFormattedErrorMessages();
788
}
789

790
// Builds a human-readable report of all recorded errors. Each entry has a
// "* Line x, Column y" line and an indented message line, plus a
// "See ... for detail." line when the error carries an extra location.
String Reader::getFormattedErrorMessages() const {
  String report;
  for (const auto& error : errors_) {
    report += "* ";
    report += getLocationLineAndColumn(error.token_.start_);
    report += "\n";

    report += "  ";
    report += error.message_;
    report += "\n";

    if (error.extra_) {
      report += "See ";
      report += getLocationLineAndColumn(error.extra_);
      report += " for detail.\n";
    }
  }
  return report;
}
802

803
std::vector<Reader::StructuredError> Reader::getStructuredErrors() const {
804
  std::vector<Reader::StructuredError> allErrors;
805
  for (const auto& error : errors_) {
806
    Reader::StructuredError structured;
807
    structured.offset_start = error.token_.start_ - begin_;
808
    structured.offset_limit = error.token_.end_ - begin_;
809
    structured.message = error.message_;
810
    allErrors.push_back(structured);
811
  }
812
  return allErrors;
813
}
814

815
bool Reader::pushError(const Value& value, const String& message) {
816
  ptrdiff_t const length = end_ - begin_;
817
  if (value.getOffsetStart() > length || value.getOffsetLimit() > length)
818
    return false;
819
  Token token;
820
  token.type_ = tokenError;
821
  token.start_ = begin_ + value.getOffsetStart();
822
  token.end_ = begin_ + value.getOffsetLimit();
823
  ErrorInfo info;
824
  info.token_ = token;
825
  info.message_ = message;
826
  info.extra_ = nullptr;
827
  errors_.push_back(info);
828
  return true;
829
}
830

831
bool Reader::pushError(const Value& value, const String& message,
832
                       const Value& extra) {
833
  ptrdiff_t const length = end_ - begin_;
834
  if (value.getOffsetStart() > length || value.getOffsetLimit() > length ||
835
      extra.getOffsetLimit() > length)
836
    return false;
837
  Token token;
838
  token.type_ = tokenError;
839
  token.start_ = begin_ + value.getOffsetStart();
840
  token.end_ = begin_ + value.getOffsetLimit();
841
  ErrorInfo info;
842
  info.token_ = token;
843
  info.message_ = message;
844
  info.extra_ = begin_ + extra.getOffsetStart();
845
  errors_.push_back(info);
846
  return true;
847
}
848

849
// Returns true when no errors are currently recorded.
bool Reader::good() const { return errors_.empty(); }
850

851
// Originally copied from the Features class (now deprecated), used internally
// for features implementation.
//
// Every member has a default initializer, so a plain `OurFeatures f;` is in
// the same all-off, zero-limit state that OurFeatures::all() returns, rather
// than holding indeterminate values.
class OurFeatures {
public:
  // Returns a value-initialized feature set.
  static OurFeatures all();
  bool allowComments_ = false;
  bool allowTrailingCommas_ = false;
  bool strictRoot_ = false;
  bool allowDroppedNullPlaceholders_ = false;
  bool allowNumericKeys_ = false;
  bool allowSingleQuotes_ = false;
  bool failIfExtra_ = false;
  bool rejectDupKeys_ = false;
  bool allowSpecialFloats_ = false;
  bool skipBom_ = false;
  size_t stackLimit_ = 0;
}; // OurFeatures
868

869
// Returns a value-initialized OurFeatures (all flags false, stackLimit_ 0).
OurFeatures OurFeatures::all() { return {}; }
870

871
// Implementation of class OurReader
872
// ////////////////////////////////
873

874
// Originally copied from the Reader class (now deprecated), used internally
875
// for implementing JSON reading.
876
class OurReader {
877
public:
878
  using Char = char;
879
  using Location = const Char*;
880

881
  explicit OurReader(OurFeatures const& features);
882
  bool parse(const char* beginDoc, const char* endDoc, Value& root,
883
             bool collectComments = true);
884
  String getFormattedErrorMessages() const;
885
  std::vector<CharReader::StructuredError> getStructuredErrors() const;
886

887
private:
888
  OurReader(OurReader const&);      // no impl
889
  void operator=(OurReader const&); // no impl
890

891
  enum TokenType {
892
    tokenEndOfStream = 0,
893
    tokenObjectBegin,
894
    tokenObjectEnd,
895
    tokenArrayBegin,
896
    tokenArrayEnd,
897
    tokenString,
898
    tokenNumber,
899
    tokenTrue,
900
    tokenFalse,
901
    tokenNull,
902
    tokenNaN,
903
    tokenPosInf,
904
    tokenNegInf,
905
    tokenArraySeparator,
906
    tokenMemberSeparator,
907
    tokenComment,
908
    tokenError
909
  };
910

911
  class Token {
912
  public:
913
    TokenType type_;
914
    Location start_;
915
    Location end_;
916
  };
917

918
  class ErrorInfo {
919
  public:
920
    Token token_;
921
    String message_;
922
    Location extra_;
923
  };
924

925
  using Errors = std::deque<ErrorInfo>;
926

927
  bool readToken(Token& token);
928
  bool readTokenSkippingComments(Token& token);
929
  void skipSpaces();
930
  void skipBom(bool skipBom);
931
  bool match(const Char* pattern, int patternLength);
932
  bool readComment();
933
  bool readCStyleComment(bool* containsNewLineResult);
934
  bool readCppStyleComment();
935
  bool readString();
936
  bool readStringSingleQuote();
937
  bool readNumber(bool checkInf);
938
  bool readValue();
939
  bool readObject(Token& token);
940
  bool readArray(Token& token);
941
  bool decodeNumber(Token& token);
942
  bool decodeNumber(Token& token, Value& decoded);
943
  bool decodeString(Token& token);
944
  bool decodeString(Token& token, String& decoded);
945
  bool decodeDouble(Token& token);
946
  bool decodeDouble(Token& token, Value& decoded);
947
  bool decodeUnicodeCodePoint(Token& token, Location& current, Location end,
948
                              unsigned int& unicode);
949
  bool decodeUnicodeEscapeSequence(Token& token, Location& current,
950
                                   Location end, unsigned int& unicode);
951
  bool addError(const String& message, Token& token, Location extra = nullptr);
952
  bool recoverFromError(TokenType skipUntilToken);
953
  bool addErrorAndRecover(const String& message, Token& token,
954
                          TokenType skipUntilToken);
955
  void skipUntilSpace();
956
  Value& currentValue();
957
  Char getNextChar();
958
  void getLocationLineAndColumn(Location location, int& line,
959
                                int& column) const;
960
  String getLocationLineAndColumn(Location location) const;
961
  void addComment(Location begin, Location end, CommentPlacement placement);
962

963
  static String normalizeEOL(Location begin, Location end);
964
  static bool containsNewLine(Location begin, Location end);
965

966
  using Nodes = std::stack<Value*>;
967

968
  Nodes nodes_{};
969
  Errors errors_{};
970
  String document_{};
971
  Location begin_ = nullptr;
972
  Location end_ = nullptr;
973
  Location current_ = nullptr;
974
  Location lastValueEnd_ = nullptr;
975
  Value* lastValue_ = nullptr;
976
  bool lastValueHasAComment_ = false;
977
  String commentsBefore_{};
978

979
  OurFeatures const features_;
980
  bool collectComments_ = false;
981
}; // OurReader
982

983
// Complete copy of the Reader implementation, for OurReader.
984

985
// Reports whether the range [begin, end) contains a line-feed or a
// carriage-return character.
bool OurReader::containsNewLine(OurReader::Location begin,
                                OurReader::Location end) {
  const auto isLineBreak = [](char ch) { return ch == '\r' || ch == '\n'; };
  return std::find_if(begin, end, isLineBreak) != end;
}
989

990
// Creates a reader whose parsing options are copied from `features`.
OurReader::OurReader(OurFeatures const& features) : features_(features) {}
991

992
bool OurReader::parse(const char* beginDoc, const char* endDoc, Value& root,
993
                      bool collectComments) {
994
  if (!features_.allowComments_) {
995
    collectComments = false;
996
  }
997

998
  begin_ = beginDoc;
999
  end_ = endDoc;
1000
  collectComments_ = collectComments;
1001
  current_ = begin_;
1002
  lastValueEnd_ = nullptr;
1003
  lastValue_ = nullptr;
1004
  commentsBefore_.clear();
1005
  errors_.clear();
1006
  while (!nodes_.empty())
1007
    nodes_.pop();
1008
  nodes_.push(&root);
1009

1010
  // skip byte order mark if it exists at the beginning of the UTF-8 text.
1011
  skipBom(features_.skipBom_);
1012
  bool successful = readValue();
1013
  nodes_.pop();
1014
  Token token;
1015
  readTokenSkippingComments(token);
1016
  if (features_.failIfExtra_ && (token.type_ != tokenEndOfStream)) {
1017
    addError("Extra non-whitespace after JSON value.", token);
1018
    return false;
1019
  }
1020
  if (collectComments_ && !commentsBefore_.empty())
1021
    root.setComment(commentsBefore_, commentAfter);
1022
  if (features_.strictRoot_) {
1023
    if (!root.isArray() && !root.isObject()) {
1024
      // Set error location to start of doc, ideally should be first token found
1025
      // in doc
1026
      token.type_ = tokenError;
1027
      token.start_ = beginDoc;
1028
      token.end_ = endDoc;
1029
      addError(
1030
          "A valid JSON document must be either an array or an object value.",
1031
          token);
1032
      return false;
1033
    }
1034
  }
1035
  return successful;
1036
}
1037

1038
bool OurReader::readValue() {
1039
  //  To preserve the old behaviour we cast size_t to int.
1040
  if (nodes_.size() > features_.stackLimit_)
1041
    throwRuntimeError("Exceeded stackLimit in readValue().");
1042
  Token token;
1043
  readTokenSkippingComments(token);
1044
  bool successful = true;
1045

1046
  if (collectComments_ && !commentsBefore_.empty()) {
1047
    currentValue().setComment(commentsBefore_, commentBefore);
1048
    commentsBefore_.clear();
1049
  }
1050

1051
  switch (token.type_) {
1052
  case tokenObjectBegin:
1053
    successful = readObject(token);
1054
    currentValue().setOffsetLimit(current_ - begin_);
1055
    break;
1056
  case tokenArrayBegin:
1057
    successful = readArray(token);
1058
    currentValue().setOffsetLimit(current_ - begin_);
1059
    break;
1060
  case tokenNumber:
1061
    successful = decodeNumber(token);
1062
    break;
1063
  case tokenString:
1064
    successful = decodeString(token);
1065
    break;
1066
  case tokenTrue: {
1067
    Value v(true);
1068
    currentValue().swapPayload(v);
1069
    currentValue().setOffsetStart(token.start_ - begin_);
1070
    currentValue().setOffsetLimit(token.end_ - begin_);
1071
  } break;
1072
  case tokenFalse: {
1073
    Value v(false);
1074
    currentValue().swapPayload(v);
1075
    currentValue().setOffsetStart(token.start_ - begin_);
1076
    currentValue().setOffsetLimit(token.end_ - begin_);
1077
  } break;
1078
  case tokenNull: {
1079
    Value v;
1080
    currentValue().swapPayload(v);
1081
    currentValue().setOffsetStart(token.start_ - begin_);
1082
    currentValue().setOffsetLimit(token.end_ - begin_);
1083
  } break;
1084
  case tokenNaN: {
1085
    Value v(std::numeric_limits<double>::quiet_NaN());
1086
    currentValue().swapPayload(v);
1087
    currentValue().setOffsetStart(token.start_ - begin_);
1088
    currentValue().setOffsetLimit(token.end_ - begin_);
1089
  } break;
1090
  case tokenPosInf: {
1091
    Value v(std::numeric_limits<double>::infinity());
1092
    currentValue().swapPayload(v);
1093
    currentValue().setOffsetStart(token.start_ - begin_);
1094
    currentValue().setOffsetLimit(token.end_ - begin_);
1095
  } break;
1096
  case tokenNegInf: {
1097
    Value v(-std::numeric_limits<double>::infinity());
1098
    currentValue().swapPayload(v);
1099
    currentValue().setOffsetStart(token.start_ - begin_);
1100
    currentValue().setOffsetLimit(token.end_ - begin_);
1101
  } break;
1102
  case tokenArraySeparator:
1103
  case tokenObjectEnd:
1104
  case tokenArrayEnd:
1105
    if (features_.allowDroppedNullPlaceholders_) {
1106
      // "Un-read" the current token and mark the current value as a null
1107
      // token.
1108
      current_--;
1109
      Value v;
1110
      currentValue().swapPayload(v);
1111
      currentValue().setOffsetStart(current_ - begin_ - 1);
1112
      currentValue().setOffsetLimit(current_ - begin_);
1113
      break;
1114
    } // else, fall through ...
1115
  default:
1116
    currentValue().setOffsetStart(token.start_ - begin_);
1117
    currentValue().setOffsetLimit(token.end_ - begin_);
1118
    return addError("Syntax error: value, object or array expected.", token);
1119
  }
1120

1121
  if (collectComments_) {
1122
    lastValueEnd_ = current_;
1123
    lastValueHasAComment_ = false;
1124
    lastValue_ = &currentValue();
1125
  }
1126

1127
  return successful;
1128
}
1129

1130
// Reads the next token into `token`; when comments are allowed, comment
// tokens are read past until a non-comment token (or a failure) is reached.
// Returns the result of the last readToken() call.
bool OurReader::readTokenSkippingComments(Token& token) {
  for (;;) {
    const bool ok = readToken(token);
    const bool isSkippableComment =
        ok && features_.allowComments_ && token.type_ == tokenComment;
    if (!isSkippableComment)
      return ok;
  }
}
1139

1140
// Scans one token starting at the cursor (after skipping whitespace).
//
// On return token.start_/token.end_ delimit the consumed text and
// token.type_ holds the token kind. When scanning fails the type is
// tokenError and false is returned.
bool OurReader::readToken(Token& token) {
  skipSpaces();
  token.start_ = current_;
  const Char first = getNextChar();
  bool ok = true;
  switch (first) {
  case '{':
    token.type_ = tokenObjectBegin;
    break;
  case '}':
    token.type_ = tokenObjectEnd;
    break;
  case '[':
    token.type_ = tokenArrayBegin;
    break;
  case ']':
    token.type_ = tokenArrayEnd;
    break;
  case ',':
    token.type_ = tokenArraySeparator;
    break;
  case ':':
    token.type_ = tokenMemberSeparator;
    break;
  case 0:
    token.type_ = tokenEndOfStream;
    break;
  case '"':
    token.type_ = tokenString;
    ok = readString();
    break;
  case '\'':
    // Single-quoted strings are a failure unless explicitly enabled; the
    // type is overwritten with tokenError below when `ok` is false.
    token.type_ = tokenString;
    ok = features_.allowSingleQuotes_ && readStringSingleQuote();
    break;
  case '/':
    token.type_ = tokenComment;
    ok = readComment();
    break;
  case '0':
  case '1':
  case '2':
  case '3':
  case '4':
  case '5':
  case '6':
  case '7':
  case '8':
  case '9':
    token.type_ = tokenNumber;
    readNumber(false);
    break;
  case '-':
  case '+':
    // readNumber(true) returns false only after consuming a leading 'I',
    // i.e. when the sign introduces "Infinity".
    if (readNumber(true)) {
      token.type_ = tokenNumber;
    } else {
      token.type_ = (first == '-') ? tokenNegInf : tokenPosInf;
      ok = features_.allowSpecialFloats_ && match("nfinity", 7);
    }
    break;
  case 't':
    token.type_ = tokenTrue;
    ok = match("rue", 3);
    break;
  case 'f':
    token.type_ = tokenFalse;
    ok = match("alse", 4);
    break;
  case 'n':
    token.type_ = tokenNull;
    ok = match("ull", 3);
    break;
  case 'N':
    token.type_ = tokenNaN;
    ok = features_.allowSpecialFloats_ && match("aN", 2);
    break;
  case 'I':
    token.type_ = tokenPosInf;
    ok = features_.allowSpecialFloats_ && match("nfinity", 7);
    break;
  default:
    ok = false;
    break;
  }
  if (!ok)
    token.type_ = tokenError;
  token.end_ = current_;
  return ok;
}
1250

1251
void OurReader::skipSpaces() {
1252
  while (current_ != end_) {
1253
    Char c = *current_;
1254
    if (c == ' ' || c == '\t' || c == '\r' || c == '\n')
1255
      ++current_;
1256
    else
1257
      break;
1258
  }
1259
}
1260

1261
void OurReader::skipBom(bool skipBom) {
1262
  // The default behavior is to skip BOM.
1263
  if (skipBom) {
1264
    if ((end_ - begin_) >= 3 && strncmp(begin_, "\xEF\xBB\xBF", 3) == 0) {
1265
      begin_ += 3;
1266
      current_ = begin_;
1267
    }
1268
  }
1269
}
1270

1271
bool OurReader::match(const Char* pattern, int patternLength) {
1272
  if (end_ - current_ < patternLength)
1273
    return false;
1274
  int index = patternLength;
1275
  while (index--)
1276
    if (current_[index] != pattern[index])
1277
      return false;
1278
  current_ += patternLength;
1279
  return true;
1280
}
1281

1282
bool OurReader::readComment() {
1283
  const Location commentBegin = current_ - 1;
1284
  const Char c = getNextChar();
1285
  bool successful = false;
1286
  bool cStyleWithEmbeddedNewline = false;
1287

1288
  const bool isCStyleComment = (c == '*');
1289
  const bool isCppStyleComment = (c == '/');
1290
  if (isCStyleComment) {
1291
    successful = readCStyleComment(&cStyleWithEmbeddedNewline);
1292
  } else if (isCppStyleComment) {
1293
    successful = readCppStyleComment();
1294
  }
1295

1296
  if (!successful)
1297
    return false;
1298

1299
  if (collectComments_) {
1300
    CommentPlacement placement = commentBefore;
1301

1302
    if (!lastValueHasAComment_) {
1303
      if (lastValueEnd_ && !containsNewLine(lastValueEnd_, commentBegin)) {
1304
        if (isCppStyleComment || !cStyleWithEmbeddedNewline) {
1305
          placement = commentAfterOnSameLine;
1306
          lastValueHasAComment_ = true;
1307
        }
1308
      }
1309
    }
1310

1311
    addComment(commentBegin, current_, placement);
1312
  }
1313
  return true;
1314
}
1315

1316
// Returns a copy of [begin, end) in which every "\r\n" pair and every lone
// '\r' has been replaced by a single '\n'.
String OurReader::normalizeEOL(OurReader::Location begin,
                               OurReader::Location end) {
  String normalized;
  normalized.reserve(static_cast<size_t>(end - begin));
  for (OurReader::Location it = begin; it != end; ++it) {
    if (*it != '\r') {
      normalized += *it;
      continue;
    }
    // A CR becomes one LF; an LF directly after it belongs to the same DOS
    // line ending and is swallowed.
    normalized += '\n';
    if (it + 1 != end && it[1] == '\n')
      ++it;
  }
  return normalized;
}
1335

1336
void OurReader::addComment(Location begin, Location end,
1337
                           CommentPlacement placement) {
1338
  assert(collectComments_);
1339
  const String& normalized = normalizeEOL(begin, end);
1340
  if (placement == commentAfterOnSameLine) {
1341
    assert(lastValue_ != nullptr);
1342
    lastValue_->setComment(normalized, placement);
1343
  } else {
1344
    commentsBefore_ += normalized;
1345
  }
1346
}
1347

1348
bool OurReader::readCStyleComment(bool* containsNewLineResult) {
1349
  *containsNewLineResult = false;
1350

1351
  while ((current_ + 1) < end_) {
1352
    Char c = getNextChar();
1353
    if (c == '*' && *current_ == '/')
1354
      break;
1355
    if (c == '\n')
1356
      *containsNewLineResult = true;
1357
  }
1358

1359
  return getNextChar() == '/';
1360
}
1361

1362
bool OurReader::readCppStyleComment() {
1363
  while (current_ != end_) {
1364
    Char c = getNextChar();
1365
    if (c == '\n')
1366
      break;
1367
    if (c == '\r') {
1368
      // Consume DOS EOL. It will be normalized in addComment.
1369
      if (current_ != end_ && *current_ == '\n')
1370
        getNextChar();
1371
      // Break on Moc OS 9 EOL.
1372
      break;
1373
    }
1374
  }
1375
  return true;
1376
}
1377

1378
bool OurReader::readNumber(bool checkInf) {
1379
  Location p = current_;
1380
  if (checkInf && p != end_ && *p == 'I') {
1381
    current_ = ++p;
1382
    return false;
1383
  }
1384
  char c = '0'; // stopgap for already consumed character
1385
  // integral part
1386
  while (c >= '0' && c <= '9')
1387
    c = (current_ = p) < end_ ? *p++ : '\0';
1388
  // fractional part
1389
  if (c == '.') {
1390
    c = (current_ = p) < end_ ? *p++ : '\0';
1391
    while (c >= '0' && c <= '9')
1392
      c = (current_ = p) < end_ ? *p++ : '\0';
1393
  }
1394
  // exponential part
1395
  if (c == 'e' || c == 'E') {
1396
    c = (current_ = p) < end_ ? *p++ : '\0';
1397
    if (c == '+' || c == '-')
1398
      c = (current_ = p) < end_ ? *p++ : '\0';
1399
    while (c >= '0' && c <= '9')
1400
      c = (current_ = p) < end_ ? *p++ : '\0';
1401
  }
1402
  return true;
1403
}
1404
bool OurReader::readString() {
1405
  Char c = 0;
1406
  while (current_ != end_) {
1407
    c = getNextChar();
1408
    if (c == '\\')
1409
      getNextChar();
1410
    else if (c == '"')
1411
      break;
1412
  }
1413
  return c == '"';
1414
}
1415

1416
bool OurReader::readStringSingleQuote() {
1417
  Char c = 0;
1418
  while (current_ != end_) {
1419
    c = getNextChar();
1420
    if (c == '\\')
1421
      getNextChar();
1422
    else if (c == '\'')
1423
      break;
1424
  }
1425
  return c == '\'';
1426
}
1427

1428
// Reads the members of an object whose '{' is `token` into the current node.
//
// Returns true once the closing '}' has been consumed. On any error the
// problem is recorded, input is skipped up to the next '}' (or end of
// stream) and false is returned. Throws via throwRuntimeError when a member
// name is 2^30 characters or longer.
bool OurReader::readObject(Token& token) {
  Value init(objectValue);
  currentValue().swapPayload(init);
  currentValue().setOffsetStart(token.start_ - begin_);

  Token keyToken;
  String key;
  while (readTokenSkippingComments(keyToken)) {
    // `key` is still empty when no member name has been decoded yet, which
    // is how an empty object is recognized; after a member, '}' is accepted
    // here only as a trailing comma.
    const bool closingAllowed = key.empty() || features_.allowTrailingCommas_;
    if (keyToken.type_ == tokenObjectEnd && closingAllowed)
      return true;

    key.clear();
    if (keyToken.type_ == tokenString) {
      if (!decodeString(keyToken, key))
        return recoverFromError(tokenObjectEnd);
    } else if (keyToken.type_ == tokenNumber && features_.allowNumericKeys_) {
      Value numericKey;
      if (!decodeNumber(keyToken, numericKey))
        return recoverFromError(tokenObjectEnd);
      key = numericKey.asString();
    } else {
      break; // not a usable member name: reported after the loop
    }

    if (key.length() >= (1U << 30))
      throwRuntimeError("keylength >= 2^30");
    if (features_.rejectDupKeys_ && currentValue().isMember(key)) {
      const String msg = "Duplicate key: '" + key + "'";
      return addErrorAndRecover(msg, keyToken, tokenObjectEnd);
    }

    Token colon;
    const bool gotColon = readToken(colon);
    if (!gotColon || colon.type_ != tokenMemberSeparator) {
      return addErrorAndRecover("Missing ':' after object member name", colon,
                                tokenObjectEnd);
    }

    // Parse the member value with its slot on top of the node stack.
    Value& member = currentValue()[key];
    nodes_.push(&member);
    const bool memberOk = readValue();
    nodes_.pop();
    if (!memberOk) // error already set
      return recoverFromError(tokenObjectEnd);

    Token separator;
    const bool gotSeparator = readTokenSkippingComments(separator);
    const bool endsObject = separator.type_ == tokenObjectEnd;
    if (!gotSeparator ||
        !(endsObject || separator.type_ == tokenArraySeparator)) {
      return addErrorAndRecover("Missing ',' or '}' in object declaration",
                                separator, tokenObjectEnd);
    }
    if (endsObject)
      return true;
  }
  return addErrorAndRecover("Missing '}' or object member name", keyToken,
                            tokenObjectEnd);
}
1482

1483
// Reads the elements of an array whose '[' is `token` into the current node.
//
// Returns true once the closing ']' has been consumed. On any error the
// problem is recorded, input is skipped up to the next ']' (or end of
// stream) and false is returned.
bool OurReader::readArray(Token& token) {
  Value init(arrayValue);
  currentValue().swapPayload(init);
  currentValue().setOffsetStart(token.start_ - begin_);

  // After at least one element, a ']' directly following a comma closes the
  // array only when trailing commas are allowed and dropped null
  // placeholders are not.
  const bool closeAfterComma = features_.allowTrailingCommas_ &&
                               !features_.allowDroppedNullPlaceholders_;

  for (int index = 0;; ++index) {
    skipSpaces();
    const bool atClosingBracket = current_ != end_ && *current_ == ']';
    if (atClosingBracket && (index == 0 || closeAfterComma)) {
      // empty array or trailing comma: consume the ']' and finish
      Token endArray;
      readToken(endArray);
      return true;
    }

    // Parse the element with its slot on top of the node stack.
    Value& element = currentValue()[index];
    nodes_.push(&element);
    const bool elementOk = readValue();
    nodes_.pop();
    if (!elementOk) // error already set
      return recoverFromError(tokenArrayEnd);

    // Accept Comment after last item in the array.
    Token next;
    const bool gotNext = readTokenSkippingComments(next);
    const bool endsArray = next.type_ == tokenArrayEnd;
    if (!gotNext || !(endsArray || next.type_ == tokenArraySeparator)) {
      return addErrorAndRecover("Missing ',' or ']' in array declaration",
                                next, tokenArrayEnd);
    }
    if (endsArray)
      return true;
  }
}
1521

1522
// Decodes the number in `token` and, on success, stores it in the current
// node together with the token's source offsets. Returns false (leaving the
// current node untouched) when decoding fails.
bool OurReader::decodeNumber(Token& token) {
  Value number;
  const bool parsed = decodeNumber(token, number);
  if (parsed) {
    Value& target = currentValue();
    target.swapPayload(number);
    target.setOffsetStart(token.start_ - begin_);
    target.setOffsetLimit(token.end_ - begin_);
  }
  return parsed;
}
1531

1532
// Decodes the text of `token` into `decoded`.
//
// The text is first read as an integer (with an optional leading '-'). As
// soon as a non-digit character is met, or the magnitude would not fit in
// the largest integer type, decoding is delegated to decodeDouble().
bool OurReader::decodeNumber(Token& token, Value& decoded) {
  Location cursor = token.start_;
  const bool isNegative = (*cursor == '-');
  if (isNegative)
    ++cursor;

  // We assume we can represent the largest and smallest integer types as
  // unsigned integers with separate sign. This is only true if they can fit
  // into an unsigned integer.
  static_assert(Value::maxLargestInt <= Value::maxLargestUInt,
                "Int must be smaller than UInt");

  // We need to convert minLargestInt into a positive number. The easiest way
  // to do this conversion is to assume our "threshold" value of minLargestInt
  // divided by 10 can fit in maxLargestInt when absolute valued. This should
  // be a safe assumption.
  static_assert(Value::minLargestInt <= -Value::maxLargestInt,
                "The absolute value of minLargestInt must be greater than or "
                "equal to maxLargestInt");
  static_assert(Value::minLargestInt / 10 >= -Value::maxLargestInt,
                "The absolute value of minLargestInt must be only 1 magnitude "
                "larger than maxLargest Int");

  static constexpr Value::LargestUInt positive_threshold =
      Value::maxLargestUInt / 10;
  static constexpr Value::UInt positive_last_digit = Value::maxLargestUInt % 10;

  // For the negative values, we have to be more careful. Since typically
  // -Value::minLargestInt will cause an overflow, we first divide by 10 and
  // then take the inverse. This assumes that minLargestInt is only a single
  // power of 10 different in magnitude, which we check above. For the last
  // digit, we take the modulus before negating for the same reason.
  static constexpr auto negative_threshold =
      Value::LargestUInt(-(Value::minLargestInt / 10));
  static constexpr auto negative_last_digit =
      Value::UInt(-(Value::minLargestInt % 10));

  const Value::LargestUInt threshold =
      isNegative ? negative_threshold : positive_threshold;
  const Value::UInt max_last_digit =
      isNegative ? negative_last_digit : positive_last_digit;

  Value::LargestUInt magnitude = 0;
  for (; cursor < token.end_; ++cursor) {
    const Char c = *cursor;
    if (c < '0' || c > '9')
      return decodeDouble(token, decoded);

    const auto digit = static_cast<Value::UInt>(c - '0');
    const bool isLastChar = (cursor + 1 == token.end_);
    // Once the accumulated magnitude reaches max/10 (rounded down), one more
    // digit fits only if the magnitude equals the threshold exactly, this is
    // the final character, and the digit does not exceed the limit's last
    // digit. Otherwise treat this number as a double to avoid overflow.
    if (magnitude >= threshold &&
        (magnitude > threshold || !isLastChar || digit > max_last_digit)) {
      return decodeDouble(token, decoded);
    }
    magnitude = magnitude * 10 + digit;
  }

  if (isNegative) {
    // We use the same magnitude assumption here, just in case.
    const auto last_digit = static_cast<Value::UInt>(magnitude % 10);
    decoded = -Value::LargestInt(magnitude / 10) * 10 - last_digit;
  } else if (magnitude <= Value::LargestUInt(Value::maxLargestInt)) {
    decoded = Value::LargestInt(magnitude);
  } else {
    decoded = magnitude;
  }

  return true;
}
1611

1612
// Decodes `token` as a floating-point number and, on success, stores it in
// the current node together with the token's source offsets. Returns false
// (leaving the current node untouched) when decoding fails.
bool OurReader::decodeDouble(Token& token) {
  Value number;
  const bool parsed = decodeDouble(token, number);
  if (parsed) {
    Value& target = currentValue();
    target.swapPayload(number);
    target.setOffsetStart(token.start_ - begin_);
    target.setOffsetLimit(token.end_ - begin_);
  }
  return parsed;
}
1621

1622
// Parses the text of `token` as a double via stream extraction and assigns
// the result to `decoded`.
//
// When extraction fails but leaves the value at the largest or lowest finite
// double, the result is replaced by +infinity or -infinity respectively; an
// already infinite value is kept as is. Any other extraction failure is
// recorded as an error and false is returned.
bool OurReader::decodeDouble(Token& token, Value& decoded) {
  using Limits = std::numeric_limits<double>;

  const String text(token.start_, token.end_);
  IStringStream stream(text);
  double parsed = 0;
  if (!(stream >> parsed)) {
    if (parsed == Limits::max())
      parsed = Limits::infinity();
    else if (parsed == Limits::lowest())
      parsed = -Limits::infinity();
    else if (!std::isinf(parsed))
      return addError("'" + text + "' is not a number.", token);
  }
  decoded = parsed;
  return true;
}
1637

1638
// Decodes the string literal in `token` and, on success, stores it in the
// current node together with the token's source offsets. Returns false
// (leaving the current node untouched) when decoding fails.
bool OurReader::decodeString(Token& token) {
  String text;
  if (!decodeString(token, text))
    return false;

  Value decoded(text);
  Value& target = currentValue();
  target.swapPayload(decoded);
  target.setOffsetStart(token.start_ - begin_);
  target.setOffsetLimit(token.end_ - begin_);
  return true;
}
1648

1649
// Appends the unescaped contents of the quoted string in `token` to
// `decoded`.
//
// The first and last characters of the token (the quotes) are excluded.
// Decoding stops early at an unescaped '"'. Returns false after recording an
// error when an escape sequence is empty, unknown, or a malformed \u escape.
bool OurReader::decodeString(Token& token, String& decoded) {
  decoded.reserve(static_cast<size_t>(token.end_ - token.start_ - 2));
  Location cursor = token.start_ + 1;   // skip the opening quote
  const Location stop = token.end_ - 1; // do not include the closing quote
  while (cursor != stop) {
    const Char c = *cursor++;
    if (c == '"')
      break;
    if (c != '\\') {
      decoded += c;
      continue;
    }

    if (cursor == stop)
      return addError("Empty escape sequence in string", token, cursor);
    const Char escape = *cursor++;
    switch (escape) {
    case '"':
    case '/':
    case '\\':
      // These escapes stand for the character itself.
      decoded += escape;
      break;
    case 'b':
      decoded += '\b';
      break;
    case 'f':
      decoded += '\f';
      break;
    case 'n':
      decoded += '\n';
      break;
    case 'r':
      decoded += '\r';
      break;
    case 't':
      decoded += '\t';
      break;
    case 'u': {
      unsigned int unicode;
      if (!decodeUnicodeCodePoint(token, cursor, stop, unicode))
        return false;
      decoded += codePointToUTF8(unicode);
    } break;
    default:
      return addError("Bad escape sequence in string", token, cursor);
    }
  }
  return true;
}
1701

1702
// Decodes the four hex digits at `current` (the "\u" has already been
// consumed) into `unicode`. When the result lies in 0xD800..0xDBFF, a second
// "\uXXXX" escape is required and both halves are combined into one code
// point. `current` is advanced past everything consumed. Returns false after
// recording an error when the input is malformed.
//
// NOTE(review): the second escape is not checked to lie in the low-surrogate
// range; only its low 10 bits are used.
bool OurReader::decodeUnicodeCodePoint(Token& token, Location& current,
                                       Location end, unsigned int& unicode) {

  unicode = 0; // Convince clang-analyzer that this is initialized before use.
  if (!decodeUnicodeEscapeSequence(token, current, end, unicode))
    return false;

  const bool isHighSurrogate = unicode >= 0xD800 && unicode <= 0xDBFF;
  if (!isHighSurrogate)
    return true;

  // surrogate pairs
  if (end - current < 6)
    return addError(
        "additional six characters expected to parse unicode surrogate pair.",
        token, current);

  const bool hasEscapePrefix =
      (*(current++) == '\\') && (*(current++) == 'u');
  if (!hasEscapePrefix)
    return addError("expecting another \\u token to begin the second half of "
                    "a unicode surrogate pair",
                    token, current);

  unsigned int lowHalf = 0;
  if (!decodeUnicodeEscapeSequence(token, current, end, lowHalf))
    return false;

  unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (lowHalf & 0x3FF);
  return true;
}
1727

1728
bool OurReader::decodeUnicodeEscapeSequence(Token& token, Location& current,
1729
                                            Location end,
1730
                                            unsigned int& ret_unicode) {
1731
  if (end - current < 4)
1732
    return addError(
1733
        "Bad unicode escape sequence in string: four digits expected.", token,
1734
        current);
1735
  int unicode = 0;
1736
  for (int index = 0; index < 4; ++index) {
1737
    Char c = *current++;
1738
    unicode *= 16;
1739
    if (c >= '0' && c <= '9')
1740
      unicode += c - '0';
1741
    else if (c >= 'a' && c <= 'f')
1742
      unicode += c - 'a' + 10;
1743
    else if (c >= 'A' && c <= 'F')
1744
      unicode += c - 'A' + 10;
1745
    else
1746
      return addError(
1747
          "Bad unicode escape sequence in string: hexadecimal digit expected.",
1748
          token, current);
1749
  }
1750
  ret_unicode = static_cast<unsigned int>(unicode);
1751
  return true;
1752
}
1753

1754
bool OurReader::addError(const String& message, Token& token, Location extra) {
1755
  ErrorInfo info;
1756
  info.token_ = token;
1757
  info.message_ = message;
1758
  info.extra_ = extra;
1759
  errors_.push_back(info);
1760
  return false;
1761
}
1762

1763
// Skips tokens until one of type `skipUntilToken` or the end of the stream
// has been read. The error list is truncated back to the size it had on
// entry. Always returns false.
bool OurReader::recoverFromError(TokenType skipUntilToken) {
  const size_t keptErrors = errors_.size();
  Token skipped;
  do {
    if (!readToken(skipped))
      errors_.resize(keptErrors); // discard errors caused by recovery
  } while (skipped.type_ != skipUntilToken &&
           skipped.type_ != tokenEndOfStream);
  errors_.resize(keptErrors);
  return false;
}
1775

1776
bool OurReader::addErrorAndRecover(const String& message, Token& token,
1777
                                   TokenType skipUntilToken) {
1778
  addError(message, token);
1779
  return recoverFromError(skipUntilToken);
1780
}
1781

1782
// Returns the value on top of the node stack, i.e. the one currently being
// filled in. The stack must not be empty.
Value& OurReader::currentValue() { return *(nodes_.top()); }
1783

1784
// Returns the character at the cursor and advances past it, or returns 0
// without moving when the cursor is already at end_.
OurReader::Char OurReader::getNextChar() {
  if (current_ != end_) {
    const Char next = *current_;
    ++current_;
    return next;
  }
  return 0;
}
1789

1790
// Computes the 1-based line and column of `location` by scanning the
// document from begin_. "\r\n", a lone '\r' and a lone '\n' each count as one
// line break.
void OurReader::getLocationLineAndColumn(Location location, int& line,
                                         int& column) const {
  Location cursor = begin_;
  Location lineStart = begin_;
  int lineBreaks = 0;
  while (cursor < location && cursor != end_) {
    const Char c = *cursor++;
    if (c != '\r' && c != '\n')
      continue;
    // Treat the LF of a "\r\n" pair as part of the same line break.
    if (c == '\r' && cursor != end_ && *cursor == '\n')
      ++cursor;
    lineStart = cursor;
    ++lineBreaks;
  }
  // column & line start at 1
  line = lineBreaks + 1;
  column = int(location - lineStart) + 1;
}
1811

1812
// Formats the position of `location` as "Line <n>, Column <m>".
String OurReader::getLocationLineAndColumn(Location location) const {
  int line = 0;
  int column = 0;
  getLocationLineAndColumn(location, line, column);

  char text[18 + 16 + 16 + 1];
  jsoncpp_snprintf(text, sizeof(text), "Line %d, Column %d", line, column);
  return String(text);
}
1819

1820
// Builds a human-readable report of all recorded errors. Each error yields a
// "* Line x, Column y" line, an indented message line and, when an extra
// location was recorded, a "See ... for detail." line.
String OurReader::getFormattedErrorMessages() const {
  String report;
  for (const auto& error : errors_) {
    report += "* ";
    report += getLocationLineAndColumn(error.token_.start_);
    report += "\n";

    report += "  ";
    report += error.message_;
    report += "\n";

    if (error.extra_) {
      report += "See ";
      report += getLocationLineAndColumn(error.extra_);
      report += " for detail.\n";
    }
  }
  return report;
}
1832

1833
std::vector<CharReader::StructuredError>
1834
OurReader::getStructuredErrors() const {
1835
  std::vector<CharReader::StructuredError> allErrors;
1836
  for (const auto& error : errors_) {
1837
    CharReader::StructuredError structured;
1838
    structured.offset_start = error.token_.start_ - begin_;
1839
    structured.offset_limit = error.token_.end_ - begin_;
1840
    structured.message = error.message_;
1841
    allErrors.push_back(structured);
1842
  }
1843
  return allErrors;
1844
}
1845

1846
class OurCharReader : public CharReader {
1847

1848
public:
1849
  OurCharReader(bool collectComments, OurFeatures const& features)
1850
      : CharReader(
1851
            std::unique_ptr<OurImpl>(new OurImpl(collectComments, features))) {}
1852

1853
protected:
1854
  class OurImpl : public Impl {
1855
  public:
1856
    OurImpl(bool collectComments, OurFeatures const& features)
1857
        : collectComments_(collectComments), reader_(features) {}
1858

1859
    bool parse(char const* beginDoc, char const* endDoc, Value* root,
1860
               String* errs) override {
1861
      bool ok = reader_.parse(beginDoc, endDoc, *root, collectComments_);
1862
      if (errs) {
1863
        *errs = reader_.getFormattedErrorMessages();
1864
      }
1865
      return ok;
1866
    }
1867

1868
    std::vector<CharReader::StructuredError>
1869
    getStructuredErrors() const override {
1870
      return reader_.getStructuredErrors();
1871
    }
1872

1873
  private:
1874
    bool const collectComments_;
1875
    OurReader reader_;
1876
  };
1877
};
1878

1879
// Creates a builder whose settings are initialized by setDefaults().
CharReaderBuilder::CharReaderBuilder() { setDefaults(&settings_); }
1880
// Compiler-generated destructor, defined out of line.
CharReaderBuilder::~CharReaderBuilder() = default;
1881
// Creates a reader configured from the current settings_. The reader is
// allocated with `new` and returned as a raw pointer (presumably owned by
// the caller; confirm against the CharReader::Factory contract).
CharReader* CharReaderBuilder::newCharReader() const {
  // Reads one boolean setting by key.
  const auto flag = [this](const char* key) { return settings_[key].asBool(); };

  OurFeatures features = OurFeatures::all();
  features.allowComments_ = flag("allowComments");
  features.allowTrailingCommas_ = flag("allowTrailingCommas");
  features.strictRoot_ = flag("strictRoot");
  features.allowDroppedNullPlaceholders_ = flag("allowDroppedNullPlaceholders");
  features.allowNumericKeys_ = flag("allowNumericKeys");
  features.allowSingleQuotes_ = flag("allowSingleQuotes");

  // Stack limit is always a size_t, so we get this as an unsigned int
  // regardless of whether we have 64-bit integer support enabled.
  features.stackLimit_ = static_cast<size_t>(settings_["stackLimit"].asUInt());
  features.failIfExtra_ = flag("failIfExtra");
  features.rejectDupKeys_ = flag("rejectDupKeys");
  features.allowSpecialFloats_ = flag("allowSpecialFloats");
  features.skipBom_ = flag("skipBom");

  const bool collectComments = flag("collectComments");
  return new OurCharReader(collectComments, features);
}
1901

1902
// Checks that every key present in settings_ is a recognised option name.
//
// invalid: optional output. When non-null, each unrecognised key is copied
//          (with its value) into *invalid and the scan continues; when null,
//          the scan stops at the first unrecognised key.
// Returns: with a null `invalid`, true iff no unrecognised key was found;
//          otherwise, whether *invalid is empty after the scan.
bool CharReaderBuilder::validate(Json::Value* invalid) const {
  // Heap-allocated once and never deleted (presumably to sidestep static
  // destruction-order issues -- confirm before changing).
  static const auto& knownKeys = *new std::set<String>{
      "collectComments",
      "allowComments",
      "allowTrailingCommas",
      "strictRoot",
      "allowDroppedNullPlaceholders",
      "allowNumericKeys",
      "allowSingleQuotes",
      "stackLimit",
      "failIfExtra",
      "rejectDupKeys",
      "allowSpecialFloats",
      "skipBom",
  };

  for (auto it = settings_.begin(); it != settings_.end(); ++it) {
    auto const name = it.name();
    if (knownKeys.find(name) != knownKeys.end())
      continue;
    if (!invalid)
      return false; // Nowhere to record it: fail fast.
    (*invalid)[name] = *it;
  }
  return !invalid || invalid->empty();
}
1928

1929
// Gives mutable access to the setting stored under `key` by forwarding to
// settings_[key].
Value& CharReaderBuilder::operator[](const String& key) {
  Value& entry = settings_[key];
  return entry;
}
1932
// static
1933
// Overwrites the reader options in *settings with the strict profile listed
// below. "collectComments" is not written by this function.
void CharReaderBuilder::strictMode(Json::Value* settings) {
  Json::Value& cfg = *settings;
  //! [CharReaderBuilderStrictMode]
  cfg["allowComments"] = false;
  cfg["allowTrailingCommas"] = false;
  cfg["strictRoot"] = true;
  cfg["allowDroppedNullPlaceholders"] = false;
  cfg["allowNumericKeys"] = false;
  cfg["allowSingleQuotes"] = false;
  cfg["stackLimit"] = 1000;
  cfg["failIfExtra"] = true;
  cfg["rejectDupKeys"] = true;
  cfg["allowSpecialFloats"] = false;
  cfg["skipBom"] = true;
  //! [CharReaderBuilderStrictMode]
}
1948
// static
1949
// Writes the default value of every reader option, including
// "collectComments", into *settings.
void CharReaderBuilder::setDefaults(Json::Value* settings) {
  Json::Value& cfg = *settings;
  //! [CharReaderBuilderDefaults]
  cfg["collectComments"] = true;
  cfg["allowComments"] = true;
  cfg["allowTrailingCommas"] = true;
  cfg["strictRoot"] = false;
  cfg["allowDroppedNullPlaceholders"] = false;
  cfg["allowNumericKeys"] = false;
  cfg["allowSingleQuotes"] = false;
  cfg["stackLimit"] = 1000;
  cfg["failIfExtra"] = false;
  cfg["rejectDupKeys"] = false;
  cfg["allowSpecialFloats"] = false;
  cfg["skipBom"] = true;
  //! [CharReaderBuilderDefaults]
}
1965
// static
1966
// Overwrites the reader options in *settings with the ECMA-404 profile listed
// below. "collectComments" is not written by this function.
void CharReaderBuilder::ecma404Mode(Json::Value* settings) {
  Json::Value& cfg = *settings;
  //! [CharReaderBuilderECMA404Mode]
  cfg["allowComments"] = false;
  cfg["allowTrailingCommas"] = false;
  cfg["strictRoot"] = false;
  cfg["allowDroppedNullPlaceholders"] = false;
  cfg["allowNumericKeys"] = false;
  cfg["allowSingleQuotes"] = false;
  cfg["stackLimit"] = 1000;
  cfg["failIfExtra"] = true;
  cfg["rejectDupKeys"] = false;
  cfg["allowSpecialFloats"] = false;
  cfg["skipBom"] = false;
  //! [CharReaderBuilderECMA404Mode]
}
1981

1982
std::vector<CharReader::StructuredError>
1983
CharReader::getStructuredErrors() const {
1984
  return _impl->getStructuredErrors();
1985
}
1986

1987
bool CharReader::parse(char const* beginDoc, char const* endDoc, Value* root,
1988
                       String* errs) {
1989
  return _impl->parse(beginDoc, endDoc, root, errs);
1990
}
1991

1992
//////////////////////////////////
1993
// global functions
1994

1995
bool parseFromStream(CharReader::Factory const& fact, IStream& sin, Value* root,
1996
                     String* errs) {
1997
  OStringStream ssin;
1998
  ssin << sin.rdbuf();
1999
  String doc = std::move(ssin).str();
2000
  char const* begin = doc.data();
2001
  char const* end = begin + doc.size();
2002
  // Note that we do not actually need a null-terminator.
2003
  CharReaderPtr const reader(fact.newCharReader());
2004
  return reader->parse(begin, end, root, errs);
2005
}
2006

2007
// Stream extraction for Value: parses the whole of `sin` into `root` using a
// default-constructed CharReaderBuilder. On parse failure the collected error
// text is passed to throwRuntimeError(); otherwise the stream is returned.
IStream& operator>>(IStream& sin, Value& root) {
  CharReaderBuilder builder;
  String messages;
  if (!parseFromStream(builder, sin, &root, &messages))
    throwRuntimeError(messages);
  return sin;
}
2016

2017
} // namespace Json
2018

2019
Product

Resources

Company