Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
microsoft
GitHub Repository: microsoft/vscode
Path: blob/main/src/vs/base/common/json.ts
3291 views
1
/*---------------------------------------------------------------------------------------------
2
* Copyright (c) Microsoft Corporation. All rights reserved.
3
* Licensed under the MIT License. See License.txt in the project root for license information.
4
*--------------------------------------------------------------------------------------------*/
5
6
/**
 * Error state of the most recent scan, as reported by `JSONScanner.getTokenError()`.
 */
export const enum ScanError {
	/** The last token was scanned without problems. */
	None = 0,
	/** A block comment was opened but the text ended before the closing marker. */
	UnexpectedEndOfComment = 1,
	/** A string reached the end of the text or a line break before its closing quote. */
	UnexpectedEndOfString = 2,
	/** A decimal point or an exponent marker was not followed by a digit. */
	UnexpectedEndOfNumber = 3,
	/** A `\u` escape was not followed by four hexadecimal digits. */
	InvalidUnicode = 4,
	/** A backslash inside a string was followed by an unsupported escape character. */
	InvalidEscapeCharacter = 5,
	/** A string contains an unescaped control character (code 0x1F or lower) that is not a line break. */
	InvalidCharacter = 6
}
15
16
/**
 * Token kinds produced by the scanner.
 *
 * The numeric order matters: the scanner treats the contiguous range
 * `LineCommentTrivia..Trivia` as "trivia" when it is asked to skip it.
 */
export const enum SyntaxKind {
	/** `{` */
	OpenBraceToken = 1,
	/** `}` */
	CloseBraceToken = 2,
	/** `[` */
	OpenBracketToken = 3,
	/** `]` */
	CloseBracketToken = 4,
	/** `,` */
	CommaToken = 5,
	/** `:` */
	ColonToken = 6,
	/** The keyword `null`. */
	NullKeyword = 7,
	/** The keyword `true`. */
	TrueKeyword = 8,
	/** The keyword `false`. */
	FalseKeyword = 9,
	/** A double-quoted string. */
	StringLiteral = 10,
	/** A number. */
	NumericLiteral = 11,
	/** A `//` comment, up to (not including) the line break. */
	LineCommentTrivia = 12,
	/** A block comment. */
	BlockCommentTrivia = 13,
	/** A single line break (`\r\n` counts as one). */
	LineBreakTrivia = 14,
	/** A run of whitespace characters. */
	Trivia = 15,
	/** Anything the scanner could not classify. */
	Unknown = 16,
	/** End of the input text. */
	EOF = 17
}
35
36
/**
 * The scanner object, representing a JSON scanner at a position in the input string.
 */
export interface JSONScanner {
	/**
	 * Sets the scan position to a new offset. A call to 'scan' is needed to get the first token.
	 */
	setPosition(pos: number): void;
	/**
	 * Reads the next token. Returns the token code.
	 */
	scan(): SyntaxKind;
	/**
	 * Returns the current scan position, which is after the last read token.
	 */
	getPosition(): number;
	/**
	 * Returns the last read token.
	 */
	getToken(): SyntaxKind;
	/**
	 * Returns the value of the last read token, always as a string.
	 * For string literals it is the decoded content (escape sequences resolved, quotes removed).
	 * For numbers, keywords, comments and whitespace it is the scanned text; callers convert
	 * numbers and booleans themselves. Structural tokens (`{ } [ ] : ,`) yield an empty string.
	 */
	getTokenValue(): string;
	/**
	 * The start offset of the last read token.
	 */
	getTokenOffset(): number;
	/**
	 * The length of the last read token.
	 */
	getTokenLength(): number;
	/**
	 * An error code of the last scan.
	 */
	getTokenError(): ScanError;
}
73
74
75
76
/**
 * A problem found while parsing, reported as a code plus the text range it applies to.
 */
export interface ParseError {
	/** What went wrong. */
	error: ParseErrorCode;
	/** Start offset of the offending range in the input text. */
	offset: number;
	/** Length of the offending range. */
	length: number;
}
81
82
/**
 * Error codes reported through `ParseError.error` and `JSONVisitor.onError`.
 *
 * Codes 11 to 16 mirror the scanner's `ScanError` values; the remaining codes are
 * raised by the parser itself.
 */
export const enum ParseErrorCode {
	InvalidSymbol = 1,
	InvalidNumberFormat = 2,
	PropertyNameExpected = 3,
	ValueExpected = 4,
	ColonExpected = 5,
	CommaExpected = 6,
	CloseBraceExpected = 7,
	CloseBracketExpected = 8,
	EndOfFileExpected = 9,
	/** A comment was found although comments are disallowed. */
	InvalidCommentToken = 10,
	UnexpectedEndOfComment = 11,
	UnexpectedEndOfString = 12,
	UnexpectedEndOfNumber = 13,
	InvalidUnicode = 14,
	InvalidEscapeCharacter = 15,
	InvalidCharacter = 16
}
100
101
/** The kinds of nodes that can appear in a JSON tree. */
export type NodeType = 'object' | 'array' | 'property' | 'string' | 'number' | 'boolean' | 'null';

/**
 * A node of the JSON tree produced by `parseTree`.
 */
export interface Node {
	/** The kind of this node. */
	readonly type: NodeType;
	/** The value of a literal node (string, number, boolean or null). */
	readonly value?: any;
	/** Start offset of the node in the text. */
	readonly offset: number;
	/** Length of the node in the text. */
	readonly length: number;
	/** For property nodes, the offset of the colon once it has been seen. */
	readonly colonOffset?: number;
	/** The containing node; absent on the root. */
	readonly parent?: Node;
	/**
	 * Child nodes. Objects hold property nodes, arrays hold their items, and a property
	 * holds its key (a string node) followed by its value node, if one was parsed.
	 */
	readonly children?: Node[];
}
112
113
/** One step of a JSON path: a property name (string) or an array index (number). */
export type Segment = string | number;

/** A sequence of segments leading from the document root to a location. */
export type JSONPath = Segment[];
115
116
/**
 * Describes a position inside a JSON document, as computed by `getLocation`.
 */
export interface Location {
	/**
	 * The previous property key or literal value (string, number, boolean or null) or undefined.
	 */
	previousNode?: Node;
	/**
	 * The path describing the location in the JSON document. The path consists of a sequence of strings
	 * representing an object property or numbers for array indices.
	 */
	path: JSONPath;
	/**
	 * Matches the location's path against a pattern consisting of strings (for properties) and numbers (for array indices).
	 * '*' will match a single segment, of any property name or index.
	 * '**' will match a sequence of segments or no segment, of any property name or index.
	 * The pattern only has to match the beginning of the path: once every pattern entry has
	 * been matched, additional trailing path segments are accepted.
	 */
	matches: (patterns: JSONPath) => boolean;
	/**
	 * If set, the location's offset is at a property key.
	 */
	isAtPropertyKey: boolean;
}
137
138
/**
 * Options that relax or tighten what the parser accepts. All flags are optional.
 */
export interface ParseOptions {
	/** When set, comments are reported as errors instead of being passed to `onComment`. */
	disallowComments?: boolean;
	/** When set, a trailing comma is tolerated. */
	allowTrailingComma?: boolean;
	/** When set, empty content is tolerated. */
	allowEmptyContent?: boolean;
}
143
144
export namespace ParseOptions {
	/**
	 * The options used by `parse`, `parseTree` and `visit` when the caller does not
	 * pass any: trailing commas are allowed, every other flag is left unset.
	 */
	export const DEFAULT = {
		allowTrailingComma: true
	};
}
149
150
/**
 * Callbacks invoked by `visit` while it walks a JSON text. Every callback is optional;
 * missing ones are simply not called.
 */
export interface JSONVisitor {
	/**
	 * Invoked when an open brace is encountered and an object is started. The offset and length represent the location of the open brace.
	 */
	onObjectBegin?: (offset: number, length: number) => void;

	/**
	 * Invoked when a property is encountered. The offset and length represent the location of the property name.
	 */
	onObjectProperty?: (property: string, offset: number, length: number) => void;

	/**
	 * Invoked when a closing brace is encountered and an object is completed. The offset and length represent the location of the closing brace.
	 */
	onObjectEnd?: (offset: number, length: number) => void;

	/**
	 * Invoked when an open bracket is encountered. The offset and length represent the location of the open bracket.
	 */
	onArrayBegin?: (offset: number, length: number) => void;

	/**
	 * Invoked when a closing bracket is encountered. The offset and length represent the location of the closing bracket.
	 */
	onArrayEnd?: (offset: number, length: number) => void;

	/**
	 * Invoked when a literal value is encountered. The offset and length represent the location of the literal value.
	 */
	onLiteralValue?: (value: any, offset: number, length: number) => void;

	/**
	 * Invoked when a comma or colon separator is encountered. The offset and length represent the location of the separator.
	 */
	onSeparator?: (character: string, offset: number, length: number) => void;

	/**
	 * When comments are allowed, invoked when a line or block comment is encountered. The offset and length represent the location of the comment.
	 */
	onComment?: (offset: number, length: number) => void;

	/**
	 * Invoked on an error.
	 */
	onError?: (error: ParseErrorCode, offset: number, length: number) => void;
}
196
197
/**
 * Creates a JSON scanner on the given text.
 * If ignoreTrivia is set, whitespaces or comments are ignored.
 *
 * The scanner keeps its state (position, last token, last token value, last error)
 * in closure variables; the returned object only exposes accessors for them.
 *
 * @param text The text to tokenize.
 * @param ignoreTrivia When true, `scan()` skips comments, line breaks and whitespace.
 * @returns A scanner positioned at offset 0. Call `scan()` to read the first token.
 */
export function createScanner(text: string, ignoreTrivia: boolean = false): JSONScanner {

	let pos = 0;
	const len = text.length;
	let value: string = '';
	let tokenOffset = 0;
	let token: SyntaxKind = SyntaxKind.Unknown;
	let scanError: ScanError = ScanError.None;

	/**
	 * Reads exactly `count` hexadecimal digits starting at `pos`.
	 * Returns their numeric value, or -1 when fewer than `count` digits were found
	 * (the digits that were found stay consumed).
	 */
	function scanHexDigits(count: number): number {
		let digits = 0;
		let hexValue = 0;
		while (digits < count) {
			const ch = text.charCodeAt(pos);
			if (ch >= CharacterCodes._0 && ch <= CharacterCodes._9) {
				hexValue = hexValue * 16 + ch - CharacterCodes._0;
			} else if (ch >= CharacterCodes.A && ch <= CharacterCodes.F) {
				hexValue = hexValue * 16 + ch - CharacterCodes.A + 10;
			} else if (ch >= CharacterCodes.a && ch <= CharacterCodes.f) {
				hexValue = hexValue * 16 + ch - CharacterCodes.a + 10;
			} else {
				break;
			}
			pos++;
			digits++;
		}
		if (digits < count) {
			hexValue = -1;
		}
		return hexValue;
	}

	/** Moves the scanner to `newPosition` and clears all per-token state. */
	function setPosition(newPosition: number) {
		pos = newPosition;
		value = '';
		tokenOffset = 0;
		token = SyntaxKind.Unknown;
		scanError = ScanError.None;
	}

	/**
	 * Scans a number whose first digit is at `pos` (a leading minus has already been
	 * consumed by the caller) and returns its text.
	 */
	function scanNumber(): string {
		const start = pos;
		if (text.charCodeAt(pos) === CharacterCodes._0) {
			// a leading zero stands on its own
			pos++;
		} else {
			pos++;
			while (pos < len && isDigit(text.charCodeAt(pos))) {
				pos++;
			}
		}
		if (pos < len && text.charCodeAt(pos) === CharacterCodes.dot) {
			pos++;
			if (pos < len && isDigit(text.charCodeAt(pos))) {
				pos++;
				while (pos < len && isDigit(text.charCodeAt(pos))) {
					pos++;
				}
			} else {
				scanError = ScanError.UnexpectedEndOfNumber;
				return text.substring(start, pos);
			}
		}
		let end = pos;
		if (pos < len && (text.charCodeAt(pos) === CharacterCodes.E || text.charCodeAt(pos) === CharacterCodes.e)) {
			pos++;
			// optional sign of the exponent
			if (pos < len && (text.charCodeAt(pos) === CharacterCodes.plus || text.charCodeAt(pos) === CharacterCodes.minus)) {
				pos++;
			}
			if (pos < len && isDigit(text.charCodeAt(pos))) {
				pos++;
				while (pos < len && isDigit(text.charCodeAt(pos))) {
					pos++;
				}
				end = pos;
			} else {
				// the returned text stops before the incomplete exponent
				scanError = ScanError.UnexpectedEndOfNumber;
			}
		}
		return text.substring(start, end);
	}

	/**
	 * Scans the remainder of a string literal (the opening quote has already been
	 * consumed) and returns its decoded content.
	 */
	function scanString(): string {

		let result = '',
			start = pos;

		while (true) {
			if (pos >= len) {
				result += text.substring(start, pos);
				scanError = ScanError.UnexpectedEndOfString;
				break;
			}
			const ch = text.charCodeAt(pos);
			if (ch === CharacterCodes.doubleQuote) {
				result += text.substring(start, pos);
				pos++;
				break;
			}
			if (ch === CharacterCodes.backslash) {
				// flush the literal run before the escape, then decode the escape
				result += text.substring(start, pos);
				pos++;
				if (pos >= len) {
					scanError = ScanError.UnexpectedEndOfString;
					break;
				}
				const ch2 = text.charCodeAt(pos++);
				switch (ch2) {
					case CharacterCodes.doubleQuote:
						result += '\"';
						break;
					case CharacterCodes.backslash:
						result += '\\';
						break;
					case CharacterCodes.slash:
						result += '/';
						break;
					case CharacterCodes.b:
						result += '\b';
						break;
					case CharacterCodes.f:
						result += '\f';
						break;
					case CharacterCodes.n:
						result += '\n';
						break;
					case CharacterCodes.r:
						result += '\r';
						break;
					case CharacterCodes.t:
						result += '\t';
						break;
					case CharacterCodes.u: {
						const ch3 = scanHexDigits(4);
						if (ch3 >= 0) {
							result += String.fromCharCode(ch3);
						} else {
							scanError = ScanError.InvalidUnicode;
						}
						break;
					}
					default:
						scanError = ScanError.InvalidEscapeCharacter;
				}
				start = pos;
				continue;
			}
			if (ch >= 0 && ch <= 0x1F) {
				if (isLineBreak(ch)) {
					result += text.substring(start, pos);
					scanError = ScanError.UnexpectedEndOfString;
					break;
				} else {
					scanError = ScanError.InvalidCharacter;
					// mark as error but continue with string
				}
			}
			pos++;
		}
		return result;
	}

	/** Scans the next token, trivia included, and records it in the scanner state. */
	function scanNext(): SyntaxKind {

		value = '';
		scanError = ScanError.None;

		tokenOffset = pos;

		if (pos >= len) {
			// at the end
			tokenOffset = len;
			return token = SyntaxKind.EOF;
		}

		let code = text.charCodeAt(pos);
		// trivia: whitespace
		if (isWhitespace(code)) {
			do {
				pos++;
				value += String.fromCharCode(code);
				code = text.charCodeAt(pos);
			} while (isWhitespace(code));

			return token = SyntaxKind.Trivia;
		}

		// trivia: newlines
		if (isLineBreak(code)) {
			pos++;
			value += String.fromCharCode(code);
			if (code === CharacterCodes.carriageReturn && text.charCodeAt(pos) === CharacterCodes.lineFeed) {
				pos++;
				value += '\n';
			}
			return token = SyntaxKind.LineBreakTrivia;
		}

		switch (code) {
			// tokens: []{}:,
			case CharacterCodes.openBrace:
				pos++;
				return token = SyntaxKind.OpenBraceToken;
			case CharacterCodes.closeBrace:
				pos++;
				return token = SyntaxKind.CloseBraceToken;
			case CharacterCodes.openBracket:
				pos++;
				return token = SyntaxKind.OpenBracketToken;
			case CharacterCodes.closeBracket:
				pos++;
				return token = SyntaxKind.CloseBracketToken;
			case CharacterCodes.colon:
				pos++;
				return token = SyntaxKind.ColonToken;
			case CharacterCodes.comma:
				pos++;
				return token = SyntaxKind.CommaToken;

			// strings
			case CharacterCodes.doubleQuote:
				pos++;
				value = scanString();
				return token = SyntaxKind.StringLiteral;

			// comments
			case CharacterCodes.slash: {
				// `pos` still points at the slash here, so the comment text starts at `pos`
				// (the same offset as `tokenOffset`), not one character earlier.
				const start = pos;
				// Single-line comment
				if (text.charCodeAt(pos + 1) === CharacterCodes.slash) {
					pos += 2;

					while (pos < len) {
						if (isLineBreak(text.charCodeAt(pos))) {
							break;
						}
						pos++;
					}
					value = text.substring(start, pos);
					return token = SyntaxKind.LineCommentTrivia;
				}

				// Multi-line comment
				if (text.charCodeAt(pos + 1) === CharacterCodes.asterisk) {
					pos += 2;

					const safeLength = len - 1; // For lookahead.
					let commentClosed = false;
					while (pos < safeLength) {
						const ch = text.charCodeAt(pos);

						if (ch === CharacterCodes.asterisk && text.charCodeAt(pos + 1) === CharacterCodes.slash) {
							pos += 2;
							commentClosed = true;
							break;
						}
						pos++;
					}

					if (!commentClosed) {
						// The comment runs to the end of the text. Clamp to `len` so the
						// position never overshoots when the opening marker is the very
						// last thing in the text.
						pos = len;
						scanError = ScanError.UnexpectedEndOfComment;
					}

					value = text.substring(start, pos);
					return token = SyntaxKind.BlockCommentTrivia;
				}
				// just a single slash
				value += String.fromCharCode(code);
				pos++;
				return token = SyntaxKind.Unknown;
			}
			// numbers
			case CharacterCodes.minus:
				value += String.fromCharCode(code);
				pos++;
				if (pos === len || !isDigit(text.charCodeAt(pos))) {
					return token = SyntaxKind.Unknown;
				}
			// found a minus, followed by a number so
			// we fall through to proceed with scanning
			// numbers
			case CharacterCodes._0:
			case CharacterCodes._1:
			case CharacterCodes._2:
			case CharacterCodes._3:
			case CharacterCodes._4:
			case CharacterCodes._5:
			case CharacterCodes._6:
			case CharacterCodes._7:
			case CharacterCodes._8:
			case CharacterCodes._9:
				value += scanNumber();
				return token = SyntaxKind.NumericLiteral;
			// literals and unknown symbols
			default:
				// is a literal? Read the full word.
				while (pos < len && isUnknownContentCharacter(code)) {
					pos++;
					code = text.charCodeAt(pos);
				}
				if (tokenOffset !== pos) {
					value = text.substring(tokenOffset, pos);
					// keywords: true, false, null
					switch (value) {
						case 'true': return token = SyntaxKind.TrueKeyword;
						case 'false': return token = SyntaxKind.FalseKeyword;
						case 'null': return token = SyntaxKind.NullKeyword;
					}
					return token = SyntaxKind.Unknown;
				}
				// nothing was consumed: emit the single character as an unknown token
				value += String.fromCharCode(code);
				pos++;
				return token = SyntaxKind.Unknown;
		}
	}

	/**
	 * Tells whether `code` may be part of an unquoted word, i.e. it is neither
	 * whitespace, a line break, nor one of the characters that start another token.
	 */
	function isUnknownContentCharacter(code: CharacterCodes) {
		if (isWhitespace(code) || isLineBreak(code)) {
			return false;
		}
		switch (code) {
			case CharacterCodes.closeBrace:
			case CharacterCodes.closeBracket:
			case CharacterCodes.openBrace:
			case CharacterCodes.openBracket:
			case CharacterCodes.doubleQuote:
			case CharacterCodes.colon:
			case CharacterCodes.comma:
			case CharacterCodes.slash:
				return false;
		}
		return true;
	}

	/** Scans tokens until one is found that is not a comment, line break or whitespace. */
	function scanNextNonTrivia(): SyntaxKind {
		let result: SyntaxKind;
		do {
			result = scanNext();
		} while (result >= SyntaxKind.LineCommentTrivia && result <= SyntaxKind.Trivia);
		return result;
	}

	return {
		setPosition: setPosition,
		getPosition: () => pos,
		scan: ignoreTrivia ? scanNextNonTrivia : scanNext,
		getToken: () => token,
		getTokenValue: () => value,
		getTokenOffset: () => tokenOffset,
		getTokenLength: () => pos - tokenOffset,
		getTokenError: () => scanError
	};
}
560
561
/**
 * Tells whether the character code is a whitespace character other than a line break:
 * space, tab, vertical tab, form feed, the listed Unicode space characters
 * (including the whole `enQuad..zeroWidthSpace` range) or the byte order mark.
 */
function isWhitespace(ch: number): boolean {
	switch (ch) {
		case CharacterCodes.space:
		case CharacterCodes.tab:
		case CharacterCodes.verticalTab:
		case CharacterCodes.formFeed:
		case CharacterCodes.nonBreakingSpace:
		case CharacterCodes.ogham:
		case CharacterCodes.narrowNoBreakSpace:
		case CharacterCodes.mathematicalSpace:
		case CharacterCodes.ideographicSpace:
		case CharacterCodes.byteOrderMark:
			return true;
		default:
			// contiguous block of Unicode spaces
			return ch >= CharacterCodes.enQuad && ch <= CharacterCodes.zeroWidthSpace;
	}
}
566
567
/**
 * Tells whether the character code is a line terminator: line feed, carriage return,
 * line separator (U+2028) or paragraph separator (U+2029).
 */
function isLineBreak(ch: number): boolean {
	switch (ch) {
		case CharacterCodes.lineFeed:
		case CharacterCodes.carriageReturn:
		case CharacterCodes.lineSeparator:
		case CharacterCodes.paragraphSeparator:
			return true;
		default:
			return false;
	}
}
570
571
/** Tells whether the character code is an ASCII decimal digit ('0' to '9'). */
function isDigit(ch: number): boolean {
	const lowest = CharacterCodes._0;
	const highest = CharacterCodes._9;
	return lowest <= ch && ch <= highest;
}
574
575
/**
 * UTF-16 code units used by the scanner, named for readability.
 */
const enum CharacterCodes {
	nullCharacter = 0,
	maxAsciiCharacter = 0x7F,

	lineFeed = 0x0A,              // \n
	carriageReturn = 0x0D,        // \r
	lineSeparator = 0x2028,
	paragraphSeparator = 0x2029,

	// REVIEW: do we need to support this?  The scanner doesn't, but our IText does.  This seems
	// like an odd disparity?  (Or maybe it's completely fine for them to be different).
	nextLine = 0x0085,

	// Unicode 3.0 space characters
	space = 0x0020,   // " "
	nonBreakingSpace = 0x00A0,   // non-breaking space
	enQuad = 0x2000,
	emQuad = 0x2001,
	enSpace = 0x2002,
	emSpace = 0x2003,
	threePerEmSpace = 0x2004,
	fourPerEmSpace = 0x2005,
	sixPerEmSpace = 0x2006,
	figureSpace = 0x2007,
	punctuationSpace = 0x2008,
	thinSpace = 0x2009,
	hairSpace = 0x200A,
	zeroWidthSpace = 0x200B,
	narrowNoBreakSpace = 0x202F,
	ideographicSpace = 0x3000,
	mathematicalSpace = 0x205F,
	ogham = 0x1680,

	_ = 0x5F,
	$ = 0x24,

	_0 = 0x30,
	_1 = 0x31,
	_2 = 0x32,
	_3 = 0x33,
	_4 = 0x34,
	_5 = 0x35,
	_6 = 0x36,
	_7 = 0x37,
	_8 = 0x38,
	_9 = 0x39,

	a = 0x61,
	b = 0x62,
	c = 0x63,
	d = 0x64,
	e = 0x65,
	f = 0x66,
	g = 0x67,
	h = 0x68,
	i = 0x69,
	j = 0x6A,
	k = 0x6B,
	l = 0x6C,
	m = 0x6D,
	n = 0x6E,
	o = 0x6F,
	p = 0x70,
	q = 0x71,
	r = 0x72,
	s = 0x73,
	t = 0x74,
	u = 0x75,
	v = 0x76,
	w = 0x77,
	x = 0x78,
	y = 0x79,
	z = 0x7A,

	A = 0x41,
	B = 0x42,
	C = 0x43,
	D = 0x44,
	E = 0x45,
	F = 0x46,
	G = 0x47,
	H = 0x48,
	I = 0x49,
	J = 0x4A,
	K = 0x4B,
	L = 0x4C,
	M = 0x4D,
	N = 0x4E,
	O = 0x4F,
	P = 0x50,
	Q = 0x51,
	R = 0x52,
	S = 0x53,
	T = 0x54,
	U = 0x55,
	V = 0x56,
	W = 0x57,
	X = 0x58,
	Y = 0x59,
	Z = 0x5A,

	ampersand = 0x26,             // &
	asterisk = 0x2A,              // *
	at = 0x40,                    // @
	backslash = 0x5C,             // \
	bar = 0x7C,                   // |
	caret = 0x5E,                 // ^
	closeBrace = 0x7D,            // }
	closeBracket = 0x5D,          // ]
	closeParen = 0x29,            // )
	colon = 0x3A,                 // :
	comma = 0x2C,                 // ,
	dot = 0x2E,                   // .
	doubleQuote = 0x22,           // "
	equals = 0x3D,                // =
	exclamation = 0x21,           // !
	greaterThan = 0x3E,           // >
	lessThan = 0x3C,              // <
	minus = 0x2D,                 // -
	openBrace = 0x7B,             // {
	openBracket = 0x5B,           // [
	openParen = 0x28,             // (
	percent = 0x25,               // %
	plus = 0x2B,                  // +
	question = 0x3F,              // ?
	semicolon = 0x3B,             // ;
	singleQuote = 0x27,           // '
	slash = 0x2F,                 // /
	tilde = 0x7E,                 // ~

	backspace = 0x08,             // \b
	formFeed = 0x0C,              // \f
	byteOrderMark = 0xFEFF,
	tab = 0x09,                   // \t
	verticalTab = 0x0B,           // \v
}
711
712
/**
 * Mutable counterpart of `Node`, used internally while a tree or location is being
 * built. It redeclares the same members without `readonly` so they can be assigned.
 */
interface NodeImpl extends Node {
	type: NodeType;
	value?: any;
	offset: number;
	length: number;
	colonOffset?: number;
	parent?: NodeImpl;
	children?: NodeImpl[];
}
721
722
/**
 * For a given offset, evaluate the location in the JSON document. Each segment in the location path is either a property name or an array index.
 *
 * The text is walked with `visit`; as soon as a token at or beyond `position` is
 * reached, the walk is aborted by throwing a private sentinel object, and the state
 * collected so far describes the location.
 *
 * @param text The JSON text.
 * @param position The offset in `text` to describe.
 * @returns The path to the position, the literal or property key preceding it (if any),
 * whether the position is at a property key, and a `matches` function for the path.
 */
export function getLocation(text: string, position: number): Location {
	const segments: Segment[] = []; // strings or numbers
	const earlyReturnException = new Object(); // sentinel used to abort the walk
	let previousNode: NodeImpl | undefined = undefined;
	// a single node object is reused for every "previous node" to avoid allocations
	const previousNodeInst: NodeImpl = {
		value: {},
		offset: 0,
		length: 0,
		type: 'object',
		parent: undefined
	};
	let isAtPropertyKey = false;

	function setPreviousNode(value: string, offset: number, length: number, type: NodeType) {
		previousNodeInst.value = value;
		previousNodeInst.offset = offset;
		previousNodeInst.length = length;
		previousNodeInst.type = type;
		previousNodeInst.colonOffset = undefined;
		previousNode = previousNodeInst;
	}

	try {

		visit(text, {
			onObjectBegin: (offset: number) => {
				if (position <= offset) {
					throw earlyReturnException;
				}
				previousNode = undefined;
				isAtPropertyKey = position > offset;
				segments.push(''); // push a placeholder (will be replaced)
			},
			onObjectProperty: (name: string, offset: number, length: number) => {
				if (position < offset) {
					throw earlyReturnException;
				}
				setPreviousNode(name, offset, length, 'property');
				segments[segments.length - 1] = name;
				if (position <= offset + length) {
					throw earlyReturnException;
				}
			},
			onObjectEnd: (offset: number) => {
				if (position <= offset) {
					throw earlyReturnException;
				}
				previousNode = undefined;
				segments.pop();
			},
			onArrayBegin: (offset: number) => {
				if (position <= offset) {
					throw earlyReturnException;
				}
				previousNode = undefined;
				segments.push(0);
			},
			onArrayEnd: (offset: number) => {
				if (position <= offset) {
					throw earlyReturnException;
				}
				previousNode = undefined;
				segments.pop();
			},
			onLiteralValue: (value: any, offset: number, length: number) => {
				if (position < offset) {
					throw earlyReturnException;
				}
				setPreviousNode(value, offset, length, getNodeType(value));

				if (position <= offset + length) {
					throw earlyReturnException;
				}
			},
			onSeparator: (sep: string, offset: number) => {
				if (position <= offset) {
					throw earlyReturnException;
				}
				if (sep === ':' && previousNode && previousNode.type === 'property') {
					previousNode.colonOffset = offset;
					isAtPropertyKey = false;
					previousNode = undefined;
				} else if (sep === ',') {
					const last = segments[segments.length - 1];
					if (typeof last === 'number') {
						// next array item
						segments[segments.length - 1] = last + 1;
					} else {
						// next property: its name is not known yet
						isAtPropertyKey = true;
						segments[segments.length - 1] = '';
					}
					previousNode = undefined;
				}
			}
		});
	} catch (e) {
		if (e !== earlyReturnException) {
			throw e;
		}
	}

	return {
		path: segments,
		previousNode,
		isAtPropertyKey,
		matches: (pattern: Segment[]) => {
			// Matches pattern[k..] against segments[i..]. The pattern only has to cover
			// a prefix of the path, so running out of pattern entries is a success.
			const matchFrom = (k: number, i: number): boolean => {
				if (k === pattern.length) {
					return true;
				}
				const expected = pattern[k];
				if (expected === '**') {
					// either '**' stands for no segment, or it swallows one segment and stays active
					return matchFrom(k + 1, i) || (i < segments.length && matchFrom(k, i + 1));
				}
				if (i >= segments.length) {
					return false;
				}
				return (expected === segments[i] || expected === '*') && matchFrom(k + 1, i + 1);
			};
			return matchFrom(0, 0);
		}
	};
}
840
841
842
/**
 * Parses the given text and returns the object the JSON content represents. On invalid input, the parser tries to be as fault tolerant as possible, but still return a result.
 * Therefore always check the errors list to find out if the input was valid.
 *
 * @param text The JSON text to parse.
 * @param errors Receives one entry per problem reported while parsing.
 * @param options Parse options, forwarded to `visit`.
 * @returns The first top-level value that was parsed, or `undefined` if there was none.
 */
export function parse(text: string, errors: ParseError[] = [], options: ParseOptions = ParseOptions.DEFAULT): any {
	let currentProperty: string | null = null;
	// artificial root array: the parsed document ends up as its first element
	let currentParent: any = [];
	const previousParents: any[] = [];

	// Attaches a value to the container currently being filled. Inside an object a
	// value is dropped when no property name has been seen for it.
	function onValue(value: unknown) {
		if (Array.isArray(currentParent)) {
			(<any[]>currentParent).push(value);
		} else if (currentProperty !== null) {
			currentParent[currentProperty] = value;
		}
	}

	// Attaches a new container and makes it the current one.
	function enterContainer(container: any) {
		onValue(container);
		previousParents.push(currentParent);
		currentParent = container;
		currentProperty = null;
	}

	// Returns to the container that was current before the one just closed.
	function leaveContainer() {
		currentParent = previousParents.pop();
	}

	const visitor: JSONVisitor = {
		onObjectBegin: () => {
			enterContainer({});
		},
		onObjectProperty: (name: string) => {
			currentProperty = name;
		},
		onObjectEnd: leaveContainer,
		onArrayBegin: () => {
			enterContainer([]);
		},
		onArrayEnd: leaveContainer,
		onLiteralValue: onValue,
		onError: (error: ParseErrorCode, offset: number, length: number) => {
			errors.push({ error, offset, length });
		}
	};
	visit(text, visitor, options);
	return currentParent[0];
}
891
892
893
/**
 * Parses the given text and returns a tree representation the JSON content. On invalid input, the parser tries to be as fault tolerant as possible, but still return a result.
 *
 * @param text The JSON text to parse.
 * @param errors Receives one entry per problem reported while parsing.
 * @param options Parse options, forwarded to `visit`.
 * @returns The node of the first top-level value, with its `parent` link removed.
 * NOTE(review): when nothing was parsed the result is `undefined` although the
 * declared return type is `Node` - callers should be prepared for that.
 */
export function parseTree(text: string, errors: ParseError[] = [], options: ParseOptions = ParseOptions.DEFAULT): Node {
	let currentParent: NodeImpl = { type: 'array', offset: -1, length: -1, children: [], parent: undefined }; // artificial root

	// A property node is finished as soon as its value is complete (or a comma is
	// seen): fix its length and step back up to the owning object.
	function ensurePropertyComplete(endOffset: number) {
		if (currentParent.type === 'property') {
			currentParent.length = endOffset - currentParent.offset;
			currentParent = currentParent.parent!;
		}
	}

	// Appends the node to the current parent and hands it back.
	function onValue(valueNode: Node): Node {
		currentParent.children!.push(valueNode);
		return valueNode;
	}

	// Closes the current object or array: its length is only known now.
	function closeContainer(offset: number, length: number) {
		currentParent.length = offset + length - currentParent.offset;
		currentParent = currentParent.parent!;
		ensurePropertyComplete(offset + length);
	}

	const visitor: JSONVisitor = {
		onObjectBegin: (offset: number) => {
			currentParent = onValue({ type: 'object', offset, length: -1, parent: currentParent, children: [] });
		},
		onObjectProperty: (name: string, offset: number, length: number) => {
			currentParent = onValue({ type: 'property', offset, length: -1, parent: currentParent, children: [] });
			// the key is the first child of the property node
			currentParent.children!.push({ type: 'string', value: name, offset, length, parent: currentParent });
		},
		onObjectEnd: closeContainer,
		onArrayBegin: (offset: number) => {
			currentParent = onValue({ type: 'array', offset, length: -1, parent: currentParent, children: [] });
		},
		onArrayEnd: closeContainer,
		onLiteralValue: (value: unknown, offset: number, length: number) => {
			onValue({ type: getNodeType(value), offset, length, parent: currentParent, value });
			ensurePropertyComplete(offset + length);
		},
		onSeparator: (sep: string, offset: number) => {
			if (currentParent.type === 'property') {
				if (sep === ':') {
					currentParent.colonOffset = offset;
				} else if (sep === ',') {
					ensurePropertyComplete(offset);
				}
			}
		},
		onError: (error: ParseErrorCode, offset: number, length: number) => {
			errors.push({ error, offset, length });
		}
	};
	visit(text, visitor, options);

	const result = currentParent.children![0];
	if (result) {
		// detach from the artificial root
		delete result.parent;
	}
	return result;
}
957
958
/**
 * Finds the node at the given path in a JSON DOM.
 *
 * String segments select the value of the property with that name in an object node;
 * number segments select the item at that index in an array node.
 *
 * @param root The node to start from.
 * @param path The segments to follow.
 * @returns The node the path leads to, or `undefined` if any segment cannot be resolved.
 * A property that has a key but no value node (incomplete input) is treated as
 * not found.
 */
export function findNodeAtLocation(root: Node, path: JSONPath): Node | undefined {
	if (!root) {
		return undefined;
	}
	let node = root;
	for (const segment of path) {
		if (typeof segment === 'string') {
			if (node.type !== 'object' || !Array.isArray(node.children)) {
				return undefined;
			}
			let found = false;
			for (const propertyNode of node.children) {
				const parts = propertyNode.children;
				// Require both key and value: without the value node, `node` would become
				// undefined and resolving the next segment would throw.
				if (Array.isArray(parts) && parts.length >= 2 && parts[0].value === segment && parts[1]) {
					node = parts[1];
					found = true;
					break;
				}
			}
			if (!found) {
				return undefined;
			}
		} else {
			const index = <number>segment;
			if (node.type !== 'array' || index < 0 || !Array.isArray(node.children) || index >= node.children.length) {
				return undefined;
			}
			node = node.children[index];
		}
	}
	return node;
}
992
993
/**
 * Gets the JSON path of the given JSON DOM node.
 *
 * Walks from the node up through its ancestors: a property parent contributes its
 * key, an array parent contributes the index of the child. The walk ends at the
 * first node without a parent, or whose parent has no children list.
 */
export function getNodePath(node: Node): JSONPath {
	const reversed: JSONPath = [];
	let current = node;
	while (current.parent && current.parent.children) {
		const owner = current.parent;
		const siblings = current.parent.children;
		if (owner.type === 'property') {
			// the key is the first child of a property node
			reversed.push(siblings[0].value);
		} else if (owner.type === 'array') {
			const position = siblings.indexOf(current);
			if (position !== -1) {
				reversed.push(position);
			}
		}
		current = owner;
	}
	// segments were collected leaf-to-root
	return reversed.reverse();
}
1012
1013
/**
 * Evaluates the JavaScript object of the given JSON DOM node.
 *
 * Arrays become arrays of the evaluated items. Objects become prototype-less objects
 * holding one entry per property that has a value node (properties without a value
 * are skipped). Literal nodes yield their `value`; any other node type yields `undefined`.
 */
export function getNodeValue(node: Node): any {
	switch (node.type) {
		case 'array':
			return node.children!.map(getNodeValue);
		case 'object': {
			// no prototype, so property names can never collide with inherited members
			const obj = Object.create(null);
			for (const prop of node.children!) {
				const valueNode = prop.children![1];
				if (valueNode) {
					obj[prop.children![0].value] = getNodeValue(valueNode);
				}
			}
			return obj;
		}
		case 'null':
		case 'string':
		case 'number':
		case 'boolean':
			return node.value;
		default:
			return undefined;
	}
}
1040
1041
/**
 * Tells whether `offset` lies inside the node's range `[node.offset, node.offset + node.length)`.
 * With `includeRightBound` set, the offset directly after the node counts as inside too.
 */
export function contains(node: Node, offset: number, includeRightBound = false): boolean {
	const start = node.offset;
	const end = start + node.length;
	if (offset >= start && offset < end) {
		return true;
	}
	return includeRightBound && offset === end;
}
1044
1045
/**
 * Finds the most inner node at the given offset. If includeRightBound is set, also finds nodes that end at the given offset.
 *
 * Children are tried in order and the search stops at the first child that starts
 * after the offset. When no child contains the offset, the node itself is returned.
 */
export function findNodeAtOffset(node: Node, offset: number, includeRightBound = false): Node | undefined {
	if (!contains(node, offset, includeRightBound)) {
		return undefined;
	}
	const candidates = node.children;
	if (Array.isArray(candidates)) {
		for (const child of candidates) {
			if (child.offset > offset) {
				// this child, and every child after it in the list, is not looked at
				break;
			}
			const inner = findNodeAtOffset(child, offset, includeRightBound);
			if (inner) {
				return inner;
			}
		}
	}
	return node;
}
1064
1065
1066
/**
 * Parses the given text and invokes the visitor functions for each object, array and literal reached.
 *
 * Every visitor callback is optional; missing callbacks are replaced by no-ops. Callbacks receive the
 * offset and length of the scanner's current token at the time of the call.
 *
 * @param text The JSON text to scan.
 * @param visitor The callbacks to invoke while walking the text.
 * @param options Parse options; `disallowComments`, `allowTrailingComma` and `allowEmptyContent` are read here.
 * @returns `false` when no value could be parsed at the start of the text (including empty content unless
 * `allowEmptyContent` is set), `true` otherwise. Errors found later are reported through `visitor.onError`
 * and do not change the return value.
 */
export function visit(text: string, visitor: JSONVisitor, options: ParseOptions = ParseOptions.DEFAULT): any {

	// NOTE(review): the second argument is presumably `ignoreTrivia`; scanNext below handles trivia
	// and comment tokens itself, which is consistent with it being off — confirm against createScanner.
	const _scanner = createScanner(text, false);

	// Adapts an optional visitor callback to a function that appends the current token's offset and length.
	function toNoArgVisit(visitFunction?: (offset: number, length: number) => void): () => void {
		return visitFunction ? () => visitFunction(_scanner.getTokenOffset(), _scanner.getTokenLength()) : () => true;
	}
	// Same as toNoArgVisit, for callbacks that take one leading argument.
	function toOneArgVisit<T>(visitFunction?: (arg: T, offset: number, length: number) => void): (arg: T) => void {
		return visitFunction ? (arg: T) => visitFunction(arg, _scanner.getTokenOffset(), _scanner.getTokenLength()) : () => true;
	}

	const onObjectBegin = toNoArgVisit(visitor.onObjectBegin),
		onObjectProperty = toOneArgVisit(visitor.onObjectProperty),
		onObjectEnd = toNoArgVisit(visitor.onObjectEnd),
		onArrayBegin = toNoArgVisit(visitor.onArrayBegin),
		onArrayEnd = toNoArgVisit(visitor.onArrayEnd),
		onLiteralValue = toOneArgVisit(visitor.onLiteralValue),
		onSeparator = toOneArgVisit(visitor.onSeparator),
		onComment = toNoArgVisit(visitor.onComment),
		onError = toOneArgVisit(visitor.onError);

	const disallowComments = options && options.disallowComments;
	const allowTrailingComma = options && options.allowTrailingComma;

	// Advances to the next significant token: scan errors are reported, comments are either
	// reported to the visitor or flagged as errors, and trivia / unknown tokens are skipped.
	function scanNext(): SyntaxKind {
		while (true) {
			const token = _scanner.scan();
			switch (_scanner.getTokenError()) {
				case ScanError.InvalidUnicode:
					handleError(ParseErrorCode.InvalidUnicode);
					break;
				case ScanError.InvalidEscapeCharacter:
					handleError(ParseErrorCode.InvalidEscapeCharacter);
					break;
				case ScanError.UnexpectedEndOfNumber:
					handleError(ParseErrorCode.UnexpectedEndOfNumber);
					break;
				case ScanError.UnexpectedEndOfComment:
					// When comments are disallowed the comment token itself is reported below instead.
					if (!disallowComments) {
						handleError(ParseErrorCode.UnexpectedEndOfComment);
					}
					break;
				case ScanError.UnexpectedEndOfString:
					handleError(ParseErrorCode.UnexpectedEndOfString);
					break;
				case ScanError.InvalidCharacter:
					handleError(ParseErrorCode.InvalidCharacter);
					break;
			}
			switch (token) {
				case SyntaxKind.LineCommentTrivia:
				case SyntaxKind.BlockCommentTrivia:
					if (disallowComments) {
						handleError(ParseErrorCode.InvalidCommentToken);
					} else {
						onComment();
					}
					break;
				case SyntaxKind.Unknown:
					handleError(ParseErrorCode.InvalidSymbol);
					break;
				case SyntaxKind.Trivia:
				case SyntaxKind.LineBreakTrivia:
					break;
				default:
					return token;
			}
		}
	}

	// Reports the error, then optionally resynchronizes: scanning continues until a token from
	// `skipUntilAfter` has been consumed, a token from `skipUntil` is current, or EOF is reached.
	function handleError(error: ParseErrorCode, skipUntilAfter: SyntaxKind[] = [], skipUntil: SyntaxKind[] = []): void {
		onError(error);
		if (skipUntilAfter.length + skipUntil.length > 0) {
			let token = _scanner.getToken();
			while (token !== SyntaxKind.EOF) {
				if (skipUntilAfter.indexOf(token) !== -1) {
					scanNext();
					break;
				} else if (skipUntil.indexOf(token) !== -1) {
					break;
				}
				token = scanNext();
			}
		}
	}

	// Reports the current string token either as a literal value or as a property name, then advances.
	function parseString(isValue: boolean): boolean {
		const value = _scanner.getTokenValue();
		if (isValue) {
			onLiteralValue(value);
		} else {
			onObjectProperty(value);
		}
		scanNext();
		return true;
	}

	// Handles number, null, true and false tokens. Returns false (without advancing) for any other token.
	function parseLiteral(): boolean {
		switch (_scanner.getToken()) {
			case SyntaxKind.NumericLiteral: {
				let value = 0;
				try {
					value = JSON.parse(_scanner.getTokenValue());
					if (typeof value !== 'number') {
						handleError(ParseErrorCode.InvalidNumberFormat);
						value = 0;
					}
				} catch (e) {
					// JSON.parse rejected the token text; report it and fall back to 0.
					handleError(ParseErrorCode.InvalidNumberFormat);
				}
				onLiteralValue(value);
				break;
			}
			case SyntaxKind.NullKeyword:
				onLiteralValue(null);
				break;
			case SyntaxKind.TrueKeyword:
				onLiteralValue(true);
				break;
			case SyntaxKind.FalseKeyword:
				onLiteralValue(false);
				break;
			default:
				return false;
		}
		scanNext();
		return true;
	}

	// Parses `"name" : value`. Returns false only when the current token is not a string literal.
	function parseProperty(): boolean {
		if (_scanner.getToken() !== SyntaxKind.StringLiteral) {
			handleError(ParseErrorCode.PropertyNameExpected, [], [SyntaxKind.CloseBraceToken, SyntaxKind.CommaToken]);
			return false;
		}
		parseString(false);
		if (_scanner.getToken() === SyntaxKind.ColonToken) {
			onSeparator(':');
			scanNext(); // consume colon

			if (!parseValue()) {
				handleError(ParseErrorCode.ValueExpected, [], [SyntaxKind.CloseBraceToken, SyntaxKind.CommaToken]);
			}
		} else {
			handleError(ParseErrorCode.ColonExpected, [], [SyntaxKind.CloseBraceToken, SyntaxKind.CommaToken]);
		}
		return true;
	}

	// Parses an object starting at the current open brace. Always returns true; problems are reported via onError.
	function parseObject(): boolean {
		onObjectBegin();
		scanNext(); // consume open brace

		let needsComma = false;
		while (_scanner.getToken() !== SyntaxKind.CloseBraceToken && _scanner.getToken() !== SyntaxKind.EOF) {
			if (_scanner.getToken() === SyntaxKind.CommaToken) {
				if (!needsComma) {
					// A comma with no preceding property.
					handleError(ParseErrorCode.ValueExpected, [], []);
				}
				onSeparator(',');
				scanNext(); // consume comma
				if (_scanner.getToken() === SyntaxKind.CloseBraceToken && allowTrailingComma) {
					break;
				}
			} else if (needsComma) {
				handleError(ParseErrorCode.CommaExpected, [], []);
			}
			if (!parseProperty()) {
				handleError(ParseErrorCode.ValueExpected, [], [SyntaxKind.CloseBraceToken, SyntaxKind.CommaToken]);
			}
			needsComma = true;
		}
		// onObjectEnd fires before the close brace is consumed, so it sees the current token's offset/length.
		onObjectEnd();
		if (_scanner.getToken() !== SyntaxKind.CloseBraceToken) {
			handleError(ParseErrorCode.CloseBraceExpected, [SyntaxKind.CloseBraceToken], []);
		} else {
			scanNext(); // consume close brace
		}
		return true;
	}

	// Parses an array starting at the current open bracket. Always returns true; problems are reported via onError.
	function parseArray(): boolean {
		onArrayBegin();
		scanNext(); // consume open bracket

		let needsComma = false;
		while (_scanner.getToken() !== SyntaxKind.CloseBracketToken && _scanner.getToken() !== SyntaxKind.EOF) {
			if (_scanner.getToken() === SyntaxKind.CommaToken) {
				if (!needsComma) {
					// A comma with no preceding element.
					handleError(ParseErrorCode.ValueExpected, [], []);
				}
				onSeparator(',');
				scanNext(); // consume comma
				if (_scanner.getToken() === SyntaxKind.CloseBracketToken && allowTrailingComma) {
					break;
				}
			} else if (needsComma) {
				handleError(ParseErrorCode.CommaExpected, [], []);
			}
			if (!parseValue()) {
				handleError(ParseErrorCode.ValueExpected, [], [SyntaxKind.CloseBracketToken, SyntaxKind.CommaToken]);
			}
			needsComma = true;
		}
		// onArrayEnd fires before the close bracket is consumed, so it sees the current token's offset/length.
		onArrayEnd();
		if (_scanner.getToken() !== SyntaxKind.CloseBracketToken) {
			handleError(ParseErrorCode.CloseBracketExpected, [SyntaxKind.CloseBracketToken], []);
		} else {
			scanNext(); // consume close bracket
		}
		return true;
	}

	// Dispatches on the current token; returns false when it does not start a value.
	function parseValue(): boolean {
		switch (_scanner.getToken()) {
			case SyntaxKind.OpenBracketToken:
				return parseArray();
			case SyntaxKind.OpenBraceToken:
				return parseObject();
			case SyntaxKind.StringLiteral:
				return parseString(true);
			default:
				return parseLiteral();
		}
	}

	scanNext();
	if (_scanner.getToken() === SyntaxKind.EOF) {
		// The text holds no significant tokens at all.
		if (options.allowEmptyContent) {
			return true;
		}
		handleError(ParseErrorCode.ValueExpected, [], []);
		return false;
	}
	if (!parseValue()) {
		handleError(ParseErrorCode.ValueExpected, [], []);
		return false;
	}
	if (_scanner.getToken() !== SyntaxKind.EOF) {
		// Trailing content after the root value.
		handleError(ParseErrorCode.EndOfFileExpected, [], []);
	}
	return true;
}
1310
1311
/**
 * Maps a JavaScript value to the node type name used for it.
 *
 * @param value Any value.
 * @returns `'boolean'`, `'number'` or `'string'` for the matching primitives; `'array'` for arrays;
 * `'object'` for other non-null objects; `'null'` for `null` and for every other kind of value
 * (e.g. `undefined` or functions).
 */
export function getNodeType(value: unknown): NodeType {
	switch (typeof value) {
		case 'boolean': return 'boolean';
		case 'number': return 'number';
		case 'string': return 'string';
		case 'object': {
			if (!value) {
				// typeof null is 'object'; null is the only falsy value that reaches this branch.
				return 'null';
			} else if (Array.isArray(value)) {
				return 'array';
			}
			return 'object';
		}
		default: return 'null';
	}
}
1327
1328