Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
microsoft
GitHub Repository: microsoft/vscode
Path: blob/main/extensions/copilot/test/simulation/fixtures/codeMapper/scanner.ts
13399 views
/*---------------------------------------------------------------------------------------------
 *  Copyright (c) Microsoft Corporation. All rights reserved.
 *  Licensed under the MIT License. See License.txt in the project root for license information.
 *--------------------------------------------------------------------------------------------*/
5
'use strict';
6
7
import { JSONScanner, ScanError, SyntaxKind } from './scannerTypes';
8
9
/**
 * Creates a JSON scanner (tokenizer) on the given text.
 *
 * Besides the JSON tokens, the scanner recognizes whitespace, line breaks,
 * `//` line comments and block comments as trivia tokens.
 *
 * @param text The text to tokenize.
 * @param ignoreTrivia If set, `scan()` skips whitespace, line breaks and comments
 * and only returns the remaining tokens.
 * @returns A scanner whose accessors describe the token most recently returned by `scan()`.
 */
export function createScanner(text: string, ignoreTrivia: boolean = false): JSONScanner {

	const len = text.length;
	let pos = 0, // offset of the next character to read
		value: string = '', // value of the current token
		tokenOffset = 0, // offset at which the current token starts
		token: SyntaxKind = SyntaxKind.Unknown, // kind of the current token
		lineNumber = 0, // number of line breaks consumed so far
		lineStartOffset = 0, // despite its name: the line number on which the current token starts
		tokenLineStartOffset = 0, // offset just after the most recently consumed line break
		prevTokenLineStartOffset = 0, // tokenLineStartOffset as it was when the current token started
		scanError: ScanError = ScanError.None; // error of the current token

	/**
	 * Reads hexadecimal digits starting at `pos` and advances `pos` past them.
	 *
	 * @param count Minimum number of digits required.
	 * @param exact If truthy, at most `count` digits are read; otherwise reading
	 * continues as long as hex digits follow.
	 * @returns The numeric value of the digits read, or -1 if fewer than `count` were found.
	 */
	function scanHexDigits(count: number, exact?: boolean): number {
		let digits = 0;
		let hexValue = 0;
		while (digits < count || !exact) {
			// charCodeAt yields NaN past the end of the text, which ends the loop via the else branch
			const ch = text.charCodeAt(pos);
			if (ch >= CharacterCodes._0 && ch <= CharacterCodes._9) {
				hexValue = hexValue * 16 + ch - CharacterCodes._0;
			} else if (ch >= CharacterCodes.A && ch <= CharacterCodes.F) {
				hexValue = hexValue * 16 + ch - CharacterCodes.A + 10;
			} else if (ch >= CharacterCodes.a && ch <= CharacterCodes.f) {
				hexValue = hexValue * 16 + ch - CharacterCodes.a + 10;
			} else {
				break;
			}
			pos++;
			digits++;
		}
		if (digits < count) {
			hexValue = -1;
		}
		return hexValue;
	}

	/**
	 * Moves the scanner to `newPosition` and clears the current token state.
	 * The line bookkeeping (`lineNumber`, line start offsets) is left untouched.
	 */
	function setPosition(newPosition: number) {
		pos = newPosition;
		value = '';
		tokenOffset = 0;
		token = SyntaxKind.Unknown;
		scanError = ScanError.None;
	}

	/**
	 * Scans the digits, optional fraction and optional exponent of a number.
	 * Expects `pos` to be on the first digit (a leading minus is handled by the caller).
	 *
	 * @returns The scanned text. Sets `scanError` to `UnexpectedEndOfNumber` when a
	 * fraction or exponent has no digits; an incomplete exponent is consumed but
	 * not included in the returned text.
	 */
	function scanNumber(): string {
		const start = pos;
		if (text.charCodeAt(pos) === CharacterCodes._0) {
			// a leading zero is a complete integer part on its own
			pos++;
		} else {
			pos++;
			while (pos < len && isDigit(text.charCodeAt(pos))) {
				pos++;
			}
		}
		if (pos < len && text.charCodeAt(pos) === CharacterCodes.dot) {
			pos++;
			if (pos < len && isDigit(text.charCodeAt(pos))) {
				pos++;
				while (pos < len && isDigit(text.charCodeAt(pos))) {
					pos++;
				}
			} else {
				scanError = ScanError.UnexpectedEndOfNumber;
				return text.substring(start, pos);
			}
		}
		let end = pos;
		if (pos < len && (text.charCodeAt(pos) === CharacterCodes.E || text.charCodeAt(pos) === CharacterCodes.e)) {
			pos++;
			// parenthesized so that the bounds check guards both sign comparisons
			if (pos < len && (text.charCodeAt(pos) === CharacterCodes.plus || text.charCodeAt(pos) === CharacterCodes.minus)) {
				pos++;
			}
			if (pos < len && isDigit(text.charCodeAt(pos))) {
				pos++;
				while (pos < len && isDigit(text.charCodeAt(pos))) {
					pos++;
				}
				end = pos;
			} else {
				scanError = ScanError.UnexpectedEndOfNumber;
			}
		}
		return text.substring(start, end);
	}

	/**
	 * Scans the content of a string literal. Expects `pos` to be just after the
	 * opening double quote and leaves it after the closing quote, if there is one.
	 *
	 * @returns The string content with escape sequences resolved. Problems are
	 * reported through `scanError`.
	 */
	function scanString(): string {

		let result = '',
			start = pos; // start of the pending run of characters not yet appended to result

		while (true) {
			if (pos >= len) {
				result += text.substring(start, pos);
				scanError = ScanError.UnexpectedEndOfString;
				break;
			}
			const ch = text.charCodeAt(pos);
			if (ch === CharacterCodes.doubleQuote) {
				result += text.substring(start, pos);
				pos++;
				break;
			}
			if (ch === CharacterCodes.backslash) {
				result += text.substring(start, pos);
				pos++;
				if (pos >= len) {
					scanError = ScanError.UnexpectedEndOfString;
					break;
				}
				const ch2 = text.charCodeAt(pos++);
				switch (ch2) {
					case CharacterCodes.doubleQuote:
						result += '\"';
						break;
					case CharacterCodes.backslash:
						result += '\\';
						break;
					case CharacterCodes.slash:
						result += '/';
						break;
					case CharacterCodes.b:
						result += '\b';
						break;
					case CharacterCodes.f:
						result += '\f';
						break;
					case CharacterCodes.n:
						result += '\n';
						break;
					case CharacterCodes.r:
						result += '\r';
						break;
					case CharacterCodes.t:
						result += '\t';
						break;
					case CharacterCodes.u: {
						const ch3 = scanHexDigits(4, true);
						if (ch3 >= 0) {
							result += String.fromCharCode(ch3);
						} else {
							scanError = ScanError.InvalidUnicode;
						}
						break;
					}
					default:
						scanError = ScanError.InvalidEscapeCharacter;
				}
				start = pos;
				continue;
			}
			if (ch >= 0 && ch <= 0x1f) {
				if (isLineBreak(ch)) {
					// a raw line break ends the string; it is not consumed
					result += text.substring(start, pos);
					scanError = ScanError.UnexpectedEndOfString;
					break;
				} else {
					scanError = ScanError.InvalidCharacter;
					// mark as error but continue with string
				}
			}
			pos++;
		}
		return result;
	}

	/**
	 * Scans the next token, including trivia, and updates the current token state.
	 *
	 * @returns The kind of the scanned token; `SyntaxKind.EOF` once the end of the text is reached.
	 */
	function scanNext(): SyntaxKind {

		value = '';
		scanError = ScanError.None;

		tokenOffset = pos;
		lineStartOffset = lineNumber;
		prevTokenLineStartOffset = tokenLineStartOffset;

		if (pos >= len) {
			// at the end
			tokenOffset = len;
			return token = SyntaxKind.EOF;
		}

		let code = text.charCodeAt(pos);
		// trivia: whitespace
		if (isWhiteSpace(code)) {
			do {
				pos++;
				value += String.fromCharCode(code);
				code = text.charCodeAt(pos);
			} while (isWhiteSpace(code));

			return token = SyntaxKind.Trivia;
		}

		// trivia: newlines
		if (isLineBreak(code)) {
			pos++;
			value += String.fromCharCode(code);
			// treat \r\n as a single line break
			if (code === CharacterCodes.carriageReturn && text.charCodeAt(pos) === CharacterCodes.lineFeed) {
				pos++;
				value += '\n';
			}
			lineNumber++;
			tokenLineStartOffset = pos;
			return token = SyntaxKind.LineBreakTrivia;
		}

		switch (code) {
			// tokens: []{}:,
			case CharacterCodes.openBrace:
				pos++;
				return token = SyntaxKind.OpenBraceToken;
			case CharacterCodes.closeBrace:
				pos++;
				return token = SyntaxKind.CloseBraceToken;
			case CharacterCodes.openBracket:
				pos++;
				return token = SyntaxKind.OpenBracketToken;
			case CharacterCodes.closeBracket:
				pos++;
				return token = SyntaxKind.CloseBracketToken;
			case CharacterCodes.colon:
				pos++;
				return token = SyntaxKind.ColonToken;
			case CharacterCodes.comma:
				pos++;
				return token = SyntaxKind.CommaToken;

			// strings
			case CharacterCodes.doubleQuote:
				pos++;
				value = scanString();
				return token = SyntaxKind.StringLiteral;

			// comments
			case CharacterCodes.slash: {
				// The comment starts at the slash itself, so that the token value
				// agrees with getTokenOffset() / getTokenLength().
				const start = pos;
				// Single-line comment
				if (text.charCodeAt(pos + 1) === CharacterCodes.slash) {
					pos += 2;

					// the terminating line break is not part of the comment
					while (pos < len && !isLineBreak(text.charCodeAt(pos))) {
						pos++;
					}
					value = text.substring(start, pos);
					return token = SyntaxKind.LineCommentTrivia;
				}

				// Multi-line comment
				if (text.charCodeAt(pos + 1) === CharacterCodes.asterisk) {
					pos += 2;

					let commentClosed = false;
					while (pos < len) {
						const ch = text.charCodeAt(pos);

						// the lookahead past the end yields NaN and therefore never matches
						if (ch === CharacterCodes.asterisk && text.charCodeAt(pos + 1) === CharacterCodes.slash) {
							pos += 2;
							commentClosed = true;
							break;
						}

						pos++;

						if (isLineBreak(ch)) {
							if (ch === CharacterCodes.carriageReturn && text.charCodeAt(pos) === CharacterCodes.lineFeed) {
								pos++;
							}

							lineNumber++;
							tokenLineStartOffset = pos;
						}
					}

					if (!commentClosed) {
						// the loop ran to the end of the text, so pos === len here
						scanError = ScanError.UnexpectedEndOfComment;
					}

					value = text.substring(start, pos);
					return token = SyntaxKind.BlockCommentTrivia;
				}
				// just a single slash
				value += String.fromCharCode(code);
				pos++;
				return token = SyntaxKind.Unknown;
			}

			// numbers
			case CharacterCodes.minus:
				value += String.fromCharCode(code);
				pos++;
				if (pos === len || !isDigit(text.charCodeAt(pos))) {
					return token = SyntaxKind.Unknown;
				}
				// found a minus followed by a digit: scan the number that follows
				value += scanNumber();
				return token = SyntaxKind.NumericLiteral;
			case CharacterCodes._0:
			case CharacterCodes._1:
			case CharacterCodes._2:
			case CharacterCodes._3:
			case CharacterCodes._4:
			case CharacterCodes._5:
			case CharacterCodes._6:
			case CharacterCodes._7:
			case CharacterCodes._8:
			case CharacterCodes._9:
				value += scanNumber();
				return token = SyntaxKind.NumericLiteral;
			// literals and unknown symbols
			default:
				// is a literal? Read the full word.
				while (pos < len && isUnknownContentCharacter(code)) {
					pos++;
					code = text.charCodeAt(pos);
				}
				if (tokenOffset !== pos) {
					value = text.substring(tokenOffset, pos);
					// keywords: true, false, null
					switch (value) {
						case 'true': return token = SyntaxKind.TrueKeyword;
						case 'false': return token = SyntaxKind.FalseKeyword;
						case 'null': return token = SyntaxKind.NullKeyword;
					}
					return token = SyntaxKind.Unknown;
				}
				// nothing was consumed: take a single character so the scanner always advances
				value += String.fromCharCode(code);
				pos++;
				return token = SyntaxKind.Unknown;
		}
	}

	/**
	 * Tells whether `code` can be part of a literal/unknown word, i.e. it is not
	 * whitespace, a line break or one of the characters that start another token.
	 */
	function isUnknownContentCharacter(code: CharacterCodes) {
		if (isWhiteSpace(code) || isLineBreak(code)) {
			return false;
		}
		switch (code) {
			case CharacterCodes.closeBrace:
			case CharacterCodes.closeBracket:
			case CharacterCodes.openBrace:
			case CharacterCodes.openBracket:
			case CharacterCodes.doubleQuote:
			case CharacterCodes.colon:
			case CharacterCodes.comma:
			case CharacterCodes.slash:
				return false;
		}
		return true;
	}

	/**
	 * Scans tokens until one is found whose kind lies outside the
	 * `LineCommentTrivia`..`Trivia` range of `SyntaxKind`, and returns it.
	 */
	function scanNextNonTrivia(): SyntaxKind {
		let result: SyntaxKind;
		do {
			result = scanNext();
		} while (result >= SyntaxKind.LineCommentTrivia && result <= SyntaxKind.Trivia);
		return result;
	}

	return {
		setPosition: setPosition,
		getPosition: () => pos,
		scan: ignoreTrivia ? scanNextNonTrivia : scanNext,
		getToken: () => token,
		getTokenValue: () => value,
		getTokenOffset: () => tokenOffset,
		getTokenLength: () => pos - tokenOffset,
		getTokenStartLine: () => lineStartOffset,
		getTokenStartCharacter: () => tokenOffset - prevTokenLineStartOffset,
		getTokenError: () => scanError,
	};
}
391
392
/**
 * Tells whether the given character code is a space or a tab.
 * Line breaks are not considered whitespace here.
 */
function isWhiteSpace(ch: number): boolean {
	switch (ch) {
		case CharacterCodes.space:
		case CharacterCodes.tab:
			return true;
		default:
			return false;
	}
}
395
396
/**
 * Tells whether the given character code is a line feed or a carriage return.
 */
function isLineBreak(ch: number): boolean {
	if (ch === CharacterCodes.lineFeed) {
		return true;
	}
	return ch === CharacterCodes.carriageReturn;
}
399
400
/**
 * Tells whether the given character code is one of the decimal digits '0' to '9'.
 * Yields false for NaN, which `charCodeAt` returns for an out-of-range index.
 */
function isDigit(ch: number): boolean {
	const atLeastZero = CharacterCodes._0 <= ch;
	const atMostNine = ch <= CharacterCodes._9;
	return atLeastZero && atMostNine;
}
403
404
/**
 * UTF-16 code units of the characters the scanner compares against,
 * grouped by kind and listed in ascending code order within each group.
 */
const enum CharacterCodes {
	// blanks and line terminators
	tab = 0x09,            // \t
	lineFeed = 0x0A,       // \n
	formFeed = 0x0C,       // \f
	carriageReturn = 0x0D, // \r
	space = 0x20,          // " "

	// punctuation
	doubleQuote = 0x22,    // "
	asterisk = 0x2A,       // *
	plus = 0x2B,           // +
	comma = 0x2C,          // ,
	minus = 0x2D,          // -
	dot = 0x2E,            // .
	slash = 0x2F,          // /
	colon = 0x3A,          // :
	openBracket = 0x5B,    // [
	backslash = 0x5C,      // (backslash)
	closeBracket = 0x5D,   // ]
	openBrace = 0x7B,      // {
	closeBrace = 0x7D,     // }

	// decimal digits
	_0 = 0x30,
	_1 = 0x31,
	_2 = 0x32,
	_3 = 0x33,
	_4 = 0x34,
	_5 = 0x35,
	_6 = 0x36,
	_7 = 0x37,
	_8 = 0x38,
	_9 = 0x39,

	// uppercase letters
	A = 0x41,
	B = 0x42,
	C = 0x43,
	D = 0x44,
	E = 0x45,
	F = 0x46,
	G = 0x47,
	H = 0x48,
	I = 0x49,
	J = 0x4A,
	K = 0x4B,
	L = 0x4C,
	M = 0x4D,
	N = 0x4E,
	O = 0x4F,
	P = 0x50,
	Q = 0x51,
	R = 0x52,
	S = 0x53,
	T = 0x54,
	U = 0x55,
	V = 0x56,
	W = 0x57,
	X = 0x58,
	Y = 0x59,
	Z = 0x5A,

	// lowercase letters
	a = 0x61,
	b = 0x62,
	c = 0x63,
	d = 0x64,
	e = 0x65,
	f = 0x66,
	g = 0x67,
	h = 0x68,
	i = 0x69,
	j = 0x6A,
	k = 0x6B,
	l = 0x6C,
	m = 0x6D,
	n = 0x6E,
	o = 0x6F,
	p = 0x70,
	q = 0x71,
	r = 0x72,
	s = 0x73,
	t = 0x74,
	u = 0x75,
	v = 0x76,
	w = 0x77,
	x = 0x78,
	y = 0x79,
	z = 0x7A,
}
492
493