Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
microsoft
GitHub Repository: microsoft/vscode
Path: blob/main/src/vs/platform/contextkey/common/scanner.ts
3296 views
1
/*---------------------------------------------------------------------------------------------
2
* Copyright (c) Microsoft Corporation. All rights reserved.
3
* Licensed under the MIT License. See License.txt in the project root for license information.
4
*--------------------------------------------------------------------------------------------*/
5
6
import { CharCode } from '../../../base/common/charCode.js';
7
import { illegalState } from '../../../base/common/errors.js';
8
import { localize } from '../../../nls.js';
9
10
/**
 * Kinds of tokens the context key scanner can emit.
 *
 * The numeric values are written out explicitly; they are the same ordinals
 * the members would receive from implicit numbering.
 */
export const enum TokenType {
	// grouping and negation
	LParen = 0,
	RParen = 1,
	Neg = 2,

	// equality and relational operators
	Eq = 3,
	NotEq = 4,
	Lt = 5,
	LtEq = 6,
	Gt = 7,
	GtEq = 8,

	// regular expressions
	RegexOp = 9,
	RegexStr = 10,

	// keywords
	True = 11,
	False = 12,
	In = 13,
	Not = 14,

	// logical operators
	And = 15,
	Or = 16,

	// strings
	Str = 17,
	QuotedStr = 18,

	// bookkeeping
	Error = 19,
	EOF = 20,
}
33
34
/**
 * A single token produced by the scanner.
 *
 * Every variant carries the `type` discriminant and an `offset` into the
 * scanned input; some variants carry extra data:
 * - `Eq` / `NotEq`: `isTripleEq` tells `===` / `!==` apart from `==` / `!=`.
 * - `RegexStr`, `Str`, `QuotedStr`, `Error`: `lexeme` holds the token text.
 */
export type Token =
	// grouping and negation
	| { type: TokenType.LParen; offset: number }
	| { type: TokenType.RParen; offset: number }
	| { type: TokenType.Neg; offset: number }
	// equality operators
	| { type: TokenType.Eq; offset: number; isTripleEq: boolean }
	| { type: TokenType.NotEq; offset: number; isTripleEq: boolean }
	// relational operators
	| { type: TokenType.Lt; offset: number }
	| { type: TokenType.LtEq; offset: number }
	| { type: TokenType.Gt; offset: number }
	| { type: TokenType.GtEq; offset: number }
	// regular expressions
	| { type: TokenType.RegexOp; offset: number }
	| { type: TokenType.RegexStr; offset: number; lexeme: string }
	// keywords
	| { type: TokenType.True; offset: number }
	| { type: TokenType.False; offset: number }
	| { type: TokenType.In; offset: number }
	| { type: TokenType.Not; offset: number }
	// logical operators
	| { type: TokenType.And; offset: number }
	| { type: TokenType.Or; offset: number }
	// strings
	| { type: TokenType.Str; offset: number; lexeme: string }
	| { type: TokenType.QuotedStr; offset: number; lexeme: string }
	// bookkeeping
	| { type: TokenType.Error; offset: number; lexeme: string }
	| { type: TokenType.EOF; offset: number };
56
57
/** Token types that correspond to reserved words (`not`, `in`, `false`, `true`). */
type KeywordTokenType =
	| TokenType.Not
	| TokenType.In
	| TokenType.False
	| TokenType.True;
58
/**
 * Token types whose token consists of nothing but `type` and `offset`,
 * i.e. no `lexeme` and no `isTripleEq` field.
 */
type TokenTypeWithoutLexeme =
	// grouping and negation
	| TokenType.LParen
	| TokenType.RParen
	| TokenType.Neg
	// relational operators
	| TokenType.Lt
	| TokenType.LtEq
	| TokenType.Gt
	| TokenType.GtEq
	// regex operator
	| TokenType.RegexOp
	// keywords
	| TokenType.True
	| TokenType.False
	| TokenType.In
	| TokenType.Not
	// logical operators
	| TokenType.And
	| TokenType.Or
	// end of input
	| TokenType.EOF;
74
75
/**
 * Describes a piece of input the scanner could not turn into a valid token.
 *
 * Example:
 * `foo == bar'` - note how single quote doesn't have a corresponding closing quote,
 * so it's reported as unexpected
 */
export type LexingError = {
	/**
	 * Position of the offending text in the scanned input.
	 *
	 * Note that this doesn't take into account escape characters from the original
	 * encoding of the string, e.g., within an extension manifest file's JSON encoding.
	 */
	offset: number;

	/** The offending text itself. */
	lexeme: string;

	/** Optional hint for the user, e.g., a "did you mean" suggestion. */
	additionalInfo?: string;
};
85
86
/**
 * Builds a localized "Did you mean ...?" hint from one to three suggestions.
 *
 * @param meant the suggestions to offer
 * @returns the localized hint, or `undefined` when given zero or more than three suggestions
 */
function hintDidYouMean(...meant: string[]) {
	const [first, second, third] = meant;

	if (meant.length === 1) {
		return localize('contextkey.scanner.hint.didYouMean1', "Did you mean {0}?", first);
	}
	if (meant.length === 2) {
		return localize('contextkey.scanner.hint.didYouMean2', "Did you mean {0} or {1}?", first, second);
	}
	if (meant.length === 3) {
		return localize('contextkey.scanner.hint.didYouMean3', "Did you mean {0}, {1} or {2}?", first, second, third);
	}

	// we just don't expect any other number of suggestions
	return undefined;
}
98
99
/** Localized hint suggesting that a quote was left unopened or unclosed. */
const hintDidYouForgetToOpenOrCloseQuote = localize(
	'contextkey.scanner.hint.didYouForgetToOpenOrCloseQuote',
	"Did you forget to open or close the quote?"
);

/** Localized hint suggesting that a '/' inside a regex was left unescaped. */
const hintDidYouForgetToEscapeSlash = localize(
	'contextkey.scanner.hint.didYouForgetToEscapeSlash',
	"Did you forget to escape the '/' (slash) character? Put two backslashes before it to escape, e.g., '\\\\/\'."
);
101
102
/**
 * A simple scanner for context keys.
 *
 * Example:
 *
 * ```ts
 * const scanner = new Scanner().reset('resourceFileName =~ /docker/ && !config.docker.enabled');
 * const tokens = scanner.scan();
 * if (scanner.errors.length > 0) {
 * 	scanner.errors.forEach(err => console.error(`Unexpected token at ${err.offset}: ${err.lexeme}\nHint: ${err.additionalInfo}`));
 * } else {
 * 	// process tokens
 * }
 * ```
 */
export class Scanner {

	/**
	 * Returns the textual form of a token: the fixed spelling for operators and
	 * keywords, the stored `lexeme` for tokens that carry one, and `'EOF'` for
	 * the end-of-input token.
	 *
	 * @throws an illegal-state error if the token has a type not handled here
	 */
	static getLexeme(token: Token): string {
		switch (token.type) {
			case TokenType.LParen:
				return '(';
			case TokenType.RParen:
				return ')';
			case TokenType.Neg:
				return '!';
			case TokenType.Eq:
				return token.isTripleEq ? '===' : '==';
			case TokenType.NotEq:
				return token.isTripleEq ? '!==' : '!=';
			case TokenType.Lt:
				return '<';
			case TokenType.LtEq:
				return '<=';
			case TokenType.Gt:
				return '>'; // `scan()` emits `Gt` for a lone `>`; `>=` belongs to `GtEq` only
			case TokenType.GtEq:
				return '>=';
			case TokenType.RegexOp:
				return '=~';
			case TokenType.RegexStr:
				return token.lexeme;
			case TokenType.True:
				return 'true';
			case TokenType.False:
				return 'false';
			case TokenType.In:
				return 'in';
			case TokenType.Not:
				return 'not';
			case TokenType.And:
				return '&&';
			case TokenType.Or:
				return '||';
			case TokenType.Str:
				return token.lexeme;
			case TokenType.QuotedStr:
				return token.lexeme;
			case TokenType.Error:
				return token.lexeme;
			case TokenType.EOF:
				return 'EOF';
			default:
				throw illegalState(`unhandled token type: ${JSON.stringify(token)}; have you forgotten to add a case?`);
		}
	}

	/** Char codes of the flags accepted after the closing `/` of a regex. */
	private static readonly _regexFlags = new Set(['i', 'g', 's', 'm', 'y', 'u'].map(ch => ch.charCodeAt(0)));

	/** Reserved words and the token type each one maps to. */
	private static readonly _keywords = new Map<string, KeywordTokenType>([
		['not', TokenType.Not],
		['in', TokenType.In],
		['false', TokenType.False],
		['true', TokenType.True],
	]);

	/** The text being scanned. */
	private _input: string = '';
	/** Index in `_input` where the token currently being scanned starts. */
	private _start: number = 0;
	/** Index in `_input` of the next character to be consumed. */
	private _current: number = 0;
	/** Tokens produced so far (error tokens included). */
	private _tokens: Token[] = [];
	/** Lexing errors recorded so far. */
	private _errors: LexingError[] = [];

	/** Lexing errors recorded since the last `reset()`. */
	get errors(): Readonly<LexingError[]> {
		return this._errors;
	}

	/**
	 * Sets the input to scan and clears positions, tokens and errors.
	 *
	 * @param value the text to scan
	 * @returns this scanner, to allow chaining
	 */
	reset(value: string) {
		this._input = value;

		this._start = 0;
		this._current = 0;
		this._tokens = [];
		this._errors = [];

		return this;
	}

	/**
	 * Scans the remaining input and appends an `EOF` token at the end.
	 *
	 * Problems do not throw: each is recorded in `errors` and also emitted as an
	 * `Error` token in the result.
	 *
	 * @returns a copy of all tokens accumulated since the last `reset()`
	 */
	scan() {
		while (!this._isAtEnd()) {

			this._start = this._current;

			const ch = this._advance();
			switch (ch) {
				case CharCode.OpenParen: this._addToken(TokenType.LParen); break;
				case CharCode.CloseParen: this._addToken(TokenType.RParen); break;

				case CharCode.ExclamationMark:
					if (this._match(CharCode.Equals)) {
						const isTripleEq = this._match(CharCode.Equals); // eat last `=` if `!==`
						this._tokens.push({ type: TokenType.NotEq, offset: this._start, isTripleEq });
					} else {
						this._addToken(TokenType.Neg);
					}
					break;

				case CharCode.SingleQuote: this._quotedString(); break;
				case CharCode.Slash: this._regex(); break;

				case CharCode.Equals:
					if (this._match(CharCode.Equals)) { // support `==`
						const isTripleEq = this._match(CharCode.Equals); // eat last `=` if `===`
						this._tokens.push({ type: TokenType.Eq, offset: this._start, isTripleEq });
					} else if (this._match(CharCode.Tilde)) {
						this._addToken(TokenType.RegexOp);
					} else {
						this._error(hintDidYouMean('==', '=~'));
					}
					break;

				case CharCode.LessThan: this._addToken(this._match(CharCode.Equals) ? TokenType.LtEq : TokenType.Lt); break;

				case CharCode.GreaterThan: this._addToken(this._match(CharCode.Equals) ? TokenType.GtEq : TokenType.Gt); break;

				case CharCode.Ampersand:
					if (this._match(CharCode.Ampersand)) {
						this._addToken(TokenType.And);
					} else {
						this._error(hintDidYouMean('&&'));
					}
					break;

				case CharCode.Pipe:
					if (this._match(CharCode.Pipe)) {
						this._addToken(TokenType.Or);
					} else {
						this._error(hintDidYouMean('||'));
					}
					break;

				// TODO@ulugbekna: 1) rewrite using a regex 2) reconsider what characters are considered whitespace, including unicode, nbsp, etc.
				case CharCode.Space:
				case CharCode.CarriageReturn:
				case CharCode.Tab:
				case CharCode.LineFeed:
				case CharCode.NoBreakSpace: // &nbsp
					break;

				default:
					this._string();
			}
		}

		// the EOF token sits right after the last consumed character
		this._start = this._current;
		this._addToken(TokenType.EOF);

		return Array.from(this._tokens);
	}

	/**
	 * Consumes the next character only if it equals `expected`.
	 *
	 * @param expected char code to look for
	 * @returns whether the character was consumed
	 */
	private _match(expected: number): boolean {
		if (this._isAtEnd()) {
			return false;
		}
		if (this._input.charCodeAt(this._current) !== expected) {
			return false;
		}
		this._current++;
		return true;
	}

	/** Consumes the next character and returns its char code. */
	private _advance(): number {
		return this._input.charCodeAt(this._current++);
	}

	/** Returns the next character's code without consuming it, or `CharCode.Null` at end of input. */
	private _peek(): number {
		return this._isAtEnd() ? CharCode.Null : this._input.charCodeAt(this._current);
	}

	/** Appends a token that has no payload, positioned at the current token start. */
	private _addToken(type: TokenTypeWithoutLexeme) {
		this._tokens.push({ type, offset: this._start });
	}

	/**
	 * Reports the text between `_start` and `_current` as erroneous: records a
	 * `LexingError` and appends a matching `Error` token.
	 *
	 * @param additional optional hint stored as the error's `additionalInfo`
	 */
	private _error(additional?: string) {
		const offset = this._start;
		const lexeme = this._input.substring(this._start, this._current);
		const errToken: Token = { type: TokenType.Error, offset: this._start, lexeme };
		this._errors.push({ offset, lexeme, additionalInfo: additional });
		this._tokens.push(errToken);
	}

	// u - unicode, y - sticky // TODO@ulugbekna: we accept double quotes as part of the string rather than as a delimiter (to preserve old parser's behavior)
	private readonly stringRe = /[a-zA-Z0-9_<>\-\./\\:\*\?\+\[\]\^,#@;"%\$\p{L}-]+/uy;

	/**
	 * Scans an unquoted string starting at `_start` and emits either a keyword
	 * token (for `not`, `in`, `false`, `true`) or a `Str` token.
	 */
	private _string() {
		// sticky regex: matching is anchored at `lastIndex`
		this.stringRe.lastIndex = this._start;
		const match = this.stringRe.exec(this._input);
		if (match) {
			this._current = this._start + match[0].length;
			const lexeme = this._input.substring(this._start, this._current);
			const keyword = Scanner._keywords.get(lexeme);
			if (keyword) {
				this._addToken(keyword);
			} else {
				this._tokens.push({ type: TokenType.Str, lexeme, offset: this._start });
			}
		}
		// NOTE(review): when nothing matches, the character already consumed by `scan()`
		// is skipped without emitting a token or an error - confirm this is intended.
	}

	/**
	 * Scans a single-quoted string whose opening quote has already been consumed.
	 *
	 * The emitted `QuotedStr` captures the lexeme without the leading and trailing `'`,
	 * and its offset points at the first character after the opening quote.
	 * Reaching the end of input before a closing quote is reported as an error.
	 */
	private _quotedString() {
		while (this._peek() !== CharCode.SingleQuote && !this._isAtEnd()) { // TODO@ulugbekna: add support for escaping ' ?
			this._advance();
		}

		if (this._isAtEnd()) {
			this._error(hintDidYouForgetToOpenOrCloseQuote);
			return;
		}

		// consume the closing '
		this._advance();

		this._tokens.push({ type: TokenType.QuotedStr, lexeme: this._input.substring(this._start + 1, this._current - 1), offset: this._start + 1 });
	}

	/*
	 * Lexing a regex expression: /.../[igsmyu]*
	 * Based on https://github.com/microsoft/TypeScript/blob/9247ef115e617805983740ba795d7a8164babf89/src/compiler/scanner.ts#L2129-L2181
	 *
	 * Note that we want slashes within a regex to be escaped, e.g., /file:\\/\\/\\// should match `file:///`
	 *
	 * The opening slash has already been consumed. The emitted `RegexStr` lexeme spans
	 * from the opening slash through the trailing flags. Reaching the end of input
	 * before a closing slash is reported as an error.
	 */
	private _regex() {
		let p = this._current;

		let inEscape = false;
		let inCharacterClass = false;
		while (true) {
			if (p >= this._input.length) {
				this._current = p;
				this._error(hintDidYouForgetToEscapeSlash);
				return;
			}

			const ch = this._input.charCodeAt(p);

			if (inEscape) { // parsing an escape character
				inEscape = false;
			} else if (ch === CharCode.Slash && !inCharacterClass) { // end of regex
				p++;
				break;
			} else if (ch === CharCode.OpenSquareBracket) {
				inCharacterClass = true;
			} else if (ch === CharCode.Backslash) {
				inEscape = true;
			} else if (ch === CharCode.CloseSquareBracket) {
				inCharacterClass = false;
			}
			p++;
		}

		// Consume flags // TODO@ulugbekna: use regex instead
		while (p < this._input.length && Scanner._regexFlags.has(this._input.charCodeAt(p))) {
			p++;
		}

		this._current = p;

		const lexeme = this._input.substring(this._start, this._current);
		this._tokens.push({ type: TokenType.RegexStr, lexeme, offset: this._start });
	}

	/** Whether every character of the input has been consumed. */
	private _isAtEnd() {
		return this._current >= this._input.length;
	}
}
385
386