Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
microsoft
GitHub Repository: microsoft/vscode
Path: blob/main/src/vs/editor/standalone/test/browser/monarch.test.ts
3296 views
1
/*---------------------------------------------------------------------------------------------
2
* Copyright (c) Microsoft Corporation. All rights reserved.
3
* Licensed under the MIT License. See License.txt in the project root for license information.
4
*--------------------------------------------------------------------------------------------*/
5
6
import assert from 'assert';
7
import { DisposableStore } from '../../../../base/common/lifecycle.js';
8
import { ensureNoDisposablesAreLeakedInTestSuite } from '../../../../base/test/common/utils.js';
9
import { Token, TokenizationRegistry } from '../../../common/languages.js';
10
import { ILanguageService } from '../../../common/languages/language.js';
11
import { LanguageService } from '../../../common/services/languageService.js';
12
import { StandaloneConfigurationService } from '../../browser/standaloneServices.js';
13
import { compile } from '../../common/monarch/monarchCompile.js';
14
import { MonarchTokenizer } from '../../common/monarch/monarchLexer.js';
15
import { IMonarchLanguage } from '../../common/monarch/monarchTypes.js';
16
import { IConfigurationService } from '../../../../platform/configuration/common/configuration.js';
17
import { NullLogService } from '../../../../platform/log/common/log.js';
18
19
suite('Monarch', () => {
20
21
// NOTE(review): judging by its name, this presumably installs suite-wide checks that flag tests which leave disposables undisposed — confirm in base/test/common/utils.
ensureNoDisposablesAreLeakedInTestSuite();
22
23
/**
 * Compiles the Monarch grammar `language` under `languageId` and wraps the
 * compiled result in a `MonarchTokenizer`.
 *
 * The second constructor argument is intentionally passed as `null!`; the tests
 * in this suite do not supply a value for it.
 */
function createMonarchTokenizer(languageService: ILanguageService, languageId: string, language: IMonarchLanguage, configurationService: IConfigurationService): MonarchTokenizer {
	const compiledGrammar = compile(languageId, language);
	return new MonarchTokenizer(languageService, null!, languageId, compiledGrammar, configurationService);
}
26
27
/**
 * Tokenizes `lines` in order, starting from the tokenizer's initial state and
 * feeding the end state of each line into the next one.
 *
 * @returns one token array per input line, in input order.
 */
function getTokens(tokenizer: MonarchTokenizer, lines: string[]): Token[][] {
	let currentState = tokenizer.getInitialState();
	return lines.map((text) => {
		const { tokens, endState } = tokenizer.tokenize(text, true, currentState);
		// Carry the lexer state across line boundaries.
		currentState = endState;
		return tokens;
	});
}
37
38
// Registers a trivial 'sql' grammar (every character is a `token`) and a host
// grammar 'test1' whose rules combine `token: '@rematch'` with
// `nextEmbedded: 'sql'`. The input contains the same SQL string once on a
// single line and once spread over several lines.
test('Ensure @rematch and nextEmbedded can be used together in Monarch grammar', () => {
	const disposables = new DisposableStore();
	// Dispose in `finally` so that a failing assertion does not leave the global
	// TokenizationRegistry entry for 'sql' (or any other disposable) behind.
	try {
		const languageService = disposables.add(new LanguageService());
		const configurationService = new StandaloneConfigurationService(new NullLogService());
		disposables.add(languageService.registerLanguage({ id: 'sql' }));
		disposables.add(TokenizationRegistry.register('sql', disposables.add(createMonarchTokenizer(languageService, 'sql', {
			tokenizer: {
				root: [
					[/./, 'token']
				]
			}
		}, configurationService))));
		const SQL_QUERY_START = '(SELECT|INSERT|UPDATE|DELETE|CREATE|REPLACE|ALTER|WITH)';
		const tokenizer = disposables.add(createMonarchTokenizer(languageService, 'test1', {
			tokenizer: {
				root: [
					[`(\"\"\")${SQL_QUERY_START}`, [{ 'token': 'string.quote', }, { token: '@rematch', next: '@endStringWithSQL', nextEmbedded: 'sql', },]],
					[/(""")$/, [{ token: 'string.quote', next: '@maybeStringIsSQL', },]],
				],
				maybeStringIsSQL: [
					[/(.*)/, {
						cases: {
							[`${SQL_QUERY_START}\\b.*`]: { token: '@rematch', next: '@endStringWithSQL', nextEmbedded: 'sql', },
							'@default': { token: '@rematch', switchTo: '@endDblDocString', },
						}
					}],
				],
				endDblDocString: [
					['[^\']+', 'string'],
					['\\\\\'', 'string'],
					['\'\'\'', 'string', '@popall'],
					['\'', 'string']
				],
				endStringWithSQL: [[/"""/, { token: 'string.quote', next: '@popall', nextEmbedded: '@pop', },]],
			}
		}, configurationService));

		const lines = [
			`mysql_query("""SELECT * FROM table_name WHERE ds = '<DATEID>'""")`,
			`mysql_query("""`,
			`SELECT *`,
			`FROM table_name`,
			`WHERE ds = '<DATEID>'`,
			`""")`,
		];

		const actualTokens = getTokens(tokenizer, lines);

		assert.deepStrictEqual(actualTokens, [
			[
				new Token(0, 'source.test1', 'test1'),
				new Token(12, 'string.quote.test1', 'test1'),
				new Token(15, 'token.sql', 'sql'),
				new Token(61, 'string.quote.test1', 'test1'),
				new Token(64, 'source.test1', 'test1')
			],
			[
				new Token(0, 'source.test1', 'test1'),
				new Token(12, 'string.quote.test1', 'test1')
			],
			[
				new Token(0, 'token.sql', 'sql')
			],
			[
				new Token(0, 'token.sql', 'sql')
			],
			[
				new Token(0, 'token.sql', 'sql')
			],
			[
				new Token(0, 'string.quote.test1', 'test1'),
				new Token(3, 'source.test1', 'test1')
			]
		]);
	} finally {
		disposables.dispose();
	}
});
114
115
// Same setup and expectations as the plain `nextEmbedded: '@pop'` scenario, but
// here the closing rule of `endStringWithSQL` reaches its action through nested
// `cases` blocks, the innermost of which carries `nextEmbedded: '@pop'`.
test('Test nextEmbedded: "@pop" in cases statement', () => {
	const disposables = new DisposableStore();
	// Dispose in `finally` so that a failing assertion does not leave the global
	// TokenizationRegistry entry for 'sql' (or any other disposable) behind.
	try {
		const languageService = disposables.add(new LanguageService());
		const configurationService = new StandaloneConfigurationService(new NullLogService());
		disposables.add(languageService.registerLanguage({ id: 'sql' }));
		disposables.add(TokenizationRegistry.register('sql', disposables.add(createMonarchTokenizer(languageService, 'sql', {
			tokenizer: {
				root: [
					[/./, 'token']
				]
			}
		}, configurationService))));
		const SQL_QUERY_START = '(SELECT|INSERT|UPDATE|DELETE|CREATE|REPLACE|ALTER|WITH)';
		const tokenizer = disposables.add(createMonarchTokenizer(languageService, 'test1', {
			tokenizer: {
				root: [
					[`(\"\"\")${SQL_QUERY_START}`, [{ 'token': 'string.quote', }, { token: '@rematch', next: '@endStringWithSQL', nextEmbedded: 'sql', },]],
					[/(""")$/, [{ token: 'string.quote', next: '@maybeStringIsSQL', },]],
				],
				maybeStringIsSQL: [
					[/(.*)/, {
						cases: {
							[`${SQL_QUERY_START}\\b.*`]: { token: '@rematch', next: '@endStringWithSQL', nextEmbedded: 'sql', },
							'@default': { token: '@rematch', switchTo: '@endDblDocString', },
						}
					}],
				],
				endDblDocString: [
					['[^\']+', 'string'],
					['\\\\\'', 'string'],
					['\'\'\'', 'string', '@popall'],
					['\'', 'string']
				],
				endStringWithSQL: [[/"""/, {
					cases: {
						'"""': {
							cases: {
								'': { token: 'string.quote', next: '@popall', nextEmbedded: '@pop', }
							}
						},
						'@default': ''
					}
				}]],
			}
		}, configurationService));

		const lines = [
			`mysql_query("""SELECT * FROM table_name WHERE ds = '<DATEID>'""")`,
			`mysql_query("""`,
			`SELECT *`,
			`FROM table_name`,
			`WHERE ds = '<DATEID>'`,
			`""")`,
		];

		const actualTokens = getTokens(tokenizer, lines);

		assert.deepStrictEqual(actualTokens, [
			[
				new Token(0, 'source.test1', 'test1'),
				new Token(12, 'string.quote.test1', 'test1'),
				new Token(15, 'token.sql', 'sql'),
				new Token(61, 'string.quote.test1', 'test1'),
				new Token(64, 'source.test1', 'test1')
			],
			[
				new Token(0, 'source.test1', 'test1'),
				new Token(12, 'string.quote.test1', 'test1')
			],
			[
				new Token(0, 'token.sql', 'sql')
			],
			[
				new Token(0, 'token.sql', 'sql')
			],
			[
				new Token(0, 'token.sql', 'sql')
			],
			[
				new Token(0, 'string.quote.test1', 'test1'),
				new Token(3, 'source.test1', 'test1')
			]
		]);
	} finally {
		disposables.dispose();
	}
});
200
201
202
// Uses a grammar for `//` comments that may continue onto the next line via a
// trailing backslash, and checks how empty lines are tokenized in and out of
// the `comment_cpp` state.
test('microsoft/monaco-editor#1235: Empty Line Handling', () => {
	const disposables = new DisposableStore();
	// Dispose in `finally` so the language service and tokenizer are released
	// even when the assertion below throws.
	try {
		const configurationService = new StandaloneConfigurationService(new NullLogService());
		const languageService = disposables.add(new LanguageService());
		const tokenizer = disposables.add(createMonarchTokenizer(languageService, 'test', {
			tokenizer: {
				root: [
					{ include: '@comments' },
				],

				comments: [
					[/\/\/$/, 'comment'], // empty single-line comment
					[/\/\//, 'comment', '@comment_cpp'],
				],

				comment_cpp: [
					[/(?:[^\\]|(?:\\.))+$/, 'comment', '@pop'],
					[/.+$/, 'comment'],
					[/$/, 'comment', '@pop']
					// No possible rule to detect an empty line and @pop?
				],
			},
		}, configurationService));

		const lines = [
			`// This comment \\`,
			` continues on the following line`,
			``,
			`// This comment does NOT continue \\\\`,
			` because the escape char was itself escaped`,
			``,
			`// This comment DOES continue because \\\\\\`,
			` the 1st '\\' escapes the 2nd; the 3rd escapes EOL`,
			``,
			`// This comment continues to the following line \\`,
			``,
			`But the line was empty. This line should not be commented.`,
		];

		const actualTokens = getTokens(tokenizer, lines);

		// Empty input lines are expected to yield no tokens at all.
		assert.deepStrictEqual(actualTokens, [
			[new Token(0, 'comment.test', 'test')],
			[new Token(0, 'comment.test', 'test')],
			[],
			[new Token(0, 'comment.test', 'test')],
			[new Token(0, 'source.test', 'test')],
			[],
			[new Token(0, 'comment.test', 'test')],
			[new Token(0, 'comment.test', 'test')],
			[],
			[new Token(0, 'comment.test', 'test')],
			[],
			[new Token(0, 'source.test', 'test')]
		]);
	} finally {
		disposables.dispose();
	}
});
260
261
// With `includeLF: true`, the `inner` state has a rule on /\n/ that pops back
// to `root`; the test checks the tokens produced for lines that enter `inner`
// via a leading `*` or via `:*`.
test('microsoft/monaco-editor#2265: Exit a state at end of line', () => {
	const disposables = new DisposableStore();
	// Dispose in `finally` so the language service and tokenizer are released
	// even when the assertion below throws.
	try {
		const configurationService = new StandaloneConfigurationService(new NullLogService());
		const languageService = disposables.add(new LanguageService());
		const tokenizer = disposables.add(createMonarchTokenizer(languageService, 'test', {
			includeLF: true,
			tokenizer: {
				root: [
					[/^\*/, '', '@inner'],
					[/\:\*/, '', '@inner'],
					[/[^*:]+/, 'string'],
					[/[*:]/, 'string']
				],
				inner: [
					[/\n/, '', '@pop'],
					[/\d+/, 'number'],
					[/[^\d]+/, '']
				]
			}
		}, configurationService));

		const lines = [
			`PRINT 10 * 20`,
			`*FX200, 3`,
			`PRINT 2*3:*FX200, 3`
		];

		const actualTokens = getTokens(tokenizer, lines);

		assert.deepStrictEqual(actualTokens, [
			[
				new Token(0, 'string.test', 'test'),
			],
			[
				new Token(0, '', 'test'),
				new Token(3, 'number.test', 'test'),
				new Token(6, '', 'test'),
				new Token(8, 'number.test', 'test'),
			],
			[
				new Token(0, 'string.test', 'test'),
				new Token(9, '', 'test'),
				new Token(13, 'number.test', 'test'),
				new Token(16, '', 'test'),
				new Token(18, 'number.test', 'test'),
			]
		]);
	} finally {
		disposables.dispose();
	}
});
311
312
// Builds two grammars and expects both to produce a single `ham` token for the
// input `@ham`:
//  - tokenizer1 declares a chain of attributes (uselessReplaceKey1..5, the last
//    one being '@ham') and supplies its rule as a RegExp object,
//  - tokenizer2 supplies the regex literal /@@ham/.
test('issue #115662: monarchCompile function need an extra option which can control replacement', () => {
	const disposables = new DisposableStore();
	// Dispose in `finally` so the language service and both tokenizers are
	// released even when either assertion below throws.
	try {
		const configurationService = new StandaloneConfigurationService(new NullLogService());
		const languageService = disposables.add(new LanguageService());

		const tokenizer1 = disposables.add(createMonarchTokenizer(languageService, 'test', {
			ignoreCase: false,
			uselessReplaceKey1: '@uselessReplaceKey2',
			uselessReplaceKey2: '@uselessReplaceKey3',
			uselessReplaceKey3: '@uselessReplaceKey4',
			uselessReplaceKey4: '@uselessReplaceKey5',
			uselessReplaceKey5: '@ham',
			tokenizer: {
				root: [
					{
						// /@\w+/ matches '@ham', so the first RegExp is the one used.
						regex: /@\w+/.test('@ham')
							? new RegExp(`^${'@uselessReplaceKey1'}$`)
							: new RegExp(`^${'@ham'}$`),
						action: { token: 'ham' }
					},
				],
			},
		}, configurationService));

		const tokenizer2 = disposables.add(createMonarchTokenizer(languageService, 'test', {
			ignoreCase: false,
			tokenizer: {
				root: [
					{
						regex: /@@ham/,
						action: { token: 'ham' }
					},
				],
			},
		}, configurationService));

		const lines = [
			`@ham`
		];

		const actualTokens1 = getTokens(tokenizer1, lines);
		assert.deepStrictEqual(actualTokens1, [
			[
				new Token(0, 'ham.test', 'test'),
			]
		]);

		const actualTokens2 = getTokens(tokenizer2, lines);
		assert.deepStrictEqual(actualTokens2, [
			[
				new Token(0, 'ham.test', 'test'),
			]
		]);
	} finally {
		disposables.dispose();
	}
});
368
369
// The grammar's only rule is the regex literal /@@@@/; the test expects it to
// produce a single `ham` token for the two-character input `@@`.
test('microsoft/monaco-editor#2424: Allow to target @@', () => {
	const disposables = new DisposableStore();
	// Dispose in `finally` so the language service and tokenizer are released
	// even when the assertion below throws.
	try {
		const configurationService = new StandaloneConfigurationService(new NullLogService());
		const languageService = disposables.add(new LanguageService());

		const tokenizer = disposables.add(createMonarchTokenizer(languageService, 'test', {
			ignoreCase: false,
			tokenizer: {
				root: [
					{
						regex: /@@@@/,
						action: { token: 'ham' }
					},
				],
			},
		}, configurationService));

		const lines = [
			`@@`
		];

		const actualTokens = getTokens(tokenizer, lines);
		assert.deepStrictEqual(actualTokens, [
			[
				new Token(0, 'ham.test', 'test'),
			]
		]);
	} finally {
		disposables.dispose();
	}
});
399
400
// Sets `editor.maxTokenizationLineLength` to 4 and checks that a 3-character
// line is tokenized by the grammar while a 6-character line only gets a single
// token with an empty type.
test('microsoft/monaco-editor#3025: Check maxTokenizationLineLength before tokenizing', async () => {
	const disposables = new DisposableStore();
	// Dispose in `finally` so the language service and tokenizer are released
	// even when the configuration update rejects or the assertion throws.
	try {
		const configurationService = new StandaloneConfigurationService(new NullLogService());
		const languageService = disposables.add(new LanguageService());

		// Set maxTokenizationLineLength to 4 so that "ham" works but "hamham" would fail
		await configurationService.updateValue('editor.maxTokenizationLineLength', 4);

		const tokenizer = disposables.add(createMonarchTokenizer(languageService, 'test', {
			tokenizer: {
				root: [
					{
						regex: /ham/,
						action: { token: 'ham' }
					},
				],
			},
		}, configurationService));

		const lines = [
			'ham', // length 3, should be tokenized
			'hamham' // length 6, should NOT be tokenized
		];

		const actualTokens = getTokens(tokenizer, lines);
		assert.deepStrictEqual(actualTokens, [
			[
				new Token(0, 'ham.test', 'test'),
			],
			[
				new Token(0, '', 'test')
			]
		]);
	} finally {
		disposables.dispose();
	}
});
436
437
// A raw-string grammar: the opening rule moves to the state `raw.$1`, where $1
// is the captured delimiter, and the closing rule in `raw` refers back to it
// through `$S2`. The input is a small C++ snippet with a multi-line raw string.
test('microsoft/monaco-editor#3128: allow state access within rules', () => {
	const disposables = new DisposableStore();
	// Dispose in `finally` so the language service and tokenizer are released
	// even when the assertion below throws.
	try {
		const configurationService = new StandaloneConfigurationService(new NullLogService());
		const languageService = disposables.add(new LanguageService());

		const tokenizer = disposables.add(createMonarchTokenizer(languageService, 'test', {
			ignoreCase: false,
			encoding: /u|u8|U|L/,
			tokenizer: {
				root: [
					// C++ 11 Raw String
					[/@encoding?R\"(?:([^ ()\\\t]*))\(/, { token: 'string.raw.begin', next: '@raw.$1' }],
				],

				raw: [
					[/.*\)$S2\"/, 'string.raw', '@pop'],
					[/.*/, 'string.raw']
				],
			},
		}, configurationService));

		const lines = [
			`int main(){`,
			``,
			` auto s = R""""(`,
			` Hello World`,
			` )"""";`,
			``,
			` std::cout << "hello";`,
			``,
			`}`,
		];

		const actualTokens = getTokens(tokenizer, lines);
		assert.deepStrictEqual(actualTokens, [
			[new Token(0, 'source.test', 'test')],
			[],
			[new Token(0, 'source.test', 'test'), new Token(10, 'string.raw.begin.test', 'test')],
			[new Token(0, 'string.raw.test', 'test')],
			[new Token(0, 'string.raw.test', 'test'), new Token(6, 'source.test', 'test')],
			[],
			[new Token(0, 'source.test', 'test')],
			[],
			[new Token(0, 'source.test', 'test')],
		]);
	} finally {
		disposables.dispose();
	}
});
485
486
// Feeds the raw-string grammar a single line, `R"[())"`, whose captured
// delimiter is `[`, and expects a `string.raw.begin` token at 0 followed by a
// `string.raw` token at 4.
test('microsoft/monaco-editor#4775: Raw-strings in c++ can break monarch', () => {
	const disposables = new DisposableStore();
	// Dispose in `finally` so the language service and tokenizer are released
	// even when tokenization or the assertion below throws.
	try {
		const configurationService = new StandaloneConfigurationService(new NullLogService());
		const languageService = disposables.add(new LanguageService());

		const tokenizer = disposables.add(createMonarchTokenizer(languageService, 'test', {
			ignoreCase: false,
			encoding: /u|u8|U|L/,
			tokenizer: {
				root: [
					// C++ 11 Raw String
					[/@encoding?R\"(?:([^ ()\\\t]*))\(/, { token: 'string.raw.begin', next: '@raw.$1' }],
				],

				raw: [
					[/.*\)$S2\"/, 'string.raw', '@pop'],
					[/.*/, 'string.raw']
				],
			},
		}, configurationService));

		const lines = [
			`R"[())"`,
		];

		const actualTokens = getTokens(tokenizer, lines);
		assert.deepStrictEqual(actualTokens, [
			[new Token(0, 'string.raw.begin.test', 'test'), new Token(4, 'string.raw.test', 'test')],
		]);
	} finally {
		disposables.dispose();
	}
});
518
519
});
520
521