Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
microsoft
GitHub Repository: microsoft/vscode
Path: blob/main/src/vs/editor/standalone/common/monarch/monarchLexer.ts
3296 views
1
/*---------------------------------------------------------------------------------------------
2
* Copyright (c) Microsoft Corporation. All rights reserved.
3
* Licensed under the MIT License. See License.txt in the project root for license information.
4
*--------------------------------------------------------------------------------------------*/
5
6
/**
7
* Create a syntax highlighter with a fully declarative JSON style lexer description
8
* using regular expressions.
9
*/
10
11
import { Disposable, IDisposable } from '../../../../base/common/lifecycle.js';
12
import * as languages from '../../../common/languages.js';
13
import { NullState, nullTokenizeEncoded, nullTokenize } from '../../../common/languages/nullTokenize.js';
14
import { TokenTheme } from '../../../common/languages/supports/tokenization.js';
15
import { ILanguageService } from '../../../common/languages/language.js';
16
import * as monarchCommon from './monarchCommon.js';
17
import { IStandaloneThemeService } from '../standaloneTheme.js';
18
import { IConfigurationService } from '../../../../platform/configuration/common/configuration.js';
19
import { LanguageId, MetadataConsts } from '../../../common/encodedTokenAttributes.js';
20
21
// Depth limit passed to MonarchStackElementFactory and MonarchLineStateFactory:
// stacks at or beyond this depth are allocated fresh instead of being cached.
const CACHE_STACK_DEPTH = 5;
22
23
/**
 * Hands out stack elements, sharing one instance per distinct chain of
 * states as long as the parent is shallower than the configured depth.
 */
class MonarchStackElementFactory {

	private static readonly _INSTANCE = new MonarchStackElementFactory(CACHE_STACK_DEPTH);

	/**
	 * Creates (or reuses) the element for `state` on top of `parent` using the
	 * shared factory instance.
	 */
	public static create(parent: MonarchStackElement | null, state: string): MonarchStackElement {
		return MonarchStackElementFactory._INSTANCE.create(parent, state);
	}

	/** Cached elements, keyed by the parent's id followed by the new state. */
	private readonly _entries: { [stackElementId: string]: MonarchStackElement } = Object.create(null);

	constructor(private readonly _maxCacheDepth: number) { }

	/**
	 * Returns the element for `state` on top of `parent`. Elements whose
	 * parent already sits at the maximum cache depth are never cached.
	 */
	public create(parent: MonarchStackElement | null, state: string): MonarchStackElement {
		if (parent !== null && parent.depth >= this._maxCacheDepth) {
			// too deep: always allocate
			return new MonarchStackElement(parent, state);
		}

		const parentId = MonarchStackElement.getStackElementId(parent);
		const cacheKey = (parentId.length > 0 ? parentId + '|' : parentId) + state;

		const cached = this._entries[cacheKey];
		if (cached) {
			return cached;
		}

		const created = new MonarchStackElement(parent, state);
		this._entries[cacheKey] = created;
		return created;
	}
}
61
62
/**
 * One immutable frame of the tokenizer state stack. Frames are linked to
 * their parent; the root frame has a `null` parent and depth 1.
 */
class MonarchStackElement {

	/** Number of frames from this one down to the root, inclusive. */
	public readonly depth: number;

	constructor(
		public readonly parent: MonarchStackElement | null,
		public readonly state: string
	) {
		this.depth = (parent ? parent.depth : 0) + 1;
	}

	/**
	 * Builds an id by joining the state names from `element` down to the
	 * root with '|'. Returns the empty string for `null`.
	 */
	public static getStackElementId(element: MonarchStackElement | null): string {
		let id = '';
		for (let frame = element; frame !== null; frame = frame.parent) {
			// the separator is only added once something has been written
			id += (id.length > 0 ? '|' : '') + frame.state;
		}
		return id;
	}

	/**
	 * Walks both chains in lockstep; they are equal when they reach a shared
	 * frame, or when they end together without any state name differing.
	 */
	private static _equals(a: MonarchStackElement | null, b: MonarchStackElement | null): boolean {
		let left = a;
		let right = b;
		for (; left !== null && right !== null; left = left.parent, right = right.parent) {
			if (left === right) {
				return true;
			}
			if (left.state !== right.state) {
				return false;
			}
		}
		return (left === null && right === null);
	}

	/** Structural comparison of this stack with `other`. */
	public equals(other: MonarchStackElement): boolean {
		return MonarchStackElement._equals(this, other);
	}

	/** Returns the stack with `state` placed on top of this frame. */
	public push(state: string): MonarchStackElement {
		return MonarchStackElementFactory.create(this, state);
	}

	/** Returns the parent frame (`null` when this is the root). */
	public pop(): MonarchStackElement | null {
		return this.parent;
	}

	/** Returns the root frame of this stack. */
	public popall(): MonarchStackElement {
		let root: MonarchStackElement = this;
		for (let up = root.parent; up; up = up.parent) {
			root = up;
		}
		return root;
	}

	/** Returns the stack with this frame replaced by `state` (same parent). */
	public switchTo(state: string): MonarchStackElement {
		return MonarchStackElementFactory.create(this.parent, state);
	}
}
127
128
/**
 * Pairs the id of an embedded language with the tokenizer state of that
 * language at the current position.
 */
class EmbeddedLanguageData {
	public readonly languageId: string;
	public readonly state: languages.IState;

	constructor(languageId: string, state: languages.IState) {
		this.languageId = languageId;
		this.state = state;
	}

	/**
	 * Two instances are equal when they refer to the same language and their
	 * states report equality.
	 */
	public equals(other: EmbeddedLanguageData): boolean {
		return (
			this.languageId === other.languageId
			&& this.state.equals(other.state)
		);
	}

	/**
	 * Returns a copy holding a clone of the embedded state. When the state's
	 * `clone()` returns the state itself, `this` is returned to save an object.
	 */
	public clone(): EmbeddedLanguageData {
		const stateClone = this.state.clone();
		// save an object
		if (stateClone === this.state) {
			return this;
		}
		// The copy must own the cloned state; handing it `this.state` would make
		// the "clone" share the original's state object.
		return new EmbeddedLanguageData(this.languageId, stateClone);
	}
}
153
154
/**
 * Hands out line states, sharing one instance per distinct stack for
 * shallow stacks that carry no embedded language data.
 */
class MonarchLineStateFactory {

	private static readonly _INSTANCE = new MonarchLineStateFactory(CACHE_STACK_DEPTH);

	/** Creates (or reuses) a line state through the shared factory instance. */
	public static create(stack: MonarchStackElement, embeddedLanguageData: EmbeddedLanguageData | null): MonarchLineState {
		return MonarchLineStateFactory._INSTANCE.create(stack, embeddedLanguageData);
	}

	/** Cached line states, keyed by the id of their stack. */
	private readonly _entries: { [stackElementId: string]: MonarchLineState } = Object.create(null);

	constructor(private readonly _maxCacheDepth: number) { }

	/**
	 * Returns a line state for `stack`. A fresh instance is allocated when
	 * embedded language data is present or when the stack has reached the
	 * maximum cache depth; otherwise the instance cached for the stack id is
	 * returned (and created on first use).
	 */
	public create(stack: MonarchStackElement, embeddedLanguageData: EmbeddedLanguageData | null): MonarchLineState {
		const isEmbedding = (embeddedLanguageData !== null);
		const isTooDeep = (stack !== null && stack.depth >= this._maxCacheDepth);
		if (isEmbedding || isTooDeep) {
			// never cached
			return new MonarchLineState(stack, embeddedLanguageData);
		}

		const cacheKey = MonarchStackElement.getStackElementId(stack);
		const cached = this._entries[cacheKey];
		if (cached) {
			return cached;
		}

		const created = new MonarchLineState(stack, null);
		this._entries[cacheKey] = created;
		return created;
	}
}
192
193
/**
 * Tokenizer state at the end of a line: the Monarch state stack plus, when
 * the line ends inside an embedded language, that language's data.
 */
class MonarchLineState implements languages.IState {

	public readonly stack: MonarchStackElement;
	public readonly embeddedLanguageData: EmbeddedLanguageData | null;

	constructor(
		stack: MonarchStackElement,
		embeddedLanguageData: EmbeddedLanguageData | null
	) {
		this.stack = stack;
		this.embeddedLanguageData = embeddedLanguageData;
	}

	/**
	 * Returns a state holding a clone of the embedded language data. When
	 * there is no embedded data, or its `clone()` returns the same object,
	 * `this` is returned to save an object.
	 */
	public clone(): languages.IState {
		const embeddedLanguageDataClone = this.embeddedLanguageData ? this.embeddedLanguageData.clone() : null;
		// save an object
		if (embeddedLanguageDataClone === this.embeddedLanguageData) {
			return this;
		}
		// Use the cloned data; passing the original would throw the clone away
		// and leave both line states sharing one embedded state.
		return MonarchLineStateFactory.create(this.stack, embeddedLanguageDataClone);
	}

	/**
	 * Returns true when `other` is a MonarchLineState with an equal stack and
	 * equal (or equally absent) embedded language data.
	 */
	public equals(other: languages.IState): boolean {
		if (!(other instanceof MonarchLineState)) {
			return false;
		}
		if (!this.stack.equals(other.stack)) {
			return false;
		}
		if (this.embeddedLanguageData === null && other.embeddedLanguageData === null) {
			return true;
		}
		if (this.embeddedLanguageData === null || other.embeddedLanguageData === null) {
			return false;
		}
		return this.embeddedLanguageData.equals(other.embeddedLanguageData);
	}
}
231
232
/**
 * Sink for the tokens produced while tokenizing one line. The tokenizer
 * drives it; the two implementations in this file differ in how they store
 * the tokens.
 */
interface IMonarchTokensCollector {
	/** Sets the language that subsequently emitted tokens are attributed to. */
	enterLanguage(languageId: string): void;

	/** Reports a token of the given type starting at `startOffset`. */
	emit(startOffset: number, type: string): void;

	/**
	 * Tokenizes `embeddedLanguageLine` with the embedded language described by
	 * `embeddedLanguageData`, shifting token offsets by `offsetDelta`, and
	 * returns the state to continue the embedded language with.
	 */
	nestedLanguageTokenize(embeddedLanguageLine: string, hasEOL: boolean, embeddedLanguageData: EmbeddedLanguageData, offsetDelta: number): languages.IState;
}
237
238
/**
 * Collects tokens as `languages.Token` objects.
 */
class MonarchClassicTokensCollector implements IMonarchTokensCollector {

	private _tokens: languages.Token[] = [];
	/** Language attributed to emitted tokens; null until a language is entered. */
	private _languageId: string | null = null;
	/** Type and language of the most recently emitted token. */
	private _lastTokenType: string | null = null;
	private _lastTokenLanguage: string | null = null;

	public enterLanguage(languageId: string): void {
		this._languageId = languageId;
	}

	/**
	 * Appends a token, unless it has the same type and language as the
	 * previously emitted one.
	 */
	public emit(startOffset: number, type: string): void {
		const repeatsPrevious = (type === this._lastTokenType && this._languageId === this._lastTokenLanguage);
		if (repeatsPrevious) {
			return;
		}
		this._lastTokenType = type;
		this._lastTokenLanguage = this._languageId;
		this._tokens.push(new languages.Token(startOffset, type, this._languageId!));
	}

	/**
	 * Tokenizes a piece of embedded-language text with that language's
	 * registered tokenizer and appends its tokens, shifted by `offsetDelta`.
	 * Without a registered tokenizer, a single empty-type token is emitted for
	 * the embedded language and the incoming state is returned unchanged.
	 */
	public nestedLanguageTokenize(embeddedLanguageLine: string, hasEOL: boolean, embeddedLanguageData: EmbeddedLanguageData, offsetDelta: number): languages.IState {
		const { languageId: nestedLanguageId, state: embeddedModeState } = embeddedLanguageData;

		const support = languages.TokenizationRegistry.get(nestedLanguageId);
		if (!support) {
			this.enterLanguage(nestedLanguageId);
			this.emit(offsetDelta, '');
			return embeddedModeState;
		}

		const nested = support.tokenize(embeddedLanguageLine, hasEOL, embeddedModeState);
		if (offsetDelta === 0) {
			// offsets are already correct
			this._tokens = this._tokens.concat(nested.tokens);
		} else {
			const nestedTokens = nested.tokens;
			for (let i = 0; i < nestedTokens.length; i++) {
				const token = nestedTokens[i];
				this._tokens.push(new languages.Token(token.offset + offsetDelta, token.type, token.language));
			}
		}

		// forget the previous token so the next emit is never suppressed
		this._lastTokenType = null;
		this._lastTokenLanguage = null;
		this._languageId = null;
		return nested.endState;
	}

	/** Wraps the collected tokens and `endState` in a tokenization result. */
	public finalize(endState: MonarchLineState): languages.TokenizationResult {
		return new languages.TokenizationResult(this._tokens, endState);
	}
}
294
295
/**
 * Collects tokens in encoded form: a flat sequence of
 * (start offset, metadata) number pairs.
 */
class MonarchModernTokensCollector implements IMonarchTokensCollector {

	/** Tokens finalized so far (everything up to the last nested tokenization). */
	private _prependTokens: Uint32Array | null = null;
	/** Tokens emitted since the last nested tokenization. */
	private _tokens: number[] = [];
	private _currentLanguageId: LanguageId = LanguageId.Null;
	private _lastTokenMetadata: number = 0;

	constructor(
		private readonly _languageService: ILanguageService,
		private readonly _theme: TokenTheme
	) { }

	public enterLanguage(languageId: string): void {
		this._currentLanguageId = this._languageService.languageIdCodec.encodeLanguageId(languageId);
	}

	/**
	 * Appends an (offset, metadata) pair, unless the metadata is identical to
	 * that of the previously emitted token.
	 */
	public emit(startOffset: number, type: string): void {
		const metadata = this._theme.match(this._currentLanguageId, type) | MetadataConsts.BALANCED_BRACKETS_MASK;
		if (metadata !== this._lastTokenMetadata) {
			this._lastTokenMetadata = metadata;
			this._tokens.push(startOffset, metadata);
		}
	}

	/**
	 * Concatenates `a`, `b` and `c` into one Uint32Array. When only `a` or
	 * only `c` has content, that array is returned as-is without copying.
	 */
	private static _merge(a: Uint32Array | null, b: number[], c: Uint32Array | null): Uint32Array {
		const aLen = (a === null ? 0 : a.length);
		const bLen = b.length;
		const cLen = (c === null ? 0 : c.length);

		if (bLen === 0) {
			if (aLen === 0 && cLen === 0) {
				return new Uint32Array(0);
			}
			if (aLen === 0) {
				return c!;
			}
			if (cLen === 0) {
				return a!;
			}
		}

		const merged = new Uint32Array(aLen + bLen + cLen);
		if (a !== null) {
			merged.set(a);
		}
		merged.set(b, aLen);
		if (c !== null) {
			merged.set(c, aLen + bLen);
		}
		return merged;
	}

	/**
	 * Tokenizes a piece of embedded-language text with that language's
	 * registered tokenizer and appends its encoded tokens, with offsets shifted
	 * by `offsetDelta` (the shift is applied in place to the nested result).
	 * Without a registered tokenizer, a single empty-type token is emitted for
	 * the embedded language and the incoming state is returned unchanged.
	 */
	public nestedLanguageTokenize(embeddedLanguageLine: string, hasEOL: boolean, embeddedLanguageData: EmbeddedLanguageData, offsetDelta: number): languages.IState {
		const { languageId: nestedLanguageId, state: embeddedModeState } = embeddedLanguageData;

		const support = languages.TokenizationRegistry.get(nestedLanguageId);
		if (!support) {
			this.enterLanguage(nestedLanguageId);
			this.emit(offsetDelta, '');
			return embeddedModeState;
		}

		const nested = support.tokenizeEncoded(embeddedLanguageLine, hasEOL, embeddedModeState);
		if (offsetDelta !== 0) {
			// even slots hold the start offsets
			const nestedTokens = nested.tokens;
			const count = nestedTokens.length;
			for (let i = 0; i < count; i += 2) {
				nestedTokens[i] += offsetDelta;
			}
		}

		this._prependTokens = MonarchModernTokensCollector._merge(this._prependTokens, this._tokens, nested.tokens);
		this._tokens = [];
		this._currentLanguageId = 0;
		this._lastTokenMetadata = 0;
		return nested.endState;
	}

	/** Wraps all collected tokens and `endState` in an encoded tokenization result. */
	public finalize(endState: MonarchLineState): languages.EncodedTokenizationResult {
		const allTokens = MonarchModernTokensCollector._merge(this._prependTokens, this._tokens, null);
		return new languages.EncodedTokenizationResult(allTokens, endState);
	}
}
387
388
/**
 * Result of MonarchTokenizer.getLoadStatus(): either everything is loaded, or
 * `promise` settles once the pending embedded-language loads have completed.
 */
export type ILoadStatus = { loaded: true } | { loaded: false; promise: Promise<void> };
389
390
/**
 * Tokenization support driven by a compiled Monarch lexer. Handles the
 * lexer's own states as well as hand-off to and return from embedded
 * languages.
 */
export class MonarchTokenizer extends Disposable implements languages.ITokenizationSupport, IDisposable {

	private readonly _languageService: ILanguageService;
	private readonly _standaloneThemeService: IStandaloneThemeService;
	private readonly _languageId: string;
	private readonly _lexer: monarchCommon.ILexer;
	/** Ids of the languages this tokenizer has embedded so far. */
	private readonly _embeddedLanguages: { [languageId: string]: boolean };
	public embeddedLoaded: Promise<void>;
	/** Lines of this length or longer are not tokenized with the lexer. */
	private _maxTokenizationLineLength: number;

	constructor(languageService: ILanguageService, standaloneThemeService: IStandaloneThemeService, languageId: string, lexer: monarchCommon.ILexer, @IConfigurationService private readonly _configurationService: IConfigurationService) {
		super();
		this._languageService = languageService;
		this._standaloneThemeService = standaloneThemeService;
		this._languageId = languageId;
		this._lexer = lexer;
		this._embeddedLanguages = Object.create(null);
		this.embeddedLoaded = Promise.resolve(undefined);

		// Set up listening for embedded modes: when the tokenizer of a language
		// embedded here changes, announce a change for this language as well.
		// `emitting` guards against reacting to our own announcement.
		let emitting = false;
		this._register(languages.TokenizationRegistry.onDidChange((e) => {
			if (emitting) {
				return;
			}
			const isOneOfMyEmbeddedModes = e.changedLanguages.some(language => this._embeddedLanguages[language]);
			if (isOneOfMyEmbeddedModes) {
				emitting = true;
				languages.TokenizationRegistry.handleChange([this._languageId]);
				emitting = false;
			}
		}));
		this._maxTokenizationLineLength = this._configurationService.getValue<number>('editor.maxTokenizationLineLength', {
			overrideIdentifier: this._languageId
		});
		this._register(this._configurationService.onDidChangeConfiguration(e => {
			if (e.affectsConfiguration('editor.maxTokenizationLineLength')) {
				this._maxTokenizationLineLength = this._configurationService.getValue<number>('editor.maxTokenizationLineLength', {
					overrideIdentifier: this._languageId
				});
			}
		}));
	}

	/**
	 * Reports whether the tokenizers of all embedded languages seen so far are
	 * available. When some are still pending, the returned promise settles
	 * after all of them have.
	 */
	public getLoadStatus(): ILoadStatus {
		const promises: Thenable<any>[] = [];
		for (const nestedLanguageId in this._embeddedLanguages) {
			const tokenizationSupport = languages.TokenizationRegistry.get(nestedLanguageId);
			if (tokenizationSupport) {
				// The nested language is already loaded
				if (tokenizationSupport instanceof MonarchTokenizer) {
					// ...but it may itself embed languages that are not
					const nestedModeStatus = tokenizationSupport.getLoadStatus();
					if (nestedModeStatus.loaded === false) {
						promises.push(nestedModeStatus.promise);
					}
				}
				continue;
			}

			if (!languages.TokenizationRegistry.isResolved(nestedLanguageId)) {
				// The nested language is in the process of being loaded
				promises.push(languages.TokenizationRegistry.getOrCreate(nestedLanguageId));
			}
		}

		if (promises.length === 0) {
			return {
				loaded: true
			};
		}
		return {
			loaded: false,
			promise: Promise.all(promises).then(_ => undefined)
		};
	}

	/** Returns the line state for the lexer's start state, with no embedded language. */
	public getInitialState(): languages.IState {
		const rootState = MonarchStackElementFactory.create(null, this._lexer.start!);
		return MonarchLineStateFactory.create(rootState, null);
	}

	/**
	 * Tokenizes one line into `languages.Token` objects. Lines that reach the
	 * configured maximum length are handed to `nullTokenize` instead.
	 */
	public tokenize(line: string, hasEOL: boolean, lineState: languages.IState): languages.TokenizationResult {
		if (line.length >= this._maxTokenizationLineLength) {
			return nullTokenize(this._languageId, lineState);
		}
		const tokensCollector = new MonarchClassicTokensCollector();
		const endLineState = this._tokenize(line, hasEOL, <MonarchLineState>lineState, tokensCollector);
		return tokensCollector.finalize(endLineState);
	}

	/**
	 * Tokenizes one line into encoded tokens using the current color theme.
	 * Lines that reach the configured maximum length are handed to
	 * `nullTokenizeEncoded` instead.
	 */
	public tokenizeEncoded(line: string, hasEOL: boolean, lineState: languages.IState): languages.EncodedTokenizationResult {
		if (line.length >= this._maxTokenizationLineLength) {
			return nullTokenizeEncoded(this._languageService.languageIdCodec.encodeLanguageId(this._languageId), lineState);
		}
		const tokensCollector = new MonarchModernTokensCollector(this._languageService, this._standaloneThemeService.getColorTheme().tokenTheme);
		const endLineState = this._tokenize(line, hasEOL, <MonarchLineState>lineState, tokensCollector);
		return tokensCollector.finalize(endLineState);
	}

	/**
	 * Entry point shared by both public tokenize methods: starts in the
	 * embedded language when the line state carries one, otherwise in this
	 * lexer.
	 */
	private _tokenize(line: string, hasEOL: boolean, lineState: MonarchLineState, collector: IMonarchTokensCollector): MonarchLineState {
		if (lineState.embeddedLanguageData) {
			return this._nestedTokenize(line, hasEOL, lineState, 0, collector);
		} else {
			return this._myTokenize(line, hasEOL, lineState, 0, collector);
		}
	}

	/**
	 * Finds the smallest offset in `line` at which one of the current state's
	 * rules that leave the embedded language (`nextEmbedded: '@pop'`, or
	 * flagged `hasEmbeddedEndInCases`) matches. Returns -1 when none matches.
	 *
	 * Throws a lexer error when the state has no rules, or no rule that could
	 * leave the embedded language.
	 */
	private _findLeavingNestedLanguageOffset(line: string, state: MonarchLineState): number {
		let rules: monarchCommon.IRule[] | null = this._lexer.tokenizer[state.stack.state];
		if (!rules) {
			rules = monarchCommon.findRules(this._lexer, state.stack.state); // do parent matching
			if (!rules) {
				throw monarchCommon.createError(this._lexer, 'tokenizer state is not defined: ' + state.stack.state);
			}
		}

		let popOffset = -1;
		let hasEmbeddedPopRule = false;

		for (const rule of rules) {
			if (!monarchCommon.isIAction(rule.action) || !(rule.action.nextEmbedded === '@pop' || rule.action.hasEmbeddedEndInCases)) {
				continue;
			}
			hasEmbeddedPopRule = true;

			let regex = rule.resolveRegex(state.stack.state);
			const regexSource = regex.source;
			if (regexSource.startsWith('^(?:') && regexSource.endsWith(')')) {
				// strip the '^(?:' ... ')' wrapper so the rule can match anywhere in the line
				const flags = (regex.ignoreCase ? 'i' : '') + (regex.unicode ? 'u' : '');
				regex = new RegExp(regexSource.substring(4, regexSource.length - 1), flags);
			}

			const result = line.search(regex);
			if (result === -1 || (result !== 0 && rule.matchOnlyAtLineStart)) {
				continue;
			}

			if (popOffset === -1 || result < popOffset) {
				popOffset = result;
			}
		}

		if (!hasEmbeddedPopRule) {
			throw monarchCommon.createError(this._lexer, 'no rule containing nextEmbedded: "@pop" in tokenizer embedded state: ' + state.stack.state);
		}

		return popOffset;
	}

	/**
	 * Tokenizes `line` starting inside the embedded language. The part before
	 * the point where the embedded language is left goes to the embedded
	 * tokenizer; the remainder is tokenized by this lexer.
	 */
	private _nestedTokenize(line: string, hasEOL: boolean, lineState: MonarchLineState, offsetDelta: number, tokensCollector: IMonarchTokensCollector): MonarchLineState {

		const popOffset = this._findLeavingNestedLanguageOffset(line, lineState);

		if (popOffset === -1) {
			// tokenization will not leave nested language
			const nestedEndState = tokensCollector.nestedLanguageTokenize(line, hasEOL, lineState.embeddedLanguageData!, offsetDelta);
			return MonarchLineStateFactory.create(lineState.stack, new EmbeddedLanguageData(lineState.embeddedLanguageData!.languageId, nestedEndState));
		}

		const nestedLanguageLine = line.substring(0, popOffset);
		if (nestedLanguageLine.length > 0) {
			// tokenize with the nested language
			tokensCollector.nestedLanguageTokenize(nestedLanguageLine, false, lineState.embeddedLanguageData!, offsetDelta);
		}

		const restOfTheLine = line.substring(popOffset);
		return this._myTokenize(restOfTheLine, hasEOL, lineState, offsetDelta + popOffset, tokensCollector);
	}

	/** Returns the rule's name for error messages, or '(unknown)' when there is no rule. */
	private _safeRuleName(rule: monarchCommon.IRule | null): string {
		if (rule) {
			return rule.name;
		}
		return '(unknown)';
	}

	/**
	 * Tokenizes `lineWithoutLF` with this lexer, starting from `lineState`.
	 * Token offsets are reported shifted by `offsetDelta`. When a rule enters
	 * an embedded language, the rest of the line is handed to
	 * `_nestedTokenize`. Returns the line state at the end of the line.
	 *
	 * Throws a lexer error for undefined states, malformed rules, stack
	 * overflow/underflow and rules that make no progress.
	 */
	private _myTokenize(lineWithoutLF: string, hasEOL: boolean, lineState: MonarchLineState, offsetDelta: number, tokensCollector: IMonarchTokensCollector): MonarchLineState {
		tokensCollector.enterLanguage(this._languageId);

		const lineWithoutLFLength = lineWithoutLF.length;
		const line = (hasEOL && this._lexer.includeLF ? lineWithoutLF + '\n' : lineWithoutLF);
		const lineLength = line.length;

		let embeddedLanguageData = lineState.embeddedLanguageData;
		let stack = lineState.stack;
		let pos = 0;

		// regular expression group matching
		// these never need cloning or equality since they are only used within a line match
		interface GroupMatching {
			matches: string[];
			rule: monarchCommon.IRule | null;
			groups: { action: monarchCommon.FuzzyAction; matched: string }[];
		}
		let groupMatching: GroupMatching | null = null;

		// See https://github.com/microsoft/monaco-editor/issues/1235
		// Evaluate rules at least once for an empty line
		let forceEvaluation = true;

		// Builds the line state for entering an embedded language at the current
		// `pos`/`stack` (read when called), tokenizing the rest of the line with it.
		const computeNewStateForEmbeddedLanguage = (enteringEmbeddedLanguage: string) => {
			// support language names, mime types, and language ids
			const languageId = (
				this._languageService.getLanguageIdByLanguageName(enteringEmbeddedLanguage)
				|| this._languageService.getLanguageIdByMimeType(enteringEmbeddedLanguage)
				|| enteringEmbeddedLanguage
			);

			const embeddedLanguageData = this._getNestedEmbeddedLanguageData(languageId);

			if (pos < lineLength) {
				// there is content from the embedded language on this line
				const restOfLine = lineWithoutLF.substring(pos);
				return this._nestedTokenize(restOfLine, hasEOL, MonarchLineStateFactory.create(stack, embeddedLanguageData), offsetDelta + pos, tokensCollector);
			} else {
				return MonarchLineStateFactory.create(stack, embeddedLanguageData);
			}
		};

		while (forceEvaluation || pos < lineLength) {

			// snapshot used by the progress check at the end of the iteration
			const pos0 = pos;
			const stackLen0 = stack.depth;
			const groupLen0 = groupMatching ? groupMatching.groups.length : 0;
			const state = stack.state;

			let matches: string[] | null = null;
			let matched: string | null = null;
			let action: monarchCommon.FuzzyAction | monarchCommon.FuzzyAction[] | null = null;
			let rule: monarchCommon.IRule | null = null;

			let enteringEmbeddedLanguage: string | null = null;

			// check if we need to process group matches first
			if (groupMatching) {
				matches = groupMatching.matches;
				const groupEntry = groupMatching.groups.shift()!;
				matched = groupEntry.matched;
				action = groupEntry.action;
				rule = groupMatching.rule;

				// cleanup if necessary
				if (groupMatching.groups.length === 0) {
					groupMatching = null;
				}
			} else {
				// otherwise we match on the token stream

				if (!forceEvaluation && pos >= lineLength) {
					// nothing to do
					break;
				}

				forceEvaluation = false;

				// get the rules for this state
				let rules: monarchCommon.IRule[] | null = this._lexer.tokenizer[state];
				if (!rules) {
					rules = monarchCommon.findRules(this._lexer, state); // do parent matching
					if (!rules) {
						throw monarchCommon.createError(this._lexer, 'tokenizer state is not defined: ' + state);
					}
				}

				// try each rule until we match
				const restOfLine = line.substring(pos);
				for (const candidate of rules) {
					if (pos === 0 || !candidate.matchOnlyAtLineStart) {
						matches = restOfLine.match(candidate.resolveRegex(state));
						if (matches) {
							matched = matches[0];
							action = candidate.action;
							// remember the matching rule so error messages can name it
							rule = candidate;
							break;
						}
					}
				}
			}

			// We matched 'rule' with 'matches' and 'action'
			if (!matches) {
				matches = [''];
				matched = '';
			}

			if (!action) {
				// bad: we didn't match anything, and there is no action to take
				// we need to advance the stream or we get progress trouble
				if (pos < lineLength) {
					matches = [line.charAt(pos)];
					matched = matches[0];
				}
				action = this._lexer.defaultToken;
			}

			if (matched === null) {
				// should never happen, needed for strict null checking
				break;
			}

			// advance stream
			pos += matched.length;

			// maybe call action function (used for 'cases')
			while (monarchCommon.isFuzzyAction(action) && monarchCommon.isIAction(action) && action.test) {
				action = action.test(matched, matches, state, pos === lineLength);
			}

			let result: monarchCommon.FuzzyAction | monarchCommon.FuzzyAction[] | null = null;
			// set the result: either a string or an array of actions
			if (typeof action === 'string' || Array.isArray(action)) {
				result = action;
			} else if (action.group) {
				result = action.group;
			} else if (action.token !== null && action.token !== undefined) {

				// do $n replacements?
				if (action.tokenSubst) {
					result = monarchCommon.substituteMatches(this._lexer, action.token, matched, matches, state);
				} else {
					result = action.token;
				}

				// enter embedded language?
				if (action.nextEmbedded) {
					if (action.nextEmbedded === '@pop') {
						if (!embeddedLanguageData) {
							throw monarchCommon.createError(this._lexer, 'cannot pop embedded language if not inside one');
						}
						embeddedLanguageData = null;
					} else if (embeddedLanguageData) {
						throw monarchCommon.createError(this._lexer, 'cannot enter embedded language from within an embedded language');
					} else {
						enteringEmbeddedLanguage = monarchCommon.substituteMatches(this._lexer, action.nextEmbedded, matched, matches, state);
					}
				}

				// state transformations
				if (action.goBack) { // back up the stream..
					pos = Math.max(0, pos - action.goBack);
				}

				if (action.switchTo && typeof action.switchTo === 'string') {
					let nextState = monarchCommon.substituteMatches(this._lexer, action.switchTo, matched, matches, state); // switch state without a push...
					if (nextState[0] === '@') {
						nextState = nextState.substring(1); // peel off starting '@'
					}
					if (!monarchCommon.findRules(this._lexer, nextState)) {
						throw monarchCommon.createError(this._lexer, 'trying to switch to a state \'' + nextState + '\' that is undefined in rule: ' + this._safeRuleName(rule));
					} else {
						stack = stack.switchTo(nextState);
					}
				} else if (action.transform && typeof action.transform === 'function') {
					throw monarchCommon.createError(this._lexer, 'action.transform not supported');
				} else if (action.next) {
					if (action.next === '@push') {
						if (stack.depth >= this._lexer.maxStack) {
							// the root frame has no parent; avoid a TypeError while building the message
							const parentState = (stack.parent ? stack.parent.state : '');
							throw monarchCommon.createError(this._lexer, 'maximum tokenizer stack size reached: [' +
								stack.state + ',' + parentState + ',...]');
						} else {
							stack = stack.push(state);
						}
					} else if (action.next === '@pop') {
						if (stack.depth <= 1) {
							throw monarchCommon.createError(this._lexer, 'trying to pop an empty stack in rule: ' + this._safeRuleName(rule));
						} else {
							stack = stack.pop()!;
						}
					} else if (action.next === '@popall') {
						stack = stack.popall();
					} else {
						let nextState = monarchCommon.substituteMatches(this._lexer, action.next, matched, matches, state);
						if (nextState[0] === '@') {
							nextState = nextState.substring(1); // peel off starting '@'
						}

						if (!monarchCommon.findRules(this._lexer, nextState)) {
							throw monarchCommon.createError(this._lexer, 'trying to set a next state \'' + nextState + '\' that is undefined in rule: ' + this._safeRuleName(rule));
						} else {
							stack = stack.push(nextState);
						}
					}
				}

				if (action.log && typeof (action.log) === 'string') {
					monarchCommon.log(this._lexer, this._lexer.languageId + ': ' + monarchCommon.substituteMatches(this._lexer, action.log, matched, matches, state));
				}
			}

			// check result
			if (result === null) {
				throw monarchCommon.createError(this._lexer, 'lexer rule has no well-defined action in rule: ' + this._safeRuleName(rule));
			}

			// is the result a group match?
			if (Array.isArray(result)) {
				if (groupMatching && groupMatching.groups.length > 0) {
					throw monarchCommon.createError(this._lexer, 'groups cannot be nested: ' + this._safeRuleName(rule));
				}
				if (matches.length !== result.length + 1) {
					throw monarchCommon.createError(this._lexer, 'matched number of groups does not match the number of actions in rule: ' + this._safeRuleName(rule));
				}
				let totalLen = 0;
				for (let i = 1; i < matches.length; i++) {
					totalLen += matches[i].length;
				}
				if (totalLen !== matched.length) {
					throw monarchCommon.createError(this._lexer, 'with groups, all characters should be matched in consecutive groups in rule: ' + this._safeRuleName(rule));
				}

				groupMatching = {
					rule: rule,
					matches: matches,
					groups: []
				};
				for (let i = 0; i < result.length; i++) {
					groupMatching.groups[i] = {
						action: result[i],
						matched: matches[i + 1]
					};
				}

				// rewind: the groups are consumed one per iteration from here on
				pos -= matched.length;
				// call recursively to initiate first result match
				continue;
			} else {
				// regular result

				// check for '@rematch'
				if (result === '@rematch') {
					pos -= matched.length;
					matched = ''; // better set the next state too..
					matches = null;
					result = '';

					// Even though `@rematch` was specified, if `nextEmbedded` also specified,
					// a state transition should occur.
					if (enteringEmbeddedLanguage !== null) {
						return computeNewStateForEmbeddedLanguage(enteringEmbeddedLanguage);
					}
				}

				// check progress
				if (matched.length === 0) {
					if (lineLength === 0 || stackLen0 !== stack.depth || state !== stack.state || (!groupMatching ? 0 : groupMatching.groups.length) !== groupLen0) {
						continue;
					} else {
						throw monarchCommon.createError(this._lexer, 'no progress in tokenizer in rule: ' + this._safeRuleName(rule));
					}
				}

				// return the result (and check for brace matching)
				// todo: for efficiency we could pre-sanitize tokenPostfix and substitutions
				let tokenType: string | null = null;
				if (monarchCommon.isString(result) && result.indexOf('@brackets') === 0) {
					const rest = result.substring('@brackets'.length);
					const bracket = findBracket(this._lexer, matched);
					if (!bracket) {
						throw monarchCommon.createError(this._lexer, '@brackets token returned but no bracket defined as: ' + matched);
					}
					tokenType = monarchCommon.sanitize(bracket.token + rest);
				} else {
					const token = (result === '' ? '' : result + this._lexer.tokenPostfix);
					tokenType = monarchCommon.sanitize(token);
				}

				// tokens that start on the appended '\n' are not reported
				if (pos0 < lineWithoutLFLength) {
					tokensCollector.emit(pos0 + offsetDelta, tokenType);
				}
			}

			if (enteringEmbeddedLanguage !== null) {
				return computeNewStateForEmbeddedLanguage(enteringEmbeddedLanguage);
			}
		}

		return MonarchLineStateFactory.create(stack, embeddedLanguageData);
	}

	/**
	 * Returns the initial embedded data for `languageId`. For a registered
	 * language other than this one, loading of its tokenizer is requested and
	 * the language is recorded as embedded. `NullState` is used whenever no
	 * tokenizer is available yet or the language is not registered.
	 */
	private _getNestedEmbeddedLanguageData(languageId: string): EmbeddedLanguageData {
		if (!this._languageService.isRegisteredLanguageId(languageId)) {
			return new EmbeddedLanguageData(languageId, NullState);
		}

		if (languageId !== this._languageId) {
			// Fire language loading event
			this._languageService.requestBasicLanguageFeatures(languageId);
			languages.TokenizationRegistry.getOrCreate(languageId);
			this._embeddedLanguages[languageId] = true;
		}

		const tokenizationSupport = languages.TokenizationRegistry.get(languageId);
		if (tokenizationSupport) {
			return new EmbeddedLanguageData(languageId, tokenizationSupport.getInitialState());
		}

		return new EmbeddedLanguageData(languageId, NullState);
	}
}
895
896
/**
 * Looks `matched` up among the lexer's bracket definitions (after applying
 * the lexer's case handling) and returns the bracket's token together with
 * whether the text is its opening or closing form. Returns `null` for empty
 * input or when no bracket matches.
 */
function findBracket(lexer: monarchCommon.ILexer, matched: string) {
	if (!matched) {
		return null;
	}

	const needle = monarchCommon.fixCase(lexer, matched);
	const definitions = lexer.brackets;

	for (let i = 0; i < definitions.length; i++) {
		const definition = definitions[i];
		if (definition.open === needle) {
			return { token: definition.token, bracketType: monarchCommon.MonarchBracket.Open };
		}
		if (definition.close === needle) {
			return { token: definition.token, bracketType: monarchCommon.MonarchBracket.Close };
		}
	}
	return null;
}
916
917