Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
microsoft
GitHub Repository: microsoft/vscode
Path: blob/main/src/vs/editor/standalone/common/monarch/monarchLexer.ts
5282 views
1
/*---------------------------------------------------------------------------------------------
2
* Copyright (c) Microsoft Corporation. All rights reserved.
3
* Licensed under the MIT License. See License.txt in the project root for license information.
4
*--------------------------------------------------------------------------------------------*/
5
6
/**
7
* Create a syntax highlighter with a fully declarative JSON style lexer description
8
* using regular expressions.
9
*/
10
11
import { Disposable, IDisposable } from '../../../../base/common/lifecycle.js';
12
import * as languages from '../../../common/languages.js';
13
import { NullState, nullTokenizeEncoded, nullTokenize } from '../../../common/languages/nullTokenize.js';
14
import { TokenTheme } from '../../../common/languages/supports/tokenization.js';
15
import { ILanguageService } from '../../../common/languages/language.js';
16
import * as monarchCommon from './monarchCommon.js';
17
import { IStandaloneThemeService } from '../standaloneTheme.js';
18
import { IConfigurationService } from '../../../../platform/configuration/common/configuration.js';
19
import { LanguageId, MetadataConsts } from '../../../common/encodedTokenAttributes.js';
20
21
// Depth limit handed to MonarchStackElementFactory and MonarchLineStateFactory:
// instances are cached and reused only while the stack depth stays below this value.
const CACHE_STACK_DEPTH = 5;
22
23
/**
 * Hands out `MonarchStackElement` instances, sharing one instance per distinct
 * (parent chain, state) combination as long as the parent is not too deep.
 */
class MonarchStackElementFactory {

	private static readonly _INSTANCE = new MonarchStackElementFactory(CACHE_STACK_DEPTH);

	/**
	 * Creates (or reuses) a stack element through the shared factory instance.
	 */
	public static create(parent: MonarchStackElement | null, state: string): MonarchStackElement {
		return this._INSTANCE.create(parent, state);
	}

	/** Parents at this depth or deeper never get their children cached. */
	private readonly _maxCacheDepth: number;
	/** Cache of shared elements, keyed by the parent chain id followed by the state. */
	private readonly _entries: { [stackElementId: string]: MonarchStackElement };

	constructor(maxCacheDepth: number) {
		this._maxCacheDepth = maxCacheDepth;
		// prototype-less object: state names can never collide with inherited keys
		this._entries = Object.create(null);
	}

	/**
	 * Returns a stack element for `state` on top of `parent`.
	 * The element is shared when `parent` is null or shallower than the cache depth,
	 * otherwise a fresh element is allocated on every call.
	 */
	public create(parent: MonarchStackElement | null, state: string): MonarchStackElement {
		const isCacheable = (parent === null || parent.depth < this._maxCacheDepth);
		if (!isCacheable) {
			return new MonarchStackElement(parent, state);
		}

		// key = "<id of parent chain>|<state>", or just "<state>" for a root element
		const parentId = MonarchStackElement.getStackElementId(parent);
		const cacheKey = (parentId.length > 0 ? parentId + '|' + state : state);

		const cached = this._entries[cacheKey];
		if (cached) {
			return cached;
		}

		const created = new MonarchStackElement(parent, state);
		this._entries[cacheKey] = created;
		return created;
	}
}
61
62
/**
 * One entry of the tokenizer state stack, linked to the entry below it through `parent`.
 * All fields are readonly; stack operations return other elements instead of mutating.
 */
class MonarchStackElement {

	/** The element below this one, or null for the bottom of the stack. */
	public readonly parent: MonarchStackElement | null;
	/** Name of the tokenizer state this element stands for. */
	public readonly state: string;
	/** Number of elements from this one down to the bottom (the bottom has depth 1). */
	public readonly depth: number;

	constructor(parent: MonarchStackElement | null, state: string) {
		this.parent = parent;
		this.state = state;
		this.depth = (parent ? parent.depth + 1 : 1);
	}

	/**
	 * Builds an id by joining the state names from `element` down to the bottom of the
	 * stack with '|'. Returns the empty string for null.
	 */
	public static getStackElementId(element: MonarchStackElement | null): string {
		let id = '';
		for (let current = element; current !== null; current = current.parent) {
			if (id.length > 0) {
				id += '|';
			}
			id += current.state;
		}
		return id;
	}

	/**
	 * Compares two stacks element by element. Reaching the very same instance on both
	 * sides ends the walk early, since everything below it is then shared.
	 */
	private static _equals(a: MonarchStackElement | null, b: MonarchStackElement | null): boolean {
		let left = a;
		let right = b;
		for (; left !== null && right !== null; left = left.parent, right = right.parent) {
			if (left === right) {
				return true;
			}
			if (left.state !== right.state) {
				return false;
			}
		}
		// equal only if both stacks ran out at the same time
		return (left === null && right === null);
	}

	/** Whether `other` represents the same sequence of states as this stack. */
	public equals(other: MonarchStackElement): boolean {
		return MonarchStackElement._equals(this, other);
	}

	/** Returns the stack with `state` placed on top of this element. */
	public push(state: string): MonarchStackElement {
		return MonarchStackElementFactory.create(this, state);
	}

	/** Returns the element below this one (null when this is the bottom). */
	public pop(): MonarchStackElement | null {
		return this.parent;
	}

	/** Returns the bottom element of the stack. */
	public popall(): MonarchStackElement {
		let bottom: MonarchStackElement = this;
		for (let below = bottom.parent; below; below = bottom.parent) {
			bottom = below;
		}
		return bottom;
	}

	/** Returns the stack with this element replaced by `state` (same parent). */
	public switchTo(state: string): MonarchStackElement {
		return MonarchStackElementFactory.create(this.parent, state);
	}
}
127
128
/**
 * Pairs the id of an embedded language with the tokenizer state of that language.
 */
class EmbeddedLanguageData {
	/** Id of the embedded language. */
	public readonly languageId: string;
	/** State of the embedded language's tokenizer. */
	public readonly state: languages.IState;

	constructor(languageId: string, state: languages.IState) {
		this.languageId = languageId;
		this.state = state;
	}

	/**
	 * Two instances are equal when they name the same language and their states
	 * report equality through `IState.equals`.
	 */
	public equals(other: EmbeddedLanguageData): boolean {
		return (
			this.languageId === other.languageId
			&& this.state.equals(other.state)
		);
	}

	/**
	 * Returns a copy holding a clone of the state.
	 * When the state returns itself from `clone()`, this instance is returned as is
	 * to save an allocation.
	 */
	public clone(): EmbeddedLanguageData {
		const stateClone = this.state.clone();
		// save an object
		if (stateClone === this.state) {
			return this;
		}
		// The copy must carry the cloned state; reusing `this.state` here would make
		// the "clone" share the original state object.
		return new EmbeddedLanguageData(this.languageId, stateClone);
	}
}
153
154
/**
 * Hands out `MonarchLineState` instances, sharing one instance per stack for states
 * that carry no embedded language and whose stack is not too deep.
 */
class MonarchLineStateFactory {

	private static readonly _INSTANCE = new MonarchLineStateFactory(CACHE_STACK_DEPTH);

	/**
	 * Creates (or reuses) a line state through the shared factory instance.
	 */
	public static create(stack: MonarchStackElement, embeddedLanguageData: EmbeddedLanguageData | null): MonarchLineState {
		return this._INSTANCE.create(stack, embeddedLanguageData);
	}

	/** Stacks at this depth or deeper are never cached. */
	private readonly _maxCacheDepth: number;
	/** Cache of shared line states, keyed by the id of their stack. */
	private readonly _entries: { [stackElementId: string]: MonarchLineState };

	constructor(maxCacheDepth: number) {
		this._maxCacheDepth = maxCacheDepth;
		this._entries = Object.create(null);
	}

	/**
	 * Returns a line state for the given stack and embedded language data.
	 * A fresh instance is allocated when embedded data is present or the stack is at or
	 * beyond the cache depth; otherwise the instance is looked up by stack id and shared.
	 */
	public create(stack: MonarchStackElement, embeddedLanguageData: EmbeddedLanguageData | null): MonarchLineState {
		const hasEmbeddedData = (embeddedLanguageData !== null);
		const isTooDeep = (stack !== null && stack.depth >= this._maxCacheDepth);
		if (hasEmbeddedData || isTooDeep) {
			// no caching when embedding, nor above a certain depth
			return new MonarchLineState(stack, embeddedLanguageData);
		}

		const cacheKey = MonarchStackElement.getStackElementId(stack);
		const cached = this._entries[cacheKey];
		if (cached) {
			return cached;
		}

		const created = new MonarchLineState(stack, null);
		this._entries[cacheKey] = created;
		return created;
	}
}
192
193
/**
 * Tokenizer state at a line boundary: the Monarch state stack plus, while inside an
 * embedded language, that language's id and state.
 */
class MonarchLineState implements languages.IState {

	/** The Monarch tokenizer state stack. */
	public readonly stack: MonarchStackElement;
	/** Embedded language information, or null when not inside an embedded language. */
	public readonly embeddedLanguageData: EmbeddedLanguageData | null;

	constructor(
		stack: MonarchStackElement,
		embeddedLanguageData: EmbeddedLanguageData | null
	) {
		this.stack = stack;
		this.embeddedLanguageData = embeddedLanguageData;
	}

	/**
	 * Returns a copy of this state.
	 * When there is no embedded data, or the embedded data returns itself from `clone()`,
	 * this instance is returned as is to save an allocation.
	 */
	public clone(): languages.IState {
		const embeddedLanguageDataClone = this.embeddedLanguageData ? this.embeddedLanguageData.clone() : null;
		// save an object
		if (embeddedLanguageDataClone === this.embeddedLanguageData) {
			return this;
		}
		// The new state must carry the cloned embedded data; passing the original here
		// would make the "clone" share it with this state.
		return MonarchLineStateFactory.create(this.stack, embeddedLanguageDataClone);
	}

	/**
	 * Two states are equal when `other` is a `MonarchLineState`, the stacks are equal and
	 * the embedded data is either absent on both sides or equal.
	 */
	public equals(other: languages.IState): boolean {
		if (!(other instanceof MonarchLineState)) {
			return false;
		}
		if (!this.stack.equals(other.stack)) {
			return false;
		}
		if (this.embeddedLanguageData === null && other.embeddedLanguageData === null) {
			return true;
		}
		if (this.embeddedLanguageData === null || other.embeddedLanguageData === null) {
			// exactly one side is inside an embedded language
			return false;
		}
		return this.embeddedLanguageData.equals(other.embeddedLanguageData);
	}
}
231
232
/**
 * Sink for the tokens produced while tokenizing one line.
 */
interface IMonarchTokensCollector {
	/**
	 * Sets the language that subsequently emitted tokens belong to.
	 */
	enterLanguage(languageId: string): void;

	/**
	 * Reports a token of the given type starting at `startOffset`.
	 */
	emit(startOffset: number, type: string): void;

	/**
	 * Tokenizes `embeddedLanguageLine` with the embedded language described by
	 * `embeddedLanguageData`, shifting the resulting token offsets by `offsetDelta`.
	 * Returns the state the embedded language ended in.
	 */
	nestedLanguageTokenize(embeddedLanguageLine: string, hasEOL: boolean, embeddedLanguageData: EmbeddedLanguageData, offsetDelta: number): languages.IState;
}
237
238
/**
 * Collects tokens as `languages.Token` objects (offset, type, language).
 */
class MonarchClassicTokensCollector implements IMonarchTokensCollector {

	/** Tokens gathered so far for the current line. */
	private _tokens: languages.Token[] = [];
	/** Language that newly emitted tokens are attributed to. */
	private _languageId: string | null = null;
	/** Type and language of the most recently pushed token, used to merge repeats. */
	private _lastTokenType: string | null = null;
	private _lastTokenLanguage: string | null = null;

	public enterLanguage(languageId: string): void {
		this._languageId = languageId;
	}

	/**
	 * Records a token unless it has the same type and language as the previous one,
	 * in which case the previous token simply keeps extending.
	 */
	public emit(startOffset: number, type: string): void {
		const repeatsPrevious = (this._lastTokenType === type && this._lastTokenLanguage === this._languageId);
		if (repeatsPrevious) {
			return;
		}
		this._lastTokenType = type;
		this._lastTokenLanguage = this._languageId;
		this._tokens.push(new languages.Token(startOffset, type, this._languageId!));
	}

	/**
	 * Tokenizes a line fragment with the embedded language's registered tokenizer and
	 * appends its tokens, shifted by `offsetDelta`. When no tokenizer is registered for
	 * the language, a single token with the empty type is emitted for the fragment and
	 * the incoming state is returned unchanged.
	 */
	public nestedLanguageTokenize(embeddedLanguageLine: string, hasEOL: boolean, embeddedLanguageData: EmbeddedLanguageData, offsetDelta: number): languages.IState {
		const { languageId: nestedLanguageId, state: nestedStartState } = embeddedLanguageData;

		const nestedSupport = languages.TokenizationRegistry.get(nestedLanguageId);
		if (!nestedSupport) {
			this.enterLanguage(nestedLanguageId);
			this.emit(offsetDelta, '');
			return nestedStartState;
		}

		const nestedResult = nestedSupport.tokenize(embeddedLanguageLine, hasEOL, nestedStartState);
		if (offsetDelta === 0) {
			// offsets are already correct, take the tokens over as they are
			this._tokens = this._tokens.concat(nestedResult.tokens);
		} else {
			for (const nestedToken of nestedResult.tokens) {
				this._tokens.push(new languages.Token(nestedToken.offset + offsetDelta, nestedToken.type, nestedToken.language));
			}
		}

		// forget the merge state: the next emitted token must always be recorded
		this._lastTokenType = null;
		this._lastTokenLanguage = null;
		this._languageId = null;
		return nestedResult.endState;
	}

	/** Wraps the collected tokens and `endState` into a tokenization result. */
	public finalize(endState: MonarchLineState): languages.TokenizationResult {
		return new languages.TokenizationResult(this._tokens, endState);
	}
}
294
295
/**
 * Collects tokens in encoded form: a flat sequence of (start offset, metadata) pairs,
 * with the metadata looked up in the token theme.
 */
class MonarchModernTokensCollector implements IMonarchTokensCollector {

	private readonly _languageService: ILanguageService;
	private readonly _theme: TokenTheme;
	/** Tokens that were finished by an earlier `nestedLanguageTokenize` call. */
	private _prependTokens: Uint32Array | null;
	/** Pairs emitted since the last `nestedLanguageTokenize` call. */
	private _tokens: number[];
	private _currentLanguageId: LanguageId;
	/** Metadata of the most recently pushed token, used to merge repeats. */
	private _lastTokenMetadata: number;

	constructor(languageService: ILanguageService, theme: TokenTheme) {
		this._languageService = languageService;
		this._theme = theme;
		this._prependTokens = null;
		this._tokens = [];
		this._currentLanguageId = LanguageId.Null;
		this._lastTokenMetadata = 0;
	}

	public enterLanguage(languageId: string): void {
		this._currentLanguageId = this._languageService.languageIdCodec.encodeLanguageId(languageId);
	}

	/**
	 * Records a token unless its metadata equals that of the previous token.
	 */
	public emit(startOffset: number, type: string): void {
		const metadata = this._theme.match(this._currentLanguageId, type) | MetadataConsts.BALANCED_BRACKETS_MASK;
		if (metadata === this._lastTokenMetadata) {
			return;
		}
		this._lastTokenMetadata = metadata;
		this._tokens.push(startOffset, metadata);
	}

	/**
	 * Concatenates `a`, `b` and `c` (null counts as empty) into one `Uint32Array`.
	 * When only `a` or only `c` has content, that very array is returned without copying.
	 */
	private static _merge(a: Uint32Array | null, b: number[], c: Uint32Array | null): Uint32Array {
		const aLen = (a === null ? 0 : a.length);
		const bLen = b.length;
		const cLen = (c === null ? 0 : c.length);

		if (bLen === 0) {
			if (aLen === 0 && cLen === 0) {
				return new Uint32Array(0);
			}
			if (aLen === 0) {
				return c!;
			}
			if (cLen === 0) {
				return a!;
			}
		}

		const merged = new Uint32Array(aLen + bLen + cLen);
		if (a !== null) {
			merged.set(a);
		}
		merged.set(b, aLen);
		if (c !== null) {
			merged.set(c, aLen + bLen);
		}
		return merged;
	}

	/**
	 * Tokenizes a line fragment with the embedded language's registered tokenizer and
	 * appends its encoded tokens, shifted by `offsetDelta`. When no tokenizer is
	 * registered for the language, a single token with the empty type is emitted for the
	 * fragment and the incoming state is returned unchanged.
	 */
	public nestedLanguageTokenize(embeddedLanguageLine: string, hasEOL: boolean, embeddedLanguageData: EmbeddedLanguageData, offsetDelta: number): languages.IState {
		const { languageId: nestedLanguageId, state: nestedStartState } = embeddedLanguageData;

		const nestedSupport = languages.TokenizationRegistry.get(nestedLanguageId);
		if (!nestedSupport) {
			this.enterLanguage(nestedLanguageId);
			this.emit(offsetDelta, '');
			return nestedStartState;
		}

		const nestedResult = nestedSupport.tokenizeEncoded(embeddedLanguageLine, hasEOL, nestedStartState);
		const nestedTokens = nestedResult.tokens;
		if (offsetDelta !== 0) {
			// even slots hold start offsets; they are shifted in place
			for (let i = 0; i < nestedTokens.length; i += 2) {
				nestedTokens[i] += offsetDelta;
			}
		}

		// everything so far is final: fold it into the prepend buffer and start over
		this._prependTokens = MonarchModernTokensCollector._merge(this._prependTokens, this._tokens, nestedTokens);
		this._tokens = [];
		this._currentLanguageId = 0;
		this._lastTokenMetadata = 0;
		return nestedResult.endState;
	}

	/** Wraps all collected tokens and `endState` into an encoded tokenization result. */
	public finalize(endState: MonarchLineState): languages.EncodedTokenizationResult {
		const allTokens = MonarchModernTokensCollector._merge(this._prependTokens, this._tokens, null);
		return new languages.EncodedTokenizationResult(allTokens, [], endState);
	}
}
388
389
/**
 * Result of `MonarchTokenizer.getLoadStatus()`: either everything is loaded, or
 * `promise` settles once the pending embedded-language loads it was built from have settled.
 */
export type ILoadStatus = { loaded: true } | { loaded: false; promise: Promise<void> };
390
391
/**
 * Tokenizer driven by a compiled Monarch lexer (`monarchCommon.ILexer`).
 *
 * Provides both the classic (`tokenize`) and the encoded (`tokenizeEncoded`) entry
 * points, and delegates to other registered tokenizers while a rule has entered an
 * embedded language through `nextEmbedded`.
 */
export class MonarchTokenizer extends Disposable implements languages.ITokenizationSupport, IDisposable {

	private readonly _languageService: ILanguageService;
	private readonly _standaloneThemeService: IStandaloneThemeService;
	private readonly _languageId: string;
	private readonly _lexer: monarchCommon.ILexer;
	/** Ids of the languages this tokenizer has embedded so far. */
	private readonly _embeddedLanguages: { [languageId: string]: boolean };
	public embeddedLoaded: Promise<void>;
	/** Lines at least this long are not tokenized by the lexer. */
	private _maxTokenizationLineLength: number;

	constructor(languageService: ILanguageService, standaloneThemeService: IStandaloneThemeService, languageId: string, lexer: monarchCommon.ILexer, @IConfigurationService private readonly _configurationService: IConfigurationService) {
		super();
		this._languageService = languageService;
		this._standaloneThemeService = standaloneThemeService;
		this._languageId = languageId;
		this._lexer = lexer;
		this._embeddedLanguages = Object.create(null);
		this.embeddedLoaded = Promise.resolve(undefined);

		// When the tokenizer of an embedded language changes, announce a change for this
		// language as well. `emitting` keeps us from reacting to our own announcement.
		let emitting = false;
		this._register(languages.TokenizationRegistry.onDidChange((e) => {
			if (emitting) {
				return;
			}
			let affectsEmbeddedLanguage = false;
			for (let i = 0, len = e.changedLanguages.length; i < len; i++) {
				if (this._embeddedLanguages[e.changedLanguages[i]]) {
					affectsEmbeddedLanguage = true;
					break;
				}
			}
			if (!affectsEmbeddedLanguage) {
				return;
			}
			emitting = true;
			languages.TokenizationRegistry.handleChange([this._languageId]);
			emitting = false;
		}));

		// Track the line length limit, per language, and follow configuration changes.
		const readMaxTokenizationLineLength = () => this._configurationService.getValue<number>('editor.maxTokenizationLineLength', {
			overrideIdentifier: this._languageId
		});
		this._maxTokenizationLineLength = readMaxTokenizationLineLength();
		this._register(this._configurationService.onDidChangeConfiguration(e => {
			if (e.affectsConfiguration('editor.maxTokenizationLineLength')) {
				this._maxTokenizationLineLength = readMaxTokenizationLineLength();
			}
		}));
	}

	/**
	 * Reports whether all embedded languages seen so far have their tokenizers available.
	 * Embedded Monarch tokenizers are asked recursively; languages without a tokenizer
	 * that are not yet resolved in the registry contribute a pending promise.
	 */
	public getLoadStatus(): ILoadStatus {
		const pending: Thenable<any>[] = [];
		for (const nestedLanguageId of Object.keys(this._embeddedLanguages)) {
			const nestedSupport = languages.TokenizationRegistry.get(nestedLanguageId);
			if (!nestedSupport) {
				if (!languages.TokenizationRegistry.isResolved(nestedLanguageId)) {
					// The nested language is in the process of being loaded
					pending.push(languages.TokenizationRegistry.getOrCreate(nestedLanguageId));
				}
			} else if (nestedSupport instanceof MonarchTokenizer) {
				// The nested language is already loaded, but may embed languages itself
				const nestedStatus = nestedSupport.getLoadStatus();
				if (nestedStatus.loaded === false) {
					pending.push(nestedStatus.promise);
				}
			}
		}

		if (pending.length === 0) {
			return {
				loaded: true
			};
		}
		return {
			loaded: false,
			promise: Promise.all(pending).then(_ => undefined)
		};
	}

	/**
	 * Returns the state for the first line: a stack holding only the lexer's start
	 * state, outside of any embedded language.
	 */
	public getInitialState(): languages.IState {
		const rootState = MonarchStackElementFactory.create(null, this._lexer.start!);
		return MonarchLineStateFactory.create(rootState, null);
	}

	/**
	 * Tokenizes one line into `languages.Token` objects. Lines whose length reaches the
	 * configured maximum are handed to `nullTokenize` instead.
	 */
	public tokenize(line: string, hasEOL: boolean, lineState: languages.IState): languages.TokenizationResult {
		if (line.length >= this._maxTokenizationLineLength) {
			return nullTokenize(this._languageId, lineState);
		}
		const collector = new MonarchClassicTokensCollector();
		const endLineState = this._tokenize(line, hasEOL, lineState as MonarchLineState, collector);
		return collector.finalize(endLineState);
	}

	/**
	 * Tokenizes one line into encoded tokens, using the current color theme's token
	 * theme. Lines whose length reaches the configured maximum are handed to
	 * `nullTokenizeEncoded` instead.
	 */
	public tokenizeEncoded(line: string, hasEOL: boolean, lineState: languages.IState): languages.EncodedTokenizationResult {
		if (line.length >= this._maxTokenizationLineLength) {
			return nullTokenizeEncoded(this._languageService.languageIdCodec.encodeLanguageId(this._languageId), lineState);
		}
		const collector = new MonarchModernTokensCollector(this._languageService, this._standaloneThemeService.getColorTheme().tokenTheme);
		const endLineState = this._tokenize(line, hasEOL, lineState as MonarchLineState, collector);
		return collector.finalize(endLineState);
	}

	/**
	 * Starts tokenizing a line either in the embedded language (when the incoming state
	 * is inside one) or with this lexer.
	 */
	private _tokenize(line: string, hasEOL: boolean, lineState: MonarchLineState, collector: IMonarchTokensCollector): MonarchLineState {
		if (lineState.embeddedLanguageData) {
			return this._nestedTokenize(line, hasEOL, lineState, 0, collector);
		}
		return this._myTokenize(line, hasEOL, lineState, 0, collector);
	}

	/**
	 * Finds the smallest offset in `line` at which one of the current state's rules
	 * that leave the embedded language matches; returns -1 if none matches.
	 *
	 * @throws if the current state has no rules, or has no rule that leaves the
	 * embedded language.
	 */
	private _findLeavingNestedLanguageOffset(line: string, state: MonarchLineState): number {
		const stateName = state.stack.state;
		let rules: monarchCommon.IRule[] | null = this._lexer.tokenizer[stateName];
		if (!rules) {
			rules = monarchCommon.findRules(this._lexer, stateName); // do parent matching
			if (!rules) {
				throw monarchCommon.createError(this._lexer, 'tokenizer state is not defined: ' + stateName);
			}
		}

		let popOffset = -1;
		let hasEmbeddedPopRule = false;

		for (const rule of rules) {
			const ruleAction = rule.action;
			const leavesEmbedded = monarchCommon.isIAction(ruleAction) && (ruleAction.nextEmbedded === '@pop' || ruleAction.hasEmbeddedEndInCases);
			if (!leavesEmbedded) {
				continue;
			}
			hasEmbeddedPopRule = true;

			let regex = rule.resolveRegex(stateName);
			const regexSource = regex.source;
			if (regexSource.startsWith('^(?:') && regexSource.endsWith(')')) {
				// Strip the '^(?:' ... ')' wrapper so the pattern can be searched for
				// anywhere in the line, keeping only the 'i' and 'u' flags.
				const flags = (regex.ignoreCase ? 'i' : '') + (regex.unicode ? 'u' : '');
				regex = new RegExp(regexSource.slice(4, -1), flags);
			}

			const matchOffset = line.search(regex);
			if (matchOffset === -1 || (matchOffset !== 0 && rule.matchOnlyAtLineStart)) {
				continue;
			}

			if (popOffset === -1 || matchOffset < popOffset) {
				popOffset = matchOffset;
			}
		}

		if (!hasEmbeddedPopRule) {
			throw monarchCommon.createError(this._lexer, 'no rule containing nextEmbedded: "@pop" in tokenizer embedded state: ' + stateName);
		}

		return popOffset;
	}

	/**
	 * Tokenizes a line that starts inside an embedded language: the part up to the
	 * point where the embedded language is left goes to the embedded tokenizer, the
	 * rest is tokenized with this lexer.
	 */
	private _nestedTokenize(line: string, hasEOL: boolean, lineState: MonarchLineState, offsetDelta: number, tokensCollector: IMonarchTokensCollector): MonarchLineState {

		const popOffset = this._findLeavingNestedLanguageOffset(line, lineState);

		if (popOffset === -1) {
			// tokenization will not leave nested language
			const nestedEndState = tokensCollector.nestedLanguageTokenize(line, hasEOL, lineState.embeddedLanguageData!, offsetDelta);
			return MonarchLineStateFactory.create(lineState.stack, new EmbeddedLanguageData(lineState.embeddedLanguageData!.languageId, nestedEndState));
		}

		const nestedLanguageLine = line.substring(0, popOffset);
		if (nestedLanguageLine.length > 0) {
			// tokenize with the nested language
			tokensCollector.nestedLanguageTokenize(nestedLanguageLine, false, lineState.embeddedLanguageData!, offsetDelta);
		}

		const restOfTheLine = line.substring(popOffset);
		return this._myTokenize(restOfTheLine, hasEOL, lineState, offsetDelta + popOffset, tokensCollector);
	}

	/** Name of `rule` for error messages, or '(unknown)' when there is no rule. */
	private _safeRuleName(rule: monarchCommon.IRule | null): string {
		return rule ? rule.name : '(unknown)';
	}

	/**
	 * Tokenizes `lineWithoutLF` with this lexer's rules, starting from `lineState`.
	 *
	 * Tokens are reported to `tokensCollector` with their offsets shifted by
	 * `offsetDelta`. When a rule enters an embedded language, the remainder of the line
	 * is handed to `_nestedTokenize`.
	 *
	 * @returns the state to start the next line with.
	 * @throws for lexer definition problems detected while tokenizing (undefined states,
	 * stack overflow/underflow, malformed group rules, rules that make no progress, ...).
	 */
	private _myTokenize(lineWithoutLF: string, hasEOL: boolean, lineState: MonarchLineState, offsetDelta: number, tokensCollector: IMonarchTokensCollector): MonarchLineState {
		tokensCollector.enterLanguage(this._languageId);

		const lineWithoutLFLength = lineWithoutLF.length;
		const line = (hasEOL && this._lexer.includeLF ? lineWithoutLF + '\n' : lineWithoutLF);
		const lineLength = line.length;

		let embeddedLanguageData = lineState.embeddedLanguageData;
		let stack = lineState.stack;
		let pos = 0;

		// regular expression group matching
		// these never need cloning or equality since they are only used within a line match
		interface GroupMatching {
			matches: string[];
			rule: monarchCommon.IRule | null;
			groups: { action: monarchCommon.FuzzyAction; matched: string }[];
		}
		let groupMatching: GroupMatching | null = null;

		// Computes the end state once a rule has entered an embedded language. Reads the
		// current `pos` and `stack` at call time, so it is safe to define it once here.
		const computeNewStateForEmbeddedLanguage = (enteringEmbeddedLanguage: string) => {
			// support language names, mime types, and language ids
			const languageId = (
				this._languageService.getLanguageIdByLanguageName(enteringEmbeddedLanguage)
				|| this._languageService.getLanguageIdByMimeType(enteringEmbeddedLanguage)
				|| enteringEmbeddedLanguage
			);

			const nestedLanguageData = this._getNestedEmbeddedLanguageData(languageId);

			if (pos < lineLength) {
				// there is content from the embedded language on this line
				const restOfLine = lineWithoutLF.substring(pos);
				return this._nestedTokenize(restOfLine, hasEOL, MonarchLineStateFactory.create(stack, nestedLanguageData), offsetDelta + pos, tokensCollector);
			}
			return MonarchLineStateFactory.create(stack, nestedLanguageData);
		};

		// See https://github.com/microsoft/monaco-editor/issues/1235
		// Evaluate rules at least once for an empty line
		let forceEvaluation = true;

		while (forceEvaluation || pos < lineLength) {

			// snapshot used by the progress check further down
			const pos0 = pos;
			const stackLen0 = stack.depth;
			const groupLen0 = groupMatching ? groupMatching.groups.length : 0;
			const state = stack.state;

			let matches: string[] | null = null;
			let matched: string | null = null;
			let action: monarchCommon.FuzzyAction | monarchCommon.FuzzyAction[] | null = null;
			let rule: monarchCommon.IRule | null = null;

			let enteringEmbeddedLanguage: string | null = null;

			// check if we need to process group matches first
			if (groupMatching) {
				matches = groupMatching.matches;
				const groupEntry = groupMatching.groups.shift()!;
				matched = groupEntry.matched;
				action = groupEntry.action;
				rule = groupMatching.rule;

				// cleanup if necessary
				if (groupMatching.groups.length === 0) {
					groupMatching = null;
				}
			} else {
				// otherwise we match on the token stream

				if (!forceEvaluation && pos >= lineLength) {
					// nothing to do
					break;
				}

				forceEvaluation = false;

				// get the rules for this state
				let rules: monarchCommon.IRule[] | null = this._lexer.tokenizer[state];
				if (!rules) {
					rules = monarchCommon.findRules(this._lexer, state); // do parent matching
					if (!rules) {
						throw monarchCommon.createError(this._lexer, 'tokenizer state is not defined: ' + state);
					}
				}

				// try each rule until we match
				const restOfLine = line.substring(pos);
				for (const candidate of rules) {
					if (pos !== 0 && candidate.matchOnlyAtLineStart) {
						continue;
					}
					matches = restOfLine.match(candidate.resolveRegex(state));
					if (matches) {
						matched = matches[0];
						action = candidate.action;
						// remember the matched rule so that error messages can name it
						rule = candidate;
						break;
					}
				}
			}

			// We matched 'rule' with 'matches' and 'action'
			if (!matches) {
				matches = [''];
				matched = '';
			}

			if (!action) {
				// bad: we didn't match anything, and there is no action to take
				// we need to advance the stream or we get progress trouble
				if (pos < lineLength) {
					matches = [line.charAt(pos)];
					matched = matches[0];
				}
				action = this._lexer.defaultToken;
			}

			if (matched === null) {
				// should never happen, needed for strict null checking
				break;
			}

			// advance stream
			pos += matched.length;

			// maybe call action function (used for 'cases')
			while (monarchCommon.isFuzzyAction(action) && monarchCommon.isIAction(action) && action.test) {
				action = action.test(matched, matches, state, pos === lineLength);
			}

			let result: monarchCommon.FuzzyAction | monarchCommon.FuzzyAction[] | null = null;
			// set the result: either a string or an array of actions
			if (typeof action === 'string' || Array.isArray(action)) {
				result = action;
			} else if (action.group) {
				result = action.group;
			} else if (action.token !== null && action.token !== undefined) {

				// do $n replacements?
				if (action.tokenSubst) {
					result = monarchCommon.substituteMatches(this._lexer, action.token, matched, matches, state);
				} else {
					result = action.token;
				}

				// enter embedded language?
				if (action.nextEmbedded) {
					if (action.nextEmbedded === '@pop') {
						if (!embeddedLanguageData) {
							throw monarchCommon.createError(this._lexer, 'cannot pop embedded language if not inside one');
						}
						embeddedLanguageData = null;
					} else if (embeddedLanguageData) {
						throw monarchCommon.createError(this._lexer, 'cannot enter embedded language from within an embedded language');
					} else {
						enteringEmbeddedLanguage = monarchCommon.substituteMatches(this._lexer, action.nextEmbedded, matched, matches, state);
					}
				}

				// state transformations
				if (action.goBack) { // back up the stream..
					pos = Math.max(0, pos - action.goBack);
				}

				if (action.switchTo && typeof action.switchTo === 'string') {
					let nextState = monarchCommon.substituteMatches(this._lexer, action.switchTo, matched, matches, state); // switch state without a push...
					if (nextState[0] === '@') {
						nextState = nextState.substring(1); // peel off starting '@'
					}
					if (!monarchCommon.findRules(this._lexer, nextState)) {
						throw monarchCommon.createError(this._lexer, 'trying to switch to a state \'' + nextState + '\' that is undefined in rule: ' + this._safeRuleName(rule));
					} else {
						stack = stack.switchTo(nextState);
					}
				} else if (action.transform && typeof action.transform === 'function') {
					throw monarchCommon.createError(this._lexer, 'action.transform not supported');
				} else if (action.next) {
					if (action.next === '@push') {
						if (stack.depth >= this._lexer.maxStack) {
							throw monarchCommon.createError(this._lexer, 'maximum tokenizer stack size reached: [' +
								stack.state + ',' + stack.parent!.state + ',...]');
						} else {
							stack = stack.push(state);
						}
					} else if (action.next === '@pop') {
						if (stack.depth <= 1) {
							throw monarchCommon.createError(this._lexer, 'trying to pop an empty stack in rule: ' + this._safeRuleName(rule));
						} else {
							stack = stack.pop()!;
						}
					} else if (action.next === '@popall') {
						stack = stack.popall();
					} else {
						let nextState = monarchCommon.substituteMatches(this._lexer, action.next, matched, matches, state);
						if (nextState[0] === '@') {
							nextState = nextState.substring(1); // peel off starting '@'
						}

						if (!monarchCommon.findRules(this._lexer, nextState)) {
							throw monarchCommon.createError(this._lexer, 'trying to set a next state \'' + nextState + '\' that is undefined in rule: ' + this._safeRuleName(rule));
						} else {
							stack = stack.push(nextState);
						}
					}
				}

				if (action.log && typeof (action.log) === 'string') {
					monarchCommon.log(this._lexer, this._lexer.languageId + ': ' + monarchCommon.substituteMatches(this._lexer, action.log, matched, matches, state));
				}
			}

			// check result
			if (result === null) {
				throw monarchCommon.createError(this._lexer, 'lexer rule has no well-defined action in rule: ' + this._safeRuleName(rule));
			}

			// is the result a group match?
			if (Array.isArray(result)) {
				if (groupMatching && groupMatching.groups.length > 0) {
					throw monarchCommon.createError(this._lexer, 'groups cannot be nested: ' + this._safeRuleName(rule));
				}
				if (matches.length !== result.length + 1) {
					throw monarchCommon.createError(this._lexer, 'matched number of groups does not match the number of actions in rule: ' + this._safeRuleName(rule));
				}
				let totalLen = 0;
				for (let i = 1; i < matches.length; i++) {
					totalLen += matches[i].length;
				}
				if (totalLen !== matched.length) {
					throw monarchCommon.createError(this._lexer, 'with groups, all characters should be matched in consecutive groups in rule: ' + this._safeRuleName(rule));
				}

				// queue one (action, text) entry per capture group
				groupMatching = {
					rule: rule,
					matches: matches,
					groups: []
				};
				for (let i = 0; i < result.length; i++) {
					groupMatching.groups[i] = {
						action: result[i],
						matched: matches[i + 1]
					};
				}

				// rewind: the queued group entries consume the matched text one by one
				pos -= matched.length;
				continue;
			}

			// regular result

			// check for '@rematch'
			if (result === '@rematch') {
				pos -= matched.length;
				matched = ''; // better set the next state too..
				matches = null;
				result = '';

				// Even though `@rematch` was specified, if `nextEmbedded` also specified,
				// a state transition should occur.
				if (enteringEmbeddedLanguage !== null) {
					return computeNewStateForEmbeddedLanguage(enteringEmbeddedLanguage);
				}
			}

			// check progress
			if (matched.length === 0) {
				if (lineLength === 0 || stackLen0 !== stack.depth || state !== stack.state || (!groupMatching ? 0 : groupMatching.groups.length) !== groupLen0) {
					// nothing was consumed, but the tokenizer state changed (or the line is empty)
					continue;
				} else {
					throw monarchCommon.createError(this._lexer, 'no progress in tokenizer in rule: ' + this._safeRuleName(rule));
				}
			}

			// return the result (and check for brace matching)
			// todo: for efficiency we could pre-sanitize tokenPostfix and substitutions
			let tokenType: string | null = null;
			if (monarchCommon.isString(result) && result.startsWith('@brackets')) {
				const rest = result.substring('@brackets'.length);
				const bracket = findBracket(this._lexer, matched);
				if (!bracket) {
					throw monarchCommon.createError(this._lexer, '@brackets token returned but no bracket defined as: ' + matched);
				}
				tokenType = monarchCommon.sanitize(bracket.token + rest);
			} else {
				const token = (result === '' ? '' : result + this._lexer.tokenPostfix);
				tokenType = monarchCommon.sanitize(token);
			}

			// tokens starting in the appended '\n' (includeLF) are not reported
			if (pos0 < lineWithoutLFLength) {
				tokensCollector.emit(pos0 + offsetDelta, tokenType);
			}

			if (enteringEmbeddedLanguage !== null) {
				return computeNewStateForEmbeddedLanguage(enteringEmbeddedLanguage);
			}
		}

		return MonarchLineStateFactory.create(stack, embeddedLanguageData);
	}

	/**
	 * Builds the embedded language data for `languageId`, starting in that language's
	 * initial tokenizer state when a tokenizer is available and in `NullState` otherwise.
	 * For registered languages other than this one, loading of the language is requested
	 * and the language is remembered as embedded.
	 */
	private _getNestedEmbeddedLanguageData(languageId: string): EmbeddedLanguageData {
		if (!this._languageService.isRegisteredLanguageId(languageId)) {
			return new EmbeddedLanguageData(languageId, NullState);
		}

		if (languageId !== this._languageId) {
			// Fire language loading event
			this._languageService.requestBasicLanguageFeatures(languageId);
			languages.TokenizationRegistry.getOrCreate(languageId);
			this._embeddedLanguages[languageId] = true;
		}

		const nestedSupport = languages.TokenizationRegistry.get(languageId);
		const initialState = (nestedSupport ? nestedSupport.getInitialState() : NullState);
		return new EmbeddedLanguageData(languageId, initialState);
	}
}
896
897
/**
 * Looks `matched` up among the lexer's `brackets` after passing it through
 * `monarchCommon.fixCase`.
 *
 * @returns the bracket's token together with whether `matched` is its opening or
 * closing side, or null when `matched` is empty or no bracket fits.
 */
function findBracket(lexer: monarchCommon.ILexer, matched: string) {
	if (!matched) {
		return null;
	}
	const needle = monarchCommon.fixCase(lexer, matched);

	for (const candidate of lexer.brackets) {
		if (candidate.open === needle) {
			return { token: candidate.token, bracketType: monarchCommon.MonarchBracket.Open };
		}
		if (candidate.close === needle) {
			return { token: candidate.token, bracketType: monarchCommon.MonarchBracket.Close };
		}
	}
	return null;
}
917
918