Path: blob/main/src/vs/editor/standalone/common/monarch/monarchLexer.ts
3296 views
/*---------------------------------------------------------------------------------------------
 *  Copyright (c) Microsoft Corporation. All rights reserved.
 *  Licensed under the MIT License. See License.txt in the project root for license information.
 *--------------------------------------------------------------------------------------------*/

/**
 * Create a syntax highlighter with a fully declarative JSON style lexer description
 * using regular expressions.
 */

import { Disposable, IDisposable } from '../../../../base/common/lifecycle.js';
import * as languages from '../../../common/languages.js';
import { NullState, nullTokenizeEncoded, nullTokenize } from '../../../common/languages/nullTokenize.js';
import { TokenTheme } from '../../../common/languages/supports/tokenization.js';
import { ILanguageService } from '../../../common/languages/language.js';
import * as monarchCommon from './monarchCommon.js';
import { IStandaloneThemeService } from '../standaloneTheme.js';
import { IConfigurationService } from '../../../../platform/configuration/common/configuration.js';
import { LanguageId, MetadataConsts } from '../../../common/encodedTokenAttributes.js';

/** Cache depth handed to both the stack element factory and the line state factory. */
const CACHE_STACK_DEPTH = 5;

/**
 * Reuse the same stack elements up to a certain depth.
 */
class MonarchStackElementFactory {

	private static readonly _INSTANCE = new MonarchStackElementFactory(CACHE_STACK_DEPTH);

	/** Creates (or reuses) a stack element through the shared factory instance. */
	public static create(parent: MonarchStackElement | null, state: string): MonarchStackElement {
		return this._INSTANCE.create(parent, state);
	}

	private readonly _maxCacheDepth: number;
	private readonly _entries: { [stackElementId: string]: MonarchStackElement };

	constructor(maxCacheDepth: number) {
		this._maxCacheDepth = maxCacheDepth;
		this._entries = Object.create(null);
	}

	/**
	 * Returns a stack element for `state` on top of `parent`.
	 * Elements whose parent depth is below the cache depth are cached under a key
	 * made of the '|'-joined state names of the parent chain followed by `state`.
	 */
	public create(parent: MonarchStackElement | null, state: string): MonarchStackElement {
		if (parent !== null && parent.depth >= this._maxCacheDepth) {
			// no caching above a certain depth
			return new MonarchStackElement(parent, state);
		}
		let stackElementId = MonarchStackElement.getStackElementId(parent);
		if (stackElementId.length > 0) {
			stackElementId += '|';
		}
		stackElementId += state;

		let result = this._entries[stackElementId];
		if (result) {
			return result;
		}
		result = new MonarchStackElement(parent, state);
		this._entries[stackElementId] = result;
		return result;
	}
}

/**
 * One entry of the tokenizer state stack, linked to its parent entry.
 * All fields are readonly; push/pop/switchTo return other elements instead of mutating.
 */
class MonarchStackElement {

	public readonly parent: MonarchStackElement | null;
	public readonly state: string;
	/** Number of elements in the chain ending at this element (a root element has depth 1). */
	public readonly depth: number;

	constructor(parent: MonarchStackElement | null, state: string) {
		this.parent = parent;
		this.state = state;
		this.depth = (this.parent ? this.parent.depth : 0) + 1;
	}

	/**
	 * Joins the state names from `element` up to the root with '|'.
	 * Returns the empty string for `null`.
	 */
	public static getStackElementId(element: MonarchStackElement | null): string {
		let result = '';
		while (element !== null) {
			if (result.length > 0) {
				result += '|';
			}
			result += element.state;
			element = element.parent;
		}
		return result;
	}

	/** Walks both chains in lockstep and compares the state names. */
	private static _equals(a: MonarchStackElement | null, b: MonarchStackElement | null): boolean {
		while (a !== null && b !== null) {
			if (a === b) {
				// same object => the remaining parents are identical as well
				return true;
			}
			if (a.state !== b.state) {
				return false;
			}
			a = a.parent;
			b = b.parent;
		}
		if (a === null && b === null) {
			return true;
		}
		return false;
	}

	public equals(other: MonarchStackElement): boolean {
		return MonarchStackElement._equals(this, other);
	}

	/** Returns an element for `state` with this element as its parent. */
	public push(state: string): MonarchStackElement {
		return MonarchStackElementFactory.create(this, state);
	}

	/** Returns the parent element, or `null` when this is the root. */
	public pop(): MonarchStackElement | null {
		return this.parent;
	}

	/** Returns the root element of this chain. */
	public popall(): MonarchStackElement {
		let result: MonarchStackElement = this;
		while (result.parent) {
			result = result.parent;
		}
		return result;
	}

	/** Returns an element for `state` that shares this element's parent. */
	public switchTo(state: string): MonarchStackElement {
		return MonarchStackElementFactory.create(this.parent, state);
	}
}

/**
 * The embedded language currently being tokenized, together with that language's own state.
 */
class EmbeddedLanguageData {
	public readonly languageId: string;
	public readonly state: languages.IState;

	constructor(languageId: string, state: languages.IState) {
		this.languageId = languageId;
		this.state = state;
	}

	public equals(other: EmbeddedLanguageData): boolean {
		return (
			this.languageId === other.languageId
			&& this.state.equals(other.state)
		);
	}

	/**
	 * Returns `this` when cloning the nested state yields the very same object,
	 * otherwise a new instance wrapping the cloned nested state.
	 */
	public clone(): EmbeddedLanguageData {
		const stateClone = this.state.clone();
		// save an object
		if (stateClone === this.state) {
			return this;
		}
		// Wrap the clone; wrapping `this.state` again would defeat the purpose of cloning.
		return new EmbeddedLanguageData(this.languageId, stateClone);
	}
}

/**
 * Reuse the same line states up to a certain depth.
 */
class MonarchLineStateFactory {

	private static readonly _INSTANCE = new MonarchLineStateFactory(CACHE_STACK_DEPTH);

	/** Creates (or reuses) a line state through the shared factory instance. */
	public static create(stack: MonarchStackElement, embeddedLanguageData: EmbeddedLanguageData | null): MonarchLineState {
		return this._INSTANCE.create(stack, embeddedLanguageData);
	}

	private readonly _maxCacheDepth: number;
	private readonly _entries: { [stackElementId: string]: MonarchLineState };

	constructor(maxCacheDepth: number) {
		this._maxCacheDepth = maxCacheDepth;
		this._entries = Object.create(null);
	}

	/**
	 * Returns a line state for `stack`. Only states without embedded language data
	 * and with a stack depth below the cache depth are cached.
	 */
	public create(stack: MonarchStackElement, embeddedLanguageData: EmbeddedLanguageData | null): MonarchLineState {
		if (embeddedLanguageData !== null) {
			// no caching when embedding
			return new MonarchLineState(stack, embeddedLanguageData);
		}
		if (stack !== null && stack.depth >= this._maxCacheDepth) {
			// no caching above a certain depth
			return new MonarchLineState(stack, embeddedLanguageData);
		}
		const stackElementId = MonarchStackElement.getStackElementId(stack);

		let result = this._entries[stackElementId];
		if (result) {
			return result;
		}
		result = new MonarchLineState(stack, null);
		this._entries[stackElementId] = result;
		return result;
	}
}

/**
 * Tokenizer state at a line boundary: the state stack plus, when inside an
 * embedded language, that language's data.
 */
class MonarchLineState implements languages.IState {

	public readonly stack: MonarchStackElement;
	public readonly embeddedLanguageData: EmbeddedLanguageData | null;

	constructor(
		stack: MonarchStackElement,
		embeddedLanguageData: EmbeddedLanguageData | null
	) {
		this.stack = stack;
		this.embeddedLanguageData = embeddedLanguageData;
	}

	/**
	 * Returns `this` when the embedded language data did not need to be copied,
	 * otherwise a line state that holds the cloned embedded language data.
	 */
	public clone(): languages.IState {
		const embeddedlanguageDataClone = this.embeddedLanguageData ? this.embeddedLanguageData.clone() : null;
		// save an object
		if (embeddedlanguageDataClone === this.embeddedLanguageData) {
			return this;
		}
		// Hand the clone to the new state; reusing `this.embeddedLanguageData` would discard it.
		return MonarchLineStateFactory.create(this.stack, embeddedlanguageDataClone);
	}

	public equals(other: languages.IState): boolean {
		if (!(other instanceof MonarchLineState)) {
			return false;
		}
		if (!this.stack.equals(other.stack)) {
			return false;
		}
		if (this.embeddedLanguageData === null && other.embeddedLanguageData === null) {
			return true;
		}
		if (this.embeddedLanguageData === null || other.embeddedLanguageData === null) {
			return false;
		}
		return this.embeddedLanguageData.equals(other.embeddedLanguageData);
	}
}

/**
 * Sink for the tokens produced while tokenizing a single line.
 */
interface IMonarchTokensCollector {
	/** Sets the language that subsequently emitted tokens are attributed to. */
	enterLanguage(languageId: string): void;
	/** Records a token of `type` starting at `startOffset`. */
	emit(startOffset: number, type: string): void;
	/**
	 * Tokenizes `embeddedLanguageLine` with the embedded language, records the resulting
	 * tokens shifted by `offsetDelta`, and returns the embedded language's end state.
	 */
	nestedLanguageTokenize(embeddedLanguageLine: string, hasEOL: boolean, embeddedLanguageData: EmbeddedLanguageData, offsetDelta: number): languages.IState;
}

/**
 * Collects tokens as `languages.Token` objects.
 */
class MonarchClassicTokensCollector implements IMonarchTokensCollector {

	private _tokens: languages.Token[];
	private _languageId: string | null;
	private _lastTokenType: string | null;
	private _lastTokenLanguage: string | null;

	constructor() {
		this._tokens = [];
		this._languageId = null;
		this._lastTokenType = null;
		this._lastTokenLanguage = null;
	}

	public enterLanguage(languageId: string): void {
		this._languageId = languageId;
	}

	public emit(startOffset: number, type: string): void {
		if (this._lastTokenType === type && this._lastTokenLanguage === this._languageId) {
			// same type and language as the previous token => nothing new to record
			return;
		}
		this._lastTokenType = type;
		this._lastTokenLanguage = this._languageId;
		this._tokens.push(new languages.Token(startOffset, type, this._languageId!));
	}

	public nestedLanguageTokenize(embeddedLanguageLine: string, hasEOL: boolean, embeddedLanguageData: EmbeddedLanguageData, offsetDelta: number): languages.IState {
		const nestedLanguageId = embeddedLanguageData.languageId;
		const embeddedModeState = embeddedLanguageData.state;

		const nestedLanguageTokenizationSupport = languages.TokenizationRegistry.get(nestedLanguageId);
		if (!nestedLanguageTokenizationSupport) {
			// no tokenizer registered for the nested language: emit one empty-typed token
			this.enterLanguage(nestedLanguageId);
			this.emit(offsetDelta, '');
			return embeddedModeState;
		}

		const nestedResult = nestedLanguageTokenizationSupport.tokenize(embeddedLanguageLine, hasEOL, embeddedModeState);
		if (offsetDelta !== 0) {
			for (const token of nestedResult.tokens) {
				this._tokens.push(new languages.Token(token.offset + offsetDelta, token.type, token.language));
			}
		} else {
			this._tokens = this._tokens.concat(nestedResult.tokens);
		}
		// forget the last emitted token so that the next emit() always records a token
		this._lastTokenType = null;
		this._lastTokenLanguage = null;
		this._languageId = null;
		return nestedResult.endState;
	}

	public finalize(endState: MonarchLineState): languages.TokenizationResult {
		return new languages.TokenizationResult(this._tokens, endState);
	}
}

/**
 * Collects tokens as a flat number sequence of (startOffset, metadata) pairs.
 */
class MonarchModernTokensCollector implements IMonarchTokensCollector {

	private readonly _languageService: ILanguageService;
	private readonly _theme: TokenTheme;
	/** Tokens already merged by earlier nestedLanguageTokenize() calls on this line. */
	private _prependTokens: Uint32Array | null;
	/** Tokens emitted since the last merge. */
	private _tokens: number[];
	private _currentLanguageId: LanguageId;
	private _lastTokenMetadata: number;

	constructor(languageService: ILanguageService, theme: TokenTheme) {
		this._languageService = languageService;
		this._theme = theme;
		this._prependTokens = null;
		this._tokens = [];
		this._currentLanguageId = LanguageId.Null;
		this._lastTokenMetadata = 0;
	}

	public enterLanguage(languageId: string): void {
		this._currentLanguageId = this._languageService.languageIdCodec.encodeLanguageId(languageId);
	}

	public emit(startOffset: number, type: string): void {
		const metadata = this._theme.match(this._currentLanguageId, type) | MetadataConsts.BALANCED_BRACKETS_MASK;
		if (this._lastTokenMetadata === metadata) {
			// same metadata as the previous token => nothing new to record
			return;
		}
		this._lastTokenMetadata = metadata;
		this._tokens.push(startOffset);
		this._tokens.push(metadata);
	}

	/**
	 * Concatenates `a`, `b` and `c` (in this order) into one Uint32Array.
	 * When only `a` or only `c` has content, that array itself is returned without copying.
	 */
	private static _merge(a: Uint32Array | null, b: number[], c: Uint32Array | null): Uint32Array {
		const aLen = (a !== null ? a.length : 0);
		const bLen = b.length;
		const cLen = (c !== null ? c.length : 0);

		if (aLen === 0 && bLen === 0 && cLen === 0) {
			return new Uint32Array(0);
		}
		if (aLen === 0 && bLen === 0) {
			return c!;
		}
		if (bLen === 0 && cLen === 0) {
			return a!;
		}

		const result = new Uint32Array(aLen + bLen + cLen);
		if (a !== null) {
			result.set(a);
		}
		for (let i = 0; i < bLen; i++) {
			result[aLen + i] = b[i];
		}
		if (c !== null) {
			result.set(c, aLen + bLen);
		}
		return result;
	}

	public nestedLanguageTokenize(embeddedLanguageLine: string, hasEOL: boolean, embeddedLanguageData: EmbeddedLanguageData, offsetDelta: number): languages.IState {
		const nestedLanguageId = embeddedLanguageData.languageId;
		const embeddedModeState = embeddedLanguageData.state;

		const nestedLanguageTokenizationSupport = languages.TokenizationRegistry.get(nestedLanguageId);
		if (!nestedLanguageTokenizationSupport) {
			// no tokenizer registered for the nested language: emit one empty-typed token
			this.enterLanguage(nestedLanguageId);
			this.emit(offsetDelta, '');
			return embeddedModeState;
		}

		const nestedResult = nestedLanguageTokenizationSupport.tokenizeEncoded(embeddedLanguageLine, hasEOL, embeddedModeState);
		if (offsetDelta !== 0) {
			// Even indices hold the start offsets; they are shifted in place
			// inside the array returned by the nested tokenizer.
			for (let i = 0, len = nestedResult.tokens.length; i < len; i += 2) {
				nestedResult.tokens[i] += offsetDelta;
			}
		}

		this._prependTokens = MonarchModernTokensCollector._merge(this._prependTokens, this._tokens, nestedResult.tokens);
		this._tokens = [];
		this._currentLanguageId = 0;
		this._lastTokenMetadata = 0;
		return nestedResult.endState;
	}

	public finalize(endState: MonarchLineState): languages.EncodedTokenizationResult {
		return new languages.EncodedTokenizationResult(
			MonarchModernTokensCollector._merge(this._prependTokens, this._tokens, null),
			endState
		);
	}
}

export type ILoadStatus = { loaded: true } | { loaded: false; promise: Promise<void> };

/**
 * Tokenization support driven by a compiled Monarch lexer (`monarchCommon.ILexer`).
 */
export class MonarchTokenizer extends Disposable implements languages.ITokenizationSupport, IDisposable {

	private readonly _languageService: ILanguageService;
	private readonly _standaloneThemeService: IStandaloneThemeService;
	private readonly _languageId: string;
	private readonly _lexer: monarchCommon.ILexer;
	/** Ids of the embedded languages this tokenizer has requested so far. */
	private readonly _embeddedLanguages: { [languageId: string]: boolean };
	public embeddedLoaded: Promise<void>;
	private _maxTokenizationLineLength: number;

	constructor(languageService: ILanguageService, standaloneThemeService: IStandaloneThemeService, languageId: string, lexer: monarchCommon.ILexer, @IConfigurationService private readonly _configurationService: IConfigurationService) {
		super();
		this._languageService = languageService;
		this._standaloneThemeService = standaloneThemeService;
		this._languageId = languageId;
		this._lexer = lexer;
		this._embeddedLanguages = Object.create(null);
		this.embeddedLoaded = Promise.resolve(undefined);

		// Set up listening for embedded modes
		let emitting = false; // guards against re-entering the listener from our own handleChange() call
		this._register(languages.TokenizationRegistry.onDidChange((e) => {
			if (emitting) {
				return;
			}
			let isOneOfMyEmbeddedModes = false;
			for (let i = 0, len = e.changedLanguages.length; i < len; i++) {
				const language = e.changedLanguages[i];
				if (this._embeddedLanguages[language]) {
					isOneOfMyEmbeddedModes = true;
					break;
				}
			}
			if (isOneOfMyEmbeddedModes) {
				emitting = true;
				languages.TokenizationRegistry.handleChange([this._languageId]);
				emitting = false;
			}
		}));
		this._maxTokenizationLineLength = this._configurationService.getValue<number>('editor.maxTokenizationLineLength', {
			overrideIdentifier: this._languageId
		});
		this._register(this._configurationService.onDidChangeConfiguration(e => {
			if (e.affectsConfiguration('editor.maxTokenizationLineLength')) {
				this._maxTokenizationLineLength = this._configurationService.getValue<number>('editor.maxTokenizationLineLength', {
					overrideIdentifier: this._languageId
				});
			}
		}));
	}

	/**
	 * Reports whether all embedded languages requested so far are available.
	 * When some are still pending, the returned promise settles once all of them did.
	 */
	public getLoadStatus(): ILoadStatus {
		const promises: Thenable<any>[] = [];
		for (const nestedLanguageId in this._embeddedLanguages) {
			const tokenizationSupport = languages.TokenizationRegistry.get(nestedLanguageId);
			if (tokenizationSupport) {
				// The nested language is already loaded
				if (tokenizationSupport instanceof MonarchTokenizer) {
					const nestedModeStatus = tokenizationSupport.getLoadStatus();
					if (nestedModeStatus.loaded === false) {
						promises.push(nestedModeStatus.promise);
					}
				}
				continue;
			}

			if (!languages.TokenizationRegistry.isResolved(nestedLanguageId)) {
				// The nested language is in the process of being loaded
				promises.push(languages.TokenizationRegistry.getOrCreate(nestedLanguageId));
			}
		}

		if (promises.length === 0) {
			return {
				loaded: true
			};
		}
		return {
			loaded: false,
			promise: Promise.all(promises).then(_ => undefined)
		};
	}

	/** Returns the line state for the lexer's start state, without embedded language data. */
	public getInitialState(): languages.IState {
		const rootState = MonarchStackElementFactory.create(null, this._lexer.start!);
		return MonarchLineStateFactory.create(rootState, null);
	}

	/**
	 * Tokenizes one line into `languages.Token` objects. Lines whose length reaches
	 * `editor.maxTokenizationLineLength` are handed to `nullTokenize` instead.
	 */
	public tokenize(line: string, hasEOL: boolean, lineState: languages.IState): languages.TokenizationResult {
		if (line.length >= this._maxTokenizationLineLength) {
			return nullTokenize(this._languageId, lineState);
		}
		const tokensCollector = new MonarchClassicTokensCollector();
		const endLineState = this._tokenize(line, hasEOL, <MonarchLineState>lineState, tokensCollector);
		return tokensCollector.finalize(endLineState);
	}

	/**
	 * Tokenizes one line into encoded tokens. Lines whose length reaches
	 * `editor.maxTokenizationLineLength` are handed to `nullTokenizeEncoded` instead.
	 */
	public tokenizeEncoded(line: string, hasEOL: boolean, lineState: languages.IState): languages.EncodedTokenizationResult {
		if (line.length >= this._maxTokenizationLineLength) {
			return nullTokenizeEncoded(this._languageService.languageIdCodec.encodeLanguageId(this._languageId), lineState);
		}
		const tokensCollector = new MonarchModernTokensCollector(this._languageService, this._standaloneThemeService.getColorTheme().tokenTheme);
		const endLineState = this._tokenize(line, hasEOL, <MonarchLineState>lineState, tokensCollector);
		return tokensCollector.finalize(endLineState);
	}

	/** Dispatches to nested or own tokenization depending on whether the line starts inside an embedded language. */
	private _tokenize(line: string, hasEOL: boolean, lineState: MonarchLineState, collector: IMonarchTokensCollector): MonarchLineState {
		if (lineState.embeddedLanguageData) {
			return this._nestedTokenize(line, hasEOL, lineState, 0, collector);
		} else {
			return this._myTokenize(line, hasEOL, lineState, 0, collector);
		}
	}

	/**
	 * Returns the smallest offset in `line` at which a rule of the current state that can
	 * leave the embedded language (`nextEmbedded: '@pop'` or `hasEmbeddedEndInCases`) matches,
	 * or -1 when no such rule matches on this line.
	 *
	 * @throws when the current state is undefined or has no rule that can leave the embedded language.
	 */
	private _findLeavingNestedLanguageOffset(line: string, state: MonarchLineState): number {
		let rules: monarchCommon.IRule[] | null = this._lexer.tokenizer[state.stack.state];
		if (!rules) {
			rules = monarchCommon.findRules(this._lexer, state.stack.state); // do parent matching
			if (!rules) {
				throw monarchCommon.createError(this._lexer, 'tokenizer state is not defined: ' + state.stack.state);
			}
		}

		let popOffset = -1;
		let hasEmbeddedPopRule = false;

		for (const rule of rules) {
			if (!monarchCommon.isIAction(rule.action) || !(rule.action.nextEmbedded === '@pop' || rule.action.hasEmbeddedEndInCases)) {
				continue;
			}
			hasEmbeddedPopRule = true;

			let regex = rule.resolveRegex(state.stack.state);
			const regexSource = regex.source;
			if (regexSource.startsWith('^(?:') && regexSource.endsWith(')')) {
				// strip the '^(?:' ... ')' wrapper so that the rule can match anywhere in the line
				const flags = (regex.ignoreCase ? 'i' : '') + (regex.unicode ? 'u' : '');
				regex = new RegExp(regexSource.slice(4, -1), flags);
			}

			const result = line.search(regex);
			if (result === -1 || (result !== 0 && rule.matchOnlyAtLineStart)) {
				continue;
			}

			if (popOffset === -1 || result < popOffset) {
				popOffset = result;
			}
		}

		if (!hasEmbeddedPopRule) {
			throw monarchCommon.createError(this._lexer, 'no rule containing nextEmbedded: "@pop" in tokenizer embedded state: ' + state.stack.state);
		}

		return popOffset;
	}

	/**
	 * Tokenizes a line that starts inside an embedded language: the part before the
	 * leaving offset goes to the embedded language, the rest to this lexer.
	 */
	private _nestedTokenize(line: string, hasEOL: boolean, lineState: MonarchLineState, offsetDelta: number, tokensCollector: IMonarchTokensCollector): MonarchLineState {

		const popOffset = this._findLeavingNestedLanguageOffset(line, lineState);

		if (popOffset === -1) {
			// tokenization will not leave nested language
			const nestedEndState = tokensCollector.nestedLanguageTokenize(line, hasEOL, lineState.embeddedLanguageData!, offsetDelta);
			return MonarchLineStateFactory.create(lineState.stack, new EmbeddedLanguageData(lineState.embeddedLanguageData!.languageId, nestedEndState));
		}

		const nestedLanguageLine = line.substring(0, popOffset);
		if (nestedLanguageLine.length > 0) {
			// tokenize with the nested language
			tokensCollector.nestedLanguageTokenize(nestedLanguageLine, false, lineState.embeddedLanguageData!, offsetDelta);
		}

		const restOfTheLine = line.substring(popOffset);
		return this._myTokenize(restOfTheLine, hasEOL, lineState, offsetDelta + popOffset, tokensCollector);
	}

	/** Returns the rule's name for use in error messages, or '(unknown)' when there is no rule. */
	private _safeRuleName(rule: monarchCommon.IRule | null): string {
		if (rule) {
			return rule.name;
		}
		return '(unknown)';
	}

	/**
	 * Tokenizes `lineWithoutLF` with this lexer's own rules, starting in `lineState`.
	 * Emitted token offsets are shifted by `offsetDelta`. When a rule enters an embedded
	 * language, the remainder of the line is handed to `_nestedTokenize`.
	 *
	 * @returns the state at the end of the line.
	 * @throws when the lexer definition is inconsistent (undefined state, empty-stack pop,
	 * group count mismatch, no progress, ...).
	 */
	private _myTokenize(lineWithoutLF: string, hasEOL: boolean, lineState: MonarchLineState, offsetDelta: number, tokensCollector: IMonarchTokensCollector): MonarchLineState {
		tokensCollector.enterLanguage(this._languageId);

		const lineWithoutLFLength = lineWithoutLF.length;
		const line = (hasEOL && this._lexer.includeLF ? lineWithoutLF + '\n' : lineWithoutLF);
		const lineLength = line.length;

		let embeddedLanguageData = lineState.embeddedLanguageData;
		let stack = lineState.stack;
		let pos = 0;

		// regular expression group matching
		// these never need cloning or equality since they are only used within a line match
		interface GroupMatching {
			matches: string[];
			rule: monarchCommon.IRule | null;
			groups: { action: monarchCommon.FuzzyAction; matched: string }[];
		}
		let groupMatching: GroupMatching | null = null;

		// See https://github.com/microsoft/monaco-editor/issues/1235
		// Evaluate rules at least once for an empty line
		let forceEvaluation = true;

		// Created once per call instead of once per loop iteration; it reads the
		// current values of `stack` and `pos` at the time it is invoked.
		const computeNewStateForEmbeddedLanguage = (enteringEmbeddedLanguage: string) => {
			// support language names, mime types, and language ids
			const languageId = (
				this._languageService.getLanguageIdByLanguageName(enteringEmbeddedLanguage)
				|| this._languageService.getLanguageIdByMimeType(enteringEmbeddedLanguage)
				|| enteringEmbeddedLanguage
			);

			const nestedLanguageData = this._getNestedEmbeddedLanguageData(languageId);

			if (pos < lineLength) {
				// there is content from the embedded language on this line
				const restOfLine = lineWithoutLF.substring(pos);
				return this._nestedTokenize(restOfLine, hasEOL, MonarchLineStateFactory.create(stack, nestedLanguageData), offsetDelta + pos, tokensCollector);
			} else {
				return MonarchLineStateFactory.create(stack, nestedLanguageData);
			}
		};

		while (forceEvaluation || pos < lineLength) {

			const pos0 = pos;
			const stackLen0 = stack.depth;
			const groupLen0 = groupMatching ? groupMatching.groups.length : 0;
			const state = stack.state;

			let matches: string[] | null = null;
			let matched: string | null = null;
			let action: monarchCommon.FuzzyAction | monarchCommon.FuzzyAction[] | null = null;
			let rule: monarchCommon.IRule | null = null;

			let enteringEmbeddedLanguage: string | null = null;

			// check if we need to process group matches first
			if (groupMatching) {
				matches = groupMatching.matches;
				const groupEntry = groupMatching.groups.shift()!;
				matched = groupEntry.matched;
				action = groupEntry.action;
				rule = groupMatching.rule;

				// cleanup if necessary
				if (groupMatching.groups.length === 0) {
					groupMatching = null;
				}
			} else {
				// otherwise we match on the token stream

				if (!forceEvaluation && pos >= lineLength) {
					// nothing to do
					break;
				}

				forceEvaluation = false;

				// get the rules for this state
				let rules: monarchCommon.IRule[] | null = this._lexer.tokenizer[state];
				if (!rules) {
					rules = monarchCommon.findRules(this._lexer, state); // do parent matching
					if (!rules) {
						throw monarchCommon.createError(this._lexer, 'tokenizer state is not defined: ' + state);
					}
				}

				// try each rule until we match
				const restOfLine = line.substring(pos);
				for (const candidateRule of rules) {
					if (pos === 0 || !candidateRule.matchOnlyAtLineStart) {
						matches = restOfLine.match(candidateRule.resolveRegex(state));
						if (matches) {
							matched = matches[0];
							action = candidateRule.action;
							// remember the matching rule so that error messages can name it
							rule = candidateRule;
							break;
						}
					}
				}
			}

			// We matched 'rule' with 'matches' and 'action'
			if (!matches) {
				matches = [''];
				matched = '';
			}

			if (!action) {
				// bad: we didn't match anything, and there is no action to take
				// we need to advance the stream or we get progress trouble
				if (pos < lineLength) {
					matches = [line.charAt(pos)];
					matched = matches[0];
				}
				action = this._lexer.defaultToken;
			}

			if (matched === null) {
				// should never happen, needed for strict null checking
				break;
			}

			// advance stream
			pos += matched.length;

			// maybe call action function (used for 'cases')
			while (monarchCommon.isFuzzyAction(action) && monarchCommon.isIAction(action) && action.test) {
				action = action.test(matched, matches, state, pos === lineLength);
			}

			let result: monarchCommon.FuzzyAction | monarchCommon.FuzzyAction[] | null = null;
			// set the result: either a string or an array of actions
			if (typeof action === 'string' || Array.isArray(action)) {
				result = action;
			} else if (action.group) {
				result = action.group;
			} else if (action.token !== null && action.token !== undefined) {

				// do $n replacements?
				if (action.tokenSubst) {
					result = monarchCommon.substituteMatches(this._lexer, action.token, matched, matches, state);
				} else {
					result = action.token;
				}

				// enter embedded language?
				if (action.nextEmbedded) {
					if (action.nextEmbedded === '@pop') {
						if (!embeddedLanguageData) {
							throw monarchCommon.createError(this._lexer, 'cannot pop embedded language if not inside one');
						}
						embeddedLanguageData = null;
					} else if (embeddedLanguageData) {
						throw monarchCommon.createError(this._lexer, 'cannot enter embedded language from within an embedded language');
					} else {
						enteringEmbeddedLanguage = monarchCommon.substituteMatches(this._lexer, action.nextEmbedded, matched, matches, state);
					}
				}

				// state transformations
				if (action.goBack) { // back up the stream..
					pos = Math.max(0, pos - action.goBack);
				}

				if (action.switchTo && typeof action.switchTo === 'string') {
					let nextState = monarchCommon.substituteMatches(this._lexer, action.switchTo, matched, matches, state); // switch state without a push...
					if (nextState[0] === '@') {
						nextState = nextState.substring(1); // peel off starting '@'
					}
					if (!monarchCommon.findRules(this._lexer, nextState)) {
						throw monarchCommon.createError(this._lexer, 'trying to switch to a state \'' + nextState + '\' that is undefined in rule: ' + this._safeRuleName(rule));
					} else {
						stack = stack.switchTo(nextState);
					}
				} else if (action.transform && typeof action.transform === 'function') {
					throw monarchCommon.createError(this._lexer, 'action.transform not supported');
				} else if (action.next) {
					if (action.next === '@push') {
						if (stack.depth >= this._lexer.maxStack) {
							// The stack may consist of a single element, in which case there is no parent to report.
							const parentState = (stack.parent ? stack.parent.state : '');
							throw monarchCommon.createError(this._lexer, 'maximum tokenizer stack size reached: [' +
								stack.state + ',' + parentState + ',...]');
						} else {
							stack = stack.push(state);
						}
					} else if (action.next === '@pop') {
						if (stack.depth <= 1) {
							throw monarchCommon.createError(this._lexer, 'trying to pop an empty stack in rule: ' + this._safeRuleName(rule));
						} else {
							stack = stack.pop()!;
						}
					} else if (action.next === '@popall') {
						stack = stack.popall();
					} else {
						let nextState = monarchCommon.substituteMatches(this._lexer, action.next, matched, matches, state);
						if (nextState[0] === '@') {
							nextState = nextState.substring(1); // peel off starting '@'
						}

						if (!monarchCommon.findRules(this._lexer, nextState)) {
							throw monarchCommon.createError(this._lexer, 'trying to set a next state \'' + nextState + '\' that is undefined in rule: ' + this._safeRuleName(rule));
						} else {
							stack = stack.push(nextState);
						}
					}
				}

				if (action.log && typeof (action.log) === 'string') {
					monarchCommon.log(this._lexer, this._lexer.languageId + ': ' + monarchCommon.substituteMatches(this._lexer, action.log, matched, matches, state));
				}
			}

			// check result
			if (result === null) {
				throw monarchCommon.createError(this._lexer, 'lexer rule has no well-defined action in rule: ' + this._safeRuleName(rule));
			}

			// is the result a group match?
			if (Array.isArray(result)) {
				if (groupMatching && groupMatching.groups.length > 0) {
					throw monarchCommon.createError(this._lexer, 'groups cannot be nested: ' + this._safeRuleName(rule));
				}
				if (matches.length !== result.length + 1) {
					throw monarchCommon.createError(this._lexer, 'matched number of groups does not match the number of actions in rule: ' + this._safeRuleName(rule));
				}
				let totalLen = 0;
				for (let i = 1; i < matches.length; i++) {
					totalLen += matches[i].length;
				}
				if (totalLen !== matched.length) {
					throw monarchCommon.createError(this._lexer, 'with groups, all characters should be matched in consecutive groups in rule: ' + this._safeRuleName(rule));
				}

				groupMatching = {
					rule: rule,
					matches: matches,
					groups: []
				};
				for (let i = 0; i < result.length; i++) {
					groupMatching.groups[i] = {
						action: result[i],
						matched: matches[i + 1]
					};
				}

				// rewind: the groups are consumed one by one in the following iterations
				pos -= matched.length;
				// call recursively to initiate first result match
				continue;
			} else {
				// regular result

				// check for '@rematch'
				if (result === '@rematch') {
					pos -= matched.length;
					matched = ''; // better set the next state too..
					matches = null;
					result = '';

					// Even though `@rematch` was specified, if `nextEmbedded` also specified,
					// a state transition should occur.
					if (enteringEmbeddedLanguage !== null) {
						return computeNewStateForEmbeddedLanguage(enteringEmbeddedLanguage);
					}
				}

				// check progress
				if (matched.length === 0) {
					if (lineLength === 0 || stackLen0 !== stack.depth || state !== stack.state || (!groupMatching ? 0 : groupMatching.groups.length) !== groupLen0) {
						continue;
					} else {
						throw monarchCommon.createError(this._lexer, 'no progress in tokenizer in rule: ' + this._safeRuleName(rule));
					}
				}

				// return the result (and check for brace matching)
				// todo: for efficiency we could pre-sanitize tokenPostfix and substitutions
				let tokenType: string | null = null;
				if (monarchCommon.isString(result) && result.startsWith('@brackets')) {
					const rest = result.substring('@brackets'.length);
					const bracket = findBracket(this._lexer, matched);
					if (!bracket) {
						throw monarchCommon.createError(this._lexer, '@brackets token returned but no bracket defined as: ' + matched);
					}
					tokenType = monarchCommon.sanitize(bracket.token + rest);
				} else {
					const token = (result === '' ? '' : result + this._lexer.tokenPostfix);
					tokenType = monarchCommon.sanitize(token);
				}

				// no token is emitted for a match that starts at the appended '\n' (includeLF)
				if (pos0 < lineWithoutLFLength) {
					tokensCollector.emit(pos0 + offsetDelta, tokenType);
				}
			}

			if (enteringEmbeddedLanguage !== null) {
				return computeNewStateForEmbeddedLanguage(enteringEmbeddedLanguage);
			}
		}

		return MonarchLineStateFactory.create(stack, embeddedLanguageData);
	}

	/**
	 * Builds the embedded language data for `languageId`. For a registered language other
	 * than this tokenizer's own, loading is requested and the id is remembered in
	 * `_embeddedLanguages`. Falls back to `NullState` when the language is not registered
	 * or has no tokenization support available yet.
	 */
	private _getNestedEmbeddedLanguageData(languageId: string): EmbeddedLanguageData {
		if (!this._languageService.isRegisteredLanguageId(languageId)) {
			return new EmbeddedLanguageData(languageId, NullState);
		}

		if (languageId !== this._languageId) {
			// Fire language loading event
			this._languageService.requestBasicLanguageFeatures(languageId);
			languages.TokenizationRegistry.getOrCreate(languageId);
			this._embeddedLanguages[languageId] = true;
		}

		const tokenizationSupport = languages.TokenizationRegistry.get(languageId);
		if (tokenizationSupport) {
			return new EmbeddedLanguageData(languageId, tokenizationSupport.getInitialState());
		}

		return new EmbeddedLanguageData(languageId, NullState);
	}
}

/**
 * Searches for a bracket in the 'brackets' attribute that matches the input.
 * Returns the bracket's token together with whether `matched` is its opening or
 * closing form, or `null` when `matched` is empty or no bracket matches.
 */
function findBracket(lexer: monarchCommon.ILexer, matched: string) {
	if (!matched) {
		return null;
	}
	matched = monarchCommon.fixCase(lexer, matched);

	const brackets = lexer.brackets;
	for (const bracket of brackets) {
		if (bracket.open === matched) {
			return { token: bracket.token, bracketType: monarchCommon.MonarchBracket.Open };
		}
		else if (bracket.close === matched) {
			return { token: bracket.token, bracketType: monarchCommon.MonarchBracket.Close };
		}
	}
	return null;
}