Path: blob/main/src/vs/editor/standalone/common/monarch/monarchLexer.ts
5282 views
/*---------------------------------------------------------------------------------------------
 *  Copyright (c) Microsoft Corporation. All rights reserved.
 *  Licensed under the MIT License. See License.txt in the project root for license information.
 *--------------------------------------------------------------------------------------------*/

/**
 * Create a syntax highlighter with a fully declarative JSON style lexer description
 * using regular expressions.
 */

import { Disposable, IDisposable } from '../../../../base/common/lifecycle.js';
import * as languages from '../../../common/languages.js';
import { NullState, nullTokenizeEncoded, nullTokenize } from '../../../common/languages/nullTokenize.js';
import { TokenTheme } from '../../../common/languages/supports/tokenization.js';
import { ILanguageService } from '../../../common/languages/language.js';
import * as monarchCommon from './monarchCommon.js';
import { IStandaloneThemeService } from '../standaloneTheme.js';
import { IConfigurationService } from '../../../../platform/configuration/common/configuration.js';
import { LanguageId, MetadataConsts } from '../../../common/encodedTokenAttributes.js';

/** Stack depth at and above which stack elements / line states are no longer cached. */
const CACHE_STACK_DEPTH = 5;

/**
 * Reuse the same stack elements up to a certain depth.
 */
class MonarchStackElementFactory {

	private static readonly _INSTANCE = new MonarchStackElementFactory(CACHE_STACK_DEPTH);

	/** Creates (or returns a cached) stack element via the shared factory instance. */
	public static create(parent: MonarchStackElement | null, state: string): MonarchStackElement {
		return this._INSTANCE.create(parent, state);
	}

	private readonly _maxCacheDepth: number;
	private readonly _entries: { [stackElementId: string]: MonarchStackElement };

	constructor(maxCacheDepth: number) {
		this._maxCacheDepth = maxCacheDepth;
		// prototype-less object so that state names can never collide with Object.prototype members
		this._entries = Object.create(null);
	}

	/**
	 * Returns the stack element for `state` on top of `parent`.
	 * Elements whose parent is shallower than the cache depth are cached and shared.
	 */
	public create(parent: MonarchStackElement | null, state: string): MonarchStackElement {
		if (parent !== null && parent.depth >= this._maxCacheDepth) {
			// no caching above a certain depth
			return new MonarchStackElement(parent, state);
		}

		// cache key: the id of the parent chain followed by the new state
		let stackElementId = MonarchStackElement.getStackElementId(parent);
		if (stackElementId.length > 0) {
			stackElementId += '|';
		}
		stackElementId += state;

		let result = this._entries[stackElementId];
		if (result) {
			return result;
		}
		result = new MonarchStackElement(parent, state);
		this._entries[stackElementId] = result;
		return result;
	}
}

/**
 * An immutable node of the tokenizer state stack (a linked list towards the root).
 */
class MonarchStackElement {

	public readonly parent: MonarchStackElement | null;
	public readonly state: string;
	/** Number of elements from this one down to the root (the root has depth 1). */
	public readonly depth: number;

	constructor(parent: MonarchStackElement | null, state: string) {
		this.parent = parent;
		this.state = state;
		this.depth = (this.parent ? this.parent.depth : 0) + 1;
	}

	/**
	 * Joins the states of `element` and all of its ancestors with '|',
	 * starting at `element`. Returns '' for `null`.
	 */
	public static getStackElementId(element: MonarchStackElement | null): string {
		let result = '';
		while (element !== null) {
			if (result.length > 0) {
				result += '|';
			}
			result += element.state;
			element = element.parent;
		}
		return result;
	}

	private static _equals(a: MonarchStackElement | null, b: MonarchStackElement | null): boolean {
		while (a !== null && b !== null) {
			if (a === b) {
				// shared tail: everything below is identical as well
				return true;
			}
			if (a.state !== b.state) {
				return false;
			}
			a = a.parent;
			b = b.parent;
		}
		// equal only if both chains ended at the same time
		return (a === null && b === null);
	}

	/** Structural equality: same states at every level of the stack. */
	public equals(other: MonarchStackElement): boolean {
		return MonarchStackElement._equals(this, other);
	}

	/** Returns a stack with `state` pushed on top of this element. */
	public push(state: string): MonarchStackElement {
		return MonarchStackElementFactory.create(this, state);
	}

	/** Returns the parent element, or `null` when this is the root. */
	public pop(): MonarchStackElement | null {
		return this.parent;
	}

	/** Returns the root element of this stack. */
	public popall(): MonarchStackElement {
		let result: MonarchStackElement = this;
		while (result.parent) {
			result = result.parent;
		}
		return result;
	}

	/** Returns a stack in which the top element is replaced by `state`. */
	public switchTo(state: string): MonarchStackElement {
		return MonarchStackElementFactory.create(this.parent, state);
	}
}

/**
 * The embedded (nested) language that is currently active, together with its tokenizer state.
 */
class EmbeddedLanguageData {
	public readonly languageId: string;
	public readonly state: languages.IState;

	constructor(languageId: string, state: languages.IState) {
		this.languageId = languageId;
		this.state = state;
	}

	public equals(other: EmbeddedLanguageData): boolean {
		return (
			this.languageId === other.languageId
			&& this.state.equals(other.state)
		);
	}

	/**
	 * Returns a copy that carries a clone of the nested state.
	 * When the nested state clones to itself, `this` is returned instead of a new object.
	 */
	public clone(): EmbeddedLanguageData {
		const stateClone = this.state.clone();
		// save an object
		if (stateClone === this.state) {
			return this;
		}
		// the copy must carry the cloned state, not the original one
		return new EmbeddedLanguageData(this.languageId, stateClone);
	}
}

/**
 * Reuse the same line states up to a certain depth.
 */
class MonarchLineStateFactory {

	private static readonly _INSTANCE = new MonarchLineStateFactory(CACHE_STACK_DEPTH);

	/** Creates (or returns a cached) line state via the shared factory instance. */
	public static create(stack: MonarchStackElement, embeddedLanguageData: EmbeddedLanguageData | null): MonarchLineState {
		return this._INSTANCE.create(stack, embeddedLanguageData);
	}

	private readonly _maxCacheDepth: number;
	private readonly _entries: { [stackElementId: string]: MonarchLineState };

	constructor(maxCacheDepth: number) {
		this._maxCacheDepth = maxCacheDepth;
		this._entries = Object.create(null);
	}

	/**
	 * Returns a line state for `stack`. Only states without embedded language data
	 * and with a stack shallower than the cache depth are cached and shared.
	 */
	public create(stack: MonarchStackElement, embeddedLanguageData: EmbeddedLanguageData | null): MonarchLineState {
		if (embeddedLanguageData !== null) {
			// no caching when embedding
			return new MonarchLineState(stack, embeddedLanguageData);
		}
		if (stack !== null && stack.depth >= this._maxCacheDepth) {
			// no caching above a certain depth
			return new MonarchLineState(stack, embeddedLanguageData);
		}
		const stackElementId = MonarchStackElement.getStackElementId(stack);

		let result = this._entries[stackElementId];
		if (result) {
			return result;
		}
		result = new MonarchLineState(stack, null);
		this._entries[stackElementId] = result;
		return result;
	}
}

/**
 * The tokenizer state at the end of a line: the Monarch state stack plus,
 * when inside an embedded language, that language's data.
 */
class MonarchLineState implements languages.IState {

	public readonly stack: MonarchStackElement;
	public readonly embeddedLanguageData: EmbeddedLanguageData | null;

	constructor(
		stack: MonarchStackElement,
		embeddedLanguageData: EmbeddedLanguageData | null
	) {
		this.stack = stack;
		this.embeddedLanguageData = embeddedLanguageData;
	}

	/**
	 * Returns a copy that carries a clone of the embedded language data.
	 * When there is nothing to clone (no embedded data, or it clones to itself), `this` is returned.
	 */
	public clone(): languages.IState {
		const embeddedlanguageDataClone = this.embeddedLanguageData ? this.embeddedLanguageData.clone() : null;
		// save an object
		if (embeddedlanguageDataClone === this.embeddedLanguageData) {
			return this;
		}
		// the copy must carry the cloned embedded data, not the original one
		return MonarchLineStateFactory.create(this.stack, embeddedlanguageDataClone);
	}

	public equals(other: languages.IState): boolean {
		if (!(other instanceof MonarchLineState)) {
			return false;
		}
		if (!this.stack.equals(other.stack)) {
			return false;
		}
		if (this.embeddedLanguageData === null && other.embeddedLanguageData === null) {
			return true;
		}
		if (this.embeddedLanguageData === null || other.embeddedLanguageData === null) {
			return false;
		}
		return this.embeddedLanguageData.equals(other.embeddedLanguageData);
	}
}

/**
 * Sink for the tokens produced while tokenizing a single line.
 */
interface IMonarchTokensCollector {
	/** Sets the language that subsequently emitted tokens belong to. */
	enterLanguage(languageId: string): void;
	/** Emits a token of `type` starting at `startOffset`. */
	emit(startOffset: number, type: string): void;
	/**
	 * Tokenizes `embeddedLanguageLine` with the embedded language, shifting token offsets
	 * by `offsetDelta`, and returns the embedded language's end state.
	 */
	nestedLanguageTokenize(embeddedLanguageLine: string, hasEOL: boolean, embeddedLanguageData: EmbeddedLanguageData, offsetDelta: number): languages.IState;
}

/**
 * Collects tokens as `languages.Token` objects.
 */
class MonarchClassicTokensCollector implements IMonarchTokensCollector {

	private _tokens: languages.Token[];
	private _languageId: string | null;
	private _lastTokenType: string | null;
	private _lastTokenLanguage: string | null;

	constructor() {
		this._tokens = [];
		this._languageId = null;
		this._lastTokenType = null;
		this._lastTokenLanguage = null;
	}

	public enterLanguage(languageId: string): void {
		this._languageId = languageId;
	}

	public emit(startOffset: number, type: string): void {
		if (this._lastTokenType === type && this._lastTokenLanguage === this._languageId) {
			// same type and language as the previous token: extend it instead of emitting a new one
			return;
		}
		this._lastTokenType = type;
		this._lastTokenLanguage = this._languageId;
		this._tokens.push(new languages.Token(startOffset, type, this._languageId!));
	}

	public nestedLanguageTokenize(embeddedLanguageLine: string, hasEOL: boolean, embeddedLanguageData: EmbeddedLanguageData, offsetDelta: number): languages.IState {
		const nestedLanguageId = embeddedLanguageData.languageId;
		const embeddedModeState = embeddedLanguageData.state;

		const nestedLanguageTokenizationSupport = languages.TokenizationRegistry.get(nestedLanguageId);
		if (!nestedLanguageTokenizationSupport) {
			// no tokenizer available for the nested language: emit a single untyped token
			this.enterLanguage(nestedLanguageId);
			this.emit(offsetDelta, '');
			return embeddedModeState;
		}

		const nestedResult = nestedLanguageTokenizationSupport.tokenize(embeddedLanguageLine, hasEOL, embeddedModeState);
		if (offsetDelta !== 0) {
			for (const token of nestedResult.tokens) {
				this._tokens.push(new languages.Token(token.offset + offsetDelta, token.type, token.language));
			}
		} else {
			this._tokens = this._tokens.concat(nestedResult.tokens);
		}
		// forget the last emitted token so that the next emit is never merged away
		this._lastTokenType = null;
		this._lastTokenLanguage = null;
		this._languageId = null;
		return nestedResult.endState;
	}

	public finalize(endState: MonarchLineState): languages.TokenizationResult {
		return new languages.TokenizationResult(this._tokens, endState);
	}
}

/**
 * Collects tokens in encoded form: a flat sequence of (startOffset, metadata) pairs.
 */
class MonarchModernTokensCollector implements IMonarchTokensCollector {

	private readonly _languageService: ILanguageService;
	private readonly _theme: TokenTheme;
	/** Tokens already finalized for this line (everything up to the last nested tokenization). */
	private _prependTokens: Uint32Array | null;
	/** Tokens emitted since the last nested tokenization, as (startOffset, metadata) pairs. */
	private _tokens: number[];
	private _currentLanguageId: LanguageId;
	private _lastTokenMetadata: number;

	constructor(languageService: ILanguageService, theme: TokenTheme) {
		this._languageService = languageService;
		this._theme = theme;
		this._prependTokens = null;
		this._tokens = [];
		this._currentLanguageId = LanguageId.Null;
		this._lastTokenMetadata = 0;
	}

	public enterLanguage(languageId: string): void {
		this._currentLanguageId = this._languageService.languageIdCodec.encodeLanguageId(languageId);
	}

	public emit(startOffset: number, type: string): void {
		const metadata = this._theme.match(this._currentLanguageId, type) | MetadataConsts.BALANCED_BRACKETS_MASK;
		if (this._lastTokenMetadata === metadata) {
			// same metadata as the previous token: extend it instead of emitting a new one
			return;
		}
		this._lastTokenMetadata = metadata;
		this._tokens.push(startOffset);
		this._tokens.push(metadata);
	}

	/**
	 * Concatenates `a`, `b` and `c` (in this order) into a single `Uint32Array`.
	 * When only `a` or only `c` is non-empty, that array is returned as-is without copying.
	 */
	private static _merge(a: Uint32Array | null, b: number[], c: Uint32Array | null): Uint32Array {
		const aLen = (a !== null ? a.length : 0);
		const bLen = b.length;
		const cLen = (c !== null ? c.length : 0);

		if (aLen === 0 && bLen === 0 && cLen === 0) {
			return new Uint32Array(0);
		}
		if (aLen === 0 && bLen === 0) {
			return c!;
		}
		if (bLen === 0 && cLen === 0) {
			return a!;
		}

		const result = new Uint32Array(aLen + bLen + cLen);
		if (a !== null) {
			result.set(a);
		}
		for (let i = 0; i < bLen; i++) {
			result[aLen + i] = b[i];
		}
		if (c !== null) {
			result.set(c, aLen + bLen);
		}
		return result;
	}

	public nestedLanguageTokenize(embeddedLanguageLine: string, hasEOL: boolean, embeddedLanguageData: EmbeddedLanguageData, offsetDelta: number): languages.IState {
		const nestedLanguageId = embeddedLanguageData.languageId;
		const embeddedModeState = embeddedLanguageData.state;

		const nestedLanguageTokenizationSupport = languages.TokenizationRegistry.get(nestedLanguageId);
		if (!nestedLanguageTokenizationSupport) {
			// no tokenizer available for the nested language: emit a single untyped token
			this.enterLanguage(nestedLanguageId);
			this.emit(offsetDelta, '');
			return embeddedModeState;
		}

		const nestedResult = nestedLanguageTokenizationSupport.tokenizeEncoded(embeddedLanguageLine, hasEOL, embeddedModeState);
		if (offsetDelta !== 0) {
			// even indices hold start offsets, odd indices hold metadata
			for (let i = 0, len = nestedResult.tokens.length; i < len; i += 2) {
				nestedResult.tokens[i] += offsetDelta;
			}
		}

		this._prependTokens = MonarchModernTokensCollector._merge(this._prependTokens, this._tokens, nestedResult.tokens);
		this._tokens = [];
		this._currentLanguageId = 0;
		this._lastTokenMetadata = 0;
		return nestedResult.endState;
	}

	public finalize(endState: MonarchLineState): languages.EncodedTokenizationResult {
		return new languages.EncodedTokenizationResult(
			MonarchModernTokensCollector._merge(this._prependTokens, this._tokens, null),
			[],
			endState
		);
	}
}

export type ILoadStatus = { loaded: true } | { loaded: false; promise: Promise<void> };

/**
 * Tokenization support driven by a compiled Monarch lexer (`monarchCommon.ILexer`).
 * Lines are tokenized rule by rule; rules may hand parts of a line over to an embedded language.
 */
export class MonarchTokenizer extends Disposable implements languages.ITokenizationSupport, IDisposable {

	private readonly _languageService: ILanguageService;
	private readonly _standaloneThemeService: IStandaloneThemeService;
	private readonly _languageId: string;
	private readonly _lexer: monarchCommon.ILexer;
	/** Ids of the languages this tokenizer has embedded so far. */
	private readonly _embeddedLanguages: { [languageId: string]: boolean };
	public embeddedLoaded: Promise<void>;
	private _maxTokenizationLineLength: number;

	constructor(languageService: ILanguageService, standaloneThemeService: IStandaloneThemeService, languageId: string, lexer: monarchCommon.ILexer, @IConfigurationService private readonly _configurationService: IConfigurationService) {
		super();
		this._languageService = languageService;
		this._standaloneThemeService = standaloneThemeService;
		this._languageId = languageId;
		this._lexer = lexer;
		this._embeddedLanguages = Object.create(null);
		this.embeddedLoaded = Promise.resolve(undefined);

		// Set up listening for embedded modes
		let emitting = false;
		this._register(languages.TokenizationRegistry.onDidChange((e) => {
			if (emitting) {
				// this change event was triggered by ourselves below
				return;
			}
			let isOneOfMyEmbeddedModes = false;
			for (let i = 0, len = e.changedLanguages.length; i < len; i++) {
				const language = e.changedLanguages[i];
				if (this._embeddedLanguages[language]) {
					isOneOfMyEmbeddedModes = true;
					break;
				}
			}
			if (isOneOfMyEmbeddedModes) {
				emitting = true;
				try {
					languages.TokenizationRegistry.handleChange([this._languageId]);
				} finally {
					// always reset the guard, otherwise a throwing listener would silence us forever
					emitting = false;
				}
			}
		}));
		this._maxTokenizationLineLength = this._configurationService.getValue<number>('editor.maxTokenizationLineLength', {
			overrideIdentifier: this._languageId
		});
		this._register(this._configurationService.onDidChangeConfiguration(e => {
			if (e.affectsConfiguration('editor.maxTokenizationLineLength')) {
				this._maxTokenizationLineLength = this._configurationService.getValue<number>('editor.maxTokenizationLineLength', {
					overrideIdentifier: this._languageId
				});
			}
		}));
	}

	/**
	 * Reports whether all embedded languages seen so far have their tokenizers available.
	 * When some are still pending, the returned promise settles once all of them are done.
	 */
	public getLoadStatus(): ILoadStatus {
		const promises: Thenable<unknown>[] = [];
		for (const nestedLanguageId in this._embeddedLanguages) {
			const tokenizationSupport = languages.TokenizationRegistry.get(nestedLanguageId);
			if (tokenizationSupport) {
				// The nested language is already loaded
				if (tokenizationSupport instanceof MonarchTokenizer) {
					const nestedModeStatus = tokenizationSupport.getLoadStatus();
					if (nestedModeStatus.loaded === false) {
						promises.push(nestedModeStatus.promise);
					}
				}
				continue;
			}

			if (!languages.TokenizationRegistry.isResolved(nestedLanguageId)) {
				// The nested language is in the process of being loaded
				promises.push(languages.TokenizationRegistry.getOrCreate(nestedLanguageId));
			}
		}

		if (promises.length === 0) {
			return {
				loaded: true
			};
		}
		return {
			loaded: false,
			promise: Promise.all(promises).then(_ => undefined)
		};
	}

	/** Returns the state for the first line: a stack holding only the lexer's start state. */
	public getInitialState(): languages.IState {
		const rootState = MonarchStackElementFactory.create(null, this._lexer.start!);
		return MonarchLineStateFactory.create(rootState, null);
	}

	/**
	 * Tokenizes `line` into `languages.Token` objects.
	 * Lines at or above `editor.maxTokenizationLineLength` are not tokenized (null tokenization).
	 */
	public tokenize(line: string, hasEOL: boolean, lineState: languages.IState): languages.TokenizationResult {
		if (line.length >= this._maxTokenizationLineLength) {
			return nullTokenize(this._languageId, lineState);
		}
		const tokensCollector = new MonarchClassicTokensCollector();
		const endLineState = this._tokenize(line, hasEOL, <MonarchLineState>lineState, tokensCollector);
		return tokensCollector.finalize(endLineState);
	}

	/**
	 * Tokenizes `line` into encoded tokens using the current color theme.
	 * Lines at or above `editor.maxTokenizationLineLength` are not tokenized (null tokenization).
	 */
	public tokenizeEncoded(line: string, hasEOL: boolean, lineState: languages.IState): languages.EncodedTokenizationResult {
		if (line.length >= this._maxTokenizationLineLength) {
			return nullTokenizeEncoded(this._languageService.languageIdCodec.encodeLanguageId(this._languageId), lineState);
		}
		const tokensCollector = new MonarchModernTokensCollector(this._languageService, this._standaloneThemeService.getColorTheme().tokenTheme);
		const endLineState = this._tokenize(line, hasEOL, <MonarchLineState>lineState, tokensCollector);
		return tokensCollector.finalize(endLineState);
	}

	/** Dispatches to nested or own tokenization depending on whether the line starts inside an embedded language. */
	private _tokenize(line: string, hasEOL: boolean, lineState: MonarchLineState, collector: IMonarchTokensCollector): MonarchLineState {
		if (lineState.embeddedLanguageData) {
			return this._nestedTokenize(line, hasEOL, lineState, 0, collector);
		} else {
			return this._myTokenize(line, hasEOL, lineState, 0, collector);
		}
	}

	/**
	 * Finds the smallest offset in `line` at which a rule of the current state that can leave
	 * the embedded language matches. Returns -1 when no such rule matches on this line.
	 *
	 * @throws if the state is undefined or has no rule that can pop the embedded language.
	 */
	private _findLeavingNestedLanguageOffset(line: string, state: MonarchLineState): number {
		let rules: monarchCommon.IRule[] | null = this._lexer.tokenizer[state.stack.state];
		if (!rules) {
			rules = monarchCommon.findRules(this._lexer, state.stack.state); // do parent matching
			if (!rules) {
				throw monarchCommon.createError(this._lexer, 'tokenizer state is not defined: ' + state.stack.state);
			}
		}

		let popOffset = -1;
		let hasEmbeddedPopRule = false;

		for (const rule of rules) {
			if (!monarchCommon.isIAction(rule.action) || !(rule.action.nextEmbedded === '@pop' || rule.action.hasEmbeddedEndInCases)) {
				continue;
			}
			hasEmbeddedPopRule = true;

			let regex = rule.resolveRegex(state.stack.state);
			const regexSource = regex.source;
			if (regexSource.startsWith('^(?:') && regexSource.endsWith(')')) {
				// strip the '^(?:' ... ')' wrapper so that the rule can match anywhere in the line
				const flags = (regex.ignoreCase ? 'i' : '') + (regex.unicode ? 'u' : '');
				regex = new RegExp(regexSource.slice(4, -1), flags);
			}

			const result = line.search(regex);
			if (result === -1 || (result !== 0 && rule.matchOnlyAtLineStart)) {
				continue;
			}

			if (popOffset === -1 || result < popOffset) {
				popOffset = result;
			}
		}

		if (!hasEmbeddedPopRule) {
			throw monarchCommon.createError(this._lexer, 'no rule containing nextEmbedded: "@pop" in tokenizer embedded state: ' + state.stack.state);
		}

		return popOffset;
	}

	/**
	 * Tokenizes a line that starts inside an embedded language: the part before the leaving
	 * offset goes to the embedded language, the rest is tokenized by this lexer.
	 */
	private _nestedTokenize(line: string, hasEOL: boolean, lineState: MonarchLineState, offsetDelta: number, tokensCollector: IMonarchTokensCollector): MonarchLineState {

		const popOffset = this._findLeavingNestedLanguageOffset(line, lineState);

		if (popOffset === -1) {
			// tokenization will not leave nested language
			const nestedEndState = tokensCollector.nestedLanguageTokenize(line, hasEOL, lineState.embeddedLanguageData!, offsetDelta);
			return MonarchLineStateFactory.create(lineState.stack, new EmbeddedLanguageData(lineState.embeddedLanguageData!.languageId, nestedEndState));
		}

		const nestedLanguageLine = line.substring(0, popOffset);
		if (nestedLanguageLine.length > 0) {
			// tokenize with the nested language
			tokensCollector.nestedLanguageTokenize(nestedLanguageLine, false, lineState.embeddedLanguageData!, offsetDelta);
		}

		const restOfTheLine = line.substring(popOffset);
		return this._myTokenize(restOfTheLine, hasEOL, lineState, offsetDelta + popOffset, tokensCollector);
	}

	/** Returns the rule's name for use in error messages, or '(unknown)' when there is no rule. */
	private _safeRuleName(rule: monarchCommon.IRule | null): string {
		if (rule) {
			return rule.name;
		}
		return '(unknown)';
	}

	/**
	 * Tokenizes `lineWithoutLF` with this lexer's own rules, starting from `lineState`.
	 *
	 * @param lineWithoutLF the text to tokenize (possibly only the tail of the editor line)
	 * @param hasEOL whether the line is followed by a line break
	 * @param lineState the state at the start of `lineWithoutLF`
	 * @param offsetDelta offset of `lineWithoutLF` within the editor line; added to emitted token offsets
	 * @param tokensCollector receives the produced tokens
	 * @returns the state at the end of the line
	 * @throws a lexer error when the lexer definition is inconsistent or makes no progress
	 */
	private _myTokenize(lineWithoutLF: string, hasEOL: boolean, lineState: MonarchLineState, offsetDelta: number, tokensCollector: IMonarchTokensCollector): MonarchLineState {
		tokensCollector.enterLanguage(this._languageId);

		const lineWithoutLFLength = lineWithoutLF.length;
		const line = (hasEOL && this._lexer.includeLF ? lineWithoutLF + '\n' : lineWithoutLF);
		const lineLength = line.length;

		let embeddedLanguageData = lineState.embeddedLanguageData;
		let stack = lineState.stack;
		let pos = 0;

		// regular expression group matching
		// these never need cloning or equality since they are only used within a line match
		interface GroupMatching {
			matches: string[];
			rule: monarchCommon.IRule | null;
			groups: { action: monarchCommon.FuzzyAction; matched: string }[];
		}
		let groupMatching: GroupMatching | null = null;

		// See https://github.com/microsoft/monaco-editor/issues/1235
		// Evaluate rules at least once for an empty line
		let forceEvaluation = true;

		while (forceEvaluation || pos < lineLength) {

			const pos0 = pos;
			const stackLen0 = stack.depth;
			const groupLen0 = groupMatching ? groupMatching.groups.length : 0;
			const state = stack.state;

			let matches: string[] | null = null;
			let matched: string | null = null;
			let action: monarchCommon.FuzzyAction | monarchCommon.FuzzyAction[] | null = null;
			let rule: monarchCommon.IRule | null = null;

			let enteringEmbeddedLanguage: string | null = null;

			// check if we need to process group matches first
			if (groupMatching) {
				matches = groupMatching.matches;
				const groupEntry = groupMatching.groups.shift()!;
				matched = groupEntry.matched;
				action = groupEntry.action;
				rule = groupMatching.rule;

				// cleanup if necessary
				if (groupMatching.groups.length === 0) {
					groupMatching = null;
				}
			} else {
				// otherwise we match on the token stream

				if (!forceEvaluation && pos >= lineLength) {
					// nothing to do
					break;
				}

				forceEvaluation = false;

				// get the rules for this state
				let rules: monarchCommon.IRule[] | null = this._lexer.tokenizer[state];
				if (!rules) {
					rules = monarchCommon.findRules(this._lexer, state); // do parent matching
					if (!rules) {
						throw monarchCommon.createError(this._lexer, 'tokenizer state is not defined: ' + state);
					}
				}

				// try each rule until we match
				const restOfLine = line.substring(pos);
				for (const candidateRule of rules) {
					if (pos === 0 || !candidateRule.matchOnlyAtLineStart) {
						matches = restOfLine.match(candidateRule.resolveRegex(state));
						if (matches) {
							matched = matches[0];
							action = candidateRule.action;
							// remember the matched rule so that error messages can name it
							rule = candidateRule;
							break;
						}
					}
				}
			}

			// We matched 'rule' with 'matches' and 'action'
			if (!matches) {
				matches = [''];
				matched = '';
			}

			if (!action) {
				// bad: we didn't match anything, and there is no action to take
				// we need to advance the stream or we get progress trouble
				if (pos < lineLength) {
					matches = [line.charAt(pos)];
					matched = matches[0];
				}
				action = this._lexer.defaultToken;
			}

			if (matched === null) {
				// should never happen, needed for strict null checking
				break;
			}

			// advance stream
			pos += matched.length;

			// maybe call action function (used for 'cases')
			while (monarchCommon.isFuzzyAction(action) && monarchCommon.isIAction(action) && action.test) {
				action = action.test(matched, matches, state, pos === lineLength);
			}

			let result: monarchCommon.FuzzyAction | monarchCommon.FuzzyAction[] | null = null;
			// set the result: either a string or an array of actions
			if (typeof action === 'string' || Array.isArray(action)) {
				result = action;
			} else if (action.group) {
				result = action.group;
			} else if (action.token !== null && action.token !== undefined) {

				// do $n replacements?
				if (action.tokenSubst) {
					result = monarchCommon.substituteMatches(this._lexer, action.token, matched, matches, state);
				} else {
					result = action.token;
				}

				// enter embedded language?
				if (action.nextEmbedded) {
					if (action.nextEmbedded === '@pop') {
						if (!embeddedLanguageData) {
							throw monarchCommon.createError(this._lexer, 'cannot pop embedded language if not inside one');
						}
						embeddedLanguageData = null;
					} else if (embeddedLanguageData) {
						throw monarchCommon.createError(this._lexer, 'cannot enter embedded language from within an embedded language');
					} else {
						enteringEmbeddedLanguage = monarchCommon.substituteMatches(this._lexer, action.nextEmbedded, matched, matches, state);
					}
				}

				// state transformations
				if (action.goBack) { // back up the stream..
					pos = Math.max(0, pos - action.goBack);
				}

				if (action.switchTo && typeof action.switchTo === 'string') {
					let nextState = monarchCommon.substituteMatches(this._lexer, action.switchTo, matched, matches, state); // switch state without a push...
					if (nextState[0] === '@') {
						nextState = nextState.substring(1); // peel off starting '@'
					}
					if (!monarchCommon.findRules(this._lexer, nextState)) {
						throw monarchCommon.createError(this._lexer, 'trying to switch to a state \'' + nextState + '\' that is undefined in rule: ' + this._safeRuleName(rule));
					} else {
						stack = stack.switchTo(nextState);
					}
				} else if (action.transform && typeof action.transform === 'function') {
					throw monarchCommon.createError(this._lexer, 'action.transform not supported');
				} else if (action.next) {
					if (action.next === '@push') {
						if (stack.depth >= this._lexer.maxStack) {
							throw monarchCommon.createError(this._lexer, 'maximum tokenizer stack size reached: [' +
								stack.state + ',' + stack.parent!.state + ',...]');
						} else {
							stack = stack.push(state);
						}
					} else if (action.next === '@pop') {
						if (stack.depth <= 1) {
							throw monarchCommon.createError(this._lexer, 'trying to pop an empty stack in rule: ' + this._safeRuleName(rule));
						} else {
							stack = stack.pop()!;
						}
					} else if (action.next === '@popall') {
						stack = stack.popall();
					} else {
						let nextState = monarchCommon.substituteMatches(this._lexer, action.next, matched, matches, state);
						if (nextState[0] === '@') {
							nextState = nextState.substring(1); // peel off starting '@'
						}

						if (!monarchCommon.findRules(this._lexer, nextState)) {
							throw monarchCommon.createError(this._lexer, 'trying to set a next state \'' + nextState + '\' that is undefined in rule: ' + this._safeRuleName(rule));
						} else {
							stack = stack.push(nextState);
						}
					}
				}

				if (action.log && typeof (action.log) === 'string') {
					monarchCommon.log(this._lexer, this._lexer.languageId + ': ' + monarchCommon.substituteMatches(this._lexer, action.log, matched, matches, state));
				}
			}

			// check result
			if (result === null) {
				throw monarchCommon.createError(this._lexer, 'lexer rule has no well-defined action in rule: ' + this._safeRuleName(rule));
			}

			// Builds the end state when entering an embedded language; any remaining content
			// on this line is tokenized by the embedded language right away.
			const computeNewStateForEmbeddedLanguage = (enteringEmbeddedLanguage: string) => {
				// support language names, mime types, and language ids
				const languageId = (
					this._languageService.getLanguageIdByLanguageName(enteringEmbeddedLanguage)
					|| this._languageService.getLanguageIdByMimeType(enteringEmbeddedLanguage)
					|| enteringEmbeddedLanguage
				);

				const embeddedLanguageData = this._getNestedEmbeddedLanguageData(languageId);

				if (pos < lineLength) {
					// there is content from the embedded language on this line
					const restOfLine = lineWithoutLF.substring(pos);
					return this._nestedTokenize(restOfLine, hasEOL, MonarchLineStateFactory.create(stack, embeddedLanguageData), offsetDelta + pos, tokensCollector);
				} else {
					return MonarchLineStateFactory.create(stack, embeddedLanguageData);
				}
			};

			// is the result a group match?
			if (Array.isArray(result)) {
				if (groupMatching && groupMatching.groups.length > 0) {
					throw monarchCommon.createError(this._lexer, 'groups cannot be nested: ' + this._safeRuleName(rule));
				}
				if (matches.length !== result.length + 1) {
					throw monarchCommon.createError(this._lexer, 'matched number of groups does not match the number of actions in rule: ' + this._safeRuleName(rule));
				}
				let totalLen = 0;
				for (let i = 1; i < matches.length; i++) {
					totalLen += matches[i].length;
				}
				if (totalLen !== matched.length) {
					throw monarchCommon.createError(this._lexer, 'with groups, all characters should be matched in consecutive groups in rule: ' + this._safeRuleName(rule));
				}

				groupMatching = {
					rule: rule,
					matches: matches,
					groups: []
				};
				for (let i = 0; i < result.length; i++) {
					groupMatching.groups[i] = {
						action: result[i],
						matched: matches[i + 1]
					};
				}

				pos -= matched.length;
				// call recursively to initiate first result match
				continue;
			} else {
				// regular result

				// check for '@rematch'
				if (result === '@rematch') {
					pos -= matched.length;
					matched = ''; // better set the next state too..
					matches = null;
					result = '';

					// Even though `@rematch` was specified, if `nextEmbedded` also specified,
					// a state transition should occur.
					if (enteringEmbeddedLanguage !== null) {
						return computeNewStateForEmbeddedLanguage(enteringEmbeddedLanguage);
					}
				}

				// check progress
				if (matched.length === 0) {
					if (lineLength === 0 || stackLen0 !== stack.depth || state !== stack.state || (!groupMatching ? 0 : groupMatching.groups.length) !== groupLen0) {
						continue;
					} else {
						throw monarchCommon.createError(this._lexer, 'no progress in tokenizer in rule: ' + this._safeRuleName(rule));
					}
				}

				// return the result (and check for brace matching)
				// todo: for efficiency we could pre-sanitize tokenPostfix and substitutions
				let tokenType: string | null = null;
				if (monarchCommon.isString(result) && result.indexOf('@brackets') === 0) {
					const rest = result.substring('@brackets'.length);
					const bracket = findBracket(this._lexer, matched);
					if (!bracket) {
						throw monarchCommon.createError(this._lexer, '@brackets token returned but no bracket defined as: ' + matched);
					}
					tokenType = monarchCommon.sanitize(bracket.token + rest);
				} else {
					const token = (result === '' ? '' : result + this._lexer.tokenPostfix);
					tokenType = monarchCommon.sanitize(token);
				}

				// do not emit a token for the artificial '\n' appended when `includeLF` is set
				if (pos0 < lineWithoutLFLength) {
					tokensCollector.emit(pos0 + offsetDelta, tokenType);
				}
			}

			if (enteringEmbeddedLanguage !== null) {
				return computeNewStateForEmbeddedLanguage(enteringEmbeddedLanguage);
			}
		}

		return MonarchLineStateFactory.create(stack, embeddedLanguageData);
	}

	/**
	 * Returns the initial embedded language data for `languageId`.
	 * Falls back to `NullState` when the language is not registered or has no tokenizer yet.
	 * For languages other than our own, loading is requested and the language is recorded as embedded.
	 */
	private _getNestedEmbeddedLanguageData(languageId: string): EmbeddedLanguageData {
		if (!this._languageService.isRegisteredLanguageId(languageId)) {
			return new EmbeddedLanguageData(languageId, NullState);
		}

		if (languageId !== this._languageId) {
			// Fire language loading event
			this._languageService.requestBasicLanguageFeatures(languageId);
			languages.TokenizationRegistry.getOrCreate(languageId);
			this._embeddedLanguages[languageId] = true;
		}

		const tokenizationSupport = languages.TokenizationRegistry.get(languageId);
		if (tokenizationSupport) {
			return new EmbeddedLanguageData(languageId, tokenizationSupport.getInitialState());
		}

		return new EmbeddedLanguageData(languageId, NullState);
	}
}

/**
 * Searches for a bracket in the 'brackets' attribute that matches the input.
 * Returns `null` when `matched` is empty or no bracket matches.
 */
function findBracket(lexer: monarchCommon.ILexer, matched: string) {
	if (!matched) {
		return null;
	}
	matched = monarchCommon.fixCase(lexer, matched);

	const brackets = lexer.brackets;
	for (const bracket of brackets) {
		if (bracket.open === matched) {
			return { token: bracket.token, bracketType: monarchCommon.MonarchBracket.Open };
		}
		else if (bracket.close === matched) {
			return { token: bracket.token, bracketType: monarchCommon.MonarchBracket.Close };
		}
	}
	return null;
}