Path: blob/main/src/vs/editor/standalone/common/monarch/monarchCommon.ts
3296 views
/*---------------------------------------------------------------------------------------------
 *  Copyright (c) Microsoft Corporation. All rights reserved.
 *  Licensed under the MIT License. See License.txt in the project root for license information.
 *--------------------------------------------------------------------------------------------*/

import { escapeRegExpCharacters } from '../../../../base/common/strings.js';

/*
 * This module exports common types and functionality shared between
 * the Monarch compiler that compiles JSON to ILexer, and the Monarch
 * Tokenizer (that highlights at runtime)
 */

/*
 * Type definitions to be used internally to Monarch.
 * Inside monarch we use fully typed definitions and compiled versions of the more abstract JSON descriptions.
 */

export const enum MonarchBracket {
	None = 0,
	Open = 1,
	Close = -1
}

export interface ILexerMin {
	languageId: string;
	includeLF: boolean;
	noThrow: boolean;
	ignoreCase: boolean;
	unicode: boolean;
	usesEmbedded: boolean;
	defaultToken: string;
	stateNames: { [stateName: string]: any };
	// Arbitrary extra attributes; string-valued ones can be referenced as `$@attr` (see substituteMatches).
	[attr: string]: any;
}

export interface ILexer extends ILexerMin {
	maxStack: number;
	start: string | null;
	ignoreCase: boolean;
	unicode: boolean;
	tokenPostfix: string;

	tokenizer: { [stateName: string]: IRule[] };
	brackets: IBracket[];
}

export interface IBracket {
	token: string;
	open: string;
	close: string;
}

export type FuzzyAction = IAction | string;

/**
 * Type guard: is `what` an array of actions (as opposed to a single action)?
 */
export function isFuzzyActionArr(what: FuzzyAction | FuzzyAction[]): what is FuzzyAction[] {
	return (Array.isArray(what));
}

/**
 * Type guard: is `what` a single action (i.e. not an array)?
 */
export function isFuzzyAction(what: FuzzyAction | FuzzyAction[]): what is FuzzyAction {
	return !isFuzzyActionArr(what);
}

/**
 * Type guard: is the action given in its string form?
 */
export function isString(what: FuzzyAction): what is string {
	return (typeof what === 'string');
}

/**
 * Type guard: is the action given in its object (IAction) form, i.e. not a string?
 */
export function isIAction(what: FuzzyAction): what is IAction {
	return !isString(what);
}

export interface IRule {
	action: FuzzyAction;
	matchOnlyAtLineStart: boolean;
	name: string;
	resolveRegex(state: string): RegExp;
}

export interface IAction {
	// an action is either a group of actions
	group?: FuzzyAction[];

	hasEmbeddedEndInCases?: boolean;
	// or a function that returns a fresh action
	test?: (id: string, matches: string[], state: string, eos: boolean) => FuzzyAction;

	// or it is a declarative action with a token value and various other attributes
	token?: string;
	tokenSubst?: boolean;
	next?: string;
	nextEmbedded?: string;
	bracket?: MonarchBracket;
	log?: string;
	switchTo?: string;
	goBack?: number;
	transform?: (states: string[]) => string[];
}

export interface IBranch {
	name: string;
	value: FuzzyAction;
	test?: (id: string, matches: string[], state: string, eos: boolean) => boolean;
}

// Small helper functions

/**
 * Is a string null, undefined, or empty?
 *
 * The parameter type admits `null`/`undefined` so that the signature matches
 * what the function actually checks (any falsy value counts as empty).
 */
export function empty(s: string | null | undefined): boolean {
	return (s ? false : true);
}

/**
 * Puts a string to lower case if 'ignoreCase' is set.
 * A falsy `str` is returned unchanged.
 */
export function fixCase(lexer: ILexerMin, str: string): string {
	return (lexer.ignoreCase && str ? str.toLowerCase() : str);
}

/**
 * Ensures there are no bad characters in a CSS token class:
 * each of & < > ' " _ is replaced by '-'.
 */
export function sanitize(s: string): string {
	return s.replace(/[&<>'"_]/g, '-'); // used on all output token CSS classes
}

// Logging

/**
 * Logs a message to the console, prefixed with the lexer's language id.
 */
export function log(lexer: ILexerMin, msg: string): void {
	console.log(`${lexer.languageId}: ${msg}`);
}

// Throwing errors

/**
 * Creates (but does not throw) an Error whose message is prefixed with the lexer's language id.
 */
export function createError(lexer: ILexerMin, msg: string): Error {
	return new Error(`${lexer.languageId}: ${msg}`);
}

// Helper functions for rule finding and substitution

/**
 * Splits a state name into the parts addressable through `$Sn`:
 * index 0 is the complete state name, indices 1.. are its '.'-separated segments.
 */
function splitState(state: string): string[] {
	const parts = state.split('.');
	parts.unshift(state);
	return parts;
}

/**
 * substituteMatches is used on lexer strings and can substitute predefined patterns:
 * 		$$     => $
 * 		$#     => id
 * 		$n     => matched entry n
 * 		$Sn    => n'th part of state (`$sn` is accepted as well; part 0 is the full state name)
 * 		$@attr => contents of lexer[attr], when that is a string
 *
 * Anything that cannot be resolved (index out of range, capture group that did
 * not participate in the match, non-string attribute) is replaced by the empty string.
 *
 * See documentation for more info
 */
export function substituteMatches(lexer: ILexerMin, str: string, id: string, matches: string[], state: string): string {
	const re = /\$((\$)|(#)|(\d\d?)|[sS](\d\d?)|@(\w+))/g;
	let stateMatches: string[] | null = null;
	return str.replace(re, function (_full: string, _sub?: string, dollar?: string, hash?: string, n?: string, s?: string, attr?: string): string {
		if (!empty(dollar)) {
			return '$'; // $$
		}
		if (!empty(hash)) {
			return fixCase(lexer, id); // default $#
		}
		if (n) {
			// Index numerically: using the captured text directly would look up
			// e.g. matches["05"], which never exists.
			const index = parseInt(n, 10);
			const match = index < matches.length ? matches[index] : undefined;
			// A capture group that did not participate is `undefined`; returning that
			// from a replacer would insert the literal text "undefined".
			return typeof match === 'string' ? fixCase(lexer, match) : ''; // $n
		}
		if (attr && lexer && typeof (lexer[attr]) === 'string') {
			return lexer[attr]; //@attribute
		}
		if (s) {
			if (stateMatches === null) { // split state on demand
				stateMatches = splitState(state);
			}
			const index = parseInt(s, 10);
			if (index < stateMatches.length) {
				return fixCase(lexer, stateMatches[index]); //$Sn
			}
		}
		return '';
	});
}

/**
 * substituteMatchesRe is used on lexer regex rules and can substitute predefined patterns:
 * 		$Sn => n'th part of state (`$sn` is accepted as well; part 0 is the full state name)
 *
 * The substituted text is escaped so that it is matched literally inside the regex.
 * An out-of-range index is replaced by the empty string.
 */
export function substituteMatchesRe(lexer: ILexerMin, str: string, state: string): string {
	const re = /\$[sS](\d\d?)/g;
	let stateMatches: string[] | null = null;
	return str.replace(re, function (_full: string, s: string): string {
		if (stateMatches === null) { // split state on demand
			stateMatches = splitState(state);
		}
		if (!empty(s)) {
			// Index numerically so that a leading zero ("$S05") still addresses part 5.
			const index = parseInt(s, 10);
			if (index < stateMatches.length) {
				return escapeRegExpCharacters(fixCase(lexer, stateMatches[index])); //$Sn
			}
		}
		return '';
	});
}

/**
 * Find the tokenizer rules for a specific state (i.e. next action).
 *
 * If there are no rules under the exact state name, the last '.'-separated
 * segment is dropped and the lookup is retried with the parent name, until
 * either rules are found or no segment is left.
 *
 * @returns the rules of the closest matching state, or `null` when none is defined.
 */
export function findRules(lexer: ILexer, inState: string): IRule[] | null {
	let state: string | null = inState;
	while (state && state.length > 0) {
		const rules = lexer.tokenizer[state];
		if (rules) {
			return rules;
		}

		const idx = state.lastIndexOf('.');
		if (idx < 0) {
			state = null; // no further parent
		} else {
			state = state.substring(0, idx);
		}
	}
	return null;
}

/**
 * Is a certain state defined? In contrast to 'findRules' this works on a ILexerMin.
 * This is used during compilation where we may know the defined states
 * but not yet whether the corresponding rules are correct.
 *
 * Like 'findRules', the lookup falls back to the parent state name (the name
 * without its last '.'-separated segment) until a truthy entry in
 * `lexer.stateNames` is found or no segment is left.
 */
export function stateExists(lexer: ILexerMin, inState: string): boolean {
	let state: string | null = inState;
	while (state && state.length > 0) {
		const exist = lexer.stateNames[state];
		if (exist) {
			return true;
		}

		const idx = state.lastIndexOf('.');
		if (idx < 0) {
			state = null; // no further parent
		} else {
			state = state.substring(0, idx);
		}
	}
	return false;
}