Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
microsoft
GitHub Repository: microsoft/vscode
Path: blob/main/src/vs/editor/standalone/common/monarch/monarchCommon.ts
3296 views
1
/*---------------------------------------------------------------------------------------------
2
* Copyright (c) Microsoft Corporation. All rights reserved.
3
* Licensed under the MIT License. See License.txt in the project root for license information.
4
*--------------------------------------------------------------------------------------------*/
5
6
import { escapeRegExpCharacters } from '../../../../base/common/strings.js';
7
8
/*
9
* This module exports common types and functionality shared between
10
* the Monarch compiler that compiles JSON to ILexer, and the Monarch
11
* Tokenizer (that highlights at runtime)
12
*/
13
14
/*
15
* Type definitions to be used internally to Monarch.
16
* Inside monarch we use fully typed definitions and compiled versions of the more abstract JSON descriptions.
17
*/
18
19
/**
 * Bracket role that an action can carry (see the `bracket` field of `IAction`).
 * Every member has an explicit numeric value, so declaration order is irrelevant.
 */
export const enum MonarchBracket {
	None = 0,
	Open = 1,
	Close = -1
}
24
25
/**
 * The minimal lexer shape required by the helper functions of this module
 * (`fixCase`, `log`, `createError`, `substituteMatches`, `stateExists`, ...).
 */
export interface ILexerMin {
	/** Language identifier; `log` and `createError` prefix their messages with it. */
	languageId: string;
	includeLF: boolean;
	noThrow: boolean;
	/** When set, `fixCase` lower-cases the strings passed through it. */
	ignoreCase: boolean;
	unicode: boolean;
	usesEmbedded: boolean;
	defaultToken: string;
	/** Known state names; `stateExists` only checks whether an entry is truthy. */
	stateNames: { [stateName: string]: any };
	/** Any further attribute; string-valued ones can be referenced as `$@attr` in `substituteMatches`. */
	[attr: string]: any;
}
36
37
/**
 * A lexer description with its tokenizer rules attached.
 *
 * Everything declared on `ILexerMin` is inherited, including `ignoreCase` and
 * `unicode`, which are therefore not repeated here.
 */
export interface ILexer extends ILexerMin {
	maxStack: number;
	start: string | null;
	tokenPostfix: string;

	/** Rules per state name; `findRules` performs its lookups on this map. */
	tokenizer: { [stateName: string]: IRule[] };
	brackets: IBracket[];
}
47
48
/**
 * One bracket entry: a `token` string together with the `open` and `close`
 * strings of the bracket pair.
 */
export interface IBracket {
	token: string;
	open: string;
	close: string;
}
53
54
/** An action given either as a full `IAction` object or as a plain string. */
export type FuzzyAction = IAction | string;
55
56
/**
 * Type guard: is `what` a list of actions (as opposed to a single action)?
 *
 * @param what A single action or an array of actions.
 * @returns `true` exactly when `what` is an array.
 */
export function isFuzzyActionArr(what: FuzzyAction | FuzzyAction[]): what is FuzzyAction[] {
	const isList = Array.isArray(what);
	return isList;
}
59
60
/**
 * Type guard: is `what` a single action (as opposed to a list of actions)?
 *
 * @param what A single action or an array of actions.
 * @returns `false` when `isFuzzyActionArr` accepts `what`, `true` otherwise.
 */
export function isFuzzyAction(what: FuzzyAction | FuzzyAction[]): what is FuzzyAction {
	if (isFuzzyActionArr(what)) {
		return false;
	}
	return true;
}
63
64
/**
 * Type guard: is the action given in its plain-string form?
 *
 * @param what The action to inspect.
 * @returns `true` exactly when `typeof what` is `'string'`.
 */
export function isString(what: FuzzyAction): what is string {
	const kind = typeof what;
	return kind === 'string';
}
67
68
/**
 * Type guard: is the action given as an `IAction` object?
 *
 * @param what The action to inspect.
 * @returns `false` when `isString` accepts `what`, `true` otherwise.
 */
export function isIAction(what: FuzzyAction): what is IAction {
	if (isString(what)) {
		return false;
	}
	return true;
}
71
72
/**
 * A single tokenizer rule. `findRules` returns lists of these.
 */
export interface IRule {
	/** The rule's action, in object or plain-string form. */
	action: FuzzyAction;
	matchOnlyAtLineStart: boolean;
	name: string;
	/** Produces the rule's regular expression for the given state name. */
	resolveRegex(state: string): RegExp;
}
78
79
/**
 * The object form of an action. Every field is optional; the inline comments
 * mark the three alternative forms an action can take.
 */
export interface IAction {
	// Form 1: a group of actions
	group?: FuzzyAction[];

	hasEmbeddedEndInCases?: boolean;
	// Form 2: a function that returns a fresh action
	test?: (id: string, matches: string[], state: string, eos: boolean) => FuzzyAction;

	// Form 3: a declarative action with a token value and various other attributes
	token?: string;
	tokenSubst?: boolean;
	next?: string;
	nextEmbedded?: string;
	bracket?: MonarchBracket;
	log?: string;
	switchTo?: string;
	goBack?: number;
	transform?: (states: string[]) => string[];
}
98
99
/**
 * A named branch holding an action in `value`. The optional `test` predicate
 * takes the same arguments as `IAction.test` but yields a boolean.
 */
export interface IBranch {
	name: string;
	value: FuzzyAction;
	test?: (id: string, matches: string[], state: string, eos: boolean) => boolean;
}
104
105
// Small helper functions
106
107
/**
 * Tells whether a string is null, undefined, or empty.
 *
 * @param s The value to check.
 * @returns `true` when `s` is falsy, `false` otherwise.
 */
export function empty(s: string): boolean {
	return !s;
}
113
114
/**
 * Lower-cases `str` when the lexer has `ignoreCase` set; otherwise returns
 * `str` untouched. A falsy `str` is always returned as is.
 *
 * @param lexer The lexer whose `ignoreCase` flag is consulted.
 * @param str The string to normalize.
 */
export function fixCase(lexer: ILexerMin, str: string): string {
	if (lexer.ignoreCase && str) {
		return str.toLowerCase();
	}
	return str;
}
120
121
/**
 * Ensures there are no bad characters in a CSS token class: each of
 * `&`, `<`, `>`, `'`, `"` and `_` is replaced by `-`.
 * Used on all output token CSS classes.
 */
export function sanitize(s: string) {
	const badChars = /[&<>'"_]/g;
	return s.replace(badChars, '-');
}
127
128
// Logging
129
130
/**
 * Writes `msg` to the console, prefixed with the lexer's language id.
 *
 * @param lexer The lexer the message relates to.
 * @param msg The message text.
 */
export function log(lexer: ILexerMin, msg: string) {
	const line = `${lexer.languageId}: ${msg}`;
	console.log(line);
}
136
137
// Throwing errors
138
139
/**
 * Builds (but does not throw) an `Error` whose message is `msg` prefixed with
 * the lexer's language id.
 *
 * @param lexer The lexer the error relates to.
 * @param msg The error description.
 */
export function createError(lexer: ILexerMin, msg: string): Error {
	const text = `${lexer.languageId}: ${msg}`;
	return new Error(text);
}
142
143
// Helper functions for rule finding and substitution
144
145
/**
 * substituteMatches is used on lexer strings and substitutes these predefined patterns:
 *   $$      => $
 *   $#      => id
 *   $n      => matched entry n (one or two digits)
 *   $Sn     => n'th part of the state split on '.', with $S0 being the whole state
 *   $@attr  => contents of lexer[attr], if that is a string
 *
 * The results of `$#`, `$n` and `$Sn` are passed through `fixCase`; `$@attr` is
 * inserted verbatim. A pattern that cannot be resolved (index out of range,
 * capture group that did not take part in the match, attribute that is not a
 * string) is replaced by the empty string.
 *
 * @param lexer Lexer providing `ignoreCase` and the `$@attr` attributes.
 * @param str The string containing the patterns.
 * @param id Replacement for `$#`.
 * @param matches Replacements for `$n`.
 * @param state Dotted state name used for `$Sn`.
 * @returns `str` with every pattern substituted.
 */
export function substituteMatches(lexer: ILexerMin, str: string, id: string, matches: string[], state: string): string {
	const re = /\$((\$)|(#)|(\d\d?)|[sS](\d\d?)|@(\w+))/g;
	let stateMatches: string[] | null = null;
	// Exactly one of dollar/hash/n/s/attr is defined for any given match.
	return str.replace(re, (_full: string, _sub?: string, dollar?: string, hash?: string, n?: string, s?: string, attr?: string): string => {
		if (dollar) {
			return '$'; // $$
		}
		if (hash) {
			return fixCase(lexer, id); // $#
		}
		if (n) {
			// Parse the digits: as a raw string key, a zero-padded reference such as
			// `$01` passes the numeric range check but misses the array element.
			const index = parseInt(n, 10);
			const match = index < matches.length ? matches[index] : undefined;
			// A group that did not participate in the match is undefined at runtime;
			// returning it from the replacer would insert the text "undefined".
			return typeof match === 'string' ? fixCase(lexer, match) : ''; // $n
		}
		if (attr) {
			const value = lexer ? lexer[attr] : undefined;
			return typeof value === 'string' ? value : ''; // $@attr
		}
		if (s) {
			if (stateMatches === null) { // split state on demand
				stateMatches = [state, ...state.split('.')];
			}
			const index = parseInt(s, 10);
			return index < stateMatches.length ? fixCase(lexer, stateMatches[index]) : ''; // $Sn
		}
		return '';
	});
}
180
181
/**
 * substituteMatchesRe is used on lexer regex rules and substitutes this predefined pattern:
 *   $Sn => n'th part of the state split on '.', with $S0 being the whole state
 *
 * The inserted text is passed through `fixCase` and then through
 * `escapeRegExpCharacters`. A `$Sn` whose index is out of range is replaced
 * by the empty string.
 *
 * @param lexer Lexer providing `ignoreCase`.
 * @param str Regex source text containing the patterns.
 * @param state Dotted state name.
 * @returns `str` with every `$Sn` substituted.
 */
export function substituteMatchesRe(lexer: ILexerMin, str: string, state: string): string {
	const re = /\$[sS](\d\d?)/g;
	let stateMatches: string[] | null = null;
	return str.replace(re, (_full: string, s: string): string => {
		if (stateMatches === null) { // split state on demand
			stateMatches = [state, ...state.split('.')];
		}
		// Parse the digits: as a raw string key, a zero-padded reference such as
		// `$S01` passes the numeric range check but misses the array element, and
		// the resulting undefined would then be handed to escapeRegExpCharacters.
		const index = parseInt(s, 10);
		if (index < stateMatches.length) {
			return escapeRegExpCharacters(fixCase(lexer, stateMatches[index])); // $Sn
		}
		return '';
	});
}
200
201
/**
 * Finds the tokenizer rules for a specific state (i.e. next action).
 *
 * The state name is tried as given; on a miss its last '.'-separated segment
 * is dropped and the lookup is repeated, until a match is found or nothing is
 * left to try.
 *
 * @param lexer The lexer whose `tokenizer` map is searched.
 * @param inState The (possibly dotted) state name to start from.
 * @returns The rules of the first matching state, or `null` if none matches.
 */
export function findRules(lexer: ILexer, inState: string): IRule[] | null {
	let current: string | null = inState;
	// An empty name ends the walk just like running out of parents does.
	while (current) {
		const found = lexer.tokenizer[current];
		if (found) {
			return found;
		}

		const lastDot = current.lastIndexOf('.');
		current = lastDot < 0 ? null : current.substring(0, lastDot);
	}
	return null;
}
221
222
/**
 * Is a certain state defined? In contrast to 'findRules' this works on an ILexerMin.
 * This is used during compilation where we may know the defined states
 * but not yet whether the corresponding rules are correct.
 *
 * The state name is tried as given; on a miss its last '.'-separated segment
 * is dropped and the check is repeated, until a truthy `stateNames` entry is
 * found or nothing is left to try.
 *
 * @param lexer The lexer whose `stateNames` map is consulted.
 * @param inState The (possibly dotted) state name to start from.
 * @returns `true` if the state or one of its dotted prefixes is defined.
 */
export function stateExists(lexer: ILexerMin, inState: string): boolean {
	let current: string | null = inState;
	// An empty name ends the walk just like running out of parents does.
	while (current) {
		if (lexer.stateNames[current]) {
			return true;
		}

		const lastDot = current.lastIndexOf('.');
		current = lastDot >= 0 ? current.substring(0, lastDot) : null;
	}
	return false;
}
244
245