Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
microsoft
GitHub Repository: microsoft/vscode
Path: blob/main/src/vs/editor/common/languages/supports/richEditBrackets.ts
3296 views
1
/*---------------------------------------------------------------------------------------------
2
* Copyright (c) Microsoft Corporation. All rights reserved.
3
* Licensed under the MIT License. See License.txt in the project root for license information.
4
*--------------------------------------------------------------------------------------------*/
5
6
import * as strings from '../../../../base/common/strings.js';
7
import * as stringBuilder from '../../core/stringBuilder.js';
8
import { Range } from '../../core/range.js';
9
import { CharacterPair } from '../languageConfiguration.js';
10
11
interface InternalBracket {
12
open: string[];
13
close: string[];
14
}
15
16
/**
17
* Represents a grouping of colliding bracket pairs.
18
*
19
* Most of the times this contains a single bracket pair,
20
* but sometimes this contains multiple bracket pairs in cases
21
* where the same string appears as a closing bracket for multiple
22
* bracket pairs, or the same string appears an opening bracket for
23
* multiple bracket pairs.
24
*
25
* e.g. of a group containing a single pair:
26
* open: ['{'], close: ['}']
27
*
28
* e.g. of a group containing multiple pairs:
29
* open: ['if', 'for'], close: ['end', 'end']
30
*/
31
export class RichEditBracket {
32
_richEditBracketBrand: void = undefined;
33
34
readonly languageId: string;
35
/**
36
* A 0-based consecutive unique identifier for this bracket pair.
37
* If a language has 5 bracket pairs, out of which 2 are grouped together,
38
* it is expected that the `index` goes from 0 to 4.
39
*/
40
readonly index: number;
41
/**
42
* The open sequence for each bracket pair contained in this group.
43
*
44
* The open sequence at a specific index corresponds to the
45
* closing sequence at the same index.
46
*
47
* [ open[i], closed[i] ] represent a bracket pair.
48
*/
49
readonly open: string[];
50
/**
51
* The close sequence for each bracket pair contained in this group.
52
*
53
* The close sequence at a specific index corresponds to the
54
* opening sequence at the same index.
55
*
56
* [ open[i], closed[i] ] represent a bracket pair.
57
*/
58
readonly close: string[];
59
/**
60
* A regular expression that is useful to search for this bracket pair group in a string.
61
*
62
* This regular expression is built in a way that it is aware of the other bracket
63
* pairs defined for the language, so it might match brackets from other groups.
64
*
65
* See the fine details in `getRegexForBracketPair`.
66
*/
67
readonly forwardRegex: RegExp;
68
/**
69
* A regular expression that is useful to search for this bracket pair group in a string backwards.
70
*
71
* This regular expression is built in a way that it is aware of the other bracket
72
* pairs defined for the language, so it might match brackets from other groups.
73
*
74
* See the fine defails in `getReversedRegexForBracketPair`.
75
*/
76
readonly reversedRegex: RegExp;
77
private readonly _openSet: Set<string>;
78
private readonly _closeSet: Set<string>;
79
80
constructor(languageId: string, index: number, open: string[], close: string[], forwardRegex: RegExp, reversedRegex: RegExp) {
81
this.languageId = languageId;
82
this.index = index;
83
this.open = open;
84
this.close = close;
85
this.forwardRegex = forwardRegex;
86
this.reversedRegex = reversedRegex;
87
this._openSet = RichEditBracket._toSet(this.open);
88
this._closeSet = RichEditBracket._toSet(this.close);
89
}
90
91
/**
92
* Check if the provided `text` is an open bracket in this group.
93
*/
94
public isOpen(text: string) {
95
return this._openSet.has(text);
96
}
97
98
/**
99
* Check if the provided `text` is a close bracket in this group.
100
*/
101
public isClose(text: string) {
102
return this._closeSet.has(text);
103
}
104
105
private static _toSet(arr: string[]): Set<string> {
106
const result = new Set<string>();
107
for (const element of arr) {
108
result.add(element);
109
}
110
return result;
111
}
112
}
113
114
/**
115
* Groups together brackets that have equal open or close sequences.
116
*
117
* For example, if the following brackets are defined:
118
* ['IF','END']
119
* ['for','end']
120
* ['{','}']
121
*
122
* Then the grouped brackets would be:
123
* { open: ['if', 'for'], close: ['end', 'end'] }
124
* { open: ['{'], close: ['}'] }
125
*
126
*/
127
function groupFuzzyBrackets(brackets: readonly CharacterPair[]): InternalBracket[] {
128
const N = brackets.length;
129
130
brackets = brackets.map(b => [b[0].toLowerCase(), b[1].toLowerCase()]);
131
132
const group: number[] = [];
133
for (let i = 0; i < N; i++) {
134
group[i] = i;
135
}
136
137
const areOverlapping = (a: CharacterPair, b: CharacterPair) => {
138
const [aOpen, aClose] = a;
139
const [bOpen, bClose] = b;
140
return (aOpen === bOpen || aOpen === bClose || aClose === bOpen || aClose === bClose);
141
};
142
143
const mergeGroups = (g1: number, g2: number) => {
144
const newG = Math.min(g1, g2);
145
const oldG = Math.max(g1, g2);
146
for (let i = 0; i < N; i++) {
147
if (group[i] === oldG) {
148
group[i] = newG;
149
}
150
}
151
};
152
153
// group together brackets that have the same open or the same close sequence
154
for (let i = 0; i < N; i++) {
155
const a = brackets[i];
156
for (let j = i + 1; j < N; j++) {
157
const b = brackets[j];
158
if (areOverlapping(a, b)) {
159
mergeGroups(group[i], group[j]);
160
}
161
}
162
}
163
164
const result: InternalBracket[] = [];
165
for (let g = 0; g < N; g++) {
166
const currentOpen: string[] = [];
167
const currentClose: string[] = [];
168
for (let i = 0; i < N; i++) {
169
if (group[i] === g) {
170
const [open, close] = brackets[i];
171
currentOpen.push(open);
172
currentClose.push(close);
173
}
174
}
175
if (currentOpen.length > 0) {
176
result.push({
177
open: currentOpen,
178
close: currentClose
179
});
180
}
181
}
182
return result;
183
}
184
185
export class RichEditBrackets {
186
_richEditBracketsBrand: void = undefined;
187
188
/**
189
* All groups of brackets defined for this language.
190
*/
191
public readonly brackets: RichEditBracket[];
192
/**
193
* A regular expression that is useful to search for all bracket pairs in a string.
194
*
195
* See the fine details in `getRegexForBrackets`.
196
*/
197
public readonly forwardRegex: RegExp;
198
/**
199
* A regular expression that is useful to search for all bracket pairs in a string backwards.
200
*
201
* See the fine details in `getReversedRegexForBrackets`.
202
*/
203
public readonly reversedRegex: RegExp;
204
/**
205
* The length (i.e. str.length) for the longest bracket pair.
206
*/
207
public readonly maxBracketLength: number;
208
/**
209
* A map useful for decoding a regex match and finding which bracket group was matched.
210
*/
211
public readonly textIsBracket: { [text: string]: RichEditBracket };
212
/**
213
* A set useful for decoding if a regex match is the open bracket of a bracket pair.
214
*/
215
public readonly textIsOpenBracket: { [text: string]: boolean };
216
217
constructor(languageId: string, _brackets: readonly CharacterPair[]) {
218
const brackets = groupFuzzyBrackets(_brackets);
219
220
this.brackets = brackets.map((b, index) => {
221
return new RichEditBracket(
222
languageId,
223
index,
224
b.open,
225
b.close,
226
getRegexForBracketPair(b.open, b.close, brackets, index),
227
getReversedRegexForBracketPair(b.open, b.close, brackets, index)
228
);
229
});
230
231
this.forwardRegex = getRegexForBrackets(this.brackets);
232
this.reversedRegex = getReversedRegexForBrackets(this.brackets);
233
234
this.textIsBracket = {};
235
this.textIsOpenBracket = {};
236
237
this.maxBracketLength = 0;
238
for (const bracket of this.brackets) {
239
for (const open of bracket.open) {
240
this.textIsBracket[open] = bracket;
241
this.textIsOpenBracket[open] = true;
242
this.maxBracketLength = Math.max(this.maxBracketLength, open.length);
243
}
244
for (const close of bracket.close) {
245
this.textIsBracket[close] = bracket;
246
this.textIsOpenBracket[close] = false;
247
this.maxBracketLength = Math.max(this.maxBracketLength, close.length);
248
}
249
}
250
}
251
}
252
253
function collectSuperstrings(str: string, brackets: InternalBracket[], currentIndex: number, dest: string[]): void {
254
for (let i = 0, len = brackets.length; i < len; i++) {
255
if (i === currentIndex) {
256
continue;
257
}
258
const bracket = brackets[i];
259
for (const open of bracket.open) {
260
if (open.indexOf(str) >= 0) {
261
dest.push(open);
262
}
263
}
264
for (const close of bracket.close) {
265
if (close.indexOf(str) >= 0) {
266
dest.push(close);
267
}
268
}
269
}
270
}
271
272
function lengthcmp(a: string, b: string) {
273
return a.length - b.length;
274
}
275
276
function unique(arr: string[]): string[] {
277
if (arr.length <= 1) {
278
return arr;
279
}
280
const result: string[] = [];
281
const seen = new Set<string>();
282
for (const element of arr) {
283
if (seen.has(element)) {
284
continue;
285
}
286
result.push(element);
287
seen.add(element);
288
}
289
return result;
290
}
291
292
/**
293
* Create a regular expression that can be used to search forward in a piece of text
294
* for a group of bracket pairs. But this regex must be built in a way in which
295
* it is aware of the other bracket pairs defined for the language.
296
*
297
* For example, if a language contains the following bracket pairs:
298
* ['begin', 'end']
299
* ['if', 'end if']
300
* The two bracket pairs do not collide because no open or close brackets are equal.
301
* So the function getRegexForBracketPair is called twice, once with
302
* the ['begin'], ['end'] group consisting of one bracket pair, and once with
303
* the ['if'], ['end if'] group consiting of the other bracket pair.
304
*
305
* But there could be a situation where an occurrence of 'end if' is mistaken
306
* for an occurrence of 'end'.
307
*
308
* Therefore, for the bracket pair ['begin', 'end'], the regex will also
309
* target 'end if'. The regex will be something like:
310
* /(\bend if\b)|(\bend\b)|(\bif\b)/
311
*
312
* The regex also searches for "superstrings" (other brackets that might be mistaken with the current bracket).
313
*
314
*/
315
function getRegexForBracketPair(open: string[], close: string[], brackets: InternalBracket[], currentIndex: number): RegExp {
316
// search in all brackets for other brackets that are a superstring of these brackets
317
let pieces: string[] = [];
318
pieces = pieces.concat(open);
319
pieces = pieces.concat(close);
320
for (let i = 0, len = pieces.length; i < len; i++) {
321
collectSuperstrings(pieces[i], brackets, currentIndex, pieces);
322
}
323
pieces = unique(pieces);
324
pieces.sort(lengthcmp);
325
pieces.reverse();
326
return createBracketOrRegExp(pieces);
327
}
328
329
/**
330
* Matching a regular expression in JS can only be done "forwards". So JS offers natively only
331
* methods to find the first match of a regex in a string. But sometimes, it is useful to
332
* find the last match of a regex in a string. For such a situation, a nice solution is to
333
* simply reverse the string and then search for a reversed regex.
334
*
335
* This function also has the fine details of `getRegexForBracketPair`. For the same example
336
* given above, the regex produced here would look like:
337
* /(\bfi dne\b)|(\bdne\b)|(\bfi\b)/
338
*/
339
function getReversedRegexForBracketPair(open: string[], close: string[], brackets: InternalBracket[], currentIndex: number): RegExp {
340
// search in all brackets for other brackets that are a superstring of these brackets
341
let pieces: string[] = [];
342
pieces = pieces.concat(open);
343
pieces = pieces.concat(close);
344
for (let i = 0, len = pieces.length; i < len; i++) {
345
collectSuperstrings(pieces[i], brackets, currentIndex, pieces);
346
}
347
pieces = unique(pieces);
348
pieces.sort(lengthcmp);
349
pieces.reverse();
350
return createBracketOrRegExp(pieces.map(toReversedString));
351
}
352
353
/**
354
* Creates a regular expression that targets all bracket pairs.
355
*
356
* e.g. for the bracket pairs:
357
* ['{','}']
358
* ['begin,'end']
359
* ['for','end']
360
* the regex would look like:
361
* /(\{)|(\})|(\bbegin\b)|(\bend\b)|(\bfor\b)/
362
*/
363
function getRegexForBrackets(brackets: RichEditBracket[]): RegExp {
364
let pieces: string[] = [];
365
for (const bracket of brackets) {
366
for (const open of bracket.open) {
367
pieces.push(open);
368
}
369
for (const close of bracket.close) {
370
pieces.push(close);
371
}
372
}
373
pieces = unique(pieces);
374
return createBracketOrRegExp(pieces);
375
}
376
377
/**
378
* Matching a regular expression in JS can only be done "forwards". So JS offers natively only
379
* methods to find the first match of a regex in a string. But sometimes, it is useful to
380
* find the last match of a regex in a string. For such a situation, a nice solution is to
381
* simply reverse the string and then search for a reversed regex.
382
*
383
* e.g. for the bracket pairs:
384
* ['{','}']
385
* ['begin,'end']
386
* ['for','end']
387
* the regex would look like:
388
* /(\{)|(\})|(\bnigeb\b)|(\bdne\b)|(\brof\b)/
389
*/
390
function getReversedRegexForBrackets(brackets: RichEditBracket[]): RegExp {
391
let pieces: string[] = [];
392
for (const bracket of brackets) {
393
for (const open of bracket.open) {
394
pieces.push(open);
395
}
396
for (const close of bracket.close) {
397
pieces.push(close);
398
}
399
}
400
pieces = unique(pieces);
401
return createBracketOrRegExp(pieces.map(toReversedString));
402
}
403
404
function prepareBracketForRegExp(str: string): string {
405
// This bracket pair uses letters like e.g. "begin" - "end"
406
const insertWordBoundaries = (/^[\w ]+$/.test(str));
407
str = strings.escapeRegExpCharacters(str);
408
return (insertWordBoundaries ? `\\b${str}\\b` : str);
409
}
410
411
export function createBracketOrRegExp(pieces: string[], options?: strings.RegExpOptions): RegExp {
412
const regexStr = `(${pieces.map(prepareBracketForRegExp).join(')|(')})`;
413
return strings.createRegExp(regexStr, true, options);
414
}
415
416
const toReversedString = (function () {
417
418
function reverse(str: string): string {
419
// create a Uint16Array and then use a TextDecoder to create a string
420
const arr = new Uint16Array(str.length);
421
let offset = 0;
422
for (let i = str.length - 1; i >= 0; i--) {
423
arr[offset++] = str.charCodeAt(i);
424
}
425
return stringBuilder.getPlatformTextDecoder().decode(arr);
426
}
427
428
let lastInput: string | null = null;
429
let lastOutput: string | null = null;
430
return function toReversedString(str: string): string {
431
if (lastInput !== str) {
432
lastInput = str;
433
lastOutput = reverse(lastInput);
434
}
435
return lastOutput!;
436
};
437
})();
438
439
export class BracketsUtils {
440
441
private static _findPrevBracketInText(reversedBracketRegex: RegExp, lineNumber: number, reversedText: string, offset: number): Range | null {
442
const m = reversedText.match(reversedBracketRegex);
443
444
if (!m) {
445
return null;
446
}
447
448
const matchOffset = reversedText.length - (m.index || 0);
449
const matchLength = m[0].length;
450
const absoluteMatchOffset = offset + matchOffset;
451
452
return new Range(lineNumber, absoluteMatchOffset - matchLength + 1, lineNumber, absoluteMatchOffset + 1);
453
}
454
455
public static findPrevBracketInRange(reversedBracketRegex: RegExp, lineNumber: number, lineText: string, startOffset: number, endOffset: number): Range | null {
456
// Because JS does not support backwards regex search, we search forwards in a reversed string with a reversed regex ;)
457
const reversedLineText = toReversedString(lineText);
458
const reversedSubstr = reversedLineText.substring(lineText.length - endOffset, lineText.length - startOffset);
459
return this._findPrevBracketInText(reversedBracketRegex, lineNumber, reversedSubstr, startOffset);
460
}
461
462
public static findNextBracketInText(bracketRegex: RegExp, lineNumber: number, text: string, offset: number): Range | null {
463
const m = text.match(bracketRegex);
464
465
if (!m) {
466
return null;
467
}
468
469
const matchOffset = m.index || 0;
470
const matchLength = m[0].length;
471
if (matchLength === 0) {
472
return null;
473
}
474
const absoluteMatchOffset = offset + matchOffset;
475
476
return new Range(lineNumber, absoluteMatchOffset + 1, lineNumber, absoluteMatchOffset + 1 + matchLength);
477
}
478
479
public static findNextBracketInRange(bracketRegex: RegExp, lineNumber: number, lineText: string, startOffset: number, endOffset: number): Range | null {
480
const substr = lineText.substring(startOffset, endOffset);
481
return this.findNextBracketInText(bracketRegex, lineNumber, substr, startOffset);
482
}
483
}
484
485