Path: blob/main/src/vs/editor/common/languages/supports/richEditBrackets.ts
3296 views
/*---------------------------------------------------------------------------------------------1* Copyright (c) Microsoft Corporation. All rights reserved.2* Licensed under the MIT License. See License.txt in the project root for license information.3*--------------------------------------------------------------------------------------------*/45import * as strings from '../../../../base/common/strings.js';6import * as stringBuilder from '../../core/stringBuilder.js';7import { Range } from '../../core/range.js';8import { CharacterPair } from '../languageConfiguration.js';910interface InternalBracket {11open: string[];12close: string[];13}1415/**16* Represents a grouping of colliding bracket pairs.17*18* Most of the times this contains a single bracket pair,19* but sometimes this contains multiple bracket pairs in cases20* where the same string appears as a closing bracket for multiple21* bracket pairs, or the same string appears an opening bracket for22* multiple bracket pairs.23*24* e.g. of a group containing a single pair:25* open: ['{'], close: ['}']26*27* e.g. of a group containing multiple pairs:28* open: ['if', 'for'], close: ['end', 'end']29*/30export class RichEditBracket {31_richEditBracketBrand: void = undefined;3233readonly languageId: string;34/**35* A 0-based consecutive unique identifier for this bracket pair.36* If a language has 5 bracket pairs, out of which 2 are grouped together,37* it is expected that the `index` goes from 0 to 4.38*/39readonly index: number;40/**41* The open sequence for each bracket pair contained in this group.42*43* The open sequence at a specific index corresponds to the44* closing sequence at the same index.45*46* [ open[i], closed[i] ] represent a bracket pair.47*/48readonly open: string[];49/**50* The close sequence for each bracket pair contained in this group.51*52* The close sequence at a specific index corresponds to the53* opening sequence at the same index.54*55* [ open[i], closed[i] ] represent a bracket pair.56*/57readonly close: string[];58/**59* A regular expression that is useful to search for this bracket pair group in a string.60*61* This regular expression is built in a way that it is aware of the other bracket62* pairs defined for the language, so it might match brackets from other groups.63*64* See the fine details in `getRegexForBracketPair`.65*/66readonly forwardRegex: RegExp;67/**68* A regular expression that is useful to search for this bracket pair group in a string backwards.69*70* This regular expression is built in a way that it is aware of the other bracket71* pairs defined for the language, so it might match brackets from other groups.72*73* See the fine defails in `getReversedRegexForBracketPair`.74*/75readonly reversedRegex: RegExp;76private readonly _openSet: Set<string>;77private readonly _closeSet: Set<string>;7879constructor(languageId: string, index: number, open: string[], close: string[], forwardRegex: RegExp, reversedRegex: RegExp) {80this.languageId = languageId;81this.index = index;82this.open = open;83this.close = close;84this.forwardRegex = forwardRegex;85this.reversedRegex = reversedRegex;86this._openSet = RichEditBracket._toSet(this.open);87this._closeSet = RichEditBracket._toSet(this.close);88}8990/**91* Check if the provided `text` is an open bracket in this group.92*/93public isOpen(text: string) {94return this._openSet.has(text);95}9697/**98* Check if the provided `text` is a close bracket in this group.99*/100public isClose(text: string) {101return this._closeSet.has(text);102}103104private static _toSet(arr: string[]): Set<string> {105const result = new Set<string>();106for (const element of arr) {107result.add(element);108}109return result;110}111}112113/**114* Groups together brackets that have equal open or close sequences.115*116* For example, if the following brackets are defined:117* ['IF','END']118* ['for','end']119* ['{','}']120*121* Then the grouped brackets would be:122* { open: ['if', 'for'], close: ['end', 'end'] }123* { open: ['{'], close: ['}'] }124*125*/126function groupFuzzyBrackets(brackets: readonly CharacterPair[]): InternalBracket[] {127const N = brackets.length;128129brackets = brackets.map(b => [b[0].toLowerCase(), b[1].toLowerCase()]);130131const group: number[] = [];132for (let i = 0; i < N; i++) {133group[i] = i;134}135136const areOverlapping = (a: CharacterPair, b: CharacterPair) => {137const [aOpen, aClose] = a;138const [bOpen, bClose] = b;139return (aOpen === bOpen || aOpen === bClose || aClose === bOpen || aClose === bClose);140};141142const mergeGroups = (g1: number, g2: number) => {143const newG = Math.min(g1, g2);144const oldG = Math.max(g1, g2);145for (let i = 0; i < N; i++) {146if (group[i] === oldG) {147group[i] = newG;148}149}150};151152// group together brackets that have the same open or the same close sequence153for (let i = 0; i < N; i++) {154const a = brackets[i];155for (let j = i + 1; j < N; j++) {156const b = brackets[j];157if (areOverlapping(a, b)) {158mergeGroups(group[i], group[j]);159}160}161}162163const result: InternalBracket[] = [];164for (let g = 0; g < N; g++) {165const currentOpen: string[] = [];166const currentClose: string[] = [];167for (let i = 0; i < N; i++) {168if (group[i] === g) {169const [open, close] = brackets[i];170currentOpen.push(open);171currentClose.push(close);172}173}174if (currentOpen.length > 0) {175result.push({176open: currentOpen,177close: currentClose178});179}180}181return result;182}183184export class RichEditBrackets {185_richEditBracketsBrand: void = undefined;186187/**188* All groups of brackets defined for this language.189*/190public readonly brackets: RichEditBracket[];191/**192* A regular expression that is useful to search for all bracket pairs in a string.193*194* See the fine details in `getRegexForBrackets`.195*/196public readonly forwardRegex: RegExp;197/**198* A regular expression that is useful to search for all bracket pairs in a string backwards.199*200* See the fine details in `getReversedRegexForBrackets`.201*/202public readonly reversedRegex: RegExp;203/**204* The length (i.e. str.length) for the longest bracket pair.205*/206public readonly maxBracketLength: number;207/**208* A map useful for decoding a regex match and finding which bracket group was matched.209*/210public readonly textIsBracket: { [text: string]: RichEditBracket };211/**212* A set useful for decoding if a regex match is the open bracket of a bracket pair.213*/214public readonly textIsOpenBracket: { [text: string]: boolean };215216constructor(languageId: string, _brackets: readonly CharacterPair[]) {217const brackets = groupFuzzyBrackets(_brackets);218219this.brackets = brackets.map((b, index) => {220return new RichEditBracket(221languageId,222index,223b.open,224b.close,225getRegexForBracketPair(b.open, b.close, brackets, index),226getReversedRegexForBracketPair(b.open, b.close, brackets, index)227);228});229230this.forwardRegex = getRegexForBrackets(this.brackets);231this.reversedRegex = getReversedRegexForBrackets(this.brackets);232233this.textIsBracket = {};234this.textIsOpenBracket = {};235236this.maxBracketLength = 0;237for (const bracket of this.brackets) {238for (const open of bracket.open) {239this.textIsBracket[open] = bracket;240this.textIsOpenBracket[open] = true;241this.maxBracketLength = Math.max(this.maxBracketLength, open.length);242}243for (const close of bracket.close) {244this.textIsBracket[close] = bracket;245this.textIsOpenBracket[close] = false;246this.maxBracketLength = Math.max(this.maxBracketLength, close.length);247}248}249}250}251252function collectSuperstrings(str: string, brackets: InternalBracket[], currentIndex: number, dest: string[]): void {253for (let i = 0, len = brackets.length; i < len; i++) {254if (i === currentIndex) {255continue;256}257const bracket = brackets[i];258for (const open of bracket.open) {259if (open.indexOf(str) >= 0) {260dest.push(open);261}262}263for (const close of bracket.close) {264if (close.indexOf(str) >= 0) {265dest.push(close);266}267}268}269}270271function lengthcmp(a: string, b: string) {272return a.length - b.length;273}274275function unique(arr: string[]): string[] {276if (arr.length <= 1) {277return arr;278}279const result: string[] = [];280const seen = new Set<string>();281for (const element of arr) {282if (seen.has(element)) {283continue;284}285result.push(element);286seen.add(element);287}288return result;289}290291/**292* Create a regular expression that can be used to search forward in a piece of text293* for a group of bracket pairs. But this regex must be built in a way in which294* it is aware of the other bracket pairs defined for the language.295*296* For example, if a language contains the following bracket pairs:297* ['begin', 'end']298* ['if', 'end if']299* The two bracket pairs do not collide because no open or close brackets are equal.300* So the function getRegexForBracketPair is called twice, once with301* the ['begin'], ['end'] group consisting of one bracket pair, and once with302* the ['if'], ['end if'] group consiting of the other bracket pair.303*304* But there could be a situation where an occurrence of 'end if' is mistaken305* for an occurrence of 'end'.306*307* Therefore, for the bracket pair ['begin', 'end'], the regex will also308* target 'end if'. The regex will be something like:309* /(\bend if\b)|(\bend\b)|(\bif\b)/310*311* The regex also searches for "superstrings" (other brackets that might be mistaken with the current bracket).312*313*/314function getRegexForBracketPair(open: string[], close: string[], brackets: InternalBracket[], currentIndex: number): RegExp {315// search in all brackets for other brackets that are a superstring of these brackets316let pieces: string[] = [];317pieces = pieces.concat(open);318pieces = pieces.concat(close);319for (let i = 0, len = pieces.length; i < len; i++) {320collectSuperstrings(pieces[i], brackets, currentIndex, pieces);321}322pieces = unique(pieces);323pieces.sort(lengthcmp);324pieces.reverse();325return createBracketOrRegExp(pieces);326}327328/**329* Matching a regular expression in JS can only be done "forwards". So JS offers natively only330* methods to find the first match of a regex in a string. But sometimes, it is useful to331* find the last match of a regex in a string. For such a situation, a nice solution is to332* simply reverse the string and then search for a reversed regex.333*334* This function also has the fine details of `getRegexForBracketPair`. For the same example335* given above, the regex produced here would look like:336* /(\bfi dne\b)|(\bdne\b)|(\bfi\b)/337*/338function getReversedRegexForBracketPair(open: string[], close: string[], brackets: InternalBracket[], currentIndex: number): RegExp {339// search in all brackets for other brackets that are a superstring of these brackets340let pieces: string[] = [];341pieces = pieces.concat(open);342pieces = pieces.concat(close);343for (let i = 0, len = pieces.length; i < len; i++) {344collectSuperstrings(pieces[i], brackets, currentIndex, pieces);345}346pieces = unique(pieces);347pieces.sort(lengthcmp);348pieces.reverse();349return createBracketOrRegExp(pieces.map(toReversedString));350}351352/**353* Creates a regular expression that targets all bracket pairs.354*355* e.g. for the bracket pairs:356* ['{','}']357* ['begin,'end']358* ['for','end']359* the regex would look like:360* /(\{)|(\})|(\bbegin\b)|(\bend\b)|(\bfor\b)/361*/362function getRegexForBrackets(brackets: RichEditBracket[]): RegExp {363let pieces: string[] = [];364for (const bracket of brackets) {365for (const open of bracket.open) {366pieces.push(open);367}368for (const close of bracket.close) {369pieces.push(close);370}371}372pieces = unique(pieces);373return createBracketOrRegExp(pieces);374}375376/**377* Matching a regular expression in JS can only be done "forwards". So JS offers natively only378* methods to find the first match of a regex in a string. But sometimes, it is useful to379* find the last match of a regex in a string. For such a situation, a nice solution is to380* simply reverse the string and then search for a reversed regex.381*382* e.g. for the bracket pairs:383* ['{','}']384* ['begin,'end']385* ['for','end']386* the regex would look like:387* /(\{)|(\})|(\bnigeb\b)|(\bdne\b)|(\brof\b)/388*/389function getReversedRegexForBrackets(brackets: RichEditBracket[]): RegExp {390let pieces: string[] = [];391for (const bracket of brackets) {392for (const open of bracket.open) {393pieces.push(open);394}395for (const close of bracket.close) {396pieces.push(close);397}398}399pieces = unique(pieces);400return createBracketOrRegExp(pieces.map(toReversedString));401}402403function prepareBracketForRegExp(str: string): string {404// This bracket pair uses letters like e.g. "begin" - "end"405const insertWordBoundaries = (/^[\w ]+$/.test(str));406str = strings.escapeRegExpCharacters(str);407return (insertWordBoundaries ? `\\b${str}\\b` : str);408}409410export function createBracketOrRegExp(pieces: string[], options?: strings.RegExpOptions): RegExp {411const regexStr = `(${pieces.map(prepareBracketForRegExp).join(')|(')})`;412return strings.createRegExp(regexStr, true, options);413}414415const toReversedString = (function () {416417function reverse(str: string): string {418// create a Uint16Array and then use a TextDecoder to create a string419const arr = new Uint16Array(str.length);420let offset = 0;421for (let i = str.length - 1; i >= 0; i--) {422arr[offset++] = str.charCodeAt(i);423}424return stringBuilder.getPlatformTextDecoder().decode(arr);425}426427let lastInput: string | null = null;428let lastOutput: string | null = null;429return function toReversedString(str: string): string {430if (lastInput !== str) {431lastInput = str;432lastOutput = reverse(lastInput);433}434return lastOutput!;435};436})();437438export class BracketsUtils {439440private static _findPrevBracketInText(reversedBracketRegex: RegExp, lineNumber: number, reversedText: string, offset: number): Range | null {441const m = reversedText.match(reversedBracketRegex);442443if (!m) {444return null;445}446447const matchOffset = reversedText.length - (m.index || 0);448const matchLength = m[0].length;449const absoluteMatchOffset = offset + matchOffset;450451return new Range(lineNumber, absoluteMatchOffset - matchLength + 1, lineNumber, absoluteMatchOffset + 1);452}453454public static findPrevBracketInRange(reversedBracketRegex: RegExp, lineNumber: number, lineText: string, startOffset: number, endOffset: number): Range | null {455// Because JS does not support backwards regex search, we search forwards in a reversed string with a reversed regex ;)456const reversedLineText = toReversedString(lineText);457const reversedSubstr = reversedLineText.substring(lineText.length - endOffset, lineText.length - startOffset);458return this._findPrevBracketInText(reversedBracketRegex, lineNumber, reversedSubstr, startOffset);459}460461public static findNextBracketInText(bracketRegex: RegExp, lineNumber: number, text: string, offset: number): Range | null {462const m = text.match(bracketRegex);463464if (!m) {465return null;466}467468const matchOffset = m.index || 0;469const matchLength = m[0].length;470if (matchLength === 0) {471return null;472}473const absoluteMatchOffset = offset + matchOffset;474475return new Range(lineNumber, absoluteMatchOffset + 1, lineNumber, absoluteMatchOffset + 1 + matchLength);476}477478public static findNextBracketInRange(bracketRegex: RegExp, lineNumber: number, lineText: string, startOffset: number, endOffset: number): Range | null {479const substr = lineText.substring(startOffset, endOffset);480return this.findNextBracketInText(bracketRegex, lineNumber, substr, startOffset);481}482}483484485