Path: blob/main/src/vs/editor/common/model/textModelSearch.ts
3294 views
/*---------------------------------------------------------------------------------------------1* Copyright (c) Microsoft Corporation. All rights reserved.2* Licensed under the MIT License. See License.txt in the project root for license information.3*--------------------------------------------------------------------------------------------*/45import { CharCode } from '../../../base/common/charCode.js';6import * as strings from '../../../base/common/strings.js';7import { WordCharacterClass, WordCharacterClassifier, getMapForWordSeparators } from '../core/wordCharacterClassifier.js';8import { Position } from '../core/position.js';9import { Range } from '../core/range.js';10import { EndOfLinePreference, FindMatch, SearchData } from '../model.js';11import { TextModel } from './textModel.js';1213const LIMIT_FIND_COUNT = 999;1415export class SearchParams {16public readonly searchString: string;17public readonly isRegex: boolean;18public readonly matchCase: boolean;19public readonly wordSeparators: string | null;2021constructor(searchString: string, isRegex: boolean, matchCase: boolean, wordSeparators: string | null) {22this.searchString = searchString;23this.isRegex = isRegex;24this.matchCase = matchCase;25this.wordSeparators = wordSeparators;26}2728public parseSearchRequest(): SearchData | null {29if (this.searchString === '') {30return null;31}3233// Try to create a RegExp out of the params34let multiline: boolean;35if (this.isRegex) {36multiline = isMultilineRegexSource(this.searchString);37} else {38multiline = (this.searchString.indexOf('\n') >= 0);39}4041let regex: RegExp | null = null;42try {43regex = strings.createRegExp(this.searchString, this.isRegex, {44matchCase: this.matchCase,45wholeWord: false,46multiline: multiline,47global: true,48unicode: true49});50} catch (err) {51return null;52}5354if (!regex) {55return null;56}5758let canUseSimpleSearch = (!this.isRegex && !multiline);59if (canUseSimpleSearch && this.searchString.toLowerCase() !== this.searchString.toUpperCase()) {60// casing might make a difference61canUseSimpleSearch = this.matchCase;62}6364return new SearchData(regex, this.wordSeparators ? getMapForWordSeparators(this.wordSeparators, []) : null, canUseSimpleSearch ? this.searchString : null);65}66}6768export function isMultilineRegexSource(searchString: string): boolean {69if (!searchString || searchString.length === 0) {70return false;71}7273for (let i = 0, len = searchString.length; i < len; i++) {74const chCode = searchString.charCodeAt(i);7576if (chCode === CharCode.LineFeed) {77return true;78}7980if (chCode === CharCode.Backslash) {8182// move to next char83i++;8485if (i >= len) {86// string ends with a \87break;88}8990const nextChCode = searchString.charCodeAt(i);91if (nextChCode === CharCode.n || nextChCode === CharCode.r || nextChCode === CharCode.W) {92return true;93}94}95}9697return false;98}99100export function createFindMatch(range: Range, rawMatches: RegExpExecArray, captureMatches: boolean): FindMatch {101if (!captureMatches) {102return new FindMatch(range, null);103}104const matches: string[] = [];105for (let i = 0, len = rawMatches.length; i < len; i++) {106matches[i] = rawMatches[i];107}108return new FindMatch(range, matches);109}110111class LineFeedCounter {112113private readonly _lineFeedsOffsets: number[];114115constructor(text: string) {116const lineFeedsOffsets: number[] = [];117let lineFeedsOffsetsLen = 0;118for (let i = 0, textLen = text.length; i < textLen; i++) {119if (text.charCodeAt(i) === CharCode.LineFeed) {120lineFeedsOffsets[lineFeedsOffsetsLen++] = i;121}122}123this._lineFeedsOffsets = lineFeedsOffsets;124}125126public findLineFeedCountBeforeOffset(offset: number): number {127const lineFeedsOffsets = this._lineFeedsOffsets;128let min = 0;129let max = lineFeedsOffsets.length - 1;130131if (max === -1) {132// no line feeds133return 0;134}135136if (offset <= lineFeedsOffsets[0]) {137// before first line feed138return 0;139}140141while (min < max) {142const mid = min + ((max - min) / 2 >> 0);143144if (lineFeedsOffsets[mid] >= offset) {145max = mid - 1;146} else {147if (lineFeedsOffsets[mid + 1] >= offset) {148// bingo!149min = mid;150max = mid;151} else {152min = mid + 1;153}154}155}156return min + 1;157}158}159160export class TextModelSearch {161162public static findMatches(model: TextModel, searchParams: SearchParams, searchRange: Range, captureMatches: boolean, limitResultCount: number): FindMatch[] {163const searchData = searchParams.parseSearchRequest();164if (!searchData) {165return [];166}167168if (searchData.regex.multiline) {169return this._doFindMatchesMultiline(model, searchRange, new Searcher(searchData.wordSeparators, searchData.regex), captureMatches, limitResultCount);170}171return this._doFindMatchesLineByLine(model, searchRange, searchData, captureMatches, limitResultCount);172}173174/**175* Multiline search always executes on the lines concatenated with \n.176* We must therefore compensate for the count of \n in case the model is CRLF177*/178private static _getMultilineMatchRange(model: TextModel, deltaOffset: number, text: string, lfCounter: LineFeedCounter | null, matchIndex: number, match0: string): Range {179let startOffset: number;180let lineFeedCountBeforeMatch = 0;181if (lfCounter) {182lineFeedCountBeforeMatch = lfCounter.findLineFeedCountBeforeOffset(matchIndex);183startOffset = deltaOffset + matchIndex + lineFeedCountBeforeMatch /* add as many \r as there were \n */;184} else {185startOffset = deltaOffset + matchIndex;186}187188let endOffset: number;189if (lfCounter) {190const lineFeedCountBeforeEndOfMatch = lfCounter.findLineFeedCountBeforeOffset(matchIndex + match0.length);191const lineFeedCountInMatch = lineFeedCountBeforeEndOfMatch - lineFeedCountBeforeMatch;192endOffset = startOffset + match0.length + lineFeedCountInMatch /* add as many \r as there were \n */;193} else {194endOffset = startOffset + match0.length;195}196197const startPosition = model.getPositionAt(startOffset);198const endPosition = model.getPositionAt(endOffset);199return new Range(startPosition.lineNumber, startPosition.column, endPosition.lineNumber, endPosition.column);200}201202private static _doFindMatchesMultiline(model: TextModel, searchRange: Range, searcher: Searcher, captureMatches: boolean, limitResultCount: number): FindMatch[] {203const deltaOffset = model.getOffsetAt(searchRange.getStartPosition());204// We always execute multiline search over the lines joined with \n205// This makes it that \n will match the EOL for both CRLF and LF models206// We compensate for offset errors in `_getMultilineMatchRange`207const text = model.getValueInRange(searchRange, EndOfLinePreference.LF);208const lfCounter = (model.getEOL() === '\r\n' ? new LineFeedCounter(text) : null);209210const result: FindMatch[] = [];211let counter = 0;212213let m: RegExpExecArray | null;214searcher.reset(0);215while ((m = searcher.next(text))) {216result[counter++] = createFindMatch(this._getMultilineMatchRange(model, deltaOffset, text, lfCounter, m.index, m[0]), m, captureMatches);217if (counter >= limitResultCount) {218return result;219}220}221222return result;223}224225private static _doFindMatchesLineByLine(model: TextModel, searchRange: Range, searchData: SearchData, captureMatches: boolean, limitResultCount: number): FindMatch[] {226const result: FindMatch[] = [];227let resultLen = 0;228229// Early case for a search range that starts & stops on the same line number230if (searchRange.startLineNumber === searchRange.endLineNumber) {231const text = model.getLineContent(searchRange.startLineNumber).substring(searchRange.startColumn - 1, searchRange.endColumn - 1);232resultLen = this._findMatchesInLine(searchData, text, searchRange.startLineNumber, searchRange.startColumn - 1, resultLen, result, captureMatches, limitResultCount);233return result;234}235236// Collect results from first line237const text = model.getLineContent(searchRange.startLineNumber).substring(searchRange.startColumn - 1);238resultLen = this._findMatchesInLine(searchData, text, searchRange.startLineNumber, searchRange.startColumn - 1, resultLen, result, captureMatches, limitResultCount);239240// Collect results from middle lines241for (let lineNumber = searchRange.startLineNumber + 1; lineNumber < searchRange.endLineNumber && resultLen < limitResultCount; lineNumber++) {242resultLen = this._findMatchesInLine(searchData, model.getLineContent(lineNumber), lineNumber, 0, resultLen, result, captureMatches, limitResultCount);243}244245// Collect results from last line246if (resultLen < limitResultCount) {247const text = model.getLineContent(searchRange.endLineNumber).substring(0, searchRange.endColumn - 1);248resultLen = this._findMatchesInLine(searchData, text, searchRange.endLineNumber, 0, resultLen, result, captureMatches, limitResultCount);249}250251return result;252}253254private static _findMatchesInLine(searchData: SearchData, text: string, lineNumber: number, deltaOffset: number, resultLen: number, result: FindMatch[], captureMatches: boolean, limitResultCount: number): number {255const wordSeparators = searchData.wordSeparators;256if (!captureMatches && searchData.simpleSearch) {257const searchString = searchData.simpleSearch;258const searchStringLen = searchString.length;259const textLength = text.length;260261let lastMatchIndex = -searchStringLen;262while ((lastMatchIndex = text.indexOf(searchString, lastMatchIndex + searchStringLen)) !== -1) {263if (!wordSeparators || isValidMatch(wordSeparators, text, textLength, lastMatchIndex, searchStringLen)) {264result[resultLen++] = new FindMatch(new Range(lineNumber, lastMatchIndex + 1 + deltaOffset, lineNumber, lastMatchIndex + 1 + searchStringLen + deltaOffset), null);265if (resultLen >= limitResultCount) {266return resultLen;267}268}269}270return resultLen;271}272273const searcher = new Searcher(searchData.wordSeparators, searchData.regex);274let m: RegExpExecArray | null;275// Reset regex to search from the beginning276searcher.reset(0);277do {278m = searcher.next(text);279if (m) {280result[resultLen++] = createFindMatch(new Range(lineNumber, m.index + 1 + deltaOffset, lineNumber, m.index + 1 + m[0].length + deltaOffset), m, captureMatches);281if (resultLen >= limitResultCount) {282return resultLen;283}284}285} while (m);286return resultLen;287}288289public static findNextMatch(model: TextModel, searchParams: SearchParams, searchStart: Position, captureMatches: boolean): FindMatch | null {290const searchData = searchParams.parseSearchRequest();291if (!searchData) {292return null;293}294295const searcher = new Searcher(searchData.wordSeparators, searchData.regex);296297if (searchData.regex.multiline) {298return this._doFindNextMatchMultiline(model, searchStart, searcher, captureMatches);299}300return this._doFindNextMatchLineByLine(model, searchStart, searcher, captureMatches);301}302303private static _doFindNextMatchMultiline(model: TextModel, searchStart: Position, searcher: Searcher, captureMatches: boolean): FindMatch | null {304const searchTextStart = new Position(searchStart.lineNumber, 1);305const deltaOffset = model.getOffsetAt(searchTextStart);306const lineCount = model.getLineCount();307// We always execute multiline search over the lines joined with \n308// This makes it that \n will match the EOL for both CRLF and LF models309// We compensate for offset errors in `_getMultilineMatchRange`310const text = model.getValueInRange(new Range(searchTextStart.lineNumber, searchTextStart.column, lineCount, model.getLineMaxColumn(lineCount)), EndOfLinePreference.LF);311const lfCounter = (model.getEOL() === '\r\n' ? new LineFeedCounter(text) : null);312searcher.reset(searchStart.column - 1);313const m = searcher.next(text);314if (m) {315return createFindMatch(316this._getMultilineMatchRange(model, deltaOffset, text, lfCounter, m.index, m[0]),317m,318captureMatches319);320}321322if (searchStart.lineNumber !== 1 || searchStart.column !== 1) {323// Try again from the top324return this._doFindNextMatchMultiline(model, new Position(1, 1), searcher, captureMatches);325}326327return null;328}329330private static _doFindNextMatchLineByLine(model: TextModel, searchStart: Position, searcher: Searcher, captureMatches: boolean): FindMatch | null {331const lineCount = model.getLineCount();332const startLineNumber = searchStart.lineNumber;333334// Look in first line335const text = model.getLineContent(startLineNumber);336const r = this._findFirstMatchInLine(searcher, text, startLineNumber, searchStart.column, captureMatches);337if (r) {338return r;339}340341for (let i = 1; i <= lineCount; i++) {342const lineIndex = (startLineNumber + i - 1) % lineCount;343const text = model.getLineContent(lineIndex + 1);344const r = this._findFirstMatchInLine(searcher, text, lineIndex + 1, 1, captureMatches);345if (r) {346return r;347}348}349350return null;351}352353private static _findFirstMatchInLine(searcher: Searcher, text: string, lineNumber: number, fromColumn: number, captureMatches: boolean): FindMatch | null {354// Set regex to search from column355searcher.reset(fromColumn - 1);356const m: RegExpExecArray | null = searcher.next(text);357if (m) {358return createFindMatch(359new Range(lineNumber, m.index + 1, lineNumber, m.index + 1 + m[0].length),360m,361captureMatches362);363}364return null;365}366367public static findPreviousMatch(model: TextModel, searchParams: SearchParams, searchStart: Position, captureMatches: boolean): FindMatch | null {368const searchData = searchParams.parseSearchRequest();369if (!searchData) {370return null;371}372373const searcher = new Searcher(searchData.wordSeparators, searchData.regex);374375if (searchData.regex.multiline) {376return this._doFindPreviousMatchMultiline(model, searchStart, searcher, captureMatches);377}378return this._doFindPreviousMatchLineByLine(model, searchStart, searcher, captureMatches);379}380381private static _doFindPreviousMatchMultiline(model: TextModel, searchStart: Position, searcher: Searcher, captureMatches: boolean): FindMatch | null {382const matches = this._doFindMatchesMultiline(model, new Range(1, 1, searchStart.lineNumber, searchStart.column), searcher, captureMatches, 10 * LIMIT_FIND_COUNT);383if (matches.length > 0) {384return matches[matches.length - 1];385}386387const lineCount = model.getLineCount();388if (searchStart.lineNumber !== lineCount || searchStart.column !== model.getLineMaxColumn(lineCount)) {389// Try again with all content390return this._doFindPreviousMatchMultiline(model, new Position(lineCount, model.getLineMaxColumn(lineCount)), searcher, captureMatches);391}392393return null;394}395396private static _doFindPreviousMatchLineByLine(model: TextModel, searchStart: Position, searcher: Searcher, captureMatches: boolean): FindMatch | null {397const lineCount = model.getLineCount();398const startLineNumber = searchStart.lineNumber;399400// Look in first line401const text = model.getLineContent(startLineNumber).substring(0, searchStart.column - 1);402const r = this._findLastMatchInLine(searcher, text, startLineNumber, captureMatches);403if (r) {404return r;405}406407for (let i = 1; i <= lineCount; i++) {408const lineIndex = (lineCount + startLineNumber - i - 1) % lineCount;409const text = model.getLineContent(lineIndex + 1);410const r = this._findLastMatchInLine(searcher, text, lineIndex + 1, captureMatches);411if (r) {412return r;413}414}415416return null;417}418419private static _findLastMatchInLine(searcher: Searcher, text: string, lineNumber: number, captureMatches: boolean): FindMatch | null {420let bestResult: FindMatch | null = null;421let m: RegExpExecArray | null;422searcher.reset(0);423while ((m = searcher.next(text))) {424bestResult = createFindMatch(new Range(lineNumber, m.index + 1, lineNumber, m.index + 1 + m[0].length), m, captureMatches);425}426return bestResult;427}428}429430function leftIsWordBounday(wordSeparators: WordCharacterClassifier, text: string, textLength: number, matchStartIndex: number, matchLength: number): boolean {431if (matchStartIndex === 0) {432// Match starts at start of string433return true;434}435436const charBefore = text.charCodeAt(matchStartIndex - 1);437if (wordSeparators.get(charBefore) !== WordCharacterClass.Regular) {438// The character before the match is a word separator439return true;440}441442if (charBefore === CharCode.CarriageReturn || charBefore === CharCode.LineFeed) {443// The character before the match is line break or carriage return.444return true;445}446447if (matchLength > 0) {448const firstCharInMatch = text.charCodeAt(matchStartIndex);449if (wordSeparators.get(firstCharInMatch) !== WordCharacterClass.Regular) {450// The first character inside the match is a word separator451return true;452}453}454455return false;456}457458function rightIsWordBounday(wordSeparators: WordCharacterClassifier, text: string, textLength: number, matchStartIndex: number, matchLength: number): boolean {459if (matchStartIndex + matchLength === textLength) {460// Match ends at end of string461return true;462}463464const charAfter = text.charCodeAt(matchStartIndex + matchLength);465if (wordSeparators.get(charAfter) !== WordCharacterClass.Regular) {466// The character after the match is a word separator467return true;468}469470if (charAfter === CharCode.CarriageReturn || charAfter === CharCode.LineFeed) {471// The character after the match is line break or carriage return.472return true;473}474475if (matchLength > 0) {476const lastCharInMatch = text.charCodeAt(matchStartIndex + matchLength - 1);477if (wordSeparators.get(lastCharInMatch) !== WordCharacterClass.Regular) {478// The last character in the match is a word separator479return true;480}481}482483return false;484}485486export function isValidMatch(wordSeparators: WordCharacterClassifier, text: string, textLength: number, matchStartIndex: number, matchLength: number): boolean {487return (488leftIsWordBounday(wordSeparators, text, textLength, matchStartIndex, matchLength)489&& rightIsWordBounday(wordSeparators, text, textLength, matchStartIndex, matchLength)490);491}492493export class Searcher {494public readonly _wordSeparators: WordCharacterClassifier | null;495private readonly _searchRegex: RegExp;496private _prevMatchStartIndex: number;497private _prevMatchLength: number;498499constructor(wordSeparators: WordCharacterClassifier | null, searchRegex: RegExp,) {500this._wordSeparators = wordSeparators;501this._searchRegex = searchRegex;502this._prevMatchStartIndex = -1;503this._prevMatchLength = 0;504}505506public reset(lastIndex: number): void {507this._searchRegex.lastIndex = lastIndex;508this._prevMatchStartIndex = -1;509this._prevMatchLength = 0;510}511512public next(text: string): RegExpExecArray | null {513const textLength = text.length;514515let m: RegExpExecArray | null;516do {517if (this._prevMatchStartIndex + this._prevMatchLength === textLength) {518// Reached the end of the line519return null;520}521522m = this._searchRegex.exec(text);523if (!m) {524return null;525}526527const matchStartIndex = m.index;528const matchLength = m[0].length;529if (matchStartIndex === this._prevMatchStartIndex && matchLength === this._prevMatchLength) {530if (matchLength === 0) {531// the search result is an empty string and won't advance `regex.lastIndex`, so `regex.exec` will stuck here532// we attempt to recover from that by advancing by two if surrogate pair found and by one otherwise533if (strings.getNextCodePoint(text, textLength, this._searchRegex.lastIndex) > 0xFFFF) {534this._searchRegex.lastIndex += 2;535} else {536this._searchRegex.lastIndex += 1;537}538continue;539}540// Exit early if the regex matches the same range twice541return null;542}543this._prevMatchStartIndex = matchStartIndex;544this._prevMatchLength = matchLength;545546if (!this._wordSeparators || isValidMatch(this._wordSeparators, text, textLength, matchStartIndex, matchLength)) {547return m;548}549550} while (m);551552return null;553}554}555556557