Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
microsoft
GitHub Repository: microsoft/vscode
Path: blob/main/src/vs/editor/common/model/textModelSearch.ts
3294 views
1
/*---------------------------------------------------------------------------------------------
2
* Copyright (c) Microsoft Corporation. All rights reserved.
3
* Licensed under the MIT License. See License.txt in the project root for license information.
4
*--------------------------------------------------------------------------------------------*/
5
6
import { CharCode } from '../../../base/common/charCode.js';
7
import * as strings from '../../../base/common/strings.js';
8
import { WordCharacterClass, WordCharacterClassifier, getMapForWordSeparators } from '../core/wordCharacterClassifier.js';
9
import { Position } from '../core/position.js';
10
import { Range } from '../core/range.js';
11
import { EndOfLinePreference, FindMatch, SearchData } from '../model.js';
12
import { TextModel } from './textModel.js';
13
14
const LIMIT_FIND_COUNT = 999;
15
16
export class SearchParams {
17
public readonly searchString: string;
18
public readonly isRegex: boolean;
19
public readonly matchCase: boolean;
20
public readonly wordSeparators: string | null;
21
22
constructor(searchString: string, isRegex: boolean, matchCase: boolean, wordSeparators: string | null) {
23
this.searchString = searchString;
24
this.isRegex = isRegex;
25
this.matchCase = matchCase;
26
this.wordSeparators = wordSeparators;
27
}
28
29
public parseSearchRequest(): SearchData | null {
30
if (this.searchString === '') {
31
return null;
32
}
33
34
// Try to create a RegExp out of the params
35
let multiline: boolean;
36
if (this.isRegex) {
37
multiline = isMultilineRegexSource(this.searchString);
38
} else {
39
multiline = (this.searchString.indexOf('\n') >= 0);
40
}
41
42
let regex: RegExp | null = null;
43
try {
44
regex = strings.createRegExp(this.searchString, this.isRegex, {
45
matchCase: this.matchCase,
46
wholeWord: false,
47
multiline: multiline,
48
global: true,
49
unicode: true
50
});
51
} catch (err) {
52
return null;
53
}
54
55
if (!regex) {
56
return null;
57
}
58
59
let canUseSimpleSearch = (!this.isRegex && !multiline);
60
if (canUseSimpleSearch && this.searchString.toLowerCase() !== this.searchString.toUpperCase()) {
61
// casing might make a difference
62
canUseSimpleSearch = this.matchCase;
63
}
64
65
return new SearchData(regex, this.wordSeparators ? getMapForWordSeparators(this.wordSeparators, []) : null, canUseSimpleSearch ? this.searchString : null);
66
}
67
}
68
69
export function isMultilineRegexSource(searchString: string): boolean {
70
if (!searchString || searchString.length === 0) {
71
return false;
72
}
73
74
for (let i = 0, len = searchString.length; i < len; i++) {
75
const chCode = searchString.charCodeAt(i);
76
77
if (chCode === CharCode.LineFeed) {
78
return true;
79
}
80
81
if (chCode === CharCode.Backslash) {
82
83
// move to next char
84
i++;
85
86
if (i >= len) {
87
// string ends with a \
88
break;
89
}
90
91
const nextChCode = searchString.charCodeAt(i);
92
if (nextChCode === CharCode.n || nextChCode === CharCode.r || nextChCode === CharCode.W) {
93
return true;
94
}
95
}
96
}
97
98
return false;
99
}
100
101
export function createFindMatch(range: Range, rawMatches: RegExpExecArray, captureMatches: boolean): FindMatch {
102
if (!captureMatches) {
103
return new FindMatch(range, null);
104
}
105
const matches: string[] = [];
106
for (let i = 0, len = rawMatches.length; i < len; i++) {
107
matches[i] = rawMatches[i];
108
}
109
return new FindMatch(range, matches);
110
}
111
112
class LineFeedCounter {
113
114
private readonly _lineFeedsOffsets: number[];
115
116
constructor(text: string) {
117
const lineFeedsOffsets: number[] = [];
118
let lineFeedsOffsetsLen = 0;
119
for (let i = 0, textLen = text.length; i < textLen; i++) {
120
if (text.charCodeAt(i) === CharCode.LineFeed) {
121
lineFeedsOffsets[lineFeedsOffsetsLen++] = i;
122
}
123
}
124
this._lineFeedsOffsets = lineFeedsOffsets;
125
}
126
127
public findLineFeedCountBeforeOffset(offset: number): number {
128
const lineFeedsOffsets = this._lineFeedsOffsets;
129
let min = 0;
130
let max = lineFeedsOffsets.length - 1;
131
132
if (max === -1) {
133
// no line feeds
134
return 0;
135
}
136
137
if (offset <= lineFeedsOffsets[0]) {
138
// before first line feed
139
return 0;
140
}
141
142
while (min < max) {
143
const mid = min + ((max - min) / 2 >> 0);
144
145
if (lineFeedsOffsets[mid] >= offset) {
146
max = mid - 1;
147
} else {
148
if (lineFeedsOffsets[mid + 1] >= offset) {
149
// bingo!
150
min = mid;
151
max = mid;
152
} else {
153
min = mid + 1;
154
}
155
}
156
}
157
return min + 1;
158
}
159
}
160
161
export class TextModelSearch {
162
163
public static findMatches(model: TextModel, searchParams: SearchParams, searchRange: Range, captureMatches: boolean, limitResultCount: number): FindMatch[] {
164
const searchData = searchParams.parseSearchRequest();
165
if (!searchData) {
166
return [];
167
}
168
169
if (searchData.regex.multiline) {
170
return this._doFindMatchesMultiline(model, searchRange, new Searcher(searchData.wordSeparators, searchData.regex), captureMatches, limitResultCount);
171
}
172
return this._doFindMatchesLineByLine(model, searchRange, searchData, captureMatches, limitResultCount);
173
}
174
175
/**
176
* Multiline search always executes on the lines concatenated with \n.
177
* We must therefore compensate for the count of \n in case the model is CRLF
178
*/
179
private static _getMultilineMatchRange(model: TextModel, deltaOffset: number, text: string, lfCounter: LineFeedCounter | null, matchIndex: number, match0: string): Range {
180
let startOffset: number;
181
let lineFeedCountBeforeMatch = 0;
182
if (lfCounter) {
183
lineFeedCountBeforeMatch = lfCounter.findLineFeedCountBeforeOffset(matchIndex);
184
startOffset = deltaOffset + matchIndex + lineFeedCountBeforeMatch /* add as many \r as there were \n */;
185
} else {
186
startOffset = deltaOffset + matchIndex;
187
}
188
189
let endOffset: number;
190
if (lfCounter) {
191
const lineFeedCountBeforeEndOfMatch = lfCounter.findLineFeedCountBeforeOffset(matchIndex + match0.length);
192
const lineFeedCountInMatch = lineFeedCountBeforeEndOfMatch - lineFeedCountBeforeMatch;
193
endOffset = startOffset + match0.length + lineFeedCountInMatch /* add as many \r as there were \n */;
194
} else {
195
endOffset = startOffset + match0.length;
196
}
197
198
const startPosition = model.getPositionAt(startOffset);
199
const endPosition = model.getPositionAt(endOffset);
200
return new Range(startPosition.lineNumber, startPosition.column, endPosition.lineNumber, endPosition.column);
201
}
202
203
private static _doFindMatchesMultiline(model: TextModel, searchRange: Range, searcher: Searcher, captureMatches: boolean, limitResultCount: number): FindMatch[] {
204
const deltaOffset = model.getOffsetAt(searchRange.getStartPosition());
205
// We always execute multiline search over the lines joined with \n
206
// This makes it that \n will match the EOL for both CRLF and LF models
207
// We compensate for offset errors in `_getMultilineMatchRange`
208
const text = model.getValueInRange(searchRange, EndOfLinePreference.LF);
209
const lfCounter = (model.getEOL() === '\r\n' ? new LineFeedCounter(text) : null);
210
211
const result: FindMatch[] = [];
212
let counter = 0;
213
214
let m: RegExpExecArray | null;
215
searcher.reset(0);
216
while ((m = searcher.next(text))) {
217
result[counter++] = createFindMatch(this._getMultilineMatchRange(model, deltaOffset, text, lfCounter, m.index, m[0]), m, captureMatches);
218
if (counter >= limitResultCount) {
219
return result;
220
}
221
}
222
223
return result;
224
}
225
226
private static _doFindMatchesLineByLine(model: TextModel, searchRange: Range, searchData: SearchData, captureMatches: boolean, limitResultCount: number): FindMatch[] {
227
const result: FindMatch[] = [];
228
let resultLen = 0;
229
230
// Early case for a search range that starts & stops on the same line number
231
if (searchRange.startLineNumber === searchRange.endLineNumber) {
232
const text = model.getLineContent(searchRange.startLineNumber).substring(searchRange.startColumn - 1, searchRange.endColumn - 1);
233
resultLen = this._findMatchesInLine(searchData, text, searchRange.startLineNumber, searchRange.startColumn - 1, resultLen, result, captureMatches, limitResultCount);
234
return result;
235
}
236
237
// Collect results from first line
238
const text = model.getLineContent(searchRange.startLineNumber).substring(searchRange.startColumn - 1);
239
resultLen = this._findMatchesInLine(searchData, text, searchRange.startLineNumber, searchRange.startColumn - 1, resultLen, result, captureMatches, limitResultCount);
240
241
// Collect results from middle lines
242
for (let lineNumber = searchRange.startLineNumber + 1; lineNumber < searchRange.endLineNumber && resultLen < limitResultCount; lineNumber++) {
243
resultLen = this._findMatchesInLine(searchData, model.getLineContent(lineNumber), lineNumber, 0, resultLen, result, captureMatches, limitResultCount);
244
}
245
246
// Collect results from last line
247
if (resultLen < limitResultCount) {
248
const text = model.getLineContent(searchRange.endLineNumber).substring(0, searchRange.endColumn - 1);
249
resultLen = this._findMatchesInLine(searchData, text, searchRange.endLineNumber, 0, resultLen, result, captureMatches, limitResultCount);
250
}
251
252
return result;
253
}
254
255
private static _findMatchesInLine(searchData: SearchData, text: string, lineNumber: number, deltaOffset: number, resultLen: number, result: FindMatch[], captureMatches: boolean, limitResultCount: number): number {
256
const wordSeparators = searchData.wordSeparators;
257
if (!captureMatches && searchData.simpleSearch) {
258
const searchString = searchData.simpleSearch;
259
const searchStringLen = searchString.length;
260
const textLength = text.length;
261
262
let lastMatchIndex = -searchStringLen;
263
while ((lastMatchIndex = text.indexOf(searchString, lastMatchIndex + searchStringLen)) !== -1) {
264
if (!wordSeparators || isValidMatch(wordSeparators, text, textLength, lastMatchIndex, searchStringLen)) {
265
result[resultLen++] = new FindMatch(new Range(lineNumber, lastMatchIndex + 1 + deltaOffset, lineNumber, lastMatchIndex + 1 + searchStringLen + deltaOffset), null);
266
if (resultLen >= limitResultCount) {
267
return resultLen;
268
}
269
}
270
}
271
return resultLen;
272
}
273
274
const searcher = new Searcher(searchData.wordSeparators, searchData.regex);
275
let m: RegExpExecArray | null;
276
// Reset regex to search from the beginning
277
searcher.reset(0);
278
do {
279
m = searcher.next(text);
280
if (m) {
281
result[resultLen++] = createFindMatch(new Range(lineNumber, m.index + 1 + deltaOffset, lineNumber, m.index + 1 + m[0].length + deltaOffset), m, captureMatches);
282
if (resultLen >= limitResultCount) {
283
return resultLen;
284
}
285
}
286
} while (m);
287
return resultLen;
288
}
289
290
public static findNextMatch(model: TextModel, searchParams: SearchParams, searchStart: Position, captureMatches: boolean): FindMatch | null {
291
const searchData = searchParams.parseSearchRequest();
292
if (!searchData) {
293
return null;
294
}
295
296
const searcher = new Searcher(searchData.wordSeparators, searchData.regex);
297
298
if (searchData.regex.multiline) {
299
return this._doFindNextMatchMultiline(model, searchStart, searcher, captureMatches);
300
}
301
return this._doFindNextMatchLineByLine(model, searchStart, searcher, captureMatches);
302
}
303
304
private static _doFindNextMatchMultiline(model: TextModel, searchStart: Position, searcher: Searcher, captureMatches: boolean): FindMatch | null {
305
const searchTextStart = new Position(searchStart.lineNumber, 1);
306
const deltaOffset = model.getOffsetAt(searchTextStart);
307
const lineCount = model.getLineCount();
308
// We always execute multiline search over the lines joined with \n
309
// This makes it that \n will match the EOL for both CRLF and LF models
310
// We compensate for offset errors in `_getMultilineMatchRange`
311
const text = model.getValueInRange(new Range(searchTextStart.lineNumber, searchTextStart.column, lineCount, model.getLineMaxColumn(lineCount)), EndOfLinePreference.LF);
312
const lfCounter = (model.getEOL() === '\r\n' ? new LineFeedCounter(text) : null);
313
searcher.reset(searchStart.column - 1);
314
const m = searcher.next(text);
315
if (m) {
316
return createFindMatch(
317
this._getMultilineMatchRange(model, deltaOffset, text, lfCounter, m.index, m[0]),
318
m,
319
captureMatches
320
);
321
}
322
323
if (searchStart.lineNumber !== 1 || searchStart.column !== 1) {
324
// Try again from the top
325
return this._doFindNextMatchMultiline(model, new Position(1, 1), searcher, captureMatches);
326
}
327
328
return null;
329
}
330
331
private static _doFindNextMatchLineByLine(model: TextModel, searchStart: Position, searcher: Searcher, captureMatches: boolean): FindMatch | null {
332
const lineCount = model.getLineCount();
333
const startLineNumber = searchStart.lineNumber;
334
335
// Look in first line
336
const text = model.getLineContent(startLineNumber);
337
const r = this._findFirstMatchInLine(searcher, text, startLineNumber, searchStart.column, captureMatches);
338
if (r) {
339
return r;
340
}
341
342
for (let i = 1; i <= lineCount; i++) {
343
const lineIndex = (startLineNumber + i - 1) % lineCount;
344
const text = model.getLineContent(lineIndex + 1);
345
const r = this._findFirstMatchInLine(searcher, text, lineIndex + 1, 1, captureMatches);
346
if (r) {
347
return r;
348
}
349
}
350
351
return null;
352
}
353
354
private static _findFirstMatchInLine(searcher: Searcher, text: string, lineNumber: number, fromColumn: number, captureMatches: boolean): FindMatch | null {
355
// Set regex to search from column
356
searcher.reset(fromColumn - 1);
357
const m: RegExpExecArray | null = searcher.next(text);
358
if (m) {
359
return createFindMatch(
360
new Range(lineNumber, m.index + 1, lineNumber, m.index + 1 + m[0].length),
361
m,
362
captureMatches
363
);
364
}
365
return null;
366
}
367
368
public static findPreviousMatch(model: TextModel, searchParams: SearchParams, searchStart: Position, captureMatches: boolean): FindMatch | null {
369
const searchData = searchParams.parseSearchRequest();
370
if (!searchData) {
371
return null;
372
}
373
374
const searcher = new Searcher(searchData.wordSeparators, searchData.regex);
375
376
if (searchData.regex.multiline) {
377
return this._doFindPreviousMatchMultiline(model, searchStart, searcher, captureMatches);
378
}
379
return this._doFindPreviousMatchLineByLine(model, searchStart, searcher, captureMatches);
380
}
381
382
private static _doFindPreviousMatchMultiline(model: TextModel, searchStart: Position, searcher: Searcher, captureMatches: boolean): FindMatch | null {
383
const matches = this._doFindMatchesMultiline(model, new Range(1, 1, searchStart.lineNumber, searchStart.column), searcher, captureMatches, 10 * LIMIT_FIND_COUNT);
384
if (matches.length > 0) {
385
return matches[matches.length - 1];
386
}
387
388
const lineCount = model.getLineCount();
389
if (searchStart.lineNumber !== lineCount || searchStart.column !== model.getLineMaxColumn(lineCount)) {
390
// Try again with all content
391
return this._doFindPreviousMatchMultiline(model, new Position(lineCount, model.getLineMaxColumn(lineCount)), searcher, captureMatches);
392
}
393
394
return null;
395
}
396
397
private static _doFindPreviousMatchLineByLine(model: TextModel, searchStart: Position, searcher: Searcher, captureMatches: boolean): FindMatch | null {
398
const lineCount = model.getLineCount();
399
const startLineNumber = searchStart.lineNumber;
400
401
// Look in first line
402
const text = model.getLineContent(startLineNumber).substring(0, searchStart.column - 1);
403
const r = this._findLastMatchInLine(searcher, text, startLineNumber, captureMatches);
404
if (r) {
405
return r;
406
}
407
408
for (let i = 1; i <= lineCount; i++) {
409
const lineIndex = (lineCount + startLineNumber - i - 1) % lineCount;
410
const text = model.getLineContent(lineIndex + 1);
411
const r = this._findLastMatchInLine(searcher, text, lineIndex + 1, captureMatches);
412
if (r) {
413
return r;
414
}
415
}
416
417
return null;
418
}
419
420
private static _findLastMatchInLine(searcher: Searcher, text: string, lineNumber: number, captureMatches: boolean): FindMatch | null {
421
let bestResult: FindMatch | null = null;
422
let m: RegExpExecArray | null;
423
searcher.reset(0);
424
while ((m = searcher.next(text))) {
425
bestResult = createFindMatch(new Range(lineNumber, m.index + 1, lineNumber, m.index + 1 + m[0].length), m, captureMatches);
426
}
427
return bestResult;
428
}
429
}
430
431
function leftIsWordBounday(wordSeparators: WordCharacterClassifier, text: string, textLength: number, matchStartIndex: number, matchLength: number): boolean {
432
if (matchStartIndex === 0) {
433
// Match starts at start of string
434
return true;
435
}
436
437
const charBefore = text.charCodeAt(matchStartIndex - 1);
438
if (wordSeparators.get(charBefore) !== WordCharacterClass.Regular) {
439
// The character before the match is a word separator
440
return true;
441
}
442
443
if (charBefore === CharCode.CarriageReturn || charBefore === CharCode.LineFeed) {
444
// The character before the match is line break or carriage return.
445
return true;
446
}
447
448
if (matchLength > 0) {
449
const firstCharInMatch = text.charCodeAt(matchStartIndex);
450
if (wordSeparators.get(firstCharInMatch) !== WordCharacterClass.Regular) {
451
// The first character inside the match is a word separator
452
return true;
453
}
454
}
455
456
return false;
457
}
458
459
function rightIsWordBounday(wordSeparators: WordCharacterClassifier, text: string, textLength: number, matchStartIndex: number, matchLength: number): boolean {
460
if (matchStartIndex + matchLength === textLength) {
461
// Match ends at end of string
462
return true;
463
}
464
465
const charAfter = text.charCodeAt(matchStartIndex + matchLength);
466
if (wordSeparators.get(charAfter) !== WordCharacterClass.Regular) {
467
// The character after the match is a word separator
468
return true;
469
}
470
471
if (charAfter === CharCode.CarriageReturn || charAfter === CharCode.LineFeed) {
472
// The character after the match is line break or carriage return.
473
return true;
474
}
475
476
if (matchLength > 0) {
477
const lastCharInMatch = text.charCodeAt(matchStartIndex + matchLength - 1);
478
if (wordSeparators.get(lastCharInMatch) !== WordCharacterClass.Regular) {
479
// The last character in the match is a word separator
480
return true;
481
}
482
}
483
484
return false;
485
}
486
487
export function isValidMatch(wordSeparators: WordCharacterClassifier, text: string, textLength: number, matchStartIndex: number, matchLength: number): boolean {
488
return (
489
leftIsWordBounday(wordSeparators, text, textLength, matchStartIndex, matchLength)
490
&& rightIsWordBounday(wordSeparators, text, textLength, matchStartIndex, matchLength)
491
);
492
}
493
494
export class Searcher {
495
public readonly _wordSeparators: WordCharacterClassifier | null;
496
private readonly _searchRegex: RegExp;
497
private _prevMatchStartIndex: number;
498
private _prevMatchLength: number;
499
500
constructor(wordSeparators: WordCharacterClassifier | null, searchRegex: RegExp,) {
501
this._wordSeparators = wordSeparators;
502
this._searchRegex = searchRegex;
503
this._prevMatchStartIndex = -1;
504
this._prevMatchLength = 0;
505
}
506
507
public reset(lastIndex: number): void {
508
this._searchRegex.lastIndex = lastIndex;
509
this._prevMatchStartIndex = -1;
510
this._prevMatchLength = 0;
511
}
512
513
public next(text: string): RegExpExecArray | null {
514
const textLength = text.length;
515
516
let m: RegExpExecArray | null;
517
do {
518
if (this._prevMatchStartIndex + this._prevMatchLength === textLength) {
519
// Reached the end of the line
520
return null;
521
}
522
523
m = this._searchRegex.exec(text);
524
if (!m) {
525
return null;
526
}
527
528
const matchStartIndex = m.index;
529
const matchLength = m[0].length;
530
if (matchStartIndex === this._prevMatchStartIndex && matchLength === this._prevMatchLength) {
531
if (matchLength === 0) {
532
// the search result is an empty string and won't advance `regex.lastIndex`, so `regex.exec` will stuck here
533
// we attempt to recover from that by advancing by two if surrogate pair found and by one otherwise
534
if (strings.getNextCodePoint(text, textLength, this._searchRegex.lastIndex) > 0xFFFF) {
535
this._searchRegex.lastIndex += 2;
536
} else {
537
this._searchRegex.lastIndex += 1;
538
}
539
continue;
540
}
541
// Exit early if the regex matches the same range twice
542
return null;
543
}
544
this._prevMatchStartIndex = matchStartIndex;
545
this._prevMatchLength = matchLength;
546
547
if (!this._wordSeparators || isValidMatch(this._wordSeparators, text, textLength, matchStartIndex, matchLength)) {
548
return m;
549
}
550
551
} while (m);
552
553
return null;
554
}
555
}
556
557