Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
microsoft
GitHub Repository: microsoft/vscode
Path: blob/main/src/vs/base/common/fuzzyScorer.ts
3291 views
1
/*---------------------------------------------------------------------------------------------
2
* Copyright (c) Microsoft Corporation. All rights reserved.
3
* Licensed under the MIT License. See License.txt in the project root for license information.
4
*--------------------------------------------------------------------------------------------*/
5
6
import { CharCode } from './charCode.js';
7
import { compareAnything } from './comparers.js';
8
import { createMatches as createFuzzyMatches, fuzzyScore, IMatch, isUpper, matchesPrefix } from './filters.js';
9
import { hash } from './hash.js';
10
import { sep } from './path.js';
11
import { isLinux, isWindows } from './platform.js';
12
import { equalsIgnoreCase, stripWildcards } from './strings.js';
13
14
//#region Fuzzy scorer
15
16
export type FuzzyScore = [number /* score */, number[] /* match positions */];
17
export type FuzzyScorerCache = { [key: string]: IItemScore };
18
19
const NO_MATCH = 0;
20
const NO_SCORE: FuzzyScore = [NO_MATCH, []];
21
22
// const DEBUG = true;
23
// const DEBUG_MATRIX = false;
24
25
export function scoreFuzzy(target: string, query: string, queryLower: string, allowNonContiguousMatches: boolean): FuzzyScore {
26
if (!target || !query) {
27
return NO_SCORE; // return early if target or query are undefined
28
}
29
30
const targetLength = target.length;
31
const queryLength = query.length;
32
33
if (targetLength < queryLength) {
34
return NO_SCORE; // impossible for query to be contained in target
35
}
36
37
// if (DEBUG) {
38
// console.group(`Target: ${target}, Query: ${query}`);
39
// }
40
41
const targetLower = target.toLowerCase();
42
const res = doScoreFuzzy(query, queryLower, queryLength, target, targetLower, targetLength, allowNonContiguousMatches);
43
44
// if (DEBUG) {
45
// console.log(`%cFinal Score: ${res[0]}`, 'font-weight: bold');
46
// console.groupEnd();
47
// }
48
49
return res;
50
}
51
52
function doScoreFuzzy(query: string, queryLower: string, queryLength: number, target: string, targetLower: string, targetLength: number, allowNonContiguousMatches: boolean): FuzzyScore {
53
const scores: number[] = [];
54
const matches: number[] = [];
55
56
//
57
// Build Scorer Matrix:
58
//
59
// The matrix is composed of query q and target t. For each index we score
60
// q[i] with t[i] and compare that with the previous score. If the score is
61
// equal or larger, we keep the match. In addition to the score, we also keep
62
// the length of the consecutive matches to use as boost for the score.
63
//
64
// t a r g e t
65
// q
66
// u
67
// e
68
// r
69
// y
70
//
71
for (let queryIndex = 0; queryIndex < queryLength; queryIndex++) {
72
const queryIndexOffset = queryIndex * targetLength;
73
const queryIndexPreviousOffset = queryIndexOffset - targetLength;
74
75
const queryIndexGtNull = queryIndex > 0;
76
77
const queryCharAtIndex = query[queryIndex];
78
const queryLowerCharAtIndex = queryLower[queryIndex];
79
80
for (let targetIndex = 0; targetIndex < targetLength; targetIndex++) {
81
const targetIndexGtNull = targetIndex > 0;
82
83
const currentIndex = queryIndexOffset + targetIndex;
84
const leftIndex = currentIndex - 1;
85
const diagIndex = queryIndexPreviousOffset + targetIndex - 1;
86
87
const leftScore = targetIndexGtNull ? scores[leftIndex] : 0;
88
const diagScore = queryIndexGtNull && targetIndexGtNull ? scores[diagIndex] : 0;
89
90
const matchesSequenceLength = queryIndexGtNull && targetIndexGtNull ? matches[diagIndex] : 0;
91
92
// If we are not matching on the first query character any more, we only produce a
93
// score if we had a score previously for the last query index (by looking at the diagScore).
94
// This makes sure that the query always matches in sequence on the target. For example
95
// given a target of "ede" and a query of "de", we would otherwise produce a wrong high score
96
// for query[1] ("e") matching on target[0] ("e") because of the "beginning of word" boost.
97
let score: number;
98
if (!diagScore && queryIndexGtNull) {
99
score = 0;
100
} else {
101
score = computeCharScore(queryCharAtIndex, queryLowerCharAtIndex, target, targetLower, targetIndex, matchesSequenceLength);
102
}
103
104
// We have a score and its equal or larger than the left score
105
// Match: sequence continues growing from previous diag value
106
// Score: increases by diag score value
107
const isValidScore = score && diagScore + score >= leftScore;
108
if (isValidScore && (
109
// We don't need to check if it's contiguous if we allow non-contiguous matches
110
allowNonContiguousMatches ||
111
// We must be looking for a contiguous match.
112
// Looking at an index higher than 0 in the query means we must have already
113
// found out this is contiguous otherwise there wouldn't have been a score
114
queryIndexGtNull ||
115
// lastly check if the query is completely contiguous at this index in the target
116
targetLower.startsWith(queryLower, targetIndex)
117
)) {
118
matches[currentIndex] = matchesSequenceLength + 1;
119
scores[currentIndex] = diagScore + score;
120
}
121
122
// We either have no score or the score is lower than the left score
123
// Match: reset to 0
124
// Score: pick up from left hand side
125
else {
126
matches[currentIndex] = NO_MATCH;
127
scores[currentIndex] = leftScore;
128
}
129
}
130
}
131
132
// Restore Positions (starting from bottom right of matrix)
133
const positions: number[] = [];
134
let queryIndex = queryLength - 1;
135
let targetIndex = targetLength - 1;
136
while (queryIndex >= 0 && targetIndex >= 0) {
137
const currentIndex = queryIndex * targetLength + targetIndex;
138
const match = matches[currentIndex];
139
if (match === NO_MATCH) {
140
targetIndex--; // go left
141
} else {
142
positions.push(targetIndex);
143
144
// go up and left
145
queryIndex--;
146
targetIndex--;
147
}
148
}
149
150
// Print matrix
151
// if (DEBUG_MATRIX) {
152
// printMatrix(query, target, matches, scores);
153
// }
154
155
return [scores[queryLength * targetLength - 1], positions.reverse()];
156
}
157
158
function computeCharScore(queryCharAtIndex: string, queryLowerCharAtIndex: string, target: string, targetLower: string, targetIndex: number, matchesSequenceLength: number): number {
159
let score = 0;
160
161
if (!considerAsEqual(queryLowerCharAtIndex, targetLower[targetIndex])) {
162
return score; // no match of characters
163
}
164
165
// if (DEBUG) {
166
// console.groupCollapsed(`%cFound a match of char: ${queryLowerCharAtIndex} at index ${targetIndex}`, 'font-weight: normal');
167
// }
168
169
// Character match bonus
170
score += 1;
171
172
// if (DEBUG) {
173
// console.log(`%cCharacter match bonus: +1`, 'font-weight: normal');
174
// }
175
176
// Consecutive match bonus: sequences up to 3 get the full bonus (6)
177
// and the remainder gets half the bonus (3). This helps reduce the
178
// overall boost for long sequence matches.
179
if (matchesSequenceLength > 0) {
180
score += (Math.min(matchesSequenceLength, 3) * 6) + (Math.max(0, matchesSequenceLength - 3) * 3);
181
182
// if (DEBUG) {
183
// console.log(`Consecutive match bonus: +${matchesSequenceLength * 5}`);
184
// }
185
}
186
187
// Same case bonus
188
if (queryCharAtIndex === target[targetIndex]) {
189
score += 1;
190
191
// if (DEBUG) {
192
// console.log('Same case bonus: +1');
193
// }
194
}
195
196
// Start of word bonus
197
if (targetIndex === 0) {
198
score += 8;
199
200
// if (DEBUG) {
201
// console.log('Start of word bonus: +8');
202
// }
203
}
204
205
else {
206
207
// After separator bonus
208
const separatorBonus = scoreSeparatorAtPos(target.charCodeAt(targetIndex - 1));
209
if (separatorBonus) {
210
score += separatorBonus;
211
212
// if (DEBUG) {
213
// console.log(`After separator bonus: +${separatorBonus}`);
214
// }
215
}
216
217
// Inside word upper case bonus (camel case). We only give this bonus if we're not in a contiguous sequence.
218
// For example:
219
// NPE => NullPointerException = boost
220
// HTTP => HTTP = not boost
221
else if (isUpper(target.charCodeAt(targetIndex)) && matchesSequenceLength === 0) {
222
score += 2;
223
224
// if (DEBUG) {
225
// console.log('Inside word upper case bonus: +2');
226
// }
227
}
228
}
229
230
// if (DEBUG) {
231
// console.log(`Total score: ${score}`);
232
// console.groupEnd();
233
// }
234
235
return score;
236
}
237
238
function considerAsEqual(a: string, b: string): boolean {
239
if (a === b) {
240
return true;
241
}
242
243
// Special case path separators: ignore platform differences
244
if (a === '/' || a === '\\') {
245
return b === '/' || b === '\\';
246
}
247
248
return false;
249
}
250
251
function scoreSeparatorAtPos(charCode: number): number {
252
switch (charCode) {
253
case CharCode.Slash:
254
case CharCode.Backslash:
255
return 5; // prefer path separators...
256
case CharCode.Underline:
257
case CharCode.Dash:
258
case CharCode.Period:
259
case CharCode.Space:
260
case CharCode.SingleQuote:
261
case CharCode.DoubleQuote:
262
case CharCode.Colon:
263
return 4; // ...over other separators
264
default:
265
return 0;
266
}
267
}
268
269
// function printMatrix(query: string, target: string, matches: number[], scores: number[]): void {
270
// console.log('\t' + target.split('').join('\t'));
271
// for (let queryIndex = 0; queryIndex < query.length; queryIndex++) {
272
// let line = query[queryIndex] + '\t';
273
// for (let targetIndex = 0; targetIndex < target.length; targetIndex++) {
274
// const currentIndex = queryIndex * target.length + targetIndex;
275
// line = line + 'M' + matches[currentIndex] + '/' + 'S' + scores[currentIndex] + '\t';
276
// }
277
278
// console.log(line);
279
// }
280
// }
281
282
//#endregion
283
284
285
//#region Alternate fuzzy scorer implementation that is e.g. used for symbols
286
287
export type FuzzyScore2 = [number | undefined /* score */, IMatch[]];
288
289
const NO_SCORE2: FuzzyScore2 = [undefined, []];
290
291
export function scoreFuzzy2(target: string, query: IPreparedQuery | IPreparedQueryPiece, patternStart = 0, wordStart = 0): FuzzyScore2 {
292
293
// Score: multiple inputs
294
const preparedQuery = query as IPreparedQuery;
295
if (preparedQuery.values && preparedQuery.values.length > 1) {
296
return doScoreFuzzy2Multiple(target, preparedQuery.values, patternStart, wordStart);
297
}
298
299
// Score: single input
300
return doScoreFuzzy2Single(target, query, patternStart, wordStart);
301
}
302
303
function doScoreFuzzy2Multiple(target: string, query: IPreparedQueryPiece[], patternStart: number, wordStart: number): FuzzyScore2 {
304
let totalScore = 0;
305
const totalMatches: IMatch[] = [];
306
307
for (const queryPiece of query) {
308
const [score, matches] = doScoreFuzzy2Single(target, queryPiece, patternStart, wordStart);
309
if (typeof score !== 'number') {
310
// if a single query value does not match, return with
311
// no score entirely, we require all queries to match
312
return NO_SCORE2;
313
}
314
315
totalScore += score;
316
totalMatches.push(...matches);
317
}
318
319
// if we have a score, ensure that the positions are
320
// sorted in ascending order and distinct
321
return [totalScore, normalizeMatches(totalMatches)];
322
}
323
324
function doScoreFuzzy2Single(target: string, query: IPreparedQueryPiece, patternStart: number, wordStart: number): FuzzyScore2 {
325
const score = fuzzyScore(query.original, query.originalLowercase, patternStart, target, target.toLowerCase(), wordStart, { firstMatchCanBeWeak: true, boostFullMatch: true });
326
if (!score) {
327
return NO_SCORE2;
328
}
329
330
return [score[0], createFuzzyMatches(score)];
331
}
332
333
//#endregion
334
335
336
//#region Item (label, description, path) scorer
337
338
/**
339
* Scoring on structural items that have a label and optional description.
340
*/
341
export interface IItemScore {
342
343
/**
344
* Overall score.
345
*/
346
score: number;
347
348
/**
349
* Matches within the label.
350
*/
351
labelMatch?: IMatch[];
352
353
/**
354
* Matches within the description.
355
*/
356
descriptionMatch?: IMatch[];
357
}
358
359
const NO_ITEM_SCORE = Object.freeze<IItemScore>({ score: 0 });
360
361
export interface IItemAccessor<T> {
362
363
/**
364
* Just the label of the item to score on.
365
*/
366
getItemLabel(item: T): string | undefined;
367
368
/**
369
* The optional description of the item to score on.
370
*/
371
getItemDescription(item: T): string | undefined;
372
373
/**
374
* If the item is a file, the path of the file to score on.
375
*/
376
getItemPath(file: T): string | undefined;
377
}
378
379
const PATH_IDENTITY_SCORE = 1 << 18;
380
const LABEL_PREFIX_SCORE_THRESHOLD = 1 << 17;
381
const LABEL_SCORE_THRESHOLD = 1 << 16;
382
383
function getCacheHash(label: string, description: string | undefined, allowNonContiguousMatches: boolean, query: IPreparedQuery) {
384
const values = query.values ? query.values : [query];
385
const cacheHash = hash({
386
[query.normalized]: {
387
values: values.map(v => ({ value: v.normalized, expectContiguousMatch: v.expectContiguousMatch })),
388
label,
389
description,
390
allowNonContiguousMatches
391
}
392
});
393
return cacheHash;
394
}
395
396
export function scoreItemFuzzy<T>(item: T, query: IPreparedQuery, allowNonContiguousMatches: boolean, accessor: IItemAccessor<T>, cache: FuzzyScorerCache): IItemScore {
397
if (!item || !query.normalized) {
398
return NO_ITEM_SCORE; // we need an item and query to score on at least
399
}
400
401
const label = accessor.getItemLabel(item);
402
if (!label) {
403
return NO_ITEM_SCORE; // we need a label at least
404
}
405
406
const description = accessor.getItemDescription(item);
407
408
// in order to speed up scoring, we cache the score with a unique hash based on:
409
// - label
410
// - description (if provided)
411
// - whether non-contiguous matching is enabled or not
412
// - hash of the query (normalized) values
413
const cacheHash = getCacheHash(label, description, allowNonContiguousMatches, query);
414
const cached = cache[cacheHash];
415
if (cached) {
416
return cached;
417
}
418
419
const itemScore = doScoreItemFuzzy(label, description, accessor.getItemPath(item), query, allowNonContiguousMatches);
420
cache[cacheHash] = itemScore;
421
422
return itemScore;
423
}
424
425
function doScoreItemFuzzy(label: string, description: string | undefined, path: string | undefined, query: IPreparedQuery, allowNonContiguousMatches: boolean): IItemScore {
426
const preferLabelMatches = !path || !query.containsPathSeparator;
427
428
// Treat identity matches on full path highest
429
if (path && (isLinux ? query.pathNormalized === path : equalsIgnoreCase(query.pathNormalized, path))) {
430
return { score: PATH_IDENTITY_SCORE, labelMatch: [{ start: 0, end: label.length }], descriptionMatch: description ? [{ start: 0, end: description.length }] : undefined };
431
}
432
433
// Score: multiple inputs
434
if (query.values && query.values.length > 1) {
435
return doScoreItemFuzzyMultiple(label, description, path, query.values, preferLabelMatches, allowNonContiguousMatches);
436
}
437
438
// Score: single input
439
return doScoreItemFuzzySingle(label, description, path, query, preferLabelMatches, allowNonContiguousMatches);
440
}
441
442
function doScoreItemFuzzyMultiple(label: string, description: string | undefined, path: string | undefined, query: IPreparedQueryPiece[], preferLabelMatches: boolean, allowNonContiguousMatches: boolean): IItemScore {
443
let totalScore = 0;
444
const totalLabelMatches: IMatch[] = [];
445
const totalDescriptionMatches: IMatch[] = [];
446
447
for (const queryPiece of query) {
448
const { score, labelMatch, descriptionMatch } = doScoreItemFuzzySingle(label, description, path, queryPiece, preferLabelMatches, allowNonContiguousMatches);
449
if (score === NO_MATCH) {
450
// if a single query value does not match, return with
451
// no score entirely, we require all queries to match
452
return NO_ITEM_SCORE;
453
}
454
455
totalScore += score;
456
if (labelMatch) {
457
totalLabelMatches.push(...labelMatch);
458
}
459
460
if (descriptionMatch) {
461
totalDescriptionMatches.push(...descriptionMatch);
462
}
463
}
464
465
// if we have a score, ensure that the positions are
466
// sorted in ascending order and distinct
467
return {
468
score: totalScore,
469
labelMatch: normalizeMatches(totalLabelMatches),
470
descriptionMatch: normalizeMatches(totalDescriptionMatches)
471
};
472
}
473
474
function doScoreItemFuzzySingle(label: string, description: string | undefined, path: string | undefined, query: IPreparedQueryPiece, preferLabelMatches: boolean, allowNonContiguousMatches: boolean): IItemScore {
475
476
// Prefer label matches if told so or we have no description
477
if (preferLabelMatches || !description) {
478
const [labelScore, labelPositions] = scoreFuzzy(
479
label,
480
query.normalized,
481
query.normalizedLowercase,
482
allowNonContiguousMatches && !query.expectContiguousMatch);
483
if (labelScore) {
484
485
// If we have a prefix match on the label, we give a much
486
// higher baseScore to elevate these matches over others
487
// This ensures that typing a file name wins over results
488
// that are present somewhere in the label, but not the
489
// beginning.
490
const labelPrefixMatch = matchesPrefix(query.normalized, label);
491
let baseScore: number;
492
if (labelPrefixMatch) {
493
baseScore = LABEL_PREFIX_SCORE_THRESHOLD;
494
495
// We give another boost to labels that are short, e.g. given
496
// files "window.ts" and "windowActions.ts" and a query of
497
// "window", we want "window.ts" to receive a higher score.
498
// As such we compute the percentage the query has within the
499
// label and add that to the baseScore.
500
const prefixLengthBoost = Math.round((query.normalized.length / label.length) * 100);
501
baseScore += prefixLengthBoost;
502
} else {
503
baseScore = LABEL_SCORE_THRESHOLD;
504
}
505
506
return { score: baseScore + labelScore, labelMatch: labelPrefixMatch || createMatches(labelPositions) };
507
}
508
}
509
510
// Finally compute description + label scores if we have a description
511
if (description) {
512
let descriptionPrefix = description;
513
if (!!path) {
514
descriptionPrefix = `${description}${sep}`; // assume this is a file path
515
}
516
517
const descriptionPrefixLength = descriptionPrefix.length;
518
const descriptionAndLabel = `${descriptionPrefix}${label}`;
519
520
const [labelDescriptionScore, labelDescriptionPositions] = scoreFuzzy(
521
descriptionAndLabel,
522
query.normalized,
523
query.normalizedLowercase,
524
allowNonContiguousMatches && !query.expectContiguousMatch);
525
if (labelDescriptionScore) {
526
const labelDescriptionMatches = createMatches(labelDescriptionPositions);
527
const labelMatch: IMatch[] = [];
528
const descriptionMatch: IMatch[] = [];
529
530
// We have to split the matches back onto the label and description portions
531
labelDescriptionMatches.forEach(h => {
532
533
// Match overlaps label and description part, we need to split it up
534
if (h.start < descriptionPrefixLength && h.end > descriptionPrefixLength) {
535
labelMatch.push({ start: 0, end: h.end - descriptionPrefixLength });
536
descriptionMatch.push({ start: h.start, end: descriptionPrefixLength });
537
}
538
539
// Match on label part
540
else if (h.start >= descriptionPrefixLength) {
541
labelMatch.push({ start: h.start - descriptionPrefixLength, end: h.end - descriptionPrefixLength });
542
}
543
544
// Match on description part
545
else {
546
descriptionMatch.push(h);
547
}
548
});
549
550
return { score: labelDescriptionScore, labelMatch, descriptionMatch };
551
}
552
}
553
554
return NO_ITEM_SCORE;
555
}
556
557
function createMatches(offsets: number[] | undefined): IMatch[] {
558
const ret: IMatch[] = [];
559
if (!offsets) {
560
return ret;
561
}
562
563
let last: IMatch | undefined;
564
for (const pos of offsets) {
565
if (last && last.end === pos) {
566
last.end += 1;
567
} else {
568
last = { start: pos, end: pos + 1 };
569
ret.push(last);
570
}
571
}
572
573
return ret;
574
}
575
576
function normalizeMatches(matches: IMatch[]): IMatch[] {
577
578
// sort matches by start to be able to normalize
579
const sortedMatches = matches.sort((matchA, matchB) => {
580
return matchA.start - matchB.start;
581
});
582
583
// merge matches that overlap
584
const normalizedMatches: IMatch[] = [];
585
let currentMatch: IMatch | undefined = undefined;
586
for (const match of sortedMatches) {
587
588
// if we have no current match or the matches
589
// do not overlap, we take it as is and remember
590
// it for future merging
591
if (!currentMatch || !matchOverlaps(currentMatch, match)) {
592
currentMatch = match;
593
normalizedMatches.push(match);
594
}
595
596
// otherwise we merge the matches
597
else {
598
currentMatch.start = Math.min(currentMatch.start, match.start);
599
currentMatch.end = Math.max(currentMatch.end, match.end);
600
}
601
}
602
603
return normalizedMatches;
604
}
605
606
function matchOverlaps(matchA: IMatch, matchB: IMatch): boolean {
607
if (matchA.end < matchB.start) {
608
return false; // A ends before B starts
609
}
610
611
if (matchB.end < matchA.start) {
612
return false; // B ends before A starts
613
}
614
615
return true;
616
}
617
618
//#endregion
619
620
621
//#region Comparers
622
623
export function compareItemsByFuzzyScore<T>(itemA: T, itemB: T, query: IPreparedQuery, allowNonContiguousMatches: boolean, accessor: IItemAccessor<T>, cache: FuzzyScorerCache): number {
624
const itemScoreA = scoreItemFuzzy(itemA, query, allowNonContiguousMatches, accessor, cache);
625
const itemScoreB = scoreItemFuzzy(itemB, query, allowNonContiguousMatches, accessor, cache);
626
627
const scoreA = itemScoreA.score;
628
const scoreB = itemScoreB.score;
629
630
// 1.) identity matches have highest score
631
if (scoreA === PATH_IDENTITY_SCORE || scoreB === PATH_IDENTITY_SCORE) {
632
if (scoreA !== scoreB) {
633
return scoreA === PATH_IDENTITY_SCORE ? -1 : 1;
634
}
635
}
636
637
// 2.) matches on label are considered higher compared to label+description matches
638
if (scoreA > LABEL_SCORE_THRESHOLD || scoreB > LABEL_SCORE_THRESHOLD) {
639
if (scoreA !== scoreB) {
640
return scoreA > scoreB ? -1 : 1;
641
}
642
643
// prefer more compact matches over longer in label (unless this is a prefix match where
644
// longer prefix matches are actually preferred)
645
if (scoreA < LABEL_PREFIX_SCORE_THRESHOLD && scoreB < LABEL_PREFIX_SCORE_THRESHOLD) {
646
const comparedByMatchLength = compareByMatchLength(itemScoreA.labelMatch, itemScoreB.labelMatch);
647
if (comparedByMatchLength !== 0) {
648
return comparedByMatchLength;
649
}
650
}
651
652
// prefer shorter labels over longer labels
653
const labelA = accessor.getItemLabel(itemA) || '';
654
const labelB = accessor.getItemLabel(itemB) || '';
655
if (labelA.length !== labelB.length) {
656
return labelA.length - labelB.length;
657
}
658
}
659
660
// 3.) compare by score in label+description
661
if (scoreA !== scoreB) {
662
return scoreA > scoreB ? -1 : 1;
663
}
664
665
// 4.) scores are identical: prefer matches in label over non-label matches
666
const itemAHasLabelMatches = Array.isArray(itemScoreA.labelMatch) && itemScoreA.labelMatch.length > 0;
667
const itemBHasLabelMatches = Array.isArray(itemScoreB.labelMatch) && itemScoreB.labelMatch.length > 0;
668
if (itemAHasLabelMatches && !itemBHasLabelMatches) {
669
return -1;
670
} else if (itemBHasLabelMatches && !itemAHasLabelMatches) {
671
return 1;
672
}
673
674
// 5.) scores are identical: prefer more compact matches (label and description)
675
const itemAMatchDistance = computeLabelAndDescriptionMatchDistance(itemA, itemScoreA, accessor);
676
const itemBMatchDistance = computeLabelAndDescriptionMatchDistance(itemB, itemScoreB, accessor);
677
if (itemAMatchDistance && itemBMatchDistance && itemAMatchDistance !== itemBMatchDistance) {
678
return itemBMatchDistance > itemAMatchDistance ? -1 : 1;
679
}
680
681
// 6.) scores are identical: start to use the fallback compare
682
return fallbackCompare(itemA, itemB, query, accessor);
683
}
684
685
function computeLabelAndDescriptionMatchDistance<T>(item: T, score: IItemScore, accessor: IItemAccessor<T>): number {
686
let matchStart: number = -1;
687
let matchEnd: number = -1;
688
689
// If we have description matches, the start is first of description match
690
if (score.descriptionMatch && score.descriptionMatch.length) {
691
matchStart = score.descriptionMatch[0].start;
692
}
693
694
// Otherwise, the start is the first label match
695
else if (score.labelMatch && score.labelMatch.length) {
696
matchStart = score.labelMatch[0].start;
697
}
698
699
// If we have label match, the end is the last label match
700
// If we had a description match, we add the length of the description
701
// as offset to the end to indicate this.
702
if (score.labelMatch && score.labelMatch.length) {
703
matchEnd = score.labelMatch[score.labelMatch.length - 1].end;
704
if (score.descriptionMatch && score.descriptionMatch.length) {
705
const itemDescription = accessor.getItemDescription(item);
706
if (itemDescription) {
707
matchEnd += itemDescription.length;
708
}
709
}
710
}
711
712
// If we have just a description match, the end is the last description match
713
else if (score.descriptionMatch && score.descriptionMatch.length) {
714
matchEnd = score.descriptionMatch[score.descriptionMatch.length - 1].end;
715
}
716
717
return matchEnd - matchStart;
718
}
719
720
function compareByMatchLength(matchesA?: IMatch[], matchesB?: IMatch[]): number {
721
if ((!matchesA && !matchesB) || ((!matchesA || !matchesA.length) && (!matchesB || !matchesB.length))) {
722
return 0; // make sure to not cause bad comparing when matches are not provided
723
}
724
725
if (!matchesB || !matchesB.length) {
726
return -1;
727
}
728
729
if (!matchesA || !matchesA.length) {
730
return 1;
731
}
732
733
// Compute match length of A (first to last match)
734
const matchStartA = matchesA[0].start;
735
const matchEndA = matchesA[matchesA.length - 1].end;
736
const matchLengthA = matchEndA - matchStartA;
737
738
// Compute match length of B (first to last match)
739
const matchStartB = matchesB[0].start;
740
const matchEndB = matchesB[matchesB.length - 1].end;
741
const matchLengthB = matchEndB - matchStartB;
742
743
// Prefer shorter match length
744
return matchLengthA === matchLengthB ? 0 : matchLengthB < matchLengthA ? 1 : -1;
745
}
746
747
function fallbackCompare<T>(itemA: T, itemB: T, query: IPreparedQuery, accessor: IItemAccessor<T>): number {
748
749
// check for label + description length and prefer shorter
750
const labelA = accessor.getItemLabel(itemA) || '';
751
const labelB = accessor.getItemLabel(itemB) || '';
752
753
const descriptionA = accessor.getItemDescription(itemA);
754
const descriptionB = accessor.getItemDescription(itemB);
755
756
const labelDescriptionALength = labelA.length + (descriptionA ? descriptionA.length : 0);
757
const labelDescriptionBLength = labelB.length + (descriptionB ? descriptionB.length : 0);
758
759
if (labelDescriptionALength !== labelDescriptionBLength) {
760
return labelDescriptionALength - labelDescriptionBLength;
761
}
762
763
// check for path length and prefer shorter
764
const pathA = accessor.getItemPath(itemA);
765
const pathB = accessor.getItemPath(itemB);
766
767
if (pathA && pathB && pathA.length !== pathB.length) {
768
return pathA.length - pathB.length;
769
}
770
771
// 7.) finally we have equal scores and equal length, we fallback to comparer
772
773
// compare by label
774
if (labelA !== labelB) {
775
return compareAnything(labelA, labelB, query.normalized);
776
}
777
778
// compare by description
779
if (descriptionA && descriptionB && descriptionA !== descriptionB) {
780
return compareAnything(descriptionA, descriptionB, query.normalized);
781
}
782
783
// compare by path
784
if (pathA && pathB && pathA !== pathB) {
785
return compareAnything(pathA, pathB, query.normalized);
786
}
787
788
// equal
789
return 0;
790
}
791
792
//#endregion
793
794
795
//#region Query Normalizer
796
797
export interface IPreparedQueryPiece {
798
799
/**
800
* The original query as provided as input.
801
*/
802
original: string;
803
originalLowercase: string;
804
805
/**
806
* Original normalized to platform separators:
807
* - Windows: \
808
* - Posix: /
809
*/
810
pathNormalized: string;
811
812
/**
813
* In addition to the normalized path, will have
814
* whitespace and wildcards removed.
815
*/
816
normalized: string;
817
normalizedLowercase: string;
818
819
/**
820
* The query is wrapped in quotes which means
821
* this query must be a substring of the input.
822
* In other words, no fuzzy matching is used.
823
*/
824
expectContiguousMatch: boolean;
825
}
826
827
export interface IPreparedQuery extends IPreparedQueryPiece {
828
829
/**
830
* Query split by spaces into pieces.
831
*/
832
values: IPreparedQueryPiece[] | undefined;
833
834
/**
835
* Whether the query contains path separator(s) or not.
836
*/
837
containsPathSeparator: boolean;
838
}
839
840
/*
841
* If a query is wrapped in quotes, the user does not want to
842
* use fuzzy search for this query.
843
*/
844
function queryExpectsExactMatch(query: string) {
845
return query.startsWith('"') && query.endsWith('"');
846
}
847
848
/**
849
* Helper function to prepare a search value for scoring by removing unwanted characters
850
* and allowing to score on multiple pieces separated by whitespace character.
851
*/
852
const MULTIPLE_QUERY_VALUES_SEPARATOR = ' ';
853
export function prepareQuery(original: string): IPreparedQuery {
854
if (typeof original !== 'string') {
855
original = '';
856
}
857
858
const originalLowercase = original.toLowerCase();
859
const { pathNormalized, normalized, normalizedLowercase } = normalizeQuery(original);
860
const containsPathSeparator = pathNormalized.indexOf(sep) >= 0;
861
const expectExactMatch = queryExpectsExactMatch(original);
862
863
let values: IPreparedQueryPiece[] | undefined = undefined;
864
865
const originalSplit = original.split(MULTIPLE_QUERY_VALUES_SEPARATOR);
866
if (originalSplit.length > 1) {
867
for (const originalPiece of originalSplit) {
868
const expectExactMatchPiece = queryExpectsExactMatch(originalPiece);
869
const {
870
pathNormalized: pathNormalizedPiece,
871
normalized: normalizedPiece,
872
normalizedLowercase: normalizedLowercasePiece
873
} = normalizeQuery(originalPiece);
874
875
if (normalizedPiece) {
876
if (!values) {
877
values = [];
878
}
879
880
values.push({
881
original: originalPiece,
882
originalLowercase: originalPiece.toLowerCase(),
883
pathNormalized: pathNormalizedPiece,
884
normalized: normalizedPiece,
885
normalizedLowercase: normalizedLowercasePiece,
886
expectContiguousMatch: expectExactMatchPiece
887
});
888
}
889
}
890
}
891
892
return { original, originalLowercase, pathNormalized, normalized, normalizedLowercase, values, containsPathSeparator, expectContiguousMatch: expectExactMatch };
893
}
894
895
function normalizeQuery(original: string): { pathNormalized: string; normalized: string; normalizedLowercase: string } {
896
let pathNormalized: string;
897
if (isWindows) {
898
pathNormalized = original.replace(/\//g, sep); // Help Windows users to search for paths when using slash
899
} else {
900
pathNormalized = original.replace(/\\/g, sep); // Help macOS/Linux users to search for paths when using backslash
901
}
902
903
// we remove quotes here because quotes are used for exact match search
904
const normalized = stripWildcards(pathNormalized).replace(/\s|"/g, '');
905
906
return {
907
pathNormalized,
908
normalized,
909
normalizedLowercase: normalized.toLowerCase()
910
};
911
}
912
913
export function pieceToQuery(piece: IPreparedQueryPiece): IPreparedQuery;
914
export function pieceToQuery(pieces: IPreparedQueryPiece[]): IPreparedQuery;
915
export function pieceToQuery(arg1: IPreparedQueryPiece | IPreparedQueryPiece[]): IPreparedQuery {
916
if (Array.isArray(arg1)) {
917
return prepareQuery(arg1.map(piece => piece.original).join(MULTIPLE_QUERY_VALUES_SEPARATOR));
918
}
919
920
return prepareQuery(arg1.original);
921
}
922
923
//#endregion
924
925