CoCalc -- fuzzyScorer.ts

GitHub Repository: microsoft/vscode
Path: blob/main/src/vs/base/common/fuzzyScorer.ts
³²⁹¹ views
1
/*---------------------------------------------------------------------------------------------
2
 *  Copyright (c) Microsoft Corporation. All rights reserved.
3
 *  Licensed under the MIT License. See License.txt in the project root for license information.
4
 *--------------------------------------------------------------------------------------------*/
5

6
import { CharCode } from './charCode.js';
7
import { compareAnything } from './comparers.js';
8
import { createMatches as createFuzzyMatches, fuzzyScore, IMatch, isUpper, matchesPrefix } from './filters.js';
9
import { hash } from './hash.js';
10
import { sep } from './path.js';
11
import { isLinux, isWindows } from './platform.js';
12
import { equalsIgnoreCase, stripWildcards } from './strings.js';
13

14
//#region Fuzzy scorer
15

16
export type FuzzyScore = [number /* score */, number[] /* match positions */];
17
export type FuzzyScorerCache = { [key: string]: IItemScore };
18

19
const NO_MATCH = 0;
20
const NO_SCORE: FuzzyScore = [NO_MATCH, []];
21

22
// const DEBUG = true;
23
// const DEBUG_MATRIX = false;
24

25
export function scoreFuzzy(target: string, query: string, queryLower: string, allowNonContiguousMatches: boolean): FuzzyScore {
26
	if (!target || !query) {
27
		return NO_SCORE; // return early if target or query are undefined
28
	}
29

30
	const targetLength = target.length;
31
	const queryLength = query.length;
32

33
	if (targetLength < queryLength) {
34
		return NO_SCORE; // impossible for query to be contained in target
35
	}
36

37
	// if (DEBUG) {
38
	// 	console.group(`Target: ${target}, Query: ${query}`);
39
	// }
40

41
	const targetLower = target.toLowerCase();
42
	const res = doScoreFuzzy(query, queryLower, queryLength, target, targetLower, targetLength, allowNonContiguousMatches);
43

44
	// if (DEBUG) {
45
	// 	console.log(`%cFinal Score: ${res[0]}`, 'font-weight: bold');
46
	// 	console.groupEnd();
47
	// }
48

49
	return res;
50
}
51

52
function doScoreFuzzy(query: string, queryLower: string, queryLength: number, target: string, targetLower: string, targetLength: number, allowNonContiguousMatches: boolean): FuzzyScore {
53
	const scores: number[] = [];
54
	const matches: number[] = [];
55

56
	//
57
	// Build Scorer Matrix:
58
	//
59
	// The matrix is composed of query q and target t. For each index we score
60
	// q[i] with t[i] and compare that with the previous score. If the score is
61
	// equal or larger, we keep the match. In addition to the score, we also keep
62
	// the length of the consecutive matches to use as boost for the score.
63
	//
64
	//      t   a   r   g   e   t
65
	//  q
66
	//  u
67
	//  e
68
	//  r
69
	//  y
70
	//
71
	for (let queryIndex = 0; queryIndex < queryLength; queryIndex++) {
72
		const queryIndexOffset = queryIndex * targetLength;
73
		const queryIndexPreviousOffset = queryIndexOffset - targetLength;
74

75
		const queryIndexGtNull = queryIndex > 0;
76

77
		const queryCharAtIndex = query[queryIndex];
78
		const queryLowerCharAtIndex = queryLower[queryIndex];
79

80
		for (let targetIndex = 0; targetIndex < targetLength; targetIndex++) {
81
			const targetIndexGtNull = targetIndex > 0;
82

83
			const currentIndex = queryIndexOffset + targetIndex;
84
			const leftIndex = currentIndex - 1;
85
			const diagIndex = queryIndexPreviousOffset + targetIndex - 1;
86

87
			const leftScore = targetIndexGtNull ? scores[leftIndex] : 0;
88
			const diagScore = queryIndexGtNull && targetIndexGtNull ? scores[diagIndex] : 0;
89

90
			const matchesSequenceLength = queryIndexGtNull && targetIndexGtNull ? matches[diagIndex] : 0;
91

92
			// If we are not matching on the first query character any more, we only produce a
93
			// score if we had a score previously for the last query index (by looking at the diagScore).
94
			// This makes sure that the query always matches in sequence on the target. For example
95
			// given a target of "ede" and a query of "de", we would otherwise produce a wrong high score
96
			// for query[1] ("e") matching on target[0] ("e") because of the "beginning of word" boost.
97
			let score: number;
98
			if (!diagScore && queryIndexGtNull) {
99
				score = 0;
100
			} else {
101
				score = computeCharScore(queryCharAtIndex, queryLowerCharAtIndex, target, targetLower, targetIndex, matchesSequenceLength);
102
			}
103

104
			// We have a score and its equal or larger than the left score
105
			// Match: sequence continues growing from previous diag value
106
			// Score: increases by diag score value
107
			const isValidScore = score && diagScore + score >= leftScore;
108
			if (isValidScore && (
109
				// We don't need to check if it's contiguous if we allow non-contiguous matches
110
				allowNonContiguousMatches ||
111
				// We must be looking for a contiguous match.
112
				// Looking at an index higher than 0 in the query means we must have already
113
				// found out this is contiguous otherwise there wouldn't have been a score
114
				queryIndexGtNull ||
115
				// lastly check if the query is completely contiguous at this index in the target
116
				targetLower.startsWith(queryLower, targetIndex)
117
			)) {
118
				matches[currentIndex] = matchesSequenceLength + 1;
119
				scores[currentIndex] = diagScore + score;
120
			}
121

122
			// We either have no score or the score is lower than the left score
123
			// Match: reset to 0
124
			// Score: pick up from left hand side
125
			else {
126
				matches[currentIndex] = NO_MATCH;
127
				scores[currentIndex] = leftScore;
128
			}
129
		}
130
	}
131

132
	// Restore Positions (starting from bottom right of matrix)
133
	const positions: number[] = [];
134
	let queryIndex = queryLength - 1;
135
	let targetIndex = targetLength - 1;
136
	while (queryIndex >= 0 && targetIndex >= 0) {
137
		const currentIndex = queryIndex * targetLength + targetIndex;
138
		const match = matches[currentIndex];
139
		if (match === NO_MATCH) {
140
			targetIndex--; // go left
141
		} else {
142
			positions.push(targetIndex);
143

144
			// go up and left
145
			queryIndex--;
146
			targetIndex--;
147
		}
148
	}
149

150
	// Print matrix
151
	// if (DEBUG_MATRIX) {
152
	// 	printMatrix(query, target, matches, scores);
153
	// }
154

155
	return [scores[queryLength * targetLength - 1], positions.reverse()];
156
}
157

158
function computeCharScore(queryCharAtIndex: string, queryLowerCharAtIndex: string, target: string, targetLower: string, targetIndex: number, matchesSequenceLength: number): number {
159
	let score = 0;
160

161
	if (!considerAsEqual(queryLowerCharAtIndex, targetLower[targetIndex])) {
162
		return score; // no match of characters
163
	}
164

165
	// if (DEBUG) {
166
	// 	console.groupCollapsed(`%cFound a match of char: ${queryLowerCharAtIndex} at index ${targetIndex}`, 'font-weight: normal');
167
	// }
168

169
	// Character match bonus
170
	score += 1;
171

172
	// if (DEBUG) {
173
	// 	console.log(`%cCharacter match bonus: +1`, 'font-weight: normal');
174
	// }
175

176
	// Consecutive match bonus: sequences up to 3 get the full bonus (6)
177
	// and the remainder gets half the bonus (3). This helps reduce the
178
	// overall boost for long sequence matches.
179
	if (matchesSequenceLength > 0) {
180
		score += (Math.min(matchesSequenceLength, 3) * 6) + (Math.max(0, matchesSequenceLength - 3) * 3);
181

182
		// if (DEBUG) {
183
		// 	console.log(`Consecutive match bonus: +${matchesSequenceLength * 5}`);
184
		// }
185
	}
186

187
	// Same case bonus
188
	if (queryCharAtIndex === target[targetIndex]) {
189
		score += 1;
190

191
		// if (DEBUG) {
192
		// 	console.log('Same case bonus: +1');
193
		// }
194
	}
195

196
	// Start of word bonus
197
	if (targetIndex === 0) {
198
		score += 8;
199

200
		// if (DEBUG) {
201
		// 	console.log('Start of word bonus: +8');
202
		// }
203
	}
204

205
	else {
206

207
		// After separator bonus
208
		const separatorBonus = scoreSeparatorAtPos(target.charCodeAt(targetIndex - 1));
209
		if (separatorBonus) {
210
			score += separatorBonus;
211

212
			// if (DEBUG) {
213
			// 	console.log(`After separator bonus: +${separatorBonus}`);
214
			// }
215
		}
216

217
		// Inside word upper case bonus (camel case). We only give this bonus if we're not in a contiguous sequence.
218
		// For example:
219
		// NPE => NullPointerException = boost
220
		// HTTP => HTTP = not boost
221
		else if (isUpper(target.charCodeAt(targetIndex)) && matchesSequenceLength === 0) {
222
			score += 2;
223

224
			// if (DEBUG) {
225
			// 	console.log('Inside word upper case bonus: +2');
226
			// }
227
		}
228
	}
229

230
	// if (DEBUG) {
231
	// 	console.log(`Total score: ${score}`);
232
	// 	console.groupEnd();
233
	// }
234

235
	return score;
236
}
237

238
function considerAsEqual(a: string, b: string): boolean {
239
	if (a === b) {
240
		return true;
241
	}
242

243
	// Special case path separators: ignore platform differences
244
	if (a === '/' || a === '\\') {
245
		return b === '/' || b === '\\';
246
	}
247

248
	return false;
249
}
250

251
function scoreSeparatorAtPos(charCode: number): number {
252
	switch (charCode) {
253
		case CharCode.Slash:
254
		case CharCode.Backslash:
255
			return 5; // prefer path separators...
256
		case CharCode.Underline:
257
		case CharCode.Dash:
258
		case CharCode.Period:
259
		case CharCode.Space:
260
		case CharCode.SingleQuote:
261
		case CharCode.DoubleQuote:
262
		case CharCode.Colon:
263
			return 4; // ...over other separators
264
		default:
265
			return 0;
266
	}
267
}
268

269
// function printMatrix(query: string, target: string, matches: number[], scores: number[]): void {
270
// 	console.log('\t' + target.split('').join('\t'));
271
// 	for (let queryIndex = 0; queryIndex < query.length; queryIndex++) {
272
// 		let line = query[queryIndex] + '\t';
273
// 		for (let targetIndex = 0; targetIndex < target.length; targetIndex++) {
274
// 			const currentIndex = queryIndex * target.length + targetIndex;
275
// 			line = line + 'M' + matches[currentIndex] + '/' + 'S' + scores[currentIndex] + '\t';
276
// 		}
277

278
// 		console.log(line);
279
// 	}
280
// }
281

282
//#endregion
283

284

285
//#region Alternate fuzzy scorer implementation that is e.g. used for symbols
286

287
export type FuzzyScore2 = [number | undefined /* score */, IMatch[]];
288

289
const NO_SCORE2: FuzzyScore2 = [undefined, []];
290

291
export function scoreFuzzy2(target: string, query: IPreparedQuery | IPreparedQueryPiece, patternStart = 0, wordStart = 0): FuzzyScore2 {
292

293
	// Score: multiple inputs
294
	const preparedQuery = query as IPreparedQuery;
295
	if (preparedQuery.values && preparedQuery.values.length > 1) {
296
		return doScoreFuzzy2Multiple(target, preparedQuery.values, patternStart, wordStart);
297
	}
298

299
	// Score: single input
300
	return doScoreFuzzy2Single(target, query, patternStart, wordStart);
301
}
302

303
function doScoreFuzzy2Multiple(target: string, query: IPreparedQueryPiece[], patternStart: number, wordStart: number): FuzzyScore2 {
304
	let totalScore = 0;
305
	const totalMatches: IMatch[] = [];
306

307
	for (const queryPiece of query) {
308
		const [score, matches] = doScoreFuzzy2Single(target, queryPiece, patternStart, wordStart);
309
		if (typeof score !== 'number') {
310
			// if a single query value does not match, return with
311
			// no score entirely, we require all queries to match
312
			return NO_SCORE2;
313
		}
314

315
		totalScore += score;
316
		totalMatches.push(...matches);
317
	}
318

319
	// if we have a score, ensure that the positions are
320
	// sorted in ascending order and distinct
321
	return [totalScore, normalizeMatches(totalMatches)];
322
}
323

324
function doScoreFuzzy2Single(target: string, query: IPreparedQueryPiece, patternStart: number, wordStart: number): FuzzyScore2 {
325
	const score = fuzzyScore(query.original, query.originalLowercase, patternStart, target, target.toLowerCase(), wordStart, { firstMatchCanBeWeak: true, boostFullMatch: true });
326
	if (!score) {
327
		return NO_SCORE2;
328
	}
329

330
	return [score[0], createFuzzyMatches(score)];
331
}
332

333
//#endregion
334

335

336
//#region Item (label, description, path) scorer
337

338
/**
339
 * Scoring on structural items that have a label and optional description.
340
 */
341
export interface IItemScore {
342

343
	/**
344
	 * Overall score.
345
	 */
346
	score: number;
347

348
	/**
349
	 * Matches within the label.
350
	 */
351
	labelMatch?: IMatch[];
352

353
	/**
354
	 * Matches within the description.
355
	 */
356
	descriptionMatch?: IMatch[];
357
}
358

359
const NO_ITEM_SCORE = Object.freeze<IItemScore>({ score: 0 });
360

361
export interface IItemAccessor<T> {
362

363
	/**
364
	 * Just the label of the item to score on.
365
	 */
366
	getItemLabel(item: T): string | undefined;
367

368
	/**
369
	 * The optional description of the item to score on.
370
	 */
371
	getItemDescription(item: T): string | undefined;
372

373
	/**
374
	 * If the item is a file, the path of the file to score on.
375
	 */
376
	getItemPath(file: T): string | undefined;
377
}
378

379
const PATH_IDENTITY_SCORE = 1 << 18;
380
const LABEL_PREFIX_SCORE_THRESHOLD = 1 << 17;
381
const LABEL_SCORE_THRESHOLD = 1 << 16;
382

383
function getCacheHash(label: string, description: string | undefined, allowNonContiguousMatches: boolean, query: IPreparedQuery) {
384
	const values = query.values ? query.values : [query];
385
	const cacheHash = hash({
386
		[query.normalized]: {
387
			values: values.map(v => ({ value: v.normalized, expectContiguousMatch: v.expectContiguousMatch })),
388
			label,
389
			description,
390
			allowNonContiguousMatches
391
		}
392
	});
393
	return cacheHash;
394
}
395

396
export function scoreItemFuzzy<T>(item: T, query: IPreparedQuery, allowNonContiguousMatches: boolean, accessor: IItemAccessor<T>, cache: FuzzyScorerCache): IItemScore {
397
	if (!item || !query.normalized) {
398
		return NO_ITEM_SCORE; // we need an item and query to score on at least
399
	}
400

401
	const label = accessor.getItemLabel(item);
402
	if (!label) {
403
		return NO_ITEM_SCORE; // we need a label at least
404
	}
405

406
	const description = accessor.getItemDescription(item);
407

408
	// in order to speed up scoring, we cache the score with a unique hash based on:
409
	// - label
410
	// - description (if provided)
411
	// - whether non-contiguous matching is enabled or not
412
	// - hash of the query (normalized) values
413
	const cacheHash = getCacheHash(label, description, allowNonContiguousMatches, query);
414
	const cached = cache[cacheHash];
415
	if (cached) {
416
		return cached;
417
	}
418

419
	const itemScore = doScoreItemFuzzy(label, description, accessor.getItemPath(item), query, allowNonContiguousMatches);
420
	cache[cacheHash] = itemScore;
421

422
	return itemScore;
423
}
424

425
function doScoreItemFuzzy(label: string, description: string | undefined, path: string | undefined, query: IPreparedQuery, allowNonContiguousMatches: boolean): IItemScore {
426
	const preferLabelMatches = !path || !query.containsPathSeparator;
427

428
	// Treat identity matches on full path highest
429
	if (path && (isLinux ? query.pathNormalized === path : equalsIgnoreCase(query.pathNormalized, path))) {
430
		return { score: PATH_IDENTITY_SCORE, labelMatch: [{ start: 0, end: label.length }], descriptionMatch: description ? [{ start: 0, end: description.length }] : undefined };
431
	}
432

433
	// Score: multiple inputs
434
	if (query.values && query.values.length > 1) {
435
		return doScoreItemFuzzyMultiple(label, description, path, query.values, preferLabelMatches, allowNonContiguousMatches);
436
	}
437

438
	// Score: single input
439
	return doScoreItemFuzzySingle(label, description, path, query, preferLabelMatches, allowNonContiguousMatches);
440
}
441

442
function doScoreItemFuzzyMultiple(label: string, description: string | undefined, path: string | undefined, query: IPreparedQueryPiece[], preferLabelMatches: boolean, allowNonContiguousMatches: boolean): IItemScore {
443
	let totalScore = 0;
444
	const totalLabelMatches: IMatch[] = [];
445
	const totalDescriptionMatches: IMatch[] = [];
446

447
	for (const queryPiece of query) {
448
		const { score, labelMatch, descriptionMatch } = doScoreItemFuzzySingle(label, description, path, queryPiece, preferLabelMatches, allowNonContiguousMatches);
449
		if (score === NO_MATCH) {
450
			// if a single query value does not match, return with
451
			// no score entirely, we require all queries to match
452
			return NO_ITEM_SCORE;
453
		}
454

455
		totalScore += score;
456
		if (labelMatch) {
457
			totalLabelMatches.push(...labelMatch);
458
		}
459

460
		if (descriptionMatch) {
461
			totalDescriptionMatches.push(...descriptionMatch);
462
		}
463
	}
464

465
	// if we have a score, ensure that the positions are
466
	// sorted in ascending order and distinct
467
	return {
468
		score: totalScore,
469
		labelMatch: normalizeMatches(totalLabelMatches),
470
		descriptionMatch: normalizeMatches(totalDescriptionMatches)
471
	};
472
}
473

474
function doScoreItemFuzzySingle(label: string, description: string | undefined, path: string | undefined, query: IPreparedQueryPiece, preferLabelMatches: boolean, allowNonContiguousMatches: boolean): IItemScore {
475

476
	// Prefer label matches if told so or we have no description
477
	if (preferLabelMatches || !description) {
478
		const [labelScore, labelPositions] = scoreFuzzy(
479
			label,
480
			query.normalized,
481
			query.normalizedLowercase,
482
			allowNonContiguousMatches && !query.expectContiguousMatch);
483
		if (labelScore) {
484

485
			// If we have a prefix match on the label, we give a much
486
			// higher baseScore to elevate these matches over others
487
			// This ensures that typing a file name wins over results
488
			// that are present somewhere in the label, but not the
489
			// beginning.
490
			const labelPrefixMatch = matchesPrefix(query.normalized, label);
491
			let baseScore: number;
492
			if (labelPrefixMatch) {
493
				baseScore = LABEL_PREFIX_SCORE_THRESHOLD;
494

495
				// We give another boost to labels that are short, e.g. given
496
				// files "window.ts" and "windowActions.ts" and a query of
497
				// "window", we want "window.ts" to receive a higher score.
498
				// As such we compute the percentage the query has within the
499
				// label and add that to the baseScore.
500
				const prefixLengthBoost = Math.round((query.normalized.length / label.length) * 100);
501
				baseScore += prefixLengthBoost;
502
			} else {
503
				baseScore = LABEL_SCORE_THRESHOLD;
504
			}
505

506
			return { score: baseScore + labelScore, labelMatch: labelPrefixMatch || createMatches(labelPositions) };
507
		}
508
	}
509

510
	// Finally compute description + label scores if we have a description
511
	if (description) {
512
		let descriptionPrefix = description;
513
		if (!!path) {
514
			descriptionPrefix = `${description}${sep}`; // assume this is a file path
515
		}
516

517
		const descriptionPrefixLength = descriptionPrefix.length;
518
		const descriptionAndLabel = `${descriptionPrefix}${label}`;
519

520
		const [labelDescriptionScore, labelDescriptionPositions] = scoreFuzzy(
521
			descriptionAndLabel,
522
			query.normalized,
523
			query.normalizedLowercase,
524
			allowNonContiguousMatches && !query.expectContiguousMatch);
525
		if (labelDescriptionScore) {
526
			const labelDescriptionMatches = createMatches(labelDescriptionPositions);
527
			const labelMatch: IMatch[] = [];
528
			const descriptionMatch: IMatch[] = [];
529

530
			// We have to split the matches back onto the label and description portions
531
			labelDescriptionMatches.forEach(h => {
532

533
				// Match overlaps label and description part, we need to split it up
534
				if (h.start < descriptionPrefixLength && h.end > descriptionPrefixLength) {
535
					labelMatch.push({ start: 0, end: h.end - descriptionPrefixLength });
536
					descriptionMatch.push({ start: h.start, end: descriptionPrefixLength });
537
				}
538

539
				// Match on label part
540
				else if (h.start >= descriptionPrefixLength) {
541
					labelMatch.push({ start: h.start - descriptionPrefixLength, end: h.end - descriptionPrefixLength });
542
				}
543

544
				// Match on description part
545
				else {
546
					descriptionMatch.push(h);
547
				}
548
			});
549

550
			return { score: labelDescriptionScore, labelMatch, descriptionMatch };
551
		}
552
	}
553

554
	return NO_ITEM_SCORE;
555
}
556

557
function createMatches(offsets: number[] | undefined): IMatch[] {
558
	const ret: IMatch[] = [];
559
	if (!offsets) {
560
		return ret;
561
	}
562

563
	let last: IMatch | undefined;
564
	for (const pos of offsets) {
565
		if (last && last.end === pos) {
566
			last.end += 1;
567
		} else {
568
			last = { start: pos, end: pos + 1 };
569
			ret.push(last);
570
		}
571
	}
572

573
	return ret;
574
}
575

576
function normalizeMatches(matches: IMatch[]): IMatch[] {
577

578
	// sort matches by start to be able to normalize
579
	const sortedMatches = matches.sort((matchA, matchB) => {
580
		return matchA.start - matchB.start;
581
	});
582

583
	// merge matches that overlap
584
	const normalizedMatches: IMatch[] = [];
585
	let currentMatch: IMatch | undefined = undefined;
586
	for (const match of sortedMatches) {
587

588
		// if we have no current match or the matches
589
		// do not overlap, we take it as is and remember
590
		// it for future merging
591
		if (!currentMatch || !matchOverlaps(currentMatch, match)) {
592
			currentMatch = match;
593
			normalizedMatches.push(match);
594
		}
595

596
		// otherwise we merge the matches
597
		else {
598
			currentMatch.start = Math.min(currentMatch.start, match.start);
599
			currentMatch.end = Math.max(currentMatch.end, match.end);
600
		}
601
	}
602

603
	return normalizedMatches;
604
}
605

606
function matchOverlaps(matchA: IMatch, matchB: IMatch): boolean {
607
	if (matchA.end < matchB.start) {
608
		return false;	// A ends before B starts
609
	}
610

611
	if (matchB.end < matchA.start) {
612
		return false; // B ends before A starts
613
	}
614

615
	return true;
616
}
617

618
//#endregion
619

620

621
//#region Comparers
622

623
export function compareItemsByFuzzyScore<T>(itemA: T, itemB: T, query: IPreparedQuery, allowNonContiguousMatches: boolean, accessor: IItemAccessor<T>, cache: FuzzyScorerCache): number {
624
	const itemScoreA = scoreItemFuzzy(itemA, query, allowNonContiguousMatches, accessor, cache);
625
	const itemScoreB = scoreItemFuzzy(itemB, query, allowNonContiguousMatches, accessor, cache);
626

627
	const scoreA = itemScoreA.score;
628
	const scoreB = itemScoreB.score;
629

630
	// 1.) identity matches have highest score
631
	if (scoreA === PATH_IDENTITY_SCORE || scoreB === PATH_IDENTITY_SCORE) {
632
		if (scoreA !== scoreB) {
633
			return scoreA === PATH_IDENTITY_SCORE ? -1 : 1;
634
		}
635
	}
636

637
	// 2.) matches on label are considered higher compared to label+description matches
638
	if (scoreA > LABEL_SCORE_THRESHOLD || scoreB > LABEL_SCORE_THRESHOLD) {
639
		if (scoreA !== scoreB) {
640
			return scoreA > scoreB ? -1 : 1;
641
		}
642

643
		// prefer more compact matches over longer in label (unless this is a prefix match where
644
		// longer prefix matches are actually preferred)
645
		if (scoreA < LABEL_PREFIX_SCORE_THRESHOLD && scoreB < LABEL_PREFIX_SCORE_THRESHOLD) {
646
			const comparedByMatchLength = compareByMatchLength(itemScoreA.labelMatch, itemScoreB.labelMatch);
647
			if (comparedByMatchLength !== 0) {
648
				return comparedByMatchLength;
649
			}
650
		}
651

652
		// prefer shorter labels over longer labels
653
		const labelA = accessor.getItemLabel(itemA) || '';
654
		const labelB = accessor.getItemLabel(itemB) || '';
655
		if (labelA.length !== labelB.length) {
656
			return labelA.length - labelB.length;
657
		}
658
	}
659

660
	// 3.) compare by score in label+description
661
	if (scoreA !== scoreB) {
662
		return scoreA > scoreB ? -1 : 1;
663
	}
664

665
	// 4.) scores are identical: prefer matches in label over non-label matches
666
	const itemAHasLabelMatches = Array.isArray(itemScoreA.labelMatch) && itemScoreA.labelMatch.length > 0;
667
	const itemBHasLabelMatches = Array.isArray(itemScoreB.labelMatch) && itemScoreB.labelMatch.length > 0;
668
	if (itemAHasLabelMatches && !itemBHasLabelMatches) {
669
		return -1;
670
	} else if (itemBHasLabelMatches && !itemAHasLabelMatches) {
671
		return 1;
672
	}
673

674
	// 5.) scores are identical: prefer more compact matches (label and description)
675
	const itemAMatchDistance = computeLabelAndDescriptionMatchDistance(itemA, itemScoreA, accessor);
676
	const itemBMatchDistance = computeLabelAndDescriptionMatchDistance(itemB, itemScoreB, accessor);
677
	if (itemAMatchDistance && itemBMatchDistance && itemAMatchDistance !== itemBMatchDistance) {
678
		return itemBMatchDistance > itemAMatchDistance ? -1 : 1;
679
	}
680

681
	// 6.) scores are identical: start to use the fallback compare
682
	return fallbackCompare(itemA, itemB, query, accessor);
683
}
684

685
function computeLabelAndDescriptionMatchDistance<T>(item: T, score: IItemScore, accessor: IItemAccessor<T>): number {
686
	let matchStart: number = -1;
687
	let matchEnd: number = -1;
688

689
	// If we have description matches, the start is first of description match
690
	if (score.descriptionMatch && score.descriptionMatch.length) {
691
		matchStart = score.descriptionMatch[0].start;
692
	}
693

694
	// Otherwise, the start is the first label match
695
	else if (score.labelMatch && score.labelMatch.length) {
696
		matchStart = score.labelMatch[0].start;
697
	}
698

699
	// If we have label match, the end is the last label match
700
	// If we had a description match, we add the length of the description
701
	// as offset to the end to indicate this.
702
	if (score.labelMatch && score.labelMatch.length) {
703
		matchEnd = score.labelMatch[score.labelMatch.length - 1].end;
704
		if (score.descriptionMatch && score.descriptionMatch.length) {
705
			const itemDescription = accessor.getItemDescription(item);
706
			if (itemDescription) {
707
				matchEnd += itemDescription.length;
708
			}
709
		}
710
	}
711

712
	// If we have just a description match, the end is the last description match
713
	else if (score.descriptionMatch && score.descriptionMatch.length) {
714
		matchEnd = score.descriptionMatch[score.descriptionMatch.length - 1].end;
715
	}
716

717
	return matchEnd - matchStart;
718
}
719

720
function compareByMatchLength(matchesA?: IMatch[], matchesB?: IMatch[]): number {
721
	if ((!matchesA && !matchesB) || ((!matchesA || !matchesA.length) && (!matchesB || !matchesB.length))) {
722
		return 0; // make sure to not cause bad comparing when matches are not provided
723
	}
724

725
	if (!matchesB || !matchesB.length) {
726
		return -1;
727
	}
728

729
	if (!matchesA || !matchesA.length) {
730
		return 1;
731
	}
732

733
	// Compute match length of A (first to last match)
734
	const matchStartA = matchesA[0].start;
735
	const matchEndA = matchesA[matchesA.length - 1].end;
736
	const matchLengthA = matchEndA - matchStartA;
737

738
	// Compute match length of B (first to last match)
739
	const matchStartB = matchesB[0].start;
740
	const matchEndB = matchesB[matchesB.length - 1].end;
741
	const matchLengthB = matchEndB - matchStartB;
742

743
	// Prefer shorter match length
744
	return matchLengthA === matchLengthB ? 0 : matchLengthB < matchLengthA ? 1 : -1;
745
}
746

747
function fallbackCompare<T>(itemA: T, itemB: T, query: IPreparedQuery, accessor: IItemAccessor<T>): number {
748

749
	// check for label + description length and prefer shorter
750
	const labelA = accessor.getItemLabel(itemA) || '';
751
	const labelB = accessor.getItemLabel(itemB) || '';
752

753
	const descriptionA = accessor.getItemDescription(itemA);
754
	const descriptionB = accessor.getItemDescription(itemB);
755

756
	const labelDescriptionALength = labelA.length + (descriptionA ? descriptionA.length : 0);
757
	const labelDescriptionBLength = labelB.length + (descriptionB ? descriptionB.length : 0);
758

759
	if (labelDescriptionALength !== labelDescriptionBLength) {
760
		return labelDescriptionALength - labelDescriptionBLength;
761
	}
762

763
	// check for path length and prefer shorter
764
	const pathA = accessor.getItemPath(itemA);
765
	const pathB = accessor.getItemPath(itemB);
766

767
	if (pathA && pathB && pathA.length !== pathB.length) {
768
		return pathA.length - pathB.length;
769
	}
770

771
	// 7.) finally we have equal scores and equal length, we fallback to comparer
772

773
	// compare by label
774
	if (labelA !== labelB) {
775
		return compareAnything(labelA, labelB, query.normalized);
776
	}
777

778
	// compare by description
779
	if (descriptionA && descriptionB && descriptionA !== descriptionB) {
780
		return compareAnything(descriptionA, descriptionB, query.normalized);
781
	}
782

783
	// compare by path
784
	if (pathA && pathB && pathA !== pathB) {
785
		return compareAnything(pathA, pathB, query.normalized);
786
	}
787

788
	// equal
789
	return 0;
790
}
791

792
//#endregion
793

794

795
//#region Query Normalizer
796

797
export interface IPreparedQueryPiece {
798

799
	/**
800
	 * The original query as provided as input.
801
	 */
802
	original: string;
803
	originalLowercase: string;
804

805
	/**
806
	 * Original normalized to platform separators:
807
	 * - Windows: \
808
	 * - Posix: /
809
	 */
810
	pathNormalized: string;
811

812
	/**
813
	 * In addition to the normalized path, will have
814
	 * whitespace and wildcards removed.
815
	 */
816
	normalized: string;
817
	normalizedLowercase: string;
818

819
	/**
820
	 * The query is wrapped in quotes which means
821
	 * this query must be a substring of the input.
822
	 * In other words, no fuzzy matching is used.
823
	 */
824
	expectContiguousMatch: boolean;
825
}
826

827
export interface IPreparedQuery extends IPreparedQueryPiece {
828

829
	/**
830
	 * Query split by spaces into pieces.
831
	 */
832
	values: IPreparedQueryPiece[] | undefined;
833

834
	/**
835
	 * Whether the query contains path separator(s) or not.
836
	 */
837
	containsPathSeparator: boolean;
838
}
839

840
/*
841
 * If a query is wrapped in quotes, the user does not want to
842
 * use fuzzy search for this query.
843
 */
844
function queryExpectsExactMatch(query: string) {
845
	return query.startsWith('"') && query.endsWith('"');
846
}
847

848
/**
849
 * Helper function to prepare a search value for scoring by removing unwanted characters
850
 * and allowing to score on multiple pieces separated by whitespace character.
851
 */
852
const MULTIPLE_QUERY_VALUES_SEPARATOR = ' ';
853
export function prepareQuery(original: string): IPreparedQuery {
854
	if (typeof original !== 'string') {
855
		original = '';
856
	}
857

858
	const originalLowercase = original.toLowerCase();
859
	const { pathNormalized, normalized, normalizedLowercase } = normalizeQuery(original);
860
	const containsPathSeparator = pathNormalized.indexOf(sep) >= 0;
861
	const expectExactMatch = queryExpectsExactMatch(original);
862

863
	let values: IPreparedQueryPiece[] | undefined = undefined;
864

865
	const originalSplit = original.split(MULTIPLE_QUERY_VALUES_SEPARATOR);
866
	if (originalSplit.length > 1) {
867
		for (const originalPiece of originalSplit) {
868
			const expectExactMatchPiece = queryExpectsExactMatch(originalPiece);
869
			const {
870
				pathNormalized: pathNormalizedPiece,
871
				normalized: normalizedPiece,
872
				normalizedLowercase: normalizedLowercasePiece
873
			} = normalizeQuery(originalPiece);
874

875
			if (normalizedPiece) {
876
				if (!values) {
877
					values = [];
878
				}
879

880
				values.push({
881
					original: originalPiece,
882
					originalLowercase: originalPiece.toLowerCase(),
883
					pathNormalized: pathNormalizedPiece,
884
					normalized: normalizedPiece,
885
					normalizedLowercase: normalizedLowercasePiece,
886
					expectContiguousMatch: expectExactMatchPiece
887
				});
888
			}
889
		}
890
	}
891

892
	return { original, originalLowercase, pathNormalized, normalized, normalizedLowercase, values, containsPathSeparator, expectContiguousMatch: expectExactMatch };
893
}
894

895
function normalizeQuery(original: string): { pathNormalized: string; normalized: string; normalizedLowercase: string } {
896
	let pathNormalized: string;
897
	if (isWindows) {
898
		pathNormalized = original.replace(/\//g, sep); // Help Windows users to search for paths when using slash
899
	} else {
900
		pathNormalized = original.replace(/\\/g, sep); // Help macOS/Linux users to search for paths when using backslash
901
	}
902

903
	// we remove quotes here because quotes are used for exact match search
904
	const normalized = stripWildcards(pathNormalized).replace(/\s|"/g, '');
905

906
	return {
907
		pathNormalized,
908
		normalized,
909
		normalizedLowercase: normalized.toLowerCase()
910
	};
911
}
912

913
export function pieceToQuery(piece: IPreparedQueryPiece): IPreparedQuery;
914
export function pieceToQuery(pieces: IPreparedQueryPiece[]): IPreparedQuery;
915
export function pieceToQuery(arg1: IPreparedQueryPiece | IPreparedQueryPiece[]): IPreparedQuery {
916
	if (Array.isArray(arg1)) {
917
		return prepareQuery(arg1.map(piece => piece.original).join(MULTIPLE_QUERY_VALUES_SEPARATOR));
918
	}
919

920
	return prepareQuery(arg1.original);
921
}
922

923
//#endregion
924

925
Product

Resources

Company