Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
microsoft
GitHub Repository: microsoft/vscode
Path: blob/main/src/vs/workbench/contrib/chat/common/chatWordCounter.ts
3296 views
1
/*---------------------------------------------------------------------------------------------
2
* Copyright (c) Microsoft Corporation. All rights reserved.
3
* Licensed under the MIT License. See License.txt in the project root for license information.
4
*--------------------------------------------------------------------------------------------*/
5
6
import * as markedKatexExtension from '../../markdown/common/markedKatexExtension.js';
7
8
/**
 * Result of counting/truncating a string by words, as produced by `getNWords`.
 */
export interface IWordCountResult {
	/** The prefix of the input string that was selected. */
	value: string;
	/** Number of words attributed to `value`. */
	returnedWordCount: number;
	/** Number of words found in the whole input string. */
	totalWordCount: number;
	/** True when `value` extends to the end of the input string. */
	isFullString: boolean;
}
14
15
// Shorthand for raw template strings so regex fragments can be written without double escaping.
const r = String.raw;

/**
 * Regular-expression source (a string, not a RegExp) that matches
 * `[text](link title?)` or `[text](<link> title?)`, optionally prefixed with `!`.
 *
 * Capture groups, in order:
 *  1. The bracketed text, including the optional leading `!`
 *  2. The opening parenthesis plus any whitespace before the destination
 *  3. The destination, either bare or wrapped in angle brackets
 *
 * Taken from vscode-markdown-languageservice
 */
const linkPattern =
	r`(?<!\\)` + // Must not start with escape

	// text
	r`(!?\[` + // open prefix match -->
	/**/r`(?:` +
	/*****/r`[^\[\]\\]|` + // Non-bracket chars, or...
	/*****/r`\\.|` + // Escaped char, or...
	/*****/r`\[[^\[\]]*\]` + // Matched bracket pair
	/**/r`)*` +
	r`\])` + // <-- close prefix match

	// Destination
	r`(\(\s*)` + // Pre href
	/**/r`(` +
	/*****/r`[^\s\(\)<](?:[^\s\(\)]|\([^\s\(\)]*?\))*|` + // Link without whitespace, or...
	/*****/r`<(?:\\[<>]|[^<>])+>` + // In angle brackets
	/**/r`)` +

	// Title
	/**/r`\s*(?:"[^"]*"|'[^']*'|\([^\(\)]*\))?\s*` +
	r`\)`;
44
45
/**
 * Returns the prefix of `str` that contains at most `numWordsToCount` words.
 *
 * When `numWordsToCount` covers every word in `str`, the whole string is returned
 * (including anything after the last word). Otherwise the prefix ends directly after
 * the last counted word.
 *
 * @param str The text to split into words.
 * @param numWordsToCount Maximum number of words to include. Negative values are treated as 0.
 * @returns The selected prefix together with word counts. `returnedWordCount` is 1 when
 * no word was selected but the returned prefix is non-empty.
 */
export function getNWords(str: string, numWordsToCount: number): IWordCountResult {
	// This regex matches each word and skips over whitespace and separators. A word is:
	// A markdown link
	// Inline math
	// One chinese character
	// One or more + - =, handled so that code like "a=1+2-3" is broken up better
	// One or more characters that aren't whitespace or any of the above
	// (`|` and the backtick are excluded from that last class as well, so they act as separators)
	const backtick = '`';

	const wordRegExp = new RegExp('(?:' + linkPattern + ')|(?:' + markedKatexExtension.mathInlineRegExp.source + r`)|\p{sc=Han}|=+|\++|-+|[^\s\|\p{sc=Han}|=|\+|\-|${backtick}]+`, 'gu');
	const allWordMatches = Array.from(str.matchAll(wordRegExp));

	// A negative end index would make `slice` count from the end of the array and
	// return words even though fewer than zero were requested, so clamp it.
	const wordLimit = Math.max(0, numWordsToCount);
	const targetWords = allWordMatches.slice(0, wordLimit);
	const lastTargetWord = targetWords.at(-1);

	const endIndex = wordLimit >= allWordMatches.length
		? str.length // Reached end of string
		: lastTargetWord ? lastTargetWord.index + lastTargetWord[0].length : 0;

	const value = str.substring(0, endIndex);
	return {
		value,
		// Non-empty text that contains no word still counts as one word
		returnedWordCount: targetWords.length === 0 ? (value.length ? 1 : 0) : targetWords.length,
		isFullString: endIndex >= str.length,
		totalWordCount: allWordMatches.length
	};
}
71
72
/**
 * Counts the words in `str` using the same word definition as `getNWords`.
 *
 * @param str The text to count words in.
 * @returns The `returnedWordCount` reported by `getNWords` for the whole string.
 */
export function countWords(str: string): number {
	// An effectively unlimited word budget makes getNWords consume the entire string.
	const { returnedWordCount } = getNWords(str, Number.MAX_SAFE_INTEGER);
	return returnedWordCount;
}
76
77