Path: blob/main/src/vs/workbench/contrib/chat/common/chatWordCounter.ts
3296 views
/*---------------------------------------------------------------------------------------------
 *  Copyright (c) Microsoft Corporation. All rights reserved.
 *  Licensed under the MIT License. See License.txt in the project root for license information.
 *--------------------------------------------------------------------------------------------*/

import * as markedKatexExtension from '../../markdown/common/markedKatexExtension.js';

/**
 * Result of truncating a string to a number of words with {@link getNWords}.
 */
export interface IWordCountResult {
	/** The prefix of the input string that was kept. */
	value: string;
	/**
	 * Number of words contained in `value`. Reported as 1 when `value` is non-empty
	 * but contains no word matches (e.g. only whitespace or separators).
	 */
	returnedWordCount: number;
	/** Number of words matched in the whole input string. */
	totalWordCount: number;
	/** Whether `value` extends to the end of the input string. */
	isFullString: boolean;
}

// Alias so regex fragments below can be written without double-escaping backslashes.
const r = String.raw;

/**
 * Matches `[text](link title?)` or `[text](<link> title?)`
 *
 * Taken from vscode-markdown-languageservice
 */
const linkPattern =
	r`(?<!\\)` + // Must not start with escape

	// text
	r`(!?\[` + // open prefix match -->
	/**/r`(?:` +
	/*****/r`[^\[\]\\]|` + // Non-bracket chars, or...
	/*****/r`\\.|` + // Escaped char, or...
	/*****/r`\[[^\[\]]*\]` + // Matched bracket pair
	/**/r`)*` +
	r`\])` + // <-- close prefix match

	// Destination
	r`(\(\s*)` + // Pre href
	/**/r`(` +
	/*****/r`[^\s\(\)<](?:[^\s\(\)]|\([^\s\(\)]*?\))*|` + // Link without whitespace, or...
	/*****/r`<(?:\\[<>]|[^<>])+>` + // In angle brackets
	/**/r`)` +

	// Title
	/**/r`\s*(?:"[^"]*"|'[^']*'|\([^\(\)]*\))?\s*` +
	r`\)`;

/**
 * Returns the prefix of `str` that contains at most `numWordsToCount` words.
 *
 * @param str The text to truncate.
 * @param numWordsToCount Maximum number of words to keep. Negative values are treated as 0.
 * @returns The truncated text together with the word counts; see {@link IWordCountResult}.
 */
export function getNWords(str: string, numWordsToCount: number): IWordCountResult {
	// This regex matches each word and skips over whitespace and separators. A word is:
	// A markdown link
	// Inline math
	// One Chinese (Han script) character
	// One or more + - =, handled so that code like "a=1+2-3" is broken up better
	// One or more characters that aren't whitespace or any of the above
	const backtick = '`';

	const wordRegExp = new RegExp('(?:' + linkPattern + ')|(?:' + markedKatexExtension.mathInlineRegExp.source + r`)|\p{sc=Han}|=+|\++|-+|[^\s\|\p{sc=Han}|=|\+|\-|${backtick}]+`, 'gu');
	const allWordMatches = Array.from(str.matchAll(wordRegExp));

	// `slice` interprets a negative end index as an offset from the end of the array, so an
	// unclamped negative count would keep all but the last |n| words instead of none.
	const wordLimit = Math.max(0, numWordsToCount);
	const targetWords = allWordMatches.slice(0, wordLimit);
	const lastMatch = targetWords.at(-1);

	let endIndex: number;
	if (wordLimit >= allWordMatches.length) {
		// Reached end of string: keep everything, including trailing non-word characters.
		endIndex = str.length;
	} else if (lastMatch) {
		// Cut right after the last word that was kept.
		endIndex = lastMatch.index + lastMatch[0].length;
	} else {
		endIndex = 0;
	}

	const value = str.substring(0, endIndex);
	return {
		value,
		// A non-empty value without any word match still counts as one word.
		returnedWordCount: targetWords.length === 0 ? (value.length ? 1 : 0) : targetWords.length,
		isFullString: endIndex >= str.length,
		totalWordCount: allWordMatches.length
	};
}

/**
 * Counts the words in `str` using the same word definition as {@link getNWords}.
 *
 * Note that a non-empty string without any word match (e.g. only whitespace) yields 1,
 * because this returns the `returnedWordCount` computed by {@link getNWords}.
 */
export function countWords(str: string): number {
	const result = getNWords(str, Number.MAX_SAFE_INTEGER);
	return result.returnedWordCount;
}