// Path: blob/main/extensions/copilot/src/platform/chunking/common/chunkingStringUtils.ts
// 13400 views
/*---------------------------------------------------------------------------------------------
 *  Copyright (c) Microsoft Corporation. All rights reserved.
 *  Licensed under the MIT License. See License.txt in the project root for license information.
 *--------------------------------------------------------------------------------------------*/

import { splitLines } from '../../../util/vs/base/common/strings';

/**
 * Truncates `str` so that its UTF-8 encoding occupies at most `maxBytes` bytes.
 *
 * The cut never lands inside a multi-byte character: a character whose encoding
 * would straddle the limit is dropped entirely.
 *
 * @param str The string to truncate.
 * @param maxBytes Maximum size of the result in UTF-8 bytes. Values `<= 0` yield an empty string.
 * @returns `str` itself when it already fits, otherwise the leading characters of `str`
 * (round-tripped through UTF-8) that fit within the limit.
 */
export function truncateToMaxUtf8Length(str: string, maxBytes: number): string {
	// A negative limit would otherwise be interpreted by `slice` as an offset from the end
	if (maxBytes <= 0) {
		return '';
	}

	// A UTF-16 code unit encodes to at most 3 UTF-8 bytes: BMP characters take up to 3 bytes,
	// and a surrogate pair (2 code units) takes 4 bytes in total.
	// If we're under that, skip the more expensive checks
	const upperEstimatedByteLength = str.length * 3;
	if (upperEstimatedByteLength <= maxBytes) {
		return str;
	}

	const encoder = new TextEncoder();
	const encodedStr = encoder.encode(str);

	if (encodedStr.length <= maxBytes) {
		return str;
	}

	const truncatedBytes = encodedStr.slice(0, maxBytes);

	// `ignoreBOM: true` keeps a leading U+FEFF in the output; by default the decoder would strip it,
	// silently dropping the first character of the string only on the truncating path
	const decoder = new TextDecoder('utf-8', { ignoreBOM: true });

	// Decode the truncated bytes back to a string, ensuring no partial characters
	return decoder.decode(truncatedBytes, {
		stream: true // Don't emit partial characters
	});
}

/**
 * Strips the metadata wrapper from a returned chunk.
 *
 * Returned chunks are formatted with extra metadata:
 *
 * File: `fileName.ext`:
 * ```lang
 * chunk text
 * ```
 *
 * When `text` has at least three lines, its first line starts with `File: `, and both its
 * second and last lines start with a code fence, only the lines in between are returned,
 * joined with `\n`. Otherwise `text` is returned unchanged.
 *
 * @param text The chunk text, possibly wrapped in the metadata shown above.
 * @returns The chunk text without the wrapper, or `text` as-is if the wrapper is not recognized.
 */
export function stripChunkTextMetadata(text: string): string {
	const lines = splitLines(text);
	if (lines.length >= 3 && lines[0].startsWith('File: ') && lines[1].startsWith('```') && lines.at(-1)?.startsWith('```')) {
		return lines.slice(2, -1).join('\n');
	}
	return text;
}