Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
microsoft
GitHub Repository: microsoft/vscode
Path: blob/main/extensions/copilot/src/platform/chunking/common/chunkingStringUtils.ts
13400 views
1
/*---------------------------------------------------------------------------------------------
2
* Copyright (c) Microsoft Corporation. All rights reserved.
3
* Licensed under the MIT License. See License.txt in the project root for license information.
4
*--------------------------------------------------------------------------------------------*/
5
6
import { splitLines } from '../../../util/vs/base/common/strings';
7
8
/**
 * Truncates `str` so that its UTF-8 encoding occupies at most `maxBytes` bytes.
 *
 * The cut never lands inside a multi-byte sequence: a code point that does not fit
 * completely is dropped instead of being emitted as a partial/replacement character.
 *
 * Note that when truncation actually happens the text is round-tripped through UTF-8,
 * so unpaired surrogates in the kept prefix come back as U+FFFD.
 *
 * @param str The string to truncate.
 * @param maxBytes Maximum size, in bytes, of the UTF-8 encoding of the result. Any value
 * that is not greater than zero (including `NaN`) yields an empty string.
 * @returns `str` itself when it already fits, otherwise a prefix of it that fits.
 */
export function truncateToMaxUtf8Length(str: string, maxBytes: number): string {
	// Nothing fits into a non-positive budget. Guarding here also prevents a negative
	// `maxBytes` from being interpreted by `slice` below as an offset from the end.
	if (!(maxBytes > 0)) {
		return '';
	}

	// A single UTF-16 code unit encodes to at most 3 UTF-8 bytes: BMP characters need up to 3,
	// a surrogate pair is 2 code units for 4 bytes, and a lone surrogate becomes U+FFFD (3 bytes).
	// If we're under that bound, skip the more expensive checks.
	if (str.length * 3 <= maxBytes) {
		return str;
	}

	const encodedStr = new TextEncoder().encode(str);
	if (encodedStr.length <= maxBytes) {
		return str;
	}

	const truncatedBytes = encodedStr.slice(0, maxBytes);

	// `ignoreBOM: true` keeps a leading U+FEFF that is part of `str`; the default decoder would
	// silently strip it, but only on this truncating path.
	const decoder = new TextDecoder('utf-8', { ignoreBOM: true });

	// `stream: true` makes the decoder hold back an incomplete trailing sequence rather than
	// emitting a replacement character for it. The decoder is discarded, so those bytes are dropped.
	return decoder.decode(truncatedBytes, { stream: true });
}
30
31
/**
 * Removes the metadata wrapper that returned chunks are formatted with:
 *
 * File: `fileName.ext`:
 * ```lang
 * chunk text
 * ```
 *
 * When the first line starts with `File: `, the second line starts with a code fence and the
 * last line starts with a code fence, only the lines in between are returned (joined with `\n`).
 * Any other input is returned unchanged.
 */
export function stripChunkTextMetadata(text: string): string {
	const fence = '```';
	const lines = splitLines(text);

	// A header line, an opening fence and a closing fence are the minimum for a wrapped chunk.
	if (lines.length < 3) {
		return text;
	}

	const headerLine = lines[0];
	const openingLine = lines[1];
	const closingLine = lines[lines.length - 1];

	const isWrapped = headerLine.startsWith('File: ')
		&& openingLine.startsWith(fence)
		&& closingLine.startsWith(fence);
	if (!isWrapped) {
		return text;
	}

	// Drop the header and opening fence at the front and the closing fence at the back.
	const body = lines.slice(2, -1);
	return body.join('\n');
}
49
50
51