Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
microsoft
GitHub Repository: microsoft/vscode
Path: blob/main/extensions/copilot/test/pipeline/output.ts
13389 views
1
/*---------------------------------------------------------------------------------------------
 *  Copyright (c) Microsoft Corporation. All rights reserved.
 *  Licensed under the MIT License. See License.txt in the project root for license information.
 *--------------------------------------------------------------------------------------------*/
5
6
import * as fs from 'fs/promises';
7
import * as path from 'path';
8
import { IGeneratedPrompt } from './promptStep';
9
import { IProcessedRow } from './replayRecording';
10
import { IGeneratedResponse } from './responseStep';
11
12
/**
 * One chat message of an assembled sample.
 */
export interface IMessage {
	/** Which participant the message belongs to. */
	readonly role: 'system' | 'user' | 'assistant';
	/** Text of the message. */
	readonly content: string;
}
16
17
/**
 * Per-sample bookkeeping that is serialized next to the messages.
 *
 * The field notes below describe how `assembleSample` populates each value.
 */
export interface ISampleMetadata {
	/** The `index` passed to `assembleSample`; `writeSamples` sorts by this value. */
	readonly rowIndex: number;
	/** The row's `activeDocumentLanguageId`. */
	readonly language: string;
	/** Strategy label supplied by the caller, stored unchanged. */
	readonly strategy: string;
	/** Number of oracle edits (0 when the processed row has no next user edit). */
	readonly oracleEditCount: number;
	/** The row's `suggestionStatus`, stored unchanged. */
	readonly suggestionStatus: string;
	/** Active file path with every backslash replaced by a forward slash. */
	readonly filePath: string;
	/** Text of the active document. */
	readonly docContent: string;
	/**
	 * Oracle edits as `[start, endEx, text]` tuples.
	 * NOTE(review): `endEx` presumably is an exclusive end offset — confirm with the producer.
	 */
	readonly oracleEdits: readonly (readonly [start: number, endEx: number, text: string])[];
	/** The row's `prompt` value, passed through without inspection. */
	readonly originalPrompt: unknown[];
	/** Model response string supplied by the caller, stored unchanged. */
	readonly modelResponse: string;
}
29
30
/**
 * A complete sample: the chat messages plus the metadata describing where they came from.
 */
export interface ISample {
	/** Messages of the sample; `assembleSample` emits system, user, assistant in that order. */
	readonly messages: readonly IMessage[];
	/** Bookkeeping data serialized alongside the messages. */
	readonly metadata: ISampleMetadata;
}
34
35
/**
 * Records why a sample was left out of the written output.
 */
interface ISkipReason {
	/** `metadata.rowIndex` of the skipped sample. */
	readonly rowIndex: number;
	/** Human-readable explanation reported by validation. */
	readonly reason: string;
}
39
40
/**
 * Summary returned by `writeSamples`.
 */
export interface IWriteResult {
	/** Number of samples that passed validation and were written. */
	readonly written: number;
	/** Number of samples that failed validation and were omitted. */
	readonly skipped: number;
	/** One entry per skipped sample. */
	readonly skipReasons: readonly ISkipReason[];
	/** UTF-8 byte length of the serialized JSON content. */
	readonly fileSize: number;
	/** Absolute path of the file that was written. */
	readonly outputPath: string;
	/** Written-sample count per language; an empty language is counted as `'unknown'`. */
	readonly languageCounts: ReadonlyMap<string, number>;
}
48
49
/**
 * Combine a generated prompt/response pair with data from the processed row
 * into a single sample.
 *
 * @param index Stored as `metadata.rowIndex`.
 * @param prompt Supplies the system and user message contents.
 * @param response Supplies the assistant message content.
 * @param processedRow Source of language, file path, document text, oracle edits and original prompt.
 * @param strategy Stored unchanged in the metadata.
 * @param modelResponse Stored unchanged in the metadata.
 * @returns The sample with messages in system, user, assistant order.
 */
export function assembleSample(
	index: number,
	prompt: IGeneratedPrompt,
	response: IGeneratedResponse,
	processedRow: IProcessedRow,
	strategy: string,
	modelResponse: string,
): ISample {
	const messages: IMessage[] = [
		{ role: 'system', content: prompt.system },
		{ role: 'user', content: prompt.user },
		{ role: 'assistant', content: response.assistant },
	];

	// Read the edit list once so the count and the stored edits always agree.
	const oracleEdits = processedRow.nextUserEdit?.edit ?? [];

	const metadata: ISampleMetadata = {
		rowIndex: index,
		language: processedRow.row.activeDocumentLanguageId,
		strategy,
		oracleEditCount: oracleEdits.length,
		suggestionStatus: processedRow.row.suggestionStatus,
		// Normalize Windows separators so the stored path is platform independent.
		filePath: processedRow.activeFilePath.replace(/\\/g, '/'),
		docContent: processedRow.activeDocument.value.get().value,
		oracleEdits,
		originalPrompt: processedRow.row.prompt,
		modelResponse,
	};

	return { messages, metadata };
}
78
79
/**
 * Outcome of the structural validation of one sample.
 */
interface IStructuralValidationResult {
	/** `true` when the sample passed every structural check. */
	readonly valid: boolean;
	/** Explanation of the first failed check; set by `validateSample` whenever `valid` is `false`. */
	readonly reason?: string;
}
83
84
/**
 * Structural check: ensures a sample has non-empty system, user and assistant
 * messages before it is written.
 *
 * Checks run in this order and the first failure is reported:
 *  1. every message has content that is present and is a string;
 *  2. the first system, user and assistant messages (in that order) exist
 *     and contain something other than whitespace.
 *
 * @param sample The sample to inspect; it is not modified.
 * @returns `{ valid: true }`, or `{ valid: false, reason }` describing the first problem found.
 */
export function validateSample(sample: ISample): IStructuralValidationResult {
	for (const msg of sample.messages) {
		// `content` is typed as string, but samples may originate from untyped data,
		// so verify at runtime rather than letting `.trim()` throw further down.
		const content: unknown = msg.content;
		if (content === undefined || content === null) {
			return { valid: false, reason: `${msg.role} message content is null/undefined` };
		}
		if (typeof content !== 'string') {
			return { valid: false, reason: `${msg.role} message content is not a string` };
		}
	}

	const requiredRoles = ['system', 'user', 'assistant'] as const;
	for (const role of requiredRoles) {
		// Only the first message of each role is inspected.
		const message = sample.messages.find(m => m.role === role);
		if (!message || !message.content.trim()) {
			return { valid: false, reason: `Empty ${role} message` };
		}
	}

	return { valid: true };
}
110
111
/**
 * Decide where the output JSON should be written.
 *
 * @param inputPath Path of the input file; used to derive a default location.
 * @param explicitPath Caller-chosen output path. `undefined` or an empty string selects the default.
 * @returns The absolute form of `explicitPath` when one is given. Otherwise
 *   `<input dir>/<input name>_output.json`, which is NOT made absolute and
 *   therefore stays relative when `inputPath` is relative.
 */
export function resolveOutputPath(inputPath: string, explicitPath: string | undefined): string {
	if (explicitPath) {
		return path.resolve(explicitPath);
	}

	// Default: sit next to the input file, swapping its extension for `_output.json`.
	const { dir, name } = path.parse(inputPath);
	return path.join(dir, `${name}_output.json`);
}
118
119
/**
 * Write validated samples to a JSON file.
 *
 * Each sample is checked with `validateSample`; failures are omitted from the
 * file and reported in the result. Valid samples are sorted by rowIndex for
 * deterministic output and serialized as a 2-space-indented JSON array.
 * The parent directory of the output file is created when missing.
 *
 * @param outputPath Destination file; resolved to an absolute path before writing.
 * @param samples Samples to validate and write. The input array is not modified.
 * @returns Counts, skip reasons, byte size, resolved path and per-language totals.
 *   Rejects when the directory cannot be created or the file cannot be written.
 */
export async function writeSamples(
	outputPath: string,
	samples: readonly ISample[],
): Promise<IWriteResult> {
	const skipReasons: ISkipReason[] = [];
	const validSamples: ISample[] = [];

	for (const sample of samples) {
		const result = validateSample(sample);
		if (result.valid) {
			validSamples.push(sample);
		} else {
			skipReasons.push({
				rowIndex: sample.metadata.rowIndex,
				// `reason` is optional in the result type; fall back instead of asserting non-null.
				reason: result.reason ?? 'Unknown validation failure',
			});
		}
	}

	// `validSamples` is a local copy, so in-place sorting leaves the caller's array untouched.
	validSamples.sort((a, b) => a.metadata.rowIndex - b.metadata.rowIndex);

	const output = validSamples.map(sample => ({
		// Emit only role and content so extra message properties never reach the file.
		messages: sample.messages.map(m => ({ role: m.role, content: m.content })),
		metadata: sample.metadata,
	}));
	const content = JSON.stringify(output, null, 2);

	const resolvedPath = path.resolve(outputPath);
	await fs.mkdir(path.dirname(resolvedPath), { recursive: true });
	await fs.writeFile(resolvedPath, content, 'utf-8');

	// Size is computed from the serialized string rather than by stat-ing the file.
	const fileSize = Buffer.byteLength(content, 'utf-8');

	const languageCounts = new Map<string, number>();
	for (const sample of validSamples) {
		const lang = sample.metadata.language || 'unknown';
		languageCounts.set(lang, (languageCounts.get(lang) ?? 0) + 1);
	}

	return {
		written: validSamples.length,
		skipped: skipReasons.length,
		skipReasons,
		fileSize,
		outputPath: resolvedPath,
		languageCounts,
	};
}
170
171