Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
microsoft
GitHub Repository: microsoft/vscode
Path: blob/main/extensions/copilot/test/simulation/workbench/stores/amlResults.ts
13399 views
1
/*---------------------------------------------------------------------------------------------
2
* Copyright (c) Microsoft Corporation. All rights reserved.
3
* Licensed under the MIT License. See License.txt in the project root for license information.
4
*--------------------------------------------------------------------------------------------*/
5
6
import * as csvParse from 'csv-parse/sync';
7
import type * as vscode from 'vscode';
8
import { OutputAnnotation } from '../../shared/sharedTypes';
9
import { AMLRunKind } from './amlSimulations';
10
11
/** Copied from src/base/conversation/context/resolvers/gitRepository.ts */
interface RepoContext {
	// Repository name — presumably derived from the remote/folder; confirm in gitRepository.ts.
	readonly name: string;
	// Current local branch name; `undefined` likely means detached HEAD — confirm at the copy source.
	readonly headBranchName: string | undefined;
	// Upstream tracking branch, when one is configured.
	readonly upstreamBranchName: string | undefined;
	// Whether a rebase was in progress when the context was captured.
	readonly isRebasing: boolean;
	// Remote identifiers; exact format (names vs URLs) is not visible here — see the copy source.
	readonly remotes: string[];
}
19
20
/*
 * Copied from src/base/conversation/context/promptContextModel.ts
 * because workbench tsconfig restricts importing from `promptContextModel` for some reason.
 */
interface ISerializedWorkspaceState {
	// File paths of the open workspace folders — absolute vs relative not visible here; confirm at the copy source.
	readonly workspaceFoldersFilePaths: string[] | undefined;
	// Snapshot of the active text editor; `undefined` when none was active at capture time.
	readonly activeTextEditor: {
		selections: { anchor: vscode.Position; active: vscode.Position; isReversed: boolean }[];
		documentFilePath: string;
		visibleRanges: { start: vscode.Position; end: vscode.Position }[];
		languageId: string;
	} | undefined;
	// Symbols captured at serialization time, each with its location and container.
	readonly symbols: {
		name: string;
		kind: vscode.SymbolKind;
		containerName: string;
		filePath: string;
		start: vscode.Position;
		end: vscode.Position;
	}[] | undefined;
	readonly notebookDocumentFilePaths: string[] | undefined;
	// Diagnostics on the active file; `severity` is optional in the serialized form.
	readonly activeFileDiagnostics: { start: vscode.Position; end: vscode.Position; message: string; severity?: vscode.DiagnosticSeverity }[];
	readonly debugConsoleOutput: string;
	// Git state of the workspace repository, if any (see RepoContext above).
	readonly repoContext: RepoContext | undefined;
	readonly terminalBuffer: string;
	readonly terminalSelection: string;
}
47
48
/** Type for `response` field of `<experiment>_scored_predictions.csv` */
export type Response = {
	originalFilePath: string;
	// File contents before and after the simulated change.
	fileBefore: string;
	fileAfter: string;
	logFileContents: string;
	conversationFileContents: string;

	/**
	 * This needs to be of type `promptContextModel.ISerializedWorkspaceState`,
	 * but workbench tsconfig restricts importing from `promptContextModel` for some reason.
	 * NOTE(review): the field is now typed with the local ISerializedWorkspaceState copy
	 * above, so the earlier "leave it as an `object`" workaround no longer applies. */
	workspaceStateFileContents: ISerializedWorkspaceState;
};
62
63
/**
 * A single diagnostic from the evaluation payload; see `_toEvaluationError`
 * for the raw snake_case shape this is mapped from.
 */
export type EvaluationError = {
	// Positions come from `*_index` fields in the raw payload, so they are
	// presumably 0-based — confirm against the evaluator output.
	startLine: number;
	startColumn: number;
	endLine: number;
	endColumn: number;
	message: string;
	// Identifier of the rule/check that fired.
	rule: string;
	// Name of the tool that produced the diagnostic.
	tool: string;
};
72
73
/** Evaluation result for one run (one scored-predictions row) of one test case. */
export type TestRunEvaluation = {
	caseName: string;
	/** the n-th run for this case? 0-based */
	nId: number;
	languageId: string;
	/** True when the raw `score` equals 1 (or when the raw score was already a boolean). */
	isSuccess: boolean;
	// Populated for Fix runs only (see _parseFixEvaluationData).
	errorsOnlyInBefore?: EvaluationError[];
	errorsOnlyInAfter?: EvaluationError[];
	// Non-SUCCESS status codes, surfaced as error annotations.
	annotations?: OutputAnnotation[];
	stdout?: string;
	stderr?: string;
	evaluatorError?: string;
	// Populated for TestGen runs only (see _parseTestEvaluationData).
	generatedTestCaseCount?: number;
	generatedAssertCount?: number;
	expectedDiff?: string;
};
89
90
// parses each line in `<component>_scored_predictions.csv` into a TestRunEvaluation
91
function _parseScoredPredictionsCsv(kind: AMLRunKind, fileContents: string[]): TestRunEvaluation[] {
92
return fileContents.map((line, i) => {
93
94
const json: any = JSON.parse(line); // may throw but not sure if we should have a way to recover
95
96
let stdout: string | undefined;
97
let stderr: string | undefined;
98
let errorsOnlyInBefore: EvaluationError[] | undefined;
99
let errorsOnlyInAfter: EvaluationError[] | undefined;
100
const annotations: OutputAnnotation[] = [];
101
let evaluatorError: string | undefined;
102
let generatedTestCaseCount: number | undefined;
103
let generatedAssertCount: number | undefined;
104
105
const extraDataJson = json.extra_data_json;
106
107
if (extraDataJson) {
108
({ errorsOnlyInBefore, errorsOnlyInAfter } = _parseFixEvaluationData(kind, extraDataJson));
109
110
[generatedTestCaseCount, generatedAssertCount] = _parseTestEvaluationData(kind, extraDataJson);
111
112
stdout = extraDataJson.stdout && typeof extraDataJson.stdout === 'string' ? extraDataJson.stdout : undefined;
113
stderr = extraDataJson.stderr && typeof extraDataJson.stderr === 'string' ? extraDataJson.stderr : undefined;
114
}
115
116
if (json.score !== 1) {
117
const statusCodes: string[] = json.status_codes;
118
119
if (statusCodes) {
120
for (const statusCode of statusCodes) {
121
if (statusCode !== 'SUCCESS') {
122
annotations.push({ message: `AML eval error: ${statusCode}`, label: statusCode, severity: 'error' } satisfies OutputAnnotation);
123
}
124
}
125
126
if (json.status_message) {
127
evaluatorError = evaluatorError ? `${evaluatorError}\n${json.status_message}` : json.status_message;
128
}
129
}
130
}
131
132
return {
133
caseName: json.test_case_id,
134
nId: parseInt(json.n_id),
135
languageId: json.language,
136
isSuccess: typeof json.score === 'number' ? json.score === 1 : json.score,
137
errorsOnlyInBefore,
138
errorsOnlyInAfter,
139
annotations,
140
stdout,
141
stderr,
142
evaluatorError,
143
generatedTestCaseCount: generatedTestCaseCount,
144
generatedAssertCount: generatedAssertCount,
145
expectedDiff: extraDataJson?.diff,
146
};
147
});
148
}
149
150
/**
 * Extracts before/after error lists from `extra_data_json` for Fix runs.
 * Both members are `undefined` for non-Fix runs or non-object payloads.
 */
function _parseFixEvaluationData(kind: AMLRunKind, json: unknown) {
	if (kind !== AMLRunKind.Fix || typeof json !== 'object' || !json) {
		return { errorsOnlyInBefore: undefined, errorsOnlyInAfter: undefined };
	}

	const raw = json as any;
	const errorsOnlyInAfter: EvaluationError[] | undefined =
		raw.errors_only_in_after?.map(_toEvaluationError).sort(_evaluationErrorComparator);
	const errorsOnlyInBefore: EvaluationError[] | undefined =
		raw.errors_only_in_before?.map(_toEvaluationError).sort(_evaluationErrorComparator);

	return { errorsOnlyInBefore, errorsOnlyInAfter };
}
161
162
/**
 * Pulls the generated test-case / assert counts out of `extra_data_json`
 * for TestGen runs; both tuple members are `undefined` otherwise.
 */
function _parseTestEvaluationData(kind: AMLRunKind, json: unknown): [number | undefined, number | undefined] {
	if (kind !== AMLRunKind.TestGen || typeof json !== 'object' || !json) {
		return [undefined, undefined];
	}

	const testCaseCount = 'generated_test_case_count' in json
		? json.generated_test_case_count as number
		: undefined;
	const assertCount = 'generated_assert_count' in json
		? json.generated_assert_count as number
		: undefined;

	return [testCaseCount, assertCount];
}
177
178
function _toEvaluationError(error: any): EvaluationError {
179
180
return {
181
message: error.message,
182
rule: error.rule,
183
tool: error.tool,
184
startLine: error.start_line_index,
185
startColumn: error.start_col_index,
186
endLine: error.end_line_index,
187
endColumn: error.end_col_index
188
};
189
}
190
191
/**
 * Total order over evaluation errors for Array.prototype.sort:
 * compares start position first, then end position.
 */
function _evaluationErrorComparator(error1: EvaluationError, error2: EvaluationError) {
	// Walk the position keys in priority order; the first differing key decides.
	for (const key of ['startLine', 'startColumn', 'endLine', 'endColumn'] as const) {
		if (error1[key] !== error2[key]) {
			return error1[key] - error2[key];
		}
	}
	return 0;
}
206
207
/** Aggregation of all runs of one test case (built by parseScoredPredictionsCsv). */
export type TestRunsEvaluation = {
	caseName: string;
	activeEditorLanguageId?: string;
	/** Per-run success flags, in input order. */
	isEachTestRunSuccess: boolean[];
	// The remaining fields are taken from the first run of the case.
	errorsOnlyInBefore?: EvaluationError[];
	errorsOnlyInAfter?: EvaluationError[];
	annotations?: OutputAnnotation[];
	stdout?: string;
	stderr?: string;
	evaluatorError?: string;
	generatedTestCaseCount?: number;
	generatedAssertCount?: number;
	expectedDiff?: string;
};
221
222
// parses lines in `<component>_scored_predictions.csv` and aggregates them into
223
// a format that is easier to use for our purposes
224
export function parseScoredPredictionsCsv(kind: AMLRunKind, fileContents: string[]): TestRunsEvaluation[] {
225
226
const testRunEvals = _parseScoredPredictionsCsv(kind, fileContents);
227
228
const testRunsEvaluation: TestRunsEvaluation[] = [];
229
230
let ix = 0;
231
while (ix < testRunEvals.length) {
232
const { caseName, languageId, errorsOnlyInBefore, errorsOnlyInAfter, annotations, stdout, stderr, evaluatorError, generatedTestCaseCount, generatedAssertCount, expectedDiff } = testRunEvals[ix];
233
const isEachTestRunSuccess: boolean[] = [];
234
while (ix < testRunEvals.length && testRunEvals[ix].caseName === caseName) {
235
isEachTestRunSuccess.push(testRunEvals[ix].isSuccess);
236
ix++;
237
}
238
testRunsEvaluation.push({
239
caseName,
240
activeEditorLanguageId: languageId,
241
isEachTestRunSuccess,
242
errorsOnlyInBefore,
243
errorsOnlyInAfter,
244
annotations,
245
stdout,
246
stderr,
247
evaluatorError,
248
generatedTestCaseCount: generatedTestCaseCount,
249
generatedAssertCount: generatedAssertCount,
250
expectedDiff,
251
});
252
}
253
return testRunsEvaluation;
254
}
255
256
/**
 * One row of a score-card CSV (produced by parseScoreCard).
 *
 * NOTE(review): parseScoreCard's `cast` callback actually returns formatted
 * *strings* for `mean` (e.g. "12.34%"), `median`, and `stdErr`, so the
 * `number` declarations below do not match the runtime values — confirm
 * which side is intended before tightening.
 */
export type ScoreCard = {
	metric: string;
	mean: number;
	median: number;
	stdErr: number;
	confidenceInterval: [number, number];
	count: number;
};
264
265
export function parseScoreCard(fileContents: string): ScoreCard {
266
const scoreCardRows: ScoreCard[] = csvParse.parse(
267
fileContents,
268
{
269
delimiter: ',',
270
columns: ['metric', 'mean', 'median', 'stdErr', 'confidenceInterval', 'count'],
271
cast: (value: string, context: csvParse.CastingContext) => {
272
switch (context.column) {
273
case 'metric':
274
return value;
275
case 'mean':
276
return `${(parseFloat(value) * 100).toFixed(2)}%`;
277
case 'confidenceInterval': {
278
const unparenthesized = value.substring(1, value.length - 1);
279
const [lower, upper] = unparenthesized.split(', ').map(parseFloat);
280
return [lower, upper];
281
}
282
case 'count':
283
return parseInt(value);
284
default:
285
return parseFloat(value).toFixed(2);
286
}
287
},
288
fromLine: 2,
289
}
290
);
291
return scoreCardRows[0];
292
}
293
294
/** Per-language aggregate scores (parsed by parseScoreCardByLanguage). */
export type ScoreCardByLanguage = {
	language: string;
	testCasesCount: number;
	scoredCount: number;
	unscoredCount: number;
	meanScore: number;
};
301
302
export function parseScoreCardByLanguage(fileContents: string): ScoreCardByLanguage[] {
303
return JSON.parse(fileContents).map((entry: any) => ({
304
language: entry.Language,
305
testCasesCount: entry.nTestCases,
306
scoredCount: entry.nScored,
307
unscoredCount: entry.nUnscored,
308
meanScore: entry.MeanScore
309
}));
310
}
311
312