Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
microsoft
GitHub Repository: microsoft/vscode
Path: blob/main/extensions/copilot/src/platform/notebook/common/alternativeContentEditGenerator.ts
13401 views
1
/*---------------------------------------------------------------------------------------------
2
* Copyright (c) Microsoft Corporation. All rights reserved.
3
* Licensed under the MIT License. See License.txt in the project root for license information.
4
*--------------------------------------------------------------------------------------------*/
5
6
import type { CancellationToken, NotebookCell, NotebookDocument } from 'vscode';
7
import { isJupyterNotebookUri } from '../../../util/common/notebooks';
8
import { createServiceIdentifier } from '../../../util/common/services';
9
import { isUri } from '../../../util/common/types';
10
import { AsyncIterableObject, AsyncIterableSource, DeferredPromise } from '../../../util/vs/base/common/async';
11
import { StringSHA1 } from '../../../util/vs/base/common/hash';
12
import { Constants } from '../../../util/vs/base/common/uint';
13
import { EndOfLine, NotebookCellData, NotebookCellKind, NotebookEdit, NotebookRange, Range, TextEdit, Uri } from '../../../vscodeTypes';
14
import { IDiffService } from '../../diff/common/diffService';
15
import { ILogService } from '../../log/common/logService';
16
import { ITelemetryService } from '../../telemetry/common/telemetry';
17
import { AlternativeContentFormat, IAlternativeNotebookContentService } from './alternativeContent';
18
import { lineMightHaveCellMarker } from './alternativeContentProvider.text';
19
import { EOL, getCellId, getCellIdMap, LineOfText } from './helpers';
20
import { computeDiff } from './notebookDiff';
21
22
/**
 * Telemetry context that accompanies a notebook edit generation request.
 * The values are forwarded as properties of the `notebook.editGeneration` event.
 */
export type NotebookEditGenerationTelemtryOptions = {
	/** Model selected for the response. May be a promise; it is awaited before telemetry is sent. */
	model: Promise<string> | string | undefined;
	/** The id of the current request turn. */
	requestId: string | undefined;
	/** The source from where the request was made. */
	source: NotebookEditGenrationSource;
};
27
28
/**
 * Identifies the feature that requested notebook edit generation.
 * Reported as the `requestSource` telemetry property; every member's value is its own name.
 */
export enum NotebookEditGenrationSource {
	codeMapperEditNotebook = 'codeMapperEditNotebook',
	codeMapperEmptyNotebook = 'codeMapperEmptyNotebook',
	codeMapperFastApply = 'codeMapperFastApply',
	createFile = 'createFile',
	stringReplace = 'stringReplace',
	applyPatch = 'applyPatch',
	newNotebookIntent = 'newNotebookIntent',
}
37
38
/** Service identifier under which the {@link IAlternativeNotebookContentEditGenerator} service is registered and injected. */
export const IAlternativeNotebookContentEditGenerator = createServiceIdentifier<IAlternativeNotebookContentEditGenerator>('IAlternativeNotebookContentEditGenerator');
39
/**
 * Turns alternative notebook content (streamed line by line, or supplied as one string)
 * into edits that can be applied to a notebook.
 */
export interface IAlternativeNotebookContentEditGenerator {
	readonly _serviceBrand: undefined;

	/**
	 * Generates the edits needed to make the notebook match the supplied content.
	 *
	 * @param notebookOrUri The notebook to edit, or only its Uri when the notebook document is not available.
	 * @param lines The alternative content, either as a stream of lines or as a single string.
	 * @param telemetryOptions Optional request metadata reported with the telemetry event.
	 * @param token Cancellation token for the request.
	 * @returns A stream of notebook-level edits (`NotebookEdit`) and per-cell text edits (`[cellUri, TextEdit[]]`).
	 */
	generateNotebookEdits(notebookOrUri: NotebookDocument | Uri, lines: AsyncIterable<LineOfText> | string, telemetryOptions: NotebookEditGenerationTelemtryOptions | undefined, token: CancellationToken): AsyncIterable<NotebookEdit | [Uri, TextEdit[]]>;
}
43
44
/**
 * Default implementation of {@link IAlternativeNotebookContentEditGenerator}.
 *
 * Detects the format of the incoming content from its first non-empty line, parses it with
 * the matching alternative content provider and streams out cell text edits as lines arrive.
 * Once the stream is complete it emits the cell insertions/deletions needed to reconcile the
 * notebook with the received cells.
 */
export class AlternativeNotebookContentEditGenerator implements IAlternativeNotebookContentEditGenerator {
	declare readonly _serviceBrand: undefined;

	constructor(
		@IAlternativeNotebookContentService private readonly alternativeContentService: IAlternativeNotebookContentService,
		@IDiffService private readonly diffService: IDiffService,
		@ILogService private readonly logger: ILogService,
		@ITelemetryService private readonly telemetryService: ITelemetryService,
	) {
	}

	/**
	 * Detects the alternative content format from the first non-empty line.
	 * A leading `{` means JSON, a leading `<` means XML, anything else is treated as text.
	 */
	private getFormat(firstLine: string): AlternativeContentFormat {
		// Trim first: there may be whitespace between the code fence and the content.
		const firstChar = firstLine.trim().substring(0, 1);
		if (firstChar === '{') {
			return 'json';
		}
		if (firstChar === '<') {
			return 'xml';
		}
		return 'text';
	}

	/**
	 * Given a NotebookDocument or Uri, and a cell kind, return the EOL for a new cell.
	 * If only a Uri is available, or the notebook has no cell of the given kind, the default EOL is returned.
	 * Otherwise the EOL of the first existing cell of that kind is used,
	 * so that new cells are consistent with the existing ones.
	 */
	private getEOLForNewCell(notebookOrUri: NotebookDocument | Uri, cellKind: NotebookCellKind): string | undefined {
		if (isUri(notebookOrUri)) {
			return EOL;
		}
		const existingEol = notebookOrUri.getCells().find(c => c.kind === cellKind)?.document.eol;
		if (!existingEol) {
			return EOL;
		}
		return existingEol === EndOfLine.LF ? '\n' : '\r\n';
	}

	/**
	 * Given a stream of lines for the alternative content, generate the corresponding edits to apply to the notebook document.
	 * We accept a NotebookDocument or a Uri, because the notebook may not have been created/loaded yet.
	 * I.e. for new notebooks, we can emit the insert-cell edits without the notebook being created.
	 *
	 * After all edits have been yielded, a `notebook.editGeneration` telemetry event is sent (fire and forget).
	 *
	 * @param notebookOrUri The notebook to edit, or only its Uri.
	 * @param lines The alternative content as a stream of lines or as a single string.
	 * @param telemetryOptions Optional request metadata for the telemetry event.
	 * @param token Cancellation token, forwarded to the parser.
	 */
	public async *generateNotebookEdits(notebookOrUri: NotebookDocument | Uri, lines: AsyncIterable<LineOfText> | string, telemetryOptions: NotebookEditGenerationTelemtryOptions | undefined, token: CancellationToken): AsyncIterable<NotebookEdit | [Uri, TextEdit[]]> {
		lines = typeof lines === 'string' ? textToAsyncIterableLines(lines) : lines;
		const firstNonEmptyLinePromise = new DeferredPromise<LineOfText>();
		lines = readFirstNonEmptyLineAndKeepStreaming(lines, firstNonEmptyLinePromise);
		const firstNonEmptyLine = (await firstNonEmptyLinePromise.p).value;
		const format = this.getFormat(firstNonEmptyLine);

		// Sometimes the LLM hallucinates with the jupytext format and doesn't send the cell markers,
		// it just sends plain python code.
		// In such cases, if no new cells were emitted, emit a new cell with the contents of the entire plain python code.
		const linesCollected: string[] = [];
		lines = collectWhileStreaming(lines, linesCollected);
		const isEmptyNotebook = isUri(notebookOrUri) || notebookOrUri.cellCount === 0;

		let notebookEditEmitted = false;
		let cellTextEditEmitted = false;
		for await (const edit of this.generateNotebookEditsImpl(notebookOrUri, lines, format, token)) {
			// Cell text edits are tuples of [cell uri, text edits]; everything else is a notebook-level edit.
			if (Array.isArray(edit)) {
				cellTextEditEmitted = true;
			} else {
				notebookEditEmitted = true;
			}
			yield edit;
		}

		// NOTE(review): isEmptyNotebook is already true whenever notebookOrUri is a Uri,
		// so this condition always holds. Kept as is; confirm the intended guard.
		if (isEmptyNotebook || !isUri(notebookOrUri)) {
			if (!notebookEditEmitted && format === 'text' && linesCollected.length && !lineMightHaveCellMarker(firstNonEmptyLine)) {
				const uri = isUri(notebookOrUri) ? notebookOrUri : notebookOrUri.uri;
				if (isJupyterNotebookUri(uri)) {
					const eolForNewCell = this.getEOLForNewCell(notebookOrUri, NotebookCellKind.Code);
					const cellData = new NotebookCellData(NotebookCellKind.Code, linesCollected.join(eolForNewCell), 'python');
					yield NotebookEdit.insertCells(0, [cellData]);
					this.logger.info(`No new cells were emitted for ${uri.toString()}. Emitting a new cell with the contents of the code.`);
				} else {
					this.logger.warn(`No new cells were emitted for ${uri.toString()}`);
				}
			}
		}

		// Fire and forget: telemetry must neither delay nor fail the edit stream.
		void (async () => {
			try {
				const model = await Promise.resolve(telemetryOptions?.model).catch(() => undefined);
				/* __GDPR__
					"notebook.editGeneration" : {
						"owner": "donjayamanne",
						"comment": "Metadata about the code mapper request",
						"requestId": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The id of the current request turn." },
						"requestSource": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The source from where the request was made" },
						"model": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Model selection for the response" },
						"inputFormat": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Input format for the notebook source (xml, json, text)" },
						"isEmptyNotebook": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Whether the notebook is empty", "isMeasurement": true },
						"isNotebookOrUri": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Whether we're given a notebook or just a uri (1 = Notebook, 0 = Uri)", "isMeasurement": true },
						"isJupyterNotebookUri": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Whether we're given a Jupyter notebook or just a uri (1 = Jupyter Notebook, 0 = Other)", "isMeasurement": true },
						"isEditEmitted": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Whether a Notebook edit was emitted (insert or delete cell) (1 = Yes, 0 = No)", "isMeasurement": true },
						"isCellTextEditEmitted": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Whether an edit was emitted for a cell (1 = Yes, 0 = No)", "isMeasurement": true },
						"sourceLength": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Number of lines in the source code from which we're to generate edits", "isMeasurement": true }
					}
				*/
				this.telemetryService.sendMSFTTelemetryEvent('notebook.editGeneration', {
					requestId: telemetryOptions?.requestId,
					requestSource: telemetryOptions?.source,
					model,
					inputFormat: format
				}, {
					isEmptyNotebook: isEmptyNotebook ? 1 : 0,
					isNotebookOrUri: isUri(notebookOrUri) ? 0 : 1,
					isJupyterNotebookUri: isJupyterNotebookUri(isUri(notebookOrUri) ? notebookOrUri : notebookOrUri.uri) ? 1 : 0,
					isEditEmitted: notebookEditEmitted ? 1 : 0,
					isCellTextEditEmitted: cellTextEditEmitted ? 1 : 0,
					sourceLength: linesCollected.length
				});
			} catch (err) {
				// Never let a telemetry failure surface as an unhandled rejection.
				this.logger.warn(`Failed to send notebook.editGeneration telemetry: ${err}`);
			}
		})();
	}

	/**
	 * Parses the alternative content with the provider for `format` and yields edits.
	 *
	 * While streaming:
	 * - lines of a cell that maps to an existing cell (by id) are yielded as text edits for that cell;
	 * - lines of a cell without a matching existing cell are accumulated into an insert-cell edit
	 *   that is yielded when the cell ends.
	 *
	 * After streaming (only when a non-empty notebook document is available) the existing cells that
	 * were not part of the received content are deleted, and a diff between the original and the
	 * expected cell order yields the remaining delete/insert edits.
	 *
	 * Nothing further is yielded once cancellation has been requested.
	 */
	public async *generateNotebookEditsImpl(notebookOrUri: NotebookDocument | Uri, lines: AsyncIterable<LineOfText>, format: AlternativeContentFormat, token: CancellationToken): AsyncIterable<NotebookEdit | [Uri, TextEdit[]]> {
		const provider = this.alternativeContentService.create(format);
		const isEmptyNotebook = isUri(notebookOrUri) || notebookOrUri.cellCount === 0;
		const isNotebookAvailable = !isUri(notebookOrUri);
		const cellIdMap = isNotebookAvailable ? getCellIdMap(notebookOrUri) : new Map<string, NotebookCell>();

		// State of the cell currently being streamed.
		const cellInfo: { index: number; language: string; cell?: NotebookCell; lines: string[]; insertEdit?: NotebookEdit; ended: boolean } = {
			index: -1,
			lines: [],
			language: 'markdown',
			ended: false
		};

		// This tracks the order and content of the cells as they are expected to be in the notebook.
		type ExpectedCellInfo = { index: number; cell?: NotebookCell; lines: string[]; language: string };

		// New cells have no id yet; derive a stable one from the cell's index.
		function getCellIdOfNewCell(cell: ExpectedCellInfo): string {
			const hash = new StringSHA1();
			hash.update(cell.index.toString());
			return hash.digest().substring(0, 8);
		}

		// 'markdown' cells are markup cells, every other language is a code cell.
		function cellKindFor(language: string): NotebookCellKind {
			return language === 'markdown' ? NotebookCellKind.Markup : NotebookCellKind.Code;
		}

		const expectedCells: ExpectedCellInfo[] = [];
		const original: { id: string; uri?: Uri }[] = isUri(notebookOrUri) ? [] : notebookOrUri.getCells().map(cell => ({ id: getCellId(cell), uri: cell.document.uri }));
		const allLines: string[] = [];
		lines = collectWhileStreaming(lines, allLines);
		let editsEmitted = false;
		for await (const line of provider.parseAlternateContent(notebookOrUri, lines, token)) {
			if (token.isCancellationRequested) {
				break;
			}
			if (line.type === 'start') {
				const expectedCell: ExpectedCellInfo = {
					index: line.index,
					language: line.language || 'markdown',
					lines: [],
					cell: line.id ? cellIdMap.get(line.id) : undefined
				};
				expectedCells.push(expectedCell);
				cellInfo.ended = false;
				cellInfo.insertEdit = undefined;
				cellInfo.index = expectedCell.index;
				// Shared array: lines pushed via cellInfo are visible on the expected cell too.
				cellInfo.lines = expectedCell.lines;
				cellInfo.language = expectedCell.language;
				cellInfo.cell = expectedCell.cell;
			} else if (line.type === 'end') {
				cellInfo.ended = true;
				const doc = cellInfo.cell?.document;
				if (!cellInfo.insertEdit && !cellInfo.cell && !cellInfo.lines.length) {
					// This is a case where we have an empty cell.
					// We do not get any line at all, we only have a start and an end,
					// meaning it is a cell, and it is well structured, but it's empty.
					const cellData = new NotebookCellData(cellKindFor(cellInfo.language), '', cellInfo.language);
					const insertEdit = NotebookEdit.insertCells(cellInfo.index, [cellData]);
					yield insertEdit;
					editsEmitted = true;
					original.splice(cellInfo.index, 0, { id: getCellIdOfNewCell(cellInfo) });
				} else if (cellInfo.insertEdit && !cellInfo.cell) {
					// Possible we got a cell from the LLM that doesn't have an id, but matches the content of an existing cell.
					// This can happen as follows:
					// 1. User asks LLM to insert a cell
					// 2. LLM returns an edit request to insert the cell without the cell id
					// 3. We insert the cell
					// 4. User asks for some other changes,
					// 5. LLM uses history and sees the cell in history that doesn't have an id
					// 6. LLM returns this same cell again along with other cells (new/changes, etc)
					// 7. Somehow the SD endpoint cannot figure out this is the same cell, and SD returns this cell but without the id
					// 8. Now we see this cell without an id, we insert it and we delete the old cell that was in this place.
					// Solution: If the cell being inserted is the same as the cell that is already in the notebook in the same position, then don't insert it.
					const existingCell = (!isEmptyNotebook && isNotebookAvailable && cellInfo.index < notebookOrUri.cellCount) ? notebookOrUri.cellAt(cellInfo.index) : undefined;
					if (existingCell && existingCell.document.getText() === cellInfo.insertEdit.newCells[0].value) {
						// Do not insert this cell; treat it as the existing cell instead.
						expectedCells[expectedCells.length - 1].cell = existingCell;

						// Re-emit the edits for all the lines of this existing cell.
						const existingDoc = existingCell.document;
						for (let i = 0; i < existingDoc.lineCount; i++) {
							const existingLine = existingDoc.lineAt(i);
							yield [existingDoc.uri, [new TextEdit(new Range(i, 0, i, Constants.MAX_SAFE_SMALL_INTEGER), existingLine.text)]];
							editsEmitted = true;
						}
					} else {
						yield cellInfo.insertEdit;
						editsEmitted = true;
						original.splice(cellInfo.index, 0, { id: getCellIdOfNewCell(cellInfo) });
					}
				} else if (cellInfo.lines.length && doc && cellInfo.lines.length < doc.lineCount) {
					// The existing cell has more lines than we received: remove everything after the last received line.
					const range = new Range(cellInfo.lines.length - 1, cellInfo.lines.slice(-1)[0].length, doc.lineCount - 1, doc.lineAt(doc.lineCount - 1).text.length);
					yield [doc.uri, [new TextEdit(range, '')]];
				}
			} else if (line.type === 'line' && !cellInfo.ended) {
				cellInfo.lines.push(line.line);
				if (cellInfo.cell) {
					if (cellInfo.lines.length > cellInfo.cell.document.lineCount) {
						// Past the end of the existing cell: append the line, prefixed with the cell's EOL.
						const range = new Range(cellInfo.lines.length - 1, 0, cellInfo.lines.length - 1, 0);
						const eol = cellInfo.cell.document.eol === EndOfLine.LF ? '\n' : '\r\n';
						const newText = `${eol}${line.line}`;
						yield [cellInfo.cell.document.uri, [new TextEdit(range, newText)]];
					} else {
						// Replace the whole existing line with the received one.
						const lineIndex = cellInfo.lines.length - 1;
						yield [cellInfo.cell.document.uri, [new TextEdit(new Range(lineIndex, 0, lineIndex, Constants.MAX_SAFE_SMALL_INTEGER), line.line)]];
					}
					editsEmitted = true;
				} else if (cellInfo.insertEdit) {
					// Subsequent line of a new cell: rebuild the pending cell's source.
					const eolForNewCell = this.getEOLForNewCell(notebookOrUri, cellInfo.insertEdit.newCells[0].kind);
					cellInfo.insertEdit.newCells[0].value = cellInfo.lines.join(eolForNewCell);
				} else {
					// First line of a new cell: create the pending insert edit.
					const cellData = new NotebookCellData(cellKindFor(cellInfo.language), line.line, cellInfo.language);
					cellInfo.insertEdit = NotebookEdit.insertCells(cellInfo.index, [cellData]);
				}
			}
		}

		// When cancelled we have only seen part of the content. Reconciling against that partial
		// list would delete every cell that simply had not been streamed yet.
		if (token.isCancellationRequested) {
			return;
		}

		if (isEmptyNotebook || !isNotebookAvailable) {
			return;
		}

		// If we have content in the original notebook and no edits were emitted,
		// but we received some content,
		// then this can mean only one thing = invalid format.
		// If the format is correct, then we should have emitted some edits.
		// If we don't exit here we end up deleting all the cells in the notebook.
		if (!editsEmitted && allLines.length) {
			this.logger.warn(`No edits generated for notebook ${notebookOrUri.uri.toString()}. This is likely due to an invalid format. Expected format: ${format}. Provided content as follows:\n\n${allLines.join('\n')}`);
			return;
		}

		const modified = expectedCells.map(cell => cell.cell ? getCellId(cell.cell) : getCellIdOfNewCell(cell));
		const modifiedIds = new Set(modified);

		// Delete the missing cells, i.e. existing cells that are not part of the received content.
		for (const missingCell of original.filter(cell => cell.uri && !modifiedIds.has(cell.id)).reverse()) {
			const cell = cellIdMap.get(missingCell.id);
			if (cell) {
				const index = original.indexOf(missingCell);
				yield NotebookEdit.deleteCells(new NotebookRange(index, index + 1));
				original.splice(index, 1);
			}
		}

		const result = await this.diffService.computeDiff(original.map(c => c.id).join(EOL), modified.join(EOL), { computeMoves: false, ignoreTrimWhitespace: true, maxComputationTimeMs: 5_000 });
		const diffResult = computeDiff(original.map(i => i.id), modified, result.changes);

		if (diffResult.every(d => d.type === 'unchanged')) {
			return;
		}

		// Delete items
		for (const change of diffResult.filter(d => d.type === 'delete').reverse()) {
			yield NotebookEdit.deleteCells(new NotebookRange(change.originalCellIndex, change.originalCellIndex + 1));
		}

		// Insert items
		for (const change of diffResult.filter(d => d.type === 'insert')) {
			const expectedCell = expectedCells[change.modifiedCellIndex];
			const kind = cellKindFor(expectedCell.language);
			const eolForNewCell = this.getEOLForNewCell(notebookOrUri, kind);
			const source = expectedCell.lines.join(eolForNewCell);
			const cellData = new NotebookCellData(kind, source, expectedCell.language);
			yield NotebookEdit.insertCells(expectedCell.index, [cellData]);
		}
	}

}
322
323
/**
 * Wraps a complete string as a stream of lines.
 * The text is emitted as a single chunk and then split into lines by `streamLines`.
 *
 * @param text The full text to split.
 * @returns An async iterable yielding one `LineOfText` per line of `text`.
 */
export function textToAsyncIterableLines(text: string): AsyncIterable<LineOfText> {
	const chunks = new AsyncIterableSource<string>();
	chunks.emitOne(text);
	chunks.resolve();
	return streamLines(chunks.asyncIterable);
}
329
330
331
/**
 * Split an incoming stream of text into a stream of lines.
 *
 * Chunks may end in the middle of a line; the incomplete tail is buffered until the next
 * chunk arrives. Lines are split on `\n`. A trailing remainder that is not terminated by `\n`
 * is emitted as the last line, unless it is empty.
 */
function streamLines(source: AsyncIterable<string>): AsyncIterableObject<LineOfText> {
	return new AsyncIterableObject<LineOfText>(async (emitter) => {
		let pending = '';
		for await (const chunk of source) {
			pending += chunk;

			// Emit every complete line, scanning forward instead of re-slicing the buffer per line.
			let lineStart = 0;
			let newlineIndex = pending.indexOf('\n', lineStart);
			while (newlineIndex !== -1) {
				emitter.emitOne(new LineOfText(pending.substring(lineStart, newlineIndex)));
				lineStart = newlineIndex + 1;
				newlineIndex = pending.indexOf('\n', lineStart);
			}

			// Keep the incomplete tail for the next chunk.
			pending = pending.substring(lineStart);
		}

		if (pending.length > 0) {
			// last line which doesn't end with \n
			emitter.emitOne(new LineOfText(pending));
		}
	});
}
359
360
361
/**
 * Passes every line of `source` through unchanged, and completes `firstNonEmptyLine` with the
 * first line that contains something other than whitespace.
 *
 * If the stream ends without such a line, or fails before one is seen, `firstNonEmptyLine` is
 * completed with an empty line, so that code awaiting it never hangs. An error thrown by
 * `source` is not swallowed here; it still propagates out of the executor.
 *
 * @param source The incoming lines.
 * @param firstNonEmptyLine Deferred that receives the first non-empty line.
 * @returns A stream yielding the same lines as `source`.
 */
function readFirstNonEmptyLineAndKeepStreaming(source: AsyncIterable<LineOfText>, firstNonEmptyLine: DeferredPromise<LineOfText>): AsyncIterable<LineOfText> {
	return new AsyncIterableObject<LineOfText>(async (emitter) => {
		try {
			for await (const line of source) {
				if (!firstNonEmptyLine.isSettled && line.value.trim().length) {
					firstNonEmptyLine.complete(line);
				}
				emitter.emitOne(line);
			}
		} finally {
			// Runs on normal completion and on failure of `source` alike.
			if (!firstNonEmptyLine.isSettled) {
				firstNonEmptyLine.complete(new LineOfText(''));
			}
		}
	});
}
374
375
/**
 * Passes every line of `source` through unchanged while appending its text to `lines`.
 *
 * @param source The incoming lines.
 * @param lines Array that receives the `value` of each line, in order; mutated in place.
 * @returns A stream yielding the same lines as `source`.
 */
function collectWhileStreaming(source: AsyncIterable<LineOfText>, lines: string[]): AsyncIterable<LineOfText> {
	return new AsyncIterableObject<LineOfText>(async (emitter) => {
		for await (const item of source) {
			lines.push(item.value);
			emitter.emitOne(item);
		}
	});
}
383