Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
microsoft
GitHub Repository: microsoft/vscode
Path: blob/main/extensions/copilot/src/platform/notebook/common/alternativeContentProvider.json.ts
13401 views
1
/*---------------------------------------------------------------------------------------------
2
* Copyright (c) Microsoft Corporation. All rights reserved.
3
* Licensed under the MIT License. See License.txt in the project root for license information.
4
*--------------------------------------------------------------------------------------------*/
5
import { visit } from 'jsonc-parser';
6
import type { CancellationToken, NotebookCell, NotebookDocument, TextDocument } from 'vscode';
7
import { AsyncIterableObject } from '../../../util/vs/base/common/async';
8
import { URI } from '../../../util/vs/base/common/uri';
9
import { NotebookCellKind, Position, Range, Uri } from '../../../vscodeTypes';
10
import { BaseAlternativeNotebookContentProvider } from './alternativeContentProvider';
11
import { AlternativeNotebookDocument } from './alternativeNotebookDocument';
12
import { EOL, getCellId, getCellIdMap, getDefaultLanguage, LineOfCellText, LineOfText, summarize } from './helpers';
13
14
// Number of spaces per nesting level; passed as the `space` argument of every `JSON.stringify` call in this file.
const IndentSize = 4;
15
16
/**
 * Heuristically decides whether `text` looks like the JSON representation of a notebook.
 *
 * @param text Text to inspect.
 * @returns `true` when the text starts with `{` (leading whitespace ignored), or when it
 * contains a brace together with both the `"source":` and `"cell_type":` keys; `false` otherwise.
 */
export function isJsonContent(text: string): boolean {
	// A whole JSON document, or the beginning of one.
	const opensWithBrace = text.trimStart().startsWith('{');
	if (opensWithBrace) {
		return true;
	}

	// A fragment of a notebook: some brace plus the two keys every cell carries.
	const hasBrace = text.includes('{') || text.includes('}');
	const hasCellKeys = text.includes('"source":') && text.includes('"cell_type":');
	return hasBrace && hasCellKeys;
}
26
27
/**
 * Alternative notebook document whose text is the pretty-printed JSON form of the notebook
 * (`{ "cells": [ { "cell_type", "id", "metadata", "source": [...] } ] }`).
 */
class AlternativeJsonDocument extends AlternativeNotebookDocument {
	/**
	 * Maps a position inside a notebook cell onto the matching position in the JSON text.
	 *
	 * The cell is located through its `"id": "<cellId>",` entry and its lines through the first
	 * `"source": [` that follows; every cell line occupies one line of that array.
	 *
	 * Example: for the cell line `print("Hello World")` and the position just before `World`
	 * (character 12), the JSON line is `<indent>"print(\"Hello World\")"` and the returned
	 * character is `<indent>"print(\"Hello `.length.
	 *
	 * @param cell Cell the position belongs to.
	 * @param position Position within `cell.document`.
	 * @returns The position of the same character in the JSON text.
	 */
	override fromCellPosition(cell: NotebookCell, position: Position): Position {
		const cellId = getCellId(cell);
		const alternativeContentText = this.getText();

		// NOTE(review): when the id marker is missing, `indexOf` returns -1 and the search below
		// starts from the beginning of the text, i.e. it silently resolves to the first cell.
		const cellMarker = `"id": "${cellId}",`;
		const positionOfSource = alternativeContentText.indexOf(`"source": [`, alternativeContentText.indexOf(cellMarker));

		// The first source entry sits on the line right after `"source": [`.
		const firstLineIndexOfCellSource = this.positionAt(positionOfSource).line + 1;
		const linePositionInAltContent = position.line + firstLineIndexOfCellSource;

		// Measure the indentation of the source entries from the text itself rather than assuming
		// a fixed prefix: all entries of one `source` array share the indentation of the first one.
		const newlineAfterSource = alternativeContentText.indexOf('\n', positionOfSource);
		let indentLength = 0;
		if (newlineAfterSource !== -1) {
			const lineStart = newlineAfterSource + 1;
			while (alternativeContentText[lineStart + indentLength] === ' ' || alternativeContentText[lineStart + indentLength] === '\t') {
				indentLength++;
			}
		}

		const leadingCharacters = cell.document.getText(new Range(position.line, 0, position.line, position.character));
		// `JSON.stringify` escapes the text and wraps it in quotes; keep the opening quote (it is
		// part of the JSON line) and drop the closing one.
		const escapedLeadingCharacters = JSON.stringify(leadingCharacters).slice(0, -1);

		return new Position(linePositionInAltContent, indentLength + escapedLeadingCharacters.length);
	}

	/**
	 * Reverse mapping (JSON position to cell position); not supported for the JSON format.
	 *
	 * @throws Always throws `Error('Method not implemented.')`.
	 */
	override toCellPosition(position: Position): { cell: NotebookCell; position: Position } | undefined {
		throw new Error('Method not implemented.');
	}
}
53
54
/**
 * Alternative notebook content provider for the `json` format: renders a notebook as a
 * pretty-printed `{ "cells": [...] }` document and parses such a document (streamed line by
 * line) back into per-cell start/line/end events.
 */
export class AlternativeJsonNotebookContentProvider extends BaseAlternativeNotebookContentProvider {
	constructor() {
		super('json');
	}

	/**
	 * Returns `text` unchanged; this provider performs no marker stripping.
	 */
	public stripCellMarkers(text: string): string {
		return text;
	}

	/**
	 * Parses streamed JSON notebook text into cell events.
	 *
	 * @param notebookOrUri Existing notebook (used to match cell ids and the default language) or just a URI.
	 * @param inputStream Lines of the JSON text, in order.
	 * @param token Cancellation token; parsing stops consuming input once cancellation is requested.
	 * @returns An async iterable of `start` / `line` / `end` events, one group per cell.
	 */
	public override parseAlternateContent(notebookOrUri: NotebookDocument | Uri, inputStream: AsyncIterable<LineOfText>, token: CancellationToken): AsyncIterable<LineOfCellText> {
		return this.parseAlternateContentImpl(notebookOrUri, inputStream, token);
	}

	/**
	 * Wraps already rendered JSON `text` in an alternative document bound to `notebook`.
	 */
	public override getAlternativeDocumentFromText(text: string, notebook: NotebookDocument): AlternativeNotebookDocument {
		return new AlternativeJsonDocument(text, notebook);
	}

	/**
	 * Renders `notebook` as JSON text indented with `IndentSize` spaces per level.
	 *
	 * @param notebook Notebook to render.
	 * @param excludeMarkdownCells When truthy, markup cells are left out of the output.
	 * @returns The alternative document holding the JSON text.
	 */
	public override getAlternativeDocument(notebook: NotebookDocument, excludeMarkdownCells?: boolean): AlternativeNotebookDocument {
		const cells = notebook.getCells().filter(cell => excludeMarkdownCells ? cell.kind !== NotebookCellKind.Markup : true).map(cell => {
			const summary = summarize(cell);
			const source = getCellCode(cell.document);

			return {
				cell_type: summary.cell_type,
				id: summary.id,
				metadata: {
					language: summary.language
				},
				source,
			} satisfies SummaryCell;
		});

		const json: Notebook = { cells };
		const text = JSON.stringify(json, undefined, IndentSize);

		return new AlternativeJsonDocument(text, notebook);
	}

	/**
	 * Builds a JSON-like outline of the notebook in which only `cellsToInclude` are spelled out
	 * (abbreviated to their first non-blank line) and every other run of cells is replaced by a
	 * single `// <existingCodeMarker>` line. The result is an outline, not strictly valid JSON.
	 *
	 * @param notebook Notebook to summarize.
	 * @param cellsToInclude Cells that should appear in the outline.
	 * @param existingCodeMarker Marker text standing in for omitted code.
	 * @returns The outline, lines joined with `EOL`.
	 */
	public override getSummaryOfStructure(notebook: NotebookDocument, cellsToInclude: NotebookCell[], existingCodeMarker: string): string {
		// Indentation mirrors the nesting of the real JSON document: `"cells"` is one level deep,
		// each cell object two levels deep.
		const cellsIndent = ' '.repeat(IndentSize);
		const cellIndent = ' '.repeat(IndentSize * 2);

		const lines = ['{', `${cellsIndent}"cells": [`];
		const existingCodeMarkerWithComment = `// ${existingCodeMarker}`;
		notebook.getCells().forEach((cell) => {
			if (cellsToInclude.includes(cell)) {
				const cellSummary = summarize(cell);
				if (cellSummary.source.length && cellSummary.source[0].trim().length) {
					// First line has content: keep it, elide the rest.
					cellSummary.source = [cellSummary.source[0], existingCodeMarkerWithComment];
				} else if (cellSummary.source.length && cellSummary.source.some(line => line.trim().length)) {
					// Leading blank line(s): show the first non-blank line surrounded by markers.
					cellSummary.source = [existingCodeMarkerWithComment, cellSummary.source.filter(line => line.trim().length)[0], existingCodeMarkerWithComment];
				} else {
					// Empty or whitespace-only cell.
					cellSummary.source = [existingCodeMarkerWithComment];
				}
				const summary = JSON.stringify(cellSummary, undefined, IndentSize).split(/\r?\n/).map(line => `${cellIndent}${line}`);
				lines.push(...summary);
				lines.push(',');
			} else if (!lines.length || lines[lines.length - 1] !== existingCodeMarkerWithComment) {
				// Collapse consecutive omitted cells into a single marker line.
				lines.push(existingCodeMarkerWithComment);
			}
		});
		lines.push(`${cellsIndent}]`);
		lines.push(`}`);
		return lines.join(EOL);
	}

	/**
	 * Incrementally parses the streamed JSON text.
	 *
	 * Every incoming line is appended to the accumulated text, which is then re-visited from the
	 * start; `lastSeenOffset` ensures each literal is handled only once. Literals under
	 * `cells[<index>]` update the cell currently being assembled, and a cell is emitted when a
	 * literal belonging to a different cell index shows up (the last cell is emitted after the
	 * stream ends).
	 */
	private parseAlternateContentImpl(notebookOrUri: NotebookDocument | URI, inputStream: AsyncIterable<LineOfText>, token: CancellationToken): AsyncIterable<LineOfCellText> {
		return new AsyncIterableObject<LineOfCellText>(async (emitter) => {
			// Without a notebook there are no existing cells to match ids against.
			const cellIdMap = URI.isUri(notebookOrUri) ? new Map<string, NotebookCell>() : getCellIdMap(notebookOrUri);
			const cellIdsSeen = new Set<string>();
			let jsonText = '';
			let lastSeenOffset = -1;
			// State of the cell currently being assembled; `index === -1` means "no cell yet".
			const cellInfo: { id?: string; index: number; kind: NotebookCellKind; source: string[]; uri?: Uri; language?: string; startOffset: number; endOffset: number } = {
				index: -1,
				startOffset: -1,
				endOffset: -1,
				kind: NotebookCellKind.Code,
				source: [],
			};
			const defaultLanguage = URI.isUri(notebookOrUri) ? 'python' : getDefaultLanguage(notebookOrUri);

			// Emits the start/line/end events for the assembled cell (`endOffset` is currently not read).
			const emitCell = (endOffset: number) => {
				// LLM can return duplicate cell with the same id.
				cellInfo.language = cellInfo.language || defaultLanguage;
				if (cellInfo.id && cellIdMap.get(cellInfo.id)?.document.languageId === cellInfo.language) {
					if (cellIdsSeen.has(cellInfo.id)) {
						// Second occurrence of the same id: treat it as a new cell.
						cellInfo.id = '';
					} else {
						cellIdsSeen.add(cellInfo.id);
					}
				} else {
					// Possible duplicate cell with the same id but different language.
					// In such cases, treat them as new cells.
					cellInfo.id = '';
				}
				const cell = cellIdMap.get(cellInfo.id);
				cellInfo.uri = cell?.document.uri;
				cellInfo.kind = cell?.kind || (cellInfo.language === 'markdown' ? NotebookCellKind.Markup : NotebookCellKind.Code);

				emitter.emitOne({ index: cellInfo.index, type: 'start', kind: cellInfo.kind, language: cellInfo.language, uri: cellInfo.uri, id: cellInfo.id });
				cellInfo.source.forEach(cellLine => emitter.emitOne({ index: cellInfo.index, type: 'line', line: cellLine }));
				emitter.emitOne({ index: cellInfo.index, type: 'end' });
			};

			let finalOffset = 0;
			for await (const lineOfText of inputStream) {
				if (token.isCancellationRequested) {
					break;
				}
				const line = lineOfText.value;

				jsonText += line;

				visit(jsonText, {
					onObjectEnd(offset, _length, _startLine, _startCharacter) {
						finalOffset = offset;
					},
					onLiteralValue: (value, offset, _length, _startLine, _startCharacter, pathSupplier) => {
						// Already handled during an earlier pass over the accumulated text.
						if (lastSeenOffset >= offset) {
							return;
						}
						// Only literals located at `cells[<number>].<property>...` are of interest.
						const segments = pathSupplier();
						if (segments.length < 2) {
							return;
						}
						if (segments.shift() !== 'cells') {
							return;
						}
						const cellIndex = segments.shift();
						if (typeof cellIndex !== 'number') {
							return;
						}

						const property = segments.shift()! as string;
						lastSeenOffset = offset;

						// A literal of another cell: flush the previous cell and reset the state.
						if (cellInfo.index !== -1 && cellInfo.index !== cellIndex) {
							emitCell(offset);

							cellInfo.startOffset = offset;
							cellInfo.id = undefined;
							cellInfo.kind = NotebookCellKind.Code;
							cellInfo.source = [];
							cellInfo.uri = undefined;
							cellInfo.language = undefined;
						}

						cellInfo.index = cellIndex;

						if (property === 'cell_type') {
							cellInfo.kind = value === 'code' ? NotebookCellKind.Code : NotebookCellKind.Markup;
							if (cellInfo.kind === NotebookCellKind.Markup) {
								cellInfo.language = 'markdown';
							}
						} else if (property === 'id') {
							// This is for scenarios when LLM sends the id as part of the cell instead of metadata.
							cellInfo.id = value;
						} else if (property === 'metadata' && segments[0] === 'id') {
							cellInfo.id = value;
						} else if (property === 'metadata' && segments[0] === 'language') {
							cellInfo.language = value;
							if (cellInfo.language === 'markdown') {
								cellInfo.kind = NotebookCellKind.Markup;
							}
						} else if (property === 'source' && segments.length && typeof segments[0] === 'number') {
							if (segments[0] === 0) {
								cellInfo.startOffset = offset;
							}
							let code = typeof value === 'string' ? value : `${value || ''}`;
							// Generally code in jupyter cells always end with `\n` when persisted in JSON file.
							// However we do not want to deal with the \n as we're only interested in the lines of code.
							// This is because we're going to edit a line at a time, new line means we have two lines to edit, but that's not possible.
							// A line cannot contain new line in editor (then it's just two lines in editor).
							if (code.endsWith('\n')) {
								code = code.slice(0, -1);
							}
							cellInfo.source.push(code);
						}
					}
				});
			}

			// Flush the cell that was still being assembled when the input ended.
			if (cellInfo.index !== -1) {
				emitCell(finalOffset);
			}
		});
	}
}
240
241
/**
 * Collects the `text` of every line of `document`, in line order.
 *
 * @param document Text document of a notebook cell.
 * @returns One array entry per line; an empty array when the document reports no lines.
 */
function getCellCode(document: TextDocument): string[] {
	const totalLines = document.lineCount;
	const collected: string[] = [];
	for (let lineIndex = 0; lineIndex < totalLines; lineIndex++) {
		collected.push(document.lineAt(lineIndex).text);
	}
	return collected;
}
251
252
/**
 * Root object of the JSON notebook text: nothing but the list of summarized cells.
 */
type Notebook = {
	cells: Array<SummaryCell>;
};
255
256
/**
 * Shortened version of a Jupyter cell JSON: cell type, id, language metadata and the
 * source lines only.
 */
type SummaryCell = {
	cell_type: 'code' | 'markdown';
	id: string;
	metadata: { language: string };
	source: string[];
};
267
268