Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
microsoft
GitHub Repository: microsoft/vscode
Path: blob/main/extensions/copilot/src/platform/notebook/common/alternativeContentProvider.text.ts
13401 views
1
/*---------------------------------------------------------------------------------------------
2
* Copyright (c) Microsoft Corporation. All rights reserved.
3
* Licensed under the MIT License. See License.txt in the project root for license information.
4
*--------------------------------------------------------------------------------------------*/
5
import type { CancellationToken, NotebookCell, NotebookDocument, Position, Uri } from 'vscode';
6
import { getLanguage } from '../../../util/common/languages';
7
import { isUri } from '../../../util/common/types';
8
import { findLast } from '../../../util/vs/base/common/arraysFind';
9
import { EndOfLine, NotebookCellKind } from '../../../vscodeTypes';
10
import { BaseAlternativeNotebookContentProvider } from './alternativeContentProvider';
11
import { AlternativeNotebookDocument } from './alternativeNotebookDocument';
12
import { EOL, getCellIdMap, getDefaultLanguage, LineOfCellText, LineOfText, summarize, SummaryCell } from './helpers';
13
14
/**
 * Builds the single-line marker that introduces a cell in the text representation.
 *
 * The marker has the form `<lineComment>%% vscode.cell [id=<id>] [language=<language>]`;
 * the `[id=...]` segment is left out when the cell has no (truthy) id.
 *
 * @param cell Summary of the cell; only `id` and `language` are read.
 * @param lineComment Line-comment prefix placed directly in front of `%%`.
 * @returns The marker line, without a trailing line break.
 */
export function generateCellTextMarker(cell: SummaryCell, lineComment: string): string {
	const segments = [`${lineComment}%% vscode.cell`];
	if (cell.id) {
		segments.push(`[id=${cell.id}]`);
	}
	segments.push(`[language=${cell.language}]`);
	return segments.join(' ');
}
18
19
/**
 * Cheap pre-check for a cell marker: reports whether the line contains the
 * text `vscode.cell` anywhere, ignoring case. A `true` result does not
 * guarantee that the line is a well-formed marker.
 *
 * @param line A single line of text.
 * @returns `true` when `vscode.cell` occurs in the lower-cased line.
 */
export function lineMightHaveCellMarker(line: string): boolean {
	const normalized = line.toLowerCase();
	return normalized.indexOf('vscode.cell') !== -1;
}
22
23
/**
 * Alternative (plain text) view of a notebook that can translate positions
 * between individual cells and the combined text.
 *
 * `cellOffsetMap` records, per cell, the offset where the cell's marker line
 * starts (`offset`) and the offset where the cell's own source starts
 * (`sourceOffset`) inside the combined text.
 */
class AlternativeTextDocument extends AlternativeNotebookDocument {
	constructor(text: string, private readonly cellOffsetMap: { offset: number; sourceOffset: number; cell: NotebookCell }[], notebook: NotebookDocument) {
		super(text, notebook);
	}

	/**
	 * Maps a position inside `cell` to the matching position in the combined text.
	 *
	 * The cell is located by searching the text for its regenerated marker line;
	 * the marker, its line break and (for markup cells) the opening block-comment
	 * line are then skipped before adding the offset within the cell.
	 *
	 * NOTE(review): when the marker is not present in the text, `indexOf` yields -1
	 * and the result is still computed from that value. The line-break width is
	 * taken from the cell document's EOL setting — confirm this matches the line
	 * break used when the combined text was generated.
	 */
	override fromCellPosition(cell: NotebookCell, position: Position): Position {
		const marker = generateCellTextMarker(summarize(cell), getLineCommentStart(this.notebook));
		const lineBreakWidth = cell.document.eol === EndOfLine.LF ? 1 : 2;
		const [blockCommentOpen] = getBlockComment(this.notebook);

		// Start of the line that follows the marker line.
		let offset = this.getText().indexOf(marker) + marker.length + lineBreakWidth;
		if (cell.kind === NotebookCellKind.Markup) {
			// Markup cells are wrapped in a block comment; skip its opening line as well.
			offset += blockCommentOpen.length + lineBreakWidth;
		}
		offset += cell.document.offsetAt(position);
		return this.positionAt(offset);
	}

	/**
	 * Maps a position in the combined text back to a cell and a position inside it.
	 *
	 * Picks the last entry of the offset map whose source starts at or before the
	 * given position; returns `undefined` when no entry qualifies.
	 */
	override toCellPosition(position: Position): { cell: NotebookCell; position: Position } | undefined {
		const absoluteOffset = this.offsetAt(position);
		const entry = findLast(this.cellOffsetMap, (candidate) => candidate.sourceOffset <= absoluteOffset);
		if (entry) {
			const positionInCell = entry.cell.document.positionAt(absoluteOffset - entry.sourceOffset);
			return { cell: entry.cell, position: positionInCell };
		}
		return undefined;
	}
}
52
53
54
/**
 * Alternative notebook content provider for the `'text'` format.
 *
 * In this format every cell starts with a marker line (see `generateCellTextMarker`)
 * followed by the cell's source; markdown cells are additionally wrapped in the
 * block-comment delimiters returned by `getBlockComment`.
 */
export class AlternativeTextNotebookContentProvider extends BaseAlternativeNotebookContentProvider {
	constructor() {
		super('text');
	}

	/**
	 * Removes the first line of `text` when it looks like a cell marker.
	 *
	 * @param text Text that may start with a cell marker line.
	 * @returns `text` without its first line when that line contains `vscode.cell`
	 * (case-insensitive); otherwise `text` unchanged.
	 */
	public stripCellMarkers(text: string): string {
		const lines = text.split(EOL);
		if (lines.length === 0 || !lineMightHaveCellMarker(lines[0])) {
			return text;
		}
		return lines.slice(1).join(EOL);
	}

	/**
	 * Produces a condensed text outline of the notebook.
	 *
	 * Cells listed in `cellsToInclude` are rendered with their marker, their first
	 * non-blank line and the "existing code" placeholder standing in for the rest.
	 * Every run of cells that are not included collapses into one placeholder line.
	 *
	 * @param notebook Notebook to summarize.
	 * @param cellsToInclude Cells that should appear in the outline.
	 * @param existingCodeMarker Placeholder text; it is prefixed with the line comment and a space.
	 * @returns The outline, with entries joined by `EOL`.
	 */
	public override getSummaryOfStructure(notebook: NotebookDocument, cellsToInclude: NotebookCell[], existingCodeMarker: string): string {
		const blockComment = getBlockComment(notebook);
		const lineCommentStart = getLineCommentStart(notebook);
		const existingCodeMarkerWithComment = `${lineCommentStart} ${existingCodeMarker}`;
		// Set lookup instead of scanning `cellsToInclude` once per notebook cell.
		const included = new Set(cellsToInclude);
		const lines: string[] = [];

		for (const cell of notebook.getCells()) {
			if (!included.has(cell)) {
				// Collapse consecutive skipped cells into a single placeholder.
				if (lines.length === 0 || lines[lines.length - 1] !== existingCodeMarkerWithComment) {
					lines.push(existingCodeMarkerWithComment);
				}
				continue;
			}

			const cellSummary = summarize(cell);
			const firstLine = cellSummary.source[0];
			const firstNonBlankLine = cellSummary.source.find(line => line.trim().length > 0);
			if (firstLine !== undefined && firstLine.trim().length > 0) {
				// The cell starts with content: show it, then the placeholder.
				cellSummary.source = [firstLine, existingCodeMarkerWithComment];
			} else if (firstNonBlankLine !== undefined) {
				// Leading blank lines: placeholder, first real line, placeholder.
				cellSummary.source = [existingCodeMarkerWithComment, firstNonBlankLine, existingCodeMarkerWithComment];
			} else {
				// Empty or whitespace-only cell.
				cellSummary.source = [existingCodeMarkerWithComment];
			}
			lines.push(generateAlternativeCellTextContent(cellSummary, lineCommentStart, blockComment).content);
		}

		return lines.join(EOL);
	}

	/**
	 * Parses a stream of text lines in the `'text'` format into cell events.
	 *
	 * For each recognized cell marker a `'start'` record is yielded (preceded by an
	 * `'end'` record for the previous cell, if any). Following lines are yielded as
	 * `'line'` records for the current cell. Lines that appear before the first
	 * marker are dropped. For markdown cells the block-comment delimiter lines are
	 * consumed and not yielded. A final `'end'` record is yielded when at least one
	 * cell was started, including when the loop stops due to cancellation.
	 *
	 * @param notebookOrUri Notebook used to resolve cell ids, comment syntax and the
	 * default language; when a `Uri` is passed no ids are resolved and defaults are used.
	 * @param inputStream Lines to parse; only the `value` of each item is read.
	 * @param token Checked before each line; parsing stops once cancellation is requested.
	 */
	public override async *parseAlternateContent(notebookOrUri: NotebookDocument | Uri, inputStream: AsyncIterable<LineOfText>, token: CancellationToken): AsyncIterable<LineOfCellText> {
		const isNotebook = !isUri(notebookOrUri);
		const cellIdMap = isNotebook ? getCellIdMap(notebookOrUri) : new Map<string, NotebookCell>();

		let inMarkdownCell = false;
		let isInTripleQuotes = false;
		let emittedStart = false;
		let cellIndex = -1;

		const lineCommentStart = getLineCommentStart(isNotebook ? notebookOrUri : undefined);
		const blockComment = getBlockComment(isNotebook ? notebookOrUri : undefined);
		const defaultLanguage = isNotebook ? getLanguage(getDefaultLanguage(notebookOrUri)).languageId : undefined;
		const cellIdsSeen = new Set<string>();

		for await (const lineOfText of inputStream) {
			if (token.isCancellationRequested) {
				break;
			}
			const line = lineOfText.value;

			// Check for a new cell delimiter.
			// Sometimes the LLM returns cells without the `vscode.cell` marker, e.g. `#%% [language=python]`.
			const isLineCommentForEmptyCellWithoutCellMarker = line.startsWith(`${lineCommentStart}%% [`) && line.trimEnd().endsWith(']');
			const isLineCommentWithCellMarker = line.startsWith(`${lineCommentStart}%% vscode.cell`);
			// Only attempt the extraction when the line looks like a marker; doing it for every line is expensive.
			const cellParts = (isLineCommentWithCellMarker || isLineCommentForEmptyCellWithoutCellMarker) ? extractCellParts(line, defaultLanguage) : undefined;

			if (cellParts?.language) {
				cellIndex += 1;
				// `emitted` is an extra field carried on the start record; nothing in this file reads it.
				const lineOfCellText: LineOfCellText & { emitted: boolean } = { index: cellIndex, uri: undefined, language: undefined, kind: NotebookCellKind.Code, emitted: false, type: 'start' };

				const knownCell = cellParts.id ? cellIdMap.get(cellParts.id) : undefined;
				if (knownCell?.document.languageId === cellParts.language) {
					// The LLM may return the same cell id twice; only the first occurrence keeps the id.
					if (cellIdsSeen.has(cellParts.id)) {
						cellParts.id = '';
					} else {
						cellIdsSeen.add(cellParts.id);
					}
				} else {
					// Missing/unknown id, or same id with a different language: treat as a new cell.
					cellParts.id = '';
				}

				const cell = cellIdMap.get(cellParts.id);
				lineOfCellText.id = cellParts.id;
				lineOfCellText.language = cellParts.language;
				lineOfCellText.uri = cell?.document.uri;
				lineOfCellText.kind = cell?.kind || (lineOfCellText.language === 'markdown' ? NotebookCellKind.Markup : NotebookCellKind.Code);
				inMarkdownCell = lineOfCellText.language === 'markdown';
				isInTripleQuotes = false;

				// Close the previous cell before opening the new one.
				if (emittedStart) {
					yield { index: cellIndex - 1, type: 'end' };
				}

				emittedStart = true;
				yield lineOfCellText;
				continue;
			}

			// Ignore everything that precedes the first cell marker.
			if (!emittedStart) {
				continue;
			}

			if (!inMarkdownCell) {
				// Non-markdown cell: every line is cell content.
				yield { index: cellIndex, line, type: 'line' };
				continue;
			}

			// Markdown cell: swallow the block-comment delimiter lines, emit everything else.
			const expectedDelimiter = isInTripleQuotes ? blockComment[1] : blockComment[0];
			if (line === expectedDelimiter) {
				isInTripleQuotes = !isInTripleQuotes;
			} else {
				yield { index: cellIndex, line, type: 'line' };
			}
		}

		if (emittedStart) {
			yield { index: cellIndex, type: 'end' };
		}
	}

	/**
	 * Builds an alternative document from already generated text by scanning it
	 * for cell markers and associating each marker with a cell of `notebook`.
	 *
	 * A marker is matched by its id first; if that fails, the first cell with the
	 * same language that has not been mapped yet is used. Markers that cannot be
	 * matched to a cell are skipped.
	 *
	 * @param text Text in the `'text'` format, with lines separated by `EOL`.
	 * @param notebook Notebook whose cells the text refers to.
	 */
	public override getAlternativeDocumentFromText(text: string, notebook: NotebookDocument): AlternativeNotebookDocument {
		const blockComment = getBlockComment(notebook);
		const lineCommentStart = getLineCommentStart(notebook);
		const cellIdMap = getCellIdMap(notebook);
		const allCells = notebook.getCells();
		const cellOffsetMap: { offset: number; sourceOffset: number; cell: NotebookCell }[] = [];
		// Cells already present in `cellOffsetMap`, for the language-based fallback below.
		const mappedCells = new Set<NotebookCell>();

		// Parse the text to find cell markers and build the offset map.
		let currentOffset = 0;
		for (const line of text.split(EOL)) {
			const lineStart = currentOffset;
			// From here on `currentOffset` is the start of the next line.
			currentOffset += line.length + EOL.length;

			const isLineCommentForEmptyCellWithoutCellMarker = line.startsWith(`${lineCommentStart}%% [`) && line.trimEnd().endsWith(']');
			const isLineCommentWithCellMarker = line.startsWith(`${lineCommentStart}%% vscode.cell`);
			if (!isLineCommentWithCellMarker && !isLineCommentForEmptyCellWithoutCellMarker) {
				continue;
			}

			const cellParts = extractCellParts(line, undefined);
			if (!cellParts) {
				continue;
			}

			const cell = cellIdMap.get(cellParts.id) || allCells.find(c =>
				c.document.languageId === cellParts.language &&
				!mappedCells.has(c)
			);
			if (!cell) {
				continue;
			}

			// The source starts after the marker line and, for markdown, after the opening block-comment line.
			const isMarkdown = cellParts.language === 'markdown';
			const sourceOffset = currentOffset + (isMarkdown ? blockComment[0].length + EOL.length : 0);

			cellOffsetMap.push({ offset: lineStart, sourceOffset, cell });
			mappedCells.add(cell);
		}

		return new AlternativeTextDocument(text, cellOffsetMap, notebook);
	}

	/**
	 * Renders the notebook in the `'text'` format and records where each cell
	 * lands in the generated text.
	 *
	 * @param notebook Notebook to render.
	 * @param excludeMarkdownCells When truthy, markup cells are left out.
	 */
	public override getAlternativeDocument(notebook: NotebookDocument, excludeMarkdownCells?: boolean): AlternativeNotebookDocument {
		const cells = notebook.getCells().filter(cell => excludeMarkdownCells ? cell.kind !== NotebookCellKind.Markup : true).map(cell => summarize(cell));
		const blockComment = getBlockComment(notebook);
		const lineCommentStart = getLineCommentStart(notebook);
		const cellContent = cells.map(cell => ({ ...generateAlternativeCellTextContent(cell, lineCommentStart, blockComment), cell: notebook.cellAt(cell.index) }));
		const content = cellContent.map(cell => cell.content).join(EOL);

		// Track offsets while walking the cells instead of searching the joined text:
		// a text search finds the first occurrence, which is wrong when a cell's
		// rendered text also appears earlier in the document, and is quadratic.
		let offset = 0;
		const cellOffsetMap = cellContent.map(({ content: cellText, prefix, cell }) => {
			const entry = { offset, sourceOffset: offset + prefix.length, cell };
			offset += cellText.length + EOL.length;
			return entry;
		});

		return new AlternativeTextDocument(content, cellOffsetMap, notebook);
	}

}
246
247
/**
 * Renders one cell in the text format.
 *
 * Non-markdown cells become `<marker><EOL><source>`. Markdown cells are wrapped
 * in the block comment: `<marker><EOL><open><EOL><source><EOL><close>`.
 *
 * @param cell Cell summary; `language` and `source` are read here.
 * @param lineCommentStart Line-comment prefix used for the marker line.
 * @param blockComment Opening and closing delimiters used around markdown cells.
 * @returns `content`, the full rendered cell, and `prefix`, the part of
 * `content` that precedes the cell's own source.
 */
function generateAlternativeCellTextContent(cell: SummaryCell, lineCommentStart: string, blockComment: [string, string]): { content: string; prefix: string } {
	const markerLine = generateCellTextMarker(cell, lineCommentStart);
	const body = cell.source.join(EOL);

	if (cell.language !== 'markdown') {
		const prefix = `${markerLine}${EOL}`;
		return { content: `${prefix}${body}`, prefix };
	}

	const [open, close] = blockComment;
	const prefix = `${markerLine}${EOL}${open}${EOL}`;
	return { content: `${prefix}${body}${EOL}${close}`, prefix };
}
256
257
/**
 * Returns the opening and closing block-comment delimiters for a notebook.
 *
 * @param notebook Notebook whose default language decides the delimiters.
 * @returns A pair of triple double-quotes when no notebook is given; otherwise
 * the block comment of the notebook's default language, or a pair of triple
 * backticks when that language defines none.
 */
export function getBlockComment(notebook?: NotebookDocument): [string, string] {
	if (notebook) {
		const { blockComment } = getLanguage(getDefaultLanguage(notebook));
		return blockComment ?? ['```', '```'];
	}
	return ['"""', '"""'];
}
264
265
/**
 * Returns the line-comment prefix for a notebook.
 *
 * @param notebook Notebook whose default language decides the prefix.
 * @returns `#` when no notebook is given or when the default language has no
 * (non-empty) line-comment start; otherwise that language's line-comment start.
 */
export function getLineCommentStart(notebook?: NotebookDocument): string {
	if (notebook) {
		const { lineComment } = getLanguage(getDefaultLanguage(notebook));
		return lineComment.start || '#';
	}
	return '#';
}
272
273
/**
 * Extracts the `[id=...]` and `[language=...]` values from a cell marker line.
 *
 * @param line Candidate marker line.
 * @param defaultLanguage Language to fall back to when the line carries no
 * `[language=...]` segment but does contain `vscode.cell`.
 * @returns The trimmed id (empty string when absent) and language, or
 * `undefined` when no language can be determined.
 */
function extractCellParts(line: string, defaultLanguage: string | undefined): { id: string; language: string } | undefined {
	const id = /\[id=(.+?)\]/.exec(line)?.[1].trim() ?? '';
	const explicitLanguage = /\[language=(.+?)\]/.exec(line)?.[1].trim();

	if (explicitLanguage !== undefined) {
		return { id, language: explicitLanguage };
	}
	// A marker without a language falls back to the default language, if one is known.
	if (typeof defaultLanguage === 'string' && lineMightHaveCellMarker(line)) {
		return { id, language: defaultLanguage };
	}
	return undefined;
}
285
286