Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
microsoft
GitHub Repository: microsoft/vscode
Path: blob/main/extensions/copilot/src/platform/notebook/common/alternativeContentProvider.xml.ts
13401 views
1
/*---------------------------------------------------------------------------------------------
2
* Copyright (c) Microsoft Corporation. All rights reserved.
3
* Licensed under the MIT License. See License.txt in the project root for license information.
4
*--------------------------------------------------------------------------------------------*/
5
import type { CancellationToken, NotebookCell, NotebookDocument, Uri } from 'vscode';
6
import { getLanguage } from '../../../util/common/languages';
7
import { isUri } from '../../../util/common/types';
8
import { findLast } from '../../../util/vs/base/common/arraysFind';
9
import { EndOfLine, NotebookCellKind, Position } from '../../../vscodeTypes';
10
import { BaseAlternativeNotebookContentProvider } from './alternativeContentProvider';
11
import { AlternativeNotebookDocument } from './alternativeNotebookDocument';
12
import { EOL, getCellIdMap, getDefaultLanguage, LineOfCellText, LineOfText, summarize, SummaryCell } from './helpers';
13
14
/** Opening of a cell start tag that carries attributes (note the trailing space). */
const StartDelimter = '<VSCode.Cell ';
/** Start tag of a cell that was written without any attributes. */
const StartEmptyCellDelimter = '<VSCode.Cell>';
/** Closing tag that terminates a cell. */
const EndDelimter = '</VSCode.Cell>';
17
18
/**
 * Builds the beginning of a cell start tag up to and including the `id` attribute.
 * The result ends with a space so that further attributes can be appended directly.
 */
function generatePartialStartDelimiterWithId(id: string) {
	const idAttribute = `id="${id}"`;
	return `${StartDelimter}${idAttribute} `;
}
21
22
/**
 * Builds the complete start tag for a cell: the `id` attribute followed by the
 * `language` attribute and the closing `>`.
 */
function generateCellMarker(cell: SummaryCell) {
	const openingWithId = generatePartialStartDelimiterWithId(cell.id);
	const languageAttribute = `language="${cell.language}"`;
	return `${openingWithId}${languageAttribute}>`;
}
25
26
/**
 * Returns `true` when the text contains any of the cell delimiters
 * (attributed start tag, attribute-less start tag, or end tag).
 */
export function isXmlContent(text: string): boolean {
	const delimiters = [StartDelimter, EndDelimter, StartEmptyCellDelimter];
	return delimiters.some(delimiter => text.includes(delimiter));
}
29
30
31
/**
 * Alternative (XML flavoured) text representation of a notebook, together with a
 * table of the offsets at which each cell's content starts inside that text.
 */
class AlternativeXmlDocument extends AlternativeNotebookDocument {
	constructor(text: string, private readonly cellOffsetMap: { offset: number; cell: NotebookCell }[], notebook: NotebookDocument) {
		super(text, notebook);
	}

	/**
	 * Translates a position inside a cell into a position inside the alternative text.
	 * The cell is located by searching the text for the cell's start marker.
	 */
	override fromCellPosition(cell: NotebookCell, position: Position): Position {
		const marker = generateCellMarker(summarize(cell));
		// Length of the line break that follows the marker, derived from the cell document's EOL.
		const lineBreakLength = cell.document.eol === EndOfLine.LF ? 1 : 2;

		const markerStart = this.getText().indexOf(marker);
		const contentStart = markerStart + marker.length + lineBreakLength;
		return this.positionAt(contentStart + cell.document.offsetAt(position));
	}

	/**
	 * Translates a position inside the alternative text into a cell and a position in that cell.
	 * Picks the last cell whose content offset is not beyond the requested offset;
	 * returns `undefined` when there is no such cell.
	 */
	override toCellPosition(position: Position): { cell: NotebookCell; position: Position } | undefined {
		const offset = this.offsetAt(position);
		const entry = findLast(this.cellOffsetMap, (candidate) => candidate.offset <= offset);
		if (entry) {
			return { cell: entry.cell, position: entry.cell.document.positionAt(offset - entry.offset) };
		}
		return undefined;
	}
}
58
59
/**
 * Alternative notebook content provider that represents every cell as
 * `<VSCode.Cell id="..." language="...">`, followed by the cell's lines and `</VSCode.Cell>`.
 */
export class AlternativeXmlNotebookContentProvider extends BaseAlternativeNotebookContentProvider {
	constructor() {
		super('xml');
	}

	/**
	 * Removes a leading cell start marker line and a trailing end delimiter from `text`.
	 * Text without those markers is returned unchanged.
	 */
	public stripCellMarkers(text: string): string {
		const lines = text.split(EOL);
		if (lines.length && (lines[0].startsWith(StartDelimter) || lines[0].startsWith(StartEmptyCellDelimter))) {
			lines.shift();
		}
		if (lines.length && lines[lines.length - 1].trim().endsWith(EndDelimter)) {
			const lastLine = lines[lines.length - 1];
			lines[lines.length - 1] = lastLine.substring(0, lastLine.lastIndexOf(EndDelimter));
		}
		return lines.join(EOL);
	}

	/**
	 * Produces a condensed outline of the notebook. Cells in `cellsToInclude` are emitted with
	 * their markers and their first non-blank line, surrounded by `// <existingCodeMarker>`
	 * placeholders for the omitted code. Runs of excluded cells collapse into one placeholder.
	 */
	public override getSummaryOfStructure(notebook: NotebookDocument, cellsToInclude: NotebookCell[], existingCodeMarker: string): string {
		const lines: string[] = [];
		const existingCodeMarkerWithComment = `// ${existingCodeMarker}`;
		notebook.getCells().forEach((cell) => {
			if (cellsToInclude.includes(cell)) {
				const cellSummary = summarize(cell);
				lines.push(generateCellMarker(cellSummary));
				const firstNonBlankLine = cellSummary.source.find(line => line.trim().length > 0);
				if (cellSummary.source.length && cellSummary.source[0].trim().length) {
					lines.push(cellSummary.source[0]);
					lines.push(existingCodeMarkerWithComment);
				} else if (firstNonBlankLine !== undefined) {
					// The cell starts with blank lines: emit the first meaningful line between two
					// placeholders. (Previously this branch computed the lines but never emitted them.)
					lines.push(existingCodeMarkerWithComment, firstNonBlankLine, existingCodeMarkerWithComment);
				} else {
					lines.push(existingCodeMarkerWithComment);
				}
				lines.push(EndDelimter);
			} else if (!lines.length || lines[lines.length - 1] !== existingCodeMarkerWithComment) {
				lines.push(existingCodeMarkerWithComment);
			}
		});
		return lines.join(EOL);
	}

	/**
	 * Converts a stream of text lines in the XML cell format into `start` / `line` / `end`
	 * events. Stops as soon as `token` reports cancellation.
	 */
	public async *parseAlternateContent(notebookOrUri: NotebookDocument | Uri, inputStream: AsyncIterable<LineOfText>, token: CancellationToken): AsyncIterable<LineOfCellText> {
		const isNotebook = !isUri(notebookOrUri);
		const cellIdMap = isNotebook ? getCellIdMap(notebookOrUri) : new Map<string, NotebookCell>();

		let index = -1;
		let endDelimiterSeen = false;
		const cellIdsSeen = new Set<string>();
		let previousLineEndedWithEndCellMarker = false;
		// A content line that ends with the end delimiter is held back until the next line shows
		// whether the delimiter really closed the cell or was part of the content.
		let previousLine: LineOfCellText | undefined = undefined;
		const defaultLanguage = isNotebook ? getLanguage(getDefaultLanguage(notebookOrUri)).languageId : undefined;
		for await (const lineOfText of inputStream) {
			if (token.isCancellationRequested) {
				break;
			}
			const line = lineOfText.value;
			if ((line.startsWith(StartDelimter) || line.startsWith(StartEmptyCellDelimter)) && (index < 0 || (endDelimiterSeen || (previousLineEndedWithEndCellMarker && previousLine)))) {
				if (!endDelimiterSeen && previousLineEndedWithEndCellMarker && previousLine) {
					// Last line didn't finish, emit that, but strip the end delimiter.
					previousLine.line = previousLine.line.substring(0, previousLine.line.lastIndexOf(EndDelimter));
					yield previousLine;
					yield { type: 'end', index: previousLine.index };
				}
				previousLineEndedWithEndCellMarker = false;
				previousLine = undefined;

				index += 1;
				endDelimiterSeen = false;
				const lineOfCellText: LineOfCellText = { type: 'start', index, uri: undefined, language: undefined, kind: NotebookCellKind.Code };
				const cellParts = extractCellParts(line, defaultLanguage);
				// The LLM sometimes returns duplicate cells with the same id.
				// We need tests for this.
				// As a workaround, subsequent cells with an already seen id are treated as new cells.
				if (cellParts.id && cellIdMap.get(cellParts.id)?.document.languageId === cellParts.language) {
					if (cellIdsSeen.has(cellParts.id)) {
						cellParts.id = '';
					} else {
						cellIdsSeen.add(cellParts.id);
					}
				} else {
					// Possible duplicate cell with the same id but different language.
					// In such cases, treat them as new cells.
					cellParts.id = '';
				}
				const cell = cellIdMap.get(cellParts.id)?.document.languageId === cellParts.language ? cellIdMap.get(cellParts.id) : undefined;
				lineOfCellText.id = cellParts.id;
				lineOfCellText.language = cellParts.language;
				lineOfCellText.uri = cell?.document.uri;
				lineOfCellText.kind = cell?.kind || (lineOfCellText.language === 'markdown' ? NotebookCellKind.Markup : NotebookCellKind.Code);
				yield lineOfCellText;
			} else if (line.startsWith(EndDelimter)) {
				if (previousLineEndedWithEndCellMarker && previousLine) {
					// The last line somehow ends with the cell marker (must have been added by the user),
					// yield the previous line.
					yield previousLine;
				}

				endDelimiterSeen = true;
				previousLineEndedWithEndCellMarker = false;
				previousLine = undefined;
				yield { type: 'end', index };
			} else if (index >= 0) {
				if (previousLineEndedWithEndCellMarker && previousLine) {
					// Somehow we have two subsequent lines that end with the cell marker.
					// Weird, should not happen; if it does, yield the previous line.
					yield previousLine;
					previousLine = undefined;
				}
				previousLineEndedWithEndCellMarker = line.endsWith(EndDelimter);
				if (previousLineEndedWithEndCellMarker) {
					previousLine = { type: 'line', index, line };
				} else {
					yield { type: 'line', index, line };
				}
			}
		}

		// The stream ended while a line ending with the end delimiter was still held back.
		// Emit it without the delimiter and close the cell instead of silently dropping it.
		if (!token.isCancellationRequested && previousLineEndedWithEndCellMarker && previousLine) {
			previousLine.line = previousLine.line.substring(0, previousLine.line.lastIndexOf(EndDelimter));
			yield previousLine;
			yield { type: 'end', index: previousLine.index };
		}
	}

	/**
	 * Builds an alternative document from already serialized `text`, mapping each cell start
	 * marker to a cell of `notebook` (by id, otherwise the first unmapped cell with the same language).
	 */
	public override getAlternativeDocumentFromText(text: string, notebook: NotebookDocument): AlternativeNotebookDocument {
		const cellIdMap = getCellIdMap(notebook);
		const notebookCells = notebook.getCells();
		// Markers without a `language` attribute (including the bare `<VSCode.Cell>`) fall back to
		// the notebook's default language, as in parseAlternateContent, instead of throwing.
		const defaultLanguage = getLanguage(getDefaultLanguage(notebook)).languageId;
		const cellOffsetMap: { offset: number; cell: NotebookCell }[] = [];

		// Parse the text to find cell markers and build the offset map
		const lines = text.split(EOL);
		let currentOffset = 0;

		for (const line of lines) {
			if (line.startsWith(StartDelimter) || line.startsWith(StartEmptyCellDelimter)) {
				const cellParts = extractCellParts(line, defaultLanguage);
				const cell = cellIdMap.get(cellParts.id) || notebookCells.find(c =>
					c.document.languageId === cellParts.language &&
					!cellOffsetMap.some(entry => entry.cell === c)
				);

				if (cell) {
					// The cell content starts right after the marker line and its line break.
					cellOffsetMap.push({ offset: currentOffset + line.length + EOL.length, cell });
				}
			}

			currentOffset += line.length + EOL.length;
		}

		return new AlternativeXmlDocument(text, cellOffsetMap, notebook);
	}

	/**
	 * Serializes the notebook's cells (optionally skipping markup cells) into the XML cell format
	 * and records the offset at which each cell's content begins.
	 */
	public override getAlternativeDocument(notebook: NotebookDocument, excludeMarkdownCells?: boolean): AlternativeNotebookDocument {
		const cells = notebook.getCells().filter(cell => excludeMarkdownCells ? cell.kind !== NotebookCellKind.Markup : true).map(cell => summarize(cell));

		const serializedCells: string[] = [];
		const cellOffsetMap: { offset: number; cell: NotebookCell }[] = [];
		// Running offset of the current cell inside the joined content. Tracking it directly avoids
		// searching the content for each cell, which would resolve to the wrong place whenever a
		// cell's serialized text also occurs earlier in the document.
		let cellStart = 0;
		for (const cell of cells) {
			const prefix = `${generateCellMarker(cell)}${EOL}`;
			const serialized = `${prefix}${cell.source.join(EOL)}${EOL}${EndDelimter}`;
			cellOffsetMap.push({ offset: cellStart + prefix.length, cell: notebook.cellAt(cell.index) });
			serializedCells.push(serialized);
			cellStart += serialized.length + EOL.length;
		}
		const content = serializedCells.join(EOL);

		return new AlternativeXmlDocument(content, cellOffsetMap, notebook);
	}
}
224
225
226
/**
 * Reads the `id` and `language` attributes from a cell start marker line.
 *
 * @param line The line holding the cell marker.
 * @param defaultLanguage Language to use when the marker has no `language` attribute.
 * @returns The trimmed id (empty string when absent, as is the case for new cells) and the language.
 * @throws Error when no language attribute is present and no default language applies.
 */
function extractCellParts(line: string, defaultLanguage: string | undefined): { id: string; language: string } {
	// New cells will not have an id.
	const id = /id="([^"]+)"/.exec(line)?.[1].trim() ?? '';
	const languageAttribute = /language="([^"]+)"/.exec(line);

	if (languageAttribute) {
		return { id, language: languageAttribute[1].trim() };
	}

	// If we have a cell marker but no language, we assume the default language.
	if (typeof defaultLanguage === 'string' && isXmlContent(line)) {
		return { id, language: defaultLanguage };
	}

	throw new Error(`Invalid cell part in ${line}`);
}
240
241