CoCalc -- alternativeContentProvider.xml.ts

GitHub Repository: microsoft/vscode
Path: blob/main/extensions/copilot/src/platform/notebook/common/alternativeContentProvider.xml.ts
13401 views
1
/*---------------------------------------------------------------------------------------------
2
 *  Copyright (c) Microsoft Corporation. All rights reserved.
3
 *  Licensed under the MIT License. See License.txt in the project root for license information.
4
 *--------------------------------------------------------------------------------------------*/
5
import type { CancellationToken, NotebookCell, NotebookDocument, Uri } from 'vscode';
6
import { getLanguage } from '../../../util/common/languages';
7
import { isUri } from '../../../util/common/types';
8
import { findLast } from '../../../util/vs/base/common/arraysFind';
9
import { EndOfLine, NotebookCellKind, Position } from '../../../vscodeTypes';
10
import { BaseAlternativeNotebookContentProvider } from './alternativeContentProvider';
11
import { AlternativeNotebookDocument } from './alternativeNotebookDocument';
12
import { EOL, getCellIdMap, getDefaultLanguage, LineOfCellText, LineOfText, summarize, SummaryCell } from './helpers';
13

14
/**
 * Prefix of an opening cell tag that carries attributes. The trailing space is intentional:
 * the first attribute is appended directly after it.
 * (The "Delimter" spelling is kept because the name is referenced throughout this file.)
 */
const StartDelimter = `<VSCode.Cell `;
15
/** Opening cell tag written without any attributes (no id, no language). */
const StartEmptyCellDelimter = `<VSCode.Cell>`;
16
/** Closing cell tag that terminates the content of a cell. */
const EndDelimter = `</VSCode.Cell>`;
17

18
/**
 * Builds the leading part of an opening cell tag up to and including the `id` attribute.
 * The result ends with a space so that further attributes can be appended directly.
 *
 * @param id Identifier to embed in the `id` attribute (inserted verbatim, not escaped).
 * @returns The partial opening tag, e.g. `<VSCode.Cell id="abc" `.
 */
function generatePartialStartDelimiterWithId(id: string) {
	const idAttribute = `id="${id}" `;
	return StartDelimter + idAttribute;
}
21

22
/**
 * Builds the complete opening tag for a cell, carrying both its id and its language.
 *
 * @param cell Summary whose `id` and `language` are written into the tag.
 * @returns The opening tag, e.g. `<VSCode.Cell id="abc" language="python">`.
 */
function generateCellMarker(cell: SummaryCell) {
	const tagWithId = generatePartialStartDelimiterWithId(cell.id);
	const languageAttribute = `language="${cell.language}">`;
	return tagWithId + languageAttribute;
}
25

26
/**
 * Tells whether the text contains any of the cell tags used by the XML notebook format:
 * an opening tag with attributes, a closing tag, or an attribute-less opening tag.
 *
 * @param text Text to inspect.
 * @returns `true` when at least one cell tag occurs anywhere in `text`.
 */
export function isXmlContent(text: string): boolean {
	const cellTags = [StartDelimter, EndDelimter, StartEmptyCellDelimter];
	return cellTags.some(tag => text.includes(tag));
}
29

30

31
/**
 * XML-flavoured alternative text representation of a notebook, able to translate positions
 * between a notebook cell's own document and the combined alternative text.
 */
class AlternativeXmlDocument extends AlternativeNotebookDocument {
	/**
	 * @param text The full alternative text.
	 * @param cellOffsetMap For each mapped cell, the offset in `text` at which that cell's content begins.
	 * @param notebook The notebook the text was produced for.
	 */
	constructor(text: string, private readonly cellOffsetMap: { offset: number; cell: NotebookCell }[], notebook: NotebookDocument) {
		super(text, notebook);
	}

	/**
	 * Converts a position inside `cell` into the matching position in the alternative text.
	 * The cell is located by searching the text for its opening tag; the content is taken to
	 * start right after that tag and one line break.
	 */
	override fromCellPosition(cell: NotebookCell, position: Position): Position {
		const marker = generateCellMarker(summarize(cell));

		// Length of the line break that follows the opening tag, derived from the cell document's EOL.
		// NOTE(review): the alternative text is assembled elsewhere in this file with the shared `EOL`
		// constant — confirm both always have the same length.
		const lineBreakLength = cell.document.eol === EndOfLine.LF ? 1 : 2;

		// NOTE(review): `indexOf` yields -1 when the cell's tag is not present in the text; that case
		// is not handled here — confirm callers only pass cells contained in this document.
		const markerStart = this.getText().indexOf(marker);
		const offsetWithinCell = cell.document.offsetAt(position);

		return this.positionAt(markerStart + marker.length + lineBreakLength + offsetWithinCell);
	}

	/**
	 * Converts a position in the alternative text into a cell plus a position inside that cell.
	 * The chosen cell is the last mapped one whose content starts at or before the position.
	 *
	 * @returns `undefined` when the position lies before the content of every mapped cell.
	 */
	override toCellPosition(position: Position): { cell: NotebookCell; position: Position } | undefined {
		const absoluteOffset = this.offsetAt(position);
		const entry = findLast(this.cellOffsetMap, candidate => candidate.offset <= absoluteOffset);
		return entry
			? { cell: entry.cell, position: entry.cell.document.positionAt(absoluteOffset - entry.offset) }
			: undefined;
	}
}
58

59
/**
 * Converts notebooks to and from an XML-like text format in which every cell is wrapped in
 * `<VSCode.Cell id="…" language="…">` … `</VSCode.Cell>` tags.
 */
export class AlternativeXmlNotebookContentProvider extends BaseAlternativeNotebookContentProvider {
	constructor() {
		super('xml');
	}

	/**
	 * Removes the wrapping cell tags from the text of a single cell.
	 * The first line is dropped when it starts with an opening cell tag, and a closing tag
	 * that ends the last line is cut off along with any whitespace trailing it.
	 *
	 * @param text Cell text, possibly wrapped in cell tags.
	 * @returns The text without the leading tag line and trailing closing tag.
	 */
	public stripCellMarkers(text: string): string {
		const lines = text.split(EOL);
		if (lines.length && (lines[0].startsWith(StartDelimter) || lines[0].startsWith(StartEmptyCellDelimter))) {
			lines.shift();
		}
		if (lines.length) {
			const lastLine = lines[lines.length - 1];
			if (lastLine.trim().endsWith(EndDelimter)) {
				lines[lines.length - 1] = lastLine.substring(0, lastLine.lastIndexOf(EndDelimter));
			}
		}
		return lines.join(EOL);
	}

	/**
	 * Produces a condensed outline of the notebook.
	 *
	 * Every cell in `cellsToInclude` is emitted as its opening tag, its first non-blank line and
	 * a closing tag, with `// <existingCodeMarker>` standing in for the omitted lines. Cells that
	 * are not included are represented by a single `// <existingCodeMarker>` line; consecutive
	 * excluded cells share one such line.
	 *
	 * @param notebook Notebook to summarise.
	 * @param cellsToInclude Cells that should appear with their tag and first non-blank line.
	 * @param existingCodeMarker Text placed after `// ` to denote omitted content.
	 * @returns The outline, lines joined with `EOL`.
	 */
	public override getSummaryOfStructure(notebook: NotebookDocument, cellsToInclude: NotebookCell[], existingCodeMarker: string): string {
		const lines: string[] = [];
		const existingCodeMarkerWithComment = `// ${existingCodeMarker}`;
		const included = new Set(cellsToInclude);

		for (const cell of notebook.getCells()) {
			if (included.has(cell)) {
				const cellSummary = summarize(cell);
				lines.push(generateCellMarker(cellSummary));

				const firstNonBlankIndex = cellSummary.source.findIndex(line => line.trim().length > 0);
				if (firstNonBlankIndex === 0) {
					lines.push(cellSummary.source[0], existingCodeMarkerWithComment);
				} else if (firstNonBlankIndex > 0) {
					// Leading blank lines are replaced by a marker. These lines used to be assigned to
					// `cellSummary.source` and never emitted, leaving the cell empty in the outline.
					lines.push(existingCodeMarkerWithComment, cellSummary.source[firstNonBlankIndex], existingCodeMarkerWithComment);
				} else {
					// Empty cell, or a cell with blank lines only.
					lines.push(existingCodeMarkerWithComment);
				}
				lines.push(EndDelimter);
			} else if (!lines.length || lines[lines.length - 1] !== existingCodeMarkerWithComment) {
				// Collapse runs of excluded cells into a single marker line.
				lines.push(existingCodeMarkerWithComment);
			}
		}
		return lines.join(EOL);
	}

	/**
	 * Parses a stream of text lines in the XML cell format into a stream of cell events.
	 *
	 * For each cell a `start` event is yielded (with the cell's index, id, language, uri and kind),
	 * followed by `line` events for its content and an `end` event. Lines that appear before the
	 * first opening tag are ignored. An opening tag is only recognised at the very beginning or
	 * after the previous cell has been closed; otherwise it is treated as cell content.
	 *
	 * A content line that ends with the closing tag is held back until the next line shows
	 * whether the tag closed the cell or was part of the content.
	 *
	 * @param notebookOrUri Existing notebook used to match cell ids, or just a uri when there is none.
	 * @param inputStream Lines of text to parse.
	 * @param token Parsing stops as soon as cancellation is requested.
	 */
	public async *parseAlternateContent(notebookOrUri: NotebookDocument | Uri, inputStream: AsyncIterable<LineOfText>, token: CancellationToken): AsyncIterable<LineOfCellText> {
		const isNotebook = !isUri(notebookOrUri);
		const cellIdMap = isNotebook ? getCellIdMap(notebookOrUri) : new Map<string, NotebookCell>();

		let index = -1;
		let endDelimiterSeen = false;
		const cellIdsSeen = new Set<string>();
		let previousLineEndedWithEndCellMarker = false;
		let previousLine: LineOfCellText | undefined = undefined;
		const defaultLanguage = isNotebook ? getLanguage(getDefaultLanguage(notebookOrUri)).languageId : undefined;
		for await (const lineOfText of inputStream) {
			if (token.isCancellationRequested) {
				break;
			}
			const line = lineOfText.value;
			if ((line.startsWith(StartDelimter) || line.startsWith(StartEmptyCellDelimter)) && (index < 0 || (endDelimiterSeen || (previousLineEndedWithEndCellMarker && previousLine)))) {
				if (!endDelimiterSeen && previousLineEndedWithEndCellMarker && previousLine) {
					// The held-back line closed the previous cell inline: emit it without the
					// closing tag, then close that cell.
					previousLine.line = previousLine.line.substring(0, previousLine.line.lastIndexOf(EndDelimter));
					yield previousLine;
					yield { type: 'end', index: previousLine.index };
				}
				previousLineEndedWithEndCellMarker = false;
				previousLine = undefined;

				index += 1;
				endDelimiterSeen = false;
				const lineOfCellText: LineOfCellText = { type: 'start', index, uri: undefined, language: undefined, kind: NotebookCellKind.Code };
				const cellParts = extractCellParts(line, defaultLanguage);
				// The LLM may return several cells with the same id.
				// We need tests for this.
				// As a workaround, only the first occurrence keeps the id; later ones are treated as new cells.
				if (cellParts.id && cellIdMap.get(cellParts.id)?.document.languageId === cellParts.language) {
					if (cellIdsSeen.has(cellParts.id)) {
						cellParts.id = '';
					} else {
						cellIdsSeen.add(cellParts.id);
					}
				} else {
					// Unknown id, or a known id paired with a different language.
					// In such cases, treat the cell as a new cell.
					cellParts.id = '';
				}
				const cell = cellIdMap.get(cellParts.id)?.document.languageId === cellParts.language ? cellIdMap.get(cellParts.id) : undefined;
				lineOfCellText.id = cellParts.id;
				lineOfCellText.language = cellParts.language;
				lineOfCellText.uri = cell?.document.uri;
				lineOfCellText.kind = cell?.kind || (lineOfCellText.language === 'markdown' ? NotebookCellKind.Markup : NotebookCellKind.Code);
				yield lineOfCellText;
			} else if (line.startsWith(EndDelimter)) {
				if (previousLineEndedWithEndCellMarker && previousLine) {
					// The held-back line ends with the cell marker but a real closing tag follows,
					// so the marker was part of the content (must have been added by the user):
					// yield the previous line unchanged.
					yield previousLine;
				}

				endDelimiterSeen = true;
				previousLineEndedWithEndCellMarker = false;
				previousLine = undefined;
				yield { type: 'end', index };
			} else if (index >= 0) {
				if (previousLineEndedWithEndCellMarker && previousLine) {
					// Somehow the held-back line is followed by more content. This should not
					// happen; if it does, yield the previous line as regular content.
					yield previousLine;
					previousLine = undefined;
				}
				previousLineEndedWithEndCellMarker = line.endsWith(EndDelimter);
				if (previousLineEndedWithEndCellMarker) {
					// Hold the line back until the next line tells us how to interpret the trailing tag.
					previousLine = { type: 'line', index, line };
				} else {
					yield { type: 'line', index, line };
				}
			}
		}
		// NOTE(review): if the stream ends (or is cancelled) while `previousLine` is still held back,
		// that line is never yielded and no `end` event is emitted for its cell — confirm this is intended.
	}

	/**
	 * Builds an alternative document from text that is already in the XML cell format,
	 * recording for each recognised opening tag where the content of its cell starts.
	 *
	 * A tag is matched to a notebook cell by id first; failing that, to the first cell with the
	 * same language that has not been matched yet. Tags that match no cell are skipped.
	 *
	 * @param text Text in the XML cell format.
	 * @param notebook Notebook whose cells the tags are matched against.
	 */
	public override getAlternativeDocumentFromText(text: string, notebook: NotebookDocument): AlternativeNotebookDocument {
		const cellIdMap = getCellIdMap(notebook);
		const cellOffsetMap: { offset: number; cell: NotebookCell }[] = [];

		// Offset of the start of the current line within `text`.
		let currentOffset = 0;

		for (const line of text.split(EOL)) {
			if (line.startsWith(StartDelimter) || line.startsWith(StartEmptyCellDelimter)) {
				// NOTE(review): no default language is supplied here, so a tag without a `language`
				// attribute makes extractCellParts throw — confirm that is acceptable for this input.
				const cellParts = extractCellParts(line, undefined);
				const cell = cellIdMap.get(cellParts.id) || notebook.getCells().find(c =>
					c.document.languageId === cellParts.language &&
					!cellOffsetMap.some(entry => entry.cell === c)
				);

				if (cell) {
					// The cell content starts after the tag line and its line break.
					cellOffsetMap.push({ offset: currentOffset + line.length + EOL.length, cell });
				}
			}

			currentOffset += line.length + EOL.length;
		}

		return new AlternativeXmlDocument(text, cellOffsetMap, notebook);
	}

	/**
	 * Serialises the notebook into the XML cell format: each cell becomes an opening tag line,
	 * its source lines and a closing tag line, all joined with `EOL`.
	 *
	 * @param notebook Notebook to serialise.
	 * @param excludeMarkdownCells When truthy, markup cells are left out.
	 */
	public override getAlternativeDocument(notebook: NotebookDocument, excludeMarkdownCells?: boolean): AlternativeNotebookDocument {
		const cells = notebook.getCells()
			.filter(cell => !excludeMarkdownCells || cell.kind !== NotebookCellKind.Markup)
			.map(cell => summarize(cell));

		const cellContent = cells.map(cell => {
			const prefix = `${generateCellMarker(cell)}${EOL}`;
			return { content: `${prefix}${cell.source.join(EOL)}${EOL}${EndDelimter}`, prefix, cell: notebook.cellAt(cell.index) };
		});
		const content = cellContent.map(entry => entry.content).join(EOL);

		// Track where each cell's block starts instead of searching for it in `content`: a search
		// is quadratic and finds the wrong place when the same text also occurs earlier.
		let blockStart = 0;
		const cellOffsetMap = cellContent.map(entry => {
			const offset = blockStart + entry.prefix.length;
			blockStart += entry.content.length + EOL.length;
			return { offset, cell: entry.cell };
		});

		return new AlternativeXmlDocument(content, cellOffsetMap, notebook);
	}

}
224

225

226
/**
 * Reads the `id` and `language` attributes from the line holding a cell's opening tag.
 *
 * @param line Line of text containing the tag.
 * @param defaultLanguage Language to fall back to when the tag has no `language` attribute.
 * @returns The trimmed id (empty string when the tag has no id, as is the case for new cells)
 * and the trimmed language.
 * @throws Error when there is no `language` attribute and either no default language was
 * given or the line contains no cell tag.
 */
function extractCellParts(line: string, defaultLanguage: string | undefined): { id: string; language: string } {
	const id = /id="([^"]+)"/.exec(line)?.[1].trim() ?? '';
	const languageAttribute = /language="([^"]+)"/.exec(line);

	if (languageAttribute) {
		return { id, language: languageAttribute[1].trim() };
	}

	// A cell tag without a language is assumed to use the default language.
	if (isXmlContent(line) && typeof defaultLanguage === 'string') {
		return { id, language: defaultLanguage };
	}

	throw new Error(`Invalid cell part in ${line}`);
}
240

241
Product

Resources

Company