Path: blob/main/extensions/copilot/src/platform/notebook/common/alternativeContentProvider.xml.ts
13401 views
/*---------------------------------------------------------------------------------------------
 *  Copyright (c) Microsoft Corporation. All rights reserved.
 *  Licensed under the MIT License. See License.txt in the project root for license information.
 *--------------------------------------------------------------------------------------------*/

import type { CancellationToken, NotebookCell, NotebookDocument, Uri } from 'vscode';
import { getLanguage } from '../../../util/common/languages';
import { isUri } from '../../../util/common/types';
import { findLast } from '../../../util/vs/base/common/arraysFind';
import { EndOfLine, NotebookCellKind, Position } from '../../../vscodeTypes';
import { BaseAlternativeNotebookContentProvider } from './alternativeContentProvider';
import { AlternativeNotebookDocument } from './alternativeNotebookDocument';
import { EOL, getCellIdMap, getDefaultLanguage, LineOfCellText, LineOfText, summarize, SummaryCell } from './helpers';

/** Prefix of a cell start marker that carries attributes (`id`, `language`). */
const StartDelimter = `<VSCode.Cell `;
/** Cell start marker without any attributes. */
const StartEmptyCellDelimter = `<VSCode.Cell>`;
/** Marker that closes a cell. */
const EndDelimter = `</VSCode.Cell>`;

/**
 * Builds the leading part of a cell start marker up to and including the `id` attribute.
 * The result ends with a space so that further attributes can be appended directly.
 */
function generatePartialStartDelimiterWithId(id: string) {
	return `${StartDelimter}id="${id}" `;
}

/**
 * Builds the complete start marker (`<VSCode.Cell id="..." language="...">`) for a cell summary.
 */
function generateCellMarker(cell: SummaryCell) {
	return `${generatePartialStartDelimiterWithId(cell.id)}language="${cell.language}">`;
}

/**
 * Returns `true` when `text` contains any of the XML cell delimiters
 * (attributed start marker, attribute-less start marker, or end marker).
 */
export function isXmlContent(text: string): boolean {
	return text.includes(StartDelimter) || text.includes(EndDelimter) || text.includes(StartEmptyCellDelimter);
}


/**
 * XML flavoured alternative document.
 *
 * `cellOffsetMap` holds, for every known cell, the offset in the alternative text at which the
 * cell's own content starts (i.e. right after the start marker line). Both factory methods in
 * this file append entries in document order, which `toCellPosition` relies on.
 */
class AlternativeXmlDocument extends AlternativeNotebookDocument {
	constructor(text: string, private readonly cellOffsetMap: { offset: number; cell: NotebookCell }[], notebook: NotebookDocument) {
		super(text, notebook);
	}

	/**
	 * Translates a position inside `cell` into a position inside the alternative text.
	 *
	 * The cell is located by searching the text for its start marker; the marker length, one
	 * end-of-line sequence and the offset of `position` within the cell are then added.
	 */
	override fromCellPosition(cell: NotebookCell, position: Position): Position {
		const cellSummary = summarize(cell);
		const cellMarker = generateCellMarker(cellSummary);

		const eolLength = cell.document.eol === EndOfLine.LF ? 1 : 2;

		const alternativeContentText = this.getText();
		const offsetInCell = cell.document.offsetAt(position);
		// NOTE(review): `indexOf` yields -1 when the marker is not part of this text, which makes the
		// resulting offset meaningless - confirm callers only pass cells contained in this document.
		const offset = alternativeContentText.indexOf(cellMarker) + cellMarker.length + eolLength + offsetInCell;
		return this.positionAt(offset);
	}

	/**
	 * Translates a position inside the alternative text into a cell and a position within it.
	 *
	 * @returns `undefined` when no cell content starts at or before the given position.
	 */
	override toCellPosition(position: Position): { cell: NotebookCell; position: Position } | undefined {
		const offset = this.offsetAt(position);
		// The owning cell is the last one whose content starts at or before the offset.
		const cell = findLast(this.cellOffsetMap, (cell) => cell.offset <= offset);
		if (!cell) {
			return undefined;
		}
		const cellPosition = cell.cell.document.positionAt(offset - cell.offset);
		return { cell: cell.cell, position: cellPosition };
	}
}

/**
 * Content provider that represents a notebook as a sequence of
 * `<VSCode.Cell id="..." language="...">` ... `</VSCode.Cell>` sections.
 */
export class AlternativeXmlNotebookContentProvider extends BaseAlternativeNotebookContentProvider {
	constructor() {
		super('xml');
	}

	/**
	 * Removes a leading cell start marker line and a trailing end marker from `text`.
	 *
	 * The first line is dropped entirely when it starts with a start marker. On the last line
	 * everything from the last occurrence of the end marker onwards is removed, provided the
	 * line (ignoring surrounding whitespace) ends with that marker.
	 */
	public stripCellMarkers(text: string): string {
		const lines = text.split(EOL);
		if (lines.length && (lines[0].startsWith(StartDelimter) || lines[0].startsWith(StartEmptyCellDelimter))) {
			lines.shift();
		}
		if (lines.length && lines[lines.length - 1].trim().endsWith(EndDelimter)) {
			lines[lines.length - 1] = lines[lines.length - 1].substring(0, lines[lines.length - 1].lastIndexOf(EndDelimter));
		}
		return lines.join(EOL);
	}

	/**
	 * Produces an abbreviated outline of the notebook.
	 *
	 * Cells listed in `cellsToInclude` are emitted with their start marker, their first
	 * non-blank line and the end marker; omitted content is represented by a
	 * `// <existingCodeMarker>` line. Runs of cells that are not included collapse into a single
	 * marker line.
	 */
	public override getSummaryOfStructure(notebook: NotebookDocument, cellsToInclude: NotebookCell[], existingCodeMarker: string): string {
		const lines: string[] = [];
		const existingCodeMarkerWithComment = `// ${existingCodeMarker}`;
		for (const cell of notebook.getCells()) {
			if (cellsToInclude.includes(cell)) {
				const cellSummary = summarize(cell);
				lines.push(generateCellMarker(cellSummary));
				if (cellSummary.source.length && cellSummary.source[0].trim().length) {
					// The first line has content: show it, then mark the remainder as elided.
					lines.push(cellSummary.source[0]);
					lines.push(existingCodeMarkerWithComment);
				} else {
					const firstNonEmptyLine = cellSummary.source.find(line => line.trim().length > 0);
					if (firstNonEmptyLine !== undefined) {
						// The cell starts with blank lines: elide them, show the first line with content
						// and elide the remainder. (Previously these lines were computed but never emitted.)
						lines.push(existingCodeMarkerWithComment, firstNonEmptyLine, existingCodeMarkerWithComment);
					} else {
						// Empty or whitespace-only cell.
						lines.push(existingCodeMarkerWithComment);
					}
				}
				lines.push(EndDelimter);
			} else if (!lines.length || lines[lines.length - 1] !== existingCodeMarkerWithComment) {
				// Avoid emitting consecutive marker lines for adjacent excluded cells.
				lines.push(existingCodeMarkerWithComment);
			}
		}
		return lines.join(EOL);
	}

	/**
	 * Parses a stream of lines in the XML cell format into `start` / `line` / `end` events.
	 *
	 * A line that ends with the end marker without starting with it is ambiguous (it may be cell
	 * content or a cell terminator glued to the last line), so it is held back until the next
	 * line - or the end of the stream - disambiguates it.
	 *
	 * @param notebookOrUri Notebook used to resolve cell ids and the default language. When a
	 *   `Uri` is given, no existing cells are matched and no default language is available.
	 * @param inputStream Lines to parse.
	 * @param token Stops the parsing when cancellation is requested.
	 */
	public async *parseAlternateContent(notebookOrUri: NotebookDocument | Uri, inputStream: AsyncIterable<LineOfText>, token: CancellationToken): AsyncIterable<LineOfCellText> {
		const isNotebook = !isUri(notebookOrUri);
		const cellIdMap = isNotebook ? getCellIdMap(notebookOrUri) : new Map<string, NotebookCell>();


		let index = -1;
		let endDelimiterSeen = false;
		const cellIdsSeen = new Set<string>();
		let previousLineEndedWithEndCellMarker = false;
		let previousLine: LineOfCellText | undefined = undefined;
		const defaultLanguage = isNotebook ? getLanguage(getDefaultLanguage(notebookOrUri)).languageId : undefined;
		for await (const lineOfText of inputStream) {
			if (token.isCancellationRequested) {
				break;
			}
			const line = lineOfText.value;
			if ((line.startsWith(StartDelimter) || line.startsWith(StartEmptyCellDelimter)) && (index < 0 || (endDelimiterSeen || (previousLineEndedWithEndCellMarker && previousLine)))) {
				if (!endDelimiterSeen && previousLineEndedWithEndCellMarker && previousLine) {
					// Last line didn't finish, emit that, but strip the end delimiter.
					previousLine.line = previousLine.line.substring(0, previousLine.line.lastIndexOf(EndDelimter));
					yield previousLine;
					yield { type: 'end', index: previousLine.index };
				}
				previousLineEndedWithEndCellMarker = false;
				previousLine = undefined;

				index += 1;
				endDelimiterSeen = false;
				const lineOfCellText: LineOfCellText = { type: 'start', index, uri: undefined, language: undefined, kind: NotebookCellKind.Code };
				const cellParts = extractCellParts(line, defaultLanguage);
				// LLM returns duplicate cell with the same id.
				// We need tests for this.
				// This is a workaround to treat subsequent cells as new cells.
				if (cellParts.id && cellIdMap.get(cellParts.id)?.document.languageId === cellParts.language) {
					if (cellIdsSeen.has(cellParts.id)) {
						cellParts.id = '';
					} else {
						cellIdsSeen.add(cellParts.id);
					}
				} else {
					// Possible duplicate cell with the same id but different language.
					// In such cases, treat them as new cells.
					cellParts.id = '';
				}
				const cell = cellIdMap.get(cellParts.id)?.document.languageId === cellParts.language ? cellIdMap.get(cellParts.id) : undefined;
				lineOfCellText.id = cellParts.id;
				lineOfCellText.language = cellParts.language;
				lineOfCellText.uri = cell?.document.uri;
				lineOfCellText.kind = cell?.kind || (lineOfCellText.language === 'markdown' ? NotebookCellKind.Markup : NotebookCellKind.Code);
				yield lineOfCellText;
			} else if (line.startsWith(EndDelimter)) {
				if (previousLineEndedWithEndCellMarker && previousLine) {
					// The last line somehow ends with the cell marker (must have been added by the user),
					// yield the previous line.
					yield previousLine;
				}

				endDelimiterSeen = true;
				previousLineEndedWithEndCellMarker = false;
				previousLine = undefined;
				yield { type: 'end', index };
			} else if (index >= 0) {
				if (previousLineEndedWithEndCellMarker && previousLine) {
					// Somehow we have two subsequent lines that end with the cell marker.
					// Weird, should not happen; if it does, yield the previous line.
					yield previousLine;
					previousLine = undefined;
				}
				previousLineEndedWithEndCellMarker = line.endsWith(EndDelimter);
				if (previousLineEndedWithEndCellMarker) {
					// Hold the line back until we know whether the marker terminates the cell.
					previousLine = { type: 'line', index, line };
				} else {
					yield { type: 'line', index, line };
				}
			}
		}

		if (!token.isCancellationRequested && previousLineEndedWithEndCellMarker && previousLine) {
			// The stream ended while a line was still held back. Nothing follows it, so the trailing
			// marker closes the cell: emit the line without the marker, then the end of the cell.
			// Without this the final line of the last cell would be dropped silently.
			previousLine.line = previousLine.line.substring(0, previousLine.line.lastIndexOf(EndDelimter));
			yield previousLine;
			yield { type: 'end', index: previousLine.index };
		}
	}


	/**
	 * Builds an alternative document from already serialized XML text.
	 *
	 * Every start marker line is matched to a notebook cell, first by id and otherwise by picking
	 * the first not yet mapped cell with the same language. Marker lines without a matching cell
	 * are left unmapped.
	 *
	 * @throws Error (from `extractCellParts`) when a start marker line has no `language` attribute.
	 */
	public override getAlternativeDocumentFromText(text: string, notebook: NotebookDocument): AlternativeNotebookDocument {
		const cellIdMap = getCellIdMap(notebook);
		const cellOffsetMap: { offset: number; cell: NotebookCell }[] = [];

		// Parse the text to find cell markers and build the offset map
		const lines = text.split(EOL);
		let currentOffset = 0;

		for (let i = 0; i < lines.length; i++) {
			const line = lines[i];

			if (line.startsWith(StartDelimter) || line.startsWith(StartEmptyCellDelimter)) {
				const cellParts = extractCellParts(line, undefined);
				const cell = cellIdMap.get(cellParts.id) || notebook.getCells().find(c =>
					c.document.languageId === cellParts.language &&
					!cellOffsetMap.some(entry => entry.cell === c)
				);

				if (cell) {
					// Calculate offset: skip the cell marker line
					const eolLength = EOL.length;
					const offset = currentOffset + line.length + eolLength;

					cellOffsetMap.push({ offset, cell });
				}
			}

			currentOffset += line.length + EOL.length;
		}

		return new AlternativeXmlDocument(text, cellOffsetMap, notebook);
	}

	/**
	 * Serializes the notebook into the XML cell format and records where each cell's content
	 * starts in the resulting text.
	 *
	 * @param excludeMarkdownCells When truthy, markup cells are left out of the document.
	 */
	public override getAlternativeDocument(notebook: NotebookDocument, excludeMarkdownCells?: boolean): AlternativeNotebookDocument {
		const cells = notebook.getCells().filter(cell => excludeMarkdownCells ? cell.kind !== NotebookCellKind.Markup : true).map(cell => summarize(cell));

		const serializedCells: string[] = [];
		const cellOffsetMap: { offset: number; cell: NotebookCell }[] = [];
		// Offset at which the next serialized cell starts within the joined content. Tracking it
		// directly (instead of searching the joined text) stays correct even when a cell's
		// serialized form also occurs verbatim inside an earlier cell's source.
		let nextCellStart = 0;
		for (const cell of cells) {
			const prefix = `${generateCellMarker(cell)}${EOL}`;
			const serialized = `${prefix}${cell.source.join(EOL)}${EOL}${EndDelimter}`;
			cellOffsetMap.push({ offset: nextCellStart + prefix.length, cell: notebook.cellAt(cell.index) });
			serializedCells.push(serialized);
			nextCellStart += serialized.length + EOL.length;
		}
		const content = serializedCells.join(EOL);

		return new AlternativeXmlDocument(content, cellOffsetMap, notebook);
	}

}


/**
 * Extracts the `id` and `language` attribute values from a cell start marker line.
 *
 * @param line The marker line to inspect.
 * @param defaultLanguage Language to fall back to when the marker has no `language` attribute.
 * @returns The trimmed attribute values; `id` is an empty string when the attribute is missing.
 * @throws Error when there is no `language` attribute and no usable default language.
 */
function extractCellParts(line: string, defaultLanguage: string | undefined): { id: string; language: string } {
	const idMatch = line.match(/id="([^"]+)"/);
	const languageMatch = line.match(/language="([^"]+)"/);
	if (!languageMatch) {
		if (isXmlContent(line) && typeof defaultLanguage === 'string') {
			// If we have a cell marker but no language, we assume the default language.
			return { id: idMatch ? idMatch[1].trim() : '', language: defaultLanguage };
		}
		throw new Error(`Invalid cell part in ${line}`);
	}

	// New cells will not have an id.
	return { id: idMatch ? idMatch[1].trim() : '', language: languageMatch[1].trim() };
}