Path: blob/main/extensions/copilot/src/platform/notebook/common/alternativeContentProvider.text.ts
13401 views
/*---------------------------------------------------------------------------------------------
 *  Copyright (c) Microsoft Corporation. All rights reserved.
 *  Licensed under the MIT License. See License.txt in the project root for license information.
 *--------------------------------------------------------------------------------------------*/

import type { CancellationToken, NotebookCell, NotebookDocument, Position, Uri } from 'vscode';
import { getLanguage } from '../../../util/common/languages';
import { isUri } from '../../../util/common/types';
import { findLast } from '../../../util/vs/base/common/arraysFind';
import { EndOfLine, NotebookCellKind } from '../../../vscodeTypes';
import { BaseAlternativeNotebookContentProvider } from './alternativeContentProvider';
import { AlternativeNotebookDocument } from './alternativeNotebookDocument';
import { EOL, getCellIdMap, getDefaultLanguage, LineOfCellText, LineOfText, summarize, SummaryCell } from './helpers';

/** Where a notebook cell lives inside the alternative text representation. */
type CellOffsetEntry = {
	/** Offset of the first character of the cell marker line. */
	offset: number;
	/** Offset of the first character of the cell's own source (after the marker/prefix). */
	sourceOffset: number;
	cell: NotebookCell;
};

/** Matches the optional `[id=...]` attribute of a cell marker line. */
const CELL_ID_PATTERN = /\[id=(.+?)\]/;
/** Matches the `[language=...]` attribute of a cell marker line. */
const CELL_LANGUAGE_PATTERN = /\[language=(.+?)\]/;

/**
 * Builds the marker line that introduces a cell in the text representation,
 * e.g. `#%% vscode.cell [id=abc] [language=python]`.
 *
 * @param cell Summary of the cell; its `id` (when truthy) and `language` are embedded in the marker.
 * @param lineComment Line-comment prefix placed in front of the marker.
 * @returns The marker line, without a trailing line break.
 */
export function generateCellTextMarker(cell: SummaryCell, lineComment: string): string {
	const cellIdStr = cell.id ? `[id=${cell.id}] ` : '';
	return `${lineComment}%% vscode.cell ${cellIdStr}[language=${cell.language}]`;
}

/**
 * Cheap, case-insensitive check for the `vscode.cell` token anywhere in a line.
 * A `true` result only means the line is worth parsing as a marker.
 */
export function lineMightHaveCellMarker(line: string): boolean {
	return line.toLowerCase().includes('vscode.cell');
}

/**
 * Returns true when `line` looks like a cell delimiter: either the full
 * `<comment>%% vscode.cell ...` marker, or the shortened `<comment>%% [...]`
 * form that lacks the `vscode.cell` token.
 */
function isCellMarkerLine(line: string, lineCommentStart: string): boolean {
	if (line.startsWith(`${lineCommentStart}%% vscode.cell`)) {
		return true;
	}
	return line.startsWith(`${lineCommentStart}%% [`) && line.trimEnd().endsWith(']');
}

/**
 * Alternative (plain text) view of a notebook that can translate positions
 * between the text and the individual cell documents.
 */
class AlternativeTextDocument extends AlternativeNotebookDocument {
	/**
	 * @param text Full text of the alternative representation.
	 * @param cellOffsetMap Location of each known cell inside `text`. `toCellPosition`
	 * takes the last entry whose `sourceOffset` is not past the queried offset, so
	 * entries are expected in ascending offset order.
	 * @param notebook The notebook this text was derived from.
	 */
	constructor(text: string, private readonly cellOffsetMap: CellOffsetEntry[], notebook: NotebookDocument) {
		super(text, notebook);
	}

	/**
	 * Maps a position inside `cell` to the matching position in the alternative text.
	 *
	 * The cell is located by searching the text for its marker line; the cell's
	 * offset is then added after skipping the marker line and, for markup cells,
	 * the opening block-comment line.
	 *
	 * NOTE(review): if the marker cannot be found, `indexOf` yields -1 and the
	 * resulting position is meaningless; callers are presumably expected to pass
	 * only cells that are part of this document - confirm.
	 * NOTE(review): line-break widths here come from the cell document's EOL,
	 * whereas the text is assembled with `EOL` from helpers - confirm these agree.
	 */
	override fromCellPosition(cell: NotebookCell, position: Position): Position {
		const cellMarker = generateCellTextMarker(summarize(cell), getLineCommentStart(this.notebook));
		const eolLength = cell.document.eol === EndOfLine.LF ? 1 : 2;
		const [blockCommentStart] = getBlockComment(this.notebook);

		// Markup cells have an extra line (the block comment opener) between marker and source.
		const markdownOffset = cell.kind === NotebookCellKind.Markup ? blockCommentStart.length + eolLength : 0;
		const markerOffset = this.getText().indexOf(cellMarker);
		const offsetInCell = cell.document.offsetAt(position);

		return this.positionAt(markerOffset + cellMarker.length + eolLength + markdownOffset + offsetInCell);
	}

	/**
	 * Maps a position in the alternative text back to a cell and a position within it.
	 *
	 * @returns `undefined` when the position lies before the source of the first mapped cell.
	 */
	override toCellPosition(position: Position): { cell: NotebookCell; position: Position } | undefined {
		const offset = this.offsetAt(position);
		const entry = findLast(this.cellOffsetMap, candidate => candidate.sourceOffset <= offset);
		if (!entry) {
			return undefined;
		}
		return { cell: entry.cell, position: entry.cell.document.positionAt(offset - entry.sourceOffset) };
	}
}

/**
 * Provides the `text` alternative representation of a notebook: cells are
 * separated by line-comment markers and markdown cells are wrapped in the
 * default language's block comment.
 */
export class AlternativeTextNotebookContentProvider extends BaseAlternativeNotebookContentProvider {
	constructor() {
		super('text');
	}

	/**
	 * Removes the first line of `text` when it might be a cell marker.
	 * Only the first line is inspected; the text is returned unchanged otherwise.
	 */
	public stripCellMarkers(text: string): string {
		const [firstLine, ...rest] = text.split(EOL);
		if (firstLine !== undefined && lineMightHaveCellMarker(firstLine)) {
			return rest.join(EOL);
		}
		return text;
	}

	/**
	 * Produces an abbreviated outline of the notebook.
	 *
	 * Cells in `cellsToInclude` are rendered with their marker and at most one
	 * line of real source, with the remainder replaced by a commented
	 * `existingCodeMarker`. Runs of excluded cells collapse into a single
	 * commented `existingCodeMarker` line.
	 */
	public override getSummaryOfStructure(notebook: NotebookDocument, cellsToInclude: NotebookCell[], existingCodeMarker: string): string {
		const blockComment = getBlockComment(notebook);
		const lineCommentStart = getLineCommentStart(notebook);
		const existingCodeMarkerWithComment = `${lineCommentStart} ${existingCodeMarker}`;
		const included = new Set(cellsToInclude);
		const lines: string[] = [];

		for (const cell of notebook.getCells()) {
			if (!included.has(cell)) {
				// Collapse consecutive excluded cells into one placeholder line.
				if (!lines.length || lines[lines.length - 1] !== existingCodeMarkerWithComment) {
					lines.push(existingCodeMarkerWithComment);
				}
				continue;
			}

			const cellSummary = summarize(cell);
			const firstNonBlankLine = cellSummary.source.find(line => line.trim().length);
			if (cellSummary.source.length && cellSummary.source[0].trim().length) {
				// First line has content: keep it, elide everything after it.
				cellSummary.source = [cellSummary.source[0], existingCodeMarkerWithComment];
			} else if (firstNonBlankLine !== undefined) {
				// Leading blank lines: elide them, keep the first line with content, elide the rest.
				cellSummary.source = [existingCodeMarkerWithComment, firstNonBlankLine, existingCodeMarkerWithComment];
			} else {
				// Empty or whitespace-only cell.
				cellSummary.source = [existingCodeMarkerWithComment];
			}
			lines.push(generateAlternativeCellTextContent(cellSummary, lineCommentStart, blockComment).content);
		}

		return lines.join(EOL);
	}

	/**
	 * Parses a stream of text lines into per-cell events.
	 *
	 * For every recognised cell marker a `start` event is yielded (preceded by an
	 * `end` event for the previous cell, if any); every other line is yielded as
	 * a `line` event of the current cell. Lines seen before the first marker are
	 * dropped. For markdown cells, lines that are exactly the opening/closing
	 * block comment are consumed and not yielded. A final `end` event is yielded
	 * once the stream finishes or cancellation is requested, provided a cell was started.
	 */
	public override async *parseAlternateContent(notebookOrUri: NotebookDocument | Uri, inputStream: AsyncIterable<LineOfText>, token: CancellationToken): AsyncIterable<LineOfCellText> {
		const isNotebook = !isUri(notebookOrUri);
		const cellIdMap = isNotebook ? getCellIdMap(notebookOrUri) : new Map<string, NotebookCell>();

		let inMarkdownCell = false;
		let isInTripleQuotes = false;
		let emittedStart = false;
		let cellIndex = -1;

		const lineCommentStart = getLineCommentStart(isNotebook ? notebookOrUri : undefined);
		const blockComment = getBlockComment(isNotebook ? notebookOrUri : undefined);
		const defaultLanguage = isNotebook ? getLanguage(getDefaultLanguage(notebookOrUri)).languageId : undefined;
		const cellIdsSeen = new Set<string>();

		for await (const lineOfText of inputStream) {
			if (token.isCancellationRequested) {
				break;
			}
			const line = lineOfText.value;

			// Check for a new cell delimiter.
			// Sometimes the LLM returns cells without the `vscode.cell` token, such as `#%% [language=python]`.
			// Only attempt extraction when the line looks like a marker; doing it for every line is expensive.
			const cellParts = isCellMarkerLine(line, lineCommentStart) ? extractCellParts(line, defaultLanguage) : undefined;
			if (cellParts?.language) {
				cellIndex += 1;

				// Keep the id only the first time it is seen AND when it refers to an existing cell of
				// the same language. Duplicated ids, unknown ids, and ids whose language differs are
				// all treated as new cells.
				let id = '';
				if (cellParts.id && cellIdMap.get(cellParts.id)?.document.languageId === cellParts.language && !cellIdsSeen.has(cellParts.id)) {
					cellIdsSeen.add(cellParts.id);
					id = cellParts.id;
				}

				const cell = cellIdMap.get(id);
				const isMarkdown = cellParts.language === 'markdown';
				const startOfCell: LineOfCellText & { emitted: boolean } = {
					index: cellIndex,
					uri: cell?.document.uri,
					language: cellParts.language,
					kind: cell?.kind ?? (isMarkdown ? NotebookCellKind.Markup : NotebookCellKind.Code),
					emitted: false,
					type: 'start',
					id,
				};
				inMarkdownCell = isMarkdown;
				isInTripleQuotes = false;

				if (emittedStart) {
					// Close the previous cell before opening the next one.
					yield { index: cellIndex - 1, type: 'end' };
				}
				emittedStart = true;
				yield startOfCell;
				continue;
			}

			if (!emittedStart) {
				// Ignore anything that precedes the first cell marker.
				continue;
			}

			if (inMarkdownCell) {
				// Outside the block comment we look for its opener, inside for its closer.
				const delimiter = isInTripleQuotes ? blockComment[1] : blockComment[0];
				if (line === delimiter) {
					isInTripleQuotes = !isInTripleQuotes;
					continue;
				}
			}
			yield { index: cellIndex, line, type: 'line' };
		}

		if (emittedStart) {
			yield { index: cellIndex, type: 'end' };
		}
	}

	/**
	 * Builds an alternative document from already existing text by scanning it
	 * for cell markers and associating each marker with a notebook cell.
	 *
	 * A marker is matched by its id when the id is known; otherwise the first
	 * not-yet-mapped cell with the same language is used. Markers that cannot be
	 * associated with any cell are skipped.
	 */
	public override getAlternativeDocumentFromText(text: string, notebook: NotebookDocument): AlternativeNotebookDocument {
		const blockComment = getBlockComment(notebook);
		const lineCommentStart = getLineCommentStart(notebook);
		const cellIdMap = getCellIdMap(notebook);
		const cellOffsetMap: CellOffsetEntry[] = [];
		const mappedCells = new Set<NotebookCell>();

		const lines = text.split(EOL);
		let currentOffset = 0;

		lines.forEach((line, lineIndex) => {
			const cellParts = isCellMarkerLine(line, lineCommentStart) ? extractCellParts(line, undefined) : undefined;
			if (cellParts) {
				const cell = cellIdMap.get(cellParts.id)
					|| notebook.getCells().find(candidate => candidate.document.languageId === cellParts.language && !mappedCells.has(candidate));

				if (cell) {
					// The cell source starts after the marker line...
					let sourceOffset = currentOffset + line.length + EOL.length;
					// ...and, for markdown, after the opening block-comment line. Only skip that line
					// when it is really there: markdown cells may appear without the wrapper.
					if (cellParts.language === 'markdown' && lines[lineIndex + 1] === blockComment[0]) {
						sourceOffset += blockComment[0].length + EOL.length;
					}
					cellOffsetMap.push({ offset: currentOffset, sourceOffset, cell });
					mappedCells.add(cell);
				}
			}
			currentOffset += line.length + EOL.length;
		});

		return new AlternativeTextDocument(text, cellOffsetMap, notebook);
	}

	/**
	 * Renders the notebook as text and records where every rendered cell starts.
	 *
	 * @param excludeMarkdownCells When truthy, markup cells are left out of the text.
	 */
	public override getAlternativeDocument(notebook: NotebookDocument, excludeMarkdownCells?: boolean): AlternativeNotebookDocument {
		const blockComment = getBlockComment(notebook);
		const lineCommentStart = getLineCommentStart(notebook);
		const summaries = notebook.getCells()
			.filter(cell => !excludeMarkdownCells || cell.kind !== NotebookCellKind.Markup)
			.map(cell => summarize(cell));

		const renderedCells: string[] = [];
		const cellOffsetMap: CellOffsetEntry[] = [];
		// Track offsets while concatenating instead of searching the final text: a search is
		// quadratic and returns the wrong location when two cells render to identical text.
		let offset = 0;
		for (const summary of summaries) {
			const { content, prefix } = generateAlternativeCellTextContent(summary, lineCommentStart, blockComment);
			cellOffsetMap.push({ offset, sourceOffset: offset + prefix.length, cell: notebook.cellAt(summary.index) });
			renderedCells.push(content);
			offset += content.length + EOL.length;
		}

		return new AlternativeTextDocument(renderedCells.join(EOL), cellOffsetMap, notebook);
	}

}

/**
 * Renders a single cell: marker line followed by its source. Markdown cells
 * additionally get their source wrapped in the given block comment.
 *
 * @returns `content` is the full rendered cell; `prefix` is the part of `content`
 * that precedes the cell's own source.
 */
function generateAlternativeCellTextContent(cell: SummaryCell, lineCommentStart: string, blockComment: [string, string]): { content: string; prefix: string } {
	const cellMarker = generateCellTextMarker(cell, lineCommentStart);
	const src = cell.source.join(EOL);
	if (cell.language === 'markdown') {
		const prefix = `${cellMarker}${EOL}${blockComment[0]}${EOL}`;
		return { content: `${prefix}${src}${EOL}${blockComment[1]}`, prefix };
	}
	const prefix = `${cellMarker}${EOL}`;
	return { content: `${prefix}${src}`, prefix };
}

/**
 * Returns the block-comment delimiters used to wrap markdown cells.
 *
 * Without a notebook, triple double-quotes are used. With a notebook, the
 * default language's block comment is used, falling back to triple backticks
 * when the language defines none.
 */
export function getBlockComment(notebook?: NotebookDocument): [string, string] {
	if (!notebook) {
		return ['"""', '"""'];
	}
	const language = getLanguage(getDefaultLanguage(notebook));
	return language.blockComment ?? ['```', '```'];
}

/**
 * Returns the line-comment prefix of the notebook's default language, or `#`
 * when there is no notebook or the language has an empty line-comment start.
 */
export function getLineCommentStart(notebook?: NotebookDocument): string {
	if (!notebook) {
		return '#';
	}
	const language = getLanguage(getDefaultLanguage(notebook));
	return language.lineComment.start || '#';
}

/**
 * Extracts the `[id=...]` and `[language=...]` attributes from a marker line.
 *
 * @param line Candidate marker line.
 * @param defaultLanguage Language to assume when the line carries the
 * `vscode.cell` token but no language attribute.
 * @returns The trimmed id (empty string when absent) and language, or
 * `undefined` when no language can be determined.
 */
function extractCellParts(line: string, defaultLanguage: string | undefined): { id: string; language: string } | undefined {
	const idMatch = line.match(CELL_ID_PATTERN);
	const id = idMatch ? idMatch[1].trim() : '';

	const languageMatch = line.match(CELL_LANGUAGE_PATTERN);
	if (languageMatch) {
		return { id, language: languageMatch[1].trim() };
	}
	if (lineMightHaveCellMarker(line) && typeof defaultLanguage === 'string') {
		// A cell marker without a language attribute is assumed to use the default language.
		return { id, language: defaultLanguage };
	}
	return undefined;
}