Path: blob/main/extensions/copilot/src/platform/notebook/common/alternativeContentProvider.json.ts
13401 views
/*---------------------------------------------------------------------------------------------
 *  Copyright (c) Microsoft Corporation. All rights reserved.
 *  Licensed under the MIT License. See License.txt in the project root for license information.
 *--------------------------------------------------------------------------------------------*/

import { visit } from 'jsonc-parser';
import type { CancellationToken, NotebookCell, NotebookDocument, TextDocument } from 'vscode';
import { AsyncIterableObject } from '../../../util/vs/base/common/async';
import { URI } from '../../../util/vs/base/common/uri';
import { NotebookCellKind, Position, Range, Uri } from '../../../vscodeTypes';
import { BaseAlternativeNotebookContentProvider } from './alternativeContentProvider';
import { AlternativeNotebookDocument } from './alternativeNotebookDocument';
import { EOL, getCellId, getCellIdMap, getDefaultLanguage, LineOfCellText, LineOfText, summarize } from './helpers';

/** Number of spaces per nesting level in the generated JSON. */
const IndentSize = 4;

/**
 * Nesting depth of an entry of a cell's `source` array in the JSON produced by
 * `getAlternativeDocument`: root object -> `cells` array -> cell object -> `source` array -> entry.
 */
const SourceEntryDepth = 4;

/**
 * Heuristically decides whether `text` is (a fragment of) the JSON notebook format.
 *
 * @param text The text to inspect.
 * @returns `true` when the text starts with `{` (ignoring surrounding whitespace), or when it
 * contains a brace together with both a `"source":` and a `"cell_type":` key.
 */
export function isJsonContent(text: string): boolean {
	if (text.trim().startsWith('{')) {
		return true;
	}

	const hasBrace = text.includes('{') || text.includes('}');
	return hasBrace && text.includes('"source":') && text.includes('"cell_type":');
}

/**
 * Returns the number of leading whitespace characters of line `lineIndex` in `text`.
 * Falls back to the indentation `JSON.stringify(..., IndentSize)` gives a `source` entry
 * when the line does not exist.
 */
function getSourceEntryIndent(text: string, lineIndex: number): number {
	const line = text.split(/\r?\n/)[lineIndex];
	if (line === undefined) {
		return IndentSize * SourceEntryDepth;
	}
	return line.length - line.trimStart().length;
}

/**
 * Alternative (JSON) text representation of a notebook.
 */
class AlternativeJsonDocument extends AlternativeNotebookDocument {
	/**
	 * Translates a position inside a notebook cell into the matching position in the JSON text.
	 *
	 * Example: the cell line is `print("Hello World")` and the position is the space before
	 * `World`. In the JSON text that line is an indented string literal
	 * `"print(\"Hello World\")"`, so the resulting character is the length of the indentation
	 * plus the length of `"print(\"Hello` (opening quote included, escapes expanded).
	 *
	 * NOTE(review): when the cell id is not present in the text, `indexOf` returns -1 and the
	 * search for `"source": [` starts at offset 0, i.e. the first cell is used - confirm that
	 * callers only pass cells that are part of this document.
	 *
	 * @param cell The notebook cell that owns `position`.
	 * @param position A position inside `cell.document`.
	 * @returns The corresponding position in the alternative JSON text.
	 */
	override fromCellPosition(cell: NotebookCell, position: Position): Position {
		const cellId = getCellId(cell);
		const alternativeContentText = this.getText();
		const cellMarker = `"id": "${cellId}",`;
		const positionOfSource = alternativeContentText.indexOf(`"source": [`, alternativeContentText.indexOf(cellMarker));

		// The first source entry sits on the line right after `"source": [`.
		const firstLineIndexOfCellSource = this.positionAt(positionOfSource).line + 1;
		const linePositionInAltContent = position.line + firstLineIndexOfCellSource;

		// Measure the indentation of the actual line instead of hard-coding it, so the column
		// stays correct regardless of how the text was indented.
		const indentWidth = getSourceEntryIndent(alternativeContentText, linePositionInAltContent);

		const leadingCharacters = cell.document.getText(new Range(position.line, 0, position.line, position.character));
		// `slice(0, -1)` drops the closing `"` added by JSON.stringify; the opening one is kept
		// because it is part of the line in the JSON text.
		const escapedLeadingCharacters = JSON.stringify(leadingCharacters).slice(0, -1);

		return new Position(linePositionInAltContent, indentWidth + escapedLeadingCharacters.length);
	}

	/**
	 * Not supported for the JSON representation.
	 *
	 * @throws Always throws `Error('Method not implemented.')`.
	 */
	override toCellPosition(position: Position): { cell: NotebookCell; position: Position } | undefined {
		throw new Error('Method not implemented.');
	}
}

/**
 * Content provider that represents a notebook as a trimmed-down Jupyter-style JSON document
 * and parses such JSON (streamed line by line) back into cells.
 */
export class AlternativeJsonNotebookContentProvider extends BaseAlternativeNotebookContentProvider {
	constructor() {
		super('json');
	}

	/**
	 * The JSON format has no cell markers to strip; the text is returned unchanged.
	 */
	public stripCellMarkers(text: string): string {
		return text;
	}

	/**
	 * Parses streamed JSON notebook content into a stream of cell start/line/end events.
	 *
	 * @param notebookOrUri The notebook whose cells should be matched by id, or just its URI.
	 * @param inputStream Lines of the JSON text, in order.
	 * @param token Cancellation token; parsing stops consuming input once it is cancelled.
	 */
	public override parseAlternateContent(notebookOrUri: NotebookDocument | Uri, inputStream: AsyncIterable<LineOfText>, token: CancellationToken): AsyncIterable<LineOfCellText> {
		return this.parseAlternateContentImpl(notebookOrUri, inputStream, token);
	}

	/**
	 * Wraps already generated JSON `text` for `notebook` in an alternative document.
	 */
	public override getAlternativeDocumentFromText(text: string, notebook: NotebookDocument): AlternativeNotebookDocument {
		return new AlternativeJsonDocument(text, notebook);
	}

	/**
	 * Builds the JSON representation of `notebook`.
	 *
	 * @param notebook The notebook to serialize.
	 * @param excludeMarkdownCells When truthy, markup cells are left out.
	 * @returns A document whose text is `{ "cells": [...] }` indented with `IndentSize` spaces.
	 */
	public override getAlternativeDocument(notebook: NotebookDocument, excludeMarkdownCells?: boolean): AlternativeNotebookDocument {
		const includedCells = notebook.getCells().filter(cell => !excludeMarkdownCells || cell.kind !== NotebookCellKind.Markup);
		const cells = includedCells.map(cell => {
			const summary = summarize(cell);

			// Key order matters: `fromCellPosition` looks for `"id"` followed by `"source"`.
			return {
				cell_type: summary.cell_type,
				id: summary.id,
				metadata: {
					language: summary.language
				},
				source: getCellCode(cell.document),
			} satisfies SummaryCell;
		});

		const json: Notebook = { cells };
		return new AlternativeJsonDocument(JSON.stringify(json, undefined, IndentSize), notebook);
	}

	/**
	 * Produces an abbreviated, JSON-like outline of the notebook.
	 *
	 * Cells listed in `cellsToInclude` are emitted as JSON with their source reduced to the first
	 * non-blank line surrounded by `// <existingCodeMarker>` placeholders; every run of other
	 * cells collapses into a single placeholder line. The result is an outline, not valid JSON
	 * (it contains comments and a `,` line after every included cell).
	 *
	 * @param notebook The notebook to outline.
	 * @param cellsToInclude Cells to show in the outline.
	 * @param existingCodeMarker Text of the placeholder that stands for omitted code.
	 * @returns The outline, lines joined with `EOL`.
	 */
	public override getSummaryOfStructure(notebook: NotebookDocument, cellsToInclude: NotebookCell[], existingCodeMarker: string): string {
		const cellsIndent = ' '.repeat(IndentSize);
		const cellIndent = ' '.repeat(IndentSize * 2);
		const existingCodeMarkerWithComment = `// ${existingCodeMarker}`;
		const lines = ['{', `${cellsIndent}"cells": [`];

		for (const cell of notebook.getCells()) {
			if (!cellsToInclude.includes(cell)) {
				// Collapse consecutive omitted cells into one placeholder.
				if (lines[lines.length - 1] !== existingCodeMarkerWithComment) {
					lines.push(existingCodeMarkerWithComment);
				}
				continue;
			}

			const cellSummary = summarize(cell);
			const firstLine = cellSummary.source[0];
			const firstNonBlankLine = cellSummary.source.find(line => line.trim().length > 0);
			if (firstLine !== undefined && firstLine.trim().length) {
				cellSummary.source = [firstLine, existingCodeMarkerWithComment];
			} else if (firstNonBlankLine !== undefined) {
				// Leading blank lines are represented by a placeholder as well.
				cellSummary.source = [existingCodeMarkerWithComment, firstNonBlankLine, existingCodeMarkerWithComment];
			} else {
				cellSummary.source = [existingCodeMarkerWithComment];
			}

			const summaryLines = JSON.stringify(cellSummary, undefined, IndentSize)
				.split(/\r?\n/)
				.map(line => `${cellIndent}${line}`);
			lines.push(...summaryLines, ',');
		}

		lines.push(`${cellsIndent}]`);
		lines.push(`}`);
		return lines.join(EOL);
	}

	/**
	 * Incrementally parses the streamed JSON text and emits `start` / `line` / `end` events per cell.
	 *
	 * After every received line the accumulated text is re-scanned with `visit`; literals at or
	 * before `lastSeenOffset` were handled by a previous scan and are skipped. A cell is emitted
	 * once a literal belonging to a different cell index shows up, and the last cell is emitted
	 * when the input ends (or consumption stops because of cancellation).
	 */
	private parseAlternateContentImpl(notebookOrUri: NotebookDocument | URI, inputStream: AsyncIterable<LineOfText>, token: CancellationToken): AsyncIterable<LineOfCellText> {
		return new AsyncIterableObject<LineOfCellText>(async (emitter) => {
			const cellIdMap = URI.isUri(notebookOrUri) ? new Map<string, NotebookCell>() : getCellIdMap(notebookOrUri);
			const defaultLanguage = URI.isUri(notebookOrUri) ? 'python' : getDefaultLanguage(notebookOrUri);
			const cellIdsSeen = new Set<string>();
			let jsonText = '';
			let lastSeenOffset = -1;

			// State of the cell that is currently being collected.
			const cellInfo: { id?: string; index: number; kind: NotebookCellKind; source: string[]; uri?: Uri; language?: string } = {
				index: -1,
				kind: NotebookCellKind.Code,
				source: [],
			};

			const emitCell = () => {
				cellInfo.language = cellInfo.language || defaultLanguage;
				if (cellInfo.id && cellIdMap.get(cellInfo.id)?.document.languageId === cellInfo.language) {
					// LLM can return duplicate cells with the same id; only the first one keeps it.
					if (cellIdsSeen.has(cellInfo.id)) {
						cellInfo.id = '';
					} else {
						cellIdsSeen.add(cellInfo.id);
					}
				} else {
					// Unknown id, or a known id with a different language:
					// in such cases, treat the cell as a new cell.
					cellInfo.id = '';
				}
				const cell = cellIdMap.get(cellInfo.id);
				cellInfo.uri = cell?.document.uri;
				cellInfo.kind = cell?.kind || (cellInfo.language === 'markdown' ? NotebookCellKind.Markup : NotebookCellKind.Code);

				emitter.emitOne({ index: cellInfo.index, type: 'start', kind: cellInfo.kind, language: cellInfo.language, uri: cellInfo.uri, id: cellInfo.id });
				for (const cellLine of cellInfo.source) {
					emitter.emitOne({ index: cellInfo.index, type: 'line', line: cellLine });
				}
				emitter.emitOne({ index: cellInfo.index, type: 'end' });
			};

			for await (const lineOfText of inputStream) {
				if (token.isCancellationRequested) {
					break;
				}
				jsonText += lineOfText.value;

				visit(jsonText, {
					onLiteralValue: (value, offset, _length, _startLine, _startCharacter, pathSupplier) => {
						if (lastSeenOffset >= offset) {
							// Already processed while scanning an earlier, shorter prefix of the text.
							return;
						}
						// Only literals below `cells[<index>]` are of interest.
						const segments = pathSupplier();
						if (segments.length < 2) {
							return;
						}
						if (segments.shift() !== 'cells') {
							return;
						}
						const cellIndex = segments.shift();
						if (typeof cellIndex !== 'number') {
							return;
						}

						// May be undefined for a literal placed directly in the `cells` array;
						// none of the property checks below match in that case.
						const property = segments.shift();
						lastSeenOffset = offset;

						if (cellInfo.index !== -1 && cellInfo.index !== cellIndex) {
							// First literal of the next cell: flush the previous cell and reset the state.
							emitCell();

							cellInfo.id = undefined;
							cellInfo.kind = NotebookCellKind.Code;
							cellInfo.source = [];
							cellInfo.uri = undefined;
							cellInfo.language = undefined;
						}

						cellInfo.index = cellIndex;

						if (property === 'cell_type') {
							cellInfo.kind = value === 'code' ? NotebookCellKind.Code : NotebookCellKind.Markup;
							if (cellInfo.kind === NotebookCellKind.Markup) {
								cellInfo.language = 'markdown';
							}
						} else if (property === 'id') {
							// This is for scenarios when LLM sends the id as part of the cell instead of metadata.
							cellInfo.id = value;
						} else if (property === 'metadata' && segments[0] === 'id') {
							cellInfo.id = value;
						} else if (property === 'metadata' && segments[0] === 'language') {
							cellInfo.language = value;
							if (cellInfo.language === 'markdown') {
								cellInfo.kind = NotebookCellKind.Markup;
							}
						} else if (property === 'source' && segments.length && typeof segments[0] === 'number') {
							let code = typeof value === 'string' ? value : `${value || ''}`;
							// Generally code in jupyter cells always ends with `\n` when persisted in a JSON file.
							// We only care about the lines of code: edits are applied one line at a time and
							// a line in the editor cannot contain a line break, so the trailing `\n` is dropped.
							if (code.endsWith('\n')) {
								code = code.slice(0, -1);
							}
							cellInfo.source.push(code);
						}
					}
				});
			}

			// Flush the cell that was still being collected when the input ended.
			if (cellInfo.index !== -1) {
				emitCell();
			}
		});
	}
}

/**
 * Returns the text of every line of `document`, in order (empty array for a document without lines).
 */
function getCellCode(document: TextDocument): string[] {
	return Array.from({ length: document.lineCount }, (_, lineIndex) => document.lineAt(lineIndex).text);
}

/**
 * Root of the JSON representation.
 */
type Notebook = {
	cells: SummaryCell[];
};

/**
 * Shortened version of a Jupyter cell JSON.
 */
type SummaryCell = {
	cell_type: 'code' | 'markdown';
	source: string[];
	id: string;
	metadata: {
		language: string;
	};
};