Path: blob/main/extensions/copilot/src/platform/notebook/test/node/alternativeContent.spec.ts
13405 views
/*---------------------------------------------------------------------------------------------1* Copyright (c) Microsoft Corporation. All rights reserved.2* Licensed under the MIT License. See License.txt in the project root for license information.3*--------------------------------------------------------------------------------------------*/45import { EOL } from 'os';6import { describe, expect, test } from 'vitest';7import type { NotebookDocument } from 'vscode';8import { DiffServiceImpl } from '../../../../platform/diff/node/diffServiceImpl';9import { ILogger, ILogService } from '../../../../platform/log/common/logService';10import { IAlternativeNotebookContentService } from '../../common/alternativeContent';1112import { AlternativeNotebookContentEditGenerator, textToAsyncIterableLines } from '../../common/alternativeContentEditGenerator';1314import { BaseAlternativeNotebookContentProvider } from '../../common/alternativeContentProvider';1516import { AlternativeJsonNotebookContentProvider } from '../../common/alternativeContentProvider.json';1718import { AlternativeTextNotebookContentProvider } from '../../common/alternativeContentProvider.text';1920import { AlternativeXmlNotebookContentProvider } from '../../common/alternativeContentProvider.xml';2122import { NullTelemetryService } from '../../../../platform/telemetry/common/nullTelemetryService';23import { SimulationWorkspace } from '../../../../platform/test/node/simulationWorkspace';24import { ExtHostNotebookDocumentData } from '../../../../util/common/test/shims/notebookDocument';25import { AsyncIterableObject } from '../../../../util/vs/base/common/async';26import { CancellationToken } from '../../../../util/vs/base/common/cancellation';27import { ResourceMap } from '../../../../util/vs/base/common/map';28import * as path from '../../../../util/vs/base/common/path';29import { NotebookCellData, NotebookCellKind, NotebookData, NotebookEdit, NotebookRange, Position, Range, TextEdit, Uri } from 
'../../../../vscodeTypes';
import { LineOfText, notebookCellToCellData, summarize } from '../../common/helpers';
import { fixture, loadFile, loadNotebook } from './utils';

describe('Alternative Content for Notebooks', () => {
	[
		new AlternativeXmlNotebookContentProvider(),
		new AlternativeTextNotebookContentProvider(),
		new AlternativeJsonNotebookContentProvider()
	].forEach((provider) => {
		// Logger stub: every logging method is a no-op and sub-loggers resolve back to the same object.
		const noop = () => { /* no-op */ };
		const mockLogger: ILogger = {
			error: noop,
			warn: noop,
			info: noop,
			debug: noop,
			trace: noop,
			show: noop,
			createSubLogger(): ILogger { return mockLogger; },
			withExtraTarget(): ILogger { return mockLogger; }
		};

		/**
		 * Creates an `AlternativeNotebookContentEditGenerator` wired to test doubles:
		 * a content service that always hands back `provider`, a `DiffServiceImpl`,
		 * a log service that forwards to the no-op `mockLogger`, and a `NullTelemetryService`.
		 *
		 * @param provider The alternative content provider the stubbed content service returns.
		 * @returns A new edit generator instance backed by the stubs above.
		 */
		function getEditGenerator(provider: BaseAlternativeNotebookContentProvider) {
			// Content service stub: ignores the requested format and reports the provider's own kind.
			const contentService = new class implements IAlternativeNotebookContentService {
				declare readonly _serviceBrand: undefined;
				create(_format: any) {
					return provider;
				}
				getFormat() {
					return provider.kind;
				}
			}();
			const diffService = new DiffServiceImpl();
			// Log service stub: each level delegates to the matching no-op on `mockLogger`.
			const logService = new class implements ILogService {
				_serviceBrand: undefined;
				internal = mockLogger;
				logger = mockLogger;
				trace = mockLogger.trace;
				debug = mockLogger.debug;
				info = mockLogger.info;
				warn = mockLogger.warn;
				error = mockLogger.error;
				show(preserveFocus?: boolean): void {
					//
				}
				createSubLogger(): ILogger {
					return this;
				}
				withExtraTarget(): ILogger {
					return this;
				}
			}();
			const telemetryService = new NullTelemetryService();
			return new AlternativeNotebookContentEditGenerator(contentService, diffService, logService, telemetryService);
		}
		[true, false].forEach(applyEditsImmediately => {
			describe(`${provider.kind} Content Parser`, () => {
				test(`Generate a single Notebook Edit (insert md cell)`, async () => {
					// Only the XML fixture exists for this scenario.
					if (provider.kind !== 'xml') {
						return;
					}
					const alternativeFile = await loadFile({ filePath: `${fixture('insert.1.ipynb')}.xml` });
					const file = await loadFile({ filePath: fixture('insert.ipynb') });
					const notebook = await loadNotebook(file);

					let alternativeContents = alternativeFile.contents;
					const cellSummary =
notebook.getCells().map(summarize);90cellSummary.forEach(cell => {91const toReplace = provider.kind === 'xml' ? `<CELL_ID_${cell.index}>` : `CELL_ID_${cell.index}`;92alternativeContents = alternativeContents.replace(toReplace, cell.id);93});94const alternativeContentLines = AsyncIterableObject.fromArray(alternativeContents.split(/\r?\n/)).map(l => new LineOfText(l));95const edits = await getEditGenerator(provider).generateNotebookEdits(notebook, alternativeContentLines, undefined, CancellationToken.None);96const notebookEdits: NotebookEdit[] = [];97for await (const edit of edits) {98if (!Array.isArray(edit)) {99notebookEdits.push(edit);100}101}102expect(notebookEdits.length).toBe(1);103expect(notebookEdits[0].newCells.length).toBe(1);104expect(notebookEdits[0].newCells[0].kind).toBe(NotebookCellKind.Markup);105expect(notebookEdits[0].newCells[0].value.split(/\r?\n/g)).toEqual([`# DataFrame Details`, ``, `This DataFrame contains two columns: 'Name' and 'Gender'. The 'Name' column has three entries: 'Hello', 'World', and 'Baz'. 
The 'Gender' column has three entries: 'F', 'M', and 'F'.`]);106expect(notebookEdits[0].range.start).toBe(1);107expect(notebookEdits[0].range.end).toBe(1);108109// Generate edits as though this is a branch new notebook.110const newEdits = await getEditGenerator(provider).generateNotebookEdits(Uri.file('newNotebook.ipynb'), alternativeContentLines, undefined, CancellationToken.None);111notebookEdits.length = 0;112for await (const edit of newEdits) {113if (!Array.isArray(edit)) {114notebookEdits.push(edit);115}116}117expect(notebookEdits.length).toBe(notebook.cellCount + 1);118});119test(`Generate a single Notebook Edit (insert Python cell)`, async () => {120// This test focuses on generating as Notebook edit where LLM hallucinates121// & generates Python content instead of a structured Jupytext content.122// In such cases the python code should be inserted as is in a single cell.123// Previously nothing would be inserted.124if (provider.kind !== 'text') {125return;126}127const alternativeContents = 'import math\n\ndef circle_area(radius):\n return math.pi * radius**2\n';128const alternativeContentLines = AsyncIterableObject.fromArray(alternativeContents.split(/\r?\n/)).map(l => new LineOfText(l));129const edits = await getEditGenerator(provider).generateNotebookEdits(Uri.file('newFile.ipynb'), alternativeContentLines, undefined, CancellationToken.None);130const notebookEdits: NotebookEdit[] = [];131for await (const edit of edits) {132if (!Array.isArray(edit)) {133notebookEdits.push(edit);134}135}136expect(notebookEdits.length).toBe(1);137expect(notebookEdits[0].newCells.length).toBe(1);138expect(notebookEdits[0].newCells[0].kind).toBe(NotebookCellKind.Code);139expect(notebookEdits[0].newCells[0].value.split(/\r?\n/g)).toEqual(alternativeContents.split(/\r?\n/));140});141142[143{144file: `${fixture('insert.2.ipynb')}.xml`,145notebookEdits: [146NotebookEdit.insertCells(1, [new NotebookCellData(NotebookCellKind.Markup, '', 'markdown')]),147NotebookEdit.insertCells(2, 
[new NotebookCellData(NotebookCellKind.Markup, '', 'markdown')]),148NotebookEdit.insertCells(7, [new NotebookCellData(NotebookCellKind.Code, '', 'python')])149]150},151{152file: `${fixture('insert.3.ipynb')}.xml`,153notebookEdits: [154NotebookEdit.insertCells(5, [new NotebookCellData(NotebookCellKind.Code, '', 'python')])155]156},157{158file: `${fixture('insert.4.ipynb')}.xml`,159notebookEdits: [160NotebookEdit.deleteCells(new NotebookRange(1, 2))161]162}163].forEach(testInfo => {164test(`Generate ${testInfo.notebookEdits.length} Notebook Edits from ${path.basename(testInfo.file)}`, async () => {165// This test focuses on generating as few Notebook edits as possible.166// If a user deletes a cell in the middle there's no need to generate any other edits, but just the delete edit.167if (provider.kind !== 'xml') {168return;169}170171const simulation = new SimulationWorkspace();172const beforeIPynb = await loadFile({ filePath: fixture('insert.ipynb') });173const notebook = await loadNotebook(beforeIPynb, simulation);174175const alternativeFile = await loadFile({ filePath: testInfo.file });176let alternativeContents = alternativeFile.contents;177const cellSummary = notebook.getCells().map(summarize);178cellSummary.forEach(cell => {179const toReplace = provider.kind === 'xml' ? 
`<CELL_ID_${cell.index}>` : `CELL_ID_${cell.index}`;180alternativeContents = alternativeContents.replace(toReplace, cell.id);181});182const alternativeContentLines = AsyncIterableObject.fromArray(alternativeContents.split(/\r?\n/)).map(l => new LineOfText(l));183const edits = await getEditGenerator(provider).generateNotebookEdits(notebook, alternativeContentLines, undefined, CancellationToken.None);184185186const notebookEdits: NotebookEdit[] = [];187for await (const edit of edits) {188if (Array.isArray(edit)) {189simulation.applyEdits(edit[0], edit[1]);190} else {191notebookEdits.push(edit);192simulation.applyNotebookEdits(notebook.uri, [edit]);193}194}195196expect(normatlizeContent(provider.getAlternativeDocument(notebook).getText())).toBe(normatlizeContent(alternativeFile.contents));197expect(notebookEdits.length).toBe(testInfo.notebookEdits.length);198199testInfo.notebookEdits.forEach((edit, i) => {200expect(notebookEdits[i].newCells.length).toBe(edit.newCells.length);201edit.newCells.forEach((c, j) => {202expect(notebookEdits[i].newCells[j].kind).toBe(c.kind);203expect(notebookEdits[i].newCells[j].languageId).toBe(c.languageId);204});205expect(notebookEdits[i].range.start).toBe(edit.range.start);206expect(notebookEdits[i].range.end).toBe(edit.range.end);207});208});209});210211describe(`${provider.kind} Position Translator`, () => {212test(`Translate position in notebook cell to Alternative Document & back`, async () => {213const notebook = await loadNotebook(loadFile({ filePath: fixture('sample.ipynb') }));214const altDoc = provider.getAlternativeDocument(notebook);215216const positions = [217{ cellIndex: 0, start: new Position(0, 9), end: new Position(0, 17) },218{ cellIndex: 1, start: new Position(0, 0), end: new Position(0, 34) },219{ cellIndex: 1, start: new Position(0, 0), end: new Position(0, 33) },220{ cellIndex: 2, start: new Position(0, 0), end: new Position(0, 6) },221{ cellIndex: 2, start: new Position(1, 7), end: new Position(1, 9) },222{ 
cellIndex: 3, start: new Position(1, 10), end: new Position(2, 9) },223{ cellIndex: 5, start: new Position(1, 10), end: new Position(1, 20) },224];225226for (const pos of positions) {227const cell = notebook.cellAt(pos.cellIndex);228const startTranslation = [pos.start, pos.end].map(p => altDoc.fromCellPosition(cell, p));229const textFromCell = cell.document.getText(new Range(pos.start, pos.end));230const textFromAltDoc = altDoc.getText(new Range(startTranslation[0], startTranslation[1]));231if (provider.kind !== 'json' || pos.start.line === pos.end.line) {232expect(normatlizeContent(textFromAltDoc)).toBe(normatlizeContent(textFromCell));233} else {234expect(normatlizeContent(textFromAltDoc).split(/\r?\n/).join(EOL)).toBe([`\\"Hello from Python!\\")",`, ` " print`].join(EOL));235}236237// Now try the reverse translation.238if (provider.kind !== 'json') {239const cellPosition = altDoc.toCellPosition(startTranslation[0]);240expect(cellPosition).toBeDefined();241expect(cellPosition?.cell).toBe(cell);242expect(cellPosition?.position.line).toBe(pos.start.line);243expect(cellPosition?.position.character).toBe(pos.start.character);244}245}246});247248test(`getAlternativeDocumentFromText rebuilds cell offset map correctly`, async () => {249if (provider.kind === 'json') {250// JSON format doesn't use getAlternativeDocumentFromText251return;252}253254const simulation = new SimulationWorkspace();255const cells = [256new NotebookCellData(NotebookCellKind.Code, 'import sys', 'python'),257new NotebookCellData(NotebookCellKind.Code, 'print(sys.executable)', 'python'),258new NotebookCellData(NotebookCellKind.Markup, '# Hello World', 'markdown'),259new NotebookCellData(NotebookCellKind.Code, 'import os\nprint(os.path)', 'python'),260];261const notebook = ExtHostNotebookDocumentData.fromNotebookData(262Uri.file('test.ipynb'),263new NotebookData(cells),264'jupyter-notebook',265simulation266).document;267268// Get the alternative document269const altDoc = 
provider.getAlternativeDocument(notebook);270const originalText = altDoc.getText();271272// Rebuild from text273const rebuiltDoc = provider.getAlternativeDocumentFromText(originalText, notebook);274275// Test that the rebuilt document has the same text276expect(rebuiltDoc.getText()).toBe(originalText);277278// Test position translation works correctly279const positions = [280{ cellIndex: 0, position: new Position(0, 0) },281{ cellIndex: 0, position: new Position(0, 6) },282{ cellIndex: 1, position: new Position(0, 0) },283{ cellIndex: 1, position: new Position(0, 10) },284{ cellIndex: 2, position: new Position(0, 0) },285{ cellIndex: 3, position: new Position(0, 0) },286{ cellIndex: 3, position: new Position(1, 5) },287];288289for (const pos of positions) {290const cell = notebook.cellAt(pos.cellIndex);291292// Translate from cell to alternative document293const altPosition = rebuiltDoc.fromCellPosition(cell, pos.position);294295// Translate back from alternative document to cell296const cellPosition = rebuiltDoc.toCellPosition(altPosition);297298expect(cellPosition).toBeDefined();299expect(cellPosition?.cell).toBe(cell);300expect(cellPosition?.position.line).toBe(pos.position.line);301expect(cellPosition?.position.character).toBe(pos.position.character);302}303});304305test(`getAlternativeDocumentFromText handles cells without IDs`, async () => {306if (provider.kind === 'json') {307return;308}309310const simulation = new SimulationWorkspace();311const cells = [312new NotebookCellData(NotebookCellKind.Code, 'x = 1', 'python'),313new NotebookCellData(NotebookCellKind.Code, 'y = 2', 'python'),314new NotebookCellData(NotebookCellKind.Code, 'z = 3', 'python'),315];316const notebook = ExtHostNotebookDocumentData.fromNotebookData(317Uri.file('test.ipynb'),318new NotebookData(cells),319'jupyter-notebook',320simulation321).document;322323// Get alternative document text324const altDoc = provider.getAlternativeDocument(notebook);325let text = altDoc.getText();326327// Strip 
cell IDs to simulate LLM-generated content without IDs328if (provider.kind === 'xml') {329text = text.replace(/id="[^"]+"/g, 'id=""');330} else if (provider.kind === 'text') {331text = text.replace(/\[id=[^\]]+\]/g, '');332}333334// Rebuild from text without IDs335const rebuiltDoc = provider.getAlternativeDocumentFromText(text, notebook);336337// Verify position translation still works by matching language338for (let i = 0; i < notebook.cellCount; i++) {339const cell = notebook.cellAt(i);340const position = new Position(0, 0);341342const altPosition = rebuiltDoc.fromCellPosition(cell, position);343const cellPosition = rebuiltDoc.toCellPosition(altPosition);344345expect(cellPosition).toBeDefined();346expect(cellPosition?.cell.document.languageId).toBe('python');347}348});349350test(`getAlternativeDocumentFromText handles markdown cells correctly`, async () => {351if (provider.kind === 'json') {352return;353}354355const simulation = new SimulationWorkspace();356const cells = [357new NotebookCellData(NotebookCellKind.Markup, '# Title\nSome content', 'markdown'),358new NotebookCellData(NotebookCellKind.Code, 'print("hello")', 'python'),359new NotebookCellData(NotebookCellKind.Markup, '## Subtitle\nMore text', 'markdown'),360];361const notebook = ExtHostNotebookDocumentData.fromNotebookData(362Uri.file('test.ipynb'),363new NotebookData(cells),364'jupyter-notebook',365simulation366).document;367368const altDoc = provider.getAlternativeDocument(notebook);369const text = altDoc.getText();370const rebuiltDoc = provider.getAlternativeDocumentFromText(text, notebook);371372// Test markdown cell position translation373const markdownCell1 = notebook.cellAt(0);374const markdownCell2 = notebook.cellAt(2);375376const pos1 = new Position(0, 2); // Inside "# Title"377const pos2 = new Position(0, 3); // Inside "## Subtitle"378379const altPos1 = rebuiltDoc.fromCellPosition(markdownCell1, pos1);380const altPos2 = rebuiltDoc.fromCellPosition(markdownCell2, pos2);381382const backToCell1 
= rebuiltDoc.toCellPosition(altPos1);383const backToCell2 = rebuiltDoc.toCellPosition(altPos2);384385expect(backToCell1?.cell).toBe(markdownCell1);386expect(backToCell1?.position.line).toBe(0);387expect(backToCell1?.position.character).toBe(2);388389expect(backToCell2?.cell).toBe(markdownCell2);390expect(backToCell2?.position.line).toBe(0);391expect(backToCell2?.position.character).toBe(3);392});393});394395test(`Parse with leading empty lines`, async () => {396const txt = `397398#%% vscode.cell [language=python]399import math400401def circle_area(radius):402return math.pi * radius**2403`;404const xml = `405406<VSCode.Cell id="f18c8b6e" language="python">407import math408409def circle_area(radius):410return math.pi * radius**2411</VSCode.Cell>412`;413const json = `414415{416"cells": [417{418"cell_type": "code",419"metadata": {420"id": "f18c8b6e",421"language": "python"422},423"source": [424"import math",425"",426"def circle_area(radius):",427" return math.pi * radius**2"428]429}430]431}432`;433const content = provider.kind === 'xml' ? xml : (provider.kind === 'text' ? 
txt : json);434const uri = Uri.file('single_before.ipynb');435const notebook = ExtHostNotebookDocumentData.createJupyterNotebook(uri, JSON.stringify({ cells: [] })).document;436const edits = await getEditGenerator(provider).generateNotebookEdits(notebook, textToAsyncIterableLines(content), undefined, CancellationToken.None);437const notebookEdits = [];438for await (const edit of edits) {439notebookEdits.push(edit);440}441expect(notebookEdits.length).toBe(1);442expect(notebookEdits[0]).toBeInstanceOf(NotebookEdit);443expect((notebookEdits[0] as NotebookEdit).newCells.length).toBe(1);444expect(normatlizeContent((notebookEdits[0] as NotebookEdit).newCells[0].value)).toBe(normatlizeContent(`import math445446def circle_area(radius):447return math.pi * radius**2448`));449});450test(`Parse with empty lines between cell markers`, async () => {451if (provider.kind !== 'xml') {452return;453}454const content = `<VSCode.Cell id="feb4cb5e" language="julia">455function circleArea(r::Float64)456return pi * r * r457end458</VSCode.Cell>459460461<VSCode.Cell language="julia">462function calculateCircleArea(radius::Float64)463return pi * radius^2464end465</VSCode.Cell>`;466const uri = Uri.file('single_before.ipynb');467const notebook = ExtHostNotebookDocumentData.createJupyterNotebook(uri, JSON.stringify({ cells: [] })).document;468const edits = await getEditGenerator(provider).generateNotebookEdits(notebook, textToAsyncIterableLines(content), undefined, CancellationToken.None);469const notebookEdits = [];470for await (const edit of edits) {471notebookEdits.push(edit);472}473expect(notebookEdits.length).toBe(2);474expect(notebookEdits[0]).toBeInstanceOf(NotebookEdit);475expect((notebookEdits[0] as NotebookEdit).newCells.length).toBe(1);476expect(normatlizeContent((notebookEdits[0] as NotebookEdit).newCells[0].value)).toBe(normatlizeContent(`function circleArea(r::Float64)477return pi * r * r478end479`));480expect(normatlizeContent((notebookEdits[1] as 
NotebookEdit).newCells[0].value)).toBe(normatlizeContent(`function calculateCircleArea(radius::Float64)481return pi * radius^2482end483`));484});485test('Handle duplicate ids', async () => {486if (provider.kind === 'text' || provider.kind === 'json') {487return;488}489const simulation = new SimulationWorkspace();490const file = await loadFile({ filePath: fixture('duplicateCellIds.xml') });491const notebook = await loadNotebook(await loadFile({ filePath: fixture('duplicateCellIds.ipynb') }), simulation);492const edits = await getEditGenerator(provider).generateNotebookEdits(notebook, textToAsyncIterableLines(file.contents), undefined, CancellationToken.None);493for await (const edit of edits) {494if (!Array.isArray(edit)) {495simulation.applyNotebookEdits(notebook.uri, [edit]);496}497}498499expect(notebook.cellCount).toBe(11);500expect(notebook.getCells()[0].kind).toBe(NotebookCellKind.Markup);501expect(notebook.getCells()[1].kind).toBe(NotebookCellKind.Code);502expect(notebook.getCells()[2].kind).toBe(NotebookCellKind.Code);503expect(notebook.getCells()[3].kind).toBe(NotebookCellKind.Markup);504expect(notebook.getCells()[4].kind).toBe(NotebookCellKind.Code);505expect(notebook.getCells()[5].kind).toBe(NotebookCellKind.Markup);506expect(notebook.getCells()[6].kind).toBe(NotebookCellKind.Code);507expect(notebook.getCells()[7].kind).toBe(NotebookCellKind.Markup);508expect(notebook.getCells()[8].kind).toBe(NotebookCellKind.Code);509expect(notebook.getCells()[9].kind).toBe(NotebookCellKind.Markup);510expect(notebook.getCells()[10].kind).toBe(NotebookCellKind.Code);511});512});513describe(`${provider.kind} Edit Generation`, () => 
{514[515'circle_area_edits',516'delete_1_line_in_cell',517'data_processing',518'data_processing_2',519'data_visualization',520'data_visualization_2',521'datacleansing',522'dataframe',523'edit',524'empty',525'imports',526'large_cell',527'multicells',528'plot',529'plotly_to_matplotlib',530'refactor',531'reorder',532'single',533'variables'534].forEach((filePath) => {535test(`Apply Edits for ${path.basename(filePath)}`, async () => {536if ((filePath === 'plotly_to_matplotlib' || filePath === 'matplotlib_to_plotly') && provider.kind === 'json') {537// generating text edits for JSON format and ensuring the final output is the same as that generated for text/xml is difficult.538return;539}540if (provider.kind === 'json' && ['delete_1_line_in_cell'].includes(filePath)) {541// Incorrectly genrated edits for JSON format.542return;543}544const simulation = new SimulationWorkspace();545const [atlContent, beforeIPynb, afterIPynb] = await Promise.all([loadFile({ filePath: fixture(`${filePath}.altContent.${provider.kind}`) }), loadFile({ filePath: fixture(`${filePath}_before.ipynb`) }), loadFile({ filePath: fixture(`${filePath}_after.ipynb`) })]);546const notebook = await loadNotebook(beforeIPynb, simulation);547const cellSummary = notebook.getCells().map(summarize);548cellSummary.forEach(cell => {549const toReplace = provider.kind === 'xml' ? 
`<CELL_ID_${cell.index}>` : `CELL_ID_${cell.index}`;550atlContent.contents = atlContent.contents.replace(toReplace, cell.id);551});552553const notebookEdits: (NotebookEdit | [Uri, TextEdit[]])[] = [];554for await (const edit of getEditGenerator(provider).generateNotebookEdits(notebook, textToAsyncIterableLines(atlContent.contents), undefined, CancellationToken.None)) {555notebookEdits.push(edit);556}557558const notebookData = applyNotebookEdits(notebook, notebookEdits, simulation);559const expectedNotebook = await loadNotebook(afterIPynb, simulation);560if (filePath === 'plotly_to_matplotlib' && provider.kind === 'text') {561// The edits generated for text version is slightly different, hence the result notebook is not the same as we'd expect when using xml.562// Hence we need to skip the failing cell (due to differences in LLM outputs)563notebookData.cells[8].value = expectedNotebook.getCells()[8].document.getText();564notebookData.cells[10].value = expectedNotebook.getCells()[10].document.getText();565}566if (filePath === 'multicells' && provider.kind === 'text') {567// The edits generated for text version is slightly different, hence the result notebook is not the same as we'd expect when using xml.568// Hence we need to skip the failing cell (due to differences in LLM outputs)569notebookData.cells[1].value = expectedNotebook.getCells()[1].document.getText();570notebookData.cells[3].value = expectedNotebook.getCells()[3].document.getText();571}572assertDocumentsAreEqual(expectedNotebook, notebookData, provider.kind);573});574test(`Generate Edits for New Document for ${path.basename(filePath)}`, async () => {575if ((filePath === 'plotly_to_matplotlib' || filePath === 'matplotlib_to_plotly') && provider.kind === 'json') {576// generating text edits for JSON format and ensuring the final output is the same as that generated for text/xml is difficult.577return;578}579const ipynb = await loadFile({ filePath: fixture(`${filePath}_before.ipynb`) });580const notebook = 
await loadNotebook(ipynb);
						const altContent = provider.getAlternativeDocument(notebook).getText();
						const alternativeContentLines = textToAsyncIterableLines(altContent);
						const newEdits = await getEditGenerator(provider).generateNotebookEdits(Uri.file('newNotebook.ipynb'), alternativeContentLines, undefined, CancellationToken.None);
						const notebookEdits: NotebookEdit[] = [];
						for await (const edit of newEdits) {
							if (!Array.isArray(edit)) {
								notebookEdits.push(edit);
							}
						}
						expect(notebookEdits.length).toBe(notebook.cellCount);
						// Compare each generated cell against the source notebook's cell at the same index.
						// (Previously the loop compared the notebook's cells with themselves, which always passed
						// and never looked at the generated edits.)
						// NOTE(review): assumes each edit inserts exactly one cell, as the count assertion above implies - confirm.
						notebook.getCells().forEach((expectedCell, i) => {
							const generatedCell = notebookEdits[i].newCells[0];
							expect(normatlizeContent(generatedCell.value)).toBe(normatlizeContent(expectedCell.document.getText()));
							expect(generatedCell.languageId).toBe(expectedCell.document.languageId);
							expect(generatedCell.kind).toBe(expectedCell.kind);
						});
					});
				});
			});

			/**
			 * Applies a stream of edits to the simulated workspace and returns the notebook edits seen.
			 *
			 * In the real world a notebook gets edited asynchronously, i.e. streamed edits are not
			 * applied the moment they are produced. In tests they would otherwise always be applied
			 * immediately, so this helper covers both cases.
			 *
			 * @param simulation Workspace that receives the text and notebook edits.
			 * @param notebook Notebook whose `uri` the notebook edits are applied to.
			 * @param edits Stream of notebook edits and `[uri, text edits]` tuples.
			 * @param applyEditsImmediately When `true`, each edit is applied as soon as it is read from
			 * the stream; when `false`, the stream is drained first and the edits are applied afterwards.
			 * @returns The notebook edits (text-edit tuples excluded), in the order they were applied.
			 */
			async function applyEditsSyncOrAsync(simulation: SimulationWorkspace, notebook: NotebookDocument, edits: AsyncIterable<NotebookEdit | [Uri, TextEdit[]]>, applyEditsImmediately: boolean): Promise<NotebookEdit[]> {
				const notebookEdits: NotebookEdit[] = [];
				// Single place that applies one edit, so both modes cannot drift apart.
				const applyEdit = (edit: NotebookEdit | [Uri, TextEdit[]]): void => {
					if (Array.isArray(edit)) {
						simulation.applyEdits(edit[0], edit[1]);
					} else {
						simulation.applyNotebookEdits(notebook.uri, [edit]);
						notebookEdits.push(edit);
					}
				};

				if (applyEditsImmediately) {
					for await (const edit of edits) {
						applyEdit(edit);
					}
				} else {
					// Drain the whole stream before touching the notebook.
					const collectedEdits: (NotebookEdit | [Uri, TextEdit[]])[] = [];
					for await (const edit of edits) {
						collectedEdits.push(edit);
					}
					for (const edit of collectedEdits) {
						applyEdit(edit);
					}
				}
				return notebookEdits;
			}

			describe(`${provider.kind} Generate Edits (insert/delete/swap`, () =>
{
				/**
				 * Loads the `swapping_cells.ipynb` fixture into a fresh simulation workspace, builds
				 * `<VSCode.Cell>` markup for the requested layout, generates edits from that markup and
				 * applies them via `applyEditsSyncOrAsync`, then asserts that the notebook's cell texts
				 * (comma-joined) equal the requested contents.
				 *
				 * @param cells Desired layout: `index` selects the `<CELL_ID_n>` placeholder and `contents`
				 * becomes the cell body. Placeholders whose index matches an existing cell are replaced
				 * with that cell's id; other placeholders are left in the markup unchanged.
				 * @returns The edited notebook together with the notebook edits that were applied.
				 */
				async function applyEditsAndVerify(cells: { index: number; contents: number }[]) {
					const workspace = new SimulationWorkspace();
					const fixtureFile = await loadFile({ filePath: fixture('swapping_cells.ipynb') });
					const notebook = await loadNotebook(fixtureFile, workspace);

					// One open tag, one body line and one close tag per requested cell.
					const markup: string[] = [];
					for (const spec of cells) {
						markup.push(
							`<VSCode.Cell id="<CELL_ID_${spec.index}>" language="python">`,
							`${spec.contents}`,
							`</VSCode.Cell>`
						);
					}
					let altContent = markup.join(EOL);

					// Swap the placeholder of every existing cell for its real id.
					for (const summary of notebook.getCells().map(summarize)) {
						let placeholder: string;
						if (provider.kind === 'xml') {
							placeholder = `<CELL_ID_${summary.index}>`;
						} else {
							placeholder = `CELL_ID_${summary.index}`;
						}
						altContent = altContent.replace(placeholder, summary.id);
					}

					const generator = getEditGenerator(provider);
					const edits = await generator.generateNotebookEdits(notebook, textToAsyncIterableLines(altContent), undefined, CancellationToken.None);
					const notebookEdits = await applyEditsSyncOrAsync(workspace, notebook, edits, applyEditsImmediately);

					const actualText = notebook.getCells().map(cell => cell.document.getText()).join();
					const expectedText = cells.map(spec => `${spec.contents}`).join();
					expect(actualText).toBe(expectedText);
					return { notebook, notebookEdits };
				}
				test('Insert 1 cell at the top', async () => {
					// The helper emits XML markup, so only the XML provider is exercised.
					if (provider.kind !== 'xml') {
						return;
					}
					const layout = [10, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9].map(n => ({ index: n, contents: n }));
					const result = await applyEditsAndVerify(layout);
					const applied = result.notebookEdits;
					expect(applied.length).toBe(1);
					expect(applied[0].newCells.length).toBe(1);
					expect(applied[0].newCells[0].value).toBe('10');
					expect(applied[0].range.start).toBe(0);
				});
				test('Insert 1 cell at the end', async () => {
					// The helper emits XML markup, so only the XML provider is exercised.
					if (provider.kind !== 'xml') {
						return;
					}
					const layout = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10].map(n => ({ index: n, contents: n }));
					const result = await applyEditsAndVerify(layout);
					const applied = result.notebookEdits;
					expect(applied.length).toBe(1);
					expect(applied[0].newCells.length).toBe(1);
					expect(applied[0].newCells[0].value).toBe('10');
				});
				test('Swap 2 Cells', async () =>
{
					if (provider.kind !== 'xml') {
						return;
					}
					const cells = [0, 1, 2, 3, 4, 5, 6, 7, 9, 8].map(i => ({ index: i, contents: i }));
					await applyEditsAndVerify(cells);
				});
				// The scenarios below describe the desired cell order/contents as `{ index, contents }`
				// pairs and hand them to `applyEditsAndVerify`. They bail out early for every provider
				// other than XML.
				test('Moving 2 Cells', async () => {
					if (provider.kind !== 'xml') {
						return;
					}
					const cells = [0, 1, 2, 3, 4, 5, 6, 9, 7, 8].map(i => ({ index: i, contents: i }));
					await applyEditsAndVerify(cells);
				});
				test('Delete 1 Cell', async () => {
					if (provider.kind !== 'xml') {
						return;
					}
					const cells = [0, 1, 2, 3, 4, 5, 6, 7, 8].map(i => ({ index: i, contents: i }));
					const { notebookEdits } = await applyEditsAndVerify(cells);

					expect(notebookEdits.length).toBe(1);
					expect(notebookEdits[0].range.start).toBe(9);
				});
				test('Move last Cell to top', async () => {
					if (provider.kind !== 'xml') {
						return;
					}
					const cells = [9, 0, 1, 2, 3, 4, 5, 6, 7, 8].map(i => ({ index: i, contents: i }));
					const { notebookEdits } = await applyEditsAndVerify(cells);

					expect(notebookEdits.length).toBe(2);
					expect(notebookEdits[0].range.start).toBe(9);
					expect(notebookEdits[1].range.start).toBe(0);
					expect(notebookEdits[1].newCells[0].value).toBe('9');
				});
				test('Swap and insert', async () => {
					if (provider.kind !== 'xml') {
						return;
					}
					const cells = [9, 0, 1, 2, 3, 14, 15, 6, 7, 8].map(i => ({ index: i, contents: i }));
					await applyEditsAndVerify(cells);
				});
				test('Swap multiple and insert', async () => {
					if (provider.kind !== 'xml') {
						return;
					}
					const cells = [1, 2, 3, 4, 6, 5, 9, 0].map(i => ({ index: i, contents: i }));
					await applyEditsAndVerify(cells);
				});
				test('Swap multiple and delete', async () => {
					if (provider.kind !== 'xml') {
						return;
					}
					const cells = [1, 2, 3, 5, 6, 4, 0, 9].map(i => ({ index: i, contents: i }));
					await applyEditsAndVerify(cells);
				});
				test('Move top Cell to bottom', async () => {
					if (provider.kind !== 'xml') {
						return;
					}
					const cells = [1, 2, 3, 4, 5, 6, 7, 8, 9, 0].map(i => ({ index: i, contents: i }));
					const { notebookEdits } = await applyEditsAndVerify(cells);

					expect(notebookEdits.length).toBe(2);
					expect(notebookEdits[0].range.start).toBe(0);
					expect(notebookEdits[1].range.start).toBe(9);
					expect(notebookEdits[1].newCells[0].value).toBe('0');
				});
				test('Insert 2 Cell at the top', async () => {
					if (provider.kind !== 'xml') {
						return;
					}
					const cells = [10, 11, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9].map(i => ({ index: i, contents: i }));
					const { notebookEdits } = await applyEditsAndVerify(cells);

					expect(notebookEdits.length).toBe(2);
					expect(notebookEdits[0].newCells[0].value).toBe('10');
					expect(notebookEdits[0].range.start).toBe(0);
					expect(notebookEdits[1].newCells[0].value).toBe('11');
					expect(notebookEdits[1].range.start).toBe(1);
				});
				test('Insert 8 Cell in the middle', async () => {
					if (provider.kind !== 'xml') {
						return;
					}
					const cells = [0, 1, 2, 3, 4, 15, 16, 17, 18, 19, 20, 21, 22, 5, 6, 7, 8, 9].map(i => ({ index: i, contents: i }));
					const { notebookEdits } = await applyEditsAndVerify(cells);

					expect(notebookEdits.length).toBe(8);
				});
				test('Delete 3 cells from the middle', async () => {
					if (provider.kind !== 'xml') {
						return;
					}
					const cells = [0, 1, 2, 3, 7, 8, 9].map(i => ({ index: i, contents: i }));
					const { notebookEdits } = await applyEditsAndVerify(cells);

					expect(notebookEdits.length).toBe(3);
				});
				test('Delete 3 Cell', async () => {
					if (provider.kind !== 'xml') {
						return;
					}
					const cells = [{ index: 1, contents: 1 }, { index: 2, contents: 2 }, { index: 3, contents: 3 }, { index: 4, contents: 4 }, { index: 5, contents: 5 }, { index: 6, contents: 6 }, { index: 7, contents: 7 }];
					const { notebookEdits } = await applyEditsAndVerify(cells);

					// We should only have 3 deletes
					expect(notebookEdits.length).toBe(3);
					expect(notebookEdits[0].range.start).toBe(9);
					expect(notebookEdits[1].range.start).toBe(8);
					expect(notebookEdits[2].range.start).toBe(0);
				});
				test('Delete 3 Cell (from middle as well)', async () => {
					if (provider.kind !== 'xml') {
						return;
					}
					const cells = [{ index: 1, contents: 1 }, { index: 2, contents: 2 }, { index: 3, contents: 3 }, { index: 4, contents: 4 }, { index: 6, contents: 6 }, { index: 7, contents: 7 }, { index: 8, contents: 8 }];
					const { notebookEdits } = await applyEditsAndVerify(cells);

					// We should only have 3 deletes
					expect(notebookEdits.length).toBe(3);
					expect(notebookEdits[0].range.start).toBe(9);
					expect(notebookEdits[1].range.start).toBe(5);
					expect(notebookEdits[2].range.start).toBe(0);
				});
				test('Delete first and update second', async () => {
					if (provider.kind !== 'xml') {
						return;
					}
					const cells = [{ index: 1, contents: 2 }, { index: 2, contents: 2 }, { index: 3, contents: 3 }, { index: 4, contents: 4 }, { index: 6, contents: 6 }, { index: 7, contents: 7 }, { index: 8, contents: 8 }];
					await applyEditsAndVerify(cells);
				});
				test('Delete first, last and update few in middle', async () => {
					if (provider.kind !== 'xml') {
						return;
					}
					const cells = [{ index: 1, contents: 1 }, { index: 2, contents: 2222 }, { index: 3, contents: 999 }, { index: 4, contents: 4 }, { index: 6, contents: 6 }, { index: 7, contents: 7 }];
					await applyEditsAndVerify(cells);
				});
			});

			// These tests strip cell ids out of the alternative content before generating edits and
			// then check that the notebook ends up with the same cell text as `newNotebook`.
			describe(`${provider.kind} Generate Edits instead of inserting and deleteing a cell (where id is missing)`, () => {
				test('Do not insert and delete the same cell if id is missing', async () => {
					const simulation = new SimulationWorkspace();
					const cells = [
						[`import sys`, `import os`],
						[`print(sys.executable)`]
					].map(contents => new NotebookCellData(NotebookCellKind.Code, contents.join(EOL), 'python'));
					const notebook = ExtHostNotebookDocumentData.fromNotebookData(Uri.file('test.ipynb'), new NotebookData(cells), 'jupyter-notebook', simulation).document;

					const newNotebook = ExtHostNotebookDocumentData.fromNotebookData(Uri.file('test2.ipynb'), new NotebookData(cells), 'jupyter-notebook', simulation).document;
					let alternativeContent = provider.getAlternativeDocument(newNotebook).getText();
					// Blank out the id of the first cell only.
					const id = summarize(newNotebook.getCells()[0]).id;
					alternativeContent = alternativeContent.replace(id, '');

					const edits = await getEditGenerator(provider).generateNotebookEdits(notebook, textToAsyncIterableLines(alternativeContent), undefined, CancellationToken.None);
					const notebookEdits = await applyEditsSyncOrAsync(simulation, notebook, edits, applyEditsImmediately);

					expect(notebookEdits.length).toBe(0);
					notebook.getCells().forEach((cell, i) => {
						expect(cell.document.getText()).toBe(newNotebook.getCells()[i].document.getText());
					});
				});
				test('Do not insert and delete the same two cell if id is missing, just insert the new 3rd cell', async () => {
					const simulation = new SimulationWorkspace();
					let cells = [
						[`import sys`, `import os`],
						[`print(sys.executable)`],
					].map(contents => new NotebookCellData(NotebookCellKind.Code, contents.join(EOL), 'python'));
					const notebook = ExtHostNotebookDocumentData.fromNotebookData(Uri.file('test.ipynb'), new NotebookData(cells), 'jupyter-notebook', simulation).document;

					cells = [
						[`import sys`, `import os`],
						[`print(sys.executable)`],
						[`print("Hello World")`]
					].map(contents => new NotebookCellData(NotebookCellKind.Code, contents.join(EOL), 'python'));
					const newNotebook = ExtHostNotebookDocumentData.fromNotebookData(Uri.file('test2.ipynb'), new NotebookData(cells), 'jupyter-notebook', simulation).document;
					let alternativeContent = provider.getAlternativeDocument(newNotebook).getText();
					// Blank out the id of every cell.
					newNotebook.getCells().forEach(cell => {
						const id = summarize(cell).id;
						alternativeContent = alternativeContent.replace(id, '');
					});

					const edits = await getEditGenerator(provider).generateNotebookEdits(notebook, textToAsyncIterableLines(alternativeContent), undefined, CancellationToken.None);
					const notebookEdits = await applyEditsSyncOrAsync(simulation, notebook, edits, applyEditsImmediately);

					expect(notebookEdits.length).toBe(1);
					expect(notebookEdits[0].range.start).toBe(2);
					expect(notebookEdits[0].newCells[0].value).toBe('print("Hello World")');
					expect(notebookEdits[0].newCells.length).toBe(1);
					notebook.getCells().forEach((cell, i) => {
						expect(cell.document.getText()).toBe(newNotebook.getCells()[i].document.getText());
					});
				});
				test('Insert new cell, instead of deleting the inserted cell', async () => {
					const simulation = new SimulationWorkspace();
					let cells = [
						[``],
					].map(contents => new NotebookCellData(NotebookCellKind.Code, contents.join(EOL), 'python'));
					const notebook = ExtHostNotebookDocumentData.fromNotebookData(Uri.file('test.ipynb'), new NotebookData(cells), 'jupyter-notebook', simulation).document;

					cells = [
						[`import sys`],
					].map(contents => new NotebookCellData(NotebookCellKind.Code, contents.join(EOL), 'python'));
					const newNotebook = ExtHostNotebookDocumentData.fromNotebookData(Uri.file('test2.ipynb'), new NotebookData(cells), 'jupyter-notebook', simulation).document;
					let alternativeContent = provider.getAlternativeDocument(newNotebook).getText();
					newNotebook.getCells().forEach(cell => {
						const id = summarize(cell).id;
						alternativeContent = alternativeContent.replace(id, '');
					});
					// Also drop the now-empty `id=""` text itself.
					alternativeContent = alternativeContent.replace(`id=""`, '');
					const edits = await getEditGenerator(provider).generateNotebookEdits(notebook, textToAsyncIterableLines(alternativeContent), undefined, CancellationToken.None);

					await applyEditsSyncOrAsync(simulation, notebook, edits, applyEditsImmediately);

					notebook.getCells().forEach((cell, i) => {
						expect(cell.document.getText()).toBe(newNotebook.getCells()[i].document.getText());
					});
				});
			});
		});
		// XML-only tests that feed deliberately malformed alternative content (closing cell tag on
		// the same line as the code) to the edit generator and check the resulting cell text.
		describe('Malformed XML', () => {
			test('Missing line breaks in one cell', async () => {
				if (provider.kind !== 'xml') {
					return;
				}
				const simulation = new SimulationWorkspace();
				const cells = [
					[`import sys`],
					[`print(sys.executable)`],
					[`import os`],
					[`print(os.path)`],
				].map(contents => new NotebookCellData(NotebookCellKind.Code, contents.join(EOL), 'python'));
				const notebook = ExtHostNotebookDocumentData.fromNotebookData(Uri.file('test.ipynb'), new NotebookData(cells), 'jupyter-notebook', simulation).document;

				let alternativeContent = provider.getAlternativeDocument(notebook).getText();
				alternativeContent = alternativeContent.replace('sys.executable', '"Hello World"');
				alternativeContent = alternativeContent.split(/\r?\n/).join(EOL);
				// Remove the line break and ensure end cell tag is on the same line as the last line of code.
				alternativeContent = alternativeContent.replace(`print("Hello World")${EOL}</VSCode.Cell>`, `print("Hello World")</VSCode.Cell>`);

				const edits = await getEditGenerator(provider).generateNotebookEdits(notebook, textToAsyncIterableLines(alternativeContent), undefined, CancellationToken.None);
				const notebookEdits = [];
				// Text edits arrive as `[uri, edits]` tuples; anything else is applied as a notebook edit.
				for await (const edit of edits) {
					if (Array.isArray(edit)) {
						simulation.applyEdits(edit[0], edit[1]);
					} else {
						simulation.applyNotebookEdits(notebook.uri, [edit]);
						notebookEdits.push(edit);
					}
				}

				expect(notebook.cellAt(0).document.getText()).toBe('import sys');
				expect(notebook.cellAt(1).document.getText()).toBe('print("Hello World")');
				expect(notebook.cellAt(2).document.getText()).toBe('import os');
				expect(notebook.cellAt(3).document.getText()).toBe('print(os.path)');
			});
			test('Missing line breaks in all cells', async () => {
				if (provider.kind !== 'xml') {
					return;
				}
				const simulation = new SimulationWorkspace();
				const cells = [
					[`import sys`],
					[`print(sys.executable)`],
					[`import os`],
					[`print(os.path)`],
				].map(contents => new NotebookCellData(NotebookCellKind.Code, contents.join(EOL), 'python'));
				const notebook = ExtHostNotebookDocumentData.fromNotebookData(Uri.file('test.ipynb'), new NotebookData(cells), 'jupyter-notebook', simulation).document;

				let alternativeContent = provider.getAlternativeDocument(notebook).getText();
				alternativeContent = alternativeContent.replace('sys.executable', '"Hello World"');
				alternativeContent = alternativeContent.split(/\r?\n/).join(EOL);
				// Remove the line break and ensure end cell tag is on the same line as the last line of code.
				// NOTE(review): `String.prototype.replace` with a string pattern rewrites only the FIRST
				// occurrence, so only the first closing tag is joined here even though the test title
				// says "all cells" - confirm whether a replace-all was intended.
				alternativeContent = alternativeContent.replace(`${EOL}</VSCode.Cell>`, `</VSCode.Cell>`);

				const edits = await getEditGenerator(provider).generateNotebookEdits(notebook, textToAsyncIterableLines(alternativeContent), undefined, CancellationToken.None);
				const notebookEdits = [];
				for await (const edit of edits) {
					if (Array.isArray(edit)) {
						simulation.applyEdits(edit[0], edit[1]);
					} else {
						simulation.applyNotebookEdits(notebook.uri, [edit]);
						notebookEdits.push(edit);
					}
				}

				expect(notebook.cellAt(0).document.getText()).toBe('import sys');
				expect(notebook.cellAt(1).document.getText()).toBe('print("Hello World")');
				expect(notebook.cellAt(2).document.getText()).toBe('import os');
				expect(notebook.cellAt(3).document.getText()).toBe('print(os.path)');
			});
			test('Deliberately include EndCell marker in a cell', async () => {
				if (provider.kind !== 'xml') {
					return;
				}
				const simulation = new SimulationWorkspace();
				const cells = [
					[`import sys`],
					[`print(sys.executable)`],
					[`import os</VSCode.Cell>`],
					[`print(os.path)`],
				].map(contents => new NotebookCellData(NotebookCellKind.Code, contents.join(EOL), 'python'));
				const notebook = ExtHostNotebookDocumentData.fromNotebookData(Uri.file('test.ipynb'), new NotebookData(cells), 'jupyter-notebook', simulation).document;

				let alternativeContent = provider.getAlternativeDocument(notebook).getText();
				alternativeContent = alternativeContent.replace('sys.executable', '"Hello World"');
				alternativeContent = alternativeContent.split(/\r?\n/).join(EOL);
				// Remove the line break and ensure end cell tag is on the same line as the
// last line of code.
				alternativeContent = alternativeContent.replace(`${EOL}</VSCode.Cell>`, `</VSCode.Cell>`);

				const edits = await getEditGenerator(provider).generateNotebookEdits(notebook, textToAsyncIterableLines(alternativeContent), undefined, CancellationToken.None);
				const notebookEdits = [];
				// Text edits arrive as `[uri, edits]` tuples; anything else is applied as a notebook edit.
				for await (const edit of edits) {
					if (Array.isArray(edit)) {
						simulation.applyEdits(edit[0], edit[1]);
					} else {
						simulation.applyNotebookEdits(notebook.uri, [edit]);
						notebookEdits.push(edit);
					}
				}

				expect(notebook.cellAt(0).document.getText()).toBe('import sys');
				expect(notebook.cellAt(1).document.getText()).toBe('print("Hello World")');
				expect(notebook.cellAt(2).document.getText()).toBe('import os</VSCode.Cell>');
				expect(notebook.cellAt(3).document.getText()).toBe('print(os.path)');
			});
		});
	});
});

/**
 * Applies a batch of generated edits to `notebook` inside the simulation workspace.
 *
 * Text edits (`[uri, edits]` tuples) are applied one by one as they are encountered, while
 * notebook edits are collected and applied together in a single call afterwards.
 *
 * @param notebook The notebook the notebook-level edits target.
 * @param edits Mixed list of notebook edits and `[uri, textEdits]` tuples.
 * @param simulationWorkspace Workspace used to apply the edits.
 * @returns A `NotebookData` snapshot of the notebook after all edits have been applied.
 */
function applyNotebookEdits(notebook: NotebookDocument, edits: (NotebookEdit | [Uri, TextEdit[]])[], simulationWorkspace: SimulationWorkspace) {
	const notebookEdits: NotebookEdit[] = [];
	for (const edit of edits) {
		if (Array.isArray(edit)) {
			simulationWorkspace.applyEdits(edit[0], edit[1]);
		} else {
			notebookEdits.push(edit);
		}
	}

	simulationWorkspace.applyNotebookEdits(notebook.uri, notebookEdits);
	return notebookDocumentToData(notebook);
}

/**
 * Converts every cell of `notebook` with `notebookCellToCellData` and wraps the result in a
 * new `NotebookData`.
 */
function notebookDocumentToData(notebook: NotebookDocument): NotebookData {
	const newCells = notebook.getCells().map(notebookCellToCellData);
	// NOTE(review): this map is populated but never read inside this function; it is kept
	// as-is until it is confirmed that nothing else depends on it.
	const newCellMap = new ResourceMap<NotebookCellData>();
	notebook.getCells().forEach((cell, i) => {
		newCellMap.set(cell.document.uri, newCells[i]);
	});

	return new NotebookData(newCells);
}

/**
 * Asserts that `notebook` and `data` hold the same cells: same count and, per cell, the same
 * normalized text, language id and cell kind.
 *
 * @param notebook The notebook document under test.
 * @param data The expected notebook contents.
 * @param kind The alternative-content format in use; `json` gets an extra per-line trim.
 */
function assertDocumentsAreEqual(notebook: NotebookDocument, data: NotebookData, kind: 'xml' | 'text' | 'json') {
	// Trims every line so that indentation-only differences are ignored.
	const trimEachLine = (value: string) => value.split(/\r?\n/g).map(l => l.trim()).join('\n');

	expect(notebook.cellCount).toBe(data.cells.length);
	for (let i = 0; i < notebook.cellCount; i++) {
		const cell = notebook.cellAt(i);
		const cellData = data.cells[i];
		if (kind === 'json') {
			// With JSON we get extra padding, and that's wrong.
			// E.g. doc string in python will have extra padding.
			// Before
			/**
			"source": [
				"import math",
				"",
				"def circle_area(radius):",
				"    print(\"HELLO WORLD\")",
				"    return math.pi * radius**2"
			]
			*/
			// Response from LLM, notice how the empty lines in docstrings are indented.
			/**
			"source": [
				"import math",
				"",
				"def circle_area(radius):",
				"    \"\"\"",
				"    Calculate the area of a circle given its radius.",
				"    ",
				"    Args:",
				"        radius (float): The radius of the circle.",
				"    ",
				"    Returns:",
				"        float: The area of the circle.",
				"    \"\"\"",
				"    print(\"HELLO WORLD\")",
				"    return math.pi * radius**2"
			]
			*/
			expect(normatlizeContent(trimEachLine(cell.document.getText()))).toBe(normatlizeContent(trimEachLine(cellData.value)));
		} else {
			// LLMs return empty new lines for jupytext cells. Check the case of `reorder.ipynb`.
			// `text` and `xml` are compared the same way: `normatlizeContent` trims the
			// surrounding whitespace of both sides.
			expect(normatlizeContent(cell.document.getText())).toBe(normatlizeContent(cellData.value));
		}
		expect(cell.document.languageId).toBe(cellData.languageId);
		expect(cell.kind).toBe(cellData.kind);
	}
}


/**
 * Normalizes cell content for comparison:
 * - blanks out id values, e.g. `id="2ce940c2"` becomes `id=""` (xml), and likewise for the
 *   unquoted (jupytext) and `"id": "..."` (json) forms,
 * - converts Windows line endings to `\n`,
 * - trims leading and trailing whitespace.
 *
 * NOTE(review): the jupytext pattern `id=[^"]+` consumes everything up to the next double
 * quote, including newlines - confirm this is broad on purpose.
 */
function normatlizeContent(content: string) {
	return content
		.replace(/id="[^"]+"/g, 'id=""') // xml id
		.replace(/id=[^"]+/g, 'id=') // jupytext id
		.replace(/"id": "[^"]+"/g, '"id": ""') // json id
		.replace(/\r\n/g, '\n') // windows/unix newlines
		.trim();
}

