Path: blob/main/extensions/ipynb/src/notebookAttachmentCleaner.ts
3292 views
/*---------------------------------------------------------------------------------------------
 *  Copyright (c) Microsoft Corporation. All rights reserved.
 *  Licensed under the MIT License. See License.txt in the project root for license information.
 *--------------------------------------------------------------------------------------------*/

import * as vscode from 'vscode';
import { ATTACHMENT_CLEANUP_COMMANDID, JUPYTER_NOTEBOOK_MARKDOWN_SELECTOR } from './constants';
import { deepClone, objectEquals, Delayer } from './helper';

/** Everything `cleanNotebookAttachments` needs to process a single markup cell. */
interface AttachmentCleanRequest {
	notebook: vscode.NotebookDocument;
	document: vscode.TextDocument;
	cell: vscode.NotebookCell;
}

/** One attachment entry as stored under `metadata.attachments[filename]`. */
interface IAttachmentData {
	[key: string /** mimetype */]: string;/** b64-encoded */
}

/** A missing attachment together with every range in the cell source that references it. */
interface IAttachmentDiagnostic {
	name: string;
	ranges: vscode.Range[];
}

export enum DiagnosticCode {
	missing_attachment = 'notebook.missing-attachment'
}

/**
 * Keeps the `attachments` metadata of notebook markup cells in sync with the
 * `attachment:` image references found in the cell source.
 *
 * - Attachments that are no longer referenced are moved out of the metadata into an
 *   in-memory cache (keyed by notebook uri, cell fragment and filename) so that they
 *   can be restored if the reference comes back.
 * - References that resolve neither to the metadata nor to the cache are reported as
 *   warnings, and a quick fix is offered that deletes the offending reference.
 */
export class AttachmentCleaner implements vscode.CodeActionProvider {
	private _attachmentCache:
		Map<string /** uri */, Map<string /** cell fragment*/, Map<string /** attachment filename */, IAttachmentData>>> = new Map();

	private _disposables: vscode.Disposable[];
	private _imageDiagnosticCollection: vscode.DiagnosticCollection;
	// NOTE(review): 750 is presumably a delay in milliseconds; confirm against Delayer in ./helper.
	private readonly _delayer = new Delayer(750);

	constructor() {
		this._disposables = [];
		this._imageDiagnosticCollection = vscode.languages.createDiagnosticCollection('Notebook Image Attachment');
		this._disposables.push(this._imageDiagnosticCollection);

		// Command invoked by the quick fix: deletes the given range from the given document.
		this._disposables.push(vscode.commands.registerCommand(ATTACHMENT_CLEANUP_COMMANDID, async (document: vscode.Uri, range: vscode.Range) => {
			const workspaceEdit = new vscode.WorkspaceEdit();
			workspaceEdit.delete(document, range);
			await vscode.workspace.applyEdit(workspaceEdit);
		}));

		this._disposables.push(vscode.languages.registerCodeActionsProvider(JUPYTER_NOTEBOOK_MARKDOWN_SELECTOR, this, {
			providedCodeActionKinds: [
				vscode.CodeActionKind.QuickFix
			],
		}));

		// While editing: clean the attachments of every changed markup cell, routed through the delayer.
		this._disposables.push(vscode.workspace.onDidChangeNotebookDocument(e => {
			this._delayer.trigger(() => {
				for (const change of e.cellChanges) {
					// Only changes that carry a document (i.e. the cell source changed) are relevant.
					if (!change.document) {
						continue;
					}

					if (change.cell.kind !== vscode.NotebookCellKind.Markup) {
						continue;
					}

					const metadataEdit = this.cleanNotebookAttachments({
						notebook: e.notebook,
						cell: change.cell,
						document: change.document
					});
					if (metadataEdit) {
						const workspaceEdit = new vscode.WorkspaceEdit();
						workspaceEdit.set(e.notebook.uri, [metadataEdit]);
						// Deliberately not awaited: each cell's metadata edit is applied independently.
						void vscode.workspace.applyEdit(workspaceEdit);
					}
				}
			});
		}));

		// On manual save: clean every markup cell and hand the edits to the save participant.
		this._disposables.push(vscode.workspace.onWillSaveNotebookDocument(e => {
			if (e.reason === vscode.TextDocumentSaveReason.Manual) {
				// All cells are processed right here, so the delayer is disposed before doing so.
				this._delayer.dispose();
				if (e.notebook.getCells().length === 0) {
					return;
				}
				const notebookEdits: vscode.NotebookEdit[] = [];
				for (const cell of e.notebook.getCells()) {
					if (cell.kind !== vscode.NotebookCellKind.Markup) {
						continue;
					}

					const metadataEdit = this.cleanNotebookAttachments({
						notebook: e.notebook,
						cell: cell,
						document: cell.document
					});

					if (metadataEdit) {
						notebookEdits.push(metadataEdit);
					}
				}
				if (!notebookEdits.length) {
					return;
				}
				const workspaceEdit = new vscode.WorkspaceEdit();
				workspaceEdit.set(e.notebook.uri, notebookEdits);
				e.waitUntil(Promise.resolve(workspaceEdit));
			}
		}));

		// The cache is only useful while the notebook is open.
		this._disposables.push(vscode.workspace.onDidCloseNotebookDocument(e => {
			this._attachmentCache.delete(e.uri.toString());
		}));

		this._disposables.push(vscode.workspace.onWillRenameFiles(e => {
			const re = /\.ipynb$/;
			for (const file of e.files) {
				const oldKey = file.oldUri.toString();
				if (!re.test(oldKey)) {
					continue;
				}

				// transfer cache to new uri
				const notebookCache = this._attachmentCache.get(oldKey);
				if (notebookCache) {
					this._attachmentCache.set(file.newUri.toString(), notebookCache);
					this._attachmentCache.delete(oldKey);
				}
			}
		}));

		this._disposables.push(vscode.workspace.onDidOpenTextDocument(e => {
			this.analyzeMissingAttachments(e);
		}));

		// For a closed document, analyzeMissingAttachments clears its diagnostics.
		this._disposables.push(vscode.workspace.onDidCloseTextDocument(e => {
			this.analyzeMissingAttachments(e);
		}));

		// Cover the documents that were already open before this instance was created.
		vscode.workspace.textDocuments.forEach(document => {
			this.analyzeMissingAttachments(document);
		});
	}

	/**
	 * Offers one quick fix per `missing_attachment` diagnostic in the context. The fix runs
	 * the cleanup command, which deletes the range of the invalid reference.
	 */
	provideCodeActions(document: vscode.TextDocument, _range: vscode.Range | vscode.Selection, context: vscode.CodeActionContext, _token: vscode.CancellationToken): vscode.ProviderResult<(vscode.CodeAction | vscode.Command)[]> {
		const fixes: vscode.CodeAction[] = [];

		for (const diagnostic of context.diagnostics) {
			switch (diagnostic.code) {
				case DiagnosticCode.missing_attachment:
					{
						const fix = new vscode.CodeAction(
							'Remove invalid image attachment reference',
							vscode.CodeActionKind.QuickFix);

						fix.command = {
							command: ATTACHMENT_CLEANUP_COMMANDID,
							title: 'Remove invalid image attachment reference',
							arguments: [document.uri, diagnostic.range],
						};
						fixes.push(fix);
					}
					break;
			}
		}

		return fixes;
	}

	/**
	 * Reconciles the attachments stored in a cell's metadata with the attachments referenced
	 * in the cell's markdown source, and refreshes the cell's diagnostics.
	 *
	 * - metadata attachments that are still referenced are kept
	 * - metadata attachments that are no longer referenced are moved to the cache
	 * - references without metadata are restored from the cache when possible,
	 *   otherwise they are reported as diagnostics
	 *
	 * @param e the notebook, the cell and the cell's text document to process
	 * @returns a `vscode.NotebookEdit` replacing the cell metadata, or `undefined` when the
	 * notebook is closed, the cell has no valid index, or the attachments are already up to date
	 */
	private cleanNotebookAttachments(e: AttachmentCleanRequest): vscode.NotebookEdit | undefined {

		if (e.notebook.isClosed) {
			return;
		}
		const document = e.document;
		const cell = e.cell;

		const markdownAttachmentsInUse: { [key: string /** filename */]: IAttachmentData } = {};
		const cellFragment = cell.document.uri.fragment;
		const notebookUri = e.notebook.uri.toString();
		const diagnostics: IAttachmentDiagnostic[] = [];
		const markdownAttachmentsRefedInCell = this.getAttachmentNames(document);

		if (markdownAttachmentsRefedInCell.size === 0) {
			// no attachments used in this cell, cache all images from cell metadata
			this.saveAllAttachmentsToCache(cell.metadata, notebookUri, cellFragment);
		}

		if (this.checkMetadataHasAttachmentsField(cell.metadata)) {
			// the cell metadata contains attachments, check if any are used in the markdown source

			for (const [currFilename, attachment] of Object.entries(cell.metadata.attachments)) {
				// means markdown reference is present in the metadata, rendering will work properly
				// therefore, we don't need to check it in the next loop either
				const reference = markdownAttachmentsRefedInCell.get(currFilename);
				if (reference) {
					// attachment reference is present in the markdown source, no need to cache it
					reference.valid = true;
					markdownAttachmentsInUse[currFilename] = attachment as IAttachmentData;
				} else {
					// attachment reference is not present in the markdown source, cache it
					this.saveAttachmentToCache(notebookUri, cellFragment, currFilename, cell.metadata);
				}
			}
		}

		for (const [currFilename, attachment] of markdownAttachmentsRefedInCell) {
			if (attachment.valid) {
				// attachment reference is present in both the markdown source and the metadata, no op
				continue;
			}

			// if image is referenced in markdown source but not in metadata -> check if we have image in the cache
			const cellCache = this._attachmentCache.get(notebookUri)?.get(cellFragment);
			const cachedImageAttachment = cellCache?.get(currFilename);
			if (cachedImageAttachment) {
				// move the image from the cache back into the metadata
				markdownAttachmentsInUse[currFilename] = cachedImageAttachment;
				cellCache?.delete(currFilename);
			} else {
				// if image is not in the cache, show warning
				diagnostics.push({ name: currFilename, ranges: attachment.ranges });
			}
		}

		this.updateDiagnostics(cell.document.uri, diagnostics);

		// Only produce an edit for a cell with a valid index whose attachments actually changed.
		if (cell.index > -1 && !objectEquals(markdownAttachmentsInUse, cell.metadata.attachments || {})) {
			const updateMetadata: { [key: string]: any } = deepClone(cell.metadata);
			if (Object.keys(markdownAttachmentsInUse).length === 0) {
				updateMetadata.attachments = undefined;
			} else {
				updateMetadata.attachments = markdownAttachmentsInUse;
			}
			const metadataEdit = vscode.NotebookEdit.updateCellMetadata(cell.index, updateMetadata);
			return metadataEdit;
		}
		return;
	}

	/**
	 * Recomputes the missing-attachment diagnostics of a notebook cell document without
	 * touching its metadata or the cache. Documents that are not notebook cells are ignored;
	 * closed cell documents get their diagnostics cleared.
	 * @param document the text document that was opened or closed
	 */
	private analyzeMissingAttachments(document: vscode.TextDocument): void {
		if (document.uri.scheme !== 'vscode-notebook-cell') {
			// not notebook
			return;
		}

		if (document.isClosed) {
			this.updateDiagnostics(document.uri, []);
			return;
		}

		// find the notebook and the cell that own this text document
		let notebook: vscode.NotebookDocument | undefined;
		let activeCell: vscode.NotebookCell | undefined;
		for (const notebookDocument of vscode.workspace.notebookDocuments) {
			const cell = notebookDocument.getCells().find(cell => cell.document === document);
			if (cell) {
				notebook = notebookDocument;
				activeCell = cell;
				break;
			}
		}

		if (!notebook || !activeCell) {
			return;
		}

		const diagnostics: IAttachmentDiagnostic[] = [];
		const markdownAttachments = this.getAttachmentNames(document);
		// NOTE(review): when the metadata has no attachments field at all, referenced attachments
		// are not reported here, whereas cleanNotebookAttachments would report them. Confirm
		// whether that difference is intended.
		if (this.checkMetadataHasAttachmentsField(activeCell.metadata)) {
			for (const [currFilename, attachment] of markdownAttachments) {
				if (!activeCell.metadata.attachments[currFilename]) {
					// no attachment reference in the metadata
					diagnostics.push({ name: currFilename, ranges: attachment.ranges });
				}
			}
		}

		this.updateDiagnostics(activeCell.document.uri, diagnostics);
	}

	/**
	 * Replaces the diagnostics of a cell with one warning per range of every missing attachment.
	 * Passing an empty list clears the cell's diagnostics.
	 * @param cellUri uri of the cell document the diagnostics belong to
	 * @param diagnostics the missing attachments and the ranges referencing them
	 */
	private updateDiagnostics(cellUri: vscode.Uri, diagnostics: IAttachmentDiagnostic[]) {
		const vscodeDiagnostics: vscode.Diagnostic[] = [];
		for (const currDiagnostic of diagnostics) {
			for (const range of currDiagnostic.ranges) {
				const diagnostic = new vscode.Diagnostic(range, `The image named: '${currDiagnostic.name}' is not present in cell metadata.`, vscode.DiagnosticSeverity.Warning);
				diagnostic.code = DiagnosticCode.missing_attachment;
				vscodeDiagnostics.push(diagnostic);
			}
		}

		this._imageDiagnosticCollection.set(cellUri, vscodeDiagnostics);
	}

	/**
	 * copy a single attachment from the cell metadata into the cache, creating the notebook
	 * and cell level caches on demand. The metadata itself is not modified here.
	 * @param notebookUri uri of the notebook currently being edited
	 * @param cellFragment fragment of the cell currently being edited
	 * @param currFilename filename of the image being cached
	 * @param metadata metadata of the cell currently being edited
	 */
	private saveAttachmentToCache(notebookUri: string, cellFragment: string, currFilename: string, metadata: { [key: string]: any }): void {
		// read the attachment first so that a failing lookup leaves the cache untouched
		const attachment = this.getMetadataAttachment(metadata, currFilename);

		let documentCache = this._attachmentCache.get(notebookUri);
		if (!documentCache) {
			// no cache for this notebook yet
			documentCache = new Map();
			this._attachmentCache.set(notebookUri, documentCache);
		}

		let cellCache = documentCache.get(cellFragment);
		if (!cellCache) {
			// no cache for this cell yet
			cellCache = new Map<string, IAttachmentData>();
			documentCache.set(cellFragment, cellCache);
		}

		cellCache.set(currFilename, attachment);
	}

	/**
	 * get an attachment entry from the given metadata
	 * @param metadata metadata to extract image data from, must have an `attachments` object
	 * @param currFilename filename of image being extracted
	 * @returns the entry stored under `metadata.attachments[currFilename]`
	 */
	private getMetadataAttachment(metadata: { [key: string]: any }, currFilename: string): IAttachmentData {
		return metadata.attachments[currFilename];
	}

	/**
	 * type guard telling whether a cell's metadata has an `attachments` field that is a non-null object
	 * @param metadata metadata of cell
	 * @returns boolean representing the presence of an attachments object
	 */
	private checkMetadataHasAttachmentsField(metadata: { [key: string]: unknown }): metadata is { readonly attachments: Record<string, unknown> } {
		return !!metadata.attachments && typeof metadata.attachments === 'object';
	}

	/**
	 * given metadata from a cell, cache every image (used in cases with no image links in markdown source).
	 * The notebook and cell level caches are created even when the metadata has no attachments.
	 * @param metadata metadata for a cell with no images in markdown source
	 * @param notebookUri uri for the notebook being edited
	 * @param cellFragment fragment of cell being edited
	 */
	private saveAllAttachmentsToCache(metadata: { [key: string]: unknown }, notebookUri: string, cellFragment: string): void {
		const documentCache = this._attachmentCache.get(notebookUri) ?? new Map();
		this._attachmentCache.set(notebookUri, documentCache);
		const cellCache = documentCache.get(cellFragment) ?? new Map<string, IAttachmentData>();
		documentCache.set(cellFragment, cellCache);

		if (metadata.attachments && typeof metadata.attachments === 'object') {
			for (const [currFilename, attachment] of Object.entries(metadata.attachments)) {
				cellCache.set(currFilename, attachment);
			}
		}
	}

	/**
	 * scan the markdown source of a cell for image links of the form `![...](attachment:<filename>)`,
	 * optionally wrapped in angle brackets, and collect them by filename.
	 * @param document the text document of the cell
	 * @returns a map from attachment filename to its render state: `valid` starts out `false`,
	 * `ranges` holds the range of every link that references the filename
	 */
	private getAttachmentNames(document: vscode.TextDocument) {
		const source = document.getText();
		const filenames: Map<string, { valid: boolean; ranges: vscode.Range[] }> = new Map();
		const re = /!\[.*?\]\(<?attachment:(?<filename>.*?)>?\)/gm;

		let match: RegExpExecArray | null;
		while ((match = re.exec(source))) {
			const name = match.groups?.filename;
			if (name) {
				// the range covers the whole image link, which is what the quick fix deletes
				const index = match.index;
				const length = match[0].length;
				const startPosition = document.positionAt(index);
				const endPosition = document.positionAt(index + length);
				const range = new vscode.Range(startPosition, endPosition);
				const filename = filenames.get(name) ?? { valid: false, ranges: [] };
				filenames.set(name, filename);
				filename.ranges.push(range);
			}
		}
		return filenames;
	}

	dispose() {
		this._disposables.forEach(d => d.dispose());
		this._delayer.dispose();
	}
}