Path: blob/main/extensions/copilot/src/platform/notebook/common/helpers.ts
13401 views
/*---------------------------------------------------------------------------------------------
 *  Copyright (c) Microsoft Corporation. All rights reserved.
 *  Licensed under the MIT License. See License.txt in the project root for license information.
 *--------------------------------------------------------------------------------------------*/

import type { ChatRequest, NotebookCell, NotebookDocument, TextDocument, Uri } from 'vscode';
import { isLocation, isUri } from '../../../util/common/types';
import { StringSHA1 } from '../../../util/vs/base/common/hash';
import { removeAnsiEscapeCodes } from '../../../util/vs/base/common/strings';
import { isUriComponents, URI } from '../../../util/vs/base/common/uri';
import { NotebookCellData, NotebookCellKind } from '../../../vscodeTypes';
import { INotebookService } from './notebookService';

/**
 * A single line of text with any trailing carriage return removed.
 * The brand field keeps this type nominally distinct from a plain `{ value: string }`.
 */
export class LineOfText {
	readonly __lineOfTextBrand: void = undefined;
	public readonly value: string;
	constructor(
		value: string
	) {
		// Only a trailing '\r' is stripped; the rest of the text is kept verbatim.
		this.value = value.replace(/\r$/, '');
	}
}

/** End of Line for alternative Notebook content is always \n */
export const EOL = '\n';

/**
 * One entry in a line-by-line rendering of a notebook cell:
 * a `start` marker, a `line` of cell text, or an `end` marker.
 */
export type LineOfCellText = {
	type: 'start';
	/**
	 * The cell index of the cell that this line belongs to.
	 */
	index: number;
	id?: string;
	/**
	 * The Uri of the cell that this line belongs to.
	 * Undefined if this is a cell that doesn't belong in the actual notebook.
	 */
	uri?: Uri;
	/**
	 * Language of the cell.
	 */
	language?: string;
	/**
	 * The type of cell.
	 */
	kind: NotebookCellKind;
} | {
	type: 'line';
	/**
	 * A line of text from a cell. Does not include the newline character.
	 */
	line: string;
	/**
	 * The cell index of the cell that this line belongs to.
	 */
	index: number;
} | {
	type: 'end';
	/**
	 * The cell index of the cell that this line belongs to.
	 */
	index: number;
};

/** Compact description of a notebook cell: kind, language, id, source lines and index. */
export type SummaryCell = {
	cell_type: 'code' | 'markdown';
	language: string;
	id: string;
	source: string[];
	index: number;
};

/**
 * Builds a {@link SummaryCell} for the given cell.
 * Any cell kind other than `NotebookCellKind.Code` is reported as `'markdown'`.
 */
export function summarize(cell: NotebookCell): SummaryCell {
	const cellType = cell.kind === NotebookCellKind.Code ? 'code' : 'markdown';
	const id = getCellId(cell);
	const source = getCellCode(cell.document);
	return { cell_type: cellType, id, language: cell.document.languageId, source, index: cell.index };
}

/**
 * Creates a `NotebookCellData` from a cell, carrying over its text, language,
 * metadata and execution summary. Outputs are shallow-copied, and only when the
 * cell has at least one output.
 */
export function notebookCellToCellData(cell: NotebookCell): NotebookCellData {
	const cellData = new NotebookCellData(cell.kind, cell.document.getText(), cell.document.languageId);
	cellData.metadata = cell.metadata;
	cellData.executionSummary = cell.executionSummary;
	if (cell.outputs.length) {
		cellData.outputs = [...cell.outputs];
	}
	return cellData;
}

/**
 * Returns a map from cell id (see {@link getCellId}) to cell for every cell in the notebook.
 * If two cells produce the same id, the later cell wins.
 */
export function getCellIdMap(notebook: NotebookDocument): Map<string, NotebookCell> {
	const cellIdMap = new Map<string, NotebookCell>();
	for (const cell of notebook.getCells()) {
		cellIdMap.set(getCellId(cell), cell);
	}
	return cellIdMap;
}

const cellIdCache = new WeakMap<NotebookCell, string>();

/** The length of the hash portion of cell IDs */
const CELL_ID_HASH_LENGTH = 8;

/** Use a unique enough cell id prefix so that we can easily identify cell ids */
const CELL_ID_PREFIX = '#VSC-';

/** RegExp to match all Cell Ids */
export const CellIdPatternRe = new RegExp(`(\\s+|^|\\b|\\W)(#VSC-[a-f0-9]{${CELL_ID_HASH_LENGTH}})\\b`, 'gi');

/**
 * Sometimes the model may return a cellId that is not in the expected format.
 * This function attempts to convert such cellIds to the expected format.
 *
 * Recognized shapes, checked in this order:
 * - `#VSC-...`: already in the expected format, returned as-is.
 * - `VSC-...`: the missing `#` is prepended.
 * - `#V-` followed by a hash-length suffix: the prefix is replaced.
 * - `vscode-` (any casing) followed by a hash-length suffix: the prefix is replaced.
 * - `-...`: `#VSC` is prepended.
 * - a bare string of hash length: the full prefix is prepended.
 *
 * Anything else is returned unchanged.
 */
export function normalizeCellId(cellId: string): string {
	if (cellId.startsWith(CELL_ID_PREFIX)) {
		return cellId;
	}
	if (cellId.startsWith('VSC-')) {
		return `#${cellId}`;
	}
	if (cellId.startsWith('#V-') && cellId.length === (CELL_ID_HASH_LENGTH + 3)) {
		return `${CELL_ID_PREFIX}${cellId.substring(3)}`;
	}
	if (cellId.toLowerCase().startsWith('vscode-') && cellId.length === (CELL_ID_HASH_LENGTH + 7)) {
		return `${CELL_ID_PREFIX}${cellId.substring(7)}`;
	}
	if (cellId.startsWith('-')) {
		return `#VSC${cellId}`;
	}
	// Possible case where the cellId is just a hash without the prefix
	return cellId.length === CELL_ID_HASH_LENGTH ? `${CELL_ID_PREFIX}${cellId}` : cellId;
}

const notebookIdCache = new WeakMap<NotebookDocument, string>();

/**
 * Returns an identifier for the notebook: the `StringSHA1` digest of its URI string.
 * The result is cached per notebook object.
 */
export function getNotebookId(notebook: NotebookDocument): string {
	let id = notebookIdCache.get(notebook);
	if (id) {
		return id;
	}
	const hash = new StringSHA1();
	hash.update(notebook.uri.toString());
	id = hash.digest();
	notebookIdCache.set(notebook, id);
	return id;
}

/**
 * Given a Notebook cell returns a unique identifier for the cell.
 * The identifier is based on the cell's URI and is cached for performance.
 * This is useful for tracking cells across sessions or for referencing cells in a consistent manner.
 * The cell Id has a special prefix so that it can easily be identified as a cell Id.
 */
export function getCellId(cell: NotebookCell): string {
	let id = cellIdCache.get(cell);
	if (id) {
		return id;
	}
	const hash = new StringSHA1();
	hash.update(cell.document.uri.toString());
	// Only the leading CELL_ID_HASH_LENGTH characters of the digest are kept.
	id = `${CELL_ID_PREFIX}${hash.digest().substring(0, CELL_ID_HASH_LENGTH)}`;
	cellIdCache.set(cell, id);
	return id;
}

/**
 * Returns the text of every line in the document, in order.
 * A document with zero lines yields an empty array.
 */
function getCellCode(document: TextDocument): string[] {
	return Array.from({ length: document.lineCount }, (_, i) => document.lineAt(i).text);
}

/**
 * Returns the language of the first code cell in the notebook.
 * When there is no code cell and the notebook type is `jupyter-notebook`, falls back to
 * `metadata.language_info.name`, then `metadata.kernelspec.language`, then `'python'`.
 * Returns `undefined` for any other notebook type without a code cell.
 */
export function getDefaultLanguage(notebook: NotebookDocument): string | undefined {
	const codeCell = notebook.getCells().find(cell => cell.kind === NotebookCellKind.Code);
	if (codeCell) {
		return codeCell.document.languageId;
	}
	// Fallback for Jupyter Notebooks that do not have a code cell.
	if (notebook.notebookType === 'jupyter-notebook') {
		// `||` (not `??`) is deliberate: an empty-string language also falls through.
		return notebook.metadata?.language_info?.name || notebook.metadata?.kernelspec?.language || 'python';
	}
	return undefined;
}

const notebookTermsToLookFor = ['jupyter', 'notebook', 'cell.', 'cells.', ' cell ', 'cells', 'notebook cell'];

/**
 * Determines whether a chat request refers to a notebook.
 *
 * @param request The chat request whose references (and optionally prompt) are inspected.
 * @param notebookService Used to check whether a referenced URI has supported notebooks.
 * @param options When `checkPromptAsWell` is true, the lower-cased prompt is also searched
 * for notebook-related terms, and a match short-circuits to `true`.
 * @returns `true` if the prompt check matched, or if any reference value is a location,
 * URI components or URI for which `notebookService.hasSupportedNotebooks` returns true.
 */
export function requestHasNotebookRefs(request: ChatRequest, notebookService: INotebookService, options?: { checkPromptAsWell: boolean }): boolean {
	const prompt = (request.prompt || '').toLowerCase();
	if (options?.checkPromptAsWell && notebookTermsToLookFor.some(term => prompt.includes(term))) {
		return true;
	}
	return request.references.some(ref => {
		if (isLocation(ref.value)) {
			return notebookService.hasSupportedNotebooks(ref.value.uri);
		}
		if (isUriComponents(ref.value)) {
			return notebookService.hasSupportedNotebooks(URI.revive(ref.value));
		}
		if (isUri(ref.value)) {
			return notebookService.hasSupportedNotebooks(ref.value);
		}
		return false;
	});
}

/**
 * Parses a JSON-serialized error and returns a readable description of it.
 *
 * Preference order: the `stack` (or, failing that, the `message`) passed through
 * `removeAnsiEscapeCodes`, then the raw `message`, then the `name`, and finally the
 * original input string.
 *
 * @param jsonString JSON text expected to describe an error-like object.
 * @returns The cleaned description, or `jsonString` itself when it cannot be parsed
 * or yields nothing usable.
 */
export function parseAndCleanStack(jsonString: string): string {
	try {
		// JSON.parse may legitimately produce `null`, so every property access is null-safe.
		const parsed = JSON.parse(jsonString) as Partial<Error> | null;
		const cleaned = removeAnsiEscapeCodes(parsed?.stack || parsed?.message || '');
		return cleaned || parsed?.message || parsed?.name || jsonString;
	} catch {
		// Anything that throws while parsing or cleaning falls back to the original string.
		return jsonString;
	}
}