Path: blob/main/extensions/copilot/src/util/common/markdown.ts
13397 views
/*---------------------------------------------------------------------------------------------
 *  Copyright (c) Microsoft Corporation. All rights reserved.
 *  Licensed under the MIT License. See License.txt in the project root for license information.
 *--------------------------------------------------------------------------------------------*/

import MarkdownIt = require('markdown-it');
import { Lazy } from '../vs/base/common/lazy';
import { extname } from '../vs/base/common/resources';
import { escapeRegExpCharacters } from '../vs/base/common/strings';
import { URI } from '../vs/base/common/uri';
import { getLanguage, wellKnownLanguages } from './languages';

/**
 * Computes a fence that can safely wrap `code` in a markdown code block.
 *
 * Every line of `code` that starts (after optional whitespace) with a run of three or more
 * backticks is inspected; the returned fence is one backtick longer than the longest such run,
 * and never shorter than `minNumberOfBackticks`.
 *
 * @param code A block of source code that might contain markdown code block fences
 * @param minNumberOfBackticks The smallest fence length to return. Defaults to 3.
 * @returns A fence with the required number of backticks to avoid prematurely terminating the code block
 */
export function getFenceForCodeBlock(code: string, minNumberOfBackticks = 3) {
	const backticks = code.matchAll(/^\s*(```+)/gm);
	const backticksNeeded = Math.max(minNumberOfBackticks, ...Array.from(backticks, d => d[1].length + 1));
	return '`'.repeat(backticksNeeded);
}

/** Marker text that introduces the file path inside a filepath comment. */
export const filepathCodeBlockMarker = 'filepath:';

/**
 * Builds a regular expression that matches a single line made of a line-comment start, the
 * {@link filepathCodeBlockMarker}, and a file path. The file path is captured in group 1.
 *
 * `#` and `//` are always accepted as comment starts, in addition to the line comment and the
 * alternative line comments of the given language. If any of those comment styles has an end
 * delimiter, an optional end delimiter is accepted after the path.
 *
 * @param languageId The language whose line-comment syntax should be recognized.
 * @returns A regular expression anchored to the start and end of the input.
 */
export function createFilepathRegexp(languageId?: string): RegExp {
	const language = getLanguage(languageId);
	const prefixes: string[] = ['#', '\\/\\/']; // always allow # and // as comment start
	const suffixes: string[] = [];
	function add(lineComment: { start: string; end?: string }) {
		prefixes.push(escapeRegExpCharacters(lineComment.start));
		if (lineComment.end) {
			suffixes.push(escapeRegExpCharacters(lineComment.end));
		}
	}
	add(language.lineComment);
	language.alternativeLineComments?.forEach(add);
	const startMatch = `(?:${prefixes.join('|')})`;
	// The alternatives get their own group so that the leading `\s*` applies to every suffix and
	// not only to the first one; otherwise whitespace before the other suffixes would end up in
	// the captured path.
	const optionalEndMatch = suffixes.length ? `(?:\\s*(?:${suffixes.join('|')}))?` : '';
	return new RegExp(`^\\s*${startMatch}\\s*${filepathCodeBlockMarker}\\s*(.*?)${optionalEndMatch}\\s*$`);
}

/**
 * Create a markdown code block with an optional language id and an optional file path.
 *
 * @param languageId The language id of the code; it is converted to a markdown language id for the opening fence.
 * @param code The code to place inside the block.
 * @param shouldTrim Whether to trim leading and trailing whitespace from `code`. Defaults to `true`.
 * @param filePath The file path to include in the code block. To create the file path use the {@link IPromptPathRepresentationService}
 * @param minNumberOfBackticksOrStyle Either the minimum number of backticks for the fence (see
 * {@link getFenceForCodeBlock}), or a string that is used verbatim as the fence. With an empty
 * string no fence and no language line are emitted.
 * @returns The code block text.
 */
export function createFencedCodeBlock(languageId: string, code: string, shouldTrim = true, filePath?: string, minNumberOfBackticksOrStyle: string | number = 3): string {
	const fence = typeof minNumberOfBackticksOrStyle === 'number'
		? getFenceForCodeBlock(code, minNumberOfBackticksOrStyle)
		: minNumberOfBackticksOrStyle;

	const filepathComment = filePath ? getFilepathComment(languageId, filePath) : '';
	const content = `${filepathComment}${shouldTrim ? code.trim() : code}`;

	if (!fence) {
		// An empty fence style means the caller wants the bare content only.
		return content;
	}

	return `${fence}${languageIdToMDCodeBlockLang(languageId)}\n${content}\n${fence}`;
}

/**
 * Formats the comment line that announces the file path of a code block, using the line-comment
 * syntax of the given language.
 *
 * @param languageId The language whose line-comment syntax is used.
 * @param filePath The file path to embed in the comment.
 * @returns The comment followed by a newline.
 */
export function getFilepathComment(languageId: string, filePath: string): string {
	const language = getLanguage(languageId);
	const { start, end } = language.lineComment;
	return end ? `${start} ${filepathCodeBlockMarker} ${filePath} ${end}\n` : `${start} ${filepathCodeBlockMarker} ${filePath}\n`;
}

/**
 * Strips the filepath comment from the beginning of a code block.
 *
 * The comment is only removed when the code block starts with exactly the text produced by
 * {@link getFilepathComment} for the same language and path.
 *
 * @param codeblock The code block content.
 * @param languageId The language used to format the expected comment.
 * @param filepath The file path expected in the comment.
 * @returns The code block without the leading comment, or the unchanged input when there is no match.
 */
export function removeLeadingFilepathComment(codeblock: string, languageId: string, filepath: string): string {
	const filepathComment = getFilepathComment(languageId, filepath);
	if (codeblock.startsWith(filepathComment)) {
		return codeblock.substring(filepathComment.length);
	}

	return codeblock;
}

/**
 * Maps a language id to the language name to put after the opening fence of a markdown code block.
 *
 * @param languageId The language id to convert.
 * @returns The first markdown language id registered for the language, or `languageId` itself when there is none.
 */
export function languageIdToMDCodeBlockLang(languageId: string): string {
	const language = getLanguage(languageId);
	return language?.markdownLanguageIds?.[0] ?? languageId;
}

/**
 * Lazily built reverse lookup from markdown language id to language id. Languages that declare
 * no markdown language ids are registered under their own language id.
 */
const mdLanguageIdToLanguageId = new Lazy(() => {
	const result = new Map<string, string>();
	wellKnownLanguages.forEach((language, languageId) => {
		if (language.markdownLanguageIds) {
			language.markdownLanguageIds.forEach(mdLanguageId => {
				result.set(mdLanguageId, languageId);
			});
		} else {
			result.set(languageId, languageId);
		}
	});
	return result;
});

/**
 * Maps the language name of a markdown code block back to a language id.
 *
 * @param mdLanguageId The language name found after the opening fence.
 * @returns The matching language id, or `undefined` when the name is not known.
 */
export function mdCodeBlockLangToLanguageId(mdLanguageId: string): string | undefined {
	return mdLanguageIdToLanguageId.value.get(mdLanguageId);
}

/**
 * Determines a language id for a resource from its file extension.
 *
 * @param uri The resource to inspect.
 * @returns The id of the first well-known language that lists the lower-cased extension, or the
 * extension without its leading dot when no well-known language matches.
 */
export function getLanguageId(uri: URI) {
	const ext = extname(uri).toLowerCase();

	// `wellKnownLanguages` is used as a Map throughout this file (`get`, `forEach`), so its
	// entries have to be iterated directly: `Object.keys` on a Map yields no keys, which made
	// the lookup always fall through to the bare extension.
	for (const [languageId, language] of wellKnownLanguages) {
		if (languageId && language.extensions?.includes(ext)) {
			return languageId;
		}
	}

	return ext.replace(/^\./, '');
}

/**
 * Determines the markdown code block language name for a resource.
 *
 * @param uri The resource to inspect.
 * @returns The markdown language id for the language id derived from the resource's extension.
 */
export function getMdCodeBlockLanguage(uri: URI) {
	const languageId = getLanguageId(uri);

	return languageIdToMDCodeBlockLang(languageId);
}

/** A fenced code block found in markdown text. */
export interface MarkdownCodeBlock {
	/** The fence characters used to start the block. */
	readonly startMarkup: string;

	/** The markdown language id of the code block, e.g. 'typescript'. May be empty */
	readonly language: string;

	/** The code content of the block. */
	readonly code: string;

	/** First entry of the source line range that markdown-it reports for the block (`token.map[0]`). */
	readonly startLine: number;

	/** Second entry of the source line range that markdown-it reports for the block (`token.map[1]`). */
	readonly endLine: number;
}

/**
 * Parses markdown text and collects its fenced code blocks.
 *
 * Only tokens of type `fence` that carry a source line range are reported, including those
 * nested inside other tokens.
 *
 * @param text The markdown text to parse.
 * @returns The code blocks in the order they are produced by the token traversal.
 */
export function extractCodeBlocks(text: string): MarkdownCodeBlock[] {
	const out: MarkdownCodeBlock[] = [];
	const md = new MarkdownIt();
	const tokens = md.parse(text, {});
	for (const token of flattenTokensLists(tokens)) {
		if (token.map && token.type === 'fence') {
			out.push({
				startMarkup: token.markup,
				// Trim trailing newline since this is always included
				code: token.content.replace(/\n$/, ''),
				language: token.info.trim(),
				startLine: token.map[0],
				endLine: token.map[1],
			});
		}
	}
	return out;
}

/**
 * Parses markdown text and collects the content of its inline code spans.
 *
 * @param text The markdown text to parse.
 * @returns The content of every `code_inline` token, with a single trailing newline removed if present.
 */
export function extractInlineCode(text: string): string[] {
	const out: string[] = [];
	const md = new MarkdownIt();
	const tokens = md.parse(text, {});
	for (const token of flattenTokensLists(tokens)) {
		if (token.type === 'code_inline') {
			out.push(token.content.replace(/\n$/, ''));
		}
	}
	return out;
}

/**
 * Walks a token list recursively. The children of a token are yielded before the token itself.
 *
 * @param tokensList The tokens to traverse.
 */
function* flattenTokensLists(tokensList: readonly MarkdownIt.Token[]): Iterable<MarkdownIt.Token> {
	for (const entry of tokensList) {
		if (entry.children) {
			yield* flattenTokensLists(entry.children);
		}
		yield entry;
	}
}