Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
microsoft
GitHub Repository: microsoft/vscode
Path: blob/main/extensions/copilot/src/util/common/markdown.ts
13397 views
1
/*---------------------------------------------------------------------------------------------
2
* Copyright (c) Microsoft Corporation. All rights reserved.
3
* Licensed under the MIT License. See License.txt in the project root for license information.
4
*--------------------------------------------------------------------------------------------*/
5
6
import MarkdownIt = require('markdown-it');
7
import { Lazy } from '../vs/base/common/lazy';
8
import { extname } from '../vs/base/common/resources';
9
import { escapeRegExpCharacters } from '../vs/base/common/strings';
10
import { URI } from '../vs/base/common/uri';
11
import { getLanguage, wellKnownLanguages } from './languages';
12
13
/**
 * Computes a markdown fence that can safely wrap the given code.
 *
 * Every run of three or more backticks that starts a line (optionally preceded by whitespace)
 * is treated as a fence inside `code`; the returned fence is one backtick longer than the
 * longest such run, and never shorter than `minNumberOfBackticks`.
 *
 * @param code A block of source code that might contain markdown code block fences
 * @param minNumberOfBackticks The minimal length of the returned fence. Defaults to 3.
 * @returns A fence with the required number of backticks to avoid prematurely terminating the code block
 */
export function getFenceForCodeBlock(code: string, minNumberOfBackticks = 3): string {
	let backticksNeeded = minNumberOfBackticks;
	// Fold the matches one by one instead of spreading them into a single `Math.max(...)` call:
	// a spread passes one argument per fence line and can throw a RangeError on very large inputs.
	for (const match of code.matchAll(/^\s*(```+)/gm)) {
		backticksNeeded = Math.max(backticksNeeded, match[1].length + 1);
	}
	return '`'.repeat(backticksNeeded);
}
23
24
/**
 * Marker that precedes the file path inside the comment produced by `getFilepathComment`
 * and recognized by the regexp from `createFilepathRegexp`.
 */
export const filepathCodeBlockMarker = 'filepath:';
25
26
/**
 * Builds a regular expression that recognizes a `filepath:` comment for the given language.
 *
 * A matching string consists of a comment start (`#`, `//`, or any line comment start that
 * `getLanguage` reports for the language), the {@link filepathCodeBlockMarker}, the path and,
 * if the language's comments have one, an optional comment end. Capture group 1 holds the path.
 *
 * @param languageId The language whose comment syntax is accepted in addition to `#` and `//`.
 * @returns A regexp anchored with `^` and `$` and created without flags, i.e. meant to be
 * applied to a single line.
 */
export function createFilepathRegexp(languageId?: string): RegExp {
	const language = getLanguage(languageId);
	const prefixes: string[] = ['#', '\\/\\/']; // always allow # and // as comment start
	const suffixes: string[] = [];
	function add(lineComment: { start: string; end?: string }) {
		prefixes.push(escapeRegExpCharacters(lineComment.start));
		if (lineComment.end) {
			suffixes.push(escapeRegExpCharacters(lineComment.end));
		}
	}
	add(language.lineComment);
	language.alternativeLineComments?.forEach(add);
	const startMatch = `(?:${prefixes.join('|')})`;
	// The alternatives get their own group so that the leading `\s*` applies to every comment
	// end. Without it, `\s*a|b` only allows whitespace before the first alternative and the
	// whitespace in front of any other comment end is captured as part of the path.
	const optionalEndMatch = suffixes.length ? `(?:\\s*(?:${suffixes.join('|')}))?` : '';
	return new RegExp(`^\\s*${startMatch}\\s*${filepathCodeBlockMarker}\\s*(.*?)${optionalEndMatch}\\s*$`);
}
42
43
/**
 * Create a markdown code block with an optional language id and an optional file path.
 *
 * @param languageId The language id of the code; it is mapped to a markdown language id for the opening fence.
 * @param code The content of the code block.
 * @param shouldTrim Whether leading and trailing whitespace is removed from `code`. Defaults to `true`.
 * @param filePath The file path to include in the code block. To create the file path use the {@link IPromptPathRepresentationService}
 * @param minNumberOfBackticksOrStyle Either the minimal number of backticks for the fence (the
 * fence grows when `code` itself contains fences), or the fence string to use verbatim. With an
 * empty fence, neither the fences nor the language line are emitted.
 * @returns The markdown text of the code block.
 */
export function createFencedCodeBlock(languageId: string, code: string, shouldTrim = true, filePath?: string, minNumberOfBackticksOrStyle: string | number = 3): string {
	let fence: string;
	if (typeof minNumberOfBackticksOrStyle === 'number') {
		fence = getFenceForCodeBlock(code, minNumberOfBackticksOrStyle);
	} else {
		fence = minNumberOfBackticksOrStyle;
	}

	const filepathComment = filePath ? getFilepathComment(languageId, filePath) : '';
	const content = filepathComment + (shouldTrim ? code.trim() : code);

	if (!fence) {
		// no fence requested: only the (optional) file path comment and the code remain
		return content;
	}

	return `${fence}${languageIdToMDCodeBlockLang(languageId)}\n${content}\n${fence}`;
}
59
60
/**
 * Creates the comment line that announces the file path of a code block.
 *
 * @param languageId The language whose line comment syntax is used.
 * @param filePath The file path to mention after the {@link filepathCodeBlockMarker}.
 * @returns The comment followed by a newline; when the language's line comment has an end
 * sequence, it is appended after the path.
 */
export function getFilepathComment(languageId: string, filePath: string): string {
	const { start, end } = getLanguage(languageId).lineComment;
	const closing = end ? ` ${end}` : '';
	return `${start} ${filepathCodeBlockMarker} ${filePath}${closing}\n`;
}
65
66
/**
 * Strips the file path comment from the beginning of a code block.
 *
 * @param codeblock The content of the code block.
 * @param languageId The language used to build the expected comment.
 * @param filepath The file path that the comment is expected to mention.
 * @returns `codeblock` without the leading comment, or `codeblock` unchanged when it does not
 * start with exactly the comment that `getFilepathComment` creates for these arguments.
 */
export function removeLeadingFilepathComment(codeblock: string, languageId: string, filepath: string): string {
	const expectedPrefix = getFilepathComment(languageId, filepath);
	return codeblock.startsWith(expectedPrefix)
		? codeblock.slice(expectedPrefix.length)
		: codeblock;
}
74
75
/**
 * Maps a language id to the language name to put after the opening fence of a markdown code block.
 *
 * @param languageId The language id to map.
 * @returns The first markdown language id known for the language, or `languageId` itself when there is none.
 */
export function languageIdToMDCodeBlockLang(languageId: string): string {
	const preferredMdId = getLanguage(languageId)?.markdownLanguageIds?.[0];
	return preferredMdId ?? languageId;
}
79
80
/**
 * Reverse lookup from markdown code block language ids to language ids, built from
 * `wellKnownLanguages`. Wrapped in `Lazy`, so presumably only computed on first access of `.value`.
 *
 * When several languages declare the same markdown language id, the one visited last wins.
 */
const mdLanguageIdToLanguageId = new Lazy(() => {
	const lookup = new Map<string, string>();
	wellKnownLanguages.forEach((language, languageId) => {
		const mdIds = language.markdownLanguageIds;
		if (!mdIds) {
			// no dedicated markdown ids: the language id stands for itself
			lookup.set(languageId, languageId);
			return;
		}
		for (const mdId of mdIds) {
			lookup.set(mdId, languageId);
		}
	});
	return lookup;
});
93
94
/**
 * Maps the language name of a markdown code block back to a language id.
 *
 * @param mdLanguageId The language name as written after the opening fence.
 * @returns The matching language id, or `undefined` when the name is not known.
 */
export function mdCodeBlockLangToLanguageId(mdLanguageId: string): string | undefined {
	const lookup = mdLanguageIdToLanguageId.value;
	return lookup.get(mdLanguageId);
}
97
98
/**
 * Determines the language id of a resource from its file extension.
 *
 * @param uri The resource whose extension is inspected.
 * @returns The id of the first entry of `wellKnownLanguages` whose `extensions` include the
 * lower-cased extension, or the extension without its leading dot when no entry matches.
 */
export function getLanguageId(uri: URI): string {
	const ext = extname(uri).toLowerCase();

	// `wellKnownLanguages` is accessed as a map in this file (`get`, `forEach((value, key) => ...)`),
	// so its entries have to be iterated: `Object.keys` on a Map returns no keys at all, which
	// made the lookup always fall through to the bare extension.
	for (const [id, language] of wellKnownLanguages) {
		if (language.extensions?.includes(ext)) {
			return id;
		}
	}

	return ext.replace(/^\./, '');
}
105
106
/**
 * Determines the markdown code block language for a resource.
 *
 * @param uri The resource whose language is looked up via `getLanguageId`.
 * @returns The language name to put after the opening fence of a code block.
 */
export function getMdCodeBlockLanguage(uri: URI) {
	return languageIdToMDCodeBlockLang(getLanguageId(uri));
}
111
112
/**
 * A fenced code block found in a markdown text.
 */
export interface MarkdownCodeBlock {
	/** The fence characters used to start the block. */
	readonly startMarkup: string;

	/** The markdown language id of the code block, e.g. 'typescript'. May be empty */
	readonly language: string;

	/** The code content of the block. */
	readonly code: string;

	/**
	 * First entry of the markdown-it source map of the block.
	 * NOTE(review): presumably the zero-based line of the opening fence - confirm against markdown-it's `Token.map`.
	 */
	readonly startLine: number;

	/**
	 * Second entry of the markdown-it source map of the block.
	 * NOTE(review): presumably the line after the block (exclusive end) - confirm against markdown-it's `Token.map`.
	 */
	readonly endLine: number;
}
125
126
/**
 * Finds all fenced code blocks of a markdown text, including those nested in other tokens.
 *
 * @param text The markdown text to parse.
 * @returns One entry per `fence` token that carries a source map, in the order in which the
 * flattened token list yields them.
 */
export function extractCodeBlocks(text: string): MarkdownCodeBlock[] {
	const parser = new MarkdownIt();
	const blocks: MarkdownCodeBlock[] = [];
	for (const token of flattenTokensLists(parser.parse(text, {}))) {
		if (token.type !== 'fence' || !token.map) {
			continue;
		}
		const [startLine, endLine] = token.map;
		blocks.push({
			startMarkup: token.markup,
			// Trim trailing newline since this is always included
			code: token.content.replace(/\n$/, ''),
			language: token.info.trim(),
			startLine,
			endLine,
		});
	}
	return blocks;
}
144
145
/**
 * Collects the contents of all inline code spans of a markdown text.
 *
 * @param text The markdown text to parse.
 * @returns The content of every `code_inline` token, with a single trailing newline removed.
 */
export function extractInlineCode(text: string): string[] {
	const tokens = new MarkdownIt().parse(text, {});
	return Array.from(flattenTokensLists(tokens))
		.filter(token => token.type === 'code_inline')
		.map(token => token.content.replace(/\n$/, ''));
}
156
157
/**
 * Walks a markdown-it token list depth-first.
 *
 * @param tokensList The tokens to walk.
 * @returns An iterable that yields the (transitive) children of each token before the token itself.
 */
function* flattenTokensLists(tokensList: readonly MarkdownIt.Token[]): Iterable<MarkdownIt.Token> {
	for (const token of tokensList) {
		// tokens without children contribute only themselves
		yield* flattenTokensLists(token.children ?? []);
		yield token;
	}
}
165
166