Path: blob/main/extensions/copilot/src/extension/codeBlocks/node/test/codeBlockProcessor.spec.ts
13403 views
/*---------------------------------------------------------------------------------------------
 *  Copyright (c) Microsoft Corporation. All rights reserved.
 *  Licensed under the MIT License. See License.txt in the project root for license information.
 *--------------------------------------------------------------------------------------------*/

import { assert } from 'chai';
import { suite, test } from 'vitest';
import type { ChatVulnerability } from 'vscode';
import { URI } from '../../../../util/vs/base/common/uri';
import { MarkdownString } from '../../../../vscodeTypes';
import { CodeBlock } from '../../../prompt/common/conversation';
import { CodeBlockInfo, CodeBlockProcessor, LineProcessor } from '../codeBlockProcessor';

/**
 * Tests for CodeBlockProcessor.
 *
 * Each test feeds markdown into a processor (as one string, as whole lines, as partial
 * lines, or character by character), flushes it, and then compares what the processor
 * handed to its two callbacks against the expected code blocks and markdown fragments.
 */
suite('CodeBlockProcessor', () => {

	/**
	 * One invocation of the processor's markdown callback, flattened to plain values
	 * (`markdown.value` instead of the MarkdownString) so it can be deep-compared.
	 */
	type ReportedMarkdown = { markdown: string; codeBlock: CodeBlockInfo | undefined; vulnerabilities: ChatVulnerability[] | undefined };

	/**
	 * Creates a CodeBlockProcessor whose callbacks record everything they receive.
	 *
	 * @param reportedCodeblocks Array that receives every code block passed to the code block callback.
	 * @param reportedMarkdown Array that receives one entry per markdown callback invocation.
	 * @param lineProcessor Optional line processor that is forwarded to the CodeBlockProcessor unchanged.
	 * @returns The processor under test. Paths it asks to resolve are turned into `file` URIs.
	 */
	function newCodeBlockProcessor(reportedCodeblocks: CodeBlock[] = [], reportedMarkdown: ReportedMarkdown[] = [], lineProcessor?: LineProcessor) {
		return new CodeBlockProcessor(
			(path) => URI.file(path),
			(markdown, codeBlockInfo, vulnerabilities) => reportedMarkdown.push({ markdown: markdown.value, codeBlock: codeBlockInfo, vulnerabilities }),
			(codeblock) => reportedCodeblocks.push(codeblock),
			lineProcessor
		);
	}

	// The whole document is passed in a single processMarkdown call.
	test('append multi line text', () => {
		const reportedCodeblocks: CodeBlock[] = [];
		const reportedMarkdown: ReportedMarkdown[] = [];
		const tracker = newCodeBlockProcessor(reportedCodeblocks, reportedMarkdown);

		const lines = [
			'hello\n',
			'```ts\n',
			'console.log("Hello, world!");\n',
			'```'
		].join('');
		tracker.processMarkdown(lines, undefined);
		tracker.flush();

		assert.deepEqual(reportedCodeblocks[0], {
			code: 'console.log("Hello, world!");\n',
			markdownBeforeBlock: 'hello\n',
			language: 'ts',
			resource: undefined
		});

		const resource = undefined;
		const language = 'ts';

		// Expected: one report per line, even though the input arrived as one string.
		assert.deepEqual(reportedMarkdown, [
			{ markdown: 'hello\n', codeBlock: undefined, vulnerabilities: undefined },
			{ markdown: '```ts\n', codeBlock: { index: 0, resource, language }, vulnerabilities: undefined },
			{ markdown: 'console.log("Hello, world!");\n', codeBlock: { index: 0, resource, language }, vulnerabilities: undefined },
			{ markdown: '```', codeBlock: { index: 0, resource, language }, vulnerabilities: undefined },
		]);

	});

	// Same kind of document, but every line is passed in its own processMarkdown call.
	test('append muliple lines', () => {
		const reportedCodeblocks: CodeBlock[] = [];
		const reportedMarkdown: ReportedMarkdown[] = [];

		const tracker = newCodeBlockProcessor(reportedCodeblocks, reportedMarkdown);
		[
			'hello\n',
			'```ts\n',
			'console.log("Hello!");\n',
			'console.log("World!");\n',
			'```'
		].forEach(line => tracker.processMarkdown(line, undefined));
		tracker.flush();

		const resource = undefined;
		const language = 'ts';

		assert.deepEqual(reportedCodeblocks[0], {
			code: 'console.log("Hello!");\nconsole.log("World!");\n',
			markdownBeforeBlock: 'hello\n',
			language,
			resource
		});

		assert.deepEqual(reportedMarkdown, [
			{ markdown: 'hello\n', codeBlock: undefined, vulnerabilities: undefined },
			{ markdown: '```ts\n', codeBlock: { index: 0, resource, language }, vulnerabilities: undefined },
			{ markdown: 'console.log("Hello!");\n', codeBlock: { index: 0, resource, language }, vulnerabilities: undefined },
			{ markdown: 'console.log("World!");\n', codeBlock: { index: 0, resource, language }, vulnerabilities: undefined },
			{ markdown: '```', codeBlock: { index: 0, resource, language }, vulnerabilities: undefined },
		]);

	});

	// Chunks no longer line up with line boundaries: lines are split across calls
	// and one chunk contains the end of one line plus the start of the next.
	test('append muliple partial lines', () => {
		const reportedCodeblocks: CodeBlock[] = [];
		const reportedMarkdown: ReportedMarkdown[] = [];

		const tracker = newCodeBlockProcessor(reportedCodeblocks, reportedMarkdown);
		[
			'he', 'llo\n',
			'```', 'ts\n',
			'console', '.log("Hello!");\nconsole',
			'.log("World!");\n',
			'```'
		].forEach(line => tracker.processMarkdown(line, undefined));
		tracker.flush();

		const resource = undefined;
		const language = 'ts';

		assert.deepEqual(reportedCodeblocks[0], {
			code: 'console.log("Hello!");\nconsole.log("World!");\n',
			markdownBeforeBlock: 'hello\n',
			language,
			resource
		});

		// Expected fragment boundaries differ from the input chunk boundaries.
		assert.deepEqual(reportedMarkdown, [
			{ markdown: 'he', codeBlock: undefined, vulnerabilities: undefined },
			{ markdown: 'llo\n', codeBlock: undefined, vulnerabilities: undefined },
			{ markdown: '```ts\n', codeBlock: { index: 0, resource, language }, vulnerabilities: undefined },
			{ markdown: 'console.log("Hello!");\n', codeBlock: { index: 0, resource, language }, vulnerabilities: undefined },
			{ markdown: 'console', codeBlock: { index: 0, resource, language }, vulnerabilities: undefined },
			{ markdown: '.log("World!");\n', codeBlock: { index: 0, resource, language }, vulnerabilities: undefined },
			{ markdown: '```', codeBlock: { index: 0, resource, language }, vulnerabilities: undefined },
		]);
	});

	// Backticks arrive one at a time and also appear inline, as a fence that opens and
	// closes on the same line, and as double backticks. Only the real ```ts fence is
	// expected to open a code block. The block starts with a `# filepath:` marker line.
	test('append partial lines 1', () => {
		const reportedCodeblocks: CodeBlock[] = [];
		const reportedMarkdown: ReportedMarkdown[] = [];

		const tracker = newCodeBlockProcessor(reportedCodeblocks, reportedMarkdown);
		[
			'12', '345\n',
			'`123`', '```456```\n',
			'`', '`', '123\n``\n',
			'`', '`', '`', 'ts\n',
			'# filepath: /project/foo\n',
			'`', '`', '123``',
			'456\n',
			'```'
		].forEach(line => tracker.processMarkdown(line, undefined));
		tracker.flush();

		const resource = URI.file('/project/foo');
		const language = 'ts';

		// The filepath marker line is expected to become the resource and to be absent from the code.
		assert.deepEqual(reportedCodeblocks[0], {
			code: '``123``456\n',
			markdownBeforeBlock: '12345\n`123````456```\n``123\n``\n',
			language,
			resource
		});

		assert.deepEqual(reportedMarkdown, [
			{ markdown: '12', codeBlock: undefined, vulnerabilities: undefined },
			{ markdown: '345\n', codeBlock: undefined, vulnerabilities: undefined },
			{ markdown: '`123`', codeBlock: undefined, vulnerabilities: undefined },
			{ markdown: '```456```\n', codeBlock: undefined, vulnerabilities: undefined },
			{ markdown: '``123\n', codeBlock: undefined, vulnerabilities: undefined },
			{ markdown: '``\n', codeBlock: undefined, vulnerabilities: undefined },
			{ markdown: '```ts\n', codeBlock: { index: 0, resource, language }, vulnerabilities: undefined },
			{ markdown: '``123``456\n', codeBlock: { index: 0, resource, language }, vulnerabilities: undefined },
			{ markdown: '```', codeBlock: { index: 0, resource, language }, vulnerabilities: undefined },
		]);
	});

	// Two code blocks separated by plain text: each gets its own index and its own markdownBeforeBlock.
	test('multiple code blocks', () => {
		const reportedCodeblocks: CodeBlock[] = [];
		const reportedMarkdown: ReportedMarkdown[] = [];

		const tracker = newCodeBlockProcessor(reportedCodeblocks, reportedMarkdown);
		tracker.processMarkdown([
			'hello\n',
			'```ts\n',
			'console.log("Hello, world!");\n',
			'```\n',
			'more\n',
			'more\n',
			'more\n',
			'```ts\n',
			'console.log("more");\n',
			'```'
		].join(''));
		tracker.flush();

		const language = 'ts';
		const resource = undefined;

		assert.deepEqual(reportedCodeblocks[0], {
			code: 'console.log("Hello, world!");\n',
			markdownBeforeBlock: 'hello\n',
			language,
			resource
		});
		assert.deepEqual(reportedCodeblocks[1], {
			code: 'console.log("more");\n',
			markdownBeforeBlock: 'more\nmore\nmore\n',
			language,
			resource
		});

		assert.deepEqual(reportedMarkdown, [
			{ markdown: 'hello\n', codeBlock: undefined, vulnerabilities: undefined },
			{ markdown: '```ts\n', codeBlock: { index: 0, resource, language }, vulnerabilities: undefined },
			{ markdown: 'console.log("Hello, world!");\n', codeBlock: { index: 0, resource, language }, vulnerabilities: undefined },
			{ markdown: '```\n', codeBlock: { index: 0, resource, language }, vulnerabilities: undefined },
			{ markdown: 'more\n', codeBlock: undefined, vulnerabilities: undefined },
			{ markdown: 'more\n', codeBlock: undefined, vulnerabilities: undefined },
			{ markdown: 'more\n', codeBlock: undefined, vulnerabilities: undefined },
			{ markdown: '```ts\n', codeBlock: { index: 1, resource, language }, vulnerabilities: undefined },
			{ markdown: 'console.log("more");\n', codeBlock: { index: 1, resource, language }, vulnerabilities: undefined },
			{ markdown: '```', codeBlock: { index: 1, resource, language }, vulnerabilities: undefined },
		]);

	});

	// Fences made of tildes and fences made of four backticks are both expected to delimit code blocks.
	test('code blocks with tildes', () => {
		const reportedCodeblocks: CodeBlock[] = [];
		const reportedMarkdown: ReportedMarkdown[] = [];

		const tracker = newCodeBlockProcessor(reportedCodeblocks, reportedMarkdown);

		tracker.processMarkdown([
			'~~~ts\n',
			'// using tilde\n',
			'~~~\n',
			'````ts\n',
			'// using 4 backticks\n',
			'````\n',
		].join(''));
		tracker.flush();

		const resource = undefined;
		const language = 'ts';

		assert.deepEqual(reportedCodeblocks[0], {
			code: '// using tilde\n',
			markdownBeforeBlock: '',
			language,
			resource
		});
		assert.deepEqual(reportedCodeblocks[1], {
			code: '// using 4 backticks\n',
			markdownBeforeBlock: '',
			language,
			resource
		});

		assert.deepEqual(reportedMarkdown, [
			{ markdown: '~~~ts\n', codeBlock: { index: 0, resource, language }, vulnerabilities: undefined },
			{ markdown: '// using tilde\n', codeBlock: { index: 0, resource, language }, vulnerabilities: undefined },
			{ markdown: '~~~\n', codeBlock: { index: 0, resource, language }, vulnerabilities: undefined },
			{ markdown: '````ts\n', codeBlock: { index: 1, resource, language }, vulnerabilities: undefined },
			{ markdown: '// using 4 backticks\n', codeBlock: { index: 1, resource, language }, vulnerabilities: undefined },
			{ markdown: '````\n', codeBlock: { index: 1, resource, language }, vulnerabilities: undefined },
		]);
	});

	// A three-backtick fence inside a four-backtick fence is expected to stay part of the outer block's code.
	test('nested code blocks', () => {
		const reportedCodeblocks: CodeBlock[] = [];
		const reportedMarkdown: ReportedMarkdown[] = [];

		const tracker = newCodeBlockProcessor(reportedCodeblocks, reportedMarkdown);
		tracker.processMarkdown([
			'````ts\n',
			'// using 4 backticks\n',
			'```ts\n',
			'// nested using 3 backticks\n',
			'```\n',
			'````\n',
		].join(''));
		tracker.flush();

		const resource = undefined;
		const language = 'ts';

		assert.deepEqual(reportedCodeblocks[0], {
			code: [
				'// using 4 backticks\n',
				'```ts\n',
				'// nested using 3 backticks\n',
				'```\n',
			].join(''),
			markdownBeforeBlock: '',
			language,
			resource
		});

		assert.deepEqual(reportedMarkdown, [
			{ markdown: '````ts\n', codeBlock: { index: 0, resource, language }, vulnerabilities: undefined },
			{ markdown: '// using 4 backticks\n', codeBlock: { index: 0, resource, language }, vulnerabilities: undefined },
			{ markdown: '```ts\n', codeBlock: { index: 0, resource, language }, vulnerabilities: undefined },
			{ markdown: '// nested using 3 backticks\n', codeBlock: { index: 0, resource, language }, vulnerabilities: undefined },
			{ markdown: '```\n', codeBlock: { index: 0, resource, language }, vulnerabilities: undefined },
			{ markdown: '````\n', codeBlock: { index: 0, resource, language }, vulnerabilities: undefined },
		]);
	});

	// A `filepath:` marker on the first line of a block (as a `//` comment or an HTML comment)
	// is expected to become the block's resource and to appear neither in the code nor in the
	// reported markdown.
	test('file marker', () => {
		const reportedCodeblocks: CodeBlock[] = [];
		const reportedMarkdown: ReportedMarkdown[] = [];

		const tracker = newCodeBlockProcessor(reportedCodeblocks, reportedMarkdown);
		tracker.processMarkdown([
			'hello\n',
			'```ts\n',
			'// filepath: /project/foo0\n',
			'console.log("Hello, world!");\n',
			'```\n',
			'more\n',
			'more\n',
			'more\n',
			'```html\n',
			'<!-- filepath: /project/foo1 -->\n',
			'<html>more</html>\n',
			'```'
		].join(''));
		tracker.flush();

		const resource0 = URI.file('/project/foo0');
		const language0 = 'ts';
		const resource1 = URI.file('/project/foo1');
		const language1 = 'html';

		assert.deepEqual(reportedCodeblocks[0], {
			code: 'console.log("Hello, world!");\n',
			markdownBeforeBlock: 'hello\n',
			language: language0,
			resource: resource0
		});
		assert.deepEqual(reportedCodeblocks[1], {
			code: '<html>more</html>\n',
			markdownBeforeBlock: 'more\nmore\nmore\n',
			language: language1,
			resource: resource1
		});

		assert.deepEqual(reportedMarkdown, [
			{ markdown: 'hello\n', codeBlock: undefined, vulnerabilities: undefined },
			{ markdown: '```ts\n', codeBlock: { index: 0, resource: resource0, language: language0 }, vulnerabilities: undefined },
			{ markdown: 'console.log("Hello, world!");\n', codeBlock: { index: 0, resource: resource0, language: language0 }, vulnerabilities: undefined },
			{ markdown: '```\n', codeBlock: { index: 0, resource: resource0, language: language0 }, vulnerabilities: undefined },
			{ markdown: 'more\n', codeBlock: undefined, vulnerabilities: undefined },
			{ markdown: 'more\n', codeBlock: undefined, vulnerabilities: undefined },
			{ markdown: 'more\n', codeBlock: undefined, vulnerabilities: undefined },
			{ markdown: '```html\n', codeBlock: { index: 1, resource: resource1, language: language1 }, vulnerabilities: undefined },
			{ markdown: '<html>more</html>\n', codeBlock: { index: 1, resource: resource1, language: language1 }, vulnerabilities: undefined },
			{ markdown: '```', codeBlock: { index: 1, resource: resource1, language: language1 }, vulnerabilities: undefined },
		]);
	});

	// Blank lines directly after the file marker: with one blank line none is expected in the
	// output (first block); with two blank lines exactly one is expected to remain (second block).
	test('new line after file marker', () => {
		const reportedCodeblocks: CodeBlock[] = [];
		const reportedMarkdown: ReportedMarkdown[] = [];

		const tracker = newCodeBlockProcessor(reportedCodeblocks, reportedMarkdown);
		tracker.processMarkdown([
			'hello\n',
			'```ts\n',
			'// filepath: /project/foo0\n',
			'\n',
			'console.log("Hello, world!");\n',
			'```\n',
			'more\n',
			'more\n',
			'more\n',
			'```html\n',
			'<!-- filepath: /project/foo1 -->\n',
			'\n',
			'\n',
			'<html>more</html>\n',
			'```'
		].join(''));
		tracker.flush();

		const resource0 = URI.file('/project/foo0');
		const language0 = 'ts';
		const resource1 = URI.file('/project/foo1');
		const language1 = 'html';

		assert.deepEqual(reportedCodeblocks[0], {
			code: 'console.log("Hello, world!");\n',
			markdownBeforeBlock: 'hello\n',
			language: language0,
			resource: resource0
		});
		assert.deepEqual(reportedCodeblocks[1], {
			code: '\n<html>more</html>\n',
			markdownBeforeBlock: 'more\nmore\nmore\n',
			language: language1,
			resource: resource1
		});

		assert.deepEqual(reportedMarkdown, [
			{ markdown: 'hello\n', codeBlock: undefined, vulnerabilities: undefined },
			{ markdown: '```ts\n', codeBlock: { index: 0, resource: resource0, language: language0 }, vulnerabilities: undefined },
			{ markdown: 'console.log("Hello, world!");\n', codeBlock: { index: 0, resource: resource0, language: language0 }, vulnerabilities: undefined },
			{ markdown: '```\n', codeBlock: { index: 0, resource: resource0, language: language0 }, vulnerabilities: undefined },
			{ markdown: 'more\n', codeBlock: undefined, vulnerabilities: undefined },
			{ markdown: 'more\n', codeBlock: undefined, vulnerabilities: undefined },
			{ markdown: 'more\n', codeBlock: undefined, vulnerabilities: undefined },
			{ markdown: '```html\n', codeBlock: { index: 1, resource: resource1, language: language1 }, vulnerabilities: undefined },
			{ markdown: '\n', codeBlock: { index: 1, resource: resource1, language: language1 }, vulnerabilities: undefined },
			{ markdown: '<html>more</html>\n', codeBlock: { index: 1, resource: resource1, language: language1 }, vulnerabilities: undefined },
			{ markdown: '```', codeBlock: { index: 1, resource: resource1, language: language1 }, vulnerabilities: undefined },
		]);
	});

	// The resource is supplied through processCodeblockUri instead of a marker line in the text.
	// It is passed after the opening fence has been fed in, yet the expectations require the
	// opening fence's report to carry the resource as well.
	test('file marker reported', () => {
		const reportedCodeblocks: CodeBlock[] = [];
		const reportedMarkdown: ReportedMarkdown[] = [];

		const resource = URI.file('/project/foo');

		const tracker = newCodeBlockProcessor(reportedCodeblocks, reportedMarkdown);
		tracker.processMarkdown([
			'hello\n',
			'```ts\n',
		].join(''));
		tracker.processCodeblockUri(resource);
		tracker.processMarkdown([
			'console.log("Hello, world!");\n',
			'```\n',
		].join(''));
		tracker.flush();

		const language = 'ts';

		assert.deepEqual(reportedCodeblocks[0], {
			code: 'console.log("Hello, world!");\n',
			markdownBeforeBlock: 'hello\n',
			language: language,
			resource: resource
		});

		assert.deepEqual(reportedMarkdown, [
			{ markdown: 'hello\n', codeBlock: undefined, vulnerabilities: undefined },
			{ markdown: '```ts\n', codeBlock: { index: 0, resource, language }, vulnerabilities: undefined },
			{ markdown: 'console.log("Hello, world!");\n', codeBlock: { index: 0, resource, language }, vulnerabilities: undefined },
			{ markdown: '```\n', codeBlock: { index: 0, resource, language }, vulnerabilities: undefined },
		]);

	});

	// A markdown block that itself contains a ```typescript block using the same three-backtick
	// fence. Everything between the first and the last line is expected to end up in one block.
	test('nested codeblocks with the same separator', () => {
		const reportedCodeblocks: CodeBlock[] = [];
		const reportedMarkdown: ReportedMarkdown[] = [];

		const tracker = newCodeBlockProcessor(reportedCodeblocks, reportedMarkdown);
		const lines = [
			'```markdown\n',
			'# Example Markdown Document\n',
			'\n',
			'This is an example of a Markdown document that contains a code block.\n',
			'\n',
			'## Code Block\n',
			'\n',
			'Here is a code block in TypeScript:\n',
			'\n',
			'```typescript\n',
			'// Generated by Copilot\n',
			'class Example {\n',
			' private _value: number;\n',
			'}\n',
			'```\n',
			'```\n'
		];
		tracker.processMarkdown(lines.join(''));
		tracker.flush();

		const resource = undefined;
		const language = 'markdown';

		// The code is every input line except the outermost opening and closing fence.
		assert.deepEqual(reportedCodeblocks[0], {
			code: lines.slice(1, lines.length - 1).join(''),
			markdownBeforeBlock: '',
			language: language,
			resource: resource
		});

		// Every line, including the inner fences, is expected to be reported as part of block 0.
		assert.deepEqual(reportedMarkdown, lines.map(markdown => ({ markdown, codeBlock: { index: 0, resource, language }, vulnerabilities: undefined })));

	});

	// A custom LineProcessor rewrites lines that start with '###': upper-cased outside of
	// code blocks, lower-cased inside. Input is streamed one character at a time.
	test('line handler', () => {
		const reportedCodeblocks: CodeBlock[] = [];
		const reportedMarkdown: ReportedMarkdown[] = [];

		const lineProcessor = {
			// True while `linePart` is a prefix of '###' or already starts with '###', so the
			// decision can be made on a partially received line.
			matchesLineStart(linePart: string, inCodeBlock: boolean): boolean {
				return linePart.startsWith('###'.substring(0, linePart.length));
			},
			// Changes only the letter case, so expected output can be derived from the input lines.
			process(line: MarkdownString, inCodeBlock: boolean): MarkdownString {
				return new MarkdownString(inCodeBlock ? line.value.toLowerCase() : line.value.toUpperCase());
			}
		};

		const tracker = newCodeBlockProcessor(reportedCodeblocks, reportedMarkdown, lineProcessor);
		const lines = [
			'# Big Header\n',
			'### Example Header\n',
			'\n',
			'This is an example of a Markdown document that contains a code block.\n',
			'\n',
			'#### Outside\n',
			'\n',
			'Here is a code block:\n',
			'\n',
			'```markdown\n', // index 9: opening fence of the code block
			'# Unrelated\n',
			'## Unrelated\n',
			'### Inside\n',
			'```\n',
		];
		// process character by character to simulate streaming
		lines.join('').split('').forEach(s =>
			tracker.processMarkdown(s)
		);
		tracker.flush();

		const resource = undefined;
		const language = 'markdown';

		// Only the three lines starting with '###' are expected to be rewritten.
		const expectedLines = [...lines];
		expectedLines[1] = '### EXAMPLE HEADER\n';
		expectedLines[5] = '#### OUTSIDE\n';
		expectedLines[12] = '### inside\n';

		assert.deepEqual(reportedCodeblocks[0], {
			code: expectedLines.slice(10, 13).join(''),
			markdownBeforeBlock: expectedLines.slice(0, 9).join(''),
			language: language,
			resource: resource
		});

		// Fragment boundaries are not asserted here, only the concatenated text.
		assert.deepEqual(reportedMarkdown.map(m => m.markdown).join(''), expectedLines.join(''));

	});

});