Path: blob/main/extensions/copilot/src/extension/prompt/node/editFromDiffGeneration.ts
13399 views
/*---------------------------------------------------------------------------------------------1* Copyright (c) Microsoft Corporation. All rights reserved.2* Licensed under the MIT License. See License.txt in the project root for license information.3*--------------------------------------------------------------------------------------------*/45import { CharCode } from '../../../util/vs/base/common/charCode';6import { Lines, LinesEdit } from './editGeneration';7import { IGuessedIndentation, computeIndentLevel2, guessIndentation } from './indentationGuesser';8910export interface Reporter {11recovery(originalLine: number, newLine: number): void;12warning(message: string): void;13}1415export function createEditsFromRealDiff(code: Lines, diff: Lines, reporter?: Reporter): LinesEdit[] {16const edits: LinesEdit[] = [];17let diffLineIndex = findChuck(diff);18if (diffLineIndex === -1) {19reporter?.warning('No chunk header found in the diff.');20diffLineIndex = 0;21}22function handleLineContentMismatch(diffLine: string, code: Lines, originalLineIndex: number): boolean {23for (let i = originalLineIndex; i < code.length; i++) {24if (code[i] === diffLine) {25reporter?.recovery(originalLineIndex, i);26originalLineIndex = i;27return true;28}29}30for (let i = originalLineIndex - 1; i >= 0; i--) {31if (code[i] === diffLine) {32reporter?.recovery(originalLineIndex, i);33originalLineIndex = i;34return true;35}36}37reporter?.warning(`Diff line does not match original content: Not found,`);38return false;39}404142let originalLineIndex = 0;43while (diffLineIndex < diff.length && originalLineIndex <= code.length) {44const diffLine = diff[diffLineIndex];45if (diffLine.length === 0) {46break;47}48const firstChar = diffLine.charCodeAt(0);49switch (firstChar) {50case CharCode.AtSign: {51const match = /^@@ -(\d+),?\d* \+(\d+),?\d* @@/.exec(diffLine);52if (match) {53const originalLineHint = parseInt(match[1]);54originalLineIndex = originalLineHint - 1;55} else {56reporter?.warning(`Invalid chunk header found in the diff: ${diffLine}`);57}58break;59}60case CharCode.Plus: {61const noEOL = isNextLineNoEOLMarker(diff, diffLineIndex);62edits.push(new LinesEdit(originalLineIndex, originalLineIndex, [diffLine.substring(1)], '', noEOL ? '' : '\n'));63break;64}65case CharCode.Dash:66if (diffLine.substring(1) !== code[originalLineIndex]) {67if (!handleLineContentMismatch(diffLine.substring(1), code, originalLineIndex)) {68break; // don't do the delete69}70}71edits.push(new LinesEdit(originalLineIndex, originalLineIndex + 1, []));72originalLineIndex++;73break;74case CharCode.Backslash: {75// ignore, already handled for insert, and not relevant for delete76break;77}78default: {79if (diffLine.substring(1) === code[originalLineIndex] || diffLine === code[originalLineIndex]) {80originalLineIndex++;81} else {82if (handleLineContentMismatch(diffLine.substring(1), code, originalLineIndex)) {83originalLineIndex++;84}85}86break;87}88}89diffLineIndex++;90}91return edits;92}9394function isNextLineNoEOLMarker(diff: Lines, i: number): boolean {95return i + 1 < diff.length && diff[i + 1].charCodeAt(0) === CharCode.Backslash;96}9798function findChuck(diff: Lines): number {99for (let i = 0; i < diff.length; i++) {100if (diff[i].startsWith('@@')) {101return i;102}103}104return -1;105}106107enum Match {108No,109Yes,110Similar111}112113export function createEditsFromPseudoDiff(code: Lines, diff: Lines, reporter?: Reporter): LinesEdit[] {114115const diffLineInfos = getLineInfos(diff);116117const codeIndentInfo = guessIndentation(code, 4, false);118119const diffTabSize = getTabSize(diffLineInfos);120121let indentDiff: number | undefined;122123function compareLine(diffLineInfo: LineInfo, codeLine: string): Match {124const diffLine = diffLineInfo.content;125const codeIndentLength = getIndentLength(codeLine);126let i = diffLineInfo.indentLength, k = codeIndentLength;127let charactersMatched = 0;128129// ignore the leading indentation130while (i < diffLine.length && k < codeLine.length) {131if (diffLine.charCodeAt(i) !== codeLine.charCodeAt(k)) {132break;133}134i++;135k++;136charactersMatched++;137}138if (i < diffLine.length || k < codeLine.length) {139return (((codeLine.length - codeIndentLength) * 3 / 4 < charactersMatched) && ((diffLine.length - diffLineInfo.indentLength) * 3 / 4 < charactersMatched)) ? Match.Similar : Match.No;140}141if (indentDiff === undefined) {142const diffIndent = computeIndentLevel2(diffLine, diffTabSize);143if (diffIndent >= 0) {144const codeIndent = computeIndentLevel2(codeLine, codeIndentInfo.tabSize);145indentDiff = codeIndent - diffIndent;146}147}148return Match.Yes;149}150function handleLineContentMismatch(diffLineInfo: LineInfo, code: Lines, originalLineIndex: number): number {151for (let i = originalLineIndex; i < code.length; i++) {152if (compareLine(diffLineInfo, code[i]) === Match.Yes) {153reporter?.recovery(originalLineIndex, i);154return i;155}156}157reporter?.warning('Unable to find a matching line for the diff line: ' + diffLineInfo.content);158return -1;159}160161function findFirstOccurrenceOfLine(diffLineInfo: LineInfo, code: Lines): number {162for (let i = 0; i < code.length; i++) {163if (compareLine(diffLineInfo, code[i]) === Match.Yes) {164return i;165}166}167return 0;168}169170const edits: LinesEdit[] = [];171let diffLineIndex = 0;172let originalLineIndex = 0;173if (diffLineInfos.length > 0) {174originalLineIndex = findFirstOccurrenceOfLine(diffLineInfos[0], code);175}176while (diffLineIndex < diffLineInfos.length && originalLineIndex < code.length) {177const diffLineInfo = diffLineInfos[diffLineIndex];178switch (diffLineInfo.op) {179case Op.Insert: {180const codeLineContent = adjustIndenation(diffLineInfo, diffTabSize, indentDiff ?? 0, codeIndentInfo);181edits.push(new LinesEdit(originalLineIndex, originalLineIndex, [codeLineContent]));182break;183}184case Op.Delete: {185const codeLine = code[originalLineIndex];186const match = compareLine(diffLineInfo, codeLine);187if (match === Match.No) {188const line = handleLineContentMismatch(diffLineInfo, code, originalLineIndex);189if (line !== -1) {190originalLineIndex = line;191} else {192break; // do not delete193}194}195const nextDiffLine = diffLineInfos[diffLineIndex + 1];196if (nextDiffLine?.op === Op.Insert) {197if (nextDiffLine.indentLength === diffLineInfo.indentLength) {198// special handling of the case where an insert follows the remove and they use the same indentation199const newContent = getIndent(codeLine) + nextDiffLine.content.substring(nextDiffLine.indentLength);200edits.push(new LinesEdit(originalLineIndex, originalLineIndex + 1, [newContent]));201diffLineIndex++;202originalLineIndex++;203break;204}205}206edits.push(new LinesEdit(originalLineIndex, originalLineIndex + 1, []));207originalLineIndex++;208break;209}210default: {211const match = compareLine(diffLineInfo, code[originalLineIndex]);212if (match === Match.No) {213const line = handleLineContentMismatch(diffLineInfo, code, originalLineIndex);214if (line !== -1) {215originalLineIndex = line;216} else {217break; // do not increase originalLineIndex218}219} else if (match === Match.Similar) {220const codeLineContent = adjustIndenation(diffLineInfo, diffTabSize, indentDiff ?? 0, codeIndentInfo);221edits.push(new LinesEdit(originalLineIndex, originalLineIndex + 1, [codeLineContent]));222}223originalLineIndex++;224break;225}226}227diffLineIndex++;228}229if (originalLineIndex === code.length && diffLineIndex < diffLineInfos.length) {230// there are still some lines to add231for (; diffLineIndex < diffLineInfos.length; diffLineIndex++) {232const diffLineInfo = diffLineInfos[diffLineIndex];233if (diffLineInfo.op === Op.Insert) {234const codeLineContent = adjustIndenation(diffLineInfo, diffTabSize, indentDiff ?? 0, codeIndentInfo);235edits.push(new LinesEdit(originalLineIndex, originalLineIndex, [codeLineContent], '\n', ''));236}237}238}239240241242return edits;243}244245function isWhiteSpace(charCode: number): boolean {246return charCode === CharCode.Space || charCode === CharCode.Tab;247}248249function isPlusOrMinus(charCode: number): boolean {250return charCode === CharCode.Dash || charCode === CharCode.Plus;251}252253function getIndentLength(line: string): number {254let i = 0;255while (i < line.length && isWhiteSpace(line.charCodeAt(i))) {256i++;257}258return i;259}260261function getIndent(line: string): string {262let i = 0;263while (i < line.length && isWhiteSpace(line.charCodeAt(i))) {264i++;265}266return line.substring(0, i);267}268269270function adjustIndenation(diffLineInfo: LineInfo, diffLineTabSize: number, indentDifference: number, codeIndentInfo: IGuessedIndentation): string {271if (indentDifference === 0 && ((!codeIndentInfo.insertSpaces && diffLineInfo.indentKind === IndentKind.Tabs) || (codeIndentInfo.insertSpaces && diffLineInfo.indentKind === IndentKind.Spaces))) {272return diffLineInfo.content;273}274const diffIndent = computeIndentLevel2(diffLineInfo.content, diffLineTabSize);275const newIndentation = codeIndentInfo.insertSpaces ? ' '.repeat(codeIndentInfo.tabSize * (diffIndent + indentDifference)) : '\t'.repeat(diffIndent + indentDifference);276return newIndentation + diffLineInfo.content.substring(diffLineInfo.indentLength);277}278279enum Op {280Equal = 0,281Insert = 1,282Delete = -1283}284285enum IndentKind {286undefined = 0,287Tabs = 1,288Spaces = 2,289Mixed = 3290}291292interface LineInfo {293content: string;294op: Op;295indentKind: IndentKind;296indentLength: number;297}298299function udpateIndentInfo(lineInfo: LineInfo): void {300let indentKind = IndentKind.undefined;301let indentLength = 0;302const line = lineInfo.content;303if (line.length > 0) {304const indentChar = line.charCodeAt(0);305if (isWhiteSpace(indentChar)) {306indentLength++;307indentKind = indentChar === CharCode.Space ? IndentKind.Spaces : IndentKind.Tabs;308while (indentLength < line.length) {309const charCode = line.charCodeAt(indentLength);310if (!isWhiteSpace(charCode)) {311break;312}313indentLength++;314if (charCode !== indentChar) {315indentKind = IndentKind.Mixed;316}317}318319}320}321lineInfo.indentKind = indentKind;322lineInfo.indentLength = indentLength;323}324325function getLineInfos(diffLines: Lines): LineInfo[] {326const result: LineInfo[] = [];327328for (let i = 0; i < diffLines.length; i++) {329const line = diffLines[i];330const lineInfo: LineInfo = {331content: line,332op: Op.Equal,333indentKind: IndentKind.undefined,334indentLength: 0335};336if (line.length > 0) {337if (isPlusOrMinus(line.charCodeAt(0))) {338lineInfo.op = line.charCodeAt(0) === CharCode.Dash ? Op.Delete : Op.Insert;339if (line.length > 1 && line.charCodeAt(1) === CharCode.Space) {340lineInfo.content = line.substring(1);341// replace the + or - with a space if the remaining indentation is odd342if (getIndentLength(lineInfo.content) % 2 === 1) {343lineInfo.content = ' ' + lineInfo.content;344}345} else {346lineInfo.content = line.substring(1);347}348}349udpateIndentInfo(lineInfo);350}351result.push(lineInfo);352}353sanitizeLineInfos(result);354return result;355}356357358function sanitizeLineInfos(lineInfos: LineInfo[]): void {359let min = Number.MAX_VALUE;360for (const lineInfo of lineInfos) {361if (lineInfo.indentKind !== IndentKind.Spaces || lineInfo.indentLength === 0) {362return;363}364if (lineInfo.indentLength < min) {365min = lineInfo.indentLength;366}367}368if (min > 0) {369for (const lineInfo of lineInfos) {370lineInfo.indentLength -= min;371lineInfo.content = lineInfo.content.substring(min);372}373}374}375376function getTabSize(lineInfos: LineInfo[]): number {377return 4;378}379380381