Path: blob/main/extensions/copilot/src/extension/prompt/node/indentationGuesser.ts
13399 views
/*---------------------------------------------------------------------------------------------
 *  Copyright (c) Microsoft Corporation. All rights reserved.
 *  Licensed under the MIT License. See License.txt in the project root for license information.
 *--------------------------------------------------------------------------------------------*/

import type { FormattingOptions, TextDocument } from 'vscode';
import { TextDocumentSnapshot } from '../../../platform/editing/common/textDocumentSnapshot';
import * as strings from '../../../util/vs/base/common/strings';
import { isLines, Lines } from './editGeneration';

/**
 * An inlined enum containing useful character codes (to be used with String.charCodeAt).
 * Please leave the const keyword such that it gets inlined when compiled to JavaScript!
 */
const enum CharCode {
	/**
	 * The `\t` character.
	 */
	Tab = 9,
	/**
	 * The ` ` (space) character.
	 */
	Space = 32,
	/**
	 * The `,` character.
	 */
	Comma = 44,
}

/**
 * Minimal line-oriented view of a text buffer.
 */
export interface IIndentationTextBuffer {
	getLineCount(): number;
	getLineLength(lineNumber: number): number;
	getLineContent(lineNumber: number): string;
}

/**
 * Reusable out-parameter for `spacesDiff`, so that the per-line loop in
 * `guessIndentation` does not allocate a new result object for every line.
 */
class SpacesDiffResult {
	public spacesDiff = 0;
	public looksLikeAlignment = false;
}

/**
 * Compute the diff in spaces between two line's indentation.
 *
 * @param a Text of the first line.
 * @param aLength Length of the indentation prefix of `a` (index of its first non-whitespace char).
 * @param b Text of the second line.
 * @param bLength Length of the indentation prefix of `b` (index of its first non-whitespace char).
 * @param result Receives the outcome. It is reset first, so on every early return it reads
 * `spacesDiff = 0` and `looksLikeAlignment = false`.
 */
function spacesDiff(a: string, aLength: number, b: string, bLength: number, result: SpacesDiffResult): void {
	result.spacesDiff = 0;
	result.looksLikeAlignment = false;

	// This can go both ways (e.g.):
	//  - a: "\t"
	//  - b: "\t    "
	//  => This should count 1 tab and 4 spaces

	// Skip the indentation prefix that both lines have in common.
	let i: number;

	for (i = 0; i < aLength && i < bLength; i++) {
		const aCharCode = a.charCodeAt(i);
		const bCharCode = b.charCodeAt(i);

		if (aCharCode !== bCharCode) {
			break;
		}
	}

	// Count what remains of each indentation. Anything that is not a space is counted as a tab.
	let aSpacesCnt = 0,
		aTabsCount = 0;
	for (let j = i; j < aLength; j++) {
		const aCharCode = a.charCodeAt(j);
		if (aCharCode === CharCode.Space) {
			aSpacesCnt++;
		} else {
			aTabsCount++;
		}
	}

	let bSpacesCnt = 0,
		bTabsCount = 0;
	for (let j = i; j < bLength; j++) {
		const bCharCode = b.charCodeAt(j);
		if (bCharCode === CharCode.Space) {
			bSpacesCnt++;
		} else {
			bTabsCount++;
		}
	}

	// A remainder that mixes tabs and spaces gives no usable hint.
	if (aSpacesCnt > 0 && aTabsCount > 0) {
		return;
	}
	if (bSpacesCnt > 0 && bTabsCount > 0) {
		return;
	}

	const tabsDiff = Math.abs(aTabsCount - bTabsCount);
	const spacesDelta = Math.abs(aSpacesCnt - bSpacesCnt);

	if (tabsDiff === 0) {
		// check if the indentation difference might be caused by alignment reasons
		// sometime folks like to align their code, but this should not be used as a hint
		result.spacesDiff = spacesDelta;

		if (spacesDelta > 0 && 0 <= bSpacesCnt - 1 && bSpacesCnt - 1 < a.length && bSpacesCnt < b.length) {
			if (b.charCodeAt(bSpacesCnt) !== CharCode.Space && a.charCodeAt(bSpacesCnt - 1) === CharCode.Space) {
				if (a.charCodeAt(a.length - 1) === CharCode.Comma) {
					// This looks like an alignment desire: e.g.
					// const a = b + c,
					//       d = b - c;
					result.looksLikeAlignment = true;
				}
			}
		}
		return;
	}
	// One side changed by tabs and the other by spaces: report the spaces-per-tab ratio
	// when it is a whole number.
	if (spacesDelta % tabsDiff === 0) {
		result.spacesDiff = spacesDelta / tabsDiff;
		return;
	}
}

/**
 * Result for a guessIndentation
 */
export interface IGuessedIndentation {
	/**
	 * If indentation is based on spaces (`insertSpaces` = true), then what is the number of spaces that make an indent?
	 */
	tabSize: number;
	/**
	 * Is indentation based on spaces?
	 */
	insertSpaces: boolean;
}

/**
 * Guess the indentation settings of `source`, falling back to tabs with a tab size of 4.
 *
 * @param source The lines or document to inspect.
 * @returns A fresh options object holding the guessed `tabSize` and `insertSpaces`.
 */
export function guessFileIndentInfo(source: Lines | TextDocument | TextDocumentSnapshot): FormattingOptions {
	return { ...guessIndentation(source, 4, false) };
}

/**
 * Guess whether `source` is indented with tabs or spaces and, for spaces, how many spaces
 * make up one indent.
 *
 * @param source The lines or document to inspect. At most the first 10k lines are examined.
 * @param defaultTabSize Tab size returned when no better guess can be made.
 * @param defaultInsertSpaces Value of `insertSpaces` returned when tab-indented and
 * space-indented lines are equally frequent.
 * @returns The guessed indentation settings.
 */
export function guessIndentation(
	source: Lines | TextDocument | TextDocumentSnapshot,
	defaultTabSize: number,
	defaultInsertSpaces: boolean
): IGuessedIndentation {
	// Look at most at the first 10k lines
	const linesCount = Math.min(isLines(source) ? source.length : source.lineCount, 10000);

	let linesIndentedWithTabsCount = 0; // number of lines that contain at least one tab in indentation
	let linesIndentedWithSpacesCount = 0; // number of lines that contain only spaces in indentation

	let previousLineText = ''; // content of latest line that contained non-whitespace chars
	let previousLineIndentation = 0; // index at which latest line contained the first non-whitespace char

	const ALLOWED_TAB_SIZE_GUESSES = [2, 4, 6, 8, 3, 5, 7]; // prefer even guesses for `tabSize`, limit to [2, 8].
	const MAX_ALLOWED_TAB_SIZE_GUESS = 8; // max(ALLOWED_TAB_SIZE_GUESSES) = 8

	const spacesDiffCount = [0, 0, 0, 0, 0, 0, 0, 0, 0]; // `tabSize` scores
	const tmp = new SpacesDiffResult();

	for (let lineNumber = 0; lineNumber < linesCount; lineNumber++) {
		const currentLineText = isLines(source) ? source[lineNumber] : source.lineAt(lineNumber).text;
		const currentLineLength = currentLineText.length;

		let currentLineHasContent = false; // does `currentLineText` contain non-whitespace chars
		let currentLineIndentation = 0; // index at which `currentLineText` contains the first non-whitespace char
		let currentLineSpacesCount = 0; // count of spaces found in `currentLineText` indentation
		let currentLineTabsCount = 0; // count of tabs found in `currentLineText` indentation
		for (let j = 0, lenJ = currentLineLength; j < lenJ; j++) {
			const charCode = currentLineText.charCodeAt(j);

			if (charCode === CharCode.Tab) {
				currentLineTabsCount++;
			} else if (charCode === CharCode.Space) {
				currentLineSpacesCount++;
			} else {
				// Hit non whitespace character on this line
				currentLineHasContent = true;
				currentLineIndentation = j;
				break;
			}
		}

		// Ignore empty or only whitespace lines
		if (!currentLineHasContent) {
			continue;
		}

		// A single leading space is not counted as space indentation.
		if (currentLineTabsCount > 0) {
			linesIndentedWithTabsCount++;
		} else if (currentLineSpacesCount > 1) {
			linesIndentedWithSpacesCount++;
		}

		spacesDiff(previousLineText, previousLineIndentation, currentLineText, currentLineIndentation, tmp);

		if (tmp.looksLikeAlignment) {
			// if defaultInsertSpaces === true && the spaces count == tabSize, we may want to count it as valid indentation
			//
			// - item1
			//   - item2
			//
			// otherwise skip this line entirely
			//
			// const a = 1,
			//       b = 2;

			if (!(defaultInsertSpaces && defaultTabSize === tmp.spacesDiff)) {
				continue;
			}
		}

		const currentSpacesDiff = tmp.spacesDiff;
		if (currentSpacesDiff <= MAX_ALLOWED_TAB_SIZE_GUESS) {
			spacesDiffCount[currentSpacesDiff]++;
		}

		previousLineText = currentLineText;
		previousLineIndentation = currentLineIndentation;
	}

	let insertSpaces = defaultInsertSpaces;
	if (linesIndentedWithTabsCount !== linesIndentedWithSpacesCount) {
		insertSpaces = linesIndentedWithTabsCount < linesIndentedWithSpacesCount;
	}

	let tabSize = defaultTabSize;

	// Guess tabSize only if inserting spaces...
	if (insertSpaces) {
		// A candidate needs at least one vote to replace the default. (`insertSpaces` is known
		// to be true in this branch, so the threshold is always 0.)
		let tabSizeScore = 0;

		// console.log("score threshold: " + tabSizeScore);

		for (const possibleTabSize of ALLOWED_TAB_SIZE_GUESSES) {
			const possibleTabSizeScore = spacesDiffCount[possibleTabSize];
			if (possibleTabSizeScore > tabSizeScore) {
				tabSizeScore = possibleTabSizeScore;
				tabSize = possibleTabSize;
			}
		}

		// Let a tabSize of 2 win even if it is not the maximum
		// (only in case 4 was guessed)
		if (
			tabSize === 4 &&
			spacesDiffCount[4] > 0 &&
			spacesDiffCount[2] > 0 &&
			spacesDiffCount[2] >= spacesDiffCount[4] / 2
		) {
			tabSize = 2;
		}
	}

	// console.log('--------------------------');
	// console.log('linesIndentedWithTabsCount: ' + linesIndentedWithTabsCount + ', linesIndentedWithSpacesCount: ' + linesIndentedWithSpacesCount);
	// console.log('spacesDiffCount: ' + spacesDiffCount);
	// console.log('tabSize: ' + tabSize + ', tabSizeScore: ' + tabSizeScore);

	return {
		insertSpaces: insertSpaces,
		tabSize: tabSize,
	};
}

/**
 * Measure the visible width of the leading whitespace of `line`, where a tab advances to
 * the next multiple of `tabSize`.
 *
 * Returns:
 *  - if the result is non-negative => the indent width is the returned value
 *  - if the result is negative => the line contains only whitespace (or is empty) and the indent width is ~(result)
 */
function computeIndentLevel(line: string, tabSize: number): number {
	let indent = 0;
	let i = 0;
	const len = line.length;

	while (i < len) {
		const chCode = line.charCodeAt(i);
		if (chCode === CharCode.Space) {
			indent++;
		} else if (chCode === CharCode.Tab) {
			// Jump to the next tab stop.
			indent = indent - indent % tabSize + tabSize;
		} else {
			break;
		}
		i++;
	}

	if (i === len) {
		return ~indent; // line only consists of whitespace
	}

	return indent;
}

/**
 * Compute how many whole indents of `tabSize` columns precede the content of `line`.
 * Whitespace-only lines are measured the same way as lines with content.
 *
 * @param line The line to inspect.
 * @param tabSize Number of columns per indent.
 * @returns The indent width divided by `tabSize`, rounded down.
 */
export function computeIndentLevel2(line: string, tabSize: number): number {
	const result = computeIndentLevel(line, tabSize);
	if (result < 0) {
		// Negative results encode a whitespace-only line as ~indent; decode before dividing.
		return Math.floor(~result / tabSize);
	}
	return Math.floor(result / tabSize);
}

/**
 * Return the first multiple of `indentSize` that is strictly greater than `visibleColumn`.
 */
function nextIndentTabStop(visibleColumn: number, indentSize: number): number {
	return visibleColumn + indentSize - visibleColumn % indentSize;
}

/**
 * Rebuild a whitespace-only string so that it has the same visible width, expressed with
 * the requested indentation style.
 *
 * @param str The leading whitespace to normalize. Every character other than `\t` counts as one column.
 * @param indentSize Number of columns per indent.
 * @param insertSpaces When false, each full indent becomes a tab and only the remainder stays as spaces.
 */
function _normalizeIndentationFromWhitespace(str: string, indentSize: number, insertSpaces: boolean): string {
	let spacesCnt = 0;
	for (let i = 0; i < str.length; i++) {
		if (str.charAt(i) === '\t') {
			spacesCnt = nextIndentTabStop(spacesCnt, indentSize);
		} else {
			spacesCnt++;
		}
	}

	let result = '';
	if (!insertSpaces) {
		const tabsCnt = Math.floor(spacesCnt / indentSize);
		spacesCnt = spacesCnt % indentSize;
		for (let i = 0; i < tabsCnt; i++) {
			result += '\t';
		}
	}

	for (let i = 0; i < spacesCnt; i++) {
		result += ' ';
	}

	return result;
}

/**
 * Normalize the leading whitespace of `str` to the requested indentation style and leave
 * the rest of the string untouched.
 *
 * @param str The line to normalize.
 * @param indentSize Number of columns per indent.
 * @param insertSpaces Whether the result should be indented with spaces rather than tabs.
 */
export function normalizeIndentation(str: string, indentSize: number, insertSpaces: boolean): string {
	let firstNonWhitespaceIndex = strings.firstNonWhitespaceIndex(str);
	if (firstNonWhitespaceIndex === -1) {
		// No content on the line: treat the whole string as indentation.
		firstNonWhitespaceIndex = str.length;
	}
	return _normalizeIndentationFromWhitespace(str.substring(0, firstNonWhitespaceIndex), indentSize, insertSpaces) + str.substring(firstNonWhitespaceIndex);
}

/**
 * Return the text of a single indent for the given settings: `tabSize` spaces when
 * `insertSpaces` is set, otherwise one tab.
 */
export function getIndentationChar(indentation: IGuessedIndentation): string {
	if (indentation.insertSpaces) {
		return ' '.repeat(indentation.tabSize);
	} else {
		return '\t';
	}
}

/**
 * Re-indent every line of `content` from one indentation style to another. Only whole
 * leading indents of the source style are converted; any remaining leading whitespace is
 * kept as is.
 *
 * @param content The text to transform. Lines are split on `\n`.
 * @param fromIndent The indentation style `content` currently uses.
 * @param toIndent The indentation style to produce.
 * @returns The re-indented text, or `content` unchanged when both styles are equal or the
 * source indent is empty.
 */
export function transformIndentation(content: string, fromIndent: IGuessedIndentation, toIndent: IGuessedIndentation): string {
	if (fromIndent.insertSpaces === toIndent.insertSpaces && fromIndent.tabSize === toIndent.tabSize) {
		return content;
	}

	const fromChr = getIndentationChar(fromIndent);
	const toChr = getIndentationChar(toIndent);

	// An empty source indent (spaces with a tab size of 0) matches at every position, so the
	// scan below would never terminate. There is nothing to convert in that case.
	if (fromChr.length === 0) {
		return content;
	}

	const lines = content.split('\n');
	for (let i = 0; i < lines.length; i++) {
		// Advance over as many whole source indents as the line starts with.
		let k = 0;
		while (lines[i].slice(k, k + fromChr.length) === fromChr) {
			k += fromChr.length;
		}

		lines[i] = toChr.repeat(k / fromChr.length) + lines[i].slice(k);
	}

	return lines.join('\n');
}