Path: blob/main/src/vs/editor/common/model/indentationGuesser.ts
3294 views
/*---------------------------------------------------------------------------------------------
 *  Copyright (c) Microsoft Corporation. All rights reserved.
 *  Licensed under the MIT License. See License.txt in the project root for license information.
 *--------------------------------------------------------------------------------------------*/

import { CharCode } from '../../../base/common/charCode.js';
import { ITextBuffer } from '../model.js';

/**
 * Mutable out-parameter filled in by `spacesDiff`.
 * `guessIndentation` creates one instance and reuses it for every line it inspects.
 */
class SpacesDiffResult {
	public spacesDiff: number = 0;
	public looksLikeAlignment: boolean = false;
}

/**
 * Compute the diff in spaces between two line's indentation.
 *
 * @param a Text of the first line.
 * @param aLength Number of leading characters of `a` to treat as its indentation.
 * @param b Text of the second line.
 * @param bLength Number of leading characters of `b` to treat as its indentation.
 * @param result Receives the outcome. It is reset on entry; `spacesDiff` stays `0` when
 * either indentation remainder mixes tabs and spaces, or when the tab counts differ and the
 * space difference is not a multiple of the tab difference.
 */
function spacesDiff(a: string, aLength: number, b: string, bLength: number, result: SpacesDiffResult): void {

	result.spacesDiff = 0;
	result.looksLikeAlignment = false;

	// This can go both ways (e.g.):
	//  - a: "\t"
	//  - b: "\t    "
	//  => This should count 1 tab and 4 spaces

	// Skip the indentation prefix that both lines share.
	let i: number;

	for (i = 0; i < aLength && i < bLength; i++) {
		const aCharCode = a.charCodeAt(i);
		const bCharCode = b.charCodeAt(i);

		if (aCharCode !== bCharCode) {
			break;
		}
	}

	// Classify what is left of each indentation. Anything that is not a space is counted as a tab.
	let aSpacesCnt = 0, aTabsCount = 0;
	for (let j = i; j < aLength; j++) {
		const aCharCode = a.charCodeAt(j);
		if (aCharCode === CharCode.Space) {
			aSpacesCnt++;
		} else {
			aTabsCount++;
		}
	}

	let bSpacesCnt = 0, bTabsCount = 0;
	for (let j = i; j < bLength; j++) {
		const bCharCode = b.charCodeAt(j);
		if (bCharCode === CharCode.Space) {
			bSpacesCnt++;
		} else {
			bTabsCount++;
		}
	}

	// A remainder that mixes tabs and spaces gives no usable hint.
	if (aSpacesCnt > 0 && aTabsCount > 0) {
		return;
	}
	if (bSpacesCnt > 0 && bTabsCount > 0) {
		return;
	}

	const tabsCountDiff = Math.abs(aTabsCount - bTabsCount);
	const spacesCountDiff = Math.abs(aSpacesCnt - bSpacesCnt);

	if (tabsCountDiff === 0) {
		// check if the indentation difference might be caused by alignment reasons
		// sometime folks like to align their code, but this should not be used as a hint
		result.spacesDiff = spacesCountDiff;

		// Note: the bounds below use the full line lengths (`a.length` / `b.length`),
		// not the indentation lengths, because the checks look at line content.
		if (spacesCountDiff > 0 && 0 <= bSpacesCnt - 1 && bSpacesCnt - 1 < a.length && bSpacesCnt < b.length) {
			if (b.charCodeAt(bSpacesCnt) !== CharCode.Space && a.charCodeAt(bSpacesCnt - 1) === CharCode.Space) {
				if (a.charCodeAt(a.length - 1) === CharCode.Comma) {
					// This looks like an alignment desire: e.g.
					// const a = b + c,
					//       d = b - c;
					result.looksLikeAlignment = true;
				}
			}
		}
		return;
	}
	if (spacesCountDiff % tabsCountDiff === 0) {
		// Tabs on one side, spaces on the other: report how many spaces correspond to one tab.
		result.spacesDiff = spacesCountDiff / tabsCountDiff;
		return;
	}
}

/**
 * Result for a guessIndentation
 */
export interface IGuessedIndentation {
	/**
	 * If indentation is based on spaces (`insertSpaces` = true), then what is the number of spaces that make an indent?
	 */
	tabSize: number;
	/**
	 * Is indentation based on spaces?
	 */
	insertSpaces: boolean;
}

/**
 * Guess the indentation settings of a text buffer by inspecting the leading whitespace of its lines.
 *
 * @param source Buffer to inspect; only its first 10000 lines are looked at.
 * @param defaultTabSize Returned as `tabSize` when no better guess is found, or when tabs are detected.
 * @param defaultInsertSpaces Returned as `insertSpaces` when tab-indented and space-indented lines tie.
 * @returns The guessed `insertSpaces` / `tabSize` pair.
 */
export function guessIndentation(source: ITextBuffer, defaultTabSize: number, defaultInsertSpaces: boolean): IGuessedIndentation {
	// Look at most at the first 10k lines
	const linesCount = Math.min(source.getLineCount(), 10000);

	let linesIndentedWithTabsCount = 0;		// number of lines that contain at least one tab in indentation
	let linesIndentedWithSpacesCount = 0;	// number of lines whose indentation has no tabs and at least two spaces

	let previousLineText = '';				// content of latest line that contained non-whitespace chars
	let previousLineIndentation = 0;		// index at which latest line contained the first non-whitespace char

	const ALLOWED_TAB_SIZE_GUESSES = [2, 4, 6, 8, 3, 5, 7];	// prefer even guesses for `tabSize`, limit to [2, 8].
	const MAX_ALLOWED_TAB_SIZE_GUESS = 8;					// max(ALLOWED_TAB_SIZE_GUESSES) = 8

	const spacesDiffCount = [0, 0, 0, 0, 0, 0, 0, 0, 0];	// `tabSize` scores, indexed by spaces diff (0..8)
	const tmp = new SpacesDiffResult();

	for (let lineNumber = 1; lineNumber <= linesCount; lineNumber++) {
		const currentLineLength = source.getLineLength(lineNumber);
		const currentLineText = source.getLineContent(lineNumber);

		// if the text buffer is chunk based, so long lines are cons-string, v8 will flatten the string when we check charCode.
		// checking charCode on chunks directly is cheaper.
		const useCurrentLineText = (currentLineLength <= 65536);

		let currentLineHasContent = false;	// does `currentLineText` contain non-whitespace chars
		let currentLineIndentation = 0;		// index at which `currentLineText` contains the first non-whitespace char
		let currentLineSpacesCount = 0;		// count of spaces found in `currentLineText` indentation
		let currentLineTabsCount = 0;		// count of tabs found in `currentLineText` indentation
		for (let j = 0, lenJ = currentLineLength; j < lenJ; j++) {
			const charCode = (useCurrentLineText ? currentLineText.charCodeAt(j) : source.getLineCharCode(lineNumber, j));

			if (charCode === CharCode.Tab) {
				currentLineTabsCount++;
			} else if (charCode === CharCode.Space) {
				currentLineSpacesCount++;
			} else {
				// Hit non whitespace character on this line
				currentLineHasContent = true;
				currentLineIndentation = j;
				break;
			}
		}

		// Ignore empty or only whitespace lines
		if (!currentLineHasContent) {
			continue;
		}

		if (currentLineTabsCount > 0) {
			linesIndentedWithTabsCount++;
		} else if (currentLineSpacesCount > 1) {
			// a single leading space is not counted as space indentation
			linesIndentedWithSpacesCount++;
		}

		spacesDiff(previousLineText, previousLineIndentation, currentLineText, currentLineIndentation, tmp);

		if (tmp.looksLikeAlignment) {
			// if defaultInsertSpaces === true && the spaces count == tabSize, we may want to count it as valid indentation
			//
			// - item1
			//   - item2
			//
			// otherwise skip this line entirely
			//
			// const a = 1,
			//       b = 2;

			if (!(defaultInsertSpaces && defaultTabSize === tmp.spacesDiff)) {
				// Skipped lines do not become the "previous line" for the next comparison.
				continue;
			}
		}

		const currentSpacesDiff = tmp.spacesDiff;
		if (currentSpacesDiff <= MAX_ALLOWED_TAB_SIZE_GUESS) {
			spacesDiffCount[currentSpacesDiff]++;
		}

		previousLineText = currentLineText;
		previousLineIndentation = currentLineIndentation;
	}

	// Majority vote between tab-indented and space-indented lines; a tie keeps the default.
	let insertSpaces = defaultInsertSpaces;
	if (linesIndentedWithTabsCount !== linesIndentedWithSpacesCount) {
		insertSpaces = (linesIndentedWithTabsCount < linesIndentedWithSpacesCount);
	}

	let tabSize = defaultTabSize;

	// Guess tabSize only if inserting spaces...
	if (insertSpaces) {
		// Any candidate must score strictly above 0 to replace the default.
		let tabSizeScore = 0;

		// Candidates are visited in preference order; a later one wins only with a strictly higher score.
		for (const possibleTabSize of ALLOWED_TAB_SIZE_GUESSES) {
			const possibleTabSizeScore = spacesDiffCount[possibleTabSize];
			if (possibleTabSizeScore > tabSizeScore) {
				tabSizeScore = possibleTabSizeScore;
				tabSize = possibleTabSize;
			}
		}

		// Let a tabSize of 2 win even if it is not the maximum
		// (only in case 4 was guessed)
		if (tabSize === 4 && spacesDiffCount[4] > 0 && spacesDiffCount[2] > 0 && spacesDiffCount[2] >= spacesDiffCount[4] / 2) {
			tabSize = 2;
		}
	}

	return {
		insertSpaces: insertSpaces,
		tabSize: tabSize
	};
}