Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
microsoft
GitHub Repository: microsoft/vscode
Path: blob/main/src/vs/editor/common/model/indentationGuesser.ts
3294 views
1
/*---------------------------------------------------------------------------------------------
2
* Copyright (c) Microsoft Corporation. All rights reserved.
3
* Licensed under the MIT License. See License.txt in the project root for license information.
4
*--------------------------------------------------------------------------------------------*/
5
6
import { CharCode } from '../../../base/common/charCode.js';
7
import { ITextBuffer } from '../model.js';
8
9
/**
 * Mutable result holder that `spacesDiff` fills in for its caller.
 * A single instance can be reused across calls: `spacesDiff` resets both
 * fields before computing anything.
 */
class SpacesDiffResult {
	/** Indentation difference, in spaces, between the two compared lines (0 when none could be determined). */
	public spacesDiff: number = 0;
	/** Set when the difference looks like manual alignment of a continuation line rather than real indentation. */
	public looksLikeAlignment: boolean = false;
}
13
14
/**
 * Compute the diff in spaces between two lines' indentation.
 *
 * Only the first `aLength` / `bLength` characters of `a` / `b` are treated as
 * indentation. The outcome is written into `result` (which is reset first):
 *  - `result.spacesDiff` is the difference in space count once the common
 *    indentation prefix is skipped; when the tab counts differ too, it is the
 *    space difference per tab of difference (only if it divides evenly).
 *    It stays 0 when either remainder mixes spaces with other characters.
 *  - `result.looksLikeAlignment` is set when the difference appears to come
 *    from aligning a continuation line under a line that ends with a comma.
 *
 * @param a Text of the first line.
 * @param aLength Number of leading indentation characters in `a`.
 * @param b Text of the second line.
 * @param bLength Number of leading indentation characters in `b`.
 * @param result Receives the computed values.
 */
function spacesDiff(a: string, aLength: number, b: string, bLength: number, result: SpacesDiffResult): void {

	result.spacesDiff = 0;
	result.looksLikeAlignment = false;

	// This can go both ways (e.g.):
	//  - a: "\t"
	//  - b: "\t    "
	//  => This should count 1 tab and 4 spaces

	// Skip the indentation prefix that both lines share.
	let i: number;

	for (i = 0; i < aLength && i < bLength; i++) {
		const aCharCode = a.charCodeAt(i);
		const bCharCode = b.charCodeAt(i);

		if (aCharCode !== bCharCode) {
			break;
		}
	}

	// Classify what is left of each indentation: anything that is not a space is counted as a tab.
	let aSpacesCnt = 0, aTabsCount = 0;
	for (let j = i; j < aLength; j++) {
		const aCharCode = a.charCodeAt(j);
		if (aCharCode === CharCode.Space) {
			aSpacesCnt++;
		} else {
			aTabsCount++;
		}
	}

	let bSpacesCnt = 0, bTabsCount = 0;
	for (let j = i; j < bLength; j++) {
		const bCharCode = b.charCodeAt(j);
		if (bCharCode === CharCode.Space) {
			bSpacesCnt++;
		} else {
			bTabsCount++;
		}
	}

	// A remainder that mixes spaces and tabs gives no usable hint.
	if (aSpacesCnt > 0 && aTabsCount > 0) {
		return;
	}
	if (bSpacesCnt > 0 && bTabsCount > 0) {
		return;
	}

	const tabsDiff = Math.abs(aTabsCount - bTabsCount);
	// Named differently from the enclosing function to avoid shadowing it.
	const spacesCntDiff = Math.abs(aSpacesCnt - bSpacesCnt);

	if (tabsDiff === 0) {
		// check if the indentation difference might be caused by alignment reasons
		// sometime folks like to align their code, but this should not be used as a hint
		result.spacesDiff = spacesCntDiff;

		if (spacesCntDiff > 0 && 0 <= bSpacesCnt - 1 && bSpacesCnt - 1 < a.length && bSpacesCnt < b.length) {
			if (b.charCodeAt(bSpacesCnt) !== CharCode.Space && a.charCodeAt(bSpacesCnt - 1) === CharCode.Space) {
				if (a.charCodeAt(a.length - 1) === CharCode.Comma) {
					// This looks like an alignment desire: e.g.
					// const a = b + c,
					//       d = b - c;
					result.looksLikeAlignment = true;
				}
			}
		}
		return;
	}

	// Tabs on one side, spaces on the other: report how many spaces correspond to one tab.
	if (spacesCntDiff % tabsDiff === 0) {
		result.spacesDiff = spacesCntDiff / tabsDiff;
		return;
	}
}
90
91
/**
 * Result for a guessIndentation
 */
export interface IGuessedIndentation {
	/**
	 * If indentation is based on spaces (`insertSpaces` = true), then what is the number of spaces that make an indent?
	 */
	tabSize: number;
	/**
	 * Is indentation based on spaces?
	 */
	insertSpaces: boolean;
}
104
105
/**
 * Guess whether a text buffer is indented with tabs or spaces and, for
 * spaces, how many spaces make up one indentation level.
 *
 * At most the first 10000 lines are inspected; empty and whitespace-only
 * lines are ignored. `insertSpaces` follows whichever kind of indented line
 * is more frequent and falls back to `defaultInsertSpaces` on a tie.
 * `tabSize` is only guessed when `insertSpaces` ends up true (otherwise
 * `defaultTabSize` is returned): it is the most frequent indentation
 * difference between consecutive content lines, restricted to [2, 8].
 *
 * @param source Buffer to inspect.
 * @param defaultTabSize Tab size to return when no better guess is available.
 * @param defaultInsertSpaces Value for `insertSpaces` when tab- and space-indented lines are equally frequent.
 * @returns The guessed indentation settings.
 */
export function guessIndentation(source: ITextBuffer, defaultTabSize: number, defaultInsertSpaces: boolean): IGuessedIndentation {
	// Look at most at the first 10k lines
	const linesCount = Math.min(source.getLineCount(), 10000);

	let linesIndentedWithTabsCount = 0;				// number of lines that contain at least one tab in indentation
	let linesIndentedWithSpacesCount = 0;			// number of lines that contain only spaces in indentation

	let previousLineText = '';						// content of latest line that contained non-whitespace chars
	let previousLineIndentation = 0;				// index at which latest line contained the first non-whitespace char

	const ALLOWED_TAB_SIZE_GUESSES = [2, 4, 6, 8, 3, 5, 7];	// prefer even guesses for `tabSize`, limit to [2, 8].
	const MAX_ALLOWED_TAB_SIZE_GUESS = 8;			// max(ALLOWED_TAB_SIZE_GUESSES) = 8

	const spacesDiffCount = [0, 0, 0, 0, 0, 0, 0, 0, 0];		// `tabSize` scores
	const tmp = new SpacesDiffResult();

	for (let lineNumber = 1; lineNumber <= linesCount; lineNumber++) {
		const currentLineLength = source.getLineLength(lineNumber);
		const currentLineText = source.getLineContent(lineNumber);

		// if the text buffer is chunk based, so long lines are cons-string, v8 will flatten the string when we check charCode.
		// checking charCode on chunks directly is cheaper.
		const useCurrentLineText = (currentLineLength <= 65536);

		let currentLineHasContent = false;			// does `currentLineText` contain non-whitespace chars
		let currentLineIndentation = 0;				// index at which `currentLineText` contains the first non-whitespace char
		let currentLineSpacesCount = 0;				// count of spaces found in `currentLineText` indentation
		let currentLineTabsCount = 0;				// count of tabs found in `currentLineText` indentation
		for (let j = 0, lenJ = currentLineLength; j < lenJ; j++) {
			const charCode = (useCurrentLineText ? currentLineText.charCodeAt(j) : source.getLineCharCode(lineNumber, j));

			if (charCode === CharCode.Tab) {
				currentLineTabsCount++;
			} else if (charCode === CharCode.Space) {
				currentLineSpacesCount++;
			} else {
				// Hit non whitespace character on this line
				currentLineHasContent = true;
				currentLineIndentation = j;
				break;
			}
		}

		// Ignore empty or only whitespace lines
		if (!currentLineHasContent) {
			continue;
		}

		// A single leading space is not counted as space indentation.
		if (currentLineTabsCount > 0) {
			linesIndentedWithTabsCount++;
		} else if (currentLineSpacesCount > 1) {
			linesIndentedWithSpacesCount++;
		}

		spacesDiff(previousLineText, previousLineIndentation, currentLineText, currentLineIndentation, tmp);

		if (tmp.looksLikeAlignment) {
			// if defaultInsertSpaces === true && the spaces count == tabSize, we may want to count it as valid indentation
			//
			// - item1
			//   - item2
			//
			// otherwise skip this line entirely
			//
			// const a = 1,
			//       b = 2;

			if (!(defaultInsertSpaces && defaultTabSize === tmp.spacesDiff)) {
				// Note: the skipped line does not become the "previous" line either.
				continue;
			}
		}

		const currentSpacesDiff = tmp.spacesDiff;
		if (currentSpacesDiff <= MAX_ALLOWED_TAB_SIZE_GUESS) {
			spacesDiffCount[currentSpacesDiff]++;
		}

		previousLineText = currentLineText;
		previousLineIndentation = currentLineIndentation;
	}

	let insertSpaces = defaultInsertSpaces;
	if (linesIndentedWithTabsCount !== linesIndentedWithSpacesCount) {
		insertSpaces = (linesIndentedWithTabsCount < linesIndentedWithSpacesCount);
	}

	let tabSize = defaultTabSize;

	// Guess tabSize only if inserting spaces...
	if (insertSpaces) {
		// A candidate must have been seen at least once to replace the default.
		let tabSizeScore = 0;

		// console.log("score threshold: " + tabSizeScore);

		// Strict `>` means that, on equal scores, the candidate listed first wins.
		for (const possibleTabSize of ALLOWED_TAB_SIZE_GUESSES) {
			const possibleTabSizeScore = spacesDiffCount[possibleTabSize];
			if (possibleTabSizeScore > tabSizeScore) {
				tabSizeScore = possibleTabSizeScore;
				tabSize = possibleTabSize;
			}
		}

		// Let a tabSize of 2 win even if it is not the maximum
		// (only in case 4 was guessed)
		if (tabSize === 4 && spacesDiffCount[4] > 0 && spacesDiffCount[2] > 0 && spacesDiffCount[2] >= spacesDiffCount[4] / 2) {
			tabSize = 2;
		}
	}


	// console.log('--------------------------');
	// console.log('linesIndentedWithTabsCount: ' + linesIndentedWithTabsCount + ', linesIndentedWithSpacesCount: ' + linesIndentedWithSpacesCount);
	// console.log('spacesDiffCount: ' + spacesDiffCount);

	return {
		insertSpaces: insertSpaces,
		tabSize: tabSize
	};
}
225
226