// Source: GitHub repository microsoft/vscode, path blob/main/extensions/copilot/src/extension/prompt/node/indentationGuesser.ts
1
/*---------------------------------------------------------------------------------------------
 *  Copyright (c) Microsoft Corporation. All rights reserved.
 *  Licensed under the MIT License. See License.txt in the project root for license information.
 *--------------------------------------------------------------------------------------------*/
5
6
import type { FormattingOptions, TextDocument } from 'vscode';
7
import { TextDocumentSnapshot } from '../../../platform/editing/common/textDocumentSnapshot';
8
import * as strings from '../../../util/vs/base/common/strings';
9
import { isLines, Lines } from './editGeneration';
10
11
/**
 * Character codes used by the indentation helpers in this file, for comparison
 * against the result of `String.charCodeAt`.
 *
 * Keep the `const` keyword: it makes the compiler inline the numeric values
 * into the emitted JavaScript instead of generating an enum object.
 */
const enum CharCode {
	/** The `\t` (horizontal tab) character. */
	Tab = 9,
	/** The ` ` (space) character. */
	Space = 32,
	/** The `,` character. */
	Comma = 44,
}
26
27
/**
 * Line-oriented accessor contract for a text buffer. It exposes only getters:
 * the number of lines, and the length and content of an individual line.
 *
 * NOTE(review): whether `lineNumber` is 0- or 1-based is not established by
 * this file; confirm against the implementers.
 */
export interface IIndentationTextBuffer {
	/** Number of lines in the buffer. */
	getLineCount(): number;

	/** Length of the line identified by `lineNumber`. */
	getLineLength(lineNumber: number): number;

	/** Text of the line identified by `lineNumber`. */
	getLineContent(lineNumber: number): string;
}
32
33
/**
 * Mutable out-parameter filled in by `spacesDiff`. A single instance can be
 * reused across calls, because `spacesDiff` resets both fields on entry.
 */
class SpacesDiffResult {
	/** Difference, in spaces, between the indentation of the two compared lines. */
	spacesDiff = 0;

	/** Set when the difference looks like visual alignment rather than an indent step. */
	looksLikeAlignment = false;
}
37
38
/**
 * Compute the difference, in spaces, between the indentation of two lines.
 *
 * The indentation prefix shared by both lines is skipped first; only what
 * remains of each indentation is compared. Within that remainder every
 * character that is not a space is counted as a tab.
 *
 * Outcomes written to `result` (both fields are reset on entry):
 * - either remainder mixes spaces and tabs: `spacesDiff` stays 0;
 * - both remainders hold the same number of tabs: `spacesDiff` is the absolute
 *   difference of the space counts, and `looksLikeAlignment` is set when the
 *   difference appears to come from aligning a continuation line under a line
 *   that ends with a comma;
 * - the tab counts differ and the space difference is a multiple of the tab
 *   difference: `spacesDiff` is that quotient (spaces per tab);
 * - otherwise `spacesDiff` stays 0.
 *
 * @param a First line.
 * @param aLength Length of the indentation of `a` (index of its first non-whitespace character).
 * @param b Second line.
 * @param bLength Length of the indentation of `b`.
 * @param result Receives the outcome; overwritten on every call.
 */
function spacesDiff(a: string, aLength: number, b: string, bLength: number, result: SpacesDiffResult): void {
	result.spacesDiff = 0;
	result.looksLikeAlignment = false;

	// Skip the part of the indentation that both lines have in common. The
	// comparison works in both directions: either line may be the longer one.
	const sharedLimit = Math.min(aLength, bLength);
	let common = 0;
	while (common < sharedLimit && a.charCodeAt(common) === b.charCodeAt(common)) {
		common++;
	}

	// Tally what is left of line a's indentation.
	let aSpaces = 0;
	let aTabs = 0;
	for (let k = common; k < aLength; k++) {
		if (a.charCodeAt(k) === CharCode.Space) {
			aSpaces++;
		} else {
			aTabs++;
		}
	}

	// Tally what is left of line b's indentation.
	let bSpaces = 0;
	let bTabs = 0;
	for (let k = common; k < bLength; k++) {
		if (b.charCodeAt(k) === CharCode.Space) {
			bSpaces++;
		} else {
			bTabs++;
		}
	}

	// A remainder that mixes spaces and tabs gives no usable hint.
	const aIsMixed = aSpaces > 0 && aTabs > 0;
	const bIsMixed = bSpaces > 0 && bTabs > 0;
	if (aIsMixed || bIsMixed) {
		return;
	}

	const tabDelta = Math.abs(aTabs - bTabs);
	const spaceDelta = Math.abs(aSpaces - bSpaces);

	if (tabDelta !== 0) {
		// Tabs on one side, spaces on the other: report how many spaces stand for one tab.
		if (spaceDelta % tabDelta === 0) {
			result.spacesDiff = spaceDelta / tabDelta;
		}
		return;
	}

	result.spacesDiff = spaceDelta;
	if (spaceDelta === 0) {
		return;
	}

	// Check whether the difference might be caused by alignment. People
	// sometimes align their code, and that must not be used as an indent hint.
	const lastSpaceIndex = bSpaces - 1;
	const withinBounds = lastSpaceIndex >= 0 && lastSpaceIndex < a.length && bSpaces < b.length;
	if (!withinBounds) {
		return;
	}

	// This looks like an alignment desire when line a ends with a comma, e.g.
	//   const a = b + c,
	//         d = b - c;
	result.looksLikeAlignment =
		b.charCodeAt(bSpaces) !== CharCode.Space &&
		a.charCodeAt(lastSpaceIndex) === CharCode.Space &&
		a.charCodeAt(a.length - 1) === CharCode.Comma;
}
115
116
/**
 * Outcome of an indentation guess (see `guessIndentation`).
 */
export interface IGuessedIndentation {
	/**
	 * When indentation is based on spaces (`insertSpaces` is true): the number
	 * of spaces that make up one indent.
	 */
	tabSize: number;

	/**
	 * Whether indentation is based on spaces (true) or tabs (false).
	 */
	insertSpaces: boolean;
}
129
130
/**
 * Guess the indentation of `source` and return it as editor formatting options.
 *
 * Delegates to `guessIndentation` with fallbacks of a tab size of 4 and
 * tab-based indentation, and returns a fresh copy of its result.
 *
 * @param source Lines or document to inspect.
 * @returns The guessed `tabSize` / `insertSpaces` pair.
 */
export function guessFileIndentInfo(source: Lines | TextDocument | TextDocumentSnapshot): FormattingOptions {
	const fallbackTabSize = 4;
	const fallbackInsertSpaces = false;
	const guessed = guessIndentation(source, fallbackTabSize, fallbackInsertSpaces);
	return { ...guessed };
}
133
134
/**
 * Guess whether `source` is indented with tabs or spaces and, for spaces, how
 * many spaces make up one indent.
 *
 * At most the first 10,000 lines are inspected. Empty and whitespace-only
 * lines are ignored. Each remaining line is compared with the previous
 * content line via `spacesDiff`, and the resulting difference votes for a
 * candidate tab size.
 *
 * @param source An array of lines, or a document exposing `lineCount` and `lineAt(i).text`.
 * @param defaultTabSize Returned as `tabSize` when tabs are used or when no candidate size received a vote.
 * @param defaultInsertSpaces Returned as `insertSpaces` when tab-indented and space-indented lines are equally frequent.
 * @returns The guessed indentation settings.
 */
export function guessIndentation(
	source: Lines | TextDocument | TextDocumentSnapshot,
	defaultTabSize: number,
	defaultInsertSpaces: boolean
): IGuessedIndentation {
	// Decide once how lines are read, instead of re-testing the kind of
	// `source` for every single line.
	let totalLineCount: number;
	let readLine: (index: number) => string;
	if (isLines(source)) {
		const lines = source;
		totalLineCount = lines.length;
		readLine = index => lines[index];
	} else {
		const doc = source;
		totalLineCount = doc.lineCount;
		readLine = index => doc.lineAt(index).text;
	}

	// Look at most at the first 10k lines
	const linesCount = Math.min(totalLineCount, 10000);

	let linesIndentedWithTabsCount = 0; // number of lines that contain at least one tab in indentation
	let linesIndentedWithSpacesCount = 0; // number of tab-free lines whose indentation has more than one space

	let previousLineText = ''; // content of latest line that contained non-whitespace chars
	let previousLineIndentation = 0; // index at which latest line contained the first non-whitespace char

	const ALLOWED_TAB_SIZE_GUESSES = [2, 4, 6, 8, 3, 5, 7]; // prefer even guesses for `tabSize`, limit to [2, 8].
	const MAX_ALLOWED_TAB_SIZE_GUESS = 8; // max(ALLOWED_TAB_SIZE_GUESSES) = 8

	const spacesDiffCount = [0, 0, 0, 0, 0, 0, 0, 0, 0]; // `tabSize` scores, indexed by spaces difference
	const tmp = new SpacesDiffResult();

	for (let lineNumber = 0; lineNumber < linesCount; lineNumber++) {
		const currentLineText = readLine(lineNumber);
		const currentLineLength = currentLineText.length;

		let currentLineHasContent = false; // does `currentLineText` contain non-whitespace chars
		let currentLineIndentation = 0; // index at which `currentLineText` contains the first non-whitespace char
		let currentLineSpacesCount = 0; // count of spaces found in `currentLineText` indentation
		let currentLineTabsCount = 0; // count of tabs found in `currentLineText` indentation
		for (let j = 0; j < currentLineLength; j++) {
			const charCode = currentLineText.charCodeAt(j);

			if (charCode === CharCode.Tab) {
				currentLineTabsCount++;
			} else if (charCode === CharCode.Space) {
				currentLineSpacesCount++;
			} else {
				// Hit non whitespace character on this line
				currentLineHasContent = true;
				currentLineIndentation = j;
				break;
			}
		}

		// Ignore empty or only whitespace lines
		if (!currentLineHasContent) {
			continue;
		}

		if (currentLineTabsCount > 0) {
			linesIndentedWithTabsCount++;
		} else if (currentLineSpacesCount > 1) {
			linesIndentedWithSpacesCount++;
		}

		spacesDiff(previousLineText, previousLineIndentation, currentLineText, currentLineIndentation, tmp);

		if (tmp.looksLikeAlignment) {
			// If defaultInsertSpaces === true && the spaces count == tabSize, we may
			// want to count it as valid indentation:
			//
			// - item1
			//   - item2
			//
			// Otherwise skip this line entirely (it also does not become the
			// "previous line" for the next comparison):
			//
			// const a = 1,
			//       b = 2;

			if (!(defaultInsertSpaces && defaultTabSize === tmp.spacesDiff)) {
				continue;
			}
		}

		const currentSpacesDiff = tmp.spacesDiff;
		if (currentSpacesDiff <= MAX_ALLOWED_TAB_SIZE_GUESS) {
			spacesDiffCount[currentSpacesDiff]++;
		}

		previousLineText = currentLineText;
		previousLineIndentation = currentLineIndentation;
	}

	// Majority vote between tabs and spaces; a tie keeps the caller's default.
	let insertSpaces = defaultInsertSpaces;
	if (linesIndentedWithTabsCount !== linesIndentedWithSpacesCount) {
		insertSpaces = linesIndentedWithTabsCount < linesIndentedWithSpacesCount;
	}

	let tabSize = defaultTabSize;

	// Guess tabSize only if inserting spaces...
	if (insertSpaces) {
		// A candidate needs at least one vote to replace the default. Ties go
		// to the candidate listed first in ALLOWED_TAB_SIZE_GUESSES.
		let tabSizeScore = 0;
		for (const possibleTabSize of ALLOWED_TAB_SIZE_GUESSES) {
			const possibleTabSizeScore = spacesDiffCount[possibleTabSize];
			if (possibleTabSizeScore > tabSizeScore) {
				tabSizeScore = possibleTabSizeScore;
				tabSize = possibleTabSize;
			}
		}

		// Let a tabSize of 2 win even if it is not the maximum
		// (only in case 4 was guessed)
		if (
			tabSize === 4 &&
			spacesDiffCount[4] > 0 &&
			spacesDiffCount[2] > 0 &&
			spacesDiffCount[2] >= spacesDiffCount[4] / 2
		) {
			tabSize = 2;
		}
	}

	return {
		insertSpaces: insertSpaces,
		tabSize: tabSize,
	};
}
258
259
/**
 * Measure the visible width of a line's leading whitespace. A space advances
 * the width by one; a tab advances it to the next multiple of `tabSize`.
 *
 * Returns:
 * - a non-negative value => the line has content and the value is its indent width
 * - a negative value => the line is empty or contains only whitespace, and its
 *   indent width is ~(result)
 *
 * @param line The line to inspect.
 * @param tabSize Width of a tab stop, in columns.
 */
function computeIndentLevel(line: string, tabSize: number): number {
	const length = line.length;
	let column = 0;
	let pos = 0;

	for (; pos < length; pos++) {
		const code = line.charCodeAt(pos);
		if (code === CharCode.Tab) {
			// Jump to the next tab stop.
			column = column - column % tabSize + tabSize;
			continue;
		}
		if (code !== CharCode.Space) {
			// First non-whitespace character: indentation ends here.
			break;
		}
		column++;
	}

	// Having scanned the whole line means it held nothing but whitespace;
	// this is signalled by returning the bitwise complement.
	return pos === length ? ~column : column;
}
287
288
/**
 * Number of whole indent levels at the start of `line`.
 *
 * Uses `computeIndentLevel` to measure the indent width, decoding its
 * bitwise-complemented result for whitespace-only lines, then divides by
 * `tabSize` and rounds down.
 *
 * @param line The line to inspect.
 * @param tabSize Width of one indent level, in columns.
 */
export function computeIndentLevel2(line: string, tabSize: number): number {
	const encoded = computeIndentLevel(line, tabSize);
	// Negative values encode "whitespace-only line" as ~width.
	const width = encoded < 0 ? ~encoded : encoded;
	return Math.floor(width / tabSize);
}
295
296
/**
 * Column of the first tab stop strictly after `visibleColumn`, where tab stops
 * sit at multiples of `indentSize`.
 */
function nextIndentTabStop(visibleColumn: number, indentSize: number): number {
	const offsetIntoStop = visibleColumn % indentSize;
	return visibleColumn + indentSize - offsetIntoStop;
}
299
300
/**
 * Rebuild a run of indentation so that it uses a single style.
 *
 * The visible width of `str` is measured first: a tab advances to the next
 * tab stop and every other character counts as one column. The width is then
 * re-emitted as spaces only (`insertSpaces` true), or as as many tabs as fit
 * followed by the remaining spaces (`insertSpaces` false).
 *
 * @param str The indentation to normalize.
 * @param indentSize Width of one indent / tab stop, in columns.
 * @param insertSpaces Whether to emit spaces only.
 */
function _normalizeIndentationFromWhitespace(str: string, indentSize: number, insertSpaces: boolean): string {
	// Pass 1: measure the visible width of the input.
	let visibleWidth = 0;
	for (let pos = 0; pos < str.length; pos++) {
		visibleWidth = str[pos] === '\t'
			? nextIndentTabStop(visibleWidth, indentSize)
			: visibleWidth + 1;
	}

	// Pass 2: emit the same width in the requested style.
	let rebuilt = '';
	let remainingSpaces = visibleWidth;
	if (!insertSpaces) {
		remainingSpaces = visibleWidth % indentSize;
		for (let tabs = Math.floor(visibleWidth / indentSize); tabs > 0; tabs--) {
			rebuilt += '\t';
		}
	}
	for (; remainingSpaces > 0; remainingSpaces--) {
		rebuilt += ' ';
	}

	return rebuilt;
}
325
326
/**
 * Normalize the leading indentation of `str` to a single style and leave the
 * rest of the string untouched.
 *
 * The indentation is everything before the index reported by
 * `strings.firstNonWhitespaceIndex`; when that helper reports -1 the whole
 * string is treated as indentation.
 *
 * @param str The line to normalize.
 * @param indentSize Width of one indent / tab stop, in columns.
 * @param insertSpaces Whether the indentation should consist of spaces only.
 */
export function normalizeIndentation(str: string, indentSize: number, insertSpaces: boolean): string {
	const firstContentIndex = strings.firstNonWhitespaceIndex(str);
	const indentEnd = firstContentIndex === -1 ? str.length : firstContentIndex;

	const leadingWhitespace = str.substring(0, indentEnd);
	const remainder = str.substring(indentEnd);

	return _normalizeIndentationFromWhitespace(leadingWhitespace, indentSize, insertSpaces) + remainder;
}
333
334
/**
 * Text of a single indent step for the given settings: `tabSize` spaces when
 * `insertSpaces` is true, otherwise one tab character.
 *
 * @param indentation The indentation settings.
 */
export function getIndentationChar(indentation: IGuessedIndentation): string {
	return indentation.insertSpaces ? ' '.repeat(indentation.tabSize) : '\t';
}

/**
 * Convert the leading indentation of every line in `content` from one
 * indentation style to another.
 *
 * For each line (split on `\n`), the number of whole `fromIndent` steps at the
 * start of the line is counted and replaced by the same number of `toIndent`
 * steps. Leading whitespace that does not form a whole step is left as is.
 *
 * `content` is returned unchanged when both styles are identical, or when the
 * source indent step is empty (e.g. space-based indentation with a `tabSize`
 * of 0), since there is then nothing that could be matched.
 *
 * @param content The text to re-indent.
 * @param fromIndent The indentation style `content` currently uses.
 * @param toIndent The indentation style to convert to.
 * @returns The re-indented text.
 */
export function transformIndentation(content: string, fromIndent: IGuessedIndentation, toIndent: IGuessedIndentation): string {
	if (fromIndent.insertSpaces === toIndent.insertSpaces && fromIndent.tabSize === toIndent.tabSize) {
		return content;
	}

	const fromChr = getIndentationChar(fromIndent);
	const toChr = getIndentationChar(toIndent);

	// A zero-width step matches at every position without ever advancing,
	// which would make the scan below loop forever.
	if (fromChr.length === 0) {
		return content;
	}

	const lines = content.split('\n');
	for (let i = 0; i < lines.length; i++) {
		const line = lines[i];

		// Advance over as many whole source indent steps as the line starts with.
		let k = 0;
		while (line.startsWith(fromChr, k)) {
			k += fromChr.length;
		}

		lines[i] = toChr.repeat(k / fromChr.length) + line.slice(k);
	}

	return lines.join('\n');
}
362
363