Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
microsoft
GitHub Repository: microsoft/vscode
Path: blob/main/src/vs/editor/common/model/tokens/treeSitter/treeSitterTokenizationImpl.ts
3296 views
1
/*---------------------------------------------------------------------------------------------
2
* Copyright (c) Microsoft Corporation. All rights reserved.
3
* Licensed under the MIT License. See License.txt in the project root for license information.
4
*--------------------------------------------------------------------------------------------*/
5
6
import { Emitter, Event } from '../../../../../base/common/event.js';
7
import { Disposable } from '../../../../../base/common/lifecycle.js';
8
import { setTimeout0 } from '../../../../../base/common/platform.js';
9
import { StopWatch } from '../../../../../base/common/stopwatch.js';
10
import { LanguageId } from '../../../encodedTokenAttributes.js';
11
import { ILanguageIdCodec, QueryCapture } from '../../../languages.js';
12
import { IModelContentChangedEvent, IModelTokensChangedEvent } from '../../../textModelEvents.js';
13
import { findLikelyRelevantLines } from '../../textModelTokens.js';
14
import { TokenStore, TokenUpdate, TokenQuality } from './tokenStore.js';
15
import { TreeSitterTree, RangeChange, RangeWithOffsets } from './treeSitterTree.js';
16
import type * as TreeSitter from '@vscode/tree-sitter-wasm';
17
import { autorun, autorunHandleChanges, IObservable, recordChanges, runOnChange } from '../../../../../base/common/observable.js';
18
import { LineRange } from '../../../core/ranges/lineRange.js';
19
import { LineTokens } from '../../../tokens/lineTokens.js';
20
import { Position } from '../../../core/position.js';
21
import { Range } from '../../../core/range.js';
22
import { isDefined } from '../../../../../base/common/types.js';
23
import { ITreeSitterThemeService } from '../../../services/treeSitter/treeSitterThemeService.js';
24
import { BugIndicatingError } from '../../../../../base/common/errors.js';
25
26
export class TreeSitterTokenizationImpl extends Disposable {
27
private readonly _tokenStore: TokenStore;
28
private _accurateVersion: number;
29
private _guessVersion: number;
30
31
private readonly _onDidChangeTokens: Emitter<{ changes: IModelTokensChangedEvent }> = this._register(new Emitter());
32
public readonly onDidChangeTokens: Event<{ changes: IModelTokensChangedEvent }> = this._onDidChangeTokens.event;
33
private readonly _onDidCompleteBackgroundTokenization: Emitter<void> = this._register(new Emitter());
34
public readonly onDidChangeBackgroundTokenization: Event<void> = this._onDidCompleteBackgroundTokenization.event;
35
36
private _encodedLanguageId: LanguageId;
37
38
// The text model being tokenized; owned and exposed by the tree, not stored here.
private get _textModel() {
	return this._tree.textModel;
}

/**
 * Tokenization driver for one tree-sitter tree / text model pair.
 * Wires up: theme-change refresh, viewport-first tokenization, and
 * incremental token updates whenever the parsed tree changes.
 */
constructor(
	private readonly _tree: TreeSitterTree,
	private readonly _highlightingQueries: TreeSitter.Query,
	private readonly _languageIdCodec: ILanguageIdCodec,
	private readonly _visibleLineRanges: IObservable<readonly LineRange[]>,

	@ITreeSitterThemeService private readonly _treeSitterThemeService: ITreeSitterThemeService,
) {
	super();

	this._encodedLanguageId = this._languageIdCodec.encodeLanguageId(this._tree.languageId);

	// Theme changes invalidate all token metadata; refresh at least the viewport.
	this._register(runOnChange(this._treeSitterThemeService.onChange, () => {
		this._updateTheme();
	}));

	this._tokenStore = this._register(new TokenStore(this._textModel));
	this._accurateVersion = this._textModel.getVersionId();
	this._guessVersion = this._textModel.getVersionId();
	// Seed the store with one uncolored token covering the whole document.
	this._tokenStore.buildStore(this._createEmptyTokens(), TokenQuality.None);

	// Tokenize newly visible lines eagerly whenever the viewport changes.
	this._register(autorun(reader => {
		const visibleLineRanges = this._visibleLineRanges.read(reader);
		this._parseAndTokenizeViewPort(visibleLineRanges);
	}));

	// React to parsed-tree updates with incremental token recomputation.
	this._register(autorunHandleChanges({
		owner: this,
		changeTracker: recordChanges({ tree: this._tree.tree }),
	}, (reader, ctx) => {
		const changeEvent = ctx.changes.at(0)?.change;
		if (ctx.changes.length > 1) {
			throw new BugIndicatingError('The tree changed twice in one transaction. This is currently not supported and should not happen.');
		}

		if (!changeEvent) {
			if (ctx.tree) {
				// First run with an already-parsed tree: tokenize the viewport.
				this._firstTreeUpdate(this._tree.treeLastParsedVersion.read(reader));
			}
		} else {
			if (this.hasTokens()) {
				// Mark the range for refresh immediately

				for (const range of changeEvent.ranges) {
					this._markForRefresh(range.newRange);
				}
			}

			// First time we see a tree we need to build a token store.
			if (!this.hasTokens()) {
				this._firstTreeUpdate(changeEvent.versionId);
			} else {
				this._handleTreeUpdate(changeEvent.ranges, changeEvent.versionId);
			}
		}
	}));
}
99
100
/**
 * Applies a model edit to the token store as a cheap guess so colors stay
 * roughly stable until the tree re-parses and accurate tokens arrive.
 * Inserts grow the token preceding the edit; deletes shrink the store;
 * a same-length replacement leaves the store untouched (offsets unchanged).
 * @param e the content change event from the text model
 */
public handleContentChanged(e: IModelContentChangedEvent): void {
	this._guessVersion = e.versionId;
	for (const change of e.changes) {
		if (change.text.length > change.rangeLength) {
			// If possible, use the token before the change as the starting point for the new token.
			// This is more likely to let the new text be the correct color as typing is usually at the end of the token.
			const offset = change.rangeOffset > 0 ? change.rangeOffset - 1 : change.rangeOffset;
			const oldToken = this._tokenStore.getTokenAt(offset);
			let newToken: TokenUpdate;
			if (oldToken) {
				// Insert. Just grow the token at this position to include the insert.
				newToken = { startOffsetInclusive: oldToken.startOffsetInclusive, length: oldToken.length + change.text.length - change.rangeLength, token: oldToken.token };
				// Also mark tokens that are in the range of the change as needing a refresh.
				// (In this branch text.length > rangeLength always holds, so the ternary picks text.length.)
				this._tokenStore.markForRefresh(offset, change.rangeOffset + (change.text.length > change.rangeLength ? change.text.length : change.rangeLength));
			} else {
				// The document got larger and the change is at the end of the document.
				newToken = { startOffsetInclusive: offset, length: change.text.length, token: 0 };
			}
			this._tokenStore.update(oldToken?.length ?? 0, [newToken], TokenQuality.EditGuess);
		} else if (change.text.length < change.rangeLength) {
			// Delete. Delete the tokens at the corresponding range.
			const deletedCharCount = change.rangeLength - change.text.length;
			this._tokenStore.delete(deletedCharCount, change.rangeOffset);
		}
	}
}
126
127
/**
 * Builds a {@link LineTokens} for the given line from whatever tokens are
 * currently in the store (guessed or accurate).
 */
public getLineTokens(lineNumber: number) {
	const lineContent = this._textModel.getLineContent(lineNumber);
	const encodedTokens = this.getTokens(lineNumber);
	return new LineTokens(encodedTokens, lineContent, this._languageIdCodec);
}
132
133
/**
 * Produces a single placeholder token update spanning the entire document,
 * colored with the language's scope-less ("empty") metadata.
 */
private _createEmptyTokens() {
	const documentLength = this._textModel.getValueLength();
	const result: TokenUpdate[] = [
		this._emptyTokensForOffsetAndLength(0, documentLength, this._emptyToken()),
	];
	return result;
}
140
141
// Metadata for a token with no scopes under the current theme (the uncolored token).
private _emptyToken() {
	return this._treeSitterThemeService.findMetadata([], this._encodedLanguageId, false, undefined);
}

// Builds a single empty-token update.
// NOTE(review): startOffsetInclusive is hard-coded to 0 and `offset` is folded
// into the length, so the token always covers [0, offset + length). The only
// visible caller passes offset === 0, where this coincides with
// [offset, offset + length) — confirm intent before calling with offset > 0.
private _emptyTokensForOffsetAndLength(offset: number, length: number, emptyToken: number): TokenUpdate {
	return { token: emptyToken, length: offset + length, startOffsetInclusive: 0 };
}
148
149
/**
 * Whether the entire given line has tokens that are not awaiting a refresh.
 */
public hasAccurateTokensForLine(lineNumber: number): boolean {
	const lineEndColumn = this._textModel.getLineMaxColumn(lineNumber);
	const fullLine = new Range(lineNumber, 1, lineNumber, lineEndColumn);
	return this.hasTokens(fullLine);
}
152
153
/**
 * Tokenizes the given lines as if they were located at `lineNumber`, without
 * mutating the token store. Returns null when no guess could be produced.
 */
public tokenizeLinesAt(lineNumber: number, lines: string[]): LineTokens[] | null {
	const guessedTokens = this._guessTokensForLinesContent(lineNumber, lines);
	if (!guessedTokens) {
		return null;
	}
	return guessedTokens.map((rawTokens, i) => new LineTokens(rawTokens, lines[i], this._languageIdCodec));
}
164
165
// True when every token in `range` meets at least the requested quality.
private _rangeHasTokens(range: Range, minimumTokenQuality: TokenQuality): boolean {
	const startOffset = this._textModel.getOffsetAt(range.getStartPosition());
	const endOffset = this._textModel.getOffsetAt(range.getEndPosition());
	return this._tokenStore.rangeHasTokens(startOffset, endOffset, minimumTokenQuality);
}
168
169
/**
 * Whether tokens exist. With `accurateForRange`, additionally requires that
 * the range is not flagged for refresh — unless the guessed and accurate
 * versions already agree, in which case everything is considered current.
 */
public hasTokens(accurateForRange?: Range): boolean {
	if (!accurateForRange) {
		return true;
	}
	if (this._guessVersion === this._accurateVersion) {
		return true;
	}
	const startOffset = this._textModel.getOffsetAt(accurateForRange.getStartPosition());
	const endOffset = this._textModel.getOffsetAt(accurateForRange.getEndPosition());
	return !this._tokenStore.rangeNeedsRefresh(startOffset, endOffset);
}
176
177
/**
 * Returns the line's tokens as a flat Uint32Array of
 * [endOffsetInLine, metadata] pairs, the layout {@link LineTokens} expects.
 */
public getTokens(line: number): Uint32Array {
	const lineStartOffset = this._textModel.getOffsetAt({ lineNumber: line, column: 1 });
	const lineEndOffset = this._textModel.getOffsetAt({ lineNumber: line, column: this._textModel.getLineLength(line) + 1 });
	const storedTokens = this._tokenStore.getTokensInRange(lineStartOffset, lineEndOffset);
	const encoded = new Uint32Array(storedTokens.length * 2);
	let writeIndex = 0;
	for (const token of storedTokens) {
		// End offset of the token relative to the start of the line.
		encoded[writeIndex++] = token.startOffsetInclusive - lineStartOffset + token.length;
		encoded[writeIndex++] = token.token;
	}
	return encoded;
}
188
189
/**
 * Computes token updates for a range, either from pre-collected captures or by
 * querying the tree directly. Returns undefined when tokenization failed.
 */
getTokensInRange(range: Range, rangeStartOffset: number, rangeEndOffset: number, captures?: QueryCapture[]): TokenUpdate[] | undefined {
	const tokenized = captures
		? this._tokenizeCapturesWithMetadata(captures, rangeStartOffset, rangeEndOffset)
		: this._tokenize(range, rangeStartOffset, rangeEndOffset);
	if (!tokenized?.endOffsetsAndMetadata) {
		return undefined;
	}
	return this._rangeTokensAsUpdates(rangeStartOffset, tokenized.endOffsetsAndMetadata);
}
196
197
/**
 * Writes a batch of token updates into the store and records `version` as the
 * version these tokens are accurate for.
 * @param version model version the updates were computed against
 * @param updates new tokens plus, optionally, the length of the old span they replace
 * @param tokenQuality quality to record for the written tokens
 */
private _updateTokensInStore(version: number, updates: { oldRangeLength?: number; newTokens: TokenUpdate[] }[], tokenQuality: TokenQuality): void {
	this._accurateVersion = version;
	for (const update of updates) {
		const lastToken = update.newTokens.length > 0 ? update.newTokens[update.newTokens.length - 1] : undefined;
		let oldRangeLength: number;
		if (lastToken && (this._guessVersion >= version)) {
			// Guessed edits have already resized the store to match the current text,
			// so the replaced span equals the span covered by the new tokens.
			oldRangeLength = lastToken.startOffsetInclusive + lastToken.length - update.newTokens[0].startOffsetInclusive;
		} else if (update.oldRangeLength) {
			oldRangeLength = update.oldRangeLength;
		} else {
			oldRangeLength = 0;
		}
		this._tokenStore.update(oldRangeLength, update.newTokens, tokenQuality);
	}
}
212
213
// Flags all stored tokens intersecting `range` as stale so they get re-tokenized.
private _markForRefresh(range: Range): void {
	const startOffset = this._textModel.getOffsetAt(range.getStartPosition());
	const endOffset = this._textModel.getOffsetAt(range.getEndPosition());
	this._tokenStore.markForRefresh(startOffset, endOffset);
}
216
217
/**
 * Collects the offset ranges the store has flagged as stale, paired with the
 * equivalent model ranges. Empty array when nothing needs work.
 */
private _getNeedsRefresh(): { range: Range; startOffset: number; endOffset: number }[] {
	const staleRanges = this._tokenStore.getNeedsRefresh();
	if (!staleRanges) {
		return [];
	}
	const result: { range: Range; startOffset: number; endOffset: number }[] = [];
	for (const { startOffset, endOffset } of staleRanges) {
		const startPosition = this._textModel.getPositionAt(startOffset);
		const endPosition = this._textModel.getPositionAt(endOffset);
		result.push({
			range: Range.fromPositions(startPosition, endPosition),
			startOffset,
			endOffset
		});
	}
	return result;
}
228
229
230
/**
 * Synchronously parses and tokenizes the visible line ranges so the viewport
 * is colored before background tokenization catches up. Results are stored at
 * ViewportGuess quality; ranges that already have tokens of at least that
 * quality are skipped.
 */
private _parseAndTokenizeViewPort(lineRanges: readonly LineRange[]) {
	const viewportRanges = lineRanges.map(r => r.toInclusiveRange()).filter(isDefined);
	for (const range of viewportRanges) {
		const startOffsetOfRangeInDocument = this._textModel.getOffsetAt(range.getStartPosition());
		const endOffsetOfRangeInDocument = this._textModel.getOffsetAt(range.getEndPosition());
		// Capture the version before the (potentially slow) parse below.
		const version = this._textModel.getVersionId();
		if (this._rangeHasTokens(range, TokenQuality.ViewportGuess)) {
			continue;
		}
		const content = this._textModel.getValueInRange(range);
		const tokenUpdates = this._forceParseAndTokenizeContent(range, startOffsetOfRangeInDocument, endOffsetOfRangeInDocument, content, true);
		// Re-check: tokens of sufficient quality may have landed while parsing.
		if (!tokenUpdates || this._rangeHasTokens(range, TokenQuality.ViewportGuess)) {
			continue;
		}
		if (tokenUpdates.length === 0) {
			continue;
		}
		const lastToken = tokenUpdates[tokenUpdates.length - 1];
		// The replaced span is exactly the span covered by the new tokens.
		const oldRangeLength = lastToken.startOffsetInclusive + lastToken.length - tokenUpdates[0].startOffsetInclusive;
		this._updateTokensInStore(version, [{ newTokens: tokenUpdates, oldRangeLength }], TokenQuality.ViewportGuess);
		this._onDidChangeTokens.fire({ changes: { semanticTokensApplied: false, ranges: [{ fromLineNumber: range.startLineNumber, toLineNumber: range.endLineNumber }] } });
	}
}
253
254
/**
 * Tokenizes arbitrary line content as if it were located at `lineNumber`,
 * without touching the token store. Returns one Uint32Array of
 * [endOffsetInLine, metadata] pairs per input line, or undefined when
 * tokenization was not possible.
 */
private _guessTokensForLinesContent(lineNumber: number, lines: string[]): Uint32Array[] | undefined {
	if (lines.length === 0) {
		return undefined;
	}
	const lineContent = lines.join(this._textModel.getEOL());
	const range = new Range(1, 1, lineNumber + lines.length, lines[lines.length - 1].length + 1);
	const startOffset = this._textModel.getOffsetAt({ lineNumber, column: 1 });
	const tokens = this._forceParseAndTokenizeContent(range, startOffset, startOffset + lineContent.length, lineContent, false);
	if (!tokens) {
		return undefined;
	}
	// Single pass over the flat token list, slicing it into per-line arrays.
	const tokensByLine: Uint32Array[] = new Array(lines.length);
	let tokensIndex: number = 0;
	let tokenStartOffset = 0;
	let lineStartOffset = 0;
	for (let i = 0; i < lines.length; i++) {
		const tokensForLine: EndOffsetToken[] = [];
		let moveToNextLine = false;
		for (let j = tokensIndex; (!moveToNextLine && (j < tokens.length)); j++) {
			const token = tokens[j];
			// Offsets relative to the start of the current line.
			const lineAdjustedEndOffset = token.endOffset - lineStartOffset;
			const lineAdjustedStartOffset = tokenStartOffset - lineStartOffset;
			if (lineAdjustedEndOffset <= lines[i].length) {
				// Token ends within this line: consume it whole.
				tokensForLine.push({ endOffset: lineAdjustedEndOffset, metadata: token.metadata });
				tokensIndex++;
			} else if (lineAdjustedStartOffset < lines[i].length) {
				// Token straddles the line break: clip it at the end of this line;
				// tokensIndex stays put so the remainder is revisited on the next line.
				const partialToken: EndOffsetToken = { endOffset: lines[i].length, metadata: token.metadata };
				tokensForLine.push(partialToken);
				moveToNextLine = true;
			} else {
				// Token starts beyond this line: nothing more here.
				moveToNextLine = true;
			}
			tokenStartOffset = token.endOffset;
		}

		tokensByLine[i] = this._endOffsetTokensToUint32Array(tokensForLine);
		// Advance past this line's content plus its EOL separator.
		lineStartOffset += lines[i].length + this._textModel.getEOL().length;
	}

	return tokensByLine;
}
295
296
/**
 * Parses `content` in a throwaway tree and tokenizes it. To give the parser
 * enough context, lines from earlier in the document that are likely relevant
 * (e.g. an enclosing block opener) are prepended before parsing.
 * @returns TokenUpdates when asUpdate is true, raw EndOffsetTokens otherwise,
 * or undefined when parsing/tokenization failed.
 */
private _forceParseAndTokenizeContent(range: Range, startOffsetOfRangeInDocument: number, endOffsetOfRangeInDocument: number, content: string, asUpdate: true): TokenUpdate[] | undefined;
private _forceParseAndTokenizeContent(range: Range, startOffsetOfRangeInDocument: number, endOffsetOfRangeInDocument: number, content: string, asUpdate: false): EndOffsetToken[] | undefined;
private _forceParseAndTokenizeContent(range: Range, startOffsetOfRangeInDocument: number, endOffsetOfRangeInDocument: number, content: string, asUpdate: boolean): EndOffsetToken[] | TokenUpdate[] | undefined {
	const likelyRelevantLines = findLikelyRelevantLines(this._textModel, range.startLineNumber).likelyRelevantLines;
	const likelyRelevantPrefix = likelyRelevantLines.join(this._textModel.getEOL());

	const tree = this._tree.createParsedTreeSync(`${likelyRelevantPrefix}${content}`);
	if (!tree) {
		return;
	}

	// Coordinates are relative to the prefix + content snippet, not the document.
	const treeRange = new Range(1, 1, range.endLineNumber - range.startLineNumber + 1 + likelyRelevantLines.length, range.endColumn);
	// NOTE(review): captureAtRange queries this._tree.tree, not the throwaway
	// tree created above — confirm createParsedTreeSync updates that observable.
	const captures = this.captureAtRange(treeRange);
	const tokens = this._tokenizeCapturesWithMetadata(captures, likelyRelevantPrefix.length, endOffsetOfRangeInDocument - startOffsetOfRangeInDocument + likelyRelevantPrefix.length);
	// Throwaway tree: release the native resources immediately.
	tree.delete();

	if (!tokens) {
		return;
	}

	if (asUpdate) {
		// The prefix length tells the converter which leading tokens to drop/clip.
		return this._rangeTokensAsUpdates(startOffsetOfRangeInDocument, tokens.endOffsetsAndMetadata, likelyRelevantPrefix.length);
	} else {
		return tokens.endOffsetsAndMetadata;
	}
}
322
323
324
// First parsed tree for this model: build real tokens for what is visible.
private _firstTreeUpdate(versionId: number) {
	return this._setViewPortTokens(versionId);
}

/**
 * Converts the currently visible line ranges into RangeChanges and runs the
 * regular tree-update path over them.
 */
private _setViewPortTokens(versionId: number) {
	const rangeChanges: RangeChange[] = [];
	for (const lineRange of this._visibleLineRanges.get()) {
		const range = lineRange.toInclusiveRange();
		if (!range) {
			continue;
		}
		rangeChanges.push({
			newRange: range,
			newRangeStartOffset: this._textModel.getOffsetAt(range.getStartPosition()),
			newRangeEndOffset: this._textModel.getOffsetAt(range.getEndPosition()),
		});
	}

	return this._handleTreeUpdate(rangeChanges, versionId);
}
343
344
/**
 * Do not await in this method, it will cause a race
 *
 * Normalizes tree change ranges into non-overlapping, chunked
 * RangeWithOffsets, captures them while the text model is still current, and
 * kicks off the (non-blocking) token update.
 */
private _handleTreeUpdate(ranges: RangeChange[], versionId: number) {
	const rangeChanges: RangeWithOffsets[] = [];
	const chunkSize = 1000;

	for (let i = 0; i < ranges.length; i++) {
		const rangeLinesLength = ranges[i].newRange.endLineNumber - ranges[i].newRange.startLineNumber;
		if (rangeLinesLength > chunkSize) {
			// Split the range into chunks to avoid long operations
			const fullRangeEndLineNumber = ranges[i].newRange.endLineNumber;
			let chunkLineStart = ranges[i].newRange.startLineNumber;
			let chunkColumnStart = ranges[i].newRange.startColumn;
			let chunkLineEnd = chunkLineStart + chunkSize;
			do {
				const chunkStartingPosition = new Position(chunkLineStart, chunkColumnStart);
				const chunkEndColumn = ((chunkLineEnd === ranges[i].newRange.endLineNumber) ? ranges[i].newRange.endColumn : this._textModel.getLineMaxColumn(chunkLineEnd));
				const chunkEndPosition = new Position(chunkLineEnd, chunkEndColumn);
				const chunkRange = Range.fromPositions(chunkStartingPosition, chunkEndPosition);

				rangeChanges.push({
					range: chunkRange,
					startOffset: this._textModel.getOffsetAt(chunkRange.getStartPosition()),
					endOffset: this._textModel.getOffsetAt(chunkRange.getEndPosition())
				});

				chunkLineStart = chunkLineEnd + 1;
				chunkColumnStart = 1;
				if (chunkLineEnd < fullRangeEndLineNumber && chunkLineEnd + chunkSize > fullRangeEndLineNumber) {
					chunkLineEnd = fullRangeEndLineNumber;
				} else {
					chunkLineEnd = chunkLineEnd + chunkSize;
				}
			} while (chunkLineEnd <= fullRangeEndLineNumber);
		} else {
			// Check that the previous range doesn't overlap.
			// FIX: compare against the last range actually pushed instead of
			// rangeChanges[i - 1] — chunking pushes several entries per input range
			// and overlap-skipping pushes none, so rangeChanges does not stay
			// index-aligned with ranges; indexing by i could read the wrong element
			// or undefined (crashing on .endOffset).
			const previous = rangeChanges.length > 0 ? rangeChanges[rangeChanges.length - 1] : undefined;
			if (!previous || (previous.endOffset < ranges[i].newRangeStartOffset)) {
				rangeChanges.push({
					range: ranges[i].newRange,
					startOffset: ranges[i].newRangeStartOffset,
					endOffset: ranges[i].newRangeEndOffset
				});
			} else if (previous.endOffset < ranges[i].newRangeEndOffset) {
				// clip the range to the previous range
				const startPosition = this._textModel.getPositionAt(previous.endOffset + 1);
				const range = new Range(startPosition.lineNumber, startPosition.column, ranges[i].newRange.endLineNumber, ranges[i].newRange.endColumn);
				rangeChanges.push({
					range,
					startOffset: previous.endOffset + 1,
					endOffset: ranges[i].newRangeEndOffset
				});
			}
		}
	}

	// Get the captures immediately while the text model is correct
	const captures = rangeChanges.map(range => this._getCaptures(range.range));
	// Don't block
	return this._updateTreeForRanges(rangeChanges, versionId, captures).then(() => {
		if (!this._textModel.isDisposed() && (this._tree.treeLastParsedVersion.get() === this._textModel.getVersionId())) {
			this._refreshNeedsRefresh(versionId);
		}
	});
}
409
410
/**
 * Applies accurate tokens for each range, yielding to the event loop between
 * ranges (setTimeout0) so large updates do not block rendering.
 * @param rangeChanges non-overlapping ranges to update
 * @param versionId model version the captures were taken at
 * @param captures pre-collected captures, index-aligned with rangeChanges
 */
private async _updateTreeForRanges(rangeChanges: RangeWithOffsets[], versionId: number, captures: QueryCapture[][]) {
	let tokenUpdate: { newTokens: TokenUpdate[] } | undefined;

	for (let i = 0; i < rangeChanges.length; i++) {
		// NOTE(review): a disposed model skips this break and keeps looping —
		// presumably harmless since the store updates below are in-memory, but
		// confirm the disposed case is intentional here.
		if (!this._textModel.isDisposed() && versionId !== this._textModel.getVersionId()) {
			// Our captures have become invalid and we need to re-capture
			break;
		}
		const capture = captures[i];
		const range = rangeChanges[i];

		const updates = this.getTokensInRange(range.range, range.startOffset, range.endOffset, capture);
		if (updates) {
			tokenUpdate = { newTokens: updates };
		} else {
			tokenUpdate = { newTokens: [] };
		}
		this._updateTokensInStore(versionId, [tokenUpdate], TokenQuality.Accurate);
		this._onDidChangeTokens.fire({
			changes: {
				semanticTokensApplied: false,
				ranges: [{ fromLineNumber: range.range.getStartPosition().lineNumber, toLineNumber: range.range.getEndPosition().lineNumber }]
			}
		});
		// Yield so the editor can paint between range updates.
		await new Promise<void>(resolve => setTimeout0(resolve));
	}
	this._onDidCompleteBackgroundTokenization.fire();
}
438
439
/**
 * Re-runs the tree-update path over any ranges the store still marks as stale.
 */
private _refreshNeedsRefresh(versionId: number) {
	const staleRanges = this._getNeedsRefresh();
	if (staleRanges.length === 0) {
		return;
	}

	const rangeChanges: RangeChange[] = staleRanges.map(stale => ({
		newRange: stale.range,
		newRangeStartOffset: stale.startOffset,
		newRangeEndOffset: stale.endOffset
	}));

	this._handleTreeUpdate(rangeChanges, versionId);
}
457
458
/**
 * Converts end-offset/metadata pairs (offsets relative to the tokenized
 * snippet, ascending) into TokenUpdates whose start offsets are shifted by
 * `rangeOffset`.
 * @param rangeOffset offset added to every snippet-relative offset
 * @param endOffsetToken tokens as (endOffset, metadata) pairs
 * @param startingOffsetInArray when set (the parse-context prefix length),
 * tokens ending before it are dropped and the first token crossing it is
 * clipped to start at it — NOTE(review): the clipped start is
 * rangeOffset + startingOffsetInArray; verify against callers that this is
 * the intended document offset.
 */
private _rangeTokensAsUpdates(rangeOffset: number, endOffsetToken: EndOffsetToken[], startingOffsetInArray?: number) {
	const updates: TokenUpdate[] = [];
	let lastEnd = 0;
	for (const token of endOffsetToken) {
		// Skip zero-length/overlapping tokens and tokens entirely inside the prefix.
		if (token.endOffset <= lastEnd || (startingOffsetInArray && (token.endOffset < startingOffsetInArray))) {
			continue;
		}
		let tokenUpdate: TokenUpdate;
		if (startingOffsetInArray && (lastEnd < startingOffsetInArray)) {
			// First token reaching past the prefix: clip its start at the prefix end.
			tokenUpdate = { startOffsetInclusive: rangeOffset + startingOffsetInArray, length: token.endOffset - startingOffsetInArray, token: token.metadata };
		} else {
			tokenUpdate = { startOffsetInclusive: rangeOffset + lastEnd, length: token.endOffset - lastEnd, token: token.metadata };
		}
		updates.push(tokenUpdate);
		lastEnd = token.endOffset;
	}
	return updates;
}
476
477
// Theme changed: every stored token's metadata is stale. Flag the whole
// document for refresh and immediately re-tokenize what the user can see.
private _updateTheme() {
	this._markForRefresh(this._textModel.getFullModelRange());
	this._parseAndTokenizeViewPort(this._visibleLineRanges.get());
}
482
483
// Was used for inspect editor tokens command
captureAtPosition(lineNumber: number, column: number): QueryCapture[] {
	const singleCharRange = new Range(lineNumber, column, lineNumber, column + 1);
	return this.captureAtRangeWithInjections(singleCharRange);
}

// Was used for the colorization tests
captureAtRangeTree(range: Range): QueryCapture[] {
	return this.captureAtRangeWithInjections(range);
}
494
495
/**
 * Runs the highlighting query against the current tree over the given range
 * and converts the captures from tree-sitter's 0-based coordinates to the
 * editor's 1-based ones. Empty array when no tree is available yet.
 */
private captureAtRange(range: Range): QueryCapture[] {
	const tree = this._tree.tree.get();
	if (!tree) {
		return [];
	}
	// Tree sitter row is 0 based, column is 0 based
	return this._highlightingQueries.captures(tree.rootNode, { startPosition: { row: range.startLineNumber - 1, column: range.startColumn - 1 }, endPosition: { row: range.endLineNumber - 1, column: range.endColumn - 1 } }).map(capture => (
		{
			name: capture.name,
			text: capture.node.text,
			node: {
				startIndex: capture.node.startIndex,
				endIndex: capture.node.endIndex,
				startPosition: {
					lineNumber: capture.node.startPosition.row + 1,
					column: capture.node.startPosition.column + 1
				},
				endPosition: {
					lineNumber: capture.node.endPosition.row + 1,
					column: capture.node.endPosition.column + 1
				}
			},
			encodedLanguageId: this._encodedLanguageId
		}
	));
}
521
522
/**
 * Collects captures for a range and splices captures from injected languages
 * directly after their host capture. The array is mutated while iterating;
 * the index is advanced past the inserted captures so they are not recursed into.
 */
private captureAtRangeWithInjections(range: Range): QueryCapture[] {
	const captures: QueryCapture[] = this.captureAtRange(range);
	for (let i = 0; i < captures.length; i++) {
		const capture = captures[i];

		const capStartLine = capture.node.startPosition.lineNumber;
		const capEndLine = capture.node.endPosition.lineNumber;
		const capStartColumn = capture.node.startPosition.column;
		const capEndColumn = capture.node.endPosition.column;

		// Clamp the capture's extent to the requested range.
		const startLine = ((capStartLine > range.startLineNumber) && (capStartLine < range.endLineNumber)) ? capStartLine : range.startLineNumber;
		const endLine = ((capEndLine > range.startLineNumber) && (capEndLine < range.endLineNumber)) ? capEndLine : range.endLineNumber;
		const startColumn = (capStartLine === range.startLineNumber) ? (capStartColumn < range.startColumn ? range.startColumn : capStartColumn) : (capStartLine < range.startLineNumber ? range.startColumn : capStartColumn);
		const endColumn = (capEndLine === range.endLineNumber) ? (capEndColumn > range.endColumn ? range.endColumn : capEndColumn) : (capEndLine > range.endLineNumber ? range.endColumn : capEndColumn);
		const injectionRange = new Range(startLine, startColumn, endLine, endColumn);

		const injection = this._getInjectionCaptures(capture, injectionRange);
		if (injection && injection.length > 0) {
			// Insert injected captures after the host capture and skip over them.
			captures.splice(i + 1, 0, ...injection);
			i += injection.length;
		}
	}
	return captures;
}
546
547
/**
 * Tokenizes the given line and — when the tokens were computed against the
 * current model version — commits them to the token store as accurate.
 * Note: despite the name, this returns no token data (only an undefined
 * early-out); callers read tokens back via getTokens / getLineTokens.
 * @param lineNumber the 1-based line to tokenize
 */
public tokenizeEncoded(lineNumber: number) {
	const tokens = this._tokenizeEncoded(lineNumber);
	if (!tokens) {
		return undefined;
	}
	const updates = this._rangeTokensAsUpdates(this._textModel.getOffsetAt({ lineNumber, column: 1 }), tokens.result);
	// Only commit if the text hasn't changed since tokenization started.
	if (tokens.versionId === this._textModel.getVersionId()) {
		this._updateTokensInStore(tokens.versionId, [{ newTokens: updates, oldRangeLength: this._textModel.getLineLength(lineNumber) }], TokenQuality.Accurate);
	}
}
564
565
/**
 * Tokenizes a line and returns the encoded tokens together with timing
 * instrumentation (capture and metadata resolution times).
 */
public tokenizeEncodedInstrumented(lineNumber: number): { result: Uint32Array; captureTime: number; metadataTime: number } | undefined {
	const tokenized = this._tokenizeEncoded(lineNumber);
	if (!tokenized) {
		return undefined;
	}
	const { result, captureTime, metadataTime } = tokenized;
	return { result: this._endOffsetTokensToUint32Array(result), captureTime, metadataTime };
}
572
573
// Collects highlight captures, including injected-language captures, for a range.
private _getCaptures(range: Range): QueryCapture[] {
	return this.captureAtRangeWithInjections(range);
}
577
578
/**
 * Captures and tokenizes a range from the live tree, tagging the result with
 * the tree version the tokens were computed from.
 */
private _tokenize(range: Range, rangeStartOffset: number, rangeEndOffset: number): { endOffsetsAndMetadata: { endOffset: number; metadata: number }[]; versionId: number; captureTime: number; metadataTime: number } | undefined {
	const tokenized = this._tokenizeCapturesWithMetadata(this._getCaptures(range), rangeStartOffset, rangeEndOffset);
	if (!tokenized) {
		return undefined;
	}
	const versionId = this._tree.treeLastParsedVersion.get();
	return { ...tokenized, versionId };
}
586
587
private _createTokensFromCaptures(captures: QueryCapture[], rangeStartOffset: number, rangeEndOffset: number): { endOffsets: EndOffsetAndScopes[]; captureTime: number } | undefined {
588
const tree = this._tree.tree.get();
589
const stopwatch = StopWatch.create();
590
const rangeLength = rangeEndOffset - rangeStartOffset;
591
const encodedLanguageId = this._languageIdCodec.encodeLanguageId(this._tree.languageId);
592
const baseScope: string = TREESITTER_BASE_SCOPES[this._tree.languageId] || 'source';
593
594
if (captures.length === 0) {
595
if (tree) {
596
stopwatch.stop();
597
const endOffsetsAndMetadata = [{ endOffset: rangeLength, scopes: [], encodedLanguageId }];
598
return { endOffsets: endOffsetsAndMetadata, captureTime: stopwatch.elapsed() };
599
}
600
return undefined;
601
}
602
603
const endOffsetsAndScopes: EndOffsetAndScopes[] = Array(captures.length);
604
endOffsetsAndScopes.fill({ endOffset: 0, scopes: [baseScope], encodedLanguageId });
605
let tokenIndex = 0;
606
607
const increaseSizeOfTokensByOneToken = () => {
608
endOffsetsAndScopes.push({ endOffset: 0, scopes: [baseScope], encodedLanguageId });
609
};
610
611
const brackets = (capture: QueryCapture, startOffset: number): number[] | undefined => {
612
return (capture.name.includes('punctuation') && capture.text) ? Array.from(capture.text.matchAll(BRACKETS)).map(match => startOffset + match.index) : undefined;
613
};
614
615
const addCurrentTokenToArray = (capture: QueryCapture, startOffset: number, endOffset: number, position?: number) => {
616
if (position !== undefined) {
617
const oldScopes = endOffsetsAndScopes[position].scopes;
618
let oldBracket = endOffsetsAndScopes[position].bracket;
619
// Check that the previous token ends at the same point that the current token starts
620
const prevEndOffset = position > 0 ? endOffsetsAndScopes[position - 1].endOffset : 0;
621
if (prevEndOffset !== startOffset) {
622
let preInsertBracket: number[] | undefined = undefined;
623
if (oldBracket && oldBracket.length > 0) {
624
preInsertBracket = [];
625
const postInsertBracket: number[] = [];
626
for (let i = 0; i < oldBracket.length; i++) {
627
const bracket = oldBracket[i];
628
if (bracket < startOffset) {
629
preInsertBracket.push(bracket);
630
} else if (bracket > endOffset) {
631
postInsertBracket.push(bracket);
632
}
633
}
634
if (preInsertBracket.length === 0) {
635
preInsertBracket = undefined;
636
}
637
if (postInsertBracket.length === 0) {
638
oldBracket = undefined;
639
} else {
640
oldBracket = postInsertBracket;
641
}
642
}
643
// We need to add some of the position token to cover the space
644
endOffsetsAndScopes.splice(position, 0, { endOffset: startOffset, scopes: [...oldScopes], bracket: preInsertBracket, encodedLanguageId: capture.encodedLanguageId });
645
position++;
646
increaseSizeOfTokensByOneToken();
647
tokenIndex++;
648
}
649
650
endOffsetsAndScopes.splice(position, 0, { endOffset: endOffset, scopes: [...oldScopes, capture.name], bracket: brackets(capture, startOffset), encodedLanguageId: capture.encodedLanguageId });
651
endOffsetsAndScopes[tokenIndex].bracket = oldBracket;
652
} else {
653
endOffsetsAndScopes[tokenIndex] = { endOffset: endOffset, scopes: [baseScope, capture.name], bracket: brackets(capture, startOffset), encodedLanguageId: capture.encodedLanguageId };
654
}
655
tokenIndex++;
656
};
657
658
for (let captureIndex = 0; captureIndex < captures.length; captureIndex++) {
659
const capture = captures[captureIndex];
660
const tokenEndIndex = capture.node.endIndex < rangeEndOffset ? ((capture.node.endIndex < rangeStartOffset) ? rangeStartOffset : capture.node.endIndex) : rangeEndOffset;
661
const tokenStartIndex = capture.node.startIndex < rangeStartOffset ? rangeStartOffset : capture.node.startIndex;
662
663
const endOffset = tokenEndIndex - rangeStartOffset;
664
665
// Not every character will get captured, so we need to make sure that our current capture doesn't bleed toward the start of the line and cover characters that it doesn't apply to.
666
// We do this by creating a new token in the array if the previous token ends before the current token starts.
667
let previousEndOffset: number;
668
const currentTokenLength = tokenEndIndex - tokenStartIndex;
669
if (captureIndex > 0) {
670
previousEndOffset = endOffsetsAndScopes[(tokenIndex - 1)].endOffset;
671
} else {
672
previousEndOffset = tokenStartIndex - rangeStartOffset - 1;
673
}
674
const startOffset = endOffset - currentTokenLength;
675
if ((previousEndOffset >= 0) && (previousEndOffset < startOffset)) {
676
// Add en empty token to cover the space where there were no captures
677
endOffsetsAndScopes[tokenIndex] = { endOffset: startOffset, scopes: [baseScope], encodedLanguageId: this._encodedLanguageId };
678
tokenIndex++;
679
680
increaseSizeOfTokensByOneToken();
681
}
682
683
if (currentTokenLength < 0) {
684
// This happens when we have a token "gap" right at the end of the capture range. The last capture isn't used because it's start index isn't included in the range.
685
continue;
686
}
687
688
if (previousEndOffset >= endOffset) {
689
// walk back through the tokens until we find the one that contains the current token
690
let withinTokenIndex = tokenIndex - 1;
691
let previousTokenEndOffset = endOffsetsAndScopes[withinTokenIndex].endOffset;
692
693
let previousTokenStartOffset = ((withinTokenIndex >= 2) ? endOffsetsAndScopes[withinTokenIndex - 1].endOffset : 0);
694
do {
695
696
// Check that the current token doesn't just replace the last token
697
if ((previousTokenStartOffset + currentTokenLength) === previousTokenEndOffset) {
698
if (previousTokenStartOffset === startOffset) {
699
// Current token and previous token span the exact same characters, add the scopes to the previous token
700
endOffsetsAndScopes[withinTokenIndex].scopes.push(capture.name);
701
const oldBracket = endOffsetsAndScopes[withinTokenIndex].bracket;
702
endOffsetsAndScopes[withinTokenIndex].bracket = ((oldBracket && (oldBracket.length > 0)) ? oldBracket : brackets(capture, startOffset));
703
}
704
} else if (previousTokenStartOffset <= startOffset) {
705
addCurrentTokenToArray(capture, startOffset, endOffset, withinTokenIndex);
706
break;
707
}
708
withinTokenIndex--;
709
previousTokenStartOffset = ((withinTokenIndex >= 1) ? endOffsetsAndScopes[withinTokenIndex - 1].endOffset : 0);
710
previousTokenEndOffset = ((withinTokenIndex >= 0) ? endOffsetsAndScopes[withinTokenIndex].endOffset : 0);
711
} while (previousTokenEndOffset > startOffset);
712
} else {
713
// Just add the token to the array
714
addCurrentTokenToArray(capture, startOffset, endOffset);
715
}
716
}
717
718
// Account for uncaptured characters at the end of the line
719
if ((endOffsetsAndScopes[tokenIndex - 1].endOffset < rangeLength)) {
720
if (rangeLength - endOffsetsAndScopes[tokenIndex - 1].endOffset > 0) {
721
increaseSizeOfTokensByOneToken();
722
endOffsetsAndScopes[tokenIndex] = { endOffset: rangeLength, scopes: endOffsetsAndScopes[tokenIndex].scopes, encodedLanguageId: this._encodedLanguageId };
723
tokenIndex++;
724
}
725
}
726
for (let i = 0; i < endOffsetsAndScopes.length; i++) {
727
const token = endOffsetsAndScopes[i];
728
if (token.endOffset === 0 && i !== 0) {
729
endOffsetsAndScopes.splice(i, endOffsetsAndScopes.length - i);
730
break;
731
}
732
}
733
const captureTime = stopwatch.elapsed();
734
return { endOffsets: endOffsetsAndScopes as { endOffset: number; scopes: string[]; encodedLanguageId: LanguageId }[], captureTime };
735
}
736
737
/**
 * Returns the query captures contributed by a language injection overlapping
 * `parentCapture` within `range`. Injection tokenization is currently
 * disabled, so this always reports no captures; the previous implementation
 * is retained below for reference.
 */
private _getInjectionCaptures(parentCapture: QueryCapture, range: Range): QueryCapture[] {
	/*
	const injection = textModelTreeSitter.getInjection(parentCapture.node.startIndex, this._treeSitterModel.languageId);
	if (!injection?.tree || injection.versionId !== textModelTreeSitter.parseResult?.versionId) {
		return undefined;
	}

	const feature = TreeSitterTokenizationRegistry.get(injection.languageId);
	if (!feature) {
		return undefined;
	}
	return feature.tokSupport_captureAtRangeTree(range, injection.tree, textModelTreeSitter);
	*/
	return [];
}
751
752
/**
 * Turns raw tree-sitter query captures into tokens and resolves theme
 * metadata for each of them.
 *
 * @param captures the query captures to tokenize.
 * @param rangeStartOffset document offset where the tokenized range starts.
 * @param rangeEndOffset document offset where the tokenized range ends.
 * @returns the tokens with resolved metadata plus timing information, or
 * `undefined` when no tokens could be produced from the captures. Note that
 * `metadataTime` is measured from before capture conversion, so it includes
 * `captureTime`.
 */
private _tokenizeCapturesWithMetadata(captures: QueryCapture[], rangeStartOffset: number, rangeEndOffset: number): { endOffsetsAndMetadata: EndOffsetToken[]; captureTime: number; metadataTime: number } | undefined {
	const sw = StopWatch.create();
	const tokenized = this._createTokensFromCaptures(captures, rangeStartOffset, rangeEndOffset);
	if (!tokenized) {
		return undefined;
	}
	// Resolve theme metadata in place; the same objects are returned below.
	const withMeta: EndOffsetWithMeta[] = tokenized.endOffsets;
	for (const token of withMeta) {
		const isBracket = !!token.bracket && (token.bracket.length > 0);
		token.metadata = this._treeSitterThemeService.findMetadata(token.scopes, token.encodedLanguageId, isBracket, undefined);
	}
	return {
		endOffsetsAndMetadata: withMeta as { endOffset: number; scopes: string[]; metadata: number }[],
		captureTime: tokenized.captureTime,
		metadataTime: sw.elapsed()
	};
}
767
768
/**
 * Tokenizes a single line of the text model.
 *
 * @param lineNumber 1-based line to tokenize.
 * @returns the line's tokens with timing and the model version they were
 * computed against, or `undefined` if tokenization was not possible.
 */
private _tokenizeEncoded(lineNumber: number): { result: EndOffsetToken[]; captureTime: number; metadataTime: number; versionId: number } | undefined {
	const startOffset = this._textModel.getOffsetAt({ lineNumber: lineNumber, column: 1 });
	const lastLine = this._textModel.getLineCount();
	// Offset just past this line: the start of the next line, or the end of the document when tokenizing the final line.
	const endOffset = lineNumber < lastLine
		? this._textModel.getOffsetAt({ lineNumber: lineNumber + 1, column: 1 })
		: this._textModel.getValueLength();
	const lineLength = endOffset - startOffset;

	const tokens = this._tokenize(new Range(lineNumber, 1, lineNumber, lineLength + 1), startOffset, endOffset);
	if (!tokens) {
		return undefined;
	}
	return { result: tokens.endOffsetsAndMetadata, captureTime: tokens.captureTime, metadataTime: tokens.metadataTime, versionId: tokens.versionId };
}
780
781
/**
 * Flattens tokens into the packed binary layout used for encoded line
 * tokens: consecutive (endOffset, metadata) pairs.
 */
private _endOffsetTokensToUint32Array(endOffsetsAndMetadata: EndOffsetToken[]): Uint32Array {
	const data = new Uint32Array(endOffsetsAndMetadata.length * 2);
	let writeIndex = 0;
	for (const token of endOffsetsAndMetadata) {
		data[writeIndex++] = token.endOffset;
		data[writeIndex++] = token.metadata;
	}
	return data;
}
790
}
791
792
793
/**
 * A fully resolved token: the offset (relative to the tokenized range) at
 * which the token ends, and the encoded theme metadata to render it with.
 */
interface EndOffsetToken {
	endOffset: number;
	metadata: number;
}
797
798
/**
 * An intermediate token produced from tree-sitter query captures: the end
 * offset (relative to the tokenized range) and the scopes captured for it,
 * before theme metadata has been resolved.
 */
interface EndOffsetAndScopes {
	endOffset: number;
	scopes: string[];
	// Populated by the `brackets` helper when the capture is bracket-related;
	// a non-empty array marks the token as a bracket for metadata resolution.
	// Presumably holds the bracket character offsets — verify against `brackets`.
	bracket?: number[];
	encodedLanguageId: LanguageId;
}
804
805
/**
 * An `EndOffsetAndScopes` token optionally enriched with resolved theme
 * metadata; `metadata` is absent until metadata resolution has run.
 */
interface EndOffsetWithMeta extends EndOffsetAndScopes {
	metadata?: number;
}
808
/**
 * Maps a language id to the TextMate-style base scope used as the first
 * scope of every token produced for that language.
 */
export const TREESITTER_BASE_SCOPES: Record<string, string> = {
	'css': 'source.css',
	'typescript': 'source.ts',
	'ini': 'source.ini',
	'regex': 'source.regex',
};
814
815
// Matches any single bracket character: { } [ ] < > ( ).
// NOTE: carries the `g` flag, so uses via exec/test are stateful (lastIndex).
const BRACKETS = /[\{\}\[\]\<\>\(\)]/g;