Path: blob/main/src/vs/editor/common/model/tokens/treeSitter/treeSitterTokenizationImpl.ts
3296 views
/*---------------------------------------------------------------------------------------------1* Copyright (c) Microsoft Corporation. All rights reserved.2* Licensed under the MIT License. See License.txt in the project root for license information.3*--------------------------------------------------------------------------------------------*/45import { Emitter, Event } from '../../../../../base/common/event.js';6import { Disposable } from '../../../../../base/common/lifecycle.js';7import { setTimeout0 } from '../../../../../base/common/platform.js';8import { StopWatch } from '../../../../../base/common/stopwatch.js';9import { LanguageId } from '../../../encodedTokenAttributes.js';10import { ILanguageIdCodec, QueryCapture } from '../../../languages.js';11import { IModelContentChangedEvent, IModelTokensChangedEvent } from '../../../textModelEvents.js';12import { findLikelyRelevantLines } from '../../textModelTokens.js';13import { TokenStore, TokenUpdate, TokenQuality } from './tokenStore.js';14import { TreeSitterTree, RangeChange, RangeWithOffsets } from './treeSitterTree.js';15import type * as TreeSitter from '@vscode/tree-sitter-wasm';16import { autorun, autorunHandleChanges, IObservable, recordChanges, runOnChange } from '../../../../../base/common/observable.js';17import { LineRange } from '../../../core/ranges/lineRange.js';18import { LineTokens } from '../../../tokens/lineTokens.js';19import { Position } from '../../../core/position.js';20import { Range } from '../../../core/range.js';21import { isDefined } from '../../../../../base/common/types.js';22import { ITreeSitterThemeService } from '../../../services/treeSitter/treeSitterThemeService.js';23import { BugIndicatingError } from '../../../../../base/common/errors.js';2425export class TreeSitterTokenizationImpl extends Disposable {26private readonly _tokenStore: TokenStore;27private _accurateVersion: number;28private _guessVersion: number;2930private readonly _onDidChangeTokens: Emitter<{ changes: 
IModelTokensChangedEvent }> = this._register(new Emitter());31public readonly onDidChangeTokens: Event<{ changes: IModelTokensChangedEvent }> = this._onDidChangeTokens.event;32private readonly _onDidCompleteBackgroundTokenization: Emitter<void> = this._register(new Emitter());33public readonly onDidChangeBackgroundTokenization: Event<void> = this._onDidCompleteBackgroundTokenization.event;3435private _encodedLanguageId: LanguageId;3637private get _textModel() {38return this._tree.textModel;39}4041constructor(42private readonly _tree: TreeSitterTree,43private readonly _highlightingQueries: TreeSitter.Query,44private readonly _languageIdCodec: ILanguageIdCodec,45private readonly _visibleLineRanges: IObservable<readonly LineRange[]>,4647@ITreeSitterThemeService private readonly _treeSitterThemeService: ITreeSitterThemeService,48) {49super();5051this._encodedLanguageId = this._languageIdCodec.encodeLanguageId(this._tree.languageId);5253this._register(runOnChange(this._treeSitterThemeService.onChange, () => {54this._updateTheme();55}));5657this._tokenStore = this._register(new TokenStore(this._textModel));58this._accurateVersion = this._textModel.getVersionId();59this._guessVersion = this._textModel.getVersionId();60this._tokenStore.buildStore(this._createEmptyTokens(), TokenQuality.None);6162this._register(autorun(reader => {63const visibleLineRanges = this._visibleLineRanges.read(reader);64this._parseAndTokenizeViewPort(visibleLineRanges);65}));6667this._register(autorunHandleChanges({68owner: this,69changeTracker: recordChanges({ tree: this._tree.tree }),70}, (reader, ctx) => {71const changeEvent = ctx.changes.at(0)?.change;72if (ctx.changes.length > 1) {73throw new BugIndicatingError('The tree changed twice in one transaction. 
This is currently not supported and should not happen.');74}7576if (!changeEvent) {77if (ctx.tree) {78this._firstTreeUpdate(this._tree.treeLastParsedVersion.read(reader));79}80} else {81if (this.hasTokens()) {82// Mark the range for refresh immediately8384for (const range of changeEvent.ranges) {85this._markForRefresh(range.newRange);86}87}8889// First time we see a tree we need to build a token store.90if (!this.hasTokens()) {91this._firstTreeUpdate(changeEvent.versionId);92} else {93this._handleTreeUpdate(changeEvent.ranges, changeEvent.versionId);94}95}96}));97}9899public handleContentChanged(e: IModelContentChangedEvent): void {100this._guessVersion = e.versionId;101for (const change of e.changes) {102if (change.text.length > change.rangeLength) {103// If possible, use the token before the change as the starting point for the new token.104// This is more likely to let the new text be the correct color as typeing is usually at the end of the token.105const offset = change.rangeOffset > 0 ? change.rangeOffset - 1 : change.rangeOffset;106const oldToken = this._tokenStore.getTokenAt(offset);107let newToken: TokenUpdate;108if (oldToken) {109// Insert. Just grow the token at this position to include the insert.110newToken = { startOffsetInclusive: oldToken.startOffsetInclusive, length: oldToken.length + change.text.length - change.rangeLength, token: oldToken.token };111// Also mark tokens that are in the range of the change as needing a refresh.112this._tokenStore.markForRefresh(offset, change.rangeOffset + (change.text.length > change.rangeLength ? change.text.length : change.rangeLength));113} else {114// The document got larger and the change is at the end of the document.115newToken = { startOffsetInclusive: offset, length: change.text.length, token: 0 };116}117this._tokenStore.update(oldToken?.length ?? 0, [newToken], TokenQuality.EditGuess);118} else if (change.text.length < change.rangeLength) {119// Delete. 
Delete the tokens at the corresponding range.120const deletedCharCount = change.rangeLength - change.text.length;121this._tokenStore.delete(deletedCharCount, change.rangeOffset);122}123}124}125126public getLineTokens(lineNumber: number) {127const content = this._textModel.getLineContent(lineNumber);128const rawTokens = this.getTokens(lineNumber);129return new LineTokens(rawTokens, content, this._languageIdCodec);130}131132private _createEmptyTokens() {133const emptyToken = this._emptyToken();134const modelEndOffset = this._textModel.getValueLength();135136const emptyTokens: TokenUpdate[] = [this._emptyTokensForOffsetAndLength(0, modelEndOffset, emptyToken)];137return emptyTokens;138}139140private _emptyToken() {141return this._treeSitterThemeService.findMetadata([], this._encodedLanguageId, false, undefined);142}143144private _emptyTokensForOffsetAndLength(offset: number, length: number, emptyToken: number): TokenUpdate {145return { token: emptyToken, length: offset + length, startOffsetInclusive: 0 };146}147148public hasAccurateTokensForLine(lineNumber: number): boolean {149return this.hasTokens(new Range(lineNumber, 1, lineNumber, this._textModel.getLineMaxColumn(lineNumber)));150}151152public tokenizeLinesAt(lineNumber: number, lines: string[]): LineTokens[] | null {153const rawLineTokens = this._guessTokensForLinesContent(lineNumber, lines);154const lineTokens: LineTokens[] = [];155if (!rawLineTokens) {156return null;157}158for (let i = 0; i < rawLineTokens.length; i++) {159lineTokens.push(new LineTokens(rawLineTokens[i], lines[i], this._languageIdCodec));160}161return lineTokens;162}163164private _rangeHasTokens(range: Range, minimumTokenQuality: TokenQuality): boolean {165return this._tokenStore.rangeHasTokens(this._textModel.getOffsetAt(range.getStartPosition()), this._textModel.getOffsetAt(range.getEndPosition()), minimumTokenQuality);166}167168public hasTokens(accurateForRange?: Range): boolean {169if (!accurateForRange || (this._guessVersion === 
this._accurateVersion)) {170return true;171}172173return !this._tokenStore.rangeNeedsRefresh(this._textModel.getOffsetAt(accurateForRange.getStartPosition()), this._textModel.getOffsetAt(accurateForRange.getEndPosition()));174}175176public getTokens(line: number): Uint32Array {177const lineStartOffset = this._textModel.getOffsetAt({ lineNumber: line, column: 1 });178const lineEndOffset = this._textModel.getOffsetAt({ lineNumber: line, column: this._textModel.getLineLength(line) + 1 });179const lineTokens = this._tokenStore.getTokensInRange(lineStartOffset, lineEndOffset);180const result = new Uint32Array(lineTokens.length * 2);181for (let i = 0; i < lineTokens.length; i++) {182result[i * 2] = lineTokens[i].startOffsetInclusive - lineStartOffset + lineTokens[i].length;183result[i * 2 + 1] = lineTokens[i].token;184}185return result;186}187188getTokensInRange(range: Range, rangeStartOffset: number, rangeEndOffset: number, captures?: QueryCapture[]): TokenUpdate[] | undefined {189const tokens = captures ? this._tokenizeCapturesWithMetadata(captures, rangeStartOffset, rangeEndOffset) : this._tokenize(range, rangeStartOffset, rangeEndOffset);190if (tokens?.endOffsetsAndMetadata) {191return this._rangeTokensAsUpdates(rangeStartOffset, tokens.endOffsetsAndMetadata);192}193return undefined;194}195196private _updateTokensInStore(version: number, updates: { oldRangeLength?: number; newTokens: TokenUpdate[] }[], tokenQuality: TokenQuality): void {197this._accurateVersion = version;198for (const update of updates) {199const lastToken = update.newTokens.length > 0 ? 
update.newTokens[update.newTokens.length - 1] : undefined;200let oldRangeLength: number;201if (lastToken && (this._guessVersion >= version)) {202oldRangeLength = lastToken.startOffsetInclusive + lastToken.length - update.newTokens[0].startOffsetInclusive;203} else if (update.oldRangeLength) {204oldRangeLength = update.oldRangeLength;205} else {206oldRangeLength = 0;207}208this._tokenStore.update(oldRangeLength, update.newTokens, tokenQuality);209}210}211212private _markForRefresh(range: Range): void {213this._tokenStore.markForRefresh(this._textModel.getOffsetAt(range.getStartPosition()), this._textModel.getOffsetAt(range.getEndPosition()));214}215216private _getNeedsRefresh(): { range: Range; startOffset: number; endOffset: number }[] {217const needsRefreshOffsetRanges = this._tokenStore.getNeedsRefresh();218if (!needsRefreshOffsetRanges) {219return [];220}221return needsRefreshOffsetRanges.map(range => ({222range: Range.fromPositions(this._textModel.getPositionAt(range.startOffset), this._textModel.getPositionAt(range.endOffset)),223startOffset: range.startOffset,224endOffset: range.endOffset225}));226}227228229private _parseAndTokenizeViewPort(lineRanges: readonly LineRange[]) {230const viewportRanges = lineRanges.map(r => r.toInclusiveRange()).filter(isDefined);231for (const range of viewportRanges) {232const startOffsetOfRangeInDocument = this._textModel.getOffsetAt(range.getStartPosition());233const endOffsetOfRangeInDocument = this._textModel.getOffsetAt(range.getEndPosition());234const version = this._textModel.getVersionId();235if (this._rangeHasTokens(range, TokenQuality.ViewportGuess)) {236continue;237}238const content = this._textModel.getValueInRange(range);239const tokenUpdates = this._forceParseAndTokenizeContent(range, startOffsetOfRangeInDocument, endOffsetOfRangeInDocument, content, true);240if (!tokenUpdates || this._rangeHasTokens(range, TokenQuality.ViewportGuess)) {241continue;242}243if (tokenUpdates.length === 0) {244continue;245}246const 
lastToken = tokenUpdates[tokenUpdates.length - 1];247const oldRangeLength = lastToken.startOffsetInclusive + lastToken.length - tokenUpdates[0].startOffsetInclusive;248this._updateTokensInStore(version, [{ newTokens: tokenUpdates, oldRangeLength }], TokenQuality.ViewportGuess);249this._onDidChangeTokens.fire({ changes: { semanticTokensApplied: false, ranges: [{ fromLineNumber: range.startLineNumber, toLineNumber: range.endLineNumber }] } });250}251}252253private _guessTokensForLinesContent(lineNumber: number, lines: string[]): Uint32Array[] | undefined {254if (lines.length === 0) {255return undefined;256}257const lineContent = lines.join(this._textModel.getEOL());258const range = new Range(1, 1, lineNumber + lines.length, lines[lines.length - 1].length + 1);259const startOffset = this._textModel.getOffsetAt({ lineNumber, column: 1 });260const tokens = this._forceParseAndTokenizeContent(range, startOffset, startOffset + lineContent.length, lineContent, false);261if (!tokens) {262return undefined;263}264const tokensByLine: Uint32Array[] = new Array(lines.length);265let tokensIndex: number = 0;266let tokenStartOffset = 0;267let lineStartOffset = 0;268for (let i = 0; i < lines.length; i++) {269const tokensForLine: EndOffsetToken[] = [];270let moveToNextLine = false;271for (let j = tokensIndex; (!moveToNextLine && (j < tokens.length)); j++) {272const token = tokens[j];273const lineAdjustedEndOffset = token.endOffset - lineStartOffset;274const lineAdjustedStartOffset = tokenStartOffset - lineStartOffset;275if (lineAdjustedEndOffset <= lines[i].length) {276tokensForLine.push({ endOffset: lineAdjustedEndOffset, metadata: token.metadata });277tokensIndex++;278} else if (lineAdjustedStartOffset < lines[i].length) {279const partialToken: EndOffsetToken = { endOffset: lines[i].length, metadata: token.metadata };280tokensForLine.push(partialToken);281moveToNextLine = true;282} else {283moveToNextLine = true;284}285tokenStartOffset = token.endOffset;286}287288tokensByLine[i] = 
this._endOffsetTokensToUint32Array(tokensForLine);289lineStartOffset += lines[i].length + this._textModel.getEOL().length;290}291292return tokensByLine;293}294295private _forceParseAndTokenizeContent(range: Range, startOffsetOfRangeInDocument: number, endOffsetOfRangeInDocument: number, content: string, asUpdate: true): TokenUpdate[] | undefined;296private _forceParseAndTokenizeContent(range: Range, startOffsetOfRangeInDocument: number, endOffsetOfRangeInDocument: number, content: string, asUpdate: false): EndOffsetToken[] | undefined;297private _forceParseAndTokenizeContent(range: Range, startOffsetOfRangeInDocument: number, endOffsetOfRangeInDocument: number, content: string, asUpdate: boolean): EndOffsetToken[] | TokenUpdate[] | undefined {298const likelyRelevantLines = findLikelyRelevantLines(this._textModel, range.startLineNumber).likelyRelevantLines;299const likelyRelevantPrefix = likelyRelevantLines.join(this._textModel.getEOL());300301const tree = this._tree.createParsedTreeSync(`${likelyRelevantPrefix}${content}`);302if (!tree) {303return;304}305306const treeRange = new Range(1, 1, range.endLineNumber - range.startLineNumber + 1 + likelyRelevantLines.length, range.endColumn);307const captures = this.captureAtRange(treeRange);308const tokens = this._tokenizeCapturesWithMetadata(captures, likelyRelevantPrefix.length, endOffsetOfRangeInDocument - startOffsetOfRangeInDocument + likelyRelevantPrefix.length);309tree.delete();310311if (!tokens) {312return;313}314315if (asUpdate) {316return this._rangeTokensAsUpdates(startOffsetOfRangeInDocument, tokens.endOffsetsAndMetadata, likelyRelevantPrefix.length);317} else {318return tokens.endOffsetsAndMetadata;319}320}321322323private _firstTreeUpdate(versionId: number) {324return this._setViewPortTokens(versionId);325}326327private _setViewPortTokens(versionId: number) {328const rangeChanges = this._visibleLineRanges.get().map<RangeChange | undefined>(lineRange => {329const range = lineRange.toInclusiveRange();330if 
(!range) { return undefined; }331const newRangeStartOffset = this._textModel.getOffsetAt(range.getStartPosition());332const newRangeEndOffset = this._textModel.getOffsetAt(range.getEndPosition());333return {334newRange: range,335newRangeEndOffset,336newRangeStartOffset,337};338}).filter(isDefined);339340return this._handleTreeUpdate(rangeChanges, versionId);341}342343/**344* Do not await in this method, it will cause a race345*/346private _handleTreeUpdate(ranges: RangeChange[], versionId: number) {347const rangeChanges: RangeWithOffsets[] = [];348const chunkSize = 1000;349350for (let i = 0; i < ranges.length; i++) {351const rangeLinesLength = ranges[i].newRange.endLineNumber - ranges[i].newRange.startLineNumber;352if (rangeLinesLength > chunkSize) {353// Split the range into chunks to avoid long operations354const fullRangeEndLineNumber = ranges[i].newRange.endLineNumber;355let chunkLineStart = ranges[i].newRange.startLineNumber;356let chunkColumnStart = ranges[i].newRange.startColumn;357let chunkLineEnd = chunkLineStart + chunkSize;358do {359const chunkStartingPosition = new Position(chunkLineStart, chunkColumnStart);360const chunkEndColumn = ((chunkLineEnd === ranges[i].newRange.endLineNumber) ? 
ranges[i].newRange.endColumn : this._textModel.getLineMaxColumn(chunkLineEnd));361const chunkEndPosition = new Position(chunkLineEnd, chunkEndColumn);362const chunkRange = Range.fromPositions(chunkStartingPosition, chunkEndPosition);363364rangeChanges.push({365range: chunkRange,366startOffset: this._textModel.getOffsetAt(chunkRange.getStartPosition()),367endOffset: this._textModel.getOffsetAt(chunkRange.getEndPosition())368});369370chunkLineStart = chunkLineEnd + 1;371chunkColumnStart = 1;372if (chunkLineEnd < fullRangeEndLineNumber && chunkLineEnd + chunkSize > fullRangeEndLineNumber) {373chunkLineEnd = fullRangeEndLineNumber;374} else {375chunkLineEnd = chunkLineEnd + chunkSize;376}377} while (chunkLineEnd <= fullRangeEndLineNumber);378} else {379// Check that the previous range doesn't overlap380if ((i === 0) || (rangeChanges[i - 1].endOffset < ranges[i].newRangeStartOffset)) {381rangeChanges.push({382range: ranges[i].newRange,383startOffset: ranges[i].newRangeStartOffset,384endOffset: ranges[i].newRangeEndOffset385});386} else if (rangeChanges[i - 1].endOffset < ranges[i].newRangeEndOffset) {387// clip the range to the previous range388const startPosition = this._textModel.getPositionAt(rangeChanges[i - 1].endOffset + 1);389const range = new Range(startPosition.lineNumber, startPosition.column, ranges[i].newRange.endLineNumber, ranges[i].newRange.endColumn);390rangeChanges.push({391range,392startOffset: rangeChanges[i - 1].endOffset + 1,393endOffset: ranges[i].newRangeEndOffset394});395}396}397}398399// Get the captures immediately while the text model is correct400const captures = rangeChanges.map(range => this._getCaptures(range.range));401// Don't block402return this._updateTreeForRanges(rangeChanges, versionId, captures).then(() => {403if (!this._textModel.isDisposed() && (this._tree.treeLastParsedVersion.get() === this._textModel.getVersionId())) {404this._refreshNeedsRefresh(versionId);405}406});407}408409private async _updateTreeForRanges(rangeChanges: 
RangeWithOffsets[], versionId: number, captures: QueryCapture[][]) {410let tokenUpdate: { newTokens: TokenUpdate[] } | undefined;411412for (let i = 0; i < rangeChanges.length; i++) {413if (!this._textModel.isDisposed() && versionId !== this._textModel.getVersionId()) {414// Our captures have become invalid and we need to re-capture415break;416}417const capture = captures[i];418const range = rangeChanges[i];419420const updates = this.getTokensInRange(range.range, range.startOffset, range.endOffset, capture);421if (updates) {422tokenUpdate = { newTokens: updates };423} else {424tokenUpdate = { newTokens: [] };425}426this._updateTokensInStore(versionId, [tokenUpdate], TokenQuality.Accurate);427this._onDidChangeTokens.fire({428changes: {429semanticTokensApplied: false,430ranges: [{ fromLineNumber: range.range.getStartPosition().lineNumber, toLineNumber: range.range.getEndPosition().lineNumber }]431}432});433await new Promise<void>(resolve => setTimeout0(resolve));434}435this._onDidCompleteBackgroundTokenization.fire();436}437438private _refreshNeedsRefresh(versionId: number) {439const rangesToRefresh = this._getNeedsRefresh();440if (rangesToRefresh.length === 0) {441return;442}443const rangeChanges: RangeChange[] = new Array(rangesToRefresh.length);444445for (let i = 0; i < rangesToRefresh.length; i++) {446const range = rangesToRefresh[i];447rangeChanges[i] = {448newRange: range.range,449newRangeStartOffset: range.startOffset,450newRangeEndOffset: range.endOffset451};452}453454this._handleTreeUpdate(rangeChanges, versionId);455}456457private _rangeTokensAsUpdates(rangeOffset: number, endOffsetToken: EndOffsetToken[], startingOffsetInArray?: number) {458const updates: TokenUpdate[] = [];459let lastEnd = 0;460for (const token of endOffsetToken) {461if (token.endOffset <= lastEnd || (startingOffsetInArray && (token.endOffset < startingOffsetInArray))) {462continue;463}464let tokenUpdate: TokenUpdate;465if (startingOffsetInArray && (lastEnd < startingOffsetInArray)) 
{466tokenUpdate = { startOffsetInclusive: rangeOffset + startingOffsetInArray, length: token.endOffset - startingOffsetInArray, token: token.metadata };467} else {468tokenUpdate = { startOffsetInclusive: rangeOffset + lastEnd, length: token.endOffset - lastEnd, token: token.metadata };469}470updates.push(tokenUpdate);471lastEnd = token.endOffset;472}473return updates;474}475476private _updateTheme() {477const modelRange = this._textModel.getFullModelRange();478this._markForRefresh(modelRange);479this._parseAndTokenizeViewPort(this._visibleLineRanges.get());480}481482// Was used for inspect editor tokens command483captureAtPosition(lineNumber: number, column: number): QueryCapture[] {484const captures = this.captureAtRangeWithInjections(new Range(lineNumber, column, lineNumber, column + 1));485return captures;486}487488// Was used for the colorization tests489captureAtRangeTree(range: Range): QueryCapture[] {490const captures = this.captureAtRangeWithInjections(range);491return captures;492}493494private captureAtRange(range: Range): QueryCapture[] {495const tree = this._tree.tree.get();496if (!tree) {497return [];498}499// Tree sitter row is 0 based, column is 0 based500return this._highlightingQueries.captures(tree.rootNode, { startPosition: { row: range.startLineNumber - 1, column: range.startColumn - 1 }, endPosition: { row: range.endLineNumber - 1, column: range.endColumn - 1 } }).map(capture => (501{502name: capture.name,503text: capture.node.text,504node: {505startIndex: capture.node.startIndex,506endIndex: capture.node.endIndex,507startPosition: {508lineNumber: capture.node.startPosition.row + 1,509column: capture.node.startPosition.column + 1510},511endPosition: {512lineNumber: capture.node.endPosition.row + 1,513column: capture.node.endPosition.column + 1514}515},516encodedLanguageId: this._encodedLanguageId517}518));519}520521private captureAtRangeWithInjections(range: Range): QueryCapture[] {522const captures: QueryCapture[] = 
this.captureAtRange(range);523for (let i = 0; i < captures.length; i++) {524const capture = captures[i];525526const capStartLine = capture.node.startPosition.lineNumber;527const capEndLine = capture.node.endPosition.lineNumber;528const capStartColumn = capture.node.startPosition.column;529const capEndColumn = capture.node.endPosition.column;530531const startLine = ((capStartLine > range.startLineNumber) && (capStartLine < range.endLineNumber)) ? capStartLine : range.startLineNumber;532const endLine = ((capEndLine > range.startLineNumber) && (capEndLine < range.endLineNumber)) ? capEndLine : range.endLineNumber;533const startColumn = (capStartLine === range.startLineNumber) ? (capStartColumn < range.startColumn ? range.startColumn : capStartColumn) : (capStartLine < range.startLineNumber ? range.startColumn : capStartColumn);534const endColumn = (capEndLine === range.endLineNumber) ? (capEndColumn > range.endColumn ? range.endColumn : capEndColumn) : (capEndLine > range.endLineNumber ? range.endColumn : capEndColumn);535const injectionRange = new Range(startLine, startColumn, endLine, endColumn);536537const injection = this._getInjectionCaptures(capture, injectionRange);538if (injection && injection.length > 0) {539captures.splice(i + 1, 0, ...injection);540i += injection.length;541}542}543return captures;544}545546/**547* Gets the tokens for a given line.548* Each token takes 2 elements in the array. 
The first element is the offset of the end of the token *in the line, not in the document*, and the second element is the metadata.549*550* @param lineNumber551* @returns552*/553public tokenizeEncoded(lineNumber: number) {554const tokens = this._tokenizeEncoded(lineNumber);555if (!tokens) {556return undefined;557}558const updates = this._rangeTokensAsUpdates(this._textModel.getOffsetAt({ lineNumber, column: 1 }), tokens.result);559if (tokens.versionId === this._textModel.getVersionId()) {560this._updateTokensInStore(tokens.versionId, [{ newTokens: updates, oldRangeLength: this._textModel.getLineLength(lineNumber) }], TokenQuality.Accurate);561}562}563564public tokenizeEncodedInstrumented(lineNumber: number): { result: Uint32Array; captureTime: number; metadataTime: number } | undefined {565const tokens = this._tokenizeEncoded(lineNumber);566if (!tokens) {567return undefined;568}569return { result: this._endOffsetTokensToUint32Array(tokens.result), captureTime: tokens.captureTime, metadataTime: tokens.metadataTime };570}571572private _getCaptures(range: Range): QueryCapture[] {573const captures = this.captureAtRangeWithInjections(range);574return captures;575}576577private _tokenize(range: Range, rangeStartOffset: number, rangeEndOffset: number): { endOffsetsAndMetadata: { endOffset: number; metadata: number }[]; versionId: number; captureTime: number; metadataTime: number } | undefined {578const captures = this._getCaptures(range);579const result = this._tokenizeCapturesWithMetadata(captures, rangeStartOffset, rangeEndOffset);580if (!result) {581return undefined;582}583return { ...result, versionId: this._tree.treeLastParsedVersion.get() };584}585586private _createTokensFromCaptures(captures: QueryCapture[], rangeStartOffset: number, rangeEndOffset: number): { endOffsets: EndOffsetAndScopes[]; captureTime: number } | undefined {587const tree = this._tree.tree.get();588const stopwatch = StopWatch.create();589const rangeLength = rangeEndOffset - 
rangeStartOffset;590const encodedLanguageId = this._languageIdCodec.encodeLanguageId(this._tree.languageId);591const baseScope: string = TREESITTER_BASE_SCOPES[this._tree.languageId] || 'source';592593if (captures.length === 0) {594if (tree) {595stopwatch.stop();596const endOffsetsAndMetadata = [{ endOffset: rangeLength, scopes: [], encodedLanguageId }];597return { endOffsets: endOffsetsAndMetadata, captureTime: stopwatch.elapsed() };598}599return undefined;600}601602const endOffsetsAndScopes: EndOffsetAndScopes[] = Array(captures.length);603endOffsetsAndScopes.fill({ endOffset: 0, scopes: [baseScope], encodedLanguageId });604let tokenIndex = 0;605606const increaseSizeOfTokensByOneToken = () => {607endOffsetsAndScopes.push({ endOffset: 0, scopes: [baseScope], encodedLanguageId });608};609610const brackets = (capture: QueryCapture, startOffset: number): number[] | undefined => {611return (capture.name.includes('punctuation') && capture.text) ? Array.from(capture.text.matchAll(BRACKETS)).map(match => startOffset + match.index) : undefined;612};613614const addCurrentTokenToArray = (capture: QueryCapture, startOffset: number, endOffset: number, position?: number) => {615if (position !== undefined) {616const oldScopes = endOffsetsAndScopes[position].scopes;617let oldBracket = endOffsetsAndScopes[position].bracket;618// Check that the previous token ends at the same point that the current token starts619const prevEndOffset = position > 0 ? 
endOffsetsAndScopes[position - 1].endOffset : 0;620if (prevEndOffset !== startOffset) {621let preInsertBracket: number[] | undefined = undefined;622if (oldBracket && oldBracket.length > 0) {623preInsertBracket = [];624const postInsertBracket: number[] = [];625for (let i = 0; i < oldBracket.length; i++) {626const bracket = oldBracket[i];627if (bracket < startOffset) {628preInsertBracket.push(bracket);629} else if (bracket > endOffset) {630postInsertBracket.push(bracket);631}632}633if (preInsertBracket.length === 0) {634preInsertBracket = undefined;635}636if (postInsertBracket.length === 0) {637oldBracket = undefined;638} else {639oldBracket = postInsertBracket;640}641}642// We need to add some of the position token to cover the space643endOffsetsAndScopes.splice(position, 0, { endOffset: startOffset, scopes: [...oldScopes], bracket: preInsertBracket, encodedLanguageId: capture.encodedLanguageId });644position++;645increaseSizeOfTokensByOneToken();646tokenIndex++;647}648649endOffsetsAndScopes.splice(position, 0, { endOffset: endOffset, scopes: [...oldScopes, capture.name], bracket: brackets(capture, startOffset), encodedLanguageId: capture.encodedLanguageId });650endOffsetsAndScopes[tokenIndex].bracket = oldBracket;651} else {652endOffsetsAndScopes[tokenIndex] = { endOffset: endOffset, scopes: [baseScope, capture.name], bracket: brackets(capture, startOffset), encodedLanguageId: capture.encodedLanguageId };653}654tokenIndex++;655};656657for (let captureIndex = 0; captureIndex < captures.length; captureIndex++) {658const capture = captures[captureIndex];659const tokenEndIndex = capture.node.endIndex < rangeEndOffset ? ((capture.node.endIndex < rangeStartOffset) ? rangeStartOffset : capture.node.endIndex) : rangeEndOffset;660const tokenStartIndex = capture.node.startIndex < rangeStartOffset ? 
rangeStartOffset : capture.node.startIndex;661662const endOffset = tokenEndIndex - rangeStartOffset;663664// Not every character will get captured, so we need to make sure that our current capture doesn't bleed toward the start of the line and cover characters that it doesn't apply to.665// We do this by creating a new token in the array if the previous token ends before the current token starts.666let previousEndOffset: number;667const currentTokenLength = tokenEndIndex - tokenStartIndex;668if (captureIndex > 0) {669previousEndOffset = endOffsetsAndScopes[(tokenIndex - 1)].endOffset;670} else {671previousEndOffset = tokenStartIndex - rangeStartOffset - 1;672}673const startOffset = endOffset - currentTokenLength;674if ((previousEndOffset >= 0) && (previousEndOffset < startOffset)) {675// Add en empty token to cover the space where there were no captures676endOffsetsAndScopes[tokenIndex] = { endOffset: startOffset, scopes: [baseScope], encodedLanguageId: this._encodedLanguageId };677tokenIndex++;678679increaseSizeOfTokensByOneToken();680}681682if (currentTokenLength < 0) {683// This happens when we have a token "gap" right at the end of the capture range. The last capture isn't used because it's start index isn't included in the range.684continue;685}686687if (previousEndOffset >= endOffset) {688// walk back through the tokens until we find the one that contains the current token689let withinTokenIndex = tokenIndex - 1;690let previousTokenEndOffset = endOffsetsAndScopes[withinTokenIndex].endOffset;691692let previousTokenStartOffset = ((withinTokenIndex >= 2) ? 
endOffsetsAndScopes[withinTokenIndex - 1].endOffset : 0);693do {694695// Check that the current token doesn't just replace the last token696if ((previousTokenStartOffset + currentTokenLength) === previousTokenEndOffset) {697if (previousTokenStartOffset === startOffset) {698// Current token and previous token span the exact same characters, add the scopes to the previous token699endOffsetsAndScopes[withinTokenIndex].scopes.push(capture.name);700const oldBracket = endOffsetsAndScopes[withinTokenIndex].bracket;701endOffsetsAndScopes[withinTokenIndex].bracket = ((oldBracket && (oldBracket.length > 0)) ? oldBracket : brackets(capture, startOffset));702}703} else if (previousTokenStartOffset <= startOffset) {704addCurrentTokenToArray(capture, startOffset, endOffset, withinTokenIndex);705break;706}707withinTokenIndex--;708previousTokenStartOffset = ((withinTokenIndex >= 1) ? endOffsetsAndScopes[withinTokenIndex - 1].endOffset : 0);709previousTokenEndOffset = ((withinTokenIndex >= 0) ? endOffsetsAndScopes[withinTokenIndex].endOffset : 0);710} while (previousTokenEndOffset > startOffset);711} else {712// Just add the token to the array713addCurrentTokenToArray(capture, startOffset, endOffset);714}715}716717// Account for uncaptured characters at the end of the line718if ((endOffsetsAndScopes[tokenIndex - 1].endOffset < rangeLength)) {719if (rangeLength - endOffsetsAndScopes[tokenIndex - 1].endOffset > 0) {720increaseSizeOfTokensByOneToken();721endOffsetsAndScopes[tokenIndex] = { endOffset: rangeLength, scopes: endOffsetsAndScopes[tokenIndex].scopes, encodedLanguageId: this._encodedLanguageId };722tokenIndex++;723}724}725for (let i = 0; i < endOffsetsAndScopes.length; i++) {726const token = endOffsetsAndScopes[i];727if (token.endOffset === 0 && i !== 0) {728endOffsetsAndScopes.splice(i, endOffsetsAndScopes.length - i);729break;730}731}732const captureTime = stopwatch.elapsed();733return { endOffsets: endOffsetsAndScopes as { endOffset: number; scopes: string[]; 
encodedLanguageId: LanguageId }[], captureTime };
	}

	/**
	 * Stub for collecting captures from an injected language.
	 *
	 * The previous implementation is kept below as a commented-out sketch; at present
	 * neither parameter is read and an empty list is always returned.
	 */
	private _getInjectionCaptures(parentCapture: QueryCapture, range: Range): QueryCapture[] {
		/*
				const injection = textModelTreeSitter.getInjection(parentCapture.node.startIndex, this._treeSitterModel.languageId);
				if (!injection?.tree || injection.versionId !== textModelTreeSitter.parseResult?.versionId) {
					return undefined;
				}

				const feature = TreeSitterTokenizationRegistry.get(injection.languageId);
				if (!feature) {
					return undefined;
				}
				return feature.tokSupport_captureAtRangeTree(range, injection.tree, textModelTreeSitter);*/
		const noCaptures: QueryCapture[] = [];
		return noCaptures;
	}

	/**
	 * Builds scoped tokens from `captures` via `_createTokensFromCaptures` and then asks the
	 * theme service for a metadata value for each of them.
	 *
	 * @returns the tokens (now carrying `metadata`), the capture time reported by
	 * `_createTokensFromCaptures`, and the stopwatch reading taken after the metadata pass;
	 * `undefined` when no tokens could be created.
	 */
	private _tokenizeCapturesWithMetadata(captures: QueryCapture[], rangeStartOffset: number, rangeEndOffset: number): { endOffsetsAndMetadata: EndOffsetToken[]; captureTime: number; metadataTime: number } | undefined {
		const timer = StopWatch.create();
		const scopedTokens = this._createTokensFromCaptures(captures, rangeStartOffset, rangeEndOffset);
		if (!scopedTokens) {
			return undefined;
		}

		// The tokens are annotated in place: each one gains a `metadata` field.
		const tokens: EndOffsetWithMeta[] = scopedTokens.endOffsets;
		for (const token of tokens) {
			const hasBracket = !!token.bracket && (token.bracket.length > 0);
			token.metadata = this._treeSitterThemeService.findMetadata(token.scopes, token.encodedLanguageId, hasBracket, undefined);
		}

		// Measured from the start of this method, so it includes the capture phase as well.
		const metadataTime = timer.elapsed();
		return {
			endOffsetsAndMetadata: tokens as { endOffset: number; scopes: string[]; metadata: number }[],
			captureTime: scopedTokens.captureTime,
			metadataTime
		};
	}

	/**
	 * Tokenizes a single line of the text model.
	 *
	 * The line's offset span runs from its first column to the first column of the following
	 * line, or to the end of the model's value when `lineNumber` is the last line.
	 *
	 * @returns the tokens together with timing information and the version id reported by
	 * `_tokenize`, or `undefined` when `_tokenize` produced no result.
	 */
	private _tokenizeEncoded(lineNumber: number): { result: EndOffsetToken[]; captureTime: number; metadataTime: number; versionId: number } | undefined {
		const lineStartOffset = this._textModel.getOffsetAt({ lineNumber, column: 1 });
		const lineCount = this._textModel.getLineCount();

		let lineEndOffset: number;
		if (lineNumber + 1 <= lineCount) {
			lineEndOffset = this._textModel.getOffsetAt({ lineNumber: lineNumber + 1, column: 1 });
		} else {
			lineEndOffset = this._textModel.getValueLength();
		}

		const lineLength = lineEndOffset - lineStartOffset;
		const lineRange = new Range(lineNumber, 1, lineNumber, lineLength + 1);
		const tokenized = this._tokenize(lineRange, lineStartOffset, lineEndOffset);
		if (!tokenized) {
			return undefined;
		}

		const { endOffsetsAndMetadata, captureTime, metadataTime, versionId } = tokenized;
		return { result: endOffsetsAndMetadata, captureTime, metadataTime, versionId };
	}

	/**
	 * Flattens tokens into a `Uint32Array` of interleaved pairs:
	 * `[endOffset0, metadata0, endOffset1, metadata1, ...]`.
	 */
	private _endOffsetTokensToUint32Array(endOffsetsAndMetadata: EndOffsetToken[]): Uint32Array {
		const packed = new Uint32Array(endOffsetsAndMetadata.length * 2);
		let writeIndex = 0;
		for (const { endOffset, metadata } of endOffsetsAndMetadata) {
			packed[writeIndex++] = endOffset;
			packed[writeIndex++] = metadata;
		}
		return packed;
	}
}


/** A token identified by where it ends, paired with its metadata value. */
interface EndOffsetToken {
	endOffset: number;
	metadata: number;
}

/** A token identified by where it ends, paired with the scope names collected for it. */
interface EndOffsetAndScopes {
	endOffset: number;
	scopes: string[];
	// NOTE(review): presumably positions of bracket characters inside the token — confirm against `brackets(...)`.
	bracket?: number[];
	encodedLanguageId: LanguageId;
}

/** An {@link EndOffsetAndScopes} token whose metadata may already have been resolved. */
interface EndOffsetWithMeta extends EndOffsetAndScopes {
	metadata?: number;
}

/**
 * Base scope name for each listed key.
 * NOTE(review): the keys look like language ids — confirm against the callers.
 */
export const TREESITTER_BASE_SCOPES: Record<string, string> = {
	'css': 'source.css',
	'typescript': 'source.ts',
	'ini': 'source.ini',
	'regex': 'source.regex',
};

/** Matches any single curly, square, angle or round bracket character (global flag set). */
const BRACKETS = /[\{\}\[\]\<\>\(\)]/g;
