Path: blob/main/src/vs/editor/common/model/tokens/treeSitter/treeSitterTree.ts
3296 views
/*---------------------------------------------------------------------------------------------1* Copyright (c) Microsoft Corporation. All rights reserved.2* Licensed under the MIT License. See License.txt in the project root for license information.3*--------------------------------------------------------------------------------------------*/4import type * as TreeSitter from '@vscode/tree-sitter-wasm';5import { TaskQueue } from '../../../../../base/common/async.js';6import { Disposable, toDisposable } from '../../../../../base/common/lifecycle.js';7import { IObservable, observableValue, transaction, IObservableWithChange } from '../../../../../base/common/observable.js';8import { setTimeout0 } from '../../../../../base/common/platform.js';9import { ILogService } from '../../../../../platform/log/common/log.js';10import { ITelemetryService } from '../../../../../platform/telemetry/common/telemetry.js';11import { TextLength } from '../../../core/text/textLength.js';12import { IModelContentChangedEvent } from '../../../textModelEvents.js';13import { IModelContentChange } from '../../mirrorTextModel.js';14import { TextModel } from '../../textModel.js';15import { gotoParent, getClosestPreviousNodes, nextSiblingOrParentSibling, gotoNthChild } from './cursorUtils.js';16import { Range } from '../../../core/range.js';1718export class TreeSitterTree extends Disposable {1920private readonly _tree = observableValue<TreeSitter.Tree | undefined, TreeParseUpdateEvent>(this, undefined);21public readonly tree: IObservableWithChange<TreeSitter.Tree | undefined, TreeParseUpdateEvent> = this._tree;2223private readonly _treeLastParsedVersion = observableValue(this, -1);24public readonly treeLastParsedVersion: IObservable<number> = this._treeLastParsedVersion;2526private _lastFullyParsed: TreeSitter.Tree | undefined;27private _lastFullyParsedWithEdits: TreeSitter.Tree | undefined;2829private _onDidChangeContentQueue: TaskQueue = new TaskQueue();3031constructor(32public readonly languageId: string,33private _ranges: TreeSitter.Range[] | undefined,34// readonly treeSitterLanguage: Language,35/** Must have the language set! */36private readonly _parser: TreeSitter.Parser,37private readonly _parserClass: typeof TreeSitter.Parser,38// private readonly _injectionQuery: TreeSitter.Query,39public readonly textModel: TextModel,40@ILogService private readonly _logService: ILogService,41@ITelemetryService private readonly _telemetryService: ITelemetryService42) {43super();4445this._tree = observableValue(this, undefined);46this.tree = this._tree;4748this._register(toDisposable(() => {49this._tree.get()?.delete();50this._lastFullyParsed?.delete();51this._lastFullyParsedWithEdits?.delete();52this._parser.delete();53}));54this.handleContentChange(undefined, this._ranges);55}5657public handleContentChange(e: IModelContentChangedEvent | undefined, ranges?: TreeSitter.Range[]): void {58const version = this.textModel.getVersionId();59let newRanges: TreeSitter.Range[] = [];60if (ranges) {61newRanges = this._setRanges(ranges);62}63if (e) {64this._applyEdits(e.changes);65}6667this._onDidChangeContentQueue.clearPending();68this._onDidChangeContentQueue.schedule(async () => {69if (this._store.isDisposed) {70// No need to continue the queue if we are disposed71return;72}7374const oldTree = this._lastFullyParsed;75let changedNodes: TreeSitter.Range[] | undefined;76if (this._lastFullyParsedWithEdits && this._lastFullyParsed) {77changedNodes = this._findChangedNodes(this._lastFullyParsedWithEdits, this._lastFullyParsed);78}7980const completed = await this._parseAndUpdateTree(version);81if (completed) {82let ranges: RangeChange[] | undefined;83if (!changedNodes) {84if (this._ranges) {85ranges = this._ranges.map(r => ({ newRange: new Range(r.startPosition.row + 1, r.startPosition.column + 1, r.endPosition.row + 1, r.endPosition.column + 1), oldRangeLength: r.endIndex - r.startIndex, newRangeStartOffset: r.startIndex, newRangeEndOffset: r.endIndex }));86}87} else if (oldTree && changedNodes) {88ranges = this._findTreeChanges(completed, changedNodes, newRanges);89}90if (!ranges) {91ranges = [{ newRange: this.textModel.getFullModelRange(), newRangeStartOffset: 0, newRangeEndOffset: this.textModel.getValueLength() }];92}9394const previousTree = this._tree.get();95transaction(tx => {96this._tree.set(completed, tx, { ranges, versionId: version });97this._treeLastParsedVersion.set(version, tx);98});99previousTree?.delete();100}101});102}103104get ranges(): TreeSitter.Range[] | undefined {105return this._ranges;106}107108public getInjectionTrees(startIndex: number, languageId: string): TreeSitterTree | undefined {109// TODO110return undefined;111}112113private _applyEdits(changes: IModelContentChange[]) {114for (const change of changes) {115const originalTextLength = TextLength.ofRange(Range.lift(change.range));116const newTextLength = TextLength.ofText(change.text);117const summedTextLengths = change.text.length === 0 ? newTextLength : originalTextLength.add(newTextLength);118const edit = {119startIndex: change.rangeOffset,120oldEndIndex: change.rangeOffset + change.rangeLength,121newEndIndex: change.rangeOffset + change.text.length,122startPosition: { row: change.range.startLineNumber - 1, column: change.range.startColumn - 1 },123oldEndPosition: { row: change.range.endLineNumber - 1, column: change.range.endColumn - 1 },124newEndPosition: { row: change.range.startLineNumber + summedTextLengths.lineCount - 1, column: summedTextLengths.lineCount ? summedTextLengths.columnCount : (change.range.endColumn + summedTextLengths.columnCount) }125};126this._tree.get()?.edit(edit);127this._lastFullyParsedWithEdits?.edit(edit);128}129}130131private _findChangedNodes(newTree: TreeSitter.Tree, oldTree: TreeSitter.Tree): TreeSitter.Range[] | undefined {132if ((this._ranges && this._ranges.every(range => range.startPosition.row !== newTree.rootNode.startPosition.row)) || newTree.rootNode.startPosition.row !== 0) {133return [];134}135const newCursor = newTree.walk();136const oldCursor = oldTree.walk();137138const nodes: TreeSitter.Range[] = [];139let next = true;140141do {142if (newCursor.currentNode.hasChanges) {143// Check if only one of the children has changes.144// If it's only one, then we go to that child.145// If it's more then, we need to go to each child146// If it's none, then we've found one of our ranges147const newChildren = newCursor.currentNode.children;148const indexChangedChildren: number[] = [];149const changedChildren = newChildren.filter((c, index) => {150if (c?.hasChanges || (oldCursor.currentNode.children.length <= index)) {151indexChangedChildren.push(index);152return true;153}154return false;155});156// If we have changes and we *had* an error, the whole node should be refreshed.157if ((changedChildren.length === 0) || (newCursor.currentNode.hasError !== oldCursor.currentNode.hasError)) {158// walk up again until we get to the first one that's named as unnamed nodes can be too granular159while (newCursor.currentNode.parent && next && !newCursor.currentNode.isNamed) {160next = gotoParent(newCursor, oldCursor);161}162// Use the end position of the previous node and the start position of the current node163const newNode = newCursor.currentNode;164const closestPreviousNode = getClosestPreviousNodes(newCursor, newTree) ?? newNode;165nodes.push({166startIndex: closestPreviousNode.startIndex,167endIndex: newNode.endIndex,168startPosition: closestPreviousNode.startPosition,169endPosition: newNode.endPosition170});171next = nextSiblingOrParentSibling(newCursor, oldCursor);172} else if (changedChildren.length >= 1) {173next = gotoNthChild(newCursor, oldCursor, indexChangedChildren[0]);174}175} else {176next = nextSiblingOrParentSibling(newCursor, oldCursor);177}178} while (next);179180newCursor.delete();181oldCursor.delete();182return nodes;183}184185private _findTreeChanges(newTree: TreeSitter.Tree, changedNodes: TreeSitter.Range[], newRanges: TreeSitter.Range[]): RangeChange[] {186let newRangeIndex = 0;187const mergedChanges: RangeChange[] = [];188189// Find the parent in the new tree of the changed node190for (let nodeIndex = 0; nodeIndex < changedNodes.length; nodeIndex++) {191const node = changedNodes[nodeIndex];192193if (mergedChanges.length > 0) {194if ((node.startIndex >= mergedChanges[mergedChanges.length - 1].newRangeStartOffset) && (node.endIndex <= mergedChanges[mergedChanges.length - 1].newRangeEndOffset)) {195// This node is within the previous range, skip it196continue;197}198}199200const cursor = newTree.walk();201const cursorContainersNode = () => cursor.startIndex < node.startIndex && cursor.endIndex > node.endIndex;202203while (cursorContainersNode()) {204// See if we can go to a child205let child = cursor.gotoFirstChild();206let foundChild = false;207while (child) {208if (cursorContainersNode() && cursor.currentNode.isNamed) {209foundChild = true;210break;211} else {212child = cursor.gotoNextSibling();213}214}215if (!foundChild) {216cursor.gotoParent();217break;218}219if (cursor.currentNode.childCount === 0) {220break;221}222}223224const startPosition = cursor.currentNode.startPosition;225const endPosition = cursor.currentNode.endPosition;226const startIndex = cursor.currentNode.startIndex;227const endIndex = cursor.currentNode.endIndex;228229const newChange = { newRange: new Range(startPosition.row + 1, startPosition.column + 1, endPosition.row + 1, endPosition.column + 1), newRangeStartOffset: startIndex, newRangeEndOffset: endIndex };230if ((newRangeIndex < newRanges.length) && rangesIntersect(newRanges[newRangeIndex], { startIndex, endIndex, startPosition, endPosition })) {231// combine the new change with the range232if (newRanges[newRangeIndex].startIndex < newChange.newRangeStartOffset) {233newChange.newRange = newChange.newRange.setStartPosition(newRanges[newRangeIndex].startPosition.row + 1, newRanges[newRangeIndex].startPosition.column + 1);234newChange.newRangeStartOffset = newRanges[newRangeIndex].startIndex;235}236if (newRanges[newRangeIndex].endIndex > newChange.newRangeEndOffset) {237newChange.newRange = newChange.newRange.setEndPosition(newRanges[newRangeIndex].endPosition.row + 1, newRanges[newRangeIndex].endPosition.column + 1);238newChange.newRangeEndOffset = newRanges[newRangeIndex].endIndex;239}240newRangeIndex++;241} else if (newRangeIndex < newRanges.length && newRanges[newRangeIndex].endIndex < newChange.newRangeStartOffset) {242// add the full range to the merged changes243mergedChanges.push({244newRange: new Range(newRanges[newRangeIndex].startPosition.row + 1, newRanges[newRangeIndex].startPosition.column + 1, newRanges[newRangeIndex].endPosition.row + 1, newRanges[newRangeIndex].endPosition.column + 1),245newRangeStartOffset: newRanges[newRangeIndex].startIndex,246newRangeEndOffset: newRanges[newRangeIndex].endIndex247});248}249250if ((mergedChanges.length > 0) && (mergedChanges[mergedChanges.length - 1].newRangeEndOffset >= newChange.newRangeStartOffset)) {251// Merge the changes252mergedChanges[mergedChanges.length - 1].newRange = Range.fromPositions(mergedChanges[mergedChanges.length - 1].newRange.getStartPosition(), newChange.newRange.getEndPosition());253mergedChanges[mergedChanges.length - 1].newRangeEndOffset = newChange.newRangeEndOffset;254} else {255mergedChanges.push(newChange);256}257}258return this._constrainRanges(mergedChanges);259}260261private _constrainRanges(changes: RangeChange[]): RangeChange[] {262if (!this._ranges) {263return changes;264}265266const constrainedChanges: RangeChange[] = [];267let changesIndex = 0;268let rangesIndex = 0;269while (changesIndex < changes.length && rangesIndex < this._ranges.length) {270const change = changes[changesIndex];271const range = this._ranges[rangesIndex];272if (change.newRangeEndOffset < range.startIndex) {273// Change is before the range, move to the next change274changesIndex++;275} else if (change.newRangeStartOffset > range.endIndex) {276// Change is after the range, move to the next range277rangesIndex++;278} else {279// Change is within the range, constrain it280const newRangeStartOffset = Math.max(change.newRangeStartOffset, range.startIndex);281const newRangeEndOffset = Math.min(change.newRangeEndOffset, range.endIndex);282const newRange = change.newRange.intersectRanges(new Range(range.startPosition.row + 1, range.startPosition.column + 1, range.endPosition.row + 1, range.endPosition.column + 1))!;283constrainedChanges.push({284newRange,285newRangeEndOffset,286newRangeStartOffset287});288// Remove the intersected range from the current change289if (newRangeEndOffset < change.newRangeEndOffset) {290change.newRange = Range.fromPositions(newRange.getEndPosition(), change.newRange.getEndPosition());291change.newRangeStartOffset = newRangeEndOffset + 1;292} else {293// Move to the next change294changesIndex++;295}296}297}298299return constrainedChanges;300}301302private async _parseAndUpdateTree(version: number): Promise<TreeSitter.Tree | undefined> {303const tree = await this._parse();304if (tree) {305this._lastFullyParsed?.delete();306this._lastFullyParsed = tree.copy();307this._lastFullyParsedWithEdits?.delete();308this._lastFullyParsedWithEdits = tree.copy();309310return tree;311} else if (!this._tree.get()) {312// No tree means this is the initial parse and there were edits313// parse function doesn't handle this well and we can end up with an incorrect tree, so we reset314this._parser.reset();315}316return undefined;317}318319private _parse(): Promise<TreeSitter.Tree | undefined> {320let parseType: TelemetryParseType = TelemetryParseType.Full;321if (this._tree.get()) {322parseType = TelemetryParseType.Incremental;323}324return this._parseAndYield(parseType);325}326327private async _parseAndYield(parseType: TelemetryParseType): Promise<TreeSitter.Tree | undefined> {328let time: number = 0;329let passes: number = 0;330const inProgressVersion = this.textModel.getVersionId();331let newTree: TreeSitter.Tree | null | undefined;332333const progressCallback = newTimeOutProgressCallback();334335do {336const timer = performance.now();337338newTree = this._parser.parse((index: number, position?: TreeSitter.Point) => this._parseCallback(index), this._tree.get(), { progressCallback, includedRanges: this._ranges });339340time += performance.now() - timer;341passes++;342343// So long as this isn't the initial parse, even if the model changes and edits are applied, the tree parsing will continue correctly after the await.344await new Promise<void>(resolve => setTimeout0(resolve));345346} while (!this._store.isDisposed && !newTree && inProgressVersion === this.textModel.getVersionId());347this._sendParseTimeTelemetry(parseType, time, passes);348return (newTree && (inProgressVersion === this.textModel.getVersionId())) ? newTree : undefined;349}350351private _parseCallback(index: number): string | undefined {352try {353return this.textModel.getTextBuffer().getNearestChunk(index);354} catch (e) {355this._logService.debug('Error getting chunk for tree-sitter parsing', e);356}357return undefined;358}359360private _setRanges(newRanges: TreeSitter.Range[]): TreeSitter.Range[] {361const unKnownRanges: TreeSitter.Range[] = [];362// If we have existing ranges, find the parts of the new ranges that are not included in the existing ones363if (this._ranges) {364for (const newRange of newRanges) {365let isFullyIncluded = false;366367for (let i = 0; i < this._ranges.length; i++) {368const existingRange = this._ranges[i];369370if (rangesEqual(existingRange, newRange) || rangesIntersect(existingRange, newRange)) {371isFullyIncluded = true;372break;373}374}375376if (!isFullyIncluded) {377unKnownRanges.push(newRange);378}379}380} else {381// No existing ranges, all new ranges are unknown382unKnownRanges.push(...newRanges);383}384385this._ranges = newRanges;386return unKnownRanges;387}388389private _sendParseTimeTelemetry(parseType: TelemetryParseType, time: number, passes: number): void {390this._logService.debug(`Tree parsing (${parseType}) took ${time} ms and ${passes} passes.`);391type ParseTimeClassification = {392owner: 'alexr00';393comment: 'Used to understand how long it takes to parse a tree-sitter tree';394languageId: { classification: 'SystemMetaData'; purpose: 'FeatureInsight'; comment: 'The programming language ID.' };395time: { classification: 'SystemMetaData'; purpose: 'FeatureInsight'; isMeasurement: true; comment: 'The ms it took to parse' };396passes: { classification: 'SystemMetaData'; purpose: 'FeatureInsight'; isMeasurement: true; comment: 'The number of passes it took to parse' };397};398if (parseType === TelemetryParseType.Full) {399this._telemetryService.publicLog2<{ languageId: string; time: number; passes: number }, ParseTimeClassification>(`treeSitter.fullParse`, { languageId: this.languageId, time, passes });400} else {401this._telemetryService.publicLog2<{ languageId: string; time: number; passes: number }, ParseTimeClassification>(`treeSitter.incrementalParse`, { languageId: this.languageId, time, passes });402}403}404405public createParsedTreeSync(src: string): TreeSitter.Tree | undefined {406const parser = new this._parserClass();407parser.setLanguage(this._parser.language!);408const tree = parser.parse(src);409parser.delete();410return tree ?? undefined;411}412}413414const enum TelemetryParseType {415Full = 'fullParse',416Incremental = 'incrementalParse'417}418419export interface TreeParseUpdateEvent {420ranges: RangeChange[];421versionId: number;422}423424export interface RangeWithOffsets {425range: Range;426startOffset: number;427endOffset: number;428}429430export interface RangeChange {431newRange: Range;432newRangeStartOffset: number;433newRangeEndOffset: number;434}435436function newTimeOutProgressCallback(): (state: TreeSitter.ParseState) => void {437let lastYieldTime: number = performance.now();438return function parseProgressCallback(_state: TreeSitter.ParseState) {439const now = performance.now();440if (now - lastYieldTime > 50) {441lastYieldTime = now;442return true;443}444return false;445};446}447export function rangesEqual(a: TreeSitter.Range, b: TreeSitter.Range) {448return (a.startPosition.row === b.startPosition.row)449&& (a.startPosition.column === b.startPosition.column)450&& (a.endPosition.row === b.endPosition.row)451&& (a.endPosition.column === b.endPosition.column)452&& (a.startIndex === b.startIndex)453&& (a.endIndex === b.endIndex);454}455456export function rangesIntersect(a: TreeSitter.Range, b: TreeSitter.Range) {457return (a.startIndex <= b.startIndex && a.endIndex >= b.startIndex) ||458(b.startIndex <= a.startIndex && b.endIndex >= a.startIndex);459}460461462