Path: blob/main/extensions/copilot/src/platform/parser/node/structure.ts
13401 views
/*---------------------------------------------------------------------------------------------1* Copyright (c) Microsoft Corporation. All rights reserved.2* Licensed under the MIT License. See License.txt in the project root for license information.3*--------------------------------------------------------------------------------------------*/45import { SyntaxNode } from 'web-tree-sitter';6import { LRUCache } from '../../../util/common/cache';7import { OverlayNode, TreeSitterOffsetRange } from './nodes';8import { _parse } from './parserWithCaching';9import { runQueries } from './querying';10import { WASMLanguage } from './treeSitterLanguages';11import { syntacticallyValidAtoms } from './treeSitterQueries';1213export class StructureComputer {1415private _cache = new LRUCache<OverlayNode | undefined>(5);1617public setCacheSize(size: number) {18this._cache = new LRUCache(size);19}2021public async getStructure(lang: WASMLanguage, source: string): Promise<OverlayNode | undefined> {22const cacheKey = `${lang}:${source}`;23let cacheValue = this._cache.get(cacheKey);24if (!cacheValue) {25cacheValue = await this._getStructure(lang, source);26this._cache.put(cacheKey, cacheValue);27}28return cacheValue;29}3031private async _getStructure(lang: WASMLanguage, source: string): Promise<OverlayNode | undefined> {32const queries = syntacticallyValidAtoms[lang];3334if (queries.length === 0) {35// language not supported36return undefined;37}3839const treeRef = await _parse(lang, source);4041try {42const captures = runQueries(queries, treeRef.tree.rootNode)43.flatMap(e => e.captures)44.sort((a, b) => TreeSitterOffsetRange.compare(a.node, b.node));4546// Exclude captures contained in ranges marked with ".exclude_captures"47const excludedRanges: TreeSitterOffsetRange[] = [];48for (const capture of captures) {49if (capture.name.endsWith('.exclude_captures')) {50excludedRanges.push(TreeSitterOffsetRange.ofSyntaxNode(capture.node));51}52}5354const root = new OverlayNode(0, source.length, 'root', []);5556const parentStack = [root];5758for (let i = 0; i < captures.length; ++i) {59const currentCapture = captures[i];60const currentNode = currentCapture.node;6162if (excludedRanges.some(r => TreeSitterOffsetRange.isEqual(r, currentNode))) {63// This node should be excluded64continue;65}6667// find a parent node that contains the current capture68let currentParent: OverlayNode;69do {70currentParent = parentStack.pop()!; // ! because we know there will be the root node71} while (currentParent && !TreeSitterOffsetRange.doesContain(currentParent, currentNode));7273// nodes that need to be merged with their child, e.g.,74// Given `export const foo = 1;`, we can't just remove `const foo = 1;` because then syntax doesn't make sense75const ambientParents = new Set(['export_statement', 'ambient_declaration']);7677if (ambientParents.has(currentParent.kind)) { // merge the parent with the child7879currentParent.kind = currentNode.type;80parentStack.push(currentParent);8182} else {83// get a more specific node kind84// js/ts/tsx: kind `method_definition` with identifier "constructor" -> kind `constructor`85let nodeKind = currentNode.type;86if ((lang === WASMLanguage.TypeScript || lang === WASMLanguage.TypeScriptTsx || lang === WASMLanguage.JavaScript) &&87nodeKind === 'method_definition' && currentNode.namedChildren.some(c => c.type === 'property_identifier' && c.text === 'constructor')) {88nodeKind = 'constructor';89}9091let startIndex = currentNode.startIndex;9293const prevSibling = currentNode.previousSibling;94if (prevSibling !== null) {95const textBetweenNodes = source.substring(prevSibling.endIndex, currentNode.startIndex);96const nlIdx = textBetweenNodes.indexOf('\n');97if (nlIdx === -1) {98startIndex = prevSibling.endIndex;99} else {100startIndex = prevSibling.endIndex + nlIdx + 1;101}102}103104let endIndex = currentNode.endIndex;105106// currentNode should subsume sibling nodes that are trivial, eg `;`, `,`, same-line comment107// if trivial sibling node is itself captured as a separate node, then it becomes a child node of currentNode, ie currentNode would have a comment as a child108// else it's just part of the node109if (currentNode.nextSibling !== null) {110let nextSibling: SyntaxNode | null = currentNode.nextSibling;111112if (lang === WASMLanguage.TypeScript || lang === WASMLanguage.TypeScriptTsx || lang === WASMLanguage.JavaScript || lang === WASMLanguage.Cpp) {113while (nextSibling &&114(nextSibling.type === ';' ||115nextSibling.type === ',' ||116(nextSibling.type === 'comment' && !source.substring(endIndex, nextSibling.startIndex).includes('\n') /* on the same line */))117) {118excludedRanges.push(TreeSitterOffsetRange.ofSyntaxNode(nextSibling));119endIndex = nextSibling.endIndex;120nextSibling = nextSibling.nextSibling;121}122}123124if (nextSibling !== null) {125const textBetweenNodes = source.substring(endIndex, nextSibling.startIndex);126const nlIdx = textBetweenNodes.indexOf('\n');127if (nlIdx !== -1) {128endIndex = endIndex + nlIdx + 1;129}130}131}132133const newNode = new OverlayNode(startIndex, endIndex, nodeKind, []);134currentParent.children.push(newNode);135parentStack.push(currentParent, newNode);136}137}138139return root;140141} catch (e) {142console.error(e instanceof Error ? e : new Error(e));143} finally {144treeRef.dispose();145}146}147}148149export const structureComputer = new StructureComputer();150151152