Path: blob/main/extensions/copilot/src/platform/parser/node/parserImpl.ts
13401 views
/*---------------------------------------------------------------------------------------------
 *  Copyright (c) Microsoft Corporation. All rights reserved.
 *  Licensed under the MIT License. See License.txt in the project root for license information.
 *--------------------------------------------------------------------------------------------*/

import { findInsertionIndexInSortedArray } from '../../../util/common/arrays';
import { BlockNameDetail, DetailBlock, GenericDetail, MatchGroup, PythonDetail, QueryMatchTree } from './chunkGroupTypes';
import { Node, OverlayNode, TreeSitterChunkHeaderInfo, TreeSitterExpressionInfo, TreeSitterOffsetRange, TreeSitterPoint, TreeSitterPointRange } from './nodes';
import { _parse } from './parserWithCaching';
import { runQueries } from './querying';
import { _getNodeMatchingSelection } from './selectionParsing';
import { structureComputer } from './structure';
import { WASMLanguage } from './treeSitterLanguages';
import { _isFineScope, _isScope, _isStatement, callExpressionQuery, classDeclarationQuery, classReferenceQuery, coarseScopesQuery, functionQuery, semanticChunkingTargetQuery, symbolQueries, typeDeclarationQuery, typeReferenceQuery } from './treeSitterQueries';
import { extractIdentifier } from './util';
import Parser = require('web-tree-sitter');

export { _getDocumentableNodeIfOnIdentifier, _getNodeToDocument, NodeToDocumentContext } from './docGenParsing';
export { _dispose } from './parserWithCaching';
export { _getNodeMatchingSelection } from './selectionParsing';
export { _findLastTest, _getTestableNode, _getTestableNodes } from './testGenParsing';

/** Runs the coarse-scope queries registered for `language` against `root`. */
function queryCoarseScopes(language: WASMLanguage, root: Parser.SyntaxNode): Parser.QueryMatch[] {
	const queries = coarseScopesQuery[language];
	return runQueries(queries, root);
}

/** Runs the function queries registered for `language` against `root`. */
function queryFunctions(language: WASMLanguage, root: Parser.SyntaxNode): Parser.QueryMatch[] {
	const queries = functionQuery[language];
	return runQueries(queries, root);
}

/** Runs the call-expression queries for `language`; yields no matches when none are registered. */
function queryCallExpressions(language: WASMLanguage, root: Parser.SyntaxNode): Parser.QueryMatch[] {
	const queries = callExpressionQuery[language];
	if (!queries) {
		return [];
	}
	return runQueries(queries, root);
}

/** Runs the class-declaration queries for `language`; yields no matches when none are registered. */
function queryClasses(language: WASMLanguage, root: Parser.SyntaxNode): Parser.QueryMatch[] {
	const queries = classDeclarationQuery[language];
	if (!queries) {
		return [];
	}
	return runQueries(queries, root);
}

/** Runs the type-declaration queries for `language`; yields no matches when none are registered. */
function queryTypeDeclarations(language: WASMLanguage, root: Parser.SyntaxNode): Parser.QueryMatch[] {
	const queries = typeDeclarationQuery[language];
	if (!queries) {
		return [];
	}
	return runQueries(queries, root);
}

/** Runs the type-reference queries for `language`; yields no matches when none are registered. */
function queryTypeReferences(language: WASMLanguage, root: Parser.SyntaxNode): Parser.QueryMatch[] {
	const queries = typeReferenceQuery[language];
	if (!queries) {
		return [];
	}
	return runQueries(queries, root);
}

/** Runs the class-reference queries for `language`; yields no matches when none are registered. */
function queryClassReferences(language: WASMLanguage, root: Parser.SyntaxNode): Parser.QueryMatch[] {
	const queries = classReferenceQuery[language];
	if (!queries) {
		return [];
	}
	return runQueries(queries, root);
}

/** Runs the semantic-chunking target queries registered for `language` against `root`. */
function querySemanticTargets(language: WASMLanguage, root: Parser.SyntaxNode): Parser.QueryMatch[] {
	const queries = semanticChunkingTargetQuery[language];
	return runQueries(queries, root);
}


/**
 * Get the positions of all function calls in the given piece of source code.
 *
 * Only calls whose `call_expression` capture intersects `selection` are returned.
 * The reported offsets are those of the identifier node when one was captured,
 * otherwise those of the whole call expression.
 */
export async function _getCallExpressions(language: WASMLanguage, source: string, selection: TreeSitterOffsetRange): Promise<TreeSitterExpressionInfo[]> {
	const treeRef = await _parse(language, source);
	try {
		const results = queryCallExpressions(language, treeRef.tree.rootNode);
		const positions = results.reduce<TreeSitterExpressionInfo[]>((acc, res) => {
			const fn = res.captures.find(c => c.name === 'call_expression')!.node;
			if (TreeSitterOffsetRange.doIntersect(selection, fn)) {
				let identifier;
				let identifierNode;
				if (language === 'ruby') { // strip preceding : from any captured simple symbols
					identifierNode = res.captures.find(c => c.name === 'symbol')?.node;
					identifier = identifierNode?.text?.slice(1);
				}
				// Fall back to the `identifier` capture when no ruby symbol was found
				identifierNode ??= res.captures.find(c => c.name === 'identifier')?.node;
				identifier ??= identifierNode?.text;
				acc.push({
					identifier: identifier ?? '',
					text: fn.text,
					startIndex: (identifierNode ?? fn).startIndex,
					endIndex: (identifierNode ?? fn).endIndex,
				});
			}
			return acc;
		}, []);
		return positions;
	} finally {
		treeRef.dispose();
	}
}

/**
 * Get function definition info for all function definitions in the given piece of source code.
 */
export async function _getFunctionDefinitions(language: WASMLanguage, source: string): Promise<TreeSitterExpressionInfo[]> {
	const treeRef = await _parse(language, source);
	try {
		const results = queryFunctions(language, treeRef.tree.rootNode);
		const positions = results.map(res => {
			const fn = res.captures.find(c => c.name === 'function')!.node;
			const identifier = res.captures.find(c => c.name === 'identifier')?.node.text;
			return {
				identifier: identifier ?? '',
				text: fn.text,
				startIndex: fn.startIndex,
				endIndex: fn.endIndex,
			};
		});
		return positions;
	} finally {
		treeRef.dispose();
	}
}

/**
 * Get class declaration info for all class declarations in the given piece of source code.
 *
 * The identifier is taken from the first direct child of the declaration whose node
 * type is one of the per-language name node types listed below; it is `''` when
 * no such child exists.
 */
export async function _getClassDeclarations(language: WASMLanguage, source: string): Promise<TreeSitterExpressionInfo[]> {
	const treeRef = await _parse(language, source);
	try {
		const results = queryClasses(language, treeRef.tree.rootNode);
		const positions = results.map(res => {
			const fn = res.captures.find(c => c.name === 'class_declaration')!.node;
			const identifier = fn.children.find(c =>
				c.type === 'type_identifier' // typescript
				|| c.type === 'identifier' // python
				|| c.type === 'constant' // ruby
			)?.text;
			return {
				identifier: identifier ?? '',
				text: fn.text,
				startIndex: fn.startIndex,
				endIndex: fn.endIndex,
			};
		});
		return positions;
	} finally {
		treeRef.dispose();
	}
}

/**
 * Get type declaration info for all type declarations in the given piece of source code.
 *
 * The identifier comes from the `type_identifier` capture, falling back to the first
 * direct `type_identifier` child of the declaration; it is `''` when neither exists.
 */
export async function _getTypeDeclarations(language: WASMLanguage, source: string): Promise<TreeSitterExpressionInfo[]> {
	const treeRef = await _parse(language, source);
	try {
		const results = queryTypeDeclarations(language, treeRef.tree.rootNode);
		const positions = results.map(res => {
			const fn = res.captures.find(c => c.name === 'type_declaration')!.node;
			let identifier = res.captures.find(c => c.name === 'type_identifier')?.node.text;
			if (!identifier) { // TODO@joyceerhl debt: move this into query captures
				identifier = fn?.children.find(c => c.type === 'type_identifier')?.text;
			}
			return {
				identifier: identifier ?? '',
				text: fn.text,
				startIndex: fn.startIndex,
				endIndex: fn.endIndex,
			};
		});
		return positions;
	} finally {
		treeRef.dispose();
	}
}

/**
 * Get all type references (`type_identifier` captures) that intersect `selection`.
 */
export async function _getTypeReferences(language: WASMLanguage, source: string, selection: TreeSitterOffsetRange): Promise<TreeSitterExpressionInfo[]> {
	const treeRef = await _parse(language, source);
	try {
		const results = queryTypeReferences(language, treeRef.tree.rootNode);
		const positions = results.reduce((acc: TreeSitterExpressionInfo[], res: Parser.QueryMatch) => {
			const typeIdentifier = res.captures.find(c => c.name === 'type_identifier')!.node;
			if (TreeSitterOffsetRange.doIntersect(selection, typeIdentifier)) {
				acc.push({
					identifier: typeIdentifier.text,
					text: typeIdentifier.text,
					startIndex: typeIdentifier.startIndex,
					endIndex: typeIdentifier.endIndex,
				});
			}
			return acc;
		}, []);
		return positions;
	} finally {
		treeRef.dispose();
	}
}

/**
 * Get all class references (`new_expression` captures) that intersect `selection`.
 * Both `identifier` and `text` carry the full text of the captured expression.
 */
export async function _getClassReferences(language: WASMLanguage, source: string, selection: TreeSitterOffsetRange): Promise<TreeSitterExpressionInfo[]> {
	const treeRef = await _parse(language, source);
	try {
		const results = queryClassReferences(language, treeRef.tree.rootNode);
		const positions = results.reduce((acc: TreeSitterExpressionInfo[], res: Parser.QueryMatch) => {
			const fn = res.captures.find(c => c.name === 'new_expression')!.node;
			if (TreeSitterOffsetRange.doIntersect(selection, fn)) {
				acc.push({
					identifier: fn.text,
					text: fn.text,
					startIndex: fn.startIndex,
					endIndex: fn.endIndex,
				});
			}
			return acc;
		}, []);
		return positions;
	} finally {
		treeRef.dispose();
	}
}

/**
 * Get all symbols (`symbol` captures of the language's symbol queries) that intersect `selection`.
 */
export async function _getSymbols(language: WASMLanguage, source: string, selection: TreeSitterOffsetRange): Promise<TreeSitterExpressionInfo[]> {
	const treeRef = await _parse(language, source);
	try {
		const queries = symbolQueries[language];
		const results = runQueries(queries, treeRef.tree.rootNode);
		const positions = results.reduce((acc: TreeSitterExpressionInfo[], res: Parser.QueryMatch) => {
			const fn = res.captures.find(c => c.name === 'symbol')!.node;
			if (TreeSitterOffsetRange.doIntersect(selection, fn)) {
				acc.push({
					identifier: fn.text,
					text: fn.text,
					startIndex: fn.startIndex,
					endIndex: fn.endIndex,
				});
			}
			return acc;
		}, []);
		return positions;
	} finally {
		treeRef.dispose();
	}
}

/**
 * Build the tree of semantic chunks (definitions with their detail blocks) for the given source.
 */
export async function _getSemanticChunkTree(language: WASMLanguage, source: string): Promise<QueryMatchTree<DetailBlock>> {
	const treeRef = await _parse(language, source);
	try {
		const results = querySemanticTargets(language, treeRef.tree.rootNode);
		return getQueryMatchTree(language, results, treeRef.tree.rootNode);
	} finally {
		treeRef.dispose();
	}
}

/**
 * Build the tree of semantic chunks carrying only each definition's body and name.
 */
export async function _getSemanticChunkNames(language: WASMLanguage, source: string): Promise<QueryMatchTree<BlockNameDetail>> {
	const treeRef = await _parse(language, source);
	try {
		const results = querySemanticTargets(language, treeRef.tree.rootNode);
		return getBlockNameTree(language, results, treeRef.tree.rootNode);
	} finally {
		treeRef.dispose();
	}
}


/**
 * Get the positions of all function bodies nodes in the given piece of source code.
 */
export async function _getFunctionBodies(language: WASMLanguage, source: string): Promise<TreeSitterOffsetRange[]> {
	const treeRef = await _parse(language, source);
	try {
		const results = queryFunctions(language, treeRef.tree.rootNode);
		const positions = results.map(res => {
			const fn = res.captures.find(c => c.name === 'body')!.node;
			return {
				startIndex: fn.startIndex,
				endIndex: fn.endIndex,
			};
		});
		return positions;
	} finally {
		treeRef.dispose();
	}
}

/**
 * Find the coarse scope enclosing `range`.
 *
 * Iterates the coarse-scope matches in order and keeps the last one whose first
 * capture contains `range`; iteration stops once a scope starts after `range` ends.
 *
 * @throws Error when no coarse scope contains `range`.
 */
export async function _getCoarseParentScope(language: WASMLanguage, source: string, range: TreeSitterPointRange): Promise<TreeSitterPointRange> {
	const treeRef = await _parse(language, source);
	try {
		const scopes = queryCoarseScopes(language, treeRef.tree.rootNode);
		let parentNode: Parser.SyntaxNode | undefined;
		for (const scope of scopes) {
			const captureNode = scope.captures[0].node;
			const captureNodeRange = TreeSitterPointRange.ofSyntaxNode(captureNode);
			if (TreeSitterPointRange.doesContain(captureNodeRange, range)) {
				parentNode = captureNode;
			}
			if (TreeSitterPoint.isBefore(range.endPosition, captureNodeRange.startPosition)) {
				break;
			}
		}
		if (!parentNode) {
			throw new Error('No parent node found');
		} else {
			return TreeSitterPointRange.ofSyntaxNode(parentNode);
		}
	} finally {
		treeRef.dispose();
	}
}

/**
 * Find the selection of interest for the /fix command
 *
 * Starts from the smallest node spanning `range` and tries to grow it to the biggest
 * surrounding range of at most `maxNumberOfLines` lines. When growing yields exactly
 * the initial node range, the smallest range containing the node is returned instead.
 */
export async function _getFixSelectionOfInterest(language: WASMLanguage, source: string, range: TreeSitterPointRange, maxNumberOfLines: number): Promise<TreeSitterPointRange> {
	const treeRef = await _parse(language, source);
	try {
		const smallestNode = treeRef.tree.rootNode.descendantForPosition(range.startPosition, range.endPosition);
		const initialRange = { startPosition: smallestNode.startPosition, endPosition: smallestNode.endPosition };
		const biggestRange = _getBiggestRangeContainingNodeSmallerThan(language, smallestNode, maxNumberOfLines, range, true);
		if (TreeSitterPointRange.equals(initialRange, biggestRange)) {
			return _getSmallestRangeContainingNode(language, smallestNode);
		}
		return biggestRange;
	} finally {
		treeRef.dispose();
	}
}

/**
 * Find the smallest range containing the node
 *
 * A scope node, or a node without a parent, is returned as is. Otherwise, when the
 * node has a filtered sibling both above and below it, the range spanning those two
 * siblings is returned; failing that, the search continues from the parent.
 */
function _getSmallestRangeContainingNode(language: WASMLanguage, node: Parser.SyntaxNode): TreeSitterPointRange {
	const parent = node.parent;
	const range = { startPosition: node.startPosition, endPosition: node.endPosition };
	if (_isScope(language, node) || !parent) {
		return range;
	}
	const { filteredRanges, indexOfInterest } = _findFilteredRangesAndIndexOfInterest(language, parent.children, range, false);
	if (indexOfInterest - 1 >= 0 && indexOfInterest + 1 <= filteredRanges.length - 1) {
		const siblingAbove = filteredRanges[indexOfInterest - 1];
		const siblingBelow = filteredRanges[indexOfInterest + 1];
		return { startPosition: siblingAbove.startPosition, endPosition: siblingBelow.endPosition };
	}
	return _getSmallestRangeContainingNode(language, parent);
}

/**
 * Get the biggest range containing the node of length smaller than the max number of lines
 *
 * While the current node fits within `maxNumberOfLines`, the search moves up to its
 * parent (with `firstCall` set to false); once a node is too large, the best range
 * among its children is returned.
 */
function _getBiggestRangeContainingNodeSmallerThan(language: WASMLanguage, node: Parser.SyntaxNode, maxNumberOfLines: number, range: TreeSitterPointRange, firstCall: boolean): TreeSitterPointRange {
	const children = node.children;
	const lengthSpannedByNode = node.endPosition.row - node.startPosition.row + 1;
	if (lengthSpannedByNode <= maxNumberOfLines) {
		const newRange = _isScope(language, node) ?
			{ startPosition: node.startPosition, endPosition: node.endPosition } :
			_getBiggestRangeContainingNodeAmongNodesSmallerThan(language, children, maxNumberOfLines, range, firstCall);
		const parent = node.parent;
		return parent ? _getBiggestRangeContainingNodeSmallerThan(language, parent, maxNumberOfLines, newRange, false) : newRange;
	}
	return _getBiggestRangeContainingNodeAmongNodesSmallerThan(language, children, maxNumberOfLines, range, firstCall);
}

/** Number of lines from the first row of `range1` to the last row of `range2`, inclusive. */
function _numberOfLinesSpannedByRanges(range1: TreeSitterPointRange, range2: TreeSitterPointRange) {
	return range2.endPosition.row - range1.startPosition.row + 1;
}

/**
 * Search the nodes and find the biggest range made of statements or scopes that surrounds the range
 *
 * Starts with the window covering all filtered ranges and shrinks it one entry at a
 * time, from the side farther away from the entry of interest (the top on ties),
 * until it fits in `maxNumberOfLines`. Returns `lastRange` when no window fits or
 * when `nodes` is empty.
 */
function _getBiggestRangeContainingNodeAmongNodesSmallerThan(language: WASMLanguage, nodes: Parser.SyntaxNode[], maxNumberOfLines: number, lastRange: TreeSitterPointRange, firstCall: boolean): TreeSitterPointRange {
	if (nodes.length === 0) {
		return lastRange;
	}
	const { filteredRanges, indexOfInterest } = _findFilteredRangesAndIndexOfInterest(language, nodes, lastRange, firstCall);
	let siblingAboveIndex = 0;
	let siblingBelowIndex = filteredRanges.length - 1;
	let siblingAbove = filteredRanges[siblingAboveIndex];
	let siblingBelow = filteredRanges[siblingBelowIndex];

	while (_numberOfLinesSpannedByRanges(siblingAbove, siblingBelow) > maxNumberOfLines) {
		if (siblingAboveIndex === siblingBelowIndex) {
			// The two indices are equal to the insertion index
			break;
		} else if (indexOfInterest - siblingAboveIndex < siblingBelowIndex - indexOfInterest) {
			siblingBelowIndex--;
			siblingBelow = filteredRanges[siblingBelowIndex];
		} else {
			siblingAboveIndex++;
			siblingAbove = filteredRanges[siblingAboveIndex];
		}
	}
	if (_numberOfLinesSpannedByRanges(siblingAbove, siblingBelow) <= maxNumberOfLines) {
		return { startPosition: siblingAbove.startPosition, endPosition: siblingBelow.endPosition };
	}
	return lastRange;
}

/**
 * Filter the nodes that are scopes or statements and find the index of the node containing the given range, or append the range to the array
 *
 * On the first call the range itself is inserted into the filtered list at its sorted
 * position (by start position). On later calls, nodes containing the range are kept
 * as well and the index of the first such node is returned.
 *
 * @throws Error when no index of interest could be determined.
 */
function _findFilteredRangesAndIndexOfInterest(language: WASMLanguage, nodes: Parser.SyntaxNode[], range: TreeSitterPointRange, firstCall: boolean): { filteredRanges: TreeSitterPointRange[]; indexOfInterest: number } {
	let filteredRanges: TreeSitterPointRange[];
	let indexOfInterest: number;
	if (firstCall) {
		filteredRanges = nodes.filter((child) => _isScope(language, child) || _isStatement(language, child));
		indexOfInterest = findInsertionIndexInSortedArray(filteredRanges, range, (a, b) => TreeSitterPoint.isBefore(a.startPosition, b.startPosition));
		filteredRanges.splice(indexOfInterest, 0, range);
	} else {
		filteredRanges = nodes.filter((child) => TreeSitterPointRange.doesContain(child, range) || _isScope(language, child) || _isStatement(language, child));
		indexOfInterest = filteredRanges.findIndex(child => TreeSitterPointRange.doesContain(child, range));
	}
	if (indexOfInterest === -1) {
		throw new Error(`Valid index not found`);
	}
	return { filteredRanges, indexOfInterest };
}

/**
 * Collect the offset ranges of all fine scopes enclosing `selection`, innermost first.
 *
 * The walk starts at the smallest node spanning the selection and ascends to the root.
 */
export async function _getFineScopes(language: WASMLanguage, source: string, selection: TreeSitterOffsetRange): Promise<TreeSitterOffsetRange[]> {
	const blockScopes: TreeSitterOffsetRange[] = [];

	const treeRef = await _parse(language, source);
	try {
		const syntaxNode = treeRef.tree.rootNode.descendantForIndex(selection.startIndex, selection.endIndex);
		let currentNode: Parser.SyntaxNode | null = syntaxNode;

		// Ascend the parse tree until we reach the root node, collecting all block scopes that intersect with the provided selection
		while (currentNode !== null) {
			if (_isFineScope(language, currentNode)) {
				blockScopes.push({ startIndex: currentNode.startIndex, endIndex: currentNode.endIndex });
			}
			currentNode = currentNode.parent;
		}

		return blockScopes;
	} finally {
		// Release the tree reference like every other entry point in this file;
		// only plain offsets were copied out, so nothing returned depends on the tree.
		treeRef.dispose();
	}
}

export type NodeToExplainContext = {

	/** is undefined when we couldn't determine the identifier */
	nodeIdentifier: string | undefined;

	nodeToExplain: Node;
};

/**
 *
 * Given a selection around an identifier, returns the definition node.
 *
 * Returns `undefined` when the selection is empty, or when either the node matching
 * the selection or an enclosing explainable node cannot be found.
 */
export async function _getNodeToExplain(
	language: WASMLanguage,
	source: string,
	selection: TreeSitterOffsetRange
): Promise<NodeToExplainContext | undefined> {

	const treeRef = await _parse(language, source);

	try {
		const isSelectionEmpty = selection.startIndex === selection.endIndex;
		if (isSelectionEmpty) {
			return undefined;
		}

		const identifier = _getNodeMatchingSelection(treeRef.tree, selection, language);
		const fullDefinition = _getNodeMatchingSelection(treeRef.tree, selection, language, isExplainableNode);

		if (fullDefinition && identifier) {
			const nodeIdentifier = extractIdentifier(identifier, language);
			return {
				nodeIdentifier,
				nodeToExplain: Node.ofSyntaxNode(fullDefinition),
			};
		}
		return undefined;
	} finally {
		treeRef.dispose();
	}
}

/** A node is explainable when its node type contains the substring `definition`. */
function isExplainableNode(node: Parser.SyntaxNode, language: WASMLanguage) {
	return node.type.match(/definition/);
}

/**
 * Build a tree of definitions carrying each definition's body and name.
 *
 * Only matches with a `definition` capture that has a `body` field are kept. For
 * TypeScript/JavaScript a definition wrapped in an `export_statement` is replaced
 * by that statement. The first match per node wins.
 */
export function getBlockNameTree(language: WASMLanguage, queryMatches: Parser.QueryMatch[], root: Parser.SyntaxNode): QueryMatchTree<BlockNameDetail> {
	const matches: Map<number, MatchGroup<BlockNameDetail>> = new Map(); // keyed by node id to ensure that we get rid of duplicates
	queryMatches.forEach(n => {
		const captures = n.captures;

		let definitionNode = captures.find(v => v.name === 'definition')?.node;

		// The node holding the name lives under a different field for some languages/node types
		let keyword;
		if (language === WASMLanguage.Cpp && definitionNode?.type === 'function_definition') {
			keyword = definitionNode?.childForFieldName('declarator')?.childForFieldName('declarator');
		} else if (language === WASMLanguage.Rust && definitionNode?.type === 'impl_item') {
			keyword = definitionNode?.childForFieldName('trait');
		} else {
			keyword = definitionNode?.childForFieldName('name');
		}
		const bodyNode = definitionNode?.childForFieldName('body');
		if (definitionNode && bodyNode) {

			switch (language) {
				case WASMLanguage.TypeScript:
				case WASMLanguage.JavaScript: {
					const { definition } = getCommentsAndDefFromTSJSDefinition(definitionNode);
					definitionNode = definition;
					break;
				}
			}
			const existingMatch = matches.get(definitionNode.id);
			if (!existingMatch) {
				matches.set(definitionNode.id, {
					mainBlock: TreeSitterChunkHeaderInfo.ofSyntaxNode(definitionNode),
					detailBlocks: {
						body: TreeSitterChunkHeaderInfo.ofSyntaxNode(bodyNode),
						name: keyword?.text,
					},
				});
			}
		}
	});
	const groups = Array.from(matches.values());

	return new QueryMatchTree(groups, TreeSitterChunkHeaderInfo.ofSyntaxNode(root));
}



/**
 * helper workspace chunker functions
 */

/** Group the query matches with the language-specific strategy and wrap them in a tree rooted at `root`. */
function getQueryMatchTree(language: WASMLanguage, queryMatches: Parser.QueryMatch[], root: Parser.SyntaxNode): QueryMatchTree<DetailBlock> {
	let groups: MatchGroup<DetailBlock>[];

	switch (language) {
		case WASMLanguage.Python:
			groups = queryCapturesToPythonSemanticGroup(queryMatches);
			break;
		case WASMLanguage.Ruby:
			groups = queryCapturesToRubySemanticGroup(queryMatches);
			break;
		default: {
			groups = queryCapturesToGenericSemanticGroup(queryMatches, language);
			break;
		}
	}

	const queryTree = new QueryMatchTree(groups, TreeSitterChunkHeaderInfo.ofSyntaxNode(root));

	return queryTree;
}

/**
 * Group matches for languages without a dedicated strategy: each definition with a
 * `body` field is stored once together with its preceding comments and its body.
 */
function queryCapturesToGenericSemanticGroup(queryMatches: Parser.QueryMatch[], wasmLang: WASMLanguage): MatchGroup<GenericDetail>[] {
	const matches: Map<number, MatchGroup<GenericDetail>> = new Map(); // keyed by node id to ensure that we get rid of duplicates

	queryMatches
		.forEach(n => {
			const captures = n.captures;

			let definitionNode = captures.find(v => v.name === 'definition')?.node;

			const bodyNode = definitionNode?.childForFieldName('body');
			if (definitionNode && bodyNode) {

				let commentNodes;
				switch (wasmLang) {
					case WASMLanguage.TypeScript:
					case WASMLanguage.JavaScript: {
						const { definition, comments } = getCommentsAndDefFromTSJSDefinition(definitionNode);
						definitionNode = definition;
						commentNodes = comments;
						break;
					}
					case WASMLanguage.Java:
					case WASMLanguage.Rust:
						commentNodes = getCommentsFromJavaRustDefinition(definitionNode);
						break;
					default: {
						commentNodes = getCommentsFromDefinition(definitionNode);
						break;
					}
				}
				const existingMatch = matches.get(definitionNode.id);
				if (!existingMatch) {
					matches.set(definitionNode.id, {
						mainBlock: TreeSitterChunkHeaderInfo.ofSyntaxNode(definitionNode),
						detailBlocks: {
							comments: commentNodes.map(e => TreeSitterChunkHeaderInfo.ofSyntaxNode(e)),
							body: TreeSitterChunkHeaderInfo.ofSyntaxNode(bodyNode)
						},
					});
				}
			}
		});

	return Array.from(matches.values());
}

/**
 * Return the first named child, after the first one, whose node type does not
 * contain `parameters`; `undefined` when there is none.
 */
function getFirstBodyParamForRuby(namedNodes: Parser.SyntaxNode[]) {
	// the children must have at least 2 nodes. The second node is the first potential body node, since the first is the identifier.

	if (namedNodes.length < 2) {
		return undefined;
	}
	for (let i = 1; i < namedNodes.length; i++) {
		const node = namedNodes[i];
		if (!node.type.includes('parameters')) {
			return node;
		}
	}

	return undefined;
}

/**
 * Group matches for Ruby. The body is not read from a `body` field here; it is the
 * span from the first non-parameter named child to the last named child of the
 * definition.
 */
function queryCapturesToRubySemanticGroup(queryMatches: Parser.QueryMatch[]): MatchGroup<GenericDetail>[] {
	const matches: Map<number, MatchGroup<GenericDetail>> = new Map(); // keyed by node id to ensure that we get rid of duplicates
	queryMatches
		.forEach(n => {
			const captures = n.captures;

			const definitionNode = captures.find(v => v.name === 'definition')?.node;
			if (definitionNode) {
				const defChildren = definitionNode.namedChildren;
				const startChild = getFirstBodyParamForRuby(defChildren);
				if (startChild) {
					const endChild = defChildren[defChildren.length - 1];
					// Child offsets are relative to the document, so rebase them onto the definition's own text
					const childText = definitionNode.text.substring(startChild.startIndex - definitionNode.startIndex, endChild.endIndex - definitionNode.startIndex);

					const commentNodes = getCommentsFromDefinition(definitionNode);
					const existingMatch = matches.get(definitionNode.id);
					if (!existingMatch) {
						matches.set(definitionNode.id, {
							mainBlock: TreeSitterChunkHeaderInfo.ofSyntaxNode(definitionNode),
							detailBlocks: {
								comments: commentNodes.map(e => TreeSitterChunkHeaderInfo.ofSyntaxNode(e)),
								body: {
									range: <TreeSitterPointRange>{
										startPosition: { row: startChild.startPosition.row, column: startChild.startPosition.column },
										endPosition: { row: endChild.endPosition.row, column: endChild.endPosition.column }
									},
									startIndex: startChild.startIndex,
									text: childText,
									endIndex: endChild.endIndex,
								}
							},
						});
					}
				}
			}
		});

	return Array.from(matches.values());
}

/**
 * Group matches for Python: each definition with a `body` field is stored once
 * together with its docstring and decorator (when present) and its body.
 */
function queryCapturesToPythonSemanticGroup(queryMatches: Parser.QueryMatch[]): MatchGroup<PythonDetail>[] {
	const matches: Map<number, MatchGroup<PythonDetail>> = new Map(); // keyed by node id to ensure that we get rid of duplicates

	queryMatches
		.forEach(n => {
			const captures = n.captures;
			const definitionNode = captures.find(v => v.name === 'definition')?.node;
			const bodyNode = definitionNode?.childForFieldName('body');

			// First match per node wins, consistent with the generic and Ruby grouping
			if (definitionNode && bodyNode && !matches.has(definitionNode.id)) {
				const docstringNode = getDocstringFromBody(bodyNode);
				const decoratorNode = getDecoratorFromDefinition(definitionNode);
				matches.set(definitionNode.id, {
					mainBlock: TreeSitterChunkHeaderInfo.ofSyntaxNode(definitionNode),
					detailBlocks: {
						docstring: docstringNode ? TreeSitterChunkHeaderInfo.ofSyntaxNode(docstringNode) : undefined,
						decorator: decoratorNode ? TreeSitterChunkHeaderInfo.ofSyntaxNode(decoratorNode) : undefined,
						body: TreeSitterChunkHeaderInfo.ofSyntaxNode(bodyNode),
					},
				});
			}
		});

	return Array.from(matches.values());
}

/**
 * For Generic (Cpp/Cs/Go) workspace chunks
 *
 * Collects the run of named siblings directly preceding `definition` whose type is
 * one of `commentNodeNames`, returned in document order.
 */
function getCommentsFromDefinition(definition: Parser.SyntaxNode, commentNodeNames = ['comment']): Parser.SyntaxNode[] {

	// there is an issue where the query sometimes returns comments that are at the beginning of the file
	// instead of one that is actually close to the declaration.
	// Therefore, we should programmatically find comments for more reliability
	const ret: Parser.SyntaxNode[] = [];
	let prevSibling = definition.previousNamedSibling;
	while (prevSibling && commentNodeNames.some(e => e === prevSibling?.type)) {
		ret.push(prevSibling);
		prevSibling = prevSibling.previousNamedSibling;
	}
	return ret.reverse();
}

/**
 * For TS/JS workspace chunks
 *
 * When the definition's parent is an `export_statement`, that statement is treated
 * as the definition and comments are looked up before it.
 */
function getCommentsAndDefFromTSJSDefinition(definition: Parser.SyntaxNode): {
	definition: Parser.SyntaxNode;
	comments: Parser.SyntaxNode[];
} {
	const parent = definition.parent;
	if (parent?.type === 'export_statement') {
		return {
			definition: parent,
			comments: getCommentsFromDefinition(parent)
		};
	}

	return {
		definition: definition,
		comments: getCommentsFromDefinition(definition)
	};
}

/**
 * For Java/Rust workspace chunks
 */
function getCommentsFromJavaRustDefinition(definition: Parser.SyntaxNode): Parser.SyntaxNode[] {
	return getCommentsFromDefinition(definition, ['block_comment', 'line_comment']);
}


/**
 * For Python workspace chunks
 *
 * Returns the named sibling directly preceding `definition` when it is a `decorator` node.
 */
function getDecoratorFromDefinition(definition: Parser.SyntaxNode) {
	const prevSibling = definition.previousNamedSibling;
	return prevSibling?.type === 'decorator' ? prevSibling : undefined;
}

/**
 * Returns the docstring node of a Python body: the `string` node that is the first
 * child of the body's leading `expression_statement`, or `undefined` otherwise.
 */
function getDocstringFromBody(body: Parser.SyntaxNode) {
	const firstChild = body.firstChild;
	if (!firstChild || firstChild.type !== 'expression_statement') {
		return;
	}

	const potentialDocstring = firstChild.firstChild;
	return potentialDocstring?.type === 'string' ? potentialDocstring : undefined;
}

/** Compute the structure overlay for `source` by delegating to the shared `structureComputer`. */
export function _getStructure(lang: WASMLanguage, source: string): Promise<OverlayNode | undefined> {
	return structureComputer.getStructure(lang, source);
}

/**
 * Count the nodes of type `ERROR` in the parse tree of `source`.
 * Returns 0 without walking the tree when the root reports no error.
 */
export async function _getParseErrorCount(language: WASMLanguage, source: string): Promise<number> {
	const treeRef = await _parse(language, source);
	try {
		if (!treeRef.tree.rootNode.hasError) {
			return 0;
		}

		// Recursively count error nodes
		function countErrors(node: Parser.SyntaxNode): number {
			let count = node.type === 'ERROR' ? 1 : 0;
			for (const child of node.children) {
				count += countErrors(child);
			}
			return count;
		}

		return countErrors(treeRef.tree.rootNode);
	} finally {
		treeRef.dispose();
	}
}