Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
microsoft
GitHub Repository: microsoft/vscode
Path: blob/main/extensions/copilot/src/platform/parser/node/structure.ts
13401 views
1
/*---------------------------------------------------------------------------------------------
2
* Copyright (c) Microsoft Corporation. All rights reserved.
3
* Licensed under the MIT License. See License.txt in the project root for license information.
4
*--------------------------------------------------------------------------------------------*/
5
6
import { SyntaxNode } from 'web-tree-sitter';
7
import { LRUCache } from '../../../util/common/cache';
8
import { OverlayNode, TreeSitterOffsetRange } from './nodes';
9
import { _parse } from './parserWithCaching';
10
import { runQueries } from './querying';
11
import { WASMLanguage } from './treeSitterLanguages';
12
import { syntacticallyValidAtoms } from './treeSitterQueries';
13
14
export class StructureComputer {
15
16
private _cache = new LRUCache<OverlayNode | undefined>(5);
17
18
public setCacheSize(size: number) {
19
this._cache = new LRUCache(size);
20
}
21
22
public async getStructure(lang: WASMLanguage, source: string): Promise<OverlayNode | undefined> {
23
const cacheKey = `${lang}:${source}`;
24
let cacheValue = this._cache.get(cacheKey);
25
if (!cacheValue) {
26
cacheValue = await this._getStructure(lang, source);
27
this._cache.put(cacheKey, cacheValue);
28
}
29
return cacheValue;
30
}
31
32
private async _getStructure(lang: WASMLanguage, source: string): Promise<OverlayNode | undefined> {
33
const queries = syntacticallyValidAtoms[lang];
34
35
if (queries.length === 0) {
36
// language not supported
37
return undefined;
38
}
39
40
const treeRef = await _parse(lang, source);
41
42
try {
43
const captures = runQueries(queries, treeRef.tree.rootNode)
44
.flatMap(e => e.captures)
45
.sort((a, b) => TreeSitterOffsetRange.compare(a.node, b.node));
46
47
// Exclude captures contained in ranges marked with ".exclude_captures"
48
const excludedRanges: TreeSitterOffsetRange[] = [];
49
for (const capture of captures) {
50
if (capture.name.endsWith('.exclude_captures')) {
51
excludedRanges.push(TreeSitterOffsetRange.ofSyntaxNode(capture.node));
52
}
53
}
54
55
const root = new OverlayNode(0, source.length, 'root', []);
56
57
const parentStack = [root];
58
59
for (let i = 0; i < captures.length; ++i) {
60
const currentCapture = captures[i];
61
const currentNode = currentCapture.node;
62
63
if (excludedRanges.some(r => TreeSitterOffsetRange.isEqual(r, currentNode))) {
64
// This node should be excluded
65
continue;
66
}
67
68
// find a parent node that contains the current capture
69
let currentParent: OverlayNode;
70
do {
71
currentParent = parentStack.pop()!; // ! because we know there will be the root node
72
} while (currentParent && !TreeSitterOffsetRange.doesContain(currentParent, currentNode));
73
74
// nodes that need to be merged with their child, e.g.,
75
// Given `export const foo = 1;`, we can't just remove `const foo = 1;` because then syntax doesn't make sense
76
const ambientParents = new Set(['export_statement', 'ambient_declaration']);
77
78
if (ambientParents.has(currentParent.kind)) { // merge the parent with the child
79
80
currentParent.kind = currentNode.type;
81
parentStack.push(currentParent);
82
83
} else {
84
// get a more specific node kind
85
// js/ts/tsx: kind `method_definition` with identifier "constructor" -> kind `constructor`
86
let nodeKind = currentNode.type;
87
if ((lang === WASMLanguage.TypeScript || lang === WASMLanguage.TypeScriptTsx || lang === WASMLanguage.JavaScript) &&
88
nodeKind === 'method_definition' && currentNode.namedChildren.some(c => c.type === 'property_identifier' && c.text === 'constructor')) {
89
nodeKind = 'constructor';
90
}
91
92
let startIndex = currentNode.startIndex;
93
94
const prevSibling = currentNode.previousSibling;
95
if (prevSibling !== null) {
96
const textBetweenNodes = source.substring(prevSibling.endIndex, currentNode.startIndex);
97
const nlIdx = textBetweenNodes.indexOf('\n');
98
if (nlIdx === -1) {
99
startIndex = prevSibling.endIndex;
100
} else {
101
startIndex = prevSibling.endIndex + nlIdx + 1;
102
}
103
}
104
105
let endIndex = currentNode.endIndex;
106
107
// currentNode should subsume sibling nodes that are trivial, eg `;`, `,`, same-line comment
108
// if trivial sibling node is itself captured as a separate node, then it becomes a child node of currentNode, ie currentNode would have a comment as a child
109
// else it's just part of the node
110
if (currentNode.nextSibling !== null) {
111
let nextSibling: SyntaxNode | null = currentNode.nextSibling;
112
113
if (lang === WASMLanguage.TypeScript || lang === WASMLanguage.TypeScriptTsx || lang === WASMLanguage.JavaScript || lang === WASMLanguage.Cpp) {
114
while (nextSibling &&
115
(nextSibling.type === ';' ||
116
nextSibling.type === ',' ||
117
(nextSibling.type === 'comment' && !source.substring(endIndex, nextSibling.startIndex).includes('\n') /* on the same line */))
118
) {
119
excludedRanges.push(TreeSitterOffsetRange.ofSyntaxNode(nextSibling));
120
endIndex = nextSibling.endIndex;
121
nextSibling = nextSibling.nextSibling;
122
}
123
}
124
125
if (nextSibling !== null) {
126
const textBetweenNodes = source.substring(endIndex, nextSibling.startIndex);
127
const nlIdx = textBetweenNodes.indexOf('\n');
128
if (nlIdx !== -1) {
129
endIndex = endIndex + nlIdx + 1;
130
}
131
}
132
}
133
134
const newNode = new OverlayNode(startIndex, endIndex, nodeKind, []);
135
currentParent.children.push(newNode);
136
parentStack.push(currentParent, newNode);
137
}
138
}
139
140
return root;
141
142
} catch (e) {
143
console.error(e instanceof Error ? e : new Error(e));
144
} finally {
145
treeRef.dispose();
146
}
147
}
148
}
149
150
export const structureComputer = new StructureComputer();
151
152