Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
microsoft
GitHub Repository: microsoft/vscode
Path: blob/main/extensions/copilot/src/platform/parser/node/parserImpl.ts
13401 views
1
/*---------------------------------------------------------------------------------------------
2
* Copyright (c) Microsoft Corporation. All rights reserved.
3
* Licensed under the MIT License. See License.txt in the project root for license information.
4
*--------------------------------------------------------------------------------------------*/
5
6
import { findInsertionIndexInSortedArray } from '../../../util/common/arrays';
7
import { BlockNameDetail, DetailBlock, GenericDetail, MatchGroup, PythonDetail, QueryMatchTree } from './chunkGroupTypes';
8
import { Node, OverlayNode, TreeSitterChunkHeaderInfo, TreeSitterExpressionInfo, TreeSitterOffsetRange, TreeSitterPoint, TreeSitterPointRange } from './nodes';
9
import { _parse } from './parserWithCaching';
10
import { runQueries } from './querying';
11
import { _getNodeMatchingSelection } from './selectionParsing';
12
import { structureComputer } from './structure';
13
import { WASMLanguage } from './treeSitterLanguages';
14
import { _isFineScope, _isScope, _isStatement, callExpressionQuery, classDeclarationQuery, classReferenceQuery, coarseScopesQuery, functionQuery, semanticChunkingTargetQuery, symbolQueries, typeDeclarationQuery, typeReferenceQuery } from './treeSitterQueries';
15
import { extractIdentifier } from './util';
16
import Parser = require('web-tree-sitter');
17
18
export { _getDocumentableNodeIfOnIdentifier, _getNodeToDocument, NodeToDocumentContext } from './docGenParsing';
19
export { _dispose } from './parserWithCaching';
20
export { _getNodeMatchingSelection } from './selectionParsing';
21
export { _findLastTest, _getTestableNode, _getTestableNodes } from './testGenParsing';
22
23
/**
 * Runs the coarse-scope queries registered for `language` against `root`.
 */
function queryCoarseScopes(language: WASMLanguage, root: Parser.SyntaxNode): Parser.QueryMatch[] {
	return runQueries(coarseScopesQuery[language], root);
}
27
28
/**
 * Runs the function queries registered for `language` against `root`.
 */
function queryFunctions(language: WASMLanguage, root: Parser.SyntaxNode): Parser.QueryMatch[] {
	return runQueries(functionQuery[language], root);
}
32
33
/**
 * Runs the call-expression queries for `language` against `root`.
 * Yields no matches when the language has no such queries registered.
 */
function queryCallExpressions(language: WASMLanguage, root: Parser.SyntaxNode): Parser.QueryMatch[] {
	const callQueries = callExpressionQuery[language];
	return callQueries ? runQueries(callQueries, root) : [];
}
40
41
/**
 * Runs the class-declaration queries for `language` against `root`.
 * Yields no matches when the language has no such queries registered.
 */
function queryClasses(language: WASMLanguage, root: Parser.SyntaxNode): Parser.QueryMatch[] {
	const classQueries = classDeclarationQuery[language];
	return classQueries ? runQueries(classQueries, root) : [];
}
48
49
/**
 * Runs the type-declaration queries for `language` against `root`.
 * Yields no matches when the language has no such queries registered.
 */
function queryTypeDeclarations(language: WASMLanguage, root: Parser.SyntaxNode): Parser.QueryMatch[] {
	const declarationQueries = typeDeclarationQuery[language];
	return declarationQueries ? runQueries(declarationQueries, root) : [];
}
56
57
/**
 * Runs the type-reference queries for `language` against `root`.
 * Yields no matches when the language has no such queries registered.
 */
function queryTypeReferences(language: WASMLanguage, root: Parser.SyntaxNode): Parser.QueryMatch[] {
	const referenceQueries = typeReferenceQuery[language];
	return referenceQueries ? runQueries(referenceQueries, root) : [];
}
64
65
/**
 * Runs the class-reference queries for `language` against `root`.
 * Yields no matches when the language has no such queries registered.
 */
function queryClassReferences(language: WASMLanguage, root: Parser.SyntaxNode): Parser.QueryMatch[] {
	const referenceQueries = classReferenceQuery[language];
	return referenceQueries ? runQueries(referenceQueries, root) : [];
}
72
73
/**
 * Runs the semantic-chunking target queries registered for `language` against `root`.
 */
function querySemanticTargets(language: WASMLanguage, root: Parser.SyntaxNode): Parser.QueryMatch[] {
	return runQueries(semanticChunkingTargetQuery[language], root);
}
77
78
79
/**
 * Get the positions of all function calls in the given piece of source code
 * that intersect `selection`.
 *
 * The reported offsets are those of the callee identifier when one was captured,
 * otherwise those of the whole call expression. `text` is always the whole call.
 */
export async function _getCallExpressions(language: WASMLanguage, source: string, selection: TreeSitterOffsetRange): Promise<TreeSitterExpressionInfo[]> {
	const treeRef = await _parse(language, source);
	try {
		const callInfos: TreeSitterExpressionInfo[] = [];
		for (const match of queryCallExpressions(language, treeRef.tree.rootNode)) {
			const callNode = match.captures.find(capture => capture.name === 'call_expression')!.node;
			if (!TreeSitterOffsetRange.doIntersect(selection, callNode)) {
				continue;
			}

			let nameNode: Parser.SyntaxNode | undefined;
			let name: string | undefined;
			if (language === 'ruby') {
				// simple symbols are captured with their leading `:`, which is not part of the name
				nameNode = match.captures.find(capture => capture.name === 'symbol')?.node;
				name = nameNode?.text?.slice(1);
			}
			nameNode ??= match.captures.find(capture => capture.name === 'identifier')?.node;
			name ??= nameNode?.text;

			callInfos.push({
				identifier: name ?? '',
				text: callNode.text,
				startIndex: (nameNode ?? callNode).startIndex,
				endIndex: (nameNode ?? callNode).endIndex,
			});
		}
		return callInfos;
	} finally {
		treeRef.dispose();
	}
}
111
112
/**
 * Get function definition info for all function definitions in the given piece of source code.
 *
 * `identifier` is the empty string when the match carries no `identifier` capture.
 */
export async function _getFunctionDefinitions(language: WASMLanguage, source: string): Promise<TreeSitterExpressionInfo[]> {
	const treeRef = await _parse(language, source);
	try {
		const definitions: TreeSitterExpressionInfo[] = [];
		for (const { captures } of queryFunctions(language, treeRef.tree.rootNode)) {
			const functionNode = captures.find(capture => capture.name === 'function')!.node;
			const name = captures.find(capture => capture.name === 'identifier')?.node.text;
			definitions.push({
				identifier: name ?? '',
				text: functionNode.text,
				startIndex: functionNode.startIndex,
				endIndex: functionNode.endIndex,
			});
		}
		return definitions;
	} finally {
		treeRef.dispose();
	}
}
134
135
/**
 * Get info for all class declarations in the given piece of source code.
 *
 * The class name is taken from the first direct child of the declaration whose
 * node type is one of the known name node types; it is the empty string when
 * no such child exists.
 */
export async function _getClassDeclarations(language: WASMLanguage, source: string): Promise<TreeSitterExpressionInfo[]> {
	// node types that carry the class name, per grammar
	const nameNodeTypes = [
		'type_identifier', // typescript
		'identifier', // python
		'constant', // ruby
	];

	const treeRef = await _parse(language, source);
	try {
		const declarations: TreeSitterExpressionInfo[] = [];
		for (const { captures } of queryClasses(language, treeRef.tree.rootNode)) {
			const classNode = captures.find(capture => capture.name === 'class_declaration')!.node;
			const nameNode = classNode.children.find(child => nameNodeTypes.includes(child.type));
			declarations.push({
				identifier: nameNode?.text ?? '',
				text: classNode.text,
				startIndex: classNode.startIndex,
				endIndex: classNode.endIndex,
			});
		}
		return declarations;
	} finally {
		treeRef.dispose();
	}
}
158
159
/**
 * Get info for all type declarations in the given piece of source code.
 *
 * The name comes from the `type_identifier` capture; when that is missing or empty,
 * the first direct `type_identifier` child of the declaration is used instead.
 */
export async function _getTypeDeclarations(language: WASMLanguage, source: string): Promise<TreeSitterExpressionInfo[]> {
	const treeRef = await _parse(language, source);
	try {
		const declarations: TreeSitterExpressionInfo[] = [];
		for (const { captures } of queryTypeDeclarations(language, treeRef.tree.rootNode)) {
			const declarationNode = captures.find(capture => capture.name === 'type_declaration')!.node;
			const capturedName = captures.find(capture => capture.name === 'type_identifier')?.node.text;
			// TODO@joyceerhl debt: move this into query captures
			const name = capturedName || declarationNode.children.find(child => child.type === 'type_identifier')?.text;
			declarations.push({
				identifier: name ?? '',
				text: declarationNode.text,
				startIndex: declarationNode.startIndex,
				endIndex: declarationNode.endIndex,
			});
		}
		return declarations;
	} finally {
		treeRef.dispose();
	}
}
181
182
/**
 * Get info for all type references in the given piece of source code
 * whose `type_identifier` capture intersects `selection`.
 */
export async function _getTypeReferences(language: WASMLanguage, source: string, selection: TreeSitterOffsetRange): Promise<TreeSitterExpressionInfo[]> {
	const treeRef = await _parse(language, source);
	try {
		return queryTypeReferences(language, treeRef.tree.rootNode)
			.map(match => match.captures.find(capture => capture.name === 'type_identifier')!.node)
			.filter(typeNode => TreeSitterOffsetRange.doIntersect(selection, typeNode))
			.map(typeNode => ({
				identifier: typeNode.text,
				text: typeNode.text,
				startIndex: typeNode.startIndex,
				endIndex: typeNode.endIndex,
			}));
	} finally {
		treeRef.dispose();
	}
}
203
204
/**
 * Get info for all class references (the `new_expression` captures) in the
 * given piece of source code that intersect `selection`.
 *
 * Both `identifier` and `text` hold the full text of the captured expression.
 */
export async function _getClassReferences(language: WASMLanguage, source: string, selection: TreeSitterOffsetRange): Promise<TreeSitterExpressionInfo[]> {
	const treeRef = await _parse(language, source);
	try {
		const references: TreeSitterExpressionInfo[] = [];
		for (const match of queryClassReferences(language, treeRef.tree.rootNode)) {
			const newExpression = match.captures.find(capture => capture.name === 'new_expression')!.node;
			if (!TreeSitterOffsetRange.doIntersect(selection, newExpression)) {
				continue;
			}
			references.push({
				identifier: newExpression.text,
				text: newExpression.text,
				startIndex: newExpression.startIndex,
				endIndex: newExpression.endIndex,
			});
		}
		return references;
	} finally {
		treeRef.dispose();
	}
}
225
226
/**
 * Get info for all `symbol` captures of the language's symbol queries that
 * intersect `selection`.
 */
export async function _getSymbols(language: WASMLanguage, source: string, selection: TreeSitterOffsetRange): Promise<TreeSitterExpressionInfo[]> {
	const treeRef = await _parse(language, source);
	try {
		const symbols: TreeSitterExpressionInfo[] = [];
		for (const match of runQueries(symbolQueries[language], treeRef.tree.rootNode)) {
			const symbolNode = match.captures.find(capture => capture.name === 'symbol')!.node;
			if (!TreeSitterOffsetRange.doIntersect(selection, symbolNode)) {
				continue;
			}
			symbols.push({
				identifier: symbolNode.text,
				text: symbolNode.text,
				startIndex: symbolNode.startIndex,
				endIndex: symbolNode.endIndex,
			});
		}
		return symbols;
	} finally {
		treeRef.dispose();
	}
}
248
249
/**
 * Parses `source` and arranges the semantic chunking targets found in it into a tree
 * of detail blocks.
 */
export async function _getSemanticChunkTree(language: WASMLanguage, source: string): Promise<QueryMatchTree<DetailBlock>> {
	const treeRef = await _parse(language, source);
	try {
		const root = treeRef.tree.rootNode;
		const targets = querySemanticTargets(language, root);
		return getQueryMatchTree(language, targets, root);
	} finally {
		treeRef.dispose();
	}
}
258
259
/**
 * Parses `source` and arranges the semantic chunking targets found in it into a tree
 * of named blocks.
 */
export async function _getSemanticChunkNames(language: WASMLanguage, source: string): Promise<QueryMatchTree<BlockNameDetail>> {
	const treeRef = await _parse(language, source);
	try {
		const root = treeRef.tree.rootNode;
		const targets = querySemanticTargets(language, root);
		return getBlockNameTree(language, targets, root);
	} finally {
		treeRef.dispose();
	}
}
268
269
270
/**
 * Get the offset ranges of all function body nodes in the given piece of source code.
 */
export async function _getFunctionBodies(language: WASMLanguage, source: string): Promise<TreeSitterOffsetRange[]> {
	const treeRef = await _parse(language, source);
	try {
		return queryFunctions(language, treeRef.tree.rootNode).map(match => {
			const { startIndex, endIndex } = match.captures.find(capture => capture.name === 'body')!.node;
			return { startIndex, endIndex };
		});
	} finally {
		treeRef.dispose();
	}
}
289
290
/**
 * Finds the coarse scope that contains `range`.
 *
 * Scopes are visited in query order; the last one containing `range` wins, and the
 * scan stops at the first scope that starts after the end of `range`.
 *
 * @throws Error when no coarse scope contains `range`.
 */
export async function _getCoarseParentScope(language: WASMLanguage, source: string, range: TreeSitterPointRange): Promise<TreeSitterPointRange> {
	const treeRef = await _parse(language, source);
	try {
		let enclosingScope: Parser.SyntaxNode | undefined;
		for (const { captures } of queryCoarseScopes(language, treeRef.tree.rootNode)) {
			const candidate = captures[0].node;
			const candidateRange = TreeSitterPointRange.ofSyntaxNode(candidate);
			if (TreeSitterPointRange.doesContain(candidateRange, range)) {
				enclosingScope = candidate;
			}
			if (TreeSitterPoint.isBefore(range.endPosition, candidateRange.startPosition)) {
				break;
			}
		}
		if (enclosingScope) {
			return TreeSitterPointRange.ofSyntaxNode(enclosingScope);
		}
		throw new Error('No parent node found');
	} finally {
		treeRef.dispose();
	}
}
314
315
/**
 * Find the selection of interest for the /fix command.
 *
 * Starts from the smallest node spanning `range` and tries to grow it to the biggest
 * surrounding range of at most `maxNumberOfLines` lines. When growing yields exactly the
 * range of that smallest node, the smallest range containing the node is used instead.
 */
export async function _getFixSelectionOfInterest(language: WASMLanguage, source: string, range: TreeSitterPointRange, maxNumberOfLines: number): Promise<TreeSitterPointRange> {
	const treeRef = await _parse(language, source);
	try {
		const innermostNode = treeRef.tree.rootNode.descendantForPosition(range.startPosition, range.endPosition);
		const innermostRange = { startPosition: innermostNode.startPosition, endPosition: innermostNode.endPosition };
		const grownRange = _getBiggestRangeContainingNodeSmallerThan(language, innermostNode, maxNumberOfLines, range, true);
		return TreeSitterPointRange.equals(innermostRange, grownRange)
			? _getSmallestRangeContainingNode(language, innermostNode)
			: grownRange;
	} finally {
		treeRef.dispose();
	}
}
332
333
/**
 * Find the smallest range containing the node.
 *
 * Walks up from `node`: a scope node (or the root) yields its own range; otherwise, if
 * the node has a filtered sibling on both sides under its parent, the range from the
 * sibling above to the sibling below is returned; otherwise the search moves to the parent.
 */
function _getSmallestRangeContainingNode(language: WASMLanguage, node: Parser.SyntaxNode): TreeSitterPointRange {
	let current = node;
	while (true) {
		const parent = current.parent;
		const currentRange = { startPosition: current.startPosition, endPosition: current.endPosition };
		if (_isScope(language, current) || !parent) {
			return currentRange;
		}

		const { filteredRanges, indexOfInterest } = _findFilteredRangesAndIndexOfInterest(language, parent.children, currentRange, false);
		const hasSiblingAbove = indexOfInterest >= 1;
		const hasSiblingBelow = indexOfInterest + 1 < filteredRanges.length;
		if (hasSiblingAbove && hasSiblingBelow) {
			return {
				startPosition: filteredRanges[indexOfInterest - 1].startPosition,
				endPosition: filteredRanges[indexOfInterest + 1].endPosition,
			};
		}

		current = parent;
	}
}
350
351
/**
 * Get the biggest range containing the node of length smaller than the max number of lines.
 *
 * While `node` fits within `maxNumberOfLines`, the range is widened (to the node itself when it
 * is a scope, otherwise to a span of its children) and the search continues with the parent.
 * As soon as a node is too long, the best span among its children is returned.
 */
function _getBiggestRangeContainingNodeSmallerThan(language: WASMLanguage, node: Parser.SyntaxNode, maxNumberOfLines: number, range: TreeSitterPointRange, firstCall: boolean): TreeSitterPointRange {
	const childNodes = node.children;
	const nodeLineCount = node.endPosition.row - node.startPosition.row + 1;

	if (nodeLineCount > maxNumberOfLines) {
		// the node itself is too long: settle for a span of its children
		return _getBiggestRangeContainingNodeAmongNodesSmallerThan(language, childNodes, maxNumberOfLines, range, firstCall);
	}

	let widenedRange: TreeSitterPointRange;
	if (_isScope(language, node)) {
		widenedRange = { startPosition: node.startPosition, endPosition: node.endPosition };
	} else {
		widenedRange = _getBiggestRangeContainingNodeAmongNodesSmallerThan(language, childNodes, maxNumberOfLines, range, firstCall);
	}

	const parent = node.parent;
	if (!parent) {
		return widenedRange;
	}
	return _getBiggestRangeContainingNodeSmallerThan(language, parent, maxNumberOfLines, widenedRange, false);
}
366
367
/**
 * Number of lines from the first row of `range1` to the last row of `range2`, both inclusive.
 */
function _numberOfLinesSpannedByRanges(range1: TreeSitterPointRange, range2: TreeSitterPointRange) {
	const firstRow = range1.startPosition.row;
	const lastRow = range2.endPosition.row;
	return lastRow - firstRow + 1;
}
370
371
/**
 * Search the nodes and find the biggest range made of statements or scopes that surrounds the range.
 *
 * Starts with the span from the first to the last filtered range and shrinks it, one
 * range at a time, from whichever end is further away from the range of interest, until
 * the span fits within `maxNumberOfLines` or only a single range is left.
 *
 * @returns the span found, or `lastRange` when `nodes` is empty or no span fits.
 */
function _getBiggestRangeContainingNodeAmongNodesSmallerThan(language: WASMLanguage, nodes: Parser.SyntaxNode[], maxNumberOfLines: number, lastRange: TreeSitterPointRange, firstCall: boolean): TreeSitterPointRange {
	if (nodes.length === 0) {
		return lastRange;
	}

	const { filteredRanges, indexOfInterest } = _findFilteredRangesAndIndexOfInterest(language, nodes, lastRange, firstCall);
	let aboveIndex = 0;
	let belowIndex = filteredRanges.length - 1;
	const spannedLines = () => _numberOfLinesSpannedByRanges(filteredRanges[aboveIndex], filteredRanges[belowIndex]);

	// once both indices meet they are equal to the index of interest and cannot shrink further
	while (spannedLines() > maxNumberOfLines && aboveIndex !== belowIndex) {
		const distanceAbove = indexOfInterest - aboveIndex;
		const distanceBelow = belowIndex - indexOfInterest;
		if (distanceAbove < distanceBelow) {
			belowIndex--;
		} else {
			aboveIndex++;
		}
	}

	return spannedLines() <= maxNumberOfLines
		? { startPosition: filteredRanges[aboveIndex].startPosition, endPosition: filteredRanges[belowIndex].endPosition }
		: lastRange;
}
401
402
/**
 * Filter the nodes that are scopes or statements and find the index of the node containing the given range, or append the range to the array.
 *
 * - On the first call, `range` itself is inserted among the scope/statement nodes at its
 *   sorted position (by start position), and that position is the index of interest.
 * - On later calls, nodes containing `range` are kept as well, and the index of interest
 *   is that of the first kept node containing `range`.
 *
 * @throws Error when the index of interest is `-1`.
 */
function _findFilteredRangesAndIndexOfInterest(language: WASMLanguage, nodes: Parser.SyntaxNode[], range: TreeSitterPointRange, firstCall: boolean): { filteredRanges: TreeSitterPointRange[]; indexOfInterest: number } {
	const isScopeOrStatement = (candidate: Parser.SyntaxNode) => _isScope(language, candidate) || _isStatement(language, candidate);
	const containsRange = (candidate: TreeSitterPointRange) => TreeSitterPointRange.doesContain(candidate, range);

	let filteredRanges: TreeSitterPointRange[];
	let indexOfInterest: number;
	if (!firstCall) {
		const kept = nodes.filter(candidate => containsRange(candidate) || isScopeOrStatement(candidate));
		filteredRanges = kept;
		indexOfInterest = kept.findIndex(containsRange);
	} else {
		filteredRanges = nodes.filter(isScopeOrStatement);
		indexOfInterest = findInsertionIndexInSortedArray(filteredRanges, range, (a, b) => TreeSitterPoint.isBefore(a.startPosition, b.startPosition));
		filteredRanges.splice(indexOfInterest, 0, range);
	}

	if (indexOfInterest === -1) {
		throw new Error(`Valid index not found`);
	}
	return { filteredRanges, indexOfInterest };
}
421
422
/**
 * Collects the fine scopes enclosing `selection`.
 *
 * Starts at the smallest node spanning the selection's offsets and ascends through its
 * ancestors up to the root, recording the offset range of every node that
 * `_isFineScope` accepts.
 *
 * @returns the offset ranges of the matching nodes, ordered from the innermost outwards.
 */
export async function _getFineScopes(language: WASMLanguage, source: string, selection: TreeSitterOffsetRange): Promise<TreeSitterOffsetRange[]> {
	const treeRef = await _parse(language, source);
	try {
		const blockScopes: TreeSitterOffsetRange[] = [];

		// Ascend the parse tree until we reach the root node, collecting all block scopes that intersect with the provided selection
		let currentNode: Parser.SyntaxNode | null = treeRef.tree.rootNode.descendantForIndex(selection.startIndex, selection.endIndex);
		while (currentNode !== null) {
			if (_isFineScope(language, currentNode)) {
				blockScopes.push({ startIndex: currentNode.startIndex, endIndex: currentNode.endIndex });
			}
			currentNode = currentNode.parent;
		}

		// only plain offsets leave this function, so the tree can be released right away
		return blockScopes;
	} finally {
		// release the tree reference like every other `_parse` caller in this file;
		// previously it was never disposed here
		treeRef.dispose();
	}
}
439
440
/**
 * Result of `_getNodeToExplain`: the node to explain together with its identifier.
 */
export type NodeToExplainContext = {
	/** is undefined when we couldn't determine the identifier */
	nodeIdentifier: string | undefined;

	/** the definition node that should be explained */
	nodeToExplain: Node;
};
447
448
/**
 * Given a selection around an identifier, returns the definition node.
 *
 * Two lookups are made for the selection: one for the node matching it (from which the
 * identifier is extracted) and one restricted to nodes accepted by `isExplainableNode`
 * (the node to explain).
 *
 * @returns `undefined` when the selection is empty or when either lookup finds nothing.
 */
export async function _getNodeToExplain(
	language: WASMLanguage,
	source: string,
	selection: TreeSitterOffsetRange
): Promise<NodeToExplainContext | undefined> {

	if (selection.startIndex === selection.endIndex) {
		// an empty selection never yields a result, so don't parse the document for it
		return undefined;
	}

	const treeRef = await _parse(language, source);

	try {
		const identifier = _getNodeMatchingSelection(treeRef.tree, selection, language);
		const fullDefinition = _getNodeMatchingSelection(treeRef.tree, selection, language, isExplainableNode);

		if (!fullDefinition || !identifier) {
			return undefined;
		}

		return {
			nodeIdentifier: extractIdentifier(identifier, language),
			nodeToExplain: Node.ofSyntaxNode(fullDefinition),
		};
	} finally {
		treeRef.dispose();
	}
}
480
481
/**
 * Tells whether `node` can be explained: its type has to contain `definition`.
 *
 * Returns the regular expression match (truthy) or `null`. `language` is not used.
 */
function isExplainableNode(node: Parser.SyntaxNode, language: WASMLanguage) {
	const definitionPattern = /definition/;
	return definitionPattern.exec(node.type);
}
484
485
/**
 * Builds a tree of named blocks out of the `definition` captures of `queryMatches`.
 *
 * Only definitions that have a `body` field are kept. The name is read from:
 * - the nested `declarator` of the `declarator` field for C++ `function_definition` nodes,
 * - the `trait` field for Rust `impl_item` nodes,
 * - the `name` field otherwise.
 *
 * For TypeScript/JavaScript, a definition wrapped in an `export_statement` is
 * represented by that export statement.
 */
export function getBlockNameTree(language: WASMLanguage, queryMatches: Parser.QueryMatch[], root: Parser.SyntaxNode): QueryMatchTree<BlockNameDetail> {
	// keyed by syntax node id so that a definition matched several times is only recorded once
	const groupsByNodeId = new Map<number, MatchGroup<BlockNameDetail>>();

	for (const { captures } of queryMatches) {
		const capturedNode = captures.find(capture => capture.name === 'definition')?.node;
		const bodyNode = capturedNode?.childForFieldName('body');
		if (!capturedNode || !bodyNode) {
			continue;
		}

		// the name is always looked up on the captured node, not on a wrapping export statement
		let nameNode;
		if (language === WASMLanguage.Cpp && capturedNode.type === 'function_definition') {
			nameNode = capturedNode.childForFieldName('declarator')?.childForFieldName('declarator');
		} else if (language === WASMLanguage.Rust && capturedNode.type === 'impl_item') {
			nameNode = capturedNode.childForFieldName('trait');
		} else {
			nameNode = capturedNode.childForFieldName('name');
		}

		const isTsOrJs = language === WASMLanguage.TypeScript || language === WASMLanguage.JavaScript;
		const mainNode = isTsOrJs ? getCommentsAndDefFromTSJSDefinition(capturedNode).definition : capturedNode;

		if (groupsByNodeId.has(mainNode.id)) {
			continue;
		}
		groupsByNodeId.set(mainNode.id, {
			mainBlock: TreeSitterChunkHeaderInfo.ofSyntaxNode(mainNode),
			detailBlocks: {
				body: TreeSitterChunkHeaderInfo.ofSyntaxNode(bodyNode),
				name: nameNode?.text,
			},
		});
	}

	return new QueryMatchTree(Array.from(groupsByNodeId.values()), TreeSitterChunkHeaderInfo.ofSyntaxNode(root));
}
527
528
529
530
/**
531
* helper workspace chunker functions
532
*/
533
534
/**
 * Groups `queryMatches` with the language-specific strategy (Python, Ruby, or the
 * generic one for every other language) and wraps the groups in a tree rooted at `root`.
 */
function getQueryMatchTree(language: WASMLanguage, queryMatches: Parser.QueryMatch[], root: Parser.SyntaxNode): QueryMatchTree<DetailBlock> {
	let groups: MatchGroup<DetailBlock>[];
	if (language === WASMLanguage.Python) {
		groups = queryCapturesToPythonSemanticGroup(queryMatches);
	} else if (language === WASMLanguage.Ruby) {
		groups = queryCapturesToRubySemanticGroup(queryMatches);
	} else {
		groups = queryCapturesToGenericSemanticGroup(queryMatches, language);
	}

	return new QueryMatchTree(groups, TreeSitterChunkHeaderInfo.ofSyntaxNode(root));
}
554
555
/**
 * Turns the `definition` captures of `queryMatches` into match groups made of the
 * definition, its body and the comments directly preceding it.
 *
 * Only definitions that have a `body` field are kept. Comment lookup depends on the language:
 * TypeScript/JavaScript (where a wrapping `export_statement` replaces the definition),
 * Java/Rust, or the default lookup for everything else.
 */
function queryCapturesToGenericSemanticGroup(queryMatches: Parser.QueryMatch[], wasmLang: WASMLanguage): MatchGroup<GenericDetail>[] {
	// keyed by syntax node id so that a definition matched several times is only recorded once
	const groupsByNodeId = new Map<number, MatchGroup<GenericDetail>>();

	for (const { captures } of queryMatches) {
		const capturedNode = captures.find(capture => capture.name === 'definition')?.node;
		const bodyNode = capturedNode?.childForFieldName('body');
		if (!capturedNode || !bodyNode) {
			continue;
		}

		let mainNode = capturedNode;
		let commentNodes: Parser.SyntaxNode[];
		if (wasmLang === WASMLanguage.TypeScript || wasmLang === WASMLanguage.JavaScript) {
			const tsjs = getCommentsAndDefFromTSJSDefinition(capturedNode);
			mainNode = tsjs.definition;
			commentNodes = tsjs.comments;
		} else if (wasmLang === WASMLanguage.Java || wasmLang === WASMLanguage.Rust) {
			commentNodes = getCommentsFromJavaRustDefinition(capturedNode);
		} else {
			commentNodes = getCommentsFromDefinition(capturedNode);
		}

		if (groupsByNodeId.has(mainNode.id)) {
			continue;
		}
		groupsByNodeId.set(mainNode.id, {
			mainBlock: TreeSitterChunkHeaderInfo.ofSyntaxNode(mainNode),
			detailBlocks: {
				comments: commentNodes.map(comment => TreeSitterChunkHeaderInfo.ofSyntaxNode(comment)),
				body: TreeSitterChunkHeaderInfo.ofSyntaxNode(bodyNode)
			},
		});
	}

	return Array.from(groupsByNodeId.values());
}
600
601
/**
 * Finds the first node that may belong to the body of a Ruby definition.
 *
 * The first named child is the identifier and is always skipped; of the remaining ones,
 * the first whose type does not contain `parameters` is returned.
 *
 * @returns `undefined` when there are fewer than two nodes or every later node is a parameters node.
 */
function getFirstBodyParamForRuby(namedNodes: Parser.SyntaxNode[]) {
	const afterIdentifier = namedNodes.slice(1);
	return afterIdentifier.find(candidate => !candidate.type.includes('parameters'));
}
616
617
/**
 * Turns the `definition` captures of `queryMatches` into match groups for Ruby.
 *
 * The body is not read from a field: it spans from the first named child that is neither
 * the identifier nor a parameters node up to the last named child of the definition.
 * Definitions without such a first child are skipped.
 */
function queryCapturesToRubySemanticGroup(queryMatches: Parser.QueryMatch[]): MatchGroup<GenericDetail>[] {
	// keyed by syntax node id so that a definition matched several times is only recorded once
	const groupsByNodeId = new Map<number, MatchGroup<GenericDetail>>();

	for (const { captures } of queryMatches) {
		const definitionNode = captures.find(capture => capture.name === 'definition')?.node;
		if (!definitionNode) {
			continue;
		}

		const namedChildren = definitionNode.namedChildren;
		const firstBodyChild = getFirstBodyParamForRuby(namedChildren);
		if (!firstBodyChild) {
			continue;
		}
		const lastBodyChild = namedChildren[namedChildren.length - 1];

		// cut the body text out of the definition text, using offsets relative to the definition start
		const definitionStart = definitionNode.startIndex;
		const bodyText = definitionNode.text.substring(firstBodyChild.startIndex - definitionStart, lastBodyChild.endIndex - definitionStart);
		const commentNodes = getCommentsFromDefinition(definitionNode);

		if (groupsByNodeId.has(definitionNode.id)) {
			continue;
		}

		const bodyRange: TreeSitterPointRange = {
			startPosition: { row: firstBodyChild.startPosition.row, column: firstBodyChild.startPosition.column },
			endPosition: { row: lastBodyChild.endPosition.row, column: lastBodyChild.endPosition.column }
		};
		groupsByNodeId.set(definitionNode.id, {
			mainBlock: TreeSitterChunkHeaderInfo.ofSyntaxNode(definitionNode),
			detailBlocks: {
				comments: commentNodes.map(comment => TreeSitterChunkHeaderInfo.ofSyntaxNode(comment)),
				body: {
					range: bodyRange,
					startIndex: firstBodyChild.startIndex,
					text: bodyText,
					endIndex: lastBodyChild.endIndex,
				}
			},
		});
	}

	return Array.from(groupsByNodeId.values());
}
656
657
/**
 * Turns the `definition` captures of `queryMatches` into match groups for Python, made of
 * the definition, its body and, when present, its docstring and decorator.
 *
 * Only definitions that have a `body` field are kept. A definition matched several times
 * keeps a single entry, whose value is the one computed for the last match.
 */
function queryCapturesToPythonSemanticGroup(queryMatches: Parser.QueryMatch[]): MatchGroup<PythonDetail>[] {
	// keyed by syntax node id so that a definition matched several times yields a single entry
	const groupsByNodeId = new Map<number, MatchGroup<PythonDetail>>();

	for (const { captures } of queryMatches) {
		const definitionNode = captures.find(capture => capture.name === 'definition')?.node;
		const bodyNode = definitionNode?.childForFieldName('body');
		if (!definitionNode || !bodyNode) {
			continue;
		}

		const docstringNode = getDocstringFromBody(bodyNode);
		const decoratorNode = getDecoratorFromDefinition(definitionNode);
		groupsByNodeId.set(definitionNode.id, {
			mainBlock: TreeSitterChunkHeaderInfo.ofSyntaxNode(definitionNode),
			detailBlocks: {
				docstring: docstringNode ? TreeSitterChunkHeaderInfo.ofSyntaxNode(docstringNode) : undefined,
				decorator: decoratorNode ? TreeSitterChunkHeaderInfo.ofSyntaxNode(decoratorNode) : undefined,
				body: TreeSitterChunkHeaderInfo.ofSyntaxNode(bodyNode),
			},
		});
	}

	return Array.from(groupsByNodeId.values());
}
683
684
/**
 * For Generic (Cpp/Cs/Go) workspace chunks
 *
 * Collects the uninterrupted run of comment nodes that directly precedes `definition`
 * among its named siblings, in source order.
 *
 * @param commentNodeNames node types that count as comments.
 */
function getCommentsFromDefinition(definition: Parser.SyntaxNode, commentNodeNames = ['comment']): Parser.SyntaxNode[] {
	// there is an issue where the query sometimes returns comments that are at the beginning of the file
	// instead of ones that are actually close to the declaration.
	// Therefore, we find comments programmatically for more reliability
	const comments: Parser.SyntaxNode[] = [];
	for (
		let sibling = definition.previousNamedSibling;
		sibling && commentNodeNames.includes(sibling.type);
		sibling = sibling.previousNamedSibling
	) {
		// walking backwards, so prepend to keep source order
		comments.unshift(sibling);
	}
	return comments;
}
700
701
/**
 * For TS/JS workspace chunks
 *
 * When `definition` is the direct child of an `export_statement`, that export statement
 * stands in for the definition, and the comments are those preceding the export statement.
 */
function getCommentsAndDefFromTSJSDefinition(definition: Parser.SyntaxNode): {
	definition: Parser.SyntaxNode;
	comments: Parser.SyntaxNode[];
} {
	const parent = definition.parent;
	const effectiveDefinition = parent?.type === 'export_statement' ? parent : definition;
	return {
		definition: effectiveDefinition,
		comments: getCommentsFromDefinition(effectiveDefinition)
	};
}
721
722
/**
 * For Java and Rust workspace chunks
 *
 * Same as `getCommentsFromDefinition`, with `block_comment` and `line_comment` as the comment node types.
 */
function getCommentsFromJavaRustDefinition(definition: Parser.SyntaxNode): Parser.SyntaxNode[] {
	const commentNodeTypes = ['block_comment', 'line_comment'];
	return getCommentsFromDefinition(definition, commentNodeTypes);
}
728
729
730
/**
 * For Python workspace chunks
 *
 * Returns the named sibling directly preceding `definition` when it is a `decorator`
 * node, `undefined` otherwise.
 */
function getDecoratorFromDefinition(definition: Parser.SyntaxNode) {
	const precedingSibling = definition.previousNamedSibling;
	if (precedingSibling && precedingSibling.type === 'decorator') {
		return precedingSibling;
	}
	return undefined;
}
737
738
/**
 * Returns the docstring of a body: the `string` node that is the first child of the
 * body's first child, provided that first child is an `expression_statement`.
 * Returns `undefined` otherwise.
 */
function getDocstringFromBody(body: Parser.SyntaxNode) {
	const leadingStatement = body.firstChild;
	if (leadingStatement && leadingStatement.type === 'expression_statement') {
		const leadingExpression = leadingStatement.firstChild;
		if (leadingExpression && leadingExpression.type === 'string') {
			return leadingExpression;
		}
	}
	return undefined;
}
747
748
/**
 * Computes the structure of `source` by delegating to the shared `structureComputer`.
 */
export function _getStructure(lang: WASMLanguage, source: string): Promise<OverlayNode | undefined> {
	const structure = structureComputer.getStructure(lang, source);
	return structure;
}
751
752
/**
 * Counts the `ERROR` nodes in the parse tree of `source`.
 *
 * @returns `0` when the root node reports no error, otherwise the number of nodes of type `ERROR` in the tree.
 */
export async function _getParseErrorCount(language: WASMLanguage, source: string): Promise<number> {
	const treeRef = await _parse(language, source);
	try {
		const root = treeRef.tree.rootNode;
		if (!root.hasError) {
			return 0;
		}

		// visit every node using an explicit stack; the visiting order does not affect the count
		let errorCount = 0;
		const pending: Parser.SyntaxNode[] = [root];
		while (pending.length > 0) {
			const current = pending.pop()!;
			if (current.type === 'ERROR') {
				errorCount++;
			}
			for (const child of current.children) {
				pending.push(child);
			}
		}
		return errorCount;
	} finally {
		treeRef.dispose();
	}
}
773
774