Book a Demo!
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
microsoft
GitHub Repository: microsoft/vscode
Path: blob/main/extensions/copilot/src/platform/remoteCodeSearch/common/githubCodeSearchService.ts
13401 views
1
/*---------------------------------------------------------------------------------------------
2
* Copyright (c) Microsoft Corporation. All rights reserved.
3
* Licensed under the MIT License. See License.txt in the project root for license information.
4
*--------------------------------------------------------------------------------------------*/
5
import { RequestType } from '@vscode/copilot-api';
6
import { shouldInclude } from '../../../util/common/glob';
7
import { Result } from '../../../util/common/result';
8
import { TelemetryCorrelationId } from '../../../util/common/telemetryCorrelationId';
9
import { raceCancellationError } from '../../../util/vs/base/common/async';
10
import { CancellationToken } from '../../../util/vs/base/common/cancellation';
11
import { isCancellationError } from '../../../util/vs/base/common/errors';
12
import { URI } from '../../../util/vs/base/common/uri';
13
import { Range } from '../../../util/vs/editor/common/core/range';
14
import { createDecorator, IInstantiationService } from '../../../util/vs/platform/instantiation/common/instantiation';
15
import { IAuthenticationService } from '../../authentication/common/authentication';
16
import { FileChunk, FileChunkAndScore } from '../../chunking/common/chunk';
17
import { stripChunkTextMetadata, truncateToMaxUtf8Length } from '../../chunking/common/chunkingStringUtils';
18
import { EmbeddingType } from '../../embeddings/common/embeddingsComputer';
19
import { ICAPIClientService } from '../../endpoint/common/capiClient';
20
import { IEnvService } from '../../env/common/envService';
21
import { GithubRepoId, toGithubNwo } from '../../git/common/gitService';
22
import { makeGitHubAPIRequest } from '../../github/common/githubAPI';
23
import { getGithubMetadataHeaders } from '../../github/common/githubApiFetcherService';
24
import { IIgnoreService } from '../../ignore/common/ignoreService';
25
import { ILogService } from '../../log/common/logService';
26
import { IFetcherService, Response } from '../../networking/common/fetcherService';
27
import { postRequest } from '../../networking/common/networking';
28
import { ITelemetryService } from '../../telemetry/common/telemetry';
29
import { CodeSearchOptions, LexicalCodeSearchResult, RemoteCodeSearchError, RemoteCodeSearchIndexState, RemoteCodeSearchIndexStatus, SemanticCodeSearchResult } from './remoteCodeSearch';
30
31
32
/**
 * JSON body of a semantic code search response, as consumed by
 * `parseGithubCodeSearchResponse`.
 */
interface ResponseShape {
	/** The matched chunks. */
	readonly results: ReadonlyArray<SemanticSearchResult>;

	/** Id of the embedding model; used to construct the `EmbeddingType` attached to every returned distance. */
	readonly embedding_model: string;
}
36
37
/**
 * A single entry of {@link ResponseShape.results}.
 */
type SemanticSearchResult = {
	/** The matched chunk of text. */
	chunk: {
		hash: string;
		text: string;

		/** Byte offset range of the chunk. */
		range: {
			start: number;
			end: number;
		};

		/** Line range of the chunk; used as the start/end lines of the resulting `Range`. */
		line_range: {
			start: number;
			end: number;
		};

		embedding?: {
			embedding: number[];
		};
	};

	/** Distance reported for this chunk; passed through unchanged to the caller. */
	distance: number;

	/** Where the chunk lives. */
	location: {
		/** File path. */
		path: string;

		/** Commit the result was produced from; compared against the commit we believe is indexed. */
		commit_sha: string;

		/** Ref the result was produced from, e.g. `refs/heads/main`. */
		ref_name: string;

		repo: {
			/** Repository name with owner. */
			nwo: string;

			/** Repository API URL, e.g. `https://api.github.com/repos/o/r`. */
			url: string;
		};
	};
};
57
58
/**
 * Search scope that targets a single GitHub repository.
 */
export interface GithubCodeSearchRepoInfo {
	/** Discriminator for {@link GithubCodeSearchScope}. */
	readonly kind: 'repo';

	/** The repository to search. */
	readonly githubRepoId: GithubRepoId;

	/**
	 * Root of the local checkout, if there is one. When set, result paths are resolved
	 * against it; when `undefined`, results get a synthetic `githubRepoResult` URI.
	 */
	readonly localRepoRoot: URI | undefined;

	/**
	 * Commit we believe the remote index was built from. When set, results coming from a
	 * different commit mark the search result as out of sync.
	 */
	readonly indexedCommit: string | undefined;
}
64
65
/**
 * Search scope that targets every repository of a GitHub organization.
 */
export interface GithubCodeSearchOrgInfo {
	/** Discriminator for {@link GithubCodeSearchScope}. */
	readonly kind: 'org';

	/** Name of the organization; emitted as the `org:` qualifier of lexical searches. */
	readonly org: string;
}
69
70
/**
 * What a code search runs against: a single repository or a whole organization.
 * Discriminated by `kind`.
 */
export type GithubCodeSearchScope =
	| GithubCodeSearchRepoInfo
	| GithubCodeSearchOrgInfo;
71
72
/**
 * Service identifier used to inject {@link IGithubCodeSearchService}.
 *
 * The explicit type argument ties the identifier to the service interface, so that
 * resolving it through the instantiation service yields a typed service instead of an
 * untyped value.
 */
export const IGithubCodeSearchService = createDecorator<IGithubCodeSearchService>('IGithubCodeSearchService');
73
74
/**
 * Code search against GitHub: remote index management plus semantic and lexical search.
 */
export interface IGithubCodeSearchService {
	readonly _serviceBrand: undefined;

	/**
	 * Looks up the state of the remote index of a repository.
	 *
	 * @param authOptions `silent` is forwarded to the GitHub session lookup.
	 * @param githubRepoId The repository to check.
	 * @param telemetryInfo Caller information attached to the request.
	 * @param token Cancels the lookup.
	 * @returns The index state, or a {@link RemoteCodeSearchError} describing why it could not be determined.
	 */
	getRemoteIndexState(
		authOptions: { readonly silent: boolean },
		githubRepoId: GithubRepoId,
		telemetryInfo: TelemetryCorrelationId,
		token: CancellationToken,
	): Promise<Result<RemoteCodeSearchIndexState, RemoteCodeSearchError>>;

	/**
	 * Asks for a repository to be indexed.
	 *
	 * @param authOptions `silent` is forwarded to the GitHub session lookup.
	 * @param triggerReason What caused the request.
	 * @param githubRepoId The repository to index.
	 * @param telemetryInfo Caller information attached to the request and to telemetry.
	 * @returns `true` once the request has been accepted, otherwise a {@link RemoteCodeSearchError}.
	 */
	triggerIndexing(
		authOptions: { readonly silent: boolean },
		triggerReason: 'auto' | 'manual' | 'tool',
		githubRepoId: GithubRepoId,
		telemetryInfo: TelemetryCorrelationId,
	): Promise<Result<true, RemoteCodeSearchError>>;

	/**
	 * Runs a semantic search for relevant code snippets in a GitHub repository.
	 *
	 * The repository has to be indexed already: check {@link getRemoteIndexState} or call
	 * {@link triggerIndexing} beforehand.
	 *
	 * @param authOptions `silent` is forwarded to the GitHub session lookup.
	 * @param embeddingType Embedding model to search with.
	 * @param scope The repository to search.
	 * @param query The search text.
	 * @param maxResults Upper bound on the number of results requested.
	 * @param options Additional filtering, such as glob patterns.
	 * @param telemetryInfo Caller information attached to the request and to telemetry.
	 * @param token Cancels the search.
	 */
	semanticSearch(
		authOptions: { readonly silent: boolean },
		embeddingType: EmbeddingType,
		scope: GithubCodeSearchRepoInfo,
		query: string,
		maxResults: number,
		options: CodeSearchOptions,
		telemetryInfo: TelemetryCorrelationId,
		token: CancellationToken,
	): Promise<SemanticCodeSearchResult>;

	/**
	 * Runs a lexical search for relevant code snippets in a GitHub repository or organization.
	 *
	 * @param authOptions `silent` is forwarded to the GitHub session lookup.
	 * @param scope The repository or organization to search.
	 * @param query The search text.
	 * @param maxResults Upper bound on the number of results requested.
	 * @param options Additional filtering, such as glob patterns.
	 * @param telemetryInfo Caller information attached to the request and to telemetry.
	 * @param token Cancels the search.
	 */
	lexicalSearch(
		authOptions: { readonly silent: boolean },
		scope: GithubCodeSearchScope,
		query: string,
		maxResults: number,
		options: CodeSearchOptions,
		telemetryInfo: TelemetryCorrelationId,
		token: CancellationToken,
	): Promise<LexicalCodeSearchResult>;
}
126
127
/**
 * {@link IGithubCodeSearchService} implementation that talks to the embeddings index and
 * embeddings code search endpoints for index state / semantic search, and to the GitHub
 * `search/code` REST route for lexical search.
 */
export class GithubCodeSearchService implements IGithubCodeSearchService {

	declare readonly _serviceBrand: undefined;

	constructor(
		@IAuthenticationService private readonly _authenticationService: IAuthenticationService,
		@ICAPIClientService private readonly _capiClientService: ICAPIClientService,
		@IEnvService private readonly _envService: IEnvService,
		@IFetcherService private readonly _fetcherService: IFetcherService,
		@IIgnoreService private readonly _ignoreService: IIgnoreService,
		@ILogService private readonly _logService: ILogService,
		@ITelemetryService private readonly _telemetryService: ITelemetryService,
		@IInstantiationService private readonly _instantiationService: IInstantiationService,
	) { }

	/**
	 * Gets the state of the remote index for a given repo.
	 *
	 * Outcomes:
	 * - `Ready` with the indexed commit when the service reports a usable semantic index.
	 * - `Ready` without a commit when indexing is enabled but the repo is empty.
	 * - `BuildingIndex` when indexing is enabled but no index is available yet.
	 * - `NotYetIndexed` when indexing is not enabled.
	 * - A `not-authorized` error when no GitHub token is available, or a `generic-error`
	 *   when the request fails.
	 *
	 * @throws A cancellation error if `token` is cancelled while a request is in flight.
	 */
	async getRemoteIndexState(auth: { readonly silent: boolean }, githubRepoId: GithubRepoId, telemetryInfo: TelemetryCorrelationId, token: CancellationToken): Promise<Result<RemoteCodeSearchIndexState, RemoteCodeSearchError>> {
		const repoNwo = toGithubNwo(githubRepoId);

		// Repos with this prefix are always reported as not indexed, without any network request.
		// NOTE(review): the prefix is spelled 'simuluation' — presumably it has to match existing repo names, so it is kept as-is.
		if (repoNwo.startsWith('microsoft/simuluation-test-')) {
			return Result.ok({ status: RemoteCodeSearchIndexStatus.NotYetIndexed });
		}

		const authToken = await this.getGithubAccessToken(auth.silent);
		if (!authToken) {
			this._logService.error(`GithubCodeSearchService::getRemoteIndexState(${repoNwo}). Failed to fetch indexing status. No valid github auth token.`);
			return Result.error<RemoteCodeSearchError>({ type: 'not-authorized' });
		}

		try {
			const statusRequest = await raceCancellationError(this._capiClientService.makeRequest<Response>({
				method: 'GET',
				headers: {
					Authorization: `Bearer ${authToken}`,
					...getGithubMetadataHeaders(telemetryInfo.callTracker, this._envService),
				}
			}, { type: RequestType.EmbeddingsIndex, repoWithOwner: repoNwo }), token);
			if (!statusRequest.ok) {
				/* __GDPR__
					"githubCodeSearch.getRemoteIndexState.error" : {
						"owner": "mjbvz",
						"comment": "Information about failed remote index state requests",
						"statusCode": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "The response status code" }
					}
				*/
				this._telemetryService.sendMSFTTelemetryEvent('githubCodeSearch.getRemoteIndexState.error', {}, {
					statusCode: statusRequest.status,
				});

				this._logService.error(`GithubCodeSearchService::getRemoteIndexState(${repoNwo}). Failed to fetch indexing status. Response: ${statusRequest.status}. ${await statusRequest.text()}`);
				return Result.error<RemoteCodeSearchError>({ type: 'generic-error', error: new Error(`Failed to fetch indexing status. Response: ${statusRequest.status}.`) });
			}

			const preCheckResult = await raceCancellationError(statusRequest.json(), token);
			if (preCheckResult.semantic_code_search_ok && preCheckResult.semantic_commit_sha) {
				const indexedCommit = preCheckResult.semantic_commit_sha;
				this._logService.trace(`GithubCodeSearchService::getRemoteIndexState(${repoNwo}). Found indexed commit: ${indexedCommit}.`);
				return Result.ok({
					status: RemoteCodeSearchIndexStatus.Ready,
					indexedCommit,
				});
			}

			if (preCheckResult.semantic_indexing_enabled) {
				// An empty repo never gets an indexed commit, so report it as ready instead of building forever
				if (await raceCancellationError(this.isEmptyRepo(authToken, githubRepoId, token), token)) {
					this._logService.trace(`GithubCodeSearchService::getRemoteIndexState(${repoNwo}). Semantic indexing enabled but repo is empty.`);
					return Result.ok({
						status: RemoteCodeSearchIndexStatus.Ready,
						indexedCommit: undefined
					});
				}

				this._logService.trace(`GithubCodeSearchService::getRemoteIndexState(${repoNwo}). Semantic indexing enabled but not yet indexed.`);

				return Result.ok({ status: RemoteCodeSearchIndexStatus.BuildingIndex });
			} else {
				this._logService.trace(`GithubCodeSearchService::getRemoteIndexState(${repoNwo}). semantic_indexing_enabled was false. Repo not yet indexed but possibly can be.`);
				return Result.ok({ status: RemoteCodeSearchIndexStatus.NotYetIndexed });
			}
		} catch (e: unknown) {
			// Cancellation is propagated to the caller, everything else becomes an error result
			if (isCancellationError(e)) {
				throw e;
			}

			this._logService.error(`GithubCodeSearchService::getRemoteIndexState(${repoNwo}). Error: ${e}`);
			return Result.error<RemoteCodeSearchError>({ type: 'generic-error', error: e instanceof Error ? e : new Error(String(e)) });
		}
	}

	/**
	 * Requests that a given repo be indexed.
	 *
	 * Sends a `POST` to the embeddings index endpoint with `auto` set when `triggerReason`
	 * is `'auto'`, and reports the outcome through telemetry.
	 *
	 * @returns `true` when the request was accepted, a `not-authorized` error when no GitHub
	 * token is available, or a `generic-error` carrying the response status otherwise.
	 */
	public async triggerIndexing(
		auth: { readonly silent: boolean },
		triggerReason: 'auto' | 'manual' | 'tool',
		githubRepoId: GithubRepoId,
		telemetryInfo: TelemetryCorrelationId,
	): Promise<Result<true, RemoteCodeSearchError>> {
		const authToken = await this.getGithubAccessToken(auth.silent);
		if (!authToken) {
			return Result.error({ type: 'not-authorized' });
		}

		// Format the repo once so that the request, the log and the error message all agree,
		// and so that messages show `owner/repo` like the other methods of this class
		const repoNwo = toGithubNwo(githubRepoId);

		const response = await this._capiClientService.makeRequest<Response>({
			method: 'POST',
			headers: {
				Authorization: `Bearer ${authToken}`,
				...getGithubMetadataHeaders(telemetryInfo.callTracker, this._envService),
			},
			body: JSON.stringify({
				auto: triggerReason === 'auto',
			})
		}, { type: RequestType.EmbeddingsIndex, repoWithOwner: repoNwo });

		if (!response.ok) {
			this._logService.error(`GithubCodeSearchService.triggerIndexing(${triggerReason}). Failed to request indexing for '${repoNwo}'. Response: ${response.status}. ${await response.text()}`);

			/* __GDPR__
				"githubCodeSearch.triggerIndexing.error" : {
					"owner": "mjbvz",
					"comment": "Information about failed trigger indexing requests",
					"workspaceSearchSource": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Caller of the search" },
					"workspaceSearchCorrelationId": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Correlation id for the search" },
					"triggerReason": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Reason why the indexing was triggered" },
					"statusCode": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "The response status code" }
				}
			*/
			this._telemetryService.sendMSFTTelemetryEvent('githubCodeSearch.triggerIndexing.error', {
				workspaceSearchSource: telemetryInfo.callTracker.toString(),
				workspaceSearchCorrelationId: telemetryInfo.correlationId,
				triggerReason
			}, {
				statusCode: response.status,
			});

			return Result.error({ type: 'generic-error', error: new Error(`Failed to request indexing for '${repoNwo}'. Response: ${response.status}.`) });
		}

		// This event reports a successful *trigger indexing* request, so it is named after
		// this operation (mirroring `githubCodeSearch.triggerIndexing.error` above) rather
		// than after `getRemoteIndexState`.
		/* __GDPR__
			"githubCodeSearch.triggerIndexing.success" : {
				"owner": "mjbvz",
				"comment": "Information about successful trigger indexing requests",
				"workspaceSearchSource": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Caller of the search" },
				"workspaceSearchCorrelationId": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Correlation id for the search" },
				"triggerReason": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Reason why the indexing was triggered" }
			}
		*/
		this._telemetryService.sendMSFTTelemetryEvent('githubCodeSearch.triggerIndexing.success', {
			workspaceSearchSource: telemetryInfo.callTracker.toString(),
			workspaceSearchCorrelationId: telemetryInfo.correlationId,
			triggerReason,
		}, {});

		return Result.ok(true);
	}

	/**
	 * Semantic searches a given github repo for relevant code snippets.
	 *
	 * The query is truncated before being sent, see the comment on `prompt` below.
	 *
	 * @throws If no GitHub token is available, if the endpoint answers with a non-ok status,
	 * if the response json has no `results` array, or (cancellation error) if `token` is cancelled.
	 */
	async semanticSearch(
		auth: { readonly silent: boolean },
		embeddingType: EmbeddingType,
		repo: GithubCodeSearchRepoInfo,
		searchQuery: string,
		maxResults: number,
		options: CodeSearchOptions,
		telemetryInfo: TelemetryCorrelationId,
		token: CancellationToken
	): Promise<SemanticCodeSearchResult> {
		const authToken = await this.getGithubAccessToken(auth.silent);
		if (!authToken) {
			throw new Error('No valid auth token');
		}

		const response = await raceCancellationError(
			this._instantiationService.invokeFunction(postRequest, {
				endpointOrUrl: { type: RequestType.EmbeddingsCodeSearch },
				secretKey: authToken,
				intent: 'copilot-panel',
				requestId: '',
				body: {
					scoping_query: `repo:${toGithubNwo(repo.githubRepoId)}`,
					// The semantic search endpoint only supports prompts of up to 8k bytes (in utf8)
					// For now just truncate but we should consider a better way to handle this, such as having a model
					// generate a short prompt
					prompt: truncateToMaxUtf8Length(searchQuery, 7800),
					include_embeddings: false,
					limit: maxResults,
					embedding_model: embeddingType.id,
				} satisfies {
					scoping_query: string;
					prompt: string;
					include_embeddings: boolean;
					limit: number;
					embedding_model: string;
				} as any,
				additionalHeaders: getGithubMetadataHeaders(telemetryInfo.callTracker, this._envService),
				cancelToken: token,
			}),
			token);

		if (!response.ok) {
			/* __GDPR__
				"githubCodeSearch.searchRepo.error" : {
					"owner": "mjbvz",
					"comment": "Information about failed code searches",
					"workspaceSearchSource": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Caller of the search" },
					"workspaceSearchCorrelationId": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Correlation id for the search" },
					"statusCode": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "The response status code" }
				}
			*/
			this._telemetryService.sendMSFTTelemetryEvent('githubCodeSearch.searchRepo.error', {
				workspaceSearchSource: telemetryInfo.callTracker.toString(),
				workspaceSearchCorrelationId: telemetryInfo.correlationId,
			}, {
				statusCode: response.status,
			});

			throw new Error(`Code search semantic search failed with status: ${response.status}`);
		}

		const body = await raceCancellationError(response.json(), token);
		if (!Array.isArray(body.results)) {
			throw new Error(`Code search semantic search unexpected response json shape`);
		}

		const result = await raceCancellationError(parseGithubCodeSearchResponse(body, repo, options, this._ignoreService), token);

		/* __GDPR__
			"githubCodeSearch.searchRepo.success" : {
				"owner": "mjbvz",
				"comment": "Information about successful code searches",
				"workspaceSearchSource": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Caller of the search" },
				"workspaceSearchCorrelationId": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Correlation id for the search" },
				"resultCount": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "Total number of returned chunks from the search" },
				"resultOutOfSync": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "Tracks if the commit we think code search has indexed matches the commit code search returns results from" }
			}
		*/
		this._telemetryService.sendMSFTTelemetryEvent('githubCodeSearch.searchRepo.success', {
			workspaceSearchSource: telemetryInfo.callTracker.toString(),
			workspaceSearchCorrelationId: telemetryInfo.correlationId,
		}, {
			// Counts what the endpoint returned, before any local filtering
			resultCount: body.results.length,
			resultOutOfSync: result.outOfSync ? 1 : 0,
		});

		return result;
	}

	/**
	 * Lexical searches a given github repo or org for relevant code snippets.
	 *
	 * The scope is appended to the query as a `repo:` or `org:` qualifier and the search is
	 * sent to the `search/code` route, asking for text match metadata.
	 *
	 * @throws If no GitHub token is available, if the request yields no body, if the
	 * response json has no `items` array, or (cancellation error) if `token` is cancelled.
	 */
	async lexicalSearch(
		auth: { readonly silent: boolean },
		scope: GithubCodeSearchScope,
		query: string,
		maxResults: number,
		options: CodeSearchOptions,
		telemetryInfo: TelemetryCorrelationId,
		token: CancellationToken
	): Promise<LexicalCodeSearchResult> {
		const authToken = await this.getGithubAccessToken(auth.silent);
		if (!authToken) {
			throw new Error('No valid auth token');
		}

		const scopeQualifier = scope.kind === 'org' ? `org:${scope.org}` : `repo:${toGithubNwo(scope.githubRepoId)}`;
		const searchQuery = `${query} ${scopeQualifier}`;
		const routeSlug = `search/code?q=${encodeURIComponent(searchQuery)}&per_page=${maxResults}`;

		const body = await raceCancellationError(makeGitHubAPIRequest(
			this._fetcherService,
			this._logService,
			this._telemetryService,
			this._capiClientService.dotcomAPIURL,
			routeSlug,
			'GET',
			authToken,
			{
				accept: 'application/vnd.github.text-match+json',
				additionalHeaders: getGithubMetadataHeaders(telemetryInfo.callTracker, this._envService),
				callSite: 'github-code-search-lexical',
			},
		), token);

		if (!body) {
			/* __GDPR__
				"githubCodeSearch.lexicalSearch.error" : {
					"owner": "mjbvz",
					"comment": "Information about failed lexical code searches",
					"workspaceSearchSource": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Caller of the search" },
					"workspaceSearchCorrelationId": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Correlation id for the search" }
				}
			*/
			this._telemetryService.sendMSFTTelemetryEvent('githubCodeSearch.lexicalSearch.error', {
				workspaceSearchSource: telemetryInfo.callTracker.toString(),
				workspaceSearchCorrelationId: telemetryInfo.correlationId,
			});

			throw new Error(`Code search lexical search failed`);
		}
		if (!Array.isArray(body.items)) {
			throw new Error(`Code search lexical search unexpected response json shape`);
		}

		const result = await raceCancellationError(parseLexicalSearchResponse(body, scope, options, this._ignoreService), token);

		/* __GDPR__
			"githubCodeSearch.lexicalSearch.success" : {
				"owner": "mjbvz",
				"comment": "Information about successful lexical code searches",
				"workspaceSearchSource": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Caller of the search" },
				"workspaceSearchCorrelationId": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Correlation id for the search" },
				"resultCount": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "Total number of returned items from the search" }
			}
		*/
		this._telemetryService.sendMSFTTelemetryEvent('githubCodeSearch.lexicalSearch.success', {
			workspaceSearchSource: telemetryInfo.callTracker.toString(),
			workspaceSearchCorrelationId: telemetryInfo.correlationId,
		}, {
			// Counts what the endpoint returned, before any local filtering
			resultCount: body.items.length,
		});

		return result;
	}

	/**
	 * Resolves a GitHub access token, preferring a `'permissive'` session and falling back
	 * to `'any'` session.
	 *
	 * @returns The token, or `undefined` when neither session is available.
	 */
	private async getGithubAccessToken(silent: boolean) {
		return (await this._authenticationService.getGitHubSession('permissive', { silent }))?.accessToken
			?? (await this._authenticationService.getGitHubSession('any', { silent }))?.accessToken;
	}

	/**
	 * Checks through the `/repos/{owner}/{repo}` route whether a repository has no content.
	 *
	 * @returns `true` when the repo looks empty; `false` when it does not, and also when the
	 * repo info could not be fetched.
	 */
	private async isEmptyRepo(authToken: string, githubRepoId: GithubRepoId, token: CancellationToken): Promise<boolean> {
		// NOTE(review): this goes through the global `fetch` rather than the injected fetcher service — confirm that is intended.
		const response = await raceCancellationError(fetch(this._capiClientService.dotcomAPIURL + `/repos/${toGithubNwo(githubRepoId)}`, {
			headers: {
				'Authorization': `Bearer ${authToken}`,
				'Accept': 'application/vnd.github.v3+json'
			}
		}), token);

		if (!response.ok) {
			this._logService.error(`GithubCodeSearchService.isEmptyRepo(${toGithubNwo(githubRepoId)}). Failed to fetch repo info. Response: ${response.status}. ${await response.text()}`);
			return false;
		}

		const data: any = await response.json();

		// Check multiple indicators of an empty repo:
		// - size of 0 indicates no content
		// - missing default_branch often means no commits
		return data.size === 0 || !data.default_branch;
	}
}
470
471
/**
 * Converts a semantic code search response into a {@link SemanticCodeSearchResult}.
 *
 * A result is dropped when:
 * - it belongs to another repository than `repo` (unless `options.skipVerifyRepo` is set),
 * - `repo` has a local root and the resolved file is copilot-ignored, or
 * - the file does not pass `options.globPatterns`.
 *
 * The remaining chunks are returned in the order of `body.results`, independently of how
 * long the individual ignore checks take.
 *
 * @param body The parsed response json.
 * @param repo The repository that was searched.
 * @param options Search options; `skipVerifyRepo` disables the repository check.
 * @param ignoreService Used to filter out copilot-ignored files of local repositories.
 * @returns The kept chunks, whether any kept chunk comes from another commit than
 * `repo.indexedCommit`, and the remote url / ref name derived from the first raw result.
 */
export async function parseGithubCodeSearchResponse(body: ResponseShape, repo: GithubCodeSearchRepoInfo, options: CodeSearchOptions & { skipVerifyRepo?: boolean }, ignoreService: IIgnoreService): Promise<SemanticCodeSearchResult> {
	const embeddingsType = new EmbeddingType(body.embedding_model);

	// The checks run concurrently, but every callback *returns* its outcome instead of
	// pushing to a shared array: `Promise.all` resolves in input order, so the output
	// order cannot depend on which ignore check happens to finish first.
	const resolved = await Promise.all(body.results.map(async (result): Promise<{ readonly entry: FileChunkAndScore; readonly stale: boolean } | undefined> => {
		if (!options.skipVerifyRepo && result.location.repo.nwo.toLowerCase() !== toGithubNwo(repo.githubRepoId)) {
			return undefined;
		}

		let fileUri: URI;
		if (repo.localRepoRoot) {
			fileUri = URI.joinPath(repo.localRepoRoot, result.location.path);
			if (await ignoreService.isCopilotIgnored(fileUri)) {
				return undefined;
			}
		} else {
			// Non-local repo, make up a URI
			fileUri = URI.from({
				scheme: 'githubRepoResult',
				path: '/' + result.location.path
			});
		}

		if (!shouldInclude(fileUri, options.globPatterns)) {
			return undefined;
		}

		return {
			entry: {
				chunk: {
					file: fileUri,
					text: stripChunkTextMetadata(result.chunk.text),
					rawText: undefined,
					range: new Range(result.chunk.line_range.start, 0, result.chunk.line_range.end, 0),
					isFullFile: false, // TODO: get this from github
				},
				distance: {
					embeddingType: embeddingsType,
					value: result.distance,
				}
			},
			// Only meaningful when we know which commit is supposed to be indexed
			stale: !!repo.indexedCommit && result.location.commit_sha !== repo.indexedCommit,
		};
	}));

	const outChunks: FileChunkAndScore[] = [];
	let outOfSync = false;
	for (const kept of resolved) {
		if (!kept) {
			continue;
		}
		outChunks.push(kept.entry);
		outOfSync ||= kept.stale;
	}

	// Extract the remote URL and ref name from the first result
	const firstResult = body.results[0];
	let remoteUrl: string | undefined;
	let refName: string | undefined;
	if (firstResult) {
		// Derive the web URL from the API URL (e.g. https://api.github.com/repos/o/r -> https://github.com/o/r)
		const apiUrl = firstResult.location.repo.url;
		const nwo = firstResult.location.repo.nwo;
		try {
			const parsed = URI.parse(apiUrl);
			const host = parsed.authority === 'api.github.com' ? 'github.com' : parsed.authority.replace(/^api\./, '');
			remoteUrl = `https://${host}/${nwo}`;
		} catch {
			// Fall back to constructing from nwo
			remoteUrl = `https://github.com/${nwo}`;
		}

		// Extract branch name from ref_name (e.g. "refs/heads/main" -> "main")
		const rawRef = firstResult.location.ref_name;
		if (rawRef?.startsWith('refs/heads/')) {
			refName = rawRef.slice('refs/heads/'.length);
		} else if (rawRef) {
			refName = rawRef;
		}
	}

	return { chunks: outChunks, outOfSync, remoteUrl, refName };
}
544
545
/**
 * JSON body of a lexical code search response, as consumed by
 * `parseLexicalSearchResponse`.
 */
interface LexicalSearchResponseShape {
	readonly total_count: number;
	readonly incomplete_results: boolean;

	/** The matching files. */
	readonly items: ReadonlyArray<LexicalSearchItem>;
}
550
551
/**
 * A single entry of {@link LexicalSearchResponseShape.items}.
 */
type LexicalSearchItem = {
	/** Path of the matching file. */
	readonly path: string;

	readonly repository: {
		/** Repository name with owner. */
		readonly full_name: string;
	};

	/** Text match metadata; only the entries whose `property` is `'content'` are turned into chunks. */
	readonly text_matches?: ReadonlyArray<{
		/** The matching text fragment. */
		readonly fragment: string;
		readonly matches: ReadonlyArray<{
			readonly text: string;
			readonly indices: readonly [number, number];
		}>;
		readonly object_type: string;
		readonly property: string;
	}>;

	readonly score: number;
};
564
565
/**
 * Converts a lexical code search response into a {@link LexicalCodeSearchResult}.
 *
 * An item is dropped when:
 * - it is outside of `scope` — another repository for a repo scope, another owner for an
 *   org scope (unless `options.skipVerifyRepo` is set),
 * - the scope has a local root and the resolved file is copilot-ignored, or
 * - the file does not pass `options.globPatterns`.
 *
 * Every remaining item yields one chunk per `'content'` text match, or a single empty
 * whole-file chunk when it has none. Chunks are returned in the order of `body.items`,
 * independently of how long the individual ignore checks take. No range information is
 * derived from the response, so all chunks carry an all-zero range.
 *
 * @param body The parsed response json.
 * @param scope The repository or organization that was searched.
 * @param options Search options; `skipVerifyRepo` disables the scope check.
 * @param ignoreService Used to filter out copilot-ignored files of local repositories.
 * @returns The chunks; `outOfSync` is always `false`.
 */
export async function parseLexicalSearchResponse(body: LexicalSearchResponseShape, scope: GithubCodeSearchScope & { skipVerifyRepo?: boolean }, options: CodeSearchOptions & { skipVerifyRepo?: boolean }, ignoreService: IIgnoreService): Promise<LexicalCodeSearchResult> {
	// Every callback *returns* its chunks instead of pushing to a shared array:
	// `Promise.all` resolves in input order, so the output order cannot depend on which
	// ignore check happens to finish first.
	const chunksPerItem = await Promise.all(body.items.map(async (item): Promise<readonly FileChunk[]> => {
		if (!options.skipVerifyRepo && scope.kind === 'repo' && item.repository.full_name.toLowerCase() !== toGithubNwo(scope.githubRepoId)) {
			return [];
		}
		if (!options.skipVerifyRepo && scope.kind === 'org' && item.repository.full_name.toLowerCase().split('/')[0] !== scope.org.toLowerCase()) {
			return [];
		}

		const localRepoRoot = scope.kind === 'repo' ? scope.localRepoRoot : undefined;
		let fileUri: URI;
		if (localRepoRoot) {
			fileUri = URI.joinPath(localRepoRoot, item.path);
			if (await ignoreService.isCopilotIgnored(fileUri)) {
				return [];
			}
		} else {
			// No local checkout: make up a URI that also carries the repository name
			fileUri = URI.from({
				scheme: 'githubRepoResult',
				path: '/' + item.repository.full_name + '/' + item.path
			});
		}

		if (!shouldInclude(fileUri, options.globPatterns)) {
			return [];
		}

		const textMatches = item.text_matches?.filter(m => m.property === 'content');
		if (textMatches && textMatches.length > 0) {
			return textMatches.map((match): FileChunk => ({
				file: fileUri,
				text: match.fragment,
				rawText: undefined,
				range: new Range(0, 0, 0, 0),
				isFullFile: false,
			}));
		}

		// No text matches, include the file as a whole-file result
		return [{
			file: fileUri,
			text: '',
			rawText: undefined,
			range: new Range(0, 0, 0, 0),
			isFullFile: true,
		}];
	}));

	return { chunks: chunksPerItem.flat(), outOfSync: false };
}
619
620