// Source: GitHub repository microsoft/vscode
// Path: blob/main/extensions/copilot/src/platform/remoteCodeSearch/common/adoCodeSearchService.ts
1
/*---------------------------------------------------------------------------------------------
2
* Copyright (c) Microsoft Corporation. All rights reserved.
3
* Licensed under the MIT License. See License.txt in the project root for license information.
4
*--------------------------------------------------------------------------------------------*/
5
import { shouldInclude } from '../../../util/common/glob';
6
import { Result } from '../../../util/common/result';
7
import { CallTracker, TelemetryCorrelationId } from '../../../util/common/telemetryCorrelationId';
8
import { raceCancellationError } from '../../../util/vs/base/common/async';
9
import { CancellationToken } from '../../../util/vs/base/common/cancellation';
10
import { Emitter, Event } from '../../../util/vs/base/common/event';
11
import { Disposable } from '../../../util/vs/base/common/lifecycle';
12
import { StopWatch } from '../../../util/vs/base/common/stopwatch';
13
import { URI } from '../../../util/vs/base/common/uri';
14
import { Range } from '../../../util/vs/editor/common/core/range';
15
import { createDecorator, IInstantiationService } from '../../../util/vs/platform/instantiation/common/instantiation';
16
import { IAuthenticationService } from '../../authentication/common/authentication';
17
import { FileChunkAndScore } from '../../chunking/common/chunk';
18
import { stripChunkTextMetadata } from '../../chunking/common/chunkingStringUtils';
19
import { ConfigKey, IConfigurationService } from '../../configuration/common/configurationService';
20
import { EmbeddingType } from '../../embeddings/common/embeddingsComputer';
21
import { IEnvService } from '../../env/common/envService';
22
import { AdoRepoId } from '../../git/common/gitService';
23
import { getGithubMetadataHeaders } from '../../github/common/githubApiFetcherService';
24
import { IIgnoreService } from '../../ignore/common/ignoreService';
25
import { measureExecTime } from '../../log/common/logExecTime';
26
import { ILogService } from '../../log/common/logService';
27
import { getRequest, postRequest } from '../../networking/common/networking';
28
import { ITelemetryService } from '../../telemetry/common/telemetry';
29
import { CodeSearchOptions, RemoteCodeSearchError, RemoteCodeSearchIndexState, RemoteCodeSearchIndexStatus, SemanticCodeSearchResult } from './remoteCodeSearch';
30
31
32
/**
 * JSON body returned by the ADO embeddings search endpoint, as consumed by `searchRepo`.
 */
interface ResponseShape {
	/** Matching chunks. `searchRepo` rejects the response if this is not an array. */
	readonly results: readonly SemanticSearchResult[];
	/**
	 * Name of the embedding model behind the returned distances.
	 * `searchRepo` falls back to a custom ADO score type when this is empty or missing.
	 */
	readonly embedding_model: string;
}
36
37
/**
 * A single hit in the ADO semantic search response.
 */
type SemanticSearchResult = {
	chunk: {
		hash: string;
		/** Chunk text; `searchRepo` strips chunk metadata from it before returning. */
		text: string;
		// Byte offset range of the chunk
		range: { start: number; end: number };
		/** Line range of the chunk; used for the start/end lines of the returned `Range`. */
		line_range: { start: number; end: number };
		embedding?: { embedding: number[] };
	};
	/** Score reported by the service; forwarded unchanged as the chunk's distance value. */
	distance: number;
	location: {
		/** File path. May start with a `%repo%/` placeholder, which is removed when mapping onto a local repo root. */
		path: string; // file path
		/** Commit the hit was produced from; compared against the locally known indexed commit. */
		commit_sha: string;
		repo: {
			nwo: string;
			url: string;
		};
	};
};
56
57
58
/**
 * Describes the repository that a semantic search is run against.
 */
export interface AdoCodeSearchRepoInfo {
	/** Identifies the ADO org/project/repo; used to build the search URL and the scoping query. */
	readonly adoRepoId: AdoRepoId;
	/**
	 * Root of the local checkout, if any. When set, result paths are resolved under it and
	 * checked against the ignore service; when unset, a synthetic URI is produced instead.
	 */
	readonly localRepoRoot: URI | undefined;
	/**
	 * Commit the caller believes the remote index is at. When set, results that come from a
	 * different commit mark the search as out of sync.
	 */
	readonly indexedCommit: string | undefined;
}
63
64
/** Service identifier (decorator) for {@link IAdoCodeSearchService}. */
export const IAdoCodeSearchService = createDecorator<IAdoCodeSearchService>('IAdoCodeSearchService');
65
66
/**
 * Code search service for repositories hosted on Azure DevOps (ADO).
 */
export interface IAdoCodeSearchService {
	readonly _serviceBrand: undefined;

	/** Event for changes to the remote index state. */
	readonly onDidChangeIndexState: Event<void>;

	/**
	 * Gets the state of the remote index for a given repo.
	 *
	 * @param auth Whether the auth token must be acquired silently.
	 * @param repoId The ADO repo to query.
	 * @param token Cancels the request.
	 * @returns The index state, or a {@link RemoteCodeSearchError} describing why it could not be fetched.
	 */
	getRemoteIndexState(
		auth: { readonly silent: boolean },
		repoId: AdoRepoId,
		token: CancellationToken,
	): Promise<Result<RemoteCodeSearchIndexState, RemoteCodeSearchError>>;

	/**
	 * Requests that a given repo be indexed.
	 *
	 * @param auth Whether the auth token must be acquired silently.
	 * @param triggerReason What caused the request.
	 * @param repoId The ADO repo to index.
	 * @param telemetryInfo Correlation info for telemetry.
	 */
	triggerIndexing(
		auth: { readonly silent: boolean },
		triggerReason: 'auto' | 'manual' | 'tool',
		repoId: AdoRepoId,
		telemetryInfo: TelemetryCorrelationId,
	): Promise<Result<true, RemoteCodeSearchError>>;

	/**
	 * Semantic searches a given repo for relevant code snippets
	 *
	 * The repo must have been indexed first. Make sure to check {@link getRemoteIndexState} or call {@link triggerIndexing}.
	 *
	 * @param auth Whether the auth token must be acquired silently.
	 * @param repo The repo to search, plus optional local checkout information.
	 * @param query The natural language search text.
	 * @param maxResults Upper bound on the number of results requested from the service.
	 * @param options Extra filtering options for the results.
	 * @param telemetryInfo Correlation info for telemetry.
	 * @param token Cancels the search.
	 */
	searchRepo(
		auth: { readonly silent: boolean },
		repo: AdoCodeSearchRepoInfo,
		query: string,
		maxResults: number,
		options: CodeSearchOptions,
		telemetryInfo: TelemetryCorrelationId,
		token: CancellationToken,
	): Promise<SemanticCodeSearchResult>;
}
105
106
/**
 * Ado currently uses their own scoring system for embeddings.
 *
 * Used as the embedding type of returned distances when the search response does not
 * name an embedding model.
 */
const adoCustomEmbeddingScoreType = new EmbeddingType('adoCustomEmbeddingScore');
110
111
/**
 * {@link IAdoCodeSearchService} implementation that talks to the ADO `almsearch` REST endpoints.
 *
 * Every request authenticates with the base64 ADO access token from the authentication service,
 * sent as a `Basic` authorization header.
 */
export class AdoCodeSearchService extends Disposable implements IAdoCodeSearchService {

	declare readonly _serviceBrand: undefined;

	// Registered for disposal with the service. Nothing in this class fires it.
	private readonly _onDidChangeIndexState = this._register(new Emitter<void>());
	public readonly onDidChangeIndexState = this._onDidChangeIndexState.event;

	constructor(
		@IAuthenticationService private readonly _authenticationService: IAuthenticationService,
		@IConfigurationService private readonly _configurationService: IConfigurationService,
		@IEnvService private readonly _envService: IEnvService,
		@ILogService private readonly _logService: ILogService,
		@IIgnoreService private readonly _ignoreService: IIgnoreService,
		@ITelemetryService private readonly _telemetryService: ITelemetryService,
		@IInstantiationService private readonly _instantiationService: IInstantiationService,
	) {
		super();
	}

	/** Builds the URL of the semantic search status endpoint for `repoId`. */
	private getAdoAlmStatusUrl(repoId: AdoRepoId): string {
		return `https://almsearch.dev.azure.com/${repoId.org}/${repoId.project}/_apis/search/semanticsearchstatus/${repoId.repo}?api-version=7.1-preview`;
	}

	/** Builds the URL of the embeddings search endpoint for the project containing `repo`. */
	private getAdoAlmSearchUrl(repo: AdoRepoId): string {
		return `https://almsearch.dev.azure.com/${repo.org}/${repo.project}/_apis/search/embeddings?api-version=7.1-preview`;
	}

	/**
	 * Gets the state of the remote index for `repoId` and reports the outcome and timing as telemetry.
	 *
	 * @param auth Whether the auth token must be acquired silently.
	 * @param repoId The ADO repo to query.
	 * @param token Cancels the request.
	 */
	async getRemoteIndexState(auth: { readonly silent: boolean }, repoId: AdoRepoId, token: CancellationToken): Promise<Result<RemoteCodeSearchIndexState, RemoteCodeSearchError>> {
		return measureExecTime(() => this.getRemoteIndexStateImpl(auth, repoId, token), (execTime, status, result) => {
			/* __GDPR__
				"adoCodeSearch.getRemoteIndexState" : {
					"owner": "mjbvz",
					"comment": "Information about failed remote index state requests",
					"status": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "If the call succeeded or failed" },
					"ok": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Details on successful calls" },
					"error": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Details on failed calls" },
					"execTime": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "Time in milliseconds that the call took" }
				}
			*/
			this._telemetryService.sendMSFTTelemetryEvent('adoCodeSearch.getRemoteIndexState', {
				status,
				ok: result?.isOk() ? result.val.status : undefined,
				error: result?.isError() ? result.err.type : undefined,
			}, {
				execTime
			});
		});
	}

	/**
	 * Fetches the semantic search status for `repoId` and maps it to an index state.
	 *
	 * @returns `not-authorized` when no token is available or the service answers 401/403,
	 * `generic-error` for any other non-ok response, `NotIndexable` when semantic search is
	 * disabled for the repo, and otherwise `Ready` with the first indexed branch's last change id.
	 */
	private async getRemoteIndexStateImpl(auth: { readonly silent: boolean }, repoId: AdoRepoId, token: CancellationToken): Promise<Result<RemoteCodeSearchIndexState, RemoteCodeSearchError>> {
		const authToken = await this.getAdoAuthToken(auth.silent);
		if (!authToken) {
			this._logService.error(`AdoCodeSearchService::getRemoteIndexState(${repoId}). Failed to fetch indexing status. No valid ADO auth token.`);
			return Result.error<RemoteCodeSearchError>({ type: 'not-authorized' });
		}

		const endpoint = this.getAdoAlmStatusUrl(repoId);

		const additionalHeaders = {
			Accept: 'application/json',
			Authorization: `Basic ${authToken}`,
			'Content-Type': 'application/json',
			...getGithubMetadataHeaders(new CallTracker('AdoCodeSearchService::getRemoteIndexState'), this._envService)
		};

		const result = await raceCancellationError(
			this._instantiationService.invokeFunction(getRequest, {
				endpointOrUrl: endpoint,
				secretKey: authToken,
				intent: 'copilot-panel',
				requestId: '',
				additionalHeaders,
				cancelToken: token,
			}),
			token);

		if (!result.ok) {
			/* __GDPR__
				"adoCodeSearch.getRemoteIndexState.requestError" : {
					"owner": "mjbvz",
					"comment": "Information about failed remote index state requests",
					"statusCode": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "The response status code" }
				}
			*/
			this._telemetryService.sendMSFTTelemetryEvent('adoCodeSearch.getRemoteIndexState.requestError', {}, {
				statusCode: result.status,
			});

			if (result.status === 401 || result.status === 403) {
				return Result.error<RemoteCodeSearchError>({ type: 'not-authorized' });
			}

			return Result.error<RemoteCodeSearchError>({ type: 'generic-error', error: new Error(`ADO code search index status request failed with status: ${result.status}`) });
		}

		type AdoIndexStatusResponse = {
			semanticSearchEnabled: boolean;
			id: string;
			name: string;
			indexedBranches: {
				name: string;
				lastIndexedChangeId: string;
				lastProcessedTime: string;
			}[];
		};

		// Race body parsing against cancellation too, matching how searchRepo reads its response.
		const body: AdoIndexStatusResponse = await raceCancellationError(result.json(), token);
		if (!body.semanticSearchEnabled) {
			return Result.ok<RemoteCodeSearchIndexState>({
				status: RemoteCodeSearchIndexStatus.NotIndexable,
			});
		}

		// Only the first listed branch is considered.
		const indexedCommit = body.indexedBranches.at(0)?.lastIndexedChangeId;

		return Result.ok<RemoteCodeSearchIndexState>({
			indexedCommit,
			status: RemoteCodeSearchIndexStatus.Ready,
		});
	}

	/**
	 * Requests that a given repo be indexed.
	 *
	 * No indexing request is sent: this only queries the index state (without cancellation)
	 * and succeeds whenever that query succeeds.
	 *
	 * @param auth Whether the auth token must be acquired silently.
	 * @param _triggerReason Unused.
	 * @param repoId The ADO repo to check.
	 * @param telemetryInfo Unused.
	 */
	public async triggerIndexing(
		auth: { readonly silent: boolean },
		_triggerReason: 'auto' | 'manual' | 'tool',
		repoId: AdoRepoId,
		telemetryInfo: TelemetryCorrelationId,
	): Promise<Result<true, RemoteCodeSearchError>> {
		// ADO doesn't support explicit indexing. Just use the status and assume it's always ready
		const status = await this.getRemoteIndexState(auth, repoId, CancellationToken.None);
		if (status.isOk()) {
			return Result.ok(true);
		}

		return status;
	}

	/**
	 * Runs a semantic search against the given repo and converts the hits into file chunks.
	 *
	 * Hits are dropped when they are copilot-ignored (local repos only) or do not match
	 * `options.globPatterns`. The remaining chunks are returned in the order the service
	 * returned them.
	 *
	 * @param auth Whether the auth token must be acquired silently.
	 * @param repo The repo to search, plus optional local checkout information.
	 * @param searchQuery The search text. Only the first 10000 characters are sent.
	 * @param maxResults Sent to the service as the result limit.
	 * @param options Glob patterns used to filter results.
	 * @param telemetryInfo Correlation info attached to the telemetry events.
	 * @param token Cancels the request and the response parsing.
	 * @returns The filtered chunks and whether any of them came from a commit other than `repo.indexedCommit`.
	 * @throws Error when no auth token is available, when the service responds with a non-ok
	 * status, or when the response body does not contain a `results` array.
	 */
	async searchRepo(
		auth: { readonly silent: boolean },
		repo: AdoCodeSearchRepoInfo,
		searchQuery: string,
		maxResults: number,
		options: CodeSearchOptions,
		telemetryInfo: TelemetryCorrelationId,
		token: CancellationToken
	): Promise<SemanticCodeSearchResult> {
		const totalSw = new StopWatch();

		const authToken = await this.getAdoAuthToken(auth.silent);
		if (!authToken) {
			this._logService.error(`AdoCodeSearchService::searchRepo(${repo.adoRepoId}). Failed to search repo. No valid ADO auth token.`);
			throw new Error('No valid auth token');
		}

		// An unset or empty override falls back to the default ADO endpoint.
		const endpoint = this._configurationService.getConfig(ConfigKey.Advanced.WorkspacePrototypeAdoCodeSearchEndpointOverride)
			|| this.getAdoAlmSearchUrl(repo.adoRepoId);

		const additionalHeaders = {
			Accept: 'application/json',
			Authorization: `Basic ${authToken}`,
			'Content-Type': 'application/json',
			...getGithubMetadataHeaders(new CallTracker('AdoCodeSearchService::searchRepo'), this._envService)
		};

		const requestSw = new StopWatch();
		const response = await raceCancellationError(
			this._instantiationService.invokeFunction(postRequest, {
				endpointOrUrl: endpoint,
				secretKey: authToken,
				intent: 'copilot-panel',
				requestId: '',
				body: {
					// TODO: Unclear what's ADO's actual limit is
					prompt: searchQuery.slice(0, 10000),
					scoping_query: `repo:${repo.adoRepoId.project}/${repo.adoRepoId.repo}`,
					limit: maxResults,
				} satisfies {
					prompt: string;
					scoping_query: string;
					limit: number;
				},
				additionalHeaders,
				cancelToken: token,
			}),
			token);

		const requestExecTime = requestSw.elapsed();

		if (!response.ok) {
			/* __GDPR__
				"adoCodeSearch.searchRepo.error" : {
					"owner": "mjbvz",
					"comment": "Information about failed code ado searches",
					"workspaceSearchSource": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Caller of the search" },
					"workspaceSearchCorrelationId": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Correlation id for the search" },
					"statusCode": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "The response status code" },
					"execTime": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "The total time for the search call" },
					"requestExecTime": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "The request execution time" }
				}
			*/
			this._telemetryService.sendMSFTTelemetryEvent('adoCodeSearch.searchRepo.error', {
				workspaceSearchSource: telemetryInfo.callTracker.toString(),
				workspaceSearchCorrelationId: telemetryInfo.correlationId,
			}, {
				statusCode: response.status,
				execTime: totalSw.elapsed(),
				requestExecTime: requestExecTime,
			});

			this._logService.trace(`AdoCodeSearchService::searchRepo: Failed. Status code: ${response.status}`);

			throw new Error(`Ado code search semantic search failed with status: ${response.status}`);
		}

		const body: ResponseShape = await raceCancellationError(response.json(), token);
		if (!Array.isArray(body.results)) {
			throw new Error(`Code search semantic search unexpected response json shape`);
		}
		const rawResultCount = body.results.length;

		const returnedEmbeddingsType = body.embedding_model ? new EmbeddingType(body.embedding_model) : adoCustomEmbeddingScoreType;

		let outOfSync = false;
		// Each callback yields its chunk (or undefined when filtered out) rather than pushing into a
		// shared array: Promise.all keeps input order, so the output follows the service's order
		// regardless of how long the individual ignore checks take.
		const chunkOrSkipped = await Promise.all(body.results.map(async (result: SemanticSearchResult): Promise<FileChunkAndScore | undefined> => {
			let fileUri: URI;
			if (repo.localRepoRoot) {
				fileUri = URI.joinPath(repo.localRepoRoot, result.location.path.replace('%repo%/', ''));
				if (await this._ignoreService.isCopilotIgnored(fileUri)) {
					return undefined;
				}
			} else {
				// Non-local repo, make up a URI
				fileUri = URI.from({
					scheme: 'githubRepoResult',
					path: '/' + result.location.path
				});
			}

			if (!shouldInclude(fileUri, options.globPatterns)) {
				return undefined;
			}

			// Only results that survive filtering contribute to the out-of-sync flag.
			outOfSync ||= !!repo.indexedCommit && result.location.commit_sha !== repo.indexedCommit;

			return {
				chunk: {
					file: fileUri,
					text: stripChunkTextMetadata(result.chunk.text),
					rawText: undefined,
					range: new Range(result.chunk.line_range.start, 0, result.chunk.line_range.end, 0),
					isFullFile: false, // TODO: not provided
				},
				distance: {
					embeddingType: returnedEmbeddingsType,
					value: result.distance,
				}
			};
		}));
		const outChunks = chunkOrSkipped.filter((chunk): chunk is FileChunkAndScore => chunk !== undefined);

		/* __GDPR__
			"adoCodeSearch.searchRepo.success" : {
				"owner": "mjbvz",
				"comment": "Information about successful ado code search searches",
				"workspaceSearchSource": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Caller of the search" },
				"workspaceSearchCorrelationId": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Correlation id for the search" },
				"resultCount": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "Total number of returned chunks from the search after filtering" },
				"rawResultCount": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "Original number of returned chunks from the search before filtering" },
				"resultOutOfSync": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "Tracks if the commit we think code search has indexed matches the commit code search returns results from" },
				"execTime": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "The total time for the search call" },
				"requestExecTime": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "The request execution time" }
			}
		*/
		this._telemetryService.sendMSFTTelemetryEvent('adoCodeSearch.searchRepo.success', {
			workspaceSearchSource: telemetryInfo.callTracker.toString(),
			workspaceSearchCorrelationId: telemetryInfo.correlationId,
		}, {
			// Count after filtering; the unfiltered count is reported separately as rawResultCount.
			resultCount: outChunks.length,
			rawResultCount,
			resultOutOfSync: outOfSync ? 1 : 0,
			execTime: totalSw.elapsed(),
			requestExecTime: requestExecTime,
		});

		this._logService.trace(`AdoCodeSearchService::searchRepo: Returning ${outChunks.length} chunks. Raw result count: ${rawResultCount}`);
		return { chunks: outChunks, outOfSync };
	}

	/** Gets the base64 encoded ADO access token, or `undefined` if none is available. */
	private getAdoAuthToken(silent: boolean): Promise<string | undefined> {
		return this._authenticationService.getAdoAccessTokenBase64({ silent });
	}
}
402
403