Path: blob/main/extensions/copilot/src/platform/remoteCodeSearch/common/adoCodeSearchService.ts
13401 views
/*---------------------------------------------------------------------------------------------
 *  Copyright (c) Microsoft Corporation. All rights reserved.
 *  Licensed under the MIT License. See License.txt in the project root for license information.
 *--------------------------------------------------------------------------------------------*/

import { shouldInclude } from '../../../util/common/glob';
import { Result } from '../../../util/common/result';
import { CallTracker, TelemetryCorrelationId } from '../../../util/common/telemetryCorrelationId';
import { raceCancellationError } from '../../../util/vs/base/common/async';
import { CancellationToken } from '../../../util/vs/base/common/cancellation';
import { Emitter, Event } from '../../../util/vs/base/common/event';
import { Disposable } from '../../../util/vs/base/common/lifecycle';
import { StopWatch } from '../../../util/vs/base/common/stopwatch';
import { URI } from '../../../util/vs/base/common/uri';
import { Range } from '../../../util/vs/editor/common/core/range';
import { createDecorator, IInstantiationService } from '../../../util/vs/platform/instantiation/common/instantiation';
import { IAuthenticationService } from '../../authentication/common/authentication';
import { FileChunkAndScore } from '../../chunking/common/chunk';
import { stripChunkTextMetadata } from '../../chunking/common/chunkingStringUtils';
import { ConfigKey, IConfigurationService } from '../../configuration/common/configurationService';
import { EmbeddingType } from '../../embeddings/common/embeddingsComputer';
import { IEnvService } from '../../env/common/envService';
import { AdoRepoId } from '../../git/common/gitService';
import { getGithubMetadataHeaders } from '../../github/common/githubApiFetcherService';
import { IIgnoreService } from '../../ignore/common/ignoreService';
import { measureExecTime } from '../../log/common/logExecTime';
import { ILogService } from '../../log/common/logService';
import { getRequest, postRequest } from '../../networking/common/networking';
import { ITelemetryService } from '../../telemetry/common/telemetry';
import { CodeSearchOptions, RemoteCodeSearchError, RemoteCodeSearchIndexState, RemoteCodeSearchIndexStatus, SemanticCodeSearchResult } from './remoteCodeSearch';


/**
 * JSON body returned by the semantic search endpoint, as consumed by {@link AdoCodeSearchService.searchRepo}.
 */
interface ResponseShape {
	readonly results: readonly SemanticSearchResult[];
	readonly embedding_model: string;
}

/**
 * A single entry of {@link ResponseShape.results}.
 */
type SemanticSearchResult = {
	chunk: {
		hash: string;
		text: string;
		// Byte offset range of the chunk
		range: { start: number; end: number };
		line_range: { start: number; end: number };
		embedding?: { embedding: number[] };
	};
	distance: number;
	location: {
		path: string; // file path
		commit_sha: string;
		repo: {
			nwo: string;
			url: string;
		};
	};
};


/**
 * Describes the repo that a search is run against.
 */
export interface AdoCodeSearchRepoInfo {
	readonly adoRepoId: AdoRepoId;

	/** Root of the local checkout. When `undefined`, results are given synthetic (non-file) URIs. */
	readonly localRepoRoot: URI | undefined;

	/** Commit the caller believes is indexed. Used only to flag results as out of sync. */
	readonly indexedCommit: string | undefined;
}

export const IAdoCodeSearchService = createDecorator<IAdoCodeSearchService>('IAdoCodeSearchService');

export interface IAdoCodeSearchService {
	readonly _serviceBrand: undefined;

	readonly onDidChangeIndexState: Event<void>;

	/**
	 * Gets the state of the remote index for a given repo.
	 */
	getRemoteIndexState(
		auth: { readonly silent: boolean },
		repoId: AdoRepoId,
		token: CancellationToken,
	): Promise<Result<RemoteCodeSearchIndexState, RemoteCodeSearchError>>;

	/**
	 * Requests that a given repo be indexed.
	 */
	triggerIndexing(
		auth: { readonly silent: boolean },
		triggerReason: 'auto' | 'manual' | 'tool',
		repoId: AdoRepoId,
		telemetryInfo: TelemetryCorrelationId,
	): Promise<Result<true, RemoteCodeSearchError>>;

	/**
	 * Semantic searches a given repo for relevant code snippets
	 *
	 * The repo must have been indexed first. Make sure to check {@link getRemoteIndexState} or call {@link triggerIndexing}.
	 */
	searchRepo(
		auth: { readonly silent: boolean },
		repo: AdoCodeSearchRepoInfo,
		query: string,
		maxResults: number,
		options: CodeSearchOptions,
		telemetryInfo: TelemetryCorrelationId,
		token: CancellationToken,
	): Promise<SemanticCodeSearchResult>;
}

/**
 * Ado currently uses their own scoring system for embeddings.
 *
 * Used as the embedding type of a result's distance when the response does not name an embedding model.
 */
const adoCustomEmbeddingScoreType = new EmbeddingType('adoCustomEmbeddingScore');

/**
 * {@link IAdoCodeSearchService} implementation that talks to the `almsearch.dev.azure.com` endpoints.
 */
export class AdoCodeSearchService extends Disposable implements IAdoCodeSearchService {

	declare readonly _serviceBrand: undefined;

	private readonly _onDidChangeIndexState = this._register(new Emitter<void>());
	public readonly onDidChangeIndexState = this._onDidChangeIndexState.event;

	constructor(
		@IAuthenticationService private readonly _authenticationService: IAuthenticationService,
		@IConfigurationService private readonly _configurationService: IConfigurationService,
		@IEnvService private readonly _envService: IEnvService,
		@ILogService private readonly _logService: ILogService,
		@IIgnoreService private readonly _ignoreService: IIgnoreService,
		@ITelemetryService private readonly _telemetryService: ITelemetryService,
		@IInstantiationService private readonly _instantiationService: IInstantiationService,
	) {
		super();
	}

	/** Builds the URL used to query the semantic search status of `repoId`. */
	private getAdoAlmStatusUrl(repoId: AdoRepoId): string {
		return `https://almsearch.dev.azure.com/${repoId.org}/${repoId.project}/_apis/search/semanticsearchstatus/${repoId.repo}?api-version=7.1-preview`;
	}

	/** Builds the URL used to run a semantic search. The repo itself is selected through the request body. */
	private getAdoAlmSearchUrl(repo: AdoRepoId): string {
		return `https://almsearch.dev.azure.com/${repo.org}/${repo.project}/_apis/search/embeddings?api-version=7.1-preview`;
	}

	/**
	 * Gets the state of the remote index for a given repo and reports timing/outcome telemetry for the call.
	 */
	async getRemoteIndexState(auth: { readonly silent: boolean }, repoId: AdoRepoId, token: CancellationToken): Promise<Result<RemoteCodeSearchIndexState, RemoteCodeSearchError>> {
		return measureExecTime(() => this.getRemoteIndexStateImpl(auth, repoId, token), (execTime, status, result) => {
			/* __GDPR__
				"adoCodeSearch.getRemoteIndexState" : {
					"owner": "mjbvz",
					"comment": "Information about failed remote index state requests",
					"status": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "If the call succeeded or failed" },
					"ok": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Details on successful calls" },
					"error": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Details on failed calls" },
					"execTime": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "Time in milliseconds that the call took" }
				}
			*/
			this._telemetryService.sendMSFTTelemetryEvent('adoCodeSearch.getRemoteIndexState', {
				status,
				ok: result?.isOk() ? result.val.status : undefined,
				error: result?.isError() ? result.err.type : undefined,
			}, {
				execTime
			});
		});
	}

	/**
	 * Fetches the index status from ADO.
	 *
	 * @returns
	 * - `not-authorized` error when no auth token is available or the service answers 401/403.
	 * - `generic-error` error for any other non-ok response.
	 * - `NotIndexable` when the response says semantic search is not enabled.
	 * - `Ready` otherwise, together with the last indexed change id of the first listed branch (if any).
	 */
	private async getRemoteIndexStateImpl(auth: { readonly silent: boolean }, repoId: AdoRepoId, token: CancellationToken): Promise<Result<RemoteCodeSearchIndexState, RemoteCodeSearchError>> {
		const authToken = await this.getAdoAuthToken(auth.silent);
		if (!authToken) {
			this._logService.error(`AdoCodeSearchService::getRemoteIndexState(${repoId}). Failed to fetch indexing status. No valid ADO auth token.`);
			return Result.error<RemoteCodeSearchError>({ type: 'not-authorized' });
		}

		const endpoint = this.getAdoAlmStatusUrl(repoId);

		const additionalHeaders = {
			Accept: 'application/json',
			Authorization: `Basic ${authToken}`,
			'Content-Type': 'application/json',
			...getGithubMetadataHeaders(new CallTracker('AdoCodeSearchService::getRemoteIndexState'), this._envService)
		};

		const result = await raceCancellationError(
			this._instantiationService.invokeFunction(getRequest, {
				endpointOrUrl: endpoint,
				secretKey: authToken,
				intent: 'copilot-panel',
				requestId: '',
				additionalHeaders,
				cancelToken: token,
			}),
			token);

		if (!result.ok) {
			/* __GDPR__
				"adoCodeSearch.getRemoteIndexState.requestError" : {
					"owner": "mjbvz",
					"comment": "Information about failed remote index state requests",
					"statusCode": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "The response status code" }
				}
			*/
			this._telemetryService.sendMSFTTelemetryEvent('adoCodeSearch.getRemoteIndexState.requestError', {}, {
				statusCode: result.status,
			});

			if (result.status === 401 || result.status === 403) {
				return Result.error<RemoteCodeSearchError>({ type: 'not-authorized' });
			}

			return Result.error<RemoteCodeSearchError>({ type: 'generic-error', error: new Error(`ADO code search index status request failed with status: ${result.status}`) });
		}

		type AdoIndexStatusResponse = {
			semanticSearchEnabled: boolean;
			id: string;
			name: string;
			indexedBranches: {
				name: string;
				lastIndexedChangeId: string;
				lastProcessedTime: string;
			}[];
		};

		const body: AdoIndexStatusResponse = await result.json();
		if (!body.semanticSearchEnabled) {
			return Result.ok<RemoteCodeSearchIndexState>({
				status: RemoteCodeSearchIndexStatus.NotIndexable,
			});
		}

		// Only the first listed branch is consulted; `undefined` when no branch is listed.
		const indexedCommit = body.indexedBranches.at(0)?.lastIndexedChangeId;

		return Result.ok<RemoteCodeSearchIndexState>({
			indexedCommit,
			status: RemoteCodeSearchIndexStatus.Ready,
		});
	}

	/**
	 * Requests that a given repo be indexed.
	 *
	 * No indexing request is actually sent: this resolves to `true` whenever the index state can be
	 * fetched, and otherwise forwards the error from {@link getRemoteIndexState}.
	 */
	public async triggerIndexing(
		auth: { readonly silent: boolean },
		_triggerReason: 'auto' | 'manual' | 'tool',
		repoId: AdoRepoId,
		telemetryInfo: TelemetryCorrelationId,
	): Promise<Result<true, RemoteCodeSearchError>> {
		// ADO doesn't support explicit indexing. Just use the status and assume it's always ready
		const status = await this.getRemoteIndexState(auth, repoId, CancellationToken.None);
		if (status.isOk()) {
			return Result.ok(true);
		}

		return status;
	}

	/**
	 * Semantic searches a given repo for relevant code snippets.
	 *
	 * Results that are copilot-ignored (local repos only) or excluded by `options.globPatterns` are dropped.
	 * The remaining chunks are returned in the order the service listed them.
	 *
	 * @throws If no auth token is available, the request fails, the response does not have the expected
	 * shape, or `token` is cancelled while waiting on the request or its body.
	 */
	async searchRepo(
		auth: { readonly silent: boolean },
		repo: AdoCodeSearchRepoInfo,
		searchQuery: string,
		maxResults: number,
		options: CodeSearchOptions,
		telemetryInfo: TelemetryCorrelationId,
		token: CancellationToken
	): Promise<SemanticCodeSearchResult> {
		const totalSw = new StopWatch();

		const authToken = await this.getAdoAuthToken(auth.silent);
		if (!authToken) {
			this._logService.error(`AdoCodeSearchService::searchRepo(${repo.adoRepoId}). Failed to search repo. No valid ADO auth token.`);
			throw new Error('No valid auth token');
		}

		// A configured override endpoint takes precedence over the default ADO search URL
		let endpoint = this._configurationService.getConfig(ConfigKey.Advanced.WorkspacePrototypeAdoCodeSearchEndpointOverride);
		if (!endpoint) {
			endpoint = this.getAdoAlmSearchUrl(repo.adoRepoId);
		}

		const additionalHeaders = {
			Accept: 'application/json',
			Authorization: `Basic ${authToken}`,
			'Content-Type': 'application/json',
			...getGithubMetadataHeaders(new CallTracker('AdoCodeSearchService::searchRepo'), this._envService)
		};

		const requestSw = new StopWatch();
		const response = await raceCancellationError(
			this._instantiationService.invokeFunction(postRequest, {
				endpointOrUrl: endpoint,
				secretKey: authToken,
				intent: 'copilot-panel',
				requestId: '',
				body: {
					// TODO: Unclear what ADO's actual limit is
					prompt: searchQuery.slice(0, 10000),
					scoping_query: `repo:${repo.adoRepoId.project}/${repo.adoRepoId.repo}`,
					limit: maxResults,
				} satisfies {
					prompt: string;
					scoping_query: string;
					limit: number;
				},
				additionalHeaders,
				cancelToken: token,
			}),
			token);

		const requestExecTime = requestSw.elapsed();

		if (!response.ok) {
			/* __GDPR__
				"adoCodeSearch.searchRepo.error" : {
					"owner": "mjbvz",
					"comment": "Information about failed code ado searches",
					"workspaceSearchSource": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Caller of the search" },
					"workspaceSearchCorrelationId": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Correlation id for the search" },
					"statusCode": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "The response status code" },
					"execTime": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "The total time for the search call" },
					"requestExecTime": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "The request execution time" }
				}
			*/
			this._telemetryService.sendMSFTTelemetryEvent('adoCodeSearch.searchRepo.error', {
				workspaceSearchSource: telemetryInfo.callTracker.toString(),
				workspaceSearchCorrelationId: telemetryInfo.correlationId,
			}, {
				statusCode: response.status,
				execTime: totalSw.elapsed(),
				requestExecTime: requestExecTime,
			});

			this._logService.trace(`AdoCodeSearchService::searchRepo: Failed. Status code: ${response.status}`);

			throw new Error(`Ado code search semantic search failed with status: ${response.status}`);
		}

		const body: ResponseShape = await raceCancellationError(response.json(), token);
		if (!Array.isArray(body.results)) {
			throw new Error(`Code search semantic search unexpected response json shape`);
		}
		const rawResultCount = body.results.length;

		const returnedEmbeddingsType = body.embedding_model ? new EmbeddingType(body.embedding_model) : adoCustomEmbeddingScoreType;

		let outOfSync = false;

		// Each callback resolves to its chunk (or `undefined` when filtered out) instead of pushing into a
		// shared array. `Promise.all` keeps input order, so the output order no longer depends on when the
		// individual ignore checks happen to settle.
		const chunkOrSkipped = await Promise.all(body.results.map(async (result: SemanticSearchResult): Promise<FileChunkAndScore | undefined> => {
			let fileUri: URI;
			if (repo.localRepoRoot) {
				// Drop the first `%repo%/` placeholder from the returned path before resolving it locally
				fileUri = URI.joinPath(repo.localRepoRoot, result.location.path.replace('%repo%/', ''));
				if (await this._ignoreService.isCopilotIgnored(fileUri)) {
					return undefined;
				}
			} else {
				// Non-local repo, make up a URI
				fileUri = URI.from({
					scheme: 'githubRepoResult',
					path: '/' + result.location.path
				});
			}

			if (!shouldInclude(fileUri, options.globPatterns)) {
				return undefined;
			}

			// Only results that survive filtering contribute to the out-of-sync flag
			outOfSync ||= !!repo.indexedCommit && result.location.commit_sha !== repo.indexedCommit;

			return {
				chunk: {
					file: fileUri,
					text: stripChunkTextMetadata(result.chunk.text),
					rawText: undefined,
					range: new Range(result.chunk.line_range.start, 0, result.chunk.line_range.end, 0),
					isFullFile: false, // TODO: not provided
				},
				distance: {
					embeddingType: returnedEmbeddingsType,
					value: result.distance,
				}
			};
		}));

		const outChunks = chunkOrSkipped.filter((chunk): chunk is FileChunkAndScore => chunk !== undefined);

		/* __GDPR__
			"adoCodeSearch.searchRepo.success" : {
				"owner": "mjbvz",
				"comment": "Information about successful ado code search searches",
				"workspaceSearchSource": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Caller of the search" },
				"workspaceSearchCorrelationId": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Correlation id for the search" },
				"resultCount": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "Total number of returned chunks from the search after filtering" },
				"rawResultCount": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "Original number of returned chunks from the search before filtering" },
				"resultOutOfSync": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "Tracks if the commit we think code search has indexed matches the commit code search returns results from" },
				"execTime": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "The total time for the search call" },
				"requestExecTime": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "The request execution time" }
			}
		*/
		this._telemetryService.sendMSFTTelemetryEvent('adoCodeSearch.searchRepo.success', {
			workspaceSearchSource: telemetryInfo.callTracker.toString(),
			workspaceSearchCorrelationId: telemetryInfo.correlationId,
		}, {
			// Post-filter count; `rawResultCount` carries the pre-filter count
			resultCount: outChunks.length,
			rawResultCount,
			resultOutOfSync: outOfSync ? 1 : 0,
			execTime: totalSw.elapsed(),
			requestExecTime: requestExecTime,
		});

		this._logService.trace(`AdoCodeSearchService::searchRepo: Returning ${outChunks.length} chunks. Raw result count: ${rawResultCount}`);
		return { chunks: outChunks, outOfSync };
	}

	/** Fetches the base64 ADO access token from the authentication service; `undefined` when none is available. */
	private getAdoAuthToken(silent: boolean): Promise<string | undefined> {
		return this._authenticationService.getAdoAccessTokenBase64({ silent });
	}
}