Path: blob/main/extensions/copilot/src/extension/prompt/node/repoInfoTelemetry.ts
13399 views
/*---------------------------------------------------------------------------------------------
 *  Copyright (c) Microsoft Corporation. All rights reserved.
 *  Licensed under the MIT License. See License.txt in the project root for license information.
 *--------------------------------------------------------------------------------------------*/

import { ICopilotTokenStore } from '../../../platform/authentication/common/copilotTokenStore';
import { ConfigKey, IConfigurationService } from '../../../platform/configuration/common/configurationService';
import { IFileSystemService } from '../../../platform/filesystem/common/fileSystemService';
import { IGitDiffService } from '../../../platform/git/common/gitDiffService';
import { IGitExtensionService } from '../../../platform/git/common/gitExtensionService';
import { getOrderedRepoInfosFromContext, IGitService, normalizeFetchUrl, RepoContext, ResolvedRepoRemoteInfo } from '../../../platform/git/common/gitService';
import { Change, Repository } from '../../../platform/git/vscode/git';
import { ILogService } from '../../../platform/log/common/logService';
import { ITelemetryService } from '../../../platform/telemetry/common/telemetry';
import { extUriBiasedIgnorePathCase } from '../../../util/vs/base/common/resources';
import { IWorkspaceFileIndex } from '../../../platform/workspaceChunkSearch/node/workspaceFileIndex';

// Create a mapping for the git status enum to put the actual status string in telemetry
// The enum is a const enum and part of the public git extension API, so the order should stay stable
const STATUS_TO_STRING: Record<number, string> = {
	0: 'INDEX_MODIFIED',
	1: 'INDEX_ADDED',
	2: 'INDEX_DELETED',
	3: 'INDEX_RENAMED',
	4: 'INDEX_COPIED',
	5: 'MODIFIED',
	6: 'DELETED',
	7: 'UNTRACKED',
	8: 'IGNORED',
	9: 'INTENT_TO_ADD',
	10: 'INTENT_TO_RENAME',
	11: 'TYPE_CHANGED',
	12: 'ADDED_BY_US',
	13: 'ADDED_BY_THEM',
	14: 'DELETED_BY_US',
	15: 'DELETED_BY_THEM',
	16: 'BOTH_ADDED',
	17: 'BOTH_DELETED',
	18: 'BOTH_MODIFIED',
};

// Max telemetry payload size is 1MB, we add shared properties in further code and JSON structure overhead to that
// so check our diff JSON size against 900KB to be conservative with space
const MAX_DIFFS_JSON_SIZE = 900 * 1024;

// Max changes to avoid degenerate cases like mass renames
const MAX_CHANGES = 100;

// Max age of the merge base commit in days before we skip the diff
const MAX_MERGE_BASE_AGE_DAYS = 30;

// Max number of commits between merge base and HEAD before we skip the diff
const MAX_DIFF_COMMITS = 30;

// EVENT: repoInfo
type RepoInfoTelemetryResult = 'success' | 'filesChanged' | 'diffTooLarge' | 'noChanges' | 'tooManyChanges' | 'mergeBaseTooOld' | 'virtualFileSystem' | 'tooManyCommits';

type RepoInfoTelemetryProperties = {
	remoteUrl: string | undefined;
	repoId: string | undefined;
	repoType: 'github' | 'ado';
	headCommitHash: string | undefined;
	headBranchName: string | undefined;
	fileRelativePaths: string | undefined;
	diffsJSON: string | undefined;
	result: RepoInfoTelemetryResult;
};

type RepoInfoTelemetryMeasurements = {
	workspaceFileCount: number;
	changedFileCount: number;
	diffSizeBytes: number;
};

type RepoInfoTelemetryData = {
	properties: RepoInfoTelemetryProperties;
	measurements: RepoInfoTelemetryMeasurements;
};

type RepoInfoInternalTelemetryProperties = RepoInfoTelemetryProperties & {
	location: 'begin' | 'end';
	telemetryMessageId: string;
};

// Only send ending telemetry on states where we capture repo info or no changes currently
function shouldSendEndTelemetry(result: RepoInfoTelemetryResult | undefined): boolean {
	return result === 'success' || result === 'noChanges';
}

/**
 * Handles sending telemetry about the current git repository.
 * Repo metadata and diffsJSON are sent via sendEnhancedGHTelemetryEvent.
 * Full repo info is additionally sent for internal users via sendInternalMSFTTelemetryEvent.
 */
export class RepoInfoTelemetry {
	private _beginTelemetrySent = false;
	// Promise for the in-flight/finished 'begin' send. It is produced by `_sendBeginTelemetry`,
	// which catches its own failures, so awaiting it never rejects.
	private _beginTelemetryPromise: Promise<RepoInfoTelemetryData | undefined> | undefined;
	private _beginTelemetryResult: RepoInfoTelemetryResult | undefined;

	constructor(
		private readonly _telemetryMessageId: string,
		@ITelemetryService private readonly _telemetryService: ITelemetryService,
		@IGitService private readonly _gitService: IGitService,
		@IGitDiffService private readonly _gitDiffService: IGitDiffService,
		@IGitExtensionService private readonly _gitExtensionService: IGitExtensionService,
		@ILogService private readonly _logService: ILogService,
		@IFileSystemService private readonly _fileSystemService: IFileSystemService,
		@IWorkspaceFileIndex private readonly _workspaceFileIndex: IWorkspaceFileIndex,
		@IConfigurationService private readonly _configurationService: IConfigurationService,
		@ICopilotTokenStore private readonly _copilotTokenStore: ICopilotTokenStore,
	) { }

	/**
	 * Sends the begin event telemetry, make sure to only send one time, as multiple PanelChatTelemetry instances
	 * are created per user request.
	 *
	 * Callers that arrive while the first send is still in progress wait for that same send.
	 * Failures are logged as warnings and are not propagated to any caller.
	 */
	public async sendBeginTelemetryIfNeeded(): Promise<void> {
		if (!this._beginTelemetrySent) {
			this._beginTelemetrySent = true;
			this._beginTelemetryPromise = this._sendBeginTelemetry();
		}

		// Already sent or in progress: the stored promise never rejects, so this is safe for every caller.
		await this._beginTelemetryPromise;
	}

	/**
	 * Sends the end event telemetry.
	 *
	 * Waits for a pending begin send first, and only sends when the begin result was
	 * 'success' or 'noChanges'. Failures are logged as warnings and are not propagated.
	 */
	public async sendEndTelemetry(): Promise<void> {
		// Never rejects (see `_sendBeginTelemetry`), so a failed begin cannot surface here as an unhandled error.
		await this._beginTelemetryPromise;

		// Skip end telemetry if begin wasn't successful
		if (!shouldSendEndTelemetry(this._beginTelemetryResult)) {
			return;
		}

		try {
			await this._sendRepoInfoTelemetry('end');
		} catch (error) {
			this._logService.warn(`Failed to send end repo info telemetry ${error}`);
		}
	}

	/**
	 * Performs the 'begin' send and records its result in `_beginTelemetryResult`.
	 * Errors are caught and logged here so that the returned promise, which is shared
	 * between all awaiters, always resolves.
	 *
	 * @returns The telemetry data that was sent, or `undefined` if nothing was sent or the send failed.
	 */
	private async _sendBeginTelemetry(): Promise<RepoInfoTelemetryData | undefined> {
		try {
			const gitInfo = await this._sendRepoInfoTelemetry('begin');
			this._beginTelemetryResult = gitInfo?.properties.result;
			return gitInfo;
		} catch (error) {
			this._logService.warn(`Failed to send begin repo info telemetry ${error}`);
			return undefined;
		}
	}

	/**
	 * Collects the repo info and emits the 'request.repoInfo' telemetry event(s).
	 *
	 * @param location Whether this is the 'begin' or 'end' event of the request.
	 * @returns The collected data, or `undefined` when the telemetry is disabled by configuration
	 * or no repo info could be collected.
	 */
	private async _sendRepoInfoTelemetry(location: 'begin' | 'end'): Promise<RepoInfoTelemetryData | undefined> {
		if (this._configurationService.getConfig(ConfigKey.TeamInternal.DisableRepoInfoTelemetry)) {
			return undefined;
		}

		const repoInfo = await this._getRepoInfoTelemetry();
		if (!repoInfo) {
			return undefined;
		}

		const internalProperties: RepoInfoInternalTelemetryProperties = {
			...repoInfo.properties,
			location,
			telemetryMessageId: this._telemetryMessageId
		};

		const isInternal = !!this._copilotTokenStore.copilotToken?.isInternal;
		if (isInternal) {
			// The MSFT event is sent without the branch name and the relative file paths.
			const { headBranchName: _, fileRelativePaths: _2, ...msftProperties } = internalProperties;
			this._telemetryService.sendInternalMSFTTelemetryEvent('request.repoInfo', msftProperties, repoInfo.measurements);
		}
		this._telemetryService.sendEnhancedGHTelemetryEvent('request.repoInfo', internalProperties, repoInfo.measurements);

		return repoInfo;
	}

	/**
	 * Resolves everything needed to describe the active repository: its context, the first
	 * remote info with a fetch URL, the git extension repository object, the merge base with
	 * upstream and the current branch name.
	 *
	 * @returns `undefined` when there is no active repository, no remote with a fetch URL,
	 * no git extension repository for the root, or no merge base could be determined.
	 */
	private async _resolveRepoContext(): Promise<{ repoContext: RepoContext; repoInfo: ResolvedRepoRemoteInfo; repository: Repository; upstreamCommit: string; headBranchName: string | undefined } | undefined> {
		const repoContext = this._gitService.activeRepository?.get();
		if (!repoContext) {
			return;
		}

		const repoInfo = Array.from(getOrderedRepoInfosFromContext(repoContext))[0];
		if (!repoInfo || !repoInfo.fetchUrl) {
			return;
		}

		const gitAPI = this._gitExtensionService.getExtensionApi();
		const repository = gitAPI?.getRepository(repoContext.rootUri);
		if (!repository) {
			return;
		}

		// Prefer the merge base with the tracked upstream; fall back to the branch base when there is none.
		let upstreamCommit = await repository.getMergeBase('HEAD', '@{upstream}');
		if (!upstreamCommit) {
			const baseBranch = await repository.getBranchBase('HEAD');
			if (baseBranch) {
				const baseRef = `${baseBranch.remote}/${baseBranch.name}`;
				upstreamCommit = await repository.getMergeBase('HEAD', baseRef);
			}
		}

		if (!upstreamCommit) {
			return;
		}

		const headBranchName = repository.state.HEAD?.name;
		return { repoContext, repoInfo, repository, upstreamCommit, headBranchName };
	}

	/**
	 * Builds the telemetry payload for the active repository: repo metadata plus, when cheap and
	 * small enough, the JSON-serialized diffs of the working tree against the merge base.
	 *
	 * The `result` property of the returned data states why diffs were or were not included.
	 *
	 * @returns The payload, or `undefined` when the repository context could not be resolved.
	 */
	private async _getRepoInfoTelemetry(): Promise<RepoInfoTelemetryData | undefined> {
		const ctx = await this._resolveRepoContext();
		if (!ctx) {
			return;
		}

		const { repoContext, repoInfo, repository, upstreamCommit, headBranchName } = ctx;
		// fetchUrl was checked to be non-empty in _resolveRepoContext
		const normalizedFetchUrl = normalizeFetchUrl(repoInfo.fetchUrl!);

		// Payload used for every early exit taken before any diff work has been done.
		const skipDiffResult = (result: RepoInfoTelemetryResult): RepoInfoTelemetryData => ({
			properties: {
				remoteUrl: normalizedFetchUrl,
				repoId: repoInfo.repoId.toString(),
				repoType: repoInfo.repoId.type,
				headCommitHash: upstreamCommit,
				headBranchName,
				fileRelativePaths: undefined,
				diffsJSON: undefined,
				result,
			},
			measurements: {
				workspaceFileCount: 0,
				changedFileCount: 0,
				diffSizeBytes: 0,
			}
		});

		// VFS and sparse checkout enlistments are unlikely to have all blobs available locally,
		// making diff operations expensive or impossible. Skip early if either is configured.
		// core.virtualfilesystem is a path to a hook script, any non-empty value means VFS is active.
		// core.sparsecheckout is a git boolean: true/yes/on/1 are truthy per git-config spec.
		// If we can't determine the config, skip to be safe.
		try {
			const virtualFileSystem = await repository.getConfig('core.virtualfilesystem');
			const sparseCheckout = await repository.getConfig('core.sparsecheckout');
			const GIT_TRUE_VALUES = new Set(['true', 'yes', 'on', '1']);
			if (virtualFileSystem || GIT_TRUE_VALUES.has(sparseCheckout.toLowerCase())) {
				return skipDiffResult('virtualFileSystem');
			}
		} catch {
			return skipDiffResult('virtualFileSystem');
		}

		// Check if the merge base commit is too old to avoid expensive diff operations
		// on very stale branches where rename detection can consume many GB of memory.
		// If we can't determine the commit age, treat it as too old to avoid the potentially expensive diff.
		try {
			const mergeBaseCommit = await repository.getCommit(upstreamCommit);
			const ageDays = mergeBaseCommit.commitDate
				? (Date.now() - mergeBaseCommit.commitDate.getTime()) / (1000 * 60 * 60 * 24)
				: undefined;

			if (ageDays === undefined || ageDays > MAX_MERGE_BASE_AGE_DAYS) {
				return skipDiffResult('mergeBaseTooOld');
			}
		} catch {
			return skipDiffResult('mergeBaseTooOld');
		}

		// Check if there are too many commits between the merge base and HEAD.
		// Extensive renames can make even the check for number of changed files expensive, and we are likely to have
		// too big a diff to log anyways
		try {
			const commitLog = await repository.log({ range: `${upstreamCommit}..HEAD`, maxEntries: MAX_DIFF_COMMITS });
			if (commitLog.length >= MAX_DIFF_COMMITS) {
				return skipDiffResult('tooManyCommits');
			}
		} catch {
			return skipDiffResult('tooManyCommits');
		}

		// Before we calculate our async diffs, sign up for file system change events
		// Any changes during the async operations will invalidate our diff data and we send it
		// as a failure without diffs
		const watcher = this._fileSystemService.createFileSystemWatcher('**/*');
		let filesChanged = false;
		const createDisposable = watcher.onDidCreate(() => filesChanged = true);
		const changeDisposable = watcher.onDidChange(() => filesChanged = true);
		const deleteDisposable = watcher.onDidDelete(() => filesChanged = true);

		try {
			const baseProperties: Omit<RepoInfoTelemetryProperties, 'diffsJSON' | 'fileRelativePaths' | 'result'> = {
				remoteUrl: normalizedFetchUrl,
				repoId: repoInfo.repoId.toString(),
				repoType: repoInfo.repoId.type,
				headCommitHash: upstreamCommit,
				headBranchName,
			};

			// Workspace file index will be used to get a rough count of files in the repository
			// We need to call initialize here to have the count, but after first initialize call
			// further calls are no-ops so only a hit first time.
			await this._workspaceFileIndex.initialize();
			const measurements: RepoInfoTelemetryMeasurements = {
				workspaceFileCount: this._workspaceFileIndex.fileCount,
				changedFileCount: 0, // Will be updated
				diffSizeBytes: 0, // Will be updated
			};

			// Combine our diff against the upstream commit with untracked changes, and working tree changes
			// A change like a new untracked file could end up in either the untracked or working tree changes and won't be in the diffWith.
			const diffChanges = await this._gitService.diffWith(repoContext.rootUri, upstreamCommit) ?? [];

			const changeMap = new Map<string, Change>();

			// Priority to the diffWith changes, then working tree changes, then untracked changes.
			for (const change of diffChanges) {
				changeMap.set(change.uri.toString(), change);
			}
			for (const change of repository.state.workingTreeChanges) {
				if (!changeMap.has(change.uri.toString())) {
					changeMap.set(change.uri.toString(), change);
				}
			}
			for (const change of repository.state.untrackedChanges) {
				if (!changeMap.has(change.uri.toString())) {
					changeMap.set(change.uri.toString(), change);
				}
			}

			const changes = Array.from(changeMap.values());

			if (changes.length === 0) {
				return {
					properties: { ...baseProperties, fileRelativePaths: undefined, diffsJSON: undefined, result: 'noChanges' },
					measurements
				};
			}
			measurements.changedFileCount = changes.length;

			// Check if there are too many changes (e.g., mass renames)
			if (changes.length > MAX_CHANGES) {
				return {
					properties: { ...baseProperties, fileRelativePaths: undefined, diffsJSON: undefined, result: 'tooManyChanges' },
					measurements
				};
			}

			// Check if files changed during the git diff operation
			if (filesChanged) {
				return {
					properties: { ...baseProperties, fileRelativePaths: undefined, diffsJSON: undefined, result: 'filesChanged' },
					measurements
				};
			}

			const diffs = (await this._gitDiffService.getWorkingTreeDiffsFromRef(repoContext.rootUri, changes, upstreamCommit)).map(diff => {
				return {
					uri: diff.uri.toString(),
					originalUri: diff.originalUri.toString(),
					renameUri: diff.renameUri?.toString(),
					status: STATUS_TO_STRING[diff.status] ?? `UNKNOWN_${diff.status}`,
					diff: diff.diff,
				};
			});

			// Check if files changed during the individual file diffs
			if (filesChanged) {
				return {
					properties: { ...baseProperties, fileRelativePaths: undefined, diffsJSON: undefined, result: 'filesChanged' },
					measurements
				};
			}

			// Only changes located under the repository root are reported, as root-relative paths.
			const rootUri = repoContext.rootUri;
			const fileRelativePaths = JSON.stringify(
				changes
					.filter(c => extUriBiasedIgnorePathCase.isEqualOrParent(c.uri, rootUri))
					.map(c => extUriBiasedIgnorePathCase.relativePath(rootUri, c.uri))
					.filter((p): p is string => p !== undefined)
			);

			const diffsJSON = diffs.length > 0 ? JSON.stringify(diffs) : undefined;

			// Check against our size limit to make sure our telemetry fits in the 1MB limit
			if (diffsJSON) {
				const diffSizeBytes = Buffer.byteLength(diffsJSON, 'utf8');
				measurements.diffSizeBytes = diffSizeBytes;

				if (diffSizeBytes > MAX_DIFFS_JSON_SIZE) {
					return {
						properties: { ...baseProperties, fileRelativePaths, diffsJSON: undefined, result: 'diffTooLarge' },
						measurements
					};
				}
			}

			return {
				properties: { ...baseProperties, fileRelativePaths, diffsJSON, result: 'success' },
				measurements
			};
		} finally {
			// Always release the watcher and its listeners, whichever return path was taken.
			createDisposable.dispose();
			changeDisposable.dispose();
			deleteDisposable.dispose();
			watcher.dispose();
		}
	}
}