Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
microsoft
GitHub Repository: microsoft/vscode
Path: blob/main/extensions/copilot/src/extension/prompt/node/repoInfoTelemetry.ts
13399 views
1
/*---------------------------------------------------------------------------------------------
2
* Copyright (c) Microsoft Corporation. All rights reserved.
3
* Licensed under the MIT License. See License.txt in the project root for license information.
4
*--------------------------------------------------------------------------------------------*/
5
6
import { ICopilotTokenStore } from '../../../platform/authentication/common/copilotTokenStore';
7
import { ConfigKey, IConfigurationService } from '../../../platform/configuration/common/configurationService';
8
import { IFileSystemService } from '../../../platform/filesystem/common/fileSystemService';
9
import { IGitDiffService } from '../../../platform/git/common/gitDiffService';
10
import { IGitExtensionService } from '../../../platform/git/common/gitExtensionService';
11
import { getOrderedRepoInfosFromContext, IGitService, normalizeFetchUrl, RepoContext, ResolvedRepoRemoteInfo } from '../../../platform/git/common/gitService';
12
import { Change, Repository } from '../../../platform/git/vscode/git';
13
import { ILogService } from '../../../platform/log/common/logService';
14
import { ITelemetryService } from '../../../platform/telemetry/common/telemetry';
15
import { extUriBiasedIgnorePathCase } from '../../../util/vs/base/common/resources';
16
import { IWorkspaceFileIndex } from '../../../platform/workspaceChunkSearch/node/workspaceFileIndex';
17
18
// Maps the numeric git `Status` enum values to their names so telemetry carries a readable status string.
// The enum is a const enum and part of the public git extension API, so the order should stay stable.
// Values missing from this table are expected to be handled by the consumer (e.g. with an `UNKNOWN_<n>` fallback).
const STATUS_TO_STRING: Record<number, string> = {
	0: 'INDEX_MODIFIED',
	1: 'INDEX_ADDED',
	2: 'INDEX_DELETED',
	3: 'INDEX_RENAMED',
	4: 'INDEX_COPIED',
	5: 'MODIFIED',
	6: 'DELETED',
	7: 'UNTRACKED',
	8: 'IGNORED',
	9: 'INTENT_TO_ADD',
	10: 'INTENT_TO_RENAME',
	11: 'TYPE_CHANGED',
	12: 'ADDED_BY_US',
	13: 'ADDED_BY_THEM',
	14: 'DELETED_BY_US',
	15: 'DELETED_BY_THEM',
	16: 'BOTH_ADDED',
	17: 'BOTH_DELETED',
	18: 'BOTH_MODIFIED',
};
41
42
// Max telemetry payload size is 1MB. Shared properties and JSON structure overhead are added on top of
// the diff JSON later, so the diff JSON size is checked against 900KB to be conservative with space.
const MAX_DIFFS_JSON_SIZE = 900 * 1024;

// Max number of changed files, to avoid degenerate cases like mass renames.
const MAX_CHANGES = 100;

// Max age of the merge base commit in days before we skip the diff.
const MAX_MERGE_BASE_AGE_DAYS = 30;

// Max number of commits between merge base and HEAD before we skip the diff.
const MAX_DIFF_COMMITS = 30;
54
55
// EVENT: repoInfo

// Outcome of a repo info collection attempt. Only 'success' carries diffs; every other value names
// the reason the diff was skipped or dropped.
type RepoInfoTelemetryResult = 'success' | 'filesChanged' | 'diffTooLarge' | 'noChanges' | 'tooManyChanges' | 'mergeBaseTooOld' | 'virtualFileSystem' | 'tooManyCommits';

// String-valued properties of the repoInfo event.
type RepoInfoTelemetryProperties = {
	remoteUrl: string | undefined;
	repoId: string | undefined;
	repoType: 'github' | 'ado';
	// NOTE: RepoInfoTelemetry populates this with the merge-base (upstream) commit the diffs are taken against.
	headCommitHash: string | undefined;
	headBranchName: string | undefined;
	// JSON-encoded array of changed file paths relative to the repository root.
	fileRelativePaths: string | undefined;
	// JSON-encoded array of per-file diffs; undefined whenever the diff was skipped or too large.
	diffsJSON: string | undefined;
	result: RepoInfoTelemetryResult;
};

// Numeric measurements of the repoInfo event.
type RepoInfoTelemetryMeasurements = {
	workspaceFileCount: number;
	changedFileCount: number;
	diffSizeBytes: number;
};

// Properties and measurements collected for a single repoInfo event.
type RepoInfoTelemetryData = {
	properties: RepoInfoTelemetryProperties;
	measurements: RepoInfoTelemetryMeasurements;
};

// Properties actually sent: the collected properties plus where in the request the event was
// captured and the id of the telemetry message it belongs to.
type RepoInfoInternalTelemetryProperties = RepoInfoTelemetryProperties & {
	location: 'begin' | 'end';
	telemetryMessageId: string;
};
84
85
// End telemetry is only worth sending when the begin event either captured repo info ('success')
// or found nothing to capture ('noChanges'); every other outcome, including undefined, opts out.
function shouldSendEndTelemetry(result: RepoInfoTelemetryResult | undefined): boolean {
	switch (result) {
		case 'success':
		case 'noChanges':
			return true;
		default:
			return false;
	}
}
89
90
/*
 * Handles sending telemetry about the current git repository.
 * Every event is sent via sendEnhancedGHTelemetryEvent. For internal users the same event, minus
 * headBranchName and fileRelativePaths, is additionally sent via sendInternalMSFTTelemetryEvent.
 *
 * Telemetry is best effort: the public methods log failures and never reject.
 */
export class RepoInfoTelemetry {
	// Values git treats as boolean true for core.sparsecheckout (compared case-insensitively).
	private static readonly _gitTrueValues: ReadonlySet<string> = new Set(['true', 'yes', 'on', '1']);

	private _beginTelemetrySent = false;
	// Never rejects: failures are logged inside _sendBeginTelemetry and surface as `undefined`.
	private _beginTelemetryPromise: Promise<RepoInfoTelemetryData | undefined> | undefined;
	private _beginTelemetryResult: RepoInfoTelemetryResult | undefined;

	constructor(
		private readonly _telemetryMessageId: string,
		@ITelemetryService private readonly _telemetryService: ITelemetryService,
		@IGitService private readonly _gitService: IGitService,
		@IGitDiffService private readonly _gitDiffService: IGitDiffService,
		@IGitExtensionService private readonly _gitExtensionService: IGitExtensionService,
		@ILogService private readonly _logService: ILogService,
		@IFileSystemService private readonly _fileSystemService: IFileSystemService,
		@IWorkspaceFileIndex private readonly _workspaceFileIndex: IWorkspaceFileIndex,
		@IConfigurationService private readonly _configurationService: IConfigurationService,
		@ICopilotTokenStore private readonly _copilotTokenStore: ICopilotTokenStore,
	) { }

	/*
	 * Sends the begin event telemetry, make sure to only send one time, as multiple PanelChatTelemetry instances
	 * are created per user request.
	 *
	 * Callers arriving while the first send is still in flight wait for it to finish. Never rejects.
	 */
	public async sendBeginTelemetryIfNeeded(): Promise<void> {
		if (!this._beginTelemetrySent) {
			this._beginTelemetrySent = true;
			this._beginTelemetryPromise = this._sendBeginTelemetry();
		}

		// Already sent or in progress; the stored promise never rejects, so this cannot throw.
		await this._beginTelemetryPromise;
	}

	/*
	 * Sends the end event telemetry. Waits for a pending begin event first and is skipped unless the
	 * begin event ended in a state for which end telemetry is wanted. Never rejects.
	 */
	public async sendEndTelemetry(): Promise<void> {
		await this._beginTelemetryPromise;

		// Skip end telemetry if begin wasn't successful
		if (!shouldSendEndTelemetry(this._beginTelemetryResult)) {
			return;
		}

		try {
			await this._sendRepoInfoTelemetry('end');
		} catch (error) {
			this._logService.warn(`Failed to send end repo info telemetry ${error}`);
		}
	}

	/*
	 * Runs the begin send and records its result. All errors are logged and swallowed here so that the
	 * promise shared through _beginTelemetryPromise can be awaited from anywhere without re-throwing,
	 * and so that _beginTelemetryResult is already set when any awaiter resumes.
	 */
	private async _sendBeginTelemetry(): Promise<RepoInfoTelemetryData | undefined> {
		try {
			const gitInfo = await this._sendRepoInfoTelemetry('begin');
			this._beginTelemetryResult = gitInfo?.properties.result;
			return gitInfo;
		} catch (error) {
			this._logService.warn(`Failed to send begin repo info telemetry ${error}`);
			return undefined;
		}
	}

	/*
	 * Collects the repo info and sends it as a 'request.repoInfo' event tagged with the given location.
	 * Returns the collected data, or undefined when the telemetry is disabled by configuration or no
	 * repo info could be resolved (in which case nothing is sent).
	 */
	private async _sendRepoInfoTelemetry(location: 'begin' | 'end'): Promise<RepoInfoTelemetryData | undefined> {
		if (this._configurationService.getConfig(ConfigKey.TeamInternal.DisableRepoInfoTelemetry)) {
			return undefined;
		}

		const repoInfo = await this._getRepoInfoTelemetry();
		if (!repoInfo) {
			return undefined;
		}

		const internalProperties: RepoInfoInternalTelemetryProperties = {
			...repoInfo.properties,
			location,
			telemetryMessageId: this._telemetryMessageId
		};

		const isInternal = !!this._copilotTokenStore.copilotToken?.isInternal;
		if (isInternal) {
			// The MSFT event deliberately omits the branch name and the changed file paths.
			const { headBranchName: _, fileRelativePaths: _2, ...msftProperties } = internalProperties;
			this._telemetryService.sendInternalMSFTTelemetryEvent('request.repoInfo', msftProperties, repoInfo.measurements);
		}
		this._telemetryService.sendEnhancedGHTelemetryEvent('request.repoInfo', internalProperties, repoInfo.measurements);

		return repoInfo;
	}

	/*
	 * Resolves everything needed to describe the active repository: its context, the first ordered
	 * remote info (which must have a fetch URL), the git extension repository object, and the merge
	 * base of HEAD with its upstream (falling back to the branch base). Returns undefined if any of
	 * these cannot be determined.
	 */
	private async _resolveRepoContext(): Promise<{ repoContext: RepoContext; repoInfo: ResolvedRepoRemoteInfo; repository: Repository; upstreamCommit: string; headBranchName: string | undefined } | undefined> {
		const repoContext = this._gitService.activeRepository?.get();
		if (!repoContext) {
			return;
		}

		const repoInfo = Array.from(getOrderedRepoInfosFromContext(repoContext))[0];
		if (!repoInfo || !repoInfo.fetchUrl) {
			return;
		}

		const gitAPI = this._gitExtensionService.getExtensionApi();
		const repository = gitAPI?.getRepository(repoContext.rootUri);
		if (!repository) {
			return;
		}

		let upstreamCommit = await repository.getMergeBase('HEAD', '@{upstream}');
		if (!upstreamCommit) {
			// No usable upstream tracking ref: fall back to the merge base with the branch's base branch.
			const baseBranch = await repository.getBranchBase('HEAD');
			if (baseBranch) {
				const baseRef = `${baseBranch.remote}/${baseBranch.name}`;
				upstreamCommit = await repository.getMergeBase('HEAD', baseRef);
			}
		}

		if (!upstreamCommit) {
			return;
		}

		const headBranchName = repository.state.HEAD?.name;
		return { repoContext, repoInfo, repository, upstreamCommit, headBranchName };
	}

	/*
	 * Collects the repo metadata and, when it is cheap and safe enough, the diffs of the working tree
	 * against the upstream merge base. The `result` property records why diffs were skipped or dropped.
	 * Returns undefined when no repository context can be resolved.
	 */
	private async _getRepoInfoTelemetry(): Promise<RepoInfoTelemetryData | undefined> {
		const ctx = await this._resolveRepoContext();
		if (!ctx) {
			return;
		}

		const { repoContext, repoInfo, repository, upstreamCommit, headBranchName } = ctx;

		// Properties shared by every outcome below.
		const baseProperties: Omit<RepoInfoTelemetryProperties, 'diffsJSON' | 'fileRelativePaths' | 'result'> = {
			// _resolveRepoContext only returns remote infos that have a fetch URL.
			remoteUrl: normalizeFetchUrl(repoInfo.fetchUrl!),
			repoId: repoInfo.repoId.toString(),
			repoType: repoInfo.repoId.type,
			// Reported as the head commit: the merge base that all diffs are computed against.
			headCommitHash: upstreamCommit,
			headBranchName,
		};

		// Outcome without file paths or diffs, carrying the given measurements.
		const withoutDiff = (result: RepoInfoTelemetryResult, measurements: RepoInfoTelemetryMeasurements): RepoInfoTelemetryData => ({
			properties: { ...baseProperties, fileRelativePaths: undefined, diffsJSON: undefined, result },
			measurements,
		});

		// Outcome for the early exits taken before any measurement has been collected.
		const skipDiffResult = (result: RepoInfoTelemetryResult): RepoInfoTelemetryData =>
			withoutDiff(result, { workspaceFileCount: 0, changedFileCount: 0, diffSizeBytes: 0 });

		// VFS and sparse checkout enlistments are unlikely to have all blobs available locally,
		// making diff operations expensive or impossible. Skip early if either is configured.
		// core.virtualfilesystem is a path to a hook script, any non-empty value means VFS is active.
		// core.sparsecheckout is a git boolean: true/yes/on/1 are truthy per git-config spec.
		// If we can't determine the config, skip to be safe.
		try {
			const virtualFileSystem = await repository.getConfig('core.virtualfilesystem');
			const sparseCheckout = await repository.getConfig('core.sparsecheckout');
			if (virtualFileSystem || RepoInfoTelemetry._gitTrueValues.has(sparseCheckout.toLowerCase())) {
				return skipDiffResult('virtualFileSystem');
			}
		} catch {
			return skipDiffResult('virtualFileSystem');
		}

		// Check if the merge base commit is too old to avoid expensive diff operations
		// on very stale branches where rename detection can consume many GB of memory.
		// If we can't determine the commit age, treat it as too old to avoid the potentially expensive diff.
		try {
			const mergeBaseCommit = await repository.getCommit(upstreamCommit);
			const ageDays = mergeBaseCommit.commitDate
				? (Date.now() - mergeBaseCommit.commitDate.getTime()) / (1000 * 60 * 60 * 24)
				: undefined;

			if (ageDays === undefined || ageDays > MAX_MERGE_BASE_AGE_DAYS) {
				return skipDiffResult('mergeBaseTooOld');
			}
		} catch {
			return skipDiffResult('mergeBaseTooOld');
		}

		// Check if there are too many commits between the merge base and HEAD.
		// Extensive renames can make even the check for number of changed files expensive, and we are likely to have
		// too big a diff to log anyways
		try {
			// The log is capped at MAX_DIFF_COMMITS entries, so reaching the cap is treated as too many.
			const commitLog = await repository.log({ range: `${upstreamCommit}..HEAD`, maxEntries: MAX_DIFF_COMMITS });
			if (commitLog.length >= MAX_DIFF_COMMITS) {
				return skipDiffResult('tooManyCommits');
			}
		} catch {
			return skipDiffResult('tooManyCommits');
		}

		// Before we calculate our async diffs, sign up for file system change events.
		// Any change during the async operations invalidates our diff data, and we then send the
		// event as a failure without diffs.
		const watcher = this._fileSystemService.createFileSystemWatcher('**/*');
		let filesChanged = false;
		const createDisposable = watcher.onDidCreate(() => filesChanged = true);
		const changeDisposable = watcher.onDidChange(() => filesChanged = true);
		const deleteDisposable = watcher.onDidDelete(() => filesChanged = true);

		try {
			// Workspace file index will be used to get a rough count of files in the repository
			// We need to call initialize here to have the count, but after first initialize call
			// further calls are no-ops so only a hit first time.
			await this._workspaceFileIndex.initialize();
			const measurements: RepoInfoTelemetryMeasurements = {
				workspaceFileCount: this._workspaceFileIndex.fileCount,
				changedFileCount: 0, // Will be updated
				diffSizeBytes: 0, // Will be updated
			};

			// Combine our diff against the upstream commit with untracked changes, and working tree changes
			// A change like a new untracked file could end up in either the untracked or working tree changes and won't be in the diffWith.
			const diffChanges = await this._gitService.diffWith(repoContext.rootUri, upstreamCommit) ?? [];

			const changeMap = new Map<string, Change>();

			// Priority to the diffWith changes, then working tree changes, then untracked changes.
			for (const change of diffChanges) {
				changeMap.set(change.uri.toString(), change);
			}
			for (const change of repository.state.workingTreeChanges) {
				if (!changeMap.has(change.uri.toString())) {
					changeMap.set(change.uri.toString(), change);
				}
			}
			for (const change of repository.state.untrackedChanges) {
				if (!changeMap.has(change.uri.toString())) {
					changeMap.set(change.uri.toString(), change);
				}
			}

			const changes = Array.from(changeMap.values());

			if (changes.length === 0) {
				return withoutDiff('noChanges', measurements);
			}
			measurements.changedFileCount = changes.length;

			// Check if there are too many changes (e.g., mass renames)
			if (changes.length > MAX_CHANGES) {
				return withoutDiff('tooManyChanges', measurements);
			}

			// Check if files changed during the git diff operation
			if (filesChanged) {
				return withoutDiff('filesChanged', measurements);
			}

			const diffs = (await this._gitDiffService.getWorkingTreeDiffsFromRef(repoContext.rootUri, changes, upstreamCommit)).map(diff => {
				return {
					uri: diff.uri.toString(),
					originalUri: diff.originalUri.toString(),
					renameUri: diff.renameUri?.toString(),
					status: STATUS_TO_STRING[diff.status] ?? `UNKNOWN_${diff.status}`,
					diff: diff.diff,
				};
			});

			// Check if files changed during the individual file diffs
			if (filesChanged) {
				return withoutDiff('filesChanged', measurements);
			}

			// Only changes located inside the repository root are reported, as root-relative paths.
			const rootUri = repoContext.rootUri;
			const fileRelativePaths = JSON.stringify(
				changes
					.filter(c => extUriBiasedIgnorePathCase.isEqualOrParent(c.uri, rootUri))
					.map(c => extUriBiasedIgnorePathCase.relativePath(rootUri, c.uri))
					.filter((p): p is string => p !== undefined)
			);

			const diffsJSON = diffs.length > 0 ? JSON.stringify(diffs) : undefined;

			// Check against our size limit to make sure our telemetry fits in the 1MB limit
			if (diffsJSON) {
				const diffSizeBytes = Buffer.byteLength(diffsJSON, 'utf8');
				measurements.diffSizeBytes = diffSizeBytes;

				if (diffSizeBytes > MAX_DIFFS_JSON_SIZE) {
					// The file list is small enough to keep even when the diffs themselves are dropped.
					return {
						properties: { ...baseProperties, fileRelativePaths, diffsJSON: undefined, result: 'diffTooLarge' },
						measurements
					};
				}
			}

			return {
				properties: { ...baseProperties, fileRelativePaths, diffsJSON, result: 'success' },
				measurements
			};
		} finally {
			createDisposable.dispose();
			changeDisposable.dispose();
			deleteDisposable.dispose();
			watcher.dispose();
		}
	}
}
411
412