Path: blob/main/src/vs/workbench/contrib/chat/browser/chatRepoInfo.ts
5242 views
/*---------------------------------------------------------------------------------------------
 *  Copyright (c) Microsoft Corporation. All rights reserved.
 *  Licensed under the MIT License. See License.txt in the project root for license information.
 *--------------------------------------------------------------------------------------------*/

import { Disposable } from '../../../../base/common/lifecycle.js';
import { relativePath } from '../../../../base/common/resources.js';
import { URI } from '../../../../base/common/uri.js';
import { linesDiffComputers } from '../../../../editor/common/diff/linesDiffComputers.js';
import { IConfigurationService } from '../../../../platform/configuration/common/configuration.js';
import { Extensions as ConfigurationExtensions, IConfigurationRegistry } from '../../../../platform/configuration/common/configurationRegistry.js';
import { IFileService } from '../../../../platform/files/common/files.js';
import { ILogService } from '../../../../platform/log/common/log.js';
import { Registry } from '../../../../platform/registry/common/platform.js';
import { IWorkbenchContribution } from '../../../common/contributions.js';
import { IChatEntitlementService } from '../../../services/chat/common/chatEntitlementService.js';
import { ISCMService, ISCMResource } from '../../scm/common/scm.js';
import { IChatService } from '../common/chatService/chatService.js';
import { ChatConfiguration } from '../common/constants.js';
import { IChatModel, IExportableRepoData, IExportableRepoDiff } from '../common/model/chatModel.js';
import * as nls from '../../../../nls.js';

/** Above this many changed resources no diffs are generated (`diffsStatus: 'tooManyChanges'`). */
const MAX_CHANGES = 100;
/** Above this UTF-8 size of the JSON-serialized diffs they are dropped (`diffsStatus: 'tooLarge'`). */
const MAX_DIFFS_SIZE_BYTES = 900 * 1024;
/** Number of most recent sessions that keep their full diffs when older sessions are trimmed. */
const MAX_SESSIONS_WITH_FULL_DIFFS = 5;
/**
 * Regex to match `url = <remote-url>` lines in git config.
 */
const RemoteMatcher = /^\s*url\s*=\s*(.+\S)\s*$/mg;

/**
 * Extracts raw remote URLs from git config content.
 *
 * @param text Content of a git config file.
 * @returns The value of every `url = ...` line, in file order.
 */
function getRawRemotes(text: string): string[] {
	// `matchAll` iterates over its own copy of the regex, so the stateful `lastIndex`
	// of the shared module-level `RemoteMatcher` is never mutated.
	return Array.from(text.matchAll(RemoteMatcher), match => match[1]);
}

/**
 * Extracts a hostname from a git remote URL.
 *
 * Supports:
 * - URL-like remotes: https://github.com/..., ssh://git@github.com/..., git://github.com/...
 * - SCP-like remotes: git@github.com:owner/repo.git and github.com:owner/repo.git
 * - Scheme-less remotes: devdiv.visualstudio.com/...
 *
 * @param remoteUrl Raw remote URL as found in the git config.
 * @returns The lower-cased host, or `undefined` when no host can be determined.
 */
function getRemoteHost(remoteUrl: string): string | undefined {
	try {
		// Try standard URL parsing first (works for https://, ssh://, git://)
		const hostname = new URL(remoteUrl).hostname.toLowerCase();
		if (hostname) {
			return hostname;
		}

		// Parsing succeeded but produced no host. For a real `scheme://` URL
		// (e.g. file:///path) there simply is no host to report.
		if (remoteUrl.includes('://')) {
			return undefined;
		}

		// Otherwise this is most likely a user-less SCP-like remote such as
		// `github.com:owner/repo.git`: the URL parser accepts `github.com` as a
		// scheme and leaves the hostname empty. Fall through to the SCP parsing.
	} catch {
		// Not a parsable URL - fall through to the SCP-like / scheme-less parsing.
	}

	// Fallback for SCP-like syntax: [user@]host:path
	const atIndex = remoteUrl.lastIndexOf('@');
	const hostAndPath = atIndex !== -1 ? remoteUrl.slice(atIndex + 1) : remoteUrl;
	const colonIndex = hostAndPath.indexOf(':');
	if (colonIndex !== -1) {
		const host = hostAndPath.slice(0, colonIndex);
		return host ? host.toLowerCase() : undefined;
	}

	// Fallback for hostname/path format without scheme (e.g., devdiv.visualstudio.com/...)
	const slashIndex = hostAndPath.indexOf('/');
	if (slashIndex !== -1) {
		const host = hostAndPath.slice(0, slashIndex);
		return host ? host.toLowerCase() : undefined;
	}

	return undefined;
}

/**
 * Determines the change type based on SCM resource properties.
 *
 * The checks are ordered; the first one that matches wins:
 * 1. the resource's `contextValue` (case-insensitive substring match),
 * 2. the id of the resource group,
 * 3. the strike-through decoration,
 * 4. the absence of an original URI.
 *
 * @param resource The SCM resource to classify.
 * @param groupId Id of the resource group that contains the resource.
 * @returns The change type; `'modified'` when nothing else matches.
 */
function determineChangeType(resource: ISCMResource, groupId: string): 'added' | 'modified' | 'deleted' | 'renamed' {
	const contextValue = resource.contextValue?.toLowerCase() ?? '';
	const groupIdLower = groupId.toLowerCase();

	if (contextValue.includes('untracked') || contextValue.includes('add')) {
		return 'added';
	}
	if (contextValue.includes('delete')) {
		return 'deleted';
	}
	if (contextValue.includes('rename')) {
		return 'renamed';
	}
	if (groupIdLower.includes('untracked')) {
		return 'added';
	}
	if (resource.decorations.strikeThrough) {
		return 'deleted';
	}
	if (!resource.multiDiffEditorOriginalUri) {
		// Nothing to compare against, so treat the resource as new.
		return 'added';
	}
	return 'modified';
}

/**
 * Generates a unified diff string compatible with `git apply`.
 *
 * Note: This implementation has a known limitation - if the only change between
 * files is the presence/absence of a trailing newline (content otherwise identical),
 * no diff will be generated because VS Code's diff algorithm treats the lines as equal.
 *
 * NOTE(review): for `'renamed'` resources both header paths are built from `relPath`,
 * so the rename itself is not expressed in the patch - confirm this is intended.
 *
 * @param fileService Service used to read the original and the modified content.
 * @param relPath Path used in the `--- a/...` and `+++ b/...` header lines.
 * @param originalUri Resource holding the original content, if any. Not read for `'added'`.
 * @param modifiedUri Resource holding the modified content. Not read for `'deleted'`.
 * @param changeType Kind of change, selects how the hunks are produced.
 * @returns The diff text, or `undefined` when required content cannot be read or
 * any other error occurs.
 */
async function generateUnifiedDiff(
	fileService: IFileService,
	relPath: string,
	originalUri: URI | undefined,
	modifiedUri: URI,
	changeType: 'added' | 'modified' | 'deleted' | 'renamed'
): Promise<string | undefined> {
	try {
		let originalContent = '';
		let modifiedContent = '';

		if (originalUri && changeType !== 'added') {
			try {
				const originalFile = await fileService.readFile(originalUri);
				originalContent = originalFile.value.toString();
			} catch {
				// A 'modified' diff is meaningless without the original content.
				// For the other change types we continue with empty original content.
				if (changeType === 'modified') {
					return undefined;
				}
			}
		}

		if (changeType !== 'deleted') {
			try {
				const modifiedFile = await fileService.readFile(modifiedUri);
				modifiedContent = modifiedFile.value.toString();
			} catch {
				return undefined;
			}
		}

		const originalLines = originalContent.split('\n');
		const modifiedLines = modifiedContent.split('\n');

		// Track whether files end with newline for git apply compatibility
		// split('\n') on "line1\nline2\n" gives ["line1", "line2", ""]
		// split('\n') on "line1\nline2" gives ["line1", "line2"]
		const originalEndsWithNewline = originalContent.length > 0 && originalContent.endsWith('\n');
		const modifiedEndsWithNewline = modifiedContent.length > 0 && modifiedContent.endsWith('\n');

		// Remove trailing empty element if file ends with newline
		if (originalEndsWithNewline && originalLines.length > 0 && originalLines[originalLines.length - 1] === '') {
			originalLines.pop();
		}
		if (modifiedEndsWithNewline && modifiedLines.length > 0 && modifiedLines[modifiedLines.length - 1] === '') {
			modifiedLines.pop();
		}

		const diffLines: string[] = [];
		const aPath = changeType === 'added' ? '/dev/null' : `a/${relPath}`;
		const bPath = changeType === 'deleted' ? '/dev/null' : `b/${relPath}`;

		diffLines.push(`--- ${aPath}`);
		diffLines.push(`+++ ${bPath}`);

		if (changeType === 'added') {
			// Whole file is one hunk of additions
			if (modifiedLines.length > 0) {
				diffLines.push(`@@ -0,0 +1,${modifiedLines.length} @@`);
				for (const line of modifiedLines) {
					diffLines.push(`+${line}`);
				}
				if (!modifiedEndsWithNewline) {
					diffLines.push('\\ No newline at end of file');
				}
			}
		} else if (changeType === 'deleted') {
			// Whole file is one hunk of deletions
			if (originalLines.length > 0) {
				diffLines.push(`@@ -1,${originalLines.length} +0,0 @@`);
				for (const line of originalLines) {
					diffLines.push(`-${line}`);
				}
				if (!originalEndsWithNewline) {
					diffLines.push('\\ No newline at end of file');
				}
			}
		} else {
			// 'modified' and 'renamed': compute real hunks
			const hunks = computeDiffHunks(originalLines, modifiedLines, originalEndsWithNewline, modifiedEndsWithNewline);
			for (const hunk of hunks) {
				diffLines.push(hunk);
			}
		}

		return diffLines.join('\n');
	} catch {
		// Best effort: a failure for one file must not fail the whole capture.
		return undefined;
	}
}

/**
 * Computes unified diff hunks using VS Code's diff algorithm.
 * Merges adjacent/overlapping hunks to produce a valid patch.
 *
 * @param originalLines Lines of the original content, without line terminators.
 * @param modifiedLines Lines of the modified content, without line terminators.
 * @param originalEndsWithNewline Whether the original content ends with a newline.
 * @param modifiedEndsWithNewline Whether the modified content ends with a newline.
 * @returns The patch lines (hunk headers, context/deleted/added lines and
 * "No newline at end of file" markers); empty when there are no changes.
 */
function computeDiffHunks(
	originalLines: string[],
	modifiedLines: string[],
	originalEndsWithNewline: boolean,
	modifiedEndsWithNewline: boolean
): string[] {
	// Number of unchanged lines shown before and after each change
	const contextSize = 3;
	const result: string[] = [];

	const diffComputer = linesDiffComputers.getDefault();
	const diffResult = diffComputer.computeDiff(originalLines, modifiedLines, {
		ignoreTrimWhitespace: false,
		maxComputationTimeMs: 1000,
		computeMoves: false
	});

	if (diffResult.changes.length === 0) {
		return result;
	}

	// Group changes that should be merged into the same hunk
	// Changes are merged if their context regions would overlap
	type Change = typeof diffResult.changes[number];
	const hunkGroups: Change[][] = [];
	let currentGroup: Change[] = [];

	for (const change of diffResult.changes) {
		if (currentGroup.length === 0) {
			currentGroup.push(change);
		} else {
			const lastChange = currentGroup[currentGroup.length - 1];
			const lastContextEnd = lastChange.original.endLineNumberExclusive - 1 + contextSize;
			const currentContextStart = change.original.startLineNumber - contextSize;

			// Merge if context regions overlap or are adjacent
			if (currentContextStart <= lastContextEnd + 1) {
				currentGroup.push(change);
			} else {
				hunkGroups.push(currentGroup);
				currentGroup = [change];
			}
		}
	}
	if (currentGroup.length > 0) {
		hunkGroups.push(currentGroup);
	}

	// Generate a single hunk for each group
	for (const group of hunkGroups) {
		const firstChange = group[0];
		const lastChange = group[group.length - 1];

		// Hunk boundaries (1-based line numbers), clamped to the file
		const hunkOrigStart = Math.max(1, firstChange.original.startLineNumber - contextSize);
		const hunkOrigEnd = Math.min(originalLines.length, lastChange.original.endLineNumberExclusive - 1 + contextSize);
		const hunkModStart = Math.max(1, firstChange.modified.startLineNumber - contextSize);

		const hunkLines: string[] = [];
		// Track which line in hunkLines corresponds to the last line of each file
		let lastOriginalLineIndex = -1;
		let lastModifiedLineIndex = -1;

		// Next original line to emit, and the per-side line counts for the hunk header
		let origLineNum = hunkOrigStart;
		let origCount = 0;
		let modCount = 0;

		// Process each change in the group, emitting context lines between them
		for (const change of group) {
			const origStart = change.original.startLineNumber;
			const origEnd = change.original.endLineNumberExclusive;
			const modStart = change.modified.startLineNumber;
			const modEnd = change.modified.endLineNumberExclusive;

			// Emit context lines before this change
			while (origLineNum < origStart) {
				const idx = hunkLines.length;
				hunkLines.push(` ${originalLines[origLineNum - 1]}`);
				// Context lines are in both files
				if (origLineNum === originalLines.length) {
					lastOriginalLineIndex = idx;
				}
				const modLineNum = hunkModStart + modCount;
				if (modLineNum === modifiedLines.length) {
					lastModifiedLineIndex = idx;
				}
				origLineNum++;
				origCount++;
				modCount++;
			}

			// Emit deleted lines
			for (let i = origStart; i < origEnd; i++) {
				const idx = hunkLines.length;
				hunkLines.push(`-${originalLines[i - 1]}`);
				if (i === originalLines.length) {
					lastOriginalLineIndex = idx;
				}
				origLineNum++;
				origCount++;
			}

			// Emit added lines
			for (let i = modStart; i < modEnd; i++) {
				const idx = hunkLines.length;
				hunkLines.push(`+${modifiedLines[i - 1]}`);
				if (i === modifiedLines.length) {
					lastModifiedLineIndex = idx;
				}
				modCount++;
			}
		}

		// Emit trailing context lines
		while (origLineNum <= hunkOrigEnd) {
			const idx = hunkLines.length;
			hunkLines.push(` ${originalLines[origLineNum - 1]}`);
			// Context lines are in both files
			if (origLineNum === originalLines.length) {
				lastOriginalLineIndex = idx;
			}
			const modLineNum = hunkModStart + modCount;
			if (modLineNum === modifiedLines.length) {
				lastModifiedLineIndex = idx;
			}
			origLineNum++;
			origCount++;
			modCount++;
		}

		result.push(`@@ -${hunkOrigStart},${origCount} +${hunkModStart},${modCount} @@`);

		// Add "No newline at end of file" markers for git apply compatibility
		// The marker must appear immediately after the line that lacks a newline
		for (let i = 0; i < hunkLines.length; i++) {
			result.push(hunkLines[i]);

			const isLastOriginal = i === lastOriginalLineIndex;
			const isLastModified = i === lastModifiedLineIndex;

			if (isLastOriginal && isLastModified) {
				// Context line is the last line of both files
				// If either lacks newline, we need a marker (but only one)
				if (!originalEndsWithNewline || !modifiedEndsWithNewline) {
					result.push('\\ No newline at end of file');
				}
			} else if (isLastOriginal && !originalEndsWithNewline) {
				// Deletion or context line that's only the last of original
				result.push('\\ No newline at end of file');
			} else if (isLastModified && !modifiedEndsWithNewline) {
				// Addition or context line that's only the last of modified
				result.push('\\ No newline at end of file');
			}
		}
	}

	return result;
}

/**
 * Captures repository state from the first available SCM repository.
 *
 * @param scmService Source of the SCM repositories; only the first one is inspected.
 * @param fileService Used to probe `.git`, read `.git/config` and read file contents for diffs.
 * @returns The captured repository data, or `undefined` when there is no repository
 * or the repository has no root URI. Diffs are only included when the number of
 * changes and their serialized size stay within `MAX_CHANGES` / `MAX_DIFFS_SIZE_BYTES`;
 * `diffsStatus` tells which case applied.
 */
export async function captureRepoInfo(scmService: ISCMService, fileService: IFileService): Promise<IExportableRepoData | undefined> {
	const repositories = [...scmService.repositories];
	if (repositories.length === 0) {
		return undefined;
	}

	const repository = repositories[0];
	const rootUri = repository.provider.rootUri;
	if (!rootUri) {
		return undefined;
	}

	let hasGit = false;
	try {
		const gitDirUri = rootUri.with({ path: `${rootUri.path}/.git` });
		hasGit = await fileService.exists(gitDirUri);
	} catch {
		// ignore
	}

	if (!hasGit) {
		return {
			workspaceType: 'plain-folder',
			syncStatus: 'no-git',
			diffs: undefined
		};
	}

	// NOTE(review): the first `url = ...` value of the git config is exported verbatim.
	// If a config embeds credentials in that URL they would be part of the exported
	// data - confirm whether redaction is needed.
	let remoteUrl: string | undefined;
	try {
		// TODO: Handle git worktrees where .git is a file pointing to the actual git directory
		const gitConfigUri = rootUri.with({ path: `${rootUri.path}/.git/config` });
		const exists = await fileService.exists(gitConfigUri);
		if (exists) {
			const content = await fileService.readFile(gitConfigUri);
			const remotes = getRawRemotes(content.value.toString());
			remoteUrl = remotes[0];
		}
	} catch {
		// ignore
	}

	let localBranch: string | undefined;
	let localHeadCommit: string | undefined;
	let remoteTrackingBranch: string | undefined;
	let remoteHeadCommit: string | undefined;
	let remoteBaseBranch: string | undefined;

	const historyProvider = repository.provider.historyProvider?.get();
	if (historyProvider) {
		const historyItemRef = historyProvider.historyItemRef.get();
		localBranch = historyItemRef?.name;
		localHeadCommit = historyItemRef?.revision;

		const historyItemRemoteRef = historyProvider.historyItemRemoteRef.get();
		if (historyItemRemoteRef) {
			remoteTrackingBranch = historyItemRemoteRef.name;
			remoteHeadCommit = historyItemRemoteRef.revision;
		}

		const historyItemBaseRef = historyProvider.historyItemBaseRef.get();
		if (historyItemBaseRef) {
			remoteBaseBranch = historyItemBaseRef.name;
		}
	}

	let workspaceType: IExportableRepoData['workspaceType'];
	let syncStatus: IExportableRepoData['syncStatus'];

	if (!remoteUrl) {
		workspaceType = 'local-git';
		syncStatus = 'local-only';
	} else {
		workspaceType = 'remote-git';

		if (!remoteTrackingBranch) {
			syncStatus = 'unpublished';
		} else if (localHeadCommit === remoteHeadCommit) {
			syncStatus = 'synced';
		} else {
			syncStatus = 'unpushed';
		}
	}

	let remoteVendor: IExportableRepoData['remoteVendor'];
	if (remoteUrl) {
		const host = getRemoteHost(remoteUrl);
		if (host === 'github.com') {
			remoteVendor = 'github';
		} else if (host === 'dev.azure.com' || (host && host.endsWith('.visualstudio.com'))) {
			remoteVendor = 'ado';
		} else {
			remoteVendor = 'other';
		}
	}

	let totalChangeCount = 0;
	for (const group of repository.provider.groups) {
		totalChangeCount += group.resources.length;
	}

	const baseRepoData: Omit<IExportableRepoData, 'diffs' | 'diffsStatus' | 'changedFileCount'> = {
		workspaceType,
		syncStatus,
		remoteUrl,
		remoteVendor,
		localBranch,
		remoteTrackingBranch,
		remoteBaseBranch,
		localHeadCommit,
		remoteHeadCommit,
	};

	if (totalChangeCount === 0) {
		return {
			...baseRepoData,
			diffs: undefined,
			diffsStatus: 'noChanges',
			changedFileCount: 0
		};
	}

	if (totalChangeCount > MAX_CHANGES) {
		return {
			...baseRepoData,
			diffs: undefined,
			diffsStatus: 'tooManyChanges',
			changedFileCount: totalChangeCount
		};
	}

	// Generate the per-resource diffs concurrently. Every resource yields an entry;
	// `unifiedDiff` is `undefined` when the diff could not be generated.
	const diffPromises: Promise<IExportableRepoDiff>[] = [];

	for (const group of repository.provider.groups) {
		for (const resource of group.resources) {
			const relPath = relativePath(rootUri, resource.sourceUri) ?? resource.sourceUri.path;
			const changeType = determineChangeType(resource, group.id);

			const diffPromise = (async (): Promise<IExportableRepoDiff> => {
				const unifiedDiff = await generateUnifiedDiff(
					fileService,
					relPath,
					resource.multiDiffEditorOriginalUri,
					resource.sourceUri,
					changeType
				);

				return {
					relativePath: relPath,
					changeType,
					status: group.label || group.id,
					unifiedDiff
				};
			})();

			diffPromises.push(diffPromise);
		}
	}

	const diffs = await Promise.all(diffPromises);

	// Measure the UTF-8 size of the serialized diffs
	const diffsJson = JSON.stringify(diffs);
	const diffsSizeBytes = new TextEncoder().encode(diffsJson).length;

	if (diffsSizeBytes > MAX_DIFFS_SIZE_BYTES) {
		return {
			...baseRepoData,
			diffs: undefined,
			diffsStatus: 'tooLarge',
			changedFileCount: totalChangeCount
		};
	}

	return {
		...baseRepoData,
		diffs,
		diffsStatus: 'included',
		changedFileCount: totalChangeCount
	};
}

/**
 * Captures repository information for chat sessions.
 *
 * Whenever a chat request is submitted and the session's model has no repo data yet,
 * the repository state is captured and stored on the model. Capturing only happens
 * for internal users and when the `ChatConfiguration.RepoInfoEnabled` setting is on;
 * that setting is itself only registered for internal users.
 */
export class ChatRepoInfoContribution extends Disposable implements IWorkbenchContribution {

	static readonly ID = 'workbench.contrib.chatRepoInfo';

	/** Guards against registering the configuration more than once. */
	private _configurationRegistered = false;

	constructor(
		@IChatService private readonly chatService: IChatService,
		@IChatEntitlementService private readonly chatEntitlementService: IChatEntitlementService,
		@ISCMService private readonly scmService: ISCMService,
		@IFileService private readonly fileService: IFileService,
		@ILogService private readonly logService: ILogService,
		@IConfigurationService private readonly configurationService: IConfigurationService,
	) {
		super();
		// The entitlement may change after construction, so retry the registration
		// on every entitlement change.
		this.registerConfigurationIfInternal();
		this._register(this.chatEntitlementService.onDidChangeEntitlement(() => {
			this.registerConfigurationIfInternal();
		}));

		this._register(this.chatService.onDidSubmitRequest(async ({ chatSessionResource }) => {
			const model = this.chatService.getSession(chatSessionResource);
			if (!model) {
				return;
			}
			await this.captureAndSetRepoData(model);
		}));
	}

	/**
	 * Registers the repo info setting, once, and only for internal users.
	 */
	private registerConfigurationIfInternal(): void {
		if (this._configurationRegistered) {
			return;
		}

		if (!this.chatEntitlementService.isInternal) {
			return;
		}

		const registry = Registry.as<IConfigurationRegistry>(ConfigurationExtensions.Configuration);
		registry.registerConfiguration({
			id: 'chatRepoInfo',
			title: nls.localize('chatRepoInfoConfigurationTitle', "Chat Repository Info"),
			type: 'object',
			properties: {
				[ChatConfiguration.RepoInfoEnabled]: {
					type: 'boolean',
					description: nls.localize('chat.repoInfo.enabled', "Controls whether repository information (branch, commit, working tree diffs) is captured at the start of chat sessions for internal diagnostics."),
					default: true,
				}
			}
		});

		this._configurationRegistered = true;
		this.logService.debug('[ChatRepoInfo] Configuration registered for internal user');
	}

	/**
	 * Captures the repository state and stores it on the given model.
	 *
	 * Does nothing for non-internal users, when the setting is disabled, or when the
	 * model already has repo data. Failures are logged and never thrown.
	 *
	 * @param model The chat model that receives the captured data.
	 */
	private async captureAndSetRepoData(model: IChatModel): Promise<void> {
		if (!this.chatEntitlementService.isInternal) {
			return;
		}

		// Check if repo info capture is enabled via configuration
		if (!this.configurationService.getValue<boolean>(ChatConfiguration.RepoInfoEnabled)) {
			return;
		}

		if (model.repoData) {
			return;
		}

		try {
			const repoData = await captureRepoInfo(this.scmService, this.fileService);
			if (repoData) {
				model.setRepoData(repoData);
				if (!repoData.localHeadCommit && repoData.workspaceType !== 'plain-folder') {
					this.logService.warn('[ChatRepoInfo] Captured repo data without commit hash - git history may not be ready');
				}

				// Trim diffs from older sessions to manage storage
				this.trimOldSessionDiffs();
			} else {
				this.logService.debug('[ChatRepoInfo] No SCM repository available for chat session');
			}
		} catch (error) {
			this.logService.warn('[ChatRepoInfo] Failed to capture repo info:', error);
		}
	}

	/**
	 * Trims diffs from older sessions, keeping full diffs only for the most recent sessions.
	 *
	 * Sessions are ordered by their model timestamp; all but the newest
	 * `MAX_SESSIONS_WITH_FULL_DIFFS` get their diffs removed and their
	 * `diffsStatus` set to `'trimmedForStorage'`.
	 */
	private trimOldSessionDiffs(): void {
		try {
			// Get all sessions with repoData that has diffs
			const sessionsWithDiffs: { model: IChatModel; timestamp: number }[] = [];

			for (const model of this.chatService.chatModels.get()) {
				if (model.repoData?.diffs && model.repoData.diffs.length > 0 && model.repoData.diffsStatus === 'included') {
					sessionsWithDiffs.push({ model, timestamp: model.timestamp });
				}
			}

			// Sort by timestamp descending (most recent first)
			sessionsWithDiffs.sort((a, b) => b.timestamp - a.timestamp);

			// Trim diffs from sessions beyond the limit
			for (let i = MAX_SESSIONS_WITH_FULL_DIFFS; i < sessionsWithDiffs.length; i++) {
				const { model } = sessionsWithDiffs[i];
				if (model.repoData) {
					const trimmedRepoData: IExportableRepoData = {
						...model.repoData,
						diffs: undefined,
						diffsStatus: 'trimmedForStorage'
					};
					model.setRepoData(trimmedRepoData);
					this.logService.trace(`[ChatRepoInfo] Trimmed diffs from older session: ${model.sessionResource.toString()}`);
				}
			}
		} catch (error) {
			this.logService.warn('[ChatRepoInfo] Failed to trim old session diffs:', error);
		}
	}
}