Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
microsoft
GitHub Repository: microsoft/vscode
Path: blob/main/src/vs/platform/agentHost/test/node/protocol/toolApprovalRealSdk.integrationTest.ts
13405 views
1
/*---------------------------------------------------------------------------------------------
2
* Copyright (c) Microsoft Corporation. All rights reserved.
3
* Licensed under the MIT License. See License.txt in the project root for license information.
4
*--------------------------------------------------------------------------------------------*/
5
6
/**
7
* Integration tests using the real Copilot SDK instead of a mock agent.
8
*
9
* These tests are **disabled by default**. To run them, set `AGENT_HOST_REAL_SDK=1`:
10
*
11
* AGENT_HOST_REAL_SDK=1 ./scripts/test-integration.sh --run src/vs/platform/agentHost/test/node/protocol/toolApprovalRealSdk.integrationTest.ts
12
*
13
* Authentication: By default the token is obtained from `gh auth token`.
14
* You can override it by setting `GITHUB_TOKEN=ghp_xxx`.
15
*
16
* SAFETY: These tests create real agent sessions backed by the Copilot SDK.
17
* The agent may execute tool calls on the user's machine. Prompts should be
18
* carefully chosen to avoid destructive side-effects — prefer read-only
19
* questions, safe commands like `echo`, and use isolated temp directories as
20
* working directories. Never ask the agent to delete, modify, or install
21
* anything outside of a test-owned temp directory.
22
*/
23
24
import assert from 'assert';
25
import { execSync } from 'child_process';
26
import { mkdtempSync, rmSync, writeFileSync } from 'fs';
27
import { tmpdir } from 'os';
28
import { removeAnsiEscapeCodes } from '../../../../../base/common/strings.js';
29
import { URI } from '../../../../../base/common/uri.js';
30
import type { SessionToolCallStartAction } from '../../../common/state/protocol/actions.js';
31
import { SubscribeResult } from '../../../common/state/protocol/commands.js';
32
import { PROTOCOL_VERSION } from '../../../common/state/sessionCapabilities.js';
33
import { ResponsePartKind, ROOT_STATE_URI, SessionInputAnswerState, SessionInputAnswerValueKind, SessionInputQuestionKind, SessionInputResponseKind, ToolResultContentType, isSubagentSession, type SessionInputAnswer, type SessionInputRequest, type SessionState, type TerminalState, type ToolResultContent, type ToolResultSubagentContent } from '../../../common/state/sessionState.js';
34
import type { RootState } from '../../../common/state/protocol/state.js';
35
import type { RootAgentsChangedAction, SessionAddedNotification, SessionInputRequestedAction, SessionToolCallReadyAction } from '../../../common/state/sessionActions.js';
36
import type { INotificationBroadcastParams } from '../../../common/state/sessionProtocol.js';
37
import {
38
getActionEnvelope,
39
isActionNotification,
40
IServerHandle,
41
startRealServer,
42
TestProtocolClient,
43
} from './testHelpers.js';
44
45
const REAL_SDK_ENABLED = process.env['AGENT_HOST_REAL_SDK'] === '1';
46
47
/**
 * Resolves the GitHub token used to authenticate with the agent host.
 *
 * Resolution order:
 * 1. The `GITHUB_TOKEN` environment variable, when it holds a non-blank value.
 * 2. The standard output of `gh auth token`.
 *
 * Surrounding whitespace is stripped from both sources so that a stray
 * newline (e.g. from a secrets file) does not end up inside the token.
 *
 * @returns A non-empty token string.
 * @throws Error when `GITHUB_TOKEN` is unset or blank and `gh auth token`
 * either fails or prints nothing.
 */
function resolveGitHubToken(): string {
	const envToken = process.env['GITHUB_TOKEN']?.trim();
	if (envToken) {
		return envToken;
	}

	let cliToken = '';
	try {
		cliToken = execSync('gh auth token', { encoding: 'utf-8' }).trim();
	} catch {
		// Handled below: a failed invocation and an empty result are reported
		// with the same actionable message.
	}

	if (!cliToken) {
		throw new Error('No GITHUB_TOKEN set and `gh auth token` failed. Run `gh auth login` first.');
	}
	return cliToken;
}
59
60
/**
 * Creates a session backed by the real copilot provider and returns only its URI.
 *
 * Thin wrapper around `createRealSessionFull` for tests that do not need the
 * `sessionAdded` notification or the subscribe snapshot.
 *
 * @param c Connected protocol client used to issue the calls.
 * @param clientId Client identifier sent with `initialize`.
 * @param trackingList Receives the created session URI so the caller can dispose it later.
 * @param workingDirectory Optional working directory URI passed to `createSession`.
 * @returns The session URI reported by the server.
 */
async function createRealSession(c: TestProtocolClient, clientId: string, trackingList: string[], workingDirectory?: string): Promise<string> {
	const { sessionUri } = await createRealSessionFull(c, clientId, trackingList, workingDirectory);
	return sessionUri;
}
65
66
/** Everything `createRealSessionFull` learns about a freshly created session. */
interface IRealSessionResult {
	/** Session resource URI taken from the `notify/sessionAdded` summary. */
	sessionUri: string;
	/** The `notify/sessionAdded` notification observed after `createSession`. */
	addedNotification: SessionAddedNotification;
	/** Session state returned in the snapshot of the initial `subscribe` call. */
	subscribeSnapshot: SessionState;
}
71
72
/**
 * Creates a session using the real copilot provider and returns the artifacts
 * tests may want to assert on.
 *
 * Steps, in order: `initialize`, `authenticate` (token from
 * `resolveGitHubToken`), `createSession`, wait for `notify/sessionAdded`,
 * `subscribe` to the resource named in that notification, then clear the
 * client's received notifications.
 *
 * @param c Connected protocol client used to issue the calls.
 * @param clientId Client identifier sent with `initialize`.
 * @param trackingList Receives the server-reported session URI for later cleanup.
 * @param workingDirectory Optional working directory URI passed to `createSession`.
 * @returns The session URI, the `sessionAdded` notification and the subscribe snapshot.
 */
async function createRealSessionFull(c: TestProtocolClient, clientId: string, trackingList: string[], workingDirectory?: string): Promise<IRealSessionResult> {
	const callTimeoutMs = 30_000;

	await c.call('initialize', { protocolVersion: PROTOCOL_VERSION, clientId }, callTimeoutMs);

	const token = resolveGitHubToken();
	await c.call('authenticate', { resource: 'https://api.github.com', token }, callTimeoutMs);

	const requestedUri = URI.from({ scheme: 'copilotcli', path: `/real-test-${Date.now()}` }).toString();
	await c.call('createSession', { session: requestedUri, provider: 'copilotcli', workingDirectory }, callTimeoutMs);

	const broadcast = await c.waitForNotification(n => {
		if (n.method !== 'notification') {
			return false;
		}
		return (n.params as INotificationBroadcastParams).notification.type === 'notify/sessionAdded';
	}, 15_000);
	const addedNotification = (broadcast.params as INotificationBroadcastParams).notification as SessionAddedNotification;

	// Use the resource from the notification rather than the requested URI
	// for tracking and subscribing.
	const sessionUri = addedNotification.summary.resource;
	trackingList.push(sessionUri);

	const { snapshot } = await c.call<SubscribeResult>('subscribe', { resource: sessionUri });
	const subscribeSnapshot = snapshot.state as SessionState;

	// Drop setup notifications so callers start from a clean slate.
	c.clearReceived();

	return { sessionUri, addedNotification, subscribeSnapshot };
}
95
96
/**
 * Starts a turn by dispatching a `session/turnStarted` action.
 *
 * @param c Connected protocol client.
 * @param session Session resource URI the turn belongs to.
 * @param turnId Caller-chosen identifier for the turn.
 * @param text User message text for the turn.
 * @param clientSeq Client sequence number attached to the dispatch.
 */
function dispatchTurn(c: TestProtocolClient, session: string, turnId: string, text: string, clientSeq: number): void {
	const action = {
		type: 'session/turnStarted' as const,
		session,
		turnId,
		userMessage: { text },
	};
	c.notify('dispatchAction', { clientSeq, action });
}
108
109
/**
 * Builds a "submitted" answer for every question of an input request so a
 * test can accept the request without user interaction.
 *
 * Answer selection per question kind:
 * - Text: the question's default value, else `'interactive'`.
 * - Number / Integer: the default value, else `min`, else `1`.
 * - Boolean: the default value, else `true`.
 * - SingleSelect: the first option whose id or label matches /interactive/i,
 *   else the first recommended option, else the first option.
 * - MultiSelect: all recommended options, else the first option (an empty
 *   list when the question has no options).
 *
 * @param request The input request to answer.
 * @returns A map from question id to answer, or `undefined` when the request
 * has no questions.
 * @throws Error when a single-select question has no options to choose from.
 */
function getAcceptedAnswers(request: SessionInputRequest): Record<string, SessionInputAnswer> | undefined {
	if (!request.questions?.length) {
		return undefined;
	}

	return Object.fromEntries(request.questions.map(question => {
		switch (question.kind) {
			case SessionInputQuestionKind.Text:
				return [question.id, {
					state: SessionInputAnswerState.Submitted,
					value: {
						kind: SessionInputAnswerValueKind.Text,
						value: question.defaultValue ?? 'interactive',
					},
				} satisfies SessionInputAnswer];
			case SessionInputQuestionKind.Number:
			case SessionInputQuestionKind.Integer:
				return [question.id, {
					state: SessionInputAnswerState.Submitted,
					value: {
						kind: SessionInputAnswerValueKind.Number,
						value: question.defaultValue ?? question.min ?? 1,
					},
				} satisfies SessionInputAnswer];
			case SessionInputQuestionKind.Boolean:
				return [question.id, {
					state: SessionInputAnswerState.Submitted,
					value: {
						kind: SessionInputAnswerValueKind.Boolean,
						value: question.defaultValue ?? true,
					},
				} satisfies SessionInputAnswer];
			case SessionInputQuestionKind.SingleSelect: {
				const preferredOption = question.options.find(option => /interactive/i.test(option.id) || /interactive/i.test(option.label))
					?? question.options.find(option => option.recommended)
					?? question.options[0];
				// An empty option list leaves nothing to select. Fail with a
				// message that names the question instead of an opaque
				// TypeError on `preferredOption.id`.
				if (!preferredOption) {
					throw new Error(`Cannot answer single-select question '${question.id}': it has no options`);
				}
				return [question.id, {
					state: SessionInputAnswerState.Submitted,
					value: {
						kind: SessionInputAnswerValueKind.Selected,
						value: preferredOption.id,
					},
				} satisfies SessionInputAnswer];
			}
			case SessionInputQuestionKind.MultiSelect: {
				const preferredOptions = question.options.filter(option => option.recommended);
				const selectedOptions = preferredOptions.length > 0 ? preferredOptions : question.options.slice(0, 1);
				return [question.id, {
					state: SessionInputAnswerState.Submitted,
					value: {
						kind: SessionInputAnswerValueKind.SelectedMany,
						value: selectedOptions.map(option => option.id),
					},
				} satisfies SessionInputAnswer];
			}
		}
	}));
}
167
168
/**
 * Reassembles the markdown text the client has received so far.
 *
 * A markdown part is opened by a `session/responsePart` action carrying the
 * first chunk; later chunks arrive as `session/delta` actions that reference
 * the part by id. Chunks are concatenated in the order the notifications
 * were received; deltas for parts that were not opened as markdown are
 * ignored.
 *
 * @param c Protocol client whose received notifications are scanned.
 * @returns The concatenated markdown content (empty string if none).
 */
function getMarkdownResponseText(c: TestProtocolClient): string {
	const streamed = c.receivedNotifications(n =>
		isActionNotification(n, 'session/responsePart') || isActionNotification(n, 'session/delta')
	);

	const openMarkdownParts = new Set<string>();
	const chunks: string[] = [];
	for (const entry of streamed) {
		const { action } = getActionEnvelope(entry);
		if (action.type === 'session/responsePart') {
			if (action.part.kind === ResponsePartKind.Markdown) {
				openMarkdownParts.add(action.part.id);
				chunks.push(action.part.content);
			}
		} else if (action.type === 'session/delta') {
			if (openMarkdownParts.has(action.partId)) {
				chunks.push(action.content);
			}
		}
	}
	return chunks.join('');
}
187
188
/** Summary of what `driveTurnToCompletion` observed while running a turn. */
interface IDrivenTurnResult {
	/** `true` if at least one `session/inputRequested` action was received (and answered). */
	sawInputRequest: boolean;
	/** `true` if at least one `session/toolCallReady` arrived without `confirmed` set and had to be approved. */
	sawPendingConfirmation: boolean;
	/** Markdown text collected from the notifications received during the turn. */
	responseText: string;
}
193
194
/**
 * Dispatches a turn and services it until `session/turnComplete` arrives.
 *
 * While the turn runs, the helper:
 * - approves every `session/toolCallReady` that is not already confirmed
 *   (unconditionally — only use with prompts that cannot trigger
 *   destructive tool calls);
 * - answers every `session/inputRequested` with an `Accept` response whose
 *   answers come from `getAcceptedAnswers`.
 *
 * Received notifications are cleared before the turn is dispatched, so the
 * returned `responseText` only reflects this turn.
 *
 * @param c Connected protocol client.
 * @param session Session resource URI.
 * @param turnId Identifier for the turn being driven.
 * @param text User message text.
 * @param clientSeq Sequence number for the turn dispatch; follow-up dispatches
 * use `clientSeq + 1`, `clientSeq + 2`, ...
 * @returns Flags describing what was observed plus the markdown response text.
 * @throws Error if a `session/error` action is received (the server-provided
 * message is included when present) or if no relevant notification arrives
 * within the 90 second wait.
 */
async function driveTurnToCompletion(c: TestProtocolClient, session: string, turnId: string, text: string, clientSeq: number): Promise<IDrivenTurnResult> {
	c.clearReceived();
	dispatchTurn(c, session, turnId, text, clientSeq);

	// `waitForNotification` can return a notification that was already
	// handled, so handled ones are tracked by object identity and skipped.
	const seenNotifications = new Set<object>();
	let nextClientSeq = clientSeq + 1;
	let sawInputRequest = false;
	let sawPendingConfirmation = false;

	while (true) {
		const notification = await c.waitForNotification(n => !seenNotifications.has(n as object) && (
			isActionNotification(n, 'session/toolCallReady')
			|| isActionNotification(n, 'session/inputRequested')
			|| isActionNotification(n, 'session/turnComplete')
			|| isActionNotification(n, 'session/error')
		), 90_000);
		seenNotifications.add(notification as object);

		if (isActionNotification(notification, 'session/error')) {
			// Surface the server's message so a failing run is diagnosable
			// from the test output alone.
			const detail = (getActionEnvelope(notification).action as { error?: { message?: string } }).error?.message;
			throw new Error(detail
				? `Session error while driving ${turnId}: ${detail}`
				: `Session error while driving ${turnId}`);
		}

		if (isActionNotification(notification, 'session/toolCallReady')) {
			const action = getActionEnvelope(notification).action as SessionToolCallReadyAction;
			if (!action.confirmed) {
				sawPendingConfirmation = true;
				c.notify('dispatchAction', {
					clientSeq: nextClientSeq++,
					action: {
						type: 'session/toolCallConfirmed',
						session,
						turnId,
						toolCallId: action.toolCallId,
						approved: true,
					},
				});
			}
			continue;
		}

		if (isActionNotification(notification, 'session/inputRequested')) {
			sawInputRequest = true;
			const action = getActionEnvelope(notification).action as SessionInputRequestedAction;
			c.notify('dispatchAction', {
				clientSeq: nextClientSeq++,
				action: {
					type: 'session/inputCompleted',
					session,
					requestId: action.request.id,
					response: SessionInputResponseKind.Accept,
					answers: getAcceptedAnswers(action.request),
				},
			});
			continue;
		}

		// Only `session/turnComplete` remains: the turn is finished.
		break;
	}

	return {
		sawInputRequest,
		sawPendingConfirmation,
		responseText: getMarkdownResponseText(c),
	};
}
259
260
/**
 * Returns the `resource` of the first terminal-typed entry in a tool result.
 *
 * @param content Tool result content parts to search.
 * @returns The terminal entry's resource, or `undefined` when there is no terminal entry.
 */
function terminalResourceFromContent(content: readonly ToolResultContent[]): string | undefined {
	for (const part of content) {
		if (part.type === ToolResultContentType.Terminal) {
			return part.resource;
		}
	}
	return undefined;
}
264
265
/**
 * Flattens a terminal state into plain text with ANSI escape codes removed.
 *
 * `command` parts contribute their command line, a newline, then their
 * output; every other part contributes its `value`.
 *
 * @param state Terminal state whose content parts are joined.
 * @returns The joined, escape-code-free text.
 */
function terminalText(state: TerminalState): string {
	const raw = state.content
		.map(part => {
			if (part.type === 'command') {
				return `${part.commandLine}\n${part.output}`;
			}
			return part.value;
		})
		.join('');
	return removeAnsiEscapeCodes(raw);
}
268
269
/**
 * Finds the tool name for a tool call id.
 *
 * `session/toolCallReady` is resolved against the `session/toolCallStart`
 * actions the client has already received; the first start action with a
 * matching id wins.
 *
 * @param c Protocol client whose received notifications are searched.
 * @param toolCallId Identifier of the tool call to look up.
 * @returns The tool name, or `undefined` if no matching start action was received.
 */
function findToolNameForCall(c: TestProtocolClient, toolCallId: string): string | undefined {
	const starts = c.receivedNotifications(n => isActionNotification(n, 'session/toolCallStart'));
	for (const entry of starts) {
		const start = getActionEnvelope(entry).action as SessionToolCallStartAction;
		if (start.toolCallId === toolCallId) {
			return start.toolName;
		}
	}
	return undefined;
}
275
276
/** One entry in the allow-list consulted by the background approval loop. */
interface IApprovalRule {
	/** Name of the tool this rule covers (e.g. `'bash'`, `'write_bash'`). */
	toolName: string;
	/**
	 * Optional filter on the tool input. When absent, every input for
	 * `toolName` matches.
	 */
	matchInput?: (toolInput: string | undefined) => boolean;
	/**
	 * Optional hook invoked for each matched call before it is approved.
	 * Append messages to `errors` to make the test fail.
	 */
	inspect?: (info: {
		action: SessionToolCallReadyAction;
		errors: string[];
	}) => void;
}
290
291
/** Configuration for `startBackgroundApprovalLoop`. */
interface IBackgroundApprovalLoopOptions {
	/**
	 * Base clientSeq for the `toolCallConfirmed` actions the loop dispatches.
	 * The loop increments before each dispatch, so the first action uses
	 * `approvalSeqStart + 1`. Pick a value that cannot collide with the
	 * test's own dispatches.
	 */
	approvalSeqStart: number;
	/**
	 * Allow-list of tool calls the loop may auto-approve. A pending
	 * confirmation is approved when a rule matches it by `toolName` and, if
	 * present, by its `matchInput` predicate; the first matching rule is
	 * used. Anything else is recorded as an error and denied, so the loop
	 * never rubber-stamps a call the test did not anticipate (e.g. an
	 * unexpected `rm` from the model).
	 */
	allow: readonly IApprovalRule[];
}
303
304
/** Handle returned by `startBackgroundApprovalLoop`. */
interface IBackgroundApprovalLoop {
	/** Problems recorded while running: unmatched tool calls, inspector failures and unexpected loop errors. */
	readonly errors: readonly string[];
	/** Names of tools that matched a rule and were approved at least once. */
	readonly approvedToolNames: ReadonlySet<string>;
	/**
	 * Names of tools for every pending permission request the loop saw,
	 * whether or not it matched the allow-list. Handy for asserting that a
	 * tool with `skipPermission: true` never went through a permission flow.
	 */
	readonly observedToolNames: ReadonlySet<string>;
	/** Signals the loop to stop and resolves once it has exited. */
	stop(): Promise<void>;
}
318
319
/**
 * Runs a background loop that answers pending tool call confirmations during
 * a real-SDK turn. Calls matching the supplied allow-list are approved;
 * everything else is denied and recorded as an error, so the test fails
 * loudly instead of silently approving model-chosen tool calls.
 *
 * Implementation note: `waitForNotification` does NOT consume notifications
 * from the client's queue, so already-handled ones are skipped by `serverSeq`.
 *
 * @param c Connected protocol client to poll and dispatch on.
 * @param options Allow-list and starting sequence number.
 * @returns A handle exposing collected errors, observed/approved tool names
 * and a `stop()` method.
 */
function startBackgroundApprovalLoop(c: TestProtocolClient, options: IBackgroundApprovalLoopOptions): IBackgroundApprovalLoop {
	type PendingToolCall = SessionToolCallReadyAction & { session: string; turnId: string };

	const errors: string[] = [];
	const approvedToolNames = new Set<string>();
	const observedToolNames = new Set<string>();
	const handledSeqs = new Set<number>();
	let running = true;
	let approvalSeq = options.approvalSeqStart;

	/** Dispatches the approve/deny decision for a pending tool call. */
	const respond = (pending: PendingToolCall, approved: boolean): void => {
		c.notify('dispatchAction', {
			clientSeq: ++approvalSeq,
			action: {
				type: 'session/toolCallConfirmed',
				session: pending.session,
				turnId: pending.turnId,
				toolCallId: pending.toolCallId,
				approved,
			},
		});
	};

	/** Waits (up to 2 seconds) for one unhandled `toolCallReady` and answers it. */
	const pollOnce = async (): Promise<void> => {
		const ready = await c.waitForNotification(
			n => isActionNotification(n, 'session/toolCallReady') && !handledSeqs.has(getActionEnvelope(n).serverSeq),
			2_000,
		);
		const envelope = getActionEnvelope(ready);
		handledSeqs.add(envelope.serverSeq);

		const pending = envelope.action as PendingToolCall;
		if (pending.confirmed) {
			// Already confirmed — nothing to answer.
			return;
		}

		const toolName = findToolNameForCall(c, pending.toolCallId);
		if (toolName) {
			observedToolNames.add(toolName);
		}

		const rule = options.allow.find(candidate =>
			candidate.toolName === toolName
			&& (candidate.matchInput?.(pending.toolInput) ?? true));

		if (!rule) {
			errors.push(`unexpected tool call: toolName=${toolName ?? '<unknown>'} input=${JSON.stringify(pending.toolInput)}`);
			respond(pending, false);
			return;
		}

		rule.inspect?.({ action: pending, errors });
		approvedToolNames.add(rule.toolName);
		respond(pending, true);
	};

	const loop = (async () => {
		while (running) {
			try {
				await pollOnce();
			} catch (e) {
				// The 2-second poll timeout is expected and simply retried.
				// Anything else (e.g. 'Client closed', an exception thrown by
				// a rule's inspector) is a real failure: record it so the
				// test fails deterministically, and stop polling.
				const msg = e instanceof Error ? e.message : String(e);
				const isPollTimeout = msg.includes('Timed out') || msg.includes('timed out');
				if (!isPollTimeout) {
					errors.push(`approval loop error: ${msg}`);
					running = false;
				}
			}
		}
	})();

	return {
		errors,
		approvedToolNames,
		observedToolNames,
		async stop(): Promise<void> {
			running = false;
			await loop;
		},
	};
}
411
412
(REAL_SDK_ENABLED ? suite : suite.skip)('Protocol WebSocket — Real Copilot SDK', function () {
413
414
let server: IServerHandle;
415
let client: TestProtocolClient;
416
/** Session URIs created during the current test, disposed in teardown. */
417
const createdSessions: string[] = [];
418
/** Temp directories created during the current test, removed in teardown. */
419
const tempDirs: string[] = [];
420
421
suiteSetup(async function () {
422
this.timeout(60_000);
423
server = await startRealServer();
424
});
425
426
suiteTeardown(function () {
427
server?.process.kill();
428
});
429
430
setup(async function () {
431
this.timeout(30_000);
432
client = new TestProtocolClient(server.port);
433
await client.connect();
434
});
435
436
teardown(async function () {
437
// Dispose all sessions created during this test
438
for (const session of createdSessions) {
439
try {
440
await client.call('disposeSession', { session }, 5000);
441
} catch {
442
// Best-effort cleanup — the session may already be gone
443
}
444
}
445
createdSessions.length = 0;
446
client.close();
447
448
// Remove temp directories created during this test. On Windows the
449
// agent subprocess can still hold handles to the working directory for
450
// a brief moment after `disposeSession` returns, which surfaces as
451
// EBUSY. Retry a few times to give the OS a chance to release the
452
// handle before failing the teardown.
453
for (const dir of tempDirs) {
454
try {
455
rmSync(dir, { recursive: true, force: true, maxRetries: 5, retryDelay: 200 });
456
} catch {
457
// Best-effort cleanup — leftover temp dirs in os.tmpdir() are
458
// harmless and shouldn't fail an otherwise passing test.
459
}
460
}
461
tempDirs.length = 0;
462
});
463
464
// ---- Basic turn execution ------------------------------------------------
465
466
test('sends a simple message and receives a response', async function () {
467
this.timeout(120_000);
468
469
const sessionUri = await createRealSession(client, 'real-sdk-simple', createdSessions, URI.file(tmpdir()).toString());
470
dispatchTurn(client, sessionUri, 'turn-1', 'Say exactly "hello" and nothing else', 1);
471
472
// Wait for the turn to complete — the real SDK may take a while
473
await client.waitForNotification(n => isActionNotification(n, 'session/turnComplete'), 90_000);
474
475
// Verify we received at least one response part
476
const responseParts = client.receivedNotifications(n => isActionNotification(n, 'session/responsePart'));
477
assert.ok(responseParts.length > 0, 'should have received at least one response part');
478
});
479
480
// ---- Tool call with permission flow -------------------------------------
481
482
test('tool call triggers permission request and can be approved', async function () {
483
this.timeout(120_000);
484
485
const tempDir = mkdtempSync(`${tmpdir()}/ahp-perm-test-`);
486
tempDirs.push(tempDir);
487
const sessionUri = await createRealSession(client, 'real-sdk-permission', createdSessions, URI.file(tempDir).toString());
488
dispatchTurn(client, sessionUri, 'turn-perm', 'Run the shell command: echo "hello from test"', 1);
489
490
// The real SDK should fire a tool call that needs permission
491
const toolStartNotif = await client.waitForNotification(
492
n => isActionNotification(n, 'session/toolCallStart'),
493
60_000,
494
);
495
const toolStartAction = getActionEnvelope(toolStartNotif).action as { toolCallId: string };
496
497
// Wait for toolCallReady (pending confirmation)
498
const toolReadyNotif = await client.waitForNotification(
499
n => isActionNotification(n, 'session/toolCallReady'),
500
30_000,
501
);
502
const toolReadyAction = getActionEnvelope(toolReadyNotif).action as { toolCallId: string; confirmed?: string };
503
504
// If the tool was auto-approved, confirmed will be set; if pending, confirm it
505
if (!toolReadyAction.confirmed) {
506
client.notify('dispatchAction', {
507
clientSeq: 2,
508
action: {
509
type: 'session/toolCallConfirmed',
510
session: sessionUri,
511
turnId: 'turn-perm',
512
toolCallId: toolStartAction.toolCallId,
513
approved: true,
514
},
515
});
516
}
517
518
// Wait for the turn to complete
519
await client.waitForNotification(n => isActionNotification(n, 'session/turnComplete'), 90_000);
520
});
521
522
test('planning-mode session-state writes are auto-approved in default mode', async function () {
523
this.timeout(180_000);
524
525
const tempDir = mkdtempSync(`${tmpdir()}/ahp-plan-test-`);
526
tempDirs.push(tempDir);
527
const sessionUri = await createRealSession(client, 'real-sdk-plan-mode', createdSessions, URI.file(tempDir).toString());
528
529
// Switch the session into plan mode via the standard config-change flow
530
// before sending the first turn. The agent host reads this value at
531
// turn-start time and pushes it to the SDK via `rpc.mode.set`.
532
client.notify('dispatchAction', {
533
clientSeq: 1,
534
action: {
535
type: 'session/configChanged',
536
session: sessionUri,
537
config: { mode: 'plan' },
538
},
539
});
540
await client.waitForNotification(n => isActionNotification(n, 'session/configChanged'));
541
542
const planTurn = await driveTurnToCompletion(client, sessionUri, 'turn-plan',
543
'Help me implement a Python script that prints "hello world" to stdout. Write the shortest possible plan to your session plan.md and use the exit_plan_mode tool to ask me to approve it before writing any code.', 2);
544
assert.strictEqual(planTurn.sawPendingConfirmation, false, 'should not have received pending-confirmation toolCallReady while writing session-state plan.md');
545
assert.ok(planTurn.sawInputRequest, 'should reach the exit_plan_mode question so the test can continue the same session');
546
547
const extraSessionNotificationsAfterPlan = client.receivedNotifications(n =>
548
n.method === 'notification' && (n.params as INotificationBroadcastParams).notification.type === 'notify/sessionAdded',
549
);
550
assert.strictEqual(extraSessionNotificationsAfterPlan.length, 0, 'should not create a second session while answering the plan-mode question');
551
552
// Mirror what a real UI client would do after the user accepted the
553
// plan: update the session config so subsequent turns no longer run
554
// in plan mode. Without this the agent host would re-set the SDK's
555
// mode to 'plan' at the next send because the session config still
556
// holds the original 'plan' value.
557
client.notify('dispatchAction', {
558
clientSeq: 50,
559
action: {
560
type: 'session/configChanged',
561
session: sessionUri,
562
config: { mode: 'interactive' },
563
},
564
});
565
await client.waitForNotification(n => isActionNotification(n, 'session/configChanged'));
566
567
const followupTurn = await driveTurnToCompletion(client, sessionUri, 'turn-followup',
568
'What did the plan I just approved say to print? Reply with exactly "hello world".', 100,
569
);
570
assert.strictEqual(followupTurn.sawPendingConfirmation, false, 'follow-up turn should not surface new pending confirmations');
571
assert.match(followupTurn.responseText, /hello world/i, 'follow-up turn should retain the original plan context');
572
573
const extraSessionNotificationsAfterFollowup = client.receivedNotifications(n =>
574
n.method === 'notification' && (n.params as INotificationBroadcastParams).notification.type === 'notify/sessionAdded',
575
);
576
assert.strictEqual(extraSessionNotificationsAfterFollowup.length, 0, 'sending another message should stay on the same session instead of forking');
577
578
const resubscribeResult = await client.call<SubscribeResult>('subscribe', { resource: sessionUri });
579
const finalSnapshot = resubscribeResult.snapshot.state as SessionState;
580
assert.strictEqual(finalSnapshot.summary.resource, sessionUri, 'follow-up turn should keep the original session resource');
581
});
582
583
// ---- Abort / cancel -----------------------------------------------------
584
585
test('can abort a running turn', async function () {
586
this.timeout(120_000);
587
588
const sessionUri = await createRealSession(client, 'real-sdk-abort', createdSessions, URI.file(tmpdir()).toString());
589
dispatchTurn(client, sessionUri, 'turn-abort', 'Write a very long essay about the history of computing', 1);
590
591
// Wait a moment for the turn to start processing, then abort
592
await client.waitForNotification(
593
n => isActionNotification(n, 'session/responsePart') || isActionNotification(n, 'session/toolCallStart'),
594
60_000,
595
);
596
597
client.notify('dispatchAction', {
598
clientSeq: 2,
599
action: {
600
type: 'session/abortTurn',
601
session: sessionUri,
602
},
603
});
604
605
// Verify the abort action was echoed back by the server.
606
// We don't wait for turnComplete because the real Copilot SDK may
607
// continue streaming after abort and the turn may not terminate within
608
// the test timeout.
609
await client.waitForNotification(
610
n => isActionNotification(n, 'session/abortTurn'),
611
10_000,
612
);
613
});
614
615
// ---- Working directory correctness --------------------------------------
616
617
test('session is created with the correct working directory', async function () {
618
this.timeout(120_000);
619
620
// Use a real temp directory so the path exists on disk.
621
// Clean it up at the end to avoid leaving test artifacts.
622
const tempDir = mkdtempSync(`${tmpdir()}/ahp-test-`);
623
tempDirs.push(tempDir);
624
const workingDirUri = URI.file(tempDir).toString();
625
626
await client.call('initialize', { protocolVersion: PROTOCOL_VERSION, clientId: 'real-sdk-workdir' });
627
await client.call('authenticate', { resource: 'https://api.github.com', token: resolveGitHubToken() });
628
629
const sessionUri = URI.from({ scheme: 'copilotcli', path: `/real-test-wd-${Date.now()}` }).toString();
630
await client.call('createSession', { session: sessionUri, provider: 'copilotcli', workingDirectory: workingDirUri });
631
632
// 1. Verify workingDirectory in the sessionAdded notification
633
const addedNotif = await client.waitForNotification(n =>
634
n.method === 'notification' && (n.params as INotificationBroadcastParams).notification.type === 'notify/sessionAdded',
635
15_000,
636
);
637
const addedSummary = ((addedNotif.params as INotificationBroadcastParams).notification as SessionAddedNotification).summary;
638
createdSessions.push(addedSummary.resource);
639
assert.strictEqual(
640
addedSummary.workingDirectory,
641
workingDirUri,
642
`sessionAdded notification should carry the requested working directory`,
643
);
644
645
// 2. Subscribe and verify workingDirectory in the session state snapshot
646
const subscribeResult = await client.call<SubscribeResult>('subscribe', { resource: addedSummary.resource });
647
const sessionState = subscribeResult.snapshot.state as SessionState;
648
assert.strictEqual(
649
sessionState.summary.workingDirectory,
650
workingDirUri,
651
`subscribe snapshot summary should carry the requested working directory`,
652
);
653
});
654
655
// ---- Worktree isolation -------------------------------------------------
656
657
test('worktree session uses the resolved worktree as working directory', async function () {
658
this.timeout(120_000);
659
660
// Set up a minimal git repo so the server can create a worktree
661
const tempDir = mkdtempSync(`${tmpdir()}/ahp-wt-test-`);
662
tempDirs.push(tempDir, `${tempDir}.worktrees`);
663
execSync('git init', { cwd: tempDir });
664
execSync('git config user.name "Agent Host Test"', { cwd: tempDir });
665
execSync('git config user.email "[email protected]"', { cwd: tempDir });
666
execSync('git commit --allow-empty -m "init"', { cwd: tempDir });
667
const defaultBranch = execSync('git branch --show-current', { cwd: tempDir, encoding: 'utf-8' }).trim();
668
const workingDirUri = URI.file(tempDir).toString();
669
670
await client.call('initialize', { protocolVersion: PROTOCOL_VERSION, clientId: 'real-sdk-worktree' });
671
await client.call('authenticate', { resource: 'https://api.github.com', token: resolveGitHubToken() });
672
673
const sessionUri = URI.from({ scheme: 'copilotcli', path: `/real-test-wt-${Date.now()}` }).toString();
674
await client.call('createSession', {
675
session: sessionUri,
676
provider: 'copilotcli',
677
workingDirectory: workingDirUri,
678
config: { isolation: 'worktree', branch: defaultBranch },
679
});
680
681
const addedNotif = await client.waitForNotification(n =>
682
n.method === 'notification' && (n.params as INotificationBroadcastParams).notification.type === 'notify/sessionAdded',
683
15_000,
684
);
685
const addedSummary = ((addedNotif.params as INotificationBroadcastParams).notification as SessionAddedNotification).summary;
686
createdSessions.push(addedSummary.resource);
687
688
// Subscribe so we receive action broadcasts for this session
689
await client.call<SubscribeResult>('subscribe', { resource: addedSummary.resource });
690
691
// Verify the worktree path is in the summary
692
assert.ok(
693
addedSummary.workingDirectory,
694
'sessionAdded notification should have a workingDirectory',
695
);
696
assert.ok(
697
addedSummary.workingDirectory!.includes('.worktrees'),
698
`workingDirectory should be under the .worktrees folder, got: ${addedSummary.workingDirectory}`,
699
);
700
const resolvedWorkingDirectoryPath = URI.parse(addedSummary.workingDirectory!).fsPath;
701
702
// Set the active client with tools (matching real VS Code flow where
703
// activeClientChanged is dispatched AFTER createSession). When the next
704
// sendMessage detects the tools changed vs the session's creation-time
705
// snapshot, it disposes the SDK session and re-creates it via
706
// _resumeSession. That resume path must use the worktree working
707
// directory, not the original repo path.
708
client.notify('dispatchAction', {
709
clientSeq: 1,
710
action: {
711
type: 'session/activeClientChanged',
712
session: addedSummary.resource,
713
activeClient: {
714
clientId: 'real-sdk-worktree',
715
displayName: 'Test Client',
716
tools: [
717
{
718
name: 'test_echo',
719
description: 'A harmless echo tool for testing',
720
inputSchema: { type: 'object', properties: { message: { type: 'string' } } },
721
},
722
],
723
},
724
},
725
});
726
727
// Send a turn — this triggers sendMessage, which will detect the tools
728
// changed and refresh the session (dispose + _resumeSession). The
729
// resumed session should still have the worktree as its working
730
// directory. Ask a safe, read-only question about the working directory.
731
client.clearReceived();
732
dispatchTurn(client, addedSummary.resource, 'turn-wt',
733
'What is your current working directory? Reply with just the absolute path and nothing else.', 2);
734
735
// Wait for the turn to complete or error
736
await client.waitForNotification(
737
n => isActionNotification(n, 'session/turnComplete') || isActionNotification(n, 'session/error'),
738
90_000,
739
);
740
741
// The session refresh should succeed — if it errors with
742
// "workingDirectory is required to resume", the worktree path was lost.
743
const errors = client.receivedNotifications(n => isActionNotification(n, 'session/error'));
744
assert.strictEqual(errors.length, 0,
745
errors.length > 0
746
? `Session error during turn (worktree path lost on resume): ${(getActionEnvelope(errors[0]).action as { error?: { message?: string } }).error?.message}`
747
: '',
748
);
749
750
// Verify the turn got a response (the session resumed successfully)
751
const responseParts = client.receivedNotifications(n => isActionNotification(n, 'session/responsePart'));
752
assert.ok(responseParts.length > 0, 'should have received at least one response part after session refresh');
753
754
client.clearReceived();
755
dispatchTurn(client, addedSummary.resource, 'turn-wt-terminal', 'Run the shell command: pwd', 3);
756
757
const toolStartNotif = await client.waitForNotification(
758
n => isActionNotification(n, 'session/toolCallStart'),
759
60_000,
760
);
761
const toolStartAction = getActionEnvelope(toolStartNotif).action as { toolCallId: string };
762
763
const toolReadyNotif = await client.waitForNotification(
764
n => isActionNotification(n, 'session/toolCallReady'),
765
30_000,
766
);
767
const toolReadyAction = getActionEnvelope(toolReadyNotif).action as { confirmed?: string };
768
if (!toolReadyAction.confirmed) {
769
client.notify('dispatchAction', {
770
clientSeq: 4,
771
action: {
772
type: 'session/toolCallConfirmed',
773
session: addedSummary.resource,
774
turnId: 'turn-wt-terminal',
775
toolCallId: toolStartAction.toolCallId,
776
approved: true,
777
},
778
});
779
}
780
781
const terminalContentNotif = await client.waitForNotification(n => {
782
if (!isActionNotification(n, 'session/toolCallContentChanged')) {
783
return false;
784
}
785
const action = getActionEnvelope(n).action as { toolCallId: string; content: readonly ToolResultContent[] };
786
return action.toolCallId === toolStartAction.toolCallId && terminalResourceFromContent(action.content) !== undefined;
787
}, 30_000);
788
const terminalContentAction = getActionEnvelope(terminalContentNotif).action as { content: readonly ToolResultContent[] };
789
const terminalUri = terminalResourceFromContent(terminalContentAction.content);
790
assert.ok(terminalUri, 'shell tool should expose its terminal resource');
791
792
const terminalSubscribeResult = await client.call<SubscribeResult>('subscribe', { resource: terminalUri });
793
const initialTerminalState = terminalSubscribeResult.snapshot.state as TerminalState;
794
assert.strictEqual(initialTerminalState.cwd, resolvedWorkingDirectoryPath, 'terminal should be created in the resolved worktree directory');
795
796
await client.waitForNotification(n => isActionNotification(n, 'session/turnComplete'), 90_000);
797
const terminalSnapshot = await client.call<SubscribeResult>('subscribe', { resource: terminalUri });
798
const terminalState = terminalSnapshot.snapshot.state as TerminalState;
799
assert.ok(terminalText(terminalState).includes(resolvedWorkingDirectoryPath), `pwd output should include the resolved worktree path ${resolvedWorkingDirectoryPath}`);
800
});
801
802
// ---- Subagent tool call grouping ----------------------------------------
803
804
test('subagent tool calls are routed to the subagent session, not flat in the parent', async function () {
	// Drives a real turn that delegates to a subagent via the `task` tool and
	// verifies that the subagent's inner tool calls are reported against the
	// subagent session rather than leaking into the parent session.
	this.timeout(180_000);

	// Set up a small fixture directory so the subagent has something to view.
	const tempDir = mkdtempSync(`${tmpdir()}/ahp-subagent-test-`);
	tempDirs.push(tempDir);
	writeFileSync(`${tempDir}/file-a.txt`, 'alpha');
	writeFileSync(`${tempDir}/file-b.txt`, 'beta');

	const sessionUri = await createRealSession(client, 'real-sdk-subagent', createdSessions, URI.file(tempDir).toString());

	// Auto-approve every tool that needs confirmation while the turn runs.
	// Multiple inner tool calls may need approval; doing this in a background
	// loop keeps the turn unblocked. Track processed serverSeqs so we don't
	// busy-spin on already-handled notifications (waitForNotification returns
	// matching notifications from the queue without consuming them). Using
	// serverSeq rather than toolCallId allows the same tool to be legitimately
	// re-confirmed in a later notification.
	let approvalsActive = true;
	let approvalSeq = 1000;
	const processedSeqs = new Set<number>();
	const approvalLoop = (async () => {
		while (approvalsActive) {
			try {
				// The predicate only matches unconfirmed, not-yet-processed
				// toolCallReady notifications, so no re-check is needed below.
				const ready = await client.waitForNotification(n => {
					if (!isActionNotification(n, 'session/toolCallReady')) {
						return false;
					}
					const envelope = getActionEnvelope(n);
					const a = envelope.action as { confirmed?: string };
					return !a.confirmed && !processedSeqs.has(envelope.serverSeq);
				}, 2_000);
				const envelope = getActionEnvelope(ready);
				processedSeqs.add(envelope.serverSeq);
				const action = envelope.action as { session: string; turnId: string; toolCallId: string };
				client.notify('dispatchAction', {
					clientSeq: ++approvalSeq,
					action: {
						type: 'session/toolCallConfirmed',
						session: action.session,
						turnId: action.turnId,
						toolCallId: action.toolCallId,
						approved: true,
					},
				});
			} catch {
				// Timeout — re-poll. Loop exits when approvalsActive flips.
			}
		}
	})();

	try {
		// Encourage the model to delegate via the `task` subagent tool. The exact
		// behaviour is non-deterministic — if the model declines we fail the test
		// with a clear message rather than silently passing.
		dispatchTurn(client, sessionUri, 'turn-sa',
			'Use the `task` tool to spawn a subagent to list the files in the current working directory. ' +
			'The subagent should call a single read-only tool (e.g. `view` or `bash` with `ls`) to enumerate the directory. ' +
			'Do not enumerate the directory yourself — delegate to the subagent.',
			1);

		// Wait for the parent's `task` tool call to expose a Subagent content
		// block carrying the subagent session URI.
		const subagentContentNotif = await client.waitForNotification(n => {
			if (!isActionNotification(n, 'session/toolCallContentChanged')) {
				return false;
			}
			const action = getActionEnvelope(n).action as { session: string; content: readonly ToolResultContent[] };
			return action.session === sessionUri && action.content.some(c => c.type === ToolResultContentType.Subagent);
		}, 120_000);

		const parentContent = (getActionEnvelope(subagentContentNotif).action as { content: readonly ToolResultContent[] }).content;
		const subagentRef = parentContent.find((c): c is ToolResultSubagentContent => c.type === ToolResultContentType.Subagent)!;
		const subagentSessionUri = subagentRef.resource as unknown as string;
		assert.ok(typeof subagentSessionUri === 'string' && isSubagentSession(subagentSessionUri),
			`subagent session URI should be subagent-shaped, got: ${JSON.stringify(subagentSessionUri)}`);

		// Subscribe so we receive the subagent session's own action broadcasts.
		await client.call<SubscribeResult>('subscribe', { resource: subagentSessionUri });

		// Wait for the parent turn to complete (with a generous timeout — the
		// subagent's turn must finish first).
		await client.waitForNotification(n => {
			if (!isActionNotification(n, 'session/turnComplete')) {
				return false;
			}
			return (getActionEnvelope(n).action as { session: string }).session === sessionUri;
		}, 150_000);

		// Group all received toolCallStart actions by the session they target.
		// This is the bug's signature: when inner tool_start arrives before
		// subagent_started, the inner tool calls leak into the parent session.
		const toolStarts = client.receivedNotifications(n => isActionNotification(n, 'session/toolCallStart'))
			.map(n => getActionEnvelope(n).action as SessionToolCallStartAction);

		const parentStarts = toolStarts.filter(a => (a.session as unknown as string) === sessionUri);
		const subagentStarts = toolStarts.filter(a => (a.session as unknown as string) === subagentSessionUri);

		// Parent should only carry the outer `task` tool call. Any other
		// tool call on the parent indicates the inner-tool routing bug.
		const parentNonTaskStarts = parentStarts.filter(a => a.toolName !== 'task');
		assert.deepStrictEqual(
			parentNonTaskStarts.map(a => a.toolName),
			[],
			`parent session should not contain inner tool calls; found: ${JSON.stringify(parentNonTaskStarts.map(a => a.toolName))}`,
		);

		// Subagent session must have at least one inner tool call. If this
		// fails, the subagent never actually executed any work — likely the
		// model didn't delegate as instructed.
		assert.ok(subagentStarts.length >= 1,
			`subagent session should contain at least one inner tool call, got ${subagentStarts.length}. ` +
			`Parent tool calls: ${JSON.stringify(parentStarts.map(a => a.toolName))}`);
	} finally {
		// Always stop the background approval loop, even when a wait above
		// times out or an assertion fails; otherwise it would keep polling
		// (and auto-approving tool calls) after this test has ended.
		approvalsActive = false;
		await approvalLoop;
	}
});
924
925
// ---- Model discovery -----------------------------------------------------
926
927
test('listModels returns well-shaped model entries after authenticate', async function () {
	// Authenticates against the real SDK and checks that the model list
	// published through root/agentsChanged has the expected per-model shape.
	this.timeout(60_000);

	// Matches the Copilot CLI agent entry in an agent list.
	const isCopilotCli = (entry: { provider: string }): boolean => entry.provider === 'copilotcli';

	await client.call('initialize', { protocolVersion: PROTOCOL_VERSION, clientId: 'real-sdk-list-models' }, 30_000);

	// The root-state subscription has to exist *before* authenticating;
	// otherwise the agentsChanged action carrying the populated model list
	// could be missed.
	const rootSubscription = await client.call<SubscribeResult>('subscribe', { resource: ROOT_STATE_URI }, 30_000);
	const rootState = rootSubscription.snapshot.state as RootState;
	assert.ok(
		rootState.agents.find(isCopilotCli),
		`Expected copilotcli agent in root state, got: ${rootState.agents.map(a => a.provider).join(', ')}`,
	);

	await client.call('authenticate', { resource: 'https://api.github.com', token: resolveGitHubToken() }, 30_000);

	// Model loading is asynchronous after authenticate, so wait for the
	// first agentsChanged action in which the copilotcli agent has models.
	const populatedNotif = await client.waitForNotification(n => {
		if (isActionNotification(n, 'root/agentsChanged')) {
			const changed = getActionEnvelope(n).action as RootAgentsChangedAction;
			const candidate = changed.agents.find(isCopilotCli);
			return (candidate?.models.length ?? 0) > 0;
		}
		return false;
	}, 30_000);

	const populatedAction = getActionEnvelope(populatedNotif).action as RootAgentsChangedAction;
	const agent = populatedAction.agents.find(isCopilotCli)!;

	assert.ok(agent.models.length > 0, 'Expected at least one model from listModels');

	// Every entry must match the shape CopilotAgent._listModels produces.
	// maxContextWindow stays optional: synthetic SDK entries (e.g. the
	// `auto` router) ship with `capabilities: {}` and no fixed window.
	for (const model of agent.models) {
		const dump = JSON.stringify(model);
		const contextWindow = model.maxContextWindow;
		const contextWindowOk = contextWindow === undefined || (typeof contextWindow === 'number' && contextWindow > 0);
		const visionOk = model.supportsVision === undefined || typeof model.supportsVision === 'boolean';

		assert.strictEqual(typeof model.id, 'string', `model.id should be a string: ${dump}`);
		assert.ok(model.id.length > 0, `model.id should be non-empty: ${dump}`);
		assert.strictEqual(typeof model.name, 'string', `model.name should be a string: ${dump}`);
		assert.strictEqual(model.provider, 'copilotcli', `model.provider should be copilotcli: ${dump}`);
		assert.ok(contextWindowOk, `model.maxContextWindow should be undefined or a positive number: ${dump}`);
		assert.ok(visionOk, `model.supportsVision should be boolean or undefined: ${dump}`);
	}

	// The synthetic `auto` router model must be listed even though it has
	// no fixed context window.
	const modelIds = agent.models.map(m => m.id);
	assert.ok(modelIds.includes('auto'), `Expected 'auto' model in list, got: ${modelIds.join(', ')}`);
});
974
975
// ---- Redundant cd-prefix stripping --------------------------------------
976
977
test('strips redundant `cd <workingDirectory> &&` prefix from shell tool calls', async function () {
	// Asks the model to run a `cd <wd> && …` command and verifies that the
	// toolInput surfaced to the client no longer carries the redundant
	// cd-prefix, then approves any pending tool calls so the turn finishes.
	this.timeout(180_000);

	const tempDir = mkdtempSync(`${tmpdir()}/ahp-cd-strip-test-`);
	tempDirs.push(tempDir);
	const expectedWorkingDirPath = tempDir;
	const sessionUri = await createRealSession(client, 'real-sdk-cd-strip', createdSessions, URI.file(tempDir).toString());

	// Coax the model into producing a `cd <wd> && X` form. The exact text is
	// non-deterministic, so the test asserts on rewrite behavior conditional
	// on actually receiving a cd-prefixed command.
	client.clearReceived();
	dispatchTurn(client, sessionUri, 'turn-cd-strip',
		`Run this exact shell command, do not modify it: cd ${expectedWorkingDirPath} && echo strip-me-please`,
		1);

	// Wait for the toolCallReady action that carries the rewritten toolInput.
	const toolReadyNotif = await client.waitForNotification(n => {
		if (!isActionNotification(n, 'session/toolCallReady')) {
			return false;
		}
		const action = getActionEnvelope(n).action as { toolInput?: string };
		return typeof action.toolInput === 'string' && action.toolInput.includes('echo strip-me-please');
	}, 90_000);

	const toolReadyEnvelope = getActionEnvelope(toolReadyNotif);
	const toolReadyAction = toolReadyEnvelope.action as { toolCallId: string; toolInput?: string; confirmed?: string };
	// The wait predicate above only matches when toolInput is a string.
	const toolInput = toolReadyAction.toolInput!;

	// The core assertion: regardless of whether the model emitted the cd
	// prefix verbatim or already pre-stripped it, the toolInput surfaced to
	// the client must NOT contain the redundant `cd <tempDir> &&` prefix.
	// Use a regex that anchors to the start of the command and tolerates
	// optional surrounding quotes around the directory plus either `&&`
	// or `;` as the chain operator (so quoted variants like
	// `cd "<wd>" && …` and pwsh-style `cd <wd>; …` are both detected).
	const escapedWorkingDirPath = expectedWorkingDirPath.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
	const redundantWorkingDirCdPrefix = new RegExp(
		`^\\s*cd\\s+(?:"${escapedWorkingDirPath}"|'${escapedWorkingDirPath}'|${escapedWorkingDirPath})\\s*(?:&&|;)\\s*`,
	);
	assert.ok(
		!redundantWorkingDirCdPrefix.test(toolInput),
		`toolInput should not contain a redundant cd-prefix targeting the working directory; got: ${JSON.stringify(toolInput)}`,
	);
	assert.ok(
		toolInput.includes('echo strip-me-please'),
		`toolInput should contain the rewritten command body; got: ${JSON.stringify(toolInput)}`,
	);

	// Approve so the turn can complete. If it was already auto-confirmed
	// (`confirmed` is set), skip the manual approval.
	if (!toolReadyAction.confirmed) {
		client.notify('dispatchAction', {
			clientSeq: 2,
			action: {
				type: 'session/toolCallConfirmed',
				session: sessionUri,
				turnId: 'turn-cd-strip',
				toolCallId: toolReadyAction.toolCallId,
				approved: true,
			},
		});
	}

	// Drive any further confirmations to completion so teardown is clean.
	// Received notifications stay queued after a wait returns them, so the
	// toolCallReady handled above (and any handled in this loop) must be
	// excluded by serverSeq; otherwise the same notification would be
	// returned on every iteration and turnComplete would never be reached.
	const handledReadySeqs = new Set<number>([toolReadyEnvelope.serverSeq]);
	let nextClientSeq = 3;
	while (true) {
		const next = await client.waitForNotification(n => {
			if (isActionNotification(n, 'session/turnComplete') || isActionNotification(n, 'session/error')) {
				return true;
			}
			return isActionNotification(n, 'session/toolCallReady') && !handledReadySeqs.has(getActionEnvelope(n).serverSeq);
		}, 90_000);
		if (isActionNotification(next, 'session/turnComplete') || isActionNotification(next, 'session/error')) {
			break;
		}
		const envelope = getActionEnvelope(next);
		handledReadySeqs.add(envelope.serverSeq);
		const action = envelope.action as { session: string; turnId: string; toolCallId: string; confirmed?: string };
		if (!action.confirmed) {
			client.notify('dispatchAction', {
				// Each dispatched action gets its own client sequence number.
				clientSeq: nextClientSeq++,
				action: {
					type: 'session/toolCallConfirmed',
					session: action.session,
					turnId: action.turnId,
					toolCallId: action.toolCallId,
					approved: true,
				},
			});
		}
	}
});
1064
1065
// ---- write_bash skipPermission regression test --------------------------
1066
1067
test('write_bash never triggers a permission request (skipPermission flag)', async function () {
	this.timeout(180_000);

	// What this test verifies:
	// `write_bash` (and `read_bash` / `bash_shutdown` / `list_bash`) are
	// registered as external tools with `skipPermission: true`, mirroring
	// the SDK's built-in shell helpers which never call `permissions.request`.
	// This regression test catches accidental removal of that flag — if it's
	// removed, the SDK will route write_bash through our permission flow and
	// the test will fail with `observedToolNames` containing 'write_bash'.
	//
	// How it works:
	// 1. Allow-list permits ONLY `bash` (the interactive prompt). write_bash
	//    is intentionally absent from the allow list.
	// 2. The model is instructed to use `write_bash`. If any permission
	//    request appears for write_bash, the loop records it in
	//    `observedToolNames` and we fail the assertion.
	// 3. We assert that bash actually ran AND that write_bash appeared in
	//    toolCallStart notifications (so the test is non-vacuous — the model
	//    actually tried to use the tool, not just piped input via bash).

	const tempDir = mkdtempSync(`${tmpdir()}/ahp-write-bash-skip-perm-`);
	tempDirs.push(tempDir);
	const sessionUri = await createRealSession(client, 'real-sdk-write-bash-skip-perm', createdSessions, URI.file(tempDir).toString());

	const approvalLoop = startBackgroundApprovalLoop(client, {
		approvalSeqStart: 100,
		allow: [
			{
				// Setup bash command — the interactive `read` prompt.
				toolName: 'bash',
				matchInput: input => !!input && input.includes('read') && input.includes('Got:'),
			},
			// Note: write_bash is intentionally NOT in the allow list. With
			// skipPermission: true, the SDK won't ask us — so the test passes.
			// Without it, the SDK would ask, the loop would deny + record an
			// error, and the test would fail loudly.
		],
	});

	try {
		dispatchTurn(client, sessionUri, 'turn-write-bash-skip-perm',
			'You MUST demonstrate the `write_bash` tool. Steps, in order:\n' +
			'1. Use the `bash` tool to run exactly: read -p "Enter: " v; echo "Got: $v"\n' +
			'   This will block waiting for stdin.\n' +
			'2. While that bash call is waiting, you MUST use the `write_bash` tool to send the input "hello\\n" to it.\n' +
			'   Do NOT pipe the input via the original bash command. Do NOT use `echo hello | ...`.\n' +
			'   You MUST go through the `write_bash` tool — that is the entire point of this task.\n' +
			'3. After the shell prints "Got: hello", reply with the single word "done".',
			1);

		await client.waitForNotification(
			n => isActionNotification(n, 'session/turnComplete') || isActionNotification(n, 'session/error'),
			150_000,
		);
	} finally {
		// Stop the background approval loop even when the turn wait times
		// out, so it cannot keep running after this test has ended.
		await approvalLoop.stop();
	}

	// Sanity check: the bash setup command actually ran. Otherwise the
	// model ignored the prompt and the write_bash assertion below is vacuous.
	assert.ok(approvalLoop.approvedToolNames.has('bash'),
		`expected the model to invoke bash for setup; observed approved tools: ${[...approvalLoop.approvedToolNames].join(', ') || '<none>'}`);

	// Non-vacuousness check: write_bash must have actually been invoked
	// (seen in a toolCallStart notification). If the model piped input via
	// the original bash command instead of using write_bash, this fails.
	const writeBashStarts = client.receivedNotifications(n => isActionNotification(n, 'session/toolCallStart'))
		.map(n => getActionEnvelope(n).action as { toolName?: string })
		.filter(a => a.toolName === 'write_bash');
	assert.ok(writeBashStarts.length > 0,
		`expected write_bash to be invoked at least once (toolCallStart), but it was never called. The model may have piped input via the original bash command instead.`);

	// The actual regression check: write_bash must never reach our
	// permission handler. If this fails, `skipPermission: true` was likely
	// removed from copilotShellTools.ts.
	assert.ok(!approvalLoop.observedToolNames.has('write_bash'),
		`write_bash should be auto-approved by the SDK (skipPermission: true) and never trigger a permission request, but the test observed one. Observed permission requests: ${[...approvalLoop.observedToolNames].join(', ')}`);

	// Any other unexpected permission requests (e.g. an unrelated tool the
	// model decided to use) would also have been recorded as errors.
	assert.deepStrictEqual(approvalLoop.errors, [],
		`unexpected approval-loop errors: ${approvalLoop.errors.join('; ')}`);
});
1148
});
1149
1150