Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
kardolus
GitHub Repository: kardolus/chatgpt-cli
Path: blob/main/agent/core/runner_test.go
3434 views
1
package core_test
2
3
import (
4
"context"
5
"errors"
6
"fmt"
7
"github.com/kardolus/chatgpt-cli/agent/core"
8
"github.com/kardolus/chatgpt-cli/agent/tools"
9
"github.com/kardolus/chatgpt-cli/agent/types"
10
"github.com/sclevine/spec/report"
11
"testing"
12
"time"
13
14
"github.com/golang/mock/gomock"
15
. "github.com/onsi/gomega"
16
"github.com/sclevine/spec"
17
)
18
19
//go:generate mockgen -destination=clockmocks_test.go -package=core_test github.com/kardolus/chatgpt-cli/agent/core Clock
20
//go:generate mockgen -destination=shellmocks_test.go -package=core_test github.com/kardolus/chatgpt-cli/agent/tools Shell
21
//go:generate mockgen -destination=llmmocks_test.go -package=core_test github.com/kardolus/chatgpt-cli/agent/tools LLM
22
//go:generate mockgen -destination=budgetmocks_test.go -package=core_test github.com/kardolus/chatgpt-cli/agent/core Budget
23
//go:generate mockgen -destination=filemocks_test.go -package=core_test github.com/kardolus/chatgpt-cli/agent/tools Files
24
//go:generate mockgen -destination=policymocks_test.go -package=core_test github.com/kardolus/chatgpt-cli/agent/core Policy
25
26
// TestUnitRunner is the go test entry point for the runner suite: it hands
// testRunner to spec.Run under the name "Testing the runner" and reports
// through report.Terminal.
func TestUnitRunner(t *testing.T) {
	terminal := spec.Report(report.Terminal{})
	spec.Run(t, "Testing the runner", testRunner, terminal)
}
29
30
func testRunner(t *testing.T, when spec.G, it spec.S) {
31
var (
32
mockCtrl *gomock.Controller
33
mockClock *MockClock
34
mockShell *MockShell
35
mockLLM *MockLLM
36
mockFiles *MockFiles
37
mockBudget *MockBudget
38
mockPolicy *MockPolicy
39
40
agentTools core.Tools
41
subject *core.DefaultRunner
42
)
43
44
it.Before(func() {
45
RegisterTestingT(t)
46
47
mockCtrl = gomock.NewController(t)
48
mockClock = NewMockClock(mockCtrl)
49
mockShell = NewMockShell(mockCtrl)
50
mockLLM = NewMockLLM(mockCtrl)
51
mockFiles = NewMockFiles(mockCtrl)
52
mockBudget = NewMockBudget(mockCtrl)
53
mockPolicy = NewMockPolicy(mockCtrl)
54
55
agentTools = core.Tools{
56
Shell: mockShell,
57
LLM: mockLLM,
58
Files: mockFiles,
59
}
60
61
subject = core.NewDefaultRunner(agentTools, mockClock, mockBudget, mockPolicy)
62
})
63
64
it.After(func() {
65
mockCtrl.Finish()
66
})
67
68
when("RunStep()", func() {
69
it("returns dry-run result and does not invoke agentTools", func() {
70
dur := expectDuration(mockClock, 123*time.Millisecond)
71
72
cfg := types.Config{DryRun: true, WorkDir: "/tmp"}
73
step := types.Step{
74
Type: types.ToolShell,
75
Command: "echo",
76
Args: []string{"hi"},
77
}
78
79
// Budget: count attempted step, but no tool call.
80
expectAllowStep(mockBudget, step)
81
expectAllowPolicy(mockPolicy, cfg, step)
82
mockBudget.EXPECT().AllowTool(gomock.Any(), gomock.Any()).Times(0)
83
84
res, err := subject.RunStep(context.Background(), cfg, step)
85
Expect(err).NotTo(HaveOccurred())
86
87
Expect(res.Outcome).To(Equal(types.OutcomeDryRun))
88
Expect(res.Duration).To(Equal(dur))
89
Expect(res.Step).To(Equal(step))
90
Expect(res.Transcript).To(ContainSubstring("[dry-run][shell]"))
91
Expect(res.Transcript).To(ContainSubstring(`workdir="/tmp"`))
92
Expect(res.Transcript).To(ContainSubstring(`cmd="echo"`))
93
Expect(res.Exec).To(BeNil())
94
95
expectNoEffects(res)
96
})
97
98
it("runs shell command and returns ok outcome when exit code is 0", func() {
99
dur := expectDuration(mockClock, 123*time.Millisecond)
100
101
cfg := types.Config{DryRun: false, WorkDir: "/tmp"}
102
step := types.Step{
103
Type: types.ToolShell,
104
Command: "echo",
105
Args: []string{"hi"},
106
}
107
108
expectAllowStep(mockBudget, step)
109
expectAllowPolicy(mockPolicy, cfg, step)
110
expectAllowTool(mockBudget, types.ToolShell)
111
112
exec := types.Result{ExitCode: 0, Stdout: "hi\n"}
113
mockShell.
114
EXPECT().
115
Run(gomock.Any(), cfg.WorkDir, "echo", "hi").
116
Return(exec, nil).
117
Times(1)
118
119
res, err := subject.RunStep(context.Background(), cfg, step)
120
Expect(err).NotTo(HaveOccurred())
121
122
Expect(res.Step).To(Equal(step))
123
Expect(res.Outcome).To(Equal(types.OutcomeOK))
124
Expect(res.Duration).To(Equal(dur))
125
Expect(res.Exec).NotTo(BeNil())
126
Expect(*res.Exec).To(Equal(exec))
127
128
expectShellTranscript(res, cfg, step, exec)
129
130
expectOneEffect(res, "shell.exec", "", 0)
131
Expect(res.Effects[0].Meta).NotTo(BeNil())
132
Expect(res.Effects[0].Meta["exitCode"]).To(Equal(exec.ExitCode))
133
})
134
135
it("returns error outcome when shell exits non-zero", func() {
136
dur := expectDuration(mockClock, 10*time.Millisecond)
137
138
cfg := types.Config{DryRun: false, WorkDir: "/repo"}
139
step := types.Step{
140
Type: types.ToolShell,
141
Command: "git",
142
Args: []string{"status", "--porcelain"},
143
}
144
145
expectAllowStep(mockBudget, step)
146
expectAllowPolicy(mockPolicy, cfg, step)
147
expectAllowTool(mockBudget, types.ToolShell)
148
149
exec := types.Result{ExitCode: 17, Stdout: " M file.go\n"}
150
mockShell.
151
EXPECT().
152
Run(gomock.Any(), cfg.WorkDir, "git", "status", "--porcelain").
153
Return(exec, nil).
154
Times(1)
155
156
res, err := subject.RunStep(context.Background(), cfg, step)
157
Expect(err).NotTo(HaveOccurred())
158
159
Expect(res.Outcome).To(Equal(types.OutcomeError))
160
Expect(res.Duration).To(Equal(dur))
161
Expect(res.Exec).NotTo(BeNil())
162
Expect(*res.Exec).To(Equal(exec))
163
164
expectShellTranscript(res, cfg, step, exec)
165
166
expectOneEffect(res, "shell.exec", "", 0)
167
Expect(res.Effects[0].Meta).NotTo(BeNil())
168
Expect(res.Effects[0].Meta["exitCode"]).To(Equal(exec.ExitCode))
169
})
170
171
it("returns OutcomeError (no error) when shell runner errors, and surfaces error in Output", func() {
172
dur := expectDuration(mockClock, 5*time.Millisecond)
173
174
cfg := types.Config{DryRun: false, WorkDir: "/tmp"}
175
step := types.Step{
176
Type: types.ToolShell,
177
Command: "go",
178
Args: []string{"test", "./..."},
179
}
180
181
expectAllowStep(mockBudget, step)
182
expectAllowPolicy(mockPolicy, cfg, step)
183
expectAllowTool(mockBudget, types.ToolShell)
184
185
runErr := errors.New("shell boom")
186
mockShell.
187
EXPECT().
188
Run(gomock.Any(), cfg.WorkDir, "go", "test", "./...").
189
Return(types.Result{}, runErr).
190
Times(1)
191
192
res, err := subject.RunStep(context.Background(), cfg, step)
193
Expect(err).NotTo(HaveOccurred())
194
195
Expect(res.Step).To(Equal(step))
196
Expect(res.Outcome).To(Equal(types.OutcomeError))
197
Expect(res.Duration).To(Equal(dur))
198
Expect(res.Exec).To(BeNil())
199
Expect(res.Output).To(ContainSubstring(runErr.Error()))
200
201
Expect(res.Transcript).To(ContainSubstring("[shell:start]"))
202
Expect(res.Transcript).To(ContainSubstring(`workdir="/tmp"`))
203
Expect(res.Transcript).To(ContainSubstring(`cmd="go"`))
204
205
expectNoEffects(res)
206
})
207
208
it("returns OutcomeError (no error) when file write is missing Data and does not invoke WriteFile", func() {
209
dur := expectDuration(mockClock, 5*time.Millisecond)
210
211
cfg := types.Config{DryRun: false}
212
step := types.Step{
213
Type: types.ToolFiles,
214
Op: "write",
215
Path: "/tmp/out.txt",
216
Data: "",
217
}
218
219
expectAllowStep(mockBudget, step)
220
expectAllowPolicy(mockPolicy, cfg, step)
221
222
// Guard: tool budget is still charged before op switch (current behavior)
223
expectAllowTool(mockBudget, types.ToolFiles)
224
225
mockFiles.EXPECT().WriteFile(gomock.Any(), gomock.Any()).Times(0)
226
227
res, err := subject.RunStep(context.Background(), cfg, step)
228
Expect(err).NotTo(HaveOccurred())
229
230
Expect(res.Outcome).To(Equal(types.OutcomeError))
231
Expect(res.Output).To(ContainSubstring("file write requires Data"))
232
Expect(res.Duration).To(Equal(dur))
233
234
expectNoEffects(res)
235
})
236
237
it("handles shell command with no args (variadic call)", func() {
238
dur := expectDuration(mockClock, 1*time.Millisecond)
239
240
cfg := types.Config{DryRun: false, WorkDir: "/tmp"}
241
step := types.Step{
242
Type: types.ToolShell,
243
Command: "pwd",
244
Args: nil,
245
}
246
247
expectAllowStep(mockBudget, step)
248
expectAllowPolicy(mockPolicy, cfg, step)
249
expectAllowTool(mockBudget, types.ToolShell)
250
251
exec := types.Result{ExitCode: 0, Stdout: "/tmp\n"}
252
mockShell.
253
EXPECT().
254
Run(gomock.Any(), cfg.WorkDir, "pwd").
255
Return(exec, nil).
256
Times(1)
257
258
res, err := subject.RunStep(context.Background(), cfg, step)
259
Expect(err).NotTo(HaveOccurred())
260
261
Expect(res.Outcome).To(Equal(types.OutcomeOK))
262
Expect(res.Duration).To(Equal(dur))
263
Expect(res.Exec).NotTo(BeNil())
264
Expect(*res.Exec).To(Equal(exec))
265
})
266
267
it("returns OutcomeError (no error) when llm prompt is missing/blank and does not invoke llm tool", func() {
268
dur := expectDuration(mockClock, 123*time.Millisecond)
269
270
cfg := types.Config{DryRun: false, WorkDir: "/tmp"}
271
step := types.Step{
272
Type: types.ToolLLM,
273
Prompt: " \n\t",
274
}
275
276
expectAllowStep(mockBudget, step)
277
expectAllowPolicy(mockPolicy, cfg, step)
278
279
// Guard: tool not called / budget tool not charged
280
mockBudget.EXPECT().AllowTool(types.ToolLLM, gomock.Any()).Times(0)
281
mockLLM.EXPECT().Complete(gomock.Any(), gomock.Any()).Times(0)
282
283
res, err := subject.RunStep(context.Background(), cfg, step)
284
Expect(err).NotTo(HaveOccurred())
285
286
Expect(res.Step).To(Equal(step))
287
Expect(res.Outcome).To(Equal(types.OutcomeError))
288
Expect(res.Duration).To(Equal(dur))
289
Expect(res.Output).To(ContainSubstring("llm step requires Prompt"))
290
Expect(res.Exec).To(BeNil())
291
292
expectLLMStartTranscript(res, step)
293
})
294
295
it("runs llm completion and returns ok outcome + output + transcript", func() {
296
dur := expectDuration(mockClock, 123*time.Millisecond)
297
298
cfg := types.Config{DryRun: false}
299
step := types.Step{
300
Type: types.ToolLLM,
301
Prompt: "say hi",
302
}
303
304
expectAllowStep(mockBudget, step)
305
expectAllowPolicy(mockPolicy, cfg, step)
306
307
// NEW: token preflight
308
expectLLMSnapshotOK(mockBudget)
309
310
expectAllowTool(mockBudget, types.ToolLLM)
311
312
mockLLM.
313
EXPECT().
314
Complete(gomock.Any(), step.Prompt).
315
Return("hi there", 12, nil).
316
Times(1)
317
318
// NEW: record token usage
319
mockBudget.EXPECT().
320
ChargeLLMTokens(12, gomock.Any()).
321
Times(1)
322
323
res, err := subject.RunStep(context.Background(), cfg, step)
324
Expect(err).NotTo(HaveOccurred())
325
326
Expect(res.Step).To(Equal(step))
327
Expect(res.Outcome).To(Equal(types.OutcomeOK))
328
Expect(res.Duration).To(Equal(dur))
329
Expect(res.Output).To(Equal("hi there"))
330
Expect(res.Exec).To(BeNil())
331
332
expectLLMOKTranscript(res, step, "hi there")
333
})
334
335
it("returns OutcomeError (no error) when llm tool errors and surfaces error in Output", func() {
336
dur := expectDuration(mockClock, 10*time.Millisecond)
337
338
cfg := types.Config{DryRun: false}
339
step := types.Step{
340
Type: types.ToolLLM,
341
Prompt: "do the thing",
342
}
343
344
expectAllowStep(mockBudget, step)
345
expectAllowPolicy(mockPolicy, cfg, step)
346
347
// token preflight
348
expectLLMSnapshotOK(mockBudget)
349
expectAllowTool(mockBudget, types.ToolLLM)
350
351
runErr := errors.New("llm boom")
352
mockLLM.
353
EXPECT().
354
Complete(gomock.Any(), step.Prompt).
355
Return("", 0, runErr).
356
Times(1)
357
358
// Typically don't charge tokens on error
359
mockBudget.EXPECT().
360
ChargeLLMTokens(gomock.Any(), gomock.Any()).
361
Times(0)
362
363
res, err := subject.RunStep(context.Background(), cfg, step)
364
Expect(err).NotTo(HaveOccurred())
365
366
Expect(res.Step).To(Equal(step))
367
Expect(res.Outcome).To(Equal(types.OutcomeError))
368
Expect(res.Duration).To(Equal(dur))
369
Expect(res.Output).To(ContainSubstring(runErr.Error()))
370
Expect(res.Exec).To(BeNil())
371
372
expectLLMStartTranscript(res, step)
373
})
374
375
it("returns error StepResult when llm tool budget is denied and does not invoke llm tool", func() {
376
dur := expectDuration(mockClock, 5*time.Millisecond)
377
378
cfg := types.Config{DryRun: false}
379
step := types.Step{
380
Type: types.ToolLLM,
381
Prompt: "say hi",
382
}
383
384
expectAllowStep(mockBudget, step)
385
expectAllowPolicy(mockPolicy, cfg, step)
386
387
// token preflight must happen before AllowTool
388
expectLLMSnapshotOK(mockBudget)
389
390
toolErr := errors.New("tool budget denied")
391
mockBudget.EXPECT().AllowTool(types.ToolLLM, gomock.Any()).Return(toolErr).Times(1)
392
393
// Guard: LLM must not run
394
mockLLM.EXPECT().Complete(gomock.Any(), gomock.Any()).Times(0)
395
396
// And no token charging
397
mockBudget.EXPECT().ChargeLLMTokens(gomock.Any(), gomock.Any()).Times(0)
398
399
res, err := subject.RunStep(context.Background(), cfg, step)
400
Expect(err).To(MatchError(toolErr))
401
402
Expect(res.Step).To(Equal(step))
403
Expect(res.Outcome).To(Equal(types.OutcomeError))
404
Expect(res.Duration).To(Equal(dur))
405
406
Expect(res.Transcript).To(ContainSubstring("[llm:start]"))
407
Expect(res.Transcript).To(ContainSubstring("[budget]"))
408
Expect(res.Transcript).To(ContainSubstring(toolErr.Error()))
409
})
410
411
it("returns OutcomeError (no error) when file path is missing/blank and does not invoke file tool", func() {
412
dur := expectDuration(mockClock, 5*time.Millisecond)
413
414
cfg := types.Config{DryRun: false, WorkDir: "/tmp"}
415
step := types.Step{
416
Type: types.ToolFiles,
417
Op: "read",
418
Path: " ",
419
}
420
421
expectAllowStep(mockBudget, step)
422
expectAllowPolicy(mockPolicy, cfg, step)
423
mockBudget.EXPECT().AllowTool(types.ToolFiles, gomock.Any()).Times(0)
424
425
mockFiles.EXPECT().ReadFile(gomock.Any()).Times(0)
426
mockFiles.EXPECT().WriteFile(gomock.Any(), gomock.Any()).Times(0)
427
428
res, err := subject.RunStep(context.Background(), cfg, step)
429
Expect(err).NotTo(HaveOccurred())
430
431
Expect(res.Step).To(Equal(step))
432
Expect(res.Outcome).To(Equal(types.OutcomeError))
433
Expect(res.Duration).To(Equal(dur))
434
Expect(res.Exec).To(BeNil())
435
Expect(res.Output).To(ContainSubstring("file step requires Path"))
436
437
expectFileStartTranscript(res, step)
438
})
439
440
it("returns OutcomeError (no error) for unsupported file op and does not invoke file tool", func() {
441
dur := expectDuration(mockClock, 5*time.Millisecond)
442
443
cfg := types.Config{DryRun: false, WorkDir: "/tmp"}
444
step := types.Step{
445
Type: types.ToolFiles,
446
Op: "delete",
447
Path: "/tmp/a.txt",
448
}
449
450
expectAllowStep(mockBudget, step)
451
expectAllowPolicy(mockPolicy, cfg, step)
452
453
// NOTE: current runner charges tool budget before op switch.
454
mockBudget.EXPECT().AllowTool(types.ToolFiles, gomock.Any()).Times(1)
455
456
mockFiles.EXPECT().ReadFile(gomock.Any()).Times(0)
457
mockFiles.EXPECT().WriteFile(gomock.Any(), gomock.Any()).Times(0)
458
459
res, err := subject.RunStep(context.Background(), cfg, step)
460
Expect(err).NotTo(HaveOccurred())
461
462
Expect(res.Step).To(Equal(step))
463
Expect(res.Outcome).To(Equal(types.OutcomeError))
464
Expect(res.Duration).To(Equal(dur))
465
Expect(res.Exec).To(BeNil())
466
Expect(res.Output).To(ContainSubstring("unsupported file op"))
467
468
expectFileStartTranscript(res, step)
469
})
470
471
it("reads file and returns ok outcome + Output", func() {
472
dur := expectDuration(mockClock, 123*time.Millisecond)
473
474
cfg := types.Config{DryRun: false, WorkDir: "/tmp"}
475
step := types.Step{
476
Type: types.ToolFiles,
477
Op: "read",
478
Path: "/tmp/a.txt",
479
}
480
481
expectAllowStep(mockBudget, step)
482
expectAllowPolicy(mockPolicy, cfg, step)
483
expectAllowTool(mockBudget, types.ToolFiles)
484
485
mockFiles.
486
EXPECT().
487
ReadFile(step.Path).
488
Return([]byte("hello\n"), nil).
489
Times(1)
490
491
mockFiles.EXPECT().WriteFile(gomock.Any(), gomock.Any()).Times(0)
492
493
res, err := subject.RunStep(context.Background(), cfg, step)
494
Expect(err).NotTo(HaveOccurred())
495
496
Expect(res.Step).To(Equal(step))
497
Expect(res.Outcome).To(Equal(types.OutcomeOK))
498
Expect(res.Duration).To(Equal(dur))
499
Expect(res.Exec).To(BeNil())
500
Expect(res.Output).To(Equal("hello\n"))
501
502
expectFileReadTranscript(res, step.Path, "hello\n")
503
expectNoEffects(res)
504
})
505
506
it("returns OutcomeError (no error) when read errors, and surfaces error in Output (with start transcript)", func() {
507
dur := expectDuration(mockClock, 10*time.Millisecond)
508
509
cfg := types.Config{DryRun: false, WorkDir: "/tmp"}
510
step := types.Step{
511
Type: types.ToolFiles,
512
Op: "read",
513
Path: "/tmp/missing.txt",
514
}
515
516
expectAllowStep(mockBudget, step)
517
expectAllowPolicy(mockPolicy, cfg, step)
518
expectAllowTool(mockBudget, types.ToolFiles)
519
520
readErr := errors.New("read boom")
521
mockFiles.
522
EXPECT().
523
ReadFile(step.Path).
524
Return(nil, readErr).
525
Times(1)
526
527
mockFiles.EXPECT().WriteFile(gomock.Any(), gomock.Any()).Times(0)
528
529
res, err := subject.RunStep(context.Background(), cfg, step)
530
Expect(err).NotTo(HaveOccurred())
531
532
Expect(res.Step).To(Equal(step))
533
Expect(res.Outcome).To(Equal(types.OutcomeError))
534
Expect(res.Duration).To(Equal(dur))
535
Expect(res.Exec).To(BeNil())
536
Expect(res.Output).To(ContainSubstring(readErr.Error()))
537
538
expectFileStartTranscript(res, step)
539
})
540
541
it("writes file and returns ok outcome", func() {
542
dur := expectDuration(mockClock, 50*time.Millisecond)
543
544
cfg := types.Config{DryRun: false, WorkDir: "/tmp"}
545
step := types.Step{
546
Type: types.ToolFiles,
547
Op: "write",
548
Path: "/tmp/out.txt",
549
Data: "payload",
550
}
551
552
expectAllowStep(mockBudget, step)
553
expectAllowPolicy(mockPolicy, cfg, step)
554
expectAllowTool(mockBudget, types.ToolFiles)
555
556
mockFiles.
557
EXPECT().
558
WriteFile(step.Path, []byte(step.Data)).
559
Return(nil).
560
Times(1)
561
562
mockFiles.EXPECT().ReadFile(gomock.Any()).Times(0)
563
564
res, err := subject.RunStep(context.Background(), cfg, step)
565
Expect(err).NotTo(HaveOccurred())
566
567
Expect(res.Step).To(Equal(step))
568
Expect(res.Outcome).To(Equal(types.OutcomeOK))
569
Expect(res.Duration).To(Equal(dur))
570
Expect(res.Exec).To(BeNil())
571
Expect(res.Output).To(ContainSubstring("/tmp/out.txt"))
572
573
expectFileWriteTranscript(res, step.Path, len(step.Data))
574
expectOneEffect(res, "file.write", step.Path, len(step.Data))
575
})
576
577
it("returns OutcomeError (no error) when write errors, and surfaces error in Output (with start transcript)", func() {
578
dur := expectDuration(mockClock, 50*time.Millisecond)
579
580
cfg := types.Config{DryRun: false, WorkDir: "/tmp"}
581
step := types.Step{
582
Type: types.ToolFiles,
583
Op: "write",
584
Path: "/tmp/out.txt",
585
Data: "payload",
586
}
587
588
expectAllowStep(mockBudget, step)
589
expectAllowPolicy(mockPolicy, cfg, step)
590
expectAllowTool(mockBudget, types.ToolFiles)
591
592
writeErr := errors.New("write boom")
593
mockFiles.
594
EXPECT().
595
WriteFile(step.Path, []byte(step.Data)).
596
Return(writeErr).
597
Times(1)
598
599
mockFiles.EXPECT().ReadFile(gomock.Any()).Times(0)
600
601
res, err := subject.RunStep(context.Background(), cfg, step)
602
Expect(err).NotTo(HaveOccurred())
603
604
Expect(res.Step).To(Equal(step))
605
Expect(res.Outcome).To(Equal(types.OutcomeError))
606
Expect(res.Duration).To(Equal(dur))
607
Expect(res.Exec).To(BeNil())
608
Expect(res.Output).To(ContainSubstring(writeErr.Error()))
609
610
expectFileStartTranscript(res, step)
611
})
612
613
it("treats file op case/whitespace-insensitively (READ)", func() {
614
dur := expectDuration(mockClock, 20*time.Millisecond)
615
616
cfg := types.Config{DryRun: false, WorkDir: "/tmp"}
617
step := types.Step{
618
Type: types.ToolFiles,
619
Op: " ReAd ",
620
Path: "/tmp/a.txt",
621
}
622
623
expectAllowStep(mockBudget, step)
624
expectAllowPolicy(mockPolicy, cfg, step)
625
expectAllowTool(mockBudget, types.ToolFiles)
626
627
mockFiles.
628
EXPECT().
629
ReadFile(step.Path).
630
Return([]byte("ok"), nil).
631
Times(1)
632
633
res, err := subject.RunStep(context.Background(), cfg, step)
634
Expect(err).NotTo(HaveOccurred())
635
636
Expect(res.Outcome).To(Equal(types.OutcomeOK))
637
Expect(res.Duration).To(Equal(dur))
638
Expect(res.Output).To(Equal("ok"))
639
})
640
641
it("returns OutcomeError (no error) when step type is unsupported (with transcript)", func() {
642
dur := expectDuration(mockClock, 7*time.Millisecond)
643
644
cfg := types.Config{DryRun: false, WorkDir: "/tmp"}
645
step := types.Step{
646
Type: types.ToolKind("wat"),
647
Description: "unknown step",
648
}
649
650
expectAllowStep(mockBudget, step)
651
expectAllowPolicy(mockPolicy, cfg, step)
652
// Guard: no tool should be charged
653
mockBudget.EXPECT().AllowTool(gomock.Any(), gomock.Any()).Times(0)
654
655
res, err := subject.RunStep(context.Background(), cfg, step)
656
Expect(err).NotTo(HaveOccurred())
657
658
Expect(res.Step).To(Equal(step))
659
Expect(res.Outcome).To(Equal(types.OutcomeError))
660
Expect(res.Duration).To(Equal(dur))
661
Expect(res.Exec).To(BeNil())
662
Expect(res.Output).To(ContainSubstring("unsupported step type: wat"))
663
Expect(res.Transcript).To(ContainSubstring(`[unsupported]`))
664
Expect(res.Transcript).To(ContainSubstring(`step_type="wat"`))
665
})
666
667
it("returns error StepResult when step budget is denied (applies to dry-run too) and does not invoke agentTools", func() {
668
dur := expectDuration(mockClock, 5*time.Millisecond)
669
670
cfg := types.Config{DryRun: true, WorkDir: "/tmp"}
671
step := types.Step{
672
Type: types.ToolShell,
673
Command: "echo",
674
Args: []string{"hi"},
675
}
676
677
stepErr := errors.New("step budget denied")
678
mockBudget.EXPECT().AllowStep(step, gomock.Any()).Return(stepErr).Times(1)
679
680
// Guard: no tool budget charge and no tool execution
681
mockBudget.EXPECT().AllowTool(gomock.Any(), gomock.Any()).Times(0)
682
mockShell.EXPECT().Run(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Times(0)
683
mockLLM.EXPECT().Complete(gomock.Any(), gomock.Any()).Times(0)
684
mockFiles.EXPECT().ReadFile(gomock.Any()).Times(0)
685
mockFiles.EXPECT().WriteFile(gomock.Any(), gomock.Any()).Times(0)
686
687
res, err := subject.RunStep(context.Background(), cfg, step)
688
Expect(err).To(MatchError(stepErr))
689
690
Expect(res.Step).To(Equal(step))
691
Expect(res.Outcome).To(Equal(types.OutcomeError))
692
Expect(res.Duration).To(Equal(dur))
693
694
// Budget error is appended on top of the dry-run transcript
695
Expect(res.Transcript).To(ContainSubstring("[dry-run][shell]"))
696
Expect(res.Transcript).To(ContainSubstring("[budget]"))
697
Expect(res.Transcript).To(ContainSubstring(stepErr.Error()))
698
})
699
700
it("returns error StepResult when shell tool budget is denied and does not invoke shell tool", func() {
701
dur := expectDuration(mockClock, 5*time.Millisecond)
702
703
cfg := types.Config{DryRun: false, WorkDir: "/tmp"}
704
step := types.Step{
705
Type: types.ToolShell,
706
Command: "echo",
707
Args: []string{"hi"},
708
}
709
710
expectAllowStep(mockBudget, step)
711
expectAllowPolicy(mockPolicy, cfg, step)
712
713
toolErr := errors.New("tool budget denied")
714
mockBudget.EXPECT().AllowTool(types.ToolShell, gomock.Any()).Return(toolErr).Times(1)
715
716
// Guard: shell must not run
717
mockShell.EXPECT().Run(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Times(0)
718
719
res, err := subject.RunStep(context.Background(), cfg, step)
720
Expect(err).To(MatchError(toolErr))
721
722
Expect(res.Step).To(Equal(step))
723
Expect(res.Outcome).To(Equal(types.OutcomeError))
724
Expect(res.Duration).To(Equal(dur))
725
726
Expect(res.Transcript).To(ContainSubstring("[shell:start]"))
727
Expect(res.Transcript).To(ContainSubstring("[budget]"))
728
Expect(res.Transcript).To(ContainSubstring(toolErr.Error()))
729
})
730
731
// Variant of the llm tool-budget denial case in which the token preflight is
// mocked explicitly: Snapshot reports MaxLLMTokens=0 with no tokens used, and
// the runner is still expected to reach AllowTool. The description is kept
// distinct from the other llm tool-budget denial spec in this group (the one
// that sets up the preflight via expectLLMSnapshotOK) so the two cases can be
// told apart in the report.
// NOTE(review): presumably MaxLLMTokens=0 means "no token limit" — confirm.
it("returns error StepResult when llm tool budget is denied after a zero-limit token snapshot and does not invoke llm tool", func() {
	dur := expectDuration(mockClock, 5*time.Millisecond)

	cfg := types.Config{DryRun: false}
	step := types.Step{
		Type:   types.ToolLLM,
		Prompt: "say hi",
	}

	expectAllowStep(mockBudget, step)
	expectAllowPolicy(mockPolicy, cfg, step)

	// Token preflight: exactly one snapshot, with a zero limit and zero usage.
	mockBudget.
		EXPECT().
		Snapshot(gomock.Any()).
		Return(core.BudgetSnapshot{
			Limits:        core.BudgetLimits{MaxLLMTokens: 0},
			LLMTokensUsed: 0,
		}).
		Times(1)

	// The tool budget check is what rejects the step.
	toolErr := errors.New("tool budget denied")
	mockBudget.EXPECT().AllowTool(types.ToolLLM, gomock.Any()).Return(toolErr).Times(1)

	// Guard: the LLM must not run and no tokens may be charged.
	mockLLM.EXPECT().Complete(gomock.Any(), gomock.Any()).Times(0)
	mockBudget.EXPECT().ChargeLLMTokens(gomock.Any(), gomock.Any()).Times(0)

	res, err := subject.RunStep(context.Background(), cfg, step)
	Expect(err).To(MatchError(toolErr))

	Expect(res.Step).To(Equal(step))
	Expect(res.Outcome).To(Equal(types.OutcomeError))
	Expect(res.Duration).To(Equal(dur))
	Expect(res.Transcript).To(ContainSubstring("[llm:start]"))
	Expect(res.Transcript).To(ContainSubstring("[budget]"))
	Expect(res.Transcript).To(ContainSubstring(toolErr.Error()))
})
768
769
it("returns error StepResult when llm token budget preflight fails and does not invoke llm tool or charge tool budget", func() {
770
dur := expectDuration(mockClock, 5*time.Millisecond)
771
772
cfg := types.Config{DryRun: false}
773
step := types.Step{
774
Type: types.ToolLLM,
775
Prompt: "say hi",
776
}
777
778
expectAllowStep(mockBudget, step)
779
expectAllowPolicy(mockPolicy, cfg, step)
780
781
// Preflight says we're already out of tokens
782
mockBudget.
783
EXPECT().
784
Snapshot(gomock.Any()).
785
Return(core.BudgetSnapshot{
786
Limits: core.BudgetLimits{MaxLLMTokens: 100},
787
LLMTokensUsed: 100,
788
}).
789
Times(1)
790
791
// Guard: should bail before tool budget is charged
792
mockBudget.EXPECT().AllowTool(types.ToolLLM, gomock.Any()).Times(0)
793
794
// Guard: LLM must not run
795
mockLLM.EXPECT().Complete(gomock.Any(), gomock.Any()).Times(0)
796
797
// Guard: no token charging
798
mockBudget.EXPECT().ChargeLLMTokens(gomock.Any(), gomock.Any()).Times(0)
799
800
res, err := subject.RunStep(context.Background(), cfg, step)
801
Expect(err).To(HaveOccurred())
802
Expect(err.Error()).To(ContainSubstring("llm token budget exceeded"))
803
804
Expect(res.Step).To(Equal(step))
805
Expect(res.Outcome).To(Equal(types.OutcomeError))
806
Expect(res.Duration).To(Equal(dur))
807
Expect(res.Transcript).To(ContainSubstring("[llm:start]"))
808
Expect(res.Transcript).To(ContainSubstring("[budget]"))
809
Expect(res.Transcript).To(ContainSubstring("token"))
810
})
811
812
it("returns OutcomeError when policy denies a dry-run step (no agentTools invoked)", func() {
813
dur := expectDuration(mockClock, 5*time.Millisecond)
814
cfg := types.Config{DryRun: true, WorkDir: "/tmp"}
815
step := types.Step{Type: types.ToolShell, Command: "echo", Args: []string{"hi"}}
816
817
expectAllowStep(mockBudget, step)
818
819
polErr := errors.New("policy denied")
820
mockPolicy.EXPECT().AllowStep(cfg, step).Return(polErr).Times(1)
821
822
mockBudget.EXPECT().AllowTool(gomock.Any(), gomock.Any()).Times(0)
823
mockShell.EXPECT().Run(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Times(0)
824
825
res, err := subject.RunStep(context.Background(), cfg, step)
826
Expect(err).To(MatchError(polErr))
827
Expect(res.Outcome).To(Equal(types.OutcomeError))
828
Expect(res.Duration).To(Equal(dur))
829
Expect(res.Transcript).To(ContainSubstring("[dry-run][shell]"))
830
Expect(res.Transcript).To(ContainSubstring("[policy]"))
831
Expect(res.Transcript).To(ContainSubstring(polErr.Error()))
832
})
833
834
it("returns OutcomeError when policy denies a shell step and does not charge tool budget or run shell", func() {
835
dur := expectDuration(mockClock, 5*time.Millisecond)
836
cfg := types.Config{DryRun: false, WorkDir: "/tmp"}
837
step := types.Step{Type: types.ToolShell, Command: "echo", Args: []string{"hi"}}
838
839
expectAllowStep(mockBudget, step)
840
841
polErr := errors.New("policy denied")
842
mockPolicy.EXPECT().AllowStep(cfg, step).Return(polErr).Times(1)
843
844
mockBudget.EXPECT().AllowTool(types.ToolShell, gomock.Any()).Times(0)
845
mockShell.EXPECT().Run(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Times(0)
846
847
res, err := subject.RunStep(context.Background(), cfg, step)
848
Expect(err).To(MatchError(polErr))
849
Expect(res.Outcome).To(Equal(types.OutcomeError))
850
Expect(res.Duration).To(Equal(dur))
851
Expect(res.Transcript).To(ContainSubstring("[shell:start]"))
852
Expect(res.Transcript).To(ContainSubstring("[policy]"))
853
})
854
855
it("policy denial short-circuits llm: no token snapshot, no tool budget, no llm call", func() {
856
dur := expectDuration(mockClock, 5*time.Millisecond)
857
cfg := types.Config{DryRun: false}
858
step := types.Step{Type: types.ToolLLM, Prompt: "say hi"}
859
860
expectAllowStep(mockBudget, step)
861
862
polErr := errors.New("policy denied")
863
mockPolicy.EXPECT().AllowStep(cfg, step).Return(polErr).Times(1)
864
865
mockBudget.EXPECT().Snapshot(gomock.Any()).Times(0)
866
mockBudget.EXPECT().AllowTool(types.ToolLLM, gomock.Any()).Times(0)
867
mockLLM.EXPECT().Complete(gomock.Any(), gomock.Any()).Times(0)
868
869
res, err := subject.RunStep(context.Background(), cfg, step)
870
Expect(err).To(MatchError(polErr))
871
Expect(res.Outcome).To(Equal(types.OutcomeError))
872
Expect(res.Duration).To(Equal(dur))
873
Expect(res.Transcript).To(ContainSubstring("[llm:start]"))
874
Expect(res.Transcript).To(ContainSubstring("[policy]"))
875
})
876
877
it("returns OutcomeError when policy denies a file step and does not charge tool budget or touch filesystem", func() {
878
dur := expectDuration(mockClock, 5*time.Millisecond)
879
cfg := types.Config{DryRun: false}
880
step := types.Step{Type: types.ToolFiles, Op: "read", Path: "/tmp/a.txt"}
881
882
expectAllowStep(mockBudget, step)
883
884
polErr := errors.New("policy denied")
885
mockPolicy.EXPECT().AllowStep(cfg, step).Return(polErr).Times(1)
886
887
mockBudget.EXPECT().AllowTool(types.ToolFiles, gomock.Any()).Times(0)
888
mockFiles.EXPECT().ReadFile(gomock.Any()).Times(0)
889
mockFiles.EXPECT().WriteFile(gomock.Any(), gomock.Any()).Times(0)
890
891
res, err := subject.RunStep(context.Background(), cfg, step)
892
Expect(err).To(MatchError(polErr))
893
Expect(res.Outcome).To(Equal(types.OutcomeError))
894
Expect(res.Duration).To(Equal(dur))
895
Expect(res.Transcript).To(ContainSubstring("[file:start]"))
896
Expect(res.Transcript).To(ContainSubstring("[policy]"))
897
})
898
899
it("patches file and returns ok outcome (calls PatchFile)", func() {
	// Arrange: a real patch step carrying a one-hunk unified diff.
	cfg := types.Config{DryRun: false}
	step := types.Step{
		Type: types.ToolFiles,
		Op:   "patch",
		Path: "/tmp/a.txt",
		Data: "@@ -1,1 +1,1 @@\n-a\n+b\n",
	}
	elapsed := expectDuration(mockClock, 50*time.Millisecond)

	// Every gate is open: step budget, policy, and files tool budget.
	expectAllowStep(mockBudget, step)
	expectAllowPolicy(mockPolicy, cfg, step)
	expectAllowTool(mockBudget, types.ToolFiles)

	// The diff must be handed to PatchFile exactly once, as bytes.
	mockFiles.EXPECT().PatchFile(step.Path, []byte(step.Data)).Return(tools.PatchResult{Hunks: 2}, nil).Times(1)

	// Guard: a patch must not fall through to any other file operation.
	mockFiles.EXPECT().ReadFile(gomock.Any()).Times(0)
	mockFiles.EXPECT().WriteFile(gomock.Any(), gomock.Any()).Times(0)
	mockFiles.EXPECT().ReplaceBytesInFile(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Times(0)

	// Act.
	got, runErr := subject.RunStep(context.Background(), cfg, step)
	Expect(runErr).NotTo(HaveOccurred())

	// Assert: result envelope.
	Expect(got.Step).To(Equal(step))
	Expect(got.Outcome).To(Equal(types.OutcomeOK))
	Expect(got.Duration).To(Equal(elapsed))
	Expect(got.Exec).To(BeNil())

	// Assert: human-readable output and transcript mention the operation.
	Expect(got.Output).NotTo(BeEmpty())
	for _, fragment := range []string{`op="patch"`, step.Path} {
		Expect(got.Transcript).To(ContainSubstring(fragment))
	}

	// Assert: exactly one patch effect, carrying the hunk count the mock returned.
	expectOneEffect(got, "file.patch", step.Path, 0)
	Expect(got.Effects[0].Meta["hunks"]).To(Equal(2))
})
940
941
it("returns OutcomeError (no error) when file patch is missing Data and does not invoke PatchFile", func() {
	// Arrange: a patch step with an empty diff payload.
	cfg := types.Config{DryRun: false}
	step := types.Step{
		Type: types.ToolFiles,
		Op:   "patch",
		Path: "/tmp/a.txt",
		Data: "",
	}
	elapsed := expectDuration(mockClock, 5*time.Millisecond)

	// All gates pass, so the step gets as far as validating its own input.
	expectAllowStep(mockBudget, step)
	expectAllowPolicy(mockPolicy, cfg, step)
	expectAllowTool(mockBudget, types.ToolFiles)

	// With no diff there is nothing to apply.
	mockFiles.EXPECT().PatchFile(gomock.Any(), gomock.Any()).Times(0)

	// Act.
	got, runErr := subject.RunStep(context.Background(), cfg, step)

	// Assert: the failure is reported through the result, not the error return.
	Expect(runErr).NotTo(HaveOccurred())
	Expect(got.Outcome).To(Equal(types.OutcomeError))
	Expect(got.Output).To(ContainSubstring("file patch requires Data"))
	Expect(got.Duration).To(Equal(elapsed))
	expectNoEffects(got)
})
966
967
it("returns OutcomeError (no error) when patch errors (still includes patch transcript + error in Output)", func() {
	// Arrange: a well-formed patch step whose application will fail.
	cfg := types.Config{DryRun: false}
	step := types.Step{
		Type: types.ToolFiles,
		Op:   "patch",
		Path: "/tmp/a.txt",
		Data: "@@ -1,1 +1,1 @@\n-a\n+b\n",
	}
	elapsed := expectDuration(mockClock, 50*time.Millisecond)

	expectAllowStep(mockBudget, step)
	expectAllowPolicy(mockPolicy, cfg, step)
	expectAllowTool(mockBudget, types.ToolFiles)

	// PatchFile is reached once and reports a mismatch.
	applyErr := errors.New("apply patch /tmp/a.txt: first mismatch at line 7")
	mockFiles.EXPECT().PatchFile(step.Path, []byte(step.Data)).Return(tools.PatchResult{Hunks: 1}, applyErr).Times(1)

	// Act.
	got, runErr := subject.RunStep(context.Background(), cfg, step)
	Expect(runErr).NotTo(HaveOccurred())

	// Assert: the tool failure is surfaced inside the result.
	Expect(got.Step).To(Equal(step))
	Expect(got.Outcome).To(Equal(types.OutcomeError))
	Expect(got.Duration).To(Equal(elapsed))
	Expect(got.Exec).To(BeNil())
	Expect(got.Output).To(ContainSubstring(applyErr.Error()))

	// Assert: the transcript still describes the attempted patch and the error.
	for _, fragment := range []string{`op="patch"`, step.Path, "error"} {
		Expect(got.Transcript).To(ContainSubstring(fragment))
	}

	expectNoEffects(got)
})
1004
1005
it("replaces bytes in file and returns ok outcome (calls ReplaceBytesInFile)", func() {
	// Arrange: replace at most two occurrences of "aa" with "XX".
	cfg := types.Config{DryRun: false}
	step := types.Step{
		Type: types.ToolFiles,
		Op:   "replace",
		Path: "/tmp/a.txt",
		Old:  "aa",
		New:  "XX",
		N:    2,
	}
	elapsed := expectDuration(mockClock, 50*time.Millisecond)

	expectAllowStep(mockBudget, step)
	expectAllowPolicy(mockPolicy, cfg, step)
	expectAllowTool(mockBudget, types.ToolFiles)

	// The step fields must be forwarded verbatim (strings as bytes, N unchanged).
	outcome := tools.ReplaceResult{OccurrencesFound: 5, Replaced: 2}
	mockFiles.EXPECT().ReplaceBytesInFile(step.Path, []byte(step.Old), []byte(step.New), step.N).Return(outcome, nil).Times(1)

	// Guard: a replace must not fall through to any other file operation.
	mockFiles.EXPECT().ReadFile(gomock.Any()).Times(0)
	mockFiles.EXPECT().WriteFile(gomock.Any(), gomock.Any()).Times(0)
	mockFiles.EXPECT().PatchFile(gomock.Any(), gomock.Any()).Times(0)

	// Act.
	got, runErr := subject.RunStep(context.Background(), cfg, step)
	Expect(runErr).NotTo(HaveOccurred())

	// Assert: result envelope.
	Expect(got.Step).To(Equal(step))
	Expect(got.Outcome).To(Equal(types.OutcomeOK))
	Expect(got.Duration).To(Equal(elapsed))
	Expect(got.Exec).To(BeNil())

	// Assert: output and transcript mention the operation.
	Expect(got.Output).NotTo(BeEmpty())
	for _, fragment := range []string{`op="replace"`, step.Path} {
		Expect(got.Transcript).To(ContainSubstring(fragment))
	}

	// Assert: one replace effect whose metadata mirrors the mock's result and the requested N.
	expectOneEffect(got, "file.replace", step.Path, 0)
	meta := got.Effects[0].Meta
	Expect(meta["replaced"]).To(Equal(2))
	Expect(meta["found"]).To(Equal(5))
	Expect(meta["n"]).To(Equal(step.N))
})
1050
1051
it("returns OutcomeError (no error) when replace errors and surfaces error in Output", func() {
	// Arrange: a replace step whose pattern will not be found.
	cfg := types.Config{DryRun: false}
	step := types.Step{
		Type: types.ToolFiles,
		Op:   "replace",
		Path: "/tmp/a.txt",
		Old:  "nope",
		New:  "x",
		N:    -1,
	}
	elapsed := expectDuration(mockClock, 50*time.Millisecond)

	expectAllowStep(mockBudget, step)
	expectAllowPolicy(mockPolicy, cfg, step)
	expectAllowTool(mockBudget, types.ToolFiles)

	// ReplaceBytesInFile is reached once and fails.
	notFound := errors.New("replace /tmp/a.txt: pattern not found")
	mockFiles.EXPECT().ReplaceBytesInFile(step.Path, []byte(step.Old), []byte(step.New), step.N).Return(tools.ReplaceResult{OccurrencesFound: 0, Replaced: 0}, notFound).Times(1)

	// Act.
	got, runErr := subject.RunStep(context.Background(), cfg, step)
	Expect(runErr).NotTo(HaveOccurred())

	// Assert: the tool failure is surfaced inside the result.
	Expect(got.Step).To(Equal(step))
	Expect(got.Outcome).To(Equal(types.OutcomeError))
	Expect(got.Duration).To(Equal(elapsed))
	Expect(got.Exec).To(BeNil())
	Expect(got.Output).To(ContainSubstring(notFound.Error()))

	// Assert: the transcript still describes the attempted replace and the error.
	for _, fragment := range []string{`op="replace"`, step.Path, "error"} {
		Expect(got.Transcript).To(ContainSubstring(fragment))
	}
	expectNoEffects(got)
})
1089
1090
it("dry-run patch does not invoke PatchFile", func() {
	// Arrange: the same kind of patch step, but with dry-run enabled.
	cfg := types.Config{DryRun: true}
	step := types.Step{
		Type: types.ToolFiles,
		Op:   "patch",
		Path: "/tmp/a.txt",
		Data: "diff-content",
	}
	elapsed := expectDuration(mockClock, 10*time.Millisecond)

	// Step budget and policy are still consulted; the tool budget is not charged.
	expectAllowStep(mockBudget, step)
	expectAllowPolicy(mockPolicy, cfg, step)
	mockBudget.EXPECT().AllowTool(types.ToolFiles, gomock.Any()).Times(0)

	// No file operation of any kind may run.
	mockFiles.EXPECT().PatchFile(gomock.Any(), gomock.Any()).Times(0)
	mockFiles.EXPECT().ReplaceBytesInFile(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Times(0)
	mockFiles.EXPECT().ReadFile(gomock.Any()).Times(0)
	mockFiles.EXPECT().WriteFile(gomock.Any(), gomock.Any()).Times(0)

	// Act.
	got, runErr := subject.RunStep(context.Background(), cfg, step)
	Expect(runErr).NotTo(HaveOccurred())

	// Assert: the result is flagged as a dry run and names the skipped operation.
	Expect(got.Outcome).To(Equal(types.OutcomeDryRun))
	Expect(got.Duration).To(Equal(elapsed))
	for _, fragment := range []string{"dry-run", `op="patch"`} {
		Expect(got.Transcript).To(ContainSubstring(fragment))
	}

	expectNoEffects(got)
})
1120
1121
it("dry-run replace does not invoke ReplaceBytesInFile", func() {
	// Arrange: a replace step with dry-run enabled.
	cfg := types.Config{DryRun: true}
	step := types.Step{
		Type: types.ToolFiles,
		Op:   "replace",
		Path: "/tmp/a.txt",
		Old:  "aa",
		New:  "XX",
		N:    0,
	}
	elapsed := expectDuration(mockClock, 10*time.Millisecond)

	// Step budget and policy are still consulted; the tool budget is not charged.
	expectAllowStep(mockBudget, step)
	expectAllowPolicy(mockPolicy, cfg, step)
	mockBudget.EXPECT().AllowTool(types.ToolFiles, gomock.Any()).Times(0)

	// No file operation of any kind may run.
	mockFiles.EXPECT().PatchFile(gomock.Any(), gomock.Any()).Times(0)
	mockFiles.EXPECT().ReplaceBytesInFile(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Times(0)
	mockFiles.EXPECT().ReadFile(gomock.Any()).Times(0)
	mockFiles.EXPECT().WriteFile(gomock.Any(), gomock.Any()).Times(0)

	// Act.
	got, runErr := subject.RunStep(context.Background(), cfg, step)
	Expect(runErr).NotTo(HaveOccurred())

	// Assert: the result is flagged as a dry run and names the skipped operation.
	Expect(got.Outcome).To(Equal(types.OutcomeDryRun))
	Expect(got.Duration).To(Equal(elapsed))
	for _, fragment := range []string{"dry-run", `op="replace"`} {
		Expect(got.Transcript).To(ContainSubstring(fragment))
	}

	expectNoEffects(got)
})
1153
1154
it("returns error StepResult when files tool budget is denied for patch and does not invoke PatchFile", func() {
	// Arrange: a real patch step that passes step budget and policy.
	cfg := types.Config{DryRun: false}
	step := types.Step{
		Type: types.ToolFiles,
		Op:   "patch",
		Path: "/tmp/a.txt",
		Data: "diff",
	}
	elapsed := expectDuration(mockClock, 10*time.Millisecond)

	expectAllowStep(mockBudget, step)
	expectAllowPolicy(mockPolicy, cfg, step)

	// The files tool budget is the gate that refuses this step.
	budgetErr := errors.New("tool budget denied")
	mockBudget.
		EXPECT().
		AllowTool(types.ToolFiles, gomock.Any()).
		Return(budgetErr).
		Times(1)

	// With the tool budget denied, the patch must never be applied.
	mockFiles.EXPECT().PatchFile(gomock.Any(), gomock.Any()).Times(0)

	// Act.
	got, runErr := subject.RunStep(context.Background(), cfg, step)

	// Assert: the budget error is returned and recorded in the result.
	Expect(runErr).To(MatchError(budgetErr))
	Expect(got.Outcome).To(Equal(types.OutcomeError))
	Expect(got.Duration).To(Equal(elapsed))
	Expect(got.Transcript).To(ContainSubstring("[budget]"))
})
1180
})
1181
}
1182
1183
func expectDuration(mockClock *MockClock, d time.Duration) time.Duration {
1184
t0 := time.Date(2026, 1, 13, 9, 0, 0, 0, time.UTC)
1185
t1 := t0.Add(d)
1186
1187
// Robust to extra clock.Now() calls:
1188
// first call = t0, all subsequent calls = t1.
1189
gomock.InOrder(
1190
mockClock.EXPECT().Now().Return(t0).Times(1),
1191
mockClock.EXPECT().Now().Return(t1).AnyTimes(),
1192
)
1193
1194
return d
1195
}
1196
1197
func expectAllowStep(mockBudget *MockBudget, step types.Step) {
1198
mockBudget.
1199
EXPECT().
1200
AllowStep(step, gomock.Any()).
1201
Return(nil).
1202
Times(1)
1203
}
1204
1205
func expectAllowTool(mockBudget *MockBudget, kind types.ToolKind) {
1206
mockBudget.
1207
EXPECT().
1208
AllowTool(kind, gomock.Any()).
1209
Return(nil).
1210
Times(1)
1211
}
1212
1213
func expectAllowPolicy(mockPolicy *MockPolicy, cfg types.Config, step types.Step) {
1214
mockPolicy.
1215
EXPECT().
1216
AllowStep(cfg, step).
1217
Return(nil).
1218
Times(1)
1219
}
1220
1221
// expectShellTranscript asserts that res.Transcript contains the shell marker,
// the quoted working directory and command, and the exit code. The stdout and
// stderr sections (header plus content) are only required when the
// corresponding stream in exec is non-empty.
func expectShellTranscript(res types.StepResult, cfg types.Config, step types.Step, exec types.Result) {
	required := []string{
		`[shell]`,
		fmt.Sprintf(`workdir=%q`, cfg.WorkDir),
		fmt.Sprintf(`cmd=%q`, step.Command),
		fmt.Sprintf("exit=%d", exec.ExitCode),
	}

	if exec.Stdout != "" {
		required = append(required, "stdout:\n", exec.Stdout)
	}
	if exec.Stderr != "" {
		required = append(required, "stderr:\n", exec.Stderr)
	}

	for _, fragment := range required {
		Expect(res.Transcript).To(ContainSubstring(fragment))
	}
}
1236
1237
// expectLLMStartTranscript asserts that res.Transcript contains the LLM start
// marker, the prompt header, and the step's prompt text.
func expectLLMStartTranscript(res types.StepResult, step types.Step) {
	for _, fragment := range []string{"[llm:start]", "prompt:\n", step.Prompt} {
		Expect(res.Transcript).To(ContainSubstring(fragment))
	}
}
1242
1243
// expectLLMOKTranscript asserts that res.Transcript contains the LLM marker,
// the prompt header and prompt text, and the output header followed somewhere
// by out.
func expectLLMOKTranscript(res types.StepResult, step types.Step, out string) {
	required := []string{
		"[llm]",
		"prompt:\n",
		step.Prompt,
		"output:\n",
		out,
	}
	for _, fragment := range required {
		Expect(res.Transcript).To(ContainSubstring(fragment))
	}
}
1250
1251
// expectFileStartTranscript asserts that res.Transcript contains the file start
// marker together with the step's quoted op, quoted path, and the byte length
// of its Data.
func expectFileStartTranscript(res types.StepResult, step types.Step) {
	required := []string{
		`[file:start]`,
		fmt.Sprintf(`op=%q`, step.Op),
		fmt.Sprintf(`path=%q`, step.Path),
		fmt.Sprintf(`data_len=%d`, len(step.Data)),
	}
	for _, fragment := range required {
		Expect(res.Transcript).To(ContainSubstring(fragment))
	}
}
1257
1258
// expectFileReadTranscript asserts that res.Transcript records a file read of
// path, including the content header. The content itself is only checked when
// it is non-empty.
func expectFileReadTranscript(res types.StepResult, path, content string) {
	required := []string{
		`[file] op="read"`,
		fmt.Sprintf(`path=%q`, path),
		"content:\n",
	}
	// Content may be large; it is enough that it shows up somewhere.
	if content != "" {
		required = append(required, content)
	}
	for _, fragment := range required {
		Expect(res.Transcript).To(ContainSubstring(fragment))
	}
}
1267
1268
// expectFileWriteTranscript asserts that res.Transcript records a file write to
// path with the given data length.
func expectFileWriteTranscript(res types.StepResult, path string, dataLen int) {
	required := []string{
		`[file] op="write"`,
		fmt.Sprintf(`path=%q`, path),
		fmt.Sprintf("data_len=%d", dataLen),
	}
	for _, fragment := range required {
		Expect(res.Transcript).To(ContainSubstring(fragment))
	}
}
1273
1274
func expectLLMSnapshotOK(mockBudget *MockBudget) {
1275
mockBudget.
1276
EXPECT().
1277
Snapshot(gomock.Any()).
1278
Return(core.BudgetSnapshot{
1279
Limits: core.BudgetLimits{MaxLLMTokens: 0}, // 0 = unlimited => preflight passes
1280
LLMTokensUsed: 0,
1281
}).
1282
Times(1)
1283
}
1284
1285
// expectOneEffect asserts that res carries exactly one effect and that its
// Kind, Path, and Bytes equal the given values.
func expectOneEffect(res types.StepResult, kind, path string, bytes int) {
	// Length is checked first so the index below is only reached when valid.
	Expect(res.Effects).To(HaveLen(1))

	only := res.Effects[0]
	Expect(only.Kind).To(Equal(kind))
	Expect(only.Path).To(Equal(path))
	Expect(only.Bytes).To(Equal(bytes))
}
1291
1292
// expectNoEffects asserts that res recorded no effects at all. The check is
// strict: Effects must be nil, not merely empty.
func expectNoEffects(res types.StepResult) {
	effects := res.Effects
	Expect(effects).To(BeNil()) // switch to HaveLen(0) if Effects becomes always-non-nil
}
1295
1296