Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
kardolus
GitHub Repository: kardolus/chatgpt-cli
Path: blob/main/agent/core/budget_test.go
3434 views
1
package core_test
2
3
import (
4
"errors"
5
"github.com/kardolus/chatgpt-cli/agent/core"
6
"github.com/kardolus/chatgpt-cli/agent/types"
7
"testing"
8
"time"
9
10
. "github.com/onsi/gomega"
11
"github.com/sclevine/spec"
12
"github.com/sclevine/spec/report"
13
)
14
15
func TestUnitBudget(t *testing.T) {
16
spec.Run(t, "Testing the budget", testBudget, spec.Report(report.Terminal{}))
17
}
18
19
func testBudget(t *testing.T, when spec.G, it spec.S) {
20
it.Before(func() {
21
RegisterTestingT(t)
22
})
23
24
when("DefaultBudget", func() {
25
it("auto-starts on AllowStep (ensureStarted) and increments steps", func() {
26
t0 := time.Date(2026, 1, 13, 9, 0, 0, 0, time.UTC)
27
28
b := core.NewDefaultBudget(core.BudgetLimits{
29
MaxSteps: 10,
30
})
31
32
step := types.Step{Type: types.ToolShell}
33
34
err := b.AllowStep(step, t0)
35
Expect(err).NotTo(HaveOccurred())
36
37
s := b.Snapshot(t0)
38
Expect(s.StartedAt).To(Equal(t0))
39
Expect(s.Elapsed).To(Equal(time.Duration(0)))
40
Expect(s.StepsUsed).To(Equal(1))
41
Expect(s.IterationsUsed).To(Equal(0))
42
})
43
44
it("enforces MaxSteps", func() {
45
t0 := time.Date(2026, 1, 13, 9, 0, 0, 0, time.UTC)
46
47
b := core.NewDefaultBudget(core.BudgetLimits{
48
MaxSteps: 2,
49
})
50
51
step := types.Step{Type: types.ToolShell}
52
53
Expect(b.AllowStep(step, t0)).To(Succeed())
54
Expect(b.AllowStep(step, t0)).To(Succeed())
55
56
err := b.AllowStep(step, t0)
57
Expect(err).To(HaveOccurred())
58
59
var be core.BudgetExceededError
60
Expect(err).To(MatchError(ContainSubstring("step budget exceeded")))
61
Expect(err).To(BeAssignableToTypeOf(be))
62
63
var typed core.BudgetExceededError
64
errors.As(err, &typed)
65
66
Expect(typed.Kind).To(Equal(core.BudgetKindSteps))
67
Expect(typed.Limit).To(Equal(2))
68
Expect(typed.Used).To(Equal(2)) // already used before the rejected increment
69
70
s := b.Snapshot(t0)
71
Expect(s.StepsUsed).To(Equal(2)) // should not have incremented on failure
72
Expect(s.IterationsUsed).To(Equal(0))
73
})
74
75
it("auto-starts on AllowTool (ensureStarted) and increments tool counters", func() {
76
t0 := time.Date(2026, 1, 13, 9, 0, 0, 0, time.UTC)
77
78
b := core.NewDefaultBudget(core.BudgetLimits{
79
MaxShellCalls: 10,
80
})
81
82
Expect(b.AllowTool(types.ToolShell, t0)).To(Succeed())
83
84
s := b.Snapshot(t0)
85
Expect(s.StartedAt).To(Equal(t0))
86
Expect(s.ShellUsed).To(Equal(1))
87
Expect(s.LLMUsed).To(Equal(0))
88
Expect(s.FileOpsUsed).To(Equal(0))
89
Expect(s.IterationsUsed).To(Equal(0))
90
})
91
92
it("enforces MaxShellCalls / MaxLLMCalls / MaxFileOps independently", func() {
93
t0 := time.Date(2026, 1, 13, 9, 0, 0, 0, time.UTC)
94
95
b := core.NewDefaultBudget(core.BudgetLimits{
96
MaxShellCalls: 1,
97
MaxLLMCalls: 2,
98
MaxFileOps: 1,
99
})
100
101
Expect(b.AllowTool(types.ToolShell, t0)).To(Succeed())
102
err := b.AllowTool(types.ToolShell, t0)
103
Expect(err).To(HaveOccurred())
104
Expect(err.Error()).To(ContainSubstring("shell call budget exceeded"))
105
106
Expect(b.AllowTool(types.ToolLLM, t0)).To(Succeed())
107
Expect(b.AllowTool(types.ToolLLM, t0)).To(Succeed())
108
err = b.AllowTool(types.ToolLLM, t0)
109
Expect(err).To(HaveOccurred())
110
Expect(err.Error()).To(ContainSubstring("llm call budget exceeded"))
111
112
Expect(b.AllowTool(types.ToolFiles, t0)).To(Succeed())
113
err = b.AllowTool(types.ToolFiles, t0)
114
Expect(err).To(HaveOccurred())
115
Expect(err.Error()).To(ContainSubstring("file ops budget exceeded"))
116
117
s := b.Snapshot(t0)
118
Expect(s.ShellUsed).To(Equal(1))
119
Expect(s.LLMUsed).To(Equal(2))
120
Expect(s.FileOpsUsed).To(Equal(1))
121
Expect(s.IterationsUsed).To(Equal(0))
122
})
123
124
it("returns error for unknown tool kind", func() {
125
t0 := time.Date(2026, 1, 13, 9, 0, 0, 0, time.UTC)
126
127
b := core.NewDefaultBudget(core.BudgetLimits{})
128
err := b.AllowTool("wat", t0)
129
Expect(err).To(HaveOccurred())
130
Expect(err.Error()).To(ContainSubstring(`unknown tool kind`))
131
132
s := b.Snapshot(t0)
133
Expect(s.IterationsUsed).To(Equal(0))
134
})
135
136
it("enforces MaxWallTime in AllowStep", func() {
137
t0 := time.Date(2026, 1, 13, 9, 0, 0, 0, time.UTC)
138
tLate := t0.Add(11 * time.Second)
139
140
b := core.NewDefaultBudget(core.BudgetLimits{
141
MaxWallTime: 10 * time.Second,
142
MaxSteps: 100,
143
})
144
145
step := types.Step{Type: types.ToolShell}
146
147
Expect(b.AllowStep(step, t0)).To(Succeed())
148
149
err := b.AllowStep(step, tLate)
150
Expect(err).To(HaveOccurred())
151
Expect(err.Error()).To(ContainSubstring("wall time budget exceeded"))
152
153
var typed core.BudgetExceededError
154
ok := errors.As(err, &typed)
155
156
Expect(ok).To(BeTrue())
157
Expect(typed.Kind).To(Equal(core.BudgetKindWallTime))
158
Expect(typed.LimitD).To(Equal(10 * time.Second))
159
Expect(typed.UsedD).To(Equal(11 * time.Second))
160
161
// step count should NOT increment on wall-time failure
162
s := b.Snapshot(tLate)
163
Expect(s.StepsUsed).To(Equal(1))
164
Expect(s.IterationsUsed).To(Equal(0))
165
})
166
167
it("enforces MaxWallTime in AllowTool", func() {
168
t0 := time.Date(2026, 1, 13, 9, 0, 0, 0, time.UTC)
169
tLate := t0.Add(500 * time.Millisecond)
170
171
b := core.NewDefaultBudget(core.BudgetLimits{
172
MaxWallTime: 200 * time.Millisecond,
173
MaxShellCalls: 100,
174
})
175
176
Expect(b.AllowTool(types.ToolShell, t0)).To(Succeed())
177
178
err := b.AllowTool(types.ToolShell, tLate)
179
Expect(err).To(HaveOccurred())
180
Expect(err.Error()).To(ContainSubstring("wall time budget exceeded"))
181
182
// tool counter should NOT increment on wall-time failure
183
s := b.Snapshot(tLate)
184
Expect(s.ShellUsed).To(Equal(1))
185
Expect(s.IterationsUsed).To(Equal(0))
186
})
187
188
it("Snapshot returns elapsed and clamps negative elapsed to 0", func() {
189
t0 := time.Date(2026, 1, 13, 9, 0, 0, 0, time.UTC)
190
tBefore := t0.Add(-5 * time.Second)
191
tAfter := t0.Add(250 * time.Millisecond)
192
193
b := core.NewDefaultBudget(core.BudgetLimits{})
194
b.Start(t0)
195
196
s0 := b.Snapshot(tBefore)
197
Expect(s0.Elapsed).To(Equal(time.Duration(0)))
198
199
s1 := b.Snapshot(tAfter)
200
Expect(s1.Elapsed).To(Equal(250 * time.Millisecond))
201
})
202
203
it("unknown tool kind starts budget but does not increment counters", func() {
204
t0 := time.Date(2026, 1, 13, 9, 0, 0, 0, time.UTC)
205
206
b := core.NewDefaultBudget(core.BudgetLimits{})
207
208
err := b.AllowTool("wat", t0)
209
Expect(err).To(HaveOccurred())
210
Expect(err.Error()).To(ContainSubstring(`unknown tool kind`))
211
212
s := b.Snapshot(t0)
213
Expect(s.StartedAt).To(Equal(t0))
214
Expect(s.ShellUsed).To(Equal(0))
215
Expect(s.LLMUsed).To(Equal(0))
216
Expect(s.FileOpsUsed).To(Equal(0))
217
Expect(s.StepsUsed).To(Equal(0))
218
})
219
220
it("does not increment tool counter on max tool-call failure", func() {
221
t0 := time.Date(2026, 1, 13, 9, 0, 0, 0, time.UTC)
222
223
b := core.NewDefaultBudget(core.BudgetLimits{MaxShellCalls: 1})
224
225
Expect(b.AllowTool(types.ToolShell, t0)).To(Succeed())
226
227
err := b.AllowTool(types.ToolShell, t0)
228
Expect(err).To(HaveOccurred())
229
Expect(err.Error()).To(ContainSubstring("shell call budget exceeded"))
230
231
s := b.Snapshot(t0)
232
Expect(s.ShellUsed).To(Equal(1)) // not 2
233
Expect(s.IterationsUsed).To(Equal(0))
234
})
235
236
it("ChargeLLMTokens auto-starts and increments LLMTokensUsed", func() {
237
t0 := time.Date(2026, 1, 13, 9, 0, 0, 0, time.UTC)
238
239
b := core.NewDefaultBudget(core.BudgetLimits{
240
MaxLLMTokens: 1000,
241
})
242
243
b.ChargeLLMTokens(12, t0)
244
245
s := b.Snapshot(t0)
246
Expect(s.StartedAt).To(Equal(t0))
247
Expect(s.LLMTokensUsed).To(Equal(12))
248
Expect(s.StepsUsed).To(Equal(0))
249
Expect(s.ShellUsed).To(Equal(0))
250
Expect(s.LLMUsed).To(Equal(0))
251
Expect(s.FileOpsUsed).To(Equal(0))
252
Expect(s.IterationsUsed).To(Equal(0))
253
})
254
255
it("ChargeLLMTokens ignores non-positive token charges", func() {
256
t0 := time.Date(2026, 1, 13, 9, 0, 0, 0, time.UTC)
257
258
b := core.NewDefaultBudget(core.BudgetLimits{
259
MaxLLMTokens: 1000,
260
})
261
262
b.ChargeLLMTokens(0, t0)
263
b.ChargeLLMTokens(-5, t0)
264
265
s := b.Snapshot(t0)
266
Expect(s.LLMTokensUsed).To(Equal(0))
267
Expect(s.IterationsUsed).To(Equal(0))
268
})
269
270
it("ChargeLLMTokens accumulates across multiple calls", func() {
271
t0 := time.Date(2026, 1, 13, 9, 0, 0, 0, time.UTC)
272
273
b := core.NewDefaultBudget(core.BudgetLimits{
274
MaxLLMTokens: 1000,
275
})
276
277
b.ChargeLLMTokens(10, t0)
278
b.ChargeLLMTokens(15, t0)
279
280
s := b.Snapshot(t0)
281
Expect(s.LLMTokensUsed).To(Equal(25))
282
Expect(s.IterationsUsed).To(Equal(0))
283
})
284
285
it("auto-starts on AllowIteration and increments iteration counter", func() {
286
t0 := time.Date(2026, 1, 13, 9, 0, 0, 0, time.UTC)
287
288
b := core.NewDefaultBudget(core.BudgetLimits{
289
MaxIterations: 10,
290
})
291
292
Expect(b.AllowIteration(t0)).To(Succeed())
293
294
s := b.Snapshot(t0)
295
Expect(s.StartedAt).To(Equal(t0))
296
Expect(s.IterationsUsed).To(Equal(1))
297
Expect(s.StepsUsed).To(Equal(0))
298
Expect(s.ShellUsed).To(Equal(0))
299
})
300
301
it("enforces MaxIterations", func() {
302
t0 := time.Date(2026, 1, 13, 9, 0, 0, 0, time.UTC)
303
304
b := core.NewDefaultBudget(core.BudgetLimits{
305
MaxIterations: 2,
306
})
307
308
Expect(b.AllowIteration(t0)).To(Succeed())
309
Expect(b.AllowIteration(t0)).To(Succeed())
310
311
err := b.AllowIteration(t0)
312
Expect(err).To(HaveOccurred())
313
Expect(err.Error()).To(ContainSubstring("iteration budget exceeded"))
314
315
var typed core.BudgetExceededError
316
ok := errors.As(err, &typed)
317
Expect(ok).To(BeTrue())
318
Expect(typed.Kind).To(Equal(core.BudgetKindIterations))
319
Expect(typed.Limit).To(Equal(2))
320
Expect(typed.Used).To(Equal(2))
321
322
s := b.Snapshot(t0)
323
Expect(s.IterationsUsed).To(Equal(2)) // should not increment on failure
324
})
325
326
it("does not increment iteration counter on wall-time failure", func() {
327
t0 := time.Date(2026, 1, 13, 9, 0, 0, 0, time.UTC)
328
tLate := t0.Add(2 * time.Second)
329
330
b := core.NewDefaultBudget(core.BudgetLimits{
331
MaxWallTime: 1 * time.Second,
332
MaxIterations: 10,
333
})
334
335
Expect(b.AllowIteration(t0)).To(Succeed())
336
337
err := b.AllowIteration(tLate)
338
Expect(err).To(HaveOccurred())
339
Expect(err.Error()).To(ContainSubstring("wall time budget exceeded"))
340
341
s := b.Snapshot(tLate)
342
Expect(s.IterationsUsed).To(Equal(1)) // not 2
343
})
344
345
it("iteration budget is independent of steps and tools", func() {
346
t0 := time.Date(2026, 1, 13, 9, 0, 0, 0, time.UTC)
347
348
b := core.NewDefaultBudget(core.BudgetLimits{
349
MaxIterations: 1,
350
MaxSteps: 10,
351
MaxShellCalls: 10,
352
})
353
354
Expect(b.AllowIteration(t0)).To(Succeed())
355
Expect(b.AllowStep(types.Step{Type: types.ToolShell}, t0)).To(Succeed())
356
Expect(b.AllowTool(types.ToolShell, t0)).To(Succeed())
357
358
err := b.AllowIteration(t0)
359
Expect(err).To(HaveOccurred())
360
Expect(err.Error()).To(ContainSubstring("iteration budget exceeded"))
361
362
s := b.Snapshot(t0)
363
Expect(s.IterationsUsed).To(Equal(1))
364
Expect(s.StepsUsed).To(Equal(1))
365
Expect(s.ShellUsed).To(Equal(1))
366
})
367
})
368
}
369
370