Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
kardolus
GitHub Repository: kardolus/chatgpt-cli
Path: blob/main/agent/planexec/plan_test.go
3434 views
1
package planexec_test
2
3
import (
4
"context"
5
"errors"
6
"github.com/kardolus/chatgpt-cli/agent/core"
7
"github.com/kardolus/chatgpt-cli/agent/planexec"
8
"github.com/kardolus/chatgpt-cli/agent/types"
9
"github.com/sclevine/spec/report"
10
"testing"
11
"time"
12
13
. "github.com/onsi/gomega"
14
"github.com/sclevine/spec"
15
16
"github.com/golang/mock/gomock"
17
)
18
19
//go:generate mockgen -destination=clockmocks_test.go -package=planexec_test github.com/kardolus/chatgpt-cli/agent/core Clock
20
//go:generate mockgen -destination=llmmocks_test.go -package=planexec_test github.com/kardolus/chatgpt-cli/agent/tools LLM
21
//go:generate mockgen -destination=budgetmocks_test.go -package=planexec_test github.com/kardolus/chatgpt-cli/agent/core Budget
22
23
func TestUnitPlanner(t *testing.T) {
24
spec.Run(t, "Testing the Runner", testDefaultPlanner, spec.Report(report.Terminal{}))
25
}
26
27
func testDefaultPlanner(t *testing.T, when spec.G, it spec.S) {
28
var (
29
ctrl *gomock.Controller
30
llm *MockLLM
31
budget *MockBudget
32
clock *MockClock
33
34
planner *planexec.DefaultPlanner
35
ctx context.Context
36
now time.Time
37
)
38
39
it.Before(func() {
40
RegisterTestingT(t)
41
42
ctrl = gomock.NewController(t)
43
llm = NewMockLLM(ctrl)
44
budget = NewMockBudget(ctrl)
45
clock = NewMockClock(ctrl)
46
47
planner = planexec.NewDefaultPlanner(llm, budget, clock)
48
ctx = context.Background()
49
now = time.Date(2026, 1, 15, 10, 0, 0, 0, time.UTC)
50
})
51
52
it.After(func() {
53
ctrl.Finish()
54
})
55
56
when("goal is empty", func() {
57
it("returns missing goal and does not call tools", func() {
58
_, err := planner.Plan(ctx, " ")
59
Expect(err).To(MatchError("missing goal"))
60
})
61
})
62
63
when("budget refuses the LLM tool call", func() {
64
it("returns the budget error and does not call llm", func() {
65
clock.EXPECT().Now().Return(now)
66
budget.EXPECT().AllowTool(types.ToolLLM, now).Return(core.BudgetExceededError{
67
Kind: core.BudgetKindLLM,
68
Limit: 1,
69
Used: 1,
70
Message: "llm call budget exceeded",
71
})
72
73
_, err := planner.Plan(ctx, "do something")
74
Expect(err).To(HaveOccurred())
75
Expect(err.Error()).To(ContainSubstring("llm call budget exceeded"))
76
})
77
})
78
79
when("llm returns an error", func() {
80
it("returns the llm error", func() {
81
clock.EXPECT().Now().Return(now)
82
budget.EXPECT().AllowTool(types.ToolLLM, now).Return(nil)
83
84
llm.EXPECT().
85
Complete(gomock.Any(), gomock.Any()).
86
Return("", 0, errors.New("boom"))
87
88
_, err := planner.Plan(ctx, "do something")
89
Expect(err).To(MatchError("boom"))
90
})
91
})
92
93
when("llm returns invalid json", func() {
94
it("returns parse error", func() {
95
clock.EXPECT().Now().Return(now)
96
budget.EXPECT().AllowTool(types.ToolLLM, now).Return(nil)
97
98
llm.EXPECT().
99
Complete(gomock.Any(), gomock.Any()).
100
Return("not json", 12, nil)
101
102
budget.EXPECT().ChargeLLMTokens(12, now)
103
104
_, err := planner.Plan(ctx, "do something")
105
Expect(err).To(HaveOccurred())
106
Expect(err.Error()).To(ContainSubstring("failed to parse Planner JSON"))
107
})
108
})
109
110
when("llm returns empty string", func() {
111
it("returns Planner returned empty response", func() {
112
clock.EXPECT().Now().Return(now)
113
budget.EXPECT().AllowTool(types.ToolLLM, now).Return(nil)
114
115
llm.EXPECT().
116
Complete(gomock.Any(), gomock.Any()).
117
Return(" \n", 5, nil)
118
119
budget.EXPECT().ChargeLLMTokens(5, now)
120
121
_, err := planner.Plan(ctx, "do something")
122
Expect(err).To(MatchError("Planner returned empty response"))
123
})
124
})
125
126
when("json goal is empty", func() {
127
it("uses fallback goal passed into parsePlanJSON", func() {
128
clock.EXPECT().Now().Return(now)
129
budget.EXPECT().AllowTool(types.ToolLLM, now).Return(nil)
130
131
raw := `{
132
"goal": "",
133
"steps": [
134
{
135
"type": "shell",
136
"description": "List files",
137
"command": "ls",
138
"args": ["-la"]
139
}
140
]
141
}`
142
143
llm.EXPECT().Complete(gomock.Any(), gomock.Any()).Return(raw, 7, nil)
144
budget.EXPECT().ChargeLLMTokens(7, now)
145
146
plan, err := planner.Plan(ctx, "fallback-goal")
147
Expect(err).NotTo(HaveOccurred())
148
Expect(plan.Goal).To(Equal("fallback-goal"))
149
Expect(plan.Steps).To(HaveLen(1))
150
Expect(plan.Steps[0].Type).To(Equal(types.ToolShell))
151
})
152
})
153
154
when("validation fails: missing description", func() {
155
it("returns step missing description", func() {
156
clock.EXPECT().Now().Return(now)
157
budget.EXPECT().AllowTool(types.ToolLLM, now).Return(nil)
158
159
raw := `{
160
"goal": "x",
161
"steps": [
162
{
163
"type": "shell",
164
"description": "",
165
"command": "ls",
166
"args": []
167
}
168
]
169
}`
170
171
llm.EXPECT().Complete(gomock.Any(), gomock.Any()).Return(raw, 3, nil)
172
budget.EXPECT().ChargeLLMTokens(3, now)
173
174
_, err := planner.Plan(ctx, "x")
175
Expect(err).To(HaveOccurred())
176
Expect(err.Error()).To(ContainSubstring("missing description"))
177
})
178
})
179
180
when("validation fails: unknown type", func() {
181
it("returns unknown step type", func() {
182
clock.EXPECT().Now().Return(now)
183
budget.EXPECT().AllowTool(types.ToolLLM, now).Return(nil)
184
185
raw := `{
186
"goal": "x",
187
"steps": [
188
{
189
"type": "wat",
190
"description": "???"
191
}
192
]
193
}`
194
195
llm.EXPECT().Complete(gomock.Any(), gomock.Any()).Return(raw, 3, nil)
196
budget.EXPECT().ChargeLLMTokens(3, now)
197
198
_, err := planner.Plan(ctx, "x")
199
Expect(err).To(HaveOccurred())
200
Expect(err.Error()).To(ContainSubstring("unknown step type"))
201
})
202
})
203
204
when("templates are invalid", func() {
205
it("rejects invalid go template syntax", func() {
206
clock.EXPECT().Now().Return(now)
207
budget.EXPECT().AllowTool(types.ToolLLM, now).Return(nil)
208
209
raw := `{
210
"goal": "x",
211
"steps": [
212
{
213
"type": "llm",
214
"description": "Bad template",
215
"prompt": "hello {{"
216
}
217
]
218
}`
219
220
llm.EXPECT().Complete(gomock.Any(), gomock.Any()).Return(raw, 10, nil)
221
budget.EXPECT().ChargeLLMTokens(10, now)
222
223
_, err := planner.Plan(ctx, "x")
224
Expect(err).To(HaveOccurred())
225
Expect(err.Error()).To(ContainSubstring("invalid template"))
226
})
227
228
it("rejects index .Results without a literal index", func() {
229
clock.EXPECT().Now().Return(now)
230
budget.EXPECT().AllowTool(types.ToolLLM, now).Return(nil)
231
232
// contains "index .Results" but not "(index .Results <number>)"
233
raw := `{
234
"goal": "x",
235
"steps": [
236
{
237
"type": "llm",
238
"description": "Non literal",
239
"prompt": "value: {{ index .Results .N }}"
240
}
241
]
242
}`
243
244
llm.EXPECT().Complete(gomock.Any(), gomock.Any()).Return(raw, 10, nil)
245
budget.EXPECT().ChargeLLMTokens(10, now)
246
247
_, err := planner.Plan(ctx, "x")
248
Expect(err).To(HaveOccurred())
249
Expect(err.Error()).To(ContainSubstring("template uses index .Results but not with a literal index"))
250
})
251
252
it("rejects reference to future results", func() {
253
clock.EXPECT().Now().Return(now)
254
budget.EXPECT().AllowTool(types.ToolLLM, now).Return(nil)
255
256
// step 0 references Results[0] -> invalid (must be < stepIndex)
257
raw := `{
258
"goal": "x",
259
"steps": [
260
{
261
"type": "llm",
262
"description": "future",
263
"prompt": "Summarize: {{ (index .Results 0).Output }}"
264
}
265
]
266
}`
267
268
llm.EXPECT().Complete(gomock.Any(), gomock.Any()).Return(raw, 10, nil)
269
budget.EXPECT().ChargeLLMTokens(10, now)
270
271
_, err := planner.Plan(ctx, "x")
272
Expect(err).To(HaveOccurred())
273
Expect(err.Error()).To(ContainSubstring("only prior results are available"))
274
})
275
})
276
277
when("llm returns json wrapped in code fences", func() {
278
it("strips code fences and parses successfully", func() {
279
clock.EXPECT().Now().Return(now)
280
budget.EXPECT().AllowTool(types.ToolLLM, now).Return(nil)
281
282
raw := "```json\n" + `{
283
"goal": "x",
284
"steps": [
285
{ "type": "shell", "description": "List", "command": "ls", "args": ["-la"] }
286
]
287
}` + "\n```"
288
289
llm.EXPECT().Complete(gomock.Any(), gomock.Any()).Return(raw, 7, nil)
290
budget.EXPECT().ChargeLLMTokens(7, now)
291
292
plan, err := planner.Plan(ctx, "fallback")
293
Expect(err).NotTo(HaveOccurred())
294
Expect(plan.Goal).To(Equal("x"))
295
Expect(plan.Steps).To(HaveLen(1))
296
Expect(plan.Steps[0].Type).To(Equal(types.ToolShell))
297
})
298
})
299
300
when("llm returns fenced non-json", func() {
301
it("still returns parse error", func() {
302
clock.EXPECT().Now().Return(now)
303
budget.EXPECT().AllowTool(types.ToolLLM, now).Return(nil)
304
305
raw := "```\nnot json\n```"
306
307
llm.EXPECT().Complete(gomock.Any(), gomock.Any()).Return(raw, 3, nil)
308
budget.EXPECT().ChargeLLMTokens(3, now)
309
310
_, err := planner.Plan(ctx, "x")
311
Expect(err).To(HaveOccurred())
312
Expect(err.Error()).To(ContainSubstring("failed to parse Planner JSON"))
313
})
314
})
315
316
when("templates are valid", func() {
317
it("accepts plans that reference prior results", func() {
318
clock.EXPECT().Now().Return(now)
319
budget.EXPECT().AllowTool(types.ToolLLM, now).Return(nil)
320
321
// step 1 references Results[0] -> valid
322
raw := `{
323
"goal": "x",
324
"steps": [
325
{
326
"type": "shell",
327
"description": "Get status",
328
"command": "git",
329
"args": ["status", "--porcelain"]
330
},
331
{
332
"type": "llm",
333
"description": "Summarize",
334
"prompt": "Summarize:\n{{ (index .Results 0).Output }}"
335
}
336
]
337
}`
338
339
llm.EXPECT().Complete(gomock.Any(), gomock.Any()).Return(raw, 10, nil)
340
budget.EXPECT().ChargeLLMTokens(10, now)
341
342
plan, err := planner.Plan(ctx, "x")
343
Expect(err).NotTo(HaveOccurred())
344
Expect(plan.Steps).To(HaveLen(2))
345
Expect(plan.Steps[1].Type).To(Equal(types.ToolLLM))
346
})
347
})
348
}
349
350