CoCalc -- plan

GitHub Repository: kardolus/chatgpt-cli
Path: blob/main/agent/planexec/plan_test.go
³⁴³⁴ views
1
package planexec_test
2

3
import (
4
	"context"
5
	"errors"
6
	"github.com/kardolus/chatgpt-cli/agent/core"
7
	"github.com/kardolus/chatgpt-cli/agent/planexec"
8
	"github.com/kardolus/chatgpt-cli/agent/types"
9
	"github.com/sclevine/spec/report"
10
	"testing"
11
	"time"
12

13
	. "github.com/onsi/gomega"
14
	"github.com/sclevine/spec"
15

16
	"github.com/golang/mock/gomock"
17
)
18

19
//go:generate mockgen -destination=clockmocks_test.go -package=planexec_test github.com/kardolus/chatgpt-cli/agent/core Clock
20
//go:generate mockgen -destination=llmmocks_test.go -package=planexec_test github.com/kardolus/chatgpt-cli/agent/tools LLM
21
//go:generate mockgen -destination=budgetmocks_test.go -package=planexec_test github.com/kardolus/chatgpt-cli/agent/core Budget
22

23
func TestUnitPlanner(t *testing.T) {
24
	spec.Run(t, "Testing the Runner", testDefaultPlanner, spec.Report(report.Terminal{}))
25
}
26

27
func testDefaultPlanner(t *testing.T, when spec.G, it spec.S) {
28
	var (
29
		ctrl   *gomock.Controller
30
		llm    *MockLLM
31
		budget *MockBudget
32
		clock  *MockClock
33

34
		planner *planexec.DefaultPlanner
35
		ctx     context.Context
36
		now     time.Time
37
	)
38

39
	it.Before(func() {
40
		RegisterTestingT(t)
41

42
		ctrl = gomock.NewController(t)
43
		llm = NewMockLLM(ctrl)
44
		budget = NewMockBudget(ctrl)
45
		clock = NewMockClock(ctrl)
46

47
		planner = planexec.NewDefaultPlanner(llm, budget, clock)
48
		ctx = context.Background()
49
		now = time.Date(2026, 1, 15, 10, 0, 0, 0, time.UTC)
50
	})
51

52
	it.After(func() {
53
		ctrl.Finish()
54
	})
55

56
	when("goal is empty", func() {
57
		it("returns missing goal and does not call tools", func() {
58
			_, err := planner.Plan(ctx, "   ")
59
			Expect(err).To(MatchError("missing goal"))
60
		})
61
	})
62

63
	when("budget refuses the LLM tool call", func() {
64
		it("returns the budget error and does not call llm", func() {
65
			clock.EXPECT().Now().Return(now)
66
			budget.EXPECT().AllowTool(types.ToolLLM, now).Return(core.BudgetExceededError{
67
				Kind:    core.BudgetKindLLM,
68
				Limit:   1,
69
				Used:    1,
70
				Message: "llm call budget exceeded",
71
			})
72

73
			_, err := planner.Plan(ctx, "do something")
74
			Expect(err).To(HaveOccurred())
75
			Expect(err.Error()).To(ContainSubstring("llm call budget exceeded"))
76
		})
77
	})
78

79
	when("llm returns an error", func() {
80
		it("returns the llm error", func() {
81
			clock.EXPECT().Now().Return(now)
82
			budget.EXPECT().AllowTool(types.ToolLLM, now).Return(nil)
83

84
			llm.EXPECT().
85
				Complete(gomock.Any(), gomock.Any()).
86
				Return("", 0, errors.New("boom"))
87

88
			_, err := planner.Plan(ctx, "do something")
89
			Expect(err).To(MatchError("boom"))
90
		})
91
	})
92

93
	when("llm returns invalid json", func() {
94
		it("returns parse error", func() {
95
			clock.EXPECT().Now().Return(now)
96
			budget.EXPECT().AllowTool(types.ToolLLM, now).Return(nil)
97

98
			llm.EXPECT().
99
				Complete(gomock.Any(), gomock.Any()).
100
				Return("not json", 12, nil)
101

102
			budget.EXPECT().ChargeLLMTokens(12, now)
103

104
			_, err := planner.Plan(ctx, "do something")
105
			Expect(err).To(HaveOccurred())
106
			Expect(err.Error()).To(ContainSubstring("failed to parse Planner JSON"))
107
		})
108
	})
109

110
	when("llm returns empty string", func() {
111
		it("returns Planner returned empty response", func() {
112
			clock.EXPECT().Now().Return(now)
113
			budget.EXPECT().AllowTool(types.ToolLLM, now).Return(nil)
114

115
			llm.EXPECT().
116
				Complete(gomock.Any(), gomock.Any()).
117
				Return("   \n", 5, nil)
118

119
			budget.EXPECT().ChargeLLMTokens(5, now)
120

121
			_, err := planner.Plan(ctx, "do something")
122
			Expect(err).To(MatchError("Planner returned empty response"))
123
		})
124
	})
125

126
	when("json goal is empty", func() {
127
		it("uses fallback goal passed into parsePlanJSON", func() {
128
			clock.EXPECT().Now().Return(now)
129
			budget.EXPECT().AllowTool(types.ToolLLM, now).Return(nil)
130

131
			raw := `{
132
				"goal": "",
133
				"steps": [
134
					{
135
						"type": "shell",
136
						"description": "List files",
137
						"command": "ls",
138
						"args": ["-la"]
139
					}
140
				]
141
			}`
142

143
			llm.EXPECT().Complete(gomock.Any(), gomock.Any()).Return(raw, 7, nil)
144
			budget.EXPECT().ChargeLLMTokens(7, now)
145

146
			plan, err := planner.Plan(ctx, "fallback-goal")
147
			Expect(err).NotTo(HaveOccurred())
148
			Expect(plan.Goal).To(Equal("fallback-goal"))
149
			Expect(plan.Steps).To(HaveLen(1))
150
			Expect(plan.Steps[0].Type).To(Equal(types.ToolShell))
151
		})
152
	})
153

154
	when("validation fails: missing description", func() {
155
		it("returns step missing description", func() {
156
			clock.EXPECT().Now().Return(now)
157
			budget.EXPECT().AllowTool(types.ToolLLM, now).Return(nil)
158

159
			raw := `{
160
				"goal": "x",
161
				"steps": [
162
					{
163
						"type": "shell",
164
						"description": "",
165
						"command": "ls",
166
						"args": []
167
					}
168
				]
169
			}`
170

171
			llm.EXPECT().Complete(gomock.Any(), gomock.Any()).Return(raw, 3, nil)
172
			budget.EXPECT().ChargeLLMTokens(3, now)
173

174
			_, err := planner.Plan(ctx, "x")
175
			Expect(err).To(HaveOccurred())
176
			Expect(err.Error()).To(ContainSubstring("missing description"))
177
		})
178
	})
179

180
	when("validation fails: unknown type", func() {
181
		it("returns unknown step type", func() {
182
			clock.EXPECT().Now().Return(now)
183
			budget.EXPECT().AllowTool(types.ToolLLM, now).Return(nil)
184

185
			raw := `{
186
				"goal": "x",
187
				"steps": [
188
					{
189
						"type": "wat",
190
						"description": "???"
191
					}
192
				]
193
			}`
194

195
			llm.EXPECT().Complete(gomock.Any(), gomock.Any()).Return(raw, 3, nil)
196
			budget.EXPECT().ChargeLLMTokens(3, now)
197

198
			_, err := planner.Plan(ctx, "x")
199
			Expect(err).To(HaveOccurred())
200
			Expect(err.Error()).To(ContainSubstring("unknown step type"))
201
		})
202
	})
203

204
	when("templates are invalid", func() {
205
		it("rejects invalid go template syntax", func() {
206
			clock.EXPECT().Now().Return(now)
207
			budget.EXPECT().AllowTool(types.ToolLLM, now).Return(nil)
208

209
			raw := `{
210
				"goal": "x",
211
				"steps": [
212
					{
213
						"type": "llm",
214
						"description": "Bad template",
215
						"prompt": "hello {{"
216
					}
217
				]
218
			}`
219

220
			llm.EXPECT().Complete(gomock.Any(), gomock.Any()).Return(raw, 10, nil)
221
			budget.EXPECT().ChargeLLMTokens(10, now)
222

223
			_, err := planner.Plan(ctx, "x")
224
			Expect(err).To(HaveOccurred())
225
			Expect(err.Error()).To(ContainSubstring("invalid template"))
226
		})
227

228
		it("rejects index .Results without a literal index", func() {
229
			clock.EXPECT().Now().Return(now)
230
			budget.EXPECT().AllowTool(types.ToolLLM, now).Return(nil)
231

232
			// contains "index .Results" but not "(index .Results <number>)"
233
			raw := `{
234
				"goal": "x",
235
				"steps": [
236
					{
237
						"type": "llm",
238
						"description": "Non literal",
239
						"prompt": "value: {{ index .Results .N }}"
240
					}
241
				]
242
			}`
243

244
			llm.EXPECT().Complete(gomock.Any(), gomock.Any()).Return(raw, 10, nil)
245
			budget.EXPECT().ChargeLLMTokens(10, now)
246

247
			_, err := planner.Plan(ctx, "x")
248
			Expect(err).To(HaveOccurred())
249
			Expect(err.Error()).To(ContainSubstring("template uses index .Results but not with a literal index"))
250
		})
251

252
		it("rejects reference to future results", func() {
253
			clock.EXPECT().Now().Return(now)
254
			budget.EXPECT().AllowTool(types.ToolLLM, now).Return(nil)
255

256
			// step 0 references Results[0] -> invalid (must be < stepIndex)
257
			raw := `{
258
				"goal": "x",
259
				"steps": [
260
					{
261
						"type": "llm",
262
						"description": "future",
263
						"prompt": "Summarize: {{ (index .Results 0).Output }}"
264
					}
265
				]
266
			}`
267

268
			llm.EXPECT().Complete(gomock.Any(), gomock.Any()).Return(raw, 10, nil)
269
			budget.EXPECT().ChargeLLMTokens(10, now)
270

271
			_, err := planner.Plan(ctx, "x")
272
			Expect(err).To(HaveOccurred())
273
			Expect(err.Error()).To(ContainSubstring("only prior results are available"))
274
		})
275
	})
276

277
	when("llm returns json wrapped in code fences", func() {
278
		it("strips code fences and parses successfully", func() {
279
			clock.EXPECT().Now().Return(now)
280
			budget.EXPECT().AllowTool(types.ToolLLM, now).Return(nil)
281

282
			raw := "```json\n" + `{
283
          "goal": "x",
284
          "steps": [
285
            { "type": "shell", "description": "List", "command": "ls", "args": ["-la"] }
286
          ]
287
        }` + "\n```"
288

289
			llm.EXPECT().Complete(gomock.Any(), gomock.Any()).Return(raw, 7, nil)
290
			budget.EXPECT().ChargeLLMTokens(7, now)
291

292
			plan, err := planner.Plan(ctx, "fallback")
293
			Expect(err).NotTo(HaveOccurred())
294
			Expect(plan.Goal).To(Equal("x"))
295
			Expect(plan.Steps).To(HaveLen(1))
296
			Expect(plan.Steps[0].Type).To(Equal(types.ToolShell))
297
		})
298
	})
299

300
	when("llm returns fenced non-json", func() {
301
		it("still returns parse error", func() {
302
			clock.EXPECT().Now().Return(now)
303
			budget.EXPECT().AllowTool(types.ToolLLM, now).Return(nil)
304

305
			raw := "```\nnot json\n```"
306

307
			llm.EXPECT().Complete(gomock.Any(), gomock.Any()).Return(raw, 3, nil)
308
			budget.EXPECT().ChargeLLMTokens(3, now)
309

310
			_, err := planner.Plan(ctx, "x")
311
			Expect(err).To(HaveOccurred())
312
			Expect(err.Error()).To(ContainSubstring("failed to parse Planner JSON"))
313
		})
314
	})
315

316
	when("templates are valid", func() {
317
		it("accepts plans that reference prior results", func() {
318
			clock.EXPECT().Now().Return(now)
319
			budget.EXPECT().AllowTool(types.ToolLLM, now).Return(nil)
320

321
			// step 1 references Results[0] -> valid
322
			raw := `{
323
				"goal": "x",
324
				"steps": [
325
					{
326
						"type": "shell",
327
						"description": "Get status",
328
						"command": "git",
329
						"args": ["status", "--porcelain"]
330
					},
331
					{
332
						"type": "llm",
333
						"description": "Summarize",
334
						"prompt": "Summarize:\n{{ (index .Results 0).Output }}"
335
					}
336
				]
337
			}`
338

339
			llm.EXPECT().Complete(gomock.Any(), gomock.Any()).Return(raw, 10, nil)
340
			budget.EXPECT().ChargeLLMTokens(10, now)
341

342
			plan, err := planner.Plan(ctx, "x")
343
			Expect(err).NotTo(HaveOccurred())
344
			Expect(plan.Steps).To(HaveLen(2))
345
			Expect(plan.Steps[1].Type).To(Equal(types.ToolLLM))
346
		})
347
	})
348
}
349

350
Product

Resources

Company