Path: blob/main/agent/planexec/plan_execute_agent_test.go
3434 views
package planexec_test12import (3"context"4"fmt"5"github.com/kardolus/chatgpt-cli/agent/core"6"github.com/kardolus/chatgpt-cli/agent/planexec"7"github.com/kardolus/chatgpt-cli/agent/types"8"strings"9"testing"10"time"1112"github.com/golang/mock/gomock"13. "github.com/onsi/gomega"14"github.com/sclevine/spec"15"github.com/sclevine/spec/report"16)1718//go:generate mockgen -destination=runnermocks_test.go -package=planexec_test github.com/kardolus/chatgpt-cli/agent/core Runner19//go:generate mockgen -destination=clockmocks_test.go -package=planexec_test github.com/kardolus/chatgpt-cli/agent/core Clock20//go:generate mockgen -destination=plannermocks_test.go -package=planexec_test github.com/kardolus/chatgpt-cli/agent/planexec Planner2122func TestUnitAgent(t *testing.T) {23spec.Run(t, "Testing the plan-execute agent", testPlanExecuteAgent, spec.Report(report.Terminal{}))24}2526func testPlanExecuteAgent(t *testing.T, when spec.G, it spec.S) {27var (28mockCtrl *gomock.Controller29mockClock *MockClock30mockRunner *MockRunner31mockPlanner *MockPlanner32)3334it.Before(func() {35RegisterTestingT(t)36mockCtrl = gomock.NewController(t)37mockClock = NewMockClock(mockCtrl)38mockPlanner = NewMockPlanner(mockCtrl)39mockRunner = NewMockRunner(mockCtrl)40})4142it.After(func() {43mockCtrl.Finish()44})4546when("RunAgentGoal()", func() {47const goal = "test goal"4849it.Before(func() {50expectAgentDuration(mockClock, 123*time.Millisecond)51})5253it("should bubble up Planner errors and not run any steps", func() {54planErr := fmt.Errorf("Planner boom")5556mockPlanner.57EXPECT().58Plan(gomock.Any(), goal).59Return(types.Plan{}, planErr).60Times(1)6162// Runner must not be invoked if planning fails63mockRunner.64EXPECT().65RunStep(gomock.Any(), gomock.Any(), gomock.Any()).66Times(0)6768subject := planexec.NewPlanExecuteAgent(69mockClock,70mockPlanner,71mockRunner,72)7374_, err := subject.RunAgentGoal(context.Background(), goal)75Expect(err).To(MatchError(planErr))76})7778it("should bubble up Runner errors and stop executing further steps", func() {79runErr := fmt.Errorf("Runner boom")8081plan := types.Plan{82Goal: goal,83Steps: []types.Step{84{Type: types.ToolShell, Description: "step 1", Command: "echo", Args: []string{"one"}},85{Type: types.ToolShell, Description: "step 2", Command: "echo", Args: []string{"two"}},86},87}8889mockPlanner.90EXPECT().91Plan(gomock.Any(), goal).92Return(plan, nil).93Times(1)9495mockRunner.96EXPECT().97RunStep(gomock.Any(), gomock.Any(), plan.Steps[0]).98Return(types.StepResult{}, runErr).99Times(1)100101// Guard: step 2 must not run102mockRunner.103EXPECT().104RunStep(gomock.Any(), gomock.Any(), plan.Steps[1]).105Times(0)106107subject := planexec.NewPlanExecuteAgent(108mockClock,109mockPlanner,110mockRunner,111)112113_, err := subject.RunAgentGoal(context.Background(), goal)114Expect(err).To(MatchError(runErr))115})116117it("should return an error when Runner returns OutcomeError (even if err == nil)", func() {118plan := types.Plan{119Goal: goal,120Steps: []types.Step{121{Type: types.ToolShell, Description: "step 1", Command: "false", Args: nil},122},123}124125mockPlanner.126EXPECT().127Plan(gomock.Any(), goal).128Return(plan, nil).129Times(1)130131mockRunner.132EXPECT().133RunStep(gomock.Any(), gomock.Any(), plan.Steps[0]).134Return(types.StepResult{135Step: plan.Steps[0],136Outcome: types.OutcomeError,137Exec: &types.Result{ExitCode: 42}, // optional; Agent no longer inspects this138}, nil).139Times(1)140141subject := planexec.NewPlanExecuteAgent(142mockClock,143mockPlanner,144mockRunner,145)146147_, err := subject.RunAgentGoal(context.Background(), goal)148Expect(err).To(MatchError(`step failed: step 1`))149})150151it("should succeed when Planner returns an empty plan and not run any steps", func() {152plan := types.Plan{153Goal: goal,154Steps: nil,155}156157mockPlanner.EXPECT().158Plan(gomock.Any(), goal).159Return(plan, nil).160Times(1)161162mockRunner.EXPECT().163RunStep(gomock.Any(), gomock.Any(), gomock.Any()).164Times(0)165166subject := planexec.NewPlanExecuteAgent(167mockClock,168mockPlanner,169mockRunner,170)171172out, err := subject.RunAgentGoal(context.Background(), goal)173Expect(err).NotTo(HaveOccurred())174Expect(out).To(BeEmpty())175})176177it("should stop executing further steps when Runner returns OutcomeError", func() {178plan := types.Plan{179Goal: goal,180Steps: []types.Step{181{Type: types.ToolShell, Description: "step 1", Command: "false", Args: nil},182{Type: types.ToolShell, Description: "step 2", Command: "echo", Args: []string{"should-not-run"}},183},184}185186mockPlanner.EXPECT().187Plan(gomock.Any(), goal).188Return(plan, nil).189Times(1)190191mockRunner.EXPECT().192RunStep(gomock.Any(), gomock.Any(), plan.Steps[0]).193Return(types.StepResult{194Step: plan.Steps[0],195Outcome: types.OutcomeError,196Exec: &types.Result{ExitCode: 7},197}, nil).198Times(1)199200// Guard: step 2 must not run201mockRunner.EXPECT().202RunStep(gomock.Any(), gomock.Any(), plan.Steps[1]).203Times(0)204205subject := planexec.NewPlanExecuteAgent(206mockClock,207mockPlanner,208mockRunner,209)210211_, err := subject.RunAgentGoal(context.Background(), goal)212Expect(err).To(MatchError(`step failed: step 1`))213})214215it("should treat Exec == nil as success and continue to next step", func() {216plan := types.Plan{217Goal: goal,218Steps: []types.Step{219{Type: types.ToolLLM, Description: "llm step (no exec)", Prompt: "do something"},220{Type: types.ToolShell, Description: "shell step", Command: "echo", Args: []string{"ok"}},221},222}223224mockPlanner.EXPECT().225Plan(gomock.Any(), goal).226Return(plan, nil).227Times(1)228229// First step: Exec is nil, no error, OutcomeOK => success.230mockRunner.EXPECT().231RunStep(gomock.Any(), gomock.Any(), plan.Steps[0]).232Return(types.StepResult{233Step: plan.Steps[0],234Outcome: types.OutcomeOK,235Exec: nil,236}, nil).237Times(1)238239// Second step should still run.240mockRunner.EXPECT().241RunStep(gomock.Any(), gomock.Any(), plan.Steps[1]).242Return(types.StepResult{243Step: plan.Steps[1],244Outcome: types.OutcomeOK,245Exec: &types.Result{ExitCode: 0},246}, nil).247Times(1)248249subject := planexec.NewPlanExecuteAgent(250mockClock,251mockPlanner,252mockRunner,253)254255_, err := subject.RunAgentGoal(context.Background(), goal)256Expect(err).NotTo(HaveOccurred())257})258259it("WithWorkDir should pass cfg.WorkDir into the Runner", func() {260plan := types.Plan{261Goal: goal,262Steps: []types.Step{263{Type: types.ToolShell, Description: "step 1", Command: "echo", Args: []string{"one"}},264},265}266267mockPlanner.EXPECT().268Plan(gomock.Any(), goal).269Return(plan, nil).270Times(1)271272mockRunner.EXPECT().273RunStep(gomock.Any(), gomock.Any(), plan.Steps[0]).274DoAndReturn(func(_ context.Context, cfg types.Config, _ types.Step) (types.StepResult, error) {275Expect(cfg.WorkDir).To(Equal("/tmp/my-workdir"))276return types.StepResult{Step: plan.Steps[0], Outcome: types.OutcomeOK}, nil277}).278Times(1)279280subject := planexec.NewPlanExecuteAgent(281mockClock,282mockPlanner,283mockRunner,284core.WithWorkDir("/tmp/my-workdir"),285)286287_, err := subject.RunAgentGoal(context.Background(), goal)288Expect(err).NotTo(HaveOccurred())289})290291it("WithDryRun should pass cfg.DryRun into the Runner", func() {292plan := types.Plan{293Goal: goal,294Steps: []types.Step{295{Type: types.ToolShell, Description: "step 1", Command: "echo", Args: []string{"one"}},296},297}298299mockPlanner.EXPECT().300Plan(gomock.Any(), goal).301Return(plan, nil).302Times(1)303304mockRunner.EXPECT().305RunStep(gomock.Any(), gomock.Any(), plan.Steps[0]).306DoAndReturn(func(_ context.Context, cfg types.Config, _ types.Step) (types.StepResult, error) {307Expect(cfg.DryRun).To(BeTrue())308return types.StepResult{Step: plan.Steps[0], Outcome: types.OutcomeDryRun}, nil309}).310Times(1)311312subject := planexec.NewPlanExecuteAgent(313mockClock,314mockPlanner,315mockRunner,316core.WithDryRun(true),317)318319_, err := subject.RunAgentGoal(context.Background(), goal)320Expect(err).NotTo(HaveOccurred())321})322323it("happy path: should return the output of the final step", func() {324const goal = "do the thing"325326plan := types.Plan{327Goal: goal,328Steps: []types.Step{329{Type: types.ToolLLM, Description: "step 1", Prompt: "first"},330{Type: types.ToolLLM, Description: "step 2", Prompt: "second"},331},332}333334mockPlanner.EXPECT().335Plan(gomock.Any(), goal).336Return(plan, nil).337Times(1)338339mockRunner.EXPECT().340RunStep(gomock.Any(), gomock.Any(), plan.Steps[0]).341Return(types.StepResult{342Step: plan.Steps[0],343Outcome: types.OutcomeOK,344Output: "A",345}, nil).346Times(1)347348mockRunner.EXPECT().349RunStep(gomock.Any(), gomock.Any(), plan.Steps[1]).350Return(types.StepResult{351Step: plan.Steps[1],352Outcome: types.OutcomeOK,353Output: "B",354}, nil).355Times(1)356357subject := planexec.NewPlanExecuteAgent(358mockClock,359mockPlanner,360mockRunner,361)362363out, err := subject.RunAgentGoal(context.Background(), goal)364Expect(err).NotTo(HaveOccurred())365Expect(out).To(Equal("B")) // last step wins366})367368it("should run all planned steps (PlanExecuteAgent no longer enforces MaxSteps)", func() {369plan := types.Plan{370Goal: goal,371Steps: []types.Step{372{Type: types.ToolShell, Description: "step 1", Command: "echo", Args: []string{"one"}},373{Type: types.ToolShell, Description: "step 2", Command: "echo", Args: []string{"two"}},374{Type: types.ToolShell, Description: "step 3", Command: "echo", Args: []string{"three"}},375},376}377378mockPlanner.EXPECT().379Plan(gomock.Any(), goal).380Return(plan, nil).381Times(1)382383gomock.InOrder(384mockRunner.EXPECT().385RunStep(gomock.Any(), gomock.Any(), plan.Steps[0]).386Return(types.StepResult{Step: plan.Steps[0], Outcome: types.OutcomeOK}, nil),387mockRunner.EXPECT().388RunStep(gomock.Any(), gomock.Any(), plan.Steps[1]).389Return(types.StepResult{Step: plan.Steps[1], Outcome: types.OutcomeOK}, nil),390mockRunner.EXPECT().391RunStep(gomock.Any(), gomock.Any(), plan.Steps[2]).392Return(types.StepResult{Step: plan.Steps[2], Outcome: types.OutcomeOK}, nil),393)394395subject := planexec.NewPlanExecuteAgent(396mockClock,397mockPlanner,398mockRunner,399)400401_, err := subject.RunAgentGoal(context.Background(), goal)402Expect(err).NotTo(HaveOccurred())403})404405it("should accumulate results and render templates for later steps", func() {406plan := types.Plan{407Goal: goal,408Steps: []types.Step{409{Type: types.ToolLLM, Description: "step 1", Prompt: "first"},410{Type: types.ToolLLM, Description: "step 2", Prompt: "use {{ (index .Results 0).Output }}"},411},412}413414mockPlanner.EXPECT().415Plan(gomock.Any(), goal).416Return(plan, nil).417Times(1)418419// Step 1 runs normally.420mockRunner.EXPECT().421RunStep(gomock.Any(), gomock.Any(), gomock.Any()).422DoAndReturn(func(_ context.Context, _ types.Config, s types.Step) (types.StepResult, error) {423Expect(s).To(Equal(plan.Steps[0])) // no template here424return types.StepResult{Step: s, Outcome: types.OutcomeOK, Output: "A"}, nil425}).426Times(1)427428// Step 2 should arrive rendered.429mockRunner.EXPECT().430RunStep(gomock.Any(), gomock.Any(), gomock.Any()).431DoAndReturn(func(_ context.Context, _ types.Config, s types.Step) (types.StepResult, error) {432Expect(s.Type).To(Equal(types.ToolLLM))433Expect(s.Prompt).To(Equal("use A"))434return types.StepResult{Step: s, Outcome: types.OutcomeOK, Output: "B"}, nil435}).436Times(1)437438subject := planexec.NewPlanExecuteAgent(mockClock, mockPlanner, mockRunner)439_, err := subject.RunAgentGoal(context.Background(), goal)440Expect(err).NotTo(HaveOccurred())441})442443it("should error if template rendering fails and not call Runner for that step", func() {444plan := types.Plan{445Goal: goal,446Steps: []types.Step{447{Type: types.ToolLLM, Description: "step 1", Prompt: "ok"},448{Type: types.ToolLLM, Description: "step 2", Prompt: "bad {{ .MissingKey }}"},449},450}451452mockPlanner.EXPECT().453Plan(gomock.Any(), goal).454Return(plan, nil).455Times(1)456457// Step 1 runs.458mockRunner.EXPECT().459RunStep(gomock.Any(), gomock.Any(), plan.Steps[0]).460Return(types.StepResult{Step: plan.Steps[0], Outcome: types.OutcomeOK, Output: "A"}, nil).461Times(1)462463// Step 2 must NOT run (render should fail before Runner call).464mockRunner.EXPECT().465RunStep(gomock.Any(), gomock.Any(), plan.Steps[1]).466Times(0)467468subject := planexec.NewPlanExecuteAgent(mockClock, mockPlanner, mockRunner)469470_, err := subject.RunAgentGoal(context.Background(), goal)471Expect(err).To(HaveOccurred())472})473474it("should bubble up policy violations (typed) as a stop reason", func() {475plan := types.Plan{476Goal: goal,477Steps: []types.Step{478{Type: types.ToolShell, Description: "step 1", Command: "echo", Args: []string{"one"}},479{Type: types.ToolShell, Description: "step 2", Command: "echo", Args: []string{"two"}},480},481}482483mockPlanner.EXPECT().484Plan(gomock.Any(), goal).485Return(plan, nil).486Times(1)487488polErr := core.PolicyDeniedError{489Kind: "workdir",490Reason: "workdir not allowed",491}492493mockRunner.EXPECT().494RunStep(gomock.Any(), gomock.Any(), plan.Steps[0]).495Return(types.StepResult{}, polErr).496Times(1)497498// Guard: step 2 must not run499mockRunner.EXPECT().500RunStep(gomock.Any(), gomock.Any(), plan.Steps[1]).501Times(0)502503subject := planexec.NewPlanExecuteAgent(mockClock, mockPlanner, mockRunner)504505_, err := subject.RunAgentGoal(context.Background(), goal)506Expect(err).To(MatchError(polErr))507})508509it("should bubble up budget exceeded errors as a stop reason", func() {510plan := types.Plan{511Goal: goal,512Steps: []types.Step{513{Type: types.ToolShell, Description: "step 1", Command: "echo", Args: []string{"one"}},514{Type: types.ToolShell, Description: "step 2", Command: "echo", Args: []string{"two"}},515},516}517518mockPlanner.EXPECT().519Plan(gomock.Any(), goal).520Return(plan, nil).521Times(1)522523budgetErr := core.BudgetExceededError{524Kind: core.BudgetKindSteps,525Limit: 10,526Used: 10,527Message: "step budget exceeded",528}529530mockRunner.EXPECT().531RunStep(gomock.Any(), gomock.Any(), plan.Steps[0]).532Return(types.StepResult{}, budgetErr).533Times(1)534535// Guard: step 2 must not run536mockRunner.EXPECT().537RunStep(gomock.Any(), gomock.Any(), plan.Steps[1]).538Times(0)539540subject := planexec.NewPlanExecuteAgent(mockClock, mockPlanner, mockRunner)541542_, err := subject.RunAgentGoal(context.Background(), goal)543Expect(err).To(MatchError(budgetErr))544})545546it("resets transcript and prompt-history at the start of each run", func() {547plan := types.Plan{548Goal: goal,549Steps: []types.Step{550{Type: types.ToolLLM, Description: "step 1", Prompt: "first"},551},552}553554// Run #1555mockPlanner.EXPECT().556Plan(gomock.Any(), goal).557Return(plan, nil).558Times(1)559560mockRunner.EXPECT().561RunStep(gomock.Any(), gomock.Any(), gomock.Any()).562Return(types.StepResult{Step: plan.Steps[0], Outcome: types.OutcomeOK, Output: "A"}, nil).563Times(1)564565subject := planexec.NewPlanExecuteAgent(mockClock, mockPlanner, mockRunner)566567_, err := subject.RunAgentGoal(context.Background(), goal)568Expect(err).NotTo(HaveOccurred())569570// Seed old leftovers that MUST be cleared on next run571subject.AddTranscript("OLD_TRANSCRIPT_SHOULD_BE_CLEARED")572if subject.PromptHistory != nil {573subject.PromptHistory.AppendString("OLD_PROMPT_SHOULD_BE_CLEARED")574}575576// Run #2 (same goal is fine)577mockPlanner.EXPECT().578Plan(gomock.Any(), goal).579Return(plan, nil).580Times(1)581582mockRunner.EXPECT().583RunStep(gomock.Any(), gomock.Any(), gomock.Any()).584Return(types.StepResult{Step: plan.Steps[0], Outcome: types.OutcomeOK, Output: "B"}, nil).585Times(1)586587_, err = subject.RunAgentGoal(context.Background(), goal)588Expect(err).NotTo(HaveOccurred())589590// Assert old content is gone after reset591Expect(subject.TranscriptString()).NotTo(ContainSubstring("OLD_TRANSCRIPT_SHOULD_BE_CLEARED"))592593// And prompt history (if enabled) should not contain the old string either594if subject.PromptHistory != nil {595Expect(subject.PromptHistory.String()).NotTo(ContainSubstring("OLD_PROMPT_SHOULD_BE_CLEARED"))596}597598// Sanity: new run wrote fresh markers599Expect(subject.TranscriptString()).To(ContainSubstring("[goal]"))600Expect(subject.TranscriptString()).To(ContainSubstring(goal))601})602603it("caps transcript growth (truncates) when steps produce large transcripts", func() {604plan := types.Plan{605Goal: goal,606Steps: []types.Step{607{Type: types.ToolShell, Description: "step 1", Command: "echo", Args: []string{"hi"}},608},609}610611mockPlanner.EXPECT().612Plan(gomock.Any(), goal).613Return(plan, nil).614Times(1)615616// Runner returns a huge transcript payload617big := strings.Repeat("X", 10_000)618619mockRunner.EXPECT().620RunStep(gomock.Any(), gomock.Any(), gomock.Any()).621Return(types.StepResult{622Step: plan.Steps[0],623Outcome: types.OutcomeOK,624Output: "ok",625Transcript: big,626}, nil).627Times(1)628629subject := planexec.NewPlanExecuteAgent(630mockClock,631mockPlanner,632mockRunner,633core.WithTranscriptMaxBytes(200),634)635636_, err := subject.RunAgentGoal(context.Background(), goal)637Expect(err).NotTo(HaveOccurred())638639ts := subject.TranscriptString()640641// Hard check: buffer is capped642Expect(len([]byte(ts))).To(BeNumerically("<=", 200))643644// And should include your truncation banner (whatever you used in transcript_buffer)645Expect(ts).To(ContainSubstring("…(truncated)"))646})647})648}649650func expectAgentDuration(mockClock *MockClock, d time.Duration) {651t0 := time.Date(2026, 1, 13, 9, 0, 0, 0, time.UTC)652t1 := t0.Add(d)653654// Robust: first call is t0, all subsequent calls are t1.655gomock.InOrder(656mockClock.EXPECT().Now().Return(t0).Times(1),657mockClock.EXPECT().Now().Return(t1).AnyTimes(),658)659}660661662