package react_test
import (
"context"
"errors"
"fmt"
"github.com/kardolus/chatgpt-cli/agent/core"
"github.com/kardolus/chatgpt-cli/agent/react"
"github.com/kardolus/chatgpt-cli/agent/types"
"testing"
"time"
"github.com/golang/mock/gomock"
. "github.com/onsi/gomega"
"github.com/sclevine/spec"
"github.com/sclevine/spec/report"
)
func TestUnitReAct(t *testing.T) {
spec.Run(t, "Testing ReActAgent", testReActAgent, spec.Report(report.Terminal{}))
}
// testReActAgent is the spec suite for react.ReActAgent.
//
// Every spec wires the agent to gomock doubles for the LLM, the step runner,
// the budget and the clock, scripts the exchange it wants to see, and then
// calls RunAgentGoal. The clock is frozen at a fixed instant so that every
// budget expectation can match on the same timestamp.
//
// Expectations for the same mock method are consumed in the order they are
// registered, so the order of the expect* calls inside a spec is the order of
// the scripted LLM turns.
func testReActAgent(t *testing.T, when spec.G, it spec.S) {
	var (
		ctrl       *gomock.Controller
		llm        *MockLLM
		runner     *MockRunner
		budget     *MockBudget
		clock      *MockClock
		reactAgent *react.ReActAgent
		ctx        context.Context
		now        time.Time
	)

	// expectTurnAllowed registers the three budget calls that precede each LLM call.
	expectTurnAllowed := func() {
		budget.EXPECT().AllowIteration(now).Return(nil)
		budget.EXPECT().Snapshot(now).Return(core.BudgetSnapshot{})
		budget.EXPECT().AllowTool(types.ToolLLM, now).Return(nil)
	}

	// expectLLMReply registers one complete LLM turn that returns reply and charges tokens.
	expectLLMReply := func(reply string, tokens int) {
		expectTurnAllowed()
		llm.EXPECT().
			Complete(gomock.Any(), gomock.Any()).
			Return(reply, tokens, nil)
		budget.EXPECT().ChargeLLMTokens(tokens, now)
	}

	// expectLLMTurn registers one complete LLM turn whose reply is produced by respond,
	// which receives the prompt so the spec can assert on its contents.
	expectLLMTurn := func(tokens int, respond func(prompt string) string) {
		expectTurnAllowed()
		llm.EXPECT().
			Complete(gomock.Any(), gomock.Any()).
			DoAndReturn(func(_ context.Context, prompt string) (string, int, error) {
				return respond(prompt), tokens, nil
			})
		budget.EXPECT().ChargeLLMTokens(tokens, now)
	}

	it.Before(func() {
		RegisterTestingT(t)

		ctrl = gomock.NewController(t)
		llm = NewMockLLM(ctrl)
		runner = NewMockRunner(ctrl)
		budget = NewMockBudget(ctrl)
		clock = NewMockClock(ctrl)
		reactAgent = react.NewReActAgent(llm, runner, budget, clock)
		ctx = context.Background()

		// Frozen clock: all budget expectations below match on this exact instant.
		now = time.Date(2026, 1, 15, 10, 0, 0, 0, time.UTC)
		clock.EXPECT().Now().Return(now).AnyTimes()
	})

	it.After(func() {
		ctrl.Finish()
	})

	when("LLM returns final answer immediately", func() {
		it("returns the answer without tool calls", func() {
			expectLLMReply(`{
				"thought": "The answer is simple",
				"action_type": "answer",
				"final_answer": "42"
			}`, 10)

			res, err := reactAgent.RunAgentGoal(ctx, "What is the answer?")
			Expect(err).NotTo(HaveOccurred())
			Expect(res).To(Equal("42"))
		})
	})

	when("LLM uses a shell tool then answers", func() {
		it("executes the tool and returns the final answer", func() {
			expectLLMReply(`{
				"thought": "I need to list files",
				"action_type": "tool",
				"tool": "shell",
				"command": "ls",
				"args": ["-la"]
			}`, 15)

			runner.EXPECT().
				RunStep(gomock.Any(), gomock.Any(), gomock.Any()).
				DoAndReturn(func(_ context.Context, _ types.Config, step types.Step) (types.StepResult, error) {
					Expect(step.Type).To(Equal(types.ToolShell))
					Expect(step.Command).To(Equal("ls"))
					Expect(step.Args).To(Equal([]string{"-la"}))
					return types.StepResult{
						Outcome:  types.OutcomeOK,
						Output:   "file1.txt\nfile2.txt",
						Duration: 100 * time.Millisecond,
					}, nil
				})

			// The second prompt must carry the tool output as an observation.
			expectLLMTurn(12, func(prompt string) string {
				Expect(prompt).To(ContainSubstring("OBSERVATION: file1.txt"))
				return `{
					"thought": "I have the file list",
					"action_type": "answer",
					"final_answer": "There are 2 files"
				}`
			})

			res, err := reactAgent.RunAgentGoal(ctx, "How many files?")
			Expect(err).NotTo(HaveOccurred())
			Expect(res).To(Equal("There are 2 files"))
		})
	})

	when("Budget is exceeded", func() {
		it("returns Budget error", func() {
			budget.EXPECT().AllowIteration(now).Return(nil)
			budget.EXPECT().Snapshot(now).Return(core.BudgetSnapshot{})
			// The LLM allowance is refused, so no Complete call is expected.
			budget.EXPECT().AllowTool(types.ToolLLM, now).Return(core.BudgetExceededError{
				Kind:    core.BudgetKindLLM,
				Limit:   5,
				Used:    5,
				Message: "LLM call Budget exceeded",
			})

			_, err := reactAgent.RunAgentGoal(ctx, "Do something")
			Expect(err).To(HaveOccurred())
			Expect(err.Error()).To(ContainSubstring("LLM call Budget exceeded"))
		})
	})

	when("LLM returns invalid JSON", func() {
		it("recovers in-band and returns a final answer", func() {
			expectLLMReply("not valid json", 5)

			expectLLMTurn(7, func(prompt string) string {
				Expect(prompt).To(ContainSubstring("ACTION_TAKEN: tool=LLM details=INVALID_RESPONSE"))
				Expect(prompt).To(ContainSubstring("OBSERVATION: ERROR: Your last response violated the ReAct protocol"))
				Expect(prompt).To(ContainSubstring("failed to locate JSON object"))
				Expect(prompt).To(ContainSubstring(`Raw response (truncated): "not valid json"`))
				return `{
					"thought": "ok",
					"action_type": "answer",
					"final_answer": "recovered"
				}`
			})

			res, err := reactAgent.RunAgentGoal(ctx, "Do something")
			Expect(err).NotTo(HaveOccurred())
			Expect(res).To(Equal("recovered"))
		})

		it("recovers in-band, then hard-fails after max parse recoveries", func() {
			// One initial bad reply followed by three recovery turns that are also bad.
			expectLLMReply("not valid json", 5)
			for i := 0; i < 3; i++ {
				expectLLMTurn(5, func(prompt string) string {
					Expect(prompt).To(ContainSubstring("ACTION_TAKEN: tool=LLM details=INVALID_RESPONSE"))
					Expect(prompt).To(ContainSubstring("OBSERVATION: ERROR: Your last response violated the ReAct protocol"))
					Expect(prompt).To(ContainSubstring(`"action_type"`))
					return "not valid json"
				})
			}

			_, err := reactAgent.RunAgentGoal(ctx, "Do something")
			Expect(err).To(HaveOccurred())
			Expect(err.Error()).To(ContainSubstring("agent failed to produce valid JSON after 3 attempts"))
			Expect(err.Error()).To(ContainSubstring("failed to locate JSON"))
		})
	})

	when("LLM returns JSON with missing action_type", func() {
		it("recovers in-band and returns a final answer", func() {
			expectLLMReply(`{"thought":"thinking"}`, 5)

			expectLLMTurn(7, func(prompt string) string {
				Expect(prompt).To(ContainSubstring("ACTION_TAKEN: tool=LLM details=INVALID_RESPONSE"))
				Expect(prompt).To(ContainSubstring("OBSERVATION: ERROR: Your last response violated the ReAct protocol"))
				Expect(prompt).To(ContainSubstring("missing action_type"))
				Expect(prompt).To(ContainSubstring(`Raw response (truncated): "{\"thought\":\"thinking\"}"`))
				return `{
					"thought": "ok",
					"action_type": "answer",
					"final_answer": "recovered"
				}`
			})

			res, err := reactAgent.RunAgentGoal(ctx, "Do something")
			Expect(err).NotTo(HaveOccurred())
			Expect(res).To(Equal("recovered"))
		})

		it("recovers in-band, then hard-fails after max parse recoveries", func() {
			// One initial bad reply followed by three recovery turns that are also bad.
			expectLLMReply(`{"thought":"thinking"}`, 5)
			for i := 0; i < 3; i++ {
				expectLLMTurn(5, func(prompt string) string {
					Expect(prompt).To(ContainSubstring("ACTION_TAKEN: tool=LLM details=INVALID_RESPONSE"))
					Expect(prompt).To(ContainSubstring("OBSERVATION: ERROR: Your last response violated the ReAct protocol"))
					Expect(prompt).To(ContainSubstring("missing action_type"))
					return `{"thought":"thinking"}`
				})
			}

			_, err := reactAgent.RunAgentGoal(ctx, "Do something")
			Expect(err).To(HaveOccurred())
			Expect(err.Error()).To(ContainSubstring("agent failed to produce valid JSON after 3 attempts"))
			Expect(err.Error()).To(ContainSubstring("missing action_type"))
		})
	})

	when("tool execution fails", func() {
		it("returns the execution error", func() {
			expectLLMReply(`{
				"thought": "running command",
				"action_type": "tool",
				"tool": "shell",
				"command": "false"
			}`, 10)

			runner.EXPECT().
				RunStep(gomock.Any(), gomock.Any(), gomock.Any()).
				Return(types.StepResult{
					Outcome:    types.OutcomeError,
					Transcript: "command failed",
				}, errors.New("exit 1"))

			_, err := reactAgent.RunAgentGoal(ctx, "Run false")
			Expect(err).To(HaveOccurred())
			Expect(err.Error()).To(ContainSubstring("exit 1"))
		})
	})

	when("iteration Budget is exceeded", func() {
		it("returns iteration Budget exceeded error", func() {
			// Each turn uses a distinct argument so the calls are not identical.
			for i := 0; i < 10; i++ {
				expectLLMReply(fmt.Sprintf(`{
					"thought": "still working",
					"action_type": "tool",
					"tool": "shell",
					"command": "echo",
					"args": ["test-%d"]
				}`, i), 10)
			}

			runner.EXPECT().
				RunStep(gomock.Any(), gomock.Any(), gomock.Any()).
				Times(10).
				Return(types.StepResult{
					Outcome:  types.OutcomeOK,
					Output:   "ok",
					Duration: 10 * time.Millisecond,
				}, nil)

			// The eleventh iteration is refused by the budget.
			budget.EXPECT().AllowIteration(now).Return(core.BudgetExceededError{
				Kind:    core.BudgetKindIterations,
				Limit:   10,
				Used:    10,
				Message: "iteration Budget exceeded",
			})

			_, err := reactAgent.RunAgentGoal(ctx, "Keep looping")
			Expect(err).To(HaveOccurred())
			Expect(err.Error()).To(ContainSubstring("iteration Budget exceeded"))
		})
	})

	when("LLM output has markdown code fences", func() {
		it("strips the fences and parses correctly", func() {
			expectLLMReply("```json\n{\"thought\": \"done\", \"action_type\": \"answer\", \"final_answer\": \"Success\"}\n```", 10)

			res, err := reactAgent.RunAgentGoal(ctx, "Test markdown")
			Expect(err).NotTo(HaveOccurred())
			Expect(res).To(Equal("Success"))
		})
	})

	when("shell tool missing command", func() {
		it("injects error observation and lets LLM recover", func() {
			expectLLMReply(`{
				"thought": "using shell",
				"action_type": "tool",
				"tool": "shell",
				"command": ""
			}`, 10)

			// No RunStep expectation: the empty command must not reach the runner.
			expectLLMTurn(1, func(prompt string) string {
				Expect(prompt).To(ContainSubstring("OBSERVATION: ERROR"))
				Expect(prompt).To(ContainSubstring("shell tool requires command"))
				return `{
					"thought": "ok",
					"action_type": "answer",
					"final_answer": "cannot run shell without a command"
				}`
			})

			res, err := reactAgent.RunAgentGoal(ctx, "Test")
			Expect(err).NotTo(HaveOccurred())
			Expect(res).To(Equal("cannot run shell without a command"))
		})
	})

	when("LLM uses shorthand action_type like file/shell/LLM", func() {
		it("treats action_type=file as a tool call", func() {
			expectLLMReply(`{
				"thought": "read it",
				"action_type": "file",
				"op": "read",
				"path": "AGENTS.md"
			}`, 10)

			runner.EXPECT().
				RunStep(gomock.Any(), gomock.Any(), gomock.Any()).
				DoAndReturn(func(_ context.Context, _ types.Config, step types.Step) (types.StepResult, error) {
					Expect(step.Type).To(Equal(types.ToolFiles))
					Expect(step.Op).To(Equal("read"))
					Expect(step.Path).To(Equal("AGENTS.md"))
					return types.StepResult{
						Outcome:  types.OutcomeOK,
						Output:   "ok",
						Duration: 1 * time.Millisecond,
					}, nil
				})

			expectLLMReply(`{
				"thought": "done",
				"action_type": "answer",
				"final_answer": "ok"
			}`, 5)

			_, err := reactAgent.RunAgentGoal(ctx, "Read AGENTS")
			Expect(err).NotTo(HaveOccurred())
		})
	})

	when("LLM returns multiple JSON objects back-to-back", func() {
		it("parses only the first JSON object", func() {
			expectLLMReply(
				`{"thought":"one","action_type":"answer","final_answer":"A"}{"thought":"two","action_type":"answer","final_answer":"B"}`,
				10,
			)

			res, err := reactAgent.RunAgentGoal(ctx, "Test")
			Expect(err).NotTo(HaveOccurred())
			Expect(res).To(Equal("A"))
		})
	})

	when("LLM repeats the same tool call twice in a row", func() {
		it("injects a repetition observation and forces a different next step", func() {
			expectLLMReply(`{
				"thought": "do it",
				"action_type": "tool",
				"tool": "shell",
				"command": "ls",
				"args": ["-la"]
			}`, 10)

			// Only the first of the two identical calls is expected to reach the runner.
			runner.EXPECT().
				RunStep(gomock.Any(), gomock.Any(), gomock.Any()).
				Times(1).
				Return(types.StepResult{
					Outcome:  types.OutcomeOK,
					Output:   "file1\nfile2\n",
					Duration: 1 * time.Millisecond,
				}, nil)

			expectLLMReply(`{
				"thought": "try again",
				"action_type": "tool",
				"tool": "shell",
				"command": "ls",
				"args": ["-la"]
			}`, 10)

			expectLLMTurn(5, func(prompt string) string {
				Expect(prompt).To(ContainSubstring("OBSERVATION: You are repeating the same tool call"))
				return `{
					"thought": "ok, I'll stop repeating",
					"action_type": "answer",
					"final_answer": "done"
				}`
			})

			res, err := reactAgent.RunAgentGoal(ctx, "List files")
			Expect(err).NotTo(HaveOccurred())
			Expect(res).To(Equal("done"))
		})
	})

	when("LLM ignores repetition warnings", func() {
		it("hard-stops after too many repeats in the rolling window", func() {
			for i := 0; i < 6; i++ {
				expectLLMReply(`{
					"thought": "list files again",
					"action_type": "tool",
					"tool": "shell",
					"command": "ls",
					"args": ["-la"]
				}`, 1)
			}

			// Six identical requests, but only one execution is expected.
			runner.EXPECT().
				RunStep(gomock.Any(), gomock.Any(), gomock.Any()).
				Times(1).
				DoAndReturn(func(_ context.Context, _ types.Config, step types.Step) (types.StepResult, error) {
					Expect(step.Type).To(Equal(types.ToolShell))
					Expect(step.Command).To(Equal("ls"))
					Expect(step.Args).To(Equal([]string{"-la"}))
					return types.StepResult{
						Outcome:  types.OutcomeOK,
						Output:   "file1\nfile2\n",
						Duration: 10 * time.Millisecond,
					}, nil
				})

			_, err := reactAgent.RunAgentGoal(ctx, "Loop forever")
			Expect(err).To(HaveOccurred())
			Expect(err.Error()).To(ContainSubstring("agent appears stuck"))
			Expect(err.Error()).To(ContainSubstring("repeated tool call too many times"))
		})
	})

	when("LLM uses shorthand action_type=file (no tool field)", func() {
		it("treats it as a tool call and executes file op", func() {
			expectLLMReply(`{
				"thought": "read README",
				"action_type": "file",
				"op": "read",
				"path": "README.md"
			}`, 10)

			runner.EXPECT().
				RunStep(gomock.Any(), gomock.Any(), gomock.Any()).
				DoAndReturn(func(_ context.Context, _ types.Config, step types.Step) (types.StepResult, error) {
					Expect(step.Type).To(Equal(types.ToolFiles))
					Expect(step.Op).To(Equal("read"))
					Expect(step.Path).To(Equal("README.md"))
					return types.StepResult{
						Outcome:  types.OutcomeOK,
						Output:   "README CONTENT",
						Duration: 5 * time.Millisecond,
					}, nil
				})

			expectLLMTurn(1, func(prompt string) string {
				Expect(prompt).To(ContainSubstring("OBSERVATION: README CONTENT"))
				return `{
					"thought": "done",
					"action_type": "answer",
					"final_answer": "ok"
				}`
			})

			_, err := reactAgent.RunAgentGoal(ctx, "Read README and answer")
			Expect(err).NotTo(HaveOccurred())
		})
	})

	when("LLM uses shorthand action_type=file AND also sets tool=file", func() {
		it("still treats it as a tool call (compat mode)", func() {
			expectLLMReply(`{
				"thought": "read AGENTS",
				"action_type": "file",
				"tool": "file",
				"op": "read",
				"path": "AGENTS.md"
			}`, 10)

			runner.EXPECT().
				RunStep(gomock.Any(), gomock.Any(), gomock.Any()).
				Return(types.StepResult{
					Outcome:  types.OutcomeOK,
					Output:   "AGENTS CONTENT",
					Duration: 5 * time.Millisecond,
				}, nil)

			expectLLMReply(`{
				"thought": "done",
				"action_type": "answer",
				"final_answer": "ok"
			}`, 1)

			_, err := reactAgent.RunAgentGoal(ctx, "Read AGENTS and answer")
			Expect(err).NotTo(HaveOccurred())
		})
	})

	when("LLM uses shorthand action_type=file but tool mismatches", func() {
		it("recovers in-band rather than failing the whole run", func() {
			expectLLMReply(`{
				"thought": "oops",
				"action_type": "file",
				"tool": "shell",
				"command": "ls"
			}`, 10)

			expectLLMTurn(5, func(prompt string) string {
				Expect(prompt).To(ContainSubstring("ACTION_TAKEN: tool=LLM details=INVALID_RESPONSE"))
				Expect(prompt).To(ContainSubstring("OBSERVATION: ERROR: Your last response violated the ReAct protocol"))
				Expect(prompt).To(ContainSubstring(`invalid action_type: "file"`))
				Expect(prompt).To(ContainSubstring(`"tool": "shell"`))
				return `{
					"thought": "ack, correct schema",
					"action_type": "answer",
					"final_answer": "recovered"
				}`
			})

			res, err := reactAgent.RunAgentGoal(ctx, "Bad shorthand")
			Expect(err).NotTo(HaveOccurred())
			Expect(res).To(Equal("recovered"))
		})

		it("recovers in-band, then hard-fails after max parse recoveries", func() {
			bad := `{
				"thought": "oops",
				"action_type": "file",
				"tool": "shell",
				"command": "ls"
			}`

			// One initial bad reply followed by three recovery turns that are also bad.
			expectLLMReply(bad, 10)
			for i := 0; i < 3; i++ {
				expectLLMTurn(10, func(prompt string) string {
					Expect(prompt).To(ContainSubstring("ACTION_TAKEN: tool=LLM details=INVALID_RESPONSE"))
					Expect(prompt).To(ContainSubstring("invalid action_type"))
					Expect(prompt).To(ContainSubstring(`Raw response (truncated)`))
					return bad
				})
			}

			_, err := reactAgent.RunAgentGoal(ctx, "Bad shorthand")
			Expect(err).To(HaveOccurred())
			Expect(err.Error()).To(ContainSubstring("agent failed to produce valid JSON after 3 attempts"))
			Expect(err.Error()).To(ContainSubstring(`invalid action_type: "file"`))
		})
	})

	when("LLM uses file patch", func() {
		it("converts to a ToolFiles step and executes it", func() {
			expectLLMReply(`{
				"thought":"apply diff",
				"action_type":"tool",
				"tool":"file",
				"op":"patch",
				"path":"a.txt",
				"data":"--- a/a.txt\n+++ b/a.txt\n@@\n+hi\n"
			}`, 1)

			runner.EXPECT().RunStep(gomock.Any(), gomock.Any(), gomock.Any()).
				DoAndReturn(func(_ context.Context, _ types.Config, step types.Step) (types.StepResult, error) {
					Expect(step.Type).To(Equal(types.ToolFiles))
					Expect(step.Op).To(Equal("patch"))
					Expect(step.Path).To(Equal("a.txt"))
					Expect(step.Data).To(ContainSubstring("+++ b/a.txt"))
					return types.StepResult{Outcome: types.OutcomeOK, Output: "patched"}, nil
				})

			expectLLMReply(`{
				"thought":"done",
				"action_type":"answer",
				"final_answer":"ok"
			}`, 1)

			_, err := reactAgent.RunAgentGoal(ctx, "Patch a.txt")
			Expect(err).NotTo(HaveOccurred())
		})
	})

	when("LLM uses file replace", func() {
		it("converts to a ToolFiles step with Old/New/N", func() {
			expectLLMReply(`{
				"thought":"swap token",
				"action_type":"tool",
				"tool":"file",
				"op":"replace",
				"path":"a.txt",
				"old":"foo",
				"new":"bar",
				"n":2
			}`, 1)

			runner.EXPECT().RunStep(gomock.Any(), gomock.Any(), gomock.Any()).
				DoAndReturn(func(_ context.Context, _ types.Config, step types.Step) (types.StepResult, error) {
					Expect(step.Type).To(Equal(types.ToolFiles))
					Expect(step.Op).To(Equal("replace"))
					Expect(step.Path).To(Equal("a.txt"))
					Expect(step.Old).To(Equal("foo"))
					Expect(step.New).To(Equal("bar"))
					Expect(step.N).To(Equal(2))
					return types.StepResult{Outcome: types.OutcomeOK, Output: "replaced"}, nil
				})

			expectLLMReply(`{
				"thought":"done",
				"action_type":"answer",
				"final_answer":"ok"
			}`, 1)

			_, err := reactAgent.RunAgentGoal(ctx, "Replace in a.txt")
			Expect(err).NotTo(HaveOccurred())
		})
	})

	when("LLM uses file patch without data", func() {
		it("injects error observation and lets LLM recover", func() {
			// "data" is whitespace only; no RunStep expectation is registered.
			expectLLMReply(`{
				"thought":"patch",
				"action_type":"tool",
				"tool":"file",
				"op":"patch",
				"path":"a.txt",
				"data":"   "
			}`, 1)

			expectLLMTurn(1, func(prompt string) string {
				Expect(prompt).To(ContainSubstring("file patch requires data"))
				Expect(prompt).To(ContainSubstring("OBSERVATION: ERROR"))
				return `{
					"thought":"ok",
					"action_type":"answer",
					"final_answer":"fixed"
				}`
			})

			res, err := reactAgent.RunAgentGoal(ctx, "Patch")
			Expect(err).NotTo(HaveOccurred())
			Expect(res).To(Equal("fixed"))
		})
	})

	when("LLM uses file replace without old", func() {
		it("injects error observation and lets LLM recover", func() {
			expectLLMReply(`{
				"thought":"replace",
				"action_type":"tool",
				"tool":"file",
				"op":"replace",
				"path":"a.txt",
				"new":""
			}`, 1)

			expectLLMTurn(1, func(prompt string) string {
				Expect(prompt).To(ContainSubstring("file replace requires old"))
				Expect(prompt).To(ContainSubstring("OBSERVATION: ERROR"))
				return `{
					"thought":"ok",
					"action_type":"answer",
					"final_answer":"recovered"
				}`
			})

			res, err := reactAgent.RunAgentGoal(ctx, "Replace")
			Expect(err).NotTo(HaveOccurred())
			Expect(res).To(Equal("recovered"))
		})
	})

	when("patch fails and agent falls back to full write", func() {
		it("continues after patch failure observation and then writes", func() {
			expectLLMReply(`{
				"thought":"try patch first",
				"action_type":"tool",
				"tool":"file",
				"op":"patch",
				"path":"a.txt",
				"data":"--- a/a.txt\n+++ b/a.txt\n@@\n+hi\n"
			}`, 1)

			// The step reports an error outcome but returns a nil Go error.
			runner.EXPECT().RunStep(gomock.Any(), gomock.Any(), gomock.Any()).
				DoAndReturn(func(_ context.Context, _ types.Config, step types.Step) (types.StepResult, error) {
					Expect(step.Op).To(Equal("patch"))
					return types.StepResult{
						Outcome:  types.OutcomeError,
						Output:   "patch failed: hunk did not apply",
						Duration: 1 * time.Millisecond,
					}, nil
				})

			expectLLMTurn(1, func(prompt string) string {
				Expect(prompt).To(ContainSubstring("OBSERVATION: ERROR:"))
				Expect(prompt).To(ContainSubstring("patch failed"))
				return `{
					"thought":"fallback to write full file",
					"action_type":"tool",
					"tool":"file",
					"op":"write",
					"path":"a.txt",
					"data":"FULL NEW CONTENT\n"
				}`
			})

			runner.EXPECT().RunStep(gomock.Any(), gomock.Any(), gomock.Any()).
				DoAndReturn(func(_ context.Context, _ types.Config, step types.Step) (types.StepResult, error) {
					Expect(step.Op).To(Equal("write"))
					Expect(step.Path).To(Equal("a.txt"))
					Expect(step.Data).To(Equal("FULL NEW CONTENT\n"))
					return types.StepResult{Outcome: types.OutcomeOK, Output: "wrote"}, nil
				})

			expectLLMReply(`{
				"thought":"done",
				"action_type":"answer",
				"final_answer":"ok"
			}`, 1)

			_, err := reactAgent.RunAgentGoal(ctx, "Modify a.txt")
			Expect(err).NotTo(HaveOccurred())
		})
	})

	when("a step produces side effects", func() {
		it("includes STATE line with cumulative effects in the next prompt", func() {
			expectLLMReply(`{
				"thought":"write a file",
				"action_type":"tool",
				"tool":"file",
				"op":"write",
				"path":"a.txt",
				"data":"hi"
			}`, 1)

			runner.EXPECT().RunStep(gomock.Any(), gomock.Any(), gomock.Any()).
				Return(types.StepResult{
					Outcome:  types.OutcomeOK,
					Output:   "wrote",
					Duration: 1 * time.Millisecond,
					Effects: types.Effects{
						{Kind: "file.write", Path: "a.txt"},
					},
				}, nil)

			expectLLMTurn(1, func(prompt string) string {
				Expect(prompt).To(ContainSubstring("State:"))
				Expect(prompt).To(ContainSubstring("file.write x1"))
				return `{
					"thought":"done",
					"action_type":"answer",
					"final_answer":"ok"
				}`
			})

			_, err := reactAgent.RunAgentGoal(ctx, "Write")
			Expect(err).NotTo(HaveOccurred())
		})
	})

	when("patch fails and agent injects FALLBACK REQUIRED guidance", func() {
		it("includes fallback-required instruction (read+write) in the next prompt", func() {
			expectLLMReply(`{
				"thought":"try patch first",
				"action_type":"tool",
				"tool":"file",
				"op":"patch",
				"path":"a.txt",
				"data":"--- a/a.txt\n+++ b/a.txt\n@@ -1,1 +1,1 @@\n-old\n+new\n"
			}`, 1)

			runner.EXPECT().RunStep(gomock.Any(), gomock.Any(), gomock.Any()).
				DoAndReturn(func(_ context.Context, _ types.Config, step types.Step) (types.StepResult, error) {
					Expect(step.Type).To(Equal(types.ToolFiles))
					Expect(step.Op).To(Equal("patch"))
					Expect(step.Path).To(Equal("a.txt"))
					return types.StepResult{
						Outcome:  types.OutcomeError,
						Output:   "invalid unified diff: missing hunk header",
						Duration: 1 * time.Millisecond,
					}, nil
				})

			expectLLMTurn(1, func(prompt string) string {
				Expect(prompt).To(ContainSubstring("OBSERVATION: ERROR:"))
				Expect(prompt).To(ContainSubstring("invalid unified diff"))
				Expect(prompt).To(ContainSubstring("OBSERVATION: FALLBACK REQUIRED"))
				Expect(prompt).To(ContainSubstring(`Do NOT try op="patch" or op=patch/replace again`))
				Expect(prompt).To(ContainSubstring(`{"action_type":"tool","tool":"file","op":"read","path":"a.txt"}`))
				return `{
					"thought":"ack",
					"action_type":"answer",
					"final_answer":"ok"
				}`
			})

			_, err := reactAgent.RunAgentGoal(ctx, "Modify a.txt")
			Expect(err).NotTo(HaveOccurred())
		})
	})

	when("agent already has transcript + history from a previous run", func() {
		it("resets them before starting a new run", func() {
			// Seed stale state that the new run is expected to discard.
			reactAgent.AddTranscript("OLD_TRANSCRIPT_SHOULD_BE_CLEARED")
			reactAgent.AddHistory("OLD_HISTORY_SHOULD_BE_CLEARED")

			expectLLMReply(`{
				"thought": "ok",
				"action_type": "answer",
				"final_answer": "done"
			}`, 3)

			res, err := reactAgent.RunAgentGoal(ctx, "New goal")
			Expect(err).NotTo(HaveOccurred())
			Expect(res).To(Equal("done"))

			Expect(reactAgent.TranscriptString()).NotTo(ContainSubstring("OLD_TRANSCRIPT_SHOULD_BE_CLEARED"))
			Expect(reactAgent.History()).NotTo(ContainSubstring("OLD_HISTORY_SHOULD_BE_CLEARED"))

			Expect(reactAgent.TranscriptString()).To(ContainSubstring("[goal]"))
			Expect(reactAgent.TranscriptString()).To(ContainSubstring("New goal"))
			Expect(reactAgent.History()).To(ContainSubstring("USER: New goal"))
		})
	})

	when("prompt logging is enabled and transcript max is small", func() {
		it("caps transcript length (and truncates) even if prompt is large", func() {
			// A dedicated agent with small caps; it shares the suite's mocks.
			agent := react.NewReActAgent(
				llm, runner, budget, clock,
				core.WithTranscriptMaxBytes(120),
				core.WithPromptHistoryMaxBytes(120),
			)

			expectLLMTurn(2, func(prompt string) string {
				Expect(len(prompt)).To(BeNumerically(">", 200))
				return `{
					"thought": "ok",
					"action_type": "answer",
					"final_answer": "done"
				}`
			})

			_, err := agent.RunAgentGoal(ctx, "Goal that triggers large prompt")
			Expect(err).NotTo(HaveOccurred())

			ts := agent.TranscriptString()
			Expect(len([]byte(ts))).To(BeNumerically("<=", 120))
			Expect(ts).To(ContainSubstring("…(truncated)"))
		})
	})
}