From 65b45adbd83a98daa57770e340b448924edeca55 Mon Sep 17 00:00:00 2001 From: Kynan Ware <47394200+BagToad@users.noreply.github.com> Date: Tue, 16 Sep 2025 17:21:01 -0600 Subject: [PATCH] Improve log rendering error handling and test coverage Refactors log rendering to print errors to stderr and continue processing on JSON parse or rendering failures, instead of returning early. Updates tests to check both stdout and stderr outputs, and adds new test cases and testdata for tolerant parsing and error scenarios. --- pkg/cmd/agent-task/shared/log.go | 36 ++++++++------ pkg/cmd/agent-task/shared/log_test.go | 48 ++++++++++++++----- .../shared/testdata/log-3-input.txt | 27 +++++++++++ .../log-3-synthetic-failures-input.txt | 27 +++++++++++ .../log-3-synthetic-failures-want-stderr.txt | 10 ++++ .../log-3-synthetic-failures-want.txt | 39 +++++++++++++++ .../agent-task/shared/testdata/log-3-want.txt | 39 +++++++++++++++ 7 files changed, 199 insertions(+), 27 deletions(-) create mode 100644 pkg/cmd/agent-task/shared/testdata/log-3-input.txt create mode 100644 pkg/cmd/agent-task/shared/testdata/log-3-synthetic-failures-input.txt create mode 100644 pkg/cmd/agent-task/shared/testdata/log-3-synthetic-failures-want-stderr.txt create mode 100644 pkg/cmd/agent-task/shared/testdata/log-3-synthetic-failures-want.txt create mode 100644 pkg/cmd/agent-task/shared/testdata/log-3-want.txt diff --git a/pkg/cmd/agent-task/shared/log.go b/pkg/cmd/agent-task/shared/log.go index e58bed79a..a1b1740e0 100644 --- a/pkg/cmd/agent-task/shared/log.go +++ b/pkg/cmd/agent-task/shared/log.go @@ -133,15 +133,16 @@ func renderLogEntry(entry chatCompletionChunkEntry, w io.Writer, io *iostreams.I case "view": args := viewToolArgs{} if err := json.Unmarshal([]byte(tc.Function.Arguments), &args); err != nil { - return false, fmt.Errorf("failed to parse 'view' tool call arguments: %w", err) + fmt.Fprintf(io.ErrOut, "\nfailed to parse 'view' tool call arguments: %v\n", err) + continue } - fmt.Fprintf(w, "View %s\n", cs.Bold(relativeFilePath(args.Path))) + renderToolCallTitle(w, cs, fmt.Sprintf("View %s", cs.Bold(relativeFilePath(args.Path))), "") content := stripDiffFormat(choice.Delta.Content) - // TODO: Strip the diff formatting from this, but for now render as it is. if err := renderFileContentAsMarkdown(args.Path, content, w, io); err != nil { - return false, fmt.Errorf("failed to render viewed file content: %w", err) + fmt.Fprintf(io.ErrOut, "\nfailed to render viewed file content: %v\n\n", err) + fmt.Fprintln(io.ErrOut, content) // raw fallback } case "bash": if v := unmarshal[bashToolArgs](args); v != nil { @@ -156,7 +157,8 @@ func renderLogEntry(entry chatCompletionChunkEntry, w io.Writer, io *iostreams.I contentWithCommand = fmt.Sprintf("$ %s\n%s", v.Command, choice.Delta.Content) } if err := renderFileContentAsMarkdown("commands.sh", contentWithCommand, w, io); err != nil { - return false, fmt.Errorf("failed to render bash command output: %w", err) + fmt.Fprintf(io.ErrOut, "\nfailed to render bash command output: %v\n\n", err) + fmt.Fprintln(io.ErrOut, contentWithCommand) } } // TODO: consider including more details for these bash-related tool calls. @@ -193,24 +195,26 @@ func renderLogEntry(entry chatCompletionChunkEntry, w io.Writer, io *iostreams.I case "think": args := thinkToolArgs{} if err := json.Unmarshal([]byte(tc.Function.Arguments), &args); err != nil { - return false, fmt.Errorf("failed to parse 'think' tool call arguments: %w", err) + fmt.Fprintf(io.ErrOut, "\nfailed to parse 'think' tool call arguments: %v\n", err) + continue } // NOTE: omit the delta.content since it's the same as thought renderToolCallTitle(w, cs, "Thought", "") if err := renderRawMarkdown(args.Thought, w, io); err != nil { - return false, fmt.Errorf("failed to render thought: %w", err) + fmt.Fprintf(io.ErrOut, "\nfailed to render thought: %v\n", err) } case "report_progress": args := reportProgressToolArgs{} if err := json.Unmarshal([]byte(tc.Function.Arguments), &args); err != nil { - return false, fmt.Errorf("failed to parse 'report_progress' tool call arguments: %w", err) + fmt.Fprintf(io.ErrOut, "\nfailed to parse 'report_progress' tool call arguments: %v\n", err) + continue } renderToolCallTitle(w, cs, "Progress update", cs.Bold(args.CommitMessage)) if args.PrDescription != "" { if err := renderRawMarkdown(args.PrDescription, w, io); err != nil { - return false, fmt.Errorf("failed to render PR description: %w", err) + fmt.Fprintf(io.ErrOut, "\nfailed to render PR description: %v\n", err) } } @@ -218,29 +222,33 @@ func renderLogEntry(entry chatCompletionChunkEntry, w io.Writer, io *iostreams.I if choice.Delta.Content != "" { // Try to treat this as JSON if err := renderContentAsJSONMarkdown("", choice.Delta.Content, w, io); err != nil { - return false, fmt.Errorf("failed to render progress update content: %w", err) + fmt.Fprintf(io.ErrOut, "\nfailed to render progress update content: %v\n", err) } } case "create": args := createToolArgs{} if err := json.Unmarshal([]byte(tc.Function.Arguments), &args); err != nil { - return false, fmt.Errorf("failed to parse 'create' tool call arguments: %w", err) + fmt.Fprintf(io.ErrOut, "\nfailed to parse 'create' tool call arguments: %v\n", err) + continue } renderToolCallTitle(w, cs, "Create", cs.Bold(relativeFilePath(args.Path))) if err := renderFileContentAsMarkdown(args.Path, args.FileText, w, io); err != nil { - return false, fmt.Errorf("failed to render created file content: %w", err) + fmt.Fprintf(io.ErrOut, "\nfailed to render created file content: %v\n\n", err) + fmt.Fprintln(io.ErrOut, args.FileText) } case "str_replace": args := strReplaceToolArgs{} if err := json.Unmarshal([]byte(tc.Function.Arguments), &args); err != nil { - return false, fmt.Errorf("failed to parse 'str_replace' tool call arguments: %w", err) + fmt.Fprintf(io.ErrOut, "\nfailed to parse 'str_replace' tool call arguments: %v\n", err) + continue } renderToolCallTitle(w, cs, "Edit", cs.Bold(relativeFilePath(args.Path))) if err := renderFileContentAsMarkdown("output.diff", choice.Delta.Content, w, io); err != nil { - return false, fmt.Errorf("failed to render str_replace diff: %w", err) + fmt.Fprintf(io.ErrOut, "\nfailed to render str_replace diff: %v\n\n", err) + fmt.Fprintln(io.ErrOut, choice.Delta.Content) } default: // Unknown tool call. For example for "codeql_checker": diff --git a/pkg/cmd/agent-task/shared/log_test.go b/pkg/cmd/agent-task/shared/log_test.go index 4c95a9853..07b562dc8 100644 --- a/pkg/cmd/agent-task/shared/log_test.go +++ b/pkg/cmd/agent-task/shared/log_test.go @@ -13,19 +13,26 @@ import ( func TestFollow(t *testing.T) { tests := []struct { - name string - log string - want string + name string + log string + wantStdoutFile string + wantStderrFile string }{ { - name: "sample log 1", - log: "testdata/log-1-input.txt", - want: "testdata/log-1-want.txt", + name: "sample log 1", + log: "testdata/log-1-input.txt", + wantStdoutFile: "testdata/log-1-want.txt", }, { - name: "sample log 2", - log: "testdata/log-2-input.txt", - want: "testdata/log-2-want.txt", + name: "sample log 2", + log: "testdata/log-2-input.txt", + wantStdoutFile: "testdata/log-2-want.txt", + }, + { + name: "sample log 3 (tolerant parse failures)", + log: "testdata/log-3-synthetic-failures-input.txt", + wantStdoutFile: "testdata/log-3-synthetic-failures-want.txt", + wantStderrFile: "testdata/log-3-synthetic-failures-want-stderr.txt", }, } @@ -50,7 +57,7 @@ func TestFollow(t *testing.T) { return []byte(strings.Join(lines[0:hits], "\n\n")), nil } - ios, _, stdout, _ := iostreams.Test() + ios, _, stdout, stderr := iostreams.Test() err = NewLogRenderer().Follow(fetcher, stdout, ios) require.NoError(t, err) @@ -60,14 +67,29 @@ func TestFollow(t *testing.T) { // stripped := strings.TrimSuffix(tt.log, ext) // stripped = strings.TrimSuffix(stripped, "-input") // os.WriteFile(stripped+"-want"+ext, stdout.Bytes(), 0644) + // if tt.wantStderrFile != "" { + // os.WriteFile(stripped+"-want-stderr"+ext, stderr.Bytes(), 0644) + // } - want, err := os.ReadFile(tt.want) + wantStdout, err := os.ReadFile(tt.wantStdoutFile) require.NoError(t, err) // Normalize CRLF to LF to make the tests OS-agnostic. - want = []byte(strings.ReplaceAll(string(want), "\r\n", "\n")) + wantStdout = []byte(strings.ReplaceAll(string(wantStdout), "\r\n", "\n")) - assert.Equal(t, string(want), stdout.String()) + assert.Equal(t, string(wantStdout), stdout.String()) + + if tt.wantStderrFile != "" { + wantStderr, err := os.ReadFile(tt.wantStderrFile) + require.NoError(t, err) + + // Normalize CRLF to LF to make the tests OS-agnostic. + wantStderr = []byte(strings.ReplaceAll(string(wantStderr), "\r\n", "\n")) + + assert.Equal(t, string(wantStderr), stderr.String()) + } else { + require.Empty(t, stderr, "expected no stderr output") + } }) } } diff --git a/pkg/cmd/agent-task/shared/testdata/log-3-input.txt b/pkg/cmd/agent-task/shared/testdata/log-3-input.txt new file mode 100644 index 000000000..5fa8e0a63 --- /dev/null +++ b/pkg/cmd/agent-task/shared/testdata/log-3-input.txt @@ -0,0 +1,27 @@ +data: {"id": "bad1", "object": "chat.completion.chunk", "choices": [ { "delta": { "tool_calls": [ { "function": { "name": "view", "arguments": "{bad json" } } ] } } ] } + +data: {"id":"v1","object":"chat.completion.chunk","choices":[{"delta":{"content":"@@ -1,2 +1,2 @@\n-old line\n+new line\nunchanged line\nINSIDE A VIEW CALL","tool_calls":[{"function":{"name":"view","arguments":"{\"path\":\"/home/runner/work/repo/owner/repo/README.md\"}"},"id":"tc1","index":0}]},"finish_reason":"tool_calls","index":0}]} + +data: {"id":"v1b","object":"chat.completion.chunk","choices":[{"delta":{"content":"@@ -1,2 +1,2 @@\n-old line\n+new line\nunchanged line","tool_calls":[{"function":{"name":"view","arguments":"{\"path\":\"/home/runner/work/repo/owner/repo/README.md\"}"},"id":"tc1b","index":0}]}],"finish_reason":"tool_calls","index":0}]} + +data: {"id":"v1","object":"chat.completion.chunk","choices":[{"delta":{"content":"@@ -1,2 +1,2 @@\n-old line\n+new line\nunchanged line\nINSIDE A VIEW CALL","tool_calls":[{"function":{"name":"view","arguments":"{\"path\":\"/home/runner/work/repo/owner/repo/README.md"},"id":"tc1","index":0}]},"finish_reason":"tool_calls","index":0}]} + +data: {"id":"t1","object":"chat.completion.chunk","choices":[{"delta":{"content":"THINK","tool_calls":[{"function":{"name":"think","arguments":"{\"thought\":123"},"id":"tc2","index":0}]},"finish_reason":"tool_calls","index":0}]} + +data: {"id":"t2","object":"chat.completion.chunk","choices":[{"delta":{"content":"A valid thought to render.","reasoning_text":"Interim reasoning that should show as raw markdown.","tool_calls":[{"function":{"name":"think","arguments":"{\"thought\":\"A valid thought to render.\"}"},"id":"tc3","index":0}]},"finish_reason":"tool_calls","index":0}]} + +data: {"id":"rp1","object":"chat.completion.chunk","choices":[{"delta":{"content":"RP","tool_calls":[{"function":{"name":"report_progress","arguments":"{\"commitMessage\": 5"},"id":"tc4","index":0}]},"finish_reason":"tool_calls","index":0}]} + +data: {"id":"rp2","object":"chat.completion.chunk","choices":[{"delta":{"content":"not-json","tool_calls":[{"function":{"name":"report_progress","arguments":"{\"commitMessage\":\"Valid commit msg\"}"},"id":"tc5","index":0}]},"finish_reason":"tool_calls","index":0}]} + +data: {"id":"c1","object":"chat.completion.chunk","choices":[{"delta":{"content":"CREATE","tool_calls":[{"function":{"name":"create","arguments":"{\"path\":\"/abs/path/file.txt\""},"id":"tc6","index":0}]},"finish_reason":"tool_calls","index":0}]} + +data: {"id":"c2","object":"chat.completion.chunk","choices":[{"delta":{"content":"CREATE2","tool_calls":[{"function":{"name":"create","arguments":"{\"path\":\"/home/runner/work/repo/owner/repo/new.txt\",\"file_text\":\"hello world\"}"},"id":"tc7","index":0}]},"finish_reason":"tool_calls","index":0}]} + +data: {"id":"sr1","object":"chat.completion.chunk","choices":[{"delta":{"content":"SR","tool_calls":[{"function":{"name":"str_replace","arguments":"{\"path\":\"/home/runner/work/repo/owner/repo/file.diff"},"id":"tc8","index":0}]},"finish_reason":"tool_calls","index":0}]} + +data: {"id":"sr2","object":"chat.completion.chunk","choices":[{"delta":{"content":"@@ -1,2 +1,2 @@\n-old line\n+new line\nunchanged line","tool_calls":[{"function":{"name":"str_replace","arguments":"{\"path\":\"/home/runner/work/repo/owner/repo/file.diff\"}"},"id":"tc9","index":0}]},"finish_reason":"tool_calls","index":0}]} + +data: {"id":"u1","object":"chat.completion.chunk","choices":[{"delta":{"content":"{\"foo\":1}","tool_calls":[{"function":{"name":"mystery_tool","arguments":"{\"bar\":2}"},"id":"tc10","index":0}]},"finish_reason":"tool_calls","index":0}]} + +data: {"id":"end","object":"chat.completion.chunk","choices":[{"delta":{"content":"","tool_calls":[],"role":"assistant"},"finish_reason":"stop","index":0}]} diff --git a/pkg/cmd/agent-task/shared/testdata/log-3-synthetic-failures-input.txt b/pkg/cmd/agent-task/shared/testdata/log-3-synthetic-failures-input.txt new file mode 100644 index 000000000..5fa8e0a63 --- /dev/null +++ b/pkg/cmd/agent-task/shared/testdata/log-3-synthetic-failures-input.txt @@ -0,0 +1,27 @@ +data: {"id": "bad1", "object": "chat.completion.chunk", "choices": [ { "delta": { "tool_calls": [ { "function": { "name": "view", "arguments": "{bad json" } } ] } } ] } + +data: {"id":"v1","object":"chat.completion.chunk","choices":[{"delta":{"content":"@@ -1,2 +1,2 @@\n-old line\n+new line\nunchanged line\nINSIDE A VIEW CALL","tool_calls":[{"function":{"name":"view","arguments":"{\"path\":\"/home/runner/work/repo/owner/repo/README.md\"}"},"id":"tc1","index":0}]},"finish_reason":"tool_calls","index":0}]} + +data: {"id":"v1b","object":"chat.completion.chunk","choices":[{"delta":{"content":"@@ -1,2 +1,2 @@\n-old line\n+new line\nunchanged line","tool_calls":[{"function":{"name":"view","arguments":"{\"path\":\"/home/runner/work/repo/owner/repo/README.md\"}"},"id":"tc1b","index":0}]}],"finish_reason":"tool_calls","index":0}]} + +data: {"id":"v1","object":"chat.completion.chunk","choices":[{"delta":{"content":"@@ -1,2 +1,2 @@\n-old line\n+new line\nunchanged line\nINSIDE A VIEW CALL","tool_calls":[{"function":{"name":"view","arguments":"{\"path\":\"/home/runner/work/repo/owner/repo/README.md"},"id":"tc1","index":0}]},"finish_reason":"tool_calls","index":0}]} + +data: {"id":"t1","object":"chat.completion.chunk","choices":[{"delta":{"content":"THINK","tool_calls":[{"function":{"name":"think","arguments":"{\"thought\":123"},"id":"tc2","index":0}]},"finish_reason":"tool_calls","index":0}]} + +data: {"id":"t2","object":"chat.completion.chunk","choices":[{"delta":{"content":"A valid thought to render.","reasoning_text":"Interim reasoning that should show as raw markdown.","tool_calls":[{"function":{"name":"think","arguments":"{\"thought\":\"A valid thought to render.\"}"},"id":"tc3","index":0}]},"finish_reason":"tool_calls","index":0}]} + +data: {"id":"rp1","object":"chat.completion.chunk","choices":[{"delta":{"content":"RP","tool_calls":[{"function":{"name":"report_progress","arguments":"{\"commitMessage\": 5"},"id":"tc4","index":0}]},"finish_reason":"tool_calls","index":0}]} + +data: {"id":"rp2","object":"chat.completion.chunk","choices":[{"delta":{"content":"not-json","tool_calls":[{"function":{"name":"report_progress","arguments":"{\"commitMessage\":\"Valid commit msg\"}"},"id":"tc5","index":0}]},"finish_reason":"tool_calls","index":0}]} + +data: {"id":"c1","object":"chat.completion.chunk","choices":[{"delta":{"content":"CREATE","tool_calls":[{"function":{"name":"create","arguments":"{\"path\":\"/abs/path/file.txt\""},"id":"tc6","index":0}]},"finish_reason":"tool_calls","index":0}]} + +data: {"id":"c2","object":"chat.completion.chunk","choices":[{"delta":{"content":"CREATE2","tool_calls":[{"function":{"name":"create","arguments":"{\"path\":\"/home/runner/work/repo/owner/repo/new.txt\",\"file_text\":\"hello world\"}"},"id":"tc7","index":0}]},"finish_reason":"tool_calls","index":0}]} + +data: {"id":"sr1","object":"chat.completion.chunk","choices":[{"delta":{"content":"SR","tool_calls":[{"function":{"name":"str_replace","arguments":"{\"path\":\"/home/runner/work/repo/owner/repo/file.diff"},"id":"tc8","index":0}]},"finish_reason":"tool_calls","index":0}]} + +data: {"id":"sr2","object":"chat.completion.chunk","choices":[{"delta":{"content":"@@ -1,2 +1,2 @@\n-old line\n+new line\nunchanged line","tool_calls":[{"function":{"name":"str_replace","arguments":"{\"path\":\"/home/runner/work/repo/owner/repo/file.diff\"}"},"id":"tc9","index":0}]},"finish_reason":"tool_calls","index":0}]} + +data: {"id":"u1","object":"chat.completion.chunk","choices":[{"delta":{"content":"{\"foo\":1}","tool_calls":[{"function":{"name":"mystery_tool","arguments":"{\"bar\":2}"},"id":"tc10","index":0}]},"finish_reason":"tool_calls","index":0}]} + +data: {"id":"end","object":"chat.completion.chunk","choices":[{"delta":{"content":"","tool_calls":[],"role":"assistant"},"finish_reason":"stop","index":0}]} diff --git a/pkg/cmd/agent-task/shared/testdata/log-3-synthetic-failures-want-stderr.txt b/pkg/cmd/agent-task/shared/testdata/log-3-synthetic-failures-want-stderr.txt new file mode 100644 index 000000000..199ab66f3 --- /dev/null +++ b/pkg/cmd/agent-task/shared/testdata/log-3-synthetic-failures-want-stderr.txt @@ -0,0 +1,10 @@ + +failed to parse 'view' tool call arguments: unexpected end of JSON input + +failed to parse 'think' tool call arguments: unexpected end of JSON input + +failed to parse 'report_progress' tool call arguments: unexpected end of JSON input + +failed to parse 'create' tool call arguments: unexpected end of JSON input + +failed to parse 'str_replace' tool call arguments: unexpected end of JSON input diff --git a/pkg/cmd/agent-task/shared/testdata/log-3-synthetic-failures-want.txt b/pkg/cmd/agent-task/shared/testdata/log-3-synthetic-failures-want.txt new file mode 100644 index 000000000..52a36d427 --- /dev/null +++ b/pkg/cmd/agent-task/shared/testdata/log-3-synthetic-failures-want.txt @@ -0,0 +1,39 @@ +View repo/README.md + +old line + new line + unchanged line + INSIDE A VIEW CALL + + +Interim reasoning that should show as raw markdown. + +Thought + +A valid thought to render. + +Progress update: Valid commit msg +Create: repo/new.txt +hello world + +Edit: repo/file.diff +@@ -1,2 +1,2 @@ + -old line + +new line + unchanged line + +Call to mystery_tool + +Output: + +{ + "foo": 1 + } + + +Input: + +{ + "bar": 2 + } + diff --git a/pkg/cmd/agent-task/shared/testdata/log-3-want.txt b/pkg/cmd/agent-task/shared/testdata/log-3-want.txt new file mode 100644 index 000000000..52a36d427 --- /dev/null +++ b/pkg/cmd/agent-task/shared/testdata/log-3-want.txt @@ -0,0 +1,39 @@ +View repo/README.md + +old line + new line + unchanged line + INSIDE A VIEW CALL + + +Interim reasoning that should show as raw markdown. + +Thought + +A valid thought to render. + +Progress update: Valid commit msg +Create: repo/new.txt +hello world + +Edit: repo/file.diff +@@ -1,2 +1,2 @@ + -old line + +new line + unchanged line + +Call to mystery_tool + +Output: + +{ + "foo": 1 + } + + +Input: + +{ + "bar": 2 + } +