diff --git a/pkg/cmd/agent-task/shared/log.go b/pkg/cmd/agent-task/shared/log.go index e58bed79a..a1b1740e0 100644 --- a/pkg/cmd/agent-task/shared/log.go +++ b/pkg/cmd/agent-task/shared/log.go @@ -133,15 +133,16 @@ func renderLogEntry(entry chatCompletionChunkEntry, w io.Writer, io *iostreams.I case "view": args := viewToolArgs{} if err := json.Unmarshal([]byte(tc.Function.Arguments), &args); err != nil { - return false, fmt.Errorf("failed to parse 'view' tool call arguments: %w", err) + fmt.Fprintf(io.ErrOut, "\nfailed to parse 'view' tool call arguments: %v\n", err) + continue } - fmt.Fprintf(w, "View %s\n", cs.Bold(relativeFilePath(args.Path))) + renderToolCallTitle(w, cs, fmt.Sprintf("View %s", cs.Bold(relativeFilePath(args.Path))), "") content := stripDiffFormat(choice.Delta.Content) - // TODO: Strip the diff formatting from this, but for now render as it is. if err := renderFileContentAsMarkdown(args.Path, content, w, io); err != nil { - return false, fmt.Errorf("failed to render viewed file content: %w", err) + fmt.Fprintf(io.ErrOut, "\nfailed to render viewed file content: %v\n\n", err) + fmt.Fprintln(io.ErrOut, content) // raw fallback } case "bash": if v := unmarshal[bashToolArgs](args); v != nil { @@ -156,7 +157,8 @@ func renderLogEntry(entry chatCompletionChunkEntry, w io.Writer, io *iostreams.I contentWithCommand = fmt.Sprintf("$ %s\n%s", v.Command, choice.Delta.Content) } if err := renderFileContentAsMarkdown("commands.sh", contentWithCommand, w, io); err != nil { - return false, fmt.Errorf("failed to render bash command output: %w", err) + fmt.Fprintf(io.ErrOut, "\nfailed to render bash command output: %v\n\n", err) + fmt.Fprintln(io.ErrOut, contentWithCommand) } } // TODO: consider including more details for these bash-related tool calls. @@ -193,24 +195,26 @@ func renderLogEntry(entry chatCompletionChunkEntry, w io.Writer, io *iostreams.I case "think": args := thinkToolArgs{} if err := json.Unmarshal([]byte(tc.Function.Arguments), &args); err != nil { - return false, fmt.Errorf("failed to parse 'think' tool call arguments: %w", err) + fmt.Fprintf(io.ErrOut, "\nfailed to parse 'think' tool call arguments: %v\n", err) + continue } // NOTE: omit the delta.content since it's the same as thought renderToolCallTitle(w, cs, "Thought", "") if err := renderRawMarkdown(args.Thought, w, io); err != nil { - return false, fmt.Errorf("failed to render thought: %w", err) + fmt.Fprintf(io.ErrOut, "\nfailed to render thought: %v\n", err) } case "report_progress": args := reportProgressToolArgs{} if err := json.Unmarshal([]byte(tc.Function.Arguments), &args); err != nil { - return false, fmt.Errorf("failed to parse 'report_progress' tool call arguments: %w", err) + fmt.Fprintf(io.ErrOut, "\nfailed to parse 'report_progress' tool call arguments: %v\n", err) + continue } renderToolCallTitle(w, cs, "Progress update", cs.Bold(args.CommitMessage)) if args.PrDescription != "" { if err := renderRawMarkdown(args.PrDescription, w, io); err != nil { - return false, fmt.Errorf("failed to render PR description: %w", err) + fmt.Fprintf(io.ErrOut, "\nfailed to render PR description: %v\n", err) } } @@ -218,29 +222,33 @@ func renderLogEntry(entry chatCompletionChunkEntry, w io.Writer, io *iostreams.I if choice.Delta.Content != "" { // Try to treat this as JSON if err := renderContentAsJSONMarkdown("", choice.Delta.Content, w, io); err != nil { - return false, fmt.Errorf("failed to render progress update content: %w", err) + fmt.Fprintf(io.ErrOut, "\nfailed to render progress update content: %v\n", err) } } case "create": args := createToolArgs{} if err := json.Unmarshal([]byte(tc.Function.Arguments), &args); err != nil { - return false, fmt.Errorf("failed to parse 'create' tool call arguments: %w", err) + fmt.Fprintf(io.ErrOut, "\nfailed to parse 'create' tool call arguments: %v\n", err) + continue } renderToolCallTitle(w, cs, "Create", cs.Bold(relativeFilePath(args.Path))) if err := renderFileContentAsMarkdown(args.Path, args.FileText, w, io); err != nil { - return false, fmt.Errorf("failed to render created file content: %w", err) + fmt.Fprintf(io.ErrOut, "\nfailed to render created file content: %v\n\n", err) + fmt.Fprintln(io.ErrOut, args.FileText) } case "str_replace": args := strReplaceToolArgs{} if err := json.Unmarshal([]byte(tc.Function.Arguments), &args); err != nil { - return false, fmt.Errorf("failed to parse 'str_replace' tool call arguments: %w", err) + fmt.Fprintf(io.ErrOut, "\nfailed to parse 'str_replace' tool call arguments: %v\n", err) + continue } renderToolCallTitle(w, cs, "Edit", cs.Bold(relativeFilePath(args.Path))) if err := renderFileContentAsMarkdown("output.diff", choice.Delta.Content, w, io); err != nil { - return false, fmt.Errorf("failed to render str_replace diff: %w", err) + fmt.Fprintf(io.ErrOut, "\nfailed to render str_replace diff: %v\n\n", err) + fmt.Fprintln(io.ErrOut, choice.Delta.Content) } default: // Unknown tool call. For example for "codeql_checker": diff --git a/pkg/cmd/agent-task/shared/log_test.go b/pkg/cmd/agent-task/shared/log_test.go index 4c95a9853..07b562dc8 100644 --- a/pkg/cmd/agent-task/shared/log_test.go +++ b/pkg/cmd/agent-task/shared/log_test.go @@ -13,19 +13,26 @@ import ( func TestFollow(t *testing.T) { tests := []struct { - name string - log string - want string + name string + log string + wantStdoutFile string + wantStderrFile string }{ { - name: "sample log 1", - log: "testdata/log-1-input.txt", - want: "testdata/log-1-want.txt", + name: "sample log 1", + log: "testdata/log-1-input.txt", + wantStdoutFile: "testdata/log-1-want.txt", }, { - name: "sample log 2", - log: "testdata/log-2-input.txt", - want: "testdata/log-2-want.txt", + name: "sample log 2", + log: "testdata/log-2-input.txt", + wantStdoutFile: "testdata/log-2-want.txt", + }, + { + name: "sample log 3 (tolerant parse failures)", + log: "testdata/log-3-synthetic-failures-input.txt", + wantStdoutFile: "testdata/log-3-synthetic-failures-want.txt", + wantStderrFile: "testdata/log-3-synthetic-failures-want-stderr.txt", }, } @@ -50,7 +57,7 @@ func TestFollow(t *testing.T) { return []byte(strings.Join(lines[0:hits], "\n\n")), nil } - ios, _, stdout, _ := iostreams.Test() + ios, _, stdout, stderr := iostreams.Test() err = NewLogRenderer().Follow(fetcher, stdout, ios) require.NoError(t, err) @@ -60,14 +67,29 @@ func TestFollow(t *testing.T) { // stripped := strings.TrimSuffix(tt.log, ext) // stripped = strings.TrimSuffix(stripped, "-input") // os.WriteFile(stripped+"-want"+ext, stdout.Bytes(), 0644) + // if tt.wantStderrFile != "" { + // os.WriteFile(stripped+"-want-stderr"+ext, stderr.Bytes(), 0644) + // } - want, err := os.ReadFile(tt.want) + wantStdout, err := os.ReadFile(tt.wantStdoutFile) require.NoError(t, err) // Normalize CRLF to LF to make the tests OS-agnostic. - want = []byte(strings.ReplaceAll(string(want), "\r\n", "\n")) + wantStdout = []byte(strings.ReplaceAll(string(wantStdout), "\r\n", "\n")) - assert.Equal(t, string(want), stdout.String()) + assert.Equal(t, string(wantStdout), stdout.String()) + + if tt.wantStderrFile != "" { + wantStderr, err := os.ReadFile(tt.wantStderrFile) + require.NoError(t, err) + + // Normalize CRLF to LF to make the tests OS-agnostic. + wantStderr = []byte(strings.ReplaceAll(string(wantStderr), "\r\n", "\n")) + + assert.Equal(t, string(wantStderr), stderr.String()) + } else { + require.Empty(t, stderr, "expected no stderr output") + } }) } } diff --git a/pkg/cmd/agent-task/shared/testdata/log-3-input.txt b/pkg/cmd/agent-task/shared/testdata/log-3-input.txt new file mode 100644 index 000000000..5fa8e0a63 --- /dev/null +++ b/pkg/cmd/agent-task/shared/testdata/log-3-input.txt @@ -0,0 +1,27 @@ +data: {"id": "bad1", "object": "chat.completion.chunk", "choices": [ { "delta": { "tool_calls": [ { "function": { "name": "view", "arguments": "{bad json" } } ] } } ] } + +data: {"id":"v1","object":"chat.completion.chunk","choices":[{"delta":{"content":"@@ -1,2 +1,2 @@\n-old line\n+new line\nunchanged line\nINSIDE A VIEW CALL","tool_calls":[{"function":{"name":"view","arguments":"{\"path\":\"/home/runner/work/repo/owner/repo/README.md\"}"},"id":"tc1","index":0}]},"finish_reason":"tool_calls","index":0}]} + +data: {"id":"v1b","object":"chat.completion.chunk","choices":[{"delta":{"content":"@@ -1,2 +1,2 @@\n-old line\n+new line\nunchanged line","tool_calls":[{"function":{"name":"view","arguments":"{\"path\":\"/home/runner/work/repo/owner/repo/README.md\"}"},"id":"tc1b","index":0}]}],"finish_reason":"tool_calls","index":0}]} + +data: {"id":"v1","object":"chat.completion.chunk","choices":[{"delta":{"content":"@@ -1,2 +1,2 @@\n-old line\n+new line\nunchanged line\nINSIDE A VIEW CALL","tool_calls":[{"function":{"name":"view","arguments":"{\"path\":\"/home/runner/work/repo/owner/repo/README.md"},"id":"tc1","index":0}]},"finish_reason":"tool_calls","index":0}]} + +data: {"id":"t1","object":"chat.completion.chunk","choices":[{"delta":{"content":"THINK","tool_calls":[{"function":{"name":"think","arguments":"{\"thought\":123"},"id":"tc2","index":0}]},"finish_reason":"tool_calls","index":0}]} + +data: {"id":"t2","object":"chat.completion.chunk","choices":[{"delta":{"content":"A valid thought to render.","reasoning_text":"Interim reasoning that should show as raw markdown.","tool_calls":[{"function":{"name":"think","arguments":"{\"thought\":\"A valid thought to render.\"}"},"id":"tc3","index":0}]},"finish_reason":"tool_calls","index":0}]} + +data: {"id":"rp1","object":"chat.completion.chunk","choices":[{"delta":{"content":"RP","tool_calls":[{"function":{"name":"report_progress","arguments":"{\"commitMessage\": 5"},"id":"tc4","index":0}]},"finish_reason":"tool_calls","index":0}]} + +data: {"id":"rp2","object":"chat.completion.chunk","choices":[{"delta":{"content":"not-json","tool_calls":[{"function":{"name":"report_progress","arguments":"{\"commitMessage\":\"Valid commit msg\"}"},"id":"tc5","index":0}]},"finish_reason":"tool_calls","index":0}]} + +data: {"id":"c1","object":"chat.completion.chunk","choices":[{"delta":{"content":"CREATE","tool_calls":[{"function":{"name":"create","arguments":"{\"path\":\"/abs/path/file.txt\""},"id":"tc6","index":0}]},"finish_reason":"tool_calls","index":0}]} + +data: {"id":"c2","object":"chat.completion.chunk","choices":[{"delta":{"content":"CREATE2","tool_calls":[{"function":{"name":"create","arguments":"{\"path\":\"/home/runner/work/repo/owner/repo/new.txt\",\"file_text\":\"hello world\"}"},"id":"tc7","index":0}]},"finish_reason":"tool_calls","index":0}]} + +data: {"id":"sr1","object":"chat.completion.chunk","choices":[{"delta":{"content":"SR","tool_calls":[{"function":{"name":"str_replace","arguments":"{\"path\":\"/home/runner/work/repo/owner/repo/file.diff"},"id":"tc8","index":0}]},"finish_reason":"tool_calls","index":0}]} + +data: {"id":"sr2","object":"chat.completion.chunk","choices":[{"delta":{"content":"@@ -1,2 +1,2 @@\n-old line\n+new line\nunchanged line","tool_calls":[{"function":{"name":"str_replace","arguments":"{\"path\":\"/home/runner/work/repo/owner/repo/file.diff\"}"},"id":"tc9","index":0}]},"finish_reason":"tool_calls","index":0}]} + +data: {"id":"u1","object":"chat.completion.chunk","choices":[{"delta":{"content":"{\"foo\":1}","tool_calls":[{"function":{"name":"mystery_tool","arguments":"{\"bar\":2}"},"id":"tc10","index":0}]},"finish_reason":"tool_calls","index":0}]} + +data: {"id":"end","object":"chat.completion.chunk","choices":[{"delta":{"content":"","tool_calls":[],"role":"assistant"},"finish_reason":"stop","index":0}]} diff --git a/pkg/cmd/agent-task/shared/testdata/log-3-synthetic-failures-input.txt b/pkg/cmd/agent-task/shared/testdata/log-3-synthetic-failures-input.txt new file mode 100644 index 000000000..5fa8e0a63 --- /dev/null +++ b/pkg/cmd/agent-task/shared/testdata/log-3-synthetic-failures-input.txt @@ -0,0 +1,27 @@ +data: {"id": "bad1", "object": "chat.completion.chunk", "choices": [ { "delta": { "tool_calls": [ { "function": { "name": "view", "arguments": "{bad json" } } ] } } ] } + +data: {"id":"v1","object":"chat.completion.chunk","choices":[{"delta":{"content":"@@ -1,2 +1,2 @@\n-old line\n+new line\nunchanged line\nINSIDE A VIEW CALL","tool_calls":[{"function":{"name":"view","arguments":"{\"path\":\"/home/runner/work/repo/owner/repo/README.md\"}"},"id":"tc1","index":0}]},"finish_reason":"tool_calls","index":0}]} + +data: {"id":"v1b","object":"chat.completion.chunk","choices":[{"delta":{"content":"@@ -1,2 +1,2 @@\n-old line\n+new line\nunchanged line","tool_calls":[{"function":{"name":"view","arguments":"{\"path\":\"/home/runner/work/repo/owner/repo/README.md\"}"},"id":"tc1b","index":0}]}],"finish_reason":"tool_calls","index":0}]} + +data: {"id":"v1","object":"chat.completion.chunk","choices":[{"delta":{"content":"@@ -1,2 +1,2 @@\n-old line\n+new line\nunchanged line\nINSIDE A VIEW CALL","tool_calls":[{"function":{"name":"view","arguments":"{\"path\":\"/home/runner/work/repo/owner/repo/README.md"},"id":"tc1","index":0}]},"finish_reason":"tool_calls","index":0}]} + +data: {"id":"t1","object":"chat.completion.chunk","choices":[{"delta":{"content":"THINK","tool_calls":[{"function":{"name":"think","arguments":"{\"thought\":123"},"id":"tc2","index":0}]},"finish_reason":"tool_calls","index":0}]} + +data: {"id":"t2","object":"chat.completion.chunk","choices":[{"delta":{"content":"A valid thought to render.","reasoning_text":"Interim reasoning that should show as raw markdown.","tool_calls":[{"function":{"name":"think","arguments":"{\"thought\":\"A valid thought to render.\"}"},"id":"tc3","index":0}]},"finish_reason":"tool_calls","index":0}]} + +data: {"id":"rp1","object":"chat.completion.chunk","choices":[{"delta":{"content":"RP","tool_calls":[{"function":{"name":"report_progress","arguments":"{\"commitMessage\": 5"},"id":"tc4","index":0}]},"finish_reason":"tool_calls","index":0}]} + +data: {"id":"rp2","object":"chat.completion.chunk","choices":[{"delta":{"content":"not-json","tool_calls":[{"function":{"name":"report_progress","arguments":"{\"commitMessage\":\"Valid commit msg\"}"},"id":"tc5","index":0}]},"finish_reason":"tool_calls","index":0}]} + +data: {"id":"c1","object":"chat.completion.chunk","choices":[{"delta":{"content":"CREATE","tool_calls":[{"function":{"name":"create","arguments":"{\"path\":\"/abs/path/file.txt\""},"id":"tc6","index":0}]},"finish_reason":"tool_calls","index":0}]} + +data: {"id":"c2","object":"chat.completion.chunk","choices":[{"delta":{"content":"CREATE2","tool_calls":[{"function":{"name":"create","arguments":"{\"path\":\"/home/runner/work/repo/owner/repo/new.txt\",\"file_text\":\"hello world\"}"},"id":"tc7","index":0}]},"finish_reason":"tool_calls","index":0}]} + +data: {"id":"sr1","object":"chat.completion.chunk","choices":[{"delta":{"content":"SR","tool_calls":[{"function":{"name":"str_replace","arguments":"{\"path\":\"/home/runner/work/repo/owner/repo/file.diff"},"id":"tc8","index":0}]},"finish_reason":"tool_calls","index":0}]} + +data: {"id":"sr2","object":"chat.completion.chunk","choices":[{"delta":{"content":"@@ -1,2 +1,2 @@\n-old line\n+new line\nunchanged line","tool_calls":[{"function":{"name":"str_replace","arguments":"{\"path\":\"/home/runner/work/repo/owner/repo/file.diff\"}"},"id":"tc9","index":0}]},"finish_reason":"tool_calls","index":0}]} + +data: {"id":"u1","object":"chat.completion.chunk","choices":[{"delta":{"content":"{\"foo\":1}","tool_calls":[{"function":{"name":"mystery_tool","arguments":"{\"bar\":2}"},"id":"tc10","index":0}]},"finish_reason":"tool_calls","index":0}]} + +data: {"id":"end","object":"chat.completion.chunk","choices":[{"delta":{"content":"","tool_calls":[],"role":"assistant"},"finish_reason":"stop","index":0}]} diff --git a/pkg/cmd/agent-task/shared/testdata/log-3-synthetic-failures-want-stderr.txt b/pkg/cmd/agent-task/shared/testdata/log-3-synthetic-failures-want-stderr.txt new file mode 100644 index 000000000..199ab66f3 --- /dev/null +++ b/pkg/cmd/agent-task/shared/testdata/log-3-synthetic-failures-want-stderr.txt @@ -0,0 +1,10 @@ + +failed to parse 'view' tool call arguments: unexpected end of JSON input + +failed to parse 'think' tool call arguments: unexpected end of JSON input + +failed to parse 'report_progress' tool call arguments: unexpected end of JSON input + +failed to parse 'create' tool call arguments: unexpected end of JSON input + +failed to parse 'str_replace' tool call arguments: unexpected end of JSON input diff --git a/pkg/cmd/agent-task/shared/testdata/log-3-synthetic-failures-want.txt b/pkg/cmd/agent-task/shared/testdata/log-3-synthetic-failures-want.txt new file mode 100644 index 000000000..52a36d427 --- /dev/null +++ b/pkg/cmd/agent-task/shared/testdata/log-3-synthetic-failures-want.txt @@ -0,0 +1,39 @@ +View repo/README.md + +old line + new line + unchanged line + INSIDE A VIEW CALL + + +Interim reasoning that should show as raw markdown. + +Thought + +A valid thought to render. + +Progress update: Valid commit msg +Create: repo/new.txt +hello world + +Edit: repo/file.diff +@@ -1,2 +1,2 @@ + -old line + +new line + unchanged line + +Call to mystery_tool + +Output: + +{ + "foo": 1 + } + + +Input: + +{ + "bar": 2 + } + diff --git a/pkg/cmd/agent-task/shared/testdata/log-3-want.txt b/pkg/cmd/agent-task/shared/testdata/log-3-want.txt new file mode 100644 index 000000000..52a36d427 --- /dev/null +++ b/pkg/cmd/agent-task/shared/testdata/log-3-want.txt @@ -0,0 +1,39 @@ +View repo/README.md + +old line + new line + unchanged line + INSIDE A VIEW CALL + + +Interim reasoning that should show as raw markdown. + +Thought + +A valid thought to render. + +Progress update: Valid commit msg +Create: repo/new.txt +hello world + +Edit: repo/file.diff +@@ -1,2 +1,2 @@ + -old line + +new line + unchanged line + +Call to mystery_tool + +Output: + +{ + "foo": 1 + } + + +Input: + +{ + "bar": 2 + } +