From 32bf9159ea429de66c8130760009c5343e3ca711 Mon Sep 17 00:00:00 2001 From: Kynan Ware <47394200+BagToad@users.noreply.github.com> Date: Wed, 3 Sep 2025 12:34:13 -0600 Subject: [PATCH] Improve job error handling and update tests Normalizes job API error messages to include status code and text, ensures errors from job polling are surfaced to stderr without halting execution, and updates tests to verify error output and remove outdated fallback behavior. --- pkg/cmd/agent-task/capi/job.go | 4 +- pkg/cmd/agent-task/create/create.go | 22 ++++++----- pkg/cmd/agent-task/create/create_test.go | 49 +++++++++++++----------- 3 files changed, 41 insertions(+), 34 deletions(-) diff --git a/pkg/cmd/agent-task/capi/job.go b/pkg/cmd/agent-task/capi/job.go index f504b66e0..ec215c5fe 100644 --- a/pkg/cmd/agent-task/capi/job.go +++ b/pkg/cmd/agent-task/capi/job.go @@ -116,7 +116,9 @@ func (c *CAPIClient) GetJob(ctx context.Context, owner, repo, jobID string) (*Jo } defer res.Body.Close() if res.StatusCode != http.StatusOK { - return nil, fmt.Errorf("failed to get job: %s", res.Status) + // Normalize to " " form + statusText := fmt.Sprintf("%d %s", res.StatusCode, http.StatusText(res.StatusCode)) + return nil, fmt.Errorf("failed to get job: %s", statusText) } var j Job if err := json.NewDecoder(res.Body).Decode(&j); err != nil { diff --git a/pkg/cmd/agent-task/create/create.go b/pkg/cmd/agent-task/create/create.go index af3b27bfa..61e3d54b9 100644 --- a/pkg/cmd/agent-task/create/create.go +++ b/pkg/cmd/agent-task/create/create.go @@ -4,7 +4,6 @@ import ( "context" "errors" "fmt" - "io" "net/url" "time" @@ -120,9 +119,12 @@ func createRun(opts *CreateOptions) error { ) } - jobWithPR, err := fetchJobWithBackoff(ctx, client, repo, job.ID, opts.IO.ErrOut, opts.BackOff) + jobWithPR, err := fetchJobWithBackoff(ctx, client, repo, job.ID, opts.BackOff) if err != nil { - return err + // If this does happen ever, we still want the user to get the + // fallback message and URL. So, we don't return with this error, + // but we do still want to print it. + fmt.Fprintf(opts.IO.ErrOut, "%v\n", err) } if jobWithPR != nil { @@ -150,16 +152,16 @@ func agentSessionWebURL(repo ghrepo.Interface, j *capi.Job) string { // fetchJobWithBackoff polls the job resource until a PR number is present or the overall // timeout elapses. It returns the updated Job on success, (nil, nil) on timeout, // and (nil, error) only for non-retryable failures. -func fetchJobWithBackoff(ctx context.Context, client capi.CapiClient, repo ghrepo.Interface, jobID string, errOut io.Writer, bo backoff.BackOff) (*capi.Job, error) { - // sentinel error to signal retry without surfacing to caller +func fetchJobWithBackoff(ctx context.Context, client capi.CapiClient, repo ghrepo.Interface, jobID string, bo backoff.BackOff) (*capi.Job, error) { + // sentinel error to signal timeout var errPRNotReady = errors.New("job not ready") var result *capi.Job retryErr := backoff.Retry(func() error { - j, getErr := client.GetJob(ctx, repo.RepoOwner(), repo.RepoName(), jobID) - if getErr != nil { - fmt.Fprintf(errOut, "warning: failed to get job status: %v\n", getErr) - return errPRNotReady + j, err := client.GetJob(ctx, repo.RepoOwner(), repo.RepoName(), jobID) + if err != nil { + // Do not retry on GetJob errors; surface immediately. + return backoff.Permanent(err) } if j.PullRequest != nil && j.PullRequest.Number > 0 { result = j @@ -170,7 +172,7 @@ func fetchJobWithBackoff(ctx context.Context, client capi.CapiClient, repo ghrep if retryErr != nil { if errors.Is(retryErr, errPRNotReady) { - // Timed out or failed to fetch + // Timed out return nil, nil } return nil, retryErr diff --git a/pkg/cmd/agent-task/create/create_test.go b/pkg/cmd/agent-task/create/create_test.go index 3e7ba4043..aec6831c4 100644 --- a/pkg/cmd/agent-task/create/create_test.go +++ b/pkg/cmd/agent-task/create/create_test.go @@ -57,8 +57,26 @@ func Test_createRun(t *testing.T) { baseRepoErr error problemStatement string wantStdout string + wantStdErr string wantErr string }{ + { + name: "get job API failure surfaces error", + baseRepo: ghrepo.New("OWNER", "REPO"), + problemStatement: "Do the thing", + stubs: func(reg *httpmock.Registry) { + reg.Register( + httpmock.WithHost(httpmock.REST("POST", "agents/swe/v1/jobs/OWNER/REPO"), "api.githubcopilot.com"), + httpmock.StatusStringResponse(201, createdJobTimeoutResponse), + ) + reg.Register( + httpmock.WithHost(httpmock.REST("GET", "agents/swe/v1/jobs/OWNER/REPO/jobABC"), "api.githubcopilot.com"), + httpmock.StatusStringResponse(500, `{"error":{"message":"internal server error"}}`), + ) + }, + wantStdErr: "failed to get job: 500 Internal Server Error\n", + wantStdout: "job jobABC queued. View progress: https://github.com/copilot/agents\n", + }, { name: "success with immediate PR", baseRepo: ghrepo.New("OWNER", "REPO"), @@ -96,7 +114,8 @@ func Test_createRun(t *testing.T) { httpmock.WithHost(httpmock.REST("POST", "agents/swe/v1/jobs/OWNER/REPO"), "api.githubcopilot.com"), httpmock.StatusStringResponse(201, createdJobTimeoutResponse), ) - for range 3 { + // 4 attempts: initial + 3 retries + for range 4 { reg.Register( httpmock.WithHost(httpmock.REST("GET", "agents/swe/v1/jobs/OWNER/REPO/jobABC"), "api.githubcopilot.com"), httpmock.StringResponse(`{"job_id":"jobABC"}`), @@ -123,31 +142,12 @@ func Test_createRun(t *testing.T) { }, wantErr: "failed to create job: some API error", }, - { - name: "error fetching job during polling returns error and falls back to global agents page", - baseRepo: ghrepo.New("OWNER", "REPO"), - problemStatement: "Do the thing", - stubs: func(reg *httpmock.Registry) { - reg.Register( - httpmock.WithHost(httpmock.REST("POST", "agents/swe/v1/jobs/OWNER/REPO"), "api.githubcopilot.com"), - httpmock.StatusStringResponse(201, createdJobTimeoutResponse), - ) - reg.Register( - httpmock.WithHost(httpmock.REST("GET", "agents/swe/v1/jobs/OWNER/REPO/jobABC"), "api.githubcopilot.com"), - httpmock.StringResponse(`{"job_id":"jobABC"}`), - ) - reg.Register( - httpmock.WithHost(httpmock.REST("GET", "agents/swe/v1/jobs/OWNER/REPO/jobABC"), "api.githubcopilot.com"), - httpmock.StatusStringResponse(500, `{"error":{"message":"something went wrong"}}`), - ) - }, - wantStdout: "job jobABC queued. View progress: https://github.com/copilot/agents\n", - }, + // Removed test case that previously expected fallback after polling error. } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - ios, _, stdout, _ := iostreams.Test() + ios, _, stdout, stderr := iostreams.Test() opts := &CreateOptions{ IO: ios, ProblemStatement: tt.problemStatement, @@ -176,13 +176,16 @@ func Test_createRun(t *testing.T) { if tt.wantErr != "" { require.Error(t, err) - require.Equal(t, err.Error(), tt.wantErr) + require.Equal(t, tt.wantErr, err.Error()) } else { require.NoError(t, err) } if tt.wantStdout != "" { require.Equal(t, tt.wantStdout, stdout.String()) } + + require.Equal(t, tt.wantStdErr, stderr.String()) + if tt.stubs != nil { reg.Verify(t) }