Improve job error handling and update tests

Normalizes job API error messages to include status code and text, ensures errors from job polling are surfaced to stderr without halting execution, and updates tests to verify error output and remove outdated fallback behavior.
This commit is contained in:
Kynan Ware 2025-09-03 12:34:13 -06:00
parent 5472fcffe9
commit 32bf9159ea
3 changed files with 41 additions and 34 deletions

View file

@ -116,7 +116,9 @@ func (c *CAPIClient) GetJob(ctx context.Context, owner, repo, jobID string) (*Jo
}
defer res.Body.Close()
if res.StatusCode != http.StatusOK {
return nil, fmt.Errorf("failed to get job: %s", res.Status)
// Normalize to "<code> <text>" form
statusText := fmt.Sprintf("%d %s", res.StatusCode, http.StatusText(res.StatusCode))
return nil, fmt.Errorf("failed to get job: %s", statusText)
}
var j Job
if err := json.NewDecoder(res.Body).Decode(&j); err != nil {

View file

@ -4,7 +4,6 @@ import (
"context"
"errors"
"fmt"
"io"
"net/url"
"time"
@ -120,9 +119,12 @@ func createRun(opts *CreateOptions) error {
)
}
jobWithPR, err := fetchJobWithBackoff(ctx, client, repo, job.ID, opts.IO.ErrOut, opts.BackOff)
jobWithPR, err := fetchJobWithBackoff(ctx, client, repo, job.ID, opts.BackOff)
if err != nil {
return err
// If this does happen ever, we still want the user to get the
// fallback message and URL. So, we don't return with this error,
// but we do still want to print it.
fmt.Fprintf(opts.IO.ErrOut, "%v\n", err)
}
if jobWithPR != nil {
@ -150,16 +152,16 @@ func agentSessionWebURL(repo ghrepo.Interface, j *capi.Job) string {
// fetchJobWithBackoff polls the job resource until a PR number is present or the overall
// timeout elapses. It returns the updated Job on success, (nil, nil) on timeout,
// and (nil, error) only for non-retryable failures.
func fetchJobWithBackoff(ctx context.Context, client capi.CapiClient, repo ghrepo.Interface, jobID string, errOut io.Writer, bo backoff.BackOff) (*capi.Job, error) {
// sentinel error to signal retry without surfacing to caller
func fetchJobWithBackoff(ctx context.Context, client capi.CapiClient, repo ghrepo.Interface, jobID string, bo backoff.BackOff) (*capi.Job, error) {
// sentinel error to signal timeout
var errPRNotReady = errors.New("job not ready")
var result *capi.Job
retryErr := backoff.Retry(func() error {
j, getErr := client.GetJob(ctx, repo.RepoOwner(), repo.RepoName(), jobID)
if getErr != nil {
fmt.Fprintf(errOut, "warning: failed to get job status: %v\n", getErr)
return errPRNotReady
j, err := client.GetJob(ctx, repo.RepoOwner(), repo.RepoName(), jobID)
if err != nil {
// Do not retry on GetJob errors; surface immediately.
return backoff.Permanent(err)
}
if j.PullRequest != nil && j.PullRequest.Number > 0 {
result = j
@ -170,7 +172,7 @@ func fetchJobWithBackoff(ctx context.Context, client capi.CapiClient, repo ghrep
if retryErr != nil {
if errors.Is(retryErr, errPRNotReady) {
// Timed out or failed to fetch
// Timed out
return nil, nil
}
return nil, retryErr

View file

@ -57,8 +57,26 @@ func Test_createRun(t *testing.T) {
baseRepoErr error
problemStatement string
wantStdout string
wantStdErr string
wantErr string
}{
{
name: "get job API failure surfaces error",
baseRepo: ghrepo.New("OWNER", "REPO"),
problemStatement: "Do the thing",
stubs: func(reg *httpmock.Registry) {
reg.Register(
httpmock.WithHost(httpmock.REST("POST", "agents/swe/v1/jobs/OWNER/REPO"), "api.githubcopilot.com"),
httpmock.StatusStringResponse(201, createdJobTimeoutResponse),
)
reg.Register(
httpmock.WithHost(httpmock.REST("GET", "agents/swe/v1/jobs/OWNER/REPO/jobABC"), "api.githubcopilot.com"),
httpmock.StatusStringResponse(500, `{"error":{"message":"internal server error"}}`),
)
},
wantStdErr: "failed to get job: 500 Internal Server Error\n",
wantStdout: "job jobABC queued. View progress: https://github.com/copilot/agents\n",
},
{
name: "success with immediate PR",
baseRepo: ghrepo.New("OWNER", "REPO"),
@ -96,7 +114,8 @@ func Test_createRun(t *testing.T) {
httpmock.WithHost(httpmock.REST("POST", "agents/swe/v1/jobs/OWNER/REPO"), "api.githubcopilot.com"),
httpmock.StatusStringResponse(201, createdJobTimeoutResponse),
)
for range 3 {
// 4 attempts: initial + 3 retries
for range 4 {
reg.Register(
httpmock.WithHost(httpmock.REST("GET", "agents/swe/v1/jobs/OWNER/REPO/jobABC"), "api.githubcopilot.com"),
httpmock.StringResponse(`{"job_id":"jobABC"}`),
@ -123,31 +142,12 @@ func Test_createRun(t *testing.T) {
},
wantErr: "failed to create job: some API error",
},
{
name: "error fetching job during polling returns error and falls back to global agents page",
baseRepo: ghrepo.New("OWNER", "REPO"),
problemStatement: "Do the thing",
stubs: func(reg *httpmock.Registry) {
reg.Register(
httpmock.WithHost(httpmock.REST("POST", "agents/swe/v1/jobs/OWNER/REPO"), "api.githubcopilot.com"),
httpmock.StatusStringResponse(201, createdJobTimeoutResponse),
)
reg.Register(
httpmock.WithHost(httpmock.REST("GET", "agents/swe/v1/jobs/OWNER/REPO/jobABC"), "api.githubcopilot.com"),
httpmock.StringResponse(`{"job_id":"jobABC"}`),
)
reg.Register(
httpmock.WithHost(httpmock.REST("GET", "agents/swe/v1/jobs/OWNER/REPO/jobABC"), "api.githubcopilot.com"),
httpmock.StatusStringResponse(500, `{"error":{"message":"something went wrong"}}`),
)
},
wantStdout: "job jobABC queued. View progress: https://github.com/copilot/agents\n",
},
// Removed test case that previously expected fallback after polling error.
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
ios, _, stdout, _ := iostreams.Test()
ios, _, stdout, stderr := iostreams.Test()
opts := &CreateOptions{
IO: ios,
ProblemStatement: tt.problemStatement,
@ -176,13 +176,16 @@ func Test_createRun(t *testing.T) {
if tt.wantErr != "" {
require.Error(t, err)
require.Equal(t, err.Error(), tt.wantErr)
require.Equal(t, tt.wantErr, err.Error())
} else {
require.NoError(t, err)
}
if tt.wantStdout != "" {
require.Equal(t, tt.wantStdout, stdout.String())
}
require.Equal(t, tt.wantStdErr, stderr.String())
if tt.stubs != nil {
reg.Verify(t)
}