refactor(run view): use API as fallback to fetch job logs

Signed-off-by: Babak K. Shandiz <babakks@github.com>
This commit is contained in:
Babak K. Shandiz 2025-07-04 14:11:09 +01:00
parent e0c7a328d2
commit 6d65904fee
No known key found for this signature in database
GPG key ID: 9472CAEFF56C742E
2 changed files with 375 additions and 213 deletions

349
pkg/cmd/run/view/logs.go Normal file
View file

@ -0,0 +1,349 @@
package view
import (
"archive/zip"
"errors"
"fmt"
"io"
"net/http"
"regexp"
"slices"
"sort"
"strings"
"unicode/utf16"
"github.com/cli/cli/v2/api"
"github.com/cli/cli/v2/internal/ghinstance"
"github.com/cli/cli/v2/internal/ghrepo"
"github.com/cli/cli/v2/pkg/cmd/run/shared"
)
type logFetcher interface {
GetLog() (io.ReadCloser, error)
}
type zipLogFetcher struct {
File *zip.File
}
func (f *zipLogFetcher) GetLog() (io.ReadCloser, error) {
return f.File.Open()
}
type apiLogFetcher struct {
httpClient *http.Client
repo ghrepo.Interface
jobID int64
}
func (f *apiLogFetcher) GetLog() (io.ReadCloser, error) {
logURL := fmt.Sprintf("%srepos/%s/actions/jobs/%d/logs",
ghinstance.RESTPrefix(f.repo.RepoHost()), ghrepo.FullName(f.repo), f.jobID)
req, err := http.NewRequest("GET", logURL, nil)
if err != nil {
return nil, err
}
resp, err := f.httpClient.Do(req)
if err != nil {
return nil, err
}
if resp.StatusCode == 404 {
return nil, fmt.Errorf("log not found: %v", f.jobID)
} else if resp.StatusCode != 200 {
return nil, api.HandleHTTPError(resp)
}
return resp.Body, nil
}
// logSegment represents a segment of a log trail, which can be either an entire
// job log or an individual step log.
type logSegment struct {
job *shared.Job
step *shared.Step
fetcher logFetcher
}
// maxAPILogFetchers is the maximum allowed number of API log fetchers that can
// be assigned to log segments. This is a heuristic limit to avoid overwhelming
// the API with too many requests when fetching logs for a run with many jobs or
// steps.
const maxAPILogFetchers = 25
var errTooManyAPILogFetchers = errors.New("too many missing logs")
// populateLogSegments populates log segments from the provided jobs and data
// available in the given ZIP archive map. Any missing logs will be assigned a
// log fetcher that retrieves logs from the API.
//
// For example, if there's no step log available in the ZIP archive, the entire
// job log will be selected as a log segment.
//
// Note that, as heuristic approach, we only allow a limited number of API log
// fetchers to be assigned. This is to avoid overwhelming the API with too many
// requests.
func populateLogSegments(httpClient *http.Client, repo ghrepo.Interface, jobs []shared.Job, zlm *zipLogMap, onlyFailed bool) ([]logSegment, error) {
segments := make([]logSegment, 0, len(jobs))
apiLogFetcherCount := 0
for _, job := range jobs {
if onlyFailed && !shared.IsFailureState(job.Conclusion) {
continue
}
stepLogAvailable := slices.ContainsFunc(job.Steps, func(step shared.Step) bool {
_, ok := zlm.forStep(job.ID, step.Number)
return ok
})
// If at least one step log is available, we populate the segments with
// them and don't use the entire job log.
if stepLogAvailable {
steps := slices.Clone(job.Steps)
sort.Sort(steps)
for _, step := range steps {
if onlyFailed && !shared.IsFailureState(step.Conclusion) {
continue
}
zf, ok := zlm.forStep(job.ID, step.Number)
if !ok {
// We have no step log in the zip archive, but there's nothing we can do
// about that because there is no API endpoint to fetch step logs.
continue
}
segments = append(segments, logSegment{
job: &job,
step: &step,
fetcher: &zipLogFetcher{File: zf},
})
}
continue
}
segment := logSegment{job: &job}
if zf, ok := zlm.forJob(job.ID); ok {
segment.fetcher = &zipLogFetcher{File: zf}
} else {
segment.fetcher = &apiLogFetcher{
httpClient: httpClient,
repo: repo,
jobID: job.ID,
}
apiLogFetcherCount++
}
segments = append(segments, segment)
if apiLogFetcherCount > maxAPILogFetchers {
return nil, errTooManyAPILogFetchers
}
}
return segments, nil
}
// zipLogMap is a map of job and step logs available in a ZIP archive.
type zipLogMap struct {
jobs map[int64]*zip.File
steps map[string]*zip.File
}
func newZipLogMap() *zipLogMap {
return &zipLogMap{
jobs: make(map[int64]*zip.File),
steps: make(map[string]*zip.File),
}
}
func (l *zipLogMap) forJob(jobID int64) (*zip.File, bool) {
f, ok := l.jobs[jobID]
return f, ok
}
func (l *zipLogMap) forStep(jobID int64, stepNumber int) (*zip.File, bool) {
logFetcherKey := fmt.Sprintf("%d/%d", jobID, stepNumber)
f, ok := l.steps[logFetcherKey]
return f, ok
}
func (l *zipLogMap) addStep(jobID int64, stepNumber int, zf *zip.File) {
logFetcherKey := fmt.Sprintf("%d/%d", jobID, stepNumber)
l.steps[logFetcherKey] = zf
}
func (l *zipLogMap) addJob(jobID int64, zf *zip.File) {
l.jobs[jobID] = zf
}
// getZipLogMap populates a logs struct with appropriate log fetchers based on
// the provided zip file and list of jobs.
//
// The structure of zip file is expected to be as:
//
// zip/
// ├── jobname1/
// │ ├── 1_stepname.txt
// │ ├── 2_anotherstepname.txt
// │ ├── 3_stepstepname.txt
// │ └── 4_laststepname.txt
// ├── jobname2/
// | ├── 1_stepname.txt
// | └── 2_somestepname.txt
// ├── 0_jobname1.txt
// ├── 1_jobname2.txt
// └── -9999999999_jobname3.txt
//
// The function iterates through the list of jobs and tries to find the matching
// log file in the ZIP archive.
//
// The top-level .txt files include the logs for an entire job run. Note that
// the prefixed number is either:
// - An ordinal and cannot be mapped to the corresponding job's ID.
// - A negative integer which is the ID of the job in the old Actions service.
// The service right now tries to get logs and use an ordinal in a loop.
// However, if it doesn't get the logs, it falls back to an old service
// where the ID can apparently be negative.
func getZipLogMap(rlz *zip.Reader, jobs []shared.Job) *zipLogMap {
zlm := newZipLogMap()
for _, job := range jobs {
// So far we haven't yet encountered a ZIP containing both top-level job
// logs (i.e. the normal and the legacy .txt files). However, it's still
// possible. Therefore, we prioritise the normal log over the legacy one.
if zf := matchFileInZIPArchive(rlz, jobLogFilenameRegexp(job)); zf != nil {
zlm.addJob(job.ID, zf)
} else if zf := matchFileInZIPArchive(rlz, legacyJobLogFilenameRegexp(job)); zf != nil {
zlm.addJob(job.ID, zf)
}
for _, step := range job.Steps {
if zf := matchFileInZIPArchive(rlz, stepLogFilenameRegexp(job, step)); zf != nil {
zlm.addStep(job.ID, step.Number, zf)
}
}
}
return zlm
}
const JOB_NAME_MAX_LENGTH = 90
func getJobNameForLogFilename(name string) string {
// As described in https://github.com/cli/cli/issues/5011#issuecomment-1570713070, there are a number of steps
// the server can take when producing the downloaded zip file that can result in a mismatch between the job name
// and the filename in the zip including:
// * Removing characters in the job name that aren't allowed in file paths
// * Truncating names that are too long for zip files
// * Adding collision deduplicating numbers for jobs with the same name
//
// We are hesitant to duplicate all the server logic due to the fragility but it may be unavoidable. Currently, we:
// * Strip `/` which occur when composite action job names are constructed of the form `<JOB_NAME`> / <ACTION_NAME>`
// * Truncate long job names
//
sanitizedJobName := strings.ReplaceAll(name, "/", "")
sanitizedJobName = strings.ReplaceAll(sanitizedJobName, ":", "")
sanitizedJobName = truncateAsUTF16(sanitizedJobName, JOB_NAME_MAX_LENGTH)
return sanitizedJobName
}
// A job run log file is a top-level .txt file whose name starts with an ordinal
// number; e.g., "0_jobname.txt".
func jobLogFilenameRegexp(job shared.Job) *regexp.Regexp {
sanitizedJobName := getJobNameForLogFilename(job.Name)
re := fmt.Sprintf(`^\d+_%s\.txt$`, regexp.QuoteMeta(sanitizedJobName))
return regexp.MustCompile(re)
}
// A legacy job run log file is a top-level .txt file whose name starts with a
// negative number which is the ID of the run; e.g., "-2147483648_jobname.txt".
func legacyJobLogFilenameRegexp(job shared.Job) *regexp.Regexp {
sanitizedJobName := getJobNameForLogFilename(job.Name)
re := fmt.Sprintf(`^-\d+_%s\.txt$`, regexp.QuoteMeta(sanitizedJobName))
return regexp.MustCompile(re)
}
func stepLogFilenameRegexp(job shared.Job, step shared.Step) *regexp.Regexp {
sanitizedJobName := getJobNameForLogFilename(job.Name)
re := fmt.Sprintf(`^%s\/%d_.*\.txt$`, regexp.QuoteMeta(sanitizedJobName), step.Number)
return regexp.MustCompile(re)
}
/*
If you're reading this comment by necessity, I'm sorry and if you're reading it for fun, you're welcome, you weirdo.
What is the length of this string "a😅😅"? If you said 9 you'd be right. If you said 3 or 5 you might also be right!
Here's a summary:
"a" takes 1 byte (`\x61`)
"😅" takes 4 `bytes` (`\xF0\x9F\x98\x85`)
"a😅😅" therefore takes 9 `bytes`
In Go `len("a😅😅")` is 9 because the `len` builtin counts `bytes`
In Go `len([]rune("a😅😅"))` is 3 because each `rune` is 4 `bytes` so each character fits within a `rune`
In C# `"a😅😅".Length` is 5 because `.Length` counts `Char` objects, `Chars` hold 2 bytes, and "😅" takes 2 Chars.
But wait, what does C# have to do with anything? Well the server is running C#. Which server? The one that serves log
files to us in `.zip` format of course! When the server is constructing the zip file to avoid running afoul of a 260
byte zip file path length limitation, it applies transformations to various strings in order to limit their length.
In C#, the server truncates strings with this function:
public static string TruncateAfter(string str, int max)
{
string result = str.Length > max ? str.Substring(0, max) : str;
result = result.Trim();
return result;
}
This seems like it would be easy enough to replicate in Go but as we already discovered, the length of a string isn't
as obvious as it might seem. Since C# uses UTF-16 encoding for strings, and Go uses UTF-8 encoding and represents
characters by runes (which are an alias of int32) we cannot simply slice the string without any further consideration.
Instead, we need to encode the string as UTF-16 bytes, slice it and then decode it back to UTF-8.
Interestingly, in C# length and substring both act on the Char type so it's possible to slice into the middle of
a visual, "representable" character. For example we know `"a😅😅".Length` = 5 (1+2+2) and therefore Substring(0,4)
results in the final character being cleaved in two, resulting in "a😅<61>". Since our int32 runes are being encoded as
2 uint16 elements, we also mimic this behaviour by slicing into the UTF-16 encoded string.
Here's a program you can put into a dotnet playground to see how C# works:
using System;
public class Program {
public static void Main() {
string s = "a😅😅";
Console.WriteLine("{0} {1}", s.Length, s);
string t = TruncateAfter(s, 4);
Console.WriteLine("{0} {1}", t.Length, t);
}
public static string TruncateAfter(string str, int max) {
string result = str.Length > max ? str.Substring(0, max) : str;
return result.Trim();
}
}
This will output:
5 a😅😅
4 a😅<EFBFBD>
*/
func truncateAsUTF16(str string, max int) string {
// Encode the string to UTF-16 to count code units
utf16Encoded := utf16.Encode([]rune(str))
if len(utf16Encoded) > max {
// Decode back to UTF-8 up to the max length
str = string(utf16.Decode(utf16Encoded[:max]))
}
return strings.TrimSpace(str)
}
func matchFileInZIPArchive(zr *zip.Reader, re *regexp.Regexp) *zip.File {
for _, file := range zr.File {
if re.MatchString(file.Name) {
return file
}
}
return nil
}

View file

@ -10,12 +10,8 @@ import (
"net/http"
"os"
"path/filepath"
"regexp"
"sort"
"strconv"
"strings"
"time"
"unicode/utf16"
"github.com/MakeNowJust/heredoc"
"github.com/cli/cli/v2/api"
@ -322,9 +318,16 @@ func runView(opts *ViewOptions) error {
}
defer runLogZip.Close()
attachRunLog(&runLogZip.Reader, jobs)
zlm := getZipLogMap(&runLogZip.Reader, jobs)
segments, err := populateLogSegments(httpClient, repo, jobs, zlm, opts.LogFailed)
if err != nil {
if errors.Is(err, errTooManyAPILogFetchers) {
return fmt.Errorf("too many API requests needed to fetch logs; try narrowing down to a specific job with the `--job` option")
}
return err
}
return displayRunLog(opts.IO.Out, jobs, opts.LogFailed)
return displayLogSegments(opts.IO.Out, segments)
}
prNumber := ""
@ -535,212 +538,28 @@ func promptForJob(prompter shared.Prompter, cs *iostreams.ColorScheme, jobs []sh
return nil, nil
}
const JOB_NAME_MAX_LENGTH = 90
func getJobNameForLogFilename(name string) string {
// As described in https://github.com/cli/cli/issues/5011#issuecomment-1570713070, there are a number of steps
// the server can take when producing the downloaded zip file that can result in a mismatch between the job name
// and the filename in the zip including:
// * Removing characters in the job name that aren't allowed in file paths
// * Truncating names that are too long for zip files
// * Adding collision deduplicating numbers for jobs with the same name
//
// We are hesitant to duplicate all the server logic due to the fragility but it may be unavoidable. Currently, we:
// * Strip `/` which occur when composite action job names are constructed of the form `<JOB_NAME`> / <ACTION_NAME>`
// * Truncate long job names
//
sanitizedJobName := strings.ReplaceAll(name, "/", "")
sanitizedJobName = strings.ReplaceAll(sanitizedJobName, ":", "")
sanitizedJobName = truncateAsUTF16(sanitizedJobName, JOB_NAME_MAX_LENGTH)
return sanitizedJobName
}
// A job run log file is a top-level .txt file whose name starts with an ordinal
// number; e.g., "0_jobname.txt".
func jobLogFilenameRegexp(job shared.Job) *regexp.Regexp {
sanitizedJobName := getJobNameForLogFilename(job.Name)
re := fmt.Sprintf(`^\d+_%s\.txt$`, regexp.QuoteMeta(sanitizedJobName))
return regexp.MustCompile(re)
}
// A legacy job run log file is a top-level .txt file whose name starts with a
// negative number which is the ID of the run; e.g., "-2147483648_jobname.txt".
func legacyJobLogFilenameRegexp(job shared.Job) *regexp.Regexp {
sanitizedJobName := getJobNameForLogFilename(job.Name)
re := fmt.Sprintf(`^-\d+_%s\.txt$`, regexp.QuoteMeta(sanitizedJobName))
return regexp.MustCompile(re)
}
func stepLogFilenameRegexp(job shared.Job, step shared.Step) *regexp.Regexp {
sanitizedJobName := getJobNameForLogFilename(job.Name)
re := fmt.Sprintf(`^%s\/%d_.*\.txt$`, regexp.QuoteMeta(sanitizedJobName), step.Number)
return regexp.MustCompile(re)
}
/*
If you're reading this comment by necessity, I'm sorry and if you're reading it for fun, you're welcome, you weirdo.
What is the length of this string "a😅😅"? If you said 9 you'd be right. If you said 3 or 5 you might also be right!
Here's a summary:
"a" takes 1 byte (`\x61`)
"😅" takes 4 `bytes` (`\xF0\x9F\x98\x85`)
"a😅😅" therefore takes 9 `bytes`
In Go `len("a😅😅")` is 9 because the `len` builtin counts `bytes`
In Go `len([]rune("a😅😅"))` is 3 because each `rune` is 4 `bytes` so each character fits within a `rune`
In C# `"a😅😅".Length` is 5 because `.Length` counts `Char` objects, `Chars` hold 2 bytes, and "😅" takes 2 Chars.
But wait, what does C# have to do with anything? Well the server is running C#. Which server? The one that serves log
files to us in `.zip` format of course! When the server is constructing the zip file to avoid running afoul of a 260
byte zip file path length limitation, it applies transformations to various strings in order to limit their length.
In C#, the server truncates strings with this function:
public static string TruncateAfter(string str, int max)
{
string result = str.Length > max ? str.Substring(0, max) : str;
result = result.Trim();
return result;
}
This seems like it would be easy enough to replicate in Go but as we already discovered, the length of a string isn't
as obvious as it might seem. Since C# uses UTF-16 encoding for strings, and Go uses UTF-8 encoding and represents
characters by runes (which are an alias of int32) we cannot simply slice the string without any further consideration.
Instead, we need to encode the string as UTF-16 bytes, slice it and then decode it back to UTF-8.
Interestingly, in C# length and substring both act on the Char type so it's possible to slice into the middle of
a visual, "representable" character. For example we know `"a😅😅".Length` = 5 (1+2+2) and therefore Substring(0,4)
results in the final character being cleaved in two, resulting in "a😅<61>". Since our int32 runes are being encoded as
2 uint16 elements, we also mimic this behaviour by slicing into the UTF-16 encoded string.
Here's a program you can put into a dotnet playground to see how C# works:
using System;
public class Program {
public static void Main() {
string s = "a😅😅";
Console.WriteLine("{0} {1}", s.Length, s);
string t = TruncateAfter(s, 4);
Console.WriteLine("{0} {1}", t.Length, t);
}
public static string TruncateAfter(string str, int max) {
string result = str.Length > max ? str.Substring(0, max) : str;
return result.Trim();
}
}
This will output:
5 a😅😅
4 a😅<EFBFBD>
*/
func truncateAsUTF16(str string, max int) string {
// Encode the string to UTF-16 to count code units
utf16Encoded := utf16.Encode([]rune(str))
if len(utf16Encoded) > max {
// Decode back to UTF-8 up to the max length
str = string(utf16.Decode(utf16Encoded[:max]))
}
return strings.TrimSpace(str)
}
// This function takes a zip file of logs and a list of jobs.
// Structure of zip file
//
// zip/
// ├── jobname1/
// │ ├── 1_stepname.txt
// │ ├── 2_anotherstepname.txt
// │ ├── 3_stepstepname.txt
// │ └── 4_laststepname.txt
// ├── jobname2/
// | ├── 1_stepname.txt
// | └── 2_somestepname.txt
// ├── 0_jobname1.txt
// ├── 1_jobname2.txt
// └── -9999999999_jobname3.txt
//
// It iterates through the list of jobs and tries to find the matching
// log in the zip file. If the matching log is found it is attached
// to the job.
//
// The top-level .txt files include the logs for an entire job run. Note that
// the prefixed number is either:
// - An ordinal and cannot be mapped to the corresponding job's ID.
// - A negative integer which is the ID of the job in the old Actions service.
// The service right now tries to get logs and use an ordinal in a loop.
// However, if it doesn't get the logs, it falls back to an old service
// where the ID can apparently be negative.
func attachRunLog(rlz *zip.Reader, jobs []shared.Job) {
for i, job := range jobs {
// As a highest priority, we try to use the step logs first. We have seen zips that surprisingly contain
// step logs, normal job logs and legacy job logs. In this case, both job logs would be ignored. We have
// never seen a zip containing both job logs and no step logs, however, it may be possible. In that case
// let's prioritise the normal log over the legacy one.
jobLog := matchFileInZIPArchive(rlz, jobLogFilenameRegexp(job))
if jobLog == nil {
jobLog = matchFileInZIPArchive(rlz, legacyJobLogFilenameRegexp(job))
func displayLogSegments(w io.Writer, segments []logSegment) error {
for _, segment := range segments {
stepName := "UNKNOWN STEP"
if segment.step != nil {
stepName = segment.step.Name
}
jobs[i].Log = jobLog
for j, step := range job.Steps {
jobs[i].Steps[j].Log = matchFileInZIPArchive(rlz, stepLogFilenameRegexp(job, step))
rc, err := segment.fetcher.GetLog()
if err != nil {
return err
}
}
}
func matchFileInZIPArchive(zr *zip.Reader, re *regexp.Regexp) *zip.File {
for _, file := range zr.File {
if re.MatchString(file.Name) {
return file
}
}
return nil
}
func displayRunLog(w io.Writer, jobs []shared.Job, failed bool) error {
for _, job := range jobs {
// To display a run log, we first try to compile it from individual step
// logs, because this way we can prepend lines with the corresponding
// step name. However, at the time of writing, logs are sometimes being
// served by a service that doesnt include the step logs (none of them),
// in which case we fall back to print the entire job run log.
var hasStepLogs bool
steps := job.Steps
sort.Sort(steps)
for _, step := range steps {
if failed && !shared.IsFailureState(step.Conclusion) {
continue
}
if step.Log == nil {
continue
}
hasStepLogs = true
prefix := fmt.Sprintf("%s\t%s\t", job.Name, step.Name)
if err := printZIPFile(w, step.Log, prefix); err != nil {
err = func() error {
defer rc.Close()
prefix := fmt.Sprintf("%s\t%s\t", segment.job.Name, stepName)
if err := copyLogWithLinePrefix(w, rc, prefix); err != nil {
return err
}
}
return nil
}()
if hasStepLogs {
continue
}
if failed && !shared.IsFailureState(job.Conclusion) {
continue
}
if job.Log == nil {
continue
}
// Here, we fall back to the job run log, which means we do not know
// the step name of lines. However, we want to keep the same line
// formatting to avoid breaking any code or script that rely on the
// tab-delimited formatting. So, an unknown-step placeholder is used
// instead of the actual step name.
prefix := fmt.Sprintf("%s\tUNKNOWN STEP\t", job.Name)
if err := printZIPFile(w, job.Log, prefix); err != nil {
if err != nil {
return err
}
}
@ -748,14 +567,8 @@ func displayRunLog(w io.Writer, jobs []shared.Job, failed bool) error {
return nil
}
func printZIPFile(w io.Writer, file *zip.File, prefix string) error {
f, err := file.Open()
if err != nil {
return err
}
defer f.Close()
scanner := bufio.NewScanner(f)
func copyLogWithLinePrefix(w io.Writer, r io.Reader, prefix string) error {
scanner := bufio.NewScanner(r)
for scanner.Scan() {
fmt.Fprintf(w, "%s%s\n", prefix, scanner.Text())
}