Merge pull request #8684 from shayn-orca/bugfix-7642

Added support for jobs with long filenames
This commit is contained in:
William Martin 2024-05-06 11:48:21 +02:00 committed by GitHub
commit 4896546432
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 107 additions and 9 deletions

View file

@ -15,6 +15,7 @@ import (
"strconv"
"strings"
"time"
"unicode/utf16"
"github.com/MakeNowJust/heredoc"
"github.com/cli/cli/v2/api"
@ -519,6 +520,8 @@ func promptForJob(prompter shared.Prompter, cs *iostreams.ColorScheme, jobs []sh
return nil, nil
}
const JOB_NAME_MAX_LENGTH = 90
func logFilenameRegexp(job shared.Job, step shared.Step) *regexp.Regexp {
// As described in https://github.com/cli/cli/issues/5011#issuecomment-1570713070, there are a number of steps
// the server can take when producing the downloaded zip file that can result in a mismatch between the job name
@ -527,15 +530,82 @@ func logFilenameRegexp(job shared.Job, step shared.Step) *regexp.Regexp {
// * Truncating names that are too long for zip files
// * Adding collision deduplicating numbers for jobs with the same name
//
// We are hesitant to duplicate all the server logic due to the fragility but while we explore our options, it
// is sensible to fix the issue that is unavoidable for users, that when a job uses a composite action, the server
// constructs a job name by constructing a job name of `<JOB_NAME`> / <ACTION_NAME>`. This means that logs will
// never be found for jobs that use composite actions.
// We are hesitant to duplicate all the server logic due to the fragility but it may be unavoidable. Currently, we:
// * Strip `/` which occur when composite action job names are constructed of the form `<JOB_NAME`> / <ACTION_NAME>`
// * Truncate long job names
//
sanitizedJobName := strings.ReplaceAll(job.Name, "/", "")
sanitizedJobName = truncateAsUTF16(sanitizedJobName, JOB_NAME_MAX_LENGTH)
re := fmt.Sprintf(`^%s\/%d_.*\.txt`, regexp.QuoteMeta(sanitizedJobName), step.Number)
return regexp.MustCompile(re)
}
/*
If you're reading this comment by necessity, I'm sorry and if you're reading it for fun, you're welcome, you weirdo.
What is the length of this string "a😅😅"? If you said 9 you'd be right. If you said 3 or 5 you might also be right!
Here's a summary:
"a" takes 1 byte (`\x61`)
"😅" takes 4 `bytes` (`\xF0\x9F\x98\x85`)
"a😅😅" therefore takes 9 `bytes`
In Go `len("a😅😅")` is 9 because the `len` builtin counts `bytes`
In Go `len([]rune("a😅😅"))` is 3 because each `rune` is 4 `bytes` so each character fits within a `rune`
In C# `"a😅😅".Length` is 5 because `.Length` counts `Char` objects, `Chars` hold 2 bytes, and "😅" takes 2 Chars.
But wait, what does C# have to do with anything? Well the server is running C#. Which server? The one that serves log
files to us in `.zip` format of course! When the server is constructing the zip file to avoid running afoul of a 260
byte zip file path length limitation, it applies transformations to various strings in order to limit their length.
In C#, the server truncates strings with this function:
public static string TruncateAfter(string str, int max)
{
string result = str.Length > max ? str.Substring(0, max) : str;
result = result.Trim();
return result;
}
This seems like it would be easy enough to replicate in Go but as we already discovered, the length of a string isn't
as obvious as it might seem. Since C# uses UTF-16 encoding for strings, and Go uses UTF-8 encoding and represents
characters by runes (which are an alias of int32) we cannot simply slice the string without any further consideration.
Instead, we need to encode the string as UTF-16 bytes, slice it and then decode it back to UTF-8.
Interestingly, in C# length and substring both act on the Char type so it's possible to slice into the middle of
a visual, "representable" character. For example we know `"a😅😅".Length` = 5 (1+2+2) and therefore Substring(0,4)
results in the final character being cleaved in two, resulting in "a😅<61>". Since our int32 runes are being encoded as
2 uint16 elements, we also mimic this behaviour by slicing into the UTF-16 encoded string.
Here's a program you can put into a dotnet playground to see how C# works:
using System;
public class Program {
public static void Main() {
string s = "a😅😅";
Console.WriteLine("{0} {1}", s.Length, s);
string t = TruncateAfter(s, 4);
Console.WriteLine("{0} {1}", t.Length, t);
}
public static string TruncateAfter(string str, int max) {
string result = str.Length > max ? str.Substring(0, max) : str;
return result.Trim();
}
}
This will output:
5 a😅😅
4 a😅<EFBFBD>
*/
func truncateAsUTF16(str string, max int) string {
// Encode the string to UTF-16 to count code units
utf16Encoded := utf16.Encode([]rune(str))
if len(utf16Encoded) > max {
// Decode back to UTF-8 up to the max length
str = string(utf16.Decode(utf16Encoded[:max]))
}
return strings.TrimSpace(str)
}
// This function takes a zip file of logs and a list of jobs.
// Structure of zip file
//

View file

@ -1376,6 +1376,8 @@ func TestViewRun(t *testing.T) {
}
// Structure of fixture zip file
// To see the structure of fixture zip file, run:
// ` unzip -lv pkg/cmd/run/view/fixtures/run_log.zip`
//
// run log/
// ├── cool job/
@ -1487,23 +1489,49 @@ func Test_attachRunLog(t *testing.T) {
// not the double space in the dir name, as the slash has been removed
wantFilename: "cool job with slash/1_fob the barz.txt",
},
{
name: "Job name with really long name (over the ZIP limit)",
job: shared.Job{
Name: "thisisnineteenchars_thisisnineteenchars_thisisnineteenchars_thisisnineteenchars_thisisnineteenchars_",
Steps: []shared.Step{{
Name: "Long Name Job",
Number: 1,
}},
},
wantMatch: true,
wantFilename: "thisisnineteenchars_thisisnineteenchars_thisisnineteenchars_thisisnineteenchars_thisisnine/1_Long Name Job.txt",
},
{
name: "Job name that would be truncated by the C# server to split a grapheme",
job: shared.Job{
Name: "Emoji Test 😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅",
Steps: []shared.Step{{
Name: "Emoji Job",
Number: 1,
}},
},
wantMatch: true,
wantFilename: "Emoji Test 😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅<F09F9885>/1_Emoji Job.txt",
},
}
rlz, err := zip.OpenReader("./fixtures/run_log.zip")
run_log_zip_reader, err := zip.OpenReader("./fixtures/run_log.zip")
require.NoError(t, err)
defer rlz.Close()
defer run_log_zip_reader.Close()
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
attachRunLog(&rlz.Reader, []shared.Job{tt.job})
attachRunLog(&run_log_zip_reader.Reader, []shared.Job{tt.job})
t.Logf("Job details: ")
for _, step := range tt.job.Steps {
log := step.Log
logPresent := log != nil
require.Equal(t, tt.wantMatch, logPresent)
require.Equal(t, tt.wantMatch, logPresent, "log not present")
if logPresent {
require.Equal(t, tt.wantFilename, log.Name)
require.Equal(t, tt.wantFilename, log.Name, "Filename mismatch")
}
}
})