Merge pull request #8684 from shayn-orca/bugfix-7642
Added support for jobs with long filenames
This commit is contained in:
commit
4896546432
3 changed files with 107 additions and 9 deletions
Binary file not shown.
|
|
@ -15,6 +15,7 @@ import (
|
|||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
"unicode/utf16"
|
||||
|
||||
"github.com/MakeNowJust/heredoc"
|
||||
"github.com/cli/cli/v2/api"
|
||||
|
|
@ -519,6 +520,8 @@ func promptForJob(prompter shared.Prompter, cs *iostreams.ColorScheme, jobs []sh
|
|||
return nil, nil
|
||||
}
|
||||
|
||||
const JOB_NAME_MAX_LENGTH = 90
|
||||
|
||||
func logFilenameRegexp(job shared.Job, step shared.Step) *regexp.Regexp {
|
||||
// As described in https://github.com/cli/cli/issues/5011#issuecomment-1570713070, there are a number of steps
|
||||
// the server can take when producing the downloaded zip file that can result in a mismatch between the job name
|
||||
|
|
@ -527,15 +530,82 @@ func logFilenameRegexp(job shared.Job, step shared.Step) *regexp.Regexp {
|
|||
// * Truncating names that are too long for zip files
|
||||
// * Adding collision deduplicating numbers for jobs with the same name
|
||||
//
|
||||
// We are hesitant to duplicate all the server logic due to the fragility but while we explore our options, it
|
||||
// is sensible to fix the issue that is unavoidable for users, that when a job uses a composite action, the server
|
||||
// constructs a job name by constructing a job name of `<JOB_NAME`> / <ACTION_NAME>`. This means that logs will
|
||||
// never be found for jobs that use composite actions.
|
||||
// We are hesitant to duplicate all the server logic due to the fragility but it may be unavoidable. Currently, we:
|
||||
// * Strip `/` which occur when composite action job names are constructed of the form `<JOB_NAME`> / <ACTION_NAME>`
|
||||
// * Truncate long job names
|
||||
//
|
||||
sanitizedJobName := strings.ReplaceAll(job.Name, "/", "")
|
||||
sanitizedJobName = truncateAsUTF16(sanitizedJobName, JOB_NAME_MAX_LENGTH)
|
||||
re := fmt.Sprintf(`^%s\/%d_.*\.txt`, regexp.QuoteMeta(sanitizedJobName), step.Number)
|
||||
return regexp.MustCompile(re)
|
||||
}
|
||||
|
||||
/*
|
||||
If you're reading this comment by necessity, I'm sorry and if you're reading it for fun, you're welcome, you weirdo.
|
||||
|
||||
What is the length of this string "a😅😅"? If you said 9 you'd be right. If you said 3 or 5 you might also be right!
|
||||
|
||||
Here's a summary:
|
||||
|
||||
"a" takes 1 byte (`\x61`)
|
||||
"😅" takes 4 `bytes` (`\xF0\x9F\x98\x85`)
|
||||
"a😅😅" therefore takes 9 `bytes`
|
||||
In Go `len("a😅😅")` is 9 because the `len` builtin counts `bytes`
|
||||
In Go `len([]rune("a😅😅"))` is 3 because each `rune` is 4 `bytes` so each character fits within a `rune`
|
||||
In C# `"a😅😅".Length` is 5 because `.Length` counts `Char` objects, `Chars` hold 2 bytes, and "😅" takes 2 Chars.
|
||||
|
||||
But wait, what does C# have to do with anything? Well the server is running C#. Which server? The one that serves log
|
||||
files to us in `.zip` format of course! When the server is constructing the zip file to avoid running afoul of a 260
|
||||
byte zip file path length limitation, it applies transformations to various strings in order to limit their length.
|
||||
In C#, the server truncates strings with this function:
|
||||
|
||||
public static string TruncateAfter(string str, int max)
|
||||
{
|
||||
string result = str.Length > max ? str.Substring(0, max) : str;
|
||||
result = result.Trim();
|
||||
return result;
|
||||
}
|
||||
|
||||
This seems like it would be easy enough to replicate in Go but as we already discovered, the length of a string isn't
|
||||
as obvious as it might seem. Since C# uses UTF-16 encoding for strings, and Go uses UTF-8 encoding and represents
|
||||
characters by runes (which are an alias of int32) we cannot simply slice the string without any further consideration.
|
||||
Instead, we need to encode the string as UTF-16 bytes, slice it and then decode it back to UTF-8.
|
||||
|
||||
Interestingly, in C# length and substring both act on the Char type so it's possible to slice into the middle of
|
||||
a visual, "representable" character. For example we know `"a😅😅".Length` = 5 (1+2+2) and therefore Substring(0,4)
|
||||
results in the final character being cleaved in two, resulting in "a😅<61>". Since our int32 runes are being encoded as
|
||||
2 uint16 elements, we also mimic this behaviour by slicing into the UTF-16 encoded string.
|
||||
|
||||
Here's a program you can put into a dotnet playground to see how C# works:
|
||||
|
||||
using System;
|
||||
public class Program {
|
||||
public static void Main() {
|
||||
string s = "a😅😅";
|
||||
Console.WriteLine("{0} {1}", s.Length, s);
|
||||
string t = TruncateAfter(s, 4);
|
||||
Console.WriteLine("{0} {1}", t.Length, t);
|
||||
}
|
||||
public static string TruncateAfter(string str, int max) {
|
||||
string result = str.Length > max ? str.Substring(0, max) : str;
|
||||
return result.Trim();
|
||||
}
|
||||
}
|
||||
|
||||
This will output:
|
||||
5 a😅😅
|
||||
4 a😅<EFBFBD>
|
||||
*/
|
||||
func truncateAsUTF16(str string, max int) string {
|
||||
// Encode the string to UTF-16 to count code units
|
||||
utf16Encoded := utf16.Encode([]rune(str))
|
||||
if len(utf16Encoded) > max {
|
||||
// Decode back to UTF-8 up to the max length
|
||||
str = string(utf16.Decode(utf16Encoded[:max]))
|
||||
}
|
||||
return strings.TrimSpace(str)
|
||||
}
|
||||
|
||||
// This function takes a zip file of logs and a list of jobs.
|
||||
// Structure of zip file
|
||||
//
|
||||
|
|
|
|||
|
|
@ -1376,6 +1376,8 @@ func TestViewRun(t *testing.T) {
|
|||
}
|
||||
|
||||
// Structure of fixture zip file
|
||||
// To see the structure of fixture zip file, run:
|
||||
// `❯ unzip -lv pkg/cmd/run/view/fixtures/run_log.zip`
|
||||
//
|
||||
// run log/
|
||||
// ├── cool job/
|
||||
|
|
@ -1487,23 +1489,49 @@ func Test_attachRunLog(t *testing.T) {
|
|||
// not the double space in the dir name, as the slash has been removed
|
||||
wantFilename: "cool job with slash/1_fob the barz.txt",
|
||||
},
|
||||
{
|
||||
name: "Job name with really long name (over the ZIP limit)",
|
||||
job: shared.Job{
|
||||
Name: "thisisnineteenchars_thisisnineteenchars_thisisnineteenchars_thisisnineteenchars_thisisnineteenchars_",
|
||||
Steps: []shared.Step{{
|
||||
Name: "Long Name Job",
|
||||
Number: 1,
|
||||
}},
|
||||
},
|
||||
wantMatch: true,
|
||||
wantFilename: "thisisnineteenchars_thisisnineteenchars_thisisnineteenchars_thisisnineteenchars_thisisnine/1_Long Name Job.txt",
|
||||
},
|
||||
{
|
||||
name: "Job name that would be truncated by the C# server to split a grapheme",
|
||||
job: shared.Job{
|
||||
Name: "Emoji Test 😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅",
|
||||
Steps: []shared.Step{{
|
||||
Name: "Emoji Job",
|
||||
Number: 1,
|
||||
}},
|
||||
},
|
||||
wantMatch: true,
|
||||
wantFilename: "Emoji Test 😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅<F09F9885>/1_Emoji Job.txt",
|
||||
},
|
||||
}
|
||||
|
||||
rlz, err := zip.OpenReader("./fixtures/run_log.zip")
|
||||
run_log_zip_reader, err := zip.OpenReader("./fixtures/run_log.zip")
|
||||
require.NoError(t, err)
|
||||
defer rlz.Close()
|
||||
defer run_log_zip_reader.Close()
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
|
||||
attachRunLog(&rlz.Reader, []shared.Job{tt.job})
|
||||
attachRunLog(&run_log_zip_reader.Reader, []shared.Job{tt.job})
|
||||
|
||||
t.Logf("Job details: ")
|
||||
|
||||
for _, step := range tt.job.Steps {
|
||||
log := step.Log
|
||||
logPresent := log != nil
|
||||
require.Equal(t, tt.wantMatch, logPresent)
|
||||
require.Equal(t, tt.wantMatch, logPresent, "log not present")
|
||||
if logPresent {
|
||||
require.Equal(t, tt.wantFilename, log.Name)
|
||||
require.Equal(t, tt.wantFilename, log.Name, "Filename mismatch")
|
||||
}
|
||||
}
|
||||
})
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue