Add support for jobs with long filenames
Co-authored-by: John Basila <john@orca.security> Co-authored-by: Shay Nehmad <shay.nehmad@orca.security>
This commit is contained in:
parent
6f350827d2
commit
61a698a55a
3 changed files with 107 additions and 9 deletions
Binary file not shown.
|
|
@ -15,6 +15,7 @@ import (
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
"unicode/utf16"
|
||||||
|
|
||||||
"github.com/MakeNowJust/heredoc"
|
"github.com/MakeNowJust/heredoc"
|
||||||
"github.com/cli/cli/v2/api"
|
"github.com/cli/cli/v2/api"
|
||||||
|
|
@ -519,6 +520,8 @@ func promptForJob(prompter shared.Prompter, cs *iostreams.ColorScheme, jobs []sh
|
||||||
return nil, nil
|
return nil, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const JOB_NAME_MAX_LENGTH = 90
|
||||||
|
|
||||||
func logFilenameRegexp(job shared.Job, step shared.Step) *regexp.Regexp {
|
func logFilenameRegexp(job shared.Job, step shared.Step) *regexp.Regexp {
|
||||||
// As described in https://github.com/cli/cli/issues/5011#issuecomment-1570713070, there are a number of steps
|
// As described in https://github.com/cli/cli/issues/5011#issuecomment-1570713070, there are a number of steps
|
||||||
// the server can take when producing the downloaded zip file that can result in a mismatch between the job name
|
// the server can take when producing the downloaded zip file that can result in a mismatch between the job name
|
||||||
|
|
@ -527,15 +530,82 @@ func logFilenameRegexp(job shared.Job, step shared.Step) *regexp.Regexp {
|
||||||
// * Truncating names that are too long for zip files
|
// * Truncating names that are too long for zip files
|
||||||
// * Adding collision deduplicating numbers for jobs with the same name
|
// * Adding collision deduplicating numbers for jobs with the same name
|
||||||
//
|
//
|
||||||
// We are hesitant to duplicate all the server logic due to the fragility but while we explore our options, it
|
// We are hesitant to duplicate all the server logic due to the fragility but it may be unavoidable. Currently, we:
|
||||||
// is sensible to fix the issue that is unavoidable for users, that when a job uses a composite action, the server
|
// * Strip `/` which occur when composite action job names are constructed of the form `<JOB_NAME`> / <ACTION_NAME>`
|
||||||
// constructs a job name by constructing a job name of `<JOB_NAME`> / <ACTION_NAME>`. This means that logs will
|
// * Truncate long job names
|
||||||
// never be found for jobs that use composite actions.
|
//
|
||||||
sanitizedJobName := strings.ReplaceAll(job.Name, "/", "")
|
sanitizedJobName := strings.ReplaceAll(job.Name, "/", "")
|
||||||
|
sanitizedJobName = truncateAsUTF16(sanitizedJobName, JOB_NAME_MAX_LENGTH)
|
||||||
re := fmt.Sprintf(`^%s\/%d_.*\.txt`, regexp.QuoteMeta(sanitizedJobName), step.Number)
|
re := fmt.Sprintf(`^%s\/%d_.*\.txt`, regexp.QuoteMeta(sanitizedJobName), step.Number)
|
||||||
return regexp.MustCompile(re)
|
return regexp.MustCompile(re)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
If you're reading this comment by necessity, I'm sorry and if you're reading it for fun, you're welcome, you weirdo.
|
||||||
|
|
||||||
|
What is the length of this string "aπ
π
"? If you said 9 you'd be right. If you said 3 or 5 you might also be right!
|
||||||
|
|
||||||
|
Here's a summary:
|
||||||
|
|
||||||
|
"a" takes 1 byte (`\x61`)
|
||||||
|
"π
" takes 4 `bytes` (`\xF0\x9F\x98\x85`)
|
||||||
|
"aπ
π
" therefore takes 9 `bytes`
|
||||||
|
In Go `len("aπ
π
")` is 9 because the `len` builtin counts `bytes`
|
||||||
|
In Go `len([]rune("aπ
π
"))` is 3 because each `rune` is 4 `bytes` so each character fits within a `rune`
|
||||||
|
In C# `"aπ
π
".Length` is 5 because `.Length` counts `Char` objects, `Chars` hold 2 bytes, and "π
" takes 2 Chars.
|
||||||
|
|
||||||
|
But wait, what does C# have to do with anything? Well the server is running C#. Which server? The one that serves log
|
||||||
|
files to us in `.zip` format of course! When the server is constructing the zip file to avoid running afoul of a 260
|
||||||
|
byte zip file path length limitation, it applies transformations to various strings in order to limit their length.
|
||||||
|
In C#, the server truncates strings with this function:
|
||||||
|
|
||||||
|
public static string TruncateAfter(string str, int max)
|
||||||
|
{
|
||||||
|
string result = str.Length > max ? str.Substring(0, max) : str;
|
||||||
|
result = result.Trim();
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
This seems like it would be easy enough to replicate in Go but as we already discovered, the length of a string isn't
|
||||||
|
as obvious as it might seem. Since C# uses UTF-16 encoding for strings, and Go uses UTF-8 encoding and represents
|
||||||
|
characters by runes (which are an alias of int32) we cannot simply slice the string without any further consideration.
|
||||||
|
Instead, we need to encode the string as UTF-16 bytes, slice it and then decode it back to UTF-8.
|
||||||
|
|
||||||
|
Interestingly, in C# length and substring both act on the Char type so it's possible to slice into the middle of
|
||||||
|
a visual, "representable" character. For example we know `"aπ
π
".Length` = 5 (1+2+2) and therefore Substring(0,4)
|
||||||
|
results in the final character being cleaved in two, resulting in "aπ
<61>". Since our int32 runes are being encoded as
|
||||||
|
2 uint16 elements, we also mimic this behaviour by slicing into the UTF-16 encoded string.
|
||||||
|
|
||||||
|
Here's a program you can put into a dotnet playground to see how C# works:
|
||||||
|
|
||||||
|
using System;
|
||||||
|
public class Program {
|
||||||
|
public static void Main() {
|
||||||
|
string s = "aπ
π
";
|
||||||
|
Console.WriteLine("{0} {1}", s.Length, s);
|
||||||
|
string t = TruncateAfter(s, 4);
|
||||||
|
Console.WriteLine("{0} {1}", t.Length, t);
|
||||||
|
}
|
||||||
|
public static string TruncateAfter(string str, int max) {
|
||||||
|
string result = str.Length > max ? str.Substring(0, max) : str;
|
||||||
|
return result.Trim();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
This will output:
|
||||||
|
5 aπ
π
|
||||||
|
4 aπ
<EFBFBD>
|
||||||
|
*/
|
||||||
|
func truncateAsUTF16(str string, max int) string {
|
||||||
|
// Encode the string to UTF-16 to count code units
|
||||||
|
utf16Encoded := utf16.Encode([]rune(str))
|
||||||
|
if len(utf16Encoded) > max {
|
||||||
|
// Decode back to UTF-8 up to the max length
|
||||||
|
str = string(utf16.Decode(utf16Encoded[:max]))
|
||||||
|
}
|
||||||
|
return strings.TrimSpace(str)
|
||||||
|
}
|
||||||
|
|
||||||
// This function takes a zip file of logs and a list of jobs.
|
// This function takes a zip file of logs and a list of jobs.
|
||||||
// Structure of zip file
|
// Structure of zip file
|
||||||
//
|
//
|
||||||
|
|
|
||||||
|
|
@ -1376,6 +1376,8 @@ func TestViewRun(t *testing.T) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Structure of fixture zip file
|
// Structure of fixture zip file
|
||||||
|
// To see the structure of fixture zip file, run:
|
||||||
|
// `β― unzip -lv pkg/cmd/run/view/fixtures/run_log.zip`
|
||||||
//
|
//
|
||||||
// run log/
|
// run log/
|
||||||
// βββ cool job/
|
// βββ cool job/
|
||||||
|
|
@ -1487,23 +1489,49 @@ func Test_attachRunLog(t *testing.T) {
|
||||||
// not the double space in the dir name, as the slash has been removed
|
// not the double space in the dir name, as the slash has been removed
|
||||||
wantFilename: "cool job with slash/1_fob the barz.txt",
|
wantFilename: "cool job with slash/1_fob the barz.txt",
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: "Job name with really long name (over the ZIP limit)",
|
||||||
|
job: shared.Job{
|
||||||
|
Name: "thisisnineteenchars_thisisnineteenchars_thisisnineteenchars_thisisnineteenchars_thisisnineteenchars_",
|
||||||
|
Steps: []shared.Step{{
|
||||||
|
Name: "Long Name Job",
|
||||||
|
Number: 1,
|
||||||
|
}},
|
||||||
|
},
|
||||||
|
wantMatch: true,
|
||||||
|
wantFilename: "thisisnineteenchars_thisisnineteenchars_thisisnineteenchars_thisisnineteenchars_thisisnine/1_Long Name Job.txt",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "Job name that would be truncated by the C# server to split a grapheme",
|
||||||
|
job: shared.Job{
|
||||||
|
Name: "Emoji Test π
π
π
π
π
π
π
π
π
π
π
π
π
π
π
π
π
π
π
π
π
π
π
π
π
π
π
π
π
π
π
π
π
π
π
π
π
π
π
π
π
π
π
π
π
π
π
π
π
π
π
π
π
π
π
π
π
π
π
π
π
π
π
π
π
π
π
",
|
||||||
|
Steps: []shared.Step{{
|
||||||
|
Name: "Emoji Job",
|
||||||
|
Number: 1,
|
||||||
|
}},
|
||||||
|
},
|
||||||
|
wantMatch: true,
|
||||||
|
wantFilename: "Emoji Test π
π
π
π
π
π
π
π
π
π
π
π
π
π
π
π
π
π
π
π
π
π
π
π
π
π
π
π
π
π
π
π
π
π
π
π
π
π
π
<F09F9885>/1_Emoji Job.txt",
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
rlz, err := zip.OpenReader("./fixtures/run_log.zip")
|
run_log_zip_reader, err := zip.OpenReader("./fixtures/run_log.zip")
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
defer rlz.Close()
|
defer run_log_zip_reader.Close()
|
||||||
|
|
||||||
for _, tt := range tests {
|
for _, tt := range tests {
|
||||||
t.Run(tt.name, func(t *testing.T) {
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
|
||||||
attachRunLog(&rlz.Reader, []shared.Job{tt.job})
|
attachRunLog(&run_log_zip_reader.Reader, []shared.Job{tt.job})
|
||||||
|
|
||||||
|
t.Logf("Job details: ")
|
||||||
|
|
||||||
for _, step := range tt.job.Steps {
|
for _, step := range tt.job.Steps {
|
||||||
log := step.Log
|
log := step.Log
|
||||||
logPresent := log != nil
|
logPresent := log != nil
|
||||||
require.Equal(t, tt.wantMatch, logPresent)
|
require.Equal(t, tt.wantMatch, logPresent, "log not present")
|
||||||
if logPresent {
|
if logPresent {
|
||||||
require.Equal(t, tt.wantFilename, log.Name)
|
require.Equal(t, tt.wantFilename, log.Name, "Filename mismatch")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
|
|
||||||
Loadingβ¦
Add table
Add a link
Reference in a new issue