From 61a698a55ae327ce71fda9f1f49a5cfc0ce5d3f9 Mon Sep 17 00:00:00 2001 From: William Martin Date: Mon, 6 May 2024 11:27:07 +0200 Subject: [PATCH] Add support for jobs with long filenames Co-authored-by: John Basila Co-authored-by: Shay Nehmad --- pkg/cmd/run/view/fixtures/run_log.zip | Bin 2656 -> 6822 bytes pkg/cmd/run/view/view.go | 78 ++++++++++++++++++++++++-- pkg/cmd/run/view/view_test.go | 38 +++++++++++-- 3 files changed, 107 insertions(+), 9 deletions(-) diff --git a/pkg/cmd/run/view/fixtures/run_log.zip b/pkg/cmd/run/view/fixtures/run_log.zip index e298df38649fae359e8f6576705b58490b6276e3..2422c3b2496a37b98c164713aee746f65ce150d1 100644 GIT binary patch literal 6822 zcmdT|2~bm46#XGAL8_z`L_ph*vPgwM&{}ldB3emVOa)WwG6_o%C?*XFiqsjB9;{G2rl%!;7?xUCn;5CKOSU&TDLOfng zamg0TgTXVD*_+?@S%q-Eb@)<#7{4RHpW9){P2#LKN*zgcq1!V(JugzC@oX`b`q2rd zqa&@vsFmN0d2#o;h9)L=PVh`}X=%CZG{UM`es{B+n^th!spZFr+xvVTq)^Q%rlzi& zzRoIZ+F8?dAuX+JXYj4{{M>1GXXfmZogDQYqZ{|9mfA>oLDHo>edoIP73dR-X z{XBJl!Nj`bHB-kQ^JY#vaP`7CX8v2b$9{7Rs4r=#zEoCGeYK`!_R@oM>?&^fZ&|{( zm>%Gp;&m~kkdvpJwQPBCF8$+9LHW<4f?89G8^zy#RM{f1-S}mC@uPR1P5->7$fq-@u+>`AX9l_=Gy;zih4oGf7Gx6n40KYA4I=?>EzkMea~>UmW@e_Wokelb7{ zQET$C)mKa}*a)js)dbn7i|O^Miirl-S4;yq2ELe9dtfooEz2$)nnQtuIot7ot#&a% zRWaHI1sDD&%o^!T9A6PFjlw0dp^~WHG0o$yh=3PKPiofZ#b$TW<j6FPl{ zlx`5?1r3byK>69Rzlh%myp?*~GSQN2$G+lgN6G6tQ))%S>q+%H}V+s=OWcXDG)ZR62B2XBQfo$UNqcGl!w^jS2AA-7{b zNMDjjZ?c*H&lkQfg)8i@j4yn<*?3CLmivvT*IF;B9Xk3Byf3Ns8u8BQ%&peRCR1u! z=PKOF6gR7^tGX0rmEAm=@{9ac;zJpK?#I1zvJNxL8$Dagoc8`*Ued-=z;CyW!OQkN zQjOq_DU*M^51IqkPqV~mWfQq>B&t45_vOg*`KbMzRzdsl?A?I&T2ji`D?ZNrlv#0@1`-rF;K6wTMVZtPe$ zxo){^mf}+8`$gUC3AViAMZ+62R;?8(C-!8!Z{2GnwKlt( z^%mEy>z{nSCpBOB`0%td+)mJGI*8^5gKk!7V%uereE|ht!RZgAI72#4_?G_U9 zG)-|cF~vv7opvr9aa)rI}5%Jpn6nG zV4ZTk7bqdv(QO6_cEaSc%z*`JNaT`q0mn}pJF3v2Iwwpni=to}WiWu%{$E<1NCUz} zl_2~KajI(=^oUIqJej5>;T$y zcB29i?j3~5WouBk{U_c8e%Svo5UR|eo+L~z>jr^djk-i*LfNG|xhfN+W(Jg(fdPSm zP^F}u++0)g(mvLTqqb=Hy8`$|4@+-CYJ?(Uv>I4rB*Tfp-+gqLiZ@KgrA|f zAgrC$1;P#@cByJ8y;4K7+rY%pLlBA$!sN21A%iM$eWcnz-@!Vdj1Vdd!sN22;1@-8 z-wmQp=(o;5X96X$uoRFA6V}e+4a3xwR@HENiLs{PpjUJU(>~tzAzuFnQ6}KStrF5; h-GRnfYT)bQ0eVO=$Ik>DAsA)_{*+)yvVl7c`xkJFQjY)t delta 941 zcmZ2x`apywz?+$6a-WdYBoqLtM8$Y+S(p8;@B z?BLVqFh%kwE7+ft4Wz^;JMgQZNX0O-Z~;S!f#FJ0O$3+$CD<8c7!p$yvhtJkLqj+j zm>Ggu(?A#!%-@lN8DYS)uOAaQfd+uE6obs<2h6g{P`!+pddo8sOH#{Hi?}9t2x-ZI z>`l(k&w*OW%fQarlAi#=;7}425@%$X{6a{P$(>;`x3C%_c!4skpm>&?{6 / `. This means that logs will - // never be found for jobs that use composite actions. + // We are hesitant to duplicate all the server logic due to the fragility but it may be unavoidable. Currently, we: + // * Strip `/` which occur when composite action job names are constructed of the form ` / ` + // * Truncate long job names + // sanitizedJobName := strings.ReplaceAll(job.Name, "/", "") + sanitizedJobName = truncateAsUTF16(sanitizedJobName, JOB_NAME_MAX_LENGTH) re := fmt.Sprintf(`^%s\/%d_.*\.txt`, regexp.QuoteMeta(sanitizedJobName), step.Number) return regexp.MustCompile(re) } +/* +If you're reading this comment by necessity, I'm sorry and if you're reading it for fun, you're welcome, you weirdo. + +What is the length of this string "a😅😅"? If you said 9 you'd be right. If you said 3 or 5 you might also be right! + +Here's a summary: + + "a" takes 1 byte (`\x61`) + "😅" takes 4 `bytes` (`\xF0\x9F\x98\x85`) + "a😅😅" therefore takes 9 `bytes` + In Go `len("a😅😅")` is 9 because the `len` builtin counts `bytes` + In Go `len([]rune("a😅😅"))` is 3 because each `rune` is 4 `bytes` so each character fits within a `rune` + In C# `"a😅😅".Length` is 5 because `.Length` counts `Char` objects, `Chars` hold 2 bytes, and "😅" takes 2 Chars. + +But wait, what does C# have to do with anything? Well the server is running C#. Which server? The one that serves log +files to us in `.zip` format of course! When the server is constructing the zip file to avoid running afoul of a 260 +byte zip file path length limitation, it applies transformations to various strings in order to limit their length. +In C#, the server truncates strings with this function: + + public static string TruncateAfter(string str, int max) + { + string result = str.Length > max ? str.Substring(0, max) : str; + result = result.Trim(); + return result; + } + +This seems like it would be easy enough to replicate in Go but as we already discovered, the length of a string isn't +as obvious as it might seem. Since C# uses UTF-16 encoding for strings, and Go uses UTF-8 encoding and represents +characters by runes (which are an alias of int32) we cannot simply slice the string without any further consideration. +Instead, we need to encode the string as UTF-16 bytes, slice it and then decode it back to UTF-8. + +Interestingly, in C# length and substring both act on the Char type so it's possible to slice into the middle of +a visual, "representable" character. For example we know `"a😅😅".Length` = 5 (1+2+2) and therefore Substring(0,4) +results in the final character being cleaved in two, resulting in "a😅�". Since our int32 runes are being encoded as +2 uint16 elements, we also mimic this behaviour by slicing into the UTF-16 encoded string. + +Here's a program you can put into a dotnet playground to see how C# works: + + using System; + public class Program { + public static void Main() { + string s = "a😅😅"; + Console.WriteLine("{0} {1}", s.Length, s); + string t = TruncateAfter(s, 4); + Console.WriteLine("{0} {1}", t.Length, t); + } + public static string TruncateAfter(string str, int max) { + string result = str.Length > max ? str.Substring(0, max) : str; + return result.Trim(); + } + } + +This will output: +5 a😅😅 +4 a😅� +*/ +func truncateAsUTF16(str string, max int) string { + // Encode the string to UTF-16 to count code units + utf16Encoded := utf16.Encode([]rune(str)) + if len(utf16Encoded) > max { + // Decode back to UTF-8 up to the max length + str = string(utf16.Decode(utf16Encoded[:max])) + } + return strings.TrimSpace(str) +} + // This function takes a zip file of logs and a list of jobs. // Structure of zip file // diff --git a/pkg/cmd/run/view/view_test.go b/pkg/cmd/run/view/view_test.go index 1ac8e49e4..38ae1be44 100644 --- a/pkg/cmd/run/view/view_test.go +++ b/pkg/cmd/run/view/view_test.go @@ -1376,6 +1376,8 @@ func TestViewRun(t *testing.T) { } // Structure of fixture zip file +// To see the structure of fixture zip file, run: +// `❯ unzip -lv pkg/cmd/run/view/fixtures/run_log.zip` // // run log/ // ├── cool job/ @@ -1487,23 +1489,49 @@ func Test_attachRunLog(t *testing.T) { // not the double space in the dir name, as the slash has been removed wantFilename: "cool job with slash/1_fob the barz.txt", }, + { + name: "Job name with really long name (over the ZIP limit)", + job: shared.Job{ + Name: "thisisnineteenchars_thisisnineteenchars_thisisnineteenchars_thisisnineteenchars_thisisnineteenchars_", + Steps: []shared.Step{{ + Name: "Long Name Job", + Number: 1, + }}, + }, + wantMatch: true, + wantFilename: "thisisnineteenchars_thisisnineteenchars_thisisnineteenchars_thisisnineteenchars_thisisnine/1_Long Name Job.txt", + }, + { + name: "Job name that would be truncated by the C# server to split a grapheme", + job: shared.Job{ + Name: "Emoji Test 😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅", + Steps: []shared.Step{{ + Name: "Emoji Job", + Number: 1, + }}, + }, + wantMatch: true, + wantFilename: "Emoji Test 😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅�/1_Emoji Job.txt", + }, } - rlz, err := zip.OpenReader("./fixtures/run_log.zip") + run_log_zip_reader, err := zip.OpenReader("./fixtures/run_log.zip") require.NoError(t, err) - defer rlz.Close() + defer run_log_zip_reader.Close() for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - attachRunLog(&rlz.Reader, []shared.Job{tt.job}) + attachRunLog(&run_log_zip_reader.Reader, []shared.Job{tt.job}) + + t.Logf("Job details: ") for _, step := range tt.job.Steps { log := step.Log logPresent := log != nil - require.Equal(t, tt.wantMatch, logPresent) + require.Equal(t, tt.wantMatch, logPresent, "log not present") if logPresent { - require.Equal(t, tt.wantFilename, log.Name) + require.Equal(t, tt.wantFilename, log.Name, "Filename mismatch") } } })