From 2ee6737053591c49ce6a16deb6f30ed0d704e676 Mon Sep 17 00:00:00 2001 From: William Martin Date: Tue, 27 Feb 2024 20:44:04 +0100 Subject: [PATCH] Use regex to split apart the git client Commit logs Theoretically this should be clearer and more robust than the previous version which had some custom loop logic while trying to parse newlines and determine whether it had reached a new commit entry by trying to parse a git sha. This would not have worked correctly if a commit body contained a sha on a new line. --- git/client.go | 76 +++++++++++++++++--------------- git/client_test.go | 15 ++++--- pkg/cmd/pr/create/create_test.go | 20 ++++----- 3 files changed, 59 insertions(+), 52 deletions(-) diff --git a/git/client.go b/git/client.go index 308491e1f..491981fae 100644 --- a/git/client.go +++ b/git/client.go @@ -20,6 +20,26 @@ import ( var remoteRE = regexp.MustCompile(`(.+)\s+(.+)\s+\((push|fetch)\)`) +// This regexp exists to match lines of the following form: +// \u00006a6872b918c601a0e730710ad8473938a7516d30\u0000title 1\u0000Body 1\u0000\n +// \u00007a6872b918c601a0e730710ad8473938a7516d31\u0000title 2\u0000Body 2\u0000 +// +// This is the format we use when collecting commit information, +// with null bytes as separators. Using null bytes this way allows for us +// to easily maintain newlines that might be in the body. +// +// The ?m modifier is the multi-line modifier, meaning that ^ and $ +// match the beginning and end of lines, respectively. +// +// The [\S\s] matches any whitespace or non-whitespace character, +// which is different from .* because it allows for newlines as well. +// +// The ? following .* and [\S\s] is a lazy modifier, meaning that it will +// match as few characters as possible while still satisfying the rest of the regexp. +// This is important because it allows us to match the first null byte after the title and body, +// rather than the last null byte in the entire string. +var commitLogRE = regexp.MustCompile(`(?m)^[0-9a-fA-F]{7,40}\x00.*?\x00[\S\s]*?\x00$`) + type errWithExitCode interface { ExitCode() int } @@ -227,19 +247,13 @@ func (c *Client) UncommittedChangeCount(ctx context.Context) (int, error) { return count, nil } -func isGitSha(s string) bool { - shaRegex := regexp.MustCompile(`^[0-9a-fA-F]{7,40}$`) - ret := shaRegex.MatchString(s) - return ret -} - func (c *Client) Commits(ctx context.Context, baseRef, headRef string) ([]*Commit, error) { // The formatting directive %x00 indicates that git should include the null byte as a separator. // We use this because it is not a valid character to include in a commit message. Previously, // commas were used here but when we Split on them, we would get incorrect results if commit titles // happened to contain them. // https://git-scm.com/docs/pretty-formats#Documentation/pretty-formats.txt-emx00em - args := []string{"-c", "log.ShowSignature=false", "log", "--pretty=format:%H%x00%s%x00%b", "--cherry", fmt.Sprintf("%s...%s", baseRef, headRef)} + args := []string{"-c", "log.ShowSignature=false", "log", "--pretty=format:%H%x00%s%x00%b%x00", "--cherry", fmt.Sprintf("%s...%s", baseRef, headRef)} cmd, err := c.Command(ctx, args...) if err != nil { return nil, err @@ -248,41 +262,33 @@ func (c *Client) Commits(ctx context.Context, baseRef, headRef string) ([]*Commi if err != nil { return nil, err } + commits := []*Commit{} - sha := 0 - title := 1 - body := 2 - lines := outputLines(out) - for i := 0; i < len(lines); i++ { - split := strings.SplitN(lines[i], "\u0000", 3) - if isGitSha(split[sha]) { - c := &Commit{ - Sha: split[sha], - Title: split[title], - } + commitLogs := commitLogRE.FindAllString(string(out), -1) + for _, commitLog := range commitLogs { + // Each line looks like this: + // 6a6872b918c601a0e730710ad8473938a7516d30\u0000title 1\u0000Body 1\u0000\n - c.Body = split[body] - // This consumes all lines until the next commit and adds them to the body. - for { - i++ - if i >= len(lines) { - break - } + // Or with an optional body: + // 6a6872b918c601a0e730710ad8473938a7516d30\u0000title 1\u0000\u0000\n - possibleSplit := strings.SplitN(lines[i], "\u0000", 3) - if len(possibleSplit) > 2 && isGitSha(possibleSplit[sha]) { - i-- - break - } - c.Body += "\n" - c.Body += lines[i] - } - commits = append(commits, c) - } + // Therefore after splitting we will have: + // ["6a6872b918c601a0e730710ad8473938a7516d30", "title 1", "Body 1", ""] + + // Or with an optional body: + // ["6a6872b918c601a0e730710ad8473938a7516d30", "title 1", "", ""] + commitLogParts := strings.Split(commitLog, "\u0000") + commits = append(commits, &Commit{ + Sha: commitLogParts[0], + Title: commitLogParts[1], + Body: commitLogParts[2], + }) } + if len(commits) == 0 { return nil, fmt.Errorf("could not find any commits between %s and %s", baseRef, headRef) } + return commits, nil } diff --git a/git/client_test.go b/git/client_test.go index a8ac173fe..420683659 100644 --- a/git/client_test.go +++ b/git/client_test.go @@ -493,7 +493,7 @@ func TestClientCommits(t *testing.T) { }, }, }, - wantCmdArgs: `path/to/git -c log.ShowSignature=false log --pretty=format:%H%x00%s%x00%b --cherry SHA1...SHA2`, + wantCmdArgs: `path/to/git -c log.ShowSignature=false log --pretty=format:%H%x00%s%x00%b%x00 --cherry SHA1...SHA2`, wantCommits: []*Commit{{ Sha: "6a6872b918c601a0e730710ad8473938a7516d30", Title: "testing testability test", @@ -510,7 +510,7 @@ func TestClientCommits(t *testing.T) { }, }, }, - wantCmdArgs: `path/to/git -c log.ShowSignature=false log --pretty=format:%H%x00%s%x00%b --cherry SHA1...SHA2`, + wantCmdArgs: `path/to/git -c log.ShowSignature=false log --pretty=format:%H%x00%s%x00%b%x00 --cherry SHA1...SHA2`, wantCommits: []*Commit{{ Sha: "6a6872b918c601a0e730710ad8473938a7516d30", Title: "testing testability test", @@ -533,7 +533,7 @@ func TestClientCommits(t *testing.T) { }, }, }, - wantCmdArgs: `path/to/git -c log.ShowSignature=false log --pretty=format:%H%x00%s%x00%b --cherry SHA1...SHA2`, + wantCmdArgs: `path/to/git -c log.ShowSignature=false log --pretty=format:%H%x00%s%x00%b%x00 --cherry SHA1...SHA2`, wantCommits: []*Commit{ { Sha: "6a6872b918c601a0e730710ad8473938a7516d30", @@ -562,7 +562,7 @@ func TestClientCommits(t *testing.T) { }, }, }, - wantCmdArgs: `path/to/git -c log.ShowSignature=false log --pretty=format:%H%x00%s%x00%b --cherry SHA1...SHA2`, + wantCmdArgs: `path/to/git -c log.ShowSignature=false log --pretty=format:%H%x00%s%x00%b%x00 --cherry SHA1...SHA2`, wantCommits: []*Commit{ { Sha: "6a6872b918c601a0e730710ad8473938a7516d30", @@ -591,7 +591,7 @@ func TestClientCommits(t *testing.T) { }, }, }, - wantCmdArgs: `path/to/git -c log.ShowSignature=false log --pretty=format:%H%x00%s%x00%b --cherry SHA1...SHA2`, + wantCmdArgs: `path/to/git -c log.ShowSignature=false log --pretty=format:%H%x00%s%x00%b%x00 --cherry SHA1...SHA2`, wantCommits: []*Commit{ { Sha: "6a6872b918c601a0e730710ad8473938a7516d30", @@ -610,7 +610,7 @@ func TestClientCommits(t *testing.T) { testData: stubbedCommitsCommandData{ Commits: []stubbedCommit{}, }, - wantCmdArgs: `path/to/git -c log.ShowSignature=false log --pretty=format:%H%x00%s%x00%b --cherry SHA1...SHA2`, + wantCmdArgs: `path/to/git -c log.ShowSignature=false log --pretty=format:%H%x00%s%x00%b%x00 --cherry SHA1...SHA2`, wantErrorMsg: "could not find any commits between SHA1 and SHA2", }, { @@ -619,7 +619,7 @@ func TestClientCommits(t *testing.T) { ErrMsg: "git error message", ExitStatus: 1, }, - wantCmdArgs: `path/to/git -c log.ShowSignature=false log --pretty=format:%H%x00%s%x00%b --cherry SHA1...SHA2`, + wantCmdArgs: `path/to/git -c log.ShowSignature=false log --pretty=format:%H%x00%s%x00%b%x00 --cherry SHA1...SHA2`, wantErrorMsg: "failed to run git: git error message", }, } @@ -660,6 +660,7 @@ func TestCommitsHelperProcess(t *testing.T) { sb.WriteString(commit.Title) sb.WriteString("\u0000") sb.WriteString(commit.Body) + sb.WriteString("\u0000") sb.WriteString("\n") } fmt.Fprint(os.Stdout, sb.String()) diff --git a/pkg/cmd/pr/create/create_test.go b/pkg/cmd/pr/create/create_test.go index c9906b7ec..7d981e686 100644 --- a/pkg/cmd/pr/create/create_test.go +++ b/pkg/cmd/pr/create/create_test.go @@ -627,7 +627,7 @@ func Test_createRun(t *testing.T) { })) }, cmdStubs: func(cs *run.CommandStubber) { - cs.Register(`git( .+)? log( .+)? origin/master\.\.\.feature`, 0, "d3476a1\u0000commit 0\u0000\n7a6ea13\u0000commit 1\u0000") + cs.Register(`git( .+)? log( .+)? origin/master\.\.\.feature`, 0, "d3476a1\u0000commit 0\u0000\u0000\n7a6ea13\u0000commit 1\u0000\u0000") }, promptStubs: func(pm *prompter.PrompterMock) { pm.MarkdownEditorFunc = func(p, d string, ba bool) (string, error) { @@ -851,7 +851,7 @@ func Test_createRun(t *testing.T) { })) }, cmdStubs: func(cs *run.CommandStubber) { - cs.Register(`git -c log.ShowSignature=false log --pretty=format:%H%x00%s%x00%b --cherry origin/master...feature`, 0, "") + cs.Register(`git -c log.ShowSignature=false log --pretty=format:%H%x00%s%x00%b%x00 --cherry origin/master...feature`, 0, "") cs.Register(`git rev-parse --show-toplevel`, 0, "") }, promptStubs: func(pm *prompter.PrompterMock) { @@ -1004,9 +1004,9 @@ func Test_createRun(t *testing.T) { }, cmdStubs: func(cs *run.CommandStubber) { cs.Register( - "git -c log.ShowSignature=false log --pretty=format:%H%x00%s%x00%b --cherry origin/master...feature", + "git -c log.ShowSignature=false log --pretty=format:%H%x00%s%x00%b%x00 --cherry origin/master...feature", 0, - "3a9b48085046d156c5acce8f3b3a0532cd706a4a\u0000first commit of pr\u0000first commit description\n", + "3a9b48085046d156c5acce8f3b3a0532cd706a4a\u0000first commit of pr\u0000first commit description\u0000\n", ) cs.Register(`git rev-parse --show-toplevel`, 0, "") }, @@ -1079,10 +1079,10 @@ func Test_createRun(t *testing.T) { }, cmdStubs: func(cs *run.CommandStubber) { cs.Register( - "git -c log.ShowSignature=false log --pretty=format:%H%x00%s%x00%b --cherry origin/master...feature", + "git -c log.ShowSignature=false log --pretty=format:%H%x00%s%x00%b%x00 --cherry origin/master...feature", 0, - "56b6f8bb7c9e3a30093cd17e48934ce354148e80\u0000second commit of pr\u0000\n"+ - "3a9b48085046d156c5acce8f3b3a0532cd706a4a\u0000first commit of pr\u0000first commit description\n", + "56b6f8bb7c9e3a30093cd17e48934ce354148e80\u0000second commit of pr\u0000\u0000\n"+ + "3a9b48085046d156c5acce8f3b3a0532cd706a4a\u0000first commit of pr\u0000first commit description\u0000\n", ) }, httpStubs: func(reg *httpmock.Registry, t *testing.T) { @@ -1115,10 +1115,10 @@ func Test_createRun(t *testing.T) { }, cmdStubs: func(cs *run.CommandStubber) { cs.Register( - "git -c log.ShowSignature=false log --pretty=format:%H%x00%s%x00%b --cherry origin/master...feature", + "git -c log.ShowSignature=false log --pretty=format:%H%x00%s%x00%b%x00 --cherry origin/master...feature", 0, - "56b6f8bb7c9e3a30093cd17e48934ce354148e80\u0000second commit of pr\u0000second commit description\n"+ - "3a9b48085046d156c5acce8f3b3a0532cd706a4a\u0000first commit of pr\u0000first commit with super long description, with super long description, with super long description, with super long description.\n", + "56b6f8bb7c9e3a30093cd17e48934ce354148e80\u0000second commit of pr\u0000second commit description\u0000\n"+ + "3a9b48085046d156c5acce8f3b3a0532cd706a4a\u0000first commit of pr\u0000first commit with super long description, with super long description, with super long description, with super long description.\u0000\n", ) }, httpStubs: func(reg *httpmock.Registry, t *testing.T) {