From 44b02188e2c33b48940be4eadc3f34c0ec07a381 Mon Sep 17 00:00:00 2001 From: Sam Coe Date: Tue, 7 Mar 2023 12:44:30 +1100 Subject: [PATCH] Properly handle C1 control sequences (#7079) --- api/sanitize_ascii.go | 129 +++++++++++++++++++++++-------------- api/sanitize_ascii_test.go | 37 ++++++++++- 2 files changed, 117 insertions(+), 49 deletions(-) diff --git a/api/sanitize_ascii.go b/api/sanitize_ascii.go index 6033a07a6..951cc1bf8 100644 --- a/api/sanitize_ascii.go +++ b/api/sanitize_ascii.go @@ -11,13 +11,15 @@ import ( var jsonTypeRE = regexp.MustCompile(`[/+]json($|;)`) -// GitHub servers return non-printable characters as their unicode code point values. -// The values of \u0000 to \u001F represent C0 ASCII control characters and -// the values of \u0080 to \u009F represent C1 ASCII control characters. These control -// characters will be interpreted by the terminal, this behaviour can be used maliciously -// as an attack vector, especially the control character \u001B. This function wraps -// JSON response bodies in a ReadCloser that transforms C0 and C1 control characters -// to their caret and hex notations respectively so that the terminal will not interpret them. +// GitHub servers do not sanitize their API output for terminal display +// and leave in unescaped ASCII control characters. +// C0 control characters are represented in their unicode code point form ranging from \u0000 to \u001F. +// C1 control characters are represented in two bytes, the first being 0xC2 and the second ranging from 0x80 to 0x9F. +// These control characters will be interpreted by the terminal, this behaviour can be +// used maliciously as an attack vector, especially the control characters \u001B and \u009B. +// This function wraps JSON response bodies in a ReadCloser that transforms C0 and C1 +// control characters to their caret notations respectively so that the terminal will not +// interpret them. func AddASCIISanitizer(rt http.RoundTripper) http.RoundTripper { return &funcTripper{roundTrip: func(req *http.Request) (*http.Response, error) { res, err := rt.RoundTrip(req) @@ -64,12 +66,28 @@ func (s *sanitizeASCIIReadCloser) Read(out []byte) (int, error) { for bufIndex < bufLen-6 && outIndex < outLen { window := buf[bufIndex : bufIndex+6] - if bytes.HasPrefix(window, []byte(`\u00`)) { - repl, _ := mapControlCharacterToCaret(window) - if s.addEscape { - repl = append([]byte{'\\'}, repl...) - s.addEscape = false + // Replace C1 Control Characters + if window[0] == 0xC2 { + repl, _ := mapC1ToCaret(window[:2]) + for j := 0; j < len(repl); j++ { + if outIndex < outLen { + out[outIndex] = repl[j] + outIndex++ + } else { + s.remainder = append(s.remainder, repl[j]) + } } + bufIndex += 2 + continue + } + + // Replace C0 Control Characters + if bytes.HasPrefix(window, []byte(`\u00`)) { + repl, found := mapC0ToCaret(window) + if s.addEscape && found { + repl = append([]byte{'\\'}, repl...) + } + s.addEscape = false for j := 0; j < len(repl); j++ { if outIndex < outLen { out[outIndex] = repl[j] @@ -118,10 +136,8 @@ func (s *sanitizeASCIIReadCloser) Read(out []byte) (int, error) { return outIndex, readErr } -// mapControlCharacterToCaret maps C0 control sequences to caret notation -// and C1 control sequences to hex notation. C1 control sequences do not -// have caret notation representation. -func mapControlCharacterToCaret(b []byte) ([]byte, bool) { +// mapC0ToCaret maps C0 control sequences to caret notation. +func mapC0ToCaret(b []byte) ([]byte, bool) { m := map[string]string{ `\u0000`: `^@`, `\u0001`: `^A`, @@ -155,41 +171,58 @@ func mapControlCharacterToCaret(b []byte) ([]byte, bool) { `\u001d`: `^]`, `\u001e`: `^^`, `\u001f`: `^_`, - `\u0080`: `\\200`, - `\u0081`: `\\201`, - `\u0082`: `\\202`, - `\u0083`: `\\203`, - `\u0084`: `\\204`, - `\u0085`: `\\205`, - `\u0086`: `\\206`, - `\u0087`: `\\207`, - `\u0088`: `\\210`, - `\u0089`: `\\211`, - `\u008a`: `\\212`, - `\u008b`: `\\213`, - `\u008c`: `\\214`, - `\u008d`: `\\215`, - `\u008e`: `\\216`, - `\u008f`: `\\217`, - `\u0090`: `\\220`, - `\u0091`: `\\221`, - `\u0092`: `\\222`, - `\u0093`: `\\223`, - `\u0094`: `\\224`, - `\u0095`: `\\225`, - `\u0096`: `\\226`, - `\u0097`: `\\227`, - `\u0098`: `\\230`, - `\u0099`: `\\231`, - `\u009a`: `\\232`, - `\u009b`: `\\233`, - `\u009c`: `\\234`, - `\u009d`: `\\235`, - `\u009e`: `\\236`, - `\u009f`: `\\237`, } if c, ok := m[strings.ToLower(string(b))]; ok { return []byte(c), true } return b, false } + +// mapC1ToCaret maps C1 control sequences to caret notation. +// C1 control sequences are two bytes and start with 0xC2. +func mapC1ToCaret(b []byte) ([]byte, bool) { + if len(b) != 2 { + return b, false + } + if b[0] != 0xC2 { + return b, false + } + m := map[byte]string{ + 128: `^@`, + 129: `^A`, + 130: `^B`, + 131: `^C`, + 132: `^D`, + 133: `^E`, + 134: `^F`, + 135: `^G`, + 136: `^H`, + 137: `^I`, + 138: `^J`, + 139: `^K`, + 140: `^L`, + 141: `^M`, + 142: `^N`, + 143: `^O`, + 144: `^P`, + 145: `^Q`, + 146: `^R`, + 147: `^S`, + 148: `^T`, + 149: `^U`, + 150: `^V`, + 151: `^W`, + 152: `^X`, + 153: `^Y`, + 154: `^Z`, + 155: `^[`, + 156: `^\\`, + 157: `^]`, + 158: `^^`, + 159: `^_`, + } + if c, ok := m[b[1]]; ok { + return []byte(c), true + } + return b, false +} diff --git a/api/sanitize_ascii_test.go b/api/sanitize_ascii_test.go index ff43f9287..6188e1e0e 100644 --- a/api/sanitize_ascii_test.go +++ b/api/sanitize_ascii_test.go @@ -14,7 +14,7 @@ import ( "github.com/stretchr/testify/require" ) -func TestHTTPClient_SanitizeASCIIControlCharacters(t *testing.T) { +func TestHTTPClientSanitizeASCIIControlCharactersC0(t *testing.T) { ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { issue := Issue{ Title: "\u001B[31mRed Title\u001B[0m", @@ -51,6 +51,41 @@ func TestHTTPClient_SanitizeASCIIControlCharacters(t *testing.T) { assert.Equal(t, "Escaped ^[ \\^[ \\^[ \\\\^[", issue.ActiveLockReason) } +func TestHTTPClientSanitizeASCIIControlCharactersC1(t *testing.T) { + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + issue := Issue{ + Title: "\xC2\x9B[31mRed Title\xC2\x9B[0m", + Body: "80\xC2\x80 81\xC2\x81 82\xC2\x82 83\xC2\x83 84\xC2\x84 85\xC2\x85 86\xC2\x86 87\xC2\x87 88\xC2\x88 89\xC2\x89 8A\xC2\x8A 8B\xC2\x8B 8C\xC2\x8C 8D\xC2\x8D 8E\xC2\x8E 8F\xC2\x8F", + Author: Author{ + ID: "1", + Name: "90\xC2\x90 91\xC2\x91 92\xC2\x92 93\xC2\x93 94\xC2\x94 95\xC2\x95 96\xC2\x96 97\xC2\x97 98\xC2\x98 99\xC2\x99 9A\xC2\x9A 9B\xC2\x9B 9C\xC2\x9C 9D\xC2\x9D 9E\xC2\x9E 9F\xC2\x9F", + Login: "monalisa\xC2\xA1", + }, + } + responseData, _ := json.Marshal(issue) + w.Header().Set("Content-Type", "application/json; charset=utf-8") + fmt.Fprint(w, string(responseData)) + })) + defer ts.Close() + + client, err := NewHTTPClient(HTTPClientOptions{}) + require.NoError(t, err) + req, err := http.NewRequest("GET", ts.URL, nil) + require.NoError(t, err) + res, err := client.Do(req) + require.NoError(t, err) + body, err := io.ReadAll(res.Body) + res.Body.Close() + require.NoError(t, err) + var issue Issue + err = json.Unmarshal(body, &issue) + require.NoError(t, err) + assert.Equal(t, "^[[31mRed Title^[[0m", issue.Title) + assert.Equal(t, "80^@ 81^A 82^B 83^C 84^D 85^E 86^F 87^G 88^H 89^I 8A^J 8B^K 8C^L 8D^M 8E^N 8F^O", issue.Body) + assert.Equal(t, "90^P 91^Q 92^R 93^S 94^T 95^U 96^V 97^W 98^X 99^Y 9A^Z 9B^[ 9C^\\ 9D^] 9E^^ 9F^_", issue.Author.Name) + assert.Equal(t, "monalisa¡", issue.Author.Login) +} + func TestSanitizeASCIIReadCloser(t *testing.T) { data := []byte(`"Assign},"L`) var r io.Reader = bytes.NewReader(data)