From ced071feaea37478ea9bb7ad656077eceb209e01 Mon Sep 17 00:00:00 2001 From: Sam Coe Date: Thu, 2 Feb 2023 08:19:30 +1100 Subject: [PATCH] Sanitize ANSII control characters returned from the server (#6916) --- api/http_client.go | 2 + api/sanitize_ascii.go | 193 +++++++++++++++++++++++++++++++++++++ api/sanitize_ascii_test.go | 50 ++++++++++ 3 files changed, 245 insertions(+) create mode 100644 api/sanitize_ascii.go create mode 100644 api/sanitize_ascii_test.go diff --git a/api/http_client.go b/api/http_client.go index 81693cbd1..83f228409 100644 --- a/api/http_client.go +++ b/api/http_client.go @@ -64,6 +64,8 @@ func NewHTTPClient(opts HTTPClientOptions) (*http.Client, error) { client.Transport = AddAuthTokenHeader(client.Transport, opts.Config) } + client.Transport = AddASCIISanitizer(client.Transport) + return client, nil } diff --git a/api/sanitize_ascii.go b/api/sanitize_ascii.go new file mode 100644 index 000000000..92741a147 --- /dev/null +++ b/api/sanitize_ascii.go @@ -0,0 +1,193 @@ +package api + +import ( + "bytes" + "errors" + "io" + "net/http" + "regexp" + "strings" +) + +var jsonTypeRE = regexp.MustCompile(`[/+]json($|;)`) + +// GitHub servers return non-printable characters as their unicode code point values. +// The values of \u0000 to \u001F represent C0 ASCII control characters and +// the values of \u0080 to \u009F represent C1 ASCII control characters. These control +// characters will be interpreted by the terminal, this behaviour can be used maliciously +// as an attack vector, especially the control character \u001B. This function wraps +// JSON response bodies in a ReadCloser that transforms C0 and C1 control characters +// to their caret and hex notations respectively so that the terminal will not interpret them. +func AddASCIISanitizer(rt http.RoundTripper) http.RoundTripper { + return &funcTripper{roundTrip: func(req *http.Request) (*http.Response, error) { + res, err := rt.RoundTrip(req) + if err != nil || !jsonTypeRE.MatchString(res.Header.Get("Content-Type")) { + return res, err + } + res.Body = &sanitizeASCIIReadCloser{ReadCloser: res.Body} + return res, err + }} +} + +// sanitizeASCIIReadCloser implements the ReadCloser interface. +type sanitizeASCIIReadCloser struct { + io.ReadCloser + addBackslash bool + previousWindow []byte +} + +// Read uses a sliding window alogorithm to detect C0 and C1 +// ASCII control sequences as they are read and replaces them +// with equivelent inert characters. Characters that are not part +// of a control sequence not modified. +func (s *sanitizeASCIIReadCloser) Read(out []byte) (int, error) { + var readErr error + var outIndex int + var bufIndex int + var bufLen int + var window []byte + buf := make([]byte, len(out)) + + bufLen, readErr = s.ReadCloser.Read(buf) + if readErr != nil && !errors.Is(readErr, io.EOF) { + if bufLen > 0 { + // Do not sanitize if there was a read error that is not EOF. + bufLen = copy(out, buf) + } + return bufLen, readErr + } + + if s.previousWindow != nil { + buf = append(s.previousWindow, buf...) + bufLen += len(s.previousWindow) + } + + for { + remaining := min(6, (bufLen - bufIndex)) + window = buf[bufIndex : bufIndex+remaining] + if remaining < 6 { + break + } + + if bytes.HasPrefix(window, []byte(`\u00`)) { + repl, _ := mapControlCharacterToCaret(window) + if s.addBackslash { + repl = append([]byte{92}, repl...) + } + l := len(repl) + for j := 0; j < l; j++ { + out[outIndex] = repl[j] + outIndex++ + } + bufIndex += 6 + s.addBackslash = false + continue + } + + if window[0] == '\\' { + s.addBackslash = !s.addBackslash + } else { + s.addBackslash = false + } + + out[outIndex] = buf[bufIndex] + outIndex++ + bufIndex++ + } + + if readErr != nil && errors.Is(readErr, io.EOF) { + remaining := bufLen - bufIndex + for j := 0; j < remaining; j++ { + out[outIndex] = window[j] + outIndex++ + bufIndex++ + } + } else { + s.previousWindow = window + } + + return outIndex, readErr +} + +// mapControlCharacterToCaret maps C0 control sequences to caret notation +// and C1 control sequences to hex notation. C1 control sequences do not +// have caret notation representation. +func mapControlCharacterToCaret(b []byte) ([]byte, bool) { + m := map[string]string{ + `\u0000`: `^@`, + `\u0001`: `^A`, + `\u0002`: `^B`, + `\u0003`: `^C`, + `\u0004`: `^D`, + `\u0005`: `^E`, + `\u0006`: `^F`, + `\u0007`: `^G`, + `\u0008`: `^H`, + `\u0009`: `^I`, + `\u000a`: `^J`, + `\u000b`: `^K`, + `\u000c`: `^L`, + `\u000d`: `^M`, + `\u000e`: `^N`, + `\u000f`: `^O`, + `\u0010`: `^P`, + `\u0011`: `^Q`, + `\u0012`: `^R`, + `\u0013`: `^S`, + `\u0014`: `^T`, + `\u0015`: `^U`, + `\u0016`: `^V`, + `\u0017`: `^W`, + `\u0018`: `^X`, + `\u0019`: `^Y`, + `\u001a`: `^Z`, + `\u001b`: `^[`, + `\u001c`: `^\\`, + `\u001d`: `^]`, + `\u001e`: `^^`, + `\u001f`: `^_`, + `\u0080`: `\\200`, + `\u0081`: `\\201`, + `\u0082`: `\\202`, + `\u0083`: `\\203`, + `\u0084`: `\\204`, + `\u0085`: `\\205`, + `\u0086`: `\\206`, + `\u0087`: `\\207`, + `\u0088`: `\\210`, + `\u0089`: `\\211`, + `\u008a`: `\\212`, + `\u008b`: `\\213`, + `\u008c`: `\\214`, + `\u008d`: `\\215`, + `\u008e`: `\\216`, + `\u008f`: `\\217`, + `\u0090`: `\\220`, + `\u0091`: `\\221`, + `\u0092`: `\\222`, + `\u0093`: `\\223`, + `\u0094`: `\\224`, + `\u0095`: `\\225`, + `\u0096`: `\\226`, + `\u0097`: `\\227`, + `\u0098`: `\\230`, + `\u0099`: `\\231`, + `\u009a`: `\\232`, + `\u009b`: `\\233`, + `\u009c`: `\\234`, + `\u009d`: `\\235`, + `\u009e`: `\\236`, + `\u009f`: `\\237`, + } + if c, ok := m[strings.ToLower(string(b))]; ok { + return []byte(c), true + } + return b, false +} + +func min(a, b int) int { + if a < b { + return a + } + return b +} diff --git a/api/sanitize_ascii_test.go b/api/sanitize_ascii_test.go new file mode 100644 index 000000000..9b405edc8 --- /dev/null +++ b/api/sanitize_ascii_test.go @@ -0,0 +1,50 @@ +package api + +import ( + "encoding/json" + "fmt" + "io" + "net/http" + "net/http/httptest" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestHTTPClient_SanitizeASCIIControlCharacters(t *testing.T) { + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + issue := Issue{ + Title: "\u001B[31mRed Title\u001B[0m", + Body: "1\u0001 2\u0002 3\u0003 4\u0004 5\u0005 6\u0006 7\u0007 8\u0008 9\t A\r\n B\u000b C\u000c D\r\n E\u000e F\u000f", + Author: Author{ + ID: "1", + Name: "10\u0010 11\u0011 12\u0012 13\u0013 14\u0014 15\u0015 16\u0016 17\u0017 18\u0018 19\u0019 1A\u001a 1B\u001b 1C\u001c 1D\u001d 1E\u001e 1F\u001f", + Login: "monalisa", + }, + ActiveLockReason: "Escaped \u001B \\u001B \\\u001B \\\\u001B", + } + responseData, _ := json.Marshal(issue) + w.Header().Set("Content-Type", "application/json; charset=utf-8") + fmt.Fprint(w, string(responseData)) + })) + defer ts.Close() + + client, err := NewHTTPClient(HTTPClientOptions{}) + require.NoError(t, err) + req, err := http.NewRequest("GET", ts.URL, nil) + require.NoError(t, err) + res, err := client.Do(req) + require.NoError(t, err) + body, err := io.ReadAll(res.Body) + res.Body.Close() + require.NoError(t, err) + var issue Issue + err = json.Unmarshal(body, &issue) + require.NoError(t, err) + assert.Equal(t, "^[[31mRed Title^[[0m", issue.Title) + assert.Equal(t, "1^A 2^B 3^C 4^D 5^E 6^F 7^G 8^H 9\t A\r\n B^K C^L D\r\n E^N F^O", issue.Body) + assert.Equal(t, "10^P 11^Q 12^R 13^S 14^T 15^U 16^V 17^W 18^X 19^Y 1A^Z 1B^[ 1C^\\ 1D^] 1E^^ 1F^_", issue.Author.Name) + assert.Equal(t, "monalisa", issue.Author.Login) + assert.Equal(t, "Escaped ^[ \\^[ \\^[ \\\\^[", issue.ActiveLockReason) +}