From 5c55ac9bf1e5f779220294c843526536605f42ab Mon Sep 17 00:00:00 2001 From: Damien Neil Date: Wed, 25 Jan 2023 09:27:01 -0800 Subject: [PATCH] [release-branch.go1.19] mime/multipart: limit memory/inode consumption of ReadForm Reader.ReadForm is documented as storing "up to maxMemory bytes + 10MB" in memory. Parsed forms can consume substantially more memory than this limit, since ReadForm does not account for map entry overhead and MIME headers. In addition, while the amount of disk memory consumed by ReadForm can be constrained by limiting the size of the parsed input, ReadForm will create one temporary file per form part stored on disk, potentially consuming a large number of inodes. Update ReadForm's memory accounting to include part names, MIME headers, and map entry overhead. Update ReadForm to store all on-disk file parts in a single temporary file. Files returned by FileHeader.Open are documented as having a concrete type of *os.File when a file is stored on disk. The change to use a single temporary file for all parts means that this is no longer the case when a form contains more than a single file part stored on disk. The previous behavior of storing each file part in a separate disk file may be reenabled with GODEBUG=multipartfiles=distinct. Update Reader.NextPart and Reader.NextRawPart to set a 10MiB cap on the size of MIME headers. Thanks to Jakob Ackermann (@das7pad) for reporting this issue. 
Updates #58006 Fixes #58362 Fixes CVE-2022-41725 Change-Id: Ibd780a6c4c83ac8bcfd3cbe344f042e9940f2eab Reviewed-on: https://team-review.git.corp.google.com/c/golang/go-private/+/1714276 Reviewed-by: Julie Qiu TryBot-Result: Security TryBots Reviewed-by: Roland Shoemaker Run-TryBot: Damien Neil (cherry picked from commit ed4664330edcd91b24914c9371c377c132dbce8c) Reviewed-on: https://team-review.git.corp.google.com/c/golang/go-private/+/1728949 Reviewed-by: Tatiana Bradley Run-TryBot: Roland Shoemaker Reviewed-by: Damien Neil Reviewed-on: https://go-review.googlesource.com/c/go/+/468116 TryBot-Result: Gopher Robot Reviewed-by: Than McIntosh Run-TryBot: Michael Pratt Auto-Submit: Michael Pratt --- src/mime/multipart/formdata.go | 132 ++++++++++++++++++++----- src/mime/multipart/formdata_test.go | 140 ++++++++++++++++++++++++++- src/mime/multipart/multipart.go | 25 +++-- src/mime/multipart/readmimeheader.go | 14 +++ src/net/http/request_test.go | 2 +- src/net/textproto/reader.go | 20 +++- 6 files changed, 295 insertions(+), 38 deletions(-) create mode 100644 src/mime/multipart/readmimeheader.go Index: go/src/mime/multipart/formdata.go =================================================================== --- go.orig/src/mime/multipart/formdata.go +++ go/src/mime/multipart/formdata.go @@ -7,6 +7,7 @@ package multipart import ( "bytes" "errors" + "internal/godebug" "io" "math" "net/textproto" @@ -33,23 +34,58 @@ func (r *Reader) ReadForm(maxMemory int6 func (r *Reader) readForm(maxMemory int64) (_ *Form, err error) { form := &Form{make(map[string][]string), make(map[string][]*FileHeader)} + var ( + file *os.File + fileOff int64 + ) + numDiskFiles := 0 + multipartFiles := godebug.Get("multipartfiles") + combineFiles := multipartFiles != "distinct" defer func() { + if file != nil { + if cerr := file.Close(); err == nil { + err = cerr + } + } + if combineFiles && numDiskFiles > 1 { + for _, fhs := range form.File { + for _, fh := range fhs { + fh.tmpshared = true + } + } + } if 
err != nil { form.RemoveAll() + if file != nil { + os.Remove(file.Name()) + } } }() - // Reserve an additional 10 MB for non-file parts. - maxValueBytes := maxMemory + int64(10<<20) - if maxValueBytes <= 0 { + // maxFileMemoryBytes is the maximum bytes of file data we will store in memory. + // Data past this limit is written to disk. + // This limit strictly applies to content, not metadata (filenames, MIME headers, etc.), + // since metadata is always stored in memory, not disk. + // + // maxMemoryBytes is the maximum bytes we will store in memory, including file content, + // non-file part values, metadata, and map entry overhead. + // + // We reserve an additional 10 MB in maxMemoryBytes for non-file data. + // + // The relationship between these parameters, as well as the overly-large and + // unconfigurable 10 MB added on to maxMemory, is unfortunate but difficult to change + // within the constraints of the API as documented. + maxFileMemoryBytes := maxMemory + maxMemoryBytes := maxMemory + int64(10<<20) + if maxMemoryBytes <= 0 { if maxMemory < 0 { - maxValueBytes = 0 + maxMemoryBytes = 0 } else { - maxValueBytes = math.MaxInt64 + maxMemoryBytes = math.MaxInt64 } } for { - p, err := r.NextPart() + p, err := r.nextPart(false, maxMemoryBytes) if err == io.EOF { break } @@ -63,16 +99,27 @@ func (r *Reader) readForm(maxMemory int6 } filename := p.FileName() + // Multiple values for the same key (one map entry, longer slice) are cheaper + // than the same number of values for different keys (many map entries), but + // using a consistent per-value cost for overhead is simpler. + maxMemoryBytes -= int64(len(name)) + maxMemoryBytes -= 100 // map overhead + if maxMemoryBytes < 0 { + // We can't actually take this path, since nextPart would already have + // rejected the MIME headers for being too large. Check anyway. 
+ return nil, ErrMessageTooLarge + } + var b bytes.Buffer if filename == "" { // value, store as string in memory - n, err := io.CopyN(&b, p, maxValueBytes+1) + n, err := io.CopyN(&b, p, maxMemoryBytes+1) if err != nil && err != io.EOF { return nil, err } - maxValueBytes -= n - if maxValueBytes < 0 { + maxMemoryBytes -= n + if maxMemoryBytes < 0 { return nil, ErrMessageTooLarge } form.Value[name] = append(form.Value[name], b.String()) @@ -80,35 +127,45 @@ func (r *Reader) readForm(maxMemory int6 } // file, store in memory or on disk + maxMemoryBytes -= mimeHeaderSize(p.Header) + if maxMemoryBytes < 0 { + return nil, ErrMessageTooLarge + } fh := &FileHeader{ Filename: filename, Header: p.Header, } - n, err := io.CopyN(&b, p, maxMemory+1) + n, err := io.CopyN(&b, p, maxFileMemoryBytes+1) if err != nil && err != io.EOF { return nil, err } - if n > maxMemory { - // too big, write to disk and flush buffer - file, err := os.CreateTemp("", "multipart-") - if err != nil { - return nil, err + if n > maxFileMemoryBytes { + if file == nil { + file, err = os.CreateTemp(r.tempDir, "multipart-") + if err != nil { + return nil, err + } } + numDiskFiles++ size, err := io.Copy(file, io.MultiReader(&b, p)) - if cerr := file.Close(); err == nil { - err = cerr - } if err != nil { - os.Remove(file.Name()) return nil, err } fh.tmpfile = file.Name() fh.Size = size + fh.tmpoff = fileOff + fileOff += size + if !combineFiles { + if err := file.Close(); err != nil { + return nil, err + } + file = nil + } } else { fh.content = b.Bytes() fh.Size = int64(len(fh.content)) - maxMemory -= n - maxValueBytes -= n + maxFileMemoryBytes -= n + maxMemoryBytes -= n } form.File[name] = append(form.File[name], fh) } @@ -116,6 +173,17 @@ func (r *Reader) readForm(maxMemory int6 return form, nil } +func mimeHeaderSize(h textproto.MIMEHeader) (size int64) { + for k, vs := range h { + size += int64(len(k)) + size += 100 // map entry overhead + for _, v := range vs { + size += int64(len(v)) + } + } + return 
size +} + // Form is a parsed multipart form. // Its File parts are stored either in memory or on disk, // and are accessible via the *FileHeader's Open method. @@ -133,7 +201,7 @@ func (f *Form) RemoveAll() error { for _, fh := range fhs { if fh.tmpfile != "" { e := os.Remove(fh.tmpfile) - if e != nil && err == nil { + if e != nil && !errors.Is(e, os.ErrNotExist) && err == nil { err = e } } @@ -148,15 +216,25 @@ type FileHeader struct { Header textproto.MIMEHeader Size int64 - content []byte - tmpfile string + content []byte + tmpfile string + tmpoff int64 + tmpshared bool } // Open opens and returns the FileHeader's associated File. func (fh *FileHeader) Open() (File, error) { if b := fh.content; b != nil { r := io.NewSectionReader(bytes.NewReader(b), 0, int64(len(b))) - return sectionReadCloser{r}, nil + return sectionReadCloser{r, nil}, nil + } + if fh.tmpshared { + f, err := os.Open(fh.tmpfile) + if err != nil { + return nil, err + } + r := io.NewSectionReader(f, fh.tmpoff, fh.Size) + return sectionReadCloser{r, f}, nil } return os.Open(fh.tmpfile) } @@ -175,8 +253,12 @@ type File interface { type sectionReadCloser struct { *io.SectionReader + io.Closer } func (rc sectionReadCloser) Close() error { + if rc.Closer != nil { + return rc.Closer.Close() + } return nil } Index: go/src/mime/multipart/formdata_test.go =================================================================== --- go.orig/src/mime/multipart/formdata_test.go +++ go/src/mime/multipart/formdata_test.go @@ -6,8 +6,10 @@ package multipart import ( "bytes" + "fmt" "io" "math" + "net/textproto" "os" "strings" "testing" @@ -208,8 +210,8 @@ Content-Disposition: form-data; name="la maxMemory int64 err error }{ - {"smaller", 50, nil}, - {"exact-fit", 25, nil}, + {"smaller", 50 + int64(len("largetext")) + 100, nil}, + {"exact-fit", 25 + int64(len("largetext")) + 100, nil}, {"too-large", 0, ErrMessageTooLarge}, } for _, tc := range testCases { @@ -224,7 +226,7 @@ Content-Disposition: form-data; name="la 
defer f.RemoveAll() } if tc.err != err { - t.Fatalf("ReadForm error - got: %v; expected: %v", tc.err, err) + t.Fatalf("ReadForm error - got: %v; expected: %v", err, tc.err) } if err == nil { if g := f.Value["largetext"][0]; g != largeTextValue { @@ -234,3 +236,135 @@ Content-Disposition: form-data; name="la }) } } + +// TestReadForm_MetadataTooLarge verifies that we account for the size of field names, +// MIME headers, and map entry overhead while limiting the memory consumption of parsed forms. +func TestReadForm_MetadataTooLarge(t *testing.T) { + for _, test := range []struct { + name string + f func(*Writer) + }{{ + name: "large name", + f: func(fw *Writer) { + name := strings.Repeat("a", 10<<20) + w, _ := fw.CreateFormField(name) + w.Write([]byte("value")) + }, + }, { + name: "large MIME header", + f: func(fw *Writer) { + h := make(textproto.MIMEHeader) + h.Set("Content-Disposition", `form-data; name="a"`) + h.Set("X-Foo", strings.Repeat("a", 10<<20)) + w, _ := fw.CreatePart(h) + w.Write([]byte("value")) + }, + }, { + name: "many parts", + f: func(fw *Writer) { + for i := 0; i < 110000; i++ { + w, _ := fw.CreateFormField("f") + w.Write([]byte("v")) + } + }, + }} { + t.Run(test.name, func(t *testing.T) { + var buf bytes.Buffer + fw := NewWriter(&buf) + test.f(fw) + if err := fw.Close(); err != nil { + t.Fatal(err) + } + fr := NewReader(&buf, fw.Boundary()) + _, err := fr.ReadForm(0) + if err != ErrMessageTooLarge { + t.Errorf("fr.ReadForm() = %v, want ErrMessageTooLarge", err) + } + }) + } +} + +// TestReadForm_ManyFiles_Combined tests that a multipart form containing many files only +// results in a single on-disk file. +func TestReadForm_ManyFiles_Combined(t *testing.T) { + const distinct = false + testReadFormManyFiles(t, distinct) +} + +// TestReadForm_ManyFiles_Distinct tests that setting GODEBUG=multipartfiles=distinct +// results in every file in a multipart form being placed in a distinct on-disk file. 
+func TestReadForm_ManyFiles_Distinct(t *testing.T) { + t.Setenv("GODEBUG", "multipartfiles=distinct") + const distinct = true + testReadFormManyFiles(t, distinct) +} + +func testReadFormManyFiles(t *testing.T, distinct bool) { + var buf bytes.Buffer + fw := NewWriter(&buf) + const numFiles = 10 + for i := 0; i < numFiles; i++ { + name := fmt.Sprint(i) + w, err := fw.CreateFormFile(name, name) + if err != nil { + t.Fatal(err) + } + w.Write([]byte(name)) + } + if err := fw.Close(); err != nil { + t.Fatal(err) + } + fr := NewReader(&buf, fw.Boundary()) + fr.tempDir = t.TempDir() + form, err := fr.ReadForm(0) + if err != nil { + t.Fatal(err) + } + for i := 0; i < numFiles; i++ { + name := fmt.Sprint(i) + if got := len(form.File[name]); got != 1 { + t.Fatalf("form.File[%q] has %v entries, want 1", name, got) + } + fh := form.File[name][0] + file, err := fh.Open() + if err != nil { + t.Fatalf("form.File[%q].Open() = %v", name, err) + } + if distinct { + if _, ok := file.(*os.File); !ok { + t.Fatalf("form.File[%q].Open: %T, want *os.File", name, file) + } + } + got, err := io.ReadAll(file) + file.Close() + if string(got) != name || err != nil { + t.Fatalf("read form.File[%q]: %q, %v; want %q, nil", name, string(got), err, name) + } + } + dir, err := os.Open(fr.tempDir) + if err != nil { + t.Fatal(err) + } + defer dir.Close() + names, err := dir.Readdirnames(0) + if err != nil { + t.Fatal(err) + } + wantNames := 1 + if distinct { + wantNames = numFiles + } + if len(names) != wantNames { + t.Fatalf("temp dir contains %v files; want 1", len(names)) + } + if err := form.RemoveAll(); err != nil { + t.Fatalf("form.RemoveAll() = %v", err) + } + names, err = dir.Readdirnames(0) + if err != nil { + t.Fatal(err) + } + if len(names) != 0 { + t.Fatalf("temp dir contains %v files; want 0", len(names)) + } +} Index: go/src/mime/multipart/multipart.go =================================================================== --- go.orig/src/mime/multipart/multipart.go +++ 
go/src/mime/multipart/multipart.go @@ -128,12 +128,12 @@ func (r *stickyErrorReader) Read(p []byt return n, r.err } -func newPart(mr *Reader, rawPart bool) (*Part, error) { +func newPart(mr *Reader, rawPart bool, maxMIMEHeaderSize int64) (*Part, error) { bp := &Part{ Header: make(map[string][]string), mr: mr, } - if err := bp.populateHeaders(); err != nil { + if err := bp.populateHeaders(maxMIMEHeaderSize); err != nil { return nil, err } bp.r = partReader{bp} @@ -149,11 +149,15 @@ func newPart(mr *Reader, rawPart bool) ( return bp, nil } -func (bp *Part) populateHeaders() error { - r := textproto.NewReader(bp.mr.bufReader) - header, err := r.ReadMIMEHeader() +func (p *Part) populateHeaders(maxMIMEHeaderSize int64) error { + r := textproto.NewReader(p.mr.bufReader) + header, err := readMIMEHeader(r, maxMIMEHeaderSize) if err == nil { - bp.Header = header + p.Header = header + } + // TODO: Add a distinguishable error to net/textproto. + if err != nil && err.Error() == "message too large" { + err = ErrMessageTooLarge } return err } @@ -294,6 +298,7 @@ func (p *Part) Close() error { // isn't supported. type Reader struct { bufReader *bufio.Reader + tempDir string // used in tests currentPart *Part partsRead int @@ -304,6 +309,10 @@ type Reader struct { dashBoundary []byte // "--boundary" } +// maxMIMEHeaderSize is the maximum size of a MIME header we will parse, +// including header keys, values, and map overhead. +const maxMIMEHeaderSize = 10 << 20 + // NextPart returns the next part in the multipart or an error. // When there are no more parts, the error io.EOF is returned. // @@ -311,7 +320,7 @@ type Reader struct { // has a value of "quoted-printable", that header is instead // hidden and the body is transparently decoded during Read calls. func (r *Reader) NextPart() (*Part, error) { - return r.nextPart(false) + return r.nextPart(false, maxMIMEHeaderSize) } // NextRawPart returns the next part in the multipart or an error. 
@@ -320,10 +329,10 @@ func (r *Reader) NextPart() (*Part, erro // Unlike NextPart, it does not have special handling for // "Content-Transfer-Encoding: quoted-printable". func (r *Reader) NextRawPart() (*Part, error) { - return r.nextPart(true) + return r.nextPart(true, maxMIMEHeaderSize) } -func (r *Reader) nextPart(rawPart bool) (*Part, error) { +func (r *Reader) nextPart(rawPart bool, maxMIMEHeaderSize int64) (*Part, error) { if r.currentPart != nil { r.currentPart.Close() } @@ -348,7 +357,7 @@ func (r *Reader) nextPart(rawPart bool) if r.isBoundaryDelimiterLine(line) { r.partsRead++ - bp, err := newPart(r, rawPart) + bp, err := newPart(r, rawPart, maxMIMEHeaderSize) if err != nil { return nil, err } Index: go/src/mime/multipart/readmimeheader.go =================================================================== --- /dev/null +++ go/src/mime/multipart/readmimeheader.go @@ -0,0 +1,14 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. +package multipart + +import ( + "net/textproto" + _ "unsafe" // for go:linkname +) + +// readMIMEHeader is defined in package net/textproto. 
+// +//go:linkname readMIMEHeader net/textproto.readMIMEHeader +func readMIMEHeader(r *textproto.Reader, lim int64) (textproto.MIMEHeader, error) Index: go/src/net/http/request_test.go =================================================================== --- go.orig/src/net/http/request_test.go +++ go/src/net/http/request_test.go @@ -1116,7 +1116,7 @@ func testMissingFile(t *testing.T, req * t.Errorf("FormFile file = %v, want nil", f) } if fh != nil { - t.Errorf("FormFile file header = %q, want nil", fh) + t.Errorf("FormFile file header = %v, want nil", fh) } if err != ErrMissingFile { t.Errorf("FormFile err = %q, want ErrMissingFile", err) Index: go/src/net/textproto/reader.go =================================================================== --- go.orig/src/net/textproto/reader.go +++ go/src/net/textproto/reader.go @@ -7,8 +7,10 @@ package textproto import ( "bufio" "bytes" + "errors" "fmt" "io" + "math" "strconv" "strings" "sync" @@ -483,6 +485,12 @@ var colon = []byte(":") // } // func (r *Reader) ReadMIMEHeader() (MIMEHeader, error) { + return readMIMEHeader(r, math.MaxInt64) +} + +// readMIMEHeader is a version of ReadMIMEHeader which takes a limit on the header size. +// It is called by the mime/multipart package. +func readMIMEHeader(r *Reader, lim int64) (MIMEHeader, error) { // Avoid lots of small slice allocations later by allocating one // large one ahead of time which we'll cut up into smaller // slices. If this isn't big enough later, we allocate small ones. @@ -524,9 +532,19 @@ func (r *Reader) ReadMIMEHeader() (MIMEH } // Skip initial spaces in value. - value := strings.TrimLeft(string(v), " \t") + value := string(bytes.TrimLeft(v, " \t")) vv := m[key] + if vv == nil { + lim -= int64(len(key)) + lim -= 100 // map entry overhead + } + lim -= int64(len(value)) + if lim < 0 { + // TODO: This should be a distinguishable error (ErrMessageTooLarge) + // to allow mime/multipart to detect it. 
+ return m, errors.New("message too large") + } if vv == nil && len(strs) > 0 { // More than likely this will be a single-element key. // Most headers aren't multi-valued.