Handle empty blob files more appropriately
Several API tests were added to ensure correct acceptance of zero-size and empty tar files. This led to several changes in the storage backend around the guarantees of remote file reading, which backs the layer and layer upload type.

In support of these changes, zero-length and empty checks have been added to the digest package. These provide a sanity check against upstream tarsum changes. The fileReader has been modified to be more robust when reading and seeking on zero-length or non-existent files. The file no longer needs to exist for the reader to be created. Seeks can now move beyond the end of the file, causing reads to return io.EOF. This eliminates errors during certain race conditions for reading files which should be detected by stat calls.

As part of this, a few error types were factored out and the read buffer size was increased to something more reasonable.

Signed-off-by: Stephen J Day <stephen.day@docker.com>
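The seek/EOF contract described above is easy to lose in the diff below, so here is a small standalone sketch of a reader with the same behavior: creation never fails for a missing file, seeks may move past the end, and reads on a missing file or past the end return io.EOF rather than an error. This is an illustration only, not code from this commit; laxReader and its fields are made up for the example.

package main

import (
    "bytes"
    "fmt"
    "io"
)

// laxReader is a hypothetical stand-in for a remote file reader. A nil
// content slice models a file that does not exist (yet).
type laxReader struct {
    content []byte
    offset  int64
}

func (r *laxReader) Read(p []byte) (int, error) {
    if r.content == nil || r.offset >= int64(len(r.content)) {
        return 0, io.EOF // missing file or past end: no error, just EOF
    }
    n := copy(p, r.content[r.offset:])
    r.offset += int64(n)
    return n, nil
}

func (r *laxReader) Seek(offset int64, whence int) (int64, error) {
    var newOffset int64
    switch whence {
    case io.SeekStart:
        newOffset = offset
    case io.SeekCurrent:
        newOffset = r.offset + offset
    case io.SeekEnd:
        newOffset = int64(len(r.content)) + offset
    }
    if newOffset < 0 {
        return r.offset, fmt.Errorf("cannot seek to negative position")
    }
    // Seeking beyond the end is *not* an error; subsequent reads return io.EOF.
    r.offset = newOffset
    return newOffset, nil
}

func main() {
    missing := &laxReader{content: nil} // "file" does not exist
    buf := make([]byte, 16)
    n, err := missing.Read(buf)
    fmt.Println(n, err) // 0 EOF

    existing := &laxReader{content: bytes.Repeat([]byte("a"), 4)}
    existing.Seek(10, io.SeekStart) // past the end: allowed
    n, err = existing.Read(buf)
    fmt.Println(n, err) // 0 EOF
}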
parent 097fce3bb2
commit 0270bec916
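One note before the diff: the hex portion of the new DigestTarSumV1EmptyTar constant added to the digest package is simply the SHA-256 of zero bytes of input, so the constant can be sanity-checked with nothing but the standard library. The snippet below is an illustration, not part of the commit.

package main

import (
    "crypto/sha256"
    "encoding/hex"
    "fmt"
    "strings"
)

func main() {
    const emptyTarDigest = "tarsum.v1+sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"

    sum := sha256.Sum256(nil) // SHA-256 over zero bytes of input
    hexPart := strings.TrimPrefix(emptyTarDigest, "tarsum.v1+sha256:")

    fmt.Println(hex.EncodeToString(sum[:]) == hexPart) // true
}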
@@ -1332,9 +1332,9 @@ var errorDescriptors = []ErrorDescriptor{
 	{
 		Code:    ErrorCodeNameInvalid,
 		Value:   "NAME_INVALID",
-		Message: "manifest name did not match URI",
-		Description: `During a manifest upload, if the name in the manifest
-		does not match the uri name, this error will be returned.`,
+		Message: "invalid repository name",
+		Description: `Invalid repository name encountered either during
+		manifest validation or any API operation.`,
 		HTTPStatusCodes: []int{http.StatusBadRequest, http.StatusNotFound},
 	},
 	{
@@ -13,6 +13,11 @@ import (
 	"github.com/docker/docker/pkg/tarsum"
 )

+const (
+	// DigestTarSumV1EmptyTar is the digest for the empty tar file.
+	DigestTarSumV1EmptyTar = "tarsum.v1+sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
+)
+
 // Digest allows simple protection of hex formatted digest strings, prefixed
 // by their algorithm. Strings of type Digest have some guarantee of being in
 // the correct format and it provides quick access to the components of a
@@ -1,6 +1,10 @@
 package digest

-import "testing"
+import (
+	"bytes"
+	"io"
+	"testing"
+)

 func TestParseDigest(t *testing.T) {
 	for _, testcase := range []struct {
@@ -78,3 +82,25 @@ func TestParseDigest(t *testing.T) {
 		}
 	}
 }
+
+// A few test cases used to fix behavior we expect in storage backend.
+
+func TestFromTarArchiveZeroLength(t *testing.T) {
+	checkTarsumDigest(t, "zero-length archive", bytes.NewReader([]byte{}), DigestTarSumV1EmptyTar)
+}
+
+func TestFromTarArchiveEmptyTar(t *testing.T) {
+	// String of 1024 zeros is a valid, empty tar file.
+	checkTarsumDigest(t, "1024 zero bytes", bytes.NewReader(bytes.Repeat([]byte("\x00"), 1024)), DigestTarSumV1EmptyTar)
+}
+
+func checkTarsumDigest(t *testing.T, msg string, rd io.Reader, expected Digest) {
+	dgst, err := FromTarArchive(rd)
+	if err != nil {
+		t.Fatalf("unexpected error digesting %s: %v", msg, err)
+	}
+
+	if dgst != expected {
+		t.Fatalf("unexpected digest for %s: %q != %q", msg, dgst, expected)
+	}
+}
@@ -144,7 +144,7 @@ func TestLayerAPI(t *testing.T) {
 	checkResponse(t, "status of deleted upload", resp, http.StatusNotFound)

 	// -----------------------------------------
-	// Do layer push with an empty body
+	// Do layer push with an empty body and different digest
 	uploadURLBase = startPushLayer(t, builder, imageName)
 	resp, err = doPushLayer(t, builder, imageName, layerDigest, uploadURLBase, bytes.NewReader([]byte{}))
 	if err != nil {
@@ -152,21 +152,30 @@ func TestLayerAPI(t *testing.T) {
 	}

 	checkResponse(t, "bad layer push", resp, http.StatusBadRequest)
-	checkBodyHasErrorCodes(t, "bad layer push", resp, v2.ErrorCodeBlobUploadInvalid)
+	checkBodyHasErrorCodes(t, "bad layer push", resp, v2.ErrorCodeDigestInvalid)

 	// -----------------------------------------
-	// Do layer push with an invalid body
-
-	// This is a valid but empty tarfile!
-	badTar := bytes.Repeat([]byte("\x00"), 1024)
-	uploadURLBase = startPushLayer(t, builder, imageName)
-	resp, err = doPushLayer(t, builder, imageName, layerDigest, uploadURLBase, bytes.NewReader(badTar))
+	// Do layer push with an empty body and correct digest
+	zeroDigest, err := digest.FromTarArchive(bytes.NewReader([]byte{}))
 	if err != nil {
-		t.Fatalf("unexpected error doing bad layer push: %v", err)
+		t.Fatalf("unexpected error digesting empty buffer: %v", err)
 	}

-	checkResponse(t, "bad layer push", resp, http.StatusBadRequest)
-	checkBodyHasErrorCodes(t, "bad layer push", resp, v2.ErrorCodeDigestInvalid)
+	uploadURLBase = startPushLayer(t, builder, imageName)
+	pushLayer(t, builder, imageName, zeroDigest, uploadURLBase, bytes.NewReader([]byte{}))
+
+	// -----------------------------------------
+	// Do layer push with an empty body and correct digest
+
+	// This is a valid but empty tarfile!
+	emptyTar := bytes.Repeat([]byte("\x00"), 1024)
+	emptyDigest, err := digest.FromTarArchive(bytes.NewReader(emptyTar))
+	if err != nil {
+		t.Fatalf("unexpected error digesting empty tar: %v", err)
+	}
+
+	uploadURLBase = startPushLayer(t, builder, imageName)
+	pushLayer(t, builder, imageName, emptyDigest, uploadURLBase, bytes.NewReader(emptyTar))

 	// ------------------------------------------
 	// Now, actually do successful upload.
@@ -517,7 +526,7 @@ func checkBodyHasErrorCodes(t *testing.T, msg string, resp *http.Response, error

 	for _, err := range errs.Errors {
 		if _, ok := expected[err.Code]; !ok {
-			t.Fatalf("unexpected error code %v encountered: %s ", err.Code, string(p))
+			t.Fatalf("unexpected error code %v encountered during %s: %s ", err.Code, msg, string(p))
 		}
 		counts[err.Code]++
 	}
@@ -525,7 +534,7 @@ func checkBodyHasErrorCodes(t *testing.T, msg string, resp *http.Response, error
 	// Ensure that counts of expected errors were all non-zero
 	for code := range expected {
 		if counts[code] == 0 {
-			t.Fatalf("expected error code %v not encounterd: %s", code, string(p))
+			t.Fatalf("expected error code %v not encounterd during %s: %s", code, msg, string(p))
 		}
 	}

@@ -198,13 +198,6 @@ func (luh *layerUploadHandler) PutLayerUploadComplete(w http.ResponseWriter, r *
 	layer, err := luh.Upload.Finish(dgst)
 	if err != nil {
 		switch err := err.(type) {
-		case storage.ErrLayerUploadUnavailable:
-			w.WriteHeader(http.StatusBadRequest)
-			// TODO(stevvooe): Arguably, we may want to add an error code to
-			// cover this condition. It is not always a client error but it
-			// may be. For now, we effectively throw out the upload and have
-			// them start over.
-			luh.Errors.Push(v2.ErrorCodeBlobUploadInvalid, err.Err)
 		case storage.ErrLayerInvalidDigest:
 			w.WriteHeader(http.StatusBadRequest)
 			luh.Errors.Push(v2.ErrorCodeDigestInvalid, err)
@@ -2,14 +2,23 @@ package storage

 import (
 	"bufio"
+	"bytes"
 	"fmt"
 	"io"
+	"io/ioutil"
 	"os"
 	"time"

 	"github.com/docker/distribution/storagedriver"
 )

+// TODO(stevvooe): Set an optimal buffer size here. We'll have to
+// understand the latency characteristics of the underlying network to
+// set this correctly, so we may want to leave it to the driver. For
+// out of process drivers, we'll have to optimize this buffer size for
+// local communication.
+const fileReaderBufferSize = 4 << 20
+
 // remoteFileReader provides a read seeker interface to files stored in
 // storagedriver. Used to implement part of layer interface and will be used
 // to implement read side of LayerUpload.
@@ -28,24 +37,40 @@ type fileReader struct {
 	err error // terminal error, if set, reader is closed
 }

+// newFileReader initializes a file reader for the remote file. The read takes
+// on the offset and size at the time the reader is created. If the underlying
+// file changes, one must create a new fileReader.
 func newFileReader(driver storagedriver.StorageDriver, path string) (*fileReader, error) {
+	rd := &fileReader{
+		driver: driver,
+		path:   path,
+	}
+
 	// Grab the size of the layer file, ensuring existence.
-	fi, err := driver.Stat(path)
+	if fi, err := driver.Stat(path); err != nil {
+		switch err := err.(type) {
+		case storagedriver.PathNotFoundError:
+			// NOTE(stevvooe): We really don't care if the file is not
+			// actually present for the reader. If the caller needs to know
+			// whether or not the file exists, they should issue a stat call
+			// on the path. There is still no guarantee, since the file may be
+			// gone by the time the reader is created. The only correct
+			// behavior is to return a reader that immediately returns EOF.
+		default:
+			// Any other error we want propagated up the stack.
+			return nil, err
+		}
+	} else {
+		if fi.IsDir() {
+			return nil, fmt.Errorf("cannot read a directory")
+		}

-	if err != nil {
-		return nil, err
+		// Fill in file information
+		rd.size = fi.Size()
+		rd.modtime = fi.ModTime()
 	}

-	if fi.IsDir() {
-		return nil, fmt.Errorf("cannot read a directory")
-	}
-
-	return &fileReader{
-		driver:  driver,
-		path:    path,
-		size:    fi.Size(),
-		modtime: fi.ModTime(),
-	}, nil
+	return rd, nil
 }

 func (fr *fileReader) Read(p []byte) (n int, err error) {
@@ -88,8 +113,6 @@ func (fr *fileReader) Seek(offset int64, whence int) (int64, error) {

 	if newOffset < 0 {
 		err = fmt.Errorf("cannot seek to negative position")
-	} else if newOffset > fr.size {
-		err = fmt.Errorf("cannot seek passed end of file")
 	} else {
 		if fr.offset != newOffset {
 			fr.reset()
@@ -134,9 +157,17 @@ func (fr *fileReader) reader() (io.Reader, error) {

 	// If we don't have a reader, open one up.
 	rc, err := fr.driver.ReadStream(fr.path, fr.offset)
-
 	if err != nil {
-		return nil, err
+		switch err := err.(type) {
+		case storagedriver.PathNotFoundError:
+			// NOTE(stevvooe): If the path is not found, we simply return a
+			// reader that returns io.EOF. However, we do not set fr.rc,
+			// allowing future attempts at getting a reader to possibly
+			// succeed if the file turns up later.
+			return ioutil.NopCloser(bytes.NewReader([]byte{})), nil
+		default:
+			return nil, err
+		}
 	}

 	fr.rc = rc
@@ -147,7 +178,7 @@ func (fr *fileReader) reader() (io.Reader, error) {
 		// set this correctly, so we may want to leave it to the driver. For
 		// out of process drivers, we'll have to optimize this buffer size for
 		// local communication.
-		fr.brd = bufio.NewReader(fr.rc)
+		fr.brd = bufio.NewReaderSize(fr.rc, fileReaderBufferSize)
 	} else {
 		fr.brd.Reset(fr.rc)
 	}
@@ -124,7 +124,7 @@ func TestFileReaderSeek(t *testing.T) {
 		t.Fatalf("expected to seek to end: %v != %v", end, len(content))
 	}

-	// 4. Seek past end and before start, ensure error.
+	// 4. Seek before start, ensure error.

 	// seek before start
 	before, err := fr.Seek(-1, os.SEEK_SET)
@@ -132,9 +132,44 @@ func TestFileReaderSeek(t *testing.T) {
 		t.Fatalf("error expected, returned offset=%v", before)
 	}

-	after, err := fr.Seek(int64(len(content)+1), os.SEEK_END)
-	if err == nil {
-		t.Fatalf("error expected, returned offset=%v", after)
+	// 5. Seek after end,
+	after, err := fr.Seek(1, os.SEEK_END)
+	if err != nil {
+		t.Fatalf("unexpected error expected, returned offset=%v", after)
 	}
+
+	p := make([]byte, 16)
+	n, err := fr.Read(p)
+
+	if n != 0 {
+		t.Fatalf("bytes reads %d != %d", n, 0)
+	}
+
+	if err != io.EOF {
+		t.Fatalf("expected io.EOF, got %v", err)
+	}
 }
+
+// TestFileReaderNonExistentFile ensures the reader behaves as expected with a
+// missing or zero-length remote file. While the file may not exist, the
+// reader should not error out on creation and should return 0-bytes from the
+// read method, with an io.EOF error.
+func TestFileReaderNonExistentFile(t *testing.T) {
+	driver := inmemory.New()
+	fr, err := newFileReader(driver, "/doesnotexist")
+	if err != nil {
+		t.Fatalf("unexpected error initializing reader: %v", err)
+	}
+
+	var buf [1024]byte
+
+	n, err := fr.Read(buf[:])
+	if n != 0 {
+		t.Fatalf("non-zero byte read reported: %d != 0", n)
+	}
+
+	if err != io.EOF {
+		t.Fatalf("read on missing file should return io.EOF, got %v", err)
+	}
+}

@@ -99,9 +99,6 @@ func (fw *fileWriter) Seek(offset int64, whence int) (int64, error) {

 	if newOffset < 0 {
 		err = fmt.Errorf("cannot seek to negative position")
-	} else if newOffset > fw.size {
-		fw.offset = newOffset
-		fw.size = newOffset
 	} else {
 		// No problems, set the offset.
 		fw.offset = newOffset
@@ -80,28 +80,10 @@ func (err ErrUnknownLayer) Error() string {
 // ErrLayerInvalidDigest returned when tarsum check fails.
 type ErrLayerInvalidDigest struct {
 	Digest digest.Digest
+	Reason error
 }

 func (err ErrLayerInvalidDigest) Error() string {
-	return fmt.Sprintf("invalid digest for referenced layer: %v", err.Digest)
-}
-
-// ErrLayerInvalidSize returned when length check fails.
-type ErrLayerInvalidSize struct {
-	Size int64
-}
-
-func (err ErrLayerInvalidSize) Error() string {
-	return fmt.Sprintf("invalid layer size: %d", err.Size)
-}
-
-// ErrLayerUploadUnavailable signals missing upload data, either when no data
-// has been received or when the backend reports the data as missing. This is
-// different from ErrLayerUploadUnknown.
-type ErrLayerUploadUnavailable struct {
-	Err error
-}
-
-func (err ErrLayerUploadUnavailable) Error() string {
-	return fmt.Sprintf("layer upload unavialable: %v", err)
+	return fmt.Sprintf("invalid digest for referenced layer: %v, %v",
+		err.Digest, err.Reason)
 }
@@ -235,6 +235,40 @@ func TestSimpleLayerRead(t *testing.T) {
 	}
 }

+// TestLayerUploadZeroLength uploads zero-length
+func TestLayerUploadZeroLength(t *testing.T) {
+	imageName := "foo/bar"
+	driver := inmemory.New()
+	registry := NewRegistryWithDriver(driver)
+	ls := registry.Repository(imageName).Layers()
+
+	upload, err := ls.Upload()
+	if err != nil {
+		t.Fatalf("unexpected error starting upload: %v", err)
+	}
+
+	io.Copy(upload, bytes.NewReader([]byte{}))
+
+	dgst, err := digest.FromTarArchive(bytes.NewReader([]byte{}))
+	if err != nil {
+		t.Fatalf("error getting zero digest: %v", err)
+	}
+
+	if dgst != digest.DigestTarSumV1EmptyTar {
+		// sanity check on zero digest
+		t.Fatalf("digest not as expected: %v != %v", dgst, digest.DigestTarSumV1EmptyTar)
+	}
+
+	layer, err := upload.Finish(dgst)
+	if err != nil {
+		t.Fatalf("unexpected error finishing upload: %v", err)
+	}
+
+	if layer.Digest() != dgst {
+		t.Fatalf("unexpected digest: %v != %v", layer.Digest(), dgst)
+	}
+}
+
 // writeRandomLayer creates a random layer under name and tarSum using driver
 // and pathMapper. An io.ReadSeeker with the data is returned, along with the
 // sha256 hex digest.
@@ -1,6 +1,7 @@
 package storage

 import (
+	"fmt"
 	"io"
 	"path"
 	"time"
@@ -89,7 +90,10 @@ func (luc *layerUploadController) validateLayer(dgst digest.Digest) (digest.Dige
 	case tarsum.Version1:
 	default:
 		// version 0 and dev, for now.
-		return "", ErrLayerTarSumVersionUnsupported
+		return "", ErrLayerInvalidDigest{
+			Digest: dgst,
+			Reason: ErrLayerTarSumVersionUnsupported,
+		}
 	}

 	digestVerifier := digest.NewDigestVerifier(dgst)
@@ -102,22 +106,7 @@ func (luc *layerUploadController) validateLayer(dgst digest.Digest) (digest.Dige
 	// Read the file from the backend driver and validate it.
 	fr, err := newFileReader(luc.fileWriter.driver, luc.path)
 	if err != nil {
-		switch err := err.(type) {
-		case storagedriver.PathNotFoundError:
-			// NOTE(stevvooe): Path not found can mean several things by we
-			// should report the upload is not available. This can happen if
-			// the following happens:
-			//
-			// 1. If not data was received for the upload instance.
-			// 2. Backend storage driver has not convereged after receiving latest data.
-			//
-			// This *does not* mean that the upload does not exist, since we
-			// can't even get a LayerUpload object without having the
-			// directory exist.
-			return "", ErrLayerUploadUnavailable{Err: err}
-		default:
-			return "", err
-		}
+		return "", err
 	}

 	tr := io.TeeReader(fr, digestVerifier)
@@ -132,7 +121,10 @@ func (luc *layerUploadController) validateLayer(dgst digest.Digest) (digest.Dige
 	}

 	if !digestVerifier.Verified() {
-		return "", ErrLayerInvalidDigest{Digest: dgst}
+		return "", ErrLayerInvalidDigest{
+			Digest: dgst,
+			Reason: fmt.Errorf("content does not match digest"),
+		}
 	}

 	return canonical, nil
@@ -151,7 +143,7 @@ func (luc *layerUploadController) moveLayer(dgst digest.Digest) error {
 	}

 	// Check for existence
-	if _, err := luc.layerStore.repository.registry.driver.Stat(blobPath); err != nil {
+	if _, err := luc.driver.Stat(blobPath); err != nil {
 		switch err := err.(type) {
 		case storagedriver.PathNotFoundError:
 			break // ensure that it doesn't exist.
@@ -166,6 +158,31 @@ func (luc *layerUploadController) moveLayer(dgst digest.Digest) error {
 		return nil
 	}

+	// If no data was received, we may not actually have a file on disk. Check
+	// the size here and write a zero-length file to blobPath if this is the
+	// case. For the most part, this should only ever happen with zero-length
+	// tars.
+	if _, err := luc.driver.Stat(luc.path); err != nil {
+		switch err := err.(type) {
+		case storagedriver.PathNotFoundError:
+			// HACK(stevvooe): This is slightly dangerous: if we verify above,
+			// get a hash, then the underlying file is deleted, we risk moving
+			// a zero-length blob into a nonzero-length blob location. To
+			// prevent this horrid thing, we employ the hack of only allowing
+			// to this happen for the zero tarsum.
+			if dgst == digest.DigestTarSumV1EmptyTar {
+				return luc.driver.PutContent(blobPath, []byte{})
+			}
+
+			// We let this fail during the move below.
+			logrus.
+				WithField("upload.uuid", luc.UUID()).
+				WithField("digest", dgst).Warnf("attempted to move zero-length content with non-zero digest")
+		default:
+			return err // unrelated error
+		}
+	}
+
 	return luc.driver.Move(luc.path, blobPath)
 }

|
@ -57,8 +57,6 @@ func (rs *revisionStore) get(revision digest.Digest) (*manifest.SignedManifest,
|
||||
return nil, err
|
||||
}
|
||||
|
||||
logrus.Infof("retrieved signatures: %v", string(signatures[0]))
|
||||
|
||||
jsig, err := libtrust.NewJSONSignature(content, signatures...)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
|