Clean up layer storage layout

Previously, discussions were still ongoing about different storage layouts that
could support various access models. This changeset removes a layer of
indirection that was in place due to earlier designs. Effectively, this both
associates a layer with a named repository and ensures that content cannot be
accessed across repositories. It also moves to rely on tarsum as a true
content-addressable identifier, removing a layer of indirection during blob
resolution.
This commit is contained in:
Stephen J Day 2014-11-24 16:21:02 -08:00
parent 756989c011
commit 68944ea9cf
6 changed files with 153 additions and 270 deletions

View File

@ -47,32 +47,9 @@ var (
// ParseDigest parses s and returns the validated digest object. An error will // ParseDigest parses s and returns the validated digest object. An error will
// be returned if the format is invalid. // be returned if the format is invalid.
func ParseDigest(s string) (Digest, error) { func ParseDigest(s string) (Digest, error) {
// Common case will be tarsum d := Digest(s)
_, err := common.ParseTarSum(s)
if err == nil {
return Digest(s), nil
}
// Continue on for general parser return d, d.Validate()
i := strings.Index(s, ":")
if i < 0 {
return "", ErrDigestInvalidFormat
}
// case: "sha256:" with no hex.
if i+1 == len(s) {
return "", ErrDigestInvalidFormat
}
switch s[:i] {
case "md5", "sha1", "sha256":
break
default:
return "", ErrDigestUnsupported
}
return Digest(s), nil
} }
// FromReader returns the most valid digest for the underlying content. // FromReader returns the most valid digest for the underlying content.
@ -119,6 +96,38 @@ func FromBytes(p []byte) (Digest, error) {
return FromReader(bytes.NewReader(p)) return FromReader(bytes.NewReader(p))
} }
// Validate reports whether d holds a well-formed digest string.
//
// It returns nil when common.ParseTarSum accepts the value, or when the value
// has the generic "<algorithm>:<rest>" shape with an algorithm of md5, sha1
// or sha256. ErrDigestInvalidFormat is returned when the ":" separator is
// missing or nothing follows it, and ErrDigestUnsupported when the algorithm
// is not one of the recognized names. The portion after the separator is not
// inspected beyond being non-empty.
func (d Digest) Validate() error {
	raw := string(d)

	// Tarsums are the expected common case, so try that parser first.
	if _, err := common.ParseTarSum(raw); err == nil {
		return nil
	}

	// Fall back to the generic "<algorithm>:<rest>" form.
	sep := strings.Index(raw, ":")
	if sep < 0 || sep+1 == len(raw) {
		// Either no separator at all, or a bare prefix such as "sha256:".
		return ErrDigestInvalidFormat
	}

	switch raw[:sep] {
	case "md5", "sha1", "sha256":
		return nil
	}

	return ErrDigestUnsupported
}
// Algorithm returns the algorithm portion of the digest. This will panic if // Algorithm returns the algorithm portion of the digest. This will panic if
// the underlying digest is not in a valid format. // the underlying digest is not in a valid format.
func (d Digest) Algorithm() string { func (d Digest) Algorithm() string {

View File

@ -304,18 +304,13 @@ func writeTestLayer(driver storagedriver.StorageDriver, pathMapper *pathMapper,
blobDigestSHA := digest.NewDigest("sha256", h) blobDigestSHA := digest.NewDigest("sha256", h)
blobPath, err := pathMapper.path(blobPathSpec{ blobPath, err := pathMapper.path(blobPathSpec{
alg: blobDigestSHA.Algorithm(), digest: dgst,
digest: blobDigestSHA.Hex(),
}) })
if err := driver.PutContent(blobPath, p); err != nil { if err := driver.PutContent(blobPath, p); err != nil {
return "", err return "", err
} }
layerIndexLinkPath, err := pathMapper.path(layerIndexLinkPathSpec{
digest: dgst,
})
if err != nil { if err != nil {
return "", err return "", err
} }
@ -329,11 +324,7 @@ func writeTestLayer(driver storagedriver.StorageDriver, pathMapper *pathMapper,
return "", err return "", err
} }
if err := driver.PutContent(layerLinkPath, []byte(blobDigestSHA.String())); err != nil { if err := driver.PutContent(layerLinkPath, []byte(dgst)); err != nil {
return "", nil
}
if err = driver.PutContent(layerIndexLinkPath, []byte(name)); err != nil {
return "", nil return "", nil
} }

View File

@ -1,11 +1,8 @@
package storage package storage
import ( import (
"fmt"
"strings"
"time" "time"
"github.com/Sirupsen/logrus"
"github.com/docker/docker-registry/digest" "github.com/docker/docker-registry/digest"
"github.com/docker/docker-registry/storagedriver" "github.com/docker/docker-registry/storagedriver"
) )
@ -33,31 +30,17 @@ func (ls *layerStore) Exists(name string, digest digest.Digest) (bool, error) {
} }
func (ls *layerStore) Fetch(name string, digest digest.Digest) (Layer, error) { func (ls *layerStore) Fetch(name string, digest digest.Digest) (Layer, error) {
repos, err := ls.resolveContainingRepositories(digest) blobPath, err := ls.resolveBlobPath(name, digest)
if err != nil { if err != nil {
// TODO(stevvooe): Unknown tarsum error: need to wrap. switch err := err.(type) {
return nil, err case storagedriver.PathNotFoundError, *storagedriver.PathNotFoundError:
return nil, ErrLayerUnknown
default:
return nil, err
}
} }
// TODO(stevvooe): Access control for layer pulls need to happen here: we fr, err := newFileReader(ls.driver, blobPath)
// have a list of repos that "own" the tarsum that need to be checked
// against the list of repos to which we have pull access. The argument
// repos needs to be filtered against that access list.
_, blobPath, err := ls.resolveBlobPath(repos, digest)
if err != nil {
// TODO(stevvooe): Map this error correctly, perhaps in the callee.
return nil, err
}
p, err := ls.pathMapper.path(blobPath)
if err != nil {
return nil, err
}
fr, err := newFileReader(ls.driver, p)
if err != nil { if err != nil {
switch err := err.(type) { switch err := err.(type) {
case storagedriver.PathNotFoundError, *storagedriver.PathNotFoundError: case storagedriver.PathNotFoundError, *storagedriver.PathNotFoundError:
@ -117,69 +100,30 @@ func (ls *layerStore) newLayerUpload(lus LayerUploadState) LayerUpload {
} }
} }
func (ls *layerStore) resolveContainingRepositories(digest digest.Digest) ([]string, error) { // resolveBlobPath looks up the blob location in the repositories from a
// Lookup the layer link in the index by tarsum id. // layer/blob link file, returning blob path or an error on failure.
layerIndexLinkPath, err := ls.pathMapper.path(layerIndexLinkPathSpec{digest: digest}) func (ls *layerStore) resolveBlobPath(name string, dgst digest.Digest) (string, error) {
pathSpec := layerLinkPathSpec{name: name, digest: dgst}
layerLinkPath, err := ls.pathMapper.path(pathSpec)
if err != nil { if err != nil {
return nil, err return "", err
} }
layerIndexLinkContent, err := ls.driver.GetContent(layerIndexLinkPath) layerLinkContent, err := ls.driver.GetContent(layerLinkPath)
if err != nil { if err != nil {
switch err := err.(type) { return "", err
case storagedriver.PathNotFoundError:
return nil, ErrLayerUnknown
default:
return nil, err
}
} }
results := strings.Split(string(layerIndexLinkContent), "\n") // NOTE(stevvooe): The content of the layer link should match the digest.
// This layer of indirection is for name-based content protection.
// clean these up linked, err := digest.ParseDigest(string(layerLinkContent))
for i, result := range results { if err != nil {
results[i] = strings.TrimSpace(result) return "", err
} }
return results, nil bp := blobPathSpec{digest: linked}
}
return ls.pathMapper.path(bp)
// resolveBlobId lookups up the tarSum in the various repos to find the blob
// link, returning the repo name and blob path spec or an error on failure.
func (ls *layerStore) resolveBlobPath(repos []string, digest digest.Digest) (name string, bps blobPathSpec, err error) {
for _, repo := range repos {
pathSpec := layerLinkPathSpec{name: repo, digest: digest}
layerLinkPath, err := ls.pathMapper.path(pathSpec)
if err != nil {
// TODO(stevvooe): This looks very lazy, may want to collect these
// errors and report them if we exit this for loop without
// resolving the blob id.
logrus.Debugf("error building linkLayerPath (%V): %v", pathSpec, err)
continue
}
layerLinkContent, err := ls.driver.GetContent(layerLinkPath)
if err != nil {
logrus.Debugf("error getting layerLink content (%V): %v", pathSpec, err)
continue
}
// Yay! We've resolved our blob id and we're ready to go.
parts := strings.SplitN(strings.TrimSpace(string(layerLinkContent)), ":", 2)
if len(parts) != 2 {
return "", bps, fmt.Errorf("invalid blob reference: %q", string(layerLinkContent))
}
name = repo
bp := blobPathSpec{alg: parts[0], digest: parts[1]}
return repo, bp, nil
}
// TODO(stevvooe): Map this error to repo not found, but it basically
// means we exited the loop above without finding a blob link.
return "", bps, fmt.Errorf("unable to resolve blog id for repos=%v and digest=%v", repos, digest)
} }

View File

@ -6,8 +6,6 @@ import (
"io/ioutil" "io/ioutil"
"os" "os"
"path/filepath" "path/filepath"
"sort"
"strings"
"code.google.com/p/go-uuid/uuid" "code.google.com/p/go-uuid/uuid"
@ -285,10 +283,9 @@ func (luc *layerUploadController) validateLayer(fp layerFile, size int64, dgst d
// writeLayer actually writes the layer file into its final destination. // writeLayer actually writes the layer file into its final destination.
// The layer should be validated before commencing the write. // The layer should be validated before commencing the write.
func (luc *layerUploadController) writeLayer(fp layerFile, size int64, digest digest.Digest) error { func (luc *layerUploadController) writeLayer(fp layerFile, size int64, dgst digest.Digest) error {
blobPath, err := luc.layerStore.pathMapper.path(blobPathSpec{ blobPath, err := luc.layerStore.pathMapper.path(blobPathSpec{
alg: digest.Algorithm(), digest: dgst,
digest: digest.Hex(),
}) })
if err != nil { if err != nil {
@ -324,8 +321,8 @@ func (luc *layerUploadController) writeLayer(fp layerFile, size int64, digest di
return nil return nil
} }
// linkLayer links a valid, written layer blog into the registry, first // linkLayer links a valid, written layer blob into the registry under the
// linking the repository namespace, then adding it to the layerindex. // named repository for the upload controller.
func (luc *layerUploadController) linkLayer(digest digest.Digest) error { func (luc *layerUploadController) linkLayer(digest digest.Digest) error {
layerLinkPath, err := luc.layerStore.pathMapper.path(layerLinkPathSpec{ layerLinkPath, err := luc.layerStore.pathMapper.path(layerLinkPathSpec{
name: luc.Name(), name: luc.Name(),
@ -336,56 +333,7 @@ func (luc *layerUploadController) linkLayer(digest digest.Digest) error {
return err return err
} }
if err := luc.layerStore.driver.PutContent(layerLinkPath, []byte(digest)); err != nil { return luc.layerStore.driver.PutContent(layerLinkPath, []byte(digest))
return nil
}
// Link the layer into the name index.
layerIndexLinkPath, err := luc.layerStore.pathMapper.path(layerIndexLinkPathSpec{
digest: digest,
})
if err != nil {
return err
}
// Read back the name index file. If it exists, create it. If not, add the
// new repo to the name list.
// TODO(stevvooe): This is very racy, as well. Reconsider using list for
// this operation?
layerIndexLinkContent, err := luc.layerStore.driver.GetContent(layerIndexLinkPath)
if err != nil {
switch err := err.(type) {
case storagedriver.PathNotFoundError:
layerIndexLinkContent = []byte(luc.Name())
default:
return err
}
}
layerIndexLinkContent = luc.maybeAddNameToLayerIndexLinkContent(layerIndexLinkContent)
// Write the index content back to the index.
return luc.layerStore.driver.PutContent(layerIndexLinkPath, layerIndexLinkContent)
}
func (luc *layerUploadController) maybeAddNameToLayerIndexLinkContent(content []byte) []byte {
names := strings.Split(string(content), "\n")
var found bool
// Search the names and find ours
for _, name := range names {
if name == luc.Name() {
found = true
}
}
if !found {
names = append(names, luc.Name())
}
sort.Strings(names)
return []byte(strings.Join(names, "\n"))
} }
// localFSLayerUploadStore implements a local layerUploadStore. There are some // localFSLayerUploadStore implements a local layerUploadStore. There are some

View File

@ -11,11 +11,6 @@ import (
const storagePathVersion = "v2" const storagePathVersion = "v2"
// TODO(sday): This needs to be changed: all layers for an image will be
// linked under the repository. Lookup from tarsum to name is not necessary,
// so we can remove the layer index. For this to properly work, image push
// must link the images layers under the repo.
// pathMapper maps paths based on "object names" and their ids. The "object // pathMapper maps paths based on "object names" and their ids. The "object
// names" mapped by pathMapper are internal to the storage system. // names" mapped by pathMapper are internal to the storage system.
// //
@ -27,31 +22,21 @@ const storagePathVersion = "v2"
// -> manifests/ // -> manifests/
// <manifests by tag name> // <manifests by tag name>
// -> layers/ // -> layers/
// -> tarsum/ // <layer links to blob store>
// -> <tarsum version>/ // -> blob/<algorithm>
// -> <tarsum hash alg>/ // <split directory content addressable storage>
// <layer links to blob store>
// -> layerindex/
// -> tarsum/
// -> <tarsum version>/
// -> <tarsum hash alg>/
// <repo name links>
// -> blob/sha256
// <split directory sha256 content addressable storage>
// //
// There are few important components to this path layout. First, we have the // There are few important components to this path layout. First, we have the
// repository store identified by name. This contains the image manifests and // repository store identified by name. This contains the image manifests and
// a layer store with links to CAS blob ids. Outside of the named repo area, // a layer store with links to CAS blob ids. Outside of the named repo area,
// we have the layerindex, which provides lookup from tarsum id to repo // we have the blob store. It contains the actual layer data and any other
// storage. The blob store contains the actual layer data and any other data // data that can be referenced by a CAS id.
// that can be referenced by a CAS id.
// //
// We cover the path formats implemented by this path mapper below. // We cover the path formats implemented by this path mapper below.
// //
// manifestPathSpec: <root>/v2/repositories/<name>/manifests/<tag> // manifestPathSpec: <root>/v2/repositories/<name>/manifests/<tag>
// layerLinkPathSpec: <root>/v2/repositories/<name>/layers/tarsum/<tarsum version>/<tarsum hash alg>/<tarsum hash> // layerLinkPathSpec: <root>/v2/repositories/<name>/layers/tarsum/<tarsum version>/<tarsum hash alg>/<tarsum hash>
// layerIndexLinkPathSpec: <root>/v2/layerindex/tarsum/<tarsum version>/<tarsum hash alg>/<tarsum hash> // blobPathSpec: <root>/v2/blob/<algorithm>/<first two hex bytes of digest>/<hex digest>
// blobPathSpec: <root>/v2/blob/sha256/<first two hex bytes of digest>/<hex digest>
// //
// For more information on the semantic meaning of each path and their // For more information on the semantic meaning of each path and their
// contents, please see the path spec documentation. // contents, please see the path spec documentation.
@ -60,16 +45,6 @@ type pathMapper struct {
version string // should be a constant? version string // should be a constant?
} }
// TODO(stevvooe): This storage layout currently allows lookup to layer stores
// by repo name via the tarsum. The layer index lookup could come with an
// access control check against the link contents before proceeding. The main
// problem with this comes with a collision in the tarsum algorithm: if party
// A uploads a layer before party B, with an identical tarsum, party B may
// never be able to get access to the tarsum stored under party A. We'll need
// a way for party B to associate with a "unique" version of their image. This
// may be as simple as forcing the client to re-upload images to which they
// don't have access.
// path returns the path identified by spec. // path returns the path identified by spec.
func (pm *pathMapper) path(spec pathSpec) (string, error) { func (pm *pathMapper) path(spec pathSpec) (string, error) {
@ -93,44 +68,34 @@ func (pm *pathMapper) path(spec pathSpec) (string, error) {
// TODO(sday): May need to store manifest by architecture. // TODO(sday): May need to store manifest by architecture.
return path.Join(append(repoPrefix, v.name, "manifests", v.tag)...), nil return path.Join(append(repoPrefix, v.name, "manifests", v.tag)...), nil
case layerLinkPathSpec: case layerLinkPathSpec:
if !strings.HasPrefix(v.digest.Algorithm(), "tarsum") { components, err := digestPathComoponents(v.digest)
// Only tarsum is supported, for now
return "", fmt.Errorf("unsupport content digest: %v", v.digest)
}
tsi, err := common.ParseTarSum(v.digest.String())
if err != nil { if err != nil {
// TODO(sday): This will return an InvalidTarSumError from
// ParseTarSum but we may want to wrap this. This error should
// never be encountered in production, since the tarsum should be
// validated by this point.
return "", err return "", err
} }
return path.Join(append(append(repoPrefix, v.name, "layers"), // For now, only map tarsum paths.
tarSumInfoPathComponents(tsi)...)...), nil if components[0] != "tarsum" {
case layerIndexLinkPathSpec:
if !strings.HasPrefix(v.digest.Algorithm(), "tarsum") {
// Only tarsum is supported, for now // Only tarsum is supported, for now
return "", fmt.Errorf("unsupport content digest: %v", v.digest) return "", fmt.Errorf("unsupported content digest: %v", v.digest)
} }
tsi, err := common.ParseTarSum(v.digest.String()) layerLinkPathComponents := append(repoPrefix, v.name, "layers")
if err != nil { return path.Join(append(layerLinkPathComponents, components...)...), nil
// TODO(sday): This will return an InvalidTarSumError from
// ParseTarSum but we may want to wrap this. This error should
// never be encountered in production, since the tarsum should be
// validated by this point.
return "", err
}
return path.Join(append(append(rootPrefix, "layerindex"),
tarSumInfoPathComponents(tsi)...)...), nil
case blobPathSpec: case blobPathSpec:
p := path.Join([]string{pm.root, pm.version, "blob", v.alg, v.digest[:2], v.digest}...) components, err := digestPathComoponents(v.digest)
return p, nil if err != nil {
return "", err
}
// For now, only map tarsum paths.
if components[0] != "tarsum" {
// Only tarsum is supported, for now
return "", fmt.Errorf("unsupported content digest: %v", v.digest)
}
blobPathPrefix := append(rootPrefix, "blob")
return path.Join(append(blobPathPrefix, components...)...), nil
default: default:
// TODO(sday): This is an internal error. Ensure it doesn't escape (panic?). // TODO(sday): This is an internal error. Ensure it doesn't escape (panic?).
return "", fmt.Errorf("unknown path spec: %#v", v) return "", fmt.Errorf("unknown path spec: %#v", v)
@ -172,40 +137,61 @@ type layerLinkPathSpec struct {
func (layerLinkPathSpec) pathSpec() {} func (layerLinkPathSpec) pathSpec() {}
// layerIndexLinkPath provides a path to a registry global layer store, // blobAlgorithmReplacer does some very simple path sanitization for user
// indexed by tarsum. The target file will contain the repo name of the // input. Mostly, this is to provide some hierarchy for tarsum digests. Paths
// "owner" of the layer. An example name link file follows: // should be "safe" before getting this far due to strict digest requirements
// // but we can add further path conversion here, if needed.
// library/ubuntu var blobAlgorithmReplacer = strings.NewReplacer(
// foo/bar "+", "/",
// ".", "/",
// The above file has the tarsum stored under the foo/bar repository and the ";", "/",
// library/ubuntu repository. The storage layer should access the tarsum from )
// the first repository to which the client has access.
type layerIndexLinkPathSpec struct {
digest digest.Digest
}
func (layerIndexLinkPathSpec) pathSpec() {}
// blobPath contains the path for the registry global blob store. For now, // blobPath contains the path for the registry global blob store. For now,
// this contains layer data, exclusively. // this contains layer data, exclusively.
type blobPathSpec struct { type blobPathSpec struct {
// TODO(stevvooe): Port this to make better use of Digest type. digest digest.Digest
alg string
digest string
} }
func (blobPathSpec) pathSpec() {} func (blobPathSpec) pathSpec() {}
// tarSumInfoPath generates storage path components for the provided // digestPathComoponents provides a consistent path breakdown for a given
// TarSumInfo. // digest. For a generic digest, it will be as follows:
func tarSumInfoPathComponents(tsi common.TarSumInfo) []string { //
version := tsi.Version // <algorithm>/<first two bytes of digest>/<full digest>
//
if version == "" { // Most importantly, for tarsum, the layout looks like this:
version = "v0" //
// tarsum/<version>/<digest algorithm>/<first two bytes of digest>/<full digest>
//
// This is slightly specialized to store an extra version path for version 0
// tarsums.
func digestPathComoponents(dgst digest.Digest) ([]string, error) {
if err := dgst.Validate(); err != nil {
return nil, err
} }
return []string{"tarsum", version, tsi.Algorithm, tsi.Digest} algorithm := blobAlgorithmReplacer.Replace(dgst.Algorithm())
hex := dgst.Hex()
prefix := []string{algorithm}
suffix := []string{
hex[:2], // Breaks heirarchy up.
hex,
}
if tsi, err := common.ParseTarSum(dgst.String()); err == nil {
// We have a tarsum!
version := tsi.Version
if version == "" {
version = "v0"
}
prefix = []string{
"tarsum",
version,
tsi.Algorithm,
}
}
return append(prefix, suffix...), nil
} }

View File

@ -28,20 +28,25 @@ func TestPathMapper(t *testing.T) {
name: "foo/bar", name: "foo/bar",
digest: digest.Digest("tarsum.v1+test:abcdef"), digest: digest.Digest("tarsum.v1+test:abcdef"),
}, },
expected: "/pathmapper-test/repositories/foo/bar/layers/tarsum/v1/test/abcdef", expected: "/pathmapper-test/repositories/foo/bar/layers/tarsum/v1/test/ab/abcdef",
},
{
spec: layerIndexLinkPathSpec{
digest: digest.Digest("tarsum.v1+test:abcdef"),
},
expected: "/pathmapper-test/layerindex/tarsum/v1/test/abcdef",
}, },
{ {
spec: blobPathSpec{ spec: blobPathSpec{
alg: "sha512", digest: digest.Digest("tarsum.dev+sha512:abcdefabcdefabcdef908909909"),
digest: "abcdefabcdefabcdef908909909",
}, },
expected: "/pathmapper-test/blob/sha512/ab/abcdefabcdefabcdef908909909", expected: "/pathmapper-test/blob/tarsum/dev/sha512/ab/abcdefabcdefabcdef908909909",
},
{
spec: blobPathSpec{
digest: digest.Digest("tarsum.v1+sha256:abcdefabcdefabcdef908909909"),
},
expected: "/pathmapper-test/blob/tarsum/v1/sha256/ab/abcdefabcdefabcdef908909909",
},
{
spec: blobPathSpec{
digest: digest.Digest("tarsum+sha256:abcdefabcdefabcdef908909909"),
},
expected: "/pathmapper-test/blob/tarsum/v0/sha256/ab/abcdefabcdefabcdef908909909",
}, },
} { } {
p, err := pm.path(testcase.spec) p, err := pm.path(testcase.spec)