Use the New Driver Walk method for catalog enumeration
This changes the Walk Method used for catalog enumeration. Just to show how much an effect this has on our s3 storage: Original: List calls: 6839 real 3m16.636s user 0m0.000s sys 0m0.016s New: ListObjectsV2 Calls: 1805 real 0m49.970s user 0m0.008s sys 0m0.000s This is because it no longer performs a list and stat per item, and instead is able to use the metadata gained from the list as a replacement to stat. Signed-off-by: Sargun Dhillon <sargun@sargun.me>
This commit is contained in:
@@ -34,6 +34,7 @@ import (
|
||||
"github.com/aws/aws-sdk-go/aws/session"
|
||||
"github.com/aws/aws-sdk-go/service/s3"
|
||||
|
||||
dcontext "github.com/docker/distribution/context"
|
||||
"github.com/docker/distribution/registry/client/transport"
|
||||
storagedriver "github.com/docker/distribution/registry/storage/driver"
|
||||
"github.com/docker/distribution/registry/storage/driver/base"
|
||||
@@ -876,8 +877,132 @@ func (d *driver) URLFor(ctx context.Context, path string, options map[string]int
|
||||
|
||||
// Walk traverses a filesystem defined within driver, starting
|
||||
// from the given path, calling f on each file
|
||||
func (d *driver) Walk(ctx context.Context, path string, f storagedriver.WalkFn) error {
|
||||
return storagedriver.WalkFallback(ctx, d, path, f)
|
||||
func (d *driver) Walk(ctx context.Context, from string, f storagedriver.WalkFn) error {
|
||||
path := from
|
||||
if !strings.HasSuffix(path, "/") {
|
||||
path = path + "/"
|
||||
}
|
||||
|
||||
prefix := ""
|
||||
if d.s3Path("") == "" {
|
||||
prefix = "/"
|
||||
}
|
||||
|
||||
var objectCount int64
|
||||
if err := d.doWalk(ctx, &objectCount, d.s3Path(path), prefix, f); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// S3 doesn't have the concept of empty directories, so it'll return path not found if there are no objects
|
||||
if objectCount == 0 {
|
||||
return storagedriver.PathNotFoundError{Path: from}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
type walkInfoContainer struct {
|
||||
storagedriver.FileInfoFields
|
||||
prefix *string
|
||||
}
|
||||
|
||||
// Path provides the full path of the target of this file info.
|
||||
func (wi walkInfoContainer) Path() string {
|
||||
return wi.FileInfoFields.Path
|
||||
}
|
||||
|
||||
// Size returns current length in bytes of the file. The return value can
|
||||
// be used to write to the end of the file at path. The value is
|
||||
// meaningless if IsDir returns true.
|
||||
func (wi walkInfoContainer) Size() int64 {
|
||||
return wi.FileInfoFields.Size
|
||||
}
|
||||
|
||||
// ModTime returns the modification time for the file. For backends that
|
||||
// don't have a modification time, the creation time should be returned.
|
||||
func (wi walkInfoContainer) ModTime() time.Time {
|
||||
return wi.FileInfoFields.ModTime
|
||||
}
|
||||
|
||||
// IsDir returns true if the path is a directory.
|
||||
func (wi walkInfoContainer) IsDir() bool {
|
||||
return wi.FileInfoFields.IsDir
|
||||
}
|
||||
|
||||
func (d *driver) doWalk(parentCtx context.Context, objectCount *int64, path, prefix string, f storagedriver.WalkFn) error {
|
||||
var retError error
|
||||
|
||||
listObjectsInput := &s3.ListObjectsV2Input{
|
||||
Bucket: aws.String(d.Bucket),
|
||||
Prefix: aws.String(path),
|
||||
Delimiter: aws.String("/"),
|
||||
MaxKeys: aws.Int64(listMax),
|
||||
}
|
||||
|
||||
ctx, done := dcontext.WithTrace(parentCtx)
|
||||
defer done("s3aws.ListObjectsV2Pages(%s)", path)
|
||||
listObjectErr := d.S3.ListObjectsV2PagesWithContext(ctx, listObjectsInput, func(objects *s3.ListObjectsV2Output, lastPage bool) bool {
|
||||
|
||||
*objectCount += *objects.KeyCount
|
||||
walkInfos := make([]walkInfoContainer, 0, *objects.KeyCount)
|
||||
|
||||
for _, dir := range objects.CommonPrefixes {
|
||||
commonPrefix := *dir.Prefix
|
||||
walkInfos = append(walkInfos, walkInfoContainer{
|
||||
prefix: dir.Prefix,
|
||||
FileInfoFields: storagedriver.FileInfoFields{
|
||||
IsDir: true,
|
||||
Path: strings.Replace(commonPrefix[:len(commonPrefix)-1], d.s3Path(""), prefix, 1),
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
for _, file := range objects.Contents {
|
||||
walkInfos = append(walkInfos, walkInfoContainer{
|
||||
FileInfoFields: storagedriver.FileInfoFields{
|
||||
IsDir: false,
|
||||
Size: *file.Size,
|
||||
ModTime: *file.LastModified,
|
||||
Path: strings.Replace(*file.Key, d.s3Path(""), prefix, 1),
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
sort.SliceStable(walkInfos, func(i, j int) bool { return walkInfos[i].FileInfoFields.Path < walkInfos[j].FileInfoFields.Path })
|
||||
|
||||
for _, walkInfo := range walkInfos {
|
||||
err := f(walkInfo)
|
||||
|
||||
if err == storagedriver.ErrSkipDir {
|
||||
if walkInfo.IsDir() {
|
||||
continue
|
||||
} else {
|
||||
break
|
||||
}
|
||||
} else if err != nil {
|
||||
retError = err
|
||||
return false
|
||||
}
|
||||
|
||||
if walkInfo.IsDir() {
|
||||
if err := d.doWalk(ctx, objectCount, *walkInfo.prefix, prefix, f); err != nil {
|
||||
retError = err
|
||||
return false
|
||||
}
|
||||
}
|
||||
}
|
||||
return true
|
||||
})
|
||||
|
||||
if retError != nil {
|
||||
return retError
|
||||
}
|
||||
|
||||
if listObjectErr != nil {
|
||||
return listObjectErr
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (d *driver) s3Path(path string) string {
|
||||
|
Reference in New Issue
Block a user