Merge pull request #260 from dmcgowan/short-digests

Add short digest implementation
This commit is contained in:
Stephen Day 2015-04-29 14:05:41 -07:00
commit 5556cd1ba1
2 changed files with 467 additions and 0 deletions

195
digest/set.go Normal file
View File

@ -0,0 +1,195 @@
package digest
import (
"errors"
"sort"
"strings"
)
var (
// ErrDigestNotFound is used when a matching digest
// could not be found in a set.
ErrDigestNotFound = errors.New("digest not found")
// ErrDigestAmbiguous is used when multiple digests
// are found in a set. None of the matching digests
// should be considered valid matches.
ErrDigestAmbiguous = errors.New("ambiguous digest string")
)
// Set is used to hold a unique set of digests which
// may be easily referenced by easily referenced by a string
// representation of the digest as well as short representation.
// The uniqueness of the short representation is based on other
// digests in the set. If digests are ommited from this set,
// collisions in a larger set may not be detected, therefore it
// is important to always do short representation lookups on
// the complete set of digests. To mitigate collisions, an
// appropriately long short code should be used.
type Set struct {
entries digestEntries
}
// NewSet creates an empty set of digests
// which may have digests added.
func NewSet() *Set {
return &Set{
entries: digestEntries{},
}
}
// checkShortMatch checks whether two digests match as either whole
// values or short values. This function does not test equality,
// rather whether the second value could match against the first
// value.
func checkShortMatch(alg, hex, shortAlg, shortHex string) bool {
if len(hex) == len(shortHex) {
if hex != shortHex {
return false
}
if len(shortAlg) > 0 && alg != shortAlg {
return false
}
} else if !strings.HasPrefix(hex, shortHex) {
return false
} else if len(shortAlg) > 0 && alg != shortAlg {
return false
}
return true
}
// Lookup looks for a digest matching the given string representation.
// If no digests could be found ErrDigestNotFound will be returned
// with an empty digest value. If multiple matches are found
// ErrDigestAmbiguous will be returned with an empty digest value.
func (dst *Set) Lookup(d string) (Digest, error) {
if len(dst.entries) == 0 {
return "", ErrDigestNotFound
}
var (
searchFunc func(int) bool
alg string
hex string
)
dgst, err := ParseDigest(d)
if err == ErrDigestInvalidFormat {
hex = d
searchFunc = func(i int) bool {
return dst.entries[i].val >= d
}
} else {
hex = dgst.Hex()
alg = dgst.Algorithm()
searchFunc = func(i int) bool {
if dst.entries[i].val == hex {
return dst.entries[i].alg >= alg
}
return dst.entries[i].val >= hex
}
}
idx := sort.Search(len(dst.entries), searchFunc)
if idx == len(dst.entries) || !checkShortMatch(dst.entries[idx].alg, dst.entries[idx].val, alg, hex) {
return "", ErrDigestNotFound
}
if dst.entries[idx].alg == alg && dst.entries[idx].val == hex {
return dst.entries[idx].digest, nil
}
if idx+1 < len(dst.entries) && checkShortMatch(dst.entries[idx+1].alg, dst.entries[idx+1].val, alg, hex) {
return "", ErrDigestAmbiguous
}
return dst.entries[idx].digest, nil
}
// Add adds the given digests to the set. An error will be returned
// if the given digest is invalid. If the digest already exists in the
// table, this operation will be a no-op.
func (dst *Set) Add(d Digest) error {
if err := d.Validate(); err != nil {
return err
}
entry := &digestEntry{alg: d.Algorithm(), val: d.Hex(), digest: d}
searchFunc := func(i int) bool {
if dst.entries[i].val == entry.val {
return dst.entries[i].alg >= entry.alg
}
return dst.entries[i].val >= entry.val
}
idx := sort.Search(len(dst.entries), searchFunc)
if idx == len(dst.entries) {
dst.entries = append(dst.entries, entry)
return nil
} else if dst.entries[idx].digest == d {
return nil
}
entries := append(dst.entries, nil)
copy(entries[idx+1:], entries[idx:len(entries)-1])
entries[idx] = entry
dst.entries = entries
return nil
}
// ShortCodeTable returns a map of Digest to unique short codes. The
// length represents the minimum value, the maximum length may be the
// entire value of digest if uniqueness cannot be achieved without the
// full value. This function will attempt to make short codes as short
// as possible to be unique.
func ShortCodeTable(dst *Set, length int) map[Digest]string {
m := make(map[Digest]string, len(dst.entries))
l := length
resetIdx := 0
for i := 0; i < len(dst.entries); i++ {
var short string
extended := true
for extended {
extended = false
if len(dst.entries[i].val) <= l {
short = dst.entries[i].digest.String()
} else {
short = dst.entries[i].val[:l]
for j := i + 1; j < len(dst.entries); j++ {
if checkShortMatch(dst.entries[j].alg, dst.entries[j].val, "", short) {
if j > resetIdx {
resetIdx = j
}
extended = true
} else {
break
}
}
if extended {
l++
}
}
}
m[dst.entries[i].digest] = short
if i >= resetIdx {
l = length
}
}
return m
}
type digestEntry struct {
alg string
val string
digest Digest
}
type digestEntries []*digestEntry
func (d digestEntries) Len() int {
return len(d)
}
func (d digestEntries) Less(i, j int) bool {
if d[i].val != d[j].val {
return d[i].val < d[j].val
}
return d[i].alg < d[j].alg
}
func (d digestEntries) Swap(i, j int) {
d[i], d[j] = d[j], d[i]
}

272
digest/set_test.go Normal file
View File

@ -0,0 +1,272 @@
package digest
import (
"crypto/sha256"
"encoding/binary"
"math/rand"
"testing"
)
func assertEqualDigests(t *testing.T, d1, d2 Digest) {
if d1 != d2 {
t.Fatalf("Digests do not match:\n\tActual: %s\n\tExpected: %s", d1, d2)
}
}
func TestLookup(t *testing.T) {
digests := []Digest{
"sha256:12345",
"sha256:1234",
"sha256:12346",
"sha256:54321",
"sha256:65431",
"sha256:64321",
"sha256:65421",
"sha256:65321",
}
dset := NewSet()
for i := range digests {
if err := dset.Add(digests[i]); err != nil {
t.Fatal(err)
}
}
dgst, err := dset.Lookup("54")
if err != nil {
t.Fatal(err)
}
assertEqualDigests(t, dgst, digests[3])
dgst, err = dset.Lookup("1234")
if err == nil {
t.Fatal("Expected ambiguous error looking up: 1234")
}
if err != ErrDigestAmbiguous {
t.Fatal(err)
}
dgst, err = dset.Lookup("9876")
if err == nil {
t.Fatal("Expected ambiguous error looking up: 9876")
}
if err != ErrDigestNotFound {
t.Fatal(err)
}
dgst, err = dset.Lookup("sha256:1234")
if err != nil {
t.Fatal(err)
}
assertEqualDigests(t, dgst, digests[1])
dgst, err = dset.Lookup("sha256:12345")
if err != nil {
t.Fatal(err)
}
assertEqualDigests(t, dgst, digests[0])
dgst, err = dset.Lookup("sha256:12346")
if err != nil {
t.Fatal(err)
}
assertEqualDigests(t, dgst, digests[2])
dgst, err = dset.Lookup("12346")
if err != nil {
t.Fatal(err)
}
assertEqualDigests(t, dgst, digests[2])
dgst, err = dset.Lookup("12345")
if err != nil {
t.Fatal(err)
}
assertEqualDigests(t, dgst, digests[0])
}
func TestAddDuplication(t *testing.T) {
digests := []Digest{
"sha256:1234",
"sha256:12345",
"sha256:12346",
"sha256:54321",
"sha256:65431",
"sha512:65431",
"sha512:65421",
"sha512:65321",
}
dset := NewSet()
for i := range digests {
if err := dset.Add(digests[i]); err != nil {
t.Fatal(err)
}
}
if len(dset.entries) != 8 {
t.Fatal("Invalid dset size")
}
if err := dset.Add(Digest("sha256:12345")); err != nil {
t.Fatal(err)
}
if len(dset.entries) != 8 {
t.Fatal("Duplicate digest insert allowed")
}
if err := dset.Add(Digest("sha384:12345")); err != nil {
t.Fatal(err)
}
if len(dset.entries) != 9 {
t.Fatal("Insert with different algorithm not allowed")
}
}
func assertEqualShort(t *testing.T, actual, expected string) {
if actual != expected {
t.Fatalf("Unexpected short value:\n\tExpected: %s\n\tActual: %s", expected, actual)
}
}
func TestShortCodeTable(t *testing.T) {
digests := []Digest{
"sha256:1234",
"sha256:12345",
"sha256:12346",
"sha256:54321",
"sha256:65431",
"sha256:64321",
"sha256:65421",
"sha256:65321",
}
dset := NewSet()
for i := range digests {
if err := dset.Add(digests[i]); err != nil {
t.Fatal(err)
}
}
dump := ShortCodeTable(dset, 2)
if len(dump) < len(digests) {
t.Fatalf("Error unexpected size: %d, expecting %d", len(dump), len(digests))
}
assertEqualShort(t, dump[digests[0]], "sha256:1234")
assertEqualShort(t, dump[digests[1]], "sha256:12345")
assertEqualShort(t, dump[digests[2]], "sha256:12346")
assertEqualShort(t, dump[digests[3]], "54")
assertEqualShort(t, dump[digests[4]], "6543")
assertEqualShort(t, dump[digests[5]], "64")
assertEqualShort(t, dump[digests[6]], "6542")
assertEqualShort(t, dump[digests[7]], "653")
}
func createDigests(count int) ([]Digest, error) {
r := rand.New(rand.NewSource(25823))
digests := make([]Digest, count)
for i := range digests {
h := sha256.New()
if err := binary.Write(h, binary.BigEndian, r.Int63()); err != nil {
return nil, err
}
digests[i] = NewDigest("sha256", h)
}
return digests, nil
}
func benchAddNTable(b *testing.B, n int) {
digests, err := createDigests(n)
if err != nil {
b.Fatal(err)
}
b.ResetTimer()
for i := 0; i < b.N; i++ {
dset := &Set{entries: digestEntries(make([]*digestEntry, 0, n))}
for j := range digests {
if err = dset.Add(digests[j]); err != nil {
b.Fatal(err)
}
}
}
}
func benchLookupNTable(b *testing.B, n int, shortLen int) {
digests, err := createDigests(n)
if err != nil {
b.Fatal(err)
}
dset := &Set{entries: digestEntries(make([]*digestEntry, 0, n))}
for i := range digests {
if err := dset.Add(digests[i]); err != nil {
b.Fatal(err)
}
}
shorts := make([]string, 0, n)
for _, short := range ShortCodeTable(dset, shortLen) {
shorts = append(shorts, short)
}
b.ResetTimer()
for i := 0; i < b.N; i++ {
if _, err = dset.Lookup(shorts[i%n]); err != nil {
b.Fatal(err)
}
}
}
func benchShortCodeNTable(b *testing.B, n int, shortLen int) {
digests, err := createDigests(n)
if err != nil {
b.Fatal(err)
}
dset := &Set{entries: digestEntries(make([]*digestEntry, 0, n))}
for i := range digests {
if err := dset.Add(digests[i]); err != nil {
b.Fatal(err)
}
}
b.ResetTimer()
for i := 0; i < b.N; i++ {
ShortCodeTable(dset, shortLen)
}
}
func BenchmarkAdd10(b *testing.B) {
benchAddNTable(b, 10)
}
func BenchmarkAdd100(b *testing.B) {
benchAddNTable(b, 100)
}
func BenchmarkAdd1000(b *testing.B) {
benchAddNTable(b, 1000)
}
func BenchmarkLookup10(b *testing.B) {
benchLookupNTable(b, 10, 12)
}
func BenchmarkLookup100(b *testing.B) {
benchLookupNTable(b, 100, 12)
}
func BenchmarkLookup1000(b *testing.B) {
benchLookupNTable(b, 1000, 12)
}
func BenchmarkShortCode10(b *testing.B) {
benchShortCodeNTable(b, 10, 12)
}
func BenchmarkShortCode100(b *testing.B) {
benchShortCodeNTable(b, 100, 12)
}
func BenchmarkShortCode1000(b *testing.B) {
benchShortCodeNTable(b, 1000, 12)
}