Skip to content

Commit

Permalink
packfile: improve Index memory representation to be more compact
Browse files Browse the repository at this point in the history
Instead of using a map for offset indexing, use a sorted slice.
Binary searching is fast, and a slice is much more compact.
This has a negligible hit on speed, but has a significant impact on
memory usage, especially for larger repos.

benchmark                         old ns/op     new ns/op     delta
BenchmarkIndexConstruction-12     15506506      14056098      -9.35%

benchmark                         old allocs     new allocs     delta
BenchmarkIndexConstruction-12     60764          60385          -0.62%

benchmark                         old bytes     new bytes     delta
BenchmarkIndexConstruction-12     4318145       3913169       -9.38%

Signed-off-by: David Symonds <dsymonds@golang.org>
  • Loading branch information
dsymonds committed May 30, 2018
1 parent 57570e8 commit cf532f9
Show file tree
Hide file tree
Showing 2 changed files with 67 additions and 23 deletions.
53 changes: 43 additions & 10 deletions plumbing/format/packfile/index.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
package packfile

import (
"sort"

"gopkg.in/src-d/go-git.v4/plumbing"
"gopkg.in/src-d/go-git.v4/plumbing/format/idxfile"
)
Expand All @@ -10,7 +12,7 @@ import (
// or to store them.
type Index struct {
// byHash maps an object hash to its entry.
byHash map[plumbing.Hash]*idxfile.Entry
byOffset map[uint64]*idxfile.Entry
// byOffset holds entries ordered by ascending Offset so that offset
// lookups can binary search the slice.
byOffset []*idxfile.Entry // sorted by their offset
}

// NewIndex creates a new empty index with the given size. Size is a hint and
Expand All @@ -19,36 +21,62 @@ type Index struct {
func NewIndex(size int) *Index {
return &Index{
// size is used as the initial capacity hint for the hash map.
byHash: make(map[plumbing.Hash]*idxfile.Entry, size),
byOffset: make(map[uint64]*idxfile.Entry, size),
// The offset slice starts empty (length 0) with capacity size.
byOffset: make([]*idxfile.Entry, 0, size),
}
}

// NewIndexFromIdxFile creates a new Index from an idxfile.Idxfile.
//
// Both internal containers are pre-sized from idxf.ObjectCount and every
// entry in idxf.Entries is registered by hash and by offset.
func NewIndexFromIdxFile(idxf *idxfile.Idxfile) *Index {
idx := &Index{
byHash: make(map[plumbing.Hash]*idxfile.Entry, idxf.ObjectCount),
byOffset: make(map[uint64]*idxfile.Entry, idxf.ObjectCount),
byOffset: make([]*idxfile.Entry, 0, idxf.ObjectCount),
}
for _, e := range idxf.Entries {
idx.add(e)
// Append without keeping order; the slice is sorted once below.
idx.addUnsorted(e)
}
// A single sort after all appends establishes the by-offset ordering.
sort.Sort(orderByOffset(idx.byOffset))

return idx
}

// orderByOffset adapts a slice of index entries to sort.Interface,
// ordering the entries by ascending Offset.
type orderByOffset []*idxfile.Entry

// Len reports how many entries the slice holds.
func (s orderByOffset) Len() int {
	return len(s)
}

// Less reports whether the entry at position a has a smaller offset than
// the entry at position b.
func (s orderByOffset) Less(a, b int) bool {
	return s[a].Offset < s[b].Offset
}

// Swap exchanges the entries at positions a and b.
func (s orderByOffset) Swap(a, b int) {
	s[a], s[b] = s[b], s[a]
}

// Add adds a new Entry with the given values to the index.
//
// The entry is stored in byHash under h and inserted into byOffset at the
// position that keeps that slice sorted by offset.
func (idx *Index) Add(h plumbing.Hash, offset uint64, crc32 uint32) {
e := idxfile.Entry{
e := &idxfile.Entry{
Hash: h,
Offset: offset,
CRC32: crc32,
}
idx.add(&e)
idx.byHash[e.Hash] = e

// Find the right position in byOffset.
// Look for the first position whose offset is *greater* than e.Offset.
// An entry whose offset equals an existing one therefore lands after it.
i := sort.Search(len(idx.byOffset), func(i int) bool {
return idx.byOffset[i].Offset > offset
})
if i == len(idx.byOffset) {
// Simple case: add it to the end.
idx.byOffset = append(idx.byOffset, e)
return
}
// Harder case: shift existing entries down by one to make room.
// Append a nil entry first so we can use existing capacity in case
// the index was carefully preallocated.
idx.byOffset = append(idx.byOffset, nil)
// The source range stops one short of the new length so the freshly
// appended nil slot is only ever a destination of the copy.
copy(idx.byOffset[i+1:], idx.byOffset[i:len(idx.byOffset)-1])
idx.byOffset[i] = e
}

func (idx *Index) add(e *idxfile.Entry) {
// addUnsorted registers e under its hash and appends it to byOffset
// without maintaining the by-offset ordering; NewIndexFromIdxFile sorts
// byOffset after it has appended all entries.
func (idx *Index) addUnsorted(e *idxfile.Entry) {
idx.byHash[e.Hash] = e
idx.byOffset[e.Offset] = e
idx.byOffset = append(idx.byOffset, e)
}

// LookupHash looks an entry up by its hash. An idxfile.Entry is returned and
Expand All @@ -61,8 +89,13 @@ func (idx *Index) LookupHash(h plumbing.Hash) (*idxfile.Entry, bool) {
// LookupOffset looks an entry up by its offset in the packfile. An idxfile.Entry
// is returned and a bool, which is true if it was found or false if it wasn't.
func (idx *Index) LookupOffset(offset uint64) (*idxfile.Entry, bool) {
e, ok := idx.byOffset[offset]
return e, ok
// Binary search for the first entry whose offset is >= the requested one.
i := sort.Search(len(idx.byOffset), func(i int) bool {
return idx.byOffset[i].Offset >= offset
})
// sort.Search returns len(idx.byOffset) when no entry satisfies the
// predicate; otherwise the candidate still has to match exactly.
if i >= len(idx.byOffset) || idx.byOffset[i].Offset != offset {
return nil, false // not present
}
return idx.byOffset[i], true
}

// Size returns the number of entries in the index.
Expand Down
37 changes: 24 additions & 13 deletions plumbing/format/packfile/index_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package packfile
import (
"strconv"
"strings"
"testing"

"gopkg.in/src-d/go-git.v4/plumbing"

Expand All @@ -26,12 +27,12 @@ func (s *IndexSuite) TestLookupOffset(c *C) {
e, ok := idx.LookupOffset(uint64(o2))
c.Assert(ok, Equals, true)
c.Assert(e, NotNil)
c.Assert(e.Hash, Equals, s.toHash(o2))
c.Assert(e.Hash, Equals, toHash(o2))
c.Assert(e.Offset, Equals, uint64(o2))
}
}

h1 := s.toHash(o1)
h1 := toHash(o1)
idx.Add(h1, uint64(o1), 0)

for o2 := 0; o2 < 10000; o2 += 100 {
Expand All @@ -43,7 +44,7 @@ func (s *IndexSuite) TestLookupOffset(c *C) {
e, ok := idx.LookupOffset(uint64(o2))
c.Assert(ok, Equals, true)
c.Assert(e, NotNil)
c.Assert(e.Hash, Equals, s.toHash(o2))
c.Assert(e.Hash, Equals, toHash(o2))
c.Assert(e.Offset, Equals, uint64(o2))
}
}
Expand All @@ -56,31 +57,31 @@ func (s *IndexSuite) TestLookupHash(c *C) {
for o1 := 0; o1 < 10000; o1 += 100 {
for o2 := 0; o2 < 10000; o2 += 100 {
if o2 >= o1 {
e, ok := idx.LookupHash(s.toHash(o2))
e, ok := idx.LookupHash(toHash(o2))
c.Assert(ok, Equals, false)
c.Assert(e, IsNil)
} else {
e, ok := idx.LookupHash(s.toHash(o2))
e, ok := idx.LookupHash(toHash(o2))
c.Assert(ok, Equals, true)
c.Assert(e, NotNil)
c.Assert(e.Hash, Equals, s.toHash(o2))
c.Assert(e.Hash, Equals, toHash(o2))
c.Assert(e.Offset, Equals, uint64(o2))
}
}

h1 := s.toHash(o1)
h1 := toHash(o1)
idx.Add(h1, uint64(o1), 0)

for o2 := 0; o2 < 10000; o2 += 100 {
if o2 > o1 {
e, ok := idx.LookupHash(s.toHash(o2))
e, ok := idx.LookupHash(toHash(o2))
c.Assert(ok, Equals, false)
c.Assert(e, IsNil)
} else {
e, ok := idx.LookupHash(s.toHash(o2))
e, ok := idx.LookupHash(toHash(o2))
c.Assert(ok, Equals, true)
c.Assert(e, NotNil)
c.Assert(e.Hash, Equals, s.toHash(o2))
c.Assert(e.Hash, Equals, toHash(o2))
c.Assert(e.Offset, Equals, uint64(o2))
}
}
Expand All @@ -92,7 +93,7 @@ func (s *IndexSuite) TestSize(c *C) {

for o1 := 0; o1 < 1000; o1++ {
c.Assert(idx.Size(), Equals, o1)
h1 := s.toHash(o1)
h1 := toHash(o1)
idx.Add(h1, uint64(o1), 0)
}
}
Expand All @@ -107,16 +108,26 @@ func (s *IndexSuite) TestIdxFileEmpty(c *C) {
// TestIdxFile checks that an Index filled through Add is DeepEquals to the
// Index rebuilt from its own ToIdxFile output.
func (s *IndexSuite) TestIdxFile(c *C) {
idx := NewIndex(0)
// Entries are added with offsets 0..999 in ascending order.
for o1 := 0; o1 < 1000; o1++ {
h1 := s.toHash(o1)
h1 := toHash(o1)
idx.Add(h1, uint64(o1), 0)
}

idx2 := NewIndexFromIdxFile(idx.ToIdxFile())
c.Assert(idx, DeepEquals, idx2)
}

func (s *IndexSuite) toHash(i int) plumbing.Hash {
// toHash builds a deterministic hash for i: the decimal digits of i,
// left-padded with 'a' to 40 characters, passed to plumbing.NewHash.
func toHash(i int) plumbing.Hash {
	digits := strconv.Itoa(i)
	fill := strings.Repeat("a", 40-len(digits))
	return plumbing.NewHash(fill + digits)
}

// BenchmarkIndexConstruction measures the cost of building an Index of
// 10,000 entries through Add, with offsets 0, 100, 200, ... inserted in
// ascending order.
//
// A fresh Index is built on every iteration so that each of the b.N
// iterations performs the same amount of work; growing one shared Index
// across all iterations would make the per-op figures and the memory
// footprint depend on how large b.N happens to be.
func BenchmarkIndexConstruction(b *testing.B) {
	b.ReportAllocs()

	for n := 0; n < b.N; n++ {
		idx := NewIndex(0)
		for o := 0; o < 1e6; o += 100 {
			idx.Add(toHash(o), uint64(o), 0)
		}
	}
}

0 comments on commit cf532f9

Please sign in to comment.