Skip to content
This repository has been archived by the owner on Sep 11, 2020. It is now read-only.

Feature/new packfile parser #898

Merged
merged 10 commits into from
Jul 26, 2018
55 changes: 53 additions & 2 deletions plumbing/format/idxfile/idxfile.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ type Index interface {
FindOffset(h plumbing.Hash) (int64, error)
// FindCRC32 finds the CRC32 of the object with the given hash.
FindCRC32(h plumbing.Hash) (uint32, error)
// FindHash finds the hash for the object with the given offset.
FindHash(o int64) (plumbing.Hash, error)
// Count returns the number of entries in the index.
Count() (int64, error)
// Entries returns an iterator to retrieve all index entries.
Expand All @@ -48,6 +50,8 @@ type MemoryIndex struct {
Offset64 []byte
PackfileChecksum [20]byte
IdxChecksum [20]byte

offsetHash map[int64]plumbing.Hash
}

var _ Index = (*MemoryIndex)(nil)
Expand All @@ -72,7 +76,7 @@ func (idx *MemoryIndex) findHashIndex(h plumbing.Hash) int {
low := uint64(0)
for {
mid := (low + high) >> 1
offset := mid + (mid << 2)
offset := mid * objectIDLength

cmp := bytes.Compare(h[:], data[offset:offset+objectIDLength])
if cmp < 0 {
Expand All @@ -83,7 +87,7 @@ func (idx *MemoryIndex) findHashIndex(h plumbing.Hash) int {
low = mid + 1
}

if low < high {
if low > high {
break
}
}
Expand Down Expand Up @@ -149,6 +153,53 @@ func (idx *MemoryIndex) getCrc32(firstLevel, secondLevel int) (uint32, error) {
return binary.ReadUint32(buf)
}

// FindHash implements the Index interface. It returns the hash of the object
// recorded at offset o.
//
// The reverse offset/hash map is generated lazily on the first call. An error
// from generating it is returned to the caller. If no object is recorded at
// o, plumbing.ErrObjectNotFound is returned.
func (idx *MemoryIndex) FindHash(o int64) (plumbing.Hash, error) {
	// Lazily generate the reverse offset/hash map if required.
	if idx.offsetHash == nil {
		if err := idx.genOffsetHash(); err != nil {
			// Propagate the failure; returning a nil error here would make
			// ZeroHash look like a successful lookup.
			return plumbing.ZeroHash, err
		}
	}

	hash, ok := idx.offsetHash[o]
	if !ok {
		return plumbing.ZeroHash, plumbing.ErrObjectNotFound
	}

	return hash, nil
}

// genOffsetHash generates the offset/hash mapping for reverse search.
//
// It walks every entry returned by idx.Entries() and records entry.Hash under
// entry.Offset. The map is only published to idx.offsetHash once the whole
// iteration has succeeded, so a failed build leaves idx.offsetHash untouched
// and a later call can retry.
func (idx *MemoryIndex) genOffsetHash() error {
	count, err := idx.Count()
	if err != nil {
		return err
	}

	iter, err := idx.Entries()
	if err != nil {
		return err
	}

	offsetHash := make(map[int64]plumbing.Hash, count)
	for {
		entry, err := iter.Next()
		if err == io.EOF {
			// io.EOF marks the end of the iteration, not a failure.
			break
		}
		if err != nil {
			return err
		}

		offsetHash[int64(entry.Offset)] = entry.Hash
	}

	idx.offsetHash = offsetHash

	return nil
}

// Count implements the Index interface.
func (idx *MemoryIndex) Count() (int64, error) {
return int64(idx.Fanout[fanout-1]), nil
Expand Down
177 changes: 177 additions & 0 deletions plumbing/format/idxfile/writer.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,177 @@
package idxfile

import (
"bytes"
"fmt"
"math"
"sort"
"sync"

"gopkg.in/src-d/go-git.v4/plumbing"
"gopkg.in/src-d/go-git.v4/utils/binary"
)

// objects is a slice of index entries that implements sort.Interface, using
// the entry hash as the sorting key.
type objects []Entry

// Writer implements a packfile Observer interface and is used to generate
// indexes.
type Writer struct {
	// m is held by Index and Add.
	m sync.Mutex

	// count is the object count received in OnHeader.
	count uint32
	// checksum is the hash received in OnFooter; createIndex copies it into
	// the index's PackfileChecksum.
	checksum plumbing.Hash
	// objects holds one entry per call to Add.
	objects objects
	// offset64 counts the entries appended so far by addOffset64.
	offset64 uint32
	// finished is set by OnFooter; createIndex fails while it is false.
	finished bool
	// index is the MemoryIndex built by createIndex.
	index *MemoryIndex
}

// Index hands back the MemoryIndex of this writer. If none has been built
// yet it is created on the spot, which fails unless the footer has already
// been observed.
func (w *Writer) Index() (*MemoryIndex, error) {
	w.m.Lock()
	defer w.m.Unlock()

	if w.index != nil {
		return w.index, nil
	}

	return w.createIndex()
}

// Add records a new object: its hash h, its position pos and its checksum
// crc are stored as one more entry of the writer.
func (w *Writer) Add(h plumbing.Hash, pos uint64, crc uint32) {
	w.m.Lock()
	defer w.m.Unlock()

	entry := Entry{Hash: h, CRC32: crc, Offset: pos}
	w.objects = append(w.objects, entry)
}

// Finished reports whether OnFooter has been called on this writer.
// NOTE(review): w.finished is read here without taking w.m — confirm callers
// do not race with OnFooter.
func (w *Writer) Finished() bool {
	return w.finished
}

// OnHeader implements packfile.Observer interface. It remembers the announced
// object count and replaces the entry list with an empty one whose capacity
// is that count.
func (w *Writer) OnHeader(count uint32) error {
	w.objects = make(objects, 0, count)
	w.count = count

	return nil
}

// OnInflatedObjectHeader implements packfile.Observer interface. The writer
// ignores all three arguments; the method does nothing and always returns
// nil.
func (w *Writer) OnInflatedObjectHeader(t plumbing.ObjectType, objSize int64, pos int64) error {
	return nil
}

// OnInflatedObjectContent implements packfile.Observer interface. The object
// is handed to Add with its position converted to uint64; the returned error
// is always nil.
func (w *Writer) OnInflatedObjectContent(h plumbing.Hash, pos int64, crc uint32) error {
	offset := uint64(pos)
	w.Add(h, offset, crc)

	return nil
}

// OnFooter implements packfile.Observer interface. It stores the packfile
// checksum h, marks the writer as finished and builds the index right away,
// reporting any error from that build.
func (w *Writer) OnFooter(h plumbing.Hash) error {
	w.checksum = h
	w.finished = true

	_, err := w.createIndex()

	return err
}

// createIndex returns a MemoryIndex filled with the information collected by
// the observer callbacks. It returns an error if the writer is not finished
// yet, i.e. OnFooter has not run.
//
// The new index is stored in w.index before it is populated because
// addOffset64 appends to w.index.Offset64 while the entries are written.
func (w *Writer) createIndex() (*MemoryIndex, error) {
	if !w.finished {
		return nil, fmt.Errorf("the index still hasn't finished building")
	}

	idx := new(MemoryIndex)
	w.index = idx

	// Sort by hash: the loop below relies on entries that share the same
	// first hash byte being adjacent and visited in increasing byte order.
	sort.Sort(w.objects)

	// unmap all fans by default
	for i := range idx.FanoutMapping {
		idx.FanoutMapping[i] = noMapping
	}

	// Scratch buffer reused to encode each 32-bit value before appending it.
	buf := new(bytes.Buffer)

	// last is the first hash byte of the previous entry (-1 before the first
	// one); bucket is the position inside Names/Offset32/Crc32 used for the
	// current first hash byte.
	last := -1
	bucket := -1
	for i, o := range w.objects {
		fan := o.Hash[0]

		// fill the gaps between fans
		for j := last + 1; j < int(fan); j++ {
			idx.Fanout[j] = uint32(i)
		}

		// update the number of objects for this position
		idx.Fanout[fan] = uint32(i + 1)

		// we move from one bucket to another, update counters and allocate
		// memory
		if last != int(fan) {
			bucket++
			idx.FanoutMapping[fan] = bucket
			last = int(fan)

			idx.Names = append(idx.Names, make([]byte, 0))
			idx.Offset32 = append(idx.Offset32, make([]byte, 0))
			idx.Crc32 = append(idx.Crc32, make([]byte, 0))
		}

		idx.Names[bucket] = append(idx.Names[bucket], o.Hash[:]...)

		// Offsets above math.MaxInt32 are stored in full in Offset64; the
		// 32-bit table then receives the value returned by addOffset64.
		offset := o.Offset
		if offset > math.MaxInt32 {
			offset = w.addOffset64(offset)
		}

		buf.Truncate(0)
		binary.WriteUint32(buf, uint32(offset))
		idx.Offset32[bucket] = append(idx.Offset32[bucket], buf.Bytes()...)

		buf.Truncate(0)
		binary.WriteUint32(buf, uint32(o.CRC32))
		idx.Crc32[bucket] = append(idx.Crc32[bucket], buf.Bytes()...)
	}

	// Every fanout slot after the last populated one holds the total number
	// of objects.
	for j := last + 1; j < 256; j++ {
		idx.Fanout[j] = uint32(len(w.objects))
	}

	idx.Version = VersionSupported
	idx.PackfileChecksum = w.checksum

	return idx, nil
}

// addOffset64 appends pos, encoded with binary.WriteUint64, to the Offset64
// table of the index under construction. It returns the position of that
// entry within the table with bit 31 set, and advances the entry counter.
func (w *Writer) addOffset64(pos uint64) uint64 {
	var encoded bytes.Buffer
	binary.WriteUint64(&encoded, pos)
	w.index.Offset64 = append(w.index.Offset64, encoded.Bytes()...)

	slot := w.offset64
	w.offset64++

	return uint64(slot | (1 << 31))
}

// Len returns the number of entries in o.
func (o objects) Len() int {
	return len(o)
}

// Less reports whether the hash of entry i sorts before the hash of entry j
// when both are compared byte by byte.
func (o objects) Less(i int, j int) bool {
	left, right := o[i].Hash, o[j].Hash

	return bytes.Compare(left[:], right[:]) < 0
}

// Swap exchanges the entries at positions i and j.
func (o objects) Swap(i int, j int) {
	tmp := o[i]
	o[i] = o[j]
	o[j] = tmp
}
45 changes: 45 additions & 0 deletions plumbing/format/idxfile/writer_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
package idxfile_test

import (
"bytes"
"io/ioutil"

"gopkg.in/src-d/go-git.v4/plumbing/format/idxfile"
"gopkg.in/src-d/go-git.v4/plumbing/format/packfile"

. "gopkg.in/check.v1"
"gopkg.in/src-d/go-git-fixtures.v3"
)

// IndexSuite holds the idxfile writer tests; it embeds fixtures.Suite.
type IndexSuite struct {
	fixtures.Suite
}

// Hand the suite to the check package's Suite function.
var _ = Suite(&IndexSuite{})

// TestIndexWriter parses the basic fixture packfile with an idxfile.Writer
// attached as observer, encodes the index the writer produced and checks that
// the output is byte-for-byte the fixture's own idx file.
func (s *IndexSuite) TestIndexWriter(c *C) {
	f := fixtures.Basic().One()
	scanner := packfile.NewScanner(f.Packfile())

	obs := new(idxfile.Writer)
	parser := packfile.NewParser(scanner, obs)

	_, err := parser.Parse()
	c.Assert(err, IsNil)

	idx, err := obs.Index()
	c.Assert(err, IsNil)

	idxFile := f.Idx()
	// Deferred so the fixture file is closed even when one of the assertions
	// below aborts the test.
	defer idxFile.Close()

	expected, err := ioutil.ReadAll(idxFile)
	c.Assert(err, IsNil)

	buf := new(bytes.Buffer)
	encoder := idxfile.NewEncoder(buf)
	n, err := encoder.Encode(idx)
	c.Assert(err, IsNil)
	c.Assert(n, Equals, len(expected))

	c.Assert(buf.Bytes(), DeepEquals, expected)
}
18 changes: 10 additions & 8 deletions plumbing/format/packfile/decoder.go
Original file line number Diff line number Diff line change
Expand Up @@ -403,12 +403,13 @@ func (d *Decoder) fillOFSDeltaObjectContent(obj plumbing.EncodedObject, offset i
return 0, err
}

e, ok := d.idx.LookupOffset(uint64(offset))
var base plumbing.EncodedObject
if ok {
base, ok = d.cacheGet(e.Hash)
}
// e, ok := d.idx.LookupOffset(uint64(offset))
// if ok {
// base, ok = d.cacheGet(e.Hash)
// }

var base plumbing.EncodedObject
ok := false
if !ok {
base, err = d.recallByOffset(offset)
if err != nil {
Expand Down Expand Up @@ -446,11 +447,12 @@ func (d *Decoder) recallByOffset(o int64) (plumbing.EncodedObject, error) {
return d.DecodeObjectAt(o)
}

if e, ok := d.idx.LookupOffset(uint64(o)); ok {
return d.recallByHashNonSeekable(e.Hash)
hash, err := d.idx.FindHash(o)
if err != nil {
return nil, err
}

return nil, plumbing.ErrObjectNotFound
return d.recallByHashNonSeekable(hash)
}

func (d *Decoder) recallByHash(h plumbing.Hash) (plumbing.EncodedObject, error) {
Expand Down
Loading