Skip to content

Commit

Permalink
packfile: slightly haster hash function for chunk-offset index key
Browse files Browse the repository at this point in the history
Signed-off-by: Miguel Molina <miguel@erizocosmi.co>
  • Loading branch information
erizocosmico committed Sep 6, 2017
1 parent 0b68b0f commit b953011
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 11 deletions.
4 changes: 2 additions & 2 deletions plumbing/format/packfile/delta_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ func (s *DeltaSuite) TestAddDelta(c *C) {
for _, t := range s.testCases {
baseBuf := genBytes(t.base)
targetBuf := genBytes(t.target)
delta := DiffDelta(make(deltaIndex), baseBuf, targetBuf)
delta := DiffDelta(baseBuf, targetBuf)
result, err := PatchDelta(baseBuf, delta)

c.Log("Executing test case:", t.description)
Expand All @@ -98,7 +98,7 @@ func (s *DeltaSuite) TestIncompleteDelta(c *C) {
c.Log("Incomplete delta on:", t.description)
baseBuf := genBytes(t.base)
targetBuf := genBytes(t.target)
delta := DiffDelta(make(deltaIndex), baseBuf, targetBuf)
delta := DiffDelta(baseBuf, targetBuf)
delta = delta[:len(delta)-2]
result, err := PatchDelta(baseBuf, delta)
c.Assert(err, NotNil)
Expand Down
39 changes: 30 additions & 9 deletions plumbing/format/packfile/diff_delta.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ package packfile

import (
"bytes"
"hash/adler32"
"io/ioutil"

"gopkg.in/src-d/go-git.v4/plumbing"
Expand Down Expand Up @@ -49,7 +48,7 @@ func getDelta(index deltaIndex, base, target plumbing.EncodedObject) (plumbing.E
return nil, err
}

db := DiffDelta(index, bb, tb)
db := diffDelta(index, bb, tb)
delta := &plumbing.MemoryObject{}
_, err = delta.Write(db)
if err != nil {
Expand All @@ -63,7 +62,11 @@ func getDelta(index deltaIndex, base, target plumbing.EncodedObject) (plumbing.E
}

// DiffDelta returns the delta that transforms src into tgt.
func DiffDelta(sindex deltaIndex, src []byte, tgt []byte) []byte {
func DiffDelta(src, tgt []byte) []byte {
return diffDelta(make(deltaIndex), src, tgt)
}

func diffDelta(sindex deltaIndex, src []byte, tgt []byte) []byte {
buf := bufPool.Get().(*bytes.Buffer)
buf.Reset()
buf.Write(deltaEncodeSize(len(src)))
Expand Down Expand Up @@ -132,24 +135,42 @@ func encodeInsertOperation(ibuf, buf *bytes.Buffer) {
ibuf.Reset()
}

func initMatch(index map[uint32]int, src []byte) map[uint32]int {
func initMatch(index deltaIndex, src []byte) {
i := 0
for {
if i+s > len(src) {
break
}

ch := adler32.Checksum(src[i : i+s])
ch := hashBuf(src[i : i+s])
index[ch] = i
i += s
}
}

// https://lemire.me/blog/2015/10/22/faster-hashing-without-effort/
func hashBuf(buf []byte) int64 {
var h int64
var i int
len := len(buf)
for ; i+3 < len; i += 4 {
h = 31*31*31*31*h +
31*31*31*int64(buf[i]) +
31*31*int64(buf[i+1]) +
31*int64(buf[i+2]) +
int64(buf[i+3])
}

for ; i < len; i++ {
h = 31*h + int64(buf[i])
}

return index
return h
}

func findMatch(src, tgt []byte, sindex map[uint32]int, tgtOffset int) (srcOffset, l int) {
func findMatch(src, tgt []byte, sindex deltaIndex, tgtOffset int) (srcOffset, l int) {
if len(tgt) >= tgtOffset+s {
ch := adler32.Checksum(tgt[tgtOffset : tgtOffset+s])
ch := hashBuf(tgt[tgtOffset : tgtOffset+s])
var ok bool
srcOffset, ok = sindex[ch]
if !ok {
Expand Down Expand Up @@ -219,4 +240,4 @@ func encodeCopyOperation(offset, length int) []byte {
return append([]byte{byte(code)}, opcodes...)
}

type deltaIndex map[uint32]int
type deltaIndex map[int64]int

0 comments on commit b953011

Please sign in to comment.