Skip to content

Commit

Permalink
Exposed sloppyness parameter in Query & Searcher
Browse files Browse the repository at this point in the history
  • Loading branch information
voldyman authored and mschoch committed Feb 12, 2021
1 parent 2b085f4 commit 55db290
Show file tree
Hide file tree
Showing 5 changed files with 208 additions and 191 deletions.
29 changes: 28 additions & 1 deletion query.go
Original file line number Diff line number Diff line change
Expand Up @@ -733,6 +733,7 @@ type MatchPhraseQuery struct {
field string
analyzer *analysis.Analyzer
boost *boost
slop int
}

// NewMatchPhraseQuery creates a new Query object
Expand Down Expand Up @@ -773,6 +774,18 @@ func (q *MatchPhraseQuery) Field() string {
return q.field
}

// Slop reports the slop currently configured on the query, i.e. the
// acceptable distance between the phrase tokens.
func (q *MatchPhraseQuery) Slop() int {
	return q.slop
}

// SetSlop sets the sloppiness of the query: dist is the acceptable
// distance between the phrase terms.
// It returns the receiver so that setter calls can be chained.
func (q *MatchPhraseQuery) SetSlop(dist int) *MatchPhraseQuery {
	q.slop = dist
	return q
}

func (q *MatchPhraseQuery) SetAnalyzer(a *analysis.Analyzer) *MatchPhraseQuery {
q.analyzer = a
return q
Expand Down Expand Up @@ -802,6 +815,7 @@ func (q *MatchPhraseQuery) Searcher(i search.Reader, options search.SearcherOpti
phraseQuery := NewMultiPhraseQuery(phrase)
phraseQuery.SetField(field)
phraseQuery.SetBoost(q.boost.Value())
phraseQuery.SetSlop(q.slop)
return phraseQuery.Searcher(i, options)
}
noneQuery := NewMatchNoneQuery()
Expand Down Expand Up @@ -989,6 +1003,7 @@ type MultiPhraseQuery struct {
field string
boost *boost
scorer search.Scorer
slop int
}

// NewMultiPhraseQuery creates a new Query for finding
Expand Down Expand Up @@ -1030,13 +1045,25 @@ func (q *MultiPhraseQuery) Field() string {
return q.field
}

// Slop reports the slop currently configured on the query, i.e. the
// acceptable distance between the phrase terms.
func (q *MultiPhraseQuery) Slop() int {
	return q.slop
}

// SetSlop sets the sloppiness of the query: dist is the acceptable
// distance between the phrase terms.
// It returns the receiver so that setter calls can be chained.
func (q *MultiPhraseQuery) SetSlop(dist int) *MultiPhraseQuery {
	q.slop = dist
	return q
}

// Searcher builds the phrase searcher for this query.
//
// The search runs against q.field, falling back to
// options.DefaultSearchField when no field has been set on the query.
// The query's terms, slop and scorer are forwarded to the slop-aware
// constructor so that a value set through SetSlop actually takes effect.
func (q *MultiPhraseQuery) Searcher(i search.Reader, options search.SearcherOptions) (search.Searcher, error) {
	field := q.field
	if q.field == "" {
		field = options.DefaultSearchField
	}

	// Only the slop-aware constructor is called here; a preceding return of
	// the non-sloppy searcher would make this call unreachable and silently
	// drop q.slop.
	return searcher.NewSloppyMultiPhraseSearcher(i, q.terms, field, q.slop, q.scorer, options)
}

func (q *MultiPhraseQuery) Validate() error {
Expand Down
11 changes: 10 additions & 1 deletion search/searcher/search_phrase.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ type PhraseSearcher struct {
paths []phrasePath
locations []search.Location
initialized bool
slop int
}

func (s *PhraseSearcher) Size() int {
Expand All @@ -55,6 +56,13 @@ func (s *PhraseSearcher) Size() int {

// NewMultiPhraseSearcher creates a multi-phrase searcher with no slop.
// It is a convenience wrapper that delegates to
// NewSloppyMultiPhraseSearcher with a slop of zero.
func NewMultiPhraseSearcher(indexReader search.Reader, terms [][]string, field string, scorer search.Scorer,
	options search.SearcherOptions) (*PhraseSearcher, error) {
	const noSlop = 0
	return NewSloppyMultiPhraseSearcher(indexReader, terms, field, noSlop, scorer, options)
}

// NewSloppyMultiPhraseSearcher creates a multi-phrase searcher that tolerates a specified amount of "sloppiness";
// the slop parameter restricts the distance allowed between the terms.
func NewSloppyMultiPhraseSearcher(indexReader search.Reader, terms [][]string, field string, slop int,
scorer search.Scorer, options search.SearcherOptions) (*PhraseSearcher, error) {
options.IncludeTermVectors = true
var termPositionSearchers []search.Searcher
for _, termPos := range terms {
Expand Down Expand Up @@ -114,6 +122,7 @@ func NewMultiPhraseSearcher(indexReader search.Reader, terms [][]string, field s
rv := PhraseSearcher{
mustSearcher: mustSearcher,
terms: terms,
slop: slop,
}

return &rv, nil
Expand Down Expand Up @@ -213,7 +222,7 @@ func (s *PhraseSearcher) checkCurrMustMatchField(field string, tlm search.TermLo
if s.path == nil {
s.path = make(phrasePath, 0, len(s.terms))
}
s.paths = findPhrasePaths(0, s.terms, tlm, s.path[:0], 0, s.paths[:0])
s.paths = findPhrasePaths(0, s.terms, tlm, s.path[:0], s.slop, s.paths[:0])
for _, p := range s.paths {
for _, pp := range p {
ftls = append(ftls, search.FieldTermLocation{
Expand Down
65 changes: 65 additions & 0 deletions search/searcher/search_phrase_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,71 @@ func TestMultiPhraseSearch(t *testing.T) {
}
}

// TestSloppyMultiPhraseSearch runs NewSloppyMultiPhraseSearcher against the
// "desc" field of the shared test index for several phrase/slop
// combinations and checks the exact list of matching document numbers.
func TestSloppyMultiPhraseSearch(t *testing.T) {
	soptions := search.SearcherOptions{
		SimilarityForField: func(field string) search.Similarity {
			return similarity.NewBM25Similarity()
		},
		Explain:            true,
		IncludeTermVectors: true,
	}

	tests := []struct {
		phrase [][]string
		docids []uint64
		slop   int
	}{
		{
			phrase: [][]string{{"angst"}, {"beer"}, {"database"}},
			docids: []uint64{},
			slop:   0,
		},
		{
			phrase: [][]string{{"angst"}, {"beer"}, {"database"}},
			docids: []uint64{baseTestIndexReaderDirect.docNumByID("2")},
			slop:   1,
		},
		{
			phrase: [][]string{{"apple", "angst"}, {"dank"}},
			docids: []uint64{baseTestIndexReaderDirect.docNumByID("3")},
			slop:   2,
		},
	}

	for i, test := range tests {
		phraseSearcher, err := NewSloppyMultiPhraseSearcher(baseTestIndexReader, test.phrase, "desc", test.slop, nil, soptions)
		if err != nil {
			// Fatal, not Error: continuing would dereference a nil searcher below.
			t.Fatalf("error creating searcher for test %d: %v", i, err)
		}
		ctx := &search.Context{
			DocumentMatchPool: search.NewDocumentMatchPool(phraseSearcher.DocumentMatchPoolSize(), 0),
		}

		// Start from an empty, non-nil slice so that a case expecting no
		// matches compares equal under reflect.DeepEqual.
		actualIds := []uint64{}
		next, err := phraseSearcher.Next(ctx)
		for err == nil && next != nil {
			actualIds = append(actualIds, next.Number)
			ctx.DocumentMatchPool.Put(next)
			next, err = phraseSearcher.Next(ctx)
		}
		if err != nil {
			t.Fatalf("error iterating searcher: %v for test %d", err, i)
		}
		if !reflect.DeepEqual(test.docids, actualIds) {
			t.Fatalf("test case %d: expected ids: %v, got %v", i, test.docids, actualIds)
		}

		if err := phraseSearcher.Close(); err != nil {
			t.Error(err)
		}
	}

	// The reader is shared by every case above, so it is closed once after
	// the loop rather than after the first iteration.
	if err := baseTestIndexReader.Close(); err != nil {
		t.Error(err)
	}
}

func TestFindPhrasePaths(t *testing.T) {
tests := []struct {
phrase [][]string
Expand Down
12 changes: 12 additions & 0 deletions test/integration.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,18 @@ type match struct {
Locations search.FieldTermLocationMap
}

// newIDMatches builds one match per id, in the order given. Each match
// carries a single "_id" field whose only value is the id's bytes.
//
// The returned slice is never nil, even when no ids are supplied.
func newIDMatches(ids ...string) []*match {
	// The final length is known up front, so allocate once. make (rather
	// than a nil slice) keeps the empty result non-nil.
	result := make([]*match, 0, len(ids))

	for _, id := range ids {
		result = append(result, &match{
			Fields: map[string][][]byte{
				"_id": {[]byte(id)},
			},
		})
	}
	return result
}

type ExpectHighlight struct {
Highlighter highlight.Highlighter
Field string
Expand Down
Loading

0 comments on commit 55db290

Please sign in to comment.