inverted_index_test.go (7664B)
1 // Copyright 2019 The Hugo Authors. All rights reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 // http://www.apache.org/licenses/LICENSE-2.0
7 //
8 // Unless required by applicable law or agreed to in writing, software
9 // distributed under the License is distributed on an "AS IS" BASIS,
10 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 // See the License for the specific language governing permissions and
12 // limitations under the License.
13
14 package related
15
16 import (
17 "fmt"
18 "math/rand"
19 "testing"
20 "time"
21
22 qt "github.com/frankban/quicktest"
23 )
24
// testDoc is a minimal in-memory document used as a fixture by the tests and
// benchmarks in this file.
type testDoc struct {
	// keywords maps an index name (e.g. "tags") to the keywords stored for
	// that index.
	keywords map[string][]Keyword
	// date is the value reported by PublishDate.
	date time.Time
	// name is the value reported by Name.
	name string
}
30
31 func (d *testDoc) String() string {
32 s := "\n"
33 for k, v := range d.keywords {
34 s += k + ":\t\t"
35 for _, vv := range v {
36 s += " " + vv.String()
37 }
38 s += "\n"
39 }
40 return s
41 }
42
// Name returns the document's name field. The constructors in this file do
// not set it, so it is empty unless a test assigns it explicitly.
func (d *testDoc) Name() string {
	return d.name
}
46
47 func newTestDoc(name string, keywords ...string) *testDoc {
48 time.Sleep(1 * time.Millisecond)
49 return newTestDocWithDate(name, time.Now(), keywords...)
50 }
51
52 func newTestDocWithDate(name string, date time.Time, keywords ...string) *testDoc {
53 km := make(map[string][]Keyword)
54
55 kw := &testDoc{keywords: km, date: date}
56
57 kw.addKeywords(name, keywords...)
58 return kw
59 }
60
61 func (d *testDoc) addKeywords(name string, keywords ...string) *testDoc {
62 keywordm := createTestKeywords(name, keywords...)
63
64 for k, v := range keywordm {
65 keywords := make([]Keyword, len(v))
66 for i := 0; i < len(v); i++ {
67 keywords[i] = StringKeyword(v[i])
68 }
69 d.keywords[k] = keywords
70 }
71 return d
72 }
73
// createTestKeywords returns a single-entry map from the index name to the
// given keywords. The keywords slice is stored as-is, not copied.
func createTestKeywords(name string, keywords ...string) map[string][]string {
	byIndex := make(map[string][]string, 1)
	byIndex[name] = keywords
	return byIndex
}
79
// RelatedKeywords returns the keywords stored for the index named cfg.Name.
// The slice is nil when the document has no entry for that index; the error
// is always nil.
func (d *testDoc) RelatedKeywords(cfg IndexConfig) ([]Keyword, error) {
	return d.keywords[cfg.Name], nil
}
83
// PublishDate returns the date the document was created with.
func (d *testDoc) PublishDate() time.Time {
	return d.date
}
87
88 func TestSearch(t *testing.T) {
89 config := Config{
90 Threshold: 90,
91 IncludeNewer: false,
92 Indices: IndexConfigs{
93 IndexConfig{Name: "tags", Weight: 50},
94 IndexConfig{Name: "keywords", Weight: 65},
95 },
96 }
97
98 idx := NewInvertedIndex(config)
99 // idx.debug = true
100
101 docs := []Document{
102 newTestDoc("tags", "a", "b", "c", "d"),
103 newTestDoc("tags", "b", "d", "g"),
104 newTestDoc("tags", "b", "h").addKeywords("keywords", "a"),
105 newTestDoc("tags", "g", "h").addKeywords("keywords", "a", "b"),
106 }
107
108 idx.Add(docs...)
109
110 t.Run("count", func(t *testing.T) {
111 c := qt.New(t)
112 c.Assert(len(idx.index), qt.Equals, 2)
113 set1, found := idx.index["tags"]
114 c.Assert(found, qt.Equals, true)
115 // 6 tags
116 c.Assert(len(set1), qt.Equals, 6)
117
118 set2, found := idx.index["keywords"]
119 c.Assert(found, qt.Equals, true)
120 c.Assert(len(set2), qt.Equals, 2)
121 })
122
123 t.Run("search-tags", func(t *testing.T) {
124 c := qt.New(t)
125 m, err := idx.search(newQueryElement("tags", StringsToKeywords("a", "b", "d", "z")...))
126 c.Assert(err, qt.IsNil)
127 c.Assert(len(m), qt.Equals, 2)
128 c.Assert(m[0], qt.Equals, docs[0])
129 c.Assert(m[1], qt.Equals, docs[1])
130 })
131
132 t.Run("search-tags-and-keywords", func(t *testing.T) {
133 c := qt.New(t)
134 m, err := idx.search(
135 newQueryElement("tags", StringsToKeywords("a", "b", "z")...),
136 newQueryElement("keywords", StringsToKeywords("a", "b")...))
137 c.Assert(err, qt.IsNil)
138 c.Assert(len(m), qt.Equals, 3)
139 c.Assert(m[0], qt.Equals, docs[3])
140 c.Assert(m[1], qt.Equals, docs[2])
141 c.Assert(m[2], qt.Equals, docs[0])
142 })
143
144 t.Run("searchdoc-all", func(t *testing.T) {
145 c := qt.New(t)
146 doc := newTestDoc("tags", "a").addKeywords("keywords", "a")
147 m, err := idx.SearchDoc(doc)
148 c.Assert(err, qt.IsNil)
149 c.Assert(len(m), qt.Equals, 2)
150 c.Assert(m[0], qt.Equals, docs[3])
151 c.Assert(m[1], qt.Equals, docs[2])
152 })
153
154 t.Run("searchdoc-tags", func(t *testing.T) {
155 c := qt.New(t)
156 doc := newTestDoc("tags", "a", "b", "d", "z").addKeywords("keywords", "a", "b")
157 m, err := idx.SearchDoc(doc, "tags")
158 c.Assert(err, qt.IsNil)
159 c.Assert(len(m), qt.Equals, 2)
160 c.Assert(m[0], qt.Equals, docs[0])
161 c.Assert(m[1], qt.Equals, docs[1])
162 })
163
164 t.Run("searchdoc-keywords-date", func(t *testing.T) {
165 c := qt.New(t)
166 doc := newTestDoc("tags", "a", "b", "d", "z").addKeywords("keywords", "a", "b")
167 // This will get a date newer than the others.
168 newDoc := newTestDoc("keywords", "a", "b")
169 idx.Add(newDoc)
170
171 m, err := idx.SearchDoc(doc, "keywords")
172 c.Assert(err, qt.IsNil)
173 c.Assert(len(m), qt.Equals, 2)
174 c.Assert(m[0], qt.Equals, docs[3])
175 })
176
177 t.Run("searchdoc-keywords-same-date", func(t *testing.T) {
178 c := qt.New(t)
179 idx := NewInvertedIndex(config)
180
181 date := time.Now()
182
183 doc := newTestDocWithDate("keywords", date, "a", "b")
184 doc.name = "thedoc"
185
186 for i := 0; i < 10; i++ {
187 docc := *doc
188 docc.name = fmt.Sprintf("doc%d", i)
189 idx.Add(&docc)
190 }
191
192 m, err := idx.SearchDoc(doc, "keywords")
193 c.Assert(err, qt.IsNil)
194 c.Assert(len(m), qt.Equals, 10)
195 for i := 0; i < 10; i++ {
196 c.Assert(m[i].Name(), qt.Equals, fmt.Sprintf("doc%d", i))
197 }
198 })
199 }
200
201 func TestToKeywordsToLower(t *testing.T) {
202 c := qt.New(t)
203 slice := []string{"A", "B", "C"}
204 config := IndexConfig{ToLower: true}
205 keywords, err := config.ToKeywords(slice)
206 c.Assert(err, qt.IsNil)
207 c.Assert(slice, qt.DeepEquals, []string{"A", "B", "C"})
208 c.Assert(keywords, qt.DeepEquals, []Keyword{
209 StringKeyword("a"),
210 StringKeyword("b"),
211 StringKeyword("c"),
212 })
213 }
214
215 func BenchmarkRelatedNewIndex(b *testing.B) {
216 pages := make([]*testDoc, 100)
217 numkeywords := 30
218 allKeywords := make([]string, numkeywords)
219 for i := 0; i < numkeywords; i++ {
220 allKeywords[i] = fmt.Sprintf("keyword%d", i+1)
221 }
222
223 for i := 0; i < len(pages); i++ {
224 start := rand.Intn(len(allKeywords))
225 end := start + 3
226 if end >= len(allKeywords) {
227 end = start + 1
228 }
229
230 kw := newTestDoc("tags", allKeywords[start:end]...)
231 if i%5 == 0 {
232 start := rand.Intn(len(allKeywords))
233 end := start + 3
234 if end >= len(allKeywords) {
235 end = start + 1
236 }
237 kw.addKeywords("keywords", allKeywords[start:end]...)
238 }
239
240 pages[i] = kw
241 }
242
243 cfg := Config{
244 Threshold: 50,
245 Indices: IndexConfigs{
246 IndexConfig{Name: "tags", Weight: 100},
247 IndexConfig{Name: "keywords", Weight: 200},
248 },
249 }
250
251 b.Run("singles", func(b *testing.B) {
252 for i := 0; i < b.N; i++ {
253 idx := NewInvertedIndex(cfg)
254 for _, doc := range pages {
255 idx.Add(doc)
256 }
257 }
258 })
259
260 b.Run("all", func(b *testing.B) {
261 for i := 0; i < b.N; i++ {
262 idx := NewInvertedIndex(cfg)
263 docs := make([]Document, len(pages))
264 for i := 0; i < len(pages); i++ {
265 docs[i] = pages[i]
266 }
267 idx.Add(docs...)
268 }
269 })
270 }
271
272 func BenchmarkRelatedMatchesIn(b *testing.B) {
273 q1 := newQueryElement("tags", StringsToKeywords("keyword2", "keyword5", "keyword32", "asdf")...)
274 q2 := newQueryElement("keywords", StringsToKeywords("keyword3", "keyword4")...)
275
276 docs := make([]*testDoc, 1000)
277 numkeywords := 20
278 allKeywords := make([]string, numkeywords)
279 for i := 0; i < numkeywords; i++ {
280 allKeywords[i] = fmt.Sprintf("keyword%d", i+1)
281 }
282
283 cfg := Config{
284 Threshold: 20,
285 Indices: IndexConfigs{
286 IndexConfig{Name: "tags", Weight: 100},
287 IndexConfig{Name: "keywords", Weight: 200},
288 },
289 }
290
291 idx := NewInvertedIndex(cfg)
292
293 for i := 0; i < len(docs); i++ {
294 start := rand.Intn(len(allKeywords))
295 end := start + 3
296 if end >= len(allKeywords) {
297 end = start + 1
298 }
299
300 index := "tags"
301 if i%5 == 0 {
302 index = "keywords"
303 }
304
305 idx.Add(newTestDoc(index, allKeywords[start:end]...))
306 }
307
308 b.ResetTimer()
309 for i := 0; i < b.N; i++ {
310 if i%10 == 0 {
311 idx.search(q2)
312 } else {
313 idx.search(q1)
314 }
315 }
316 }