inverted_index_test.go (7664B)
1 // Copyright 2019 The Hugo Authors. All rights reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // http://www.apache.org/licenses/LICENSE-2.0 7 // 8 // Unless required by applicable law or agreed to in writing, software 9 // distributed under the License is distributed on an "AS IS" BASIS, 10 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 // See the License for the specific language governing permissions and 12 // limitations under the License. 13 14 package related 15 16 import ( 17 "fmt" 18 "math/rand" 19 "testing" 20 "time" 21 22 qt "github.com/frankban/quicktest" 23 ) 24 25 type testDoc struct { 26 keywords map[string][]Keyword 27 date time.Time 28 name string 29 } 30 31 func (d *testDoc) String() string { 32 s := "\n" 33 for k, v := range d.keywords { 34 s += k + ":\t\t" 35 for _, vv := range v { 36 s += " " + vv.String() 37 } 38 s += "\n" 39 } 40 return s 41 } 42 43 func (d *testDoc) Name() string { 44 return d.name 45 } 46 47 func newTestDoc(name string, keywords ...string) *testDoc { 48 time.Sleep(1 * time.Millisecond) 49 return newTestDocWithDate(name, time.Now(), keywords...) 50 } 51 52 func newTestDocWithDate(name string, date time.Time, keywords ...string) *testDoc { 53 km := make(map[string][]Keyword) 54 55 kw := &testDoc{keywords: km, date: date} 56 57 kw.addKeywords(name, keywords...) 58 return kw 59 } 60 61 func (d *testDoc) addKeywords(name string, keywords ...string) *testDoc { 62 keywordm := createTestKeywords(name, keywords...) 63 64 for k, v := range keywordm { 65 keywords := make([]Keyword, len(v)) 66 for i := 0; i < len(v); i++ { 67 keywords[i] = StringKeyword(v[i]) 68 } 69 d.keywords[k] = keywords 70 } 71 return d 72 } 73 74 func createTestKeywords(name string, keywords ...string) map[string][]string { 75 return map[string][]string{ 76 name: keywords, 77 } 78 } 79 80 func (d *testDoc) RelatedKeywords(cfg IndexConfig) ([]Keyword, error) { 81 return d.keywords[cfg.Name], nil 82 } 83 84 func (d *testDoc) PublishDate() time.Time { 85 return d.date 86 } 87 88 func TestSearch(t *testing.T) { 89 config := Config{ 90 Threshold: 90, 91 IncludeNewer: false, 92 Indices: IndexConfigs{ 93 IndexConfig{Name: "tags", Weight: 50}, 94 IndexConfig{Name: "keywords", Weight: 65}, 95 }, 96 } 97 98 idx := NewInvertedIndex(config) 99 // idx.debug = true 100 101 docs := []Document{ 102 newTestDoc("tags", "a", "b", "c", "d"), 103 newTestDoc("tags", "b", "d", "g"), 104 newTestDoc("tags", "b", "h").addKeywords("keywords", "a"), 105 newTestDoc("tags", "g", "h").addKeywords("keywords", "a", "b"), 106 } 107 108 idx.Add(docs...) 109 110 t.Run("count", func(t *testing.T) { 111 c := qt.New(t) 112 c.Assert(len(idx.index), qt.Equals, 2) 113 set1, found := idx.index["tags"] 114 c.Assert(found, qt.Equals, true) 115 // 6 tags 116 c.Assert(len(set1), qt.Equals, 6) 117 118 set2, found := idx.index["keywords"] 119 c.Assert(found, qt.Equals, true) 120 c.Assert(len(set2), qt.Equals, 2) 121 }) 122 123 t.Run("search-tags", func(t *testing.T) { 124 c := qt.New(t) 125 m, err := idx.search(newQueryElement("tags", StringsToKeywords("a", "b", "d", "z")...)) 126 c.Assert(err, qt.IsNil) 127 c.Assert(len(m), qt.Equals, 2) 128 c.Assert(m[0], qt.Equals, docs[0]) 129 c.Assert(m[1], qt.Equals, docs[1]) 130 }) 131 132 t.Run("search-tags-and-keywords", func(t *testing.T) { 133 c := qt.New(t) 134 m, err := idx.search( 135 newQueryElement("tags", StringsToKeywords("a", "b", "z")...), 136 newQueryElement("keywords", StringsToKeywords("a", "b")...)) 137 c.Assert(err, qt.IsNil) 138 c.Assert(len(m), qt.Equals, 3) 139 c.Assert(m[0], qt.Equals, docs[3]) 140 c.Assert(m[1], qt.Equals, docs[2]) 141 c.Assert(m[2], qt.Equals, docs[0]) 142 }) 143 144 t.Run("searchdoc-all", func(t *testing.T) { 145 c := qt.New(t) 146 doc := newTestDoc("tags", "a").addKeywords("keywords", "a") 147 m, err := idx.SearchDoc(doc) 148 c.Assert(err, qt.IsNil) 149 c.Assert(len(m), qt.Equals, 2) 150 c.Assert(m[0], qt.Equals, docs[3]) 151 c.Assert(m[1], qt.Equals, docs[2]) 152 }) 153 154 t.Run("searchdoc-tags", func(t *testing.T) { 155 c := qt.New(t) 156 doc := newTestDoc("tags", "a", "b", "d", "z").addKeywords("keywords", "a", "b") 157 m, err := idx.SearchDoc(doc, "tags") 158 c.Assert(err, qt.IsNil) 159 c.Assert(len(m), qt.Equals, 2) 160 c.Assert(m[0], qt.Equals, docs[0]) 161 c.Assert(m[1], qt.Equals, docs[1]) 162 }) 163 164 t.Run("searchdoc-keywords-date", func(t *testing.T) { 165 c := qt.New(t) 166 doc := newTestDoc("tags", "a", "b", "d", "z").addKeywords("keywords", "a", "b") 167 // This will get a date newer than the others. 168 newDoc := newTestDoc("keywords", "a", "b") 169 idx.Add(newDoc) 170 171 m, err := idx.SearchDoc(doc, "keywords") 172 c.Assert(err, qt.IsNil) 173 c.Assert(len(m), qt.Equals, 2) 174 c.Assert(m[0], qt.Equals, docs[3]) 175 }) 176 177 t.Run("searchdoc-keywords-same-date", func(t *testing.T) { 178 c := qt.New(t) 179 idx := NewInvertedIndex(config) 180 181 date := time.Now() 182 183 doc := newTestDocWithDate("keywords", date, "a", "b") 184 doc.name = "thedoc" 185 186 for i := 0; i < 10; i++ { 187 docc := *doc 188 docc.name = fmt.Sprintf("doc%d", i) 189 idx.Add(&docc) 190 } 191 192 m, err := idx.SearchDoc(doc, "keywords") 193 c.Assert(err, qt.IsNil) 194 c.Assert(len(m), qt.Equals, 10) 195 for i := 0; i < 10; i++ { 196 c.Assert(m[i].Name(), qt.Equals, fmt.Sprintf("doc%d", i)) 197 } 198 }) 199 } 200 201 func TestToKeywordsToLower(t *testing.T) { 202 c := qt.New(t) 203 slice := []string{"A", "B", "C"} 204 config := IndexConfig{ToLower: true} 205 keywords, err := config.ToKeywords(slice) 206 c.Assert(err, qt.IsNil) 207 c.Assert(slice, qt.DeepEquals, []string{"A", "B", "C"}) 208 c.Assert(keywords, qt.DeepEquals, []Keyword{ 209 StringKeyword("a"), 210 StringKeyword("b"), 211 StringKeyword("c"), 212 }) 213 } 214 215 func BenchmarkRelatedNewIndex(b *testing.B) { 216 pages := make([]*testDoc, 100) 217 numkeywords := 30 218 allKeywords := make([]string, numkeywords) 219 for i := 0; i < numkeywords; i++ { 220 allKeywords[i] = fmt.Sprintf("keyword%d", i+1) 221 } 222 223 for i := 0; i < len(pages); i++ { 224 start := rand.Intn(len(allKeywords)) 225 end := start + 3 226 if end >= len(allKeywords) { 227 end = start + 1 228 } 229 230 kw := newTestDoc("tags", allKeywords[start:end]...) 231 if i%5 == 0 { 232 start := rand.Intn(len(allKeywords)) 233 end := start + 3 234 if end >= len(allKeywords) { 235 end = start + 1 236 } 237 kw.addKeywords("keywords", allKeywords[start:end]...) 238 } 239 240 pages[i] = kw 241 } 242 243 cfg := Config{ 244 Threshold: 50, 245 Indices: IndexConfigs{ 246 IndexConfig{Name: "tags", Weight: 100}, 247 IndexConfig{Name: "keywords", Weight: 200}, 248 }, 249 } 250 251 b.Run("singles", func(b *testing.B) { 252 for i := 0; i < b.N; i++ { 253 idx := NewInvertedIndex(cfg) 254 for _, doc := range pages { 255 idx.Add(doc) 256 } 257 } 258 }) 259 260 b.Run("all", func(b *testing.B) { 261 for i := 0; i < b.N; i++ { 262 idx := NewInvertedIndex(cfg) 263 docs := make([]Document, len(pages)) 264 for i := 0; i < len(pages); i++ { 265 docs[i] = pages[i] 266 } 267 idx.Add(docs...) 268 } 269 }) 270 } 271 272 func BenchmarkRelatedMatchesIn(b *testing.B) { 273 q1 := newQueryElement("tags", StringsToKeywords("keyword2", "keyword5", "keyword32", "asdf")...) 274 q2 := newQueryElement("keywords", StringsToKeywords("keyword3", "keyword4")...) 275 276 docs := make([]*testDoc, 1000) 277 numkeywords := 20 278 allKeywords := make([]string, numkeywords) 279 for i := 0; i < numkeywords; i++ { 280 allKeywords[i] = fmt.Sprintf("keyword%d", i+1) 281 } 282 283 cfg := Config{ 284 Threshold: 20, 285 Indices: IndexConfigs{ 286 IndexConfig{Name: "tags", Weight: 100}, 287 IndexConfig{Name: "keywords", Weight: 200}, 288 }, 289 } 290 291 idx := NewInvertedIndex(cfg) 292 293 for i := 0; i < len(docs); i++ { 294 start := rand.Intn(len(allKeywords)) 295 end := start + 3 296 if end >= len(allKeywords) { 297 end = start + 1 298 } 299 300 index := "tags" 301 if i%5 == 0 { 302 index = "keywords" 303 } 304 305 idx.Add(newTestDoc(index, allKeywords[start:end]...)) 306 } 307 308 b.ResetTimer() 309 for i := 0; i < b.N; i++ { 310 if i%10 == 0 { 311 idx.search(q2) 312 } else { 313 idx.search(q1) 314 } 315 } 316 }