pages_capture.go (13084B)
1 // Copyright 2019 The Hugo Authors. All rights reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 // http://www.apache.org/licenses/LICENSE-2.0
7 //
8 // Unless required by applicable law or agreed to in writing, software
9 // distributed under the License is distributed on an "AS IS" BASIS,
10 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 // See the License for the specific language governing permissions and
12 // limitations under the License.
13
14 package hugolib
15
16 import (
17 "context"
18 "fmt"
19 "os"
20 pth "path"
21 "path/filepath"
22 "reflect"
23
24 "github.com/gohugoio/hugo/common/maps"
25
26 "github.com/gohugoio/hugo/parser/pageparser"
27
28 "github.com/gohugoio/hugo/hugofs/files"
29
30 "github.com/gohugoio/hugo/source"
31
32 "github.com/gohugoio/hugo/common/loggers"
33 "github.com/gohugoio/hugo/hugofs"
34 "github.com/spf13/afero"
35 )
36
const (
	// walkIsRootFileMetaKey is a string key that is not referenced in the
	// visible part of this file.
	// NOTE(review): judging by the name it marks file meta belonging to the
	// root of a walk — confirm against its users.
	walkIsRootFileMetaKey = "walkIsRootFileMetaKey"
)
40
41 func newPagesCollector(
42 sp *source.SourceSpec,
43 contentMap *pageMaps,
44 logger loggers.Logger,
45 contentTracker *contentChangeMap,
46 proc pagesCollectorProcessorProvider, filenames ...string) *pagesCollector {
47 return &pagesCollector{
48 fs: sp.SourceFs,
49 contentMap: contentMap,
50 proc: proc,
51 sp: sp,
52 logger: logger,
53 filenames: filenames,
54 tracker: contentTracker,
55 }
56 }
57
// contentDirKey identifies one changed content file together with the
// directory and bundle type it was resolved to. Collect uses it as a map key
// when gathering the directories to re-collect in a partial build.
type contentDirKey struct {
	// dirname is the directory to re-collect.
	dirname string
	// filename is the changed file as it was passed to the collector.
	filename string
	// tp is the bundle type the content tracker resolved for filename.
	tp bundleDirType
}
63
// fileinfoBundle groups a bundle's header file with the other files that
// were added to the bundle.
type fileinfoBundle struct {
	// header is the bundle's header file.
	header hugofs.FileMetaInfo
	// resources holds every non-header file appended to the bundle.
	resources []hugofs.FileMetaInfo
}
68
69 func (b *fileinfoBundle) containsResource(name string) bool {
70 for _, r := range b.resources {
71 if r.Name() == name {
72 return true
73 }
74 }
75
76 return false
77 }
78
// pageBundles maps a language to the bundle collected for that language.
type pageBundles map[string]*fileinfoBundle
80
// pagesCollector walks the content file system and hands the files and
// bundles it finds to proc.
type pagesCollector struct {
	// sp supplies the source file system, the default content language and
	// the ignore-file check used while collecting.
	sp *source.SourceSpec
	// fs is the file system that gets walked; newPagesCollector sets it to
	// sp.SourceFs.
	fs     afero.Fs
	logger loggers.Logger

	// contentMap holds the page maps that are consulted and updated during
	// partial builds.
	contentMap *pageMaps

	// Ordered list (bundle headers first) used in partial builds.
	filenames []string

	// Content files tracker used in partial builds.
	tracker *contentChangeMap

	// proc receives every collected file and bundle.
	proc pagesCollectorProcessorProvider
}
96
97 // isCascadingEdit returns whether the dir represents a cascading edit.
98 // That is, if a front matter cascade section is removed, added or edited.
99 // If this is the case we must re-evaluate its descendants.
100 func (c *pagesCollector) isCascadingEdit(dir contentDirKey) (bool, string) {
101 // This is either a section or a taxonomy node. Find it.
102 prefix := cleanTreeKey(dir.dirname)
103
104 section := "/"
105 var isCascade bool
106
107 c.contentMap.walkBranchesPrefix(prefix, func(s string, n *contentNode) bool {
108 if n.fi == nil || dir.filename != n.fi.Meta().Filename {
109 return false
110 }
111
112 f, err := n.fi.Meta().Open()
113 if err != nil {
114 // File may have been removed, assume a cascading edit.
115 // Some false positives is not too bad.
116 isCascade = true
117 return true
118 }
119
120 pf, err := pageparser.ParseFrontMatterAndContent(f)
121 f.Close()
122 if err != nil {
123 isCascade = true
124 return true
125 }
126
127 if n.p == nil || n.p.bucket == nil {
128 return true
129 }
130
131 section = s
132
133 maps.PrepareParams(pf.FrontMatter)
134 cascade1, ok := pf.FrontMatter["cascade"]
135 hasCascade := n.p.bucket.cascade != nil && len(n.p.bucket.cascade) > 0
136 if !ok {
137 isCascade = hasCascade
138
139 return true
140 }
141
142 if !hasCascade {
143 isCascade = true
144 return true
145 }
146
147 for _, v := range n.p.bucket.cascade {
148 isCascade = !reflect.DeepEqual(cascade1, v)
149 if isCascade {
150 break
151 }
152 }
153
154 return true
155 })
156
157 return isCascade, section
158 }
159
160 // Collect.
161 func (c *pagesCollector) Collect() (collectErr error) {
162 c.proc.Start(context.Background())
163 defer func() {
164 err := c.proc.Wait()
165 if collectErr == nil {
166 collectErr = err
167 }
168 }()
169
170 if len(c.filenames) == 0 {
171 // Collect everything.
172 collectErr = c.collectDir("", false, nil)
173 } else {
174 for _, pm := range c.contentMap.pmaps {
175 pm.cfg.isRebuild = true
176 }
177 dirs := make(map[contentDirKey]bool)
178 for _, filename := range c.filenames {
179 dir, btype := c.tracker.resolveAndRemove(filename)
180 dirs[contentDirKey{dir, filename, btype}] = true
181 }
182
183 for dir := range dirs {
184 for _, pm := range c.contentMap.pmaps {
185 pm.s.ResourceSpec.DeleteBySubstring(dir.dirname)
186 }
187
188 switch dir.tp {
189 case bundleLeaf:
190 collectErr = c.collectDir(dir.dirname, true, nil)
191 case bundleBranch:
192 isCascading, section := c.isCascadingEdit(dir)
193
194 if isCascading {
195 c.contentMap.deleteSection(section)
196 }
197 collectErr = c.collectDir(dir.dirname, !isCascading, nil)
198 default:
199 // We always start from a directory.
200 collectErr = c.collectDir(dir.dirname, true, func(fim hugofs.FileMetaInfo) bool {
201 return dir.filename == fim.Meta().Filename
202 })
203 }
204
205 if collectErr != nil {
206 break
207 }
208 }
209
210 }
211
212 return
213 }
214
215 func (c *pagesCollector) isBundleHeader(fi hugofs.FileMetaInfo) bool {
216 class := fi.Meta().Classifier
217 return class == files.ContentClassLeaf || class == files.ContentClassBranch
218 }
219
220 func (c *pagesCollector) getLang(fi hugofs.FileMetaInfo) string {
221 lang := fi.Meta().Lang
222 if lang != "" {
223 return lang
224 }
225
226 return c.sp.DefaultContentLanguage
227 }
228
229 func (c *pagesCollector) addToBundle(info hugofs.FileMetaInfo, btyp bundleDirType, bundles pageBundles) error {
230 getBundle := func(lang string) *fileinfoBundle {
231 return bundles[lang]
232 }
233
234 cloneBundle := func(lang string) *fileinfoBundle {
235 // Every bundled content file needs a content file header.
236 // Use the default content language if found, else just
237 // pick one.
238 var (
239 source *fileinfoBundle
240 found bool
241 )
242
243 source, found = bundles[c.sp.DefaultContentLanguage]
244 if !found {
245 for _, b := range bundles {
246 source = b
247 break
248 }
249 }
250
251 if source == nil {
252 panic(fmt.Sprintf("no source found, %d", len(bundles)))
253 }
254
255 clone := c.cloneFileInfo(source.header)
256 clone.Meta().Lang = lang
257
258 return &fileinfoBundle{
259 header: clone,
260 }
261 }
262
263 lang := c.getLang(info)
264 bundle := getBundle(lang)
265 isBundleHeader := c.isBundleHeader(info)
266 if bundle != nil && isBundleHeader {
267 // index.md file inside a bundle, see issue 6208.
268 info.Meta().Classifier = files.ContentClassContent
269 isBundleHeader = false
270 }
271 classifier := info.Meta().Classifier
272 isContent := classifier == files.ContentClassContent
273 if bundle == nil {
274 if isBundleHeader {
275 bundle = &fileinfoBundle{header: info}
276 bundles[lang] = bundle
277 } else {
278 if btyp == bundleBranch {
279 // No special logic for branch bundles.
280 // Every language needs its own _index.md file.
281 // Also, we only clone bundle headers for lonesome, bundled,
282 // content files.
283 return c.handleFiles(info)
284 }
285
286 if isContent {
287 bundle = cloneBundle(lang)
288 bundles[lang] = bundle
289 }
290 }
291 }
292
293 if !isBundleHeader && bundle != nil {
294 bundle.resources = append(bundle.resources, info)
295 }
296
297 if classifier == files.ContentClassFile {
298 translations := info.Meta().Translations
299
300 for lang, b := range bundles {
301 if !stringSliceContains(lang, translations...) && !b.containsResource(info.Name()) {
302
303 // Clone and add it to the bundle.
304 clone := c.cloneFileInfo(info)
305 clone.Meta().Lang = lang
306 b.resources = append(b.resources, clone)
307 }
308 }
309 }
310
311 return nil
312 }
313
314 func (c *pagesCollector) cloneFileInfo(fi hugofs.FileMetaInfo) hugofs.FileMetaInfo {
315 return hugofs.NewFileMetaInfo(fi, hugofs.NewFileMeta())
316 }
317
// collectDir walks dirname on c.fs and hands the files and bundles it finds
// to the processor.
//
// A dirname that does not exist is not an error (it may have been deleted);
// any other Stat error is returned.
//
// If partial is true, the pre hook returns filepath.SkipDir after handling
// each directory; the same happens after a leaf bundle has been handled.
// inFilter, if non-nil, is consulted for every directory entry that has not
// already been excluded, and entries for which it returns false are dropped.
func (c *pagesCollector) collectDir(dirname string, partial bool, inFilter func(fim hugofs.FileMetaInfo) bool) error {
	fi, err := c.fs.Stat(dirname)
	if err != nil {
		if os.IsNotExist(err) {
			// May have been deleted.
			return nil
		}
		return err
	}

	// handleDir registers bundle directories with the tracker (when there is
	// one) and dispatches the directory's entries by bundle type.
	handleDir := func(
		btype bundleDirType,
		dir hugofs.FileMetaInfo,
		path string,
		readdir []hugofs.FileMetaInfo) error {
		if btype > bundleNot && c.tracker != nil {
			c.tracker.add(path, btype)
		}

		if btype == bundleBranch {
			if err := c.handleBundleBranch(readdir); err != nil {
				return err
			}
			// A branch bundle is only this directory level, so keep walking.
			return nil
		} else if btype == bundleLeaf {
			if err := c.handleBundleLeaf(dir, path, readdir); err != nil {
				return err
			}

			return nil
		}

		// Not a bundle: process the entries as individual files.
		if err := c.handleFiles(readdir...); err != nil {
			return err
		}

		return nil
	}

	// filter reports whether a directory entry should be kept. Entries flagged
	// SkipDir and ignored files are always dropped; everything else is left to
	// inFilter when one was supplied.
	filter := func(fim hugofs.FileMetaInfo) bool {
		if fim.Meta().SkipDir {
			return false
		}

		if c.sp.IgnoreFile(fim.Meta().Filename) {
			return false
		}

		if inFilter != nil {
			return inFilter(fim)
		}
		return true
	}

	// preHook is passed to the walker as HookPre. It filters and de-duplicates
	// the entries, determines the directory's bundle type and handles the
	// directory.
	preHook := func(dir hugofs.FileMetaInfo, path string, readdir []hugofs.FileMetaInfo) ([]hugofs.FileMetaInfo, error) {
		var btype bundleDirType

		// filtered shares readdir's backing array, so the filtering is done in
		// place.
		filtered := readdir[:0]
		for _, fi := range readdir {
			if filter(fi) {
				filtered = append(filtered, fi)

				if c.tracker != nil {
					// Track symlinks.
					c.tracker.addSymbolicLinkMapping(fi)
				}
			}
		}
		walkRoot := dir.Meta().IsRootFile
		readdir = filtered

		// We merge language directories, so there can be duplicates, but they
		// will be ordered, most important first.
		var duplicates []int
		seen := make(map[string]bool)

		for i, fi := range readdir {

			if fi.IsDir() {
				continue
			}

			meta := fi.Meta()
			// Propagate the directory's root flag to its files.
			meta.IsRootFile = walkRoot
			class := meta.Classifier
			translationBase := meta.TranslationBaseNameWithExt
			// Files sharing language and translation base name are duplicates;
			// only the first one seen is kept.
			key := pth.Join(meta.Lang, translationBase)

			if seen[key] {
				duplicates = append(duplicates, i)
				continue
			}
			seen[key] = true

			var thisBtype bundleDirType

			switch class {
			case files.ContentClassLeaf:
				thisBtype = bundleLeaf
			case files.ContentClassBranch:
				thisBtype = bundleBranch
			}

			// Folders with both index.md and _index.md type of files have
			// undefined behaviour and can never work.
			// The branch variant will win because of sort order, but log
			// a warning about it.
			if thisBtype > bundleNot && btype > bundleNot && thisBtype != btype {
				c.logger.Warnf("Content directory %q have both index.* and _index.* files, pick one.", dir.Meta().Filename)
				// Reclassify it so it will be handled as a content file inside the
				// section, which is in line with the <= 0.55 behaviour.
				meta.Classifier = files.ContentClassContent
			} else if thisBtype > bundleNot {
				btype = thisBtype
			}

		}

		if len(duplicates) > 0 {
			// Remove from the back so the recorded indexes stay valid.
			for i := len(duplicates) - 1; i >= 0; i-- {
				idx := duplicates[i]
				readdir = append(readdir[:idx], readdir[idx+1:]...)
			}
		}

		err := handleDir(btype, dir, path, readdir)
		if err != nil {
			return nil, err
		}

		// A leaf bundle has been handled as a whole by handleDir, and a partial
		// collect only concerns this directory level.
		if btype == bundleLeaf || partial {
			return nil, filepath.SkipDir
		}

		// Keep walking.
		return readdir, nil
	}

	// NOTE(review): the post hook returns readdir unchanged on every path, and
	// its inner nil check is redundant with the outer one unless c.tracker is
	// reassigned during the walk. Confirm whether the walker behaves
	// differently with a nil HookPost before removing it.
	var postHook hugofs.WalkHook
	if c.tracker != nil {
		postHook = func(dir hugofs.FileMetaInfo, path string, readdir []hugofs.FileMetaInfo) ([]hugofs.FileMetaInfo, error) {
			if c.tracker == nil {
				// Nothing to do.
				return readdir, nil
			}

			return readdir, nil
		}
	}

	// All the work happens in the hooks; the walk function only propagates
	// errors.
	wfn := func(path string, info hugofs.FileMetaInfo, err error) error {
		if err != nil {
			return err
		}

		return nil
	}

	// This assertion panics if c.fs returns a FileInfo that is not a
	// hugofs.FileMetaInfo.
	fim := fi.(hugofs.FileMetaInfo)
	// Make sure the pages in this directory gets re-rendered,
	// even in fast render mode.
	fim.Meta().IsRootFile = true

	w := hugofs.NewWalkway(hugofs.WalkwayConfig{
		Fs:       c.fs,
		Logger:   c.logger,
		Root:     dirname,
		Info:     fim,
		HookPre:  preHook,
		HookPost: postHook,
		WalkFn:   wfn,
	})

	return w.Walk()
}
494
495 func (c *pagesCollector) handleBundleBranch(readdir []hugofs.FileMetaInfo) error {
496 // Maps bundles to its language.
497 bundles := pageBundles{}
498
499 var contentFiles []hugofs.FileMetaInfo
500
501 for _, fim := range readdir {
502
503 if fim.IsDir() {
504 continue
505 }
506
507 meta := fim.Meta()
508
509 switch meta.Classifier {
510 case files.ContentClassContent:
511 contentFiles = append(contentFiles, fim)
512 default:
513 if err := c.addToBundle(fim, bundleBranch, bundles); err != nil {
514 return err
515 }
516 }
517
518 }
519
520 // Make sure the section is created before its pages.
521 if err := c.proc.Process(bundles); err != nil {
522 return err
523 }
524
525 return c.handleFiles(contentFiles...)
526 }
527
528 func (c *pagesCollector) handleBundleLeaf(dir hugofs.FileMetaInfo, path string, readdir []hugofs.FileMetaInfo) error {
529 // Maps bundles to its language.
530 bundles := pageBundles{}
531
532 walk := func(path string, info hugofs.FileMetaInfo, err error) error {
533 if err != nil {
534 return err
535 }
536 if info.IsDir() {
537 return nil
538 }
539
540 return c.addToBundle(info, bundleLeaf, bundles)
541 }
542
543 // Start a new walker from the given path.
544 w := hugofs.NewWalkway(hugofs.WalkwayConfig{
545 Root: path,
546 Fs: c.fs,
547 Logger: c.logger,
548 Info: dir,
549 DirEntries: readdir,
550 WalkFn: walk,
551 })
552
553 if err := w.Walk(); err != nil {
554 return err
555 }
556
557 return c.proc.Process(bundles)
558 }
559
560 func (c *pagesCollector) handleFiles(fis ...hugofs.FileMetaInfo) error {
561 for _, fi := range fis {
562 if fi.IsDir() {
563 continue
564 }
565
566 if err := c.proc.Process(fi); err != nil {
567 return err
568 }
569 }
570 return nil
571 }
572
// stringSliceContains reports whether k is equal to any of values.
func stringSliceContains(k string, values ...string) bool {
	for i := range values {
		if values[i] == k {
			return true
		}
	}

	return false
}