filecache.go (8451B)
1 // Copyright 2018 The Hugo Authors. All rights reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 // http://www.apache.org/licenses/LICENSE-2.0
7 //
8 // Unless required by applicable law or agreed to in writing, software
9 // distributed under the License is distributed on an "AS IS" BASIS,
10 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 // See the License for the specific language governing permissions and
12 // limitations under the License.
13
14 package filecache
15
16 import (
17 "bytes"
18 "errors"
19 "io"
20 "io/ioutil"
21 "os"
22 "path/filepath"
23 "strings"
24 "sync"
25 "time"
26
27 "github.com/gohugoio/hugo/common/hugio"
28
29 "github.com/gohugoio/hugo/helpers"
30
31 "github.com/BurntSushi/locker"
32 "github.com/spf13/afero"
33 )
34
// ErrFatal can be used to signal an unrecoverable error.
// When the read callback passed to ReadOrCreate reports it, the error is
// handed back to the caller instead of the cache item being re-created.
var ErrFatal = errors.New("fatal filecache error")
37
const (
	// filecacheRootDirname is the directory name "filecache".
	// NOTE(review): not referenced in the code visible here; presumably used
	// when resolving cache directories elsewhere in the package — confirm.
	filecacheRootDirname = "filecache"
)
41
// Cache caches a set of files in a directory. This is usually a directory on
// disk, but since this is backed by an Afero file system, it can be anything.
type Cache struct {
	// Fs is the file system the cache items are read from and written to.
	// Item ids are used as file names relative to it.
	Fs afero.Fs

	// Max age for items in this cache. Negative duration means forever,
	// 0 is effectively turning this cache off.
	maxAge time.Duration

	// When set, we just remove this entire root directory on expiration.
	// NewCaches sets this to "pkg" for the modules cache and leaves it empty
	// for every other cache.
	pruneAllRootDir string

	// nlocker provides the per-id named locks taken by the cache methods and
	// keeps track of every id that has been locked.
	nlocker *lockTracker
}
56
// lockTracker is a named locker that also remembers every id it has been
// asked to lock.
type lockTracker struct {
	// seenMu guards seen.
	seenMu sync.RWMutex
	// seen holds the ids passed to Lock so far.
	seen map[string]struct{}

	// The embedded Locker provides the named locks themselves.
	*locker.Locker
}
63
64 // Lock tracks the ids in use. We use this information to do garbage collection
65 // after a Hugo build.
66 func (l *lockTracker) Lock(id string) {
67 l.seenMu.RLock()
68 if _, seen := l.seen[id]; !seen {
69 l.seenMu.RUnlock()
70 l.seenMu.Lock()
71 l.seen[id] = struct{}{}
72 l.seenMu.Unlock()
73 } else {
74 l.seenMu.RUnlock()
75 }
76
77 l.Locker.Lock(id)
78 }
79
// ItemInfo contains info about a cached file.
type ItemInfo struct {
	// This is the file's name relative to the cache's filesystem.
	// The cache methods set it to the cleaned id of the item.
	Name string
}
85
86 // NewCache creates a new file cache with the given filesystem and max age.
87 func NewCache(fs afero.Fs, maxAge time.Duration, pruneAllRootDir string) *Cache {
88 return &Cache{
89 Fs: fs,
90 nlocker: &lockTracker{Locker: locker.NewLocker(), seen: make(map[string]struct{})},
91 maxAge: maxAge,
92 pruneAllRootDir: pruneAllRootDir,
93 }
94 }
95
// lockedFile is a file with a lock that is released on Close.
type lockedFile struct {
	// The embedded file supplies all file operations except Close.
	afero.File
	// unlock releases the lock; it is invoked by Close.
	unlock func()
}
101
102 func (l *lockedFile) Close() error {
103 defer l.unlock()
104 return l.File.Close()
105 }
106
107 // WriteCloser returns a transactional writer into the cache.
108 // It's important that it's closed when done.
109 func (c *Cache) WriteCloser(id string) (ItemInfo, io.WriteCloser, error) {
110 id = cleanID(id)
111 c.nlocker.Lock(id)
112
113 info := ItemInfo{Name: id}
114
115 f, err := helpers.OpenFileForWriting(c.Fs, id)
116 if err != nil {
117 c.nlocker.Unlock(id)
118 return info, nil, err
119 }
120
121 return info, &lockedFile{
122 File: f,
123 unlock: func() { c.nlocker.Unlock(id) },
124 }, nil
125 }
126
127 // ReadOrCreate tries to lookup the file in cache.
128 // If found, it is passed to read and then closed.
129 // If not found a new file is created and passed to create, which should close
130 // it when done.
131 func (c *Cache) ReadOrCreate(id string,
132 read func(info ItemInfo, r io.ReadSeeker) error,
133 create func(info ItemInfo, w io.WriteCloser) error) (info ItemInfo, err error) {
134 id = cleanID(id)
135
136 c.nlocker.Lock(id)
137 defer c.nlocker.Unlock(id)
138
139 info = ItemInfo{Name: id}
140
141 if r := c.getOrRemove(id); r != nil {
142 err = read(info, r)
143 defer r.Close()
144 if err == nil || err == ErrFatal {
145 // See https://github.com/gohugoio/hugo/issues/6401
146 // To recover from file corruption we handle read errors
147 // as the cache item was not found.
148 // Any file permission issue will also fail in the next step.
149 return
150 }
151 }
152
153 f, err := helpers.OpenFileForWriting(c.Fs, id)
154 if err != nil {
155 return
156 }
157
158 err = create(info, f)
159
160 return
161 }
162
163 // GetOrCreate tries to get the file with the given id from cache. If not found or expired, create will
164 // be invoked and the result cached.
165 // This method is protected by a named lock using the given id as identifier.
166 func (c *Cache) GetOrCreate(id string, create func() (io.ReadCloser, error)) (ItemInfo, io.ReadCloser, error) {
167 id = cleanID(id)
168
169 c.nlocker.Lock(id)
170 defer c.nlocker.Unlock(id)
171
172 info := ItemInfo{Name: id}
173
174 if r := c.getOrRemove(id); r != nil {
175 return info, r, nil
176 }
177
178 var (
179 r io.ReadCloser
180 err error
181 )
182
183 r, err = create()
184 if err != nil {
185 return info, nil, err
186 }
187
188 if c.maxAge == 0 {
189 // No caching.
190 return info, hugio.ToReadCloser(r), nil
191 }
192
193 var buff bytes.Buffer
194 return info,
195 hugio.ToReadCloser(&buff),
196 afero.WriteReader(c.Fs, id, io.TeeReader(r, &buff))
197 }
198
199 // GetOrCreateBytes is the same as GetOrCreate, but produces a byte slice.
200 func (c *Cache) GetOrCreateBytes(id string, create func() ([]byte, error)) (ItemInfo, []byte, error) {
201 id = cleanID(id)
202
203 c.nlocker.Lock(id)
204 defer c.nlocker.Unlock(id)
205
206 info := ItemInfo{Name: id}
207
208 if r := c.getOrRemove(id); r != nil {
209 defer r.Close()
210 b, err := ioutil.ReadAll(r)
211 return info, b, err
212 }
213
214 var (
215 b []byte
216 err error
217 )
218
219 b, err = create()
220 if err != nil {
221 return info, nil, err
222 }
223
224 if c.maxAge == 0 {
225 return info, b, nil
226 }
227
228 if err := afero.WriteReader(c.Fs, id, bytes.NewReader(b)); err != nil {
229 return info, nil, err
230 }
231 return info, b, nil
232 }
233
234 // GetBytes gets the file content with the given id from the cache, nil if none found.
235 func (c *Cache) GetBytes(id string) (ItemInfo, []byte, error) {
236 id = cleanID(id)
237
238 c.nlocker.Lock(id)
239 defer c.nlocker.Unlock(id)
240
241 info := ItemInfo{Name: id}
242
243 if r := c.getOrRemove(id); r != nil {
244 defer r.Close()
245 b, err := ioutil.ReadAll(r)
246 return info, b, err
247 }
248
249 return info, nil, nil
250 }
251
252 // Get gets the file with the given id from the cahce, nil if none found.
253 func (c *Cache) Get(id string) (ItemInfo, io.ReadCloser, error) {
254 id = cleanID(id)
255
256 c.nlocker.Lock(id)
257 defer c.nlocker.Unlock(id)
258
259 info := ItemInfo{Name: id}
260
261 r := c.getOrRemove(id)
262
263 return info, r, nil
264 }
265
266 // getOrRemove gets the file with the given id. If it's expired, it will
267 // be removed.
268 func (c *Cache) getOrRemove(id string) hugio.ReadSeekCloser {
269 if c.maxAge == 0 {
270 // No caching.
271 return nil
272 }
273
274 if c.maxAge > 0 {
275 fi, err := c.Fs.Stat(id)
276 if err != nil {
277 return nil
278 }
279
280 if c.isExpired(fi.ModTime()) {
281 c.Fs.Remove(id)
282 return nil
283 }
284 }
285
286 f, err := c.Fs.Open(id)
287 if err != nil {
288 return nil
289 }
290
291 return f
292 }
293
294 func (c *Cache) isExpired(modTime time.Time) bool {
295 if c.maxAge < 0 {
296 return false
297 }
298
299 // Note the use of time.Since here.
300 // We cannot use Hugo's global Clock for this.
301 return c.maxAge == 0 || time.Since(modTime) > c.maxAge
302 }
303
304 // For testing
305 func (c *Cache) getString(id string) string {
306 id = cleanID(id)
307
308 c.nlocker.Lock(id)
309 defer c.nlocker.Unlock(id)
310
311 f, err := c.Fs.Open(id)
312 if err != nil {
313 return ""
314 }
315 defer f.Close()
316
317 b, _ := ioutil.ReadAll(f)
318 return string(b)
319 }
320
// Caches is a named set of caches.
// Get lower-cases the requested name before the lookup, so the keys are
// expected to be lower case.
type Caches map[string]*Cache
323
324 // Get gets a named cache, nil if none found.
325 func (f Caches) Get(name string) *Cache {
326 return f[strings.ToLower(name)]
327 }
328
329 // NewCaches creates a new set of file caches from the given
330 // configuration.
331 func NewCaches(p *helpers.PathSpec) (Caches, error) {
332 var dcfg Configs
333 if c, ok := p.Cfg.Get("filecacheConfigs").(Configs); ok {
334 dcfg = c
335 } else {
336 var err error
337 dcfg, err = DecodeConfig(p.Fs.Source, p.Cfg)
338 if err != nil {
339 return nil, err
340 }
341 }
342
343 fs := p.Fs.Source
344
345 m := make(Caches)
346 for k, v := range dcfg {
347 var cfs afero.Fs
348
349 if v.isResourceDir {
350 cfs = p.BaseFs.ResourcesCache
351 } else {
352 cfs = fs
353 }
354
355 if cfs == nil {
356 // TODO(bep) we still have some places that do not initialize the
357 // full dependencies of a site, e.g. the import Jekyll command.
358 // That command does not need these caches, so let us just continue
359 // for now.
360 continue
361 }
362
363 baseDir := v.Dir
364
365 if err := cfs.MkdirAll(baseDir, 0777); err != nil && !os.IsExist(err) {
366 return nil, err
367 }
368
369 bfs := afero.NewBasePathFs(cfs, baseDir)
370
371 var pruneAllRootDir string
372 if k == cacheKeyModules {
373 pruneAllRootDir = "pkg"
374 }
375
376 m[k] = NewCache(bfs, v.MaxAge, pruneAllRootDir)
377 }
378
379 return m, nil
380 }
381
382 func cleanID(name string) string {
383 return strings.TrimPrefix(filepath.Clean(name), helpers.FilePathSeparator)
384 }