hugo

Unnamed repository; edit this file 'description' to name the repository.

git clone git://git.shimmy1996.com/hugo.git
commit 66753416b5ec0f9f1be588a935d5551dfb5eebb9
parent 133e4bfbeee47bf6843fbcad90f14501f3d3a099
Author: Paul van Brouwershaven <vanbroup@users.noreply.github.com>
Date:   Thu,  2 Dec 2021 12:56:25 +0100

Make resources.Get use a file cache for remote resources

Closes #9228
Diffstat:
Mcache/filecache/filecache_config.go | 17++++++++++++-----
Mcache/filecache/filecache_config_test.go | 16+++++++++++-----
Mdocs/content/en/getting-started/configuration.md | 3+++
Mdocs/content/en/hugo-pipes/introduction.md | 10++++++++++
Mresources/resource_factories/create/create.go | 97+++++++++++++++++++++++++++++++++++++++++++++++--------------------------------
5 files changed, 94 insertions(+), 49 deletions(-)
diff --git a/cache/filecache/filecache_config.go b/cache/filecache/filecache_config.go
@@ -42,11 +42,12 @@ var defaultCacheConfig = Config{
 }
 
 const (
-	cacheKeyGetJSON = "getjson"
-	cacheKeyGetCSV  = "getcsv"
-	cacheKeyImages  = "images"
-	cacheKeyAssets  = "assets"
-	cacheKeyModules = "modules"
+	cacheKeyGetJSON  = "getjson"
+	cacheKeyGetCSV   = "getcsv"
+	cacheKeyImages   = "images"
+	cacheKeyAssets   = "assets"
+	cacheKeyModules  = "modules"
+	cacheGetResource = "getresource"
 )
 
 type Configs map[string]Config
@@ -70,6 +71,7 @@ var defaultCacheConfigs = Configs{
 		MaxAge: -1,
 		Dir:    resourcesGenDir,
 	},
+	cacheGetResource: defaultCacheConfig,
 }
 
 type Config struct {
@@ -111,6 +113,11 @@ func (f Caches) AssetsCache() *Cache {
 	return f[cacheKeyAssets]
 }
 
+// GetResourceCache gets the file cache for remote resources.
+func (f Caches) GetResourceCache() *Cache {
+	return f[cacheGetResource]
+}
+
 func DecodeConfig(fs afero.Fs, cfg config.Provider) (Configs, error) {
 	c := make(Configs)
 	valid := make(map[string]bool)
diff --git a/cache/filecache/filecache_config_test.go b/cache/filecache/filecache_config_test.go
@@ -50,7 +50,8 @@ maxAge = "11h"
 dir = "/path/to/c2"
 [caches.images]
 dir = "/path/to/c3"
-
+[caches.getResource]
+dir = "/path/to/c4"
 `
 
 	cfg, err := config.FromConfigString(configStr, "toml")
@@ -59,7 +60,7 @@ dir = "/path/to/c3"
 	decoded, err := DecodeConfig(fs, cfg)
 	c.Assert(err, qt.IsNil)
 
-	c.Assert(len(decoded), qt.Equals, 5)
+	c.Assert(len(decoded), qt.Equals, 6)
 
 	c2 := decoded["getcsv"]
 	c.Assert(c2.MaxAge.String(), qt.Equals, "11h0m0s")
@@ -68,6 +69,10 @@ dir = "/path/to/c3"
 	c3 := decoded["images"]
 	c.Assert(c3.MaxAge, qt.Equals, time.Duration(-1))
 	c.Assert(c3.Dir, qt.Equals, filepath.FromSlash("/path/to/c3/filecache/images"))
+
+	c4 := decoded["getresource"]
+	c.Assert(c4.MaxAge, qt.Equals, time.Duration(-1))
+	c.Assert(c4.Dir, qt.Equals, filepath.FromSlash("/path/to/c4/filecache/getresource"))
 }
 
 func TestDecodeConfigIgnoreCache(t *testing.T) {
@@ -94,7 +99,8 @@ maxAge = 3456
 dir = "/path/to/c2"
 [caches.images]
 dir = "/path/to/c3"
-
+[caches.getResource]
+dir = "/path/to/c4"
 `
 
 	cfg, err := config.FromConfigString(configStr, "toml")
@@ -103,7 +109,7 @@ dir = "/path/to/c3"
 	decoded, err := DecodeConfig(fs, cfg)
 	c.Assert(err, qt.IsNil)
 
-	c.Assert(len(decoded), qt.Equals, 5)
+	c.Assert(len(decoded), qt.Equals, 6)
 
 	for _, v := range decoded {
 		c.Assert(v.MaxAge, qt.Equals, time.Duration(0))
@@ -129,7 +135,7 @@ func TestDecodeConfigDefault(t *testing.T) {
 
 	c.Assert(err, qt.IsNil)
 
-	c.Assert(len(decoded), qt.Equals, 5)
+	c.Assert(len(decoded), qt.Equals, 6)
 
 	imgConfig := decoded[cacheKeyImages]
 	jsonConfig := decoded[cacheKeyGetJSON]
diff --git a/docs/content/en/getting-started/configuration.md b/docs/content/en/getting-started/configuration.md
@@ -657,6 +657,9 @@ maxAge = -1
 [caches.getcsv]
 dir = ":cacheDir/:project"
 maxAge = -1
+[caches.getresource]
+dir = ":cacheDir/:project"
+maxAge = -1
 [caches.images]
 dir = ":resourceDir/_gen"
 maxAge = -1
diff --git a/docs/content/en/hugo-pipes/introduction.md b/docs/content/en/hugo-pipes/introduction.md
@@ -55,6 +55,16 @@ You can also change the request method and set the request body:
 )}}
 ```
 
+#### Cache of remote resources
+
+Each downloaded URL will be cached in the default folder `$TMPDIR/hugo_cache/`. The variable `$TMPDIR` will be resolved to your system-dependent temporary directory.
+
+With the command-line flag `--cacheDir`, you can specify any folder on your system as a caching directory.
+
+You can also set `cacheDir` or `caches.getresource` in the [main configuration file][config].
+
+If you don't like caching at all, you can fully disable caching with the command-line flag `--ignoreCache`.
+
 ### Asset publishing
 
 Assets will only be published (to `/public`) if `.Permalink` or `.RelPermalink` is used.
diff --git a/resources/resource_factories/create/create.go b/resources/resource_factories/create/create.go
@@ -16,12 +16,14 @@
 package create
 
 import (
+	"bufio"
 	"bytes"
 	"fmt"
 	"io"
 	"io/ioutil"
 	"mime"
 	"net/http"
+	"net/http/httputil"
 	"net/url"
 	"path"
 	"path/filepath"
@@ -32,6 +34,7 @@ import (
 
 	"github.com/gohugoio/hugo/hugofs"
 
+	"github.com/gohugoio/hugo/cache/filecache"
 	"github.com/gohugoio/hugo/common/hugio"
 	"github.com/gohugoio/hugo/common/maps"
 	"github.com/gohugoio/hugo/common/types"
@@ -45,8 +48,9 @@ import (
 // Client contains methods to create Resource objects,
 // for example from remote URLs (see FromRemote).
 type Client struct {
-	rs         *resources.Spec
-	httpClient *http.Client
+	rs               *resources.Spec
+	httpClient       *http.Client
+	cacheGetResource *filecache.Cache
 }
 
 // New creates a new Client with the given specification.
@@ -56,6 +60,7 @@ func New(rs *resources.Spec) *Client {
 		httpClient: &http.Client{
 			Timeout: 10 * time.Second,
 		},
+		cacheGetResource: rs.FileCaches.GetResourceCache(),
 	}
 }
 
@@ -156,10 +161,7 @@ func (c *Client) FromRemote(uri string, options map[string]interface{}) (resourc
 
 	resourceID := helpers.HashString(uri, options)
 
-	// This caches to memory and will, in server mode, not be evicted unless the resourceID changes
-	// or the server restarts.
-	// There is ongoing work to improve this.
-	return c.rs.ResourceCache.GetOrCreate(resourceID, func() (resource.Resource, error) {
+	_, httpResponse, err := c.cacheGetResource.GetOrCreate(resourceID, func() (io.ReadCloser, error) {
 		method, reqBody, err := getMethodAndBody(options)
 		if err != nil {
 			return nil, errors.Wrapf(err, "failed to get method or body for resource %s", uri)
@@ -187,51 +189,68 @@ func (c *Client) FromRemote(uri string, options map[string]interface{}) (resourc
 			return nil, errors.Errorf("failed to retrieve remote resource: %s", http.StatusText(res.StatusCode))
 		}
 
-		body, err := ioutil.ReadAll(res.Body)
+		httpResponse, err := httputil.DumpResponse(res, true)
 		if err != nil {
-			return nil, errors.Wrapf(err, "failed to read remote resource %s", uri)
+			return nil, err
 		}
 
-		filename := path.Base(rURL.Path)
-		if _, params, _ := mime.ParseMediaType(res.Header.Get("Content-Disposition")); params != nil {
-			if _, ok := params["filename"]; ok {
-				filename = params["filename"]
-			}
-		}
+		return hugio.ToReadCloser(bytes.NewReader(httpResponse)), nil
+	})
+	if err != nil {
+		return nil, err
+	}
+	defer httpResponse.Close()
+
+	res, err := http.ReadResponse(bufio.NewReader(httpResponse), nil)
+	if err != nil {
+		return nil, err
+	}
+
+	body, err := ioutil.ReadAll(res.Body)
+	if err != nil {
+		return nil, errors.Wrapf(err, "failed to read remote resource %s", uri)
+	}
 
-		var contentType string
-		if arr, _ := mime.ExtensionsByType(res.Header.Get("Content-Type")); len(arr) == 1 {
-			contentType = arr[0]
+	filename := path.Base(rURL.Path)
+	if _, params, _ := mime.ParseMediaType(res.Header.Get("Content-Disposition")); params != nil {
+		if _, ok := params["filename"]; ok {
+			filename = params["filename"]
 		}
+	}
 
-		// If content type was not determined by header, look for a file extention
-		if contentType == "" {
-			if ext := path.Ext(filename); ext != "" {
-				contentType = ext
-			}
+	var contentType string
+	if arr, _ := mime.ExtensionsByType(res.Header.Get("Content-Type")); len(arr) == 1 {
+		contentType = arr[0]
+	}
+
+	// If content type was not determined by header, look for a file extension
+	if contentType == "" {
+		if ext := path.Ext(filename); ext != "" {
+			contentType = ext
 		}
+	}
 
-		// If content type was not determined by header or file extention, try using content itself
-		if contentType == "" {
-			if ct := http.DetectContentType(body); ct != "application/octet-stream" {
-				if arr, _ := mime.ExtensionsByType(ct); arr != nil {
-					contentType = arr[0]
-				}
+	// If content type was not determined by header or file extension, try using content itself
+	if contentType == "" {
+		if ct := http.DetectContentType(body); ct != "application/octet-stream" {
+			if arr, _ := mime.ExtensionsByType(ct); arr != nil {
+				contentType = arr[0]
 			}
 		}
+	}
 
-		resourceID = filename[:len(filename)-len(path.Ext(filename))] + "_" + resourceID + contentType
+	resourceID = filename[:len(filename)-len(path.Ext(filename))] + "_" + resourceID + contentType
+
+	return c.rs.New(
+		resources.ResourceSourceDescriptor{
+			Fs:          c.rs.FileCaches.AssetsCache().Fs,
+			LazyPublish: true,
+			OpenReadSeekCloser: func() (hugio.ReadSeekCloser, error) {
+				return hugio.NewReadSeekerNoOpCloser(bytes.NewReader(body)), nil
+			},
+			RelTargetFilename: filepath.Clean(resourceID),
+		})
 
-		return c.rs.New(
-			resources.ResourceSourceDescriptor{
-				Fs:          c.rs.FileCaches.AssetsCache().Fs,
-				LazyPublish: true,
-				OpenReadSeekCloser: func() (hugio.ReadSeekCloser, error) {
-					return hugio.NewReadSeekerNoOpCloser(bytes.NewReader(body)), nil
-				},
-				RelTargetFilename: filepath.Clean(resourceID),
-			})
-	})
 }
 
 func addDefaultHeaders(req *http.Request, accepts ...string) {