shithub: hugo

Download patch

ref: 3c29c5af8ee865ef20741f576088e031e940c3d2
parent: 33502667fbacf57167ede66df8f13e308a4a9aec
author: Bjørn Erik Pedersen <[email protected]>
date: Wed Nov 14 07:06:46 EST 2018

cache/filecache: Add a cache prune func

Fixes #5439

--- a/cache/filecache/filecache.go
+++ b/cache/filecache/filecache.go
@@ -19,6 +19,7 @@
 	"io/ioutil"
 	"path/filepath"
 	"strings"
+	"sync"
 	"time"
 
 	"github.com/gohugoio/hugo/common/hugio"
@@ -44,9 +45,32 @@
 	// 0 is effectively turning this cache off.
 	maxAge time.Duration
 
-	nlocker *locker.Locker
+	nlocker *lockTracker
 }
 
+type lockTracker struct {
+	seenMu sync.RWMutex
+	seen   map[string]struct{}
+
+	*locker.Locker
+}
+
+// Lock tracks the ids in use. We use this information to do garbage collection
+// after a Hugo build.
+func (l *lockTracker) Lock(id string) {
+	l.seenMu.RLock()
+	if _, seen := l.seen[id]; !seen {
+		l.seenMu.RUnlock()
+		l.seenMu.Lock()
+		l.seen[id] = struct{}{}
+		l.seenMu.Unlock()
+	} else {
+		l.seenMu.RUnlock()
+	}
+
+	l.Locker.Lock(id)
+}
+
 // ItemInfo contains info about a cached file.
 type ItemInfo struct {
 	// This is the file's name relative to the cache's filesystem.
@@ -57,7 +81,7 @@
 func NewCache(fs afero.Fs, maxAge time.Duration) *Cache {
 	return &Cache{
 		Fs:      fs,
-		nlocker: locker.NewLocker(),
+		nlocker: &lockTracker{Locker: locker.NewLocker(), seen: make(map[string]struct{})},
 		maxAge:  maxAge,
 	}
 }
@@ -232,7 +256,7 @@
 			return nil
 		}
 
-		if time.Now().Sub(fi.ModTime()) > c.maxAge {
+		if c.isExpired(fi.ModTime()) {
 			c.Fs.Remove(id)
 			return nil
 		}
@@ -247,6 +271,10 @@
 	return f
 }
 
+func (c *Cache) isExpired(modTime time.Time) bool {
+	return c.maxAge >= 0 && time.Now().Sub(modTime) > c.maxAge
+}
+
 // For testing
 func (c *Cache) getString(id string) string {
 	id = cleanID(id)
@@ -254,13 +282,15 @@
 	c.nlocker.Lock(id)
 	defer c.nlocker.Unlock(id)
 
-	if r := c.getOrRemove(id); r != nil {
-		defer r.Close()
-		b, _ := ioutil.ReadAll(r)
-		return string(b)
+	f, err := c.Fs.Open(id)
+
+	if err != nil {
+		return ""
 	}
+	defer f.Close()
 
-	return ""
+	b, _ := ioutil.ReadAll(f)
+	return string(b)
 
 }
 
@@ -309,5 +339,5 @@
 }
 
 func cleanID(name string) string {
-	return filepath.Clean(name)
+	return strings.TrimPrefix(filepath.Clean(name), helpers.FilePathSeparator)
 }
--- /dev/null
+++ b/cache/filecache/filecache_pruner.go
@@ -1,0 +1,80 @@
+// Copyright 2018 The Hugo Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package filecache
+
+import (
+	"io"
+	"os"
+
+	"github.com/pkg/errors"
+	"github.com/spf13/afero"
+)
+
+// Prune removes expired and unused items from this cache.
+// The last one requires a full build so the cache usage can be tracked.
+// Note that we operate directly on the filesystem here, so this is not
+// thread safe.
+func (c Caches) Prune() (int, error) {
+	counter := 0
+	for k, cache := range c {
+		err := afero.Walk(cache.Fs, "", func(name string, info os.FileInfo, err error) error {
+			if info == nil {
+				return nil
+			}
+
+			name = cleanID(name)
+
+			if info.IsDir() {
+				f, err := cache.Fs.Open(name)
+				if err != nil {
+					// This cache dir may not exist.
+					return nil
+				}
+				defer f.Close()
+				_, err = f.Readdirnames(1)
+				if err == io.EOF {
+					// Empty dir.
+					return cache.Fs.Remove(name)
+				}
+
+				return nil
+			}
+
+			shouldRemove := cache.isExpired(info.ModTime())
+
+			if !shouldRemove && len(cache.nlocker.seen) > 0 {
+				// Remove it if it's not been touched/used in the last build.
+				_, seen := cache.nlocker.seen[name]
+				shouldRemove = !seen
+			}
+
+			if shouldRemove {
+				err := cache.Fs.Remove(name)
+				if err == nil {
+					counter++
+				}
+				return err
+			}
+
+			return nil
+		})
+
+		if err != nil {
+			return counter, errors.Wrapf(err, "failed to prune cache %q", k)
+		}
+
+	}
+
+	return counter, nil
+}
--- /dev/null
+++ b/cache/filecache/filecache_pruner_test.go
@@ -1,0 +1,100 @@
+// Copyright 2018 The Hugo Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package filecache
+
+import (
+	"fmt"
+	"testing"
+	"time"
+
+	"github.com/gohugoio/hugo/config"
+	"github.com/gohugoio/hugo/hugofs"
+	"github.com/gohugoio/hugo/hugolib/paths"
+
+	"github.com/stretchr/testify/require"
+)
+
+func TestPrune(t *testing.T) {
+	t.Parallel()
+
+	assert := require.New(t)
+
+	configStr := `
+resourceDir = "myresources"
+[caches]
+[caches.getjson]
+maxAge = "200ms"
+dir = "/cache/c"
+
+`
+
+	cfg, err := config.FromConfigString(configStr, "toml")
+	assert.NoError(err)
+	fs := hugofs.NewMem(cfg)
+	p, err := paths.New(fs, cfg)
+	assert.NoError(err)
+
+	caches, err := NewCachesFromPaths(p)
+	assert.NoError(err)
+
+	jsonCache := caches.GetJSONCache()
+	for i := 0; i < 10; i++ {
+		id := fmt.Sprintf("i%d", i)
+		jsonCache.GetOrCreateBytes(id, func() ([]byte, error) {
+			return []byte("abc"), nil
+		})
+		if i == 4 {
+			// This will expire the first 5
+			time.Sleep(201 * time.Millisecond)
+		}
+	}
+
+	count, err := caches.Prune()
+	assert.NoError(err)
+	assert.Equal(5, count)
+
+	for i := 0; i < 10; i++ {
+		id := fmt.Sprintf("i%d", i)
+		v := jsonCache.getString(id)
+		if i < 5 {
+			assert.Equal("", v, id)
+		} else {
+			assert.Equal("abc", v, id)
+		}
+	}
+
+	caches, err = NewCachesFromPaths(p)
+	assert.NoError(err)
+	jsonCache = caches.GetJSONCache()
+	// Touch one and then prune.
+	jsonCache.GetOrCreateBytes("i5", func() ([]byte, error) {
+		return []byte("abc"), nil
+	})
+
+	count, err = caches.Prune()
+	assert.NoError(err)
+	assert.Equal(4, count)
+
+	// Now only the i5 should be left.
+	for i := 0; i < 10; i++ {
+		id := fmt.Sprintf("i%d", i)
+		v := jsonCache.getString(id)
+		if i != 5 {
+			assert.Equal("", v, id)
+		} else {
+			assert.Equal("abc", v, id)
+		}
+	}
+
+}
--- a/cache/filecache/filecache_test.go
+++ b/cache/filecache/filecache_test.go
@@ -209,3 +209,9 @@
 	}
 	wg.Wait()
 }
+
+func TestCleanID(t *testing.T) {
+	assert := require.New(t)
+	assert.Equal(filepath.FromSlash("a/b/c.txt"), cleanID(filepath.FromSlash("/a/b//c.txt")))
+	assert.Equal(filepath.FromSlash("a/b/c.txt"), cleanID(filepath.FromSlash("a/b//c.txt")))
+}
--- a/hugolib/prune_resources.go
+++ b/hugolib/prune_resources.go
@@ -1,4 +1,4 @@
-// Copyright 2017-present The Hugo Authors. All rights reserved.
+// Copyright 2018 The Hugo Authors. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -13,89 +13,7 @@
 
 package hugolib
 
-import (
-	"io"
-	"os"
-	"strings"
-
-	"github.com/gohugoio/hugo/helpers"
-
-	"github.com/spf13/afero"
-)
-
-// GC requires a build first.
+// GC requires a build first and must run on it's own. It is not thread safe.
 func (h *HugoSites) GC() (int, error) {
-	s := h.Sites[0]
-	assetsCacheFs := h.Deps.FileCaches.AssetsCache().Fs
-	imageCacheFs := h.Deps.FileCaches.ImageCache().Fs
-
-	isImageInUse := func(name string) bool {
-		for _, site := range h.Sites {
-			if site.ResourceSpec.IsInImageCache(name) {
-				return true
-			}
-		}
-
-		return false
-	}
-
-	isAssetInUse := func(name string) bool {
-		// These assets are stored in tuplets with an added extension to the key.
-		key := strings.TrimSuffix(name, helpers.Ext(name))
-		for _, site := range h.Sites {
-			if site.ResourceSpec.ResourceCache.Contains(key) {
-				return true
-			}
-		}
-
-		return false
-	}
-
-	walker := func(fs afero.Fs, dirname string, inUse func(filename string) bool) (int, error) {
-		counter := 0
-		err := afero.Walk(fs, dirname, func(path string, info os.FileInfo, err error) error {
-			if info == nil {
-				return nil
-			}
-
-			if info.IsDir() {
-				f, err := fs.Open(path)
-				if err != nil {
-					return nil
-				}
-				defer f.Close()
-				_, err = f.Readdirnames(1)
-				if err == io.EOF {
-					// Empty dir.
-					s.Fs.Source.Remove(path)
-				}
-
-				return nil
-			}
-
-			inUse := inUse(path)
-			if !inUse {
-				err := fs.Remove(path)
-				if err != nil && !os.IsNotExist(err) {
-					s.Log.ERROR.Printf("Failed to remove %q: %s", path, err)
-				} else {
-					counter++
-				}
-			}
-			return nil
-		})
-
-		return counter, err
-	}
-
-	imageCounter, err1 := walker(imageCacheFs, "", isImageInUse)
-	assetsCounter, err2 := walker(assetsCacheFs, "", isAssetInUse)
-	totalCount := imageCounter + assetsCounter
-
-	if err1 != nil {
-		return totalCount, err1
-	}
-
-	return totalCount, err2
-
+	return h.Deps.FileCaches.Prune()
 }
--- a/tpl/data/resources_test.go
+++ b/tpl/data/resources_test.go
@@ -192,7 +192,10 @@
 	logger := loggers.NewErrorLogger()
 	p, _ := paths.New(fs, cfg)
 
-	fileCaches, _ := filecache.NewCachesFromPaths(p)
+	fileCaches, err := filecache.NewCachesFromPaths(p)
+	if err != nil {
+		panic(err)
+	}
 
 	return &deps.Deps{
 		Cfg:              cfg,