shithub: hugo

Download patch

ref: a9c5133a77c8490fad59807f44348214c7f3c954
parent: ea6ae769dc651c2a29e8fb3600fa8c10859f60dc
author: Bjørn Erik Pedersen <[email protected]>
date: Mon Jul 20 21:29:22 EDT 2015

Fix data races in sorting and Reverse

The custom sort functions used from the templates had some subtle data-race and related issues,
especially when used in the single-page template.

This commit fixes this by making copies and protecting the reads and writes with an RWMutex.

The results are cached (it will typically be invoked *number of pages* times with exactly the same data).

This is, not surprisingly, also faster:

```
benchmark                           old ns/op     new ns/op     delta
BenchmarkSortByWeightAndReverse     14228         491           -96.55%

benchmark                           old allocs     new allocs     delta
BenchmarkSortByWeightAndReverse     1              0              -100.00%

benchmark                           old bytes     new bytes     delta
BenchmarkSortByWeightAndReverse     32            0             -100.00%
```

Fixes #1293

--- /dev/null
+++ b/hugolib/pageCache.go
@@ -1,0 +1,108 @@
+// Copyright © 2015 Steve Francia <[email protected]>.
+//
+// Licensed under the Simple Public License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://opensource.org/licenses/Simple-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package hugolib
+
+import (
+	"sync"
+)
+
+// pageCache is a concurrency-safe cache of processed Pages slices, keyed by
+// the name of the operation (e.g. "pageSort.ByWeight").
+type pageCache struct {
+	sync.RWMutex
+	// m maps a key to a list of (input, processed copy) Pages pairs.
+	m map[string][][2]Pages
+}
+
+// newPageCache returns a ready-to-use, empty pageCache.
+func newPageCache() *pageCache {
+	return &pageCache{m: make(map[string][][2]Pages)}
+}
+
+// get gets a Pages slice from the cache matching the given key and Pages slice.
+// If none found in cache, a copy of the supplied slice is created.
+//
+// If an apply func is provided, that func is applied to the newly created copy.
+//
+// The cache and the execution of the apply func is protected by a RWMutex.
+// get gets a Pages slice from the cache matching the given key and Pages slice.
+// If none found in cache, a copy of the supplied slice is created, stored and
+// returned. The second return value reports whether the result came from cache.
+//
+// If an apply func is provided, that func is applied to the newly created copy.
+//
+// The cache and the execution of the apply func is protected by a RWMutex.
+func (c *pageCache) get(key string, p Pages, apply func(p Pages)) (Pages, bool) {
+	// Fast path: concurrent readers share the read lock.
+	c.RLock()
+	if cached, found := c.lookup(key, p); found {
+		c.RUnlock()
+		return cached, true
+	}
+	c.RUnlock()
+
+	c.Lock()
+	defer c.Unlock()
+
+	// Double-check: another writer may have filled the cache while we were
+	// waiting for the write lock.
+	if cached, found := c.lookup(key, p); found {
+		return cached, true
+	}
+
+	pagesCopy := append(Pages(nil), p...)
+
+	// append on a nil slice allocates, so the first entry for a key needs
+	// no special-casing.
+	c.m[key] = append(c.m[key], [2]Pages{p, pagesCopy})
+
+	if apply != nil {
+		apply(pagesCopy)
+	}
+
+	return pagesCopy, false
+}
+
+// lookup returns the cached copy for the given key and Pages slice, if any.
+// The caller must hold at least a read lock.
+func (c *pageCache) lookup(key string, p Pages) (Pages, bool) {
+	for _, ps := range c.m[key] {
+		if probablyEqualPages(p, ps[0]) {
+			return ps[1], true
+		}
+	}
+	return nil, false
+}
+
+// "probably" as in: we do not compare every element for big slices, but that is
+// good enough for our use case.
+// TODO(bep) there is a similar method in pagination.go. DRY.
+func probablyEqualPages(p1, p2 Pages) bool {
+	if p1 == nil && p2 == nil {
+		return true
+	}
+
+	if p1 == nil || p2 == nil {
+		return false
+	}
+
+	if p1.Len() != p2.Len() {
+		return false
+	}
+
+	if p1.Len() == 0 {
+		return true
+	}
+
+	step := 1
+
+	if len(p1) >= 50 {
+		step = len(p1) / 10
+	}
+
+	for i := 0; i < len(p1); i += step {
+		if p1[i] != p2[i] {
+			return false
+		}
+	}
+	return true
+}
--- /dev/null
+++ b/hugolib/pageCache_test.go
@@ -1,0 +1,60 @@
+package hugolib
+
+import (
+	"fmt"
+	"github.com/stretchr/testify/assert"
+	"sync"
+	"sync/atomic"
+	"testing"
+)
+
+func TestPageCache(t *testing.T) {
+	c1 := newPageCache()
+
+	changeFirst := func(p Pages) {
+		p[0].Description = "changed"
+	}
+
+	var o1 uint64 = 0
+	var o2 uint64 = 0
+
+	var wg sync.WaitGroup
+
+	var l1 sync.Mutex
+	var l2 sync.Mutex
+
+	var testPageSets []Pages
+
+	for j := 0; j < 50; j++ {
+		testPageSets = append(testPageSets, createSortTestPages(j+1))
+	}
+
+	for i := 0; i < 100; i++ {
+		wg.Add(1)
+		// Pass i as an argument: goroutines started in a loop would
+		// otherwise share (and race on) the loop variable.
+		go func(i int) {
+			defer wg.Done()
+			for j, pages := range testPageSets {
+				// o1/o2 are updated with CompareAndSwap by the other
+				// goroutines, so read them atomically here.
+				msg := fmt.Sprintf("Go %d %d %d %d", i, j,
+					atomic.LoadUint64(&o1), atomic.LoadUint64(&o2))
+				l1.Lock()
+				p, c := c1.get("k1", pages, nil)
+				assert.Equal(t, !atomic.CompareAndSwapUint64(&o1, uint64(j), uint64(j+1)), c, "c1: "+msg)
+				l1.Unlock()
+				p2, c2 := c1.get("k1", p, nil)
+				assert.True(t, c2)
+				assert.True(t, probablyEqualPages(p, p2))
+				assert.True(t, probablyEqualPages(p, pages))
+				assert.NotNil(t, p, msg)
+
+				l2.Lock()
+				p3, c3 := c1.get("k2", pages, changeFirst)
+				assert.Equal(t, !atomic.CompareAndSwapUint64(&o2, uint64(j), uint64(j+1)), c3, "c3: "+msg)
+				l2.Unlock()
+				assert.NotNil(t, p3, msg)
+				assert.Equal(t, p3[0].Description, "changed", msg)
+			}
+		}(i)
+	}
+
+	wg.Wait()
+
+}
--- a/hugolib/pageSort.go
+++ b/hugolib/pageSort.go
@@ -17,6 +17,8 @@
 	"sort"
 )
 
+var spc = newPageCache()
+
 /*
  * Implementation of a custom sorter for Pages
  */
@@ -62,60 +64,121 @@
 	return p
 }
 
+// ByWeight sorts the Pages by weight and returns a copy.
+//
+// Adjacent invocations on the same receiver will return a cached result.
+//
+// This may safely be executed in parallel.
 func (p Pages) ByWeight() Pages {
-	PageBy(DefaultPageSort).Sort(p)
-	return p
+	key := "pageSort.ByWeight"
+	pages, _ := spc.get(key, p, PageBy(DefaultPageSort).Sort)
+	return pages
 }
 
+// ByTitle sorts the Pages by title and returns a copy.
+//
+// Adjacent invocations on the same receiver will return a cached result.
+//
+// This may safely be executed in parallel.
 func (p Pages) ByTitle() Pages {
+
+	key := "pageSort.ByTitle"
+
 	title := func(p1, p2 *Page) bool {
 		return p1.Title < p2.Title
 	}
 
-	PageBy(title).Sort(p)
-	return p
+	pages, _ := spc.get(key, p, PageBy(title).Sort)
+	return pages
 }
 
+// ByLinkTitle sorts the Pages by link title and returns a copy.
+//
+// Adjacent invocations on the same receiver will return a cached result.
+//
+// This may safely be executed in parallel.
 func (p Pages) ByLinkTitle() Pages {
+
+	key := "pageSort.ByLinkTitle"
+
 	linkTitle := func(p1, p2 *Page) bool {
 		return p1.linkTitle < p2.linkTitle
 	}
 
-	PageBy(linkTitle).Sort(p)
-	return p
+	pages, _ := spc.get(key, p, PageBy(linkTitle).Sort)
+
+	return pages
 }
 
+// ByDate sorts the Pages by date and returns a copy.
+//
+// Adjacent invocations on the same receiver will return a cached result.
+//
+// This may safely be executed in parallel.
 func (p Pages) ByDate() Pages {
+
+	key := "pageSort.ByDate"
+
 	date := func(p1, p2 *Page) bool {
 		return p1.Date.Unix() < p2.Date.Unix()
 	}
 
-	PageBy(date).Sort(p)
-	return p
+	pages, _ := spc.get(key, p, PageBy(date).Sort)
+
+	return pages
 }
 
+// ByPublishDate sorts the Pages by publish date and returns a copy.
+//
+// Adjacent invocations on the same receiver will return a cached result.
+//
+// This may safely be executed in parallel.
 func (p Pages) ByPublishDate() Pages {
+
+	key := "pageSort.ByPublishDate"
+
 	pubDate := func(p1, p2 *Page) bool {
 		return p1.PublishDate.Unix() < p2.PublishDate.Unix()
 	}
 
-	PageBy(pubDate).Sort(p)
-	return p
+	pages, _ := spc.get(key, p, PageBy(pubDate).Sort)
+
+	return pages
 }
 
+// ByLength sorts the Pages by length and returns a copy.
+//
+// Adjacent invocations on the same receiver will return a cached result.
+//
+// This may safely be executed in parallel.
 func (p Pages) ByLength() Pages {
+
+	key := "pageSort.ByLength"
+
 	length := func(p1, p2 *Page) bool {
 		return len(p1.Content) < len(p2.Content)
 	}
 
-	PageBy(length).Sort(p)
-	return p
+	pages, _ := spc.get(key, p, PageBy(length).Sort)
+
+	return pages
 }
 
+// Reverse reverses the order in Pages and returns a copy.
+//
+// Adjacent invocations on the same receiver will return a cached result.
+//
+// This may safely be executed in parallel.
 func (p Pages) Reverse() Pages {
-	for i, j := 0, len(p)-1; i < j; i, j = i+1, j-1 {
-		p[i], p[j] = p[j], p[i]
+	key := "pageSort.Reverse"
+
+	// reverseFunc is applied by the cache to the fresh copy only.
+	reverseFunc := func(pages Pages) {
+		for i, j := 0, len(pages)-1; i < j; i, j = i+1, j-1 {
+			pages[i], pages[j] = pages[j], pages[i]
+		}
 	}
 
-	return p
+	pages, _ := spc.get(key, p, reverseFunc)
+
+	return pages
 }
--- a/hugolib/pageSort_test.go
+++ b/hugolib/pageSort_test.go
@@ -10,12 +10,14 @@
 )
 
 func TestPageSortReverse(t *testing.T) {
-	p := createSortTestPages(10)
-	assert.Equal(t, 0, p[0].FuzzyWordCount)
-	assert.Equal(t, 9, p[9].FuzzyWordCount)
-	p = p.Reverse()
-	assert.Equal(t, 9, p[0].FuzzyWordCount)
-	assert.Equal(t, 0, p[9].FuzzyWordCount)
+	p1 := createSortTestPages(10)
+	assert.Equal(t, 0, p1[0].FuzzyWordCount)
+	assert.Equal(t, 9, p1[9].FuzzyWordCount)
+	// Reverse now returns a copy (see pageSort.go), so p1 keeps its order.
+	p2 := p1.Reverse()
+	assert.Equal(t, 9, p2[0].FuzzyWordCount)
+	assert.Equal(t, 0, p2[9].FuzzyWordCount)
+	// cached: a second Reverse on the same input must hit the page cache.
+	assert.True(t, probablyEqualPages(p2, p1.Reverse()))
 }
 
 func BenchmarkSortByWeightAndReverse(b *testing.B) {
@@ -51,6 +53,7 @@
 		}
 		pages[i].FuzzyWordCount = i
 		pages[i].Weight = w
+		pages[i].Description = "initial"
 	}
 
 	return pages