shithub: hugo

Download patch

ref: dd45e6d7e5406991d8df3a2f9ba4c7e5ae039c34
parent: 4abaec5c045e92ae5f8b3a2dc66606b080ef6ea5
author: Bjørn Erik Pedersen <[email protected]>
date: Wed Aug 17 09:41:48 EDT 2016

Lazily calculate WordCount, ReadingTime and FuzzyWordCount

This avoids having to execute these expensive operations for sites not using these values.

This commit sums up a set of word-counting and auto-summary-related performance improvements.

The effect of these changes depends somewhat on which features your site uses, but a benchmark of 4 Hugo sites in the wild shows promise:

```
benchmark           old ns/op       new ns/op       delta
BenchmarkHugo-4     21293005843     20032857342     -5.92%

benchmark           old allocs     new allocs     delta
BenchmarkHugo-4     65290922       65186032       -0.16%

benchmark           old bytes      new bytes      delta
BenchmarkHugo-4     9771213416     9681866464     -0.91%
```

Closes #2378

--- a/helpers/content.go
+++ b/helpers/content.go
@@ -138,19 +138,28 @@
 	// Walk through the string removing all tags
 	b := bp.GetBuffer()
 	defer bp.PutBuffer(b)
-
-	inTag := false
+	var inTag, isSpace, wasSpace bool
 	for _, r := range s {
-		switch r {
-		case '<':
+		if !inTag {
+			isSpace = false
+		}
+
+		switch {
+		case r == '<':
 			inTag = true
-		case '>':
+		case r == '>':
 			inTag = false
+		case unicode.IsSpace(r):
+			isSpace = true
+			fallthrough
 		default:
-			if !inTag {
+			if !inTag && (!isSpace || (isSpace && !wasSpace)) {
 				b.WriteRune(r)
 			}
 		}
+
+		wasSpace = isSpace
+
 	}
 	return b.String()
 }
--- a/helpers/content_test.go
+++ b/helpers/content_test.go
@@ -34,11 +34,22 @@
 	}
 	data := []test{
 		{"<h1>strip h1 tag <h1>", "strip h1 tag "},
-		{"<p> strip p tag </p>", " strip p tag \n"},
+		{"<p> strip p tag </p>", " strip p tag "},
 		{"</br> strip br<br>", " strip br\n"},
 		{"</br> strip br2<br />", " strip br2\n"},
 		{"This <strong>is</strong> a\nnewline", "This is a newline"},
 		{"No Tags", "No Tags"},
+		{`<p>Summary Next Line. 
+<figure >
+    
+        <img src="/not/real" />
+    
+    
+</figure>
+.
+More text here.</p>
+
+<p>Some more text</p>`, "Summary Next Line.  . More text here.\nSome more text\n"},
 	}
 	for i, d := range data {
 		output := StripHTML(d.input)
--- a/hugolib/page.go
+++ b/hugolib/page.go
@@ -107,9 +107,10 @@
 	source.File
 }
 type PageMeta struct {
-	WordCount      int
-	FuzzyWordCount int
-	ReadingTime    int
+	wordCount      int
+	fuzzyWordCount int
+	readingTime    int
+	pageMetaInit   sync.Once
 	Weight         int
 }
 
@@ -485,28 +486,48 @@
 	return int64(len(p.rawContent)), nil
 }
 
+func (p *Page) WordCount() int {
+	p.analyzePage()
+	return p.wordCount
+}
+
+func (p *Page) ReadingTime() int {
+	p.analyzePage()
+	return p.readingTime
+}
+
+func (p *Page) FuzzyWordCount() int {
+	p.analyzePage()
+	return p.fuzzyWordCount
+}
+
 func (p *Page) analyzePage() {
-	if p.isCJKLanguage {
-		p.WordCount = 0
-		for _, word := range p.PlainWords() {
-			runeCount := utf8.RuneCountInString(word)
-			if len(word) == runeCount {
-				p.WordCount++
-			} else {
-				p.WordCount += runeCount
+	p.pageMetaInit.Do(func() {
+		if p.isCJKLanguage {
+			p.wordCount = 0
+			for _, word := range p.PlainWords() {
+				runeCount := utf8.RuneCountInString(word)
+				if len(word) == runeCount {
+					p.wordCount++
+				} else {
+					p.wordCount += runeCount
+				}
 			}
+		} else {
+			p.wordCount = helpers.TotalWords(p.Plain())
 		}
-	} else {
-		p.WordCount = len(p.PlainWords())
-	}
 
-	p.FuzzyWordCount = (p.WordCount + 100) / 100 * 100
+		// TODO(bep) is set in a test. Fix that.
+		if p.fuzzyWordCount == 0 {
+			p.fuzzyWordCount = (p.wordCount + 100) / 100 * 100
+		}
 
-	if p.isCJKLanguage {
-		p.ReadingTime = (p.WordCount + 500) / 501
-	} else {
-		p.ReadingTime = (p.WordCount + 212) / 213
-	}
+		if p.isCJKLanguage {
+			p.readingTime = (p.wordCount + 500) / 501
+		} else {
+			p.readingTime = (p.wordCount + 212) / 213
+		}
+	})
 }
 
 func (p *Page) permalink() (*url.URL, error) {
--- a/hugolib/pageSort_test.go
+++ b/hugolib/pageSort_test.go
@@ -95,11 +95,11 @@
 
 func TestPageSortReverse(t *testing.T) {
 	p1 := createSortTestPages(10)
-	assert.Equal(t, 0, p1[0].FuzzyWordCount)
-	assert.Equal(t, 9, p1[9].FuzzyWordCount)
+	assert.Equal(t, 0, p1[0].fuzzyWordCount)
+	assert.Equal(t, 9, p1[9].fuzzyWordCount)
 	p2 := p1.Reverse()
-	assert.Equal(t, 9, p2[0].FuzzyWordCount)
-	assert.Equal(t, 0, p2[9].FuzzyWordCount)
+	assert.Equal(t, 9, p2[0].fuzzyWordCount)
+	assert.Equal(t, 0, p2[9].fuzzyWordCount)
 	// cached
 	assert.True(t, probablyEqualPages(p2, p1.Reverse()))
 }
@@ -149,7 +149,7 @@
 		if i%2 == 0 {
 			w = 10
 		}
-		pages[i].FuzzyWordCount = i
+		pages[i].fuzzyWordCount = i
 		pages[i].Weight = w
 		pages[i].Description = "initial"
 	}
--- a/hugolib/page_test.go
+++ b/hugolib/page_test.go
@@ -504,10 +504,13 @@
 }
 
 func normalizeContent(c string) string {
-	norm := strings.Replace(c, "\n", "", -1)
+	norm := c
+	norm = strings.Replace(norm, "\n", " ", -1)
 	norm = strings.Replace(norm, "    ", " ", -1)
 	norm = strings.Replace(norm, "   ", " ", -1)
 	norm = strings.Replace(norm, "  ", " ", -1)
+	norm = strings.Replace(norm, "p> ", "p>", -1)
+	norm = strings.Replace(norm, ">  <", "> <", -1)
 	return strings.TrimSpace(norm)
 }
 
@@ -710,8 +713,8 @@
 
 	assertFunc := func(t *testing.T, ext string, p *Page) {
 		checkPageTitle(t, p, "Simple")
-		checkPageContent(t, p, normalizeExpected(ext, "<p>Summary Next Line. <figure > <img src=\"/not/real\" /> </figure>.\nMore text here.</p><p>Some more text</p>"), ext)
-		checkPageSummary(t, p, "Summary Next Line. . More text here. Some more text", ext)
+		checkPageContent(t, p, normalizeExpected(ext, "<p>Summary Next Line. \n<figure >\n    \n        <img src=\"/not/real\" />\n    \n    \n</figure>\n.\nMore text here.</p>\n\n<p>Some more text</p>\n"))
+		checkPageSummary(t, p, "Summary Next Line.  . More text here. Some more text")
 		checkPageType(t, p, "page")
 		checkPageLayout(t, p, "page/single.html", "_default/single.html", "theme/page/single.html", "theme/_default/single.html")
 	}
@@ -793,8 +796,8 @@
 	testCommonResetState()
 
 	assertFunc := func(t *testing.T, ext string, p *Page) {
-		if p.WordCount != 8 {
-			t.Fatalf("[%s] incorrect word count for content '%s'. expected %v, got %v", ext, p.plain, 8, p.WordCount)
+		if p.WordCount() != 8 {
+			t.Fatalf("[%s] incorrect word count for content '%s'. expected %v, got %v", ext, p.plain, 8, p.WordCount())
 		}
 	}
 
@@ -806,11 +809,10 @@
 	viper.Set("HasCJKLanguage", true)
 
 	assertFunc := func(t *testing.T, ext string, p *Page) {
-		if p.WordCount != 15 {
-			t.Fatalf("[%s] incorrect word count for content '%s'. expected %v, got %v", ext, p.plain, 15, p.WordCount)
+		if p.WordCount() != 15 {
+			t.Fatalf("[%s] incorrect word count for content '%s'. expected %v, got %v", ext, p.plain, 15, p.WordCount())
 		}
 	}
-
 	testAllMarkdownEnginesForPage(t, assertFunc, "simple", simplePageWithAllCJKRunes)
 }
 
@@ -820,8 +822,8 @@
 	viper.Set("HasCJKLanguage", true)
 
 	assertFunc := func(t *testing.T, ext string, p *Page) {
-		if p.WordCount != 74 {
-			t.Fatalf("[%s] incorrect word count for content '%s'. expected %v, got %v", ext, p.plain, 74, p.WordCount)
+		if p.WordCount() != 74 {
+			t.Fatalf("[%s] incorrect word count for content '%s'. expected %v, got %v", ext, p.plain, 74, p.WordCount())
 		}
 
 		if p.Summary != simplePageWithMainEnglishWithCJKRunesSummary {
@@ -828,7 +830,6 @@
 			t.Fatalf("[%s] incorrect Summary for content '%s'. expected %v, got %v", ext, p.plain,
 				simplePageWithMainEnglishWithCJKRunesSummary, p.Summary)
 		}
-
 	}
 
 	testAllMarkdownEnginesForPage(t, assertFunc, "simple", simplePageWithMainEnglishWithCJKRunes)
@@ -839,8 +840,8 @@
 	viper.Set("HasCJKLanguage", true)
 
 	assertFunc := func(t *testing.T, ext string, p *Page) {
-		if p.WordCount != 75 {
-			t.Fatalf("[%s] incorrect word count for content '%s'. expected %v, got %v", ext, p.plain, 74, p.WordCount)
+		if p.WordCount() != 75 {
+			t.Fatalf("[%s] incorrect word count for content '%s'. expected %v, got %v", ext, p.plain, 74, p.WordCount())
 		}
 
 		if p.Summary != simplePageWithIsCJKLanguageFalseSummary {
@@ -847,7 +848,6 @@
 			t.Fatalf("[%s] incorrect Summary for content '%s'. expected %v, got %v", ext, p.plain,
 				simplePageWithIsCJKLanguageFalseSummary, p.Summary)
 		}
-
 	}
 
 	testAllMarkdownEnginesForPage(t, assertFunc, "simple", simplePageWithIsCJKLanguageFalse)
@@ -857,16 +857,16 @@
 func TestWordCount(t *testing.T) {
 
 	assertFunc := func(t *testing.T, ext string, p *Page) {
-		if p.WordCount != 483 {
-			t.Fatalf("[%s] incorrect word count. expected %v, got %v", ext, 483, p.WordCount)
+		if p.WordCount() != 483 {
+			t.Fatalf("[%s] incorrect word count. expected %v, got %v", ext, 483, p.WordCount())
 		}
 
-		if p.FuzzyWordCount != 500 {
-			t.Fatalf("[%s] incorrect word count. expected %v, got %v", ext, 500, p.WordCount)
+		if p.FuzzyWordCount() != 500 {
+			t.Fatalf("[%s] incorrect word count. expected %v, got %v", ext, 500, p.WordCount())
 		}
 
-		if p.ReadingTime != 3 {
-			t.Fatalf("[%s] incorrect min read. expected %v, got %v", ext, 3, p.ReadingTime)
+		if p.ReadingTime() != 3 {
+			t.Fatalf("[%s] incorrect min read. expected %v, got %v", ext, 3, p.ReadingTime())
 		}
 
 		checkTruncation(t, p, true, "long page")
--- a/hugolib/pagination_test.go
+++ b/hugolib/pagination_test.go
@@ -55,7 +55,7 @@
 			// first group 10 in weight
 			assert.Equal(t, 10, pg.Key)
 			for _, p := range pg.Pages {
-				assert.True(t, p.FuzzyWordCount%2 == 0) // magic test
+				assert.True(t, p.fuzzyWordCount%2 == 0) // magic test
 			}
 		}
 	} else {
@@ -70,7 +70,7 @@
 			// last should have 5 in weight
 			assert.Equal(t, 5, pg.Key)
 			for _, p := range pg.Pages {
-				assert.True(t, p.FuzzyWordCount%2 != 0) // magic test
+				assert.True(t, p.fuzzyWordCount%2 != 0) // magic test
 			}
 		}
 	} else {
@@ -443,10 +443,10 @@
 	page21, _ := f2.page(1)
 	page2Nil, _ := f2.page(3)
 
-	assert.Equal(t, 1, page11.FuzzyWordCount)
+	assert.Equal(t, 3, page11.fuzzyWordCount)
 	assert.Nil(t, page1Nil)
 
-	assert.Equal(t, 1, page21.FuzzyWordCount)
+	assert.Equal(t, 3, page21.fuzzyWordCount)
 	assert.Nil(t, page2Nil)
 }
 
@@ -468,7 +468,7 @@
 		if i%2 == 0 {
 			w = 10
 		}
-		pages[i].FuzzyWordCount = i
+		pages[i].fuzzyWordCount = i + 2
 		pages[i].Weight = w
 	}
 
--- a/hugolib/site_test.go
+++ b/hugolib/site_test.go
@@ -33,6 +33,11 @@
 	"github.com/stretchr/testify/require"
 )
 
+func init() {
+	//There are expected ERROR logging in tests that produces a lot of noise.
+	jww.SetStdoutThreshold(jww.LevelCritical)
+}
+
 const (
 	pageSimpleTitle = `---
 title: simple template