shithub: hugo

Download patch

ref: 4abaec5c045e92ae5f8b3a2dc66606b080ef6ea5
parent: bcd434794a28ff75a6e6504c6c3bada554ba88ce
author: Bjørn Erik Pedersen <[email protected]>
date: Wed Aug 17 02:37:19 EDT 2016

Improve TotalWords counter func

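Counting words in a single pass over the string is more efficient than len(strings.Fields(s)) when we do not care about the actual words, because no slice of words has to be allocated.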

```
BenchmarkTotalWords-4            100000         18795 ns/op           0 B/op           0 allocs/op
BenchmarkTotalWordsOld-4          30000         46751 ns/op        6400 B/op           1 allocs/op
```

--- a/helpers/content.go
+++ b/helpers/content.go
@@ -384,8 +384,25 @@
 	}
 }
 
-// TotalWords returns an int of the total number of words in a given content.
+// TotalWords counts the words in s, where a word is a maximal run of
+// consecutive characters that are not white space as defined by unicode.IsSpace.
+// This is a cheaper way of word counting than the obvious len(strings.Fields(s)).
 func TotalWords(s string) int {
+	n := 0
+	inWord := false
+	for _, r := range s {
+		wasInWord := inWord
+		inWord = !unicode.IsSpace(r)
+		if inWord && !wasInWord {
+			n++
+		}
+	}
+	return n
+}
+
+// Old implementation only kept for benchmark comparison.
+// TODO(bep) remove
+func totalWordsOld(s string) int {
 	return len(strings.Fields(s))
 }
 
--- a/helpers/content_test.go
+++ b/helpers/content_test.go
@@ -408,12 +408,45 @@
 	}
 }
 
+var totalWordsBenchmarkString = strings.Repeat("Hugo Rocks ", 200)
+
 func TestTotalWords(t *testing.T) {
-	testString := "Two, Words!"
-	actualWordCount := TotalWords(testString)
 
-	if actualWordCount != 2 {
-		t.Errorf("Actual word count (%d) for test string (%s) did not match 2.", actualWordCount, testString)
+	for i, this := range []struct {
+		s     string
+		words int
+	}{
+		{"Two, Words!", 2},
+		{"Word", 1},
+		{"", 0},
+		{"One, Two,      Three", 3},
+		{totalWordsBenchmarkString, 400},
+	} {
+		actualWordCount := TotalWords(this.s)
+
+		if actualWordCount != this.words {
+			t.Errorf("[%d] Actual word count (%d) for test string (%s) did not match %d", i, actualWordCount, this.s, this.words)
+		}
+	}
+}
+
+func BenchmarkTotalWords(b *testing.B) {
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		wordCount := TotalWords(totalWordsBenchmarkString)
+		if wordCount != 400 {
+			b.Fatal("Wordcount error")
+		}
+	}
+}
+
+func BenchmarkTotalWordsOld(b *testing.B) {
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		wordCount := totalWordsOld(totalWordsBenchmarkString)
+		if wordCount != 400 {
+			b.Fatal("Wordcount error")
+		}
 	}
 }
 
--- a/hugolib/page.go
+++ b/hugolib/page.go
@@ -486,10 +486,6 @@
 }
 
 func (p *Page) analyzePage() {
-	// TODO(bep)
-	if true {
-		return
-	}
 	if p.isCJKLanguage {
 		p.WordCount = 0
 		for _, word := range p.PlainWords() {