shithub: hugo

--- a/helpers/content.go

+++ b/helpers/content.go

@@ -21,6 +21,7 @@

 	"bytes"

 	"html/template"

 	"os/exec"

+	"unicode"

 	"unicode/utf8"

 	"github.com/miekg/mmark"

@@ -424,10 +425,55 @@

 	return strings.Join(words, " "), false

-// TruncateWordsToWholeSentence takes content and an int

-// and returns entire sentences from content, delimited by the int

-// and whether it's truncated or not.

-func TruncateWordsToWholeSentence(words []string, max int) (string, bool) {

+// TruncateWordsToWholeSentence truncates s after at most max words and then

+// extends the cut to the end of the sentence it falls in, so the result never

+// stops mid-sentence. The returned bool reports whether content was dropped.

+//

+// A word is a maximal run of non-whitespace runes (the same notion

+// strings.Fields uses), so leading whitespace and runs of consecutive

+// whitespace are not counted as extra words. Content that never reaches the

+// word limit is returned whole (whitespace-trimmed) and is not truncated.

+func TruncateWordsToWholeSentence(s string, max int) (string, bool) {

+	var (

+		wordCount    = 0

+		lastWordEnd  = -1 // byte index of the whitespace that closed the last counted word

+		inWord       = false

+		limitReached = false

+	)

+	for i, r := range s {

+		if !unicode.IsSpace(r) {

+			inWord = true

+			continue

+		}

+		if !inWord {

+			// Leading or repeated whitespace does not close a word.

+			continue

+		}

+		inWord = false

+		wordCount++

+		lastWordEnd = i

+		if wordCount >= max {

+			limitReached = true

+			break

+		}

+	}

+	if lastWordEnd == -1 {

+		// No word boundary at all: empty, whitespace-only or single-word input.

+		return s, false

+	}

+	if !limitReached {

+		// Fewer words than the limit: there is nothing to cut, even if the

+		// last word happens to contain a terminator (e.g. "example.com").

+		return strings.TrimSpace(s), false

+	}

+	// Extend the cut to the first sentence terminator at or after the limit.

+	for j, r := range s[lastWordEnd:] {

+		if isEndOfSentence(r) {

+			endIndex := lastWordEnd + j + utf8.RuneLen(r)

+			return strings.TrimSpace(s[:endIndex]), endIndex < len(s)

+		}

+	}

+	// No terminator after the cut: keep everything.

+	return s, false

+}

+// isEndOfSentence reports whether r is one of the runes treated as closing a

+// sentence: '.', '?', '!', '"' or a newline.

+func isEndOfSentence(r rune) bool {

+	switch r {

+	case '.', '?', '!', '"', '\n':

+		return true

+	}

+	return false

+}

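+// truncateWordsToWholeSentenceOld is the previous strings.Fields-based implementation, kept only so the benchmarks can compare it with TruncateWordsToWholeSentence.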

+func truncateWordsToWholeSentenceOld(content string, max int) (string, bool) {

+	words := strings.Fields(content)

 	if max >= len(words) {

 		return strings.Join(words, " "), false

--- a/helpers/content_test.go

+++ b/helpers/content_test.go

@@ -64,6 +64,22 @@

 	assert.Equal(t, template.HTML("dobedobedo"), BytesToHTML([]byte("dobedobedo")))

+// benchmarkTruncateString is the shared benchmark input: 20 copies of one

+// sentence. Each copy ends with a space so consecutive sentences do not fuse

+// into a single word ("nothing.This") and every terminator closes a word.

+var benchmarkTruncateString = strings.Repeat("This is a sentence about nothing. ", 20)

+// BenchmarkTestTruncateWordsToWholeSentence times the string-scanning

+// TruncateWordsToWholeSentence on benchmarkTruncateString, using

+// SummaryLength as the word limit.

+func BenchmarkTestTruncateWordsToWholeSentence(b *testing.B) {

+	b.ResetTimer()

+	for n := b.N; n > 0; n-- {

+		TruncateWordsToWholeSentence(benchmarkTruncateString, SummaryLength)

+	}

+}

+// BenchmarkTestTruncateWordsToWholeSentenceOld times the retained

+// truncateWordsToWholeSentenceOld on the same input and word limit, as a

+// baseline for the benchmark of the new implementation.

+func BenchmarkTestTruncateWordsToWholeSentenceOld(b *testing.B) {

+	b.ResetTimer()

+	for n := b.N; n > 0; n-- {

+		truncateWordsToWholeSentenceOld(benchmarkTruncateString, SummaryLength)

+	}

+}

 func TestTruncateWordsToWholeSentence(t *testing.T) {

 	type test struct {

 		input, expected string

@@ -77,10 +93,11 @@

 		{"This is a sentence.", "This is a sentence.", 5, false},

 		{"This is also a sentence!", "This is also a sentence!", 1, false},

 		{"To be. Or not to be. That's the question.", "To be.", 1, true},

-		{" \nThis is not a sentence\n ", "This is not a", 4, true},

+		{" \nThis is not a sentence\nAnd this is another", "This is not a sentence", 4, true},

+		{"", "", 10, false},

 	for i, d := range data {

-		output, truncated := TruncateWordsToWholeSentence(strings.Fields(d.input), d.max)

+		output, truncated := TruncateWordsToWholeSentence(d.input, d.max)

 		if d.expected != output {

 			t.Errorf("Test %d failed. Expected %q got %q", i, d.expected, output)

--- a/hugolib/page.go

+++ b/hugolib/page.go

@@ -89,6 +89,7 @@

 	plain               string // TODO should be []byte

 	plainWords          []string

 	plainInit           sync.Once

+	plainWordsInit      sync.Once

 	renderingConfig     *helpers.Blackfriday

 	renderingConfigInit sync.Once

 	pageMenus           PageMenus

@@ -147,7 +148,7 @@

 func (p *Page) PlainWords() []string {

-	p.initPlain()

+	p.initPlainWords()

 	return p.plainWords

@@ -154,11 +155,17 @@

 func (p *Page) initPlain() {

 	p.plainInit.Do(func() {

 		p.plain = helpers.StripHTML(string(p.Content))

-		p.plainWords = strings.Fields(p.plain)

 		return

})

+// initPlainWords fills p.plainWords with the whitespace-separated fields of

+// p.Plain(). The work goes through p.plainWordsInit (a sync.Once), so the

+// split runs at most once per Page value.

+func (p *Page) initPlainWords() {

+	splitPlain := func() {

+		p.plainWords = strings.Fields(p.Plain())

+	}

+	p.plainWordsInit.Do(splitPlain)

+}

 func (p *Page) IsNode() bool {

 	return false

@@ -335,7 +342,7 @@

 	if p.isCJKLanguage {

 		summary, truncated = helpers.TruncateWordsByRune(p.PlainWords(), helpers.SummaryLength)

 	} else {

-		summary, truncated = helpers.TruncateWordsToWholeSentence(p.PlainWords(), helpers.SummaryLength)

+		summary, truncated = helpers.TruncateWordsToWholeSentence(p.Plain(), helpers.SummaryLength)

 	p.Summary = template.HTML(summary)

 	p.Truncated = truncated

@@ -479,6 +486,10 @@

 func (p *Page) analyzePage() {

+	// TODO(bep)

+	if true {

+		return

+	}

 	if p.isCJKLanguage {

 		p.WordCount = 0

 		for _, word := range p.PlainWords() {