shithub: hugo

Download patch

ref: 0e1fd78fb22e8a870ff3a922f36a9a4d0475c090
parent: c7521b3d672b8d857bfe698f021c498dd27226c9
author: coderzh <[email protected]>
date: Thu Sep 3 14:22:20 EDT 2015

WordCount and Summary: support UTF-8 strings

--- a/helpers/content.go
+++ b/helpers/content.go
@@ -19,6 +19,7 @@
 
 import (
 	"bytes"
+	"unicode/utf8"
 	"html/template"
 	"os/exec"
 
@@ -386,21 +387,57 @@
 // and returns entire sentences from content, delimited by the int
 // and whether it's truncated or not.
 func TruncateWordsToWholeSentence(words []string, max int) (string, bool) {
-	if max >= len(words) {
-		return strings.Join(words, " "), false
+	count := 0
+	index, word := 0, ""
+	truncated := false
+	
+	for index, word = range words {
+		runeCount := utf8.RuneCountInString(word)
+		if len(word) == runeCount {
+			count++;
+		} else {
+			if count + runeCount <= max {
+				count += runeCount
+			} else {
+				offset := 0
+				for count < max {
+					_, width := utf8.DecodeRuneInString(word[offset:])
+			        offset += width
+					count++
+				}
+				words[index] = word[:offset]
+				truncated = true
+			}
+		}
+		
+		if count >= max {
+			if index < len(words) - 1 {
+				truncated = true	
+			}
+			break
+		}
 	}
-
-	for counter, word := range words[max:] {
-		if strings.HasSuffix(word, ".") ||
-			strings.HasSuffix(word, "?") ||
-			strings.HasSuffix(word, ".\"") ||
-			strings.HasSuffix(word, "!") {
-			upper := max + counter + 1
-			return strings.Join(words[:upper], " "), (upper < len(words))
+	
+	index += 1
+	
+	if index < len(words) {
+		for counter, word := range words[index:] {
+			if len(word) != utf8.RuneCountInString(word) {
+				break
+			}
+			if strings.HasSuffix(word, ".") ||
+				strings.HasSuffix(word, "?") ||
+				strings.HasSuffix(word, ".\"") ||
+				strings.HasSuffix(word, "!") {
+				upper := index + counter + 1
+				return strings.Join(words[:upper], " "), (upper < len(words))
+			}
 		}
+	} else if index > len(words) {
+		return strings.Join(words, " "), truncated
 	}
-
-	return strings.Join(words[:max], " "), true
+	
+	return strings.Join(words[:index], " "), truncated
 }
 
 // GetAsciidocContent calls asciidoctor or asciidoc as an external helper
--- a/helpers/content_test.go
+++ b/helpers/content_test.go
@@ -54,6 +54,8 @@
 		{"a b c", "a b c", 12, false},
 		{"a b c", "a b c", 3, false},
 		{"a", "a", 1, false},
+		{"Hello 中国", "Hello 中", 2, true},
+		{"Hello 中国", "Hello 中国", 3, false},
 		{"This is a sentence.", "This is a sentence.", 5, false},
 		{"This is also a sentence!", "This is also a sentence!", 1, false},
 		{"To be. Or not to be. That's the question.", "To be.", 1, true},
--- a/hugolib/page.go
+++ b/hugolib/page.go
@@ -31,6 +31,7 @@
 	"strings"
 	"sync"
 	"time"
+	"unicode/utf8"
 
 	"github.com/spf13/cast"
 	bp "github.com/spf13/hugo/bufferpool"
@@ -362,7 +363,16 @@
 }
 
 func (p *Page) analyzePage() {
-	p.WordCount = len(p.PlainWords())
+	p.WordCount = 0
+	for _, word := range p.PlainWords() {
+		runeCount := utf8.RuneCountInString(word)
+		if len(word) == runeCount {
+			p.WordCount++	
+		} else {
+			p.WordCount += runeCount
+		}
+	}
+	
 	p.FuzzyWordCount = int((p.WordCount+100)/100) * 100
 	p.ReadingTime = int((p.WordCount + 212) / 213)
 }