shithub: hugo

Download patch

ref: 77c60a3440806067109347d04eb5368b65ea0fe8
parent: 90af334c2192287b982238829db003848edd53c8
author: Bjørn Erik Pedersen <[email protected]>
date: Sun Jul 12 07:05:37 EDT 2015

Add RuneCount to Page

Fixes #1266

--- a/docs/content/templates/variables.md
+++ b/docs/content/templates/variables.md
@@ -47,6 +47,7 @@
 **.NextInSection** Pointer to the following content within the same section (based on pub date)<br>
 **.FuzzyWordCount** The approximate number of words in the content.<br>
 **.WordCount** The number of words in the content.<br>
+**.RuneCount** The number of [runes](http://blog.golang.org/strings) in the content, excluding any whitespace. This may be a good alternative to `.WordCount` for Japanese and other CJK languages, where splitting words on spaces makes no sense.<br>
 **.ReadingTime** The estimated time it takes to read the content in minutes.<br>
 **.Weight** Assigned weight (in the front matter) to this content, used in sorting.<br>
 **.RawContent** Raw Markdown content without the metadata header. Useful with [remarkjs.com](http://remarkjs.com)<br>
--- a/helpers/general.go
+++ b/helpers/general.go
@@ -228,6 +228,11 @@
 	return hex.EncodeToString(h.Sum([]byte{}))
 }
 
+// IsWhitespace reports whether r is a space, tab, newline or carriage return; no other Unicode space characters are matched.
+func IsWhitespace(r rune) bool {
+	return r == ' ' || r == '\t' || r == '\n' || r == '\r'
+}
+
 // Seq creates a sequence of integers.
 // It's named and used as GNU's seq.
 // Examples:
--- a/hugolib/page.go
+++ b/hugolib/page.go
@@ -66,6 +66,7 @@
 	contentShortCodes   map[string]string
 	plain               string // TODO should be []byte
 	plainWords          []string
+	plainRuneCount      int
 	plainInit           sync.Once
 	renderingConfig     *helpers.Blackfriday
 	renderingConfigInit sync.Once
@@ -108,10 +109,24 @@
 	return p.plainWords
 }
 
+// RuneCount returns the number of runes in the plain (HTML-stripped) content, not counting spaces, tabs, newlines or carriage returns.
+func (p *Page) RuneCount() int {
+	p.initPlain()
+	return p.plainRuneCount
+}
+
 func (p *Page) initPlain() {
 	p.plainInit.Do(func() {
 		p.plain = helpers.StripHTML(string(p.Content))
 		p.plainWords = strings.Fields(p.plain)
+		runeCount := 0
+		for _, r := range p.plain {
+			if !helpers.IsWhitespace(r) {
+				runeCount++
+			}
+		}
+		p.plainRuneCount = runeCount
+		return
 	})
 }
 
--- a/hugolib/page_test.go
+++ b/hugolib/page_test.go
@@ -140,6 +140,16 @@
 Some more text
 `
 
+	SIMPLE_PAGE_WITH_FIVE_MULTIBYTE_UFT8_RUNES = `---
+title: Simple
+---
+
+
+€ € € € €
+
+
+`
+
 	SIMPLE_PAGE_WITH_LONG_CONTENT = `---
 title: Simple
 ---
@@ -468,6 +478,21 @@
 		t.Fatalf("Unable to prase page.")
 	}
 	checkPageDate(t, p, d)
+}
+
+// TestRuneCount checks that five multi-byte runes separated by spaces yield a RuneCount of 5.
+func TestRuneCount(t *testing.T) {
+	p, _ := NewPage("simple.md")
+	_, err := p.ReadFrom(strings.NewReader(SIMPLE_PAGE_WITH_FIVE_MULTIBYTE_UFT8_RUNES))
+	// Fail before converting: a page that could not be read has no content worth analysing.
+	if err != nil {
+		t.Fatalf("Unable to create a page with frontmatter and body content: %s", err)
+	}
+	p.Convert()
+	p.analyzePage()
+
+	if p.RuneCount() != 5 {
+		t.Fatalf("incorrect rune count for content '%s'. expected %v, got %v", p.plain, 5, p.RuneCount())
+	}
 }
 
 func TestWordCount(t *testing.T) {