ref: 77c60a3440806067109347d04eb5368b65ea0fe8
parent: 90af334c2192287b982238829db003848edd53c8
author: Bjørn Erik Pedersen <[email protected]>
date: Sun Jul 12 07:05:37 EDT 2015
Add RuneCount to Page Fixes #1266
--- a/docs/content/templates/variables.md
+++ b/docs/content/templates/variables.md
@@ -47,6 +47,7 @@
**.NextInSection** Pointer to the following content within the same section (based on pub date)<br>
**.FuzzyWordCount** The approximate number of words in the content.<br>
**.WordCount** The number of words in the content.<br>
+**.RuneCount** The number of [runes](http://blog.golang.org/strings) in the content, excluding any whitespace. This may be a good alternative to `.WordCount` for Japanese and other CJK languages, where splitting words on spaces makes no sense.<br>
**.ReadingTime** The estimated time it takes to read the content in minutes.<br>
**.Weight** Assigned weight (in the front matter) to this content, used in sorting.<br>
**.RawContent** Raw Markdown content without the metadata header. Useful with [remarkjs.com](http://remarkjs.com)<br>
--- a/helpers/general.go
+++ b/helpers/general.go
@@ -228,6 +228,11 @@
return hex.EncodeToString(h.Sum([]byte{}))
}
+// IsWhitespace reports whether r is a space, tab, newline or carriage return.
+// No other characters (for example Unicode spaces such as U+3000) are matched.
+func IsWhitespace(r rune) bool {
+ return r == ' ' || r == '\t' || r == '\n' || r == '\r'
+}
+
// Seq creates a sequence of integers.
// It's named and used as GNU's seq.
// Examples:
--- a/hugolib/page.go
+++ b/hugolib/page.go
@@ -66,6 +66,7 @@
contentShortCodes map[string]string
plain string // TODO should be []byte
plainWords []string
+ plainRuneCount int
plainInit sync.Once
renderingConfig *helpers.Blackfriday
renderingConfigInit sync.Once
@@ -108,10 +109,24 @@
return p.plainWords
}
+// RuneCount returns the number of runes in the plain (HTML-stripped) content,
+// not counting the whitespace characters matched by helpers.IsWhitespace.
+func (p *Page) RuneCount() int {
+ // Populate the cached count on first use; later calls reuse it.
+ p.initPlain()
+ return p.plainRuneCount
+}
+
func (p *Page) initPlain() {
p.plainInit.Do(func() {
p.plain = helpers.StripHTML(string(p.Content))
p.plainWords = strings.Fields(p.plain)
+ runeCount := 0
+ for _, r := range p.plain {
+ if !helpers.IsWhitespace(r) {
+ runeCount++
+ }
+ }
+ p.plainRuneCount = runeCount
+ return
})
}
--- a/hugolib/page_test.go
+++ b/hugolib/page_test.go
@@ -140,6 +140,16 @@
Some more text
`
+ // Front matter followed by a body of five '€' runes separated by single
+ // spaces and surrounded by blank lines; used by TestRuneCount.
+ // NOTE(review): "UFT8" in the name looks like a typo for "UTF8".
+ SIMPLE_PAGE_WITH_FIVE_MULTIBYTE_UFT8_RUNES = `---
+title: Simple
+---
+
+
+€ € € € €
+
+
+`
+
SIMPLE_PAGE_WITH_LONG_CONTENT = `---
title: Simple
---
@@ -468,6 +478,21 @@
t.Fatalf("Unable to prase page.")
}
checkPageDate(t, p, d)
+}
+
+// TestRuneCount verifies that RuneCount counts each multi-byte rune once and
+// ignores the spaces and newlines around them.
+func TestRuneCount(t *testing.T) {
+ p, _ := NewPage("simple.md")
+ _, err := p.ReadFrom(strings.NewReader(SIMPLE_PAGE_WITH_FIVE_MULTIBYTE_UFT8_RUNES))
+ // Check the read error before the page is processed any further.
+ if err != nil {
+ t.Fatalf("Unable to create a page with frontmatter and body content: %s", err)
+ }
+ p.Convert()
+ p.analyzePage()
+
+ if p.RuneCount() != 5 {
+ t.Fatalf("incorrect rune count for content '%s'. expected %v, got %v", p.plain, 5, p.RuneCount())
+ }
}
func TestWordCount(t *testing.T) {