shithub: hugo

--- a/commands/hugo.go

+++ b/commands/hugo.go

@@ -168,6 +168,7 @@

 	viper.SetDefault("RSSUri", "index.xml")

 	viper.SetDefault("SectionPagesMenu", "")

 	viper.SetDefault("DisablePathToLower", false)

+	viper.SetDefault("HasCJKLanguage", false)

 // InitializeConfig initializes a config file with sensible default configuration flags.

--- a/helpers/content.go

+++ b/helpers/content.go

@@ -19,9 +19,9 @@

 import (

 	"bytes"

-	"unicode/utf8"

 	"html/template"

 	"os/exec"

+	"unicode/utf8"

 	"github.com/miekg/mmark"

 	"github.com/russross/blackfriday"

@@ -178,7 +178,6 @@

 func getMarkdownExtensions(ctx *RenderingContext) int {

 	flags := 0 | blackfriday.EXTENSION_NO_INTRA_EMPHASIS |

 		blackfriday.EXTENSION_TABLES | blackfriday.EXTENSION_FENCED_CODE |

@@ -385,61 +384,51 @@

 	return strings.Join(words[:max], " ")

-// TruncateWordsToWholeSentence takes content and an int

-// and returns entire sentences from content, delimited by the int

-// and whether it's truncated or not.

-func TruncateWordsToWholeSentence(words []string, max int) (string, bool) {

+func TruncateWordsByRune(words []string, max int) (string, bool) {

 	count := 0

-	index, word := 0, ""

-	truncated := false

-	for index, word = range words {

+	for index, word := range words {

+		if count >= max {

+			return strings.Join(words[:index], " "), true

+		}

 		runeCount := utf8.RuneCountInString(word)

 		if len(word) == runeCount {

-			count++;

+			count++

+		} else if count+runeCount < max {

+			count += runeCount

 		} else {

-			if count + runeCount <= max {

-				count += runeCount

-			} else {

-				offset := 0

-				for count < max {

-					_, width := utf8.DecodeRuneInString(word[offset:])

-			        offset += width

+			for ri, _ := range word {

+				if count >= max {

+					truncatedWords := append(words[:index], word[:ri])

+					return strings.Join(truncatedWords, " "), true

+				} else {

 					count++

-				words[index] = word[:offset]

-				truncated = true

-		if count >= max {

-			if index < len(words) - 1 {

-				truncated = true

-			}

-			break

-		}

-	index += 1

-	if index < len(words) {

-		for counter, word := range words[index:] {

-			if len(word) != utf8.RuneCountInString(word) {

-				break

-			}

-			if strings.HasSuffix(word, ".") ||

-				strings.HasSuffix(word, "?") ||

-				strings.HasSuffix(word, ".\"") ||

-				strings.HasSuffix(word, "!") {

-				upper := index + counter + 1

-				return strings.Join(words[:upper], " "), (upper < len(words))

-			}

+	return strings.Join(words, " "), false

+}

+// TruncateWordsToWholeSentence takes content and an int

+// and returns entire sentences from content, delimited by the int

+// and whether it's truncated or not.

+func TruncateWordsToWholeSentence(words []string, max int) (string, bool) {

+	if max >= len(words) {

+		return strings.Join(words, " "), false

+	}

+	for counter, word := range words[max:] {

+		if strings.HasSuffix(word, ".") ||

+			strings.HasSuffix(word, "?") ||

+			strings.HasSuffix(word, ".\"") ||

+			strings.HasSuffix(word, "!") {

+			upper := max + counter + 1

+			return strings.Join(words[:upper], " "), (upper < len(words))

-	} else if index > len(words) {

-		return strings.Join(words, " "), truncated

-	return strings.Join(words[:index], " "), truncated

+	return strings.Join(words[:max], " "), true

 // GetAsciidocContent calls asciidoctor or asciidoc as an external helper

--- a/helpers/content_test.go

+++ b/helpers/content_test.go

@@ -1,10 +1,11 @@

 package helpers

 import (

-	"github.com/stretchr/testify/assert"

 	"html/template"

 	"strings"

 	"testing"

+	"github.com/stretchr/testify/assert"

 const tstHTMLContent = "<!DOCTYPE html><html><head><script src=\"http://two/foobar.js\"></script></head><body><nav><ul><li hugo-nav=\"section_0\"></li><li hugo-nav=\"section_1\"></li></ul></nav><article>content <a href=\"http://two/foobar\">foobar</a>. Follow up</article><p>This is some text.<br>And some more.</p></body></html>"

@@ -54,8 +55,6 @@

 		{"a b c", "a b c", 12, false},

 		{"a b c", "a b c", 3, false},

 		{"a", "a", 1, false},

-		{"Hello 中国", "Hello 中", 2, true},

-		{"Hello 中国", "Hello 中国", 3, false},

 		{"This is a sentence.", "This is a sentence.", 5, false},

 		{"This is also a sentence!", "This is also a sentence!", 1, false},

 		{"To be. Or not to be. That's the question.", "To be.", 1, true},

@@ -63,6 +62,39 @@

 	for i, d := range data {

 		output, truncated := TruncateWordsToWholeSentence(strings.Fields(d.input), d.max)

+		if d.expected != output {

+			t.Errorf("Test %d failed. Expected %q got %q", i, d.expected, output)

+		}

+		if d.truncated != truncated {

+			t.Errorf("Test %d failed. Expected truncated=%t got %t", i, d.truncated, truncated)

+		}

+	}

+}

+func TestTruncateWordsByRune(t *testing.T) {

+	type test struct {

+		input, expected string

+		max             int

+		truncated       bool

+	}

+	data := []test{

+		{"", "", 1, false},

+		{"a b c", "a b c", 12, false},

+		{"a b c", "a b c", 3, false},

+		{"a", "a", 1, false},

+		{"Hello 中国", "", 0, true},

+		{"这是中文，全中文。", "这是中文，", 5, true},

+		{"Hello 中国", "Hello 中", 2, true},

+		{"Hello 中国", "Hello 中国", 3, false},

+		{"Hello中国 Good 好的", "Hello中国 Good 好", 9, true},

+		{"This is a sentence.", "This is", 2, true},

+		{"This is also a sentence!", "This", 1, true},

+		{"To be. Or not to be. That's the question.", "To be. Or not", 4, true},

+		{" \nThis is    not a sentence\n ", "This is not", 3, true},

+	}

+	for i, d := range data {

+		output, truncated := TruncateWordsByRune(strings.Fields(d.input), d.max)

 		if d.expected != output {

 			t.Errorf("Test %d failed. Expected %q got %q", i, d.expected, output)

--- a/hugolib/page.go

+++ b/hugolib/page.go

@@ -28,6 +28,7 @@

 	"net/url"

 	"path"

 	"path/filepath"

+	"regexp"

 	"strings"

 	"sync"

 	"time"

@@ -42,6 +43,10 @@

 	"github.com/spf13/viper"

+var (

+	cjk = regexp.MustCompile(`\p{Han}|\p{Hangul}|\p{Hiragana}|\p{Katakana}`)

+)

 type Page struct {

 	Params          map[string]interface{}

 	Content         template.HTML

@@ -67,7 +72,6 @@

 	contentShortCodes   map[string]string

 	plain               string // TODO should be []byte

 	plainWords          []string

-	plainRuneCount      int

 	plainInit           sync.Once

 	plainSecondaryInit  sync.Once

 	renderingConfig     *helpers.Blackfriday

@@ -78,6 +82,7 @@

 	Node

 	pageMenus     PageMenus

 	pageMenusInit sync.Once

+	isCJKLanguage bool

 type Source struct {

@@ -111,12 +116,6 @@

 	return p.plainWords

-// RuneCount returns the rune count, excluding any whitespace, of the plain content.

-func (p *Page) RuneCount() int {

-	p.initPlainSecondary()

-	return p.plainRuneCount

-}

 func (p *Page) initPlain() {

 	p.plainInit.Do(func() {

 		p.plain = helpers.StripHTML(string(p.Content))

@@ -125,20 +124,6 @@

})

-func (p *Page) initPlainSecondary() {

-	p.plainSecondaryInit.Do(func() {

-		p.initPlain()

-		runeCount := 0

-		for _, r := range p.plain {

-			if !helpers.IsWhitespace(r) {

-				runeCount++

-			}

-		}

-		p.plainRuneCount = runeCount

-		return

-	})

-}

 func (p *Page) IsNode() bool {

 	return false

@@ -218,7 +203,13 @@

 	} else {

 		// If hugo defines split:

 		// render, strip html, then split

-		summary, truncated := helpers.TruncateWordsToWholeSentence(p.PlainWords(), helpers.SummaryLength)

+		var summary string

+		var truncated bool

+		if p.isCJKLanguage {

+			summary, truncated = helpers.TruncateWordsByRune(p.PlainWords(), helpers.SummaryLength)

+		} else {

+			summary, truncated = helpers.TruncateWordsToWholeSentence(p.PlainWords(), helpers.SummaryLength)

+		}

 		p.Summary = template.HTML(summary)

 		p.Truncated = truncated

@@ -363,18 +354,27 @@

 func (p *Page) analyzePage() {

-	p.WordCount = 0

-	for _, word := range p.PlainWords() {

-		runeCount := utf8.RuneCountInString(word)

-		if len(word) == runeCount {

-			p.WordCount++

-		} else {

-			p.WordCount += runeCount

+	if p.isCJKLanguage {

+		p.WordCount = 0

+		for _, word := range p.PlainWords() {

+			runeCount := utf8.RuneCountInString(word)

+			if len(word) == runeCount {

+				p.WordCount++

+			} else {

+				p.WordCount += runeCount

+			}

+	} else {

+		p.WordCount = len(p.PlainWords())

 	p.FuzzyWordCount = int((p.WordCount+100)/100) * 100

-	p.ReadingTime = int((p.WordCount + 212) / 213)

+	if p.isCJKLanguage {

+		p.ReadingTime = int((p.WordCount + 500) / 501)

+	} else {

+		p.ReadingTime = int((p.WordCount + 212) / 213)

+	}

 func (p *Page) permalink() (*url.URL, error) {

@@ -481,7 +481,7 @@

 	m := f.(map[string]interface{})

 	var err error

-	var draft, published *bool

+	var draft, published, isCJKLanguage *bool

 	for k, v := range m {

 		loki := strings.ToLower(k)

 		switch loki {

@@ -542,6 +542,9 @@

 			p.Status = cast.ToString(v)

 		case "sitemap":

 			p.Sitemap = parseSitemap(cast.ToStringMap(v))

+		case "iscjklanguage":

+			isCJKLanguage = new(bool)

+			*isCJKLanguage = cast.ToBool(v)

 		default:

 			// If not one of the explicit values, store in Params

 			switch vv := v.(type) {

@@ -596,6 +599,16 @@

 		p.Lastmod = p.Date

+	if isCJKLanguage != nil {

+		p.isCJKLanguage = *isCJKLanguage

+	} else if viper.GetBool("HasCJKLanguage") {

+		if cjk.Match(p.rawContent) {

+			p.isCJKLanguage = true

+		} else {

+			p.isCJKLanguage = false

+		}

+	}

 	return nil

@@ -766,6 +779,8 @@

 	p.renderable = psr.IsRenderable()

 	p.frontmatter = psr.FrontMatter()

+	p.rawContent = psr.Content()

 	meta, err := psr.Metadata()

 	if meta != nil {

 		if err != nil {

@@ -777,8 +792,6 @@

 			return err

-	p.rawContent = psr.Content()

 	return nil

--- a/hugolib/page_test.go

+++ b/hugolib/page_test.go

@@ -146,16 +146,67 @@

 Some more text

-	SIMPLE_PAGE_WITH_FIVE_MULTIBYTE_UFT8_RUNES = `---

+	SIMPLE_PAGE_WITH_ALL_CJK_RUNES = `---

 title: Simple

---

 € € € € €

+你好

+도형이

+カテゴリー

+	SIMPLE_PAGE_WITH_MAIN_ENGLISH_WITH_CJK_RUNES = `---

+title: Simple

+---

+In Chinese, 好 means good.  In Chinese, 好 means good.

+In Chinese, 好 means good.  In Chinese, 好 means good.

+In Chinese, 好 means good.  In Chinese, 好 means good.

+In Chinese, 好 means good.  In Chinese, 好 means good.

+In Chinese, 好 means good.  In Chinese, 好 means good.

+In Chinese, 好 means good.  In Chinese, 好 means good.

+In Chinese, 好 means good.  In Chinese, 好 means good.

+More then 70 words.

+`

+	SIMPLE_PAGE_WITH_MAIN_ENGLISH_WITH_CJK_RUNES_SUMMARY = "In Chinese, 好 means good. In Chinese, 好 means good. " +

+		"In Chinese, 好 means good. In Chinese, 好 means good. " +

+		"In Chinese, 好 means good. In Chinese, 好 means good. " +

+		"In Chinese, 好 means good. In Chinese, 好 means good. " +

+		"In Chinese, 好 means good. In Chinese, 好 means good. " +

+		"In Chinese, 好 means good. In Chinese, 好 means good. " +

+		"In Chinese, 好 means good. In Chinese, 好 means good."

+	SIMPLE_PAGE_WITH_ISCJKLANGUAGE_FALSE = `---

+title: Simple

+isCJKLanguage: false

+---

+In Chinese, 好的啊 means good.  In Chinese, 好的呀 means good.

+In Chinese, 好的啊 means good.  In Chinese, 好的呀 means good.

+In Chinese, 好的啊 means good.  In Chinese, 好的呀 means good.

+In Chinese, 好的啊 means good.  In Chinese, 好的呀 means good.

+In Chinese, 好的啊 means good.  In Chinese, 好的呀 means good.

+In Chinese, 好的啊 means good.  In Chinese, 好的呀 means good.

+In Chinese, 好的啊 means good.  In Chinese, 好的呀呀 means good enough.

+More then 70 words.

+`

+	SIMPLE_PAGE_WITH_ISCJKLANGUAGE_FALSE_SUMMARY = "In Chinese, 好的啊 means good. In Chinese, 好的呀 means good. " +

+		"In Chinese, 好的啊 means good. In Chinese, 好的呀 means good. " +

+		"In Chinese, 好的啊 means good. In Chinese, 好的呀 means good. " +

+		"In Chinese, 好的啊 means good. In Chinese, 好的呀 means good. " +

+		"In Chinese, 好的啊 means good. In Chinese, 好的呀 means good. " +

+		"In Chinese, 好的啊 means good. In Chinese, 好的呀 means good. " +

+		"In Chinese, 好的啊 means good. In Chinese, 好的呀呀 means good enough."

 	SIMPLE_PAGE_WITH_LONG_CONTENT = `---

 title: Simple

---

@@ -584,9 +635,11 @@

 	checkPageDate(t, p, d)

-func TestRuneCount(t *testing.T) {

+func TestWordCountWithAllCJKRunesWithoutHasCJKLanguage(t *testing.T) {

+	viper.Reset()

 	p, _ := NewPage("simple.md")

-	_, err := p.ReadFrom(strings.NewReader(SIMPLE_PAGE_WITH_FIVE_MULTIBYTE_UFT8_RUNES))

+	_, err := p.ReadFrom(strings.NewReader(SIMPLE_PAGE_WITH_ALL_CJK_RUNES))

 	p.Convert()

 	p.analyzePage()

 	if err != nil {

@@ -593,9 +646,75 @@

 		t.Fatalf("Unable to create a page with frontmatter and body content: %s", err)

-	if p.RuneCount() != 5 {

-		t.Fatalf("incorrect rune count for content '%s'. expected %v, got %v", p.plain, 5, p.RuneCount())

+	if p.WordCount != 8 {

+		t.Fatalf("incorrect word count for content '%s'. expected %v, got %v", p.plain, 8, p.WordCount)

+	}

+}

+func TestWordCountWithAllCJKRunesHasCJKLanguage(t *testing.T) {

+	viper.Reset()

+	defer viper.Reset()

+	viper.Set("HasCJKLanguage", true)

+	p, _ := NewPage("simple.md")

+	_, err := p.ReadFrom(strings.NewReader(SIMPLE_PAGE_WITH_ALL_CJK_RUNES))

+	p.Convert()

+	p.analyzePage()

+	if err != nil {

+		t.Fatalf("Unable to create a page with frontmatter and body content: %s", err)

+	}

+	if p.WordCount != 15 {

+		t.Fatalf("incorrect word count for content '%s'. expected %v, got %v", p.plain, 15, p.WordCount)

+	}

+}

+func TestWordCountWithMainEnglishWithCJKRunes(t *testing.T) {

+	viper.Reset()

+	defer viper.Reset()

+	viper.Set("HasCJKLanguage", true)

+	p, _ := NewPage("simple.md")

+	_, err := p.ReadFrom(strings.NewReader(SIMPLE_PAGE_WITH_MAIN_ENGLISH_WITH_CJK_RUNES))

+	p.Convert()

+	p.analyzePage()

+	if err != nil {

+		t.Fatalf("Unable to create a page with frontmatter and body content: %s", err)

+	}

+	if p.WordCount != 74 {

+		t.Fatalf("incorrect word count for content '%s'. expected %v, got %v", p.plain, 74, p.WordCount)

+	}

+	if p.Summary != SIMPLE_PAGE_WITH_MAIN_ENGLISH_WITH_CJK_RUNES_SUMMARY {

+		t.Fatalf("incorrect Summary for content '%s'. expected %v, got %v", p.plain,

+			SIMPLE_PAGE_WITH_MAIN_ENGLISH_WITH_CJK_RUNES_SUMMARY, p.Summary)

+	}

+}

+func TestWordCountWithIsCJKLanguageFalse(t *testing.T) {

+	viper.Reset()

+	defer viper.Reset()

+	viper.Set("HasCJKLanguage", true)

+	p, _ := NewPage("simple.md")

+	_, err := p.ReadFrom(strings.NewReader(SIMPLE_PAGE_WITH_ISCJKLANGUAGE_FALSE))

+	p.Convert()

+	p.analyzePage()

+	if err != nil {

+		t.Fatalf("Unable to create a page with frontmatter and body content: %s", err)

+	}

+	if p.WordCount != 75 {

+		t.Fatalf("incorrect word count for content '%s'. expected %v, got %v", p.plain, 75, p.WordCount)

+	}

+	if p.Summary != SIMPLE_PAGE_WITH_ISCJKLANGUAGE_FALSE_SUMMARY {

+		t.Fatalf("incorrect Summary for content '%s'. expected %v, got %v", p.plain,

+			SIMPLE_PAGE_WITH_ISCJKLANGUAGE_FALSE_SUMMARY, p.Summary)