shithub: hugo

--- a/commands/convert.go

+++ b/commands/convert.go

@@ -215,7 +215,7 @@

 func parseContentFile(r io.Reader) (parsedFile, error) {

 	var pf parsedFile

-	psr, err := pageparser.Parse(r)

+	psr, err := pageparser.Parse(r, pageparser.Config{})

 	if err != nil {

 		return pf, err

--- a/helpers/emoji.go

+++ b/helpers/emoji.go

@@ -30,6 +30,12 @@

 	emojiMaxSize   int

+// Emoji returns the emoji for the given key, e.g. ":smile:", or nil if not found.

+func Emoji(key string) []byte {

+	emojiInit.Do(initEmoji)

+	return emojis[key]

+}

 // Emojify "emojifies" the input source.

 // Note that the input byte slice will be modified if needed.

 // See http://www.emoji-cheat-sheet.com/

--- a/hugolib/page_content.go

+++ b/hugolib/page_content.go

@@ -17,6 +17,8 @@

 	"bytes"

 	"io"

+	"github.com/gohugoio/hugo/helpers"

 	errors "github.com/pkg/errors"

 	bp "github.com/gohugoio/hugo/bufferpool"

@@ -149,6 +151,12 @@

 			result.WriteString(placeHolder)

 			ordinal++

 			s.shortcodes.Add(placeHolder, currShortcode)

+		case it.Type == pageparser.TypeEmoji:

+			if emoji := helpers.Emoji(it.ValStr()); emoji != nil {

+				result.Write(emoji)

+			} else {

+				result.Write(it.Val)

+			}

 		case it.IsEOF():

 			break Loop

 		case it.IsError():

@@ -170,7 +178,10 @@

 func (p *Page) parse(reader io.Reader) error {

-	parseResult, err := pageparser.Parse(reader)

+	parseResult, err := pageparser.Parse(

+		reader,

+		pageparser.Config{EnableEmoji: p.s.Cfg.GetBool("enableEmoji")},

+	)

 	if err != nil {

 		return err

--- a/hugolib/page_test.go

+++ b/hugolib/page_test.go

@@ -1497,6 +1497,45 @@

 	checkPageTitle(t, p, "Simple")

+// TestPageWithEmoji builds the same page with enableEmoji set to true and

+// to false. It checks that ":smile:" is rendered as an emoji only when the

+// setting is on, and that the unknown ":not:" is left untouched either way.

+func TestPageWithEmoji(t *testing.T) {

+	for _, enableEmoji := range []bool{true, false} {

+		v := viper.New()

+		v.Set("enableEmoji", enableEmoji)

+		b := newTestSitesBuilder(t)

+		b.WithViper(v)

+		b.WithSimpleConfigFile()

+		b.WithContent("page-emoji.md", `---

+title: "Hugo Smile"

+---

+This is a :smile:.

+<!--more-->

+Another :smile: This is :not: an emoji.

+`)

+		b.CreateSites().Build(BuildCfg{})

+		// The content has a ":smile:" on each side of the summary divider.

+		if enableEmoji {

+			b.AssertFileContent("public/page-emoji/index.html",

+				"This is a 😄",

+				"Another 😄",

+				"This is :not: an emoji",

+			)

+		} else {

+			b.AssertFileContent("public/page-emoji/index.html",

+				"This is a :smile:",

+				"Another :smile:",

+				"This is :not: an emoji",

+			)

+		}

+	}

+}

 // https://github.com/gohugoio/hugo/issues/5381

 func TestPageManualSummary(t *testing.T) {

 	b := newTestSitesBuilder(t)

--- a/hugolib/pagebundler_handlers.go

+++ b/hugolib/pagebundler_handlers.go

@@ -272,10 +272,6 @@

 		p := ctx.currentPage

-		if c.s.Cfg.GetBool("enableEmoji") {

-			p.workContent = helpers.Emojify(p.workContent)

-		}

 		p.workContent = p.renderContent(p.workContent)

 		tmpContent, tmpTableOfContents := helpers.ExtractTOC(p.workContent)

--- a/hugolib/shortcode.go

+++ b/hugolib/shortcode.go

@@ -177,6 +177,16 @@

 	pos       int // the position in bytes in the source file

+// innerString concatenates all inner items of the shortcode into a single

+// string. It is used for inline shortcode templates, whose content may be

+// lexed into more than one text item.

+//

+// Every inner item must be a string; any other type makes the type

+// assertion panic.

+func (sc shortcode) innerString() string {

+	var sb strings.Builder

+	for _, inner := range sc.inner {

+		sb.WriteString(inner.(string))

+	}

+	return sb.String()

+}

 func (sc shortcode) String() string {

 	// for testing (mostly), so any change here will break tests!

 	var params interface{}

@@ -363,7 +373,7 @@

 	if sc.isInline {

 		templName := path.Join("_inline_shortcode", p.Path(), sc.name)

 		if sc.isClosing {

-			templStr := sc.inner[0].(string)

+			templStr := sc.innerString()

 			var err error

 			tmpl, err = p.s.TextTmpl.Parse(templName, templStr)

--- a/parser/pageparser/item.go

+++ b/parser/pageparser/item.go

@@ -113,6 +113,7 @@

 	TypeFrontMatterTOML

 	TypeFrontMatterJSON

 	TypeFrontMatterORG

+	TypeEmoji

 	TypeIgnore // // The BOM Unicode byte order marker and possibly others

 	// shortcode items

--- a/parser/pageparser/pagelexer.go

+++ b/parser/pageparser/pagelexer.go

@@ -37,6 +37,12 @@

 	start      int // item start position

 	width      int // width of last element

+	// Contains lexers for shortcodes and other main section

+	// elements.

+	sectionHandlers *sectionHandlers

+	cfg Config

 	// The summary divider to look for.

 	summaryDivider []byte

 	// Set when we have parsed any summary divider

@@ -60,13 +66,17 @@

+// Config holds the options passed to Parse and stored on the page lexer.

+type Config struct {

+	// EnableEmoji registers the emoji section handler, so that ":name:"

+	// sequences in the main section are emitted as TypeEmoji items.

+	EnableEmoji bool

+}

 // note: the input position here is normally 0 (start), but

 // can be set if position of first shortcode is known

-func newPageLexer(input []byte, inputPosition int, stateStart stateFunc) *pageLexer {

+func newPageLexer(input []byte, stateStart stateFunc, cfg Config) *pageLexer {

 	lexer := &pageLexer{

 		input:      input,

-		pos:        inputPosition,

 		stateStart: stateStart,

+		cfg:        cfg,

 		lexerShortcodeState: lexerShortcodeState{

 			currLeftDelimItem:  tLeftDelimScNoMarkup,

 			currRightDelimItem: tRightDelimScNoMarkup,

@@ -75,6 +85,8 @@

 		items: make([]Item, 0, 5),

+	lexer.sectionHandlers = createSectionHandlers(lexer)

 	return lexer

@@ -100,6 +112,8 @@

 	delimOrg          = []byte("#+")

 	htmlCommentStart  = []byte("<!--")

 	htmlCommentEnd    = []byte("-->")

+	emojiDelim = byte(':')

 func (l *pageLexer) next() rune {

@@ -132,6 +146,10 @@

 	l.start = l.pos

+// isEOF reports whether the lexer position is at or past the end of the input.

+func (l *pageLexer) isEOF() bool {

+	return l.pos >= len(l.input)

+}

 // special case, do not send '\\' back to client

 func (l *pageLexer) ignoreEscapesAndEmit(t ItemType) {

 	val := bytes.Map(func(r rune) rune {

@@ -193,30 +211,80 @@

-func lexMainSection(l *pageLexer) stateFunc {

-	if l.isInHTMLComment {

-		return lexEndFromtMatterHTMLComment

+// lexEmoji lexes input starting at ":"; it emits TypeEmoji for a complete ":name:", otherwise just the ":" as tText.

+func lexEmoji(l *pageLexer) stateFunc {

+	pos := l.pos + 1

+	valid := false

+	for i := pos; i < len(l.input); i++ {

+		if i > pos && l.input[i] == emojiDelim {

+			pos = i + 1

+			valid = true

+			break

+		}

+		r, _ := utf8.DecodeRune(l.input[i:])

+		if !isAlphaNumeric(r) {

+			break

+		}

-	// Fast forward as far as possible.

-	var l1, l2 int

+	if valid {

+		l.pos = pos

+		l.emit(TypeEmoji)

+	} else {

+		l.pos++

+		l.emit(tText)

+	}

-	if !l.summaryDividerChecked && l.summaryDivider != nil {

-		l1 = l.index(l.summaryDivider)

-		if l1 == -1 {

-			l.summaryDividerChecked = true

+	return lexMainSection

+}

+// sectionHandlers groups the section handlers used while lexing the main

+// section of a page.

+type sectionHandlers struct {

+	// The lexer the handlers operate on.

+	l *pageLexer

+	// Set when none of the sections are found so we

+	// can safely stop looking and skip to the end.

+	skipAll bool

+	// The handlers, tried in slice order.

+	handlers    []*sectionHandler

+	// Scratch slice reused by skip to collect the handlers' match indexes.

+	skipIndexes []int

+}

+func (s *sectionHandlers) skip() int {

+	if s.skipAll {

+		return -1

+	}

+	s.skipIndexes = s.skipIndexes[:0]

+	var shouldSkip bool

+	for _, skipper := range s.handlers {

+		idx := skipper.skip()

+		if idx != -1 {

+			shouldSkip = true

+			s.skipIndexes = append(s.skipIndexes, idx)

-	l2 = l.index(leftDelimSc)

-	skip := minIndex(l1, l2)

-	if skip > 0 {

-		l.pos += skip

+	if !shouldSkip {

+		s.skipAll = true

+		return -1

-	for {

-		if l.isShortCodeStart() {

+	return minIndex(s.skipIndexes...)

+}

+func createSectionHandlers(l *pageLexer) *sectionHandlers {

+	shortCodeHandler := &sectionHandler{

+		l: l,

+		skipFunc: func(l *pageLexer) int {

+			return l.index(leftDelimSc)

+		},

+		lexFunc: func(origin stateFunc, l *pageLexer) (stateFunc, bool) {

+			if !l.isShortCodeStart() {

+				return origin, false

+			}

 			if l.isInline {

 				// If we're inside an inline shortcode, the only valid shortcode markup is

 				// the markup which closes it.

@@ -225,14 +293,11 @@

 				if end != len(l.input)-1 {

 					b = bytes.TrimSpace(b[end+1:])

 					if end == -1 || !bytes.HasPrefix(b, []byte(l.currShortcodeName+" ")) {

-						return l.errorf("inline shortcodes do not support nesting")

+						return l.errorf("inline shortcodes do not support nesting"), true

-			if l.pos > l.start {

-				l.emit(tText)

-			}

 			if l.hasPrefix(leftDelimScWithMarkup) {

 				l.currLeftDelimItem = tLeftDelimScWithMarkup

 				l.currRightDelimItem = tRightDelimScWithMarkup

@@ -240,32 +305,139 @@

 				l.currLeftDelimItem = tLeftDelimScNoMarkup

 				l.currRightDelimItem = tRightDelimScNoMarkup

-			return lexShortcodeLeftDelim

-		}

-		if !l.summaryDividerChecked && l.summaryDivider != nil {

-			if l.hasPrefix(l.summaryDivider) {

-				if l.pos > l.start {

-					l.emit(tText)

-				}

-				l.summaryDividerChecked = true

-				l.pos += len(l.summaryDivider)

-				// This makes it a little easier to reason about later.

-				l.consumeSpace()

-				l.emit(TypeLeadSummaryDivider)

+			return lexShortcodeLeftDelim, true

+		},

+	}

-				// We have already moved to the next.

-				continue

+	summaryDividerHandler := &sectionHandler{

+		l: l,

+		skipFunc: func(l *pageLexer) int {

+			if l.summaryDividerChecked || l.summaryDivider == nil {

+				return -1

+			return l.index(l.summaryDivider)

+		},

+		lexFunc: func(origin stateFunc, l *pageLexer) (stateFunc, bool) {

+			if !l.hasPrefix(l.summaryDivider) {

+				return origin, false

+			}

+			l.summaryDividerChecked = true

+			l.pos += len(l.summaryDivider)

+			// This makes it a little easier to reason about later.

+			l.consumeSpace()

+			l.emit(TypeLeadSummaryDivider)

+			return origin, true

+		},

+	}

+	handlers := []*sectionHandler{shortCodeHandler, summaryDividerHandler}

+	if l.cfg.EnableEmoji {

+		emojiHandler := &sectionHandler{

+			l: l,

+			skipFunc: func(l *pageLexer) int {

+				return l.indexByte(emojiDelim)

+			},

+			lexFunc: func(origin stateFunc, l *pageLexer) (stateFunc, bool) {

+				return lexEmoji, true

+			},

-		r := l.next()

-		if r == eof {

-			break

+		handlers = append(handlers, emojiHandler)

+	}

+	return &sectionHandlers{

+		l:           l,

+		handlers:    handlers,

+		skipIndexes: make([]int, len(handlers)),

+	}

+}

+func (s *sectionHandlers) lex(origin stateFunc) stateFunc {

+	if s.skipAll {

+		return nil

+	}

+	if s.l.pos > s.l.start {

+		s.l.emit(tText)

+	}

+	for _, handler := range s.handlers {

+		if handler.skipAll {

+			continue

+		next, handled := handler.lexFunc(origin, handler.l)

+		if next == nil || handled {

+			return next

+		}

+	// Not handled by the above.

+	s.l.pos++

+	return origin

+}

+// sectionHandler locates and lexes one kind of element in the main section.

+type sectionHandler struct {

+	// The lexer passed to skipFunc and lexFunc.

+	l *pageLexer

+	// No more sections of this type.

+	skipAll bool

+	// Returns the index of the next match, -1 if none found.

+	skipFunc func(l *pageLexer) int

+	// lexFunc lexes the current section and returns the next state func and

+	// a bool telling if this section was handled.

+	// Note that returning nil as the next state will terminate the

+	// lexer.

+	lexFunc func(origin stateFunc, l *pageLexer) (stateFunc, bool)

+}

+// skip returns the index reported by skipFunc for the next match of this

+// handler, or -1 if there is none. Once skipFunc has returned -1 the handler

+// is marked as exhausted and skipFunc is not called again.

+func (s *sectionHandler) skip() int {

+	if s.skipAll {

+		return -1

+	}

+	idx := s.skipFunc(s.l)

+	if idx == -1 {

+		// Remember the miss so later calls can return early.

+		s.skipAll = true

+	}

+	return idx

+}

+func lexMainSection(l *pageLexer) stateFunc {

+	if l.isEOF() {

+		return lexDone

+	}

+	if l.isInHTMLComment {

+		return lexEndFromtMatterHTMLComment

+	}

+	// Fast forward as far as possible.

+	skip := l.sectionHandlers.skip()

+	if skip == -1 {

+		l.pos = len(l.input)

+		return lexDone

+	} else if skip > 0 {

+		l.pos += skip

+	}

+	next := l.sectionHandlers.lex(lexMainSection)

+	if next != nil {

+		return next

+	}

+	l.pos = len(l.input)

 	return lexDone

@@ -297,8 +469,20 @@

 	return bytes.Index(l.input[l.pos:], sep)

+// indexByte returns the index of the first sep in the input from the current

+// position onwards, relative to that position, or -1 if sep is not present.

+func (l *pageLexer) indexByte(sep byte) int {

+	return bytes.IndexByte(l.input[l.pos:], sep)

+}

 func (l *pageLexer) hasPrefix(prefix []byte) bool {

 	return bytes.HasPrefix(l.input[l.pos:], prefix)

+}

+func (l *pageLexer) hasPrefixByte(prefix byte) bool {

+	b := l.input[l.pos:]

+	if len(b) == 0 {

+		return false

+	}

+	return b[0] == prefix

 // helper functions

--- a/parser/pageparser/pageparser.go

+++ b/parser/pageparser/pageparser.go

@@ -27,7 +27,7 @@

 // Result holds the parse result.

 type Result interface {

-	// Iterator returns a new Iterator positioned at the benning of the parse tree.

+	// Iterator returns a new Iterator positioned at the beginning of the parse tree.

 	Iterator() *Iterator

 	// Input returns the input to Parse.

 	Input() []byte

@@ -35,25 +35,19 @@

 var _ Result = (*pageLexer)(nil)

-// Parse parses the page in the given reader.

-func Parse(r io.Reader) (Result, error) {

+// Parse parses the page in the given reader according to the given Config.

+func Parse(r io.Reader, cfg Config) (Result, error) {

 	b, err := ioutil.ReadAll(r)

 	if err != nil {

 		return nil, errors.Wrap(err, "failed to read page content")

-	return parseBytes(b)

+	return parseBytes(b, cfg)

-func parseBytes(b []byte) (Result, error) {

-	lexer := newPageLexer(b, 0, lexIntroSection)

+func parseBytes(b []byte, cfg Config) (Result, error) {

+	lexer := newPageLexer(b, lexIntroSection, cfg)

 	lexer.run()

 	return lexer, nil

-}

-func parseMainSection(input []byte, from int) Result {

-	lexer := newPageLexer(input, from, lexMainSection)

-	lexer.run()

-	return lexer

 // An Iterator has methods to iterate a parsed page with support going back

--- a/parser/pageparser/pageparser_intro_test.go

+++ b/parser/pageparser/pageparser_intro_test.go

@@ -88,8 +88,8 @@

-func collect(input []byte, skipFrontMatter bool, stateStart stateFunc) (items []Item) {

-	l := newPageLexer(input, 0, stateStart)

+func collectWithConfig(input []byte, skipFrontMatter bool, stateStart stateFunc, cfg Config) (items []Item) {

+	l := newPageLexer(input, stateStart, cfg)

 	l.run()

 	t := l.newIterator()

@@ -101,6 +101,13 @@

 	return

+}

+func collect(input []byte, skipFrontMatter bool, stateStart stateFunc) (items []Item) {

+	var cfg Config

+	return collectWithConfig(input, skipFrontMatter, stateStart, cfg)

 // no positional checking, for now ...

--- /dev/null

+++ b/parser/pageparser/pageparser_main_test.go

@@ -1,0 +1,40 @@

+// Copyright 2018 The Hugo Authors. All rights reserved.

+//

+// Licensed under the Apache License, Version 2.0 (the "License");

+// you may not use this file except in compliance with the License.

+// You may obtain a copy of the License at

+// http://www.apache.org/licenses/LICENSE-2.0

+//

+// Unless required by applicable law or agreed to in writing, software

+// distributed under the License is distributed on an "AS IS" BASIS,

+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

+// See the License for the specific language governing permissions and

+// limitations under the License.

+package pageparser

+import (

+	"fmt"

+	"testing"

+)

+// TestMainSection lexes main-section input with emoji support enabled and

+// compares the emitted items with the expected ones.

+//

+// It is deliberately not named TestMain: the testing package reserves that

+// name for the func TestMain(m *testing.M) entry point.

+func TestMainSection(t *testing.T) {

+	t.Parallel()

+	var mainTests = []lexerTest{

+		{"emoji #1", "Some text with :emoji:", []Item{nti(tText, "Some text with "), nti(TypeEmoji, ":emoji:"), tstEOF}},

+		{"emoji #2", "Some text with :emoji: and some text.", []Item{nti(tText, "Some text with "), nti(TypeEmoji, ":emoji:"), nti(tText, " and some text."), tstEOF}},

+		{"looks like an emoji #1", "Some text and then :emoji", []Item{nti(tText, "Some text and then "), nti(tText, ":"), nti(tText, "emoji"), tstEOF}},

+		{"looks like an emoji #2", "Some text and then ::", []Item{nti(tText, "Some text and then "), nti(tText, ":"), nti(tText, ":"), tstEOF}},

+		{"looks like an emoji #3", ":Some :text", []Item{nti(tText, ":"), nti(tText, "Some "), nti(tText, ":"), nti(tText, "text"), tstEOF}},

+	}

+	for i, test := range mainTests {

+		items := collectWithConfig([]byte(test.input), false, lexMainSection, Config{EnableEmoji: true})

+		if !equal(items, test.items) {

+			got := crLfReplacer.Replace(fmt.Sprint(items))

+			expected := crLfReplacer.Replace(fmt.Sprint(test.items))

+			t.Errorf("[%d] %s: got\n\t%v\nexpected\n\t%v", i, test.name, got, expected)

+		}

+	}

+}

--- a/parser/pageparser/pageparser_shortcode_test.go

+++ b/parser/pageparser/pageparser_shortcode_test.go

@@ -152,7 +152,8 @@

 	{"basic inline", `{{< sc1.inline >}}Hello World{{< /sc1.inline >}}`, []Item{tstLeftNoMD, tstSC1Inline, tstRightNoMD, tstText, tstLeftNoMD, tstSCClose, tstSC1Inline, tstRightNoMD, tstEOF}},

 	{"basic inline with space", `{{< sc1.inline >}}Hello World{{< / sc1.inline >}}`, []Item{tstLeftNoMD, tstSC1Inline, tstRightNoMD, tstText, tstLeftNoMD, tstSCClose, tstSC1Inline, tstRightNoMD, tstEOF}},

 	{"inline self closing", `{{< sc1.inline >}}Hello World{{< /sc1.inline >}}Hello World{{< sc1.inline />}}`, []Item{tstLeftNoMD, tstSC1Inline, tstRightNoMD, tstText, tstLeftNoMD, tstSCClose, tstSC1Inline, tstRightNoMD, tstText, tstLeftNoMD, tstSC1Inline, tstSCClose, tstRightNoMD, tstEOF}},

-	{"inline with nested shortcode (not supported)", `{{< sc1.inline >}}Hello World{{< sc1 >}}{{< /sc1.inline >}}`, []Item{tstLeftNoMD, tstSC1Inline, tstRightNoMD, nti(tError, "inline shortcodes do not support nesting")}},

+	{"inline with template syntax", `{{< sc1.inline >}}{{ .Get 0 }}{{ .Get 1 }}{{< /sc1.inline >}}`, []Item{tstLeftNoMD, tstSC1Inline, tstRightNoMD, nti(tText, "{{ .Get 0 }}"), nti(tText, "{{ .Get 1 }}"), tstLeftNoMD, tstSCClose, tstSC1Inline, tstRightNoMD, tstEOF}},

+	{"inline with nested shortcode (not supported)", `{{< sc1.inline >}}Hello World{{< sc1 >}}{{< /sc1.inline >}}`, []Item{tstLeftNoMD, tstSC1Inline, tstRightNoMD, tstText, nti(tError, "inline shortcodes do not support nesting")}},

 	{"inline case mismatch", `{{< sc1.Inline >}}Hello World{{< /sc1.Inline >}}`, []Item{tstLeftNoMD, nti(tError, "period in shortcode name only allowed for inline identifiers")}},

@@ -171,10 +172,11 @@

 	for i, input := range shortCodeLexerTests {

 		testInputs[i] = []byte(input.input)

+	var cfg Config

 	b.ResetTimer()

 	for i := 0; i < b.N; i++ {

 		for _, input := range testInputs {

-			items := collect(input, true, lexMainSection)

+			items := collectWithConfig(input, true, lexMainSection, cfg)

 			if len(items) == 0 {

--- a/parser/pageparser/pageparser_test.go

+++ b/parser/pageparser/pageparser_test.go

@@ -34,10 +34,37 @@

 	input := []byte(start + strings.Repeat(strings.Repeat("this is text", 30)+"{{< myshortcode >}}This is some inner content.{{< /myshortcode >}}", 10))

+	cfg := Config{EnableEmoji: false}

 	b.ResetTimer()

 	for i := 0; i < b.N; i++ {

-		if _, err := parseBytes(input); err != nil {

+		if _, err := parseBytes(input, cfg); err != nil {

+			b.Fatal(err)

+		}

+	}

+}

+func BenchmarkParseWithEmoji(b *testing.B) {

+	start := `

+---

+title: "Front Matters"

+description: "It really does"

+---

+This is some summary. This is some summary. This is some summary. This is some summary.

+ <!--more-->

+`

+	input := []byte(start + strings.Repeat("this is not emoji: ", 50) + strings.Repeat("some text ", 70) + strings.Repeat("this is not: ", 50) + strings.Repeat("but this is a :smile: ", 3) + strings.Repeat("some text ", 70))

+	cfg := Config{EnableEmoji: true}

+	b.ResetTimer()

+	for i := 0; i < b.N; i++ {

+		if _, err := parseBytes(input, cfg); err != nil {

 			b.Fatal(err)