shithub: hugo

--- a/parser/pageparser/item.go

+++ b/parser/pageparser/item.go

@@ -73,10 +73,10 @@

 		return i.Val

 	case i.typ > tKeywordMarker:

 		return fmt.Sprintf("<%s>", i.Val)

-	case len(i.Val) > 20:

-		return fmt.Sprintf("%.20q...", i.Val)

+	case len(i.Val) > 50:

+		return fmt.Sprintf("%v:%.20q...", i.typ, i.Val)

-	return fmt.Sprintf("[%s]", i.Val)

+	return fmt.Sprintf("%v:[%s]", i.typ, i.Val)

 type itemType int

@@ -85,6 +85,15 @@

 	tError itemType = iota

 	tEOF

+	// page items

+	tHTMLLead          // <

+	tSummaryDivider    // <!--more-->

+	tSummaryDividerOrg // # more

+	tFrontMatterYAML

+	tFrontMatterTOML

+	tFrontMatterJSON

+	tFrontMatterORG

 	// shortcode items

 	tLeftDelimScNoMarkup

 	tRightDelimScNoMarkup

@@ -95,8 +104,7 @@

 	tScParam

 	tScParamVal

-	//itemIdentifier

-	tText // plain text, used for everything outside the shortcodes

+	tText // plain text

 	// preserved for later - keywords come after this

 	tKeywordMarker

--- a/parser/pageparser/pagelexer.go

+++ b/parser/pageparser/pagelexer.go

@@ -44,14 +44,16 @@

 type pageLexer struct {

-	name    string

-	input   string

-	state   stateFunc

-	pos     pos // input position

-	start   pos // item start position

-	width   pos // width of last element

-	lastPos pos // position of the last item returned by nextItem

+	input      string

+	stateStart stateFunc

+	state      stateFunc

+	pos        pos // input position

+	start      pos // item start position

+	width      pos // width of last element

+	lastPos    pos // position of the last item returned by nextItem

+	contentSections int

 	lexerShortcodeState

 	// items delivered to client

@@ -63,7 +65,7 @@

 func ParseFrom(s string, from int) *Tokens {

-	lexer := newPageLexer("default", s, pos(from))

+	lexer := newPageLexer(s, pos(from), lexMainSection) // TODO(bep) 2errors

 	lexer.run()

 	return &Tokens{lexer: lexer}

@@ -70,11 +72,11 @@

 // note: the input position here is normally 0 (start), but

 // can be set if position of first shortcode is known

-func newPageLexer(name, input string, inputPosition pos) *pageLexer {

+func newPageLexer(input string, inputPosition pos, stateStart stateFunc) *pageLexer {

 	lexer := &pageLexer{

-		name:  name,

-		input: input,

-		pos:   inputPosition,

+		input:      input,

+		pos:        inputPosition,

+		stateStart: stateStart,

 		lexerShortcodeState: lexerShortcodeState{

 			currLeftDelimItem:  tLeftDelimScNoMarkup,

 			currRightDelimItem: tRightDelimScNoMarkup,

@@ -88,14 +90,13 @@

 // main loop

 func (l *pageLexer) run() *pageLexer {

-	for l.state = lexTextOutsideShortcodes; l.state != nil; {

+	for l.state = l.stateStart; l.state != nil; {

 		l.state = l.state(l)

 	return l

-// state functions

+// Shortcode syntax

 const (

 	leftDelimScNoMarkup    = "{{<"

 	rightDelimScNoMarkup   = ">}}"

@@ -105,6 +106,12 @@

 	rightComment           = "*/"

+// Page syntax

+const (

+	summaryDivider    = "<!--more-->"

+	summaryDividerOrg = "# more"

+)

 func (l *pageLexer) next() rune {

 	if int(l.pos) >= len(l.input) {

 		l.width = 0

@@ -178,11 +185,21 @@

 	return item

-// scans until an opening shortcode opening bracket.

-// if no shortcodes, it will keep on scanning until EOF

-func lexTextOutsideShortcodes(l *pageLexer) stateFunc {

+func (l *pageLexer) consumeCRLF() bool {

+	var consumed bool

+	for _, r := range crLf {

+		if l.next() != r {

+			l.backup()

+		} else {

+			consumed = true

+		}

+	}

+	return consumed

+}

+func lexMainSection(l *pageLexer) stateFunc {

 	for {

-		if strings.HasPrefix(l.input[l.pos:], leftDelimScWithMarkup) || strings.HasPrefix(l.input[l.pos:], leftDelimScNoMarkup) {

+		if l.isShortCodeStart() {

 			if l.pos > l.start {

 				l.emit(tText)

@@ -194,12 +211,79 @@

 				l.currRightDelimItem = tRightDelimScNoMarkup

 			return lexShortcodeLeftDelim

+		}

+		if l.contentSections <= 1 {

+			if strings.HasPrefix(l.input[l.pos:], summaryDivider) {

+				if l.pos > l.start {

+					l.emit(tText)

+				}

+				l.contentSections++

+				l.pos += pos(len(summaryDivider))

+				l.emit(tSummaryDivider)

+			} else if strings.HasPrefix(l.input[l.pos:], summaryDividerOrg) {

+				if l.pos > l.start {

+					l.emit(tText)

+				}

+				l.contentSections++

+				l.pos += pos(len(summaryDividerOrg))

+				l.emit(tSummaryDividerOrg)

+			}

-		if l.next() == eof {

+		r := l.next()

+		if r == eof {

 			break

+	return lexDone

+}

+func (l *pageLexer) isShortCodeStart() bool {

+	return strings.HasPrefix(l.input[l.pos:], leftDelimScWithMarkup) || strings.HasPrefix(l.input[l.pos:], leftDelimScNoMarkup)

+}

+func lexIntroSection(l *pageLexer) stateFunc {

+LOOP:

+	for {

+		r := l.next()

+		if r == eof {

+			break

+		}

+		switch {

+		case r == '+':

+			return l.lexFrontMatterSection(tFrontMatterTOML, r, "TOML", "+++")

+		case r == '-':

+			return l.lexFrontMatterSection(tFrontMatterYAML, r, "YAML", "---")

+		case r == '{':

+			return lexFrontMatterJSON

+		case r == '#':

+			return lexFrontMatterOrgMode

+		case !isSpace(r) && !isEndOfLine(r):

+			if r == '<' {

+				l.emit(tHTMLLead)

+				// No need to look further. Hugo treats this as plain HTML,

+				// no front matter, no shortcodes, no nothing.

+				l.pos = pos(len(l.input))

+				l.emit(tText)

+				break LOOP

+			}

+			return l.errorf("failed to detect front matter type; got unknown identifier %q", r)

+		}

+	}

+	l.contentSections = 1

+	// Now move on to the shortcodes.

+	return lexMainSection

+}

+func lexDone(l *pageLexer) stateFunc {

 	// Done!

 	if l.pos > l.start {

 		l.emit(tText)

@@ -208,6 +292,122 @@

 	return nil

+func lexFrontMatterJSON(l *pageLexer) stateFunc {

+	// Include the left delimiter

+	l.backup()

+	var (

+		inQuote bool

+		level   int

+	)

+	for {

+		r := l.next()

+		switch {

+		case r == eof:

+			return l.errorf("unexpected EOF parsing JSON front matter")

+		case r == '{':

+			if !inQuote {

+				level++

+			}

+		case r == '}':

+			if !inQuote {

+				level--

+			}

+		case r == '"':

+			inQuote = !inQuote

+		case r == '\\':

+			// This may be an escaped quote. Make sure it's not marked as a

+			// real one.

+			l.next()

+		}

+		if level == 0 {

+			break

+		}

+	}

+	l.consumeCRLF()

+	l.emit(tFrontMatterJSON)

+	return lexMainSection

+}

+func lexFrontMatterOrgMode(l *pageLexer) stateFunc {

+	/*

+		#+TITLE: Test File For chaseadamsio/goorgeous

+		#+AUTHOR: Chase Adams

+		#+DESCRIPTION: Just another golang parser for org content!

+	*/

+	const prefix = "#+"

+	l.backup()

+	if !strings.HasPrefix(l.input[l.pos:], prefix) {

+		// TODO(bep) consider error

+		return lexMainSection

+	}

+	// Read lines until we no longer see a #+ prefix

+LOOP:

+	for {

+		r := l.next()

+		switch {

+		case r == '\n':

+			if !strings.HasPrefix(l.input[l.pos:], prefix) {

+				break LOOP

+			}

+		case r == eof:

+			break LOOP

+		}

+	}

+	l.emit(tFrontMatterORG)

+	return lexMainSection

+}

+// Handle YAML or TOML front matter.

+func (l *pageLexer) lexFrontMatterSection(tp itemType, delimr rune, name, delim string) stateFunc {

+	for i := 0; i < 2; i++ {

+		if r := l.next(); r != delimr {

+			return l.errorf("invalid %s delimiter", name)

+		}

+	}

+	if !l.consumeCRLF() {

+		return l.errorf("invalid %s delimiter", name)

+	}

+	// We don't care about the delimiters.

+	l.ignore()

+	for {

+		r := l.next()

+		if r == eof {

+			return l.errorf("EOF looking for end %s front matter delimiter", name)

+		}

+		if isEndOfLine(r) {

+			if strings.HasPrefix(l.input[l.pos:], delim) {

+				l.emit(tp)

+				l.pos += 3

+				l.consumeCRLF()

+				l.ignore()

+				break

+			}

+		}

+	}

+	return lexMainSection

+}

 func lexShortcodeLeftDelim(l *pageLexer) stateFunc {

 	l.pos += pos(len(l.currentLeftShortcodeDelim()))

 	if strings.HasPrefix(l.input[l.pos:], leftComment) {

@@ -234,7 +434,7 @@

 	l.ignore()

 	l.pos += pos(len(l.currentRightShortcodeDelim()))

 	l.emit(tText)

-	return lexTextOutsideShortcodes

+	return lexMainSection

 func lexShortcodeRightDelim(l *pageLexer) stateFunc {

@@ -241,7 +441,7 @@

 	l.closingState = 0

 	l.pos += pos(len(l.currentRightShortcodeDelim()))

 	l.emit(l.currentRightShortcodeDelimItem())

-	return lexTextOutsideShortcodes

+	return lexMainSection

 // either:

@@ -484,6 +684,8 @@

 	// let unquoted YouTube ids as positional params slip through (they contain hyphens)

 	return isAlphaNumeric(r) || r == '-'

+var crLf = []rune{'\r', '\n'}

 func isEndOfLine(r rune) bool {

 	return r == '\r' || r == '\n'

--- /dev/null

+++ b/parser/pageparser/pageparser_intro_test.go

@@ -1,0 +1,103 @@

+// Copyright 2018 The Hugo Authors. All rights reserved.

+//

+// Licensed under the Apache License, Version 2.0 (the "License");

+// you may not use this file except in compliance with the License.

+// You may obtain a copy of the License at

+// http://www.apache.org/licenses/LICENSE-2.0

+//

+// Unless required by applicable law or agreed to in writing, software

+// distributed under the License is distributed on an "AS IS" BASIS,

+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

+// See the License for the specific language governing permissions and

+// limitations under the License.

+package pageparser

+import (

+	"fmt"

+	"strings"

+	"testing"

+)

+type lexerTest struct {

+	name  string

+	input string

+	items []Item

+}

+var (

+	tstJSON                = `{ "a": { "b": "\"Hugo\"}" } }`

+	tstHTMLLead            = Item{tHTMLLead, 0, "  <"}

+	tstFrontMatterTOML     = Item{tFrontMatterTOML, 0, "foo = \"bar\"\n"}

+	tstFrontMatterYAML     = Item{tFrontMatterYAML, 0, "foo: \"bar\"\n"}

+	tstFrontMatterYAMLCRLF = Item{tFrontMatterYAML, 0, "foo: \"bar\"\r\n"}

+	tstFrontMatterJSON     = Item{tFrontMatterJSON, 0, tstJSON + "\r\n"}

+	tstSomeText            = Item{tText, 0, "\nSome text.\n"}

+	tstSummaryDivider      = Item{tSummaryDivider, 0, "<!--more-->"}

+	tstSummaryDividerOrg   = Item{tSummaryDividerOrg, 0, "# more"}

+	tstORG = `

+#+TITLE: T1

+#+AUTHOR: A1

+#+DESCRIPTION: D1

+`

+	tstFrontMatterORG = Item{tFrontMatterORG, 0, tstORG}

+)

+var crLfReplacer = strings.NewReplacer("\r", "#", "\n", "$")

+// TODO(bep) a way to toggle ORG mode vs the rest.

+var frontMatterTests = []lexerTest{

+	{"empty", "", []Item{tstEOF}},

+	{"HTML Document", `  <html>  `, []Item{tstHTMLLead, Item{tText, 0, "html>  "}, tstEOF}},

+	{"YAML front matter", "---\nfoo: \"bar\"\n---\n\nSome text.\n", []Item{tstFrontMatterYAML, tstSomeText, tstEOF}},

+	// Note that we keep all bytes as they are, but we need to handle CRLF

+	{"YAML front matter CRLF", "---\r\nfoo: \"bar\"\r\n---\n\nSome text.\n", []Item{tstFrontMatterYAMLCRLF, tstSomeText, tstEOF}},

+	{"TOML front matter", "+++\nfoo = \"bar\"\n+++\n\nSome text.\n", []Item{tstFrontMatterTOML, tstSomeText, tstEOF}},

+	{"JSON front matter", tstJSON + "\r\n\nSome text.\n", []Item{tstFrontMatterJSON, tstSomeText, tstEOF}},

+	{"ORG front matter", tstORG + "\nSome text.\n", []Item{tstFrontMatterORG, tstSomeText, tstEOF}},

+	{"Summary divider ORG", tstORG + "\nSome text.\n# more\nSome text.\n", []Item{tstFrontMatterORG, tstSomeText, tstSummaryDividerOrg, tstSomeText, tstEOF}},

+	{"Summary divider", "+++\nfoo = \"bar\"\n+++\n\nSome text.\n<!--more-->\nSome text.\n", []Item{tstFrontMatterTOML, tstSomeText, tstSummaryDivider, tstSomeText, tstEOF}},

+}

+func TestFrontMatter(t *testing.T) {

+	t.Parallel()

+	for i, test := range frontMatterTests {

+		items := collect(test.name, test.input, false, lexIntroSection)

+		if !equal(items, test.items) {

+			got := crLfReplacer.Replace(fmt.Sprint(items))

+			expected := crLfReplacer.Replace(fmt.Sprint(test.items))

+			t.Errorf("[%d] %s: got\n\t%v\nexpected\n\t%v", i, test.name, got, expected)

+		}

+	}

+}

+func collect(name, input string, skipFrontMatter bool, stateStart stateFunc) (items []Item) {

+	l := newPageLexer(input, 0, stateStart)

+	l.run()

+	for {

+		item := l.nextItem()

+		items = append(items, item)

+		if item.typ == tEOF || item.typ == tError {

+			break

+		}

+	}

+	return

+}

+// equal compares items by type and value only; no positional checking, for now ...

+func equal(i1, i2 []Item) bool {

+	if len(i1) != len(i2) {

+		return false

+	}

+	for k := range i1 {

+		if i1[k].typ != i2[k].typ {

+			return false

+		}

+		if i1[k].Val != i2[k].Val {

+			return false

+		}

+	}

+	return true

+}

--- /dev/null

+++ b/parser/pageparser/pageparser_shortcode_test.go

@@ -1,0 +1,171 @@

+// Copyright 2018 The Hugo Authors. All rights reserved.

+//

+// Licensed under the Apache License, Version 2.0 (the "License");

+// you may not use this file except in compliance with the License.

+// You may obtain a copy of the License at

+// http://www.apache.org/licenses/LICENSE-2.0

+//

+// Unless required by applicable law or agreed to in writing, software

+// distributed under the License is distributed on an "AS IS" BASIS,

+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

+// See the License for the specific language governing permissions and

+// limitations under the License.

+package pageparser

+import "testing"

+var (

+	tstEOF       = Item{tEOF, 0, ""}

+	tstLeftNoMD  = Item{tLeftDelimScNoMarkup, 0, "{{<"}

+	tstRightNoMD = Item{tRightDelimScNoMarkup, 0, ">}}"}

+	tstLeftMD    = Item{tLeftDelimScWithMarkup, 0, "{{%"}

+	tstRightMD   = Item{tRightDelimScWithMarkup, 0, "%}}"}

+	tstSCClose   = Item{tScClose, 0, "/"}

+	tstSC1       = Item{tScName, 0, "sc1"}

+	tstSC2       = Item{tScName, 0, "sc2"}

+	tstSC3       = Item{tScName, 0, "sc3"}

+	tstSCSlash   = Item{tScName, 0, "sc/sub"}

+	tstParam1    = Item{tScParam, 0, "param1"}

+	tstParam2    = Item{tScParam, 0, "param2"}

+	tstVal       = Item{tScParamVal, 0, "Hello World"}

+)

+var shortCodeLexerTests = []lexerTest{

+	{"empty", "", []Item{tstEOF}},

+	{"spaces", " \t\n", []Item{{tText, 0, " \t\n"}, tstEOF}},

+	{"text", `to be or not`, []Item{{tText, 0, "to be or not"}, tstEOF}},

+	{"no markup", `{{< sc1 >}}`, []Item{tstLeftNoMD, tstSC1, tstRightNoMD, tstEOF}},

+	{"with EOL", "{{< sc1 \n >}}", []Item{tstLeftNoMD, tstSC1, tstRightNoMD, tstEOF}},

+	{"forward slash inside name", `{{< sc/sub >}}`, []Item{tstLeftNoMD, tstSCSlash, tstRightNoMD, tstEOF}},

+	{"simple with markup", `{{% sc1 %}}`, []Item{tstLeftMD, tstSC1, tstRightMD, tstEOF}},

+	{"with spaces", `{{<     sc1     >}}`, []Item{tstLeftNoMD, tstSC1, tstRightNoMD, tstEOF}},

+	{"mismatched rightDelim", `{{< sc1 %}}`, []Item{tstLeftNoMD, tstSC1,

+		{tError, 0, "unrecognized character in shortcode action: U+0025 '%'. Note: Parameters with non-alphanumeric args must be quoted"}}},

+	{"inner, markup", `{{% sc1 %}} inner {{% /sc1 %}}`, []Item{

+		tstLeftMD,

+		tstSC1,

+		tstRightMD,

+		{tText, 0, " inner "},

+		tstLeftMD,

+		tstSCClose,

+		tstSC1,

+		tstRightMD,

+		tstEOF,

+	}},

+	{"close, but no open", `{{< /sc1 >}}`, []Item{

+		tstLeftNoMD, {tError, 0, "got closing shortcode, but none is open"}}},

+	{"close wrong", `{{< sc1 >}}{{< /another >}}`, []Item{

+		tstLeftNoMD, tstSC1, tstRightNoMD, tstLeftNoMD, tstSCClose,

+		{tError, 0, "closing tag for shortcode 'another' does not match start tag"}}},

+	{"close, but no open, more", `{{< sc1 >}}{{< /sc1 >}}{{< /another >}}`, []Item{

+		tstLeftNoMD, tstSC1, tstRightNoMD, tstLeftNoMD, tstSCClose, tstSC1, tstRightNoMD, tstLeftNoMD, tstSCClose,

+		{tError, 0, "closing tag for shortcode 'another' does not match start tag"}}},

+	{"close with extra keyword", `{{< sc1 >}}{{< /sc1 keyword>}}`, []Item{

+		tstLeftNoMD, tstSC1, tstRightNoMD, tstLeftNoMD, tstSCClose, tstSC1,

+		{tError, 0, "unclosed shortcode"}}},

+	{"Youtube id", `{{< sc1 -ziL-Q_456igdO-4 >}}`, []Item{

+		tstLeftNoMD, tstSC1, {tScParam, 0, "-ziL-Q_456igdO-4"}, tstRightNoMD, tstEOF}},

+	{"non-alphanumerics param quoted", `{{< sc1 "-ziL-.%QigdO-4" >}}`, []Item{

+		tstLeftNoMD, tstSC1, {tScParam, 0, "-ziL-.%QigdO-4"}, tstRightNoMD, tstEOF}},

+	{"two params", `{{< sc1 param1   param2 >}}`, []Item{

+		tstLeftNoMD, tstSC1, tstParam1, tstParam2, tstRightNoMD, tstEOF}},

+	// Issue #934

+	{"self-closing", `{{< sc1 />}}`, []Item{

+		tstLeftNoMD, tstSC1, tstSCClose, tstRightNoMD, tstEOF}},

+	// Issue #2498

+	{"multiple self-closing", `{{< sc1 />}}{{< sc1 />}}`, []Item{

+		tstLeftNoMD, tstSC1, tstSCClose, tstRightNoMD,

+		tstLeftNoMD, tstSC1, tstSCClose, tstRightNoMD, tstEOF}},

+	{"self-closing with param", `{{< sc1 param1 />}}`, []Item{

+		tstLeftNoMD, tstSC1, tstParam1, tstSCClose, tstRightNoMD, tstEOF}},

+	{"multiple self-closing with param", `{{< sc1 param1 />}}{{< sc1 param1 />}}`, []Item{

+		tstLeftNoMD, tstSC1, tstParam1, tstSCClose, tstRightNoMD,

+		tstLeftNoMD, tstSC1, tstParam1, tstSCClose, tstRightNoMD, tstEOF}},

+	{"multiple different self-closing with param", `{{< sc1 param1 />}}{{< sc2 param1 />}}`, []Item{

+		tstLeftNoMD, tstSC1, tstParam1, tstSCClose, tstRightNoMD,

+		tstLeftNoMD, tstSC2, tstParam1, tstSCClose, tstRightNoMD, tstEOF}},

+	{"nested simple", `{{< sc1 >}}{{< sc2 >}}{{< /sc1 >}}`, []Item{

+		tstLeftNoMD, tstSC1, tstRightNoMD,

+		tstLeftNoMD, tstSC2, tstRightNoMD,

+		tstLeftNoMD, tstSCClose, tstSC1, tstRightNoMD, tstEOF}},

+	{"nested complex", `{{< sc1 >}}ab{{% sc2 param1 %}}cd{{< sc3 >}}ef{{< /sc3 >}}gh{{% /sc2 %}}ij{{< /sc1 >}}kl`, []Item{

+		tstLeftNoMD, tstSC1, tstRightNoMD,

+		{tText, 0, "ab"},

+		tstLeftMD, tstSC2, tstParam1, tstRightMD,

+		{tText, 0, "cd"},

+		tstLeftNoMD, tstSC3, tstRightNoMD,

+		{tText, 0, "ef"},

+		tstLeftNoMD, tstSCClose, tstSC3, tstRightNoMD,

+		{tText, 0, "gh"},

+		tstLeftMD, tstSCClose, tstSC2, tstRightMD,

+		{tText, 0, "ij"},

+		tstLeftNoMD, tstSCClose, tstSC1, tstRightNoMD,

+		{tText, 0, "kl"}, tstEOF,

+	}},

+	{"two quoted params", `{{< sc1 "param nr. 1" "param nr. 2" >}}`, []Item{

+		tstLeftNoMD, tstSC1, {tScParam, 0, "param nr. 1"}, {tScParam, 0, "param nr. 2"}, tstRightNoMD, tstEOF}},

+	{"two named params", `{{< sc1 param1="Hello World" param2="p2Val">}}`, []Item{

+		tstLeftNoMD, tstSC1, tstParam1, tstVal, tstParam2, {tScParamVal, 0, "p2Val"}, tstRightNoMD, tstEOF}},

+	{"escaped quotes", `{{< sc1 param1=\"Hello World\"  >}}`, []Item{

+		tstLeftNoMD, tstSC1, tstParam1, tstVal, tstRightNoMD, tstEOF}},

+	{"escaped quotes, positional param", `{{< sc1 \"param1\"  >}}`, []Item{

+		tstLeftNoMD, tstSC1, tstParam1, tstRightNoMD, tstEOF}},

+	{"escaped quotes inside escaped quotes", `{{< sc1 param1=\"Hello \"escaped\" World\"  >}}`, []Item{

+		tstLeftNoMD, tstSC1, tstParam1,

+		{tScParamVal, 0, `Hello `}, {tError, 0, `got positional parameter 'escaped'. Cannot mix named and positional parameters`}}},

+	{"escaped quotes inside nonescaped quotes",

+		`{{< sc1 param1="Hello \"escaped\" World"  >}}`, []Item{

+			tstLeftNoMD, tstSC1, tstParam1, {tScParamVal, 0, `Hello "escaped" World`}, tstRightNoMD, tstEOF}},

+	{"escaped quotes inside nonescaped quotes in positional param",

+		`{{< sc1 "Hello \"escaped\" World"  >}}`, []Item{

+			tstLeftNoMD, tstSC1, {tScParam, 0, `Hello "escaped" World`}, tstRightNoMD, tstEOF}},

+	{"unterminated quote", `{{< sc1 param2="Hello World>}}`, []Item{

+		tstLeftNoMD, tstSC1, tstParam2, {tError, 0, "unterminated quoted string in shortcode parameter-argument: 'Hello World>}}'"}}},

+	{"one named param, one not", `{{< sc1 param1="Hello World" p2 >}}`, []Item{

+		tstLeftNoMD, tstSC1, tstParam1, tstVal,

+		{tError, 0, "got positional parameter 'p2'. Cannot mix named and positional parameters"}}},

+	{"one named param, one quoted positional param", `{{< sc1 param1="Hello World" "And Universe" >}}`, []Item{

+		tstLeftNoMD, tstSC1, tstParam1, tstVal,

+		{tError, 0, "got quoted positional parameter. Cannot mix named and positional parameters"}}},

+	{"one quoted positional param, one named param", `{{< sc1 "param1" param2="And Universe" >}}`, []Item{

+		tstLeftNoMD, tstSC1, tstParam1,

+		{tError, 0, "got named parameter 'param2'. Cannot mix named and positional parameters"}}},

+	{"ono positional param, one not", `{{< sc1 param1 param2="Hello World">}}`, []Item{

+		tstLeftNoMD, tstSC1, tstParam1,

+		{tError, 0, "got named parameter 'param2'. Cannot mix named and positional parameters"}}},

+	{"commented out", `{{</* sc1 */>}}`, []Item{

+		{tText, 0, "{{<"}, {tText, 0, " sc1 "}, {tText, 0, ">}}"}, tstEOF}},

+	{"commented out, with asterisk inside", `{{</* sc1 "**/*.pdf" */>}}`, []Item{

+		{tText, 0, "{{<"}, {tText, 0, " sc1 \"**/*.pdf\" "}, {tText, 0, ">}}"}, tstEOF}},

+	{"commented out, missing close", `{{</* sc1 >}}`, []Item{

+		{tError, 0, "comment must be closed"}}},

+	{"commented out, misplaced close", `{{</* sc1 >}}*/`, []Item{

+		{tError, 0, "comment must be closed"}}},

+}

+func TestShortcodeLexer(t *testing.T) {

+	t.Parallel()

+	for i, test := range shortCodeLexerTests {

+		items := collect(test.name, test.input, true, lexMainSection)

+		if !equal(items, test.items) {

+			t.Errorf("[%d] %s: got\n\t%v\nexpected\n\t%v", i, test.name, items, test.items)

+		}

+	}

+}

+func BenchmarkShortcodeLexer(b *testing.B) {

+	b.ResetTimer()

+	for i := 0; i < b.N; i++ {

+		for _, test := range shortCodeLexerTests {

+			items := collect(test.name, test.input, true, lexMainSection)

+			if !equal(items, test.items) {

+				b.Errorf("%s: got\n\t%v\nexpected\n\t%v", test.name, items, test.items)

+			}

+		}

+	}

+}

--- a/parser/pageparser/pageparser_test.go

+++ /dev/null

@@ -1,207 +1,0 @@

-// Copyright 2018 The Hugo Authors. All rights reserved.

-//

-// Licensed under the Apache License, Version 2.0 (the "License");

-// you may not use this file except in compliance with the License.

-// You may obtain a copy of the License at

-// http://www.apache.org/licenses/LICENSE-2.0

-//

-// Unless required by applicable law or agreed to in writing, software

-// distributed under the License is distributed on an "AS IS" BASIS,

-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

-// See the License for the specific language governing permissions and

-// limitations under the License.

-package pageparser

-import (

-	"testing"

-)

-type shortCodeLexerTest struct {

-	name  string

-	input string

-	items []Item

-}

-var (

-	tstEOF       = Item{tEOF, 0, ""}

-	tstLeftNoMD  = Item{tLeftDelimScNoMarkup, 0, "{{<"}

-	tstRightNoMD = Item{tRightDelimScNoMarkup, 0, ">}}"}

-	tstLeftMD    = Item{tLeftDelimScWithMarkup, 0, "{{%"}

-	tstRightMD   = Item{tRightDelimScWithMarkup, 0, "%}}"}

-	tstSCClose   = Item{tScClose, 0, "/"}

-	tstSC1       = Item{tScName, 0, "sc1"}

-	tstSC2       = Item{tScName, 0, "sc2"}

-	tstSC3       = Item{tScName, 0, "sc3"}

-	tstSCSlash   = Item{tScName, 0, "sc/sub"}

-	tstParam1    = Item{tScParam, 0, "param1"}

-	tstParam2    = Item{tScParam, 0, "param2"}

-	tstVal       = Item{tScParamVal, 0, "Hello World"}

-)

-var shortCodeLexerTests = []shortCodeLexerTest{

-	{"empty", "", []Item{tstEOF}},

-	{"spaces", " \t\n", []Item{{tText, 0, " \t\n"}, tstEOF}},

-	{"text", `to be or not`, []Item{{tText, 0, "to be or not"}, tstEOF}},

-	{"no markup", `{{< sc1 >}}`, []Item{tstLeftNoMD, tstSC1, tstRightNoMD, tstEOF}},

-	{"with EOL", "{{< sc1 \n >}}", []Item{tstLeftNoMD, tstSC1, tstRightNoMD, tstEOF}},

-	{"forward slash inside name", `{{< sc/sub >}}`, []Item{tstLeftNoMD, tstSCSlash, tstRightNoMD, tstEOF}},

-	{"simple with markup", `{{% sc1 %}}`, []Item{tstLeftMD, tstSC1, tstRightMD, tstEOF}},

-	{"with spaces", `{{<     sc1     >}}`, []Item{tstLeftNoMD, tstSC1, tstRightNoMD, tstEOF}},

-	{"mismatched rightDelim", `{{< sc1 %}}`, []Item{tstLeftNoMD, tstSC1,

-		{tError, 0, "unrecognized character in shortcode action: U+0025 '%'. Note: Parameters with non-alphanumeric args must be quoted"}}},

-	{"inner, markup", `{{% sc1 %}} inner {{% /sc1 %}}`, []Item{

-		tstLeftMD,

-		tstSC1,

-		tstRightMD,

-		{tText, 0, " inner "},

-		tstLeftMD,

-		tstSCClose,

-		tstSC1,

-		tstRightMD,

-		tstEOF,

-	}},

-	{"close, but no open", `{{< /sc1 >}}`, []Item{

-		tstLeftNoMD, {tError, 0, "got closing shortcode, but none is open"}}},

-	{"close wrong", `{{< sc1 >}}{{< /another >}}`, []Item{

-		tstLeftNoMD, tstSC1, tstRightNoMD, tstLeftNoMD, tstSCClose,

-		{tError, 0, "closing tag for shortcode 'another' does not match start tag"}}},

-	{"close, but no open, more", `{{< sc1 >}}{{< /sc1 >}}{{< /another >}}`, []Item{

-		tstLeftNoMD, tstSC1, tstRightNoMD, tstLeftNoMD, tstSCClose, tstSC1, tstRightNoMD, tstLeftNoMD, tstSCClose,

-		{tError, 0, "closing tag for shortcode 'another' does not match start tag"}}},

-	{"close with extra keyword", `{{< sc1 >}}{{< /sc1 keyword>}}`, []Item{

-		tstLeftNoMD, tstSC1, tstRightNoMD, tstLeftNoMD, tstSCClose, tstSC1,

-		{tError, 0, "unclosed shortcode"}}},

-	{"Youtube id", `{{< sc1 -ziL-Q_456igdO-4 >}}`, []Item{

-		tstLeftNoMD, tstSC1, {tScParam, 0, "-ziL-Q_456igdO-4"}, tstRightNoMD, tstEOF}},

-	{"non-alphanumerics param quoted", `{{< sc1 "-ziL-.%QigdO-4" >}}`, []Item{

-		tstLeftNoMD, tstSC1, {tScParam, 0, "-ziL-.%QigdO-4"}, tstRightNoMD, tstEOF}},

-	{"two params", `{{< sc1 param1   param2 >}}`, []Item{

-		tstLeftNoMD, tstSC1, tstParam1, tstParam2, tstRightNoMD, tstEOF}},

-	// issue #934

-	{"self-closing", `{{< sc1 />}}`, []Item{

-		tstLeftNoMD, tstSC1, tstSCClose, tstRightNoMD, tstEOF}},

-	// Issue 2498

-	{"multiple self-closing", `{{< sc1 />}}{{< sc1 />}}`, []Item{

-		tstLeftNoMD, tstSC1, tstSCClose, tstRightNoMD,

-		tstLeftNoMD, tstSC1, tstSCClose, tstRightNoMD, tstEOF}},

-	{"self-closing with param", `{{< sc1 param1 />}}`, []Item{

-		tstLeftNoMD, tstSC1, tstParam1, tstSCClose, tstRightNoMD, tstEOF}},

-	{"multiple self-closing with param", `{{< sc1 param1 />}}{{< sc1 param1 />}}`, []Item{

-		tstLeftNoMD, tstSC1, tstParam1, tstSCClose, tstRightNoMD,

-		tstLeftNoMD, tstSC1, tstParam1, tstSCClose, tstRightNoMD, tstEOF}},

-	{"multiple different self-closing with param", `{{< sc1 param1 />}}{{< sc2 param1 />}}`, []Item{

-		tstLeftNoMD, tstSC1, tstParam1, tstSCClose, tstRightNoMD,

-		tstLeftNoMD, tstSC2, tstParam1, tstSCClose, tstRightNoMD, tstEOF}},

-	{"nested simple", `{{< sc1 >}}{{< sc2 >}}{{< /sc1 >}}`, []Item{

-		tstLeftNoMD, tstSC1, tstRightNoMD,

-		tstLeftNoMD, tstSC2, tstRightNoMD,

-		tstLeftNoMD, tstSCClose, tstSC1, tstRightNoMD, tstEOF}},

-	{"nested complex", `{{< sc1 >}}ab{{% sc2 param1 %}}cd{{< sc3 >}}ef{{< /sc3 >}}gh{{% /sc2 %}}ij{{< /sc1 >}}kl`, []Item{

-		tstLeftNoMD, tstSC1, tstRightNoMD,

-		{tText, 0, "ab"},

-		tstLeftMD, tstSC2, tstParam1, tstRightMD,

-		{tText, 0, "cd"},

-		tstLeftNoMD, tstSC3, tstRightNoMD,

-		{tText, 0, "ef"},

-		tstLeftNoMD, tstSCClose, tstSC3, tstRightNoMD,

-		{tText, 0, "gh"},

-		tstLeftMD, tstSCClose, tstSC2, tstRightMD,

-		{tText, 0, "ij"},

-		tstLeftNoMD, tstSCClose, tstSC1, tstRightNoMD,

-		{tText, 0, "kl"}, tstEOF,

-	}},

-	{"two quoted params", `{{< sc1 "param nr. 1" "param nr. 2" >}}`, []Item{

-		tstLeftNoMD, tstSC1, {tScParam, 0, "param nr. 1"}, {tScParam, 0, "param nr. 2"}, tstRightNoMD, tstEOF}},

-	{"two named params", `{{< sc1 param1="Hello World" param2="p2Val">}}`, []Item{

-		tstLeftNoMD, tstSC1, tstParam1, tstVal, tstParam2, {tScParamVal, 0, "p2Val"}, tstRightNoMD, tstEOF}},

-	{"escaped quotes", `{{< sc1 param1=\"Hello World\"  >}}`, []Item{

-		tstLeftNoMD, tstSC1, tstParam1, tstVal, tstRightNoMD, tstEOF}},

-	{"escaped quotes, positional param", `{{< sc1 \"param1\"  >}}`, []Item{

-		tstLeftNoMD, tstSC1, tstParam1, tstRightNoMD, tstEOF}},

-	{"escaped quotes inside escaped quotes", `{{< sc1 param1=\"Hello \"escaped\" World\"  >}}`, []Item{

-		tstLeftNoMD, tstSC1, tstParam1,

-		{tScParamVal, 0, `Hello `}, {tError, 0, `got positional parameter 'escaped'. Cannot mix named and positional parameters`}}},

-	{"escaped quotes inside nonescaped quotes",

-		`{{< sc1 param1="Hello \"escaped\" World"  >}}`, []Item{

-			tstLeftNoMD, tstSC1, tstParam1, {tScParamVal, 0, `Hello "escaped" World`}, tstRightNoMD, tstEOF}},

-	{"escaped quotes inside nonescaped quotes in positional param",

-		`{{< sc1 "Hello \"escaped\" World"  >}}`, []Item{

-			tstLeftNoMD, tstSC1, {tScParam, 0, `Hello "escaped" World`}, tstRightNoMD, tstEOF}},

-	{"unterminated quote", `{{< sc1 param2="Hello World>}}`, []Item{

-		tstLeftNoMD, tstSC1, tstParam2, {tError, 0, "unterminated quoted string in shortcode parameter-argument: 'Hello World>}}'"}}},

-	{"one named param, one not", `{{< sc1 param1="Hello World" p2 >}}`, []Item{

-		tstLeftNoMD, tstSC1, tstParam1, tstVal,

-		{tError, 0, "got positional parameter 'p2'. Cannot mix named and positional parameters"}}},

-	{"one named param, one quoted positional param", `{{< sc1 param1="Hello World" "And Universe" >}}`, []Item{

-		tstLeftNoMD, tstSC1, tstParam1, tstVal,

-		{tError, 0, "got quoted positional parameter. Cannot mix named and positional parameters"}}},

-	{"one quoted positional param, one named param", `{{< sc1 "param1" param2="And Universe" >}}`, []Item{

-		tstLeftNoMD, tstSC1, tstParam1,

-		{tError, 0, "got named parameter 'param2'. Cannot mix named and positional parameters"}}},

-	{"ono positional param, one not", `{{< sc1 param1 param2="Hello World">}}`, []Item{

-		tstLeftNoMD, tstSC1, tstParam1,

-		{tError, 0, "got named parameter 'param2'. Cannot mix named and positional parameters"}}},

-	{"commented out", `{{</* sc1 */>}}`, []Item{

-		{tText, 0, "{{<"}, {tText, 0, " sc1 "}, {tText, 0, ">}}"}, tstEOF}},

-	{"commented out, with asterisk inside", `{{</* sc1 "**/*.pdf" */>}}`, []Item{

-		{tText, 0, "{{<"}, {tText, 0, " sc1 \"**/*.pdf\" "}, {tText, 0, ">}}"}, tstEOF}},

-	{"commented out, missing close", `{{</* sc1 >}}`, []Item{

-		{tError, 0, "comment must be closed"}}},

-	{"commented out, misplaced close", `{{</* sc1 >}}*/`, []Item{

-		{tError, 0, "comment must be closed"}}},

-}

-func TestShortcodeLexer(t *testing.T) {

-	t.Parallel()

-	for i, test := range shortCodeLexerTests {

-		items := collect(&test)

-		if !equal(items, test.items) {

-			t.Errorf("[%d] %s: got\n\t%v\nexpected\n\t%v", i, test.name, items, test.items)

-		}

-	}

-}

-func BenchmarkShortcodeLexer(b *testing.B) {

-	b.ResetTimer()

-	for i := 0; i < b.N; i++ {

-		for _, test := range shortCodeLexerTests {

-			items := collect(&test)

-			if !equal(items, test.items) {

-				b.Errorf("%s: got\n\t%v\nexpected\n\t%v", test.name, items, test.items)

-			}

-		}

-	}

-}

-func collect(t *shortCodeLexerTest) (items []Item) {

-	l := newPageLexer(t.name, t.input, 0).run()

-	for {

-		item := l.nextItem()

-		items = append(items, item)

-		if item.typ == tEOF || item.typ == tError {

-			break

-		}

-	}

-	return

-}

-// no positional checking, for now ...

-func equal(i1, i2 []Item) bool {

-	if len(i1) != len(i2) {

-		return false

-	}

-	for k := range i1 {

-		if i1[k].typ != i2[k].typ {

-			return false

-		}

-		if i1[k].Val != i2[k].Val {

-			return false

-		}

-	}

-	return true

-}