ref: 2fdc4a24d5450a98cf38a4456e8e0e8e97a3343d
parent: f6863e1ef725f654a4c869ef4955f9add6908a46
author: Bjørn Erik Pedersen <[email protected]>
date: Wed Oct 17 09:48:55 EDT 2018
parser/pageparser: Add front matter etc. support See #5324
--- a/parser/pageparser/item.go
+++ b/parser/pageparser/item.go
@@ -73,10 +73,10 @@
return i.Val
case i.typ > tKeywordMarker:
return fmt.Sprintf("<%s>", i.Val)
- case len(i.Val) > 20:
- return fmt.Sprintf("%.20q...", i.Val)
+ case len(i.Val) > 50:
+ return fmt.Sprintf("%v:%.20q...", i.typ, i.Val)
}
- return fmt.Sprintf("[%s]", i.Val)
+ return fmt.Sprintf("%v:[%s]", i.typ, i.Val)
}
type itemType int
@@ -85,6 +85,15 @@
tError itemType = iota
tEOF
+ // page items
+ tHTMLLead // <
+ tSummaryDivider // <!--more-->
+ tSummaryDividerOrg // # more
+ tFrontMatterYAML
+ tFrontMatterTOML
+ tFrontMatterJSON
+ tFrontMatterORG
+
// shortcode items
tLeftDelimScNoMarkup
tRightDelimScNoMarkup
@@ -95,8 +104,7 @@
tScParam
tScParamVal
- //itemIdentifier
- tText // plain text, used for everything outside the shortcodes
+ tText // plain text
// preserved for later - keywords come after this
tKeywordMarker
--- a/parser/pageparser/pagelexer.go
+++ b/parser/pageparser/pagelexer.go
@@ -44,14 +44,16 @@
}
type pageLexer struct {
- name string
- input string
- state stateFunc
- pos pos // input position
- start pos // item start position
- width pos // width of last element
- lastPos pos // position of the last item returned by nextItem
+ input string
+ stateStart stateFunc
+ state stateFunc
+ pos pos // input position
+ start pos // item start position
+ width pos // width of last element
+ lastPos pos // position of the last item returned by nextItem
+ contentSections int
+
lexerShortcodeState
// items delivered to client
@@ -63,7 +65,7 @@
}
func ParseFrom(s string, from int) *Tokens {
- lexer := newPageLexer("default", s, pos(from))
+ lexer := newPageLexer(s, pos(from), lexMainSection) // TODO(bep) 2errors
lexer.run()
return &Tokens{lexer: lexer}
}
@@ -70,11 +72,11 @@
// note: the input position here is normally 0 (start), but
// can be set if position of first shortcode is known
-func newPageLexer(name, input string, inputPosition pos) *pageLexer {
+func newPageLexer(input string, inputPosition pos, stateStart stateFunc) *pageLexer {
lexer := &pageLexer{
- name: name,
- input: input,
- pos: inputPosition,
+ input: input,
+ pos: inputPosition,
+ stateStart: stateStart,
lexerShortcodeState: lexerShortcodeState{
currLeftDelimItem: tLeftDelimScNoMarkup,
currRightDelimItem: tRightDelimScNoMarkup,
@@ -88,14 +90,13 @@
// main loop
func (l *pageLexer) run() *pageLexer {
- for l.state = lexTextOutsideShortcodes; l.state != nil; {
+ for l.state = l.stateStart; l.state != nil; {
l.state = l.state(l)
}
return l
}
-// state functions
-
+// Shortcode syntax
const (
leftDelimScNoMarkup = "{{<"
rightDelimScNoMarkup = ">}}"
@@ -105,6 +106,12 @@
rightComment = "*/"
)
+// Page syntax
+const (
+ summaryDivider = "<!--more-->"
+ summaryDividerOrg = "# more"
+)
+
func (l *pageLexer) next() rune {
if int(l.pos) >= len(l.input) {
l.width = 0
@@ -178,11 +185,21 @@
return item
}
-// scans until an opening shortcode opening bracket.
-// if no shortcodes, it will keep on scanning until EOF
-func lexTextOutsideShortcodes(l *pageLexer) stateFunc {
+func (l *pageLexer) consumeCRLF() bool {
+ var consumed bool
+ for _, r := range crLf {
+ if l.next() != r {
+ l.backup()
+ } else {
+ consumed = true
+ }
+ }
+ return consumed
+}
+
+func lexMainSection(l *pageLexer) stateFunc {
for {
- if strings.HasPrefix(l.input[l.pos:], leftDelimScWithMarkup) || strings.HasPrefix(l.input[l.pos:], leftDelimScNoMarkup) {
+ if l.isShortCodeStart() {
if l.pos > l.start {
l.emit(tText)
}
@@ -194,12 +211,79 @@
l.currRightDelimItem = tRightDelimScNoMarkup
}
return lexShortcodeLeftDelim
+ }
+ if l.contentSections <= 1 {
+ if strings.HasPrefix(l.input[l.pos:], summaryDivider) {
+ if l.pos > l.start {
+ l.emit(tText)
+ }
+ l.contentSections++
+ l.pos += pos(len(summaryDivider))
+ l.emit(tSummaryDivider)
+ } else if strings.HasPrefix(l.input[l.pos:], summaryDividerOrg) {
+ if l.pos > l.start {
+ l.emit(tText)
+ }
+ l.contentSections++
+ l.pos += pos(len(summaryDividerOrg))
+ l.emit(tSummaryDividerOrg)
+ }
}
- if l.next() == eof {
+
+ r := l.next()
+ if r == eof {
break
}
+
}
+
+ return lexDone
+
+}
+
+func (l *pageLexer) isShortCodeStart() bool {
+ return strings.HasPrefix(l.input[l.pos:], leftDelimScWithMarkup) || strings.HasPrefix(l.input[l.pos:], leftDelimScNoMarkup)
+}
+
+func lexIntroSection(l *pageLexer) stateFunc {
+LOOP:
+ for {
+ r := l.next()
+ if r == eof {
+ break
+ }
+
+ switch {
+ case r == '+':
+ return l.lexFrontMatterSection(tFrontMatterTOML, r, "TOML", "+++")
+ case r == '-':
+ return l.lexFrontMatterSection(tFrontMatterYAML, r, "YAML", "---")
+ case r == '{':
+ return lexFrontMatterJSON
+ case r == '#':
+ return lexFrontMatterOrgMode
+ case !isSpace(r) && !isEndOfLine(r):
+ if r == '<' {
+ l.emit(tHTMLLead)
+ // No need to look further. Hugo treats this as plain HTML:
+ // no front matter, no shortcodes, nothing else.
+ l.pos = pos(len(l.input))
+ l.emit(tText)
+ break LOOP
+ }
+ return l.errorf("failed to detect front matter type; got unknown identifier %q", r)
+ }
+ }
+
+ l.contentSections = 1
+
+ // Now move on to the shortcodes.
+ return lexMainSection
+}
+
+func lexDone(l *pageLexer) stateFunc {
+
// Done!
if l.pos > l.start {
l.emit(tText)
@@ -208,6 +292,122 @@
return nil
}
+func lexFrontMatterJSON(l *pageLexer) stateFunc {
+ // Include the left delimiter
+ l.backup()
+
+ var (
+ inQuote bool
+ level int
+ )
+
+ for {
+
+ r := l.next()
+
+ switch {
+ case r == eof:
+ return l.errorf("unexpected EOF parsing JSON front matter")
+ case r == '{':
+ if !inQuote {
+ level++
+ }
+ case r == '}':
+ if !inQuote {
+ level--
+ }
+ case r == '"':
+ inQuote = !inQuote
+ case r == '\\':
+ // This may be an escaped quote. Make sure it's not marked as a
+ // real one.
+ l.next()
+ }
+
+ if level == 0 {
+ break
+ }
+ }
+
+ l.consumeCRLF()
+ l.emit(tFrontMatterJSON)
+
+ return lexMainSection
+}
+
+func lexFrontMatterOrgMode(l *pageLexer) stateFunc {
+ /*
+ #+TITLE: Test File For chaseadamsio/goorgeous
+ #+AUTHOR: Chase Adams
+ #+DESCRIPTION: Just another golang parser for org content!
+ */
+
+ const prefix = "#+"
+
+ l.backup()
+
+ if !strings.HasPrefix(l.input[l.pos:], prefix) {
+ // TODO(bep) consider error
+ return lexMainSection
+ }
+
+ // Read lines until we no longer see a #+ prefix
+LOOP:
+ for {
+
+ r := l.next()
+
+ switch {
+ case r == '\n':
+ if !strings.HasPrefix(l.input[l.pos:], prefix) {
+ break LOOP
+ }
+ case r == eof:
+ break LOOP
+
+ }
+ }
+
+ l.emit(tFrontMatterORG)
+
+ return lexMainSection
+
+}
+
+// Handle YAML or TOML front matter.
+func (l *pageLexer) lexFrontMatterSection(tp itemType, delimr rune, name, delim string) stateFunc {
+ for i := 0; i < 2; i++ {
+ if r := l.next(); r != delimr {
+ return l.errorf("invalid %s delimiter", name)
+ }
+ }
+
+ if !l.consumeCRLF() {
+ return l.errorf("invalid %s delimiter", name)
+ }
+
+ // We don't care about the delimiters.
+ l.ignore()
+
+ for {
+ r := l.next()
+ if r == eof {
+ return l.errorf("EOF looking for end %s front matter delimiter", name)
+ }
+ if isEndOfLine(r) {
+ if strings.HasPrefix(l.input[l.pos:], delim) {
+ l.emit(tp)
+ l.pos += 3
+ l.consumeCRLF()
+ l.ignore()
+ break
+ }
+ }
+ }
+
+ return lexMainSection
+}
+
func lexShortcodeLeftDelim(l *pageLexer) stateFunc {
l.pos += pos(len(l.currentLeftShortcodeDelim()))
if strings.HasPrefix(l.input[l.pos:], leftComment) {
@@ -234,7 +434,7 @@
l.ignore()
l.pos += pos(len(l.currentRightShortcodeDelim()))
l.emit(tText)
- return lexTextOutsideShortcodes
+ return lexMainSection
}
func lexShortcodeRightDelim(l *pageLexer) stateFunc {
@@ -241,7 +441,7 @@
l.closingState = 0
l.pos += pos(len(l.currentRightShortcodeDelim()))
l.emit(l.currentRightShortcodeDelimItem())
- return lexTextOutsideShortcodes
+ return lexMainSection
}
// either:
@@ -484,6 +684,8 @@
// let unquoted YouTube ids as positional params slip through (they contain hyphens)
return isAlphaNumeric(r) || r == '-'
}
+
+var crLf = []rune{'\r', '\n'}
func isEndOfLine(r rune) bool {
return r == '\r' || r == '\n'
--- /dev/null
+++ b/parser/pageparser/pageparser_intro_test.go
@@ -1,0 +1,103 @@
+// Copyright 2018 The Hugo Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package pageparser
+
+import (
+ "fmt"
+ "strings"
+ "testing"
+)
+
+type lexerTest struct {
+ name string
+ input string
+ items []Item
+}
+
+var (
+ tstJSON = `{ "a": { "b": "\"Hugo\"}" } }`
+ tstHTMLLead = Item{tHTMLLead, 0, " <"}
+ tstFrontMatterTOML = Item{tFrontMatterTOML, 0, "foo = \"bar\"\n"}
+ tstFrontMatterYAML = Item{tFrontMatterYAML, 0, "foo: \"bar\"\n"}
+ tstFrontMatterYAMLCRLF = Item{tFrontMatterYAML, 0, "foo: \"bar\"\r\n"}
+ tstFrontMatterJSON = Item{tFrontMatterJSON, 0, tstJSON + "\r\n"}
+ tstSomeText = Item{tText, 0, "\nSome text.\n"}
+ tstSummaryDivider = Item{tSummaryDivider, 0, "<!--more-->"}
+ tstSummaryDividerOrg = Item{tSummaryDividerOrg, 0, "# more"}
+
+ tstORG = `
+#+TITLE: T1
+#+AUTHOR: A1
+#+DESCRIPTION: D1
+`
+ tstFrontMatterORG = Item{tFrontMatterORG, 0, tstORG}
+)
+
+var crLfReplacer = strings.NewReplacer("\r", "#", "\n", "$")
+
+// TODO(bep) a way to toggle ORG mode vs the rest.
+var frontMatterTests = []lexerTest{
+ {"empty", "", []Item{tstEOF}},
+ {"HTML Document", ` <html> `, []Item{tstHTMLLead, Item{tText, 0, "html> "}, tstEOF}},
+ {"YAML front matter", "---\nfoo: \"bar\"\n---\n\nSome text.\n", []Item{tstFrontMatterYAML, tstSomeText, tstEOF}},
+ // Note that we keep all bytes as they are, but we need to handle CRLF
+ {"YAML front matter CRLF", "---\r\nfoo: \"bar\"\r\n---\n\nSome text.\n", []Item{tstFrontMatterYAMLCRLF, tstSomeText, tstEOF}},
+ {"TOML front matter", "+++\nfoo = \"bar\"\n+++\n\nSome text.\n", []Item{tstFrontMatterTOML, tstSomeText, tstEOF}},
+ {"JSON front matter", tstJSON + "\r\n\nSome text.\n", []Item{tstFrontMatterJSON, tstSomeText, tstEOF}},
+ {"ORG front matter", tstORG + "\nSome text.\n", []Item{tstFrontMatterORG, tstSomeText, tstEOF}},
+ {"Summary divider ORG", tstORG + "\nSome text.\n# more\nSome text.\n", []Item{tstFrontMatterORG, tstSomeText, tstSummaryDividerOrg, tstSomeText, tstEOF}},
+ {"Summary divider", "+++\nfoo = \"bar\"\n+++\n\nSome text.\n<!--more-->\nSome text.\n", []Item{tstFrontMatterTOML, tstSomeText, tstSummaryDivider, tstSomeText, tstEOF}},
+}
+
+func TestFrontMatter(t *testing.T) {
+ t.Parallel()
+ for i, test := range frontMatterTests {
+ items := collect(test.name, test.input, false, lexIntroSection)
+ if !equal(items, test.items) {
+ got := crLfReplacer.Replace(fmt.Sprint(items))
+ expected := crLfReplacer.Replace(fmt.Sprint(test.items))
+ t.Errorf("[%d] %s: got\n\t%v\nexpected\n\t%v", i, test.name, got, expected)
+ }
+ }
+}
+
+func collect(name, input string, skipFrontMatter bool, stateStart stateFunc) (items []Item) {
+ l := newPageLexer(input, 0, stateStart)
+ l.run()
+
+ for {
+ item := l.nextItem()
+ items = append(items, item)
+ if item.typ == tEOF || item.typ == tError {
+ break
+ }
+ }
+ return
+}
+
+// no positional checking, for now ...
+func equal(i1, i2 []Item) bool {
+ if len(i1) != len(i2) {
+ return false
+ }
+ for k := range i1 {
+ if i1[k].typ != i2[k].typ {
+ return false
+ }
+ if i1[k].Val != i2[k].Val {
+ return false
+ }
+ }
+ return true
+}
--- /dev/null
+++ b/parser/pageparser/pageparser_shortcode_test.go
@@ -1,0 +1,171 @@
+// Copyright 2018 The Hugo Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package pageparser
+
+import "testing"
+
+var (
+ tstEOF = Item{tEOF, 0, ""}
+ tstLeftNoMD = Item{tLeftDelimScNoMarkup, 0, "{{<"}
+ tstRightNoMD = Item{tRightDelimScNoMarkup, 0, ">}}"}
+ tstLeftMD = Item{tLeftDelimScWithMarkup, 0, "{{%"}
+ tstRightMD = Item{tRightDelimScWithMarkup, 0, "%}}"}
+ tstSCClose = Item{tScClose, 0, "/"}
+ tstSC1 = Item{tScName, 0, "sc1"}
+ tstSC2 = Item{tScName, 0, "sc2"}
+ tstSC3 = Item{tScName, 0, "sc3"}
+ tstSCSlash = Item{tScName, 0, "sc/sub"}
+ tstParam1 = Item{tScParam, 0, "param1"}
+ tstParam2 = Item{tScParam, 0, "param2"}
+ tstVal = Item{tScParamVal, 0, "Hello World"}
+)
+
+var shortCodeLexerTests = []lexerTest{
+ {"empty", "", []Item{tstEOF}},
+ {"spaces", " \t\n", []Item{{tText, 0, " \t\n"}, tstEOF}},
+ {"text", `to be or not`, []Item{{tText, 0, "to be or not"}, tstEOF}},
+ {"no markup", `{{< sc1 >}}`, []Item{tstLeftNoMD, tstSC1, tstRightNoMD, tstEOF}},
+ {"with EOL", "{{< sc1 \n >}}", []Item{tstLeftNoMD, tstSC1, tstRightNoMD, tstEOF}},
+
+ {"forward slash inside name", `{{< sc/sub >}}`, []Item{tstLeftNoMD, tstSCSlash, tstRightNoMD, tstEOF}},
+
+ {"simple with markup", `{{% sc1 %}}`, []Item{tstLeftMD, tstSC1, tstRightMD, tstEOF}},
+ {"with spaces", `{{< sc1 >}}`, []Item{tstLeftNoMD, tstSC1, tstRightNoMD, tstEOF}},
+ {"mismatched rightDelim", `{{< sc1 %}}`, []Item{tstLeftNoMD, tstSC1,
+ {tError, 0, "unrecognized character in shortcode action: U+0025 '%'. Note: Parameters with non-alphanumeric args must be quoted"}}},
+ {"inner, markup", `{{% sc1 %}} inner {{% /sc1 %}}`, []Item{
+ tstLeftMD,
+ tstSC1,
+ tstRightMD,
+ {tText, 0, " inner "},
+ tstLeftMD,
+ tstSCClose,
+ tstSC1,
+ tstRightMD,
+ tstEOF,
+ }},
+ {"close, but no open", `{{< /sc1 >}}`, []Item{
+ tstLeftNoMD, {tError, 0, "got closing shortcode, but none is open"}}},
+ {"close wrong", `{{< sc1 >}}{{< /another >}}`, []Item{
+ tstLeftNoMD, tstSC1, tstRightNoMD, tstLeftNoMD, tstSCClose,
+ {tError, 0, "closing tag for shortcode 'another' does not match start tag"}}},
+ {"close, but no open, more", `{{< sc1 >}}{{< /sc1 >}}{{< /another >}}`, []Item{
+ tstLeftNoMD, tstSC1, tstRightNoMD, tstLeftNoMD, tstSCClose, tstSC1, tstRightNoMD, tstLeftNoMD, tstSCClose,
+ {tError, 0, "closing tag for shortcode 'another' does not match start tag"}}},
+ {"close with extra keyword", `{{< sc1 >}}{{< /sc1 keyword>}}`, []Item{
+ tstLeftNoMD, tstSC1, tstRightNoMD, tstLeftNoMD, tstSCClose, tstSC1,
+ {tError, 0, "unclosed shortcode"}}},
+ {"Youtube id", `{{< sc1 -ziL-Q_456igdO-4 >}}`, []Item{
+ tstLeftNoMD, tstSC1, {tScParam, 0, "-ziL-Q_456igdO-4"}, tstRightNoMD, tstEOF}},
+ {"non-alphanumerics param quoted", `{{< sc1 "-ziL-.%QigdO-4" >}}`, []Item{
+ tstLeftNoMD, tstSC1, {tScParam, 0, "-ziL-.%QigdO-4"}, tstRightNoMD, tstEOF}},
+
+ {"two params", `{{< sc1 param1 param2 >}}`, []Item{
+ tstLeftNoMD, tstSC1, tstParam1, tstParam2, tstRightNoMD, tstEOF}},
+ // issue #934
+ {"self-closing", `{{< sc1 />}}`, []Item{
+ tstLeftNoMD, tstSC1, tstSCClose, tstRightNoMD, tstEOF}},
+ // Issue 2498
+ {"multiple self-closing", `{{< sc1 />}}{{< sc1 />}}`, []Item{
+ tstLeftNoMD, tstSC1, tstSCClose, tstRightNoMD,
+ tstLeftNoMD, tstSC1, tstSCClose, tstRightNoMD, tstEOF}},
+ {"self-closing with param", `{{< sc1 param1 />}}`, []Item{
+ tstLeftNoMD, tstSC1, tstParam1, tstSCClose, tstRightNoMD, tstEOF}},
+ {"multiple self-closing with param", `{{< sc1 param1 />}}{{< sc1 param1 />}}`, []Item{
+ tstLeftNoMD, tstSC1, tstParam1, tstSCClose, tstRightNoMD,
+ tstLeftNoMD, tstSC1, tstParam1, tstSCClose, tstRightNoMD, tstEOF}},
+ {"multiple different self-closing with param", `{{< sc1 param1 />}}{{< sc2 param1 />}}`, []Item{
+ tstLeftNoMD, tstSC1, tstParam1, tstSCClose, tstRightNoMD,
+ tstLeftNoMD, tstSC2, tstParam1, tstSCClose, tstRightNoMD, tstEOF}},
+ {"nested simple", `{{< sc1 >}}{{< sc2 >}}{{< /sc1 >}}`, []Item{
+ tstLeftNoMD, tstSC1, tstRightNoMD,
+ tstLeftNoMD, tstSC2, tstRightNoMD,
+ tstLeftNoMD, tstSCClose, tstSC1, tstRightNoMD, tstEOF}},
+ {"nested complex", `{{< sc1 >}}ab{{% sc2 param1 %}}cd{{< sc3 >}}ef{{< /sc3 >}}gh{{% /sc2 %}}ij{{< /sc1 >}}kl`, []Item{
+ tstLeftNoMD, tstSC1, tstRightNoMD,
+ {tText, 0, "ab"},
+ tstLeftMD, tstSC2, tstParam1, tstRightMD,
+ {tText, 0, "cd"},
+ tstLeftNoMD, tstSC3, tstRightNoMD,
+ {tText, 0, "ef"},
+ tstLeftNoMD, tstSCClose, tstSC3, tstRightNoMD,
+ {tText, 0, "gh"},
+ tstLeftMD, tstSCClose, tstSC2, tstRightMD,
+ {tText, 0, "ij"},
+ tstLeftNoMD, tstSCClose, tstSC1, tstRightNoMD,
+ {tText, 0, "kl"}, tstEOF,
+ }},
+
+ {"two quoted params", `{{< sc1 "param nr. 1" "param nr. 2" >}}`, []Item{
+ tstLeftNoMD, tstSC1, {tScParam, 0, "param nr. 1"}, {tScParam, 0, "param nr. 2"}, tstRightNoMD, tstEOF}},
+ {"two named params", `{{< sc1 param1="Hello World" param2="p2Val">}}`, []Item{
+ tstLeftNoMD, tstSC1, tstParam1, tstVal, tstParam2, {tScParamVal, 0, "p2Val"}, tstRightNoMD, tstEOF}},
+ {"escaped quotes", `{{< sc1 param1=\"Hello World\" >}}`, []Item{
+ tstLeftNoMD, tstSC1, tstParam1, tstVal, tstRightNoMD, tstEOF}},
+ {"escaped quotes, positional param", `{{< sc1 \"param1\" >}}`, []Item{
+ tstLeftNoMD, tstSC1, tstParam1, tstRightNoMD, tstEOF}},
+ {"escaped quotes inside escaped quotes", `{{< sc1 param1=\"Hello \"escaped\" World\" >}}`, []Item{
+ tstLeftNoMD, tstSC1, tstParam1,
+ {tScParamVal, 0, `Hello `}, {tError, 0, `got positional parameter 'escaped'. Cannot mix named and positional parameters`}}},
+ {"escaped quotes inside nonescaped quotes",
+ `{{< sc1 param1="Hello \"escaped\" World" >}}`, []Item{
+ tstLeftNoMD, tstSC1, tstParam1, {tScParamVal, 0, `Hello "escaped" World`}, tstRightNoMD, tstEOF}},
+ {"escaped quotes inside nonescaped quotes in positional param",
+ `{{< sc1 "Hello \"escaped\" World" >}}`, []Item{
+ tstLeftNoMD, tstSC1, {tScParam, 0, `Hello "escaped" World`}, tstRightNoMD, tstEOF}},
+ {"unterminated quote", `{{< sc1 param2="Hello World>}}`, []Item{
+ tstLeftNoMD, tstSC1, tstParam2, {tError, 0, "unterminated quoted string in shortcode parameter-argument: 'Hello World>}}'"}}},
+ {"one named param, one not", `{{< sc1 param1="Hello World" p2 >}}`, []Item{
+ tstLeftNoMD, tstSC1, tstParam1, tstVal,
+ {tError, 0, "got positional parameter 'p2'. Cannot mix named and positional parameters"}}},
+ {"one named param, one quoted positional param", `{{< sc1 param1="Hello World" "And Universe" >}}`, []Item{
+ tstLeftNoMD, tstSC1, tstParam1, tstVal,
+ {tError, 0, "got quoted positional parameter. Cannot mix named and positional parameters"}}},
+ {"one quoted positional param, one named param", `{{< sc1 "param1" param2="And Universe" >}}`, []Item{
+ tstLeftNoMD, tstSC1, tstParam1,
+ {tError, 0, "got named parameter 'param2'. Cannot mix named and positional parameters"}}},
+ {"ono positional param, one not", `{{< sc1 param1 param2="Hello World">}}`, []Item{
+ tstLeftNoMD, tstSC1, tstParam1,
+ {tError, 0, "got named parameter 'param2'. Cannot mix named and positional parameters"}}},
+ {"commented out", `{{</* sc1 */>}}`, []Item{
+ {tText, 0, "{{<"}, {tText, 0, " sc1 "}, {tText, 0, ">}}"}, tstEOF}},
+ {"commented out, with asterisk inside", `{{</* sc1 "**/*.pdf" */>}}`, []Item{
+ {tText, 0, "{{<"}, {tText, 0, " sc1 \"**/*.pdf\" "}, {tText, 0, ">}}"}, tstEOF}},
+ {"commented out, missing close", `{{</* sc1 >}}`, []Item{
+ {tError, 0, "comment must be closed"}}},
+ {"commented out, misplaced close", `{{</* sc1 >}}*/`, []Item{
+ {tError, 0, "comment must be closed"}}},
+}
+
+func TestShortcodeLexer(t *testing.T) {
+ t.Parallel()
+ for i, test := range shortCodeLexerTests {
+ items := collect(test.name, test.input, true, lexMainSection)
+ if !equal(items, test.items) {
+ t.Errorf("[%d] %s: got\n\t%v\nexpected\n\t%v", i, test.name, items, test.items)
+ }
+ }
+}
+
+func BenchmarkShortcodeLexer(b *testing.B) {
+ b.ResetTimer()
+ for i := 0; i < b.N; i++ {
+ for _, test := range shortCodeLexerTests {
+ items := collect(test.name, test.input, true, lexMainSection)
+ if !equal(items, test.items) {
+ b.Errorf("%s: got\n\t%v\nexpected\n\t%v", test.name, items, test.items)
+ }
+ }
+ }
+}
--- a/parser/pageparser/pageparser_test.go
+++ /dev/null
@@ -1,207 +1,0 @@
-// Copyright 2018 The Hugo Authors. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package pageparser
-
-import (
- "testing"
-)
-
-type shortCodeLexerTest struct {
- name string
- input string
- items []Item
-}
-
-var (
- tstEOF = Item{tEOF, 0, ""}
- tstLeftNoMD = Item{tLeftDelimScNoMarkup, 0, "{{<"}
- tstRightNoMD = Item{tRightDelimScNoMarkup, 0, ">}}"}
- tstLeftMD = Item{tLeftDelimScWithMarkup, 0, "{{%"}
- tstRightMD = Item{tRightDelimScWithMarkup, 0, "%}}"}
- tstSCClose = Item{tScClose, 0, "/"}
- tstSC1 = Item{tScName, 0, "sc1"}
- tstSC2 = Item{tScName, 0, "sc2"}
- tstSC3 = Item{tScName, 0, "sc3"}
- tstSCSlash = Item{tScName, 0, "sc/sub"}
- tstParam1 = Item{tScParam, 0, "param1"}
- tstParam2 = Item{tScParam, 0, "param2"}
- tstVal = Item{tScParamVal, 0, "Hello World"}
-)
-
-var shortCodeLexerTests = []shortCodeLexerTest{
- {"empty", "", []Item{tstEOF}},
- {"spaces", " \t\n", []Item{{tText, 0, " \t\n"}, tstEOF}},
- {"text", `to be or not`, []Item{{tText, 0, "to be or not"}, tstEOF}},
- {"no markup", `{{< sc1 >}}`, []Item{tstLeftNoMD, tstSC1, tstRightNoMD, tstEOF}},
- {"with EOL", "{{< sc1 \n >}}", []Item{tstLeftNoMD, tstSC1, tstRightNoMD, tstEOF}},
-
- {"forward slash inside name", `{{< sc/sub >}}`, []Item{tstLeftNoMD, tstSCSlash, tstRightNoMD, tstEOF}},
-
- {"simple with markup", `{{% sc1 %}}`, []Item{tstLeftMD, tstSC1, tstRightMD, tstEOF}},
- {"with spaces", `{{< sc1 >}}`, []Item{tstLeftNoMD, tstSC1, tstRightNoMD, tstEOF}},
- {"mismatched rightDelim", `{{< sc1 %}}`, []Item{tstLeftNoMD, tstSC1,
- {tError, 0, "unrecognized character in shortcode action: U+0025 '%'. Note: Parameters with non-alphanumeric args must be quoted"}}},
- {"inner, markup", `{{% sc1 %}} inner {{% /sc1 %}}`, []Item{
- tstLeftMD,
- tstSC1,
- tstRightMD,
- {tText, 0, " inner "},
- tstLeftMD,
- tstSCClose,
- tstSC1,
- tstRightMD,
- tstEOF,
- }},
- {"close, but no open", `{{< /sc1 >}}`, []Item{
- tstLeftNoMD, {tError, 0, "got closing shortcode, but none is open"}}},
- {"close wrong", `{{< sc1 >}}{{< /another >}}`, []Item{
- tstLeftNoMD, tstSC1, tstRightNoMD, tstLeftNoMD, tstSCClose,
- {tError, 0, "closing tag for shortcode 'another' does not match start tag"}}},
- {"close, but no open, more", `{{< sc1 >}}{{< /sc1 >}}{{< /another >}}`, []Item{
- tstLeftNoMD, tstSC1, tstRightNoMD, tstLeftNoMD, tstSCClose, tstSC1, tstRightNoMD, tstLeftNoMD, tstSCClose,
- {tError, 0, "closing tag for shortcode 'another' does not match start tag"}}},
- {"close with extra keyword", `{{< sc1 >}}{{< /sc1 keyword>}}`, []Item{
- tstLeftNoMD, tstSC1, tstRightNoMD, tstLeftNoMD, tstSCClose, tstSC1,
- {tError, 0, "unclosed shortcode"}}},
- {"Youtube id", `{{< sc1 -ziL-Q_456igdO-4 >}}`, []Item{
- tstLeftNoMD, tstSC1, {tScParam, 0, "-ziL-Q_456igdO-4"}, tstRightNoMD, tstEOF}},
- {"non-alphanumerics param quoted", `{{< sc1 "-ziL-.%QigdO-4" >}}`, []Item{
- tstLeftNoMD, tstSC1, {tScParam, 0, "-ziL-.%QigdO-4"}, tstRightNoMD, tstEOF}},
-
- {"two params", `{{< sc1 param1 param2 >}}`, []Item{
- tstLeftNoMD, tstSC1, tstParam1, tstParam2, tstRightNoMD, tstEOF}},
- // issue #934
- {"self-closing", `{{< sc1 />}}`, []Item{
- tstLeftNoMD, tstSC1, tstSCClose, tstRightNoMD, tstEOF}},
- // Issue 2498
- {"multiple self-closing", `{{< sc1 />}}{{< sc1 />}}`, []Item{
- tstLeftNoMD, tstSC1, tstSCClose, tstRightNoMD,
- tstLeftNoMD, tstSC1, tstSCClose, tstRightNoMD, tstEOF}},
- {"self-closing with param", `{{< sc1 param1 />}}`, []Item{
- tstLeftNoMD, tstSC1, tstParam1, tstSCClose, tstRightNoMD, tstEOF}},
- {"multiple self-closing with param", `{{< sc1 param1 />}}{{< sc1 param1 />}}`, []Item{
- tstLeftNoMD, tstSC1, tstParam1, tstSCClose, tstRightNoMD,
- tstLeftNoMD, tstSC1, tstParam1, tstSCClose, tstRightNoMD, tstEOF}},
- {"multiple different self-closing with param", `{{< sc1 param1 />}}{{< sc2 param1 />}}`, []Item{
- tstLeftNoMD, tstSC1, tstParam1, tstSCClose, tstRightNoMD,
- tstLeftNoMD, tstSC2, tstParam1, tstSCClose, tstRightNoMD, tstEOF}},
- {"nested simple", `{{< sc1 >}}{{< sc2 >}}{{< /sc1 >}}`, []Item{
- tstLeftNoMD, tstSC1, tstRightNoMD,
- tstLeftNoMD, tstSC2, tstRightNoMD,
- tstLeftNoMD, tstSCClose, tstSC1, tstRightNoMD, tstEOF}},
- {"nested complex", `{{< sc1 >}}ab{{% sc2 param1 %}}cd{{< sc3 >}}ef{{< /sc3 >}}gh{{% /sc2 %}}ij{{< /sc1 >}}kl`, []Item{
- tstLeftNoMD, tstSC1, tstRightNoMD,
- {tText, 0, "ab"},
- tstLeftMD, tstSC2, tstParam1, tstRightMD,
- {tText, 0, "cd"},
- tstLeftNoMD, tstSC3, tstRightNoMD,
- {tText, 0, "ef"},
- tstLeftNoMD, tstSCClose, tstSC3, tstRightNoMD,
- {tText, 0, "gh"},
- tstLeftMD, tstSCClose, tstSC2, tstRightMD,
- {tText, 0, "ij"},
- tstLeftNoMD, tstSCClose, tstSC1, tstRightNoMD,
- {tText, 0, "kl"}, tstEOF,
- }},
-
- {"two quoted params", `{{< sc1 "param nr. 1" "param nr. 2" >}}`, []Item{
- tstLeftNoMD, tstSC1, {tScParam, 0, "param nr. 1"}, {tScParam, 0, "param nr. 2"}, tstRightNoMD, tstEOF}},
- {"two named params", `{{< sc1 param1="Hello World" param2="p2Val">}}`, []Item{
- tstLeftNoMD, tstSC1, tstParam1, tstVal, tstParam2, {tScParamVal, 0, "p2Val"}, tstRightNoMD, tstEOF}},
- {"escaped quotes", `{{< sc1 param1=\"Hello World\" >}}`, []Item{
- tstLeftNoMD, tstSC1, tstParam1, tstVal, tstRightNoMD, tstEOF}},
- {"escaped quotes, positional param", `{{< sc1 \"param1\" >}}`, []Item{
- tstLeftNoMD, tstSC1, tstParam1, tstRightNoMD, tstEOF}},
- {"escaped quotes inside escaped quotes", `{{< sc1 param1=\"Hello \"escaped\" World\" >}}`, []Item{
- tstLeftNoMD, tstSC1, tstParam1,
- {tScParamVal, 0, `Hello `}, {tError, 0, `got positional parameter 'escaped'. Cannot mix named and positional parameters`}}},
- {"escaped quotes inside nonescaped quotes",
- `{{< sc1 param1="Hello \"escaped\" World" >}}`, []Item{
- tstLeftNoMD, tstSC1, tstParam1, {tScParamVal, 0, `Hello "escaped" World`}, tstRightNoMD, tstEOF}},
- {"escaped quotes inside nonescaped quotes in positional param",
- `{{< sc1 "Hello \"escaped\" World" >}}`, []Item{
- tstLeftNoMD, tstSC1, {tScParam, 0, `Hello "escaped" World`}, tstRightNoMD, tstEOF}},
- {"unterminated quote", `{{< sc1 param2="Hello World>}}`, []Item{
- tstLeftNoMD, tstSC1, tstParam2, {tError, 0, "unterminated quoted string in shortcode parameter-argument: 'Hello World>}}'"}}},
- {"one named param, one not", `{{< sc1 param1="Hello World" p2 >}}`, []Item{
- tstLeftNoMD, tstSC1, tstParam1, tstVal,
- {tError, 0, "got positional parameter 'p2'. Cannot mix named and positional parameters"}}},
- {"one named param, one quoted positional param", `{{< sc1 param1="Hello World" "And Universe" >}}`, []Item{
- tstLeftNoMD, tstSC1, tstParam1, tstVal,
- {tError, 0, "got quoted positional parameter. Cannot mix named and positional parameters"}}},
- {"one quoted positional param, one named param", `{{< sc1 "param1" param2="And Universe" >}}`, []Item{
- tstLeftNoMD, tstSC1, tstParam1,
- {tError, 0, "got named parameter 'param2'. Cannot mix named and positional parameters"}}},
- {"ono positional param, one not", `{{< sc1 param1 param2="Hello World">}}`, []Item{
- tstLeftNoMD, tstSC1, tstParam1,
- {tError, 0, "got named parameter 'param2'. Cannot mix named and positional parameters"}}},
- {"commented out", `{{</* sc1 */>}}`, []Item{
- {tText, 0, "{{<"}, {tText, 0, " sc1 "}, {tText, 0, ">}}"}, tstEOF}},
- {"commented out, with asterisk inside", `{{</* sc1 "**/*.pdf" */>}}`, []Item{
- {tText, 0, "{{<"}, {tText, 0, " sc1 \"**/*.pdf\" "}, {tText, 0, ">}}"}, tstEOF}},
- {"commented out, missing close", `{{</* sc1 >}}`, []Item{
- {tError, 0, "comment must be closed"}}},
- {"commented out, misplaced close", `{{</* sc1 >}}*/`, []Item{
- {tError, 0, "comment must be closed"}}},
-}
-
-func TestShortcodeLexer(t *testing.T) {
- t.Parallel()
- for i, test := range shortCodeLexerTests {
- items := collect(&test)
- if !equal(items, test.items) {
- t.Errorf("[%d] %s: got\n\t%v\nexpected\n\t%v", i, test.name, items, test.items)
- }
- }
-}
-
-func BenchmarkShortcodeLexer(b *testing.B) {
- b.ResetTimer()
- for i := 0; i < b.N; i++ {
- for _, test := range shortCodeLexerTests {
- items := collect(&test)
- if !equal(items, test.items) {
- b.Errorf("%s: got\n\t%v\nexpected\n\t%v", test.name, items, test.items)
- }
- }
- }
-}
-
-func collect(t *shortCodeLexerTest) (items []Item) {
- l := newPageLexer(t.name, t.input, 0).run()
- for {
- item := l.nextItem()
- items = append(items, item)
- if item.typ == tEOF || item.typ == tError {
- break
- }
- }
- return
-}
-
-// no positional checking, for now ...
-func equal(i1, i2 []Item) bool {
- if len(i1) != len(i2) {
- return false
- }
- for k := range i1 {
- if i1[k].typ != i2[k].typ {
- return false
- }
- if i1[k].Val != i2[k].Val {
- return false
- }
- }
- return true
-}