shithub: hugo

ref: b05a292c218aeb1e21fc5fee2e94437b09cb977b
dir: /parser/page.go/

View raw version
package parser

import (
	"bufio"
	"bytes"
	"fmt"
	"io"
	"regexp"
	"unicode"
)

const (
	HTML_LEAD       = "<"
	YAML_LEAD       = "-"
	YAML_DELIM_UNIX = "---\n"
	YAML_DELIM_DOS  = "---\r\n"
	YAML_DELIM      = "---"
	TOML_LEAD       = "+"
	TOML_DELIM_UNIX = "+++\n"
	TOML_DELIM_DOS  = "+++\r\n"
	TOML_DELIM      = "+++"
	JSON_LEAD       = "{"
)

var (
	delims = regexp.MustCompile(
		"^(" + regexp.QuoteMeta(YAML_DELIM) + `\s*\n|` + regexp.QuoteMeta(TOML_DELIM) + `\s*\n|` + regexp.QuoteMeta(JSON_LEAD) + ")",
	)

	UnixEnding = []byte("\n")
	DosEnding  = []byte("\r\n")
)

type FrontMatter []byte
type Content []byte

type Page interface {
	FrontMatter() FrontMatter
	Content() Content
	IsRenderable() bool
	Metadata() (interface{}, error)
}

type page struct {
	render      bool
	frontmatter FrontMatter
	content     Content
}

func (p *page) Content() Content {
	return p.content
}

func (p *page) FrontMatter() FrontMatter {
	return p.frontmatter
}

func (p *page) IsRenderable() bool {
	return p.render
}

func (p *page) Metadata() (meta interface{}, err error) {
	frontmatter := p.FrontMatter()

	if len(frontmatter) != 0 {
		fm := DetectFrontMatter(rune(frontmatter[0]))
		meta, err = fm.Parse(frontmatter)
		if err != nil {
			return
		}
	}
	return
}

// ReadFrom reads the content from an io.Reader and constructs a page.
func ReadFrom(r io.Reader) (p Page, err error) {
	reader := bufio.NewReader(r)

	if err = chompWhitespace(reader); err != nil && err != io.EOF {
		return
	}

	firstLine, err := peekLine(reader)
	if err != nil && err != io.EOF {
		return
	}

	newp := new(page)
	newp.render = shouldRender(firstLine)

	if newp.render && isFrontMatterDelim(firstLine) {
		left, right := determineDelims(firstLine)
		fm, err := extractFrontMatterDelims(reader, left, right)
		if err != nil {
			return nil, err
		}
		newp.frontmatter = fm
	}

	content, err := extractContent(reader)
	if err != nil {
		return nil, err
	}

	newp.content = content

	return newp, nil
}

func chompWhitespace(r io.RuneScanner) (err error) {
	for {
		c, _, err := r.ReadRune()
		if err != nil {
			return err
		}
		if !unicode.IsSpace(c) {
			r.UnreadRune()
			return nil
		}
	}
}

func peekLine(r *bufio.Reader) (line []byte, err error) {
	firstFive, err := r.Peek(5)
	if err != nil {
		return
	}
	idx := bytes.IndexByte(firstFive, '\n')
	if idx == -1 {
		return firstFive, nil
	}
	idx++ // include newline.
	return firstFive[:idx], nil
}

func shouldRender(lead []byte) (frontmatter bool) {
	if len(lead) <= 0 {
		return
	}

	if bytes.Equal(lead[:1], []byte(HTML_LEAD)) {
		return
	}
	return true
}

func isFrontMatterDelim(data []byte) bool {
	return delims.Match(data)
}

func determineDelims(firstLine []byte) (left, right []byte) {
	switch len(firstLine) {
	case 5:
		fallthrough
	case 4:
		if firstLine[0] == YAML_LEAD[0] {
			return []byte(YAML_DELIM), []byte(YAML_DELIM)
		}
		return []byte(TOML_DELIM), []byte(TOML_DELIM)
	case 3:
		fallthrough
	case 2:
		fallthrough
	case 1:
		return []byte(JSON_LEAD), []byte("}")
	default:
		panic(fmt.Sprintf("Unable to determine delims from %q", firstLine))
	}
}

// extractFrontMatterDelims takes a frontmatter from the content bufio.Reader.
// Begining white spaces of the bufio.Reader must be trimmed before call this
// function.
func extractFrontMatterDelims(r *bufio.Reader, left, right []byte) (fm FrontMatter, err error) {
	var (
		c         byte
		buf       bytes.Buffer
		level     int
		sameDelim = bytes.Equal(left, right)
	)

	// Frontmatter must start with a delimiter. To check it first,
	// pre-reads beginning delimiter length - 1 bytes from Reader
	for i := 0; i < len(left)-1; i++ {
		if c, err = r.ReadByte(); err != nil {
			return nil, fmt.Errorf("unable to read frontmatter at filepos %d: %s", buf.Len(), err)
		}
		if err = buf.WriteByte(c); err != nil {
			return nil, err
		}
	}

	// Reads a character from Reader one by one and checks it matches the
	// last character of one of delemiters to find the last character of
	// frontmatter. If it matches, makes sure it contains the delimiter
	// and if so, also checks it is followed by CR+LF or LF when YAML,
	// TOML case. In JSON case, nested delimiters must be parsed and it
	// is expected that the delimiter only contains one character.
	for {
		if c, err = r.ReadByte(); err != nil {
			return nil, fmt.Errorf("unable to read frontmatter at filepos %d: %s", buf.Len(), err)
		}
		if err = buf.WriteByte(c); err != nil {
			return nil, err
		}

		switch c {
		case left[len(left)-1]:
			if sameDelim { // YAML, TOML case
				if bytes.HasSuffix(buf.Bytes(), left) && (buf.Len() == len(left) || buf.Bytes()[buf.Len()-len(left)-1] == '\n') {
				nextByte:
					c, err = r.ReadByte()
					if err != nil {
						// It is ok that the end delimiter ends with EOF
						if err != io.EOF || level != 1 {
							return nil, fmt.Errorf("unable to read frontmatter at filepos %d: %s", buf.Len(), err)
						}
					} else {
						switch c {
						case '\n':
							// ok
						case ' ':
							// Consume this byte and try to match again
							goto nextByte
						case '\r':
							if err = buf.WriteByte(c); err != nil {
								return nil, err
							}
							if c, err = r.ReadByte(); err != nil {
								return nil, fmt.Errorf("unable to read frontmatter at filepos %d: %s", buf.Len(), err)
							}
							if c != '\n' {
								return nil, fmt.Errorf("frontmatter delimiter must be followed by CR+LF or LF but those can't be found at filepos %d", buf.Len())
							}
						default:
							return nil, fmt.Errorf("frontmatter delimiter must be followed by CR+LF or LF but those can't be found at filepos %d", buf.Len())
						}
						if err = buf.WriteByte(c); err != nil {
							return nil, err
						}
					}
					if level == 0 {
						level = 1
					} else {
						level = 0
					}
				}
			} else { // JSON case
				level++
			}
		case right[len(right)-1]: // JSON case only reaches here
			level--
		}

		if level == 0 {
			// Consumes white spaces immediately behind frontmatter
			if err = chompWhitespace(r); err != nil {
				if err != io.EOF {
					return nil, err
				}
			}
			return buf.Bytes(), nil
		}
	}
}

func extractContent(r io.Reader) (content Content, err error) {
	wr := new(bytes.Buffer)
	if _, err = wr.ReadFrom(r); err != nil {
		return
	}
	return wr.Bytes(), nil
}