shithub: hugo

Download patch

ref: 5d50c46482d231efa26c95e4705e720fb9bf753c
parent: d48b986c45f8d5a5fef7c98da17af8ca82867b25
author: Mattias Wadman <[email protected]>
date: Tue Apr 12 20:14:00 EDT 2016

Chomp Unicode BOM if present

Useful if using or sharing files with users whose editors prepend a
Unicode byte order mark (BOM) to files (like Windows Notepad).

This will still assume files are UTF-8 encoded.

Closes #2075

--- a/hugolib/page_test.go
+++ b/hugolib/page_test.go
@@ -1052,6 +1052,19 @@
 	}
 }
 
+// TestChompBOM checks that a UTF-8 BOM before the front matter is skipped by ReadFrom.
+func TestChompBOM(t *testing.T) {
+	p, _ := NewPage("simple.md")
+	const utf8BOM = "\xef\xbb\xbf"
+	_, err := p.ReadFrom(strings.NewReader(utf8BOM + simplePage))
+	if err != nil {
+		t.Fatalf("Unable to create a page with BOM prefixed frontmatter and body content: %s", err)
+	}
+	p.Convert() // convert only after the read is known to have succeeded
+
+	checkPageTitle(t, p, "Simple")
+}
+
 func listEqual(left, right []string) bool {
 	if len(left) != len(right) {
 		return false
--- a/parser/page.go
+++ b/parser/page.go
@@ -50,6 +50,8 @@
 	HTMLCommentStart = "<!--"
 	// HTMLCommentEnd identifies the end of HTML comment.
 	HTMLCommentEnd = "-->"
+	// BOM is the Unicode byte order mark (U+FEFF).
+	BOM = '\ufeff'
 )
 
 var (
@@ -101,6 +103,10 @@
 func ReadFrom(r io.Reader) (p Page, err error) {
 	reader := bufio.NewReader(r)
 
+	// chomp BOM and assume UTF-8
+	if err = chompBOM(reader); err != nil && err != io.EOF {
+		return
+	}
 	if err = chompWhitespace(reader); err != nil && err != io.EOF {
 		return
 	}
@@ -133,6 +139,19 @@
 	newp.content = content
 
 	return newp, nil
+}
+
+// chompBOM discards leading BOM runes from r and unreads the first non-BOM rune.
+func chompBOM(r io.RuneScanner) (err error) {
+	for {
+		var c rune
+		if c, _, err = r.ReadRune(); err != nil {
+			return err // includes io.EOF; the caller decides whether that is fatal
+		}
+		if c != BOM {
+			return r.UnreadRune() // report a failed push-back instead of dropping it
+		}
+	}
 }
 
 func chompWhitespace(r io.RuneScanner) (err error) {