ref: 5d50c46482d231efa26c95e4705e720fb9bf753c
parent: d48b986c45f8d5a5fef7c98da17af8ca82867b25
author: Mattias Wadman <[email protected]>
date: Tue Apr 12 20:14:00 EDT 2016
Chomp Unicode BOM if present. Useful when using or sharing files with users whose editors prepend a Unicode byte order mark (BOM) to files (like Windows Notepad). Files are still assumed to be UTF-8 encoded. Closes #2075
--- a/hugolib/page_test.go
+++ b/hugolib/page_test.go
@@ -1052,6 +1052,19 @@
}
}
+func TestChompBOM(t *testing.T) {
+ p, _ := NewPage("simple.md")
+ const utf8BOM = "\xef\xbb\xbf"
+ _, err := p.ReadFrom(strings.NewReader(utf8BOM + simplePage))
+ p.Convert()
+
+ if err != nil {
+ t.Fatalf("Unable to create a page with BOM prefixed frontmatter and body content: %s", err)
+ }
+
+ checkPageTitle(t, p, "Simple")
+}
+
func listEqual(left, right []string) bool {
if len(left) != len(right) {
return false
--- a/parser/page.go
+++ b/parser/page.go
@@ -50,6 +50,8 @@
HTMLCommentStart = "<!--"
// HTMLCommentEnd identifies the end of HTML comment.
HTMLCommentEnd = "-->"
+ // BOM Unicode byte order marker
+ BOM = '\ufeff'
)
var (
@@ -101,6 +103,10 @@
func ReadFrom(r io.Reader) (p Page, err error) {
reader := bufio.NewReader(r)
+ // chomp BOM and assume UTF-8
+ if err = chompBOM(reader); err != nil && err != io.EOF {
+ return
+ }
if err = chompWhitespace(reader); err != nil && err != io.EOF {
return
}
@@ -133,6 +139,19 @@
newp.content = content
return newp, nil
+}
+
+func chompBOM(r io.RuneScanner) (err error) {
+ for {
+ c, _, err := r.ReadRune()
+ if err != nil {
+ return err
+ }
+ if c != BOM {
+ r.UnreadRune()
+ return nil
+ }
+ }
}
func chompWhitespace(r io.RuneScanner) (err error) {