shithub: hugo

Download patch

ref: 16e7c1120346bd853cf6510ffac8e94824bf2c7f
parent: 8f071fc159ce9a0fc0ea14a73bde8f299bedd109
author: Bjørn Erik Pedersen <[email protected]>
date: Sun Jan 5 06:52:00 EST 2020

markup/goldmark: Add an optional Blackfriday auto ID strategy

Fixes #6707

--- a/markup/blackfriday/convert.go
+++ b/markup/blackfriday/convert.go
@@ -15,6 +15,8 @@
 package blackfriday
 
 import (
+	"unicode"
+
 	"github.com/gohugoio/hugo/identity"
 	"github.com/gohugoio/hugo/markup/blackfriday/blackfriday_config"
 	"github.com/gohugoio/hugo/markup/converter"
@@ -61,7 +63,27 @@
 }
 
 func (c *blackfridayConverter) SanitizeAnchorName(s string) string {
-	return blackfriday.SanitizedAnchorName(s)
+	return SanitizedAnchorName(s)
+}
+
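+// SanitizedAnchorName returns text as a Blackfriday-style anchor name: lowercased letters and numbers, with each run of other characters between them replaced by one '-'.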
+// Implementation borrowed from https://github.com/russross/blackfriday/blob/a477dd1646916742841ed20379f941cfa6c5bb6f/block.go#L1464
+func SanitizedAnchorName(text string) string {
+	var anchorName []rune
+	futureDash := false
+	for _, r := range text {
+		switch {
+		case unicode.IsLetter(r) || unicode.IsNumber(r):
+			if futureDash && len(anchorName) > 0 {
+				anchorName = append(anchorName, '-')
+			}
+			futureDash = false
+			anchorName = append(anchorName, unicode.ToLower(r))
+		default:
+			futureDash = true
+		}
+	}
+	return string(anchorName)
 }
 
 func (c *blackfridayConverter) AnchorSuffix() string {
--- a/markup/blackfriday/convert_test.go
+++ b/markup/blackfriday/convert_test.go
@@ -179,3 +179,45 @@
 	c.Assert(s, qt.Contains, "This is a footnote.<sup class=\"footnote-ref\" id=\"fnref:testid:1\"><a href=\"#fn:testid:1\">1</a></sup>")
 	c.Assert(s, qt.Contains, "<a class=\"footnote-return\" href=\"#fnref:testid:1\"><sup>[return]</sup></a>")
 }
+
+// Tests borrowed from https://github.com/russross/blackfriday/blob/a925a152c144ea7de0f451eaf2f7db9e52fa005a/block_test.go#L1817
+func TestSanitizedAnchorName(t *testing.T) {
+	tests := []struct {
+		text string
+		want string
+	}{
+		{
+			text: "This is a header",
+			want: "this-is-a-header",
+		},
+		{
+			text: "This is also          a header",
+			want: "this-is-also-a-header",
+		},
+		{
+			text: "main.go",
+			want: "main-go",
+		},
+		{
+			text: "Article 123",
+			want: "article-123",
+		},
+		{
+			text: "<- Let's try this, shall we?",
+			want: "let-s-try-this-shall-we",
+		},
+		{
+			text: "        ",
+			want: "",
+		},
+		{
+			text: "Hello, 世界",
+			want: "hello-世界",
+		},
+	}
+	for _, test := range tests {
+		if got := SanitizedAnchorName(test.text); got != test.want {
+			t.Errorf("SanitizedAnchorName(%q):\ngot %q\nwant %q", test.text, got, test.want)
+		}
+	}
+}
--- a/markup/goldmark/autoid.go
+++ b/markup/goldmark/autoid.go
@@ -19,6 +19,8 @@
 	"unicode"
 	"unicode/utf8"
 
+	"github.com/gohugoio/hugo/markup/blackfriday"
+
 	"github.com/gohugoio/hugo/markup/goldmark/goldmark_config"
 
 	"github.com/gohugoio/hugo/common/text"
@@ -30,34 +32,41 @@
 	bp "github.com/gohugoio/hugo/bufferpool"
 )
 
-func sanitizeAnchorNameString(s string, asciiOnly bool) string {
-	return string(sanitizeAnchorName([]byte(s), asciiOnly))
+func sanitizeAnchorNameString(s string, idType string) string {
+	return string(sanitizeAnchorName([]byte(s), idType))
 }
 
-func sanitizeAnchorName(b []byte, asciiOnly bool) []byte {
-	return sanitizeAnchorNameWithHook(b, asciiOnly, nil)
+func sanitizeAnchorName(b []byte, idType string) []byte {
+	return sanitizeAnchorNameWithHook(b, idType, nil)
 }
 
-func sanitizeAnchorNameWithHook(b []byte, asciiOnly bool, hook func(buf *bytes.Buffer)) []byte {
+func sanitizeAnchorNameWithHook(b []byte, idType string, hook func(buf *bytes.Buffer)) []byte {
 	buf := bp.GetBuffer()
 
-	if asciiOnly {
-		// Normalize it to preserve accents if possible.
-		b = text.RemoveAccents(b)
-	}
+	if idType == goldmark_config.AutoHeadingIDTypeBlackfriday {
+		// TODO(bep) make it more efficient; this converts []byte -> string -> []rune -> string before writing to buf.
+		buf.WriteString(blackfriday.SanitizedAnchorName(string(b)))
+	} else {
+		asciiOnly := idType == goldmark_config.AutoHeadingIDTypeGitHubAscii
 
-	for len(b) > 0 {
-		r, size := utf8.DecodeRune(b)
-		switch {
-		case asciiOnly && size != 1:
-		case r == '-' || isSpace(r):
-			buf.WriteRune('-')
-		case isAlphaNumeric(r):
-			buf.WriteRune(unicode.ToLower(r))
-		default:
+		if asciiOnly {
+			// Normalize it to preserve accents if possible.
+			b = text.RemoveAccents(b)
 		}
 
-		b = b[size:]
+		for len(b) > 0 {
+			r, size := utf8.DecodeRune(b)
+			switch {
+			case asciiOnly && size != 1:
+			case r == '-' || isSpace(r):
+				buf.WriteRune('-')
+			case isAlphaNumeric(r):
+				buf.WriteRune(unicode.ToLower(r))
+			default:
+			}
+
+			b = b[size:]
+		}
 	}
 
 	if hook != nil {
@@ -83,19 +92,19 @@
 var _ parser.IDs = (*idFactory)(nil)
 
 type idFactory struct {
-	asciiOnly bool
-	vals      map[string]struct{}
+	idType string
+	vals   map[string]struct{}
 }
 
 func newIDFactory(idType string) *idFactory {
 	return &idFactory{
-		vals:      make(map[string]struct{}),
-		asciiOnly: idType == goldmark_config.AutoHeadingIDTypeGitHubAscii,
+		vals:   make(map[string]struct{}),
+		idType: idType,
 	}
 }
 
 func (ids *idFactory) Generate(value []byte, kind ast.NodeKind) []byte {
-	return sanitizeAnchorNameWithHook(value, ids.asciiOnly, func(buf *bytes.Buffer) {
+	return sanitizeAnchorNameWithHook(value, ids.idType, func(buf *bytes.Buffer) {
 		if buf.Len() == 0 {
 			if kind == ast.KindHeading {
 				buf.WriteString("heading")
--- a/markup/goldmark/autoid_test.go
+++ b/markup/goldmark/autoid_test.go
@@ -17,6 +17,8 @@
 	"strings"
 	"testing"
 
+	"github.com/gohugoio/hugo/markup/goldmark/goldmark_config"
+
 	qt "github.com/frankban/quicktest"
 )
 
@@ -69,9 +71,9 @@
 		expect := expectlines[i]
 		c.Run(input, func(c *qt.C) {
 			b := []byte(input)
-			got := string(sanitizeAnchorName(b, false))
+			got := string(sanitizeAnchorName(b, goldmark_config.AutoHeadingIDTypeGitHub))
 			c.Assert(got, qt.Equals, expect)
-			c.Assert(sanitizeAnchorNameString(input, false), qt.Equals, expect)
+			c.Assert(sanitizeAnchorNameString(input, goldmark_config.AutoHeadingIDTypeGitHub), qt.Equals, expect)
 			c.Assert(string(b), qt.Equals, input)
 		})
 	}
@@ -80,16 +82,21 @@
 func TestSanitizeAnchorNameAsciiOnly(t *testing.T) {
 	c := qt.New(t)
 
-	c.Assert(sanitizeAnchorNameString("god is神真美好 good", true), qt.Equals, "god-is-good")
-	c.Assert(sanitizeAnchorNameString("Resumé", true), qt.Equals, "resume")
+	c.Assert(sanitizeAnchorNameString("god is神真美好 good", goldmark_config.AutoHeadingIDTypeGitHubAscii), qt.Equals, "god-is-good")
+	c.Assert(sanitizeAnchorNameString("Resumé", goldmark_config.AutoHeadingIDTypeGitHubAscii), qt.Equals, "resume")
 
 }
 
+func TestSanitizeAnchorNameBlackfriday(t *testing.T) {
+	c := qt.New(t)
+	c.Assert(sanitizeAnchorNameString("Let's try this, shall we?", goldmark_config.AutoHeadingIDTypeBlackfriday), qt.Equals, "let-s-try-this-shall-we")
+}
+
 func BenchmarkSanitizeAnchorName(b *testing.B) {
 	input := []byte("God is good: 神真美好")
 	b.ResetTimer()
 	for i := 0; i < b.N; i++ {
-		result := sanitizeAnchorName(input, false)
+		result := sanitizeAnchorName(input, goldmark_config.AutoHeadingIDTypeGitHub)
 		if len(result) != 24 {
 			b.Fatalf("got %d", len(result))
 
@@ -101,7 +108,7 @@
 	input := []byte("God is good: 神真美好")
 	b.ResetTimer()
 	for i := 0; i < b.N; i++ {
-		result := sanitizeAnchorName(input, true)
+		result := sanitizeAnchorName(input, goldmark_config.AutoHeadingIDTypeGitHubAscii)
 		if len(result) != 12 {
 			b.Fatalf("got %d", len(result))
 
@@ -109,11 +116,23 @@
 	}
 }
 
+func BenchmarkSanitizeAnchorNameBlackfriday(b *testing.B) {
+	input := []byte("God is good: 神真美好")
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		result := sanitizeAnchorName(input, goldmark_config.AutoHeadingIDTypeBlackfriday)
+		if len(result) != 24 {
+			b.Fatalf("got %d", len(result))
+
+		}
+	}
+}
+
 func BenchmarkSanitizeAnchorNameString(b *testing.B) {
 	input := "God is good: 神真美好"
 	b.ResetTimer()
 	for i := 0; i < b.N; i++ {
-		result := sanitizeAnchorNameString(input, false)
+		result := sanitizeAnchorNameString(input, goldmark_config.AutoHeadingIDTypeGitHub)
 		if len(result) != 24 {
 			b.Fatalf("got %d", len(result))
 		}
--- a/markup/goldmark/convert.go
+++ b/markup/goldmark/convert.go
@@ -29,7 +29,6 @@
 
 	"github.com/gohugoio/hugo/hugofs"
 	"github.com/gohugoio/hugo/markup/converter"
-	"github.com/gohugoio/hugo/markup/goldmark/goldmark_config"
 	"github.com/gohugoio/hugo/markup/highlight"
 	"github.com/gohugoio/hugo/markup/tableofcontents"
 	"github.com/yuin/goldmark"
@@ -57,7 +56,7 @@
 			cfg: cfg,
 			md:  md,
 			sanitizeAnchorName: func(s string) string {
-				return sanitizeAnchorNameString(s, cfg.MarkupConfig.Goldmark.Parser.AutoHeadingIDType == goldmark_config.AutoHeadingIDTypeGitHub)
+				return sanitizeAnchorNameString(s, cfg.MarkupConfig.Goldmark.Parser.AutoHeadingIDType)
 			},
 		}, nil
 	}), nil
--- a/markup/goldmark/convert_test.go
+++ b/markup/goldmark/convert_test.go
@@ -178,6 +178,21 @@
 	c.Assert(got, qt.Contains, "<h2 id=\"god-is-good-\">")
 }
 
+func TestConvertAutoIDBlackfriday(t *testing.T) {
+	c := qt.New(t)
+
+	content := `
+## Let's try this, shall we?
+
+`
+	mconf := markup_config.Default
+	mconf.Goldmark.Parser.AutoHeadingIDType = goldmark_config.AutoHeadingIDTypeBlackfriday
+	b := convert(c, mconf, content)
+	got := string(b.Bytes())
+
+	c.Assert(got, qt.Contains, "<h2 id=\"let-s-try-this-shall-we\">")
+}
+
 func TestCodeFence(t *testing.T) {
 	c := qt.New(t)
 
--- a/markup/goldmark/goldmark_config/config.go
+++ b/markup/goldmark/goldmark_config/config.go
@@ -17,6 +17,7 @@
 const (
 	AutoHeadingIDTypeGitHub      = "github"
 	AutoHeadingIDTypeGitHubAscii = "github-ascii"
+	AutoHeadingIDTypeBlackfriday = "blackfriday"
 )
 
 // DefaultConfig holds the default Goldmark configuration.