shithub: hugo

Download patch

ref: 3ea4df35f2435f1cb371fa54f6fd89fd6d7d980f
parent: be0903c71a1851a4d13df9de37a2037e7bc431ec
author: bep <[email protected]>
date: Sat May 30 10:46:58 EDT 2015

Remove accents in URLs

So the taxonomy `Gérard Depardieu` gives paths of the form `gerard-depardieu`.

Unfortunately this introduces two imports from `golang.org/x/text`, but Unicode normalization isn't something we'd want to write from scratch.

See https://blog.golang.org/normalization

See #1180

--- a/helpers/path.go
+++ b/helpers/path.go
@@ -19,6 +19,8 @@
 	"github.com/spf13/afero"
 	jww "github.com/spf13/jwalterweatherman"
 	"github.com/spf13/viper"
+	"golang.org/x/text/transform"
+	"golang.org/x/text/unicode/norm"
 	"io"
 	"os"
 	"path/filepath"
@@ -97,7 +99,16 @@
 		}
 	}
 
+	// remove accents - see https://blog.golang.org/normalization
+	t := transform.Chain(norm.NFD, transform.RemoveFunc(isMn), norm.NFC)
+	result, _, _ := transform.String(t, string(target))
+	return result
+
 	return string(target)
+}
+
+func isMn(r rune) bool {
+	return unicode.Is(unicode.Mn, r) // Mn: nonspacing marks
 }
 
 // ReplaceExtension takes a path and an extension, strips the old extension
--- a/helpers/path_test.go
+++ b/helpers/path_test.go
@@ -27,7 +27,7 @@
 		{"FOo/BaR.html", "FOo/BaR.html"},
 		{"трям/трям", "трям/трям"},
 		{"은행", "은행"},
-		{"Банковский кассир", "Банковский-кассир"},
+		{"Банковский кассир", "Банковскии-кассир"},
 	}
 
 	for _, test := range tests {
@@ -717,7 +717,7 @@
 		{testDir + "FOo/BaR.html", dir + testDir + "FOo/BaR.html" + FilePathSeparator},
 		{testDir + "трям/трям", dir + testDir + "трям/трям" + FilePathSeparator},
 		{testDir + "은행", dir + testDir + "은행" + FilePathSeparator},
-		{testDir + "Банковский кассир", dir + testDir + "Банковский-кассир" + FilePathSeparator},
+		{testDir + "Банковский кассир", dir + testDir + "Банковскии-кассир" + FilePathSeparator},
 	}
 
 	for _, test := range tests {