ref: 3ea4df35f2435f1cb371fa54f6fd89fd6d7d980f
parent: be0903c71a1851a4d13df9de37a2037e7bc431ec
author: bep <[email protected]>
date: Sat May 30 10:46:58 EDT 2015
Remove accents in URLs, so that the taxonomy `Gérard Depardieu` gives paths of the form `gerard-depardieu`. Unfortunately this introduces two imports from `golang.org/x/text`, but Unicode normalization isn't something we'd want to write from scratch. See https://blog.golang.org/normalization. See #1180.
--- a/helpers/path.go
+++ b/helpers/path.go
@@ -19,6 +19,8 @@
"github.com/spf13/afero"
jww "github.com/spf13/jwalterweatherman"
"github.com/spf13/viper"
+ "golang.org/x/text/transform"
+ "golang.org/x/text/unicode/norm"
"io"
"os"
"path/filepath"
@@ -97,7 +99,16 @@
}
}
+ // remove accents - see https://blog.golang.org/normalization
+ t := transform.Chain(norm.NFD, transform.RemoveFunc(isMn), norm.NFC)
+ result, _, _ := transform.String(t, string(target))
+ return result
+
return string(target)
+}
+
+func isMn(r rune) bool {
+ return unicode.Is(unicode.Mn, r) // Mn: nonspacing marks
}
// ReplaceExtension takes a path and an extension, strips the old extension
--- a/helpers/path_test.go
+++ b/helpers/path_test.go
@@ -27,7 +27,7 @@
{"FOo/BaR.html", "FOo/BaR.html"},
{"трям/трям", "трям/трям"},
{"은행", "은행"},
- {"Банковский кассир", "Банковский-кассир"},
+ {"Банковский кассир", "Банковскии-кассир"},
}
for _, test := range tests {
@@ -717,7 +717,7 @@
{testDir + "FOo/BaR.html", dir + testDir + "FOo/BaR.html" + FilePathSeparator},
{testDir + "трям/трям", dir + testDir + "трям/трям" + FilePathSeparator},
{testDir + "은행", dir + testDir + "은행" + FilePathSeparator},
- {testDir + "Банковский кассир", dir + testDir + "Банковский-кассир" + FilePathSeparator},
+ {testDir + "Банковский кассир", dir + testDir + "Банковскии-кассир" + FilePathSeparator},
}
for _, test := range tests {