shithub: hugo

Download patch

ref: 11ca84f8cb19f01b7977b549e63e0ce0d126054e
parent: 24ffe04360f337c7b189df52b9a7c22683ef395e
author: Anton Ageev <[email protected]>
date: Sun Feb 2 13:18:01 EST 2014

Add unicode support for aliases, indexes, urlize template filter.
Now aliases and indexes are not restricted to ASCII letters and can include
any Unicode letters.

--- a/helpers/templates.go
+++ b/helpers/templates.go
@@ -14,16 +14,43 @@
 package helpers
 
 import (
+	"net/url"
 	"regexp"
 	"strings"
+	"unicode"
 )
 
 var sanitizeRegexp = regexp.MustCompile("[^a-zA-Z0-9./_-]")
 
-func Urlize(url string) string {
-	return Sanitize(strings.ToLower(strings.Replace(strings.TrimSpace(url), " ", "-", -1)))
+func MakePath(s string) string {
+	return unicodeSanitize(strings.ToLower(strings.Replace(strings.TrimSpace(s), " ", "-", -1)))
 }
 
+func Urlize(uri string) string {
+	sanitized := MakePath(uri)
+
+	// escape unicode letters
+	parsedUri, err := url.Parse(sanitized)
+	if err != nil {
+		// if net/url cannot parse the URL, it means Sanitize works incorrectly
+		panic(err)
+	}
+	return parsedUri.String()
+}
+
 func Sanitize(s string) string {
 	return sanitizeRegexp.ReplaceAllString(s, "")
+}
+
+func unicodeSanitize(s string) string {
+	source := []rune(s)
+	target := make([]rune, 0, len(source))
+
+	for _, r := range source {
+		if unicode.IsLetter(r) || unicode.IsDigit(r) || r == '.' || r == '/' || r == '_' || r == '-' {
+			target = append(target, r)
+		}
+	}
+
+	return string(target)
 }
--- /dev/null
+++ b/helpers/templates_test.go
@@ -1,0 +1,45 @@
+package helpers
+
+import (
+	"testing"
+)
+
+func TestMakePath(t *testing.T) {
+	tests := []struct {
+		input    string
+		expected string
+	}{
+		{"  foo bar  ", "foo-bar"},
+		{"foo.bar/foo_bar-foo", "foo.bar/foo_bar-foo"},
+		{"foo,bar:foo%bar", "foobarfoobar"},
+		{"foo/bar.html", "foo/bar.html"},
+		{"трям/трям", "трям/трям"},
+	}
+
+	for _, test := range tests {
+		output := MakePath(test.input)
+		if output != test.expected {
+			t.Errorf("Expected %#v, got %#v\n", test.expected, output)
+		}
+	}
+}
+
+func TestUrlize(t *testing.T) {
+	tests := []struct {
+		input    string
+		expected string
+	}{
+		{"  foo bar  ", "foo-bar"},
+		{"foo.bar/foo_bar-foo", "foo.bar/foo_bar-foo"},
+		{"foo,bar:foo%bar", "foobarfoobar"},
+		{"foo/bar.html", "foo/bar.html"},
+		{"трям/трям", "%D1%82%D1%80%D1%8F%D0%BC/%D1%82%D1%80%D1%8F%D0%BC"},
+	}
+
+	for _, test := range tests {
+		output := Urlize(test.input)
+		if output != test.expected {
+			t.Errorf("Expected %#v, got %#v\n", test.expected, output)
+		}
+	}
+}
--- a/hugolib/index.go
+++ b/hugolib/index.go
@@ -59,7 +59,7 @@
 
 // KeyPrep... Indexes should be case insensitive. Can make it easily conditional later.
 func kp(in string) string {
-	return helpers.Urlize(in)
+	return helpers.MakePath(in)
 }
 
 func (i Index) Get(key string) WeightedPages { return i[kp(key)] }
--- a/target/alias_test.go
+++ b/target/alias_test.go
@@ -20,6 +20,7 @@
 		{"alias 3.html", "alias-3.html"},
 		{"alias4.html", "alias4.html"},
 		{"/alias 5.html", "/alias-5.html"},
+		{"/трям.html", "/трям.html"},
 	}
 
 	for _, test := range tests {
--- a/target/htmlredirect.go
+++ b/target/htmlredirect.go
@@ -39,7 +39,7 @@
 	} else if !strings.HasSuffix(alias, ".html") {
 		alias = alias + "/index.html"
 	}
-	return path.Join(h.PublishDir, helpers.Urlize(alias)), nil
+	return path.Join(h.PublishDir, helpers.MakePath(alias)), nil
 }
 
 type AliasNode struct {