ref: ef595aedfce66caf2e8560b3605d38e95872e1ca
parent: 90a902c843758bb0d4c6da5ace4b9a813ab3a10e
author: Noah Campbell <[email protected]>
date: Sat Nov 9 09:35:09 EST 2013
Handle schema-less URLs when applying absurl. Fixes #114
--- a/transform/absurl.go
+++ b/transform/absurl.go
@@ -23,13 +23,20 @@
hrefsq = []byte(" href='" + base + "/")
)
trs = append(trs, func(content []byte) []byte {
- content = bytes.Replace(content, []byte(" src=\"/"), srcdq, -1)
- content = bytes.Replace(content, []byte(" src='/"), srcsq, -1)
- content = bytes.Replace(content, []byte(" href=\"/"), hrefdq, -1)
- content = bytes.Replace(content, []byte(" href='/"), hrefsq, -1)
+ content = guardReplace(content, []byte(" src=\"//"), []byte(" src=\"/"), srcdq)
+ content = guardReplace(content, []byte(" src='//"), []byte(" src='/"), srcsq)
+ content = guardReplace(content, []byte(" href=\"//"), []byte(" href=\"/"), hrefdq)
+ content = guardReplace(content, []byte(" href='//"), []byte(" href='/"), hrefsq)
return content
})
return
+}
+
// guardReplace returns a copy of content in which every occurrence of match
// is replaced by replace, except occurrences that are also the start of guard.
//
// Callers pass a guard that extends match by one more slash (for example
// match ` src="/` and guard ` src="//`), so root-relative URLs are rewritten
// while protocol-relative ("//host/path") URLs are left untouched.
//
// The decision is made per occurrence: a protocol-relative URL somewhere in
// the document no longer prevents root-relative URLs elsewhere in the same
// document from being rewritten.
//
// An empty guard protects every occurrence, and an empty match in a document
// that contains guard leaves content unchanged.
func guardReplace(content, guard, match, replace []byte) []byte {
	// Fast path: with no guard present, a plain replace-all is correct.
	if !bytes.Contains(content, guard) {
		return bytes.Replace(content, match, replace, -1)
	}
	// An empty match would never advance the scan below; there is nothing
	// meaningful to rewrite, so hand the content back untouched.
	if len(match) == 0 {
		return content
	}

	out := make([]byte, 0, len(content))
	rest := content
	for {
		i := bytes.Index(rest, match)
		if i < 0 {
			break
		}
		out = append(out, rest[:i]...)
		if bytes.HasPrefix(rest[i:], guard) {
			// Guarded occurrence: keep the original bytes.
			out = append(out, match...)
		} else {
			out = append(out, replace...)
		}
		rest = rest[i+len(match):]
	}
	return append(out, rest...)
}
type elattr struct {
--- a/transform/posttrans_test.go
+++ b/transform/posttrans_test.go
@@ -12,8 +12,7 @@
const H5_JS_CONTENT_ABS_URL = "<!DOCTYPE html><html><head><script src=\"http://user@host:10234/foobar.js\"></script></head><body><nav><h1>title</h1></nav><article>content <a href=\"https://host/foobar\">foobar</a>. Follow up</article></body></html>"
-// URL doesn't recognize authorities. BUG?
-//const H5_JS_CONTENT_ABS_URL = "<!DOCTYPE html><html><head><script src=\"//host/foobar.js\"></script></head><body><nav><h1>title</h1></nav><article>content <a href=\"https://host/foobar\">foobar</a>. Follow up</article></body></html>"
+const H5_JS_CONTENT_ABS_URL_SCHEMALESS = "<!DOCTYPE html><html><head><script src=\"//host/foobar.js\"></script><script src='//host2/barfoo.js'></head><body><nav><h1>title</h1></nav><article>content <a href=\"//host/foobar\">foobar</a>. <a href='//host2/foobar'>Follow up</a></article></body></html>"
const CORRECT_OUTPUT_SRC_HREF_DQ = "<!DOCTYPE html><html><head><script src=\"foobar.js\"></script><script src=\"http://base/barfoo.js\"></script></head><body><nav><h1>title</h1></nav><article>content <a href=\"foobar\">foobar</a>. <a href=\"http://base/foobar\">Follow up</a></article></body></html>"
@@ -34,6 +33,7 @@
{H5_JS_CONTENT_DOUBLE_QUOTE, CORRECT_OUTPUT_SRC_HREF_DQ},
{H5_JS_CONTENT_SINGLE_QUOTE, CORRECT_OUTPUT_SRC_HREF_SQ},
{H5_JS_CONTENT_ABS_URL, H5_JS_CONTENT_ABS_URL},
+ {H5_JS_CONTENT_ABS_URL_SCHEMALESS, H5_JS_CONTENT_ABS_URL_SCHEMALESS},
}
type errorf func(string, ...interface{})