ref: 70dbd6be889621dc1b07b732d757c3dddd6dc164
parent: 561b01eedfb87965b2ff1f7e32d8270ad98cf59e
author: uriel <[email protected]>
date: Tue Jul 28 10:49:52 EDT 2009
Add yiyus md2html.awk script as an alternative to markdown.pl
--- /dev/null
+++ b/bin/contrib/md2html.awk
@@ -1,0 +1,427 @@
+#!/bin/awk -f
+#
+# by: Jesus Galan (yiyus) 2009
+#
+# Usage: md2html.awk file.md > file.html
+# See: http://4l77.com/src/md2html.awk
+
+function eschtml(t) {
+ gsub("&", "\\&", t);
+ gsub("<", "\\<", t);
+ return t;
+}
+
+function oprint(t){
+ if(nr == 0)
+ print t;
+ else
+ otext = otext "\n" t;
+}
+
+function subref(id){
+ for(; nr > 0 && sub("<<" id, ref[id], otext); nr--);
+ if(nr == 0 && otext) {
+ print otext;
+ otext = "";
+ }
+}
+
+function nextil(t) {
+ if(!match(t, /[`<&\[*_\\-]|(\!\[)/))
+ return t;
+ t1 = substr(t, 1, RSTART - 1);
+ tag = substr(t, RSTART, RLENGTH);
+ t2 = substr(t, RSTART + RLENGTH);
+ if(ilcode && tag != "`")
+ return eschtml(t1 tag) nextil(t2);
+ # Backslash escaping
+ if(tag == "\\"){
+ if(match(t2, /^[\\`*_{}\[\]()#+\-\.!]/)){
+ tag = substr(t2, 1, 1);
+ t2 = substr(t2, 2);
+ }
+ return t1 tag nextil(t2);
+ }
+ # Dashes
+ if(tag == "-"){
+ if(sub(/^-/, "", t2))
+ tag = "—";
+ return t1 tag nextil(t2);
+ }
+ # Inline Code
+ if(tag == "`"){
+ if(sub(/^`/, "", t2)){
+ if(!match(t2, /``/))
+ return t1 "”" nextil(t2);
+ ilcode2 = !ilcode2;
+ }
+ else if(ilcode2)
+ return t1 tag nextil(t2);
+ tag = "<code>";
+ if(ilcode){
+ t1 = eschtml(t1);
+ tag = "</code>";
+ }
+ ilcode = !ilcode;
+ return t1 tag nextil(t2);
+ }
+ if(tag == "<"){
+ # Autolinks
+ if(match(t2, /^[^ ]+[\.@][^ ]+>/)){
+ url = eschtml(substr(t2, 1, RLENGTH - 1));
+ t2 = substr(t2, RLENGTH + 1);
+ linktext = url;
+ if(match(url, /@/) && !match(url, /^mailto:/))
+ url = "mailto:" url;
+ return t1 "<a href=\"" url "\">" linktext "</a>" nextil(t2);
+ }
+ # Html tags
+ if(match(t2, /^[A-Za-z\/!][^>]*>/)){
+ tag = tag substr(t2, RSTART, RLENGTH);
+ t2 = substr(t2, RLENGTH + 1);
+ return t1 tag nextil(t2);
+ }
+ return t1 "<" nextil(t2);
+ }
+ # Html special entities
+ if(tag == "&"){
+ if(match(t2, /^#?[A-Za-z0-9]+;/)){
+ tag = tag substr(t2, RSTART, RLENGTH);
+ t2 = substr(t2, RLENGTH + 1);
+ return t1 tag nextil(t2);
+ }
+ return t1 "&" nextil(t2);
+ }
+ # Images
+ if(tag == "!["){
+ if(!match(t2, /(\[.*\])|(\(.*\))/))
+ return t1 tag nextil(t2);
+ match(t2, /^[^\]]*/);
+ alt = substr(t2, 1, RLENGTH);
+ t2 = substr(t2, RLENGTH + 2);
+ if(match(t2, /^\(/)){
+ # Inline
+ sub(/^\(/, "", t2);
+ match(t2, /^[^\)]+/);
+ url = eschtml(substr(t2, 1, RLENGTH));
+ t2 = substr(t2, RLENGTH + 2);
+ title = "";
+ if(match(url, /[ ]+\".*\"[ ]*$/)) {
+ title = substr(url, RSTART, RLENGTH);
+ url = substr(url, 1, RSTART - 1);
+ match(title, /\".*\"/);
+ title = " title=\"" substr(title, RSTART + 1, RLENGTH - 2) "\"";
+ }
+ if(match(url, /^<.*>$/))
+ url = substr(url, 2, RLENGTH - 2);
+ return t1 "<img src=\"" url "\" alt=\"" alt "\"" title " />" nextil(t2);
+ }
+ else{
+ # Referenced
+ sub(/^ ?\[/, "", t2);
+ id = alt;
+ if(match(t2, /^[^\]]+/))
+ id = substr(t2, 1, RLENGTH);
+ t2 = substr(t2, RLENGTH + 2);
+ if(ref[id])
+ r = ref[id];
+ else{
+ r = "<<" id;
+ nr++;
+ }
+ return t1 "<img src=\"" r "\" alt=\"" alt "\" />" nextil(t2);
+ }
+ }
+ # Links
+ if(tag == "["){
+ if(!match(t2, /(\[.*\])|(\(.*\))/))
+ return t1 tag nextil(t2);
+ match(t2, /^[^\]]*(\[[^\]]*\][^\]]*)*/);
+ linktext = substr(t2, 1, RLENGTH);
+ t2 = substr(t2, RLENGTH + 2);
+ if(match(t2, /^\(/)){
+ # Inline
+ match(t2, /^[^\)]+(\([^\)]+\)[^\)]*)*/);
+ url = substr(t2, 2, RLENGTH - 1);
+ pt2 = substr(t2, RLENGTH + 2);
+ title = "";
+ if(match(url, /[ ]+\".*\"[ ]*$/)) {
+ title = substr(url, RSTART, RLENGTH);
+ url = substr(url, 1, RSTART - 1);
+ match(title, /\".*\"/);
+ title = " title=\"" substr(title, RSTART + 1, RLENGTH - 2) "\"";
+ }
+ if(match(url, /^<.*>$/))
+ url = substr(url, 2, RLENGTH - 2);
+ url = eschtml(url);
+ return t1 "<a href=\"" url "\"" title ">" nextil(linktext) "</a>" nextil(pt2);
+ }
+ else{
+ # Referenced
+ sub(/^ ?\[/, "", t2);
+ id = linktext;
+ if(match(t2, /^[^\]]+/))
+ id = substr(t2, 1, RLENGTH);
+ t2 = substr(t2, RLENGTH + 2);
+ if(ref[id])
+ r = ref[id];
+ else{
+ r = "<<" id;
+ nr++;
+ }
+ pt2 = t2;
+ return t1 "<a href=\"" r "\" />" nextil(linktext) "</a>" nextil(pt2);
+ }
+ }
+ # Emphasis
+ if(match(tag, /[*_]/)){
+ ntag = tag;
+ if(sub("^" tag, "", t2)){
+ if(stag[ns] == tag && match(t2, "^" tag))
+ t2 = tag t2;
+ else
+ ntag = tag tag
+ }
+ n = length(ntag);
+ tag = (n == 2) ? "strong" : "em";
+ if(match(t1, / $/) && match(t2, /^ /))
+ return t1 tag nextil(t2);
+ if(stag[ns] == ntag){
+ tag = "/" tag;
+ ns--;
+ }
+ else
+ stag[++ns] = ntag;
+ tag = "<" tag ">";
+ return t1 tag nextil(t2);
+ }
+}
+
+function inline(t) {
+ ilcode = 0;
+ ilcode2 = 0;
+ ns = 0;
+
+ return nextil(t);
+}
+
+function printp(tag) {
+ if(!match(text, /^[ ]*$/)){
+ text = inline(text);
+ if(tag != "")
+ oprint("<" tag ">" text "</" tag ">");
+ else
+ oprint(text);
+ }
+ text = "";
+}
+
+BEGIN {
+ blank = 0;
+ code = 0;
+ hr = 0;
+ html = 0;
+ nl = 0;
+ nr = 0;
+ otext = "";
+ text = "";
+ par = "p";
+}
+
+# References
+!code && /^ *\[[^\]]*\]:[ ]+/ {
+ sub(/^ *\[/, "");
+ match($0, /\]/);
+ id = substr($0, 1, RSTART - 1);
+ sub(id "\\]:[ ]+", "");
+ title = "";
+ if(match($0, /\".*\"$/))
+ title = "\" title=\"" substr($0, RSTART + 1, RLENGTH - 2);
+ sub(/[ ]+\".*\"$/, "");
+ url = eschtml($0);
+ ref[id] = url title;
+
+ subref(id);
+ next;
+}
+
+# html
+!html && /^<(address|blockquote|center|dir|div|dl|fieldset|form|h[1-6r]|\
+isindex|menu|noframes|noscript|ol|p|pre|table|ul|!--)/ {
+ if(code)
+ oprint("</pre></code>");
+ for(; !text && block[nl] == "blockquote"; nl--)
+ oprint("</blockquote>");
+ match($0, /^<(address|blockquote|center|dir|div|dl|fieldset|form|h[1-6r]|\
+ isindex|menu|noframes|noscript|ol|p|pre|table|ul|!--)/);
+ htag = substr($0, 2, RLENGTH - 1);
+ if(!match($0, "(<\\/" htag ">)|((^<hr ?\\/?)|(--)>$)"))
+ html = 1;
+ if(html && match($0, /^<hr/))
+ hr = 1;
+ oprint($0);
+ next;
+}
+
+html && (/(^<\/(address|blockquote|center|dir|div|dl|fieldset|form|h[1-6r]|\
+isindex|menu|noframes|noscript|ol|p|pre|table|ul).*)|(--)>$/ ||
+(hr && />$/)) {
+ html = 0;
+ hr = 0;
+ oprint($0);
+ next;
+}
+
+html {
+ oprint($0);
+ next;
+}
+
+# List and quote blocks
+
+# Remove indentation
+{
+ for(nnl = 0; nnl < nl; nnl++)
+ if((match(block[nnl + 1], /[ou]l/) && !sub(/^( | )/, "")) || \
+ (block[nnl + 1] == "blockquote" && !sub(/^> ?/, "")))
+ break;
+}
+nnl < nl && !blank && text && ! /^ ? ? ?([*+-]|([0-9]+\.)+)( +| )/ { nnl = nl; }
+# Quote blocks
+{
+ while(sub(/^> /, ""))
+ nblock[++nnl] = "blockquote";
+}
+# Horizontal rules
+{ hr = 0; }
+(blank || (!text && !code)) && /^ ? ? ?([-*_][ ]*)([-*_][ ]*)([-*_][ ]*)+$/ {
+ if(code){
+ oprint("</pre></code>");
+ code = 0;
+ }
+ blank = 0;
+ nnl = 0;
+ hr = 1;
+}
+# List items
+block[nl] ~ /[ou]l/ && /^$/ {
+ blank = 1;
+ next;
+}
+{ newli = 0; }
+!hr && (nnl != nl || !text || block[nl] ~ /[ou]l/) && /^ ? ? ?[*+-]( +| )/ {
+ sub(/^ ? ? ?[*+-]( +| )/, "");
+ nnl++;
+ nblock[nnl] = "ul";
+ newli = 1;
+}
+(nnl != nl || !text || block[nl] ~ /[ou]l/) && /^ ? ? ?([0-9]+\.)+( +| )/ {
+ sub(/^ ? ? ?([0-9]+\.)+( +| )/, "");
+ nnl++;
+ nblock[nnl] = "ol";
+ newli = 1;
+}
+newli {
+ if(blank && nnl == nl && !par)
+ par = "p";
+ blank = 0;
+ printp(par);
+ if(nnl == nl && block[nl] == nblock[nl])
+ oprint("</li><li>");
+}
+blank && ! /^$/ {
+ if(match(block[nnl], /[ou]l/) && !par)
+ par = "p";
+ printp(par);
+ par = "p";
+ blank = 0;
+}
+
+# Close old blocks and open new ones
+nnl != nl || nblock[nl] != block[nl] {
+ if(code){
+ oprint("</pre></code>");
+ code = 0;
+ }
+ printp(par);
+ b = (nnl > nl) ? nblock[nnl] : block[nnl];
+ par = (match(b, /[ou]l/)) ? "" : "p";
+}
+nnl < nl || (nnl == nl && nblock[nl] != block[nl]) {
+ for(; nl > nnl || (nnl == nl && pblock[nl] != block[nl]); nl--){
+ if(match(block[nl], /[ou]l/))
+ oprint("</li>");
+ oprint("</" block[nl] ">");
+ }
+}
+nnl > nl {
+ for(; nl < nnl; nl++){
+ block[nl + 1] = nblock[nl + 1];
+ oprint("<" block[nl + 1] ">");
+ if(match(block[nl + 1], /[ou]l/))
+ oprint("<li>");
+ }
+}
+hr {
+ oprint("<hr>");
+ next;
+}
+
+# Code blocks
+code && /^$/ {
+ if(blanK)
+ oprint("");
+ blank = 1;
+ next;
+}
+!text && sub(/^( | )/, "") {
+ if(blanK)
+ oprint("");
+ blank = 0;
+ if(!code)
+ oprint("<code><pre>");
+ code = 1;
+ $0 = eschtml($0);
+ oprint($0);
+ next;
+}
+code {
+ oprint("</pre></code>");
+ code = 0;
+}
+
+# Setex-style Headers
+text && /^=+$/ {printp("h1"); next;}
+text && /^-+$/ {printp("h2"); next;}
+
+# Atx-Style headers
+/^#+/ && (!newli || par=="p" || /^##/) {
+ for(n = 0; n < 6 && sub(/^# */, ""); n++)
+ sub(/#$/, "");
+ par = "h" n;
+}
+
+# Paragraph
+/^$/ {
+ printp(par);
+ par = "p";
+ next;
+}
+
+# Add text
+{ text = (text ? text " " : "") $0; }
+
+END {
+ if(code){
+ oprint("</pre></code>");
+ code = 0;
+ }
+ printp(par);
+ for(; nl > 0; nl--){
+ if(match(block[nl], /[ou]l/))
+ oprint("</li>");
+ oprint("</" block[nl] ">");
+ }
+ gsub(/<<[^\"]*/, "", otext);
+ print(otext);
+}