ref: 864073948ed33ab4f2aadc2e1caf54beafe42304
parent: 2a1b90b3575a1918fce472d3f7a2161aff1b3452
author: uriel <[email protected]>
date: Sun Oct 26 00:29:50 EDT 2008
Big cleanup of bin/, move external programs to contrib/, move minor useful scripts to aux/, and most important of all move controller.rc to werc.rc
--- a/bin/TODO
+++ /dev/null
@@ -1,7 +1,0 @@
-02:50 < devn> <META name="description" content="This is the Google Summer of Code blog for Plan9 and Inferno projects.">
-02:50 < devn> <META name="keywords" content="google, summer of code, inferno, plan9, programming">
-
-Should add werc to certain wikipedia lists:
-* http://en.wikipedia.org/wiki/List_of_content_management_systems
-* http://en.wikipedia.org/wiki/Comparison_of_wiki_software
-
--- /dev/null
+++ b/bin/aux/bpst.rc
@@ -1,0 +1,55 @@
+#!/usr/bin/env rc
+
+path=( $PLAN9/bin $path )
+base=.
+
+fn die {
+ echo error: $* >[1=2]
+ exit 1
+}
+
+if (~ $#* 0)
+ die 'Missing title'
+
+if(~ $#user 0)
+ user = `{whoami}
+
+file = (); title = ();
+bloguser = $user
+while(! ~ $#* 0) {
+ switch($1) {
+ case -u
+ base=/gsoc/www/people/$user/blog/
+ shift
+ case *
+ title = $"*
+ }
+ shift
+}
+
+date=`{/bin/date +%F}
+
+n = 1
+for(f in $base/$date-*) {
+ i = `{echo $f | sed -n 's|^.*/'$date'-([0-9]+)_.*|\1|p'}
+ if(! ~ $#i 0 && test $i -ge $n)
+ n = `{hoc -e $i'+1'}
+}
+
+if(~ $EDITOR '')
+ EDITOR = vi
+
+file = $home/.blogtmp.$pid
+rm $file >[2]/dev/null
+touch $file
+$EDITOR $file
+ispell $file
+rm $file.bak >[2]/dev/null
+
+
+title = `{ echo $"title | sed 's/ /_/g' }
+
+title = `{echo $"title | sed 's/[ ]+/_/g; 1q'}
+
+mv $file $base/$"date^'-'^$"n^_$"title.md
+
--- a/bin/bpst.rc
+++ /dev/null
@@ -1,55 +1,0 @@
-#!/usr/bin/env rc
-
-path=( $PLAN9/bin $path )
-base=.
-
-fn die {
- echo error: $* >[1=2]
- exit 1
-}
-
-if (~ $#* 0)
- die 'Missing title'
-
-if(~ $#user 0)
- user = `{whoami}
-
-file = (); title = ();
-bloguser = $user
-while(! ~ $#* 0) {
- switch($1) {
- case -u
- base=/gsoc/www/people/$user/blog/
- shift
- case *
- title = $"*
- }
- shift
-}
-
-date=`{/bin/date +%F}
-
-n = 1
-for(f in $base/$date-*) {
- i = `{echo $f | sed -n 's|^.*/'$date'-([0-9]+)_.*|\1|p'}
- if(! ~ $#i 0 && test $i -ge $n)
- n = `{hoc -e $i'+1'}
-}
-
-if(~ $EDITOR '')
- EDITOR = vi
-
-file = $home/.blogtmp.$pid
-rm $file >[2]/dev/null
-touch $file
-$EDITOR $file
-ispell $file
-rm $file.bak >[2]/dev/null
-
-
-title = `{ echo $"title | sed 's/ /_/g' }
-
-title = `{echo $"title | sed 's/[ ]+/_/g; 1q'}
-
-mv $file $base/$"date^'-'^$"n^_$"title.md
-
--- /dev/null
+++ b/bin/contrib/hgweb.config
@@ -1,0 +1,12 @@
+[web]
+style = gitweb
+allow_archive = bz2
+
+#[paths]
+#w9 = /gsoc/hg/w9/
+
+[collections]
+#allow_archive = bz2 zip
+/gsoc/hg = /gsoc/hg/
+#/var/hg = /var/hg/
+
--- /dev/null
+++ b/bin/contrib/hgwebdir.cgi
@@ -1,0 +1,47 @@
+#!/usr/bin/env python
+#
+# An example CGI script to export multiple hgweb repos, edit as necessary
+
+# send python tracebacks to the browser if an error occurs:
+import cgitb
+cgitb.enable()
+
+# adjust python path if not a system-wide install:
+#import sys
+#sys.path.insert(0, "/path/to/python/lib")
+
+# If you'd like to serve pages with UTF-8 instead of your default
+# locale charset, you can do so by uncommenting the following lines.
+# Note that this will cause your .hgrc files to be interpreted in
+# UTF-8 and all your repo files to be displayed using UTF-8.
+#
+#import os
+#os.environ["HGENCODING"] = "UTF-8"
+
+from mercurial.hgweb.hgwebdir_mod import hgwebdir
+from mercurial.hgweb.request import wsgiapplication
+import mercurial.hgweb.wsgicgi as wsgicgi
+
+# The config file looks like this. You can have paths to individual
+# repos, collections of repos in a directory tree, or both.
+#
+# [paths]
+# virtual/path = /real/path
+# virtual/path = /real/path
+#
+# [collections]
+# /prefix/to/strip/off = /root/of/tree/full/of/repos
+#
+# collections example: say directory tree /foo contains repos /foo/bar,
+# /foo/quux/baz. Give this config section:
+# [collections]
+# /foo = /foo
+# Then repos will list as bar and quux/baz.
+#
+# Alternatively you can pass a list of ('virtual/path', '/real/path') tuples
+# or use a dictionary with entries like 'virtual/path': '/real/path'
+
+def make_web_app():
+ return hgwebdir("hgweb.config")
+
+wsgicgi.launch(wsgiapplication(make_web_app))
--- /dev/null
+++ b/bin/contrib/markdown.pl
@@ -1,0 +1,1447 @@
+#!/usr/bin/env perl
+#
+# Markdown -- A text-to-HTML conversion tool for web writers
+#
+# Copyright (c) 2004 John Gruber
+# <http://daringfireball.net/projects/markdown/>
+#
+package Markdown;
+require 5.006_000;
+use strict;
+use warnings;
+
+use Digest::MD5 qw(md5_hex);
+use vars qw($VERSION);
+$VERSION = '1.0.1';
+# Tue 14 Dec 2004
+
+## Disabled; causes problems under Perl 5.6.1:
+# use utf8;
+# binmode( STDOUT, ":utf8" ); # c.f.: http://acis.openlib.org/dev/perl-unicode-struggle.html
+
+
+#
+# Global default settings:
+#
+my $g_empty_element_suffix = " />"; # Change to ">" for HTML output
+my $g_tab_width = 4;
+
+
+#
+# Globals:
+#
+
+# Regex to match balanced [brackets]. See Friedl's
+# "Mastering Regular Expressions", 2nd Ed., pp. 328-331.
+my $g_nested_brackets;
+$g_nested_brackets = qr{
+ (?> # Atomic matching
+ [^\[\]]+ # Anything other than brackets
+ |
+ \[
+ (??{ $g_nested_brackets }) # Recursive set of nested brackets
+ \]
+ )*
+}x;
+
+
+# Table of hash values for escaped characters:
+my %g_escape_table;
+foreach my $char (split //, '\\`*_{}[]()>#+-.!') {
+ $g_escape_table{$char} = md5_hex($char);
+}
+
+
+# Global hashes, used by various utility routines
+my %g_urls;
+my %g_titles;
+my %g_html_blocks;
+
+# Used to track when we're inside an ordered or unordered list
+# (see _ProcessListItems() for details):
+my $g_list_level = 0;
+
+
+#### Blosxom plug-in interface ##########################################
+
+# Set $g_blosxom_use_meta to 1 to use Blosxom's meta plug-in to determine
+# which posts Markdown should process, using a "meta-markup: markdown"
+# header. If it's set to 0 (the default), Markdown will process all
+# entries.
+my $g_blosxom_use_meta = 0;
+
+sub start { 1; }
+sub story {
+ my($pkg, $path, $filename, $story_ref, $title_ref, $body_ref) = @_;
+
+ if ( (! $g_blosxom_use_meta) or
+ (defined($meta::markup) and ($meta::markup =~ /^\s*markdown\s*$/i))
+ ){
+ $$body_ref = Markdown($$body_ref);
+ }
+ 1;
+}
+
+
+#### Movable Type plug-in interface #####################################
+eval {require MT}; # Test to see if we're running in MT.
+unless ($@) {
+ require MT;
+ import MT;
+ require MT::Template::Context;
+ import MT::Template::Context;
+
+ eval {require MT::Plugin}; # Test to see if we're running >= MT 3.0.
+ unless ($@) {
+ require MT::Plugin;
+ import MT::Plugin;
+ my $plugin = new MT::Plugin({
+ name => "Markdown",
+ description => "A plain-text-to-HTML formatting plugin. (Version: $VERSION)",
+ doc_link => 'http://daringfireball.net/projects/markdown/'
+ });
+ MT->add_plugin( $plugin );
+ }
+
+ MT::Template::Context->add_container_tag(MarkdownOptions => sub {
+ my $ctx = shift;
+ my $args = shift;
+ my $builder = $ctx->stash('builder');
+ my $tokens = $ctx->stash('tokens');
+
+ if (defined ($args->{'output'}) ) {
+ $ctx->stash('markdown_output', lc $args->{'output'});
+ }
+
+ defined (my $str = $builder->build($ctx, $tokens) )
+ or return $ctx->error($builder->errstr);
+ $str; # return value
+ });
+
+ MT->add_text_filter('markdown' => {
+ label => 'Markdown',
+ docs => 'http://daringfireball.net/projects/markdown/',
+ on_format => sub {
+ my $text = shift;
+ my $ctx = shift;
+ my $raw = 0;
+ if (defined $ctx) {
+ my $output = $ctx->stash('markdown_output');
+ if (defined $output && $output =~ m/^html/i) {
+ $g_empty_element_suffix = ">";
+ $ctx->stash('markdown_output', '');
+ }
+ elsif (defined $output && $output eq 'raw') {
+ $raw = 1;
+ $ctx->stash('markdown_output', '');
+ }
+ else {
+ $raw = 0;
+ $g_empty_element_suffix = " />";
+ }
+ }
+ $text = $raw ? $text : Markdown($text);
+ $text;
+ },
+ });
+
+ # If SmartyPants is loaded, add a combo Markdown/SmartyPants text filter:
+ my $smartypants;
+
+ {
+ no warnings "once";
+ $smartypants = $MT::Template::Context::Global_filters{'smarty_pants'};
+ }
+
+ if ($smartypants) {
+ MT->add_text_filter('markdown_with_smartypants' => {
+ label => 'Markdown With SmartyPants',
+ docs => 'http://daringfireball.net/projects/markdown/',
+ on_format => sub {
+ my $text = shift;
+ my $ctx = shift;
+ if (defined $ctx) {
+ my $output = $ctx->stash('markdown_output');
+ if (defined $output && $output eq 'html') {
+ $g_empty_element_suffix = ">";
+ }
+ else {
+ $g_empty_element_suffix = " />";
+ }
+ }
+ $text = Markdown($text);
+ $text = $smartypants->($text, '1');
+ },
+ });
+ }
+}
+else {
+#### BBEdit/command-line text filter interface ##########################
+# Needs to be hidden from MT (and Blosxom when running in static mode).
+
+ # We're only using $blosxom::version once; tell Perl not to warn us:
+ no warnings 'once';
+ unless ( defined($blosxom::version) ) {
+ use warnings;
+
+ #### Check for command-line switches: #################
+ my %cli_opts;
+ use Getopt::Long;
+ Getopt::Long::Configure('pass_through');
+ GetOptions(\%cli_opts,
+ 'version',
+ 'shortversion',
+ 'html4tags',
+ );
+ if ($cli_opts{'version'}) { # Version info
+ print "\nThis is Markdown, version $VERSION.\n";
+ print "Copyright 2004 John Gruber\n";
+ print "http://daringfireball.net/projects/markdown/\n\n";
+ exit 0;
+ }
+ if ($cli_opts{'shortversion'}) { # Just the version number string.
+ print $VERSION;
+ exit 0;
+ }
+ if ($cli_opts{'html4tags'}) { # Use HTML tag style instead of XHTML
+ $g_empty_element_suffix = ">";
+ }
+
+
+ #### Process incoming text: ###########################
+ my $text;
+ {
+ local $/; # Slurp the whole file
+ $text = <>;
+ }
+ print Markdown($text);
+ }
+}
+
+
+
+sub Markdown {
+#
+# Main function. The order in which other subs are called here is
+# essential. Link and image substitutions need to happen before
+# _EscapeSpecialChars(), so that any *'s or _'s in the <a>
+# and <img> tags get encoded.
+#
+ my $text = shift;
+
+ # Clear the global hashes. If we don't clear these, you get conflicts
+ # from other articles when generating a page which contains more than
+ # one article (e.g. an index page that shows the N most recent
+ # articles):
+ %g_urls = ();
+ %g_titles = ();
+ %g_html_blocks = ();
+
+
+ # Standardize line endings:
+ $text =~ s{\r\n}{\n}g; # DOS to Unix
+ $text =~ s{\r}{\n}g; # Mac to Unix
+
+ # Make sure $text ends with a couple of newlines:
+ $text .= "\n\n";
+
+ # Convert all tabs to spaces.
+ $text = _Detab($text);
+
+ # Strip any lines consisting only of spaces and tabs.
+ # This makes subsequent regexen easier to write, because we can
+ # match consecutive blank lines with /\n+/ instead of something
+ # contorted like /[ \t]*\n+/ .
+ $text =~ s/^[ \t]+$//mg;
+
+ # Turn block-level HTML blocks into hash entries
+ $text = _HashHTMLBlocks($text);
+
+ # Strip link definitions, store in hashes.
+ $text = _StripLinkDefinitions($text);
+
+ $text = _RunBlockGamut($text);
+
+ $text = _UnescapeSpecialChars($text);
+
+ return $text . "\n";
+}
+
+
+sub _StripLinkDefinitions {
+#
+# Strips link definitions from text, stores the URLs and titles in
+# hash references.
+#
+ my $text = shift;
+ my $less_than_tab = $g_tab_width - 1;
+
+ # Link defs are in the form: ^[id]: url "optional title"
+ while ($text =~ s{
+ ^[ ]{0,$less_than_tab}\[(.+)\]: # id = $1
+ [ \t]*
+ \n? # maybe *one* newline
+ [ \t]*
+ <?(\S+?)>? # url = $2
+ [ \t]*
+ \n? # maybe one newline
+ [ \t]*
+ (?:
+ (?<=\s) # lookbehind for whitespace
+ ["(]
+ (.+?) # title = $3
+ [")]
+ [ \t]*
+ )? # title is optional
+ (?:\n+|\Z)
+ }
+ {}mx) {
+ $g_urls{lc $1} = _EncodeAmpsAndAngles( $2 ); # Link IDs are case-insensitive
+ if ($3) {
+ $g_titles{lc $1} = $3;
+ $g_titles{lc $1} =~ s/"/"/g;
+ }
+ }
+
+ return $text;
+}
+
+
+sub _HashHTMLBlocks {
+ my $text = shift;
+ my $less_than_tab = $g_tab_width - 1;
+
+ # Hashify HTML blocks:
+ # We only want to do this for block-level HTML tags, such as headers,
+ # lists, and tables. That's because we still want to wrap <p>s around
+ # "paragraphs" that are wrapped in non-block-level tags, such as anchors,
+ # phrase emphasis, and spans. The list of tags we're looking for is
+ # hard-coded:
+ my $block_tags_a = qr/p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math|ins|del/;
+ my $block_tags_b = qr/p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math/;
+
+ # First, look for nested blocks, e.g.:
+ # <div>
+ # <div>
+ # tags for inner block must be indented.
+ # </div>
+ # </div>
+ #
+ # The outermost tags must start at the left margin for this to match, and
+ # the inner nested divs must be indented.
+ # We need to do this before the next, more liberal match, because the next
+ # match will start at the first `<div>` and stop at the first `</div>`.
+ $text =~ s{
+ ( # save in $1
+ ^ # start of line (with /m)
+ <($block_tags_a) # start tag = $2
+ \b # word break
+ (.*\n)*? # any number of lines, minimally matching
+ </\2> # the matching end tag
+ [ \t]* # trailing spaces/tabs
+ (?=\n+|\Z) # followed by a newline or end of document
+ )
+ }{
+ my $key = md5_hex($1);
+ $g_html_blocks{$key} = $1;
+ "\n\n" . $key . "\n\n";
+ }egmx;
+
+
+ #
+ # Now match more liberally, simply from `\n<tag>` to `</tag>\n`
+ #
+ $text =~ s{
+ ( # save in $1
+ ^ # start of line (with /m)
+ <($block_tags_b) # start tag = $2
+ \b # word break
+ (.*\n)*? # any number of lines, minimally matching
+ .*</\2> # the matching end tag
+ [ \t]* # trailing spaces/tabs
+ (?=\n+|\Z) # followed by a newline or end of document
+ )
+ }{
+ my $key = md5_hex($1);
+ $g_html_blocks{$key} = $1;
+ "\n\n" . $key . "\n\n";
+ }egmx;
+ # Special case just for <hr />. It was easier to make a special case than
+ # to make the other regex more complicated.
+ $text =~ s{
+ (?:
+ (?<=\n\n) # Starting after a blank line
+ | # or
+ \A\n? # the beginning of the doc
+ )
+ ( # save in $1
+ [ ]{0,$less_than_tab}
+ <(hr) # start tag = $2
+ \b # word break
+ ([^<>])*? #
+ /?> # the matching end tag
+ [ \t]*
+ (?=\n{2,}|\Z) # followed by a blank line or end of document
+ )
+ }{
+ my $key = md5_hex($1);
+ $g_html_blocks{$key} = $1;
+ "\n\n" . $key . "\n\n";
+ }egx;
+
+ # Special case for standalone HTML comments:
+ $text =~ s{
+ (?:
+ (?<=\n\n) # Starting after a blank line
+ | # or
+ \A\n? # the beginning of the doc
+ )
+ ( # save in $1
+ [ ]{0,$less_than_tab}
+ (?s:
+ <!
+ (--.*?--\s*)+
+ >
+ )
+ [ \t]*
+ (?=\n{2,}|\Z) # followed by a blank line or end of document
+ )
+ }{
+ my $key = md5_hex($1);
+ $g_html_blocks{$key} = $1;
+ "\n\n" . $key . "\n\n";
+ }egx;
+
+
+ return $text;
+}
+
+
+sub _RunBlockGamut {
+#
+# These are all the transformations that form block-level
+# tags like paragraphs, headers, and list items.
+#
+ my $text = shift;
+
+ $text = _DoHeaders($text);
+
+ # Do Horizontal Rules:
+ $text =~ s{^[ ]{0,2}([ ]?\*[ ]?){3,}[ \t]*$}{\n<hr$g_empty_element_suffix\n}gmx;
+ $text =~ s{^[ ]{0,2}([ ]? -[ ]?){3,}[ \t]*$}{\n<hr$g_empty_element_suffix\n}gmx;
+ $text =~ s{^[ ]{0,2}([ ]? _[ ]?){3,}[ \t]*$}{\n<hr$g_empty_element_suffix\n}gmx;
+
+ $text = _DoLists($text);
+
+ $text = _DoCodeBlocks($text);
+
+ $text = _DoBlockQuotes($text);
+
+ # We already ran _HashHTMLBlocks() before, in Markdown(), but that
+ # was to escape raw HTML in the original Markdown source. This time,
+ # we're escaping the markup we've just created, so that we don't wrap
+ # <p> tags around block-level tags.
+ $text = _HashHTMLBlocks($text);
+
+ $text = _FormParagraphs($text);
+
+ return $text;
+}
+
+
+sub _RunSpanGamut {
+#
+# These are all the transformations that occur *within* block-level
+# tags like paragraphs, headers, and list items.
+#
+ my $text = shift;
+
+ $text = _DoCodeSpans($text);
+
+ $text = _EscapeSpecialChars($text);
+
+ # Process anchor and image tags. Images must come first,
+ # because ![foo][f] looks like an anchor.
+ $text = _DoImages($text);
+ $text = _DoAnchors($text);
+
+ # Make links out of things like `<http://example.com/>`
+ # Must come after _DoAnchors(), because you can use < and >
+ # delimiters in inline links like [this](<url>).
+ $text = _DoAutoLinks($text);
+
+ $text = _EncodeAmpsAndAngles($text);
+
+ $text = _DoItalicsAndBold($text);
+
+ # Do hard breaks:
+ $text =~ s/ {2,}\n/ <br$g_empty_element_suffix\n/g;
+
+ return $text;
+}
+
+
+sub _EscapeSpecialChars {
+ my $text = shift;
+ my $tokens ||= _TokenizeHTML($text);
+
+ $text = ''; # rebuild $text from the tokens
+# my $in_pre = 0; # Keep track of when we're inside <pre> or <code> tags.
+# my $tags_to_skip = qr!<(/?)(?:pre|code|kbd|script|math)[\s>]!;
+
+ foreach my $cur_token (@$tokens) {
+ if ($cur_token->[0] eq "tag") {
+ # Within tags, encode * and _ so they don't conflict
+ # with their use in Markdown for italics and strong.
+ # We're replacing each such character with its
+ # corresponding MD5 checksum value; this is likely
+ # overkill, but it should prevent us from colliding
+ # with the escape values by accident.
+ $cur_token->[1] =~ s! \* !$g_escape_table{'*'}!gx;
+ $cur_token->[1] =~ s! _ !$g_escape_table{'_'}!gx;
+ $text .= $cur_token->[1];
+ } else {
+ my $t = $cur_token->[1];
+ $t = _EncodeBackslashEscapes($t);
+ $text .= $t;
+ }
+ }
+ return $text;
+}
+
+
+sub _DoAnchors {
+#
+# Turn Markdown link shortcuts into XHTML <a> tags.
+#
+ my $text = shift;
+
+ #
+ # First, handle reference-style links: [link text] [id]
+ #
+ $text =~ s{
+ ( # wrap whole match in $1
+ \[
+ ($g_nested_brackets) # link text = $2
+ \]
+
+ [ ]? # one optional space
+ (?:\n[ ]*)? # one optional newline followed by spaces
+
+ \[
+ (.*?) # id = $3
+ \]
+ )
+ }{
+ my $result;
+ my $whole_match = $1;
+ my $link_text = $2;
+ my $link_id = lc $3;
+
+ if ($link_id eq "") {
+ $link_id = lc $link_text; # for shortcut links like [this][].
+ }
+
+ if (defined $g_urls{$link_id}) {
+ my $url = $g_urls{$link_id};
+ $url =~ s! \* !$g_escape_table{'*'}!gx; # We've got to encode these to avoid
+ $url =~ s! _ !$g_escape_table{'_'}!gx; # conflicting with italics/bold.
+ $result = "<a href=\"$url\"";
+ if ( defined $g_titles{$link_id} ) {
+ my $title = $g_titles{$link_id};
+ $title =~ s! \* !$g_escape_table{'*'}!gx;
+ $title =~ s! _ !$g_escape_table{'_'}!gx;
+ $result .= " title=\"$title\"";
+ }
+ $result .= ">$link_text</a>";
+ }
+ else {
+ $result = $whole_match;
+ }
+ $result;
+ }xsge;
+
+ #
+ # Next, inline-style links: [link text](url "optional title")
+ #
+ $text =~ s{
+ ( # wrap whole match in $1
+ \[
+ ($g_nested_brackets) # link text = $2
+ \]
+ \( # literal paren
+ [ \t]*
+ <?(.*?)>? # href = $3
+ [ \t]*
+ ( # $4
+ (['"]) # quote char = $5
+ (.*?) # Title = $6
+ \5 # matching quote
+ )? # title is optional
+ \)
+ )
+ }{
+ my $result;
+ my $whole_match = $1;
+ my $link_text = $2;
+ my $url = $3;
+ my $title = $6;
+
+ $url =~ s! \* !$g_escape_table{'*'}!gx; # We've got to encode these to avoid
+ $url =~ s! _ !$g_escape_table{'_'}!gx; # conflicting with italics/bold.
+ $result = "<a href=\"$url\"";
+
+ if (defined $title) {
+ $title =~ s/"/"/g;
+ $title =~ s! \* !$g_escape_table{'*'}!gx;
+ $title =~ s! _ !$g_escape_table{'_'}!gx;
+ $result .= " title=\"$title\"";
+ }
+
+ $result .= ">$link_text</a>";
+
+ $result;
+ }xsge;
+
+ return $text;
+}
+
+
+sub _DoImages {
+#
+# Turn Markdown image shortcuts into <img> tags.
+#
+ my $text = shift;
+
+ #
+ # First, handle reference-style labeled images: ![alt text][id]
+ #
+ $text =~ s{
+ ( # wrap whole match in $1
+ !\[
+ (.*?) # alt text = $2
+ \]
+
+ [ ]? # one optional space
+ (?:\n[ ]*)? # one optional newline followed by spaces
+
+ \[
+ (.*?) # id = $3
+ \]
+
+ )
+ }{
+ my $result;
+ my $whole_match = $1;
+ my $alt_text = $2;
+ my $link_id = lc $3;
+
+ if ($link_id eq "") {
+ $link_id = lc $alt_text; # for shortcut links like ![this][].
+ }
+
+ $alt_text =~ s/"/"/g;
+ if (defined $g_urls{$link_id}) {
+ my $url = $g_urls{$link_id};
+ $url =~ s! \* !$g_escape_table{'*'}!gx; # We've got to encode these to avoid
+ $url =~ s! _ !$g_escape_table{'_'}!gx; # conflicting with italics/bold.
+ $result = "<img src=\"$url\" alt=\"$alt_text\"";
+ if (defined $g_titles{$link_id}) {
+ my $title = $g_titles{$link_id};
+ $title =~ s! \* !$g_escape_table{'*'}!gx;
+ $title =~ s! _ !$g_escape_table{'_'}!gx;
+ $result .= " title=\"$title\"";
+ }
+ $result .= $g_empty_element_suffix;
+ }
+ else {
+ # If there's no such link ID, leave intact:
+ $result = $whole_match;
+ }
+
+ $result;
+ }xsge;
+
+ #
+ # Next, handle inline images: ![alt text](url "optional title")
+ # Don't forget: encode * and _
+
+ $text =~ s{
+ ( # wrap whole match in $1
+ !\[
+ (.*?) # alt text = $2
+ \]
+ \( # literal paren
+ [ \t]*
+ <?(\S+?)>? # src url = $3
+ [ \t]*
+ ( # $4
+ (['"]) # quote char = $5
+ (.*?) # title = $6
+ \5 # matching quote
+ [ \t]*
+ )? # title is optional
+ \)
+ )
+ }{
+ my $result;
+ my $whole_match = $1;
+ my $alt_text = $2;
+ my $url = $3;
+ my $title = '';
+ if (defined($6)) {
+ $title = $6;
+ }
+
+ $alt_text =~ s/"/"/g;
+ $title =~ s/"/"/g;
+ $url =~ s! \* !$g_escape_table{'*'}!gx; # We've got to encode these to avoid
+ $url =~ s! _ !$g_escape_table{'_'}!gx; # conflicting with italics/bold.
+ $result = "<img src=\"$url\" alt=\"$alt_text\"";
+ if (defined $title) {
+ $title =~ s! \* !$g_escape_table{'*'}!gx;
+ $title =~ s! _ !$g_escape_table{'_'}!gx;
+ $result .= " title=\"$title\"";
+ }
+ $result .= $g_empty_element_suffix;
+
+ $result;
+ }xsge;
+
+ return $text;
+}
+
+
+sub _DoHeaders {
+ my $text = shift;
+
+ # Setext-style headers:
+ # Header 1
+ # ========
+ #
+ # Header 2
+ # --------
+ #
+ $text =~ s{ ^(.+)[ \t]*\n=+[ \t]*\n+ }{
+ "<h1>" . _RunSpanGamut($1) . "</h1>\n\n";
+ }egmx;
+
+ $text =~ s{ ^(.+)[ \t]*\n-+[ \t]*\n+ }{
+ "<h2>" . _RunSpanGamut($1) . "</h2>\n\n";
+ }egmx;
+
+
+ # atx-style headers:
+ # # Header 1
+ # ## Header 2
+ # ## Header 2 with closing hashes ##
+ # ...
+ # ###### Header 6
+ #
+ $text =~ s{
+ ^(\#{1,6}) # $1 = string of #'s
+ [ \t]*
+ (.+?) # $2 = Header text
+ [ \t]*
+ \#* # optional closing #'s (not counted)
+ \n+
+ }{
+ my $h_level = length($1);
+ "<h$h_level>" . _RunSpanGamut($2) . "</h$h_level>\n\n";
+ }egmx;
+
+ return $text;
+}
+
+
+sub _DoLists {
+#
+# Form HTML ordered (numbered) and unordered (bulleted) lists.
+#
+ my $text = shift;
+ my $less_than_tab = $g_tab_width - 1;
+
+ # Re-usable patterns to match list item bullets and number markers:
+ my $marker_ul = qr/[*+-]/;
+ my $marker_ol = qr/\d+[.]/;
+ my $marker_any = qr/(?:$marker_ul|$marker_ol)/;
+
+ # Re-usable pattern to match any entirel ul or ol list:
+ my $whole_list = qr{
+ ( # $1 = whole list
+ ( # $2
+ [ ]{0,$less_than_tab}
+ (${marker_any}) # $3 = first list item marker
+ [ \t]+
+ )
+ (?s:.+?)
+ ( # $4
+ \z
+ |
+ \n{2,}
+ (?=\S)
+ (?! # Negative lookahead for another list item marker
+ [ \t]*
+ ${marker_any}[ \t]+
+ )
+ )
+ )
+ }mx;
+
+ # We use a different prefix before nested lists than top-level lists.
+ # See extended comment in _ProcessListItems().
+ #
+ # Note: There's a bit of duplication here. My original implementation
+ # created a scalar regex pattern as the conditional result of the test on
+ # $g_list_level, and then only ran the $text =~ s{...}{...}egmx
+ # substitution once, using the scalar as the pattern. This worked,
+ # everywhere except when running under MT on my hosting account at Pair
+ # Networks. There, this caused all rebuilds to be killed by the reaper (or
+ # perhaps they crashed, but that seems incredibly unlikely given that the
+ # same script on the same server ran fine *except* under MT. I've spent
+ # more time trying to figure out why this is happening than I'd like to
+ # admit. My only guess, backed up by the fact that this workaround works,
+ # is that Perl optimizes the substition when it can figure out that the
+ # pattern will never change, and when this optimization isn't on, we run
+ # afoul of the reaper. Thus, the slightly redundant code to that uses two
+ # static s/// patterns rather than one conditional pattern.
+
+ if ($g_list_level) {
+ $text =~ s{
+ ^
+ $whole_list
+ }{
+ my $list = $1;
+ my $list_type = ($3 =~ m/$marker_ul/) ? "ul" : "ol";
+ # Turn double returns into triple returns, so that we can make a
+ # paragraph for the last item in a list, if necessary:
+ $list =~ s/\n{2,}/\n\n\n/g;
+ my $result = _ProcessListItems($list, $marker_any);
+ $result = "<$list_type>\n" . $result . "</$list_type>\n";
+ $result;
+ }egmx;
+ }
+ else {
+ $text =~ s{
+ (?:(?<=\n\n)|\A\n?)
+ $whole_list
+ }{
+ my $list = $1;
+ my $list_type = ($3 =~ m/$marker_ul/) ? "ul" : "ol";
+ # Turn double returns into triple returns, so that we can make a
+ # paragraph for the last item in a list, if necessary:
+ $list =~ s/\n{2,}/\n\n\n/g;
+ my $result = _ProcessListItems($list, $marker_any);
+ $result = "<$list_type>\n" . $result . "</$list_type>\n";
+ $result;
+ }egmx;
+ }
+
+
+ return $text;
+}
+
+
+sub _ProcessListItems {
+#
+# Process the contents of a single ordered or unordered list, splitting it
+# into individual list items.
+#
+
+ my $list_str = shift;
+ my $marker_any = shift;
+
+
+ # The $g_list_level global keeps track of when we're inside a list.
+ # Each time we enter a list, we increment it; when we leave a list,
+ # we decrement. If it's zero, we're not in a list anymore.
+ #
+ # We do this because when we're not inside a list, we want to treat
+ # something like this:
+ #
+ # I recommend upgrading to version
+ # 8. Oops, now this line is treated
+ # as a sub-list.
+ #
+ # As a single paragraph, despite the fact that the second line starts
+ # with a digit-period-space sequence.
+ #
+ # Whereas when we're inside a list (or sub-list), that line will be
+ # treated as the start of a sub-list. What a kludge, huh? This is
+ # an aspect of Markdown's syntax that's hard to parse perfectly
+ # without resorting to mind-reading. Perhaps the solution is to
+ # change the syntax rules such that sub-lists must start with a
+ # starting cardinal number; e.g. "1." or "a.".
+
+ $g_list_level++;
+
+ # trim trailing blank lines:
+ $list_str =~ s/\n{2,}\z/\n/;
+
+
+ $list_str =~ s{
+ (\n)? # leading line = $1
+ (^[ \t]*) # leading whitespace = $2
+ ($marker_any) [ \t]+ # list marker = $3
+ ((?s:.+?) # list item text = $4
+ (\n{1,2}))
+ (?= \n* (\z | \2 ($marker_any) [ \t]+))
+ }{
+ my $item = $4;
+ my $leading_line = $1;
+ my $leading_space = $2;
+
+ if ($leading_line or ($item =~ m/\n{2,}/)) {
+ $item = _RunBlockGamut(_Outdent($item));
+ }
+ else {
+ # Recursion for sub-lists:
+ $item = _DoLists(_Outdent($item));
+ chomp $item;
+ $item = _RunSpanGamut($item);
+ }
+
+ "<li>" . $item . "</li>\n";
+ }egmx;
+
+ $g_list_level--;
+ return $list_str;
+}
+
+
+
+sub _DoCodeBlocks {
+#
+# Process Markdown `<pre><code>` blocks.
+#
+
+ my $text = shift;
+
+ $text =~ s{
+ (?:\n\n|\A)
+ ( # $1 = the code block -- one or more lines, starting with a space/tab
+ (?:
+ (?:[ ]{$g_tab_width} | \t) # Lines must start with a tab or a tab-width of spaces
+ .*\n+
+ )+
+ )
+ ((?=^[ ]{0,$g_tab_width}\S)|\Z) # Lookahead for non-space at line-start, or end of doc
+ }{
+ my $codeblock = $1;
+ my $result; # return value
+
+ $codeblock = _EncodeCode(_Outdent($codeblock));
+ $codeblock = _Detab($codeblock);
+ $codeblock =~ s/\A\n+//; # trim leading newlines
+ $codeblock =~ s/\s+\z//; # trim trailing whitespace
+
+ $result = "\n\n<pre><code>" . $codeblock . "\n</code></pre>\n\n";
+
+ $result;
+ }egmx;
+
+ return $text;
+}
+
+
+sub _DoCodeSpans {
+#
+# * Backtick quotes are used for <code></code> spans.
+#
+# * You can use multiple backticks as the delimiters if you want to
+# include literal backticks in the code span. So, this input:
+#
+# Just type ``foo `bar` baz`` at the prompt.
+#
+# Will translate to:
+#
+# <p>Just type <code>foo `bar` baz</code> at the prompt.</p>
+#
+# There's no arbitrary limit to the number of backticks you
+# can use as delimters. If you need three consecutive backticks
+# in your code, use four for delimiters, etc.
+#
+# * You can use spaces to get literal backticks at the edges:
+#
+# ... type `` `bar` `` ...
+#
+# Turns to:
+#
+# ... type <code>`bar`</code> ...
+#
+
+ my $text = shift;
+
+ $text =~ s@
+ (`+) # $1 = Opening run of `
+ (.+?) # $2 = The code block
+ (?<!`)
+ \1 # Matching closer
+ (?!`)
+ @
+ my $c = "$2";
+ $c =~ s/^[ \t]*//g; # leading whitespace
+ $c =~ s/[ \t]*$//g; # trailing whitespace
+ $c = _EncodeCode($c);
+ "<code>$c</code>";
+ @egsx;
+
+ return $text;
+}
+
+
+sub _EncodeCode {
+#
+# Encode/escape certain characters inside Markdown code runs.
+# The point is that in code, these characters are literals,
+# and lose their special Markdown meanings.
+#
+ local $_ = shift;
+
+ # Encode all ampersands; HTML entities are not
+ # entities within a Markdown code span.
+ s/&/&/g;
+
+ # Encode $'s, but only if we're running under Blosxom.
+ # (Blosxom interpolates Perl variables in article bodies.)
+ {
+ no warnings 'once';
+ if (defined($blosxom::version)) {
+ s/\$/$/g;
+ }
+ }
+
+
+ # Do the angle bracket song and dance:
+ s! < !<!gx;
+ s! > !>!gx;
+
+ # Now, escape characters that are magic in Markdown:
+ s! \* !$g_escape_table{'*'}!gx;
+ s! _ !$g_escape_table{'_'}!gx;
+ s! { !$g_escape_table{'{'}!gx;
+ s! } !$g_escape_table{'}'}!gx;
+ s! \[ !$g_escape_table{'['}!gx;
+ s! \] !$g_escape_table{']'}!gx;
+ s! \\ !$g_escape_table{'\\'}!gx;
+
+ return $_;
+}
+
+
+sub _DoItalicsAndBold {
+ my $text = shift;
+
+ # <strong> must go first:
+ $text =~ s{ (\*\*|__) (?=\S) (.+?[*_]*) (?<=\S) \1 }
+ {<strong>$2</strong>}gsx;
+
+ $text =~ s{ (\*|_) (?=\S) (.+?) (?<=\S) \1 }
+ {<em>$2</em>}gsx;
+
+ return $text;
+}
+
+
+sub _DoBlockQuotes {
+ my $text = shift;
+
+ $text =~ s{
+ ( # Wrap whole match in $1
+ (
+ ^[ \t]*>[ \t]? # '>' at the start of a line
+ .+\n # rest of the first line
+ (.+\n)* # subsequent consecutive lines
+ \n* # blanks
+ )+
+ )
+ }{
+ my $bq = $1;
+ $bq =~ s/^[ \t]*>[ \t]?//gm; # trim one level of quoting
+ $bq =~ s/^[ \t]+$//mg; # trim whitespace-only lines
+ $bq = _RunBlockGamut($bq); # recurse
+
+ $bq =~ s/^/ /g;
+ # These leading spaces screw with <pre> content, so we need to fix that:
+ $bq =~ s{
+ (\s*<pre>.+?</pre>)
+ }{
+ my $pre = $1;
+ $pre =~ s/^ //mg;
+ $pre;
+ }egsx;
+
+ "<blockquote>\n$bq\n</blockquote>\n\n";
+ }egmx;
+
+
+ return $text;
+}
+
+
+sub _FormParagraphs {
+#
+# Params:
+# $text - string to process with html <p> tags
+#
+ my $text = shift;
+
+ # Strip leading and trailing lines:
+ $text =~ s/\A\n+//;
+ $text =~ s/\n+\z//;
+
+ my @grafs = split(/\n{2,}/, $text);
+
+ #
+ # Wrap <p> tags.
+ #
+ foreach (@grafs) {
+ unless (defined( $g_html_blocks{$_} )) {
+ $_ = _RunSpanGamut($_);
+ s/^([ \t]*)/<p>/;
+ $_ .= "</p>";
+ }
+ }
+
+ #
+ # Unhashify HTML blocks
+ #
+ foreach (@grafs) {
+ if (defined( $g_html_blocks{$_} )) {
+ $_ = $g_html_blocks{$_};
+ }
+ }
+
+ return join "\n\n", @grafs;
+}
+
+
+sub _EncodeAmpsAndAngles {
+# Smart processing for ampersands and angle brackets that need to be encoded.
+
+ my $text = shift;
+
+ # Ampersand-encoding based entirely on Nat Irons's Amputator MT plugin:
+ # http://bumppo.net/projects/amputator/
+ $text =~ s/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/&/g;
+
+ # Encode naked <'s
+ $text =~ s{<(?![a-z/?\$!])}{<}gi;
+
+ return $text;
+}
+
+
+sub _EncodeBackslashEscapes {
+#
+# Parameter: String.
+# Returns: The string, with after processing the following backslash
+# escape sequences.
+#
+ local $_ = shift;
+
+ s! \\\\ !$g_escape_table{'\\'}!gx; # Must process escaped backslashes first.
+ s! \\` !$g_escape_table{'`'}!gx;
+ s! \\\* !$g_escape_table{'*'}!gx;
+ s! \\_ !$g_escape_table{'_'}!gx;
+ s! \\\{ !$g_escape_table{'{'}!gx;
+ s! \\\} !$g_escape_table{'}'}!gx;
+ s! \\\[ !$g_escape_table{'['}!gx;
+ s! \\\] !$g_escape_table{']'}!gx;
+ s! \\\( !$g_escape_table{'('}!gx;
+ s! \\\) !$g_escape_table{')'}!gx;
+ s! \\> !$g_escape_table{'>'}!gx;
+ s! \\\# !$g_escape_table{'#'}!gx;
+ s! \\\+ !$g_escape_table{'+'}!gx;
+ s! \\\- !$g_escape_table{'-'}!gx;
+ s! \\\. !$g_escape_table{'.'}!gx;
+ s{ \\! }{$g_escape_table{'!'}}gx;
+
+ return $_;
+}
+
+
+sub _DoAutoLinks {
+ my $text = shift;
+
+ $text =~ s{<((https?|ftp):[^'">\s]+)>}{<a href="$1">$1</a>}gi;
+
+ # Email addresses: <[email protected]>
+ $text =~ s{
+ <
+ (?:mailto:)?
+ (
+ [-.\w]+
+ \@
+ [-a-z0-9]+(\.[-a-z0-9]+)*\.[a-z]+
+ )
+ >
+ }{
+ _EncodeEmailAddress( _UnescapeSpecialChars($1) );
+ }egix;
+
+ return $text;
+}
+
+
+sub _EncodeEmailAddress {
+#
+# Input: an email address, e.g. "[email protected]"
+#
+# Output: the email address as a mailto link, with each character
+# of the address encoded as either a decimal or hex entity, in
+# the hopes of foiling most address harvesting spam bots. E.g.:
+#
+# <a href="mailto:foo@e
+# xample.com">foo
+# @example.com</a>
+#
+# Based on a filter by Matthew Wickline, posted to the BBEdit-Talk
+# mailing list: <http://tinyurl.com/yu7ue>
+#
+
+ my $addr = shift;
+
+ srand;
+ my @encode = (
+ sub { '&#' . ord(shift) . ';' },
+ sub { '&#x' . sprintf( "%X", ord(shift) ) . ';' },
+ sub { shift },
+ );
+
+ $addr = "mailto:" . $addr;
+
+ $addr =~ s{(.)}{
+ my $char = $1;
+ if ( $char eq '@' ) {
+ # this *must* be encoded. I insist.
+ $char = $encode[int rand 1]->($char);
+ } elsif ( $char ne ':' ) {
+ # leave ':' alone (to spot mailto: later)
+ my $r = rand;
+ # roughly 10% raw, 45% hex, 45% dec
+ $char = (
+ $r > .9 ? $encode[2]->($char) :
+ $r < .45 ? $encode[1]->($char) :
+ $encode[0]->($char)
+ );
+ }
+ $char;
+ }gex;
+
+ $addr = qq{<a href="$addr">$addr</a>};
+ $addr =~ s{">.+?:}{">}; # strip the mailto: from the visible part
+
+ return $addr;
+}
+
+
+sub _UnescapeSpecialChars {
+#
+# Swap back in all the special characters we've hidden.
+#
+ my $text = shift;
+
+ while( my($char, $hash) = each(%g_escape_table) ) {
+ $text =~ s/$hash/$char/g;
+ }
+ return $text;
+}
+
+
+sub _TokenizeHTML {
+#
+# Parameter: String containing HTML markup.
+# Returns: Reference to an array of the tokens comprising the input
+# string. Each token is either a tag (possibly with nested,
+# tags contained therein, such as <a href="<MTFoo>">, or a
+# run of text between tags. Each element of the array is a
+# two-element array; the first is either 'tag' or 'text';
+# the second is the actual value.
+#
+#
+# Derived from the _tokenize() subroutine from Brad Choate's MTRegex plugin.
+# <http://www.bradchoate.com/past/mtregex.php>
+#
+
+ my $str = shift;
+ my $pos = 0;
+ my $len = length $str;
+ my @tokens;
+
+ my $depth = 6;
+ my $nested_tags = join('|', ('(?:<[a-z/!$](?:[^<>]') x $depth) . (')*>)' x $depth);
+ my $match = qr/(?s: <! ( -- .*? -- \s* )+ > ) | # comment
+ (?s: <\? .*? \?> ) | # processing instruction
+ $nested_tags/ix; # nested tags
+
+ while ($str =~ m/($match)/g) {
+ my $whole_tag = $1;
+ my $sec_start = pos $str;
+ my $tag_start = $sec_start - length $whole_tag;
+ if ($pos < $tag_start) {
+ push @tokens, ['text', substr($str, $pos, $tag_start - $pos)];
+ }
+ push @tokens, ['tag', $whole_tag];
+ $pos = pos $str;
+ }
+ push @tokens, ['text', substr($str, $pos, $len - $pos)] if $pos < $len;
+ \@tokens;
+}
+
+
+sub _Outdent {
+#
+# Remove one level of line-leading tabs or spaces
+#
+ my $text = shift;
+
+ $text =~ s/^(\t|[ ]{1,$g_tab_width})//gm;
+ return $text;
+}
+
+
+sub _Detab {
+#
+# Cribbed from a post by Bart Lateur:
+# <http://www.nntp.perl.org/group/perl.macperl.anyperl/154>
+#
+ my $text = shift;
+
+ $text =~ s{(.*?)\t}{$1.(' ' x ($g_tab_width - length($1) % $g_tab_width))}ge;
+ return $text;
+}
+
+
+1;
+
+__END__
+
+
+=pod
+
+=head1 NAME
+
+B<Markdown>
+
+
+=head1 SYNOPSIS
+
+B<Markdown.pl> [ B<--html4tags> ] [ B<--version> ] [ B<-shortversion> ]
+ [ I<file> ... ]
+
+
+=head1 DESCRIPTION
+
+Markdown is a text-to-HTML filter; it translates an easy-to-read /
+easy-to-write structured text format into HTML. Markdown's text format
+is most similar to that of plain text email, and supports features such
+as headers, *emphasis*, code blocks, blockquotes, and links.
+
+Markdown's syntax is designed not as a generic markup language, but
+specifically to serve as a front-end to (X)HTML. You can use span-level
+HTML tags anywhere in a Markdown document, and you can use block level
+HTML tags (like <div> and <table> as well).
+
+For more information about Markdown's syntax, see:
+
+ http://daringfireball.net/projects/markdown/
+
+
+=head1 OPTIONS
+
+Use "--" to end switch parsing. For example, to open a file named "-z", use:
+
+ Markdown.pl -- -z
+
+=over 4
+
+
+=item B<--html4tags>
+
+Use HTML 4 style for empty element tags, e.g.:
+
+ <br>
+
+instead of Markdown's default XHTML style tags, e.g.:
+
+ <br />
+
+
+=item B<-v>, B<--version>
+
+Display Markdown's version number and copyright information.
+
+
+=item B<-s>, B<--shortversion>
+
+Display the short-form version number.
+
+
+=back
+
+
+
+=head1 BUGS
+
+To file bug reports or feature requests (other than topics listed in the
+Caveats section above) please send email to:
+
+ [email protected]
+
+Please include with your report: (1) the example input; (2) the output
+you expected; (3) the output Markdown actually produced.
+
+
+=head1 VERSION HISTORY
+
+See the readme file for detailed release notes for this version.
+
+1.0.1 - 14 Dec 2004
+
+1.0 - 28 Aug 2004
+
+
+=head1 AUTHOR
+
+ John Gruber
+ http://daringfireball.net
+
+ PHP port and other contributions by Michel Fortin
+ http://michelf.com
+
+
+=head1 COPYRIGHT AND LICENSE
+
+Copyright (c) 2003-2004 John Gruber
+<http://daringfireball.net/>
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+* Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+* Neither the name "Markdown" nor the names of its contributors may
+ be used to endorse or promote products derived from this software
+ without specific prior written permission.
+
+This software is provided by the copyright holders and contributors "as
+is" and any express or implied warranties, including, but not limited
+to, the implied warranties of merchantability and fitness for a
+particular purpose are disclaimed. In no event shall the copyright owner
+or contributors be liable for any direct, indirect, incidental, special,
+exemplary, or consequential damages (including, but not limited to,
+procurement of substitute goods or services; loss of use, data, or
+profits; or business interruption) however caused and on any theory of
+liability, whether in contract, strict liability, or tort (including
+negligence or otherwise) arising in any way out of the use of this
+software, even if advised of the possibility of such damage.
+
+=cut
--- /dev/null
+++ b/bin/contrib/urldecode.awk
@@ -1,0 +1,39 @@
+#!/usr/bin/awk -f
+BEGIN {
+ hextab ["0"] = 0; hextab ["8"] = 8;
+ hextab ["1"] = 1; hextab ["9"] = 9;
+ hextab ["2"] = 2; hextab ["A"] = hextab ["a"] = 10
+ hextab ["3"] = 3; hextab ["B"] = hextab ["b"] = 11;
+ hextab ["4"] = 4; hextab ["C"] = hextab ["c"] = 12;
+ hextab ["5"] = 5; hextab ["D"] = hextab ["d"] = 13;
+ hextab ["6"] = 6; hextab ["E"] = hextab ["e"] = 14;
+ hextab ["7"] = 7; hextab ["F"] = hextab ["f"] = 15;
+}
+{
+ decoded = ""
+ i = 1
+ len = length ($0)
+ while ( i <= len ) {
+ c = substr ($0, i, 1)
+ if ( c == "%" ) {
+ if ( i+2 <= len ) {
+ c1 = substr ($0, i+1, 1)
+ c2 = substr ($0, i+2, 1)
+ if ( hextab [c1] == "" || hextab [c2] == "" ) {
+ print "WARNING: invalid hex encoding: %" c1 c2 | "cat >&2"
+ } else {
+ code = 0 + hextab [c1] * 16 + hextab [c2] + 0
+ c = sprintf ("%c", code)
+ i = i + 2
+ }
+ } else {
+ print "WARNING: invalid % encoding: " substr ($0, i, len - i)
+ }
+ } else if ( c == "+" ) {
+ c = " "
+ }
+ decoded = decoded c
+ ++i
+ }
+ print decoded
+}
--- a/bin/controller.rc
+++ /dev/null
@@ -1,324 +1,0 @@
-#!/usr/local/plan9/bin/rc
-. ./cgilib.rc
-cd ..
-
-forbidden_uri_chars='[^a-zA-Z0-9_+\-\/\.]'
-response_format=html
-
-fn get_lib_file {
- wantedfile = $1
- if (test -f $sitedir/_werc/lib/$wantedfile)
- echo -n $sitedir/_werc/lib/$wantedfile
- if not if(! ~ $#masterSite 0 && test -f sites/$masterSite/_werc/lib/$wantedfile)
- echo -n sites/$masterSite/_werc/lib/$wantedfile
- if not if (test -f lib/$wantedfile)
- echo -n lib/$wantedfile
-}
-
-# Title
-fn gentitle {
- echo '<h1 class="headerTitle"><a href="/">' ^ $"siteTitle ^ ' <span id="headerSubTitle">' ^ $"siteSubTitle ^ '</span></a></h1>'
-}
-
-# Don't change var name or trailing ';', some dirs change the filter!
-dirfilter = '/\/[._]/d; /'^$forbidden_uri_chars^'/d; /\/robots.\txt$/d; /\/sitemap\.txt$/d; s,^\./,,; s,\.md$,,; s,\.html,,; s,\.txt,,; '
-
-# To be used from config files
-fn hide_paths {
- for(i in $*)
- dirfilter = $dirfilter^'/^'$i'$/d; '
-}
-
-# Sidebar
-fn menu {
- ls -F $1 | sed $dirfilter | awk -F/ '
- BEGIN { print "<ul class=\"side-bar\">" }
- END { print "</ul>" }
- # Should add \. to the list of allowed chars in file names, but need to find a way to filter out .txt and so on
- /^([a-zA-Z0-9+_\-]+[\/*]?)+$/ && $NF != "index" {
- isdir = match($0, "/$")
- sub("[*/]$", "")
-
- path = bname = $0
- sub("^(.*/)?([0-9]+_)?", "", bname)
- gsub("_", " ", bname)
-
- if(isdir) {
- bname = bname "/"
- path = $0 "/"
- }
-
- if(index(ENVIRON["REQUEST_URI"] "/", "/" path) == 1) {
- if(isdir) {
- print "<li><a href=\"/" path "\" class=\"thisPage\">»<i> " bname "</i></a>"
- system("rc -c ''menu " path "''")
- } else {
- print "<li><a href=\"/" path "\" class=\"thisPage\">»<i> " bname "</i></a>"
- }
- } else
- print "<li><a href=\"/" path "\">› " bname "</a>"
-
- print "</li>"
- }'
-}
-
-fn gensidebar {
- @{
- cd $sitedir
- menu .
- }
-}
-
-fn sortedBlogPostList {
- # the /./ is added so we can sort -t. and order only the file name
- if (! ~ $#* 0)
- ls $*^'/./' | grep '/[0-9]+.*\.md$'| sort -r -t. +1
-}
-
-fn gen_blog_post_title {
- title=`{basename $1 | sed 's/^[0-9\-]*_(.*)\.md$/\1/; s/_/ /g' }
- permlink= `{echo $1 | sed 's,^/[a-z/]*www/,/,; s,^sites/[^/]*/*/,/,; s/\.md$//' }
- du=`{ls -l $1}
- by = ''
- if (! ~ $#blogAuthor 0)
- by='By '$"blogAuthor
- if not if (~ $#blogDirs 1)
- by='By '$du(4)
- echo '##<a href="'^$"permlink^'">' $"title^'</a> *( '$by Last mod: $du(7 8 9) ' )*'
-}
-
-
-
-# Handlers
-fn set_handler {
- handler = $1
- shift
- handler_args = $*
-}
-
-fn md_handler { cat $* | $formatter }
-
-fn tpl_handler { template $1 }
-
-fn html_handler {
- cat $1 | /bin/sed '0,/<[Bb][Oo][Dd][Yy][^>]*>/d; /<\/[Bb][Oo][Dd][Yy]>/,$d'
-}
-
-fn txt_handler {
- echo '<pre>'
- # XXX Insering a blank line between lines in input is good for fortunes.txt, but maybe not for other .txt files
- # XXX Words are not broken, even if they are way beyond 82 chars long
- cat $1 |sed 's/$/\n/g; s/</\</g; s/>/\>/g' |fmt -l 82 -j
- echo '</pre>'
-}
-
-fn dir_listing_handler {
- d = `{basename -d $1}
- echo '<h1 style="text-transform: capitalize;">' `{echo $d|sed -e 's,.*//,,g' -e 's,/$,,' -e 's,/, / ,g' } '</h1>'
- echo '<ul style="text-transform: capitalize;">'
- ls -F $d | sed -e $dirfilter' s,^'$sitedir'/.*/([^$].*),<li><a href="\1">\1</a></li>,' # XXX I'm not sure what the [^$] does there
- echo '</ul>'
-}
-
-fn 404_handler { template `{ get_lib_file 404.tpl } }
-
-fn blog_dir_handler {
- blogDirs = $*
- tpl_handler lib/feeds/html.tpl
-}
-
-fn blog_post_handler {
- gen_blog_post_title $1 | $formatter
- $formatter < $1
-}
-
-
-fn select_handler {
-
- if (test -f $body.md) {
- if (! ~ $#inBlog 0)
- set_handler blog_post_handler $body.md
- if not
- set_handler md_handler $body.md
- if (! ~ $#allowComments 0 && ~ $REQUEST_METHOD POST) {
- get_post_args comment_text
- d = `{date -n} # FIXME Obvious race
- d = $body.md_werc/comments/$d/
-
- u = $logged_user
- if (~ $#logged_user 0) {
- get_post_args comment_user_name comment_user_password
- # TODO Should take this path too if the user can login but doesn't
- # belong to required group
- if (! login_user $comment_user_name $comment_user_password) {
- u = $comment_user_name ':' $comment_user_password
- d = $d^'_pending'
- }
- if not
- u = $logged_user
- }
-
- umask 002
- mkdir -m 775 -p $d
- echo $u > $d/user
- echo $comment_text > $d/body
- }
- }
- if not if (test -f $body.tpl)
- set_handler tpl_handler $body.tpl
-
- if not if (test -f $body.html)
- set_handler html_handler $body.html
-
- # Explicit .html urls, unfortunately usually the web server will handle this as static files
- if not if (~ $body *.html && test -f $body)
- perm_redirect `{ echo $REQUEST_URI|sed 's/.html$//' }
-
- # Rss feeds. TODO: we should check that the request is for a real blog dir
- if not if (~ $REQUEST_URI */index.rss) {
- uri = `{echo $uri | sed 's/index.rss$//'}
- if (~ $#blogDirs 0)
- blogDirs = $sitedir^$uri
- uri=$baseuri$"uri
- master_template=feeds/rss20.tpl
- response_format=raw
- }
-
- # Blog handler
- if not if (~ $body */[bB]log/index */[bB]log//index || ! ~ $#blogDirs 0) {
- if (~ $#blogDirs 0)
- blogDirs = `{basename -d $body}
-
- rssuri=`{basename -d $uri}
- rssuri=$baseuri`{cleanname $"rssuri^/index.rss}
- extraHeaders=$"extraHeaders ^ \
- '<link rel="alternate" type="application/rss+xml" title="RSS" href="'$rssuri'" />
-'
- set_handler tpl_handler lib/feeds/html.tpl
- }
-
- # Global tpl (eg sitemap.tpl)
- if not if (test -f lib/^$uri^.tpl)
- set_handler tpl_handler lib/^$uri^.tpl
-
- if not if (test -f $body.txt)
- set_handler txt_handler $body.txt
-
- # Dir listing
- if not if(~ $body */index)
- set_handler dir_listing_handler $body
-
- # File not found
- if not {
- set_handler 404_handler $body
- dprint 'NOT FOUND: '$SERVER_NAME^$REQUEST_URI^' - '^$"HTTP_REFERER^' - '^$"HTTP_USER_AGENT
- echo 'Status: 404 Not Found'
- }
-
-}
-
-
-fn genbody {
- $handler $handler_args
-}
-
-
-# Careful, the proper p9p path might not be set until initrc.local is sourced
-path=(. $PLAN9/bin ./bin/ /bin/ /usr/bin)
-
-site=$SERVER_NAME
-headers=lib/headers.tpl
-master_template=default_master.tpl
-sidebar=sidebar
-baseuri=http://$site/
-for(i in siteTitle siteSubTitle pageTitle extraHeaders)
- $i = ''
-
-. etc/initrc
-
-if(test -f etc/initrc.local)
- . etc/initrc.local
-
-
-# Parse request URL
-uri = `{echo -n $REQUEST_URI | sed -e 's/\?.*//; s/'^$forbidden_uri_chars^'//g; s/\.\.*/./g;' -e '1q'}
-ifs='/' { args = `{echo -n $uri} }
-
-
-if(! ~ $#debug 0)
- dprint ' ' $SERVER_NAME^$REQUEST_URI^' - '^$"HTTP_USER_AGENT
-
-# Hack: preload post data so we can access it from templates where cgi's stdin is not accesible
-if(~ $REQUEST_METHOD POST) {
- get_post_args
- login_user
-}
-
-if (! ~ $args '') {
- if (~ $args($#args) 'index')
- perm_redirect `{ echo $REQUEST_URI | sed 's,/index$,/,' }
-
- pageTitle=`{echo $args | sed -e 's/ / - /g' -e 's/_/ /g'}
- req_path=$uri
-}
-if not {
- req_path='/'
-}
-
-fpath=$sitedir
-for (i in ('' $args)) {
- fpath = $fpath/$i
- # We don't want blog settings to cascade into posts, note that we are inBlog instead
- if (! ~ $#blogDirs 0 && ! ~ $req_path */index.rss */[bB]log */[bB]log/) {
- inBlog = $blogDirs
- blogDirs = ()
- }
-
- if (test -f $fpath/_config)
- . $fpath/_config # DEPRECATED
- if (test -f $fpath/_werc/config)
- . $fpath/_werc/config
-
- if (~ $#blogDirs 0 && ~ $#inBlog 0 && ~ $i [Bb]log)
- inBlog = 'yes'
-}
-
-# Redirections and other preprocessing
-if (~ $#redirectPermanent 1)
- perm_redirect $"redirectPermanent
-
-if not if (~ $#redirectPermanent 2 && {echo $SERVER_NAME^$REQUEST_URI|grep -s $redirectPermanent(1) }) {
- # Experimental regexp sub-based redirect, probably should find a nicer interface
- # For now only used at sites/harmful.cat-v.org/software/OO_programming/_config
- to=`{echo $SERVER_NAME^$REQUEST_URI|sed 's@'^$redirectPermanent(1)^'@'^$redirectPermanent(2)^'@'}
- if(! ~ $to $REQUEST_URI)
- perm_redirect $to
-}
-
-# Set Page title
-if(~ $pageTitle '')
- pageTitle=$siteTitle^' '^$siteSubTitle
-if not
- pageTitle=$"pageTitle^' | '^$"siteTitle^' '^$"siteSubTitle
-
-
-body=$sitedir/$req_path
-if (test -d $body) {
- if(! ~ $body */)
- perm_redirect $REQUEST_URI^'/'
- body=$body/index
-}
-
-select_handler
-
-# Template/body selection
-master_template= `{get_lib_file $master_template}
-
-if(~ $response_format html) {
- # Is awk_buffer really useful?
- template $headers $master_template | awk_buffer
- echo '</body></html>'
-}
-if not if (~ $response_format raw) {
- template < $master_template
-}
--- a/bin/gensitemaptxt.rc
+++ /dev/null
@@ -1,13 +1,0 @@
-#!/usr/bin/env rc
-
-for ( d in sites/*/ ) {
-echo $d
-9 du -a $d | awk '/\.(md|html)$/ { print $2 }; {}' | 9 sed -e 's/\.(md|html)$//' -e 's,/index$,/,' -e 's,^sites/,http://,' > $d/sitemap.txt
-
-if(! test -f $d/robots.txt) {
- echo generating missing robots.txt for $d
- echo $d|sed 's,sites/,Sitemap: http://,; s/$/sitemap.txt/;' > $d/robots.txt
- cat $d/robots.txt
-}
-
-}
--- a/bin/hgweb.config
+++ /dev/null
@@ -1,12 +1,0 @@
-[web]
-style = gitweb
-allow_archive = bz2
-
-#[paths]
-#w9 = /gsoc/hg/w9/
-
-[collections]
-#allow_archive = bz2 zip
-/gsoc/hg = /gsoc/hg/
-#/var/hg = /var/hg/
-
--- a/bin/hgwebdir.cgi
+++ /dev/null
@@ -1,47 +1,0 @@
-#!/usr/bin/env python
-#
-# An example CGI script to export multiple hgweb repos, edit as necessary
-
-# send python tracebacks to the browser if an error occurs:
-import cgitb
-cgitb.enable()
-
-# adjust python path if not a system-wide install:
-#import sys
-#sys.path.insert(0, "/path/to/python/lib")
-
-# If you'd like to serve pages with UTF-8 instead of your default
-# locale charset, you can do so by uncommenting the following lines.
-# Note that this will cause your .hgrc files to be interpreted in
-# UTF-8 and all your repo files to be displayed using UTF-8.
-#
-#import os
-#os.environ["HGENCODING"] = "UTF-8"
-
-from mercurial.hgweb.hgwebdir_mod import hgwebdir
-from mercurial.hgweb.request import wsgiapplication
-import mercurial.hgweb.wsgicgi as wsgicgi
-
-# The config file looks like this. You can have paths to individual
-# repos, collections of repos in a directory tree, or both.
-#
-# [paths]
-# virtual/path = /real/path
-# virtual/path = /real/path
-#
-# [collections]
-# /prefix/to/strip/off = /root/of/tree/full/of/repos
-#
-# collections example: say directory tree /foo contains repos /foo/bar,
-# /foo/quux/baz. Give this config section:
-# [collections]
-# /foo = /foo
-# Then repos will list as bar and quux/baz.
-#
-# Alternatively you can pass a list of ('virtual/path', '/real/path') tuples
-# or use a dictionary with entries like 'virtual/path': '/real/path'
-
-def make_web_app():
- return hgwebdir("hgweb.config")
-
-wsgicgi.launch(wsgiapplication(make_web_app))
--- a/bin/markdown.pl
+++ /dev/null
@@ -1,1447 +1,0 @@
-#!/usr/bin/env perl
-#
-# Markdown -- A text-to-HTML conversion tool for web writers
-#
-# Copyright (c) 2004 John Gruber
-# <http://daringfireball.net/projects/markdown/>
-#
-package Markdown;
-require 5.006_000;
-use strict;
-use warnings;
-
-use Digest::MD5 qw(md5_hex);
-use vars qw($VERSION);
-$VERSION = '1.0.1';
-# Tue 14 Dec 2004
-
-## Disabled; causes problems under Perl 5.6.1:
-# use utf8;
-# binmode( STDOUT, ":utf8" ); # c.f.: http://acis.openlib.org/dev/perl-unicode-struggle.html
-
-
-#
-# Global default settings:
-#
-my $g_empty_element_suffix = " />"; # Change to ">" for HTML output
-my $g_tab_width = 4;
-
-
-#
-# Globals:
-#
-
-# Regex to match balanced [brackets]. See Friedl's
-# "Mastering Regular Expressions", 2nd Ed., pp. 328-331.
-my $g_nested_brackets;
-$g_nested_brackets = qr{
- (?> # Atomic matching
- [^\[\]]+ # Anything other than brackets
- |
- \[
- (??{ $g_nested_brackets }) # Recursive set of nested brackets
- \]
- )*
-}x;
-
-
-# Table of hash values for escaped characters:
-my %g_escape_table;
-foreach my $char (split //, '\\`*_{}[]()>#+-.!') {
- $g_escape_table{$char} = md5_hex($char);
-}
-
-
-# Global hashes, used by various utility routines
-my %g_urls;
-my %g_titles;
-my %g_html_blocks;
-
-# Used to track when we're inside an ordered or unordered list
-# (see _ProcessListItems() for details):
-my $g_list_level = 0;
-
-
-#### Blosxom plug-in interface ##########################################
-
-# Set $g_blosxom_use_meta to 1 to use Blosxom's meta plug-in to determine
-# which posts Markdown should process, using a "meta-markup: markdown"
-# header. If it's set to 0 (the default), Markdown will process all
-# entries.
-my $g_blosxom_use_meta = 0;
-
-sub start { 1; }
-sub story {
- my($pkg, $path, $filename, $story_ref, $title_ref, $body_ref) = @_;
-
- if ( (! $g_blosxom_use_meta) or
- (defined($meta::markup) and ($meta::markup =~ /^\s*markdown\s*$/i))
- ){
- $$body_ref = Markdown($$body_ref);
- }
- 1;
-}
-
-
-#### Movable Type plug-in interface #####################################
-eval {require MT}; # Test to see if we're running in MT.
-unless ($@) {
- require MT;
- import MT;
- require MT::Template::Context;
- import MT::Template::Context;
-
- eval {require MT::Plugin}; # Test to see if we're running >= MT 3.0.
- unless ($@) {
- require MT::Plugin;
- import MT::Plugin;
- my $plugin = new MT::Plugin({
- name => "Markdown",
- description => "A plain-text-to-HTML formatting plugin. (Version: $VERSION)",
- doc_link => 'http://daringfireball.net/projects/markdown/'
- });
- MT->add_plugin( $plugin );
- }
-
- MT::Template::Context->add_container_tag(MarkdownOptions => sub {
- my $ctx = shift;
- my $args = shift;
- my $builder = $ctx->stash('builder');
- my $tokens = $ctx->stash('tokens');
-
- if (defined ($args->{'output'}) ) {
- $ctx->stash('markdown_output', lc $args->{'output'});
- }
-
- defined (my $str = $builder->build($ctx, $tokens) )
- or return $ctx->error($builder->errstr);
- $str; # return value
- });
-
- MT->add_text_filter('markdown' => {
- label => 'Markdown',
- docs => 'http://daringfireball.net/projects/markdown/',
- on_format => sub {
- my $text = shift;
- my $ctx = shift;
- my $raw = 0;
- if (defined $ctx) {
- my $output = $ctx->stash('markdown_output');
- if (defined $output && $output =~ m/^html/i) {
- $g_empty_element_suffix = ">";
- $ctx->stash('markdown_output', '');
- }
- elsif (defined $output && $output eq 'raw') {
- $raw = 1;
- $ctx->stash('markdown_output', '');
- }
- else {
- $raw = 0;
- $g_empty_element_suffix = " />";
- }
- }
- $text = $raw ? $text : Markdown($text);
- $text;
- },
- });
-
- # If SmartyPants is loaded, add a combo Markdown/SmartyPants text filter:
- my $smartypants;
-
- {
- no warnings "once";
- $smartypants = $MT::Template::Context::Global_filters{'smarty_pants'};
- }
-
- if ($smartypants) {
- MT->add_text_filter('markdown_with_smartypants' => {
- label => 'Markdown With SmartyPants',
- docs => 'http://daringfireball.net/projects/markdown/',
- on_format => sub {
- my $text = shift;
- my $ctx = shift;
- if (defined $ctx) {
- my $output = $ctx->stash('markdown_output');
- if (defined $output && $output eq 'html') {
- $g_empty_element_suffix = ">";
- }
- else {
- $g_empty_element_suffix = " />";
- }
- }
- $text = Markdown($text);
- $text = $smartypants->($text, '1');
- },
- });
- }
-}
-else {
-#### BBEdit/command-line text filter interface ##########################
-# Needs to be hidden from MT (and Blosxom when running in static mode).
-
- # We're only using $blosxom::version once; tell Perl not to warn us:
- no warnings 'once';
- unless ( defined($blosxom::version) ) {
- use warnings;
-
- #### Check for command-line switches: #################
- my %cli_opts;
- use Getopt::Long;
- Getopt::Long::Configure('pass_through');
- GetOptions(\%cli_opts,
- 'version',
- 'shortversion',
- 'html4tags',
- );
- if ($cli_opts{'version'}) { # Version info
- print "\nThis is Markdown, version $VERSION.\n";
- print "Copyright 2004 John Gruber\n";
- print "http://daringfireball.net/projects/markdown/\n\n";
- exit 0;
- }
- if ($cli_opts{'shortversion'}) { # Just the version number string.
- print $VERSION;
- exit 0;
- }
- if ($cli_opts{'html4tags'}) { # Use HTML tag style instead of XHTML
- $g_empty_element_suffix = ">";
- }
-
-
- #### Process incoming text: ###########################
- my $text;
- {
- local $/; # Slurp the whole file
- $text = <>;
- }
- print Markdown($text);
- }
-}
-
-
-
-sub Markdown {
-#
-# Main function. The order in which other subs are called here is
-# essential. Link and image substitutions need to happen before
-# _EscapeSpecialChars(), so that any *'s or _'s in the <a>
-# and <img> tags get encoded.
-#
- my $text = shift;
-
- # Clear the global hashes. If we don't clear these, you get conflicts
- # from other articles when generating a page which contains more than
- # one article (e.g. an index page that shows the N most recent
- # articles):
- %g_urls = ();
- %g_titles = ();
- %g_html_blocks = ();
-
-
- # Standardize line endings:
- $text =~ s{\r\n}{\n}g; # DOS to Unix
- $text =~ s{\r}{\n}g; # Mac to Unix
-
- # Make sure $text ends with a couple of newlines:
- $text .= "\n\n";
-
- # Convert all tabs to spaces.
- $text = _Detab($text);
-
- # Strip any lines consisting only of spaces and tabs.
- # This makes subsequent regexen easier to write, because we can
- # match consecutive blank lines with /\n+/ instead of something
- # contorted like /[ \t]*\n+/ .
- $text =~ s/^[ \t]+$//mg;
-
- # Turn block-level HTML blocks into hash entries
- $text = _HashHTMLBlocks($text);
-
- # Strip link definitions, store in hashes.
- $text = _StripLinkDefinitions($text);
-
- $text = _RunBlockGamut($text);
-
- $text = _UnescapeSpecialChars($text);
-
- return $text . "\n";
-}
-
-
-sub _StripLinkDefinitions {
-#
-# Strips link definitions from text, stores the URLs and titles in
-# hash references.
-#
- my $text = shift;
- my $less_than_tab = $g_tab_width - 1;
-
- # Link defs are in the form: ^[id]: url "optional title"
- while ($text =~ s{
- ^[ ]{0,$less_than_tab}\[(.+)\]: # id = $1
- [ \t]*
- \n? # maybe *one* newline
- [ \t]*
- <?(\S+?)>? # url = $2
- [ \t]*
- \n? # maybe one newline
- [ \t]*
- (?:
- (?<=\s) # lookbehind for whitespace
- ["(]
- (.+?) # title = $3
- [")]
- [ \t]*
- )? # title is optional
- (?:\n+|\Z)
- }
- {}mx) {
- $g_urls{lc $1} = _EncodeAmpsAndAngles( $2 ); # Link IDs are case-insensitive
- if ($3) {
- $g_titles{lc $1} = $3;
- $g_titles{lc $1} =~ s/"/"/g;
- }
- }
-
- return $text;
-}
-
-
-sub _HashHTMLBlocks {
- my $text = shift;
- my $less_than_tab = $g_tab_width - 1;
-
- # Hashify HTML blocks:
- # We only want to do this for block-level HTML tags, such as headers,
- # lists, and tables. That's because we still want to wrap <p>s around
- # "paragraphs" that are wrapped in non-block-level tags, such as anchors,
- # phrase emphasis, and spans. The list of tags we're looking for is
- # hard-coded:
- my $block_tags_a = qr/p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math|ins|del/;
- my $block_tags_b = qr/p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math/;
-
- # First, look for nested blocks, e.g.:
- # <div>
- # <div>
- # tags for inner block must be indented.
- # </div>
- # </div>
- #
- # The outermost tags must start at the left margin for this to match, and
- # the inner nested divs must be indented.
- # We need to do this before the next, more liberal match, because the next
- # match will start at the first `<div>` and stop at the first `</div>`.
- $text =~ s{
- ( # save in $1
- ^ # start of line (with /m)
- <($block_tags_a) # start tag = $2
- \b # word break
- (.*\n)*? # any number of lines, minimally matching
- </\2> # the matching end tag
- [ \t]* # trailing spaces/tabs
- (?=\n+|\Z) # followed by a newline or end of document
- )
- }{
- my $key = md5_hex($1);
- $g_html_blocks{$key} = $1;
- "\n\n" . $key . "\n\n";
- }egmx;
-
-
- #
- # Now match more liberally, simply from `\n<tag>` to `</tag>\n`
- #
- $text =~ s{
- ( # save in $1
- ^ # start of line (with /m)
- <($block_tags_b) # start tag = $2
- \b # word break
- (.*\n)*? # any number of lines, minimally matching
- .*</\2> # the matching end tag
- [ \t]* # trailing spaces/tabs
- (?=\n+|\Z) # followed by a newline or end of document
- )
- }{
- my $key = md5_hex($1);
- $g_html_blocks{$key} = $1;
- "\n\n" . $key . "\n\n";
- }egmx;
- # Special case just for <hr />. It was easier to make a special case than
- # to make the other regex more complicated.
- $text =~ s{
- (?:
- (?<=\n\n) # Starting after a blank line
- | # or
- \A\n? # the beginning of the doc
- )
- ( # save in $1
- [ ]{0,$less_than_tab}
- <(hr) # start tag = $2
- \b # word break
- ([^<>])*? #
- /?> # the matching end tag
- [ \t]*
- (?=\n{2,}|\Z) # followed by a blank line or end of document
- )
- }{
- my $key = md5_hex($1);
- $g_html_blocks{$key} = $1;
- "\n\n" . $key . "\n\n";
- }egx;
-
- # Special case for standalone HTML comments:
- $text =~ s{
- (?:
- (?<=\n\n) # Starting after a blank line
- | # or
- \A\n? # the beginning of the doc
- )
- ( # save in $1
- [ ]{0,$less_than_tab}
- (?s:
- <!
- (--.*?--\s*)+
- >
- )
- [ \t]*
- (?=\n{2,}|\Z) # followed by a blank line or end of document
- )
- }{
- my $key = md5_hex($1);
- $g_html_blocks{$key} = $1;
- "\n\n" . $key . "\n\n";
- }egx;
-
-
- return $text;
-}
-
-
-sub _RunBlockGamut {
-#
-# These are all the transformations that form block-level
-# tags like paragraphs, headers, and list items.
-#
- my $text = shift;
-
- $text = _DoHeaders($text);
-
- # Do Horizontal Rules:
- $text =~ s{^[ ]{0,2}([ ]?\*[ ]?){3,}[ \t]*$}{\n<hr$g_empty_element_suffix\n}gmx;
- $text =~ s{^[ ]{0,2}([ ]? -[ ]?){3,}[ \t]*$}{\n<hr$g_empty_element_suffix\n}gmx;
- $text =~ s{^[ ]{0,2}([ ]? _[ ]?){3,}[ \t]*$}{\n<hr$g_empty_element_suffix\n}gmx;
-
- $text = _DoLists($text);
-
- $text = _DoCodeBlocks($text);
-
- $text = _DoBlockQuotes($text);
-
- # We already ran _HashHTMLBlocks() before, in Markdown(), but that
- # was to escape raw HTML in the original Markdown source. This time,
- # we're escaping the markup we've just created, so that we don't wrap
- # <p> tags around block-level tags.
- $text = _HashHTMLBlocks($text);
-
- $text = _FormParagraphs($text);
-
- return $text;
-}
-
-
-sub _RunSpanGamut {
-#
-# These are all the transformations that occur *within* block-level
-# tags like paragraphs, headers, and list items.
-#
- my $text = shift;
-
- $text = _DoCodeSpans($text);
-
- $text = _EscapeSpecialChars($text);
-
- # Process anchor and image tags. Images must come first,
- # because ![foo][f] looks like an anchor.
- $text = _DoImages($text);
- $text = _DoAnchors($text);
-
- # Make links out of things like `<http://example.com/>`
- # Must come after _DoAnchors(), because you can use < and >
- # delimiters in inline links like [this](<url>).
- $text = _DoAutoLinks($text);
-
- $text = _EncodeAmpsAndAngles($text);
-
- $text = _DoItalicsAndBold($text);
-
- # Do hard breaks:
- $text =~ s/ {2,}\n/ <br$g_empty_element_suffix\n/g;
-
- return $text;
-}
-
-
-sub _EscapeSpecialChars {
- my $text = shift;
- my $tokens ||= _TokenizeHTML($text);
-
- $text = ''; # rebuild $text from the tokens
-# my $in_pre = 0; # Keep track of when we're inside <pre> or <code> tags.
-# my $tags_to_skip = qr!<(/?)(?:pre|code|kbd|script|math)[\s>]!;
-
- foreach my $cur_token (@$tokens) {
- if ($cur_token->[0] eq "tag") {
- # Within tags, encode * and _ so they don't conflict
- # with their use in Markdown for italics and strong.
- # We're replacing each such character with its
- # corresponding MD5 checksum value; this is likely
- # overkill, but it should prevent us from colliding
- # with the escape values by accident.
- $cur_token->[1] =~ s! \* !$g_escape_table{'*'}!gx;
- $cur_token->[1] =~ s! _ !$g_escape_table{'_'}!gx;
- $text .= $cur_token->[1];
- } else {
- my $t = $cur_token->[1];
- $t = _EncodeBackslashEscapes($t);
- $text .= $t;
- }
- }
- return $text;
-}
-
-
-sub _DoAnchors {
-#
-# Turn Markdown link shortcuts into XHTML <a> tags.
-#
- my $text = shift;
-
- #
- # First, handle reference-style links: [link text] [id]
- #
- $text =~ s{
- ( # wrap whole match in $1
- \[
- ($g_nested_brackets) # link text = $2
- \]
-
- [ ]? # one optional space
- (?:\n[ ]*)? # one optional newline followed by spaces
-
- \[
- (.*?) # id = $3
- \]
- )
- }{
- my $result;
- my $whole_match = $1;
- my $link_text = $2;
- my $link_id = lc $3;
-
- if ($link_id eq "") {
- $link_id = lc $link_text; # for shortcut links like [this][].
- }
-
- if (defined $g_urls{$link_id}) {
- my $url = $g_urls{$link_id};
- $url =~ s! \* !$g_escape_table{'*'}!gx; # We've got to encode these to avoid
- $url =~ s! _ !$g_escape_table{'_'}!gx; # conflicting with italics/bold.
- $result = "<a href=\"$url\"";
- if ( defined $g_titles{$link_id} ) {
- my $title = $g_titles{$link_id};
- $title =~ s! \* !$g_escape_table{'*'}!gx;
- $title =~ s! _ !$g_escape_table{'_'}!gx;
- $result .= " title=\"$title\"";
- }
- $result .= ">$link_text</a>";
- }
- else {
- $result = $whole_match;
- }
- $result;
- }xsge;
-
- #
- # Next, inline-style links: [link text](url "optional title")
- #
- $text =~ s{
- ( # wrap whole match in $1
- \[
- ($g_nested_brackets) # link text = $2
- \]
- \( # literal paren
- [ \t]*
- <?(.*?)>? # href = $3
- [ \t]*
- ( # $4
- (['"]) # quote char = $5
- (.*?) # Title = $6
- \5 # matching quote
- )? # title is optional
- \)
- )
- }{
- my $result;
- my $whole_match = $1;
- my $link_text = $2;
- my $url = $3;
- my $title = $6;
-
- $url =~ s! \* !$g_escape_table{'*'}!gx; # We've got to encode these to avoid
- $url =~ s! _ !$g_escape_table{'_'}!gx; # conflicting with italics/bold.
- $result = "<a href=\"$url\"";
-
- if (defined $title) {
- $title =~ s/"/"/g;
- $title =~ s! \* !$g_escape_table{'*'}!gx;
- $title =~ s! _ !$g_escape_table{'_'}!gx;
- $result .= " title=\"$title\"";
- }
-
- $result .= ">$link_text</a>";
-
- $result;
- }xsge;
-
- return $text;
-}
-
-
-sub _DoImages {
-#
-# Turn Markdown image shortcuts into <img> tags.
-#
- my $text = shift;
-
- #
- # First, handle reference-style labeled images: ![alt text][id]
- #
- $text =~ s{
- ( # wrap whole match in $1
- !\[
- (.*?) # alt text = $2
- \]
-
- [ ]? # one optional space
- (?:\n[ ]*)? # one optional newline followed by spaces
-
- \[
- (.*?) # id = $3
- \]
-
- )
- }{
- my $result;
- my $whole_match = $1;
- my $alt_text = $2;
- my $link_id = lc $3;
-
- if ($link_id eq "") {
- $link_id = lc $alt_text; # for shortcut links like ![this][].
- }
-
- $alt_text =~ s/"/"/g;
- if (defined $g_urls{$link_id}) {
- my $url = $g_urls{$link_id};
- $url =~ s! \* !$g_escape_table{'*'}!gx; # We've got to encode these to avoid
- $url =~ s! _ !$g_escape_table{'_'}!gx; # conflicting with italics/bold.
- $result = "<img src=\"$url\" alt=\"$alt_text\"";
- if (defined $g_titles{$link_id}) {
- my $title = $g_titles{$link_id};
- $title =~ s! \* !$g_escape_table{'*'}!gx;
- $title =~ s! _ !$g_escape_table{'_'}!gx;
- $result .= " title=\"$title\"";
- }
- $result .= $g_empty_element_suffix;
- }
- else {
- # If there's no such link ID, leave intact:
- $result = $whole_match;
- }
-
- $result;
- }xsge;
-
- #
- # Next, handle inline images: ![alt text](url "optional title")
- # Don't forget: encode * and _
-
- $text =~ s{
- ( # wrap whole match in $1
- !\[
- (.*?) # alt text = $2
- \]
- \( # literal paren
- [ \t]*
- <?(\S+?)>? # src url = $3
- [ \t]*
- ( # $4
- (['"]) # quote char = $5
- (.*?) # title = $6
- \5 # matching quote
- [ \t]*
- )? # title is optional
- \)
- )
- }{
- my $result;
- my $whole_match = $1;
- my $alt_text = $2;
- my $url = $3;
- my $title = '';
- if (defined($6)) {
- $title = $6;
- }
-
- $alt_text =~ s/"/"/g;
- $title =~ s/"/"/g;
- $url =~ s! \* !$g_escape_table{'*'}!gx; # We've got to encode these to avoid
- $url =~ s! _ !$g_escape_table{'_'}!gx; # conflicting with italics/bold.
- $result = "<img src=\"$url\" alt=\"$alt_text\"";
- if (defined $title) {
- $title =~ s! \* !$g_escape_table{'*'}!gx;
- $title =~ s! _ !$g_escape_table{'_'}!gx;
- $result .= " title=\"$title\"";
- }
- $result .= $g_empty_element_suffix;
-
- $result;
- }xsge;
-
- return $text;
-}
-
-
-sub _DoHeaders {
- my $text = shift;
-
- # Setext-style headers:
- # Header 1
- # ========
- #
- # Header 2
- # --------
- #
- $text =~ s{ ^(.+)[ \t]*\n=+[ \t]*\n+ }{
- "<h1>" . _RunSpanGamut($1) . "</h1>\n\n";
- }egmx;
-
- $text =~ s{ ^(.+)[ \t]*\n-+[ \t]*\n+ }{
- "<h2>" . _RunSpanGamut($1) . "</h2>\n\n";
- }egmx;
-
-
- # atx-style headers:
- # # Header 1
- # ## Header 2
- # ## Header 2 with closing hashes ##
- # ...
- # ###### Header 6
- #
- $text =~ s{
- ^(\#{1,6}) # $1 = string of #'s
- [ \t]*
- (.+?) # $2 = Header text
- [ \t]*
- \#* # optional closing #'s (not counted)
- \n+
- }{
- my $h_level = length($1);
- "<h$h_level>" . _RunSpanGamut($2) . "</h$h_level>\n\n";
- }egmx;
-
- return $text;
-}
-
-
-sub _DoLists {
-#
-# Form HTML ordered (numbered) and unordered (bulleted) lists.
-#
- my $text = shift;
- my $less_than_tab = $g_tab_width - 1;
-
- # Re-usable patterns to match list item bullets and number markers:
- my $marker_ul = qr/[*+-]/;
- my $marker_ol = qr/\d+[.]/;
- my $marker_any = qr/(?:$marker_ul|$marker_ol)/;
-
- # Re-usable pattern to match any entirel ul or ol list:
- my $whole_list = qr{
- ( # $1 = whole list
- ( # $2
- [ ]{0,$less_than_tab}
- (${marker_any}) # $3 = first list item marker
- [ \t]+
- )
- (?s:.+?)
- ( # $4
- \z
- |
- \n{2,}
- (?=\S)
- (?! # Negative lookahead for another list item marker
- [ \t]*
- ${marker_any}[ \t]+
- )
- )
- )
- }mx;
-
- # We use a different prefix before nested lists than top-level lists.
- # See extended comment in _ProcessListItems().
- #
- # Note: There's a bit of duplication here. My original implementation
- # created a scalar regex pattern as the conditional result of the test on
- # $g_list_level, and then only ran the $text =~ s{...}{...}egmx
- # substitution once, using the scalar as the pattern. This worked,
- # everywhere except when running under MT on my hosting account at Pair
- # Networks. There, this caused all rebuilds to be killed by the reaper (or
- # perhaps they crashed, but that seems incredibly unlikely given that the
- # same script on the same server ran fine *except* under MT. I've spent
- # more time trying to figure out why this is happening than I'd like to
- # admit. My only guess, backed up by the fact that this workaround works,
- # is that Perl optimizes the substition when it can figure out that the
- # pattern will never change, and when this optimization isn't on, we run
- # afoul of the reaper. Thus, the slightly redundant code to that uses two
- # static s/// patterns rather than one conditional pattern.
-
- if ($g_list_level) {
- $text =~ s{
- ^
- $whole_list
- }{
- my $list = $1;
- my $list_type = ($3 =~ m/$marker_ul/) ? "ul" : "ol";
- # Turn double returns into triple returns, so that we can make a
- # paragraph for the last item in a list, if necessary:
- $list =~ s/\n{2,}/\n\n\n/g;
- my $result = _ProcessListItems($list, $marker_any);
- $result = "<$list_type>\n" . $result . "</$list_type>\n";
- $result;
- }egmx;
- }
- else {
- $text =~ s{
- (?:(?<=\n\n)|\A\n?)
- $whole_list
- }{
- my $list = $1;
- my $list_type = ($3 =~ m/$marker_ul/) ? "ul" : "ol";
- # Turn double returns into triple returns, so that we can make a
- # paragraph for the last item in a list, if necessary:
- $list =~ s/\n{2,}/\n\n\n/g;
- my $result = _ProcessListItems($list, $marker_any);
- $result = "<$list_type>\n" . $result . "</$list_type>\n";
- $result;
- }egmx;
- }
-
-
- return $text;
-}
-
-
-sub _ProcessListItems {
-#
-# Process the contents of a single ordered or unordered list, splitting it
-# into individual list items.
-#
-
- my $list_str = shift;
- my $marker_any = shift;
-
-
- # The $g_list_level global keeps track of when we're inside a list.
- # Each time we enter a list, we increment it; when we leave a list,
- # we decrement. If it's zero, we're not in a list anymore.
- #
- # We do this because when we're not inside a list, we want to treat
- # something like this:
- #
- # I recommend upgrading to version
- # 8. Oops, now this line is treated
- # as a sub-list.
- #
- # As a single paragraph, despite the fact that the second line starts
- # with a digit-period-space sequence.
- #
- # Whereas when we're inside a list (or sub-list), that line will be
- # treated as the start of a sub-list. What a kludge, huh? This is
- # an aspect of Markdown's syntax that's hard to parse perfectly
- # without resorting to mind-reading. Perhaps the solution is to
- # change the syntax rules such that sub-lists must start with a
- # starting cardinal number; e.g. "1." or "a.".
-
- $g_list_level++;
-
- # trim trailing blank lines:
- $list_str =~ s/\n{2,}\z/\n/;
-
-
- $list_str =~ s{
- (\n)? # leading line = $1
- (^[ \t]*) # leading whitespace = $2
- ($marker_any) [ \t]+ # list marker = $3
- ((?s:.+?) # list item text = $4
- (\n{1,2}))
- (?= \n* (\z | \2 ($marker_any) [ \t]+))
- }{
- my $item = $4;
- my $leading_line = $1;
- my $leading_space = $2;
-
- if ($leading_line or ($item =~ m/\n{2,}/)) {
- $item = _RunBlockGamut(_Outdent($item));
- }
- else {
- # Recursion for sub-lists:
- $item = _DoLists(_Outdent($item));
- chomp $item;
- $item = _RunSpanGamut($item);
- }
-
- "<li>" . $item . "</li>\n";
- }egmx;
-
- $g_list_level--;
- return $list_str;
-}
-
-
-
-sub _DoCodeBlocks {
-#
-# Process Markdown `<pre><code>` blocks.
-#
-
- my $text = shift;
-
- $text =~ s{
- (?:\n\n|\A)
- ( # $1 = the code block -- one or more lines, starting with a space/tab
- (?:
- (?:[ ]{$g_tab_width} | \t) # Lines must start with a tab or a tab-width of spaces
- .*\n+
- )+
- )
- ((?=^[ ]{0,$g_tab_width}\S)|\Z) # Lookahead for non-space at line-start, or end of doc
- }{
- my $codeblock = $1;
- my $result; # return value
-
- $codeblock = _EncodeCode(_Outdent($codeblock));
- $codeblock = _Detab($codeblock);
- $codeblock =~ s/\A\n+//; # trim leading newlines
- $codeblock =~ s/\s+\z//; # trim trailing whitespace
-
- $result = "\n\n<pre><code>" . $codeblock . "\n</code></pre>\n\n";
-
- $result;
- }egmx;
-
- return $text;
-}
-
-
-sub _DoCodeSpans {
-#
-# * Backtick quotes are used for <code></code> spans.
-#
-# * You can use multiple backticks as the delimiters if you want to
-# include literal backticks in the code span. So, this input:
-#
-# Just type ``foo `bar` baz`` at the prompt.
-#
-# Will translate to:
-#
-# <p>Just type <code>foo `bar` baz</code> at the prompt.</p>
-#
-# There's no arbitrary limit to the number of backticks you
-# can use as delimters. If you need three consecutive backticks
-# in your code, use four for delimiters, etc.
-#
-# * You can use spaces to get literal backticks at the edges:
-#
-# ... type `` `bar` `` ...
-#
-# Turns to:
-#
-# ... type <code>`bar`</code> ...
-#
-
- my $text = shift;
-
- $text =~ s@
- (`+) # $1 = Opening run of `
- (.+?) # $2 = The code block
- (?<!`)
- \1 # Matching closer
- (?!`)
- @
- my $c = "$2";
- $c =~ s/^[ \t]*//g; # leading whitespace
- $c =~ s/[ \t]*$//g; # trailing whitespace
- $c = _EncodeCode($c);
- "<code>$c</code>";
- @egsx;
-
- return $text;
-}
-
-
-sub _EncodeCode {
-#
-# Encode/escape certain characters inside Markdown code runs.
-# The point is that in code, these characters are literals,
-# and lose their special Markdown meanings.
-#
- local $_ = shift;
-
- # Encode all ampersands; HTML entities are not
- # entities within a Markdown code span.
- s/&/&/g;
-
- # Encode $'s, but only if we're running under Blosxom.
- # (Blosxom interpolates Perl variables in article bodies.)
- {
- no warnings 'once';
- if (defined($blosxom::version)) {
- s/\$/$/g;
- }
- }
-
-
- # Do the angle bracket song and dance:
- s! < !<!gx;
- s! > !>!gx;
-
- # Now, escape characters that are magic in Markdown:
- s! \* !$g_escape_table{'*'}!gx;
- s! _ !$g_escape_table{'_'}!gx;
- s! { !$g_escape_table{'{'}!gx;
- s! } !$g_escape_table{'}'}!gx;
- s! \[ !$g_escape_table{'['}!gx;
- s! \] !$g_escape_table{']'}!gx;
- s! \\ !$g_escape_table{'\\'}!gx;
-
- return $_;
-}
-
-
-sub _DoItalicsAndBold {
- my $text = shift;
-
- # <strong> must go first:
- $text =~ s{ (\*\*|__) (?=\S) (.+?[*_]*) (?<=\S) \1 }
- {<strong>$2</strong>}gsx;
-
- $text =~ s{ (\*|_) (?=\S) (.+?) (?<=\S) \1 }
- {<em>$2</em>}gsx;
-
- return $text;
-}
-
-
-sub _DoBlockQuotes {
- my $text = shift;
-
- $text =~ s{
- ( # Wrap whole match in $1
- (
- ^[ \t]*>[ \t]? # '>' at the start of a line
- .+\n # rest of the first line
- (.+\n)* # subsequent consecutive lines
- \n* # blanks
- )+
- )
- }{
- my $bq = $1;
- $bq =~ s/^[ \t]*>[ \t]?//gm; # trim one level of quoting
- $bq =~ s/^[ \t]+$//mg; # trim whitespace-only lines
- $bq = _RunBlockGamut($bq); # recurse
-
- $bq =~ s/^/ /g;
- # These leading spaces screw with <pre> content, so we need to fix that:
- $bq =~ s{
- (\s*<pre>.+?</pre>)
- }{
- my $pre = $1;
- $pre =~ s/^ //mg;
- $pre;
- }egsx;
-
- "<blockquote>\n$bq\n</blockquote>\n\n";
- }egmx;
-
-
- return $text;
-}
-
-
-sub _FormParagraphs {
-#
-# Params:
-# $text - string to process with html <p> tags
-#
- my $text = shift;
-
- # Strip leading and trailing lines:
- $text =~ s/\A\n+//;
- $text =~ s/\n+\z//;
-
- my @grafs = split(/\n{2,}/, $text);
-
- #
- # Wrap <p> tags.
- #
- foreach (@grafs) {
- unless (defined( $g_html_blocks{$_} )) {
- $_ = _RunSpanGamut($_);
- s/^([ \t]*)/<p>/;
- $_ .= "</p>";
- }
- }
-
- #
- # Unhashify HTML blocks
- #
- foreach (@grafs) {
- if (defined( $g_html_blocks{$_} )) {
- $_ = $g_html_blocks{$_};
- }
- }
-
- return join "\n\n", @grafs;
-}
-
-
-sub _EncodeAmpsAndAngles {
-# Smart processing for ampersands and angle brackets that need to be encoded.
-
- my $text = shift;
-
- # Ampersand-encoding based entirely on Nat Irons's Amputator MT plugin:
- # http://bumppo.net/projects/amputator/
- $text =~ s/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/&/g;
-
- # Encode naked <'s
- $text =~ s{<(?![a-z/?\$!])}{<}gi;
-
- return $text;
-}
-
-
-sub _EncodeBackslashEscapes {
-#
-# Parameter: String.
-# Returns: The string, with after processing the following backslash
-# escape sequences.
-#
- local $_ = shift;
-
- s! \\\\ !$g_escape_table{'\\'}!gx; # Must process escaped backslashes first.
- s! \\` !$g_escape_table{'`'}!gx;
- s! \\\* !$g_escape_table{'*'}!gx;
- s! \\_ !$g_escape_table{'_'}!gx;
- s! \\\{ !$g_escape_table{'{'}!gx;
- s! \\\} !$g_escape_table{'}'}!gx;
- s! \\\[ !$g_escape_table{'['}!gx;
- s! \\\] !$g_escape_table{']'}!gx;
- s! \\\( !$g_escape_table{'('}!gx;
- s! \\\) !$g_escape_table{')'}!gx;
- s! \\> !$g_escape_table{'>'}!gx;
- s! \\\# !$g_escape_table{'#'}!gx;
- s! \\\+ !$g_escape_table{'+'}!gx;
- s! \\\- !$g_escape_table{'-'}!gx;
- s! \\\. !$g_escape_table{'.'}!gx;
- s{ \\! }{$g_escape_table{'!'}}gx;
-
- return $_;
-}
-
-
-sub _DoAutoLinks {
- my $text = shift;
-
- $text =~ s{<((https?|ftp):[^'">\s]+)>}{<a href="$1">$1</a>}gi;
-
- # Email addresses: <[email protected]>
- $text =~ s{
- <
- (?:mailto:)?
- (
- [-.\w]+
- \@
- [-a-z0-9]+(\.[-a-z0-9]+)*\.[a-z]+
- )
- >
- }{
- _EncodeEmailAddress( _UnescapeSpecialChars($1) );
- }egix;
-
- return $text;
-}
-
-
-sub _EncodeEmailAddress {
-#
-# Input: an email address, e.g. "[email protected]"
-#
-# Output: the email address as a mailto link, with each character
-# of the address encoded as either a decimal or hex entity, in
-# the hopes of foiling most address harvesting spam bots. E.g.:
-#
-# <a href="mailto:foo@e
-# xample.com">foo
-# @example.com</a>
-#
-# Based on a filter by Matthew Wickline, posted to the BBEdit-Talk
-# mailing list: <http://tinyurl.com/yu7ue>
-#
-
- my $addr = shift;
-
- srand;
- my @encode = (
- sub { '&#' . ord(shift) . ';' },
- sub { '&#x' . sprintf( "%X", ord(shift) ) . ';' },
- sub { shift },
- );
-
- $addr = "mailto:" . $addr;
-
- $addr =~ s{(.)}{
- my $char = $1;
- if ( $char eq '@' ) {
- # this *must* be encoded. I insist.
- $char = $encode[int rand 1]->($char);
- } elsif ( $char ne ':' ) {
- # leave ':' alone (to spot mailto: later)
- my $r = rand;
- # roughly 10% raw, 45% hex, 45% dec
- $char = (
- $r > .9 ? $encode[2]->($char) :
- $r < .45 ? $encode[1]->($char) :
- $encode[0]->($char)
- );
- }
- $char;
- }gex;
-
- $addr = qq{<a href="$addr">$addr</a>};
- $addr =~ s{">.+?:}{">}; # strip the mailto: from the visible part
-
- return $addr;
-}
-
-
-sub _UnescapeSpecialChars {
-#
-# Swap back in all the special characters we've hidden.
-#
- my $text = shift;
-
- while( my($char, $hash) = each(%g_escape_table) ) {
- $text =~ s/$hash/$char/g;
- }
- return $text;
-}
-
-
-sub _TokenizeHTML {
-#
-# Parameter: String containing HTML markup.
-# Returns: Reference to an array of the tokens comprising the input
-# string. Each token is either a tag (possibly with nested,
-# tags contained therein, such as <a href="<MTFoo>">, or a
-# run of text between tags. Each element of the array is a
-# two-element array; the first is either 'tag' or 'text';
-# the second is the actual value.
-#
-#
-# Derived from the _tokenize() subroutine from Brad Choate's MTRegex plugin.
-# <http://www.bradchoate.com/past/mtregex.php>
-#
-
- my $str = shift;
- my $pos = 0;
- my $len = length $str;
- my @tokens;
-
- my $depth = 6;
- my $nested_tags = join('|', ('(?:<[a-z/!$](?:[^<>]') x $depth) . (')*>)' x $depth);
- my $match = qr/(?s: <! ( -- .*? -- \s* )+ > ) | # comment
- (?s: <\? .*? \?> ) | # processing instruction
- $nested_tags/ix; # nested tags
-
- while ($str =~ m/($match)/g) {
- my $whole_tag = $1;
- my $sec_start = pos $str;
- my $tag_start = $sec_start - length $whole_tag;
- if ($pos < $tag_start) {
- push @tokens, ['text', substr($str, $pos, $tag_start - $pos)];
- }
- push @tokens, ['tag', $whole_tag];
- $pos = pos $str;
- }
- push @tokens, ['text', substr($str, $pos, $len - $pos)] if $pos < $len;
- \@tokens;
-}
-
-
-sub _Outdent {
-#
-# Remove one level of line-leading tabs or spaces
-#
- my $text = shift;
-
- $text =~ s/^(\t|[ ]{1,$g_tab_width})//gm;
- return $text;
-}
-
-
-sub _Detab {
-#
-# Cribbed from a post by Bart Lateur:
-# <http://www.nntp.perl.org/group/perl.macperl.anyperl/154>
-#
- my $text = shift;
-
- $text =~ s{(.*?)\t}{$1.(' ' x ($g_tab_width - length($1) % $g_tab_width))}ge;
- return $text;
-}
-
-
-1;
-
-__END__
-
-
-=pod
-
-=head1 NAME
-
-B<Markdown>
-
-
-=head1 SYNOPSIS
-
-B<Markdown.pl> [ B<--html4tags> ] [ B<--version> ] [ B<-shortversion> ]
- [ I<file> ... ]
-
-
-=head1 DESCRIPTION
-
-Markdown is a text-to-HTML filter; it translates an easy-to-read /
-easy-to-write structured text format into HTML. Markdown's text format
-is most similar to that of plain text email, and supports features such
-as headers, *emphasis*, code blocks, blockquotes, and links.
-
-Markdown's syntax is designed not as a generic markup language, but
-specifically to serve as a front-end to (X)HTML. You can use span-level
-HTML tags anywhere in a Markdown document, and you can use block level
-HTML tags (like <div> and <table> as well).
-
-For more information about Markdown's syntax, see:
-
- http://daringfireball.net/projects/markdown/
-
-
-=head1 OPTIONS
-
-Use "--" to end switch parsing. For example, to open a file named "-z", use:
-
- Markdown.pl -- -z
-
-=over 4
-
-
-=item B<--html4tags>
-
-Use HTML 4 style for empty element tags, e.g.:
-
- <br>
-
-instead of Markdown's default XHTML style tags, e.g.:
-
- <br />
-
-
-=item B<-v>, B<--version>
-
-Display Markdown's version number and copyright information.
-
-
-=item B<-s>, B<--shortversion>
-
-Display the short-form version number.
-
-
-=back
-
-
-
-=head1 BUGS
-
-To file bug reports or feature requests (other than topics listed in the
-Caveats section above) please send email to:
-
- [email protected]
-
-Please include with your report: (1) the example input; (2) the output
-you expected; (3) the output Markdown actually produced.
-
-
-=head1 VERSION HISTORY
-
-See the readme file for detailed release notes for this version.
-
-1.0.1 - 14 Dec 2004
-
-1.0 - 28 Aug 2004
-
-
-=head1 AUTHOR
-
- John Gruber
- http://daringfireball.net
-
- PHP port and other contributions by Michel Fortin
- http://michelf.com
-
-
-=head1 COPYRIGHT AND LICENSE
-
-Copyright (c) 2003-2004 John Gruber
-<http://daringfireball.net/>
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are
-met:
-
-* Redistributions of source code must retain the above copyright notice,
- this list of conditions and the following disclaimer.
-
-* Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
-
-* Neither the name "Markdown" nor the names of its contributors may
- be used to endorse or promote products derived from this software
- without specific prior written permission.
-
-This software is provided by the copyright holders and contributors "as
-is" and any express or implied warranties, including, but not limited
-to, the implied warranties of merchantability and fitness for a
-particular purpose are disclaimed. In no event shall the copyright owner
-or contributors be liable for any direct, indirect, incidental, special,
-exemplary, or consequential damages (including, but not limited to,
-procurement of substitute goods or services; loss of use, data, or
-profits; or business interruption) however caused and on any theory of
-liability, whether in contract, strict liability, or tort (including
-negligence or otherwise) arising in any way out of the use of this
-software, even if advised of the possibility of such damage.
-
-=cut
--- a/bin/md_cache
+++ /dev/null
@@ -1,17 +1,0 @@
-#!/usr/bin/env rc
-. 9.rc
-
-cachedir=/tmp/md_cache
-mkdir -p $cachedir >[2]/dev/null
-
-tmpfile=$cachedir/mdtmp.$pid
-score=`{{tee $tmpfile || exit 1} | sha1sum}
-cachefile=$cachedir/$score
-
-if(test -f $cachefile)
- cat $cachefile
-if not {
- markdown.pl < $tmpfile | tee $cachefile.$pid
- mv $cachefile.$pid $cachefile
-}
-rm $tmpfile >[2]/dev/null
--- /dev/null
+++ b/bin/md_cache.rc
@@ -1,0 +1,17 @@
+#!/usr/bin/env rc
+. 9.rc
+
+cachedir=/tmp/md_cache
+mkdir -p $cachedir >[2]/dev/null
+
+tmpfile=$cachedir/mdtmp.$pid
+score=`{{tee $tmpfile || exit 1} | sha1sum}
+cachefile=$cachedir/$score
+
+if(test -f $cachefile)
+ cat $cachefile
+if not {
+ markdown.pl < $tmpfile | tee $cachefile.$pid
+ mv $cachefile.$pid $cachefile
+}
+rm $tmpfile >[2]/dev/null
--- a/bin/urldecode.awk
+++ /dev/null
@@ -1,39 +1,0 @@
-#!/usr/bin/awk -f
-BEGIN {
- hextab ["0"] = 0; hextab ["8"] = 8;
- hextab ["1"] = 1; hextab ["9"] = 9;
- hextab ["2"] = 2; hextab ["A"] = hextab ["a"] = 10
- hextab ["3"] = 3; hextab ["B"] = hextab ["b"] = 11;
- hextab ["4"] = 4; hextab ["C"] = hextab ["c"] = 12;
- hextab ["5"] = 5; hextab ["D"] = hextab ["d"] = 13;
- hextab ["6"] = 6; hextab ["E"] = hextab ["e"] = 14;
- hextab ["7"] = 7; hextab ["F"] = hextab ["f"] = 15;
-}
-{
- decoded = ""
- i = 1
- len = length ($0)
- while ( i <= len ) {
- c = substr ($0, i, 1)
- if ( c == "%" ) {
- if ( i+2 <= len ) {
- c1 = substr ($0, i+1, 1)
- c2 = substr ($0, i+2, 1)
- if ( hextab [c1] == "" || hextab [c2] == "" ) {
- print "WARNING: invalid hex encoding: %" c1 c2 | "cat >&2"
- } else {
- code = 0 + hextab [c1] * 16 + hextab [c2] + 0
- c = sprintf ("%c", code)
- i = i + 2
- }
- } else {
- print "WARNING: invalid % encoding: " substr ($0, i, len - i)
- }
- } else if ( c == "+" ) {
- c = " "
- }
- decoded = decoded c
- ++i
- }
- print decoded
-}
--- /dev/null
+++ b/bin/werc.rc
@@ -1,0 +1,322 @@
+#!/usr/local/plan9/bin/rc
+. ./cgilib.rc
+cd ..
+
+forbidden_uri_chars='[^a-zA-Z0-9_+\-\/\.]'
+response_format=html
+
+fn get_lib_file {
+ wantedfile = $1
+ if (test -f $sitedir/_werc/lib/$wantedfile)
+ echo -n $sitedir/_werc/lib/$wantedfile
+ if not if(! ~ $#masterSite 0 && test -f sites/$masterSite/_werc/lib/$wantedfile)
+ echo -n sites/$masterSite/_werc/lib/$wantedfile
+ if not if (test -f lib/$wantedfile)
+ echo -n lib/$wantedfile
+}
+
+# Title
+fn gentitle {
+ echo '<h1 class="headerTitle"><a href="/">' ^ $"siteTitle ^ ' <span id="headerSubTitle">' ^ $"siteSubTitle ^ '</span></a></h1>'
+}
+
+# Don't change var name or trailing ';', some dirs change the filter!
+dirfilter = '/\/[._]/d; /'^$forbidden_uri_chars^'/d; /\/robots.\txt$/d; /\/sitemap\.txt$/d; s,^\./,,; s,\.md$,,; s,\.html,,; s,\.txt,,; '
+
+# To be used from config files
+fn hide_paths {
+ for(i in $*)
+ dirfilter = $dirfilter^'/^'$i'$/d; '
+}
+
+# Sidebar
+fn menu {
+ ls -F $1 | sed $dirfilter | awk -F/ '
+ BEGIN { print "<ul class=\"side-bar\">" }
+ END { print "</ul>" }
+ # Should add \. to the list of allowed chars in file names, but need to find a way to filter out .txt and so on
+ /^([a-zA-Z0-9+_\-]+[\/*]?)+$/ && $NF != "index" {
+ isdir = match($0, "/$")
+ sub("[*/]$", "")
+
+ path = bname = $0
+ sub("^(.*/)?([0-9]+_)?", "", bname)
+ gsub("_", " ", bname)
+
+ if(isdir) {
+ bname = bname "/"
+ path = $0 "/"
+ }
+
+ if(index(ENVIRON["REQUEST_URI"] "/", "/" path) == 1) {
+ if(isdir) {
+ print "<li><a href=\"/" path "\" class=\"thisPage\">»<i> " bname "</i></a>"
+ system("rc -c ''menu " path "''")
+ } else {
+ print "<li><a href=\"/" path "\" class=\"thisPage\">»<i> " bname "</i></a>"
+ }
+ } else
+ print "<li><a href=\"/" path "\">› " bname "</a>"
+
+ print "</li>"
+ }'
+}
+
+fn gensidebar {
+ @{
+ cd $sitedir
+ menu .
+ }
+}
+
+fn sortedBlogPostList {
+ # the /./ is added so we can sort -t. and order only the file name
+ if (! ~ $#* 0)
+ ls $*^'/./' | grep '/[0-9]+.*\.md$'| sort -r -t. +1
+}
+
+fn gen_blog_post_title {
+ title=`{basename $1 | sed 's/^[0-9\-]*_(.*)\.md$/\1/; s/_/ /g' }
+ permlink= `{echo $1 | sed 's,^/[a-z/]*www/,/,; s,^sites/[^/]*/*/,/,; s/\.md$//' }
+ du=`{ls -l $1}
+ by = ''
+ if (! ~ $#blogAuthor 0)
+ by='By '$"blogAuthor
+ if not if (~ $#blogDirs 1)
+ by='By '$du(4)
+ echo '##<a href="'^$"permlink^'">' $"title^'</a> *( '$by Last mod: $du(7 8 9) ' )*'
+}
+
+
+
+# Handlers
+fn set_handler {
+ handler = $1
+ shift
+ handler_args = $*
+}
+
+fn md_handler { cat $* | $formatter }
+
+fn tpl_handler { template $1 }
+
+fn html_handler {
+ cat $1 | /bin/sed '0,/<[Bb][Oo][Dd][Yy][^>]*>/d; /<\/[Bb][Oo][Dd][Yy]>/,$d'
+}
+
+fn txt_handler {
+ echo '<pre>'
+ # XXX Insering a blank line between lines in input is good for fortunes.txt, but maybe not for other .txt files
+ # XXX Words are not broken, even if they are way beyond 82 chars long
+ cat $1 |sed 's/$/\n/g; s/</\</g; s/>/\>/g' |fmt -l 82 -j
+ echo '</pre>'
+}
+
+fn dir_listing_handler {
+ d = `{basename -d $1}
+ echo '<h1 style="text-transform: capitalize;">' `{echo $d|sed -e 's,.*//,,g' -e 's,/$,,' -e 's,/, / ,g' } '</h1>'
+ echo '<ul style="text-transform: capitalize;">'
+ ls -F $d | sed -e $dirfilter' s,^'$sitedir'/.*/([^$].*),<li><a href="\1">\1</a></li>,' # XXX I'm not sure what the [^$] does there
+ echo '</ul>'
+}
+
+fn 404_handler { template `{ get_lib_file 404.tpl } }
+
+fn blog_dir_handler {
+ blogDirs = $*
+ tpl_handler lib/feeds/html.tpl
+}
+
+fn blog_post_handler {
+ gen_blog_post_title $1 | $formatter
+ $formatter < $1
+}
+
+
+fn select_handler {
+
+ if (test -f $body.md) {
+ if (! ~ $#inBlog 0)
+ set_handler blog_post_handler $body.md
+ if not
+ set_handler md_handler $body.md
+ if (! ~ $#allowComments 0 && ~ $REQUEST_METHOD POST) {
+ get_post_args comment_text
+ d = `{date -n} # FIXME Obvious race
+ d = $body.md_werc/comments/$d/
+
+ u = $logged_user
+ if (~ $#logged_user 0) {
+ get_post_args comment_user_name comment_user_password
+ # TODO Should take this path too if the user can login but doesn't
+ # belong to required group
+ if (! login_user $comment_user_name $comment_user_password) {
+ u = $comment_user_name ':' $comment_user_password
+ d = $d^'_pending'
+ }
+ if not
+ u = $logged_user
+ }
+
+ umask 002
+ mkdir -m 775 -p $d
+ echo $u > $d/user
+ echo $comment_text > $d/body
+ }
+ }
+ if not if (test -f $body.tpl)
+ set_handler tpl_handler $body.tpl
+
+ if not if (test -f $body.html)
+ set_handler html_handler $body.html
+
+ # Explicit .html urls, unfortunately usually the web server will handle this as static files
+ if not if (~ $body *.html && test -f $body)
+ perm_redirect `{ echo $REQUEST_URI|sed 's/.html$//' }
+
+ # Rss feeds. TODO: we should check that the request is for a real blog dir
+ if not if (~ $REQUEST_URI */index.rss) {
+ uri = `{echo $uri | sed 's/index.rss$//'}
+ if (~ $#blogDirs 0)
+ blogDirs = $sitedir^$uri
+ uri=$baseuri$"uri
+ master_template=feeds/rss20.tpl
+ response_format=raw
+ }
+
+ # Blog handler
+ if not if (~ $body */[bB]log/index */[bB]log//index || ! ~ $#blogDirs 0) {
+ if (~ $#blogDirs 0)
+ blogDirs = `{basename -d $body}
+
+ rssuri=`{basename -d $uri}
+ rssuri=$baseuri`{cleanname $"rssuri^/index.rss}
+ extraHeaders=$"extraHeaders ^ \
+ '<link rel="alternate" type="application/rss+xml" title="RSS" href="'$rssuri'" />
+'
+ set_handler tpl_handler lib/feeds/html.tpl
+ }
+
+ # Global tpl (eg sitemap.tpl)
+ if not if (test -f lib/^$uri^.tpl)
+ set_handler tpl_handler lib/^$uri^.tpl
+
+ if not if (test -f $body.txt)
+ set_handler txt_handler $body.txt
+
+ # Dir listing
+ if not if(~ $body */index)
+ set_handler dir_listing_handler $body
+
+ # File not found
+ if not {
+ set_handler 404_handler $body
+ dprint 'NOT FOUND: '$SERVER_NAME^$REQUEST_URI^' - '^$"HTTP_REFERER^' - '^$"HTTP_USER_AGENT
+ echo 'Status: 404 Not Found'
+ }
+
+}
+
+
+fn genbody {
+ $handler $handler_args
+}
+
+
+# Careful, the proper p9p path might not be set until initrc.local is sourced
+path=(. $PLAN9/bin ./bin/ /bin/ /usr/bin)
+
+site=$SERVER_NAME
+headers=lib/headers.tpl
+master_template=default_master.tpl
+sidebar=sidebar
+baseuri=http://$site/
+for(i in siteTitle siteSubTitle pageTitle extraHeaders)
+ $i = ''
+
+. etc/initrc
+
+if(test -f etc/initrc.local)
+ . etc/initrc.local
+
+
+# Parse request URL
+uri = `{echo -n $REQUEST_URI | sed -e 's/\?.*//; s/'^$forbidden_uri_chars^'//g; s/\.\.*/./g;' -e '1q'}
+ifs='/' { args = `{echo -n $uri} }
+
+
+if(! ~ $#debug 0)
+ dprint ' ' $SERVER_NAME^$REQUEST_URI^' - '^$"HTTP_USER_AGENT
+
+# Hack: preload post data so we can access it from templates where cgi's stdin is not accesible
+if(~ $REQUEST_METHOD POST) {
+ get_post_args
+ login_user
+}
+
+if (! ~ $args '') {
+ if (~ $args($#args) 'index')
+ perm_redirect `{ echo $REQUEST_URI | sed 's,/index$,/,' }
+
+ pageTitle=`{echo $args | sed -e 's/ / - /g' -e 's/_/ /g'}
+ req_path=$uri
+}
+if not {
+ req_path='/'
+}
+
+fpath=$sitedir
+for (i in ('' $args)) {
+ fpath = $fpath/$i
+ # We don't want blog settings to cascade into posts, note that we are inBlog instead
+ if (! ~ $#blogDirs 0 && ! ~ $req_path */index.rss */[bB]log */[bB]log/) {
+ inBlog = $blogDirs
+ blogDirs = ()
+ }
+
+ if (test -f $fpath/_werc/config)
+ . $fpath/_werc/config
+
+ if (~ $#blogDirs 0 && ~ $#inBlog 0 && ~ $i [Bb]log)
+ inBlog = 'yes'
+}
+
+# Redirections and other preprocessing
+if (~ $#redirectPermanent 1)
+ perm_redirect $"redirectPermanent
+
+if not if (~ $#redirectPermanent 2 && {echo $SERVER_NAME^$REQUEST_URI|grep -s $redirectPermanent(1) }) {
+ # Experimental regexp sub-based redirect, probably should find a nicer interface
+ # For now only used at sites/harmful.cat-v.org/software/OO_programming/_werc/config
+ to=`{echo $SERVER_NAME^$REQUEST_URI|sed 's@'^$redirectPermanent(1)^'@'^$redirectPermanent(2)^'@'}
+ if(! ~ $to $REQUEST_URI)
+ perm_redirect $to
+}
+
+# Set Page title
+if(~ $pageTitle '')
+ pageTitle=$siteTitle^' '^$siteSubTitle
+if not
+ pageTitle=$"pageTitle^' | '^$"siteTitle^' '^$"siteSubTitle
+
+
+body=$sitedir/$req_path
+if (test -d $body) {
+ if(! ~ $body */)
+ perm_redirect $REQUEST_URI^'/'
+ body=$body/index
+}
+
+select_handler
+
+# Template/body selection
+master_template= `{get_lib_file $master_template}
+
+if(~ $response_format html) {
+ # Is awk_buffer really useful?
+ template $headers $master_template | awk_buffer
+ echo '</body></html>'
+}
+if not if (~ $response_format raw) {
+ template < $master_template
+}