# HG changeset patch # User Steve Losh # Date 1276914511 14400 # Node ID 5101c0cba85d83ca4caf198574d0ffe69caecbb5 # Parent bc5a004bfccadc76dd3405fe83012956442d33bf bundled: add markdown2 diff -r bc5a004bfcca -r 5101c0cba85d bundled/markdown2/CHANGES.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bundled/markdown2/CHANGES.txt Fri Jun 18 22:28:31 2010 -0400 @@ -0,0 +1,246 @@ +# python-markdown2 Changelog + +## python-markdown2 v1.0.1.17 + +- [Issue 36] Fix "cuddled-lists" extra handling for an + looks-like-a-cuddled-list-but-is-indented block. See the + "test/tm-cases/cuddled_list_indented.text" test case. + +- Experimental new "toc" extra. The returned string from conversion will have + a `toc_html` attribute. + +- New "header-ids" extra that will add an `id` attribute to headers: + + # My First Section + + will become: + +

My First Section

+ + An argument can be given for the extra, which will be used as a prefix for + the ids: + + $ cat foo.txt + # hi there + $ python markdown2.py foo.txt +

hi there

+ $ python markdown2.py foo.txt -x header-ids +

hi there

+ $ python markdown2.py foo.txt -x header-ids=prefix +

hi there

+ +- Preliminary support for "html-classes" extra: takes a dict mapping HTML tag + to the string value to use for a "class" attribute for that emitted tag. + Currently just supports "pre" and "code" for code *blocks*. + + +## python-markdown2 v1.0.1.16 + +- [Issue 33] Implement a "cuddled-lists" extra that allows: + + I did these things: + * bullet1 + * bullet2 + * bullet3 + + to be converted to: + +

I did these things:

+ + + + +## python-markdown2 v1.0.1.15 + +- [Issue 30] Fix a possible XSS via JavaScript injection in a carefully + crafted image reference (usage of double-quotes in the URL). + +## python-markdown2 v1.0.1.14 + +- [Issue 29] Fix security hole in the md5-hashing scheme for handling HTML + chunks during processing. +- [Issue 27] Fix problem with underscores in footnotes content (with + "footnotes" extra). + +## python-markdown2 v1.0.1.13 + +- [Issue 24] Set really long sentinel for max-length of link text to avoid + problems with reasonably long ones. +- [Issue 26] Complete the fix for this issue. Before this change the + randomized obscuring of 'mailto:' link letters would sometimes result + in emails with underscores getting misinterpreted as for italics. + +## python-markdown2 v1.0.1.12 + +- [Issue 26] Fix bug where email auto linking wouldn't work for emails with + underscores. E.g. `Mail me: ` wouldn't work. +- Update MANIFEST.in to ensure bin/markdown2 gets included in sdist. +- [Issue 23] Add support for passing options to pygments for the "code-color" + extra. For example: + + >>> markdown("...", extras={'code-color': {"noclasses": True}}) + + This `formatter_opts` dict is passed to the pygments HtmlCodeFormatter. + Patch from 'svetlyak.40wt'. +- [Issue 21] Escape naked '>' characters, as is already done for '&' and '<' + characters. Note that other markdown implementations (both Perl and PHP) do + *not* do this. This results in differing output with two 3rd-party tests: + "php-markdown-cases/Backslash escapes.text" and "markdowntest-cases/Amps + and angle encoding.tags". +- "link-patterns" extra: Add support for the href replacement being a + callable, e.g.: + + >>> link_patterns = [ + ... (re.compile("PEP\s+(\d+)", re.I), + ... lambda m: "http://www.python.org/dev/peps/pep-%04d/" % int(m.group(1))), + ... ] + >>> markdown2.markdown("Here is PEP 42.", extras=["link-patterns"], + ... link_patterns=link_patterns) + u'

Here is PEP 42.

\n' + +## python-markdown2 v1.0.1.11 + +- Fix syntax_color test for the latest Pygments. +- [Issue 20] Can't assume that `sys.argv` is defined at top-level code -- + e.g. when used in a PostgreSQL stored procedure. Fix that. + +## python-markdown2 v1.0.1.10 + +- Fix sys.path manipulation in setup.py so `easy_install markdown2-*.tar.gz` + works. (Henry Precheur pointed out the problem.) +- "bin/markdown2" is now a stub runner script rather than a symlink to + "lib/markdown2.py". The symlink was a problem for sdist: tar makes it a + copy. +- Added 'xml' extra: passes *one-liner* XML processing instructions and + namespaced XML tags without wrapping in a `

` -- i.e. treats them as an HTML + block tag. + +## python-markdown2 v1.0.1.9 + +- Fix bug in processing text with two HTML comments, where the first comment + is cuddled to other content. See "test/tm-cases/two_comments.text". Noted + by Wolfgang Machert. +- Revert change in v1.0.1.6 passing XML processing instructions and one-liner + tags. This change caused some bugs. Similar XML processing support will + make it back via an "xml" extra. + +## python-markdown2 v1.0.1.8 + +- License note updates to facilitate Thomas Moschny building a package for + Fedora Core Linux. No functional change. + +## python-markdown2 v1.0.1.7 + +- Add a proper setup.py and release to pypi: + http://pypi.python.org/pypi/markdown2/ +- Move markdown2.py module to a lib subdir. This allows one to put the "lib" + dir of a source checkout (e.g. via an svn:externals) on one's Python Path + without having the .py files at the top-level getting in the way. + +## python-markdown2 v1.0.1.6 + +- Fix Python 2.6 deprecation warning about the `md5` module. +- Pass XML processing instructions and one-liner tags. For example: + + + + + Limitations: they must be on one line. Test: pi_and_xinclude. + Suggested by Wolfgang Machert. + +## python-markdown2 v1.0.1.5 + +- Add ability for 'extras' to have arguments. Internally the 'extras' + attribute of the Markdown class is a dict (it was a set). +- Add "demote-headers" extra that will demote the markdown for, e.g., an h1 + to h2-6 by the number of the demote-headers argument. + + >>> markdown('# this would be an h1', extras={'demote-headers': 2}) + u'

this would be an h1

\n' + + This can be useful for user-supplied Markdown content for a sub-section of + a page. + +## python-markdown2 v1.0.1.4 + +- [Issue 18] Allow spaces in the URL for link definitions. +- [Issue 15] Fix some edge cases with backslash-escapes. +- Fix this error that broke command-line usage: + + NameError: global name 'use_file_vars' is not defined + +- Add "pyshell" extra for auto-codeblock'ing Python interactive shell + sessions even if they weren't properly indented by the tab width. + +## python-markdown2 v1.0.1.3 + +- Make the use of the `-*- markdown-extras: ... -*-` emacs-style files + variable to set "extras" **off** by default. It can be turned on via + `--use-file-vars` on the command line and `use_file_vars=True` via the + module interface. +- [Issue 3] Drop the code-color extra hack added *for* issue3 that was + causing a unicode error with unicode in a code-colored block. + + +## python-markdown2 v1.0.1.2 + +- [Issue 8] Alleviate some of the incompat of the last change by allowing (at + the Python module level) the usage of `safe_mode=True` to mean what it used + to -- i.e. "replace" safe mode. +- [Issue 8, **incompatible change**] The "-s|--safe" command line option and + the equivalent "safe_mode" option has changed semantics to be a string + instead of a boolean. Legal values of the string are "replace" (the old + behaviour: literal HTML is replaced with "[HTML_REMOVED]") and "escape" + (meta chars in literal HTML is escaped). +- [Issue 11] Process markup in footnote definition bodies. +- Add support for `-*- markdown-extras: ... -*-` emacs-style files variables + (typically in an XML comment) to set "extras" for the markdown conversion. +- [Issue 6] Fix problem with footnotes if the reference string had uppercase + letters. + +## python-markdown2 v1.0.1.1 + +- [Issue 3] Fix conversion of unicode strings. +- Make the "safe_mode" replacement test overridable via subclassing: change + `Markdown.html_removed_text`. 
+- [Issue 2] Fix problems with "safe_mode" removing generated HTML, instead of + just raw HTML in the text. +- Add "-s|--safe" command-line option to set "safe_mode" conversion + boolean. This option is mainly for compat with markdown.py. +- Add "link-patterns" extra: allows one to specify a list of regexes that + should be automatically made into links. For example, one can define a + mapping for things like "Mozilla Bug 1234": + + regex: mozilla\s+bug\s+(\d+) + href: http://bugzilla.mozilla.org/show_bug.cgi?id=\1 + + See for details. +- Add a "MarkdownWithExtras" class that enables all extras (except + "code-friendly"): + + >>> import markdown2 + >>> converter = markdown2.MarkdownWithExtras() + >>> converter.convert('...TEXT...') + ...HTML... + +- [Issue 1] Added "code-color" extra: pygments-based (TODO: link) syntax + coloring of code blocks. Requires the pygments Python library on sys.path. + See for details. +- [Issue 1] Added "footnotes" extra: adds support for footnotes syntax. See + for details. + +## python-markdown2 v1.0.1.0 + +- Added "code-friendly" extra: disables the use of leading and trailing `_` + and `__` for emphasis and strong. These can easily get in the way when + writing docs about source code with variable_list_this and when one is not + careful about quoting. +- Full basic Markdown syntax. + + +(Started maintaining this log 15 Oct 2007. At that point there had been no +releases of python-markdown2.) 
diff -r bc5a004bfcca -r 5101c0cba85d bundled/markdown2/CONTRIBUTORS.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bundled/markdown2/CONTRIBUTORS.txt Fri Jun 18 22:28:31 2010 -0400 @@ -0,0 +1,3 @@ +Trent Mick (primary author) +Thomas Moschny (redhat packaging, https://bugzilla.redhat.com/show_bug.cgi?id=461692) +Massimo Di Pierro (security fix, issue 29) diff -r bc5a004bfcca -r 5101c0cba85d bundled/markdown2/LICENSE.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bundled/markdown2/LICENSE.txt Fri Jun 18 22:28:31 2010 -0400 @@ -0,0 +1,58 @@ +This implementation of Markdown is licensed under the MIT License: + + The MIT License + + Copyright (c) 2008 ActiveState Software Inc. + + Permission is hereby granted, free of charge, to any person obtaining a + copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to permit + persons to whom the Software is furnished to do so, subject to the + following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN + NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, + DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + USE OR OTHER DEALINGS IN THE SOFTWARE. + + +All files in a *source package* of markdown2 (i.e. those available on +pypi.python.org and the Google Code project "downloads" page) are under the +MIT license. 
However, in the *subversion repository* there are some files +(used for performance and testing purposes) that are under different licenses +as follows: + +- perf/recipes.pprint + + Python License. This file includes a number of real-world examples of + Markdown from the ActiveState Python Cookbook, used for doing some + performance testing of markdown2.py. + +- test/php-markdown-cases/... + test/php-markdown-extra-cases/... + + GPL. These are from the MDTest package announced here: + http://six.pairlist.net/pipermail/markdown-discuss/2007-July/000674.html + +- test/markdown.py + + GPL 2 or BSD. A copy (currently old) of Python-Markdown -- the other + Python Markdown implementation. + +- test/markdown.php + + BSD-style. This is PHP Markdown + (http://michelf.com/projects/php-markdown/). + +- test/Markdown.pl: BSD-style + + A copy of Perl Markdown (http://daringfireball.net/projects/markdown/). + diff -r bc5a004bfcca -r 5101c0cba85d bundled/markdown2/Makefile.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bundled/markdown2/Makefile.py Fri Jun 18 22:28:31 2010 -0400 @@ -0,0 +1,663 @@ + +"""Makefile for the python-markdown2 project. + +${common_task_list} + +See `mk -h' for options. 
+""" + +import sys +import os +from os.path import join, dirname, normpath, abspath, exists, basename +import re +import webbrowser +from pprint import pprint + +from mklib.common import MkError +from mklib import Task +from mklib.sh import run_in_dir + + + +class bugs(Task): + """Open bug database page.""" + def make(self): + webbrowser.open("http://code.google.com/p/python-markdown2/issues/list") + +class site(Task): + """Open the Google Code project page.""" + def make(self): + webbrowser.open("http://code.google.com/p/python-markdown2/") + +class sdist(Task): + """python setup.py sdist""" + def make(self): + run_in_dir("%spython setup.py sdist -f --formats zip" + % _setup_command_prefix(), + self.dir, self.log.debug) + +class pypi_upload(Task): + """Update release to pypi.""" + def make(self): + tasks = (sys.platform == "win32" + and "bdist_wininst upload" + or "sdist --formats zip upload") + run_in_dir("%spython setup.py %s" % (_setup_command_prefix(), tasks), + self.dir, self.log.debug) + + sys.path.insert(0, join(self.dir, "lib")) + url = "http://pypi.python.org/pypi/markdown2/" + import webbrowser + webbrowser.open_new(url) + +class googlecode_upload(Task): + """Upload sdist to Google Code project site.""" + deps = ["sdist"] + def make(self): + helper_in_cwd = exists(join(self.dir, "googlecode_upload.py")) + if helper_in_cwd: + sys.path.insert(0, self.dir) + try: + import googlecode_upload + except ImportError: + raise MkError("couldn't import `googlecode_upload` (get it from http://support.googlecode.com/svn/trunk/scripts/googlecode_upload.py)") + if helper_in_cwd: + del sys.path[0] + + sys.path.insert(0, join(self.dir, "lib")) + import markdown2 + sdist_path = join(self.dir, "dist", + "markdown2-%s.zip" % markdown2.__version__) + status, reason, url = googlecode_upload.upload_find_auth( + sdist_path, + "python-markdown2", # project_name + "markdown2 %s source package" % markdown2.__version__, # summary + ["Featured", "Type-Archive"]) # labels + if not 
url: + raise MkError("couldn't upload sdist to Google Code: %s (%s)" + % (reason, status)) + self.log.info("uploaded sdist to `%s'", url) + + project_url = "http://code.google.com/p/python-markdown2/" + import webbrowser + webbrowser.open_new(project_url) + + + +class test(Task): + """Run all tests (except known failures).""" + def make(self): + for ver, python in self._gen_pythons(): + if ver < (2,3): + # Don't support Python < 2.3. + continue + elif ver >= (3, 0): + # Don't yet support Python 3. + continue + ver_str = "%s.%s" % ver + print "-- test with Python %s (%s)" % (ver_str, python) + assert ' ' not in python + run_in_dir("%s test.py -- -knownfailure" % python, + join(self.dir, "test")) + + def _python_ver_from_python(self, python): + assert ' ' not in python + o = os.popen('''%s -c "import sys; print(sys.version)"''' % python) + ver_str = o.read().strip() + ver_bits = re.split("\.|[^\d]", ver_str, 2)[:2] + ver = tuple(map(int, ver_bits)) + return ver + + def _gen_python_names(self): + yield "python" + for ver in [(2,4), (2,5), (2,6), (2,7), (3,0), (3,1)]: + yield "python%d.%d" % ver + if sys.platform == "win32": + yield "python%d%d" % ver + + def _gen_pythons(self): + sys.path.insert(0, join(self.dir, "externals", "which")) + import which # get it from http://trentm.com/projects/which + python_from_ver = {} + for name in self._gen_python_names(): + for python in which.whichall(name): + ver = self._python_ver_from_python(python) + if ver not in python_from_ver: + python_from_ver[ver] = python + for ver, python in sorted(python_from_ver.items()): + yield ver, python + + +class todo(Task): + """Print out todo's and xxx's in the docs area.""" + def make(self): + for path in _paths_from_path_patterns(['.'], + excludes=[".svn", "*.pyc", "TO""DO.txt", "Makefile.py", + "*.png", "*.gif", "*.pprint", "*.prof", + "tmp-*"]): + self._dump_pattern_in_path("TO\DO\\|XX\X", path) + + path = join(self.dir, "TO""DO.txt") + todos = re.compile("^- ", re.M).findall(open(path, 
'r').read()) + print "(plus %d TODOs from TO""DO.txt)" % len(todos) + + def _dump_pattern_in_path(self, pattern, path): + os.system("grep -nH '%s' '%s'" % (pattern, path)) + +class pygments(Task): + """Get a copy of pygments in externals/pygments. + + This will be used by the test suite. + """ + def make(self): + pygments_dir = join(self.dir, "externals", "pygments") + if exists(pygments_dir): + run_in_dir("hg pull", pygments_dir, self.log.info) + run_in_dir("hg update", pygments_dir, self.log.info) + else: + if not exists(dirname(pygments_dir)): + os.makedirs(dirname(pygments_dir)) + run_in_dir("hg clone http://dev.pocoo.org/hg/pygments-main %s" + % basename(pygments_dir), + dirname(pygments_dir), self.log.info) + +class announce_release(Task): + """Send a release announcement. Don't send this multiple times!.""" + headers = { + "To": [ + "python-markdown2@googlegroups.com", + "python-announce@python.org" + ], + "From": ["Trent Mick "], + "Subject": "ANN: python-markdown2 %(version)s -- A fast and complete Python implementation of Markdown", + "Reply-To": "python-markdown2@googlegroups.com", + } + if False: # for dev/debugging + headers["To"] = ["trentm@gmail.com"] + + body = r""" + ### Where? + + - Project Page: + - PyPI: + + ### What's new? + + %(whatsnew)s + + Full changelog: + + ### What is 'markdown2'? + + `markdown2.py` is a fast and complete Python implementation of + [Markdown](http://daringfireball.net/projects/markdown/) -- a + text-to-HTML markup syntax. + + ### Module usage + + >>> import markdown2 + >>> markdown2.markdown("*boo!*") # or use `html = markdown_path(PATH)` + u'

boo!

\n' + + >>> markdowner = Markdown() + >>> markdowner.convert("*boo!*") + u'

boo!

\n' + >>> markdowner.convert("**boom!**") + u'

boom!

\n' + + ### Command line usage + + $ cat hi.markdown + # Hello World! + $ markdown2 hi.markdown +

Hello World!

+ + This implementation of Markdown implements the full "core" syntax plus a + number of extras (e.g., code syntax coloring, footnotes) as described on + . + + Cheers, + Trent + + -- + Trent Mick + trentm@gmail.com + http://trentm.com/blog/ + """ + + def _parse_changes_txt(self): + changes_txt = open(join(self.dir, "CHANGES.txt")).read() + sections = re.split(r'\n(?=##)', changes_txt) + for section in sections[1:]: + first, tail = section.split('\n', 1) + if "not yet released" in first: + continue + break + + whatsnew_text = tail.strip() + version = first.strip().split()[-1] + if version.startswith("v"): + version = version[1:] + + return version, whatsnew_text + + def make(self): + import getpass + if getpass.getuser() != "trentm": + raise RuntimeError("You're not `trentm`. That's not " + "expected here.") + + version, whatsnew = self._parse_changes_txt() + data = { + "whatsnew": whatsnew, + "version": version, + } + + headers = {} + for name, v in self.headers.items(): + if isinstance(v, basestring): + value = v % data + else: + value = v + headers[name] = value + body = _dedent(self.body, skip_first_line=True) % data + + # Ensure all the footer lines end with two spaces: markdown syntax + # for
. + lines = body.splitlines(False) + idx = lines.index("Cheers,") - 1 + for i in range(idx, len(lines)): + lines[i] += ' ' + body = '\n'.join(lines) + + print "=" * 70, "body" + print body + print "=" * 70 + answer = _query_yes_no( + "Send release announcement email for v%s to %s?" % ( + version, ", ".join(self.headers["To"])), + default="no") + if answer != "yes": + return + + sys.path.insert(0, join(self.dir, "lib")) + import markdown2 + body_html = markdown2.markdown(body) + + email_it_via_gmail(headers, text=body, html=body_html) + self.log.info("announcement sent") + + + +#---- internal support stuff + +# Recipe http://code.activestate.com/recipes/576824/ +def email_it_via_gmail(headers, text=None, html=None, password=None): + """Send an email -- with text and HTML parts. + + @param headers {dict} A mapping with, at least: "To", "Subject" and + "From", header values. "To", "Cc" and "Bcc" values must be *lists*, + if given. + @param text {str} The text email content. + @param html {str} The HTML email content. + @param password {str} Is the 'From' gmail user's password. If not given + it will be prompted for via `getpass.getpass()`. 
+ + Derived from http://code.activestate.com/recipes/473810/ and + http://stackoverflow.com/questions/778202/smtplib-and-gmail-python-script-problems + """ + from email.MIMEMultipart import MIMEMultipart + from email.MIMEText import MIMEText + import smtplib + import getpass + + if text is None and html is None: + raise ValueError("neither `text` nor `html` content was given for " + "sending the email") + if not ("To" in headers and "From" in headers and "Subject" in headers): + raise ValueError("`headers` dict must include at least all of " + "'To', 'From' and 'Subject' keys") + + # Create the root message and fill in the from, to, and subject headers + msg_root = MIMEMultipart('related') + for name, value in headers.items(): + msg_root[name] = isinstance(value, list) and ', '.join(value) or value + msg_root.preamble = 'This is a multi-part message in MIME format.' + + # Encapsulate the plain and HTML versions of the message body in an + # 'alternative' part, so message agents can decide which they want + # to display. + msg_alternative = MIMEMultipart('alternative') + msg_root.attach(msg_alternative) + + # Attach HTML and text alternatives. + if text: + msg_text = MIMEText(text.encode('utf-8')) + msg_alternative.attach(msg_text) + if html: + msg_text = MIMEText(html.encode('utf-8'), 'html') + msg_alternative.attach(msg_text) + + to_addrs = headers["To"] \ + + headers.get("Cc", []) \ + + headers.get("Bcc", []) + from_addr = msg_root["From"] + + # Get username and password. 
+ from_addr_pats = [ + re.compile(".*\((.+@.+)\)"), # Joe (joe@example.com) + re.compile(".*<(.+@.+)>"), # Joe + ] + for pat in from_addr_pats: + m = pat.match(from_addr) + if m: + username = m.group(1) + break + else: + username = from_addr + if not password: + password = getpass.getpass("%s's password: " % username) + + smtp = smtplib.SMTP('smtp.gmail.com', 587) # port 465 or 587 + smtp.ehlo() + smtp.starttls() + smtp.ehlo() + smtp.login(username, password) + smtp.sendmail(from_addr, to_addrs, msg_root.as_string()) + smtp.close() + + +# Recipe: dedent (0.1.2) +def _dedentlines(lines, tabsize=8, skip_first_line=False): + """_dedentlines(lines, tabsize=8, skip_first_line=False) -> dedented lines + + "lines" is a list of lines to dedent. + "tabsize" is the tab width to use for indent width calculations. + "skip_first_line" is a boolean indicating if the first line should + be skipped for calculating the indent width and for dedenting. + This is sometimes useful for docstrings and similar. + + Same as dedent() except operates on a sequence of lines. Note: the + lines list is modified **in-place**. 
+ """ + DEBUG = False + if DEBUG: + print "dedent: dedent(..., tabsize=%d, skip_first_line=%r)"\ + % (tabsize, skip_first_line) + indents = [] + margin = None + for i, line in enumerate(lines): + if i == 0 and skip_first_line: continue + indent = 0 + for ch in line: + if ch == ' ': + indent += 1 + elif ch == '\t': + indent += tabsize - (indent % tabsize) + elif ch in '\r\n': + continue # skip all-whitespace lines + else: + break + else: + continue # skip all-whitespace lines + if DEBUG: print "dedent: indent=%d: %r" % (indent, line) + if margin is None: + margin = indent + else: + margin = min(margin, indent) + if DEBUG: print "dedent: margin=%r" % margin + + if margin is not None and margin > 0: + for i, line in enumerate(lines): + if i == 0 and skip_first_line: continue + removed = 0 + for j, ch in enumerate(line): + if ch == ' ': + removed += 1 + elif ch == '\t': + removed += tabsize - (removed % tabsize) + elif ch in '\r\n': + if DEBUG: print "dedent: %r: EOL -> strip up to EOL" % line + lines[i] = lines[i][j:] + break + else: + raise ValueError("unexpected non-whitespace char %r in " + "line %r while removing %d-space margin" + % (ch, line, margin)) + if DEBUG: + print "dedent: %r: %r -> removed %d/%d"\ + % (line, ch, removed, margin) + if removed == margin: + lines[i] = lines[i][j+1:] + break + elif removed > margin: + lines[i] = ' '*(removed-margin) + lines[i][j+1:] + break + else: + if removed: + lines[i] = lines[i][removed:] + return lines + +def _dedent(text, tabsize=8, skip_first_line=False): + """_dedent(text, tabsize=8, skip_first_line=False) -> dedented text + + "text" is the text to dedent. + "tabsize" is the tab width to use for indent width calculations. + "skip_first_line" is a boolean indicating if the first line should + be skipped for calculating the indent width and for dedenting. + This is sometimes useful for docstrings and similar. 
+ + textwrap.dedent(s), but don't expand tabs to spaces + """ + lines = text.splitlines(1) + _dedentlines(lines, tabsize=tabsize, skip_first_line=skip_first_line) + return ''.join(lines) + + +# Recipe: query_yes_no (1.0) +def _query_yes_no(question, default="yes"): + """Ask a yes/no question via raw_input() and return their answer. + + "question" is a string that is presented to the user. + "default" is the presumed answer if the user just hits . + It must be "yes" (the default), "no" or None (meaning + an answer is required of the user). + + The "answer" return value is one of "yes" or "no". + """ + valid = {"yes":"yes", "y":"yes", "ye":"yes", + "no":"no", "n":"no"} + if default == None: + prompt = " [y/n] " + elif default == "yes": + prompt = " [Y/n] " + elif default == "no": + prompt = " [y/N] " + else: + raise ValueError("invalid default answer: '%s'" % default) + + while 1: + sys.stdout.write(question + prompt) + choice = raw_input().lower() + if default is not None and choice == '': + return default + elif choice in valid.keys(): + return valid[choice] + else: + sys.stdout.write("Please respond with 'yes' or 'no' "\ + "(or 'y' or 'n').\n") + + +# Recipe: paths_from_path_patterns (0.3.7) +def _should_include_path(path, includes, excludes): + """Return True iff the given path should be included.""" + from os.path import basename + from fnmatch import fnmatch + + base = basename(path) + if includes: + for include in includes: + if fnmatch(base, include): + try: + log.debug("include `%s' (matches `%s')", path, include) + except (NameError, AttributeError): + pass + break + else: + try: + log.debug("exclude `%s' (matches no includes)", path) + except (NameError, AttributeError): + pass + return False + for exclude in excludes: + if fnmatch(base, exclude): + try: + log.debug("exclude `%s' (matches `%s')", path, exclude) + except (NameError, AttributeError): + pass + return False + return True + +_NOT_SPECIFIED = ("NOT", "SPECIFIED") +def 
_paths_from_path_patterns(path_patterns, files=True, dirs="never", + recursive=True, includes=[], excludes=[], + on_error=_NOT_SPECIFIED): + """_paths_from_path_patterns([, ...]) -> file paths + + Generate a list of paths (files and/or dirs) represented by the given path + patterns. + + "path_patterns" is a list of paths optionally using the '*', '?' and + '[seq]' glob patterns. + "files" is boolean (default True) indicating if file paths + should be yielded + "dirs" is string indicating under what conditions dirs are + yielded. It must be one of: + never (default) never yield dirs + always yield all dirs matching given patterns + if-not-recursive only yield dirs for invocations when + recursive=False + See use cases below for more details. + "recursive" is boolean (default True) indicating if paths should + be recursively yielded under given dirs. + "includes" is a list of file patterns to include in recursive + searches. + "excludes" is a list of file and dir patterns to exclude. + (Note: This is slightly different than GNU grep's --exclude + option which only excludes *files*. I.e. you cannot exclude + a ".svn" dir.) + "on_error" is an error callback called when a given path pattern + matches nothing: + on_error(PATH_PATTERN) + If not specified, the default is look for a "log" global and + call: + log.error("`%s': No such file or directory") + Specify None to do nothing. + + Typically this is useful for a command-line tool that takes a list + of paths as arguments. (For Unix-heads: the shell on Windows does + NOT expand glob chars, that is left to the app.) 
+ + Use case #1: like `grep -r` + {files=True, dirs='never', recursive=(if '-r' in opts)} + script FILE # yield FILE, else call on_error(FILE) + script DIR # yield nothing + script PATH* # yield all files matching PATH*; if none, + # call on_error(PATH*) callback + script -r DIR # yield files (not dirs) recursively under DIR + script -r PATH* # yield files matching PATH* and files recursively + # under dirs matching PATH*; if none, call + # on_error(PATH*) callback + + Use case #2: like `file -r` (if it had a recursive option) + {files=True, dirs='if-not-recursive', recursive=(if '-r' in opts)} + script FILE # yield FILE, else call on_error(FILE) + script DIR # yield DIR, else call on_error(DIR) + script PATH* # yield all files and dirs matching PATH*; if none, + # call on_error(PATH*) callback + script -r DIR # yield files (not dirs) recursively under DIR + script -r PATH* # yield files matching PATH* and files recursively + # under dirs matching PATH*; if none, call + # on_error(PATH*) callback + + Use case #3: kind of like `find .` + {files=True, dirs='always', recursive=(if '-r' in opts)} + script FILE # yield FILE, else call on_error(FILE) + script DIR # yield DIR, else call on_error(DIR) + script PATH* # yield all files and dirs matching PATH*; if none, + # call on_error(PATH*) callback + script -r DIR # yield files and dirs recursively under DIR + # (including DIR) + script -r PATH* # yield files and dirs matching PATH* and recursively + # under dirs; if none, call on_error(PATH*) + # callback + """ + from os.path import basename, exists, isdir, join + from glob import glob + + assert not isinstance(path_patterns, basestring), \ + "'path_patterns' must be a sequence, not a string: %r" % path_patterns + GLOB_CHARS = '*?[' + + for path_pattern in path_patterns: + # Determine the set of paths matching this path_pattern. 
+ for glob_char in GLOB_CHARS: + if glob_char in path_pattern: + paths = glob(path_pattern) + break + else: + paths = exists(path_pattern) and [path_pattern] or [] + if not paths: + if on_error is None: + pass + elif on_error is _NOT_SPECIFIED: + try: + log.error("`%s': No such file or directory", path_pattern) + except (NameError, AttributeError): + pass + else: + on_error(path_pattern) + + for path in paths: + if isdir(path): + # 'includes' SHOULD affect whether a dir is yielded. + if (dirs == "always" + or (dirs == "if-not-recursive" and not recursive) + ) and _should_include_path(path, includes, excludes): + yield path + + # However, if recursive, 'includes' should NOT affect + # whether a dir is recursed into. Otherwise you could + # not: + # script -r --include="*.py" DIR + if recursive and _should_include_path(path, [], excludes): + for dirpath, dirnames, filenames in os.walk(path): + dir_indeces_to_remove = [] + for i, dirname in enumerate(dirnames): + d = join(dirpath, dirname) + if dirs == "always" \ + and _should_include_path(d, includes, excludes): + yield d + if not _should_include_path(d, [], excludes): + dir_indeces_to_remove.append(i) + for i in reversed(dir_indeces_to_remove): + del dirnames[i] + if files: + for filename in sorted(filenames): + f = join(dirpath, filename) + if _should_include_path(f, includes, excludes): + yield f + + elif files and _should_include_path(path, includes, excludes): + yield path + +def _setup_command_prefix(): + prefix = "" + if sys.platform == "darwin": + # http://forums.macosxhints.com/archive/index.php/t-43243.html + # This is an Apple customization to `tar` to avoid creating + # '._foo' files for extended-attributes for archived files. 
+ prefix = "COPY_EXTENDED_ATTRIBUTES_DISABLE=1 " + return prefix + + diff -r bc5a004bfcca -r 5101c0cba85d bundled/markdown2/PKG-INFO --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bundled/markdown2/PKG-INFO Fri Jun 18 22:28:31 2010 -0400 @@ -0,0 +1,27 @@ +Metadata-Version: 1.0 +Name: markdown2 +Version: 1.0.1.17 +Summary: markdown2: A fast and complete Python implementaion of Markdown. +Home-page: http://code.google.com/p/python-markdown2/ +Author: Trent Mick +Author-email: trentm@gmail.com +License: http://www.opensource.org/licenses/mit-license.php +Description: Markdown is a text-to-HTML filter; it translates an easy-to-read / + easy-to-write structured text format into HTML. Markdown's text + format is most similar to that of plain text email, and supports + features such as headers, *emphasis*, code blocks, blockquotes, and + links. -- http://daringfireball.net/projects/markdown/ + + This is a fast and complete Python implementation of the Markdown + spec. + +Platform: any +Classifier: Development Status :: 5 - Production/Stable +Classifier: Intended Audience :: Developers +Classifier: License :: OSI Approved :: MIT License +Classifier: Programming Language :: Python +Classifier: Operating System :: OS Independent +Classifier: Topic :: Software Development :: Libraries :: Python Modules +Classifier: Topic :: Software Development :: Documentation +Classifier: Topic :: Text Processing :: Filters +Classifier: Topic :: Text Processing :: Markup :: HTML diff -r bc5a004bfcca -r 5101c0cba85d bundled/markdown2/README.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bundled/markdown2/README.txt Fri Jun 18 22:28:31 2010 -0400 @@ -0,0 +1,95 @@ +markdown2 README +================ + +This is a fast and complete Python implementation of Markdown, a text-to-html +markup system as defined here: + + http://daringfireball.net/projects/markdown/syntax + + +Install +------- + +To install it in your Python installation run: + + python setup.py install + +However, 
everything you need to run this is in "lib/markdown2.py". If it is +easier for you, you can just copy that file to somewhere on your PythonPath +(to use as a module) or executable path (to use as a script). + + +Quick Usage +----------- + +As a module: + + >>> import markdown2 + >>> markdown2.markdown("*boo!*") # or use `html = markdown_path(PATH)` + u'

<p><em>boo!</em></p>

\n' + + >>> markdowner = Markdown() + >>> markdowner.convert("*boo!*") + u'

<p><em>boo!</em></p>

\n' + >>> markdowner.convert("**boom!**") + u'

<p><strong>boom!</strong></p>

\n' + +As a script: + + $ python markdown2.py foo.txt > foo.html + +See the project pages, "lib/markdown2.py" docstrings and/or +`python markdown2.py --help` for more details. + + +Project +------- + +The python-markdown2 project lives here (subversion repo, issue tracker, +wiki): + + http://code.google.com/p/python-markdown2/ + +To checkout the full sources: + + svn checkout http://python-markdown2.googlecode.com/svn/trunk/ python-markdown2 + +To report a bug: + + http://code.google.com/p/python-markdown2/issues/list + + +License +------- + +This project is licensed under the MIT License. + +Note that in the subversion repository there are a few files (for the test +suite and performance metrics) that are under different licenses. These files +are *not* included in source packages. See LICENSE.txt for details. + + +Test Suite +---------- + +This markdown implementation passes a fairly extensive test suite. To run it: + + cd test && python test.py + +If you have the [mk](http://svn.openkomodo.com/openkomodo/browse/mk/trunk) +tool installed you can run the test suite with all available Python versions +by running: + + mk test + +The crux of the test suite is a number of "cases" directories -- each with a +set of matching .text (input) and .html (expected output) files. These are: + + tm-cases/ Tests authored for python-markdown2 + markdowntest-cases/ Tests from the 3rd-party MarkdownTest package + php-markdown-cases/ Tests from the 3rd-party MDTest package + php-markdown-extra-cases/ Tests also from MDTest package + +See the wiki page for full details: +http://code.google.com/p/python-markdown2/wiki/TestingNotes + diff -r bc5a004bfcca -r 5101c0cba85d bundled/markdown2/TODO.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bundled/markdown2/TODO.txt Fri Jun 18 22:28:31 2010 -0400 @@ -0,0 +1,85 @@ +- add "html-classes" extra to wiki +- bug: can't have '<\w+' in a code span or code block with safe_mode if there + is a '>' somewhere later in the document. E.g. 
code.as.com-beta/CHANGES.md. + It captures all of that. Right answer is to not count code spans or code + blocks. + - add an issue for this + - test cases + - idea: better sanitation re-write? lot of work + - idea: Change all <,>,& emission from markdown processing to something + like {LT}, {GT}, {AMP}, {OPENTAG:$tag[:$class]} (first checking for + conflicts and escaping those out of the way). Then do sanitization at the + end: + escape: escape all <,>,& with entities + remove: not supported + whitelist: (new) build a reasonable default whitelist of patterns to + keep. Takes "extras" argument (and hook for subclassing) to + for custom whitelist. Google Code (was it?) had some list + of reasonable whitelist stuff. + Then unescape these special chars. The use of OPENTAG above would make + "html-classes" extra trivial. + +- fix the r135 xml option, add xml extra for it (see email) +- look at http://code.google.com/p/markdownsharp/ +- add description of pyshell and demote-headers extras to wiki +- to bring up on markdown-discuss: + - the trailing '#' escaping in DoHeaders (provide a patch for this) + - the discussion of backticks and backslash-escapes in code spans: + also bring in python-markdown-discuss on this + - The link for backslash escapes doesn't mention '>', but I believe it + should -- viz Markdown.pl's `%g_escape_table` which *does* include '>'. + TODO: bring this up on markdown-discuss list. +- wiki: add an "Other Markdown implementations page" + http://daringfireball.net/projects/markdown/ + http://www.michelf.com/projects/php-markdown/ + http://www.freewisdom.org/projects/python-markdown/Features +- test safe_mode on HTML in footnotes +- compare setup.py stuff from Yannick to what I have now. Also: + http://gitorious.org/projects/git-python/repos/mainline/trees/master + http://www.python.org/~jeremy/weblog/030924.html +- http://www.freewisdom.org/projects/python-markdown/Available_Extensions +- Extras.wiki desc of code-color option. 
Not sure I love the ":::name" + markup for the lexer name. +- find more unicode edge cases (look for any usage of md5() and make that + unicode) +- update MDTest 1.1? (see + http://six.pairlist.net/pipermail/markdown-discuss/2007-September/000815.html) + update MDTest tests from http://git.michelf.com/mdtest/ +- I see ref to Markdown.pl 1.0.2 + (http://six.pairlist.net/pipermail/markdown-discuss/2007-August/000756.html) + Update to that? Yes. Copy, at least, in showdown package. +- take a look at other examples/test-cases from + http://adlcommunity.net/help.php?file=advanced_markdown.html +- googlecode site: Why another Python impl? Test info. Usage/Features page. +- get on http://en.wikipedia.org/wiki/Markdown +- ask about remaining two MarkdownTest test failures +- put in recipes site +- perhaps some extras from Maruku and PHP Markdown extra + (http://maruku.rubyforge.org/maruku.html#extra) + - tables (tho I don't really like the syntax, prefer google codes, see + below) + - markdown inside literal HTML (if 'markdown="1|true"' attr) + - automatic toc generation (wanted that anyway, no a fan of maruku syntax + for this) + - weird markup in headers and links (does markdown2.py handle this?) + - meta-data syntax? One example of this is ids for headers. How about + automatically assigning header ids from the name (a la rest)? + - at-the-top email-style headers? + - maruku's footnote links are 'fn:1' and 'fnref:1' for a footnote id of + 'blah'. If this is the PHP Markdown Extras way, then should follow + that. +- googlecode wiki markup ideas? + (http://code.google.com/p/support/wiki/WikiSyntax) + - ~~strikeout~~ + - ||tables||simple||syntax|| +- at bottom has a wish + list: + - simple "cite" for blockquote. How about: + [Zaphod Breeblebrox] + > blah blah + > blah +- do perf comparison with the other Markdown impls (if compare horribly then + do something about it) +- submit a Markdown.py (or .pl?) 
fix based on revision 1895 (on tm svn) +- see about using html5lib (for speed and/or for better raw HTML handling) +- see about plugins (SmartyPants, others available) diff -r bc5a004bfcca -r 5101c0cba85d bundled/markdown2/lib/markdown2.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bundled/markdown2/lib/markdown2.py Fri Jun 18 22:28:31 2010 -0400 @@ -0,0 +1,2036 @@ +#!/usr/bin/env python +# Copyright (c) 2007-2008 ActiveState Corp. +# License: MIT (http://www.opensource.org/licenses/mit-license.php) + +r"""A fast and complete Python implementation of Markdown. + +[from http://daringfireball.net/projects/markdown/] +> Markdown is a text-to-HTML filter; it translates an easy-to-read / +> easy-to-write structured text format into HTML. Markdown's text +> format is most similar to that of plain text email, and supports +> features such as headers, *emphasis*, code blocks, blockquotes, and +> links. +> +> Markdown's syntax is designed not as a generic markup language, but +> specifically to serve as a front-end to (X)HTML. You can use span-level +> HTML tags anywhere in a Markdown document, and you can use block level +> HTML tags (like
<div> and <table> as well). + +Module usage: + + >>> import markdown2 + >>> markdown2.markdown("*boo!*") # or use `html = markdown_path(PATH)` + u'

<p><em>boo!</em></p>

\n' + + >>> markdowner = Markdown() + >>> markdowner.convert("*boo!*") + u'

<p><em>boo!</em></p>

\n' + >>> markdowner.convert("**boom!**") + u'

<p><strong>boom!</strong></p>

\n' + +This implementation of Markdown implements the full "core" syntax plus a +number of extras (e.g., code syntax coloring, footnotes) as described on +. +""" + +cmdln_desc = """A fast and complete Python implementation of Markdown, a +text-to-HTML conversion tool for web writers. + +Supported extras (see -x|--extras option below): +* code-friendly: Disable _ and __ for em and strong. +* code-color: Pygments-based syntax coloring of sections. +* cuddled-lists: Allow lists to be cuddled to the preceding paragraph. +* footnotes: Support footnotes as in use on daringfireball.net and + implemented in other Markdown processors (tho not in Markdown.pl v1.0.1). +* html-classes: Takes a dict mapping html tag names (lowercase) to a + string to use for a "class" tag attribute. Currently only supports + "pre" and "code" tags. Add an issue if you require this for other tags. +* pyshell: Treats unindented Python interactive shell sessions as + blocks. +* link-patterns: Auto-link given regex patterns in text (e.g. bug number + references, revision number references). +* xml: Passes one-liner processing instructions and namespaced XML tags. +""" + +# Dev Notes: +# - There is already a Python markdown processor +# (http://www.freewisdom.org/projects/python-markdown/). +# - Python's regex syntax doesn't have '\z', so I'm using '\Z'. I'm +# not yet sure if there implications with this. Compare 'pydoc sre' +# and 'perldoc perlre'. 
+ +__version_info__ = (1, 0, 1, 17) # first three nums match Markdown.pl +__version__ = '1.0.1.17' +__author__ = "Trent Mick" + +import os +import sys +from pprint import pprint +import re +import logging +try: + from hashlib import md5 +except ImportError: + from md5 import md5 +import optparse +from random import random, randint +import codecs +from urllib import quote + + + +#---- Python version compat + +if sys.version_info[:2] < (2,4): + from sets import Set as set + def reversed(sequence): + for i in sequence[::-1]: + yield i + def _unicode_decode(s, encoding, errors='xmlcharrefreplace'): + return unicode(s, encoding, errors) +else: + def _unicode_decode(s, encoding, errors='strict'): + return s.decode(encoding, errors) + + +#---- globals + +DEBUG = False +log = logging.getLogger("markdown") + +DEFAULT_TAB_WIDTH = 4 + + +try: + import uuid +except ImportError: + SECRET_SALT = str(randint(0, 1000000)) +else: + SECRET_SALT = str(uuid.uuid4()) +def _hash_ascii(s): + #return md5(s).hexdigest() # Markdown.pl effectively does this. 
+ return 'md5-' + md5(SECRET_SALT + s).hexdigest() +def _hash_text(s): + return 'md5-' + md5(SECRET_SALT + s.encode("utf-8")).hexdigest() + +# Table of hash values for escaped characters: +g_escape_table = dict([(ch, _hash_ascii(ch)) + for ch in '\\`*_{}[]()>#+-.!']) + + + +#---- exceptions + +class MarkdownError(Exception): + pass + + + +#---- public api + +def markdown_path(path, encoding="utf-8", + html4tags=False, tab_width=DEFAULT_TAB_WIDTH, + safe_mode=None, extras=None, link_patterns=None, + use_file_vars=False): + fp = codecs.open(path, 'r', encoding) + text = fp.read() + fp.close() + return Markdown(html4tags=html4tags, tab_width=tab_width, + safe_mode=safe_mode, extras=extras, + link_patterns=link_patterns, + use_file_vars=use_file_vars).convert(text) + +def markdown(text, html4tags=False, tab_width=DEFAULT_TAB_WIDTH, + safe_mode=None, extras=None, link_patterns=None, + use_file_vars=False): + return Markdown(html4tags=html4tags, tab_width=tab_width, + safe_mode=safe_mode, extras=extras, + link_patterns=link_patterns, + use_file_vars=use_file_vars).convert(text) + +class Markdown(object): + # The dict of "extras" to enable in processing -- a mapping of + # extra name to argument for the extra. Most extras do not have an + # argument, in which case the value is None. + # + # This can be set via (a) subclassing and (b) the constructor + # "extras" argument. 
+ extras = None + + urls = None + titles = None + html_blocks = None + html_spans = None + html_removed_text = "[HTML_REMOVED]" # for compat with markdown.py + + # Used to track when we're inside an ordered or unordered list + # (see _ProcessListItems() for details): + list_level = 0 + + _ws_only_line_re = re.compile(r"^[ \t]+$", re.M) + + def __init__(self, html4tags=False, tab_width=4, safe_mode=None, + extras=None, link_patterns=None, use_file_vars=False): + if html4tags: + self.empty_element_suffix = ">" + else: + self.empty_element_suffix = " />" + self.tab_width = tab_width + + # For compatibility with earlier markdown2.py and with + # markdown.py's safe_mode being a boolean, + # safe_mode == True -> "replace" + if safe_mode is True: + self.safe_mode = "replace" + else: + self.safe_mode = safe_mode + + if self.extras is None: + self.extras = {} + elif not isinstance(self.extras, dict): + self.extras = dict([(e, None) for e in self.extras]) + if extras: + if not isinstance(extras, dict): + extras = dict([(e, None) for e in extras]) + self.extras.update(extras) + assert isinstance(self.extras, dict) + if "toc" in self.extras and not "header-ids" in self.extras: + self.extras["header-ids"] = None # "toc" implies "header-ids" + self._instance_extras = self.extras.copy() + self.link_patterns = link_patterns + self.use_file_vars = use_file_vars + self._outdent_re = re.compile(r'^(\t|[ ]{1,%d})' % tab_width, re.M) + + def reset(self): + self.urls = {} + self.titles = {} + self.html_blocks = {} + self.html_spans = {} + self.list_level = 0 + self.extras = self._instance_extras.copy() + if "footnotes" in self.extras: + self.footnotes = {} + self.footnote_ids = [] + if "header-ids" in self.extras: + self._count_from_header_id = {} # no `defaultdict` in Python 2.4 + + def convert(self, text): + """Convert the given text.""" + # Main function. The order in which other subs are called here is + # essential. 
Link and image substitutions need to happen before + # _EscapeSpecialChars(), so that any *'s or _'s in the + # and tags get encoded. + + # Clear the global hashes. If we don't clear these, you get conflicts + # from other articles when generating a page which contains more than + # one article (e.g. an index page that shows the N most recent + # articles): + self.reset() + + if not isinstance(text, unicode): + #TODO: perhaps shouldn't presume UTF-8 for string input? + text = unicode(text, 'utf-8') + + if self.use_file_vars: + # Look for emacs-style file variable hints. + emacs_vars = self._get_emacs_vars(text) + if "markdown-extras" in emacs_vars: + splitter = re.compile("[ ,]+") + for e in splitter.split(emacs_vars["markdown-extras"]): + if '=' in e: + ename, earg = e.split('=', 1) + try: + earg = int(earg) + except ValueError: + pass + else: + ename, earg = e, None + self.extras[ename] = earg + + # Standardize line endings: + text = re.sub("\r\n|\r", "\n", text) + + # Make sure $text ends with a couple of newlines: + text += "\n\n" + + # Convert all tabs to spaces. + text = self._detab(text) + + # Strip any lines consisting only of spaces and tabs. + # This makes subsequent regexen easier to write, because we can + # match consecutive blank lines with /\n+/ instead of something + # contorted like /[ \t]*\n+/ . + text = self._ws_only_line_re.sub("", text) + + if self.safe_mode: + text = self._hash_html_spans(text) + + # Turn block-level HTML blocks into hash entries + text = self._hash_html_blocks(text, raw=True) + + # Strip link definitions, store in hashes. 
+ if "footnotes" in self.extras: + # Must do footnotes first because an unlucky footnote defn + # looks like a link defn: + # [^4]: this "looks like a link defn" + text = self._strip_footnote_definitions(text) + text = self._strip_link_definitions(text) + + text = self._run_block_gamut(text) + + if "footnotes" in self.extras: + text = self._add_footnotes(text) + + text = self._unescape_special_chars(text) + + if self.safe_mode: + text = self._unhash_html_spans(text) + + text += "\n" + + rv = UnicodeWithAttrs(text) + if "toc" in self.extras: + rv._toc = self._toc + return rv + + _emacs_oneliner_vars_pat = re.compile(r"-\*-\s*([^\r\n]*?)\s*-\*-", re.UNICODE) + # This regular expression is intended to match blocks like this: + # PREFIX Local Variables: SUFFIX + # PREFIX mode: Tcl SUFFIX + # PREFIX End: SUFFIX + # Some notes: + # - "[ \t]" is used instead of "\s" to specifically exclude newlines + # - "(\r\n|\n|\r)" is used instead of "$" because the sre engine does + # not like anything other than Unix-style line terminators. + _emacs_local_vars_pat = re.compile(r"""^ + (?P(?:[^\r\n|\n|\r])*?) + [\ \t]*Local\ Variables:[\ \t]* + (?P.*?)(?:\r\n|\n|\r) + (?P.*?\1End:) + """, re.IGNORECASE | re.MULTILINE | re.DOTALL | re.VERBOSE) + + def _get_emacs_vars(self, text): + """Return a dictionary of emacs-style local variables. + + Parsing is done loosely according to this spec (and according to + some in-practice deviations from this): + http://www.gnu.org/software/emacs/manual/html_node/emacs/Specifying-File-Variables.html#Specifying-File-Variables + """ + emacs_vars = {} + SIZE = pow(2, 13) # 8kB + + # Search near the start for a '-*-'-style one-liner of variables. 
+ head = text[:SIZE] + if "-*-" in head: + match = self._emacs_oneliner_vars_pat.search(head) + if match: + emacs_vars_str = match.group(1) + assert '\n' not in emacs_vars_str + emacs_var_strs = [s.strip() for s in emacs_vars_str.split(';') + if s.strip()] + if len(emacs_var_strs) == 1 and ':' not in emacs_var_strs[0]: + # While not in the spec, this form is allowed by emacs: + # -*- Tcl -*- + # where the implied "variable" is "mode". This form + # is only allowed if there are no other variables. + emacs_vars["mode"] = emacs_var_strs[0].strip() + else: + for emacs_var_str in emacs_var_strs: + try: + variable, value = emacs_var_str.strip().split(':', 1) + except ValueError: + log.debug("emacs variables error: malformed -*- " + "line: %r", emacs_var_str) + continue + # Lowercase the variable name because Emacs allows "Mode" + # or "mode" or "MoDe", etc. + emacs_vars[variable.lower()] = value.strip() + + tail = text[-SIZE:] + if "Local Variables" in tail: + match = self._emacs_local_vars_pat.search(tail) + if match: + prefix = match.group("prefix") + suffix = match.group("suffix") + lines = match.group("content").splitlines(0) + #print "prefix=%r, suffix=%r, content=%r, lines: %s"\ + # % (prefix, suffix, match.group("content"), lines) + + # Validate the Local Variables block: proper prefix and suffix + # usage. + for i, line in enumerate(lines): + if not line.startswith(prefix): + log.debug("emacs variables error: line '%s' " + "does not use proper prefix '%s'" + % (line, prefix)) + return {} + # Don't validate suffix on last line. Emacs doesn't care, + # neither should we. + if i != len(lines)-1 and not line.endswith(suffix): + log.debug("emacs variables error: line '%s' " + "does not use proper suffix '%s'" + % (line, suffix)) + return {} + + # Parse out one emacs var per line. 
+ continued_for = None + for line in lines[:-1]: # no var on the last line ("PREFIX End:") + if prefix: line = line[len(prefix):] # strip prefix + if suffix: line = line[:-len(suffix)] # strip suffix + line = line.strip() + if continued_for: + variable = continued_for + if line.endswith('\\'): + line = line[:-1].rstrip() + else: + continued_for = None + emacs_vars[variable] += ' ' + line + else: + try: + variable, value = line.split(':', 1) + except ValueError: + log.debug("local variables error: missing colon " + "in local variables entry: '%s'" % line) + continue + # Do NOT lowercase the variable name, because Emacs only + # allows "mode" (and not "Mode", "MoDe", etc.) in this block. + value = value.strip() + if value.endswith('\\'): + value = value[:-1].rstrip() + continued_for = variable + else: + continued_for = None + emacs_vars[variable] = value + + # Unquote values. + for var, val in emacs_vars.items(): + if len(val) > 1 and (val.startswith('"') and val.endswith('"') + or val.startswith('"') and val.endswith('"')): + emacs_vars[var] = val[1:-1] + + return emacs_vars + + # Cribbed from a post by Bart Lateur: + # + _detab_re = re.compile(r'(.*?)\t', re.M) + def _detab_sub(self, match): + g1 = match.group(1) + return g1 + (' ' * (self.tab_width - len(g1) % self.tab_width)) + def _detab(self, text): + r"""Remove (leading?) tabs from a file. + + >>> m = Markdown() + >>> m._detab("\tfoo") + ' foo' + >>> m._detab(" \tfoo") + ' foo' + >>> m._detab("\t foo") + ' foo' + >>> m._detab(" foo") + ' foo' + >>> m._detab(" foo\n\tbar\tblam") + ' foo\n bar blam' + """ + if '\t' not in text: + return text + return self._detab_re.subn(self._detab_sub, text)[0] + + _block_tags_a = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math|ins|del' + _strict_tag_block_re = re.compile(r""" + ( # save in \1 + ^ # start of line (with re.M) + <(%s) # start tag = \2 + \b # word break + (.*\n)*? 
# any number of lines, minimally matching + # the matching end tag + [ \t]* # trailing spaces/tabs + (?=\n+|\Z) # followed by a newline or end of document + ) + """ % _block_tags_a, + re.X | re.M) + + _block_tags_b = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math' + _liberal_tag_block_re = re.compile(r""" + ( # save in \1 + ^ # start of line (with re.M) + <(%s) # start tag = \2 + \b # word break + (.*\n)*? # any number of lines, minimally matching + .* # the matching end tag + [ \t]* # trailing spaces/tabs + (?=\n+|\Z) # followed by a newline or end of document + ) + """ % _block_tags_b, + re.X | re.M) + + def _hash_html_block_sub(self, match, raw=False): + html = match.group(1) + if raw and self.safe_mode: + html = self._sanitize_html(html) + key = _hash_text(html) + self.html_blocks[key] = html + return "\n\n" + key + "\n\n" + + def _hash_html_blocks(self, text, raw=False): + """Hashify HTML blocks + + We only want to do this for block-level HTML tags, such as headers, + lists, and tables. That's because we still want to wrap

s around + "paragraphs" that are wrapped in non-block-level tags, such as anchors, + phrase emphasis, and spans. The list of tags we're looking for is + hard-coded. + + @param raw {boolean} indicates if these are raw HTML blocks in + the original source. It makes a difference in "safe" mode. + """ + if '<' not in text: + return text + + # Pass `raw` value into our calls to self._hash_html_block_sub. + hash_html_block_sub = _curry(self._hash_html_block_sub, raw=raw) + + # First, look for nested blocks, e.g.: + #

+ #
+ # tags for inner block must be indented. + #
+ #
+ # + # The outermost tags must start at the left margin for this to match, and + # the inner nested divs must be indented. + # We need to do this before the next, more liberal match, because the next + # match will start at the first `
` and stop at the first `
`. + text = self._strict_tag_block_re.sub(hash_html_block_sub, text) + + # Now match more liberally, simply from `\n` to `\n` + text = self._liberal_tag_block_re.sub(hash_html_block_sub, text) + + # Special case just for
. It was easier to make a special + # case than to make the other regex more complicated. + if "", start_idx) + 3 + except ValueError, ex: + break + + # Start position for next comment block search. + start = end_idx + + # Validate whitespace before comment. + if start_idx: + # - Up to `tab_width - 1` spaces before start_idx. + for i in range(self.tab_width - 1): + if text[start_idx - 1] != ' ': + break + start_idx -= 1 + if start_idx == 0: + break + # - Must be preceded by 2 newlines or hit the start of + # the document. + if start_idx == 0: + pass + elif start_idx == 1 and text[0] == '\n': + start_idx = 0 # to match minute detail of Markdown.pl regex + elif text[start_idx-2:start_idx] == '\n\n': + pass + else: + break + + # Validate whitespace after comment. + # - Any number of spaces and tabs. + while end_idx < len(text): + if text[end_idx] not in ' \t': + break + end_idx += 1 + # - Must be following by 2 newlines or hit end of text. + if text[end_idx:end_idx+2] not in ('', '\n', '\n\n'): + continue + + # Escape and hash (must match `_hash_html_block_sub`). + html = text[start_idx:end_idx] + if raw and self.safe_mode: + html = self._sanitize_html(html) + key = _hash_text(html) + self.html_blocks[key] = html + text = text[:start_idx] + "\n\n" + key + "\n\n" + text[end_idx:] + + if "xml" in self.extras: + # Treat XML processing instructions and namespaced one-liner + # tags as if they were block HTML tags. E.g., if standalone + # (i.e. are their own paragraph), the following do not get + # wrapped in a

tag: + # + # + # + _xml_oneliner_re = _xml_oneliner_re_from_tab_width(self.tab_width) + text = _xml_oneliner_re.sub(hash_html_block_sub, text) + + return text + + def _strip_link_definitions(self, text): + # Strips link definitions from text, stores the URLs and titles in + # hash references. + less_than_tab = self.tab_width - 1 + + # Link defs are in the form: + # [id]: url "optional title" + _link_def_re = re.compile(r""" + ^[ ]{0,%d}\[(.+)\]: # id = \1 + [ \t]* + \n? # maybe *one* newline + [ \t]* + ? # url = \2 + [ \t]* + (?: + \n? # maybe one newline + [ \t]* + (?<=\s) # lookbehind for whitespace + ['"(] + ([^\n]*) # title = \3 + ['")] + [ \t]* + )? # title is optional + (?:\n+|\Z) + """ % less_than_tab, re.X | re.M | re.U) + return _link_def_re.sub(self._extract_link_def_sub, text) + + def _extract_link_def_sub(self, match): + id, url, title = match.groups() + key = id.lower() # Link IDs are case-insensitive + self.urls[key] = self._encode_amps_and_angles(url) + if title: + self.titles[key] = title.replace('"', '"') + return "" + + def _extract_footnote_def_sub(self, match): + id, text = match.groups() + text = _dedent(text, skip_first_line=not text.startswith('\n')).strip() + normed_id = re.sub(r'\W', '-', id) + # Ensure footnote text ends with a couple newlines (for some + # block gamut matches). + self.footnotes[normed_id] = text + "\n\n" + return "" + + def _strip_footnote_definitions(self, text): + """A footnote definition looks like this: + + [^note-id]: Text of the note. + + May include one or more indented paragraphs. + + Where, + - The 'note-id' can be pretty much anything, though typically it + is the number of the footnote. + - The first paragraph may start on the next line, like so: + + [^note-id]: + Text of the note. + """ + less_than_tab = self.tab_width - 1 + footnote_def_re = re.compile(r''' + ^[ ]{0,%d}\[\^(.+)\]: # id = \1 + [ \t]* + ( # footnote text = \2 + # First line need not start with the spaces. 
+ (?:\s*.*\n+) + (?: + (?:[ ]{%d} | \t) # Subsequent lines must be indented. + .*\n+ + )* + ) + # Lookahead for non-space at line-start, or end of doc. + (?:(?=^[ ]{0,%d}\S)|\Z) + ''' % (less_than_tab, self.tab_width, self.tab_width), + re.X | re.M) + return footnote_def_re.sub(self._extract_footnote_def_sub, text) + + + _hr_res = [ + re.compile(r"^[ ]{0,2}([ ]?\*[ ]?){3,}[ \t]*$", re.M), + re.compile(r"^[ ]{0,2}([ ]?\-[ ]?){3,}[ \t]*$", re.M), + re.compile(r"^[ ]{0,2}([ ]?\_[ ]?){3,}[ \t]*$", re.M), + ] + + def _run_block_gamut(self, text): + # These are all the transformations that form block-level + # tags like paragraphs, headers, and list items. + + text = self._do_headers(text) + + # Do Horizontal Rules: + hr = "\n tags around block-level tags. + text = self._hash_html_blocks(text) + + text = self._form_paragraphs(text) + + return text + + def _pyshell_block_sub(self, match): + lines = match.group(0).splitlines(0) + _dedentlines(lines) + indent = ' ' * self.tab_width + s = ('\n' # separate from possible cuddled paragraph + + indent + ('\n'+indent).join(lines) + + '\n\n') + return s + + def _prepare_pyshell_blocks(self, text): + """Ensure that Python interactive shell sessions are put in + code blocks -- even if not properly indented. + """ + if ">>>" not in text: + return text + + less_than_tab = self.tab_width - 1 + _pyshell_block_re = re.compile(r""" + ^([ ]{0,%d})>>>[ ].*\n # first line + ^(\1.*\S+.*\n)* # any number of subsequent lines + ^\n # ends with a blank line + """ % less_than_tab, re.M | re.X) + + return _pyshell_block_re.sub(self._pyshell_block_sub, text) + + def _run_span_gamut(self, text): + # These are all the transformations that occur *within* block-level + # tags like paragraphs, headers, and list items. + + text = self._do_code_spans(text) + + text = self._escape_special_chars(text) + + # Process anchor and image tags. 
+ text = self._do_links(text) + + # Make links out of things like `` + # Must come after _do_links(), because you can use < and > + # delimiters in inline links like [this](). + text = self._do_auto_links(text) + + if "link-patterns" in self.extras: + text = self._do_link_patterns(text) + + text = self._encode_amps_and_angles(text) + + text = self._do_italics_and_bold(text) + + # Do hard breaks: + text = re.sub(r" {2,}\n", " + | + # auto-link (e.g., ) + <\w+[^>]*> + | + # comment + | + <\?.*?\?> # processing instruction + ) + """, re.X) + + def _escape_special_chars(self, text): + # Python markdown note: the HTML tokenization here differs from + # that in Markdown.pl, hence the behaviour for subtle cases can + # differ (I believe the tokenizer here does a better job because + # it isn't susceptible to unmatched '<' and '>' in HTML tags). + # Note, however, that '>' is not allowed in an auto-link URL + # here. + escaped = [] + is_html_markup = False + for token in self._sorta_html_tokenize_re.split(text): + if is_html_markup: + # Within tags/HTML-comments/auto-links, encode * and _ + # so they don't conflict with their use in Markdown for + # italics and strong. We're replacing each such + # character with its corresponding MD5 checksum value; + # this is likely overkill, but it should prevent us from + # colliding with the escape values by accident. + escaped.append(token.replace('*', g_escape_table['*']) + .replace('_', g_escape_table['_'])) + else: + escaped.append(self._encode_backslash_escapes(token)) + is_html_markup = not is_html_markup + return ''.join(escaped) + + def _hash_html_spans(self, text): + # Used for safe_mode. 
+ + def _is_auto_link(s): + if ':' in s and self._auto_link_re.match(s): + return True + elif '@' in s and self._auto_email_link_re.match(s): + return True + return False + + tokens = [] + is_html_markup = False + for token in self._sorta_html_tokenize_re.split(text): + if is_html_markup and not _is_auto_link(token): + sanitized = self._sanitize_html(token) + key = _hash_text(sanitized) + self.html_spans[key] = sanitized + tokens.append(key) + else: + tokens.append(token) + is_html_markup = not is_html_markup + return ''.join(tokens) + + def _unhash_html_spans(self, text): + for key, sanitized in self.html_spans.items(): + text = text.replace(key, sanitized) + return text + + def _sanitize_html(self, s): + if self.safe_mode == "replace": + return self.html_removed_text + elif self.safe_mode == "escape": + replacements = [ + ('&', '&'), + ('<', '<'), + ('>', '>'), + ] + for before, after in replacements: + s = s.replace(before, after) + return s + else: + raise MarkdownError("invalid value for 'safe_mode': %r (must be " + "'escape' or 'replace')" % self.safe_mode) + + _tail_of_inline_link_re = re.compile(r''' + # Match tail of: [text](/url/) or [text](/url/ "title") + \( # literal paren + [ \t]* + (?P # \1 + <.*?> + | + .*? + ) + [ \t]* + ( # \2 + (['"]) # quote char = \3 + (?P.*?) + \3 # matching quote + )? # title is optional + \) + ''', re.X | re.S) + _tail_of_reference_link_re = re.compile(r''' + # Match tail of: [text][id] + [ ]? # one optional space + (?:\n[ ]*)? # one optional newline followed by spaces + \[ + (?P<id>.*?) + \] + ''', re.X | re.S) + + def _do_links(self, text): + """Turn Markdown link shortcuts into XHTML <a> and <img> tags. + + This is a combination of Markdown.pl's _DoAnchors() and + _DoImages(). They are done together because that simplified the + approach. It was necessary to use a different approach than + Markdown.pl because of the lack of atomic matching support in + Python's regex engine used in $g_nested_brackets. 
+ """ + MAX_LINK_TEXT_SENTINEL = 3000 # markdown2 issue 24 + + # `anchor_allowed_pos` is used to support img links inside + # anchors, but not anchors inside anchors. An anchor's start + # pos must be `>= anchor_allowed_pos`. + anchor_allowed_pos = 0 + + curr_pos = 0 + while True: # Handle the next link. + # The next '[' is the start of: + # - an inline anchor: [text](url "title") + # - a reference anchor: [text][id] + # - an inline img: ![text](url "title") + # - a reference img: ![text][id] + # - a footnote ref: [^id] + # (Only if 'footnotes' extra enabled) + # - a footnote defn: [^id]: ... + # (Only if 'footnotes' extra enabled) These have already + # been stripped in _strip_footnote_definitions() so no + # need to watch for them. + # - a link definition: [id]: url "title" + # These have already been stripped in + # _strip_link_definitions() so no need to watch for them. + # - not markup: [...anything else... + try: + start_idx = text.index('[', curr_pos) + except ValueError: + break + text_length = len(text) + + # Find the matching closing ']'. + # Markdown.pl allows *matching* brackets in link text so we + # will here too. Markdown.pl *doesn't* currently allow + # matching brackets in img alt text -- we'll differ in that + # regard. + bracket_depth = 0 + for p in range(start_idx+1, min(start_idx+MAX_LINK_TEXT_SENTINEL, + text_length)): + ch = text[p] + if ch == ']': + bracket_depth -= 1 + if bracket_depth < 0: + break + elif ch == '[': + bracket_depth += 1 + else: + # Closing bracket not found within sentinel length. + # This isn't markup. + curr_pos = start_idx + 1 + continue + link_text = text[start_idx+1:p] + + # Possibly a footnote ref? 
+ if "footnotes" in self.extras and link_text.startswith("^"): + normed_id = re.sub(r'\W', '-', link_text[1:]) + if normed_id in self.footnotes: + self.footnote_ids.append(normed_id) + result = '<sup class="footnote-ref" id="fnref-%s">' \ + '<a href="#fn-%s">%s</a></sup>' \ + % (normed_id, normed_id, len(self.footnote_ids)) + text = text[:start_idx] + result + text[p+1:] + else: + # This id isn't defined, leave the markup alone. + curr_pos = p+1 + continue + + # Now determine what this is by the remainder. + p += 1 + if p == text_length: + return text + + # Inline anchor or img? + if text[p] == '(': # attempt at perf improvement + match = self._tail_of_inline_link_re.match(text, p) + if match: + # Handle an inline anchor or img. + is_img = start_idx > 0 and text[start_idx-1] == "!" + if is_img: + start_idx -= 1 + + url, title = match.group("url"), match.group("title") + if url and url[0] == '<': + url = url[1:-1] # '<url>' -> 'url' + # We've got to encode these to avoid conflicting + # with italics/bold. + url = url.replace('*', g_escape_table['*']) \ + .replace('_', g_escape_table['_']) + if title: + title_str = ' title="%s"' \ + % title.replace('*', g_escape_table['*']) \ + .replace('_', g_escape_table['_']) \ + .replace('"', '"') + else: + title_str = '' + if is_img: + result = '<img src="%s" alt="%s"%s%s' \ + % (url.replace('"', '"'), + link_text.replace('"', '"'), + title_str, self.empty_element_suffix) + curr_pos = start_idx + len(result) + text = text[:start_idx] + result + text[match.end():] + elif start_idx >= anchor_allowed_pos: + result_head = '<a href="%s"%s>' % (url, title_str) + result = '%s%s</a>' % (result_head, link_text) + # <img> allowed from curr_pos on, <a> from + # anchor_allowed_pos on. + curr_pos = start_idx + len(result_head) + anchor_allowed_pos = start_idx + len(result) + text = text[:start_idx] + result + text[match.end():] + else: + # Anchor not allowed here. + curr_pos = start_idx + 1 + continue + + # Reference anchor or img? 
+ else: + match = self._tail_of_reference_link_re.match(text, p) + if match: + # Handle a reference-style anchor or img. + is_img = start_idx > 0 and text[start_idx-1] == "!" + if is_img: + start_idx -= 1 + link_id = match.group("id").lower() + if not link_id: + link_id = link_text.lower() # for links like [this][] + if link_id in self.urls: + url = self.urls[link_id] + # We've got to encode these to avoid conflicting + # with italics/bold. + url = url.replace('*', g_escape_table['*']) \ + .replace('_', g_escape_table['_']) + title = self.titles.get(link_id) + if title: + title = title.replace('*', g_escape_table['*']) \ + .replace('_', g_escape_table['_']) + title_str = ' title="%s"' % title + else: + title_str = '' + if is_img: + result = '<img src="%s" alt="%s"%s%s' \ + % (url.replace('"', '"'), + link_text.replace('"', '"'), + title_str, self.empty_element_suffix) + curr_pos = start_idx + len(result) + text = text[:start_idx] + result + text[match.end():] + elif start_idx >= anchor_allowed_pos: + result = '<a href="%s"%s>%s</a>' \ + % (url, title_str, link_text) + result_head = '<a href="%s"%s>' % (url, title_str) + result = '%s%s</a>' % (result_head, link_text) + # <img> allowed from curr_pos on, <a> from + # anchor_allowed_pos on. + curr_pos = start_idx + len(result_head) + anchor_allowed_pos = start_idx + len(result) + text = text[:start_idx] + result + text[match.end():] + else: + # Anchor not allowed here. + curr_pos = start_idx + 1 + else: + # This id isn't defined, leave the markup alone. + curr_pos = match.end() + continue + + # Otherwise, it isn't markup. + curr_pos = start_idx + 1 + + return text + + def header_id_from_text(self, text, prefix): + """Generate a header id attribute value from the given header + HTML content. + + This is only called if the "header-ids" extra is enabled. + Subclasses may override this for different header ids. 
+ """ + header_id = _slugify(text) + if prefix: + header_id = prefix + '-' + header_id + if header_id in self._count_from_header_id: + self._count_from_header_id[header_id] += 1 + header_id += '-%s' % self._count_from_header_id[header_id] + else: + self._count_from_header_id[header_id] = 1 + return header_id + + _toc = None + def _toc_add_entry(self, level, id, name): + if self._toc is None: + self._toc = [] + self._toc.append((level, id, name)) + + _setext_h_re = re.compile(r'^(.+)[ \t]*\n(=+|-+)[ \t]*\n+', re.M) + def _setext_h_sub(self, match): + n = {"=": 1, "-": 2}[match.group(2)[0]] + demote_headers = self.extras.get("demote-headers") + if demote_headers: + n = min(n + demote_headers, 6) + header_id_attr = "" + if "header-ids" in self.extras: + header_id = self.header_id_from_text(match.group(1), + prefix=self.extras["header-ids"]) + header_id_attr = ' id="%s"' % header_id + html = self._run_span_gamut(match.group(1)) + if "toc" in self.extras: + self._toc_add_entry(n, header_id, html) + return "<h%d%s>%s</h%d>\n\n" % (n, header_id_attr, html, n) + + _atx_h_re = re.compile(r''' + ^(\#{1,6}) # \1 = string of #'s + [ \t]* + (.+?) 
# \2 = Header text + [ \t]* + (?<!\\) # ensure not an escaped trailing '#' + \#* # optional closing #'s (not counted) + \n+ + ''', re.X | re.M) + def _atx_h_sub(self, match): + n = len(match.group(1)) + demote_headers = self.extras.get("demote-headers") + if demote_headers: + n = min(n + demote_headers, 6) + header_id_attr = "" + if "header-ids" in self.extras: + header_id = self.header_id_from_text(match.group(2), + prefix=self.extras["header-ids"]) + header_id_attr = ' id="%s"' % header_id + html = self._run_span_gamut(match.group(2)) + if "toc" in self.extras: + self._toc_add_entry(n, header_id, html) + return "<h%d%s>%s</h%d>\n\n" % (n, header_id_attr, html, n) + + def _do_headers(self, text): + # Setext-style headers: + # Header 1 + # ======== + # + # Header 2 + # -------- + text = self._setext_h_re.sub(self._setext_h_sub, text) + + # atx-style headers: + # # Header 1 + # ## Header 2 + # ## Header 2 with closing hashes ## + # ... + # ###### Header 6 + text = self._atx_h_re.sub(self._atx_h_sub, text) + + return text + + + _marker_ul_chars = '*+-' + _marker_any = r'(?:[%s]|\d+\.)' % _marker_ul_chars + _marker_ul = '(?:[%s])' % _marker_ul_chars + _marker_ol = r'(?:\d+\.)' + + def _list_sub(self, match): + lst = match.group(1) + lst_type = match.group(3) in self._marker_ul_chars and "ul" or "ol" + result = self._process_list_items(lst) + if self.list_level: + return "<%s>\n%s</%s>\n" % (lst_type, result, lst_type) + else: + return "<%s>\n%s</%s>\n\n" % (lst_type, result, lst_type) + + def _do_lists(self, text): + # Form HTML ordered (numbered) and unordered (bulleted) lists. + + for marker_pat in (self._marker_ul, self._marker_ol): + # Re-usable pattern to match any entire ul or ol list: + less_than_tab = self.tab_width - 1 + whole_list = r''' + ( # \1 = whole list + ( # \2 + [ ]{0,%d} + (%s) # \3 = first list item marker + [ \t]+ + ) + (?:.+?) + ( # \4 + \Z + | + \n{2,} + (?=\S) + (?! 
+ # Negative lookahead for another list item marker + [ \t]* + %s[ \t]+ + ) + ) + ) + ''' % (less_than_tab, marker_pat, marker_pat) + + # We use a different prefix before nested lists than top-level lists. + # See extended comment in _process_list_items(). + # + # Note: There's a bit of duplication here. My original implementation + # created a scalar regex pattern as the conditional result of the test on + # $g_list_level, and then only ran the $text =~ s{...}{...}egmx + # substitution once, using the scalar as the pattern. This worked, + # everywhere except when running under MT on my hosting account at Pair + # Networks. There, this caused all rebuilds to be killed by the reaper (or + # perhaps they crashed, but that seems incredibly unlikely given that the + # same script on the same server ran fine *except* under MT). I've spent + # more time trying to figure out why this is happening than I'd like to + # admit. My only guess, backed up by the fact that this workaround works, + # is that Perl optimizes the substitution when it can figure out that the + # pattern will never change, and when this optimization isn't on, we run + # afoul of the reaper. Thus, the slightly redundant code that uses two + # static s/// patterns rather than one conditional pattern. + + if self.list_level: + sub_list_re = re.compile("^"+whole_list, re.X | re.M | re.S) + text = sub_list_re.sub(self._list_sub, text) + else: + list_re = re.compile(r"(?:(?<=\n\n)|\A\n?)"+whole_list, + re.X | re.M | re.S) + text = list_re.sub(self._list_sub, text) + + return text + + _list_item_re = re.compile(r''' + (\n)? # leading line = \1 + (^[ \t]*) # leading whitespace = \2 + (?P<marker>%s) [ \t]+ # list marker = \3 + ((?:.+?) 
# list item text = \4 + (\n{1,2})) # eols = \5 + (?= \n* (\Z | \2 (?P<next_marker>%s) [ \t]+)) + ''' % (_marker_any, _marker_any), + re.M | re.X | re.S) + + _last_li_endswith_two_eols = False + def _list_item_sub(self, match): + item = match.group(4) + leading_line = match.group(1) + leading_space = match.group(2) + if leading_line or "\n\n" in item or self._last_li_endswith_two_eols: + item = self._run_block_gamut(self._outdent(item)) + else: + # Recursion for sub-lists: + item = self._do_lists(self._outdent(item)) + if item.endswith('\n'): + item = item[:-1] + item = self._run_span_gamut(item) + self._last_li_endswith_two_eols = (len(match.group(5)) == 2) + return "<li>%s</li>\n" % item + + def _process_list_items(self, list_str): + # Process the contents of a single ordered or unordered list, + # splitting it into individual list items. + + # The $g_list_level global keeps track of when we're inside a list. + # Each time we enter a list, we increment it; when we leave a list, + # we decrement. If it's zero, we're not in a list anymore. + # + # We do this because when we're not inside a list, we want to treat + # something like this: + # + # I recommend upgrading to version + # 8. Oops, now this line is treated + # as a sub-list. + # + # As a single paragraph, despite the fact that the second line starts + # with a digit-period-space sequence. + # + # Whereas when we're inside a list (or sub-list), that line will be + # treated as the start of a sub-list. What a kludge, huh? This is + # an aspect of Markdown's syntax that's hard to parse perfectly + # without resorting to mind-reading. Perhaps the solution is to + # change the syntax rules such that sub-lists must start with a + # starting cardinal number; e.g. "1." or "a.". 
+ self.list_level += 1 + self._last_li_endswith_two_eols = False + list_str = list_str.rstrip('\n') + '\n' + list_str = self._list_item_re.sub(self._list_item_sub, list_str) + self.list_level -= 1 + return list_str + + def _get_pygments_lexer(self, lexer_name): + try: + from pygments import lexers, util + except ImportError: + return None + try: + return lexers.get_lexer_by_name(lexer_name) + except util.ClassNotFound: + return None + + def _color_with_pygments(self, codeblock, lexer, **formatter_opts): + import pygments + import pygments.formatters + + class HtmlCodeFormatter(pygments.formatters.HtmlFormatter): + def _wrap_code(self, inner): + """A function for use in a Pygments Formatter which + wraps in <code> tags. + """ + yield 0, "<code>" + for tup in inner: + yield tup + yield 0, "</code>" + + def wrap(self, source, outfile): + """Return the source with a code, pre, and div.""" + return self._wrap_div(self._wrap_pre(self._wrap_code(source))) + + formatter = HtmlCodeFormatter(cssclass="codehilite", **formatter_opts) + return pygments.highlight(codeblock, lexer, formatter) + + def _code_block_sub(self, match): + codeblock = match.group(1) + codeblock = self._outdent(codeblock) + codeblock = self._detab(codeblock) + codeblock = codeblock.lstrip('\n') # trim leading newlines + codeblock = codeblock.rstrip() # trim trailing whitespace + + if "code-color" in self.extras and codeblock.startswith(":::"): + lexer_name, rest = codeblock.split('\n', 1) + lexer_name = lexer_name[3:].strip() + lexer = self._get_pygments_lexer(lexer_name) + codeblock = rest.lstrip("\n") # Remove lexer declaration line. 
+ if lexer: + formatter_opts = self.extras['code-color'] or {} + colored = self._color_with_pygments(codeblock, lexer, + **formatter_opts) + return "\n\n%s\n\n" % colored + + codeblock = self._encode_code(codeblock) + pre_class_str = self._html_class_str_from_tag("pre") + code_class_str = self._html_class_str_from_tag("code") + return "\n\n<pre%s><code%s>%s\n</code></pre>\n\n" % ( + pre_class_str, code_class_str, codeblock) + + def _html_class_str_from_tag(self, tag): + """Get the appropriate ' class="..."' string (note the leading + space), if any, for the given tag. + """ + if "html-classes" not in self.extras: + return "" + try: + html_classes_from_tag = self.extras["html-classes"] + except TypeError: + return "" + else: + if tag in html_classes_from_tag: + return ' class="%s"' % html_classes_from_tag[tag] + return "" + + def _do_code_blocks(self, text): + """Process Markdown `<pre><code>` blocks.""" + code_block_re = re.compile(r''' + (?:\n\n|\A) + ( # $1 = the code block -- one or more lines, starting with a space/tab + (?: + (?:[ ]{%d} | \t) # Lines must start with a tab or a tab-width of spaces + .*\n+ + )+ + ) + ((?=^[ ]{0,%d}\S)|\Z) # Lookahead for non-space at line-start, or end of doc + ''' % (self.tab_width, self.tab_width), + re.M | re.X) + + return code_block_re.sub(self._code_block_sub, text) + + + # Rules for a code span: + # - backslash escapes are not interpreted in a code span + # - to include one backtick or a run of backticks, the delimiters must + # be a longer run of backticks + # - cannot start or end a code span with a backtick; pad with a + # space and that space will be removed in the emitted HTML + # See `test/tm-cases/escapes.text` for a number of edge-case + # examples. + _code_span_re = re.compile(r''' + (?<!\\) + (`+) # \1 = Opening run of ` + (?!`) # See Note A test/tm-cases/escapes.text + (.+?) 
+ # \2 = The code block + (?<!`) + \1 # Matching closer + (?!`) + ''', re.X | re.S) + + def _code_span_sub(self, match): + c = match.group(2).strip(" \t") + c = self._encode_code(c) + return "<code>%s</code>" % c + + def _do_code_spans(self, text): + # * Backtick quotes are used for <code></code> spans. + # + # * You can use multiple backticks as the delimiters if you want to + # include literal backticks in the code span. So, this input: + # + # Just type ``foo `bar` baz`` at the prompt. + # + # Will translate to: + # + # <p>Just type <code>foo `bar` baz</code> at the prompt.</p> + # + # There's no arbitrary limit to the number of backticks you + # can use as delimiters. If you need three consecutive backticks + # in your code, use four for delimiters, etc. + # + # * You can use spaces to get literal backticks at the edges: + # + # ... type `` `bar` `` ... + # + # Turns to: + # + # ... type <code>`bar`</code> ... + return self._code_span_re.sub(self._code_span_sub, text) + + def _encode_code(self, text): + """Encode/escape certain characters inside Markdown code runs. + The point is that in code, these characters are literals, + and lose their special Markdown meanings. + """ + replacements = [ + # Encode all ampersands; HTML entities are not + # entities within a Markdown code span. 
+ ('&', '&'), + # Do the angle bracket song and dance: + ('<', '<'), + ('>', '>'), + # Now, escape characters that are magic in Markdown: + ('*', g_escape_table['*']), + ('_', g_escape_table['_']), + ('{', g_escape_table['{']), + ('}', g_escape_table['}']), + ('[', g_escape_table['[']), + (']', g_escape_table[']']), + ('\\', g_escape_table['\\']), + ] + for before, after in replacements: + text = text.replace(before, after) + return text + + _strong_re = re.compile(r"(\*\*|__)(?=\S)(.+?[*_]*)(?<=\S)\1", re.S) + _em_re = re.compile(r"(\*|_)(?=\S)(.+?)(?<=\S)\1", re.S) + _code_friendly_strong_re = re.compile(r"\*\*(?=\S)(.+?[*_]*)(?<=\S)\*\*", re.S) + _code_friendly_em_re = re.compile(r"\*(?=\S)(.+?)(?<=\S)\*", re.S) + def _do_italics_and_bold(self, text): + # <strong> must go first: + if "code-friendly" in self.extras: + text = self._code_friendly_strong_re.sub(r"<strong>\1</strong>", text) + text = self._code_friendly_em_re.sub(r"<em>\1</em>", text) + else: + text = self._strong_re.sub(r"<strong>\2</strong>", text) + text = self._em_re.sub(r"<em>\2</em>", text) + return text + + + _block_quote_re = re.compile(r''' + ( # Wrap whole match in \1 + ( + ^[ \t]*>[ \t]? 
# '>' at the start of a line + .+\n # rest of the first line + (.+\n)* # subsequent consecutive lines + \n* # blanks + )+ + ) + ''', re.M | re.X) + _bq_one_level_re = re.compile('^[ \t]*>[ \t]?', re.M); + + _html_pre_block_re = re.compile(r'(\s*<pre>.+?</pre>)', re.S) + def _dedent_two_spaces_sub(self, match): + return re.sub(r'(?m)^ ', '', match.group(1)) + + def _block_quote_sub(self, match): + bq = match.group(1) + bq = self._bq_one_level_re.sub('', bq) # trim one level of quoting + bq = self._ws_only_line_re.sub('', bq) # trim whitespace-only lines + bq = self._run_block_gamut(bq) # recurse + + bq = re.sub('(?m)^', ' ', bq) + # These leading spaces screw with <pre> content, so we need to fix that: + bq = self._html_pre_block_re.sub(self._dedent_two_spaces_sub, bq) + + return "<blockquote>\n%s\n</blockquote>\n\n" % bq + + def _do_block_quotes(self, text): + if '>' not in text: + return text + return self._block_quote_re.sub(self._block_quote_sub, text) + + def _form_paragraphs(self, text): + # Strip leading and trailing lines: + text = text.strip('\n') + + # Wrap <p> tags. + grafs = [] + for i, graf in enumerate(re.split(r"\n{2,}", text)): + if graf in self.html_blocks: + # Unhashify HTML blocks + grafs.append(self.html_blocks[graf]) + else: + cuddled_list = None + if "cuddled-lists" in self.extras: + # Need to put back trailing '\n' for `_list_item_re` + # match at the end of the paragraph. + li = self._list_item_re.search(graf + '\n') + # Two of the same list marker in this paragraph: a likely + # candidate for a list cuddled to preceding paragraph + # text (issue 33). Note the `[-1]` is a quick way to + # consider numeric bullets (e.g. "1." and "2.") to be + # equal. 
+ if (li and len(li.group(2)) <= 3 and li.group("next_marker") + and li.group("marker")[-1] == li.group("next_marker")[-1]): + start = li.start() + cuddled_list = self._do_lists(graf[start:]).rstrip("\n") + assert cuddled_list.startswith("<ul>") or cuddled_list.startswith("<ol>") + graf = graf[:start] + + # Wrap <p> tags. + graf = self._run_span_gamut(graf) + grafs.append("<p>" + graf.lstrip(" \t") + "</p>") + + if cuddled_list: + grafs.append(cuddled_list) + + return "\n\n".join(grafs) + + def _add_footnotes(self, text): + if self.footnotes: + footer = [ + '<div class="footnotes">', + '<hr' + self.empty_element_suffix, + '<ol>', + ] + for i, id in enumerate(self.footnote_ids): + if i != 0: + footer.append('') + footer.append('<li id="fn-%s">' % id) + footer.append(self._run_block_gamut(self.footnotes[id])) + backlink = ('<a href="#fnref-%s" ' + 'class="footnoteBackLink" ' + 'title="Jump back to footnote %d in the text.">' + '↩</a>' % (id, i+1)) + if footer[-1].endswith("</p>"): + footer[-1] = footer[-1][:-len("</p>")] \ + + ' ' + backlink + "</p>" + else: + footer.append("\n<p>%s</p>" % backlink) + footer.append('</li>') + footer.append('</ol>') + footer.append('</div>') + return text + '\n\n' + '\n'.join(footer) + else: + return text + + # Ampersand-encoding based entirely on Nat Irons's Amputator MT plugin: + # http://bumppo.net/projects/amputator/ + _ampersand_re = re.compile(r'&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)') + _naked_lt_re = re.compile(r'<(?![a-z/?\$!])', re.I) + _naked_gt_re = re.compile(r'''(?<![a-z?!/'"-])>''', re.I) + + def _encode_amps_and_angles(self, text): + # Smart processing for ampersands and angle brackets that need + # to be encoded. + text = self._ampersand_re.sub('&', text) + + # Encode naked <'s + text = self._naked_lt_re.sub('<', text) + + # Encode naked >'s + # Note: Other markdown implementations (e.g. Markdown.pl, PHP + # Markdown) don't do this. 
+ text = self._naked_gt_re.sub('>', text) + return text + + def _encode_backslash_escapes(self, text): + for ch, escape in g_escape_table.items(): + text = text.replace("\\"+ch, escape) + return text + + _auto_link_re = re.compile(r'<((https?|ftp):[^\'">\s]+)>', re.I) + def _auto_link_sub(self, match): + g1 = match.group(1) + return '<a href="%s">%s</a>' % (g1, g1) + + _auto_email_link_re = re.compile(r""" + < + (?:mailto:)? + ( + [-.\w]+ + \@ + [-\w]+(\.[-\w]+)*\.[a-z]+ + ) + > + """, re.I | re.X | re.U) + def _auto_email_link_sub(self, match): + return self._encode_email_address( + self._unescape_special_chars(match.group(1))) + + def _do_auto_links(self, text): + text = self._auto_link_re.sub(self._auto_link_sub, text) + text = self._auto_email_link_re.sub(self._auto_email_link_sub, text) + return text + + def _encode_email_address(self, addr): + # Input: an email address, e.g. "foo@example.com" + # + # Output: the email address as a mailto link, with each character + # of the address encoded as either a decimal or hex entity, in + # the hopes of foiling most address harvesting spam bots. E.g.: + # + # <a href="mailto:foo@e + # xample.com">foo + # @example.com</a> + # + # Based on a filter by Matthew Wickline, posted to the BBEdit-Talk + # mailing list: <http://tinyurl.com/yu7ue> + chars = [_xml_encode_email_char_at_random(ch) + for ch in "mailto:" + addr] + # Strip the mailto: from the visible part. + addr = '<a href="%s">%s</a>' \ + % (''.join(chars), ''.join(chars[7:])) + return addr + + def _do_link_patterns(self, text): + """Caveat emptor: there isn't much guarding against link + patterns being formed inside other standard Markdown links, e.g. + inside a [link def][like this]. + + Dev Notes: *Could* consider prefixing regexes with a negative + lookbehind assertion to attempt to guard against this. 
+ """ + link_from_hash = {} + for regex, repl in self.link_patterns: + replacements = [] + for match in regex.finditer(text): + if hasattr(repl, "__call__"): + href = repl(match) + else: + href = match.expand(repl) + replacements.append((match.span(), href)) + for (start, end), href in reversed(replacements): + escaped_href = ( + href.replace('"', '"') # b/c of attr quote + # To avoid markdown <em> and <strong>: + .replace('*', g_escape_table['*']) + .replace('_', g_escape_table['_'])) + link = '<a href="%s">%s</a>' % (escaped_href, text[start:end]) + hash = _hash_text(link) + link_from_hash[hash] = link + text = text[:start] + hash + text[end:] + for hash, link in link_from_hash.items(): + text = text.replace(hash, link) + return text + + def _unescape_special_chars(self, text): + # Swap back in all the special characters we've hidden. + for ch, hash in g_escape_table.items(): + text = text.replace(hash, ch) + return text + + def _outdent(self, text): + # Remove one level of line-leading tabs or spaces + return self._outdent_re.sub('', text) + + +class MarkdownWithExtras(Markdown): + """A markdowner class that enables most extras: + + - footnotes + - code-color (only has effect if 'pygments' Python module on path) + + These are not included: + - pyshell (specific to Python-related documenting) + - code-friendly (because it *disables* part of the syntax) + - link-patterns (because you need to specify some actual + link-patterns anyway) + """ + extras = ["footnotes", "code-color"] + + +#---- internal support functions + +class UnicodeWithAttrs(unicode): + """A subclass of unicode used for the return value of conversion to + possibly attach some attributes. E.g. the "toc_html" attribute when + the "toc" extra is used. + """ + _toc = None + @property + def toc_html(self): + """Return the HTML for the current TOC. + + This expects the `_toc` attribute to have been set on this instance. 
+ """ + if self._toc is None: + return None + + def indent(): + return ' ' * (len(h_stack) - 1) + lines = [] + h_stack = [0] # stack of header-level numbers + for level, id, name in self._toc: + if level > h_stack[-1]: + lines.append("%s<ul>" % indent()) + h_stack.append(level) + elif level == h_stack[-1]: + lines[-1] += "</li>" + else: + while level < h_stack[-1]: + h_stack.pop() + if not lines[-1].endswith("</li>"): + lines[-1] += "</li>" + lines.append("%s</ul></li>" % indent()) + lines.append(u'%s<li><a href="#%s">%s</a>' % ( + indent(), id, name)) + while len(h_stack) > 1: + h_stack.pop() + if not lines[-1].endswith("</li>"): + lines[-1] += "</li>" + lines.append("%s</ul>" % indent()) + return '\n'.join(lines) + '\n' + + +_slugify_strip_re = re.compile(r'[^\w\s-]') +_slugify_hyphenate_re = re.compile(r'[-\s]+') +def _slugify(value): + """ + Normalizes string, converts to lowercase, removes non-alpha characters, + and converts spaces to hyphens. + + From Django's "django/template/defaultfilters.py". 
+ """ + import unicodedata + value = unicodedata.normalize('NFKD', value).encode('ascii', 'ignore') + value = unicode(_slugify_strip_re.sub('', value).strip().lower()) + return _slugify_hyphenate_re.sub('-', value) + +# From http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/52549 +def _curry(*args, **kwargs): + function, args = args[0], args[1:] + def result(*rest, **kwrest): + combined = kwargs.copy() + combined.update(kwrest) + return function(*args + rest, **combined) + return result + +# Recipe: regex_from_encoded_pattern (1.0) +def _regex_from_encoded_pattern(s): + """'foo' -> re.compile(re.escape('foo')) + '/foo/' -> re.compile('foo') + '/foo/i' -> re.compile('foo', re.I) + """ + if s.startswith('/') and s.rfind('/') != 0: + # Parse it: /PATTERN/FLAGS + idx = s.rfind('/') + pattern, flags_str = s[1:idx], s[idx+1:] + flag_from_char = { + "i": re.IGNORECASE, + "l": re.LOCALE, + "s": re.DOTALL, + "m": re.MULTILINE, + "u": re.UNICODE, + } + flags = 0 + for char in flags_str: + try: + flags |= flag_from_char[char] + except KeyError: + raise ValueError("unsupported regex flag: '%s' in '%s' " + "(must be one of '%s')" + % (char, s, ''.join(flag_from_char.keys()))) + return re.compile(s[1:idx], flags) + else: # not an encoded regex + return re.compile(re.escape(s)) + +# Recipe: dedent (0.1.2) +def _dedentlines(lines, tabsize=8, skip_first_line=False): + """_dedentlines(lines, tabsize=8, skip_first_line=False) -> dedented lines + + "lines" is a list of lines to dedent. + "tabsize" is the tab width to use for indent width calculations. + "skip_first_line" is a boolean indicating if the first line should + be skipped for calculating the indent width and for dedenting. + This is sometimes useful for docstrings and similar. + + Same as dedent() except operates on a sequence of lines. Note: the + lines list is modified **in-place**. 
+ """ + DEBUG = False + if DEBUG: + print "dedent: dedent(..., tabsize=%d, skip_first_line=%r)"\ + % (tabsize, skip_first_line) + indents = [] + margin = None + for i, line in enumerate(lines): + if i == 0 and skip_first_line: continue + indent = 0 + for ch in line: + if ch == ' ': + indent += 1 + elif ch == '\t': + indent += tabsize - (indent % tabsize) + elif ch in '\r\n': + continue # skip all-whitespace lines + else: + break + else: + continue # skip all-whitespace lines + if DEBUG: print "dedent: indent=%d: %r" % (indent, line) + if margin is None: + margin = indent + else: + margin = min(margin, indent) + if DEBUG: print "dedent: margin=%r" % margin + + if margin is not None and margin > 0: + for i, line in enumerate(lines): + if i == 0 and skip_first_line: continue + removed = 0 + for j, ch in enumerate(line): + if ch == ' ': + removed += 1 + elif ch == '\t': + removed += tabsize - (removed % tabsize) + elif ch in '\r\n': + if DEBUG: print "dedent: %r: EOL -> strip up to EOL" % line + lines[i] = lines[i][j:] + break + else: + raise ValueError("unexpected non-whitespace char %r in " + "line %r while removing %d-space margin" + % (ch, line, margin)) + if DEBUG: + print "dedent: %r: %r -> removed %d/%d"\ + % (line, ch, removed, margin) + if removed == margin: + lines[i] = lines[i][j+1:] + break + elif removed > margin: + lines[i] = ' '*(removed-margin) + lines[i][j+1:] + break + else: + if removed: + lines[i] = lines[i][removed:] + return lines + +def _dedent(text, tabsize=8, skip_first_line=False): + """_dedent(text, tabsize=8, skip_first_line=False) -> dedented text + + "text" is the text to dedent. + "tabsize" is the tab width to use for indent width calculations. + "skip_first_line" is a boolean indicating if the first line should + be skipped for calculating the indent width and for dedenting. + This is sometimes useful for docstrings and similar. 
+ + textwrap.dedent(s), but don't expand tabs to spaces + """ + lines = text.splitlines(1) + _dedentlines(lines, tabsize=tabsize, skip_first_line=skip_first_line) + return ''.join(lines) + + +class _memoized(object): + """Decorator that caches a function's return value each time it is called. + If called later with the same arguments, the cached value is returned, and + not re-evaluated. + + http://wiki.python.org/moin/PythonDecoratorLibrary + """ + def __init__(self, func): + self.func = func + self.cache = {} + def __call__(self, *args): + try: + return self.cache[args] + except KeyError: + self.cache[args] = value = self.func(*args) + return value + except TypeError: + # uncachable -- for instance, passing a list as an argument. + # Better to not cache than to blow up entirely. + return self.func(*args) + def __repr__(self): + """Return the function's docstring.""" + return self.func.__doc__ + + +def _xml_oneliner_re_from_tab_width(tab_width): + """Standalone XML processing instruction regex.""" + return re.compile(r""" + (?: + (?<=\n\n) # Starting after a blank line + | # or + \A\n? # the beginning of the doc + ) + ( # save in $1 + [ ]{0,%d} + (?: + <\?\w+\b\s+.*?\?> # XML processing instruction + | + <\w+:\w+\b\s+.*?/> # namespaced single tag + ) + [ \t]* + (?=\n{2,}|\Z) # followed by a blank line or end of document + ) + """ % (tab_width - 1), re.X) +_xml_oneliner_re_from_tab_width = _memoized(_xml_oneliner_re_from_tab_width) + +def _hr_tag_re_from_tab_width(tab_width): + return re.compile(r""" + (?: + (?<=\n\n) # Starting after a blank line + | # or + \A\n? # the beginning of the doc + ) + ( # save in \1 + [ ]{0,%d} + <(hr) # start tag = \2 + \b # word break + ([^<>])*? 
# + /?> # the matching end tag + [ \t]* + (?=\n{2,}|\Z) # followed by a blank line or end of document + ) + """ % (tab_width - 1), re.X) +_hr_tag_re_from_tab_width = _memoized(_hr_tag_re_from_tab_width) + + +def _xml_encode_email_char_at_random(ch): + r = random() + # Roughly 10% raw, 45% hex, 45% dec. + # '@' *must* be encoded. I [John Gruber] insist. + # Issue 26: '_' must be encoded. + if r > 0.9 and ch not in "@_": + return ch + elif r < 0.45: + # The [1:] is to drop leading '0': 0x63 -> x63 + return '&#%s;' % hex(ord(ch))[1:] + else: + return '&#%s;' % ord(ch) + + + +#---- mainline + +class _NoReflowFormatter(optparse.IndentedHelpFormatter): + """An optparse formatter that does NOT reflow the description.""" + def format_description(self, description): + return description or "" + +def _test(): + import doctest + doctest.testmod() + +def main(argv=None): + if argv is None: + argv = sys.argv + if not logging.root.handlers: + logging.basicConfig() + + usage = "usage: %prog [PATHS...]" + version = "%prog "+__version__ + parser = optparse.OptionParser(prog="markdown2", usage=usage, + version=version, description=cmdln_desc, + formatter=_NoReflowFormatter()) + parser.add_option("-v", "--verbose", dest="log_level", + action="store_const", const=logging.DEBUG, + help="more verbose output") + parser.add_option("--encoding", + help="specify encoding of text content") + parser.add_option("--html4tags", action="store_true", default=False, + help="use HTML 4 style for empty element tags") + parser.add_option("-s", "--safe", metavar="MODE", dest="safe_mode", + help="sanitize literal HTML: 'escape' escapes " + "HTML meta chars, 'replace' replaces with an " + "[HTML_REMOVED] note") + parser.add_option("-x", "--extras", action="append", + help="Turn on specific extra features (not part of " + "the core Markdown spec). See above.") + parser.add_option("--use-file-vars", + help="Look for and use Emacs-style 'markdown-extras' " + "file var to turn on extras. 
See " + "<http://code.google.com/p/python-markdown2/wiki/Extras>.") + parser.add_option("--link-patterns-file", + help="path to a link pattern file") + parser.add_option("--self-test", action="store_true", + help="run internal self-tests (some doctests)") + parser.add_option("--compare", action="store_true", + help="run against Markdown.pl as well (for testing)") + parser.set_defaults(log_level=logging.INFO, compare=False, + encoding="utf-8", safe_mode=None, use_file_vars=False) + opts, paths = parser.parse_args() + log.setLevel(opts.log_level) + + if opts.self_test: + return _test() + + if opts.extras: + extras = {} + for s in opts.extras: + splitter = re.compile("[,;: ]+") + for e in splitter.split(s): + if '=' in e: + ename, earg = e.split('=', 1) + try: + earg = int(earg) + except ValueError: + pass + else: + ename, earg = e, None + extras[ename] = earg + else: + extras = None + + if opts.link_patterns_file: + link_patterns = [] + f = open(opts.link_patterns_file) + try: + for i, line in enumerate(f.readlines()): + if not line.strip(): continue + if line.lstrip().startswith("#"): continue + try: + pat, href = line.rstrip().rsplit(None, 1) + except ValueError: + raise MarkdownError("%s:%d: invalid link pattern line: %r" + % (opts.link_patterns_file, i+1, line)) + link_patterns.append( + (_regex_from_encoded_pattern(pat), href)) + finally: + f.close() + else: + link_patterns = None + + from os.path import join, dirname, abspath, exists + markdown_pl = join(dirname(dirname(abspath(__file__))), "test", + "Markdown.pl") + for path in paths: + if opts.compare: + print "==== Markdown.pl ====" + perl_cmd = 'perl %s "%s"' % (markdown_pl, path) + o = os.popen(perl_cmd) + perl_html = o.read() + o.close() + sys.stdout.write(perl_html) + print "==== markdown2.py ====" + html = markdown_path(path, encoding=opts.encoding, + html4tags=opts.html4tags, + safe_mode=opts.safe_mode, + extras=extras, link_patterns=link_patterns, + use_file_vars=opts.use_file_vars) + 
sys.stdout.write( + html.encode(sys.stdout.encoding or "utf-8", 'xmlcharrefreplace')) + if extras and "toc" in extras: + log.debug("toc_html: " + + html.toc_html.encode(sys.stdout.encoding or "utf-8", 'xmlcharrefreplace')) + if opts.compare: + test_dir = join(dirname(dirname(abspath(__file__))), "test") + if exists(join(test_dir, "test_markdown2.py")): + sys.path.insert(0, test_dir) + from test_markdown2 import norm_html_from_html + norm_html = norm_html_from_html(html) + norm_perl_html = norm_html_from_html(perl_html) + else: + norm_html = html + norm_perl_html = perl_html + print "==== match? %r ====" % (norm_perl_html == norm_html) + + +if __name__ == "__main__": + sys.exit( main(sys.argv) ) + diff -r bc5a004bfcca -r 5101c0cba85d bundled/markdown2/setup.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bundled/markdown2/setup.py Fri Jun 18 22:28:31 2010 -0400 @@ -0,0 +1,66 @@ +#!/usr/bin/env python + +"""markdown2: A fast and complete Python implementaion of Markdown. + +Markdown is a text-to-HTML filter; it translates an easy-to-read / +easy-to-write structured text format into HTML. Markdown's text +format is most similar to that of plain text email, and supports +features such as headers, *emphasis*, code blocks, blockquotes, and +links. -- http://daringfireball.net/projects/markdown/ + +This is a fast and complete Python implementation of the Markdown +spec. 
+""" + +import os +import sys +import distutils +from distutils.core import setup + +sys.path.insert(0, os.path.join(os.path.dirname(__file__), "lib")) +try: + import markdown2 +finally: + del sys.path[0] + +classifiers = """\ +Development Status :: 5 - Production/Stable +Intended Audience :: Developers +License :: OSI Approved :: MIT License +Programming Language :: Python +Operating System :: OS Independent +Topic :: Software Development :: Libraries :: Python Modules +Topic :: Software Development :: Documentation +Topic :: Text Processing :: Filters +Topic :: Text Processing :: Markup :: HTML +""" + +if sys.version_info < (2, 3): + # Distutils before Python 2.3 doesn't accept classifiers. + _setup = setup + def setup(**kwargs): + if kwargs.has_key("classifiers"): + del kwargs["classifiers"] + _setup(**kwargs) + +doclines = __doc__.split("\n") +script = (sys.platform == "win32" and "lib\\markdown2.py" or "bin/markdown2") + +setup( + name="markdown2", + version=markdown2.__version__, + maintainer="Trent Mick", + maintainer_email="trentm@gmail.com", + author="Trent Mick", + author_email="trentm@gmail.com", + url="http://code.google.com/p/python-markdown2/", + license="http://www.opensource.org/licenses/mit-license.php", + platforms=["any"], + py_modules=["markdown2"], + package_dir={"": "lib"}, + scripts=[script], + description=doclines[0], + classifiers=filter(None, classifiers.split("\n")), + long_description="\n".join(doclines[2:]), +) +