#!/usr/bin/env python
# -*- coding: iso8859-1 -*-
"""highlighter

Filter for LaTeX input that contains ``\\nwbegincode``/``\\nwendcode`` chunk
markers: every code chunk is re-emitted inside a ``lstlisting`` environment
and the required ``listings`` setup is inserted in front of
``\\begin{document}``.
"""

import codecs
import optparse
import os
import re
import sys
import time

try:
    from StringIO import StringIO
except ImportError:  # the StringIO module does not exist on Python 3
    from io import StringIO

__program_name__ = "highlighter"
__author__ = "$Author: fuller $"
__version__ = 0
# Source: $Id: highlighter.nw 26 2008-11-12 09:56:35Z fuller $

# Defaults shared by the option parser and by a freshly built Highlighter.
_DEFAULT_LANGUAGE = "Python"
_DEFAULT_ENCODING = "latin_1"

# LaTeX inserted immediately before \begin{document}; both %s slots receive
# the listings language name.  Every key=value pair must end with a comma,
# otherwise the following pair is swallowed into the previous value.
_PREAMBLE_TEMPLATE = r"""\usepackage{marvosym}
\definecolor{mblue}{rgb}{0,0,0.5}
\definecolor{mgreen}{rgb}{0,0.5,0}
\definecolor{mgray}{gray}{0.25}
\usepackage{listings}
\lstloadlanguages{%s}
\lstset{language=%s,
extendedchars=true,
breaklines=true,
breakatwhitespace=true,
prebreak=\scriptsize\Righttorque,
postbreak=\scriptsize\Lefttorque,
basicstyle=\small\ttfamily,
identifierstyle=\color{mgray},
commentstyle=\color[rgb]{0,0.5,0},
keywordstyle=\color[rgb]{0,0.2,0.5}\textbf,
stringstyle=\color[rgb]{0.5,0,0}\textbf,
frame=single,
tabsize=2,
columns=[l]flexible,
tab=\rightarrow,
xleftmargin=10pt,
showtabs=true,
morecomment=[s][\color{mgreen}]{\"\"\"}{\"\"\"},
morecomment=[s][\color{mblue}\textit]{<<}{\>\>}}"""


class Highlighter(object):
    """Use listings package to highlight code chunks."""

    def __init__(self):
        # default mode is documentation
        self.__codeMode = False
        self.__outputBuffer = StringIO()
        self.__codeBuffer = StringIO()
        self.__beginDocumentFound = False
        # Option defaults, so run() also works when parseOptions() was
        # never called (e.g. when the class is used as a library).
        self.__verbose = False
        self.__language = _DEFAULT_LANGUAGE
        self.__encoding = _DEFAULT_ENCODING
        # patterns
        self.__beginDoc = re.compile(r"^\\begin\{document\}")
        self.__beginCode = re.compile(r"\\nwbegincode\{\d+\}")
        self.__endCode = re.compile(r"\\nwendcode\{\}")
        self.__refLine = re.compile(r"\\LA\{\}")
        self.__codeName = re.compile(r"\\moddef\{")
        self.__rBracket = re.compile(r"\}")
        self.__plusSign = re.compile(r"\\plusendmoddef")

    def parseOptions(self, args=None):
        """Parse command line options and store them on the instance.

        args -- list of argument strings without the program name; when
                None, optparse falls back to sys.argv[1:].
        """
        usage = "usage: %prog [options]"
        parser = optparse.OptionParser(
            usage=usage,
            version="%s %s" % (__program_name__, __version__))
        parser.add_option("-l", "--language", metavar="LANG",
                          dest="language", default=_DEFAULT_LANGUAGE,
                          help="programming language")
        parser.add_option("-e", "--encoding", metavar="ENCODING",
                          dest="encoding", default=_DEFAULT_ENCODING,
                          help="input encoding")
        parser.add_option("-v", "--verbose", action="store_true",
                          dest="verbose", default=False,
                          help="print status messages")
        (options, _unused) = parser.parse_args(args)
        self.__verbose = options.verbose
        self.__language = options.language
        self.__encoding = options.encoding

    def run(self, fd):
        """Read lines from file descriptor

        fd -- iterable of lines; byte strings are decoded with the
              configured encoding, text strings are used as they are.

        Returns the converted document encoded with the configured
        encoding.  Exits with status 1 when the encoding is unknown.
        """
        self.__message("Running highlighter...")
        # Validate the encoding once up front: this also protects the final
        # encode() below when the input happens to be empty.
        try:
            codecs.lookup(self.__encoding)
        except LookupError:
            sys.stderr.write("%s: Unknown encoding\n" % self.__encoding)
            sys.stderr.flush()
            sys.exit(1)
        for raw_line in fd:
            if isinstance(raw_line, bytes):
                line = raw_line.decode(self.__encoding)
            else:
                line = raw_line
            # check mode
            if not self.__beginDocumentFound and self.__beginDoc.search(line):
                self.__beginDocument(line)
            elif self.__beginCode.search(line):
                self.__setCodeMode(line)
            elif self.__endCode.search(line):
                self.__setDocMode(line)
            elif self.__refLine.search(line):
                self.__refChunkLine(line)
            else:
                # collect all lines in output buffer
                self.__collect(line)
        return self.__outputBuffer.getvalue().encode(self.__encoding)

    def __beginDocument(self, line):
        """Emit the listings setup followed by the \\begin{document} line."""
        preamble = _PREAMBLE_TEMPLATE % (self.__language, self.__language)
        self.__outputBuffer.write(preamble)
        self.__outputBuffer.write(line)
        self.__beginDocumentFound = True

    def __setCodeMode(self, line):
        """Switch to code mode and typeset the chunk name, if there is one."""
        self.__message("> Set code mode")
        # search for code chunk name and its closing }
        nameStart = self.__codeName.search(line)
        rest = line[nameStart.end():] if nameStart else None
        closing = self.__rBracket.search(rest) if nameStart else None
        if closing:
            # replace unallowed chars for latex before writing
            name = rest[:closing.start()].replace("_", "{\\_}")
            self.__outputBuffer.write("\n\\textlangle")
            self.__outputBuffer.write("\\textit{%s}" % name)
            self.__outputBuffer.write("\\textrangle")
            # + needed if \plusendmoddef
            if self.__plusSign.search(rest):
                self.__outputBuffer.write("+")
            self.__outputBuffer.write("$\\equiv$\n")
        else:
            # No chunk name, or a \moddef{ without closing brace: pass the
            # line through unchanged instead of failing on a missing match.
            self.__outputBuffer.write(line)
        self.__codeMode = True

    def __setDocMode(self, line):
        """Flush the collected chunk as a listing and return to doc mode."""
        self.__message("> Set doc mode")
        self.__highlight()
        # clean code buffer
        self.__codeBuffer = StringIO()
        self.__codeMode = False

    def __refChunkLine(self, line):
        """Store a chunk reference line, using << >> instead of \\LA{} \\RA{}."""
        self.__message("> Got single doc line")
        line = line.replace("\\LA{}", "<<").replace("\\RA{}", ">>")
        self.__codeBuffer.write(line)

    def __collect(self, line):
        """Append line to the code buffer (unescaped) or to the output."""
        self.__message(" %s" % line)
        if self.__codeMode:
            # undo the LaTeX escaping of braces and backslashes
            line = line.replace("\\{", "{").replace("\\}", "}")
            line = line.replace("\\\\", "\\")
            self.__codeBuffer.write(line)
        else:
            self.__outputBuffer.write(line)

    def __highlight(self):
        """Write the buffered code wrapped in a lstlisting environment."""
        self.__message("> Set lstlisting and code")
        self.__outputBuffer.write("\\begin{lstlisting}\n")
        self.__outputBuffer.write(self.__codeBuffer.getvalue())
        self.__outputBuffer.write("\\end{lstlisting}\n")

    def __message(self, message):
        """Print a status message to stderr when verbose mode is on."""
        if not self.__verbose:
            return
        # The message is encoded by hand, so it has to go to the binary
        # layer where the stream distinguishes text from bytes.
        stream = getattr(sys.stderr, "buffer", sys.stderr)
        stream.write(message.encode(self.__encoding).strip())
        stream.write(b"\n")
        stream.flush()


def main(args):
    """Filter stdin to stdout; args is the full argument vector (sys.argv)."""
    highlighter = Highlighter()
    highlighter.parseOptions(args[1:])
    # Use the binary layers where they exist so that the --encoding option,
    # not the locale, decides how the bytes are interpreted.
    source = getattr(sys.stdin, "buffer", sys.stdin)
    sink = getattr(sys.stdout, "buffer", sys.stdout)
    sink.write(highlighter.run(source))


if __name__ == "__main__":
    main(sys.argv)