#!/usr/bin/env python
# txt2tags - generic text conversion tool
# http://txt2tags.org
#
# Copyright 2001-2010 Aurelio Jargas
#
# License: http://www.gnu.org/licenses/gpl-2.0.txt
# Subversion: http://svn.txt2tags.org
# Bug tracker: http://bugs.txt2tags.org
#
########################################################################
#
# BORING CODE EXPLANATION AHEAD
#
# Just read it if you wish to understand how the txt2tags code works.
#
########################################################################
#
# The code that [1] parses the marked text is separated from the
# code that [2] inserts the target tags.
#
# [1] made by: def convert()
# [2] made by: class BlockMaster
#
# The structures of the marked text are identified and its contents are
# extracted into a data holder (Python lists and dictionaries).
#
# When parsing the source file, the blocks (para, lists, quote, table)
# are opened with BlockMaster, right when found. Then their contents,
# which span several lines, are fed into a special holder on the
# BlockMaster instance. Only when the block is closed are the target
# tags inserted for the full block as a whole, in one pass. This way, we
# have better control over blocks. Much better than the previous line by
# line approach.
#
# In other words, whenever inside a block, the parser *holds* the tag
# insertion process, waiting until the full block is read. That was
# needed primarily to close paragraphs for the XHTML target, but
# proved to be a very good addition, improving much other processing.
#
# -------------------------------------------------------------------
#
# These important classes are all documented:
# CommandLine, SourceDocument, ConfigMaster, ConfigLines.
#
# There is a RAW Config format and all kind of configuration is first
# converted to this format. Then a generic method parses it.
#
# These functions get information about the input file(s) and take
# care of the init processing:
# get_infiles_config(), process_source_file() and convert_this_files()
#
########################################################################
#XXX Python coding warning
# Avoid common mistakes:
# - do NOT use newlist=list instead of newlist=list[:]
# - do NOT use newdic=dic instead of newdic=dic.copy()
# - do NOT use dic[key] instead of dic.get(key)
# - do NOT use del dic[key] without checking has_key() first
#XXX Smart Image Align don't work if the image is a link
# Can't fix that because the image is expanded together with the
# link, at the linkbank filling moment. Only the image is passed
# to parse_images(), not the full line, so it is always 'middle'.
#XXX Paragraph separation not valid inside Quote
# Quote will not have <p></p> inside, instead will close and open
# again the <blockquote>. This really sux in CSS, when defining a
# different background color. Still don't know how to fix it.
#XXX TODO (maybe)
# New mark or macro which expands to an anchor full title.
# It is necessary to parse the full document in this order:
# DONE 1st scan: HEAD: get all settings, including %!includeconf
# DONE 2nd scan: BODY: expand includes & apply %!preproc
# 3rd scan: BODY: read titles and compose TOC info
# 4th scan: BODY: full parsing, expanding [#anchor] 1st
# Steps 2 and 3 can be made together, with no tag adding.
# Two complete body scans will be *slow*, don't know if it worths.
# One solution may be add the titles as postproc rules
##############################################################################
# User config: every switch below is an integer flag (1=ON, 0=OFF)

# Use gettext for i18ned messages? (default is 1)
USE_I18N = 1

# Show debug messages in colors? (default is 1)
COLOR_DEBUG = 1

# Is your terminal background color light? (default is 0)
BG_LIGHT = 0

# Use lowercased HTML tags instead of upper? (default is 0)
HTML_LOWER = 0
##############################################################################
# These are all the core Python modules used by txt2tags (KISS!)
import re, os, sys, time, getopt
# The CSV module is new in Python version 2.3
try:
import csv
except ImportError:
csv = None
# Program information: name and version first, then contact details
my_name = 'txt2tags'
my_version = '2.6'
my_url = 'http://txt2tags.org'
my_email = 'verde@aurelio.net'
# i18n - just use if available
#
# Binds the module-level name `_` to the message translation function.
# When USE_I18N is off, or the gettext catalog cannot be loaded, `_`
# is the identity function, so _('text') can always be called safely.
if USE_I18N:
    try:
        import gettext
        # If your locale dir is different, change it here
        cat = gettext.translation('txt2tags', localedir='/usr/share/locale/')
        _ = cat.gettext
    except Exception:
        # Best effort: a missing or unreadable catalog must never stop
        # the program. Catching Exception (instead of a bare except)
        # still lets KeyboardInterrupt and SystemExit propagate.
        _ = lambda x: x
else:
    _ = lambda x: x
# FLAGS : the conversion related flags , may be used in %!options
# OPTIONS : the conversion related options, may be used in %!options
# ACTIONS : the other behavior modifiers, valid on command line only
# MACROS : the valid macros with their default values for formatting
# SETTINGS: global miscellaneous settings, valid on RC file only
# NO_TARGET: actions that don't require a target specification
# NO_MULTI_INPUT: actions that don't accept more than one input file
# CONFIG_KEYWORDS: the valid %!key:val keywords
#
# FLAGS and OPTIONS are configs that affect the converted document.
# They usually have also a --no-<option> to turn them OFF.
#
# ACTIONS are needed because, when handling multiple input files, strange
# behavior could occur, such as using the command line interface for the
# first file and the GUI for the second. There is no --no-<action>.
# --version and --help inside %!options would also be odd.
#
# Names of all the available targets, sorted alphabetically in place
TARGETS = [
    'html', 'xhtml', 'sgml', 'dbk', 'tex', 'lout',
    'man', 'mgp', 'wiki', 'gwiki', 'doku', 'pmw',
    'moin', 'pm6', 'txt', 'art', 'adoc', 'creole',
]
TARGETS.sort()

# Conversion related flags with their default values (1=ON, 0=OFF)
# NOTE(review): 'css-suggar' looks like a misspelled alias of
# 'css-sugar' kept for compatibility - confirm before removing
FLAGS = {
    'headers': 1,
    'enum-title': 0,
    'mask-email': 0,
    'toc-only': 0,
    'toc': 0,
    'rc': 1,
    'css-sugar': 0,
    'css-suggar': 0,
    'css-inside': 0,
    'quiet': 0,
    'slides': 0,
}

# Conversion related options with their default values
OPTIONS = {
    'target': '',
    'toc-level': 3,
    'style': '',
    'infile': '',
    'outfile': '',
    'encoding': '',
    'config-file': '',
    'split': 0,
    'lang': '',
    'width': 0,
    'height': 0,
    'art-chars': '',
    'show-config-value': '',
}

# Other behavior modifiers, all OFF by default
ACTIONS = {
    'help': 0,
    'version': 0,
    'gui': 0,
    'verbose': 0,
    'debug': 0,
    'dump-config': 0,
    'dump-source': 0,
    'targets': 0,
}

# Valid macros with their default formatting values
MACROS = {
    'date': '%Y%m%d',
    'infile': '%f',
    'mtime': '%Y%m%d',
    'outfile': '%f',
}

# Global miscellaneous settings (for future use)
SETTINGS = {}

# Actions that don't require a target specification
NO_TARGET = [
    'help',
    'version',
    'gui',
    'toc-only',
    'dump-config',
    'dump-source',
    'targets',
]

# Actions that don't accept more than one input file
NO_MULTI_INPUT = [
    'gui',
    'dump-config',
    'dump-source',
]

# The valid %!key:val keywords
CONFIG_KEYWORDS = [
    'target',
    'encoding',
    'style',
    'options',
    'preproc',
    'postproc',
    'guicolors',
]
# Maps each target identifier to its description, passed through _()
# so the text can be translated
TARGET_NAMES = {
    'html': _('HTML page'),
    'xhtml': _('XHTML page'),
    'sgml': _('SGML document'),
    'dbk': _('DocBook document'),
    'tex': _('LaTeX document'),
    'lout': _('Lout document'),
    'man': _('UNIX Manual page'),
    'mgp': _('MagicPoint presentation'),
    'wiki': _('Wikipedia page'),
    'gwiki': _('Google Wiki page'),
    'doku': _('DokuWiki page'),
    'pmw': _('PmWiki page'),
    'moin': _('MoinMoin page'),
    'pm6': _('PageMaker document'),
    'txt': _('Plain Text'),
    'art': _('ASCII Art text'),
    'adoc': _('AsciiDoc document'),
    'creole': _('Creole 1.0 document'),
}
# Runtime switches - do not edit any of them here, please use the
# command line option (or mark) named beside each one instead
DEBUG = 0       # --debug
VERBOSE = 0     # -v, -vv or -vvv
QUIET = 0       # --quiet
GUI = 0         # --gui
AUTOTOC = 1     # --no-toc or %%toc

# Default dimensions - do not edit here either, please use the
# command line option named beside each one instead
DFT_TEXT_WIDTH = 72     # --width
DFT_SLIDE_WIDTH = 80    # --width
DFT_SLIDE_HEIGHT = 25   # --height
# ASCII Art config
AA_KEYS = 'corner border side bar1