--- /dev/null
+#!/usr/bin/env python
+#
+# python script to process makedoc instructions in a source file and produce
+# DocBook XML output
+#
+
+#
+# This performs 3 stages of processing on its input, in a similar fashion
+# to makedoc:
+#
+# 1. Discard everything outside of /* */ comments
+# 2. Identify lines which contain commands (a single uppercase word)
+# 3. Apply each command to the text of the following lines (up to the next
+# command or the end of the comment block), to produce some output
+#
+# The resulting output contains one or more DocBook XML refentry elements.
+#
+# To make the output a valid XML document which can be xincluded, those refentry
+# elements are contained by a refcontainer element. refcontainer is not part of
+# the DocBook DTD and should be removed by a suitable XSLT.
+#
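+# For example, given a source file containing the comment below (a
+# hypothetical fragment, not from any real source file):
+#
+# /*
+# FUNCTION
+# <<frob>>---frobnicate a value
+#
+# SYNOPSIS
+# int frob(int <[x]>);
+# */
+#
+# stages 1 and 2 reduce it to (roughly) the command/text pairs
+# ('FUNCTION', '<<frob>>---frobnicate a value\n') and
+# ('SYNOPSIS', 'int frob(int <[x]>);\n'), plus synthetic START and END
+# entries, which stage 3 turns into the refnamediv and refsynopsisdiv
+# content of a refentry.
+#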
+
+from __future__ import print_function
+
+import sys
+import re
+from optparse import OptionParser
+import lxml.etree
+import ply.lex as lex
+import ply.yacc as yacc
+
+rootelement = None # root element of the XML tree
+refentry = None # the current refentry
+verbose = 0
+
+def dump(s, stage, threshold = 1):
+ if verbose > threshold:
+ print('*' * 40, file=sys.stderr)
+ print(stage, file=sys.stderr)
+ print('*' * 40, file=sys.stderr)
+ print('%s' % s, file=sys.stderr)
+ print('*' * 40, file=sys.stderr)
+
+#
+# Stage 1
+#
+
+def skip_whitespace_and_stars(i, src):
+
+    while i < len(src) and (src[i].isspace() or (src[i] == '*' and i + 1 < len(src) and src[i+1] != '/')):
+ i += 1
+
+ return i
+
+# Discard everything not inside '/* */'-style comments which start at column 0
+# Discard any leading blank space or '*'
+# Discard a single leading '.'
+# Discard blank lines after a blank line
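+# e.g. the hypothetical input '/*\n * frob\n */' yields 'frob\n\nEND\n'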
+def comment_contents_generator(src):
+ i = 0
+
+ while i < len(src) - 2:
+ if src[i] == '\n' and src[i+1] == '/' and src[i+2] == '*':
+ i = i + 3
+
+ i = skip_whitespace_and_stars(i, src)
+
+            if i < len(src) and src[i] == '.':
+ i += 1
+
+ while i < len(src):
+ if src[i] == '\n':
+ yield '\n'
+ i += 1
+
+ # allow a single blank line
+ if i < len(src) and src[i] == '\n':
+ yield '\n'
+ i += 1
+
+ i = skip_whitespace_and_stars(i, src)
+
+                elif src[i] == '*' and i + 1 < len(src) and src[i+1] == '/':
+ i = i + 2
+ # If we have just output \n\n, this adds another blank line.
+ # This is the only way a double blank line can occur.
+ yield '\nEND\n'
+ break
+ else:
+ yield src[i]
+ i += 1
+ else:
+ i += 1
+
+def remove_noncomments(src):
+ src = '\n' + src
+ dst = ''.join(comment_contents_generator(src))
+ dump(dst, 'extracted from comments')
+
+ return dst
+
+#
+# Stage 2
+#
+
+# A command is a single word of at least 3 characters, all uppercase (or
+# underscores), alone on a line
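+# e.g. 'RETURNS' and 'TRAD_SYNOPSIS' are commands; 'Returns', 'o+' and
+# ordinary lines of text are not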
+def iscommand(l):
+    return bool(re.match(r'^[A-Z_]{3,}\s*$', l))
+
+def command_block_generator(content):
+ command = 'START'
+ text = ''
+
+ for l in content.splitlines():
+ if iscommand(l):
+ yield (command, text)
+ command = l.rstrip()
+ text = ''
+ else:
+ text = text + l + '\n'
+ yield (command, text)
+
+# Look for commands, which give instructions on how to process the following input
+def process(content):
+ content = content.lstrip()
+
+ dump(content, 'about to process for commands')
+
+ # process into a list of tuples of commands and the associated following text
+ # it is important to maintain the order of the sections the commands generate
+ processed = list(command_block_generator(content))
+
+ return processed
+
+#
+# Stage 3
+#
+
+# invoke each command on its text
+def perform(processed):
+ for i in processed:
+ c = i[0].rstrip()
+ t = i[1].strip() + '\n'
+
+ if verbose:
+ print("performing command '%s'" % c, file=sys.stderr)
+
+ if c in command_dispatch_dict:
+ command_dispatch_dict[c](c, t)
+ else:
+ print("command '%s' is not recognized" % c, file=sys.stderr)
+ # the text following an unrecognized command is discarded
+
+# FUNCTION (aka TYPEDEF)
+#
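+# The text following FUNCTION is normally a single line of the form
+# (a hypothetical example)
+#   <<frob>>, <<frobl>>---frobnicate a value
+# i.e. a comma-separated list of names in << >> markup, the '---' separator,
+# then a one-line description; multi-line and ';'-separated variants are
+# handled below
+#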
+def function(c, l):
+ global refentry
+ global rootelement
+
+ l = l.strip()
+ if verbose:
+ print('FUNCTION %s' % l, file=sys.stderr)
+
+ separator = '---'
+
+ if ';' in l:
+ # fpclassify has an unusual format we also need to handle
+ spliton = ';'
+ l = l.splitlines()[0]
+ elif len(l.splitlines()) > 1:
+ # a few pages like mktemp have two '---' lines
+ spliton = ';'
+ o = ''
+ for i in l.splitlines():
+ if separator in i:
+ o += i + ';'
+ else:
+ o += i
+ l = o[:-1]
+ else:
+ spliton = '\n'
+
+ namelist = []
+ descrlist = []
+ for a in l.split(spliton):
+ (n, d) = a.split(separator, 1)
+ namelist = namelist + n.split(',')
+ descrlist = descrlist + [d]
+
+ # only copysign and log1p use <[ ]> markup in descr,
+ # only gets() uses << >> markup
+ # but we should handle it correctly
+ descr = line_markup_convert(', '.join(descrlist))
+
+ # fpclassify includes an 'and' we need to discard
+    namelist = list(map(lambda v: re.sub('^and ', '', v.strip(), 1), namelist))
+    # strip off << >> surrounding name
+    namelist = list(map(lambda v: v.strip().lstrip('<').rstrip('>'), namelist))
+
+ if verbose:
+ print(namelist, file=sys.stderr)
+ # additional alternate names may also appear in INDEX commands
+
+ # create the root element if needed
+ if rootelement is None:
+ rootelement = lxml.etree.Element('refentrycontainer')
+
+ # FUNCTION implies starting a new refentry
+ if refentry is not None:
+ print("multiple FUNCTIONs without NEWPAGE", file=sys.stderr)
+ exit(1)
+
+ # create the refentry
+ refentry = lxml.etree.SubElement(rootelement, 'refentry')
+ refentry.append(lxml.etree.Comment(' Generated by makedocbook.py '))
+ refentry.set('id', namelist[0].lstrip('_'))
+
+ refmeta = lxml.etree.SubElement(refentry, 'refmeta')
+ # refentrytitle will be same as refdescriptor, the primary name
+ refentrytitle = lxml.etree.SubElement(refmeta, 'refentrytitle')
+ refentrytitle.text = namelist[0]
+ manvolnum = lxml.etree.SubElement(refmeta, 'manvolnum')
+ manvolnum.text = '3'
+
+ refnamediv = lxml.etree.SubElement(refentry, 'refnamediv')
+ # refdescriptor is the primary name, assume we should use the one which
+ # appears first in the list
+ refdescriptor = lxml.etree.SubElement(refnamediv, 'refdescriptor')
+ refdescriptor.text = namelist[0]
+ # refname elements exist for all alternate names
+ for n in namelist:
+ refname = lxml.etree.SubElement(refnamediv, 'refname')
+ refname.text = n
+ refpurpose = lxml.etree.SubElement(refnamediv, 'refpurpose')
+ refnamediv.replace(refpurpose, lxml.etree.fromstring('<refpurpose>' + descr + '</refpurpose>'))
+
+    # Only FUNCTION currently exists, which implies that the SYNOPSIS should be
+    # a funcsynopsis. If TYPEDEF were to be added, SYNOPSIS would need to be
+    # processed differently, probably producing a refsynopsis.
+
+# INDEX
+# may occur more than once for each FUNCTION, giving alternate names under
+# which this function should be indexed
+#
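+# e.g. an INDEX block whose text is 'frob' (a hypothetical name) adds
+# <indexterm><primary>frob</primary></indexterm>, and a <refname>frob</refname>
+# unless one already exists
+#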
+def index(c, l):
+ l = l.strip()
+
+ if verbose:
+ print('INDEX %s' % l, file=sys.stderr)
+
+ # discard anything after the first word
+ l = l.split()[0]
+
+ # add indexterm
+ # (we could just index under all the refnames, but we control the indexing
+ # separately as that is what makedoc does)
+ indexterm = lxml.etree.SubElement(refentry, 'indexterm')
+ primary = lxml.etree.SubElement(indexterm, 'primary')
+ primary.text = l
+
+ # to validate, it seems we need to maintain refentry elements in a certain order
+ refentry[:] = sorted(refentry, key = lambda x: x.tag)
+
+ # adds another alternate refname
+ refnamediv = refentry.find('refnamediv')
+
+ # as long as it doesn't already exist
+    if not refnamediv.xpath('refname[.="%s"]' % l):
+ refname = lxml.etree.SubElement(refnamediv, 'refname')
+ refname.text = l
+ if verbose > 1:
+ print('added refname %s' % l, file=sys.stderr)
+ else:
+ if verbose > 1:
+ print('duplicate refname %s discarded' % l, file=sys.stderr)
+
+ # to validate, it seems we need to maintain refnamediv elements in a certain order
+ refnamediv[:] = sorted(refnamediv, key = lambda x: x.tag)
+
+
+# SYNOPSIS aka ANSI_SYNOPSIS
+# ANSI-style synopsis
+#
+# Note that makedoc would also process <<code>> markup here, but there are no
+# such uses.
+#
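+# Typical input looks like (a hypothetical example):
+#   #include <stdio.h>
+#   int frob(int <[x]>, ...);
+# lines starting with '#' or '[' become funcsynopsisinfo, while prototypes are
+# decomposed into funcprototype elements below
+#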
+def synopsis(c, t):
+ refsynopsisdiv = lxml.etree.SubElement(refentry, 'refsynopsisdiv')
+ funcsynopsis = lxml.etree.SubElement(refsynopsisdiv, 'funcsynopsis')
+
+ s = ''
+ for l in t.splitlines():
+        if re.match(r'\s*[#[]', l):
+ # a #include, #define etc.
+ # fpclassify contains some comments in [ ] brackets
+ funcsynopsisinfo = lxml.etree.SubElement(funcsynopsis, 'funcsynopsisinfo')
+ funcsynopsisinfo.text = l.strip() + '\n'
+ else:
+ s = s + l
+
+ # a prototype without a terminating ';' is an error
+ if s.endswith(')'):
+ print("'%s' missing terminating semicolon" % l, file=sys.stderr)
+ s = s + ';'
+ exit(1)
+
+ if ';' in s:
+ synopsis_for_prototype(funcsynopsis, s)
+ s = ''
+
+    if s.strip():
+        print("surplus synopsis '%s'" % s, file=sys.stderr)
+        exit(1)
+
+def synopsis_for_prototype(funcsynopsis, s):
+ s = s.strip()
+
+ # funcsynopsis has a very detailed content model, so we need to massage the
+ # bare prototype into it. Fortunately, since the parameter names are marked
+ # up, we have enough information to do this.
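+    # For example (a sketch), the prototype 'int frob(int <[x]>)' becomes
+    #   <funcprototype><funcdef>int <function>frob</function></funcdef>
+    #   <paramdef>int <parameter>x</parameter></paramdef></funcprototype>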
+ for fp in s.split(';'):
+ fp = fp.strip()
+ if fp:
+
+ if verbose:
+ print("'%s'" % fp, file=sys.stderr)
+
+ match = re.match(r'(.*?)([\w\d]*) ?\((.*)\)', fp)
+
+ if verbose:
+ print(match.groups(), file=sys.stderr)
+
+ funcprototype = lxml.etree.SubElement(funcsynopsis, 'funcprototype')
+ funcdef = lxml.etree.SubElement(funcprototype, 'funcdef')
+ funcdef.text = match.group(1)
+ function = lxml.etree.SubElement(funcdef, 'function')
+ function.text = match.group(2)
+
+ if match.group(3).strip() == 'void':
+ void = lxml.etree.SubElement(funcprototype, 'void')
+ else:
+ # Split parameters on ',' except if it is inside ()
+                for p in re.split(r',(?![^()]*\))', match.group(3)):
+ p = p.strip()
+
+ if verbose:
+ print(p, file=sys.stderr)
+
+ if p == '...':
+ varargs = lxml.etree.SubElement(funcprototype, 'varargs')
+ else:
+ paramdef = lxml.etree.SubElement(funcprototype, 'paramdef')
+ parameter = lxml.etree.SubElement(paramdef, 'parameter')
+
+ # <[ ]> enclose the parameter name
+                        match2 = re.match(r'(.*)<\[(.*)\]>(.*)', p)
+
+ if verbose:
+ print(match2.groups(), file=sys.stderr)
+
+ paramdef.text = match2.group(1)
+ parameter.text = match2.group(2)
+ parameter.tail = match2.group(3)
+
+
+# DESCRIPTION
+# (RETURNS, ERRORS, PORTABILITY, BUGS, WARNINGS, SEEALSO, NOTES are handled the same)
+#
+# Create a refsect with a title corresponding to the command
+#
+# Nearly all of the existing DESCRIPTION contents could be transformed into
+# DocBook with a few regex substitutions. Unfortunately, pages like sprintf and
+# sscanf have very complex layouts using nested tables and itemized lists, which
+# it is best to parse in order to transform correctly.
+#
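+# The makedoc markup recognized in section text includes, for example:
+#   O+ ... O-     an itemized (bullet) list, one 'o <text>' line per item
+#   o+ ... o-     a two-column table, one 'o <text>' line per row
+#   . or |        a leading '.' or '|' sets the line in monospaced type
+#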
+
+def refsect(t, s):
+ refsect = lxml.etree.SubElement(refentry, 'refsect1')
+ title = lxml.etree.SubElement(refsect, 'title')
+ title.text = t.title()
+
+ if verbose:
+        print('%s has %d paragraphs' % (t, len(s.split('\n\n'))), file=sys.stderr)
+
+    if verbose > 1:
+        dump(s, 'before lexing')
+
+        # dump out lexer token sequence
+        lex.input(s)
+        for tok in lexer:
+            print(tok, file=sys.stderr)
+
+ # parse the section text for makedoc markup and the few pieces of texinfo
+ # markup we understand, and output an XML marked-up string
+ xml = parser.parse(s, tracking=True, debug=(verbose > 2))
+
+ dump(xml, 'after parsing')
+
+ xml = '<refsect1>' + xml + '</refsect1>'
+
+ refsect.extend(lxml.etree.fromstring(xml))
+
+def seealso(c, t):
+ refsect('SEE ALSO', t)
+
+# NEWPAGE
+#
+# start a new refentry
+
+def newpage(c, t):
+ global refentry
+ refentry = None
+
+# command dispatch table
+
+def discarded(c, t):
+ return
+
+command_dispatch_dict = {
+ 'FUNCTION' : function,
+ 'TYPEDEF' : function, # TYPEDEF is not currently used, but described in doc.str
+ 'INDEX' : index,
+ 'TRAD_SYNOPSIS' : discarded, # K&R-style synopsis, obsolete and discarded
+ 'ANSI_SYNOPSIS' : synopsis,
+ 'SYNOPSIS' : synopsis,
+ 'DESCRIPTION' : refsect,
+ 'RETURNS' : refsect,
+ 'ERRORS' : refsect,
+ 'PORTABILITY' : refsect,
+ 'BUGS' : refsect,
+ 'WARNINGS' : refsect,
+ 'SEEALSO' : seealso,
+ 'NOTES' : refsect, # NOTES is not described in doc.str, so is currently discarded by makedoc, but that doesn't seem right
+ 'QUICKREF' : discarded, # The intent of QUICKREF and MATHREF is not obvious, but they don't generate any output currently
+ 'MATHREF' : discarded,
+ 'START' : discarded, # a START command is inserted to contain the text before the first command
+ 'END' : discarded, # an END command is inserted merely to terminate the text for the last command in a comment block
+ 'NEWPAGE' : newpage,
+}
+
+#
+# Utility functions
+#
+
+# apply transformations which are easy to do in-place
+def line_markup_convert(p):
+    s = p
+
+ # process the texinfo escape for an @
+ s = s.replace('@@', '@')
+
+    # escape characters not allowed in XML
+    s = s.replace('&', '&amp;')
+    s = s.replace('<', '&lt;')
+    s = s.replace('>', '&gt;')
+
+ # convert <<somecode>> to <code>somecode</code> and <[var]> to
+ # <varname>var</varname>
+ # also handle nested << <[ ]> >> correctly
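+    # e.g. '<<frob <[x]>>>' becomes '<code>frob <varname>x</varname></code>'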
+    s = s.replace('&lt;&lt;', '<code>')
+    s = s.replace('&lt;[', '<varname>')
+    s = s.replace(']&gt;', '</varname>')
+    s = s.replace('&gt;&gt;', '</code>')
+
+ # also convert some simple texinfo markup
+ # convert @emph{foo} to <emphasis>foo</emphasis>
+ s = re.sub('@emph{(.*?)}', '<emphasis>\\1</emphasis>', s)
+    # convert @minus{} to U+2212 MINUS SIGN
+    s = s.replace('@minus{}', '&#x2212;')
+    # convert @dots{} to U+2026 HORIZONTAL ELLIPSIS
+    s = s.replace('@dots{}', '&#x2026;')
+
+ # convert xref and pxref
+ s = re.sub('@xref{(.*?)}', "See <xref linkend='\\1'/>", s)
+
+ # very hacky way of dealing with @* to force a newline
+ s = s.replace('@*', '</para><para>')
+
+ if (verbose > 3) and (s != p):
+ print('%s-> line_markup_convert ->\n%s' % (p, s), file=sys.stderr)
+
+ return s
+
+#
+# lexer
+#
+
+texinfo_commands = {
+ 'ifnottex' : 'IFNOTTEX',
+ 'end ifnottex' : 'ENDIFNOTTEX',
+ 'tex' : 'IFTEX',
+ 'end tex' : 'ENDIFTEX',
+ 'comment' : 'COMMENT',
+ 'c ' : 'COMMENT',
+ 'multitable' : 'MULTICOLUMNTABLE',
+ 'end multitable' : 'ENDMULTICOLUMNTABLE',
+ 'headitem' : 'MCT_HEADITEM',
+ 'tab' : 'MCT_COLUMN_SEPARATOR',
+ 'item' : 'MCT_ITEM',
+ }
+
+# token names
+tokens = [
+ 'BLANKLINE',
+ 'BULLETEND',
+ 'BULLETSTART',
+ 'COURIER',
+ 'EOF',
+ 'ITEM',
+ 'TABLEEND',
+ 'TABLESTART',
+ 'TEXINFO',
+ 'TEXT',
+] + list(set(texinfo_commands.values()))
+
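+# For example, the hypothetical section text
+#   'Frob the baz.\no+\no key\nvalue\no-\n\n'
+# tokenizes as TEXT TABLESTART ITEM TEXT TABLEEND BLANKLINE, followed by EOF
+#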
+# regular expression rules for tokens, in priority order
+# (all these expressions should match a whole line)
+def t_TEXINFO(t):
+    # this matches any @command line, but not an inline @command{} which just
+    # happens to be at the start of a line
+ r'@\w+[^{]*?\n'
+
+ # if the line starts with a known texinfo command, change t.type to the
+ # token for that command
+ for k in texinfo_commands.keys():
+ if t.value[1:].startswith(k):
+ t.type = texinfo_commands[k]
+ break
+
+ return t
+
+def t_COURIER(t):
+ r'[.|].*\n'
+ t.value = line_markup_convert(t.value[1:])
+ return t
+
+def t_BULLETSTART(t):
+ r'O\+\n'
+ return t
+
+def t_BULLETEND(t):
+ r'O-\n'
+ return t
+
+def t_TABLESTART(t):
+ r'o\+\n'
+ return t
+
+def t_TABLEEND(t):
+ r'o-\n'
+ return t
+
+def t_ITEM(t):
+ r'o\s.*\n'
+    t.value = re.sub(r'o\s', '', lexer.lexmatch.group(0), 1)
+ t.value = line_markup_convert(t.value)
+ return t
+
+def t_TEXT(t):
+ r'.+\n'
+ t.value = line_markup_convert(t.value)
+ t.lexer.lineno += 1
+ return t
+
+def t_BLANKLINE(t):
+ r'\n'
+ t.lexer.lineno += 1
+ return t
+
+def t_eof(t):
+ if hasattr(t.lexer,'at_eof'):
+ # remove eof flag ready for lexing next input
+ delattr(t.lexer,'at_eof')
+ t.lexer.lineno = 0
+ return None
+
+ t.type = 'EOF'
+    t.lexer.at_eof = True
+
+ return t
+
+# Error handling rule
+def t_error(t):
+ print("tokenization error, remaining text '%s'" % t.value, file=sys.stderr)
+ exit(1)
+
+lexer = lex.lex()
+
+#
+# parser
+#
+
+def parser_verbose(p):
+ if verbose > 2:
+ print(p[0], file=sys.stderr)
+
+def p_input(p):
+ '''input : paragraph
+ | input paragraph'''
+ if len(p) == 3:
+ p[0] = p[1] + '\n' + p[2]
+ else:
+ p[0] = p[1]
+ parser_verbose(p)
+
+# Strictly, text at top level should be paragraphs (i.e. terminated by a
+# BLANKLINE), while text contained in rows or bullets may not be, but this
+# grammar doesn't enforce that for simplicity's sake.
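+# e.g. a TEXT token 'Frob the baz.\n' followed by BLANKLINE reduces to the
+# paragraph '<para>\nFrob the baz.\n</para>' (a hypothetical example)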
+def p_paragraph(p):
+ '''paragraph : paragraph_content maybe_eof_or_blankline'''
+ p[0] = '<para>\n' + p[1] + '</para>'
+ parser_verbose(p)
+
+def p_paragraph_content(p):
+ '''paragraph_content : paragraph_line
+ | paragraph_line paragraph_content'''
+ if len(p) == 3:
+ p[0] = p[1] + p[2]
+ else:
+ p[0] = p[1]
+ parser_verbose(p)
+
+def p_paragraph_line(p):
+ '''paragraph_line : TEXT
+ | texinfocmd
+ | courierblock
+ | table
+ | bulletlist'''
+ p[0] = p[1]
+
+def p_empty(p):
+ 'empty :'
+ p[0] = ''
+
+def p_maybe_eof_or_blankline(p):
+ '''maybe_eof_or_blankline : empty
+ | EOF
+ | BLANKLINE
+ | BLANKLINE EOF'''
+ p[0] = ''
+
+def p_maybe_lines(p):
+ '''maybe_lines : empty
+ | paragraph maybe_lines'''
+ if len(p) == 3:
+ p[0] = p[1] + p[2]
+ else:
+ p[0] = p[1]
+ parser_verbose(p)
+
+def p_maybe_blankline(p):
+ '''maybe_blankline : empty
+ | BLANKLINE'''
+ p[0] = ''
+
+def p_courierblock(p):
+ '''courierblock : courier'''
+ p[0] = '<literallayout class="monospaced">' + p[1] + '</literallayout>'
+ parser_verbose(p)
+
+def p_courier(p):
+ '''courier : COURIER
+ | COURIER courier'''
+ if len(p) == 3:
+ p[0] = p[1] + p[2]
+ else:
+ p[0] = p[1]
+ parser_verbose(p)
+
+def p_bullet(p):
+ '''bullet : ITEM maybe_lines
+ | ITEM BLANKLINE maybe_lines'''
+ if len(p) == 3:
+ # Glue any text in ITEM into the first para of maybe_lines
+ # (This is an unfortunate consequence of the line-based tokenization we do)
+ if p[2].startswith('<para>'):
+ p[0] = '<listitem><para>' + p[1] + p[2][len('<para>'):] + '</listitem>'
+ else:
+ p[0] = '<listitem><para>' + p[1] + '</para>' + p[2] + '</listitem>'
+ else:
+ p[0] = '<listitem><para>' + p[1] + '</para>' + p[3] + '</listitem>'
+ parser_verbose(p)
+
+def p_bullets(p):
+ '''bullets : bullet
+ | bullet bullets'''
+ if len(p) == 3:
+ p[0] = p[1] + '\n' + p[2]
+ else:
+ p[0] = p[1]
+ parser_verbose(p)
+
+def p_bulletlist(p):
+ '''bulletlist : BULLETSTART bullets BULLETEND maybe_blankline'''
+ p[0] = '<itemizedlist>' + p[2] + '</itemizedlist>'
+ parser_verbose(p)
+
+def p_row(p):
+ '''row : ITEM maybe_lines
+ | ITEM BLANKLINE maybe_lines'''
+ if len(p) == 3:
+ p[0] = '<row><entry><code>' + p[1] + '</code></entry><entry>' + p[2] + '</entry></row>'
+ else:
+ p[0] = '<row><entry><code>' + p[1] + '</code></entry><entry>' + p[3] + '</entry></row>'
+ parser_verbose(p)
+
+def p_rows(p):
+ '''rows : row
+ | row rows'''
+ if len(p) == 3:
+ p[0] = p[1] + '\n' + p[2]
+ else:
+ p[0] = p[1]
+ parser_verbose(p)
+
+def p_table(p):
+ '''table : TABLESTART rows TABLEEND maybe_blankline'''
+ p[0] = '<informaltable><tgroup cols="2"><tbody>' + p[2] + '</tbody></tgroup></informaltable>'
+ parser_verbose(p)
+
+def p_texinfocmd(p):
+ '''texinfocmd : unknown_texinfocmd
+ | comment
+ | multitable
+ | nottex
+ | tex'''
+ p[0] = p[1]
+
+def p_unknown_texinfocmd(p):
+ '''unknown_texinfocmd : TEXINFO'''
+ print("unknown texinfo command '%s'" % p[1].strip(), file=sys.stderr)
+ p[0] = p[1]
+ parser_verbose(p)
+
+def p_nottex(p):
+ '''nottex : IFNOTTEX paragraph_content ENDIFNOTTEX'''
+ p[0] = p[2]
+
+def p_tex(p):
+ '''tex : IFTEX paragraph_content ENDIFTEX'''
+ # text for TeX formatter inside @iftex is discarded
+ p[0] = ''
+
+def p_comment(p):
+ '''comment : COMMENT'''
+ # comment text is discarded
+ p[0] = ''
+
+def p_mct_columns(p):
+ '''mct_columns : maybe_lines
+ | maybe_lines MCT_COLUMN_SEPARATOR mct_columns'''
+ if len(p) == 4:
+ p[0] = '<entry>' + p[1] + '</entry>' + p[3]
+ else:
+ p[0] = '<entry>' + p[1] + '</entry>'
+ parser_verbose(p)
+
+def p_mct_row(p):
+ '''mct_row : MCT_ITEM mct_columns'''
+ p[0] = '<row>' + p[2] + '</row>'
+ parser_verbose(p)
+
+def p_mct_rows(p):
+ '''mct_rows : mct_row
+ | mct_row mct_rows'''
+ if len(p) == 3:
+ p[0] = p[1] + '\n' + p[2]
+ else:
+ p[0] = p[1]
+ parser_verbose(p)
+
+def p_mct_header(p):
+ '''mct_header : MCT_HEADITEM mct_columns'''
+ p[0] = '<row>' + p[2] + '</row>'
+ parser_verbose(p)
+
+def p_multitable(p):
+ '''multitable : MULTICOLUMNTABLE mct_header mct_rows ENDMULTICOLUMNTABLE'''
+ # this doesn't handle the prototype row form of @multitable, only the @columnfractions form
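+    # e.g. the hypothetical line '@multitable @columnfractions .3 .7' yields
+    # the colspec elements '<colspec colwidth=".3*"/>' and '<colspec colwidth=".7*"/>'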
+ colfrac = p[1].replace('@multitable @columnfractions', '').split()
+ colspec = '\n'.join(['<colspec colwidth="%s*"/>' % (c) for c in colfrac])
+ header = '<thead>' + p[2] + '</thead>\n'
+ body = '<tbody>' + p[3] + '</tbody>\n'
+ p[0] = '<informaltable><tgroup cols="' + str(len(colfrac)) +'">' + colspec + header + body + '</tgroup></informaltable>'
+ parser_verbose(p)
+
+def p_error(t):
+ print('parse error at line %d, token %s, next token %s' % (t.lineno, t, parser.token()), file=sys.stderr)
+ exit(1)
+
+parser = yacc.yacc(start='input')
+
+#
+#
+#
+
+def main(file):
+ content = file.read()
+ content = remove_noncomments(content)
+ processed = process(content)
+ perform(processed)
+
+    if rootelement is None:
+        print('No output produced (perhaps the input has no makedoc markup?)', file=sys.stderr)
+        exit(1)
+
+    # output the XML tree
+    s = lxml.etree.tostring(rootelement, pretty_print=True, encoding='unicode')
+    print(s)
+
+ # warn about texinfo commands which didn't get processed
+ match = re.search('@[a-z*]+', s)
+ if match:
+ print('texinfo command %s remains in output' % match.group(), file=sys.stderr)
+
+#
+#
+#
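+# typical use (a sketch): 'makedocbook.py [-v] input.c > output.xml', with
+# warnings and verbose diagnostics written to stderr
+#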
+
+if __name__ == '__main__':
+ options = OptionParser()
+    options.add_option('-v', '--verbose', action='count', dest='verbose', default=0)
+ (opts, args) = options.parse_args()
+
+ verbose = opts.verbose
+
+ if len(args) > 0:
+        main(open(args[0], 'r'))
+ else:
+ main(sys.stdin)