--- /dev/null
+#!/usr/bin/env python
+#
+# python script to process makedoc instructions in a source file and produce
+# DocBook XML output
+#
+
+#
+# This performs 3 stages of processing on its input, in a similar fashion
+# to makedoc:
+#
+# 1. Discard everything outside of /* */ comments
+# 2. Identify lines which contain commands (a single uppercase word)
+# 3. Apply each command to the text of the following lines (up to the next
+# command or the end of the comment block), to produce some output
+#
+# The resulting output contains one or more DocBook XML refentry elements.
+#
+# To make the output a valid XML document which can be xincluded, those refentry
+# elements are contained by a refcontainer element. refcontainer is not part of
+# the DocBook DTD and should be removed by a suitable XSLT.
+#
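+# For example, given a source file containing the comment below (a
+# hypothetical fragment, not from any real source file):
+#
+# /*
+# FUNCTION
+# <<frob>>---frobnicate a value
+#
+# SYNOPSIS
+# int frob(int <[x]>);
+# */
+#
+# stages 1 and 2 reduce it to (roughly) the command/text pairs
+# ('FUNCTION', '<<frob>>---frobnicate a value\n') and
+# ('SYNOPSIS', 'int frob(int <[x]>);\n'), plus synthetic START and END
+# entries, which stage 3 turns into the refnamediv and refsynopsisdiv
+# content of a refentry.
+#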
+
+from __future__ import print_function
+
+import sys
+import re
+from optparse import OptionParser
+import lxml.etree
+import ply.lex as lex
+import ply.yacc as yacc
+
+rootelement = None # root element of the XML tree
+refentry = None # the current refentry
+verbose = 0
+
+def dump(s, stage, threshold = 1):
+ if verbose > threshold:
+ print('*' * 40, file=sys.stderr)
+ print(stage, file=sys.stderr)
+ print('*' * 40, file=sys.stderr)
+ print('%s' % s, file=sys.stderr)
+ print('*' * 40, file=sys.stderr)
+
+#
+# Stage 1
+#
+
+def skip_whitespace_and_stars(i, src):
+
+    while i < len(src) and (src[i].isspace() or (src[i] == '*' and i + 1 < len(src) and src[i+1] != '/')):
+ i += 1
+
+ return i
+
+# Discard everything not inside '/* */'-style comments which start at column 0
+# Discard any leading blank space or '*'
+# Discard a single leading '.'
+# Discard blank lines after a blank line
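+# e.g. the hypothetical input '/*\n * frob\n */' yields 'frob\n\nEND\n'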
+def comment_contents_generator(src):
+ i = 0
+
+ while i < len(src) - 2:
+ if src[i] == '\n' and src[i+1] == '/' and src[i+2] == '*':
+ i = i + 3
+
+ i = skip_whitespace_and_stars(i, src)
+
+            if i < len(src) and src[i] == '.':
+ i += 1
+
+ while i < len(src):
+ if src[i] == '\n':
+ yield '\n'
+ i += 1
+
+ # allow a single blank line
+ if i < len(src) and src[i] == '\n':
+ yield '\n'
+ i += 1
+
+ i = skip_whitespace_and_stars(i, src)
+
+                elif src[i] == '*' and i + 1 < len(src) and src[i+1] == '/':
+ i = i + 2
+ # If we have just output \n\n, this adds another blank line.
+ # This is the only way a double blank line can occur.
+ yield '\nEND\n'
+ break
+ else:
+ yield src[i]
+ i += 1
+ else:
+ i += 1
+
+def remove_noncomments(src):
+ src = '\n' + src
+ dst = ''.join(comment_contents_generator(src))
+ dump(dst, 'extracted from comments')
+
+ return dst
+
+#
+# Stage 2
+#
+
+# A command is a single word of at least 3 characters, all uppercase (or
+# underscores), alone on a line
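+# e.g. 'RETURNS' and 'TRAD_SYNOPSIS' are commands; 'Returns', 'o+' and
+# ordinary lines of text are not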
+def iscommand(l):
+    return bool(re.match(r'^[A-Z_]{3,}\s*$', l))
+
+def command_block_generator(content):
+ command = 'START'
+ text = ''
+
+ for l in content.splitlines():
+ if iscommand(l):
+ yield (command, text)
+ command = l.rstrip()
+ text = ''
+ else:
+ text = text + l + '\n'
+ yield (command, text)
+
+# Look for commands, which give instructions on how to process the following input
+def process(content):
+ content = content.lstrip()
+
+ dump(content, 'about to process for commands')
+
+ # process into a list of tuples of commands and the associated following text
+ # it is important to maintain the order of the sections the commands generate
+ processed = list(command_block_generator(content))
+
+ return processed
+
+#
+# Stage 3
+#
+
+# invoke each command on its text
+def perform(processed):
+ for i in processed:
+ c = i[0].rstrip()
+ t = i[1].strip() + '\n'
+
+ if verbose:
+ print("performing command '%s'" % c, file=sys.stderr)
+
+ if c in command_dispatch_dict:
+ command_dispatch_dict[c](c, t)
+ else:
+ print("command '%s' is not recognized" % c, file=sys.stderr)
+ # the text following an unrecognized command is discarded
+
+# FUNCTION (aka TYPEDEF)
+#
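+# The text following FUNCTION is normally a single line of the form
+# (a hypothetical example)
+#   <<frob>>, <<frobl>>---frobnicate a value
+# i.e. a comma-separated list of names in << >> markup, the '---' separator,
+# then a one-line description; multi-line and ';'-separated variants are
+# handled below
+#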
+def function(c, l):
+ global refentry
+ global rootelement
+
+ l = l.strip()
+ if verbose:
+ print('FUNCTION %s' % l, file=sys.stderr)
+
+ separator = '---'
+
+ if ';' in l:
+ # fpclassify has an unusual format we also need to handle
+ spliton = ';'
+ l = l.splitlines()[0]
+ elif len(l.splitlines()) > 1:
+ # a few pages like mktemp have two '---' lines
+ spliton = ';'
+ o = ''
+ for i in l.splitlines():
+ if separator in i:
+ o += i + ';'
+ else:
+ o += i
+ l = o[:-1]
+ else:
+ spliton = '\n'
+
+ namelist = []
+ descrlist = []
+ for a in l.split(spliton):
+ (n, d) = a.split(separator, 1)
+ namelist = namelist + n.split(',')
+ descrlist = descrlist + [d]
+
+ # only copysign and log1p use <[ ]> markup in descr,
+ # only gets() uses << >> markup
+ # but we should handle it correctly
+ descr = line_markup_convert(', '.join(descrlist))
+
+ # fpclassify includes an 'and' we need to discard
+    namelist = list(map(lambda v: re.sub('^and ', '', v.strip(), 1), namelist))
+    # strip off << >> surrounding name
+    namelist = list(map(lambda v: v.strip().lstrip('<').rstrip('>'), namelist))
+
+ if verbose:
+ print(namelist, file=sys.stderr)
+ # additional alternate names may also appear in INDEX commands
+
+ # create the root element if needed
+ if rootelement is None:
+ rootelement = lxml.etree.Element('refentrycontainer')
+
+ # FUNCTION implies starting a new refentry
+ if refentry is not None:
+ print("multiple FUNCTIONs without NEWPAGE", file=sys.stderr)
+ exit(1)
+
+ # create the refentry
+ refentry = lxml.etree.SubElement(rootelement, 'refentry')
+ refentry.append(lxml.etree.Comment(' Generated by makedocbook.py '))
+ refentry.set('id', namelist[0].lstrip('_'))
+
+ refmeta = lxml.etree.SubElement(refentry, 'refmeta')
+ # refentrytitle will be same as refdescriptor, the primary name
+ refentrytitle = lxml.etree.SubElement(refmeta, 'refentrytitle')
+ refentrytitle.text = namelist[0]
+ manvolnum = lxml.etree.SubElement(refmeta, 'manvolnum')
+ manvolnum.text = '3'
+
+ refnamediv = lxml.etree.SubElement(refentry, 'refnamediv')
+ # refdescriptor is the primary name, assume we should use the one which
+ # appears first in the list
+ refdescriptor = lxml.etree.SubElement(refnamediv, 'refdescriptor')
+ refdescriptor.text = namelist[0]
+ # refname elements exist for all alternate names
+ for n in namelist:
+ refname = lxml.etree.SubElement(refnamediv, 'refname')
+ refname.text = n
+ refpurpose = lxml.etree.SubElement(refnamediv, 'refpurpose')
+ refnamediv.replace(refpurpose, lxml.etree.fromstring('<refpurpose>' + descr + '</refpurpose>'))
+
+    # Only FUNCTION currently exists, which implies that the SYNOPSIS should be
+    # a funcsynopsis. If TYPEDEF were to be added, SYNOPSIS would need to be
+    # processed differently, probably producing a refsynopsis.
+
+# INDEX
+# may occur more than once for each FUNCTION, giving alternate names under
+# which this function should be indexed
+#
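+# e.g. an INDEX block whose text is 'frob' (a hypothetical name) adds
+# <indexterm><primary>frob</primary></indexterm>, and a <refname>frob</refname>
+# unless one already exists
+#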
+def index(c, l):
+ l = l.strip()
+
+ if verbose:
+ print('INDEX %s' % l, file=sys.stderr)
+
+ # discard anything after the first word
+ l = l.split()[0]
+
+ # add indexterm
+ # (we could just index under all the refnames, but we control the indexing
+ # separately as that is what makedoc does)
+ indexterm = lxml.etree.SubElement(refentry, 'indexterm')
+ primary = lxml.etree.SubElement(indexterm, 'primary')
+ primary.text = l
+
+ # to validate, it seems we need to maintain refentry elements in a certain order
+ refentry[:] = sorted(refentry, key = lambda x: x.tag)
+
+ # adds another alternate refname
+ refnamediv = refentry.find('refnamediv')
+
+ # as long as it doesn't already exist
+    if not refnamediv.xpath('refname[.="%s"]' % l):
+ refname = lxml.etree.SubElement(refnamediv, 'refname')
+ refname.text = l
+ if verbose > 1:
+ print('added refname %s' % l, file=sys.stderr)
+ else:
+ if verbose > 1:
+ print('duplicate refname %s discarded' % l, file=sys.stderr)
+
+ # to validate, it seems we need to maintain refnamediv elements in a certain order
+ refnamediv[:] = sorted(refnamediv, key = lambda x: x.tag)
+
+
+# SYNOPSIS aka ANSI_SYNOPSIS
+# ANSI-style synopsis
+#
+# Note that makedoc would also process <<code>> markup here, but there are no
+# such uses.
+#
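+# Typical input looks like (a hypothetical example):
+#   #include <stdio.h>
+#   int frob(int <[x]>, ...);
+# lines starting with '#' or '[' become funcsynopsisinfo, while prototypes are
+# decomposed into funcprototype elements below
+#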
+def synopsis(c, t):
+ refsynopsisdiv = lxml.etree.SubElement(refentry, 'refsynopsisdiv')
+ funcsynopsis = lxml.etree.SubElement(refsynopsisdiv, 'funcsynopsis')
+
+ s = ''
+ for l in t.splitlines():
+        if re.match(r'\s*[#[]', l):
+ # a #include, #define etc.
+ # fpclassify contains some comments in [ ] brackets
+ funcsynopsisinfo = lxml.etree.SubElement(funcsynopsis, 'funcsynopsisinfo')
+ funcsynopsisinfo.text = l.strip() + '\n'
+ else:
+ s = s + l
+
+ # a prototype without a terminating ';' is an error
+ if s.endswith(')'):
+ print("'%s' missing terminating semicolon" % l, file=sys.stderr)
+ s = s + ';'
+ exit(1)
+
+ if ';' in s:
+ synopsis_for_prototype(funcsynopsis, s)
+ s = ''
+
+    if s.strip():
+        print("surplus synopsis '%s'" % s, file=sys.stderr)
+        exit(1)
+
+def synopsis_for_prototype(funcsynopsis, s):
+ s = s.strip()
+
+ # funcsynopsis has a very detailed content model, so we need to massage the
+ # bare prototype into it. Fortunately, since the parameter names are marked
+ # up, we have enough information to do this.
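+    # For example (a sketch), the prototype 'int frob(int <[x]>)' becomes
+    #   <funcprototype><funcdef>int <function>frob</function></funcdef>
+    #   <paramdef>int <parameter>x</parameter></paramdef></funcprototype>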
+ for fp in s.split(';'):
+ fp = fp.strip()
+ if fp:
+
+ if verbose:
+ print("'%s'" % fp, file=sys.stderr)
+
+ match = re.match(r'(.*?)([\w\d]*) ?\((.*)\)', fp)
+
+ if verbose:
+ print(match.groups(), file=sys.stderr)
+
+ funcprototype = lxml.etree.SubElement(funcsynopsis, 'funcprototype')
+ funcdef = lxml.etree.SubElement(funcprototype, 'funcdef')
+ funcdef.text = match.group(1)
+ function = lxml.etree.SubElement(funcdef, 'function')
+ function.text = match.group(2)
+
+ if match.group(3).strip() == 'void':
+ void = lxml.etree.SubElement(funcprototype, 'void')
+ else:
+ # Split parameters on ',' except if it is inside ()
+                for p in re.split(r',(?![^()]*\))', match.group(3)):
+ p = p.strip()
+
+ if verbose:
+ print(p, file=sys.stderr)
+
+ if p == '...':
+ varargs = lxml.etree.SubElement(funcprototype, 'varargs')
+ else:
+ paramdef = lxml.etree.SubElement(funcprototype, 'paramdef')
+ parameter = lxml.etree.SubElement(paramdef, 'parameter')
+
+ # <[ ]> enclose the parameter name
+                        match2 = re.match(r'(.*)<\[(.*)\]>(.*)', p)
+
+ if verbose:
+ print(match2.groups(), file=sys.stderr)
+
+ paramdef.text = match2.group(1)
+ parameter.text = match2.group(2)
+ parameter.tail = match2.group(3)
+
+
+# DESCRIPTION
+# (RETURNS, ERRORS, PORTABILITY, BUGS, WARNINGS, SEEALSO, NOTES are handled the same)
+#
+# Create a refsect with a title corresponding to the command
+#
+# Nearly all of the existing DESCRIPTION contents could be transformed into
+# DocBook with a few regex substitutions. Unfortunately, pages like sprintf and
+# sscanf have very complex layouts using nested tables and itemized lists, which
+# it is best to parse in order to transform correctly.
+#
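+# The makedoc markup recognized in section text includes, for example:
+#   O+ ... O-     an itemized (bullet) list, one 'o <text>' line per item
+#   o+ ... o-     a two-column table, one 'o <text>' line per row
+#   . or |        a leading '.' or '|' sets the line in monospaced type
+#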
+
+def refsect(t, s):
+ refsect = lxml.etree.SubElement(refentry, 'refsect1')
+ title = lxml.etree.SubElement(refsect, 'title')
+ title.text = t.title()
+
+ if verbose:
+        print('%s has %d paragraphs' % (t, len(s.split('\n\n'))), file=sys.stderr)
+
+    if verbose > 1:
+        dump(s, 'before lexing')
+
+        # dump out lexer token sequence
+        lex.input(s)
+        for tok in lexer:
+            print(tok, file=sys.stderr)
+
+ # parse the section text for makedoc markup and the few pieces of texinfo
+ # markup we understand, and output an XML marked-up string
+ xml = parser.parse(s, tracking=True, debug=(verbose > 2))
+
+ dump(xml, 'after parsing')
+
+ xml = '<refsect1>' + xml + '</refsect1>'
+
+ refsect.extend(lxml.etree.fromstring(xml))
+
+def seealso(c, t):
+ refsect('SEE ALSO', t)
+
+# NEWPAGE
+#
+# start a new refentry
+
+def newpage(c, t):
+ global refentry
+ refentry = None
+
+# command dispatch table
+
+def discarded(c, t):
+ return
+
+command_dispatch_dict = {
+ 'FUNCTION' : function,
+ 'TYPEDEF' : function, # TYPEDEF is not currently used, but described in doc.str
+ 'INDEX' : index,
+ 'TRAD_SYNOPSIS' : discarded, # K&R-style synopsis, obsolete and discarded
+ 'ANSI_SYNOPSIS' : synopsis,
+ 'SYNOPSIS' : synopsis,
+ 'DESCRIPTION' : refsect,
+ 'RETURNS' : refsect,
+ 'ERRORS' : refsect,
+ 'PORTABILITY' : refsect,
+ 'BUGS' : refsect,
+ 'WARNINGS' : refsect,
+ 'SEEALSO' : seealso,
+ 'NOTES' : refsect, # NOTES is not described in doc.str, so is currently discarded by makedoc, but that doesn't seem right
+ 'QUICKREF' : discarded, # The intent of QUICKREF and MATHREF is not obvious, but they don't generate any output currently
+ 'MATHREF' : discarded,
+ 'START' : discarded, # a START command is inserted to contain the text before the first command
+ 'END' : discarded, # an END command is inserted merely to terminate the text for the last command in a comment block
+ 'NEWPAGE' : newpage,
+}
+
+#
+# Utility functions
+#
+
+# apply transformations which are easy to do in-place
+def line_markup_convert(p):
+    s = p
+
+ # process the texinfo escape for an @
+ s = s.replace('@@', '@')
+
+    # escape characters not allowed in XML
+    s = s.replace('&', '&amp;')
+    s = s.replace('<', '&lt;')
+    s = s.replace('>', '&gt;')
+
+ # convert <<somecode>> to <code>somecode</code> and <[var]> to
+ # <varname>var</varname>
+ # also handle nested << <[ ]> >> correctly
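+    # e.g. '<<frob <[x]>>>' becomes '<code>frob <varname>x</varname></code>'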
+    s = s.replace('&lt;&lt;', '<code>')
+    s = s.replace('&lt;[', '<varname>')
+    s = s.replace(']&gt;', '</varname>')
+    s = s.replace('&gt;&gt;', '</code>')
+
+ # also convert some simple texinfo markup
+ # convert @emph{foo} to <emphasis>foo</emphasis>
+ s = re.sub('@emph{(.*?)}', '<emphasis>\\1</emphasis>', s)
+    # convert @minus{} to U+2212 MINUS SIGN
+    s = s.replace('@minus{}', '&#x2212;')
+    # convert @dots{} to U+2026 HORIZONTAL ELLIPSIS
+    s = s.replace('@dots{}', '&#x2026;')
+
+ # convert xref and pxref
+ s = re.sub('@xref{(.*?)}', "See <xref linkend='\\1'/>", s)
+
+ # very hacky way of dealing with @* to force a newline
+ s = s.replace('@*', '</para><para>')
+
+ if (verbose > 3) and (s != p):
+ print('%s-> line_markup_convert ->\n%s' % (p, s), file=sys.stderr)
+
+ return s
+
+#
+# lexer
+#
+
+texinfo_commands = {
+ 'ifnottex' : 'IFNOTTEX',
+ 'end ifnottex' : 'ENDIFNOTTEX',
+ 'tex' : 'IFTEX',
+ 'end tex' : 'ENDIFTEX',
+ 'comment' : 'COMMENT',
+ 'c ' : 'COMMENT',
+ 'multitable' : 'MULTICOLUMNTABLE',
+ 'end multitable' : 'ENDMULTICOLUMNTABLE',
+ 'headitem' : 'MCT_HEADITEM',
+ 'tab' : 'MCT_COLUMN_SEPARATOR',
+ 'item' : 'MCT_ITEM',
+ }
+
+# token names
+tokens = [
+ 'BLANKLINE',
+ 'BULLETEND',
+ 'BULLETSTART',
+ 'COURIER',
+ 'EOF',
+ 'ITEM',
+ 'TABLEEND',
+ 'TABLESTART',
+ 'TEXINFO',
+ 'TEXT',
+] + list(set(texinfo_commands.values()))
+
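+# For example, the hypothetical section text
+#   'Frob the baz.\no+\no key\nvalue\no-\n\n'
+# tokenizes as TEXT TABLESTART ITEM TEXT TABLEEND BLANKLINE, followed by EOF
+#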
+# regular expression rules for tokens, in priority order
+# (all these expressions should match a whole line)
+def t_TEXINFO(t):
+    # this matches any @command line, but not an inline @command{} which just
+    # happens to be at the start of a line
+ r'@\w+[^{]*?\n'
+
+ # if the line starts with a known texinfo command, change t.type to the
+ # token for that command
+ for k in texinfo_commands.keys():
+ if t.value[1:].startswith(k):
+ t.type = texinfo_commands[k]
+ break
+
+ return t
+
+def t_COURIER(t):
+ r'[.|].*\n'
+ t.value = line_markup_convert(t.value[1:])
+ return t
+
+def t_BULLETSTART(t):
+ r'O\+\n'
+ return t
+
+def t_BULLETEND(t):
+ r'O-\n'
+ return t
+
+def t_TABLESTART(t):
+ r'o\+\n'
+ return t
+
+def t_TABLEEND(t):
+ r'o-\n'
+ return t
+
+def t_ITEM(t):
+ r'o\s.*\n'
+    t.value = re.sub(r'o\s', '', lexer.lexmatch.group(0), 1)
+ t.value = line_markup_convert(t.value)
+ return t
+
+def t_TEXT(t):
+ r'.+\n'
+ t.value = line_markup_convert(t.value)
+ t.lexer.lineno += 1
+ return t
+
+def t_BLANKLINE(t):
+ r'\n'
+ t.lexer.lineno += 1
+ return t
+
+def t_eof(t):
+ if hasattr(t.lexer,'at_eof'):
+ # remove eof flag ready for lexing next input
+ delattr(t.lexer,'at_eof')
+ t.lexer.lineno = 0
+ return None
+
+ t.type = 'EOF'
+    t.lexer.at_eof = True
+
+ return t
+
+# Error handling rule
+def t_error(t):
+ print("tokenization error, remaining text '%s'" % t.value, file=sys.stderr)
+ exit(1)
+
+lexer = lex.lex()
+
+#
+# parser
+#
+
+def parser_verbose(p):
+ if verbose > 2:
+ print(p[0], file=sys.stderr)
+
+def p_input(p):
+ '''input : paragraph
+ | input paragraph'''
+ if len(p) == 3:
+ p[0] = p[1] + '\n' + p[2]
+ else:
+ p[0] = p[1]
+ parser_verbose(p)
+
+# Strictly, text at top level should be paragraphs (i.e. terminated by a
+# BLANKLINE), while text contained in rows or bullets may not be, but this
+# grammar doesn't enforce that for simplicity's sake.
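+# e.g. a TEXT token 'Frob the baz.\n' followed by BLANKLINE reduces to the
+# paragraph '<para>\nFrob the baz.\n</para>' (a hypothetical example)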
+def p_paragraph(p):
+ '''paragraph : paragraph_content maybe_eof_or_blankline'''
+ p[0] = '<para>\n' + p[1] + '</para>'
+ parser_verbose(p)
+
+def p_paragraph_content(p):
+ '''paragraph_content : paragraph_line
+ | paragraph_line paragraph_content'''
+ if len(p) == 3:
+ p[0] = p[1] + p[2]
+ else:
+ p[0] = p[1]
+ parser_verbose(p)
+
+def p_paragraph_line(p):
+ '''paragraph_line : TEXT
+ | texinfocmd
+ | courierblock
+ | table
+ | bulletlist'''
+ p[0] = p[1]
+
+def p_empty(p):
+ 'empty :'
+ p[0] = ''
+
+def p_maybe_eof_or_blankline(p):
+ '''maybe_eof_or_blankline : empty
+ | EOF
+ | BLANKLINE
+ | BLANKLINE EOF'''
+ p[0] = ''
+
+def p_maybe_lines(p):
+ '''maybe_lines : empty
+ | paragraph maybe_lines'''
+ if len(p) == 3:
+ p[0] = p[1] + p[2]
+ else:
+ p[0] = p[1]
+ parser_verbose(p)
+
+def p_maybe_blankline(p):
+ '''maybe_blankline : empty
+ | BLANKLINE'''
+ p[0] = ''
+
+def p_courierblock(p):
+ '''courierblock : courier'''
+ p[0] = '<literallayout class="monospaced">' + p[1] + '</literallayout>'
+ parser_verbose(p)
+
+def p_courier(p):
+ '''courier : COURIER
+ | COURIER courier'''
+ if len(p) == 3:
+ p[0] = p[1] + p[2]
+ else:
+ p[0] = p[1]
+ parser_verbose(p)
+
+def p_bullet(p):
+ '''bullet : ITEM maybe_lines
+ | ITEM BLANKLINE maybe_lines'''
+ if len(p) == 3:
+ # Glue any text in ITEM into the first para of maybe_lines
+ # (This is an unfortunate consequence of the line-based tokenization we do)
+ if p[2].startswith('<para>'):
+ p[0] = '<listitem><para>' + p[1] + p[2][len('<para>'):] + '</listitem>'
+ else:
+ p[0] = '<listitem><para>' + p[1] + '</para>' + p[2] + '</listitem>'
+ else:
+ p[0] = '<listitem><para>' + p[1] + '</para>' + p[3] + '</listitem>'
+ parser_verbose(p)
+
+def p_bullets(p):
+ '''bullets : bullet
+ | bullet bullets'''
+ if len(p) == 3:
+ p[0] = p[1] + '\n' + p[2]
+ else:
+ p[0] = p[1]
+ parser_verbose(p)
+
+def p_bulletlist(p):
+ '''bulletlist : BULLETSTART bullets BULLETEND maybe_blankline'''
+ p[0] = '<itemizedlist>' + p[2] + '</itemizedlist>'
+ parser_verbose(p)
+
+def p_row(p):
+ '''row : ITEM maybe_lines
+ | ITEM BLANKLINE maybe_lines'''
+ if len(p) == 3:
+ p[0] = '<row><entry><code>' + p[1] + '</code></entry><entry>' + p[2] + '</entry></row>'
+ else:
+ p[0] = '<row><entry><code>' + p[1] + '</code></entry><entry>' + p[3] + '</entry></row>'
+ parser_verbose(p)
+
+def p_rows(p):
+ '''rows : row
+ | row rows'''
+ if len(p) == 3:
+ p[0] = p[1] + '\n' + p[2]
+ else:
+ p[0] = p[1]
+ parser_verbose(p)
+
+def p_table(p):
+ '''table : TABLESTART rows TABLEEND maybe_blankline'''
+ p[0] = '<informaltable><tgroup cols="2"><tbody>' + p[2] + '</tbody></tgroup></informaltable>'
+ parser_verbose(p)
+
+def p_texinfocmd(p):
+ '''texinfocmd : unknown_texinfocmd
+ | comment
+ | multitable
+ | nottex
+ | tex'''
+ p[0] = p[1]
+
+def p_unknown_texinfocmd(p):
+ '''unknown_texinfocmd : TEXINFO'''
+ print("unknown texinfo command '%s'" % p[1].strip(), file=sys.stderr)
+ p[0] = p[1]
+ parser_verbose(p)
+
+def p_nottex(p):
+ '''nottex : IFNOTTEX paragraph_content ENDIFNOTTEX'''
+ p[0] = p[2]
+
+def p_tex(p):
+ '''tex : IFTEX paragraph_content ENDIFTEX'''
+ # text for TeX formatter inside @iftex is discarded
+ p[0] = ''
+
+def p_comment(p):
+ '''comment : COMMENT'''
+ # comment text is discarded
+ p[0] = ''
+
+def p_mct_columns(p):
+ '''mct_columns : maybe_lines
+ | maybe_lines MCT_COLUMN_SEPARATOR mct_columns'''
+ if len(p) == 4:
+ p[0] = '<entry>' + p[1] + '</entry>' + p[3]
+ else:
+ p[0] = '<entry>' + p[1] + '</entry>'
+ parser_verbose(p)
+
+def p_mct_row(p):
+ '''mct_row : MCT_ITEM mct_columns'''
+ p[0] = '<row>' + p[2] + '</row>'
+ parser_verbose(p)
+
+def p_mct_rows(p):
+ '''mct_rows : mct_row
+ | mct_row mct_rows'''
+ if len(p) == 3:
+ p[0] = p[1] + '\n' + p[2]
+ else:
+ p[0] = p[1]
+ parser_verbose(p)
+
+def p_mct_header(p):
+ '''mct_header : MCT_HEADITEM mct_columns'''
+ p[0] = '<row>' + p[2] + '</row>'
+ parser_verbose(p)
+
+def p_multitable(p):
+ '''multitable : MULTICOLUMNTABLE mct_header mct_rows ENDMULTICOLUMNTABLE'''
+ # this doesn't handle the prototype row form of @multitable, only the @columnfractions form
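+    # e.g. the hypothetical line '@multitable @columnfractions .3 .7' yields
+    # the colspec elements '<colspec colwidth=".3*"/>' and '<colspec colwidth=".7*"/>'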
+ colfrac = p[1].replace('@multitable @columnfractions', '').split()
+ colspec = '\n'.join(['<colspec colwidth="%s*"/>' % (c) for c in colfrac])
+ header = '<thead>' + p[2] + '</thead>\n'
+ body = '<tbody>' + p[3] + '</tbody>\n'
+ p[0] = '<informaltable><tgroup cols="' + str(len(colfrac)) +'">' + colspec + header + body + '</tgroup></informaltable>'
+ parser_verbose(p)
+
+def p_error(t):
+ print('parse error at line %d, token %s, next token %s' % (t.lineno, t, parser.token()), file=sys.stderr)
+ exit(1)
+
+parser = yacc.yacc(start='input')
+
+#
+#
+#
+
+def main(file):
+ content = file.read()
+ content = remove_noncomments(content)
+ processed = process(content)
+ perform(processed)
+
+    if rootelement is None:
+        print('No output produced (perhaps the input has no makedoc markup?)', file=sys.stderr)
+        exit(1)
+
+    # output the XML tree
+    s = lxml.etree.tostring(rootelement, pretty_print=True, encoding='unicode')
+    print(s)
+
+ # warn about texinfo commands which didn't get processed
+ match = re.search('@[a-z*]+', s)
+ if match:
+ print('texinfo command %s remains in output' % match.group(), file=sys.stderr)
+
+#
+#
+#
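+# typical use (a sketch): 'makedocbook.py [-v] input.c > output.xml', with
+# warnings and verbose diagnostics written to stderr
+#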
+
+if __name__ == '__main__':
+ options = OptionParser()
+    options.add_option('-v', '--verbose', action='count', dest='verbose', default=0)
+ (opts, args) = options.parse_args()
+
+ verbose = opts.verbose
+
+ if len(args) > 0:
+        main(open(args[0], 'r'))
+ else:
+ main(sys.stdin)