summaryrefslogtreecommitdiffstats
path: root/doc/xml2wiki.py
diff options
context:
space:
mode:
Diffstat (limited to 'doc/xml2wiki.py')
-rwxr-xr-xdoc/xml2wiki.py395
1 files changed, 395 insertions, 0 deletions
diff --git a/doc/xml2wiki.py b/doc/xml2wiki.py
new file mode 100755
index 000000000..96651c20f
--- /dev/null
+++ b/doc/xml2wiki.py
@@ -0,0 +1,395 @@
+import io
+import re
+import sys
+import time
+import xml.parsers.expat
+
+from optparse import OptionParser
+
+###############################################################################
+
+writer = None
+
+###############################################################################
+
class StreamEntry:
    """One item in an Element's output stream.

    `object` is either a child element or a piece of text; `isElement`
    tells the writer which of the two it is.
    """

    def __init__(self, object, isElement):
        # Parameter names are kept as-is because callers may pass them by keyword.
        self.object = object
        self.isElement = isElement
+
+###############################################################################
+
class Element:
    """Base node of the output tree.

    An element owns an ordered stream of child elements and text chunks and
    writes them between an optional opening and closing delimiter.

    Parameters:
        parent     -- the enclosing element (None for the root).
        text       -- when false, text handed to _addText() is discarded.
        strip      -- stored as _strip; nothing in this class reads it.
        delimBegin -- string written before the stream, if truthy.
        delimEnd   -- string written after the stream, if truthy.
        newline    -- minimum number of consecutive newlines that must precede
                      this element in the output.
    """

    def __init__(self, parent, text=True, strip=True, delimBegin=None, delimEnd=None, newline=0):
        self._parent = parent
        self._text = text
        self._strip = strip
        self._delimBegin = delimBegin
        self._delimEnd = delimEnd
        self._newline = newline
        self._stream = []

    def _addElement(self, child):
        """Append a child element to the stream."""
        self._stream.append(StreamEntry(child, True))

    def _addText(self, text):
        """Append a text chunk to the stream unless text is disabled."""
        if self._text:
            self._stream.append(StreamEntry(text, False))

    def _write(self, file):
        """Write the delimiters and the stream contents to `file`."""
        if self._delimBegin:
            file.write(self._delimBegin)
        for entry in self._stream:
            if entry.isElement:
                # Children go through write() so their newline padding applies.
                entry.object.write(file)
            else:
                file.write(str(entry.object))
        if self._delimEnd:
            file.write(self._delimEnd)

    def write(self, file):
        """Pad with newlines up to `_newline`, then write this element.

        The count of newlines already emitted is read from the module-level
        `writer`, not from `file`.
        """
        missing = self._newline - writer.newlineCount
        if missing > 0:
            file.write('\n' * missing)
        self._write(file)
+
+###############################################################################
+
class Document(Element):
    """Root element; also the target of the XML parser callbacks.

    handleElementBegin / handleElementEnd / handleCharacterData are meant to
    be installed as parser handlers.  They maintain a stack of open elements
    (`_stack`), with `_pending` always the innermost open one, and build the
    output tree as the XML is read.

    Reads the module-level `options` (date, verbose) and sets the module-level
    `anchor` when an <anchor> tag is seen.
    """

    # Tags rendered as a plain Element wrapped in fixed wiki delimiters:
    # tag name -> (delimBegin, delimEnd).
    _INLINE_MARKUP = {
        'b': ("'''", "'''"),
        # Was '{{{{' in the original; aligned with the other monospace tags,
        # which all open with three braces to match the '}}}' close.
        'code': ('{{{', '}}}'),
        'command': ('{{{', '}}}'),
        'file': ('{{{', '}}}'),
        'i': ("''", "''"),
        'samp': ('{{{', '}}}'),
        'tableterm': (' ', '::\n'),
    }

    def __init__(self):
        Element.__init__(self, None)
        self._stack = [self]
        self._pending = self
        self._summary = None
        self._debugIndent = ''
        self._chapterLevel = 0
        self._sectionLevel = 0
        self._dividerCount = 0

        #self._pragmaSummary = PragmaElement( self, 'summary' )
        #self._pragmaLabels = PragmaElement( self, 'labels' )
        #self._pragmaLabels._addText( 'xml2wiki,Distribution,Featured' )

    def _pop(self):
        """Close the innermost open element and return the new innermost one."""
        self._stack.pop()
        self._pending = self._stack[-1]
        return self._pending

    def _pushChild(self, child, add=True):
        """Open `child`; when `add` is true also attach it to the current element."""
        if add:
            self._pending._addElement(child)
        self._stack.append(child)
        self._pending = child
        return self._pending

    def _chapterBegin(self):
        self._chapterLevel = self._chapterLevel + 1

    def _chapterEnd(self):
        self._chapterLevel = self._chapterLevel - 1

    def _sectionBegin(self):
        self._sectionLevel = self._sectionLevel + 1

    def _sectionEnd(self):
        self._sectionLevel = self._sectionLevel - 1

    def _write(self, file):
        """Write the optional date stamp, the whole tree, and a final newline."""
        #self._pragmaSummary.write( file )
        #file.write( '\n' )
        #self._pragmaLabels.write( file )
        if options.date:
            file.write("\n\n ===== `[`generated by xml2wiki on %s`]` =====" % (time.strftime('%c')))
        #if options.toc:
        #    file.write( '\n\n<wiki:toc max_depth="3" />' )
        Element._write(self, file)
        file.write('\n')

    def handleElementBegin(self, name, attrs):
        """Start-tag callback: create the element for `name` and open it.

        Tags that only adjust state (anchor, chapter, section, ...) and tags
        that are not recognised are opened as an UnknownElement, which drops
        any text placed directly inside it.

        Raises KeyError if an <anchor> tag has no 'name' attribute.
        """
        global anchor

        self._debugIndent = ' ' * (len(self._stack) - 1)
        if options.verbose:
            print('%sBEGIN %s %s' % (self._debugIndent, name, attrs))

        # Tags that map one-to-one onto an element class taking only the parent.
        # Built here because these classes are defined after Document.
        factories = {
            'enumerate': EnumerateElement,
            'example': CodeElement,
            'itemize': ItemizeElement,
            'item': ItemElement,
            'para': ParagraphElement,
            'quotation': IndentedElement,
            'tableitem': TableItemElement,
            'uref': UrefInline,
            'urefdesc': UrefDescInline,
            'urefurl': UrefUrlInline,
            'xref': XrefInline,
            'xrefnodename': XrefNodenameInline,
        }

        e = None
        if name == 'anchor':
            # Remembered for the next heading that gets created.
            anchor = attrs['name']
        elif name in ('chapter', 'unnumbered', 'unnumberedsec'):
            self._chapterBegin()
        elif name in ('section', 'subsection'):
            self._sectionBegin()
        elif name == 'title':
            e = HeadingElement(self._pending, self._chapterLevel + self._sectionLevel)
        elif name in self._INLINE_MARKUP:
            begin, end = self._INLINE_MARKUP[name]
            e = Element(self._pending, delimBegin=begin, delimEnd=end)
        elif name in factories:
            e = factories[name](self._pending)

        if e is None:
            # Keep the stack balanced with the matching end tag.
            self._pushChild(UnknownElement(self._pending))
            if options.verbose > 2:
                print('UNKNOWN:', name)
        else:
            self._pushChild(e)

    def handleElementEnd(self, name):
        """End-tag callback: undo level changes and close the innermost element."""
        if name in ('chapter', 'unnumbered', 'unnumberedsec'):
            # unnumbered/unnumberedsec are opened with _chapterBegin(), so they
            # must be closed with _chapterEnd().  The original closed them with
            # _sectionEnd(), which left the two counters drifting apart.
            self._chapterEnd()
        elif name in ('section', 'subsection'):
            self._sectionEnd()

        self._pop()
        self._debugIndent = ' ' * (len(self._stack) - 1)
        if options.verbose:
            print('%sEND %s' % (self._debugIndent, name))

    def handleCharacterData(self, data):
        """Character-data callback: hand the text to the innermost open element."""
        if options.verbose > 1:
            print('%s[%s]' % (self._debugIndent, data.strip()))
        self._pending._addText(data)
+
+###############################################################################
+
class UnknownElement(Element):
    """Placeholder for a tag with no dedicated element.

    Text added directly to it is discarded (text=False); child elements are
    still written.
    """

    def __init__(self, parent):
        Element.__init__(self, parent, text=False)
+
+###############################################################################
+
class PragmaElement(Element):
    """Element whose output is prefixed with '#<keyword> '."""

    def __init__(self, parent, keyword):
        Element.__init__(self, parent, delimBegin=('#' + keyword + ' '))
+
+###############################################################################
+
class BlockElement(Element):
    """Container element preceded by at least two newlines.

    Text added directly to it is discarded (text=False); only child elements
    contribute output.
    """

    def __init__(self, parent):
        Element.__init__(self, parent, newline=2, text=False)
+
+###############################################################################
+
class CodeElement(Element):
    """Multi-line block wrapped in '{{{' / '}}}' lines, preceded by two newlines."""

    def __init__(self, parent):
        Element.__init__(self, parent, newline=2, delimBegin='{{{\n', delimEnd='\n}}}\n')
+
+###############################################################################
+
class HeadingElement(Element):
    """Heading rendered as '=== text === #anchor' with `level` equals signs.

    On construction it captures the current module-level `anchor`, registers
    itself in the module-level `toc` list, and for level-1 headings may prefix
    a '----' divider line.
    """

    def __init__(self, parent, level):
        Element.__init__(self, parent, newline=2)
        # `anchor` is only read here, so no `global` declaration is needed.
        self._anchor = anchor

        self._level = level
        marks = '=' * level
        self._delimBegin = marks + ' '
        self._delimEnd = ' %s #%s\n' % (marks, self._anchor)

        # Insert a divider before level-1 headings: always when a TOC is
        # requested, otherwise before every level-1 heading except the first.
        if level == 1:
            if options.toc or doc._dividerCount:
                self._delimBegin = '----\n%s' % (self._delimBegin)
            doc._dividerCount = doc._dividerCount + 1

        toc.append(self)
+
+###############################################################################
+
class IndentedElement(BlockElement):
    """Block whose contents are written one indent level deeper.

    The indent level lives on the module-level `writer`.
    """

    def _write(self, file):
        writer.increase()
        try:
            Element._write(self, file)
        finally:
            # Restore the level even if writing a child raises.
            writer.decrease()
+
+###############################################################################
+
class EnumerateElement(IndentedElement):
    """Indented block for <enumerate>; ParagraphElement checks for this type
    to choose the '# ' item prefix."""
+
+###############################################################################
+
class ItemizeElement(IndentedElement):
    """Indented block for <itemize>; adds no behaviour of its own."""
+
+###############################################################################
+
class ItemElement(BlockElement):
    """A list or table item.

    Items are preceded by one newline.  An item directly inside a
    TableItemElement needs no leading newline and is instead wrapped in a
    leading space and a trailing newline.
    """

    def __init__(self, parent):
        BlockElement.__init__(self, parent)
        self._newline = 1
        if isinstance(parent, TableItemElement):
            self._newline = 0
            #self._delimBegin = '<td>'
            #self._delimEnd = '</td>'
            self._delimBegin = ' '
            self._delimEnd = '\n'
+
+###############################################################################
+
class ParagraphElement(Element):
    """A paragraph, preceded by two newlines by default.

    Inside an ItemElement the paragraph carries the list marker instead:
      * item of a TableItemElement  -> no leading newline, no marker
      * item of an EnumerateElement -> one newline, '# ' marker
      * any other item              -> one newline, '* ' marker
    """

    def __init__(self, parent):
        Element.__init__(self, parent, newline=2)
        if not isinstance(parent, ItemElement):
            return

        container = parent._parent
        if isinstance(container, TableItemElement):
            self._newline = 0
        elif isinstance(container, EnumerateElement):
            self._newline = 1
            self._delimBegin = '# '
        else:
            self._newline = 1
            self._delimBegin = '* '
+
+###############################################################################
+
class TableItemElement(Element):
    """A table row; preceded by one newline, direct text is discarded."""

    def __init__(self, parent):
        Element.__init__(self, parent, newline=1, text=False)
        #self._delimBegin = '<tr>'
        #self._delimEnd = '</tr>'
+
+###############################################################################
+
class UrefInline(Element):
    """Link wrapper: writes its children between '[' and ']'; direct text is
    discarded."""

    def __init__(self, parent):
        Element.__init__(self, parent, text=False, delimBegin='[', delimEnd=']')
+
+###############################################################################
+
class UrefDescInline(Element):
    """Link description: its text is written after a single leading space."""

    def __init__(self, parent):
        Element.__init__(self, parent, delimBegin=' ')
+
+###############################################################################
+
class UrefUrlInline(Element):
    """Link URL: its text is written unchanged, with no delimiters."""

    def __init__(self, parent):
        Element.__init__(self, parent)
+
+###############################################################################
+
class XrefInline(Element):
    """Cross-reference wrapper: writes only its child elements; direct text is
    discarded."""

    def __init__(self, parent):
        Element.__init__(self, parent, text=False)
+
+###############################################################################
+
class XrefNodenameInline(Element):
    """Cross-reference target, written as an in-page link '[#Anchor Name]'."""

    def __init__(self, parent):
        Element.__init__(self, parent)

    def _write(self, file):
        """Render the contents to a buffer, then emit them as a link.

        The link target is the rendered name with spaces replaced by
        underscores.
        """
        buffer = io.StringIO()
        Element._write(self, buffer)
        name = str(buffer.getvalue())
        # Named `target` so it does not shadow the module-level `anchor`.
        target = name.replace(' ', '_')
        file.write('[#%s %s]' % (target, name))
+
+###############################################################################
+
class IndentedWriter:
    """File-like wrapper that indents each line by the current level.

    Parameters:
        size -- number of spaces per indent level.
        file -- underlying object with a write(str) method.

    `newlineCount` is the number of consecutive newlines most recently
    written; any other character resets it to zero.
    """

    def __init__(self, size, file):
        self._chunk = ' ' * size
        self._file = file
        self._level = 0
        self._indent = ''
        # True when the last character written was a newline, i.e. the indent
        # is still owed before the next character.
        self._pending = False

        self.newlineCount = 0

    def decrease(self):
        """Go one indent level shallower."""
        self._level = self._level - 1
        self._indent = self._chunk * self._level

    def increase(self):
        """Go one indent level deeper."""
        self._level = self._level + 1
        self._indent = self._chunk * self._level

    def write(self, data):
        """Write `data`, inserting the current indent after every newline.

        The indent is emitted lazily, just before the first character that
        follows a newline, so it uses the level in effect at that moment.
        Output is assembled and passed to the underlying file in one call.
        """
        pieces = []
        for ch in data:
            if self._pending:
                self._pending = False
                pieces.append(self._indent)
            if ch == '\n':
                self.newlineCount = self.newlineCount + 1
                self._pending = True
            else:
                self.newlineCount = 0
            pieces.append(ch)
        if pieces:
            self._file.write(''.join(pieces))
+
+###############################################################################
+
# Command-line interface: exactly one positional argument, the XML input file.
parser = OptionParser('Usage: %prog [OPTIONS] xml')
parser.add_option('-d', '--date', action='store_true', default=False, help='generate date-stamp under title')
parser.add_option('-t', '--toc', action='store_true', default=False, help='generate table of contents')
# The 'count' action yields an integer that is compared with `>` elsewhere,
# so the default is the integer 0, not False.
parser.add_option('-v', '--verbose', action='count', default=0, help='increase verbosity')

(options, args) = parser.parse_args()

if len(args) != 1:
    parser.error('incorrect number of arguments')
+
+###############################################################################
+
# Build the element tree by feeding the input file through expat.
doc = Document()
# Named xmlParser so the imported `xml` package is not rebound.
xmlParser = xml.parsers.expat.ParserCreate()

xmlParser.StartElementHandler = doc.handleElementBegin
xmlParser.EndElementHandler = doc.handleElementEnd
xmlParser.CharacterDataHandler = doc.handleCharacterData

# State shared with the element classes while parsing: the most recent
# <anchor> name, and the headings collected for the table of contents.
anchor = None
toc = []
with open(args[0], 'rb') as fin:
    xmlParser.ParseFile(fin)

writer = IndentedWriter(4, sys.stdout)

if options.toc:
    for e in toc:
        # Join every text chunk of the heading: the parser may deliver a title
        # in several pieces, and a heading may have no text at all.
        title = ''.join(str(entry.object) for entry in e._stream if not entry.isElement)
        writer.write('%s* [#%s %s]\n' % (' ' * e._level, e._anchor, title))

doc.write(writer)