From 5ea78be3a56758c605d2877832f6a12559c06b1a Mon Sep 17 00:00:00 2001
From: David Turner <david@freetype.org>
Date: Tue, 30 Jul 2002 18:49:52 +0000
Subject: [PATCH] * src/tools/docmaker/*: adding new (more advanced) version of
 the DocMaker tool.  Python with regular expressions rocks..

---
 ChangeLog                       |   3 +
 src/tools/docmaker/content.py   | 547 ++++++++++++++++++++++++++++++++
 src/tools/docmaker/docmaker.py  | 120 +++++++
 src/tools/docmaker/formatter.py | 194 +++++++++++
 src/tools/docmaker/sources.py   | 355 +++++++++++++++++++++
 src/tools/docmaker/tohtml.py    | 475 +++++++++++++++++++++++++++
 src/tools/docmaker/utils.py     |  86 +++++
 7 files changed, 1780 insertions(+)
 create mode 100644 src/tools/docmaker/content.py
 create mode 100644 src/tools/docmaker/docmaker.py
 create mode 100644 src/tools/docmaker/formatter.py
 create mode 100644 src/tools/docmaker/sources.py
 create mode 100644 src/tools/docmaker/tohtml.py
 create mode 100644 src/tools/docmaker/utils.py

diff --git a/ChangeLog b/ChangeLog
index c4654f94e..3ab8883a0 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -4,6 +4,9 @@
 	to demonstrate a "cleaner" API to support incremental font loading.
 	comments appreciated...
 
+	* src/tools/docmaker/*: adding new (more advanced) version of
+	the DocMaker tool.  Python with regular expressions rocks..
+
 2002-07-28  Werner Lemberg  <wl@gnu.org>
 
 	s/ft_memset/FT_MEM_SET/.
diff --git a/src/tools/docmaker/content.py b/src/tools/docmaker/content.py
new file mode 100644
index 000000000..52f7c1153
--- /dev/null
+++ b/src/tools/docmaker/content.py
@@ -0,0 +1,547 @@
+#
+# this file contains routines used to parse the content of documentation
+# comment blocks and build more structured objects out of them
+#
+
+from sources import *
+from utils import *
+import string, re
+
+
+# this regular expression is used to detect code sequences.  these
+# are simply code fragments embedded in '{' and '}', as in:
+#
+#   {
+#     x = y + z;
+#     if ( zookoo == 2 )
+#     {
+#       foobar();
+#     }
+#   }
+#
+# note that the indentation of the opening and closing braces must be
+# exactly the same.  the code sequence can contain braces at greater
+# indentation
+#
+re_code_start = re.compile( r"(\s*){\s*$" )
+re_code_end   = re.compile( r"(\s*)}\s*$" )
+
+
+# this regular expression is used to isolate identifiers from
+# other text
+#
+re_identifier = re.compile( r'(\w*)' )
+
+
+#############################################################################
+#
+#  The DocCode class is used to store source code lines.
+#
+#   'self.lines' contains a set of source code lines that will be dumped as
+#   HTML in a <pre> tag.
+#
+#   The object is filled line by line by the parser; it strips the leading
+#   "margin" space from each input line before storing it in 'self.lines'.
+#
+class DocCode:
+
+    def __init__( self, margin, lines ):
+        self.lines  = []
+        self.words  = None
+
+        # remove margin spaces
+        for l in lines:
+            if string.strip( l[:margin] ) == "":
+                l = l[margin:]
+            self.lines.append( l )
+
+    def dump( self, prefix = "", width=60 ):
+        for l in self.lines:
+            print prefix + l
+
+
+#############################################################################
+#
+# The DocPara class is used to store a "normal" text paragraph.
+#
+#   'self.words' contains the list of words that make up the paragraph
+#
+class DocPara:
+
+    def __init__( self, lines ):
+        self.lines = None
+        self.words = []
+        for l in lines:
+            l = string.strip(l)
+            self.words.extend( string.split( l ) )
+
+    def dump( self, prefix = "", width = 60 ):
+        cur  = ""  # current line
+        col  = 0   # current width
+
+        for word in self.words:
+            ln = len(word)
+            if col > 0:
+                ln = ln+1
+
+            if col + ln > width:
+                print prefix + cur
+                cur = word
+                col = len(word)
+            else:
+                if col > 0:
+                    cur = cur + " "
+                cur = cur + word
+                col = col + ln
+
+        if col > 0:
+            print prefix + cur
+
+
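+# An illustrative sketch (not part of the parser itself; the input lines
+# and the width value are invented):
+#
+#     para = DocPara( [ "  this paragraph is made of several words",
+#                       "  collected from two raw input lines  " ] )
+#     para.dump( prefix = "> ", width = 30 )
+#
+# prints the same words re-flowed so that each output line starts with
+# "> " and carries at most about 30 columns of text.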
+
+#############################################################################
+#
+#  The DocField class is used to store a list containing either DocPara or
+#  DocCode objects. Each DocField also has an optional "name" which is used
+#  when the object corresponds to a field or value definition
+#
+class DocField:
+
+    def __init__( self, name, lines ):
+
+        self.name  = name  # can be None for normal paragraphs/sources
+        self.items = []     # list of items
+
+        mode_none  = 0   # start parsing mode
+        mode_code  = 1   # parsing code sequences
+        mode_para  = 3   # parsing normal paragraph
+
+        margin     = -1  # current code sequence indentation
+        cur_lines  = []
+
+        # now analyze the markup lines to see if they contain paragraphs,
+        # code sequences or fields definitions
+        #
+        start = 0
+        mode  = mode_none
+        for l in lines:
+
+            # are we parsing a code sequence ?
+            if mode == mode_code:
+
+                m = re_code_end.match( l )
+                if m and len(m.group(1)) <= margin:
+                    # that's it, we finished the code sequence
+                    code = DocCode( margin, cur_lines )
+                    self.items.append( code )
+                    margin    = -1
+                    cur_lines = []
+                    mode      = mode_none
+                else:
+                    # nope, continue the code sequence
+                    cur_lines.append( l[margin:] )
+            else:
+                # start of code sequence ?
+                m = re_code_start.match( l )
+                if m:
+                    # save current lines
+                    if cur_lines:
+                        para = DocPara( cur_lines )
+                        self.items.append( para )
+                        cur_lines = []
+
+                    # switch to code extraction mode
+                    margin = len(m.group(1))
+                    mode   = mode_code
+
+                else:
+                    if not string.split( l ) and cur_lines:
+                        # if the line is empty, we end the current paragraph,
+                        # if any
+                        para = DocPara( cur_lines )
+                        self.items.append( para )
+                        cur_lines = []
+                    else:
+                        # otherwise, simply add the line to the current
+                        # paragraph
+                        cur_lines.append( l )
+
+        if mode == mode_code:
+            # unexpected end of code sequence
+            code = DocCode( margin, cur_lines )
+            self.items.append( code )
+
+        elif cur_lines:
+            para = DocPara( cur_lines )
+            self.items.append( para )
+
+    def dump( self, prefix = "" ):
+        if self.name:
+            print prefix + self.name + " ::"
+            prefix = prefix + "----"
+
+        first = 1
+        for p in self.items:
+            if not first:
+                print ""
+            p.dump( prefix )
+            first = 0
+
+
+# this regular expression is used to detect field definitions
+#
+re_field  = re.compile( r"\s*(\w*)\s*::" )
+
+
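+# For illustration only (the field name and text are invented): a line like
+#
+#     "  num_glyphs :: The number of glyphs in the face."
+#
+# is recognized by 're_field'; group 1 captures "num_glyphs", and the text
+# after the "::" marker is kept as the first line of the new DocField.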
+
+class DocMarkup:
+
+    def __init__( self, tag, lines ):
+        self.tag       = string.lower(tag)
+        self.fields    = []
+
+        cur_lines = []
+        field     = None
+        mode      = 0
+
+        for l in lines:
+            m = re_field.match( l )
+            if m:
+                # we detected the start of a new field definition
+
+                # first, save the current one
+                if cur_lines:
+                    f = DocField( field, cur_lines )
+                    self.fields.append( f )
+                    cur_lines = []
+                    field     = None
+
+                field     = m.group(1)   # record field name
+                ln        = len(m.group(0))
+                l         = " "*ln + l[ln:]
+                cur_lines = [ l ]
+            else:
+                cur_lines.append( l )
+
+        if field or cur_lines:
+            f = DocField( field, cur_lines )
+            self.fields.append( f )
+
+    def get_name( self ):
+        try:
+            return self.fields[0].items[0].words[0]
+
+        except:
+            return None
+
+    def dump( self, margin ):
+        print " "*margin + "<" + self.tag + ">"
+        for f in self.fields:
+            f.dump( "  " )
+        print " "*margin + "</" + self.tag + ">"
+
+
+
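+# A hedged example of what DocMarkup does (the tag and lines are made up):
+#
+#     m = DocMarkup( "values", [
+#         "  FT_LOAD_DEFAULT  :: Corresponding to 0, used as the default",
+#         "                      glyph load operation.",
+#         "",
+#         "  FT_LOAD_NO_SCALE :: Don't scale the loaded outline." ] )
+#
+# yields two DocField objects named "FT_LOAD_DEFAULT" and "FT_LOAD_NO_SCALE",
+# each holding a single DocPara item.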
+
+class DocChapter:
+
+    def __init__( self, block ):
+        self.block    = block
+        self.sections = []
+        if block:
+            self.name     = block.name
+            self.title    = block.get_markup_words( "title" )
+            self.order    = block.get_markup_words( "sections" )
+        else:
+            self.name     = "Other"
+            self.title    = string.split( "Miscellaneous" )
+            self.order    = []
+
+
+
+class DocSection:
+
+    def __init__( self, name = "Other" ):
+        self.name        = name
+        self.blocks      = {}
+        self.block_names = []  # ordered block names in section
+        self.defs        = []
+        self.abstract    = ""
+        self.description = ""
+        self.order       = []
+        self.title       = "ERROR"
+        self.chapter     = None
+
+    def add_def( self, block ):
+        self.defs.append( block )
+
+    def add_block( self, block ):
+        self.block_names.append( block.name )
+        self.blocks[ block.name ] = block
+
+    def process( self ):
+        # lookup one block that contains a valid section description
+        for block in self.defs:
+            title = block.get_markup_text( "Title" )
+            if title:
+                self.title       = title
+                self.abstract    = block.get_markup_words( "abstract" )
+                self.description = block.get_markup_items( "description" )
+                self.order       = block.get_markup_words( "order" )
+                return
+                
+    def reorder( self ):
+        
+        self.block_names = sort_order_list( self.block_names, self.order )
+
+
+class ContentProcessor:
+
+    def __init__( self ):
+        """initialize a block content processor"""
+        self.reset()
+        
+        self.sections = {}    # dictionary of documentation sections
+        self.section  = None  # current documentation section
+
+        self.chapters = []        # list of chapters
+
+    def set_section( self, section_name ):
+        """set current section during parsing"""
+        if not self.sections.has_key( section_name ):
+            section = DocSection( section_name )
+            self.sections[ section_name ] = section
+            self.section                  = section
+        else:
+            self.section = self.sections[ section_name ]
+
+    def add_chapter( self, block ):
+        chapter = DocChapter( block )
+        self.chapters.append( chapter )
+
+
+    def reset( self ):
+        """reset the content processor for a new block"""
+        self.markups      = []
+        self.markup       = None
+        self.markup_lines = []
+
+    def add_markup( self ):
+        """add a new markup section"""
+        if self.markup and self.markup_lines:
+
+            # get rid of last line of markup if it's empty
+            marks = self.markup_lines
+            if len(marks) > 0 and not string.strip(marks[-1]):
+                self.markup_lines = marks[:-1]
+
+            m = DocMarkup( self.markup, self.markup_lines )
+
+            self.markups.append( m )
+
+            self.markup       = None
+            self.markup_lines = []
+
+
+    def process_content( self, content ):
+        """process a block content and return a list of DocMarkup objects
+           corresponding to it"""
+        markup       = None
+        markup_lines = []
+        first        = 1
+
+        for line in content:
+            found = None
+            for t in re_markup_tags:
+                m = t.match( line )
+                if m:
+                    found  = string.lower(m.group(1))
+                    prefix = len(m.group(0))
+                    line   = " "*prefix + line[prefix:]   # remove markup from line
+                    break
+
+            # is it the start of a new markup section ?
+            if found:
+                first = 0
+                self.add_markup()  # add current markup content
+                self.markup = found
+                if len(string.strip( line )) > 0:
+                    self.markup_lines.append( line )
+            elif first == 0:
+                self.markup_lines.append( line )
+
+        self.add_markup()
+
+        return self.markups
+
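+    # A short illustration of 'process_content' (the markup lines below are
+    # invented).  On a freshly reset processor,
+    #
+    #     processor.process_content( [ "<description>",
+    #                                  "   some text",
+    #                                  "",
+    #                                  "@order:",
+    #                                  "   foo bar" ] )
+    #
+    # returns two DocMarkup objects, tagged "description" and "order"
+    # respectively, each built from the lines that followed its tag.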
+
+    def  parse_sources( self, source_processor ):
+        blocks = source_processor.blocks
+        count  = len(blocks)
+        for n in range(count):
+            
+            source = blocks[n]
+            if source.content:
+                # this is a documentation comment, we need to catch
+                # all following normal blocks in the "follow" list
+                #
+                follow = []
+                m = n+1
+                while m < count and not blocks[m].content:
+                    follow.append( blocks[m] )
+                    m = m+1
+
+                doc_block = DocBlock( source, follow, self )
+    
+    
+    def  finish( self ):
+
+        # process all sections to extract their abstract, description
+        # and ordered list of items
+        #
+        for sec in self.sections.values():
+            sec.process()
+
+        # process chapters to check that all sections are correctly
+        # listed there
+        for chap in self.chapters:
+            for sec in chap.order:
+                if self.sections.has_key(sec):
+                    section = self.sections[ sec ]
+                    section.chapter = chap
+                    section.reorder()
+                    chap.sections.append( section )
+                else:
+                    sys.stderr.write( "WARNING: chapter '" +
+                        chap.name + "' in " + chap.block.location() + \
+                        " lists unknown section '" + sec + "'\n" )
+
+        # check that all sections are in a chapter
+        #
+        others = []
+        for sec in self.sections.values():
+            if not sec.chapter:
+                others.append(sec)
+
+        # create a new special chapter for all remaining sections
+        # when necessary
+        #
+        if others:
+            chap = DocChapter( None )
+            chap.sections = others
+            self.chapters.append( chap )
+            
+
+
+class DocBlock:
+
+    def __init__( self, source, follow, processor ):
+        
+        processor.reset()
+
+        self.source    = source
+        self.code      = []
+        self.type      = "ERRTYPE"
+        self.name      = "ERRNAME"
+        self.section   = processor.section
+        self.markups   = processor.process_content( source.content )
+
+        # compute block type from first markup tag
+        try:
+            self.type = self.markups[0].tag
+        except:
+            pass
+        
+            
+        # compute block name from first markup paragraph
+        try:
+            markup = self.markups[0]
+            para   = markup.fields[0].items[0]
+            name   = para.words[0]
+            m = re_identifier.match( name )
+            if m:
+                name = m.group(1)
+            self.name = name
+        except:
+            pass
+
+        # detect new section starts
+        if self.type == "section":
+            processor.set_section( self.name )
+            processor.section.add_def( self )
+
+        # detect new chapter
+        elif self.type == "chapter":
+            processor.add_chapter( self )
+
+        else:
+            processor.section.add_block( self )
+
+        # now, compute the source lines relevant to this documentation
+        # block. We keep normal comments in for obvious reasons (??)
+        source = []
+        for b in follow:
+            if b.format:
+                break
+            for l in b.lines:
+                # we use "/* */" as a separator
+                if re_source_sep.match( l ):
+                    break
+                source.append( l )
+
+        # now strip the leading and trailing empty lines from the sources
+        start = 0
+        end   = len( source )-1
+        
+        while start < end and not string.strip( source[start] ):
+            start = start + 1
+
+        while start < end and not string.strip( source[end] ):
+            end = end - 1
+
+        source = source[start:end+1]
+
+        self.code = source
+
+
+    def location( self ):
+        return self.source.location()
+
+
+
+    def get_markup( self, tag_name ):
+        """return the DocMarkup corresponding to a given tag in a block"""
+        for m in self.markups:
+            if m.tag == string.lower(tag_name):
+                return m
+        return None
+
+
+    def get_markup_name( self, tag_name ):
+        """return the name of a given primary markup in a block"""
+        try:
+            m = self.get_markup( tag_name )
+            return m.get_name()
+        except:
+            return None
+
+
+    def get_markup_words( self, tag_name ):
+        try:
+            m = self.get_markup( tag_name )
+            return m.fields[0].items[0].words
+        except:
+            return []
+
+
+    def get_markup_text( self, tag_name ):
+        result = self.get_markup_words( tag_name )
+        return string.join( result )
+
+
+    def get_markup_items( self, tag_name ):
+        try:
+            m = self.get_markup( tag_name )
+            return m.fields[0].items
+        except:
+            return None
\ No newline at end of file
diff --git a/src/tools/docmaker/docmaker.py b/src/tools/docmaker/docmaker.py
new file mode 100644
index 000000000..a502c9c3a
--- /dev/null
+++ b/src/tools/docmaker/docmaker.py
@@ -0,0 +1,120 @@
+#!/usr/bin/env python
+#
+#  DocMaker 0.2 (c) 2002 David Turner <david@freetype.org>
+#
+# This program is a re-write of the original DocMaker tool used
+# to generate the API Reference of the FreeType font engine
+# by converting in-source comments into structured HTML
+#
+# This new version is capable of outputting XML data, as well
+# as accepting more liberal formatting options
+#
+# It also uses regular expression matching and substitution
+# to speed things up significantly
+#
+
+from sources import *
+from content import *
+from tohtml  import *
+
+import sys, os, time, string, glob, getopt
+
+
+def file_exists( pathname ):
+    """checks that a given file exists"""
+    result = 1
+    try:
+        file = open( pathname, "r" )
+        file.close()
+    except:
+        result = None
+        sys.stderr.write( pathname + " couldn't be accessed\n" )
+
+    return result
+
+
+def make_file_list( args = None ):
+    """builds a list of input files from command-line arguments"""
+
+    file_list = []
+    # sys.stderr.write( repr( sys.argv[1 :] ) + '\n' )
+
+    if not args:
+        args = sys.argv[1 :]
+
+    for pathname in args:
+        if string.find( pathname, '*' ) >= 0:
+            newpath = glob.glob( pathname )
+            newpath.sort()  # sort files -- the order of the input files
+                            # matters for the resulting documentation
+        else:
+            newpath = [pathname]
+            
+        last = len( file_list )
+        file_list[last : last] = newpath
+
+    if len( file_list ) == 0:
+        file_list = None
+    else:
+        # now filter the file list to remove non-existing ones
+        file_list = filter( file_exists, file_list )
+    
+    return file_list
+
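+# Example (the glob pattern is hypothetical):
+#
+#     make_file_list( [ "include/freetype/*.h" ] )
+#
+# expands the pattern, sorts the matches, drops files that cannot be
+# opened, and returns the remaining names (or None if nothing matched).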
+
+
+def usage():
+    print "\nDocMaker 0.2 Usage information\n"
+    print "  docmaker [options] file1 [ file2 ... ]\n"
+    print "using the following options:\n"
+    print "  -h : print this page"
+    
+
+def main( argv ):
+    """main program loop"""
+
+    try:
+        opts, args = getopt.getopt( argv[1:],"h", [ "help" ] )
+
+    except getopt.GetoptError:
+        usage()
+        sys.exit( 2 )
+
+    if args == []:
+        usage()
+        sys.exit( 1 )
+
+    # process options
+    #
+    for opt in opts:
+        if opt[0] in ( "-h", "--help" ):
+            usage()
+            sys.exit( 0 )
+
+    # create context and processor
+    source_processor  = SourceProcessor()
+    content_processor = ContentProcessor()
+
+    # retrieve the list of files to process
+    file_list = make_file_list()
+    for filename in file_list:
+        source_processor.parse_file( filename )
+        content_processor.parse_sources( source_processor )
+        
+    # process sections
+    content_processor.finish()
+
+    formatter = HtmlFormatter( content_processor, "Example", "zz" )
+
+    formatter.toc_dump()
+    formatter.index_dump()
+    formatter.section_dump_all()
+
+
+# if called from the command line
+#
+if __name__ == '__main__':
+    main( sys.argv )
+
+
+# eof
diff --git a/src/tools/docmaker/formatter.py b/src/tools/docmaker/formatter.py
new file mode 100644
index 000000000..36d72aeca
--- /dev/null
+++ b/src/tools/docmaker/formatter.py
@@ -0,0 +1,194 @@
+from sources import *
+from content import *
+from utils   import *
+
+class Formatter:
+
+    def __init__( self, processor ):
+
+        self.processor   = processor
+        self.identifiers = {}
+        self.chapters    = processor.chapters
+        self.sections    = processor.sections.values()
+        self.block_index = []
+
+        # register all blocks (and their enumeration values) by identifier name
+        self.blocks      = []
+        for section in self.sections:
+            for block in section.blocks.values():
+                self.add_identifier( block.name, block )
+                    
+                # add enumeration values to the index, since this is useful
+                for markup in block.markups:
+                    if markup.tag == 'values':
+                        for field in markup.fields:
+                            self.add_identifier( field.name, block )
+
+
+        self.block_index = self.identifiers.keys()
+        self.block_index.sort( index_sort )
+
+
+    def add_identifier( self, name, block ):
+        if self.identifiers.has_key( name ):
+            # duplicate name !!
+            sys.stderr.write( \
+               "WARNING: duplicate definition for '" + name + "' in " + \
+               block.location() + ", previous definition in " +         \
+               self.identifiers[ name ].location() + "\n" )
+        else:
+            self.identifiers[name] = block
+              
+
+    #
+    #  Formatting the table of contents
+    #
+
+    def  toc_enter( self ):
+        pass
+    
+    def  toc_chapter_enter( self, chapter ):
+        pass
+    
+    def  toc_section_enter( self, section ):
+        pass
+        
+    def  toc_section_exit( self, section ):
+        pass
+        
+    def  toc_chapter_exit( self, chapter ):
+        pass
+
+    def  toc_index( self, index_filename ):
+        pass
+    
+    def  toc_exit( self ):
+        pass
+
+    def  toc_dump( self, toc_filename = None, index_filename = None ):
+        
+        output = None
+        if toc_filename:
+            output = open_output( toc_filename )
+        
+        self.toc_enter()
+    
+        for chap in self.processor.chapters:
+    
+            self.toc_chapter_enter( chap )
+    
+            for section in chap.sections:
+                self.toc_section_enter( section )
+                self.toc_section_exit( section )
+    
+            self.toc_chapter_exit ( chap )
+    
+        self.toc_index( index_filename )
+    
+        self.toc_exit()
+
+        if output:
+            close_output( output )
+    
+    #
+    #  Formatting the index
+    #
+
+    def  index_enter( self ):
+        pass
+
+    def  index_name_enter( self, name ):
+        pass
+
+    def  index_name_exit( self, name ):
+        pass
+
+    def  index_exit( self ):
+        pass
+
+    def  index_dump( self, index_filename = None ):
+        
+        output = None
+        if index_filename:
+            output = open_output( index_filename )
+
+        self.index_enter()
+
+        for name in self.block_index:
+            self.index_name_enter( name )
+            self.index_name_exit ( name )
+
+        self.index_exit()
+     
+        if output:
+            close_output( output )
+     
+    #
+    #  Formatting a section
+    #
+    def  section_enter( self, section ):
+        pass
+    
+    def  block_enter( self, block ):
+        pass
+    
+    def  markup_enter( self, markup, block = None ):
+        pass
+    
+    def  field_enter( self, field, markup = None, block = None ):
+        pass
+        
+    def  field_exit( self, field, markup = None, block = None ):
+        pass
+    
+    def  markup_exit( self, markup, block = None ):
+        pass
+        
+    def  block_exit( self, block ):
+        pass
+
+    def  section_exit( self, section ):
+        pass
+
+
+    def  section_dump( self, section, section_filename = None ):
+        
+        output = None
+        if section_filename:
+            output = open_output( section_filename )
+        
+        self.section_enter( section )
+
+        for name in section.block_names:
+            block = self.identifiers[ name ]
+            self.block_enter( block )
+
+            for markup in block.markups[1:]:   # always ignore first markup !!
+                self.markup_enter( markup, block )
+
+                for field in markup.fields:
+                    self.field_enter( field, markup, block )
+
+                    self.field_exit ( field, markup, block )
+
+                self.markup_exit( markup, block )
+
+            self.block_exit( block )
+
+        self.section_exit ( section )
+
+        if output:
+            close_output( output )
+
+
+    def section_dump_all( self ):
+        for section in self.sections:
+            self.section_dump( section )
+
+    #
+    #  Formatting a block
+    #
+
+
+
+
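+# A minimal illustrative subclass (not part of this patch): the Formatter
+# base class only drives the traversal, so an output format is obtained by
+# overriding its hooks.  The sketch below assumes a ContentProcessor
+# instance named 'content_processor' whose finish() method has been called.
+#
+#     class TextFormatter( Formatter ):
+#
+#         def section_enter( self, section ):
+#             print "== " + section.title + " =="
+#
+#         def block_enter( self, block ):
+#             print "  - " + block.name
+#
+#     TextFormatter( content_processor ).section_dump_all()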
diff --git a/src/tools/docmaker/sources.py b/src/tools/docmaker/sources.py
new file mode 100644
index 000000000..6961cd979
--- /dev/null
+++ b/src/tools/docmaker/sources.py
@@ -0,0 +1,355 @@
+#
+# this file contains definitions of classes needed to decompose
+# C sources files into a series of multi-line "blocks". There are
+# two kinds of blocks:
+#
+#   - normal blocks, which contain source code or ordinary comments
+#
+#   - documentation blocks, which have restricted formatting, and
+#     whose text always starts with a documentation markup tag like
+#     "<Function>", "<Type>", etc..
+#
+# the routines used to process the content of documentation blocks
+# are not contained here, but in "content.py"
+#
+# the classes and methods found here only deal with text parsing
+# and basic documentation block extraction
+#
+import fileinput, re, sys, os, string
+
+
+
+
+
+
+################################################################
+##
+##  BLOCK FORMAT PATTERN
+##
+##   A simple class containing compiled regular expressions used
+##   to detect potential documentation format block comments within
+##   C source code
+##
+##   note that the 'column' pattern must contain a group that will
+##   be used to "unbox" the content of documentation comment blocks
+##
+class SourceBlockFormat:
+
+    def __init__( self, id, start, column, end ):
+        """create a block pattern, used to recognize special documentation blocks"""
+
+        self.id     = id
+        self.start  = re.compile( start, re.VERBOSE )
+        self.column = re.compile( column, re.VERBOSE )
+        self.end    = re.compile( end, re.VERBOSE )
+
+
+
+#
+# format 1 documentation comment blocks look like the following:
+#
+#    /************************************/
+#    /*                                  */
+#    /*                                  */
+#    /*                                  */
+#    /************************************/
+#
+# we define a few regular expressions here to detect them
+#
+
+start = r'''
+  \s*       # any number of whitespace
+  /\*{2,}/  # followed by '/' and at least two asterisks then '/'
+  \s*$      # possibly followed by whitespace
+'''
+
+column = r'''
+  \s*      # any number of whitespace
+  /\*{1}   # followed by '/' and precisely one asterisk
+  ([^*].*) # followed by anything (group 1)
+  \*{1}/   # followed by one asterisk and a '/'
+  \s*$     # possibly followed by whitespace
+'''
+
+re_source_block_format1 = SourceBlockFormat( 1, start, column, start )
+
+#
+# format 2 documentation comment blocks look like the following:
+#
+#    /************************************
+#     *
+#     *                                                                    
+#     *                                                                    
+#     *                                                                    
+#     **/       (1 or more asterisks at the end)
+#
+# we define a few regular expressions here to detect them
+#
+start = r'''
+  \s*     # any number of whitespace
+  /\*{2,} # followed by '/' and at least two asterisks
+  \s*$    # possibly followed by whitespace
+'''
+
+column = r'''
+  \s*         # any number of whitespace
+  \*{1}       # followed by precisely one asterisk
+  (.*)        # followed by anything (group1)
+'''
+
+end = r'''
+  \s*     # any number of whitespace
+  \*+/    # followed by at least one asterisk, then '/'
+'''
+
+re_source_block_format2 = SourceBlockFormat( 2, start, column, end )
+
+#
+# the list of supported documentation block formats, we could add new ones
+# relatively easily
+#
+re_source_block_formats = [ re_source_block_format1, re_source_block_format2 ]
+
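+# A hedged illustration (the tag and text are invented): a comment block
+# like
+#
+#    /**************************************************************/
+#    /*                                                            */
+#    /* <MyTag>                                                    */
+#    /*    some documentation text                                 */
+#    /*                                                            */
+#    /**************************************************************/
+#
+# is recognized as a format 1 block; the 'column' pattern strips the
+# leading "/*" and trailing "*/" decoration from each middle line.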
+
+#
+# the following regular expressions correspond to markup tags
+# within the documentation comment blocks. they're equivalent
+# despite their different syntax
+#
+# notice how each markup tag _must_ begin a new line
+#
+re_markup_tag1 = re.compile( r'''\s*<(\w*)>''' )  # <xxxx> format
+re_markup_tag2 = re.compile( r'''\s*@(\w*):''' )  # @xxxx: format
+
+#
+# the list of supported markup tags, we could add new ones relatively
+# easily
+#
+re_markup_tags = [ re_markup_tag1, re_markup_tag2 ]
+
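+# Hedged illustration (the tag name is invented): the two lines
+#
+#     <Example>
+#     @Example:
+#
+# are equivalent as far as the parser is concerned; in both cases group 1
+# captures "Example", which content.py then lowercases.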
+#
+# used to detect a cross-reference, after markup tags have been stripped
+#
+re_crossref = re.compile( r'@(\w*)' )
+
+#
+# used to detect italic and bold styles in paragraph text
+#
+re_italic = re.compile( r'_(\w+)_' )
+re_bold   = re.compile( r'\*(\w+)\*' )
+
+#
+# used to detect the end of commented source lines
+#
+re_source_sep = re.compile( r'\s*/\*\s*\*/' )
+
+#
+# used to perform cross-reference within source output
+#
+re_source_crossref = re.compile( r'(\W*)(\w*)' )
+
+#
+# a list of reserved source keywords
+#
+re_source_keywords = re.compile( '''( typedef | 
+                                       struct |
+                                       enum   |
+                                       union  |
+                                       const  |
+                                       char   |
+                                       int    |
+                                       short  |
+                                       long   |
+                                       void   |
+                                       signed |
+                                       unsigned |
+                                       \#include |
+                                       \#define  |
+                                       \#undef   |
+                                       \#if      |
+                                       \#ifdef   |
+                                       \#ifndef  |
+                                       \#else    |
+                                       \#endif   )''', re.VERBOSE )
+
+################################################################
+##
+##  SOURCE BLOCK CLASS
+##
+##   A SourceProcessor is in charge of reading a C source file
+##   and decomposing it into a series of different "SourceBlocks".
+##   each one of these blocks can be made of the following data:
+##
+##   - A documentation comment block that starts with "/**" and
+##     whose exact format will be discussed later
+##
+##   - normal source lines, including comments
+##
+##   the important fields in a text block are the following ones:
+##
+##     self.lines   : a list of text lines for the corresponding block
+##
+##     self.content : for documentation comment blocks only, this is the
+##                    block content that has been "unboxed" from its
+##                    decoration. This is None for all other blocks
+##                    (i.e. sources or ordinary comments with no starting
+##                     markup tag)
+##
+class SourceBlock:
+    def __init__( self, processor, filename, lineno, lines ):
+        self.processor = processor
+        self.filename  = filename
+        self.lineno    = lineno
+        self.lines     = lines
+        self.format    = processor.format
+        self.content   = []
+
+        if self.format == None:
+            return
+
+        words = []
+
+        # extract comment lines
+        lines = []
+
+        for line0 in self.lines[1:]:
+            m = self.format.column.match( line0 )
+            if m:
+                lines.append( m.group(1) )
+
+        # now, look for a markup tag
+        for l in lines:
+            l = string.strip(l)
+            if len(l) > 0:
+                for tag in re_markup_tags:
+                    if tag.match( l ):
+                        self.content = lines
+                return
+
+    def location( self ):
+        return "(" + self.filename + ":" + repr(self.lineno) + ")"
+
+
+    # debugging only - not used in normal operations
+    def dump( self ):
+        
+        if self.content:
+            print "{{{content start---"
+            for l in self.content:
+                print l
+            print "---content end}}}"
+            return
+            
+        fmt = ""
+        if self.format:
+            fmt = repr(self.format.id) + " "
+        
+        for line in self.lines:
+            print line
+
+
+################################################################
+##
+##  SOURCE PROCESSOR CLASS
+##
+##   The SourceProcessor is in charge of reading a C source file
+##   and decomposing it into a series of different "SourceBlock"
+##   objects.
+##
+##   each one of these blocks can be made of the following data:
+##
+##   - A documentation comment block that starts with "/**" and
+##     whose exact format will be discussed later
+##
+##   - normal source lines, including comments
+##
+##
+class SourceProcessor:
+
+    def  __init__( self ):
+        """initialize a source processor"""
+        self.blocks   = []
+        self.filename = None
+        self.format   = None
+        self.lines    = []
+
+    def  reset( self ):
+        """reset a block processor, clean all its blocks"""
+        self.blocks = []
+        self.format = None
+
+
+    def  parse_file( self, filename ):
+        """parse a C source file and add its blocks to the processor's list"""
+        
+        self.reset()
+        
+        self.filename = filename
+        
+        fileinput.close()
+        self.format    = None
+        self.lineno    = 0
+        self.lines     = []
+
+        for line in fileinput.input( filename ):
+            
+            # strip trailing newlines, important on Windows machines !!
+            if  line[-1] == '\012':
+                line = line[0:-1]
+    
+            if self.format == None:
+                self.process_normal_line( line )
+
+            else:
+                if self.format.end.match( line ):
+                    # that's a normal block end, add it to lines and
+                    # create a new block
+                    self.lines.append( line )
+                    self.add_block_lines()
+                    
+                elif self.format.column.match( line ):
+                    # that's a normal column line, add it to 'lines'
+                    self.lines.append( line )
+                        
+                else:
+                    # hmm.. this is an unexpected block end,
+                    # create a new block, but don't process the line
+                    self.add_block_lines()
+                    
+                    # we need to process the line again
+                    self.process_normal_line( line )
+                                
+        # record the last lines
+        self.add_block_lines()
+
+        
+
+    def process_normal_line( self, line ):
+        """process a normal line and check if it's the start of a new block"""
+        for f in re_source_block_formats:
+          if f.start.match( line ):
+            self.add_block_lines()
+            self.format = f
+            self.lineno = fileinput.filelineno()
+
+        self.lines.append( line )
+
+    
+
+    def add_block_lines( self ):
+        """add the current accumulated lines, and create a new block"""
+        if self.lines != []:
+            block = SourceBlock( self, self.filename, self.lineno, self.lines )
+            
+            self.blocks.append( block )
+            self.format = None
+            self.lines  = []
+
+    
+    # debugging only, not used in normal operations
+    def dump( self ):
+        """print all blocks in a processor"""
+        for b in self.blocks:
+            b.dump()
+
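+# A quick illustrative driver (the filename is hypothetical):
+#
+#     processor = SourceProcessor()
+#     processor.parse_file( "freetype/freetype.h" )
+#     for block in processor.blocks:
+#         if block.content:
+#             print block.location()
+#
+# prints the position of every documentation comment block found in the
+# file.
+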
+# eof
diff --git a/src/tools/docmaker/tohtml.py b/src/tools/docmaker/tohtml.py
new file mode 100644
index 000000000..1067d3494
--- /dev/null
+++ b/src/tools/docmaker/tohtml.py
@@ -0,0 +1,475 @@
+from sources import *
+from content import *
+from formatter import *
+import time
+
+# The following defines the HTML header used by all generated pages.
+#
+html_header_1 = """\
+<html>
+<header>
+<title>"""
+
+html_header_2= """ API Reference</title>
+<basefont face="Verdana,Geneva,Arial,Helvetica">
+<body text=#000000 bgcolor=#FFFFFF link=#0000EF vlink=#51188E alink=#FF0000>
+<center><h1>"""
+
+html_header_3=""" API Reference</h1></center>
+
+"""
+
+
+
+# The HTML footer used by all generated pages.
+#
+html_footer = """\
+</body></html>
+"""
+
+# The header and footer used for each section.
+#
+section_title_header = "<center><h1>"
+section_title_footer = "</h1></center>"
+
+# The header and footer used for code segments.
+#
+code_header = "<pre>"
+code_footer = "</pre>"
+
+# Paragraph header and footer.
+#
+para_header = "<p>"
+para_footer = "</p>"
+
+# Block header and footer.
+#
+block_header = "<center><table width=75%><tr><td>"
+block_footer = "</td></tr></table></center>"
+
+# Description header/footer.
+#
+description_header = "<center><table width=87%><tr><td>"
+description_footer = "</td></tr></table></center><br>"
+
+# Marker header/inter/footer combination.
+#
+marker_header = "<center><table width=87% cellpadding=5><tr bgcolor=#EEEEFF><td><em><b>"
+marker_inter = "</b></em></td></tr><tr><td>"
+marker_footer = "</td></tr></table></center>"
+
+# Source code extracts header/footer.
+#
+source_header = "<center><table width=87%><tr bgcolor=#D6E8FF><td><pre>\n"
+source_footer = "\n</pre></td></tr></table></center><br>"
+
+# Chapter header/inter/footer.
+#
+chapter_header = "<br><center><table width=75%><tr><td><h2>"
+chapter_inter = "</h2><ul>"
+chapter_footer = "</ul></td></tr></table></center>"
+
+
+# source language keyword coloration/styling
+#
+keyword_prefix = '<font color="darkblue">'
+keyword_suffix = '</font>'
+
+section_synopsis_header = '<center><h2>Synopsys</h2></center>'
+section_synopsis_footer = ''
+
+# Translate a single line of source to HTML.  This will convert
+# a "<" into "&lt;", ">" into "&gt;", etc.
+#
+def html_quote( line ):
+    result = string.replace( line, "&", "&amp;" )
+    result = string.replace( result, "<", "&lt;" )
+    result = string.replace( result, ">", "&gt;" )
+    return result
+
+
+# same as 'html_quote', but ignores left and right brackets
+#
+def html_quote0( line ):
+    return string.replace( line, "&", "&amp;" )
+
+
+def dump_html_code( lines, prefix = "" ):
+    # clean the last empty lines
+    #
+    l = len( lines )
+    while l > 0 and string.strip( lines[l - 1] ) == "":
+        l = l - 1
+
+    # The code footer should be directly appended to the last code
+    # line to avoid an additional blank line.
+    #
+    print prefix + code_header,
+    for line in lines[0 : l]:
+        print '\n' + prefix + html_quote(line),
+    print prefix + code_footer,
+
+
+
+class HtmlFormatter(Formatter):
+
+    def __init__( self, processor, project_title, file_prefix ):
+
+        Formatter.__init__( self, processor )
+
+        global html_header_1, html_header_2, html_header_3, html_footer
+
+        if file_prefix:
+            file_prefix = file_prefix + "-"
+        else:
+            file_prefix = ""
+
+        self.project_title = project_title
+        self.file_prefix   = file_prefix
+        self.html_header   = html_header_1 + project_title + html_header_2 + \
+                             project_title + html_header_3
+
+        self.html_footer = "<p><center><font size=-2>generated on " + \
+                           time.asctime( time.localtime( time.time() ) ) + \
+                           "</font></center></p>" + html_footer
+
+        self.columns = 3
+
+    def make_section_url( self, section ):
+        return self.file_prefix + section.name + ".html"
+
+
+    def make_block_url( self, block ):
+        return self.make_section_url( block.section ) + "#" + block.name
+
+
+    def make_html_words( self, words ):
+        """ convert a series of simple words into some HTML text """
+        line = ""
+        if words:
+            line = html_quote( words[0] )
+            for w in words[1:]:
+                line = line + " " + html_quote( w )
+
+        return line
+
+
+    def make_html_word( self, word ):
+        """analyze a simple word to detect cross-references and styling"""
+        # look for cross-references
+        #
+        m = re_crossref.match( word )
+        if m:
+            try:
+                name  = m.group(1)
+                block = self.identifiers[ name ]
+                url   = self.make_block_url( block )
+                return '<a href="' + url + '">' + name + '</a>'
+            except:
+                return '?' + name + '?'
+
+        # look for italics and bolds
+        m = re_italic.match( word )
+        if m:
+            name = m.group(1)
+            return '<i>' + name + '</i>'
+
+        m = re_bold.match( word )
+        if m:
+            name = m.group(1)
+            return '<b>' + name + '</b>'
+
+        return html_quote(word)
+
+
+    def make_html_para( self, words ):
+        """ convert a paragraph's words into tagged HTML text, handle xrefs """
+        line = ""
+        if words:
+            line = self.make_html_word( words[0] )
+            for word in words[1:]:
+                line = line + " " + self.make_html_word( word )
+
+        return "<p>" + line + "</p>"
+
+
+    def make_html_code( self, lines ):
+        """ convert a code sequence to HTML """
+        line = code_header + '\n'
+        for l in lines:
+            line = line + html_quote( l ) + '\n'
+
+        return line + code_footer
+
+
+    def make_html_items( self, items ):
+        """ convert a field's content into some valid HTML """
+        lines = []
+        for item in items:
+            if item.lines:
+                lines.append( self.make_html_code( item.lines ) )
+            else:
+                lines.append( self.make_html_para( item.words ) )
+
+        return string.join( lines, '\n' )
+
+
+    def print_html_items( self, items ):
+        print self.make_html_items( items )
+
+
+    def print_html_field( self, field ):
+        if field.name:
+            print "<table><tr valign=top><td><b>"+field.name+"</b></td><td>"
+
+        print self.make_html_items( field.items )
+
+        if field.name:
+            print "</td></tr></table>"
+
+
+    def html_source_quote( self, line, block_name = None ):
+        result = ""
+        while line:
+            m = re_source_crossref.match( line )
+            if m:
+                name   = m.group(2)
+                prefix = html_quote( m.group(1) )
+                length = len( m.group(0) )
+
+                if name == block_name:
+                    # this is the current block name, if any
+                    result = result + prefix + '<b>' + name + '</b>'
+
+                elif re_source_keywords.match(name):
+                    # this is a C keyword
+                    result = result + prefix + keyword_prefix + name + keyword_suffix
+
+                elif self.identifiers.has_key(name):
+                    # this is a known identifier
+                    block  = self.identifiers[name]
+                    result = result + prefix + '<a href="' + self.make_block_url( block ) + '">' + name + '</a>'
+                else:
+                    result = result + html_quote(line[ : length ])
+
+                line = line[ length : ]
+            else:
+                result = result + html_quote(line)
+                line   = []
+
+        return result
+
+
+    def print_html_field_list( self, fields ):
+        print "<table cellpadding=3 border=0>"
+        for field in fields:
+            print "<tr valign=top><td><b>" + field.name + "</b></td><td>"
+            self.print_html_items( field.items )
+        print "</td></tr></table>"
+
+
+    def print_html_markup( self, markup ):
+        table_fields = []
+        for field in markup.fields:
+            if field.name:
+                # we begin a new series of field or value definitions, we
+                # will record them in the 'table_fields' list before outputting
+                # all of them as a single table
+                #
+                table_fields.append( field )
+
+            else:
+                if table_fields:
+                    self.print_html_field_list( table_fields )
+                    table_fields = []
+
+                self.print_html_items( field.items )
+
+        if table_fields:
+            self.print_html_field_list( table_fields )
+
+    #
+    #  Formatting the index
+    #
+
+    def index_enter( self ):
+        print self.html_header
+        self.index_items = {}
+
+    def index_name_enter( self, name ):
+        block = self.identifiers[ name ]
+        url   = self.make_block_url( block )
+        self.index_items[ name ] = url
+
+    def index_exit( self ):
+
+        # block_index already contains the sorted list of index names
+        count = len( self.block_index )
+        rows  = (count + self.columns - 1)/self.columns
+
+        print "<center><table border=0 cellpadding=0 cellspacing=0>"
+        for r in range(rows):
+            line = "<tr>"
+            for c in range(self.columns):
+                i = r + c*rows
+                if i < count:
+                    bname = self.block_index[ r + c*rows ]
+                    url   = self.index_items[ bname ]
+                    line  = line + '<td><a href="' + url + '">' + bname + '</a></td>'
+                else:
+                    line = line + '<td></td>'
+            line = line + "</tr>"
+            print line
+
+        print "</table></center>"
+        print self.html_footer
+        self.index_items = {}
+
+    def index_dump( self, index_filename = None ):
+
+        if index_filename == None:
+            index_filename = self.file_prefix + "index.html"
+
+        Formatter.index_dump( self, index_filename )
+
+    #
+    #  Formatting the table of content
+    #
+    def toc_enter( self ):
+        print self.html_header
+        print "<center><h1>Table of Contents</h1></center>"
+
+    def toc_chapter_enter( self, chapter ):
+        print chapter_header + string.join(chapter.title) + chapter_inter
+        print "<table cellpadding=5>"
+
+    def toc_section_enter( self, section ):
+        print "<tr valign=top><td>"
+        print '<a href="' + self.make_section_url( section ) + '">' + \
+              section.title + '</a>'
+
+        print self.make_html_para( section.abstract )
+
+    def toc_section_exit( self, section ):
+        print "</td></tr>"
+
+    def toc_chapter_exit( self, chapter ):
+        print "</table>"
+        print chapter_footer
+
+    def toc_index( self, index_filename ):
+        print chapter_header + '<a href="' + index_filename + '">Global Index</a>' + chapter_inter + chapter_footer
+
+    def toc_exit( self ):
+        print "<hr>"
+        print self.html_footer
+
+    def toc_dump( self, toc_filename = None, index_filename = None ):
+        if toc_filename == None:
+            toc_filename = self.file_prefix + "toc.html"
+
+        if index_filename == None:
+            index_filename = self.file_prefix + "index.html"
+
+        Formatter.toc_dump( self, toc_filename, index_filename )
+
+    #
+    #  Formatting sections
+    #
+    def section_enter( self, section ):
+        print self.html_header
+
+        print section_title_header
+        print section.title
+        print section_title_footer
+
+        # print section synopsys
+        print section_synopsis_header
+        print "<center><table cellspacing=5 cellpadding=0 border=0>"
+
+        maxwidth = 0
+        for b in section.blocks.values():
+            if len(b.name) > maxwidth:
+                maxwidth = len(b.name)
+
+        width   = 130  # XXX magic number
+        columns = width / maxwidth
+        if columns < 1:
+            columns = 1
+
+        count = len(section.block_names)
+        rows  = (count + columns-1)/columns
+        for r in range(rows):
+            line = "<tr>"
+            for c in range(columns):
+                i = r + c*rows
+                line = line + '<td>'
+                if i < count:
+                    name = section.block_names[i]
+                    line = line + '<a href="#' + name + '">' + name + '</a>'
+
+                line = line + '</td>'
+            line = line + "</tr>"
+            print line
+
+        print "</table></center><br><br>"
+        print section_synopsis_footer
+
+        print description_header
+        print self.make_html_items( section.description )
+        print description_footer
+
+    def block_enter( self, block ):
+        print block_header
+
+        # place html anchor if needed
+        if block.name:
+            print '<a name="' + block.name + '">'
+            print "<center><h2>" + block.name + "</h2></center>"
+            print "<br>"
+
+        # dump the block C source lines now
+        if block.code:
+            print source_header
+            for l in block.code:
+                print self.html_source_quote( l, block.name )
+            print source_footer
+
+
+    def markup_enter( self, markup, block ):
+        if markup.tag == "description":
+            print description_header
+        else:
+            print marker_header + markup.tag + marker_inter
+
+        self.print_html_markup( markup )
+
+    def markup_exit( self, markup, block ):
+        if markup.tag == "description":
+            print description_footer
+        else:
+            print marker_footer
+
+    def block_exit( self, block ):
+        print block_footer
+
+
+    def section_exit( self, section ):
+        print html_footer
+
+
+    def section_dump_all( self ):
+        for section in self.sections:
+            self.section_dump( section, self.file_prefix + section.name + '.html' )
+
\ No newline at end of file
diff --git a/src/tools/docmaker/utils.py b/src/tools/docmaker/utils.py
new file mode 100644
index 000000000..f27353e99
--- /dev/null
+++ b/src/tools/docmaker/utils.py
@@ -0,0 +1,86 @@
+import string, sys, os
+
+# This function is used to sort the index.  It is a simple lexicographical
+# sort, except that it places capital letters before lowercase ones.
+#
+def index_sort( s1, s2 ):
+    if not s1:
+        return -1
+
+    if not s2:
+        return 1
+
+    l1 = len( s1 )
+    l2 = len( s2 )
+    m1 = string.lower( s1 )
+    m2 = string.lower( s2 )
+
+    for i in range( l1 ):
+        if i >= l2 or m1[i] > m2[i]:
+            return 1
+
+        if m1[i] < m2[i]:
+            return -1
+
+        if s1[i] < s2[i]:
+            return -1
+
+        if s1[i] > s2[i]:
+            return 1
+
+    if l2 > l1:
+        return -1
+
+    return 0
+
+
+# Sort input_list, placing the elements of order_list in front.
+#
+def sort_order_list( input_list, order_list ):
+    new_list = order_list[:]
+    for id in input_list:
+        if not id in order_list:
+            new_list.append( id )
+    return new_list
+
+
+# current output directory
+#
+output_dir = None
+
+
+# Open the standard output to a given project documentation file.  Use
+# "output_dir" to determine the filename location if necessary and save the
+# old stdout in a tuple that is returned by this function.
+#
+def open_output( filename ):
+    global output_dir
+
+    if output_dir and output_dir != "":
+        filename = output_dir + os.sep + filename
+
+    old_stdout = sys.stdout
+    new_file   = open( filename, "w" )
+    sys.stdout = new_file
+
+    return ( new_file, old_stdout )
+
+
+# Close the output that was returned by "open_output".
+#
+def close_output( output ):
+    output[0].close()
+    sys.stdout = output[1]
+
+
+# Check output directory.
+#
+def check_output( ):
+    global output_dir
+    if output_dir:
+        if output_dir != "":
+            if not os.path.isdir( output_dir ):
+                sys.stderr.write( "argument" + " '" + output_dir + "' " +
+                                  "is not a valid directory" )
+                sys.exit( 2 )
+        else:
+            output_dir = None