From f52165cc068e5abc27bd64d793358c258e1acd18 Mon Sep 17 00:00:00 2001
From: David Turner
Date: Tue, 30 Jul 2002 18:49:52 +0000
Subject: [PATCH] * src/tools/docmaker/*: adding new (more advanced)
version of the DocMaker tool. Python with regular expressions rocks..
---
ChangeLog | 3 +
src/tools/docmaker/content.py | 547 ++++++++++++++++++++++++++++++++
src/tools/docmaker/docmaker.py | 120 +++++++
src/tools/docmaker/formatter.py | 194 +++++++++++
src/tools/docmaker/sources.py | 355 +++++++++++++++++++++
src/tools/docmaker/tohtml.py | 475 +++++++++++++++++++++++++++
src/tools/docmaker/utils.py | 86 +++++
7 files changed, 1780 insertions(+)
create mode 100644 src/tools/docmaker/content.py
create mode 100644 src/tools/docmaker/docmaker.py
create mode 100644 src/tools/docmaker/formatter.py
create mode 100644 src/tools/docmaker/sources.py
create mode 100644 src/tools/docmaker/tohtml.py
create mode 100644 src/tools/docmaker/utils.py
diff --git a/ChangeLog b/ChangeLog
index c4654f94e..3ab8883a0 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -4,6 +4,9 @@
to demonstrate a "cleaner" API to support incremental font loading.
comments appreciated...
+ * src/tools/docmaker/*: adding new (more advanced) version of
+ the DocMaker tool. Python with regular expressions rocks..
+
2002-07-28 Werner Lemberg
s/ft_memset/FT_MEM_SET/.
diff --git a/src/tools/docmaker/content.py b/src/tools/docmaker/content.py
new file mode 100644
index 000000000..52f7c1153
--- /dev/null
+++ b/src/tools/docmaker/content.py
@@ -0,0 +1,547 @@
+#
+# this file contains routines used to parse the content of documentation
+# comment blocks and build more structured objects out of them
+#
+
+from sources import *
+from utils import *
+import string, re
+
+
+# this regular expression is used to detect code sequences. these
+# are simply code fragments embedded in '{' and '}' like in:
+#
+# {
+# x = y + z;
+# if ( zookoo == 2 )
+# {
+# foobar();
+# }
+# }
+#
+# note that the indentation of the starting and ending braces must be
+# exactly the same. the code sequence can contain braces at greater
+# indentation
+#
+re_code_start = re.compile( r"(\s*){\s*$" )
+re_code_end = re.compile( r"(\s*)}\s*$" )
+
+
+# this regular expression is used to isolate identifiers from
+# other text
+#
+re_identifier = re.compile( r'(\w*)' )
+
+
+#############################################################################
+#
+# The DocCode class is used to store source code lines.
+#
+# 'self.lines' contains a set of source code lines that will be dumped as
+# HTML in a <PRE> tag.
+#
+# The object is filled line by line by the parser; it strips the leading
+# "margin" space from each input line before storing it in 'self.lines'.
+#
+class DocCode:
+
+ # 'margin' is the number of leading columns to remove from each entry
+ # of 'lines'; 'lines' is the list of raw code lines.
+ def __init__( self, margin, lines ):
+ self.lines = []
+ # always None here; DocPara is the class that fills 'words'
+ self.words = None
+
+ # remove margin spaces
+ for l in lines:
+ # only cut the margin when the first 'margin' columns are blank,
+ # so that non-blank text inside the margin is never lost
+ if string.strip( l[:margin] ) == "":
+ l = l[margin:]
+ self.lines.append( l )
+
+ # print every stored line preceded by 'prefix'; 'width' is accepted
+ # but not used by this method
+ def dump( self, prefix = "", width=60 ):
+ for l in self.lines:
+ print prefix + l
+
+
+#############################################################################
+#
+# The DocPara class is used to store "normal" text paragraph.
+#
+# 'self.words' contains the list of words that make up the paragraph
+#
+class DocPara:
+
+ # 'lines' is the list of text lines of the paragraph; each one is
+ # stripped and split on whitespace, and the resulting words are
+ # accumulated in 'self.words'.
+ def __init__( self, lines ):
+ # always None here; DocCode is the class that fills 'lines'
+ self.lines = None
+ self.words = []
+ for l in lines:
+ l = string.strip(l)
+ self.words.extend( string.split( l ) )
+
+ # print the paragraph words re-flowed into lines, each output line
+ # being preceded by 'prefix'. a new output line is started when adding
+ # the next word (plus its separating space) would exceed 'width'.
+ def dump( self, prefix = "", width = 60 ):
+ cur = "" # current line
+ col = 0 # current width
+
+ for word in self.words:
+ ln = len(word)
+ # account for the space that separates this word from the
+ # previous one on the same line
+ if col > 0:
+ ln = ln+1
+
+ if col + ln > width:
+ print prefix + cur
+ cur = word
+ col = len(word)
+ else:
+ if col > 0:
+ cur = cur + " "
+ cur = cur + word
+ col = col + ln
+
+ # flush the last, partially filled line
+ if col > 0:
+ print prefix + cur
+
+
+
+#############################################################################
+#
+# The DocField class is used to store a list containing either DocPara or
+# DocCode objects. Each DocField also has an optional "name" which is used
+# when the object corresponds to a field or value definition
+#
+class DocField:
+    """an optionally named sequence of DocPara and DocCode items"""
+
+    def __init__( self, name, lines ):
+        """split 'lines' into paragraphs and code sequences
+
+        'name' is the field name, or None for normal paragraphs/sources.
+        'lines' is the list of text lines that belong to the field.
+        """
+        self.name  = name  # can be None for normal paragraphs/sources
+        self.items = []    # list of DocPara / DocCode items
+
+        mode_none = 0      # parsing normal text
+        mode_code = 1      # parsing a code sequence
+
+        margin    = -1     # current code sequence indentation
+        cur_lines = []
+
+        # now analyze the markup lines to see if they contain paragraphs
+        # or code sequences
+        #
+        mode = mode_none
+        for l in lines:
+
+            # are we parsing a code sequence ?
+            if mode == mode_code:
+
+                m = re_code_end.match( l )
+                if m and len(m.group(1)) <= margin:
+                    # that's it, we finished the code sequence; a closing
+                    # brace only counts when it is not indented deeper
+                    # than the opening one
+                    code = DocCode( margin, cur_lines )
+                    self.items.append( code )
+                    margin    = -1
+                    cur_lines = []
+                    mode      = mode_none
+                else:
+                    # nope, continue the code sequence
+                    cur_lines.append( l[margin:] )
+            else:
+                # start of code sequence ?
+                m = re_code_start.match( l )
+                if m:
+                    # save current lines as a paragraph
+                    if cur_lines:
+                        para = DocPara( cur_lines )
+                        self.items.append( para )
+                        cur_lines = []
+
+                    # switch to code extraction mode; the indentation of
+                    # the opening brace becomes the margin
+                    margin = len(m.group(1))
+                    mode   = mode_code
+
+                else:
+                    if not string.split( l ) and cur_lines:
+                        # if the line is empty, we end the current
+                        # paragraph, if any
+                        para = DocPara( cur_lines )
+                        self.items.append( para )
+                        cur_lines = []
+                    else:
+                        # otherwise, simply add the line to the current
+                        # paragraph
+                        cur_lines.append( l )
+
+        if mode == mode_code:
+            # unexpected end of code sequence: keep what we collected
+            code = DocCode( margin, cur_lines )
+            self.items.append( code )
+
+        elif cur_lines:
+            para = DocPara( cur_lines )
+            self.items.append( para )
+
+    def dump( self, prefix = "" ):
+        """print the field name (if any) followed by all its items"""
+        # the attribute set by '__init__' is 'name'; there is no 'field'
+        if self.name:
+            print prefix + self.name + " ::"
+            prefix = prefix + "----"
+
+        first = 1
+        for p in self.items:
+            # separate consecutive items with an empty line
+            if not first:
+                print ""
+            p.dump( prefix )
+            first = 0
+
+
+# this regular expression is used to detect field definitions
+#
+re_field = re.compile( r"\s*(\w*)\s*::" )
+
+
+
+class DocMarkup:
+    """the content of one markup section, split into DocField objects"""
+
+    def __init__( self, tag, lines ):
+        """build the field list of a markup section
+
+        'tag' is the markup tag name (stored in lower case) and 'lines'
+        is the list of text lines found in the section.
+        """
+        self.tag    = string.lower(tag)
+        self.fields = []
+
+        cur_lines = []
+        field     = None
+
+        for l in lines:
+            m = re_field.match( l )
+            if m:
+                # we detected the start of a new field definition
+
+                # first, save the current one
+                if cur_lines:
+                    f = DocField( field, cur_lines )
+                    self.fields.append( f )
+                    cur_lines = []
+                    field     = None
+
+                field     = m.group(1)      # record field name
+                ln        = len(m.group(0))
+                # blank out the 'name ::' prefix but keep the columns, so
+                # that the remaining text keeps its indentation
+                l         = " "*ln + l[ln:]
+                cur_lines = [ l ]
+            else:
+                cur_lines.append( l )
+
+        if field or cur_lines:
+            f = DocField( field, cur_lines )
+            self.fields.append( f )
+
+    def get_name( self ):
+        """return the first word of the first item, or None if there is none"""
+        try:
+            return self.fields[0].items[0].words[0]
+
+        except ( IndexError, TypeError, AttributeError ):
+            # no field, no item, or a DocCode item whose 'words' is None
+            return None
+
+    def dump( self, margin ):
+        """print the markup as '<tag>' ... '</tag>' indented by 'margin'"""
+        print " "*margin + "<" + self.tag + ">"
+        for f in self.fields:
+            f.dump( " " )
+        print " "*margin + "</" + self.tag + ">"
+
+
+
+
+class DocChapter:
+
+ # 'block' is the documentation block that defines the chapter, or a
+ # false value (e.g. None) to create the catch-all "Other" chapter.
+ def __init__( self, block ):
+ self.block = block
+ # filled later, see ContentProcessor.finish
+ self.sections = []
+ if block:
+ self.name = block.name
+ self.title = block.get_markup_words( "title" )
+ # names listed in the block's "sections" markup
+ self.order = block.get_markup_words( "sections" )
+ else:
+ self.name = "Other"
+ # kept as a list of words, like the result of get_markup_words
+ self.title = string.split( "Miscellaneous" )
+ self.order = []
+
+
+
+class DocSection:
+
+ # create an empty section called 'name'; title, abstract, description
+ # and order keep their placeholder values until 'process' is called
+ def __init__( self, name = "Other" ):
+ self.name = name
+ self.blocks = {}
+ self.block_names = [] # ordered block names in section
+ self.defs = []
+ self.abstract = ""
+ self.description = ""
+ self.order = []
+ self.title = "ERROR"
+ self.chapter = None
+
+ # record a block that may carry the section's own description
+ def add_def( self, block ):
+ self.defs.append( block )
+
+ # record a block that belongs to the section, indexed by its name;
+ # 'block_names' remembers the insertion order
+ def add_block( self, block ):
+ self.block_names.append( block.name )
+ self.blocks[ block.name ] = block
+
+ # fill title/abstract/description/order from the first block in
+ # 'self.defs' that has a non-empty "Title" markup; nothing changes
+ # when no such block exists
+ def process( self ):
+ # lookup one block that contains a valid section description
+ for block in self.defs:
+ title = block.get_markup_text( "Title" )
+ if title:
+ self.title = title
+ self.abstract = block.get_markup_words( "abstract" )
+ self.description = block.get_markup_items( "description" )
+ self.order = block.get_markup_words( "order" )
+ return
+
+ # re-arrange 'block_names' according to 'self.order'
+ # NOTE(review): sort_order_list comes from utils and is not visible
+ # here -- confirm its exact ordering rules
+ def reorder( self ):
+
+ self.block_names = sort_order_list( self.block_names, self.order )
+
+
+class ContentProcessor:
+
+ def __init__( self ):
+ """initialize a block content processor"""
+ self.reset()
+
+ self.sections = {} # dictionary of documentation sections
+ self.section = None # current documentation section
+
+ self.chapters = [] # list of chapters
+
+ def set_section( self, section_name ):
+ """set current section during parsing"""
+ # the section is created on first use, then looked up by name
+ if not self.sections.has_key( section_name ):
+ section = DocSection( section_name )
+ self.sections[ section_name ] = section
+ self.section = section
+ else:
+ self.section = self.sections[ section_name ]
+
+ # wrap 'block' in a DocChapter and append it to 'self.chapters'
+ def add_chapter( self, block ):
+ chapter = DocChapter( block )
+ self.chapters.append( chapter )
+
+
+ def reset( self ):
+ """reset the content processor for a new block"""
+ self.markups = []
+ self.markup = None
+ self.markup_lines = []
+
+ def add_markup( self ):
+ """add a new markup section"""
+ # nothing is stored when no tag is pending or no line was collected
+ if self.markup and self.markup_lines:
+
+ # get rid of last line of markup if it's empty
+ marks = self.markup_lines
+ if len(marks) > 0 and not string.strip(marks[-1]):
+ self.markup_lines = marks[:-1]
+
+ m = DocMarkup( self.markup, self.markup_lines )
+
+ self.markups.append( m )
+
+ self.markup = None
+ self.markup_lines = []
+
+
+ def process_content( self, content ):
+ """process a block content and return a list of DocMarkup objects
+ corresponding to it"""
+ # NOTE(review): the locals 'markup' and 'markup_lines' below are
+ # never read; the method works on self.markup / self.markup_lines
+ markup = None
+ markup_lines = []
+ # stays 1 until the first markup tag is seen; lines found before
+ # that tag are dropped
+ first = 1
+
+ for line in content:
+ found = None
+ for t in re_markup_tags:
+ m = t.match( line )
+ if m:
+ found = string.lower(m.group(1))
+ prefix = len(m.group(0))
+ line = " "*prefix + line[prefix:] # remove markup from line
+ break
+
+ # is it the start of a new markup section ?
+ if found:
+ first = 0
+ self.add_markup() # add current markup content
+ self.markup = found
+ # keep the text that follows the tag on the same line, if any
+ if len(string.strip( line )) > 0:
+ self.markup_lines.append( line )
+ elif first == 0:
+ self.markup_lines.append( line )
+
+ # flush the last markup section
+ self.add_markup()
+
+ return self.markups
+
+
+ # walk the blocks of 'source_processor' and build one DocBlock for each
+ # block that has content; the DocBlock constructor registers the new
+ # object with this processor, which is why 'doc_block' is not used
+ def parse_sources( self, source_processor ):
+ blocks = source_processor.blocks
+ count = len(blocks)
+ for n in range(count):
+
+ source = blocks[n]
+ if source.content:
+ # this is a documentation comment, we need to catch
+ # all following normal blocks in the "follow" list
+ #
+ follow = []
+ m = n+1
+ while m < count and not blocks[m].content:
+ follow.append( blocks[m] )
+ m = m+1
+
+ doc_block = DocBlock( source, follow, self )
+
+
+ # to be called once all sources were parsed: finalizes sections and
+ # attaches them to chapters
+ def finish( self ):
+
+ # process all sections to extract their abstract, description
+ # and ordered list of items
+ #
+ for sec in self.sections.values():
+ sec.process()
+
+ # process chapters to check that all sections are correctly
+ # listed there
+ for chap in self.chapters:
+ for sec in chap.order:
+ if self.sections.has_key(sec):
+ section = self.sections[ sec ]
+ section.chapter = chap
+ section.reorder()
+ chap.sections.append( section )
+ else:
+ sys.stderr.write( "WARNING: chapter '" +
+ chap.name + "' in " + chap.block.location() + \
+ " lists unknown section '" + sec + "'\n" )
+
+ # check that all sections are in a chapter
+ #
+ others = []
+ for sec in self.sections.values():
+ if not sec.chapter:
+ others.append(sec)
+
+ # create a new special chapter for all remaining sections
+ # when necessary
+ #
+ if others:
+ chap = DocChapter( None )
+ chap.sections = others
+ self.chapters.append( chap )
+
+
+
+class DocBlock:
+
+ # 'source' is the documentation comment block, 'follow' the list of
+ # normal blocks found right after it, and 'processor' the
+ # ContentProcessor that parses the content and receives the new block
+ def __init__( self, source, follow, processor ):
+
+ processor.reset()
+
+ self.source = source
+ self.code = []
+ self.type = "ERRTYPE"
+ self.name = "ERRNAME"
+ self.section = processor.section
+ self.markups = processor.process_content( source.content )
+
+ # compute block type from first markup tag
+ # ("ERRTYPE" is kept when there is no markup at all)
+ try:
+ self.type = self.markups[0].tag
+ except:
+ pass
+
+
+ # compute block name from first markup paragraph
+ # ("ERRNAME" is kept when no first word can be found)
+ try:
+ markup = self.markups[0]
+ para = markup.fields[0].items[0]
+ name = para.words[0]
+ m = re_identifier.match( name )
+ if m:
+ name = m.group(1)
+ self.name = name
+ except:
+ pass
+
+ # detect new section starts
+ if self.type == "section":
+ processor.set_section( self.name )
+ processor.section.add_def( self )
+
+ # detect new chapter
+ elif self.type == "chapter":
+ processor.add_chapter( self )
+
+ else:
+ # NOTE(review): processor.section is None until a "section"
+ # block has been seen, so this call fails for earlier blocks
+ processor.section.add_block( self )
+
+ # now, compute the source lines relevant to this documentation
+ # block. We keep normal comments in for obvious reasons (??)
+ source = []
+ for b in follow:
+ if b.format:
+ break
+ for l in b.lines:
+ # we use "/* */" as a separator
+ if re_source_sep.match( l ):
+ break
+ source.append( l )
+
+ # now strip the leading and trailing empty lines from the sources
+ # NOTE(review): because both loops stop at 'start < end', a list
+ # made only of empty lines keeps its last line
+ start = 0
+ end = len( source )-1
+
+ while start < end and not string.strip( source[start] ):
+ start = start + 1
+
+ while start < end and not string.strip( source[end] ):
+ end = end - 1
+
+ source = source[start:end+1]
+
+ self.code = source
+
+
+ # location string of the underlying source block
+ def location( self ):
+ return self.source.location()
+
+
+
+ def get_markup( self, tag_name ):
+ """return the DocMarkup corresponding to a given tag in a block"""
+ # the comparison is case-insensitive: markup tags are stored in
+ # lower case
+ for m in self.markups:
+ if m.tag == string.lower(tag_name):
+ return m
+ return None
+
+
+ def get_markup_name( self, tag_name ):
+ """return the name of a given primary markup in a block"""
+ try:
+ m = self.get_markup( tag_name )
+ return m.get_name()
+ except:
+ return None
+
+
+ # return the word list of the first item of the first field of the
+ # given markup, or [] when the markup (or that item) is missing
+ def get_markup_words( self, tag_name ):
+ try:
+ m = self.get_markup( tag_name )
+ return m.fields[0].items[0].words
+ except:
+ return []
+
+
+ # same as get_markup_words, but the words are joined into one string
+ def get_markup_text( self, tag_name ):
+ result = self.get_markup_words( tag_name )
+ return string.join( result )
+
+
+ # return the item list of the first field of the given markup, or
+ # None when the markup is missing or has no field
+ def get_markup_items( self, tag_name ):
+ try:
+ m = self.get_markup( tag_name )
+ return m.fields[0].items
+ except:
+ return None
\ No newline at end of file
diff --git a/src/tools/docmaker/docmaker.py b/src/tools/docmaker/docmaker.py
new file mode 100644
index 000000000..a502c9c3a
--- /dev/null
+++ b/src/tools/docmaker/docmaker.py
@@ -0,0 +1,120 @@
+#!/usr/bin/env python
+#
+# DocMaker 0.2 (c) 2002 David Turner
+#
+# This program is a re-write of the original DocMaker tool used
+# to generate the API Reference of the FreeType font engine
+# by converting in-source comments into structured HTML
+#
+# This new version is capable of outputting XML data, as well
+# as accepting more liberal formatting options
+#
+# It also uses regular expression matching and substitution
+# to speed things up significantly
+#
+
+from sources import *
+from content import *
+from tohtml import *
+
+import sys, os, time, string, glob, getopt
+
+
+def file_exists( pathname ):
+    """checks that a given file exists
+
+    returns 1 when 'pathname' can be opened for reading; otherwise
+    writes a message to stderr and returns None. the result is only
+    meant to be used as a truth value (see 'make_file_list').
+    """
+    result = 1
+    try:
+        handle = open( pathname, "r" )
+        handle.close()
+    except IOError:
+        result = None
+        sys.stderr.write( pathname + " couldn't be accessed\n" )
+
+    return result
+
+
+def make_file_list( args = None ):
+ """builds a list of input files from command-line arguments"""
+
+ # 'args' is a list of path names or '*' patterns; when it is empty or
+ # None, sys.argv[1:] is used instead. returns None when no path name
+ # was collected at all, else the list of path names accepted by
+ # 'file_exists'.
+
+ file_list = []
+ # sys.stderr.write( repr( sys.argv[1 :] ) + '\n' )
+
+ if not args:
+ args = sys.argv[1 :]
+
+ for pathname in args:
+ # expand wildcards ourselves instead of relying on the shell
+ if string.find( pathname, '*' ) >= 0:
+ newpath = glob.glob( pathname )
+ newpath.sort() # sort files -- this is important because
+ # of the order of files
+ else:
+ newpath = [pathname]
+
+ # append 'newpath' at the end of 'file_list'
+ last = len( file_list )
+ file_list[last : last] = newpath
+
+ if len( file_list ) == 0:
+ file_list = None
+ else:
+ # now filter the file list to remove non-existing ones
+ file_list = filter( file_exists, file_list )
+
+ return file_list
+
+
+
+# print the command-line help text on standard output
+def usage():
+ print "\nDocMaker 0.2 Usage information\n"
+ print " docmaker [options] file1 [ file2 ... ]\n"
+ print "using the following options:\n"
+ print " -h : print this page"
+
+
+def main( argv ):
+    """main program loop
+
+    'argv' is the full argument vector, program name included. exits
+    with status 2 on an invalid option, 1 when no file is given and 0
+    after printing the help page.
+    """
+
+    try:
+        opts, args = getopt.getopt( argv[1:],"h", [ "help" ] )
+
+    except getopt.GetoptError:
+        usage()
+        sys.exit( 2 )
+
+    if args == []:
+        usage()
+        sys.exit( 1 )
+
+    # process options
+    #
+    for opt in opts:
+        if opt[0] in ( "-h", "--help" ):
+            usage()
+            sys.exit( 0 )
+
+    # create context and processor
+    source_processor  = SourceProcessor()
+    content_processor = ContentProcessor()
+
+    # retrieve the list of files to process; use the arguments left by
+    # getopt so that option flags are never mistaken for file names, and
+    # cope with the None returned when no pattern matched anything
+    file_list = make_file_list( args ) or []
+    for filename in file_list:
+        # parse_file() resets the block list, so the content of each
+        # file must be processed before the next file is parsed
+        source_processor.parse_file( filename )
+        content_processor.parse_sources( source_processor )
+
+    # process sections
+    content_processor.finish()
+
+    formatter = HtmlFormatter( content_processor, "Example", "zz" )
+
+    formatter.toc_dump()
+    formatter.index_dump()
+    formatter.section_dump_all()
+
+
+# if called from the command line
+#
+if __name__ == '__main__':
+ main( sys.argv )
+
+
+# eof
diff --git a/src/tools/docmaker/formatter.py b/src/tools/docmaker/formatter.py
new file mode 100644
index 000000000..36d72aeca
--- /dev/null
+++ b/src/tools/docmaker/formatter.py
@@ -0,0 +1,194 @@
+from sources import *
+from content import *
+from utils import *
+
+# Base class for output formatters. The '*_dump' methods walk the parsed
+# documentation and call the '*_enter' / '*_exit' hooks, which all do
+# nothing in this class.
+class Formatter:
+
+ # 'processor' is the ContentProcessor holding the parsed chapters and
+ # sections
+ def __init__( self, processor ):
+
+ self.processor = processor
+ self.identifiers = {}
+ self.chapters = processor.chapters
+ self.sections = processor.sections.values()
+ self.block_index = []
+
+ # store all blocks in a dictionary
+ # NOTE(review): the dictionary that gets filled is
+ # 'self.identifiers'; 'self.blocks' is not written again in this
+ # class
+ self.blocks = []
+ for section in self.sections:
+ for block in section.blocks.values():
+ self.add_identifier( block.name, block )
+
+ # add enumeration values to the index, since this is useful
+ for markup in block.markups:
+ if markup.tag == 'values':
+ for field in markup.fields:
+ self.add_identifier( field.name, block )
+
+
+ # NOTE(review): index_sort comes from utils and is not visible here
+ self.block_index = self.identifiers.keys()
+ self.block_index.sort( index_sort )
+
+
+ # map 'name' to 'block'; a name that is already known is left
+ # unchanged and a warning is written to stderr
+ def add_identifier( self, name, block ):
+ if self.identifiers.has_key( name ):
+ # duplicate name !!
+ sys.stderr.write( \
+ "WARNING: duplicate definition for '" + name + "' in " + \
+ block.location() + ", previous definition in " + \
+ self.identifiers[ name ].location() + "\n" )
+ else:
+ self.identifiers[name] = block
+
+
+ #
+ # Formatting the table of contents
+ #
+
+ def toc_enter( self ):
+ pass
+
+ def toc_chapter_enter( self, chapter ):
+ pass
+
+ def toc_section_enter( self, section ):
+ pass
+
+ def toc_section_exit( self, section ):
+ pass
+
+ def toc_chapter_exit( self, chapter ):
+ pass
+
+ def toc_index( self, index_filename ):
+ pass
+
+ def toc_exit( self ):
+ pass
+
+ # walk all chapters and their sections, calling the toc hooks in
+ # order. NOTE(review): open_output / close_output come from utils and
+ # are not visible here -- presumably they redirect the output to
+ # 'toc_filename'; confirm
+ def toc_dump( self, toc_filename = None, index_filename = None ):
+
+ output = None
+ if toc_filename:
+ output = open_output( toc_filename )
+
+ self.toc_enter()
+
+ for chap in self.processor.chapters:
+
+ self.toc_chapter_enter( chap )
+
+ for section in chap.sections:
+ self.toc_section_enter( section )
+ self.toc_section_exit( section )
+
+ self.toc_chapter_exit ( chap )
+
+ self.toc_index( index_filename )
+
+ self.toc_exit()
+
+ if output:
+ close_output( output )
+
+ #
+ # Formatting the index
+ #
+
+ def index_enter( self ):
+ pass
+
+ def index_name_enter( self, name ):
+ pass
+
+ def index_name_exit( self, name ):
+ pass
+
+ def index_exit( self ):
+ pass
+
+ # call the index hooks for every name of 'self.block_index', in order
+ def index_dump( self, index_filename = None ):
+
+ output = None
+ if index_filename:
+ output = open_output( index_filename )
+
+ self.index_enter()
+
+ for name in self.block_index:
+ self.index_name_enter( name )
+ self.index_name_exit ( name )
+
+ self.index_exit()
+
+ if output:
+ close_output( output )
+
+ #
+ # Formatting a section
+ #
+ def section_enter( self, section ):
+ pass
+
+ def block_enter( self, block ):
+ pass
+
+ def markup_enter( self, markup, block = None ):
+ pass
+
+ def field_enter( self, field, markup = None, block = None ):
+ pass
+
+ def field_exit( self, field, markup = None, block = None ):
+ pass
+
+ def markup_exit( self, markup, block = None ):
+ pass
+
+ def block_exit( self, block ):
+ pass
+
+ def section_exit( self, section ):
+ pass
+
+
+ # walk the blocks of 'section' in 'block_names' order, then their
+ # markups and fields, calling the matching hooks
+ def section_dump( self, section, section_filename = None ):
+
+ output = None
+ if section_filename:
+ output = open_output( section_filename )
+
+ self.section_enter( section )
+
+ for name in section.block_names:
+ # blocks are looked up through the global identifier table
+ block = self.identifiers[ name ]
+ self.block_enter( block )
+
+ for markup in block.markups[1:]: # always ignore first markup !!
+ self.markup_enter( markup, block )
+
+ for field in markup.fields:
+ self.field_enter( field, markup, block )
+
+ self.field_exit ( field, markup, block )
+
+ self.markup_exit( markup, block )
+
+ self.block_exit( block )
+
+ self.section_exit ( section )
+
+ if output:
+ close_output( output )
+
+
+ # dump every known section, without any output file name
+ def section_dump_all( self ):
+ for section in self.sections:
+ self.section_dump( section )
+
+ #
+ # Formatting a block
+ #
+
+
+
+
diff --git a/src/tools/docmaker/sources.py b/src/tools/docmaker/sources.py
new file mode 100644
index 000000000..6961cd979
--- /dev/null
+++ b/src/tools/docmaker/sources.py
@@ -0,0 +1,355 @@
+#
+# this file contains definitions of classes needed to decompose
+# C sources files into a series of multi-line "blocks". There are
+# two kinds of blocks:
+#
+# - normal blocks, which contain source code or ordinary comments
+#
+# - documentation blocks, which have restricted formatting, and
+# whose text always starts with a documentation markup tag like
+# "<Function>", "<Type>", etc.
+#
+# the routines used to process the content of documentation blocks
+# are not contained here, but in "content.py"
+#
+# the classes and methods found here only deal with text parsing
+# and basic documentation block extraction
+#
+import fileinput, re, sys, os, string
+
+
+
+
+
+
+################################################################
+##
+## BLOCK FORMAT PATTERN
+##
+## A simple class containing compiled regular expressions used
+## to detect potential documentation format block comments within
+## C source code
+##
+## note that the 'column' pattern must contain a group that will
+## be used to "unbox" the content of documentation comment blocks
+##
+class SourceBlockFormat:
+
+ # 'id' is an identifier for the format; 'start', 'column' and 'end'
+ # are regular expression sources, compiled in verbose mode, for the
+ # first line, the content lines and the last line of a block
+ def __init__( self, id, start, column, end ):
+ """create a block pattern, used to recognize special documentation blocks"""
+
+ self.id = id
+ self.start = re.compile( start, re.VERBOSE )
+ self.column = re.compile( column, re.VERBOSE )
+ self.end = re.compile( end, re.VERBOSE )
+
+
+
+#
+# format 1 documentation comment blocks look like the following:
+#
+# /************************************/
+# /* */
+# /* */
+# /* */
+# /************************************/
+#
+# we define a few regular expressions here to detect them
+#
+
+start = r'''
+ \s* # any number of whitespace
+ /\*{2,}/ # followed by '/' and at least two asterisks then '/'
+ \s*$ # eventually followed by whitespace
+'''
+
+column = r'''
+ \s* # any number of whitespace
+ /\*{1} # followed by '/' and precisely one asterisk
+ ([^*].*) # followed by anything (group 1)
+ \*{1}/ # followed by one asterisk and a '/'
+ \s*$ # enventually followed by whitespace
+'''
+
+re_source_block_format1 = SourceBlockFormat( 1, start, column, start )
+
+#
+# format 2 documentation comment blocks look like the following:
+#
+# /************************************
+# *
+# *
+# *
+# *
+# **/ (1 or more asterisks at the end)
+#
+# we define a few regular expressions here to detect them
+#
+start = r'''
+ \s* # any number of whitespace
+ /\*{2,} # followed by '/' and at least two asterisks
+ \s*$ # eventually followed by whitespace
+'''
+
+column = r'''
+ \s* # any number of whitespace
+ \*{1} # followed by precisely one asterisk
+ (.*) # followed by anything (group1)
+'''
+
+end = r'''
+ \s* # any number of whitespace
+ \*+/ # followed by at least on asterisk, then '/'
+'''
+
+re_source_block_format2 = SourceBlockFormat( 2, start, column, end )
+
+#
+# the list of supported documentation block formats, we could add new ones
+# relatively easily
+#
+re_source_block_formats = [ re_source_block_format1, re_source_block_format2 ]
+
+
+#
+# the following regular expressions corresponds to markup tags
+# within the documentation comment blocks. they're equivalent
+# despite their different syntax
+#
+# notice how each markup tag _must_ begin a new line
+#
+re_markup_tag1 = re.compile( r'''\s*<(\w*)>''' ) # <xxxx> format
+re_markup_tag2 = re.compile( r'''\s*@(\w*):''' ) # @xxxx: format
+
+#
+# the list of supported markup tags, we could add new ones relatively
+# easily
+#
+re_markup_tags = [ re_markup_tag1, re_markup_tag2 ]
+
+#
+# used to detect a cross-reference, after markup tags have been stripped
+#
+re_crossref = re.compile( r'@(\w*)' )
+
+#
+# used to detect italic and bold styles in paragraph text
+#
+re_italic = re.compile( r'_(\w+)_' )
+re_bold = re.compile( r'\*(\w+)\*' )
+
+#
+# used to detect the end of commented source lines
+#
+re_source_sep = re.compile( r'\s*/\*\s*\*/' )
+
+#
+# used to perform cross-reference within source output
+#
+re_source_crossref = re.compile( r'(\W*)(\w*)' )
+
+#
+# a list of reserved source keywords
+#
+re_source_keywords = re.compile( '''( typedef |
+ struct |
+ enum |
+ union |
+ const |
+ char |
+ int |
+ short |
+ long |
+ void |
+ signed |
+ unsigned |
+ \#include |
+ \#define |
+ \#undef |
+ \#if |
+ \#ifdef |
+ \#ifndef |
+ \#else |
+ \#endif )''', re.VERBOSE )
+
+################################################################
+##
+## SOURCE BLOCK CLASS
+##
+## A SourceProcessor is in charge of reading a C source file
+## and decomposing it into a series of different "SourceBlocks".
+## each one of these blocks can be made of the following data:
+##
+## - A documentation comment block that starts with "/**" and
+## whose exact format will be discussed later
+##
+## - normal source lines, including comments
+##
+## the important fields in a text block are the following ones:
+##
+## self.lines : a list of text lines for the corresponding block
+##
+## self.content : for documentation comment blocks only, this is the
+## block content that has been "unboxed" from its
+## decoration. This is an empty list for all other blocks
+## (i.e. sources or ordinary comments with no starting
+## markup tag)
+##
+class SourceBlock:
+ # 'processor' is the SourceProcessor creating the block (its current
+ # 'format' is copied), 'filename' and 'lineno' locate the block, and
+ # 'lines' is the list of raw text lines of the block
+ def __init__( self, processor, filename, lineno, lines ):
+ self.processor = processor
+ self.filename = filename
+ self.lineno = lineno
+ self.lines = lines
+ self.format = processor.format
+ self.content = []
+
+ # blocks without a format are normal source lines: no content
+ if self.format == None:
+ return
+
+ words = []
+
+ # extract comment lines
+ lines = []
+
+ # the first line is skipped; only lines matching the 'column'
+ # pattern contribute, through the pattern's first group
+ for line0 in self.lines[1:]:
+ m = self.format.column.match( line0 )
+ if m:
+ lines.append( m.group(1) )
+
+ # now, look for a markup tag
+ # the extracted lines become the block content as soon as one
+ # non-empty line starts with a markup tag
+ for l in lines:
+ l = string.strip(l)
+ if len(l) > 0:
+ for tag in re_markup_tags:
+ if tag.match( l ):
+ self.content = lines
+ return
+
+ # return the block position formatted as "(filename:lineno)"
+ def location( self ):
+ return "(" + self.filename + ":" + repr(self.lineno) + ")"
+
+
+ # debugging only - not used in normal operations
+ def dump( self ):
+
+ if self.content:
+ print "{{{content start---"
+ for l in self.content:
+ print l
+ print "---content end}}}"
+ return
+
+ # NOTE(review): 'fmt' is computed but never printed
+ fmt = ""
+ if self.format:
+ fmt = repr(self.format.id) + " "
+
+ for line in self.lines:
+ print line
+
+
+################################################################
+##
+## SOURCE PROCESSOR CLASS
+##
+## The SourceProcessor is in charge of reading a C source file
+## and decomposing it into a series of different "SourceBlock"
+## objects.
+##
+## each one of these blocks can be made of the following data:
+##
+## - A documentation comment block that starts with "/**" and
+## whose exact format will be discussed later
+##
+## - normal source lines, including comments
+##
+##
+class SourceProcessor:
+
+ def __init__( self ):
+ """initialize a source processor"""
+ self.blocks = []
+ self.filename = None
+ # format of the block being read, None outside of such blocks
+ self.format = None
+ # lines accumulated for the block being read
+ self.lines = []
+
+ def reset( self ):
+ """reset a block processor, clean all its blocks"""
+ self.blocks = []
+ self.format = None
+
+
+ def parse_file( self, filename ):
+ """parse a C source file, and adds its blocks to the processor's list"""
+
+ # blocks of a previously parsed file are discarded here
+ self.reset()
+
+ self.filename = filename
+
+ # close any input left open in the fileinput module state
+ fileinput.close()
+ self.format = None
+ self.lineno = 0
+ self.lines = []
+
+ for line in fileinput.input( filename ):
+
+ # strip trailing newlines, important on Windows machines !!
+ if line[-1] == '\012':
+ line = line[0:-1]
+
+ if self.format == None:
+ self.process_normal_line( line )
+
+ else:
+ if self.format.end.match( line ):
+ # that's a normal block end, add it to lines and
+ # create a new block
+ self.lines.append( line )
+ self.add_block_lines()
+
+ elif self.format.column.match( line ):
+ # that's a normal column line, add it to 'lines'
+ self.lines.append( line )
+
+ else:
+ # humm.. this is an unexpected block end,
+ # create a new block, but don't process the line
+ self.add_block_lines()
+
+ # we need to process the line again
+ self.process_normal_line( line )
+
+ # record the last lines
+ self.add_block_lines()
+
+
+
+ def process_normal_line( self, line ):
+ """process a normal line and check if it's the start of a new block"""
+ # when 'line' matches the start pattern of a known format, the
+ # lines accumulated so far are flushed and the format and line
+ # number of the new block are recorded
+ for f in re_source_block_formats:
+ if f.start.match( line ):
+ self.add_block_lines()
+ self.format = f
+ self.lineno = fileinput.filelineno()
+
+ self.lines.append( line )
+
+
+
+ def add_block_lines( self ):
+ """add the current accumulated lines, and create a new block"""
+ # the SourceBlock constructor reads 'self.format', so the format
+ # is only cleared after the block was created
+ if self.lines != []:
+ block = SourceBlock( self, self.filename, self.lineno, self.lines )
+
+ self.blocks.append( block )
+ self.format = None
+ self.lines = []
+
+
+ # debugging only, not used in normal operations
+ def dump( self ):
+ """print all blocks in a processor"""
+ for b in self.blocks:
+ b.dump()
+
+# eof
diff --git a/src/tools/docmaker/tohtml.py b/src/tools/docmaker/tohtml.py
new file mode 100644
index 000000000..1067d3494
--- /dev/null
+++ b/src/tools/docmaker/tohtml.py
@@ -0,0 +1,475 @@
+from sources import *
+from content import *
+from formatter import *
+import time
+
+# NOTE(review): most of the string constants below are empty or hold only
+# a bare newline, and several names used further down in this file
+# ('chapter_inter', 'chapter_footer', 'block_footer', 'source_footer')
+# are not assigned in this part of the file.  This looks as if the HTML
+# tags were stripped from the literals when this text was extracted --
+# TODO confirm against the repository copy before relying on these values.
+#
+# The following defines the HTML header used by all generated pages.
+#
+html_header_1 = """\
+
+
+"""
+
+html_header_2= """ API Reference
+
+
+
+
+"""
+
+html_header_3=""" API Reference
+"""
+
+
+
+# The HTML footer used by all generated pages.
+#
+html_footer = """\
+
+"""
+
+# The header and footer used for each section.
+#
+section_title_header = ""
+section_title_footer = "
"
+
+# The header and footer used for code segments.
+#
+code_header = ""
+code_footer = "
"
+
+# Paragraph header and footer.
+#
+para_header = ""
+para_footer = "
"
+
+# Block header and footer.
+#
+block_header = "
"
+
+# Description header/footer.
+#
+description_header = ""
+description_footer = " |
"
+
+# Marker header/inter/footer combination.
+#
+marker_header = ""
+marker_inter = " |
"
+marker_footer = " |
"
+
+# Source code extracts header/footer.
+#
+source_header = "
"
+
+# Chapter header/inter/footer.
+#
+chapter_header = "
"
+
+
+# source language keyword coloration/styling
+#
+keyword_prefix = ''
+keyword_suffix = ''
+
+section_synopsis_header = 'Synopsys
'
+section_synopsis_footer = ''
+
+# Translate a single line of source to HTML.  This will convert
+# a "<" into "&lt;", ">" into "&gt;", and "&" into "&amp;".
+#
+def html_quote( line ):
+    """Return 'line' with the HTML special characters escaped.
+
+    '&' is replaced first; otherwise the ampersands introduced by the
+    '<' and '>' entities would themselves be escaped a second time.
+    """
+    result = line.replace( "&", "&amp;" )
+    result = result.replace( "<", "&lt;" )
+    result = result.replace( ">", "&gt;" )
+    return result
+
+
+# same as 'html_quote', but ignores left and right brackets
+#
+def html_quote0( line ):
+    """Return 'line' with '&' escaped as '&amp;'; '<' and '>' are kept."""
+    return line.replace( "&", "&amp;" )
+
+
+def dump_html_code( lines, prefix = "" ):
+    """Print a code sequence as HTML on the standard output.
+
+    lines  -- list of source lines; trailing blank lines are dropped
+    prefix -- string printed in front of the header, each line and
+              the footer
+
+    This is a plain function, so it works on its 'lines' argument; there
+    is no 'self' here.
+    """
+    # clean the last empty lines
+    #
+    last = len( lines )
+    while last > 0 and lines[last - 1].strip() == "":
+        last = last - 1
+
+    # The code footer should be directly appended to the last code
+    # line to avoid an additional blank line.  'lines[:last]' stops right
+    # after the last non-blank line.
+    #
+    print prefix + code_header,
+    for line in lines[:last]:
+        print '\n' + prefix + html_quote( line ),
+    print prefix + code_footer,
+
+
+
+class HtmlFormatter(Formatter):
+
+ def __init__( self, processor, project_title, file_prefix ):
+ """Set up an HTML formatter for 'processor'.
+
+ 'project_title' is inserted twice into the page header.
+ 'file_prefix', when not empty, gets a '-' appended and is later put
+ in front of the generated file names.
+ """
+
+ Formatter.__init__( self, processor )
+
+ global html_header_1, html_header_2, html_header_3, html_footer
+
+ # normalize the prefix: either "<prefix>-" or the empty string
+ if file_prefix:
+ file_prefix = file_prefix + "-"
+ else:
+ file_prefix = ""
+
+ self.project_title = project_title
+ self.file_prefix = file_prefix
+ self.html_header = html_header_1 + project_title + html_header_2 + \
+ project_title + html_header_3
+
+ # the footer carries the local time at which the formatter was created
+ # NOTE(review): the literal below is split across two lines, probably
+ # because markup was stripped from it -- TODO confirm
+ self.html_footer = "generated on " + \
+ time.asctime( time.localtime( time.time() ) ) + \
+ "
" + html_footer
+
+ # number of columns of the table printed by 'index_exit'
+ self.columns = 3
+
+ def make_section_url( self, section ):
+     """Return the name of the HTML page generated for 'section'."""
+     page = section.name + ".html"
+     return self.file_prefix + page
+
+
+ def make_block_url( self, block ):
+     """Return the URL of 'block': its section page plus '#' and its name."""
+     page = self.make_section_url( block.section )
+     return page + "#" + block.name
+
+
+ def make_html_words( self, words ):
+     """Quote each word with 'html_quote' and join them with single spaces.
+
+     An empty (or None) word list gives the empty string.
+     """
+     if not words:
+         return ""
+
+     return " ".join( [ html_quote( w ) for w in words ] )
+
+
+ def make_html_word( self, word ):
+ """Convert a single word to HTML.
+
+ The word is tried, in order, against 're_crossref', 're_italic' and
+ 're_bold'; when none of them matches, the word is returned through
+ 'html_quote'.
+ """
+ # look for cross-references
+ #
+ m = re_crossref.match( word )
+ if m:
+ try:
+ name = m.group(1)
+ block = self.identifiers[ name ]
+ url = self.make_block_url( block )
+ # NOTE(review): 'url' is computed but not used, and the literals
+ # around 'name' here and in the italic/bold cases are empty --
+ # looks like the HTML tags were stripped; TODO confirm
+ return '' + name + ''
+ except:
+ # NOTE(review): the bare 'except' also hides errors other than an
+ # unknown identifier
+ return '?' + name + '?'
+
+ # look for italics and bolds
+ m = re_italic.match( word )
+ if m:
+ name = m.group(1)
+ return ''+name+''
+
+ m = re_bold.match( word )
+ if m:
+ name = m.group(1)
+ return ''+name+''
+
+ return html_quote(word)
+
+
+ def make_html_para( self, words ):
+ """Convert a paragraph's words to HTML text.
+
+ Each word goes through 'make_html_word' (which handles cross-references
+ and styling) and the results are joined with single spaces.
+ """
+ line = ""
+ if words:
+ line = self.make_html_word( words[0] )
+ for word in words[1:]:
+ line = line + " " + self.make_html_word( word )
+
+ # NOTE(review): the literals wrapped around 'line' look like stripped
+ # paragraph tags -- TODO confirm
+ return "" + line + "
"
+
+
+ def make_html_code( self, lines ):
+     """Return a code sequence as HTML text.
+
+     The result is 'code_header', a newline, every line quoted with
+     'html_quote' and followed by a newline, then 'code_footer'.
+     """
+     quoted = [ html_quote( l ) + '\n' for l in lines ]
+
+     return code_header + '\n' + "".join( quoted ) + code_footer
+
+
+ def make_html_items( self, items ):
+     """Return the HTML text for a list of items, one chunk per item.
+
+     An item with a non-empty 'lines' attribute is rendered as code,
+     any other item as a paragraph built from its 'words'.  The chunks
+     are joined with newlines.
+     """
+     chunks = []
+     for item in items:
+         if not item.lines:
+             chunks.append( self.make_html_para( item.words ) )
+         else:
+             chunks.append( self.make_html_code( item.lines ) )
+
+     return '\n'.join( chunks )
+
+
+ def print_html_items( self, items ):
+     """Print the HTML text built by 'make_html_items' for 'items'."""
+     html = self.make_html_items( items )
+     print html
+
+
+ def print_html_field( self, field ):
+ """Print one field: its items, wrapped in extra output when it is named.
+
+ NOTE(review): the literals printed around the name look like stripped
+ table markup, and the last one is split across two lines -- TODO
+ confirm.
+ """
+ if field.name:
+ print ""+field.name+" | "
+
+ print self.make_html_items( field.items )
+
+ if field.name:
+ print " |
"
+
+
+ def html_source_quote( self, line, block_name = None ):
+ """Convert one line of source code to HTML text.
+
+ The line is consumed piece by piece with 're_source_crossref'.  Group 1
+ of each match is quoted with 'html_quote'; group 2 is a name that is
+ handled as the current block name, as a keyword (it matches
+ 're_source_keywords'), as a key of 'self.identifiers', or as plain
+ text.  Whatever is left once the expression stops matching is quoted
+ and appended.
+ """
+ result = ""
+ while line:
+ m = re_source_crossref.match( line )
+ if m:
+ name = m.group(2)
+ prefix = html_quote( m.group(1) )
+ length = len( m.group(0) )
+
+ if name == block_name:
+ # this is the current block name, if any
+ result = result + prefix + '' + name + ''
+
+ elif re_source_keywords.match(name):
+ # this is a C keyword
+ result = result + prefix + keyword_prefix + name + keyword_suffix
+
+ elif self.identifiers.has_key(name):
+ # this is a known identifier
+ # NOTE(review): 'block' is looked up but not used and the literals
+ # around 'name' are empty -- looks like a stripped link; TODO confirm
+ block = self.identifiers[name]
+ result = result + prefix + '' + name + ''
+ else:
+ # unknown name: quote the whole matched text as it is
+ result = result + html_quote(line[ : length ])
+
+ # continue with the text that follows the match
+ line = line[ length : ]
+ else:
+ result = result + html_quote(line)
+ # an empty value ends the 'while' loop
+ line = []
+
+ return result
+
+
+ def print_html_field_list( self, fields ):
+ """Print a series of named fields: each name, then the field's items.
+
+ NOTE(review): the empty and line-split literals look like stripped
+ table markup -- TODO confirm.
+ """
+ print ""
+ for field in fields:
+ print "" + field.name + " | "
+ self.print_html_items( field.items )
+ print " |
"
+ print "
"
+
+
+ def print_html_markup( self, markup ):
+     """Print every field of 'markup'.
+
+     Consecutive named fields are collected and printed together
+     through 'print_html_field_list'; an unnamed field first flushes
+     the pending group and is then printed with 'print_html_items'.
+     """
+     pending = []
+
+     for field in markup.fields:
+         if field.name:
+             # part of a series of field or value definitions, keep it
+             # until the series ends
+             pending.append( field )
+             continue
+
+         if pending:
+             self.print_html_field_list( pending )
+             pending = []
+
+         self.print_html_items( field.items )
+
+     # a series that runs up to the last field is still pending here
+     if pending:
+         self.print_html_field_list( pending )
+
+ #
+ # Formatting the index
+ #
+
+ def index_enter( self ):
+     """Start the index page: print the page header, clear the name/URL map."""
+     header = self.html_header
+     print header
+
+     self.index_items = {}
+
+ def index_name_enter( self, name ):
+     """Record in 'self.index_items' the URL of the block called 'name'."""
+     target = self.identifiers[ name ]
+     self.index_items[ name ] = self.make_block_url( target )
+
+ def index_exit( self ):
+ """Print the index table and the page footer, then clear the URL map.
+
+ The names of 'self.block_index' are laid out in 'self.columns' columns,
+ filled column by column: entry (row r, column c) is name number
+ r + c*rows.
+
+ NOTE(review): 'url' is read but not used and several literals are empty
+ or split across lines -- looks like stripped table/link markup; TODO
+ confirm.
+ """
+
+ # block_index already contains the sorted list of index names
+ count = len( self.block_index )
+ # number of rows needed, rounded up
+ rows = (count + self.columns - 1)/self.columns
+
+ print ""
+ for r in range(rows):
+ line = ""
+ for c in range(self.columns):
+ i = r + c*rows
+ if i < count:
+ bname = self.block_index[ r + c*rows ]
+ url = self.index_items[ bname ]
+ line = line + '' + bname + ' | '
+ else:
+ line = line + ' | '
+ line = line + "
"
+ print line
+
+ print "
"
+ print self.html_footer
+ self.index_items = {}
+
+ def index_dump( self, index_filename = None ):
+     """Dump the index through 'Formatter.index_dump'.
+
+     When no file name is given, 'index.html' preceded by the file
+     prefix is used.
+     """
+     target = index_filename
+     if target == None:
+         target = self.file_prefix + "index.html"
+
+     Formatter.index_dump( self, target )
+
+ #
+ # Formatting the table of contents
+ #
+ def toc_enter( self ):
+ """Start the table of contents: print the page header and the title.
+
+ NOTE(review): the title literal is split across two lines, probably
+ because markup was stripped from it -- TODO confirm.
+ """
+ print self.html_header
+ print "Table of Contents
"
+
+ def toc_chapter_enter( self, chapter ):
+ """Print the heading of a chapter in the table of contents.
+
+ 'chapter.title' is joined with 'string.join', so it is presumably a
+ list of words -- TODO confirm.
+ """
+ print chapter_header + string.join(chapter.title) + chapter_inter
+ # NOTE(review): empty literal, possibly stripped markup -- TODO confirm
+ print ""
+
+ def toc_section_enter( self, section ):
+ """Print a section's title and abstract in the table of contents.
+
+ NOTE(review): the literals around the title are empty or nearly so;
+ they look like stripped table/link markup -- TODO confirm.
+ """
+ print ""
+ print '' + \
+ section.title + ' | '
+
+ print self.make_html_para( section.abstract )
+
+ def toc_section_exit( self, section ):
+ """Close a section entry of the table of contents.
+
+ NOTE(review): the literal is split across two lines, probably stripped
+ markup -- TODO confirm.
+ """
+ print " |
"
+
+ def toc_chapter_exit( self, chapter ):
+ """Close a chapter of the table of contents and print 'chapter_footer'.
+
+ NOTE(review): the first literal is split across two lines, probably
+ stripped markup -- TODO confirm.
+ """
+ print "
"
+ print chapter_footer
+
+ def toc_index( self, index_filename ):
+ """Print the 'Global Index' entry of the table of contents.
+
+ NOTE(review): 'index_filename' is not used here; presumably a link to
+ it was stripped from the literal -- TODO confirm.
+ """
+ print chapter_header + 'Global Index' + chapter_inter + chapter_footer
+
+ def toc_exit( self ):
+ """Finish the table of contents page by printing the page footer."""
+ # NOTE(review): empty literal, possibly stripped markup -- TODO confirm
+ print ""
+ print self.html_footer
+
+ def toc_dump( self, toc_filename = None, index_filename = None ):
+     """Dump the table of contents through 'Formatter.toc_dump'.
+
+     Missing file names default to 'toc.html' and 'index.html', each
+     preceded by the file prefix.
+     """
+     toc_target = toc_filename
+     if toc_target == None:
+         toc_target = self.file_prefix + "toc.html"
+
+     index_target = index_filename
+     if index_target == None:
+         index_target = self.file_prefix + "index.html"
+
+     Formatter.toc_dump( self, toc_target, index_target )
+
+ #
+ # Formatting sections
+ #
+ def section_enter( self, section ):
+     """Print the top of a section page: title, synopsis and description.
+
+     The synopsis lists the names of 'section.block_names' in a grid
+     filled column by column.  The number of columns is derived from the
+     longest block name; a section without any named block falls back to
+     a single column instead of dividing by zero.
+
+     NOTE(review): several literals below are empty or split across
+     lines; they look like stripped table/link markup and are kept
+     exactly as found -- TODO confirm.
+     """
+     print self.html_header
+
+     print section_title_header
+     print section.title
+     print section_title_footer
+
+     # print section synopsis
+     print section_synopsis_header
+     print ""
+
+     maxwidth = 0
+     for b in section.blocks.values():
+         if len(b.name) > maxwidth:
+             maxwidth = len(b.name)
+
+     width = 130 # XXX magic number
+     if maxwidth > 0:
+         columns = width / maxwidth
+     else:
+         # no block at all, or only unnamed ones
+         columns = 1
+     if columns < 1:
+         columns = 1
+
+     count = len(section.block_names)
+     # number of rows needed, rounded up
+     rows = (count + columns-1)/columns
+     for r in range(rows):
+         line = ""
+         for c in range(columns):
+             i = r + c*rows
+             line = line + ' | '
+             if i < count:
+                 name = section.block_names[i]
+                 line = line + '' + name + ''
+
+             line = line + ' | '
+         line = line + "
"
+         print line
+
+     print "
"
+     print section_synopsis_footer
+
+     print description_header
+     print self.make_html_items( section.description )
+     print description_footer
+
+ def block_enter( self, block ):
+ """Print the start of a block: header, name and C source lines.
+
+ The source lines of 'block.code' go through 'html_source_quote' with
+ the block's own name, so that this name is treated specially.
+
+ NOTE(review): the literals near the block name are empty or split
+ across lines; they look like stripped anchor/heading markup -- TODO
+ confirm.
+ """
+ print block_header
+
+ # place html anchor if needed
+ if block.name:
+ print ''
+ print "" + block.name + "
"
+ print ""
+
+ # dump the block C source lines now
+ if block.code:
+ print source_header
+ for l in block.code:
+ print self.html_source_quote( l, block.name )
+ print source_footer
+
+
+ def markup_enter( self, markup, block ):
+     """Print the opening of a markup section, then its fields.
+
+     The "description" markup uses the description header; any other
+     markup prints its tag between the marker header and separator.
+     """
+     if markup.tag != "description":
+         opening = marker_header + markup.tag + marker_inter
+     else:
+         opening = description_header
+     print opening
+
+     self.print_html_markup( markup )
+
+ def markup_exit( self, markup, block ):
+     """Print the closing of a markup section.
+
+     The "description" markup uses the description footer, any other
+     markup the marker footer.
+     """
+     if markup.tag != "description":
+         closing = marker_footer
+     else:
+         closing = description_footer
+     print closing
+
+ def block_exit( self, block ):
+ """Print 'block_footer' to close the output of a block."""
+ print block_footer
+
+
+ def section_exit( self, section ):
+ """Print the module-level 'html_footer' to end a section page.
+
+ NOTE(review): 'index_exit' and 'toc_exit' print 'self.html_footer'
+ (which adds the generation date) instead -- confirm the difference is
+ intended.
+ """
+ print html_footer
+
+
+ def section_dump_all( self ):
+     """Dump every section to its own file, '<prefix><name>.html'."""
+     for sec in self.sections:
+         filename = self.file_prefix + sec.name + '.html'
+         self.section_dump( sec, filename )
+
\ No newline at end of file
diff --git a/src/tools/docmaker/utils.py b/src/tools/docmaker/utils.py
new file mode 100644
index 000000000..f27353e99
--- /dev/null
+++ b/src/tools/docmaker/utils.py
@@ -0,0 +1,86 @@
+import os
+import string, sys
+
+# This function is used to sort the index.  It is a simple lexicographical
+# sort, except that it places capital letters before lowercase ones.
+#
+def index_sort( s1, s2 ):
+    """Compare two index names; return -1, 0 or 1.
+
+    Names are compared character by character without regard to case;
+    when two characters differ only in case, the capital one sorts
+    first.  A name that is a prefix of the other sorts first, and an
+    empty (or None) name sorts before any non-empty one.
+    """
+    if not s1:
+        # two empty names are equal; a comparison function must say so
+        if not s2:
+            return 0
+        return -1
+
+    if not s2:
+        return 1
+
+    l1 = len( s1 )
+    l2 = len( s2 )
+    m1 = s1.lower()
+    m2 = s2.lower()
+
+    for i in range( l1 ):
+        # 's2' is exhausted first, or its character is smaller
+        if i >= l2 or m1[i] > m2[i]:
+            return 1
+
+        if m1[i] < m2[i]:
+            return -1
+
+        # same letter: capital before lowercase
+        if s1[i] < s2[i]:
+            return -1
+
+        if s1[i] > s2[i]:
+            return 1
+
+    # 's1' is a proper prefix of 's2'
+    if l2 > l1:
+        return -1
+
+    return 0
+
+# Build a list that starts with the elements of order_list and continues
+# with the elements of input_list that order_list does not contain.
+#
+def sort_order_list( input_list, order_list ):
+    """Return a copy of 'order_list' extended with the missing input items.
+
+    Items of 'input_list' keep their relative order; neither argument
+    is modified.
+    """
+    extra = [ item for item in input_list if item not in order_list ]
+
+    result = order_list[:]
+    result.extend( extra )
+    return result
+
+
+# current output directory
+#
+output_dir = None
+
+
+# Open the standard output to a given project documentation file.  Use
+# "output_dir" to determine the filename location if necessary and save the
+# old stdout in a tuple that is returned by this function.
+#
+def open_output( filename ):
+    """Redirect 'sys.stdout' to 'filename', opened for writing.
+
+    When the module-level 'output_dir' is set (neither None nor empty),
+    the file is created inside that directory.
+
+    Returns the tuple (new_file, old_stdout), to be handed back to
+    'close_output'.
+    """
+    global output_dir
+
+    if output_dir:
+        filename = os.path.join( output_dir, filename )
+
+    old_stdout = sys.stdout
+    new_file = open( filename, "w" )
+    sys.stdout = new_file
+
+    return ( new_file, old_stdout )
+
+
+# Close the output that was returned by "open_output".
+#
+def close_output( output ):
+    """Close the file in 'output[0]' and make 'output[1]' the stdout again."""
+    new_file = output[0]
+    new_file.close()
+
+    sys.stdout = output[1]
+
+
+# Check output directory.
+#
+def check_output( ):
+    """Validate the module-level 'output_dir' setting.
+
+    An unset or empty value is normalized to None.  Otherwise the value
+    must name an existing directory; if it does not, an error message is
+    written to stderr and the program exits with status 2.
+    """
+    global output_dir
+
+    if not output_dir:
+        # nothing to check; "" and None both mean "no output directory"
+        output_dir = None
+        return
+
+    if not os.path.isdir( output_dir ):
+        sys.stderr.write( "argument" + " '" + output_dir + "' " +
+                          "is not a valid directory\n" )
+        sys.exit( 2 )