#!/usr/bin/env python
#
#  DocMaker is a very simple program used to generate HTML documentation
#  from the source files of the FreeType packages.
#
#  I should really be using regular expressions to do this, but hey,
#  i'm too lazy right now, and the damn thing seems to work :-)
#   - David
#

import fileinput, sys, string, glob

# NOTE(review): the literals below contain only text and newlines; they look
# like HTML templates whose tags were lost.  Confirm the intended markup
# against the upstream file before relying on the generated pages.

html_header = """\
FreeType 2 API Reference

FreeType 2 API Reference

"""

html_footer = """\
"""

section_title_header = "\n\n"
section_title_footer = "\n\n"

code_header = "\n"
code_footer = "\n"

para_header = "\n\n"
para_footer = "\n\n"

block_header = "\n"
block_footer = "\n\n"

description_header = "\n"
description_footer = "\n\n"

marker_header = "\n"
marker_inter  = "\n"
marker_footer = "\n"

source_header = "\n"
source_footer = "\n\n"


current_section = None


# this function is used to sort the index.  it's a simple lexicographical
# sort, except that it places capital letters before small ones
#
def index_sort( s1, s2 ):
    """Three-way comparison of two index keys; return -1, 0 or 1.

    Keys are compared character by character on their lower-cased form;
    when two characters differ only by case, the raw characters decide,
    which puts capital letters first.  A key that is a strict prefix of
    the other sorts first, and an empty (or None) key sorts before any
    non-empty key.
    """
    if not s1 or not s2:
        # two empty keys are equal (the comparator must be consistent)
        return bool( s1 ) - bool( s2 )

    l1 = len( s1 )
    l2 = len( s2 )
    m1 = s1.lower()
    m2 = s2.lower()

    for i in range( l1 ):
        # s2 exhausted first, or s1 is greater ignoring case
        if i >= l2 or m1[i] > m2[i]:
            return 1

        if m1[i] < m2[i]:
            return -1

        # same letter: the raw characters break the tie (capitals first)
        if s1[i] < s2[i]:
            return -1

        if s1[i] > s2[i]:
            return 1

    if l2 > l1:
        return -1

    return 0


# The FreeType 2 reference is extracted from the source files.  These contain
# various comment blocks that follow one of the following formats:
#
#  /**************************
#   *
#   *  FORMAT1
#   *
#   *
#   *
#   *
#   *************************/
#
#  /**************************/
#  /*                        */
#  /*  FORMAT2               */
#  /*                        */
#  /*                        */
#  /*                        */
#  /*                        */
#
#  /**************************/
#  /*                        */
#  /*  FORMAT3               */
#  /*                        */
#  /*                        */
#  /*                        */
#  /*                        */
#  /**************************/
#
# Each block contains a list of markers; each one can be followed by
# some arbitrary text or a list of fields.  Here's an example:
#
#    <Struct>
#       MyStruct
#
#    <Description>
#       this structure holds some data
#
#    <Fields>
#       x :: horizontal coordinate
#       y :: vertical coordinate
#
#
# This example defines three markers: 'Struct', 'Description' & 'Fields'.
# The first two markers contain arbitrary text, while the last one contains
# a list of fields.
#
# Each field is simply of the format:  WORD :: TEXT...
#
# Note that typically each comment block is followed by some source
# code declaration that may need to be kept in the reference.
#
# Note that markers can alternatively be written as "@MARKER:"
# instead of "<MARKER>".  All marker identifiers are converted to
# lower case during parsing in order to simplify sorting.
#
# We associate with each block the following source lines that do not
# begin with a comment.  For example, the following:
#
#   /**********************************
#    *
#    * <mytag>  blabla
#    *
#    */
#
#   bla_bla_bla
#   bilip_bilip
#
#   /* - this comment acts as a separator - */
#
#   blo_blo_blo
#
#
# will only keep the first two lines of sources with
# the "blabla" block.
#
# However, the comment will be kept, with following source lines
# if it contains a starting '#' or '@' as in:
#
#   /*@.....*/
#   /*#.....*/
#   /* @.....*/
#   /* #.....*/
#


#############################################################################
#
# The DocCode class is used to store source code lines
#
#   'self.lines' contains a set of source code lines that will
#   be dumped as HTML in a <pre> tag.
#
#   The object is filled line by line by the parser; it strips the
#   leading "margin" space from each input line before storing it
#   in 'self.lines'.
#
class DocCode:
    """A run of verbatim source-code lines.

    'self.lines' holds the stored lines and 'self.margin' the number of
    leading columns removed from each line by add().
    """

    def __init__( self, margin = 0 ):
        self.lines  = []
        self.margin = margin


    def add( self, line ):
        """Store 'line', minus its first 'self.margin' columns.

        The margin is only removed when those columns are blank, so a
        line that starts left of the margin is stored unchanged.
        """
        if line[: self.margin].strip() == "":
            line = line[self.margin :]
        self.lines.append( line )


    def dump( self ):
        """Print every stored line prefixed with "--", then an empty line."""
        for line in self.lines:
            print( "--" + line )
        print( "" )


    def get_identifier( self ):
        # this function should never be called
        #
        return "UNKNOWN_CODE_IDENTIFIER!"


    def dump_html( self, identifiers = None ):
        """Write the code lines to sys.stdout between code_header and
        code_footer.  'identifiers' is accepted for interface parity with
        the other dump_html() methods and is not used.
        """
        # drop trailing blank lines only; every real code line, including
        # the last one, must be kept
        #
        last = len( self.lines )
        while last > 0 and self.lines[last - 1].strip() == "":
            last = last - 1

        # the code footer should be directly appended to the last code
        # line to avoid an additional blank line
        #
        sys.stdout.write( code_header )
        for line in self.lines[: last]:
            sys.stdout.write( '\n' + line )
        sys.stdout.write( code_footer )



#############################################################################
#
# The DocParagraph is used to store text paragraphs.
# 'self.words' is simply a list of words for the paragraph.
#
# The paragraph is filled line by line by the parser.
#
class DocParagraph:
    """A text paragraph stored as a flat list of words ('self.words')."""

    def __init__( self ):
        self.words = []


    def add( self, line ):
        """Append the whitespace-separated words of 'line'."""
        self.words.extend( line.split() )


    # This function is used to retrieve the first word of a given
    # paragraph.
    #
    def get_identifier( self ):
        if self.words:
            return self.words[0]

        # should never happen
        #
        return "UNKNOWN_PARA_IDENTIFIER!"


    def get_words( self ):
        """Return a copy of the word list."""
        return self.words[:]


    def dump( self, identifiers = None ):
        """Print the paragraph, starting a new output line whenever the
        next word would not fit in 'max_width' columns.

        When 'identifiers' (a mapping of known names) is given and
        non-empty, a word starting with '@' is treated as a cross
        reference: the '@' is removed, trailing non-identifier characters
        are split off and re-attached after the word, and a name missing
        from 'identifiers' is printed with a leading '?'.
        """
        max_width = 50
        cursor    = 0
        line      = ""
        alphanum  = string.ascii_letters + string.digits + '_'

        for word in self.words:
            extra = None

            # process cross references if needed
            #
            if identifiers and word and word[0] == '@':
                word = word[1:]

                # find where the trailing non-alphanumeric characters start
                #
                i = len( word )
                while i > 0 and word[i - 1] not in alphanum:
                    i = i - 1

                if i > 0:
                    extra = word[i:]
                    word  = word[0:i]

                if identifiers.get( word ):
                    # NOTE(review): a resolved reference is emitted as the
                    # bare word; the empty strings look like lost link
                    # markup -- confirm against upstream.
                    word = '' + word + ''
                else:
                    word = '?' + word

            # only break when something is pending: an over-long first word
            # must not emit an empty line
            #
            if cursor > 0 and cursor + len( word ) + 1 > max_width:
                print( line )
                cursor = 0
                line   = ""

            line = line + word
            if not extra:
                line = line + " "

            cursor = cursor + len( word ) + 1

            # handle trailing periods, commas, etc. at the end of
            # cross references.
            #
            if extra:
                if cursor + len( extra ) + 1 > max_width:
                    print( line )
                    cursor = 0
                    line   = ""

                line   = line + extra + " "
                cursor = cursor + len( extra ) + 1

        if cursor > 0:
            print( line )


    def dump_string( self ):
        """Return the words joined by single spaces."""
        return " ".join( self.words )


    def dump_html( self, identifiers = None ):
        """Print the paragraph between para_header and para_footer."""
        print( para_header )
        self.dump( identifiers )
        print( para_footer )



#############################################################################
#
# DocContent is used to store the content of a given marker.
#
# The "self.items" list contains (field,elements) records, where
# "field" corresponds to a given structure fields or function
# parameter (indicated by a "::"), or NULL for a normal section
# of text/code.
#
# Hence, the following example:
#
#   
#      This is an example of what can be put in a content section,
#
#      A second line of example text.
#
#      x :: A simple test field, with some contents.
#      y :: Even before, this field has some code contents.
#           {
#             y = x+2;
#           }
#
# should be stored as
#
#     [ ( None, [ DocParagraph, DocParagraph] ),
#       ( "x",  [ DocParagraph ] ),
#       ( "y",  [ DocParagraph, DocCode ] ) ]
#
# in 'self.items'.
#
# The DocContent object is entirely built at creation time; you must
# pass a list of input text lines in the "lines_list" parameter.
#
class DocContent:
    """Content of one marker, stored in 'self.items' as a list of
    ( field, elements ) records.  'field' is a field name or None, and
    'elements' is a list of DocParagraph / DocCode objects.
    """

    def __init__( self, lines_list ):
        """Build 'self.items' from the text lines in 'lines_list'."""
        self.items  = []
        code_mode   = 0
        code_margin = 0
        paragraph   = None   # represents the current DocParagraph
        code        = None   # represents the current DocCode

        elements    = []     # the list of elements for the current field,
                             # contains DocParagraph or DocCode objects

        field       = None   # the current field

        for aline in lines_list:
            if code_mode == 0:
                line   = aline.lstrip()
                margin = len( aline ) - len( line )

                # an empty line ends the current paragraph; so does a lone
                # `{', which additionally opens a code block
                #
                if line == "" or line == '{':
                    if paragraph:
                        elements.append( paragraph )
                        paragraph = None

                    if line == '{':
                        code_mode   = 1
                        code_margin = margin
                        code        = None
                    continue

                words = line.split()

                # test for a field delimiter on the start of the line, i.e.
                # the token `::'
                #
                if len( words ) >= 2 and words[1] == "::":
                    # start a new field - complete current paragraph if any
                    #
                    if paragraph:
                        elements.append( paragraph )
                        paragraph = None

                    # append previous "field" to self.items
                    #
                    self.items.append( ( field, elements ) )

                    # start new field and elements list
                    #
                    field    = words[0]
                    elements = []
                    words    = words[2 :]

                # append remaining words to current paragraph
                #
                if words:
                    if not paragraph:
                        paragraph = DocParagraph()
                    paragraph.add( " ".join( words ) )

            else:
                # we are in code mode...
                #
                # the code block ends with a line that has a single '}' on
                # it, located at the same column as the opening brace
                #
                if aline == " " * code_margin + '}':
                    if code:
                        elements.append( code )
                        code = None

                    code_mode   = 0
                    code_margin = 0

                # otherwise, add the line to the current code block
                #
                else:
                    if not code:
                        # NOTE(review): 'code_margin' is not passed to
                        # DocCode(), so stored code lines keep their full
                        # indentation -- confirm whether that is intended.
                        code = DocCode()
                    code.add( aline )

        if paragraph:
            elements.append( paragraph )

        if code:
            elements.append( code )

        self.items.append( ( field, elements ) )


    def get_identifier( self ):
        """Return the identifier of the first element of the first item."""
        if self.items:
            elements = self.items[0][1]
            if elements:
                return elements[0].get_identifier()

        # should never happen
        #
        return "UNKNOWN_CONTENT_IDENTIFIER!"


    def get_title( self ):
        """Return dump_string() of the first element of the first item."""
        if self.items:
            elements = self.items[0][1]
            if elements:
                return elements[0].dump_string()

        # should never happen
        #
        return "UNKNOWN_CONTENT_TITLE!"


    def dump( self ):
        """Print all elements; fields are surrounded by empty lines."""
        for field, elements in self.items:
            if field:
                print( "" )

            for element in elements:
                element.dump()

            if field:
                print( "" )


    # NOTE(review): the string literals printed by the two methods below
    # contain only newlines or nothing; they look like table markup that
    # was lost -- confirm against upstream.

    def dump_html( self, identifiers = None ):
        """Print the items as HTML; consecutive fields share one table."""
        in_table = 0

        for field, elements in self.items:
            if not field:
                if in_table:
                    print( "" )
                    in_table = 0

                for element in elements:
                    element.dump_html( identifiers )

            else:
                if not in_table:
                    print( "\n" )
                    in_table = 1
                else:
                    print( "\n" )

                print( "" + field + "" )

                for element in elements:
                    element.dump_html( identifiers )

        if in_table:
            print( "\n" )


    def dump_html_in_table( self, identifiers = None ):
        """Print the items as rows of an enclosing table."""
        for field, elements in self.items:
            if not field:
                if elements:
                    print( "" )
                    for element in elements:
                        element.dump_html( identifiers )
                    print( "" )

            else:
                print( "" + field + "" )
                for element in elements:
                    element.dump_html( identifiers )
                print( "" )



#############################################################################
#
# The DocBlock class is used to store a given comment block.  It contains
# a list of markers, as well as a list of contents for each marker.
#
#   "self.items" is a list of (marker, contents) elements, where
#   'marker' is a lowercase marker string, and 'contents' is a DocContent
#   object.
#
#   "self.source" is simply a list of text lines taken from the
#   uncommented source itself.
#
#   Finally, "self.name" is a simple identifier used to uniquely identify
#   the block.  It is taken from the first word of the first
#   paragraph of the first marker of a given block, i.e:
#
#      <Type> Goo
#      <Description> Bla bla bla
#
#   will have a name of "Goo"
#
class DocBlock:

    def __init__( self, block_line_list = None, source_line_list = None ):
        """Parse 'block_line_list' (comment text lines) into markers.

        'source_line_list' is kept as 'self.source' only when at least
        one marker was found.  Both arguments default to an empty list.
        """
        # None instead of `[]' defaults: a mutable default would be shared
        # between all instances
        #
        if block_line_list is None:
            block_line_list = []
        if source_line_list is None:
            source_line_list = []

        self.items    = []         # current ( marker, contents ) list
        self.section  = None       # section this block belongs to
        self.filename = "unknown"  # filename defining this block
        self.lineno   = 0          # line number in filename
        self.name     = None

        marker        = None       # current marker
        content       = []         # current content lines list
        alphanum      = string.ascii_letters + string.digits + "_"

        for line in block_line_list:
            line2  = line.lstrip()
            l      = len( line2 )
            margin = len( line ) - l

            if l > 3:
                # a marker is written either "<name>" or "@name:"
                #
                ender = None
                if line2[0] == '<':
                    ender = '>'
                elif line2[0] == '@':
                    ender = ':'

                if ender:
                    i = 1
                    while i < l and line2[i] in alphanum:
                        i = i + 1

                    if i < l and line2[i] == ender:
                        # a new marker begins: flush the previous one
                        #
                        if marker and content:
                            self.add( marker, content )

                        marker  = line2[1 : i]
                        content = []

                        # keep the text following the marker, at the
                        # marker's own margin
                        #
                        line2 = line2[i + 1 :].lstrip()
                        line  = " " * margin + line2

            content.append( line )

        if marker and content:
            self.add( marker, content )

        self.source = []
        if self.items:
            self.source = source_line_list

            # now retrieve block name when possible
            #
            self.name = self.items[0][1].get_identifier()


    # This function adds a new element to 'self.items'.
    #
    #   'marker' is a marker string, or None.
    #   'lines'  is a list of text lines used to compute a list of
    #            DocContent objects.
    #
    def add( self, marker, lines ):
        # remove the first and last empty lines from the content list
        #
        l = len( lines )
        while l > 0 and lines[l - 1].strip() == "":
            l = l - 1

        i = 0
        while i < l and lines[i].strip() == "":
            i = i + 1

        lines = lines[i : l]

        # add a new marker only if its marker and its content list
        # aren't empty
        #
        if lines and marker:
            content = DocContent( lines )
            self.items.append( ( marker.lower(), content ) )


    def find_content( self, marker ):
        """Return the DocContent stored for 'marker', or None."""
        for item in self.items:
            if item[0] == marker:
                return item[1]
        return None


    def html_address( self ):
        """Return "<section file>#<name>", or "" without a section file."""
        section = self.section
        if section and section.filename:
            return section.filename + '#' + self.name

        return ""  # this block is not in a section?


    def location( self ):
        """Return "<filename>:<lineno>" for diagnostics."""
        return self.filename + ':' + str( self.lineno )


    def dump( self ):
        """Print each marker name in brackets followed by its content."""
        for marker, content in self.items:
            print( "[" + marker + "]" )
            content.dump()


    def dump_html( self, identifiers = None ):
        """Print the block as HTML: name, source lines, then the markers.

        Nothing is printed for a block without markers.  A block without
        source lines gets only its header, name and footer.
        """
        # markers naming the kind of block; their content is not dumped
        #
        types = ( 'type', 'struct', 'functype', 'function',
                  'constant', 'enum', 'macro', 'structure', 'also' )

        if not self.items:
            return

        # start of a block
        #
        print( block_header )

        # place html anchor if needed
        #
        # NOTE(review): the literals below look like anchor/heading markup
        # that was lost -- confirm against upstream.
        if self.name:
            print( '' )
            print( "\n\n" + self.name + "\n\n" )
            print( "\n" )

        # print source code
        #
        if not self.source:
            print( block_footer )
            return

        lines = self.source
        l     = len( lines ) - 1
        while l >= 0 and lines[l].strip() == "":
            l = l - 1

        print( source_header )
        print( "" )
        for line in lines[0 : l + 1]:
            print( line )
        print( source_footer )

        # dump each (marker,content) element
        #
        for marker, content in self.items:
            if marker == "description":
                print( description_header )
                content.dump_html( identifiers )
                print( description_footer )

            elif marker not in types:
                sys.stdout.write( marker_header )
                sys.stdout.write( marker )
                sys.stdout.write( marker_inter + '\n' )
                content.dump_html( identifiers )
                print( marker_footer )

        print( "" )

        print( block_footer )



#############################################################################
#
# The DocSection class is used to store a given documentation section.
#
# Each section is made of an identifier, an abstract and a description.
#
# For example, look at:
#
#   <Section> Basic_Data_Types
#
#   <Title> FreeType 2 Basic Data Types
#
#   <Abstract>
#      Definitions of basic FreeType data types
#
#   <Description>
#      FreeType defines several basic data types for all its
#      operations...
#
class DocSection:

    def __init__( self, block ):
        """Create a section from the block that declares it."""
        self.block       = block
        self.name        = block.name.lower()
        self.abstract    = block.find_content( "abstract" )
        self.description = block.find_content( "description" )
        self.elements    = {}    # map block names to blocks
        self.list        = []    # blocks, in output order
        self.filename    = self.name + ".html"

        # replaced by DocSectionList.prepare_files(); set here so that the
        # attribute always exists
        self.title       = "UNKNOWN_SECTION_TITLE!"

        # sys.stderr.write( "new section '" + self.name + "'" )


    def add_element( self, block ):
        """Register 'block' in this section.

        A second block with the same name is a fatal error: a message is
        written to sys.stderr and the program exits with status 1.
        """
        # check that we don't have a duplicate element in this
        # section
        #
        if block.name in self.elements:
            sys.stderr.write( "ERROR - duplicate element definition for " +
                              "'" + block.name + "' in section '" +
                              self.name + "'\n" )
            sys.exit( 1 )

        self.elements[block.name] = block
        self.list.append( block )


    def dump_html( self, identifiers = None ):
        """make an HTML page from a given DocSection"""

        # print HTML header
        #
        print( html_header )

        # print title
        #
        print( section_title_header )
        print( self.title )
        print( section_title_footer )

        # print description (a section may have none)
        #
        print( block_header )
        if self.description:
            self.description.dump_html( identifiers )
        print( block_footer )

        # print elements
        #
        for element in self.list:
            element.dump_html( identifiers )

        print( html_footer )



class DocSectionList:

    def __init__( self ):
        self.sections        = {}    # map section names to section objects
        self.list            = []    # list of sections (in creation order)
        self.current_section = None  # current section
        self.identifiers     = {}    # map identifiers to blocks


    def append_section( self, block ):
        """Create the section declared by 'block', or complete an existing
        section of the same (lower-cased) name, and make it current.

        Two declarations of one section that both carry an abstract are a
        fatal error: messages go to sys.stderr and the program exits with
        status 1.
        """
        name     = block.name.lower()
        abstract = block.find_content( "abstract" )

        if name in self.sections:
            # There is already a section with this name in our
            # list.  We will try to complete it.
            #
            section = self.sections[name]
            if section.abstract:
                # This section already has an abstract defined;
                # simply check that the new section doesn't
                # provide a new one.
                #
                if abstract:
                    sys.stderr.write( "ERROR - duplicate section definition" +
                                      " for '" + name + "'\n" )
                    sys.stderr.write( "previous definition in" +
                                      " '" + section.block.location() + "'\n" )
                    sys.stderr.write( "second definition in" +
                                      " '" + block.location() + "'\n" )
                    sys.exit( 1 )
            else:
                # The old section didn't contain an abstract; we are
                # now going to replace it.
                #
                section.abstract    = abstract
                section.description = block.find_content( "description" )
                section.block       = block
        else:
            # a new section
            #
            section = DocSection( block )
            self.sections[name] = section
            self.list.append( section )

        self.current_section = section


    def append_block( self, block ):
        """Dispatch a parsed block: a block with a "section" marker
        declares a section; any other named block is added to the current
        section, if there is one.  Unnamed blocks are ignored.
        """
        if block.name:
            if block.find_content( "section" ):
                self.append_section( block )

            elif self.current_section:
                self.current_section.add_element( block )
                block.section = self.current_section
                self.identifiers[block.name] = block


    def prepare_files( self, file_prefix = None ):
        """Compute section titles, element order, output filenames and the
        sorted index.  'file_prefix', when given, is prepended (followed
        by "-") to every generated filename.
        """
        # imported here so that this edit does not depend on the file's
        # import line
        import functools

        if file_prefix:
            prefix = file_prefix + "-"
        else:
            prefix = ""

        # compute section names
        #
        for section in self.sections.values():
            title_content = section.block.find_content( "title" )
            if title_content:
                section.title = title_content.get_title()
            else:
                section.title = "UNKNOWN_SECTION_TITLE!"

        # sort section elements according to the <order> marker when
        # available
        #
        for section in self.sections.values():
            order = section.block.find_content( "order" )
            if not order:
                continue

            where      = section.block.location()
            order_list = []

            for item in order.items:
                for element in item[1]:
                    # only paragraphs provide get_words()
                    #
                    try:
                        words = element.get_words()
                    except AttributeError:
                        sys.stderr.write( "WARNING:" + where +
                                          ": invalid content in <order> marker\n" )
                        continue

                    for word in words:
                        block = self.identifiers.get( word )
                        if not block:
                            sys.stderr.write( "WARNING:" + where +
                                              ": invalid reference to '" +
                                              word + "'\n" )
                        elif block.section != section:
                            sys.stderr.write( "WARNING:" + where +
                                              ": invalid reference to '" +
                                              word +
                                              "' defined in other section\n" )
                        elif word not in order_list:
                            # a name listed twice is only emitted once
                            order_list.append( word )

            # now sort the list of blocks according to the order list;
            # blocks that are not listed keep their relative order
            #
            new_list = [ section.elements[name] for name in order_list ]
            for block in section.list:
                if block.name not in order_list:
                    new_list.append( block )

            section.list = new_list

        # compute section filenames
        #
        for section in self.sections.values():
            section.filename = prefix + section.name + ".html"

        self.toc_filename   = prefix + "toc.html"
        self.index_filename = prefix + "index.html"

        # compute the sorted list of identifiers for the index
        #
        self.index = sorted( self.identifiers.keys(),
                             key = functools.cmp_to_key( index_sort ) )


    def dump_html_toc( self ):
        """Write the table of contents to 'self.toc_filename'."""
        old_stdout = sys.stdout
        new_file   = open( self.toc_filename, "w" )
        try:
            # the dump_html() methods print to sys.stdout
            sys.stdout = new_file

            print( html_header )

            print( "<center><h1>Table of Contents</h1></center>" )

            print( "<center><table cellpadding=5>" )
            for section in self.list:
                if section.abstract:
                    print( "<tr valign=top><td>" )
                    sys.stdout.write( '<a href="' + section.filename + '">' )
                    sys.stdout.write( section.title )
                    sys.stdout.write( "</a></td><td>" + '\n' )
                    section.abstract.dump_html( self.identifiers )
                    print( "</td></tr>" )

            print( "</table></center>" )

            print( html_footer )
        finally:
            sys.stdout = old_stdout
            new_file.close()


    def dump_html_sections( self ):
        """Write one HTML file per section."""
        old_stdout = sys.stdout

        for section in self.sections.values():
            if section.filename:
                new_file = open( section.filename, "w" )
                try:
                    sys.stdout = new_file
                    section.dump_html( self.identifiers )
                finally:
                    sys.stdout = old_stdout
                    new_file.close()


    def dump_html_index( self ):
        """Write the general index to 'self.index_filename'."""
        old_stdout = sys.stdout
        new_file   = open( self.index_filename, "w" )
        try:
            sys.stdout = new_file

            num_columns = 3
            total       = len( self.index )
            line        = 0

            print( html_header )

            print( "<center><h1>General Index</h1></center>" )

            print( "<center><table cellpadding=5><tr valign=top><td>" )

            for ident in self.index:
                block = self.identifiers[ident]
                if block:
                    sys.stdout.write( '<a href="' + block.html_address() + '">' )
                    sys.stdout.write( block.name )
                    sys.stdout.write( '</a><br>' + '\n' )

                    # start a new column after about total/num_columns
                    # entries
                    #
                    if line * num_columns >= total:
                        print( "</td><td>" )
                        line = 0
                    else:
                        line = line + 1
                else:
                    sys.stderr.write( "identifier '" + ident +
                                      "' has no definition" + '\n' )

            print( "</tr></table></center>" )

            print( html_footer )
        finally:
            sys.stdout = old_stdout
            new_file.close()



# Filter a given list of DocBlocks.  Returns a new list
# of DocBlock objects that only contains element whose
# "type" (i.e. first marker) is in the "types" parameter.
#
def filter_blocks_by_type( block_list, types ):
    return [ block for block in block_list
             if block.items and block.items[0][0] in types ]


def filter_section_blocks( block ):
    """Return True when 'block' has been attached to a section."""
    return block.section is not None


# Perform a lexicographical comparison of two DocBlock
# objects.  Returns -1, 0 or 1.
#
def block_lexicographical_compare( b1, b2 ):
    """Compare two blocks by lower-cased name; return -1, 0 or 1.

    A block without a name sorts before any named block; two unnamed
    blocks compare equal.
    """
    if not b1.name or not b2.name:
        return bool( b1.name ) - bool( b2.name )

    id1 = b1.name.lower()
    id2 = b2.name.lower()

    if id1 < id2:
        return -1
    elif id1 == id2:
        return 0
    else:
        return 1


# dump a list block as a single HTML page
#
def dump_html_1( block_list ):
    print( html_header )

    for block in block_list:
        block.dump_html()

    print( html_footer )


def file_exists( pathname ):
    """Return 1 when 'pathname' can be opened for reading, else None."""
    try:
        handle = open( pathname, "r" )
    except ( IOError, OSError ):
        return None

    handle.close()
    return 1


def add_new_block( list, filename, lineno, block_lines, source_lines ):
    """add a new block to the list"""
    block          = DocBlock( block_lines, source_lines )
    block.filename = filename
    block.lineno   = lineno

    list.append( block )


def _is_kept_comment( text ):
    """Tell whether the stripped line 'text', which starts with '/*', is
    one of the comments kept as source: /*@, /*#, /* @ or /* #.
    """
    return text[2:3] in ( '@', '#' ) or text[2:4] in ( ' @', ' #' )


def make_block_list():
    """parse a file and extract comments blocks from it"""

    # the files to parse are named in sys.argv[1:]; arguments containing
    # a '*' are expanded as glob patterns
    #
    file_list = []
    #sys.stderr.write( repr( sys.argv[1:] ) + '\n' )

    for pathname in sys.argv[1:]:
        if pathname.find( '*' ) >= 0:
            newpath = glob.glob( pathname )
            newpath.sort()  # sort files -- this is important because
                            # of the order of files
        else:
            newpath = [pathname]

        file_list.extend( newpath )

    if len( file_list ) == 0:
        file_list = None
    else:
        # now filter the file list to remove non-existing ones
        file_list = [ name for name in file_list if file_exists( name ) ]

    blocks   = []
    block    = []    # text lines of the current comment block
    source   = []    # source lines following the current comment block
    lineno   = 0     # line number where the current block starts
    filename = None  # file in which the current block starts

    # We use "fmt" to store the state of our parser:
    #
    #  0 - wait for beginning of comment
    #  1 - parse comment format 1
    #  2 - parse comment format 2
    #
    #  4 - wait for beginning of source (or comment ??)
    #  5 - process source
    #
    fmt = 0

    for line in fileinput.input( file_list ):
        if line[-1:] == '\n':
            line = line[:-1]

        # stripped version of the line
        #
        line2 = line.strip()
        l     = len( line2 )

        # if this line begins with a comment and we are processing some
        # source, exit to state 0
        #
        # unless we encounter something like:
        #
        #    /*@.....
        #    /*#.....
        #
        #    /* @.....
        #    /* #.....
        #
        if fmt >= 4 and l > 2 and line2[0 : 2] == '/*':
            if not _is_kept_comment( line2 ):
                add_new_block( blocks, filename, lineno, block, source )
                fmt = 0

        if fmt == 0:  #### wait for beginning of comment ####

            if l > 3 and line2[0 : 3] == '/**':
                i = 3
                while i < l and line2[i] == '*':
                    i = i + 1

                if i == l:
                    # this is '/**' followed by any number of '*', the
                    # beginning of a Format 1 block
                    #
                    started = 1

                elif i == l - 1 and line2[i] == '/':
                    # this is '/**' followed by any number of '*', followed
                    # by a '/', i.e. the beginning of a Format 2 or 3 block
                    #
                    started = 2

                else:
                    started = 0

                if started:
                    block    = []
                    source   = []
                    fmt      = started
                    lineno   = fileinput.lineno()
                    filename = fileinput.filename()

        ##############################################################
        #
        # FORMAT 1
        #
        elif fmt == 1:

            # If the line doesn't begin with a "*", something went
            # wrong, and we must exit, and forget the current block.
            #
            if l == 0 or line2[0] != '*':
                block = []
                fmt   = 0

            # Otherwise, we test for an end of block, which is an
            # arbitrary number of '*', followed by '/'.
            #
            else:
                i = 1
                while i < l and line2[i] == '*':
                    i = i + 1

                # test for the end of the block
                #
                if i < l and line2[i] == '/':
                    if block != []:
                        fmt = 4
                    else:
                        fmt = 0
                else:
                    # otherwise simply append line to current block
                    #
                    block.append( line2[i:] )

                # a comment line is never source
                continue

        ##############################################################
        #
        # FORMAT 2
        #
        elif fmt == 2:

            # If the line doesn't begin with '/*' and end with '*/',
            # this is the end of the format 2 format.  The line itself
            # is then handled as source below.
            #
            if l < 4 or line2[: 2] != '/*' or line2[-2 :] != '*/':
                if block != []:
                    fmt = 4
                else:
                    fmt = 0
            else:
                # remove the start and end comment delimiters, then
                # right-strip the line
                #
                line2 = line2[2 : -2].rstrip()

                # check for end of a format2 block, i.e. a run of '*'
                #
                if line2.count( '*' ) == l - 4:
                    if block != []:
                        fmt = 4
                    else:
                        fmt = 0
                else:
                    # otherwise, add the line to the current block
                    #
                    block.append( line2 )

                # a comment line is never source
                continue

        if fmt >= 4:  #### source processing ####

            # leading empty lines are skipped
            if l > 0:
                fmt = 5

            if fmt == 5:
                source.append( line )

    fileinput.close()

    if fmt >= 4:
        add_new_block( blocks, filename, lineno, block, source )

    return blocks


# This function is only used for debugging
#
def dump_block_list( list ):
    """dump a comment block list"""
    # NOTE(review): this indexes each entry as block[0] / block[1], i.e. it
    # expects ( comment_lines, source_lines ) pairs -- confirm before use.
    for block in list:
        print( "----------------------------------------" )
        for line in block[0]:
            print( line )
        for line in block[1]:
            print( line )

    print( "---------the end-----------------------" )


def main( argv ):
    """main program loop"""

    # NOTE: 'argv' is not read here; make_block_list() takes the file
    # names from sys.argv.

    # we begin by simply building a list of DocBlock elements
    #
    sys.stderr.write( "extracting comment blocks from sources...\n" )
    block_list = make_block_list()

    # now, sort the blocks into sections
    #
    section_list = DocSectionList()
    for block in block_list:
        section_list.append_block( block )

    section_list.prepare_files( "ft2" )

    # dump the section list TOC and sections
    #
    section_list.dump_html_toc()
    section_list.dump_html_sections()
    section_list.dump_html_index()

    # list2 = filter_blocks( list, ['type','macro','enum','constant','functype'] )
    # list2 = list
    # list2.sort( block_lexicographical_compare )

    # dump_html_1( list2 )
    # dump_doc_blocks( list )
    # dump_block_lists( list )
    # dump_html_1( list )


# If called from the command line
#
if __name__ == '__main__':
    main( sys.argv )


# eof