source: trunk/packages/xen-common/xen-common/docs/pythfilter.py @ 95

Last change on this file since 95 was 34, checked in by hartmans, 17 years ago

Add xen and xen-common

  • Property svn:mime-type set to text/script
File size: 21.1 KB
Line 
1#!/usr/bin/env python
2
3# pythfilter.py v1.5.5, written by Matthias Baas (baas@ira.uka.de)
4
5# Doxygen filter which can be used to document Python source code.
6# Classes (incl. methods) and functions can be documented.
7# Every comment that begins with ## is literally turned into an
8# Doxygen comment. Consecutive comment lines are turned into
9# comment blocks (-> /** ... */).
10# All the stuff is put inside a namespace with the same name as
11# the source file.
12
13# Conversions:
14# ============
15# ##-blocks                  ->  /** ... */
16# "class name(base): ..."    ->  "class name : public base {...}"
17# "def name(params): ..."    ->  "name(params) {...}"
18
19# Changelog:
20# 21.01.2003: Raw (r"") or unicode (u"") doc string will now be properly
21#             handled. (thanks to Richard Laager for the patch)
22# 22.12.2003: Fixed a bug where no function names would be output for "def"
23#             blocks that were not in a class.
24#             (thanks to Richard Laager for the patch)
25# 12.12.2003: Implemented code to handle static and class methods with
26#             this logic: Methods with "self" as the first argument are
27#             non-static. Methods with "cls" are Python class methods,
28#             which translate into static methods for Doxygen. Other
29#             methods are assumed to be static methods. As should be
30#             obvious, this logic doesn't take into account if the method
31#             is actually setup as a classmethod() or a staticmethod(),
32#             just if it follows the normal conventions.
33#             (thanks to Richard Laager for the patch)
34# 11.12.2003: Corrected #includes to use os.path.sep instead of ".". Corrected
35#             namespace code to use "::" instead of ".".
36#             (thanks to Richard Laager for the patch)
37# 11.12.2003: Methods beginning with two underscores that end with
38#             something other than two underscores are considered private
39#             and are handled accordingly.
40#             (thanks to Richard Laager for the patch)
41# 03.12.2003: The first parameter of class methods (self) is removed from
42#             the documentation.
43# 03.11.2003: The module docstring will be used as namespace documentation
44#             (thanks to Joe Bronkema for the patch)
45# 08.07.2003: Namespaces get a default documentation so that the namespace
46#             and its contents will show up in the generated documentation.
47# 05.02.2003: Directories will be deleted during synchronization.
48# 31.01.2003: -f option & filtering entire directory trees.
49# 10.08.2002: In base classes the '.' will be replaced by '::'
50# 18.07.2002: * and ** will be translated into arguments
51# 18.07.2002: Argument lists may contain default values using constructors.
52# 18.06.2002: Support for ## public:
53# 21.01.2002: from ... import will be translated to "using namespace ...;"
54#             TODO: "from ... import *" vs "from ... import names"
55#             TODO: Using normal imports: name.name -> name::name
56# 20.01.2002: #includes will be placed in front of the namespace
57
58######################################################################
59
60# The program is written as a state machine with the following states:
61#
62# - OUTSIDE               The current position is outside any comment,
63#                         class definition or function.
64#
65# - BUILD_COMMENT         Begins with first "##".
66#                         Ends with the first token that is no "##"
67#                         at the same column as before.
68#
69# - BUILD_CLASS_DECL      Begins with "class".
70#                         Ends with ":"
71# - BUILD_CLASS_BODY      Begins just after BUILD_CLASS_DECL.
72#                         The first following token (which is no comment)
73#                         determines indentation depth.
74#                         Ends with a token that has a smaller indentation.
75#
76# - BUILD_DEF_DECL        Begins with "def".
77#                         Ends with ":".
78# - BUILD_DEF_BODY        Begins just after BUILD_DEF_DECL.
79#                         The first following token (which is no comment)
80#                         determines indentation depth.
81#                         Ends with a token that has a smaller indentation.
82
83import getopt
84import glob
85import os.path
86import re
87import shutil
88import string
89import sys
90import token
91import tokenize
92
93from stat import *
94
# States of the token-level state machine (see the description in the
# file header). The numeric values are consecutive, starting at 0.
(OUTSIDE,
 BUILD_COMMENT,
 BUILD_CLASS_DECL,
 BUILD_CLASS_BODY,
 BUILD_DEF_DECL,
 BUILD_DEF_BODY,
 IMPORT,
 IMPORT_OP,
 IMPORT_APPEND) = range(9)

# Stream that receives the filtered output
outfile = sys.stdout

# Strings that output() has buffered for a later write to outfile
outbuffer = []

# Current output position as tracked by output()
out_row, out_col = 1, 0

# Working variables of rec_name_n_param()
name = param = doc_string = ""
record_state = 0
bracket_counter = 0

# Start positions, each a (row, column) tuple
class_spos = def_spos = import_spos = (0, 0)

# Keyword that opened the current import statement ("import" or "from")
import_token = ""

# Lines of the comment block currently being collected
comment_block = []
comment_finished = 0

# Module names collected from the current import statement
modules = []

# Stack of state-machine states; the last entry is the active state
stateStack = [OUTSIDE]

# Set once a module doc string has been turned into namespace documentation
module_has_docstring = False

# Member protection currently in effect inside a class
protection_level = "public"
private_member = False

# Name of the module namespace
namespace = ""
148
149######################################################################
150# Output string s. '\n' may only be at the end of the string (not
151# somewhere in the middle).
152#
153# In: s    - String
154#     spos - Startpos
155######################################################################
def output(s,spos, immediate=0):
    """Emit the string s so that it starts at output column spos[1].

    s is padded on the left with blanks to bridge the gap between the
    current output column (out_col) and spos[1]. Only the column part of
    spos is used; the row part is ignored here.

    In: s         - String. A newline may only appear at its end.
        spos      - Start position as a (row, column) tuple.
        immediate - If true, write straight to outfile instead of
                    appending to outbuffer.
    """
    global outbuffer, out_row, out_col, outfile

    # str.rjust never truncates: if the requested column has already been
    # passed, s is emitted unchanged.
    padded = s.rjust(spos[1] - out_col + len(s))

    if immediate:
        outfile.write(padded)
    else:
        outbuffer.append(padded)

    # Sanity check kept from the original: no newline in s[0:-2].
    assert -1 == s[0:-2].find("\n"), s

    if s[-1:] == "\n":
        # A trailing newline starts a new output row.
        out_row = out_row + 1
        out_col = 0
    else:
        out_col = spos[1] + len(s)
173
174
175######################################################################
176# Records a name and parameters. The name is either a class name or
177# a function name. Then the parameter is either the base class or
178# the function parameters.
179# The name is stored in the global variable "name", the parameters
180# in "param".
181# The variable "record_state" holds the current state of this internal
182# state machine.
183# The recording is started by calling start_recording().
184#
185# In: type, tok
186######################################################################
def rec_name_n_param(type, tok):
    """Feed one token to the name/parameter recorder.

    The recorder is a small state machine driven by the global
    record_state. It stores its results in the globals name, param and
    doc_string.

    In: type - Token type.
        tok  - Token string.
    """
    global record_state,name,param,doc_string,bracket_counter
    s = record_state
    # State 0: Do nothing.
    if s == 0:
        return
    # State 1: Remember name.
    elif s == 1:
        name = tok
        record_state = 2
    # State 2: Wait for opening bracket or colon
    elif s == 2:
        if tok == '(':
            bracket_counter = 1
            record_state = 3
        if tok == ':':
            record_state = 4
    # State 3: Store parameter (or base class) and wait for an ending bracket
    elif s == 3:
        # '*' and '**' are dropped so that only the argument name remains.
        if tok == '*' or tok == '**':
            tok = ''
        if tok == '(':
            bracket_counter = bracket_counter + 1
        if tok == ')':
            bracket_counter = bracket_counter - 1
        if bracket_counter == 0:
            # The bracket that closes the list is not part of param.
            record_state = 4
        else:
            param = param + tok
    # State 4: Look for doc string
    elif s == 4:
        # NOTE(review): token.SLASHEQUAL is kept from the original; it looks
        # like a stand-in for another token number - confirm.
        if type in (token.NEWLINE, token.INDENT, token.SLASHEQUAL):
            return
        if tok == ":":
            return
        if type == token.STRING:
            doc_string = _strip_string_quotes(tok)
        # Any other token ends the recording without a doc string.
        record_state = 0


def _strip_string_quotes(tok):
    """Return string token tok without its prefix letters and quotes."""
    body = tok.lstrip("rRuUbB")
    # Triple quotes are tried first so that '"""x"""' loses all three.
    for quote in ('"""', "'''", '"', "'"):
        if (len(body) >= 2 * len(quote)
                and body.startswith(quote) and body.endswith(quote)):
            return body[len(quote):-len(quote)]
    return body
230
231######################################################################
232# Starts the recording of a name & param part.
233# The function rec_name_n_param() has to be fed with tokens. After
234# the necessary tokens are fed the name and parameters can be found
235# in the global variables "name" and "param".
236######################################################################
def start_recording():
    """Arm the recorder: clear name, param and doc_string, enter state 1."""
    global record_state,param,name, doc_string
    name = param = doc_string = ""
    record_state = 1
243
244######################################################################
245# Test if recording is finished
246######################################################################
def is_recording_finished():
    """Return True when the recorder is back in its idle state (0)."""
    finished = (record_state == 0)
    return finished
250
251######################################################################
252## Gather comment block
253######################################################################
def gather_comment(type,tok,spos):
    """Collect "##" comment tokens into the pending comment block.

    A token that is not a comment marks the pending block as finished.
    When the next comment token arrives, the finished block is printed at
    spos before the new one is started. Comments that start with "###"
    are not collected.
    """
    global comment_block,comment_finished

    if type != tokenize.COMMENT:
        comment_finished = 1
        return

    # Output old comment block if a new one is started.
    if comment_finished:
        print_comment(spos)
        comment_finished = 0

    is_doc_comment = tok.startswith("##") and not tok.startswith("###")
    if is_doc_comment:
        append_comment_lines(tok[2:])
265
266######################################################################
267## Output comment block and empty buffer.
268######################################################################
def print_comment(spos):
    """Output the pending comment block as "/** ... */" and empty the buffer.

    Nothing is output when the buffer is empty. In either case the buffer
    and the comment_finished flag are reset.
    """
    global comment_block,comment_finished
    if comment_block:
        for piece in ["/** "] + comment_block + ["*/\n"]:
            output(piece, spos)
    comment_block = []
    comment_finished = 0
278
279######################################################################
def set_state(s):
    """Replace the state on top of the state stack with s."""
    stateStack[-1] = s
283
284######################################################################
def get_state():
    """Return the state on top of the state stack."""
    return stateStack[-1]
288
289######################################################################
def push_state(s):
    """Enter state s, keeping the previous state below it on the stack."""
    stateStack.append(s)
293
294######################################################################
def pop_state():
    """Leave the current state and return to the one below it."""
    del stateStack[-1]
298
299
300######################################################################
def tok_eater(type, tok, spos, epos, line):
    """Consume one token and advance the filter's state machine.

    The five parameters are the fields of one tokenize token; only type,
    tok and spos are used. Depending on the state on top of stateStack the
    token starts or finishes a class declaration, a function declaration,
    an import statement or the module doc string, and the matching
    C++-like text is passed to output().

    In: type - Token type.
        tok  - Token string.
        spos - Start of the token as a (row, column) tuple.
        epos - End of the token (unused).
        line - Source line that contains the token (unused).
    """
    global stateStack,name,param,class_spos,def_spos,import_spos
    global doc_string, modules, import_token, module_has_docstring
    global protection_level, private_member
    global out_row

    # Emit empty lines until the output row has caught up with the row of
    # this token.
    while out_row + 1 < spos[0]:
        output("\n", (0, 0))

    rec_name_n_param(type,tok)

    # "## private:", "## protected:" and "## public:" (blanks are ignored)
    # switch the protection level; any other token goes to the comment
    # collector.
    marker = tok.strip().replace(" ", "")
    if marker in ("##private:", "##protected:", "##public:"):
        protection_level = marker[2:-1]
        output(protection_level + ":\n", spos)
    else:
        gather_comment(type,tok,spos)

    state = get_state()

    # OUTSIDE
    if state == OUTSIDE:
        if tok == "class":
            start_recording()
            class_spos = spos
            push_state(BUILD_CLASS_DECL)
        elif tok == "def":
            start_recording()
            def_spos = spos
            push_state(BUILD_DEF_DECL)
        elif tok == "import" or tok == "from":
            import_token = tok
            import_spos = spos
            modules     = []
            push_state(IMPORT)
        elif spos[1] == 0 and tok[:3] == '"""':
            # Capture module docstring as namespace documentation
            module_has_docstring = True
            append_comment_lines("\\namespace %s\n" % namespace)
            append_comment_lines(tok[3:-3])
            print_comment(spos)

    # IMPORT
    elif state == IMPORT:
        if type == token.NAME:
            modules.append(tok)
            set_state(IMPORT_OP)
    # IMPORT_OP
    elif state == IMPORT_OP:
        if tok == ".":
            set_state(IMPORT_APPEND)
        elif tok == ",":
            set_state(IMPORT)
        else:
            # Any other token ends the module list. The #include lines are
            # written immediately, i.e. ahead of everything still buffered.
            for m in modules:
                output('#include "'+m.replace('.',os.path.sep)+'.py"\n', import_spos, immediate=1)
                if import_token == "from":
                    output('using namespace '+m.replace('.', '::')+';\n', import_spos)
            pop_state()
    # IMPORT_APPEND
    elif state == IMPORT_APPEND:
        if type == token.NAME:
            modules[len(modules)-1] += "."+tok
            set_state(IMPORT_OP)
    # BUILD_CLASS_DECL
    elif state == BUILD_CLASS_DECL:
        if is_recording_finished():
            s = "class "+name
            if param != "":
                s = s+" : public "+param.replace('.','::')
            if doc_string != "":
                append_comment_lines(doc_string)
            print_comment(class_spos)
            output(s+"\n",class_spos)
            output("{\n",(class_spos[0]+1,class_spos[1]))
            protection_level = "public"
            output("  public:\n",(class_spos[0]+2,class_spos[1]))
            set_state(BUILD_CLASS_BODY)
    # BUILD_CLASS_BODY
    elif state == BUILD_CLASS_BODY:
        # The class ends at the first significant token that is not
        # indented deeper than the "class" keyword.
        # NOTE(review): 40 is a raw token-type number kept from the
        # original; its intended meaning is not visible here - confirm.
        if (type!=token.INDENT and type!=token.NEWLINE and type!=40 and
            type!=tokenize.NL and type!=tokenize.COMMENT and
            (spos[1]<=class_spos[1])):
            output("}; // end of class\n",(out_row+1,class_spos[1]))
            pop_state()
        elif tok == "def":
            start_recording()
            def_spos = spos
            push_state(BUILD_DEF_DECL)
    # BUILD_DEF_DECL
    elif state == BUILD_DEF_DECL:
        if is_recording_finished():
            param = param.replace("\n", " ")
            param = param.replace("=", " = ")
            params = param.split(",")
            if BUILD_CLASS_BODY in stateStack:
                # "__name" without trailing "__" is treated as a private
                # member unless the section is private already.
                if name.startswith('__') \
                   and not name.endswith('__') \
                   and protection_level != 'private':
                    private_member = True
                    output("  private:\n",(def_spos[0]+2,def_spos[1]))

            if doc_string != "":
                append_comment_lines(doc_string)

            print_comment(def_spos)

            # Only a declaration is output; function bodies are dropped.
            output_function_decl(name, params)
            set_state(BUILD_DEF_BODY)
    # BUILD_DEF_BODY
    elif state == BUILD_DEF_BODY:
        # The body ends at the first significant token that is not indented
        # deeper than the "def" keyword (see the note on 40 above).
        if (type!=token.INDENT and type!=token.NEWLINE \
            and type!=40 and type!=tokenize.NL \
            and (spos[1]<=def_spos[1])):
            # Restore the protection level that was in effect before the
            # implicit "private:" of a "__name" method.
            if private_member and protection_level != 'private':
                private_member = False
                output("  " + protection_level + ":\n",(def_spos[0]+2,def_spos[1]))
            pop_state()
429
430
def output_function_decl(name, params):
    """Output a C++-like declaration for the function or method name.

    A leading "self" parameter is removed and the declaration is treated
    as non-static. Otherwise the declaration is prefixed with "static ",
    and a leading "cls" parameter is removed. Every remaining parameter is
    output with the placeholder type "Type".

    In: name   - Function name.
        params - List of parameter strings.
    """
    global def_spos

    # Parameters of a multi-line "def" carry surrounding blanks; strip them
    # so that the self/cls test below and the output are not affected.
    params = [p.strip() for p in params]

    # Do we document a class method? then remove the 'self' parameter
    if params[:1] == ['self']:
        preamble = ''
        params = params[1:]
    else:
        preamble = 'static '
        if params[:1] == ['cls']:
            params = params[1:]

    param_string = ", Type ".join(params)

    if param_string == '':
        param_string = '(' + param_string + ');\n'
    else:
        param_string = '(Type ' + param_string + ');\n'

    output(preamble, def_spos)
    output(name, def_spos)
    output(param_string, def_spos)
453
454
def append_comment_lines(lines):
    """Append every line of the string lines to the comment block.

    In: lines - String; it is split at newline characters and each part is
                passed to append_comment_line().
    """
    # Split the argument itself (not the global doc_string), and use a
    # plain loop: map() is lazy on Python 3 and would append nothing.
    for line in lines.split('\n'):
        append_comment_line(line)

# Matches "@param <name>:" so that the colon after the name can be dropped.
paramRE = re.compile(r'(@param \w+):')

def append_comment_line(line):
    """Append one line, newline-terminated, to the comment block.

    The colon that directly follows "@param <name>" is removed.
    """
    global paramRE

    comment_block.append(paramRE.sub(r'\1', line) + '\n')
464
def dump(filename):
    """Copy the file filename unchanged to sys.stdout.

    Raises IOError if the file cannot be opened or read.
    """
    f = open(filename)
    try:
        for s in f:
            sys.stdout.write(s)
    finally:
        # Close the handle even when reading or writing fails.
        f.close()
470
def filter(filename):
    """Filter the Python file filename and write the result to outfile.

    The file is tokenized and every token is passed to tok_eater(). The
    output is wrapped in one namespace per path component below
    source_root (if set and if the file lies below it) plus one namespace
    named after the module. A default namespace documentation is added
    when the module has no doc string.

    Raises IOError if the file cannot be opened or read.
    """
    global name, namespace, module_has_docstring, source_root

    path,name = os.path.split(filename)
    root,ext  = os.path.splitext(name)

    ns = []
    if source_root and path.startswith(source_root):
        rel = path[len(source_root):]
        # Accept the prefix only at a path component boundary, so that
        # "/src2/x" is not taken to be below "/src".
        if rel == "" or rel.startswith(os.sep) or source_root.endswith(os.sep):
            # Empty components are dropped; this also covers a file that
            # lies directly in source_root.
            ns = [part for part in rel.split(os.sep) if part]

    ns.append(root)

    for n in ns:
        output("namespace " + n + " {\n",(0,0))

    # set module name for tok_eater to use if there's a module doc string
    name = root
    # tok_eater reads the global "namespace" for the \namespace command.
    # The qualified name is used so that the documentation refers to the
    # nested namespace opened above.
    namespace = "::".join(ns)

    f = open(filename)
    try:
        # generate_tokens() exists on Python 2 and 3, unlike the callback
        # form tokenize.tokenize(readline, tokeneater).
        for tok in tokenize.generate_tokens(f.readline):
            tok_eater(*tok)
    finally:
        f.close()
    print_comment((0,0))

    output("\n",(0,0))

    for n in ns:
        output("}  // end of namespace\n",(0,0))

    if not module_has_docstring:
        # Put in default namespace documentation
        output('/** \\namespace '+namespace+' \n',(0,0))
        output('    \\brief Module "%s" */\n'%(root),(0,0))

    # Flush everything that output() has buffered.
    for s in outbuffer:
        outfile.write(s)
513
514
def filterFile(filename, out=sys.stdout):
    """Filter one file and send the result to out.

    Files with the extension ".py" are run through filter(); every other
    file is passed to dump(), which writes to sys.stdout. An IOError is
    reported on sys.stderr and not propagated.

    In: filename - Name of the input file.
        out      - Output stream; it becomes the global outfile.
    """
    global outfile

    outfile = out

    try:
        root,ext  = os.path.splitext(filename)

        if ext==".py":
            filter(filename)
        else:
            dump(filename)
    except IOError as e:
        # strerror is None when the exception carries no errno/message
        # pair; fall back to the plain text of the exception then.
        sys.stderr.write((e.strerror or str(e))+"\n")
531
532
533######################################################################
534
535# preparePath
def preparePath(path):
    """Prepare a path.

    Checks if the path exists and creates it (including missing parent
    directories) if it does not exist. An empty path stands for the
    current directory and needs no preparation.

    Raises OSError if the directory cannot be created.
    """
    if path == "" or os.path.exists(path):
        return
    try:
        os.makedirs(path)
    except OSError:
        # Another process may have created the directory in the meantime;
        # only a directory that is still missing is an error.
        if not os.path.isdir(path):
            raise
546
547# isNewer
def isNewer(file1,file2):
    """Tell whether file1 was modified more recently than file2.

    file1 has to exist. A missing file2 counts as older, so the result is
    True in that case.
    """
    if os.path.exists(file2):
        mtime1 = os.stat(file1)[ST_MTIME]
        mtime2 = os.stat(file2)[ST_MTIME]
        return mtime1 > mtime2
    return True
556
557# convert
def convert(srcpath, destpath):
    """Convert a Python source tree into a C++ stub tree.

    All *.py files in srcpath (including sub-directories) are filtered
    and written to destpath. If destpath exists, only the files
    that have been modified are filtered again. Entries of destpath that
    have no counterpart of the same name in srcpath are deleted.
    The function returns the number of processed *.py files.

    The program exits with status 1 if an output file would have the same
    name as its input file.
    """
    # Imported here because the file's import block does not provide it.
    import subprocess

    count=0
    sfiles = glob.glob(os.path.join(srcpath,"*"))
    dfiles = glob.glob(os.path.join(destpath,"*"))

    # Names present in destpath; whatever is left after the loop below has
    # no counterpart in srcpath.
    leftovers={}
    for df in dfiles:
        leftovers[os.path.basename(df)]=1

    for srcfile in sfiles:
        basename = os.path.basename(srcfile)
        if basename in leftovers:
            del leftovers[basename]

        # Is it a subdirectory?
        if os.path.isdir(srcfile):
            sdir = os.path.join(srcpath,basename)
            ddir = os.path.join(destpath,basename)
            count+=convert(sdir, ddir)
            continue
        # Check the extension (only *.py will be converted)
        root, ext = os.path.splitext(srcfile)
        if ext.lower()!=".py":
            continue

        destfile = os.path.join(destpath,basename)
        if destfile==srcfile:
            print("WARNING: Input and output names are identical!")
            sys.exit(1)

        count+=1

        if isNewer(srcfile, destfile):
            preparePath(os.path.dirname(destfile))
            # Each file is filtered in a fresh process: the filter keeps
            # its state in module-level globals that are never reset.
            # An argument list (no shell) keeps file names with blanks or
            # shell metacharacters intact.
            out = open(destfile,"w")
            try:
                subprocess.call([sys.executable or "python",
                                 sys.argv[0], "-f", srcfile], stdout=out)
            finally:
                out.close()

    # Delete obsolete files in destpath
    for df in leftovers:
        dname=os.path.join(destpath,df)
        if os.path.isdir(dname):
            try:
                shutil.rmtree(dname)
            except EnvironmentError:
                print("Can't remove obsolete directory '%s'"%dname)
        else:
            try:
                os.remove(dname)
            except EnvironmentError:
                print("Can't remove obsolete file '%s'"%dname)

    return count
622
623
624######################################################################
625######################################################################
626######################################################################
627
# Command line handling. Options:
#   -f          filter the single file given as argument and print the
#               result to stdout
#   -r <dir>    source root; stored as an absolute path in source_root
#   -h, --help  print the usage line and exit
filter_file = False
source_root = None

usage = "%s options input output\n"%(os.path.basename(sys.argv[0]))

try:
    opts, args = getopt.getopt(sys.argv[1:], "hfr:", ["help"])
except getopt.GetoptError as e:
    print(e)
    sys.exit(1)

for o,a in opts:
    if o in ("-h", "--help"):
        sys.stdout.write(usage)
        sys.exit(0)

    if o=="-f":
        filter_file = True

    if o=="-r":
        source_root = os.path.abspath(a)

if filter_file:
    # Filter the specified file and print the result to stdout
    filename = " ".join(args)
    filterFile(os.path.abspath(filename))
else:

    if len(args)!=2:
        sys.stderr.write(usage)
        sys.exit(1)

    # Filter an entire Python source tree
    print('"%s" -> "%s"\n'%(args[0],args[1]))
    c=convert(args[0],args[1])
    print("%d files"%(c))
658
Note: See TracBrowser for help on using the repository browser.