1 |
#!/usr/bin/env python |
2 |
|
3 |
# pythfilter.py v1.5.5, written by Matthias Baas (baas@ira.uka.de) |
4 |
|
5 |
# Doxygen filter which can be used to document Python source code. |
6 |
# Classes (incl. methods) and functions can be documented. |
7 |
# Every comment that begins with ## is literally turned into a |
8 |
# Doxygen comment. Consecutive comment lines are turned into |
9 |
# comment blocks (-> /** ... */). |
10 |
# All the stuff is put inside a namespace with the same name as |
11 |
# the source file. |
12 |
|
13 |
# Conversions: |
14 |
# ============ |
15 |
# ##-blocks -> /** ... */ |
16 |
# "class name(base): ..." -> "class name : public base {...}" |
17 |
# "def name(params): ..." -> "name(params) {...}" |
18 |
|
19 |
# Changelog: |
20 |
# 21.01.2003: Raw (r"") or unicode (u"") doc string will now be properly |
21 |
# handled. (thanks to Richard Laager for the patch) |
22 |
# 22.12.2003: Fixed a bug where no function names would be output for "def" |
23 |
# blocks that were not in a class. |
24 |
# (thanks to Richard Laager for the patch) |
25 |
# 12.12.2003: Implemented code to handle static and class methods with |
26 |
# this logic: Methods with "self" as the first argument are |
27 |
# non-static. Methods with "cls" are Python class methods, |
28 |
# which translate into static methods for Doxygen. Other |
29 |
# methods are assumed to be static methods. As should be |
30 |
# obvious, this logic doesn't take into account if the method |
31 |
# is actually setup as a classmethod() or a staticmethod(), |
32 |
# just if it follows the normal conventions. |
33 |
# (thanks to Richard Laager for the patch) |
34 |
# 11.12.2003: Corrected #includes to use os.path.sep instead of ".". Corrected |
35 |
# namespace code to use "::" instead of ".". |
36 |
# (thanks to Richard Laager for the patch) |
37 |
# 11.12.2003: Methods beginning with two underscores that end with |
38 |
# something other than two underscores are considered private |
39 |
# and are handled accordingly. |
40 |
# (thanks to Richard Laager for the patch) |
41 |
# 03.12.2003: The first parameter of class methods (self) is removed from |
42 |
# the documentation. |
43 |
# 03.11.2003: The module docstring will be used as namespace documentation |
44 |
# (thanks to Joe Bronkema for the patch) |
45 |
# 08.07.2003: Namespaces get a default documentation so that the namespace |
46 |
# and its contents will show up in the generated documentation. |
47 |
# 05.02.2003: Directories will be deleted during synchronization. |
48 |
# 31.01.2003: -f option & filtering entire directory trees. |
49 |
# 10.08.2002: In base classes the '.' will be replaced by '::' |
50 |
# 18.07.2002: * and ** will be translated into arguments |
51 |
# 18.07.2002: Argument lists may contain default values using constructors. |
52 |
# 18.06.2002: Support for ## public: |
53 |
# 21.01.2002: from ... import will be translated to "using namespace ...;" |
54 |
# TODO: "from ... import *" vs "from ... import names" |
55 |
# TODO: Using normal imports: name.name -> name::name |
56 |
# 20.01.2002: #includes will be placed in front of the namespace |
57 |
|
58 |
###################################################################### |
59 |
|
60 |
# The program is written as a state machine with the following states: |
61 |
# |
62 |
# - OUTSIDE The current position is outside any comment, |
63 |
# class definition or function. |
64 |
# |
65 |
# - BUILD_COMMENT Begins with first "##". |
66 |
# Ends with the first token that is not a "##" |
67 |
# at the same column as before. |
68 |
# |
69 |
# - BUILD_CLASS_DECL Begins with "class". |
70 |
# Ends with ":" |
71 |
# - BUILD_CLASS_BODY Begins just after BUILD_CLASS_DECL. |
72 |
# The first following token (which is not a comment) |
73 |
# determines indentation depth. |
74 |
# Ends with a token that has a smaller indentation. |
75 |
# |
76 |
# - BUILD_DEF_DECL Begins with "def". |
77 |
# Ends with ":". |
78 |
# - BUILD_DEF_BODY Begins just after BUILD_DEF_DECL. |
79 |
# The first following token (which is not a comment) |
80 |
# determines indentation depth. |
81 |
# Ends with a token that has a smaller indentation. |
82 |
|
83 |
import getopt |
84 |
import glob |
85 |
import os.path |
86 |
import shutil |
87 |
import string |
88 |
import sys |
89 |
import token |
90 |
import tokenize |
91 |
|
92 |
from stat import * |
93 |
|
94 |
# States of the token state machine (values 0..8, in this order).
(OUTSIDE,
 BUILD_COMMENT,
 BUILD_CLASS_DECL,
 BUILD_CLASS_BODY,
 BUILD_DEF_DECL,
 BUILD_DEF_BODY,
 IMPORT,
 IMPORT_OP,
 IMPORT_APPEND) = range(9)

# Stream that receives the generated output.
outfile = sys.stdout

# Buffered output fragments, written to outfile at the end of filter().
outbuffer = []

# Row/column bookkeeping maintained by output().
out_row = out_col = 0

# Working variables of rec_name_n_param().
name = param = doc_string = ""
record_state = bracket_counter = 0

# Start positions as (row, column) tuples.
class_spos = def_spos = import_spos = (0, 0)

# Keyword that opened the current import statement ("import" or "from").
import_token = ""

# Pending "##" comment lines and the flag marking the block as complete.
comment_block = []
comment_finished = 0

# Module names collected from the current import statement.
modules = []

# Stack of state machine states; the last entry is the current state.
stateStack = [OUTSIDE]

# Set once a module docstring has been emitted.
module_has_docstring = False

# Member protection bookkeeping.
protection_level = "public"
private_member = False

# Name of the namespace that represents the module.
namespace = ""
147 |
|
148 |
###################################################################### |
149 |
# Output string s. '\n' may only be at the end of the string (not |
150 |
# somewhere in the middle). |
151 |
# |
152 |
# In: s - String |
153 |
# spos - Startpos |
154 |
###################################################################### |
155 |
def output(s, spos, immediate=0):
    """Emit the string s so that it starts at column spos[1].

    The string is left-padded with blanks to bridge the gap between the
    current output column (out_col) and the requested start column.  If
    the requested column lies at or before out_col, no padding is added.

    In:  s         - String to emit.  A newline may only appear as its
                     last character (not somewhere in the middle).
         spos      - Start position as a (row, column) tuple; only the
                     column is used here.
         immediate - If true, write directly to outfile; otherwise
                     append to outbuffer.
    """
    global out_row, out_col

    # str.rjust replaces the string.rjust() function, which no longer
    # exists in Python 3.  The local is not called "os" any more so it
    # cannot be confused with the os module.
    padded = s.rjust(spos[1] - out_col + len(s))
    if immediate:
        outfile.write(padded)
    else:
        outbuffer.append(padded)

    # The row/column bookkeeping is updated in both modes.
    if s.endswith("\n"):
        out_row = out_row + 1
        out_col = 0
    else:
        out_col = spos[1] + len(s)
168 |
|
169 |
|
170 |
###################################################################### |
171 |
# Records a name and parameters. The name is either a class name or |
172 |
# a function name. Then the parameter is either the base class or |
173 |
# the function parameters. |
174 |
# The name is stored in the global variable "name", the parameters |
175 |
# in "param". |
176 |
# The variable "record_state" holds the current state of this internal |
177 |
# state machine. |
178 |
# The recording is started by calling start_recording(). |
179 |
# |
180 |
# In: type, tok |
181 |
###################################################################### |
182 |
def _strip_string_quotes(tok):
    """Return the text of the string literal tok without prefix and quotes."""
    # Drop raw/unicode/bytes prefix letters in either case; the scan stops
    # at the opening quote.
    body = tok.lstrip("rRuUbB")
    # Try triple quotes first so that '"""x"""' loses exactly three quotes
    # on each side.
    for quote in ('"""', "'''", '"', "'"):
        if (len(body) >= 2 * len(quote)
                and body.startswith(quote) and body.endswith(quote)):
            return body[len(quote):len(body) - len(quote)]
    return body


def rec_name_n_param(type, tok):
    """Feed one token into the name/parameter recorder.

    The recorder is a small state machine driven by record_state:

      0 - idle, tokens are ignored
      1 - the next token is stored as name
      2 - wait for "(" (parameters follow) or ":" (no parameters)
      3 - collect tokens into param until the matching ")" is reached
      4 - look for a doc string, then return to state 0

    Results are left in the globals name, param and doc_string.

    In:  type - Token type
         tok  - Token string
    """
    global record_state, name, param, doc_string, bracket_counter
    s = record_state

    # State 0: Do nothing.
    if s == 0:
        return
    # State 1: Remember name.
    elif s == 1:
        name = tok
        record_state = 2
    # State 2: Wait for opening bracket or colon.
    elif s == 2:
        if tok == '(':
            bracket_counter = 1
            record_state = 3
        if tok == ':':
            record_state = 4
    # State 3: Store parameter (or base class) and wait for the closing
    # bracket that matches the first opening one.
    elif s == 3:
        # "*" and "**" are dropped so that "*args" is recorded as "args".
        if tok == '*' or tok == '**':
            tok = ''
        if tok == '(':
            bracket_counter = bracket_counter + 1
        if tok == ')':
            bracket_counter = bracket_counter - 1
        if bracket_counter == 0:
            record_state = 4
        else:
            param = param + tok
    # State 4: Look for doc string.
    elif s == 4:
        # Layout tokens between the signature and the doc string must not
        # end the search.  The original tested token.SLASHEQUAL here,
        # which never lets a blank line (NL) or a comment through, so the
        # doc string of e.g. "def f():  # note" was lost.
        if type in (token.NEWLINE, token.INDENT,
                    tokenize.NL, tokenize.COMMENT):
            return
        elif tok == ":":
            return
        elif type == token.STRING:
            doc_string = _strip_string_quotes(tok)
        # Any other token means there is no doc string.
        record_state = 0
225 |
|
226 |
###################################################################### |
227 |
# Starts the recording of a name & param part. |
228 |
# The function rec_name_n_param() has to be fed with tokens. After |
229 |
# the necessary tokens are fed the name and parameters can be found |
230 |
# in the global variables "name" and "param". |
231 |
###################################################################### |
232 |
def start_recording():
    """Arm the name/parameter recorder.

    Clears name, param and doc_string and puts the recorder into state 1,
    so that the next token fed to rec_name_n_param() is taken as the name.
    """
    global record_state, param, name, doc_string
    name = param = doc_string = ""
    record_state = 1
238 |
|
239 |
###################################################################### |
240 |
# Test if recording is finished |
241 |
###################################################################### |
242 |
def is_recording_finished():
    """Return True when the recorder is idle (record_state is 0)."""
    finished = (record_state == 0)
    return finished
245 |
|
246 |
###################################################################### |
247 |
## Gather comment block |
248 |
###################################################################### |
249 |
def gather_comment(type, tok, spos):
    """Collect "##" comment lines into comment_block.

    A token that is not a comment marks the current block as finished.
    The first comment after that flushes the old block through
    print_comment() before a new one is started.  Only comments that
    begin with exactly two "#" characters are stored (without the "##").

    In:  type - Token type
         tok  - Token string
         spos - Start position (row, column) of the token
    """
    global comment_finished

    if type != tokenize.COMMENT:
        comment_finished = 1
        return

    # Output old comment block if a new one is started.
    if comment_finished:
        print_comment(spos)
        comment_finished = 0

    is_doc_comment = tok.startswith("##") and not tok.startswith("###")
    if is_doc_comment:
        comment_block.append(tok[2:])
260 |
|
261 |
###################################################################### |
262 |
## Output comment block and empty buffer. |
263 |
###################################################################### |
264 |
def print_comment(spos):
    """Emit the gathered comment block as "/** ... */" and reset it.

    Nothing is emitted when the block is empty.  In either case
    comment_block is replaced by a new empty list and comment_finished is
    cleared.

    In:  spos - Position (row, column) passed on to output()
    """
    global comment_block, comment_finished

    if comment_block:
        pieces = ["/**\n"] + comment_block + ["*/\n"]
        for piece in pieces:
            output(piece, spos)

    comment_block = []
    comment_finished = 0
273 |
|
274 |
###################################################################### |
275 |
def set_state(s):
    """Replace the current state (top of stateStack) with s."""
    stateStack[-1] = s
278 |
|
279 |
###################################################################### |
280 |
def get_state():
    """Return the current state, i.e. the top of stateStack."""
    return stateStack[-1]
283 |
|
284 |
###################################################################### |
285 |
def push_state(s):
    """Enter state s, keeping the previous state below it on stateStack."""
    stateStack.insert(len(stateStack), s)
288 |
|
289 |
###################################################################### |
290 |
def pop_state():
    """Leave the current state by dropping the top of stateStack."""
    del stateStack[-1]
293 |
|
294 |
|
295 |
###################################################################### |
296 |
def tok_eater(type, tok, spos, epos, line):
    """Consume one token and advance the filter's state machine.

    Every token is first passed to rec_name_n_param().  It is then either
    handled as a protection marker ("## private:", "## protected:" or
    "## public:") or passed to gather_comment().  Finally the token is
    processed according to the current state (see get_state()).

    In:  type - Token type
         tok  - Token string
         spos - (row, column) where the token starts
         epos - (row, column) where the token ends (not used)
         line - Source line containing the token (not used)
    """
    global stateStack, name, param, class_spos, def_spos, import_spos
    global doc_string, modules, import_token, module_has_docstring
    global protection_level, private_member

    # NOTE(review): the bare 40 is kept from the original conditions; it is
    # presumably the numeric value tokenize.NL had on an old interpreter -
    # TODO confirm and drop.
    layout_types = (token.INDENT, token.NEWLINE, tokenize.NL, 40)

    rec_name_n_param(type, tok)

    # Protection markers are recognised regardless of inner blanks.
    # (str methods replace the string module functions, which do not exist
    # in Python 3.)
    marker = tok.strip().replace(" ", "")
    if marker in ("##private:", "##protected:", "##public:"):
        protection_level = marker[2:-1]
        output(protection_level + ":\n", spos)
    else:
        gather_comment(type, tok, spos)

    state = get_state()

    # OUTSIDE
    if state == OUTSIDE:
        if tok == "class":
            start_recording()
            class_spos = spos
            push_state(BUILD_CLASS_DECL)
        elif tok == "def":
            start_recording()
            def_spos = spos
            push_state(BUILD_DEF_DECL)
        elif tok == "import" or tok == "from":
            import_token = tok
            import_spos = spos
            modules = []
            push_state(IMPORT)
        elif spos[1] == 0 and tok[:3] == '"""':
            # Capture module docstring as namespace documentation
            module_has_docstring = True
            comment_block.append("\\namespace %s\n" % namespace)
            comment_block.append(tok[3:-3])
            print_comment(spos)

    # IMPORT: wait for a module name
    elif state == IMPORT:
        if type == token.NAME:
            modules.append(tok)
            set_state(IMPORT_OP)

    # IMPORT_OP: "." extends the name, "," starts the next one, anything
    # else ends the list and emits the collected modules
    elif state == IMPORT_OP:
        if tok == ".":
            set_state(IMPORT_APPEND)
        elif tok == ",":
            set_state(IMPORT)
        else:
            for m in modules:
                # The #include is written immediately so that it ends up
                # in front of the buffered namespace.
                output('#include "' + m.replace('.', os.path.sep) + '.py"\n',
                       import_spos, immediate=1)
                if import_token == "from":
                    output('using namespace ' + m.replace('.', '::') + ';\n',
                           import_spos)
            pop_state()

    # IMPORT_APPEND: append the next part of a dotted module name
    elif state == IMPORT_APPEND:
        if type == token.NAME:
            modules[-1] += "." + tok
            set_state(IMPORT_OP)

    # BUILD_CLASS_DECL: emit the class header once name/bases are recorded
    elif state == BUILD_CLASS_DECL:
        if is_recording_finished():
            s = "class " + name
            if param != "":
                s = s + " : public " + param.replace('.', '::')
            if doc_string != "":
                comment_block.append(doc_string)
            print_comment(class_spos)
            output(s + "\n", class_spos)
            output("{\n", (class_spos[0] + 1, class_spos[1]))
            protection_level = "public"
            output(" public:\n", (class_spos[0] + 2, class_spos[1]))
            set_state(BUILD_CLASS_BODY)

    # BUILD_CLASS_BODY: ends with the first real token that is not
    # indented deeper than the "class" keyword
    elif state == BUILD_CLASS_BODY:
        if (type not in layout_types and type != tokenize.COMMENT
                and spos[1] <= class_spos[1]):
            output("}; // end of class\n", (out_row + 1, class_spos[1]))
            pop_state()
        elif tok == "def":
            start_recording()
            def_spos = spos
            push_state(BUILD_DEF_DECL)

    # BUILD_DEF_DECL: emit the declaration once name/params are recorded
    elif state == BUILD_DEF_DECL:
        if is_recording_finished():
            # Do we document a class method?  Then "self" is removed;
            # methods without "self" are declared static, and a leading
            # "cls" is removed as well.
            if BUILD_CLASS_BODY in stateStack:
                params = param.split(",")
                if params[0] == 'self':
                    s = ''
                    param = ",".join(params[1:])
                else:
                    s = 'static '
                    if params[0] == 'cls':
                        param = ",".join(params[1:])
                s = s + name + "(" + param + ");\n"
                # "__name" (without trailing "__") is treated as private.
                if (name.startswith('__') and not name.endswith('__')
                        and protection_level != 'private'):
                    private_member = True
                    output(" private:\n", (def_spos[0] + 2, def_spos[1]))
            else:
                s = name + "(" + param + ");\n"
            if doc_string != "":
                comment_block.append(doc_string)
            print_comment(def_spos)
            output(s, def_spos)
            set_state(BUILD_DEF_BODY)

    # BUILD_DEF_BODY: ends with the first real token that is not indented
    # deeper than the "def" keyword
    elif state == BUILD_DEF_BODY:
        # Comments are ignored here exactly as in BUILD_CLASS_BODY: a
        # comment placed at a low column inside a body does not end it.
        if (type not in layout_types and type != tokenize.COMMENT
                and spos[1] <= def_spos[1]):
            # Restore the protection level that was active before a
            # "__private" method switched the output to "private:".
            if private_member and protection_level != 'private':
                private_member = False
                output(" " + protection_level + ":\n",
                       (def_spos[0] + 2, def_spos[1]))
            pop_state()
424 |
|
425 |
|
426 |
def dump(filename):
    """Copy the file filename unchanged to the output stream outfile.

    The original wrote to sys.stdout, ignoring the stream selected by
    filterFile(), and never closed the input file.
    """
    with open(filename) as f:
        for line in f:
            outfile.write(line)
431 |
|
432 |
def filter(filename):
    """Filter one Python source file and write the result to outfile.

    The file is tokenized and every token is passed to tok_eater().  The
    generated text is wrapped in a namespace named after the file (base
    name without extension).  If the module has no docstring, a default
    namespace documentation is appended.  Finally the buffered output is
    written to outfile.

    In:  filename - Path of the Python source file
    """
    global name, namespace, module_has_docstring

    path, name = os.path.split(filename)
    root, ext = os.path.splitext(name)

    output("namespace " + root + " {\n", (0, 0))

    # Set the module name for tok_eater to use if there's a module doc
    # string.  tok_eater reads "namespace", which was never assigned
    # before; "name" is still set as it was in the original.
    name = root
    namespace = root

    sys.stderr.write('Filtering "' + filename + '"...')
    with open(filename) as f:
        # generate_tokens() exists in Python 2 and 3, unlike the
        # callback form tokenize.tokenize(readline, tokeneater).
        for tok_info in tokenize.generate_tokens(f.readline):
            tok_eater(*tok_info)
    print_comment((0, 0))

    output("\n", (0, 0))
    output("} // end of namespace\n", (0, 0))

    if not module_has_docstring:
        # Put in default namespace documentation
        output('/** \\namespace ' + root + ' \n', (0, 0))
        output(' \\brief Module "%s" */\n' % (root), (0, 0))

    for s in outbuffer:
        outfile.write(s)
459 |
|
460 |
|
461 |
def filterFile(filename, out=None):
    """Filter a single file and write the result to out.

    Files with a ".py" extension (compared case-insensitively, as in
    convert()) are passed to filter(); all other files are copied
    unchanged with dump().  "OK" is written to stderr on success; an
    IOError is reported on stderr instead of being raised.

    In:  filename - Path of the input file
         out      - Output stream; None selects the current sys.stdout
    """
    global outfile

    # Resolve the default at call time so that a sys.stdout replaced after
    # import is honoured.
    outfile = sys.stdout if out is None else out

    try:
        root, ext = os.path.splitext(filename)

        if ext.lower() == ".py":
            filter(filename)
        else:
            dump(filename)

        sys.stderr.write("OK\n")
    except IOError as e:
        # strerror may be missing; fall back to the exception text.
        sys.stderr.write("%s\n" % (e.strerror or e,))
477 |
|
478 |
|
479 |
###################################################################### |
480 |
|
481 |
# preparePath |
482 |
def preparePath(path):
    """Prepare a path.

    Checks if the path exists and creates it (including missing parent
    directories) if it does not exist.  An empty path refers to the
    current directory and is left alone; the original called os.mkdir("")
    for it and also failed on paths with a trailing separator.
    """
    if not path or os.path.exists(path):
        return
    try:
        os.makedirs(path)
    except OSError:
        # Tolerate the directory having been created in the meantime.
        if not os.path.isdir(path):
            raise
492 |
|
493 |
# isNewer |
494 |
def isNewer(file1, file2):
    """Tell whether file1 was modified more recently than file2.

    file1 must be an existing file.  A missing file2 always counts as
    older, so True is returned in that case.
    """
    if os.path.exists(file2):
        mtime1 = os.stat(file1)[ST_MTIME]
        mtime2 = os.stat(file2)[ST_MTIME]
        return mtime1 > mtime2
    return True
502 |
|
503 |
# convert |
504 |
def convert(srcpath, destpath):
    """Convert a Python source tree into a C++ stub tree.

    All *.py files in srcpath (including sub-directories) are filtered
    and written to destpath. If destpath exists, only the files
    that have been modified are filtered again. Files that were deleted
    from srcpath are also deleted in destpath if they are still present.
    The function returns the number of processed *.py files.

    If srcpath is not a directory, an error is written to stderr and 0 is
    returned without touching destpath.
    """
    # Local import: subprocess is not among the imports at the top of
    # the file.
    import subprocess

    # Without this guard a mistyped source path yields an empty file list
    # and everything in destpath would be removed as "obsolete".
    if not os.path.isdir(srcpath):
        sys.stderr.write('Source directory "%s" does not exist\n' % srcpath)
        return 0

    count = 0
    sfiles = glob.glob(os.path.join(srcpath, "*"))
    dfiles = glob.glob(os.path.join(destpath, "*"))
    # Names in destpath that have no counterpart in srcpath (yet).
    leftovers = set(os.path.basename(df) for df in dfiles)

    for srcfile in sfiles:
        basename = os.path.basename(srcfile)
        leftovers.discard(basename)

        # Is it a subdirectory?
        if os.path.isdir(srcfile):
            sdir = os.path.join(srcpath, basename)
            ddir = os.path.join(destpath, basename)
            count += convert(sdir, ddir)
            continue

        # Check the extension (only *.py will be converted)
        root, ext = os.path.splitext(srcfile)
        if ext.lower() != ".py":
            continue

        destfile = os.path.join(destpath, basename)
        if destfile == srcfile:
            sys.stdout.write("WARNING: Input and output names are identical!\n")
            sys.exit(1)

        count += 1

        if isNewer(srcfile, destfile):
            preparePath(os.path.dirname(destfile))
            # Each file is filtered by a fresh run of this script with
            # "-f".  The argument list avoids the shell, so names with
            # blanks or shell metacharacters are passed through intact,
            # and sys.executable replaces the hard-coded "python".
            out = open(destfile, "w")
            try:
                subprocess.call(
                    [sys.executable or "python", sys.argv[0], "-f", srcfile],
                    stdout=out)
            finally:
                out.close()

    # Delete obsolete files in destpath
    for df in sorted(leftovers):
        dname = os.path.join(destpath, df)
        if os.path.isdir(dname):
            try:
                shutil.rmtree(dname)
            except EnvironmentError:
                sys.stdout.write(
                    "Can't remove obsolete directory '%s'\n" % dname)
        else:
            try:
                os.remove(dname)
            except EnvironmentError:
                sys.stdout.write("Can't remove obsolete file '%s'\n" % dname)

    return count
568 |
|
569 |
|
570 |
###################################################################### |
571 |
###################################################################### |
572 |
###################################################################### |
573 |
|
574 |
def main(argv=None):
    """Command line entry point.

    Usage:  <script> -f file           filter one file to stdout
            <script> input output      filter an entire source tree
            <script> -h | --help       show the usage line

    In:  argv - Argument list without the program name; None selects
                sys.argv[1:].
    """
    if argv is None:
        argv = sys.argv[1:]
    usage = "%s options input output\n" % (os.path.basename(sys.argv[0]))

    try:
        opts, args = getopt.getopt(argv, "hf", ["help"])
    except getopt.GetoptError as e:
        sys.stdout.write("%s\n" % e)
        sys.exit(1)

    filter_file = False
    for o, a in opts:
        if o == "-f":
            filter_file = True
        elif o in ("-h", "--help"):
            # The option was accepted but ignored before.
            sys.stdout.write(usage)
            sys.exit(0)

    if filter_file:
        # Filter the specified file and print the result to stdout.
        # The arguments are joined with blanks to form one file name.
        filename = " ".join(args)
        filterFile(filename)
    else:
        if len(args) != 2:
            sys.stderr.write(usage)
            sys.exit(1)

        # Filter an entire Python source tree
        sys.stdout.write('"%s" -> "%s"\n\n' % (args[0], args[1]))
        c = convert(args[0], args[1])
        sys.stdout.write("%d files\n" % (c))


# Run only when executed as a script, so that importing the module no
# longer parses sys.argv.
if __name__ == "__main__":
    main()
600 |
|