1 |
#!/usr/bin/env python |
2 |
|
3 |
# pythfilter.py v1.5.5, written by Matthias Baas (baas@ira.uka.de) |
4 |
|
5 |
# $Id: pythfilter,v 1.1 2004/11/23 12:27:16 paultcochrane Exp $ |
6 |
|
7 |
# Doxygen filter which can be used to document Python source code. |
8 |
# Classes (incl. methods) and functions can be documented. |
9 |
# Every comment that begins with ## is literally turned into an |
10 |
# Doxygen comment. Consecutive comment lines are turned into |
11 |
# comment blocks (-> /** ... */). |
12 |
# All the stuff is put inside a namespace with the same name as |
13 |
# the source file. |
14 |
|
15 |
# Conversions: |
16 |
# ============ |
17 |
# ##-blocks -> /** ... */ |
18 |
# "class name(base): ..." -> "class name : public base {...}" |
19 |
# "def name(params): ..." -> "name(params) {...}" |
20 |
|
21 |
# Changelog: |
22 |
# 21.01.2003: Raw (r"") or unicode (u"") doc string will now be properly |
23 |
# handled. (thanks to Richard Laager for the patch) |
24 |
# 22.12.2003: Fixed a bug where no function names would be output for "def" |
25 |
# blocks that were not in a class. |
26 |
# (thanks to Richard Laager for the patch) |
27 |
# 12.12.2003: Implemented code to handle static and class methods with |
28 |
# this logic: Methods with "self" as the first argument are |
29 |
# non-static. Methods with "cls" are Python class methods, |
30 |
# which translate into static methods for Doxygen. Other |
31 |
# methods are assumed to be static methods. As should be |
32 |
# obvious, this logic doesn't take into account if the method |
33 |
# is actually setup as a classmethod() or a staticmethod(), |
34 |
# just if it follows the normal conventions. |
35 |
# (thanks to Richard Laager for the patch) |
36 |
# 11.12.2003: Corrected #includes to use os.path.sep instead of ".". Corrected |
37 |
# namespace code to use "::" instead of ".". |
38 |
# (thanks to Richard Laager for the patch) |
39 |
# 11.12.2003: Methods beginning with two underscores that end with |
40 |
# something other than two underscores are considered private |
41 |
# and are handled accordingly. |
42 |
# (thanks to Richard Laager for the patch) |
43 |
# 03.12.2003: The first parameter of class methods (self) is removed from |
44 |
# the documentation. |
45 |
# 03.11.2003: The module docstring will be used as namespace documentation |
46 |
# (thanks to Joe Bronkema for the patch) |
47 |
# 08.07.2003: Namespaces get a default documentation so that the namespace |
48 |
# and its contents will show up in the generated documentation. |
49 |
# 05.02.2003: Directories will be deleted during synchronization.
50 |
# 31.01.2003: -f option & filtering entire directory trees. |
51 |
# 10.08.2002: In base classes the '.' will be replaced by '::' |
52 |
# 18.07.2002: * and ** will be translated into arguments |
53 |
# 18.07.2002: Argument lists may contain default values using constructors. |
54 |
# 18.06.2002: Support for ## public: |
55 |
# 21.01.2002: from ... import will be translated to "using namespace ...;" |
56 |
# TODO: "from ... import *" vs "from ... import names" |
57 |
# TODO: Using normal imports: name.name -> name::name |
58 |
# 20.01.2002: #includes will be placed in front of the namespace |
59 |
|
60 |
###################################################################### |
61 |
|
62 |
# The program is written as a state machine with the following states: |
63 |
# |
64 |
# - OUTSIDE The current position is outside any comment, |
65 |
# class definition or function. |
66 |
# |
67 |
# - BUILD_COMMENT Begins with first "##". |
68 |
# Ends with the first token that is no "##" |
69 |
# at the same column as before. |
70 |
# |
71 |
# - BUILD_CLASS_DECL Begins with "class". |
72 |
# Ends with ":" |
73 |
# - BUILD_CLASS_BODY Begins just after BUILD_CLASS_DECL. |
74 |
# The first following token (which is no comment) |
75 |
# determines indentation depth. |
76 |
# Ends with a token that has a smaller indentation.
77 |
# |
78 |
# - BUILD_DEF_DECL Begins with "def". |
79 |
# Ends with ":". |
80 |
# - BUILD_DEF_BODY Begins just after BUILD_DEF_DECL. |
81 |
# The first following token (which is no comment) |
82 |
# determines indentation depth. |
83 |
# Ends with a token that has a smaller indentation.
84 |
|
85 |
import getopt |
86 |
import glob |
87 |
import os.path |
88 |
import shutil |
89 |
import string |
90 |
import sys |
91 |
import token |
92 |
import tokenize |
93 |
|
94 |
from stat import * |
95 |
|
96 |
# States of the token state machine (see the description at the top of
# the file).  The last element of stateStack is always one of these.
(OUTSIDE,
 BUILD_COMMENT,
 BUILD_CLASS_DECL,
 BUILD_CLASS_BODY,
 BUILD_DEF_DECL,
 BUILD_DEF_BODY,
 IMPORT,
 IMPORT_OP,
 IMPORT_APPEND) = range(9)

# Stream that receives the generated output.
outfile = sys.stdout

# Buffered output fragments, written to outfile once filtering is done.
outbuffer = []

# Current output cursor, maintained by output().
out_row = out_col = 0

# Scratch variables of rec_name_n_param().
name = param = doc_string = ""
record_state = bracket_counter = 0

# (row, column) positions of the most recent "class", "def" and
# "import"/"from" keyword.
class_spos = def_spos = import_spos = (0, 0)

# Keyword that opened the current import statement ("import" or "from").
import_token = ""

# Lines of the comment block being gathered, and the flag that marks the
# block as complete.
comment_block = []
comment_finished = 0

# Module names collected from the current import statement.
modules = []

# Stack of state machine states; the last element is the active state.
stateStack = [OUTSIDE]

# Whether the module being filtered has a docstring.
module_has_docstring = False

# Member protection bookkeeping.
protection_level = "public"
private_member = False

# Name of the module namespace.
namespace = ""
149 |
|
150 |
###################################################################### |
151 |
# Output string s. '\n' may only be at the end of the string (not |
152 |
# somewhere in the middle). |
153 |
# |
154 |
# In: s - String |
155 |
# spos - Startpos |
156 |
###################################################################### |
157 |
def output(s, spos, immediate=0):
    """Emit the string *s* so that it starts at column ``spos[1]``.

    The string is left-padded with blanks from the current output column
    (``out_col``) up to the requested column and is then either appended
    to ``outbuffer`` or, if *immediate* is true, written to ``outfile``
    right away.  Afterwards the output cursor is advanced.

    In: s         - String; a '\\n' may only appear at its very end
        spos      - Start position as (row, column); only the column is used
        immediate - Write to outfile directly instead of buffering
    """
    global outbuffer, out_row, out_col, outfile

    # str.rjust never truncates, so a start column to the left of the
    # cursor simply results in no padding at all.
    padded = s.rjust(spos[1] - out_col + len(s))
    if immediate:
        outfile.write(padded)
    else:
        outbuffer.append(padded)

    # The cursor is tracked for immediate output as well.
    if s[-1:] == "\n":
        out_row = out_row + 1
        out_col = 0
    else:
        out_col = spos[1] + len(s)
170 |
|
171 |
|
172 |
###################################################################### |
173 |
# Records a name and parameters. The name is either a class name or |
174 |
# a function name. Then the parameter is either the base class or |
175 |
# the function parameters. |
176 |
# The name is stored in the global variable "name", the parameters |
177 |
# in "param". |
178 |
# The variable "record_state" holds the current state of this internal |
179 |
# state machine. |
180 |
# The recording is started by calling start_recording(). |
181 |
# |
182 |
# In: type, tok |
183 |
###################################################################### |
184 |
def _strip_string_literal(tok):
    """Return the text of the string literal *tok* without prefix and quotes."""
    # Drop a raw/unicode/bytes prefix in either case.
    body = tok.lstrip("rRuUbB")
    # Try the triple quotes first so that only the delimiters are removed
    # and quotes that belong to the text itself survive.
    for quote in ('"""', "'''", '"', "'"):
        if (len(body) >= 2 * len(quote)
                and body.startswith(quote) and body.endswith(quote)):
            return body[len(quote):-len(quote)]
    return body


def rec_name_n_param(type, tok):
    """Feed one token to the name/parameter recorder.

    The recorder is a small state machine driven by the global
    "record_state":

      0 - idle, tokens are ignored
      1 - the next token is stored as "name"
      2 - wait for '(' (parameters follow) or ':' (no parameters)
      3 - collect tokens into "param" until the matching ')' is seen;
          '*' and '**' are dropped
      4 - look for a doc string, then go back to idle

    The results are left in the globals "name", "param" and "doc_string".
    Recording is started by start_recording().

    In: type - Token type as reported by the tokenizer
        tok  - Token string
    """
    global record_state, name, param, doc_string, bracket_counter
    state = record_state
    # State 0: Do nothing.
    if state == 0:
        return
    # State 1: Remember name.
    elif state == 1:
        name = tok
        record_state = 2
    # State 2: Wait for opening bracket or colon.
    elif state == 2:
        if tok == '(':
            bracket_counter = 1
            record_state = 3
        if tok == ':':
            record_state = 4
    # State 3: Store parameter (or base class) and wait for the ending bracket.
    elif state == 3:
        if tok == '*' or tok == '**':
            tok = ''
        if tok == '(':
            bracket_counter = bracket_counter + 1
        if tok == ')':
            bracket_counter = bracket_counter - 1
        if bracket_counter == 0:
            record_state = 4
        else:
            param = param + tok
    # State 4: Look for doc string.
    elif state == 4:
        # Line breaks, indentation, blank lines and comments may all
        # precede a doc string, so they do not end the search.
        if type in (token.NEWLINE, token.INDENT, tokenize.NL,
                    tokenize.COMMENT):
            return
        elif tok == ":":
            return
        elif type == token.STRING:
            doc_string = _strip_string_literal(tok)
        # Any other token means there is no doc string.
        record_state = 0
227 |
|
228 |
###################################################################### |
229 |
# Starts the recording of a name & param part. |
230 |
# The function rec_name_n_param() has to be fed with tokens. After |
231 |
# the necessary tokens are fed the name and parameters can be found |
232 |
# in the global variables "name" and "param".
233 |
###################################################################### |
234 |
def start_recording():
    """Arm the recorder: clear the previous results and expect a name next.

    After this call rec_name_n_param() stores the next token it is fed
    as the name.
    """
    global record_state, param, name, doc_string
    name = param = doc_string = ""
    record_state = 1
240 |
|
241 |
###################################################################### |
242 |
# Test if recording is finished |
243 |
###################################################################### |
244 |
def is_recording_finished():
    """Return True once the recorder is back in its idle state (0)."""
    global record_state
    if record_state == 0:
        return True
    return False
247 |
|
248 |
###################################################################### |
249 |
## Gather comment block |
250 |
###################################################################### |
251 |
# Row of the most recently seen comment token.  gather_comment() uses it
# to recognise the line break that belongs to a comment line.
_last_comment_row = -1


def gather_comment(type, tok, spos):
    """Collect consecutive "##" comment lines into "comment_block".

    A comment token that starts with exactly two '#' characters is stored
    (without the "##") as one line of the current block.  Any other kind
    of token marks the block as finished; the finished block is printed
    as soon as the next comment token arrives.

    In: type - Token type
        tok  - Token string
        spos - Start position of the token as (row, column)
    """
    global comment_block, comment_finished, _last_comment_row
    if type != tokenize.COMMENT:
        # Python 2.6+/3 tokenizers report the line break after a
        # comment-only line as a separate NL token on the same row.  That
        # token must not end the block, otherwise every "##" line would
        # become a block of its own.
        if type == tokenize.NL and spos[0] == _last_comment_row:
            return
        comment_finished = 1
        return

    _last_comment_row = spos[0]
    # Output old comment block if a new one is started.
    if comment_finished:
        print_comment(spos)
        comment_finished = 0
    if tok[0:2] == "##" and tok[0:3] != "###":
        # Whether the token text includes the line break depends on the
        # tokenizer version; store exactly one so output() sees one line
        # per entry.
        comment_block.append(tok[2:].rstrip("\r\n") + "\n")
262 |
|
263 |
###################################################################### |
264 |
## Output comment block and empty buffer. |
265 |
###################################################################### |
266 |
def print_comment(spos):
    """Write the gathered comment block as "/** ... */" and reset it.

    Nothing is written when no lines were gathered.  In either case the
    block buffer is emptied and the "finished" flag is cleared.

    In: spos - Position (row, column) handed to output() for every line
    """
    global comment_block, comment_finished
    if comment_block != []:
        for piece in ["/**\n"] + comment_block + ["*/\n"]:
            output(piece, spos)
    comment_block = []
    comment_finished = 0
275 |
|
276 |
###################################################################### |
277 |
def set_state(s):
    """Replace the active state (top of the state stack) with *s*."""
    global stateStack
    stateStack[-1] = s
280 |
|
281 |
###################################################################### |
282 |
def get_state():
    """Return the active state, i.e. the top of the state stack."""
    global stateStack
    return stateStack[-1]
285 |
|
286 |
###################################################################### |
287 |
def push_state(s):
    """Enter the nested state *s* by pushing it onto the state stack."""
    global stateStack
    stateStack.extend((s,))
290 |
|
291 |
###################################################################### |
292 |
def pop_state():
    """Leave the active state by removing the top of the state stack."""
    global stateStack
    del stateStack[-1]
295 |
|
296 |
|
297 |
###################################################################### |
298 |
def tok_eater(type, tok, spos, epos, line):
    """Consume one token and advance the filter's state machine.

    Every token is first handed to rec_name_n_param() and, unless it is
    a protection marker ("## private:", "## protected:", "## public:"),
    to gather_comment().  Afterwards the token is handled according to
    the active state on the state stack.

    In: type - Token type
        tok  - Token string
        spos - Start position of the token as (row, column)
        epos - End position of the token (unused)
        line - Source line the token was found on (unused)
    """
    global stateStack, name, param, class_spos, def_spos, import_spos
    global doc_string, modules, import_token, module_has_docstring
    global protection_level, private_member

    rec_name_n_param(type, tok)

    # Protection markers are compared with surrounding whitespace and
    # embedded blanks removed, so "## private:" matches as well.
    marker = tok.strip().replace(" ", "")
    if marker == "##private:":
        protection_level = "private"
        output("private:\n", spos)
    elif marker == "##protected:":
        protection_level = "protected"
        output("protected:\n", spos)
    elif marker == "##public:":
        protection_level = "public"
        output("public:\n", spos)
    else:
        gather_comment(type, tok, spos)

    state = get_state()

    # OUTSIDE
    if state == OUTSIDE:
        if tok == "class":
            start_recording()
            class_spos = spos
            push_state(BUILD_CLASS_DECL)
        elif tok == "def":
            start_recording()
            def_spos = spos
            push_state(BUILD_DEF_DECL)
        elif tok == "import" or tok == "from":
            import_token = tok
            import_spos = spos
            modules = []
            push_state(IMPORT)
        elif spos[1] == 0 and tok[:3] == '"""':
            # Capture module docstring as namespace documentation
            module_has_docstring = True
            comment_block.append("\\namespace %s\n" % namespace)
            comment_block.append(tok[3:-3])
            print_comment(spos)

    # IMPORT
    elif state == IMPORT:
        if type == token.NAME:
            modules.append(tok)
            set_state(IMPORT_OP)
    # IMPORT_OP
    elif state == IMPORT_OP:
        if tok == ".":
            set_state(IMPORT_APPEND)
        elif tok == ",":
            set_state(IMPORT)
        else:
            # Any other token ends the module list: emit the collected
            # modules.  The #include lines bypass the buffer so that
            # they end up in front of the namespace.
            for m in modules:
                output('#include "' + m.replace('.', os.path.sep) + '.py"\n',
                       import_spos, immediate=1)
                if import_token == "from":
                    output('using namespace ' + m.replace('.', '::') + ';\n',
                           import_spos)
            pop_state()
    # IMPORT_APPEND
    elif state == IMPORT_APPEND:
        if type == token.NAME:
            # Extend the dotted name of the module collected last.
            modules[-1] += "." + tok
            set_state(IMPORT_OP)
    # BUILD_CLASS_DECL
    elif state == BUILD_CLASS_DECL:
        if is_recording_finished():
            s = "class " + name
            if param != "":
                s = s + " : public " + param.replace('.', '::')
            if doc_string != "":
                comment_block.append(doc_string)
            print_comment(class_spos)
            output(s + "\n", class_spos)
            output("{\n", (class_spos[0] + 1, class_spos[1]))
            protection_level = "public"
            output(" public:\n", (class_spos[0] + 2, class_spos[1]))
            set_state(BUILD_CLASS_BODY)
    # BUILD_CLASS_BODY
    elif state == BUILD_CLASS_BODY:
        # The body ends with the first significant token that is not
        # indented deeper than the "class" keyword.
        # NOTE(review): 40 is a hard-coded token number kept from the
        # original code; presumably the numeric value of tokenize.NL on
        # an old interpreter - TODO confirm.
        if (type != token.INDENT and type != token.NEWLINE and type != 40
                and type != tokenize.NL and type != tokenize.COMMENT
                and spos[1] <= class_spos[1]):
            output("}; // end of class\n", (out_row + 1, class_spos[1]))
            pop_state()
        elif tok == "def":
            start_recording()
            def_spos = spos
            push_state(BUILD_DEF_DECL)
    # BUILD_DEF_DECL
    elif state == BUILD_DEF_DECL:
        if is_recording_finished():
            s = ''
            # Do we document a class method? then remove the 'self' parameter
            if BUILD_CLASS_BODY in stateStack:
                params = param.split(",")
                if params[0] == 'self':
                    param = ",".join(params[1:])
                else:
                    # Everything without a leading "self" is declared
                    # static; a leading "cls" is dropped as well.
                    s = 'static '
                    if params[0] == 'cls':
                        param = ",".join(params[1:])
                s = s + name + "(" + param + ");\n"
                # "__name" (without trailing "__") is a private member.
                if (name.startswith('__') and not name.endswith('__')
                        and protection_level != 'private'):
                    private_member = True
                    output(" private:\n", (def_spos[0] + 2, def_spos[1]))
            else:
                s = name + "(" + param + ");\n"
            if doc_string != "":
                comment_block.append(doc_string)
            print_comment(def_spos)
            output(s, def_spos)
            set_state(BUILD_DEF_BODY)
    # BUILD_DEF_BODY
    elif state == BUILD_DEF_BODY:
        # The body ends with the first significant token that is not
        # indented deeper than the "def" keyword (see the note on 40
        # in BUILD_CLASS_BODY).
        if (type != token.INDENT and type != token.NEWLINE
                and type != 40 and type != tokenize.NL
                and spos[1] <= def_spos[1]):
            # Restore the protection level that was active before a
            # private "__name" member was emitted.
            if private_member and protection_level != 'private':
                private_member = False
                output(" " + protection_level + ":\n",
                       (def_spos[0] + 2, def_spos[1]))
            pop_state()
426 |
|
427 |
|
428 |
def dump(filename):
    """Copy the file *filename* unchanged to standard output.

    In: filename - Name of the file to copy

    An IOError raised while opening or reading the file is passed on to
    the caller.
    """
    # The with block closes the file even if writing fails.
    with open(filename) as f:
        for text_line in f:
            sys.stdout.write(text_line)
433 |
|
434 |
def filter(filename):
    """Filter the Python source file *filename* and write the result.

    The generated declarations are wrapped in a namespace that is named
    after the file (without directory and extension).  The buffered
    output is written to "outfile" at the end.

    In: filename - Name of the Python source file

    An IOError raised while opening or reading the file is passed on to
    the caller.
    """
    global name, module_has_docstring, namespace

    path, name = os.path.split(filename)
    root, ext = os.path.splitext(name)

    output("namespace " + root + " {\n", (0, 0))

    # Set the module name for tok_eater to use if there's a module doc
    # string.  tok_eater reads it from "namespace", so that global has
    # to be set here as well.
    name = root
    namespace = root

    sys.stderr.write('Filtering "' + filename + '"...')
    f = open(filename)
    try:
        # generate_tokens() yields the same five-element tuples that the
        # callback form of tokenize.tokenize() passes on, and it is
        # available on Python 2 and 3 alike.
        for tok_info in tokenize.generate_tokens(f.readline):
            tok_eater(*tok_info)
    finally:
        f.close()
    # Flush a comment block that is still pending at the end of the file.
    print_comment((0, 0))

    output("\n", (0, 0))
    output("} // end of namespace\n", (0, 0))

    if not module_has_docstring:
        # Put in default namespace documentation
        output('/** \\namespace ' + root + ' \n', (0, 0))
        output(' \\brief Module "%s" */\n' % (root), (0, 0))

    for s in outbuffer:
        outfile.write(s)
461 |
|
462 |
|
463 |
def filterFile(filename, out=sys.stdout):
    """Filter a single file and report the result on stderr.

    Files with the extension ".py" are run through filter(), every other
    file is passed to dump().  "OK" is written to stderr on success; on
    an IOError its message is written instead and the error is not
    propagated.

    In: filename - Name of the file to process
        out      - Stream that becomes the global "outfile"
    """
    global outfile

    outfile = out

    try:
        root, ext = os.path.splitext(filename)

        if ext == ".py":
            filter(filename)
        else:
            dump(filename)

        sys.stderr.write("OK\n")
    except IOError as e:
        # strerror is only set for errors built from (errno, message);
        # fall back to the plain text of the exception otherwise.
        sys.stderr.write((e.strerror or str(e)) + "\n")
479 |
|
480 |
|
481 |
###################################################################### |
482 |
|
483 |
# preparePath |
484 |
def preparePath(path):
    """Prepare a path.

    Checks if the path exists and creates it (including missing parent
    directories) if it does not exist.  An empty path and a path that
    already exists are left alone.

    In: path - Directory to create
    """
    if path == "" or os.path.exists(path):
        return
    try:
        # makedirs copes with a trailing separator, which a manual
        # "create parent, then mkdir" sequence does not.
        os.makedirs(path)
    except OSError:
        # Tolerate the directory having been created in the meantime.
        if not os.path.isdir(path):
            raise
494 |
|
495 |
# isNewer |
496 |
def isNewer(file1, file2):
    """Tell whether file1 was modified more recently than file2.

    file1 must be an existing file.  A file2 that does not exist counts
    as older, so the result is True in that case.
    """
    if os.path.exists(file2):
        first_mtime = os.stat(file1)[ST_MTIME]
        second_mtime = os.stat(file2)[ST_MTIME]
        return first_mtime > second_mtime
    return True
504 |
|
505 |
# convert |
506 |
def convert(srcpath, destpath):
    """Convert a Python source tree into a C++ stub tree.

    All *.py files in srcpath (including sub-directories) are filtered
    and written to destpath. If destpath exists, only the files
    that have been modified are filtered again. Files that were deleted
    from srcpath are also deleted in destpath if they are still present.
    The function returns the number of processed *.py files.

    Every file is filtered by running this script again with "-f" in a
    separate process whose standard output is the destination file.
    The script exits with status 1 if a source file and its destination
    turn out to be the same path.
    """
    # Imported locally because nothing else in the file needs it.
    import subprocess

    count = 0
    sfiles = glob.glob(os.path.join(srcpath, "*"))
    # Names found in destpath; whatever is still in this set after the
    # loop has no counterpart in srcpath any more.
    leftovers = set(os.path.basename(df)
                    for df in glob.glob(os.path.join(destpath, "*")))

    for srcfile in sfiles:
        basename = os.path.basename(srcfile)
        leftovers.discard(basename)

        # Is it a subdirectory?
        if os.path.isdir(srcfile):
            sdir = os.path.join(srcpath, basename)
            ddir = os.path.join(destpath, basename)
            count += convert(sdir, ddir)
            continue
        # Check the extension (only *.py will be converted)
        root, ext = os.path.splitext(srcfile)
        if ext.lower() != ".py":
            continue

        destfile = os.path.join(destpath, basename)
        if destfile == srcfile:
            print("WARNING: Input and output names are identical!")
            sys.exit(1)

        count += 1

        if isNewer(srcfile, destfile):
            preparePath(os.path.dirname(destfile))
            # Pass the arguments as a list (no shell involved) so that
            # file names containing blanks or shell metacharacters are
            # handled correctly, and use the interpreter that runs this
            # script instead of whatever "python" is on the PATH.
            with open(destfile, "w") as out:
                subprocess.call(
                    [sys.executable or "python", sys.argv[0], "-f", srcfile],
                    stdout=out)

    # Delete obsolete files in destpath
    for df in sorted(leftovers):
        dname = os.path.join(destpath, df)
        if os.path.isdir(dname):
            try:
                shutil.rmtree(dname)
            except EnvironmentError:
                print("Can't remove obsolete directory '%s'" % dname)
        else:
            try:
                os.remove(dname)
            except EnvironmentError:
                print("Can't remove obsolete file '%s'" % dname)

    return count
570 |
|
571 |
|
572 |
###################################################################### |
573 |
###################################################################### |
574 |
###################################################################### |
575 |
|
576 |
# True when "-f" was given, i.e. a single file is filtered to stdout.
filter_file = False


def main(argv=None):
    """Run the filter from the command line and return the exit status.

    Usage:
        script -f file         Filter one file and print the result
        script input output    Filter the source tree "input" into "output"
        script -h | --help     Print the usage line

    In: argv - Argument list without the program name
               (default: sys.argv[1:])
    """
    global filter_file

    if argv is None:
        argv = sys.argv[1:]
    usage = "%s options input output\n" % (os.path.basename(sys.argv[0]))

    try:
        opts, args = getopt.getopt(argv, "hf", ["help"])
    except getopt.GetoptError as e:
        print(e)
        return 1

    for o, a in opts:
        if o in ("-h", "--help"):
            sys.stdout.write(usage)
            return 0
        if o == "-f":
            filter_file = True

    if filter_file:
        # Filter the specified file and print the result to stdout.
        # The arguments are joined with blanks, so an unquoted file name
        # that contains blanks still ends up as one name.
        filename = " ".join(args)
        filterFile(filename)
        return 0

    if len(args) != 2:
        sys.stderr.write(usage)
        return 1

    # Filter an entire Python source tree
    print('"%s" -> "%s"\n' % (args[0], args[1]))
    c = convert(args[0], args[1])
    print("%d files" % (c))
    return 0


if __name__ == "__main__":
    sys.exit(main())
602 |
|