2 """Doxygen XML to SWIG docstring converter.
4 Converts Doxygen generated XML files into a file containing docstrings
5 that can be used by SWIG-1.3.x. Note that you need to get SWIG
6 version > 1.3.23 or use Robin Dunn's docstring patch to be able to use
11 doxy2swig.py input.xml output.i
13 input.xml is your doxygen generated XML file and output.i is where the
14 output will be written (the file will be clobbered).
18 # This code is implemented using Mark Pilgrim's code as a guideline:
19 # http://www.faqs.org/docs/diveintopython/kgp_divein.html
21 # Author: Prabhu Ramachandran
25 from xml.dom import minidom
33 def my_open_read(source):
34 if hasattr(source, "read"):
39 def my_open_write(dest):
40 if hasattr(dest, "write"):
43 return open(dest, 'w')
47 """Converts Doxygen generated XML files into a file containing
48 docstrings that can be used by SWIG-1.3.x that have support for
49 feature("docstring"). Once the data is parsed it is stored in
54 def __init__(self, src):
55 """Initialize the instance given a source object (file or
60 self.my_dir = os.path.dirname(f.name)
61 self.xmldoc = minidom.parse(f).documentElement
65 self.pieces.append('\n// File: %s\n'%\
66 os.path.basename(f.name))
68 self.space_re = re.compile(r'\s+')
69 self.lead_spc = re.compile(r'^(%feature\S+\s+\S+\s*?)"\s+(\S)')
71 self.ignores = ('inheritancegraph', 'param', 'listofallmembers',
72 'innerclass', 'name', 'declname', 'incdepgraph',
73 'invincdepgraph', 'programlisting', 'type',
74 'references', 'referencedby', 'location',
75 'collaborationgraph', 'reimplements',
76 'reimplementedby', 'derivedcompoundref',
81 """Parses the file set in the initialization. The resulting
82 data is stored in `self.pieces`.
85 self.parse(self.xmldoc)
87 def parse(self, node):
88 """Parse a given node. This function in turn calls the
89 `parse_<nodeType>` functions which handle the respective
93 pm = getattr(self, "parse_%s"%node.__class__.__name__)
def parse_Document(self, node):
    """Parse a `Document` node by handing its document element back
    to `self.parse`.

    """
    root = node.documentElement
    self.parse(root)
99 def parse_Text(self, node):
101 txt = txt.replace('\\', r'\\\\')
102 txt = txt.replace('"', r'\"')
103 # ignore pure whitespace
104 m = self.space_re.match(txt)
105 if m and len(m.group()) == len(txt):
108 self.add_text(textwrap.fill(txt))
110 def parse_Element(self, node):
111 """Parse an `ELEMENT_NODE`. This calls specific
112 `do_<tagName>` handlers for different elements. If no handler
113 is available the `generic_parse` method is called. All
114 tagNames specified in `self.ignores` are simply ignored.
118 ignores = self.ignores
121 attr = "do_%s" % name
122 if hasattr(self, attr):
123 handlerMethod = getattr(self, attr)
126 self.generic_parse(node)
127 #if name not in self.generics: self.generics.append(name)
def add_text(self, value):
    """Adds text corresponding to `value` into `self.pieces`.

    A list or tuple is spliced into `self.pieces` item by item; any
    other value is appended as a single piece.

    """
    # isinstance() also accepts list/tuple subclasses and does not rely
    # on `types.ListType`/`types.TupleType`, which exist only on Python 2.
    if isinstance(value, (list, tuple)):
        self.pieces.extend(value)
    else:
        self.pieces.append(value)
136 def get_specific_nodes(self, node, names):
137 """Given a node and a sequence of strings in `names`, return a
138 dictionary containing the names as keys and child
139 `ELEMENT_NODEs`, that have a `tagName` equal to the name.
142 nodes = [(x.tagName, x) for x in node.childNodes \
143 if x.nodeType == x.ELEMENT_NODE and \
147 def generic_parse(self, node, pad=0):
148 """A Generic parser for arbitrary tags in a node.
152 - node: A node in the DOM.
153 - pad: `int` (default: 0)
155 If 0 the node data is not padded with newlines. If 1 it
156 appends a newline after parsing the childNodes. If 2 it
157 pads before and after the nodes are processed. Defaults to
163 npiece = len(self.pieces)
166 for n in node.childNodes:
169 if len(self.pieces) > npiece:
172 def space_parse(self, node):
174 self.generic_parse(node)
# The handlers for these four tags are all aliases of `space_parse`, so
# each of them is processed by that one method.
do_emphasis = space_parse
do_bold = space_parse
do_computeroutput = space_parse
do_formula = space_parse
def do_compoundname(self, node):
    """Handle a `compoundname` element.

    Emits two newlines and then opens a `%feature("docstring")`
    directive for the name held in the element's first child.

    """
    self.add_text('\n\n')
    self.add_text('%%feature("docstring") %s "\n' % node.firstChild.data)
187 def do_compounddef(self, node):
188 kind = node.attributes['kind'].value
189 if kind in ('class', 'struct'):
190 prot = node.attributes['prot'].value
193 names = ('compoundname', 'briefdescription',
194 'detaileddescription', 'includes')
195 first = self.get_specific_nodes(node, names)
199 self.add_text(['";','\n'])
200 for n in node.childNodes:
201 if n not in first.values():
203 elif kind in ('file', 'namespace'):
204 nodes = node.getElementsByTagName('sectiondef')
def do_includes(self, node):
    """Handle an `includes` element.

    Writes the 'C++ includes: ' label and then parses the node's
    children, with a newline appended afterwards (pad=1).

    """
    label = 'C++ includes: '
    self.add_text(label)
    self.generic_parse(node, pad=1)
def do_parameterlist(self, node):
    """Handle a `parameterlist` element.

    Adds a 'Parameters:' heading (two newlines before it, one after)
    and then parses the node's children with a trailing newline
    (pad=1).

    """
    heading = ['\n', '\n', 'Parameters:', '\n']
    self.add_text(heading)
    self.generic_parse(node, pad=1)
216 def do_para(self, node):
218 self.generic_parse(node, pad=1)
220 def do_parametername(self, node):
222 self.add_text("%s: "%node.firstChild.data)
def do_parameterdefinition(self, node):
    """Handle a `parameterdefinition` element: parse its children and
    append a newline afterwards.

    """
    newline_after = 1
    self.generic_parse(node, pad=newline_after)
def do_detaileddescription(self, node):
    """Handle a `detaileddescription` element: parse its children and
    append a newline afterwards.

    """
    newline_after = 1
    self.generic_parse(node, pad=newline_after)
def do_briefdescription(self, node):
    """Handle a `briefdescription` element: parse its children and
    append a newline afterwards.

    """
    newline_after = 1
    self.generic_parse(node, pad=newline_after)
233 def do_memberdef(self, node):
234 prot = node.attributes['prot'].value
235 id = node.attributes['id'].value
236 kind = node.attributes['kind'].value
237 tmp = node.parentNode.parentNode.parentNode
238 compdef = tmp.getElementsByTagName('compounddef')[0]
239 cdef_kind = compdef.attributes['kind'].value
242 first = self.get_specific_nodes(node, ('definition', 'name'))
243 name = first['name'].firstChild.data
244 if name[:8] == 'operator': # Don't handle operators yet.
247 defn = first['definition'].firstChild.data
249 self.add_text('%feature("docstring") ')
251 anc = node.parentNode.parentNode
252 if cdef_kind in ('file', 'namespace'):
253 ns_node = anc.getElementsByTagName('innernamespace')
254 if not ns_node and cdef_kind == 'namespace':
255 ns_node = anc.getElementsByTagName('compoundname')
257 ns = ns_node[0].firstChild.data
258 self.add_text(' %s::%s "\n%s'%(ns, name, defn))
260 self.add_text(' %s "\n%s'%(name, defn))
261 elif cdef_kind in ('class', 'struct'):
262 # Get the full function name.
263 anc_node = anc.getElementsByTagName('compoundname')
264 cname = anc_node[0].firstChild.data
265 self.add_text(' %s::%s "\n%s'%(cname, name, defn))
267 for n in node.childNodes:
268 if n not in first.values():
270 self.add_text(['";', '\n'])
def do_definition(self, node):
    """Handle a `definition` element.

    The text of the element's first child is emitted twice: once
    before the opening quote and once on the line after it.

    """
    text = node.firstChild.data
    self.add_text('%s "\n%s' % ((text,) * 2))
def do_sectiondef(self, node):
    """Handle a `sectiondef` element.

    Only sections whose `kind` attribute is 'public-func' or 'func'
    are parsed; every other kind is skipped.

    """
    section_kind = node.attributes['kind'].value
    if section_kind not in ('public-func', 'func'):
        return
    self.generic_parse(node)
281 def do_simplesect(self, node):
282 kind = node.attributes['kind'].value
283 if kind in ('date', 'rcs', 'version'):
285 elif kind == 'warning':
286 self.add_text(['\n', 'WARNING: '])
287 self.generic_parse(node)
290 self.add_text('See: ')
291 self.generic_parse(node)
293 self.generic_parse(node)
def do_argsstring(self, node):
    """Handle an `argsstring` element: parse its children and append
    a newline afterwards.

    """
    newline_after = 1
    self.generic_parse(node, pad=newline_after)
def do_member(self, node):
    """Handle a `member` element.

    The node is parsed only when its `kind` attribute is 'function'
    and its `refid` attribute begins with 'namespace'; otherwise
    nothing is emitted.

    """
    attrs = node.attributes
    member_kind = attrs['kind'].value
    ref = attrs['refid'].value
    if member_kind != 'function':
        return
    if not ref.startswith('namespace'):
        return
    self.generic_parse(node)
304 def do_doxygenindex(self, node):
306 comps = node.getElementsByTagName('compound')
308 refid = c.attributes['refid'].value
309 fname = refid + '.xml'
310 if not os.path.exists(fname):
311 fname = os.path.join(self.my_dir, fname)
312 print "parsing file: %s"%fname
315 self.pieces.extend(self.clean_pieces(p.pieces))
317 def write(self, fname):
318 o = my_open_write(fname)
320 o.write("".join(self.pieces))
322 o.write("".join(self.clean_pieces(self.pieces)))
325 def clean_pieces(self, pieces):
326 """Cleans the list of strings given as `pieces`. It replaces
327 multiple newlines by a maximum of 2 and returns a new list.
328 It also wraps the paragraphs nicely.
343 ret.append('\n'*count)
349 for i in _data.split('\n\n'):
350 if i == 'Parameters:':
351 ret.extend(['Parameters:\n-----------', '\n\n'])
352 elif i.find('// File:') > -1: # leave comments alone.
353 ret.extend([i, '\n'])
355 _tmp = textwrap.fill(i.strip())
356 _tmp = self.lead_spc.sub(r'\1"\2', _tmp)
357 ret.extend([_tmp, '\n\n'])
361 def main(input, output):
367 if __name__ == '__main__':
368 if len(sys.argv) != 3:
371 main(sys.argv[1], sys.argv[2])