This commit is contained in:
2026-01-06 13:25:49 +00:00
parent 5d495d731b
commit 4e15e08b7f
1395 changed files with 295666 additions and 323 deletions

View File

@@ -0,0 +1,514 @@
#!/usr/bin/env python
"""
pybind11 documentation insertion tool.
Extracts documentation from Doxygen XML and inserts it into pybind11 bindings
using string matching via signature lookup table.
Logic:
- Builds a multi-key lookup for all functions (MaterialX::, mx::, Class::method, method)
- Handles free functions without <qualifiedname> by assuming MaterialX namespace
- Adds class context tracking to correctly document lambda-based bindings
- Supports .def(...) and .def_static(...); skips .def_readonly_static(...)
"""
import argparse
import re
import json
import xml.etree.ElementTree as ET
from pathlib import Path
from typing import Dict, Optional
# Defaults (can be overridden by CLI)
DOXYGEN_XML_DIR = Path("build/documents/doxygen_xml")
PYBIND_DIR = Path("source/PyMaterialX")
class DocExtractor:
"""Extracts documentation from Doxygen XML files and builds a lookup table."""
def __init__(self, xml_dir: Path):
self.xml_dir = xml_dir
self.class_docs: Dict[str, str] = {}
self.func_docs: Dict[str, Dict] = {}
# Multi-key lookup: all name variants point to the same doc
self.func_lookup: Dict[str, Dict] = {}
def extract(self):
if not self.xml_dir.exists():
raise FileNotFoundError(f"Doxygen XML directory not found: {self.xml_dir}")
for xml_file in self.xml_dir.glob("*.xml"):
self._process_xml_file(xml_file)
self._build_lookup_table()
print(f"Extracted {len(self.class_docs)} classes and {len(self.func_docs)} functions")
print(f"Built lookup table with {len(self.func_lookup)} keys")
def _process_xml_file(self, xml_file: Path):
tree = ET.parse(xml_file)
root = tree.getroot()
# Class / struct documentation
for compound in root.findall(".//compounddef[@kind='class']") + root.findall(".//compounddef[@kind='struct']"):
self._extract_class_doc(compound)
# Function documentation
for member in root.findall(".//memberdef[@kind='function']"):
self._extract_func_doc(member)
def _extract_class_doc(self, compound):
name = self._get_text(compound.find("compoundname"))
brief = self._get_text(compound.find("briefdescription/para"))
detail = self._extract_detail(compound.find("detaileddescription"))
doc = "\n\n".join(filter(None, [brief, detail]))
if doc:
normalized = self._normalize_name(name)
self.class_docs[normalized] = doc
def _extract_func_doc(self, member):
name = self._get_text(member.find("name"))
qualified = self._get_text(member.find("qualifiedname"))
# Many free functions have no <qualifiedname>; use the bare name
# and normalize to MaterialX::name so lookups can resolve.
if not qualified and name:
qualified = name
if not qualified:
return
brief = self._get_text(member.find("briefdescription/para"))
detail = self._extract_detail(member.find("detaileddescription"))
params = self._extract_params(member)
returns = self._get_text(member.find(".//simplesect[@kind='return']"))
normalized = self._normalize_name(qualified)
self.func_docs[normalized] = {
"brief": brief,
"detail": detail,
"params": params,
"returns": returns,
}
def _build_lookup_table(self):
for qualified_name, doc in self.func_docs.items():
for variant in self._generate_name_variants(qualified_name):
if variant not in self.func_lookup:
self.func_lookup[variant] = doc
def _generate_name_variants(self, qualified_name: str) -> list:
variants = [qualified_name]
parts = qualified_name.split("::")
# Class::method
if len(parts) >= 2:
variants.append("::".join(parts[-2:]))
# method
if len(parts) >= 1:
variants.append(parts[-1])
# mx:: variant if MaterialX::
if qualified_name.startswith("MaterialX::"):
mx_variant = qualified_name.replace("MaterialX::", "mx::", 1)
variants.append(mx_variant)
if len(parts) >= 3:
variants.append(f"mx::{parts[-2]}::{parts[-1]}")
return variants
def _normalize_name(self, name: str) -> str:
if not name:
return name
return name if name.startswith("MaterialX::") else f"MaterialX::{name}"
def _get_text(self, elem) -> str:
if elem is None:
return ""
text = "".join(elem.itertext())
return re.sub(r"\s+", " ", text).strip()
def _extract_detail(self, elem, exclude_tags={"parameterlist", "simplesect"}) -> str:
if elem is None:
return ""
parts = []
for para in elem.findall("para"):
if not any(para.find(tag) is not None for tag in exclude_tags):
t = self._get_text(para)
if t:
parts.append(t)
return "\n\n".join(parts)
def _extract_params(self, member) -> Dict[str, str]:
params = {}
for param_item in member.findall(".//parameterlist[@kind='param']/parameteritem"):
name = self._get_text(param_item.find("parameternamelist/parametername"))
desc = self._get_text(param_item.find("parameterdescription"))
if name:
params[name] = desc
return params
class DocInserter:
"""Inserts documentation into pybind11 binding files."""
def __init__(self, extractor: DocExtractor, pybind_dir: Path, force_replace: bool = False):
self.extractor = extractor
self.pybind_dir = pybind_dir
self.force_replace = force_replace
self.class_pattern = re.compile(r"py::class_<")
self.def_pattern = re.compile(r"\.def(?:_static)?\s*\(")
# Match .def and .def_static; skip .def_readonly_static (constants)
self.def_pattern = re.compile(r"\.def(?:_static)?\s*\(")
self.skip_pattern = re.compile(r"\.def_readonly_static\s*\(")
def process_all_files(self):
cpp_files = list(self.pybind_dir.rglob("*.cpp"))
patched = 0
for cpp_file in cpp_files:
if self._process_file(cpp_file):
patched += 1
print(f"\nProcessed {len(cpp_files)} files, patched {patched}")
def _process_file(self, cpp_file: Path) -> bool:
content = cpp_file.read_text(encoding="utf-8")
original = content
content = self._insert_class_docs(content)
content = self._insert_method_docs(content)
if content != original:
cpp_file.write_text(content, encoding="utf-8")
print(f" - {cpp_file.relative_to(self.pybind_dir.parent)}")
return True
else:
print(f" - {cpp_file.relative_to(self.pybind_dir.parent)}")
return False
def _insert_class_docs(self, content: str) -> str:
result = []
pos = 0
for match in self.class_pattern.finditer(content):
result.append(content[pos:match.start()])
start = match.start()
template_end = self._find_template_end(content, start)
if template_end == -1:
result.append(content[start:match.end()])
pos = match.end()
continue
paren_start = content.find('(', template_end)
if paren_start == -1:
result.append(content[start:match.end()])
pos = match.end()
continue
paren_end = self._find_matching_paren(content, paren_start)
if paren_end == -1:
result.append(content[start:match.end()])
pos = match.end()
continue
args_text = content[paren_start + 1:paren_end]
class_name = self._extract_class_name(args_text)
if class_name:
doc = self.extractor.class_docs.get(self.extractor._normalize_name(class_name))
if doc:
args = self._split_args(args_text)
if len(args) >= 3 and not self.force_replace:
result.append(content[start:paren_end + 1])
pos = paren_end + 1
continue
escaped = self._escape_for_cpp(doc)
if len(args) >= 3 and self.force_replace:
new_args = args[:2] + [f'"{escaped}"'] + args[3:]
result.append(content[start:paren_start + 1])
result.append(", ".join(new_args))
result.append(")")
else:
result.append(content[start:paren_end])
result.append(f', "{escaped}")')
pos = paren_end + 1
continue
result.append(content[start:paren_end + 1])
pos = paren_end + 1
result.append(content[pos:])
return "".join(result)
def _insert_method_docs(self, content: str) -> str:
# Build a map of line numbers to class contexts
class_contexts = self._extract_class_contexts(content)
result = []
pos = 0
for match in self.def_pattern.finditer(content):
if self.skip_pattern.match(content, match.start()):
continue
result.append(content[pos:match.start()])
start = match.start()
paren_start = content.find('(', start)
if paren_start == -1:
result.append(content[start:match.end()])
pos = match.end()
continue
paren_end = self._find_matching_paren(content, paren_start)
if paren_end == -1:
result.append(content[start:match.end()])
pos = match.end()
continue
args_text = content[paren_start + 1:paren_end]
args = self._split_args(args_text)
if len(args) < 2:
result.append(content[start:paren_end + 1])
pos = paren_end + 1
continue
has_doc = self._has_docstring(args)
if has_doc and not self.force_replace:
result.append(content[start:paren_end + 1])
pos = paren_end + 1
continue
callable_ref = args[1].strip()
current_line = content[:start].count('\n')
class_context = class_contexts.get(current_line)
doc_entry = self._find_doc_for_callable(callable_ref, class_context)
if doc_entry:
docstring = self._build_docstring(doc_entry)
escaped = self._escape_for_cpp(docstring)
if has_doc and self.force_replace:
doc_idx = self._find_docstring_arg_index(args)
if doc_idx is not None:
new_args = args[:doc_idx] + [f'"{escaped}"'] + args[doc_idx + 1:]
result.append(content[start:paren_start + 1])
result.append(", ".join(new_args))
result.append(")")
pos = paren_end + 1
continue
result.append(content[start:paren_end])
result.append(f', "{escaped}")')
pos = paren_end + 1
continue
result.append(content[start:paren_end + 1])
pos = paren_end + 1
result.append(content[pos:])
return "".join(result)
def _extract_class_contexts(self, content: str) -> Dict[int, str]:
contexts = {}
for match in self.class_pattern.finditer(content):
start = match.start()
template_end = self._find_template_end(content, start)
if template_end == -1:
continue
template_start = content.find('<', start) + 1
template_content = content[template_start:template_end - 1]
class_type = template_content.split(',')[0].strip()
class_name = class_type.split('::')[-1] if '::' in class_type else class_type
start_line = content[:start].count('\n')
end_pos = content.find(';', start)
if end_pos != -1:
end_line = content[:end_pos].count('\n')
for line in range(start_line, end_line + 1):
contexts[line] = class_name
return contexts
def _find_doc_for_callable(self, callable_ref: str, class_context: Optional[str] = None) -> Optional[Dict]:
callable_ref = callable_ref.strip()
# Function pointers like &mx::Class::method or &MaterialX::name
if callable_ref.startswith('&'):
name = callable_ref[1:].strip()
name = re.sub(r'[,\s]+$', '', name)
return self.extractor.func_lookup.get(name)
# Lambdas: look for elem.method( or obj->method(
method_match = re.search(r'[\.\->](\w+)\s*\(', callable_ref)
if method_match:
method_name = method_match.group(1)
if class_context:
for prefix in ("", "mx::", "MaterialX::"):
qualified = f"{prefix}{class_context}::{method_name}" if prefix else f"{class_context}::{method_name}"
doc = self.extractor.func_lookup.get(qualified)
if doc:
return doc
return self.extractor.func_lookup.get(method_name)
return None
def _build_docstring(self, doc_entry: Dict) -> str:
parts = []
if doc_entry.get("brief"):
parts.append(doc_entry["brief"])
if doc_entry.get("detail"):
parts.append(doc_entry["detail"])
params = doc_entry.get("params", {})
if params:
param_lines = ["Args:"]
for name, desc in params.items():
param_lines.append(f" {name}: {desc}" if desc else f" {name}:")
parts.append("\n".join(param_lines))
if doc_entry.get("returns"):
parts.append(f"Returns:\n {doc_entry['returns']}")
return "\n\n".join(parts)
def _escape_for_cpp(self, s: str) -> str:
if not s:
return ""
s = s.replace("\\", "\\\\").replace('"', '\\"')
s = s.replace("\n", "\\n")
return s
def _find_template_end(self, content: str, start: int) -> int:
pos = content.find('<', start)
if pos == -1:
return -1
depth = 1
i = pos + 1
in_string = False
while i < len(content) and depth > 0:
c = content[i]
if c == '"' and content[i - 1] != '\\':
in_string = not in_string
elif not in_string:
if c == '<':
depth += 1
elif c == '>':
depth -= 1
i += 1
return i if depth == 0 else -1
def _find_matching_paren(self, content: str, start: int) -> int:
depth = 0
in_string = False
escape = False
for i in range(start, len(content)):
c = content[i]
if escape:
escape = False
continue
if c == '\\':
escape = True
continue
if c == '"':
in_string = not in_string
continue
if not in_string:
if c == '(':
depth += 1
elif c == ')':
depth -= 1
if depth == 0:
return i
return -1
def _split_args(self, args_text: str) -> list:
args = []
current = []
depth = 0
in_string = False
escape = False
for c in args_text:
if escape:
current.append(c)
escape = False
continue
if c == '\\':
current.append(c)
escape = True
continue
if c == '"':
in_string = not in_string
current.append(c)
continue
if not in_string:
if c in '(<':
depth += 1
elif c in ')>':
depth -= 1
elif c == ',' and depth == 0:
args.append("".join(current).strip())
current = []
continue
current.append(c)
if current:
args.append("".join(current).strip())
return args
def _extract_class_name(self, args_text: str) -> Optional[str]:
args = self._split_args(args_text)
if len(args) >= 2:
return args[1].strip().strip('"')
return None
def _has_docstring(self, args: list) -> bool:
for arg in args[2:]:
a = arg.strip()
if not a.startswith("py::arg") and a.startswith('"'):
return True
return False
def _find_docstring_arg_index(self, args: list) -> Optional[int]:
for i, arg in enumerate(args[2:], start=2):
a = arg.strip()
if not a.startswith("py::arg") and a.startswith('"'):
return i
return None
def main():
parser = argparse.ArgumentParser(description="Extract Doxygen docs and insert into pybind11 bindings (simplified)")
parser.add_argument("-d", "--doxygen_xml_dir", type=Path, default=Path("build/documents/doxygen_xml"), help="Path to Doxygen XML output directory")
parser.add_argument("-p", "--pybind_dir", type=Path, default=Path("source/PyMaterialX"), help="Path to pybind11 bindings directory")
parser.add_argument("-f", "--force", action="store_true", help="Force replace existing docstrings")
parser.add_argument("-j", "--write_json", action="store_true", help="Write extracted docs to JSON files")
args = parser.parse_args()
if not args.doxygen_xml_dir.exists():
print(f"Error: Doxygen XML directory not found: {args.doxygen_xml_dir}")
return 1
if not args.pybind_dir.exists():
print(f"Error: Pybind directory not found: {args.pybind_dir}")
return 1
print("Extracting documentation from Doxygen XML...")
extractor = DocExtractor(args.doxygen_xml_dir)
extractor.extract()
if args.write_json:
print("\nWriting JSON files...")
Path("class_docs.json").write_text(json.dumps(extractor.class_docs, indent=2), encoding="utf-8")
Path("func_docs.json").write_text(json.dumps(extractor.func_docs, indent=2), encoding="utf-8")
print(" - class_docs.json")
print(" - func_docs.json")
print(f"\n{'Replacing' if args.force else 'Inserting'} documentation in pybind11 files...")
inserter = DocInserter(extractor, args.pybind_dir, args.force)
inserter.process_all_files()
print("\nDone!")
return 0
if __name__ == "__main__":
exit(main())