Files
metabuilder/tools/project-management/populate-kanban.py
2025-12-27 03:36:30 +00:00

486 lines
16 KiB
Python
Executable File

#!/usr/bin/env python3
"""
Populate GitHub Project Kanban from TODO files.
This script:
1. Parses all TODO markdown files in docs/todo/
2. Extracts unchecked TODO items
3. Generates GitHub issues with appropriate labels and metadata
4. Can output to JSON or directly create issues via GitHub CLI
Usage:
python3 populate-kanban.py --dry-run # Preview issues
python3 populate-kanban.py --output issues.json # Export to JSON
python3 populate-kanban.py --create # Create issues (requires gh auth)
python3 populate-kanban.py --project-id 2 # Add to specific project
"""
import argparse
import json
import os
import re
import subprocess
import sys
from pathlib import Path
from typing import Dict, List, Optional, Tuple
from dataclasses import dataclass, asdict
@dataclass
class TodoItem:
    """A single unchecked TODO entry, ready to be turned into a GitHub issue.

    Attributes:
        title: Issue title (the task text, truncated to 100 characters).
        body: Markdown issue body with file, section, line and context.
        file: Path of the source TODO file, relative to the directory two
            levels above the TODO directory.
        section: Text of the nearest preceding markdown heading.
        labels: Category labels derived from the file's location and name.
        priority: Priority label, e.g. ``'🟠 High'``.
        line_number: 1-based line number of the item in its file.
    """

    title: str
    body: str
    file: str
    section: str
    labels: List[str]
    priority: str
    line_number: int


class TodoParser:
    """Parse TODO markdown files and extract unchecked ``- [ ]`` items.

    Labels are derived from the first directory below ``todo_dir`` and from
    keywords in the file name; priority comes from the README table when the
    file is listed there, otherwise from file-name patterns.
    """

    # Map TODO files to label categories
    LABEL_MAP = {
        'core': ['core', 'workflow'],
        'infrastructure': ['infrastructure'],
        'features': ['feature'],
        'improvements': ['enhancement'],
    }

    # Priority mapping based on README
    PRIORITY_MAP = {
        'critical': '🔴 Critical',
        'high': '🟠 High',
        'medium': '🟡 Medium',
        'low': '🟢 Low',
    }

    # (keywords, label) pairs checked in order against the lower-cased file
    # stem; a label is added when any of its keywords matches.
    FILENAME_LABELS = (
        (('dbal',), 'dbal'),
        (('frontend',), 'frontend'),
        (('backend',), 'backend'),
        (('test',), 'testing'),
        (('security',), 'security'),
        (('database', 'db'), 'database'),
        (('deploy',), 'deployment'),
        (('doc',), 'documentation'),
        (('package',), 'packages'),
        (('lua',), 'lua'),
        (('multi-tenant',), 'multi-tenant'),
        (('ui', 'declarative'), 'ui'),
        (('accessibility', 'a11y'), 'accessibility'),
        (('performance',), 'performance'),
        (('copilot',), 'copilot'),
        (('sdlc',), 'sdlc'),
    )

    # Keywords that must match a whole dash/underscore-separated word of the
    # file name. 'ui' as a plain substring also matches e.g. "build".
    WHOLE_WORD_KEYWORDS = frozenset({'ui'})

    # Number of lines preceding a TODO item that are scanned for context.
    CONTEXT_LINES = 5

    # Row of the README Quick Reference table: | [name](link) | ... | Priority |
    _README_PRIORITY_RE = re.compile(
        r'\|\s*\[([^\]]+)\]\([^\)]+\)\s*\|[^|]*\|\s*(Critical|High|Medium|Low)\s*\|'
    )
    _SECTION_RE = re.compile(r'^#+\s+(.+)$')
    _TODO_RE = re.compile(r'^(\s*)- \[ \]\s+(.+)$')

    def __init__(self, todo_dir: Path):
        """Remember ``todo_dir`` and load priorities from its README.md."""
        self.todo_dir = todo_dir
        self.readme_priorities = self._parse_readme_priorities()

    def _parse_readme_priorities(self) -> Dict[str, str]:
        """Parse priority information from README.md.

        Returns:
            Mapping of the link text found in the README table (expected to
            be a TODO file name) to a lower-cased priority word. Empty when
            README.md does not exist.
        """
        priorities: Dict[str, str] = {}
        readme_path = self.todo_dir / 'README.md'
        if not readme_path.exists():
            return priorities

        with open(readme_path, 'r', encoding='utf-8') as f:
            content = f.read()

        # Extract priority from the Quick Reference table
        for filename, priority in self._README_PRIORITY_RE.findall(content):
            priorities[filename] = priority.lower()
        return priorities

    def _categorize_file(self, filepath: Path) -> List[str]:
        """Determine labels based on file location and name.

        Args:
            filepath: Path of a TODO file located under ``self.todo_dir``.

        Returns:
            De-duplicated labels in a stable order: directory-category labels
            first, then file-name labels in ``FILENAME_LABELS`` order.
        """
        labels: List[str] = []

        # Get category from directory structure
        parts = filepath.relative_to(self.todo_dir).parts
        if len(parts) > 1:
            # e.g., 'core', 'infrastructure', etc.
            labels.extend(self.LABEL_MAP.get(parts[0], []))

        # Add specific labels based on filename
        stem = filepath.stem.lower()
        words = set(re.split(r'[^a-z0-9]+', stem))
        for keywords, label in self.FILENAME_LABELS:
            for keyword in keywords:
                if keyword in self.WHOLE_WORD_KEYWORDS:
                    matched = keyword in words
                else:
                    matched = keyword in stem
                if matched:
                    labels.append(label)
                    break

        # dict.fromkeys removes duplicates while keeping first-seen order, so
        # the result is identical from run to run (a set would not be).
        return list(dict.fromkeys(labels))

    def _get_priority(self, filepath: Path) -> str:
        """Get the priority label for a TODO file.

        A README entry wins; otherwise the priority is guessed from keywords
        in the file name, defaulting to Medium.
        """
        filename = filepath.name

        # Check if priority is defined in README
        if filename in self.readme_priorities:
            priority = self.readme_priorities[filename]
            return self.PRIORITY_MAP.get(priority, '🟡 Medium')

        # Default priorities based on patterns
        lowered = filename.lower()
        if 'critical' in lowered or 'security' in lowered:
            return '🔴 Critical'
        if any(x in lowered for x in ['build', 'dbal', 'frontend', 'database', 'sdlc']):
            return '🟠 High'
        if 'future' in lowered or 'copilot' in lowered:
            return '🟢 Low'
        return '🟡 Medium'

    def _parse_file(self, filepath: Path) -> List[TodoItem]:
        """Parse a single TODO markdown file.

        Args:
            filepath: Path of a TODO file located under ``self.todo_dir``.

        Returns:
            One ``TodoItem`` per unchecked ``- [ ]`` line whose text is at
            least 5 characters long, in file order.
        """
        with open(filepath, 'r', encoding='utf-8') as f:
            lines = f.readlines()

        labels = self._categorize_file(filepath)
        priority = self._get_priority(filepath)
        rel_path = str(filepath.relative_to(self.todo_dir.parent.parent))

        items = []
        current_section = "General"
        for line_number, line in enumerate(lines, start=1):
            # Track section headers
            if line.startswith('#'):
                section_match = self._SECTION_RE.match(line.strip())
                if section_match:
                    current_section = section_match.group(1)
                continue

            # Look for unchecked TODO items
            todo_match = self._TODO_RE.match(line)
            if not todo_match:
                continue

            # Skip if it's empty or just a placeholder
            content = todo_match.group(2).strip()
            if len(content) < 5:
                continue

            # Look back for context. ``line_number`` is 1-based while
            # ``lines`` is 0-based, so the window must end just before
            # ``index``; otherwise the TODO line becomes its own context.
            index = line_number - 1
            window = lines[max(0, index - self.CONTEXT_LINES):index]
            context_lines = []
            for raw in window:
                prev_line = raw.strip()
                # Keep neighbouring checklist items and blockquotes
                # (blockquotes often carry important notes).
                if prev_line.startswith(('- [', '>')):
                    context_lines.append(prev_line)

            # Build body with file reference and section
            body_parts = [
                f"**File:** `{rel_path}`",
                f"**Section:** {current_section}",
                f"**Line:** {line_number}",
                "",
            ]
            if context_lines:
                body_parts.append("**Context:**")
                body_parts.extend(context_lines)
                body_parts.append("")
            body_parts.append(f"**Task:** {content}")

            items.append(TodoItem(
                title=content[:100] + ('...' if len(content) > 100 else ''),
                body='\n'.join(body_parts),
                file=rel_path,
                section=current_section,
                # Own copy per item so editing one item's labels cannot
                # leak into the other items of the same file.
                labels=list(labels),
                priority=priority,
                line_number=line_number,
            ))
        return items

    def parse_all(self) -> List[TodoItem]:
        """Parse all TODO files in the directory.

        Files are found recursively by the globs ``*TODO*.md`` and
        ``[0-9]*.md``; names containing README, STATUS, SCAN or REFACTOR
        (case-insensitive) are skipped. Progress is printed to stdout.

        Returns:
            Items of all files, files processed in sorted path order.
        """
        all_items = []

        # A set, because one file can match both glob patterns
        todo_files_set = set()
        for pattern in ['**/*TODO*.md', '**/[0-9]*.md']:
            todo_files_set.update(self.todo_dir.glob(pattern))

        # Filter out special files
        exclude_patterns = ['README', 'STATUS', 'SCAN', 'REFACTOR']
        todo_files = [
            f for f in todo_files_set
            if not any(excluded in f.name.upper() for excluded in exclude_patterns)
        ]

        print(f"Found {len(todo_files)} TODO files to parse")
        for filepath in sorted(todo_files):
            print(f" Parsing {filepath.relative_to(self.todo_dir.parent.parent)}...")
            items = self._parse_file(filepath)
            all_items.extend(items)
            print(f" Found {len(items)} items")
        return all_items
class GitHubIssueCreator:
    """Turn TODO items into GitHub issues by shelling out to the ``gh`` CLI."""

    def __init__(self, repo: str, project_id: Optional[int] = None):
        """Store the target ``owner/name`` repository and optional project."""
        self.repo = repo
        self.project_id = project_id

    def _check_gh_auth(self) -> bool:
        """Return True when ``gh auth status`` exits with code 0.

        Prints an error and returns False when the ``gh`` executable cannot
        be found.
        """
        try:
            status = subprocess.run(
                ['gh', 'auth', 'status'],
                capture_output=True,
                text=True
            )
        except FileNotFoundError:
            print("ERROR: GitHub CLI (gh) not found. Please install it first.")
            return False
        return status.returncode == 0

    def create_issue(self, item: "TodoItem", dry_run: bool = False) -> Optional[str]:
        """Create one issue for ``item`` (or only preview it when ``dry_run``).

        Returns:
            The stripped stdout of ``gh issue create`` (the issue URL) on
            success; None in dry-run mode or when the command fails.
        """
        # The priority is attached as one more label after the category labels
        issue_labels = item.labels + [item.priority]

        if dry_run:
            print("\n[DRY RUN] Would create issue:")
            print(f" Title: {item.title}")
            print(f" Labels: {', '.join(issue_labels)}")
            print(f" Body preview: {item.body[:100]}...")
            return None

        # One '--label <name>' pair per label
        label_flags = [arg for label in issue_labels for arg in ('--label', label)]
        command = [
            'gh', 'issue', 'create',
            '--repo', self.repo,
            '--title', item.title,
            '--body', item.body,
            *label_flags,
        ]

        try:
            completed = subprocess.run(
                command,
                capture_output=True,
                text=True,
                check=True
            )
        except subprocess.CalledProcessError as e:
            print(f" ✗ Failed to create issue: {e.stderr}")
            return None
        else:
            issue_url = completed.stdout.strip()
            print(f" ✓ Created: {issue_url}")

            # Add to project if specified
            if self.project_id and issue_url:
                self._add_to_project(issue_url)
            return issue_url

    def _add_to_project(self, issue_url: str):
        """Add the issue at ``issue_url`` to project ``self.project_id``.

        The project owner is taken from the part of ``self.repo`` before the
        first '/'. A failing command is reported on stdout, not raised.
        """
        owner = self.repo.split('/')[0]
        command = [
            'gh', 'project', 'item-add', str(self.project_id),
            '--owner', owner,
            '--url', issue_url
        ]
        try:
            subprocess.run(command, capture_output=True, text=True, check=True)
        except subprocess.CalledProcessError as e:
            print(f" ✗ Failed to add to project: {e.stderr}")
        else:
            print(f" ✓ Added to project {self.project_id}")

    def create_all(self, items: List["TodoItem"], dry_run: bool = False,
                   batch_size: int = 10) -> List[str]:
        """Create issues for every item, pausing briefly between batches.

        Args:
            items: Items to create issues for.
            dry_run: Preview only; skips the auth check and the pauses.
            batch_size: Number of issues after which to sleep for 2 seconds.

        Returns:
            URLs of the issues that were created; empty in dry-run mode or
            when the GitHub CLI is not authenticated.
        """
        import time

        if not dry_run and not self._check_gh_auth():
            print("ERROR: GitHub CLI is not authenticated. Run 'gh auth login' first.")
            return []

        total = len(items)
        print(f"\nCreating {total} issues...")
        if dry_run:
            print("\n[DRY RUN MODE - No issues will be created]")

        urls = []
        for position, item in enumerate(items, start=1):
            print(f"\n[{position}/{total}] {item.title[:60]}...")
            issue_url = self.create_issue(item, dry_run=dry_run)
            if issue_url:
                urls.append(issue_url)

            # Pause after batch to avoid rate limiting (never after the last item)
            batch_done = not dry_run and position % batch_size == 0 and position < total
            if batch_done:
                print(f"\n Pausing after {batch_size} issues (rate limiting)...")
                time.sleep(2)
        return urls
def main():
    """Command-line entry point.

    Parses the TODO files, prints a summary by priority and by label (the
    10 most frequent), then either exports the items to JSON (``--output``),
    creates or previews GitHub issues (``--create`` / ``--dry-run``), or
    prints a usage hint when none of these was requested.

    Exits with status 1 when the TODO directory does not exist and with an
    argparse usage error when ``--limit`` is negative.
    """
    parser = argparse.ArgumentParser(
        description='Populate GitHub Project Kanban from TODO files',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=__doc__
    )
    parser.add_argument(
        '--dry-run',
        action='store_true',
        help='Preview issues without creating them'
    )
    parser.add_argument(
        '--output',
        type=str,
        help='Export issues to JSON file instead of creating them'
    )
    parser.add_argument(
        '--create',
        action='store_true',
        help='Create issues on GitHub (requires gh auth)'
    )
    parser.add_argument(
        '--project-id',
        type=int,
        default=2,
        help='GitHub project ID to add issues to (default: 2)'
    )
    parser.add_argument(
        '--repo',
        type=str,
        default='johndoe6345789/metabuilder',
        help='GitHub repository (default: johndoe6345789/metabuilder)'
    )
    parser.add_argument(
        '--todo-dir',
        type=Path,
        help='Path to docs/todo directory (default: auto-detect)'
    )
    parser.add_argument(
        '--limit',
        type=int,
        help='Limit number of issues to create (for testing)'
    )
    args = parser.parse_args()

    # A negative limit would silently drop items from the END of the list
    # (items[:-n]), which is never what "limit" means.
    if args.limit is not None and args.limit < 0:
        parser.error('--limit must be zero or a positive integer')

    # Auto-detect todo directory if not specified
    if args.todo_dir is None:
        # resolve() first: when __file__ is a relative path (e.g. the main
        # script on Python < 3.9), walking up .parent stops at '.' and the
        # repo root would wrongly become the current working directory.
        script_dir = Path(__file__).resolve().parent
        repo_root = script_dir.parent.parent
        args.todo_dir = repo_root / 'docs' / 'todo'

    if not args.todo_dir.exists():
        print(f"ERROR: TODO directory not found: {args.todo_dir}")
        sys.exit(1)

    print(f"Parsing TODO files from: {args.todo_dir}")

    # Parse TODO files
    parser_obj = TodoParser(args.todo_dir)
    items = parser_obj.parse_all()

    # 'is not None' so that an explicit --limit 0 is honoured, not ignored
    if args.limit is not None:
        items = items[:args.limit]
        print(f"\nLimited to first {args.limit} items")

    print(f"\n{'='*60}")
    print(f"Total TODO items found: {len(items)}")
    print(f"{'='*60}")

    # Output summary by priority
    priority_counts = {}
    for item in items:
        priority_counts[item.priority] = priority_counts.get(item.priority, 0) + 1
    print("\nBreakdown by priority:")
    for priority in ['🔴 Critical', '🟠 High', '🟡 Medium', '🟢 Low']:
        count = priority_counts.get(priority, 0)
        if count > 0:
            print(f" {priority}: {count}")

    # Output summary by label (10 most frequent)
    label_counts = {}
    for item in items:
        for label in item.labels:
            label_counts[label] = label_counts.get(label, 0) + 1
    print("\nBreakdown by label:")
    for label, count in sorted(label_counts.items(), key=lambda x: -x[1])[:10]:
        print(f" {label}: {count}")

    # Export to JSON if requested; this takes precedence over issue creation
    if args.output:
        output_path = Path(args.output)
        with open(output_path, 'w', encoding='utf-8') as f:
            json.dump([asdict(item) for item in items], f, indent=2)
        print(f"\n✓ Exported {len(items)} items to {output_path}")
        return

    # Create issues if requested
    if args.create or args.dry_run:
        creator = GitHubIssueCreator(args.repo, args.project_id)
        created = creator.create_all(items, dry_run=args.dry_run)
        if not args.dry_run:
            print(f"\n{'='*60}")
            print(f"Successfully created {len(created)} issues")
            print(f"{'='*60}")
    else:
        print("\nTo create issues, run with --create or --dry-run")
        print("Example: python3 populate-kanban.py --dry-run")


if __name__ == '__main__':
    main()