Files
metabuilder/docs/docs.py
2026-03-09 22:30:41 +00:00

648 lines
21 KiB
Python
Executable File

#!/usr/bin/env python3
"""
Documentation Manager
Self-documenting script for managing project documentation
Data stored in SQLite3 for efficient querying and search
"""
import argparse
import subprocess
import sys
import sqlite3
from pathlib import Path
from datetime import datetime
import hashlib
# SQLite database file; it sits in the same directory as this script.
DB_FILE = Path(__file__).with_name('docs.db')
def init_db():
    """Create the documentation schema in ``DB_FILE`` if it is missing.

    Creates the ``documents`` table, three lookup indexes (filename,
    category, archived) and the ``documents_fts`` FTS5 virtual table.
    Every statement uses ``IF NOT EXISTS``, so calling this function
    repeatedly is safe.

    ``documents_fts`` is declared with ``content='documents'``: it is an
    external-content index that stores no text of its own, so code that
    writes to ``documents`` must update the index itself.

    Raises:
        sqlite3.Error: if the database cannot be opened or a statement
            fails (for example when SQLite was built without FTS5).
    """
    conn = sqlite3.connect(DB_FILE)
    try:
        c = conn.cursor()
        c.execute('''
            CREATE TABLE IF NOT EXISTS documents (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                filename TEXT UNIQUE NOT NULL,
                title TEXT,
                content TEXT NOT NULL,
                category TEXT,
                type TEXT NOT NULL,
                size INTEGER,
                hash TEXT,
                created_at TEXT NOT NULL,
                updated_at TEXT NOT NULL,
                archived INTEGER DEFAULT 0,
                tags TEXT
            )
        ''')
        c.execute('''
            CREATE INDEX IF NOT EXISTS idx_filename ON documents(filename)
        ''')
        c.execute('''
            CREATE INDEX IF NOT EXISTS idx_category ON documents(category)
        ''')
        c.execute('''
            CREATE INDEX IF NOT EXISTS idx_archived ON documents(archived)
        ''')
        c.execute('''
            CREATE VIRTUAL TABLE IF NOT EXISTS documents_fts USING fts5(
                filename, title, content, category, tags,
                content='documents',
                content_rowid='id'
            )
        ''')
        conn.commit()
    finally:
        # Release the connection even when a statement above raised.
        conn.close()
def import_file(filepath, category=None, archived=False):
    """Import (or re-import) one file into the database.

    The document is keyed by ``filepath.name``; importing a file whose name
    is already stored replaces the existing row.

    Args:
        filepath: ``pathlib.Path`` of the file to read.
        category: Category to store.  When empty, the name of the file's
            parent directory is used unless that directory is ``docs``.
        archived: Store the document with the archived flag set.

    Returns:
        True when the document was stored, False when the file is missing,
        cannot be read as UTF-8 text, or the database write failed (the
        reason is printed).
    """
    if not filepath.exists():
        return False

    try:
        # Decode explicitly so the stored text does not depend on the locale.
        content = filepath.read_text(encoding='utf-8')
        info = filepath.stat()
    except (OSError, UnicodeDecodeError) as e:
        # Report and skip, so one unreadable file does not abort a batch.
        print(f"Error importing {filepath.name}: {e}")
        return False

    size = info.st_size
    file_hash = hashlib.sha256(content.encode()).hexdigest()

    # Title is the first line without markdown '#' markers; fall back to the
    # file stem when that line is empty.
    title = content.split('\n', 1)[0].strip('#').strip() or filepath.stem

    suffix = filepath.suffix.lower()
    if suffix == '.md':
        file_type = 'markdown'
    elif suffix == '.txt':
        file_type = 'text'
    else:
        file_type = 'other'

    # Auto-detect the category from the parent directory.  A bare file name
    # has an empty parent name; store that as NULL rather than ''.
    if not category and filepath.parent.name != 'docs':
        category = filepath.parent.name or None

    # NOTE: st_ctime is the creation time on Windows but the last
    # metadata-change time on most Unix systems.
    created = datetime.fromtimestamp(info.st_ctime).isoformat()
    updated = datetime.fromtimestamp(info.st_mtime).isoformat()

    conn = sqlite3.connect(DB_FILE)
    try:
        c = conn.cursor()

        # documents_fts is an external-content FTS5 table, so the index
        # entries of the row about to be replaced must be removed explicitly,
        # using the values that were indexed.  INSERT OR REPLACE gives the
        # row a new id, which would otherwise leave the old entries behind.
        previous = c.execute(
            'SELECT id, filename, title, content, category '
            'FROM documents WHERE filename = ?',
            (filepath.name,),
        ).fetchone()
        if previous is not None:
            c.execute(
                "INSERT INTO documents_fts "
                "(documents_fts, rowid, filename, title, content, category) "
                "VALUES ('delete', ?, ?, ?, ?, ?)",
                previous,
            )

        c.execute('''
            INSERT OR REPLACE INTO documents
            (filename, title, content, category, type, size, hash, created_at, updated_at, archived)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        ''', (filepath.name, title, content, category, file_type, size,
              file_hash, created, updated, 1 if archived else 0))

        # Index the freshly written row.
        c.execute('''
            INSERT INTO documents_fts (rowid, filename, title, content, category)
            SELECT id, filename, title, content, category FROM documents WHERE filename = ?
        ''', (filepath.name,))

        conn.commit()
        return True
    except sqlite3.Error as e:
        # Closing without commit discards the partial transaction.
        print(f"Error importing {filepath.name}: {e}")
        return False
    finally:
        conn.close()
def list_documents(pattern=None, category=None, archived=False):
    """List stored documents, optionally filtered.

    Args:
        pattern: Substring that the filename must contain (matched with SQL
            ``LIKE``, so ``%`` and ``_`` act as wildcards).
        category: Exact category to restrict the listing to.
        archived: When False (default) archived documents are excluded;
            when True both active and archived documents are returned.

    Returns:
        A list of ``(filename, category, type, size, created_at)`` tuples
        ordered by category, then filename.
    """
    query = 'SELECT filename, category, type, size, created_at FROM documents WHERE 1=1'
    params = []

    if not archived:
        query += ' AND archived = 0'
    if category:
        query += ' AND category = ?'
        params.append(category)
    if pattern:
        query += ' AND filename LIKE ?'
        params.append(f'%{pattern}%')
    query += ' ORDER BY category, filename'

    conn = sqlite3.connect(DB_FILE)
    try:
        return conn.execute(query, params).fetchall()
    finally:
        # Close the connection even when the query raises.
        conn.close()
def search_documents(query, limit=20):
    """Run a full-text search over the indexed documents.

    Args:
        query: FTS5 ``MATCH`` expression.  A blank query returns no results
            instead of raising an FTS5 syntax error.
        limit: Maximum number of rows to return.

    Returns:
        A list of ``(filename, category, type, snippet)`` tuples ordered by
        FTS5 rank.  ``snippet`` is an excerpt of the content column with
        matches wrapped in ``<mark>``/``</mark>``.

    Raises:
        sqlite3.OperationalError: if ``query`` is not a valid FTS5
            expression or the schema has not been created.
    """
    if not query or not query.strip():
        return []

    conn = sqlite3.connect(DB_FILE)
    try:
        c = conn.cursor()
        # snippet() column index 2 is the ``content`` column of documents_fts.
        c.execute('''
            SELECT d.filename, d.category, d.type,
                   snippet(documents_fts, 2, '<mark>', '</mark>', '...', 60) as snippet
            FROM documents_fts fts
            JOIN documents d ON fts.rowid = d.id
            WHERE documents_fts MATCH ?
            ORDER BY rank
            LIMIT ?
        ''', (query, limit))
        return c.fetchall()
    finally:
        # Close the connection even when the MATCH expression is rejected.
        conn.close()
def show_document(filename):
    """Return the stored content of a document.

    Args:
        filename: Exact filename the document was stored under.

    Returns:
        The content string, or None when no document has that filename.
        An existing but empty document yields ``''``, so callers should test
        the result with ``is None``.
    """
    conn = sqlite3.connect(DB_FILE)
    try:
        row = conn.execute(
            'SELECT content FROM documents WHERE filename = ?', (filename,)
        ).fetchone()
    finally:
        # Close the connection even when the query raises.
        conn.close()
    return row[0] if row else None
def get_stats():
    """Collect summary statistics about the stored documents.

    Returns:
        A dict with the keys:

        * ``active``: ``(category, type, count, total_size)`` rows for
          non-archived documents, grouped by category and type.
        * ``archived``: ``(category, count)`` rows for archived documents.
        * ``total_files``: number of documents, archived or not.
        * ``total_size``: summed size of all documents (0 when empty).
        * ``categories``: sorted non-empty category names that have at
          least one non-archived document.
    """
    conn = sqlite3.connect(DB_FILE)
    try:
        c = conn.cursor()

        c.execute('SELECT category, type, COUNT(*), SUM(size) FROM documents WHERE archived = 0 GROUP BY category, type')
        active = c.fetchall()

        c.execute('SELECT category, COUNT(*) FROM documents WHERE archived = 1 GROUP BY category')
        archived = c.fetchall()

        c.execute('SELECT COUNT(*), SUM(size) FROM documents')
        total_files, total_size = c.fetchone()

        c.execute('SELECT DISTINCT category FROM documents WHERE archived = 0 ORDER BY category')
        # Drop NULL and empty categories; they are shown as "root" elsewhere.
        categories = [row[0] for row in c.fetchall() if row[0]]
    finally:
        # Close the connection even when a query raises.
        conn.close()

    return {
        'active': active,
        'archived': archived,
        'total_files': total_files,
        # SUM() yields NULL on an empty table.
        'total_size': total_size or 0,
        'categories': categories,
    }
def archive_category(category, year=None):
    """Mark the active documents of a category as archived.

    Args:
        category: Exact category name whose documents are archived.
        year: When given, only documents whose ``created_at`` timestamp
            starts with this year are archived.

    Returns:
        The number of documents that were switched to archived.
    """
    query = 'UPDATE documents SET archived = 1 WHERE category = ? AND archived = 0'
    params = [category]

    if year is not None:
        # created_at is stored as ISO-8601 text, so a prefix match selects
        # the year.
        query += ' AND created_at LIKE ?'
        params.append(f'{year}%')

    conn = sqlite3.connect(DB_FILE)
    try:
        c = conn.cursor()
        c.execute(query, params)
        count = c.rowcount
        conn.commit()
    finally:
        # Close the connection even when the update raises.
        conn.close()
    return count
_EPILOG = """
Storage: SQLite3 database (docs.db)
Features:
- Full-text search (FTS5)
- Category organization
- Metadata tracking
- Archive management
- Content deduplication
Examples:
# Initialize database
%(prog)s init
# Import all markdown from guides/
%(prog)s import guides/*.md --category guides
# List all documents
%(prog)s list
# List by category
%(prog)s list --category guides
# Search documents
%(prog)s search "workflow"
# Show specific document
%(prog)s show CLAUDE.md
# Show statistics
%(prog)s stats
# Archive 2026 guides
%(prog)s archive guides --year 2026
# Export document to file
%(prog)s export CLAUDE.md output.md
"""


def _build_parser():
    """Build the argument parser with one sub-command per operation."""
    parser = argparse.ArgumentParser(
        description='Documentation Manager - SQLite-backed project docs',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=_EPILOG,
    )
    subparsers = parser.add_subparsers(dest='command', help='Command to run')

    # init
    subparsers.add_parser('init', help='Initialize database')

    # import
    import_parser = subparsers.add_parser('import', help='Import files to database')
    import_parser.add_argument('files', nargs='+', help='Files to import')
    import_parser.add_argument('--category', type=str, help='Category name')
    import_parser.add_argument('--archived', action='store_true', help='Mark as archived')

    # create
    create_parser = subparsers.add_parser('create', help='Create document directly in database')
    create_parser.add_argument('title', type=str, help='Document title')
    create_parser.add_argument('content', type=str, help='Document content')
    create_parser.add_argument('--category', type=str, help='Category name')
    create_parser.add_argument('--type', type=str, choices=['markdown', 'text'], default='markdown', help='Document type')
    create_parser.add_argument('--archived', action='store_true', help='Mark as archived')

    # update
    update_parser = subparsers.add_parser('update', help='Update existing document')
    update_parser.add_argument('filename', type=str, help='Document filename to update')
    update_parser.add_argument('--title', type=str, help='New title')
    update_parser.add_argument('--content', type=str, help='New content')
    update_parser.add_argument('--category', type=str, help='New category')
    update_parser.add_argument('--type', type=str, choices=['markdown', 'text'], help='New type')

    # delete
    delete_parser = subparsers.add_parser('delete', help='Delete document from database')
    delete_parser.add_argument('filename', type=str, help='Document filename to delete')

    # delete-many
    delete_many_parser = subparsers.add_parser('delete-many', help='Delete multiple documents')
    delete_many_parser.add_argument('pattern', type=str, help='Pattern to match (SQL LIKE pattern, e.g., "2026-01-%%")')
    delete_many_parser.add_argument('--category', type=str, help='Filter by category')
    delete_many_parser.add_argument('--type', type=str, choices=['markdown', 'text'], help='Filter by type')
    delete_many_parser.add_argument('--archived-only', action='store_true', help='Only delete archived documents')
    delete_many_parser.add_argument('--confirm', action='store_true', help='Skip confirmation prompt')

    # list
    list_parser = subparsers.add_parser('list', help='List documents')
    list_parser.add_argument('--pattern', type=str, help='Filter by pattern')
    list_parser.add_argument('--category', type=str, help='Filter by category')
    list_parser.add_argument('--archived', action='store_true', help='Include archived')

    # search
    search_parser = subparsers.add_parser('search', help='Search documents (FTS5)')
    search_parser.add_argument('query', type=str, help='Search query')
    search_parser.add_argument('--limit', type=int, default=20, help='Max results')

    # show
    show_parser = subparsers.add_parser('show', help='Show document content')
    show_parser.add_argument('filename', type=str, help='Document filename')

    # stats
    subparsers.add_parser('stats', help='Show statistics')

    # archive
    archive_parser = subparsers.add_parser('archive', help='Archive category')
    archive_parser.add_argument('category', type=str, help='Category to archive')
    archive_parser.add_argument('--year', type=int, help='Only archive from year')

    # export
    export_parser = subparsers.add_parser('export', help='Export document to file')
    export_parser.add_argument('filename', type=str, help='Document to export')
    export_parser.add_argument('output', type=str, help='Output file')

    # categories
    subparsers.add_parser('categories', help='List all categories')

    return parser


def _fts_remove(cursor, where, params):
    """Remove the FTS index entries of every documents row matching ``where``.

    ``documents_fts`` is an external-content FTS5 table, so its entries can
    only be removed by handing back the values that were indexed.  This must
    therefore run BEFORE the matching ``documents`` rows are changed or
    deleted.  ``where`` is a trusted SQL fragment built by the callers; user
    input only travels through ``params``.  Returns the number of rows
    handled.
    """
    rows = cursor.execute(
        'SELECT id, filename, title, content, category FROM documents WHERE ' + where,
        params,
    ).fetchall()
    cursor.executemany(
        "INSERT INTO documents_fts "
        "(documents_fts, rowid, filename, title, content, category) "
        "VALUES ('delete', ?, ?, ?, ?, ?)",
        rows,
    )
    return len(rows)


def _fts_add(cursor, filename):
    """Index the current ``documents`` row stored under ``filename``."""
    cursor.execute('''
        INSERT INTO documents_fts (rowid, filename, title, content, category)
        SELECT id, filename, title, content, category FROM documents WHERE filename = ?
    ''', (filename,))


def _expand_files(pattern):
    """Return the files named by ``pattern``: a literal path, else a glob."""
    literal = Path(pattern)
    if literal.is_file():
        # The shell usually expands globs already; accept the path as given.
        # This also covers absolute paths, which Path.glob() rejects.
        return [literal]
    if literal.is_dir():
        return []
    if literal.is_absolute():
        base = Path(literal.anchor)
        pattern = str(literal.relative_to(base))
    else:
        base = Path('.')
    return sorted(p for p in base.glob(pattern) if p.is_file())


def _cmd_init(args):
    """Create the database schema."""
    print("🔧 Initializing documentation database...")
    init_db()
    print(f"✅ Database created: {DB_FILE}")
    return 0


def _cmd_import(args):
    """Import every file named on the command line."""
    init_db()
    print(f"📥 Importing {len(args.files)} files...")
    imported = 0
    for pattern in args.files:
        for filepath in _expand_files(pattern):
            if import_file(filepath, args.category, args.archived):
                imported += 1
                print(f"{filepath.name}")
    print(f"\n✅ Imported {imported} files")
    return 0


def _cmd_create(args):
    """Create a document from command-line text instead of a file."""
    init_db()

    # Generate filename from title.
    # NOTE: 'text' documents get a '.text' suffix here, while import_file()
    # recognises '.txt'; kept as-is so existing filenames stay stable.
    suffix = '.md' if args.type == 'markdown' else f'.{args.type}'
    filename = args.title.lower().replace(' ', '-') + suffix

    created = datetime.now().isoformat()
    updated = created

    content_bytes = args.content.encode()
    size = len(content_bytes)
    file_hash = hashlib.sha256(content_bytes).hexdigest()

    conn = sqlite3.connect(DB_FILE)
    try:
        c = conn.cursor()
        # Plain INSERT: an existing filename is an error, not a replacement.
        c.execute('''
            INSERT INTO documents
            (filename, title, content, category, type, size, hash, created_at, updated_at, archived)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        ''', (filename, args.title, args.content, args.category, args.type,
              size, file_hash, created, updated, 1 if args.archived else 0))
        _fts_add(c, filename)
        conn.commit()
    except sqlite3.Error as e:
        print(f"❌ Error creating document: {e}")
        return 1
    finally:
        conn.close()

    print(f"✅ Created document: {filename}")
    if args.category:
        print(f" Category: {args.category}")
    return 0


def _cmd_list(args):
    """Print a table of stored documents."""
    docs = list_documents(args.pattern, args.category, args.archived)
    if not docs:
        print("No documents found")
        return 0
    print(f"{'Category':<20} {'Filename':<40} {'Type':<10} {'Size':<10}")
    print("-" * 85)
    for filename, category, dtype, size, _created in docs:
        size = size or 0  # the size column is nullable
        size_str = f"{size / 1024:.1f}K" if size > 1024 else f"{size}B"
        cat_str = category or 'root'
        print(f"{cat_str:<20} {filename:<40} {dtype:<10} {size_str:<10}")
    print(f"\nTotal: {len(docs)} documents")
    return 0


def _cmd_search(args):
    """Run a full-text search and print the hits."""
    results = search_documents(args.query, args.limit)
    if not results:
        print(f"No results for: {args.query}")
        return 0
    print(f"🔍 Search results for '{args.query}'\n")
    for i, (filename, category, _dtype, snippet) in enumerate(results, 1):
        cat_str = category or 'root'
        print(f"{i}. {filename} ({cat_str})")
        print(f" {snippet}")
        print()
    return 0


def _cmd_show(args):
    """Print the content of one document."""
    content = show_document(args.filename)
    # None means "no such document"; an empty string is a valid document.
    if content is None:
        print(f"❌ Document not found: {args.filename}")
        return 1
    print(content)
    return 0


def _cmd_stats(args):
    """Print per-category statistics."""
    stats = get_stats()
    print("📊 Documentation Statistics\n")
    print(f"Categories: {', '.join(stats['categories'])}\n")
    print("Documents by Category:")
    shown = None  # display name of the group whose header was printed last
    for category, dtype, count, size in stats['active']:
        cat_str = category or 'root'
        # Compare display names so uncategorised documents (NULL category)
        # also get their "root/" header.
        if cat_str != shown:
            if shown is not None:
                print()
            print(f"{cat_str}/")
            shown = cat_str
        size_mb = (size or 0) / (1024 * 1024)
        print(f" {dtype:<10} {count:>5} files {size_mb:>6.2f} MB")
    total_mb = stats['total_size'] / (1024 * 1024)
    print(f"\nTotal: {stats['total_files']} files, {total_mb:.2f} MB")
    return 0


def _cmd_archive(args):
    """Archive the documents of one category."""
    count = archive_category(args.category, args.year)
    year_str = f" from {args.year}" if args.year else ""
    print(f"📦 Archived {count} documents from {args.category}{year_str}")
    return 0


def _cmd_update(args):
    """Change fields of an existing document and re-index it."""
    assignments = []
    params = []
    # "is not None" so an explicitly empty value can be stored.
    if args.title is not None:
        assignments.append('title = ?')
        params.append(args.title)
    if args.content is not None:
        content_bytes = args.content.encode()
        assignments += ['content = ?', 'size = ?', 'hash = ?']
        params += [args.content, len(content_bytes),
                   hashlib.sha256(content_bytes).hexdigest()]
    if args.category is not None:
        assignments.append('category = ?')
        params.append(args.category)
    if args.type:
        assignments.append('type = ?')
        params.append(args.type)

    if not assignments:
        print("❌ No updates specified")
        return 1

    assignments.append('updated_at = ?')
    params.append(datetime.now().isoformat())
    params.append(args.filename)

    conn = sqlite3.connect(DB_FILE)
    try:
        c = conn.cursor()
        # Drop the old index entries while the old values are still stored.
        if not _fts_remove(c, 'filename = ?', [args.filename]):
            print(f"❌ Document not found: {args.filename}")
            return 1
        c.execute(f"UPDATE documents SET {', '.join(assignments)} WHERE filename = ?", params)
        _fts_add(c, args.filename)
        conn.commit()
    except sqlite3.Error as e:
        print(f"❌ Error: {e}")
        return 1
    finally:
        conn.close()

    print(f"✅ Updated document: {args.filename}")
    return 0


def _cmd_delete(args):
    """Delete one document and its index entries."""
    conn = sqlite3.connect(DB_FILE)
    try:
        c = conn.cursor()
        # The index entries must go first: once the content row is deleted
        # the external-content FTS table can no longer find them.
        if not _fts_remove(c, 'filename = ?', [args.filename]):
            print(f"❌ Document not found: {args.filename}")
            return 1
        c.execute('DELETE FROM documents WHERE filename = ?', (args.filename,))
        conn.commit()
    except sqlite3.Error as e:
        print(f"❌ Error: {e}")
        return 1
    finally:
        conn.close()

    print(f"✅ Deleted document: {args.filename}")
    return 0


def _cmd_delete_many(args):
    """Delete every document matching a LIKE pattern and optional filters."""
    # One filter shared by the preview, the index cleanup and the delete.
    where = 'filename LIKE ?'
    params = [args.pattern]
    if args.category:
        where += ' AND category = ?'
        params.append(args.category)
    if args.type:
        where += ' AND type = ?'
        params.append(args.type)
    if args.archived_only:
        where += ' AND archived = 1'

    conn = sqlite3.connect(DB_FILE)
    try:
        c = conn.cursor()
        matching = c.execute(
            f'SELECT filename, category FROM documents WHERE {where}', params
        ).fetchall()

        if not matching:
            print(f"❌ No documents found matching pattern: {args.pattern}")
            return 1

        print(f"Found {len(matching)} documents matching pattern: {args.pattern}")
        for filename, category in matching:
            cat_str = category or 'root'
            print(f" - {filename} ({cat_str})")

        if not args.confirm:
            response = input(f"\n⚠️ Delete {len(matching)} documents? [y/N]: ")
            if response.lower() != 'y':
                print("❌ Cancelled")
                return 1

        # Index entries first, while the content rows still exist.
        _fts_remove(c, where, params)
        c.execute(f'DELETE FROM documents WHERE {where}', params)
        deleted_count = c.rowcount
        conn.commit()
    except sqlite3.Error as e:
        print(f"❌ Error: {e}")
        return 1
    finally:
        conn.close()

    print(f"✅ Deleted {deleted_count} documents")
    return 0


def _cmd_export(args):
    """Write the content of one document to a file."""
    content = show_document(args.filename)
    if content is None:
        print(f"❌ Document not found: {args.filename}")
        return 1
    Path(args.output).write_text(content, encoding='utf-8')
    print(f"✅ Exported to: {args.output}")
    return 0


def _cmd_categories(args):
    """Print the names of all active categories."""
    stats = get_stats()
    print("📁 Categories:\n")
    for cat in stats['categories']:
        print(f" - {cat}")
    return 0


def main():
    """Parse the command line and run the selected sub-command.

    Returns:
        The process exit status: 0 on success (including when help is
        printed because no command was given), 1 when the command failed,
        was cancelled or was interrupted.
    """
    parser = _build_parser()
    args = parser.parse_args()

    if not args.command:
        parser.print_help()
        return 0

    handlers = {
        'init': _cmd_init,
        'import': _cmd_import,
        'create': _cmd_create,
        'update': _cmd_update,
        'delete': _cmd_delete,
        'delete-many': _cmd_delete_many,
        'list': _cmd_list,
        'search': _cmd_search,
        'show': _cmd_show,
        'stats': _cmd_stats,
        'archive': _cmd_archive,
        'export': _cmd_export,
        'categories': _cmd_categories,
    }

    try:
        return handlers[args.command](args)
    except KeyboardInterrupt:
        print("\n⚠️ Interrupted")
        return 1
    except Exception as e:
        # Top-level boundary: report anything a handler did not deal with.
        print(f"❌ Error: {e}")
        import traceback
        traceback.print_exc()
        return 1
# Script entry point: the return value of main() becomes the exit status.
if __name__ == '__main__':
    raise SystemExit(main())