diff --git a/backend/autometabuilder/schema/__init__.py b/backend/autometabuilder/schema/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/backend/autometabuilder/schema/n8n-workflow.schema.json b/backend/autometabuilder/schema/n8n-workflow.schema.json new file mode 100644 index 0000000..2f5eb6c --- /dev/null +++ b/backend/autometabuilder/schema/n8n-workflow.schema.json @@ -0,0 +1,345 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://example.com/schemas/n8n-workflow.schema.json", + "title": "N8N-Style Workflow", + "type": "object", + "additionalProperties": false, + "required": ["name", "nodes", "connections"], + "properties": { + "id": { + "description": "Optional external identifier (DB id, UUID, etc.).", + "type": ["string", "integer"] + }, + "name": { + "type": "string", + "minLength": 1 + }, + "active": { + "type": "boolean", + "default": false + }, + "versionId": { + "description": "Optional version identifier for optimistic concurrency.", + "type": "string" + }, + "createdAt": { + "type": "string", + "format": "date-time" + }, + "updatedAt": { + "type": "string", + "format": "date-time" + }, + "tags": { + "type": "array", + "items": { "$ref": "#/$defs/tag" }, + "default": [] + }, + "meta": { + "description": "Arbitrary metadata. 
Keep stable keys for tooling.", + "type": "object", + "additionalProperties": true, + "default": {} + }, + "settings": { + "$ref": "#/$defs/workflowSettings" + }, + "pinData": { + "description": "Optional pinned execution data (useful for dev).", + "type": "object", + "additionalProperties": { + "type": "array", + "items": { + "type": "object", + "additionalProperties": true + } + } + }, + "nodes": { + "type": "array", + "minItems": 1, + "items": { "$ref": "#/$defs/node" } + }, + "connections": { + "$ref": "#/$defs/connections" + }, + "staticData": { + "description": "Reserved for engine-managed workflow state.", + "type": "object", + "additionalProperties": true, + "default": {} + }, + "credentials": { + "description": "Optional top-level credential bindings (engine-specific).", + "type": "array", + "items": { "$ref": "#/$defs/credentialBinding" }, + "default": [] + }, + "triggers": { + "description": "Optional explicit trigger declarations for event-driven workflows.", + "type": "array", + "default": [], + "items": { "$ref": "#/$defs/trigger" } + } + }, + "$defs": { + "tag": { + "type": "object", + "additionalProperties": false, + "required": ["name"], + "properties": { + "id": { "type": ["string", "integer"] }, + "name": { "type": "string", "minLength": 1 } + } + }, + "workflowSettings": { + "type": "object", + "additionalProperties": false, + "properties": { + "timezone": { + "description": "IANA timezone name, e.g. 
Europe/London.", + "type": "string" + }, + "executionTimeout": { + "description": "Hard timeout in seconds for a workflow execution.", + "type": "integer", + "minimum": 0 + }, + "saveExecutionProgress": { + "type": "boolean", + "default": true + }, + "saveManualExecutions": { + "type": "boolean", + "default": true + }, + "saveDataErrorExecution": { + "description": "Persist execution data on error.", + "type": "string", + "enum": ["all", "none"], + "default": "all" + }, + "saveDataSuccessExecution": { + "description": "Persist execution data on success.", + "type": "string", + "enum": ["all", "none"], + "default": "all" + }, + "saveDataManualExecution": { + "description": "Persist execution data for manual runs.", + "type": "string", + "enum": ["all", "none"], + "default": "all" + }, + "errorWorkflowId": { + "description": "Optional workflow id to call on error.", + "type": ["string", "integer"] + }, + "callerPolicy": { + "description": "Optional policy controlling which workflows can call this workflow.", + "type": "string" + } + }, + "default": {} + }, + "node": { + "type": "object", + "additionalProperties": false, + "required": ["id", "name", "type", "typeVersion", "position"], + "properties": { + "id": { + "description": "Stable unique id within the workflow. Prefer UUID.", + "type": "string", + "minLength": 1 + }, + "name": { + "description": "Human-friendly name; should be unique in workflow.", + "type": "string", + "minLength": 1 + }, + "type": { + "description": "Node type identifier, e.g. 
n8n-nodes-base.httpRequest.", + "type": "string", + "minLength": 1 + }, + "typeVersion": { + "description": "Node implementation version.", + "type": ["integer", "number"], + "minimum": 1 + }, + "disabled": { + "type": "boolean", + "default": false + }, + "notes": { + "type": "string", + "default": "" + }, + "notesInFlow": { + "description": "When true, notes are displayed on canvas.", + "type": "boolean", + "default": false + }, + "retryOnFail": { + "type": "boolean", + "default": false + }, + "maxTries": { + "type": "integer", + "minimum": 1 + }, + "waitBetweenTries": { + "description": "Milliseconds.", + "type": "integer", + "minimum": 0 + }, + "continueOnFail": { + "type": "boolean", + "default": false + }, + "alwaysOutputData": { + "type": "boolean", + "default": false + }, + "executeOnce": { + "description": "If true, node executes only once per execution (engine-dependent).", + "type": "boolean", + "default": false + }, + "position": { + "$ref": "#/$defs/position" + }, + "parameters": { + "description": "Node-specific parameters. 
Typically JSON-serializable.", + "type": "object", + "additionalProperties": true, + "default": {} + }, + "credentials": { + "description": "Node-level credential references.", + "type": "object", + "additionalProperties": { + "$ref": "#/$defs/credentialRef" + }, + "default": {} + }, + "webhookId": { + "description": "Optional webhook id (for webhook-based trigger nodes).", + "type": "string" + }, + "onError": { + "description": "Node-level error routing policy (engine-dependent).", + "type": "string", + "enum": ["stopWorkflow", "continueRegularOutput", "continueErrorOutput"] + } + } + }, + "position": { + "type": "array", + "minItems": 2, + "maxItems": 2, + "items": { + "type": "number" + } + }, + "credentialRef": { + "type": "object", + "additionalProperties": false, + "required": ["id"], + "properties": { + "id": { + "description": "Credential id or stable key.", + "type": ["string", "integer"] + }, + "name": { + "description": "Optional human label.", + "type": "string" + } + } + }, + "credentialBinding": { + "type": "object", + "additionalProperties": false, + "required": ["nodeId", "credentialType", "credentialId"], + "properties": { + "nodeId": { "type": "string", "minLength": 1 }, + "credentialType": { "type": "string", "minLength": 1 }, + "credentialId": { "type": ["string", "integer"] } + } + }, + "connections": { + "description": "Adjacency map: fromNodeName -> outputType -> outputIndex -> array of targets.", + "type": "object", + "additionalProperties": { + "$ref": "#/$defs/nodeConnectionsByType" + }, + "default": {} + }, + "nodeConnectionsByType": { + "type": "object", + "additionalProperties": false, + "properties": { + "main": { + "$ref": "#/$defs/outputIndexMap" + }, + "error": { + "$ref": "#/$defs/outputIndexMap" + } + }, + "anyOf": [ + { "required": ["main"] }, + { "required": ["error"] } + ] + }, + "outputIndexMap": { + "description": "Output index -> array of connection targets.", + "type": "object", + "additionalProperties": { + "type": 
"array", + "items": { "$ref": "#/$defs/connectionTarget" } + }, + "default": {} + }, + "connectionTarget": { + "type": "object", + "additionalProperties": false, + "required": ["node", "type", "index"], + "properties": { + "node": { + "description": "Target node name (n8n uses node 'name' in connections).", + "type": "string", + "minLength": 1 + }, + "type": { + "description": "Input type on target node (typically 'main' or 'error').", + "type": "string", + "minLength": 1 + }, + "index": { + "description": "Input index on target node.", + "type": "integer", + "minimum": 0 + } + } + }, + "trigger": { + "type": "object", + "additionalProperties": false, + "required": ["nodeId", "kind"], + "properties": { + "nodeId": { "type": "string", "minLength": 1 }, + "kind": { + "type": "string", + "enum": ["webhook", "schedule", "queue", "email", "poll", "manual", "other"] + }, + "enabled": { "type": "boolean", "default": true }, + "meta": { + "description": "Trigger-kind-specific metadata for routing/registration.", + "type": "object", + "additionalProperties": true, + "default": {} + } + } + } + } +} diff --git a/backend/autometabuilder/tools/validate_workflows.py b/backend/autometabuilder/tools/validate_workflows.py index 9972373..50fa6cd 100755 --- a/backend/autometabuilder/tools/validate_workflows.py +++ b/backend/autometabuilder/tools/validate_workflows.py @@ -5,14 +5,23 @@ import sys from pathlib import Path from typing import List, Tuple -# Import the schema module - try direct import first (when installed via poetry) -# If that fails, add parent directory to path (for direct script execution) try: - from autometabuilder.workflow.n8n_schema import N8NWorkflow + import jsonschema + from jsonschema import Draft202012Validator except ImportError: - backend_dir = Path(__file__).resolve().parent.parent.parent - sys.path.insert(0, str(backend_dir)) - from autometabuilder.workflow.n8n_schema import N8NWorkflow + print("Error: jsonschema library not found. 
def load_schema() -> dict:
    """Read and parse the N8N workflow JSON schema shipped next to this tool.

    The schema is looked up relative to this module's resolved location:
    two directories up, then ``schema/n8n-workflow.schema.json``.

    Returns:
        The parsed content of the schema file.

    Raises:
        FileNotFoundError: If no file exists at the expected schema path.
    """
    base_dir = Path(__file__).resolve().parent.parent
    schema_path = base_dir.joinpath("schema", "n8n-workflow.schema.json")

    if schema_path.exists():
        with schema_path.open(mode="r", encoding="utf-8") as handle:
            return json.load(handle)

    # Fail with an explicit message that names the path we looked at.
    raise FileNotFoundError(f"Schema file not found at: {schema_path}")
def test_load_schema():
    """Check that the schema loads as a dict carrying the expected metadata."""
    loaded = load_schema()
    assert isinstance(loaded, dict)

    # Top-level keys that identify the schema draft and the document itself.
    expected_metadata = {
        "$schema": "https://json-schema.org/draft/2020-12/schema",
        "title": "N8N-Style Workflow",
    }
    for key, expected in expected_metadata.items():
        assert loaded.get(key) == expected
"autometabuilder" workflow_files = find_workflow_files(backend_dir) + schema = load_schema() errors = [] for workflow_path in workflow_files: @@ -33,7 +43,7 @@ def test_validate_all_workflow_files(): # If relative_to fails (e.g., due to symlinks), use the full path relative_path = workflow_path - is_valid, error_msg = validate_workflow_file(workflow_path) + is_valid, error_msg = validate_workflow_file(workflow_path, schema) if not is_valid: errors.append((relative_path, error_msg)) @@ -46,6 +56,7 @@ def test_validate_all_workflow_files(): def test_validate_minimal_valid_workflow(tmp_path): """Test validation of a minimal valid workflow.""" + schema = load_schema() workflow_data = { "name": "Test Workflow", "nodes": [ @@ -63,12 +74,13 @@ def test_validate_minimal_valid_workflow(tmp_path): workflow_file = tmp_path / "workflow.json" workflow_file.write_text(json.dumps(workflow_data)) - is_valid, error_msg = validate_workflow_file(workflow_file) + is_valid, error_msg = validate_workflow_file(workflow_file, schema) assert is_valid, f"Validation failed: {error_msg}" def test_validate_workflow_with_missing_name(tmp_path): """Test validation of workflow missing required 'name' field.""" + schema = load_schema() workflow_data = { "nodes": [ { @@ -85,13 +97,14 @@ def test_validate_workflow_with_missing_name(tmp_path): workflow_file = tmp_path / "workflow.json" workflow_file.write_text(json.dumps(workflow_data)) - is_valid, error_msg = validate_workflow_file(workflow_file) + is_valid, error_msg = validate_workflow_file(workflow_file, schema) assert not is_valid - assert "name" in error_msg.lower() + assert "name" in error_msg.lower() or "required" in error_msg.lower() def test_validate_workflow_with_empty_nodes(tmp_path): """Test validation of workflow with empty nodes array.""" + schema = load_schema() workflow_data = { "name": "Empty Workflow", "nodes": [], @@ -101,24 +114,26 @@ def test_validate_workflow_with_empty_nodes(tmp_path): workflow_file = tmp_path / 
"workflow.json" workflow_file.write_text(json.dumps(workflow_data)) - is_valid, error_msg = validate_workflow_file(workflow_file) + is_valid, error_msg = validate_workflow_file(workflow_file, schema) assert not is_valid - assert "nodes" in error_msg.lower() - assert "at least 1" in error_msg.lower() + # jsonschema will report "[] should be non-empty" + assert "nodes" in error_msg.lower() or "empty" in error_msg.lower() def test_validate_workflow_with_invalid_json(tmp_path): """Test validation of file with invalid JSON.""" + schema = load_schema() workflow_file = tmp_path / "workflow.json" workflow_file.write_text("{ invalid json }") - is_valid, error_msg = validate_workflow_file(workflow_file) + is_valid, error_msg = validate_workflow_file(workflow_file, schema) assert not is_valid assert "json" in error_msg.lower() def test_validate_workflow_with_invalid_node(tmp_path): """Test validation of workflow with invalid node structure.""" + schema = load_schema() workflow_data = { "name": "Test Workflow", "nodes": [ @@ -133,12 +148,13 @@ def test_validate_workflow_with_invalid_node(tmp_path): workflow_file = tmp_path / "workflow.json" workflow_file.write_text(json.dumps(workflow_data)) - is_valid, error_msg = validate_workflow_file(workflow_file) + is_valid, error_msg = validate_workflow_file(workflow_file, schema) assert not is_valid def test_validate_workflow_with_triggers(tmp_path): """Test validation of workflow with triggers array.""" + schema = load_schema() workflow_data = { "name": "Test Workflow with Triggers", "nodes": [ @@ -166,5 +182,5 @@ def test_validate_workflow_with_triggers(tmp_path): workflow_file = tmp_path / "workflow.json" workflow_file.write_text(json.dumps(workflow_data)) - is_valid, error_msg = validate_workflow_file(workflow_file) + is_valid, error_msg = validate_workflow_file(workflow_file, schema) assert is_valid, f"Validation failed: {error_msg}" diff --git a/docs/WORKFLOW_VALIDATION.md b/docs/WORKFLOW_VALIDATION.md index cd81dd0..28a90b0 
100644 --- a/docs/WORKFLOW_VALIDATION.md +++ b/docs/WORKFLOW_VALIDATION.md @@ -1,10 +1,10 @@ # Workflow JSON Validation -This repository includes a validation tool for workflow JSON files based on the N8N-style workflow schema defined in ROADMAP.md. +This repository includes a validation tool for workflow JSON files based on the N8N-style workflow schema defined in ROADMAP.md and extracted to a dedicated schema file. ## Schema Definition -The workflow JSON schema is defined in [ROADMAP.md](../ROADMAP.md) (lines 84-430). It defines the structure for N8N-style workflows with the following key requirements: +The workflow JSON schema is extracted from [ROADMAP.md](../ROADMAP.md) (lines 84-430) and stored in `backend/autometabuilder/schema/n8n-workflow.schema.json`. It defines the structure for N8N-style workflows with the following key requirements: - **Required fields**: `name`, `nodes`, `connections` - **Nodes**: Must contain at least 1 node with `id`, `name`, `type`, `typeVersion`, and `position` @@ -170,8 +170,9 @@ When adding new workflow JSON files: ## Implementation Details -The validation is implemented in: -- **Validator Module**: `backend/autometabuilder/workflow/n8n_schema.py` +The validation is implemented using: +- **JSON Schema**: `backend/autometabuilder/schema/n8n-workflow.schema.json` (extracted from ROADMAP.md) +- **Validation Library**: `jsonschema` (official JSON Schema validator for Python) - **Validation Tool**: `backend/autometabuilder/tools/validate_workflows.py` - **Tests**: `backend/tests/test_workflow_validation.py` - **Schema Tests**: `backend/tests/test_n8n_schema.py` diff --git a/pyproject.toml b/pyproject.toml index a61d95d..1ddaed7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,6 +17,7 @@ tenacity = "^9.1.2" flask = "^2.3.3" slack-sdk = "^3.39.0" discord-py = "^2.6.4" +jsonschema = "^4.10.3" [build-system] requires = ["poetry-core"]