diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 90f44a9..6d395f3 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -32,6 +32,9 @@ jobs: - name: Build package run: poetry build + - name: Validate workflow JSON files + run: poetry run validate-workflows + - name: Static analysis run: poetry run python -m compileall src diff --git a/README.md b/README.md index 65fcd81..a480248 100644 --- a/README.md +++ b/README.md @@ -48,6 +48,16 @@ The Next.js app now lives under `frontend/autometabuilder` and uses Material UI ## Testing & linting +### Workflow JSON Validation + +Validate all workflow JSON files against the N8N schema: + +```bash +poetry run validate-workflows +``` + +See [docs/WORKFLOW_VALIDATION.md](docs/WORKFLOW_VALIDATION.md) for detailed documentation. + ### Python ```bash diff --git a/backend/autometabuilder/packages/blank/workflow.json b/backend/autometabuilder/packages/blank/workflow.json index f469193..979a833 100644 --- a/backend/autometabuilder/packages/blank/workflow.json +++ b/backend/autometabuilder/packages/blank/workflow.json @@ -1,6 +1,18 @@ { "name": "Blank Canvas", "active": false, - "nodes": [], + "nodes": [ + { + "id": "start", + "name": "Start", + "type": "core.start", + "typeVersion": 1, + "position": [ + 0, + 0 + ], + "parameters": {} + } + ], "connections": {} } \ No newline at end of file diff --git a/backend/autometabuilder/schema/__init__.py b/backend/autometabuilder/schema/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/backend/autometabuilder/schema/n8n-workflow.schema.json b/backend/autometabuilder/schema/n8n-workflow.schema.json new file mode 100644 index 0000000..2f5eb6c --- /dev/null +++ b/backend/autometabuilder/schema/n8n-workflow.schema.json @@ -0,0 +1,345 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://example.com/schemas/n8n-workflow.schema.json", + "title": "N8N-Style Workflow", + "type": "object", + "additionalProperties": 
false, + "required": ["name", "nodes", "connections"], + "properties": { + "id": { + "description": "Optional external identifier (DB id, UUID, etc.).", + "type": ["string", "integer"] + }, + "name": { + "type": "string", + "minLength": 1 + }, + "active": { + "type": "boolean", + "default": false + }, + "versionId": { + "description": "Optional version identifier for optimistic concurrency.", + "type": "string" + }, + "createdAt": { + "type": "string", + "format": "date-time" + }, + "updatedAt": { + "type": "string", + "format": "date-time" + }, + "tags": { + "type": "array", + "items": { "$ref": "#/$defs/tag" }, + "default": [] + }, + "meta": { + "description": "Arbitrary metadata. Keep stable keys for tooling.", + "type": "object", + "additionalProperties": true, + "default": {} + }, + "settings": { + "$ref": "#/$defs/workflowSettings" + }, + "pinData": { + "description": "Optional pinned execution data (useful for dev).", + "type": "object", + "additionalProperties": { + "type": "array", + "items": { + "type": "object", + "additionalProperties": true + } + } + }, + "nodes": { + "type": "array", + "minItems": 1, + "items": { "$ref": "#/$defs/node" } + }, + "connections": { + "$ref": "#/$defs/connections" + }, + "staticData": { + "description": "Reserved for engine-managed workflow state.", + "type": "object", + "additionalProperties": true, + "default": {} + }, + "credentials": { + "description": "Optional top-level credential bindings (engine-specific).", + "type": "array", + "items": { "$ref": "#/$defs/credentialBinding" }, + "default": [] + }, + "triggers": { + "description": "Optional explicit trigger declarations for event-driven workflows.", + "type": "array", + "default": [], + "items": { "$ref": "#/$defs/trigger" } + } + }, + "$defs": { + "tag": { + "type": "object", + "additionalProperties": false, + "required": ["name"], + "properties": { + "id": { "type": ["string", "integer"] }, + "name": { "type": "string", "minLength": 1 } + } + }, + 
"workflowSettings": { + "type": "object", + "additionalProperties": false, + "properties": { + "timezone": { + "description": "IANA timezone name, e.g. Europe/London.", + "type": "string" + }, + "executionTimeout": { + "description": "Hard timeout in seconds for a workflow execution.", + "type": "integer", + "minimum": 0 + }, + "saveExecutionProgress": { + "type": "boolean", + "default": true + }, + "saveManualExecutions": { + "type": "boolean", + "default": true + }, + "saveDataErrorExecution": { + "description": "Persist execution data on error.", + "type": "string", + "enum": ["all", "none"], + "default": "all" + }, + "saveDataSuccessExecution": { + "description": "Persist execution data on success.", + "type": "string", + "enum": ["all", "none"], + "default": "all" + }, + "saveDataManualExecution": { + "description": "Persist execution data for manual runs.", + "type": "string", + "enum": ["all", "none"], + "default": "all" + }, + "errorWorkflowId": { + "description": "Optional workflow id to call on error.", + "type": ["string", "integer"] + }, + "callerPolicy": { + "description": "Optional policy controlling which workflows can call this workflow.", + "type": "string" + } + }, + "default": {} + }, + "node": { + "type": "object", + "additionalProperties": false, + "required": ["id", "name", "type", "typeVersion", "position"], + "properties": { + "id": { + "description": "Stable unique id within the workflow. Prefer UUID.", + "type": "string", + "minLength": 1 + }, + "name": { + "description": "Human-friendly name; should be unique in workflow.", + "type": "string", + "minLength": 1 + }, + "type": { + "description": "Node type identifier, e.g. 
n8n-nodes-base.httpRequest.", + "type": "string", + "minLength": 1 + }, + "typeVersion": { + "description": "Node implementation version.", + "type": ["integer", "number"], + "minimum": 1 + }, + "disabled": { + "type": "boolean", + "default": false + }, + "notes": { + "type": "string", + "default": "" + }, + "notesInFlow": { + "description": "When true, notes are displayed on canvas.", + "type": "boolean", + "default": false + }, + "retryOnFail": { + "type": "boolean", + "default": false + }, + "maxTries": { + "type": "integer", + "minimum": 1 + }, + "waitBetweenTries": { + "description": "Milliseconds.", + "type": "integer", + "minimum": 0 + }, + "continueOnFail": { + "type": "boolean", + "default": false + }, + "alwaysOutputData": { + "type": "boolean", + "default": false + }, + "executeOnce": { + "description": "If true, node executes only once per execution (engine-dependent).", + "type": "boolean", + "default": false + }, + "position": { + "$ref": "#/$defs/position" + }, + "parameters": { + "description": "Node-specific parameters. 
Typically JSON-serializable.", + "type": "object", + "additionalProperties": true, + "default": {} + }, + "credentials": { + "description": "Node-level credential references.", + "type": "object", + "additionalProperties": { + "$ref": "#/$defs/credentialRef" + }, + "default": {} + }, + "webhookId": { + "description": "Optional webhook id (for webhook-based trigger nodes).", + "type": "string" + }, + "onError": { + "description": "Node-level error routing policy (engine-dependent).", + "type": "string", + "enum": ["stopWorkflow", "continueRegularOutput", "continueErrorOutput"] + } + } + }, + "position": { + "type": "array", + "minItems": 2, + "maxItems": 2, + "items": { + "type": "number" + } + }, + "credentialRef": { + "type": "object", + "additionalProperties": false, + "required": ["id"], + "properties": { + "id": { + "description": "Credential id or stable key.", + "type": ["string", "integer"] + }, + "name": { + "description": "Optional human label.", + "type": "string" + } + } + }, + "credentialBinding": { + "type": "object", + "additionalProperties": false, + "required": ["nodeId", "credentialType", "credentialId"], + "properties": { + "nodeId": { "type": "string", "minLength": 1 }, + "credentialType": { "type": "string", "minLength": 1 }, + "credentialId": { "type": ["string", "integer"] } + } + }, + "connections": { + "description": "Adjacency map: fromNodeName -> outputType -> outputIndex -> array of targets.", + "type": "object", + "additionalProperties": { + "$ref": "#/$defs/nodeConnectionsByType" + }, + "default": {} + }, + "nodeConnectionsByType": { + "type": "object", + "additionalProperties": false, + "properties": { + "main": { + "$ref": "#/$defs/outputIndexMap" + }, + "error": { + "$ref": "#/$defs/outputIndexMap" + } + }, + "anyOf": [ + { "required": ["main"] }, + { "required": ["error"] } + ] + }, + "outputIndexMap": { + "description": "Output index -> array of connection targets.", + "type": "object", + "additionalProperties": { + "type": 
"array", + "items": { "$ref": "#/$defs/connectionTarget" } + }, + "default": {} + }, + "connectionTarget": { + "type": "object", + "additionalProperties": false, + "required": ["node", "type", "index"], + "properties": { + "node": { + "description": "Target node name (n8n uses node 'name' in connections).", + "type": "string", + "minLength": 1 + }, + "type": { + "description": "Input type on target node (typically 'main' or 'error').", + "type": "string", + "minLength": 1 + }, + "index": { + "description": "Input index on target node.", + "type": "integer", + "minimum": 0 + } + } + }, + "trigger": { + "type": "object", + "additionalProperties": false, + "required": ["nodeId", "kind"], + "properties": { + "nodeId": { "type": "string", "minLength": 1 }, + "kind": { + "type": "string", + "enum": ["webhook", "schedule", "queue", "email", "poll", "manual", "other"] + }, + "enabled": { "type": "boolean", "default": true }, + "meta": { + "description": "Trigger-kind-specific metadata for routing/registration.", + "type": "object", + "additionalProperties": true, + "default": {} + } + } + } + } +} diff --git a/backend/autometabuilder/tools/validate_workflows.py b/backend/autometabuilder/tools/validate_workflows.py new file mode 100755 index 0000000..bb6858e --- /dev/null +++ b/backend/autometabuilder/tools/validate_workflows.py @@ -0,0 +1,123 @@ +#!/usr/bin/env python3 +"""Tool to validate all workflow JSON files against the N8N schema.""" +import json +import sys +from pathlib import Path +from typing import List, Tuple + +try: + import jsonschema + from jsonschema import Draft202012Validator +except ImportError: + print("Error: jsonschema library not found. 
Install with: poetry install") + sys.exit(1) + + +def load_schema() -> dict: + """Load the N8N workflow JSON schema.""" + schema_path = Path(__file__).resolve().parent.parent / "schema" / "n8n-workflow.schema.json" + + if not schema_path.exists(): + raise FileNotFoundError(f"Schema file not found at: {schema_path}") + + with open(schema_path, 'r', encoding='utf-8') as f: + return json.load(f) + + +def find_workflow_files(base_path: Path) -> List[Path]: + """Find all workflow.json files in the packages directory.""" + packages_dir = base_path / "packages" + if not packages_dir.exists(): + return [] + + workflow_files = [] + for workflow_file in packages_dir.rglob("workflow.json"): + workflow_files.append(workflow_file) + + return sorted(workflow_files) + + +def validate_workflow_file(workflow_path: Path, schema: dict) -> Tuple[bool, str]: + """ + Validate a single workflow JSON file against the schema. + + Returns: + Tuple of (is_valid, error_message); error_message is empty when the file is valid + """ + try: + with open(workflow_path, 'r', encoding='utf-8') as f: + workflow_data = json.load(f) + except json.JSONDecodeError as e: + return False, f"JSON parsing error: {e}" + except Exception as e: + return False, f"Error reading file: {e}" + + # Validate against schema; best_match selects the most relevant failure (None if valid) + validator = Draft202012Validator(schema) + error = jsonschema.exceptions.best_match(validator.iter_errors(workflow_data)) + + if error is not None: + # absolute_path locates the error from the document root, even inside anyOf branches + location = error.absolute_path + error_path = ".".join(str(p) for p in location) if location else "root" + return False, f"{error.message} (at {error_path})" + + return True, "" + + +def main(): + """Main function to validate all workflow files.""" + # Find the autometabuilder directory by looking for the packages subdirectory + # This works whether run as a script or via poetry command + script_dir = Path(__file__).resolve().parent.parent + + # Verify we found the right directory + if not (script_dir / "packages").exists(): + print("Error: Could not locate autometabuilder/packages directory") + 
return 1 + + # Load the schema + try: + schema = load_schema() + except Exception as e: + print(f"Error loading schema: {e}") + return 1 + + # Find all workflow files + workflow_files = find_workflow_files(script_dir) + + if not workflow_files: + print("No workflow.json files found in packages directory.") + return 1 + + print(f"Found {len(workflow_files)} workflow file(s) to validate\n") + + errors = [] + for workflow_path in workflow_files: + try: + relative_path = workflow_path.relative_to(script_dir) + except ValueError: + # If relative_to fails, use the full path + relative_path = workflow_path + + is_valid, error_msg = validate_workflow_file(workflow_path, schema) + + if is_valid: + print(f"✓ {relative_path}") + else: + print(f"✗ {relative_path}: {error_msg}") + errors.append((relative_path, error_msg)) + + print() + if errors: + print(f"Validation failed for {len(errors)} file(s):") + for path, error in errors: + print(f" - {path}: {error}") + return 1 + else: + print(f"All {len(workflow_files)} workflow file(s) are valid!") + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/backend/tests/test_workflow_validation.py b/backend/tests/test_workflow_validation.py new file mode 100644 index 0000000..94c6db3 --- /dev/null +++ b/backend/tests/test_workflow_validation.py @@ -0,0 +1,187 @@ +"""Tests for workflow JSON validation tool.""" +import json +from pathlib import Path + +import pytest + +from autometabuilder.tools.validate_workflows import ( + find_workflow_files, + load_schema, + validate_workflow_file, +) + + +def test_find_workflow_files(): + """Test that workflow files are found.""" + backend_dir = Path(__file__).parent.parent / "autometabuilder" + workflow_files = find_workflow_files(backend_dir) + + assert len(workflow_files) > 0 + assert all(f.name == "workflow.json" for f in workflow_files) + assert all(f.exists() for f in workflow_files) + + +def test_load_schema(): + """Test that the schema can be loaded.""" + schema = 
load_schema() + assert isinstance(schema, dict) + assert schema.get("$schema") == "https://json-schema.org/draft/2020-12/schema" + assert schema.get("title") == "N8N-Style Workflow" + + +def test_validate_all_workflow_files(): + """Test that all workflow files in packages directory are valid.""" + backend_dir = Path(__file__).parent.parent / "autometabuilder" + workflow_files = find_workflow_files(backend_dir) + schema = load_schema() + + errors = [] + for workflow_path in workflow_files: + try: + relative_path = workflow_path.relative_to(backend_dir) + except ValueError: + # If relative_to fails (e.g., due to symlinks), use the full path + relative_path = workflow_path + + is_valid, error_msg = validate_workflow_file(workflow_path, schema) + + if not is_valid: + errors.append((relative_path, error_msg)) + + # Report all errors for debugging + if errors: + error_report = "\n".join(f" - {path}: {error}" for path, error in errors) + pytest.fail(f"Workflow validation failed for {len(errors)} file(s):\n{error_report}") + + +def test_validate_minimal_valid_workflow(tmp_path): + """Test validation of a minimal valid workflow.""" + schema = load_schema() + workflow_data = { + "name": "Test Workflow", + "nodes": [ + { + "id": "node-1", + "name": "Test Node", + "type": "core.test", + "typeVersion": 1, + "position": [0, 0] + } + ], + "connections": {} + } + + workflow_file = tmp_path / "workflow.json" + workflow_file.write_text(json.dumps(workflow_data)) + + is_valid, error_msg = validate_workflow_file(workflow_file, schema) + assert is_valid, f"Validation failed: {error_msg}" + + +def test_validate_workflow_with_missing_name(tmp_path): + """Test validation of workflow missing required 'name' field.""" + schema = load_schema() + workflow_data = { + "nodes": [ + { + "id": "node-1", + "name": "Test Node", + "type": "core.test", + "typeVersion": 1, + "position": [0, 0] + } + ], + "connections": {} + } + + workflow_file = tmp_path / "workflow.json" + 
workflow_file.write_text(json.dumps(workflow_data)) + + is_valid, error_msg = validate_workflow_file(workflow_file, schema) + assert not is_valid + # jsonschema reports missing required property + assert "required" in error_msg.lower() or "'name'" in error_msg + + +def test_validate_workflow_with_empty_nodes(tmp_path): + """Test validation of workflow with empty nodes array.""" + schema = load_schema() + workflow_data = { + "name": "Empty Workflow", + "nodes": [], + "connections": {} + } + + workflow_file = tmp_path / "workflow.json" + workflow_file.write_text(json.dumps(workflow_data)) + + is_valid, error_msg = validate_workflow_file(workflow_file, schema) + assert not is_valid + # minItems wording varies by jsonschema release: "[] is too short" or "[] should be non-empty" + assert any(s in error_msg.lower() for s in ("too short", "non-empty")) and "nodes" in error_msg.lower() + + +def test_validate_workflow_with_invalid_json(tmp_path): + """Test validation of file with invalid JSON.""" + schema = load_schema() + workflow_file = tmp_path / "workflow.json" + workflow_file.write_text("{ invalid json }") + + is_valid, error_msg = validate_workflow_file(workflow_file, schema) + assert not is_valid + assert "json" in error_msg.lower() + + +def test_validate_workflow_with_invalid_node(tmp_path): + """Test validation of workflow with invalid node structure.""" + schema = load_schema() + workflow_data = { + "name": "Test Workflow", + "nodes": [ + { + "id": "node-1", + # Missing required fields: name, type, typeVersion, position + } + ], + "connections": {} + } + + workflow_file = tmp_path / "workflow.json" + workflow_file.write_text(json.dumps(workflow_data)) + + is_valid, error_msg = validate_workflow_file(workflow_file, schema) + assert not is_valid + + +def test_validate_workflow_with_triggers(tmp_path): + """Test validation of workflow with triggers array.""" + schema = load_schema() + workflow_data = { + "name": "Test Workflow with Triggers", + "nodes": [ + { + "id": "webhook-1", + "name": "Webhook", + "type": 
"n8n-nodes-base.webhook", + "typeVersion": 1, + "position": [0, 0] + } + ], + "connections": {}, + "triggers": [ + { + "nodeId": "webhook-1", + "kind": "webhook", + "enabled": True, + "meta": { + "path": "/api/test" + } + } + ] + } + + workflow_file = tmp_path / "workflow.json" + workflow_file.write_text(json.dumps(workflow_data)) + + is_valid, error_msg = validate_workflow_file(workflow_file, schema) + assert is_valid, f"Validation failed: {error_msg}" diff --git a/docs/WORKFLOW_VALIDATION.md b/docs/WORKFLOW_VALIDATION.md new file mode 100644 index 0000000..28a90b0 --- /dev/null +++ b/docs/WORKFLOW_VALIDATION.md @@ -0,0 +1,178 @@ +# Workflow JSON Validation + +This repository includes a validation tool for workflow JSON files based on the N8N-style workflow schema defined in ROADMAP.md and extracted to a dedicated schema file. + +## Schema Definition + +The workflow JSON schema is extracted from [ROADMAP.md](../ROADMAP.md) (lines 84-430) and stored in `backend/autometabuilder/schema/n8n-workflow.schema.json`. It defines the structure for N8N-style workflows with the following key requirements: + +- **Required fields**: `name`, `nodes`, `connections` +- **Nodes**: Must contain at least 1 node with `id`, `name`, `type`, `typeVersion`, and `position` +- **Connections**: Define the flow between nodes +- **Optional fields**: `id`, `active`, `versionId`, `createdAt`, `updatedAt`, `tags`, `meta`, `settings`, `pinData`, `staticData`, `credentials`, `triggers` + +## Validation Tool + +### Running the Validation Tool + +You can validate all workflow JSON files using the following methods: + +#### 1. Using Poetry Command (Recommended) + +```bash +poetry run validate-workflows +``` + +#### 2. Direct Python Execution + +```bash +cd backend/autometabuilder +python tools/validate_workflows.py +``` + +#### 3. As Part of CI + +The validation is automatically run as part of the CI pipeline. See `.github/workflows/ci.yml` for the configuration. 
+ +### What Gets Validated + +The tool automatically discovers and validates all `workflow.json` files in the `backend/autometabuilder/packages/` directory. + +Currently, there are 19 workflow files being validated: +- backend_bootstrap +- blank +- conditional_logic_demo +- contextual_iterative_loop +- data_processing_demo +- default_app_workflow +- dict_plugins_test +- game_tick_loop +- iterative_loop +- list_plugins_test +- logic_plugins_test +- math_plugins_test +- plan_execute_summarize +- repo_scan_context +- single_pass +- string_plugins_test +- testing_triangle +- web_server_bootstrap +- web_server_json_routes + +### Validation Rules + +The validator checks: + +1. **JSON Syntax**: File must be valid JSON +2. **Required Fields**: Must have `name`, `nodes`, `connections` +3. **Name Field**: Must be a non-empty string +4. **Nodes Array**: Must contain at least 1 node +5. **Node Structure**: Each node must have: + - `id` (non-empty string) + - `name` (non-empty string) + - `type` (non-empty string) + - `typeVersion` (number >= 1) + - `position` (array of 2 numbers [x, y]) +6. **Connections**: Must be an object/dict +7. **Triggers** (if present): Must be an array of valid trigger objects + +### Example Valid Workflow + +```json +{ + "name": "Example Workflow", + "active": false, + "nodes": [ + { + "id": "start", + "name": "Start", + "type": "core.start", + "typeVersion": 1, + "position": [0, 0], + "parameters": {} + } + ], + "connections": {}, + "triggers": [ + { + "nodeId": "start", + "kind": "manual", + "enabled": true, + "meta": { + "description": "Manually triggered workflow" + } + } + ] +} +``` + +## Testing + +The validation tool has its own test suite: + +```bash +# Run validation tests +poetry run pytest backend/tests/test_workflow_validation.py -v + +# Run all tests including workflow validation +poetry run pytest +``` + +## Adding New Workflows + +When adding new workflow JSON files: + +1. 
Place the `workflow.json` file in a package directory under `backend/autometabuilder/packages/` +2. Ensure it follows the schema in `backend/autometabuilder/schema/n8n-workflow.schema.json` (extracted from ROADMAP.md) +3. Run the validation tool to verify: `poetry run validate-workflows` +4. The validation will automatically run in CI when you push your changes + +## Common Issues + +### Empty Nodes Array + +**Error**: `[] is too short (at nodes)` (newer `jsonschema` releases word this as `[] should be non-empty (at nodes)`) + +**Solution**: Add at least one node to the workflow. Even blank templates need a starting node: + +```json +{ + "name": "Blank Canvas", + "nodes": [ + { + "id": "start", + "name": "Start", + "type": "core.start", + "typeVersion": 1, + "position": [0, 0], + "parameters": {} + } + ], + "connections": {} +} +``` + +### Missing Required Fields + +**Error**: `'name' is a required property (at root)` + +**Solution**: Ensure your workflow JSON includes all required top-level fields: `name`, `nodes`, and `connections`. + +### Invalid Node Structure + +**Error**: for example `'position' is a required property (at nodes.0)`, where the path after "at" identifies the offending node by its index + +**Solution**: Verify each node has all required fields and correct types. 
Common issues: +- Missing `position` array +- `typeVersion` less than 1 +- Empty strings for `id`, `name`, or `type` +- Position not being an array of exactly 2 numbers + +## Implementation Details + +The validation is implemented using: +- **JSON Schema**: `backend/autometabuilder/schema/n8n-workflow.schema.json` (extracted from ROADMAP.md) +- **Validation Library**: `jsonschema` (a JSON Schema implementation for Python; the tool uses its `Draft202012Validator`) +- **Validation Tool**: `backend/autometabuilder/tools/validate_workflows.py` +- **Tests**: `backend/tests/test_workflow_validation.py` +- **Schema Tests**: `backend/tests/test_n8n_schema.py` diff --git a/pyproject.toml b/pyproject.toml index 43c8b78..1ddaed7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,6 +17,7 @@ tenacity = "^9.1.2" flask = "^2.3.3" slack-sdk = "^3.39.0" discord-py = "^2.6.4" +jsonschema = "^4.10.3" [build-system] requires = ["poetry-core"] @@ -24,6 +25,7 @@ build-backend = "poetry.core.masonry.api" [tool.poetry.scripts] autometabuilder = "autometabuilder.main:main" +validate-workflows = "autometabuilder.tools.validate_workflows:main" [dependency-groups] dev = [