Add workflow JSON validation tool with tests and CI integration

Co-authored-by: johndoe6345789 <224850594+johndoe6345789@users.noreply.github.com>
2026-04-24 13:54:59 +00:00 · 2026-01-10 23:28:52 +00:00
parent a33a300c5e
commit 759c9efcbf
6 changed files with 472 additions and 1 deletions
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -32,6 +32,9 @@ jobs:
      - name: Build package
        run: poetry build

+      - name: Validate workflow JSON files
+        run: poetry run validate-workflows
+
      - name: Static analysis
        run: poetry run python -m compileall src

--- a/backend/autometabuilder/packages/blank/workflow.json
+++ b/backend/autometabuilder/packages/blank/workflow.json
@@ -1,6 +1,18 @@
 {
  "name": "Blank Canvas",
  "active": false,
-  "nodes": [],
+  "nodes": [
+    {
+      "id": "start",
+      "name": "Start",
+      "type": "core.start",
+      "typeVersion": 1,
+      "position": [
+        0,
+        0
+      ],
+      "parameters": {}
+    }
+  ],
  "connections": {}
 }
--- a/backend/autometabuilder/tools/validate_workflows.py
+++ b/backend/autometabuilder/tools/validate_workflows.py
@@ -0,0 +1,113 @@
+#!/usr/bin/env python3
+"""Tool to validate all workflow JSON files against the N8N schema."""
+import json
+import sys
+from pathlib import Path
+from typing import List, Tuple
+
+# Add the backend directory to the path to import the schema module
+backend_dir = Path(__file__).parent.parent.parent
+sys.path.insert(0, str(backend_dir))
+
+from autometabuilder.workflow.n8n_schema import N8NWorkflow
+
+
+def find_workflow_files(base_path: Path) -> List[Path]:
+    """Find all workflow.json files in the packages directory."""
+    packages_dir = base_path / "packages"
+    if not packages_dir.exists():
+        return []
+    
+    workflow_files = []
+    for workflow_file in packages_dir.rglob("workflow.json"):
+        workflow_files.append(workflow_file)
+    
+    return sorted(workflow_files)
+
+
+def validate_workflow_file(workflow_path: Path) -> Tuple[bool, str]:
+    """
+    Validate a single workflow JSON file.
+    
+    Returns:
+        Tuple of (is_valid, error_message)
+    """
+    try:
+        with open(workflow_path, 'r', encoding='utf-8') as f:
+            workflow_data = json.load(f)
+    except json.JSONDecodeError as e:
+        return False, f"JSON parsing error: {e}"
+    except Exception as e:
+        return False, f"Error reading file: {e}"
+    
+    # Basic structure checks
+    if not isinstance(workflow_data, dict):
+        return False, "Workflow data must be an object"
+    
+    # Check required fields
+    required_fields = ["name", "nodes", "connections"]
+    missing_fields = [field for field in required_fields if field not in workflow_data]
+    if missing_fields:
+        return False, f"Missing required fields: {', '.join(missing_fields)}"
+    
+    # Check name
+    if not isinstance(workflow_data["name"], str) or not workflow_data["name"]:
+        return False, "Field 'name' must be a non-empty string"
+    
+    # Check nodes
+    if not isinstance(workflow_data["nodes"], list):
+        return False, "Field 'nodes' must be an array"
+    
+    if len(workflow_data["nodes"]) < 1:
+        return False, "Field 'nodes' must contain at least 1 node (use a start node for blank workflows)"
+    
+    # Check connections
+    if not isinstance(workflow_data["connections"], dict):
+        return False, "Field 'connections' must be an object"
+    
+    # Full validation
+    is_valid = N8NWorkflow.validate(workflow_data)
+    if not is_valid:
+        return False, "Schema validation failed (check node structure, position, types, etc.)"
+    
+    return True, ""
+
+
+def main():
+    """Main function to validate all workflow files."""
+    # Locate the autometabuilder package directory (the parent of packages/)
+    script_dir = Path(__file__).parent.parent.parent / "autometabuilder"
+    
+    # Find all workflow files
+    workflow_files = find_workflow_files(script_dir)
+    
+    if not workflow_files:
+        print("No workflow.json files found in packages directory.")
+        return 1
+    
+    print(f"Found {len(workflow_files)} workflow file(s) to validate\n")
+    
+    errors = []
+    for workflow_path in workflow_files:
+        relative_path = workflow_path.relative_to(script_dir)
+        is_valid, error_msg = validate_workflow_file(workflow_path)
+        
+        if is_valid:
+            print(f"✓ {relative_path}")
+        else:
+            print(f"✗ {relative_path}: {error_msg}")
+            errors.append((relative_path, error_msg))
+    
+    print()
+    if errors:
+        print(f"Validation failed for {len(errors)} file(s):")
+        for path, error in errors:
+            print(f"  - {path}: {error}")
+        return 1
+    else:
+        print(f"All {len(workflow_files)} workflow file(s) are valid!")
+        return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
--- a/backend/tests/test_workflow_validation.py
+++ b/backend/tests/test_workflow_validation.py
@@ -0,0 +1,165 @@
+"""Tests for workflow JSON validation tool."""
+import json
+from pathlib import Path
+
+import pytest
+
+from autometabuilder.tools.validate_workflows import (
+    find_workflow_files,
+    validate_workflow_file,
+)
+
+
+def test_find_workflow_files():
+    """Test that workflow files are found."""
+    backend_dir = Path(__file__).parent.parent / "autometabuilder"
+    workflow_files = find_workflow_files(backend_dir)
+    
+    assert len(workflow_files) > 0
+    assert all(f.name == "workflow.json" for f in workflow_files)
+    assert all(f.exists() for f in workflow_files)
+
+
+def test_validate_all_workflow_files():
+    """Test that all workflow files in packages directory are valid."""
+    backend_dir = Path(__file__).parent.parent / "autometabuilder"
+    workflow_files = find_workflow_files(backend_dir)
+    
+    errors = []
+    for workflow_path in workflow_files:
+        relative_path = workflow_path.relative_to(backend_dir)
+        is_valid, error_msg = validate_workflow_file(workflow_path)
+        
+        if not is_valid:
+            errors.append((relative_path, error_msg))
+    
+    # Report all errors for debugging
+    if errors:
+        error_report = "\n".join(f"  - {path}: {error}" for path, error in errors)
+        pytest.fail(f"Workflow validation failed for {len(errors)} file(s):\n{error_report}")
+
+
+def test_validate_minimal_valid_workflow(tmp_path):
+    """Test validation of a minimal valid workflow."""
+    workflow_data = {
+        "name": "Test Workflow",
+        "nodes": [
+            {
+                "id": "node-1",
+                "name": "Test Node",
+                "type": "core.test",
+                "typeVersion": 1,
+                "position": [0, 0]
+            }
+        ],
+        "connections": {}
+    }
+    
+    workflow_file = tmp_path / "workflow.json"
+    workflow_file.write_text(json.dumps(workflow_data))
+    
+    is_valid, error_msg = validate_workflow_file(workflow_file)
+    assert is_valid, f"Validation failed: {error_msg}"
+
+
+def test_validate_workflow_with_missing_name(tmp_path):
+    """Test validation of workflow missing required 'name' field."""
+    workflow_data = {
+        "nodes": [
+            {
+                "id": "node-1",
+                "name": "Test Node",
+                "type": "core.test",
+                "typeVersion": 1,
+                "position": [0, 0]
+            }
+        ],
+        "connections": {}
+    }
+    
+    workflow_file = tmp_path / "workflow.json"
+    workflow_file.write_text(json.dumps(workflow_data))
+    
+    is_valid, error_msg = validate_workflow_file(workflow_file)
+    assert not is_valid
+    assert "name" in error_msg.lower()
+
+
+def test_validate_workflow_with_empty_nodes(tmp_path):
+    """Test validation of workflow with empty nodes array."""
+    workflow_data = {
+        "name": "Empty Workflow",
+        "nodes": [],
+        "connections": {}
+    }
+    
+    workflow_file = tmp_path / "workflow.json"
+    workflow_file.write_text(json.dumps(workflow_data))
+    
+    is_valid, error_msg = validate_workflow_file(workflow_file)
+    assert not is_valid
+    assert "nodes" in error_msg.lower()
+    assert "at least 1" in error_msg.lower()
+
+
+def test_validate_workflow_with_invalid_json(tmp_path):
+    """Test validation of file with invalid JSON."""
+    workflow_file = tmp_path / "workflow.json"
+    workflow_file.write_text("{ invalid json }")
+    
+    is_valid, error_msg = validate_workflow_file(workflow_file)
+    assert not is_valid
+    assert "json" in error_msg.lower()
+
+
+def test_validate_workflow_with_invalid_node(tmp_path):
+    """Test validation of workflow with invalid node structure."""
+    workflow_data = {
+        "name": "Test Workflow",
+        "nodes": [
+            {
+                "id": "node-1",
+                # Missing required fields: name, type, typeVersion, position
+            }
+        ],
+        "connections": {}
+    }
+    
+    workflow_file = tmp_path / "workflow.json"
+    workflow_file.write_text(json.dumps(workflow_data))
+    
+    is_valid, error_msg = validate_workflow_file(workflow_file)
+    assert not is_valid
+
+
+def test_validate_workflow_with_triggers(tmp_path):
+    """Test validation of workflow with triggers array."""
+    workflow_data = {
+        "name": "Test Workflow with Triggers",
+        "nodes": [
+            {
+                "id": "webhook-1",
+                "name": "Webhook",
+                "type": "n8n-nodes-base.webhook",
+                "typeVersion": 1,
+                "position": [0, 0]
+            }
+        ],
+        "connections": {},
+        "triggers": [
+            {
+                "nodeId": "webhook-1",
+                "kind": "webhook",
+                "enabled": True,
+                "meta": {
+                    "path": "/api/test"
+                }
+            }
+        ]
+    }
+    
+    workflow_file = tmp_path / "workflow.json"
+    workflow_file.write_text(json.dumps(workflow_data))
+    
+    is_valid, error_msg = validate_workflow_file(workflow_file)
+    assert is_valid, f"Validation failed: {error_msg}"
--- a/docs/WORKFLOW_VALIDATION.md
+++ b/docs/WORKFLOW_VALIDATION.md
@@ -0,0 +1,177 @@
+# Workflow JSON Validation
+
+This repository includes a tool that validates workflow JSON files against the N8N-style workflow schema defined in ROADMAP.md.
+
+## Schema Definition
+
+The workflow JSON schema is defined in [ROADMAP.md](ROADMAP.md) (lines 84-430). It defines the structure for N8N-style workflows with the following key requirements:
+
+- **Required fields**: `name`, `nodes`, `connections`
+- **Nodes**: Must contain at least 1 node; every node requires `id`, `name`, `type`, `typeVersion`, and `position`
+- **Connections**: Define the flow between nodes
+- **Optional fields**: `id`, `active`, `versionId`, `createdAt`, `updatedAt`, `tags`, `meta`, `settings`, `pinData`, `staticData`, `credentials`, `triggers`
+
+## Validation Tool
+
+### Running the Validation Tool
+
+You can validate all workflow JSON files using the following methods:
+
+#### 1. Using Poetry Command (Recommended)
+
+```bash
+poetry run validate-workflows
+```
+
+#### 2. Direct Python Execution
+
+```bash
+cd backend/autometabuilder
+python tools/validate_workflows.py
+```
+
+#### 3. As Part of CI
+
+The validation is automatically run as part of the CI pipeline. See `.github/workflows/ci.yml` for the configuration.
+
+### What Gets Validated
+
+The tool automatically discovers and validates all `workflow.json` files in the `backend/autometabuilder/packages/` directory.
+
+At the time of writing, 19 workflow files are validated:
+- backend_bootstrap
+- blank
+- conditional_logic_demo
+- contextual_iterative_loop
+- data_processing_demo
+- default_app_workflow
+- dict_plugins_test
+- game_tick_loop
+- iterative_loop
+- list_plugins_test
+- logic_plugins_test
+- math_plugins_test
+- plan_execute_summarize
+- repo_scan_context
+- single_pass
+- string_plugins_test
+- testing_triangle
+- web_server_bootstrap
+- web_server_json_routes
+
+### Validation Rules
+
+The validator checks:
+
+1. **JSON Syntax**: File must be valid JSON
+2. **Required Fields**: Must have `name`, `nodes`, `connections`
+3. **Name Field**: Must be a non-empty string
+4. **Nodes Array**: Must contain at least 1 node
+5. **Node Structure**: Each node must have:
+   - `id` (non-empty string)
+   - `name` (non-empty string)
+   - `type` (non-empty string)
+   - `typeVersion` (number >= 1)
+   - `position` (array of 2 numbers [x, y])
+6. **Connections**: Must be an object/dict
+7. **Triggers** (if present): Must be an array of valid trigger objects
+
+### Example Valid Workflow
+
+```json
+{
+  "name": "Example Workflow",
+  "active": false,
+  "nodes": [
+    {
+      "id": "start",
+      "name": "Start",
+      "type": "core.start",
+      "typeVersion": 1,
+      "position": [0, 0],
+      "parameters": {}
+    }
+  ],
+  "connections": {},
+  "triggers": [
+    {
+      "nodeId": "start",
+      "kind": "manual",
+      "enabled": true,
+      "meta": {
+        "description": "Manually triggered workflow"
+      }
+    }
+  ]
+}
+```
+
+## Testing
+
+The validation tool has its own test suite:
+
+```bash
+# Run validation tests
+poetry run pytest backend/tests/test_workflow_validation.py -v
+
+# Run all tests including workflow validation
+poetry run pytest
+```
+
+## Adding New Workflows
+
+When adding new workflow JSON files:
+
+1. Place the `workflow.json` file in a package directory under `backend/autometabuilder/packages/`
+2. Ensure it follows the schema defined in ROADMAP.md
+3. Run the validation tool to verify: `poetry run validate-workflows`
+4. The validation will automatically run in CI when you push your changes
+
+## Common Issues
+
+### Empty Nodes Array
+
+**Error**: `Field 'nodes' must contain at least 1 node (use a start node for blank workflows)`
+
+**Solution**: Add at least one node to the workflow. Even blank templates need a starting node:
+
+```json
+{
+  "name": "Blank Canvas",
+  "nodes": [
+    {
+      "id": "start",
+      "name": "Start",
+      "type": "core.start",
+      "typeVersion": 1,
+      "position": [0, 0],
+      "parameters": {}
+    }
+  ],
+  "connections": {}
+}
+```
+
+### Missing Required Fields
+
+**Error**: `Missing required fields: name, nodes` (the list names whichever required fields are absent)
+
+**Solution**: Ensure your workflow JSON includes all required top-level fields: `name`, `nodes`, and `connections`.
+
+### Invalid Node Structure
+
+**Error**: `Schema validation failed (check node structure, position, types, etc.)`
+
+**Solution**: Verify each node has all required fields and correct types. Common issues:
+- Missing `position` array
+- `typeVersion` less than 1
+- Empty strings for `id`, `name`, or `type`
+- `position` not being an array of exactly 2 numbers
+
+## Implementation Details
+
+The validation is implemented in:
+- **Validator Module**: `backend/autometabuilder/workflow/n8n_schema.py`
+- **Validation Tool**: `backend/autometabuilder/tools/validate_workflows.py`
+- **Tests**: `backend/tests/test_workflow_validation.py`
+- **Schema Tests**: `backend/tests/test_n8n_schema.py`
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -24,6 +24,7 @@ build-backend = "poetry.core.masonry.api"

 [tool.poetry.scripts]
 autometabuilder = "autometabuilder.main:main"
+validate-workflows = "autometabuilder.tools.validate_workflows:main"

 [dependency-groups]
 dev = [