Merge pull request #24 from johndoe6345789/copilot/validate-workflow-json-schema

Add workflow JSON schema validation tool using jsonschema library
2026-04-24 13:54:59 +00:00 · 2026-01-11 00:06:30 +00:00
parent 6bae697731 0d1f8d06c4
commit 9e81074ea6
9 changed files with 861 additions and 1 deletions
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -32,6 +32,9 @@ jobs:
      - name: Build package
        run: poetry build

+      - name: Validate workflow JSON files
+        run: poetry run validate-workflows
+
      - name: Static analysis
        run: poetry run python -m compileall src

--- a/README.md
+++ b/README.md
@@ -48,6 +48,16 @@ The Next.js app now lives under `frontend/autometabuilder` and uses Material UI

 ## Testing & linting

+### Workflow JSON Validation
+
+Validate all workflow JSON files against the N8N schema:
+
+```bash
+poetry run validate-workflows
+```
+
+See [docs/WORKFLOW_VALIDATION.md](docs/WORKFLOW_VALIDATION.md) for detailed documentation.
+
 ### Python

 ```bash
--- a/backend/autometabuilder/packages/blank/workflow.json
+++ b/backend/autometabuilder/packages/blank/workflow.json
@@ -1,6 +1,18 @@
 {
  "name": "Blank Canvas",
  "active": false,
-  "nodes": [],
+  "nodes": [
+    {
+      "id": "start",
+      "name": "Start",
+      "type": "core.start",
+      "typeVersion": 1,
+      "position": [
+        0,
+        0
+      ],
+      "parameters": {}
+    }
+  ],
  "connections": {}
 }
--- a/backend/autometabuilder/schema/__init__.py
+++ b/backend/autometabuilder/schema/__init__.py
--- a/backend/autometabuilder/schema/n8n-workflow.schema.json
+++ b/backend/autometabuilder/schema/n8n-workflow.schema.json
@@ -0,0 +1,345 @@
+{
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "$id": "https://example.com/schemas/n8n-workflow.schema.json",
+  "title": "N8N-Style Workflow",
+  "type": "object",
+  "additionalProperties": false,
+  "required": ["name", "nodes", "connections"],
+  "properties": {
+    "id": {
+      "description": "Optional external identifier (DB id, UUID, etc.).",
+      "type": ["string", "integer"]
+    },
+    "name": {
+      "type": "string",
+      "minLength": 1
+    },
+    "active": {
+      "type": "boolean",
+      "default": false
+    },
+    "versionId": {
+      "description": "Optional version identifier for optimistic concurrency.",
+      "type": "string"
+    },
+    "createdAt": {
+      "type": "string",
+      "format": "date-time"
+    },
+    "updatedAt": {
+      "type": "string",
+      "format": "date-time"
+    },
+    "tags": {
+      "type": "array",
+      "items": { "$ref": "#/$defs/tag" },
+      "default": []
+    },
+    "meta": {
+      "description": "Arbitrary metadata. Keep stable keys for tooling.",
+      "type": "object",
+      "additionalProperties": true,
+      "default": {}
+    },
+    "settings": {
+      "$ref": "#/$defs/workflowSettings"
+    },
+    "pinData": {
+      "description": "Optional pinned execution data (useful for dev).",
+      "type": "object",
+      "additionalProperties": {
+        "type": "array",
+        "items": {
+          "type": "object",
+          "additionalProperties": true
+        }
+      }
+    },
+    "nodes": {
+      "type": "array",
+      "minItems": 1,
+      "items": { "$ref": "#/$defs/node" }
+    },
+    "connections": {
+      "$ref": "#/$defs/connections"
+    },
+    "staticData": {
+      "description": "Reserved for engine-managed workflow state.",
+      "type": "object",
+      "additionalProperties": true,
+      "default": {}
+    },
+    "credentials": {
+      "description": "Optional top-level credential bindings (engine-specific).",
+      "type": "array",
+      "items": { "$ref": "#/$defs/credentialBinding" },
+      "default": []
+    },
+    "triggers": {
+      "description": "Optional explicit trigger declarations for event-driven workflows.",
+      "type": "array",
+      "default": [],
+      "items": { "$ref": "#/$defs/trigger" }
+    }
+  },
+  "$defs": {
+    "tag": {
+      "type": "object",
+      "additionalProperties": false,
+      "required": ["name"],
+      "properties": {
+        "id": { "type": ["string", "integer"] },
+        "name": { "type": "string", "minLength": 1 }
+      }
+    },
+    "workflowSettings": {
+      "type": "object",
+      "additionalProperties": false,
+      "properties": {
+        "timezone": {
+          "description": "IANA timezone name, e.g. Europe/London.",
+          "type": "string"
+        },
+        "executionTimeout": {
+          "description": "Hard timeout in seconds for a workflow execution.",
+          "type": "integer",
+          "minimum": 0
+        },
+        "saveExecutionProgress": {
+          "type": "boolean",
+          "default": true
+        },
+        "saveManualExecutions": {
+          "type": "boolean",
+          "default": true
+        },
+        "saveDataErrorExecution": {
+          "description": "Persist execution data on error.",
+          "type": "string",
+          "enum": ["all", "none"],
+          "default": "all"
+        },
+        "saveDataSuccessExecution": {
+          "description": "Persist execution data on success.",
+          "type": "string",
+          "enum": ["all", "none"],
+          "default": "all"
+        },
+        "saveDataManualExecution": {
+          "description": "Persist execution data for manual runs.",
+          "type": "string",
+          "enum": ["all", "none"],
+          "default": "all"
+        },
+        "errorWorkflowId": {
+          "description": "Optional workflow id to call on error.",
+          "type": ["string", "integer"]
+        },
+        "callerPolicy": {
+          "description": "Optional policy controlling which workflows can call this workflow.",
+          "type": "string"
+        }
+      },
+      "default": {}
+    },
+    "node": {
+      "type": "object",
+      "additionalProperties": false,
+      "required": ["id", "name", "type", "typeVersion", "position"],
+      "properties": {
+        "id": {
+          "description": "Stable unique id within the workflow. Prefer UUID.",
+          "type": "string",
+          "minLength": 1
+        },
+        "name": {
+          "description": "Human-friendly name; should be unique in workflow.",
+          "type": "string",
+          "minLength": 1
+        },
+        "type": {
+          "description": "Node type identifier, e.g. n8n-nodes-base.httpRequest.",
+          "type": "string",
+          "minLength": 1
+        },
+        "typeVersion": {
+          "description": "Node implementation version.",
+          "type": ["integer", "number"],
+          "minimum": 1
+        },
+        "disabled": {
+          "type": "boolean",
+          "default": false
+        },
+        "notes": {
+          "type": "string",
+          "default": ""
+        },
+        "notesInFlow": {
+          "description": "When true, notes are displayed on canvas.",
+          "type": "boolean",
+          "default": false
+        },
+        "retryOnFail": {
+          "type": "boolean",
+          "default": false
+        },
+        "maxTries": {
+          "type": "integer",
+          "minimum": 1
+        },
+        "waitBetweenTries": {
+          "description": "Milliseconds.",
+          "type": "integer",
+          "minimum": 0
+        },
+        "continueOnFail": {
+          "type": "boolean",
+          "default": false
+        },
+        "alwaysOutputData": {
+          "type": "boolean",
+          "default": false
+        },
+        "executeOnce": {
+          "description": "If true, node executes only once per execution (engine-dependent).",
+          "type": "boolean",
+          "default": false
+        },
+        "position": {
+          "$ref": "#/$defs/position"
+        },
+        "parameters": {
+          "description": "Node-specific parameters. Typically JSON-serializable.",
+          "type": "object",
+          "additionalProperties": true,
+          "default": {}
+        },
+        "credentials": {
+          "description": "Node-level credential references.",
+          "type": "object",
+          "additionalProperties": {
+            "$ref": "#/$defs/credentialRef"
+          },
+          "default": {}
+        },
+        "webhookId": {
+          "description": "Optional webhook id (for webhook-based trigger nodes).",
+          "type": "string"
+        },
+        "onError": {
+          "description": "Node-level error routing policy (engine-dependent).",
+          "type": "string",
+          "enum": ["stopWorkflow", "continueRegularOutput", "continueErrorOutput"]
+        }
+      }
+    },
+    "position": {
+      "type": "array",
+      "minItems": 2,
+      "maxItems": 2,
+      "items": {
+        "type": "number"
+      }
+    },
+    "credentialRef": {
+      "type": "object",
+      "additionalProperties": false,
+      "required": ["id"],
+      "properties": {
+        "id": {
+          "description": "Credential id or stable key.",
+          "type": ["string", "integer"]
+        },
+        "name": {
+          "description": "Optional human label.",
+          "type": "string"
+        }
+      }
+    },
+    "credentialBinding": {
+      "type": "object",
+      "additionalProperties": false,
+      "required": ["nodeId", "credentialType", "credentialId"],
+      "properties": {
+        "nodeId": { "type": "string", "minLength": 1 },
+        "credentialType": { "type": "string", "minLength": 1 },
+        "credentialId": { "type": ["string", "integer"] }
+      }
+    },
+    "connections": {
+      "description": "Adjacency map: fromNodeName -> outputType -> outputIndex -> array of targets.",
+      "type": "object",
+      "additionalProperties": {
+        "$ref": "#/$defs/nodeConnectionsByType"
+      },
+      "default": {}
+    },
+    "nodeConnectionsByType": {
+      "type": "object",
+      "additionalProperties": false,
+      "properties": {
+        "main": {
+          "$ref": "#/$defs/outputIndexMap"
+        },
+        "error": {
+          "$ref": "#/$defs/outputIndexMap"
+        }
+      },
+      "anyOf": [
+        { "required": ["main"] },
+        { "required": ["error"] }
+      ]
+    },
+    "outputIndexMap": {
+      "description": "Output index -> array of connection targets.",
+      "type": "object",
+      "additionalProperties": {
+        "type": "array",
+        "items": { "$ref": "#/$defs/connectionTarget" }
+      },
+      "default": {}
+    },
+    "connectionTarget": {
+      "type": "object",
+      "additionalProperties": false,
+      "required": ["node", "type", "index"],
+      "properties": {
+        "node": {
+          "description": "Target node name (n8n uses node 'name' in connections).",
+          "type": "string",
+          "minLength": 1
+        },
+        "type": {
+          "description": "Input type on target node (typically 'main' or 'error').",
+          "type": "string",
+          "minLength": 1
+        },
+        "index": {
+          "description": "Input index on target node.",
+          "type": "integer",
+          "minimum": 0
+        }
+      }
+    },
+    "trigger": {
+      "type": "object",
+      "additionalProperties": false,
+      "required": ["nodeId", "kind"],
+      "properties": {
+        "nodeId": { "type": "string", "minLength": 1 },
+        "kind": {
+          "type": "string",
+          "enum": ["webhook", "schedule", "queue", "email", "poll", "manual", "other"]
+        },
+        "enabled": { "type": "boolean", "default": true },
+        "meta": {
+          "description": "Trigger-kind-specific metadata for routing/registration.",
+          "type": "object",
+          "additionalProperties": true,
+          "default": {}
+        }
+      }
+    }
+  }
+}
--- a/backend/autometabuilder/tools/validate_workflows.py
+++ b/backend/autometabuilder/tools/validate_workflows.py
@@ -0,0 +1,123 @@
+#!/usr/bin/env python3
+"""Tool to validate all workflow JSON files against the N8N schema."""
+import json
+import sys
+from pathlib import Path
+from typing import List, Tuple
+
+try:
+    import jsonschema
+    from jsonschema import Draft202012Validator
+except ImportError:
+    print("Error: jsonschema library not found. Install with: poetry install")
+    sys.exit(1)
+
+
+def load_schema() -> dict:
+    """Read and parse the bundled N8N workflow JSON schema.
+
+    The schema is looked up at ``schema/n8n-workflow.schema.json`` inside the
+    directory one level above this module's own directory.
+
+    Returns:
+        The parsed schema document.
+
+    Raises:
+        FileNotFoundError: If the schema file does not exist at that location.
+    """
+    package_root = Path(__file__).resolve().parent.parent
+    schema_path = package_root / "schema" / "n8n-workflow.schema.json"
+
+    if schema_path.exists():
+        with schema_path.open('r', encoding='utf-8') as handle:
+            return json.load(handle)
+
+    raise FileNotFoundError(f"Schema file not found at: {schema_path}")
+
+
+def find_workflow_files(base_path: Path) -> List[Path]:
+    """Collect every ``workflow.json`` beneath ``base_path / "packages"``.
+
+    Args:
+        base_path: Directory expected to contain a ``packages`` subdirectory.
+
+    Returns:
+        The matching paths in sorted order, searched recursively; an empty
+        list when the ``packages`` directory does not exist.
+    """
+    search_root = base_path / "packages"
+    if search_root.exists():
+        return sorted(search_root.rglob("workflow.json"))
+    return []
+
+
+def validate_workflow_file(workflow_path: Path, schema: dict) -> Tuple[bool, str]:
+    """
+    Validate a single workflow JSON file against the schema.
+
+    Args:
+        workflow_path: Path of the workflow JSON file to check.
+        schema: Parsed JSON Schema document to validate against.
+
+    Returns:
+        Tuple of (is_valid, error_message). ``error_message`` is empty when
+        the file is valid. Otherwise it describes either the read/parse
+        failure or one schema violation in the form
+        ``"<message> (at <dotted.path>)"``, with ``root`` used for the
+        top-level document.
+    """
+    try:
+        with open(workflow_path, 'r', encoding='utf-8') as f:
+            workflow_data = json.load(f)
+    except json.JSONDecodeError as e:
+        return False, f"JSON parsing error: {e}"
+    except Exception as e:
+        return False, f"Error reading file: {e}"
+
+    validator = Draft202012Validator(schema)
+    # Only one violation is reported, so make the choice deterministic: order
+    # by location in the document, then by message. Path elements are
+    # stringified so integer array indexes and string keys compare safely.
+    errors = sorted(
+        validator.iter_errors(workflow_data),
+        key=lambda err: ([str(part) for part in err.path], err.message),
+    )
+
+    if not errors:
+        return True, ""
+
+    error = errors[0]
+    error_path = ".".join(str(p) for p in error.path) if error.path else "root"
+    return False, f"{error.message} (at {error_path})"
+
+
+def main():
+    """Validate every packaged workflow file and print a report.
+
+    Returns:
+        0 when all discovered workflow files validate. 1 when the packages
+        directory is missing, the schema cannot be loaded, no workflow files
+        are found, or at least one file fails validation.
+    """
+    # Two levels up from this file is the directory expected to hold
+    # ``packages``; this holds whether run as a script or via the poetry entry.
+    package_root = Path(__file__).resolve().parent.parent
+
+    if not (package_root / "packages").exists():
+        print("Error: Could not locate autometabuilder/packages directory")
+        return 1
+
+    try:
+        schema = load_schema()
+    except Exception as exc:
+        print(f"Error loading schema: {exc}")
+        return 1
+
+    workflow_files = find_workflow_files(package_root)
+    if not workflow_files:
+        print("No workflow.json files found in packages directory.")
+        return 1
+
+    print(f"Found {len(workflow_files)} workflow file(s) to validate\n")
+
+    failures = []
+    for workflow_path in workflow_files:
+        # Prefer a short path relative to the package root for display.
+        try:
+            shown_path = workflow_path.relative_to(package_root)
+        except ValueError:
+            shown_path = workflow_path
+
+        ok, reason = validate_workflow_file(workflow_path, schema)
+        if not ok:
+            print(f"✗ {shown_path}: {reason}")
+            failures.append((shown_path, reason))
+            continue
+        print(f"✓ {shown_path}")
+
+    print()
+    if not failures:
+        print(f"All {len(workflow_files)} workflow file(s) are valid!")
+        return 0
+
+    print(f"Validation failed for {len(failures)} file(s):")
+    for shown_path, reason in failures:
+        print(f"  - {shown_path}: {reason}")
+    return 1
+
+
+if __name__ == "__main__":
+    sys.exit(main())
--- a/backend/tests/test_workflow_validation.py
+++ b/backend/tests/test_workflow_validation.py
@@ -0,0 +1,187 @@
+"""Tests for workflow JSON validation tool."""
+import json
+from pathlib import Path
+
+import pytest
+
+from autometabuilder.tools.validate_workflows import (
+    find_workflow_files,
+    load_schema,
+    validate_workflow_file,
+)
+
+
+def test_find_workflow_files():
+    """Discovery yields at least one existing file, all named ``workflow.json``."""
+    package_root = Path(__file__).parent.parent / "autometabuilder"
+    discovered = find_workflow_files(package_root)
+
+    assert len(discovered) > 0
+
+    wrongly_named = [path for path in discovered if path.name != "workflow.json"]
+    assert not wrongly_named
+
+    missing = [path for path in discovered if not path.exists()]
+    assert not missing
+
+
+def test_load_schema():
+    """The bundled schema loads as a dict with the expected dialect and title."""
+    loaded = load_schema()
+    assert isinstance(loaded, dict)
+
+    expected_fields = {
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "title": "N8N-Style Workflow",
+    }
+    for key, expected in expected_fields.items():
+        assert loaded.get(key) == expected
+
+
+def test_validate_all_workflow_files():
+    """Every ``workflow.json`` in the packages directory validates against the schema.
+
+    Failures are collected for all files and reported together so a single
+    run shows every invalid workflow.
+    """
+    backend_dir = Path(__file__).parent.parent / "autometabuilder"
+    workflow_files = find_workflow_files(backend_dir)
+    # Guard against a vacuous pass: with nothing discovered the loop below
+    # never runs and the test would succeed without validating anything.
+    assert workflow_files, f"No workflow.json files found under {backend_dir / 'packages'}"
+    schema = load_schema()
+
+    errors = []
+    for workflow_path in workflow_files:
+        try:
+            relative_path = workflow_path.relative_to(backend_dir)
+        except ValueError:
+            # If relative_to fails (e.g., due to symlinks), use the full path
+            relative_path = workflow_path
+
+        is_valid, error_msg = validate_workflow_file(workflow_path, schema)
+
+        if not is_valid:
+            errors.append((relative_path, error_msg))
+
+    # Report all errors for debugging
+    if errors:
+        error_report = "\n".join(f"  - {path}: {error}" for path, error in errors)
+        pytest.fail(f"Workflow validation failed for {len(errors)} file(s):\n{error_report}")
+
+
+def test_validate_minimal_valid_workflow(tmp_path):
+    """A workflow with only the required fields and one complete node is accepted."""
+    only_node = {
+        "id": "node-1",
+        "name": "Test Node",
+        "type": "core.test",
+        "typeVersion": 1,
+        "position": [0, 0],
+    }
+    document = {"name": "Test Workflow", "nodes": [only_node], "connections": {}}
+
+    target = tmp_path / "workflow.json"
+    target.write_text(json.dumps(document))
+
+    is_valid, error_msg = validate_workflow_file(target, load_schema())
+    assert is_valid, f"Validation failed: {error_msg}"
+
+
+def test_validate_workflow_with_missing_name(tmp_path):
+    """A workflow lacking the required top-level ``name`` field is rejected."""
+    only_node = {
+        "id": "node-1",
+        "name": "Test Node",
+        "type": "core.test",
+        "typeVersion": 1,
+        "position": [0, 0],
+    }
+    # Deliberately no "name" key at the top level.
+    document = {"nodes": [only_node], "connections": {}}
+
+    target = tmp_path / "workflow.json"
+    target.write_text(json.dumps(document))
+
+    is_valid, error_msg = validate_workflow_file(target, load_schema())
+    assert not is_valid
+    # jsonschema reports the missing property as "... is a required property"
+    mentions_required = "required" in error_msg.lower()
+    assert mentions_required or "'name'" in error_msg
+
+
+def test_validate_workflow_with_empty_nodes(tmp_path):
+    """An empty ``nodes`` array is rejected (the schema sets ``minItems: 1``)."""
+    schema = load_schema()
+    workflow_data = {
+        "name": "Empty Workflow",
+        "nodes": [],
+        "connections": {}
+    }
+
+    workflow_file = tmp_path / "workflow.json"
+    workflow_file.write_text(json.dumps(workflow_data))
+
+    is_valid, error_msg = validate_workflow_file(workflow_file, schema)
+    assert not is_valid
+    # The location suffix is produced by validate_workflow_file itself, so it
+    # is stable regardless of the installed jsonschema release.
+    assert "(at nodes)" in error_msg
+    # The message wording comes from jsonschema and differs between releases:
+    # "[] is too short" in older ones, "[] should be non-empty" in newer ones.
+    lowered = error_msg.lower()
+    assert "too short" in lowered or "non-empty" in lowered
+
+
+def test_validate_workflow_with_invalid_json(tmp_path):
+    """A file that is not parseable JSON is reported as invalid with a JSON error."""
+    broken_file = tmp_path / "workflow.json"
+    broken_file.write_text("{ invalid json }")
+
+    is_valid, error_msg = validate_workflow_file(broken_file, load_schema())
+
+    assert not is_valid
+    assert "json" in error_msg.lower()
+
+
+def test_validate_workflow_with_invalid_node(tmp_path):
+    """A node missing its required fields is rejected, with the error located on that node."""
+    schema = load_schema()
+    workflow_data = {
+        "name": "Test Workflow",
+        "nodes": [
+            {
+                "id": "node-1",
+                # Missing required fields: name, type, typeVersion, position
+            }
+        ],
+        "connections": {}
+    }
+
+    workflow_file = tmp_path / "workflow.json"
+    workflow_file.write_text(json.dumps(workflow_data))
+
+    is_valid, error_msg = validate_workflow_file(workflow_file, schema)
+    assert not is_valid
+    # Pin the failure to its cause so the test cannot pass because of an
+    # unrelated problem (unreadable file, different schema violation, ...).
+    # Every violation here is a missing required property on the first node.
+    assert "required" in error_msg.lower()
+    assert "(at nodes.0)" in error_msg
+
+
+def test_validate_workflow_with_triggers(tmp_path):
+    """A workflow declaring a well-formed ``triggers`` entry is accepted."""
+    webhook_node = {
+        "id": "webhook-1",
+        "name": "Webhook",
+        "type": "n8n-nodes-base.webhook",
+        "typeVersion": 1,
+        "position": [0, 0],
+    }
+    webhook_trigger = {
+        "nodeId": "webhook-1",
+        "kind": "webhook",
+        "enabled": True,
+        "meta": {"path": "/api/test"},
+    }
+    document = {
+        "name": "Test Workflow with Triggers",
+        "nodes": [webhook_node],
+        "connections": {},
+        "triggers": [webhook_trigger],
+    }
+
+    target = tmp_path / "workflow.json"
+    target.write_text(json.dumps(document))
+
+    is_valid, error_msg = validate_workflow_file(target, load_schema())
+    assert is_valid, f"Validation failed: {error_msg}"
--- a/docs/WORKFLOW_VALIDATION.md
+++ b/docs/WORKFLOW_VALIDATION.md
@@ -0,0 +1,178 @@
+# Workflow JSON Validation
+
+This repository includes a validation tool for workflow JSON files based on the N8N-style workflow schema defined in ROADMAP.md and extracted to a dedicated schema file.
+
+## Schema Definition
+
+The workflow JSON schema is extracted from [ROADMAP.md](../ROADMAP.md) (lines 84-430) and stored in `backend/autometabuilder/schema/n8n-workflow.schema.json`. It defines the structure for N8N-style workflows with the following key requirements:
+
+- **Required fields**: `name`, `nodes`, `connections`
+- **Nodes**: Must contain at least 1 node with `id`, `name`, `type`, `typeVersion`, and `position`
+- **Connections**: Define the flow between nodes
+- **Optional fields**: `id`, `active`, `versionId`, `createdAt`, `updatedAt`, `tags`, `meta`, `settings`, `pinData`, `staticData`, `credentials`, `triggers`
+
+## Validation Tool
+
+### Running the Validation Tool
+
+You can validate all workflow JSON files using the following methods:
+
+#### 1. Using Poetry Command (Recommended)
+
+```bash
+poetry run validate-workflows
+```
+
+#### 2. Direct Python Execution
+
+```bash
+cd backend/autometabuilder
+python tools/validate_workflows.py
+```
+
+#### 3. As Part of CI
+
+The validation is automatically run as part of the CI pipeline. See `.github/workflows/ci.yml` for the configuration.
+
+### What Gets Validated
+
+The tool automatically discovers and validates all `workflow.json` files in the `backend/autometabuilder/packages/` directory.
+
+Currently, there are 19 workflow files being validated:
+- backend_bootstrap
+- blank
+- conditional_logic_demo
+- contextual_iterative_loop
+- data_processing_demo
+- default_app_workflow
+- dict_plugins_test
+- game_tick_loop
+- iterative_loop
+- list_plugins_test
+- logic_plugins_test
+- math_plugins_test
+- plan_execute_summarize
+- repo_scan_context
+- single_pass
+- string_plugins_test
+- testing_triangle
+- web_server_bootstrap
+- web_server_json_routes
+
+### Validation Rules
+
+The validator checks:
+
+1. **JSON Syntax**: File must be valid JSON
+2. **Required Fields**: Must have `name`, `nodes`, `connections`
+3. **Name Field**: Must be a non-empty string
+4. **Nodes Array**: Must contain at least 1 node
+5. **Node Structure**: Each node must have:
+   - `id` (non-empty string)
+   - `name` (non-empty string)
+   - `type` (non-empty string)
+   - `typeVersion` (number >= 1)
+   - `position` (array of 2 numbers [x, y])
+6. **Connections**: Must be an object/dict
+7. **Triggers** (if present): Must be an array of valid trigger objects
+
+### Example Valid Workflow
+
+```json
+{
+  "name": "Example Workflow",
+  "active": false,
+  "nodes": [
+    {
+      "id": "start",
+      "name": "Start",
+      "type": "core.start",
+      "typeVersion": 1,
+      "position": [0, 0],
+      "parameters": {}
+    }
+  ],
+  "connections": {},
+  "triggers": [
+    {
+      "nodeId": "start",
+      "kind": "manual",
+      "enabled": true,
+      "meta": {
+        "description": "Manually triggered workflow"
+      }
+    }
+  ]
+}
+```
+
+## Testing
+
+The validation tool has its own test suite:
+
+```bash
+# Run validation tests
+poetry run pytest backend/tests/test_workflow_validation.py -v
+
+# Run all tests including workflow validation
+poetry run pytest
+```
+
+## Adding New Workflows
+
+When adding new workflow JSON files:
+
+1. Place the `workflow.json` file in a package directory under `backend/autometabuilder/packages/`
+2. Ensure it follows the schema in `backend/autometabuilder/schema/n8n-workflow.schema.json` (the file the validator actually loads)
+3. Run the validation tool to verify: `poetry run validate-workflows`
+4. The validation will automatically run in CI when you push your changes
+
+## Common Issues
+
+### Empty Nodes Array
+
+**Error**: `[] is too short (at nodes)` (newer `jsonschema` releases word this as `[] should be non-empty (at nodes)`)
+
+**Solution**: Add at least one node to the workflow. Even blank templates need a starting node:
+
+```json
+{
+  "name": "Blank Canvas",
+  "nodes": [
+    {
+      "id": "start",
+      "name": "Start",
+      "type": "core.start",
+      "typeVersion": 1,
+      "position": [0, 0],
+      "parameters": {}
+    }
+  ],
+  "connections": {}
+}
+```
+
+### Missing Required Fields
+
+**Error**: `'name' is a required property (at root)` (only one violation is reported per file, so fix it and re-run to see any others)
+
+**Solution**: Ensure your workflow JSON includes all required top-level fields: `name`, `nodes`, and `connections`.
+
+### Invalid Node Structure
+
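+**Error**: a `jsonschema` message followed by the location of the offending node, for example `'position' is a required property (at nodes.0)` or `0 is less than the minimum of 1 (at nodes.0.typeVersion)`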
+
+**Solution**: Verify each node has all required fields and correct types. Common issues:
+- Missing `position` array
+- `typeVersion` less than 1
+- Empty strings for `id`, `name`, or `type`
+- Position not being an array of exactly 2 numbers
+
+## Implementation Details
+
+The validation is implemented using:
+- **JSON Schema**: `backend/autometabuilder/schema/n8n-workflow.schema.json` (extracted from ROADMAP.md)
+- **Validation Library**: `jsonschema` (official JSON Schema validator for Python)
+- **Validation Tool**: `backend/autometabuilder/tools/validate_workflows.py`
+- **Tests**: `backend/tests/test_workflow_validation.py`
+- **Schema Tests**: `backend/tests/test_n8n_schema.py`
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -17,6 +17,7 @@ tenacity = "^9.1.2"
 flask = "^2.3.3"
 slack-sdk = "^3.39.0"
 discord-py = "^2.6.4"
+jsonschema = "^4.10.3"

 [build-system]
 requires = ["poetry-core"]
@@ -24,6 +25,7 @@ build-backend = "poetry.core.masonry.api"

 [tool.poetry.scripts]
 autometabuilder = "autometabuilder.main:main"
+validate-workflows = "autometabuilder.tools.validate_workflows:main"

 [dependency-groups]
 dev = [