Refactor validation to use jsonschema library with extracted schema file

Extract schema from ROADMAP.md to dedicated JSON file in schema/ folder. Use jsonschema library for validation instead of custom implementation. Add jsonschema to pyproject.toml dependencies. Co-authored-by: johndoe6345789 <224850594+johndoe6345789@users.noreply.github.com>
2026-04-24 13:54:59 +00:00 · 2026-01-10 23:44:08 +00:00
parent 6e31c1dd68
commit 4447e949ab
6 changed files with 410 additions and 51 deletions
--- a/backend/autometabuilder/schema/__init__.py
+++ b/backend/autometabuilder/schema/__init__.py
--- a/backend/autometabuilder/schema/n8n-workflow.schema.json
+++ b/backend/autometabuilder/schema/n8n-workflow.schema.json
@@ -0,0 +1,345 @@
+{
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "$id": "https://example.com/schemas/n8n-workflow.schema.json",
+  "title": "N8N-Style Workflow",
+  "type": "object",
+  "additionalProperties": false,
+  "required": ["name", "nodes", "connections"],
+  "properties": {
+    "id": {
+      "description": "Optional external identifier (DB id, UUID, etc.).",
+      "type": ["string", "integer"]
+    },
+    "name": {
+      "type": "string",
+      "minLength": 1
+    },
+    "active": {
+      "type": "boolean",
+      "default": false
+    },
+    "versionId": {
+      "description": "Optional version identifier for optimistic concurrency.",
+      "type": "string"
+    },
+    "createdAt": {
+      "type": "string",
+      "format": "date-time"
+    },
+    "updatedAt": {
+      "type": "string",
+      "format": "date-time"
+    },
+    "tags": {
+      "type": "array",
+      "items": { "$ref": "#/$defs/tag" },
+      "default": []
+    },
+    "meta": {
+      "description": "Arbitrary metadata. Keep stable keys for tooling.",
+      "type": "object",
+      "additionalProperties": true,
+      "default": {}
+    },
+    "settings": {
+      "$ref": "#/$defs/workflowSettings"
+    },
+    "pinData": {
+      "description": "Optional pinned execution data (useful for dev).",
+      "type": "object",
+      "additionalProperties": {
+        "type": "array",
+        "items": {
+          "type": "object",
+          "additionalProperties": true
+        }
+      }
+    },
+    "nodes": {
+      "type": "array",
+      "minItems": 1,
+      "items": { "$ref": "#/$defs/node" }
+    },
+    "connections": {
+      "$ref": "#/$defs/connections"
+    },
+    "staticData": {
+      "description": "Reserved for engine-managed workflow state.",
+      "type": "object",
+      "additionalProperties": true,
+      "default": {}
+    },
+    "credentials": {
+      "description": "Optional top-level credential bindings (engine-specific).",
+      "type": "array",
+      "items": { "$ref": "#/$defs/credentialBinding" },
+      "default": []
+    },
+    "triggers": {
+      "description": "Optional explicit trigger declarations for event-driven workflows.",
+      "type": "array",
+      "default": [],
+      "items": { "$ref": "#/$defs/trigger" }
+    }
+  },
+  "$defs": {
+    "tag": {
+      "type": "object",
+      "additionalProperties": false,
+      "required": ["name"],
+      "properties": {
+        "id": { "type": ["string", "integer"] },
+        "name": { "type": "string", "minLength": 1 }
+      }
+    },
+    "workflowSettings": {
+      "type": "object",
+      "additionalProperties": false,
+      "properties": {
+        "timezone": {
+          "description": "IANA timezone name, e.g. Europe/London.",
+          "type": "string"
+        },
+        "executionTimeout": {
+          "description": "Hard timeout in seconds for a workflow execution.",
+          "type": "integer",
+          "minimum": 0
+        },
+        "saveExecutionProgress": {
+          "type": "boolean",
+          "default": true
+        },
+        "saveManualExecutions": {
+          "type": "boolean",
+          "default": true
+        },
+        "saveDataErrorExecution": {
+          "description": "Persist execution data on error.",
+          "type": "string",
+          "enum": ["all", "none"],
+          "default": "all"
+        },
+        "saveDataSuccessExecution": {
+          "description": "Persist execution data on success.",
+          "type": "string",
+          "enum": ["all", "none"],
+          "default": "all"
+        },
+        "saveDataManualExecution": {
+          "description": "Persist execution data for manual runs.",
+          "type": "string",
+          "enum": ["all", "none"],
+          "default": "all"
+        },
+        "errorWorkflowId": {
+          "description": "Optional workflow id to call on error.",
+          "type": ["string", "integer"]
+        },
+        "callerPolicy": {
+          "description": "Optional policy controlling which workflows can call this workflow.",
+          "type": "string"
+        }
+      },
+      "default": {}
+    },
+    "node": {
+      "type": "object",
+      "additionalProperties": false,
+      "required": ["id", "name", "type", "typeVersion", "position"],
+      "properties": {
+        "id": {
+          "description": "Stable unique id within the workflow. Prefer UUID.",
+          "type": "string",
+          "minLength": 1
+        },
+        "name": {
+          "description": "Human-friendly name; should be unique in workflow.",
+          "type": "string",
+          "minLength": 1
+        },
+        "type": {
+          "description": "Node type identifier, e.g. n8n-nodes-base.httpRequest.",
+          "type": "string",
+          "minLength": 1
+        },
+        "typeVersion": {
+          "description": "Node implementation version.",
+          "type": ["integer", "number"],
+          "minimum": 1
+        },
+        "disabled": {
+          "type": "boolean",
+          "default": false
+        },
+        "notes": {
+          "type": "string",
+          "default": ""
+        },
+        "notesInFlow": {
+          "description": "When true, notes are displayed on canvas.",
+          "type": "boolean",
+          "default": false
+        },
+        "retryOnFail": {
+          "type": "boolean",
+          "default": false
+        },
+        "maxTries": {
+          "type": "integer",
+          "minimum": 1
+        },
+        "waitBetweenTries": {
+          "description": "Milliseconds.",
+          "type": "integer",
+          "minimum": 0
+        },
+        "continueOnFail": {
+          "type": "boolean",
+          "default": false
+        },
+        "alwaysOutputData": {
+          "type": "boolean",
+          "default": false
+        },
+        "executeOnce": {
+          "description": "If true, node executes only once per execution (engine-dependent).",
+          "type": "boolean",
+          "default": false
+        },
+        "position": {
+          "$ref": "#/$defs/position"
+        },
+        "parameters": {
+          "description": "Node-specific parameters. Typically JSON-serializable.",
+          "type": "object",
+          "additionalProperties": true,
+          "default": {}
+        },
+        "credentials": {
+          "description": "Node-level credential references.",
+          "type": "object",
+          "additionalProperties": {
+            "$ref": "#/$defs/credentialRef"
+          },
+          "default": {}
+        },
+        "webhookId": {
+          "description": "Optional webhook id (for webhook-based trigger nodes).",
+          "type": "string"
+        },
+        "onError": {
+          "description": "Node-level error routing policy (engine-dependent).",
+          "type": "string",
+          "enum": ["stopWorkflow", "continueRegularOutput", "continueErrorOutput"]
+        }
+      }
+    },
+    "position": {
+      "type": "array",
+      "minItems": 2,
+      "maxItems": 2,
+      "items": {
+        "type": "number"
+      }
+    },
+    "credentialRef": {
+      "type": "object",
+      "additionalProperties": false,
+      "required": ["id"],
+      "properties": {
+        "id": {
+          "description": "Credential id or stable key.",
+          "type": ["string", "integer"]
+        },
+        "name": {
+          "description": "Optional human label.",
+          "type": "string"
+        }
+      }
+    },
+    "credentialBinding": {
+      "type": "object",
+      "additionalProperties": false,
+      "required": ["nodeId", "credentialType", "credentialId"],
+      "properties": {
+        "nodeId": { "type": "string", "minLength": 1 },
+        "credentialType": { "type": "string", "minLength": 1 },
+        "credentialId": { "type": ["string", "integer"] }
+      }
+    },
+    "connections": {
+      "description": "Adjacency map: fromNodeName -> outputType -> outputIndex -> array of targets.",
+      "type": "object",
+      "additionalProperties": {
+        "$ref": "#/$defs/nodeConnectionsByType"
+      },
+      "default": {}
+    },
+    "nodeConnectionsByType": {
+      "type": "object",
+      "additionalProperties": false,
+      "properties": {
+        "main": {
+          "$ref": "#/$defs/outputIndexMap"
+        },
+        "error": {
+          "$ref": "#/$defs/outputIndexMap"
+        }
+      },
+      "anyOf": [
+        { "required": ["main"] },
+        { "required": ["error"] }
+      ]
+    },
+    "outputIndexMap": {
+      "description": "Output index -> array of connection targets.",
+      "type": "object",
+      "additionalProperties": {
+        "type": "array",
+        "items": { "$ref": "#/$defs/connectionTarget" }
+      },
+      "default": {}
+    },
+    "connectionTarget": {
+      "type": "object",
+      "additionalProperties": false,
+      "required": ["node", "type", "index"],
+      "properties": {
+        "node": {
+          "description": "Target node name (n8n uses node 'name' in connections).",
+          "type": "string",
+          "minLength": 1
+        },
+        "type": {
+          "description": "Input type on target node (typically 'main' or 'error').",
+          "type": "string",
+          "minLength": 1
+        },
+        "index": {
+          "description": "Input index on target node.",
+          "type": "integer",
+          "minimum": 0
+        }
+      }
+    },
+    "trigger": {
+      "type": "object",
+      "additionalProperties": false,
+      "required": ["nodeId", "kind"],
+      "properties": {
+        "nodeId": { "type": "string", "minLength": 1 },
+        "kind": {
+          "type": "string",
+          "enum": ["webhook", "schedule", "queue", "email", "poll", "manual", "other"]
+        },
+        "enabled": { "type": "boolean", "default": true },
+        "meta": {
+          "description": "Trigger-kind-specific metadata for routing/registration.",
+          "type": "object",
+          "additionalProperties": true,
+          "default": {}
+        }
+      }
+    }
+  }
+}
--- a/backend/autometabuilder/tools/validate_workflows.py
+++ b/backend/autometabuilder/tools/validate_workflows.py
@@ -5,14 +5,23 @@ import sys
 from pathlib import Path
 from typing import List, Tuple

-# Import the schema module - try direct import first (when installed via poetry)
-# If that fails, add parent directory to path (for direct script execution)
 try:
-    from autometabuilder.workflow.n8n_schema import N8NWorkflow
+    import jsonschema
+    from jsonschema import Draft202012Validator
 except ImportError:
-    backend_dir = Path(__file__).resolve().parent.parent.parent
-    sys.path.insert(0, str(backend_dir))
-    from autometabuilder.workflow.n8n_schema import N8NWorkflow
+    print("Error: jsonschema library not found. Install with: poetry add jsonschema")
+    sys.exit(1)
+
+
+def load_schema() -> dict:
+    """Load the N8N workflow JSON schema."""
+    schema_path = Path(__file__).resolve().parent.parent / "schema" / "n8n-workflow.schema.json"
+    
+    if not schema_path.exists():
+        raise FileNotFoundError(f"Schema file not found at: {schema_path}")
+    
+    with open(schema_path, 'r', encoding='utf-8') as f:
+        return json.load(f)


 def find_workflow_files(base_path: Path) -> List[Path]:
@@ -28,9 +37,9 @@ def find_workflow_files(base_path: Path) -> List[Path]:
    return sorted(workflow_files)


-def validate_workflow_file(workflow_path: Path) -> Tuple[bool, str]:
+def validate_workflow_file(workflow_path: Path, schema: dict) -> Tuple[bool, str]:
    """
-    Validate a single workflow JSON file.
+    Validate a single workflow JSON file against the schema.
    
    Returns:
        Tuple of (is_valid, error_message)
@@ -43,35 +52,15 @@ def validate_workflow_file(workflow_path: Path) -> Tuple[bool, str]:
    except Exception as e:
        return False, f"Error reading file: {e}"
    
-    # Basic structure checks
-    if not isinstance(workflow_data, dict):
-        return False, "Workflow data must be an object"
+    # Validate against schema
+    validator = Draft202012Validator(schema)
+    errors = list(validator.iter_errors(workflow_data))
    
-    # Check required fields
-    required_fields = ["name", "nodes", "connections"]
-    missing_fields = [field for field in required_fields if field not in workflow_data]
-    if missing_fields:
-        return False, f"Missing required fields: {', '.join(missing_fields)}"
-    
-    # Check name
-    if not isinstance(workflow_data["name"], str) or not workflow_data["name"]:
-        return False, "Field 'name' must be a non-empty string"
-    
-    # Check nodes
-    if not isinstance(workflow_data["nodes"], list):
-        return False, "Field 'nodes' must be an array"
-    
-    if len(workflow_data["nodes"]) < 1:
-        return False, "Field 'nodes' must contain at least 1 node (use a start node for blank workflows)"
-    
-    # Check connections
-    if not isinstance(workflow_data["connections"], dict):
-        return False, "Field 'connections' must be an object"
-    
-    # Full validation
-    is_valid = N8NWorkflow.validate(workflow_data)
-    if not is_valid:
-        return False, "Schema validation failed (check node structure, position, types, etc.)"
+    if errors:
+        # Return the first error with a clear message
+        error = errors[0]
+        error_path = ".".join(str(p) for p in error.path) if error.path else "root"
+        return False, f"{error.message} (at {error_path})"
    
    return True, ""

@@ -87,6 +76,13 @@ def main():
        print("Error: Could not locate autometabuilder/packages directory")
        return 1
    
+    # Load the schema
+    try:
+        schema = load_schema()
+    except Exception as e:
+        print(f"Error loading schema: {e}")
+        return 1
+    
    # Find all workflow files
    workflow_files = find_workflow_files(script_dir)
    
@@ -104,7 +100,7 @@ def main():
            # If relative_to fails, use the full path
            relative_path = workflow_path
        
-        is_valid, error_msg = validate_workflow_file(workflow_path)
+        is_valid, error_msg = validate_workflow_file(workflow_path, schema)
        
        if is_valid:
            print(f"✓ {relative_path}")
--- a/backend/tests/test_workflow_validation.py
+++ b/backend/tests/test_workflow_validation.py
@@ -6,6 +6,7 @@ import pytest

 from autometabuilder.tools.validate_workflows import (
    find_workflow_files,
+    load_schema,
    validate_workflow_file,
 )

@@ -20,10 +21,19 @@ def test_find_workflow_files():
    assert all(f.exists() for f in workflow_files)


+def test_load_schema():
+    """Test that the schema can be loaded."""
+    schema = load_schema()
+    assert isinstance(schema, dict)
+    assert schema.get("$schema") == "https://json-schema.org/draft/2020-12/schema"
+    assert schema.get("title") == "N8N-Style Workflow"
+
+
 def test_validate_all_workflow_files():
    """Test that all workflow files in packages directory are valid."""
    backend_dir = Path(__file__).parent.parent / "autometabuilder"
    workflow_files = find_workflow_files(backend_dir)
+    schema = load_schema()
    
    errors = []
    for workflow_path in workflow_files:
@@ -33,7 +43,7 @@ def test_validate_all_workflow_files():
            # If relative_to fails (e.g., due to symlinks), use the full path
            relative_path = workflow_path
        
-        is_valid, error_msg = validate_workflow_file(workflow_path)
+        is_valid, error_msg = validate_workflow_file(workflow_path, schema)
        
        if not is_valid:
            errors.append((relative_path, error_msg))
@@ -46,6 +56,7 @@ def test_validate_all_workflow_files():

 def test_validate_minimal_valid_workflow(tmp_path):
    """Test validation of a minimal valid workflow."""
+    schema = load_schema()
    workflow_data = {
        "name": "Test Workflow",
        "nodes": [
@@ -63,12 +74,13 @@ def test_validate_minimal_valid_workflow(tmp_path):
    workflow_file = tmp_path / "workflow.json"
    workflow_file.write_text(json.dumps(workflow_data))
    
-    is_valid, error_msg = validate_workflow_file(workflow_file)
+    is_valid, error_msg = validate_workflow_file(workflow_file, schema)
    assert is_valid, f"Validation failed: {error_msg}"


 def test_validate_workflow_with_missing_name(tmp_path):
    """Test validation of workflow missing required 'name' field."""
+    schema = load_schema()
    workflow_data = {
        "nodes": [
            {
@@ -85,13 +97,14 @@ def test_validate_workflow_with_missing_name(tmp_path):
    workflow_file = tmp_path / "workflow.json"
    workflow_file.write_text(json.dumps(workflow_data))
    
-    is_valid, error_msg = validate_workflow_file(workflow_file)
+    is_valid, error_msg = validate_workflow_file(workflow_file, schema)
    assert not is_valid
-    assert "name" in error_msg.lower()
+    assert "name" in error_msg.lower() or "required" in error_msg.lower()


 def test_validate_workflow_with_empty_nodes(tmp_path):
    """Test validation of workflow with empty nodes array."""
+    schema = load_schema()
    workflow_data = {
        "name": "Empty Workflow",
        "nodes": [],
@@ -101,24 +114,26 @@ def test_validate_workflow_with_empty_nodes(tmp_path):
    workflow_file = tmp_path / "workflow.json"
    workflow_file.write_text(json.dumps(workflow_data))
    
-    is_valid, error_msg = validate_workflow_file(workflow_file)
+    is_valid, error_msg = validate_workflow_file(workflow_file, schema)
    assert not is_valid
-    assert "nodes" in error_msg.lower()
-    assert "at least 1" in error_msg.lower()
+    # jsonschema will report "[] should be non-empty"
+    assert "nodes" in error_msg.lower() or "empty" in error_msg.lower()


 def test_validate_workflow_with_invalid_json(tmp_path):
    """Test validation of file with invalid JSON."""
+    schema = load_schema()
    workflow_file = tmp_path / "workflow.json"
    workflow_file.write_text("{ invalid json }")
    
-    is_valid, error_msg = validate_workflow_file(workflow_file)
+    is_valid, error_msg = validate_workflow_file(workflow_file, schema)
    assert not is_valid
    assert "json" in error_msg.lower()


 def test_validate_workflow_with_invalid_node(tmp_path):
    """Test validation of workflow with invalid node structure."""
+    schema = load_schema()
    workflow_data = {
        "name": "Test Workflow",
        "nodes": [
@@ -133,12 +148,13 @@ def test_validate_workflow_with_invalid_node(tmp_path):
    workflow_file = tmp_path / "workflow.json"
    workflow_file.write_text(json.dumps(workflow_data))
    
-    is_valid, error_msg = validate_workflow_file(workflow_file)
+    is_valid, error_msg = validate_workflow_file(workflow_file, schema)
    assert not is_valid


 def test_validate_workflow_with_triggers(tmp_path):
    """Test validation of workflow with triggers array."""
+    schema = load_schema()
    workflow_data = {
        "name": "Test Workflow with Triggers",
        "nodes": [
@@ -166,5 +182,5 @@ def test_validate_workflow_with_triggers(tmp_path):
    workflow_file = tmp_path / "workflow.json"
    workflow_file.write_text(json.dumps(workflow_data))
    
-    is_valid, error_msg = validate_workflow_file(workflow_file)
+    is_valid, error_msg = validate_workflow_file(workflow_file, schema)
    assert is_valid, f"Validation failed: {error_msg}"
--- a/docs/WORKFLOW_VALIDATION.md
+++ b/docs/WORKFLOW_VALIDATION.md
@@ -1,10 +1,10 @@
 # Workflow JSON Validation

-This repository includes a validation tool for workflow JSON files based on the N8N-style workflow schema defined in ROADMAP.md.
+This repository includes a validation tool for workflow JSON files based on the N8N-style workflow schema defined in ROADMAP.md and extracted to a dedicated schema file.

 ## Schema Definition

-The workflow JSON schema is defined in [ROADMAP.md](../ROADMAP.md) (lines 84-430). It defines the structure for N8N-style workflows with the following key requirements:
+The workflow JSON schema is extracted from [ROADMAP.md](../ROADMAP.md) (lines 84-430) and stored in `backend/autometabuilder/schema/n8n-workflow.schema.json`. It defines the structure for N8N-style workflows with the following key requirements:

 - **Required fields**: `name`, `nodes`, `connections`
 - **Nodes**: Must contain at least 1 node with `id`, `name`, `type`, `typeVersion`, and `position`
@@ -170,8 +170,9 @@ When adding new workflow JSON files:

 ## Implementation Details

-The validation is implemented in:
- **Validator Module**: `backend/autometabuilder/workflow/n8n_schema.py`
+The validation is implemented using:
+- **JSON Schema**: `backend/autometabuilder/schema/n8n-workflow.schema.json` (extracted from ROADMAP.md)
+- **Validation Library**: `jsonschema` (a Python implementation of JSON Schema validation)
 - **Validation Tool**: `backend/autometabuilder/tools/validate_workflows.py`
 - **Tests**: `backend/tests/test_workflow_validation.py`
 - **Schema Tests**: `backend/tests/test_n8n_schema.py`
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -17,6 +17,7 @@ tenacity = "^9.1.2"
 flask = "^2.3.3"
 slack-sdk = "^3.39.0"
 discord-py = "^2.6.4"
+jsonschema = "^4.10.3"

 [build-system]
 requires = ["poetry-core"]