Refactor validation to use jsonschema library with extracted schema file

Extract schema from ROADMAP.md to dedicated JSON file in schema/ folder.
Use jsonschema library for validation instead of custom implementation.
Add jsonschema to pyproject.toml dependencies.

Co-authored-by: johndoe6345789 <224850594+johndoe6345789@users.noreply.github.com>
This commit is contained in:
copilot-swe-agent[bot]
2026-01-10 23:44:08 +00:00
parent 6e31c1dd68
commit 4447e949ab
6 changed files with 410 additions and 51 deletions

View File

@@ -0,0 +1,345 @@
{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "$id": "https://example.com/schemas/n8n-workflow.schema.json",
  "title": "N8N-Style Workflow",
  "type": "object",
  "additionalProperties": false,
  "required": ["name", "nodes", "connections"],
  "properties": {
    "id": {
      "description": "Optional external identifier (DB id, UUID, etc.).",
      "type": ["string", "integer"]
    },
    "name": {
      "type": "string",
      "minLength": 1
    },
    "active": {
      "type": "boolean",
      "default": false
    },
    "versionId": {
      "description": "Optional version identifier for optimistic concurrency.",
      "type": "string"
    },
    "createdAt": {
      "type": "string",
      "format": "date-time"
    },
    "updatedAt": {
      "type": "string",
      "format": "date-time"
    },
    "tags": {
      "type": "array",
      "items": { "$ref": "#/$defs/tag" },
      "default": []
    },
    "meta": {
      "description": "Arbitrary metadata. Keep stable keys for tooling.",
      "type": "object",
      "additionalProperties": true,
      "default": {}
    },
    "settings": {
      "$ref": "#/$defs/workflowSettings"
    },
    "pinData": {
      "description": "Optional pinned execution data (useful for dev).",
      "type": "object",
      "additionalProperties": {
        "type": "array",
        "items": {
          "type": "object",
          "additionalProperties": true
        }
      }
    },
    "nodes": {
      "type": "array",
      "minItems": 1,
      "items": { "$ref": "#/$defs/node" }
    },
    "connections": {
      "$ref": "#/$defs/connections"
    },
    "staticData": {
      "description": "Reserved for engine-managed workflow state.",
      "type": "object",
      "additionalProperties": true,
      "default": {}
    },
    "credentials": {
      "description": "Optional top-level credential bindings (engine-specific).",
      "type": "array",
      "items": { "$ref": "#/$defs/credentialBinding" },
      "default": []
    },
    "triggers": {
      "description": "Optional explicit trigger declarations for event-driven workflows.",
      "type": "array",
      "default": [],
      "items": { "$ref": "#/$defs/trigger" }
    }
  },
  "$defs": {
    "tag": {
      "type": "object",
      "additionalProperties": false,
      "required": ["name"],
      "properties": {
        "id": { "type": ["string", "integer"] },
        "name": { "type": "string", "minLength": 1 }
      }
    },
    "workflowSettings": {
      "type": "object",
      "additionalProperties": false,
      "properties": {
        "timezone": {
          "description": "IANA timezone name, e.g. Europe/London.",
          "type": "string"
        },
        "executionTimeout": {
          "description": "Hard timeout in seconds for a workflow execution.",
          "type": "integer",
          "minimum": 0
        },
        "saveExecutionProgress": {
          "type": "boolean",
          "default": true
        },
        "saveManualExecutions": {
          "type": "boolean",
          "default": true
        },
        "saveDataErrorExecution": {
          "description": "Persist execution data on error.",
          "type": "string",
          "enum": ["all", "none"],
          "default": "all"
        },
        "saveDataSuccessExecution": {
          "description": "Persist execution data on success.",
          "type": "string",
          "enum": ["all", "none"],
          "default": "all"
        },
        "saveDataManualExecution": {
          "description": "Persist execution data for manual runs.",
          "type": "string",
          "enum": ["all", "none"],
          "default": "all"
        },
        "errorWorkflowId": {
          "description": "Optional workflow id to call on error.",
          "type": ["string", "integer"]
        },
        "callerPolicy": {
          "description": "Optional policy controlling which workflows can call this workflow.",
          "type": "string"
        }
      },
      "default": {}
    },
    "node": {
      "type": "object",
      "additionalProperties": false,
      "required": ["id", "name", "type", "typeVersion", "position"],
      "properties": {
        "id": {
          "description": "Stable unique id within the workflow. Prefer UUID.",
          "type": "string",
          "minLength": 1
        },
        "name": {
          "description": "Human-friendly name; should be unique in workflow.",
          "type": "string",
          "minLength": 1
        },
        "type": {
          "description": "Node type identifier, e.g. n8n-nodes-base.httpRequest.",
          "type": "string",
          "minLength": 1
        },
        "typeVersion": {
          "description": "Node implementation version.",
          "type": "number",
          "minimum": 1
        },
        "disabled": {
          "type": "boolean",
          "default": false
        },
        "notes": {
          "type": "string",
          "default": ""
        },
        "notesInFlow": {
          "description": "When true, notes are displayed on canvas.",
          "type": "boolean",
          "default": false
        },
        "retryOnFail": {
          "type": "boolean",
          "default": false
        },
        "maxTries": {
          "type": "integer",
          "minimum": 1
        },
        "waitBetweenTries": {
          "description": "Milliseconds.",
          "type": "integer",
          "minimum": 0
        },
        "continueOnFail": {
          "type": "boolean",
          "default": false
        },
        "alwaysOutputData": {
          "type": "boolean",
          "default": false
        },
        "executeOnce": {
          "description": "If true, node executes only once per execution (engine-dependent).",
          "type": "boolean",
          "default": false
        },
        "position": {
          "$ref": "#/$defs/position"
        },
        "parameters": {
          "description": "Node-specific parameters. Typically JSON-serializable.",
          "type": "object",
          "additionalProperties": true,
          "default": {}
        },
        "credentials": {
          "description": "Node-level credential references.",
          "type": "object",
          "additionalProperties": {
            "$ref": "#/$defs/credentialRef"
          },
          "default": {}
        },
        "webhookId": {
          "description": "Optional webhook id (for webhook-based trigger nodes).",
          "type": "string"
        },
        "onError": {
          "description": "Node-level error routing policy (engine-dependent).",
          "type": "string",
          "enum": ["stopWorkflow", "continueRegularOutput", "continueErrorOutput"]
        }
      }
    },
    "position": {
      "type": "array",
      "minItems": 2,
      "maxItems": 2,
      "items": {
        "type": "number"
      }
    },
    "credentialRef": {
      "type": "object",
      "additionalProperties": false,
      "required": ["id"],
      "properties": {
        "id": {
          "description": "Credential id or stable key.",
          "type": ["string", "integer"]
        },
        "name": {
          "description": "Optional human label.",
          "type": "string"
        }
      }
    },
    "credentialBinding": {
      "type": "object",
      "additionalProperties": false,
      "required": ["nodeId", "credentialType", "credentialId"],
      "properties": {
        "nodeId": { "type": "string", "minLength": 1 },
        "credentialType": { "type": "string", "minLength": 1 },
        "credentialId": { "type": ["string", "integer"] }
      }
    },
    "connections": {
      "description": "Adjacency map: fromNodeName -> outputType -> outputIndex -> array of targets.",
      "type": "object",
      "additionalProperties": {
        "$ref": "#/$defs/nodeConnectionsByType"
      },
      "default": {}
    },
    "nodeConnectionsByType": {
      "type": "object",
      "additionalProperties": false,
      "properties": {
        "main": {
          "$ref": "#/$defs/outputIndexMap"
        },
        "error": {
          "$ref": "#/$defs/outputIndexMap"
        }
      },
      "anyOf": [
        { "required": ["main"] },
        { "required": ["error"] }
      ]
    },
    "outputIndexMap": {
      "description": "Output index -> array of connection targets.",
      "type": "object",
      "additionalProperties": {
        "type": "array",
        "items": { "$ref": "#/$defs/connectionTarget" }
      },
      "default": {}
    },
    "connectionTarget": {
      "type": "object",
      "additionalProperties": false,
      "required": ["node", "type", "index"],
      "properties": {
        "node": {
          "description": "Target node name (n8n uses node 'name' in connections).",
          "type": "string",
          "minLength": 1
        },
        "type": {
          "description": "Input type on target node (typically 'main' or 'error').",
          "type": "string",
          "minLength": 1
        },
        "index": {
          "description": "Input index on target node.",
          "type": "integer",
          "minimum": 0
        }
      }
    },
    "trigger": {
      "type": "object",
      "additionalProperties": false,
      "required": ["nodeId", "kind"],
      "properties": {
        "nodeId": { "type": "string", "minLength": 1 },
        "kind": {
          "type": "string",
          "enum": ["webhook", "schedule", "queue", "email", "poll", "manual", "other"]
        },
        "enabled": { "type": "boolean", "default": true },
        "meta": {
          "description": "Trigger-kind-specific metadata for routing/registration.",
          "type": "object",
          "additionalProperties": true,
          "default": {}
        }
      }
    }
  }
}

View File

@@ -5,14 +5,23 @@ import sys
from pathlib import Path
from typing import List, Tuple
# Import the schema module - try direct import first (when installed via poetry)
# If that fails, add parent directory to path (for direct script execution)
try:
from autometabuilder.workflow.n8n_schema import N8NWorkflow
import jsonschema
from jsonschema import Draft202012Validator
except ImportError:
backend_dir = Path(__file__).resolve().parent.parent.parent
sys.path.insert(0, str(backend_dir))
from autometabuilder.workflow.n8n_schema import N8NWorkflow
print("Error: jsonschema library not found. Install with: poetry add jsonschema")
sys.exit(1)
def load_schema() -> dict:
    """Read and parse the N8N workflow JSON schema file.

    The file is looked up at ``schema/n8n-workflow.schema.json`` under the
    parent of the directory that contains this module (after resolving the
    module path).

    Returns:
        The parsed JSON document produced by ``json.load``.

    Raises:
        FileNotFoundError: If no file exists at the computed location.
    """
    package_root = Path(__file__).resolve().parents[1]
    schema_path = package_root / "schema" / "n8n-workflow.schema.json"

    if schema_path.exists():
        with schema_path.open("r", encoding="utf-8") as handle:
            return json.load(handle)

    raise FileNotFoundError(f"Schema file not found at: {schema_path}")
def find_workflow_files(base_path: Path) -> List[Path]:
@@ -28,9 +37,9 @@ def find_workflow_files(base_path: Path) -> List[Path]:
return sorted(workflow_files)
def validate_workflow_file(workflow_path: Path) -> Tuple[bool, str]:
def validate_workflow_file(workflow_path: Path, schema: dict) -> Tuple[bool, str]:
"""
Validate a single workflow JSON file.
Validate a single workflow JSON file against the schema.
Returns:
Tuple of (is_valid, error_message)
@@ -43,35 +52,15 @@ def validate_workflow_file(workflow_path: Path) -> Tuple[bool, str]:
except Exception as e:
return False, f"Error reading file: {e}"
# Basic structure checks
if not isinstance(workflow_data, dict):
return False, "Workflow data must be an object"
# Validate against schema
validator = Draft202012Validator(schema)
errors = list(validator.iter_errors(workflow_data))
# Check required fields
required_fields = ["name", "nodes", "connections"]
missing_fields = [field for field in required_fields if field not in workflow_data]
if missing_fields:
return False, f"Missing required fields: {', '.join(missing_fields)}"
# Check name
if not isinstance(workflow_data["name"], str) or not workflow_data["name"]:
return False, "Field 'name' must be a non-empty string"
# Check nodes
if not isinstance(workflow_data["nodes"], list):
return False, "Field 'nodes' must be an array"
if len(workflow_data["nodes"]) < 1:
return False, "Field 'nodes' must contain at least 1 node (use a start node for blank workflows)"
# Check connections
if not isinstance(workflow_data["connections"], dict):
return False, "Field 'connections' must be an object"
# Full validation
is_valid = N8NWorkflow.validate(workflow_data)
if not is_valid:
return False, "Schema validation failed (check node structure, position, types, etc.)"
if errors:
# Return the first error with a clear message
error = errors[0]
error_path = ".".join(str(p) for p in error.path) if error.path else "root"
return False, f"{error.message} (at {error_path})"
return True, ""
@@ -87,6 +76,13 @@ def main():
print("Error: Could not locate autometabuilder/packages directory")
return 1
# Load the schema
try:
schema = load_schema()
except Exception as e:
print(f"Error loading schema: {e}")
return 1
# Find all workflow files
workflow_files = find_workflow_files(script_dir)
@@ -104,7 +100,7 @@ def main():
# If relative_to fails, use the full path
relative_path = workflow_path
is_valid, error_msg = validate_workflow_file(workflow_path)
is_valid, error_msg = validate_workflow_file(workflow_path, schema)
if is_valid:
print(f"{relative_path}")

View File

@@ -6,6 +6,7 @@ import pytest
from autometabuilder.tools.validate_workflows import (
find_workflow_files,
load_schema,
validate_workflow_file,
)
@@ -20,10 +21,19 @@ def test_find_workflow_files():
assert all(f.exists() for f in workflow_files)
def test_load_schema():
    """Check that load_schema returns a dict carrying the expected dialect and title."""
    expected_dialect = "https://json-schema.org/draft/2020-12/schema"
    expected_title = "N8N-Style Workflow"

    loaded = load_schema()

    assert isinstance(loaded, dict)
    assert loaded.get("$schema") == expected_dialect
    assert loaded.get("title") == expected_title
def test_validate_all_workflow_files():
"""Test that all workflow files in packages directory are valid."""
backend_dir = Path(__file__).parent.parent / "autometabuilder"
workflow_files = find_workflow_files(backend_dir)
schema = load_schema()
errors = []
for workflow_path in workflow_files:
@@ -33,7 +43,7 @@ def test_validate_all_workflow_files():
# If relative_to fails (e.g., due to symlinks), use the full path
relative_path = workflow_path
is_valid, error_msg = validate_workflow_file(workflow_path)
is_valid, error_msg = validate_workflow_file(workflow_path, schema)
if not is_valid:
errors.append((relative_path, error_msg))
@@ -46,6 +56,7 @@ def test_validate_all_workflow_files():
def test_validate_minimal_valid_workflow(tmp_path):
"""Test validation of a minimal valid workflow."""
schema = load_schema()
workflow_data = {
"name": "Test Workflow",
"nodes": [
@@ -63,12 +74,13 @@ def test_validate_minimal_valid_workflow(tmp_path):
workflow_file = tmp_path / "workflow.json"
workflow_file.write_text(json.dumps(workflow_data))
is_valid, error_msg = validate_workflow_file(workflow_file)
is_valid, error_msg = validate_workflow_file(workflow_file, schema)
assert is_valid, f"Validation failed: {error_msg}"
def test_validate_workflow_with_missing_name(tmp_path):
"""Test validation of workflow missing required 'name' field."""
schema = load_schema()
workflow_data = {
"nodes": [
{
@@ -85,13 +97,14 @@ def test_validate_workflow_with_missing_name(tmp_path):
workflow_file = tmp_path / "workflow.json"
workflow_file.write_text(json.dumps(workflow_data))
is_valid, error_msg = validate_workflow_file(workflow_file)
is_valid, error_msg = validate_workflow_file(workflow_file, schema)
assert not is_valid
assert "name" in error_msg.lower()
assert "name" in error_msg.lower() or "required" in error_msg.lower()
def test_validate_workflow_with_empty_nodes(tmp_path):
"""Test validation of workflow with empty nodes array."""
schema = load_schema()
workflow_data = {
"name": "Empty Workflow",
"nodes": [],
@@ -101,24 +114,26 @@ def test_validate_workflow_with_empty_nodes(tmp_path):
workflow_file = tmp_path / "workflow.json"
workflow_file.write_text(json.dumps(workflow_data))
is_valid, error_msg = validate_workflow_file(workflow_file)
is_valid, error_msg = validate_workflow_file(workflow_file, schema)
assert not is_valid
assert "nodes" in error_msg.lower()
assert "at least 1" in error_msg.lower()
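# Depending on the jsonschema release, the message is "[] should be non-empty" or "[] is too short";
# the "(at nodes)" path suffix appended by validate_workflow_file satisfies this check either way.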
assert "nodes" in error_msg.lower() or "empty" in error_msg.lower()
def test_validate_workflow_with_invalid_json(tmp_path):
"""Test validation of file with invalid JSON."""
schema = load_schema()
workflow_file = tmp_path / "workflow.json"
workflow_file.write_text("{ invalid json }")
is_valid, error_msg = validate_workflow_file(workflow_file)
is_valid, error_msg = validate_workflow_file(workflow_file, schema)
assert not is_valid
assert "json" in error_msg.lower()
def test_validate_workflow_with_invalid_node(tmp_path):
"""Test validation of workflow with invalid node structure."""
schema = load_schema()
workflow_data = {
"name": "Test Workflow",
"nodes": [
@@ -133,12 +148,13 @@ def test_validate_workflow_with_invalid_node(tmp_path):
workflow_file = tmp_path / "workflow.json"
workflow_file.write_text(json.dumps(workflow_data))
is_valid, error_msg = validate_workflow_file(workflow_file)
is_valid, error_msg = validate_workflow_file(workflow_file, schema)
assert not is_valid
def test_validate_workflow_with_triggers(tmp_path):
"""Test validation of workflow with triggers array."""
schema = load_schema()
workflow_data = {
"name": "Test Workflow with Triggers",
"nodes": [
@@ -166,5 +182,5 @@ def test_validate_workflow_with_triggers(tmp_path):
workflow_file = tmp_path / "workflow.json"
workflow_file.write_text(json.dumps(workflow_data))
is_valid, error_msg = validate_workflow_file(workflow_file)
is_valid, error_msg = validate_workflow_file(workflow_file, schema)
assert is_valid, f"Validation failed: {error_msg}"

View File

@@ -1,10 +1,10 @@
# Workflow JSON Validation
This repository includes a validation tool for workflow JSON files based on the N8N-style workflow schema defined in ROADMAP.md.
This repository includes a validation tool for workflow JSON files based on the N8N-style workflow schema defined in ROADMAP.md and extracted to a dedicated schema file.
## Schema Definition
The workflow JSON schema is defined in [ROADMAP.md](../ROADMAP.md) (lines 84-430). It defines the structure for N8N-style workflows with the following key requirements:
The workflow JSON schema is extracted from [ROADMAP.md](../ROADMAP.md) (lines 84-430) and stored in `backend/autometabuilder/schema/n8n-workflow.schema.json`. It defines the structure for N8N-style workflows with the following key requirements:
- **Required fields**: `name`, `nodes`, `connections`
- **Nodes**: Must contain at least 1 node with `id`, `name`, `type`, `typeVersion`, and `position`
@@ -170,8 +170,9 @@ When adding new workflow JSON files:
## Implementation Details
The validation is implemented in:
- **Validator Module**: `backend/autometabuilder/workflow/n8n_schema.py`
The validation is implemented using:
- **JSON Schema**: `backend/autometabuilder/schema/n8n-workflow.schema.json` (extracted from ROADMAP.md)
- **Validation Library**: `jsonschema` (a widely used JSON Schema validator for Python)
- **Validation Tool**: `backend/autometabuilder/tools/validate_workflows.py`
- **Tests**: `backend/tests/test_workflow_validation.py`
- **Schema Tests**: `backend/tests/test_n8n_schema.py`

View File

@@ -17,6 +17,7 @@ tenacity = "^9.1.2"
flask = "^2.3.3"
slack-sdk = "^3.39.0"
discord-py = "^2.6.4"
jsonschema = "^4.10.3"
[build-system]
requires = ["poetry-core"]