Merge pull request #24 from johndoe6345789/copilot/validate-workflow-json-schema

Add workflow JSON schema validation tool using jsonschema library
This commit is contained in:
2026-01-11 00:06:30 +00:00
committed by GitHub
9 changed files with 861 additions and 1 deletions

View File

@@ -32,6 +32,9 @@ jobs:
- name: Build package
run: poetry build
- name: Validate workflow JSON files
run: poetry run validate-workflows
- name: Static analysis
run: poetry run python -m compileall src

View File

@@ -48,6 +48,16 @@ The Next.js app now lives under `frontend/autometabuilder` and uses Material UI
## Testing & linting
### Workflow JSON Validation
Validate all workflow JSON files against the N8N schema:
```bash
poetry run validate-workflows
```
See [docs/WORKFLOW_VALIDATION.md](docs/WORKFLOW_VALIDATION.md) for detailed documentation.
### Python
```bash

View File

@@ -1,6 +1,18 @@
{
"name": "Blank Canvas",
"active": false,
"nodes": [],
"nodes": [
{
"id": "start",
"name": "Start",
"type": "core.start",
"typeVersion": 1,
"position": [
0,
0
],
"parameters": {}
}
],
"connections": {}
}

View File

@@ -0,0 +1,345 @@
{
"$schema": "https://json-schema.org/draft/2020-12/schema",
"$id": "https://example.com/schemas/n8n-workflow.schema.json",
"title": "N8N-Style Workflow",
"type": "object",
"additionalProperties": false,
"required": ["name", "nodes", "connections"],
"properties": {
"id": {
"description": "Optional external identifier (DB id, UUID, etc.).",
"type": ["string", "integer"]
},
"name": {
"type": "string",
"minLength": 1
},
"active": {
"type": "boolean",
"default": false
},
"versionId": {
"description": "Optional version identifier for optimistic concurrency.",
"type": "string"
},
"createdAt": {
"type": "string",
"format": "date-time"
},
"updatedAt": {
"type": "string",
"format": "date-time"
},
"tags": {
"type": "array",
"items": { "$ref": "#/$defs/tag" },
"default": []
},
"meta": {
"description": "Arbitrary metadata. Keep stable keys for tooling.",
"type": "object",
"additionalProperties": true,
"default": {}
},
"settings": {
"$ref": "#/$defs/workflowSettings"
},
"pinData": {
"description": "Optional pinned execution data (useful for dev).",
"type": "object",
"additionalProperties": {
"type": "array",
"items": {
"type": "object",
"additionalProperties": true
}
}
},
"nodes": {
"type": "array",
"minItems": 1,
"items": { "$ref": "#/$defs/node" }
},
"connections": {
"$ref": "#/$defs/connections"
},
"staticData": {
"description": "Reserved for engine-managed workflow state.",
"type": "object",
"additionalProperties": true,
"default": {}
},
"credentials": {
"description": "Optional top-level credential bindings (engine-specific).",
"type": "array",
"items": { "$ref": "#/$defs/credentialBinding" },
"default": []
},
"triggers": {
"description": "Optional explicit trigger declarations for event-driven workflows.",
"type": "array",
"default": [],
"items": { "$ref": "#/$defs/trigger" }
}
},
"$defs": {
"tag": {
"type": "object",
"additionalProperties": false,
"required": ["name"],
"properties": {
"id": { "type": ["string", "integer"] },
"name": { "type": "string", "minLength": 1 }
}
},
"workflowSettings": {
"type": "object",
"additionalProperties": false,
"properties": {
"timezone": {
"description": "IANA timezone name, e.g. Europe/London.",
"type": "string"
},
"executionTimeout": {
"description": "Hard timeout in seconds for a workflow execution.",
"type": "integer",
"minimum": 0
},
"saveExecutionProgress": {
"type": "boolean",
"default": true
},
"saveManualExecutions": {
"type": "boolean",
"default": true
},
"saveDataErrorExecution": {
"description": "Persist execution data on error.",
"type": "string",
"enum": ["all", "none"],
"default": "all"
},
"saveDataSuccessExecution": {
"description": "Persist execution data on success.",
"type": "string",
"enum": ["all", "none"],
"default": "all"
},
"saveDataManualExecution": {
"description": "Persist execution data for manual runs.",
"type": "string",
"enum": ["all", "none"],
"default": "all"
},
"errorWorkflowId": {
"description": "Optional workflow id to call on error.",
"type": ["string", "integer"]
},
"callerPolicy": {
"description": "Optional policy controlling which workflows can call this workflow.",
"type": "string"
}
},
"default": {}
},
"node": {
"type": "object",
"additionalProperties": false,
"required": ["id", "name", "type", "typeVersion", "position"],
"properties": {
"id": {
"description": "Stable unique id within the workflow. Prefer UUID.",
"type": "string",
"minLength": 1
},
"name": {
"description": "Human-friendly name; should be unique in workflow.",
"type": "string",
"minLength": 1
},
"type": {
"description": "Node type identifier, e.g. n8n-nodes-base.httpRequest.",
"type": "string",
"minLength": 1
},
"typeVersion": {
"description": "Node implementation version.",
"type": ["integer", "number"],
"minimum": 1
},
"disabled": {
"type": "boolean",
"default": false
},
"notes": {
"type": "string",
"default": ""
},
"notesInFlow": {
"description": "When true, notes are displayed on canvas.",
"type": "boolean",
"default": false
},
"retryOnFail": {
"type": "boolean",
"default": false
},
"maxTries": {
"type": "integer",
"minimum": 1
},
"waitBetweenTries": {
"description": "Milliseconds.",
"type": "integer",
"minimum": 0
},
"continueOnFail": {
"type": "boolean",
"default": false
},
"alwaysOutputData": {
"type": "boolean",
"default": false
},
"executeOnce": {
"description": "If true, node executes only once per execution (engine-dependent).",
"type": "boolean",
"default": false
},
"position": {
"$ref": "#/$defs/position"
},
"parameters": {
"description": "Node-specific parameters. Typically JSON-serializable.",
"type": "object",
"additionalProperties": true,
"default": {}
},
"credentials": {
"description": "Node-level credential references.",
"type": "object",
"additionalProperties": {
"$ref": "#/$defs/credentialRef"
},
"default": {}
},
"webhookId": {
"description": "Optional webhook id (for webhook-based trigger nodes).",
"type": "string"
},
"onError": {
"description": "Node-level error routing policy (engine-dependent).",
"type": "string",
"enum": ["stopWorkflow", "continueRegularOutput", "continueErrorOutput"]
}
}
},
"position": {
"type": "array",
"minItems": 2,
"maxItems": 2,
"items": {
"type": "number"
}
},
"credentialRef": {
"type": "object",
"additionalProperties": false,
"required": ["id"],
"properties": {
"id": {
"description": "Credential id or stable key.",
"type": ["string", "integer"]
},
"name": {
"description": "Optional human label.",
"type": "string"
}
}
},
"credentialBinding": {
"type": "object",
"additionalProperties": false,
"required": ["nodeId", "credentialType", "credentialId"],
"properties": {
"nodeId": { "type": "string", "minLength": 1 },
"credentialType": { "type": "string", "minLength": 1 },
"credentialId": { "type": ["string", "integer"] }
}
},
"connections": {
"description": "Adjacency map: fromNodeName -> outputType -> outputIndex -> array of targets.",
"type": "object",
"additionalProperties": {
"$ref": "#/$defs/nodeConnectionsByType"
},
"default": {}
},
"nodeConnectionsByType": {
"type": "object",
"additionalProperties": false,
"properties": {
"main": {
"$ref": "#/$defs/outputIndexMap"
},
"error": {
"$ref": "#/$defs/outputIndexMap"
}
},
"anyOf": [
{ "required": ["main"] },
{ "required": ["error"] }
]
},
"outputIndexMap": {
"description": "Output index -> array of connection targets.",
"type": "object",
"additionalProperties": {
"type": "array",
"items": { "$ref": "#/$defs/connectionTarget" }
},
"default": {}
},
"connectionTarget": {
"type": "object",
"additionalProperties": false,
"required": ["node", "type", "index"],
"properties": {
"node": {
"description": "Target node name (n8n uses node 'name' in connections).",
"type": "string",
"minLength": 1
},
"type": {
"description": "Input type on target node (typically 'main' or 'error').",
"type": "string",
"minLength": 1
},
"index": {
"description": "Input index on target node.",
"type": "integer",
"minimum": 0
}
}
},
"trigger": {
"type": "object",
"additionalProperties": false,
"required": ["nodeId", "kind"],
"properties": {
"nodeId": { "type": "string", "minLength": 1 },
"kind": {
"type": "string",
"enum": ["webhook", "schedule", "queue", "email", "poll", "manual", "other"]
},
"enabled": { "type": "boolean", "default": true },
"meta": {
"description": "Trigger-kind-specific metadata for routing/registration.",
"type": "object",
"additionalProperties": true,
"default": {}
}
}
}
}
}

View File

@@ -0,0 +1,123 @@
#!/usr/bin/env python3
"""Tool to validate all workflow JSON files against the N8N schema."""
import json
import sys
from pathlib import Path
from typing import List, Tuple
try:
import jsonschema
from jsonschema import Draft202012Validator
except ImportError:
print("Error: jsonschema library not found. Install with: poetry install")
sys.exit(1)
def load_schema() -> dict:
    """Read and parse the bundled N8N workflow JSON schema.

    The schema is looked up at ``schema/n8n-workflow.schema.json`` under the
    directory one level above the directory that contains this module.

    Returns:
        The parsed schema document.

    Raises:
        FileNotFoundError: If no file exists at the expected location.
    """
    package_root = Path(__file__).resolve().parent.parent
    schema_path = package_root / "schema" / "n8n-workflow.schema.json"
    if schema_path.exists():
        with open(schema_path, 'r', encoding='utf-8') as handle:
            return json.load(handle)
    raise FileNotFoundError(f"Schema file not found at: {schema_path}")
def find_workflow_files(base_path: Path) -> List[Path]:
    """Collect every ``workflow.json`` beneath ``base_path / "packages"``.

    Args:
        base_path: Directory expected to contain a ``packages`` subdirectory.

    Returns:
        The matching paths in sorted order, or an empty list when the
        ``packages`` directory does not exist.
    """
    packages_root = base_path / "packages"
    if packages_root.exists():
        # The search is recursive, so nested package folders are included.
        return sorted(packages_root.rglob("workflow.json"))
    return []
def validate_workflow_file(workflow_path: Path, schema: dict) -> Tuple[bool, str]:
    """Check one workflow JSON file against ``schema``.

    Args:
        workflow_path: Location of the workflow JSON file to check.
        schema: Parsed JSON schema handed to ``Draft202012Validator``.

    Returns:
        ``(True, "")`` when the file parses and no schema errors are reported.
        Otherwise ``(False, message)``, where ``message`` describes the read or
        parse failure, or the first reported schema error followed by the
        dotted path of the offending element (``root`` when that path is empty).
    """
    try:
        with open(workflow_path, 'r', encoding='utf-8') as handle:
            document = json.load(handle)
    except json.JSONDecodeError as exc:
        return False, f"JSON parsing error: {exc}"
    except Exception as exc:
        return False, f"Error reading file: {exc}"

    violations = list(Draft202012Validator(schema).iter_errors(document))
    if not violations:
        return True, ""

    # Only the first reported violation is surfaced to the caller.
    first = violations[0]
    location = ".".join(map(str, first.path)) if first.path else "root"
    return False, f"{first.message} (at {location})"
def main():
    """Validate every packaged workflow file and print a report.

    Returns:
        ``0`` when all discovered workflow files validate. ``1`` when the
        ``packages`` directory is missing, the schema cannot be loaded, no
        workflow files are found, or at least one file fails validation.
    """
    # Two levels up from this file is expected to be the directory holding
    # ``packages``; bail out early if that assumption does not hold.
    root_dir = Path(__file__).resolve().parent.parent
    if not (root_dir / "packages").exists():
        print("Error: Could not locate autometabuilder/packages directory")
        return 1

    try:
        schema = load_schema()
    except Exception as exc:
        print(f"Error loading schema: {exc}")
        return 1

    candidates = find_workflow_files(root_dir)
    if not candidates:
        print("No workflow.json files found in packages directory.")
        return 1

    total = len(candidates)
    print(f"Found {total} workflow file(s) to validate\n")

    failures = []
    for candidate in candidates:
        try:
            shown = candidate.relative_to(root_dir)
        except ValueError:
            # Not expressible relative to root_dir: report the path as found.
            shown = candidate

        ok, detail = validate_workflow_file(candidate, schema)
        if not ok:
            print(f"{shown}: {detail}")
            failures.append((shown, detail))
            continue
        print(f"{shown}")

    print()
    if not failures:
        print(f"All {total} workflow file(s) are valid!")
        return 0

    # Repeat every failure in one block so it is easy to find at the end of the output.
    print(f"Validation failed for {len(failures)} file(s):")
    for failed_path, failed_detail in failures:
        print(f" - {failed_path}: {failed_detail}")
    return 1
# When executed directly, main()'s return value becomes the process exit status.
if __name__ == "__main__":
    exit_status = main()
    sys.exit(exit_status)

View File

@@ -0,0 +1,187 @@
"""Tests for workflow JSON validation tool."""
import json
from pathlib import Path
import pytest
from autometabuilder.tools.validate_workflows import (
find_workflow_files,
load_schema,
validate_workflow_file,
)
def test_find_workflow_files():
    """Discovery yields at least one path, each an existing ``workflow.json``."""
    package_root = Path(__file__).parent.parent / "autometabuilder"
    discovered = find_workflow_files(package_root)

    assert len(discovered) > 0
    unexpected_names = [found for found in discovered if found.name != "workflow.json"]
    assert not unexpected_names
    missing = [found for found in discovered if not found.exists()]
    assert not missing
def test_load_schema():
    """The schema loads as a dict carrying the expected dialect and title."""
    loaded = load_schema()
    assert isinstance(loaded, dict)

    expected_values = {
        "$schema": "https://json-schema.org/draft/2020-12/schema",
        "title": "N8N-Style Workflow",
    }
    for key, expected in expected_values.items():
        assert loaded.get(key) == expected
def test_validate_all_workflow_files():
    """Every workflow file found under the packages directory passes validation."""
    package_root = Path(__file__).parent.parent / "autometabuilder"
    candidates = find_workflow_files(package_root)
    schema = load_schema()

    failures = []
    for candidate in candidates:
        is_valid, error_msg = validate_workflow_file(candidate, schema)
        if is_valid:
            continue
        try:
            shown = candidate.relative_to(package_root)
        except ValueError:
            # Fall back to the path as found when it is not under package_root.
            shown = candidate
        failures.append((shown, error_msg))

    # Report every failure at once so a single run shows the full picture.
    if failures:
        lines = [f" - {path}: {error}" for path, error in failures]
        error_report = "\n".join(lines)
        pytest.fail(f"Workflow validation failed for {len(failures)} file(s):\n{error_report}")
def test_validate_minimal_valid_workflow(tmp_path):
    """A workflow whose single node has only the required fields validates."""
    schema = load_schema()

    only_node = {
        "id": "node-1",
        "name": "Test Node",
        "type": "core.test",
        "typeVersion": 1,
        "position": [0, 0],
    }
    payload = {"name": "Test Workflow", "nodes": [only_node], "connections": {}}

    target = tmp_path / "workflow.json"
    target.write_text(json.dumps(payload))

    is_valid, error_msg = validate_workflow_file(target, schema)
    assert is_valid, f"Validation failed: {error_msg}"
def test_validate_workflow_with_missing_name(tmp_path):
    """A workflow without the required top-level ``name`` is rejected."""
    schema = load_schema()

    only_node = {
        "id": "node-1",
        "name": "Test Node",
        "type": "core.test",
        "typeVersion": 1,
        "position": [0, 0],
    }
    # Deliberately no "name" key at the top level.
    payload = {"nodes": [only_node], "connections": {}}

    target = tmp_path / "workflow.json"
    target.write_text(json.dumps(payload))

    is_valid, error_msg = validate_workflow_file(target, schema)
    assert not is_valid
    # The message should either mention a required property or quote 'name'.
    mentions_required = "required" in error_msg.lower()
    quotes_name = "'name'" in error_msg
    assert mentions_required or quotes_name
def test_validate_workflow_with_empty_nodes(tmp_path):
    """An empty ``nodes`` array is rejected and the error points at ``nodes``.

    The schema sets ``minItems: 1`` on ``nodes``. The wording jsonschema uses
    for that violation is not stable across releases ("[] is too short" in
    older ones, "[] should be non-empty" in newer ones), and the project
    allows any compatible 4.x release, so both wordings are accepted here.
    """
    schema = load_schema()
    workflow_data = {
        "name": "Empty Workflow",
        "nodes": [],
        "connections": {}
    }
    workflow_file = tmp_path / "workflow.json"
    workflow_file.write_text(json.dumps(workflow_data))

    is_valid, error_msg = validate_workflow_file(workflow_file, schema)
    assert not is_valid

    lowered = error_msg.lower()
    # The location suffix added by validate_workflow_file names the field.
    assert "nodes" in lowered
    # Accept either release's phrasing of the minItems violation.
    assert "too short" in lowered or "non-empty" in lowered
def test_validate_workflow_with_invalid_json(tmp_path):
    """A file that is not parseable JSON is reported as invalid."""
    schema = load_schema()

    broken_file = tmp_path / "workflow.json"
    broken_file.write_text("{ invalid json }")

    outcome = validate_workflow_file(broken_file, schema)
    is_valid, error_msg = outcome
    assert not is_valid
    # The failure message is expected to mention JSON.
    assert "json" in error_msg.lower()
def test_validate_workflow_with_invalid_node(tmp_path):
    """A node lacking its required fields makes the workflow invalid."""
    schema = load_schema()

    # Only "id" is given; name, type, typeVersion and position are left out.
    incomplete_node = {"id": "node-1"}
    payload = {
        "name": "Test Workflow",
        "nodes": [incomplete_node],
        "connections": {},
    }

    target = tmp_path / "workflow.json"
    target.write_text(json.dumps(payload))

    is_valid, _error_msg = validate_workflow_file(target, schema)
    assert not is_valid
def test_validate_workflow_with_triggers(tmp_path):
    """A workflow that declares a ``triggers`` array validates."""
    schema = load_schema()

    webhook_node = {
        "id": "webhook-1",
        "name": "Webhook",
        "type": "n8n-nodes-base.webhook",
        "typeVersion": 1,
        "position": [0, 0],
    }
    # The trigger refers to the node above through its id.
    webhook_trigger = {
        "nodeId": "webhook-1",
        "kind": "webhook",
        "enabled": True,
        "meta": {"path": "/api/test"},
    }
    payload = {
        "name": "Test Workflow with Triggers",
        "nodes": [webhook_node],
        "connections": {},
        "triggers": [webhook_trigger],
    }

    target = tmp_path / "workflow.json"
    target.write_text(json.dumps(payload))

    is_valid, error_msg = validate_workflow_file(target, schema)
    assert is_valid, f"Validation failed: {error_msg}"

178
docs/WORKFLOW_VALIDATION.md Normal file
View File

@@ -0,0 +1,178 @@
# Workflow JSON Validation
This repository includes a validation tool for workflow JSON files based on the N8N-style workflow schema defined in ROADMAP.md and extracted to a dedicated schema file.
## Schema Definition
The workflow JSON schema is extracted from [ROADMAP.md](../ROADMAP.md) (lines 84-430) and stored in `backend/autometabuilder/schema/n8n-workflow.schema.json`. It defines the structure for N8N-style workflows with the following key requirements:
- **Required fields**: `name`, `nodes`, `connections`
- **Nodes**: Must contain at least 1 node with `id`, `name`, `type`, `typeVersion`, and `position`
- **Connections**: Define the flow between nodes
- **Optional fields**: `id`, `active`, `versionId`, `createdAt`, `updatedAt`, `tags`, `meta`, `settings`, `pinData`, `staticData`, `credentials`, `triggers`
## Validation Tool
### Running the Validation Tool
You can validate all workflow JSON files using the following methods:
#### 1. Using Poetry Command (Recommended)
```bash
poetry run validate-workflows
```
#### 2. Direct Python Execution
```bash
cd backend/autometabuilder
python tools/validate_workflows.py
```
#### 3. As Part of CI
The validation is automatically run as part of the CI pipeline. See `.github/workflows/ci.yml` for the configuration.
### What Gets Validated
The tool automatically discovers and validates all `workflow.json` files in the `backend/autometabuilder/packages/` directory.
Currently, there are 19 workflow files being validated:
- backend_bootstrap
- blank
- conditional_logic_demo
- contextual_iterative_loop
- data_processing_demo
- default_app_workflow
- dict_plugins_test
- game_tick_loop
- iterative_loop
- list_plugins_test
- logic_plugins_test
- math_plugins_test
- plan_execute_summarize
- repo_scan_context
- single_pass
- string_plugins_test
- testing_triangle
- web_server_bootstrap
- web_server_json_routes
### Validation Rules
The validator checks:
1. **JSON Syntax**: File must be valid JSON
2. **Required Fields**: Must have `name`, `nodes`, `connections`
3. **Name Field**: Must be a non-empty string
4. **Nodes Array**: Must contain at least 1 node
5. **Node Structure**: Each node must have:
- `id` (non-empty string)
- `name` (non-empty string)
- `type` (non-empty string)
- `typeVersion` (number >= 1)
- `position` (array of 2 numbers [x, y])
6. **Connections**: Must be an object/dict
7. **Triggers** (if present): Must be an array of valid trigger objects
### Example Valid Workflow
```json
{
"name": "Example Workflow",
"active": false,
"nodes": [
{
"id": "start",
"name": "Start",
"type": "core.start",
"typeVersion": 1,
"position": [0, 0],
"parameters": {}
}
],
"connections": {},
"triggers": [
{
"nodeId": "start",
"kind": "manual",
"enabled": true,
"meta": {
"description": "Manually triggered workflow"
}
}
]
}
```
## Testing
The validation tool has its own test suite:
```bash
# Run validation tests
poetry run pytest backend/tests/test_workflow_validation.py -v
# Run all tests including workflow validation
poetry run pytest
```
## Adding New Workflows
When adding new workflow JSON files:
1. Place the `workflow.json` file in a package directory under `backend/autometabuilder/packages/`
2. Ensure it follows the schema defined in ROADMAP.md
3. Run the validation tool to verify: `poetry run validate-workflows`
4. The validation will automatically run in CI when you push your changes
## Common Issues
### Empty Nodes Array
**Error**: `[] is too short (at nodes)` (newer `jsonschema` releases word this as `[] should be non-empty (at nodes)`)
**Solution**: Add at least one node to the workflow. Even blank templates need a starting node:
```json
{
"name": "Blank Canvas",
"nodes": [
{
"id": "start",
"name": "Start",
"type": "core.start",
"typeVersion": 1,
"position": [0, 0],
"parameters": {}
}
],
"connections": {}
}
```
### Missing Required Fields
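**Error**: `'name' is a required property (at root)` — the tool reports only the first schema error per file, so a second missing field shows up once the first is fixed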
**Solution**: Ensure your workflow JSON includes all required top-level fields: `name`, `nodes`, and `connections`.
### Invalid Node Structure
**Error**: the underlying `jsonschema` message followed by the location of the offending element, e.g. `'position' is a required property (at nodes.0)`
**Solution**: Verify each node has all required fields and correct types. Common issues:
- Missing `position` array
- `typeVersion` less than 1
- Empty strings for `id`, `name`, or `type`
- Position not being an array of exactly 2 numbers
## Implementation Details
The validation is implemented using:
- **JSON Schema**: `backend/autometabuilder/schema/n8n-workflow.schema.json` (extracted from ROADMAP.md)
- **Validation Library**: `jsonschema` (official JSON Schema validator for Python)
- **Validation Tool**: `backend/autometabuilder/tools/validate_workflows.py`
- **Tests**: `backend/tests/test_workflow_validation.py`
- **Schema Tests**: `backend/tests/test_n8n_schema.py`

View File

@@ -17,6 +17,7 @@ tenacity = "^9.1.2"
flask = "^2.3.3"
slack-sdk = "^3.39.0"
discord-py = "^2.6.4"
jsonschema = "^4.10.3"
[build-system]
requires = ["poetry-core"]
@@ -24,6 +25,7 @@ build-backend = "poetry.core.masonry.api"
[tool.poetry.scripts]
autometabuilder = "autometabuilder.main:main"
validate-workflows = "autometabuilder.tools.validate_workflows:main"
[dependency-groups]
dev = [