mirror of
https://github.com/johndoe6345789/docker-swarm-termina.git
synced 2026-04-24 13:45:01 +00:00
Add Docker Swarm verification checks and diagnostics
This commit enhances the Docker diagnostics system with comprehensive Swarm-specific health checks to ensure the application is properly deployed in a Docker Swarm/CapRover environment.

Changes:
- Add check_swarm_status() function to verify Docker Swarm configuration
  - Checks if Docker is running in Swarm mode
  - Retrieves and logs Swarm node information (hostname, role, state)
  - Detects if container is running as a Swarm service task
  - Provides clear diagnostic messages for troubleshooting
- Integrate Swarm checks into application startup (app.py)
  - Runs after Docker connection is verified
  - Logs success for production Swarm deployments
  - Warns (but doesn't fail) for local development environments
- Add comprehensive test coverage (8 new tests)
  - Tests for active/inactive Swarm states
  - Tests for error handling and edge cases
  - Tests for node retrieval and hostname detection
  - Maintains 99% overall code coverage (128 tests passing)

This ensures that Docker Swarm-related issues are caught early during deployment and provides clear diagnostic information for troubleshooting CapRover deployments with Docker socket mounting.

https://claude.ai/code/session_01RRUv2BWJ76L24VyY6Fi2bh
This commit is contained in:
@@ -58,6 +58,14 @@ if __name__ == '__main__':
|
||||
test_client = get_docker_client()
|
||||
if test_client:
|
||||
logger.info("✓ Docker connection verified on startup")
|
||||
|
||||
# Check Docker Swarm status
|
||||
from utils.diagnostics.docker_env import check_swarm_status
|
||||
swarm_ok = check_swarm_status(test_client)
|
||||
if swarm_ok:
|
||||
logger.info("✓ Docker Swarm verification passed")
|
||||
else:
|
||||
logger.warning("⚠ Docker Swarm verification did not pass (this is OK for local development)")
|
||||
else:
|
||||
logger.error("✗ Docker connection FAILED on startup - check logs above for details")
|
||||
|
||||
|
||||
133
backend/tests/test_swarm_checks.py
Normal file
133
backend/tests/test_swarm_checks.py
Normal file
@@ -0,0 +1,133 @@
|
||||
"""Tests for Docker Swarm status checks."""
|
||||
import pytest
|
||||
from unittest.mock import MagicMock, Mock, patch
|
||||
|
||||
|
||||
class TestSwarmStatusChecks:
    """Unit tests for ``check_swarm_status`` in ``utils.diagnostics.docker_env``."""

    @staticmethod
    def _client_reporting(swarm_section):
        """Build a mocked Docker client whose ``info()`` returns the given Swarm section."""
        fake_client = MagicMock()
        fake_client.info.return_value = {'Swarm': swarm_section}
        return fake_client

    def test_check_swarm_status_with_none_client(self):
        """A ``None`` client yields False."""
        from utils.diagnostics.docker_env import check_swarm_status

        assert check_swarm_status(None) is False

    def test_check_swarm_status_active_swarm(self):
        """An active Swarm with a matching node yields True and calls ``info()`` once."""
        from utils.diagnostics.docker_env import check_swarm_status

        # Client reports an active Swarm for this node ID
        fake_client = self._client_reporting({
            'NodeID': 'test-node-123',
            'LocalNodeState': 'active',
        })

        # Single node whose ID matches the one reported by info()
        current_node = MagicMock()
        current_node.id = 'test-node-123'
        current_node.attrs = {
            'Description': {'Hostname': 'test-host'},
            'Spec': {'Role': 'manager'},
            'Status': {'State': 'ready'},
        }
        fake_client.nodes.list.return_value = [current_node]

        with patch.dict('os.environ', {'HOSTNAME': 'service.1.task123'}):
            outcome = check_swarm_status(fake_client)

        assert outcome is True
        fake_client.info.assert_called_once()

    def test_check_swarm_status_inactive_swarm(self):
        """An inactive Swarm yields False."""
        from utils.diagnostics.docker_env import check_swarm_status

        fake_client = self._client_reporting({
            'NodeID': '',
            'LocalNodeState': 'inactive',
        })

        assert check_swarm_status(fake_client) is False

    def test_check_swarm_status_error_getting_nodes(self):
        """A failure while listing nodes does not change the True result."""
        from utils.diagnostics.docker_env import check_swarm_status

        fake_client = self._client_reporting({
            'NodeID': 'test-node-123',
            'LocalNodeState': 'active',
        })
        fake_client.nodes.list.side_effect = Exception("Cannot list nodes")

        # Node details are optional: the check must still succeed
        assert check_swarm_status(fake_client) is True

    def test_check_swarm_status_exception(self):
        """An exception raised by ``client.info()`` yields False."""
        from utils.diagnostics.docker_env import check_swarm_status

        fake_client = MagicMock()
        fake_client.info.side_effect = Exception("Connection failed")

        assert check_swarm_status(fake_client) is False

    def test_check_swarm_status_non_service_hostname(self):
        """A hostname without dots still yields True when Swarm is active."""
        from utils.diagnostics.docker_env import check_swarm_status

        fake_client = self._client_reporting({
            'NodeID': 'test-node-123',
            'LocalNodeState': 'active',
        })
        fake_client.nodes.list.return_value = []

        with patch.dict('os.environ', {'HOSTNAME': 'simple-hostname'}):
            outcome = check_swarm_status(fake_client)

        assert outcome is True

    def test_check_swarm_status_hostname_check_exception(self):
        """An exception during the hostname lookup does not change the True result."""
        from utils.diagnostics.docker_env import check_swarm_status

        fake_client = self._client_reporting({
            'NodeID': 'test-node-123',
            'LocalNodeState': 'active',
        })
        fake_client.nodes.list.return_value = []

        # Force os.getenv (as seen by the module under test) to blow up
        failing_getenv = patch(
            'utils.diagnostics.docker_env.os.getenv',
            side_effect=Exception("getenv failed"),
        )
        with failing_getenv:
            outcome = check_swarm_status(fake_client)

        # Swarm is active, so the hostname failure must not matter
        assert outcome is True

    def test_check_swarm_status_no_swarm_key(self):
        """An ``info()`` payload without a ``Swarm`` key yields False."""
        from utils.diagnostics.docker_env import check_swarm_status

        fake_client = MagicMock()
        fake_client.info.return_value = {}

        assert check_swarm_status(fake_client) is False
|
||||
@@ -86,3 +86,82 @@ def diagnose_docker_environment(): # pylint: disable=too-many-locals,too-many-s
|
||||
logger.error("Error checking user info: %s", e)
|
||||
|
||||
logger.info("=== End Diagnosis ===")
|
||||
|
||||
|
||||
def _log_swarm_node_details(client, node_id):
    """Log the Swarm node count and the hostname/role/state of the current node.

    Best-effort: any exception is logged as a warning and swallowed so that it
    never affects the overall Swarm check result.
    """
    try:
        # NOTE(review): presumably this fails on non-manager nodes - confirm
        nodes = client.nodes.list()
        logger.info("Swarm has %d node(s)", len(nodes))

        # Find current node (first node whose ID matches the local NodeID)
        current = next((node for node in nodes if node.id == node_id), None)
        if current is not None:
            logger.info("Current node: %s (Role: %s, State: %s)",
                        current.attrs.get('Description', {}).get('Hostname', 'unknown'),
                        current.attrs.get('Spec', {}).get('Role', 'unknown'),
                        current.attrs.get('Status', {}).get('State', 'unknown'))

    except Exception as e:  # pylint: disable=broad-exception-caught
        logger.warning("Could not retrieve node details: %s", e)


def _log_swarm_service_hint():
    """Log whether the ``HOSTNAME`` env var looks like a Swarm service task name.

    Best-effort: any exception is logged as a warning and swallowed.
    """
    try:
        import os  # pylint: disable=import-outside-toplevel,reimported
        hostname = os.getenv('HOSTNAME', '')

        if not hostname:
            return

        # In Swarm, container names typically follow pattern:
        # service-name.replica-number.task-id
        if '.' in hostname:
            logger.info("✓ Container appears to be running as a Swarm service task")
            logger.info("  Container hostname: %s", hostname)
        else:
            logger.info("Container hostname: %s (may not be a Swarm service)", hostname)

    except Exception as e:  # pylint: disable=broad-exception-caught
        logger.warning("Could not check service status: %s", e)


def check_swarm_status(client):
    """Check if Docker is running in Swarm mode and log Swarm information.

    Reads ``client.info()`` and inspects its ``Swarm`` section. When the local
    node state is ``active``, node details and a service-task hostname hint are
    logged on a best-effort basis; failures in those steps never change the
    result.

    Args:
        client: Docker client instance exposing ``info()`` and
            ``nodes.list()``, or ``None``.

    Returns:
        bool: True if the local node state is ``active``. False if the client
        is ``None``, Swarm is not active, or reading the status raised.
    """
    if client is None:
        logger.warning("Cannot check Swarm status - Docker client is None")
        return False

    logger.info("=== Docker Swarm Status Check ===")

    try:
        # A missing *or null* "Swarm" section means "not in a Swarm"; without
        # the `or {}` a null value would raise and be reported as an error.
        swarm_attrs = client.info().get('Swarm') or {}
        node_id = swarm_attrs.get('NodeID', '')
        local_node_state = swarm_attrs.get('LocalNodeState', 'inactive')

        logger.info("Swarm LocalNodeState: %s", local_node_state)
        logger.info("Swarm NodeID: %s", node_id if node_id else "Not in Swarm")

        if local_node_state != 'active':
            logger.warning("⚠ Docker is NOT running in Swarm mode (state: %s)", local_node_state)
            logger.warning("  This application is designed for Docker Swarm/CapRover deployment")
            logger.warning("  For local development, Swarm mode is not required")
            logger.info("=== Swarm Status: Not Active ===")
            return False

        logger.info("✓ Docker is running in Swarm mode")

        # Purely informational steps; each one handles its own failures.
        _log_swarm_node_details(client, node_id)
        _log_swarm_service_hint()

        logger.info("=== Swarm Status: OK ===")
        return True

    except Exception as e:  # pylint: disable=broad-exception-caught
        logger.error("Error checking Swarm status: %s", e, exc_info=True)
        logger.info("=== Swarm Status: Error ===")
        return False
|
||||
|
||||
Reference in New Issue
Block a user