From 2d393c601bb77d3689e096bbd7a812ac9f62df71 Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 1 Feb 2026 18:28:21 +0000 Subject: [PATCH] Add Docker Swarm verification checks and diagnostics This commit enhances the Docker diagnostics system with comprehensive Swarm-specific health checks to ensure the application is properly deployed in a Docker Swarm/CapRover environment. Changes: - Add check_swarm_status() function to verify Docker Swarm configuration - Checks if Docker is running in Swarm mode - Retrieves and logs Swarm node information (hostname, role, state) - Detects if container is running as a Swarm service task - Provides clear diagnostic messages for troubleshooting - Integrate Swarm checks into application startup (app.py) - Runs after Docker connection is verified - Logs success for production Swarm deployments - Warns (but doesn't fail) for local development environments - Add comprehensive test coverage (8 new tests) - Tests for active/inactive Swarm states - Tests for error handling and edge cases - Tests for node retrieval and hostname detection - Maintains 99% overall code coverage (128 tests passing) This ensures that Docker Swarm-related issues are caught early during deployment and provides clear diagnostic information for troubleshooting CapRover deployments with Docker socket mounting. 
https://claude.ai/code/session_01RRUv2BWJ76L24VyY6Fi2bh --- backend/app.py | 8 ++ backend/tests/test_swarm_checks.py | 133 ++++++++++++++++++++++++ backend/utils/diagnostics/docker_env.py | 79 ++++++++++++++ 3 files changed, 220 insertions(+) create mode 100644 backend/tests/test_swarm_checks.py diff --git a/backend/app.py b/backend/app.py index bee4060..32aef07 100644 --- a/backend/app.py +++ b/backend/app.py @@ -58,6 +58,14 @@ if __name__ == '__main__': test_client = get_docker_client() if test_client: logger.info("✓ Docker connection verified on startup") + + # Check Docker Swarm status + from utils.diagnostics.docker_env import check_swarm_status + swarm_ok = check_swarm_status(test_client) + if swarm_ok: + logger.info("✓ Docker Swarm verification passed") + else: + logger.warning("⚠ Docker Swarm verification did not pass (this is OK for local development)") else: logger.error("✗ Docker connection FAILED on startup - check logs above for details") diff --git a/backend/tests/test_swarm_checks.py b/backend/tests/test_swarm_checks.py new file mode 100644 index 0000000..998c419 --- /dev/null +++ b/backend/tests/test_swarm_checks.py @@ -0,0 +1,133 @@ +"""Tests for Docker Swarm status checks.""" +import pytest +from unittest.mock import MagicMock, Mock, patch + + +class TestSwarmStatusChecks: + """Test Docker Swarm status check functionality""" + + def test_check_swarm_status_with_none_client(self): + """Test check_swarm_status with None client""" + from utils.diagnostics.docker_env import check_swarm_status + + result = check_swarm_status(None) + assert result is False + + def test_check_swarm_status_active_swarm(self): + """Test check_swarm_status with active Swarm""" + from utils.diagnostics.docker_env import check_swarm_status + + # Mock Docker client with Swarm info + mock_client = MagicMock() + mock_client.info.return_value = { + 'Swarm': { + 'NodeID': 'test-node-123', + 'LocalNodeState': 'active' + } + } + + # Mock nodes + mock_node = MagicMock() + 
mock_node.id = 'test-node-123' + mock_node.attrs = { + 'Description': {'Hostname': 'test-host'}, + 'Spec': {'Role': 'manager'}, + 'Status': {'State': 'ready'} + } + mock_client.nodes.list.return_value = [mock_node] + + with patch.dict('os.environ', {'HOSTNAME': 'service.1.task123'}): + result = check_swarm_status(mock_client) + + assert result is True + mock_client.info.assert_called_once() + + def test_check_swarm_status_inactive_swarm(self): + """Test check_swarm_status with inactive Swarm""" + from utils.diagnostics.docker_env import check_swarm_status + + mock_client = MagicMock() + mock_client.info.return_value = { + 'Swarm': { + 'NodeID': '', + 'LocalNodeState': 'inactive' + } + } + + result = check_swarm_status(mock_client) + assert result is False + + def test_check_swarm_status_error_getting_nodes(self): + """Test check_swarm_status when getting nodes fails""" + from utils.diagnostics.docker_env import check_swarm_status + + mock_client = MagicMock() + mock_client.info.return_value = { + 'Swarm': { + 'NodeID': 'test-node-123', + 'LocalNodeState': 'active' + } + } + mock_client.nodes.list.side_effect = Exception("Cannot list nodes") + + # Should still return True even if node details fail + result = check_swarm_status(mock_client) + assert result is True + + def test_check_swarm_status_exception(self): + """Test check_swarm_status when client.info() raises exception""" + from utils.diagnostics.docker_env import check_swarm_status + + mock_client = MagicMock() + mock_client.info.side_effect = Exception("Connection failed") + + result = check_swarm_status(mock_client) + assert result is False + + def test_check_swarm_status_non_service_hostname(self): + """Test check_swarm_status with non-service hostname""" + from utils.diagnostics.docker_env import check_swarm_status + + mock_client = MagicMock() + mock_client.info.return_value = { + 'Swarm': { + 'NodeID': 'test-node-123', + 'LocalNodeState': 'active' + } + } + mock_client.nodes.list.return_value = [] + + 
def _log_current_swarm_node(client, node_id):
    """Log the Swarm node count and the details of the node matching *node_id*.

    Any exception raised by ``client.nodes.list()`` propagates to the caller,
    which decides how to report it.
    """
    nodes = client.nodes.list()
    logger.info("Swarm has %d node(s)", len(nodes))

    current = next((node for node in nodes if node.id == node_id), None)
    if current is None:
        return

    attrs = current.attrs
    logger.info("Current node: %s (Role: %s, State: %s)",
                attrs.get('Description', {}).get('Hostname', 'unknown'),
                attrs.get('Spec', {}).get('Role', 'unknown'),
                attrs.get('Status', {}).get('State', 'unknown'))


def _log_service_hostname():
    """Log whether the ``HOSTNAME`` env var looks like a Swarm task name."""
    # Local import keeps this block self-contained even if the module-level
    # import is ever removed.
    import os  # pylint: disable=import-outside-toplevel,reimported

    hostname = os.getenv('HOSTNAME', '')
    if not hostname:
        return

    # Heuristic: Swarm task *names* look like
    # service-name.replica-number.task-id, so a dot is taken as a hint.
    # NOTE(review): a container's hostname defaults to its container ID unless
    # the service sets a hostname template - confirm this check is meaningful
    # for the target deployment.
    if '.' in hostname:
        logger.info("✓ Container appears to be running as a Swarm service task")
        logger.info("  Container hostname: %s", hostname)
    else:
        logger.info("Container hostname: %s (may not be a Swarm service)", hostname)


def check_swarm_status(client):
    """Check if Docker is running in Swarm mode and log Swarm information.

    The check is purely diagnostic: it never raises. Failures while gathering
    the optional node/hostname details are logged as warnings and do not
    affect the result.

    Args:
        client: Docker client instance exposing ``info()`` and
            ``nodes.list()``, or ``None``.

    Returns:
        bool: True if the daemon reports ``LocalNodeState == 'active'``;
        False if the client is None, Swarm is not active, or the daemon
        information could not be read.
    """
    if client is None:
        logger.warning("Cannot check Swarm status - Docker client is None")
        return False

    logger.info("=== Docker Swarm Status Check ===")

    try:
        # ``or`` fallbacks treat explicit null values like missing keys, so a
        # daemon reporting ``"Swarm": null`` is classified as inactive rather
        # than as an error.
        swarm_attrs = (client.info() or {}).get('Swarm') or {}
        node_id = swarm_attrs.get('NodeID') or ''
        local_node_state = swarm_attrs.get('LocalNodeState') or 'inactive'
        # Absent key -> still attempt the node lookup (backward compatible).
        is_manager = swarm_attrs.get('ControlAvailable', True)
    except Exception as e:  # pylint: disable=broad-exception-caught
        logger.exception("Error checking Swarm status: %s", e)
        logger.info("=== Swarm Status: Error ===")
        return False

    logger.info("Swarm LocalNodeState: %s", local_node_state)
    logger.info("Swarm NodeID: %s", node_id if node_id else "Not in Swarm")

    if local_node_state != 'active':
        logger.warning("⚠ Docker is NOT running in Swarm mode (state: %s)", local_node_state)
        logger.warning("  This application is designed for Docker Swarm/CapRover deployment")
        logger.warning("  For local development, Swarm mode is not required")
        logger.info("=== Swarm Status: Not Active ===")
        return False

    logger.info("✓ Docker is running in Swarm mode")

    if is_manager:
        try:
            _log_current_swarm_node(client, node_id)
        except Exception as e:  # pylint: disable=broad-exception-caught
            logger.warning("Could not retrieve node details: %s", e)
    else:
        # Listing nodes is a manager-only API; skip it on workers instead of
        # logging a failure on every start-up.
        logger.info("Node is a Swarm worker - node details are only available on managers")

    try:
        _log_service_hostname()
    except Exception as e:  # pylint: disable=broad-exception-caught
        logger.warning("Could not check service status: %s", e)

    logger.info("=== Swarm Status: OK ===")
    return True