feat: Add crash recovery service and integrate it into ServiceBasedApp

This commit is contained in:
2026-01-04 14:22:18 +00:00
parent 9b47bffe3d
commit ced8b8f5a7
6 changed files with 373 additions and 10 deletions

View File

@@ -132,6 +132,7 @@ if(BUILD_SDL3_APP)
src/services/impl/buffer_service.cpp
src/services/impl/render_command_service.cpp
src/services/impl/sdl_audio_service.cpp
src/services/impl/crash_recovery_service.cpp
src/services/impl/vulkan_gui_service.cpp
src/services/impl/bullet_physics_service.cpp
src/services/impl/scene_service.cpp

View File

@@ -17,6 +17,7 @@
#include "services/impl/sdl_audio_service.hpp"
#include "services/impl/vulkan_gui_service.hpp"
#include "services/impl/bullet_physics_service.hpp"
#include "services/impl/crash_recovery_service.hpp"
#include "services/impl/logger_service.hpp"
#include <stdexcept>
@@ -24,16 +25,23 @@ namespace sdl3cpp::app {
ServiceBasedApp::ServiceBasedApp(const std::filesystem::path& scriptPath)
: scriptPath_(scriptPath) {
logging::Logger::GetInstance().Trace("ServiceBasedApp::ServiceBasedApp: constructor starting");
// Register logger service first
registry_.RegisterService<services::ILogger, services::impl::LoggerService>();
logger_ = registry_.GetService<services::ILogger>();
logger_->Trace("ServiceBasedApp", "ServiceBasedApp", "scriptPath=" + scriptPath_.string(), "constructor starting");
try {
logging::Logger::GetInstance().Info("ServiceBasedApp::ServiceBasedApp: Setting up SDL");
logger_->Info("ServiceBasedApp::ServiceBasedApp: Setting up SDL");
SetupSDL();
logging::Logger::GetInstance().Info("ServiceBasedApp::ServiceBasedApp: Registering services");
logger_->Info("ServiceBasedApp::ServiceBasedApp: Registering services");
RegisterServices();
// Get the logger service after registration
logger_ = registry_.GetService<services::ILogger>();
// Get and initialize crash recovery service
crashRecoveryService_ = registry_.GetService<services::ICrashRecoveryService>();
if (crashRecoveryService_) {
crashRecoveryService_->Initialize();
}
logger_->Info("ServiceBasedApp::ServiceBasedApp: Creating controllers");
@@ -50,6 +58,11 @@ ServiceBasedApp::ServiceBasedApp(const std::filesystem::path& scriptPath)
ServiceBasedApp::~ServiceBasedApp() {
logger_->Trace("ServiceBasedApp", "~ServiceBasedApp", "", "Entering");
// Shutdown crash recovery service
if (crashRecoveryService_) {
crashRecoveryService_->Shutdown();
}
applicationController_.reset();
lifecycleController_.reset();
@@ -98,8 +111,25 @@ void ServiceBasedApp::Run() {
}
}
// Run the main application loop
applicationController_->Run();
// Run the main application loop with crash recovery
if (crashRecoveryService_) {
bool success = crashRecoveryService_->ExecuteWithTimeout(
[this]() { applicationController_->Run(); },
30000, // 30 second timeout for the main loop
"Main Application Loop"
);
if (!success) {
logger_->Warn("ServiceBasedApp::Run: Main loop timed out, attempting recovery");
if (crashRecoveryService_->AttemptRecovery()) {
logger_->Info("ServiceBasedApp::Run: Recovery successful, restarting main loop");
applicationController_->Run(); // Try again
}
}
} else {
// Fallback if no crash recovery service
applicationController_->Run();
}
// Shutdown all services
lifecycleController_->ShutdownAll();
@@ -108,7 +138,13 @@ void ServiceBasedApp::Run() {
} catch (const std::exception& e) {
logger_->Error("ServiceBasedApp::Run: Application error: " + std::string(e.what()));
throw;
// Attempt recovery on exception
if (crashRecoveryService_ && crashRecoveryService_->AttemptRecovery()) {
logger_->Info("ServiceBasedApp::Run: Recovered from exception");
} else {
throw;
}
}
}
@@ -124,8 +160,11 @@ void ServiceBasedApp::SetupSDL() {
void ServiceBasedApp::RegisterServices() {
logger_->Trace("ServiceBasedApp", "RegisterServices", "", "Entering");
// Logger service (needed by all other services)
registry_.RegisterService<services::ILogger, services::impl::LoggerService>();
// Logger service already registered in constructor
// Crash recovery service (needed early for crash detection)
registry_.RegisterService<services::ICrashRecoveryService, services::impl::CrashRecoveryService>(
registry_.GetService<services::ILogger>());
// Event bus (needed by window service)
registry_.RegisterService<events::EventBus, events::EventBus>();

View File

@@ -7,6 +7,7 @@
#include "controllers/lifecycle_controller.hpp"
#include "controllers/application_controller.hpp"
#include "services/interfaces/i_logger.hpp"
#include "services/interfaces/i_crash_recovery_service.hpp"
namespace sdl3cpp::app {
@@ -37,6 +38,7 @@ private:
std::unique_ptr<controllers::LifecycleController> lifecycleController_;
std::unique_ptr<controllers::ApplicationController> applicationController_;
std::shared_ptr<services::ILogger> logger_;
std::shared_ptr<services::ICrashRecoveryService> crashRecoveryService_;
};
} // namespace sdl3cpp::app

View File

@@ -0,0 +1,202 @@
#include "crash_recovery_service.hpp"
#include <unistd.h>
#include <chrono>
#include <csignal>
#include <cstring>
#include <future>
#include <sstream>
#include <thread>
#include <utility>
namespace sdl3cpp::services::impl {
// Process-wide pointer to the service that currently receives crash signals.
// SignalHandler() is a static function and uses this pointer to reach
// HandleCrash(); SetupSignalHandlers() sets it to `this` and
// RemoveSignalHandlers() resets it to nullptr.
CrashRecoveryService* CrashRecoveryService::instance_ = nullptr;
CrashRecoveryService::CrashRecoveryService(std::shared_ptr<ILogger> logger)
: logger_(logger)
, crashDetected_(false)
, lastSignal_(0)
, signalHandlersInstalled_(false) {
logger_->Trace("CrashRecoveryService", "CrashRecoveryService", "", "Created");
}
/// Logs the teardown and releases the signal handlers through Shutdown().
CrashRecoveryService::~CrashRecoveryService() {
    logger_->Trace("CrashRecoveryService", "~CrashRecoveryService", "", "Destroying");
    // RemoveSignalHandlers() returns early when the handlers were already
    // removed, so an earlier explicit Shutdown() is harmless here.
    this->Shutdown();
}
/**
 * @brief Clears any recorded crash state and installs the crash signal handlers.
 *
 * The state is reset BEFORE the handlers are installed. Resetting afterwards
 * would silently erase a crash that a freshly installed handler recorded in
 * the window between installation and reset.
 */
void CrashRecoveryService::Initialize() {
    logger_->Trace("CrashRecoveryService", "Initialize", "", "Initializing crash recovery service");

    // Start from a clean slate while no handler of ours can be running yet
    // (on the first call; SetupSignalHandlers() is a no-op on later calls).
    crashDetected_ = false;
    lastSignal_ = 0;
    crashReport_.clear();

    SetupSignalHandlers();

    logger_->Info("CrashRecoveryService::Initialize: Crash recovery service initialized");
}
/// Uninstalls the crash signal handlers, logging before and after.
void CrashRecoveryService::Shutdown() {
    auto& log = *logger_;

    log.Trace("CrashRecoveryService", "Shutdown", "", "Shutting down crash recovery service");
    RemoveSignalHandlers();  // no-op when the handlers are not installed
    log.Info("CrashRecoveryService::Shutdown: Crash recovery service shutdown");
}
/**
 * @brief Runs @p func on a worker thread and waits at most @p timeoutMs for it.
 *
 * @param func          Operation to execute; it runs on a separate thread.
 * @param timeoutMs     Maximum time to wait, in milliseconds.
 * @param operationName Name used in log messages.
 * @return true if @p func finished within the timeout, false if the timeout
 *         elapsed first.
 * @throws Any std::exception thrown by @p func is logged and rethrown to the caller.
 *
 * @warning There is no way to cancel a running thread. After a timeout the
 *          worker is detached and @p func keeps running in the background, so
 *          everything it references must outlive it, and callers must not
 *          assume the operation has stopped (e.g. must not blindly start the
 *          same work a second time).
 */
bool CrashRecoveryService::ExecuteWithTimeout(std::function<void()> func, int timeoutMs, const std::string& operationName) {
    logger_->Trace("CrashRecoveryService", "ExecuteWithTimeout", "timeoutMs=" + std::to_string(timeoutMs) + ", operationName=" + operationName, "Executing with timeout");

    // A future returned by std::async blocks in its destructor until the task
    // has finished, which kept the timeout branch from ever returning early.
    // The future of a packaged_task does not block, so run the task on an
    // explicit thread instead.
    std::packaged_task<void()> task(std::move(func));
    std::future<void> result = task.get_future();
    std::thread worker(std::move(task));

    if (result.wait_for(std::chrono::milliseconds(timeoutMs)) == std::future_status::timeout) {
        // Detach before doing anything that may throw: destroying a joinable
        // std::thread terminates the process.
        worker.detach();
        logger_->Warn("CrashRecoveryService::ExecuteWithTimeout: Operation '" + operationName + "' timed out after " + std::to_string(timeoutMs) + "ms");
        return false;
    }

    // The task has completed; reap the thread before inspecting the outcome.
    worker.join();

    try {
        result.get();  // Re-throw any exception raised by func
        logger_->Trace("CrashRecoveryService", "ExecuteWithTimeout", "", "Operation completed successfully");
        return true;
    } catch (const std::exception& e) {
        logger_->Error("CrashRecoveryService::ExecuteWithTimeout: Operation '" + operationName + "' threw exception: " + e.what());
        throw;
    }
}
/// Reports whether a crash signal has been recorded since the state was last reset.
bool CrashRecoveryService::IsCrashDetected() const {
    const bool detected = crashDetected_;  // implicit atomic load
    return detected;
}
/**
 * @brief Tries to recover from a previously recorded crash.
 *
 * Runs under crashMutex_. When no crash is recorded this logs a warning and
 * reports success. Otherwise PerformRecovery() decides the outcome, and the
 * recorded crash state is cleared only if it succeeds.
 *
 * @return true if there was nothing to recover from or recovery succeeded,
 *         false if PerformRecovery() failed.
 */
bool CrashRecoveryService::AttemptRecovery() {
    logger_->Trace("CrashRecoveryService", "AttemptRecovery", "", "Attempting recovery");

    std::lock_guard<std::mutex> guard(crashMutex_);

    if (!crashDetected_.load()) {
        logger_->Warn("CrashRecoveryService::AttemptRecovery: No crash detected");
        return true;
    }

    if (!PerformRecovery()) {
        logger_->Error("CrashRecoveryService::AttemptRecovery: Recovery failed");
        return false;
    }

    // Recovery worked: forget the crash that triggered it.
    crashDetected_.store(false);
    lastSignal_.store(0);
    crashReport_.clear();
    logger_->Info("CrashRecoveryService::AttemptRecovery: Recovery successful");
    return true;
}
/// Returns a copy of the current crash report, taken while holding crashMutex_.
/// The string is empty when no report has been stored.
std::string CrashRecoveryService::GetCrashReport() const {
    std::unique_lock<std::mutex> guard(crashMutex_);
    std::string snapshot = crashReport_;
    return snapshot;
}
/// Static entry point registered with sigaction(); forwards the signal to the
/// registered instance, or does nothing when no instance is registered.
void CrashRecoveryService::SignalHandler(int signal) {
    if (instance_ == nullptr) {
        return;
    }
    instance_->HandleCrash(signal);
}
/**
 * @brief Installs SignalHandler() for SIGSEGV, SIGABRT, SIGFPE and SIGILL.
 *
 * The previous disposition of each signal is saved in the matching old*
 * member so RemoveSignalHandlers() can restore it. A failed installation is
 * logged as a warning and does not stop the remaining ones. Calling this
 * while the handlers are already installed is a no-op.
 */
void CrashRecoveryService::SetupSignalHandlers() {
    if (signalHandlersInstalled_) {
        return;
    }

    // The static handler reaches this object through instance_.
    instance_ = this;

    struct sigaction sa;
    std::memset(&sa, 0, sizeof(sa));
    sa.sa_handler = SignalHandler;
    // POSIX requires sa_mask to be initialised with sigemptyset(); an
    // all-zero byte pattern is not guaranteed to be an empty set.
    sigemptyset(&sa.sa_mask);
    // SA_RESETHAND makes each handler one-shot. Returning from a handler for a
    // hardware-generated SIGSEGV/SIGFPE/SIGILL is undefined; in practice the
    // faulting instruction is retried, which would re-enter a persistent
    // handler forever. With the disposition reset to the default, the retry
    // terminates the process after the crash has been recorded once.
    sa.sa_flags = SA_RESTART | SA_RESETHAND;

    struct HandlerSlot {
        int signum;
        const char* name;
        struct sigaction* previous;
    };
    const HandlerSlot slots[] = {
        {SIGSEGV, "SIGSEGV", &oldSigsegv_},
        {SIGABRT, "SIGABRT", &oldSigabrt_},
        {SIGFPE, "SIGFPE", &oldSigfpe_},
        {SIGILL, "SIGILL", &oldSigill_},
    };

    // Install handlers for common crash signals
    for (const HandlerSlot& slot : slots) {
        if (sigaction(slot.signum, &sa, slot.previous) == -1) {
            logger_->Warn(std::string("CrashRecoveryService::SetupSignalHandlers: Failed to install ") + slot.name + " handler");
        }
    }

    signalHandlersInstalled_ = true;
    logger_->Info("CrashRecoveryService::SetupSignalHandlers: Signal handlers installed");
}
/// Restores the signal dispositions saved by SetupSignalHandlers() and
/// unregisters this object from the static handler. Does nothing when the
/// handlers are not installed.
void CrashRecoveryService::RemoveSignalHandlers() {
    if (!signalHandlersInstalled_) {
        return;
    }

    struct SavedAction {
        int signum;
        const struct sigaction* previous;
    };
    const SavedAction saved[] = {
        {SIGSEGV, &oldSigsegv_},
        {SIGABRT, &oldSigabrt_},
        {SIGFPE, &oldSigfpe_},
        {SIGILL, &oldSigill_},
    };

    // Put the original handlers back; failures are ignored.
    for (const SavedAction& entry : saved) {
        sigaction(entry.signum, entry.previous, nullptr);
    }

    signalHandlersInstalled_ = false;
    instance_ = nullptr;
    logger_->Info("CrashRecoveryService::RemoveSignalHandlers: Signal handlers removed");
}
/**
 * @brief Records a crash signal: sets the crash flag, remembers the signal
 *        number, builds the textual crash report and logs it as an error.
 *
 * @param signal Signal number passed on by SignalHandler().
 *
 * NOTE(review): this runs inside a signal handler, yet it locks a mutex,
 * allocates memory through the string stream and calls the logger. None of
 * these are async-signal-safe, and the handler can deadlock if the signal
 * interrupts code that already holds crashMutex_. Moving the report
 * formatting out of the handler needs a coordinated change with
 * GetCrashReport().
 */
void CrashRecoveryService::HandleCrash(int signal) {
    std::lock_guard<std::mutex> guard(crashMutex_);

    crashDetected_.store(true);
    lastSignal_.store(signal);

    // Human-readable name for the signals this service installs handlers for.
    const char* description = "Unknown signal";
    if (signal == SIGSEGV) {
        description = "SIGSEGV - Segmentation fault";
    } else if (signal == SIGABRT) {
        description = "SIGABRT - Abort signal";
    } else if (signal == SIGFPE) {
        description = "SIGFPE - Floating point exception";
    } else if (signal == SIGILL) {
        description = "SIGILL - Illegal instruction";
    }

    std::ostringstream report;
    report << "Crash detected! Signal: " << signal << " (" << description << ")";
    report << "\nProcess ID: " << getpid();
    report << "\nThread ID: " << std::this_thread::get_id();

    crashReport_ = report.str();
    logger_->Error("CrashRecoveryService::HandleCrash: " + crashReport_);

    // Only logging and flag-setting happen here. Possible future work: core
    // dump generation, reporting to a monitoring service, graceful shutdown,
    // restarting critical services.
}
/**
 * @brief Placeholder recovery step: logs its start and completion and always
 *        reports success.
 *
 * A fuller implementation might restart failed services, reset corrupted
 * state, reinitialize resources or restore from a backup.
 *
 * @return Always true.
 */
bool CrashRecoveryService::PerformRecovery() {
    const bool recovered = true;  // nothing here can fail yet

    logger_->Info("CrashRecoveryService::PerformRecovery: Performing basic recovery");
    logger_->Info("CrashRecoveryService::PerformRecovery: Recovery completed");

    return recovered;
}
} // namespace sdl3cpp::services::impl

View File

@@ -0,0 +1,59 @@
#pragma once
#include "../interfaces/i_crash_recovery_service.hpp"
#include "../interfaces/i_logger.hpp"
#include <atomic>
#include <thread>
#include <mutex>
#include <condition_variable>
#include <csignal>
#include <string>
#include <functional>
namespace sdl3cpp::services::impl {
/**
 * @brief Crash recovery service implementation.
 *
 * Installs handlers for SIGSEGV, SIGABRT, SIGFPE and SIGILL that record the
 * received signal together with a textual crash report, and provides
 * ExecuteWithTimeout() for running an operation under a time limit.
 *
 * Signals are routed through a single static instance pointer, so only one
 * instance can have its handlers installed at any given time.
 */
class CrashRecoveryService : public ICrashRecoveryService {
public:
    /// @param logger Logger used for all diagnostics; dereferenced without a null check.
    explicit CrashRecoveryService(std::shared_ptr<ILogger> logger);
    ~CrashRecoveryService() override;

    // The object publishes its own address through instance_ for the signal
    // handler, so it must never be copied or moved.
    CrashRecoveryService(const CrashRecoveryService&) = delete;
    CrashRecoveryService& operator=(const CrashRecoveryService&) = delete;
    CrashRecoveryService(CrashRecoveryService&&) = delete;
    CrashRecoveryService& operator=(CrashRecoveryService&&) = delete;

    // ICrashRecoveryService interface
    void Initialize() override;
    void Shutdown() override;
    bool ExecuteWithTimeout(std::function<void()> func, int timeoutMs, const std::string& operationName) override;
    bool IsCrashDetected() const override;
    bool AttemptRecovery() override;
    std::string GetCrashReport() const override;

private:
    // Signal handling
    static void SignalHandler(int signal);  ///< Handler registered with sigaction(); forwards to instance_.
    void SetupSignalHandlers();
    void RemoveSignalHandlers();

    // Crash detection and recovery
    void HandleCrash(int signal);
    bool PerformRecovery();

    std::shared_ptr<ILogger> logger_;
    std::atomic<bool> crashDetected_{false};  ///< Set once a crash signal has been recorded.
    std::atomic<int> lastSignal_{0};          ///< Number of the last recorded signal, 0 if none.
    std::string crashReport_;                 ///< Text of the last crash report; accessed under crashMutex_.
    mutable std::mutex crashMutex_;

    // Signal handler state
    static CrashRecoveryService* instance_;  ///< Instance currently receiving signals, or nullptr.
    // Value-initialised so RemoveSignalHandlers() never hands indeterminate
    // data to sigaction() if saving a previous disposition failed.
    struct sigaction oldSigsegv_{};
    struct sigaction oldSigabrt_{};
    struct sigaction oldSigfpe_{};
    struct sigaction oldSigill_{};
    bool signalHandlersInstalled_{false};
};
} // namespace sdl3cpp::services::impl

View File

@@ -0,0 +1,60 @@
#pragma once
#include <functional>
#include <string>
namespace sdl3cpp::services {
/**
 * @brief Crash recovery service interface.
 *
 * Provides mechanisms for detecting crashes and operations that exceed a
 * time limit, and for attempting recovery afterwards.
 */
class ICrashRecoveryService {
public:
virtual ~ICrashRecoveryService() = default;
/**
 * @brief Initialize crash recovery mechanisms.
 *
 * Call before relying on crash detection.
 */
virtual void Initialize() = 0;
/**
 * @brief Shutdown crash recovery mechanisms.
 */
virtual void Shutdown() = 0;
/**
 * @brief Execute a function with timeout protection.
 *
 * @param func Function to execute
 * @param timeoutMs Timeout in milliseconds
 * @param operationName Name of the operation, used for logging
 * @return true if the function completed within the timeout, false if the
 *         timeout elapsed first
 *
 * @note The CrashRecoveryService implementation rethrows any std::exception
 *       thrown by @p func instead of converting it into a false return.
 */
virtual bool ExecuteWithTimeout(std::function<void()> func, int timeoutMs, const std::string& operationName) = 0;
/**
 * @brief Check if a crash has been detected.
 *
 * @return true if a crash has been detected
 */
virtual bool IsCrashDetected() const = 0;
/**
 * @brief Attempt recovery from a detected crash.
 *
 * @return true if recovery was successful
 *
 * @note The CrashRecoveryService implementation also returns true when no
 *       crash has been detected, so a true result alone does not mean a
 *       crash was recovered from.
 */
virtual bool AttemptRecovery() = 0;
/**
 * @brief Get the crash report.
 *
 * @return Crash report string; empty when no report has been stored
 */
virtual std::string GetCrashReport() const = 0;
};
} // namespace sdl3cpp::services