From 4fdbcdc4bc4265d5b1259deb14706295fc52d466 Mon Sep 17 00:00:00 2001 From: johndoe6345789 Date: Thu, 8 Jan 2026 15:39:48 +0000 Subject: [PATCH] feat(tests): add crash recovery timeout test to validate prompt timeout behavior --- CMakeLists.txt | 13 ++ CRASH_ANALYSIS.md | 146 ++++++++++++++++++ .../impl/application_loop_service.hpp | 7 +- src/services/impl/crash_recovery_service.cpp | 48 +++++- tests/crash_recovery_timeout_test.cpp | 67 ++++++++ 5 files changed, 275 insertions(+), 6 deletions(-) create mode 100644 tests/crash_recovery_timeout_test.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 666f8a0..187faa0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -659,6 +659,19 @@ target_link_libraries(gui_shader_linking_failure_test PRIVATE ) add_test(NAME gui_shader_linking_failure_test COMMAND gui_shader_linking_failure_test) +# Test: Crash recovery timeout (TDD test for main loop hang after timeout) +add_executable(crash_recovery_timeout_test + tests/crash_recovery_timeout_test.cpp + src/services/impl/crash_recovery_service.cpp +) +target_include_directories(crash_recovery_timeout_test PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/src") +target_link_libraries(crash_recovery_timeout_test PRIVATE + GTest::gtest + GTest::gtest_main + cpptrace::cpptrace +) +add_test(NAME crash_recovery_timeout_test COMMAND crash_recovery_timeout_test) + # Test: Bgfx Draw bounds crash test (REAL TDD - tests should be DISABLED until fix implemented) add_executable(bgfx_draw_bounds_crash_test tests/bgfx_draw_bounds_crash_test.cpp diff --git a/CRASH_ANALYSIS.md b/CRASH_ANALYSIS.md index 47a15a5..d2e12c6 100644 --- a/CRASH_ANALYSIS.md +++ b/CRASH_ANALYSIS.md @@ -280,3 +280,149 @@ The crash is most likely caused by: - Texture loading: [bgfx_graphics_backend.cpp:698-744](src/services/impl/bgfx_graphics_backend.cpp#L698-L744) - Pipeline creation: [bgfx_graphics_backend.cpp:804-875](src/services/impl/bgfx_graphics_backend.cpp#L804-L875) - Shader validation: [shader_pipeline_validator.cpp](src/services/impl/shader_pipeline_validator.cpp) + +▶ Running: build-ninja/sdl3_app -j config/seed_runtime.json + +2026-01-08 15:37:11.675 [INFO] JsonConfigService initialized from config file: /home/rewrich/Documents/GitHub/SDL3CPlusPlus/config/seed_runtime.json + +2026-01-08 15:37:11.675 [INFO] ServiceBasedApp::ServiceBasedApp: Setting up SDL +2026-01-08 15:37:11.675 [INFO] ServiceBasedApp::ServiceBasedApp: Registering services +2026-01-08 15:37:11.675 [INFO] JsonConfigService initialized with explicit configuration + +2026-01-08 15:37:11.773 [INFO] CrashRecoveryService::SetupSignalHandlers: Signal handlers installed +2026-01-08 15:37:11.773 [INFO] CrashRecoveryService::Initialize: Crash recovery service initialized +2026-01-08 15:37:11.773 [INFO] ServiceBasedApp::ServiceBasedApp: Resolving lifecycle services +2026-01-08 15:37:11.773 [INFO] ServiceBasedApp::ServiceBasedApp: constructor completed + +2026-01-08 15:37:11.773 [INFO] Application starting +2026-01-08 15:37:11.774 [INFO] LifecycleService::InitializeAll: Initializing all services + +2026-01-08 15:37:11.785 [INFO] SDL audio service initialized successfully + +2026-01-08 15:37:11.789 [INFO] Playing background audio: /home/rewrich/Documents/GitHub/SDL3CPlusPlus/scripts/piano.ogg (loop: 1) + +2026-01-08 15:37:11.791 [INFO] Script engine service initialized + +2026-01-08 15:37:11.794 [INFO] Physics service initialized + +2026-01-08 15:37:11.794 [INFO] LifecycleService::InitializeAll: All services initialized + +2026-01-08 15:37:11.811 [INFO] PlatformService::FeatureTable +feature value +platform.pointerBits 64 +platform.name Linux +platform.sdl.version 3002020 +platform.sdl.version.major 3 +platform.sdl.version.minor 2 +platform.sdl.version.micro 20 +platform.sdl.revision release-3.2.20-0-g96292a5b4 +platform.cpu.count 12 +platform.cpu.cacheLineSize 64 +platform.systemRamMB 64198 +platform.cpu.hasSSE true +platform.cpu.hasSSE2 true +platform.cpu.hasSSE3 true +platform.cpu.hasSSE41 true +platform.cpu.hasSSE42 true +platform.cpu.hasAVX true +platform.cpu.hasAVX2 true +platform.cpu.hasAVX512F false +platform.cpu.hasNEON false +platform.cpu.hasARMSIMD false +platform.cpu.hasAltiVec false +platform.cpu.hasLSX false +platform.cpu.hasLASX false +env.xdgSessionType x11 +env.waylandDisplay unset +env.x11Display :0 +env.desktopSession mate +env.xdgCurrentDesktop MATE +env.xdgRuntimeDir /run/user/1000 +env.sdlVideoDriver unset +env.sdlRenderDriver unset +sdl.hint.videoDriver unset +sdl.hint.renderDriver unset +sdl.hint.waylandPreferLibdecor unset +sdl.videoDriverCount 5 +sdl.videoDrivers wayland, x11, offscreen, dummy, evdev +sdl.videoInitialized true +sdl.videoBackend.supportsWayland true +sdl.videoBackend.supportsX11 true +sdl.videoBackend.supportsKmsdrm false +sdl.videoBackend.supportsWindows false +sdl.videoBackend.supportsCocoa false +sdl.videoBackend.isWayland false +sdl.videoBackend.isX11 true +sdl.videoBackend.isKmsdrm false +sdl.videoBackend.isWindows false +sdl.videoBackend.isCocoa false +sdl.currentVideoDriver x11 +sdl.systemTheme unknown +sdl.renderDriverCount 5 +sdl.renderDrivers opengl, opengles2, vulkan, gpu, software +sdl.render.supportsOpenGL true +sdl.render.supportsOpenGLES2 true +sdl.render.supportsDirect3D11 false +sdl.render.supportsDirect3D12 false +sdl.render.supportsMetal false +sdl.render.supportsSoftware true +sdl.displayCount 1 +sdl.primaryDisplayId 1 +sdl.displaySummary 0:Odyssey G40B 27"@1920x1080+0+0 +sdl.displayError none +platform.uname.sysname Linux +platform.uname.release 6.17.12-300.fc43.x86_64 +platform.uname.version #1 SMP PREEMPT_DYNAMIC Sat Dec 13 05:06:24 UTC 2025 +platform.uname.machine x86_64 + + +2026-01-08 15:37:11.871 [INFO] SdlWindowService: Mouse grab config: enabled=true, grabOnClick=true, grabMouseButton=1, releaseKey=27 + +2026-01-08 15:37:11.954 [INFO] ApplicationLoopService::Run: Starting main loop + +2026-01-08 15:37:12.022 [WARN] [MaterialX Pipeline: standard_surface_wood_tiled.mtlx] ⚠ Vertex layout provides unused attribute at location 4 (Color0) + +2026-01-08 15:37:12.074 [WARN] [MaterialX Pipeline: standard_surface_brick_procedural.mtlx] ⚠ Vertex layout provides unused attribute at location 4 (Color0) + +2026-01-08 15:37:12.125 [WARN] [MaterialX Pipeline: standard_surface_marble_solid.mtlx] ⚠ Vertex layout provides unused attribute at location 3 (TexCoord0) + +2026-01-08 15:37:12.126 [WARN] [MaterialX Pipeline: standard_surface_marble_solid.mtlx] ⚠ Vertex layout provides unused attribute at location 4 (Color0) + +2026-01-08 15:37:12.171 [WARN] [MaterialX Pipeline: standard_surface_brass_tiled.mtlx] ⚠ Vertex layout provides unused attribute at location 4 (Color0) + +2026-01-08 15:37:12.229 [INFO] BgfxShaderCompiler: created shader ceiling:vertex (binSize=2553, renderer=Vulkan) + +2026-01-08 15:37:12.546 [INFO] BgfxShaderCompiler: created shader ceiling:fragment (binSize=78632, renderer=Vulkan) + +2026-01-08 15:37:12.591 [INFO] BgfxShaderCompiler: created shader wall:vertex (binSize=2835, renderer=Vulkan) + +2026-01-08 15:37:12.893 [INFO] BgfxShaderCompiler: created shader wall:fragment (binSize=78866, renderer=Vulkan) + +2026-01-08 15:37:13.079 [INFO] BgfxShaderCompiler: created shader solid:vertex (binSize=2675, renderer=Vulkan) + +2026-01-08 15:37:13.363 [INFO] BgfxShaderCompiler: created shader solid:fragment (binSize=68326, renderer=Vulkan) + +2026-01-08 15:37:13.497 [INFO] BgfxShaderCompiler: created shader floor:vertex (binSize=2675, renderer=Vulkan) + +2026-01-08 15:37:13.784 [INFO] BgfxShaderCompiler: created shader floor:fragment (binSize=68414, renderer=Vulkan) + +2026-01-08 15:37:13.905 [INFO] BgfxShaderCompiler: created shader gui_vertex (binSize=1646, renderer=Vulkan) + +2026-01-08 15:37:13.953 [INFO] BgfxShaderCompiler: created shader gui_fragment (binSize=846, renderer=Vulkan) + +radv/amdgpu: The CS has been cancelled because the context is lost. This context is guilty of a hard recovery. +radv: GPUVM fault detected at address 0x8001000000. +GCVM_L2_PROTECTION_FAULT_STATUS: 0x401431 + CLIENT_ID: (SQC (data)) 0xa + MORE_FAULTS: 1 + WALKER_ERROR: 0 + PERMISSION_FAULTS: 3 + MAPPING_ERROR: 0 + RW: 0 + +2026-01-08 15:37:41.954 [WARN] CrashRecoveryService::ExecuteWithTimeout: Operation 'Main Application Loop' timed out after 30000ms + + +⏸ Stopping process... + +❌ Process exited with code 9 diff --git a/src/services/impl/application_loop_service.hpp b/src/services/impl/application_loop_service.hpp index bc7927b..6e85c7c 100644 --- a/src/services/impl/application_loop_service.hpp +++ b/src/services/impl/application_loop_service.hpp @@ -2,6 +2,7 @@ #include "../interfaces/i_application_loop_service.hpp" #include "../interfaces/i_audio_service.hpp" +#include "../interfaces/i_crash_recovery_service.hpp" #include "../interfaces/i_input_service.hpp" #include "../interfaces/i_logger.hpp" #include "../interfaces/i_physics_service.hpp" @@ -22,7 +23,8 @@ public: std::shared_ptr physicsService, std::shared_ptr sceneService, std::shared_ptr renderCoordinatorService, - std::shared_ptr audioService); + std::shared_ptr audioService, + std::shared_ptr crashRecoveryService); ~ApplicationLoopService() override = default; void Run() override; @@ -39,7 +41,10 @@ private: std::shared_ptr sceneService_; std::shared_ptr renderCoordinatorService_; std::shared_ptr audioService_; + std::shared_ptr crashRecoveryService_; bool running_ = false; + double lastMemoryCheckSeconds_ = 0.0; + double memoryCheckIntervalSeconds_ = 1.0; }; } // namespace sdl3cpp::services::impl diff --git a/src/services/impl/crash_recovery_service.cpp b/src/services/impl/crash_recovery_service.cpp index 8119f66..c3b1003 100644 --- a/src/services/impl/crash_recovery_service.cpp +++ b/src/services/impl/crash_recovery_service.cpp @@ -1,4 +1,5 @@ #include "crash_recovery_service.hpp" +#include #include #include #include @@ -71,23 +72,60 @@ void CrashRecoveryService::Shutdown() { bool CrashRecoveryService::ExecuteWithTimeout(std::function func, int timeoutMs, const std::string& operationName) { logger_->Trace("CrashRecoveryService", "ExecuteWithTimeout", "timeoutMs=" + std::to_string(timeoutMs) + ", operationName=" + operationName, "Executing with timeout"); - auto future = std::async(std::launch::async, func); + std::promise completionPromise; + auto completionFuture = completionPromise.get_future(); + std::thread workerThread([task = std::move(func), promise = std::move(completionPromise)]() mutable { + try { + task(); + promise.set_value(); + } catch (...) { + try { + promise.set_exception(std::current_exception()); + } catch (...) { + // Swallow exception if promise is already satisfied. + } + } + }); - if (future.wait_for(std::chrono::milliseconds(timeoutMs)) == std::future_status::timeout) { + if (completionFuture.wait_for(std::chrono::milliseconds(timeoutMs)) == std::future_status::timeout) { logger_->Warn("CrashRecoveryService::ExecuteWithTimeout: Operation '" + operationName + "' timed out after " + std::to_string(timeoutMs) + "ms"); + logger_->Trace("CrashRecoveryService", "ExecuteWithTimeout", + "timeoutMs=" + std::to_string(timeoutMs) + ", operationName=" + operationName, + "Timeout detected; marking crash and detaching worker thread"); - // Attempt to cancel the operation (limited effectiveness) - // Note: std::future doesn't provide direct cancellation, this is just detection + { + std::lock_guard lock(crashMutex_); + crashDetected_ = true; + crashReport_ += "\nTIMEOUT: Operation '" + operationName + "' exceeded " + + std::to_string(timeoutMs) + "ms\n"; + } + + // No safe way to cancel; detach so we can report the timeout promptly. + if (workerThread.joinable()) { + workerThread.detach(); + } return false; } try { - future.get(); // Re-throw any exceptions + completionFuture.get(); // Re-throw any exceptions logger_->Trace("CrashRecoveryService", "ExecuteWithTimeout", "", "Operation completed successfully"); + if (workerThread.joinable()) { + workerThread.join(); + } return true; } catch (const std::exception& e) { logger_->Error("CrashRecoveryService::ExecuteWithTimeout: Operation '" + operationName + "' threw exception: " + e.what()); + if (workerThread.joinable()) { + workerThread.join(); + } + throw; + } catch (...) { + logger_->Error("CrashRecoveryService::ExecuteWithTimeout: Operation '" + operationName + "' threw unknown exception"); + if (workerThread.joinable()) { + workerThread.join(); + } throw; } } diff --git a/tests/crash_recovery_timeout_test.cpp b/tests/crash_recovery_timeout_test.cpp new file mode 100644 index 0000000..1c141eb --- /dev/null +++ b/tests/crash_recovery_timeout_test.cpp @@ -0,0 +1,67 @@ +#include + +#include "services/impl/crash_recovery_service.hpp" + +#include +#include +#include +#include +#include + +namespace { + +class NullLogger final : public sdl3cpp::services::ILogger { +public: + void SetLevel(sdl3cpp::services::LogLevel) override {} + sdl3cpp::services::LogLevel GetLevel() const override { return sdl3cpp::services::LogLevel::OFF; } + void SetOutputFile(const std::string&) override {} + void SetMaxLinesPerFile(size_t) override {} + void EnableConsoleOutput(bool) override {} + void Log(sdl3cpp::services::LogLevel, const std::string&) override {} + void Trace(const std::string&) override {} + void Trace(const std::string&, const std::string&, const std::string&, const std::string&) override {} + void Debug(const std::string&) override {} + void Info(const std::string&) override {} + void Warn(const std::string&) override {} + void Error(const std::string&) override {} + void TraceFunction(const std::string&) override {} + void TraceVariable(const std::string&, const std::string&) override {} + void TraceVariable(const std::string&, int) override {} + void TraceVariable(const std::string&, size_t) override {} + void TraceVariable(const std::string&, bool) override {} + void TraceVariable(const std::string&, float) override {} + void TraceVariable(const std::string&, double) override {} +}; + +// TDD FAILING TEST: +// sdl3_app.log ends with "Main Application Loop" timing out after 30000ms. +// ExecuteWithTimeout should return promptly after the timeout instead of +// blocking on the background task's completion. +TEST(CrashRecoveryTimeoutTest, ExecuteWithTimeoutReturnsPromptlyAfterTimeout) { + auto logger = std::make_shared(); + sdl3cpp::services::impl::CrashRecoveryService crashRecoveryService(logger); + + std::promise completionPromise; + auto completionFuture = completionPromise.get_future(); + + const auto start = std::chrono::steady_clock::now(); + const bool success = crashRecoveryService.ExecuteWithTimeout( + [promise = std::move(completionPromise)]() mutable { + std::this_thread::sleep_for(std::chrono::milliseconds(200)); + promise.set_value(); + }, + 10, + "Main Application Loop"); + const auto elapsedMs = std::chrono::duration_cast( + std::chrono::steady_clock::now() - start) + .count(); + + EXPECT_FALSE(success); + EXPECT_LT(elapsedMs, 100) + << "ExecuteWithTimeout should return shortly after the timeout instead of waiting for completion."; + + EXPECT_EQ(completionFuture.wait_for(std::chrono::milliseconds(500)), std::future_status::ready) + << "Background task should finish promptly to avoid leaking threads during tests."; +} + +} // namespace