From 0ee1f5bc0ea6ed86cfab3dfe90a3f63a4ef21742 Mon Sep 17 00:00:00 2001
From: johndoe6345789
Date: Sat, 3 Jan 2026 23:33:11 +0000
Subject: [PATCH] Add GPU diagnostics reporting for error handling in rendering

---
 src/app/sdl3_app.hpp        |   1 +
 src/app/sdl3_app_render.cpp | 124 ++++++++++++++++++++++++++++++++++++
 2 files changed, 125 insertions(+)

diff --git a/src/app/sdl3_app.hpp b/src/app/sdl3_app.hpp
index 0df3af9..0c8074f 100644
--- a/src/app/sdl3_app.hpp
+++ b/src/app/sdl3_app.hpp
@@ -97,6 +97,7 @@ private:
     void DrawFrame(float time);
     void SetupGuiRenderer();
     void ProcessGuiEvent(const SDL_Event& event);
+    void PrintGpuDiagnostics(const std::string& errorContext);
 
     QueueFamilyIndices FindQueueFamilies(VkPhysicalDevice device);
     bool CheckDeviceExtensionSupport(VkPhysicalDevice device);
diff --git a/src/app/sdl3_app_render.cpp b/src/app/sdl3_app_render.cpp
index 835d1e1..6226df0 100644
--- a/src/app/sdl3_app_render.cpp
+++ b/src/app/sdl3_app_render.cpp
@@ -21,6 +21,124 @@ const std::unordered_map kGuiKeyNames = {
 
 namespace sdl3cpp::app {
 
+// Writes a human-readable GPU diagnostic report to std::cerr.
+//
+// The report lists, in order: the caller-supplied error context, physical
+// device properties and memory heaps, swapchain state, scene/GUI state,
+// selected device features, queue families, and a fixed list of possible
+// causes and recommended actions. Sections that query the physical device
+// are skipped while physicalDevice_ is VK_NULL_HANDLE.
+//
+// errorContext: short description of the failure that triggered the report.
+void Sdl3App::PrintGpuDiagnostics(const std::string& errorContext) {
+    std::cerr << "\n========================================\n";
+    std::cerr << "GPU DIAGNOSTIC REPORT\n";
+    std::cerr << "========================================\n";
+    std::cerr << "Error Context: " << errorContext << "\n\n";
+
+    // Device properties
+    if (physicalDevice_ != VK_NULL_HANDLE) {
+        VkPhysicalDeviceProperties deviceProps{};
+        vkGetPhysicalDeviceProperties(physicalDevice_, &deviceProps);
+
+        std::cerr << "=== GPU Information ===\n";
+        std::cerr << "Device Name: " << deviceProps.deviceName << "\n";
+        // driverVersion is vendor-specified and need not use the Vulkan version
+        // packing, so the raw value is printed next to the decoded triple.
+        std::cerr << "Driver Version: " << VK_API_VERSION_MAJOR(deviceProps.driverVersion) << "."
+                  << VK_API_VERSION_MINOR(deviceProps.driverVersion) << "."
+                  << VK_API_VERSION_PATCH(deviceProps.driverVersion)
+                  << " (raw 0x" << std::hex << deviceProps.driverVersion << std::dec << ")\n";
+        std::cerr << "API Version: " << VK_API_VERSION_MAJOR(deviceProps.apiVersion) << "."
+                  << VK_API_VERSION_MINOR(deviceProps.apiVersion) << "."
+                  << VK_API_VERSION_PATCH(deviceProps.apiVersion) << "\n";
+        std::cerr << "Vendor ID: 0x" << std::hex << deviceProps.vendorID << std::dec << "\n";
+        std::cerr << "Device ID: 0x" << std::hex << deviceProps.deviceID << std::dec << "\n";
+
+        VkPhysicalDeviceMemoryProperties memProps{};
+        vkGetPhysicalDeviceMemoryProperties(physicalDevice_, &memProps);
+
+        std::cerr << "\n=== Memory Information ===\n";
+        uint64_t totalVRAM = 0;
+        for (uint32_t i = 0; i < memProps.memoryHeapCount; i++) {
+            if (memProps.memoryHeaps[i].flags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT) {
+                totalVRAM += memProps.memoryHeaps[i].size;
+            }
+        }
+        std::cerr << "Total VRAM: " << (totalVRAM / 1024 / 1024) << " MB\n";
+
+        // Memory heaps breakdown
+        std::cerr << "Memory Heaps (" << memProps.memoryHeapCount << "):\n";
+        for (uint32_t i = 0; i < memProps.memoryHeapCount; i++) {
+            std::cerr << "  Heap " << i << ": " << (memProps.memoryHeaps[i].size / 1024 / 1024) << " MB";
+            if (memProps.memoryHeaps[i].flags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT) {
+                std::cerr << " (Device Local)";
+            }
+            std::cerr << "\n";
+        }
+    }
+
+    // Swapchain state
+    std::cerr << "\n=== Swapchain State ===\n";
+    std::cerr << "Extent: " << swapChainExtent_.width << "x" << swapChainExtent_.height << "\n";
+    std::cerr << "Image Count: " << swapChainImages_.size() << "\n";
+    std::cerr << "Format: " << swapChainImageFormat_ << "\n";
+    std::cerr << "Consecutive Recreations: " << consecutiveSwapchainRecreations_ << "\n";
+    std::cerr << "Framebuffer Resized Flag: " << (framebufferResized_ ? "true" : "false") << "\n";
+    std::cerr << "First Frame Completed: " << (firstFrameCompleted_ ? "true" : "false") << "\n";
+
+    // Render objects
+    std::cerr << "\n=== Scene State ===\n";
+    std::cerr << "Render Objects: " << renderObjects_.size() << "\n";
+    std::cerr << "Vertices: " << vertices_.size() << "\n";
+    std::cerr << "Indices: " << indices_.size() << "\n";
+    std::cerr << "Pipelines: " << graphicsPipelines_.size() << "\n";
+    std::cerr << "GUI Renderer Active: " << (guiRenderer_ ? "true" : "false") << "\n";
+    std::cerr << "GUI Has Commands: " << (guiHasCommands_ ? "true" : "false") << "\n";
+
+    // Check device features that might be related
+    if (physicalDevice_ != VK_NULL_HANDLE) {
+        VkPhysicalDeviceFeatures deviceFeatures{};
+        vkGetPhysicalDeviceFeatures(physicalDevice_, &deviceFeatures);
+
+        std::cerr << "\n=== Relevant Device Features ===\n";
+        std::cerr << "Geometry Shader: " << (deviceFeatures.geometryShader ? "supported" : "not supported") << "\n";
+        std::cerr << "Tessellation Shader: " << (deviceFeatures.tessellationShader ? "supported" : "not supported") << "\n";
+        std::cerr << "Multi Draw Indirect: " << (deviceFeatures.multiDrawIndirect ? "supported" : "not supported") << "\n";
+    }
+
+    // Queue properties
+    if (physicalDevice_ != VK_NULL_HANDLE) {
+        // Two-call pattern: the first call only reports the count, the second fills the array.
+        uint32_t queueFamilyCount = 0;
+        vkGetPhysicalDeviceQueueFamilyProperties(physicalDevice_, &queueFamilyCount, nullptr);
+        std::vector<VkQueueFamilyProperties> queueFamilies(queueFamilyCount);
+        vkGetPhysicalDeviceQueueFamilyProperties(physicalDevice_, &queueFamilyCount, queueFamilies.data());
+
+        std::cerr << "\n=== Queue Families ===\n";
+        for (uint32_t i = 0; i < queueFamilyCount; i++) {
+            std::cerr << "Family " << i << ": " << queueFamilies[i].queueCount << " queues, flags: 0x"
+                      << std::hex << queueFamilies[i].queueFlags << std::dec << "\n";
+        }
+    }
+
+    std::cerr << "\n=== Possible Causes ===\n";
+    std::cerr << "1. GPU driver crash or hang - Check dmesg for GPU reset messages\n";
+    std::cerr << "2. Infinite loop in shader code - Review vertex/fragment shaders\n";
+    std::cerr << "3. Command buffer submission issue - Check synchronization\n";
+    std::cerr << "4. GPU overheating or hardware issue - Monitor GPU temperature\n";
+    std::cerr << "5. Driver bug - Try updating GPU drivers to latest version\n";
+    std::cerr << "6. Resource exhaustion - Check system memory and VRAM usage\n";
+
+    std::cerr << "\n=== Recommended Actions ===\n";
+    std::cerr << "1. Check system logs: dmesg | grep -i 'gpu\\|radeon\\|amdgpu'\n";
+    std::cerr << "2. Update GPU drivers: sudo dnf update mesa-vulkan-drivers\n";
+    std::cerr << "3. Verify GPU health: radeontop or similar monitoring tool\n";
+    std::cerr << "4. Check for driver messages: journalctl -k | grep -i amdgpu\n";
+    std::cerr << "5. Try with different Vulkan settings or validation layers\n";
+    std::cerr << "========================================\n\n";
+}
+
 void Sdl3App::CreateCommandBuffers() {
     TRACE_FUNCTION();
     commandBuffers_.resize(swapChainFramebuffers_.size());
@@ -140,8 +258,12 @@ void Sdl3App::DrawFrame(float time) {
     constexpr uint64_t kFenceTimeout = 5000000000ULL; // 5 seconds in nanoseconds
     VkResult fenceResult = vkWaitForFences(device_, 1, &inFlightFence_, VK_TRUE, kFenceTimeout);
     if (fenceResult == VK_TIMEOUT) {
+        std::cerr << "\nERROR: Fence wait timeout: GPU appears to be hung\n";
+        PrintGpuDiagnostics("Fence wait timeout after 5 seconds");
         throw std::runtime_error("Fence wait timeout: GPU appears to be hung");
     } else if (fenceResult != VK_SUCCESS) {
+        std::cerr << "\nERROR: Fence wait failed with code: " << fenceResult << "\n";
+        PrintGpuDiagnostics("Fence wait failed with error code " + std::to_string(fenceResult));
         throw std::runtime_error("Fence wait failed");
     }
     vkResetFences(device_, 1, &inFlightFence_);
@@ -175,6 +297,8 @@ void Sdl3App::DrawFrame(float time) {
         RecreateSwapChain();
         return;
     } else if (result == VK_TIMEOUT) {
+        std::cerr << "\nERROR: Image acquisition timeout: GPU appears to be hung\n";
+        PrintGpuDiagnostics("Image acquisition timeout after 5 seconds");
         throw std::runtime_error("Image acquisition timeout: GPU appears to be hung");
     } else if (result != VK_SUCCESS) {
         throw std::runtime_error("Failed to acquire swap chain image");