diff --git a/kernel/src/apic.cpp b/kernel/src/apic.cpp index 10bac9e..3392ffb 100644 --- a/kernel/src/apic.cpp +++ b/kernel/src/apic.cpp @@ -1,8 +1,9 @@ -/* - * MetalOS Kernel - APIC (Advanced Programmable Interrupt Controller) +/** + * @file apic.cpp + * @brief Implementation of APIC (Advanced Programmable Interrupt Controller) manager * - * Local APIC support for multicore systems - * Replaces legacy PIC for per-CPU interrupt handling + * The Local APIC is a key component of modern x86-64 multicore systems. It replaces + * the legacy 8259 PIC and provides per-CPU interrupt handling capabilities. */ #include "kernel/apic.h" @@ -10,7 +11,14 @@ // APIC base address (default, can be read from MSR) #define APIC_BASE_MSR 0x1B -// Read CPUID to check for APIC +/** + * @brief Check if CPU has APIC support using CPUID instruction + * + * The CPUID instruction provides information about CPU features. Function 1 + * returns feature flags in the EDX register, where bit 9 indicates APIC support. + * + * @return true if APIC is supported, false otherwise + */ static bool cpuidHasAPIC(void) { uint32_t eax, ebx, ecx, edx; @@ -25,13 +33,37 @@ static bool cpuidHasAPIC(void) { return (edx & (1 << 9)) != 0; } -// APIC class implementation +/* APIC class implementation */ + +/** + * @brief Constructor - sets APIC base address to default memory-mapped location + * + * The Local APIC registers are accessed through memory-mapped I/O at physical + * address 0xFEE00000 by default. This can be changed via the IA32_APIC_BASE MSR, + * but we use the default location for simplicity. + */ APIC::APIC() : apicBase((volatile uint32_t*)0xFEE00000) {} +/** + * @brief Read a 32-bit value from an APIC register + * + * APIC registers are 32 bits wide and located at 16-byte aligned offsets. + * The apicBase pointer is to uint32_t, so we divide the offset by 4 to get + * the array index. 
+ * + * @param offset Register offset in bytes (e.g., 0x020 for APIC ID register) + * @return 32-bit register value + */ uint32_t APIC::read(uint32_t offset) const { return apicBase[offset / 4]; } +/** + * @brief Write a 32-bit value to an APIC register + * + * @param offset Register offset in bytes + * @param value 32-bit value to write + */ void APIC::write(uint32_t offset, uint32_t value) { apicBase[offset / 4] = value; } @@ -40,6 +72,17 @@ bool APIC::isAvailable() const { return cpuidHasAPIC(); } +/** + * @brief Initialize the Local APIC for this CPU core + * + * This function: + * 1. Enables the APIC by setting the software enable bit (bit 8) in the + * Spurious Interrupt Vector Register + * 2. Sets the spurious vector to 0xFF (unused vector for spurious interrupts) + * 3. Sets Task Priority Register to 0 to accept all interrupt priorities + * + * After this initialization, the APIC is ready to receive and send interrupts. + */ void APIC::init() { // Enable APIC via spurious interrupt vector register // Set spurious vector to 0xFF and enable APIC (bit 8) @@ -49,15 +92,52 @@ void APIC::init() { write(APIC_REG_TPR, 0); } +/** + * @brief Get the APIC ID of the current CPU core + * + * The APIC ID is stored in bits 24-31 of the APIC ID register. This is a + * unique identifier for each Local APIC (and thus each CPU core). + * + * @return 8-bit APIC ID + * @note APIC IDs may not be sequential (e.g., 0, 2, 4, 6 on hyperthreaded systems) + */ uint8_t APIC::getId() const { uint32_t idReg = read(APIC_REG_ID); return (idReg >> 24) & 0xFF; } +/** + * @brief Send End-Of-Interrupt signal to acknowledge interrupt completion + * + * After handling an interrupt that came through the APIC, the interrupt handler + * must send an EOI to inform the APIC that the interrupt has been processed. + * This allows the APIC to deliver the next interrupt if one is pending. + * + * Writing any value (typically 0) to the EOI register sends the EOI. 
+ */ void APIC::sendEOI() { write(APIC_REG_EOI, 0); } +/** + * @brief Send an Inter-Processor Interrupt (IPI) to another CPU core + * + * IPIs are used for: + * - Starting Application Processors (APs) during SMP initialization (INIT + SIPI) + * - Sending signals or notifications to other cores + * - TLB shootdowns when changing page tables + * - Requesting other cores to perform specific actions + * + * The IPI is sent using the Interrupt Command Register (ICR), which consists of + * two 32-bit registers (high and low). The high register contains the destination + * APIC ID, and the low register contains the delivery mode, vector, and control flags. + * + * @param destApicId APIC ID of the destination CPU core + * @param vector Interrupt vector number (or page number for SIPI) + * @param deliveryMode Delivery mode (INIT, SIPI, fixed, etc.) + * + * @note This function waits for any pending IPI to complete before sending a new one + */ void APIC::sendIPI(uint8_t destApicId, uint8_t vector, uint32_t deliveryMode) { // Wait for previous IPI to complete while (read(APIC_REG_ICR_LOW) & (1 << 12)) { diff --git a/kernel/src/gdt.cpp b/kernel/src/gdt.cpp index 6100ca9..fc9cae9 100644 --- a/kernel/src/gdt.cpp +++ b/kernel/src/gdt.cpp @@ -1,8 +1,9 @@ -/* - * MetalOS Kernel - Global Descriptor Table (GDT) +/** + * @file gdt.cpp + * @brief Implementation of Global Descriptor Table (GDT) manager * - * Minimal GDT setup for x86_64 long mode - * Only what's needed for our single-app OS + * The GDT is required by x86-64 processors even though segmentation is largely + * disabled in 64-bit mode. It defines code and data segments with privilege levels. */ #include "kernel/gdt.h" @@ -10,12 +11,40 @@ // Load GDT (assembly) extern "C" void gdt_flush(uint64_t); -// GDT class implementation +/* GDT class implementation */ + +/** + * @brief Constructor - initializes GDT pointer structure + * + * Sets up the GDTR (GDT Register) structure that will be loaded into the CPU. 
+ * The limit is the size of the GDT minus 1, and the base is the memory address. + */ GDT::GDT() { gdtPtr.limit = (sizeof(gdt_entry_t) * 5) - 1; gdtPtr.base = (uint64_t)&entries; } +/** + * @brief Set a GDT entry with specified parameters + * + * Fills in all fields of a GDT entry. In 64-bit mode, the base and limit are + * largely ignored, but the access flags (privilege level, executable) are enforced. + * + * @param num Entry index (0-4) + * @param base Base address (mostly ignored in 64-bit mode) + * @param limit Segment limit (mostly ignored in 64-bit mode) + * @param access Access byte containing: + * - Bit 7: Present (must be 1 for valid segment) + * - Bits 5-6: DPL (Descriptor Privilege Level): 0=kernel, 3=user + * - Bit 4: Descriptor type (1 for code/data) + * - Bit 3: Executable (1 for code, 0 for data) + * - Bit 1: Readable/Writable + * @param gran Granularity byte containing: + * - Bit 7: Granularity (1=4KB blocks, 0=1 byte blocks) + * - Bit 6: Size (1=32-bit, 0=16-bit; must be 0 when bit 5 is set) + * - Bit 5: Long mode (1=64-bit code segment) + * - Bits 0-3: Upper 4 bits of limit + */ void GDT::setGate(int num, uint32_t base, uint32_t limit, uint8_t access, uint8_t gran) { entries[num].base_low = (base & 0xFFFF); entries[num].base_middle = (base >> 16) & 0xFF; @@ -27,6 +56,30 @@ void GDT::setGate(int num, uint32_t base, uint32_t limit, uint8_t access, uint8_ entries[num].access = access; } +/** + * @brief Initialize the GDT with required segments and load it + * + * Sets up a minimal GDT with 5 entries: + * 0. Null descriptor (required by CPU, must be all zeros) + * 1. Kernel code segment (CPL 0, 64-bit, executable, readable) + * 2. Kernel data segment (CPL 0, 64-bit, writable) + * 3. User code segment (CPL 3, 64-bit, executable, readable) + * 4.
User data segment (CPL 3, 64-bit, writable) + * + * After setting up the entries, calls gdt_flush() assembly function to: + * - Load GDTR using LGDT instruction + * - Reload segment registers with new selectors + * + * Access byte values: + * - 0x9A = 10011010 = Present, Ring 0, Code, Executable, Readable + * - 0x92 = 10010010 = Present, Ring 0, Data, Writable + * - 0xFA = 11111010 = Present, Ring 3, Code, Executable, Readable + * - 0xF2 = 11110010 = Present, Ring 3, Data, Writable + * + * Granularity values: + * - 0xA0 = 10100000 = Long mode (64-bit) + * - 0xC0 = 11000000 = 32-bit mode (for data segments in 64-bit mode) + */ void GDT::init() { // Null descriptor setGate(0, 0, 0, 0, 0); diff --git a/kernel/src/interrupts.cpp b/kernel/src/interrupts.cpp index b2fc199..c392dd0 100644 --- a/kernel/src/interrupts.cpp +++ b/kernel/src/interrupts.cpp @@ -1,8 +1,9 @@ -/* - * MetalOS Kernel - Interrupt Handling +/** + * @file interrupts.cpp + * @brief Implementation of interrupt descriptor table and interrupt handling * - * Minimal IDT and interrupt handlers - * Supports both PIC (legacy) and APIC (multicore) modes + * Manages CPU exceptions and hardware interrupts through the IDT. + * Supports both legacy PIC and modern APIC interrupt controllers. 
*/ #include "kernel/interrupts.h" @@ -10,7 +11,11 @@ #include "kernel/smp.h" #include "kernel/apic.h" -// I/O port access functions +/** + * @brief Write a byte to an I/O port + * @param port I/O port address + * @param value Byte value to write + */ static inline void outb(uint16_t port, uint8_t value) { __asm__ volatile("outb %0, %1" : : "a"(value), "Nd"(port)); } @@ -34,12 +39,34 @@ extern "C" { void irq0(void); void irq1(void); } -// InterruptManager class implementation +/* InterruptManager class implementation */ + +/** + * @brief Constructor - initializes IDT pointer structure + */ InterruptManager::InterruptManager() { idtPtr.limit = (sizeof(idt_entry_t) * 256) - 1; idtPtr.base = (uint64_t)&idt; } +/** + * @brief Set an IDT entry to point to an interrupt handler + * + * In 64-bit mode, IDT entries are 16 bytes and contain: + * - 64-bit handler address (split across three fields) + * - 16-bit code segment selector + * - Type and attributes (present, DPL, gate type) + * - IST (Interrupt Stack Table) offset (usually 0) + * + * @param num Interrupt vector number (0-255) + * @param handler Address of interrupt handler function + * @param selector Code segment selector (0x08 for kernel code) + * @param flags Type and attribute byte: + * - Bit 7: Present (1) + * - Bits 5-6: DPL (0 for kernel) + * - Bits 0-3: Gate type (0xE for interrupt gate); bit 4 must be 0 + * Common value: 0x8E = Present, DPL=0, Interrupt Gate + */ void InterruptManager::setGate(uint8_t num, uint64_t handler, uint16_t selector, uint8_t flags) { idt[num].offset_low = handler & 0xFFFF; idt[num].offset_mid = (handler >> 16) & 0xFFFF; @@ -50,6 +77,26 @@ void InterruptManager::setGate(uint8_t num, uint64_t handler, uint16_t selector, idt[num].zero = 0; } +/** + * @brief Remap the 8259 PIC to avoid conflicts with CPU exceptions + * + * By default (legacy BIOS setup), the master PIC delivers IRQ 0-7 on vectors 8-15, + * inside CPU exception vectors 0-31.
This causes confusion when a hardware interrupt has the same + * vector as a CPU exception (e.g., IRQ 0, the timer, vs Double Fault exception 8). + * + * We remap the PIC so that: + * - Master PIC (IRQ 0-7) → vectors 32-39 + * - Slave PIC (IRQ 8-15) → vectors 40-47 + * + * The remapping process uses ICW (Initialization Command Words): + * - ICW1: Start initialization (0x11 = ICW4 needed, cascade mode) + * - ICW2: Set vector offset (0x20 for master, 0x28 for slave) + * - ICW3: Set up cascade (master: slave on IRQ2, slave: cascade identity) + * - ICW4: Set 8086 mode + * + * After remapping, all IRQs are masked (disabled) initially. Individual IRQs + * must be explicitly unmasked to receive interrupts. + */ void InterruptManager::remapPIC() { // ICW1: Initialize PIC outb(PIC1_COMMAND, 0x11); @@ -72,6 +119,30 @@ void InterruptManager::remapPIC() { outb(PIC2_DATA, 0xFF); } +/** + * @brief Initialize the IDT and enable interrupts + * + * This function performs complete interrupt subsystem initialization: + * 1. Clear all 256 IDT entries + * 2. Install exception handlers (ISR 0-31) for CPU exceptions + * 3. Remap the PIC to avoid conflicts + * 4. Install IRQ handlers (32-47) for hardware interrupts + * 5. Load IDT using LIDT instruction + * 6. Enable interrupts using STI instruction + * + * CPU exceptions (0-31) include: + * - 0: Divide by zero + * - 6: Invalid opcode + * - 13: General protection fault + * - 14: Page fault + * etc. + * + * Hardware IRQs (32-47) include: + * - 32 (IRQ 0): Timer + * - 33 (IRQ 1): Keyboard + * - 44 (IRQ 12): PS/2 Mouse + * etc. + */ void InterruptManager::init() { // Clear IDT for (int i = 0; i < 256; i++) { @@ -126,6 +197,25 @@ void InterruptManager::init() { __asm__ volatile("sti"); } +/** + * @brief Main interrupt handler dispatcher + * + * This function is called from the assembly interrupt stubs (ISRs/IRQs). + * It receives the saved CPU state and dispatches to specific handlers + * based on the interrupt number. + * + * Process: + * 1.
Check interrupt number + * 2. Call specific handler if needed (e.g., timer for IRQ 0) + * 3. Send End-Of-Interrupt signal to PIC or APIC + * + * For hardware IRQs (32-47): + * - Check if using APIC (multicore) or PIC (legacy) + * - Send EOI to appropriate controller + * - For slave PIC IRQs (40-47), must send EOI to both PICs + * + * @param regs Pointer to saved CPU register state + */ void InterruptManager::handleInterrupt(registers_t* regs) { // Handle specific interrupts if (regs->int_no == 32) { diff --git a/kernel/src/main.cpp b/kernel/src/main.cpp index cf7e071..d630a93 100644 --- a/kernel/src/main.cpp +++ b/kernel/src/main.cpp @@ -1,3 +1,26 @@ +/** + * @file main.cpp + * @brief MetalOS Kernel Main Entry Point + * + * This is the heart of MetalOS - an extremely minimalist kernel designed to run + * a single application (QT6 Hello World). The kernel provides only the essential + * hardware initialization needed to run the application. + * + * Design Philosophy: + * - No scheduler: Single application, always running + * - No process management: One process only + * - No complex memory management: Simple bump allocator + * - No filesystem: Application embedded in boot image + * - Multicore support: All cores initialized for future parallel processing + * + * Boot sequence: + * 1. UEFI bootloader loads kernel and provides boot information + * 2. Kernel initializes hardware (GDT, IDT, memory, timer, PCI, SMP) + * 3. Kernel will eventually jump directly to the application + * + * Target size: < 150 KB (achieved through extreme minimalism) + */ + /* * MetalOS Kernel - Main Entry Point * @@ -16,12 +39,60 @@ #include "kernel/timer.h" #include "kernel/smp.h" -/* - * Kernel main entry point - * Called by bootloader with boot information +/** + * @brief Kernel main entry point - called by bootloader * - * This is the root-level function that hands off to C++ classes - * for hardware initialization and system management. 
+ * This is the first C++ function executed after the bootloader transfers control. + * It receives boot information from UEFI and performs minimal hardware initialization. + * + * Initialization sequence: + * + * 1. GDT (Global Descriptor Table): + * - Required for x86-64 segmentation and privilege levels + * - Sets up kernel/user code and data segments + * + * 2. IDT (Interrupt Descriptor Table): + * - Sets up interrupt and exception handlers + * - Remaps PIC to avoid conflicts with CPU exceptions + * - Enables hardware interrupts + * + * 3. Physical Memory Manager: + * - Initializes page bitmap for 4KB page allocation + * - Currently assumes 128MB at 16MB physical address + * - TODO: Parse UEFI memory map for proper detection + * + * 4. Kernel Heap: + * - Allocates 1MB (256 pages) for kernel dynamic allocation + * - Uses simple bump allocator (no free() support) + * + * 5. Timer (PIT): + * - Programs 8254 PIT for 1000 Hz (1ms ticks) + * - Used for timekeeping and delays + * + * 6. PCI Bus: + * - Enumerates all PCI devices + * - Discovers GPU and other hardware + * - Stores device information for later use + * + * 7. SMP (Multi-Processing): + * - Initializes Local APIC on BSP + * - Starts all available Application Processor cores + * - Currently APs idle; only BSP runs application + * + * After initialization, the kernel will eventually: + * - Initialize GPU for framebuffer graphics + * - Set up minimal input (PS/2 keyboard/mouse or USB) + * - Jump directly to QT6 application entry point + * + * For now, it enters an infinite halt loop waiting for implementation. 
+ * + * @param boot_info Pointer to boot information structure from UEFI bootloader containing: + * - Framebuffer information (base, width, height, pitch, bpp) + * - Kernel location and size + * - ACPI RSDP pointer + * - UEFI memory map + * + * @note This function should never return */ extern "C" void kernel_main(BootInfo* boot_info) { // Initialize GDT (Global Descriptor Table) - using GDT class diff --git a/kernel/src/memory.cpp b/kernel/src/memory.cpp index e500827..89ff90f 100644 --- a/kernel/src/memory.cpp +++ b/kernel/src/memory.cpp @@ -1,8 +1,10 @@ -/* - * MetalOS Kernel - Memory Management +/** + * @file memory.cpp + * @brief Implementation of physical memory manager and kernel heap allocator * - * Simple physical memory manager and heap allocator - * Minimal implementation for single-app OS + * Provides two memory management subsystems: + * 1. Physical Memory Manager (PMM): Manages 4KB pages using a bitmap + * 2. Heap Allocator: Simple bump allocator for kernel dynamic allocation */ #include "kernel/memory.h" @@ -10,7 +12,11 @@ // Physical memory bitmap constants #define BITMAP_SIZE 32768 // Supports up to 128MB with 4KB pages -// PhysicalMemoryManager class implementation +/* PhysicalMemoryManager class implementation */ + +/** + * @brief Constructor - initializes all fields and clears bitmap + */ PhysicalMemoryManager::PhysicalMemoryManager() : totalPages(0), usedPages(0) { for (uint64_t i = 0; i < BITMAP_SIZE; i++) { @@ -18,6 +24,20 @@ PhysicalMemoryManager::PhysicalMemoryManager() } } +/** + * @brief Initialize the physical memory manager + * + * Currently uses a simplified approach: + * - Assumes 128MB of usable RAM starting at physical address 16MB (0x01000000) + * - Clears the entire page bitmap to mark all pages as free + * - TODO: Parse the UEFI memory map from bootInfo to properly detect available memory + * + * The 16MB starting address is chosen to avoid: + * - First 1MB: Legacy BIOS area, video memory, etc. 
+ * - 1MB-16MB: Kernel code, boot structures, and reserved areas + * + * @param bootInfo Boot information structure (currently unused, TODO: parse memory map) + */ void PhysicalMemoryManager::init(BootInfo* bootInfo) { (void)bootInfo; // TODO: Parse UEFI memory map @@ -32,6 +52,21 @@ void PhysicalMemoryManager::init(BootInfo* bootInfo) { usedPages = 0; } +/** + * @brief Allocate a single 4KB physical memory page + * + * Uses a simple first-fit algorithm: + * 1. Scan the bitmap from the beginning + * 2. Find the first page where the corresponding bit is 0 (free) + * 3. Set the bit to 1 (allocated) + * 4. Calculate and return the physical address + * + * Each bit in the bitmap represents one 4KB page: + * - Byte N, Bit M represents page (N*8 + M) + * - Physical address = 0x01000000 + (page_index * 4096) + * + * @return Physical address of allocated page, or nullptr if out of memory + */ void* PhysicalMemoryManager::allocPage() { // Find first free page in bitmap for (uint64_t i = 0; i < totalPages; i++) { @@ -53,6 +88,15 @@ void* PhysicalMemoryManager::allocPage() { return nullptr; } +/** + * @brief Free a previously allocated physical memory page + * + * Calculates the page index from the physical address and clears the + * corresponding bit in the bitmap to mark the page as free. 
+ * + * @param page Physical address of page to free + * @note Does nothing if address is invalid (< base or >= limit) + */ void PhysicalMemoryManager::freePage(void* page) { uint64_t addr = (uint64_t)page; @@ -71,24 +115,65 @@ void PhysicalMemoryManager::freePage(void* page) { usedPages--; } +/** + * @brief Get total memory managed by PMM in bytes + * @return Total memory size (totalPages * PAGE_SIZE) + */ uint64_t PhysicalMemoryManager::getTotalMemory() const { return totalPages * PAGE_SIZE; } +/** + * @brief Get free memory available in bytes + * @return Free memory size ((totalPages - usedPages) * PAGE_SIZE) + */ uint64_t PhysicalMemoryManager::getFreeMemory() const { return (totalPages - usedPages) * PAGE_SIZE; } -// HeapAllocator class implementation +/* HeapAllocator class implementation */ + +/** + * @brief Constructor - initializes all pointers to null + */ HeapAllocator::HeapAllocator() : heapStart(nullptr), heapCurrent(nullptr), heapEnd(nullptr) {} +/** + * @brief Initialize heap with a pre-allocated memory region + * + * The heap operates on a contiguous region of memory. The heapCurrent pointer + * starts at the beginning and moves forward with each allocation. + * + * @param start Starting address of heap region (obtained from PMM) + * @param size Size of heap region in bytes (e.g., 1MB = 256 pages * 4KB) + */ void HeapAllocator::init(void* start, size_t size) { heapStart = (uint8_t*)start; heapCurrent = heapStart; heapEnd = heapStart + size; } +/** + * @brief Allocate memory from the heap (bump allocator) + * + * This is a "bump" or "arena" allocator - the simplest possible allocator. + * It just moves the current pointer forward by the requested size. + * + * Process: + * 1. Round size up to 16-byte boundary for alignment + * 2. Check if enough space remains in heap + * 3. Save current pointer as return value + * 4. 
Move current pointer forward by aligned size + * + * Alignment to 16 bytes ensures: + * - Compatibility with aligned SSE loads/stores (which require 16-byte alignment) + * - Better cache line utilization + * - Prevents unaligned access penalties + * + * @param size Number of bytes to allocate + * @return Pointer to allocated memory, or nullptr if out of heap space + */ void* HeapAllocator::alloc(size_t size) { if (!heapStart) { return nullptr; @@ -107,6 +192,15 @@ void* HeapAllocator::alloc(size_t size) { return ptr; } +/** + * @brief Allocate and zero-initialize array memory + * + * Equivalent to alloc(num * size) followed by memset to zero. + * + * @param num Number of elements + * @param size Size of each element in bytes + * @return Pointer to allocated and zeroed memory, or nullptr if out of space + */ void* HeapAllocator::calloc(size_t num, size_t size) { size_t total = num * size; void* ptr = alloc(total); @@ -118,13 +212,35 @@ void* HeapAllocator::calloc(size_t num, size_t size) { return ptr; } +/** + * @brief Free memory (no-op in bump allocator) + * + * Bump allocators cannot free individual allocations. The entire heap + * can only be reset at once. For a simple single-application OS, this + * limitation is acceptable. + * + * @param ptr Pointer to memory (ignored) + * @todo Replace with proper allocator if individual free() is needed + */ void HeapAllocator::free(void* ptr) { (void)ptr; // TODO: Implement proper free with a real allocator // For now, bump allocator doesn't support freeing } -// Memory utility functions +/* Memory utility functions */ + +/** + * @brief Fill memory with a constant byte value + * + * Simple byte-by-byte memset implementation. Not optimized for large blocks, + * but sufficient for kernel use with small structures and buffers.
+ * + * @param dest Pointer to memory block to fill + * @param val Value to set (converted to unsigned char) + * @param count Number of bytes to set + * @return Pointer to dest + */ void* memset(void* dest, int val, size_t count) { uint8_t* d = (uint8_t*)dest; uint8_t v = (uint8_t)val; @@ -136,6 +252,17 @@ void* memset(void* dest, int val, size_t count) { return dest; } +/** + * @brief Copy memory from source to destination + * + * Simple byte-by-byte memcpy implementation. Memory areas must not overlap. + * + * @param dest Pointer to destination buffer + * @param src Pointer to source buffer + * @param count Number of bytes to copy + * @return Pointer to dest + * @warning Memory regions must not overlap (use memmove if they might) + */ void* memcpy(void* dest, const void* src, size_t count) { uint8_t* d = (uint8_t*)dest; const uint8_t* s = (const uint8_t*)src; @@ -147,6 +274,17 @@ void* memcpy(void* dest, const void* src, size_t count) { return dest; } +/** + * @brief Compare two memory blocks + * + * Compares memory byte-by-byte until a difference is found or count bytes + * have been compared. + * + * @param s1 Pointer to first memory block + * @param s2 Pointer to second memory block + * @param count Number of bytes to compare + * @return 0 if equal, negative if s1 < s2, positive if s1 > s2 + */ int memcmp(const void* s1, const void* s2, size_t count) { const uint8_t* a = (const uint8_t*)s1; const uint8_t* b = (const uint8_t*)s2; diff --git a/kernel/src/pci.cpp b/kernel/src/pci.cpp index 3f96bcc..6c38a2a 100644 --- a/kernel/src/pci.cpp +++ b/kernel/src/pci.cpp @@ -1,27 +1,69 @@ -/* - * MetalOS Kernel - PCI Bus Support +/** + * @file pci.cpp + * @brief Implementation of PCI bus enumeration and device management * - * Minimal PCI enumeration and configuration - * Only what's needed to find and initialize the GPU + * PCI (Peripheral Component Interconnect) is the standard bus for connecting + * hardware devices. 
This implementation scans the PCI bus to discover devices + * and provides functions to configure them. */ #include "kernel/pci.h" #include "kernel/memory.h" -// I/O port access functions +/** + * @brief Write a 32-bit value to an I/O port + * @param port I/O port address + * @param value 32-bit value to write + */ static inline void outl(uint16_t port, uint32_t value) { __asm__ volatile("outl %0, %1" : : "a"(value), "Nd"(port)); } +/** + * @brief Read a 32-bit value from an I/O port + * @param port I/O port address + * @return 32-bit value read from port + */ static inline uint32_t inl(uint16_t port) { uint32_t value; __asm__ volatile("inl %1, %0" : "=a"(value) : "Nd"(port)); return value; } -// PCIManager class implementation +/* PCIManager class implementation */ + +/** + * @brief Constructor - initializes device count to zero + */ PCIManager::PCIManager() : deviceCount(0) {} +/** + * @brief Read a 32-bit value from PCI configuration space + * + * PCI configuration space is accessed through two I/O ports: + * - 0xCF8 (CONFIG_ADDRESS): Write the address of config register to read + * - 0xCFC (CONFIG_DATA): Read the 32-bit value from that register + * + * The address format (32 bits): + * - Bit 31: Enable bit (must be 1) + * - Bits 16-23: Bus number (0-255) + * - Bits 11-15: Device number (0-31) + * - Bits 8-10: Function number (0-7) + * - Bits 0-7: Register offset (4-byte aligned, bits 0-1 ignored) + * + * Each PCI device has 256 bytes of configuration space containing: + * - Device identification (vendor/device ID at offset 0x00) + * - Command/status registers (offset 0x04) + * - Class code (offset 0x08) + * - BARs (Base Address Registers at offsets 0x10-0x24) + * - Interrupt configuration + * + * @param bus Bus number (0-255) + * @param device Device number on bus (0-31) + * @param function Function number within device (0-7) + * @param offset Register offset (must be 4-byte aligned) + * @return 32-bit configuration register value + */ uint32_t 
PCIManager::readConfig(uint8_t bus, uint8_t device, uint8_t function, uint8_t offset) { uint32_t address = (uint32_t)( ((uint32_t)bus << 16) | @@ -35,6 +77,21 @@ uint32_t PCIManager::readConfig(uint8_t bus, uint8_t device, uint8_t function, u return inl(PCI_CONFIG_DATA); } +/** + * @brief Write a 32-bit value to PCI configuration space + * + * Similar to readConfig, but writes a value to the specified register. + * Used for device configuration, such as: + * - Enabling bus mastering + * - Enabling memory/IO space access + * - Configuring interrupt lines + * + * @param bus Bus number + * @param device Device number + * @param function Function number + * @param offset Register offset (4-byte aligned) + * @param value 32-bit value to write + */ void PCIManager::writeConfig(uint8_t bus, uint8_t device, uint8_t function, uint8_t offset, uint32_t value) { uint32_t address = (uint32_t)( ((uint32_t)bus << 16) | @@ -48,6 +105,26 @@ void PCIManager::writeConfig(uint8_t bus, uint8_t device, uint8_t function, uint outl(PCI_CONFIG_DATA, value); } +/** + * @brief Probe a specific PCI device/function and add to device list + * + * Reads device information from PCI configuration space and stores it: + * - Vendor ID and Device ID (for identification) + * - Class code, subclass, prog_if (device type) + * - Revision ID + * - All 6 Base Address Registers (BARs) + * + * BARs specify memory or I/O regions used by the device: + * - Bit 0: 0=memory BAR, 1=I/O BAR + * - For memory BARs: + * - Bits 1-2: Type (00=32-bit, 10=64-bit) + * - Bit 3: Prefetchable + * - Bits 4-31: Base address (16-byte aligned) + * + * @param bus Bus number + * @param device Device number + * @param function Function number + */ void PCIManager::probeDevice(uint8_t bus, uint8_t device, uint8_t function) { uint32_t vendorDevice = readConfig(bus, device, function, 0x00); uint16_t vendor_id = vendorDevice & 0xFFFF; @@ -85,6 +162,21 @@ void PCIManager::probeDevice(uint8_t bus, uint8_t device, uint8_t function) { } } +/**
+ * @brief Initialize PCI subsystem by scanning all buses + * + * Performs a complete scan of the PCI bus hierarchy: + * - Iterates through all 256 possible buses + * - For each bus, checks all 32 device slots + * - For each device, checks if it's multi-function + * - If multi-function, scans all 8 functions + * + * A device exists if its vendor ID is not 0xFFFF. The header type + * register (offset 0x0C) has bit 7 set for multi-function devices. + * + * This approach is brute-force but simple and reliable. More sophisticated + * implementations would parse ACPI tables to find PCI buses. + */ void PCIManager::init() { deviceCount = 0; @@ -114,6 +206,18 @@ void PCIManager::init() { } } +/** + * @brief Find a PCI device by vendor and device ID + * + * Searches through the list of discovered devices for a match. + * Useful for finding specific hardware, e.g.: + * - AMD RX 6600: vendor=0x1002, device=0x73FF + * - Intel NIC: vendor=0x8086, device=various + * + * @param vendor_id Vendor identifier (e.g., 0x1002 for AMD) + * @param device_id Device identifier (specific model) + * @return Pointer to PCIDevice if found, nullptr otherwise + */ PCIDevice* PCIManager::findDevice(uint16_t vendor_id, uint16_t device_id) { for (uint32_t i = 0; i < deviceCount; i++) { if (devices[i].vendor_id == vendor_id && devices[i].device_id == device_id) { @@ -123,6 +227,21 @@ PCIDevice* PCIManager::findDevice(uint16_t vendor_id, uint16_t device_id) { return nullptr; } +/** + * @brief Enable bus mastering for a PCI device + * + * Bus mastering allows a device to perform DMA (Direct Memory Access) - + * reading and writing system memory without CPU involvement. This is + * essential for high-performance devices like GPUs and network cards. 
+ * + * The command register (offset 0x04) contains control bits: + * - Bit 0: I/O Space Enable + * - Bit 1: Memory Space Enable + * - Bit 2: Bus Master Enable ← We set this bit + * - Bit 10: Interrupt Disable + * + * @param dev Pointer to PCI device structure + */ void PCIManager::enableBusMastering(PCIDevice* dev) { if (!dev) return; diff --git a/kernel/src/smp.cpp b/kernel/src/smp.cpp index f3e9e40..5c76bb8 100644 --- a/kernel/src/smp.cpp +++ b/kernel/src/smp.cpp @@ -1,8 +1,9 @@ -/* - * MetalOS Kernel - SMP (Symmetric Multi-Processing) Support +/** + * @file smp.cpp + * @brief Implementation of SMP (Symmetric Multi-Processing) initialization * - * Basic multicore support for better performance - * Initializes Application Processors (APs) using SIPI protocol + * SMP support allows the OS to use multiple CPU cores. This involves starting + * Application Processors (APs) using the INIT-SIPI-SIPI sequence defined by Intel. */ #include "kernel/smp.h" @@ -21,7 +22,14 @@ extern "C" { void ap_trampoline_end(void); } -// SMPManager class implementation +/* SMPManager class implementation */ + +/** + * @brief Constructor - initializes BSP (Bootstrap Processor) as CPU 0 + * + * The BSP is the first CPU core that starts when the system boots. + * It's responsible for initializing the system and starting other cores (APs). + */ SMPManager::SMPManager() : cpuCount(1), smpEnabled(false) { // Initialize BSP cpuInfo[0].cpu_id = BSP_CPU_ID; @@ -30,6 +38,15 @@ SMPManager::SMPManager() : cpuCount(1), smpEnabled(false) { cpuInfo[0].kernel_stack = 0; } +/** + * @brief Get the logical CPU ID of the currently executing core + * + * Uses the Local APIC ID to determine which CPU is running this code. + * This is important in multicore systems where each core may be executing + * kernel code simultaneously. 
+ * + * @return Logical CPU ID (0 for BSP, 1+ for APs) + */ uint8_t SMPManager::getCurrentCPU() const { if (!smpEnabled) { return BSP_CPU_ID; @@ -47,6 +64,16 @@ uint8_t SMPManager::getCurrentCPU() const { return BSP_CPU_ID; } +/** + * @brief Initialize CPU information structure + * + * Sets up the per-CPU data structure with initial values. + * Each CPU has a logical ID (sequential: 0, 1, 2...) and a physical + * APIC ID (may not be sequential, e.g., 0, 2, 4, 6...). + * + * @param cpuId Logical CPU ID (0-15) + * @param apicId Physical APIC ID + */ void SMPManager::initCPU(uint8_t cpuId, uint8_t apicId) { if (cpuId >= MAX_CPUS) return; @@ -62,6 +89,17 @@ void SMPManager::markCPUOnline(uint8_t cpuId) { } } +/** + * @brief Busy-wait delay for timing during AP startup + * + * This is an approximate delay using a busy loop. Not precise, but sufficient + * for the timing requirements of the INIT-SIPI-SIPI sequence: + * - 10ms delay after INIT + * - 200μs delay after each SIPI + * + * @param microseconds Delay duration in microseconds (approximate) + * @note Uses PAUSE instruction to improve performance during busy-wait + */ void SMPManager::delay(uint32_t microseconds) { // Approximate delay (not precise) for (volatile uint32_t i = 0; i < microseconds * 100; i++) { @@ -69,6 +107,30 @@ void SMPManager::delay(uint32_t microseconds) { } } +/** + * @brief Start an Application Processor using INIT-SIPI-SIPI sequence + * + * The Intel-specified AP startup sequence: + * 1. Send INIT IPI to reset the AP to a known state (16-bit real mode) + * 2. Wait 10ms for INIT to complete + * 3. Send first SIPI with vector = page number of trampoline code + * 4. Wait 200μs + * 5. Send second SIPI (per Intel spec for compatibility) + * 6. Wait 200μs + * 7. Poll for AP to mark itself online (timeout after 1 second) + * + * The SIPI vector is the page number (4KB) where the trampoline code + * is located. For address 0x8000, vector = 0x8000 >> 12 = 0x08. 
+ * + * The trampoline code must: + * - Be in low memory (< 1MB) accessible in real mode + * - Switch from 16-bit real mode to 64-bit long mode + * - Initialize the AP's GDT, IDT, and APIC + * - Jump to the AP entry point in the kernel + * + * @param apicId Physical APIC ID of the AP to start + * @return true if AP started successfully, false on timeout + */ bool SMPManager::startAP(uint8_t apicId) { // Send INIT IPI apic_send_ipi(apicId, 0, APIC_IPI_INIT); @@ -97,6 +159,28 @@ bool SMPManager::startAP(uint8_t apicId) { return false; } +/** + * @brief Initialize SMP subsystem and start all available CPU cores + * + * This function performs the following steps: + * 1. Check if Local APIC is available (required for SMP) + * 2. If no APIC, fall back to single-core mode + * 3. Initialize BSP's Local APIC + * 4. Get BSP's APIC ID + * 5. Attempt to start additional cores by sending IPIs + * 6. Set smpEnabled flag if multiple cores detected + * + * The function tries to start up to MAX_CPUS cores by probing APIC IDs + * from 0 to maxCPUsToTry. In a real system, this should be done by + * parsing the ACPI MADT (Multiple APIC Description Table) to find + * the actual APIC IDs of installed CPUs. 
+ * + * After successful initialization: + * - All cores have initialized their Local APICs + * - All cores are marked as online + * - Each core can execute kernel code + * - Currently only BSP runs the application (APs idle) + */ void SMPManager::init() { // Check if APIC is available if (!apic_is_available()) { diff --git a/kernel/src/spinlock.cpp b/kernel/src/spinlock.cpp index da8817a..8428140 100644 --- a/kernel/src/spinlock.cpp +++ b/kernel/src/spinlock.cpp @@ -1,20 +1,42 @@ -/* - * MetalOS Kernel - Spinlock +/** + * @file spinlock.cpp + * @brief Implementation of spinlock synchronization primitive * - * Simple spinlock implementation for multicore synchronization - * Uses x86 atomic instructions + * Spinlocks provide mutual exclusion in multicore systems using atomic operations. */ #include "kernel/spinlock.h" -// Spinlock class implementation +/* Spinlock class implementation */ +/** + * @brief Constructor - initializes lock to unlocked state (0) + */ Spinlock::Spinlock() : lock(0) {} void Spinlock::init() { lock = 0; } +/** + * @brief Acquire the spinlock (block until available) + * + * This function uses the x86 XCHG (exchange) instruction, which is: + * - Atomic: The operation cannot be interrupted midway + * - Implicitly locked: Works correctly across multiple CPU cores + * - Sequentially consistent: No memory reordering issues + * + * The algorithm: + * 1. Atomically exchange the lock variable with 1 + * 2. If the old value was 0, we got the lock (return) + * 3. If the old value was 1, lock was already held (spin) + * 4. 
Use PAUSE instruction while spinning to improve performance + * + * The PAUSE instruction: + * - Improves performance on hyperthreaded CPUs + * - Reduces power consumption during spin-wait + * - Prevents memory order violations in the spin loop + */ void Spinlock::acquire() { while (1) { // Try to acquire lock using atomic exchange @@ -36,6 +58,15 @@ void Spinlock::acquire() { } } +/** + * @brief Try to acquire the spinlock without blocking + * + * Similar to acquire(), but returns immediately if lock is already held. + * Useful when you want to try acquiring a lock but have alternative work + * to do if it's not available. + * + * @return true if lock was successfully acquired, false if already locked + */ bool Spinlock::tryAcquire() { uint32_t old_value; __asm__ volatile( @@ -48,6 +79,18 @@ bool Spinlock::tryAcquire() { return (old_value == 0); } +/** + * @brief Release the spinlock + * + * Simply sets the lock variable back to 0 (unlocked). The empty inline assembly + * with "memory" clobber acts as a compiler memory barrier, ensuring all previous + * stores are completed before the lock is released. + * + * This prevents the compiler from reordering memory operations across the lock + * boundary, which would violate the mutual exclusion guarantee. + * + * @note Must only be called by the CPU that currently holds the lock + */ void Spinlock::release() { // Memory barrier to ensure all previous stores are visible __asm__ volatile("" ::: "memory"); diff --git a/kernel/src/timer.cpp b/kernel/src/timer.cpp index 80d3596..5a332b4 100644 --- a/kernel/src/timer.cpp +++ b/kernel/src/timer.cpp @@ -1,8 +1,8 @@ -/* - * MetalOS Kernel - Timer Support +/** + * @file timer.cpp + * @brief Implementation of PIT (Programmable Interval Timer) manager * - * Simple PIT (Programmable Interval Timer) support - * Used for scheduling and timing + * The PIT generates periodic timer interrupts for system timekeeping and delays. 
*/ #include "kernel/timer.h" @@ -15,20 +15,64 @@ // PIT constants #define PIT_BASE_FREQUENCY 1193182 // Hz -// I/O port access functions +/** + * @brief Write a byte to an I/O port + * + * Uses the x86 OUT instruction to write a byte to a hardware I/O port. + * + * @param port I/O port address + * @param value Byte value to write + */ static inline void outb(uint16_t port, uint8_t value) { __asm__ volatile("outb %0, %1" : : "a"(value), "Nd"(port)); } +/** + * @brief Read a byte from an I/O port + * + * Uses the x86 IN instruction to read a byte from a hardware I/O port. + * + * @param port I/O port address + * @return Byte value read from port + */ static inline uint8_t inb(uint16_t port) { uint8_t value; __asm__ volatile("inb %1, %0" : "=a"(value) : "Nd"(port)); return value; } -// Timer class implementation +/* Timer class implementation */ + +/** + * @brief Constructor - initializes tick counter to zero + */ Timer::Timer() : ticks(0) {} +/** + * @brief Initialize the PIT to generate interrupts at specified frequency + * + * The PIT works by counting down from a divisor value at its base frequency + * of 1.193182 MHz. When the counter reaches zero, it generates an interrupt + * and reloads the divisor. + * + * For example, to get 1000 Hz (1ms ticks): + * divisor = 1193182 / 1000 = 1193 + * + * The process: + * 1. Calculate divisor from desired frequency + * 2. Send command byte to configure channel 0 in square wave mode (mode 3) + * 3. Send low byte of divisor + * 4. Send high byte of divisor + * 5. 
Unmask IRQ0 in the PIC to enable timer interrupts + * + * Command byte 0x36 means: + * - Channel 0 + * - Access mode: lobyte/hibyte + * - Mode 3: Square wave generator + * - Binary counter (not BCD) + * + * @param frequency Desired interrupt frequency in Hz (e.g., 1000 for 1ms ticks) + */ void Timer::init(uint32_t frequency) { // Calculate divisor uint32_t divisor = PIT_BASE_FREQUENCY / frequency; @@ -53,6 +97,16 @@ uint64_t Timer::getTicks() const { return ticks; } +/** + * @brief Wait for a specified number of timer ticks + * + * Calculates target tick count and uses HLT instruction to wait efficiently. + * HLT puts the CPU in a low-power state until the next interrupt arrives. + * + * @param waitTicks Number of ticks to wait + * @note Blocking function - CPU will be idle during wait + * @note At 1000 Hz, each tick is 1 millisecond + */ void Timer::wait(uint32_t waitTicks) const { uint64_t target = ticks + waitTicks; while (ticks < target) { @@ -60,6 +114,14 @@ void Timer::wait(uint32_t waitTicks) const { } } +/** + * @brief Handle timer interrupt (increment tick counter) + * + * This function is called from the IRQ0 interrupt handler every time + * the PIT generates an interrupt. It simply increments the tick counter. + * + * @note Must be called from interrupt context only + */ void Timer::handleInterrupt() { ticks++; }