Add detailed docstrings to all kernel implementation files

Co-authored-by: johndoe6345789 <224850594+johndoe6345789@users.noreply.github.com>
This commit is contained in:
copilot-swe-agent[bot]
2025-12-28 22:54:05 +00:00
parent 2b2dafef02
commit 8f1c71311d
9 changed files with 791 additions and 51 deletions

View File

@@ -1,8 +1,9 @@
/*
* MetalOS Kernel - APIC (Advanced Programmable Interrupt Controller)
/**
* @file apic.cpp
* @brief Implementation of APIC (Advanced Programmable Interrupt Controller) manager
*
* Local APIC support for multicore systems
* Replaces legacy PIC for per-CPU interrupt handling
* The Local APIC is a key component of modern x86-64 multicore systems. It replaces
* the legacy 8259 PIC and provides per-CPU interrupt handling capabilities.
*/
#include "kernel/apic.h"
@@ -10,7 +11,14 @@
// APIC base address (default, can be read from MSR)
#define APIC_BASE_MSR 0x1B
// Read CPUID to check for APIC
/**
* @brief Check if CPU has APIC support using CPUID instruction
*
* The CPUID instruction provides information about CPU features. Function 1
* returns feature flags in the EDX register, where bit 9 indicates APIC support.
*
* @return true if APIC is supported, false otherwise
*/
static bool cpuidHasAPIC(void) {
uint32_t eax, ebx, ecx, edx;
@@ -25,13 +33,37 @@ static bool cpuidHasAPIC(void) {
return (edx & (1 << 9)) != 0;
}
// APIC class implementation
/* APIC class implementation */
/**
* @brief Constructor - sets APIC base address to default memory-mapped location
*
* The Local APIC registers are accessed through memory-mapped I/O at physical
* address 0xFEE00000 by default. This can be changed via the IA32_APIC_BASE MSR,
* but we use the default location for simplicity.
*/
APIC::APIC() : apicBase((volatile uint32_t*)0xFEE00000) {}
/**
* @brief Read a 32-bit value from an APIC register
*
* APIC registers are 32 bits wide and located at 16-byte aligned offsets.
* The apicBase pointer is to uint32_t, so we divide the offset by 4 to get
* the array index.
*
* @param offset Register offset in bytes (e.g., 0x020 for APIC ID register)
* @return 32-bit register value
*/
uint32_t APIC::read(uint32_t offset) const {
return apicBase[offset / 4];
}
/**
* @brief Write a 32-bit value to an APIC register
*
* @param offset Register offset in bytes
* @param value 32-bit value to write
*/
void APIC::write(uint32_t offset, uint32_t value) {
apicBase[offset / 4] = value;
}
@@ -40,6 +72,17 @@ bool APIC::isAvailable() const {
return cpuidHasAPIC();
}
/**
* @brief Initialize the Local APIC for this CPU core
*
* This function:
* 1. Enables the APIC by setting the software enable bit (bit 8) in the
* Spurious Interrupt Vector Register
* 2. Sets the spurious vector to 0xFF (unused vector for spurious interrupts)
* 3. Sets Task Priority Register to 0 to accept all interrupt priorities
*
* After this initialization, the APIC is ready to receive and send interrupts.
*/
void APIC::init() {
// Enable APIC via spurious interrupt vector register
// Set spurious vector to 0xFF and enable APIC (bit 8)
@@ -49,15 +92,52 @@ void APIC::init() {
write(APIC_REG_TPR, 0);
}
/**
* @brief Get the APIC ID of the current CPU core
*
* The APIC ID is stored in bits 24-31 of the APIC ID register. This is a
* unique identifier for each Local APIC (and thus each CPU core).
*
* @return 8-bit APIC ID
* @note APIC IDs may not be sequential (e.g., 0, 2, 4, 6 on hyperthreaded systems)
*/
uint8_t APIC::getId() const {
uint32_t idReg = read(APIC_REG_ID);
return (idReg >> 24) & 0xFF;
}
/**
* @brief Send End-Of-Interrupt signal to acknowledge interrupt completion
*
* After handling an interrupt that came through the APIC, the interrupt handler
* must send an EOI to inform the APIC that the interrupt has been processed.
* This allows the APIC to deliver the next interrupt if one is pending.
*
* Writing any value (typically 0) to the EOI register sends the EOI.
*/
void APIC::sendEOI() {
write(APIC_REG_EOI, 0);
}
/**
* @brief Send an Inter-Processor Interrupt (IPI) to another CPU core
*
* IPIs are used for:
* - Starting Application Processors (APs) during SMP initialization (INIT + SIPI)
* - Sending signals or notifications to other cores
* - TLB shootdowns when changing page tables
* - Requesting other cores to perform specific actions
*
* The IPI is sent using the Interrupt Command Register (ICR), which consists of
* two 32-bit registers (high and low). The high register contains the destination
* APIC ID, and the low register contains the delivery mode, vector, and control flags.
*
* @param destApicId APIC ID of the destination CPU core
* @param vector Interrupt vector number (or page number for SIPI)
* @param deliveryMode Delivery mode (INIT, SIPI, fixed, etc.)
*
* @note This function waits for any pending IPI to complete before sending a new one
*/
void APIC::sendIPI(uint8_t destApicId, uint8_t vector, uint32_t deliveryMode) {
// Wait for previous IPI to complete
while (read(APIC_REG_ICR_LOW) & (1 << 12)) {

View File

@@ -1,8 +1,9 @@
/*
* MetalOS Kernel - Global Descriptor Table (GDT)
/**
* @file gdt.cpp
* @brief Implementation of Global Descriptor Table (GDT) manager
*
* Minimal GDT setup for x86_64 long mode
* Only what's needed for our single-app OS
* The GDT is required by x86-64 processors even though segmentation is largely
* disabled in 64-bit mode. It defines code and data segments with privilege levels.
*/
#include "kernel/gdt.h"
@@ -10,12 +11,40 @@
// Load GDT (assembly)
extern "C" void gdt_flush(uint64_t);
// GDT class implementation
/* GDT class implementation */
/**
* @brief Constructor - initializes GDT pointer structure
*
* Sets up the GDTR (GDT Register) structure that will be loaded into the CPU.
* The limit is the size of the GDT minus 1, and the base is the memory address.
*/
GDT::GDT() {
gdtPtr.limit = (sizeof(gdt_entry_t) * 5) - 1;
gdtPtr.base = (uint64_t)&entries;
}
/**
* @brief Set a GDT entry with specified parameters
*
* Fills in all fields of a GDT entry. In 64-bit mode, the base and limit are
* largely ignored, but the access flags (privilege level, executable) are enforced.
*
* @param num Entry index (0-4)
* @param base Base address (mostly ignored in 64-bit mode)
* @param limit Segment limit (mostly ignored in 64-bit mode)
* @param access Access byte containing:
* - Bit 7: Present (must be 1 for valid segment)
* - Bits 5-6: DPL (Descriptor Privilege Level): 0=kernel, 3=user
* - Bit 4: Descriptor type (1 for code/data)
* - Bit 3: Executable (1 for code, 0 for data)
* - Bit 1: Readable/Writable
* @param gran Granularity byte containing:
* - Bit 7: Granularity (1=4KB blocks, 0=1 byte blocks)
* - Bit 6: Size (1=32-bit, 0=16-bit; for 64-bit use access flags)
* - Bit 5: Long mode (1=64-bit code segment)
* - Bits 0-3: Upper 4 bits of limit
*/
void GDT::setGate(int num, uint32_t base, uint32_t limit, uint8_t access, uint8_t gran) {
entries[num].base_low = (base & 0xFFFF);
entries[num].base_middle = (base >> 16) & 0xFF;
@@ -27,6 +56,30 @@ void GDT::setGate(int num, uint32_t base, uint32_t limit, uint8_t access, uint8_
entries[num].access = access;
}
/**
* @brief Initialize the GDT with required segments and load it
*
* Sets up a minimal GDT with 5 entries:
* 0. Null descriptor (required by CPU, must be all zeros)
* 1. Kernel code segment (CPL 0, 64-bit, executable, readable)
* 2. Kernel data segment (CPL 0, 64-bit, writable)
* 3. User code segment (CPL 3, 64-bit, executable, readable)
* 4. User data segment (CPL 3, 64-bit, writable)
*
* After setting up the entries, calls gdt_flush() assembly function to:
* - Load GDTR using LGDT instruction
* - Reload segment registers with new selectors
*
* Access byte values:
* - 0x9A = 10011010 = Present, Ring 0, Code, Executable, Readable
* - 0x92 = 10010010 = Present, Ring 0, Data, Writable
* - 0xFA = 11111010 = Present, Ring 3, Code, Executable, Readable
* - 0xF2 = 11110010 = Present, Ring 3, Data, Writable
*
* Granularity values:
* - 0xA0 = 10100000 = Long mode (64-bit)
* - 0xC0 = 11000000 = 32-bit mode (for data segments in 64-bit mode)
*/
void GDT::init() {
// Null descriptor
setGate(0, 0, 0, 0, 0);

View File

@@ -1,8 +1,9 @@
/*
* MetalOS Kernel - Interrupt Handling
/**
* @file interrupts.cpp
* @brief Implementation of interrupt descriptor table and interrupt handling
*
* Minimal IDT and interrupt handlers
* Supports both PIC (legacy) and APIC (multicore) modes
* Manages CPU exceptions and hardware interrupts through the IDT.
* Supports both legacy PIC and modern APIC interrupt controllers.
*/
#include "kernel/interrupts.h"
@@ -10,7 +11,11 @@
#include "kernel/smp.h"
#include "kernel/apic.h"
// I/O port access functions
/**
* @brief Write a byte to an I/O port
* @param port I/O port address
* @param value Byte value to write
*/
static inline void outb(uint16_t port, uint8_t value) {
__asm__ volatile("outb %0, %1" : : "a"(value), "Nd"(port));
}
@@ -34,12 +39,34 @@ extern "C" {
void irq0(void); void irq1(void);
}
// InterruptManager class implementation
/* InterruptManager class implementation */
/**
* @brief Constructor - initializes IDT pointer structure
*/
InterruptManager::InterruptManager() {
idtPtr.limit = (sizeof(idt_entry_t) * 256) - 1;
idtPtr.base = (uint64_t)&idt;
}
/**
* @brief Set an IDT entry to point to an interrupt handler
*
* In 64-bit mode, IDT entries are 16 bytes and contain:
* - 64-bit handler address (split across three fields)
* - 16-bit code segment selector
* - Type and attributes (present, DPL, gate type)
* - IST (Interrupt Stack Table) offset (usually 0)
*
* @param num Interrupt vector number (0-255)
* @param handler Address of interrupt handler function
* @param selector Code segment selector (0x08 for kernel code)
* @param flags Type and attribute byte:
* - Bit 7: Present (1)
* - Bits 5-6: DPL (0 for kernel)
* - Bits 0-4: Gate type (0xE for interrupt gate)
* Common value: 0x8E = Present, DPL=0, Interrupt Gate
*/
void InterruptManager::setGate(uint8_t num, uint64_t handler, uint16_t selector, uint8_t flags) {
idt[num].offset_low = handler & 0xFFFF;
idt[num].offset_mid = (handler >> 16) & 0xFFFF;
@@ -50,6 +77,26 @@ void InterruptManager::setGate(uint8_t num, uint64_t handler, uint16_t selector,
idt[num].zero = 0;
}
/**
* @brief Remap the 8259 PIC to avoid conflicts with CPU exceptions
*
* By default, the PIC uses IRQ vectors 0-15, which overlap with CPU exception
* vectors 0-31. This causes confusion when a hardware interrupt has the same
* vector as a CPU exception (e.g., IRQ 8 vs Double Fault exception 8).
*
* We remap the PIC so that:
* - Master PIC (IRQ 0-7) → vectors 32-39
* - Slave PIC (IRQ 8-15) → vectors 40-47
*
* The remapping process uses ICW (Initialization Command Words):
* - ICW1: Start initialization (0x11 = ICW4 needed, cascade mode)
* - ICW2: Set vector offset (0x20 for master, 0x28 for slave)
* - ICW3: Set up cascade (master: slave on IRQ2, slave: cascade identity)
* - ICW4: Set 8086 mode
*
* After remapping, all IRQs are masked (disabled) initially. Individual IRQs
* must be explicitly unmasked to receive interrupts.
*/
void InterruptManager::remapPIC() {
// ICW1: Initialize PIC
outb(PIC1_COMMAND, 0x11);
@@ -72,6 +119,30 @@ void InterruptManager::remapPIC() {
outb(PIC2_DATA, 0xFF);
}
/**
* @brief Initialize the IDT and enable interrupts
*
* This function performs complete interrupt subsystem initialization:
* 1. Clear all 256 IDT entries
* 2. Install exception handlers (ISR 0-31) for CPU exceptions
* 3. Remap the PIC to avoid conflicts
* 4. Install IRQ handlers (32-47) for hardware interrupts
* 5. Load IDT using LIDT instruction
* 6. Enable interrupts using STI instruction
*
* CPU exceptions (0-31) include:
* - 0: Divide by zero
* - 6: Invalid opcode
* - 13: General protection fault
* - 14: Page fault
* etc.
*
* Hardware IRQs (32-47) include:
* - 32 (IRQ 0): Timer
* - 33 (IRQ 1): Keyboard
* - 44 (IRQ 12): PS/2 Mouse
* etc.
*/
void InterruptManager::init() {
// Clear IDT
for (int i = 0; i < 256; i++) {
@@ -126,6 +197,25 @@ void InterruptManager::init() {
__asm__ volatile("sti");
}
/**
* @brief Main interrupt handler dispatcher
*
* This function is called from the assembly interrupt stubs (ISRs/IRQs).
* It receives the saved CPU state and dispatches to specific handlers
* based on the interrupt number.
*
* Process:
* 1. Check interrupt number
* 2. Call specific handler if needed (e.g., timer for IRQ 0)
* 3. Send End-Of-Interrupt signal to PIC or APIC
*
* For hardware IRQs (32-47):
* - Check if using APIC (multicore) or PIC (legacy)
* - Send EOI to appropriate controller
* - For slave PIC IRQs (40-47), must send EOI to both PICs
*
* @param regs Pointer to saved CPU register state
*/
void InterruptManager::handleInterrupt(registers_t* regs) {
// Handle specific interrupts
if (regs->int_no == 32) {

View File

@@ -1,3 +1,26 @@
/**
* @file main.cpp
* @brief MetalOS Kernel Main Entry Point
*
* This is the heart of MetalOS - an extremely minimalist kernel designed to run
* a single application (QT6 Hello World). The kernel provides only the essential
* hardware initialization needed to run the application.
*
* Design Philosophy:
* - No scheduler: Single application, always running
* - No process management: One process only
* - No complex memory management: Simple bump allocator
* - No filesystem: Application embedded in boot image
* - Multicore support: All cores initialized for future parallel processing
*
* Boot sequence:
* 1. UEFI bootloader loads kernel and provides boot information
* 2. Kernel initializes hardware (GDT, IDT, memory, timer, PCI, SMP)
* 3. Kernel will eventually jump directly to the application
*
* Target size: < 150 KB (achieved through extreme minimalism)
*/
/*
* MetalOS Kernel - Main Entry Point
*
@@ -16,12 +39,60 @@
#include "kernel/timer.h"
#include "kernel/smp.h"
/*
* Kernel main entry point
* Called by bootloader with boot information
/**
* @brief Kernel main entry point - called by bootloader
*
* This is the root-level function that hands off to C++ classes
* for hardware initialization and system management.
* This is the first C++ function executed after the bootloader transfers control.
* It receives boot information from UEFI and performs minimal hardware initialization.
*
* Initialization sequence:
*
* 1. GDT (Global Descriptor Table):
* - Required for x86-64 segmentation and privilege levels
* - Sets up kernel/user code and data segments
*
* 2. IDT (Interrupt Descriptor Table):
* - Sets up interrupt and exception handlers
* - Remaps PIC to avoid conflicts with CPU exceptions
* - Enables hardware interrupts
*
* 3. Physical Memory Manager:
* - Initializes page bitmap for 4KB page allocation
* - Currently assumes 128MB at 16MB physical address
* - TODO: Parse UEFI memory map for proper detection
*
* 4. Kernel Heap:
* - Allocates 1MB (256 pages) for kernel dynamic allocation
* - Uses simple bump allocator (no free() support)
*
* 5. Timer (PIT):
* - Programs 8254 PIT for 1000 Hz (1ms ticks)
* - Used for timekeeping and delays
*
* 6. PCI Bus:
* - Enumerates all PCI devices
* - Discovers GPU and other hardware
* - Stores device information for later use
*
* 7. SMP (Multi-Processing):
* - Initializes Local APIC on BSP
* - Starts all available Application Processor cores
* - Currently APs idle; only BSP runs application
*
* After initialization, the kernel will eventually:
* - Initialize GPU for framebuffer graphics
* - Set up minimal input (PS/2 keyboard/mouse or USB)
* - Jump directly to QT6 application entry point
*
* For now, it enters an infinite halt loop waiting for implementation.
*
* @param boot_info Pointer to boot information structure from UEFI bootloader containing:
* - Framebuffer information (base, width, height, pitch, bpp)
* - Kernel location and size
* - ACPI RSDP pointer
* - UEFI memory map
*
* @note This function should never return
*/
extern "C" void kernel_main(BootInfo* boot_info) {
// Initialize GDT (Global Descriptor Table) - using GDT class

View File

@@ -1,8 +1,10 @@
/*
* MetalOS Kernel - Memory Management
/**
* @file memory.cpp
* @brief Implementation of physical memory manager and kernel heap allocator
*
* Simple physical memory manager and heap allocator
* Minimal implementation for single-app OS
* Provides two memory management subsystems:
* 1. Physical Memory Manager (PMM): Manages 4KB pages using a bitmap
* 2. Heap Allocator: Simple bump allocator for kernel dynamic allocation
*/
#include "kernel/memory.h"
@@ -10,7 +12,11 @@
// Physical memory bitmap constants
#define BITMAP_SIZE 32768 // Supports up to 128MB with 4KB pages
// PhysicalMemoryManager class implementation
/* PhysicalMemoryManager class implementation */
/**
* @brief Constructor - initializes all fields and clears bitmap
*/
PhysicalMemoryManager::PhysicalMemoryManager()
: totalPages(0), usedPages(0) {
for (uint64_t i = 0; i < BITMAP_SIZE; i++) {
@@ -18,6 +24,20 @@ PhysicalMemoryManager::PhysicalMemoryManager()
}
}
/**
* @brief Initialize the physical memory manager
*
* Currently uses a simplified approach:
* - Assumes 128MB of usable RAM starting at physical address 16MB (0x01000000)
* - Clears the entire page bitmap to mark all pages as free
* - TODO: Parse the UEFI memory map from bootInfo to properly detect available memory
*
* The 16MB starting address is chosen to avoid:
* - First 1MB: Legacy BIOS area, video memory, etc.
* - 1MB-16MB: Kernel code, boot structures, and reserved areas
*
* @param bootInfo Boot information structure (currently unused, TODO: parse memory map)
*/
void PhysicalMemoryManager::init(BootInfo* bootInfo) {
(void)bootInfo; // TODO: Parse UEFI memory map
@@ -32,6 +52,21 @@ void PhysicalMemoryManager::init(BootInfo* bootInfo) {
usedPages = 0;
}
/**
* @brief Allocate a single 4KB physical memory page
*
* Uses a simple first-fit algorithm:
* 1. Scan the bitmap from the beginning
* 2. Find the first page where the corresponding bit is 0 (free)
* 3. Set the bit to 1 (allocated)
* 4. Calculate and return the physical address
*
* Each bit in the bitmap represents one 4KB page:
* - Byte N, Bit M represents page (N*8 + M)
* - Physical address = 0x01000000 + (page_index * 4096)
*
* @return Physical address of allocated page, or nullptr if out of memory
*/
void* PhysicalMemoryManager::allocPage() {
// Find first free page in bitmap
for (uint64_t i = 0; i < totalPages; i++) {
@@ -53,6 +88,15 @@ void* PhysicalMemoryManager::allocPage() {
return nullptr;
}
/**
* @brief Free a previously allocated physical memory page
*
* Calculates the page index from the physical address and clears the
* corresponding bit in the bitmap to mark the page as free.
*
* @param page Physical address of page to free
* @note Does nothing if address is invalid (< base or >= limit)
*/
void PhysicalMemoryManager::freePage(void* page) {
uint64_t addr = (uint64_t)page;
@@ -71,24 +115,65 @@ void PhysicalMemoryManager::freePage(void* page) {
usedPages--;
}
/**
* @brief Get total memory managed by PMM in bytes
* @return Total memory size (totalPages * PAGE_SIZE)
*/
uint64_t PhysicalMemoryManager::getTotalMemory() const {
return totalPages * PAGE_SIZE;
}
/**
* @brief Get free memory available in bytes
* @return Free memory size ((totalPages - usedPages) * PAGE_SIZE)
*/
uint64_t PhysicalMemoryManager::getFreeMemory() const {
return (totalPages - usedPages) * PAGE_SIZE;
}
// HeapAllocator class implementation
/* HeapAllocator class implementation */
/**
* @brief Constructor - initializes all pointers to null
*/
HeapAllocator::HeapAllocator()
: heapStart(nullptr), heapCurrent(nullptr), heapEnd(nullptr) {}
/**
* @brief Initialize heap with a pre-allocated memory region
*
* The heap operates on a contiguous region of memory. The heapCurrent pointer
* starts at the beginning and moves forward with each allocation.
*
* @param start Starting address of heap region (obtained from PMM)
* @param size Size of heap region in bytes (e.g., 1MB = 256 pages * 4KB)
*/
void HeapAllocator::init(void* start, size_t size) {
heapStart = (uint8_t*)start;
heapCurrent = heapStart;
heapEnd = heapStart + size;
}
/**
* @brief Allocate memory from the heap (bump allocator)
*
* This is a "bump" or "arena" allocator - the simplest possible allocator.
* It just moves the current pointer forward by the requested size.
*
* Process:
* 1. Round size up to 16-byte boundary for alignment
* 2. Check if enough space remains in heap
* 3. Save current pointer as return value
* 4. Move current pointer forward by aligned size
*
* Alignment to 16 bytes ensures:
* - Compatibility with SSE/AVX instructions (require 16-byte alignment)
* - Better cache line utilization
* - Prevents unaligned access penalties
*
* @param size Number of bytes to allocate
* @return Pointer to allocated memory, or nullptr if out of heap space
*/
void* HeapAllocator::alloc(size_t size) {
if (!heapStart) {
return nullptr;
@@ -107,6 +192,15 @@ void* HeapAllocator::alloc(size_t size) {
return ptr;
}
/**
* @brief Allocate and zero-initialize array memory
*
* Equivalent to alloc(num * size) followed by memset to zero.
*
* @param num Number of elements
* @param size Size of each element in bytes
* @return Pointer to allocated and zeroed memory, or nullptr if out of space
*/
void* HeapAllocator::calloc(size_t num, size_t size) {
size_t total = num * size;
void* ptr = alloc(total);
@@ -118,13 +212,35 @@ void* HeapAllocator::calloc(size_t num, size_t size) {
return ptr;
}
/**
* @brief Free memory (no-op in bump allocator)
*
* Bump allocators cannot free individual allocations. The entire heap
* can only be reset at once. For a simple single-application OS, this
* limitation is acceptable.
*
* @param ptr Pointer to memory (ignored)
* @todo Replace with proper allocator if individual free() is needed
*/
void HeapAllocator::free(void* ptr) {
(void)ptr;
// TODO: Implement proper free with a real allocator
// For now, bump allocator doesn't support freeing
}
// Memory utility functions
/* Memory utility functions */
/**
* @brief Fill memory with a constant byte value
*
* Simple byte-by-byte memset implementation. Not optimized for large blocks,
* but sufficient for kernel use with small structures and buffers.
*
* @param dest Pointer to memory block to fill
* @param val Value to set (converted to unsigned char)
* @param count Number of bytes to set
* @return Pointer to dest
*/
void* memset(void* dest, int val, size_t count) {
uint8_t* d = (uint8_t*)dest;
uint8_t v = (uint8_t)val;
@@ -136,6 +252,17 @@ void* memset(void* dest, int val, size_t count) {
return dest;
}
/**
* @brief Copy memory from source to destination
*
* Simple byte-by-byte memcpy implementation. Memory areas must not overlap.
*
* @param dest Pointer to destination buffer
* @param src Pointer to source buffer
* @param count Number of bytes to copy
* @return Pointer to dest
* @warning Memory regions must not overlap (use memmove if they might)
*/
void* memcpy(void* dest, const void* src, size_t count) {
uint8_t* d = (uint8_t*)dest;
const uint8_t* s = (const uint8_t*)src;
@@ -147,6 +274,17 @@ void* memcpy(void* dest, const void* src, size_t count) {
return dest;
}
/**
* @brief Compare two memory blocks
*
* Compares memory byte-by-byte until a difference is found or count bytes
* have been compared.
*
* @param s1 Pointer to first memory block
* @param s2 Pointer to second memory block
* @param count Number of bytes to compare
* @return 0 if equal, negative if s1 < s2, positive if s1 > s2
*/
int memcmp(const void* s1, const void* s2, size_t count) {
const uint8_t* a = (const uint8_t*)s1;
const uint8_t* b = (const uint8_t*)s2;

View File

@@ -1,27 +1,69 @@
/*
* MetalOS Kernel - PCI Bus Support
/**
* @file pci.cpp
* @brief Implementation of PCI bus enumeration and device management
*
* Minimal PCI enumeration and configuration
* Only what's needed to find and initialize the GPU
* PCI (Peripheral Component Interconnect) is the standard bus for connecting
* hardware devices. This implementation scans the PCI bus to discover devices
* and provides functions to configure them.
*/
#include "kernel/pci.h"
#include "kernel/memory.h"
// I/O port access functions
/**
* @brief Write a 32-bit value to an I/O port
* @param port I/O port address
* @param value 32-bit value to write
*/
static inline void outl(uint16_t port, uint32_t value) {
__asm__ volatile("outl %0, %1" : : "a"(value), "Nd"(port));
}
/**
* @brief Read a 32-bit value from an I/O port
* @param port I/O port address
* @return 32-bit value read from port
*/
static inline uint32_t inl(uint16_t port) {
uint32_t value;
__asm__ volatile("inl %1, %0" : "=a"(value) : "Nd"(port));
return value;
}
// PCIManager class implementation
/* PCIManager class implementation */
/**
* @brief Constructor - initializes device count to zero
*/
PCIManager::PCIManager() : deviceCount(0) {}
/**
* @brief Read a 32-bit value from PCI configuration space
*
* PCI configuration space is accessed through two I/O ports:
* - 0xCF8 (CONFIG_ADDRESS): Write the address of config register to read
* - 0xCFC (CONFIG_DATA): Read the 32-bit value from that register
*
* The address format (32 bits):
* - Bit 31: Enable bit (must be 1)
* - Bits 16-23: Bus number (0-255)
* - Bits 11-15: Device number (0-31)
* - Bits 8-10: Function number (0-7)
* - Bits 0-7: Register offset (4-byte aligned, bits 0-1 ignored)
*
* Each PCI device has 256 bytes of configuration space containing:
* - Device identification (vendor/device ID at offset 0x00)
* - Command/status registers (offset 0x04)
* - Class code (offset 0x08)
* - BARs (Base Address Registers at offsets 0x10-0x24)
* - Interrupt configuration
*
* @param bus Bus number (0-255)
* @param device Device number on bus (0-31)
* @param function Function number within device (0-7)
* @param offset Register offset (must be 4-byte aligned)
* @return 32-bit configuration register value
*/
uint32_t PCIManager::readConfig(uint8_t bus, uint8_t device, uint8_t function, uint8_t offset) {
uint32_t address = (uint32_t)(
((uint32_t)bus << 16) |
@@ -35,6 +77,21 @@ uint32_t PCIManager::readConfig(uint8_t bus, uint8_t device, uint8_t function, u
return inl(PCI_CONFIG_DATA);
}
/**
* @brief Write a 32-bit value to PCI configuration space
*
* Similar to readConfig, but writes a value to the specified register.
* Used for device configuration, such as:
* - Enabling bus mastering
* - Enabling memory/IO space access
* - Configuring interrupt lines
*
* @param bus Bus number
* @param device Device number
* @param function Function number
* @param offset Register offset (4-byte aligned)
* @param value 32-bit value to write
*/
void PCIManager::writeConfig(uint8_t bus, uint8_t device, uint8_t function, uint8_t offset, uint32_t value) {
uint32_t address = (uint32_t)(
((uint32_t)bus << 16) |
@@ -48,6 +105,26 @@ void PCIManager::writeConfig(uint8_t bus, uint8_t device, uint8_t function, uint
outl(PCI_CONFIG_DATA, value);
}
/**
* @brief Probe a specific PCI device/function and add to device list
*
* Reads device information from PCI configuration space and stores it:
* - Vendor ID and Device ID (for identification)
* - Class code, subclass, prog_if (device type)
* - Revision ID
* - All 6 Base Address Registers (BARs)
*
* BARs specify memory or I/O regions used by the device:
* - Bit 0: 0=memory BAR, 1=I/O BAR
* - For memory BARs:
* - Bits 1-2: Type (00=32-bit, 10=64-bit)
* - Bit 3: Prefetchable
* - Bits 4-31: Base address (4KB aligned)
*
* @param bus Bus number
* @param device Device number
* @param function Function number
*/
void PCIManager::probeDevice(uint8_t bus, uint8_t device, uint8_t function) {
uint32_t vendorDevice = readConfig(bus, device, function, 0x00);
uint16_t vendor_id = vendorDevice & 0xFFFF;
@@ -85,6 +162,21 @@ void PCIManager::probeDevice(uint8_t bus, uint8_t device, uint8_t function) {
}
}
/**
* @brief Initialize PCI subsystem by scanning all buses
*
* Performs a complete scan of the PCI bus hierarchy:
* - Iterates through all 256 possible buses
* - For each bus, checks all 32 device slots
* - For each device, checks if it's multi-function
* - If multi-function, scans all 8 functions
*
* A device exists if its vendor ID is not 0xFFFF. The header type
* register (offset 0x0C) has bit 7 set for multi-function devices.
*
* This approach is brute-force but simple and reliable. More sophisticated
* implementations would parse ACPI tables to find PCI buses.
*/
void PCIManager::init() {
deviceCount = 0;
@@ -114,6 +206,18 @@ void PCIManager::init() {
}
}
/**
* @brief Find a PCI device by vendor and device ID
*
* Searches through the list of discovered devices for a match.
* Useful for finding specific hardware, e.g.:
* - AMD RX 6600: vendor=0x1002, device=0x73FF
* - Intel NIC: vendor=0x8086, device=various
*
* @param vendor_id Vendor identifier (e.g., 0x1002 for AMD)
* @param device_id Device identifier (specific model)
* @return Pointer to PCIDevice if found, nullptr otherwise
*/
PCIDevice* PCIManager::findDevice(uint16_t vendor_id, uint16_t device_id) {
for (uint32_t i = 0; i < deviceCount; i++) {
if (devices[i].vendor_id == vendor_id && devices[i].device_id == device_id) {
@@ -123,6 +227,21 @@ PCIDevice* PCIManager::findDevice(uint16_t vendor_id, uint16_t device_id) {
return nullptr;
}
/**
* @brief Enable bus mastering for a PCI device
*
* Bus mastering allows a device to perform DMA (Direct Memory Access) -
* reading and writing system memory without CPU involvement. This is
* essential for high-performance devices like GPUs and network cards.
*
* The command register (offset 0x04) contains control bits:
* - Bit 0: I/O Space Enable
* - Bit 1: Memory Space Enable
* - Bit 2: Bus Master Enable ← We set this bit
* - Bit 10: Interrupt Disable
*
* @param dev Pointer to PCI device structure
*/
void PCIManager::enableBusMastering(PCIDevice* dev) {
if (!dev) return;

View File

@@ -1,8 +1,9 @@
/*
* MetalOS Kernel - SMP (Symmetric Multi-Processing) Support
/**
* @file smp.cpp
* @brief Implementation of SMP (Symmetric Multi-Processing) initialization
*
* Basic multicore support for better performance
* Initializes Application Processors (APs) using SIPI protocol
* SMP support allows the OS to use multiple CPU cores. This involves starting
* Application Processors (APs) using the INIT-SIPI-SIPI sequence defined by Intel.
*/
#include "kernel/smp.h"
@@ -21,7 +22,14 @@ extern "C" {
void ap_trampoline_end(void);
}
// SMPManager class implementation
/* SMPManager class implementation */
/**
* @brief Constructor - initializes BSP (Bootstrap Processor) as CPU 0
*
* The BSP is the first CPU core that starts when the system boots.
* It's responsible for initializing the system and starting other cores (APs).
*/
SMPManager::SMPManager() : cpuCount(1), smpEnabled(false) {
// Initialize BSP
cpuInfo[0].cpu_id = BSP_CPU_ID;
@@ -30,6 +38,15 @@ SMPManager::SMPManager() : cpuCount(1), smpEnabled(false) {
cpuInfo[0].kernel_stack = 0;
}
/**
* @brief Get the logical CPU ID of the currently executing core
*
* Uses the Local APIC ID to determine which CPU is running this code.
* This is important in multicore systems where each core may be executing
* kernel code simultaneously.
*
* @return Logical CPU ID (0 for BSP, 1+ for APs)
*/
uint8_t SMPManager::getCurrentCPU() const {
if (!smpEnabled) {
return BSP_CPU_ID;
@@ -47,6 +64,16 @@ uint8_t SMPManager::getCurrentCPU() const {
return BSP_CPU_ID;
}
/**
* @brief Initialize CPU information structure
*
* Sets up the per-CPU data structure with initial values.
* Each CPU has a logical ID (sequential: 0, 1, 2...) and a physical
* APIC ID (may not be sequential, e.g., 0, 2, 4, 6...).
*
* @param cpuId Logical CPU ID (0-15)
* @param apicId Physical APIC ID
*/
void SMPManager::initCPU(uint8_t cpuId, uint8_t apicId) {
if (cpuId >= MAX_CPUS) return;
@@ -62,6 +89,17 @@ void SMPManager::markCPUOnline(uint8_t cpuId) {
}
}
/**
* @brief Busy-wait delay for timing during AP startup
*
* This is an approximate delay using a busy loop. Not precise, but sufficient
* for the timing requirements of the INIT-SIPI-SIPI sequence:
* - 10ms delay after INIT
* - 200μs delay after each SIPI
*
* @param microseconds Delay duration in microseconds (approximate)
* @note Uses PAUSE instruction to improve performance during busy-wait
*/
void SMPManager::delay(uint32_t microseconds) {
// Approximate delay (not precise)
for (volatile uint32_t i = 0; i < microseconds * 100; i++) {
@@ -69,6 +107,30 @@ void SMPManager::delay(uint32_t microseconds) {
}
}
/**
* @brief Start an Application Processor using INIT-SIPI-SIPI sequence
*
* The Intel-specified AP startup sequence:
* 1. Send INIT IPI to reset the AP to a known state (16-bit real mode)
* 2. Wait 10ms for INIT to complete
* 3. Send first SIPI with vector = page number of trampoline code
* 4. Wait 200μs
* 5. Send second SIPI (per Intel spec for compatibility)
* 6. Wait 200μs
* 7. Poll for AP to mark itself online (timeout after 1 second)
*
* The SIPI vector is the page number (4KB) where the trampoline code
* is located. For address 0x8000, vector = 0x8000 >> 12 = 0x08.
*
* The trampoline code must:
* - Be in low memory (< 1MB) accessible in real mode
* - Switch from 16-bit real mode to 64-bit long mode
* - Initialize the AP's GDT, IDT, and APIC
* - Jump to the AP entry point in the kernel
*
* @param apicId Physical APIC ID of the AP to start
* @return true if AP started successfully, false on timeout
*/
bool SMPManager::startAP(uint8_t apicId) {
// Send INIT IPI
apic_send_ipi(apicId, 0, APIC_IPI_INIT);
@@ -97,6 +159,28 @@ bool SMPManager::startAP(uint8_t apicId) {
return false;
}
/**
* @brief Initialize SMP subsystem and start all available CPU cores
*
* This function performs the following steps:
* 1. Check if Local APIC is available (required for SMP)
* 2. If no APIC, fall back to single-core mode
* 3. Initialize BSP's Local APIC
* 4. Get BSP's APIC ID
* 5. Attempt to start additional cores by sending IPIs
* 6. Set smpEnabled flag if multiple cores detected
*
* The function tries to start up to MAX_CPUS cores by probing APIC IDs
* from 0 to maxCPUsToTry. In a real system, this should be done by
* parsing the ACPI MADT (Multiple APIC Description Table) to find
* the actual APIC IDs of installed CPUs.
*
* After successful initialization:
* - All cores have initialized their Local APICs
* - All cores are marked as online
* - Each core can execute kernel code
* - Currently only BSP runs the application (APs idle)
*/
void SMPManager::init() {
// Check if APIC is available
if (!apic_is_available()) {

View File

@@ -1,20 +1,42 @@
/*
* MetalOS Kernel - Spinlock
/**
* @file spinlock.cpp
* @brief Implementation of spinlock synchronization primitive
*
* Simple spinlock implementation for multicore synchronization
* Uses x86 atomic instructions
* Spinlocks provide mutual exclusion in multicore systems using atomic operations.
*/
#include "kernel/spinlock.h"
// Spinlock class implementation
/* Spinlock class implementation */
/**
* @brief Constructor - initializes lock to unlocked state (0)
*/
Spinlock::Spinlock() : lock(0) {}
void Spinlock::init() {
lock = 0;
}
/**
* @brief Acquire the spinlock (block until available)
*
* This function uses the x86 XCHG (exchange) instruction, which is:
* - Atomic: The operation cannot be interrupted midway
* - Implicitly locked: Works correctly across multiple CPU cores
* - Sequentially consistent: No memory reordering issues
*
* The algorithm:
* 1. Atomically exchange the lock variable with 1
* 2. If the old value was 0, we got the lock (return)
* 3. If the old value was 1, lock was already held (spin)
* 4. Use PAUSE instruction while spinning to improve performance
*
* The PAUSE instruction:
* - Improves performance on hyperthreaded CPUs
* - Reduces power consumption during spin-wait
* - Prevents memory order violations in the spin loop
*/
void Spinlock::acquire() {
while (1) {
// Try to acquire lock using atomic exchange
@@ -36,6 +58,15 @@ void Spinlock::acquire() {
}
}
/**
* @brief Try to acquire the spinlock without blocking
*
* Similar to acquire(), but returns immediately if lock is already held.
* Useful when you want to try acquiring a lock but have alternative work
* to do if it's not available.
*
* @return true if lock was successfully acquired, false if already locked
*/
bool Spinlock::tryAcquire() {
uint32_t old_value;
__asm__ volatile(
@@ -48,6 +79,18 @@ bool Spinlock::tryAcquire() {
return (old_value == 0);
}
/**
* @brief Release the spinlock
*
* Simply sets the lock variable back to 0 (unlocked). The empty inline assembly
* with "memory" clobber acts as a compiler memory barrier, ensuring all previous
* stores are completed before the lock is released.
*
* This prevents the compiler from reordering memory operations across the lock
* boundary, which would violate the mutual exclusion guarantee.
*
* @note Must only be called by the CPU that currently holds the lock
*/
void Spinlock::release() {
// Memory barrier to ensure all previous stores are visible
__asm__ volatile("" ::: "memory");

View File

@@ -1,8 +1,8 @@
/*
* MetalOS Kernel - Timer Support
/**
* @file timer.cpp
* @brief Implementation of PIT (Programmable Interval Timer) manager
*
* Simple PIT (Programmable Interval Timer) support
* Used for scheduling and timing
* The PIT generates periodic timer interrupts for system timekeeping and delays.
*/
#include "kernel/timer.h"
@@ -15,20 +15,64 @@
// PIT constants
#define PIT_BASE_FREQUENCY 1193182 // Hz
// I/O port access functions
/**
* @brief Write a byte to an I/O port
*
* Uses the x86 OUT instruction to write a byte to a hardware I/O port.
*
* @param port I/O port address
* @param value Byte value to write
*/
static inline void outb(uint16_t port, uint8_t value) {
__asm__ volatile("outb %0, %1" : : "a"(value), "Nd"(port));
}
/**
* @brief Read a byte from an I/O port
*
* Uses the x86 IN instruction to read a byte from a hardware I/O port.
*
* @param port I/O port address
* @return Byte value read from port
*/
static inline uint8_t inb(uint16_t port) {
uint8_t value;
__asm__ volatile("inb %1, %0" : "=a"(value) : "Nd"(port));
return value;
}
// Timer class implementation
/* Timer class implementation */
/**
* @brief Constructor - initializes tick counter to zero
*/
Timer::Timer() : ticks(0) {}
/**
* @brief Initialize the PIT to generate interrupts at specified frequency
*
* The PIT works by counting down from a divisor value at its base frequency
* of 1.193182 MHz. When the counter reaches zero, it generates an interrupt
* and reloads the divisor.
*
* For example, to get 1000 Hz (1ms ticks):
* divisor = 1193182 / 1000 = 1193
*
* The process:
* 1. Calculate divisor from desired frequency
* 2. Send command byte to configure channel 0 in rate generator mode
* 3. Send low byte of divisor
* 4. Send high byte of divisor
* 5. Unmask IRQ0 in the PIC to enable timer interrupts
*
* Command byte 0x36 means:
* - Channel 0
* - Access mode: lobyte/hibyte
* - Mode 3: Square wave generator (rate generator)
* - Binary counter (not BCD)
*
* @param frequency Desired interrupt frequency in Hz (e.g., 1000 for 1ms ticks)
*/
void Timer::init(uint32_t frequency) {
// Calculate divisor
uint32_t divisor = PIT_BASE_FREQUENCY / frequency;
@@ -53,6 +97,16 @@ uint64_t Timer::getTicks() const {
return ticks;
}
/**
* @brief Wait for a specified number of timer ticks
*
* Calculates target tick count and uses HLT instruction to wait efficiently.
* HLT puts the CPU in a low-power state until the next interrupt arrives.
*
* @param waitTicks Number of ticks to wait
* @note Blocking function - CPU will be idle during wait
* @note At 1000 Hz, each tick is 1 millisecond
*/
void Timer::wait(uint32_t waitTicks) const {
uint64_t target = ticks + waitTicks;
while (ticks < target) {
@@ -60,6 +114,14 @@ void Timer::wait(uint32_t waitTicks) const {
}
}
/**
* @brief Handle timer interrupt (increment tick counter)
*
* This function is called from the IRQ0 interrupt handler every time
* the PIT generates an interrupt. It simply increments the tick counter.
*
* @note Must be called from interrupt context only
*/
void Timer::handleInterrupt() {
ticks++;
}