From 8c30b4b942c8f2df4d5409305b10654fc890f861 Mon Sep 17 00:00:00 2001
From: Ruihan Li
Date: Thu, 17 Apr 2025 23:29:08 +0800
Subject: [PATCH] Rewrite `trap/gdt.rs`

---
 ostd/src/arch/riscv/mod.rs        |   4 +-
 ostd/src/arch/riscv/trap/mod.rs   |   2 +-
 ostd/src/arch/x86/boot/bsp_boot.S |   8 +-
 ostd/src/arch/x86/boot/mod.rs     |   7 +-
 ostd/src/arch/x86/mod.rs          |   5 +-
 ostd/src/arch/x86/trap/gdt.rs     | 176 ++++++++++++++++--------
 ostd/src/arch/x86/trap/mod.rs     |  26 ++--
 ostd/src/arch/x86/trap/syscall.S  |   4 +-
 ostd/src/arch/x86/trap/syscall.rs |   8 +-
 ostd/src/boot/smp.rs              |   4 +-
 10 files changed, 134 insertions(+), 110 deletions(-)

diff --git a/ostd/src/arch/riscv/mod.rs b/ostd/src/arch/riscv/mod.rs
index 4360b2a95..4ebecb864 100644
--- a/ostd/src/arch/riscv/mod.rs
+++ b/ostd/src/arch/riscv/mod.rs
@@ -23,8 +23,8 @@ pub(crate) fn init_cvm_guest() {
 }
 
 pub(crate) unsafe fn late_init_on_bsp() {
-    // SAFETY: This function is only called once on BSP.
-    unsafe { trap::init(true) };
+    // SAFETY: This function is called in the boot context of the BSP.
+    unsafe { trap::init() };
     irq::init();
 
     // SAFETY: We're on the BSP and we're ready to boot all APs.
diff --git a/ostd/src/arch/riscv/trap/mod.rs b/ostd/src/arch/riscv/trap/mod.rs
index 2581a87f7..9bec90ca3 100644
--- a/ostd/src/arch/riscv/trap/mod.rs
+++ b/ostd/src/arch/riscv/trap/mod.rs
@@ -15,7 +15,7 @@ cpu_local_cell! {
 }
 
 /// Initialize interrupt handling on RISC-V.
-pub unsafe fn init(_on_bsp: bool) {
+pub unsafe fn init() {
     self::trap::init();
 }
 
diff --git a/ostd/src/arch/x86/boot/bsp_boot.S b/ostd/src/arch/x86/boot/bsp_boot.S
index 788212041..b9ebb4a21 100644
--- a/ostd/src/arch/x86/boot/bsp_boot.S
+++ b/ostd/src/arch/x86/boot/bsp_boot.S
@@ -251,10 +251,10 @@ boot_gdtr:
 
 .align 16
 gdt:
-    .quad 0x0000000000000000 // 0: null descriptor
-    .quad 0x00af9a000000ffff // 8: 64-bit code segment (kernel)
-    .quad 0x00cf92000000ffff // 16: 64-bit data segment (kernel)
-    .quad 0x00cf9a000000ffff // 24: 32-bit code segment (kernel)
+    .quad 0          // 0: null descriptor
+    .quad {KCODE64}  // 8: code segment (kernel, 64-bit)
+    .quad {KDATA}    // 16: data segment (kernel)
+    .quad {KCODE32}  // 24: code segment (kernel, 32-bit)
 gdt_end:
 
 // The page tables and the stack
diff --git a/ostd/src/arch/x86/boot/mod.rs b/ostd/src/arch/x86/boot/mod.rs
index 5af08bf85..04e2eda0c 100644
--- a/ostd/src/arch/x86/boot/mod.rs
+++ b/ostd/src/arch/x86/boot/mod.rs
@@ -26,5 +26,10 @@ pub mod smp;
 
 use core::arch::global_asm;
 
-global_asm!(include_str!("bsp_boot.S"));
+global_asm!(
+    include_str!("bsp_boot.S"),
+    KCODE64 = const super::trap::gdt::KCODE64,
+    KDATA = const super::trap::gdt::KDATA,
+    KCODE32 = const super::trap::gdt::KCODE32,
+);
 global_asm!(include_str!("ap_boot.S"));
diff --git a/ostd/src/arch/x86/mod.rs b/ostd/src/arch/x86/mod.rs
index 44aeb73e3..aadb0214d 100644
--- a/ostd/src/arch/x86/mod.rs
+++ b/ostd/src/arch/x86/mod.rs
@@ -62,10 +62,11 @@ static CPU_FEATURES: Once = Once::new();
 ///
 /// # Safety
 ///
-/// This function must be called only once on the bootstrapping processor.
+/// This function must be called only once in the boot context of the
+/// bootstrapping processor.
 pub(crate) unsafe fn late_init_on_bsp() {
     // SAFETY: This function is only called once on BSP.
-    unsafe { trap::init(true) };
+    unsafe { trap::init() };
     irq::init();
 
     kernel::acpi::init();
diff --git a/ostd/src/arch/x86/trap/gdt.rs b/ostd/src/arch/x86/trap/gdt.rs
index a27baf9ff..358aa4455 100644
--- a/ostd/src/arch/x86/trap/gdt.rs
+++ b/ostd/src/arch/x86/trap/gdt.rs
@@ -1,30 +1,14 @@
-// SPDX-License-Identifier: MPL-2.0 OR MIT
-//
-// The original source code is from [trapframe-rs](https://github.com/rcore-os/trapframe-rs),
-// which is released under the following license:
-//
-// SPDX-License-Identifier: MIT
-//
-// Copyright (c) 2020 - 2024 Runji Wang
-//
-// We make the following new changes:
-// * Link TaskStateSegment to .cpu_local area.
-// * Init TaskStateSegment on bsp/ap respectively.
-//
-// These changes are released under the following license:
-//
 // SPDX-License-Identifier: MPL-2.0
 
-//! Configure Global Descriptor Table (GDT).
+//! Configure the Global Descriptor Table (GDT).
 
-use alloc::{boxed::Box, vec::Vec};
-use core::cell::SyncUnsafeCell;
+use alloc::boxed::Box;
 
 use x86_64::{
     instructions::tables::{lgdt, load_tss},
     registers::{
         model_specific::Star,
-        segmentation::{Segment, Segment64, CS, GS},
+        segmentation::{Segment, CS},
     },
     structures::{
         gdt::{Descriptor, SegmentSelector},
@@ -34,80 +18,112 @@ use x86_64::{
     PrivilegeLevel, VirtAddr,
 };
 
-/// Init TSS & GDT.
-pub unsafe fn init(on_bsp: bool) {
-    // Allocate stack for trap from user, set the stack top to TSS,
-    // so that when trap from ring3 to ring0, CPU can switch stack correctly.
-    let tss = if on_bsp {
-        init_local_tss_on_bsp()
-    } else {
-        init_local_tss_on_ap()
-    };
+use crate::cpu::local::CpuLocal;
 
-    let (tss0, tss1) = match Descriptor::tss_segment(tss) {
+/// Initializes and loads the GDT and TSS.
+///
+/// The caller should only call this method once in the boot context for each available processor.
+/// This is not a safety requirement, however, because calling this method again will do nothing
+/// more than load the GDT and TSS with the same contents.
+///
+/// # Safety
+///
+/// The caller must ensure that no preemption can occur during the method, otherwise we may
+/// accidentally load a wrong GDT and TSS that actually belongs to another CPU.
+pub(super) unsafe fn init() {
+    let tss_ptr = LOCAL_TSS.as_ptr();
+
+    // FIXME: The segment limit in the descriptor created by `tss_segment_unchecked` does not
+    // include the I/O port bitmap.
+
+    // SAFETY: As a CPU-local variable, the TSS lives for `'static`.
+    let tss_desc = unsafe { Descriptor::tss_segment_unchecked(tss_ptr) };
+    let (tss0, tss1) = match tss_desc {
         Descriptor::SystemSegment(tss0, tss1) => (tss0, tss1),
         _ => unreachable!(),
     };
-    // FIXME: the segment limit assumed by x86_64 does not include the I/O port bitmap.
 
-    // Allocate new GDT with 8 entries.
-    //
-    // NOTICE: for fast syscall:
-    //   STAR[47:32] = K_CS = K_SS - 8
-    //   STAR[63:48] = U_CS32 = U_SS32 - 8 = U_CS - 16
-    let mut gdt = Vec::<u64>::new();
-    gdt.extend([0, KCODE64, KDATA64, UCODE32, UDATA32, UCODE64, tss0, tss1].iter());
-    let gdt = Vec::leak(gdt);
+    // The kernel CS is considered a global invariant set by the boot GDT. This method is not
+    // intended for switching to a new kernel CS.
+    assert_eq!(CS::get_reg(), KERNEL_CS);
 
-    // Load new GDT and TSS.
-    lgdt(&DescriptorTablePointer {
-        limit: gdt.len() as u16 * 8 - 1,
-        base: VirtAddr::new(gdt.as_ptr() as _),
-    });
-    load_tss(SegmentSelector::new(6, PrivilegeLevel::Ring0));
-    CS::set_reg(SegmentSelector::new(1, PrivilegeLevel::Ring0));
+    // Allocate a new GDT with 8 entries.
+    let gdt = Box::new([
+        0, KCODE64, KDATA, /* UCODE32 (not used) */ 0, UDATA, UCODE64, tss0, tss1,
+    ]);
+    let gdt = &*Box::leak(gdt);
+    assert_eq!(gdt[KERNEL_CS.index() as usize], KCODE64);
+    assert_eq!(gdt[KERNEL_SS.index() as usize], KDATA);
+    assert_eq!(gdt[USER_CS.index() as usize], UCODE64);
+    assert_eq!(gdt[USER_SS.index() as usize], UDATA);
 
-    let sysret = SegmentSelector::new(3, PrivilegeLevel::Ring3).0;
-    let syscall = SegmentSelector::new(1, PrivilegeLevel::Ring0).0;
-    Star::write_raw(sysret, syscall);
+    // Load the new GDT.
+    let gdtr = DescriptorTablePointer {
+        limit: (core::mem::size_of_val(gdt) - 1) as u16,
+        base: VirtAddr::new(gdt.as_ptr().addr() as u64),
+    };
+    // SAFETY: The GDT is valid to load because:
+    //  - It lives for `'static`.
+    //  - It contains correct entries at correct indexes: the kernel code/data segments, the user
+    //    code/data segments, and the TSS segment.
+    //  - Specifically, the TSS segment points to the CPU-local TSS of the current CPU.
+    unsafe { lgdt(&gdtr) };
 
-    USER_SS = sysret + 8;
-    USER_CS = sysret + 16;
+    // Load the TSS.
+    let tss_sel = SegmentSelector::new(6, PrivilegeLevel::Ring0);
+    assert_eq!(gdt[tss_sel.index() as usize], tss0);
+    assert_eq!(gdt[(tss_sel.index() + 1) as usize], tss1);
+    // SAFETY: The selector points to the TSS descriptors in the GDT.
+    unsafe { load_tss(tss_sel) };
+
+    // Set up the selectors for the `syscall` and `sysret` instructions.
+    let sysret = SegmentSelector::new(3, PrivilegeLevel::Ring3);
+    assert_eq!(gdt[(sysret.index() + 1) as usize], UDATA);
+    assert_eq!(gdt[(sysret.index() + 2) as usize], UCODE64);
+    let syscall = SegmentSelector::new(1, PrivilegeLevel::Ring0);
+    assert_eq!(gdt[syscall.index() as usize], KCODE64);
+    assert_eq!(gdt[(syscall.index() + 1) as usize], KDATA);
+    // SAFETY: The selector points to correct kernel/user code/data descriptors in the GDT.
+    unsafe { Star::write_raw(sysret.0, syscall.0) };
 }
 
-// The linker script ensure that cpu_local_tss section is right
-// at the beginning of cpu_local area, so that gsbase (offset zero)
-// points to LOCAL_TSS.
+// The linker script makes sure that the `.cpu_local_tss` section is at the beginning of the area
+// that stores CPU-local variables. This is important because `trap.S` and `syscall.S` will assume
+// this and treat the beginning of the CPU-local area as a TSS for loading and saving the kernel
+// stack!
+//
+// No other special initialization is required because the kernel stack information is stored in
+// the TSS when we start the userspace program. See `syscall.S` for details.
 #[link_section = ".cpu_local_tss"]
-static LOCAL_TSS: SyncUnsafeCell<TaskStateSegment> = SyncUnsafeCell::new(TaskStateSegment::new());
+static LOCAL_TSS: CpuLocal<TaskStateSegment> = {
+    let tss = TaskStateSegment::new();
+    // SAFETY: The `.cpu_local_tss` section is part of the CPU-local area.
+    unsafe { CpuLocal::__new(tss) }
+};
 
-unsafe fn init_local_tss_on_bsp() -> &'static TaskStateSegment {
-    let tss_ptr = LOCAL_TSS.get();
+// Kernel code and data descriptors.
+//
+// These are the exact, unique values that satisfy the requirements of the `syscall` instruction.
+// The Intel manual says: "It is the responsibility of OS software to ensure that the descriptors
+// (in GDT or LDT) referenced by those selector values correspond to the fixed values loaded into
+// the descriptor caches; the SYSCALL instruction does not ensure this correspondence."
+pub(in crate::arch) const KCODE64: u64 = 0x00AF_9B00_0000_FFFF;
+pub(in crate::arch) const KDATA: u64 = 0x00CF_9300_0000_FFFF;
 
-    let trap_stack_top = Box::leak(Box::new([0u8; 0x1000])).as_ptr() as u64 + 0x1000;
-    (*tss_ptr).privilege_stack_table[0] = VirtAddr::new(trap_stack_top);
-    &*tss_ptr
-}
+// A 32-bit code descriptor that is used in the boot stage only. See `boot/bsp_boot.S`.
+pub(in crate::arch) const KCODE32: u64 = 0x00CF_9B00_0000_FFFF;
 
-unsafe fn init_local_tss_on_ap() -> &'static TaskStateSegment {
-    let gs_base = GS::read_base().as_u64();
-    let tss_ptr = gs_base as *mut TaskStateSegment;
+// User code and data descriptors.
+//
+// These are the exact, unique values that satisfy the requirements of the `sysret` instruction.
+// The Intel manual says: "It is the responsibility of OS software to ensure that the descriptors
+// (in GDT or LDT) referenced by those selector values correspond to the fixed values loaded into
+// the descriptor caches; the SYSRET instruction does not ensure this correspondence."
+const UCODE64: u64 = 0x00AF_FB00_0000_FFFF;
+const UDATA: u64 = 0x00CF_F300_0000_FFFF;
 
-    let trap_stack_top = Box::leak(Box::new([0u8; 0x1000])).as_ptr() as u64 + 0x1000;
-    (*tss_ptr).privilege_stack_table[0] = VirtAddr::new(trap_stack_top);
-    &*tss_ptr
-}
+const KERNEL_CS: SegmentSelector = SegmentSelector::new(1, PrivilegeLevel::Ring0);
+const KERNEL_SS: SegmentSelector = SegmentSelector::new(2, PrivilegeLevel::Ring0);
 
-#[no_mangle]
-static mut USER_SS: u16 = 0;
-#[no_mangle]
-static mut USER_CS: u16 = 0;
-
-const KCODE64: u64 = 0x00209800_00000000; // EXECUTABLE | USER_SEGMENT | PRESENT | LONG_MODE
-const UCODE64: u64 = 0x0020F800_00000000; // EXECUTABLE | USER_SEGMENT | USER_MODE | PRESENT | LONG_MODE
-const KDATA64: u64 = 0x00009200_00000000; // DATA_WRITABLE | USER_SEGMENT | PRESENT
-
-#[expect(dead_code)]
-const UDATA64: u64 = 0x0000F200_00000000; // DATA_WRITABLE | USER_SEGMENT | USER_MODE | PRESENT
-const UCODE32: u64 = 0x00cffa00_0000ffff; // EXECUTABLE | USER_SEGMENT | USER_MODE | PRESENT
-const UDATA32: u64 = 0x00cff200_0000ffff; // EXECUTABLE | USER_SEGMENT | USER_MODE | PRESENT
+pub(super) const USER_CS: SegmentSelector = SegmentSelector::new(5, PrivilegeLevel::Ring3);
+pub(super) const USER_SS: SegmentSelector = SegmentSelector::new(4, PrivilegeLevel::Ring3);
diff --git a/ostd/src/arch/x86/trap/mod.rs b/ostd/src/arch/x86/trap/mod.rs
index be45ff3de..8031ab90c 100644
--- a/ostd/src/arch/x86/trap/mod.rs
+++ b/ostd/src/arch/x86/trap/mod.rs
@@ -16,7 +16,7 @@
 
 //! Handles trap.
 
-mod gdt;
+pub(super) mod gdt;
 mod idt;
 mod syscall;
 
@@ -104,26 +104,24 @@ pub struct TrapFrame {
 
 /// Initialize interrupt handling on x86_64.
 ///
-/// # Safety
-///
 /// This function will:
-///
-/// - Disable interrupt.
-/// - Switch to a new [GDT], extend 7 more entries from the current one.
-/// - Switch to a new [TSS], `GSBASE` pointer to its base address.
-/// - Switch to a new [IDT], override the current one.
-/// - Enable [`syscall`] instruction.
-/// - set `EFER::SYSTEM_CALL_EXTENSIONS`
+/// - Switch to a new, CPU-local [GDT].
+/// - Switch to a new, CPU-local [TSS].
+/// - Switch to a new, CPU-local [IDT].
+/// - Enable the [`syscall`] instruction.
 ///
 /// [GDT]: https://wiki.osdev.org/GDT
 /// [IDT]: https://wiki.osdev.org/IDT
 /// [TSS]: https://wiki.osdev.org/Task_State_Segment
 /// [`syscall`]: https://www.felixcloutier.com/x86/syscall
 ///
-#[cfg(any(target_os = "none", target_os = "uefi"))]
-pub unsafe fn init(on_bsp: bool) {
-    x86_64::instructions::interrupts::disable();
-    gdt::init(on_bsp);
+/// # Safety
+///
+/// This method must be called only in the boot context of each available processor.
+pub unsafe fn init() {
+    // SAFETY: We're in the boot context, so no preemption can occur.
+    unsafe { gdt::init() };
+
     idt::init();
     syscall::init();
 }
diff --git a/ostd/src/arch/x86/trap/syscall.S b/ostd/src/arch/x86/trap/syscall.S
index 7f90bebd8..cd7206fe1 100644
--- a/ostd/src/arch/x86/trap/syscall.S
+++ b/ostd/src/arch/x86/trap/syscall.S
@@ -67,10 +67,10 @@ syscall_return:
     je sysret
 iret:
     # construct trap frame
-    push [USER_SS]      # push ss
+    push {USER_SS}      # push ss
     push [rsp - 8*8]    # push rsp
     push [rsp + 3*8]    # push rflags
-    push [USER_CS]      # push cs
+    push {USER_CS}      # push cs
     push [rsp + 4*8]    # push rip
     iretq
 
diff --git a/ostd/src/arch/x86/trap/syscall.rs b/ostd/src/arch/x86/trap/syscall.rs
index bdf79400c..f2179548e 100644
--- a/ostd/src/arch/x86/trap/syscall.rs
+++ b/ostd/src/arch/x86/trap/syscall.rs
@@ -30,9 +30,13 @@ use x86_64::{
 
 use super::UserContext;
 
-global_asm!(include_str!("syscall.S"));
+global_asm!(
+    include_str!("syscall.S"),
+    USER_CS = const super::gdt::USER_CS.0,
+    USER_SS = const super::gdt::USER_SS.0,
+);
 
-pub fn init() {
+pub(super) fn init() {
     let cpuid = CpuId::new();
     unsafe {
         // Enable `syscall` instruction.
diff --git a/ostd/src/boot/smp.rs b/ostd/src/boot/smp.rs
index 6fec15eb1..17e81d416 100644
--- a/ostd/src/boot/smp.rs
+++ b/ostd/src/boot/smp.rs
@@ -146,8 +146,8 @@ fn ap_early_entry(cpu_id: u32) -> ! {
 
     crate::arch::enable_cpu_features();
 
-    // SAFETY: This function is only called once on this AP.
-    unsafe { crate::arch::trap::init(false) };
+    // SAFETY: This function is called in the boot context of the AP.
+    unsafe { crate::arch::trap::init() };
 
     // SAFETY: This function is only called once on this AP, after the BSP has
     // done the architecture-specific initialization.
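
For reference, the hard-coded descriptor constants introduced in `trap/gdt.rs` (KCODE64, KDATA, KCODE32, UCODE64, UDATA) describe flat 4 GiB segments that differ only in DPL, in the code/data type, and in the L (64-bit code) flag. The following standalone, host-runnable sketch is not part of the patch; the helper functions are made up for illustration, while the bit offsets follow the architectural x86-64 segment-descriptor layout and the constant values are copied from the diff above.

// Standalone sketch: decode the GDT descriptor constants used in trap/gdt.rs.
// The bit positions are the architectural segment-descriptor layout; the
// helpers are illustrative and not part of the patch or the x86_64 crate.

const KCODE64: u64 = 0x00AF_9B00_0000_FFFF;
const KDATA: u64 = 0x00CF_9300_0000_FFFF;
const KCODE32: u64 = 0x00CF_9B00_0000_FFFF;
const UCODE64: u64 = 0x00AF_FB00_0000_FFFF;
const UDATA: u64 = 0x00CF_F300_0000_FFFF;

/// Descriptor privilege level, bits 45..=46.
fn dpl(desc: u64) -> u64 {
    (desc >> 45) & 0b11
}

/// Executable bit (bit 43 of the type field): 1 for code, 0 for data.
fn is_code(desc: u64) -> bool {
    (desc >> 43) & 1 == 1
}

/// L flag (bit 53): the segment contains 64-bit code.
fn is_long_mode(desc: u64) -> bool {
    (desc >> 53) & 1 == 1
}

/// 20-bit limit: bits 0..=15 combined with bits 48..=51.
fn limit(desc: u64) -> u64 {
    (desc & 0xFFFF) | ((desc >> 32) & 0xF_0000)
}

fn main() {
    // Kernel segments run at DPL 0, user segments at DPL 3.
    assert!(dpl(KCODE64) == 0 && dpl(KDATA) == 0 && dpl(KCODE32) == 0);
    assert!(dpl(UCODE64) == 3 && dpl(UDATA) == 3);

    // Code vs. data, and 64-bit vs. 32-bit code.
    assert!(is_code(KCODE64) && is_code(KCODE32) && is_code(UCODE64));
    assert!(!is_code(KDATA) && !is_code(UDATA));
    assert!(is_long_mode(KCODE64) && is_long_mode(UCODE64) && !is_long_mode(KCODE32));

    // Every descriptor covers the full flat segment (limit 0xFFFFF, G = 1).
    for desc in [KCODE64, KDATA, KCODE32, UCODE64, UDATA] {
        assert_eq!(limit(desc), 0xF_FFFF);
        assert_eq!((desc >> 55) & 1, 1); // granularity flag
    }
    println!("descriptor constants decode as expected");
}

Only the code descriptors care about the L flag, which is why the boot-time 32-bit GDT in `bsp_boot.S` can reuse KDATA unchanged while swapping KCODE32 for KCODE64.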
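
The entry order of the new GDT ([null, KCODE64, KDATA, unused, UDATA, UCODE64, tss0, tss1]) is forced by how the CPU derives selectors from the IA32_STAR MSR: SYSCALL loads CS from STAR[47:32] and SS from that value plus 8, while SYSRET to 64-bit mode loads SS from STAR[63:48] plus 8 and CS from STAR[63:48] plus 16. The sketch below is illustrative only; the `selector` helper simply mirrors what `SegmentSelector::new(index, rpl).0` evaluates to, and the assertions restate the ones added to `init`.

// Standalone sketch of the selector arithmetic behind the STAR setup in
// trap/gdt.rs. Indices refer to the 8-entry GDT built by the patch:
//   [null, KCODE64, KDATA, (unused), UDATA, UCODE64, tss0, tss1]

/// Builds a raw selector value from a GDT index and a requested privilege
/// level (table indicator = GDT): index << 3 | RPL.
const fn selector(index: u16, rpl: u16) -> u16 {
    (index << 3) | rpl
}

fn main() {
    // Values written to IA32_STAR by the patch.
    let syscall_base = selector(1, 0); // STAR[47:32]
    let sysret_base = selector(3, 3); // STAR[63:48]

    // SYSCALL: CS = STAR[47:32], SS = STAR[47:32] + 8.
    let kernel_cs = syscall_base;
    let kernel_ss = syscall_base + 8;
    assert_eq!(kernel_cs >> 3, 1); // index 1 -> KCODE64
    assert_eq!(kernel_ss >> 3, 2); // index 2 -> KDATA

    // SYSRET (64-bit): SS = STAR[63:48] + 8, CS = STAR[63:48] + 16.
    let user_ss = sysret_base + 8;
    let user_cs = sysret_base + 16;
    assert_eq!(user_ss >> 3, 4); // index 4 -> UDATA
    assert_eq!(user_cs >> 3, 5); // index 5 -> UCODE64
    assert_eq!(user_ss & 0b11, 3); // RPL 3 is preserved by the addition
    assert_eq!(user_cs & 0b11, 3);

    // These are exactly the USER_CS/USER_SS constants exported to syscall.S.
    assert_eq!(user_cs, selector(5, 3));
    assert_eq!(user_ss, selector(4, 3));
    println!("syscall/sysret selector layout checks out");
}

Because the adjacency is fixed by the hardware, the new `init` asserts the table layout against the selector constants instead of computing USER_CS/USER_SS at run time as the old `#[no_mangle]` statics did.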