diff --git a/kernel/src/process/clone.rs b/kernel/src/process/clone.rs index 4d4a7351..5c9854c7 100644 --- a/kernel/src/process/clone.rs +++ b/kernel/src/process/clone.rs @@ -414,7 +414,7 @@ fn clone_cpu_context( tls: u64, clone_flags: CloneFlags, ) -> UserContext { - let mut child_context = *parent_context; + let mut child_context = parent_context.clone(); // The return value of child thread is zero child_context.set_syscall_ret(0); @@ -436,6 +436,10 @@ fn clone_cpu_context( child_context.set_tls_pointer(tls as usize); } + // New threads inherit the FPU state of the parent thread and + // the state is private to the thread thereafter. + child_context.fpu_state().save(); + child_context } diff --git a/kernel/src/syscall/execve.rs b/kernel/src/syscall/execve.rs index 2fe22803..a91896f5 100644 --- a/kernel/src/syscall/execve.rs +++ b/kernel/src/syscall/execve.rs @@ -1,7 +1,10 @@ // SPDX-License-Identifier: MPL-2.0 use aster_rights::WriteOp; -use ostd::{cpu::UserContext, user::UserContextApi}; +use ostd::{ + cpu::{FpuState, RawGeneralRegs, UserContext}, + user::UserContextApi, +}; use super::{constants::*, SyscallReturn}; use crate::{ @@ -131,10 +134,14 @@ fn do_execve( // set signal disposition to default process.sig_dispositions().lock().inherit(); // set cpu context to default - let default_content = UserContext::default(); - *user_context.general_regs_mut() = *default_content.general_regs(); - user_context.set_tls_pointer(default_content.tls_pointer()); - *user_context.fp_regs_mut() = *default_content.fp_regs(); + *user_context.general_regs_mut() = RawGeneralRegs::default(); + user_context.set_tls_pointer(0); + *user_context.fpu_state_mut() = FpuState::default(); + // FIXME: how to reset the FPU state correctly? 
Before returning to the user space, + // the kernel will call `handle_pending_signal`, which may update the CPU states so that + // when the kernel switches to the user mode, the control of the CPU will be handed over + // to the user-registered signal handlers. + user_context.fpu_state().restore(); // set new entry point user_context.set_instruction_pointer(elf_load_info.entry_point() as _); debug!("entry_point: 0x{:x}", elf_load_info.entry_point()); diff --git a/ostd/src/arch/riscv/cpu/mod.rs b/ostd/src/arch/riscv/cpu/mod.rs index 3bdc4d42..76ebc3be 100644 --- a/ostd/src/arch/riscv/cpu/mod.rs +++ b/ostd/src/arch/riscv/cpu/mod.rs @@ -12,13 +12,13 @@ pub use super::trap::GeneralRegs as RawGeneralRegs; use super::trap::{TrapFrame, UserContext as RawUserContext}; use crate::user::{ReturnReason, UserContextApi, UserContextApiInternal}; -/// Cpu context, including both general-purpose registers and floating-point registers. +/// Cpu context, including both general-purpose registers and FPU state. #[derive(Clone, Copy, Debug)] #[repr(C)] pub struct UserContext { user_context: RawUserContext, trap: Trap, - fp_regs: (), // TODO + fpu_state: (), // TODO cpu_exception_info: CpuExceptionInfo, } @@ -38,7 +38,7 @@ impl Default for UserContext { UserContext { user_context: RawUserContext::default(), trap: Trap::Exception(Exception::Unknown), - fp_regs: (), + fpu_state: (), cpu_exception_info: CpuExceptionInfo::default(), } } @@ -77,14 +77,14 @@ impl UserContext { &self.cpu_exception_info } - /// Returns a reference to the floating point registers - pub fn fp_regs(&self) -> &() { - &self.fp_regs + /// Returns a reference to the FPU state. + pub fn fpu_state(&self) -> &() { + &self.fpu_state } - /// Returns a mutable reference to the floating point registers - pub fn fp_regs_mut(&mut self) -> &mut () { - &mut self.fp_regs + /// Returns a mutable reference to the FPU state. 
+ pub fn fpu_state_mut(&mut self) -> &mut () { + &mut self.fpu_state } /// Sets thread-local storage pointer. diff --git a/ostd/src/arch/x86/cpu/mod.rs b/ostd/src/arch/x86/cpu/mod.rs index ecf8bd24..bb483acd 100644 --- a/ostd/src/arch/x86/cpu/mod.rs +++ b/ostd/src/arch/x86/cpu/mod.rs @@ -4,9 +4,11 @@ pub mod local; +use alloc::boxed::Box; use core::{ - arch::x86_64::{_fxrstor, _fxsave}, + arch::x86_64::{_fxrstor64, _fxsave64, _xrstor64, _xsave64}, fmt::Debug, + sync::atomic::{AtomicBool, Ordering::Relaxed}, }; use bitflags::bitflags; @@ -14,12 +16,20 @@ use cfg_if::cfg_if; use log::debug; use num_derive::FromPrimitive; use num_traits::FromPrimitive; +use spin::Once; use x86::bits64::segmentation::wrfsbase; pub use x86::cpuid; -use x86_64::registers::rflags::RFlags; +use x86_64::registers::{ + control::{Cr0, Cr0Flags}, + rflags::RFlags, + xcontrol::XCr0, +}; pub use super::trap::GeneralRegs as RawGeneralRegs; -use super::trap::{TrapFrame, UserContext as RawUserContext}; +use super::{ + trap::{TrapFrame, UserContext as RawUserContext}, + CPU_FEATURES, +}; use crate::{ task::scheduler, trap::call_irq_callback_functions, @@ -34,12 +44,12 @@ cfg_if! { } } -/// Cpu context, including both general-purpose registers and floating-point registers. -#[derive(Clone, Default, Copy, Debug)] +/// Cpu context, including both general-purpose registers and FPU state. +#[derive(Clone, Default, Debug)] #[repr(C)] pub struct UserContext { user_context: RawUserContext, - fp_regs: FpRegs, + fpu_state: FpuState, cpu_exception_info: CpuExceptionInfo, } @@ -71,14 +81,14 @@ impl UserContext { &self.cpu_exception_info } - /// Returns a reference to the floating point registers - pub fn fp_regs(&self) -> &FpRegs { - &self.fp_regs + /// Returns a reference to the FPU state. 
+ pub fn fpu_state(&self) -> &FpuState { + &self.fpu_state } - /// Returns a mutable reference to the floating point registers - pub fn fp_regs_mut(&mut self) -> &mut FpRegs { - &mut self.fp_regs + /// Returns a mutable reference to the FPU state. + pub fn fpu_state_mut(&mut self) -> &mut FpuState { + &mut self.fpu_state } /// Sets thread-local storage pointer. @@ -385,96 +395,203 @@ cpu_context_impl_getter_setter!( [gsbase, set_gsbase] ); -/// The floating-point state of CPU. -#[derive(Clone, Copy, Debug)] -#[repr(C)] -pub struct FpRegs { - buf: FxsaveArea, - is_valid: bool, +/// The FPU state of user task. +/// +/// This could be used for saving both legacy and modern state format. +#[derive(Debug)] +pub struct FpuState { + state_area: Box, + area_size: usize, + is_valid: AtomicBool, } -impl FpRegs { - /// Creates a new instance. - /// - /// Note that a newly-created instance's floating point state is not - /// initialized, thus considered invalid (i.e., `self.is_valid() == false`). - pub fn new() -> Self { - // The buffer address requires 16bytes alignment. - Self { - buf: FxsaveArea { data: [0; 512] }, - is_valid: false, - } - } +// The legacy SSE/MMX FPU state format (as saved by `FXSAVE` and restored by the `FXRSTOR` instructions). 
+#[repr(C, align(16))] +#[derive(Clone, Copy, Debug)] +struct FxSaveArea { + control: u16, // x87 FPU Control Word + status: u16, // x87 FPU Status Word + tag: u16, // x87 FPU Tag Word + op: u16, // x87 FPU Last Instruction Opcode + ip: u32, // x87 FPU Instruction Pointer Offset + cs: u32, // x87 FPU Instruction Pointer Selector + dp: u32, // x87 FPU Instruction Operand (Data) Pointer Offset + ds: u32, // x87 FPU Instruction Operand (Data) Pointer Selector + mxcsr: u32, // MXCSR Register State + mxcsr_mask: u32, // MXCSR Mask + st_space: [u32; 32], // x87 FPU or MMX technology registers (ST0-ST7 or MM0-MM7, 128 bits per field) + xmm_space: [u32; 64], // XMM registers (XMM0-XMM15, 128 bits per field) + padding: [u32; 12], // Padding + reserved: [u32; 12], // Software reserved +} - /// Save CPU's current floating pointer states into this instance. - pub fn save(&mut self) { - debug!("save fpregs"); - debug!("write addr = 0x{:x}", (&mut self.buf) as *mut _ as usize); - let layout = alloc::alloc::Layout::for_value(&self.buf); - debug!("layout: {:?}", layout); - let ptr = unsafe { alloc::alloc::alloc(layout) } as usize; - debug!("ptr = 0x{:x}", ptr); +/// The modern FPU state format (as saved by the `XSAVE` and restored by the `XRSTOR` instructions). +#[repr(C, align(64))] +#[derive(Clone, Copy, Debug)] +struct XSaveArea { + fxsave_area: FxSaveArea, + features: u64, + compaction: u64, + reserved: [u64; 6], + extended_state_area: [u8; MAX_XSAVE_AREA_SIZE - size_of::() - 64], +} + +impl XSaveArea { + fn init() -> Box { + let features = if CPU_FEATURES.get().unwrap().has_xsave() { + XCr0::read().bits() & XSTATE_MAX_FEATURES.get().unwrap() + } else { + 0 + }; + + let mut xsave_area = Box::::new_uninit(); + let ptr = xsave_area.as_mut_ptr(); + // SAFETY: it's safe to initialize the XSaveArea field then return the instance. 
unsafe { - _fxsave(self.buf.data.as_mut_ptr()); + core::ptr::write_bytes(ptr, 0, 1); + (*ptr).fxsave_area.control = 0x37F; + (*ptr).fxsave_area.mxcsr = 0x1F80; + (*ptr).features = features; + xsave_area.assume_init() + } + } +} + +impl FpuState { + /// Initializes a new instance. + pub fn init() -> Self { + let mut area_size = size_of::(); + if CPU_FEATURES.get().unwrap().has_xsave() { + area_size = area_size.max(*XSAVE_AREA_SIZE.get().unwrap()); + } + + Self { + state_area: XSaveArea::init(), + area_size, + is_valid: AtomicBool::new(true), } - debug!("save fpregs success"); - self.is_valid = true; } - /// Saves the floating state given by a slice of u8. - /// - /// After calling this method, the state of the instance will be considered valid. - /// - /// # Safety - /// - /// It is the caller's responsibility to ensure that the source slice contains - /// data that is in xsave/xrstor format. The slice must have a length of 512 bytes. - pub unsafe fn save_from_slice(&mut self, src: &[u8]) { - self.buf.data.copy_from_slice(src); - self.is_valid = true; - } - - /// Returns whether the instance can contains data in valid xsave/xrstor format. + /// Returns whether the instance contains a valid state. pub fn is_valid(&self) -> bool { - self.is_valid + self.is_valid.load(Relaxed) + } + + /// Saves the CPU's current FPU state into this instance. + pub fn save(&self) { + let mem_addr = &*self.state_area as *const _ as *mut u8; + + if CPU_FEATURES.get().unwrap().has_xsave() { + unsafe { _xsave64(mem_addr, XFEATURE_MASK_USER_RESTORE) }; + } else { + unsafe { _fxsave64(mem_addr) }; + } + + self.is_valid.store(true, Relaxed); + + debug!("Save FPU state"); + } + + /// Restores CPU's FPU state from this instance. 
+ pub fn restore(&self) { + if !self.is_valid() { + return; + } + + let mem_addr = &*self.state_area as *const _ as *const u8; + + if CPU_FEATURES.get().unwrap().has_xsave() { + let rs_mask = XFEATURE_MASK_USER_RESTORE & XSTATE_MAX_FEATURES.get().unwrap(); + + unsafe { _xrstor64(mem_addr, rs_mask) }; + } else { + unsafe { _fxrstor64(mem_addr) }; + } + + self.is_valid.store(false, Relaxed); + + debug!("Restore FPU state"); } /// Clears the state of the instance. /// - /// This method does not reset the underlying buffer that contains the floating - /// point state; it only marks the buffer __invalid__. - pub fn clear(&mut self) { - self.is_valid = false; - } - - /// Restores CPU's CPU floating pointer states from this instance. - /// - /// # Panics - /// - /// If the current state is invalid, the method will panic. - pub fn restore(&self) { - debug!("restore fpregs"); - assert!(self.is_valid); - unsafe { _fxrstor(self.buf.data.as_ptr()) }; - debug!("restore fpregs success"); - } - - /// Returns the floating point state as a slice. - /// - /// Note that the slice may contain garbage if `self.is_valid() == false`. - pub fn as_slice(&self) -> &[u8] { - &self.buf.data + /// This method does not reset the underlying buffer that contains the + /// FPU state; it only marks the buffer __invalid__. 
+ pub fn clear(&self) { + self.is_valid.store(false, Relaxed); } } -impl Default for FpRegs { +impl Clone for FpuState { + fn clone(&self) -> Self { + let mut state_area = XSaveArea::init(); + state_area.fxsave_area = self.state_area.fxsave_area; + state_area.features = self.state_area.features; + state_area.compaction = self.state_area.compaction; + if self.area_size > size_of::() { + let len = self.area_size - size_of::() - 64; + state_area.extended_state_area[..len] + .copy_from_slice(&self.state_area.extended_state_area[..len]); + } + + Self { + state_area, + area_size: self.area_size, + is_valid: AtomicBool::new(self.is_valid()), + } + } +} + +impl Default for FpuState { fn default() -> Self { - Self::new() + Self::init() } } -#[repr(C, align(16))] -#[derive(Debug, Clone, Copy)] -struct FxsaveArea { - data: [u8; 512], // 512 bytes +/// The XSTATE features (user & supervisor) supported by the processor. +static XSTATE_MAX_FEATURES: Once = Once::new(); + +/// Mask features which are restored when returning to user space. +/// +/// X87 | SSE | AVX | OPMASK | ZMM_HI256 | HI16_ZMM +const XFEATURE_MASK_USER_RESTORE: u64 = 0b1110_0111; + +/// The real size in bytes of the XSAVE area containing all states enabled by XCR0 | IA32_XSS. +static XSAVE_AREA_SIZE: Once = Once::new(); + +/// The max size in bytes of the XSAVE area. +const MAX_XSAVE_AREA_SIZE: usize = 4096; + +pub(super) fn enable_essential_features() { + XSTATE_MAX_FEATURES.call_once(|| { + const XSTATE_CPUID: u32 = 0x0000000d; + + // Find user xstates supported by the processor. + let res0 = cpuid::cpuid!(XSTATE_CPUID, 0); + let mut features = res0.eax as u64 + ((res0.edx as u64) << 32); + + // Find supervisor xstates supported by the processor. 
+ let res1 = cpuid::cpuid!(XSTATE_CPUID, 1); + features |= res1.ecx as u64 + ((res1.edx as u64) << 32); + + features + }); + + XSAVE_AREA_SIZE.call_once(|| { + let cpuid = cpuid::CpuId::new(); + let size = cpuid.get_extended_state_info().unwrap().xsave_size() as usize; + debug_assert!(size <= MAX_XSAVE_AREA_SIZE); + size + }); + + if CPU_FEATURES.get().unwrap().has_fpu() { + let mut cr0 = Cr0::read(); + cr0.remove(Cr0Flags::TASK_SWITCHED | Cr0Flags::EMULATE_COPROCESSOR); + + unsafe { + Cr0::write(cr0); + // Flush out any pending x87 state. + core::arch::asm!("fninit"); + } + } } diff --git a/ostd/src/arch/x86/mod.rs b/ostd/src/arch/x86/mod.rs index 880df129..6034c31e 100644 --- a/ostd/src/arch/x86/mod.rs +++ b/ostd/src/arch/x86/mod.rs @@ -18,6 +18,8 @@ pub mod timer; pub mod trap; use cfg_if::cfg_if; +use spin::Once; +use x86::cpuid::{CpuId, FeatureInfo}; cfg_if! { if #[cfg(feature = "cvm_guest")] { @@ -59,6 +61,8 @@ pub(crate) fn init_cvm_guest() { } } +static CPU_FEATURES: Once = Once::new(); + pub(crate) fn init_on_bsp() { // SAFETY: this function is only called once on BSP. unsafe { @@ -178,6 +182,14 @@ fn has_avx512() -> bool { pub(crate) fn enable_cpu_features() { use x86_64::registers::{control::Cr4Flags, model_specific::EferFlags, xcontrol::XCr0Flags}; + + CPU_FEATURES.call_once(|| { + let cpuid = CpuId::new(); + cpuid.get_feature_info().unwrap() + }); + + cpu::enable_essential_features(); + let mut cr4 = x86_64::registers::control::Cr4::read(); cr4 |= Cr4Flags::FSGSBASE | Cr4Flags::OSXSAVE @@ -192,8 +204,6 @@ pub(crate) fn enable_cpu_features() { xcr0 |= XCr0Flags::AVX | XCr0Flags::SSE; if has_avx512() { - // TODO: Ensure proper saving and restoring of floating-point states - // to correctly support advanced instructions like AVX-512. 
xcr0 |= XCr0Flags::OPMASK | XCr0Flags::ZMM_HI256 | XCr0Flags::HI16_ZMM; } diff --git a/ostd/src/task/mod.rs b/ostd/src/task/mod.rs index d9c7da6c..fcac605e 100644 --- a/ostd/src/task/mod.rs +++ b/ostd/src/task/mod.rs @@ -112,6 +112,24 @@ impl Task { None } } + + /// Saves the FPU state for user task. + pub fn save_fpu_state(&self) { + let Some(user_space) = self.user_space.as_ref() else { + return; + }; + + user_space.fpu_state().save(); + } + + /// Restores the FPU state for user task. + pub fn restore_fpu_state(&self) { + let Some(user_space) = self.user_space.as_ref() else { + return; + }; + + user_space.fpu_state().restore(); + } } /// Options to create or spawn a new task. @@ -169,6 +187,8 @@ impl TaskOptions { let current_task = Task::current() .expect("no current task, it should have current task in kernel task entry"); + current_task.restore_fpu_state(); + // SAFETY: The `func` field will only be accessed by the current task in the task // context, so the data won't be accessed concurrently. let task_func = unsafe { current_task.func.get() }; diff --git a/ostd/src/task/processor.rs b/ostd/src/task/processor.rs index 114873ef..51527cc5 100644 --- a/ostd/src/task/processor.rs +++ b/ostd/src/task/processor.rs @@ -42,6 +42,8 @@ pub(super) fn switch_to_task(next_task: Arc) { let current_task_ctx_ptr = if !current_task_ptr.is_null() { // SAFETY: The current task is always alive. let current_task = unsafe { &*current_task_ptr }; + current_task.save_fpu_state(); + // Throughout this method, the task's context is alive and can be exclusively used. current_task.ctx.get() } else { @@ -90,4 +92,9 @@ pub(super) fn switch_to_task(next_task: Arc) { // See also `kernel_task_entry`. crate::arch::irq::enable_local(); + + // The `next_task` was moved into `CURRENT_TASK_PTR` above, now restore its FPU state. 
+ if let Some(current) = Task::current() { + current.restore_fpu_state(); + } } diff --git a/ostd/src/user.rs b/ostd/src/user.rs index c516b166..16fe7fe9 100644 --- a/ostd/src/user.rs +++ b/ostd/src/user.rs @@ -4,7 +4,12 @@ //! User space. -use crate::{cpu::UserContext, mm::VmSpace, prelude::*, trap::TrapFrame}; +use crate::{ + cpu::{FpuState, UserContext}, + mm::VmSpace, + prelude::*, + trap::TrapFrame, +}; /// A user space. /// @@ -55,6 +60,11 @@ impl UserSpace { pub fn tls_pointer(&self) -> usize { self.init_ctx.tls_pointer() } + + /// Gets a reference to the FPU state. + pub fn fpu_state(&self) -> &FpuState { + self.init_ctx.fpu_state() + } } /// Specific architectures need to implement this trait. This should only used in [`UserMode`] @@ -126,7 +136,7 @@ impl<'a> UserMode<'a> { pub fn new(user_space: &'a Arc) -> Self { Self { user_space, - context: user_space.init_ctx, + context: user_space.init_ctx.clone(), } }