From af908c29cfa1d64210c609190ee7ed436f49fde1 Mon Sep 17 00:00:00 2001 From: Chen Chengjun Date: Fri, 28 Jun 2024 09:39:51 +0800 Subject: [PATCH] Refactor the read/write operations to userspace --- kernel/aster-nix/src/prelude.rs | 2 +- kernel/aster-nix/src/thread/exception.rs | 25 ++++-- kernel/aster-nix/src/util/mod.rs | 98 +++++++++++++++++----- kernel/aster-nix/src/vm/vmar/mod.rs | 12 +-- kernel/aster-nix/src/vm/vmar/vm_mapping.rs | 4 + ostd/src/arch/x86/trap.rs | 46 ++++++++-- ostd/src/mm/space.rs | 94 +++++++++++++++++++-- 7 files changed, 234 insertions(+), 47 deletions(-) diff --git a/kernel/aster-nix/src/prelude.rs b/kernel/aster-nix/src/prelude.rs index 9eb5c056d..fa0d9c975 100644 --- a/kernel/aster-nix/src/prelude.rs +++ b/kernel/aster-nix/src/prelude.rs @@ -17,7 +17,7 @@ pub(crate) use bitflags::bitflags; pub(crate) use int_to_c_enum::TryFromInt; pub(crate) use log::{debug, error, info, log_enabled, trace, warn}; pub(crate) use ostd::{ - mm::{Vaddr, PAGE_SIZE}, + mm::{Vaddr, VmReader, VmWriter, PAGE_SIZE}, sync::{Mutex, MutexGuard, RwLock, RwMutex, SpinLock, SpinLockGuard}, }; pub(crate) use pod::Pod; diff --git a/kernel/aster-nix/src/thread/exception.rs b/kernel/aster-nix/src/thread/exception.rs index ca350aa5e..b6fa39c2f 100644 --- a/kernel/aster-nix/src/thread/exception.rs +++ b/kernel/aster-nix/src/thread/exception.rs @@ -2,7 +2,7 @@ #![allow(unused_variables)] -use ostd::cpu::*; +use ostd::{cpu::*, mm::VmSpace}; use crate::{ prelude::*, process::signal::signals::fault::FaultSignal, @@ -18,7 +18,11 @@ pub fn handle_exception(context: &UserContext) { let root_vmar = current.root_vmar(); match *exception { - PAGE_FAULT => handle_page_fault(trap_info), + PAGE_FAULT => { + if handle_page_fault(root_vmar.vm_space(), trap_info).is_err() { + generate_fault_signal(trap_info); + } + } _ => { // We current do nothing about other exceptions generate_fault_signal(trap_info); @@ -26,7 +30,11 @@ pub fn handle_exception(context: &UserContext) { } } -fn handle_page_fault(trap_info: &CpuExceptionInfo) { +/// Handles the page fault occurs in the input `VmSpace`. +pub(crate) fn handle_page_fault( + vm_space: &VmSpace, + trap_info: &CpuExceptionInfo, +) -> core::result::Result<(), ()> { const PAGE_NOT_PRESENT_ERROR_MASK: usize = 0x1 << 0; const WRITE_ACCESS_MASK: usize = 0x1 << 1; let page_fault_addr = trap_info.page_fault_addr as Vaddr; @@ -41,16 +49,23 @@ fn handle_page_fault(trap_info: &CpuExceptionInfo) { // If page is not present or due to write access, we should ask the vmar try to commit this page let current = current!(); let root_vmar = current.root_vmar(); + + debug_assert_eq!( + Arc::as_ptr(root_vmar.vm_space()), + vm_space as *const VmSpace + ); + if let Err(e) = root_vmar.handle_page_fault(page_fault_addr, not_present, write) { error!( "page fault handler failed: addr: 0x{:x}, err: {:?}", page_fault_addr, e ); - generate_fault_signal(trap_info); + return Err(()); } + Ok(()) } else { // Otherwise, the page fault cannot be handled - generate_fault_signal(trap_info); + Err(()) } } diff --git a/kernel/aster-nix/src/util/mod.rs b/kernel/aster-nix/src/util/mod.rs index ea16dbd87..e1fa5bb0a 100644 --- a/kernel/aster-nix/src/util/mod.rs +++ b/kernel/aster-nix/src/util/mod.rs @@ -3,7 +3,10 @@ use core::mem; use aster_rights::Full; -use ostd::mm::VmIo; +use ostd::{ + mm::{KernelSpace, VmIo, VmReader, VmWriter}, + task::current_task, +}; use crate::{prelude::*, vm::vmar::Vmar}; mod iovec; @@ -12,38 +15,87 @@ pub mod random; pub use iovec::{copy_iovs_from_user, IoVec}; -/// Read bytes into the `dest` buffer +/// Reads bytes into the `dest` `VmWriter` /// from the user space of the current process. -/// If successful, -/// the `dest` buffer is filled with exact `dest.len` bytes. -pub fn read_bytes_from_user(src: Vaddr, dest: &mut [u8]) -> Result<()> { - let current = current!(); - let root_vmar = current.root_vmar(); - Ok(root_vmar.read_bytes(src, dest)?) +/// +/// If the reading is completely successful, returns `Ok`. +/// Otherwise, returns `Err`. +/// +/// TODO: this API can be discarded and replaced with the API of `VmReader` +/// after replacing all related `buf` usages. +pub fn read_bytes_from_user(src: Vaddr, dest: &mut VmWriter<'_>) -> Result<()> { + let current_task = current_task().ok_or(Error::with_message( + Errno::EFAULT, + "the current task is missing", + ))?; + let user_space = current_task.user_space().ok_or(Error::with_message( + Errno::EFAULT, + "the user space is missing", + ))?; + let copy_len = dest.avail(); + + let mut user_reader = user_space.vm_space().reader(src, copy_len)?; + user_reader.read_fallible(dest).map_err(|err| err.0)?; + Ok(()) } -/// Read a value of `Pod` type +/// Reads a value of `Pod` type /// from the user space of the current process. pub fn read_val_from_user(src: Vaddr) -> Result { - let current = current!(); - let root_vmar = current.root_vmar(); - Ok(root_vmar.read_val(src)?) + let current_task = current_task().ok_or(Error::with_message( + Errno::EFAULT, + "the current task is missing", + ))?; + let user_space = current_task.user_space().ok_or(Error::with_message( + Errno::EFAULT, + "the user space is missing", + ))?; + + let mut user_reader = user_space + .vm_space() + .reader(src, core::mem::size_of::())?; + Ok(user_reader.read_val()?) } -/// Write bytes from the `src` buffer -/// to the user space of the current process. If successful, -/// the write length will be equal to `src.len`. -pub fn write_bytes_to_user(dest: Vaddr, src: &[u8]) -> Result<()> { - let current = current!(); - let root_vmar = current.root_vmar(); - Ok(root_vmar.write_bytes(dest, src)?) +/// Writes bytes from the `src` `VmReader` +/// to the user space of the current process. +/// +/// If the writing is completely successful, returns `Ok`, +/// Otherwise, returns `Err`. +/// +/// TODO: this API can be discarded and replaced with the API of `VmWriter` +/// after replacing all related `buf` usages. +pub fn write_bytes_to_user(dest: Vaddr, src: &mut VmReader<'_, KernelSpace>) -> Result<()> { + let current_task = current_task().ok_or(Error::with_message( + Errno::EFAULT, + "the current task is missing", + ))?; + let user_space = current_task.user_space().ok_or(Error::with_message( + Errno::EFAULT, + "the user space is missing", + ))?; + let copy_len = src.remain(); + + let mut user_writer = user_space.vm_space().writer(dest, copy_len)?; + user_writer.write_fallible(src).map_err(|err| err.0)?; + Ok(()) } -/// Write `val` to the user space of the current process. +/// Writes `val` to the user space of the current process. pub fn write_val_to_user(dest: Vaddr, val: &T) -> Result<()> { - let current = current!(); - let root_vmar = current.root_vmar(); - Ok(root_vmar.write_val(dest, val)?) + let current_task = current_task().ok_or(Error::with_message( + Errno::EFAULT, + "the current task is missing", + ))?; + let user_space = current_task.user_space().ok_or(Error::with_message( + Errno::EFAULT, + "the user space is missing", + ))?; + + let mut user_writer = user_space + .vm_space() + .writer(dest, core::mem::size_of::())?; + Ok(user_writer.write_val(val)?) } /// Read a C string from the user space of the current process. diff --git a/kernel/aster-nix/src/vm/vmar/mod.rs b/kernel/aster-nix/src/vm/vmar/mod.rs index 7a63e5da8..eeb603705 100644 --- a/kernel/aster-nix/src/vm/vmar/mod.rs +++ b/kernel/aster-nix/src/vm/vmar/mod.rs @@ -21,7 +21,7 @@ use self::{ vm_mapping::VmMapping, }; use super::page_fault_handler::PageFaultHandler; -use crate::{prelude::*, vm::perms::VmPerms}; +use crate::{prelude::*, thread::exception::handle_page_fault, vm::perms::VmPerms}; /// Virtual Memory Address Regions (VMARs) are a type of capability that manages /// user address spaces. @@ -165,13 +165,9 @@ impl Vmar_ { vm_mappings: BTreeMap::new(), free_regions, }; - Vmar_::new( - vmar_inner, - Arc::new(VmSpace::new()), - 0, - ROOT_VMAR_CAP_ADDR, - None, - ) + let vm_space = VmSpace::new(); + vm_space.register_page_fault_handler(handle_page_fault); + Vmar_::new(vmar_inner, Arc::new(vm_space), 0, ROOT_VMAR_CAP_ADDR, None) } fn is_root_vmar(&self) -> bool { diff --git a/kernel/aster-nix/src/vm/vmar/vm_mapping.rs b/kernel/aster-nix/src/vm/vmar/vm_mapping.rs index f65b9cbdc..6119c96d6 100644 --- a/kernel/aster-nix/src/vm/vmar/vm_mapping.rs +++ b/kernel/aster-nix/src/vm/vmar/vm_mapping.rs @@ -752,6 +752,10 @@ impl VmarMapOptions { if self.align % PAGE_SIZE != 0 || !self.align.is_power_of_two() { return_errno_with_message!(Errno::EINVAL, "invalid align"); } + debug_assert!(self.size % self.align == 0); + if self.size % self.align != 0 { + return_errno_with_message!(Errno::EINVAL, "invalid mapping size"); + } debug_assert!(self.vmo_offset % self.align == 0); if self.vmo_offset % self.align != 0 { return_errno_with_message!(Errno::EINVAL, "invalid vmo offset"); diff --git a/ostd/src/arch/x86/trap.rs b/ostd/src/arch/x86/trap.rs index 1a0ec1772..09a8b3c10 100644 --- a/ostd/src/arch/x86/trap.rs +++ b/ostd/src/arch/x86/trap.rs @@ -10,16 +10,18 @@ use log::debug; use tdx_guest::tdcall; use trapframe::TrapFrame; +use super::ex_table::ExTable; #[cfg(feature = "intel_tdx")] use crate::arch::{cpu::VIRTUALIZATION_EXCEPTION, tdx_guest::handle_virtual_exception}; use crate::{ - cpu::{CpuException, PageFaultErrorCode, PAGE_FAULT}, + cpu::{CpuException, CpuExceptionInfo, PageFaultErrorCode, PAGE_FAULT}, cpu_local, mm::{ kspace::{KERNEL_PAGE_TABLE, LINEAR_MAPPING_BASE_VADDR, LINEAR_MAPPING_VADDR_RANGE}, page_prop::{CachePolicy, PageProperty}, - PageFlags, PrivilegedPageFlags as PrivFlags, PAGE_SIZE, + PageFlags, PrivilegedPageFlags as PrivFlags, MAX_USERSPACE_VADDR, PAGE_SIZE, }, + task::current_task, trap::call_irq_callback_functions, }; @@ -45,7 +47,14 @@ extern "sysv64" fn trap_handler(f: &mut TrapFrame) { handle_virtual_exception(f, &ve_info); } &PAGE_FAULT => { - handle_kernel_page_fault(f); + let page_fault_addr = x86_64::registers::control::Cr2::read().as_u64(); + // The actual user space implementation should be responsible + // for providing mechanism to treat the 0 virtual address. + if (0..MAX_USERSPACE_VADDR).contains(&(page_fault_addr as usize)) { + handle_user_page_fault(f, page_fault_addr); + } else { + handle_kernel_page_fault(f, page_fault_addr); + } } exception => { panic!( @@ -61,10 +70,37 @@ extern "sysv64" fn trap_handler(f: &mut TrapFrame) { } } +/// Handles page fault from user space. +fn handle_user_page_fault(f: &mut TrapFrame, page_fault_addr: u64) { + let current_task = current_task().unwrap(); + let user_space = current_task + .user_space() + .expect("the user space is missing when a page fault from the user happens."); + + let info = CpuExceptionInfo { + page_fault_addr: page_fault_addr as usize, + id: f.trap_num, + error_code: f.error_code, + }; + + let res = user_space.vm_space().handle_page_fault(&info); + // Copying bytes by bytes can recover directly + // if handling the page fault successfully. + if res.is_ok() { + return; + } + + // Use the exception table to recover to normal execution. + if let Some(addr) = ExTable::find_recovery_inst_addr(f.rip) { + f.rip = addr; + } else { + panic!("Cannot handle user page fault; Trapframe:{:#x?}.", f); + } +} + /// FIXME: this is a hack because we don't allocate kernel space for IO memory. We are currently /// using the linear mapping for IO memory. This is not a good practice. -fn handle_kernel_page_fault(f: &TrapFrame) { - let page_fault_vaddr = x86_64::registers::control::Cr2::read().as_u64(); +fn handle_kernel_page_fault(f: &TrapFrame, page_fault_vaddr: u64) { let error_code = PageFaultErrorCode::from_bits_truncate(f.error_code); debug!( "kernel page fault: address {:?}, error code {:?}", diff --git a/ostd/src/mm/space.rs b/ostd/src/mm/space.rs index 9a3556b67..548348f76 100644 --- a/ostd/src/mm/space.rs +++ b/ostd/src/mm/space.rs @@ -2,17 +2,22 @@ use core::ops::Range; +use spin::Once; + use super::{ + io::UserSpace, is_page_aligned, kspace::KERNEL_PAGE_TABLE, page_table::{PageTable, PageTableMode, UserMode}, CachePolicy, FrameVec, PageFlags, PageProperty, PagingConstsTrait, PrivilegedPageFlags, - PAGE_SIZE, + VmReader, VmWriter, PAGE_SIZE, }; use crate::{ arch::mm::{ - tlb_flush_addr_range, tlb_flush_all_excluding_global, PageTableEntry, PagingConsts, + current_page_table_paddr, tlb_flush_addr_range, tlb_flush_all_excluding_global, + PageTableEntry, PagingConsts, }, + cpu::CpuExceptionInfo, mm::{ page_table::{Cursor, PageTableQueryResult as PtQr}, Frame, MAX_USERSPACE_VADDR, @@ -26,15 +31,22 @@ use crate::{ /// A virtual memory space (`VmSpace`) can be created and assigned to a user space so that /// the virtual memory of the user space can be manipulated safely. For example, /// given an arbitrary user-space pointer, one can read and write the memory -/// location refered to by the user-space pointer without the risk of breaking the +/// location referred to by the user-space pointer without the risk of breaking the /// memory safety of the kernel space. /// /// A newly-created `VmSpace` is not backed by any physical memory pages. /// To provide memory pages for a `VmSpace`, one can allocate and map /// physical memory ([`Frame`]s) to the `VmSpace`. -#[derive(Debug)] +/// +/// A `VmSpace` can also attach a page fault handler, which will be invoked to handle +/// page faults generated from user space. +/// +/// A `VmSpace` can also attach a page fault handler, which will be invoked to handle +/// page faults generated from user space. +#[allow(clippy::type_complexity)] pub struct VmSpace { pt: PageTable, + page_fault_handler: Once core::result::Result<(), ()>>, } // Notes on TLB flushing: @@ -51,6 +63,7 @@ impl VmSpace { pub fn new() -> Self { Self { pt: KERNEL_PAGE_TABLE.get().unwrap().create_user_page_table(), + page_fault_handler: Once::new(), } } @@ -59,6 +72,27 @@ impl VmSpace { self.pt.activate(); } + pub(crate) fn handle_page_fault( + &self, + info: &CpuExceptionInfo, + ) -> core::result::Result<(), ()> { + if let Some(func) = self.page_fault_handler.get() { + return func(self, info); + } + Err(()) + } + + /// Registers the page fault handler in this `VmSpace`. + /// + /// The page fault handler of a `VmSpace` can only be initialized once. + /// If it has been initialized before, calling this method will have no effect. + pub fn register_page_fault_handler( + &self, + func: fn(&VmSpace, &CpuExceptionInfo) -> core::result::Result<(), ()>, + ) { + self.page_fault_handler.call_once(|| func); + } + /// Maps some physical memory pages into the VM space according to the given /// options, returning the address where the mapping is created. /// @@ -116,7 +150,7 @@ impl VmSpace { } /// Queries about a range of virtual memory. - /// You will get a iterator of `VmQueryResult` which contains the information of + /// You will get an iterator of `VmQueryResult` which contains the information of /// each parts of the range. pub fn query_range(&self, range: &Range) -> Result { Ok(VmQueryIter { @@ -202,12 +236,62 @@ impl VmSpace { /// read-only. And both the VM space will take handles to the same /// physical memory pages. pub fn fork_copy_on_write(&self) -> Self { + let page_fault_handler = { + let new_handler = Once::new(); + if let Some(handler) = self.page_fault_handler.get() { + new_handler.call_once(|| *handler); + } + new_handler + }; let new_space = Self { pt: self.pt.fork_copy_on_write(), + page_fault_handler, }; tlb_flush_all_excluding_global(); new_space } + + /// Creates a reader to read data from the user space of the current task. + /// + /// Returns `Err` if this `VmSpace` is not belonged to the user space of the current task + /// or the `vaddr` and `len` do not represent a user space memory range. + pub fn reader(&self, vaddr: Vaddr, len: usize) -> Result> { + if current_page_table_paddr() != unsafe { self.pt.root_paddr() } { + return Err(Error::AccessDenied); + } + + if vaddr.checked_add(len).unwrap_or(usize::MAX) > MAX_USERSPACE_VADDR { + return Err(Error::AccessDenied); + } + + // SAFETY: As long as the current task owns user space, the page table of + // the current task will be activated during the execution of the current task. + // Since `VmReader` is neither `Sync` nor `Send`, it will not live longer than + // the current task. Hence, it is ensured that the correct page table + // is activated during the usage period of the `VmReader`. + Ok(unsafe { VmReader::::from_user_space(vaddr as *const u8, len) }) + } + + /// Creates a writer to write data into the user space. + /// + /// Returns `Err` if this `VmSpace` is not belonged to the user space of the current task + /// or the `vaddr` and `len` do not represent a user space memory range. + pub fn writer(&self, vaddr: Vaddr, len: usize) -> Result> { + if current_page_table_paddr() != unsafe { self.pt.root_paddr() } { + return Err(Error::AccessDenied); + } + + if vaddr.checked_add(len).unwrap_or(usize::MAX) > MAX_USERSPACE_VADDR { + return Err(Error::AccessDenied); + } + + // SAFETY: As long as the current task owns user space, the page table of + // the current task will be activated during the execution of the current task. + // Since `VmWriter` is neither `Sync` nor `Send`, it will not live longer than + // the current task. Hence, it is ensured that the correct page table + // is activated during the usage period of the `VmWriter`. + Ok(unsafe { VmWriter::::from_user_space(vaddr as *mut u8, len) }) + } } impl Default for VmSpace {