diff --git a/kernel/aster-nix/src/vm/vmar/vm_mapping.rs b/kernel/aster-nix/src/vm/vmar/vm_mapping.rs index 6119c96d..bdec0e5b 100644 --- a/kernel/aster-nix/src/vm/vmar/vm_mapping.rs +++ b/kernel/aster-nix/src/vm/vmar/vm_mapping.rs @@ -5,7 +5,9 @@ use core::ops::Range; -use ostd::mm::{Frame, FrameVec, PageFlags, VmIo, VmMapOptions, VmSpace}; +use ostd::mm::{ + vm_space::VmQueryResult, CachePolicy, Frame, PageFlags, PageProperty, VmIo, VmSpace, +}; use super::{interval::Interval, is_intersected, Vmar, Vmar_}; use crate::{ @@ -194,22 +196,41 @@ impl VmMapping { let write_perms = VmPerms::WRITE; self.check_perms(&write_perms)?; - let mut page_addr = - self.map_to_addr() - self.vmo_offset() + page_idx_range.start * PAGE_SIZE; - for page_idx in page_idx_range { - let parent = self.parent.upgrade().unwrap(); - let vm_space = parent.vm_space(); + // We need to make sure the mapping exists. + // + // Also, if the `VmMapping` has the write permission but the corresponding + // PTE is present and is read-only, it would be a copy-on-write page. In + // this situation we need to trigger a page fault before writing at the + // VMO to guarantee the consistency between VMO and the page table. + { + let virt_addr = + self.map_to_addr() - self.vmo_offset() + page_idx_range.start * PAGE_SIZE; + let virt_range = virt_addr..virt_addr + page_idx_range.len() * PAGE_SIZE; - // The `VmMapping` has the write permission but the corresponding PTE is present and is read-only. - // This means this PTE is set to read-only due to the COW mechanism. In this situation we need to trigger a - // page fault before writing at the VMO to guarantee the consistency between VMO and the page table. - let need_page_fault = vm_space - .query(page_addr)? - .is_some_and(|prop| !prop.flags.contains(PageFlags::W)); - if need_page_fault { - self.handle_page_fault(page_addr, false, true)?; + // FIXME: any sane developer would recommend using `parent.vm_space().cursor(&virt_range)` + // to lock the range and check the mapping status. However, this will cause a deadlock because + // `Self::handle_page_fault` would like to create a cursor again. The following implementation + // indeed introduces a TOCTOU bug. + for page_va in virt_range.step_by(PAGE_SIZE) { + let parent = self.parent.upgrade().unwrap(); + let mut cursor = parent + .vm_space() + .cursor(&(page_va..page_va + PAGE_SIZE)) + .unwrap(); + let map_info = cursor.query().unwrap(); + drop(cursor); + + match map_info { + VmQueryResult::Mapped { va, prop, .. } => { + if !prop.flags.contains(PageFlags::W) { + self.handle_page_fault(va, false, true)?; + } + } + VmQueryResult::NotMapped { va, .. } => { + self.handle_page_fault(va, true, true)?; + } + } } - page_addr += PAGE_SIZE; } self.vmo.write_bytes(vmo_write_offset, buf)?; @@ -458,7 +479,8 @@ impl VmMappingInner { frame: Frame, is_readonly: bool, ) -> Result<()> { - let map_addr = self.page_map_addr(page_idx); + let map_va = self.page_map_addr(page_idx); + let map_va = map_va..map_va + PAGE_SIZE; let vm_perms = { let mut perms = self.perms; @@ -468,23 +490,11 @@ impl VmMappingInner { } perms }; + let map_prop = PageProperty::new(vm_perms.into(), CachePolicy::Writeback); - let vm_map_options = { - let mut options = VmMapOptions::new(); - options.addr(Some(map_addr)); - options.flags(vm_perms.into()); + let mut cursor = vm_space.cursor_mut(&map_va).unwrap(); + cursor.map(frame, map_prop); - // After `fork()`, the entire memory space of the parent and child processes is - // protected as read-only. 
Therefore, whether the pages need to be COWed (if the memory - // region is private) or not (if the memory region is shared), it is necessary to - // overwrite the page table entry to make the page writable again when the parent or - // child process first tries to write to the memory region. - options.can_overwrite(true); - - options - }; - - vm_space.map(FrameVec::from_one_frame(frame), &vm_map_options)?; self.mapped_pages.insert(page_idx); Ok(()) } @@ -492,9 +502,10 @@ impl VmMappingInner { fn unmap_one_page(&mut self, vm_space: &VmSpace, page_idx: usize) -> Result<()> { let map_addr = self.page_map_addr(page_idx); let range = map_addr..(map_addr + PAGE_SIZE); - if vm_space.query(map_addr)?.is_some() { - vm_space.unmap(&range)?; - } + + let mut cursor = vm_space.cursor_mut(&range).unwrap(); + cursor.unmap(PAGE_SIZE); + self.mapped_pages.remove(&page_idx); Ok(()) } @@ -528,17 +539,8 @@ impl VmMappingInner { ) -> Result<()> { debug_assert!(range.start % PAGE_SIZE == 0); debug_assert!(range.end % PAGE_SIZE == 0); - let start_page = (range.start - self.map_to_addr + self.vmo_offset) / PAGE_SIZE; - let end_page = (range.end - self.map_to_addr + self.vmo_offset) / PAGE_SIZE; - let flags: PageFlags = perms.into(); - for page_idx in start_page..end_page { - let page_addr = self.page_map_addr(page_idx); - if vm_space.query(page_addr)?.is_some() { - // If the page is already mapped, we will modify page table - let page_range = page_addr..(page_addr + PAGE_SIZE); - vm_space.protect(&page_range, |p| p.flags = flags)?; - } - } + let mut cursor = vm_space.cursor_mut(&range).unwrap(); + cursor.protect(range.len(), |p| p.flags = perms.into(), true)?; Ok(()) } diff --git a/ostd/src/mm/mod.rs b/ostd/src/mm/mod.rs index ab7f995a..c1ecdfb1 100644 --- a/ostd/src/mm/mod.rs +++ b/ostd/src/mm/mod.rs @@ -17,7 +17,7 @@ mod offset; pub(crate) mod page; pub(crate) mod page_prop; pub(crate) mod page_table; -mod space; +pub mod vm_space; use alloc::vec::Vec; use core::{fmt::Debug, ops::Range}; @@ -29,7 +29,7 @@ pub use self::{ frame::{options::FrameAllocOptions, Frame, FrameVec, FrameVecIter, Segment}, io::{KernelSpace, UserSpace, VmIo, VmReader, VmWriter}, page_prop::{CachePolicy, PageFlags, PageProperty}, - space::{VmMapOptions, VmSpace}, + vm_space::VmSpace, }; pub(crate) use self::{ kspace::paddr_to_vaddr, page::meta::init as init_page_meta, page_prop::PrivilegedPageFlags, diff --git a/ostd/src/mm/page_table/cursor.rs b/ostd/src/mm/page_table/cursor.rs index d24df707..5f8bdde9 100644 --- a/ostd/src/mm/page_table/cursor.rs +++ b/ostd/src/mm/page_table/cursor.rs @@ -198,9 +198,9 @@ where } /// Gets the information of the current slot. 
- pub(crate) fn query(&mut self) -> Option { + pub(crate) fn query(&mut self) -> Result { if self.va >= self.barrier_va.end { - return None; + return Err(PageTableError::InvalidVaddr(self.va)); } loop { @@ -209,7 +209,7 @@ where let pte = self.read_cur_pte(); if !pte.is_present() { - return Some(PageTableQueryResult::NotMapped { + return Ok(PageTableQueryResult::NotMapped { va, len: page_size::(level), }); @@ -221,14 +221,14 @@ where match self.cur_child() { Child::Page(page) => { - return Some(PageTableQueryResult::Mapped { + return Ok(PageTableQueryResult::Mapped { va, page, prop: pte.prop(), }); } Child::Untracked(pa) => { - return Some(PageTableQueryResult::MappedUntracked { + return Ok(PageTableQueryResult::MappedUntracked { va, pa, len: page_size::(level), @@ -246,7 +246,7 @@ where /// /// If reached the end of a page table node, it leads itself up to the next page of the parent /// page if possible. - fn move_forward(&mut self) { + pub(in crate::mm) fn move_forward(&mut self) { let page_size = page_size::(self.level); let next_va = self.va.align_down(page_size) + page_size; while self.level < self.guard_level && pte_index::(next_va, self.level) == 0 { @@ -255,6 +255,41 @@ where self.va = next_va; } + /// Jumps to the given virtual address. + /// + /// # Panics + /// + /// This method panics if the address is out of the range where the cursor is required to operate, + /// or has bad alignment. + pub(crate) fn jump(&mut self, va: Vaddr) { + assert!(self.barrier_va.contains(&va)); + assert!(va % C::BASE_PAGE_SIZE == 0); + + loop { + let cur_node_start = self.va & !(page_size::(self.level + 1) - 1); + let cur_node_end = cur_node_start + page_size::(self.level + 1); + // If the address is within the current node, we can jump directly. + if cur_node_start <= va && va < cur_node_end { + self.va = va; + return; + } + + // There is a corner case that the cursor is depleted, sitting at the start of the + // next node but the next node is not locked because the parent is not locked. + if self.va >= self.barrier_va.end && self.level == self.guard_level { + self.va = va; + return; + } + + debug_assert!(self.level < self.guard_level); + self.level_up(); + } + } + + pub fn virt_addr(&self) -> Vaddr { + self.va + } + /// Goes up a level. We release the current page if it has no mappings since the cursor only moves /// forward. And if needed we will do the final cleanup using this method after re-walk when the /// cursor is dropped. @@ -327,10 +362,10 @@ where fn next(&mut self) -> Option { let result = self.query(); - if result.is_some() { + if result.is_ok() { self.move_forward(); } - result + result.ok() } } @@ -365,43 +400,26 @@ where Cursor::new(pt, va).map(|inner| Self(inner)) } - /// Gets the information of the current slot and go to the next slot. - /// - /// We choose not to implement `Iterator` or `IterMut` for [`CursorMut`] - /// because the mutable cursor is indeed not an iterator. - pub(crate) fn next(&mut self) -> Option { - self.0.next() - } - /// Jumps to the given virtual address. /// + /// This is the same as [`Cursor::jump`]. + /// /// # Panics /// /// This method panics if the address is out of the range where the cursor is required to operate, /// or has bad alignment. 
pub(crate) fn jump(&mut self, va: Vaddr) { - assert!(self.0.barrier_va.contains(&va)); - assert!(va % C::BASE_PAGE_SIZE == 0); + self.0.jump(va) + } - loop { - let cur_node_start = self.0.va & !(page_size::(self.0.level + 1) - 1); - let cur_node_end = cur_node_start + page_size::(self.0.level + 1); - // If the address is within the current node, we can jump directly. - if cur_node_start <= va && va < cur_node_end { - self.0.va = va; - return; - } + /// Gets the current virtual address. + pub fn virt_addr(&self) -> Vaddr { + self.0.virt_addr() + } - // There is a corner case that the cursor is depleted, sitting at the start of the - // next node but the next node is not locked because the parent is not locked. - if self.0.va >= self.0.barrier_va.end && self.0.level == self.0.guard_level { - self.0.va = va; - return; - } - - debug_assert!(self.0.level < self.0.guard_level); - self.0.level_up(); - } + /// Gets the information of the current slot. + pub(crate) fn query(&mut self) -> Result { + self.0.query() } /// Maps the range starting from the current address to a [`DynPage`]. diff --git a/ostd/src/mm/page_table/mod.rs b/ostd/src/mm/page_table/mod.rs index 3b688692..91e3fa50 100644 --- a/ostd/src/mm/page_table/mod.rs +++ b/ostd/src/mm/page_table/mod.rs @@ -3,7 +3,7 @@ use core::{fmt::Debug, marker::PhantomData, ops::Range}; use super::{ - nr_subpage_per_huge, paddr_to_vaddr, + nr_subpage_per_huge, page_prop::{PageFlags, PageProperty}, page_size, Paddr, PagingConstsTrait, PagingLevel, Vaddr, }; @@ -23,8 +23,10 @@ pub(in crate::mm) mod boot_pt; #[derive(Clone, Copy, PartialEq, Eq, Debug)] pub enum PageTableError { - /// The virtual address range is invalid. + /// The provided virtual address range is invalid. InvalidVaddrRange(Vaddr, Vaddr), + /// The provided virtual address is invalid. + InvalidVaddr(Vaddr), /// Using virtual address not aligned. UnalignedVaddr, /// Protecting a mapping that does not exist. @@ -232,6 +234,7 @@ where /// Note that this function may fail reflect an accurate result if there are /// cursors concurrently accessing the same virtual address range, just like what /// happens for the hardware MMU walk. + #[cfg(ktest)] pub(crate) fn query(&self, vaddr: Vaddr) -> Option<(Paddr, PageProperty)> { // SAFETY: The root node is a valid page table node so the address is valid. unsafe { page_walk::(self.root_paddr(), vaddr) } @@ -288,13 +291,14 @@ where /// /// To mitigate this problem, the page table nodes are by default not /// actively recycled, until we find an appropriate solution. +#[cfg(ktest)] pub(super) unsafe fn page_walk( root_paddr: Paddr, vaddr: Vaddr, ) -> Option<(Paddr, PageProperty)> { - // We disable preemt here to mimic the MMU walk, which will not be interrupted - // then must finish within a given time. 
- let _guard = crate::task::disable_preempt(); + use super::paddr_to_vaddr; + + let preempt_guard = crate::task::disable_preempt(); let mut cur_level = C::NR_LEVELS; let mut cur_pte = { diff --git a/ostd/src/mm/space.rs b/ostd/src/mm/space.rs deleted file mode 100644 index 548348f7..00000000 --- a/ostd/src/mm/space.rs +++ /dev/null @@ -1,408 +0,0 @@ -// SPDX-License-Identifier: MPL-2.0 - -use core::ops::Range; - -use spin::Once; - -use super::{ - io::UserSpace, - is_page_aligned, - kspace::KERNEL_PAGE_TABLE, - page_table::{PageTable, PageTableMode, UserMode}, - CachePolicy, FrameVec, PageFlags, PageProperty, PagingConstsTrait, PrivilegedPageFlags, - VmReader, VmWriter, PAGE_SIZE, -}; -use crate::{ - arch::mm::{ - current_page_table_paddr, tlb_flush_addr_range, tlb_flush_all_excluding_global, - PageTableEntry, PagingConsts, - }, - cpu::CpuExceptionInfo, - mm::{ - page_table::{Cursor, PageTableQueryResult as PtQr}, - Frame, MAX_USERSPACE_VADDR, - }, - prelude::*, - Error, -}; - -/// Virtual memory space. -/// -/// A virtual memory space (`VmSpace`) can be created and assigned to a user space so that -/// the virtual memory of the user space can be manipulated safely. For example, -/// given an arbitrary user-space pointer, one can read and write the memory -/// location referred to by the user-space pointer without the risk of breaking the -/// memory safety of the kernel space. -/// -/// A newly-created `VmSpace` is not backed by any physical memory pages. -/// To provide memory pages for a `VmSpace`, one can allocate and map -/// physical memory ([`Frame`]s) to the `VmSpace`. -/// -/// A `VmSpace` can also attach a page fault handler, which will be invoked to handle -/// page faults generated from user space. -/// -/// A `VmSpace` can also attach a page fault handler, which will be invoked to handle -/// page faults generated from user space. -#[allow(clippy::type_complexity)] -pub struct VmSpace { - pt: PageTable, - page_fault_handler: Once core::result::Result<(), ()>>, -} - -// Notes on TLB flushing: -// -// We currently assume that: -// 1. `VmSpace` _might_ be activated on the current CPU and the user memory _might_ be used -// immediately after we make changes to the page table entries. So we must invalidate the -// corresponding TLB caches accordingly. -// 2. `VmSpace` must _not_ be activated on another CPU. This assumption is trivial, since SMP -// support is not yet available. But we need to consider this situation in the future (TODO). - -impl VmSpace { - /// Creates a new VM address space. - pub fn new() -> Self { - Self { - pt: KERNEL_PAGE_TABLE.get().unwrap().create_user_page_table(), - page_fault_handler: Once::new(), - } - } - - /// Activates the page table. - pub(crate) fn activate(&self) { - self.pt.activate(); - } - - pub(crate) fn handle_page_fault( - &self, - info: &CpuExceptionInfo, - ) -> core::result::Result<(), ()> { - if let Some(func) = self.page_fault_handler.get() { - return func(self, info); - } - Err(()) - } - - /// Registers the page fault handler in this `VmSpace`. - /// - /// The page fault handler of a `VmSpace` can only be initialized once. - /// If it has been initialized before, calling this method will have no effect. - pub fn register_page_fault_handler( - &self, - func: fn(&VmSpace, &CpuExceptionInfo) -> core::result::Result<(), ()>, - ) { - self.page_fault_handler.call_once(|| func); - } - - /// Maps some physical memory pages into the VM space according to the given - /// options, returning the address where the mapping is created. 
- /// - /// The ownership of the frames will be transferred to the `VmSpace`. - /// - /// For more information, see [`VmMapOptions`]. - pub fn map(&self, frames: FrameVec, options: &VmMapOptions) -> Result { - if options.addr.is_none() { - return Err(Error::InvalidArgs); - } - - let addr = options.addr.unwrap(); - - if addr % PAGE_SIZE != 0 { - return Err(Error::InvalidArgs); - } - - let size = frames.nbytes(); - let end = addr.checked_add(size).ok_or(Error::InvalidArgs)?; - - let va_range = addr..end; - if !UserMode::covers(&va_range) { - return Err(Error::InvalidArgs); - } - - let mut cursor = self.pt.cursor_mut(&va_range)?; - - // If overwrite is forbidden, we should check if there are existing mappings - if !options.can_overwrite { - while let Some(qr) = cursor.next() { - if matches!(qr, PtQr::Mapped { .. }) { - return Err(Error::MapAlreadyMappedVaddr); - } - } - cursor.jump(va_range.start); - } - - let prop = PageProperty { - flags: options.flags, - cache: CachePolicy::Writeback, - priv_flags: PrivilegedPageFlags::USER, - }; - - for frame in frames.into_iter() { - // SAFETY: mapping in the user space with `Frame` is safe. - unsafe { - cursor.map(frame.into(), prop); - } - } - - drop(cursor); - tlb_flush_addr_range(&va_range); - - Ok(addr) - } - - /// Queries about a range of virtual memory. - /// You will get an iterator of `VmQueryResult` which contains the information of - /// each parts of the range. - pub fn query_range(&self, range: &Range) -> Result { - Ok(VmQueryIter { - cursor: self.pt.cursor(range)?, - }) - } - - /// Queries about the mapping information about a byte in virtual memory. - /// This is more handy than [`query_range`], but less efficient if you want - /// to query in a batch. - /// - /// [`query_range`]: VmSpace::query_range - pub fn query(&self, vaddr: Vaddr) -> Result> { - if !(0..MAX_USERSPACE_VADDR).contains(&vaddr) { - return Err(Error::AccessDenied); - } - Ok(self.pt.query(vaddr).map(|(_pa, prop)| prop)) - } - - /// Unmaps the physical memory pages within the VM address range. - /// - /// The range is allowed to contain gaps, where no physical memory pages - /// are mapped. - pub fn unmap(&self, range: &Range) -> Result<()> { - if !is_page_aligned(range.start) || !is_page_aligned(range.end) { - return Err(Error::InvalidArgs); - } - if !UserMode::covers(range) { - return Err(Error::InvalidArgs); - } - - // SAFETY: unmapping in the user space is safe. - unsafe { - self.pt.unmap(range)?; - } - tlb_flush_addr_range(range); - - Ok(()) - } - - /// Clears all mappings - pub fn clear(&self) { - // SAFETY: unmapping user space is safe, and we don't care unmapping - // invalid ranges. - unsafe { - self.pt.unmap(&(0..MAX_USERSPACE_VADDR)).unwrap(); - } - tlb_flush_all_excluding_global(); - } - - /// Updates the VM protection permissions within the VM address range. - /// - /// If any of the page in the given range is not mapped, it is skipped. - /// The method panics when virtual address is not aligned to base page - /// size. - /// - /// It is guarenteed that the operation is called once for each valid - /// page found in the range. - /// - /// TODO: It returns error when invalid operations such as protect - /// partial huge page happens, and efforts are not reverted, leaving us - /// in a bad state. 
- pub fn protect(&self, range: &Range, op: impl FnMut(&mut PageProperty)) -> Result<()> { - if !is_page_aligned(range.start) || !is_page_aligned(range.end) { - return Err(Error::InvalidArgs); - } - if !UserMode::covers(range) { - return Err(Error::InvalidArgs); - } - - // SAFETY: protecting in the user space is safe. - unsafe { - self.pt.protect(range, op)?; - } - tlb_flush_addr_range(range); - - Ok(()) - } - - /// Forks a new VM space with copy-on-write semantics. - /// - /// Both the parent and the newly forked VM space will be marked as - /// read-only. And both the VM space will take handles to the same - /// physical memory pages. - pub fn fork_copy_on_write(&self) -> Self { - let page_fault_handler = { - let new_handler = Once::new(); - if let Some(handler) = self.page_fault_handler.get() { - new_handler.call_once(|| *handler); - } - new_handler - }; - let new_space = Self { - pt: self.pt.fork_copy_on_write(), - page_fault_handler, - }; - tlb_flush_all_excluding_global(); - new_space - } - - /// Creates a reader to read data from the user space of the current task. - /// - /// Returns `Err` if this `VmSpace` is not belonged to the user space of the current task - /// or the `vaddr` and `len` do not represent a user space memory range. - pub fn reader(&self, vaddr: Vaddr, len: usize) -> Result> { - if current_page_table_paddr() != unsafe { self.pt.root_paddr() } { - return Err(Error::AccessDenied); - } - - if vaddr.checked_add(len).unwrap_or(usize::MAX) > MAX_USERSPACE_VADDR { - return Err(Error::AccessDenied); - } - - // SAFETY: As long as the current task owns user space, the page table of - // the current task will be activated during the execution of the current task. - // Since `VmReader` is neither `Sync` nor `Send`, it will not live longer than - // the current task. Hence, it is ensured that the correct page table - // is activated during the usage period of the `VmReader`. - Ok(unsafe { VmReader::::from_user_space(vaddr as *const u8, len) }) - } - - /// Creates a writer to write data into the user space. - /// - /// Returns `Err` if this `VmSpace` is not belonged to the user space of the current task - /// or the `vaddr` and `len` do not represent a user space memory range. - pub fn writer(&self, vaddr: Vaddr, len: usize) -> Result> { - if current_page_table_paddr() != unsafe { self.pt.root_paddr() } { - return Err(Error::AccessDenied); - } - - if vaddr.checked_add(len).unwrap_or(usize::MAX) > MAX_USERSPACE_VADDR { - return Err(Error::AccessDenied); - } - - // SAFETY: As long as the current task owns user space, the page table of - // the current task will be activated during the execution of the current task. - // Since `VmWriter` is neither `Sync` nor `Send`, it will not live longer than - // the current task. Hence, it is ensured that the correct page table - // is activated during the usage period of the `VmWriter`. - Ok(unsafe { VmWriter::::from_user_space(vaddr as *mut u8, len) }) - } -} - -impl Default for VmSpace { - fn default() -> Self { - Self::new() - } -} - -/// Options for mapping physical memory pages into a VM address space. -/// See [`VmSpace::map`]. -#[derive(Clone, Debug)] -pub struct VmMapOptions { - /// Starting virtual address - addr: Option, - /// Map align - align: usize, - /// Page permissions and status - flags: PageFlags, - /// Can overwrite - can_overwrite: bool, -} - -impl VmMapOptions { - /// Creates the default options. 
- pub fn new() -> Self { - Self { - addr: None, - align: PagingConsts::BASE_PAGE_SIZE, - flags: PageFlags::empty(), - can_overwrite: false, - } - } - - /// Sets the alignment of the address of the mapping. - /// - /// The alignment must be a power-of-2 and greater than or equal to the - /// page size. - /// - /// The default value of this option is the page size. - pub fn align(&mut self, align: usize) -> &mut Self { - self.align = align; - self - } - - /// Sets the permissions of the mapping, which affects whether - /// the mapping can be read, written, or executed. - /// - /// The default value of this option is read-only. - pub fn flags(&mut self, flags: PageFlags) -> &mut Self { - self.flags = flags; - self - } - - /// Sets the address of the new mapping. - /// - /// The default value of this option is `None`. - pub fn addr(&mut self, addr: Option) -> &mut Self { - if addr.is_none() { - return self; - } - self.addr = Some(addr.unwrap()); - self - } - - /// Sets whether the mapping can overwrite any existing mappings. - /// - /// If this option is `true`, then the address option must be `Some(_)`. - /// - /// The default value of this option is `false`. - pub fn can_overwrite(&mut self, can_overwrite: bool) -> &mut Self { - self.can_overwrite = can_overwrite; - self - } -} - -impl Default for VmMapOptions { - fn default() -> Self { - Self::new() - } -} - -/// The iterator for querying over the VM space without modifying it. -pub struct VmQueryIter<'a> { - cursor: Cursor<'a, UserMode, PageTableEntry, PagingConsts>, -} - -pub enum VmQueryResult { - NotMapped { - va: Vaddr, - len: usize, - }, - Mapped { - va: Vaddr, - frame: Frame, - prop: PageProperty, - }, -} - -impl Iterator for VmQueryIter<'_> { - type Item = VmQueryResult; - - fn next(&mut self) -> Option { - self.cursor.next().map(|ptqr| match ptqr { - PtQr::NotMapped { va, len } => VmQueryResult::NotMapped { va, len }, - PtQr::Mapped { va, page, prop } => VmQueryResult::Mapped { - va, - frame: page.try_into().unwrap(), - prop, - }, - // It is not possible to map untyped memory in user space. - PtQr::MappedUntracked { .. } => unreachable!(), - }) - } -} diff --git a/ostd/src/mm/vm_space.rs b/ostd/src/mm/vm_space.rs new file mode 100644 index 00000000..72c799ee --- /dev/null +++ b/ostd/src/mm/vm_space.rs @@ -0,0 +1,373 @@ +// SPDX-License-Identifier: MPL-2.0 + +//! Virtual memory space management. +//! +//! The [`VmSpace`] struct is provided to manage the virtual memory space of a +//! user. Cursors are used to traverse and modify over the virtual memory space +//! concurrently. The VM space cursor [`self::Cursor`] is just a wrapper over +//! the page table cursor [`super::page_table::Cursor`], providing efficient, +//! powerful concurrent accesses to the page table, and suffers from the same +//! validity concerns as described in [`super::page_table::cursor`]. + +use core::ops::Range; + +use spin::Once; + +use super::{ + io::UserSpace, + kspace::KERNEL_PAGE_TABLE, + page_table::{PageTable, UserMode}, + PageProperty, VmReader, VmWriter, +}; +use crate::{ + arch::mm::{ + current_page_table_paddr, tlb_flush_addr_range, tlb_flush_all_excluding_global, + PageTableEntry, PagingConsts, + }, + cpu::CpuExceptionInfo, + mm::{ + page_table::{self, PageTableQueryResult as PtQr}, + Frame, MAX_USERSPACE_VADDR, + }, + prelude::*, + Error, +}; + +/// Virtual memory space. +/// +/// A virtual memory space (`VmSpace`) can be created and assigned to a user +/// space so that the virtual memory of the user space can be manipulated +/// safely. 
For example, given an arbitrary user-space pointer, one can read +/// and write the memory location referred to by the user-space pointer without +/// the risk of breaking the memory safety of the kernel space. +/// +/// A newly-created `VmSpace` is not backed by any physical memory pages. To +/// provide memory pages for a `VmSpace`, one can allocate and map physical +/// memory ([`Frame`]s) to the `VmSpace` using the cursor. +/// +/// A `VmSpace` can also attach a page fault handler, which will be invoked to +/// handle page faults generated from user space. +#[allow(clippy::type_complexity)] +pub struct VmSpace { + pt: PageTable, + page_fault_handler: Once core::result::Result<(), ()>>, +} + +// Notes on TLB flushing: +// +// We currently assume that: +// 1. `VmSpace` _might_ be activated on the current CPU and the user memory _might_ be used +// immediately after we make changes to the page table entries. So we must invalidate the +// corresponding TLB caches accordingly. +// 2. `VmSpace` must _not_ be activated on another CPU. This assumption is trivial, since SMP +// support is not yet available. But we need to consider this situation in the future (TODO). +impl VmSpace { + /// Creates a new VM address space. + pub fn new() -> Self { + Self { + pt: KERNEL_PAGE_TABLE.get().unwrap().create_user_page_table(), + page_fault_handler: Once::new(), + } + } + + /// Gets an immutable cursor in the virtual address range. + /// + /// The cursor behaves like a lock guard, exclusively owning a sub-tree of + /// the page table, preventing others from creating a cursor in it. So be + /// sure to drop the cursor as soon as possible. + /// + /// The creation of the cursor may block if another cursor having an + /// overlapping range is alive. + pub fn cursor(&self, va: &Range) -> Result> { + Ok(self.pt.cursor(va).map(Cursor)?) + } + + /// Gets an mutable cursor in the virtual address range. + /// + /// The same as [`Self::cursor`], the cursor behaves like a lock guard, + /// exclusively owning a sub-tree of the page table, preventing others + /// from creating a cursor in it. So be sure to drop the cursor as soon as + /// possible. + /// + /// The creation of the cursor may block if another cursor having an + /// overlapping range is alive. The modification to the mapping by the + /// cursor may also block or be overriden the mapping of another cursor. + pub fn cursor_mut(&self, va: &Range) -> Result> { + Ok(self.pt.cursor_mut(va).map(CursorMut)?) + } + + /// Activates the page table. + pub(crate) fn activate(&self) { + self.pt.activate(); + } + + pub(crate) fn handle_page_fault( + &self, + info: &CpuExceptionInfo, + ) -> core::result::Result<(), ()> { + if let Some(func) = self.page_fault_handler.get() { + return func(self, info); + } + Err(()) + } + + /// Registers the page fault handler in this `VmSpace`. + /// + /// The page fault handler of a `VmSpace` can only be initialized once. + /// If it has been initialized before, calling this method will have no effect. + pub fn register_page_fault_handler( + &self, + func: fn(&VmSpace, &CpuExceptionInfo) -> core::result::Result<(), ()>, + ) { + self.page_fault_handler.call_once(|| func); + } + + /// Clears all mappings + pub fn clear(&self) { + // SAFETY: unmapping user space is safe, and we don't care unmapping + // invalid ranges. + unsafe { + self.pt.unmap(&(0..MAX_USERSPACE_VADDR)).unwrap(); + } + tlb_flush_all_excluding_global(); + } + + /// Forks a new VM space with copy-on-write semantics. 
+ /// + /// Both the parent and the newly forked VM space will be marked as + /// read-only. And both the VM space will take handles to the same + /// physical memory pages. + pub fn fork_copy_on_write(&self) -> Self { + let page_fault_handler = { + let new_handler = Once::new(); + if let Some(handler) = self.page_fault_handler.get() { + new_handler.call_once(|| *handler); + } + new_handler + }; + let new_space = Self { + pt: self.pt.fork_copy_on_write(), + page_fault_handler, + }; + tlb_flush_all_excluding_global(); + new_space + } + + /// Creates a reader to read data from the user space of the current task. + /// + /// Returns `Err` if this `VmSpace` is not belonged to the user space of the current task + /// or the `vaddr` and `len` do not represent a user space memory range. + pub fn reader(&self, vaddr: Vaddr, len: usize) -> Result> { + if current_page_table_paddr() != unsafe { self.pt.root_paddr() } { + return Err(Error::AccessDenied); + } + + if vaddr.checked_add(len).unwrap_or(usize::MAX) > MAX_USERSPACE_VADDR { + return Err(Error::AccessDenied); + } + + // SAFETY: As long as the current task owns user space, the page table of + // the current task will be activated during the execution of the current task. + // Since `VmReader` is neither `Sync` nor `Send`, it will not live longer than + // the current task. Hence, it is ensured that the correct page table + // is activated during the usage period of the `VmReader`. + Ok(unsafe { VmReader::::from_user_space(vaddr as *const u8, len) }) + } + + /// Creates a writer to write data into the user space. + /// + /// Returns `Err` if this `VmSpace` is not belonged to the user space of the current task + /// or the `vaddr` and `len` do not represent a user space memory range. + pub fn writer(&self, vaddr: Vaddr, len: usize) -> Result> { + if current_page_table_paddr() != unsafe { self.pt.root_paddr() } { + return Err(Error::AccessDenied); + } + + if vaddr.checked_add(len).unwrap_or(usize::MAX) > MAX_USERSPACE_VADDR { + return Err(Error::AccessDenied); + } + + // SAFETY: As long as the current task owns user space, the page table of + // the current task will be activated during the execution of the current task. + // Since `VmWriter` is neither `Sync` nor `Send`, it will not live longer than + // the current task. Hence, it is ensured that the correct page table + // is activated during the usage period of the `VmWriter`. + Ok(unsafe { VmWriter::::from_user_space(vaddr as *mut u8, len) }) + } +} + +impl Default for VmSpace { + fn default() -> Self { + Self::new() + } +} + +/// The cursor for querying over the VM space without modifying it. +/// +/// It exclusively owns a sub-tree of the page table, preventing others from +/// reading or modifying the same sub-tree. Two read-only cursors can not be +/// created from the same virtual address range either. +pub struct Cursor<'a>(page_table::Cursor<'a, UserMode, PageTableEntry, PagingConsts>); + +impl Iterator for Cursor<'_> { + type Item = VmQueryResult; + + fn next(&mut self) -> Option { + let result = self.query(); + if result.is_ok() { + self.0.move_forward(); + } + result.ok() + } +} + +impl Cursor<'_> { + /// Query about the current slot. + /// + /// This function won't bring the cursor to the next slot. + pub fn query(&mut self) -> Result { + Ok(self.0.query().map(|ptqr| ptqr.try_into().unwrap())?) + } + + /// Jump to the virtual address. + pub fn jump(&mut self, va: Vaddr) { + self.0.jump(va); + } + + /// Get the virtual address of the current slot. 
+ pub fn virt_addr(&self) -> Vaddr { + self.0.virt_addr() + } +} + +/// The cursor for modifying the mappings in VM space. +/// +/// It exclusively owns a sub-tree of the page table, preventing others from +/// reading or modifying the same sub-tree. +pub struct CursorMut<'a>(page_table::CursorMut<'a, UserMode, PageTableEntry, PagingConsts>); + +impl CursorMut<'_> { + /// Query about the current slot. + /// + /// This is the same as [`Cursor::query`]. + /// + /// This function won't bring the cursor to the next slot. + pub fn query(&mut self) -> Result { + Ok(self.0.query().map(|ptqr| ptqr.try_into().unwrap())?) + } + + /// Jump to the virtual address. + /// + /// This is the same as [`Cursor::jump`]. + pub fn jump(&mut self, va: Vaddr) { + self.0.jump(va); + } + + /// Get the virtual address of the current slot. + pub fn virt_addr(&self) -> Vaddr { + self.0.virt_addr() + } + + /// Map a frame into the current slot. + /// + /// This method will bring the cursor to the next slot after the modification. + pub fn map(&mut self, frame: Frame, prop: PageProperty) { + let start_va = self.virt_addr(); + let end_va = start_va + frame.size(); + + // SAFETY: It is safe to map untyped memory into the userspace. + unsafe { + self.0.map(frame.into(), prop); + } + + tlb_flush_addr_range(&(start_va..end_va)); + } + + /// Clear the mapping starting from the current slot. + /// + /// This method will bring the cursor forward by `len` bytes in the virtual + /// address space after the modification. + /// + /// # Panics + /// + /// This method will panic if `len` is not page-aligned. + pub fn unmap(&mut self, len: usize) { + assert!(len % super::PAGE_SIZE == 0); + let start_va = self.virt_addr(); + let end_va = start_va + len; + + // SAFETY: It is safe to un-map memory in the userspace. + unsafe { + self.0.unmap(len); + } + + tlb_flush_addr_range(&(start_va..end_va)); + } + + /// Change the mapping property starting from the current slot. + /// + /// This method will bring the cursor forward by `len` bytes in the virtual + /// address space after the modification. + /// + /// The way to change the property is specified by the closure `op`. + /// + /// # Panics + /// + /// This method will panic if `len` is not page-aligned. + pub fn protect( + &mut self, + len: usize, + op: impl FnMut(&mut PageProperty), + allow_protect_absent: bool, + ) -> Result<()> { + assert!(len % super::PAGE_SIZE == 0); + let start_va = self.virt_addr(); + let end_va = start_va + len; + + // SAFETY: It is safe to protect memory in the userspace. + let result = unsafe { self.0.protect(len, op, allow_protect_absent) }; + + tlb_flush_addr_range(&(start_va..end_va)); + + Ok(result?) + } +} + +/// The result of a query over the VM space. +#[derive(Debug)] +pub enum VmQueryResult { + /// The current slot is not mapped. + NotMapped { + /// The virtual address of the slot. + va: Vaddr, + /// The length of the slot. + len: usize, + }, + /// The current slot is mapped. + Mapped { + /// The virtual address of the slot. + va: Vaddr, + /// The mapped frame. + frame: Frame, + /// The property of the slot. + prop: PageProperty, + }, +} + +impl TryFrom for VmQueryResult { + type Error = &'static str; + + fn try_from(ptqr: PtQr) -> core::result::Result { + match ptqr { + PtQr::NotMapped { va, len } => Ok(VmQueryResult::NotMapped { va, len }), + PtQr::Mapped { va, page, prop } => Ok(VmQueryResult::Mapped { + va, + frame: page + .try_into() + .map_err(|_| "found typed memory mapped into `VmSpace`")?, + prop, + }), + PtQr::MappedUntracked { .. 
} => Err("found untracked memory mapped into `VmSpace`"), + } + } +}
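For reviewers, below is a minimal, self-contained sketch of how a caller is expected to drive the cursor-based `VmSpace` API introduced above, in place of the removed `VmSpace::map`/`VmMapOptions` path. The cursor methods (`cursor`, `cursor_mut`, `map`, `query`, `unmap`) are exactly those added in `ostd/src/mm/vm_space.rs`; the frame-allocation call (`FrameAllocOptions::new(1).alloc_single()`), the `PageFlags::RW` shorthand, and the local `PAGE_SIZE` constant are assumptions made for the sketch and are not confirmed by this diff.

// Sketch only (not part of this patch): exercises the cursor-based `VmSpace` API.
// Assumed details are marked below; everything else mirrors the methods added above.

use ostd::mm::{
    vm_space::{VmQueryResult, VmSpace},
    CachePolicy, FrameAllocOptions, PageFlags, PageProperty,
};

// Assumption: mirrors the base page size; use the crate's own constant if exported.
const PAGE_SIZE: usize = 4096;

/// Maps one frame at `va`, verifies the mapping, and unmaps it again.
/// `va` must be page-aligned and lie within the user address range.
fn cursor_round_trip(vm_space: &VmSpace, va: usize) {
    // Assumption: allocator call and `PageFlags::RW` shorthand are not shown in this diff.
    let frame = FrameAllocOptions::new(1).alloc_single().unwrap();
    let prop = PageProperty::new(PageFlags::RW, CachePolicy::Writeback);

    // A mutable cursor behaves like a lock guard over `va..va + PAGE_SIZE`,
    // so it is dropped as soon as the mapping is installed.
    let mut cursor = vm_space.cursor_mut(&(va..va + PAGE_SIZE)).unwrap();
    cursor.map(frame, prop); // flushes the TLB for this range and advances the cursor
    drop(cursor);

    // Re-check the slot with a read-only cursor.
    let mut cursor = vm_space.cursor(&(va..va + PAGE_SIZE)).unwrap();
    match cursor.query().unwrap() {
        VmQueryResult::Mapped { prop, .. } => assert!(prop.flags.contains(PageFlags::W)),
        VmQueryResult::NotMapped { .. } => unreachable!("the page was just mapped"),
    }
    drop(cursor);

    // Tear the mapping down; `unmap` takes a page-aligned length.
    let mut cursor = vm_space.cursor_mut(&(va..va + PAGE_SIZE)).unwrap();
    cursor.unmap(PAGE_SIZE);
}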
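Because both cursor types act as range locks on the page-table sub-tree, the sketch drops each cursor before acquiring the next; holding a cursor while calling back into `Self::handle_page_fault` would reproduce the deadlock called out in the FIXME added to `vm_mapping.rs` above.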