diff --git a/framework/aster-frame/src/arch/x86/iommu/context_table.rs b/framework/aster-frame/src/arch/x86/iommu/context_table.rs index 85d96c17a..4103dce81 100644 --- a/framework/aster-frame/src/arch/x86/iommu/context_table.rs +++ b/framework/aster-frame/src/arch/x86/iommu/context_table.rs @@ -291,16 +291,18 @@ impl ContextTable { if device.device >= 32 || device.function >= 8 { return Err(ContextTableError::InvalidDeviceId); } - self.get_or_create_page_table(device).map_unchecked( - &(daddr..daddr + PAGE_SIZE), - &(paddr..paddr + PAGE_SIZE), - MapProperty { - perm: VmPerm::RW, - global: false, - extension: PageTableFlags::empty().bits(), - cache: CachePolicy::Uncacheable, - }, - ); + self.get_or_create_page_table(device) + .map( + &(daddr..daddr + PAGE_SIZE), + &(paddr..paddr + PAGE_SIZE), + MapProperty { + perm: VmPerm::RW, + global: false, + extension: PageTableFlags::empty().bits(), + cache: CachePolicy::Uncacheable, + }, + ) + .unwrap(); Ok(()) } @@ -310,7 +312,8 @@ impl ContextTable { } unsafe { self.get_or_create_page_table(device) - .unmap_unchecked(&(daddr..daddr + PAGE_SIZE)); + .unmap(&(daddr..daddr + PAGE_SIZE)) + .unwrap(); } Ok(()) } diff --git a/framework/aster-frame/src/arch/x86/mm/mod.rs b/framework/aster-frame/src/arch/x86/mm/mod.rs index 5d0af247b..c85c67cf3 100644 --- a/framework/aster-frame/src/arch/x86/mm/mod.rs +++ b/framework/aster-frame/src/arch/x86/mm/mod.rs @@ -183,11 +183,23 @@ impl PageTableEntryTrait for PageTableEntry { if self.0 & PageTableFlags::DIRTY.bits() != 0 { status |= MapStatus::DIRTY; } + let extension = { + #[cfg(feature = "intel_tdx")] + { + let mut ext = PageTableFlags::empty(); + if self.0 & PageTableFlags::SHARED.bits() != 0 { + ext |= PageTableFlags::SHARED; + } + ext + } + #[cfg(not(feature = "intel_tdx"))] + 0 + }; MapInfo { prop: MapProperty { perm, global, - extension: (self.0 & !Self::PHYS_ADDR_MASK) as u64, + extension, cache, }, status, diff --git a/framework/aster-frame/src/arch/x86/tdx_guest.rs b/framework/aster-frame/src/arch/x86/tdx_guest.rs index 3cdf7cd25..3777e36e9 100644 --- a/framework/aster-frame/src/arch/x86/tdx_guest.rs +++ b/framework/aster-frame/src/arch/x86/tdx_guest.rs @@ -416,14 +416,12 @@ pub unsafe fn unprotect_gpa_range(gpa: TdxGpa, page_num: usize) -> Result<(), Pa } let vaddr = paddr_to_vaddr(gpa); let pt = KERNEL_PAGE_TABLE.get().unwrap(); - unsafe { - pt.protect_unchecked(&(vaddr..page_num * PAGE_SIZE), |info| MapProperty { - perm: info.prop.perm, - extension: PageTableFlags::SHARED.bits() as u64, - cache: info.prop.cache, - }) - .map_err(PageConvertError::PageTableError)?; - }; + pt.protect(&(vaddr..page_num * PAGE_SIZE), |info| MapProperty { + perm: info.prop.perm, + extension: PageTableFlags::SHARED.bits() as u64, + cache: info.prop.cache, + }) + .map_err(PageConvertError::PageTableError)?; map_gpa( (gpa & (!PAGE_MASK)) as u64 | SHARED_MASK, (page_num * PAGE_SIZE) as u64, @@ -452,16 +450,14 @@ pub unsafe fn protect_gpa_range(gpa: TdxGpa, page_num: usize) -> Result<(), Page } let vaddr = paddr_to_vaddr(gpa); let pt = KERNEL_PAGE_TABLE.get().unwrap(); - unsafe { - pt.protect_unchecked(&(vaddr..page_num * PAGE_SIZE), |info| MapProperty { - perm: info.prop.perm, - extension: (PageTableFlags::from_bits_truncate(info.prop.extension as usize) - - PageTableFlags::SHARED) - .bits() as u64, - cache: info.prop.cache, - }) - .map_err(PageConvertError::PageTableError)?; - }; + pt.protect(&(vaddr..page_num * PAGE_SIZE), |info| MapProperty { + perm: info.prop.perm, + extension: 
(PageTableFlags::from_bits_truncate(info.prop.extension as usize) + - PageTableFlags::SHARED) + .bits() as u64, + cache: info.prop.cache, + }) + .map_err(PageConvertError::PageTableError)?; map_gpa((gpa & PAGE_MASK) as u64, (page_num * PAGE_SIZE) as u64) .map_err(PageConvertError::TdVmcallError)?; for i in 0..page_num { diff --git a/framework/aster-frame/src/task/task.rs b/framework/aster-frame/src/task/task.rs index a3d548d3c..7a92161fd 100644 --- a/framework/aster-frame/src/task/task.rs +++ b/framework/aster-frame/src/task/task.rs @@ -69,12 +69,14 @@ impl KernelStack { let guard_page_paddr = stack_segment.start_paddr(); crate::vm::paddr_to_vaddr(guard_page_paddr) }; - // Safety: the physical guard page address is exclusively used since we allocated it. + // Safety: the segment allocated is not used by others so we can protect it. unsafe { - page_table.protect_unchecked( - &(guard_page_vaddr..guard_page_vaddr + PAGE_SIZE), - perm_op(|p| p - VmPerm::RW), - ); + page_table + .protect( + &(guard_page_vaddr..guard_page_vaddr + PAGE_SIZE), + perm_op(|p| p - VmPerm::RW), + ) + .unwrap(); } Ok(Self { segment: stack_segment, @@ -96,12 +98,14 @@ impl Drop for KernelStack { let guard_page_paddr = self.segment.start_paddr(); crate::vm::paddr_to_vaddr(guard_page_paddr) }; - // Safety: the physical guard page address is exclusively used since we allocated it. + // Safety: the segment allocated is not used by others so we can protect it. unsafe { - page_table.protect_unchecked( - &(guard_page_vaddr..guard_page_vaddr + PAGE_SIZE), - perm_op(|p| p | VmPerm::RW), - ); + page_table + .protect( + &(guard_page_vaddr..guard_page_vaddr + PAGE_SIZE), + perm_op(|p| p | VmPerm::RW), + ) + .unwrap(); } } } diff --git a/framework/aster-frame/src/trap/handler.rs b/framework/aster-frame/src/trap/handler.rs index 0c4407654..0c28f4731 100644 --- a/framework/aster-frame/src/trap/handler.rs +++ b/framework/aster-frame/src/trap/handler.rs @@ -227,18 +227,20 @@ fn handle_kernel_page_fault(f: &TrapFrame) { // correctness follows the semantics of the direct mapping of physical memory. // Do the mapping unsafe { - page_table.map_unchecked( - &(vaddr..vaddr + PAGE_SIZE), - &(paddr..paddr + PAGE_SIZE), - MapProperty { - perm: VmPerm::RW, - global: true, - #[cfg(feature = "intel_tdx")] - extension: PageTableFlags::SHARED.bits() as u64, - #[cfg(not(feature = "intel_tdx"))] - extension: 0, - cache: CachePolicy::Uncacheable, - }, - ) + page_table + .map( + &(vaddr..vaddr + PAGE_SIZE), + &(paddr..paddr + PAGE_SIZE), + MapProperty { + perm: VmPerm::RW, + global: true, + #[cfg(feature = "intel_tdx")] + extension: PageTableFlags::SHARED.bits() as u64, + #[cfg(not(feature = "intel_tdx"))] + extension: 0, + cache: CachePolicy::Uncacheable, + }, + ) + .unwrap(); } } diff --git a/framework/aster-frame/src/vm/dma/dma_coherent.rs b/framework/aster-frame/src/vm/dma/dma_coherent.rs index a73bdeaf2..944549bf1 100644 --- a/framework/aster-frame/src/vm/dma/dma_coherent.rs +++ b/framework/aster-frame/src/vm/dma/dma_coherent.rs @@ -59,9 +59,11 @@ impl DmaCoherent { let page_table = KERNEL_PAGE_TABLE.get().unwrap(); let vaddr = paddr_to_vaddr(start_paddr); let va_range = vaddr..vaddr + (frame_count * PAGE_SIZE); - // Safety: the address is in the range of `vm_segment`. + // Safety: the physical mappings is only used by DMA so protecting it is safe. 
unsafe { - page_table.protect_unchecked(&va_range, cache_policy_op(CachePolicy::Uncacheable)); + page_table + .protect(&va_range, cache_policy_op(CachePolicy::Uncacheable)) + .unwrap(); } } let start_daddr = match dma_type() { @@ -144,9 +146,11 @@ impl Drop for DmaCoherentInner { let page_table = KERNEL_PAGE_TABLE.get().unwrap(); let vaddr = paddr_to_vaddr(start_paddr); let va_range = vaddr..vaddr + (frame_count * PAGE_SIZE); - // Safety: the address is in the range of `vm_segment`. + // Safety: the physical mappings is only used by DMA so protecting it is safe. unsafe { - page_table.protect_unchecked(&va_range, cache_policy_op(CachePolicy::Writeback)); + page_table + .protect(&va_range, cache_policy_op(CachePolicy::Writeback)) + .unwrap(); } } remove_dma_mapping(start_paddr, frame_count); diff --git a/framework/aster-frame/src/vm/kspace.rs b/framework/aster-frame/src/vm/kspace.rs index e3e1d06d8..29725a64b 100644 --- a/framework/aster-frame/src/vm/kspace.rs +++ b/framework/aster-frame/src/vm/kspace.rs @@ -79,7 +79,7 @@ pub fn init_kernel_page_table() { }; // Safety: we are doing the linear mapping for the kernel. unsafe { - kpt.map_unchecked(&from, &to, prop); + kpt.map(&from, &to, prop).unwrap(); } // Map for the I/O area. // TODO: we need to have an allocator to allocate kernel space for @@ -94,7 +94,7 @@ pub fn init_kernel_page_table() { }; // Safety: we are doing I/O mappings for the kernel. unsafe { - kpt.map_unchecked(&from, &to, prop); + kpt.map(&from, &to, prop).unwrap(); } // Map for the kernel code itself. // TODO: set separated permissions for each segments in the kernel. @@ -114,7 +114,7 @@ pub fn init_kernel_page_table() { }; // Safety: we are doing mappings for the kernel. unsafe { - kpt.map_unchecked(&from, &to, prop); + kpt.map(&from, &to, prop).unwrap(); } KERNEL_PAGE_TABLE.call_once(|| kpt); } diff --git a/framework/aster-frame/src/vm/page_table/cursor.rs b/framework/aster-frame/src/vm/page_table/cursor.rs index 90b4e8b57..5f9cece31 100644 --- a/framework/aster-frame/src/vm/page_table/cursor.rs +++ b/framework/aster-frame/src/vm/page_table/cursor.rs @@ -1,143 +1,213 @@ // SPDX-License-Identifier: MPL-2.0 +//! The page table cursor for mapping and querying over the page table. +//! +//! ## The page table lock protocol +//! +//! We provide a fine-grained lock protocol to allow concurrent accesses to +//! the page table. The protocol is originally proposed by Ruihan Li +//! . +//! +//! [`CursorMut::new`] accepts an address range, which indicates the page table +//! entries that may be visited by this cursor. +//! +//! Then, [`CursorMut::new`] finds an intermediate page table (not necessarily +//! the last-level or the top-level) which represents an address range that contains +//! the whole specified address range. It requires all locks from the root page +//! table to the intermediate page table, but then unlocks all locks excluding the +//! one for the intermediate page table. CursorMut then maintains the lock +//! guards from one for the intermediate page table to the leaf that the cursor is +//! currently manipulating. +//! +//! For example, if we're going to map the address range shown below: +//! +//! ```plain +//! Top-level page table node A +//! / +//! B +//! / \ +//! Last-level page table nodes C D +//! Last-level PTEs ---**...**--- +//! \__ __/ +//! V +//! Address range that we're going to map +//! ``` +//! +//! When calling [`CursorMut::new`], it will: +//! 1. `lock(A)`, `lock(B)`, `unlock(A)`; +//! 2. `guards = [ locked(B) ]`. +//! +//! 
When calling [`CursorMut::map`], it will: +//! 1. `lock(C)`, `guards = [ locked(B), locked(C) ]`; +//! 2. Map some pages in `C`; +//! 3. `unlock(C)`, `lock_guard = [ locked(B) ]`; +//! 4. `lock(D)`, `lock_guard = [ locked(B), locked(D) ]`; +//! 5. Map some pages in D; +//! 6. `unlock(D)`, `lock_guard = [ locked(B) ]`; +//! +//! If all the mappings in `B` are cancelled when cursor finished it's traversal, +//! and `B` need to be recycled, a page walk from the root page table to `B` is +//! required. The cursor unlock all locks, then lock all the way down to `B`, then +//! check if `B` is empty, and finally recycle all the resources on the way back. + use alloc::sync::Arc; -use core::{any::TypeId, mem::size_of, ops::Range}; +use core::{any::TypeId, ops::Range}; + +use align_ext::AlignExt; use super::{ Child, KernelMode, MapInfo, MapOp, MapProperty, PageTable, PageTableConstsTrait, - PageTableEntryTrait, PageTableError, PageTableFrame, PageTableMode, PtfRef, + PageTableEntryTrait, PageTableError, PageTableFrame, PageTableMode, }; use crate::{ sync::{ArcSpinLockGuard, SpinLock}, - vm::{paddr_to_vaddr, Paddr, Vaddr, VmFrame}, + vm::{Paddr, Vaddr, VmFrame}, }; -/// The cursor for forward traversal over the page table. +/// The cursor for traversal over the page table. /// -/// Each method may move the cursor forward, doing mapping unmaping, or -/// querying this slot. +/// Efficient methods are provided to move the cursor forward by a slot, +/// doing mapping, unmaping, or querying for the traversed slot. Also you +/// can jump forward or backward by re-walking without releasing the lock. +/// +/// A slot is a PTE at any levels, which correspond to a certain virtual +/// memory range sized by the "page size" of the current level. /// /// Doing mapping is somewhat like a depth-first search on a tree, except -/// that we modify the tree while traversing it. We use a stack to simulate -/// the recursion. -/// -/// Any read or write accesses to nodes require exclusive access on the -/// entire path from the root to the node. But cursor can be created without -/// holding the lock, and can release the lock after yeilding the current -/// slot while querying over the page table with a range. Simultaneous -/// reading or writing to the same range in the page table will not produce -/// consistent results, only validity is guaranteed. -pub(super) struct PageTableCursor< - 'a, - M: PageTableMode, - E: PageTableEntryTrait, - C: PageTableConstsTrait, -> where - [(); C::NR_ENTRIES_PER_FRAME]:, - [(); C::NR_LEVELS]:, -{ - stack: [Option>; C::NR_LEVELS], - lock_guard: [Option>>; C::NR_LEVELS], - level: usize, - va: Vaddr, -} - -#[derive(Debug, Clone)] -pub(super) enum MapOption { - Map { - frame: VmFrame, - prop: MapProperty, - }, - MapUntyped { - pa: Paddr, - len: usize, - prop: MapProperty, - }, - Unmap { - len: usize, - }, -} - -impl MapOption { - fn paddr(&self) -> Option { - match self { - MapOption::Map { frame, prop } => Some(frame.start_paddr()), - MapOption::MapUntyped { pa, len, prop } => Some(*pa), - MapOption::Unmap { len } => None, - } - } - fn prop(&self) -> Option { - match self { - MapOption::Map { frame, prop } => Some(*prop), - MapOption::MapUntyped { pa, len, prop } => Some(*prop), - MapOption::Unmap { len } => None, - } - } - fn len(&self) -> usize { - match self { - // A VmFrame currently has a fixed size of 1 base page. 
- MapOption::Map { frame, prop } => crate::arch::mm::PageTableConsts::BASE_PAGE_SIZE, - MapOption::MapUntyped { pa, len, prop } => *len, - MapOption::Unmap { len: l } => *l, - } - } - fn consume(&mut self, len: usize) -> Self { - match self { - MapOption::Map { frame, prop } => { - debug_assert_eq!(len, crate::arch::mm::PageTableConsts::BASE_PAGE_SIZE); - let ret = self.clone(); - *self = MapOption::Unmap { len: 0 }; - ret - } - MapOption::MapUntyped { pa, len: l, prop } => { - debug_assert!(*l >= len); - let ret = MapOption::MapUntyped { - pa: *pa, - len, - prop: *prop, - }; - *self = MapOption::MapUntyped { - pa: *pa + len, - len: *l - len, - prop: *prop, - }; - ret - } - MapOption::Unmap { len: l } => { - debug_assert!(*l >= len); - let ret = MapOption::Unmap { len }; - *l -= len; - ret - } - } - } -} - -impl PageTableCursor<'_, M, E, C> +/// that we modify the tree while traversing it. We use a guard stack to +/// simulate the recursion, and adpot a page table locking protocol to +/// provide concurrency. +pub(crate) struct CursorMut<'a, M: PageTableMode, E: PageTableEntryTrait, C: PageTableConstsTrait> where [(); C::NR_ENTRIES_PER_FRAME]:, [(); C::NR_LEVELS]:, { - pub(super) fn new(pt: &PageTable, va: Vaddr) -> Self { - let mut stack = core::array::from_fn(|_| None); - stack[0] = Some(pt.root_frame.clone()); - let lock_guard = core::array::from_fn(|_| None); - Self { - stack, - lock_guard, + pt: &'a PageTable, + guards: [Option>>; C::NR_LEVELS], + level: usize, // current level + guard_level: usize, // from guard_level to level, the locks are held + va: Vaddr, // current virtual address + barrier_va: Range, // virtual address range that is locked +} + +impl<'a, M: PageTableMode, E: PageTableEntryTrait, C: PageTableConstsTrait> CursorMut<'a, M, E, C> +where + [(); C::NR_ENTRIES_PER_FRAME]:, + [(); C::NR_LEVELS]:, +{ + /// Create a cursor exclusively owning the locks for the given range. + /// + /// The cursor created will only be able to map, query or jump within the + /// given range. + pub(crate) fn new( + pt: &'a PageTable, + va: &Range, + ) -> Result { + if !M::covers(va) { + return Err(PageTableError::InvalidVaddrRange(va.start, va.end)); + } + if va.start % C::BASE_PAGE_SIZE != 0 || va.end % C::BASE_PAGE_SIZE != 0 { + return Err(PageTableError::UnalignedVaddr); + } + // Create a guard array that only hold the root node lock. + let guards = core::array::from_fn(|i| { + if i == 0 { + Some(pt.root_frame.lock_arc()) + } else { + None + } + }); + let mut cursor = Self { + pt, + guards, level: C::NR_LEVELS, - va, + guard_level: C::NR_LEVELS, + va: va.start, + barrier_va: va.clone(), + }; + // Go down and get proper locks. The cursor should hold a lock of a + // page table node containing the virtual address range. + // + // While going down, previous guards of too-high levels will be released. + loop { + let level_too_high = { + let start_idx = C::in_frame_index(va.start, cursor.level); + let end_idx = C::in_frame_index(va.end - 1, cursor.level); + start_idx == end_idx + }; + if !level_too_high || !cursor.cur_child().is_pt() { + break; + } + cursor.level_down(None); + cursor.guards[C::NR_LEVELS - cursor.level - 1] = None; + cursor.guard_level -= 1; + } + Ok(cursor) + } + + /// Jump to the given virtual address. + /// + /// It panics if the address is out of the range where the cursor is required to operate, + /// or has bad alignment. 
+ pub(crate) fn jump(&mut self, va: Vaddr) { + assert!(self.barrier_va.contains(&va)); + assert!(va % C::BASE_PAGE_SIZE == 0); + loop { + let cur_node_start = self.va & !(C::page_size(self.level + 1) - 1); + let cur_node_end = cur_node_start + C::page_size(self.level + 1); + // If the address is within the current node, we can jump directly. + if cur_node_start <= va && va < cur_node_end { + self.va = va; + return; + } + // There is a corner case that the cursor is depleted, sitting at the start of the + // next node but the next node is not locked because the parent is not locked. + if self.va >= self.barrier_va.end && self.level == self.guard_level { + self.va = va; + return; + } + debug_assert!(self.level < self.guard_level); + self.level_up(); } } - /// Map or unmap the range starting from the current address. + /// Map the range starting from the current address to a `VmFrame`. /// - /// The argument `create` allows you to map the continuous range to a physical - /// range with the given map property. + /// # Panic + /// + /// This function will panic if + /// - the virtual address range to be mapped is out of the range; + /// - it is already mapped to a huge page while the caller wants to map a smaller one. + /// + /// # Safety + /// + /// The caller should ensure that the virtual range being mapped does + /// not affect kernel's memory safety. + pub(crate) unsafe fn map(&mut self, frame: VmFrame, prop: MapProperty) { + let end = self.va + C::BASE_PAGE_SIZE; + assert!(end <= self.barrier_va.end); + // Go down if not applicable. + while self.level > C::HIGHEST_TRANSLATION_LEVEL + || self.va % C::page_size(self.level) != 0 + || self.va + C::page_size(self.level) > end + { + self.level_down(Some(prop)); + continue; + } + // Map the current page. + let idx = self.cur_idx(); + let level = self.level; + self.cur_node_mut() + .set_child(idx, Child::Frame(frame), Some(prop), level > 1); + self.move_forward(); + } + + /// Map the range starting from the current address to a physical address range. /// /// The function will map as more huge pages as possible, and it will split - /// the huge pages into smaller pages if necessary. If the input range is large, - /// the resulting mappings may look like this (if very huge pages supported): + /// the huge pages into smaller pages if necessary. If the input range is + /// large, the resulting mappings may look like this (if very huge pages + /// supported): /// /// ```text /// start end @@ -146,159 +216,245 @@ where /// 4KiB 2MiB 1GiB 4KiB 4KiB /// ``` /// - /// In practice it is suggested to use simple wrappers for this API that maps - /// frames for safety and conciseness. + /// In practice it is not suggested to use this method for safety and conciseness. /// /// # Safety /// - /// This function manipulates the page table directly, and it is unsafe because - /// it may cause undefined behavior if the caller does not ensure that the - /// mapped address is valid and the page table is not corrupted if it is used - /// by the kernel. - pub(super) unsafe fn map(&mut self, option: MapOption) { - self.acquire_locks(); - let len = option.len(); - let end = self.va + len; - let mut option = option; + /// The caller should ensure that + /// - the range being mapped does not affect kernel's memory safety; + /// - the physical address to be mapped is valid and safe to use. 
+ pub(crate) unsafe fn map_pa(&mut self, pa: &Range, prop: MapProperty) { + let end = self.va + pa.len(); + let mut pa = pa.start; + assert!(end <= self.barrier_va.end); while self.va < end { - // Skip if we are unmapping and it is already invalid. - let cur_pte = unsafe { self.cur_pte_ptr().read() }; - if matches!(option, MapOption::Unmap { .. }) && !cur_pte.is_valid() { - self.next_slot(); + // We ensure not mapping in reserved kernel shared tables or releasing it. + // Although it may be an invariant for all architectures and will be optimized + // out by the compiler since `C::NR_LEVELS - 1 > C::HIGHEST_TRANSLATION_LEVEL`. + let is_kernel_shared_node = + TypeId::of::() == TypeId::of::() && self.level >= C::NR_LEVELS - 1; + if self.level > C::HIGHEST_TRANSLATION_LEVEL + || is_kernel_shared_node + || self.va % C::page_size(self.level) != 0 + || self.va + C::page_size(self.level) > end + || pa % C::page_size(self.level) != 0 + { + self.level_down(Some(prop)); + continue; + } + // Map the current page. + let idx = self.cur_idx(); + let level = self.level; + self.cur_node_mut() + .set_child(idx, Child::Untracked(pa), Some(prop), level > 1); + pa += C::page_size(level); + self.move_forward(); + } + } + + /// Unmap the range starting from the current address with the given length of virtual address. + /// + /// # Safety + /// + /// The caller should ensure that the range being unmapped does not affect kernel's memory safety. + /// + /// # Panic + /// + /// This function will panic if: + /// - the range to be unmapped is out of the range where the cursor is required to operate; + /// - the range covers only a part of a page. + pub(crate) unsafe fn unmap(&mut self, len: usize) { + let end = self.va + len; + assert!(end <= self.barrier_va.end); + assert!(end % C::BASE_PAGE_SIZE == 0); + while self.va < end { + // Skip if it is already invalid. + if self.cur_child().is_none() { + if self.va + C::page_size(self.level) > end { + break; + } + self.move_forward(); continue; } // We check among the conditions that may lead to a level down. - let is_pa_not_aligned = option - .paddr() - .map(|pa| pa % C::page_size(self.level) != 0) - .unwrap_or(false); - let map_but_too_huge = self.level > C::HIGHEST_TRANSLATION_LEVEL - && !matches!(option, MapOption::Unmap { .. }); - // We ensure not mapping in reserved kernel shared tables or releasing it. - // Although it may be an invariant for all architectures and will be optimized - // out by the compiler since `C::NR_LEVELS - 1 > C::HIGHEST_TRANSLATION_LEVEL`. - let kshared_lvl_down = + // We ensure not unmapping in reserved kernel shared tables or releasing it. + let is_kernel_shared_node = TypeId::of::() == TypeId::of::() && self.level >= C::NR_LEVELS - 1; - if map_but_too_huge - || kshared_lvl_down + if is_kernel_shared_node || self.va % C::page_size(self.level) != 0 || self.va + C::page_size(self.level) > end - || is_pa_not_aligned { - let ld_prop = option.prop().unwrap_or(MapProperty::new_invalid()); - self.level_down(Some(ld_prop)); + self.level_down(Some(MapProperty::new_invalid())); continue; } - self.map_page(option.consume(C::page_size(self.level))); - self.next_slot(); + + // Unmap the current page. + let idx = self.cur_idx(); + self.cur_node_mut().set_child(idx, Child::None, None, false); + self.move_forward(); } - self.release_locks(); } /// Apply the given operation to all the mappings within the range. 
- pub(super) unsafe fn protect( + /// + /// The funtction will return an error if it is not allowed to protect an invalid range and + /// it does so, or if the range to be protected only covers a part of a page. + /// + /// # Safety + /// + /// The caller should ensure that the range being protected does not affect kernel's memory safety. + /// + /// # Panic + /// + /// This function will panic if: + /// - the range to be protected is out of the range where the cursor is required to operate. + pub(crate) unsafe fn protect( &mut self, len: usize, op: impl MapOp, allow_protect_invalid: bool, ) -> Result<(), PageTableError> { - self.acquire_locks(); let end = self.va + len; + assert!(end <= self.barrier_va.end); while self.va < end { - let cur_pte = unsafe { self.cur_pte_ptr().read() }; - if !cur_pte.is_valid() { + if self.cur_child().is_none() { if !allow_protect_invalid { return Err(PageTableError::ProtectingInvalid); } - self.next_slot(); + self.move_forward(); continue; } - // Go down if it's not a last node or if the page size is too big. - if !(cur_pte.is_huge() || self.level == 1) - || (self.va % C::page_size(self.level)) != 0 - || self.va + C::page_size(self.level) > end - { - self.level_down(Some(op(cur_pte.info()))); + // Go down if it's not a last node. + if self.cur_child().is_pt() { + self.level_down(None); continue; } - // Apply the operation. - unsafe { - self.cur_pte_ptr().write(E::new( - cur_pte.paddr(), - op(cur_pte.info()), - cur_pte.is_huge(), - true, - )) - }; - self.next_slot(); + let vaddr_not_fit = + self.va % C::page_size(self.level) != 0 || self.va + C::page_size(self.level) > end; + let cur_pte_info = self.read_cur_pte_info(); + let protected_prop = op(cur_pte_info); + // Go down if the page size is too big and we are protecting part + // of untyped huge pages. + if self.cur_child().is_untyped() && vaddr_not_fit { + self.level_down(Some(protected_prop)); + continue; + } else if vaddr_not_fit { + return Err(PageTableError::ProtectingPartial); + } + let idx = self.cur_idx(); + let level = self.level; + self.cur_node_mut().protect(idx, protected_prop, level); + self.move_forward(); } - self.release_locks(); Ok(()) } - fn cur_pte_ptr(&self) -> *mut E { - let ptf = self.lock_guard[C::NR_LEVELS - self.level].as_ref().unwrap(); - let frame_addr = paddr_to_vaddr(ptf.inner.start_paddr()); - let offset = C::in_frame_index(self.va, self.level); - (frame_addr + offset * size_of::()) as *mut E + /// Get the information of the current slot and move to the next slot. + pub(crate) fn query(&mut self) -> Option { + if self.va >= self.barrier_va.end { + return None; + } + loop { + let level = self.level; + let va = self.va; + let map_info = self.read_cur_pte_info(); + match self.cur_child().clone() { + Child::Frame(frame) => { + self.move_forward(); + return Some(PageTableQueryResult::Mapped { + va, + frame, + info: map_info, + }); + } + Child::PageTable(_) => { + // Go down if it's not a last node. + self.level_down(None); + continue; + } + Child::Untracked(pa) => { + self.move_forward(); + return Some(PageTableQueryResult::MappedUntyped { + va, + pa, + len: C::page_size(level), + info: map_info, + }); + } + Child::None => { + self.move_forward(); + return Some(PageTableQueryResult::NotMapped { + va, + len: C::page_size(level), + }); + } + } + } + } + + /// Consume itself and leak the root guard for the caller if it locked the root level. + /// + /// It is useful when the caller wants to keep the root guard while the cursor should be dropped. 
+ pub(super) fn leak_root_guard(mut self) -> Option>> { + if self.guard_level != C::NR_LEVELS { + return None; + } + while self.level < C::NR_LEVELS { + self.level_up(); + } + self.guards[0].take() + // Ok to drop self here because we ensure not to access the page table if the current + // level is the root level when running the dropping method. } /// Traverse forward in the current level to the next PTE. - /// If reached the end of a page table frame, it leads itself up to the next frame of the parent frame. - fn next_slot(&mut self) { + /// + /// If reached the end of a page table frame, it leads itself up to the next frame of the parent + /// frame if possible. + fn move_forward(&mut self) { let page_size = C::page_size(self.level); - while self.level < C::NR_LEVELS && C::in_frame_index(self.va + page_size, self.level) == 0 { + let next_va = self.va.align_down(page_size) + page_size; + while self.level < self.guard_level && C::in_frame_index(next_va, self.level) == 0 { self.level_up(); } - self.va += page_size; + self.va = next_va; } /// Go up a level. We release the current frame if it has no mappings since the cursor only moves - /// forward. And we will do the final cleanup using `level_up` when the cursor is dropped. + /// forward. And if needed we will do the final cleanup using this method after re-walk when the + /// cursor is dropped. /// /// This method requires locks acquired before calling it. The discarded level will be unlocked. fn level_up(&mut self) { - let last_map_cnt_is_zero = { - let top_ptf = self.lock_guard[C::NR_LEVELS - self.level].as_ref().unwrap(); - top_ptf.map_count == 0 - }; - self.stack[C::NR_LEVELS - self.level] = None; - self.lock_guard[C::NR_LEVELS - self.level] = None; + let last_node_all_unmapped = self.cur_node().nr_valid_children() == 0; + self.guards[C::NR_LEVELS - self.level] = None; self.level += 1; let can_release_child = TypeId::of::() == TypeId::of::() && self.level < C::NR_LEVELS; - if can_release_child && last_map_cnt_is_zero { - let top_ptf = self.lock_guard[C::NR_LEVELS - self.level] - .as_deref_mut() - .unwrap(); - let frame_addr = paddr_to_vaddr(top_ptf.inner.start_paddr()); - let idx = C::in_frame_index(self.va, self.level); - unsafe { (frame_addr as *mut E).add(idx).write(E::new_invalid()) } - top_ptf.child[idx] = None; - top_ptf.map_count -= 1; + if can_release_child && last_node_all_unmapped { + let idx = self.cur_idx(); + self.cur_node_mut().set_child(idx, Child::None, None, false); } } - /// A level down operation during traversal. It may split a huge page into - /// smaller pages if we have an end address within the next mapped huge page. - /// It may also create a new child frame if the current frame does not have one. - /// If that may happen the map property of intermediate level `prop` should be + /// A level down operation during traversal. It may create a new child frame if the + /// current frame does not have one. It may also split an untyped huge page into + /// smaller pages if we have an end address within the next mapped untyped huge page. + /// + /// If creation may happen the map property of intermediate level `prop` should be /// passed in correctly. Whether the map property matters in an intermediate /// level is architecture-dependent. /// - /// This method requires write locks acquired before calling it. The newly added - /// level will still hold the lock. 
- unsafe fn level_down(&mut self, prop: Option) { + /// Also, the staticness of the page table is guaranteed if the caller make sure + /// that there is a child node for the current node. + fn level_down(&mut self, prop: Option) { debug_assert!(self.level > 1); // Check if the child frame exists. let nxt_lvl_frame = { let idx = C::in_frame_index(self.va, self.level); - let child = { - let top_ptf = self.lock_guard[C::NR_LEVELS - self.level].as_ref().unwrap(); - &top_ptf.child[idx] - }; - if let Some(Child::PageTable(nxt_lvl_frame)) = child { + let child = self.cur_child(); + if let Child::PageTable(nxt_lvl_frame) = child { Some(nxt_lvl_frame.clone()) } else { None @@ -307,143 +463,89 @@ where // Create a new child frame if it does not exist. Sure it could be done only if // it is allowed to modify the page table. let nxt_lvl_frame = nxt_lvl_frame.unwrap_or_else(|| { - let mut new_frame = PageTableFrame::::new(); - // If it already maps a huge page, we should split it. - let pte = unsafe { self.cur_pte_ptr().read() }; - if pte.is_valid() && pte.is_huge() { - let pa = pte.paddr(); - let prop = pte.info().prop; - for i in 0..C::NR_ENTRIES_PER_FRAME { - let nxt_level = self.level - 1; - let nxt_pte = { - let frame_addr = paddr_to_vaddr(new_frame.inner.start_paddr()); - &mut *(frame_addr as *mut E).add(i) - }; - *nxt_pte = E::new(pa + i * C::page_size(nxt_level), prop, nxt_level > 1, true); - } - new_frame.map_count = C::NR_ENTRIES_PER_FRAME; - unsafe { - self.cur_pte_ptr().write(E::new( - new_frame.inner.start_paddr(), - prop, - false, - false, - )) - } + // If it already maps an untyped huge page, we should split it. + if self.cur_child().is_untyped() { + let level = self.level; + let idx = self.cur_idx(); + self.cur_node_mut().split_untracked_huge(level, idx); + let Child::PageTable(nxt_lvl_frame) = self.cur_child() else { + unreachable!() + }; + nxt_lvl_frame.clone() + } else if self.cur_child().is_none() { + let new_frame = Arc::new(SpinLock::new(PageTableFrame::::new())); + let idx = self.cur_idx(); + self.cur_node_mut().set_child( + idx, + Child::PageTable(new_frame.clone()), + prop, + false, + ); + new_frame } else { - // The child couldn't be valid here because child is none and it's not huge. - debug_assert!(!pte.is_valid()); - unsafe { - self.cur_pte_ptr().write(E::new( - new_frame.inner.start_paddr(), - prop.unwrap(), - false, - false, - )) - } + panic!("Trying to level down when it is mapped to a typed frame"); } - let top_ptf = self.lock_guard[C::NR_LEVELS - self.level] - .as_deref_mut() - .unwrap(); - top_ptf.map_count += 1; - let new_frame_ref = Arc::new(SpinLock::new(new_frame)); - top_ptf.child[C::in_frame_index(self.va, self.level)] = - Some(Child::PageTable(new_frame_ref.clone())); - new_frame_ref }); - self.lock_guard[C::NR_LEVELS - self.level + 1] = Some(nxt_lvl_frame.lock_arc()); - self.stack[C::NR_LEVELS - self.level + 1] = Some(nxt_lvl_frame); + self.guards[C::NR_LEVELS - self.level + 1] = Some(nxt_lvl_frame.lock_arc()); self.level -= 1; } - /// Map or unmap the page pointed to by the cursor (which could be large). - /// If the physical address and the map property are not provided, it unmaps - /// the current page. - /// - /// This method requires write locks acquired before calling it. 
- unsafe fn map_page(&mut self, option: MapOption) { - let pte_ptr = self.cur_pte_ptr(); - let top_ptf = self.lock_guard[C::NR_LEVELS - self.level] - .as_deref_mut() - .unwrap(); - let child = { - let idx = C::in_frame_index(self.va, self.level); - if top_ptf.child[idx].is_some() { - top_ptf.child[idx] = None; - top_ptf.map_count -= 1; - } - &mut top_ptf.child[idx] - }; - match option { - MapOption::Map { frame, prop } => { - let pa = frame.start_paddr(); - unsafe { - pte_ptr.write(E::new(pa, prop, self.level > 1, true)); - } - *child = Some(Child::Frame(frame)); - top_ptf.map_count += 1; - } - MapOption::MapUntyped { pa, len, prop } => { - debug_assert_eq!(len, C::page_size(self.level)); - unsafe { - pte_ptr.write(E::new(pa, prop, self.level > 1, true)); - } - top_ptf.map_count += 1; - } - MapOption::Unmap { len } => { - debug_assert_eq!(len, C::page_size(self.level)); - unsafe { pte_ptr.write(E::new_invalid()) } - } - } + fn cur_node(&self) -> &ArcSpinLockGuard> { + self.guards[C::NR_LEVELS - self.level].as_ref().unwrap() } - fn acquire_locks(&mut self) { - for i in 0..=C::NR_LEVELS - self.level { - let Some(ref ptf) = self.stack[i] else { - panic!("Invalid values in PT cursor stack while acuqiring locks"); - }; - debug_assert!(self.lock_guard[i].is_none()); - self.lock_guard[i] = Some(ptf.lock_arc()); - } + fn cur_node_mut(&mut self) -> &mut ArcSpinLockGuard> { + self.guards[C::NR_LEVELS - self.level].as_mut().unwrap() } - fn release_locks(&mut self) { - for i in (0..=C::NR_LEVELS - self.level).rev() { - let Some(ref ptf) = self.stack[i] else { - panic!("Invalid values in PT cursor stack while releasing locks"); - }; - debug_assert!(self.lock_guard[i].is_some()); - self.lock_guard[i] = None; - } + fn cur_idx(&self) -> usize { + C::in_frame_index(self.va, self.level) + } + + fn cur_child(&self) -> &Child { + self.cur_node().child(self.cur_idx()) + } + + fn read_cur_pte_info(&self) -> MapInfo { + self.cur_node().read_pte_info(self.cur_idx()) } } -/// The iterator for querying over the page table without modifying it. -pub struct PageTableIter<'a, M: PageTableMode, E: PageTableEntryTrait, C: PageTableConstsTrait> +impl Drop + for CursorMut<'_, M, E, C> where [(); C::NR_ENTRIES_PER_FRAME]:, [(); C::NR_LEVELS]:, { - cursor: PageTableCursor<'a, M, E, C>, - end_va: Vaddr, -} - -impl<'a, M: PageTableMode, E: PageTableEntryTrait, C: PageTableConstsTrait> - PageTableIter<'a, M, E, C> -where - [(); C::NR_ENTRIES_PER_FRAME]:, - [(); C::NR_LEVELS]:, -{ - pub(super) fn new(pt: &'a PageTable, va: &Range) -> Self { - Self { - cursor: PageTableCursor::new(pt, va.start), - end_va: va.end, + fn drop(&mut self) { + // Recycle what we can recycle now. + while self.level < self.guard_level { + self.level_up(); + } + // No need to do further cleanup if it is the root node or + // there are mappings left. + if self.level == self.guard_level || self.cur_node().nr_valid_children() != 0 { + return; + } + // Drop the lock on the guard level. + self.guards[C::NR_LEVELS - self.guard_level] = None; + // Re-walk the page table to retreive the locks. + self.guards[0] = Some(self.pt.root_frame.lock_arc()); + self.level = C::NR_LEVELS; + // Another cursor can unmap the guard level node before this cursor + // is dropped, we can just do our best here when re-walking. + while self.level > self.guard_level && self.cur_child().is_pt() { + self.level_down(None); + } + // Doing final cleanup by [`CursorMut::level_up`] to the root. 
+ while self.level < C::NR_LEVELS { + self.level_up(); } } } #[derive(Clone, Debug)] -pub enum PageTableQueryResult { +pub(crate) enum PageTableQueryResult { NotMapped { va: Vaddr, len: usize, @@ -461,8 +563,32 @@ pub enum PageTableQueryResult { }, } +/// The read-only cursor for traversal over the page table. +/// +/// It implements the `Iterator` trait to provide a convenient way to query over the page table. +pub(crate) struct Cursor<'a, M: PageTableMode, E: PageTableEntryTrait, C: PageTableConstsTrait> +where + [(); C::NR_ENTRIES_PER_FRAME]:, + [(); C::NR_LEVELS]:, +{ + inner: CursorMut<'a, M, E, C>, +} + +impl<'a, M: PageTableMode, E: PageTableEntryTrait, C: PageTableConstsTrait> Cursor<'a, M, E, C> +where + [(); C::NR_ENTRIES_PER_FRAME]:, + [(); C::NR_LEVELS]:, +{ + pub(super) fn new( + pt: &'a PageTable, + va: &Range, + ) -> Result { + CursorMut::new(pt, va).map(|inner| Self { inner }) + } +} + impl<'a, M: PageTableMode, E: PageTableEntryTrait, C: PageTableConstsTrait> Iterator - for PageTableIter<'a, M, E, C> + for Cursor<'a, M, E, C> where [(); C::NR_ENTRIES_PER_FRAME]:, [(); C::NR_LEVELS]:, @@ -470,63 +596,6 @@ where type Item = PageTableQueryResult; fn next(&mut self) -> Option { - self.cursor.acquire_locks(); - if self.cursor.va >= self.end_va { - return None; - } - loop { - let level = self.cursor.level; - let va = self.cursor.va; - let top_ptf = self.cursor.lock_guard[C::NR_LEVELS - level] - .as_ref() - .unwrap(); - let cur_pte = unsafe { self.cursor.cur_pte_ptr().read() }; - // Yeild if it's not a valid node. - if !cur_pte.is_valid() { - self.cursor.next_slot(); - self.cursor.release_locks(); - return Some(PageTableQueryResult::NotMapped { - va, - len: C::page_size(level), - }); - } - // Go down if it's not a last node. - if !(cur_pte.is_huge() || level == 1) { - debug_assert!(cur_pte.is_valid()); - // Safety: it's valid and there should be a child frame here. - unsafe { - self.cursor.level_down(None); - } - continue; - } - // Yield the current mapping. - let map_info = cur_pte.info(); - let idx = C::in_frame_index(self.cursor.va, self.cursor.level); - match top_ptf.child[idx] { - Some(Child::Frame(ref frame)) => { - let frame = frame.clone(); - self.cursor.next_slot(); - self.cursor.release_locks(); - return Some(PageTableQueryResult::Mapped { - va, - frame, - info: map_info, - }); - } - Some(Child::PageTable(_)) => { - panic!("The child couldn't be page table here because it's valid and not huge"); - } - None => { - self.cursor.next_slot(); - self.cursor.release_locks(); - return Some(PageTableQueryResult::MappedUntyped { - va, - pa: cur_pte.paddr(), - len: C::page_size(level), - info: map_info, - }); - } - } - } + self.inner.query() } } diff --git a/framework/aster-frame/src/vm/page_table/frame.rs b/framework/aster-frame/src/vm/page_table/frame.rs index 618ede097..ef3659434 100644 --- a/framework/aster-frame/src/vm/page_table/frame.rs +++ b/framework/aster-frame/src/vm/page_table/frame.rs @@ -2,10 +2,10 @@ use alloc::{boxed::Box, sync::Arc}; -use super::{PageTableConstsTrait, PageTableEntryTrait}; +use super::{MapInfo, MapProperty, PageTableConstsTrait, PageTableEntryTrait}; use crate::{ sync::SpinLock, - vm::{VmAllocOptions, VmFrame}, + vm::{Paddr, VmAllocOptions, VmFrame}, }; /// A page table frame. @@ -17,14 +17,12 @@ where [(); C::NR_ENTRIES_PER_FRAME]:, [(); C::NR_LEVELS]:, { - pub inner: VmFrame, + inner: VmFrame, /// TODO: all the following fields can be removed if frame metadata is introduced. /// Here we allow 2x space overhead each frame temporarily. 
#[allow(clippy::type_complexity)] - pub child: Box<[Option>; C::NR_ENTRIES_PER_FRAME]>, - /// The number of mapped frames or page tables. - /// This is to track if we can free itself. - pub map_count: usize, + children: Box<[Child; C::NR_ENTRIES_PER_FRAME]>, + nr_valid_children: usize, } pub(super) type PtfRef = Arc>>; @@ -37,6 +35,47 @@ where { PageTable(PtfRef), Frame(VmFrame), + /// Frames not tracked by the frame allocator. + Untracked(Paddr), + None, +} + +impl Child +where + [(); C::NR_ENTRIES_PER_FRAME]:, + [(); C::NR_LEVELS]:, +{ + pub(super) fn is_pt(&self) -> bool { + matches!(self, Child::PageTable(_)) + } + pub(super) fn is_frame(&self) -> bool { + matches!(self, Child::Frame(_)) + } + pub(super) fn is_none(&self) -> bool { + matches!(self, Child::None) + } + pub(super) fn is_some(&self) -> bool { + !self.is_none() + } + pub(super) fn is_untyped(&self) -> bool { + matches!(self, Child::Untracked(_)) + } + /// Is a last entry that maps to a physical address. + pub(super) fn is_last(&self) -> bool { + matches!(self, Child::Frame(_) | Child::Untracked(_)) + } + fn paddr(&self) -> Option { + match self { + Child::PageTable(node) => { + // Chance if dead lock is zero because it is only called by [`PageTableFrame::protect`], + // and the cursor will not protect a node while holding the lock. + Some(node.lock().start_paddr()) + } + Child::Frame(frame) => Some(frame.start_paddr()), + Child::Untracked(pa) => Some(*pa), + Child::None => None, + } + } } impl Clone for Child @@ -49,6 +88,8 @@ where match self { Child::PageTable(ptf) => Child::PageTable(ptf.clone()), Child::Frame(frame) => Child::Frame(frame.clone()), + Child::Untracked(pa) => Child::Untracked(*pa), + Child::None => Child::None, } } } @@ -61,10 +102,130 @@ where pub(super) fn new() -> Self { Self { inner: VmAllocOptions::new(1).alloc_single().unwrap(), - child: Box::new(core::array::from_fn(|_| None)), - map_count: 0, + children: Box::new(core::array::from_fn(|_| Child::None)), + nr_valid_children: 0, } } + + pub(super) fn start_paddr(&self) -> Paddr { + self.inner.start_paddr() + } + + pub(super) fn child(&self, idx: usize) -> &Child { + debug_assert!(idx < C::NR_ENTRIES_PER_FRAME); + &self.children[idx] + } + + /// The number of mapped frames or page tables. + /// This is to track if we can free itself. + pub(super) fn nr_valid_children(&self) -> usize { + self.nr_valid_children + } + + /// Read the info from a page table entry at a given index. + pub(super) fn read_pte_info(&self, idx: usize) -> MapInfo { + self.read_pte(idx).info() + } + + /// Split the untracked huge page mapped at `idx` to smaller pages. + pub(super) fn split_untracked_huge(&mut self, cur_level: usize, idx: usize) { + debug_assert!(idx < C::NR_ENTRIES_PER_FRAME); + debug_assert!(cur_level > 1); + let Child::Untracked(pa) = self.children[idx] else { + panic!("split_untracked_huge: not an untyped huge page"); + }; + let info = self.read_pte_info(idx); + let mut new_frame = Self::new(); + for i in 0..C::NR_ENTRIES_PER_FRAME { + let small_pa = pa + i * C::page_size(cur_level - 1); + new_frame.set_child( + i, + Child::Untracked(small_pa), + Some(info.prop), + cur_level - 1 > 1, + ); + } + self.set_child( + idx, + Child::PageTable(Arc::new(SpinLock::new(new_frame))), + Some(info.prop), + false, + ); + } + + /// Map a child at a given index. + /// If mapping a non-none child, please give the property to map the child. 
+ pub(super) fn set_child( + &mut self, + idx: usize, + child: Child, + prop: Option, + huge: bool, + ) { + assert!(idx < C::NR_ENTRIES_PER_FRAME); + // Safety: the index is within the bound and the PTE to be written is valid. + // And the physical address of PTE points to initialized memory. + // This applies to all the following `write_pte` invocations. + unsafe { + match &child { + Child::PageTable(node) => { + debug_assert!(!huge); + let frame = node.lock(); + self.write_pte( + idx, + E::new(frame.inner.start_paddr(), prop.unwrap(), false, false), + ); + self.nr_valid_children += 1; + } + Child::Frame(frame) => { + debug_assert!(!huge); // `VmFrame` currently can only be a regular page. + self.write_pte(idx, E::new(frame.start_paddr(), prop.unwrap(), false, true)); + self.nr_valid_children += 1; + } + Child::Untracked(pa) => { + self.write_pte(idx, E::new(*pa, prop.unwrap(), huge, true)); + self.nr_valid_children += 1; + } + Child::None => { + self.write_pte(idx, E::new_invalid()); + } + } + } + if self.children[idx].is_some() { + self.nr_valid_children -= 1; + } + self.children[idx] = child; + } + + /// Protect an already mapped child at a given index. + pub(super) fn protect(&mut self, idx: usize, prop: MapProperty, level: usize) { + debug_assert!(self.children[idx].is_some()); + let paddr = self.children[idx].paddr().unwrap(); + // Safety: the index is within the bound and the PTE is valid. + unsafe { + self.write_pte( + idx, + E::new(paddr, prop, level > 1, self.children[idx].is_last()), + ); + } + } + + fn read_pte(&self, idx: usize) -> E { + assert!(idx < C::NR_ENTRIES_PER_FRAME); + // Safety: the index is within the bound and PTE is plain-old-data. + unsafe { (self.inner.as_ptr() as *const E).add(idx).read() } + } + + /// Write a page table entry at a given index. + /// + /// # Safety + /// + /// The caller must ensure that: + /// - the index is within bounds; + /// - the PTE is valid an the physical address in the PTE points to initialized memory. + unsafe fn write_pte(&mut self, idx: usize, pte: E) { + (self.inner.as_mut_ptr() as *mut E).add(idx).write(pte); + } } impl Clone for PageTableFrame @@ -77,13 +238,14 @@ where fn clone(&self) -> Self { let new_frame = VmAllocOptions::new(1).alloc_single().unwrap(); let new_ptr = new_frame.as_mut_ptr() as *mut E; - let ptr = self.inner.as_ptr() as *const E; - let child = Box::new(core::array::from_fn(|i| { - self.child[i].as_ref().map(|child| match child { - Child::PageTable(ptf) => unsafe { - let frame = ptf.lock(); + let children = Box::new(core::array::from_fn(|i| match self.child(i) { + Child::PageTable(node) => unsafe { + let frame = node.lock(); + // Possibly a cursor is waiting for the root lock to recycle this node. + // We can skip copying empty page table nodes. 
+ if frame.nr_valid_children() != 0 { let cloned = frame.clone(); - let pte = ptr.add(i).read(); + let pte = self.read_pte(i); new_ptr.add(i).write(E::new( cloned.inner.start_paddr(), pte.info().prop, @@ -91,20 +253,22 @@ where false, )); Child::PageTable(Arc::new(SpinLock::new(cloned))) - }, - Child::Frame(frame) => { - unsafe { - let pte = ptr.add(i).read(); - new_ptr.add(i).write(pte); - } - Child::Frame(frame.clone()) + } else { + Child::None } - }) + }, + Child::Frame(_) | Child::Untracked(_) => { + unsafe { + new_ptr.add(i).write(self.read_pte(i)); + } + self.children[i].clone() + } + Child::None => Child::None, })); Self { inner: new_frame, - child, - map_count: self.map_count, + children, + nr_valid_children: self.nr_valid_children, } } } diff --git a/framework/aster-frame/src/vm/page_table/mod.rs b/framework/aster-frame/src/vm/page_table/mod.rs index bf87b43eb..32e1f3cf6 100644 --- a/framework/aster-frame/src/vm/page_table/mod.rs +++ b/framework/aster-frame/src/vm/page_table/mod.rs @@ -1,12 +1,12 @@ // SPDX-License-Identifier: MPL-2.0 -use alloc::{boxed::Box, sync::Arc}; -use core::{fmt::Debug, marker::PhantomData, mem::size_of, ops::Range, panic}; +use alloc::sync::Arc; +use core::{fmt::Debug, marker::PhantomData, ops::Range, panic}; use crate::{ arch::mm::{activate_page_table, PageTableConsts, PageTableEntry}, sync::SpinLock, - vm::{paddr_to_vaddr, Paddr, Vaddr, VmAllocOptions, VmFrameVec, VmPerm}, + vm::{paddr_to_vaddr, Paddr, Vaddr, VmPerm}, }; mod properties; @@ -14,21 +14,20 @@ pub use properties::*; mod frame; use frame::*; mod cursor; -use cursor::*; -pub(crate) use cursor::{PageTableIter, PageTableQueryResult}; +pub(crate) use cursor::{Cursor, CursorMut, PageTableQueryResult}; #[cfg(ktest)] mod test; #[derive(Clone, Copy, PartialEq, Eq, Debug)] pub enum PageTableError { - InvalidVaddr(Vaddr), + /// The virtual address range is invalid. InvalidVaddrRange(Vaddr, Vaddr), - VaddrNotAligned(Vaddr), - VaddrRangeNotAligned(Vaddr, Vaddr), - PaddrNotAligned(Paddr), - PaddrRangeNotAligned(Vaddr, Vaddr), - // Protecting a mapping that does not exist. + /// Using virtual address not aligned. + UnalignedVaddr, + /// Protecting a mapping that does not exist. ProtectingInvalid, + /// Protecting a part of an already mapped page. + ProtectingPartial, } /// This is a compile-time technique to force the frame developers to distinguish @@ -38,8 +37,8 @@ pub trait PageTableMode: Clone + Debug + 'static { /// The range of virtual addresses that the page table can manage. const VADDR_RANGE: Range; - /// Check if the given range is within the valid virtual address range. - fn encloses(r: &Range) -> bool { + /// Check if the given range is covered by the valid virtual address range. + fn covers(r: &Range) -> bool { Self::VADDR_RANGE.start <= r.start && r.end <= Self::VADDR_RANGE.end } } @@ -78,61 +77,6 @@ where [(); C::NR_ENTRIES_PER_FRAME]:, [(); C::NR_LEVELS]:, { - pub(crate) fn map_frames( - &self, - vaddr: Vaddr, - frames: VmFrameVec, - prop: MapProperty, - ) -> Result<(), PageTableError> { - if vaddr % C::BASE_PAGE_SIZE != 0 { - return Err(PageTableError::VaddrNotAligned(vaddr)); - } - let va_range = vaddr - ..vaddr - .checked_add(frames.nbytes()) - .ok_or(PageTableError::InvalidVaddr(vaddr))?; - if !UserMode::encloses(&va_range) { - return Err(PageTableError::InvalidVaddrRange( - va_range.start, - va_range.end, - )); - } - // Safety: modification to the user page table is safe. 
- unsafe { - self.map_frames_unchecked(vaddr, frames, prop); - } - Ok(()) - } - - pub(crate) fn unmap(&self, vaddr: &Range) -> Result<(), PageTableError> { - if vaddr.start % C::BASE_PAGE_SIZE != 0 || vaddr.end % C::BASE_PAGE_SIZE != 0 { - return Err(PageTableError::VaddrRangeNotAligned(vaddr.start, vaddr.end)); - } - if !UserMode::encloses(vaddr) { - return Err(PageTableError::InvalidVaddrRange(vaddr.start, vaddr.end)); - } - // Safety: modification to the user page table is safe. - unsafe { - self.unmap_unchecked(vaddr); - } - Ok(()) - } - - pub(crate) fn protect( - &self, - vaddr: &Range, - op: impl MapOp, - ) -> Result<(), PageTableError> { - if vaddr.start % C::BASE_PAGE_SIZE != 0 || vaddr.end % C::BASE_PAGE_SIZE != 0 { - return Err(PageTableError::VaddrRangeNotAligned(vaddr.start, vaddr.end)); - } - if !UserMode::encloses(vaddr) { - return Err(PageTableError::InvalidVaddrRange(vaddr.start, vaddr.end)); - } - // Safety: modification to the user page table is safe. - unsafe { self.cursor(vaddr.start).protect(vaddr.len(), op, false) } - } - pub(crate) fn activate(&self) { // Safety: The usermode page table is safe to activate since the kernel // mappings are shared. @@ -141,55 +85,60 @@ where } } - /// Remove all write permissions from the user page table and mark the page - /// table as copy-on-write, and the create a handle to the new page table. + /// Remove all write permissions from the user page table and create a cloned + /// new page table. /// - /// That is, new page tables will be created when needed if a write operation - /// is performed on either of the user page table handles. Calling this function - /// performs no significant operations. + /// TODO: We may consider making the page table itself copy-on-write. pub(crate) fn fork_copy_on_write(&self) -> Self { + let mut cursor = self.cursor_mut(&UserMode::VADDR_RANGE).unwrap(); + // Safety: Protecting the user page table is safe. unsafe { - self.protect_unchecked(&UserMode::VADDR_RANGE, perm_op(|perm| perm & !VmPerm::W)); - } - // TODO: implement the copy-on-write mechanism. This is a simple workaround. - let new_root_frame = VmAllocOptions::new(1).alloc_single().unwrap(); - let root_frame = self.root_frame.lock(); + cursor + .protect( + UserMode::VADDR_RANGE.len(), + perm_op(|perm| perm & !VmPerm::W), + true, + ) + .unwrap(); + }; + let root_frame = cursor.leak_root_guard().unwrap(); + let mut new_root_frame = PageTableFrame::::new(); let half_of_entries = C::NR_ENTRIES_PER_FRAME / 2; - let new_ptr = new_root_frame.as_mut_ptr() as *mut E; - let ptr = root_frame.inner.as_ptr() as *const E; - let child = Box::new(core::array::from_fn(|i| { - if i < half_of_entries { - // This is user space, deep copy the child. - root_frame.child[i].as_ref().map(|child| match child { - Child::PageTable(ptf) => unsafe { - let frame = ptf.lock(); + for i in 0..half_of_entries { + // This is user space, deep copy the child. + match root_frame.child(i) { + Child::PageTable(node) => { + let frame = node.lock(); + // Possibly a cursor is waiting for the root lock to recycle this node. + // We can skip copying empty page table nodes. 
+ if frame.nr_valid_children() != 0 { let cloned = frame.clone(); - let pte = ptr.add(i).read(); - new_ptr.add(i).write(E::new( - cloned.inner.start_paddr(), - pte.info().prop, + let pt = Child::PageTable(Arc::new(SpinLock::new(cloned))); + new_root_frame.set_child( + i, + pt, + Some(root_frame.read_pte_info(i).prop), false, - false, - )); - Child::PageTable(Arc::new(SpinLock::new(cloned))) - }, - Child::Frame(_) => panic!("Unexpected frame child."), - }) - } else { - // This is kernel space, share the child. - unsafe { - let pte = ptr.add(i).read(); - new_ptr.add(i).write(pte); + ); + } + } + Child::None => {} + Child::Frame(_) | Child::Untracked(_) => { + panic!("Unexpected map child."); } - root_frame.child[i].clone() } - })); + } + for i in half_of_entries..C::NR_ENTRIES_PER_FRAME { + // This is kernel space, share the child. + new_root_frame.set_child( + i, + root_frame.child(i).clone(), + Some(root_frame.read_pte_info(i).prop), + false, + ) + } PageTable:: { - root_frame: Arc::new(SpinLock::new(PageTableFrame:: { - inner: new_root_frame, - child, - map_count: root_frame.map_count, - })), + root_frame: Arc::new(SpinLock::new(new_root_frame)), _phantom: PhantomData, } } @@ -208,17 +157,18 @@ where /// Then, one can use a user page table to call [`fork_copy_on_write`], creating /// other child page tables. pub(crate) fn create_user_page_table(&self) -> PageTable { - let new_root_frame = VmAllocOptions::new(1).alloc_single().unwrap(); + let mut new_root_frame = PageTableFrame::::new(); let root_frame = self.root_frame.lock(); - let half_of_entries = C::NR_ENTRIES_PER_FRAME / 2; - new_root_frame.copy_from_frame(&root_frame.inner); - let child = Box::new(core::array::from_fn(|i| root_frame.child[i].clone())); + for i in C::NR_ENTRIES_PER_FRAME / 2..C::NR_ENTRIES_PER_FRAME { + new_root_frame.set_child( + i, + root_frame.child(i).clone(), + Some(root_frame.read_pte_info(i).prop), + false, + ) + } PageTable:: { - root_frame: Arc::new(SpinLock::new(PageTableFrame:: { - inner: new_root_frame, - child, - map_count: root_frame.map_count, - })), + root_frame: Arc::new(SpinLock::new(new_root_frame)), _phantom: PhantomData, } } @@ -230,30 +180,26 @@ where /// instead of the virtual address range. pub(crate) fn make_shared_tables(&self, root_index: Range) { let start = root_index.start; - assert!(start < C::NR_ENTRIES_PER_FRAME); + debug_assert!(start >= C::NR_ENTRIES_PER_FRAME / 2); + debug_assert!(start < C::NR_ENTRIES_PER_FRAME); let end = root_index.end; - assert!(end <= C::NR_ENTRIES_PER_FRAME); + debug_assert!(end <= C::NR_ENTRIES_PER_FRAME); let mut root_frame = self.root_frame.lock(); for i in start..end { - let no_such_child = root_frame.child[i].is_none(); + let no_such_child = root_frame.child(i).is_none(); if no_such_child { - let frame = PageTableFrame::::new(); - let pte_ptr = (root_frame.inner.start_paddr() + i * size_of::()) as *mut E; - unsafe { - pte_ptr.write(E::new( - frame.inner.start_paddr(), - MapProperty { - perm: VmPerm::RWX, - global: true, - extension: 0, - cache: CachePolicy::Uncacheable, - }, - false, - false, - )); - } - root_frame.child[i] = Some(Child::PageTable(Arc::new(SpinLock::new(frame)))); - root_frame.map_count += 1; + let frame = Arc::new(SpinLock::new(PageTableFrame::::new())); + root_frame.set_child( + i, + Child::PageTable(frame), + Some(MapProperty { + perm: VmPerm::RWX, + global: true, + extension: 0, + cache: CachePolicy::Uncacheable, + }), + false, + ) } } } @@ -274,60 +220,40 @@ where /// The physical address of the root page table. 
pub(crate) fn root_paddr(&self) -> Paddr { - self.root_frame.lock().inner.start_paddr() + self.root_frame.lock().start_paddr() } - pub(crate) unsafe fn map_frames_unchecked( - &self, - vaddr: Vaddr, - frames: VmFrameVec, - prop: MapProperty, - ) { - let mut cursor = self.cursor(vaddr); - for frame in frames.into_iter() { - cursor.map(MapOption::Map { frame, prop }); - } - } - - pub(crate) unsafe fn map_unchecked( + pub(crate) unsafe fn map( &self, vaddr: &Range, paddr: &Range, prop: MapProperty, - ) { - self.cursor(vaddr.start).map(MapOption::MapUntyped { - pa: paddr.start, - len: vaddr.len(), - prop, - }); + ) -> Result<(), PageTableError> { + self.cursor_mut(vaddr)?.map_pa(paddr, prop); + Ok(()) } - pub(crate) unsafe fn unmap_unchecked(&self, vaddr: &Range) { - self.cursor(vaddr.start) - .map(MapOption::Unmap { len: vaddr.len() }); + pub(crate) unsafe fn unmap(&self, vaddr: &Range) -> Result<(), PageTableError> { + self.cursor_mut(vaddr)?.unmap(vaddr.len()); + Ok(()) } - pub(crate) unsafe fn protect_unchecked(&self, vaddr: &Range, op: impl MapOp) { - self.cursor(vaddr.start) + pub(crate) unsafe fn protect( + &self, + vaddr: &Range, + op: impl MapOp, + ) -> Result<(), PageTableError> { + self.cursor_mut(vaddr)? .protect(vaddr.len(), op, true) .unwrap(); - } - - /// Query about the mappings of a range of virtual addresses. - pub(crate) fn query_range( - &'a self, - vaddr: &Range, - ) -> Result, PageTableError> { - if vaddr.start % C::BASE_PAGE_SIZE != 0 || vaddr.end % C::BASE_PAGE_SIZE != 0 { - return Err(PageTableError::InvalidVaddrRange(vaddr.start, vaddr.end)); - } - if !M::encloses(vaddr) { - return Err(PageTableError::InvalidVaddrRange(vaddr.start, vaddr.end)); - } - Ok(PageTableIter::new(self, vaddr)) + Ok(()) } /// Query about the mapping of a single byte at the given virtual address. + /// + /// Note that this function may fail reflect an accurate result if there are + /// cursors concurrently accessing the same virtual address range, just like what + /// happens for the hardware MMU walk. pub(crate) fn query(&self, vaddr: Vaddr) -> Option<(Paddr, MapInfo)> { // Safety: The root frame is a valid page table frame so the address is valid. unsafe { page_walk::(self.root_paddr(), vaddr) } @@ -337,10 +263,26 @@ where activate_page_table(self.root_paddr(), CachePolicy::Writeback); } - /// Create a new mutating cursor for the page table. - /// The cursor is initialized atthe given virtual address. - fn cursor(&self, va: usize) -> PageTableCursor<'a, M, E, C> { - PageTableCursor::new(self, va) + /// Create a new cursor exclusively accessing the virtual address range for mapping. + /// + /// If another cursor is already accessing the range, the new cursor will wait until the + /// previous cursor is dropped. + pub(crate) fn cursor_mut( + &'a self, + va: &Range, + ) -> Result, PageTableError> { + CursorMut::new(self, va) + } + + /// Create a new cursor exclusively accessing the virtual address range for querying. + /// + /// If another cursor is already accessing the range, the new cursor will wait until the + /// previous cursor is dropped. + pub(crate) fn cursor( + &'a self, + va: &Range, + ) -> Result, PageTableError> { + Cursor::new(self, va) } /// Create a new reference to the same page table. 
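The mod.rs hunks above retire the per-call `*_unchecked` helpers in favor of range-checked entry points that go through a locking cursor: `cursor_mut` takes an exclusive lease on a virtual address range for `map`/`unmap`/`protect`, while `cursor` is the read-only traversal that used to be `query_range`. The fragment below is a minimal usage sketch rather than code from the patch: it mirrors the calling convention of the updated tests, assumes the same imports as the test module (plus `PageTableEntry`/`PageTableConsts` from `crate::arch::mm`), and spells out concrete generic arguments that are only an assumption here.

// Hypothetical sketch of the cursor-based page-table API; follows the usage
// shown in the updated tests, with assumed generic arguments and imports.
fn cursor_api_sketch() {
    let pt = PageTable::<UserMode, PageTableEntry, PageTableConsts>::empty();
    let va = PAGE_SIZE..PAGE_SIZE * 2;

    // Map one frame through an exclusive cursor over `va`.
    let frame = VmAllocOptions::new(1).alloc_single().unwrap();
    let prop = MapProperty::new_general(VmPerm::RW);
    // Safety: the frame is freshly allocated and `va` lies in user space,
    // so the new mapping cannot alias any live kernel mapping.
    unsafe { pt.cursor_mut(&va).unwrap().map(frame, prop) };
    assert!(pt.query(va.start).is_some());

    // The range-wide wrappers create a cursor internally and forward to it.
    unsafe { pt.protect(&va, perm_op(|p| p - VmPerm::W)).unwrap() };
    unsafe { pt.unmap(&va).unwrap() };
    assert!(pt.query(va.start).is_none());
}

The important behavioral difference from the old `map_unchecked`-style API is that two cursors over overlapping ranges serialize on each other, so the range checks and the locking happen once per cursor instead of once per call.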
diff --git a/framework/aster-frame/src/vm/page_table/test.rs b/framework/aster-frame/src/vm/page_table/test.rs index 99a74a909..20a1ecc5d 100644 --- a/framework/aster-frame/src/vm/page_table/test.rs +++ b/framework/aster-frame/src/vm/page_table/test.rs @@ -1,7 +1,7 @@ // SPDX-License-Identifier: MPL-2.0 use super::*; -use crate::vm::{kspace::LINEAR_MAPPING_BASE_VADDR, space::VmPerm}; +use crate::vm::{kspace::LINEAR_MAPPING_BASE_VADDR, space::VmPerm, VmAllocOptions}; const PAGE_SIZE: usize = 4096; @@ -12,47 +12,37 @@ fn test_range_check() { let bad_va = 0..PAGE_SIZE + 1; let bad_va2 = LINEAR_MAPPING_BASE_VADDR..LINEAR_MAPPING_BASE_VADDR + PAGE_SIZE; let to = VmAllocOptions::new(1).alloc().unwrap(); - assert!(pt.query_range(&good_va).is_ok()); - assert!(pt.query_range(&bad_va).is_err()); - assert!(pt.query_range(&bad_va2).is_err()); - assert!(pt.unmap(&good_va).is_ok()); - assert!(pt.unmap(&bad_va).is_err()); - assert!(pt.unmap(&bad_va2).is_err()); - assert!(pt - .map_frames( - good_va.start, - to.clone(), - MapProperty::new_general(VmPerm::R) - ) - .is_ok()); - assert!(pt - .map_frames(bad_va2.start, to.clone(), MapProperty::new_invalid()) - .is_err()); + assert!(pt.cursor_mut(&good_va).is_ok()); + assert!(pt.cursor_mut(&bad_va).is_err()); + assert!(pt.cursor_mut(&bad_va2).is_err()); + assert!(unsafe { pt.unmap(&good_va) }.is_ok()); + assert!(unsafe { pt.unmap(&bad_va) }.is_err()); + assert!(unsafe { pt.unmap(&bad_va2) }.is_err()); } #[ktest] fn test_map_unmap() { let pt = PageTable::::empty(); let from = PAGE_SIZE..PAGE_SIZE * 2; - let frames = VmAllocOptions::new(1).alloc().unwrap(); - let start_paddr = frames.get(0).unwrap().start_paddr(); + let frame = VmAllocOptions::new(1).alloc_single().unwrap(); + let start_paddr = frame.start_paddr(); let prop = MapProperty::new_general(VmPerm::RW); - pt.map_frames(from.start, frames.clone(), prop).unwrap(); + unsafe { pt.cursor_mut(&from).unwrap().map(frame.clone(), prop) }; assert_eq!(pt.query(from.start + 10).unwrap().0, start_paddr + 10); - pt.unmap(&from).unwrap(); + unsafe { pt.unmap(&from).unwrap() }; assert!(pt.query(from.start + 10).is_none()); let from_ppn = 13245..512 * 512 + 23456; let to_ppn = from_ppn.start - 11010..from_ppn.end - 11010; let from = PAGE_SIZE * from_ppn.start..PAGE_SIZE * from_ppn.end; let to = PAGE_SIZE * to_ppn.start..PAGE_SIZE * to_ppn.end; - unsafe { pt.map_unchecked(&from, &to, prop) }; + unsafe { pt.map(&from, &to, prop).unwrap() }; for i in 0..100 { let offset = i * (PAGE_SIZE + 1000); assert_eq!(pt.query(from.start + offset).unwrap().0, to.start + offset); } let unmap = PAGE_SIZE * 123..PAGE_SIZE * 3434; - pt.unmap(&unmap).unwrap(); + unsafe { pt.unmap(&unmap).unwrap() }; for i in 0..100 { let offset = i * (PAGE_SIZE + 10); if unmap.start <= from.start + offset && from.start + offset < unmap.end { @@ -67,20 +57,20 @@ fn test_map_unmap() { fn test_user_copy_on_write() { let pt = PageTable::::empty(); let from = PAGE_SIZE..PAGE_SIZE * 2; - let frames = VmAllocOptions::new(1).alloc().unwrap(); - let start_paddr = frames.get(0).unwrap().start_paddr(); + let frame = VmAllocOptions::new(1).alloc_single().unwrap(); + let start_paddr = frame.start_paddr(); let prop = MapProperty::new_general(VmPerm::RW); - pt.map_frames(from.start, frames.clone(), prop).unwrap(); + unsafe { pt.cursor_mut(&from).unwrap().map(frame.clone(), prop) }; assert_eq!(pt.query(from.start + 10).unwrap().0, start_paddr + 10); - pt.unmap(&from).unwrap(); + unsafe { pt.unmap(&from).unwrap() }; assert!(pt.query(from.start + 10).is_none()); - 
pt.map_frames(from.start, frames.clone(), prop).unwrap(); + unsafe { pt.cursor_mut(&from).unwrap().map(frame.clone(), prop) }; assert_eq!(pt.query(from.start + 10).unwrap().0, start_paddr + 10); let child_pt = pt.fork_copy_on_write(); assert_eq!(pt.query(from.start + 10).unwrap().0, start_paddr + 10); assert_eq!(child_pt.query(from.start + 10).unwrap().0, start_paddr + 10); - pt.unmap(&from).unwrap(); + unsafe { pt.unmap(&from).unwrap() }; assert!(pt.query(from.start + 10).is_none()); assert_eq!(child_pt.query(from.start + 10).unwrap().0, start_paddr + 10); } @@ -98,26 +88,26 @@ impl PageTableConstsTrait for BasePageTableConsts { } #[ktest] -fn test_base_protect_query_range() { +fn test_base_protect_query() { let pt = PageTable::::empty(); let from_ppn = 1..1000; let from = PAGE_SIZE * from_ppn.start..PAGE_SIZE * from_ppn.end; let to = PAGE_SIZE * 1000..PAGE_SIZE * 1999; let prop = MapProperty::new_general(VmPerm::RW); - unsafe { pt.map_unchecked(&from, &to, prop) }; - for (qr, i) in pt.query_range(&from).unwrap().zip(from_ppn) { + unsafe { pt.map(&from, &to, prop).unwrap() }; + for (qr, i) in pt.cursor(&from).unwrap().zip(from_ppn) { let Qr::MappedUntyped { va, pa, len, info } = qr else { - panic!("Expected MappedUntyped, got {:?}", qr); + panic!("Expected MappedUntyped, got {:#x?}", qr); }; assert_eq!(info.prop.perm, VmPerm::RW); assert_eq!(info.prop.cache, CachePolicy::Writeback); assert_eq!(va..va + len, i * PAGE_SIZE..(i + 1) * PAGE_SIZE); } let prot = PAGE_SIZE * 18..PAGE_SIZE * 20; - pt.protect(&prot, perm_op(|p| p - VmPerm::W)).unwrap(); - for (qr, i) in pt.query_range(&prot).unwrap().zip(18..20) { + unsafe { pt.protect(&prot, perm_op(|p| p - VmPerm::W)).unwrap() }; + for (qr, i) in pt.cursor(&prot).unwrap().zip(18..20) { let Qr::MappedUntyped { va, pa, len, info } = qr else { - panic!("Expected MappedUntyped, got {:?}", qr); + panic!("Expected MappedUntyped, got {:#x?}", qr); }; assert_eq!(info.prop.perm, VmPerm::R); assert_eq!(va..va + len, i * PAGE_SIZE..(i + 1) * PAGE_SIZE); @@ -135,7 +125,7 @@ impl PageTableConstsTrait for VeryHugePageTableConsts { } #[ktest] -fn test_large_protect_query_range() { +fn test_large_protect_query() { let pt = PageTable::::empty(); let gmult = 512 * 512; let from_ppn = gmult - 512..gmult + gmult + 514; @@ -148,10 +138,10 @@ fn test_large_protect_query_range() { let from = PAGE_SIZE * from_ppn.start..PAGE_SIZE * from_ppn.end; let to = PAGE_SIZE * to_ppn.start..PAGE_SIZE * to_ppn.end; let prop = MapProperty::new_general(VmPerm::RW); - unsafe { pt.map_unchecked(&from, &to, prop) }; - for (qr, i) in pt.query_range(&from).unwrap().zip(0..512 + 2 + 2) { + unsafe { pt.map(&from, &to, prop).unwrap() }; + for (qr, i) in pt.cursor(&from).unwrap().zip(0..512 + 2 + 2) { let Qr::MappedUntyped { va, pa, len, info } = qr else { - panic!("Expected MappedUntyped, got {:?}", qr); + panic!("Expected MappedUntyped, got {:#x?}", qr); }; assert_eq!(info.prop.perm, VmPerm::RW); assert_eq!(info.prop.cache, CachePolicy::Writeback); @@ -171,32 +161,32 @@ fn test_large_protect_query_range() { } let ppn = from_ppn.start + 18..from_ppn.start + 20; let va = PAGE_SIZE * ppn.start..PAGE_SIZE * ppn.end; - pt.protect(&va, perm_op(|p| p - VmPerm::W)).unwrap(); + unsafe { pt.protect(&va, perm_op(|p| p - VmPerm::W)).unwrap() }; for (qr, i) in pt - .query_range(&(va.start - PAGE_SIZE..va.start)) + .cursor(&(va.start - PAGE_SIZE..va.start)) .unwrap() .zip(ppn.start - 1..ppn.start) { let Qr::MappedUntyped { va, pa, len, info } = qr else { - panic!("Expected MappedUntyped, got {:?}", qr); 
+ panic!("Expected MappedUntyped, got {:#x?}", qr); }; assert_eq!(info.prop.perm, VmPerm::RW); assert_eq!(va..va + len, i * PAGE_SIZE..(i + 1) * PAGE_SIZE); } - for (qr, i) in pt.query_range(&va).unwrap().zip(ppn.clone()) { + for (qr, i) in pt.cursor(&va).unwrap().zip(ppn.clone()) { let Qr::MappedUntyped { va, pa, len, info } = qr else { - panic!("Expected MappedUntyped, got {:?}", qr); + panic!("Expected MappedUntyped, got {:#x?}", qr); }; assert_eq!(info.prop.perm, VmPerm::R); assert_eq!(va..va + len, i * PAGE_SIZE..(i + 1) * PAGE_SIZE); } for (qr, i) in pt - .query_range(&(va.end..va.end + PAGE_SIZE)) + .cursor(&(va.end..va.end + PAGE_SIZE)) .unwrap() .zip(ppn.end..ppn.end + 1) { let Qr::MappedUntyped { va, pa, len, info } = qr else { - panic!("Expected MappedUntyped, got {:?}", qr); + panic!("Expected MappedUntyped, got {:#x?}", qr); }; assert_eq!(info.prop.perm, VmPerm::RW); assert_eq!(va..va + len, i * PAGE_SIZE..(i + 1) * PAGE_SIZE); diff --git a/framework/aster-frame/src/vm/space.rs b/framework/aster-frame/src/vm/space.rs index dbeb174db..29d9681d3 100644 --- a/framework/aster-frame/src/vm/space.rs +++ b/framework/aster-frame/src/vm/space.rs @@ -9,7 +9,8 @@ use super::{ is_page_aligned, kspace::KERNEL_PAGE_TABLE, page_table::{ - MapInfo, MapOp, PageTable, PageTableConstsTrait, PageTableQueryResult as PtQr, UserMode, + MapInfo, MapOp, PageTable, PageTableConstsTrait, PageTableMode, + PageTableQueryResult as PtQr, PageTableQueryResult, UserMode, }, VmFrameVec, VmIo, PAGE_SIZE, }; @@ -17,7 +18,7 @@ use crate::{ arch::mm::{PageTableConsts, PageTableEntry}, prelude::*, vm::{ - page_table::{CachePolicy, MapProperty, PageTableIter}, + page_table::{CachePolicy, Cursor, MapProperty}, VmFrame, MAX_USERSPACE_VADDR, }, Error, @@ -63,27 +64,44 @@ impl VmSpace { } let addr = options.addr.unwrap(); + + if addr % PAGE_SIZE != 0 { + return Err(Error::InvalidArgs); + } + let size = frames.nbytes(); + let end = addr.checked_add(size).ok_or(Error::InvalidArgs)?; + + let va_range = addr..end; + if !UserMode::covers(&va_range) { + return Err(Error::InvalidArgs); + } + + let mut cursor = self.pt.cursor_mut(&va_range)?; // If overwrite is forbidden, we should check if there are existing mappings if !options.can_overwrite { - let end = addr.checked_add(size).ok_or(Error::Overflow)?; - for qr in self.query_range(&(addr..end)).unwrap() { - if matches!(qr, VmQueryResult::Mapped { .. }) { + while let Some(qr) = cursor.query() { + if matches!(qr, PageTableQueryResult::Mapped { .. }) { return Err(Error::MapAlreadyMappedVaddr); } } + cursor.jump(va_range.start); + } + + let prop = MapProperty { + perm: options.perm, + global: false, + extension: 0, + cache: CachePolicy::Writeback, + }; + + for frame in frames.into_iter() { + // Safety: mapping in the user space with `VmFrame` is safe. + unsafe { + cursor.map(frame, prop); + } } - self.pt.map_frames( - addr, - frames, - MapProperty { - perm: options.perm, - global: false, - extension: 0, - cache: CachePolicy::Writeback, - }, - )?; Ok(addr) } @@ -93,7 +111,7 @@ impl VmSpace { /// each parts of the range. pub fn query_range(&self, range: &Range) -> Result { Ok(VmQueryIter { - inner: self.pt.query_range(range)?, + cursor: self.pt.cursor(range)?, }) } @@ -112,8 +130,16 @@ impl VmSpace { /// The range is allowed to contain gaps, where no physical memory pages /// are mapped. 
pub fn unmap(&self, range: &Range) -> Result<()> { - assert!(is_page_aligned(range.start) && is_page_aligned(range.end)); - self.pt.unmap(range)?; + if !is_page_aligned(range.start) || !is_page_aligned(range.end) { + return Err(Error::InvalidArgs); + } + if !UserMode::covers(range) { + return Err(Error::InvalidArgs); + } + // Safety: unmapping in the user space is safe. + unsafe { + self.pt.unmap(range)?; + } Ok(()) } @@ -122,7 +148,7 @@ impl VmSpace { // Safety: unmapping user space is safe, and we don't care unmapping // invalid ranges. unsafe { - self.pt.unmap_unchecked(&(0..MAX_USERSPACE_VADDR)); + self.pt.unmap(&(0..MAX_USERSPACE_VADDR)).unwrap(); } #[cfg(target_arch = "x86_64")] x86_64::instructions::tlb::flush_all(); @@ -138,8 +164,16 @@ impl VmSpace { /// partial huge page happens, and efforts are not reverted, leaving us /// in a bad state. pub fn protect(&self, range: &Range, op: impl MapOp) -> Result<()> { - assert!(is_page_aligned(range.start) && is_page_aligned(range.end)); - self.pt.protect(range, op)?; + if !is_page_aligned(range.start) || !is_page_aligned(range.end) { + return Err(Error::InvalidArgs); + } + if !UserMode::covers(range) { + return Err(Error::InvalidArgs); + } + // Safety: protecting in the user space is safe. + unsafe { + self.pt.protect(range, op)?; + } Ok(()) } @@ -306,7 +340,7 @@ impl TryFrom for VmPerm { /// The iterator for querying over the VM space without modifying it. pub struct VmQueryIter<'a> { - inner: PageTableIter<'a, UserMode, PageTableEntry, PageTableConsts>, + cursor: Cursor<'a, UserMode, PageTableEntry, PageTableConsts>, } pub enum VmQueryResult { @@ -325,11 +359,11 @@ impl Iterator for VmQueryIter<'_> { type Item = VmQueryResult; fn next(&mut self) -> Option { - self.inner.next().map(|ptqr| match ptqr { + self.cursor.next().map(|ptqr| match ptqr { PtQr::NotMapped { va, len } => VmQueryResult::NotMapped { va, len }, PtQr::Mapped { va, frame, info } => VmQueryResult::Mapped { va, frame, info }, // It is not possible to map untyped memory in user space. - PtQr::MappedUntyped { va, pa, len, info } => unreachable!(), + PtQr::MappedUntyped { .. } => unreachable!(), }) } }
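On the `VmSpace` side the same refactor shows up as explicit argument validation (`Err(Error::InvalidArgs)` for misaligned or non-user ranges instead of a kernel assertion) and a `VmQueryIter` that is now a thin wrapper around a read-only page-table `Cursor`. The helper below is an illustrative, hypothetical caller-side sketch, not code from the patch: the function name is made up, `Range<Vaddr>` is the assumed element type behind the elided generics, and it presumes `perm_op`, `VmPerm`, and `VmQueryResult` are importable at the call site.

// Hypothetical caller-side helper; a sketch of the validated `VmSpace` API.
fn write_protect_and_count(space: &VmSpace, range: &Range<Vaddr>) -> Result<usize> {
    // Misaligned or non-user ranges now come back as `Err(Error::InvalidArgs)`
    // rather than tripping an `assert!` inside the kernel.
    space.protect(range, perm_op(|p| p - VmPerm::W))?;

    // `query_range` walks the range with a cursor and reports both the mapped
    // frames and the gaps between them.
    let mut mapped_pages = 0;
    for qr in space.query_range(range)? {
        match qr {
            VmQueryResult::NotMapped { .. } => {}
            VmQueryResult::Mapped { .. } => mapped_pages += 1,
        }
    }
    Ok(mapped_pages)
}

Returning an error here instead of asserting keeps a misbehaving user-space request from bringing down the kernel, which matches the intent of the checks added to `map`, `unmap`, and `protect` in the hunks above.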