diff --git a/framework/aster-frame/src/arch/x86/iommu/second_stage.rs b/framework/aster-frame/src/arch/x86/iommu/second_stage.rs index bb02355a7..63e3e9bd3 100644 --- a/framework/aster-frame/src/arch/x86/iommu/second_stage.rs +++ b/framework/aster-frame/src/arch/x86/iommu/second_stage.rs @@ -14,7 +14,7 @@ use crate::vm::{ /// The page table used by iommu maps the device address /// space to the physical address space. -#[derive(Clone)] +#[derive(Clone, Debug)] pub(super) struct DeviceMode {} impl PageTableMode for DeviceMode { diff --git a/framework/aster-frame/src/arch/x86/tdx_guest.rs b/framework/aster-frame/src/arch/x86/tdx_guest.rs index 74891f9d7..3cdf7cd25 100644 --- a/framework/aster-frame/src/arch/x86/tdx_guest.rs +++ b/framework/aster-frame/src/arch/x86/tdx_guest.rs @@ -415,7 +415,7 @@ pub unsafe fn unprotect_gpa_range(gpa: TdxGpa, page_num: usize) -> Result<(), Pa warn!("Misaligned address: {:x}", gpa); } let vaddr = paddr_to_vaddr(gpa); - let mut pt = KERNEL_PAGE_TABLE.get().unwrap().lock(); + let pt = KERNEL_PAGE_TABLE.get().unwrap(); unsafe { pt.protect_unchecked(&(vaddr..page_num * PAGE_SIZE), |info| MapProperty { perm: info.prop.perm, @@ -451,7 +451,7 @@ pub unsafe fn protect_gpa_range(gpa: TdxGpa, page_num: usize) -> Result<(), Page warn!("Misaligned address: {:x}", gpa); } let vaddr = paddr_to_vaddr(gpa); - let mut pt = KERNEL_PAGE_TABLE.get().unwrap().lock(); + let pt = KERNEL_PAGE_TABLE.get().unwrap(); unsafe { pt.protect_unchecked(&(vaddr..page_num * PAGE_SIZE), |info| MapProperty { perm: info.prop.perm, diff --git a/framework/aster-frame/src/error.rs b/framework/aster-frame/src/error.rs index fbe121096..eeef63875 100644 --- a/framework/aster-frame/src/error.rs +++ b/framework/aster-frame/src/error.rs @@ -1,5 +1,7 @@ // SPDX-License-Identifier: MPL-2.0 +use crate::vm::page_table::PageTableError; + /// The error type which is returned from the APIs of this crate. #[derive(Clone, Copy, PartialEq, Eq, Debug)] pub enum Error { @@ -10,4 +12,11 @@ pub enum Error { IoError, NotEnoughResources, Overflow, + MapAlreadyMappedVaddr, +} + +impl From for Error { + fn from(_err: PageTableError) -> Error { + Error::AccessDenied + } } diff --git a/framework/aster-frame/src/lib.rs b/framework/aster-frame/src/lib.rs index b6d021d3a..c413f871d 100644 --- a/framework/aster-frame/src/lib.rs +++ b/framework/aster-frame/src/lib.rs @@ -79,7 +79,6 @@ pub fn init() { vm::kspace::KERNEL_PAGE_TABLE .get() .unwrap() - .lock() .activate_unchecked(); } invoke_ffi_init_funcs(); diff --git a/framework/aster-frame/src/task/task.rs b/framework/aster-frame/src/task/task.rs index 8c173a4e9..a3d548d3c 100644 --- a/framework/aster-frame/src/task/task.rs +++ b/framework/aster-frame/src/task/task.rs @@ -64,19 +64,17 @@ impl KernelStack { let stack_segment = VmAllocOptions::new(KERNEL_STACK_SIZE / PAGE_SIZE + 1).alloc_contiguous()?; // FIXME: modifying the the linear mapping is bad. - let mut page_table = KERNEL_PAGE_TABLE.get().unwrap().lock(); + let page_table = KERNEL_PAGE_TABLE.get().unwrap(); let guard_page_vaddr = { let guard_page_paddr = stack_segment.start_paddr(); crate::vm::paddr_to_vaddr(guard_page_paddr) }; // Safety: the physical guard page address is exclusively used since we allocated it. 
unsafe { - page_table - .protect_unchecked( - &(guard_page_vaddr..guard_page_vaddr + PAGE_SIZE), - perm_op(|p| p - VmPerm::RW), - ) - .unwrap(); + page_table.protect_unchecked( + &(guard_page_vaddr..guard_page_vaddr + PAGE_SIZE), + perm_op(|p| p - VmPerm::RW), + ); } Ok(Self { segment: stack_segment, @@ -93,19 +91,17 @@ impl Drop for KernelStack { fn drop(&mut self) { if self.has_guard_page { // FIXME: modifying the the linear mapping is bad. - let mut page_table = KERNEL_PAGE_TABLE.get().unwrap().lock(); + let page_table = KERNEL_PAGE_TABLE.get().unwrap(); let guard_page_vaddr = { let guard_page_paddr = self.segment.start_paddr(); crate::vm::paddr_to_vaddr(guard_page_paddr) }; // Safety: the physical guard page address is exclusively used since we allocated it. unsafe { - page_table - .protect_unchecked( - &(guard_page_vaddr..guard_page_vaddr + PAGE_SIZE), - perm_op(|p| p | VmPerm::RW), - ) - .unwrap(); + page_table.protect_unchecked( + &(guard_page_vaddr..guard_page_vaddr + PAGE_SIZE), + perm_op(|p| p | VmPerm::RW), + ); } } } diff --git a/framework/aster-frame/src/trap/handler.rs b/framework/aster-frame/src/trap/handler.rs index d30952353..0c4407654 100644 --- a/framework/aster-frame/src/trap/handler.rs +++ b/framework/aster-frame/src/trap/handler.rs @@ -214,10 +214,9 @@ fn handle_kernel_page_fault(f: &TrapFrame) { ); // Do the mapping - let mut page_table = KERNEL_PAGE_TABLE + let page_table = KERNEL_PAGE_TABLE .get() - .expect("The kernel page table is not initialized when kernel page fault happens") - .lock(); + .expect("The kernel page table is not initialized when kernel page fault happens"); let vaddr = (page_fault_vaddr as usize).align_down(PAGE_SIZE); let paddr = vaddr - LINEAR_MAPPING_BASE_VADDR; diff --git a/framework/aster-frame/src/user.rs b/framework/aster-frame/src/user.rs index be122f0d3..49fc78edb 100644 --- a/framework/aster-frame/src/user.rs +++ b/framework/aster-frame/src/user.rs @@ -12,7 +12,7 @@ use crate::{cpu::UserContext, prelude::*, task::Task, vm::VmSpace}; /// user mode. pub struct UserSpace { /// vm space - vm_space: VmSpace, + vm_space: Arc, /// cpu context before entering user space init_ctx: UserContext, } @@ -22,7 +22,7 @@ impl UserSpace { /// /// Each instance maintains a VM address space and the CPU state to enable /// execution in the user space. - pub fn new(vm_space: VmSpace, init_ctx: UserContext) -> Self { + pub fn new(vm_space: Arc, init_ctx: UserContext) -> Self { Self { vm_space, init_ctx } } diff --git a/framework/aster-frame/src/vm/dma/dma_coherent.rs b/framework/aster-frame/src/vm/dma/dma_coherent.rs index 4a21d7e7d..a73bdeaf2 100644 --- a/framework/aster-frame/src/vm/dma/dma_coherent.rs +++ b/framework/aster-frame/src/vm/dma/dma_coherent.rs @@ -56,14 +56,12 @@ impl DmaCoherent { // Ensure that the addresses used later will not overflow start_paddr.checked_add(frame_count * PAGE_SIZE).unwrap(); if !is_cache_coherent { - let mut page_table = KERNEL_PAGE_TABLE.get().unwrap().lock(); + let page_table = KERNEL_PAGE_TABLE.get().unwrap(); let vaddr = paddr_to_vaddr(start_paddr); let va_range = vaddr..vaddr + (frame_count * PAGE_SIZE); // Safety: the address is in the range of `vm_segment`. 
unsafe { - page_table - .protect_unchecked(&va_range, cache_policy_op(CachePolicy::Uncacheable)) - .unwrap(); + page_table.protect_unchecked(&va_range, cache_policy_op(CachePolicy::Uncacheable)); } } let start_daddr = match dma_type() { @@ -143,14 +141,12 @@ impl Drop for DmaCoherentInner { } } if !self.is_cache_coherent { - let mut page_table = KERNEL_PAGE_TABLE.get().unwrap().lock(); + let page_table = KERNEL_PAGE_TABLE.get().unwrap(); let vaddr = paddr_to_vaddr(start_paddr); let va_range = vaddr..vaddr + (frame_count * PAGE_SIZE); // Safety: the address is in the range of `vm_segment`. unsafe { - page_table - .protect_unchecked(&va_range, cache_policy_op(CachePolicy::Writeback)) - .unwrap(); + page_table.protect_unchecked(&va_range, cache_policy_op(CachePolicy::Writeback)); } } remove_dma_mapping(start_paddr, frame_count); @@ -210,19 +206,9 @@ mod test { .unwrap(); let dma_coherent = DmaCoherent::map(vm_segment.clone(), false).unwrap(); assert!(dma_coherent.paddr() == vm_segment.paddr()); - let page_table = KERNEL_PAGE_TABLE.get().unwrap().lock(); + let page_table = KERNEL_PAGE_TABLE.get().unwrap(); let vaddr = paddr_to_vaddr(vm_segment.paddr()); - assert!( - page_table - .query(&(vaddr..vaddr + PAGE_SIZE)) - .unwrap() - .next() - .unwrap() - .info - .prop - .cache - == CachePolicy::Uncacheable - ); + assert!(page_table.query(vaddr).unwrap().1.prop.cache == CachePolicy::Uncacheable); } #[ktest] diff --git a/framework/aster-frame/src/vm/kspace.rs b/framework/aster-frame/src/vm/kspace.rs index 62df59224..e3e1d06d8 100644 --- a/framework/aster-frame/src/vm/kspace.rs +++ b/framework/aster-frame/src/vm/kspace.rs @@ -8,7 +8,6 @@ use spin::Once; use super::page_table::PageTableConstsTrait; use crate::{ arch::mm::{PageTableConsts, PageTableEntry}, - sync::SpinLock, vm::{ page_table::{page_walk, CachePolicy, KernelMode, MapProperty, PageTable}, space::VmPerm, @@ -36,7 +35,7 @@ pub fn vaddr_to_paddr(va: Vaddr) -> Option { } else { let root_paddr = crate::arch::mm::current_page_table_paddr(); // Safety: the root page table is valid since we read it from the register. - unsafe { page_walk::(root_paddr, va) } + unsafe { page_walk::(root_paddr, va).map(|(pa, _)| pa) } } } @@ -45,9 +44,8 @@ pub(crate) fn paddr_to_vaddr(pa: Paddr) -> usize { pa + LINEAR_MAPPING_BASE_VADDR } -pub static KERNEL_PAGE_TABLE: Once< - SpinLock>, -> = Once::new(); +pub static KERNEL_PAGE_TABLE: Once> = + Once::new(); /// Initialize the kernel page table. /// @@ -58,7 +56,7 @@ pub static KERNEL_PAGE_TABLE: Once< /// This function should be called before: /// - any initializer that modifies the kernel page table. 
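With the outer `SpinLock` around `KERNEL_PAGE_TABLE` gone, call sites take the handle directly and rely on the per-frame locking inside it. A minimal sketch of the new call-site pattern, mirroring the `DmaCoherent` change above (`vaddr` is an assumed, page-aligned and already-mapped linear-mapping address used only for illustration):

```rust
// Assumption: `vaddr` is a page-aligned linear-mapping address that is
// already mapped; changing its cache policy here is only an illustration.
let pt = KERNEL_PAGE_TABLE.get().unwrap();
// Safety: the caller must exclusively own the backing frames of this range,
// as in the DMA and kernel-stack call sites above.
unsafe {
    pt.protect_unchecked(
        &(vaddr..vaddr + PAGE_SIZE),
        cache_policy_op(CachePolicy::Uncacheable),
    );
}
```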
pub fn init_kernel_page_table() { - let mut kpt = PageTable::::empty(); + let kpt = PageTable::::empty(); kpt.make_shared_tables( PageTableConsts::NR_ENTRIES_PER_FRAME / 2..PageTableConsts::NR_ENTRIES_PER_FRAME, ); @@ -118,5 +116,5 @@ pub fn init_kernel_page_table() { unsafe { kpt.map_unchecked(&from, &to, prop); } - KERNEL_PAGE_TABLE.call_once(|| SpinLock::new(kpt)); + KERNEL_PAGE_TABLE.call_once(|| kpt); } diff --git a/framework/aster-frame/src/vm/memory_set.rs b/framework/aster-frame/src/vm/memory_set.rs deleted file mode 100644 index 246c09095..000000000 --- a/framework/aster-frame/src/vm/memory_set.rs +++ /dev/null @@ -1,282 +0,0 @@ -// SPDX-License-Identifier: MPL-2.0 - -use alloc::collections::{btree_map::Entry, BTreeMap}; -use core::fmt; - -use align_ext::AlignExt; - -use super::{ - kspace::KERNEL_PAGE_TABLE, - page_table::{MapInfo, MapOp, MapProperty, PageTable, UserMode}, -}; -use crate::{ - prelude::*, - vm::{ - is_page_aligned, page_table::MapStatus, VmAllocOptions, VmFrame, VmFrameVec, VmPerm, - VmReader, VmWriter, PAGE_SIZE, - }, - Error, -}; - -#[derive(Debug, Clone)] -pub struct MapArea { - pub info: MapInfo, - pub start_va: Vaddr, - pub size: usize, - pub mapper: BTreeMap, -} - -pub struct MemorySet { - pub pt: PageTable, - /// all the map area, sort by the start virtual address - areas: BTreeMap, -} - -impl MapArea { - pub fn mapped_size(&self) -> usize { - self.size - } - - /// This function will map the vitural address to the given physical frames - pub fn new( - start_va: Vaddr, - size: usize, - prop: MapProperty, - physical_frames: VmFrameVec, - ) -> Self { - assert!( - is_page_aligned(start_va) - && is_page_aligned(size) - && physical_frames.len() == (size / PAGE_SIZE) - ); - - let mut map_area = Self { - info: MapInfo { - prop, - status: MapStatus::empty(), - }, - start_va, - size, - mapper: BTreeMap::new(), - }; - let mut current_va = start_va; - let page_size = size / PAGE_SIZE; - let mut phy_frame_iter = physical_frames.iter(); - - for _ in 0..page_size { - let vm_frame = phy_frame_iter.next().unwrap(); - map_area.map_with_physical_address(current_va, vm_frame.clone()); - current_va += PAGE_SIZE; - } - - map_area - } - - pub fn map_with_physical_address(&mut self, va: Vaddr, pa: VmFrame) -> Paddr { - assert!(is_page_aligned(va)); - - match self.mapper.entry(va) { - Entry::Occupied(e) => panic!("already mapped a input physical address"), - Entry::Vacant(e) => e.insert(pa).start_paddr(), - } - } - - pub fn map(&mut self, va: Vaddr) -> Paddr { - assert!(is_page_aligned(va)); - - match self.mapper.entry(va) { - Entry::Occupied(e) => e.get().start_paddr(), - Entry::Vacant(e) => e - .insert(VmAllocOptions::new(1).alloc_single().unwrap()) - .start_paddr(), - } - } - - pub fn unmap(&mut self, va: Vaddr) -> Option { - self.mapper.remove(&va) - } - - pub fn write_data(&mut self, addr: usize, data: &[u8]) { - let mut current_start_address = addr; - let mut buf_reader: VmReader = data.into(); - for (va, pa) in self.mapper.iter() { - if current_start_address >= *va && current_start_address < va + PAGE_SIZE { - let offset = current_start_address - va; - let _ = pa.writer().skip(offset).write(&mut buf_reader); - if !buf_reader.has_remain() { - return; - } - current_start_address = va + PAGE_SIZE; - } - } - } - - pub fn read_data(&self, addr: usize, data: &mut [u8]) { - let mut start = addr; - let mut buf_writer: VmWriter = data.into(); - for (va, pa) in self.mapper.iter() { - if start >= *va && start < va + PAGE_SIZE { - let offset = start - va; - let _ = 
pa.reader().skip(offset).read(&mut buf_writer); - if !buf_writer.has_avail() { - return; - } - start = va + PAGE_SIZE; - } - } - } -} - -impl Default for MemorySet { - fn default() -> Self { - Self::new() - } -} - -impl MemorySet { - pub fn map(&mut self, area: MapArea) { - if area.size > 0 { - // TODO: check overlap - if let Entry::Vacant(e) = self.areas.entry(area.start_va) { - let area = e.insert(area); - for (va, frame) in area.mapper.iter() { - self.pt.map_frame(*va, frame, area.info.prop).unwrap(); - } - } else { - panic!( - "MemorySet::map: MapArea starts from {:#x?} is existed!", - area.start_va - ); - } - } - } - - /// Determine whether a Vaddr is in a mapped area - pub fn is_mapped(&self, vaddr: Vaddr) -> bool { - let vaddr = vaddr.align_down(PAGE_SIZE); - self.pt - .query(&(vaddr..vaddr + PAGE_SIZE)) - .map(|mut i| i.next().is_some()) - .unwrap_or(false) - } - - /// Return the information of the PTE for the target virtual memory address. - pub fn info(&self, vaddr: Vaddr) -> Option { - let vaddr = vaddr.align_down(PAGE_SIZE); - self.pt - .query(&(vaddr..vaddr + PAGE_SIZE)) - .map(|mut i| i.next().unwrap().info) - .ok() - } - - pub fn new() -> Self { - let page_table = KERNEL_PAGE_TABLE.get().unwrap().lock().fork(); - Self { - pt: page_table, - areas: BTreeMap::new(), - } - } - - pub fn unmap(&mut self, va: Vaddr) -> Result<()> { - if let Some(area) = self.areas.remove(&va) { - for (va, _) in area.mapper.iter() { - self.pt.unmap(&(*va..*va + PAGE_SIZE)).unwrap(); - } - Ok(()) - } else { - Err(Error::PageFault) - } - } - - pub fn clear(&mut self) { - for area in self.areas.values_mut() { - for (va, _) in area.mapper.iter() { - self.pt.unmap(&(*va..*va + PAGE_SIZE)).unwrap(); - } - } - self.areas.clear(); - } - - pub fn write_bytes(&mut self, addr: usize, data: &[u8]) -> Result<()> { - let mut current_addr = addr; - let mut remain = data.len(); - let start_write = false; - let mut offset = 0usize; - for (va, area) in self.areas.iter_mut() { - if current_addr >= *va && current_addr < area.size + va { - if !area.info.prop.perm.contains(VmPerm::W) { - return Err(Error::PageFault); - } - let write_len = remain.min(area.size + va - current_addr); - area.write_data(current_addr, &data[offset..(offset + write_len)]); - offset += write_len; - remain -= write_len; - // remain -= (va.0 + area.size - current_addr).min(remain); - if remain == 0 { - return Ok(()); - } - current_addr = va + area.size; - } else if start_write { - return Err(Error::PageFault); - } - } - Err(Error::PageFault) - } - - pub fn read_bytes(&self, addr: usize, data: &mut [u8]) -> Result<()> { - let mut current_addr = addr; - let mut remain = data.len(); - let mut offset = 0usize; - let start_read = false; - for (va, area) in self.areas.iter() { - if current_addr >= *va && current_addr < area.size + va { - let read_len = remain.min(area.size + va - current_addr); - area.read_data(current_addr, &mut data[offset..(offset + read_len)]); - remain -= read_len; - offset += read_len; - // remain -= (va.0 + area.size - current_addr).min(remain); - if remain == 0 { - return Ok(()); - } - current_addr = va + area.size; - } else if start_read { - return Err(Error::PageFault); - } - } - Err(Error::PageFault) - } - - pub fn protect(&mut self, addr: Vaddr, op: impl MapOp) { - let va = addr..addr + PAGE_SIZE; - // Temporary solution, since the `MapArea` currently only represents - // a single `VmFrame`. 
- if let Some(areas) = self.areas.get_mut(&addr) { - areas.info.prop = op(areas.info); - } - self.pt.protect(&va, op).unwrap(); - } -} - -impl Clone for MemorySet { - fn clone(&self) -> Self { - let mut ms = Self::new(); - for area in self.areas.values() { - ms.map(area.clone()); - } - ms - } -} -impl Drop for MemorySet { - fn drop(&mut self) { - self.clear(); - } -} - -impl fmt::Debug for MemorySet { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - f.debug_struct("MemorySet") - .field("areas", &self.areas) - .field("page_table_root", &self.pt.root_paddr()) - .finish() - } -} diff --git a/framework/aster-frame/src/vm/mod.rs b/framework/aster-frame/src/vm/mod.rs index 77d27a55d..36dbcc212 100644 --- a/framework/aster-frame/src/vm/mod.rs +++ b/framework/aster-frame/src/vm/mod.rs @@ -14,7 +14,6 @@ mod frame_allocator; pub(crate) mod heap_allocator; mod io; pub(crate) mod kspace; -mod memory_set; mod offset; mod options; pub(crate) mod page_table; @@ -25,17 +24,15 @@ use core::ops::Range; use spin::Once; -pub(crate) use self::kspace::paddr_to_vaddr; pub use self::{ dma::{Daddr, DmaCoherent, DmaDirection, DmaStream, DmaStreamSlice, HasDaddr}, frame::{VmFrame, VmFrameVec, VmFrameVecIter, VmReader, VmSegment, VmWriter}, io::VmIo, kspace::vaddr_to_paddr, - memory_set::{MapArea, MemorySet}, options::VmAllocOptions, - page_table::PageTable, space::{VmMapOptions, VmPerm, VmSpace}, }; +pub(crate) use self::{kspace::paddr_to_vaddr, page_table::PageTable}; use crate::boot::memory_region::{MemoryRegion, MemoryRegionType}; pub const PAGE_SIZE: usize = 0x1000; diff --git a/framework/aster-frame/src/vm/page_table/cursor.rs b/framework/aster-frame/src/vm/page_table/cursor.rs index eae42ae87..90b4e8b57 100644 --- a/framework/aster-frame/src/vm/page_table/cursor.rs +++ b/framework/aster-frame/src/vm/page_table/cursor.rs @@ -1,22 +1,32 @@ // SPDX-License-Identifier: MPL-2.0 -use alloc::{boxed::Box, sync::Arc}; -use core::{any::TypeId, marker::PhantomData, mem::size_of, ops::Range}; +use alloc::sync::Arc; +use core::{any::TypeId, mem::size_of, ops::Range}; use super::{ - KernelMode, MapInfo, MapOp, MapProperty, PageTable, PageTableConstsTrait, PageTableEntryTrait, - PageTableError, PageTableFrame, PageTableMode, PtfRef, + Child, KernelMode, MapInfo, MapOp, MapProperty, PageTable, PageTableConstsTrait, + PageTableEntryTrait, PageTableError, PageTableFrame, PageTableMode, PtfRef, }; use crate::{ - sync::SpinLock, - vm::{paddr_to_vaddr, Paddr, Vaddr}, + sync::{ArcSpinLockGuard, SpinLock}, + vm::{paddr_to_vaddr, Paddr, Vaddr, VmFrame}, }; /// The cursor for forward traversal over the page table. /// +/// Each method may move the cursor forward, doing mapping unmaping, or +/// querying this slot. +/// /// Doing mapping is somewhat like a depth-first search on a tree, except /// that we modify the tree while traversing it. We use a stack to simulate /// the recursion. +/// +/// Any read or write accesses to nodes require exclusive access on the +/// entire path from the root to the node. But cursor can be created without +/// holding the lock, and can release the lock after yeilding the current +/// slot while querying over the page table with a range. Simultaneous +/// reading or writing to the same range in the page table will not produce +/// consistent results, only validity is guaranteed. 
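The path-locking rule described above can be pictured with plain `std` types. This is a conceptual model only, not the cursor's actual types: the real cursor keeps owned `lock_arc` guards in a fixed-size array next to its frame stack rather than a `Vec` of std guards.

```rust
use std::sync::{Arc, Mutex};

// Conceptual model of the locking discipline: lock every node on the path
// from the root down to the current level before touching a PTE, and drop
// the guards bottom-up afterwards, so a range query holds no locks between
// two yielded slots.
fn main() {
    let path: Vec<Arc<Mutex<u64>>> = (0..4).map(|lvl| Arc::new(Mutex::new(lvl))).collect();

    // Acquire top-down: root first, current level last.
    let mut guards: Vec<_> = path.iter().map(|node| node.lock().unwrap()).collect();

    // The whole path is now held exclusively; the PTE access happens here.
    **guards.last_mut().unwrap() += 1;

    // Release bottom-up: current level first, root last.
    while guards.pop().is_some() {}
}
```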
pub(super) struct PageTableCursor< 'a, M: PageTableMode, @@ -27,9 +37,80 @@ pub(super) struct PageTableCursor< [(); C::NR_LEVELS]:, { stack: [Option>; C::NR_LEVELS], + lock_guard: [Option>>; C::NR_LEVELS], level: usize, va: Vaddr, - _phantom_ref: PhantomData<&'a PageTable>, +} + +#[derive(Debug, Clone)] +pub(super) enum MapOption { + Map { + frame: VmFrame, + prop: MapProperty, + }, + MapUntyped { + pa: Paddr, + len: usize, + prop: MapProperty, + }, + Unmap { + len: usize, + }, +} + +impl MapOption { + fn paddr(&self) -> Option { + match self { + MapOption::Map { frame, prop } => Some(frame.start_paddr()), + MapOption::MapUntyped { pa, len, prop } => Some(*pa), + MapOption::Unmap { len } => None, + } + } + fn prop(&self) -> Option { + match self { + MapOption::Map { frame, prop } => Some(*prop), + MapOption::MapUntyped { pa, len, prop } => Some(*prop), + MapOption::Unmap { len } => None, + } + } + fn len(&self) -> usize { + match self { + // A VmFrame currently has a fixed size of 1 base page. + MapOption::Map { frame, prop } => crate::arch::mm::PageTableConsts::BASE_PAGE_SIZE, + MapOption::MapUntyped { pa, len, prop } => *len, + MapOption::Unmap { len: l } => *l, + } + } + fn consume(&mut self, len: usize) -> Self { + match self { + MapOption::Map { frame, prop } => { + debug_assert_eq!(len, crate::arch::mm::PageTableConsts::BASE_PAGE_SIZE); + let ret = self.clone(); + *self = MapOption::Unmap { len: 0 }; + ret + } + MapOption::MapUntyped { pa, len: l, prop } => { + debug_assert!(*l >= len); + let ret = MapOption::MapUntyped { + pa: *pa, + len, + prop: *prop, + }; + *self = MapOption::MapUntyped { + pa: *pa + len, + len: *l - len, + prop: *prop, + }; + ret + } + MapOption::Unmap { len: l } => { + debug_assert!(*l >= len); + let ret = MapOption::Unmap { len }; + *l -= len; + ret + } + } + } } impl PageTableCursor<'_, M, E, C> @@ -40,11 +121,12 @@ where pub(super) fn new(pt: &PageTable, va: Vaddr) -> Self { let mut stack = core::array::from_fn(|_| None); stack[0] = Some(pt.root_frame.clone()); + let lock_guard = core::array::from_fn(|_| None); Self { stack, + lock_guard, level: C::NR_LEVELS, va, - _phantom_ref: PhantomData, } } @@ -73,38 +155,45 @@ where /// it may cause undefined behavior if the caller does not ensure that the /// mapped address is valid and the page table is not corrupted if it is used /// by the kernel. - pub(super) unsafe fn map(&mut self, len: usize, create: Option<(Paddr, MapProperty)>) { + pub(super) unsafe fn map(&mut self, option: MapOption) { + self.acquire_locks(); + let len = option.len(); let end = self.va + len; - let mut create = create; - while self.va != end { - let top_spin = self.stack[C::NR_LEVELS - self.level].clone().unwrap(); - let mut top_ptf = top_spin.lock(); - // Go down if the page size is too big or alignment is not satisfied. - let is_pa_not_aligned = create - .map(|(pa, _)| pa % C::page_size(self.level) != 0) + let mut option = option; + while self.va < end { + // Skip if we are unmapping and it is already invalid. + let cur_pte = unsafe { self.cur_pte_ptr().read() }; + if matches!(option, MapOption::Unmap { .. }) && !cur_pte.is_valid() { + self.next_slot(); + continue; + } + + // We check among the conditions that may lead to a level down. + let is_pa_not_aligned = option + .paddr() + .map(|pa| pa % C::page_size(self.level) != 0) .unwrap_or(false); + let map_but_too_huge = self.level > C::HIGHEST_TRANSLATION_LEVEL + && !matches!(option, MapOption::Unmap { .. }); // We ensure not mapping in reserved kernel shared tables or releasing it. 
// Although it may be an invariant for all architectures and will be optimized // out by the compiler since `C::NR_LEVELS - 1 > C::HIGHEST_TRANSLATION_LEVEL`. let kshared_lvl_down = TypeId::of::() == TypeId::of::() && self.level >= C::NR_LEVELS - 1; - if self.level > C::HIGHEST_TRANSLATION_LEVEL + if map_but_too_huge || kshared_lvl_down || self.va % C::page_size(self.level) != 0 || self.va + C::page_size(self.level) > end || is_pa_not_aligned { - let ld_prop = create - .map(|(pa, prop)| prop) - .unwrap_or(MapProperty::new_invalid()); - self.level_down(&mut top_ptf, Some(ld_prop)); + let ld_prop = option.prop().unwrap_or(MapProperty::new_invalid()); + self.level_down(Some(ld_prop)); continue; } - self.map_page(&mut top_ptf, create); - create = create.map(|(pa, prop)| (pa + C::page_size(self.level), prop)); - drop(top_ptf); + self.map_page(option.consume(C::page_size(self.level))); self.next_slot(); } + self.release_locks(); } /// Apply the given operation to all the mappings within the range. @@ -112,39 +201,44 @@ where &mut self, len: usize, op: impl MapOp, + allow_protect_invalid: bool, ) -> Result<(), PageTableError> { + self.acquire_locks(); let end = self.va + len; - while self.va != end { - let top_spin = self.stack[C::NR_LEVELS - self.level].clone().unwrap(); - let mut top_ptf = top_spin.lock(); - let cur_pte = unsafe { self.cur_pte_ptr(&top_ptf).read() }; + while self.va < end { + let cur_pte = unsafe { self.cur_pte_ptr().read() }; if !cur_pte.is_valid() { - return Err(PageTableError::ProtectingInvalid); + if !allow_protect_invalid { + return Err(PageTableError::ProtectingInvalid); + } + self.next_slot(); + continue; } // Go down if it's not a last node or if the page size is too big. if !(cur_pte.is_huge() || self.level == 1) || (self.va % C::page_size(self.level)) != 0 || self.va + C::page_size(self.level) > end { - self.level_down(&mut top_ptf, Some(op(cur_pte.info()))); + self.level_down(Some(op(cur_pte.info()))); continue; } // Apply the operation. unsafe { - self.cur_pte_ptr(&top_ptf).write(E::new( + self.cur_pte_ptr().write(E::new( cur_pte.paddr(), op(cur_pte.info()), cur_pte.is_huge(), true, )) }; - drop(top_ptf); self.next_slot(); } + self.release_locks(); Ok(()) } - fn cur_pte_ptr(&self, ptf: &PageTableFrame) -> *mut E { + fn cur_pte_ptr(&self) -> *mut E { + let ptf = self.lock_guard[C::NR_LEVELS - self.level].as_ref().unwrap(); let frame_addr = paddr_to_vaddr(ptf.inner.start_paddr()); let offset = C::in_frame_index(self.va, self.level); (frame_addr + offset * size_of::()) as *mut E @@ -162,24 +256,26 @@ where /// Go up a level. We release the current frame if it has no mappings since the cursor only moves /// forward. And we will do the final cleanup using `level_up` when the cursor is dropped. + /// + /// This method requires locks acquired before calling it. The discarded level will be unlocked. 
fn level_up(&mut self) { let last_map_cnt_is_zero = { - let top_ptf_ref = self.stack[C::NR_LEVELS - self.level].clone().unwrap(); - let top_ptf = top_ptf_ref.lock(); + let top_ptf = self.lock_guard[C::NR_LEVELS - self.level].as_ref().unwrap(); top_ptf.map_count == 0 }; self.stack[C::NR_LEVELS - self.level] = None; + self.lock_guard[C::NR_LEVELS - self.level] = None; self.level += 1; let can_release_child = TypeId::of::() == TypeId::of::() && self.level < C::NR_LEVELS; if can_release_child && last_map_cnt_is_zero { - let top_ptf_ref = self.stack[C::NR_LEVELS - self.level].clone().unwrap(); - let mut top_ptf = top_ptf_ref.lock(); + let top_ptf = self.lock_guard[C::NR_LEVELS - self.level] + .as_deref_mut() + .unwrap(); let frame_addr = paddr_to_vaddr(top_ptf.inner.start_paddr()); - let offset = C::in_frame_index(self.va, self.level); - unsafe { ((frame_addr + offset) as *mut E).write(E::new_invalid()) } let idx = C::in_frame_index(self.va, self.level); - top_ptf.child.as_mut().unwrap()[idx] = None; + unsafe { (frame_addr as *mut E).add(idx).write(E::new_invalid()) } + top_ptf.child[idx] = None; top_ptf.map_count -= 1; } } @@ -190,18 +286,30 @@ where /// If that may happen the map property of intermediate level `prop` should be /// passed in correctly. Whether the map property matters in an intermediate /// level is architecture-dependent. - unsafe fn level_down(&mut self, top_ptf: &mut PageTableFrame, prop: Option) { - if top_ptf.child.is_none() { - top_ptf.child = Some(Box::new(core::array::from_fn(|_| None))); + /// + /// This method requires write locks acquired before calling it. The newly added + /// level will still hold the lock. + unsafe fn level_down(&mut self, prop: Option) { + debug_assert!(self.level > 1); + // Check if the child frame exists. + let nxt_lvl_frame = { + let idx = C::in_frame_index(self.va, self.level); + let child = { + let top_ptf = self.lock_guard[C::NR_LEVELS - self.level].as_ref().unwrap(); + &top_ptf.child[idx] + }; + if let Some(Child::PageTable(nxt_lvl_frame)) = child { + Some(nxt_lvl_frame.clone()) + } else { + None + } }; - let nxt_lvl_frame = if let Some(nxt_lvl_frame) = - top_ptf.child.as_ref().unwrap()[C::in_frame_index(self.va, self.level)].clone() - { - nxt_lvl_frame - } else { + // Create a new child frame if it does not exist. Sure it could be done only if + // it is allowed to modify the page table. + let nxt_lvl_frame = nxt_lvl_frame.unwrap_or_else(|| { let mut new_frame = PageTableFrame::::new(); // If it already maps a huge page, we should split it. - let pte = unsafe { self.cur_pte_ptr(top_ptf).read() }; + let pte = unsafe { self.cur_pte_ptr().read() }; if pte.is_valid() && pte.is_huge() { let pa = pte.paddr(); let prop = pte.info().prop; @@ -215,7 +323,7 @@ where } new_frame.map_count = C::NR_ENTRIES_PER_FRAME; unsafe { - self.cur_pte_ptr(top_ptf).write(E::new( + self.cur_pte_ptr().write(E::new( new_frame.inner.start_paddr(), prop, false, @@ -223,10 +331,10 @@ where )) } } else { - // The child couldn't be valid here cause child is none and it's not huge. + // The child couldn't be valid here because child is none and it's not huge. 
debug_assert!(!pte.is_valid()); unsafe { - self.cur_pte_ptr(top_ptf).write(E::new( + self.cur_pte_ptr().write(E::new( new_frame.inner.start_paddr(), prop.unwrap(), false, @@ -234,12 +342,16 @@ where )) } } + let top_ptf = self.lock_guard[C::NR_LEVELS - self.level] + .as_deref_mut() + .unwrap(); top_ptf.map_count += 1; let new_frame_ref = Arc::new(SpinLock::new(new_frame)); - top_ptf.child.as_mut().unwrap()[C::in_frame_index(self.va, self.level)] = - Some(new_frame_ref.clone()); + top_ptf.child[C::in_frame_index(self.va, self.level)] = + Some(Child::PageTable(new_frame_ref.clone())); new_frame_ref - }; + }); + self.lock_guard[C::NR_LEVELS - self.level + 1] = Some(nxt_lvl_frame.lock_arc()); self.stack[C::NR_LEVELS - self.level + 1] = Some(nxt_lvl_frame); self.level -= 1; } @@ -247,46 +359,61 @@ where /// Map or unmap the page pointed to by the cursor (which could be large). /// If the physical address and the map property are not provided, it unmaps /// the current page. - unsafe fn map_page( - &mut self, - top_ptf: &mut PageTableFrame, - create: Option<(Paddr, MapProperty)>, - ) { - let already_mapped = unsafe { self.cur_pte_ptr(top_ptf).read().is_valid() }; - if let Some((pa, prop)) = create { - unsafe { - self.cur_pte_ptr(top_ptf) - .write(E::new(pa, prop, self.level > 1, true)) - } - if !already_mapped { - top_ptf.map_count += 1; - } - } else { - unsafe { self.cur_pte_ptr(top_ptf).write(E::new_invalid()) } - if already_mapped { + /// + /// This method requires write locks acquired before calling it. + unsafe fn map_page(&mut self, option: MapOption) { + let pte_ptr = self.cur_pte_ptr(); + let top_ptf = self.lock_guard[C::NR_LEVELS - self.level] + .as_deref_mut() + .unwrap(); + let child = { + let idx = C::in_frame_index(self.va, self.level); + if top_ptf.child[idx].is_some() { + top_ptf.child[idx] = None; top_ptf.map_count -= 1; } - } - // If it dismantle a child page table frame by mapping a huge page - // we ensure it to be released. 
- if let Some(child) = &mut top_ptf.child { - let idx = C::in_frame_index(self.va, self.level); - if child[idx].is_some() { - child[idx] = None; - } + &mut top_ptf.child[idx] }; + match option { + MapOption::Map { frame, prop } => { + let pa = frame.start_paddr(); + unsafe { + pte_ptr.write(E::new(pa, prop, self.level > 1, true)); + } + *child = Some(Child::Frame(frame)); + top_ptf.map_count += 1; + } + MapOption::MapUntyped { pa, len, prop } => { + debug_assert_eq!(len, C::page_size(self.level)); + unsafe { + pte_ptr.write(E::new(pa, prop, self.level > 1, true)); + } + top_ptf.map_count += 1; + } + MapOption::Unmap { len } => { + debug_assert_eq!(len, C::page_size(self.level)); + unsafe { pte_ptr.write(E::new_invalid()) } + } + } } -} -impl<'a, M: PageTableMode, E: PageTableEntryTrait, C: PageTableConstsTrait> Drop - for PageTableCursor<'a, M, E, C> -where - [(); C::NR_ENTRIES_PER_FRAME]:, - [(); C::NR_LEVELS]:, -{ - fn drop(&mut self) { - while self.level < C::NR_LEVELS { - self.level_up(); + fn acquire_locks(&mut self) { + for i in 0..=C::NR_LEVELS - self.level { + let Some(ref ptf) = self.stack[i] else { + panic!("Invalid values in PT cursor stack while acuqiring locks"); + }; + debug_assert!(self.lock_guard[i].is_none()); + self.lock_guard[i] = Some(ptf.lock_arc()); + } + } + + fn release_locks(&mut self) { + for i in (0..=C::NR_LEVELS - self.level).rev() { + let Some(ref ptf) = self.stack[i] else { + panic!("Invalid values in PT cursor stack while releasing locks"); + }; + debug_assert!(self.lock_guard[i].is_some()); + self.lock_guard[i] = None; } } } @@ -315,9 +442,23 @@ where } } -pub struct PageTableQueryResult { - pub va: Range, - pub info: MapInfo, +#[derive(Clone, Debug)] +pub enum PageTableQueryResult { + NotMapped { + va: Vaddr, + len: usize, + }, + Mapped { + va: Vaddr, + frame: VmFrame, + info: MapInfo, + }, + MappedUntyped { + va: Vaddr, + pa: Paddr, + len: usize, + info: MapInfo, + }, } impl<'a, M: PageTableMode, E: PageTableEntryTrait, C: PageTableConstsTrait> Iterator @@ -329,36 +470,63 @@ where type Item = PageTableQueryResult; fn next(&mut self) -> Option { + self.cursor.acquire_locks(); if self.cursor.va >= self.end_va { return None; } loop { let level = self.cursor.level; let va = self.cursor.va; - let top_spin = self.cursor.stack[C::NR_LEVELS - level].clone().unwrap(); - let mut top_ptf = top_spin.lock(); - let cur_pte = unsafe { self.cursor.cur_pte_ptr(&top_ptf).read() }; + let top_ptf = self.cursor.lock_guard[C::NR_LEVELS - level] + .as_ref() + .unwrap(); + let cur_pte = unsafe { self.cursor.cur_pte_ptr().read() }; // Yeild if it's not a valid node. if !cur_pte.is_valid() { - return None; + self.cursor.next_slot(); + self.cursor.release_locks(); + return Some(PageTableQueryResult::NotMapped { + va, + len: C::page_size(level), + }); } // Go down if it's not a last node. if !(cur_pte.is_huge() || level == 1) { - // Safety: alignment checked and there should be a child frame here. + debug_assert!(cur_pte.is_valid()); + // Safety: it's valid and there should be a child frame here. unsafe { - self.cursor.level_down(&mut top_ptf, None); + self.cursor.level_down(None); } continue; } // Yield the current mapping. 
- let mapped_range = self.cursor.va..self.cursor.va + C::page_size(self.cursor.level); let map_info = cur_pte.info(); - drop(top_ptf); - self.cursor.next_slot(); - return Some(PageTableQueryResult { - va: mapped_range, - info: map_info, - }); + let idx = C::in_frame_index(self.cursor.va, self.cursor.level); + match top_ptf.child[idx] { + Some(Child::Frame(ref frame)) => { + let frame = frame.clone(); + self.cursor.next_slot(); + self.cursor.release_locks(); + return Some(PageTableQueryResult::Mapped { + va, + frame, + info: map_info, + }); + } + Some(Child::PageTable(_)) => { + panic!("The child couldn't be page table here because it's valid and not huge"); + } + None => { + self.cursor.next_slot(); + self.cursor.release_locks(); + return Some(PageTableQueryResult::MappedUntyped { + va, + pa: cur_pte.paddr(), + len: C::page_size(level), + info: map_info, + }); + } + } } } } diff --git a/framework/aster-frame/src/vm/page_table/frame.rs b/framework/aster-frame/src/vm/page_table/frame.rs new file mode 100644 index 000000000..618ede097 --- /dev/null +++ b/framework/aster-frame/src/vm/page_table/frame.rs @@ -0,0 +1,110 @@ +// SPDX-License-Identifier: MPL-2.0 + +use alloc::{boxed::Box, sync::Arc}; + +use super::{PageTableConstsTrait, PageTableEntryTrait}; +use crate::{ + sync::SpinLock, + vm::{VmAllocOptions, VmFrame}, +}; + +/// A page table frame. +/// It's also frequently referred to as a page table in many architectural documentations. +/// Cloning a page table frame will create a deep copy of the page table. +#[derive(Debug)] +pub(super) struct PageTableFrame +where + [(); C::NR_ENTRIES_PER_FRAME]:, + [(); C::NR_LEVELS]:, +{ + pub inner: VmFrame, + /// TODO: all the following fields can be removed if frame metadata is introduced. + /// Here we allow 2x space overhead each frame temporarily. + #[allow(clippy::type_complexity)] + pub child: Box<[Option>; C::NR_ENTRIES_PER_FRAME]>, + /// The number of mapped frames or page tables. + /// This is to track if we can free itself. + pub map_count: usize, +} + +pub(super) type PtfRef = Arc>>; + +#[derive(Debug)] +pub(super) enum Child +where + [(); C::NR_ENTRIES_PER_FRAME]:, + [(); C::NR_LEVELS]:, +{ + PageTable(PtfRef), + Frame(VmFrame), +} + +impl Clone for Child +where + [(); C::NR_ENTRIES_PER_FRAME]:, + [(); C::NR_LEVELS]:, +{ + /// This is a shallow copy. + fn clone(&self) -> Self { + match self { + Child::PageTable(ptf) => Child::PageTable(ptf.clone()), + Child::Frame(frame) => Child::Frame(frame.clone()), + } + } +} + +impl PageTableFrame +where + [(); C::NR_ENTRIES_PER_FRAME]:, + [(); C::NR_LEVELS]:, +{ + pub(super) fn new() -> Self { + Self { + inner: VmAllocOptions::new(1).alloc_single().unwrap(), + child: Box::new(core::array::from_fn(|_| None)), + map_count: 0, + } + } +} + +impl Clone for PageTableFrame +where + [(); C::NR_ENTRIES_PER_FRAME]:, + [(); C::NR_LEVELS]:, +{ + /// Make a deep copy of the page table. + /// The child page tables are also being deep copied. 
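Since `Clone for PageTable` is built on this deep copy of page-table frames, a hypothetical caller observes an independent tree that still maps the same physical frames. A sketch under those assumptions (`pt` is a user-mode handle, `va` a page-aligned, mapped user address):

```rust
// Sketch only: the page-table frames are deep-copied while the mapped
// physical frames are shared, so unmapping in one handle leaves the other
// handle's mappings intact.
let pa = pt.query(va).unwrap().0;
let cloned = pt.clone();
assert_eq!(cloned.query(va).unwrap().0, pa);

pt.unmap(&(va..va + PAGE_SIZE)).unwrap();
assert!(pt.query(va).is_none());
assert_eq!(cloned.query(va).unwrap().0, pa); // still mapped in the clone
```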
+ fn clone(&self) -> Self { + let new_frame = VmAllocOptions::new(1).alloc_single().unwrap(); + let new_ptr = new_frame.as_mut_ptr() as *mut E; + let ptr = self.inner.as_ptr() as *const E; + let child = Box::new(core::array::from_fn(|i| { + self.child[i].as_ref().map(|child| match child { + Child::PageTable(ptf) => unsafe { + let frame = ptf.lock(); + let cloned = frame.clone(); + let pte = ptr.add(i).read(); + new_ptr.add(i).write(E::new( + cloned.inner.start_paddr(), + pte.info().prop, + false, + false, + )); + Child::PageTable(Arc::new(SpinLock::new(cloned))) + }, + Child::Frame(frame) => { + unsafe { + let pte = ptr.add(i).read(); + new_ptr.add(i).write(pte); + } + Child::Frame(frame.clone()) + } + }) + })); + Self { + inner: new_frame, + child, + map_count: self.map_count, + } + } +} diff --git a/framework/aster-frame/src/vm/page_table/mod.rs b/framework/aster-frame/src/vm/page_table/mod.rs index f9b951083..bf87b43eb 100644 --- a/framework/aster-frame/src/vm/page_table/mod.rs +++ b/framework/aster-frame/src/vm/page_table/mod.rs @@ -1,29 +1,32 @@ // SPDX-License-Identifier: MPL-2.0 use alloc::{boxed::Box, sync::Arc}; -use core::{fmt::Debug, marker::PhantomData, mem::size_of, ops::Range}; +use core::{fmt::Debug, marker::PhantomData, mem::size_of, ops::Range, panic}; use crate::{ arch::mm::{activate_page_table, PageTableConsts, PageTableEntry}, sync::SpinLock, - vm::{paddr_to_vaddr, Paddr, Vaddr, VmAllocOptions, VmFrame, VmFrameVec, VmPerm, PAGE_SIZE}, + vm::{paddr_to_vaddr, Paddr, Vaddr, VmAllocOptions, VmFrameVec, VmPerm}, }; mod properties; pub use properties::*; +mod frame; +use frame::*; mod cursor; use cursor::*; +pub(crate) use cursor::{PageTableIter, PageTableQueryResult}; #[cfg(ktest)] mod test; -#[derive(Debug)] +#[derive(Clone, Copy, PartialEq, Eq, Debug)] pub enum PageTableError { InvalidVaddr(Vaddr), - InvalidVaddrRange(Range), + InvalidVaddrRange(Vaddr, Vaddr), VaddrNotAligned(Vaddr), - VaddrRangeNotAligned(Range), + VaddrRangeNotAligned(Vaddr, Vaddr), PaddrNotAligned(Paddr), - PaddrRangeNotAligned(Range), + PaddrRangeNotAligned(Vaddr, Vaddr), // Protecting a mapping that does not exist. ProtectingInvalid, } @@ -31,27 +34,34 @@ pub enum PageTableError { /// This is a compile-time technique to force the frame developers to distinguish /// between the kernel global page table instance, process specific user page table /// instance, and device page table instances. -pub trait PageTableMode: 'static { +pub trait PageTableMode: Clone + Debug + 'static { /// The range of virtual addresses that the page table can manage. const VADDR_RANGE: Range; + + /// Check if the given range is within the valid virtual address range. + fn encloses(r: &Range) -> bool { + Self::VADDR_RANGE.start <= r.start && r.end <= Self::VADDR_RANGE.end + } } -#[derive(Clone)] +#[derive(Clone, Debug)] pub struct UserMode {} impl PageTableMode for UserMode { const VADDR_RANGE: Range = 0..super::MAX_USERSPACE_VADDR; } -#[derive(Clone)] +#[derive(Clone, Debug)] pub struct KernelMode {} impl PageTableMode for KernelMode { const VADDR_RANGE: Range = super::KERNEL_BASE_VADDR..super::KERNEL_END_VADDR; } -/// A page table instance. -pub struct PageTable< +/// A handle to a page table. +/// A page table can track the lifetime of the mapped physical frames. +#[derive(Debug)] +pub(crate) struct PageTable< M: PageTableMode, E: PageTableEntryTrait = PageTableEntry, C: PageTableConstsTrait = PageTableConsts, @@ -63,70 +73,15 @@ pub struct PageTable< _phantom: PhantomData, } -/// A page table frame. 
-/// It's also frequently referred to as a page table in many architectural documentations. -#[derive(Debug)] -struct PageTableFrame -where - [(); C::NR_ENTRIES_PER_FRAME]:, - [(); C::NR_LEVELS]:, -{ - pub inner: VmFrame, - #[allow(clippy::type_complexity)] - pub child: Option>; C::NR_ENTRIES_PER_FRAME]>>, - /// The number of mapped frames or page tables. - /// This is to track if we can free itself. - pub map_count: usize, -} - -type PtfRef = Arc>>; - -impl PageTableFrame -where - [(); C::NR_ENTRIES_PER_FRAME]:, - [(); C::NR_LEVELS]:, -{ - pub(crate) fn new() -> Self { - Self { - inner: VmAllocOptions::new(1).alloc_single().unwrap(), - child: None, - map_count: 0, - } - } -} - impl PageTable where [(); C::NR_ENTRIES_PER_FRAME]:, [(); C::NR_LEVELS]:, { - pub(crate) fn map_frame( - &mut self, - vaddr: Vaddr, - frame: &VmFrame, - prop: MapProperty, - ) -> Result<(), PageTableError> { - if vaddr % C::BASE_PAGE_SIZE != 0 { - return Err(PageTableError::VaddrNotAligned(vaddr)); - } - let va_range = vaddr - ..vaddr - .checked_add(PAGE_SIZE) - .ok_or(PageTableError::InvalidVaddr(vaddr))?; - if !range_contains(&UserMode::VADDR_RANGE, &va_range) { - return Err(PageTableError::InvalidVaddrRange(va_range)); - } - // Safety: modification to the user page table is safe. - unsafe { - self.map_frame_unchecked(vaddr, frame, prop); - } - Ok(()) - } - pub(crate) fn map_frames( - &mut self, + &self, vaddr: Vaddr, - frames: &VmFrameVec, + frames: VmFrameVec, prop: MapProperty, ) -> Result<(), PageTableError> { if vaddr % C::BASE_PAGE_SIZE != 0 { @@ -136,8 +91,11 @@ where ..vaddr .checked_add(frames.nbytes()) .ok_or(PageTableError::InvalidVaddr(vaddr))?; - if !range_contains(&UserMode::VADDR_RANGE, &va_range) { - return Err(PageTableError::InvalidVaddrRange(va_range)); + if !UserMode::encloses(&va_range) { + return Err(PageTableError::InvalidVaddrRange( + va_range.start, + va_range.end, + )); } // Safety: modification to the user page table is safe. unsafe { @@ -146,34 +104,12 @@ where Ok(()) } - pub(crate) fn map( - &mut self, - vaddr: &Range, - paddr: &Range, - prop: MapProperty, - ) -> Result<(), PageTableError> { + pub(crate) fn unmap(&self, vaddr: &Range) -> Result<(), PageTableError> { if vaddr.start % C::BASE_PAGE_SIZE != 0 || vaddr.end % C::BASE_PAGE_SIZE != 0 { - return Err(PageTableError::VaddrRangeNotAligned(vaddr.clone())); + return Err(PageTableError::VaddrRangeNotAligned(vaddr.start, vaddr.end)); } - if paddr.start % C::BASE_PAGE_SIZE != 0 || paddr.end % C::BASE_PAGE_SIZE != 0 { - return Err(PageTableError::PaddrRangeNotAligned(paddr.clone())); - } - if !range_contains(&UserMode::VADDR_RANGE, vaddr) { - return Err(PageTableError::InvalidVaddrRange(vaddr.clone())); - } - // Safety: modification to the user page table is safe. - unsafe { - self.map_unchecked(vaddr, paddr, prop); - } - Ok(()) - } - - pub(crate) fn unmap(&mut self, vaddr: &Range) -> Result<(), PageTableError> { - if vaddr.start % C::BASE_PAGE_SIZE != 0 || vaddr.end % C::BASE_PAGE_SIZE != 0 { - return Err(PageTableError::VaddrRangeNotAligned(vaddr.clone())); - } - if !range_contains(&UserMode::VADDR_RANGE, vaddr) { - return Err(PageTableError::InvalidVaddrRange(vaddr.clone())); + if !UserMode::encloses(vaddr) { + return Err(PageTableError::InvalidVaddrRange(vaddr.start, vaddr.end)); } // Safety: modification to the user page table is safe. 
unsafe { @@ -183,18 +119,18 @@ where } pub(crate) fn protect( - &mut self, + &self, vaddr: &Range, op: impl MapOp, ) -> Result<(), PageTableError> { if vaddr.start % C::BASE_PAGE_SIZE != 0 || vaddr.end % C::BASE_PAGE_SIZE != 0 { - return Err(PageTableError::VaddrRangeNotAligned(vaddr.clone())); + return Err(PageTableError::VaddrRangeNotAligned(vaddr.start, vaddr.end)); } - if !range_contains(&UserMode::VADDR_RANGE, vaddr) { - return Err(PageTableError::InvalidVaddrRange(vaddr.clone())); + if !UserMode::encloses(vaddr) { + return Err(PageTableError::InvalidVaddrRange(vaddr.start, vaddr.end)); } // Safety: modification to the user page table is safe. - unsafe { self.protect_unchecked(vaddr, op) } + unsafe { self.cursor(vaddr.start).protect(vaddr.len(), op, false) } } pub(crate) fn activate(&self) { @@ -204,6 +140,59 @@ where self.activate_unchecked(); } } + + /// Remove all write permissions from the user page table and mark the page + /// table as copy-on-write, and the create a handle to the new page table. + /// + /// That is, new page tables will be created when needed if a write operation + /// is performed on either of the user page table handles. Calling this function + /// performs no significant operations. + pub(crate) fn fork_copy_on_write(&self) -> Self { + unsafe { + self.protect_unchecked(&UserMode::VADDR_RANGE, perm_op(|perm| perm & !VmPerm::W)); + } + // TODO: implement the copy-on-write mechanism. This is a simple workaround. + let new_root_frame = VmAllocOptions::new(1).alloc_single().unwrap(); + let root_frame = self.root_frame.lock(); + let half_of_entries = C::NR_ENTRIES_PER_FRAME / 2; + let new_ptr = new_root_frame.as_mut_ptr() as *mut E; + let ptr = root_frame.inner.as_ptr() as *const E; + let child = Box::new(core::array::from_fn(|i| { + if i < half_of_entries { + // This is user space, deep copy the child. + root_frame.child[i].as_ref().map(|child| match child { + Child::PageTable(ptf) => unsafe { + let frame = ptf.lock(); + let cloned = frame.clone(); + let pte = ptr.add(i).read(); + new_ptr.add(i).write(E::new( + cloned.inner.start_paddr(), + pte.info().prop, + false, + false, + )); + Child::PageTable(Arc::new(SpinLock::new(cloned))) + }, + Child::Frame(_) => panic!("Unexpected frame child."), + }) + } else { + // This is kernel space, share the child. + unsafe { + let pte = ptr.add(i).read(); + new_ptr.add(i).write(pte); + } + root_frame.child[i].clone() + } + })); + PageTable:: { + root_frame: Arc::new(SpinLock::new(PageTableFrame:: { + inner: new_root_frame, + child, + map_count: root_frame.map_count, + })), + _phantom: PhantomData, + } + } } impl PageTable @@ -213,22 +202,21 @@ where { /// Create a new user page table. /// - /// This should be the only way to create a user page table, that is + /// This should be the only way to create the first user page table, that is /// to fork the kernel page table with all the kernel mappings shared. - pub(crate) fn fork(&self) -> PageTable { + /// + /// Then, one can use a user page table to call [`fork_copy_on_write`], creating + /// other child page tables. + pub(crate) fn create_user_page_table(&self) -> PageTable { let new_root_frame = VmAllocOptions::new(1).alloc_single().unwrap(); let root_frame = self.root_frame.lock(); - // Safety: The root_paddr is the root of a valid page table and - // it does not overlap with the new page. 
- unsafe { - let src = paddr_to_vaddr(root_frame.inner.start_paddr()) as *const E; - let dst = paddr_to_vaddr(new_root_frame.start_paddr()) as *mut E; - core::ptr::copy_nonoverlapping(src, dst, C::NR_ENTRIES_PER_FRAME); - } + let half_of_entries = C::NR_ENTRIES_PER_FRAME / 2; + new_root_frame.copy_from_frame(&root_frame.inner); + let child = Box::new(core::array::from_fn(|i| root_frame.child[i].clone())); PageTable:: { root_frame: Arc::new(SpinLock::new(PageTableFrame:: { inner: new_root_frame, - child: root_frame.child.clone(), + child, map_count: root_frame.map_count, })), _phantom: PhantomData, @@ -246,11 +234,8 @@ where let end = root_index.end; assert!(end <= C::NR_ENTRIES_PER_FRAME); let mut root_frame = self.root_frame.lock(); - if root_frame.child.is_none() { - root_frame.child = Some(Box::new(core::array::from_fn(|_| None))); - } for i in start..end { - let no_such_child = root_frame.child.as_ref().unwrap()[i].is_none(); + let no_such_child = root_frame.child[i].is_none(); if no_such_child { let frame = PageTableFrame::::new(); let pte_ptr = (root_frame.inner.start_paddr() + i * size_of::()) as *mut E; @@ -267,8 +252,7 @@ where false, )); } - let child_array = root_frame.child.as_mut().unwrap(); - child_array[i] = Some(Arc::new(SpinLock::new(frame))); + root_frame.child[i] = Some(Child::PageTable(Arc::new(SpinLock::new(frame)))); root_frame.map_count += 1; } } @@ -293,74 +277,68 @@ where self.root_frame.lock().inner.start_paddr() } - /// Translate a virtual address to a physical address using the page table. - pub(crate) fn translate(&self, vaddr: Vaddr) -> Option { - // Safety: The root frame is a valid page table frame so the address is valid. - unsafe { page_walk::(self.root_paddr(), vaddr) } - } - - pub(crate) unsafe fn map_frame_unchecked( - &mut self, - vaddr: Vaddr, - frame: &VmFrame, - prop: MapProperty, - ) { - self.cursor(vaddr) - .map(PAGE_SIZE, Some((frame.start_paddr(), prop))); - } - pub(crate) unsafe fn map_frames_unchecked( - &mut self, + &self, vaddr: Vaddr, - frames: &VmFrameVec, + frames: VmFrameVec, prop: MapProperty, ) { let mut cursor = self.cursor(vaddr); - for frame in frames.iter() { - cursor.map(PAGE_SIZE, Some((frame.start_paddr(), prop))); + for frame in frames.into_iter() { + cursor.map(MapOption::Map { frame, prop }); } } pub(crate) unsafe fn map_unchecked( - &mut self, + &self, vaddr: &Range, paddr: &Range, prop: MapProperty, ) { + self.cursor(vaddr.start).map(MapOption::MapUntyped { + pa: paddr.start, + len: vaddr.len(), + prop, + }); + } + + pub(crate) unsafe fn unmap_unchecked(&self, vaddr: &Range) { self.cursor(vaddr.start) - .map(vaddr.len(), Some((paddr.start, prop))); + .map(MapOption::Unmap { len: vaddr.len() }); } - pub(crate) unsafe fn unmap_unchecked(&mut self, vaddr: &Range) { - self.cursor(vaddr.start).map(vaddr.len(), None); + pub(crate) unsafe fn protect_unchecked(&self, vaddr: &Range, op: impl MapOp) { + self.cursor(vaddr.start) + .protect(vaddr.len(), op, true) + .unwrap(); } - pub(crate) unsafe fn protect_unchecked( - &mut self, - vaddr: &Range, - op: impl MapOp, - ) -> Result<(), PageTableError> { - self.cursor(vaddr.start).protect(vaddr.len(), op) - } - - pub(crate) fn query( + /// Query about the mappings of a range of virtual addresses. 
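A usage sketch for the range query defined next, assuming `pt` is a user-mode page table handle and `va_range` is a page-aligned range inside the user address space; each yielded item describes one slot, in the same shape the updated tests below match on:

```rust
for qr in pt.query_range(&va_range).unwrap() {
    match qr {
        PageTableQueryResult::NotMapped { va, len } => {
            // A hole of `len` bytes starting at `va`.
            let _ = (va, len);
        }
        PageTableQueryResult::Mapped { va, frame, info } => {
            // A typed mapping tracked by a `VmFrame` handle.
            assert_eq!(pt.query(va).unwrap().0, frame.start_paddr());
            let _ = info;
        }
        PageTableQueryResult::MappedUntyped { va, pa, len, info } => {
            // An untracked mapping, e.g. one created with `map_unchecked`.
            let _ = (va, pa, len, info);
        }
    }
}
```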
+ pub(crate) fn query_range( &'a self, vaddr: &Range, ) -> Result, PageTableError> { if vaddr.start % C::BASE_PAGE_SIZE != 0 || vaddr.end % C::BASE_PAGE_SIZE != 0 { - return Err(PageTableError::InvalidVaddrRange(vaddr.clone())); + return Err(PageTableError::InvalidVaddrRange(vaddr.start, vaddr.end)); } - if !range_contains(&M::VADDR_RANGE, vaddr) { - return Err(PageTableError::InvalidVaddrRange(vaddr.clone())); + if !M::encloses(vaddr) { + return Err(PageTableError::InvalidVaddrRange(vaddr.start, vaddr.end)); } Ok(PageTableIter::new(self, vaddr)) } + /// Query about the mapping of a single byte at the given virtual address. + pub(crate) fn query(&self, vaddr: Vaddr) -> Option<(Paddr, MapInfo)> { + // Safety: The root frame is a valid page table frame so the address is valid. + unsafe { page_walk::(self.root_paddr(), vaddr) } + } + pub(crate) unsafe fn activate_unchecked(&self) { activate_page_table(self.root_paddr(), CachePolicy::Writeback); } - /// Create a new cursor for the page table initialized at the given virtual address. + /// Create a new mutating cursor for the page table. + /// The cursor is initialized atthe given virtual address. fn cursor(&self, va: usize) -> PageTableCursor<'a, M, E, C> { PageTableCursor::new(self, va) } @@ -376,9 +354,23 @@ where } } +impl Clone for PageTable +where + [(); C::NR_ENTRIES_PER_FRAME]:, + [(); C::NR_LEVELS]:, +{ + fn clone(&self) -> Self { + let frame = self.root_frame.lock(); + PageTable { + root_frame: Arc::new(SpinLock::new(frame.clone())), + _phantom: PhantomData, + } + } +} + /// A software emulation of the MMU address translation process. -/// It returns the physical address of the given virtual address if a valid mapping -/// exists for the given virtual address. +/// It returns the physical address of the given virtual address and the mapping info +/// if a valid mapping exists for the given virtual address. /// /// # Safety /// @@ -387,13 +379,13 @@ where pub(super) unsafe fn page_walk( root_paddr: Paddr, vaddr: Vaddr, -) -> Option { +) -> Option<(Paddr, MapInfo)> { let mut cur_level = C::NR_LEVELS; let mut cur_pte = { let frame_addr = paddr_to_vaddr(root_paddr); let offset = C::in_frame_index(vaddr, cur_level); // Safety: The offset does not exceed the value of PAGE_SIZE. - unsafe { &*(frame_addr as *const E).add(offset) } + unsafe { (frame_addr as *const E).add(offset).read() } }; while cur_level > 1 { @@ -409,17 +401,16 @@ pub(super) unsafe fn page_walk( let frame_addr = paddr_to_vaddr(cur_pte.paddr()); let offset = C::in_frame_index(vaddr, cur_level); // Safety: The offset does not exceed the value of PAGE_SIZE. - unsafe { &*(frame_addr as *const E).add(offset) } + unsafe { (frame_addr as *const E).add(offset).read() } }; } if cur_pte.is_valid() { - Some(cur_pte.paddr() + (vaddr & (C::page_size(cur_level) - 1))) + Some(( + cur_pte.paddr() + (vaddr & (C::page_size(cur_level) - 1)), + cur_pte.info(), + )) } else { None } } - -fn range_contains>(parent: &Range, child: &Range) -> bool { - parent.start <= child.start && parent.end >= child.end -} diff --git a/framework/aster-frame/src/vm/page_table/properties.rs b/framework/aster-frame/src/vm/page_table/properties.rs index fa38138fe..0fe69ec3d 100644 --- a/framework/aster-frame/src/vm/page_table/properties.rs +++ b/framework/aster-frame/src/vm/page_table/properties.rs @@ -116,12 +116,12 @@ pub struct MapProperty { pub perm: VmPerm, /// Global. /// A global page is not evicted from the TLB when TLB is flushed. 
- pub global: bool, + pub(crate) global: bool, /// The properties of a memory mapping that is used and defined as flags in PTE /// in specific architectures on an ad hoc basis. The logics provided by the /// page table module will not be affected by this field. - pub extension: u64, - pub cache: CachePolicy, + pub(crate) extension: u64, + pub(crate) cache: CachePolicy, } /// Any functions that could be used to modify the map property of a memory mapping. @@ -129,7 +129,7 @@ pub struct MapProperty { /// To protect a virtual address range, you can either directly use a `MapProperty` object, /// /// ```rust -/// let page_table = KERNEL_PAGE_TABLE.get().unwrap().lock(); +/// let page_table = KERNEL_PAGE_TABLE.get().unwrap(); /// let prop = MapProperty { /// perm: VmPerm::R, /// global: true, @@ -142,7 +142,7 @@ pub struct MapProperty { /// use a map operation /// /// ```rust -/// let page_table = KERNEL_PAGE_TABLE.get().unwrap().lock(); +/// let page_table = KERNEL_PAGE_TABLE.get().unwrap(); /// page_table.map(0..PAGE_SIZE, cache_policy_op(CachePolicy::Writeback)); /// page_table.map(0..PAGE_SIZE, perm_op(|perm| perm | VmPerm::R)); /// ``` @@ -150,7 +150,7 @@ pub struct MapProperty { /// or even customize a map operation using a closure /// /// ```rust -/// let page_table = KERNEL_PAGE_TABLE.get().unwrap().lock(); +/// let page_table = KERNEL_PAGE_TABLE.get().unwrap(); /// page_table.map(0..PAGE_SIZE, |info| { /// assert!(info.prop.perm.contains(VmPerm::R)); /// MapProperty { @@ -164,8 +164,8 @@ pub struct MapProperty { pub trait MapOp: Fn(MapInfo) -> MapProperty {} impl MapOp for F where F: Fn(MapInfo) -> MapProperty {} -// These implementations allow a property to be used as an overriding map operation. -// Other usages seems pointless. +// These implementations allow a property or permission to be used as an +// overriding map operation. Other usages seems pointless. impl FnOnce<(MapInfo,)> for MapProperty { type Output = MapProperty; extern "rust-call" fn call_once(self, _: (MapInfo,)) -> MapProperty { @@ -182,6 +182,31 @@ impl Fn<(MapInfo,)> for MapProperty { *self } } +impl FnOnce<(MapInfo,)> for VmPerm { + type Output = MapProperty; + extern "rust-call" fn call_once(self, info: (MapInfo,)) -> MapProperty { + MapProperty { + perm: self, + ..info.0.prop + } + } +} +impl FnMut<(MapInfo,)> for VmPerm { + extern "rust-call" fn call_mut(&mut self, info: (MapInfo,)) -> MapProperty { + MapProperty { + perm: *self, + ..info.0.prop + } + } +} +impl Fn<(MapInfo,)> for VmPerm { + extern "rust-call" fn call(&self, info: (MapInfo,)) -> MapProperty { + MapProperty { + perm: *self, + ..info.0.prop + } + } +} /// A life saver for creating a map operation that sets the cache policy. pub fn cache_policy_op(cache: CachePolicy) -> impl MapOp { @@ -229,6 +254,18 @@ pub struct MapInfo { pub status: MapStatus, } +impl MapInfo { + pub fn contains(&self, perm: VmPerm) -> bool { + self.prop.perm.contains(perm) + } + pub fn accessed(&self) -> bool { + self.status.contains(MapStatus::ACCESSED) + } + pub fn dirty(&self) -> bool { + self.status.contains(MapStatus::DIRTY) + } +} + pub trait PageTableEntryTrait: Clone + Copy + Sized + Pod + Debug { /// Create a new invalid page table flags that causes page faults /// when the MMU meets them. 
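With the `Fn(MapInfo) -> MapProperty` implementations for `VmPerm` above, a bare permission value can be passed anywhere a `MapOp` is expected, overriding only the permission bits; the new `MapInfo` helpers then make query results easy to check. A sketch, assuming `pt` is a user-mode page table handle and `va` is a page-aligned, already-mapped user address:

```rust
// Overriding the permission of one mapped page with a bare `VmPerm` value.
pt.protect(&(va..va + PAGE_SIZE), VmPerm::R).unwrap();
// Equivalent closure-based form:
pt.protect(&(va..va + PAGE_SIZE), perm_op(|_| VmPerm::R)).unwrap();

// Inspecting the result with the new `MapInfo` helpers.
let (_pa, info) = pt.query(va).unwrap();
assert!(info.contains(VmPerm::R));
assert!(!info.contains(VmPerm::W));
let _touched = info.accessed() || info.dirty();
```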
diff --git a/framework/aster-frame/src/vm/page_table/test.rs b/framework/aster-frame/src/vm/page_table/test.rs index df8ada1b4..99a74a909 100644 --- a/framework/aster-frame/src/vm/page_table/test.rs +++ b/framework/aster-frame/src/vm/page_table/test.rs @@ -7,65 +7,84 @@ const PAGE_SIZE: usize = 4096; #[ktest] fn test_range_check() { - let mut pt = PageTable::::empty(); + let pt = PageTable::::empty(); let good_va = 0..PAGE_SIZE; let bad_va = 0..PAGE_SIZE + 1; let bad_va2 = LINEAR_MAPPING_BASE_VADDR..LINEAR_MAPPING_BASE_VADDR + PAGE_SIZE; - let to = PAGE_SIZE..PAGE_SIZE * 2; - assert!(pt.query(&good_va).is_ok()); - assert!(pt.query(&bad_va).is_err()); - assert!(pt.query(&bad_va2).is_err()); + let to = VmAllocOptions::new(1).alloc().unwrap(); + assert!(pt.query_range(&good_va).is_ok()); + assert!(pt.query_range(&bad_va).is_err()); + assert!(pt.query_range(&bad_va2).is_err()); assert!(pt.unmap(&good_va).is_ok()); assert!(pt.unmap(&bad_va).is_err()); assert!(pt.unmap(&bad_va2).is_err()); assert!(pt - .map(&good_va, &to, MapProperty::new_general(VmPerm::R)) + .map_frames( + good_va.start, + to.clone(), + MapProperty::new_general(VmPerm::R) + ) .is_ok()); - assert!(pt.map(&bad_va, &to, MapProperty::new_invalid()).is_err()); - assert!(pt.map(&bad_va2, &to, MapProperty::new_invalid()).is_err()); + assert!(pt + .map_frames(bad_va2.start, to.clone(), MapProperty::new_invalid()) + .is_err()); } #[ktest] fn test_map_unmap() { - let mut pt = PageTable::::empty(); + let pt = PageTable::::empty(); let from = PAGE_SIZE..PAGE_SIZE * 2; - let frame = VmAllocOptions::new(1).alloc_single().unwrap(); + let frames = VmAllocOptions::new(1).alloc().unwrap(); + let start_paddr = frames.get(0).unwrap().start_paddr(); let prop = MapProperty::new_general(VmPerm::RW); - pt.map_frame(from.start, &frame, prop).unwrap(); - assert_eq!( - pt.translate(from.start + 10).unwrap(), - frame.start_paddr() + 10 - ); + pt.map_frames(from.start, frames.clone(), prop).unwrap(); + assert_eq!(pt.query(from.start + 10).unwrap().0, start_paddr + 10); pt.unmap(&from).unwrap(); - assert!(pt.translate(from.start + 10).is_none()); + assert!(pt.query(from.start + 10).is_none()); let from_ppn = 13245..512 * 512 + 23456; let to_ppn = from_ppn.start - 11010..from_ppn.end - 11010; let from = PAGE_SIZE * from_ppn.start..PAGE_SIZE * from_ppn.end; let to = PAGE_SIZE * to_ppn.start..PAGE_SIZE * to_ppn.end; - pt.map(&from, &to, prop).unwrap(); + unsafe { pt.map_unchecked(&from, &to, prop) }; for i in 0..100 { let offset = i * (PAGE_SIZE + 1000); - assert_eq!( - pt.translate(from.start + offset).unwrap(), - to.start + offset - ); + assert_eq!(pt.query(from.start + offset).unwrap().0, to.start + offset); } let unmap = PAGE_SIZE * 123..PAGE_SIZE * 3434; pt.unmap(&unmap).unwrap(); for i in 0..100 { let offset = i * (PAGE_SIZE + 10); if unmap.start <= from.start + offset && from.start + offset < unmap.end { - assert!(pt.translate(from.start + offset).is_none()); + assert!(pt.query(from.start + offset).is_none()); } else { - assert_eq!( - pt.translate(from.start + offset).unwrap(), - to.start + offset - ); + assert_eq!(pt.query(from.start + offset).unwrap().0, to.start + offset); } } } +#[ktest] +fn test_user_copy_on_write() { + let pt = PageTable::::empty(); + let from = PAGE_SIZE..PAGE_SIZE * 2; + let frames = VmAllocOptions::new(1).alloc().unwrap(); + let start_paddr = frames.get(0).unwrap().start_paddr(); + let prop = MapProperty::new_general(VmPerm::RW); + pt.map_frames(from.start, frames.clone(), prop).unwrap(); + assert_eq!(pt.query(from.start + 
10).unwrap().0, start_paddr + 10); + pt.unmap(&from).unwrap(); + assert!(pt.query(from.start + 10).is_none()); + pt.map_frames(from.start, frames.clone(), prop).unwrap(); + assert_eq!(pt.query(from.start + 10).unwrap().0, start_paddr + 10); + + let child_pt = pt.fork_copy_on_write(); + assert_eq!(pt.query(from.start + 10).unwrap().0, start_paddr + 10); + assert_eq!(child_pt.query(from.start + 10).unwrap().0, start_paddr + 10); + pt.unmap(&from).unwrap(); + assert!(pt.query(from.start + 10).is_none()); + assert_eq!(child_pt.query(from.start + 10).unwrap().0, start_paddr + 10); +} + type Qr = PageTableQueryResult; #[derive(Debug)] @@ -79,23 +98,29 @@ impl PageTableConstsTrait for BasePageTableConsts { } #[ktest] -fn test_base_protect_query() { - let mut pt = PageTable::::empty(); +fn test_base_protect_query_range() { + let pt = PageTable::::empty(); let from_ppn = 1..1000; let from = PAGE_SIZE * from_ppn.start..PAGE_SIZE * from_ppn.end; let to = PAGE_SIZE * 1000..PAGE_SIZE * 1999; let prop = MapProperty::new_general(VmPerm::RW); - pt.map(&from, &to, prop).unwrap(); - for (Qr { va, info }, i) in pt.query(&from).unwrap().zip(from_ppn) { + unsafe { pt.map_unchecked(&from, &to, prop) }; + for (qr, i) in pt.query_range(&from).unwrap().zip(from_ppn) { + let Qr::MappedUntyped { va, pa, len, info } = qr else { + panic!("Expected MappedUntyped, got {:?}", qr); + }; assert_eq!(info.prop.perm, VmPerm::RW); assert_eq!(info.prop.cache, CachePolicy::Writeback); - assert_eq!(va, i * PAGE_SIZE..(i + 1) * PAGE_SIZE); + assert_eq!(va..va + len, i * PAGE_SIZE..(i + 1) * PAGE_SIZE); } let prot = PAGE_SIZE * 18..PAGE_SIZE * 20; pt.protect(&prot, perm_op(|p| p - VmPerm::W)).unwrap(); - for (Qr { va, info }, i) in pt.query(&prot).unwrap().zip(18..20) { + for (qr, i) in pt.query_range(&prot).unwrap().zip(18..20) { + let Qr::MappedUntyped { va, pa, len, info } = qr else { + panic!("Expected MappedUntyped, got {:?}", qr); + }; assert_eq!(info.prop.perm, VmPerm::R); - assert_eq!(va, i * PAGE_SIZE..(i + 1) * PAGE_SIZE); + assert_eq!(va..va + len, i * PAGE_SIZE..(i + 1) * PAGE_SIZE); } } @@ -110,8 +135,8 @@ impl PageTableConstsTrait for VeryHugePageTableConsts { } #[ktest] -fn test_large_protect_query() { - let mut pt = PageTable::::empty(); +fn test_large_protect_query_range() { + let pt = PageTable::::empty(); let gmult = 512 * 512; let from_ppn = gmult - 512..gmult + gmult + 514; let to_ppn = gmult - 512 - 512..gmult + gmult - 512 + 514; @@ -123,20 +148,23 @@ fn test_large_protect_query() { let from = PAGE_SIZE * from_ppn.start..PAGE_SIZE * from_ppn.end; let to = PAGE_SIZE * to_ppn.start..PAGE_SIZE * to_ppn.end; let prop = MapProperty::new_general(VmPerm::RW); - pt.map(&from, &to, prop).unwrap(); - for (Qr { va, info }, i) in pt.query(&from).unwrap().zip(0..512 + 2 + 2) { + unsafe { pt.map_unchecked(&from, &to, prop) }; + for (qr, i) in pt.query_range(&from).unwrap().zip(0..512 + 2 + 2) { + let Qr::MappedUntyped { va, pa, len, info } = qr else { + panic!("Expected MappedUntyped, got {:?}", qr); + }; assert_eq!(info.prop.perm, VmPerm::RW); assert_eq!(info.prop.cache, CachePolicy::Writeback); if i < 512 + 2 { - assert_eq!(va.start, from.start + i * PAGE_SIZE * 512); - assert_eq!(va.end, from.start + (i + 1) * PAGE_SIZE * 512); + assert_eq!(va, from.start + i * PAGE_SIZE * 512); + assert_eq!(va + len, from.start + (i + 1) * PAGE_SIZE * 512); } else { assert_eq!( - va.start, + va, from.start + (512 + 2) * PAGE_SIZE * 512 + (i - 512 - 2) * PAGE_SIZE ); assert_eq!( - va.end, + va + len, from.start + (512 + 2) * PAGE_SIZE * 
512 + (i - 512 - 2 + 1) * PAGE_SIZE ); } @@ -144,24 +172,33 @@ fn test_large_protect_query() { let ppn = from_ppn.start + 18..from_ppn.start + 20; let va = PAGE_SIZE * ppn.start..PAGE_SIZE * ppn.end; pt.protect(&va, perm_op(|p| p - VmPerm::W)).unwrap(); - for (r, i) in pt - .query(&(va.start - PAGE_SIZE..va.start)) + for (qr, i) in pt + .query_range(&(va.start - PAGE_SIZE..va.start)) .unwrap() .zip(ppn.start - 1..ppn.start) { - assert_eq!(r.info.prop.perm, VmPerm::RW); - assert_eq!(r.va, i * PAGE_SIZE..(i + 1) * PAGE_SIZE); + let Qr::MappedUntyped { va, pa, len, info } = qr else { + panic!("Expected MappedUntyped, got {:?}", qr); + }; + assert_eq!(info.prop.perm, VmPerm::RW); + assert_eq!(va..va + len, i * PAGE_SIZE..(i + 1) * PAGE_SIZE); } - for (Qr { va, info }, i) in pt.query(&va).unwrap().zip(ppn.clone()) { + for (qr, i) in pt.query_range(&va).unwrap().zip(ppn.clone()) { + let Qr::MappedUntyped { va, pa, len, info } = qr else { + panic!("Expected MappedUntyped, got {:?}", qr); + }; assert_eq!(info.prop.perm, VmPerm::R); - assert_eq!(va, i * PAGE_SIZE..(i + 1) * PAGE_SIZE); + assert_eq!(va..va + len, i * PAGE_SIZE..(i + 1) * PAGE_SIZE); } - for (r, i) in pt - .query(&(va.end..va.end + PAGE_SIZE)) + for (qr, i) in pt + .query_range(&(va.end..va.end + PAGE_SIZE)) .unwrap() .zip(ppn.end..ppn.end + 1) { - assert_eq!(r.info.prop.perm, VmPerm::RW); - assert_eq!(r.va, i * PAGE_SIZE..(i + 1) * PAGE_SIZE); + let Qr::MappedUntyped { va, pa, len, info } = qr else { + panic!("Expected MappedUntyped, got {:?}", qr); + }; + assert_eq!(info.prop.perm, VmPerm::RW); + assert_eq!(va..va + len, i * PAGE_SIZE..(i + 1) * PAGE_SIZE); } } diff --git a/framework/aster-frame/src/vm/space.rs b/framework/aster-frame/src/vm/space.rs index 390ebf8dc..dbeb174db 100644 --- a/framework/aster-frame/src/vm/space.rs +++ b/framework/aster-frame/src/vm/space.rs @@ -2,14 +2,24 @@ use core::ops::Range; +use align_ext::AlignExt; use bitflags::bitflags; -use super::{is_page_aligned, MapArea, MemorySet, VmFrameVec, VmIo}; +use super::{ + is_page_aligned, + kspace::KERNEL_PAGE_TABLE, + page_table::{ + MapInfo, MapOp, PageTable, PageTableConstsTrait, PageTableQueryResult as PtQr, UserMode, + }, + VmFrameVec, VmIo, PAGE_SIZE, +}; use crate::{ - arch::mm::PageTableFlags, + arch::mm::{PageTableConsts, PageTableEntry}, prelude::*, - sync::Mutex, - vm::{page_table::MapProperty, PAGE_SIZE}, + vm::{ + page_table::{CachePolicy, MapProperty, PageTableIter}, + VmFrame, MAX_USERSPACE_VADDR, + }, Error, }; @@ -23,29 +33,28 @@ use crate::{ /// /// A newly-created `VmSpace` is not backed by any physical memory pages. /// To provide memory pages for a `VmSpace`, one can allocate and map -/// physical memory (`VmFrames`) to the `VmSpace`. - -#[derive(Debug, Clone)] +/// physical memory (`VmFrame`s) to the `VmSpace`. +#[derive(Debug)] pub struct VmSpace { - memory_set: Arc>, + pt: PageTable, } impl VmSpace { /// Creates a new VM address space. pub fn new() -> Self { Self { - memory_set: Arc::new(Mutex::new(MemorySet::new())), + pt: KERNEL_PAGE_TABLE.get().unwrap().create_user_page_table(), } } /// Activate the page table. - pub fn activate(&self) { - self.memory_set.lock().pt.activate(); + pub(crate) fn activate(&self) { + self.pt.activate(); } /// Maps some physical memory pages into the VM space according to the given /// options, returning the address where the mapping is created. /// - /// the frames in variable frames will delete after executing this function + /// The ownership of the frames will be transferred to the `VmSpace`. 
/// /// For more information, see `VmMapOptions`. pub fn map(&self, frames: VmFrameVec, options: &VmMapOptions) -> Result { @@ -53,56 +62,49 @@ impl VmSpace { return Err(Error::InvalidArgs); } - // if can overwrite, the old mapping should be unmapped. - if options.can_overwrite { - let addr = options.addr.unwrap(); - let size = frames.nbytes(); - let _ = self.unmap(&(addr..addr + size)); + let addr = options.addr.unwrap(); + let size = frames.nbytes(); + + // If overwrite is forbidden, we should check if there are existing mappings + if !options.can_overwrite { + let end = addr.checked_add(size).ok_or(Error::Overflow)?; + for qr in self.query_range(&(addr..end)).unwrap() { + if matches!(qr, VmQueryResult::Mapped { .. }) { + return Err(Error::MapAlreadyMappedVaddr); + } + } } - // debug!("map to vm space: 0x{:x}", options.addr.unwrap()); + self.pt.map_frames( + addr, + frames, + MapProperty { + perm: options.perm, + global: false, + extension: 0, + cache: CachePolicy::Writeback, + }, + )?; - let mut memory_set = self.memory_set.lock(); - // FIXME: This is only a hack here. The interface of MapArea cannot simply deal with unmap part of memory, - // so we only map MapArea of page size now. + Ok(addr) + } - // Ensure that the base address is not unwrapped repeatedly - // and the addresses used later will not overflow - let base_addr = options.addr.unwrap(); - base_addr - .checked_add(frames.len() * PAGE_SIZE) - .ok_or(Error::Overflow)?; - for (idx, frame) in frames.into_iter().enumerate() { - let addr = base_addr + idx * PAGE_SIZE; - let frames = VmFrameVec::from_one_frame(frame); - memory_set.map(MapArea::new( - addr, - PAGE_SIZE, - MapProperty::new_general(options.perm), - frames, - )); + /// Query about a range of virtual memory. + /// You will get a iterator of `VmQueryResult` which contains the information of + /// each parts of the range. + pub fn query_range(&self, range: &Range) -> Result { + Ok(VmQueryIter { + inner: self.pt.query_range(range)?, + }) + } + + /// Query about the mapping information about a byte in virtual memory. + /// This is more handy than [`query_range`], but less efficient if you want + /// to query in a batch. + pub fn query(&self, vaddr: Vaddr) -> Result> { + if !(0..MAX_USERSPACE_VADDR).contains(&vaddr) { + return Err(Error::AccessDenied); } - - Ok(base_addr) - } - - /// Determine whether a `vaddr` is already mapped. - pub fn is_mapped(&self, vaddr: Vaddr) -> bool { - let memory_set = self.memory_set.lock(); - memory_set.is_mapped(vaddr) - } - - /// Determine whether the target `vaddr` is writable based on the page table. - pub fn is_writable(&self, vaddr: Vaddr) -> bool { - let memory_set = self.memory_set.lock(); - let info = memory_set.info(vaddr); - info.is_some_and(|info| info.prop.perm.contains(VmPerm::W)) - } - - /// Determine whether the target `vaddr` is executable based on the page table. - pub fn is_executable(&self, vaddr: Vaddr) -> bool { - let memory_set = self.memory_set.lock(); - let info = memory_set.info(vaddr); - info.is_some_and(|info| info.prop.perm.contains(VmPerm::X)) + Ok(self.pt.query(vaddr).map(|(_pa, info)| info)) } /// Unmaps the physical memory pages within the VM address range. @@ -111,51 +113,44 @@ impl VmSpace { /// are mapped. 
pub fn unmap(&self, range: &Range) -> Result<()> { assert!(is_page_aligned(range.start) && is_page_aligned(range.end)); - let mut start_va = range.start; - let num_pages = (range.end - range.start) / PAGE_SIZE; - let mut inner = self.memory_set.lock(); - start_va - .checked_add(PAGE_SIZE * num_pages) - .ok_or(Error::Overflow)?; - for i in 0..num_pages { - inner.unmap(start_va)?; - start_va += PAGE_SIZE; - } + self.pt.unmap(range)?; Ok(()) } /// clear all mappings pub fn clear(&self) { - self.memory_set.lock().clear(); + // Safety: unmapping user space is safe, and we don't care unmapping + // invalid ranges. + unsafe { + self.pt.unmap_unchecked(&(0..MAX_USERSPACE_VADDR)); + } #[cfg(target_arch = "x86_64")] x86_64::instructions::tlb::flush_all(); } /// Update the VM protection permissions within the VM address range. /// - /// The entire specified VM range must have been mapped with physical - /// memory pages. - pub fn protect(&self, range: &Range, perm: VmPerm) -> Result<()> { - debug_assert!(range.start % PAGE_SIZE == 0); - debug_assert!(range.end % PAGE_SIZE == 0); - let start_page = range.start / PAGE_SIZE; - let end_page = range.end / PAGE_SIZE; - for page_idx in start_page..end_page { - let addr = page_idx * PAGE_SIZE; - self.memory_set - .lock() - .protect(addr, MapProperty::new_general(perm)) - } + /// If any of the page in the given range is not mapped, it is skipped. + /// The method panics when virtual address is not aligned to base page + /// size. + /// + /// TODO: It returns error when invalid operations such as protect + /// partial huge page happens, and efforts are not reverted, leaving us + /// in a bad state. + pub fn protect(&self, range: &Range, op: impl MapOp) -> Result<()> { + assert!(is_page_aligned(range.start) && is_page_aligned(range.end)); + self.pt.protect(range, op)?; Ok(()) } - /// Deep-copy the current `VmSpace`. + /// To fork a new VM space with copy-on-write semantics. /// - /// The generated new `VmSpace` possesses a `MemorySet` independent from the - /// original `VmSpace`, with initial contents identical to the original. - pub fn deep_copy(&self) -> Self { + /// Both the parent and the newly forked VM space will be marked as + /// read-only. And both the VM space will take handles to the same + /// physical memory pages. + pub fn fork_copy_on_write(&self) -> Self { Self { - memory_set: Arc::new(Mutex::new(self.memory_set.lock().clone())), + pt: self.pt.fork_copy_on_write(), } } } @@ -168,11 +163,37 @@ impl Default for VmSpace { impl VmIo for VmSpace { fn read_bytes(&self, vaddr: usize, buf: &mut [u8]) -> Result<()> { - self.memory_set.lock().read_bytes(vaddr, buf) + let range_end = vaddr.checked_add(buf.len()).ok_or(Error::Overflow)?; + let vaddr = vaddr.align_down(PAGE_SIZE); + let range_end = range_end.align_up(PAGE_SIZE); + for qr in self.query_range(&(vaddr..range_end))? { + if matches!(qr, VmQueryResult::NotMapped { .. }) { + return Err(Error::AccessDenied); + } + } + self.activate(); + buf.clone_from_slice(unsafe { core::slice::from_raw_parts(vaddr as *const u8, buf.len()) }); + Ok(()) } fn write_bytes(&self, vaddr: usize, buf: &[u8]) -> Result<()> { - self.memory_set.lock().write_bytes(vaddr, buf) + let range_end = vaddr.checked_add(buf.len()).ok_or(Error::Overflow)?; + let vaddr = vaddr.align_down(PAGE_SIZE); + let range_end = range_end.align_up(PAGE_SIZE); + for qr in self.query_range(&(vaddr..vaddr + range_end))? { + match qr { + VmQueryResult::NotMapped { .. } => return Err(Error::AccessDenied), + VmQueryResult::Mapped { info, .. 
} => { + if !info.prop.perm.contains(VmPerm::W) { + return Err(Error::AccessDenied); + } + } + } + } + self.activate(); + unsafe { core::slice::from_raw_parts_mut(vaddr as *mut u8, buf.len()) } + .clone_from_slice(buf); + Ok(()) } } @@ -195,7 +216,7 @@ impl VmMapOptions { pub fn new() -> Self { Self { addr: None, - align: PAGE_SIZE, + align: PageTableConsts::BASE_PAGE_SIZE, perm: VmPerm::empty(), can_overwrite: false, } @@ -283,16 +304,32 @@ impl TryFrom for VmPerm { } } -impl From for PageTableFlags { - fn from(vm_perm: VmPerm) -> Self { - let mut flags = PageTableFlags::PRESENT | PageTableFlags::USER; - if vm_perm.contains(VmPerm::W) { - flags |= PageTableFlags::WRITABLE; - } - // FIXME: how to respect executable flags? - if !vm_perm.contains(VmPerm::X) { - flags |= PageTableFlags::NO_EXECUTE; - } - flags +/// The iterator for querying over the VM space without modifying it. +pub struct VmQueryIter<'a> { + inner: PageTableIter<'a, UserMode, PageTableEntry, PageTableConsts>, +} + +pub enum VmQueryResult { + NotMapped { + va: Vaddr, + len: usize, + }, + Mapped { + va: Vaddr, + frame: VmFrame, + info: MapInfo, + }, +} + +impl Iterator for VmQueryIter<'_> { + type Item = VmQueryResult; + + fn next(&mut self) -> Option { + self.inner.next().map(|ptqr| match ptqr { + PtQr::NotMapped { va, len } => VmQueryResult::NotMapped { va, len }, + PtQr::Mapped { va, frame, info } => VmQueryResult::Mapped { va, frame, info }, + // It is not possible to map untyped memory in user space. + PtQr::MappedUntyped { va, pa, len, info } => unreachable!(), + }) } } diff --git a/kernel/aster-nix/src/error.rs b/kernel/aster-nix/src/error.rs index 4e6e84b10..d351233e6 100644 --- a/kernel/aster-nix/src/error.rs +++ b/kernel/aster-nix/src/error.rs @@ -190,6 +190,7 @@ impl From for Error { aster_frame::Error::NotEnoughResources => Error::new(Errno::EBUSY), aster_frame::Error::PageFault => Error::new(Errno::EFAULT), aster_frame::Error::Overflow => Error::new(Errno::EOVERFLOW), + aster_frame::Error::MapAlreadyMappedVaddr => Error::new(Errno::EINVAL), } } } diff --git a/kernel/aster-nix/src/vm/vmar/mod.rs b/kernel/aster-nix/src/vm/vmar/mod.rs index 6f53879ac..6852a99b8 100644 --- a/kernel/aster-nix/src/vm/vmar/mod.rs +++ b/kernel/aster-nix/src/vm/vmar/mod.rs @@ -90,7 +90,7 @@ impl PageFaultHandler for Vmar { impl Vmar { /// FIXME: This function should require access control - pub fn vm_space(&self) -> &VmSpace { + pub fn vm_space(&self) -> &Arc { self.0.vm_space() } } @@ -103,7 +103,7 @@ pub(super) struct Vmar_ { /// The total size of the VMAR in bytes size: usize, /// The attached vmspace - vm_space: VmSpace, + vm_space: Arc, /// The parent vmar. If points to none, this is a root vmar parent: Weak, } @@ -142,7 +142,7 @@ impl Interval for Arc { impl Vmar_ { fn new( inner: VmarInner, - vm_space: VmSpace, + vm_space: Arc, base: usize, size: usize, parent: Option<&Arc>, @@ -172,7 +172,13 @@ impl Vmar_ { vm_mappings: BTreeMap::new(), free_regions, }; - Vmar_::new(vmar_inner, VmSpace::new(), 0, ROOT_VMAR_CAP_ADDR, None) + Vmar_::new( + vmar_inner, + Arc::new(VmSpace::new()), + 0, + ROOT_VMAR_CAP_ADDR, + None, + ) } fn is_root_vmar(&self) -> bool { @@ -611,7 +617,7 @@ impl Vmar_ { } /// Returns the attached `VmSpace`. - pub(super) fn vm_space(&self) -> &VmSpace { + pub(super) fn vm_space(&self) -> &Arc { &self.vm_space } @@ -708,15 +714,6 @@ impl Vmar_ { self.new_cow(None) } - /// Set the entries in the page table associated with the current `Vmar` to read-only. 
- fn set_pt_read_only(&self) -> Result<()> { - let inner = self.inner.lock(); - for (map_addr, vm_mapping) in &inner.vm_mappings { - vm_mapping.set_pt_read_only(self.vm_space())?; - } - Ok(()) - } - /// Create a new vmar by creating cow child for all mapped vmos. fn new_cow(&self, parent: Option<&Arc>) -> Result> { let new_vmar_ = { @@ -724,17 +721,11 @@ impl Vmar_ { // If this is not a root `Vmar`, we clone the `VmSpace` from parent. // // If this is a root `Vmar`, we leverage Copy-On-Write (COW) mechanism to - // clone the `VmSpace` to the child. We set all the page table entries - // in current `VmSpace` to be read-only, then clone the `VmSpace` to the child. - // In this way, initially, the child shares the same page table contents - // as the current `Vmar`. Later on, whether the current `Vmar` or the child - // `Vmar` needs to perform a write operation, the COW mechanism will be triggered, - // creating a new page for writing. + // clone the `VmSpace` to the child. let vm_space = if let Some(parent) = parent { parent.vm_space().clone() } else { - self.set_pt_read_only()?; - self.vm_space().deep_copy() + Arc::new(self.vm_space().fork_copy_on_write()) }; Vmar_::new(vmar_inner, vm_space, self.base, self.size, parent) }; diff --git a/kernel/aster-nix/src/vm/vmar/vm_mapping.rs b/kernel/aster-nix/src/vm/vmar/vm_mapping.rs index bd9e4660c..f65d7b16a 100644 --- a/kernel/aster-nix/src/vm/vmar/vm_mapping.rs +++ b/kernel/aster-nix/src/vm/vmar/vm_mapping.rs @@ -131,22 +131,6 @@ impl VmMapping { &self.vmo } - /// Set the entries in the page table associated with the current `VmMapping` to read-only. - pub(super) fn set_pt_read_only(&self, vm_space: &VmSpace) -> Result<()> { - let map_inner = self.inner.lock(); - let mapped_addr = &map_inner.mapped_pages; - let perm = map_inner.perm; - if !perm.contains(VmPerm::W) { - return Ok(()); - } - - for page_idx in mapped_addr { - let map_addr = map_inner.page_map_addr(*page_idx); - vm_space.protect(&(map_addr..map_addr + PAGE_SIZE), perm - VmPerm::W)?; - } - Ok(()) - } - /// Add a new committed page and map it to vmspace. If copy on write is set, it's allowed to unmap the page at the same address. /// FIXME: This implementation based on the truth that we map one page at a time. If multiple pages are mapped together, this implementation may have problems pub(super) fn map_one_page( @@ -214,7 +198,9 @@ impl VmMapping { // The `VmMapping` has the write permission but the corresponding PTE is present and is read-only. // This means this PTE is set to read-only due to the COW mechanism. In this situation we need to trigger a // page fault before writing at the VMO to guarantee the consistency between VMO and the page table. - let need_page_fault = vm_space.is_mapped(page_addr) && !vm_space.is_writable(page_addr); + let need_page_fault = vm_space + .query(page_addr)? + .is_some_and(|info| !info.contains(VmPerm::W)); if need_page_fault { self.handle_page_fault(page_addr, false, true)?; } @@ -478,7 +464,7 @@ impl VmMappingInner { }; // Cow child allows unmapping the mapped page. 
-        if vmo.is_cow_vmo() && vm_space.is_mapped(map_addr) {
+        if vmo.is_cow_vmo() && vm_space.query(map_addr)?.is_some() {
             vm_space.unmap(&(map_addr..(map_addr + PAGE_SIZE))).unwrap();
         }
@@ -490,7 +476,7 @@ impl VmMappingInner {
     fn unmap_one_page(&mut self, vm_space: &VmSpace, page_idx: usize) -> Result<()> {
         let map_addr = self.page_map_addr(page_idx);
         let range = map_addr..(map_addr + PAGE_SIZE);
-        if vm_space.is_mapped(map_addr) {
+        if vm_space.query(map_addr)?.is_some() {
             vm_space.unmap(&range)?;
         }
         self.mapped_pages.remove(&page_idx);
@@ -531,7 +517,7 @@ impl VmMappingInner {
         let perm = VmPerm::from(perms);
         for page_idx in start_page..end_page {
             let page_addr = self.page_map_addr(page_idx);
-            if vm_space.is_mapped(page_addr) {
+            if vm_space.query(page_addr)?.is_some() {
                 // If the page is already mapped, we will modify page table
                 let page_range = page_addr..(page_addr + PAGE_SIZE);
                 vm_space.protect(&page_range, perm)?;
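A usage sketch for the reworked `VmSpace::map`. The builder-style setters on `VmMapOptions` and the `VmAllocOptions` import path are assumptions rather than something this patch shows; the overwrite check and `Error::MapAlreadyMappedVaddr` are from the diff above.

```rust
use crate::{
    prelude::*,
    vm::{VmAllocOptions, VmMapOptions, VmPerm, VmSpace},
};

/// Hypothetical helper: back `va` with one freshly allocated frame.
fn map_one_page(space: &VmSpace, va: Vaddr) -> Result<Vaddr> {
    let frames = VmAllocOptions::new(1).alloc()?;
    let mut options = VmMapOptions::new();
    // Builder-style setters are assumed here; `can_overwrite` already
    // defaults to `false` in `VmMapOptions::new`.
    options.addr(Some(va));
    options.perm(VmPerm::RW);
    // With overwriting forbidden, `map` first scans the range via `query_range`
    // and returns `Error::MapAlreadyMappedVaddr` (surfaced as `EINVAL` in
    // aster-nix) if any page in `va..va + frames.nbytes()` is already mapped.
    space.map(frames, &options)
}
```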
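A sketch of the copy-on-write fork path at the `VmSpace` level, mirroring the `test_user_copy_on_write` ktest added above. The helper name is assumed, and `va` is taken to be a page-aligned, mapped user address.

```rust
use crate::{
    prelude::*,
    vm::{VmSpace, PAGE_SIZE},
};

/// Hypothetical helper demonstrating `fork_copy_on_write` semantics.
fn fork_example(parent: &VmSpace, va: Vaddr) -> Result<()> {
    // Parent and child keep handles to the same physical frames; both sides
    // are marked read-only so the first write triggers a COW page fault.
    let child = parent.fork_copy_on_write();

    // Unmapping in the parent does not disturb the child's view.
    parent.unmap(&(va..va + PAGE_SIZE))?;
    assert!(parent.query(va)?.is_none());
    assert!(child.query(va)?.is_some());
    Ok(())
}
```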
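The removed `is_mapped`/`is_writable` helpers can be recovered on top of `query` and the new `MapInfo` methods; a minimal sketch with hypothetical helper names:

```rust
use crate::{
    prelude::*,
    vm::{VmPerm, VmSpace},
};

/// Hypothetical replacement for the old `VmSpace::is_mapped`.
fn is_mapped(space: &VmSpace, va: Vaddr) -> Result<bool> {
    Ok(space.query(va)?.is_some())
}

/// Hypothetical replacement for the old `VmSpace::is_writable`.
fn is_writable(space: &VmSpace, va: Vaddr) -> Result<bool> {
    // `MapInfo::contains` is one of the helpers introduced in this patch.
    Ok(space.query(va)?.is_some_and(|info| info.contains(VmPerm::W)))
}
```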