Implement fine-grained locks for the page table and adjust APIs

Author:    Zhang Junyang
Date:      2024-04-29 00:09:26 +08:00
Committer: Tate, Hongliang Tian
Parent:    ef1ab72ebe
Commit:    2dbeb92326
12 changed files with 937 additions and 717 deletions


@@ -291,16 +291,18 @@ impl ContextTable {
         if device.device >= 32 || device.function >= 8 {
             return Err(ContextTableError::InvalidDeviceId);
         }
-        self.get_or_create_page_table(device).map_unchecked(
-            &(daddr..daddr + PAGE_SIZE),
-            &(paddr..paddr + PAGE_SIZE),
-            MapProperty {
-                perm: VmPerm::RW,
-                global: false,
-                extension: PageTableFlags::empty().bits(),
-                cache: CachePolicy::Uncacheable,
-            },
-        );
+        self.get_or_create_page_table(device)
+            .map(
+                &(daddr..daddr + PAGE_SIZE),
+                &(paddr..paddr + PAGE_SIZE),
+                MapProperty {
+                    perm: VmPerm::RW,
+                    global: false,
+                    extension: PageTableFlags::empty().bits(),
+                    cache: CachePolicy::Uncacheable,
+                },
+            )
+            .unwrap();
         Ok(())
     }
@@ -310,7 +312,8 @@ impl ContextTable {
         }
         unsafe {
             self.get_or_create_page_table(device)
-                .unmap_unchecked(&(daddr..daddr + PAGE_SIZE));
+                .unmap(&(daddr..daddr + PAGE_SIZE))
+                .unwrap();
         }
         Ok(())
     }


@@ -183,11 +183,23 @@ impl PageTableEntryTrait for PageTableEntry {
         if self.0 & PageTableFlags::DIRTY.bits() != 0 {
             status |= MapStatus::DIRTY;
         }
+        let extension = {
+            #[cfg(feature = "intel_tdx")]
+            {
+                let mut ext = PageTableFlags::empty();
+                if self.0 & PageTableFlags::SHARED.bits() != 0 {
+                    ext |= PageTableFlags::SHARED;
+                }
+                ext
+            }
+            #[cfg(not(feature = "intel_tdx"))]
+            0
+        };
         MapInfo {
             prop: MapProperty {
                 perm,
                 global,
-                extension: (self.0 & !Self::PHYS_ADDR_MASK) as u64,
+                extension,
                 cache,
             },
             status,
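The old code reported every non-address bit of the raw PTE as `extension`, while the new code reports only the flags it explicitly tracks. A standalone sketch of that difference on a raw 64-bit PTE; the mask value and the position of the `SHARED` bit below are illustrative assumptions, not the exact x86 layout:

```rust
// Illustrative layout: the middle bits hold the frame address, everything else is flags.
const PHYS_ADDR_MASK: u64 = 0x000f_ffff_ffff_f000;
const SHARED: u64 = 1 << 60; // assumed position of the "shared" bit

fn main() {
    let pte: u64 = 0x0000_0000_dead_b000 | SHARED | 0b11; // address + SHARED + low flag bits

    // Old approach: everything that is not the address becomes "extension".
    let extension_old = pte & !PHYS_ADDR_MASK;

    // New approach: only the bits we explicitly care about are reported.
    let extension_new = if pte & SHARED != 0 { SHARED } else { 0 };

    assert_eq!(extension_old & SHARED, SHARED);
    assert_eq!(extension_new, SHARED);
    // The old value also leaks unrelated low flag bits, which the new code filters out.
    assert_ne!(extension_old, extension_new);
}
```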


@@ -416,14 +416,12 @@ pub unsafe fn unprotect_gpa_range(gpa: TdxGpa, page_num: usize) -> Result<(), Pa
     }
     let vaddr = paddr_to_vaddr(gpa);
     let pt = KERNEL_PAGE_TABLE.get().unwrap();
-    unsafe {
-        pt.protect_unchecked(&(vaddr..page_num * PAGE_SIZE), |info| MapProperty {
-            perm: info.prop.perm,
-            extension: PageTableFlags::SHARED.bits() as u64,
-            cache: info.prop.cache,
-        })
-        .map_err(PageConvertError::PageTableError)?;
-    };
+    pt.protect(&(vaddr..page_num * PAGE_SIZE), |info| MapProperty {
+        perm: info.prop.perm,
+        extension: PageTableFlags::SHARED.bits() as u64,
+        cache: info.prop.cache,
+    })
+    .map_err(PageConvertError::PageTableError)?;
     map_gpa(
         (gpa & (!PAGE_MASK)) as u64 | SHARED_MASK,
         (page_num * PAGE_SIZE) as u64,
@@ -452,16 +450,14 @@ pub unsafe fn protect_gpa_range(gpa: TdxGpa, page_num: usize) -> Result<(), Page
     }
     let vaddr = paddr_to_vaddr(gpa);
     let pt = KERNEL_PAGE_TABLE.get().unwrap();
-    unsafe {
-        pt.protect_unchecked(&(vaddr..page_num * PAGE_SIZE), |info| MapProperty {
-            perm: info.prop.perm,
-            extension: (PageTableFlags::from_bits_truncate(info.prop.extension as usize)
-                - PageTableFlags::SHARED)
-                .bits() as u64,
-            cache: info.prop.cache,
-        })
-        .map_err(PageConvertError::PageTableError)?;
-    };
+    pt.protect(&(vaddr..page_num * PAGE_SIZE), |info| MapProperty {
+        perm: info.prop.perm,
+        extension: (PageTableFlags::from_bits_truncate(info.prop.extension as usize)
+            - PageTableFlags::SHARED)
+            .bits() as u64,
+        cache: info.prop.cache,
+    })
+    .map_err(PageConvertError::PageTableError)?;
     map_gpa((gpa & PAGE_MASK) as u64, (page_num * PAGE_SIZE) as u64)
         .map_err(PageConvertError::TdVmcallError)?;
     for i in 0..page_num {
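Both call sites pass `protect` a closure that derives the new `MapProperty` from the current `MapInfo`, the same shape used elsewhere via `perm_op` and `cache_policy_op`. A minimal standalone model of that closure-based protect operation, with simplified stand-in types:

```rust
#[derive(Clone, Copy, Debug, PartialEq)]
struct MapProperty {
    writable: bool,
    cached: bool,
    extension: u64,
}

#[derive(Clone, Copy)]
struct MapInfo {
    prop: MapProperty,
}

/// A protect operation is just a function from the current mapping info to
/// the desired property, applied to every mapping in a range.
fn protect(mappings: &mut [MapInfo], op: impl Fn(MapInfo) -> MapProperty) {
    for m in mappings.iter_mut() {
        m.prop = op(*m);
    }
}

fn main() {
    let mut mappings = [MapInfo {
        prop: MapProperty { writable: true, cached: true, extension: 0 },
    }];

    // Analogous to `cache_policy_op(CachePolicy::Uncacheable)`: change one attribute, keep the rest.
    protect(&mut mappings, |info| MapProperty { cached: false, ..info.prop });
    assert_eq!(mappings[0].prop, MapProperty { writable: true, cached: false, extension: 0 });

    // Analogous to the TDX call sites above: flip an extension bit (position is illustrative).
    const SHARED: u64 = 1 << 0;
    protect(&mut mappings, |info| MapProperty {
        extension: info.prop.extension | SHARED,
        ..info.prop
    });
    assert_eq!(mappings[0].prop.extension, SHARED);
}
```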


@@ -69,12 +69,14 @@ impl KernelStack {
            let guard_page_paddr = stack_segment.start_paddr();
            crate::vm::paddr_to_vaddr(guard_page_paddr)
        };
-        // Safety: the physical guard page address is exclusively used since we allocated it.
+        // Safety: the segment allocated is not used by others so we can protect it.
        unsafe {
-            page_table.protect_unchecked(
-                &(guard_page_vaddr..guard_page_vaddr + PAGE_SIZE),
-                perm_op(|p| p - VmPerm::RW),
-            );
+            page_table
+                .protect(
+                    &(guard_page_vaddr..guard_page_vaddr + PAGE_SIZE),
+                    perm_op(|p| p - VmPerm::RW),
+                )
+                .unwrap();
        }
        Ok(Self {
            segment: stack_segment,
@@ -96,12 +98,14 @@ impl Drop for KernelStack {
            let guard_page_paddr = self.segment.start_paddr();
            crate::vm::paddr_to_vaddr(guard_page_paddr)
        };
-        // Safety: the physical guard page address is exclusively used since we allocated it.
+        // Safety: the segment allocated is not used by others so we can protect it.
        unsafe {
-            page_table.protect_unchecked(
-                &(guard_page_vaddr..guard_page_vaddr + PAGE_SIZE),
-                perm_op(|p| p | VmPerm::RW),
-            );
+            page_table
+                .protect(
+                    &(guard_page_vaddr..guard_page_vaddr + PAGE_SIZE),
+                    perm_op(|p| p | VmPerm::RW),
+                )
+                .unwrap();
        }
    }
 }


@@ -227,18 +227,20 @@ fn handle_kernel_page_fault(f: &TrapFrame) {
    // correctness follows the semantics of the direct mapping of physical memory.
    // Do the mapping
    unsafe {
-        page_table.map_unchecked(
-            &(vaddr..vaddr + PAGE_SIZE),
-            &(paddr..paddr + PAGE_SIZE),
-            MapProperty {
-                perm: VmPerm::RW,
-                global: true,
-                #[cfg(feature = "intel_tdx")]
-                extension: PageTableFlags::SHARED.bits() as u64,
-                #[cfg(not(feature = "intel_tdx"))]
-                extension: 0,
-                cache: CachePolicy::Uncacheable,
-            },
-        )
+        page_table
+            .map(
+                &(vaddr..vaddr + PAGE_SIZE),
+                &(paddr..paddr + PAGE_SIZE),
+                MapProperty {
+                    perm: VmPerm::RW,
+                    global: true,
+                    #[cfg(feature = "intel_tdx")]
+                    extension: PageTableFlags::SHARED.bits() as u64,
+                    #[cfg(not(feature = "intel_tdx"))]
+                    extension: 0,
+                    cache: CachePolicy::Uncacheable,
+                },
+            )
+            .unwrap();
    }
 }


@@ -59,9 +59,11 @@ impl DmaCoherent {
            let page_table = KERNEL_PAGE_TABLE.get().unwrap();
            let vaddr = paddr_to_vaddr(start_paddr);
            let va_range = vaddr..vaddr + (frame_count * PAGE_SIZE);
-            // Safety: the address is in the range of `vm_segment`.
+            // Safety: the physical mappings is only used by DMA so protecting it is safe.
            unsafe {
-                page_table.protect_unchecked(&va_range, cache_policy_op(CachePolicy::Uncacheable));
+                page_table
+                    .protect(&va_range, cache_policy_op(CachePolicy::Uncacheable))
+                    .unwrap();
            }
        }
        let start_daddr = match dma_type() {
@@ -144,9 +146,11 @@ impl Drop for DmaCoherentInner {
            let page_table = KERNEL_PAGE_TABLE.get().unwrap();
            let vaddr = paddr_to_vaddr(start_paddr);
            let va_range = vaddr..vaddr + (frame_count * PAGE_SIZE);
-            // Safety: the address is in the range of `vm_segment`.
+            // Safety: the physical mappings is only used by DMA so protecting it is safe.
            unsafe {
-                page_table.protect_unchecked(&va_range, cache_policy_op(CachePolicy::Writeback));
+                page_table
+                    .protect(&va_range, cache_policy_op(CachePolicy::Writeback))
+                    .unwrap();
            }
        }
        remove_dma_mapping(start_paddr, frame_count);


@@ -79,7 +79,7 @@ pub fn init_kernel_page_table() {
    };
    // Safety: we are doing the linear mapping for the kernel.
    unsafe {
-        kpt.map_unchecked(&from, &to, prop);
+        kpt.map(&from, &to, prop).unwrap();
    }
    // Map for the I/O area.
    // TODO: we need to have an allocator to allocate kernel space for
@@ -94,7 +94,7 @@ pub fn init_kernel_page_table() {
    };
    // Safety: we are doing I/O mappings for the kernel.
    unsafe {
-        kpt.map_unchecked(&from, &to, prop);
+        kpt.map(&from, &to, prop).unwrap();
    }
    // Map for the kernel code itself.
    // TODO: set separated permissions for each segments in the kernel.
@@ -114,7 +114,7 @@ pub fn init_kernel_page_table() {
    };
    // Safety: we are doing mappings for the kernel.
    unsafe {
-        kpt.map_unchecked(&from, &to, prop);
+        kpt.map(&from, &to, prop).unwrap();
    }
    KERNEL_PAGE_TABLE.call_once(|| kpt);
 }


@ -1,143 +1,213 @@
// SPDX-License-Identifier: MPL-2.0
//! The page table cursor for mapping and querying over the page table.
//!
//! ## The page table lock protocol
//!
//! We provide a fine-grained lock protocol to allow concurrent accesses to
//! the page table. The protocol is originally proposed by Ruihan Li
//! <lrh2000@pku.edu.cn>.
//!
//! [`CursorMut::new`] accepts an address range, which indicates the page table
//! entries that may be visited by this cursor.
//!
//! Then, [`CursorMut::new`] finds an intermediate page table (not necessarily
//! the last-level or the top-level) which represents an address range that contains
//! the whole specified address range. It acquires all the locks from the root page
//! table to the intermediate page table, but then unlocks all of them except the
//! one for the intermediate page table. `CursorMut` then maintains the lock
//! guards from the intermediate page table down to the leaf that the cursor is
//! currently manipulating.
//!
//! For example, if we're going to map the address range shown below:
//!
//! ```plain
//! Top-level page table node A
//! /
//! B
//! / \
//! Last-level page table nodes C D
//! Last-level PTEs ---**...**---
//! \__ __/
//! V
//! Address range that we're going to map
//! ```
//!
//! When calling [`CursorMut::new`], it will:
//! 1. `lock(A)`, `lock(B)`, `unlock(A)`;
//! 2. `guards = [ locked(B) ]`.
//!
//! When calling [`CursorMut::map`], it will:
//! 1. `lock(C)`, `guards = [ locked(B), locked(C) ]`;
//! 2. Map some pages in `C`;
//! 3. `unlock(C)`, `lock_guard = [ locked(B) ]`;
//! 4. `lock(D)`, `lock_guard = [ locked(B), locked(D) ]`;
//! 5. Map some pages in D;
//! 6. `unlock(D)`, `lock_guard = [ locked(B) ]`;
//!
//! If all the mappings in `B` are cancelled when the cursor finishes its traversal,
//! and `B` needs to be recycled, a page walk from the root page table to `B` is
//! required. The cursor unlocks all locks, then locks all the way down to `B`, then
//! checks whether `B` is empty, and finally recycles all the resources on the way back.
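A small self-contained sketch of the hand-over-hand locking described above, on a toy two-level tree with `std` mutexes; it models only the lock order (lock the child, then unlock the parent), not the real `PageTableFrame`, spinlocks, or `lock_arc` guards:

```rust
use std::ops::Range;
use std::sync::{Arc, Mutex};

/// A toy two-level "page table": the root covers 0..16 and each child covers 4 slots.
struct Node {
    covers: Range<usize>,
    slots: Vec<bool>,                // "PTEs" of a leaf node: mapped or not
    children: Vec<Arc<Mutex<Node>>>, // empty for leaf nodes
}

/// Map every slot in `range`, following the protocol sketched above.
fn map_range(root: &Arc<Mutex<Node>>, range: Range<usize>) {
    // 1. lock(A).
    let root_guard = root.lock().unwrap();
    assert!(root_guard.covers.start <= range.start && range.end <= root_guard.covers.end);
    // Assume the whole range falls inside one child, like node `B` in the figure.
    let idx = (range.start - root_guard.covers.start) / 4;
    let child = Arc::clone(&root_guard.children[idx]);
    // 2. lock(B) while still holding A, then unlock(A): hand-over-hand locking.
    let mut child_guard = child.lock().unwrap();
    drop(root_guard);
    // 3. Work under B's lock only; other cursors may now lock A and B's siblings.
    for va in range {
        let offset = va - child_guard.covers.start;
        child_guard.slots[offset] = true;
    }
}

fn main() {
    let children: Vec<_> = (0..4)
        .map(|i| {
            Arc::new(Mutex::new(Node {
                covers: i * 4..(i + 1) * 4,
                slots: vec![false; 4],
                children: Vec::new(),
            }))
        })
        .collect();
    let root = Arc::new(Mutex::new(Node {
        covers: 0..16,
        slots: Vec::new(),
        children,
    }));

    map_range(&root, 4..8);
    assert!(root.lock().unwrap().children[1].lock().unwrap().slots.iter().all(|&m| m));
}
```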
 use alloc::sync::Arc;
-use core::{any::TypeId, mem::size_of, ops::Range};
+use core::{any::TypeId, ops::Range};
+use align_ext::AlignExt;
 use super::{
     Child, KernelMode, MapInfo, MapOp, MapProperty, PageTable, PageTableConstsTrait,
-    PageTableEntryTrait, PageTableError, PageTableFrame, PageTableMode, PtfRef,
+    PageTableEntryTrait, PageTableError, PageTableFrame, PageTableMode,
 };
 use crate::{
     sync::{ArcSpinLockGuard, SpinLock},
-    vm::{paddr_to_vaddr, Paddr, Vaddr, VmFrame},
+    vm::{Paddr, Vaddr, VmFrame},
 };
/// The cursor for forward traversal over the page table. /// The cursor for traversal over the page table.
/// ///
/// Each method may move the cursor forward, doing mapping unmaping, or /// Efficient methods are provided to move the cursor forward by a slot,
/// querying this slot. /// doing mapping, unmaping, or querying for the traversed slot. Also you
/// can jump forward or backward by re-walking without releasing the lock.
///
/// A slot is a PTE at any levels, which correspond to a certain virtual
/// memory range sized by the "page size" of the current level.
/// ///
/// Doing mapping is somewhat like a depth-first search on a tree, except /// Doing mapping is somewhat like a depth-first search on a tree, except
/// that we modify the tree while traversing it. We use a stack to simulate /// that we modify the tree while traversing it. We use a guard stack to
/// the recursion. /// simulate the recursion, and adpot a page table locking protocol to
/// /// provide concurrency.
/// Any read or write accesses to nodes require exclusive access on the pub(crate) struct CursorMut<'a, M: PageTableMode, E: PageTableEntryTrait, C: PageTableConstsTrait>
/// entire path from the root to the node. But cursor can be created without
/// holding the lock, and can release the lock after yeilding the current
/// slot while querying over the page table with a range. Simultaneous
/// reading or writing to the same range in the page table will not produce
/// consistent results, only validity is guaranteed.
pub(super) struct PageTableCursor<
'a,
M: PageTableMode,
E: PageTableEntryTrait,
C: PageTableConstsTrait,
> where
[(); C::NR_ENTRIES_PER_FRAME]:,
[(); C::NR_LEVELS]:,
{
stack: [Option<PtfRef<E, C>>; C::NR_LEVELS],
lock_guard: [Option<ArcSpinLockGuard<PageTableFrame<E, C>>>; C::NR_LEVELS],
level: usize,
va: Vaddr,
}
#[derive(Debug, Clone)]
pub(super) enum MapOption {
Map {
frame: VmFrame,
prop: MapProperty,
},
MapUntyped {
pa: Paddr,
len: usize,
prop: MapProperty,
},
Unmap {
len: usize,
},
}
impl MapOption {
fn paddr(&self) -> Option<Paddr> {
match self {
MapOption::Map { frame, prop } => Some(frame.start_paddr()),
MapOption::MapUntyped { pa, len, prop } => Some(*pa),
MapOption::Unmap { len } => None,
}
}
fn prop(&self) -> Option<MapProperty> {
match self {
MapOption::Map { frame, prop } => Some(*prop),
MapOption::MapUntyped { pa, len, prop } => Some(*prop),
MapOption::Unmap { len } => None,
}
}
fn len(&self) -> usize {
match self {
// A VmFrame currently has a fixed size of 1 base page.
MapOption::Map { frame, prop } => crate::arch::mm::PageTableConsts::BASE_PAGE_SIZE,
MapOption::MapUntyped { pa, len, prop } => *len,
MapOption::Unmap { len: l } => *l,
}
}
fn consume(&mut self, len: usize) -> Self {
match self {
MapOption::Map { frame, prop } => {
debug_assert_eq!(len, crate::arch::mm::PageTableConsts::BASE_PAGE_SIZE);
let ret = self.clone();
*self = MapOption::Unmap { len: 0 };
ret
}
MapOption::MapUntyped { pa, len: l, prop } => {
debug_assert!(*l >= len);
let ret = MapOption::MapUntyped {
pa: *pa,
len,
prop: *prop,
};
*self = MapOption::MapUntyped {
pa: *pa + len,
len: *l - len,
prop: *prop,
};
ret
}
MapOption::Unmap { len: l } => {
debug_assert!(*l >= len);
let ret = MapOption::Unmap { len };
*l -= len;
ret
}
}
}
}
impl<M: PageTableMode, E: PageTableEntryTrait, C: PageTableConstsTrait> PageTableCursor<'_, M, E, C>
where where
[(); C::NR_ENTRIES_PER_FRAME]:, [(); C::NR_ENTRIES_PER_FRAME]:,
[(); C::NR_LEVELS]:, [(); C::NR_LEVELS]:,
{ {
pub(super) fn new(pt: &PageTable<M, E, C>, va: Vaddr) -> Self { pt: &'a PageTable<M, E, C>,
let mut stack = core::array::from_fn(|_| None); guards: [Option<ArcSpinLockGuard<PageTableFrame<E, C>>>; C::NR_LEVELS],
stack[0] = Some(pt.root_frame.clone()); level: usize, // current level
let lock_guard = core::array::from_fn(|_| None); guard_level: usize, // from guard_level to level, the locks are held
Self { va: Vaddr, // current virtual address
stack, barrier_va: Range<Vaddr>, // virtual address range that is locked
lock_guard, }
impl<'a, M: PageTableMode, E: PageTableEntryTrait, C: PageTableConstsTrait> CursorMut<'a, M, E, C>
where
[(); C::NR_ENTRIES_PER_FRAME]:,
[(); C::NR_LEVELS]:,
{
/// Create a cursor exclusively owning the locks for the given range.
///
/// The cursor created will only be able to map, query or jump within the
/// given range.
pub(crate) fn new(
pt: &'a PageTable<M, E, C>,
va: &Range<Vaddr>,
) -> Result<Self, PageTableError> {
if !M::covers(va) {
return Err(PageTableError::InvalidVaddrRange(va.start, va.end));
}
if va.start % C::BASE_PAGE_SIZE != 0 || va.end % C::BASE_PAGE_SIZE != 0 {
return Err(PageTableError::UnalignedVaddr);
}
// Create a guard array that only hold the root node lock.
let guards = core::array::from_fn(|i| {
if i == 0 {
Some(pt.root_frame.lock_arc())
} else {
None
}
});
let mut cursor = Self {
pt,
guards,
level: C::NR_LEVELS, level: C::NR_LEVELS,
va, guard_level: C::NR_LEVELS,
va: va.start,
barrier_va: va.clone(),
};
// Go down and get proper locks. The cursor should hold a lock of a
// page table node containing the virtual address range.
//
// While going down, previous guards of too-high levels will be released.
loop {
let level_too_high = {
let start_idx = C::in_frame_index(va.start, cursor.level);
let end_idx = C::in_frame_index(va.end - 1, cursor.level);
start_idx == end_idx
};
if !level_too_high || !cursor.cur_child().is_pt() {
break;
}
cursor.level_down(None);
cursor.guards[C::NR_LEVELS - cursor.level - 1] = None;
cursor.guard_level -= 1;
}
Ok(cursor)
}
/// Jump to the given virtual address.
///
/// It panics if the address is out of the range where the cursor is required to operate,
/// or has bad alignment.
pub(crate) fn jump(&mut self, va: Vaddr) {
assert!(self.barrier_va.contains(&va));
assert!(va % C::BASE_PAGE_SIZE == 0);
loop {
let cur_node_start = self.va & !(C::page_size(self.level + 1) - 1);
let cur_node_end = cur_node_start + C::page_size(self.level + 1);
// If the address is within the current node, we can jump directly.
if cur_node_start <= va && va < cur_node_end {
self.va = va;
return;
}
// There is a corner case that the cursor is depleted, sitting at the start of the
// next node but the next node is not locked because the parent is not locked.
if self.va >= self.barrier_va.end && self.level == self.guard_level {
self.va = va;
return;
}
debug_assert!(self.level < self.guard_level);
self.level_up();
} }
} }
/// Map or unmap the range starting from the current address. /// Map the range starting from the current address to a `VmFrame`.
/// ///
/// The argument `create` allows you to map the continuous range to a physical /// # Panic
/// range with the given map property. ///
/// This function will panic if
/// - the virtual address range to be mapped is out of the range;
/// - it is already mapped to a huge page while the caller wants to map a smaller one.
///
/// # Safety
///
/// The caller should ensure that the virtual range being mapped does
/// not affect kernel's memory safety.
pub(crate) unsafe fn map(&mut self, frame: VmFrame, prop: MapProperty) {
let end = self.va + C::BASE_PAGE_SIZE;
assert!(end <= self.barrier_va.end);
// Go down if not applicable.
while self.level > C::HIGHEST_TRANSLATION_LEVEL
|| self.va % C::page_size(self.level) != 0
|| self.va + C::page_size(self.level) > end
{
self.level_down(Some(prop));
continue;
}
// Map the current page.
let idx = self.cur_idx();
let level = self.level;
self.cur_node_mut()
.set_child(idx, Child::Frame(frame), Some(prop), level > 1);
self.move_forward();
}
/// Map the range starting from the current address to a physical address range.
/// ///
/// The function will map as more huge pages as possible, and it will split /// The function will map as more huge pages as possible, and it will split
/// the huge pages into smaller pages if necessary. If the input range is large, /// the huge pages into smaller pages if necessary. If the input range is
/// the resulting mappings may look like this (if very huge pages supported): /// large, the resulting mappings may look like this (if very huge pages
/// supported):
/// ///
/// ```text /// ```text
/// start end /// start end
@ -146,159 +216,245 @@ where
/// 4KiB 2MiB 1GiB 4KiB 4KiB /// 4KiB 2MiB 1GiB 4KiB 4KiB
/// ``` /// ```
/// ///
/// In practice it is suggested to use simple wrappers for this API that maps /// In practice it is not suggested to use this method for safety and conciseness.
/// frames for safety and conciseness.
/// ///
/// # Safety /// # Safety
/// ///
/// This function manipulates the page table directly, and it is unsafe because /// The caller should ensure that
/// it may cause undefined behavior if the caller does not ensure that the /// - the range being mapped does not affect kernel's memory safety;
/// mapped address is valid and the page table is not corrupted if it is used /// - the physical address to be mapped is valid and safe to use.
/// by the kernel. pub(crate) unsafe fn map_pa(&mut self, pa: &Range<Paddr>, prop: MapProperty) {
pub(super) unsafe fn map(&mut self, option: MapOption) { let end = self.va + pa.len();
self.acquire_locks(); let mut pa = pa.start;
let len = option.len(); assert!(end <= self.barrier_va.end);
let end = self.va + len;
let mut option = option;
while self.va < end { while self.va < end {
// Skip if we are unmapping and it is already invalid. // We ensure not mapping in reserved kernel shared tables or releasing it.
let cur_pte = unsafe { self.cur_pte_ptr().read() }; // Although it may be an invariant for all architectures and will be optimized
if matches!(option, MapOption::Unmap { .. }) && !cur_pte.is_valid() { // out by the compiler since `C::NR_LEVELS - 1 > C::HIGHEST_TRANSLATION_LEVEL`.
self.next_slot(); let is_kernel_shared_node =
TypeId::of::<M>() == TypeId::of::<KernelMode>() && self.level >= C::NR_LEVELS - 1;
if self.level > C::HIGHEST_TRANSLATION_LEVEL
|| is_kernel_shared_node
|| self.va % C::page_size(self.level) != 0
|| self.va + C::page_size(self.level) > end
|| pa % C::page_size(self.level) != 0
{
self.level_down(Some(prop));
continue;
}
// Map the current page.
let idx = self.cur_idx();
let level = self.level;
self.cur_node_mut()
.set_child(idx, Child::Untracked(pa), Some(prop), level > 1);
pa += C::page_size(level);
self.move_forward();
}
}
/// Unmap the range starting from the current address with the given length of virtual address.
///
/// # Safety
///
/// The caller should ensure that the range being unmapped does not affect kernel's memory safety.
///
/// # Panic
///
/// This function will panic if:
/// - the range to be unmapped is out of the range where the cursor is required to operate;
/// - the range covers only a part of a page.
pub(crate) unsafe fn unmap(&mut self, len: usize) {
let end = self.va + len;
assert!(end <= self.barrier_va.end);
assert!(end % C::BASE_PAGE_SIZE == 0);
while self.va < end {
// Skip if it is already invalid.
if self.cur_child().is_none() {
if self.va + C::page_size(self.level) > end {
break;
}
self.move_forward();
continue; continue;
} }
// We check among the conditions that may lead to a level down. // We check among the conditions that may lead to a level down.
let is_pa_not_aligned = option // We ensure not unmapping in reserved kernel shared tables or releasing it.
.paddr() let is_kernel_shared_node =
.map(|pa| pa % C::page_size(self.level) != 0)
.unwrap_or(false);
let map_but_too_huge = self.level > C::HIGHEST_TRANSLATION_LEVEL
&& !matches!(option, MapOption::Unmap { .. });
// We ensure not mapping in reserved kernel shared tables or releasing it.
// Although it may be an invariant for all architectures and will be optimized
// out by the compiler since `C::NR_LEVELS - 1 > C::HIGHEST_TRANSLATION_LEVEL`.
let kshared_lvl_down =
TypeId::of::<M>() == TypeId::of::<KernelMode>() && self.level >= C::NR_LEVELS - 1; TypeId::of::<M>() == TypeId::of::<KernelMode>() && self.level >= C::NR_LEVELS - 1;
if map_but_too_huge if is_kernel_shared_node
|| kshared_lvl_down
|| self.va % C::page_size(self.level) != 0 || self.va % C::page_size(self.level) != 0
|| self.va + C::page_size(self.level) > end || self.va + C::page_size(self.level) > end
|| is_pa_not_aligned
{ {
let ld_prop = option.prop().unwrap_or(MapProperty::new_invalid()); self.level_down(Some(MapProperty::new_invalid()));
self.level_down(Some(ld_prop));
continue; continue;
} }
self.map_page(option.consume(C::page_size(self.level)));
self.next_slot(); // Unmap the current page.
let idx = self.cur_idx();
self.cur_node_mut().set_child(idx, Child::None, None, false);
self.move_forward();
} }
self.release_locks();
} }
/// Apply the given operation to all the mappings within the range. /// Apply the given operation to all the mappings within the range.
pub(super) unsafe fn protect( ///
/// The function will return an error if it is not allowed to protect an invalid range and
/// such a range is encountered, or if the range to be protected covers only a part of a page.
///
/// # Safety
///
/// The caller should ensure that the range being protected does not affect kernel's memory safety.
///
/// # Panic
///
/// This function will panic if:
/// - the range to be protected is out of the range where the cursor is required to operate.
pub(crate) unsafe fn protect(
&mut self, &mut self,
len: usize, len: usize,
op: impl MapOp, op: impl MapOp,
allow_protect_invalid: bool, allow_protect_invalid: bool,
) -> Result<(), PageTableError> { ) -> Result<(), PageTableError> {
self.acquire_locks();
let end = self.va + len; let end = self.va + len;
assert!(end <= self.barrier_va.end);
while self.va < end { while self.va < end {
let cur_pte = unsafe { self.cur_pte_ptr().read() }; if self.cur_child().is_none() {
if !cur_pte.is_valid() {
if !allow_protect_invalid { if !allow_protect_invalid {
return Err(PageTableError::ProtectingInvalid); return Err(PageTableError::ProtectingInvalid);
} }
self.next_slot(); self.move_forward();
continue; continue;
} }
// Go down if it's not a last node or if the page size is too big. // Go down if it's not a last node.
if !(cur_pte.is_huge() || self.level == 1) if self.cur_child().is_pt() {
|| (self.va % C::page_size(self.level)) != 0 self.level_down(None);
|| self.va + C::page_size(self.level) > end
{
self.level_down(Some(op(cur_pte.info())));
continue; continue;
} }
// Apply the operation. let vaddr_not_fit =
unsafe { self.va % C::page_size(self.level) != 0 || self.va + C::page_size(self.level) > end;
self.cur_pte_ptr().write(E::new( let cur_pte_info = self.read_cur_pte_info();
cur_pte.paddr(), let protected_prop = op(cur_pte_info);
op(cur_pte.info()), // Go down if the page size is too big and we are protecting part
cur_pte.is_huge(), // of untyped huge pages.
true, if self.cur_child().is_untyped() && vaddr_not_fit {
)) self.level_down(Some(protected_prop));
}; continue;
self.next_slot(); } else if vaddr_not_fit {
return Err(PageTableError::ProtectingPartial);
}
let idx = self.cur_idx();
let level = self.level;
self.cur_node_mut().protect(idx, protected_prop, level);
self.move_forward();
} }
self.release_locks();
Ok(()) Ok(())
} }
fn cur_pte_ptr(&self) -> *mut E { /// Get the information of the current slot and move to the next slot.
let ptf = self.lock_guard[C::NR_LEVELS - self.level].as_ref().unwrap(); pub(crate) fn query(&mut self) -> Option<PageTableQueryResult> {
let frame_addr = paddr_to_vaddr(ptf.inner.start_paddr()); if self.va >= self.barrier_va.end {
let offset = C::in_frame_index(self.va, self.level); return None;
(frame_addr + offset * size_of::<E>()) as *mut E }
loop {
let level = self.level;
let va = self.va;
let map_info = self.read_cur_pte_info();
match self.cur_child().clone() {
Child::Frame(frame) => {
self.move_forward();
return Some(PageTableQueryResult::Mapped {
va,
frame,
info: map_info,
});
}
Child::PageTable(_) => {
// Go down if it's not a last node.
self.level_down(None);
continue;
}
Child::Untracked(pa) => {
self.move_forward();
return Some(PageTableQueryResult::MappedUntyped {
va,
pa,
len: C::page_size(level),
info: map_info,
});
}
Child::None => {
self.move_forward();
return Some(PageTableQueryResult::NotMapped {
va,
len: C::page_size(level),
});
}
}
}
}
/// Consume itself and leak the root guard for the caller if it locked the root level.
///
/// It is useful when the caller wants to keep the root guard while the cursor should be dropped.
pub(super) fn leak_root_guard(mut self) -> Option<ArcSpinLockGuard<PageTableFrame<E, C>>> {
if self.guard_level != C::NR_LEVELS {
return None;
}
while self.level < C::NR_LEVELS {
self.level_up();
}
self.guards[0].take()
// Ok to drop self here because we ensure not to access the page table if the current
// level is the root level when running the dropping method.
} }
/// Traverse forward in the current level to the next PTE. /// Traverse forward in the current level to the next PTE.
/// If reached the end of a page table frame, it leads itself up to the next frame of the parent frame. ///
fn next_slot(&mut self) { /// If reached the end of a page table frame, it leads itself up to the next frame of the parent
/// frame if possible.
fn move_forward(&mut self) {
let page_size = C::page_size(self.level); let page_size = C::page_size(self.level);
while self.level < C::NR_LEVELS && C::in_frame_index(self.va + page_size, self.level) == 0 { let next_va = self.va.align_down(page_size) + page_size;
while self.level < self.guard_level && C::in_frame_index(next_va, self.level) == 0 {
self.level_up(); self.level_up();
} }
self.va += page_size; self.va = next_va;
} }
/// Go up a level. We release the current frame if it has no mappings since the cursor only moves /// Go up a level. We release the current frame if it has no mappings since the cursor only moves
/// forward. And we will do the final cleanup using `level_up` when the cursor is dropped. /// forward. And if needed we will do the final cleanup using this method after re-walk when the
/// cursor is dropped.
/// ///
/// This method requires locks acquired before calling it. The discarded level will be unlocked. /// This method requires locks acquired before calling it. The discarded level will be unlocked.
fn level_up(&mut self) { fn level_up(&mut self) {
let last_map_cnt_is_zero = { let last_node_all_unmapped = self.cur_node().nr_valid_children() == 0;
let top_ptf = self.lock_guard[C::NR_LEVELS - self.level].as_ref().unwrap(); self.guards[C::NR_LEVELS - self.level] = None;
top_ptf.map_count == 0
};
self.stack[C::NR_LEVELS - self.level] = None;
self.lock_guard[C::NR_LEVELS - self.level] = None;
self.level += 1; self.level += 1;
let can_release_child = let can_release_child =
TypeId::of::<M>() == TypeId::of::<KernelMode>() && self.level < C::NR_LEVELS; TypeId::of::<M>() == TypeId::of::<KernelMode>() && self.level < C::NR_LEVELS;
if can_release_child && last_map_cnt_is_zero { if can_release_child && last_node_all_unmapped {
let top_ptf = self.lock_guard[C::NR_LEVELS - self.level] let idx = self.cur_idx();
.as_deref_mut() self.cur_node_mut().set_child(idx, Child::None, None, false);
.unwrap();
let frame_addr = paddr_to_vaddr(top_ptf.inner.start_paddr());
let idx = C::in_frame_index(self.va, self.level);
unsafe { (frame_addr as *mut E).add(idx).write(E::new_invalid()) }
top_ptf.child[idx] = None;
top_ptf.map_count -= 1;
} }
} }
/// A level down operation during traversal. It may split a huge page into /// A level down operation during traversal. It may create a new child frame if the
/// smaller pages if we have an end address within the next mapped huge page. /// current frame does not have one. It may also split an untyped huge page into
/// It may also create a new child frame if the current frame does not have one. /// smaller pages if we have an end address within the next mapped untyped huge page.
/// If that may happen the map property of intermediate level `prop` should be ///
/// If creation may happen the map property of intermediate level `prop` should be
/// passed in correctly. Whether the map property matters in an intermediate /// passed in correctly. Whether the map property matters in an intermediate
/// level is architecture-dependent. /// level is architecture-dependent.
/// ///
/// This method requires write locks acquired before calling it. The newly added /// Also, the staticness of the page table is guaranteed if the caller make sure
/// level will still hold the lock. /// that there is a child node for the current node.
unsafe fn level_down(&mut self, prop: Option<MapProperty>) { fn level_down(&mut self, prop: Option<MapProperty>) {
debug_assert!(self.level > 1); debug_assert!(self.level > 1);
// Check if the child frame exists. // Check if the child frame exists.
let nxt_lvl_frame = { let nxt_lvl_frame = {
let idx = C::in_frame_index(self.va, self.level); let idx = C::in_frame_index(self.va, self.level);
let child = { let child = self.cur_child();
let top_ptf = self.lock_guard[C::NR_LEVELS - self.level].as_ref().unwrap(); if let Child::PageTable(nxt_lvl_frame) = child {
&top_ptf.child[idx]
};
if let Some(Child::PageTable(nxt_lvl_frame)) = child {
Some(nxt_lvl_frame.clone()) Some(nxt_lvl_frame.clone())
} else { } else {
None None
@ -307,143 +463,89 @@ where
// Create a new child frame if it does not exist. Sure it could be done only if // Create a new child frame if it does not exist. Sure it could be done only if
// it is allowed to modify the page table. // it is allowed to modify the page table.
let nxt_lvl_frame = nxt_lvl_frame.unwrap_or_else(|| { let nxt_lvl_frame = nxt_lvl_frame.unwrap_or_else(|| {
let mut new_frame = PageTableFrame::<E, C>::new(); // If it already maps an untyped huge page, we should split it.
// If it already maps a huge page, we should split it. if self.cur_child().is_untyped() {
let pte = unsafe { self.cur_pte_ptr().read() }; let level = self.level;
if pte.is_valid() && pte.is_huge() { let idx = self.cur_idx();
let pa = pte.paddr(); self.cur_node_mut().split_untracked_huge(level, idx);
let prop = pte.info().prop; let Child::PageTable(nxt_lvl_frame) = self.cur_child() else {
for i in 0..C::NR_ENTRIES_PER_FRAME { unreachable!()
let nxt_level = self.level - 1; };
let nxt_pte = { nxt_lvl_frame.clone()
let frame_addr = paddr_to_vaddr(new_frame.inner.start_paddr()); } else if self.cur_child().is_none() {
&mut *(frame_addr as *mut E).add(i) let new_frame = Arc::new(SpinLock::new(PageTableFrame::<E, C>::new()));
}; let idx = self.cur_idx();
*nxt_pte = E::new(pa + i * C::page_size(nxt_level), prop, nxt_level > 1, true); self.cur_node_mut().set_child(
} idx,
new_frame.map_count = C::NR_ENTRIES_PER_FRAME; Child::PageTable(new_frame.clone()),
unsafe { prop,
self.cur_pte_ptr().write(E::new( false,
new_frame.inner.start_paddr(), );
prop, new_frame
false,
false,
))
}
} else { } else {
// The child couldn't be valid here because child is none and it's not huge. panic!("Trying to level down when it is mapped to a typed frame");
debug_assert!(!pte.is_valid());
unsafe {
self.cur_pte_ptr().write(E::new(
new_frame.inner.start_paddr(),
prop.unwrap(),
false,
false,
))
}
} }
let top_ptf = self.lock_guard[C::NR_LEVELS - self.level]
.as_deref_mut()
.unwrap();
top_ptf.map_count += 1;
let new_frame_ref = Arc::new(SpinLock::new(new_frame));
top_ptf.child[C::in_frame_index(self.va, self.level)] =
Some(Child::PageTable(new_frame_ref.clone()));
new_frame_ref
}); });
self.lock_guard[C::NR_LEVELS - self.level + 1] = Some(nxt_lvl_frame.lock_arc()); self.guards[C::NR_LEVELS - self.level + 1] = Some(nxt_lvl_frame.lock_arc());
self.stack[C::NR_LEVELS - self.level + 1] = Some(nxt_lvl_frame);
self.level -= 1; self.level -= 1;
} }
/// Map or unmap the page pointed to by the cursor (which could be large). fn cur_node(&self) -> &ArcSpinLockGuard<PageTableFrame<E, C>> {
/// If the physical address and the map property are not provided, it unmaps self.guards[C::NR_LEVELS - self.level].as_ref().unwrap()
/// the current page.
///
/// This method requires write locks acquired before calling it.
unsafe fn map_page(&mut self, option: MapOption) {
let pte_ptr = self.cur_pte_ptr();
let top_ptf = self.lock_guard[C::NR_LEVELS - self.level]
.as_deref_mut()
.unwrap();
let child = {
let idx = C::in_frame_index(self.va, self.level);
if top_ptf.child[idx].is_some() {
top_ptf.child[idx] = None;
top_ptf.map_count -= 1;
}
&mut top_ptf.child[idx]
};
match option {
MapOption::Map { frame, prop } => {
let pa = frame.start_paddr();
unsafe {
pte_ptr.write(E::new(pa, prop, self.level > 1, true));
}
*child = Some(Child::Frame(frame));
top_ptf.map_count += 1;
}
MapOption::MapUntyped { pa, len, prop } => {
debug_assert_eq!(len, C::page_size(self.level));
unsafe {
pte_ptr.write(E::new(pa, prop, self.level > 1, true));
}
top_ptf.map_count += 1;
}
MapOption::Unmap { len } => {
debug_assert_eq!(len, C::page_size(self.level));
unsafe { pte_ptr.write(E::new_invalid()) }
}
}
} }
fn acquire_locks(&mut self) { fn cur_node_mut(&mut self) -> &mut ArcSpinLockGuard<PageTableFrame<E, C>> {
for i in 0..=C::NR_LEVELS - self.level { self.guards[C::NR_LEVELS - self.level].as_mut().unwrap()
let Some(ref ptf) = self.stack[i] else {
panic!("Invalid values in PT cursor stack while acuqiring locks");
};
debug_assert!(self.lock_guard[i].is_none());
self.lock_guard[i] = Some(ptf.lock_arc());
}
} }
fn release_locks(&mut self) { fn cur_idx(&self) -> usize {
for i in (0..=C::NR_LEVELS - self.level).rev() { C::in_frame_index(self.va, self.level)
let Some(ref ptf) = self.stack[i] else { }
panic!("Invalid values in PT cursor stack while releasing locks");
}; fn cur_child(&self) -> &Child<E, C> {
debug_assert!(self.lock_guard[i].is_some()); self.cur_node().child(self.cur_idx())
self.lock_guard[i] = None; }
}
fn read_cur_pte_info(&self) -> MapInfo {
self.cur_node().read_pte_info(self.cur_idx())
} }
} }
/// The iterator for querying over the page table without modifying it. impl<M: PageTableMode, E: PageTableEntryTrait, C: PageTableConstsTrait> Drop
pub struct PageTableIter<'a, M: PageTableMode, E: PageTableEntryTrait, C: PageTableConstsTrait> for CursorMut<'_, M, E, C>
where where
[(); C::NR_ENTRIES_PER_FRAME]:, [(); C::NR_ENTRIES_PER_FRAME]:,
[(); C::NR_LEVELS]:, [(); C::NR_LEVELS]:,
{ {
cursor: PageTableCursor<'a, M, E, C>, fn drop(&mut self) {
end_va: Vaddr, // Recycle what we can recycle now.
} while self.level < self.guard_level {
self.level_up();
impl<'a, M: PageTableMode, E: PageTableEntryTrait, C: PageTableConstsTrait> }
PageTableIter<'a, M, E, C> // No need to do further cleanup if it is the root node or
where // there are mappings left.
[(); C::NR_ENTRIES_PER_FRAME]:, if self.level == self.guard_level || self.cur_node().nr_valid_children() != 0 {
[(); C::NR_LEVELS]:, return;
{ }
pub(super) fn new(pt: &'a PageTable<M, E, C>, va: &Range<Vaddr>) -> Self { // Drop the lock on the guard level.
Self { self.guards[C::NR_LEVELS - self.guard_level] = None;
cursor: PageTableCursor::new(pt, va.start), // Re-walk the page table to retreive the locks.
end_va: va.end, self.guards[0] = Some(self.pt.root_frame.lock_arc());
self.level = C::NR_LEVELS;
// Another cursor can unmap the guard level node before this cursor
// is dropped, we can just do our best here when re-walking.
while self.level > self.guard_level && self.cur_child().is_pt() {
self.level_down(None);
}
// Doing final cleanup by [`CursorMut::level_up`] to the root.
while self.level < C::NR_LEVELS {
self.level_up();
} }
} }
} }
#[derive(Clone, Debug)] #[derive(Clone, Debug)]
pub enum PageTableQueryResult { pub(crate) enum PageTableQueryResult {
NotMapped { NotMapped {
va: Vaddr, va: Vaddr,
len: usize, len: usize,
@ -461,8 +563,32 @@ pub enum PageTableQueryResult {
}, },
} }
/// The read-only cursor for traversal over the page table.
///
/// It implements the `Iterator` trait to provide a convenient way to query over the page table.
pub(crate) struct Cursor<'a, M: PageTableMode, E: PageTableEntryTrait, C: PageTableConstsTrait>
where
[(); C::NR_ENTRIES_PER_FRAME]:,
[(); C::NR_LEVELS]:,
{
inner: CursorMut<'a, M, E, C>,
}
impl<'a, M: PageTableMode, E: PageTableEntryTrait, C: PageTableConstsTrait> Cursor<'a, M, E, C>
where
[(); C::NR_ENTRIES_PER_FRAME]:,
[(); C::NR_LEVELS]:,
{
pub(super) fn new(
pt: &'a PageTable<M, E, C>,
va: &Range<Vaddr>,
) -> Result<Self, PageTableError> {
CursorMut::new(pt, va).map(|inner| Self { inner })
}
}
impl<'a, M: PageTableMode, E: PageTableEntryTrait, C: PageTableConstsTrait> Iterator impl<'a, M: PageTableMode, E: PageTableEntryTrait, C: PageTableConstsTrait> Iterator
for PageTableIter<'a, M, E, C> for Cursor<'a, M, E, C>
where where
[(); C::NR_ENTRIES_PER_FRAME]:, [(); C::NR_ENTRIES_PER_FRAME]:,
[(); C::NR_LEVELS]:, [(); C::NR_LEVELS]:,
@ -470,63 +596,6 @@ where
type Item = PageTableQueryResult; type Item = PageTableQueryResult;
fn next(&mut self) -> Option<Self::Item> { fn next(&mut self) -> Option<Self::Item> {
self.cursor.acquire_locks(); self.inner.query()
if self.cursor.va >= self.end_va {
return None;
}
loop {
let level = self.cursor.level;
let va = self.cursor.va;
let top_ptf = self.cursor.lock_guard[C::NR_LEVELS - level]
.as_ref()
.unwrap();
let cur_pte = unsafe { self.cursor.cur_pte_ptr().read() };
// Yeild if it's not a valid node.
if !cur_pte.is_valid() {
self.cursor.next_slot();
self.cursor.release_locks();
return Some(PageTableQueryResult::NotMapped {
va,
len: C::page_size(level),
});
}
// Go down if it's not a last node.
if !(cur_pte.is_huge() || level == 1) {
debug_assert!(cur_pte.is_valid());
// Safety: it's valid and there should be a child frame here.
unsafe {
self.cursor.level_down(None);
}
continue;
}
// Yield the current mapping.
let map_info = cur_pte.info();
let idx = C::in_frame_index(self.cursor.va, self.cursor.level);
match top_ptf.child[idx] {
Some(Child::Frame(ref frame)) => {
let frame = frame.clone();
self.cursor.next_slot();
self.cursor.release_locks();
return Some(PageTableQueryResult::Mapped {
va,
frame,
info: map_info,
});
}
Some(Child::PageTable(_)) => {
panic!("The child couldn't be page table here because it's valid and not huge");
}
None => {
self.cursor.next_slot();
self.cursor.release_locks();
return Some(PageTableQueryResult::MappedUntyped {
va,
pa: cur_pte.paddr(),
len: C::page_size(level),
info: map_info,
});
}
}
}
} }
} }
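The read-only `Cursor` added at the end of this file is an `Iterator` wrapper around `CursorMut::query`. A standalone model of that pattern, with simplified query results and no locking, to show how the query loop is typically consumed:

```rust
use std::ops::Range;

const PAGE_SIZE: usize = 4096;

#[derive(Debug)]
enum QueryResult {
    NotMapped { va: usize, len: usize },
    Mapped { va: usize, pa: usize, len: usize },
}

/// Stand-in for `CursorMut`: walks a fixed array of "PTEs" page by page.
struct CursorMut<'a> {
    ptes: &'a [Option<usize>], // Some(pa) if the page is mapped
    va: usize,
    barrier: Range<usize>,
}

impl<'a> CursorMut<'a> {
    /// Report the current slot and move to the next one.
    fn query(&mut self) -> Option<QueryResult> {
        if self.va >= self.barrier.end {
            return None;
        }
        let va = self.va;
        self.va += PAGE_SIZE; // the toy equivalent of move_forward()
        Some(match self.ptes[va / PAGE_SIZE] {
            Some(pa) => QueryResult::Mapped { va, pa, len: PAGE_SIZE },
            None => QueryResult::NotMapped { va, len: PAGE_SIZE },
        })
    }
}

/// The read-only wrapper: `Iterator` is just repeated `query()`.
struct Cursor<'a> {
    inner: CursorMut<'a>,
}

impl<'a> Iterator for Cursor<'a> {
    type Item = QueryResult;
    fn next(&mut self) -> Option<Self::Item> {
        self.inner.query()
    }
}

fn main() {
    let ptes = [Some(0x8000), None, Some(0xa000)];
    let cursor = Cursor {
        inner: CursorMut { ptes: &ptes, va: 0, barrier: 0..3 * PAGE_SIZE },
    };
    for result in cursor {
        println!("{result:?}");
    }
}
```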


@ -2,10 +2,10 @@
use alloc::{boxed::Box, sync::Arc}; use alloc::{boxed::Box, sync::Arc};
use super::{PageTableConstsTrait, PageTableEntryTrait}; use super::{MapInfo, MapProperty, PageTableConstsTrait, PageTableEntryTrait};
use crate::{ use crate::{
sync::SpinLock, sync::SpinLock,
vm::{VmAllocOptions, VmFrame}, vm::{Paddr, VmAllocOptions, VmFrame},
}; };
/// A page table frame. /// A page table frame.
@ -17,14 +17,12 @@ where
[(); C::NR_ENTRIES_PER_FRAME]:, [(); C::NR_ENTRIES_PER_FRAME]:,
[(); C::NR_LEVELS]:, [(); C::NR_LEVELS]:,
{ {
pub inner: VmFrame, inner: VmFrame,
/// TODO: all the following fields can be removed if frame metadata is introduced. /// TODO: all the following fields can be removed if frame metadata is introduced.
/// Here we allow 2x space overhead each frame temporarily. /// Here we allow 2x space overhead each frame temporarily.
#[allow(clippy::type_complexity)] #[allow(clippy::type_complexity)]
pub child: Box<[Option<Child<E, C>>; C::NR_ENTRIES_PER_FRAME]>, children: Box<[Child<E, C>; C::NR_ENTRIES_PER_FRAME]>,
/// The number of mapped frames or page tables. nr_valid_children: usize,
/// This is to track if we can free itself.
pub map_count: usize,
} }
pub(super) type PtfRef<E, C> = Arc<SpinLock<PageTableFrame<E, C>>>; pub(super) type PtfRef<E, C> = Arc<SpinLock<PageTableFrame<E, C>>>;
@ -37,6 +35,47 @@ where
{ {
PageTable(PtfRef<E, C>), PageTable(PtfRef<E, C>),
Frame(VmFrame), Frame(VmFrame),
/// Frames not tracked by the frame allocator.
Untracked(Paddr),
None,
}
impl<E: PageTableEntryTrait, C: PageTableConstsTrait> Child<E, C>
where
[(); C::NR_ENTRIES_PER_FRAME]:,
[(); C::NR_LEVELS]:,
{
pub(super) fn is_pt(&self) -> bool {
matches!(self, Child::PageTable(_))
}
pub(super) fn is_frame(&self) -> bool {
matches!(self, Child::Frame(_))
}
pub(super) fn is_none(&self) -> bool {
matches!(self, Child::None)
}
pub(super) fn is_some(&self) -> bool {
!self.is_none()
}
pub(super) fn is_untyped(&self) -> bool {
matches!(self, Child::Untracked(_))
}
/// Is a last entry that maps to a physical address.
pub(super) fn is_last(&self) -> bool {
matches!(self, Child::Frame(_) | Child::Untracked(_))
}
fn paddr(&self) -> Option<Paddr> {
match self {
Child::PageTable(node) => {
// The chance of deadlock is zero because it is only called by [`PageTableFrame::protect`],
// and the cursor will not protect a node while holding the lock.
Some(node.lock().start_paddr())
}
Child::Frame(frame) => Some(frame.start_paddr()),
Child::Untracked(pa) => Some(*pa),
Child::None => None,
}
}
} }
impl<E: PageTableEntryTrait, C: PageTableConstsTrait> Clone for Child<E, C> impl<E: PageTableEntryTrait, C: PageTableConstsTrait> Clone for Child<E, C>
@ -49,6 +88,8 @@ where
match self { match self {
Child::PageTable(ptf) => Child::PageTable(ptf.clone()), Child::PageTable(ptf) => Child::PageTable(ptf.clone()),
Child::Frame(frame) => Child::Frame(frame.clone()), Child::Frame(frame) => Child::Frame(frame.clone()),
Child::Untracked(pa) => Child::Untracked(*pa),
Child::None => Child::None,
} }
} }
} }
@ -61,10 +102,130 @@ where
pub(super) fn new() -> Self { pub(super) fn new() -> Self {
Self { Self {
inner: VmAllocOptions::new(1).alloc_single().unwrap(), inner: VmAllocOptions::new(1).alloc_single().unwrap(),
child: Box::new(core::array::from_fn(|_| None)), children: Box::new(core::array::from_fn(|_| Child::None)),
map_count: 0, nr_valid_children: 0,
} }
} }
pub(super) fn start_paddr(&self) -> Paddr {
self.inner.start_paddr()
}
pub(super) fn child(&self, idx: usize) -> &Child<E, C> {
debug_assert!(idx < C::NR_ENTRIES_PER_FRAME);
&self.children[idx]
}
/// The number of mapped frames or page tables.
/// This is to track if we can free itself.
pub(super) fn nr_valid_children(&self) -> usize {
self.nr_valid_children
}
/// Read the info from a page table entry at a given index.
pub(super) fn read_pte_info(&self, idx: usize) -> MapInfo {
self.read_pte(idx).info()
}
/// Split the untracked huge page mapped at `idx` to smaller pages.
pub(super) fn split_untracked_huge(&mut self, cur_level: usize, idx: usize) {
debug_assert!(idx < C::NR_ENTRIES_PER_FRAME);
debug_assert!(cur_level > 1);
let Child::Untracked(pa) = self.children[idx] else {
panic!("split_untracked_huge: not an untyped huge page");
};
let info = self.read_pte_info(idx);
let mut new_frame = Self::new();
for i in 0..C::NR_ENTRIES_PER_FRAME {
let small_pa = pa + i * C::page_size(cur_level - 1);
new_frame.set_child(
i,
Child::Untracked(small_pa),
Some(info.prop),
cur_level - 1 > 1,
);
}
self.set_child(
idx,
Child::PageTable(Arc::new(SpinLock::new(new_frame))),
Some(info.prop),
false,
);
}
/// Map a child at a given index.
/// If mapping a non-none child, please give the property to map the child.
pub(super) fn set_child(
&mut self,
idx: usize,
child: Child<E, C>,
prop: Option<MapProperty>,
huge: bool,
) {
assert!(idx < C::NR_ENTRIES_PER_FRAME);
// Safety: the index is within the bound and the PTE to be written is valid.
// And the physical address of PTE points to initialized memory.
// This applies to all the following `write_pte` invocations.
unsafe {
match &child {
Child::PageTable(node) => {
debug_assert!(!huge);
let frame = node.lock();
self.write_pte(
idx,
E::new(frame.inner.start_paddr(), prop.unwrap(), false, false),
);
self.nr_valid_children += 1;
}
Child::Frame(frame) => {
debug_assert!(!huge); // `VmFrame` currently can only be a regular page.
self.write_pte(idx, E::new(frame.start_paddr(), prop.unwrap(), false, true));
self.nr_valid_children += 1;
}
Child::Untracked(pa) => {
self.write_pte(idx, E::new(*pa, prop.unwrap(), huge, true));
self.nr_valid_children += 1;
}
Child::None => {
self.write_pte(idx, E::new_invalid());
}
}
}
if self.children[idx].is_some() {
self.nr_valid_children -= 1;
}
self.children[idx] = child;
}
/// Protect an already mapped child at a given index.
pub(super) fn protect(&mut self, idx: usize, prop: MapProperty, level: usize) {
debug_assert!(self.children[idx].is_some());
let paddr = self.children[idx].paddr().unwrap();
// Safety: the index is within the bound and the PTE is valid.
unsafe {
self.write_pte(
idx,
E::new(paddr, prop, level > 1, self.children[idx].is_last()),
);
}
}
fn read_pte(&self, idx: usize) -> E {
assert!(idx < C::NR_ENTRIES_PER_FRAME);
// Safety: the index is within the bound and PTE is plain-old-data.
unsafe { (self.inner.as_ptr() as *const E).add(idx).read() }
}
/// Write a page table entry at a given index.
///
/// # Safety
///
/// The caller must ensure that:
/// - the index is within bounds;
/// - the PTE is valid and the physical address in the PTE points to initialized memory.
unsafe fn write_pte(&mut self, idx: usize, pte: E) {
(self.inner.as_mut_ptr() as *mut E).add(idx).write(pte);
}
} }
impl<E: PageTableEntryTrait, C: PageTableConstsTrait> Clone for PageTableFrame<E, C> impl<E: PageTableEntryTrait, C: PageTableConstsTrait> Clone for PageTableFrame<E, C>
@ -77,13 +238,14 @@ where
fn clone(&self) -> Self { fn clone(&self) -> Self {
let new_frame = VmAllocOptions::new(1).alloc_single().unwrap(); let new_frame = VmAllocOptions::new(1).alloc_single().unwrap();
let new_ptr = new_frame.as_mut_ptr() as *mut E; let new_ptr = new_frame.as_mut_ptr() as *mut E;
let ptr = self.inner.as_ptr() as *const E; let children = Box::new(core::array::from_fn(|i| match self.child(i) {
let child = Box::new(core::array::from_fn(|i| { Child::PageTable(node) => unsafe {
self.child[i].as_ref().map(|child| match child { let frame = node.lock();
Child::PageTable(ptf) => unsafe { // Possibly a cursor is waiting for the root lock to recycle this node.
let frame = ptf.lock(); // We can skip copying empty page table nodes.
if frame.nr_valid_children() != 0 {
let cloned = frame.clone(); let cloned = frame.clone();
let pte = ptr.add(i).read(); let pte = self.read_pte(i);
new_ptr.add(i).write(E::new( new_ptr.add(i).write(E::new(
cloned.inner.start_paddr(), cloned.inner.start_paddr(),
pte.info().prop, pte.info().prop,
@ -91,20 +253,22 @@ where
false, false,
)); ));
Child::PageTable(Arc::new(SpinLock::new(cloned))) Child::PageTable(Arc::new(SpinLock::new(cloned)))
}, } else {
Child::Frame(frame) => { Child::None
unsafe {
let pte = ptr.add(i).read();
new_ptr.add(i).write(pte);
}
Child::Frame(frame.clone())
} }
}) },
Child::Frame(_) | Child::Untracked(_) => {
unsafe {
new_ptr.add(i).write(self.read_pte(i));
}
self.children[i].clone()
}
Child::None => Child::None,
})); }));
Self { Self {
inner: new_frame, inner: new_frame,
child, children,
map_count: self.map_count, nr_valid_children: self.nr_valid_children,
} }
} }
} }
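The new `set_child` keeps `nr_valid_children` consistent by first counting the incoming child and then discounting the child it overwrites; this counter is what lets the cursor decide whether a node can be recycled on `level_up` or drop. A simplified standalone model of that bookkeeping:

```rust
#[derive(Clone, Debug, PartialEq)]
enum Child {
    Frame(usize), // stand-in for a mapped frame (its physical address)
    None,
}

struct Node {
    children: Vec<Child>,
    nr_valid_children: usize,
}

impl Node {
    fn new(len: usize) -> Self {
        Self { children: vec![Child::None; len], nr_valid_children: 0 }
    }

    /// Mirror of the bookkeeping in `set_child`: count the new child first,
    /// then subtract one if a valid child is being overwritten.
    fn set_child(&mut self, idx: usize, child: Child) {
        if child != Child::None {
            self.nr_valid_children += 1;
        }
        if self.children[idx] != Child::None {
            self.nr_valid_children -= 1;
        }
        self.children[idx] = child;
    }

    /// What the cursor checks before recycling a node on `level_up` or drop.
    fn can_be_recycled(&self) -> bool {
        self.nr_valid_children == 0
    }
}

fn main() {
    let mut node = Node::new(4);
    node.set_child(0, Child::Frame(0x8000));
    node.set_child(0, Child::Frame(0x9000)); // overwrite: the count stays at 1
    assert_eq!(node.nr_valid_children, 1);
    node.set_child(0, Child::None); // unmap
    assert!(node.can_be_recycled());
}
```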


@ -1,12 +1,12 @@
// SPDX-License-Identifier: MPL-2.0 // SPDX-License-Identifier: MPL-2.0
use alloc::{boxed::Box, sync::Arc}; use alloc::sync::Arc;
use core::{fmt::Debug, marker::PhantomData, mem::size_of, ops::Range, panic}; use core::{fmt::Debug, marker::PhantomData, ops::Range, panic};
use crate::{ use crate::{
arch::mm::{activate_page_table, PageTableConsts, PageTableEntry}, arch::mm::{activate_page_table, PageTableConsts, PageTableEntry},
sync::SpinLock, sync::SpinLock,
vm::{paddr_to_vaddr, Paddr, Vaddr, VmAllocOptions, VmFrameVec, VmPerm}, vm::{paddr_to_vaddr, Paddr, Vaddr, VmPerm},
}; };
mod properties; mod properties;
@ -14,21 +14,20 @@ pub use properties::*;
mod frame; mod frame;
use frame::*; use frame::*;
mod cursor; mod cursor;
use cursor::*; pub(crate) use cursor::{Cursor, CursorMut, PageTableQueryResult};
pub(crate) use cursor::{PageTableIter, PageTableQueryResult};
#[cfg(ktest)] #[cfg(ktest)]
mod test; mod test;
#[derive(Clone, Copy, PartialEq, Eq, Debug)] #[derive(Clone, Copy, PartialEq, Eq, Debug)]
pub enum PageTableError { pub enum PageTableError {
InvalidVaddr(Vaddr), /// The virtual address range is invalid.
InvalidVaddrRange(Vaddr, Vaddr), InvalidVaddrRange(Vaddr, Vaddr),
VaddrNotAligned(Vaddr), /// Using virtual address not aligned.
VaddrRangeNotAligned(Vaddr, Vaddr), UnalignedVaddr,
PaddrNotAligned(Paddr), /// Protecting a mapping that does not exist.
PaddrRangeNotAligned(Vaddr, Vaddr),
// Protecting a mapping that does not exist.
ProtectingInvalid, ProtectingInvalid,
/// Protecting a part of an already mapped page.
ProtectingPartial,
} }
/// This is a compile-time technique to force the frame developers to distinguish /// This is a compile-time technique to force the frame developers to distinguish
@ -38,8 +37,8 @@ pub trait PageTableMode: Clone + Debug + 'static {
/// The range of virtual addresses that the page table can manage. /// The range of virtual addresses that the page table can manage.
const VADDR_RANGE: Range<Vaddr>; const VADDR_RANGE: Range<Vaddr>;
/// Check if the given range is within the valid virtual address range. /// Check if the given range is covered by the valid virtual address range.
fn encloses(r: &Range<Vaddr>) -> bool { fn covers(r: &Range<Vaddr>) -> bool {
Self::VADDR_RANGE.start <= r.start && r.end <= Self::VADDR_RANGE.end Self::VADDR_RANGE.start <= r.start && r.end <= Self::VADDR_RANGE.end
} }
} }
@ -78,61 +77,6 @@ where
[(); C::NR_ENTRIES_PER_FRAME]:, [(); C::NR_ENTRIES_PER_FRAME]:,
[(); C::NR_LEVELS]:, [(); C::NR_LEVELS]:,
{ {
pub(crate) fn map_frames(
&self,
vaddr: Vaddr,
frames: VmFrameVec,
prop: MapProperty,
) -> Result<(), PageTableError> {
if vaddr % C::BASE_PAGE_SIZE != 0 {
return Err(PageTableError::VaddrNotAligned(vaddr));
}
let va_range = vaddr
..vaddr
.checked_add(frames.nbytes())
.ok_or(PageTableError::InvalidVaddr(vaddr))?;
if !UserMode::encloses(&va_range) {
return Err(PageTableError::InvalidVaddrRange(
va_range.start,
va_range.end,
));
}
// Safety: modification to the user page table is safe.
unsafe {
self.map_frames_unchecked(vaddr, frames, prop);
}
Ok(())
}
pub(crate) fn unmap(&self, vaddr: &Range<Vaddr>) -> Result<(), PageTableError> {
if vaddr.start % C::BASE_PAGE_SIZE != 0 || vaddr.end % C::BASE_PAGE_SIZE != 0 {
return Err(PageTableError::VaddrRangeNotAligned(vaddr.start, vaddr.end));
}
if !UserMode::encloses(vaddr) {
return Err(PageTableError::InvalidVaddrRange(vaddr.start, vaddr.end));
}
// Safety: modification to the user page table is safe.
unsafe {
self.unmap_unchecked(vaddr);
}
Ok(())
}
pub(crate) fn protect(
&self,
vaddr: &Range<Vaddr>,
op: impl MapOp,
) -> Result<(), PageTableError> {
if vaddr.start % C::BASE_PAGE_SIZE != 0 || vaddr.end % C::BASE_PAGE_SIZE != 0 {
return Err(PageTableError::VaddrRangeNotAligned(vaddr.start, vaddr.end));
}
if !UserMode::encloses(vaddr) {
return Err(PageTableError::InvalidVaddrRange(vaddr.start, vaddr.end));
}
// Safety: modification to the user page table is safe.
unsafe { self.cursor(vaddr.start).protect(vaddr.len(), op, false) }
}
pub(crate) fn activate(&self) { pub(crate) fn activate(&self) {
// Safety: The usermode page table is safe to activate since the kernel // Safety: The usermode page table is safe to activate since the kernel
// mappings are shared. // mappings are shared.
@ -141,55 +85,60 @@ where
} }
} }
/// Remove all write permissions from the user page table and mark the page /// Remove all write permissions from the user page table and create a cloned
/// table as copy-on-write, and the create a handle to the new page table. /// new page table.
/// ///
/// That is, new page tables will be created when needed if a write operation /// TODO: We may consider making the page table itself copy-on-write.
/// is performed on either of the user page table handles. Calling this function
/// performs no significant operations.
pub(crate) fn fork_copy_on_write(&self) -> Self { pub(crate) fn fork_copy_on_write(&self) -> Self {
let mut cursor = self.cursor_mut(&UserMode::VADDR_RANGE).unwrap();
// Safety: Protecting the user page table is safe.
unsafe { unsafe {
self.protect_unchecked(&UserMode::VADDR_RANGE, perm_op(|perm| perm & !VmPerm::W)); cursor
} .protect(
// TODO: implement the copy-on-write mechanism. This is a simple workaround. UserMode::VADDR_RANGE.len(),
let new_root_frame = VmAllocOptions::new(1).alloc_single().unwrap(); perm_op(|perm| perm & !VmPerm::W),
let root_frame = self.root_frame.lock(); true,
)
.unwrap();
};
let root_frame = cursor.leak_root_guard().unwrap();
let mut new_root_frame = PageTableFrame::<E, C>::new();
let half_of_entries = C::NR_ENTRIES_PER_FRAME / 2; let half_of_entries = C::NR_ENTRIES_PER_FRAME / 2;
let new_ptr = new_root_frame.as_mut_ptr() as *mut E; for i in 0..half_of_entries {
let ptr = root_frame.inner.as_ptr() as *const E; // This is user space, deep copy the child.
let child = Box::new(core::array::from_fn(|i| { match root_frame.child(i) {
if i < half_of_entries { Child::PageTable(node) => {
// This is user space, deep copy the child. let frame = node.lock();
root_frame.child[i].as_ref().map(|child| match child { // Possibly a cursor is waiting for the root lock to recycle this node.
Child::PageTable(ptf) => unsafe { // We can skip copying empty page table nodes.
let frame = ptf.lock(); if frame.nr_valid_children() != 0 {
let cloned = frame.clone(); let cloned = frame.clone();
let pte = ptr.add(i).read(); let pt = Child::PageTable(Arc::new(SpinLock::new(cloned)));
new_ptr.add(i).write(E::new( new_root_frame.set_child(
cloned.inner.start_paddr(), i,
pte.info().prop, pt,
Some(root_frame.read_pte_info(i).prop),
false, false,
false, );
)); }
Child::PageTable(Arc::new(SpinLock::new(cloned))) }
}, Child::None => {}
Child::Frame(_) => panic!("Unexpected frame child."), Child::Frame(_) | Child::Untracked(_) => {
}) panic!("Unexpected map child.");
} else {
// This is kernel space, share the child.
unsafe {
let pte = ptr.add(i).read();
new_ptr.add(i).write(pte);
} }
root_frame.child[i].clone()
} }
})); }
for i in half_of_entries..C::NR_ENTRIES_PER_FRAME {
// This is kernel space, share the child.
new_root_frame.set_child(
i,
root_frame.child(i).clone(),
Some(root_frame.read_pte_info(i).prop),
false,
)
}
PageTable::<UserMode, E, C> { PageTable::<UserMode, E, C> {
root_frame: Arc::new(SpinLock::new(PageTableFrame::<E, C> { root_frame: Arc::new(SpinLock::new(new_root_frame)),
inner: new_root_frame,
child,
map_count: root_frame.map_count,
})),
_phantom: PhantomData, _phantom: PhantomData,
} }
} }
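A caller-side sketch of the fork semantics, mirroring the `test_user_copy_on_write` test further down (the surrounding kernel test harness and a page-aligned range are assumed):

let parent = PageTable::<UserMode>::empty();
let va = PAGE_SIZE..PAGE_SIZE * 2;
let frame = VmAllocOptions::new(1).alloc_single().unwrap();
let prop = MapProperty::new_general(VmPerm::RW);
// Safety: mapping a frame into user space is safe.
unsafe { parent.cursor_mut(&va).unwrap().map(frame.clone(), prop) };
// Both handles observe the pre-fork mapping; write permissions are stripped,
// so the first write through either handle faults and can be resolved by
// copying the frame.
let child = parent.fork_copy_on_write();
assert!(parent.query(va.start).is_some());
assert!(child.query(va.start).is_some());
// Unmapping in the parent afterwards does not affect the child.
unsafe { parent.unmap(&va).unwrap() };
assert!(parent.query(va.start).is_none());
assert!(child.query(va.start).is_some());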
@ -208,17 +157,18 @@ where
/// Then, one can use a user page table to call [`fork_copy_on_write`], creating /// Then, one can use a user page table to call [`fork_copy_on_write`], creating
/// other child page tables. /// other child page tables.
pub(crate) fn create_user_page_table(&self) -> PageTable<UserMode, E, C> { pub(crate) fn create_user_page_table(&self) -> PageTable<UserMode, E, C> {
let new_root_frame = VmAllocOptions::new(1).alloc_single().unwrap(); let mut new_root_frame = PageTableFrame::<E, C>::new();
let root_frame = self.root_frame.lock(); let root_frame = self.root_frame.lock();
let half_of_entries = C::NR_ENTRIES_PER_FRAME / 2; for i in C::NR_ENTRIES_PER_FRAME / 2..C::NR_ENTRIES_PER_FRAME {
new_root_frame.copy_from_frame(&root_frame.inner); new_root_frame.set_child(
let child = Box::new(core::array::from_fn(|i| root_frame.child[i].clone())); i,
root_frame.child(i).clone(),
Some(root_frame.read_pte_info(i).prop),
false,
)
}
PageTable::<UserMode, E, C> { PageTable::<UserMode, E, C> {
root_frame: Arc::new(SpinLock::new(PageTableFrame::<E, C> { root_frame: Arc::new(SpinLock::new(new_root_frame)),
inner: new_root_frame,
child,
map_count: root_frame.map_count,
})),
_phantom: PhantomData, _phantom: PhantomData,
} }
} }
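A hedged sketch of the expected call site, assuming `kpt` is the kernel-mode page table handle set up at boot (e.g. the `KERNEL_PAGE_TABLE` singleton used elsewhere in this commit):

// The user page table shares the kernel half of the root frame, so switching
// to it keeps all kernel mappings visible.
let user_pt = kpt.create_user_page_table();
user_pt.activate();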
@ -230,30 +180,26 @@ where
/// instead of the virtual address range. /// instead of the virtual address range.
pub(crate) fn make_shared_tables(&self, root_index: Range<usize>) { pub(crate) fn make_shared_tables(&self, root_index: Range<usize>) {
let start = root_index.start; let start = root_index.start;
assert!(start < C::NR_ENTRIES_PER_FRAME); debug_assert!(start >= C::NR_ENTRIES_PER_FRAME / 2);
debug_assert!(start < C::NR_ENTRIES_PER_FRAME);
let end = root_index.end; let end = root_index.end;
assert!(end <= C::NR_ENTRIES_PER_FRAME); debug_assert!(end <= C::NR_ENTRIES_PER_FRAME);
let mut root_frame = self.root_frame.lock(); let mut root_frame = self.root_frame.lock();
for i in start..end { for i in start..end {
let no_such_child = root_frame.child[i].is_none(); let no_such_child = root_frame.child(i).is_none();
if no_such_child { if no_such_child {
let frame = PageTableFrame::<E, C>::new(); let frame = Arc::new(SpinLock::new(PageTableFrame::<E, C>::new()));
let pte_ptr = (root_frame.inner.start_paddr() + i * size_of::<E>()) as *mut E; root_frame.set_child(
unsafe { i,
pte_ptr.write(E::new( Child::PageTable(frame),
frame.inner.start_paddr(), Some(MapProperty {
MapProperty { perm: VmPerm::RWX,
perm: VmPerm::RWX, global: true,
global: true, extension: 0,
extension: 0, cache: CachePolicy::Uncacheable,
cache: CachePolicy::Uncacheable, }),
}, false,
false, )
false,
));
}
root_frame.child[i] = Some(Child::PageTable(Arc::new(SpinLock::new(frame))));
root_frame.map_count += 1;
} }
} }
} }
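A sketch of the call pattern during kernel-space setup; `PageTableConsts` is assumed to be the arch-provided implementation of `PageTableConstsTrait`, and `kpt` the kernel page table being initialized:

// Pre-create the kernel-half page table nodes once, so that every user page
// table forked from this root aliases the same kernel mappings.
let half = PageTableConsts::NR_ENTRIES_PER_FRAME / 2;
kpt.make_shared_tables(half..PageTableConsts::NR_ENTRIES_PER_FRAME);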
@ -274,60 +220,40 @@ where
/// The physical address of the root page table. /// The physical address of the root page table.
pub(crate) fn root_paddr(&self) -> Paddr { pub(crate) fn root_paddr(&self) -> Paddr {
self.root_frame.lock().inner.start_paddr() self.root_frame.lock().start_paddr()
} }
pub(crate) unsafe fn map_frames_unchecked( pub(crate) unsafe fn map(
&self,
vaddr: Vaddr,
frames: VmFrameVec,
prop: MapProperty,
) {
let mut cursor = self.cursor(vaddr);
for frame in frames.into_iter() {
cursor.map(MapOption::Map { frame, prop });
}
}
pub(crate) unsafe fn map_unchecked(
&self, &self,
vaddr: &Range<Vaddr>, vaddr: &Range<Vaddr>,
paddr: &Range<Paddr>, paddr: &Range<Paddr>,
prop: MapProperty, prop: MapProperty,
) { ) -> Result<(), PageTableError> {
self.cursor(vaddr.start).map(MapOption::MapUntyped { self.cursor_mut(vaddr)?.map_pa(paddr, prop);
pa: paddr.start, Ok(())
len: vaddr.len(),
prop,
});
} }
pub(crate) unsafe fn unmap_unchecked(&self, vaddr: &Range<Vaddr>) { pub(crate) unsafe fn unmap(&self, vaddr: &Range<Vaddr>) -> Result<(), PageTableError> {
self.cursor(vaddr.start) self.cursor_mut(vaddr)?.unmap(vaddr.len());
.map(MapOption::Unmap { len: vaddr.len() }); Ok(())
} }
pub(crate) unsafe fn protect_unchecked(&self, vaddr: &Range<Vaddr>, op: impl MapOp) { pub(crate) unsafe fn protect(
self.cursor(vaddr.start) &self,
vaddr: &Range<Vaddr>,
op: impl MapOp,
) -> Result<(), PageTableError> {
self.cursor_mut(vaddr)?
.protect(vaddr.len(), op, true) .protect(vaddr.len(), op, true)
.unwrap(); .unwrap();
} Ok(())
/// Query about the mappings of a range of virtual addresses.
pub(crate) fn query_range(
&'a self,
vaddr: &Range<Vaddr>,
) -> Result<PageTableIter<'a, M, E, C>, PageTableError> {
if vaddr.start % C::BASE_PAGE_SIZE != 0 || vaddr.end % C::BASE_PAGE_SIZE != 0 {
return Err(PageTableError::InvalidVaddrRange(vaddr.start, vaddr.end));
}
if !M::encloses(vaddr) {
return Err(PageTableError::InvalidVaddrRange(vaddr.start, vaddr.end));
}
Ok(PageTableIter::new(self, vaddr))
} }
/// Query about the mapping of a single byte at the given virtual address. /// Query about the mapping of a single byte at the given virtual address.
///
/// Note that this function may fail to reflect an accurate result if there are
/// cursors concurrently accessing the same virtual address range, just like what
/// happens for the hardware MMU walk.
pub(crate) fn query(&self, vaddr: Vaddr) -> Option<(Paddr, MapInfo)> { pub(crate) fn query(&self, vaddr: Vaddr) -> Option<(Paddr, MapInfo)> {
// Safety: The root frame is a valid page table frame so the address is valid. // Safety: The root frame is a valid page table frame so the address is valid.
unsafe { page_walk::<E, C>(self.root_paddr(), vaddr) } unsafe { page_walk::<E, C>(self.root_paddr(), vaddr) }
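A small sketch of interpreting the lockless query, with `pt` and `vaddr` as placeholders; the offset behavior matches the assertions in the tests below:

if let Some((paddr, info)) = pt.query(vaddr) {
    // The returned physical address already includes the in-page offset.
    assert_eq!(paddr % PAGE_SIZE, vaddr % PAGE_SIZE);
    // The mapping's permissions and cache policy travel in `info.prop`.
    let _ = (info.prop.perm, info.prop.cache);
}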
@ -337,10 +263,26 @@ where
activate_page_table(self.root_paddr(), CachePolicy::Writeback); activate_page_table(self.root_paddr(), CachePolicy::Writeback);
} }
/// Create a new mutating cursor for the page table. /// Create a new cursor exclusively accessing the virtual address range for mapping.
/// The cursor is initialized at the given virtual address. ///
fn cursor(&self, va: usize) -> PageTableCursor<'a, M, E, C> { /// If another cursor is already accessing the range, the new cursor will wait until the
PageTableCursor::new(self, va) /// previous cursor is dropped.
pub(crate) fn cursor_mut(
&'a self,
va: &Range<Vaddr>,
) -> Result<CursorMut<'a, M, E, C>, PageTableError> {
CursorMut::new(self, va)
}
/// Create a new cursor exclusively accessing the virtual address range for querying.
///
/// If another cursor is already accessing the range, the new cursor will wait until the
/// previous cursor is dropped.
pub(crate) fn cursor(
&'a self,
va: &Range<Vaddr>,
) -> Result<Cursor<'a, M, E, C>, PageTableError> {
Cursor::new(self, va)
} }
/// Create a new reference to the same page table. /// Create a new reference to the same page table.

View File

@ -1,7 +1,7 @@
// SPDX-License-Identifier: MPL-2.0 // SPDX-License-Identifier: MPL-2.0
use super::*; use super::*;
use crate::vm::{kspace::LINEAR_MAPPING_BASE_VADDR, space::VmPerm}; use crate::vm::{kspace::LINEAR_MAPPING_BASE_VADDR, space::VmPerm, VmAllocOptions};
const PAGE_SIZE: usize = 4096; const PAGE_SIZE: usize = 4096;
@ -12,47 +12,37 @@ fn test_range_check() {
let bad_va = 0..PAGE_SIZE + 1; let bad_va = 0..PAGE_SIZE + 1;
let bad_va2 = LINEAR_MAPPING_BASE_VADDR..LINEAR_MAPPING_BASE_VADDR + PAGE_SIZE; let bad_va2 = LINEAR_MAPPING_BASE_VADDR..LINEAR_MAPPING_BASE_VADDR + PAGE_SIZE;
let to = VmAllocOptions::new(1).alloc().unwrap(); let to = VmAllocOptions::new(1).alloc().unwrap();
assert!(pt.query_range(&good_va).is_ok()); assert!(pt.cursor_mut(&good_va).is_ok());
assert!(pt.query_range(&bad_va).is_err()); assert!(pt.cursor_mut(&bad_va).is_err());
assert!(pt.query_range(&bad_va2).is_err()); assert!(pt.cursor_mut(&bad_va2).is_err());
assert!(pt.unmap(&good_va).is_ok()); assert!(unsafe { pt.unmap(&good_va) }.is_ok());
assert!(pt.unmap(&bad_va).is_err()); assert!(unsafe { pt.unmap(&bad_va) }.is_err());
assert!(pt.unmap(&bad_va2).is_err()); assert!(unsafe { pt.unmap(&bad_va2) }.is_err());
assert!(pt
.map_frames(
good_va.start,
to.clone(),
MapProperty::new_general(VmPerm::R)
)
.is_ok());
assert!(pt
.map_frames(bad_va2.start, to.clone(), MapProperty::new_invalid())
.is_err());
} }
#[ktest] #[ktest]
fn test_map_unmap() { fn test_map_unmap() {
let pt = PageTable::<UserMode>::empty(); let pt = PageTable::<UserMode>::empty();
let from = PAGE_SIZE..PAGE_SIZE * 2; let from = PAGE_SIZE..PAGE_SIZE * 2;
let frames = VmAllocOptions::new(1).alloc().unwrap(); let frame = VmAllocOptions::new(1).alloc_single().unwrap();
let start_paddr = frames.get(0).unwrap().start_paddr(); let start_paddr = frame.start_paddr();
let prop = MapProperty::new_general(VmPerm::RW); let prop = MapProperty::new_general(VmPerm::RW);
pt.map_frames(from.start, frames.clone(), prop).unwrap(); unsafe { pt.cursor_mut(&from).unwrap().map(frame.clone(), prop) };
assert_eq!(pt.query(from.start + 10).unwrap().0, start_paddr + 10); assert_eq!(pt.query(from.start + 10).unwrap().0, start_paddr + 10);
pt.unmap(&from).unwrap(); unsafe { pt.unmap(&from).unwrap() };
assert!(pt.query(from.start + 10).is_none()); assert!(pt.query(from.start + 10).is_none());
let from_ppn = 13245..512 * 512 + 23456; let from_ppn = 13245..512 * 512 + 23456;
let to_ppn = from_ppn.start - 11010..from_ppn.end - 11010; let to_ppn = from_ppn.start - 11010..from_ppn.end - 11010;
let from = PAGE_SIZE * from_ppn.start..PAGE_SIZE * from_ppn.end; let from = PAGE_SIZE * from_ppn.start..PAGE_SIZE * from_ppn.end;
let to = PAGE_SIZE * to_ppn.start..PAGE_SIZE * to_ppn.end; let to = PAGE_SIZE * to_ppn.start..PAGE_SIZE * to_ppn.end;
unsafe { pt.map_unchecked(&from, &to, prop) }; unsafe { pt.map(&from, &to, prop).unwrap() };
for i in 0..100 { for i in 0..100 {
let offset = i * (PAGE_SIZE + 1000); let offset = i * (PAGE_SIZE + 1000);
assert_eq!(pt.query(from.start + offset).unwrap().0, to.start + offset); assert_eq!(pt.query(from.start + offset).unwrap().0, to.start + offset);
} }
let unmap = PAGE_SIZE * 123..PAGE_SIZE * 3434; let unmap = PAGE_SIZE * 123..PAGE_SIZE * 3434;
pt.unmap(&unmap).unwrap(); unsafe { pt.unmap(&unmap).unwrap() };
for i in 0..100 { for i in 0..100 {
let offset = i * (PAGE_SIZE + 10); let offset = i * (PAGE_SIZE + 10);
if unmap.start <= from.start + offset && from.start + offset < unmap.end { if unmap.start <= from.start + offset && from.start + offset < unmap.end {
@ -67,20 +57,20 @@ fn test_map_unmap() {
fn test_user_copy_on_write() { fn test_user_copy_on_write() {
let pt = PageTable::<UserMode>::empty(); let pt = PageTable::<UserMode>::empty();
let from = PAGE_SIZE..PAGE_SIZE * 2; let from = PAGE_SIZE..PAGE_SIZE * 2;
let frames = VmAllocOptions::new(1).alloc().unwrap(); let frame = VmAllocOptions::new(1).alloc_single().unwrap();
let start_paddr = frames.get(0).unwrap().start_paddr(); let start_paddr = frame.start_paddr();
let prop = MapProperty::new_general(VmPerm::RW); let prop = MapProperty::new_general(VmPerm::RW);
pt.map_frames(from.start, frames.clone(), prop).unwrap(); unsafe { pt.cursor_mut(&from).unwrap().map(frame.clone(), prop) };
assert_eq!(pt.query(from.start + 10).unwrap().0, start_paddr + 10); assert_eq!(pt.query(from.start + 10).unwrap().0, start_paddr + 10);
pt.unmap(&from).unwrap(); unsafe { pt.unmap(&from).unwrap() };
assert!(pt.query(from.start + 10).is_none()); assert!(pt.query(from.start + 10).is_none());
pt.map_frames(from.start, frames.clone(), prop).unwrap(); unsafe { pt.cursor_mut(&from).unwrap().map(frame.clone(), prop) };
assert_eq!(pt.query(from.start + 10).unwrap().0, start_paddr + 10); assert_eq!(pt.query(from.start + 10).unwrap().0, start_paddr + 10);
let child_pt = pt.fork_copy_on_write(); let child_pt = pt.fork_copy_on_write();
assert_eq!(pt.query(from.start + 10).unwrap().0, start_paddr + 10); assert_eq!(pt.query(from.start + 10).unwrap().0, start_paddr + 10);
assert_eq!(child_pt.query(from.start + 10).unwrap().0, start_paddr + 10); assert_eq!(child_pt.query(from.start + 10).unwrap().0, start_paddr + 10);
pt.unmap(&from).unwrap(); unsafe { pt.unmap(&from).unwrap() };
assert!(pt.query(from.start + 10).is_none()); assert!(pt.query(from.start + 10).is_none());
assert_eq!(child_pt.query(from.start + 10).unwrap().0, start_paddr + 10); assert_eq!(child_pt.query(from.start + 10).unwrap().0, start_paddr + 10);
} }
@ -98,26 +88,26 @@ impl PageTableConstsTrait for BasePageTableConsts {
} }
#[ktest] #[ktest]
fn test_base_protect_query_range() { fn test_base_protect_query() {
let pt = PageTable::<UserMode, PageTableEntry, BasePageTableConsts>::empty(); let pt = PageTable::<UserMode, PageTableEntry, BasePageTableConsts>::empty();
let from_ppn = 1..1000; let from_ppn = 1..1000;
let from = PAGE_SIZE * from_ppn.start..PAGE_SIZE * from_ppn.end; let from = PAGE_SIZE * from_ppn.start..PAGE_SIZE * from_ppn.end;
let to = PAGE_SIZE * 1000..PAGE_SIZE * 1999; let to = PAGE_SIZE * 1000..PAGE_SIZE * 1999;
let prop = MapProperty::new_general(VmPerm::RW); let prop = MapProperty::new_general(VmPerm::RW);
unsafe { pt.map_unchecked(&from, &to, prop) }; unsafe { pt.map(&from, &to, prop).unwrap() };
for (qr, i) in pt.query_range(&from).unwrap().zip(from_ppn) { for (qr, i) in pt.cursor(&from).unwrap().zip(from_ppn) {
let Qr::MappedUntyped { va, pa, len, info } = qr else { let Qr::MappedUntyped { va, pa, len, info } = qr else {
panic!("Expected MappedUntyped, got {:?}", qr); panic!("Expected MappedUntyped, got {:#x?}", qr);
}; };
assert_eq!(info.prop.perm, VmPerm::RW); assert_eq!(info.prop.perm, VmPerm::RW);
assert_eq!(info.prop.cache, CachePolicy::Writeback); assert_eq!(info.prop.cache, CachePolicy::Writeback);
assert_eq!(va..va + len, i * PAGE_SIZE..(i + 1) * PAGE_SIZE); assert_eq!(va..va + len, i * PAGE_SIZE..(i + 1) * PAGE_SIZE);
} }
let prot = PAGE_SIZE * 18..PAGE_SIZE * 20; let prot = PAGE_SIZE * 18..PAGE_SIZE * 20;
pt.protect(&prot, perm_op(|p| p - VmPerm::W)).unwrap(); unsafe { pt.protect(&prot, perm_op(|p| p - VmPerm::W)).unwrap() };
for (qr, i) in pt.query_range(&prot).unwrap().zip(18..20) { for (qr, i) in pt.cursor(&prot).unwrap().zip(18..20) {
let Qr::MappedUntyped { va, pa, len, info } = qr else { let Qr::MappedUntyped { va, pa, len, info } = qr else {
panic!("Expected MappedUntyped, got {:?}", qr); panic!("Expected MappedUntyped, got {:#x?}", qr);
}; };
assert_eq!(info.prop.perm, VmPerm::R); assert_eq!(info.prop.perm, VmPerm::R);
assert_eq!(va..va + len, i * PAGE_SIZE..(i + 1) * PAGE_SIZE); assert_eq!(va..va + len, i * PAGE_SIZE..(i + 1) * PAGE_SIZE);
@ -135,7 +125,7 @@ impl PageTableConstsTrait for VeryHugePageTableConsts {
} }
#[ktest] #[ktest]
fn test_large_protect_query_range() { fn test_large_protect_query() {
let pt = PageTable::<UserMode, PageTableEntry, VeryHugePageTableConsts>::empty(); let pt = PageTable::<UserMode, PageTableEntry, VeryHugePageTableConsts>::empty();
let gmult = 512 * 512; let gmult = 512 * 512;
let from_ppn = gmult - 512..gmult + gmult + 514; let from_ppn = gmult - 512..gmult + gmult + 514;
@ -148,10 +138,10 @@ fn test_large_protect_query_range() {
let from = PAGE_SIZE * from_ppn.start..PAGE_SIZE * from_ppn.end; let from = PAGE_SIZE * from_ppn.start..PAGE_SIZE * from_ppn.end;
let to = PAGE_SIZE * to_ppn.start..PAGE_SIZE * to_ppn.end; let to = PAGE_SIZE * to_ppn.start..PAGE_SIZE * to_ppn.end;
let prop = MapProperty::new_general(VmPerm::RW); let prop = MapProperty::new_general(VmPerm::RW);
unsafe { pt.map_unchecked(&from, &to, prop) }; unsafe { pt.map(&from, &to, prop).unwrap() };
for (qr, i) in pt.query_range(&from).unwrap().zip(0..512 + 2 + 2) { for (qr, i) in pt.cursor(&from).unwrap().zip(0..512 + 2 + 2) {
let Qr::MappedUntyped { va, pa, len, info } = qr else { let Qr::MappedUntyped { va, pa, len, info } = qr else {
panic!("Expected MappedUntyped, got {:?}", qr); panic!("Expected MappedUntyped, got {:#x?}", qr);
}; };
assert_eq!(info.prop.perm, VmPerm::RW); assert_eq!(info.prop.perm, VmPerm::RW);
assert_eq!(info.prop.cache, CachePolicy::Writeback); assert_eq!(info.prop.cache, CachePolicy::Writeback);
@ -171,32 +161,32 @@ fn test_large_protect_query_range() {
} }
let ppn = from_ppn.start + 18..from_ppn.start + 20; let ppn = from_ppn.start + 18..from_ppn.start + 20;
let va = PAGE_SIZE * ppn.start..PAGE_SIZE * ppn.end; let va = PAGE_SIZE * ppn.start..PAGE_SIZE * ppn.end;
pt.protect(&va, perm_op(|p| p - VmPerm::W)).unwrap(); unsafe { pt.protect(&va, perm_op(|p| p - VmPerm::W)).unwrap() };
for (qr, i) in pt for (qr, i) in pt
.query_range(&(va.start - PAGE_SIZE..va.start)) .cursor(&(va.start - PAGE_SIZE..va.start))
.unwrap() .unwrap()
.zip(ppn.start - 1..ppn.start) .zip(ppn.start - 1..ppn.start)
{ {
let Qr::MappedUntyped { va, pa, len, info } = qr else { let Qr::MappedUntyped { va, pa, len, info } = qr else {
panic!("Expected MappedUntyped, got {:?}", qr); panic!("Expected MappedUntyped, got {:#x?}", qr);
}; };
assert_eq!(info.prop.perm, VmPerm::RW); assert_eq!(info.prop.perm, VmPerm::RW);
assert_eq!(va..va + len, i * PAGE_SIZE..(i + 1) * PAGE_SIZE); assert_eq!(va..va + len, i * PAGE_SIZE..(i + 1) * PAGE_SIZE);
} }
for (qr, i) in pt.query_range(&va).unwrap().zip(ppn.clone()) { for (qr, i) in pt.cursor(&va).unwrap().zip(ppn.clone()) {
let Qr::MappedUntyped { va, pa, len, info } = qr else { let Qr::MappedUntyped { va, pa, len, info } = qr else {
panic!("Expected MappedUntyped, got {:?}", qr); panic!("Expected MappedUntyped, got {:#x?}", qr);
}; };
assert_eq!(info.prop.perm, VmPerm::R); assert_eq!(info.prop.perm, VmPerm::R);
assert_eq!(va..va + len, i * PAGE_SIZE..(i + 1) * PAGE_SIZE); assert_eq!(va..va + len, i * PAGE_SIZE..(i + 1) * PAGE_SIZE);
} }
for (qr, i) in pt for (qr, i) in pt
.query_range(&(va.end..va.end + PAGE_SIZE)) .cursor(&(va.end..va.end + PAGE_SIZE))
.unwrap() .unwrap()
.zip(ppn.end..ppn.end + 1) .zip(ppn.end..ppn.end + 1)
{ {
let Qr::MappedUntyped { va, pa, len, info } = qr else { let Qr::MappedUntyped { va, pa, len, info } = qr else {
panic!("Expected MappedUntyped, got {:?}", qr); panic!("Expected MappedUntyped, got {:#x?}", qr);
}; };
assert_eq!(info.prop.perm, VmPerm::RW); assert_eq!(info.prop.perm, VmPerm::RW);
assert_eq!(va..va + len, i * PAGE_SIZE..(i + 1) * PAGE_SIZE); assert_eq!(va..va + len, i * PAGE_SIZE..(i + 1) * PAGE_SIZE);

View File

@ -9,7 +9,8 @@ use super::{
is_page_aligned, is_page_aligned,
kspace::KERNEL_PAGE_TABLE, kspace::KERNEL_PAGE_TABLE,
page_table::{ page_table::{
MapInfo, MapOp, PageTable, PageTableConstsTrait, PageTableQueryResult as PtQr, UserMode, MapInfo, MapOp, PageTable, PageTableConstsTrait, PageTableMode,
PageTableQueryResult as PtQr, PageTableQueryResult, UserMode,
}, },
VmFrameVec, VmIo, PAGE_SIZE, VmFrameVec, VmIo, PAGE_SIZE,
}; };
@ -17,7 +18,7 @@ use crate::{
arch::mm::{PageTableConsts, PageTableEntry}, arch::mm::{PageTableConsts, PageTableEntry},
prelude::*, prelude::*,
vm::{ vm::{
page_table::{CachePolicy, MapProperty, PageTableIter}, page_table::{CachePolicy, Cursor, MapProperty},
VmFrame, MAX_USERSPACE_VADDR, VmFrame, MAX_USERSPACE_VADDR,
}, },
Error, Error,
@ -63,27 +64,44 @@ impl VmSpace {
} }
let addr = options.addr.unwrap(); let addr = options.addr.unwrap();
if addr % PAGE_SIZE != 0 {
return Err(Error::InvalidArgs);
}
let size = frames.nbytes(); let size = frames.nbytes();
let end = addr.checked_add(size).ok_or(Error::InvalidArgs)?;
let va_range = addr..end;
if !UserMode::covers(&va_range) {
return Err(Error::InvalidArgs);
}
let mut cursor = self.pt.cursor_mut(&va_range)?;
// If overwrite is forbidden, we should check if there are existing mappings // If overwrite is forbidden, we should check if there are existing mappings
if !options.can_overwrite { if !options.can_overwrite {
let end = addr.checked_add(size).ok_or(Error::Overflow)?; while let Some(qr) = cursor.query() {
for qr in self.query_range(&(addr..end)).unwrap() { if matches!(qr, PageTableQueryResult::Mapped { .. }) {
if matches!(qr, VmQueryResult::Mapped { .. }) {
return Err(Error::MapAlreadyMappedVaddr); return Err(Error::MapAlreadyMappedVaddr);
} }
} }
cursor.jump(va_range.start);
}
let prop = MapProperty {
perm: options.perm,
global: false,
extension: 0,
cache: CachePolicy::Writeback,
};
for frame in frames.into_iter() {
// Safety: mapping in the user space with `VmFrame` is safe.
unsafe {
cursor.map(frame, prop);
}
} }
self.pt.map_frames(
addr,
frames,
MapProperty {
perm: options.perm,
global: false,
extension: 0,
cache: CachePolicy::Writeback,
},
)?;
Ok(addr) Ok(addr)
} }
@ -93,7 +111,7 @@ impl VmSpace {
/// each part of the range. /// each part of the range.
pub fn query_range(&self, range: &Range<Vaddr>) -> Result<VmQueryIter> { pub fn query_range(&self, range: &Range<Vaddr>) -> Result<VmQueryIter> {
Ok(VmQueryIter { Ok(VmQueryIter {
inner: self.pt.query_range(range)?, cursor: self.pt.cursor(range)?,
}) })
} }
@ -112,8 +130,16 @@ impl VmSpace {
/// The range is allowed to contain gaps, where no physical memory pages /// The range is allowed to contain gaps, where no physical memory pages
/// are mapped. /// are mapped.
pub fn unmap(&self, range: &Range<Vaddr>) -> Result<()> { pub fn unmap(&self, range: &Range<Vaddr>) -> Result<()> {
assert!(is_page_aligned(range.start) && is_page_aligned(range.end)); if !is_page_aligned(range.start) || !is_page_aligned(range.end) {
self.pt.unmap(range)?; return Err(Error::InvalidArgs);
}
if !UserMode::covers(range) {
return Err(Error::InvalidArgs);
}
// Safety: unmapping in the user space is safe.
unsafe {
self.pt.unmap(range)?;
}
Ok(()) Ok(())
} }
@ -122,7 +148,7 @@ impl VmSpace {
// Safety: unmapping user space is safe, and we don't care about unmapping // Safety: unmapping user space is safe, and we don't care about unmapping
// invalid ranges. // invalid ranges.
unsafe { unsafe {
self.pt.unmap_unchecked(&(0..MAX_USERSPACE_VADDR)); self.pt.unmap(&(0..MAX_USERSPACE_VADDR)).unwrap();
} }
#[cfg(target_arch = "x86_64")] #[cfg(target_arch = "x86_64")]
x86_64::instructions::tlb::flush_all(); x86_64::instructions::tlb::flush_all();
@ -138,8 +164,16 @@ impl VmSpace {
/// partial huge page happens, and efforts are not reverted, leaving us /// partial huge page happens, and efforts are not reverted, leaving us
/// in a bad state. /// in a bad state.
pub fn protect(&self, range: &Range<Vaddr>, op: impl MapOp) -> Result<()> { pub fn protect(&self, range: &Range<Vaddr>, op: impl MapOp) -> Result<()> {
assert!(is_page_aligned(range.start) && is_page_aligned(range.end)); if !is_page_aligned(range.start) || !is_page_aligned(range.end) {
self.pt.protect(range, op)?; return Err(Error::InvalidArgs);
}
if !UserMode::covers(range) {
return Err(Error::InvalidArgs);
}
// Safety: protecting in the user space is safe.
unsafe {
self.pt.protect(range, op)?;
}
Ok(()) Ok(())
} }
@ -306,7 +340,7 @@ impl TryFrom<u64> for VmPerm {
/// The iterator for querying over the VM space without modifying it. /// The iterator for querying over the VM space without modifying it.
pub struct VmQueryIter<'a> { pub struct VmQueryIter<'a> {
inner: PageTableIter<'a, UserMode, PageTableEntry, PageTableConsts>, cursor: Cursor<'a, UserMode, PageTableEntry, PageTableConsts>,
} }
pub enum VmQueryResult { pub enum VmQueryResult {
@ -325,11 +359,11 @@ impl Iterator for VmQueryIter<'_> {
type Item = VmQueryResult; type Item = VmQueryResult;
fn next(&mut self) -> Option<Self::Item> { fn next(&mut self) -> Option<Self::Item> {
self.inner.next().map(|ptqr| match ptqr { self.cursor.next().map(|ptqr| match ptqr {
PtQr::NotMapped { va, len } => VmQueryResult::NotMapped { va, len }, PtQr::NotMapped { va, len } => VmQueryResult::NotMapped { va, len },
PtQr::Mapped { va, frame, info } => VmQueryResult::Mapped { va, frame, info }, PtQr::Mapped { va, frame, info } => VmQueryResult::Mapped { va, frame, info },
// It is not possible to map untyped memory in user space. // It is not possible to map untyped memory in user space.
PtQr::MappedUntyped { va, pa, len, info } => unreachable!(), PtQr::MappedUntyped { .. } => unreachable!(),
}) })
} }
} }
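A sketch of consuming the iterator; the variant shapes are exactly those matched above, and `space` is a placeholder `VmSpace`:

let mut mapped_bytes = 0;
for qr in space.query_range(&(0..PAGE_SIZE * 16)).unwrap() {
    if let VmQueryResult::Mapped { va, frame, info } = qr {
        // One user frame is mapped at `va`; `info` carries its permissions,
        // cache policy, and accessed/dirty status.
        debug_assert_eq!(frame.start_paddr() % PAGE_SIZE, 0);
        mapped_bytes += PAGE_SIZE;
        let _ = (va, info);
    }
}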