Implement fine-grained locks for the page table and adjust APIs

This commit is contained in:
Zhang Junyang
2024-04-29 00:09:26 +08:00
committed by Tate, Hongliang Tian
parent ef1ab72ebe
commit 2dbeb92326
12 changed files with 937 additions and 717 deletions
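
Alongside the fine-grained locking, the commit replaces the unchecked page table operations (`map_unchecked`, `unmap_unchecked`, `protect_unchecked`) with range-checked, fallible (but still `unsafe`) `map`, `unmap` and `protect` methods, as the call sites below show. A hedged sketch of the new call-site pattern, using identifiers that appear in the diffs; `kpt`, `va`, `pa`, and `prop` stand in for values set up by the caller:

```rust
// Before: the unchecked variants performed no range validation and returned nothing.
// unsafe { kpt.map_unchecked(&(va..va + PAGE_SIZE), &(pa..pa + PAGE_SIZE), prop) };

// After: the operations validate the range and return a Result, so call sites
// either propagate the error or unwrap it, as the kernel call sites below do.
// Safety: the caller must still guarantee that the mappings are sound.
unsafe {
    kpt.map(&(va..va + PAGE_SIZE), &(pa..pa + PAGE_SIZE), prop).unwrap();
    kpt.protect(&(va..va + PAGE_SIZE), perm_op(|p| p - VmPerm::W)).unwrap();
    kpt.unmap(&(va..va + PAGE_SIZE)).unwrap();
}
```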

View File

@ -291,7 +291,8 @@ impl ContextTable {
if device.device >= 32 || device.function >= 8 {
return Err(ContextTableError::InvalidDeviceId);
}
self.get_or_create_page_table(device).map_unchecked(
self.get_or_create_page_table(device)
.map(
&(daddr..daddr + PAGE_SIZE),
&(paddr..paddr + PAGE_SIZE),
MapProperty {
@ -300,7 +301,8 @@ impl ContextTable {
extension: PageTableFlags::empty().bits(),
cache: CachePolicy::Uncacheable,
},
);
)
.unwrap();
Ok(())
}
@ -310,7 +312,8 @@ impl ContextTable {
}
unsafe {
self.get_or_create_page_table(device)
.unmap_unchecked(&(daddr..daddr + PAGE_SIZE));
.unmap(&(daddr..daddr + PAGE_SIZE))
.unwrap();
}
Ok(())
}

View File

@ -183,11 +183,23 @@ impl PageTableEntryTrait for PageTableEntry {
if self.0 & PageTableFlags::DIRTY.bits() != 0 {
status |= MapStatus::DIRTY;
}
let extension = {
#[cfg(feature = "intel_tdx")]
{
let mut ext = PageTableFlags::empty();
if self.0 & PageTableFlags::SHARED.bits() != 0 {
ext |= PageTableFlags::SHARED;
}
ext
}
#[cfg(not(feature = "intel_tdx"))]
0
};
MapInfo {
prop: MapProperty {
perm,
global,
extension: (self.0 & !Self::PHYS_ADDR_MASK) as u64,
extension,
cache,
},
status,

View File

@ -416,14 +416,12 @@ pub unsafe fn unprotect_gpa_range(gpa: TdxGpa, page_num: usize) -> Result<(), Pa
}
let vaddr = paddr_to_vaddr(gpa);
let pt = KERNEL_PAGE_TABLE.get().unwrap();
unsafe {
pt.protect_unchecked(&(vaddr..page_num * PAGE_SIZE), |info| MapProperty {
pt.protect(&(vaddr..page_num * PAGE_SIZE), |info| MapProperty {
perm: info.prop.perm,
extension: PageTableFlags::SHARED.bits() as u64,
cache: info.prop.cache,
})
.map_err(PageConvertError::PageTableError)?;
};
map_gpa(
(gpa & (!PAGE_MASK)) as u64 | SHARED_MASK,
(page_num * PAGE_SIZE) as u64,
@ -452,8 +450,7 @@ pub unsafe fn protect_gpa_range(gpa: TdxGpa, page_num: usize) -> Result<(), Page
}
let vaddr = paddr_to_vaddr(gpa);
let pt = KERNEL_PAGE_TABLE.get().unwrap();
unsafe {
pt.protect_unchecked(&(vaddr..page_num * PAGE_SIZE), |info| MapProperty {
pt.protect(&(vaddr..page_num * PAGE_SIZE), |info| MapProperty {
perm: info.prop.perm,
extension: (PageTableFlags::from_bits_truncate(info.prop.extension as usize)
- PageTableFlags::SHARED)
@ -461,7 +458,6 @@ pub unsafe fn protect_gpa_range(gpa: TdxGpa, page_num: usize) -> Result<(), Page
cache: info.prop.cache,
})
.map_err(PageConvertError::PageTableError)?;
};
map_gpa((gpa & PAGE_MASK) as u64, (page_num * PAGE_SIZE) as u64)
.map_err(PageConvertError::TdVmcallError)?;
for i in 0..page_num {

View File

@ -69,12 +69,14 @@ impl KernelStack {
let guard_page_paddr = stack_segment.start_paddr();
crate::vm::paddr_to_vaddr(guard_page_paddr)
};
// Safety: the physical guard page address is exclusively used since we allocated it.
// Safety: the allocated segment is not used by others, so we can protect it.
unsafe {
page_table.protect_unchecked(
page_table
.protect(
&(guard_page_vaddr..guard_page_vaddr + PAGE_SIZE),
perm_op(|p| p - VmPerm::RW),
);
)
.unwrap();
}
Ok(Self {
segment: stack_segment,
@ -96,12 +98,14 @@ impl Drop for KernelStack {
let guard_page_paddr = self.segment.start_paddr();
crate::vm::paddr_to_vaddr(guard_page_paddr)
};
// Safety: the physical guard page address is exclusively used since we allocated it.
// Safety: the allocated segment is not used by others, so we can protect it.
unsafe {
page_table.protect_unchecked(
page_table
.protect(
&(guard_page_vaddr..guard_page_vaddr + PAGE_SIZE),
perm_op(|p| p | VmPerm::RW),
);
)
.unwrap();
}
}
}

View File

@ -227,7 +227,8 @@ fn handle_kernel_page_fault(f: &TrapFrame) {
// correctness follows the semantics of the direct mapping of physical memory.
// Do the mapping
unsafe {
page_table.map_unchecked(
page_table
.map(
&(vaddr..vaddr + PAGE_SIZE),
&(paddr..paddr + PAGE_SIZE),
MapProperty {
@ -240,5 +241,6 @@ fn handle_kernel_page_fault(f: &TrapFrame) {
cache: CachePolicy::Uncacheable,
},
)
.unwrap();
}
}

View File

@ -59,9 +59,11 @@ impl DmaCoherent {
let page_table = KERNEL_PAGE_TABLE.get().unwrap();
let vaddr = paddr_to_vaddr(start_paddr);
let va_range = vaddr..vaddr + (frame_count * PAGE_SIZE);
// Safety: the address is in the range of `vm_segment`.
// Safety: the physical mapping is only used for DMA, so protecting it is safe.
unsafe {
page_table.protect_unchecked(&va_range, cache_policy_op(CachePolicy::Uncacheable));
page_table
.protect(&va_range, cache_policy_op(CachePolicy::Uncacheable))
.unwrap();
}
}
let start_daddr = match dma_type() {
@ -144,9 +146,11 @@ impl Drop for DmaCoherentInner {
let page_table = KERNEL_PAGE_TABLE.get().unwrap();
let vaddr = paddr_to_vaddr(start_paddr);
let va_range = vaddr..vaddr + (frame_count * PAGE_SIZE);
// Safety: the address is in the range of `vm_segment`.
// Safety: the physical mapping is only used for DMA, so protecting it is safe.
unsafe {
page_table.protect_unchecked(&va_range, cache_policy_op(CachePolicy::Writeback));
page_table
.protect(&va_range, cache_policy_op(CachePolicy::Writeback))
.unwrap();
}
}
remove_dma_mapping(start_paddr, frame_count);

View File

@ -79,7 +79,7 @@ pub fn init_kernel_page_table() {
};
// Safety: we are doing the linear mapping for the kernel.
unsafe {
kpt.map_unchecked(&from, &to, prop);
kpt.map(&from, &to, prop).unwrap();
}
// Map for the I/O area.
// TODO: we need to have an allocator to allocate kernel space for
@ -94,7 +94,7 @@ pub fn init_kernel_page_table() {
};
// Safety: we are doing I/O mappings for the kernel.
unsafe {
kpt.map_unchecked(&from, &to, prop);
kpt.map(&from, &to, prop).unwrap();
}
// Map for the kernel code itself.
// TODO: set separated permissions for each segments in the kernel.
@ -114,7 +114,7 @@ pub fn init_kernel_page_table() {
};
// Safety: we are doing mappings for the kernel.
unsafe {
kpt.map_unchecked(&from, &to, prop);
kpt.map(&from, &to, prop).unwrap();
}
KERNEL_PAGE_TABLE.call_once(|| kpt);
}

View File

@ -1,143 +1,213 @@
// SPDX-License-Identifier: MPL-2.0
//! The page table cursor for mapping and querying over the page table.
//!
//! ## The page table lock protocol
//!
//! We provide a fine-grained lock protocol to allow concurrent accesses to
//! the page table. The protocol was originally proposed by Ruihan Li
//! <lrh2000@pku.edu.cn>.
//!
//! [`CursorMut::new`] accepts an address range, which indicates the page table
//! entries that may be visited by this cursor.
//!
//! Then, [`CursorMut::new`] finds an intermediate page table (not necessarily
//! the last-level or the top-level) which represents an address range that contains
//! the whole specified address range. It acquires all the locks from the root page
//! table down to the intermediate page table, but then releases all of them except the
//! one for the intermediate page table. `CursorMut` then maintains the lock
//! guards from the one for the intermediate page table down to the leaf that the cursor is
//! currently manipulating.
//!
//! For example, if we're going to map the address range shown below:
//!
//! ```plain
//! Top-level page table node            A
//!                                      /
//!                                     B
//!                                    / \
//! Last-level page table nodes       C   D
//! Last-level PTEs              ---**...**---
//!                                \__ __/
//!                                   V
//!     Address range that we're going to map
//! ```
//!
//! When calling [`CursorMut::new`], it will:
//! 1. `lock(A)`, `lock(B)`, `unlock(A)`;
//! 2. `guards = [ locked(B) ]`.
//!
//! When calling [`CursorMut::map`], it will:
//! 1. `lock(C)`, `guards = [ locked(B), locked(C) ]`;
//! 2. Map some pages in `C`;
//! 3. `unlock(C)`, `guards = [ locked(B) ]`;
//! 4. `lock(D)`, `guards = [ locked(B), locked(D) ]`;
//! 5. Map some pages in `D`;
//! 6. `unlock(D)`, `guards = [ locked(B) ]`.
//!
//! If all the mappings in `B` are cancelled when the cursor finishes its traversal,
//! and `B` needs to be recycled, a page walk from the root page table to `B` is
//! required. The cursor unlocks all locks, then locks all the way down to `B`, then
//! checks whether `B` is empty, and finally recycles all the resources on the way back.
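//!
//! As a minimal usage sketch (assuming `pt`, `va_range`, `frame_a`, `frame_b`
//! and `prop` are set up by the caller), the protocol plays out for a mapping
//! operation roughly like this:
//!
//! ```rust,ignore
//! let mut cursor = pt.cursor_mut(&va_range)?; // lock(A), lock(B), unlock(A)
//! // Safety: the caller guarantees that the mappings do not break memory safety.
//! unsafe {
//!     cursor.map(frame_a, prop); // lock(C), map a page in C, move forward
//!     cursor.map(frame_b, prop); // may unlock(C) and lock(D) when crossing nodes
//! }
//! drop(cursor); // releases the remaining guards; empty nodes may be recycled
//! ```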
use alloc::sync::Arc;
use core::{any::TypeId, mem::size_of, ops::Range};
use core::{any::TypeId, ops::Range};
use align_ext::AlignExt;
use super::{
Child, KernelMode, MapInfo, MapOp, MapProperty, PageTable, PageTableConstsTrait,
PageTableEntryTrait, PageTableError, PageTableFrame, PageTableMode, PtfRef,
PageTableEntryTrait, PageTableError, PageTableFrame, PageTableMode,
};
use crate::{
sync::{ArcSpinLockGuard, SpinLock},
vm::{paddr_to_vaddr, Paddr, Vaddr, VmFrame},
vm::{Paddr, Vaddr, VmFrame},
};
/// The cursor for forward traversal over the page table.
/// The cursor for traversal over the page table.
///
/// Each method may move the cursor forward, doing mapping, unmapping, or
/// querying this slot.
/// Efficient methods are provided to move the cursor forward by a slot,
/// doing mapping, unmapping, or querying for the traversed slot. Also, you
/// can jump forward or backward by re-walking without releasing the lock.
///
/// A slot is a PTE at any level, which corresponds to a certain virtual
/// memory range sized by the "page size" of the current level.
///
/// Doing mapping is somewhat like a depth-first search on a tree, except
/// that we modify the tree while traversing it. We use a stack to simulate
/// the recursion.
///
/// Any read or write accesses to nodes require exclusive access on the
/// entire path from the root to the node. But a cursor can be created without
/// holding the lock, and can release the lock after yielding the current
/// slot while querying over the page table with a range. Simultaneous
/// reading or writing to the same range in the page table will not produce
/// consistent results; only validity is guaranteed.
pub(super) struct PageTableCursor<
'a,
M: PageTableMode,
E: PageTableEntryTrait,
C: PageTableConstsTrait,
> where
[(); C::NR_ENTRIES_PER_FRAME]:,
[(); C::NR_LEVELS]:,
{
stack: [Option<PtfRef<E, C>>; C::NR_LEVELS],
lock_guard: [Option<ArcSpinLockGuard<PageTableFrame<E, C>>>; C::NR_LEVELS],
level: usize,
va: Vaddr,
}
#[derive(Debug, Clone)]
pub(super) enum MapOption {
Map {
frame: VmFrame,
prop: MapProperty,
},
MapUntyped {
pa: Paddr,
len: usize,
prop: MapProperty,
},
Unmap {
len: usize,
},
}
impl MapOption {
fn paddr(&self) -> Option<Paddr> {
match self {
MapOption::Map { frame, prop } => Some(frame.start_paddr()),
MapOption::MapUntyped { pa, len, prop } => Some(*pa),
MapOption::Unmap { len } => None,
}
}
fn prop(&self) -> Option<MapProperty> {
match self {
MapOption::Map { frame, prop } => Some(*prop),
MapOption::MapUntyped { pa, len, prop } => Some(*prop),
MapOption::Unmap { len } => None,
}
}
fn len(&self) -> usize {
match self {
// A VmFrame currently has a fixed size of 1 base page.
MapOption::Map { frame, prop } => crate::arch::mm::PageTableConsts::BASE_PAGE_SIZE,
MapOption::MapUntyped { pa, len, prop } => *len,
MapOption::Unmap { len: l } => *l,
}
}
fn consume(&mut self, len: usize) -> Self {
match self {
MapOption::Map { frame, prop } => {
debug_assert_eq!(len, crate::arch::mm::PageTableConsts::BASE_PAGE_SIZE);
let ret = self.clone();
*self = MapOption::Unmap { len: 0 };
ret
}
MapOption::MapUntyped { pa, len: l, prop } => {
debug_assert!(*l >= len);
let ret = MapOption::MapUntyped {
pa: *pa,
len,
prop: *prop,
};
*self = MapOption::MapUntyped {
pa: *pa + len,
len: *l - len,
prop: *prop,
};
ret
}
MapOption::Unmap { len: l } => {
debug_assert!(*l >= len);
let ret = MapOption::Unmap { len };
*l -= len;
ret
}
}
}
}
impl<M: PageTableMode, E: PageTableEntryTrait, C: PageTableConstsTrait> PageTableCursor<'_, M, E, C>
/// that we modify the tree while traversing it. We use a guard stack to
/// simulate the recursion, and adopt a page table locking protocol to
/// provide concurrency.
pub(crate) struct CursorMut<'a, M: PageTableMode, E: PageTableEntryTrait, C: PageTableConstsTrait>
where
[(); C::NR_ENTRIES_PER_FRAME]:,
[(); C::NR_LEVELS]:,
{
pub(super) fn new(pt: &PageTable<M, E, C>, va: Vaddr) -> Self {
let mut stack = core::array::from_fn(|_| None);
stack[0] = Some(pt.root_frame.clone());
let lock_guard = core::array::from_fn(|_| None);
Self {
stack,
lock_guard,
pt: &'a PageTable<M, E, C>,
guards: [Option<ArcSpinLockGuard<PageTableFrame<E, C>>>; C::NR_LEVELS],
level: usize, // current level
guard_level: usize, // from guard_level to level, the locks are held
va: Vaddr, // current virtual address
barrier_va: Range<Vaddr>, // virtual address range that is locked
}
impl<'a, M: PageTableMode, E: PageTableEntryTrait, C: PageTableConstsTrait> CursorMut<'a, M, E, C>
where
[(); C::NR_ENTRIES_PER_FRAME]:,
[(); C::NR_LEVELS]:,
{
/// Create a cursor exclusively owning the locks for the given range.
///
/// The cursor created will only be able to map, query or jump within the
/// given range.
pub(crate) fn new(
pt: &'a PageTable<M, E, C>,
va: &Range<Vaddr>,
) -> Result<Self, PageTableError> {
if !M::covers(va) {
return Err(PageTableError::InvalidVaddrRange(va.start, va.end));
}
if va.start % C::BASE_PAGE_SIZE != 0 || va.end % C::BASE_PAGE_SIZE != 0 {
return Err(PageTableError::UnalignedVaddr);
}
// Create a guard array that only holds the root node lock.
let guards = core::array::from_fn(|i| {
if i == 0 {
Some(pt.root_frame.lock_arc())
} else {
None
}
});
let mut cursor = Self {
pt,
guards,
level: C::NR_LEVELS,
va,
guard_level: C::NR_LEVELS,
va: va.start,
barrier_va: va.clone(),
};
// Go down and get proper locks. The cursor should hold a lock of a
// page table node containing the virtual address range.
//
// While going down, previous guards of too-high levels will be released.
loop {
let level_too_high = {
let start_idx = C::in_frame_index(va.start, cursor.level);
let end_idx = C::in_frame_index(va.end - 1, cursor.level);
start_idx == end_idx
};
if !level_too_high || !cursor.cur_child().is_pt() {
break;
}
cursor.level_down(None);
cursor.guards[C::NR_LEVELS - cursor.level - 1] = None;
cursor.guard_level -= 1;
}
Ok(cursor)
}
/// Jump to the given virtual address.
///
/// It panics if the address is out of the range where the cursor is required to operate,
/// or has bad alignment.
pub(crate) fn jump(&mut self, va: Vaddr) {
assert!(self.barrier_va.contains(&va));
assert!(va % C::BASE_PAGE_SIZE == 0);
loop {
let cur_node_start = self.va & !(C::page_size(self.level + 1) - 1);
let cur_node_end = cur_node_start + C::page_size(self.level + 1);
// If the address is within the current node, we can jump directly.
if cur_node_start <= va && va < cur_node_end {
self.va = va;
return;
}
// There is a corner case where the cursor is depleted, sitting at the start of the
// next node but the next node is not locked because the parent is not locked.
if self.va >= self.barrier_va.end && self.level == self.guard_level {
self.va = va;
return;
}
debug_assert!(self.level < self.guard_level);
self.level_up();
}
}
/// Map or unmap the range starting from the current address.
/// Map the range starting from the current address to a `VmFrame`.
///
/// The argument `create` allows you to map the continuous range to a physical
/// range with the given map property.
/// # Panic
///
/// This function will panic if
/// - the virtual address range to be mapped is out of the range;
/// - it is already mapped to a huge page while the caller wants to map a smaller one.
///
/// # Safety
///
/// The caller should ensure that the virtual range being mapped does
/// not affect kernel's memory safety.
pub(crate) unsafe fn map(&mut self, frame: VmFrame, prop: MapProperty) {
let end = self.va + C::BASE_PAGE_SIZE;
assert!(end <= self.barrier_va.end);
// Go down if not applicable.
while self.level > C::HIGHEST_TRANSLATION_LEVEL
|| self.va % C::page_size(self.level) != 0
|| self.va + C::page_size(self.level) > end
{
self.level_down(Some(prop));
continue;
}
// Map the current page.
let idx = self.cur_idx();
let level = self.level;
self.cur_node_mut()
.set_child(idx, Child::Frame(frame), Some(prop), level > 1);
self.move_forward();
}
/// Map the range starting from the current address to a physical address range.
///
/// The function will map as many huge pages as possible, and it will split
/// the huge pages into smaller pages if necessary. If the input range is large,
/// the resulting mappings may look like this (if very huge pages supported):
/// the huge pages into smaller pages if necessary. If the input range is
/// large, the resulting mappings may look like this (if very huge pages
/// supported):
///
/// ```text
/// start end
@ -146,159 +216,245 @@ where
/// 4KiB 2MiB 1GiB 4KiB 4KiB
/// ```
///
/// In practice it is suggested to use simple wrappers for this API that maps
/// frames for safety and conciseness.
/// In practice it is not suggested to use this method directly, both for safety and for conciseness.
///
/// # Safety
///
/// This function manipulates the page table directly, and it is unsafe because
/// it may cause undefined behavior if the caller does not ensure that the
/// mapped address is valid and the page table is not corrupted if it is used
/// by the kernel.
pub(super) unsafe fn map(&mut self, option: MapOption) {
self.acquire_locks();
let len = option.len();
let end = self.va + len;
let mut option = option;
/// The caller should ensure that
/// - the range being mapped does not affect kernel's memory safety;
/// - the physical address to be mapped is valid and safe to use.
pub(crate) unsafe fn map_pa(&mut self, pa: &Range<Paddr>, prop: MapProperty) {
let end = self.va + pa.len();
let mut pa = pa.start;
assert!(end <= self.barrier_va.end);
while self.va < end {
// Skip if we are unmapping and it is already invalid.
let cur_pte = unsafe { self.cur_pte_ptr().read() };
if matches!(option, MapOption::Unmap { .. }) && !cur_pte.is_valid() {
self.next_slot();
// We ensure that we are not mapping into reserved kernel shared tables, nor releasing them.
// The check may be an invariant for all architectures and will be optimized
// out by the compiler, since `C::NR_LEVELS - 1 > C::HIGHEST_TRANSLATION_LEVEL`.
let is_kernel_shared_node =
TypeId::of::<M>() == TypeId::of::<KernelMode>() && self.level >= C::NR_LEVELS - 1;
if self.level > C::HIGHEST_TRANSLATION_LEVEL
|| is_kernel_shared_node
|| self.va % C::page_size(self.level) != 0
|| self.va + C::page_size(self.level) > end
|| pa % C::page_size(self.level) != 0
{
self.level_down(Some(prop));
continue;
}
// Map the current page.
let idx = self.cur_idx();
let level = self.level;
self.cur_node_mut()
.set_child(idx, Child::Untracked(pa), Some(prop), level > 1);
pa += C::page_size(level);
self.move_forward();
}
}
/// Unmap the range of the given length, starting from the current address.
///
/// # Safety
///
/// The caller should ensure that the range being unmapped does not affect kernel's memory safety.
///
/// # Panic
///
/// This function will panic if:
/// - the range to be unmapped is out of the range where the cursor is required to operate;
/// - the range covers only a part of a page.
pub(crate) unsafe fn unmap(&mut self, len: usize) {
let end = self.va + len;
assert!(end <= self.barrier_va.end);
assert!(end % C::BASE_PAGE_SIZE == 0);
while self.va < end {
// Skip if it is already invalid.
if self.cur_child().is_none() {
if self.va + C::page_size(self.level) > end {
break;
}
self.move_forward();
continue;
}
// We check among the conditions that may lead to a level down.
let is_pa_not_aligned = option
.paddr()
.map(|pa| pa % C::page_size(self.level) != 0)
.unwrap_or(false);
let map_but_too_huge = self.level > C::HIGHEST_TRANSLATION_LEVEL
&& !matches!(option, MapOption::Unmap { .. });
// We ensure not mapping in reserved kernel shared tables or releasing it.
// Although it may be an invariant for all architectures and will be optimized
// out by the compiler since `C::NR_LEVELS - 1 > C::HIGHEST_TRANSLATION_LEVEL`.
let kshared_lvl_down =
// We ensure that we are not unmapping in reserved kernel shared tables, nor releasing them.
let is_kernel_shared_node =
TypeId::of::<M>() == TypeId::of::<KernelMode>() && self.level >= C::NR_LEVELS - 1;
if map_but_too_huge
|| kshared_lvl_down
if is_kernel_shared_node
|| self.va % C::page_size(self.level) != 0
|| self.va + C::page_size(self.level) > end
|| is_pa_not_aligned
{
let ld_prop = option.prop().unwrap_or(MapProperty::new_invalid());
self.level_down(Some(ld_prop));
self.level_down(Some(MapProperty::new_invalid()));
continue;
}
self.map_page(option.consume(C::page_size(self.level)));
self.next_slot();
// Unmap the current page.
let idx = self.cur_idx();
self.cur_node_mut().set_child(idx, Child::None, None, false);
self.move_forward();
}
self.release_locks();
}
/// Apply the given operation to all the mappings within the range.
pub(super) unsafe fn protect(
///
/// The function will return an error if it is not allowed to protect an invalid range but
/// attempts to do so, or if the range to be protected only covers a part of a page.
///
/// # Safety
///
/// The caller should ensure that the range being protected does not affect kernel's memory safety.
///
/// # Panic
///
/// This function will panic if:
/// - the range to be protected is out of the range where the cursor is required to operate.
pub(crate) unsafe fn protect(
&mut self,
len: usize,
op: impl MapOp,
allow_protect_invalid: bool,
) -> Result<(), PageTableError> {
self.acquire_locks();
let end = self.va + len;
assert!(end <= self.barrier_va.end);
while self.va < end {
let cur_pte = unsafe { self.cur_pte_ptr().read() };
if !cur_pte.is_valid() {
if self.cur_child().is_none() {
if !allow_protect_invalid {
return Err(PageTableError::ProtectingInvalid);
}
self.next_slot();
self.move_forward();
continue;
}
// Go down if it's not a last node or if the page size is too big.
if !(cur_pte.is_huge() || self.level == 1)
|| (self.va % C::page_size(self.level)) != 0
|| self.va + C::page_size(self.level) > end
{
self.level_down(Some(op(cur_pte.info())));
// Go down if it's not a last node.
if self.cur_child().is_pt() {
self.level_down(None);
continue;
}
// Apply the operation.
unsafe {
self.cur_pte_ptr().write(E::new(
cur_pte.paddr(),
op(cur_pte.info()),
cur_pte.is_huge(),
true,
))
};
self.next_slot();
let vaddr_not_fit =
self.va % C::page_size(self.level) != 0 || self.va + C::page_size(self.level) > end;
let cur_pte_info = self.read_cur_pte_info();
let protected_prop = op(cur_pte_info);
// Go down if the page size is too big and we are protecting part
// of untyped huge pages.
if self.cur_child().is_untyped() && vaddr_not_fit {
self.level_down(Some(protected_prop));
continue;
} else if vaddr_not_fit {
return Err(PageTableError::ProtectingPartial);
}
let idx = self.cur_idx();
let level = self.level;
self.cur_node_mut().protect(idx, protected_prop, level);
self.move_forward();
}
self.release_locks();
Ok(())
}
fn cur_pte_ptr(&self) -> *mut E {
let ptf = self.lock_guard[C::NR_LEVELS - self.level].as_ref().unwrap();
let frame_addr = paddr_to_vaddr(ptf.inner.start_paddr());
let offset = C::in_frame_index(self.va, self.level);
(frame_addr + offset * size_of::<E>()) as *mut E
/// Get the information of the current slot and move to the next slot.
pub(crate) fn query(&mut self) -> Option<PageTableQueryResult> {
if self.va >= self.barrier_va.end {
return None;
}
loop {
let level = self.level;
let va = self.va;
let map_info = self.read_cur_pte_info();
match self.cur_child().clone() {
Child::Frame(frame) => {
self.move_forward();
return Some(PageTableQueryResult::Mapped {
va,
frame,
info: map_info,
});
}
Child::PageTable(_) => {
// Go down if it's not a last node.
self.level_down(None);
continue;
}
Child::Untracked(pa) => {
self.move_forward();
return Some(PageTableQueryResult::MappedUntyped {
va,
pa,
len: C::page_size(level),
info: map_info,
});
}
Child::None => {
self.move_forward();
return Some(PageTableQueryResult::NotMapped {
va,
len: C::page_size(level),
});
}
}
}
}
/// Consume itself and leak the root guard for the caller if it locked the root level.
///
/// It is useful when the caller wants to keep the root guard while the cursor should be dropped.
pub(super) fn leak_root_guard(mut self) -> Option<ArcSpinLockGuard<PageTableFrame<E, C>>> {
if self.guard_level != C::NR_LEVELS {
return None;
}
while self.level < C::NR_LEVELS {
self.level_up();
}
self.guards[0].take()
// Ok to drop self here because we ensure not to access the page table if the current
// level is the root level when running the dropping method.
}
/// Traverse forward in the current level to the next PTE.
/// If reached the end of a page table frame, it leads itself up to the next frame of the parent frame.
fn next_slot(&mut self) {
///
/// If it reaches the end of a page table frame, it goes up to the next frame of the parent
/// frame if possible.
fn move_forward(&mut self) {
let page_size = C::page_size(self.level);
while self.level < C::NR_LEVELS && C::in_frame_index(self.va + page_size, self.level) == 0 {
let next_va = self.va.align_down(page_size) + page_size;
while self.level < self.guard_level && C::in_frame_index(next_va, self.level) == 0 {
self.level_up();
}
self.va += page_size;
self.va = next_va;
}
/// Go up a level. We release the current frame if it has no mappings since the cursor only moves
/// forward. And we will do the final cleanup using `level_up` when the cursor is dropped.
/// forward. And if needed we will do the final cleanup using this method after re-walking when the
/// cursor is dropped.
///
/// This method requires locks acquired before calling it. The discarded level will be unlocked.
fn level_up(&mut self) {
let last_map_cnt_is_zero = {
let top_ptf = self.lock_guard[C::NR_LEVELS - self.level].as_ref().unwrap();
top_ptf.map_count == 0
};
self.stack[C::NR_LEVELS - self.level] = None;
self.lock_guard[C::NR_LEVELS - self.level] = None;
let last_node_all_unmapped = self.cur_node().nr_valid_children() == 0;
self.guards[C::NR_LEVELS - self.level] = None;
self.level += 1;
let can_release_child =
TypeId::of::<M>() == TypeId::of::<KernelMode>() && self.level < C::NR_LEVELS;
if can_release_child && last_map_cnt_is_zero {
let top_ptf = self.lock_guard[C::NR_LEVELS - self.level]
.as_deref_mut()
.unwrap();
let frame_addr = paddr_to_vaddr(top_ptf.inner.start_paddr());
let idx = C::in_frame_index(self.va, self.level);
unsafe { (frame_addr as *mut E).add(idx).write(E::new_invalid()) }
top_ptf.child[idx] = None;
top_ptf.map_count -= 1;
if can_release_child && last_node_all_unmapped {
let idx = self.cur_idx();
self.cur_node_mut().set_child(idx, Child::None, None, false);
}
}
/// A level down operation during traversal. It may split a huge page into
/// smaller pages if we have an end address within the next mapped huge page.
/// It may also create a new child frame if the current frame does not have one.
/// If that may happen the map property of intermediate level `prop` should be
/// A level down operation during traversal. It may create a new child frame if the
/// current frame does not have one. It may also split an untyped huge page into
/// smaller pages if we have an end address within the next mapped untyped huge page.
///
/// If creation may happen, the map property `prop` of the intermediate level should be
/// passed in correctly. Whether the map property matters in an intermediate
/// level is architecture-dependent.
///
/// This method requires write locks acquired before calling it. The newly added
/// level will still hold the lock.
unsafe fn level_down(&mut self, prop: Option<MapProperty>) {
/// Also, the staticness of the page table is guaranteed if the caller makes sure
/// that there is a child node for the current node.
fn level_down(&mut self, prop: Option<MapProperty>) {
debug_assert!(self.level > 1);
// Check if the child frame exists.
let nxt_lvl_frame = {
let idx = C::in_frame_index(self.va, self.level);
let child = {
let top_ptf = self.lock_guard[C::NR_LEVELS - self.level].as_ref().unwrap();
&top_ptf.child[idx]
};
if let Some(Child::PageTable(nxt_lvl_frame)) = child {
let child = self.cur_child();
if let Child::PageTable(nxt_lvl_frame) = child {
Some(nxt_lvl_frame.clone())
} else {
None
@ -307,143 +463,89 @@ where
// Create a new child frame if it does not exist. Of course, this can be done
// only if it is allowed to modify the page table.
let nxt_lvl_frame = nxt_lvl_frame.unwrap_or_else(|| {
let mut new_frame = PageTableFrame::<E, C>::new();
// If it already maps a huge page, we should split it.
let pte = unsafe { self.cur_pte_ptr().read() };
if pte.is_valid() && pte.is_huge() {
let pa = pte.paddr();
let prop = pte.info().prop;
for i in 0..C::NR_ENTRIES_PER_FRAME {
let nxt_level = self.level - 1;
let nxt_pte = {
let frame_addr = paddr_to_vaddr(new_frame.inner.start_paddr());
&mut *(frame_addr as *mut E).add(i)
// If it already maps an untyped huge page, we should split it.
if self.cur_child().is_untyped() {
let level = self.level;
let idx = self.cur_idx();
self.cur_node_mut().split_untracked_huge(level, idx);
let Child::PageTable(nxt_lvl_frame) = self.cur_child() else {
unreachable!()
};
*nxt_pte = E::new(pa + i * C::page_size(nxt_level), prop, nxt_level > 1, true);
}
new_frame.map_count = C::NR_ENTRIES_PER_FRAME;
unsafe {
self.cur_pte_ptr().write(E::new(
new_frame.inner.start_paddr(),
nxt_lvl_frame.clone()
} else if self.cur_child().is_none() {
let new_frame = Arc::new(SpinLock::new(PageTableFrame::<E, C>::new()));
let idx = self.cur_idx();
self.cur_node_mut().set_child(
idx,
Child::PageTable(new_frame.clone()),
prop,
false,
false,
))
}
);
new_frame
} else {
// The child couldn't be valid here because child is none and it's not huge.
debug_assert!(!pte.is_valid());
unsafe {
self.cur_pte_ptr().write(E::new(
new_frame.inner.start_paddr(),
prop.unwrap(),
false,
false,
))
panic!("Trying to level down when it is mapped to a typed frame");
}
}
let top_ptf = self.lock_guard[C::NR_LEVELS - self.level]
.as_deref_mut()
.unwrap();
top_ptf.map_count += 1;
let new_frame_ref = Arc::new(SpinLock::new(new_frame));
top_ptf.child[C::in_frame_index(self.va, self.level)] =
Some(Child::PageTable(new_frame_ref.clone()));
new_frame_ref
});
self.lock_guard[C::NR_LEVELS - self.level + 1] = Some(nxt_lvl_frame.lock_arc());
self.stack[C::NR_LEVELS - self.level + 1] = Some(nxt_lvl_frame);
self.guards[C::NR_LEVELS - self.level + 1] = Some(nxt_lvl_frame.lock_arc());
self.level -= 1;
}
/// Map or unmap the page pointed to by the cursor (which could be large).
/// If the physical address and the map property are not provided, it unmaps
/// the current page.
///
/// This method requires write locks acquired before calling it.
unsafe fn map_page(&mut self, option: MapOption) {
let pte_ptr = self.cur_pte_ptr();
let top_ptf = self.lock_guard[C::NR_LEVELS - self.level]
.as_deref_mut()
.unwrap();
let child = {
let idx = C::in_frame_index(self.va, self.level);
if top_ptf.child[idx].is_some() {
top_ptf.child[idx] = None;
top_ptf.map_count -= 1;
fn cur_node(&self) -> &ArcSpinLockGuard<PageTableFrame<E, C>> {
self.guards[C::NR_LEVELS - self.level].as_ref().unwrap()
}
&mut top_ptf.child[idx]
};
match option {
MapOption::Map { frame, prop } => {
let pa = frame.start_paddr();
unsafe {
pte_ptr.write(E::new(pa, prop, self.level > 1, true));
fn cur_node_mut(&mut self) -> &mut ArcSpinLockGuard<PageTableFrame<E, C>> {
self.guards[C::NR_LEVELS - self.level].as_mut().unwrap()
}
*child = Some(Child::Frame(frame));
top_ptf.map_count += 1;
fn cur_idx(&self) -> usize {
C::in_frame_index(self.va, self.level)
}
MapOption::MapUntyped { pa, len, prop } => {
debug_assert_eq!(len, C::page_size(self.level));
unsafe {
pte_ptr.write(E::new(pa, prop, self.level > 1, true));
}
top_ptf.map_count += 1;
}
MapOption::Unmap { len } => {
debug_assert_eq!(len, C::page_size(self.level));
unsafe { pte_ptr.write(E::new_invalid()) }
fn cur_child(&self) -> &Child<E, C> {
self.cur_node().child(self.cur_idx())
}
fn read_cur_pte_info(&self) -> MapInfo {
self.cur_node().read_pte_info(self.cur_idx())
}
}
fn acquire_locks(&mut self) {
for i in 0..=C::NR_LEVELS - self.level {
let Some(ref ptf) = self.stack[i] else {
panic!("Invalid values in PT cursor stack while acuqiring locks");
};
debug_assert!(self.lock_guard[i].is_none());
self.lock_guard[i] = Some(ptf.lock_arc());
}
}
fn release_locks(&mut self) {
for i in (0..=C::NR_LEVELS - self.level).rev() {
let Some(ref ptf) = self.stack[i] else {
panic!("Invalid values in PT cursor stack while releasing locks");
};
debug_assert!(self.lock_guard[i].is_some());
self.lock_guard[i] = None;
}
}
}
/// The iterator for querying over the page table without modifying it.
pub struct PageTableIter<'a, M: PageTableMode, E: PageTableEntryTrait, C: PageTableConstsTrait>
impl<M: PageTableMode, E: PageTableEntryTrait, C: PageTableConstsTrait> Drop
for CursorMut<'_, M, E, C>
where
[(); C::NR_ENTRIES_PER_FRAME]:,
[(); C::NR_LEVELS]:,
{
cursor: PageTableCursor<'a, M, E, C>,
end_va: Vaddr,
fn drop(&mut self) {
// Recycle what we can recycle now.
while self.level < self.guard_level {
self.level_up();
}
impl<'a, M: PageTableMode, E: PageTableEntryTrait, C: PageTableConstsTrait>
PageTableIter<'a, M, E, C>
where
[(); C::NR_ENTRIES_PER_FRAME]:,
[(); C::NR_LEVELS]:,
{
pub(super) fn new(pt: &'a PageTable<M, E, C>, va: &Range<Vaddr>) -> Self {
Self {
cursor: PageTableCursor::new(pt, va.start),
end_va: va.end,
// No need to do further cleanup if it is the root node or
// there are mappings left.
if self.level == self.guard_level || self.cur_node().nr_valid_children() != 0 {
return;
}
// Drop the lock on the guard level.
self.guards[C::NR_LEVELS - self.guard_level] = None;
// Re-walk the page table to retrieve the locks.
self.guards[0] = Some(self.pt.root_frame.lock_arc());
self.level = C::NR_LEVELS;
// Another cursor can unmap the guard-level node before this cursor
// is dropped, so we just do our best here when re-walking.
while self.level > self.guard_level && self.cur_child().is_pt() {
self.level_down(None);
}
// Doing final cleanup by [`CursorMut::level_up`] to the root.
while self.level < C::NR_LEVELS {
self.level_up();
}
}
}
#[derive(Clone, Debug)]
pub enum PageTableQueryResult {
pub(crate) enum PageTableQueryResult {
NotMapped {
va: Vaddr,
len: usize,
@ -461,8 +563,32 @@ pub enum PageTableQueryResult {
},
}
/// The read-only cursor for traversal over the page table.
///
/// It implements the `Iterator` trait to provide a convenient way to query over the page table.
pub(crate) struct Cursor<'a, M: PageTableMode, E: PageTableEntryTrait, C: PageTableConstsTrait>
where
[(); C::NR_ENTRIES_PER_FRAME]:,
[(); C::NR_LEVELS]:,
{
inner: CursorMut<'a, M, E, C>,
}
impl<'a, M: PageTableMode, E: PageTableEntryTrait, C: PageTableConstsTrait> Cursor<'a, M, E, C>
where
[(); C::NR_ENTRIES_PER_FRAME]:,
[(); C::NR_LEVELS]:,
{
pub(super) fn new(
pt: &'a PageTable<M, E, C>,
va: &Range<Vaddr>,
) -> Result<Self, PageTableError> {
CursorMut::new(pt, va).map(|inner| Self { inner })
}
}
impl<'a, M: PageTableMode, E: PageTableEntryTrait, C: PageTableConstsTrait> Iterator
for PageTableIter<'a, M, E, C>
for Cursor<'a, M, E, C>
where
[(); C::NR_ENTRIES_PER_FRAME]:,
[(); C::NR_LEVELS]:,
@ -470,63 +596,6 @@ where
type Item = PageTableQueryResult;
fn next(&mut self) -> Option<Self::Item> {
self.cursor.acquire_locks();
if self.cursor.va >= self.end_va {
return None;
}
loop {
let level = self.cursor.level;
let va = self.cursor.va;
let top_ptf = self.cursor.lock_guard[C::NR_LEVELS - level]
.as_ref()
.unwrap();
let cur_pte = unsafe { self.cursor.cur_pte_ptr().read() };
// Yield if it's not a valid node.
if !cur_pte.is_valid() {
self.cursor.next_slot();
self.cursor.release_locks();
return Some(PageTableQueryResult::NotMapped {
va,
len: C::page_size(level),
});
}
// Go down if it's not a last node.
if !(cur_pte.is_huge() || level == 1) {
debug_assert!(cur_pte.is_valid());
// Safety: it's valid and there should be a child frame here.
unsafe {
self.cursor.level_down(None);
}
continue;
}
// Yield the current mapping.
let map_info = cur_pte.info();
let idx = C::in_frame_index(self.cursor.va, self.cursor.level);
match top_ptf.child[idx] {
Some(Child::Frame(ref frame)) => {
let frame = frame.clone();
self.cursor.next_slot();
self.cursor.release_locks();
return Some(PageTableQueryResult::Mapped {
va,
frame,
info: map_info,
});
}
Some(Child::PageTable(_)) => {
panic!("The child couldn't be page table here because it's valid and not huge");
}
None => {
self.cursor.next_slot();
self.cursor.release_locks();
return Some(PageTableQueryResult::MappedUntyped {
va,
pa: cur_pte.paddr(),
len: C::page_size(level),
info: map_info,
});
}
}
}
self.inner.query()
}
}
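
For querying, the read-only `Cursor` above is driven as an iterator. A short sketch follows; the `pt` and `va_range` bindings are assumptions, while the variants are those of `PageTableQueryResult` as defined in this file:

```rust
// Query a range through the read-only cursor, which implements `Iterator`.
for qr in pt.cursor(&va_range).unwrap() {
    match qr {
        PageTableQueryResult::Mapped { va, frame, info } => {
            // A tracked `VmFrame` is mapped at `va` with the given mapping info.
        }
        PageTableQueryResult::MappedUntyped { va, pa, len, info } => {
            // An untracked physical range `pa..pa + len` is mapped at `va`.
        }
        PageTableQueryResult::NotMapped { va, len } => {
            // The range `va..va + len` has no mapping.
        }
    }
}
```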

View File

@ -2,10 +2,10 @@
use alloc::{boxed::Box, sync::Arc};
use super::{PageTableConstsTrait, PageTableEntryTrait};
use super::{MapInfo, MapProperty, PageTableConstsTrait, PageTableEntryTrait};
use crate::{
sync::SpinLock,
vm::{VmAllocOptions, VmFrame},
vm::{Paddr, VmAllocOptions, VmFrame},
};
/// A page table frame.
@ -17,14 +17,12 @@ where
[(); C::NR_ENTRIES_PER_FRAME]:,
[(); C::NR_LEVELS]:,
{
pub inner: VmFrame,
inner: VmFrame,
/// TODO: all the following fields can be removed if frame metadata is introduced.
/// Here we temporarily allow 2x space overhead per frame.
#[allow(clippy::type_complexity)]
pub child: Box<[Option<Child<E, C>>; C::NR_ENTRIES_PER_FRAME]>,
/// The number of mapped frames or page tables.
/// This is to track if we can free itself.
pub map_count: usize,
children: Box<[Child<E, C>; C::NR_ENTRIES_PER_FRAME]>,
nr_valid_children: usize,
}
pub(super) type PtfRef<E, C> = Arc<SpinLock<PageTableFrame<E, C>>>;
@ -37,6 +35,47 @@ where
{
PageTable(PtfRef<E, C>),
Frame(VmFrame),
/// Frames not tracked by the frame allocator.
Untracked(Paddr),
None,
}
impl<E: PageTableEntryTrait, C: PageTableConstsTrait> Child<E, C>
where
[(); C::NR_ENTRIES_PER_FRAME]:,
[(); C::NR_LEVELS]:,
{
pub(super) fn is_pt(&self) -> bool {
matches!(self, Child::PageTable(_))
}
pub(super) fn is_frame(&self) -> bool {
matches!(self, Child::Frame(_))
}
pub(super) fn is_none(&self) -> bool {
matches!(self, Child::None)
}
pub(super) fn is_some(&self) -> bool {
!self.is_none()
}
pub(super) fn is_untyped(&self) -> bool {
matches!(self, Child::Untracked(_))
}
/// Is a last entry that maps to a physical address.
pub(super) fn is_last(&self) -> bool {
matches!(self, Child::Frame(_) | Child::Untracked(_))
}
fn paddr(&self) -> Option<Paddr> {
match self {
Child::PageTable(node) => {
// The chance of deadlock is zero because it is only called by [`PageTableFrame::protect`],
// and the cursor will not protect a node while holding the lock.
Some(node.lock().start_paddr())
}
Child::Frame(frame) => Some(frame.start_paddr()),
Child::Untracked(pa) => Some(*pa),
Child::None => None,
}
}
}
impl<E: PageTableEntryTrait, C: PageTableConstsTrait> Clone for Child<E, C>
@ -49,6 +88,8 @@ where
match self {
Child::PageTable(ptf) => Child::PageTable(ptf.clone()),
Child::Frame(frame) => Child::Frame(frame.clone()),
Child::Untracked(pa) => Child::Untracked(*pa),
Child::None => Child::None,
}
}
}
@ -61,10 +102,130 @@ where
pub(super) fn new() -> Self {
Self {
inner: VmAllocOptions::new(1).alloc_single().unwrap(),
child: Box::new(core::array::from_fn(|_| None)),
map_count: 0,
children: Box::new(core::array::from_fn(|_| Child::None)),
nr_valid_children: 0,
}
}
pub(super) fn start_paddr(&self) -> Paddr {
self.inner.start_paddr()
}
pub(super) fn child(&self, idx: usize) -> &Child<E, C> {
debug_assert!(idx < C::NR_ENTRIES_PER_FRAME);
&self.children[idx]
}
/// The number of mapped frames or page tables.
/// This is used to track whether the frame itself can be freed.
pub(super) fn nr_valid_children(&self) -> usize {
self.nr_valid_children
}
/// Read the info from a page table entry at a given index.
pub(super) fn read_pte_info(&self, idx: usize) -> MapInfo {
self.read_pte(idx).info()
}
/// Split the untracked huge page mapped at `idx` to smaller pages.
pub(super) fn split_untracked_huge(&mut self, cur_level: usize, idx: usize) {
debug_assert!(idx < C::NR_ENTRIES_PER_FRAME);
debug_assert!(cur_level > 1);
let Child::Untracked(pa) = self.children[idx] else {
panic!("split_untracked_huge: not an untyped huge page");
};
let info = self.read_pte_info(idx);
let mut new_frame = Self::new();
for i in 0..C::NR_ENTRIES_PER_FRAME {
let small_pa = pa + i * C::page_size(cur_level - 1);
new_frame.set_child(
i,
Child::Untracked(small_pa),
Some(info.prop),
cur_level - 1 > 1,
);
}
self.set_child(
idx,
Child::PageTable(Arc::new(SpinLock::new(new_frame))),
Some(info.prop),
false,
);
}
/// Map a child at a given index.
/// If mapping a non-none child, the map property for the child must be provided.
pub(super) fn set_child(
&mut self,
idx: usize,
child: Child<E, C>,
prop: Option<MapProperty>,
huge: bool,
) {
assert!(idx < C::NR_ENTRIES_PER_FRAME);
// Safety: the index is within the bound and the PTE to be written is valid.
// And the physical address of PTE points to initialized memory.
// This applies to all the following `write_pte` invocations.
unsafe {
match &child {
Child::PageTable(node) => {
debug_assert!(!huge);
let frame = node.lock();
self.write_pte(
idx,
E::new(frame.inner.start_paddr(), prop.unwrap(), false, false),
);
self.nr_valid_children += 1;
}
Child::Frame(frame) => {
debug_assert!(!huge); // `VmFrame` currently can only be a regular page.
self.write_pte(idx, E::new(frame.start_paddr(), prop.unwrap(), false, true));
self.nr_valid_children += 1;
}
Child::Untracked(pa) => {
self.write_pte(idx, E::new(*pa, prop.unwrap(), huge, true));
self.nr_valid_children += 1;
}
Child::None => {
self.write_pte(idx, E::new_invalid());
}
}
}
if self.children[idx].is_some() {
self.nr_valid_children -= 1;
}
self.children[idx] = child;
}
/// Protect an already mapped child at a given index.
pub(super) fn protect(&mut self, idx: usize, prop: MapProperty, level: usize) {
debug_assert!(self.children[idx].is_some());
let paddr = self.children[idx].paddr().unwrap();
// Safety: the index is within the bound and the PTE is valid.
unsafe {
self.write_pte(
idx,
E::new(paddr, prop, level > 1, self.children[idx].is_last()),
);
}
}
fn read_pte(&self, idx: usize) -> E {
assert!(idx < C::NR_ENTRIES_PER_FRAME);
// Safety: the index is within the bound and PTE is plain-old-data.
unsafe { (self.inner.as_ptr() as *const E).add(idx).read() }
}
/// Write a page table entry at a given index.
///
/// # Safety
///
/// The caller must ensure that:
/// - the index is within bounds;
/// - the PTE is valid and the physical address in the PTE points to initialized memory.
unsafe fn write_pte(&mut self, idx: usize, pte: E) {
(self.inner.as_mut_ptr() as *mut E).add(idx).write(pte);
}
}
impl<E: PageTableEntryTrait, C: PageTableConstsTrait> Clone for PageTableFrame<E, C>
@ -77,13 +238,14 @@ where
fn clone(&self) -> Self {
let new_frame = VmAllocOptions::new(1).alloc_single().unwrap();
let new_ptr = new_frame.as_mut_ptr() as *mut E;
let ptr = self.inner.as_ptr() as *const E;
let child = Box::new(core::array::from_fn(|i| {
self.child[i].as_ref().map(|child| match child {
Child::PageTable(ptf) => unsafe {
let frame = ptf.lock();
let children = Box::new(core::array::from_fn(|i| match self.child(i) {
Child::PageTable(node) => unsafe {
let frame = node.lock();
// Possibly a cursor is waiting for the root lock to recycle this node.
// We can skip copying empty page table nodes.
if frame.nr_valid_children() != 0 {
let cloned = frame.clone();
let pte = ptr.add(i).read();
let pte = self.read_pte(i);
new_ptr.add(i).write(E::new(
cloned.inner.start_paddr(),
pte.info().prop,
@ -91,20 +253,22 @@ where
false,
));
Child::PageTable(Arc::new(SpinLock::new(cloned)))
} else {
Child::None
}
},
Child::Frame(frame) => {
Child::Frame(_) | Child::Untracked(_) => {
unsafe {
let pte = ptr.add(i).read();
new_ptr.add(i).write(pte);
new_ptr.add(i).write(self.read_pte(i));
}
Child::Frame(frame.clone())
self.children[i].clone()
}
})
Child::None => Child::None,
}));
Self {
inner: new_frame,
child,
map_count: self.map_count,
children,
nr_valid_children: self.nr_valid_children,
}
}
}

View File

@ -1,12 +1,12 @@
// SPDX-License-Identifier: MPL-2.0
use alloc::{boxed::Box, sync::Arc};
use core::{fmt::Debug, marker::PhantomData, mem::size_of, ops::Range, panic};
use alloc::sync::Arc;
use core::{fmt::Debug, marker::PhantomData, ops::Range, panic};
use crate::{
arch::mm::{activate_page_table, PageTableConsts, PageTableEntry},
sync::SpinLock,
vm::{paddr_to_vaddr, Paddr, Vaddr, VmAllocOptions, VmFrameVec, VmPerm},
vm::{paddr_to_vaddr, Paddr, Vaddr, VmPerm},
};
mod properties;
@ -14,21 +14,20 @@ pub use properties::*;
mod frame;
use frame::*;
mod cursor;
use cursor::*;
pub(crate) use cursor::{PageTableIter, PageTableQueryResult};
pub(crate) use cursor::{Cursor, CursorMut, PageTableQueryResult};
#[cfg(ktest)]
mod test;
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
pub enum PageTableError {
InvalidVaddr(Vaddr),
/// The virtual address range is invalid.
InvalidVaddrRange(Vaddr, Vaddr),
VaddrNotAligned(Vaddr),
VaddrRangeNotAligned(Vaddr, Vaddr),
PaddrNotAligned(Paddr),
PaddrRangeNotAligned(Vaddr, Vaddr),
// Protecting a mapping that does not exist.
/// Using virtual address not aligned.
UnalignedVaddr,
/// Protecting a mapping that does not exist.
ProtectingInvalid,
/// Protecting a part of an already mapped page.
ProtectingPartial,
}
/// This is a compile-time technique to force the frame developers to distinguish
@ -38,8 +37,8 @@ pub trait PageTableMode: Clone + Debug + 'static {
/// The range of virtual addresses that the page table can manage.
const VADDR_RANGE: Range<Vaddr>;
/// Check if the given range is within the valid virtual address range.
fn encloses(r: &Range<Vaddr>) -> bool {
/// Check if the given range is covered by the valid virtual address range.
fn covers(r: &Range<Vaddr>) -> bool {
Self::VADDR_RANGE.start <= r.start && r.end <= Self::VADDR_RANGE.end
}
}
@ -78,61 +77,6 @@ where
[(); C::NR_ENTRIES_PER_FRAME]:,
[(); C::NR_LEVELS]:,
{
pub(crate) fn map_frames(
&self,
vaddr: Vaddr,
frames: VmFrameVec,
prop: MapProperty,
) -> Result<(), PageTableError> {
if vaddr % C::BASE_PAGE_SIZE != 0 {
return Err(PageTableError::VaddrNotAligned(vaddr));
}
let va_range = vaddr
..vaddr
.checked_add(frames.nbytes())
.ok_or(PageTableError::InvalidVaddr(vaddr))?;
if !UserMode::encloses(&va_range) {
return Err(PageTableError::InvalidVaddrRange(
va_range.start,
va_range.end,
));
}
// Safety: modification to the user page table is safe.
unsafe {
self.map_frames_unchecked(vaddr, frames, prop);
}
Ok(())
}
pub(crate) fn unmap(&self, vaddr: &Range<Vaddr>) -> Result<(), PageTableError> {
if vaddr.start % C::BASE_PAGE_SIZE != 0 || vaddr.end % C::BASE_PAGE_SIZE != 0 {
return Err(PageTableError::VaddrRangeNotAligned(vaddr.start, vaddr.end));
}
if !UserMode::encloses(vaddr) {
return Err(PageTableError::InvalidVaddrRange(vaddr.start, vaddr.end));
}
// Safety: modification to the user page table is safe.
unsafe {
self.unmap_unchecked(vaddr);
}
Ok(())
}
pub(crate) fn protect(
&self,
vaddr: &Range<Vaddr>,
op: impl MapOp,
) -> Result<(), PageTableError> {
if vaddr.start % C::BASE_PAGE_SIZE != 0 || vaddr.end % C::BASE_PAGE_SIZE != 0 {
return Err(PageTableError::VaddrRangeNotAligned(vaddr.start, vaddr.end));
}
if !UserMode::encloses(vaddr) {
return Err(PageTableError::InvalidVaddrRange(vaddr.start, vaddr.end));
}
// Safety: modification to the user page table is safe.
unsafe { self.cursor(vaddr.start).protect(vaddr.len(), op, false) }
}
pub(crate) fn activate(&self) {
// Safety: The usermode page table is safe to activate since the kernel
// mappings are shared.
@ -141,55 +85,60 @@ where
}
}
/// Remove all write permissions from the user page table and mark the page
/// table as copy-on-write, and then create a handle to the new page table.
/// Remove all write permissions from the user page table and create a cloned
/// new page table.
///
/// That is, new page tables will be created when needed if a write operation
/// is performed on either of the user page table handles. Calling this function
/// performs no significant operations.
/// TODO: We may consider making the page table itself copy-on-write.
pub(crate) fn fork_copy_on_write(&self) -> Self {
let mut cursor = self.cursor_mut(&UserMode::VADDR_RANGE).unwrap();
// Safety: Protecting the user page table is safe.
unsafe {
self.protect_unchecked(&UserMode::VADDR_RANGE, perm_op(|perm| perm & !VmPerm::W));
}
// TODO: implement the copy-on-write mechanism. This is a simple workaround.
let new_root_frame = VmAllocOptions::new(1).alloc_single().unwrap();
let root_frame = self.root_frame.lock();
cursor
.protect(
UserMode::VADDR_RANGE.len(),
perm_op(|perm| perm & !VmPerm::W),
true,
)
.unwrap();
};
let root_frame = cursor.leak_root_guard().unwrap();
let mut new_root_frame = PageTableFrame::<E, C>::new();
let half_of_entries = C::NR_ENTRIES_PER_FRAME / 2;
let new_ptr = new_root_frame.as_mut_ptr() as *mut E;
let ptr = root_frame.inner.as_ptr() as *const E;
let child = Box::new(core::array::from_fn(|i| {
if i < half_of_entries {
for i in 0..half_of_entries {
// This is user space, deep copy the child.
root_frame.child[i].as_ref().map(|child| match child {
Child::PageTable(ptf) => unsafe {
let frame = ptf.lock();
match root_frame.child(i) {
Child::PageTable(node) => {
let frame = node.lock();
// Possibly a cursor is waiting for the root lock to recycle this node.
// We can skip copying empty page table nodes.
if frame.nr_valid_children() != 0 {
let cloned = frame.clone();
let pte = ptr.add(i).read();
new_ptr.add(i).write(E::new(
cloned.inner.start_paddr(),
pte.info().prop,
let pt = Child::PageTable(Arc::new(SpinLock::new(cloned)));
new_root_frame.set_child(
i,
pt,
Some(root_frame.read_pte_info(i).prop),
false,
false,
));
Child::PageTable(Arc::new(SpinLock::new(cloned)))
},
Child::Frame(_) => panic!("Unexpected frame child."),
})
} else {
);
}
}
Child::None => {}
Child::Frame(_) | Child::Untracked(_) => {
panic!("Unexpected map child.");
}
}
}
for i in half_of_entries..C::NR_ENTRIES_PER_FRAME {
// This is kernel space, share the child.
unsafe {
let pte = ptr.add(i).read();
new_ptr.add(i).write(pte);
new_root_frame.set_child(
i,
root_frame.child(i).clone(),
Some(root_frame.read_pte_info(i).prop),
false,
)
}
root_frame.child[i].clone()
}
}));
PageTable::<UserMode, E, C> {
root_frame: Arc::new(SpinLock::new(PageTableFrame::<E, C> {
inner: new_root_frame,
child,
map_count: root_frame.map_count,
})),
root_frame: Arc::new(SpinLock::new(new_root_frame)),
_phantom: PhantomData,
}
}
@ -208,17 +157,18 @@ where
/// Then, one can use a user page table to call [`fork_copy_on_write`], creating
/// other child page tables.
pub(crate) fn create_user_page_table(&self) -> PageTable<UserMode, E, C> {
let new_root_frame = VmAllocOptions::new(1).alloc_single().unwrap();
let mut new_root_frame = PageTableFrame::<E, C>::new();
let root_frame = self.root_frame.lock();
let half_of_entries = C::NR_ENTRIES_PER_FRAME / 2;
new_root_frame.copy_from_frame(&root_frame.inner);
let child = Box::new(core::array::from_fn(|i| root_frame.child[i].clone()));
for i in C::NR_ENTRIES_PER_FRAME / 2..C::NR_ENTRIES_PER_FRAME {
new_root_frame.set_child(
i,
root_frame.child(i).clone(),
Some(root_frame.read_pte_info(i).prop),
false,
)
}
PageTable::<UserMode, E, C> {
root_frame: Arc::new(SpinLock::new(PageTableFrame::<E, C> {
inner: new_root_frame,
child,
map_count: root_frame.map_count,
})),
root_frame: Arc::new(SpinLock::new(new_root_frame)),
_phantom: PhantomData,
}
}
@ -230,30 +180,26 @@ where
/// instead of the virtual address range.
pub(crate) fn make_shared_tables(&self, root_index: Range<usize>) {
let start = root_index.start;
assert!(start < C::NR_ENTRIES_PER_FRAME);
debug_assert!(start >= C::NR_ENTRIES_PER_FRAME / 2);
debug_assert!(start < C::NR_ENTRIES_PER_FRAME);
let end = root_index.end;
assert!(end <= C::NR_ENTRIES_PER_FRAME);
debug_assert!(end <= C::NR_ENTRIES_PER_FRAME);
let mut root_frame = self.root_frame.lock();
for i in start..end {
let no_such_child = root_frame.child[i].is_none();
let no_such_child = root_frame.child(i).is_none();
if no_such_child {
let frame = PageTableFrame::<E, C>::new();
let pte_ptr = (root_frame.inner.start_paddr() + i * size_of::<E>()) as *mut E;
unsafe {
pte_ptr.write(E::new(
frame.inner.start_paddr(),
MapProperty {
let frame = Arc::new(SpinLock::new(PageTableFrame::<E, C>::new()));
root_frame.set_child(
i,
Child::PageTable(frame),
Some(MapProperty {
perm: VmPerm::RWX,
global: true,
extension: 0,
cache: CachePolicy::Uncacheable,
},
}),
false,
false,
));
}
root_frame.child[i] = Some(Child::PageTable(Arc::new(SpinLock::new(frame))));
root_frame.map_count += 1;
)
}
}
}
@ -274,60 +220,40 @@ where
/// The physical address of the root page table.
pub(crate) fn root_paddr(&self) -> Paddr {
self.root_frame.lock().inner.start_paddr()
self.root_frame.lock().start_paddr()
}
pub(crate) unsafe fn map_frames_unchecked(
&self,
vaddr: Vaddr,
frames: VmFrameVec,
prop: MapProperty,
) {
let mut cursor = self.cursor(vaddr);
for frame in frames.into_iter() {
cursor.map(MapOption::Map { frame, prop });
}
}
pub(crate) unsafe fn map_unchecked(
pub(crate) unsafe fn map(
&self,
vaddr: &Range<Vaddr>,
paddr: &Range<Paddr>,
prop: MapProperty,
) {
self.cursor(vaddr.start).map(MapOption::MapUntyped {
pa: paddr.start,
len: vaddr.len(),
prop,
});
) -> Result<(), PageTableError> {
self.cursor_mut(vaddr)?.map_pa(paddr, prop);
Ok(())
}
pub(crate) unsafe fn unmap_unchecked(&self, vaddr: &Range<Vaddr>) {
self.cursor(vaddr.start)
.map(MapOption::Unmap { len: vaddr.len() });
pub(crate) unsafe fn unmap(&self, vaddr: &Range<Vaddr>) -> Result<(), PageTableError> {
self.cursor_mut(vaddr)?.unmap(vaddr.len());
Ok(())
}
pub(crate) unsafe fn protect_unchecked(&self, vaddr: &Range<Vaddr>, op: impl MapOp) {
self.cursor(vaddr.start)
pub(crate) unsafe fn protect(
&self,
vaddr: &Range<Vaddr>,
op: impl MapOp,
) -> Result<(), PageTableError> {
self.cursor_mut(vaddr)?
.protect(vaddr.len(), op, true)
.unwrap();
}
/// Query about the mappings of a range of virtual addresses.
pub(crate) fn query_range(
&'a self,
vaddr: &Range<Vaddr>,
) -> Result<PageTableIter<'a, M, E, C>, PageTableError> {
if vaddr.start % C::BASE_PAGE_SIZE != 0 || vaddr.end % C::BASE_PAGE_SIZE != 0 {
return Err(PageTableError::InvalidVaddrRange(vaddr.start, vaddr.end));
}
if !M::encloses(vaddr) {
return Err(PageTableError::InvalidVaddrRange(vaddr.start, vaddr.end));
}
Ok(PageTableIter::new(self, vaddr))
Ok(())
}
/// Query about the mapping of a single byte at the given virtual address.
///
/// Note that this function may fail to reflect an accurate result if there are
/// cursors concurrently accessing the same virtual address range, just like what
/// happens for the hardware MMU walk.
pub(crate) fn query(&self, vaddr: Vaddr) -> Option<(Paddr, MapInfo)> {
// Safety: The root frame is a valid page table frame so the address is valid.
unsafe { page_walk::<E, C>(self.root_paddr(), vaddr) }
@ -337,10 +263,26 @@ where
activate_page_table(self.root_paddr(), CachePolicy::Writeback);
}
/// Create a new mutating cursor for the page table.
/// The cursor is initialized at the given virtual address.
fn cursor(&self, va: usize) -> PageTableCursor<'a, M, E, C> {
PageTableCursor::new(self, va)
/// Create a new cursor exclusively accessing the virtual address range for mapping.
///
/// If another cursor is already accessing the range, the new cursor will wait until the
/// previous cursor is dropped.
pub(crate) fn cursor_mut(
&'a self,
va: &Range<Vaddr>,
) -> Result<CursorMut<'a, M, E, C>, PageTableError> {
CursorMut::new(self, va)
}
/// Create a new cursor exclusively accessing the virtual address range for querying.
///
/// If another cursor is already accessing the range, the new cursor will wait until the
/// previous cursor is dropped.
pub(crate) fn cursor(
&'a self,
va: &Range<Vaddr>,
) -> Result<Cursor<'a, M, E, C>, PageTableError> {
Cursor::new(self, va)
}
/// Create a new reference to the same page table.

View File

@ -1,7 +1,7 @@
// SPDX-License-Identifier: MPL-2.0
use super::*;
use crate::vm::{kspace::LINEAR_MAPPING_BASE_VADDR, space::VmPerm};
use crate::vm::{kspace::LINEAR_MAPPING_BASE_VADDR, space::VmPerm, VmAllocOptions};
const PAGE_SIZE: usize = 4096;
@ -12,47 +12,37 @@ fn test_range_check() {
let bad_va = 0..PAGE_SIZE + 1;
let bad_va2 = LINEAR_MAPPING_BASE_VADDR..LINEAR_MAPPING_BASE_VADDR + PAGE_SIZE;
let to = VmAllocOptions::new(1).alloc().unwrap();
assert!(pt.query_range(&good_va).is_ok());
assert!(pt.query_range(&bad_va).is_err());
assert!(pt.query_range(&bad_va2).is_err());
assert!(pt.unmap(&good_va).is_ok());
assert!(pt.unmap(&bad_va).is_err());
assert!(pt.unmap(&bad_va2).is_err());
assert!(pt
.map_frames(
good_va.start,
to.clone(),
MapProperty::new_general(VmPerm::R)
)
.is_ok());
assert!(pt
.map_frames(bad_va2.start, to.clone(), MapProperty::new_invalid())
.is_err());
assert!(pt.cursor_mut(&good_va).is_ok());
assert!(pt.cursor_mut(&bad_va).is_err());
assert!(pt.cursor_mut(&bad_va2).is_err());
assert!(unsafe { pt.unmap(&good_va) }.is_ok());
assert!(unsafe { pt.unmap(&bad_va) }.is_err());
assert!(unsafe { pt.unmap(&bad_va2) }.is_err());
}
#[ktest]
fn test_map_unmap() {
let pt = PageTable::<UserMode>::empty();
let from = PAGE_SIZE..PAGE_SIZE * 2;
let frames = VmAllocOptions::new(1).alloc().unwrap();
let start_paddr = frames.get(0).unwrap().start_paddr();
let frame = VmAllocOptions::new(1).alloc_single().unwrap();
let start_paddr = frame.start_paddr();
let prop = MapProperty::new_general(VmPerm::RW);
pt.map_frames(from.start, frames.clone(), prop).unwrap();
unsafe { pt.cursor_mut(&from).unwrap().map(frame.clone(), prop) };
assert_eq!(pt.query(from.start + 10).unwrap().0, start_paddr + 10);
pt.unmap(&from).unwrap();
unsafe { pt.unmap(&from).unwrap() };
assert!(pt.query(from.start + 10).is_none());
let from_ppn = 13245..512 * 512 + 23456;
let to_ppn = from_ppn.start - 11010..from_ppn.end - 11010;
let from = PAGE_SIZE * from_ppn.start..PAGE_SIZE * from_ppn.end;
let to = PAGE_SIZE * to_ppn.start..PAGE_SIZE * to_ppn.end;
unsafe { pt.map_unchecked(&from, &to, prop) };
unsafe { pt.map(&from, &to, prop).unwrap() };
for i in 0..100 {
let offset = i * (PAGE_SIZE + 1000);
assert_eq!(pt.query(from.start + offset).unwrap().0, to.start + offset);
}
let unmap = PAGE_SIZE * 123..PAGE_SIZE * 3434;
pt.unmap(&unmap).unwrap();
unsafe { pt.unmap(&unmap).unwrap() };
for i in 0..100 {
let offset = i * (PAGE_SIZE + 10);
if unmap.start <= from.start + offset && from.start + offset < unmap.end {
@ -67,20 +57,20 @@ fn test_map_unmap() {
fn test_user_copy_on_write() {
let pt = PageTable::<UserMode>::empty();
let from = PAGE_SIZE..PAGE_SIZE * 2;
let frames = VmAllocOptions::new(1).alloc().unwrap();
let start_paddr = frames.get(0).unwrap().start_paddr();
let frame = VmAllocOptions::new(1).alloc_single().unwrap();
let start_paddr = frame.start_paddr();
let prop = MapProperty::new_general(VmPerm::RW);
pt.map_frames(from.start, frames.clone(), prop).unwrap();
unsafe { pt.cursor_mut(&from).unwrap().map(frame.clone(), prop) };
assert_eq!(pt.query(from.start + 10).unwrap().0, start_paddr + 10);
pt.unmap(&from).unwrap();
unsafe { pt.unmap(&from).unwrap() };
assert!(pt.query(from.start + 10).is_none());
pt.map_frames(from.start, frames.clone(), prop).unwrap();
unsafe { pt.cursor_mut(&from).unwrap().map(frame.clone(), prop) };
assert_eq!(pt.query(from.start + 10).unwrap().0, start_paddr + 10);
let child_pt = pt.fork_copy_on_write();
assert_eq!(pt.query(from.start + 10).unwrap().0, start_paddr + 10);
assert_eq!(child_pt.query(from.start + 10).unwrap().0, start_paddr + 10);
pt.unmap(&from).unwrap();
unsafe { pt.unmap(&from).unwrap() };
assert!(pt.query(from.start + 10).is_none());
assert_eq!(child_pt.query(from.start + 10).unwrap().0, start_paddr + 10);
}
@ -98,26 +88,26 @@ impl PageTableConstsTrait for BasePageTableConsts {
}
#[ktest]
fn test_base_protect_query_range() {
fn test_base_protect_query() {
let pt = PageTable::<UserMode, PageTableEntry, BasePageTableConsts>::empty();
let from_ppn = 1..1000;
let from = PAGE_SIZE * from_ppn.start..PAGE_SIZE * from_ppn.end;
let to = PAGE_SIZE * 1000..PAGE_SIZE * 1999;
let prop = MapProperty::new_general(VmPerm::RW);
unsafe { pt.map_unchecked(&from, &to, prop) };
for (qr, i) in pt.query_range(&from).unwrap().zip(from_ppn) {
unsafe { pt.map(&from, &to, prop).unwrap() };
for (qr, i) in pt.cursor(&from).unwrap().zip(from_ppn) {
let Qr::MappedUntyped { va, pa, len, info } = qr else {
panic!("Expected MappedUntyped, got {:?}", qr);
panic!("Expected MappedUntyped, got {:#x?}", qr);
};
assert_eq!(info.prop.perm, VmPerm::RW);
assert_eq!(info.prop.cache, CachePolicy::Writeback);
assert_eq!(va..va + len, i * PAGE_SIZE..(i + 1) * PAGE_SIZE);
}
let prot = PAGE_SIZE * 18..PAGE_SIZE * 20;
pt.protect(&prot, perm_op(|p| p - VmPerm::W)).unwrap();
for (qr, i) in pt.query_range(&prot).unwrap().zip(18..20) {
unsafe { pt.protect(&prot, perm_op(|p| p - VmPerm::W)).unwrap() };
for (qr, i) in pt.cursor(&prot).unwrap().zip(18..20) {
let Qr::MappedUntyped { va, pa, len, info } = qr else {
panic!("Expected MappedUntyped, got {:?}", qr);
panic!("Expected MappedUntyped, got {:#x?}", qr);
};
assert_eq!(info.prop.perm, VmPerm::R);
assert_eq!(va..va + len, i * PAGE_SIZE..(i + 1) * PAGE_SIZE);
@ -135,7 +125,7 @@ impl PageTableConstsTrait for VeryHugePageTableConsts {
}
#[ktest]
fn test_large_protect_query_range() {
fn test_large_protect_query() {
let pt = PageTable::<UserMode, PageTableEntry, VeryHugePageTableConsts>::empty();
let gmult = 512 * 512;
let from_ppn = gmult - 512..gmult + gmult + 514;
@ -148,10 +138,10 @@ fn test_large_protect_query_range() {
let from = PAGE_SIZE * from_ppn.start..PAGE_SIZE * from_ppn.end;
let to = PAGE_SIZE * to_ppn.start..PAGE_SIZE * to_ppn.end;
let prop = MapProperty::new_general(VmPerm::RW);
unsafe { pt.map_unchecked(&from, &to, prop) };
for (qr, i) in pt.query_range(&from).unwrap().zip(0..512 + 2 + 2) {
unsafe { pt.map(&from, &to, prop).unwrap() };
for (qr, i) in pt.cursor(&from).unwrap().zip(0..512 + 2 + 2) {
let Qr::MappedUntyped { va, pa, len, info } = qr else {
panic!("Expected MappedUntyped, got {:?}", qr);
panic!("Expected MappedUntyped, got {:#x?}", qr);
};
assert_eq!(info.prop.perm, VmPerm::RW);
assert_eq!(info.prop.cache, CachePolicy::Writeback);
@ -171,32 +161,32 @@ fn test_large_protect_query_range() {
}
let ppn = from_ppn.start + 18..from_ppn.start + 20;
let va = PAGE_SIZE * ppn.start..PAGE_SIZE * ppn.end;
pt.protect(&va, perm_op(|p| p - VmPerm::W)).unwrap();
unsafe { pt.protect(&va, perm_op(|p| p - VmPerm::W)).unwrap() };
for (qr, i) in pt
.query_range(&(va.start - PAGE_SIZE..va.start))
.cursor(&(va.start - PAGE_SIZE..va.start))
.unwrap()
.zip(ppn.start - 1..ppn.start)
{
let Qr::MappedUntyped { va, pa, len, info } = qr else {
panic!("Expected MappedUntyped, got {:?}", qr);
panic!("Expected MappedUntyped, got {:#x?}", qr);
};
assert_eq!(info.prop.perm, VmPerm::RW);
assert_eq!(va..va + len, i * PAGE_SIZE..(i + 1) * PAGE_SIZE);
}
for (qr, i) in pt.query_range(&va).unwrap().zip(ppn.clone()) {
for (qr, i) in pt.cursor(&va).unwrap().zip(ppn.clone()) {
let Qr::MappedUntyped { va, pa, len, info } = qr else {
panic!("Expected MappedUntyped, got {:?}", qr);
panic!("Expected MappedUntyped, got {:#x?}", qr);
};
assert_eq!(info.prop.perm, VmPerm::R);
assert_eq!(va..va + len, i * PAGE_SIZE..(i + 1) * PAGE_SIZE);
}
for (qr, i) in pt
.query_range(&(va.end..va.end + PAGE_SIZE))
.cursor(&(va.end..va.end + PAGE_SIZE))
.unwrap()
.zip(ppn.end..ppn.end + 1)
{
let Qr::MappedUntyped { va, pa, len, info } = qr else {
panic!("Expected MappedUntyped, got {:?}", qr);
panic!("Expected MappedUntyped, got {:#x?}", qr);
};
assert_eq!(info.prop.perm, VmPerm::RW);
assert_eq!(va..va + len, i * PAGE_SIZE..(i + 1) * PAGE_SIZE);

View File

@ -9,7 +9,8 @@ use super::{
is_page_aligned,
kspace::KERNEL_PAGE_TABLE,
page_table::{
MapInfo, MapOp, PageTable, PageTableConstsTrait, PageTableQueryResult as PtQr, UserMode,
MapInfo, MapOp, PageTable, PageTableConstsTrait, PageTableMode,
PageTableQueryResult as PtQr, PageTableQueryResult, UserMode,
},
VmFrameVec, VmIo, PAGE_SIZE,
};
@ -17,7 +18,7 @@ use crate::{
arch::mm::{PageTableConsts, PageTableEntry},
prelude::*,
vm::{
page_table::{CachePolicy, MapProperty, PageTableIter},
page_table::{CachePolicy, Cursor, MapProperty},
VmFrame, MAX_USERSPACE_VADDR,
},
Error,
@ -63,27 +64,44 @@ impl VmSpace {
}
let addr = options.addr.unwrap();
if addr % PAGE_SIZE != 0 {
return Err(Error::InvalidArgs);
}
let size = frames.nbytes();
let end = addr.checked_add(size).ok_or(Error::InvalidArgs)?;
let va_range = addr..end;
if !UserMode::covers(&va_range) {
return Err(Error::InvalidArgs);
}
let mut cursor = self.pt.cursor_mut(&va_range)?;
// If overwrite is forbidden, we should check if there are existing mappings
if !options.can_overwrite {
let end = addr.checked_add(size).ok_or(Error::Overflow)?;
for qr in self.query_range(&(addr..end)).unwrap() {
if matches!(qr, VmQueryResult::Mapped { .. }) {
while let Some(qr) = cursor.query() {
if matches!(qr, PageTableQueryResult::Mapped { .. }) {
return Err(Error::MapAlreadyMappedVaddr);
}
}
cursor.jump(va_range.start);
}
self.pt.map_frames(
addr,
frames,
MapProperty {
let prop = MapProperty {
perm: options.perm,
global: false,
extension: 0,
cache: CachePolicy::Writeback,
},
)?;
};
for frame in frames.into_iter() {
// Safety: mapping in the user space with `VmFrame` is safe.
unsafe {
cursor.map(frame, prop);
}
}
Ok(addr)
}
@ -93,7 +111,7 @@ impl VmSpace {
/// each part of the range.
pub fn query_range(&self, range: &Range<Vaddr>) -> Result<VmQueryIter> {
Ok(VmQueryIter {
inner: self.pt.query_range(range)?,
cursor: self.pt.cursor(range)?,
})
}
@ -112,8 +130,16 @@ impl VmSpace {
/// The range is allowed to contain gaps, where no physical memory pages
/// are mapped.
pub fn unmap(&self, range: &Range<Vaddr>) -> Result<()> {
assert!(is_page_aligned(range.start) && is_page_aligned(range.end));
if !is_page_aligned(range.start) || !is_page_aligned(range.end) {
return Err(Error::InvalidArgs);
}
if !UserMode::covers(range) {
return Err(Error::InvalidArgs);
}
// Safety: unmapping in the user space is safe.
unsafe {
self.pt.unmap(range)?;
}
Ok(())
}
@ -122,7 +148,7 @@ impl VmSpace {
// Safety: unmapping the user space is safe, and we don't care about
// unmapping invalid ranges.
unsafe {
self.pt.unmap_unchecked(&(0..MAX_USERSPACE_VADDR));
self.pt.unmap(&(0..MAX_USERSPACE_VADDR)).unwrap();
}
#[cfg(target_arch = "x86_64")]
x86_64::instructions::tlb::flush_all();
@ -138,8 +164,16 @@ impl VmSpace {
/// partial huge page happens, and efforts are not reverted, leaving us
/// in a bad state.
pub fn protect(&self, range: &Range<Vaddr>, op: impl MapOp) -> Result<()> {
assert!(is_page_aligned(range.start) && is_page_aligned(range.end));
if !is_page_aligned(range.start) || !is_page_aligned(range.end) {
return Err(Error::InvalidArgs);
}
if !UserMode::covers(range) {
return Err(Error::InvalidArgs);
}
// Safety: protecting in the user space is safe.
unsafe {
self.pt.protect(range, op)?;
}
Ok(())
}
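As a usage note, the safe `VmSpace::protect` wrapper composes naturally with the `perm_op` helper already exercised by the page-table tests above; a small sketch (the `vm_space` and `va` bindings are assumptions):
// Sketch only: revoke write access to a page-aligned user range.
vm_space.protect(&(va..va + PAGE_SIZE), perm_op(|p| p - VmPerm::W))?;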
@ -306,7 +340,7 @@ impl TryFrom<u64> for VmPerm {
/// The iterator for querying over the VM space without modifying it.
pub struct VmQueryIter<'a> {
inner: PageTableIter<'a, UserMode, PageTableEntry, PageTableConsts>,
cursor: Cursor<'a, UserMode, PageTableEntry, PageTableConsts>,
}
pub enum VmQueryResult {
@ -325,11 +359,11 @@ impl Iterator for VmQueryIter<'_> {
type Item = VmQueryResult;
fn next(&mut self) -> Option<Self::Item> {
self.inner.next().map(|ptqr| match ptqr {
self.cursor.next().map(|ptqr| match ptqr {
PtQr::NotMapped { va, len } => VmQueryResult::NotMapped { va, len },
PtQr::Mapped { va, frame, info } => VmQueryResult::Mapped { va, frame, info },
// It is not possible to map untyped memory in user space.
PtQr::MappedUntyped { va, pa, len, info } => unreachable!(),
PtQr::MappedUntyped { .. } => unreachable!(),
})
}
}
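Finally, a sketch of driving the renamed cursor-backed iterator from user-facing code, counting how much of a range is actually backed by frames (`vm_space` and `va` are placeholders, not part of this diff):
// Sketch only: tally the mapped bytes in a 16-page user range.
let mut mapped_bytes = 0;
for qr in vm_space.query_range(&(va..va + 16 * PAGE_SIZE))? {
    match qr {
        // Gaps are reported explicitly rather than skipped.
        VmQueryResult::NotMapped { .. } => {}
        // Each mapped item corresponds to one base-page frame in user space.
        VmQueryResult::Mapped { .. } => mapped_bytes += PAGE_SIZE,
    }
}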