Optimize the page table implementation using the frame metadata

This PR also refactors the page table cursor, distinguishing `Cursor` from `CursorMut`, and splits
several functions to reduce dynamic condition checking.

Other clean-ups are also performed, including renaming the PTE `is_huge` API to `is_last`,
hardening the checks on tracked mappings, and allowing `VmFrame` to be of any size.
Author: Zhang Junyang (2024-05-16 16:42:36 +00:00)
Committed by: Tate, Hongliang Tian
Parent: 69d464fc6b
Commit: 141fbeaf0c
15 changed files with 1148 additions and 839 deletions
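For orientation, the reshaped per-architecture PTE interface that the diffs below converge on looks roughly like the following sketch. The method names and the `PagingLevel = u8` choice come from the hunks in this commit; the stub types and the simplified trait bounds are illustrative only.

// Stub types standing in for the real `crate::vm` definitions (illustrative only).
type Paddr = usize;
type PagingLevel = u8;
#[derive(Clone, Copy, Debug)]
struct PageProperty; // flags, cache policy and privileged flags in the real type

// A simplified sketch of `PageTableEntryTrait` after this commit.
trait PageTableEntryTrait: Sized {
    /// An empty, non-present entry.
    fn new_absent() -> Self;
    /// A terminal entry mapping a frame whose size is the page size at `level`.
    fn new_frame(paddr: Paddr, level: PagingLevel, prop: PageProperty) -> Self;
    /// A non-terminal entry pointing to a child page table.
    fn new_pt(paddr: Paddr) -> Self;
    fn is_present(&self) -> bool;
    /// Replaces `is_huge`: an entry is terminal at level 1 or when it maps a huge page.
    fn is_last(&self, level: PagingLevel) -> bool;
    fn paddr(&self) -> Paddr;
    fn set_prop(&mut self, prop: PageProperty);
}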


@ -138,7 +138,7 @@ impl RootTable {
if bus_entry.is_present() {
warn!("IOMMU: Overwritting the existing device page table");
}
let address = page_table.root_paddr();
let address = unsafe { page_table.root_paddr() };
context_table.page_tables.insert(address, page_table);
let entry = ContextEntry(address as u128 | 1 | 0x1_0000_0000_0000_0000);
context_table
@ -262,7 +262,7 @@ impl ContextTable {
if !bus_entry.is_present() {
let table = PageTable::<DeviceMode, PageTableEntry, PagingConsts>::empty();
let address = table.root_paddr();
let address = unsafe { table.root_paddr() };
self.page_tables.insert(address, table);
let entry = ContextEntry(address as u128 | 3 | 0x1_0000_0000_0000_0000);
self.entries_frame


@ -6,7 +6,8 @@ mod remapping;
mod second_stage;
use log::info;
use second_stage::{DeviceMode, PageTableEntry, PagingConsts};
pub use second_stage::DeviceMode;
use second_stage::{PageTableEntry, PagingConsts};
use spin::Once;
use crate::{


@ -13,7 +13,7 @@ use crate::vm::{
/// The page table used by the IOMMU maps the device address
/// space to the physical address space.
#[derive(Clone, Debug)]
pub(super) struct DeviceMode {}
pub struct DeviceMode {}
impl PageTableMode for DeviceMode {
/// The device address space is 32-bit.
@ -67,32 +67,23 @@ bitflags::bitflags! {
pub struct PageTableEntry(u64);
impl PageTableEntry {
const PHYS_MASK: usize = 0xFFFF_FFFF_F000;
const PHYS_MASK: u64 = 0xFFFF_FFFF_F000;
const PROP_MASK: u64 = !Self::PHYS_MASK & !PageTableFlags::LAST_PAGE.bits();
}
impl PageTableEntryTrait for PageTableEntry {
fn new(paddr: crate::vm::Paddr, prop: PageProperty, huge: bool, last: bool) -> Self {
let mut flags = PageTableFlags::empty();
if prop.flags.contains(PageFlags::W) {
flags |= PageTableFlags::WRITABLE;
fn new_frame(paddr: Paddr, level: PagingLevel, prop: PageProperty) -> Self {
let mut pte = Self(paddr as u64 & Self::PHYS_MASK | PageTableFlags::LAST_PAGE.bits());
pte.set_prop(prop);
pte
}
if prop.flags.contains(PageFlags::R) {
flags |= PageTableFlags::READABLE;
}
if prop.cache != CachePolicy::Uncacheable {
flags |= PageTableFlags::SNOOP;
}
if last {
flags |= PageTableFlags::LAST_PAGE;
}
if huge {
panic!("Huge page is not supported in iommu page table");
}
Self((paddr & Self::PHYS_MASK) as u64 | flags.bits)
fn new_pt(paddr: Paddr) -> Self {
Self(paddr as u64 & Self::PHYS_MASK)
}
fn paddr(&self) -> Paddr {
(self.0 & Self::PHYS_MASK as u64) as usize
(self.0 & Self::PHYS_MASK) as usize
}
fn new_absent() -> Self {
@ -131,7 +122,21 @@ impl PageTableEntryTrait for PageTableEntry {
}
}
fn is_huge(&self) -> bool {
false
fn set_prop(&mut self, prop: PageProperty) {
let mut flags = PageTableFlags::empty();
if prop.flags.contains(PageFlags::W) {
flags |= PageTableFlags::WRITABLE;
}
if prop.flags.contains(PageFlags::R) {
flags |= PageTableFlags::READABLE;
}
if prop.cache != CachePolicy::Uncacheable {
flags |= PageTableFlags::SNOOP;
}
self.0 = self.0 & !Self::PROP_MASK | flags.bits();
}
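// Huge pages are not supported in the IOMMU page table (see the panic removed above),
// so an entry is terminal exactly when it sits at the last level (level 1).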
fn is_last(&self, level: PagingLevel) -> bool {
level == 1
}
}


@ -127,6 +127,7 @@ impl PageTableEntry {
const PHYS_ADDR_MASK: usize = 0xF_FFFF_FFFF_F000;
#[cfg(feature = "intel_tdx")]
const PHYS_ADDR_MASK: usize = 0x7_FFFF_FFFF_F000;
const PROP_MASK: usize = !Self::PHYS_ADDR_MASK & !PageTableFlags::HUGE.bits();
}
/// Parse the bit flags in `val` from the bit positions of `from` to the corresponding bit positions of `to`.
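/// For example, `parse_flags!(prop.flags.bits(), PageFlags::W, PageTableFlags::WRITABLE)`
/// (as used below) evaluates to `PageTableFlags::WRITABLE.bits()` when the `W` bit is set
/// in `prop.flags`, and to zero otherwise.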
@ -145,61 +146,30 @@ impl PageTableEntryTrait for PageTableEntry {
self.0 & PageTableFlags::PRESENT.bits() != 0
}
fn new(paddr: Paddr, prop: PageProperty, huge: bool, last: bool) -> Self {
let mut flags =
PageTableFlags::PRESENT.bits() | (huge as usize) << PageTableFlags::HUGE.bits().ilog2();
if !huge && !last {
fn new_frame(paddr: Paddr, level: PagingLevel, prop: PageProperty) -> Self {
let mut pte = Self(
paddr & Self::PHYS_ADDR_MASK
| ((level != 1) as usize) << PageTableFlags::HUGE.bits().ilog2(),
);
pte.set_prop(prop);
pte
}
fn new_pt(paddr: Paddr) -> Self {
// In x86 if it's an intermediate PTE, it's better to have the same permissions
// as the most permissive child (to reduce hardware page walk accesses). But we
// don't have a mechanism to keep it generic across architectures, thus just
// setting it to be the most permissive.
flags |= PageTableFlags::WRITABLE.bits() | PageTableFlags::USER.bits();
let flags = PageTableFlags::PRESENT.bits()
| PageTableFlags::WRITABLE.bits()
| PageTableFlags::USER.bits();
#[cfg(feature = "intel_tdx")]
{
flags |= parse_flags!(
let flags = flags
| parse_flags!(
prop.priv_flags.bits(),
PrivFlags::SHARED,
PageTableFlags::SHARED
);
}
} else {
flags |= parse_flags!(prop.flags.bits(), PageFlags::W, PageTableFlags::WRITABLE)
| parse_flags!(!prop.flags.bits(), PageFlags::X, PageTableFlags::NO_EXECUTE)
| parse_flags!(
prop.flags.bits(),
PageFlags::ACCESSED,
PageTableFlags::ACCESSED
)
| parse_flags!(prop.flags.bits(), PageFlags::DIRTY, PageTableFlags::DIRTY)
| parse_flags!(
prop.priv_flags.bits(),
PrivFlags::USER,
PageTableFlags::USER
)
| parse_flags!(
prop.priv_flags.bits(),
PrivFlags::GLOBAL,
PageTableFlags::GLOBAL
);
#[cfg(feature = "intel_tdx")]
{
flags |= parse_flags!(
prop.priv_flags.bits(),
PrivFlags::SHARED,
PageTableFlags::SHARED
);
}
}
match prop.cache {
CachePolicy::Writeback => {}
CachePolicy::Writethrough => {
flags |= PageTableFlags::WRITE_THROUGH.bits();
}
CachePolicy::Uncacheable => {
flags |= PageTableFlags::NO_CACHE.bits();
}
_ => panic!("unsupported cache policy"),
}
Self(paddr & Self::PHYS_ADDR_MASK | flags)
}
@ -232,8 +202,49 @@ impl PageTableEntryTrait for PageTableEntry {
}
}
fn is_huge(&self) -> bool {
self.0 & PageTableFlags::HUGE.bits() != 0
fn set_prop(&mut self, prop: PageProperty) {
let mut flags = PageTableFlags::PRESENT.bits();
flags |= parse_flags!(prop.flags.bits(), PageFlags::W, PageTableFlags::WRITABLE)
| parse_flags!(!prop.flags.bits(), PageFlags::X, PageTableFlags::NO_EXECUTE)
| parse_flags!(
prop.flags.bits(),
PageFlags::ACCESSED,
PageTableFlags::ACCESSED
)
| parse_flags!(prop.flags.bits(), PageFlags::DIRTY, PageTableFlags::DIRTY)
| parse_flags!(
prop.priv_flags.bits(),
PrivFlags::USER,
PageTableFlags::USER
)
| parse_flags!(
prop.priv_flags.bits(),
PrivFlags::GLOBAL,
PageTableFlags::GLOBAL
);
#[cfg(feature = "intel_tdx")]
{
flags |= parse_flags!(
prop.priv_flags.bits(),
PrivFlags::SHARED,
PageTableFlags::SHARED
);
}
match prop.cache {
CachePolicy::Writeback => {}
CachePolicy::Writethrough => {
flags |= PageTableFlags::WRITE_THROUGH.bits();
}
CachePolicy::Uncacheable => {
flags |= PageTableFlags::NO_CACHE.bits();
}
_ => panic!("unsupported cache policy"),
}
self.0 = self.0 & !Self::PROP_MASK | flags;
}
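// An entry is terminal either at the last level (level 1) or when the HUGE bit marks it
// as a huge-page mapping at a higher level.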
fn is_last(&self, level: PagingLevel) -> bool {
level == 1 || (self.0 & PageTableFlags::HUGE.bits() != 0)
}
}


@ -2,7 +2,7 @@
use core::{
ops::Deref,
sync::atomic::{AtomicU32, AtomicU8},
sync::atomic::{AtomicU16, AtomicU32, AtomicU8},
};
use static_assertions::const_assert_eq;
@ -132,17 +132,18 @@ impl Deref for FrameMetaRef {
pub struct FrameMeta {
pub frame_type: FrameType, // 1 byte
/// The first 8-bit counter.
/// Currently unused.
/// - For [`FrameType::Anonymous`], it is not used.
/// - For [`FrameType::PageTable`], it is used as a spinlock.
pub counter8_1: AtomicU8, // 1 byte
/// The second 8-bit counter.
/// Currently unused.
pub counter8_2: AtomicU8, // 1 byte
/// The third 8-bit counter.
/// Currently unused.
pub counter8_3: AtomicU8, // 1 byte
/// The first 16-bit counter.
/// - For [`FrameType::Anonymous`], it is not used.
/// - For [`FrameType::PageTable`], it is used as the map count. The map
/// count is the number of present children.
pub counter16_1: AtomicU16, // 2 bytes
/// The first 32-bit counter.
/// It is used in different types of frames with different semantics.
/// - For [`FrameType::Anonymous`], it is the handle count.
/// - For [`FrameType::PageTable`], it is used as the reference count. The referencer
/// can be either a handle, a PTE or a CPU that loads it.
pub counter32_1: AtomicU32, // 4 bytes
}
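A minimal, self-contained sketch of how these counters are used for page-table frames elsewhere in this commit (the spin loop mirrors `RawPageTableFrame::lock` further below; the struct here is a stand-in for `FrameMeta`, not the real type):

use core::sync::atomic::{AtomicU16, AtomicU32, AtomicU8, Ordering};

// Stand-in for the counters of one page-table frame's metadata.
struct PtCounters {
    lock_bit: AtomicU8,    // plays the role of `counter8_1`
    map_count: AtomicU16,  // plays the role of `counter16_1`
    ref_count: AtomicU32,  // plays the role of `counter32_1`
}

impl PtCounters {
    // Spin until the 8-bit lock flips from 0 to 1, giving kernel code exclusive access.
    fn lock(&self) {
        while self
            .lock_bit
            .compare_exchange(0, 1, Ordering::Acquire, Ordering::Relaxed)
            .is_err()
        {
            core::hint::spin_loop();
        }
    }
    fn unlock(&self) {
        self.lock_bit.store(0, Ordering::Release);
    }
    // Each new referencer (a handle, a PTE, or a CPU) bumps the reference count.
    fn add_referencer(&self) {
        self.ref_count.fetch_add(1, Ordering::Relaxed);
    }
    // Recording a newly present child bumps the map count.
    fn add_child(&self) {
        self.map_count.fetch_add(1, Ordering::Relaxed);
    }
}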
@ -155,4 +156,6 @@ pub enum FrameType {
Meta,
Anonymous,
PageTable,
/// Frames that contain kernel code.
KernelCode,
}


@ -239,6 +239,10 @@ impl VmFrame {
self.meta.size()
}
pub fn level(&self) -> PagingLevel {
self.meta.level()
}
pub fn end_paddr(&self) -> Paddr {
self.start_paddr() + self.size()
}
@ -258,7 +262,7 @@ impl VmFrame {
if self.size() != src.size() {
panic!("The size of the source frame is different from the destination frame");
}
// Safety: the source and the destination do not overlap.
// SAFETY: the source and the destination do not overlap.
unsafe {
core::ptr::copy_nonoverlapping(src.as_ptr(), self.as_mut_ptr(), self.size());
}
@ -268,13 +272,13 @@ impl VmFrame {
impl<'a> VmFrame {
/// Returns a reader to read data from it.
pub fn reader(&'a self) -> VmReader<'a> {
// Safety: the memory of the page is contiguous and is valid during `'a`.
// SAFETY: the memory of the page is contiguous and is valid during `'a`.
unsafe { VmReader::from_raw_parts(self.as_ptr(), self.size()) }
}
/// Returns a writer to write data into it.
pub fn writer(&'a self) -> VmWriter<'a> {
// Safety: the memory of the page is contiguous and is valid during `'a`.
// SAFETY: the memory of the page is contiguous and is valid during `'a`.
unsafe { VmWriter::from_raw_parts_mut(self.as_mut_ptr(), self.size()) }
}
}
@ -309,10 +313,10 @@ impl Drop for VmFrame {
// A fence is needed here with the same reasons stated in the implementation of
// `Arc::drop`: <https://doc.rust-lang.org/std/sync/struct.Arc.html#method.drop>.
atomic::fence(Ordering::Acquire);
// Safety: the reference counter is 1 before decremented, so this is the only
// SAFETY: the reference counter is 1 before decremented, so this is the only
// (exclusive) handle.
unsafe { self.meta.deref_mut().frame_type = FrameType::Free };
// Safety: the page frame is valid.
// SAFETY: the page frame is valid.
unsafe {
allocator::dealloc_contiguous(self.paddr() / PAGE_SIZE, self.size() / PAGE_SIZE);
}
@ -460,13 +464,13 @@ impl VmSegment {
impl<'a> VmSegment {
/// Returns a reader to read data from it.
pub fn reader(&'a self) -> VmReader<'a> {
// Safety: the memory of the page frames is contiguous and is valid during `'a`.
// SAFETY: the memory of the page frames is contiguous and is valid during `'a`.
unsafe { VmReader::from_raw_parts(self.as_ptr(), self.nbytes()) }
}
/// Returns a writer to write data into it.
pub fn writer(&'a self) -> VmWriter<'a> {
// Safety: the memory of the page frames is contiguous and is valid during `'a`.
// SAFETY: the memory of the page frames is contiguous and is valid during `'a`.
unsafe { VmWriter::from_raw_parts_mut(self.as_mut_ptr(), self.nbytes()) }
}
}
@ -501,10 +505,10 @@ impl Drop for VmSegment {
// A fence is needed here with the same reasons stated in the implementation of
// `Arc::drop`: <https://doc.rust-lang.org/std/sync/struct.Arc.html#method.drop>.
atomic::fence(Ordering::Acquire);
// Safety: the reference counter is 1 before decremented, so this is the only
// SAFETY: the reference counter is 1 before decremented, so this is the only
// (exclusive) handle.
unsafe { self.inner.meta.deref_mut().frame_type = FrameType::Free };
// Safety: the range of contiguous page frames is valid.
// SAFETY: the range of contiguous page frames is valid.
unsafe {
allocator::dealloc_contiguous(self.inner.start_frame_index(), self.inner.nframes);
}


@ -174,7 +174,7 @@ impl<'a> VmReader<'a> {
/// Returns the number of bytes for the remaining data.
pub const fn remain(&self) -> usize {
// Safety: the end is equal to or greater than the cursor.
// SAFETY: the end is equal to or greater than the cursor.
unsafe { self.end.sub_ptr(self.cursor) }
}
@ -193,7 +193,7 @@ impl<'a> VmReader<'a> {
/// This method ensures the postcondition of `self.remain() <= max_remain`.
pub const fn limit(mut self, max_remain: usize) -> Self {
if max_remain < self.remain() {
// Safety: the new end is less than the old end.
// SAFETY: the new end is less than the old end.
unsafe { self.end = self.cursor.add(max_remain) };
}
self
@ -208,7 +208,7 @@ impl<'a> VmReader<'a> {
pub fn skip(mut self, nbytes: usize) -> Self {
assert!(nbytes <= self.remain());
// Safety: the new cursor is less than or equal to the end.
// SAFETY: the new cursor is less than or equal to the end.
unsafe { self.cursor = self.cursor.add(nbytes) };
self
}
@ -227,7 +227,7 @@ impl<'a> VmReader<'a> {
return 0;
}
// Safety: the memory range is valid since `copy_len` is the minimum
// SAFETY: the memory range is valid since `copy_len` is the minimum
// of the reader's remaining data and the writer's available space.
unsafe {
core::ptr::copy(self.cursor, writer.cursor, copy_len);
@ -255,7 +255,7 @@ impl<'a> VmReader<'a> {
impl<'a> From<&'a [u8]> for VmReader<'a> {
fn from(slice: &'a [u8]) -> Self {
// Safety: the range of memory is contiguous and is valid during `'a`.
// SAFETY: the range of memory is contiguous and is valid during `'a`.
unsafe { Self::from_raw_parts(slice.as_ptr(), slice.len()) }
}
}
@ -284,7 +284,7 @@ impl<'a> VmWriter<'a> {
/// Returns the number of bytes for the available space.
pub const fn avail(&self) -> usize {
// Safety: the end is equal to or greater than the cursor.
// SAFETY: the end is equal to or greater than the cursor.
unsafe { self.end.sub_ptr(self.cursor) }
}
@ -303,7 +303,7 @@ impl<'a> VmWriter<'a> {
/// This method ensures the postcondition of `self.avail() <= max_avail`.
pub const fn limit(mut self, max_avail: usize) -> Self {
if max_avail < self.avail() {
// Safety: the new end is less than the old end.
// SAFETY: the new end is less than the old end.
unsafe { self.end = self.cursor.add(max_avail) };
}
self
@ -318,7 +318,7 @@ impl<'a> VmWriter<'a> {
pub fn skip(mut self, nbytes: usize) -> Self {
assert!(nbytes <= self.avail());
// Safety: the new cursor is less than or equal to the end.
// SAFETY: the new cursor is less than or equal to the end.
unsafe { self.cursor = self.cursor.add(nbytes) };
self
}
@ -337,7 +337,7 @@ impl<'a> VmWriter<'a> {
return 0;
}
// Safety: the memory range is valid since `copy_len` is the minimum
// SAFETY: the memory range is valid since `copy_len` is the minimum
// of the reader's remaining data and the writer's available space.
unsafe {
core::ptr::copy(reader.cursor, self.cursor, copy_len);
@ -364,7 +364,7 @@ impl<'a> VmWriter<'a> {
let written_num = avail / core::mem::size_of::<T>();
for i in 0..written_num {
// Safety: `written_num` is calculated by the avail size and the size of the type `T`,
// SAFETY: `written_num` is calculated by the avail size and the size of the type `T`,
// hence the `add` operation and `write` operation are valid and will only manipulate
// the memory managed by this writer.
unsafe {
@ -380,7 +380,7 @@ impl<'a> VmWriter<'a> {
impl<'a> From<&'a mut [u8]> for VmWriter<'a> {
fn from(slice: &'a mut [u8]) -> Self {
// Safety: the range of memory is contiguous and is valid during `'a`.
// SAFETY: the range of memory is contiguous and is valid during `'a`.
unsafe { Self::from_raw_parts_mut(slice.as_mut_ptr(), slice.len()) }
}
}


@ -7,7 +7,7 @@
//!
//! ```text
//! +-+ <- the highest used address (0xffff_ffff_ffff_0000)
//! | | For the kernel code, 1 GiB.
//! | | For the kernel code, 1 GiB. Mapped frames are tracked with handles.
//! +-+ <- 0xffff_ffff_8000_0000
//! | |
//! | | Unused hole.
@ -42,14 +42,13 @@ use spin::Once;
use super::{
frame::{
allocator::FRAME_ALLOCATOR,
meta,
meta::{FrameMeta, FrameType},
meta::{self, FrameMeta, FrameType},
},
nr_subpage_per_huge,
page_prop::{CachePolicy, PageFlags, PageProperty, PrivilegedPageFlags},
page_size,
page_table::{boot_pt::BootPageTable, KernelMode, PageTable},
MemoryRegionType, Paddr, PagingConstsTrait, Vaddr, VmFrame, PAGE_SIZE,
FrameMetaRef, MemoryRegionType, Paddr, PagingConstsTrait, Vaddr, VmFrame, PAGE_SIZE,
};
use crate::{
arch::mm::{PageTableEntry, PagingConsts},
@ -161,7 +160,7 @@ pub fn init_kernel_page_table() {
};
let mut cursor = kpt.cursor_mut(&from).unwrap();
for frame in meta_frames {
// Safety: we are doing the metadata mappings for the kernel.
// SAFETY: we are doing the metadata mappings for the kernel.
unsafe {
cursor.map(frame, prop);
}
@ -201,9 +200,18 @@ pub fn init_kernel_page_table() {
cache: CachePolicy::Writeback,
priv_flags: PrivilegedPageFlags::GLOBAL,
};
let mut cursor = kpt.cursor_mut(&from).unwrap();
for frame_paddr in to.step_by(PAGE_SIZE) {
let mut meta = unsafe { FrameMetaRef::from_raw(frame_paddr, 1) };
// SAFETY: we are marking the type of the frame containing loaded kernel code.
unsafe {
meta.deref_mut().frame_type = FrameType::KernelCode;
}
let frame = VmFrame { meta };
// SAFETY: we are doing mappings for the kernel.
unsafe {
kpt.map(&from, &to, prop).unwrap();
cursor.map(frame, prop);
}
}
}
@ -211,7 +219,7 @@ pub fn init_kernel_page_table() {
}
pub fn activate_kernel_page_table() {
// Safety: the kernel page table is initialized properly.
// SAFETY: the kernel page table is initialized properly.
unsafe {
KERNEL_PAGE_TABLE.get().unwrap().activate_unchecked();
crate::arch::mm::tlb_flush_all_including_global();
@ -252,9 +260,9 @@ fn init_boot_page_table_and_page_meta(
let meta_frames = meta_frames
.into_iter()
.map(|paddr| {
// Safety: the frame is allocated but not initialized thus not referenced.
// SAFETY: the frame is allocated but not initialized thus not referenced.
let mut frame = unsafe { VmFrame::from_free_raw(paddr, 1) };
// Safety: this is the only reference to the frame so it's exclusive.
// SAFETY: this is the only reference to the frame so it's exclusive.
unsafe { frame.meta.deref_mut().frame_type = FrameType::Meta };
frame
})


@ -33,7 +33,9 @@ pub use self::{
space::{VmMapOptions, VmSpace},
};
pub(crate) use self::{
frame::meta::FrameMetaRef, kspace::paddr_to_vaddr, page_prop::PrivilegedPageFlags,
frame::meta::{FrameMetaRef, FrameType},
kspace::paddr_to_vaddr,
page_prop::PrivilegedPageFlags,
page_table::PageTable,
};
use crate::{
@ -46,7 +48,7 @@ pub type PagingLevel = u8;
/// A minimal set of constants that determines the paging system.
/// This provides an abstraction over most paging modes in common architectures.
pub(crate) trait PagingConstsTrait: Debug + 'static {
pub(crate) trait PagingConstsTrait: Clone + Debug + 'static {
/// The smallest page size.
/// This is also the page size at level 1 page tables.
const BASE_PAGE_SIZE: usize;


@ -49,10 +49,9 @@ impl<E: PageTableEntryTrait, C: PagingConstsTrait> BootPageTable<E, C> {
let pte = unsafe { pte_ptr.read() };
pt = if !pte.is_present() {
let frame = self.alloc_frame();
let new_pte = E::new(frame * C::BASE_PAGE_SIZE, pte.prop(), false, false);
unsafe { pte_ptr.write(new_pte) };
unsafe { pte_ptr.write(E::new_pt(frame * C::BASE_PAGE_SIZE)) };
frame
} else if pte.is_huge() {
} else if pte.is_last(level) {
panic!("mapping an already mapped huge page in the boot page table");
} else {
pte.paddr() / C::BASE_PAGE_SIZE
@ -66,8 +65,7 @@ impl<E: PageTableEntryTrait, C: PagingConstsTrait> BootPageTable<E, C> {
if pte.is_present() {
panic!("mapping an already mapped page in the boot page table");
}
let new_pte = E::new(to * C::BASE_PAGE_SIZE, prop, false, true);
unsafe { pte_ptr.write(new_pte) };
unsafe { pte_ptr.write(E::new_frame(to * C::BASE_PAGE_SIZE, 1, prop)) };
}
fn alloc_frame(&mut self) -> FrameNumber {


@ -50,49 +50,59 @@
//! required. The cursor unlocks all locks, then locks all the way down to `B`, then
//! checks if `B` is empty, and finally recycles all the resources on the way back.
use alloc::sync::Arc;
use core::{any::TypeId, ops::Range};
use align_ext::AlignExt;
use super::{
nr_subpage_per_huge, page_size, pte_index, Child, KernelMode, PageTable, PageTableEntryTrait,
PageTableError, PageTableFrame, PageTableMode, PagingConstsTrait,
};
use crate::{
sync::{ArcSpinLockGuard, SpinLock},
vm::{Paddr, PageProperty, PagingLevel, Vaddr, VmFrame},
page_size, pte_index, Child, KernelMode, PageTable, PageTableEntryTrait, PageTableError,
PageTableFrame, PageTableMode, PagingConstsTrait, PagingLevel,
};
use crate::vm::{Paddr, PageProperty, Vaddr, VmFrame};
#[derive(Clone, Debug)]
pub(crate) enum PageTableQueryResult {
NotMapped {
va: Vaddr,
len: usize,
},
Mapped {
va: Vaddr,
frame: VmFrame,
prop: PageProperty,
},
MappedUntracked {
va: Vaddr,
pa: Paddr,
len: usize,
prop: PageProperty,
},
}
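A hedged sketch of how a caller might consume these results through the read-only cursor (which, as shown later in this file, implements `Iterator`); how `cursor` is obtained and the empty match arms are illustrative only:

// Assuming `cursor` is a `Cursor` created over some locked virtual address range.
for result in cursor {
    match result {
        PageTableQueryResult::Mapped { va, frame, prop } => {
            // A tracked mapping: `frame` is a `VmFrame` handle kept alive by the page table.
        }
        PageTableQueryResult::MappedUntracked { va, pa, len, prop } => {
            // An untracked mapping (e.g. the kernel linear mapping): `len` bytes at `pa`.
        }
        PageTableQueryResult::NotMapped { va, len } => {
            // A hole of `len` bytes starting at `va`.
        }
    }
}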
/// The cursor for traversal over the page table.
///
/// Efficient methods are provided to move the cursor forward by a slot,
/// doing mapping, unmapping, or querying for the traversed slot. You can
/// also jump forward or backward by re-walking without releasing the lock.
///
/// A slot is a PTE at any level, which corresponds to a certain virtual
/// memory range sized by the "page size" of the current level.
///
/// Doing mapping is somewhat like a depth-first search on a tree, except
/// that we modify the tree while traversing it. We use a guard stack to
/// A cursor is able to move to the next slot, to read page properties,
/// and even to jump to a virtual address directly. We use a guard stack to
/// simulate the recursion, and adopt a page table locking protocol to
/// provide concurrency.
pub(crate) struct CursorMut<'a, M: PageTableMode, E: PageTableEntryTrait, C: PagingConstsTrait>
#[derive(Debug)]
pub(crate) struct Cursor<'a, M: PageTableMode, E: PageTableEntryTrait, C: PagingConstsTrait>
where
[(); nr_subpage_per_huge::<C>()]:,
[(); C::NR_LEVELS as usize]:,
{
pt: &'a PageTable<M, E, C>,
guards: [Option<ArcSpinLockGuard<PageTableFrame<E, C>>>; C::NR_LEVELS as usize],
guards: [Option<PageTableFrame<E, C>>; C::NR_LEVELS as usize],
level: PagingLevel, // current level
guard_level: PagingLevel, // from guard_level to level, the locks are held
va: Vaddr, // current virtual address
barrier_va: Range<Vaddr>, // virtual address range that is locked
}
impl<'a, M: PageTableMode, E: PageTableEntryTrait, C: PagingConstsTrait> CursorMut<'a, M, E, C>
impl<'a, M: PageTableMode, E: PageTableEntryTrait, C: PagingConstsTrait> Cursor<'a, M, E, C>
where
[(); nr_subpage_per_huge::<C>()]:,
[(); C::NR_LEVELS as usize]:,
{
/// Create a cursor exclusively owning the locks for the given range.
@ -112,7 +122,7 @@ where
// Create a guard array that only hold the root node lock.
let guards = core::array::from_fn(|i| {
if i == 0 {
Some(pt.root_frame.lock_arc())
Some(pt.root.copy_handle().lock())
} else {
None
}
@ -130,227 +140,24 @@ where
//
// While going down, previous guards of too-high levels will be released.
loop {
let cur_pte = cursor.read_cur_pte();
let level_too_high = {
let start_idx = pte_index::<C>(va.start, cursor.level);
let end_idx = pte_index::<C>(va.end - 1, cursor.level);
start_idx == end_idx
};
if !level_too_high || !cursor.cur_child().is_pt() {
if !level_too_high || !cur_pte.is_present() || cur_pte.is_last(cursor.level) {
break;
}
cursor.level_down(None);
cursor.level_down();
// Release the guard of the previous level.
cursor.guards[(C::NR_LEVELS - cursor.level) as usize - 1] = None;
cursor.guard_level -= 1;
}
Ok(cursor)
}
/// Jump to the given virtual address.
///
/// It panics if the address is out of the range where the cursor is required to operate,
/// or has bad alignment.
pub(crate) fn jump(&mut self, va: Vaddr) {
assert!(self.barrier_va.contains(&va));
assert!(va % C::BASE_PAGE_SIZE == 0);
loop {
let cur_node_start = self.va & !(page_size::<C>(self.level + 1) - 1);
let cur_node_end = cur_node_start + page_size::<C>(self.level + 1);
// If the address is within the current node, we can jump directly.
if cur_node_start <= va && va < cur_node_end {
self.va = va;
return;
}
// There is a corner case that the cursor is depleted, sitting at the start of the
// next node but the next node is not locked because the parent is not locked.
if self.va >= self.barrier_va.end && self.level == self.guard_level {
self.va = va;
return;
}
debug_assert!(self.level < self.guard_level);
self.level_up();
}
}
/// Map the range starting from the current address to a `VmFrame`.
///
/// # Panic
///
/// This function will panic if
/// - the virtual address range to be mapped is out of the range;
/// - it is already mapped to a huge page while the caller wants to map a smaller one.
///
/// # Safety
///
/// The caller should ensure that the virtual range being mapped does
/// not affect kernel's memory safety.
pub(crate) unsafe fn map(&mut self, frame: VmFrame, prop: PageProperty) {
let end = self.va + C::BASE_PAGE_SIZE;
assert!(end <= self.barrier_va.end);
// Go down if not applicable.
while self.level > C::HIGHEST_TRANSLATION_LEVEL
|| self.va % page_size::<C>(self.level) != 0
|| self.va + page_size::<C>(self.level) > end
{
self.level_down(Some(prop));
continue;
}
// Map the current page.
let idx = self.cur_idx();
let level = self.level;
self.cur_node_mut()
.set_child(idx, Child::Frame(frame), Some(prop), level > 1);
self.move_forward();
}
/// Map the range starting from the current address to a physical address range.
///
/// The function will map as many huge pages as possible, and it will split
/// the huge pages into smaller pages if necessary. If the input range is
/// large, the resulting mappings may look like this (if very huge pages are
/// supported):
///
/// ```text
/// start end
/// |----|----------------|--------------------------------|----|----|
/// base huge very huge base base
/// 4KiB 2MiB 1GiB 4KiB 4KiB
/// ```
///
/// In practice, using this method is discouraged for the sake of safety and conciseness.
///
/// # Safety
///
/// The caller should ensure that
/// - the range being mapped does not affect kernel's memory safety;
/// - the physical address to be mapped is valid and safe to use.
pub(crate) unsafe fn map_pa(&mut self, pa: &Range<Paddr>, prop: PageProperty) {
let end = self.va + pa.len();
let mut pa = pa.start;
assert!(end <= self.barrier_va.end);
while self.va < end {
// We ensure not mapping in reserved kernel shared tables or releasing it.
// Although it may be an invariant for all architectures and will be optimized
// out by the compiler since `C::NR_LEVELS - 1 > C::HIGHEST_TRANSLATION_LEVEL`.
let is_kernel_shared_node =
TypeId::of::<M>() == TypeId::of::<KernelMode>() && self.level >= C::NR_LEVELS - 1;
if self.level > C::HIGHEST_TRANSLATION_LEVEL
|| is_kernel_shared_node
|| self.va % page_size::<C>(self.level) != 0
|| self.va + page_size::<C>(self.level) > end
|| pa % page_size::<C>(self.level) != 0
{
self.level_down(Some(prop));
continue;
}
// Map the current page.
let idx = self.cur_idx();
let level = self.level;
self.cur_node_mut()
.set_child(idx, Child::Untracked(pa), Some(prop), level > 1);
pa += page_size::<C>(level);
self.move_forward();
}
}
/// Unmap the range starting from the current address with the given length of virtual address.
///
/// # Safety
///
/// The caller should ensure that the range being unmapped does not affect kernel's memory safety.
///
/// # Panic
///
/// This function will panic if:
/// - the range to be unmapped is out of the range where the cursor is required to operate;
/// - the range covers only a part of a page.
pub(crate) unsafe fn unmap(&mut self, len: usize) {
let end = self.va + len;
assert!(end <= self.barrier_va.end);
assert!(end % C::BASE_PAGE_SIZE == 0);
while self.va < end {
// Skip if it is already invalid.
if self.cur_child().is_none() {
if self.va + page_size::<C>(self.level) > end {
break;
}
self.move_forward();
continue;
}
// We check among the conditions that may lead to a level down.
// We ensure not unmapping in reserved kernel shared tables or releasing it.
let is_kernel_shared_node =
TypeId::of::<M>() == TypeId::of::<KernelMode>() && self.level >= C::NR_LEVELS - 1;
if is_kernel_shared_node
|| self.va % page_size::<C>(self.level) != 0
|| self.va + page_size::<C>(self.level) > end
{
self.level_down(Some(PageProperty::new_absent()));
continue;
}
// Unmap the current page.
let idx = self.cur_idx();
self.cur_node_mut().set_child(idx, Child::None, None, false);
self.move_forward();
}
}
/// Apply the given operation to all the mappings within the range.
///
/// The function will return an error if it is asked to protect an invalid range while that is
/// not allowed, or if the range to be protected covers only a part of a page.
///
/// # Safety
///
/// The caller should ensure that the range being protected does not affect kernel's memory safety.
///
/// # Panic
///
/// This function will panic if:
/// - the range to be protected is out of the range where the cursor is required to operate.
pub(crate) unsafe fn protect(
&mut self,
len: usize,
mut op: impl FnMut(&mut PageProperty),
allow_protect_invalid: bool,
) -> Result<(), PageTableError> {
let end = self.va + len;
assert!(end <= self.barrier_va.end);
while self.va < end {
if self.cur_child().is_none() {
if !allow_protect_invalid {
return Err(PageTableError::ProtectingInvalid);
}
self.move_forward();
continue;
}
// Go down if it's not a last node.
if self.cur_child().is_pt() {
self.level_down(None);
continue;
}
let vaddr_not_fit = self.va % page_size::<C>(self.level) != 0
|| self.va + page_size::<C>(self.level) > end;
let mut pte_prop = self.read_cur_pte_prop();
op(&mut pte_prop);
// Go down if the page size is too big and we are protecting part
// of untyped huge pages.
if self.cur_child().is_untyped() && vaddr_not_fit {
self.level_down(Some(pte_prop));
continue;
} else if vaddr_not_fit {
return Err(PageTableError::ProtectingPartial);
}
let idx = self.cur_idx();
let level = self.level;
self.cur_node_mut().protect(idx, pte_prop, level);
self.move_forward();
}
Ok(())
}
/// Get the information of the current slot and move to the next slot.
/// Get the information of the current slot.
pub(crate) fn query(&mut self) -> Option<PageTableQueryResult> {
if self.va >= self.barrier_va.end {
return None;
@ -358,54 +165,38 @@ where
loop {
let level = self.level;
let va = self.va;
let map_prop = self.read_cur_pte_prop();
match self.cur_child().clone() {
Child::Frame(frame) => {
self.move_forward();
return Some(PageTableQueryResult::Mapped {
va,
frame,
prop: map_prop,
});
}
Child::PageTable(_) => {
// Go down if it's not a last node.
self.level_down(None);
continue;
}
Child::Untracked(pa) => {
self.move_forward();
return Some(PageTableQueryResult::MappedUntyped {
va,
pa,
len: page_size::<C>(level),
prop: map_prop,
});
}
Child::None => {
self.move_forward();
let pte = self.read_cur_pte();
if !pte.is_present() {
return Some(PageTableQueryResult::NotMapped {
va,
len: page_size::<C>(level),
});
}
if !pte.is_last(level) {
self.level_down();
continue;
}
match self.cur_child() {
Child::Frame(frame) => {
return Some(PageTableQueryResult::Mapped {
va,
frame,
prop: pte.prop(),
});
}
Child::Untracked(pa) => {
return Some(PageTableQueryResult::MappedUntracked {
va,
pa,
len: page_size::<C>(level),
prop: pte.prop(),
});
}
Child::None | Child::PageTable(_) => {
unreachable!(); // Already checked with the PTE.
}
}
}
/// Consume itself and leak the root guard for the caller if it locked the root level.
///
/// It is useful when the caller wants to keep the root guard while the cursor should be dropped.
pub(super) fn leak_root_guard(mut self) -> Option<ArcSpinLockGuard<PageTableFrame<E, C>>> {
if self.guard_level != C::NR_LEVELS {
return None;
}
while self.level < C::NR_LEVELS {
self.level_up();
}
self.guards[0].take()
// Ok to drop self here because we ensure not to access the page table if the current
// level is the root level when running the dropping method.
}
/// Traverse forward in the current level to the next PTE.
@ -437,92 +228,75 @@ where
TypeId::of::<M>() == TypeId::of::<KernelMode>() && self.level < C::NR_LEVELS;
if can_release_child && last_node_all_unmapped {
let idx = self.cur_idx();
self.cur_node_mut().set_child(idx, Child::None, None, false);
let untracked = self.in_untracked_range();
self.cur_node_mut().unset_child(idx, false, untracked);
}
}
}
/// A level down operation during traversal. It may create a new child frame if the
/// current frame does not have one. It may also split an untyped huge page into
/// smaller pages if we have an end address within the next mapped untyped huge page.
///
/// If creation may happen, the map property of the intermediate level, `prop`, should be
/// passed in correctly. Whether the map property matters in an intermediate
/// level is architecture-dependent.
///
/// Also, the staticness of the page table is guaranteed if the caller makes sure
/// that there is a child node for the current node.
fn level_down(&mut self, prop: Option<PageProperty>) {
/// Go down a level assuming a child page table exists.
fn level_down(&mut self) {
debug_assert!(self.level > 1);
// Check if the child frame exists.
let nxt_lvl_frame = {
let idx = pte_index::<C>(self.va, self.level);
let child = self.cur_child();
if let Child::PageTable(nxt_lvl_frame) = child {
Some(nxt_lvl_frame.clone())
} else {
None
}
};
// Create a new child frame if it does not exist. Sure it could be done only if
// it is allowed to modify the page table.
let nxt_lvl_frame = nxt_lvl_frame.unwrap_or_else(|| {
// If it already maps an untyped huge page, we should split it.
if self.cur_child().is_untyped() {
let level = self.level;
let idx = self.cur_idx();
self.cur_node_mut().split_untracked_huge(level, idx);
let Child::PageTable(nxt_lvl_frame) = self.cur_child() else {
unreachable!()
};
nxt_lvl_frame.clone()
} else if self.cur_child().is_none() {
let new_frame = Arc::new(SpinLock::new(PageTableFrame::<E, C>::new()));
let idx = self.cur_idx();
self.cur_node_mut().set_child(
idx,
Child::PageTable(new_frame.clone()),
prop,
false,
);
new_frame
} else {
panic!("Trying to level down when it is mapped to a typed frame");
}
});
self.guards[(C::NR_LEVELS - self.level) as usize + 1] = Some(nxt_lvl_frame.lock_arc());
if let Child::PageTable(nxt_lvl_frame) = self.cur_child() {
self.level -= 1;
self.guards[(C::NR_LEVELS - self.level) as usize] = Some(nxt_lvl_frame.lock());
} else {
panic!("Trying to level down when it is not mapped to a page table");
}
}
fn cur_node(&self) -> &ArcSpinLockGuard<PageTableFrame<E, C>> {
fn cur_node(&self) -> &PageTableFrame<E, C> {
self.guards[(C::NR_LEVELS - self.level) as usize]
.as_ref()
.unwrap()
}
fn cur_node_mut(&mut self) -> &mut ArcSpinLockGuard<PageTableFrame<E, C>> {
self.guards[(C::NR_LEVELS - self.level) as usize]
.as_mut()
.unwrap()
}
fn cur_idx(&self) -> usize {
pte_index::<C>(self.va, self.level)
}
fn cur_child(&self) -> &Child<E, C> {
self.cur_node().child(self.cur_idx())
fn cur_child(&self) -> Child<E, C> {
self.cur_node()
.child(self.cur_idx(), !self.in_untracked_range())
}
fn read_cur_pte_prop(&self) -> PageProperty {
self.cur_node().read_pte_prop(self.cur_idx())
fn read_cur_pte(&self) -> E {
self.cur_node().read_pte(self.cur_idx())
}
/// Tell if the current virtual range must contain untracked mappings.
///
/// In the kernel mode, this is aligned with the definition in [`crate::vm::kspace`].
/// Only linear mappings in the kernel are considered as untracked mappings.
///
/// All mappings in the user mode are tracked. And all mappings in the IOMMU
/// page table are untracked.
fn in_untracked_range(&self) -> bool {
TypeId::of::<M>() == TypeId::of::<crate::arch::iommu::DeviceMode>()
|| crate::vm::kspace::LINEAR_MAPPING_VADDR_RANGE.contains(&self.va)
}
}
impl<'a, M: PageTableMode, E: PageTableEntryTrait, C: PagingConstsTrait> Iterator
for Cursor<'a, M, E, C>
where
[(); C::NR_LEVELS as usize]:,
{
type Item = PageTableQueryResult;
fn next(&mut self) -> Option<Self::Item> {
let result = self.query();
if result.is_some() {
self.move_forward();
}
result
}
}
#[cfg(feature = "page_table_recycle")]
impl<M: PageTableMode, E: PageTableEntryTrait, C: PagingConstsTrait> Drop for CursorMut<'_, M, E, C>
impl<M: PageTableMode, E: PageTableEntryTrait, C: PagingConstsTrait> Drop for Cursor<'_, M, E, C>
where
[(); nr_subpage_per_huge::<C>()]:,
[(); C::NR_LEVELS as usize]:,
{
fn drop(&mut self) {
@ -538,12 +312,14 @@ where
// Drop the lock on the guard level.
self.guards[C::NR_LEVELS - self.guard_level] = None;
// Re-walk the page table to retrieve the locks.
self.guards[0] = Some(self.pt.root_frame.lock_arc());
self.guards[0] = Some(self.pt.root.copy_handle().lock());
self.level = C::NR_LEVELS;
let cur_pte = self.read_cur_pte();
let cur_child_is_pt = cur_pte.is_present() && !cur_pte.is_last(self.level);
// Another cursor can unmap the guard level node before this cursor
// is dropped, we can just do our best here when re-walking.
while self.level > self.guard_level && self.cur_child().is_pt() {
self.level_down(None);
while self.level > self.guard_level && cur_child_is_pt {
self.level_down();
}
// Doing final cleanup by [`CursorMut::level_up`] to the root.
while self.level < C::NR_LEVELS {
@ -552,58 +328,320 @@ where
}
}
#[derive(Clone, Debug)]
pub(crate) enum PageTableQueryResult {
NotMapped {
va: Vaddr,
len: usize,
},
Mapped {
va: Vaddr,
frame: VmFrame,
prop: PageProperty,
},
MappedUntyped {
va: Vaddr,
pa: Paddr,
len: usize,
prop: PageProperty,
},
}
/// The read-only cursor for traversal over the page table.
/// The cursor of a page table that is capable of map, unmap or protect pages.
///
/// It implements the `Iterator` trait to provide a convenient way to query over the page table.
pub(crate) struct Cursor<'a, M: PageTableMode, E: PageTableEntryTrait, C: PagingConstsTrait>
/// Also, it has all the capabilities of a [`Cursor`]. A virtual address range
/// in a page table can only be accessed by one cursor whether it is mutable or not.
#[derive(Debug)]
pub(crate) struct CursorMut<'a, M: PageTableMode, E: PageTableEntryTrait, C: PagingConstsTrait>(
Cursor<'a, M, E, C>,
)
where
[(); nr_subpage_per_huge::<C>()]:,
[(); C::NR_LEVELS as usize]:,
{
inner: CursorMut<'a, M, E, C>,
}
[(); C::NR_LEVELS as usize]:;
impl<'a, M: PageTableMode, E: PageTableEntryTrait, C: PagingConstsTrait> Cursor<'a, M, E, C>
impl<'a, M: PageTableMode, E: PageTableEntryTrait, C: PagingConstsTrait> CursorMut<'a, M, E, C>
where
[(); nr_subpage_per_huge::<C>()]:,
[(); C::NR_LEVELS as usize]:,
{
pub(super) fn new(
pt: &'a PageTable<M, E, C>,
va: &Range<Vaddr>,
) -> Result<Self, PageTableError> {
CursorMut::new(pt, va).map(|inner| Self { inner })
}
}
impl<'a, M: PageTableMode, E: PageTableEntryTrait, C: PagingConstsTrait> Iterator
for Cursor<'a, M, E, C>
where
[(); nr_subpage_per_huge::<C>()]:,
[(); C::NR_LEVELS as usize]:,
{
type Item = PageTableQueryResult;
fn next(&mut self) -> Option<Self::Item> {
self.inner.query()
Cursor::new(pt, va).map(|inner| Self(inner))
}
/// Get the information of the current slot and go to the next slot.
///
/// We choose not to implement `Iterator` or `IterMut` for [`CursorMut`]
/// because the mutable cursor is indeed not an iterator.
pub(crate) fn next(&mut self) -> Option<PageTableQueryResult> {
self.0.next()
}
/// Jump to the given virtual address.
///
/// It panics if the address is out of the range where the cursor is required to operate,
/// or has bad alignment.
pub(crate) fn jump(&mut self, va: Vaddr) {
assert!(self.0.barrier_va.contains(&va));
assert!(va % C::BASE_PAGE_SIZE == 0);
loop {
let cur_node_start = self.0.va & !(page_size::<C>(self.0.level + 1) - 1);
let cur_node_end = cur_node_start + page_size::<C>(self.0.level + 1);
// If the address is within the current node, we can jump directly.
if cur_node_start <= va && va < cur_node_end {
self.0.va = va;
return;
}
// There is a corner case that the cursor is depleted, sitting at the start of the
// next node but the next node is not locked because the parent is not locked.
if self.0.va >= self.0.barrier_va.end && self.0.level == self.0.guard_level {
self.0.va = va;
return;
}
debug_assert!(self.0.level < self.0.guard_level);
self.0.level_up();
}
}
/// Map the range starting from the current address to a `VmFrame`.
///
/// # Panic
///
/// This function will panic if
/// - the virtual address range to be mapped is out of the range;
/// - the alignment of the frame is not satisfied by the virtual address;
/// - it is already mapped to a huge page while the caller wants to map a smaller one.
///
/// # Safety
///
/// The caller should ensure that the virtual range being mapped does
/// not affect kernel's memory safety.
pub(crate) unsafe fn map(&mut self, frame: VmFrame, prop: PageProperty) {
let end = self.0.va + frame.size();
assert!(end <= self.0.barrier_va.end);
debug_assert!(!self.0.in_untracked_range());
// Go down if not applicable.
while self.0.level > C::HIGHEST_TRANSLATION_LEVEL
|| self.0.va % page_size::<C>(self.0.level) != 0
|| self.0.va + page_size::<C>(self.0.level) > end
{
let pte = self.0.read_cur_pte();
if pte.is_present() && !pte.is_last(self.0.level) {
self.0.level_down();
} else if !pte.is_present() {
self.level_down_create();
} else {
panic!("Mapping a smaller page in an already mapped huge page");
}
continue;
}
debug_assert_eq!(self.0.level, frame.level());
// Map the current page.
let idx = self.0.cur_idx();
let level = self.0.level;
self.cur_node_mut().set_child_frame(idx, frame, prop);
self.0.move_forward();
}
/// Map the range starting from the current address to a physical address range.
///
/// The function will map as many huge pages as possible, and it will split
/// the huge pages into smaller pages if necessary. If the input range is
/// large, the resulting mappings may look like this (if very huge pages are
/// supported):
///
/// ```text
/// start end
/// |----|----------------|--------------------------------|----|----|
/// base huge very huge base base
/// 4KiB 2MiB 1GiB 4KiB 4KiB
/// ```
///
/// In practice, using this method is discouraged for the sake of safety and conciseness.
///
/// # Panic
///
/// This function will panic if
/// - the virtual address range to be mapped is out of the range.
///
/// # Safety
///
/// The caller should ensure that
/// - the range being mapped does not affect kernel's memory safety;
/// - the physical address to be mapped is valid and safe to use;
/// - it is allowed to map untracked pages in this virtual address range.
pub(crate) unsafe fn map_pa(&mut self, pa: &Range<Paddr>, prop: PageProperty) {
let end = self.0.va + pa.len();
let mut pa = pa.start;
assert!(end <= self.0.barrier_va.end);
while self.0.va < end {
// We ensure not mapping in reserved kernel shared tables or releasing it.
// Although it may be an invariant for all architectures and will be optimized
// out by the compiler since `C::NR_LEVELS - 1 > C::HIGHEST_TRANSLATION_LEVEL`.
let is_kernel_shared_node =
TypeId::of::<M>() == TypeId::of::<KernelMode>() && self.0.level >= C::NR_LEVELS - 1;
if self.0.level > C::HIGHEST_TRANSLATION_LEVEL
|| is_kernel_shared_node
|| self.0.va % page_size::<C>(self.0.level) != 0
|| self.0.va + page_size::<C>(self.0.level) > end
|| pa % page_size::<C>(self.0.level) != 0
{
let pte = self.0.read_cur_pte();
if pte.is_present() && !pte.is_last(self.0.level) {
self.0.level_down();
} else if !pte.is_present() {
self.level_down_create();
} else {
self.level_down_split();
}
continue;
}
// Map the current page.
debug_assert!(self.0.in_untracked_range());
let idx = self.0.cur_idx();
let level = self.0.level;
self.cur_node_mut().set_child_untracked(idx, pa, prop);
pa += page_size::<C>(level);
self.0.move_forward();
}
}
/// Unmap the range starting from the current address with the given length of virtual address.
///
/// # Safety
///
/// The caller should ensure that the range being unmapped does not affect kernel's memory safety.
///
/// # Panic
///
/// This function will panic if:
/// - the range to be unmapped is out of the range where the cursor is required to operate;
/// - the range covers only a part of a page.
pub(crate) unsafe fn unmap(&mut self, len: usize) {
let end = self.0.va + len;
assert!(end <= self.0.barrier_va.end);
assert!(end % C::BASE_PAGE_SIZE == 0);
while self.0.va < end {
let cur_pte = self.0.read_cur_pte();
let untracked = self.0.in_untracked_range();
// Skip if it is already invalid.
if !cur_pte.is_present() {
if self.0.va + page_size::<C>(self.0.level) > end {
break;
}
self.0.move_forward();
continue;
}
// We check among the conditions that may lead to a level down.
// We ensure not unmapping in reserved kernel shared tables or releasing it.
let is_kernel_shared_node =
TypeId::of::<M>() == TypeId::of::<KernelMode>() && self.0.level >= C::NR_LEVELS - 1;
if is_kernel_shared_node
|| self.0.va % page_size::<C>(self.0.level) != 0
|| self.0.va + page_size::<C>(self.0.level) > end
{
if cur_pte.is_present() && !cur_pte.is_last(self.0.level) {
self.0.level_down();
} else if untracked {
self.level_down_split();
} else {
unreachable!();
}
continue;
}
// Unmap the current page.
let idx = self.0.cur_idx();
self.cur_node_mut().unset_child(idx, untracked);
self.0.move_forward();
}
}
/// Apply the given operation to all the mappings within the range.
///
/// The function will return an error if it is asked to protect an invalid range while that is
/// not allowed, or if the range to be protected covers only a part of a page.
///
/// # Safety
///
/// The caller should ensure that the range being protected does not affect kernel's memory safety.
///
/// # Panic
///
/// This function will panic if:
/// - the range to be protected is out of the range where the cursor is required to operate.
pub(crate) unsafe fn protect(
&mut self,
len: usize,
mut op: impl FnMut(&mut PageProperty),
allow_protect_absent: bool,
) -> Result<(), PageTableError> {
let end = self.0.va + len;
assert!(end <= self.0.barrier_va.end);
while self.0.va < end {
let cur_pte = self.0.read_cur_pte();
if !cur_pte.is_present() {
if !allow_protect_absent {
return Err(PageTableError::ProtectingAbsent);
}
self.0.move_forward();
continue;
}
// Go down if it's not a last node.
if !cur_pte.is_last(self.0.level) {
self.0.level_down();
continue;
}
// Go down if the page size is too big and we are protecting part
// of untracked huge pages.
let vaddr_not_fit = self.0.va % page_size::<C>(self.0.level) != 0
|| self.0.va + page_size::<C>(self.0.level) > end;
if self.0.in_untracked_range() && vaddr_not_fit {
self.level_down_split();
continue;
} else if vaddr_not_fit {
return Err(PageTableError::ProtectingPartial);
}
let idx = self.0.cur_idx();
let level = self.0.level;
let mut pte_prop = cur_pte.prop();
op(&mut pte_prop);
self.cur_node_mut().protect(idx, pte_prop);
self.0.move_forward();
}
Ok(())
}
/// Consume itself and leak the root guard for the caller if it locked the root level.
///
/// It is useful when the caller wants to keep the root guard while the cursor should be dropped.
pub(super) fn leak_root_guard(mut self) -> Option<PageTableFrame<E, C>> {
if self.0.guard_level != C::NR_LEVELS {
return None;
}
while self.0.level < C::NR_LEVELS {
self.0.level_up();
}
self.0.guards[0].take()
// Ok to drop the cursor here because we ensure not to access the page table if the current
// level is the root level when running the dropping method.
}
/// Go down a level assuming the current slot is absent.
///
/// This method will create a new child frame and go down to it.
fn level_down_create(&mut self) {
debug_assert!(self.0.level > 1);
let new_frame = PageTableFrame::<E, C>::alloc(self.0.level - 1);
let idx = self.0.cur_idx();
let untracked = self.0.in_untracked_range();
self.cur_node_mut()
.set_child_pt(idx, new_frame.clone_raw(), untracked);
self.0.level -= 1;
self.0.guards[(C::NR_LEVELS - self.0.level) as usize] = Some(new_frame);
}
/// Go down a level assuming the current slot is an untracked huge page.
///
/// This method will split the huge page and go down to the next level.
fn level_down_split(&mut self) {
debug_assert!(self.0.level > 1);
debug_assert!(self.0.in_untracked_range());
let idx = self.0.cur_idx();
self.cur_node_mut().split_untracked_huge(idx);
let Child::PageTable(new_frame) = self.0.cur_child() else {
unreachable!();
};
self.0.level -= 1;
self.0.guards[(C::NR_LEVELS - self.0.level) as usize] = Some(new_frame.lock());
}
fn cur_node_mut(&mut self) -> &mut PageTableFrame<E, C> {
self.0.guards[(C::NR_LEVELS - self.0.level) as usize]
.as_mut()
.unwrap()
}
}


@ -1,125 +1,413 @@
// SPDX-License-Identifier: MPL-2.0
use alloc::{boxed::Box, sync::Arc};
//! This module defines page table frame abstractions and the handle.
//!
//! The page table frame is also frequently referred to as a page table in many architectural
//! documentations. We also call it the page table node if emphasizing the tree structure.
//!
//! This module leverages the frame metadata to manage the page table frames, which makes it
//! easier to provide the following guarantees:
//!
//! The page table frame is not freed when it is still in use by:
//! - a parent page table frame,
//! - or a handle to a page table frame,
//! - or a processor.
//! This is implemented by using a reference counter in the frame metadata. If the above
//! conditions are not met, the page table frame is ensured to be freed upon dropping the last
//! reference.
//!
//! One can acquire exclusive access to a page table frame using merely the physical address of
//! the page table frame. This is implemented by a lock in the frame metadata. Here the
//! exclusiveness is only ensured for kernel code, and the processor's MMU is able to access the
//! page table frame while a lock is held. So the modification to the PTEs should be done after
//! the initialization of the entity that the PTE points to. This is taken care of in this module.
//!
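In terms of the two handle types defined below, the intended lifecycle is roughly the following. This is a sketch only: `alloc`, `into_raw`, and `lock` are the methods from this file, while the surrounding glue, the `E, C` parameters, and the `level` variable are illustrative.

// Allocate an empty page table frame one level below the current one. The new
// handle is exclusive, so `alloc` does not even set the lock bit.
let pt = PageTableFrame::<E, C>::alloc(level - 1);

// Convert the handle into a raw handle so that it can be installed into a parent
// PTE or loaded onto a CPU; raw handles participate in the reference count.
let raw = pt.into_raw();

// Any holder of a raw handle can later regain exclusive access by acquiring the
// lock kept in the frame metadata.
let pt_again = raw.lock();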
use core::{marker::PhantomData, mem::ManuallyDrop, ops::Range, panic, sync::atomic::Ordering};
use super::{nr_subpage_per_huge, page_size, PageTableEntryTrait};
use crate::{
sync::SpinLock,
vm::{page_prop::PageProperty, Paddr, PagingConstsTrait, PagingLevel, VmAllocOptions, VmFrame},
arch::mm::{PageTableEntry, PagingConsts},
vm::{
frame::allocator::FRAME_ALLOCATOR, paddr_to_vaddr, page_prop::PageProperty, FrameMetaRef,
FrameType, Paddr, PagingConstsTrait, PagingLevel, VmFrame, PAGE_SIZE,
},
};
/// A page table frame.
/// It's also frequently referred to as a page table in many architectural documentations.
/// Cloning a page table frame will create a deep copy of the page table.
/// The raw handle to a page table frame.
///
/// This handle is a referencer of a page table frame. Thus creating and dropping it will affect
/// the reference count of the page table frame. If the raw handle is dropped as the last reference,
/// the page table frame and its children will be freed.
///
/// Only the CPU or a PTE can access a page table frame using a raw handle. To access the page
/// table frame from the kernel code, use the handle [`PageTableFrame`].
#[derive(Debug)]
pub(super) struct PageTableFrame<E: PageTableEntryTrait, C: PagingConstsTrait>
pub(super) struct RawPageTableFrame<E: PageTableEntryTrait, C: PagingConstsTrait>(
Paddr,
PagingLevel,
PhantomData<(E, C)>,
)
where
[(); C::NR_LEVELS as usize]:;
impl<E: PageTableEntryTrait, C: PagingConstsTrait> RawPageTableFrame<E, C>
where
[(); nr_subpage_per_huge::<C>()]:,
[(); C::NR_LEVELS as usize]:,
{
inner: VmFrame,
/// TODO: all the following fields can be removed if frame metadata is introduced.
/// Here we allow 2x space overhead each frame temporarily.
#[allow(clippy::type_complexity)]
children: Box<[Child<E, C>; nr_subpage_per_huge::<C>()]>,
nr_valid_children: usize,
pub(super) fn paddr(&self) -> Paddr {
self.0
}
/// Convert a raw handle to an accessible handle by acquiring the lock.
pub(super) fn lock(self) -> PageTableFrame<E, C> {
let meta = unsafe { FrameMetaRef::from_raw(self.0, 1) };
let level = self.1;
// Acquire the lock.
while meta
.counter8_1
.compare_exchange(0, 1, Ordering::Acquire, Ordering::Relaxed)
.is_err()
{
core::hint::spin_loop();
}
// Prevent dropping the handle.
let _ = ManuallyDrop::new(self);
PageTableFrame::<E, C> {
meta,
newly_created: false,
level,
_phantom: PhantomData,
}
}
/// Create a copy of the handle.
pub(super) fn copy_handle(&self) -> Self {
let meta = unsafe { FrameMetaRef::from_raw(self.0, 1) };
// Increment the reference count.
meta.counter32_1.fetch_add(1, Ordering::Relaxed);
Self(self.0, self.1, PhantomData)
}
pub(super) fn nr_valid_children(&self) -> u16 {
let meta = unsafe { FrameMetaRef::from_raw(self.0, 1) };
meta.counter16_1.load(Ordering::Relaxed)
}
/// Activate the page table assuming it is a root page table.
///
/// Here we ensure that an active page table is not dropped by making a
/// processor an owner of the page table. When activating a page table, the
/// reference count of the last activated page table is decremented,
/// and that of the current page table is incremented.
///
/// # Safety
///
/// The caller must ensure that the page table to be activated has
/// proper mappings for the kernel and has the correct const parameters
/// matching the current CPU.
pub(crate) unsafe fn activate(&self) {
use core::sync::atomic::AtomicBool;
use crate::{
arch::mm::{activate_page_table, current_page_table_paddr},
vm::CachePolicy,
};
debug_assert_eq!(self.1, PagingConsts::NR_LEVELS);
let last_activated_paddr = current_page_table_paddr();
activate_page_table(self.0, CachePolicy::Writeback);
if last_activated_paddr == self.0 {
return;
}
// Increment the reference count of the current page table.
FrameMetaRef::from_raw(self.0, 1)
.counter32_1
.fetch_add(1, Ordering::Relaxed);
// Decrement the reference count of the last activated page table.
// Boot page tables are not tracked with [`PageTableFrame`], but
// all page tables after the boot stage are tracked.
//
// TODO: the `cpu_local` implementation currently is underpowered,
// there's no need to use `AtomicBool` here.
crate::cpu_local! {
static CURRENT_IS_BOOT_PT: AtomicBool = AtomicBool::new(true);
}
if !CURRENT_IS_BOOT_PT.load(Ordering::Acquire) {
// Restore and drop the last activated page table.
let _last_activated_pt =
Self(last_activated_paddr, PagingConsts::NR_LEVELS, PhantomData);
} else {
CURRENT_IS_BOOT_PT.store(false, Ordering::Release);
}
}
}
pub(super) type PtfRef<E, C> = Arc<SpinLock<PageTableFrame<E, C>>>;
#[derive(Debug)]
pub(super) enum Child<E: PageTableEntryTrait, C: PagingConstsTrait>
impl<E: PageTableEntryTrait, C: PagingConstsTrait> Drop for RawPageTableFrame<E, C>
where
[(); nr_subpage_per_huge::<C>()]:,
[(); C::NR_LEVELS as usize]:,
{
PageTable(PtfRef<E, C>),
fn drop(&mut self) {
let mut meta = unsafe { FrameMetaRef::from_raw(self.0, 1) };
if meta.counter32_1.fetch_sub(1, Ordering::Release) == 1 {
// A fence is needed here with the same reasons stated in the implementation of
// `Arc::drop`: <https://doc.rust-lang.org/std/sync/struct.Arc.html#method.drop>.
core::sync::atomic::fence(Ordering::Acquire);
// Drop the children.
for i in 0..nr_subpage_per_huge::<C>() {
// SAFETY: the index is within the bound and PTE is plain-old-data. The
// address is aligned as well. We also have an exclusive access ensured
// by reference counting.
let pte_ptr = unsafe { (paddr_to_vaddr(self.paddr()) as *const E).add(i) };
let pte = unsafe { pte_ptr.read() };
if pte.is_present() {
// Just restore the handle and drop the handle.
if !pte.is_last(self.1) {
// This is a page table.
let _dropping_raw = Self(pte.paddr(), self.1 - 1, PhantomData);
} else {
// This is a frame. You cannot drop a page table node that maps to
// untracked frames. This must be verified.
let frame_meta = unsafe { FrameMetaRef::from_raw(pte.paddr(), self.1) };
let _dropping_frame = VmFrame { meta: frame_meta };
}
}
}
// SAFETY: the frame is initialized and the physical address points to initialized memory.
// We also have exclusive access ensured by reference counting.
unsafe {
meta.deref_mut().frame_type = FrameType::Free;
}
// Recycle this page table frame.
FRAME_ALLOCATOR
.get()
.unwrap()
.lock()
.dealloc(self.0 / PAGE_SIZE, 1);
}
}
}
/// A mutable handle to a page table frame.
///
/// The page table frame can own a set of handles to children, ensuring that the children
/// don't outlive the page table frame. Cloning a page table frame will create a deep copy
/// of the page table. Dropping the page table frame will also drop all handles if the page
/// table frame has no references. You can set the page table frame as a child of another
/// page table frame.
#[derive(Debug)]
pub(super) struct PageTableFrame<
E: PageTableEntryTrait = PageTableEntry,
C: PagingConstsTrait = PagingConsts,
> where
[(); C::NR_LEVELS as usize]:,
{
pub(super) meta: FrameMetaRef,
/// This is an optimization to save a few atomic operations on the lock.
///
/// If the handle is newly created using [`Self::alloc`], this is true and there's no need
/// to acquire the lock since the handle is exclusive. However if the handle is acquired
/// from a [`RawPageTableFrame`], this is false and the lock should be acquired.
newly_created: bool,
/// The level of the page table frame. This is needed because we cannot tell from a PTE
/// alone if it is a page table or a frame.
level: PagingLevel,
_phantom: core::marker::PhantomData<(E, C)>,
}
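// Illustrative sketch (not part of this patch): the intended life cycle of the
// two handle types, assuming the default `PageTableEntry`/`PagingConsts`
// generics and a tracked (user-space) range.
fn _handle_lifecycle_sketch() {
    // Freshly allocated handles are exclusive and hold no lock.
    let mut parent = PageTableFrame::<PageTableEntry, PagingConsts>::alloc(2);
    let child = PageTableFrame::<PageTableEntry, PagingConsts>::alloc(1);
    // Ownership of the child moves into the parent's PTE slot.
    parent.set_child_pt(0, child.into_raw(), /* in_untracked_range */ false);
    // The parent itself becomes a raw handle that a PTE (or the CPU) can hold.
    let _raw: RawPageTableFrame<PageTableEntry, PagingConsts> = parent.into_raw();
}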
/// A child of a page table frame.
#[derive(Debug)]
pub(super) enum Child<E: PageTableEntryTrait = PageTableEntry, C: PagingConstsTrait = PagingConsts>
where
[(); C::NR_LEVELS as usize]:,
{
PageTable(RawPageTableFrame<E, C>),
Frame(VmFrame),
/// Frames not tracked by the frame allocator.
/// Frames not tracked by handles.
Untracked(Paddr),
None,
}
impl<E: PageTableEntryTrait, C: PagingConstsTrait> Child<E, C>
where
[(); nr_subpage_per_huge::<C>()]:,
[(); C::NR_LEVELS as usize]:,
{
pub(super) fn is_pt(&self) -> bool {
matches!(self, Child::PageTable(_))
}
pub(super) fn is_frame(&self) -> bool {
matches!(self, Child::Frame(_))
}
pub(super) fn is_none(&self) -> bool {
matches!(self, Child::None)
}
pub(super) fn is_some(&self) -> bool {
!self.is_none()
}
pub(super) fn is_untyped(&self) -> bool {
matches!(self, Child::Untracked(_))
}
/// Whether this is a last-level entry that maps to a physical address.
pub(super) fn is_last(&self) -> bool {
matches!(self, Child::Frame(_) | Child::Untracked(_))
}
fn paddr(&self) -> Option<Paddr> {
match self {
Child::PageTable(node) => {
// The chance of deadlock is zero because this is only called by
// [`PageTableFrame::protect`], and the cursor will not protect a node while
// holding the lock.
Some(node.lock().start_paddr())
}
Child::Frame(frame) => Some(frame.start_paddr()),
Child::Untracked(pa) => Some(*pa),
Child::None => None,
}
}
}
impl<E: PageTableEntryTrait, C: PagingConstsTrait> Clone for Child<E, C>
where
[(); nr_subpage_per_huge::<C>()]:,
[(); C::NR_LEVELS as usize]:,
{
/// This is a shallow copy.
fn clone(&self) -> Self {
match self {
Child::PageTable(ptf) => Child::PageTable(ptf.clone()),
Child::Frame(frame) => Child::Frame(frame.clone()),
Child::Untracked(pa) => Child::Untracked(*pa),
Child::None => Child::None,
}
}
}
impl<E: PageTableEntryTrait, C: PagingConstsTrait> PageTableFrame<E, C>
where
[(); nr_subpage_per_huge::<C>()]:,
[(); C::NR_LEVELS as usize]:,
{
pub(super) fn new() -> Self {
/// Allocate a new empty page table frame.
///
/// This function returns an owning handle. The newly created handle does not
/// set the lock bit, for performance: the handle is exclusive, so taking and
/// releasing the lock would be an unnecessary extra cost.
pub(super) fn alloc(level: PagingLevel) -> Self {
let frame = FRAME_ALLOCATOR.get().unwrap().lock().alloc(1).unwrap() * PAGE_SIZE;
let mut meta = unsafe { FrameMetaRef::from_raw(frame, 1) };
// The reference count is initialized to 1.
meta.counter32_1.store(1, Ordering::Relaxed);
// The lock is initialized to 0.
meta.counter8_1.store(0, Ordering::Release);
// SAFETY: we have exclusive access here since the frame has just been allocated.
unsafe {
meta.deref_mut().frame_type = FrameType::PageTable;
}
// Zero out the page table frame.
let ptr = paddr_to_vaddr(meta.paddr()) as *mut u8;
unsafe { core::ptr::write_bytes(ptr, 0, PAGE_SIZE) };
Self {
inner: VmAllocOptions::new(1).alloc_single().unwrap(),
children: Box::new(core::array::from_fn(|_| Child::None)),
nr_valid_children: 0,
meta,
newly_created: true,
level,
_phantom: PhantomData,
}
}
pub(super) fn start_paddr(&self) -> Paddr {
self.inner.start_paddr()
/// Convert the handle into a raw handle to be stored in a PTE or CPU.
pub(super) fn into_raw(mut self) -> RawPageTableFrame<E, C> {
if !self.newly_created {
self.meta.counter8_1.store(0, Ordering::Release);
} else {
self.newly_created = false;
}
let raw = RawPageTableFrame(self.start_paddr(), self.level, PhantomData);
let _ = ManuallyDrop::new(self);
raw
}
pub(super) fn child(&self, idx: usize) -> &Child<E, C> {
/// Get a raw handle while still preserving the original handle.
pub(super) fn clone_raw(&self) -> RawPageTableFrame<E, C> {
self.meta.counter32_1.fetch_add(1, Ordering::Relaxed);
RawPageTableFrame(self.start_paddr(), self.level, PhantomData)
}
/// Get an extra reference of the child at the given index.
pub(super) fn child(&self, idx: usize, tracked: bool) -> Child<E, C> {
debug_assert!(idx < nr_subpage_per_huge::<C>());
&self.children[idx]
let pte = self.read_pte(idx);
if !pte.is_present() {
Child::None
} else {
let paddr = pte.paddr();
if !pte.is_last(self.level) {
let meta = unsafe { FrameMetaRef::from_raw(paddr, 1) };
// This is the handle count. We are creating a new handle, so increment the counter.
meta.counter32_1.fetch_add(1, Ordering::Relaxed);
Child::PageTable(RawPageTableFrame(paddr, self.level - 1, PhantomData))
} else if tracked {
let meta = unsafe { FrameMetaRef::from_raw(paddr, self.level) };
// This is the handle count. We are creating a new handle, so increment the counter.
meta.counter32_1.fetch_add(1, Ordering::Relaxed);
Child::Frame(VmFrame { meta })
} else {
Child::Untracked(paddr)
}
}
}
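// Illustrative sketch (not part of this patch): how a caller can inspect a
// slot through `child`, assuming a tracked (user-space) range.
fn _inspect_slot_sketch(ptf: &PageTableFrame<PageTableEntry, PagingConsts>, idx: usize) {
    match ptf.child(idx, /* tracked */ true) {
        // A child page table: lock the raw handle to descend into it.
        Child::PageTable(_pt) => {}
        // A mapped frame whose lifetime is tracked by the `VmFrame` handle.
        Child::Frame(_frame) => {}
        // Only returned when `tracked` is false (kernel untracked ranges).
        Child::Untracked(_pa) => {}
        // The slot is empty.
        Child::None => {}
    }
}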
/// Make a copy of the page table frame.
///
/// This function allows you to control how the children are copied.
/// For indexes in `deep`, the children are deep copied and this function will be recursively called.
/// For indexes in `shallow`, the children are shallow copied as new references.
///
/// You cannot shallow copy a child that is mapped to a frame. Deep copying a frame child will not
/// copy the mapped frame but will copy the handle to the frame.
///
/// You can neither deep-copy nor shallow-copy a child that is mapped to an untracked frame.
///
/// The ranges must be disjoint.
pub(super) unsafe fn make_copy(&self, deep: Range<usize>, shallow: Range<usize>) -> Self {
let mut new_frame = Self::alloc(self.level);
debug_assert!(deep.end <= nr_subpage_per_huge::<C>());
debug_assert!(shallow.end <= nr_subpage_per_huge::<C>());
debug_assert!(deep.end <= shallow.start || deep.start >= shallow.end);
for i in deep {
match self.child(i, /*meaningless*/ true) {
Child::PageTable(pt) => {
let guard = pt.copy_handle().lock();
let new_child = guard.make_copy(0..nr_subpage_per_huge::<C>(), 0..0);
new_frame.set_child_pt(i, new_child.into_raw(), /*meaningless*/ true);
}
Child::Frame(frame) => {
let prop = self.read_pte_prop(i);
new_frame.set_child_frame(i, frame.clone(), prop);
}
Child::None => {}
Child::Untracked(_) => {
unreachable!();
}
}
}
for i in shallow {
debug_assert_eq!(self.level, C::NR_LEVELS);
match self.child(i, /*meaningless*/ true) {
Child::PageTable(pt) => {
new_frame.set_child_pt(i, pt.copy_handle(), /*meaningless*/ true);
}
Child::None => {}
Child::Frame(_) | Child::Untracked(_) => {
unreachable!();
}
}
}
new_frame
}
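// Illustrative sketch (not part of this patch): how the copy-on-write fork of
// a user root is expected to use `make_copy` — deep-copy the lower (user) half
// and share the upper (kernel) half as new references, mirroring
// `fork_copy_on_write` further below.
fn _fork_root_sketch(
    root: &PageTableFrame<PageTableEntry, PagingConsts>,
) -> PageTableFrame<PageTableEntry, PagingConsts> {
    const NR_PTES_PER_NODE: usize = nr_subpage_per_huge::<PagingConsts>();
    // SAFETY (sketch): the ranges are disjoint, and the shallow half holds
    // only kernel-shared page tables, as required by `make_copy`.
    unsafe { root.make_copy(0..NR_PTES_PER_NODE / 2, NR_PTES_PER_NODE / 2..NR_PTES_PER_NODE) }
}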
/// Remove a child if the child at the given index is present.
pub(super) fn unset_child(&self, idx: usize, in_untracked_range: bool) {
debug_assert!(idx < nr_subpage_per_huge::<C>());
self.overwrite_pte(idx, None, in_untracked_range);
}
/// Set a child page table at a given index.
pub(super) fn set_child_pt(
&mut self,
idx: usize,
pt: RawPageTableFrame<E, C>,
in_untracked_range: bool,
) {
// They should be ensured by the cursor.
debug_assert!(idx < nr_subpage_per_huge::<C>());
debug_assert_eq!(pt.1, self.level - 1);
let pte = Some(E::new_pt(pt.paddr()));
self.overwrite_pte(idx, pte, in_untracked_range);
// The ownership is transferred to a raw PTE. Don't drop the handle.
let _ = ManuallyDrop::new(pt);
}
/// Map a frame at a given index.
pub(super) fn set_child_frame(&mut self, idx: usize, frame: VmFrame, prop: PageProperty) {
// They should be ensured by the cursor.
debug_assert!(idx < nr_subpage_per_huge::<C>());
debug_assert_eq!(frame.level(), self.level);
let pte = Some(E::new_frame(frame.start_paddr(), self.level, prop));
self.overwrite_pte(idx, pte, false);
// The ownership is transferred to a raw PTE. Don't drop the handle.
let _ = ManuallyDrop::new(frame);
}
/// Set an untracked child frame at a given index.
///
/// # Safety
///
/// The caller must ensure that the physical address is valid and safe to map.
pub(super) unsafe fn set_child_untracked(&mut self, idx: usize, pa: Paddr, prop: PageProperty) {
// It should be ensured by the cursor.
debug_assert!(idx < nr_subpage_per_huge::<C>());
let pte = Some(E::new_frame(pa, self.level, prop));
self.overwrite_pte(idx, pte, true);
}
/// The number of mapped frames or page tables.
/// This is used to track whether this node can be freed.
pub(super) fn nr_valid_children(&self) -> usize {
self.nr_valid_children
pub(super) fn nr_valid_children(&self) -> u16 {
self.meta.counter16_1.load(Ordering::Relaxed)
}
/// Read the info from a page table entry at a given index.
@ -128,142 +416,104 @@ where
}
/// Split the untracked huge page mapped at `idx` to smaller pages.
pub(super) fn split_untracked_huge(&mut self, cur_level: PagingLevel, idx: usize) {
pub(super) fn split_untracked_huge(&mut self, idx: usize) {
// These should be ensured by the cursor.
debug_assert!(idx < nr_subpage_per_huge::<C>());
debug_assert!(cur_level > 1);
let Child::Untracked(pa) = self.children[idx] else {
panic!("split_untracked_huge: not an untyped huge page");
debug_assert!(self.level > 1);
let Child::Untracked(pa) = self.child(idx, false) else {
panic!("`split_untracked_huge` not called on an untracked huge page");
};
let prop = self.read_pte_prop(idx);
let mut new_frame = Self::new();
let mut new_frame = PageTableFrame::<E, C>::alloc(self.level - 1);
for i in 0..nr_subpage_per_huge::<C>() {
let small_pa = pa + i * page_size::<C>(cur_level - 1);
new_frame.set_child(i, Child::Untracked(small_pa), Some(prop), cur_level - 1 > 1);
let small_pa = pa + i * page_size::<C>(self.level - 1);
unsafe { new_frame.set_child_untracked(i, small_pa, prop) };
}
self.set_child(
idx,
Child::PageTable(Arc::new(SpinLock::new(new_frame))),
Some(prop),
false,
);
}
/// Map a child at a given index.
/// If mapping a non-none child, please give the property to map the child.
pub(super) fn set_child(
&mut self,
idx: usize,
child: Child<E, C>,
prop: Option<PageProperty>,
huge: bool,
) {
assert!(idx < nr_subpage_per_huge::<C>());
// SAFETY: the index is within the bound and the PTE to be written is valid.
// And the physical address of PTE points to initialized memory.
// This applies to all the following `write_pte` invocations.
unsafe {
match &child {
Child::PageTable(node) => {
debug_assert!(!huge);
let frame = node.lock();
self.write_pte(
idx,
E::new(frame.inner.start_paddr(), prop.unwrap(), false, false),
);
self.nr_valid_children += 1;
}
Child::Frame(frame) => {
debug_assert!(!huge); // `VmFrame` currently can only be a regular page.
self.write_pte(idx, E::new(frame.start_paddr(), prop.unwrap(), false, true));
self.nr_valid_children += 1;
}
Child::Untracked(pa) => {
self.write_pte(idx, E::new(*pa, prop.unwrap(), huge, true));
self.nr_valid_children += 1;
}
Child::None => {
self.write_pte(idx, E::new_absent());
}
}
}
if self.children[idx].is_some() {
self.nr_valid_children -= 1;
}
self.children[idx] = child;
self.set_child_pt(idx, new_frame.into_raw(), true);
}
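// Worked example (not part of this patch): with x86-64 paging constants, a
// level-2 untracked mapping at physical address `pa` covers 2 MiB; after the
// split, entry `i` of the new level-1 node maps the 4 KiB piece at
// `pa + i * page_size::<PagingConsts>(1)`, keeping the original property.
fn _split_arithmetic_sketch(pa: Paddr) {
    let child_size = page_size::<PagingConsts>(1); // 4096 bytes on x86-64
    for i in 0..nr_subpage_per_huge::<PagingConsts>() {
        let _small_pa = pa + i * child_size;
    }
}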
/// Protect an already mapped child at a given index.
pub(super) fn protect(&mut self, idx: usize, prop: PageProperty, level: PagingLevel) {
debug_assert!(self.children[idx].is_some());
let paddr = self.children[idx].paddr().unwrap();
pub(super) fn protect(&mut self, idx: usize, prop: PageProperty) {
let mut pte = self.read_pte(idx);
debug_assert!(pte.is_present()); // This should be ensured by the cursor.
pte.set_prop(prop);
// SAFETY: the index is within the bound and the PTE is valid.
unsafe {
self.write_pte(
idx,
E::new(paddr, prop, level > 1, self.children[idx].is_last()),
);
(self.as_ptr() as *mut E).add(idx).write(pte);
}
}
fn read_pte(&self, idx: usize) -> E {
assert!(idx < nr_subpage_per_huge::<C>());
pub(super) fn read_pte(&self, idx: usize) -> E {
// It should be ensured by the cursor.
debug_assert!(idx < nr_subpage_per_huge::<C>());
// SAFETY: the index is within the bound and PTE is plain-old-data.
unsafe { (self.inner.as_ptr() as *const E).add(idx).read() }
unsafe { self.as_ptr().add(idx).read() }
}
/// Write a page table entry at a given index.
fn start_paddr(&self) -> Paddr {
self.meta.paddr()
}
/// Replace a page table entry at a given index.
///
/// # Safety
/// This method will ensure that the child represented by the overwritten
/// PTE is dropped, and the child count is updated.
///
/// The caller must ensure that:
/// - the index is within bounds;
/// - the PTE is valid and the physical address in the PTE points to initialized memory.
unsafe fn write_pte(&mut self, idx: usize, pte: E) {
(self.inner.as_mut_ptr() as *mut E).add(idx).write(pte);
/// The caller in this module will ensure that the PTE points to initialized
/// memory if the child is a page table.
fn overwrite_pte(&self, idx: usize, pte: Option<E>, in_untracked_range: bool) {
let existing_pte = self.read_pte(idx);
if existing_pte.is_present() {
// SAFETY: The index is within the bound and the address is aligned.
// The validity of the PTE is checked within this module.
// The same safety argument also holds in the following branch.
unsafe {
(self.as_ptr() as *mut E)
.add(idx)
.write(pte.unwrap_or(E::new_absent()))
};
// Drop the child. The PTE must be set before the child is dropped. To
// drop the child, just restore its handle and let the handle drop.
let paddr = existing_pte.paddr();
if !existing_pte.is_last(self.level) {
// This is a page table.
let _dropping_raw = RawPageTableFrame::<E, C>(paddr, self.level - 1, PhantomData);
} else if !in_untracked_range {
// This is a frame.
let meta = unsafe { FrameMetaRef::from_raw(paddr, self.level) };
let _dropping_frame = VmFrame { meta };
}
if pte.is_none() {
// Decrement the child count.
self.meta.counter16_1.fetch_sub(1, Ordering::Relaxed);
}
} else if let Some(e) = pte {
unsafe { (self.as_ptr() as *mut E).add(idx).write(e) };
// Increment the child count.
self.meta.counter16_1.fetch_add(1, Ordering::Relaxed);
}
}
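// Summary (not part of this patch) of the transitions `overwrite_pte` handles,
// assuming `idx` is in bounds:
//   present -> Some(pte): write the new PTE, drop the old child, count unchanged;
//   present -> None:      clear the PTE, drop the old child, child count -= 1;
//   absent  -> Some(pte): write the new PTE, child count += 1;
//   absent  -> None:      nothing to do.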
fn as_ptr(&self) -> *const E {
paddr_to_vaddr(self.start_paddr()) as *const E
}
}
impl<E: PageTableEntryTrait, C: PagingConstsTrait> Clone for PageTableFrame<E, C>
impl<E: PageTableEntryTrait, C: PagingConstsTrait> Drop for PageTableFrame<E, C>
where
[(); nr_subpage_per_huge::<C>()]:,
[(); C::NR_LEVELS as usize]:,
{
/// Make a deep copy of the page table.
/// The child page tables are also being deep copied.
fn clone(&self) -> Self {
let new_frame = VmAllocOptions::new(1).alloc_single().unwrap();
let new_ptr = new_frame.as_mut_ptr() as *mut E;
let children = Box::new(core::array::from_fn(|i| match self.child(i) {
Child::PageTable(node) => unsafe {
let frame = node.lock();
// Possibly a cursor is waiting for the root lock to recycle this node.
// We can skip copying empty page table nodes.
if frame.nr_valid_children() != 0 {
let cloned = frame.clone();
let pte = self.read_pte(i);
new_ptr.add(i).write(E::new(
cloned.inner.start_paddr(),
pte.prop(),
false,
false,
));
Child::PageTable(Arc::new(SpinLock::new(cloned)))
} else {
Child::None
}
},
Child::Frame(_) | Child::Untracked(_) => {
unsafe {
new_ptr.add(i).write(self.read_pte(i));
}
self.children[i].clone()
}
Child::None => Child::None,
}));
Self {
inner: new_frame,
children,
nr_valid_children: self.nr_valid_children,
fn drop(&mut self) {
// Release the lock.
if !self.newly_created {
self.meta.counter8_1.store(0, Ordering::Release);
}
// Drop the frame by `RawPageTableFrame::drop`.
let _dropping_raw = RawPageTableFrame::<E, C>(self.start_paddr(), self.level, PhantomData);
}
}

View File

@ -1,19 +1,15 @@
// SPDX-License-Identifier: MPL-2.0
use alloc::sync::Arc;
use core::{fmt::Debug, marker::PhantomData, ops::Range, panic};
use core::{fmt::Debug, marker::PhantomData, ops::Range};
use pod::Pod;
use super::{
nr_subpage_per_huge, paddr_to_vaddr,
page_prop::{CachePolicy, PageFlags, PageProperty, PrivilegedPageFlags},
page_prop::{PageFlags, PageProperty},
page_size, Paddr, PagingConstsTrait, PagingLevel, Vaddr,
};
use crate::{
arch::mm::{activate_page_table, PageTableEntry, PagingConsts},
sync::SpinLock,
};
use crate::arch::mm::{PageTableEntry, PagingConsts};
mod frame;
use frame::*;
@ -31,7 +27,7 @@ pub enum PageTableError {
/// Using virtual address not aligned.
UnalignedVaddr,
/// Protecting a mapping that does not exist.
ProtectingInvalid,
ProtectingAbsent,
/// Protecting a part of an already mapped page.
ProtectingPartial,
}
@ -84,23 +80,18 @@ pub(crate) struct PageTable<
E: PageTableEntryTrait = PageTableEntry,
C: PagingConstsTrait = PagingConsts,
> where
[(); nr_subpage_per_huge::<C>()]:,
[(); C::NR_LEVELS as usize]:,
{
root_frame: PtfRef<E, C>,
root: RawPageTableFrame<E, C>,
_phantom: PhantomData<M>,
}
impl<E: PageTableEntryTrait, C: PagingConstsTrait> PageTable<UserMode, E, C>
where
[(); nr_subpage_per_huge::<C>()]:,
[(); C::NR_LEVELS as usize]:,
{
impl PageTable<UserMode> {
pub(crate) fn activate(&self) {
// SAFETY: The usermode page table is safe to activate since the kernel
// mappings are shared.
unsafe {
self.activate_unchecked();
self.root.activate();
}
}
@ -121,48 +112,21 @@ where
.unwrap();
};
let root_frame = cursor.leak_root_guard().unwrap();
let mut new_root_frame = PageTableFrame::<E, C>::new();
let half_of_entries = nr_subpage_per_huge::<C>() / 2;
for i in 0..half_of_entries {
// This is user space, deep copy the child.
match root_frame.child(i) {
Child::PageTable(node) => {
let frame = node.lock();
// Possibly a cursor is waiting for the root lock to recycle this node.
// We can skip copying empty page table nodes.
if frame.nr_valid_children() != 0 {
let cloned = frame.clone();
let pt = Child::PageTable(Arc::new(SpinLock::new(cloned)));
new_root_frame.set_child(i, pt, Some(root_frame.read_pte_prop(i)), false);
}
}
Child::None => {}
Child::Frame(_) | Child::Untracked(_) => {
panic!("Unexpected map child.");
}
}
}
for i in half_of_entries..nr_subpage_per_huge::<C>() {
// This is kernel space, share the child.
new_root_frame.set_child(
i,
root_frame.child(i).clone(),
Some(root_frame.read_pte_prop(i)),
false,
const NR_PTES_PER_NODE: usize = nr_subpage_per_huge::<PagingConsts>();
let new_root_frame = unsafe {
root_frame.make_copy(
0..NR_PTES_PER_NODE / 2,
NR_PTES_PER_NODE / 2..NR_PTES_PER_NODE,
)
}
PageTable::<UserMode, E, C> {
root_frame: Arc::new(SpinLock::new(new_root_frame)),
};
PageTable::<UserMode> {
root: new_root_frame.into_raw(),
_phantom: PhantomData,
}
}
}
impl<E: PageTableEntryTrait, C: PagingConstsTrait> PageTable<KernelMode, E, C>
where
[(); nr_subpage_per_huge::<C>()]:,
[(); C::NR_LEVELS as usize]:,
{
impl PageTable<KernelMode> {
/// Create a new user page table.
///
/// This should be the only way to create the first user page table, that is
@ -170,19 +134,13 @@ where
///
/// Then, one can use a user page table to call [`fork_copy_on_write`], creating
/// other child page tables.
pub(crate) fn create_user_page_table(&self) -> PageTable<UserMode, E, C> {
let mut new_root_frame = PageTableFrame::<E, C>::new();
let root_frame = self.root_frame.lock();
for i in nr_subpage_per_huge::<C>() / 2..nr_subpage_per_huge::<C>() {
new_root_frame.set_child(
i,
root_frame.child(i).clone(),
Some(root_frame.read_pte_prop(i)),
false,
)
}
PageTable::<UserMode, E, C> {
root_frame: Arc::new(SpinLock::new(new_root_frame)),
pub(crate) fn create_user_page_table(&self) -> PageTable<UserMode> {
let root_frame = self.root.copy_handle().lock();
const NR_PTES_PER_NODE: usize = nr_subpage_per_huge::<PagingConsts>();
let new_root_frame =
unsafe { root_frame.make_copy(0..0, NR_PTES_PER_NODE / 2..NR_PTES_PER_NODE) };
PageTable::<UserMode> {
root: new_root_frame.into_raw(),
_phantom: PhantomData,
}
}
@ -193,26 +151,17 @@ where
/// usize overflows, the caller should provide the index range of the root level pages
/// instead of the virtual address range.
pub(crate) fn make_shared_tables(&self, root_index: Range<usize>) {
const NR_PTES_PER_NODE: usize = nr_subpage_per_huge::<PagingConsts>();
let start = root_index.start;
debug_assert!(start >= nr_subpage_per_huge::<C>() / 2);
debug_assert!(start < nr_subpage_per_huge::<C>());
debug_assert!(start >= NR_PTES_PER_NODE / 2);
debug_assert!(start < NR_PTES_PER_NODE);
let end = root_index.end;
debug_assert!(end <= nr_subpage_per_huge::<C>());
let mut root_frame = self.root_frame.lock();
debug_assert!(end <= NR_PTES_PER_NODE);
let mut root_frame = self.root.copy_handle().lock();
for i in start..end {
let no_such_child = root_frame.child(i).is_none();
if no_such_child {
let frame = Arc::new(SpinLock::new(PageTableFrame::<E, C>::new()));
root_frame.set_child(
i,
Child::PageTable(frame),
Some(PageProperty {
flags: PageFlags::RWX,
cache: CachePolicy::Writeback,
priv_flags: PrivilegedPageFlags::GLOBAL,
}),
false,
)
if !root_frame.read_pte(i).is_present() {
let frame = PageTableFrame::alloc(PagingConsts::NR_LEVELS - 1);
root_frame.set_child_pt(i, frame.into_raw(), i < NR_PTES_PER_NODE * 3 / 4);
}
}
}
@ -220,20 +169,26 @@ where
impl<'a, M: PageTableMode, E: PageTableEntryTrait, C: PagingConstsTrait> PageTable<M, E, C>
where
[(); nr_subpage_per_huge::<C>()]:,
[(); C::NR_LEVELS as usize]:,
{
/// Create a new empty page table. Useful for the kernel page table and IOMMU page tables only.
pub(crate) fn empty() -> Self {
PageTable {
root_frame: Arc::new(SpinLock::new(PageTableFrame::<E, C>::new())),
root: PageTableFrame::<E, C>::alloc(C::NR_LEVELS).into_raw(),
_phantom: PhantomData,
}
}
pub(crate) unsafe fn activate_unchecked(&self) {
self.root.activate();
}
/// The physical address of the root page table.
pub(crate) fn root_paddr(&self) -> Paddr {
self.root_frame.lock().start_paddr()
///
/// It is dangerous to directly provide the physical address of the root page table to the
/// hardware since the page table frame may be dropped, resulting in UAF.
pub(crate) unsafe fn root_paddr(&self) -> Paddr {
self.root.paddr()
}
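// Illustrative hazard (not part of this patch), showing why this is `unsafe`:
//
//     let pa = unsafe { pt.root_paddr() };
//     drop(pt);          // the root frame's last reference may go away here
//     /* hand `pa` to the MMU/IOMMU */  // use-after-free
//
// The caller must keep the `PageTable` alive for as long as the hardware may
// walk the returned root.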
pub(crate) unsafe fn map(
@ -272,10 +227,6 @@ where
unsafe { page_walk::<E, C>(self.root_paddr(), vaddr) }
}
pub(crate) unsafe fn activate_unchecked(&self) {
activate_page_table(self.root_paddr(), CachePolicy::Writeback);
}
/// Create a new cursor exclusively accessing the virtual address range for mapping.
///
/// If another cursor is already accessing the range, the new cursor will wait until the
@ -303,21 +254,7 @@ where
/// This is only useful for IOMMU page tables. Think twice before using it in other cases.
pub(crate) unsafe fn shallow_copy(&self) -> Self {
PageTable {
root_frame: self.root_frame.clone(),
_phantom: PhantomData,
}
}
}
impl<M: PageTableMode, E: PageTableEntryTrait, C: PagingConstsTrait> Clone for PageTable<M, E, C>
where
[(); nr_subpage_per_huge::<C>()]:,
[(); C::NR_LEVELS as usize]:,
{
fn clone(&self) -> Self {
let frame = self.root_frame.lock();
PageTable {
root_frame: Arc::new(SpinLock::new(frame.clone())),
root: self.root.copy_handle(),
_phantom: PhantomData,
}
}
@ -361,7 +298,7 @@ pub(super) unsafe fn page_walk<E: PageTableEntryTrait, C: PagingConstsTrait>(
if !cur_pte.is_present() {
return None;
}
if cur_pte.is_huge() {
if cur_pte.is_last(cur_level) {
debug_assert!(cur_level <= C::HIGHEST_TRANSLATION_LEVEL);
break;
}
@ -393,12 +330,11 @@ pub(crate) trait PageTableEntryTrait: Clone + Copy + Sized + Pod + Debug {
/// If the flags are present with valid mappings.
fn is_present(&self) -> bool;
/// Create a new PTE with the given physical address and flags.
/// The huge flag indicates that the PTE maps a huge page.
/// The last flag indicates that the PTE is the last level page table.
/// If the huge and last flags are both false, the PTE maps a page
/// table node.
fn new(paddr: Paddr, prop: PageProperty, huge: bool, last: bool) -> Self;
/// Create a new PTE with the given physical address and flags that maps to a frame.
fn new_frame(paddr: Paddr, level: PagingLevel, prop: PageProperty) -> Self;
/// Create a new PTE that maps to a child page table.
fn new_pt(paddr: Paddr) -> Self;
/// Get the physical address from the PTE.
/// The physical address recorded in the PTE is either:
@ -408,6 +344,11 @@ pub(crate) trait PageTableEntryTrait: Clone + Copy + Sized + Pod + Debug {
fn prop(&self) -> PageProperty;
/// If the PTE maps a huge page or a page table frame.
fn is_huge(&self) -> bool;
fn set_prop(&mut self, prop: PageProperty);
/// If the PTE maps a page rather than a child page table.
///
/// The level of the page table where the entry resides is given, since
/// architectures like amd64 only use a huge bit in intermediate levels.
fn is_last(&self, level: PagingLevel) -> bool;
}
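// Illustrative sketch (not part of this patch): what `is_last` typically looks
// like for amd64-style paging, where level 1 is always a leaf and intermediate
// levels mark a huge leaf with the PS bit (bit 7); the raw layout here is an
// assumption, not this crate's `PageTableEntry`.
fn _is_last_sketch(raw_pte: u64, level: PagingLevel) -> bool {
    const PS_BIT: u64 = 1 << 7;
    level == 1 || (raw_pte & PS_BIT) != 0
}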

View File

@ -1,5 +1,7 @@
// SPDX-License-Identifier: MPL-2.0
use core::mem::ManuallyDrop;
use super::*;
use crate::vm::{
kspace::LINEAR_MAPPING_BASE_VADDR,
@ -25,8 +27,9 @@ fn test_range_check() {
}
#[ktest]
fn test_map_unmap() {
fn test_tracked_map_unmap() {
let pt = PageTable::<UserMode>::empty();
let from = PAGE_SIZE..PAGE_SIZE * 2;
let frame = VmAllocOptions::new(1).alloc_single().unwrap();
let start_paddr = frame.start_paddr();
@ -35,17 +38,25 @@ fn test_map_unmap() {
assert_eq!(pt.query(from.start + 10).unwrap().0, start_paddr + 10);
unsafe { pt.unmap(&from).unwrap() };
assert!(pt.query(from.start + 10).is_none());
}
#[ktest]
fn test_untracked_map_unmap() {
let pt = PageTable::<KernelMode>::empty();
const UNTRACKED_OFFSET: usize = crate::vm::kspace::LINEAR_MAPPING_BASE_VADDR;
let from_ppn = 13245..512 * 512 + 23456;
let to_ppn = from_ppn.start - 11010..from_ppn.end - 11010;
let from = PAGE_SIZE * from_ppn.start..PAGE_SIZE * from_ppn.end;
let from =
UNTRACKED_OFFSET + PAGE_SIZE * from_ppn.start..UNTRACKED_OFFSET + PAGE_SIZE * from_ppn.end;
let to = PAGE_SIZE * to_ppn.start..PAGE_SIZE * to_ppn.end;
let prop = PageProperty::new(PageFlags::RW, CachePolicy::Writeback);
unsafe { pt.map(&from, &to, prop).unwrap() };
for i in 0..100 {
let offset = i * (PAGE_SIZE + 1000);
assert_eq!(pt.query(from.start + offset).unwrap().0, to.start + offset);
}
let unmap = PAGE_SIZE * 123..PAGE_SIZE * 3434;
let unmap = UNTRACKED_OFFSET + PAGE_SIZE * 123..UNTRACKED_OFFSET + PAGE_SIZE * 3434;
unsafe { pt.unmap(&unmap).unwrap() };
for i in 0..100 {
let offset = i * (PAGE_SIZE + 10);
@ -55,6 +66,9 @@ fn test_map_unmap() {
assert_eq!(pt.query(from.start + offset).unwrap().0, to.start + offset);
}
}
// Since untracked mappings cannot be dropped, we just leak it here.
let _ = ManuallyDrop::new(pt);
}
#[ktest]
@ -77,11 +91,30 @@ fn test_user_copy_on_write() {
unsafe { pt.unmap(&from).unwrap() };
assert!(pt.query(from.start + 10).is_none());
assert_eq!(child_pt.query(from.start + 10).unwrap().0, start_paddr + 10);
let sibling_pt = pt.fork_copy_on_write();
assert!(sibling_pt.query(from.start + 10).is_none());
assert_eq!(child_pt.query(from.start + 10).unwrap().0, start_paddr + 10);
drop(pt);
assert_eq!(child_pt.query(from.start + 10).unwrap().0, start_paddr + 10);
unsafe { child_pt.unmap(&from).unwrap() };
assert!(child_pt.query(from.start + 10).is_none());
unsafe {
sibling_pt
.cursor_mut(&from)
.unwrap()
.map(frame.clone(), prop)
};
assert_eq!(
sibling_pt.query(from.start + 10).unwrap().0,
start_paddr + 10
);
assert!(child_pt.query(from.start + 10).is_none());
}
type Qr = PageTableQueryResult;
#[derive(Debug)]
#[derive(Clone, Debug)]
struct BasePagingConsts {}
impl PagingConstsTrait for BasePagingConsts {
@ -94,32 +127,38 @@ impl PagingConstsTrait for BasePagingConsts {
#[ktest]
fn test_base_protect_query() {
let pt = PageTable::<UserMode, PageTableEntry, BasePagingConsts>::empty();
let pt = PageTable::<UserMode>::empty();
let from_ppn = 1..1000;
let from = PAGE_SIZE * from_ppn.start..PAGE_SIZE * from_ppn.end;
let to = PAGE_SIZE * 1000..PAGE_SIZE * 1999;
let to = VmAllocOptions::new(999).alloc().unwrap();
let prop = PageProperty::new(PageFlags::RW, CachePolicy::Writeback);
unsafe { pt.map(&from, &to, prop).unwrap() };
unsafe {
let mut cursor = pt.cursor_mut(&from).unwrap();
for frame in to {
cursor.map(frame.clone(), prop);
}
}
for (qr, i) in pt.cursor(&from).unwrap().zip(from_ppn) {
let Qr::MappedUntyped { va, pa, len, prop } = qr else {
panic!("Expected MappedUntyped, got {:#x?}", qr);
let Qr::Mapped { va, frame, prop } = qr else {
panic!("Expected Mapped, got {:#x?}", qr);
};
assert_eq!(prop.flags, PageFlags::RW);
assert_eq!(prop.cache, CachePolicy::Writeback);
assert_eq!(va..va + len, i * PAGE_SIZE..(i + 1) * PAGE_SIZE);
assert_eq!(va..va + frame.size(), i * PAGE_SIZE..(i + 1) * PAGE_SIZE);
}
let prot = PAGE_SIZE * 18..PAGE_SIZE * 20;
unsafe { pt.protect(&prot, |p| p.flags -= PageFlags::W).unwrap() };
for (qr, i) in pt.cursor(&prot).unwrap().zip(18..20) {
let Qr::MappedUntyped { va, pa, len, prop } = qr else {
panic!("Expected MappedUntyped, got {:#x?}", qr);
let Qr::Mapped { va, frame, prop } = qr else {
panic!("Expected Mapped, got {:#x?}", qr);
};
assert_eq!(prop.flags, PageFlags::R);
assert_eq!(va..va + len, i * PAGE_SIZE..(i + 1) * PAGE_SIZE);
assert_eq!(va..va + frame.size(), i * PAGE_SIZE..(i + 1) * PAGE_SIZE);
}
}
#[derive(Debug)]
#[derive(Clone, Debug)]
struct VeryHugePagingConsts {}
impl PagingConstsTrait for VeryHugePagingConsts {
@ -131,8 +170,10 @@ impl PagingConstsTrait for VeryHugePagingConsts {
}
#[ktest]
fn test_large_protect_query() {
let pt = PageTable::<UserMode, PageTableEntry, VeryHugePagingConsts>::empty();
fn test_untracked_large_protect_query() {
let pt = PageTable::<KernelMode, PageTableEntry, VeryHugePagingConsts>::empty();
const UNTRACKED_OFFSET: usize = crate::vm::kspace::LINEAR_MAPPING_BASE_VADDR;
let gmult = 512 * 512;
let from_ppn = gmult - 512..gmult + gmult + 514;
let to_ppn = gmult - 512 - 512..gmult + gmult - 512 + 514;
@ -141,13 +182,14 @@ fn test_large_protect_query() {
// from: |--2M--|-------------1G-------------|--2M--|-|
// to: |--2M--|--2M--|-------------1G-------------|-|
// Thus all mappings except the last few pages are mapped in 2M huge pages
let from = PAGE_SIZE * from_ppn.start..PAGE_SIZE * from_ppn.end;
let from =
UNTRACKED_OFFSET + PAGE_SIZE * from_ppn.start..UNTRACKED_OFFSET + PAGE_SIZE * from_ppn.end;
let to = PAGE_SIZE * to_ppn.start..PAGE_SIZE * to_ppn.end;
let prop = PageProperty::new(PageFlags::RW, CachePolicy::Writeback);
unsafe { pt.map(&from, &to, prop).unwrap() };
for (qr, i) in pt.cursor(&from).unwrap().zip(0..512 + 2 + 2) {
let Qr::MappedUntyped { va, pa, len, prop } = qr else {
panic!("Expected MappedUntyped, got {:#x?}", qr);
let Qr::MappedUntracked { va, pa, len, prop } = qr else {
panic!("Expected MappedUntracked, got {:#x?}", qr);
};
assert_eq!(prop.flags, PageFlags::RW);
assert_eq!(prop.cache, CachePolicy::Writeback);
@ -166,24 +208,26 @@ fn test_large_protect_query() {
}
}
let ppn = from_ppn.start + 18..from_ppn.start + 20;
let va = PAGE_SIZE * ppn.start..PAGE_SIZE * ppn.end;
let va = UNTRACKED_OFFSET + PAGE_SIZE * ppn.start..UNTRACKED_OFFSET + PAGE_SIZE * ppn.end;
unsafe { pt.protect(&va, |p| p.flags -= PageFlags::W).unwrap() };
for (qr, i) in pt
.cursor(&(va.start - PAGE_SIZE..va.start))
.unwrap()
.zip(ppn.start - 1..ppn.start)
{
let Qr::MappedUntyped { va, pa, len, prop } = qr else {
panic!("Expected MappedUntyped, got {:#x?}", qr);
let Qr::MappedUntracked { va, pa, len, prop } = qr else {
panic!("Expected MappedUntracked, got {:#x?}", qr);
};
assert_eq!(prop.flags, PageFlags::RW);
let va = va - UNTRACKED_OFFSET;
assert_eq!(va..va + len, i * PAGE_SIZE..(i + 1) * PAGE_SIZE);
}
for (qr, i) in pt.cursor(&va).unwrap().zip(ppn.clone()) {
let Qr::MappedUntyped { va, pa, len, prop } = qr else {
panic!("Expected MappedUntyped, got {:#x?}", qr);
let Qr::MappedUntracked { va, pa, len, prop } = qr else {
panic!("Expected MappedUntracked, got {:#x?}", qr);
};
assert_eq!(prop.flags, PageFlags::R);
let va = va - UNTRACKED_OFFSET;
assert_eq!(va..va + len, i * PAGE_SIZE..(i + 1) * PAGE_SIZE);
}
for (qr, i) in pt
@ -191,10 +235,14 @@ fn test_large_protect_query() {
.unwrap()
.zip(ppn.end..ppn.end + 1)
{
let Qr::MappedUntyped { va, pa, len, prop } = qr else {
panic!("Expected MappedUntyped, got {:#x?}", qr);
let Qr::MappedUntracked { va, pa, len, prop } = qr else {
panic!("Expected MappedUntracked, got {:#x?}", qr);
};
assert_eq!(prop.flags, PageFlags::RW);
let va = va - UNTRACKED_OFFSET;
assert_eq!(va..va + len, i * PAGE_SIZE..(i + 1) * PAGE_SIZE);
}
// Since untracked mappings cannot be dropped, we just leak it here.
let _ = ManuallyDrop::new(pt);
}

View File

@ -90,7 +90,7 @@ impl VmSpace {
// If overwrite is forbidden, we should check if there are existing mappings
if !options.can_overwrite {
while let Some(qr) = cursor.query() {
while let Some(qr) = cursor.next() {
if matches!(qr, PtQr::Mapped { .. }) {
return Err(Error::MapAlreadyMappedVaddr);
}
@ -350,7 +350,7 @@ impl Iterator for VmQueryIter<'_> {
PtQr::NotMapped { va, len } => VmQueryResult::NotMapped { va, len },
PtQr::Mapped { va, frame, prop } => VmQueryResult::Mapped { va, frame, prop },
// It is not possible to map untyped memory in user space.
PtQr::MappedUntyped { .. } => unreachable!(),
PtQr::MappedUntracked { .. } => unreachable!(),
})
}
}