From 141fbeaf0c91c8f4bb621fcd7062b83d9d555488 Mon Sep 17 00:00:00 2001 From: Zhang Junyang Date: Thu, 16 May 2024 16:42:36 +0000 Subject: [PATCH] Optimize the page table implementation using the frame metadata This PR also refactored the page table cursor, distinguishing `Cursor` from `CursorMut`, and split a lot of functions to reduce dynamic condition checking. There are also other sanitizations performed, including refactoring PTE's `is_huge` API to `is_last`, hardening tracked mapping checks, and making `VmFrame` any size. --- .../src/arch/x86/iommu/context_table.rs | 4 +- .../aster-frame/src/arch/x86/iommu/mod.rs | 3 +- .../src/arch/x86/iommu/second_stage.rs | 51 +- framework/aster-frame/src/arch/x86/mm/mod.rs | 125 +-- framework/aster-frame/src/vm/frame/meta.rs | 21 +- framework/aster-frame/src/vm/frame/mod.rs | 22 +- framework/aster-frame/src/vm/io.rs | 22 +- framework/aster-frame/src/vm/kspace.rs | 30 +- framework/aster-frame/src/vm/mod.rs | 6 +- .../aster-frame/src/vm/page_table/boot_pt.rs | 8 +- .../aster-frame/src/vm/page_table/cursor.rs | 776 +++++++++--------- .../aster-frame/src/vm/page_table/frame.rs | 646 ++++++++++----- .../aster-frame/src/vm/page_table/mod.rs | 169 ++-- .../aster-frame/src/vm/page_table/test.rs | 100 ++- framework/aster-frame/src/vm/space.rs | 4 +- 15 files changed, 1148 insertions(+), 839 deletions(-) diff --git a/framework/aster-frame/src/arch/x86/iommu/context_table.rs b/framework/aster-frame/src/arch/x86/iommu/context_table.rs index 0bce1eb14..37eafcaef 100644 --- a/framework/aster-frame/src/arch/x86/iommu/context_table.rs +++ b/framework/aster-frame/src/arch/x86/iommu/context_table.rs @@ -138,7 +138,7 @@ impl RootTable { if bus_entry.is_present() { warn!("IOMMU: Overwritting the existing device page table"); } - let address = page_table.root_paddr(); + let address = unsafe { page_table.root_paddr() }; context_table.page_tables.insert(address, page_table); let entry = ContextEntry(address as u128 | 1 | 0x1_0000_0000_0000_0000); context_table @@ -262,7 +262,7 @@ impl ContextTable { if !bus_entry.is_present() { let table = PageTable::::empty(); - let address = table.root_paddr(); + let address = unsafe { table.root_paddr() }; self.page_tables.insert(address, table); let entry = ContextEntry(address as u128 | 3 | 0x1_0000_0000_0000_0000); self.entries_frame diff --git a/framework/aster-frame/src/arch/x86/iommu/mod.rs b/framework/aster-frame/src/arch/x86/iommu/mod.rs index a7825c642..a7a0efa22 100644 --- a/framework/aster-frame/src/arch/x86/iommu/mod.rs +++ b/framework/aster-frame/src/arch/x86/iommu/mod.rs @@ -6,7 +6,8 @@ mod remapping; mod second_stage; use log::info; -use second_stage::{DeviceMode, PageTableEntry, PagingConsts}; +pub use second_stage::DeviceMode; +use second_stage::{PageTableEntry, PagingConsts}; use spin::Once; use crate::{ diff --git a/framework/aster-frame/src/arch/x86/iommu/second_stage.rs b/framework/aster-frame/src/arch/x86/iommu/second_stage.rs index acf910946..18d908388 100644 --- a/framework/aster-frame/src/arch/x86/iommu/second_stage.rs +++ b/framework/aster-frame/src/arch/x86/iommu/second_stage.rs @@ -13,7 +13,7 @@ use crate::vm::{ /// The page table used by iommu maps the device address /// space to the physical address space. #[derive(Clone, Debug)] -pub(super) struct DeviceMode {} +pub struct DeviceMode {} impl PageTableMode for DeviceMode { /// The device address space is 32-bit. @@ -67,32 +67,23 @@ bitflags::bitflags! 
{ pub struct PageTableEntry(u64); impl PageTableEntry { - const PHYS_MASK: usize = 0xFFFF_FFFF_F000; + const PHYS_MASK: u64 = 0xFFFF_FFFF_F000; + const PROP_MASK: u64 = !Self::PHYS_MASK & !PageTableFlags::LAST_PAGE.bits(); } impl PageTableEntryTrait for PageTableEntry { - fn new(paddr: crate::vm::Paddr, prop: PageProperty, huge: bool, last: bool) -> Self { - let mut flags = PageTableFlags::empty(); - if prop.flags.contains(PageFlags::W) { - flags |= PageTableFlags::WRITABLE; - } - if prop.flags.contains(PageFlags::R) { - flags |= PageTableFlags::READABLE; - } - if prop.cache != CachePolicy::Uncacheable { - flags |= PageTableFlags::SNOOP; - } - if last { - flags |= PageTableFlags::LAST_PAGE; - } - if huge { - panic!("Huge page is not supported in iommu page table"); - } - Self((paddr & Self::PHYS_MASK) as u64 | flags.bits) + fn new_frame(paddr: Paddr, level: PagingLevel, prop: PageProperty) -> Self { + let mut pte = Self(paddr as u64 & Self::PHYS_MASK | PageTableFlags::LAST_PAGE.bits()); + pte.set_prop(prop); + pte + } + + fn new_pt(paddr: Paddr) -> Self { + Self(paddr as u64 & Self::PHYS_MASK) } fn paddr(&self) -> Paddr { - (self.0 & Self::PHYS_MASK as u64) as usize + (self.0 & Self::PHYS_MASK) as usize } fn new_absent() -> Self { @@ -131,7 +122,21 @@ impl PageTableEntryTrait for PageTableEntry { } } - fn is_huge(&self) -> bool { - false + fn set_prop(&mut self, prop: PageProperty) { + let mut flags = PageTableFlags::empty(); + if prop.flags.contains(PageFlags::W) { + flags |= PageTableFlags::WRITABLE; + } + if prop.flags.contains(PageFlags::R) { + flags |= PageTableFlags::READABLE; + } + if prop.cache != CachePolicy::Uncacheable { + flags |= PageTableFlags::SNOOP; + } + self.0 = self.0 & !Self::PROP_MASK | flags.bits(); + } + + fn is_last(&self, level: PagingLevel) -> bool { + level == 1 } } diff --git a/framework/aster-frame/src/arch/x86/mm/mod.rs b/framework/aster-frame/src/arch/x86/mm/mod.rs index 6f68e060b..f12beff88 100644 --- a/framework/aster-frame/src/arch/x86/mm/mod.rs +++ b/framework/aster-frame/src/arch/x86/mm/mod.rs @@ -127,6 +127,7 @@ impl PageTableEntry { const PHYS_ADDR_MASK: usize = 0xF_FFFF_FFFF_F000; #[cfg(feature = "intel_tdx")] const PHYS_ADDR_MASK: usize = 0x7_FFFF_FFFF_F000; + const PROP_MASK: usize = !Self::PHYS_ADDR_MASK & !PageTableFlags::HUGE.bits(); } /// Parse a bit-flag bits `val` in the representation of `from` to `to` in bits. @@ -145,61 +146,30 @@ impl PageTableEntryTrait for PageTableEntry { self.0 & PageTableFlags::PRESENT.bits() != 0 } - fn new(paddr: Paddr, prop: PageProperty, huge: bool, last: bool) -> Self { - let mut flags = - PageTableFlags::PRESENT.bits() | (huge as usize) << PageTableFlags::HUGE.bits().ilog2(); - if !huge && !last { - // In x86 if it's an intermediate PTE, it's better to have the same permissions - // as the most permissive child (to reduce hardware page walk accesses). But we - // don't have a mechanism to keep it generic across architectures, thus just - // setting it to be the most permissive. 
- flags |= PageTableFlags::WRITABLE.bits() | PageTableFlags::USER.bits(); - #[cfg(feature = "intel_tdx")] - { - flags |= parse_flags!( - prop.priv_flags.bits(), - PrivFlags::SHARED, - PageTableFlags::SHARED - ); - } - } else { - flags |= parse_flags!(prop.flags.bits(), PageFlags::W, PageTableFlags::WRITABLE) - | parse_flags!(!prop.flags.bits(), PageFlags::X, PageTableFlags::NO_EXECUTE) - | parse_flags!( - prop.flags.bits(), - PageFlags::ACCESSED, - PageTableFlags::ACCESSED - ) - | parse_flags!(prop.flags.bits(), PageFlags::DIRTY, PageTableFlags::DIRTY) - | parse_flags!( - prop.priv_flags.bits(), - PrivFlags::USER, - PageTableFlags::USER - ) - | parse_flags!( - prop.priv_flags.bits(), - PrivFlags::GLOBAL, - PageTableFlags::GLOBAL - ); - #[cfg(feature = "intel_tdx")] - { - flags |= parse_flags!( - prop.priv_flags.bits(), - PrivFlags::SHARED, - PageTableFlags::SHARED - ); - } - } - match prop.cache { - CachePolicy::Writeback => {} - CachePolicy::Writethrough => { - flags |= PageTableFlags::WRITE_THROUGH.bits(); - } - CachePolicy::Uncacheable => { - flags |= PageTableFlags::NO_CACHE.bits(); - } - _ => panic!("unsupported cache policy"), - } + fn new_frame(paddr: Paddr, level: PagingLevel, prop: PageProperty) -> Self { + let mut pte = Self( + paddr & Self::PHYS_ADDR_MASK + | ((level != 1) as usize) << PageTableFlags::HUGE.bits().ilog2(), + ); + pte.set_prop(prop); + pte + } + + fn new_pt(paddr: Paddr) -> Self { + // In x86 if it's an intermediate PTE, it's better to have the same permissions + // as the most permissive child (to reduce hardware page walk accesses). But we + // don't have a mechanism to keep it generic across architectures, thus just + // setting it to be the most permissive. + let flags = PageTableFlags::PRESENT.bits() + | PageTableFlags::WRITABLE.bits() + | PageTableFlags::USER.bits(); + #[cfg(feature = "intel_tdx")] + let flags = flags + | parse_flags!( + prop.priv_flags.bits(), + PrivFlags::SHARED, + PageTableFlags::SHARED + ); Self(paddr & Self::PHYS_ADDR_MASK | flags) } @@ -232,8 +202,49 @@ impl PageTableEntryTrait for PageTableEntry { } } - fn is_huge(&self) -> bool { - self.0 & PageTableFlags::HUGE.bits() != 0 + fn set_prop(&mut self, prop: PageProperty) { + let mut flags = PageTableFlags::PRESENT.bits(); + flags |= parse_flags!(prop.flags.bits(), PageFlags::W, PageTableFlags::WRITABLE) + | parse_flags!(!prop.flags.bits(), PageFlags::X, PageTableFlags::NO_EXECUTE) + | parse_flags!( + prop.flags.bits(), + PageFlags::ACCESSED, + PageTableFlags::ACCESSED + ) + | parse_flags!(prop.flags.bits(), PageFlags::DIRTY, PageTableFlags::DIRTY) + | parse_flags!( + prop.priv_flags.bits(), + PrivFlags::USER, + PageTableFlags::USER + ) + | parse_flags!( + prop.priv_flags.bits(), + PrivFlags::GLOBAL, + PageTableFlags::GLOBAL + ); + #[cfg(feature = "intel_tdx")] + { + flags |= parse_flags!( + prop.priv_flags.bits(), + PrivFlags::SHARED, + PageTableFlags::SHARED + ); + } + match prop.cache { + CachePolicy::Writeback => {} + CachePolicy::Writethrough => { + flags |= PageTableFlags::WRITE_THROUGH.bits(); + } + CachePolicy::Uncacheable => { + flags |= PageTableFlags::NO_CACHE.bits(); + } + _ => panic!("unsupported cache policy"), + } + self.0 = self.0 & !Self::PROP_MASK | flags; + } + + fn is_last(&self, level: PagingLevel) -> bool { + level == 1 || (self.0 & PageTableFlags::HUGE.bits() != 0) } } diff --git a/framework/aster-frame/src/vm/frame/meta.rs b/framework/aster-frame/src/vm/frame/meta.rs index 6084dba1b..775ca8577 100644 --- a/framework/aster-frame/src/vm/frame/meta.rs +++ 
b/framework/aster-frame/src/vm/frame/meta.rs @@ -2,7 +2,7 @@ use core::{ ops::Deref, - sync::atomic::{AtomicU32, AtomicU8}, + sync::atomic::{AtomicU16, AtomicU32, AtomicU8}, }; use static_assertions::const_assert_eq; @@ -132,17 +132,18 @@ impl Deref for FrameMetaRef { pub struct FrameMeta { pub frame_type: FrameType, // 1 byte /// The first 8-bit counter. - /// Currently unused. + /// - For [`FrameType::Anonymous`], it is not used. + /// - For [`FrameType::PageTable`], it is used as a spinlock. pub counter8_1: AtomicU8, // 1 byte - /// The second 8-bit counter. - /// Currently unused. - pub counter8_2: AtomicU8, // 1 byte - /// The third 8-bit counter. - /// Currently unused. - pub counter8_3: AtomicU8, // 1 byte + /// The first 16-bit counter. + /// - For [`FrameType::Anonymous`], it is not used. + /// - For [`FrameType::PageTable`], it is used as the map count. The map + /// count is the number of present children. + pub counter16_1: AtomicU16, // 2 bytes /// The first 32-bit counter. - /// It is used in different type of frame with different semantics. /// - For [`FrameType::Anonymous`], it is the handle count. + /// - For [`FrameType::PageTable`], it is used as the reference count. The referencer + /// can be either a handle, a PTE or a CPU that loads it. pub counter32_1: AtomicU32, // 4 bytes } @@ -155,4 +156,6 @@ pub enum FrameType { Meta, Anonymous, PageTable, + /// Frames that contains kernel code. + KernelCode, } diff --git a/framework/aster-frame/src/vm/frame/mod.rs b/framework/aster-frame/src/vm/frame/mod.rs index 390e289a2..2aa0e4ad6 100644 --- a/framework/aster-frame/src/vm/frame/mod.rs +++ b/framework/aster-frame/src/vm/frame/mod.rs @@ -239,6 +239,10 @@ impl VmFrame { self.meta.size() } + pub fn level(&self) -> PagingLevel { + self.meta.level() + } + pub fn end_paddr(&self) -> Paddr { self.start_paddr() + self.size() } @@ -258,7 +262,7 @@ impl VmFrame { if self.size() != src.size() { panic!("The size of the source frame is different from the destination frame"); } - // Safety: the source and the destination does not overlap. + // SAFETY: the source and the destination does not overlap. unsafe { core::ptr::copy_nonoverlapping(src.as_ptr(), self.as_mut_ptr(), self.size()); } @@ -268,13 +272,13 @@ impl VmFrame { impl<'a> VmFrame { /// Returns a reader to read data from it. pub fn reader(&'a self) -> VmReader<'a> { - // Safety: the memory of the page is contiguous and is valid during `'a`. + // SAFETY: the memory of the page is contiguous and is valid during `'a`. unsafe { VmReader::from_raw_parts(self.as_ptr(), self.size()) } } /// Returns a writer to write data into it. pub fn writer(&'a self) -> VmWriter<'a> { - // Safety: the memory of the page is contiguous and is valid during `'a`. + // SAFETY: the memory of the page is contiguous and is valid during `'a`. unsafe { VmWriter::from_raw_parts_mut(self.as_mut_ptr(), self.size()) } } } @@ -309,10 +313,10 @@ impl Drop for VmFrame { // A fence is needed here with the same reasons stated in the implementation of // `Arc::drop`: . atomic::fence(Ordering::Acquire); - // Safety: the reference counter is 1 before decremented, so this is the only + // SAFETY: the reference counter is 1 before decremented, so this is the only // (exclusive) handle. unsafe { self.meta.deref_mut().frame_type = FrameType::Free }; - // Safety: the page frame is valid. + // SAFETY: the page frame is valid. 
unsafe { allocator::dealloc_contiguous(self.paddr() / PAGE_SIZE, self.size() / PAGE_SIZE); } @@ -460,13 +464,13 @@ impl VmSegment { impl<'a> VmSegment { /// Returns a reader to read data from it. pub fn reader(&'a self) -> VmReader<'a> { - // Safety: the memory of the page frames is contiguous and is valid during `'a`. + // SAFETY: the memory of the page frames is contiguous and is valid during `'a`. unsafe { VmReader::from_raw_parts(self.as_ptr(), self.nbytes()) } } /// Returns a writer to write data into it. pub fn writer(&'a self) -> VmWriter<'a> { - // Safety: the memory of the page frames is contiguous and is valid during `'a`. + // SAFETY: the memory of the page frames is contiguous and is valid during `'a`. unsafe { VmWriter::from_raw_parts_mut(self.as_mut_ptr(), self.nbytes()) } } } @@ -501,10 +505,10 @@ impl Drop for VmSegment { // A fence is needed here with the same reasons stated in the implementation of // `Arc::drop`: . atomic::fence(Ordering::Acquire); - // Safety: the reference counter is 1 before decremented, so this is the only + // SAFETY: the reference counter is 1 before decremented, so this is the only // (exclusive) handle. unsafe { self.inner.meta.deref_mut().frame_type = FrameType::Free }; - // Safety: the range of contiguous page frames is valid. + // SAFETY: the range of contiguous page frames is valid. unsafe { allocator::dealloc_contiguous(self.inner.start_frame_index(), self.inner.nframes); } diff --git a/framework/aster-frame/src/vm/io.rs b/framework/aster-frame/src/vm/io.rs index 3aae6deb7..618b9d07d 100644 --- a/framework/aster-frame/src/vm/io.rs +++ b/framework/aster-frame/src/vm/io.rs @@ -174,7 +174,7 @@ impl<'a> VmReader<'a> { /// Returns the number of bytes for the remaining data. pub const fn remain(&self) -> usize { - // Safety: the end is equal to or greater than the cursor. + // SAFETY: the end is equal to or greater than the cursor. unsafe { self.end.sub_ptr(self.cursor) } } @@ -193,7 +193,7 @@ impl<'a> VmReader<'a> { /// This method ensures the postcondition of `self.remain() <= max_remain`. pub const fn limit(mut self, max_remain: usize) -> Self { if max_remain < self.remain() { - // Safety: the new end is less than the old end. + // SAFETY: the new end is less than the old end. unsafe { self.end = self.cursor.add(max_remain) }; } self @@ -208,7 +208,7 @@ impl<'a> VmReader<'a> { pub fn skip(mut self, nbytes: usize) -> Self { assert!(nbytes <= self.remain()); - // Safety: the new cursor is less than or equal to the end. + // SAFETY: the new cursor is less than or equal to the end. unsafe { self.cursor = self.cursor.add(nbytes) }; self } @@ -227,7 +227,7 @@ impl<'a> VmReader<'a> { return 0; } - // Safety: the memory range is valid since `copy_len` is the minimum + // SAFETY: the memory range is valid since `copy_len` is the minimum // of the reader's remaining data and the writer's available space. unsafe { core::ptr::copy(self.cursor, writer.cursor, copy_len); @@ -255,7 +255,7 @@ impl<'a> VmReader<'a> { impl<'a> From<&'a [u8]> for VmReader<'a> { fn from(slice: &'a [u8]) -> Self { - // Safety: the range of memory is contiguous and is valid during `'a`. + // SAFETY: the range of memory is contiguous and is valid during `'a`. unsafe { Self::from_raw_parts(slice.as_ptr(), slice.len()) } } } @@ -284,7 +284,7 @@ impl<'a> VmWriter<'a> { /// Returns the number of bytes for the available space. pub const fn avail(&self) -> usize { - // Safety: the end is equal to or greater than the cursor. + // SAFETY: the end is equal to or greater than the cursor. 
unsafe { self.end.sub_ptr(self.cursor) } } @@ -303,7 +303,7 @@ impl<'a> VmWriter<'a> { /// This method ensures the postcondition of `self.avail() <= max_avail`. pub const fn limit(mut self, max_avail: usize) -> Self { if max_avail < self.avail() { - // Safety: the new end is less than the old end. + // SAFETY: the new end is less than the old end. unsafe { self.end = self.cursor.add(max_avail) }; } self @@ -318,7 +318,7 @@ impl<'a> VmWriter<'a> { pub fn skip(mut self, nbytes: usize) -> Self { assert!(nbytes <= self.avail()); - // Safety: the new cursor is less than or equal to the end. + // SAFETY: the new cursor is less than or equal to the end. unsafe { self.cursor = self.cursor.add(nbytes) }; self } @@ -337,7 +337,7 @@ impl<'a> VmWriter<'a> { return 0; } - // Safety: the memory range is valid since `copy_len` is the minimum + // SAFETY: the memory range is valid since `copy_len` is the minimum // of the reader's remaining data and the writer's available space. unsafe { core::ptr::copy(reader.cursor, self.cursor, copy_len); @@ -364,7 +364,7 @@ impl<'a> VmWriter<'a> { let written_num = avail / core::mem::size_of::(); for i in 0..written_num { - // Safety: `written_num` is calculated by the avail size and the size of the type `T`, + // SAFETY: `written_num` is calculated by the avail size and the size of the type `T`, // hence the `add` operation and `write` operation are valid and will only manipulate // the memory managed by this writer. unsafe { @@ -380,7 +380,7 @@ impl<'a> VmWriter<'a> { impl<'a> From<&'a mut [u8]> for VmWriter<'a> { fn from(slice: &'a mut [u8]) -> Self { - // Safety: the range of memory is contiguous and is valid during `'a`. + // SAFETY: the range of memory is contiguous and is valid during `'a`. unsafe { Self::from_raw_parts_mut(slice.as_mut_ptr(), slice.len()) } } } diff --git a/framework/aster-frame/src/vm/kspace.rs b/framework/aster-frame/src/vm/kspace.rs index ac6c1e320..912a9363f 100644 --- a/framework/aster-frame/src/vm/kspace.rs +++ b/framework/aster-frame/src/vm/kspace.rs @@ -7,7 +7,7 @@ //! //! ```text //! +-+ <- the highest used address (0xffff_ffff_ffff_0000) -//! | | For the kernel code, 1 GiB. +//! | | For the kernel code, 1 GiB. Mapped frames are tracked with handles. //! +-+ <- 0xffff_ffff_8000_0000 //! | | //! | | Unused hole. @@ -42,14 +42,13 @@ use spin::Once; use super::{ frame::{ allocator::FRAME_ALLOCATOR, - meta, - meta::{FrameMeta, FrameType}, + meta::{self, FrameMeta, FrameType}, }, nr_subpage_per_huge, page_prop::{CachePolicy, PageFlags, PageProperty, PrivilegedPageFlags}, page_size, page_table::{boot_pt::BootPageTable, KernelMode, PageTable}, - MemoryRegionType, Paddr, PagingConstsTrait, Vaddr, VmFrame, PAGE_SIZE, + FrameMetaRef, MemoryRegionType, Paddr, PagingConstsTrait, Vaddr, VmFrame, PAGE_SIZE, }; use crate::{ arch::mm::{PageTableEntry, PagingConsts}, @@ -161,7 +160,7 @@ pub fn init_kernel_page_table() { }; let mut cursor = kpt.cursor_mut(&from).unwrap(); for frame in meta_frames { - // Safety: we are doing the metadata mappings for the kernel. + // SAFETY: we are doing the metadata mappings for the kernel. unsafe { cursor.map(frame, prop); } @@ -201,9 +200,18 @@ pub fn init_kernel_page_table() { cache: CachePolicy::Writeback, priv_flags: PrivilegedPageFlags::GLOBAL, }; - // SAFETY: we are doing mappings for the kernel. 
- unsafe { - kpt.map(&from, &to, prop).unwrap(); + let mut cursor = kpt.cursor_mut(&from).unwrap(); + for frame_paddr in to.step_by(PAGE_SIZE) { + let mut meta = unsafe { FrameMetaRef::from_raw(frame_paddr, 1) }; + // SAFETY: we are marking the type of the frame containing loaded kernel code. + unsafe { + meta.deref_mut().frame_type = FrameType::KernelCode; + } + let frame = VmFrame { meta }; + // SAFETY: we are doing mappings for the kernel. + unsafe { + cursor.map(frame, prop); + } } } @@ -211,7 +219,7 @@ pub fn init_kernel_page_table() { } pub fn activate_kernel_page_table() { - // Safety: the kernel page table is initialized properly. + // SAFETY: the kernel page table is initialized properly. unsafe { KERNEL_PAGE_TABLE.get().unwrap().activate_unchecked(); crate::arch::mm::tlb_flush_all_including_global(); @@ -252,9 +260,9 @@ fn init_boot_page_table_and_page_meta( let meta_frames = meta_frames .into_iter() .map(|paddr| { - // Safety: the frame is allocated but not initialized thus not referenced. + // SAFETY: the frame is allocated but not initialized thus not referenced. let mut frame = unsafe { VmFrame::from_free_raw(paddr, 1) }; - // Safety: this is the only reference to the frame so it's exclusive. + // SAFETY: this is the only reference to the frame so it's exclusive. unsafe { frame.meta.deref_mut().frame_type = FrameType::Meta }; frame }) diff --git a/framework/aster-frame/src/vm/mod.rs b/framework/aster-frame/src/vm/mod.rs index 47fbfa501..d0ff2ab06 100644 --- a/framework/aster-frame/src/vm/mod.rs +++ b/framework/aster-frame/src/vm/mod.rs @@ -33,7 +33,9 @@ pub use self::{ space::{VmMapOptions, VmSpace}, }; pub(crate) use self::{ - frame::meta::FrameMetaRef, kspace::paddr_to_vaddr, page_prop::PrivilegedPageFlags, + frame::meta::{FrameMetaRef, FrameType}, + kspace::paddr_to_vaddr, + page_prop::PrivilegedPageFlags, page_table::PageTable, }; use crate::{ @@ -46,7 +48,7 @@ pub type PagingLevel = u8; /// A minimal set of constants that determines the paging system. /// This provides an abstraction over most paging modes in common architectures. -pub(crate) trait PagingConstsTrait: Debug + 'static { +pub(crate) trait PagingConstsTrait: Clone + Debug + 'static { /// The smallest page size. /// This is also the page size at level 1 page tables. 
const BASE_PAGE_SIZE: usize; diff --git a/framework/aster-frame/src/vm/page_table/boot_pt.rs b/framework/aster-frame/src/vm/page_table/boot_pt.rs index a4284c9b1..e4088ebeb 100644 --- a/framework/aster-frame/src/vm/page_table/boot_pt.rs +++ b/framework/aster-frame/src/vm/page_table/boot_pt.rs @@ -49,10 +49,9 @@ impl BootPageTable { let pte = unsafe { pte_ptr.read() }; pt = if !pte.is_present() { let frame = self.alloc_frame(); - let new_pte = E::new(frame * C::BASE_PAGE_SIZE, pte.prop(), false, false); - unsafe { pte_ptr.write(new_pte) }; + unsafe { pte_ptr.write(E::new_pt(frame * C::BASE_PAGE_SIZE)) }; frame - } else if pte.is_huge() { + } else if pte.is_last(level) { panic!("mapping an already mapped huge page in the boot page table"); } else { pte.paddr() / C::BASE_PAGE_SIZE @@ -66,8 +65,7 @@ impl BootPageTable { if pte.is_present() { panic!("mapping an already mapped page in the boot page table"); } - let new_pte = E::new(to * C::BASE_PAGE_SIZE, prop, false, true); - unsafe { pte_ptr.write(new_pte) }; + unsafe { pte_ptr.write(E::new_frame(to * C::BASE_PAGE_SIZE, 1, prop)) }; } fn alloc_frame(&mut self) -> FrameNumber { diff --git a/framework/aster-frame/src/vm/page_table/cursor.rs b/framework/aster-frame/src/vm/page_table/cursor.rs index e25367e0e..fffd239cd 100644 --- a/framework/aster-frame/src/vm/page_table/cursor.rs +++ b/framework/aster-frame/src/vm/page_table/cursor.rs @@ -50,49 +50,59 @@ //! required. The cursor unlock all locks, then lock all the way down to `B`, then //! check if `B` is empty, and finally recycle all the resources on the way back. -use alloc::sync::Arc; use core::{any::TypeId, ops::Range}; use align_ext::AlignExt; use super::{ - nr_subpage_per_huge, page_size, pte_index, Child, KernelMode, PageTable, PageTableEntryTrait, - PageTableError, PageTableFrame, PageTableMode, PagingConstsTrait, -}; -use crate::{ - sync::{ArcSpinLockGuard, SpinLock}, - vm::{Paddr, PageProperty, PagingLevel, Vaddr, VmFrame}, + page_size, pte_index, Child, KernelMode, PageTable, PageTableEntryTrait, PageTableError, + PageTableFrame, PageTableMode, PagingConstsTrait, PagingLevel, }; +use crate::vm::{Paddr, PageProperty, Vaddr, VmFrame}; + +#[derive(Clone, Debug)] +pub(crate) enum PageTableQueryResult { + NotMapped { + va: Vaddr, + len: usize, + }, + Mapped { + va: Vaddr, + frame: VmFrame, + prop: PageProperty, + }, + MappedUntracked { + va: Vaddr, + pa: Paddr, + len: usize, + prop: PageProperty, + }, +} /// The cursor for traversal over the page table. /// -/// Efficient methods are provided to move the cursor forward by a slot, -/// doing mapping, unmaping, or querying for the traversed slot. Also you -/// can jump forward or backward by re-walking without releasing the lock. -/// /// A slot is a PTE at any levels, which correspond to a certain virtual /// memory range sized by the "page size" of the current level. /// -/// Doing mapping is somewhat like a depth-first search on a tree, except -/// that we modify the tree while traversing it. We use a guard stack to +/// A cursor is able to move to the next slot, to read page properties, +/// and even to jump to a virtual address directly. We use a guard stack to /// simulate the recursion, and adpot a page table locking protocol to /// provide concurrency. 
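// Illustrative sketch, not from this patch: the doc comment above describes the
// cursor's "guard stack" and its lock-coupling protocol. The self-contained toy
// below shows the same idea on a generic lock-protected tree, using `std`
// primitives instead of the kernel's spinlock-backed frame handles; `ToyNode`
// and `walk_locked` are hypothetical names that do not exist in the codebase.
// The real cursor replaces the recursion with a fixed-size array of owned
// guards, one slot per paging level, and drops the guards of levels that no
// longer cover the working range.
use std::sync::{Arc, Mutex};

struct ToyNode {
    // Each slot may hold a child node, like a PTE that points to a child table.
    children: Vec<Option<Arc<Mutex<ToyNode>>>>,
}

// Walk down along `path`, holding every ancestor's lock (on the call stack)
// while the child's lock is taken, so no concurrent walker can detach a node
// that is still part of our locked path. Guards are released bottom-up when
// the recursion unwinds, mirroring the cursor's `level_up` cleanup.
fn walk_locked(node: &Arc<Mutex<ToyNode>>, path: &[usize], depth: usize) {
    let guard = node.lock().unwrap(); // lock this level
    if let Some((&idx, rest)) = path.split_first() {
        if let Some(child) = guard.children.get(idx).and_then(|c| c.clone()) {
            walk_locked(&child, rest, depth + 1);
        }
    }
    println!("releasing lock at depth {depth}");
    // `guard` drops here, unlocking this level on the way back up.
}

fn main() {
    let leaf = Arc::new(Mutex::new(ToyNode { children: vec![] }));
    let root = Arc::new(Mutex::new(ToyNode { children: vec![Some(leaf)] }));
    walk_locked(&root, &[0], 0);
}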
-pub(crate) struct CursorMut<'a, M: PageTableMode, E: PageTableEntryTrait, C: PagingConstsTrait> +#[derive(Debug)] +pub(crate) struct Cursor<'a, M: PageTableMode, E: PageTableEntryTrait, C: PagingConstsTrait> where - [(); nr_subpage_per_huge::()]:, [(); C::NR_LEVELS as usize]:, { pt: &'a PageTable, - guards: [Option>>; C::NR_LEVELS as usize], + guards: [Option>; C::NR_LEVELS as usize], level: PagingLevel, // current level guard_level: PagingLevel, // from guard_level to level, the locks are held va: Vaddr, // current virtual address barrier_va: Range, // virtual address range that is locked } -impl<'a, M: PageTableMode, E: PageTableEntryTrait, C: PagingConstsTrait> CursorMut<'a, M, E, C> +impl<'a, M: PageTableMode, E: PageTableEntryTrait, C: PagingConstsTrait> Cursor<'a, M, E, C> where - [(); nr_subpage_per_huge::()]:, [(); C::NR_LEVELS as usize]:, { /// Create a cursor exclusively owning the locks for the given range. @@ -112,7 +122,7 @@ where // Create a guard array that only hold the root node lock. let guards = core::array::from_fn(|i| { if i == 0 { - Some(pt.root_frame.lock_arc()) + Some(pt.root.copy_handle().lock()) } else { None } @@ -130,227 +140,24 @@ where // // While going down, previous guards of too-high levels will be released. loop { + let cur_pte = cursor.read_cur_pte(); let level_too_high = { let start_idx = pte_index::(va.start, cursor.level); let end_idx = pte_index::(va.end - 1, cursor.level); start_idx == end_idx }; - if !level_too_high || !cursor.cur_child().is_pt() { + if !level_too_high || !cur_pte.is_present() || cur_pte.is_last(cursor.level) { break; } - cursor.level_down(None); + cursor.level_down(); + // Release the guard of the previous level. cursor.guards[(C::NR_LEVELS - cursor.level) as usize - 1] = None; cursor.guard_level -= 1; } Ok(cursor) } - /// Jump to the given virtual address. - /// - /// It panics if the address is out of the range where the cursor is required to operate, - /// or has bad alignment. - pub(crate) fn jump(&mut self, va: Vaddr) { - assert!(self.barrier_va.contains(&va)); - assert!(va % C::BASE_PAGE_SIZE == 0); - loop { - let cur_node_start = self.va & !(page_size::(self.level + 1) - 1); - let cur_node_end = cur_node_start + page_size::(self.level + 1); - // If the address is within the current node, we can jump directly. - if cur_node_start <= va && va < cur_node_end { - self.va = va; - return; - } - // There is a corner case that the cursor is depleted, sitting at the start of the - // next node but the next node is not locked because the parent is not locked. - if self.va >= self.barrier_va.end && self.level == self.guard_level { - self.va = va; - return; - } - debug_assert!(self.level < self.guard_level); - self.level_up(); - } - } - - /// Map the range starting from the current address to a `VmFrame`. - /// - /// # Panic - /// - /// This function will panic if - /// - the virtual address range to be mapped is out of the range; - /// - it is already mapped to a huge page while the caller wants to map a smaller one. - /// - /// # Safety - /// - /// The caller should ensure that the virtual range being mapped does - /// not affect kernel's memory safety. - pub(crate) unsafe fn map(&mut self, frame: VmFrame, prop: PageProperty) { - let end = self.va + C::BASE_PAGE_SIZE; - assert!(end <= self.barrier_va.end); - // Go down if not applicable. 
- while self.level > C::HIGHEST_TRANSLATION_LEVEL - || self.va % page_size::(self.level) != 0 - || self.va + page_size::(self.level) > end - { - self.level_down(Some(prop)); - continue; - } - // Map the current page. - let idx = self.cur_idx(); - let level = self.level; - self.cur_node_mut() - .set_child(idx, Child::Frame(frame), Some(prop), level > 1); - self.move_forward(); - } - - /// Map the range starting from the current address to a physical address range. - /// - /// The function will map as more huge pages as possible, and it will split - /// the huge pages into smaller pages if necessary. If the input range is - /// large, the resulting mappings may look like this (if very huge pages - /// supported): - /// - /// ```text - /// start end - /// |----|----------------|--------------------------------|----|----| - /// base huge very huge base base - /// 4KiB 2MiB 1GiB 4KiB 4KiB - /// ``` - /// - /// In practice it is not suggested to use this method for safety and conciseness. - /// - /// # Safety - /// - /// The caller should ensure that - /// - the range being mapped does not affect kernel's memory safety; - /// - the physical address to be mapped is valid and safe to use. - pub(crate) unsafe fn map_pa(&mut self, pa: &Range, prop: PageProperty) { - let end = self.va + pa.len(); - let mut pa = pa.start; - assert!(end <= self.barrier_va.end); - while self.va < end { - // We ensure not mapping in reserved kernel shared tables or releasing it. - // Although it may be an invariant for all architectures and will be optimized - // out by the compiler since `C::NR_LEVELS - 1 > C::HIGHEST_TRANSLATION_LEVEL`. - let is_kernel_shared_node = - TypeId::of::() == TypeId::of::() && self.level >= C::NR_LEVELS - 1; - if self.level > C::HIGHEST_TRANSLATION_LEVEL - || is_kernel_shared_node - || self.va % page_size::(self.level) != 0 - || self.va + page_size::(self.level) > end - || pa % page_size::(self.level) != 0 - { - self.level_down(Some(prop)); - continue; - } - // Map the current page. - let idx = self.cur_idx(); - let level = self.level; - self.cur_node_mut() - .set_child(idx, Child::Untracked(pa), Some(prop), level > 1); - pa += page_size::(level); - self.move_forward(); - } - } - - /// Unmap the range starting from the current address with the given length of virtual address. - /// - /// # Safety - /// - /// The caller should ensure that the range being unmapped does not affect kernel's memory safety. - /// - /// # Panic - /// - /// This function will panic if: - /// - the range to be unmapped is out of the range where the cursor is required to operate; - /// - the range covers only a part of a page. - pub(crate) unsafe fn unmap(&mut self, len: usize) { - let end = self.va + len; - assert!(end <= self.barrier_va.end); - assert!(end % C::BASE_PAGE_SIZE == 0); - while self.va < end { - // Skip if it is already invalid. - if self.cur_child().is_none() { - if self.va + page_size::(self.level) > end { - break; - } - self.move_forward(); - continue; - } - - // We check among the conditions that may lead to a level down. - // We ensure not unmapping in reserved kernel shared tables or releasing it. - let is_kernel_shared_node = - TypeId::of::() == TypeId::of::() && self.level >= C::NR_LEVELS - 1; - if is_kernel_shared_node - || self.va % page_size::(self.level) != 0 - || self.va + page_size::(self.level) > end - { - self.level_down(Some(PageProperty::new_absent())); - continue; - } - - // Unmap the current page. 
- let idx = self.cur_idx(); - self.cur_node_mut().set_child(idx, Child::None, None, false); - self.move_forward(); - } - } - - /// Apply the given operation to all the mappings within the range. - /// - /// The funtction will return an error if it is not allowed to protect an invalid range and - /// it does so, or if the range to be protected only covers a part of a page. - /// - /// # Safety - /// - /// The caller should ensure that the range being protected does not affect kernel's memory safety. - /// - /// # Panic - /// - /// This function will panic if: - /// - the range to be protected is out of the range where the cursor is required to operate. - pub(crate) unsafe fn protect( - &mut self, - len: usize, - mut op: impl FnMut(&mut PageProperty), - allow_protect_invalid: bool, - ) -> Result<(), PageTableError> { - let end = self.va + len; - assert!(end <= self.barrier_va.end); - while self.va < end { - if self.cur_child().is_none() { - if !allow_protect_invalid { - return Err(PageTableError::ProtectingInvalid); - } - self.move_forward(); - continue; - } - // Go down if it's not a last node. - if self.cur_child().is_pt() { - self.level_down(None); - continue; - } - let vaddr_not_fit = self.va % page_size::(self.level) != 0 - || self.va + page_size::(self.level) > end; - let mut pte_prop = self.read_cur_pte_prop(); - op(&mut pte_prop); - // Go down if the page size is too big and we are protecting part - // of untyped huge pages. - if self.cur_child().is_untyped() && vaddr_not_fit { - self.level_down(Some(pte_prop)); - continue; - } else if vaddr_not_fit { - return Err(PageTableError::ProtectingPartial); - } - let idx = self.cur_idx(); - let level = self.level; - self.cur_node_mut().protect(idx, pte_prop, level); - self.move_forward(); - } - Ok(()) - } - - /// Get the information of the current slot and move to the next slot. + /// Get the information of the current slot. pub(crate) fn query(&mut self) -> Option { if self.va >= self.barrier_va.end { return None; @@ -358,56 +165,40 @@ where loop { let level = self.level; let va = self.va; - let map_prop = self.read_cur_pte_prop(); - match self.cur_child().clone() { + let pte = self.read_cur_pte(); + if !pte.is_present() { + return Some(PageTableQueryResult::NotMapped { + va, + len: page_size::(level), + }); + } + if !pte.is_last(level) { + self.level_down(); + continue; + } + match self.cur_child() { Child::Frame(frame) => { - self.move_forward(); return Some(PageTableQueryResult::Mapped { va, frame, - prop: map_prop, + prop: pte.prop(), }); } - Child::PageTable(_) => { - // Go down if it's not a last node. - self.level_down(None); - continue; - } Child::Untracked(pa) => { - self.move_forward(); - return Some(PageTableQueryResult::MappedUntyped { + return Some(PageTableQueryResult::MappedUntracked { va, pa, len: page_size::(level), - prop: map_prop, + prop: pte.prop(), }); } - Child::None => { - self.move_forward(); - return Some(PageTableQueryResult::NotMapped { - va, - len: page_size::(level), - }); + Child::None | Child::PageTable(_) => { + unreachable!(); // Already checked with the PTE. } } } } - /// Consume itself and leak the root guard for the caller if it locked the root level. - /// - /// It is useful when the caller wants to keep the root guard while the cursor should be dropped. 
- pub(super) fn leak_root_guard(mut self) -> Option>> { - if self.guard_level != C::NR_LEVELS { - return None; - } - while self.level < C::NR_LEVELS { - self.level_up(); - } - self.guards[0].take() - // Ok to drop self here because we ensure not to access the page table if the current - // level is the root level when running the dropping method. - } - /// Traverse forward in the current level to the next PTE. /// /// If reached the end of a page table frame, it leads itself up to the next frame of the parent @@ -437,92 +228,75 @@ where TypeId::of::() == TypeId::of::() && self.level < C::NR_LEVELS; if can_release_child && last_node_all_unmapped { let idx = self.cur_idx(); - self.cur_node_mut().set_child(idx, Child::None, None, false); + let untracked = self.in_untracked_range(); + self.cur_node_mut().unset_child(idx, false, untracked); } } } - /// A level down operation during traversal. It may create a new child frame if the - /// current frame does not have one. It may also split an untyped huge page into - /// smaller pages if we have an end address within the next mapped untyped huge page. - /// - /// If creation may happen the map property of intermediate level `prop` should be - /// passed in correctly. Whether the map property matters in an intermediate - /// level is architecture-dependent. - /// - /// Also, the staticness of the page table is guaranteed if the caller make sure - /// that there is a child node for the current node. - fn level_down(&mut self, prop: Option) { + /// Go down a level assuming a child page table exists. + fn level_down(&mut self) { debug_assert!(self.level > 1); - // Check if the child frame exists. - let nxt_lvl_frame = { - let idx = pte_index::(self.va, self.level); - let child = self.cur_child(); - if let Child::PageTable(nxt_lvl_frame) = child { - Some(nxt_lvl_frame.clone()) - } else { - None - } - }; - // Create a new child frame if it does not exist. Sure it could be done only if - // it is allowed to modify the page table. - let nxt_lvl_frame = nxt_lvl_frame.unwrap_or_else(|| { - // If it already maps an untyped huge page, we should split it. 
- if self.cur_child().is_untyped() { - let level = self.level; - let idx = self.cur_idx(); - self.cur_node_mut().split_untracked_huge(level, idx); - let Child::PageTable(nxt_lvl_frame) = self.cur_child() else { - unreachable!() - }; - nxt_lvl_frame.clone() - } else if self.cur_child().is_none() { - let new_frame = Arc::new(SpinLock::new(PageTableFrame::::new())); - let idx = self.cur_idx(); - self.cur_node_mut().set_child( - idx, - Child::PageTable(new_frame.clone()), - prop, - false, - ); - new_frame - } else { - panic!("Trying to level down when it is mapped to a typed frame"); - } - }); - self.guards[(C::NR_LEVELS - self.level) as usize + 1] = Some(nxt_lvl_frame.lock_arc()); - self.level -= 1; + let idx = pte_index::(self.va, self.level); + if let Child::PageTable(nxt_lvl_frame) = self.cur_child() { + self.level -= 1; + self.guards[(C::NR_LEVELS - self.level) as usize] = Some(nxt_lvl_frame.lock()); + } else { + panic!("Trying to level down when it is not mapped to a page table"); + } } - fn cur_node(&self) -> &ArcSpinLockGuard> { + fn cur_node(&self) -> &PageTableFrame { self.guards[(C::NR_LEVELS - self.level) as usize] .as_ref() .unwrap() } - fn cur_node_mut(&mut self) -> &mut ArcSpinLockGuard> { - self.guards[(C::NR_LEVELS - self.level) as usize] - .as_mut() - .unwrap() - } - fn cur_idx(&self) -> usize { pte_index::(self.va, self.level) } - fn cur_child(&self) -> &Child { - self.cur_node().child(self.cur_idx()) + fn cur_child(&self) -> Child { + self.cur_node() + .child(self.cur_idx(), !self.in_untracked_range()) } - fn read_cur_pte_prop(&self) -> PageProperty { - self.cur_node().read_pte_prop(self.cur_idx()) + fn read_cur_pte(&self) -> E { + self.cur_node().read_pte(self.cur_idx()) + } + + /// Tell if the current virtual range must contain untracked mappings. + /// + /// In the kernel mode, this is aligned with the definition in [`crate::vm::kspace`]. + /// Only linear mappings in the kernel are considered as untracked mappings. + /// + /// All mappings in the user mode are tracked. And all mappings in the IOMMU + /// page table are untracked. + fn in_untracked_range(&self) -> bool { + TypeId::of::() == TypeId::of::() + || crate::vm::kspace::LINEAR_MAPPING_VADDR_RANGE.contains(&self.va) + } +} + +impl<'a, M: PageTableMode, E: PageTableEntryTrait, C: PagingConstsTrait> Iterator + for Cursor<'a, M, E, C> +where + [(); C::NR_LEVELS as usize]:, +{ + type Item = PageTableQueryResult; + + fn next(&mut self) -> Option { + let result = self.query(); + if result.is_some() { + self.move_forward(); + } + result } } #[cfg(feature = "page_table_recycle")] -impl Drop for CursorMut<'_, M, E, C> +impl Drop for Cursor<'_, M, E, C> where - [(); nr_subpage_per_huge::()]:, [(); C::NR_LEVELS as usize]:, { fn drop(&mut self) { @@ -538,12 +312,14 @@ where // Drop the lock on the guard level. self.guards[C::NR_LEVELS - self.guard_level] = None; // Re-walk the page table to retreive the locks. - self.guards[0] = Some(self.pt.root_frame.lock_arc()); + self.guards[0] = Some(self.pt.root.copy_handle().lock()); self.level = C::NR_LEVELS; + let cur_pte = self.read_cur_pte(); + let cur_child_is_pt = cur_pte.is_present() && !cur_pte.is_last(self.level); // Another cursor can unmap the guard level node before this cursor // is dropped, we can just do our best here when re-walking. - while self.level > self.guard_level && self.cur_child().is_pt() { - self.level_down(None); + while self.level > self.guard_level && cur_child_is_pt { + self.level_down(); } // Doing final cleanup by [`CursorMut::level_up`] to the root. 
while self.level < C::NR_LEVELS { @@ -552,58 +328,320 @@ where } } -#[derive(Clone, Debug)] -pub(crate) enum PageTableQueryResult { - NotMapped { - va: Vaddr, - len: usize, - }, - Mapped { - va: Vaddr, - frame: VmFrame, - prop: PageProperty, - }, - MappedUntyped { - va: Vaddr, - pa: Paddr, - len: usize, - prop: PageProperty, - }, -} - -/// The read-only cursor for traversal over the page table. +/// The cursor of a page table that is capable of map, unmap or protect pages. /// -/// It implements the `Iterator` trait to provide a convenient way to query over the page table. -pub(crate) struct Cursor<'a, M: PageTableMode, E: PageTableEntryTrait, C: PagingConstsTrait> +/// Also, it has all the capabilities of a [`Cursor`]. A virtual address range +/// in a page table can only be accessed by one cursor whether it is mutable or not. +#[derive(Debug)] +pub(crate) struct CursorMut<'a, M: PageTableMode, E: PageTableEntryTrait, C: PagingConstsTrait>( + Cursor<'a, M, E, C>, +) where - [(); nr_subpage_per_huge::()]:, - [(); C::NR_LEVELS as usize]:, -{ - inner: CursorMut<'a, M, E, C>, -} + [(); C::NR_LEVELS as usize]:; -impl<'a, M: PageTableMode, E: PageTableEntryTrait, C: PagingConstsTrait> Cursor<'a, M, E, C> +impl<'a, M: PageTableMode, E: PageTableEntryTrait, C: PagingConstsTrait> CursorMut<'a, M, E, C> where - [(); nr_subpage_per_huge::()]:, [(); C::NR_LEVELS as usize]:, { pub(super) fn new( pt: &'a PageTable, va: &Range, ) -> Result { - CursorMut::new(pt, va).map(|inner| Self { inner }) - } -} - -impl<'a, M: PageTableMode, E: PageTableEntryTrait, C: PagingConstsTrait> Iterator - for Cursor<'a, M, E, C> -where - [(); nr_subpage_per_huge::()]:, - [(); C::NR_LEVELS as usize]:, -{ - type Item = PageTableQueryResult; - - fn next(&mut self) -> Option { - self.inner.query() + Cursor::new(pt, va).map(|inner| Self(inner)) + } + + /// Get the information of the current slot and go to the next slot. + /// + /// We choose not to implement `Iterator` or `IterMut` for [`CursorMut`] + /// because the mutable cursor is indeed not an iterator. + pub(crate) fn next(&mut self) -> Option { + self.0.next() + } + + /// Jump to the given virtual address. + /// + /// It panics if the address is out of the range where the cursor is required to operate, + /// or has bad alignment. + pub(crate) fn jump(&mut self, va: Vaddr) { + assert!(self.0.barrier_va.contains(&va)); + assert!(va % C::BASE_PAGE_SIZE == 0); + loop { + let cur_node_start = self.0.va & !(page_size::(self.0.level + 1) - 1); + let cur_node_end = cur_node_start + page_size::(self.0.level + 1); + // If the address is within the current node, we can jump directly. + if cur_node_start <= va && va < cur_node_end { + self.0.va = va; + return; + } + // There is a corner case that the cursor is depleted, sitting at the start of the + // next node but the next node is not locked because the parent is not locked. + if self.0.va >= self.0.barrier_va.end && self.0.level == self.0.guard_level { + self.0.va = va; + return; + } + debug_assert!(self.0.level < self.0.guard_level); + self.0.level_up(); + } + } + + /// Map the range starting from the current address to a `VmFrame`. + /// + /// # Panic + /// + /// This function will panic if + /// - the virtual address range to be mapped is out of the range; + /// - the alignment of the frame is not satisfied by the virtual address; + /// - it is already mapped to a huge page while the caller wants to map a smaller one. 
+ /// + /// # Safety + /// + /// The caller should ensure that the virtual range being mapped does + /// not affect kernel's memory safety. + pub(crate) unsafe fn map(&mut self, frame: VmFrame, prop: PageProperty) { + let end = self.0.va + frame.size(); + assert!(end <= self.0.barrier_va.end); + debug_assert!(!self.0.in_untracked_range()); + // Go down if not applicable. + while self.0.level > C::HIGHEST_TRANSLATION_LEVEL + || self.0.va % page_size::(self.0.level) != 0 + || self.0.va + page_size::(self.0.level) > end + { + let pte = self.0.read_cur_pte(); + if pte.is_present() && !pte.is_last(self.0.level) { + self.0.level_down(); + } else if !pte.is_present() { + self.level_down_create(); + } else { + panic!("Mapping a smaller page in an already mapped huge page"); + } + continue; + } + debug_assert_eq!(self.0.level, frame.level()); + // Map the current page. + let idx = self.0.cur_idx(); + let level = self.0.level; + self.cur_node_mut().set_child_frame(idx, frame, prop); + self.0.move_forward(); + } + + /// Map the range starting from the current address to a physical address range. + /// + /// The function will map as more huge pages as possible, and it will split + /// the huge pages into smaller pages if necessary. If the input range is + /// large, the resulting mappings may look like this (if very huge pages + /// supported): + /// + /// ```text + /// start end + /// |----|----------------|--------------------------------|----|----| + /// base huge very huge base base + /// 4KiB 2MiB 1GiB 4KiB 4KiB + /// ``` + /// + /// In practice it is not suggested to use this method for safety and conciseness. + /// + /// # Panic + /// + /// This function will panic if + /// - the virtual address range to be mapped is out of the range. + /// + /// # Safety + /// + /// The caller should ensure that + /// - the range being mapped does not affect kernel's memory safety; + /// - the physical address to be mapped is valid and safe to use; + /// - it is allowed to map untracked pages in this virtual address range. + pub(crate) unsafe fn map_pa(&mut self, pa: &Range, prop: PageProperty) { + let end = self.0.va + pa.len(); + let mut pa = pa.start; + assert!(end <= self.0.barrier_va.end); + while self.0.va < end { + // We ensure not mapping in reserved kernel shared tables or releasing it. + // Although it may be an invariant for all architectures and will be optimized + // out by the compiler since `C::NR_LEVELS - 1 > C::HIGHEST_TRANSLATION_LEVEL`. + let is_kernel_shared_node = + TypeId::of::() == TypeId::of::() && self.0.level >= C::NR_LEVELS - 1; + if self.0.level > C::HIGHEST_TRANSLATION_LEVEL + || is_kernel_shared_node + || self.0.va % page_size::(self.0.level) != 0 + || self.0.va + page_size::(self.0.level) > end + || pa % page_size::(self.0.level) != 0 + { + let pte = self.0.read_cur_pte(); + if pte.is_present() && !pte.is_last(self.0.level) { + self.0.level_down(); + } else if !pte.is_present() { + self.level_down_create(); + } else { + self.level_down_split(); + } + continue; + } + // Map the current page. + debug_assert!(self.0.in_untracked_range()); + let idx = self.0.cur_idx(); + let level = self.0.level; + self.cur_node_mut().set_child_untracked(idx, pa, prop); + pa += page_size::(level); + self.0.move_forward(); + } + } + + /// Unmap the range starting from the current address with the given length of virtual address. + /// + /// # Safety + /// + /// The caller should ensure that the range being unmapped does not affect kernel's memory safety. 
+ /// + /// # Panic + /// + /// This function will panic if: + /// - the range to be unmapped is out of the range where the cursor is required to operate; + /// - the range covers only a part of a page. + pub(crate) unsafe fn unmap(&mut self, len: usize) { + let end = self.0.va + len; + assert!(end <= self.0.barrier_va.end); + assert!(end % C::BASE_PAGE_SIZE == 0); + while self.0.va < end { + let cur_pte = self.0.read_cur_pte(); + let untracked = self.0.in_untracked_range(); + + // Skip if it is already invalid. + if !cur_pte.is_present() { + if self.0.va + page_size::(self.0.level) > end { + break; + } + self.0.move_forward(); + continue; + } + + // We check among the conditions that may lead to a level down. + // We ensure not unmapping in reserved kernel shared tables or releasing it. + let is_kernel_shared_node = + TypeId::of::() == TypeId::of::() && self.0.level >= C::NR_LEVELS - 1; + if is_kernel_shared_node + || self.0.va % page_size::(self.0.level) != 0 + || self.0.va + page_size::(self.0.level) > end + { + if cur_pte.is_present() && !cur_pte.is_last(self.0.level) { + self.0.level_down(); + } else if untracked { + self.level_down_split(); + } else { + unreachable!(); + } + continue; + } + + // Unmap the current page. + let idx = self.0.cur_idx(); + self.cur_node_mut().unset_child(idx, untracked); + self.0.move_forward(); + } + } + + /// Apply the given operation to all the mappings within the range. + /// + /// The funtction will return an error if it is not allowed to protect an invalid range and + /// it does so, or if the range to be protected only covers a part of a page. + /// + /// # Safety + /// + /// The caller should ensure that the range being protected does not affect kernel's memory safety. + /// + /// # Panic + /// + /// This function will panic if: + /// - the range to be protected is out of the range where the cursor is required to operate. + pub(crate) unsafe fn protect( + &mut self, + len: usize, + mut op: impl FnMut(&mut PageProperty), + allow_protect_absent: bool, + ) -> Result<(), PageTableError> { + let end = self.0.va + len; + assert!(end <= self.0.barrier_va.end); + while self.0.va < end { + let cur_pte = self.0.read_cur_pte(); + if !cur_pte.is_present() { + if !allow_protect_absent { + return Err(PageTableError::ProtectingAbsent); + } + self.0.move_forward(); + continue; + } + // Go down if it's not a last node. + if !cur_pte.is_last(self.0.level) { + self.0.level_down(); + continue; + } + // Go down if the page size is too big and we are protecting part + // of untracked huge pages. + let vaddr_not_fit = self.0.va % page_size::(self.0.level) != 0 + || self.0.va + page_size::(self.0.level) > end; + if self.0.in_untracked_range() && vaddr_not_fit { + self.level_down_split(); + continue; + } else if vaddr_not_fit { + return Err(PageTableError::ProtectingPartial); + } + let idx = self.0.cur_idx(); + let level = self.0.level; + let mut pte_prop = cur_pte.prop(); + op(&mut pte_prop); + self.cur_node_mut().protect(idx, pte_prop); + self.0.move_forward(); + } + Ok(()) + } + + /// Consume itself and leak the root guard for the caller if it locked the root level. + /// + /// It is useful when the caller wants to keep the root guard while the cursor should be dropped. 
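// Illustrative sketch, not from this patch: a hypothetical caller-side use of
// the `protect` API defined above, shown as a comment because it cannot compile
// outside the crate. It downgrades one base page at the cursor's current
// position to read-only; `cursor` is assumed to be a `CursorMut` created over a
// range covering that page, and the call is `unsafe` because the caller must
// guarantee the change cannot break the kernel's memory safety.
//
//     unsafe {
//         cursor
//             .protect(PAGE_SIZE, |prop| prop.flags = PageFlags::R, true)
//             .unwrap();
//     }
//
// Passing `true` for `allow_protect_absent` makes unmapped holes in the range
// be skipped instead of returning `PageTableError::ProtectingAbsent`.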
+ pub(super) fn leak_root_guard(mut self) -> Option> { + if self.0.guard_level != C::NR_LEVELS { + return None; + } + while self.0.level < C::NR_LEVELS { + self.0.level_up(); + } + self.0.guards[0].take() + // Ok to drop the cursor here because we ensure not to access the page table if the current + // level is the root level when running the dropping method. + } + + /// Go down a level assuming the current slot is absent. + /// + /// This method will create a new child frame and go down to it. + fn level_down_create(&mut self) { + debug_assert!(self.0.level > 1); + let new_frame = PageTableFrame::::alloc(self.0.level - 1); + let idx = self.0.cur_idx(); + let untracked = self.0.in_untracked_range(); + self.cur_node_mut() + .set_child_pt(idx, new_frame.clone_raw(), untracked); + self.0.level -= 1; + self.0.guards[(C::NR_LEVELS - self.0.level) as usize] = Some(new_frame); + } + + /// Go down a level assuming the current slot is an untracked huge page. + /// + /// This method will split the huge page and go down to the next level. + fn level_down_split(&mut self) { + debug_assert!(self.0.level > 1); + debug_assert!(self.0.in_untracked_range()); + let idx = self.0.cur_idx(); + self.cur_node_mut().split_untracked_huge(idx); + let Child::PageTable(new_frame) = self.0.cur_child() else { + unreachable!(); + }; + self.0.level -= 1; + self.0.guards[(C::NR_LEVELS - self.0.level) as usize] = Some(new_frame.lock()); + } + + fn cur_node_mut(&mut self) -> &mut PageTableFrame { + self.0.guards[(C::NR_LEVELS - self.0.level) as usize] + .as_mut() + .unwrap() } } diff --git a/framework/aster-frame/src/vm/page_table/frame.rs b/framework/aster-frame/src/vm/page_table/frame.rs index a7ee48ef1..47ba87251 100644 --- a/framework/aster-frame/src/vm/page_table/frame.rs +++ b/framework/aster-frame/src/vm/page_table/frame.rs @@ -1,125 +1,413 @@ // SPDX-License-Identifier: MPL-2.0 -use alloc::{boxed::Box, sync::Arc}; +//! This module defines page table frame abstractions and the handle. +//! +//! The page table frame is also frequently referred to as a page table in many architectural +//! documentations. We also call it the page table node if emphasizing the tree structure. +//! +//! This module leverages the frame metadata to manage the page table frames, which makes it +//! easier to provide the following guarantees: +//! +//! The page table frame is not freed when it is still in use by: +//! - a parent page table frame, +//! - or a handle to a page table frame, +//! - or a processor. +//! This is implemented by using a reference counter in the frame metadata. If the above +//! conditions are not met, the page table frame is ensured to be freed upon dropping the last +//! reference. +//! +//! One can acquire exclusive access to a page table frame using merely the physical address of +//! the page table frame. This is implemented by a lock in the frame metadata. Here the +//! exclusiveness is only ensured for kernel code, and the processor's MMU is able to access the +//! page table frame while a lock is held. So the modification to the PTEs should be done after +//! the initialization of the entity that the PTE points to. This is taken care in this module. +//! 
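// Illustrative sketch, not from this patch: the module documentation above says
// that each page table frame is protected by a reference count and a lock, both
// stored in the frame metadata. The self-contained toy below demonstrates that
// mechanism with the same atomic orderings the patch uses (a `compare_exchange`
// spin for the lock in `RawPageTableFrame::lock`, and `fetch_sub` plus an
// `Acquire` fence on the last drop). `ToyPtMeta` and its methods are
// hypothetical names; the real fields are `FrameMeta::counter8_1` (the lock)
// and `FrameMeta::counter32_1` (the reference count).
use core::sync::atomic::{fence, AtomicU32, AtomicU8, Ordering};

struct ToyPtMeta {
    lock_bit: AtomicU8,   // 0 = unlocked, 1 = locked
    ref_count: AtomicU32, // referencers: handles, parent PTEs, and CPUs
}

impl ToyPtMeta {
    // Spin until kernel-side exclusive access is acquired. The MMU may still
    // walk this frame concurrently, which is why a PTE must only be written
    // after whatever it points to is fully initialized.
    fn lock(&self) {
        while self
            .lock_bit
            .compare_exchange(0, 1, Ordering::Acquire, Ordering::Relaxed)
            .is_err()
        {
            core::hint::spin_loop();
        }
    }

    fn unlock(&self) {
        self.lock_bit.store(0, Ordering::Release);
    }

    // Returns `true` if this was the last reference, in which case the caller
    // must free the frame and drop the children it still points to.
    fn drop_ref(&self) -> bool {
        if self.ref_count.fetch_sub(1, Ordering::Release) == 1 {
            // Pairs with the release above, for the same reason as `Arc::drop`.
            fence(Ordering::Acquire);
            return true;
        }
        false
    }
}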
+ +use core::{marker::PhantomData, mem::ManuallyDrop, ops::Range, panic, sync::atomic::Ordering}; use super::{nr_subpage_per_huge, page_size, PageTableEntryTrait}; use crate::{ - sync::SpinLock, - vm::{page_prop::PageProperty, Paddr, PagingConstsTrait, PagingLevel, VmAllocOptions, VmFrame}, + arch::mm::{PageTableEntry, PagingConsts}, + vm::{ + frame::allocator::FRAME_ALLOCATOR, paddr_to_vaddr, page_prop::PageProperty, FrameMetaRef, + FrameType, Paddr, PagingConstsTrait, PagingLevel, VmFrame, PAGE_SIZE, + }, }; -/// A page table frame. -/// It's also frequently referred to as a page table in many architectural documentations. -/// Cloning a page table frame will create a deep copy of the page table. +/// The raw handle to a page table frame. +/// +/// This handle is a referencer of a page table frame. Thus creating and dropping it will affect +/// the reference count of the page table frame. If dropped the raw handle as the last reference, +/// the page table frame and subsequent children will be freed. +/// +/// Only the CPU or a PTE can access a page table frame using a raw handle. To access the page +/// table frame from the kernel code, use the handle [`PageTableFrame`]. #[derive(Debug)] -pub(super) struct PageTableFrame +pub(super) struct RawPageTableFrame( + Paddr, + PagingLevel, + PhantomData<(E, C)>, +) +where + [(); C::NR_LEVELS as usize]:; + +impl RawPageTableFrame where - [(); nr_subpage_per_huge::()]:, [(); C::NR_LEVELS as usize]:, { - inner: VmFrame, - /// TODO: all the following fields can be removed if frame metadata is introduced. - /// Here we allow 2x space overhead each frame temporarily. - #[allow(clippy::type_complexity)] - children: Box<[Child; nr_subpage_per_huge::()]>, - nr_valid_children: usize, + pub(super) fn paddr(&self) -> Paddr { + self.0 + } + + /// Convert a raw handle to an accessible handle by pertaining the lock. + pub(super) fn lock(self) -> PageTableFrame { + let meta = unsafe { FrameMetaRef::from_raw(self.0, 1) }; + let level = self.1; + // Acquire the lock. + while meta + .counter8_1 + .compare_exchange(0, 1, Ordering::Acquire, Ordering::Relaxed) + .is_err() + { + core::hint::spin_loop(); + } + // Prevent dropping the handle. + let _ = ManuallyDrop::new(self); + PageTableFrame:: { + meta, + newly_created: false, + level, + _phantom: PhantomData, + } + } + + /// Create a copy of the handle. + pub(super) fn copy_handle(&self) -> Self { + let meta = unsafe { FrameMetaRef::from_raw(self.0, 1) }; + // Increment the reference count. + meta.counter32_1.fetch_add(1, Ordering::Relaxed); + Self(self.0, self.1, PhantomData) + } + + pub(super) fn nr_valid_children(&self) -> u16 { + let meta = unsafe { FrameMetaRef::from_raw(self.0, 1) }; + meta.counter16_1.load(Ordering::Relaxed) + } + + /// Activate the page table assuming it is a root page table. + /// + /// Here we ensure not dropping an active page table by making a + /// processor a page table owner. When activating a page table, the + /// reference count of the last activated page table is decremented. + /// And that of the current page table is incremented. + /// + /// # Safety + /// + /// The caller must ensure that the page table to be activated has + /// proper mappings for the kernel and has the correct const parameters + /// matching the current CPU. 
+ pub(crate) unsafe fn activate(&self) { + use core::sync::atomic::AtomicBool; + + use crate::{ + arch::mm::{activate_page_table, current_page_table_paddr}, + vm::CachePolicy, + }; + + debug_assert_eq!(self.1, PagingConsts::NR_LEVELS); + + let last_activated_paddr = current_page_table_paddr(); + + activate_page_table(self.0, CachePolicy::Writeback); + + if last_activated_paddr == self.0 { + return; + } + + // Increment the reference count of the current page table. + + FrameMetaRef::from_raw(self.0, 1) + .counter32_1 + .fetch_add(1, Ordering::Relaxed); + + // Decrement the reference count of the last activated page table. + + // Boot page tables are not tracked with [`PageTableFrame`], but + // all page tables after the boot stage are tracked. + // + // TODO: the `cpu_local` implementation currently is underpowered, + // there's no need using `AtomicBool` here. + crate::cpu_local! { + static CURRENT_IS_BOOT_PT: AtomicBool = AtomicBool::new(true); + } + if !CURRENT_IS_BOOT_PT.load(Ordering::Acquire) { + // Restore and drop the last activated page table. + let _last_activated_pt = + Self(last_activated_paddr, PagingConsts::NR_LEVELS, PhantomData); + } else { + CURRENT_IS_BOOT_PT.store(false, Ordering::Release); + } + } } -pub(super) type PtfRef = Arc>>; - -#[derive(Debug)] -pub(super) enum Child +impl Drop for RawPageTableFrame where - [(); nr_subpage_per_huge::()]:, [(); C::NR_LEVELS as usize]:, { - PageTable(PtfRef), + fn drop(&mut self) { + let mut meta = unsafe { FrameMetaRef::from_raw(self.0, 1) }; + if meta.counter32_1.fetch_sub(1, Ordering::Release) == 1 { + // A fence is needed here with the same reasons stated in the implementation of + // `Arc::drop`: . + core::sync::atomic::fence(Ordering::Acquire); + // Drop the children. + for i in 0..nr_subpage_per_huge::() { + // SAFETY: the index is within the bound and PTE is plain-old-data. The + // address is aligned as well. We also have an exclusive access ensured + // by reference counting. + let pte_ptr = unsafe { (paddr_to_vaddr(self.paddr()) as *const E).add(i) }; + let pte = unsafe { pte_ptr.read() }; + if pte.is_present() { + // Just restore the handle and drop the handle. + if !pte.is_last(self.1) { + // This is a page table. + let _dropping_raw = Self(pte.paddr(), self.1 - 1, PhantomData); + } else { + // This is a frame. You cannot drop a page table node that maps to + // untracked frames. This must be verified. + let frame_meta = unsafe { FrameMetaRef::from_raw(pte.paddr(), self.1) }; + let _dropping_frame = VmFrame { meta: frame_meta }; + } + } + } + // SAFETY: the frame is initialized and the physical address points to initialized memory. + // We also have and exclusive access ensured by reference counting. + unsafe { + meta.deref_mut().frame_type = FrameType::Free; + } + // Recycle this page table frame. + FRAME_ALLOCATOR + .get() + .unwrap() + .lock() + .dealloc(self.0 / PAGE_SIZE, 1); + } + } +} + +/// A mutable handle to a page table frame. +/// +/// The page table frame can own a set of handles to children, ensuring that the children +/// don't outlive the page table frame. Cloning a page table frame will create a deep copy +/// of the page table. Dropping the page table frame will also drop all handles if the page +/// table frame has no references. You can set the page table frame as a child of another +/// page table frame. 
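
The `Drop` implementation above frees a subtree by reconstructing a handle for every present child PTE and letting those handles drop in turn. The sketch below mimics that recursive-release shape with owned toy types; the real code has to rebuild handles by hand because children are stored only as raw PTEs, whereas Rust ownership does it automatically here.

    // A toy tree whose nodes release their children when the node itself is freed.
    enum ToyChild {
        Table(Box<ToyNode>), // analogous to a PTE pointing to a child page table
        Leaf(u32),           // analogous to a PTE mapping a frame
    }

    struct ToyNode {
        children: Vec<Option<ToyChild>>,
    }

    impl Drop for ToyNode {
        fn drop(&mut self) {
            println!("freeing a node");
            for slot in self.children.iter_mut() {
                // The real Drop reads each raw PTE and restores a raw page-table
                // or frame handle just so that dropping it releases the child;
                // with owned types, taking the slot has the same effect.
                let _ = slot.take();
            }
        }
    }

    fn main() {
        let leaf_level = ToyNode {
            children: vec![Some(ToyChild::Leaf(42)), None],
        };
        let root = ToyNode {
            children: vec![Some(ToyChild::Table(Box::new(leaf_level)))],
        };
        drop(root); // prints "freeing a node" twice: the root, then the child table
    }
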
+#[derive(Debug)] +pub(super) struct PageTableFrame< + E: PageTableEntryTrait = PageTableEntry, + C: PagingConstsTrait = PagingConsts, +> where + [(); C::NR_LEVELS as usize]:, +{ + pub(super) meta: FrameMetaRef, + /// This is an optimization to save a few atomic operations on the lock. + /// + /// If the handle is newly created using [`Self::alloc`], this is true and there's no need + /// to acquire the lock since the handle is exclusive. However if the handle is acquired + /// from a [`RawPageTableFrame`], this is false and the lock should be acquired. + newly_created: bool, + /// The level of the page table frame. This is needed because we cannot tell from a PTE + /// alone if it is a page table or a frame. + level: PagingLevel, + _phantom: core::marker::PhantomData<(E, C)>, +} + +/// A child of a page table frame. +#[derive(Debug)] +pub(super) enum Child +where + [(); C::NR_LEVELS as usize]:, +{ + PageTable(RawPageTableFrame), Frame(VmFrame), - /// Frames not tracked by the frame allocator. + /// Frames not tracked by handles. Untracked(Paddr), None, } -impl Child -where - [(); nr_subpage_per_huge::()]:, - [(); C::NR_LEVELS as usize]:, -{ - pub(super) fn is_pt(&self) -> bool { - matches!(self, Child::PageTable(_)) - } - pub(super) fn is_frame(&self) -> bool { - matches!(self, Child::Frame(_)) - } - pub(super) fn is_none(&self) -> bool { - matches!(self, Child::None) - } - pub(super) fn is_some(&self) -> bool { - !self.is_none() - } - pub(super) fn is_untyped(&self) -> bool { - matches!(self, Child::Untracked(_)) - } - /// Is a last entry that maps to a physical address. - pub(super) fn is_last(&self) -> bool { - matches!(self, Child::Frame(_) | Child::Untracked(_)) - } - fn paddr(&self) -> Option { - match self { - Child::PageTable(node) => { - // Chance if dead lock is zero because it is only called by [`PageTableFrame::protect`], - // and the cursor will not protect a node while holding the lock. - Some(node.lock().start_paddr()) - } - Child::Frame(frame) => Some(frame.start_paddr()), - Child::Untracked(pa) => Some(*pa), - Child::None => None, - } - } -} - -impl Clone for Child -where - [(); nr_subpage_per_huge::()]:, - [(); C::NR_LEVELS as usize]:, -{ - /// This is a shallow copy. - fn clone(&self) -> Self { - match self { - Child::PageTable(ptf) => Child::PageTable(ptf.clone()), - Child::Frame(frame) => Child::Frame(frame.clone()), - Child::Untracked(pa) => Child::Untracked(*pa), - Child::None => Child::None, - } - } -} - impl PageTableFrame where - [(); nr_subpage_per_huge::()]:, [(); C::NR_LEVELS as usize]:, { - pub(super) fn new() -> Self { + /// Allocate a new empty page table frame. + /// + /// This function returns an owning handle. The newly created handle does not + /// set the lock bit for performance as it is exclusive and unlocking is an + /// extra unnecessary expensive operation. + pub(super) fn alloc(level: PagingLevel) -> Self { + let frame = FRAME_ALLOCATOR.get().unwrap().lock().alloc(1).unwrap() * PAGE_SIZE; + let mut meta = unsafe { FrameMetaRef::from_raw(frame, 1) }; + // The reference count is initialized to 1. + meta.counter32_1.store(1, Ordering::Relaxed); + // The lock is initialized to 0. + meta.counter8_1.store(0, Ordering::Release); + // SAFETY: here we have an exlusive access since it's just initialized. + unsafe { + meta.deref_mut().frame_type = FrameType::PageTable; + } + + // Zero out the page table frame. 
+ let ptr = paddr_to_vaddr(meta.paddr()) as *mut u8; + unsafe { core::ptr::write_bytes(ptr, 0, PAGE_SIZE) }; + Self { - inner: VmAllocOptions::new(1).alloc_single().unwrap(), - children: Box::new(core::array::from_fn(|_| Child::None)), - nr_valid_children: 0, + meta, + newly_created: true, + level, + _phantom: PhantomData, } } - pub(super) fn start_paddr(&self) -> Paddr { - self.inner.start_paddr() + /// Convert the handle into a raw handle to be stored in a PTE or CPU. + pub(super) fn into_raw(mut self) -> RawPageTableFrame { + if !self.newly_created { + self.meta.counter8_1.store(0, Ordering::Release); + } else { + self.newly_created = false; + } + let raw = RawPageTableFrame(self.start_paddr(), self.level, PhantomData); + let _ = ManuallyDrop::new(self); + raw } - pub(super) fn child(&self, idx: usize) -> &Child { + /// Get a raw handle while still preserving the original handle. + pub(super) fn clone_raw(&self) -> RawPageTableFrame { + self.meta.counter32_1.fetch_add(1, Ordering::Relaxed); + RawPageTableFrame(self.start_paddr(), self.level, PhantomData) + } + + /// Get an extra reference of the child at the given index. + pub(super) fn child(&self, idx: usize, tracked: bool) -> Child { debug_assert!(idx < nr_subpage_per_huge::()); - &self.children[idx] + let pte = self.read_pte(idx); + if !pte.is_present() { + Child::None + } else { + let paddr = pte.paddr(); + if !pte.is_last(self.level) { + let meta = unsafe { FrameMetaRef::from_raw(paddr, 1) }; + // This is the handle count. We are creating a new handle thus increment the counter. + meta.counter32_1.fetch_add(1, Ordering::Relaxed); + Child::PageTable(RawPageTableFrame(paddr, self.level - 1, PhantomData)) + } else if tracked { + let meta = unsafe { FrameMetaRef::from_raw(paddr, self.level) }; + // This is the handle count. We are creating a new handle thus increment the counter. + meta.counter32_1.fetch_add(1, Ordering::Relaxed); + Child::Frame(VmFrame { meta }) + } else { + Child::Untracked(paddr) + } + } + } + + /// Make a copy of the page table frame. + /// + /// This function allows you to control about the way to copy the children. + /// For indexes in `deep`, the children are deep copied and this function will be recursively called. + /// For indexes in `shallow`, the children are shallow copied as new references. + /// + /// You cannot shallow copy a child that is mapped to a frame. Deep copying a frame child will not + /// copy the mapped frame but will copy the handle to the frame. + /// + /// You cannot either deep copy or shallow copy a child that is mapped to an untracked frame. + /// + /// The ranges must be disjoint. 
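
The `child` accessor above needs the caller's `tracked` flag because a leaf PTE alone cannot distinguish a handle-tracked frame from untracked physical memory; only the cursor knows which range it is walking. A caller then handles one of four outcomes, sketched here with a toy enum that mirrors (but is not) the `Child` type:

    // Toy mirror of the four cases a decoded child slot can take.
    enum ToyChild {
        PageTable(usize), // physical address of a child table to descend into
        Frame(usize),     // a tracked, handle-counted frame
        Untracked(usize), // a physical range not managed by handles
        None,             // the slot is empty
    }

    fn describe(child: ToyChild) -> String {
        match child {
            ToyChild::PageTable(pa) => format!("descend into child table at {:#x}", pa),
            ToyChild::Frame(pa) => format!("tracked frame at {:#x}", pa),
            ToyChild::Untracked(pa) => format!("untracked mapping to {:#x}", pa),
            ToyChild::None => "absent".to_string(),
        }
    }

    fn main() {
        assert_eq!(describe(ToyChild::None), "absent");
        assert_eq!(describe(ToyChild::Frame(0x7000)), "tracked frame at 0x7000");
        assert_eq!(describe(ToyChild::Untracked(0x9000)), "untracked mapping to 0x9000");
    }
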
+ pub(super) unsafe fn make_copy(&self, deep: Range, shallow: Range) -> Self { + let mut new_frame = Self::alloc(self.level); + debug_assert!(deep.end <= nr_subpage_per_huge::()); + debug_assert!(shallow.end <= nr_subpage_per_huge::()); + debug_assert!(deep.end <= shallow.start || deep.start >= shallow.end); + for i in deep { + match self.child(i, /*meaningless*/ true) { + Child::PageTable(pt) => { + let guard = pt.copy_handle().lock(); + let new_child = guard.make_copy(0..nr_subpage_per_huge::(), 0..0); + new_frame.set_child_pt(i, new_child.into_raw(), /*meaningless*/ true); + } + Child::Frame(frame) => { + let prop = self.read_pte_prop(i); + new_frame.set_child_frame(i, frame.clone(), prop); + } + Child::None => {} + Child::Untracked(_) => { + unreachable!(); + } + } + } + for i in shallow { + debug_assert_eq!(self.level, C::NR_LEVELS); + match self.child(i, /*meaningless*/ true) { + Child::PageTable(pt) => { + new_frame.set_child_pt(i, pt.copy_handle(), /*meaningless*/ true); + } + Child::None => {} + Child::Frame(_) | Child::Untracked(_) => { + unreachable!(); + } + } + } + new_frame + } + + /// Remove a child if the child at the given index is present. + pub(super) fn unset_child(&self, idx: usize, in_untracked_range: bool) { + debug_assert!(idx < nr_subpage_per_huge::()); + self.overwrite_pte(idx, None, in_untracked_range); + } + + /// Set a child page table at a given index. + pub(super) fn set_child_pt( + &mut self, + idx: usize, + pt: RawPageTableFrame, + in_untracked_range: bool, + ) { + // They should be ensured by the cursor. + debug_assert!(idx < nr_subpage_per_huge::()); + debug_assert_eq!(pt.1, self.level - 1); + let pte = Some(E::new_pt(pt.paddr())); + self.overwrite_pte(idx, pte, in_untracked_range); + // The ownership is transferred to a raw PTE. Don't drop the handle. + let _ = ManuallyDrop::new(pt); + } + + /// Map a frame at a given index. + pub(super) fn set_child_frame(&mut self, idx: usize, frame: VmFrame, prop: PageProperty) { + // They should be ensured by the cursor. + debug_assert!(idx < nr_subpage_per_huge::()); + debug_assert_eq!(frame.level(), self.level); + let pte = Some(E::new_frame(frame.start_paddr(), self.level, prop)); + self.overwrite_pte(idx, pte, false); + // The ownership is transferred to a raw PTE. Don't drop the handle. + let _ = ManuallyDrop::new(frame); + } + + /// Set an untracked child frame at a given index. + /// + /// # Safety + /// + /// The caller must ensure that the physical address is valid and safe to map. + pub(super) unsafe fn set_child_untracked(&mut self, idx: usize, pa: Paddr, prop: PageProperty) { + // It should be ensured by the cursor. + debug_assert!(idx < nr_subpage_per_huge::()); + let pte = Some(E::new_frame(pa, self.level, prop)); + self.overwrite_pte(idx, pte, true); } /// The number of mapped frames or page tables. /// This is to track if we can free itself. - pub(super) fn nr_valid_children(&self) -> usize { - self.nr_valid_children + pub(super) fn nr_valid_children(&self) -> u16 { + self.meta.counter16_1.load(Ordering::Relaxed) } /// Read the info from a page table entry at a given index. @@ -128,142 +416,104 @@ where } /// Split the untracked huge page mapped at `idx` to smaller pages. - pub(super) fn split_untracked_huge(&mut self, cur_level: PagingLevel, idx: usize) { + pub(super) fn split_untracked_huge(&mut self, idx: usize) { + // These should be ensured by the cursor. 
debug_assert!(idx < nr_subpage_per_huge::()); - debug_assert!(cur_level > 1); - let Child::Untracked(pa) = self.children[idx] else { - panic!("split_untracked_huge: not an untyped huge page"); + debug_assert!(self.level > 1); + + let Child::Untracked(pa) = self.child(idx, false) else { + panic!("`split_untracked_huge` not called on an untracked huge page"); }; let prop = self.read_pte_prop(idx); - let mut new_frame = Self::new(); + let mut new_frame = PageTableFrame::::alloc(self.level - 1); for i in 0..nr_subpage_per_huge::() { - let small_pa = pa + i * page_size::(cur_level - 1); - new_frame.set_child(i, Child::Untracked(small_pa), Some(prop), cur_level - 1 > 1); + let small_pa = pa + i * page_size::(self.level - 1); + unsafe { new_frame.set_child_untracked(i, small_pa, prop) }; } - self.set_child( - idx, - Child::PageTable(Arc::new(SpinLock::new(new_frame))), - Some(prop), - false, - ); - } - - /// Map a child at a given index. - /// If mapping a non-none child, please give the property to map the child. - pub(super) fn set_child( - &mut self, - idx: usize, - child: Child, - prop: Option, - huge: bool, - ) { - assert!(idx < nr_subpage_per_huge::()); - // SAFETY: the index is within the bound and the PTE to be written is valid. - // And the physical address of PTE points to initialized memory. - // This applies to all the following `write_pte` invocations. - unsafe { - match &child { - Child::PageTable(node) => { - debug_assert!(!huge); - let frame = node.lock(); - self.write_pte( - idx, - E::new(frame.inner.start_paddr(), prop.unwrap(), false, false), - ); - self.nr_valid_children += 1; - } - Child::Frame(frame) => { - debug_assert!(!huge); // `VmFrame` currently can only be a regular page. - self.write_pte(idx, E::new(frame.start_paddr(), prop.unwrap(), false, true)); - self.nr_valid_children += 1; - } - Child::Untracked(pa) => { - self.write_pte(idx, E::new(*pa, prop.unwrap(), huge, true)); - self.nr_valid_children += 1; - } - Child::None => { - self.write_pte(idx, E::new_absent()); - } - } - } - if self.children[idx].is_some() { - self.nr_valid_children -= 1; - } - self.children[idx] = child; + self.set_child_pt(idx, new_frame.into_raw(), true); } /// Protect an already mapped child at a given index. - pub(super) fn protect(&mut self, idx: usize, prop: PageProperty, level: PagingLevel) { - debug_assert!(self.children[idx].is_some()); - let paddr = self.children[idx].paddr().unwrap(); + pub(super) fn protect(&mut self, idx: usize, prop: PageProperty) { + let mut pte = self.read_pte(idx); + debug_assert!(pte.is_present()); // This should be ensured by the cursor. + pte.set_prop(prop); // SAFETY: the index is within the bound and the PTE is valid. unsafe { - self.write_pte( - idx, - E::new(paddr, prop, level > 1, self.children[idx].is_last()), - ); + (self.as_ptr() as *mut E).add(idx).write(pte); } } - fn read_pte(&self, idx: usize) -> E { - assert!(idx < nr_subpage_per_huge::()); + pub(super) fn read_pte(&self, idx: usize) -> E { + // It should be ensured by the cursor. + debug_assert!(idx < nr_subpage_per_huge::()); // SAFETY: the index is within the bound and PTE is plain-old-data. - unsafe { (self.inner.as_ptr() as *const E).add(idx).read() } + unsafe { self.as_ptr().add(idx).read() } } - /// Write a page table entry at a given index. + fn start_paddr(&self) -> Paddr { + self.meta.paddr() + } + + /// Replace a page table entry at a given index. 
/// - /// # Safety + /// This method will ensure that the child presented by the overwritten + /// PTE is dropped, and the child count is updated. /// - /// The caller must ensure that: - /// - the index is within bounds; - /// - the PTE is valid an the physical address in the PTE points to initialized memory. - unsafe fn write_pte(&mut self, idx: usize, pte: E) { - (self.inner.as_mut_ptr() as *mut E).add(idx).write(pte); + /// The caller in this module will ensure that the PTE points to initialized + /// memory if the child is a page table. + fn overwrite_pte(&self, idx: usize, pte: Option, in_untracked_range: bool) { + let existing_pte = self.read_pte(idx); + if existing_pte.is_present() { + // SAFETY: The index is within the bound and the address is aligned. + // The validity of the PTE is checked within this module. + // The safetiness also holds in the following branch. + unsafe { + (self.as_ptr() as *mut E) + .add(idx) + .write(pte.unwrap_or(E::new_absent())) + }; + + // Drop the child. We must set the PTE before dropping the child. To + // drop the child just restore the handle and drop the handle. + + let paddr = existing_pte.paddr(); + if !existing_pte.is_last(self.level) { + // This is a page table. + let _dropping_raw = RawPageTableFrame::(paddr, self.level - 1, PhantomData); + } else if !in_untracked_range { + // This is a frame. + let meta = unsafe { FrameMetaRef::from_raw(paddr, self.level) }; + let _dropping_frame = VmFrame { meta }; + } + + if pte.is_none() { + // Decrement the child count. + self.meta.counter16_1.fetch_sub(1, Ordering::Relaxed); + } + } else if let Some(e) = pte { + unsafe { (self.as_ptr() as *mut E).add(idx).write(e) }; + + // Increment the child count. + self.meta.counter16_1.fetch_add(1, Ordering::Relaxed); + } + } + + fn as_ptr(&self) -> *const E { + paddr_to_vaddr(self.start_paddr()) as *const E } } -impl Clone for PageTableFrame +impl Drop for PageTableFrame where - [(); nr_subpage_per_huge::()]:, [(); C::NR_LEVELS as usize]:, { - /// Make a deep copy of the page table. - /// The child page tables are also being deep copied. - fn clone(&self) -> Self { - let new_frame = VmAllocOptions::new(1).alloc_single().unwrap(); - let new_ptr = new_frame.as_mut_ptr() as *mut E; - let children = Box::new(core::array::from_fn(|i| match self.child(i) { - Child::PageTable(node) => unsafe { - let frame = node.lock(); - // Possibly a cursor is waiting for the root lock to recycle this node. - // We can skip copying empty page table nodes. - if frame.nr_valid_children() != 0 { - let cloned = frame.clone(); - let pte = self.read_pte(i); - new_ptr.add(i).write(E::new( - cloned.inner.start_paddr(), - pte.prop(), - false, - false, - )); - Child::PageTable(Arc::new(SpinLock::new(cloned))) - } else { - Child::None - } - }, - Child::Frame(_) | Child::Untracked(_) => { - unsafe { - new_ptr.add(i).write(self.read_pte(i)); - } - self.children[i].clone() - } - Child::None => Child::None, - })); - Self { - inner: new_frame, - children, - nr_valid_children: self.nr_valid_children, + fn drop(&mut self) { + // Release the lock. + if !self.newly_created { + self.meta.counter8_1.store(0, Ordering::Release); } + // Drop the frame by `RawPageTableFrame::drop`. 
+ let _dropping_raw = RawPageTableFrame::(self.start_paddr(), self.level, PhantomData); } } diff --git a/framework/aster-frame/src/vm/page_table/mod.rs b/framework/aster-frame/src/vm/page_table/mod.rs index b730641a0..c37272d91 100644 --- a/framework/aster-frame/src/vm/page_table/mod.rs +++ b/framework/aster-frame/src/vm/page_table/mod.rs @@ -1,19 +1,15 @@ // SPDX-License-Identifier: MPL-2.0 -use alloc::sync::Arc; -use core::{fmt::Debug, marker::PhantomData, ops::Range, panic}; +use core::{fmt::Debug, marker::PhantomData, ops::Range}; use pod::Pod; use super::{ nr_subpage_per_huge, paddr_to_vaddr, - page_prop::{CachePolicy, PageFlags, PageProperty, PrivilegedPageFlags}, + page_prop::{PageFlags, PageProperty}, page_size, Paddr, PagingConstsTrait, PagingLevel, Vaddr, }; -use crate::{ - arch::mm::{activate_page_table, PageTableEntry, PagingConsts}, - sync::SpinLock, -}; +use crate::arch::mm::{PageTableEntry, PagingConsts}; mod frame; use frame::*; @@ -31,7 +27,7 @@ pub enum PageTableError { /// Using virtual address not aligned. UnalignedVaddr, /// Protecting a mapping that does not exist. - ProtectingInvalid, + ProtectingAbsent, /// Protecting a part of an already mapped page. ProtectingPartial, } @@ -84,23 +80,18 @@ pub(crate) struct PageTable< E: PageTableEntryTrait = PageTableEntry, C: PagingConstsTrait = PagingConsts, > where - [(); nr_subpage_per_huge::()]:, [(); C::NR_LEVELS as usize]:, { - root_frame: PtfRef, + root: RawPageTableFrame, _phantom: PhantomData, } -impl PageTable -where - [(); nr_subpage_per_huge::()]:, - [(); C::NR_LEVELS as usize]:, -{ +impl PageTable { pub(crate) fn activate(&self) { // SAFETY: The usermode page table is safe to activate since the kernel // mappings are shared. unsafe { - self.activate_unchecked(); + self.root.activate(); } } @@ -121,48 +112,21 @@ where .unwrap(); }; let root_frame = cursor.leak_root_guard().unwrap(); - let mut new_root_frame = PageTableFrame::::new(); - let half_of_entries = nr_subpage_per_huge::() / 2; - for i in 0..half_of_entries { - // This is user space, deep copy the child. - match root_frame.child(i) { - Child::PageTable(node) => { - let frame = node.lock(); - // Possibly a cursor is waiting for the root lock to recycle this node. - // We can skip copying empty page table nodes. - if frame.nr_valid_children() != 0 { - let cloned = frame.clone(); - let pt = Child::PageTable(Arc::new(SpinLock::new(cloned))); - new_root_frame.set_child(i, pt, Some(root_frame.read_pte_prop(i)), false); - } - } - Child::None => {} - Child::Frame(_) | Child::Untracked(_) => { - panic!("Unexpected map child."); - } - } - } - for i in half_of_entries..nr_subpage_per_huge::() { - // This is kernel space, share the child. - new_root_frame.set_child( - i, - root_frame.child(i).clone(), - Some(root_frame.read_pte_prop(i)), - false, + const NR_PTES_PER_NODE: usize = nr_subpage_per_huge::(); + let new_root_frame = unsafe { + root_frame.make_copy( + 0..NR_PTES_PER_NODE / 2, + NR_PTES_PER_NODE / 2..NR_PTES_PER_NODE, ) - } - PageTable:: { - root_frame: Arc::new(SpinLock::new(new_root_frame)), + }; + PageTable:: { + root: new_root_frame.into_raw(), _phantom: PhantomData, } } } -impl PageTable -where - [(); nr_subpage_per_huge::()]:, - [(); C::NR_LEVELS as usize]:, -{ +impl PageTable { /// Create a new user page table. /// /// This should be the only way to create the first user page table, that is @@ -170,19 +134,13 @@ where /// /// Then, one can use a user page table to call [`fork_copy_on_write`], creating /// other child page tables. 
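
`fork_copy_on_write` above and `create_user_page_table` (documented just above, body follows) are the two callers of `make_copy`, and they differ only in the index ranges they pass. A small sketch of those ranges, assuming a 512-entry root node (the default for 4 KiB frames with 8-byte PTEs); the constant name is illustrative:

    // Hypothetical root-node width; the real value is the per-node PTE count.
    const NR_PTES_PER_NODE: usize = 512;

    fn main() {
        // fork_copy_on_write: deep-copy the user half, share the kernel half.
        let deep = 0..NR_PTES_PER_NODE / 2;
        let shallow = NR_PTES_PER_NODE / 2..NR_PTES_PER_NODE;
        assert!(deep.end <= shallow.start); // make_copy requires disjoint ranges

        // create_user_page_table: copy nothing deeply, only share the kernel half.
        let deep = 0..0;
        let shallow = NR_PTES_PER_NODE / 2..NR_PTES_PER_NODE;
        assert!(deep.is_empty() && shallow.len() == NR_PTES_PER_NODE / 2);
    }
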
- pub(crate) fn create_user_page_table(&self) -> PageTable { - let mut new_root_frame = PageTableFrame::::new(); - let root_frame = self.root_frame.lock(); - for i in nr_subpage_per_huge::() / 2..nr_subpage_per_huge::() { - new_root_frame.set_child( - i, - root_frame.child(i).clone(), - Some(root_frame.read_pte_prop(i)), - false, - ) - } - PageTable:: { - root_frame: Arc::new(SpinLock::new(new_root_frame)), + pub(crate) fn create_user_page_table(&self) -> PageTable { + let root_frame = self.root.copy_handle().lock(); + const NR_PTES_PER_NODE: usize = nr_subpage_per_huge::(); + let new_root_frame = + unsafe { root_frame.make_copy(0..0, NR_PTES_PER_NODE / 2..NR_PTES_PER_NODE) }; + PageTable:: { + root: new_root_frame.into_raw(), _phantom: PhantomData, } } @@ -193,26 +151,17 @@ where /// usize overflows, the caller should provide the index range of the root level pages /// instead of the virtual address range. pub(crate) fn make_shared_tables(&self, root_index: Range) { + const NR_PTES_PER_NODE: usize = nr_subpage_per_huge::(); let start = root_index.start; - debug_assert!(start >= nr_subpage_per_huge::() / 2); - debug_assert!(start < nr_subpage_per_huge::()); + debug_assert!(start >= NR_PTES_PER_NODE / 2); + debug_assert!(start < NR_PTES_PER_NODE); let end = root_index.end; - debug_assert!(end <= nr_subpage_per_huge::()); - let mut root_frame = self.root_frame.lock(); + debug_assert!(end <= NR_PTES_PER_NODE); + let mut root_frame = self.root.copy_handle().lock(); for i in start..end { - let no_such_child = root_frame.child(i).is_none(); - if no_such_child { - let frame = Arc::new(SpinLock::new(PageTableFrame::::new())); - root_frame.set_child( - i, - Child::PageTable(frame), - Some(PageProperty { - flags: PageFlags::RWX, - cache: CachePolicy::Writeback, - priv_flags: PrivilegedPageFlags::GLOBAL, - }), - false, - ) + if !root_frame.read_pte(i).is_present() { + let frame = PageTableFrame::alloc(PagingConsts::NR_LEVELS - 1); + root_frame.set_child_pt(i, frame.into_raw(), i < NR_PTES_PER_NODE * 3 / 4); } } } @@ -220,20 +169,26 @@ where impl<'a, M: PageTableMode, E: PageTableEntryTrait, C: PagingConstsTrait> PageTable where - [(); nr_subpage_per_huge::()]:, [(); C::NR_LEVELS as usize]:, { /// Create a new empty page table. Useful for the kernel page table and IOMMU page tables only. pub(crate) fn empty() -> Self { PageTable { - root_frame: Arc::new(SpinLock::new(PageTableFrame::::new())), + root: PageTableFrame::::alloc(C::NR_LEVELS).into_raw(), _phantom: PhantomData, } } + pub(crate) unsafe fn activate_unchecked(&self) { + self.root.activate(); + } + /// The physical address of the root page table. - pub(crate) fn root_paddr(&self) -> Paddr { - self.root_frame.lock().start_paddr() + /// + /// It is dangerous to directly provide the physical address of the root page table to the + /// hardware since the page table frame may be dropped, resulting in UAF. + pub(crate) unsafe fn root_paddr(&self) -> Paddr { + self.root.paddr() } pub(crate) unsafe fn map( @@ -272,10 +227,6 @@ where unsafe { page_walk::(self.root_paddr(), vaddr) } } - pub(crate) unsafe fn activate_unchecked(&self) { - activate_page_table(self.root_paddr(), CachePolicy::Writeback); - } - /// Create a new cursor exclusively accessing the virtual address range for mapping. /// /// If another cursor is already accessing the range, the new cursor will wait until the @@ -303,21 +254,7 @@ where /// This is only useful for IOMMU page tables. Think twice before using it in other cases. 
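
The `root_paddr` accessor above is now `unsafe` because the returned address carries no lifetime information: it stays valid only while some handle keeps the root frame alive. A caller that programs the address into hardware should therefore keep the owning table around for at least as long. Below is a hedged sketch of one way to encode that, with toy types rather than the crate's own:

    // Toy stand-ins: a table that owns its root frame, and a record that ties
    // the programmed address to the table so the address can never dangle.
    struct ToyPageTable {
        root_paddr: usize,
    }

    impl ToyPageTable {
        // Mirrors the hazard: the bare address says nothing about ownership.
        unsafe fn root_paddr(&self) -> usize {
            self.root_paddr
        }
    }

    struct DeviceTranslation {
        table: ToyPageTable,     // keeps the root frame alive
        programmed_paddr: usize, // what was handed to the (hypothetical) device
    }

    fn program_device(table: ToyPageTable) -> DeviceTranslation {
        let paddr = unsafe { table.root_paddr() };
        // ... write `paddr` into the hypothetical device register here ...
        DeviceTranslation { table, programmed_paddr: paddr }
    }

    fn main() {
        let dt = program_device(ToyPageTable { root_paddr: 0x42000 });
        assert_eq!(dt.programmed_paddr, unsafe { dt.table.root_paddr() });
    }
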
pub(crate) unsafe fn shallow_copy(&self) -> Self { PageTable { - root_frame: self.root_frame.clone(), - _phantom: PhantomData, - } - } -} - -impl Clone for PageTable -where - [(); nr_subpage_per_huge::()]:, - [(); C::NR_LEVELS as usize]:, -{ - fn clone(&self) -> Self { - let frame = self.root_frame.lock(); - PageTable { - root_frame: Arc::new(SpinLock::new(frame.clone())), + root: self.root.copy_handle(), _phantom: PhantomData, } } @@ -361,7 +298,7 @@ pub(super) unsafe fn page_walk( if !cur_pte.is_present() { return None; } - if cur_pte.is_huge() { + if cur_pte.is_last(cur_level) { debug_assert!(cur_level <= C::HIGHEST_TRANSLATION_LEVEL); break; } @@ -393,12 +330,11 @@ pub(crate) trait PageTableEntryTrait: Clone + Copy + Sized + Pod + Debug { /// If the flags are present with valid mappings. fn is_present(&self) -> bool; - /// Create a new PTE with the given physical address and flags. - /// The huge flag indicates that the PTE maps a huge page. - /// The last flag indicates that the PTE is the last level page table. - /// If the huge and last flags are both false, the PTE maps a page - /// table node. - fn new(paddr: Paddr, prop: PageProperty, huge: bool, last: bool) -> Self; + /// Create a new PTE with the given physical address and flags that map to a frame. + fn new_frame(paddr: Paddr, level: PagingLevel, prop: PageProperty) -> Self; + + /// Create a new PTE that map to a child page table. + fn new_pt(paddr: Paddr) -> Self; /// Get the physical address from the PTE. /// The physical address recorded in the PTE is either: @@ -408,6 +344,11 @@ pub(crate) trait PageTableEntryTrait: Clone + Copy + Sized + Pod + Debug { fn prop(&self) -> PageProperty; - /// If the PTE maps a huge page or a page table frame. - fn is_huge(&self) -> bool; + fn set_prop(&mut self, prop: PageProperty); + + /// If the PTE maps a page rather than a child page table. + /// + /// The level of the page table the entry resides is given since architectures + /// like amd64 only uses a huge bit in intermediate levels. 
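
The `is_last` documentation above (the method itself is declared right below) explains why the level is passed in: on amd64-like formats a "huge" bit marks a leaf only at intermediate levels, while a level-1 entry is always a leaf and reuses that bit position. The toy PTE below illustrates the point; the bit layout is a simplified illustration, not the crate's actual x86 entry format.

    // Toy 64-bit PTE: bit 0 = present, bit 7 = "huge page" at intermediate levels.
    const PRESENT: u64 = 1 << 0;
    const HUGE: u64 = 1 << 7;

    struct ToyPte(u64);

    impl ToyPte {
        fn is_last(&self, level: u8) -> bool {
            // A level-1 entry is always a leaf; higher levels are leaves only
            // when the huge bit is set, hence the level parameter.
            level == 1 || (self.0 & HUGE != 0)
        }
    }

    fn main() {
        let leaf_4k = ToyPte(PRESENT);         // level-1 entry: always a leaf
        let huge_2m = ToyPte(PRESENT | HUGE);  // level-2 entry with the huge bit: a leaf
        let next_pt = ToyPte(PRESENT);         // level-2 entry without it: a child table
        assert!(leaf_4k.is_last(1));
        assert!(huge_2m.is_last(2));
        assert!(!next_pt.is_last(2));
    }
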
+ fn is_last(&self, level: PagingLevel) -> bool; } diff --git a/framework/aster-frame/src/vm/page_table/test.rs b/framework/aster-frame/src/vm/page_table/test.rs index 97c715b9a..211fdb2ed 100644 --- a/framework/aster-frame/src/vm/page_table/test.rs +++ b/framework/aster-frame/src/vm/page_table/test.rs @@ -1,5 +1,7 @@ // SPDX-License-Identifier: MPL-2.0 +use core::mem::ManuallyDrop; + use super::*; use crate::vm::{ kspace::LINEAR_MAPPING_BASE_VADDR, @@ -25,8 +27,9 @@ fn test_range_check() { } #[ktest] -fn test_map_unmap() { +fn test_tracked_map_unmap() { let pt = PageTable::::empty(); + let from = PAGE_SIZE..PAGE_SIZE * 2; let frame = VmAllocOptions::new(1).alloc_single().unwrap(); let start_paddr = frame.start_paddr(); @@ -35,17 +38,25 @@ fn test_map_unmap() { assert_eq!(pt.query(from.start + 10).unwrap().0, start_paddr + 10); unsafe { pt.unmap(&from).unwrap() }; assert!(pt.query(from.start + 10).is_none()); +} + +#[ktest] +fn test_untracked_map_unmap() { + let pt = PageTable::::empty(); + const UNTRACKED_OFFSET: usize = crate::vm::kspace::LINEAR_MAPPING_BASE_VADDR; let from_ppn = 13245..512 * 512 + 23456; let to_ppn = from_ppn.start - 11010..from_ppn.end - 11010; - let from = PAGE_SIZE * from_ppn.start..PAGE_SIZE * from_ppn.end; + let from = + UNTRACKED_OFFSET + PAGE_SIZE * from_ppn.start..UNTRACKED_OFFSET + PAGE_SIZE * from_ppn.end; let to = PAGE_SIZE * to_ppn.start..PAGE_SIZE * to_ppn.end; + let prop = PageProperty::new(PageFlags::RW, CachePolicy::Writeback); unsafe { pt.map(&from, &to, prop).unwrap() }; for i in 0..100 { let offset = i * (PAGE_SIZE + 1000); assert_eq!(pt.query(from.start + offset).unwrap().0, to.start + offset); } - let unmap = PAGE_SIZE * 123..PAGE_SIZE * 3434; + let unmap = UNTRACKED_OFFSET + PAGE_SIZE * 123..UNTRACKED_OFFSET + PAGE_SIZE * 3434; unsafe { pt.unmap(&unmap).unwrap() }; for i in 0..100 { let offset = i * (PAGE_SIZE + 10); @@ -55,6 +66,9 @@ fn test_map_unmap() { assert_eq!(pt.query(from.start + offset).unwrap().0, to.start + offset); } } + + // Since untracked mappings cannot be dropped, we just leak it here. 
+ let _ = ManuallyDrop::new(pt); } #[ktest] @@ -77,11 +91,30 @@ fn test_user_copy_on_write() { unsafe { pt.unmap(&from).unwrap() }; assert!(pt.query(from.start + 10).is_none()); assert_eq!(child_pt.query(from.start + 10).unwrap().0, start_paddr + 10); + + let sibling_pt = pt.fork_copy_on_write(); + assert!(sibling_pt.query(from.start + 10).is_none()); + assert_eq!(child_pt.query(from.start + 10).unwrap().0, start_paddr + 10); + drop(pt); + assert_eq!(child_pt.query(from.start + 10).unwrap().0, start_paddr + 10); + unsafe { child_pt.unmap(&from).unwrap() }; + assert!(child_pt.query(from.start + 10).is_none()); + unsafe { + sibling_pt + .cursor_mut(&from) + .unwrap() + .map(frame.clone(), prop) + }; + assert_eq!( + sibling_pt.query(from.start + 10).unwrap().0, + start_paddr + 10 + ); + assert!(child_pt.query(from.start + 10).is_none()); } type Qr = PageTableQueryResult; -#[derive(Debug)] +#[derive(Clone, Debug)] struct BasePagingConsts {} impl PagingConstsTrait for BasePagingConsts { @@ -94,32 +127,38 @@ impl PagingConstsTrait for BasePagingConsts { #[ktest] fn test_base_protect_query() { - let pt = PageTable::::empty(); + let pt = PageTable::::empty(); + let from_ppn = 1..1000; let from = PAGE_SIZE * from_ppn.start..PAGE_SIZE * from_ppn.end; - let to = PAGE_SIZE * 1000..PAGE_SIZE * 1999; + let to = VmAllocOptions::new(999).alloc().unwrap(); let prop = PageProperty::new(PageFlags::RW, CachePolicy::Writeback); - unsafe { pt.map(&from, &to, prop).unwrap() }; + unsafe { + let mut cursor = pt.cursor_mut(&from).unwrap(); + for frame in to { + cursor.map(frame.clone(), prop); + } + } for (qr, i) in pt.cursor(&from).unwrap().zip(from_ppn) { - let Qr::MappedUntyped { va, pa, len, prop } = qr else { - panic!("Expected MappedUntyped, got {:#x?}", qr); + let Qr::Mapped { va, frame, prop } = qr else { + panic!("Expected Mapped, got {:#x?}", qr); }; assert_eq!(prop.flags, PageFlags::RW); assert_eq!(prop.cache, CachePolicy::Writeback); - assert_eq!(va..va + len, i * PAGE_SIZE..(i + 1) * PAGE_SIZE); + assert_eq!(va..va + frame.size(), i * PAGE_SIZE..(i + 1) * PAGE_SIZE); } let prot = PAGE_SIZE * 18..PAGE_SIZE * 20; unsafe { pt.protect(&prot, |p| p.flags -= PageFlags::W).unwrap() }; for (qr, i) in pt.cursor(&prot).unwrap().zip(18..20) { - let Qr::MappedUntyped { va, pa, len, prop } = qr else { - panic!("Expected MappedUntyped, got {:#x?}", qr); + let Qr::Mapped { va, frame, prop } = qr else { + panic!("Expected Mapped, got {:#x?}", qr); }; assert_eq!(prop.flags, PageFlags::R); - assert_eq!(va..va + len, i * PAGE_SIZE..(i + 1) * PAGE_SIZE); + assert_eq!(va..va + frame.size(), i * PAGE_SIZE..(i + 1) * PAGE_SIZE); } } -#[derive(Debug)] +#[derive(Clone, Debug)] struct VeryHugePagingConsts {} impl PagingConstsTrait for VeryHugePagingConsts { @@ -131,8 +170,10 @@ impl PagingConstsTrait for VeryHugePagingConsts { } #[ktest] -fn test_large_protect_query() { - let pt = PageTable::::empty(); +fn test_untracked_large_protect_query() { + let pt = PageTable::::empty(); + const UNTRACKED_OFFSET: usize = crate::vm::kspace::LINEAR_MAPPING_BASE_VADDR; + let gmult = 512 * 512; let from_ppn = gmult - 512..gmult + gmult + 514; let to_ppn = gmult - 512 - 512..gmult + gmult - 512 + 514; @@ -141,13 +182,14 @@ fn test_large_protect_query() { // from: |--2M--|-------------1G-------------|--2M--|-| // to: |--2M--|--2M--|-------------1G-------------|-| // Thus all mappings except the last few pages are mapped in 2M huge pages - let from = PAGE_SIZE * from_ppn.start..PAGE_SIZE * from_ppn.end; + let from = + UNTRACKED_OFFSET + 
PAGE_SIZE * from_ppn.start..UNTRACKED_OFFSET + PAGE_SIZE * from_ppn.end; let to = PAGE_SIZE * to_ppn.start..PAGE_SIZE * to_ppn.end; let prop = PageProperty::new(PageFlags::RW, CachePolicy::Writeback); unsafe { pt.map(&from, &to, prop).unwrap() }; for (qr, i) in pt.cursor(&from).unwrap().zip(0..512 + 2 + 2) { - let Qr::MappedUntyped { va, pa, len, prop } = qr else { - panic!("Expected MappedUntyped, got {:#x?}", qr); + let Qr::MappedUntracked { va, pa, len, prop } = qr else { + panic!("Expected MappedUntracked, got {:#x?}", qr); }; assert_eq!(prop.flags, PageFlags::RW); assert_eq!(prop.cache, CachePolicy::Writeback); @@ -166,24 +208,26 @@ fn test_large_protect_query() { } } let ppn = from_ppn.start + 18..from_ppn.start + 20; - let va = PAGE_SIZE * ppn.start..PAGE_SIZE * ppn.end; + let va = UNTRACKED_OFFSET + PAGE_SIZE * ppn.start..UNTRACKED_OFFSET + PAGE_SIZE * ppn.end; unsafe { pt.protect(&va, |p| p.flags -= PageFlags::W).unwrap() }; for (qr, i) in pt .cursor(&(va.start - PAGE_SIZE..va.start)) .unwrap() .zip(ppn.start - 1..ppn.start) { - let Qr::MappedUntyped { va, pa, len, prop } = qr else { - panic!("Expected MappedUntyped, got {:#x?}", qr); + let Qr::MappedUntracked { va, pa, len, prop } = qr else { + panic!("Expected MappedUntracked, got {:#x?}", qr); }; assert_eq!(prop.flags, PageFlags::RW); + let va = va - UNTRACKED_OFFSET; assert_eq!(va..va + len, i * PAGE_SIZE..(i + 1) * PAGE_SIZE); } for (qr, i) in pt.cursor(&va).unwrap().zip(ppn.clone()) { - let Qr::MappedUntyped { va, pa, len, prop } = qr else { - panic!("Expected MappedUntyped, got {:#x?}", qr); + let Qr::MappedUntracked { va, pa, len, prop } = qr else { + panic!("Expected MappedUntracked, got {:#x?}", qr); }; assert_eq!(prop.flags, PageFlags::R); + let va = va - UNTRACKED_OFFSET; assert_eq!(va..va + len, i * PAGE_SIZE..(i + 1) * PAGE_SIZE); } for (qr, i) in pt @@ -191,10 +235,14 @@ fn test_large_protect_query() { .unwrap() .zip(ppn.end..ppn.end + 1) { - let Qr::MappedUntyped { va, pa, len, prop } = qr else { - panic!("Expected MappedUntyped, got {:#x?}", qr); + let Qr::MappedUntracked { va, pa, len, prop } = qr else { + panic!("Expected MappedUntracked, got {:#x?}", qr); }; assert_eq!(prop.flags, PageFlags::RW); + let va = va - UNTRACKED_OFFSET; assert_eq!(va..va + len, i * PAGE_SIZE..(i + 1) * PAGE_SIZE); } + + // Since untracked mappings cannot be dropped, we just leak it here. + let _ = ManuallyDrop::new(pt); } diff --git a/framework/aster-frame/src/vm/space.rs b/framework/aster-frame/src/vm/space.rs index 488783a74..381f10e9f 100644 --- a/framework/aster-frame/src/vm/space.rs +++ b/framework/aster-frame/src/vm/space.rs @@ -90,7 +90,7 @@ impl VmSpace { // If overwrite is forbidden, we should check if there are existing mappings if !options.can_overwrite { - while let Some(qr) = cursor.query() { + while let Some(qr) = cursor.next() { if matches!(qr, PtQr::Mapped { .. }) { return Err(Error::MapAlreadyMappedVaddr); } @@ -350,7 +350,7 @@ impl Iterator for VmQueryIter<'_> { PtQr::NotMapped { va, len } => VmQueryResult::NotMapped { va, len }, PtQr::Mapped { va, frame, prop } => VmQueryResult::Mapped { va, frame, prop }, // It is not possible to map untyped memory in user space. - PtQr::MappedUntyped { .. } => unreachable!(), + PtQr::MappedUntracked { .. } => unreachable!(), }) } }
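
The `space.rs` hunk above switches `VmQueryIter` to the renamed `MappedUntracked` variant and keeps it unreachable for user-space tables. As a closing illustration of how downstream code consumes these query results, here is a self-contained sketch with toy result types; the real `PageTableQueryResult` carries `VmFrame` handles and `PageProperty` values instead of the simplified fields used here.

    // Toy stand-ins mirroring the shapes of the query results yielded by the cursor.
    enum QueryResult {
        NotMapped { va: usize, len: usize },
        Mapped { va: usize, len: usize, writable: bool },
        MappedUntracked { va: usize, pa: usize, len: usize },
    }

    fn summarize(results: impl IntoIterator<Item = QueryResult>) {
        for qr in results {
            match qr {
                QueryResult::NotMapped { va, len } => {
                    println!("hole      {:#x}..{:#x}", va, va + len)
                }
                QueryResult::Mapped { va, len, writable } => {
                    println!("frame     {:#x}..{:#x} (writable: {})", va, va + len, writable)
                }
                QueryResult::MappedUntracked { va, pa, len } => {
                    println!("untracked {:#x}..{:#x} -> {:#x}", va, va + len, pa)
                }
            }
        }
    }

    fn main() {
        summarize([
            QueryResult::NotMapped { va: 0x1000, len: 0x1000 },
            QueryResult::Mapped { va: 0x2000, len: 0x1000, writable: true },
            QueryResult::MappedUntracked { va: 0xffff_8000_0000_0000, pa: 0x3000, len: 0x1000 },
        ]);
    }
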