diff --git a/ostd/src/arch/riscv/mm/mod.rs b/ostd/src/arch/riscv/mm/mod.rs index 72dfc3226..69efec730 100644 --- a/ostd/src/arch/riscv/mm/mod.rs +++ b/ostd/src/arch/riscv/mm/mod.rs @@ -47,6 +47,12 @@ bitflags::bitflags! { const ACCESSED = 1 << 6; /// Whether the memory area represented by this entry is modified. const DIRTY = 1 << 7; + + // First bit ignored by MMU. + const RSV1 = 1 << 8; + // Second bit ignored by MMU. + const RSV2 = 1 << 9; + // PBMT: Non-cacheable, idempotent, weakly-ordered (RVWMO), main memory const PBMT_NC = 1 << 61; // PBMT: Non-cacheable, non-idempotent, strongly-ordered (I/O ordering), I/O @@ -144,7 +150,9 @@ impl PageTableEntryTrait for PageTableEntry { | parse_flags!(self.0, PageTableFlags::WRITABLE, PageFlags::W) | parse_flags!(self.0, PageTableFlags::EXECUTABLE, PageFlags::X) | parse_flags!(self.0, PageTableFlags::ACCESSED, PageFlags::ACCESSED) - | parse_flags!(self.0, PageTableFlags::DIRTY, PageFlags::DIRTY); + | parse_flags!(self.0, PageTableFlags::DIRTY, PageFlags::DIRTY) + | parse_flags!(self.0, PageTableFlags::RSV1, PageFlags::AVAIL1) + | parse_flags!(self.0, PageTableFlags::RSV2, PageFlags::AVAIL2); let priv_flags = parse_flags!(self.0, PageTableFlags::USER, PrivFlags::USER) | parse_flags!(self.0, PageTableFlags::GLOBAL, PrivFlags::GLOBAL); @@ -175,6 +183,16 @@ impl PageTableEntryTrait for PageTableEntry { prop.priv_flags.bits(), PrivFlags::GLOBAL, PageTableFlags::GLOBAL + ) + | parse_flags!( + prop.flags.bits(), + PageFlags::AVAIL1, + PageTableFlags::RSV1 + ) + | parse_flags!( + prop.flags.bits(), + PageFlags::AVAIL2, + PageTableFlags::RSV2 ); match prop.cache { diff --git a/ostd/src/arch/x86/mm/mod.rs b/ostd/src/arch/x86/mm/mod.rs index 4c0cc2a0a..480c835d0 100644 --- a/ostd/src/arch/x86/mm/mod.rs +++ b/ostd/src/arch/x86/mm/mod.rs @@ -64,6 +64,12 @@ bitflags::bitflags! { /// TDX shared bit. #[cfg(feature = "cvm_guest")] const SHARED = 1 << 51; + + /// Ignored by the hardware. Free to use.
+ const HIGH_IGN1 = 1 << 52; + /// Ignored by the hardware. Free to use. + const HIGH_IGN2 = 1 << 53; + /// Forbid execute codes on the page. The NXE bits in EFER msr must be set. const NO_EXECUTE = 1 << 63; } @@ -192,7 +198,9 @@ impl PageTableEntryTrait for PageTableEntry { | parse_flags!(self.0, PageTableFlags::WRITABLE, PageFlags::W) | parse_flags!(!self.0, PageTableFlags::NO_EXECUTE, PageFlags::X) | parse_flags!(self.0, PageTableFlags::ACCESSED, PageFlags::ACCESSED) - | parse_flags!(self.0, PageTableFlags::DIRTY, PageFlags::DIRTY); + | parse_flags!(self.0, PageTableFlags::DIRTY, PageFlags::DIRTY) + | parse_flags!(self.0, PageTableFlags::HIGH_IGN1, PageFlags::AVAIL1) + | parse_flags!(self.0, PageTableFlags::HIGH_IGN2, PageFlags::AVAIL2); let priv_flags = parse_flags!(self.0, PageTableFlags::USER, PrivFlags::USER) | parse_flags!(self.0, PageTableFlags::GLOBAL, PrivFlags::GLOBAL); #[cfg(feature = "cvm_guest")] @@ -226,6 +234,16 @@ impl PageTableEntryTrait for PageTableEntry { PageTableFlags::ACCESSED ) | parse_flags!(prop.flags.bits(), PageFlags::DIRTY, PageTableFlags::DIRTY) + | parse_flags!( + prop.flags.bits(), + PageFlags::AVAIL1, + PageTableFlags::HIGH_IGN1 + ) + | parse_flags!( + prop.flags.bits(), + PageFlags::AVAIL2, + PageTableFlags::HIGH_IGN2 + ) | parse_flags!( prop.priv_flags.bits(), PrivFlags::USER, diff --git a/ostd/src/mm/page_prop.rs b/ostd/src/mm/page_prop.rs index e0230e8ae..56107cca0 100644 --- a/ostd/src/mm/page_prop.rs +++ b/ostd/src/mm/page_prop.rs @@ -115,6 +115,11 @@ bitflags! { const ACCESSED = 0b00001000; /// Has the memory page been written. const DIRTY = 0b00010000; + + /// The first bit available for software use. + const AVAIL1 = 0b01000000; + /// The second bit available for software use. + const AVAIL2 = 0b10000000; } } diff --git a/ostd/src/mm/page_table/boot_pt.rs b/ostd/src/mm/page_table/boot_pt.rs index 3b670f52b..e62691ac5 100644 --- a/ostd/src/mm/page_table/boot_pt.rs +++ b/ostd/src/mm/page_table/boot_pt.rs @@ -4,7 +4,6 @@ //! 
and mapped, the boot page table is needed to do early stage page table setup //! in order to initialize the running phase page tables. -use alloc::vec::Vec; use core::{ result::Result, sync::atomic::{AtomicU32, Ordering}, }; @@ -16,8 +15,8 @@ use crate::{ cpu::num_cpus, cpu_local_cell, mm::{ - frame::allocator::FRAME_ALLOCATOR, nr_subpage_per_huge, paddr_to_vaddr, Paddr, - PageProperty, PagingConstsTrait, Vaddr, PAGE_SIZE, + frame::allocator::FRAME_ALLOCATOR, nr_subpage_per_huge, paddr_to_vaddr, Paddr, PageFlags, + PageProperty, PagingConstsTrait, PagingLevel, Vaddr, PAGE_SIZE, }, sync::SpinLock, }; @@ -83,17 +82,18 @@ cpu_local_cell! { } /// A simple boot page table singleton for boot stage mapping management. +/// /// If applicable, the boot page table could track the lifetime of page table /// frames that are set up by the firmware, loader or the setup code. -pub struct BootPageTable< +/// +/// All the newly allocated page table frames have the first unused bit set in +/// the parent PTEs. This allows us to deallocate them when the boot page table +/// is dropped. +pub(crate) struct BootPageTable< E: PageTableEntryTrait = PageTableEntry, C: PagingConstsTrait = PagingConsts, > { root_pt: FrameNumber, - // The frames allocated for this page table are not tracked with - // metadata [`crate::mm::frame::meta`]. Here is a record of it - // for deallocation. - frames: Vec<FrameNumber>, _pretend_to_use: core::marker::PhantomData<(E, C)>, } @@ -107,10 +107,19 @@ impl<E: PageTableEntryTrait, C: PagingConstsTrait> BootPageTable<E, C> { /// Otherwise, It would lead to double-drop of the page table frames set up /// by the firmware, loader or the setup code. unsafe fn from_current_pt() -> Self { - let root_paddr = crate::arch::mm::current_page_table_paddr(); + let root_pt = crate::arch::mm::current_page_table_paddr() / C::BASE_PAGE_SIZE; + // Make sure the first available bit is not set for firmware page tables.
+ dfs_walk_on_leave::<E, C>(root_pt, C::NR_LEVELS, &mut |pte: &mut E| { + let prop = pte.prop(); + if prop.flags.contains(PageFlags::AVAIL1) { + pte.set_prop(PageProperty::new( + prop.flags - PageFlags::AVAIL1, + prop.cache, + )); + } + }); Self { - root_pt: root_paddr / C::BASE_PAGE_SIZE, - frames: Vec::new(), + root_pt, _pretend_to_use: core::marker::PhantomData, } } @@ -139,9 +148,9 @@ let pte_ptr = unsafe { (paddr_to_vaddr(pt * C::BASE_PAGE_SIZE) as *mut E).add(index) }; let pte = unsafe { pte_ptr.read() }; pt = if !pte.is_present() { - let frame = self.alloc_frame(); - unsafe { pte_ptr.write(E::new_pt(frame * C::BASE_PAGE_SIZE)) }; - frame + let pte = self.alloc_child(); + unsafe { pte_ptr.write(pte) }; + pte.paddr() / C::BASE_PAGE_SIZE } else if pte.is_last(level) { panic!("mapping an already mapped huge page in the boot page table"); } else { pte.paddr() / C::BASE_PAGE_SIZE }; @@ -188,11 +197,11 @@ panic!("protecting an unmapped page in the boot page table"); } else if pte.is_last(level) { // Split the huge page.
- let frame = self.alloc_frame(); + let child_pte = self.alloc_child(); + let child_frame_pa = child_pte.paddr(); let huge_pa = pte.paddr(); for i in 0..nr_subpage_per_huge::<C>() { - let nxt_ptr = - unsafe { (paddr_to_vaddr(frame * C::BASE_PAGE_SIZE) as *mut E).add(i) }; + let nxt_ptr = unsafe { (paddr_to_vaddr(child_frame_pa) as *mut E).add(i) }; unsafe { nxt_ptr.write(E::new_page( huge_pa + i * C::BASE_PAGE_SIZE, @@ -201,8 +210,8 @@ )) }; } - unsafe { pte_ptr.write(E::new_pt(frame * C::BASE_PAGE_SIZE)) }; - frame + unsafe { pte_ptr.write(E::new_pt(child_frame_pa)) }; + child_frame_pa / C::BASE_PAGE_SIZE } else { pte.paddr() / C::BASE_PAGE_SIZE }; @@ -220,21 +229,55 @@ unsafe { pte_ptr.write(E::new_page(pte.paddr(), 1, prop)) }; } - fn alloc_frame(&mut self) -> FrameNumber { + fn alloc_child(&mut self) -> E { let frame = FRAME_ALLOCATOR.get().unwrap().lock().alloc(1).unwrap(); - self.frames.push(frame); // Zero it out. let vaddr = paddr_to_vaddr(frame * PAGE_SIZE) as *mut u8; unsafe { core::ptr::write_bytes(vaddr, 0, PAGE_SIZE) }; - frame + + let mut pte = E::new_pt(frame * C::BASE_PAGE_SIZE); + let prop = pte.prop(); + pte.set_prop(PageProperty::new( + prop.flags | PageFlags::AVAIL1, + prop.cache, + )); + + pte + } +} + +/// A helper function to walk on the page table frames. +/// +/// Once leaving a page table frame, the closure will be called with the PTE to +/// the frame.
+fn dfs_walk_on_leave<E: PageTableEntryTrait, C: PagingConstsTrait>( + pt: FrameNumber, + level: PagingLevel, + op: &mut impl FnMut(&mut E), +) { + if level >= 2 { + let pt_vaddr = paddr_to_vaddr(pt * C::BASE_PAGE_SIZE) as *mut E; + let pt = unsafe { core::slice::from_raw_parts_mut(pt_vaddr, nr_subpage_per_huge::<C>()) }; + for pte in pt { + if pte.is_present() && !pte.is_last(level) { + dfs_walk_on_leave::<E, C>(pte.paddr() / C::BASE_PAGE_SIZE, level - 1, op); + op(pte) + } + } } } impl<E: PageTableEntryTrait, C: PagingConstsTrait> Drop for BootPageTable<E, C> { fn drop(&mut self) { - for frame in &self.frames { - FRAME_ALLOCATOR.get().unwrap().lock().dealloc(*frame, 1); - } + dfs_walk_on_leave::<E, C>(self.root_pt, C::NR_LEVELS, &mut |pte| { + if pte.prop().flags.contains(PageFlags::AVAIL1) { + let pt = pte.paddr() / C::BASE_PAGE_SIZE; + FRAME_ALLOCATOR.get().unwrap().lock().dealloc(pt, 1); + } + // Firmware-provided page tables may be a DAG instead of a tree. + // Clear it to avoid double-free when we meet it the second time. + *pte = E::new_absent(); + }); } } @@ -255,7 +298,6 @@ fn test_boot_pt_map_protect() { let mut boot_pt = BootPageTable::<PageTableEntry, PagingConsts> { root_pt: root_paddr / PagingConsts::BASE_PAGE_SIZE, - frames: Vec::new(), _pretend_to_use: core::marker::PhantomData, };