diff --git a/framework/aster-frame/src/mm/page_table/cursor.rs b/framework/aster-frame/src/mm/page_table/cursor.rs
index 4a2710157..978e990c1 100644
--- a/framework/aster-frame/src/mm/page_table/cursor.rs
+++ b/framework/aster-frame/src/mm/page_table/cursor.rs
@@ -120,6 +120,7 @@ where
         if va.start % C::BASE_PAGE_SIZE != 0 || va.end % C::BASE_PAGE_SIZE != 0 {
             return Err(PageTableError::UnalignedVaddr);
         }
+
         // Create a guard array that only hold the root node lock.
         let guards = core::array::from_fn(|i| {
             if i == 0 {
@@ -136,25 +137,33 @@ where
             barrier_va: va.clone(),
             phantom: PhantomData,
         };
+
         // Go down and get proper locks. The cursor should hold a lock of a
         // page table node containing the virtual address range.
         //
         // While going down, previous guards of too-high levels will be released.
         loop {
-            let cur_pte = cursor.read_cur_pte();
             let level_too_high = {
                 let start_idx = pte_index::<C>(va.start, cursor.level);
                 let end_idx = pte_index::<C>(va.end - 1, cursor.level);
                 start_idx == end_idx
             };
-            if !level_too_high || !cur_pte.is_present() || cur_pte.is_last(cursor.level) {
+            if !level_too_high {
                 break;
             }
+
+            let cur_pte = cursor.read_cur_pte();
+            if !cur_pte.is_present() || cur_pte.is_last(cursor.level) {
+                break;
+            }
+
             cursor.level_down();
+
             // Release the guard of the previous level.
             cursor.guards[(C::NR_LEVELS - cursor.level) as usize - 1] = None;
             cursor.guard_level -= 1;
         }
+
         Ok(cursor)
     }

@@ -163,9 +172,11 @@ where
         if self.va >= self.barrier_va.end {
             return None;
         }
+
         loop {
             let level = self.level;
             let va = self.va;
+
             let pte = self.read_cur_pte();
             if !pte.is_present() {
                 return Some(PageTableQueryResult::NotMapped {
@@ -177,6 +188,7 @@ where
                 self.level_down();
                 continue;
             }
+
             match self.cur_child() {
                 Child::Frame(frame) => {
                     return Some(PageTableQueryResult::Mapped {
@@ -228,6 +240,7 @@ where
     /// Goes down a level assuming a child page table exists.
     fn level_down(&mut self) {
         debug_assert!(self.level > 1);
+
         if let Child::PageTable(nxt_lvl_frame) = self.cur_child() {
             self.level -= 1;
             self.guards[(C::NR_LEVELS - self.level) as usize] = Some(nxt_lvl_frame.lock());
@@ -324,6 +337,7 @@ where
     pub(crate) fn jump(&mut self, va: Vaddr) {
         assert!(self.0.barrier_va.contains(&va));
         assert!(va % C::BASE_PAGE_SIZE == 0);
+
         loop {
             let cur_node_start = self.0.va & !(page_size::<C>(self.0.level + 1) - 1);
             let cur_node_end = cur_node_start + page_size::<C>(self.0.level + 1);
@@ -332,12 +346,14 @@ where
                 self.0.va = va;
                 return;
             }
+
             // There is a corner case that the cursor is depleted, sitting at the start of the
             // next node but the next node is not locked because the parent is not locked.
             if self.0.va >= self.0.barrier_va.end && self.0.level == self.0.guard_level {
                 self.0.va = va;
                 return;
             }
+
             debug_assert!(self.0.level < self.0.guard_level);
             self.0.level_up();
         }
@@ -360,6 +376,7 @@ where
         let end = self.0.va + frame.size();
         assert!(end <= self.0.barrier_va.end);
         debug_assert!(!self.0.in_untracked_range());
+
         // Go down if not applicable.
         while self.0.level > C::HIGHEST_TRANSLATION_LEVEL
             || self.0.va % page_size::<C>(self.0.level) != 0
@@ -376,9 +393,11 @@ where
             continue;
         }
         debug_assert_eq!(self.0.level, frame.level());
+
         // Map the current page.
         let idx = self.0.cur_idx();
         self.cur_node_mut().set_child_frame(idx, frame, prop);
+
         self.0.move_forward();
     }

@@ -413,6 +432,7 @@ where
         let end = self.0.va + pa.len();
         let mut pa = pa.start;
         assert!(end <= self.0.barrier_va.end);
+
         while self.0.va < end {
             // We ensure not mapping in reserved kernel shared tables or releasing it.
             // Although it may be an invariant for all architectures and will be optimized
@@ -435,11 +455,13 @@ where
                 }
                 continue;
             }
+
             // Map the current page.
             debug_assert!(self.0.in_untracked_range());
             let idx = self.0.cur_idx();
-            let level = self.0.level;
             self.cur_node_mut().set_child_untracked(idx, pa, prop);
+
+            let level = self.0.level;
             pa += page_size::<C>(level);
             self.0.move_forward();
         }
@@ -460,6 +482,7 @@ where
         let end = self.0.va + len;
         assert!(end <= self.0.barrier_va.end);
         assert!(end % C::BASE_PAGE_SIZE == 0);
+
         while self.0.va < end {
             let cur_pte = self.0.read_cur_pte();
             let untracked = self.0.in_untracked_range();
@@ -494,6 +517,7 @@ where
             // Unmap the current page.
             let idx = self.0.cur_idx();
             self.cur_node_mut().unset_child(idx, untracked);
+
             self.0.move_forward();
         }
     }
@@ -519,6 +543,7 @@ where
     ) -> Result<(), PageTableError> {
         let end = self.0.va + len;
         assert!(end <= self.0.barrier_va.end);
+
         while self.0.va < end {
             let cur_pte = self.0.read_cur_pte();
             if !cur_pte.is_present() {
@@ -528,11 +553,13 @@ where
                 self.0.move_forward();
                 continue;
             }
+
             // Go down if it's not a last node.
             if !cur_pte.is_last(self.0.level) {
                 self.0.level_down();
                 continue;
             }
+
             // Go down if the page size is too big and we are protecting part
             // of untracked huge pages.
             let vaddr_not_fit = self.0.va % page_size::<C>(self.0.level) != 0
@@ -543,10 +570,13 @@ where
             } else if vaddr_not_fit {
                 return Err(PageTableError::ProtectingPartial);
             }
-            let idx = self.0.cur_idx();
+
             let mut pte_prop = cur_pte.prop();
             op(&mut pte_prop);
+
+            let idx = self.0.cur_idx();
             self.cur_node_mut().protect(idx, pte_prop);
+
             self.0.move_forward();
         }
         Ok(())
     }
@@ -559,10 +589,13 @@ where
         if self.0.guard_level != C::NR_LEVELS {
             return None;
         }
+
         while self.0.level < C::NR_LEVELS {
             self.0.level_up();
         }
+
         self.0.guards[0].take()
+
         // Ok to drop the cursor here because we ensure not to access the page table if the current
         // level is the root level when running the dropping method.
     }
@@ -572,6 +605,7 @@ where
     /// This method will create a new child frame and go down to it.
     fn level_down_create(&mut self) {
         debug_assert!(self.0.level > 1);
+
         let new_frame = PageTableNode::<E, C>::alloc(self.0.level - 1);
         let idx = self.0.cur_idx();
         let untracked = self.0.in_untracked_range();
@@ -587,8 +621,10 @@ where
     fn level_down_split(&mut self) {
         debug_assert!(self.0.level > 1);
         debug_assert!(self.0.in_untracked_range());
+
         let idx = self.0.cur_idx();
         self.cur_node_mut().split_untracked_huge(idx);
+
         let Child::PageTable(new_frame) = self.0.cur_child() else {
             unreachable!();
         };
diff --git a/framework/aster-frame/src/mm/page_table/mod.rs b/framework/aster-frame/src/mm/page_table/mod.rs
index d42cb1de1..fe0566276 100644
--- a/framework/aster-frame/src/mm/page_table/mod.rs
+++ b/framework/aster-frame/src/mm/page_table/mod.rs
@@ -101,6 +101,7 @@ impl PageTable<UserMode> {
     /// TODO: We may consider making the page table itself copy-on-write.
     pub(crate) fn fork_copy_on_write(&self) -> Self {
         let mut cursor = self.cursor_mut(&UserMode::VADDR_RANGE).unwrap();
+
         // SAFETY: Protecting the user page table is safe.
         unsafe {
             cursor
@@ -111,7 +112,9 @@ impl PageTable<UserMode> {
                 )
                 .unwrap();
         };
+
         let root_frame = cursor.leak_root_guard().unwrap();
+
         const NR_PTES_PER_NODE: usize = nr_subpage_per_huge::<PagingConsts>();
         let new_root_frame = unsafe {
             root_frame.make_copy(
@@ -119,6 +122,7 @@ impl PageTable<UserMode> {
                 NR_PTES_PER_NODE / 2..NR_PTES_PER_NODE,
             )
         };
+
         PageTable::<UserMode> {
             root: new_root_frame.into_raw(),
             _phantom: PhantomData,
@@ -136,9 +140,11 @@ impl PageTable<KernelMode> {
     /// other child page tables.
     pub(crate) fn create_user_page_table(&self) -> PageTable<UserMode> {
         let root_frame = self.root.clone_shallow().lock();
+
         const NR_PTES_PER_NODE: usize = nr_subpage_per_huge::<PagingConsts>();
         let new_root_frame =
             unsafe { root_frame.make_copy(0..0, NR_PTES_PER_NODE / 2..NR_PTES_PER_NODE) };
+
         PageTable::<UserMode> {
             root: new_root_frame.into_raw(),
             _phantom: PhantomData,
@@ -152,11 +158,14 @@ impl PageTable<KernelMode> {
     /// instead of the virtual address range.
     pub(crate) fn make_shared_tables(&self, root_index: Range<usize>) {
         const NR_PTES_PER_NODE: usize = nr_subpage_per_huge::<PagingConsts>();
+
         let start = root_index.start;
         debug_assert!(start >= NR_PTES_PER_NODE / 2);
         debug_assert!(start < NR_PTES_PER_NODE);
+
         let end = root_index.end;
         debug_assert!(end <= NR_PTES_PER_NODE);
+
         let mut root_frame = self.root.clone_shallow().lock();
         for i in start..end {
             if !root_frame.read_pte(i).is_present() {
@@ -298,10 +307,12 @@ pub(super) unsafe fn page_walk<E: PageTableEntryTrait, C: PagingConstsTrait>(
     if !cur_pte.is_present() {
         return None;
     }
+
     if cur_pte.is_last(cur_level) {
         debug_assert!(cur_level <= C::HIGHEST_TRANSLATION_LEVEL);
         break;
     }
+
     cur_level -= 1;
     cur_pte = {
         let frame_addr = paddr_to_vaddr(cur_pte.paddr());
diff --git a/framework/aster-frame/src/mm/page_table/node.rs b/framework/aster-frame/src/mm/page_table/node.rs
index 071d1b0d0..4a3056511 100644
--- a/framework/aster-frame/src/mm/page_table/node.rs
+++ b/framework/aster-frame/src/mm/page_table/node.rs
@@ -75,6 +75,7 @@ where
         // count is needed.
         let page = unsafe { Page::<PageTablePageMeta<E, C>>::from_raw(self.paddr()) };
         debug_assert!(page.meta().level == self.level);
+
         // Acquire the lock.
         while page
             .meta()
@@ -84,14 +85,17 @@ where
         {
             core::hint::spin_loop();
         }
+
         // Prevent dropping the handle.
         let _ = ManuallyDrop::new(self);
+
         PageTableNode::<E, C> { page }
     }

     /// Creates a copy of the handle.
     pub(super) fn clone_shallow(&self) -> Self {
         self.inc_ref();
+
         Self {
             raw: self.raw,
             level: self.level,
@@ -144,8 +148,11 @@ where
     /// with [`Self::activate()`] in other senses.
     pub(super) unsafe fn first_activate(&self) {
         use crate::{arch::mm::activate_page_table, mm::CachePolicy};
+
         debug_assert_eq!(self.level, PagingConsts::NR_LEVELS);
+
         self.inc_ref();
+
         activate_page_table(self.raw, CachePolicy::Writeback);
     }

@@ -211,6 +218,7 @@ where
     pub(super) fn alloc(level: PagingLevel) -> Self {
         let frame = FRAME_ALLOCATOR.get().unwrap().lock().alloc(1).unwrap() * PAGE_SIZE;
         let mut page = Page::<PageTablePageMeta<E, C>>::from_unused(frame);
+
         // The lock is initialized as held.
         page.meta().lock.store(1, Ordering::Relaxed);

@@ -235,8 +243,10 @@ where
     pub(super) fn into_raw(self) -> RawPageTableNode<E, C> {
         let level = self.level();
         let raw = self.page.paddr();
+
         self.page.meta().lock.store(0, Ordering::Release);
         core::mem::forget(self);
+
         RawPageTableNode {
             raw,
             level,
@@ -247,6 +257,7 @@ where
     /// Gets a raw handle while still preserving the original handle.
     pub(super) fn clone_raw(&self) -> RawPageTableNode<E, C> {
         core::mem::forget(self.page.clone());
+
         RawPageTableNode {
             raw: self.page.paddr(),
             level: self.level(),
@@ -257,6 +268,7 @@ where
     /// Gets an extra reference of the child at the given index.
     pub(super) fn child(&self, idx: usize, tracked: bool) -> Child<E, C> {
         debug_assert!(idx < nr_subpage_per_huge::<C>());
+
         let pte = self.read_pte(idx);
         if !pte.is_present() {
             Child::None
@@ -301,10 +313,12 @@ where
     ///
     /// The ranges must be disjoint.
     pub(super) unsafe fn make_copy(&self, deep: Range<usize>, shallow: Range<usize>) -> Self {
-        let mut new_frame = Self::alloc(self.level());
         debug_assert!(deep.end <= nr_subpage_per_huge::<C>());
         debug_assert!(shallow.end <= nr_subpage_per_huge::<C>());
         debug_assert!(deep.end <= shallow.start || deep.start >= shallow.end);
+
+        let mut new_frame = Self::alloc(self.level());
+
         for i in deep {
             match self.child(i, /*meaningless*/ true) {
                 Child::PageTable(pt) => {
@@ -322,6 +336,7 @@ where
                 }
             }
         }
+
         for i in shallow {
             debug_assert_eq!(self.level(), C::NR_LEVELS);
             match self.child(i, /*meaningless*/ true) {
@@ -334,12 +349,14 @@ where
                 }
             }
         }
+
         new_frame
     }

     /// Removes a child if the child at the given index is present.
     pub(super) fn unset_child(&mut self, idx: usize, in_untracked_range: bool) {
         debug_assert!(idx < nr_subpage_per_huge::<C>());
+
         self.overwrite_pte(idx, None, in_untracked_range);
     }

@@ -353,6 +370,7 @@ where
         // They should be ensured by the cursor.
         debug_assert!(idx < nr_subpage_per_huge::<C>());
         debug_assert_eq!(pt.level, self.level() - 1);
+
         let pte = Some(E::new_pt(pt.paddr()));
         self.overwrite_pte(idx, pte, in_untracked_range);
         // The ownership is transferred to a raw PTE. Don't drop the handle.
@@ -364,6 +382,7 @@ where
         // They should be ensured by the cursor.
         debug_assert!(idx < nr_subpage_per_huge::<C>());
         debug_assert_eq!(frame.level(), self.level());
+
         let pte = Some(E::new_frame(frame.start_paddr(), self.level(), prop));
         self.overwrite_pte(idx, pte, false);
         // The ownership is transferred to a raw PTE. Don't drop the handle.
@@ -378,6 +397,7 @@ where
     pub(super) unsafe fn set_child_untracked(&mut self, idx: usize, pa: Paddr, prop: PageProperty) {
         // It should be ensured by the cursor.
         debug_assert!(idx < nr_subpage_per_huge::<C>());
+
         let pte = Some(E::new_frame(pa, self.level(), prop));
         self.overwrite_pte(idx, pte, true);
     }
@@ -397,6 +417,7 @@ where
             panic!("`split_untracked_huge` not called on an untracked huge page");
         };
         let prop = self.read_pte_prop(idx);
+
         let mut new_frame = PageTableNode::<E, C>::alloc(self.level() - 1);
         for i in 0..nr_subpage_per_huge::<C>() {
             let small_pa = pa + i * page_size::<C>(self.level() - 1);
@@ -404,6 +425,7 @@ where
             // the property are valid.
             unsafe { new_frame.set_child_untracked(i, small_pa, prop) };
         }
+
         self.set_child_pt(idx, new_frame.into_raw(), true);
     }

@@ -411,7 +433,9 @@ where
     pub(super) fn protect(&mut self, idx: usize, prop: PageProperty) {
         let mut pte = self.read_pte(idx);
         debug_assert!(pte.is_present()); // This should be ensured by the cursor.
+
         pte.set_prop(prop);
+
         // SAFETY: the index is within the bound and the PTE is valid.
         unsafe {
             (self.as_ptr() as *mut E).add(idx).write(pte);
@@ -421,6 +445,7 @@ where
     pub(super) fn read_pte(&self, idx: usize) -> E {
         // It should be ensured by the cursor.
         debug_assert!(idx < nr_subpage_per_huge::<C>());
+
         // SAFETY: the index is within the bound and PTE is plain-old-data.
         unsafe { self.as_ptr().add(idx).read() }
     }
@@ -438,6 +463,7 @@ where
     /// memory if the child is a page table.
     fn overwrite_pte(&mut self, idx: usize, pte: Option<E>, in_untracked_range: bool) {
         let existing_pte = self.read_pte(idx);
+
         if existing_pte.is_present() {
             // SAFETY: The index is within the bound and the address is aligned.
             // The validity of the PTE is checked within this module.
@@ -501,6 +527,7 @@ where
     fn on_drop(page: &mut Page<Self>) {
         let paddr = page.paddr();
         let level = page.meta().level;
+
         // Drop the children.
         for i in 0..nr_subpage_per_huge::<C>() {
             // SAFETY: The index is within the bound and PTE is plain-old-data. The
@@ -525,6 +552,7 @@ where
                 }
             }
         }
+
         // Recycle this page table node.
         FRAME_ALLOCATOR
             .get()