From e1e7afe0ca0c1511e666ef53dece74aed8af9c55 Mon Sep 17 00:00:00 2001 From: Zhang Junyang Date: Thu, 3 Oct 2024 02:11:07 +0800 Subject: [PATCH] Use `node::Entry` to optimize page table cursor operations --- ostd/src/mm/page/mod.rs | 52 ++-- ostd/src/mm/page_table/cursor.rs | 383 ++++++++++++--------------- ostd/src/mm/page_table/mod.rs | 13 +- ostd/src/mm/page_table/node/child.rs | 105 +++++--- ostd/src/mm/page_table/node/entry.rs | 161 +++++++++++ ostd/src/mm/page_table/node/mod.rs | 217 +++++++-------- 6 files changed, 516 insertions(+), 415 deletions(-) create mode 100644 ostd/src/mm/page_table/node/entry.rs diff --git a/ostd/src/mm/page/mod.rs b/ostd/src/mm/page/mod.rs index 9c4004df9..336199756 100644 --- a/ostd/src/mm/page/mod.rs +++ b/ostd/src/mm/page/mod.rs @@ -119,25 +119,6 @@ impl Page { } } - /// Increase the reference count of the page by one. - /// - /// # Safety - /// - /// The physical address must represent a valid page. - /// - /// And the caller must ensure the metadata slot pointed through the corresponding - /// virtual address is initialized by holding a reference count of the page firstly. - /// Otherwise the function may add a reference count to an unused page. - pub(in crate::mm) unsafe fn inc_ref_count(paddr: Paddr) { - debug_assert!(paddr % PAGE_SIZE == 0); - debug_assert!(paddr < MAX_PADDR.load(Ordering::Relaxed) as Paddr); - let vaddr: Vaddr = mapping::page_to_meta::(paddr); - // SAFETY: The virtual address points to an initialized metadata slot. - (*(vaddr as *const MetaSlot)) - .ref_count - .fetch_add(1, Ordering::Relaxed); - } - /// Get the physical address. pub fn paddr(&self) -> Paddr { mapping::meta_to_page::(self.ptr as Vaddr) @@ -248,20 +229,6 @@ impl DynPage { Self { ptr } } - /// Increase the reference count of the page by one. - /// - /// # Safety - /// - /// This is the same as [`Page::inc_ref_count`]. - pub(in crate::mm) unsafe fn inc_ref_count(paddr: Paddr) { - debug_assert!(paddr % PAGE_SIZE == 0); - debug_assert!(paddr < MAX_PADDR.load(Ordering::Relaxed) as Paddr); - let vaddr: Vaddr = mapping::page_to_meta::(paddr); - (*(vaddr as *const MetaSlot)) - .ref_count - .fetch_add(1, Ordering::Relaxed); - } - /// Get the physical address of the start of the page pub fn paddr(&self) -> Paddr { mapping::meta_to_page::(self.ptr as Vaddr) @@ -363,3 +330,22 @@ impl Drop for DynPage { } } } + +/// Increases the reference count of the page by one. +/// +/// # Safety +/// +/// The caller should ensure the following conditions: +/// 1. The physical address must represent a valid page; +/// 2. The caller must have already held a reference to the page. +pub(in crate::mm) unsafe fn inc_page_ref_count(paddr: Paddr) { + debug_assert!(paddr % PAGE_SIZE == 0); + debug_assert!(paddr < MAX_PADDR.load(Ordering::Relaxed) as Paddr); + + let vaddr: Vaddr = mapping::page_to_meta::(paddr); + // SAFETY: The virtual address points to an initialized metadata slot. 
+ let slot = unsafe { &*(vaddr as *const MetaSlot) }; + let old = slot.ref_count.fetch_add(1, Ordering::Relaxed); + + debug_assert!(old > 0); +} diff --git a/ostd/src/mm/page_table/cursor.rs b/ostd/src/mm/page_table/cursor.rs index d3f3a05fe..29e61895a 100644 --- a/ostd/src/mm/page_table/cursor.rs +++ b/ostd/src/mm/page_table/cursor.rs @@ -70,7 +70,7 @@ use core::{any::TypeId, marker::PhantomData, ops::Range}; use align_ext::AlignExt; use super::{ - page_size, pte_index, Child, KernelMode, PageTable, PageTableEntryTrait, PageTableError, + page_size, pte_index, Child, Entry, KernelMode, PageTable, PageTableEntryTrait, PageTableError, PageTableMode, PageTableNode, PagingConstsTrait, PagingLevel, UserMode, }; use crate::{ @@ -138,6 +138,7 @@ where va: Vaddr, /// The virtual address range that is locked. barrier_va: Range, + #[allow(dead_code)] preempt_guard: DisabledPreemptGuard, _phantom: PhantomData<&'a PageTable>, } @@ -194,12 +195,15 @@ where break; } - let cur_pte = cursor.read_cur_pte(); - if !cur_pte.is_present() || cur_pte.is_last(cursor.level) { + let entry = cursor.cur_entry(); + if !entry.is_node() { break; } + let Child::PageTable(child_pt) = entry.to_owned() else { + unreachable!("Already checked"); + }; - cursor.level_down(); + cursor.push_level(child_pt.lock()); // Release the guard of the previous (upper) level. cursor.guards[cursor.level as usize] = None; @@ -219,9 +223,9 @@ where let level = self.level; let va = self.va; - match self.cur_child() { - Child::PageTable(_) => { - self.level_down(); + match self.cur_entry().to_owned() { + Child::PageTable(pt) => { + self.push_level(pt.lock()); continue; } Child::None => { @@ -254,7 +258,7 @@ where let page_size = page_size::(self.level); let next_va = self.va.align_down(page_size) + page_size; while self.level < self.guard_level && pte_index::(next_va, self.level) == 0 { - self.level_up(); + self.pop_level(); } self.va = next_va; } @@ -288,7 +292,7 @@ where } debug_assert!(self.level < self.guard_level); - self.level_up(); + self.pop_level(); } } @@ -296,36 +300,26 @@ where self.va } - pub fn preempt_guard(&self) -> &DisabledPreemptGuard { - &self.preempt_guard - } - - /// Goes up a level. We release the current page if it has no mappings since the cursor only moves - /// forward. And if needed we will do the final cleanup using this method after re-walk when the - /// cursor is dropped. + /// Goes up a level. /// - /// This method requires locks acquired before calling it. The discarded level will be unlocked. - fn level_up(&mut self) { + /// We release the current page if it has no mappings since the cursor + /// only moves forward. And if needed we will do the final cleanup using + /// this method after re-walk when the cursor is dropped. + /// + /// This method requires locks acquired before calling it. The discarded + /// level will be unlocked. + fn pop_level(&mut self) { self.guards[(self.level - 1) as usize] = None; self.level += 1; // TODO: Drop page tables if page tables become empty. } - /// Goes down a level assuming a child page table exists. - fn level_down(&mut self) { - debug_assert!(self.level > 1); - - let Child::PageTable(nxt_lvl_ptn) = self.cur_child() else { - panic!("Trying to level down when it is not mapped to a page table"); - }; - - let nxt_lvl_ptn_locked = nxt_lvl_ptn.lock(); - + /// Goes down a level to a child page table. 
+ fn push_level(&mut self, child_pt: PageTableNode) { self.level -= 1; - debug_assert_eq!(self.level, nxt_lvl_ptn_locked.level()); - - self.guards[(self.level - 1) as usize] = Some(nxt_lvl_ptn_locked); + debug_assert_eq!(self.level, child_pt.level()); + self.guards[(self.level - 1) as usize] = Some(child_pt); } fn should_map_as_tracked(&self) -> bool { @@ -334,20 +328,9 @@ where && should_map_as_tracked(self.va) } - fn cur_node(&self) -> &PageTableNode { - self.guards[(self.level - 1) as usize].as_ref().unwrap() - } - - fn cur_idx(&self) -> usize { - pte_index::(self.va, self.level) - } - - fn cur_child(&self) -> Child { - self.cur_node().child(self.cur_idx()) - } - - fn read_cur_pte(&self) -> E { - self.cur_node().read_pte(self.cur_idx()) + fn cur_entry(&mut self) -> Entry<'_, E, C> { + let node = self.guards[(self.level - 1) as usize].as_mut().unwrap(); + node.entry(pte_index::(self.va, self.level)) } } @@ -445,24 +428,31 @@ where || self.0.va + page_size::(self.0.level) > end { debug_assert!(self.0.should_map_as_tracked()); - - let pte = self.0.read_cur_pte(); - if pte.is_present() && !pte.is_last(self.0.level) { - self.0.level_down(); - } else if !pte.is_present() { - self.level_down_create(); - } else { - panic!("Mapping a smaller page in an already mapped huge page"); + let cur_level = self.0.level; + let cur_entry = self.0.cur_entry(); + match cur_entry.to_owned() { + Child::PageTable(pt) => { + self.0.push_level(pt.lock()); + } + Child::None => { + let pt = + PageTableNode::::alloc(cur_level - 1, MapTrackingStatus::Tracked); + let _ = cur_entry.replace(Child::PageTable(pt.clone_raw())); + self.0.push_level(pt); + } + Child::Page(_, _) => { + panic!("Mapping a smaller page in an already mapped huge page"); + } + Child::Untracked(_, _, _) => { + panic!("Mapping a tracked page in an untracked range"); + } } continue; } debug_assert_eq!(self.0.level, page.level()); // Map the current page. - let idx = self.0.cur_idx(); - let old = self - .cur_node_mut() - .replace_child(idx, Child::Page(page, prop)); + let old = self.0.cur_entry().replace(Child::Page(page, prop)); self.0.move_forward(); match old { @@ -519,26 +509,40 @@ where || self.0.va + page_size::(self.0.level) > end || pa % page_size::(self.0.level) != 0 { - let pte = self.0.read_cur_pte(); - if pte.is_present() && !pte.is_last(self.0.level) { - self.0.level_down(); - } else if !pte.is_present() { - self.level_down_create(); - } else { - self.level_down_split(); + let cur_level = self.0.level; + let cur_entry = self.0.cur_entry(); + match cur_entry.to_owned() { + Child::PageTable(pt) => { + self.0.push_level(pt.lock()); + } + Child::None => { + let pt = PageTableNode::::alloc( + cur_level - 1, + MapTrackingStatus::Untracked, + ); + let _ = cur_entry.replace(Child::PageTable(pt.clone_raw())); + self.0.push_level(pt); + } + Child::Page(_, _) => { + panic!("Mapping a smaller page in an already mapped huge page"); + } + Child::Untracked(_, _, _) => { + let split_child = cur_entry.split_if_untracked_huge().unwrap(); + self.0.push_level(split_child); + } } continue; } // Map the current page. debug_assert!(!self.0.should_map_as_tracked()); - let idx = self.0.cur_idx(); let level = self.0.level; let _ = self - .cur_node_mut() - .replace_child(idx, Child::Untracked(pa, level, prop)); + .0 + .cur_entry() + .replace(Child::Untracked(pa, level, prop)); - let level = self.0.level; + // Move forward. 
pa += page_size::(level); self.0.move_forward(); } @@ -575,10 +579,12 @@ where assert!(end <= self.0.barrier_va.end); while self.0.va < end { - let cur_pte = self.0.read_cur_pte(); + let cur_va = self.0.va; + let cur_level = self.0.level; + let cur_entry = self.0.cur_entry(); // Skip if it is already absent. - if !cur_pte.is_present() { + if cur_entry.is_none() { if self.0.va + page_size::(self.0.level) > end { self.0.va = end; break; @@ -587,54 +593,53 @@ where continue; } - if self.0.va % page_size::(self.0.level) != 0 - || self.0.va + page_size::(self.0.level) > end - { - if cur_pte.is_last(self.0.level) { - if !self.0.should_map_as_tracked() { - // Level down if we are removing part of a huge untracked page. - self.level_down_split(); - continue; - } else { - panic!("removing part of a huge page"); + // Go down if not applicable. + if cur_va % page_size::(cur_level) != 0 || cur_va + page_size::(cur_level) > end { + let child = cur_entry.to_owned(); + match child { + Child::PageTable(pt) => { + let pt = pt.lock(); + // If there's no mapped PTEs in the next level, we can + // skip to save time. + if pt.nr_children() != 0 { + self.0.push_level(pt); + } else { + if self.0.va + page_size::(self.0.level) > end { + self.0.va = end; + break; + } + self.0.move_forward(); + } + } + Child::None => { + unreachable!("Already checked"); + } + Child::Page(_, _) => { + panic!("Removing part of a huge page"); + } + Child::Untracked(_, _, _) => { + let split_child = cur_entry.split_if_untracked_huge().unwrap(); + self.0.push_level(split_child); } - } - - // Level down if the current PTE points to a page table and we cannot - // unmap this page table node entirely. - self.0.level_down(); - - // We have got down a level. If there's no mapped PTEs in - // the current node, we can go back and skip to save time. - if self.0.guards[(self.0.level - 1) as usize] - .as_ref() - .unwrap() - .nr_children() - == 0 - { - self.0.level_up(); - self.0.move_forward(); } continue; } // Unmap the current page and return it. - let idx = self.0.cur_idx(); - let ret = self.cur_node_mut().replace_child(idx, Child::None); - let ret_page_va = self.0.va; + let old = cur_entry.replace(Child::None); self.0.move_forward(); - return match ret { + return match old { Child::Page(page, prop) => PageTableItem::Mapped { - va: ret_page_va, + va: self.0.va, page, prop, }, Child::Untracked(pa, level, prop) => { debug_assert_eq!(level, self.0.level); PageTableItem::MappedUntracked { - va: ret_page_va, + va: self.0.va, pa, len: page_size::(level), prop, @@ -684,51 +689,46 @@ where assert!(end <= self.0.barrier_va.end); while self.0.va < end { - let cur_pte = self.0.read_cur_pte(); - if !cur_pte.is_present() { + let cur_va = self.0.va; + let cur_level = self.0.level; + let mut cur_entry = self.0.cur_entry(); + + // Skip if it is already absent. + if cur_entry.is_none() { self.0.move_forward(); continue; } - // Go down if it's not a last node. - if !cur_pte.is_last(self.0.level) { - self.0.level_down(); - - // We have got down a level. If there's no mapped PTEs in - // the current node, we can go back and skip to save time. - if self.0.guards[(self.0.level - 1) as usize] - .as_ref() - .unwrap() - .nr_children() - == 0 - { - self.0.level_up(); + // Go down if it's not a last entry. + if cur_entry.is_node() { + let Child::PageTable(pt) = cur_entry.to_owned() else { + unreachable!("Already checked"); + }; + let pt = pt.lock(); + // If there's no mapped PTEs in the next level, we can + // skip to save time. 
+ if pt.nr_children() != 0 { + self.0.push_level(pt); + } else { self.0.move_forward(); } - continue; } // Go down if the page size is too big and we are protecting part // of untracked huge pages. - if self.0.va % page_size::(self.0.level) != 0 - || self.0.va + page_size::(self.0.level) > end - { - if self.0.should_map_as_tracked() { - panic!("protecting part of a huge page"); - } else { - self.level_down_split(); - continue; - } + if cur_va % page_size::(cur_level) != 0 || cur_va + page_size::(cur_level) > end { + let split_child = cur_entry + .split_if_untracked_huge() + .expect("Protecting part of a huge page"); + self.0.push_level(split_child); + continue; } - let mut pte_prop = cur_pte.prop(); - op(&mut pte_prop); + // Protect the current page. + cur_entry.protect(op); - let idx = self.0.cur_idx(); - self.cur_node_mut().protect(idx, pte_prop); let protected_va = self.0.va..self.0.va + page_size::(self.0.level); - self.0.move_forward(); return Some(protected_va); @@ -777,95 +777,46 @@ where assert!(src_end <= src.0.barrier_va.end); while self.0.va < this_end && src.0.va < src_end { - let cur_pte = src.0.read_cur_pte(); - if !cur_pte.is_present() { - src.0.move_forward(); - continue; - } + let src_va = src.0.va; + let mut src_entry = src.0.cur_entry(); - // Go down if it's not a last node. - if !cur_pte.is_last(src.0.level) { - src.0.level_down(); + match src_entry.to_owned() { + Child::PageTable(pt) => { + let pt = pt.lock(); + // If there's no mapped PTEs in the next level, we can + // skip to save time. + if pt.nr_children() != 0 { + src.0.push_level(pt); + } else { + src.0.move_forward(); + } + continue; + } + Child::None => { + src.0.move_forward(); + continue; + } + Child::Untracked(_, _, _) => { + panic!("Copying untracked mappings"); + } + Child::Page(page, mut prop) => { + let mapped_page_size = page.size(); - // We have got down a level. If there's no mapped PTEs in - // the current node, we can go back and skip to save time. - if src.0.guards[(src.0.level - 1) as usize] - .as_ref() - .unwrap() - .nr_children() - == 0 - { - src.0.level_up(); + // Do protection. + src_entry.protect(op); + + // Do copy. + op(&mut prop); + self.jump(src_va).unwrap(); + let original = self.map(page, prop); + assert!(original.is_none()); + + // Only move the source cursor forward since `Self::map` will do it. + // This assertion is to ensure that they move by the same length. + debug_assert_eq!(mapped_page_size, page_size::(src.0.level)); src.0.move_forward(); } - - continue; } - - // Do protection. - let mut pte_prop = cur_pte.prop(); - op(&mut pte_prop); - - let idx = src.0.cur_idx(); - src.cur_node_mut().protect(idx, pte_prop); - - // Do copy. - let child = src.cur_node_mut().child(idx); - let Child::::Page(page, prop) = child else { - panic!("Unexpected child for source mapping: {:#?}", child); - }; - self.jump(src.0.va).unwrap(); - let mapped_page_size = page.size(); - let original = self.map(page, prop); - debug_assert!(original.is_none()); - - // Only move the source cursor forward since `Self::map` will do it. - // This assertion is to ensure that they move by the same length. - debug_assert_eq!(mapped_page_size, page_size::(src.0.level)); - src.0.move_forward(); } } - - /// Goes down a level assuming the current slot is absent. - /// - /// This method will create a new child page table node and go down to it. 
- fn level_down_create(&mut self) { - debug_assert!(self.0.level > 1); - let new_node = PageTableNode::::alloc( - self.0.level - 1, - if self.0.should_map_as_tracked() { - MapTrackingStatus::Tracked - } else { - MapTrackingStatus::Untracked - }, - ); - let idx = self.0.cur_idx(); - let old = self - .cur_node_mut() - .replace_child(idx, Child::PageTable(new_node.clone_raw())); - debug_assert!(old.is_none()); - self.0.level -= 1; - self.0.guards[(self.0.level - 1) as usize] = Some(new_node); - } - - /// Goes down a level assuming the current slot is an untracked huge page. - /// - /// This method will split the huge page and go down to the next level. - fn level_down_split(&mut self) { - debug_assert!(self.0.level > 1); - debug_assert!(!self.0.should_map_as_tracked()); - - let idx = self.0.cur_idx(); - self.cur_node_mut().split_untracked_huge(idx); - - let Child::PageTable(new_node) = self.0.cur_child() else { - unreachable!(); - }; - self.0.level -= 1; - self.0.guards[(self.0.level - 1) as usize] = Some(new_node.lock()); - } - - fn cur_node_mut(&mut self) -> &mut PageTableNode { - self.0.guards[(self.0.level - 1) as usize].as_mut().unwrap() - } } diff --git a/ostd/src/mm/page_table/mod.rs b/ostd/src/mm/page_table/mod.rs index 24a4cc8a8..2aec94160 100644 --- a/ostd/src/mm/page_table/mod.rs +++ b/ostd/src/mm/page_table/mod.rs @@ -100,7 +100,7 @@ impl PageTable { /// This should be the only way to create the user page table, that is to /// duplicate the kernel page table with all the kernel mappings shared. pub fn create_user_page_table(&self) -> PageTable { - let root_node = self.root.clone_shallow().lock(); + let mut root_node = self.root.clone_shallow().lock(); let mut new_node = PageTableNode::alloc(PagingConsts::NR_LEVELS, MapTrackingStatus::NotApplicable); @@ -108,9 +108,9 @@ impl PageTable { // The user space range is not copied. const NR_PTES_PER_NODE: usize = nr_subpage_per_huge::(); for i in NR_PTES_PER_NODE / 2..NR_PTES_PER_NODE { - let child = root_node.child(i); - if !child.is_none() { - let _ = new_node.replace_child(i, child); + let root_entry = root_node.entry(i); + if !root_entry.is_none() { + let _ = new_node.entry(i).replace(root_entry.to_owned()); } } @@ -137,7 +137,8 @@ impl PageTable { let mut root_node = self.root.clone_shallow().lock(); for i in start..end { - if !root_node.read_pte(i).is_present() { + let root_entry = root_node.entry(i); + if root_entry.is_none() { let nxt_level = PagingConsts::NR_LEVELS - 1; let is_tracked = if super::kspace::should_map_as_tracked( i * page_size::(nxt_level), @@ -147,7 +148,7 @@ impl PageTable { MapTrackingStatus::Untracked }; let node = PageTableNode::alloc(nxt_level, is_tracked); - let _ = root_node.replace_child(i, Child::PageTable(node.into_raw())); + let _ = root_entry.replace(Child::PageTable(node.into_raw())); } } } diff --git a/ostd/src/mm/page_table/node/child.rs b/ostd/src/mm/page_table/node/child.rs index f1f5688fe..4a6e9bd4d 100644 --- a/ostd/src/mm/page_table/node/child.rs +++ b/ostd/src/mm/page_table/node/child.rs @@ -8,10 +8,7 @@ use super::{PageTableEntryTrait, RawPageTableNode}; use crate::{ arch::mm::{PageTableEntry, PagingConsts}, mm::{ - page::{ - meta::{MapTrackingStatus, PageTablePageMeta}, - DynPage, Page, - }, + page::{inc_page_ref_count, meta::MapTrackingStatus, DynPage}, page_prop::PageProperty, Paddr, PagingConstsTrait, PagingLevel, }, @@ -45,6 +42,27 @@ where matches!(self, Child::None) } + /// Returns whether the child is compatible with the given node. 
+ /// + /// In other words, it checks whether the child can be a child of a node + /// with the given level and tracking status. + pub(super) fn is_compatible( + &self, + node_level: PagingLevel, + is_tracked: MapTrackingStatus, + ) -> bool { + match self { + Child::PageTable(pt) => node_level == pt.level() + 1, + Child::Page(p, _) => { + node_level == p.level() && is_tracked == MapTrackingStatus::Tracked + } + Child::Untracked(_, level, _) => { + node_level == *level && is_tracked == MapTrackingStatus::Untracked + } + Child::None => true, + } + } + /// Converts a child into a owning PTE. /// /// By conversion it loses information about whether the page is tracked @@ -74,8 +92,10 @@ where /// # Safety /// /// The provided PTE must be originated from [`Child::into_pte`]. And the - /// provided information (level and tracking status) must align with the - /// lost information during the conversion. + /// provided information (level and tracking status) must be the same with + /// the lost information during the conversion. Strictly speaking, the + /// provided arguments must be compatible with the original child ( + /// specified by [`Child::is_compatible`]). /// /// This method should be only used no more than once for a PTE that has /// been converted from a child using the [`Child::into_pte`] method. @@ -85,18 +105,25 @@ where is_tracked: MapTrackingStatus, ) -> Self { if !pte.is_present() { - Child::None - } else { - let paddr = pte.paddr(); - if !pte.is_last(level) { - Child::PageTable(RawPageTableNode::from_paddr(paddr)) - } else { - match is_tracked { - MapTrackingStatus::Tracked => Child::Page(DynPage::from_raw(paddr), pte.prop()), - MapTrackingStatus::Untracked => Child::Untracked(paddr, level, pte.prop()), - MapTrackingStatus::NotApplicable => panic!("Invalid tracking status"), - } + return Child::None; + } + + let paddr = pte.paddr(); + + if !pte.is_last(level) { + // SAFETY: The physical address points to a valid page table node + // at the given level. + return Child::PageTable(unsafe { RawPageTableNode::from_raw_parts(paddr, level - 1) }); + } + + match is_tracked { + MapTrackingStatus::Tracked => { + // SAFETY: The physical address points to a valid page. + let page = unsafe { DynPage::from_raw(paddr) }; + Child::Page(page, pte.prop()) } + MapTrackingStatus::Untracked => Child::Untracked(paddr, level, pte.prop()), + MapTrackingStatus::NotApplicable => panic!("Invalid tracking status"), } } @@ -104,9 +131,8 @@ where /// /// # Safety /// - /// The provided PTE must be originated from [`Child::into_pte`]. And the - /// provided information (level and tracking status) must align with the - /// lost information during the conversion. + /// The provided PTE must be originated from [`Child::into_pte`], which is + /// the same requirement as the [`Child::from_pte`] method. /// /// This method must not be used with a PTE that has been restored to a /// child using the [`Child::from_pte`] method. 
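// Illustrative sketch, not part of the patch: how a child round-trips through
// `Child::into_pte` and `Child::from_pte`. The `node_level` and `is_tracked`
// arguments are assumed to be the level and tracking status of the node that
// holds the PTE, exactly what `Entry` passes via `self.node.level()` and
// `self.node.is_tracked()`. The function name is hypothetical, and it would
// have to live in this module for the `pub(super)` items to be visible.
fn pte_round_trip_sketch<E: PageTableEntryTrait, C: PagingConstsTrait>(
    child: Child<E, C>,
    node_level: PagingLevel,
    is_tracked: MapTrackingStatus,
) -> Child<E, C>
where
    [(); C::NR_LEVELS as usize]:,
{
    // Converting to a PTE drops the level and tracking information.
    let pte = child.into_pte();
    // SAFETY: The PTE originates from `Child::into_pte` above, it is restored
    // exactly once, and the provided level and tracking status describe the
    // node the child is compatible with.
    unsafe { Child::from_pte(pte, node_level, is_tracked) }
}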
@@ -116,22 +142,31 @@ where is_tracked: MapTrackingStatus, ) -> Self { if !pte.is_present() { - Child::None - } else { - let paddr = pte.paddr(); - if !pte.is_last(level) { - Page::>::inc_ref_count(paddr); - Child::PageTable(RawPageTableNode::from_paddr(paddr)) - } else { - match is_tracked { - MapTrackingStatus::Tracked => { - DynPage::inc_ref_count(paddr); - Child::Page(DynPage::from_raw(paddr), pte.prop()) - } - MapTrackingStatus::Untracked => Child::Untracked(paddr, level, pte.prop()), - MapTrackingStatus::NotApplicable => panic!("Invalid tracking status"), - } + return Child::None; + } + + let paddr = pte.paddr(); + + if !pte.is_last(level) { + // SAFETY: The physical address is valid and the PTE already owns + // the reference to the page. + unsafe { inc_page_ref_count(paddr) }; + // SAFETY: The physical address points to a valid page table node + // at the given level. + return Child::PageTable(unsafe { RawPageTableNode::from_raw_parts(paddr, level - 1) }); + } + + match is_tracked { + MapTrackingStatus::Tracked => { + // SAFETY: The physical address is valid and the PTE already owns + // the reference to the page. + unsafe { inc_page_ref_count(paddr) }; + // SAFETY: The physical address points to a valid page. + let page = unsafe { DynPage::from_raw(paddr) }; + Child::Page(page, pte.prop()) } + MapTrackingStatus::Untracked => Child::Untracked(paddr, level, pte.prop()), + MapTrackingStatus::NotApplicable => panic!("Invalid tracking status"), } } } diff --git a/ostd/src/mm/page_table/node/entry.rs b/ostd/src/mm/page_table/node/entry.rs new file mode 100644 index 000000000..e81beb53c --- /dev/null +++ b/ostd/src/mm/page_table/node/entry.rs @@ -0,0 +1,161 @@ +// SPDX-License-Identifier: MPL-2.0 + +//! This module provides accessors to the page table entries in a node. + +use super::{Child, PageTableEntryTrait, PageTableNode}; +use crate::mm::{ + nr_subpage_per_huge, page::meta::MapTrackingStatus, page_prop::PageProperty, page_size, + PagingConstsTrait, +}; + +/// A view of an entry in a page table node. +/// +/// It can be borrowed from a node using the [`PageTableNode::entry`] method. +/// +/// This is a static reference to an entry in a node that does not account for +/// a dynamic reference count to the child. It can be used to create a owned +/// handle, which is a [`Child`]. +pub(in crate::mm) struct Entry<'a, E: PageTableEntryTrait, C: PagingConstsTrait> +where + [(); C::NR_LEVELS as usize]:, +{ + /// The page table entry. + /// + /// We store the page table entry here to optimize the number of reads from + /// the node. We cannot hold a `&mut E` reference to the entry because that + /// other CPUs may modify the memory location for accessed/dirty bits. Such + /// accesses will violate the aliasing rules of Rust and cause undefined + /// behaviors. + pte: E, + /// The index of the entry in the node. + idx: usize, + /// The node that contains the entry. + node: &'a mut PageTableNode, +} + +impl<'a, E: PageTableEntryTrait, C: PagingConstsTrait> Entry<'a, E, C> +where + [(); C::NR_LEVELS as usize]:, +{ + /// Returns if the entry does not map to anything. + pub(in crate::mm) fn is_none(&self) -> bool { + !self.pte.is_present() + } + + /// Returns if the entry maps to a page table node. + pub(in crate::mm) fn is_node(&self) -> bool { + self.pte.is_present() && !self.pte.is_last(self.node.level()) + } + + /// Gets a owned handle to the child. 
+ pub(in crate::mm) fn to_owned(&self) -> Child { + // SAFETY: The entry structure represents an existent entry with the + // right node information. + unsafe { Child::clone_from_pte(&self.pte, self.node.level(), self.node.is_tracked()) } + } + + /// Operates on the mapping properties of the entry. + /// + /// It only modifies the properties if the entry is present. + // FIXME: in x86_64, you can protect a page with neither of the RWX + // permissions. This would make the page not accessible and leaked. Such a + // behavior is memory-safe but wrong. In RISC-V there's no problem. + pub(in crate::mm) fn protect(&mut self, op: &mut impl FnMut(&mut PageProperty)) { + if !self.pte.is_present() { + return; + } + + let prop = self.pte.prop(); + let mut new_prop = prop; + op(&mut new_prop); + + if prop == new_prop { + return; + } + + self.pte.set_prop(new_prop); + + // SAFETY: + // 1. The index is within the bounds. + // 2. We replace the PTE with a new one, which differs only in + // `PageProperty`, so it is still compatible with the current + // page table node. + unsafe { self.node.write_pte(self.idx, self.pte) }; + } + + /// Replaces the entry with a new child. + /// + /// The old child is returned. + /// + /// # Panics + /// + /// The method panics if the given child is not compatible with the node. + /// The compatibility is specified by the [`Child::is_compatible`]. + pub(in crate::mm) fn replace(self, new_child: Child) -> Child { + assert!(new_child.is_compatible(self.node.level(), self.node.is_tracked())); + + // SAFETY: The entry structure represents an existent entry with the + // right node information. The old PTE is overwritten by the new child + // so that it is not used anymore. + let old_child = + unsafe { Child::from_pte(self.pte, self.node.level(), self.node.is_tracked()) }; + + if old_child.is_none() && !new_child.is_none() { + *self.node.nr_children_mut() += 1; + } else if !old_child.is_none() && new_child.is_none() { + *self.node.nr_children_mut() -= 1; + } + + // SAFETY: + // 1. The index is within the bounds. + // 2. The new PTE is compatible with the page table node, as asserted above. + unsafe { self.node.write_pte(self.idx, new_child.into_pte()) }; + + old_child + } + + /// Splits the entry to smaller pages if it maps to a untracked huge page. + /// + /// If the entry does map to a untracked huge page, it is split into smaller + /// pages mapped by a child page table node. The new child page table node + /// is returned. + /// + /// If the entry does not map to a untracked huge page, the method returns + /// `None`. + pub(in crate::mm) fn split_if_untracked_huge(self) -> Option> { + let level = self.node.level(); + + if !(self.pte.is_last(level) + && level > 1 + && self.node.is_tracked() == MapTrackingStatus::Untracked) + { + return None; + } + + let pa = self.pte.paddr(); + let prop = self.pte.prop(); + + let mut new_page = PageTableNode::::alloc(level - 1, MapTrackingStatus::Untracked); + for i in 0..nr_subpage_per_huge::() { + let small_pa = pa + i * page_size::(level - 1); + let _ = new_page + .entry(i) + .replace(Child::Untracked(small_pa, level - 1, prop)); + } + + let _ = self.replace(Child::PageTable(new_page.clone_raw())); + + Some(new_page) + } + + /// Create a new entry at the node. + /// + /// # Safety + /// + /// The caller must ensure that the index is within the bounds of the node. + pub(super) unsafe fn new_at(node: &'a mut PageTableNode, idx: usize) -> Self { + // SAFETY: The index is within the bound. 
+ let pte = unsafe { node.read_pte(idx) }; + Self { pte, idx, node } + } +} diff --git a/ostd/src/mm/page_table/node/mod.rs b/ostd/src/mm/page_table/node/mod.rs index 0aed00bdd..a5e1dcb1d 100644 --- a/ostd/src/mm/page_table/node/mod.rs +++ b/ostd/src/mm/page_table/node/mod.rs @@ -26,22 +26,21 @@ //! mod child; +mod entry; -use core::{marker::PhantomData, mem::ManuallyDrop, panic, sync::atomic::Ordering}; +use core::{marker::PhantomData, mem::ManuallyDrop, sync::atomic::Ordering}; -pub(in crate::mm) use child::Child; - -use super::{nr_subpage_per_huge, page_size, PageTableEntryTrait}; +pub(in crate::mm) use self::{child::Child, entry::Entry}; +use super::{nr_subpage_per_huge, PageTableEntryTrait}; use crate::{ arch::mm::{PageTableEntry, PagingConsts}, mm::{ paddr_to_vaddr, page::{ - self, + self, inc_page_ref_count, meta::{MapTrackingStatus, PageMeta, PageTablePageMeta, PageUsage}, DynPage, Page, }, - page_prop::PageProperty, Paddr, PagingConstsTrait, PagingLevel, PAGE_SIZE, }, }; @@ -60,6 +59,7 @@ where [(); C::NR_LEVELS as usize]:, { raw: Paddr, + level: PagingLevel, _phantom: PhantomData<(E, C)>, } @@ -71,8 +71,13 @@ where self.raw } + pub(super) fn level(&self) -> PagingLevel { + self.level + } + /// Converts a raw handle to an accessible handle by pertaining the lock. pub(super) fn lock(self) -> PageTableNode { + let level = self.level; let page: Page> = self.into(); // Acquire the lock. @@ -85,6 +90,8 @@ where core::hint::spin_loop(); } + debug_assert_eq!(page.meta().level, level); + PageTableNode:: { page } } @@ -94,6 +101,7 @@ where Self { raw: self.raw, + level: self.level, _phantom: PhantomData, } } @@ -110,12 +118,18 @@ where /// The caller must ensure that the page table to be activated has /// proper mappings for the kernel and has the correct const parameters /// matching the current CPU. + /// + /// # Panics + /// + /// Only top-level page tables can be activated using this function. pub(crate) unsafe fn activate(&self) { use crate::{ arch::mm::{activate_page_table, current_page_table_paddr}, mm::CachePolicy, }; + assert_eq!(self.level, C::NR_LEVELS); + let last_activated_paddr = current_page_table_paddr(); if last_activated_paddr == self.raw { @@ -130,6 +144,7 @@ where // Restore and drop the last activated page table. drop(Self { raw: last_activated_paddr, + level: C::NR_LEVELS, _phantom: PhantomData, }); } @@ -150,20 +165,21 @@ where // SAFETY: We have a reference count to the page and can safely increase the reference // count by one more. unsafe { - Page::>::inc_ref_count(self.paddr()); + inc_page_ref_count(self.paddr()); } } - /// Restore the handle to a page table node from a physical address. + /// Restores the handle from the physical address and level. /// /// # Safety /// /// The caller must ensure that the physical address is valid and points to /// a forgotten page table node. A forgotten page table node can only be - /// restored once. - unsafe fn from_paddr(paddr: Paddr) -> Self { + /// restored once. The level must match the level of the page table node. + unsafe fn from_raw_parts(paddr: Paddr, level: PagingLevel) -> Self { Self { raw: paddr, + level, _phantom: PhantomData, } } @@ -215,6 +231,28 @@ impl PageTableNode where [(); C::NR_LEVELS as usize]:, { + /// Borrows an entry in the node at a given index. + /// + /// # Panics + /// + /// Panics if the index is not within the bound of + /// [`nr_subpage_per_huge`]. 
+ pub(super) fn entry(&mut self, idx: usize) -> Entry<'_, E, C> { + assert!(idx < nr_subpage_per_huge::()); + // SAFETY: The index is within the bound. + unsafe { Entry::new_at(self, idx) } + } + + /// Gets the level of the page table node. + pub(super) fn level(&self) -> PagingLevel { + self.page.meta().level + } + + /// Gets the tracking status of the page table node. + pub(super) fn is_tracked(&self) -> MapTrackingStatus { + self.page.meta().is_tracked + } + /// Allocates a new empty page table node. /// /// This function returns an owning handle. The newly created handle does not @@ -234,148 +272,70 @@ where Self { page } } - pub fn level(&self) -> PagingLevel { - self.page.meta().level - } - - pub fn is_tracked(&self) -> MapTrackingStatus { - self.page.meta().is_tracked - } - /// Converts the handle into a raw handle to be stored in a PTE or CPU. pub(super) fn into_raw(self) -> RawPageTableNode { let this = ManuallyDrop::new(self); - let raw = this.page.paddr(); - + // Release the lock. this.page.meta().lock.store(0, Ordering::Release); - RawPageTableNode { - raw, - _phantom: PhantomData, - } + // SAFETY: The provided physical address is valid and the level is + // correct. The reference count is not changed. + unsafe { RawPageTableNode::from_raw_parts(this.page.paddr(), this.page.meta().level) } } /// Gets a raw handle while still preserving the original handle. pub(super) fn clone_raw(&self) -> RawPageTableNode { - core::mem::forget(self.page.clone()); + let page = ManuallyDrop::new(self.page.clone()); - RawPageTableNode { - raw: self.page.paddr(), - _phantom: PhantomData, - } + // SAFETY: The provided physical address is valid and the level is + // correct. The reference count is increased by one. + unsafe { RawPageTableNode::from_raw_parts(page.paddr(), page.meta().level) } } - /// Gets an extra reference of the child at the given index. - pub(super) fn child(&self, idx: usize) -> Child { - debug_assert!(idx < nr_subpage_per_huge::()); - - let pte = self.read_pte(idx); - - // SAFETY: The PTE is read from this page table node so the information - // recorded in this page table is correct. - unsafe { Child::clone_from_pte(&pte, self.level(), self.is_tracked()) } + /// Gets the number of valid PTEs in the node. + pub(super) fn nr_children(&self) -> u16 { + // SAFETY: The lock is held so we have an exclusive access. + unsafe { *self.page.meta().nr_children.get() } } - /// Replace the child at the given index with a new child. + /// Reads a non-owning PTE at the given index. /// - /// The old child is returned. The new child must match the level of the page - /// table node and the tracking status of the page table node. - pub(super) fn replace_child(&mut self, idx: usize, new_child: Child) -> Child { - // It should be ensured by the cursor. - #[cfg(debug_assertions)] - match &new_child { - Child::PageTable(_) => { - debug_assert!(self.level() > 1); - } - Child::Page(p, _) => { - debug_assert!(self.level() == p.level()); - debug_assert!(self.is_tracked() == MapTrackingStatus::Tracked); - } - Child::Untracked(_, level, _) => { - debug_assert!(self.level() == *level); - debug_assert!(self.is_tracked() == MapTrackingStatus::Untracked); - } - Child::None => {} - } - - let pte = self.read_pte(idx); - // SAFETY: The PTE is read from this page table node so the information - // provided is correct. The PTE is not restored twice. 
- let old_child = unsafe { Child::from_pte(pte, self.level(), self.is_tracked()) }; - - if old_child.is_none() && !new_child.is_none() { - *self.nr_children_mut() += 1; - } else if !old_child.is_none() && new_child.is_none() { - *self.nr_children_mut() -= 1; - } - - self.write_pte(idx, new_child.into_pte()); - - old_child - } - - /// Splits the untracked huge page mapped at `idx` to smaller pages. - pub(super) fn split_untracked_huge(&mut self, idx: usize) { - // These should be ensured by the cursor. + /// A non-owning PTE means that it does not account for a reference count + /// of the a page if the PTE points to a page. The original PTE still owns + /// the child page. + /// + /// # Safety + /// + /// The caller must ensure that the index is within the bound. + unsafe fn read_pte(&self, idx: usize) -> E { debug_assert!(idx < nr_subpage_per_huge::()); - debug_assert!(self.level() > 1); - - let Child::Untracked(pa, level, prop) = self.child(idx) else { - panic!("`split_untracked_huge` not called on an untracked huge page"); - }; - - debug_assert_eq!(level, self.level()); - - let mut new_page = PageTableNode::::alloc(level - 1, MapTrackingStatus::Untracked); - for i in 0..nr_subpage_per_huge::() { - let small_pa = pa + i * page_size::(level - 1); - new_page.replace_child(i, Child::Untracked(small_pa, level - 1, prop)); - } - - self.replace_child(idx, Child::PageTable(new_page.into_raw())); - } - - /// Protects an already mapped child at a given index. - pub(super) fn protect(&mut self, idx: usize, prop: PageProperty) { - let mut pte = self.read_pte(idx); - debug_assert!(pte.is_present()); // This should be ensured by the cursor. - - pte.set_prop(prop); - - self.write_pte(idx, pte); - } - - pub(super) fn read_pte(&self, idx: usize) -> E { - // It should be ensured by the cursor. - debug_assert!(idx < nr_subpage_per_huge::()); - let ptr = paddr_to_vaddr(self.page.paddr()) as *const E; - - // SAFETY: the index is within the bound and PTE is plain-old-data. + // SAFETY: The index is within the bound and the PTE is plain-old-data. unsafe { ptr.add(idx).read() } } /// Writes a page table entry at a given index. /// - /// This operation will leak the old child if the PTE is present. - fn write_pte(&mut self, idx: usize, pte: E) { - // It should be ensured by the cursor. + /// This operation will leak the old child if the old PTE is present. + /// + /// The child represented by the given PTE will handover the ownership to + /// the node. The PTE will be rendered invalid after this operation. + /// + /// # Safety + /// + /// The caller must ensure that: + /// 1. The index must be within the bound; + /// 2. The PTE must represent a child compatible with this page table node + /// (see [`Child::is_compatible`]). + unsafe fn write_pte(&mut self, idx: usize, pte: E) { debug_assert!(idx < nr_subpage_per_huge::()); - let ptr = paddr_to_vaddr(self.page.paddr()) as *mut E; - - // SAFETY: the index is within the bound and PTE is plain-old-data. - unsafe { ptr.add(idx).write(pte) }; - } - - /// The number of valid PTEs. - pub(super) fn nr_children(&self) -> u16 { - // SAFETY: The lock is held so there is no mutable reference to it. - // It would be safe to read. - unsafe { *self.page.meta().nr_children.get() } + // SAFETY: The index is within the bound and the PTE is plain-old-data. + unsafe { ptr.add(idx).write(pte) } } + /// Gets the mutable reference to the number of valid PTEs in the node. fn nr_children_mut(&mut self) -> &mut u16 { // SAFETY: The lock is held so we have an exclusive access. 
         unsafe { &mut *self.page.meta().nr_children.get() }
@@ -399,6 +359,13 @@ where
     const USAGE: PageUsage = PageUsage::PageTable;
 
     fn on_drop(page: &mut Page<Self>) {
+        // SAFETY: This is the last reference so we have an exclusive access.
+        let nr_children = unsafe { *page.meta().nr_children.get() };
+
+        if nr_children == 0 {
+            return;
+        }
+
         let paddr = page.paddr();
         let level = page.meta().level;
         let is_tracked = page.meta().is_tracked;
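
For reference, a minimal sketch (not part of the patch) of how the new `Entry` accessor is meant to be used from code inside the page table module; the function name and its inputs (`raw`, `idx`, `page`, `prop`) are hypothetical:

fn replace_child_sketch<E, C>(
    raw: RawPageTableNode<E, C>,
    idx: usize,
    page: DynPage,
    prop: PageProperty,
) -> Child<E, C>
where
    E: PageTableEntryTrait,
    C: PagingConstsTrait,
    [(); C::NR_LEVELS as usize]:,
{
    // Lock the raw handle to gain exclusive access to the node's PTEs.
    let mut node = raw.lock();
    // Borrow the entry at `idx`; this reads the PTE once and caches it.
    let entry = node.entry(idx);
    // Replace the child. `replace` asserts `Child::is_compatible`, so the page
    // must match the node's level and the node must be tracked; `nr_children`
    // is kept consistent and the old child is handed back to the caller.
    entry.replace(Child::Page(page, prop))
}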