Use node::Entry to optimize page table cursor operations

Zhang Junyang
2024-10-03 02:11:07 +08:00
committed by Tate, Hongliang Tian
parent 96f120d957
commit e1e7afe0ca
6 changed files with 516 additions and 415 deletions

View File

@ -119,25 +119,6 @@ impl<M: PageMeta> Page<M> {
}
}
/// Increase the reference count of the page by one.
///
/// # Safety
///
/// The physical address must represent a valid page.
///
/// And the caller must ensure the metadata slot pointed through the corresponding
/// virtual address is initialized by holding a reference count of the page firstly.
/// Otherwise the function may add a reference count to an unused page.
pub(in crate::mm) unsafe fn inc_ref_count(paddr: Paddr) {
debug_assert!(paddr % PAGE_SIZE == 0);
debug_assert!(paddr < MAX_PADDR.load(Ordering::Relaxed) as Paddr);
let vaddr: Vaddr = mapping::page_to_meta::<PagingConsts>(paddr);
// SAFETY: The virtual address points to an initialized metadata slot.
(*(vaddr as *const MetaSlot))
.ref_count
.fetch_add(1, Ordering::Relaxed);
}
/// Get the physical address.
pub fn paddr(&self) -> Paddr {
mapping::meta_to_page::<PagingConsts>(self.ptr as Vaddr)
@ -248,20 +229,6 @@ impl DynPage {
Self { ptr }
}
/// Increase the reference count of the page by one.
///
/// # Safety
///
/// This is the same as [`Page::inc_ref_count`].
pub(in crate::mm) unsafe fn inc_ref_count(paddr: Paddr) {
debug_assert!(paddr % PAGE_SIZE == 0);
debug_assert!(paddr < MAX_PADDR.load(Ordering::Relaxed) as Paddr);
let vaddr: Vaddr = mapping::page_to_meta::<PagingConsts>(paddr);
(*(vaddr as *const MetaSlot))
.ref_count
.fetch_add(1, Ordering::Relaxed);
}
/// Get the physical address of the start of the page
pub fn paddr(&self) -> Paddr {
mapping::meta_to_page::<PagingConsts>(self.ptr as Vaddr)
@ -363,3 +330,22 @@ impl Drop for DynPage {
}
}
}
/// Increases the reference count of the page by one.
///
/// # Safety
///
/// The caller should ensure the following conditions:
/// 1. The physical address must represent a valid page;
/// 2. The caller must have already held a reference to the page.
pub(in crate::mm) unsafe fn inc_page_ref_count(paddr: Paddr) {
debug_assert!(paddr % PAGE_SIZE == 0);
debug_assert!(paddr < MAX_PADDR.load(Ordering::Relaxed) as Paddr);
let vaddr: Vaddr = mapping::page_to_meta::<PagingConsts>(paddr);
// SAFETY: The virtual address points to an initialized metadata slot.
let slot = unsafe { &*(vaddr as *const MetaSlot) };
let old = slot.ref_count.fetch_add(1, Ordering::Relaxed);
debug_assert!(old > 0);
}
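The new free-standing `inc_page_ref_count` keeps the same contract as the two methods it replaces: bumping a shared counter is only sound while the caller already holds at least one reference, otherwise the increment can race with the final release and revive a page that is being freed. Below is a minimal, self-contained sketch of that invariant using only the standard library; the `MetaSlot` struct here is a made-up stand-in for the real metadata slot, not the actual type.

    use std::sync::atomic::{AtomicUsize, Ordering};

    // Invented stand-in for a page's metadata slot.
    struct MetaSlot {
        ref_count: AtomicUsize,
    }

    // Sound only if the caller already owns a reference to the page,
    // which is why the previous count must be non-zero.
    unsafe fn inc_ref(slot: &MetaSlot) {
        let old = slot.ref_count.fetch_add(1, Ordering::Relaxed);
        debug_assert!(old > 0);
    }

    fn main() {
        let slot = MetaSlot { ref_count: AtomicUsize::new(1) }; // we hold one reference
        unsafe { inc_ref(&slot) }; // OK: the page cannot be freed concurrently
        assert_eq!(slot.ref_count.load(Ordering::Relaxed), 2);
    }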

View File

@ -70,7 +70,7 @@ use core::{any::TypeId, marker::PhantomData, ops::Range};
use align_ext::AlignExt;
use super::{
- page_size, pte_index, Child, KernelMode, PageTable, PageTableEntryTrait, PageTableError,
+ page_size, pte_index, Child, Entry, KernelMode, PageTable, PageTableEntryTrait, PageTableError,
PageTableMode, PageTableNode, PagingConstsTrait, PagingLevel, UserMode,
};
use crate::{
@ -138,6 +138,7 @@ where
va: Vaddr,
/// The virtual address range that is locked.
barrier_va: Range<Vaddr>,
+ #[allow(dead_code)]
preempt_guard: DisabledPreemptGuard,
_phantom: PhantomData<&'a PageTable<M, E, C>>,
}
@ -194,12 +195,15 @@ where
break;
}
- let cur_pte = cursor.read_cur_pte();
- if !cur_pte.is_present() || cur_pte.is_last(cursor.level) {
+ let entry = cursor.cur_entry();
+ if !entry.is_node() {
break;
}
+ let Child::PageTable(child_pt) = entry.to_owned() else {
+ unreachable!("Already checked");
+ };
- cursor.level_down();
+ cursor.push_level(child_pt.lock());
// Release the guard of the previous (upper) level.
cursor.guards[cursor.level as usize] = None;
@ -219,9 +223,9 @@ where
let level = self.level;
let va = self.va;
- match self.cur_child() {
+ match self.cur_entry().to_owned() {
- Child::PageTable(_) => {
+ Child::PageTable(pt) => {
- self.level_down();
+ self.push_level(pt.lock());
continue;
}
Child::None => {
@ -254,7 +258,7 @@ where
let page_size = page_size::<C>(self.level);
let next_va = self.va.align_down(page_size) + page_size;
while self.level < self.guard_level && pte_index::<C>(next_va, self.level) == 0 {
- self.level_up();
+ self.pop_level();
}
self.va = next_va;
}
@ -288,7 +292,7 @@ where
}
debug_assert!(self.level < self.guard_level);
- self.level_up();
+ self.pop_level();
}
}
@ -296,36 +300,26 @@ where
self.va
}
- pub fn preempt_guard(&self) -> &DisabledPreemptGuard {
- &self.preempt_guard
- }
- /// Goes up a level. We release the current page if it has no mappings since the cursor only moves
- /// forward. And if needed we will do the final cleanup using this method after re-walk when the
- /// cursor is dropped.
- ///
- /// This method requires locks acquired before calling it. The discarded level will be unlocked.
- fn level_up(&mut self) {
+ /// Goes up a level.
+ ///
+ /// We release the current page if it has no mappings since the cursor
+ /// only moves forward. And if needed we will do the final cleanup using
+ /// this method after re-walk when the cursor is dropped.
+ ///
+ /// This method requires locks acquired before calling it. The discarded
+ /// level will be unlocked.
+ fn pop_level(&mut self) {
self.guards[(self.level - 1) as usize] = None;
self.level += 1;
// TODO: Drop page tables if page tables become empty.
}
- /// Goes down a level assuming a child page table exists.
- fn level_down(&mut self) {
- debug_assert!(self.level > 1);
- let Child::PageTable(nxt_lvl_ptn) = self.cur_child() else {
- panic!("Trying to level down when it is not mapped to a page table");
- };
- let nxt_lvl_ptn_locked = nxt_lvl_ptn.lock();
- self.level -= 1;
- debug_assert_eq!(self.level, nxt_lvl_ptn_locked.level());
- self.guards[(self.level - 1) as usize] = Some(nxt_lvl_ptn_locked);
+ /// Goes down a level to a child page table.
+ fn push_level(&mut self, child_pt: PageTableNode<E, C>) {
+ self.level -= 1;
+ debug_assert_eq!(self.level, child_pt.level());
+ self.guards[(self.level - 1) as usize] = Some(child_pt);
}
fn should_map_as_tracked(&self) -> bool {
@ -334,20 +328,9 @@ where
&& should_map_as_tracked(self.va)
}
- fn cur_node(&self) -> &PageTableNode<E, C> {
- self.guards[(self.level - 1) as usize].as_ref().unwrap()
- }
- fn cur_idx(&self) -> usize {
- pte_index::<C>(self.va, self.level)
- }
- fn cur_child(&self) -> Child<E, C> {
- self.cur_node().child(self.cur_idx())
- }
- fn read_cur_pte(&self) -> E {
- self.cur_node().read_pte(self.cur_idx())
- }
+ fn cur_entry(&mut self) -> Entry<'_, E, C> {
+ let node = self.guards[(self.level - 1) as usize].as_mut().unwrap();
+ node.entry(pte_index::<C>(self.va, self.level))
+ }
}
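The renamed `pop_level`/`push_level` pair makes the cursor read like a stack machine over its `guards` array: pushing locks a child node and descends one level, popping drops (and thereby unlocks) the guard of the level being left. The following toy model shows only that bookkeeping; `Guard` and the field layout are simplified stand-ins, not the real `PageTableNode` or cursor.

    const NR_LEVELS: usize = 4;

    // Invented stand-in for a locked page-table node.
    struct Guard {
        level: u8,
    }

    struct Cursor {
        level: u8,                          // current level, 1 is the leaf level
        guards: [Option<Guard>; NR_LEVELS], // guards[level - 1] is the lock held for that level
    }

    impl Cursor {
        // Descend into an already-locked child node.
        fn push_level(&mut self, child: Guard) {
            self.level -= 1;
            debug_assert_eq!(self.level, child.level);
            self.guards[(self.level - 1) as usize] = Some(child);
        }

        // Ascend, dropping (unlocking) the guard of the level we leave.
        fn pop_level(&mut self) {
            self.guards[(self.level - 1) as usize] = None;
            self.level += 1;
        }
    }

    fn main() {
        let mut c = Cursor {
            level: 4,
            guards: [None, None, None, Some(Guard { level: 4 })],
        };
        c.push_level(Guard { level: 3 });
        c.pop_level();
        assert_eq!(c.level, 4);
    }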
@ -445,24 +428,31 @@ where
|| self.0.va + page_size::<C>(self.0.level) > end
{
debug_assert!(self.0.should_map_as_tracked());
- let pte = self.0.read_cur_pte();
- if pte.is_present() && !pte.is_last(self.0.level) {
- self.0.level_down();
- } else if !pte.is_present() {
- self.level_down_create();
- } else {
- panic!("Mapping a smaller page in an already mapped huge page");
+ let cur_level = self.0.level;
+ let cur_entry = self.0.cur_entry();
+ match cur_entry.to_owned() {
+ Child::PageTable(pt) => {
+ self.0.push_level(pt.lock());
+ }
+ Child::None => {
+ let pt =
+ PageTableNode::<E, C>::alloc(cur_level - 1, MapTrackingStatus::Tracked);
+ let _ = cur_entry.replace(Child::PageTable(pt.clone_raw()));
+ self.0.push_level(pt);
+ }
+ Child::Page(_, _) => {
+ panic!("Mapping a smaller page in an already mapped huge page");
+ }
+ Child::Untracked(_, _, _) => {
+ panic!("Mapping a tracked page in an untracked range");
+ }
}
continue;
}
debug_assert_eq!(self.0.level, page.level());
// Map the current page.
- let idx = self.0.cur_idx();
- let old = self
- .cur_node_mut()
- .replace_child(idx, Child::Page(page, prop));
+ let old = self.0.cur_entry().replace(Child::Page(page, prop));
self.0.move_forward();
match old {
@ -519,26 +509,40 @@ where
|| self.0.va + page_size::<C>(self.0.level) > end
|| pa % page_size::<C>(self.0.level) != 0
{
- let pte = self.0.read_cur_pte();
- if pte.is_present() && !pte.is_last(self.0.level) {
- self.0.level_down();
- } else if !pte.is_present() {
- self.level_down_create();
- } else {
- self.level_down_split();
+ let cur_level = self.0.level;
+ let cur_entry = self.0.cur_entry();
+ match cur_entry.to_owned() {
+ Child::PageTable(pt) => {
+ self.0.push_level(pt.lock());
+ }
+ Child::None => {
+ let pt = PageTableNode::<E, C>::alloc(
+ cur_level - 1,
+ MapTrackingStatus::Untracked,
+ );
+ let _ = cur_entry.replace(Child::PageTable(pt.clone_raw()));
+ self.0.push_level(pt);
+ }
+ Child::Page(_, _) => {
+ panic!("Mapping a smaller page in an already mapped huge page");
+ }
+ Child::Untracked(_, _, _) => {
+ let split_child = cur_entry.split_if_untracked_huge().unwrap();
+ self.0.push_level(split_child);
+ }
}
continue;
}
// Map the current page.
debug_assert!(!self.0.should_map_as_tracked());
- let idx = self.0.cur_idx();
let level = self.0.level;
let _ = self
- .cur_node_mut()
- .replace_child(idx, Child::Untracked(pa, level, prop));
- let level = self.0.level;
+ .0
+ .cur_entry()
+ .replace(Child::Untracked(pa, level, prop));
+ // Move forward.
pa += page_size::<C>(level);
self.0.move_forward();
}
@ -575,10 +579,12 @@ where
assert!(end <= self.0.barrier_va.end);
while self.0.va < end {
- let cur_pte = self.0.read_cur_pte();
+ let cur_va = self.0.va;
+ let cur_level = self.0.level;
+ let cur_entry = self.0.cur_entry();
// Skip if it is already absent.
- if !cur_pte.is_present() {
+ if cur_entry.is_none() {
if self.0.va + page_size::<C>(self.0.level) > end {
self.0.va = end;
break;
@ -587,54 +593,53 @@ where
continue;
}
- if self.0.va % page_size::<C>(self.0.level) != 0
- || self.0.va + page_size::<C>(self.0.level) > end
- {
- if cur_pte.is_last(self.0.level) {
- if !self.0.should_map_as_tracked() {
- // Level down if we are removing part of a huge untracked page.
- self.level_down_split();
- continue;
- } else {
- panic!("removing part of a huge page");
- }
- }
- // Level down if the current PTE points to a page table and we cannot
- // unmap this page table node entirely.
- self.0.level_down();
- // We have got down a level. If there's no mapped PTEs in
- // the current node, we can go back and skip to save time.
- if self.0.guards[(self.0.level - 1) as usize]
- .as_ref()
- .unwrap()
- .nr_children()
- == 0
- {
- self.0.level_up();
- self.0.move_forward();
- }
+ // Go down if not applicable.
+ if cur_va % page_size::<C>(cur_level) != 0 || cur_va + page_size::<C>(cur_level) > end {
+ let child = cur_entry.to_owned();
+ match child {
+ Child::PageTable(pt) => {
+ let pt = pt.lock();
+ // If there's no mapped PTEs in the next level, we can
+ // skip to save time.
+ if pt.nr_children() != 0 {
+ self.0.push_level(pt);
+ } else {
+ if self.0.va + page_size::<C>(self.0.level) > end {
+ self.0.va = end;
+ break;
+ }
+ self.0.move_forward();
+ }
+ }
+ Child::None => {
+ unreachable!("Already checked");
+ }
+ Child::Page(_, _) => {
+ panic!("Removing part of a huge page");
+ }
+ Child::Untracked(_, _, _) => {
+ let split_child = cur_entry.split_if_untracked_huge().unwrap();
+ self.0.push_level(split_child);
+ }
+ }
continue;
}
// Unmap the current page and return it.
- let idx = self.0.cur_idx();
- let ret = self.cur_node_mut().replace_child(idx, Child::None);
- let ret_page_va = self.0.va;
+ let old = cur_entry.replace(Child::None);
self.0.move_forward();
- return match ret {
+ return match old {
Child::Page(page, prop) => PageTableItem::Mapped {
- va: ret_page_va,
+ va: self.0.va,
page,
prop,
},
Child::Untracked(pa, level, prop) => {
debug_assert_eq!(level, self.0.level);
PageTableItem::MappedUntracked {
- va: ret_page_va,
+ va: self.0.va,
pa,
len: page_size::<C>(level),
prop,
@ -684,51 +689,46 @@ where
assert!(end <= self.0.barrier_va.end);
while self.0.va < end {
- let cur_pte = self.0.read_cur_pte();
- if !cur_pte.is_present() {
+ let cur_va = self.0.va;
+ let cur_level = self.0.level;
+ let mut cur_entry = self.0.cur_entry();
+ // Skip if it is already absent.
+ if cur_entry.is_none() {
self.0.move_forward();
continue;
}
- // Go down if it's not a last node.
- if !cur_pte.is_last(self.0.level) {
- self.0.level_down();
- // We have got down a level. If there's no mapped PTEs in
- // the current node, we can go back and skip to save time.
- if self.0.guards[(self.0.level - 1) as usize]
- .as_ref()
- .unwrap()
- .nr_children()
- == 0
- {
- self.0.level_up();
+ // Go down if it's not a last entry.
+ if cur_entry.is_node() {
+ let Child::PageTable(pt) = cur_entry.to_owned() else {
+ unreachable!("Already checked");
+ };
+ let pt = pt.lock();
+ // If there's no mapped PTEs in the next level, we can
+ // skip to save time.
+ if pt.nr_children() != 0 {
+ self.0.push_level(pt);
+ } else {
self.0.move_forward();
}
continue;
}
// Go down if the page size is too big and we are protecting part
// of untracked huge pages.
- if self.0.va % page_size::<C>(self.0.level) != 0
- || self.0.va + page_size::<C>(self.0.level) > end
- {
- if self.0.should_map_as_tracked() {
- panic!("protecting part of a huge page");
- } else {
- self.level_down_split();
- continue;
- }
- }
- let mut pte_prop = cur_pte.prop();
- op(&mut pte_prop);
- let idx = self.0.cur_idx();
- self.cur_node_mut().protect(idx, pte_prop);
+ if cur_va % page_size::<C>(cur_level) != 0 || cur_va + page_size::<C>(cur_level) > end {
+ let split_child = cur_entry
+ .split_if_untracked_huge()
+ .expect("Protecting part of a huge page");
+ self.0.push_level(split_child);
+ continue;
+ }
+ // Protect the current page.
+ cur_entry.protect(op);
let protected_va = self.0.va..self.0.va + page_size::<C>(self.0.level);
self.0.move_forward();
return Some(protected_va);
@ -777,95 +777,46 @@ where
assert!(src_end <= src.0.barrier_va.end);
while self.0.va < this_end && src.0.va < src_end {
- let cur_pte = src.0.read_cur_pte();
- if !cur_pte.is_present() {
- src.0.move_forward();
- continue;
- }
- // Go down if it's not a last node.
- if !cur_pte.is_last(src.0.level) {
- src.0.level_down();
- // We have got down a level. If there's no mapped PTEs in
- // the current node, we can go back and skip to save time.
- if src.0.guards[(src.0.level - 1) as usize]
- .as_ref()
- .unwrap()
- .nr_children()
- == 0
- {
- src.0.level_up();
- src.0.move_forward();
- }
- continue;
- }
- // Do protection.
- let mut pte_prop = cur_pte.prop();
- op(&mut pte_prop);
- let idx = src.0.cur_idx();
- src.cur_node_mut().protect(idx, pte_prop);
- // Do copy.
- let child = src.cur_node_mut().child(idx);
- let Child::<E, C>::Page(page, prop) = child else {
- panic!("Unexpected child for source mapping: {:#?}", child);
- };
- self.jump(src.0.va).unwrap();
- let mapped_page_size = page.size();
- let original = self.map(page, prop);
- debug_assert!(original.is_none());
- // Only move the source cursor forward since `Self::map` will do it.
- // This assertion is to ensure that they move by the same length.
- debug_assert_eq!(mapped_page_size, page_size::<C>(src.0.level));
- src.0.move_forward();
+ let src_va = src.0.va;
+ let mut src_entry = src.0.cur_entry();
+ match src_entry.to_owned() {
+ Child::PageTable(pt) => {
+ let pt = pt.lock();
+ // If there's no mapped PTEs in the next level, we can
+ // skip to save time.
+ if pt.nr_children() != 0 {
+ src.0.push_level(pt);
+ } else {
+ src.0.move_forward();
+ }
+ continue;
+ }
+ Child::None => {
+ src.0.move_forward();
+ continue;
+ }
+ Child::Untracked(_, _, _) => {
+ panic!("Copying untracked mappings");
+ }
+ Child::Page(page, mut prop) => {
+ let mapped_page_size = page.size();
+ // Do protection.
+ src_entry.protect(op);
+ // Do copy.
+ op(&mut prop);
+ self.jump(src_va).unwrap();
+ let original = self.map(page, prop);
+ assert!(original.is_none());
+ // Only move the source cursor forward since `Self::map` will do it.
+ // This assertion is to ensure that they move by the same length.
+ debug_assert_eq!(mapped_page_size, page_size::<C>(src.0.level));
+ src.0.move_forward();
+ }
+ }
}
}
/// Goes down a level assuming the current slot is absent.
///
/// This method will create a new child page table node and go down to it.
fn level_down_create(&mut self) {
debug_assert!(self.0.level > 1);
let new_node = PageTableNode::<E, C>::alloc(
self.0.level - 1,
if self.0.should_map_as_tracked() {
MapTrackingStatus::Tracked
} else {
MapTrackingStatus::Untracked
},
);
let idx = self.0.cur_idx();
let old = self
.cur_node_mut()
.replace_child(idx, Child::PageTable(new_node.clone_raw()));
debug_assert!(old.is_none());
self.0.level -= 1;
self.0.guards[(self.0.level - 1) as usize] = Some(new_node);
}
/// Goes down a level assuming the current slot is an untracked huge page.
///
/// This method will split the huge page and go down to the next level.
fn level_down_split(&mut self) {
debug_assert!(self.0.level > 1);
debug_assert!(!self.0.should_map_as_tracked());
let idx = self.0.cur_idx();
self.cur_node_mut().split_untracked_huge(idx);
let Child::PageTable(new_node) = self.0.cur_child() else {
unreachable!();
};
self.0.level -= 1;
self.0.guards[(self.0.level - 1) as usize] = Some(new_node.lock());
}
fn cur_node_mut(&mut self) -> &mut PageTableNode<E, C> {
self.0.guards[(self.0.level - 1) as usize].as_mut().unwrap()
}
}
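After this change each loop in `map`, `unmap`, `protect`, and `copy_from` starts by materializing the current slot as a `Child` and matching on it, instead of re-deriving the case from the presence and leaf bits of a raw PTE. The sketch below expresses that dispatch for the tracked `map` path as plain data; the `Child` and `Step` enums are invented for illustration and are much simpler than the real ones.

    // Invented, simplified child variants.
    #[allow(dead_code)]
    enum Child {
        PageTable,
        Page,
        Untracked,
        None,
    }

    // What one iteration of a tracked mapping loop decides to do.
    #[derive(Debug, PartialEq)]
    enum Step {
        Descend,          // push_level into the existing child table
        CreateAndDescend, // allocate a child table, install it, then push_level
        Refuse,           // e.g. mapping inside an already mapped huge page
    }

    fn step_for_tracked_map(child: &Child) -> Step {
        match child {
            Child::PageTable => Step::Descend,
            Child::None => Step::CreateAndDescend,
            Child::Page | Child::Untracked => Step::Refuse,
        }
    }

    fn main() {
        assert_eq!(step_for_tracked_map(&Child::None), Step::CreateAndDescend);
        assert_eq!(step_for_tracked_map(&Child::PageTable), Step::Descend);
    }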

View File

@ -100,7 +100,7 @@ impl PageTable<KernelMode> {
/// This should be the only way to create the user page table, that is to
/// duplicate the kernel page table with all the kernel mappings shared.
pub fn create_user_page_table(&self) -> PageTable<UserMode> {
- let root_node = self.root.clone_shallow().lock();
+ let mut root_node = self.root.clone_shallow().lock();
let mut new_node =
PageTableNode::alloc(PagingConsts::NR_LEVELS, MapTrackingStatus::NotApplicable);
@ -108,9 +108,9 @@ impl PageTable<KernelMode> {
// The user space range is not copied.
const NR_PTES_PER_NODE: usize = nr_subpage_per_huge::<PagingConsts>();
for i in NR_PTES_PER_NODE / 2..NR_PTES_PER_NODE {
- let child = root_node.child(i);
- if !child.is_none() {
- let _ = new_node.replace_child(i, child);
+ let root_entry = root_node.entry(i);
+ if !root_entry.is_none() {
+ let _ = new_node.entry(i).replace(root_entry.to_owned());
}
}
@ -137,7 +137,8 @@ impl PageTable<KernelMode> {
let mut root_node = self.root.clone_shallow().lock();
for i in start..end {
- if !root_node.read_pte(i).is_present() {
+ let root_entry = root_node.entry(i);
+ if root_entry.is_none() {
let nxt_level = PagingConsts::NR_LEVELS - 1;
let is_tracked = if super::kspace::should_map_as_tracked(
i * page_size::<PagingConsts>(nxt_level),
@ -147,7 +148,7 @@ impl PageTable<KernelMode> {
MapTrackingStatus::Untracked
};
let node = PageTableNode::alloc(nxt_level, is_tracked);
- let _ = root_node.replace_child(i, Child::PageTable(node.into_raw()));
+ let _ = root_entry.replace(Child::PageTable(node.into_raw()));
}
}
}
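`create_user_page_table` copies only the upper half of the root node's entries, so every user page table shares the kernel's next-level tables by reference rather than duplicating them. A toy model of that top-half copy over a plain array is shown below; the constants and the `Option<u64>` slots are illustrative only, not the real entry type.

    const NR_PTES_PER_NODE: usize = 512;

    fn main() {
        // Pretend each present root slot stores the address of a shared child table.
        let kernel_root: Vec<Option<u64>> = (0..NR_PTES_PER_NODE)
            .map(|i| (i >= NR_PTES_PER_NODE / 2).then(|| 0x1000 * i as u64))
            .collect();

        // The user root starts empty and receives only the kernel half.
        let mut user_root: Vec<Option<u64>> = vec![None; NR_PTES_PER_NODE];
        for i in NR_PTES_PER_NODE / 2..NR_PTES_PER_NODE {
            if kernel_root[i].is_some() {
                user_root[i] = kernel_root[i];
            }
        }

        assert!(user_root[..NR_PTES_PER_NODE / 2].iter().all(|e| e.is_none()));
        assert_eq!(user_root[NR_PTES_PER_NODE / 2], kernel_root[NR_PTES_PER_NODE / 2]);
    }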

View File

@ -8,10 +8,7 @@ use super::{PageTableEntryTrait, RawPageTableNode};
use crate::{
arch::mm::{PageTableEntry, PagingConsts},
mm::{
- page::{
- meta::{MapTrackingStatus, PageTablePageMeta},
- DynPage, Page,
- },
+ page::{inc_page_ref_count, meta::MapTrackingStatus, DynPage},
page_prop::PageProperty,
Paddr, PagingConstsTrait, PagingLevel,
},
@ -45,6 +42,27 @@ where
matches!(self, Child::None)
}
/// Returns whether the child is compatible with the given node.
///
/// In other words, it checks whether the child can be a child of a node
/// with the given level and tracking status.
pub(super) fn is_compatible(
&self,
node_level: PagingLevel,
is_tracked: MapTrackingStatus,
) -> bool {
match self {
Child::PageTable(pt) => node_level == pt.level() + 1,
Child::Page(p, _) => {
node_level == p.level() && is_tracked == MapTrackingStatus::Tracked
}
Child::Untracked(_, level, _) => {
node_level == *level && is_tracked == MapTrackingStatus::Untracked
}
Child::None => true,
}
}
/// Converts a child into a owning PTE.
///
/// By conversion it loses information about whether the page is tracked
@ -74,8 +92,10 @@ where
/// # Safety
///
/// The provided PTE must be originated from [`Child::into_pte`]. And the
- /// provided information (level and tracking status) must align with the
- /// lost information during the conversion.
+ /// provided information (level and tracking status) must be the same with
+ /// the lost information during the conversion. Strictly speaking, the
+ /// provided arguments must be compatible with the original child (
+ /// specified by [`Child::is_compatible`]).
///
/// This method should be only used no more than once for a PTE that has
/// been converted from a child using the [`Child::into_pte`] method.
@ -85,18 +105,25 @@ where
is_tracked: MapTrackingStatus,
) -> Self {
if !pte.is_present() {
- Child::None
- } else {
- let paddr = pte.paddr();
- if !pte.is_last(level) {
- Child::PageTable(RawPageTableNode::from_paddr(paddr))
- } else {
- match is_tracked {
- MapTrackingStatus::Tracked => Child::Page(DynPage::from_raw(paddr), pte.prop()),
- MapTrackingStatus::Untracked => Child::Untracked(paddr, level, pte.prop()),
- MapTrackingStatus::NotApplicable => panic!("Invalid tracking status"),
- }
- }
- }
+ return Child::None;
+ }
+ let paddr = pte.paddr();
+ if !pte.is_last(level) {
+ // SAFETY: The physical address points to a valid page table node
+ // at the given level.
+ return Child::PageTable(unsafe { RawPageTableNode::from_raw_parts(paddr, level - 1) });
+ }
+ match is_tracked {
+ MapTrackingStatus::Tracked => {
+ // SAFETY: The physical address points to a valid page.
+ let page = unsafe { DynPage::from_raw(paddr) };
+ Child::Page(page, pte.prop())
+ }
+ MapTrackingStatus::Untracked => Child::Untracked(paddr, level, pte.prop()),
+ MapTrackingStatus::NotApplicable => panic!("Invalid tracking status"),
+ }
}
@ -104,9 +131,8 @@ where
///
/// # Safety
///
- /// The provided PTE must be originated from [`Child::into_pte`]. And the
- /// provided information (level and tracking status) must align with the
- /// lost information during the conversion.
+ /// The provided PTE must be originated from [`Child::into_pte`], which is
+ /// the same requirement as the [`Child::from_pte`] method.
///
/// This method must not be used with a PTE that has been restored to a
/// child using the [`Child::from_pte`] method.
@ -116,22 +142,31 @@ where
is_tracked: MapTrackingStatus,
) -> Self {
if !pte.is_present() {
- Child::None
- } else {
- let paddr = pte.paddr();
- if !pte.is_last(level) {
- Page::<PageTablePageMeta<E, C>>::inc_ref_count(paddr);
- Child::PageTable(RawPageTableNode::from_paddr(paddr))
- } else {
- match is_tracked {
- MapTrackingStatus::Tracked => {
- DynPage::inc_ref_count(paddr);
- Child::Page(DynPage::from_raw(paddr), pte.prop())
- }
- MapTrackingStatus::Untracked => Child::Untracked(paddr, level, pte.prop()),
- MapTrackingStatus::NotApplicable => panic!("Invalid tracking status"),
- }
- }
- }
+ return Child::None;
+ }
+ let paddr = pte.paddr();
+ if !pte.is_last(level) {
+ // SAFETY: The physical address is valid and the PTE already owns
+ // the reference to the page.
+ unsafe { inc_page_ref_count(paddr) };
+ // SAFETY: The physical address points to a valid page table node
+ // at the given level.
+ return Child::PageTable(unsafe { RawPageTableNode::from_raw_parts(paddr, level - 1) });
+ }
+ match is_tracked {
+ MapTrackingStatus::Tracked => {
+ // SAFETY: The physical address is valid and the PTE already owns
+ // the reference to the page.
+ unsafe { inc_page_ref_count(paddr) };
+ // SAFETY: The physical address points to a valid page.
+ let page = unsafe { DynPage::from_raw(paddr) };
+ Child::Page(page, pte.prop())
+ }
+ MapTrackingStatus::Untracked => Child::Untracked(paddr, level, pte.prop()),
+ MapTrackingStatus::NotApplicable => panic!("Invalid tracking status"),
+ }
}
}
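The `into_pte` / `from_pte` / `clone_from_pte` trio is an ownership round-trip much like `Arc::into_raw` / `Arc::from_raw` / `Arc::increment_strong_count` in the standard library: a child turned into bare PTE bits still owns one reference, restoring the child consumes that reference, and cloning a child out of a live PTE must add a reference first (which is what the new `inc_page_ref_count` calls do). The analogy below uses `Arc` as a stand-in for the ref-counted page handle; it is an illustration of the pattern, not the kernel code.

    use std::sync::Arc;

    fn main() {
        let child = Arc::new([0u8; 16]); // an owning handle to a "page", count = 1

        // "into_pte": forget the handle, keep only the raw pointer (the PTE bits).
        let pte = Arc::into_raw(child);

        // "clone_from_pte": the PTE keeps its own reference, so add one
        // before materializing a second owning handle.
        let cloned = unsafe {
            Arc::increment_strong_count(pte);
            Arc::from_raw(pte)
        };
        assert_eq!(Arc::strong_count(&cloned), 2);

        // "from_pte": consume the reference that the PTE itself still owned.
        let restored = unsafe { Arc::from_raw(pte) };
        assert_eq!(Arc::strong_count(&restored), 2); // `cloned` + `restored`
    }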

View File

@ -0,0 +1,161 @@
// SPDX-License-Identifier: MPL-2.0
//! This module provides accessors to the page table entries in a node.
use super::{Child, PageTableEntryTrait, PageTableNode};
use crate::mm::{
nr_subpage_per_huge, page::meta::MapTrackingStatus, page_prop::PageProperty, page_size,
PagingConstsTrait,
};
/// A view of an entry in a page table node.
///
/// It can be borrowed from a node using the [`PageTableNode::entry`] method.
///
/// This is a static reference to an entry in a node that does not account for
/// a dynamic reference count to the child. It can be used to create an owned
/// handle, which is a [`Child`].
pub(in crate::mm) struct Entry<'a, E: PageTableEntryTrait, C: PagingConstsTrait>
where
[(); C::NR_LEVELS as usize]:,
{
/// The page table entry.
///
/// We store the page table entry here to optimize the number of reads from
/// the node. We cannot hold a `&mut E` reference to the entry because
/// other CPUs may modify the memory location for accessed/dirty bits. Such
/// accesses will violate the aliasing rules of Rust and cause undefined
/// behaviors.
pte: E,
/// The index of the entry in the node.
idx: usize,
/// The node that contains the entry.
node: &'a mut PageTableNode<E, C>,
}
impl<'a, E: PageTableEntryTrait, C: PagingConstsTrait> Entry<'a, E, C>
where
[(); C::NR_LEVELS as usize]:,
{
/// Returns if the entry does not map to anything.
pub(in crate::mm) fn is_none(&self) -> bool {
!self.pte.is_present()
}
/// Returns if the entry maps to a page table node.
pub(in crate::mm) fn is_node(&self) -> bool {
self.pte.is_present() && !self.pte.is_last(self.node.level())
}
/// Gets an owned handle to the child.
pub(in crate::mm) fn to_owned(&self) -> Child<E, C> {
// SAFETY: The entry structure represents an existent entry with the
// right node information.
unsafe { Child::clone_from_pte(&self.pte, self.node.level(), self.node.is_tracked()) }
}
/// Operates on the mapping properties of the entry.
///
/// It only modifies the properties if the entry is present.
// FIXME: in x86_64, you can protect a page with neither of the RWX
// permissions. This would make the page not accessible and leaked. Such a
// behavior is memory-safe but wrong. In RISC-V there's no problem.
pub(in crate::mm) fn protect(&mut self, op: &mut impl FnMut(&mut PageProperty)) {
if !self.pte.is_present() {
return;
}
let prop = self.pte.prop();
let mut new_prop = prop;
op(&mut new_prop);
if prop == new_prop {
return;
}
self.pte.set_prop(new_prop);
// SAFETY:
// 1. The index is within the bounds.
// 2. We replace the PTE with a new one, which differs only in
// `PageProperty`, so it is still compatible with the current
// page table node.
unsafe { self.node.write_pte(self.idx, self.pte) };
}
/// Replaces the entry with a new child.
///
/// The old child is returned.
///
/// # Panics
///
/// The method panics if the given child is not compatible with the node.
/// The compatibility is specified by the [`Child::is_compatible`].
pub(in crate::mm) fn replace(self, new_child: Child<E, C>) -> Child<E, C> {
assert!(new_child.is_compatible(self.node.level(), self.node.is_tracked()));
// SAFETY: The entry structure represents an existent entry with the
// right node information. The old PTE is overwritten by the new child
// so that it is not used anymore.
let old_child =
unsafe { Child::from_pte(self.pte, self.node.level(), self.node.is_tracked()) };
if old_child.is_none() && !new_child.is_none() {
*self.node.nr_children_mut() += 1;
} else if !old_child.is_none() && new_child.is_none() {
*self.node.nr_children_mut() -= 1;
}
// SAFETY:
// 1. The index is within the bounds.
// 2. The new PTE is compatible with the page table node, as asserted above.
unsafe { self.node.write_pte(self.idx, new_child.into_pte()) };
old_child
}
/// Splits the entry into smaller pages if it maps to an untracked huge page.
///
/// If the entry does map to an untracked huge page, it is split into smaller
/// pages mapped by a child page table node. The new child page table node
/// is returned.
///
/// If the entry does not map to an untracked huge page, the method returns
/// `None`.
pub(in crate::mm) fn split_if_untracked_huge(self) -> Option<PageTableNode<E, C>> {
let level = self.node.level();
if !(self.pte.is_last(level)
&& level > 1
&& self.node.is_tracked() == MapTrackingStatus::Untracked)
{
return None;
}
let pa = self.pte.paddr();
let prop = self.pte.prop();
let mut new_page = PageTableNode::<E, C>::alloc(level - 1, MapTrackingStatus::Untracked);
for i in 0..nr_subpage_per_huge::<C>() {
let small_pa = pa + i * page_size::<C>(level - 1);
let _ = new_page
.entry(i)
.replace(Child::Untracked(small_pa, level - 1, prop));
}
let _ = self.replace(Child::PageTable(new_page.clone_raw()));
Some(new_page)
}
/// Creates a new entry at the node.
///
/// # Safety
///
/// The caller must ensure that the index is within the bounds of the node.
pub(super) unsafe fn new_at(node: &'a mut PageTableNode<E, C>, idx: usize) -> Self {
// SAFETY: The index is within the bound.
let pte = unsafe { node.read_pte(idx) };
Self { pte, idx, node }
}
}
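An `Entry` reads the PTE once when it is created, mutates the cached copy, and writes it back explicitly; borrowing the node mutably guarantees there is at most one such view alive per node. A miniature version of that read-once/write-back pattern over a plain array follows; `Node`, the `u64` slots, and the zero-means-empty convention are all invented for the sketch.

    // Toy "node": plain-old-data slots plus a child counter, guarded by &mut.
    struct Node {
        slots: [u64; 8],
        nr_children: u16,
    }

    // View of one slot: the value is cached on creation, writes go back explicitly.
    struct Entry<'a> {
        cached: u64,
        idx: usize,
        node: &'a mut Node,
    }

    impl<'a> Entry<'a> {
        fn new_at(node: &'a mut Node, idx: usize) -> Self {
            let cached = node.slots[idx]; // the single read from the node
            Self { cached, idx, node }
        }

        fn is_none(&self) -> bool {
            self.cached == 0
        }

        // Replace the slot, keeping the per-node child counter consistent.
        fn replace(self, new: u64) -> u64 {
            let old = self.cached;
            if old == 0 && new != 0 {
                self.node.nr_children += 1;
            } else if old != 0 && new == 0 {
                self.node.nr_children -= 1;
            }
            self.node.slots[self.idx] = new; // the single write back to the node
            old
        }
    }

    fn main() {
        let mut node = Node { slots: [0; 8], nr_children: 0 };
        let entry = Entry::new_at(&mut node, 3);
        assert!(entry.is_none());
        assert_eq!(entry.replace(0xdead_b000), 0);
        assert_eq!(node.nr_children, 1);
    }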

View File

@ -26,22 +26,21 @@
//!
mod child;
+ mod entry;
- use core::{marker::PhantomData, mem::ManuallyDrop, panic, sync::atomic::Ordering};
+ use core::{marker::PhantomData, mem::ManuallyDrop, sync::atomic::Ordering};
- pub(in crate::mm) use child::Child;
+ pub(in crate::mm) use self::{child::Child, entry::Entry};
- use super::{nr_subpage_per_huge, page_size, PageTableEntryTrait};
+ use super::{nr_subpage_per_huge, PageTableEntryTrait};
use crate::{
arch::mm::{PageTableEntry, PagingConsts},
mm::{
paddr_to_vaddr,
page::{
- self,
+ self, inc_page_ref_count,
meta::{MapTrackingStatus, PageMeta, PageTablePageMeta, PageUsage},
DynPage, Page,
},
- page_prop::PageProperty,
Paddr, PagingConstsTrait, PagingLevel, PAGE_SIZE,
},
};
@ -60,6 +59,7 @@ where
[(); C::NR_LEVELS as usize]:,
{
raw: Paddr,
+ level: PagingLevel,
_phantom: PhantomData<(E, C)>,
}
@ -71,8 +71,13 @@ where
self.raw
}
+ pub(super) fn level(&self) -> PagingLevel {
+ self.level
+ }
/// Converts a raw handle to an accessible handle by pertaining the lock.
pub(super) fn lock(self) -> PageTableNode<E, C> {
+ let level = self.level;
let page: Page<PageTablePageMeta<E, C>> = self.into();
// Acquire the lock.
@ -85,6 +90,8 @@ where
core::hint::spin_loop();
}
+ debug_assert_eq!(page.meta().level, level);
PageTableNode::<E, C> { page }
}
@ -94,6 +101,7 @@ where
Self {
raw: self.raw,
+ level: self.level,
_phantom: PhantomData,
}
}
@ -110,12 +118,18 @@ where
/// The caller must ensure that the page table to be activated has
/// proper mappings for the kernel and has the correct const parameters
/// matching the current CPU.
+ ///
+ /// # Panics
+ ///
+ /// Only top-level page tables can be activated using this function.
pub(crate) unsafe fn activate(&self) {
use crate::{
arch::mm::{activate_page_table, current_page_table_paddr},
mm::CachePolicy,
};
+ assert_eq!(self.level, C::NR_LEVELS);
let last_activated_paddr = current_page_table_paddr();
if last_activated_paddr == self.raw {
@ -130,6 +144,7 @@ where
// Restore and drop the last activated page table.
drop(Self {
raw: last_activated_paddr,
+ level: C::NR_LEVELS,
_phantom: PhantomData,
});
}
@ -150,20 +165,21 @@ where
// SAFETY: We have a reference count to the page and can safely increase the reference
// count by one more.
unsafe {
- Page::<PageTablePageMeta<E, C>>::inc_ref_count(self.paddr());
+ inc_page_ref_count(self.paddr());
}
}
- /// Restore the handle to a page table node from a physical address.
+ /// Restores the handle from the physical address and level.
///
/// # Safety
///
/// The caller must ensure that the physical address is valid and points to
/// a forgotten page table node. A forgotten page table node can only be
- /// restored once.
- unsafe fn from_paddr(paddr: Paddr) -> Self {
+ /// restored once. The level must match the level of the page table node.
+ unsafe fn from_raw_parts(paddr: Paddr, level: PagingLevel) -> Self {
Self {
raw: paddr,
+ level,
_phantom: PhantomData,
}
}
@ -215,6 +231,28 @@ impl<E: PageTableEntryTrait, C: PagingConstsTrait> PageTableNode<E, C>
where
[(); C::NR_LEVELS as usize]:,
{
/// Borrows an entry in the node at a given index.
///
/// # Panics
///
/// Panics if the index is not within the bound of
/// [`nr_subpage_per_huge<C>`].
pub(super) fn entry(&mut self, idx: usize) -> Entry<'_, E, C> {
assert!(idx < nr_subpage_per_huge::<C>());
// SAFETY: The index is within the bound.
unsafe { Entry::new_at(self, idx) }
}
/// Gets the level of the page table node.
pub(super) fn level(&self) -> PagingLevel {
self.page.meta().level
}
/// Gets the tracking status of the page table node.
pub(super) fn is_tracked(&self) -> MapTrackingStatus {
self.page.meta().is_tracked
}
/// Allocates a new empty page table node.
///
/// This function returns an owning handle. The newly created handle does not
@ -234,148 +272,70 @@ where
Self { page }
}
- pub fn level(&self) -> PagingLevel {
- self.page.meta().level
- }
- pub fn is_tracked(&self) -> MapTrackingStatus {
- self.page.meta().is_tracked
- }
/// Converts the handle into a raw handle to be stored in a PTE or CPU.
pub(super) fn into_raw(self) -> RawPageTableNode<E, C> {
let this = ManuallyDrop::new(self);
- let raw = this.page.paddr();
- this.page.meta().lock.store(0, Ordering::Release);
- RawPageTableNode {
- raw,
- _phantom: PhantomData,
- }
+ // Release the lock.
+ this.page.meta().lock.store(0, Ordering::Release);
+ // SAFETY: The provided physical address is valid and the level is
+ // correct. The reference count is not changed.
+ unsafe { RawPageTableNode::from_raw_parts(this.page.paddr(), this.page.meta().level) }
}
/// Gets a raw handle while still preserving the original handle.
pub(super) fn clone_raw(&self) -> RawPageTableNode<E, C> {
- core::mem::forget(self.page.clone());
- RawPageTableNode {
- raw: self.page.paddr(),
- _phantom: PhantomData,
- }
+ let page = ManuallyDrop::new(self.page.clone());
+ // SAFETY: The provided physical address is valid and the level is
+ // correct. The reference count is increased by one.
+ unsafe { RawPageTableNode::from_raw_parts(page.paddr(), page.meta().level) }
}
- /// Gets an extra reference of the child at the given index.
- pub(super) fn child(&self, idx: usize) -> Child<E, C> {
- debug_assert!(idx < nr_subpage_per_huge::<C>());
- let pte = self.read_pte(idx);
- // SAFETY: The PTE is read from this page table node so the information
- // recorded in this page table is correct.
- unsafe { Child::clone_from_pte(&pte, self.level(), self.is_tracked()) }
- }
- /// Replace the child at the given index with a new child.
- ///
- /// The old child is returned. The new child must match the level of the page
- /// table node and the tracking status of the page table node.
- pub(super) fn replace_child(&mut self, idx: usize, new_child: Child<E, C>) -> Child<E, C> {
- // It should be ensured by the cursor.
- #[cfg(debug_assertions)]
- match &new_child {
- Child::PageTable(_) => {
- debug_assert!(self.level() > 1);
- }
- Child::Page(p, _) => {
- debug_assert!(self.level() == p.level());
- debug_assert!(self.is_tracked() == MapTrackingStatus::Tracked);
- }
- Child::Untracked(_, level, _) => {
- debug_assert!(self.level() == *level);
- debug_assert!(self.is_tracked() == MapTrackingStatus::Untracked);
- }
- Child::None => {}
- }
- let pte = self.read_pte(idx);
- // SAFETY: The PTE is read from this page table node so the information
- // provided is correct. The PTE is not restored twice.
- let old_child = unsafe { Child::from_pte(pte, self.level(), self.is_tracked()) };
- if old_child.is_none() && !new_child.is_none() {
- *self.nr_children_mut() += 1;
- } else if !old_child.is_none() && new_child.is_none() {
- *self.nr_children_mut() -= 1;
- }
- self.write_pte(idx, new_child.into_pte());
- old_child
- }
- /// Splits the untracked huge page mapped at `idx` to smaller pages.
- pub(super) fn split_untracked_huge(&mut self, idx: usize) {
- // These should be ensured by the cursor.
- debug_assert!(idx < nr_subpage_per_huge::<C>());
- debug_assert!(self.level() > 1);
- let Child::Untracked(pa, level, prop) = self.child(idx) else {
- panic!("`split_untracked_huge` not called on an untracked huge page");
- };
- debug_assert_eq!(level, self.level());
- let mut new_page = PageTableNode::<E, C>::alloc(level - 1, MapTrackingStatus::Untracked);
- for i in 0..nr_subpage_per_huge::<C>() {
- let small_pa = pa + i * page_size::<C>(level - 1);
- new_page.replace_child(i, Child::Untracked(small_pa, level - 1, prop));
- }
- self.replace_child(idx, Child::PageTable(new_page.into_raw()));
- }
- /// Protects an already mapped child at a given index.
- pub(super) fn protect(&mut self, idx: usize, prop: PageProperty) {
- let mut pte = self.read_pte(idx);
- debug_assert!(pte.is_present()); // This should be ensured by the cursor.
- pte.set_prop(prop);
- self.write_pte(idx, pte);
- }
- pub(super) fn read_pte(&self, idx: usize) -> E {
- // It should be ensured by the cursor.
- debug_assert!(idx < nr_subpage_per_huge::<C>());
- let ptr = paddr_to_vaddr(self.page.paddr()) as *const E;
- // SAFETY: the index is within the bound and PTE is plain-old-data.
- unsafe { ptr.add(idx).read() }
- }
- /// Writes a page table entry at a given index.
- ///
- /// This operation will leak the old child if the PTE is present.
- fn write_pte(&mut self, idx: usize, pte: E) {
- // It should be ensured by the cursor.
- debug_assert!(idx < nr_subpage_per_huge::<C>());
- let ptr = paddr_to_vaddr(self.page.paddr()) as *mut E;
- // SAFETY: the index is within the bound and PTE is plain-old-data.
- unsafe { ptr.add(idx).write(pte) };
- }
- /// The number of valid PTEs.
- pub(super) fn nr_children(&self) -> u16 {
- // SAFETY: The lock is held so there is no mutable reference to it.
- // It would be safe to read.
- unsafe { *self.page.meta().nr_children.get() }
- }
+ /// Gets the number of valid PTEs in the node.
+ pub(super) fn nr_children(&self) -> u16 {
+ // SAFETY: The lock is held so we have an exclusive access.
+ unsafe { *self.page.meta().nr_children.get() }
+ }
+ /// Reads a non-owning PTE at the given index.
+ ///
+ /// A non-owning PTE means that it does not account for a reference count
+ /// of the a page if the PTE points to a page. The original PTE still owns
+ /// the child page.
+ ///
+ /// # Safety
+ ///
+ /// The caller must ensure that the index is within the bound.
+ unsafe fn read_pte(&self, idx: usize) -> E {
+ debug_assert!(idx < nr_subpage_per_huge::<C>());
+ let ptr = paddr_to_vaddr(self.page.paddr()) as *const E;
+ // SAFETY: The index is within the bound and the PTE is plain-old-data.
+ unsafe { ptr.add(idx).read() }
+ }
+ /// Writes a page table entry at a given index.
+ ///
+ /// This operation will leak the old child if the old PTE is present.
+ ///
+ /// The child represented by the given PTE will handover the ownership to
+ /// the node. The PTE will be rendered invalid after this operation.
+ ///
+ /// # Safety
+ ///
+ /// The caller must ensure that:
+ /// 1. The index must be within the bound;
+ /// 2. The PTE must represent a child compatible with this page table node
+ /// (see [`Child::is_compatible`]).
+ unsafe fn write_pte(&mut self, idx: usize, pte: E) {
+ debug_assert!(idx < nr_subpage_per_huge::<C>());
+ let ptr = paddr_to_vaddr(self.page.paddr()) as *mut E;
+ // SAFETY: The index is within the bound and the PTE is plain-old-data.
+ unsafe { ptr.add(idx).write(pte) }
+ }
+ /// Gets the mutable reference to the number of valid PTEs in the node.
fn nr_children_mut(&mut self) -> &mut u16 {
// SAFETY: The lock is held so we have an exclusive access.
unsafe { &mut *self.page.meta().nr_children.get() }
@ -399,6 +359,13 @@ where
const USAGE: PageUsage = PageUsage::PageTable;
fn on_drop(page: &mut Page<Self>) {
// SAFETY: This is the last reference so we have an exclusive access.
let nr_children = unsafe { *page.meta().nr_children.get() };
if nr_children == 0 {
return;
}
let paddr = page.paddr();
let level = page.meta().level;
let is_tracked = page.meta().is_tracked;