Use node::Entry to optimize page table cursor operations

Zhang Junyang
2024-10-03 02:11:07 +08:00
committed by Tate, Hongliang Tian
parent 96f120d957
commit e1e7afe0ca
6 changed files with 516 additions and 415 deletions

View File

@ -119,25 +119,6 @@ impl<M: PageMeta> Page<M> {
}
}
/// Increase the reference count of the page by one.
///
/// # Safety
///
/// The physical address must represent a valid page.
///
/// And the caller must ensure the metadata slot pointed through the corresponding
/// virtual address is initialized by holding a reference count of the page firstly.
/// Otherwise the function may add a reference count to an unused page.
pub(in crate::mm) unsafe fn inc_ref_count(paddr: Paddr) {
debug_assert!(paddr % PAGE_SIZE == 0);
debug_assert!(paddr < MAX_PADDR.load(Ordering::Relaxed) as Paddr);
let vaddr: Vaddr = mapping::page_to_meta::<PagingConsts>(paddr);
// SAFETY: The virtual address points to an initialized metadata slot.
(*(vaddr as *const MetaSlot))
.ref_count
.fetch_add(1, Ordering::Relaxed);
}
/// Get the physical address.
pub fn paddr(&self) -> Paddr {
mapping::meta_to_page::<PagingConsts>(self.ptr as Vaddr)
@ -248,20 +229,6 @@ impl DynPage {
Self { ptr }
}
/// Increase the reference count of the page by one.
///
/// # Safety
///
/// This is the same as [`Page::inc_ref_count`].
pub(in crate::mm) unsafe fn inc_ref_count(paddr: Paddr) {
debug_assert!(paddr % PAGE_SIZE == 0);
debug_assert!(paddr < MAX_PADDR.load(Ordering::Relaxed) as Paddr);
let vaddr: Vaddr = mapping::page_to_meta::<PagingConsts>(paddr);
(*(vaddr as *const MetaSlot))
.ref_count
.fetch_add(1, Ordering::Relaxed);
}
/// Get the physical address of the start of the page
pub fn paddr(&self) -> Paddr {
mapping::meta_to_page::<PagingConsts>(self.ptr as Vaddr)
@ -363,3 +330,22 @@ impl Drop for DynPage {
}
}
}
/// Increases the reference count of the page by one.
///
/// # Safety
///
/// The caller should ensure the following conditions:
/// 1. The physical address must represent a valid page;
/// 2. The caller must have already held a reference to the page.
pub(in crate::mm) unsafe fn inc_page_ref_count(paddr: Paddr) {
debug_assert!(paddr % PAGE_SIZE == 0);
debug_assert!(paddr < MAX_PADDR.load(Ordering::Relaxed) as Paddr);
let vaddr: Vaddr = mapping::page_to_meta::<PagingConsts>(paddr);
// SAFETY: The virtual address points to an initialized metadata slot.
let slot = unsafe { &*(vaddr as *const MetaSlot) };
let old = slot.ref_count.fetch_add(1, Ordering::Relaxed);
debug_assert!(old > 0);
}
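For reference, the call-site pattern that the later `child.rs` hunks adopt for this helper looks roughly like the sketch below; `paddr` stands for a physical address taken from a PTE that already owns a reference to the page.

// Sketch of a typical call site (crate-internal).
// SAFETY: `paddr` refers to a valid page, and the PTE we are cloning from
// already holds a reference to it, so the count cannot be bumped on an
// unused page.
unsafe { inc_page_ref_count(paddr) };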

View File

@ -70,7 +70,7 @@ use core::{any::TypeId, marker::PhantomData, ops::Range};
use align_ext::AlignExt;
use super::{
page_size, pte_index, Child, KernelMode, PageTable, PageTableEntryTrait, PageTableError,
page_size, pte_index, Child, Entry, KernelMode, PageTable, PageTableEntryTrait, PageTableError,
PageTableMode, PageTableNode, PagingConstsTrait, PagingLevel, UserMode,
};
use crate::{
@ -138,6 +138,7 @@ where
va: Vaddr,
/// The virtual address range that is locked.
barrier_va: Range<Vaddr>,
#[allow(dead_code)]
preempt_guard: DisabledPreemptGuard,
_phantom: PhantomData<&'a PageTable<M, E, C>>,
}
@ -194,12 +195,15 @@ where
break;
}
let cur_pte = cursor.read_cur_pte();
if !cur_pte.is_present() || cur_pte.is_last(cursor.level) {
let entry = cursor.cur_entry();
if !entry.is_node() {
break;
}
let Child::PageTable(child_pt) = entry.to_owned() else {
unreachable!("Already checked");
};
cursor.level_down();
cursor.push_level(child_pt.lock());
// Release the guard of the previous (upper) level.
cursor.guards[cursor.level as usize] = None;
@ -219,9 +223,9 @@ where
let level = self.level;
let va = self.va;
match self.cur_child() {
Child::PageTable(_) => {
self.level_down();
match self.cur_entry().to_owned() {
Child::PageTable(pt) => {
self.push_level(pt.lock());
continue;
}
Child::None => {
@ -254,7 +258,7 @@ where
let page_size = page_size::<C>(self.level);
let next_va = self.va.align_down(page_size) + page_size;
while self.level < self.guard_level && pte_index::<C>(next_va, self.level) == 0 {
self.level_up();
self.pop_level();
}
self.va = next_va;
}
@ -288,7 +292,7 @@ where
}
debug_assert!(self.level < self.guard_level);
self.level_up();
self.pop_level();
}
}
@ -296,36 +300,26 @@ where
self.va
}
pub fn preempt_guard(&self) -> &DisabledPreemptGuard {
&self.preempt_guard
}
/// Goes up a level. We release the current page if it has no mappings since the cursor only moves
/// forward. And if needed we will do the final cleanup using this method after re-walk when the
/// cursor is dropped.
/// Goes up a level.
///
/// This method requires locks acquired before calling it. The discarded level will be unlocked.
fn level_up(&mut self) {
/// We release the current page if it has no mappings since the cursor
/// only moves forward. And if needed we will do the final cleanup using
/// this method after re-walk when the cursor is dropped.
///
/// This method requires locks acquired before calling it. The discarded
/// level will be unlocked.
fn pop_level(&mut self) {
self.guards[(self.level - 1) as usize] = None;
self.level += 1;
// TODO: Drop page tables if page tables become empty.
}
/// Goes down a level assuming a child page table exists.
fn level_down(&mut self) {
debug_assert!(self.level > 1);
let Child::PageTable(nxt_lvl_ptn) = self.cur_child() else {
panic!("Trying to level down when it is not mapped to a page table");
};
let nxt_lvl_ptn_locked = nxt_lvl_ptn.lock();
/// Goes down a level to a child page table.
fn push_level(&mut self, child_pt: PageTableNode<E, C>) {
self.level -= 1;
debug_assert_eq!(self.level, nxt_lvl_ptn_locked.level());
self.guards[(self.level - 1) as usize] = Some(nxt_lvl_ptn_locked);
debug_assert_eq!(self.level, child_pt.level());
self.guards[(self.level - 1) as usize] = Some(child_pt);
}
fn should_map_as_tracked(&self) -> bool {
@ -334,20 +328,9 @@ where
&& should_map_as_tracked(self.va)
}
fn cur_node(&self) -> &PageTableNode<E, C> {
self.guards[(self.level - 1) as usize].as_ref().unwrap()
}
fn cur_idx(&self) -> usize {
pte_index::<C>(self.va, self.level)
}
fn cur_child(&self) -> Child<E, C> {
self.cur_node().child(self.cur_idx())
}
fn read_cur_pte(&self) -> E {
self.cur_node().read_pte(self.cur_idx())
fn cur_entry(&mut self) -> Entry<'_, E, C> {
let node = self.guards[(self.level - 1) as usize].as_mut().unwrap();
node.entry(pte_index::<C>(self.va, self.level))
}
}
@ -445,24 +428,31 @@ where
|| self.0.va + page_size::<C>(self.0.level) > end
{
debug_assert!(self.0.should_map_as_tracked());
let pte = self.0.read_cur_pte();
if pte.is_present() && !pte.is_last(self.0.level) {
self.0.level_down();
} else if !pte.is_present() {
self.level_down_create();
} else {
let cur_level = self.0.level;
let cur_entry = self.0.cur_entry();
match cur_entry.to_owned() {
Child::PageTable(pt) => {
self.0.push_level(pt.lock());
}
Child::None => {
let pt =
PageTableNode::<E, C>::alloc(cur_level - 1, MapTrackingStatus::Tracked);
let _ = cur_entry.replace(Child::PageTable(pt.clone_raw()));
self.0.push_level(pt);
}
Child::Page(_, _) => {
panic!("Mapping a smaller page in an already mapped huge page");
}
Child::Untracked(_, _, _) => {
panic!("Mapping a tracked page in an untracked range");
}
}
continue;
}
debug_assert_eq!(self.0.level, page.level());
// Map the current page.
let idx = self.0.cur_idx();
let old = self
.cur_node_mut()
.replace_child(idx, Child::Page(page, prop));
let old = self.0.cur_entry().replace(Child::Page(page, prop));
self.0.move_forward();
match old {
@ -519,26 +509,40 @@ where
|| self.0.va + page_size::<C>(self.0.level) > end
|| pa % page_size::<C>(self.0.level) != 0
{
let pte = self.0.read_cur_pte();
if pte.is_present() && !pte.is_last(self.0.level) {
self.0.level_down();
} else if !pte.is_present() {
self.level_down_create();
} else {
self.level_down_split();
let cur_level = self.0.level;
let cur_entry = self.0.cur_entry();
match cur_entry.to_owned() {
Child::PageTable(pt) => {
self.0.push_level(pt.lock());
}
Child::None => {
let pt = PageTableNode::<E, C>::alloc(
cur_level - 1,
MapTrackingStatus::Untracked,
);
let _ = cur_entry.replace(Child::PageTable(pt.clone_raw()));
self.0.push_level(pt);
}
Child::Page(_, _) => {
panic!("Mapping a smaller page in an already mapped huge page");
}
Child::Untracked(_, _, _) => {
let split_child = cur_entry.split_if_untracked_huge().unwrap();
self.0.push_level(split_child);
}
}
continue;
}
// Map the current page.
debug_assert!(!self.0.should_map_as_tracked());
let idx = self.0.cur_idx();
let level = self.0.level;
let _ = self
.cur_node_mut()
.replace_child(idx, Child::Untracked(pa, level, prop));
.0
.cur_entry()
.replace(Child::Untracked(pa, level, prop));
let level = self.0.level;
// Move forward.
pa += page_size::<C>(level);
self.0.move_forward();
}
@ -575,10 +579,12 @@ where
assert!(end <= self.0.barrier_va.end);
while self.0.va < end {
let cur_pte = self.0.read_cur_pte();
let cur_va = self.0.va;
let cur_level = self.0.level;
let cur_entry = self.0.cur_entry();
// Skip if it is already absent.
if !cur_pte.is_present() {
if cur_entry.is_none() {
if self.0.va + page_size::<C>(self.0.level) > end {
self.0.va = end;
break;
@ -587,54 +593,53 @@ where
continue;
}
if self.0.va % page_size::<C>(self.0.level) != 0
|| self.0.va + page_size::<C>(self.0.level) > end
{
if cur_pte.is_last(self.0.level) {
if !self.0.should_map_as_tracked() {
// Level down if we are removing part of a huge untracked page.
self.level_down_split();
continue;
// Go down if not applicable.
if cur_va % page_size::<C>(cur_level) != 0 || cur_va + page_size::<C>(cur_level) > end {
let child = cur_entry.to_owned();
match child {
Child::PageTable(pt) => {
let pt = pt.lock();
// If there are no mapped PTEs in the next level, we can
// skip to save time.
if pt.nr_children() != 0 {
self.0.push_level(pt);
} else {
panic!("removing part of a huge page");
if self.0.va + page_size::<C>(self.0.level) > end {
self.0.va = end;
break;
}
}
// Level down if the current PTE points to a page table and we cannot
// unmap this page table node entirely.
self.0.level_down();
// We have got down a level. If there's no mapped PTEs in
// the current node, we can go back and skip to save time.
if self.0.guards[(self.0.level - 1) as usize]
.as_ref()
.unwrap()
.nr_children()
== 0
{
self.0.level_up();
self.0.move_forward();
}
}
Child::None => {
unreachable!("Already checked");
}
Child::Page(_, _) => {
panic!("Removing part of a huge page");
}
Child::Untracked(_, _, _) => {
let split_child = cur_entry.split_if_untracked_huge().unwrap();
self.0.push_level(split_child);
}
}
continue;
}
// Unmap the current page and return it.
let idx = self.0.cur_idx();
let ret = self.cur_node_mut().replace_child(idx, Child::None);
let ret_page_va = self.0.va;
let old = cur_entry.replace(Child::None);
self.0.move_forward();
return match ret {
return match old {
Child::Page(page, prop) => PageTableItem::Mapped {
va: ret_page_va,
va: self.0.va,
page,
prop,
},
Child::Untracked(pa, level, prop) => {
debug_assert_eq!(level, self.0.level);
PageTableItem::MappedUntracked {
va: ret_page_va,
va: self.0.va,
pa,
len: page_size::<C>(level),
prop,
@ -684,51 +689,46 @@ where
assert!(end <= self.0.barrier_va.end);
while self.0.va < end {
let cur_pte = self.0.read_cur_pte();
if !cur_pte.is_present() {
let cur_va = self.0.va;
let cur_level = self.0.level;
let mut cur_entry = self.0.cur_entry();
// Skip if it is already absent.
if cur_entry.is_none() {
self.0.move_forward();
continue;
}
// Go down if it's not a last node.
if !cur_pte.is_last(self.0.level) {
self.0.level_down();
// We have got down a level. If there's no mapped PTEs in
// the current node, we can go back and skip to save time.
if self.0.guards[(self.0.level - 1) as usize]
.as_ref()
.unwrap()
.nr_children()
== 0
{
self.0.level_up();
// Go down if it's not a last entry.
if cur_entry.is_node() {
let Child::PageTable(pt) = cur_entry.to_owned() else {
unreachable!("Already checked");
};
let pt = pt.lock();
// If there are no mapped PTEs in the next level, we can
// skip to save time.
if pt.nr_children() != 0 {
self.0.push_level(pt);
} else {
self.0.move_forward();
}
continue;
}
// Go down if the page size is too big and we are protecting part
// of untracked huge pages.
if self.0.va % page_size::<C>(self.0.level) != 0
|| self.0.va + page_size::<C>(self.0.level) > end
{
if self.0.should_map_as_tracked() {
panic!("protecting part of a huge page");
} else {
self.level_down_split();
if cur_va % page_size::<C>(cur_level) != 0 || cur_va + page_size::<C>(cur_level) > end {
let split_child = cur_entry
.split_if_untracked_huge()
.expect("Protecting part of a huge page");
self.0.push_level(split_child);
continue;
}
}
let mut pte_prop = cur_pte.prop();
op(&mut pte_prop);
// Protect the current page.
cur_entry.protect(op);
let idx = self.0.cur_idx();
self.cur_node_mut().protect(idx, pte_prop);
let protected_va = self.0.va..self.0.va + page_size::<C>(self.0.level);
self.0.move_forward();
return Some(protected_va);
@ -777,47 +777,39 @@ where
assert!(src_end <= src.0.barrier_va.end);
while self.0.va < this_end && src.0.va < src_end {
let cur_pte = src.0.read_cur_pte();
if !cur_pte.is_present() {
let src_va = src.0.va;
let mut src_entry = src.0.cur_entry();
match src_entry.to_owned() {
Child::PageTable(pt) => {
let pt = pt.lock();
// If there are no mapped PTEs in the next level, we can
// skip to save time.
if pt.nr_children() != 0 {
src.0.push_level(pt);
} else {
src.0.move_forward();
}
continue;
}
Child::None => {
src.0.move_forward();
continue;
}
// Go down if it's not a last node.
if !cur_pte.is_last(src.0.level) {
src.0.level_down();
// We have got down a level. If there's no mapped PTEs in
// the current node, we can go back and skip to save time.
if src.0.guards[(src.0.level - 1) as usize]
.as_ref()
.unwrap()
.nr_children()
== 0
{
src.0.level_up();
src.0.move_forward();
}
continue;
Child::Untracked(_, _, _) => {
panic!("Copying untracked mappings");
}
Child::Page(page, mut prop) => {
let mapped_page_size = page.size();
// Do protection.
let mut pte_prop = cur_pte.prop();
op(&mut pte_prop);
let idx = src.0.cur_idx();
src.cur_node_mut().protect(idx, pte_prop);
src_entry.protect(op);
// Do copy.
let child = src.cur_node_mut().child(idx);
let Child::<E, C>::Page(page, prop) = child else {
panic!("Unexpected child for source mapping: {:#?}", child);
};
self.jump(src.0.va).unwrap();
let mapped_page_size = page.size();
op(&mut prop);
self.jump(src_va).unwrap();
let original = self.map(page, prop);
debug_assert!(original.is_none());
assert!(original.is_none());
// Only move the source cursor forward since `Self::map` will do it.
// This assertion is to ensure that they move by the same length.
@ -825,47 +817,6 @@ where
src.0.move_forward();
}
}
/// Goes down a level assuming the current slot is absent.
///
/// This method will create a new child page table node and go down to it.
fn level_down_create(&mut self) {
debug_assert!(self.0.level > 1);
let new_node = PageTableNode::<E, C>::alloc(
self.0.level - 1,
if self.0.should_map_as_tracked() {
MapTrackingStatus::Tracked
} else {
MapTrackingStatus::Untracked
},
);
let idx = self.0.cur_idx();
let old = self
.cur_node_mut()
.replace_child(idx, Child::PageTable(new_node.clone_raw()));
debug_assert!(old.is_none());
self.0.level -= 1;
self.0.guards[(self.0.level - 1) as usize] = Some(new_node);
}
/// Goes down a level assuming the current slot is an untracked huge page.
///
/// This method will split the huge page and go down to the next level.
fn level_down_split(&mut self) {
debug_assert!(self.0.level > 1);
debug_assert!(!self.0.should_map_as_tracked());
let idx = self.0.cur_idx();
self.cur_node_mut().split_untracked_huge(idx);
let Child::PageTable(new_node) = self.0.cur_child() else {
unreachable!();
};
self.0.level -= 1;
self.0.guards[(self.0.level - 1) as usize] = Some(new_node.lock());
}
fn cur_node_mut(&mut self) -> &mut PageTableNode<E, C> {
self.0.guards[(self.0.level - 1) as usize].as_mut().unwrap()
}
}
}
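In short, every walk in this file now follows the same shape: borrow the slot with `cur_entry()`, convert it to an owned `Child` to decide what to do, and use `push_level`/`pop_level` to keep the one-lock-per-level invariant. A condensed sketch of that descend/ascend protocol, assuming a cursor of this module in scope (the arms that map pages, split huge pages, or panic are elided):

let cur_level = cursor.level;
let cur_entry = cursor.cur_entry();
match cur_entry.to_owned() {
    // An existing child table: lock it first, then record it as the guard
    // of the level below.
    Child::PageTable(pt) => cursor.push_level(pt.lock()),
    // An empty slot: allocate a child table, link it into the entry, and
    // then descend into it.
    Child::None => {
        let pt = PageTableNode::alloc(cur_level - 1, MapTrackingStatus::Tracked);
        let _ = cur_entry.replace(Child::PageTable(pt.clone_raw()));
        cursor.push_level(pt);
    }
    // Mapped pages and untracked ranges are handled per call site.
    _ => {}
}
// The ascend counterpart: when the cursor crosses a node boundary,
// `pop_level` drops (and thereby unlocks) the guard of the current level.
cursor.pop_level();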

View File

@ -100,7 +100,7 @@ impl PageTable<KernelMode> {
/// This should be the only way to create the user page table, that is to
/// duplicate the kernel page table with all the kernel mappings shared.
pub fn create_user_page_table(&self) -> PageTable<UserMode> {
let root_node = self.root.clone_shallow().lock();
let mut root_node = self.root.clone_shallow().lock();
let mut new_node =
PageTableNode::alloc(PagingConsts::NR_LEVELS, MapTrackingStatus::NotApplicable);
@ -108,9 +108,9 @@ impl PageTable<KernelMode> {
// The user space range is not copied.
const NR_PTES_PER_NODE: usize = nr_subpage_per_huge::<PagingConsts>();
for i in NR_PTES_PER_NODE / 2..NR_PTES_PER_NODE {
let child = root_node.child(i);
if !child.is_none() {
let _ = new_node.replace_child(i, child);
let root_entry = root_node.entry(i);
if !root_entry.is_none() {
let _ = new_node.entry(i).replace(root_entry.to_owned());
}
}
@ -137,7 +137,8 @@ impl PageTable<KernelMode> {
let mut root_node = self.root.clone_shallow().lock();
for i in start..end {
if !root_node.read_pte(i).is_present() {
let root_entry = root_node.entry(i);
if root_entry.is_none() {
let nxt_level = PagingConsts::NR_LEVELS - 1;
let is_tracked = if super::kspace::should_map_as_tracked(
i * page_size::<PagingConsts>(nxt_level),
@ -147,7 +148,7 @@ impl PageTable<KernelMode> {
MapTrackingStatus::Untracked
};
let node = PageTableNode::alloc(nxt_level, is_tracked);
let _ = root_node.replace_child(i, Child::PageTable(node.into_raw()));
let _ = root_entry.replace(Child::PageTable(node.into_raw()));
}
}
}

View File

@ -8,10 +8,7 @@ use super::{PageTableEntryTrait, RawPageTableNode};
use crate::{
arch::mm::{PageTableEntry, PagingConsts},
mm::{
page::{
meta::{MapTrackingStatus, PageTablePageMeta},
DynPage, Page,
},
page::{inc_page_ref_count, meta::MapTrackingStatus, DynPage},
page_prop::PageProperty,
Paddr, PagingConstsTrait, PagingLevel,
},
@ -45,6 +42,27 @@ where
matches!(self, Child::None)
}
/// Returns whether the child is compatible with the given node.
///
/// In other words, it checks whether the child can be a child of a node
/// with the given level and tracking status.
pub(super) fn is_compatible(
&self,
node_level: PagingLevel,
is_tracked: MapTrackingStatus,
) -> bool {
match self {
Child::PageTable(pt) => node_level == pt.level() + 1,
Child::Page(p, _) => {
node_level == p.level() && is_tracked == MapTrackingStatus::Tracked
}
Child::Untracked(_, level, _) => {
node_level == *level && is_tracked == MapTrackingStatus::Untracked
}
Child::None => true,
}
}
/// Converts a child into an owning PTE.
///
/// By conversion it loses information about whether the page is tracked
@ -74,8 +92,10 @@ where
/// # Safety
///
/// The provided PTE must originate from [`Child::into_pte`]. And the
/// provided information (level and tracking status) must align with the
/// lost information during the conversion.
/// provided information (level and tracking status) must be the same as
/// the information lost during the conversion. Strictly speaking, the
/// provided arguments must be compatible with the original child (as
/// specified by [`Child::is_compatible`]).
///
/// This method should be used no more than once for a PTE that has
/// been converted from a child using the [`Child::into_pte`] method.
@ -85,28 +105,34 @@ where
is_tracked: MapTrackingStatus,
) -> Self {
if !pte.is_present() {
Child::None
} else {
return Child::None;
}
let paddr = pte.paddr();
if !pte.is_last(level) {
Child::PageTable(RawPageTableNode::from_paddr(paddr))
} else {
// SAFETY: The physical address points to a valid page table node
// at the given level.
return Child::PageTable(unsafe { RawPageTableNode::from_raw_parts(paddr, level - 1) });
}
match is_tracked {
MapTrackingStatus::Tracked => Child::Page(DynPage::from_raw(paddr), pte.prop()),
MapTrackingStatus::Tracked => {
// SAFETY: The physical address points to a valid page.
let page = unsafe { DynPage::from_raw(paddr) };
Child::Page(page, pte.prop())
}
MapTrackingStatus::Untracked => Child::Untracked(paddr, level, pte.prop()),
MapTrackingStatus::NotApplicable => panic!("Invalid tracking status"),
}
}
}
}
/// Gains an extra owning reference to the child.
///
/// # Safety
///
/// The provided PTE must be originated from [`Child::into_pte`]. And the
/// provided information (level and tracking status) must align with the
/// lost information during the conversion.
/// The provided PTE must originate from [`Child::into_pte`], which is
/// the same requirement as the [`Child::from_pte`] method.
///
/// This method must not be used with a PTE that has been restored to a
/// child using the [`Child::from_pte`] method.
@ -116,22 +142,31 @@ where
is_tracked: MapTrackingStatus,
) -> Self {
if !pte.is_present() {
Child::None
} else {
return Child::None;
}
let paddr = pte.paddr();
if !pte.is_last(level) {
Page::<PageTablePageMeta<E, C>>::inc_ref_count(paddr);
Child::PageTable(RawPageTableNode::from_paddr(paddr))
} else {
// SAFETY: The physical address is valid and the PTE already owns
// the reference to the page.
unsafe { inc_page_ref_count(paddr) };
// SAFETY: The physical address points to a valid page table node
// at the given level.
return Child::PageTable(unsafe { RawPageTableNode::from_raw_parts(paddr, level - 1) });
}
match is_tracked {
MapTrackingStatus::Tracked => {
DynPage::inc_ref_count(paddr);
Child::Page(DynPage::from_raw(paddr), pte.prop())
// SAFETY: The physical address is valid and the PTE already owns
// the reference to the page.
unsafe { inc_page_ref_count(paddr) };
// SAFETY: The physical address points to a valid page.
let page = unsafe { DynPage::from_raw(paddr) };
Child::Page(page, pte.prop())
}
MapTrackingStatus::Untracked => Child::Untracked(paddr, level, pte.prop()),
MapTrackingStatus::NotApplicable => panic!("Invalid tracking status"),
}
}
}
}
}
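Taken together, `into_pte`, `from_pte`, and `clone_from_pte` form one ownership contract. A sketch of the intended round trip, with `child`, `level`, and `is_tracked` as placeholder bindings that a page table node would normally supply:

// A child moves its ownership into a PTE exactly once.
let pte = child.into_pte();
// Extra owning handles are minted by bumping the reference count; the PTE
// keeps its own reference and remains valid.
// SAFETY: `pte` came from `into_pte`, and `level`/`is_tracked` match the node.
let extra = unsafe { Child::clone_from_pte(&pte, level, is_tracked) };
// Restoring takes over the PTE's reference and must happen at most once.
// SAFETY: same origin and arguments as above; the PTE is not used afterwards.
let restored = unsafe { Child::from_pte(pte, level, is_tracked) };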

View File

@ -0,0 +1,161 @@
// SPDX-License-Identifier: MPL-2.0
//! This module provides accessors to the page table entries in a node.
use super::{Child, PageTableEntryTrait, PageTableNode};
use crate::mm::{
nr_subpage_per_huge, page::meta::MapTrackingStatus, page_prop::PageProperty, page_size,
PagingConstsTrait,
};
/// A view of an entry in a page table node.
///
/// It can be borrowed from a node using the [`PageTableNode::entry`] method.
///
/// This is a static reference to an entry in a node that does not account for
/// a dynamic reference count to the child. It can be used to create an owned
/// handle, which is a [`Child`].
pub(in crate::mm) struct Entry<'a, E: PageTableEntryTrait, C: PagingConstsTrait>
where
[(); C::NR_LEVELS as usize]:,
{
/// The page table entry.
///
/// We store the page table entry here to reduce the number of reads from
/// the node. We cannot hold a `&mut E` reference to the entry because
/// other CPUs may modify the memory location for accessed/dirty bits. Such
/// accesses would violate the aliasing rules of Rust and cause undefined
/// behavior.
pte: E,
/// The index of the entry in the node.
idx: usize,
/// The node that contains the entry.
node: &'a mut PageTableNode<E, C>,
}
impl<'a, E: PageTableEntryTrait, C: PagingConstsTrait> Entry<'a, E, C>
where
[(); C::NR_LEVELS as usize]:,
{
/// Returns if the entry does not map to anything.
pub(in crate::mm) fn is_none(&self) -> bool {
!self.pte.is_present()
}
/// Returns if the entry maps to a page table node.
pub(in crate::mm) fn is_node(&self) -> bool {
self.pte.is_present() && !self.pte.is_last(self.node.level())
}
/// Gets an owned handle to the child.
pub(in crate::mm) fn to_owned(&self) -> Child<E, C> {
// SAFETY: The entry structure represents an existent entry with the
// right node information.
unsafe { Child::clone_from_pte(&self.pte, self.node.level(), self.node.is_tracked()) }
}
/// Operates on the mapping properties of the entry.
///
/// It only modifies the properties if the entry is present.
// FIXME: in x86_64, you can protect a page with neither of the RWX
// permissions. This would make the page not accessible and leaked. Such a
// behavior is memory-safe but wrong. In RISC-V there's no problem.
pub(in crate::mm) fn protect(&mut self, op: &mut impl FnMut(&mut PageProperty)) {
if !self.pte.is_present() {
return;
}
let prop = self.pte.prop();
let mut new_prop = prop;
op(&mut new_prop);
if prop == new_prop {
return;
}
self.pte.set_prop(new_prop);
// SAFETY:
// 1. The index is within the bounds.
// 2. We replace the PTE with a new one, which differs only in
// `PageProperty`, so it is still compatible with the current
// page table node.
unsafe { self.node.write_pte(self.idx, self.pte) };
}
/// Replaces the entry with a new child.
///
/// The old child is returned.
///
/// # Panics
///
/// The method panics if the given child is not compatible with the node.
/// The compatibility is specified by the [`Child::is_compatible`].
pub(in crate::mm) fn replace(self, new_child: Child<E, C>) -> Child<E, C> {
assert!(new_child.is_compatible(self.node.level(), self.node.is_tracked()));
// SAFETY: The entry structure represents an existent entry with the
// right node information. The old PTE is overwritten by the new child
// so that it is not used anymore.
let old_child =
unsafe { Child::from_pte(self.pte, self.node.level(), self.node.is_tracked()) };
if old_child.is_none() && !new_child.is_none() {
*self.node.nr_children_mut() += 1;
} else if !old_child.is_none() && new_child.is_none() {
*self.node.nr_children_mut() -= 1;
}
// SAFETY:
// 1. The index is within the bounds.
// 2. The new PTE is compatible with the page table node, as asserted above.
unsafe { self.node.write_pte(self.idx, new_child.into_pte()) };
old_child
}
/// Splits the entry into smaller pages if it maps to an untracked huge page.
///
/// If the entry does map to an untracked huge page, it is split into smaller
/// pages mapped by a child page table node. The new child page table node
/// is returned.
///
/// If the entry does not map to an untracked huge page, the method returns
/// `None`.
pub(in crate::mm) fn split_if_untracked_huge(self) -> Option<PageTableNode<E, C>> {
let level = self.node.level();
if !(self.pte.is_last(level)
&& level > 1
&& self.node.is_tracked() == MapTrackingStatus::Untracked)
{
return None;
}
let pa = self.pte.paddr();
let prop = self.pte.prop();
let mut new_page = PageTableNode::<E, C>::alloc(level - 1, MapTrackingStatus::Untracked);
for i in 0..nr_subpage_per_huge::<C>() {
let small_pa = pa + i * page_size::<C>(level - 1);
let _ = new_page
.entry(i)
.replace(Child::Untracked(small_pa, level - 1, prop));
}
let _ = self.replace(Child::PageTable(new_page.clone_raw()));
Some(new_page)
}
/// Creates a new entry at the node.
///
/// # Safety
///
/// The caller must ensure that the index is within the bounds of the node.
pub(super) unsafe fn new_at(node: &'a mut PageTableNode<E, C>, idx: usize) -> Self {
// SAFETY: The index is within the bound.
let pte = unsafe { node.read_pte(idx) };
Self { pte, idx, node }
}
}
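Putting the pieces together, a caller holding a locked node uses the view roughly as sketched below; `node` is assumed to be a mutable, locked `PageTableNode` and `idx` an in-bounds index.

let mut slot = node.entry(idx);
if slot.is_node() {
    // The slot holds a child table: take an owning handle and lock it to descend.
    let Child::PageTable(pt) = slot.to_owned() else { unreachable!() };
    let _child = pt.lock();
} else if !slot.is_none() {
    // The slot maps a page or an untracked range: adjust its properties in place.
    slot.protect(&mut |_prop: &mut PageProperty| {
        // e.g. downgrade the permissions here
    });
}
// `replace` consumes the `Entry` and hands the old child back to the caller.
let _old = node.entry(idx).replace(Child::None);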

View File

@ -26,22 +26,21 @@
//!
mod child;
mod entry;
use core::{marker::PhantomData, mem::ManuallyDrop, panic, sync::atomic::Ordering};
use core::{marker::PhantomData, mem::ManuallyDrop, sync::atomic::Ordering};
pub(in crate::mm) use child::Child;
use super::{nr_subpage_per_huge, page_size, PageTableEntryTrait};
pub(in crate::mm) use self::{child::Child, entry::Entry};
use super::{nr_subpage_per_huge, PageTableEntryTrait};
use crate::{
arch::mm::{PageTableEntry, PagingConsts},
mm::{
paddr_to_vaddr,
page::{
self,
self, inc_page_ref_count,
meta::{MapTrackingStatus, PageMeta, PageTablePageMeta, PageUsage},
DynPage, Page,
},
page_prop::PageProperty,
Paddr, PagingConstsTrait, PagingLevel, PAGE_SIZE,
},
};
@ -60,6 +59,7 @@ where
[(); C::NR_LEVELS as usize]:,
{
raw: Paddr,
level: PagingLevel,
_phantom: PhantomData<(E, C)>,
}
@ -71,8 +71,13 @@ where
self.raw
}
pub(super) fn level(&self) -> PagingLevel {
self.level
}
/// Converts a raw handle to an accessible handle by acquiring the lock.
pub(super) fn lock(self) -> PageTableNode<E, C> {
let level = self.level;
let page: Page<PageTablePageMeta<E, C>> = self.into();
// Acquire the lock.
@ -85,6 +90,8 @@ where
core::hint::spin_loop();
}
debug_assert_eq!(page.meta().level, level);
PageTableNode::<E, C> { page }
}
@ -94,6 +101,7 @@ where
Self {
raw: self.raw,
level: self.level,
_phantom: PhantomData,
}
}
@ -110,12 +118,18 @@ where
/// The caller must ensure that the page table to be activated has
/// proper mappings for the kernel and has the correct const parameters
/// matching the current CPU.
///
/// # Panics
///
/// Only top-level page tables can be activated using this function.
pub(crate) unsafe fn activate(&self) {
use crate::{
arch::mm::{activate_page_table, current_page_table_paddr},
mm::CachePolicy,
};
assert_eq!(self.level, C::NR_LEVELS);
let last_activated_paddr = current_page_table_paddr();
if last_activated_paddr == self.raw {
@ -130,6 +144,7 @@ where
// Restore and drop the last activated page table.
drop(Self {
raw: last_activated_paddr,
level: C::NR_LEVELS,
_phantom: PhantomData,
});
}
@ -150,20 +165,21 @@ where
// SAFETY: We have a reference count to the page and can safely increase the reference
// count by one more.
unsafe {
Page::<PageTablePageMeta<E, C>>::inc_ref_count(self.paddr());
inc_page_ref_count(self.paddr());
}
}
/// Restore the handle to a page table node from a physical address.
/// Restores the handle from the physical address and level.
///
/// # Safety
///
/// The caller must ensure that the physical address is valid and points to
/// a forgotten page table node. A forgotten page table node can only be
/// restored once.
unsafe fn from_paddr(paddr: Paddr) -> Self {
/// restored once. The level must match the level of the page table node.
unsafe fn from_raw_parts(paddr: Paddr, level: PagingLevel) -> Self {
Self {
raw: paddr,
level,
_phantom: PhantomData,
}
}
@ -215,6 +231,28 @@ impl<E: PageTableEntryTrait, C: PagingConstsTrait> PageTableNode<E, C>
where
[(); C::NR_LEVELS as usize]:,
{
/// Borrows an entry in the node at a given index.
///
/// # Panics
///
/// Panics if the index is not within the bound of
/// [`nr_subpage_per_huge<C>`].
pub(super) fn entry(&mut self, idx: usize) -> Entry<'_, E, C> {
assert!(idx < nr_subpage_per_huge::<C>());
// SAFETY: The index is within the bound.
unsafe { Entry::new_at(self, idx) }
}
/// Gets the level of the page table node.
pub(super) fn level(&self) -> PagingLevel {
self.page.meta().level
}
/// Gets the tracking status of the page table node.
pub(super) fn is_tracked(&self) -> MapTrackingStatus {
self.page.meta().is_tracked
}
/// Allocates a new empty page table node.
///
/// This function returns an owning handle. The newly created handle does not
@ -234,148 +272,70 @@ where
Self { page }
}
pub fn level(&self) -> PagingLevel {
self.page.meta().level
}
pub fn is_tracked(&self) -> MapTrackingStatus {
self.page.meta().is_tracked
}
/// Converts the handle into a raw handle to be stored in a PTE or CPU.
pub(super) fn into_raw(self) -> RawPageTableNode<E, C> {
let this = ManuallyDrop::new(self);
let raw = this.page.paddr();
// Release the lock.
this.page.meta().lock.store(0, Ordering::Release);
RawPageTableNode {
raw,
_phantom: PhantomData,
}
// SAFETY: The provided physical address is valid and the level is
// correct. The reference count is not changed.
unsafe { RawPageTableNode::from_raw_parts(this.page.paddr(), this.page.meta().level) }
}
/// Gets a raw handle while still preserving the original handle.
pub(super) fn clone_raw(&self) -> RawPageTableNode<E, C> {
core::mem::forget(self.page.clone());
let page = ManuallyDrop::new(self.page.clone());
RawPageTableNode {
raw: self.page.paddr(),
_phantom: PhantomData,
}
// SAFETY: The provided physical address is valid and the level is
// correct. The reference count is increased by one.
unsafe { RawPageTableNode::from_raw_parts(page.paddr(), page.meta().level) }
}
/// Gets an extra reference of the child at the given index.
pub(super) fn child(&self, idx: usize) -> Child<E, C> {
debug_assert!(idx < nr_subpage_per_huge::<C>());
let pte = self.read_pte(idx);
// SAFETY: The PTE is read from this page table node so the information
// recorded in this page table is correct.
unsafe { Child::clone_from_pte(&pte, self.level(), self.is_tracked()) }
/// Gets the number of valid PTEs in the node.
pub(super) fn nr_children(&self) -> u16 {
// SAFETY: The lock is held so we have an exclusive access.
unsafe { *self.page.meta().nr_children.get() }
}
/// Replace the child at the given index with a new child.
/// Reads a non-owning PTE at the given index.
///
/// The old child is returned. The new child must match the level of the page
/// table node and the tracking status of the page table node.
pub(super) fn replace_child(&mut self, idx: usize, new_child: Child<E, C>) -> Child<E, C> {
// It should be ensured by the cursor.
#[cfg(debug_assertions)]
match &new_child {
Child::PageTable(_) => {
debug_assert!(self.level() > 1);
}
Child::Page(p, _) => {
debug_assert!(self.level() == p.level());
debug_assert!(self.is_tracked() == MapTrackingStatus::Tracked);
}
Child::Untracked(_, level, _) => {
debug_assert!(self.level() == *level);
debug_assert!(self.is_tracked() == MapTrackingStatus::Untracked);
}
Child::None => {}
}
let pte = self.read_pte(idx);
// SAFETY: The PTE is read from this page table node so the information
// provided is correct. The PTE is not restored twice.
let old_child = unsafe { Child::from_pte(pte, self.level(), self.is_tracked()) };
if old_child.is_none() && !new_child.is_none() {
*self.nr_children_mut() += 1;
} else if !old_child.is_none() && new_child.is_none() {
*self.nr_children_mut() -= 1;
}
self.write_pte(idx, new_child.into_pte());
old_child
}
/// Splits the untracked huge page mapped at `idx` to smaller pages.
pub(super) fn split_untracked_huge(&mut self, idx: usize) {
// These should be ensured by the cursor.
/// A non-owning PTE means that it does not account for a reference count
/// of the page if the PTE points to a page. The original PTE still owns
/// the child page.
///
/// # Safety
///
/// The caller must ensure that the index is within the bound.
unsafe fn read_pte(&self, idx: usize) -> E {
debug_assert!(idx < nr_subpage_per_huge::<C>());
debug_assert!(self.level() > 1);
let Child::Untracked(pa, level, prop) = self.child(idx) else {
panic!("`split_untracked_huge` not called on an untracked huge page");
};
debug_assert_eq!(level, self.level());
let mut new_page = PageTableNode::<E, C>::alloc(level - 1, MapTrackingStatus::Untracked);
for i in 0..nr_subpage_per_huge::<C>() {
let small_pa = pa + i * page_size::<C>(level - 1);
new_page.replace_child(i, Child::Untracked(small_pa, level - 1, prop));
}
self.replace_child(idx, Child::PageTable(new_page.into_raw()));
}
/// Protects an already mapped child at a given index.
pub(super) fn protect(&mut self, idx: usize, prop: PageProperty) {
let mut pte = self.read_pte(idx);
debug_assert!(pte.is_present()); // This should be ensured by the cursor.
pte.set_prop(prop);
self.write_pte(idx, pte);
}
pub(super) fn read_pte(&self, idx: usize) -> E {
// It should be ensured by the cursor.
debug_assert!(idx < nr_subpage_per_huge::<C>());
let ptr = paddr_to_vaddr(self.page.paddr()) as *const E;
// SAFETY: the index is within the bound and PTE is plain-old-data.
// SAFETY: The index is within the bound and the PTE is plain-old-data.
unsafe { ptr.add(idx).read() }
}
/// Writes a page table entry at a given index.
///
/// This operation will leak the old child if the PTE is present.
fn write_pte(&mut self, idx: usize, pte: E) {
// It should be ensured by the cursor.
/// This operation will leak the old child if the old PTE is present.
///
/// The child represented by the given PTE will hand over its ownership to
/// the node. The PTE will be rendered invalid after this operation.
///
/// # Safety
///
/// The caller must ensure that:
/// 1. The index must be within the bound;
/// 2. The PTE must represent a child compatible with this page table node
/// (see [`Child::is_compatible`]).
unsafe fn write_pte(&mut self, idx: usize, pte: E) {
debug_assert!(idx < nr_subpage_per_huge::<C>());
let ptr = paddr_to_vaddr(self.page.paddr()) as *mut E;
// SAFETY: the index is within the bound and PTE is plain-old-data.
unsafe { ptr.add(idx).write(pte) };
}
/// The number of valid PTEs.
pub(super) fn nr_children(&self) -> u16 {
// SAFETY: The lock is held so there is no mutable reference to it.
// It would be safe to read.
unsafe { *self.page.meta().nr_children.get() }
// SAFETY: The index is within the bound and the PTE is plain-old-data.
unsafe { ptr.add(idx).write(pte) }
}
/// Gets the mutable reference to the number of valid PTEs in the node.
fn nr_children_mut(&mut self) -> &mut u16 {
// SAFETY: The lock is held so we have an exclusive access.
unsafe { &mut *self.page.meta().nr_children.get() }
@ -399,6 +359,13 @@ where
const USAGE: PageUsage = PageUsage::PageTable;
fn on_drop(page: &mut Page<Self>) {
// SAFETY: This is the last reference so we have an exclusive access.
let nr_children = unsafe { *page.meta().nr_children.get() };
if nr_children == 0 {
return;
}
let paddr = page.paddr();
let level = page.meta().level;
let is_tracked = page.meta().is_tracked;