// SPDX-License-Identifier: MPL-2.0

//! The page table cursor for mapping and querying over the page table.
//!
//! ## The page table lock protocol
//!
//! We provide a fine-grained lock protocol to allow concurrent accesses to
//! the page table. The protocol was originally proposed by Ruihan Li
//! <lrh2000@pku.edu.cn>.
//!
//! [`CursorMut::new`] accepts an address range, which indicates the page table
//! entries that may be visited by this cursor.
//!
//! Then, [`CursorMut::new`] finds an intermediate page table (not necessarily
//! the last-level or the top-level) that represents an address range containing
//! the whole specified address range. It acquires all locks from the root page
//! table to the intermediate page table, but then releases all of them except the
//! one for the intermediate page table. The cursor then maintains the lock
//! guards from the one for the intermediate page table down to the one for the
//! leaf that the cursor is currently manipulating.
//!
//! For example, if we're going to map the address range shown below:
//!
//! ```plain
//! Top-level page table node              A
//!                                       /
//!                                      B
//!                                     / \
//! Last-level page table nodes        C   D
//! Last-level PTEs                ---**...**---
//!                                   \__ __/
//!                                      V
//!              Address range that we're going to map
//! ```
//!
//! When calling [`CursorMut::new`], it will:
//! 1. `lock(A)`, `lock(B)`, `unlock(A)`;
//! 2. `guards = [ locked(B) ]`.
//!
//! When calling [`CursorMut::map`], it will:
//! 1. `lock(C)`, `guards = [ locked(B), locked(C) ]`;
//! 2. Map some pages in `C`;
//! 3. `unlock(C)`, `guards = [ locked(B) ]`;
//! 4. `lock(D)`, `guards = [ locked(B), locked(D) ]`;
//! 5. Map some pages in `D`;
//! 6. `unlock(D)`, `guards = [ locked(B) ]`.
//!
//! ## Validity
//!
//! The page table cursor API guarantees that the page table, as a data
//! structure, will not suffer from data races in its occupied memory. This is
//! ensured by the page table lock protocol. In other words, any operations
//! provided by the APIs (as long as safety requirements are met) will not
//! break the page table data structure (or other memory).
//!
//! However, the page table cursor creation APIs, [`CursorMut::new`] or
//! [`Cursor::new`], do not guarantee exclusive access to the virtual address
//! area you claim. From the lock protocol, you can see that it is possible to
//! create two cursors that claim overlapping virtual address ranges (one
//! covering another). In this case, the greater cursor may block if it wants
//! to modify the page table entries covered by the smaller cursor. Also, if the
//! greater cursor destructs the smaller cursor's parent page table node, it
//! won't block, and the smaller cursor's changes will not be visible. The user
//! of the page table cursor should add additional entry point checks to
//! prevent these defined behaviors if they are not wanted.
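//!
//! For instance, a minimal read-only traversal might look like the sketch
//! below (`pt` is an assumed, already-initialized [`PageTable`], and the
//! range must be aligned to the base page size):
//!
//! ```rust,ignore
//! // `Cursor` implements `Iterator`, yielding one `PageTableItem` per slot
//! // and advancing automatically.
//! let cursor = Cursor::new(&pt, &(0x1000..0x3000))?;
//! for item in cursor {
//!     // Inspect `item` (a mapped page, an untracked range, or a hole).
//! }
//! ```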

use core::{any::TypeId, marker::PhantomData, ops::Range};

use align_ext::AlignExt;

use super::{
    page_size, pte_index, Child, KernelMode, PageTable, PageTableEntryTrait, PageTableError,
    PageTableMode, PageTableNode, PagingConstsTrait, PagingLevel, UserMode,
};
use crate::mm::{page::DynPage, Paddr, PageProperty, Vaddr};
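
/// The item obtained from a page table slot, describing either a hole
/// (`NotMapped`), a mapping to a tracked page (`Mapped`), or a mapping to an
/// untracked physical range (`MappedUntracked`).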
#[derive(Clone, Debug)]
pub enum PageTableItem {
    NotMapped {
        va: Vaddr,
        len: usize,
    },
    Mapped {
        va: Vaddr,
        page: DynPage,
        prop: PageProperty,
    },
    #[allow(dead_code)]
    MappedUntracked {
        va: Vaddr,
        pa: Paddr,
        len: usize,
        prop: PageProperty,
    },
}

/// The cursor for traversal over the page table.
///
/// A slot is a PTE at any level, which corresponds to a certain virtual
/// memory range sized by the "page size" of the current level.
///
/// A cursor is able to move to the next slot, to read page properties,
/// and even to jump to a virtual address directly. We use a guard stack to
/// simulate the recursion, and adopt a page table locking protocol to
/// provide concurrency.
#[derive(Debug)]
pub struct Cursor<'a, M: PageTableMode, E: PageTableEntryTrait, C: PagingConstsTrait>
where
    [(); C::NR_LEVELS as usize]:,
{
    /// The lock guards of the cursor. The level 1 page table lock guard is at
    /// index 0, and the level N page table lock guard is at index N - 1.
    ///
    /// When destructing the cursor, the locks will be released in the order
    /// from low to high, exactly the reverse order of the acquisition.
    /// This behavior is ensured by the default drop implementation of Rust:
    /// <https://doc.rust-lang.org/reference/destructors.html>.
    guards: [Option<PageTableNode<E, C>>; C::NR_LEVELS as usize],
    /// The level of the page table that the cursor points to.
    level: PagingLevel,
    /// From `guard_level` to `level`, the locks are held in `guards`.
    guard_level: PagingLevel,
    /// The current virtual address that the cursor points to.
    va: Vaddr,
    /// The virtual address range that is locked.
    barrier_va: Range<Vaddr>,
    phantom: PhantomData<&'a PageTable<M, E, C>>,
}

impl<'a, M: PageTableMode, E: PageTableEntryTrait, C: PagingConstsTrait> Cursor<'a, M, E, C>
where
    [(); C::NR_LEVELS as usize]:,
{
    /// Creates a cursor claiming read access to the given range.
    ///
    /// The cursor created will only be able to query or jump within the given
    /// range. Out-of-bound accesses will result in panics or errors as return
    /// values, depending on the access method.
    ///
    /// Note that this function does not ensure exclusive access to the claimed
    /// virtual address range. Accesses using this cursor may block or fail.
    pub fn new(pt: &'a PageTable<M, E, C>, va: &Range<Vaddr>) -> Result<Self, PageTableError> {
        if !M::covers(va) {
            return Err(PageTableError::InvalidVaddrRange(va.start, va.end));
        }
        if va.start % C::BASE_PAGE_SIZE != 0 || va.end % C::BASE_PAGE_SIZE != 0 {
            return Err(PageTableError::UnalignedVaddr);
        }

        // Create a guard array that only holds the root node lock.
        let guards = core::array::from_fn(|i| {
            if i == (C::NR_LEVELS - 1) as usize {
                Some(pt.root.clone_shallow().lock())
            } else {
                None
            }
        });
        let mut cursor = Self {
            guards,
            level: C::NR_LEVELS,
            guard_level: C::NR_LEVELS,
            va: va.start,
            barrier_va: va.clone(),
            phantom: PhantomData,
        };

        // Go down and get proper locks. The cursor should hold a lock of a
        // page table node containing the virtual address range.
        //
        // While going down, previous guards of too-high levels will be released.
        loop {
            let level_too_high = {
                let start_idx = pte_index::<C>(va.start, cursor.level);
                let end_idx = pte_index::<C>(va.end - 1, cursor.level);
                start_idx == end_idx
            };
            if !level_too_high {
                break;
            }

            let cur_pte = cursor.read_cur_pte();
            if !cur_pte.is_present() || cur_pte.is_last(cursor.level) {
                break;
            }

            cursor.level_down();

            // Release the guard of the previous (upper) level.
            cursor.guards[cursor.level as usize] = None;
            cursor.guard_level -= 1;
        }

        Ok(cursor)
    }

    /// Gets the information of the current slot.
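    ///
    /// A minimal sketch of inspecting the result (assuming `cursor` was
    /// created over a valid range):
    ///
    /// ```rust,ignore
    /// match cursor.query()? {
    ///     PageTableItem::Mapped { va, page, prop } => { /* a tracked page */ }
    ///     PageTableItem::MappedUntracked { va, pa, len, prop } => { /* a raw range */ }
    ///     PageTableItem::NotMapped { va, len } => { /* a hole of `len` bytes */ }
    /// }
    /// ```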
    pub fn query(&mut self) -> Result<PageTableItem, PageTableError> {
        if self.va >= self.barrier_va.end {
            return Err(PageTableError::InvalidVaddr(self.va));
        }

        loop {
            let level = self.level;
            let va = self.va;

            let pte = self.read_cur_pte();
            if !pte.is_present() {
                return Ok(PageTableItem::NotMapped {
                    va,
                    len: page_size::<C>(level),
                });
            }
            if !pte.is_last(level) {
                self.level_down();
                continue;
            }

            match self.cur_child() {
                Child::Page(page) => {
                    return Ok(PageTableItem::Mapped {
                        va,
                        page,
                        prop: pte.prop(),
                    });
                }
                Child::Untracked(pa) => {
                    return Ok(PageTableItem::MappedUntracked {
                        va,
                        pa,
                        len: page_size::<C>(level),
                        prop: pte.prop(),
                    });
                }
                Child::None | Child::PageTable(_) => {
                    unreachable!(); // Already checked with the PTE.
                }
            }
        }
    }

    /// Traverses forward in the current level to the next PTE.
    ///
    /// If the cursor reaches the end of a page table node, it goes up to the
    /// next slot of the parent node, if possible.
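    ///
    /// For example, with 512-entry nodes and 4-KiB base pages (x86-64-like
    /// paging constants), advancing past the 512th slot of a level-1 node
    /// makes the cursor go up a level, since the next virtual address starts
    /// a new level-1 node.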
    pub(in crate::mm) fn move_forward(&mut self) {
        let page_size = page_size::<C>(self.level);
        let next_va = self.va.align_down(page_size) + page_size;
        while self.level < self.guard_level && pte_index::<C>(next_va, self.level) == 0 {
            self.level_up();
        }
        self.va = next_va;
    }

    /// Jumps to the given virtual address.
    /// If the target address is out of the range, this method will return `Err`.
    ///
    /// # Panics
    ///
    /// This method panics if the address has bad alignment.
    pub fn jump(&mut self, va: Vaddr) -> Result<(), PageTableError> {
        assert!(va % C::BASE_PAGE_SIZE == 0);
        if !self.barrier_va.contains(&va) {
            return Err(PageTableError::InvalidVaddr(va));
        }

        loop {
            let cur_node_start = self.va & !(page_size::<C>(self.level + 1) - 1);
            let cur_node_end = cur_node_start + page_size::<C>(self.level + 1);
            // If the address is within the current node, we can jump directly.
            if cur_node_start <= va && va < cur_node_end {
                self.va = va;
                return Ok(());
            }

            // There is a corner case that the cursor is depleted, sitting at the start of the
            // next node, but the next node is not locked because the parent is not locked.
            if self.va >= self.barrier_va.end && self.level == self.guard_level {
                self.va = va;
                return Ok(());
            }

            debug_assert!(self.level < self.guard_level);
            self.level_up();
        }
    }

    pub fn virt_addr(&self) -> Vaddr {
        self.va
    }

    /// Goes up a level.
    ///
    /// We release the current node if it has no mappings, since the cursor
    /// only moves forward. If needed, we do the final cleanup using this
    /// method after a re-walk when the cursor is dropped.
    ///
    /// This method requires locks to be acquired before calling it. The
    /// discarded level will be unlocked.
    fn level_up(&mut self) {
        self.guards[(self.level - 1) as usize] = None;
        self.level += 1;

        // TODO: Drop page tables if page tables become empty.
    }

    /// Goes down a level, assuming a child page table exists.
    fn level_down(&mut self) {
        debug_assert!(self.level > 1);

        let Child::PageTable(nxt_lvl_ptn) = self.cur_child() else {
            panic!("Trying to level down when it is not mapped to a page table");
        };

        let nxt_lvl_ptn_locked = nxt_lvl_ptn.lock();

        self.level -= 1;
        debug_assert_eq!(self.level, nxt_lvl_ptn_locked.level());

        self.guards[(self.level - 1) as usize] = Some(nxt_lvl_ptn_locked);
    }

    fn cur_node(&self) -> &PageTableNode<E, C> {
        self.guards[(self.level - 1) as usize].as_ref().unwrap()
    }

    fn cur_idx(&self) -> usize {
        pte_index::<C>(self.va, self.level)
    }

    fn cur_child(&self) -> Child<E, C> {
        self.cur_node()
            .child(self.cur_idx(), self.in_tracked_range())
    }

    fn read_cur_pte(&self) -> E {
        self.cur_node().read_pte(self.cur_idx())
    }

    /// Tells if the current virtual range must contain tracked mappings.
    ///
    /// _Tracked mappings_ means that the mapped physical addresses (in PTEs) point to pages
    /// tracked by the metadata system. _Tracked mappings_ must be created with page handles,
    /// while _untracked mappings_ simply map to plain physical addresses.
    ///
    /// In the kernel mode, this is aligned with the definition in [`crate::mm::kspace`].
    /// Only linear mappings in the kernel should be considered as untracked mappings.
    ///
    /// All mappings in the user mode are tracked. And all mappings in the IOMMU
    /// page table are untracked.
    fn in_tracked_range(&self) -> bool {
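        // Note that `&&` binds tighter than `||`: kernel-mode addresses are
        // only tracked when they fall outside the linear mapping range.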
        TypeId::of::<M>() == TypeId::of::<UserMode>()
            || TypeId::of::<M>() == TypeId::of::<KernelMode>()
                && !crate::mm::kspace::LINEAR_MAPPING_VADDR_RANGE.contains(&self.va)
    }
}

impl<'a, M: PageTableMode, E: PageTableEntryTrait, C: PagingConstsTrait> Iterator
    for Cursor<'a, M, E, C>
where
    [(); C::NR_LEVELS as usize]:,
{
    type Item = PageTableItem;

    fn next(&mut self) -> Option<Self::Item> {
        let result = self.query();
        if result.is_ok() {
            self.move_forward();
        }
        result.ok()
    }
}

/// The cursor of a page table that is capable of mapping, unmapping, and
/// protecting pages.
///
/// Also, it has all the capabilities of a [`Cursor`]. A virtual address range
/// in a page table can only be accessed by one cursor, whether it is mutable or not.
#[derive(Debug)]
pub struct CursorMut<'a, M: PageTableMode, E: PageTableEntryTrait, C: PagingConstsTrait>(
    Cursor<'a, M, E, C>,
)
where
    [(); C::NR_LEVELS as usize]:;

impl<'a, M: PageTableMode, E: PageTableEntryTrait, C: PagingConstsTrait> CursorMut<'a, M, E, C>
where
    [(); C::NR_LEVELS as usize]:,
{
    /// Creates a cursor claiming write access to the given range.
    ///
    /// The cursor created will only be able to map, query or jump within the given
    /// range. Out-of-bound accesses will result in panics or errors as return
    /// values, depending on the access method.
    ///
    /// Note that this function, the same as [`Cursor::new`], does not ensure exclusive
    /// access to the claimed virtual address range. Accesses using this cursor may
    /// block or fail.
    pub(super) fn new(
        pt: &'a PageTable<M, E, C>,
        va: &Range<Vaddr>,
    ) -> Result<Self, PageTableError> {
        Cursor::new(pt, va).map(|inner| Self(inner))
    }

    /// Jumps to the given virtual address.
    ///
    /// This is the same as [`Cursor::jump`].
    ///
    /// # Panics
    ///
    /// This method panics if the address is out of the range where the cursor is required
    /// to operate, or has bad alignment.
    pub fn jump(&mut self, va: Vaddr) -> Result<(), PageTableError> {
        self.0.jump(va)
    }

    /// Gets the current virtual address.
    pub fn virt_addr(&self) -> Vaddr {
        self.0.virt_addr()
    }

    /// Gets the information of the current slot.
    pub fn query(&mut self) -> Result<PageTableItem, PageTableError> {
        self.0.query()
    }

    /// Maps the range starting from the current address to a [`DynPage`].
    ///
    /// # Panics
    ///
    /// This function will panic if
    /// - the virtual address range to be mapped is out of the range;
    /// - the alignment of the page is not satisfied by the virtual address;
    /// - it is already mapped to a huge page while the caller wants to map a smaller one.
    ///
    /// # Safety
    ///
    /// The caller should ensure that the virtual range being mapped does
    /// not affect kernel's memory safety.
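    ///
    /// A minimal usage sketch (assuming `cursor` claims a range that fits the
    /// allocated `page`, and `prop` is a suitable [`PageProperty`]):
    ///
    /// ```rust,ignore
    /// // SAFETY: mapping this page here is assumed not to break kernel
    /// // memory safety.
    /// unsafe { cursor.map(page, prop) };
    /// // The cursor has now moved just past the newly mapped page.
    /// ```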
    pub unsafe fn map(&mut self, page: DynPage, prop: PageProperty) {
        let end = self.0.va + page.size();
        assert!(end <= self.0.barrier_va.end);
        debug_assert!(self.0.in_tracked_range());

        // Go down if not applicable.
        while self.0.level > C::HIGHEST_TRANSLATION_LEVEL
            || self.0.va % page_size::<C>(self.0.level) != 0
            || self.0.va + page_size::<C>(self.0.level) > end
        {
            let pte = self.0.read_cur_pte();
            if pte.is_present() && !pte.is_last(self.0.level) {
                self.0.level_down();
            } else if !pte.is_present() {
                self.level_down_create();
            } else {
                panic!("Mapping a smaller page in an already mapped huge page");
            }
            continue;
        }
        debug_assert_eq!(self.0.level, page.level());

        // Map the current page.
        let idx = self.0.cur_idx();
        self.cur_node_mut().set_child_page(idx, page, prop);
        self.0.move_forward();
    }

    /// Maps the range starting from the current address to a physical address range.
    ///
    /// The function will map as many huge pages as possible, and it will split
    /// the huge pages into smaller pages if necessary. If the input range is
    /// large, the resulting mappings may look like this (if very huge pages
    /// are supported):
    ///
    /// ```text
    /// start                                                             end
    ///   |----|----------------|--------------------------------|----|----|
    ///    base      huge                  very huge               base base
    ///    4KiB      2MiB                    1GiB                  4KiB 4KiB
    /// ```
    ///
    /// In practice, using this method is not suggested, for reasons of safety
    /// and conciseness.
    ///
    /// # Panics
    ///
    /// This function will panic if
    /// - the virtual address range to be mapped is out of the range.
    ///
    /// # Safety
    ///
    /// The caller should ensure that
    /// - the range being mapped does not affect kernel's memory safety;
    /// - the physical address to be mapped is valid and safe to use;
    /// - it is allowed to map untracked pages in this virtual address range.
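    ///
    /// A hedged sketch of mapping an untracked physical range (the physical
    /// range, its validity, and `prop` are all assumptions here):
    ///
    /// ```rust,ignore
    /// // SAFETY: the physical range is valid, safe to use, and allowed to
    /// // be mapped untracked in this virtual range (all assumed).
    /// unsafe { cursor.map_pa(&(0x8000_0000..0x8040_0000), prop) };
    /// ```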
    pub unsafe fn map_pa(&mut self, pa: &Range<Paddr>, prop: PageProperty) {
        let end = self.0.va + pa.len();
        let mut pa = pa.start;
        assert!(end <= self.0.barrier_va.end);

        while self.0.va < end {
            // Ensure that we do not map into, or release, reserved kernel shared tables.
            // This may be an invariant for all architectures, in which case the check is
            // optimized out by the compiler since `C::NR_LEVELS - 1 > C::HIGHEST_TRANSLATION_LEVEL`.
            let is_kernel_shared_node =
                TypeId::of::<M>() == TypeId::of::<KernelMode>() && self.0.level >= C::NR_LEVELS - 1;
            if self.0.level > C::HIGHEST_TRANSLATION_LEVEL
                || is_kernel_shared_node
                || self.0.va % page_size::<C>(self.0.level) != 0
                || self.0.va + page_size::<C>(self.0.level) > end
                || pa % page_size::<C>(self.0.level) != 0
            {
                let pte = self.0.read_cur_pte();
                if pte.is_present() && !pte.is_last(self.0.level) {
                    self.0.level_down();
                } else if !pte.is_present() {
                    self.level_down_create();
                } else {
                    self.level_down_split();
                }
                continue;
            }

            // Map the current page.
            debug_assert!(!self.0.in_tracked_range());
            let idx = self.0.cur_idx();
            self.cur_node_mut().set_child_untracked(idx, pa, prop);

            let level = self.0.level;
            pa += page_size::<C>(level);
            self.0.move_forward();
        }
    }

    /// Finds and removes the first page in the cursor's following range.
    ///
    /// The range to be searched is the current virtual address with the
    /// provided length.
    ///
    /// The function stops and yields the page if it has actually removed a
    /// page, no matter whether the following pages also need to be unmapped.
    /// The returned page is the virtual page that existed before the removal
    /// but has just been unmapped.
    ///
    /// It also moves the cursor forward to the next page after the removed
    /// one, when an actual page is removed. If no mapped pages exist in the
    /// following range, the cursor will stop at the end of the range and
    /// return [`PageTableItem::NotMapped`].
    ///
    /// # Safety
    ///
    /// The caller should ensure that the range being unmapped does not affect
    /// kernel's memory safety.
    ///
    /// # Panics
    ///
    /// This function will panic if the end of the range covers only a part of
    /// a huge page and the next page is that huge page.
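    ///
    /// A minimal sketch of unmapping a whole tracked (user-space) range by
    /// repeated calls (`size` is an assumed, page-aligned length):
    ///
    /// ```rust,ignore
    /// let end = cursor.virt_addr() + size;
    /// loop {
    ///     // SAFETY: unmapping this range is assumed not to break kernel
    ///     // memory safety.
    ///     match unsafe { cursor.take_next(end - cursor.virt_addr()) } {
    ///         PageTableItem::Mapped { page, .. } => drop(page),
    ///         PageTableItem::NotMapped { .. } => break,
    ///         _ => unreachable!("the range is tracked"),
    ///     }
    /// }
    /// ```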
    pub unsafe fn take_next(&mut self, len: usize) -> PageTableItem {
        let start = self.0.va;
        assert!(len % page_size::<C>(1) == 0);
        let end = start + len;
        assert!(end <= self.0.barrier_va.end);

        while self.0.va < end {
            let cur_pte = self.0.read_cur_pte();
            let is_tracked = self.0.in_tracked_range();

            // Skip if it is already absent.
            if !cur_pte.is_present() {
                if self.0.va + page_size::<C>(self.0.level) > end {
                    self.0.va = end;
                    break;
                }
                self.0.move_forward();
                continue;
            }

            // Level down if the current PTE points to a page table.
            if !cur_pte.is_last(self.0.level) {
                self.0.level_down();

                // We have gone down a level. If there are no mapped PTEs in
                // the current node, we can go back and skip it to save time.
                if self.0.guards[(self.0.level - 1) as usize]
                    .as_ref()
                    .unwrap()
                    .nr_children()
                    == 0
                {
                    self.0.level_up();
                    self.0.move_forward();
                }

                continue;
            }

            // Level down if we are removing part of a huge untracked page.
            if self.0.va % page_size::<C>(self.0.level) != 0
                || self.0.va + page_size::<C>(self.0.level) > end
            {
                if !is_tracked {
                    self.level_down_split();
                    continue;
                } else {
                    panic!("removing part of a huge page");
                }
            }

            // Unmap the current page and return it.
            let idx = self.0.cur_idx();
            let ret = self.cur_node_mut().take_child(idx, is_tracked);
            let ret_page_va = self.0.va;
            let ret_page_size = page_size::<C>(self.0.level);

            self.0.move_forward();

            return match ret {
                Child::Page(page) => PageTableItem::Mapped {
                    va: ret_page_va,
                    page,
                    prop: cur_pte.prop(),
                },
                Child::Untracked(pa) => PageTableItem::MappedUntracked {
                    va: ret_page_va,
                    pa,
                    len: ret_page_size,
                    prop: cur_pte.prop(),
                },
                Child::None | Child::PageTable(_) => unreachable!(),
            };
        }

        // If the loop exits, we did not find any mapped pages in the range.
        PageTableItem::NotMapped { va: start, len }
    }

    /// Applies the operation to the next slot of mapping within the range.
    ///
    /// The range to be searched is the current virtual address with the
    /// provided length.
    ///
    /// The function stops and yields the actually protected range if it has
    /// actually protected a page, no matter whether the following pages also
    /// need to be protected.
    ///
    /// It also moves the cursor forward to the next page after the protected
    /// one. If no mapped pages exist in the following range, the cursor will
    /// stop at the end of the range and return [`None`].
    ///
    /// # Safety
    ///
    /// The caller should ensure that the range being protected with the
    /// operation does not affect kernel's memory safety.
    ///
    /// # Panics
    ///
    /// This function will panic if:
    /// - the range to be protected is out of the range where the cursor
    ///   is required to operate;
    /// - the specified virtual address range only covers a part of a page.
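    ///
    /// A minimal sketch of write-protecting a range slot by slot (`size` and
    /// the flag-clearing operation are assumptions for illustration):
    ///
    /// ```rust,ignore
    /// let end = cursor.virt_addr() + size;
    /// // SAFETY: revoking write access here is assumed not to break kernel
    /// // memory safety.
    /// while let Some(range) = unsafe {
    ///     cursor.protect_next(end - cursor.virt_addr(), &mut |prop| {
    ///         prop.flags -= PageFlags::W; // Hypothetical flag-clearing op.
    ///     })
    /// } {
    ///     // `range` is the slot that was just protected.
    /// }
    /// ```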
    pub unsafe fn protect_next(
        &mut self,
        len: usize,
        op: &mut impl FnMut(&mut PageProperty),
    ) -> Option<Range<Vaddr>> {
        let end = self.0.va + len;
        assert!(end <= self.0.barrier_va.end);

        while self.0.va < end {
            let cur_pte = self.0.read_cur_pte();
            if !cur_pte.is_present() {
                self.0.move_forward();
                continue;
            }

            // Go down if it's not a last node.
            if !cur_pte.is_last(self.0.level) {
                self.0.level_down();

                // We have gone down a level. If there are no mapped PTEs in
                // the current node, we can go back and skip it to save time.
                if self.0.guards[(self.0.level - 1) as usize]
                    .as_ref()
                    .unwrap()
                    .nr_children()
                    == 0
                {
                    self.0.level_up();
                    self.0.move_forward();
                }

                continue;
            }

            // Go down if the page size is too big and we are protecting part
            // of an untracked huge page.
            if self.0.va % page_size::<C>(self.0.level) != 0
                || self.0.va + page_size::<C>(self.0.level) > end
            {
                if self.0.in_tracked_range() {
                    panic!("protecting part of a huge page");
                } else {
                    self.level_down_split();
                    continue;
                }
            }

            let mut pte_prop = cur_pte.prop();
            op(&mut pte_prop);

            let idx = self.0.cur_idx();
            self.cur_node_mut().protect(idx, pte_prop);
            let protected_va = self.0.va..self.0.va + page_size::<C>(self.0.level);

            self.0.move_forward();

            return Some(protected_va);
        }

        None
    }

    /// Consumes itself and leaks the root guard for the caller if it locked the root level.
    ///
    /// It is useful when the caller wants to keep the root guard while the cursor should be dropped.
    pub(super) fn leak_root_guard(mut self) -> Option<PageTableNode<E, C>> {
        if self.0.guard_level != C::NR_LEVELS {
            return None;
        }

        while self.0.level < C::NR_LEVELS {
            self.0.level_up();
        }

        self.0.guards[(C::NR_LEVELS - 1) as usize].take()

        // It is OK to drop the cursor here because we ensure not to access the page table
        // if the current level is the root level when running the dropping method.
    }

    /// Goes down a level, assuming the current slot is absent.
    ///
    /// This method will create a new child page table node and go down to it.
    fn level_down_create(&mut self) {
        debug_assert!(self.0.level > 1);
        let new_node = PageTableNode::<E, C>::alloc(self.0.level - 1);
        let idx = self.0.cur_idx();
        let is_tracked = self.0.in_tracked_range();
        self.cur_node_mut()
            .set_child_pt(idx, new_node.clone_raw(), is_tracked);
        self.0.level -= 1;
        self.0.guards[(self.0.level - 1) as usize] = Some(new_node);
    }

    /// Goes down a level, assuming the current slot is an untracked huge page.
    ///
    /// This method will split the huge page and go down to the next level.
    fn level_down_split(&mut self) {
        debug_assert!(self.0.level > 1);
        debug_assert!(!self.0.in_tracked_range());

        let idx = self.0.cur_idx();
        self.cur_node_mut().split_untracked_huge(idx);

        let Child::PageTable(new_node) = self.0.cur_child() else {
            unreachable!();
        };
        self.0.level -= 1;
        self.0.guards[(self.0.level - 1) as usize] = Some(new_node.lock());
    }

    fn cur_node_mut(&mut self) -> &mut PageTableNode<E, C> {
        self.0.guards[(self.0.level - 1) as usize].as_mut().unwrap()
    }
}