Mirror of https://github.com/asterinas/asterinas.git
Optimize the page table implementation using the frame metadata
This PR also refactored the page table cursor, distinguishing `Cursor` from `CursorMut`, and split many functions to reduce dynamic condition checking. Other cleanups were also performed, including renaming the PTE `is_huge` API to `is_last`, hardening the tracked-mapping checks, and allowing `VmFrame` to be of any size.
Committed by: Tate, Hongliang Tian
Parent: 69d464fc6b
Commit: 141fbeaf0c
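As a rough illustration of the `is_huge` to `is_last` rename mentioned above, here is a self-contained toy sketch (the `ToyPte` type, the `HUGE_BIT` constant, and the bit layout are invented for illustration and are not the Asterinas code): a PTE is a leaf either because it sits at level 1 or because it maps a huge page, so the leaf check naturally takes the current paging level as a parameter.

```rust
// Minimal sketch of why `is_last(level)` replaces `is_huge()`.
type PagingLevel = u8;

struct ToyPte(u64);

const HUGE_BIT: u64 = 1 << 7; // hypothetical bit position, for illustration only

impl ToyPte {
    // Old style: only asks about the huge bit; the caller still has to
    // special-case level 1 everywhere.
    fn is_huge(&self) -> bool {
        self.0 & HUGE_BIT != 0
    }

    // New style: one query answers "does this PTE terminate the walk here?"
    fn is_last(&self, level: PagingLevel) -> bool {
        level == 1 || self.is_huge()
    }
}

fn main() {
    let pte = ToyPte(HUGE_BIT);
    assert!(pte.is_last(2)); // a huge page at level 2 is a leaf
    assert!(ToyPte(0).is_last(1)); // any level-1 PTE is a leaf
    assert!(!ToyPte(0).is_last(2)); // a level-2 non-huge PTE points to a child table
}
```

Folding the level check into one predicate lets callers such as the boot page table and the cursor ask a single question per PTE instead of pairing a level comparison with an architecture-specific huge-bit test.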
@ -138,7 +138,7 @@ impl RootTable {
|
||||
if bus_entry.is_present() {
|
||||
warn!("IOMMU: Overwritting the existing device page table");
|
||||
}
|
||||
let address = page_table.root_paddr();
|
||||
let address = unsafe { page_table.root_paddr() };
|
||||
context_table.page_tables.insert(address, page_table);
|
||||
let entry = ContextEntry(address as u128 | 1 | 0x1_0000_0000_0000_0000);
|
||||
context_table
|
||||
@ -262,7 +262,7 @@ impl ContextTable {
|
||||
|
||||
if !bus_entry.is_present() {
|
||||
let table = PageTable::<DeviceMode, PageTableEntry, PagingConsts>::empty();
|
||||
let address = table.root_paddr();
|
||||
let address = unsafe { table.root_paddr() };
|
||||
self.page_tables.insert(address, table);
|
||||
let entry = ContextEntry(address as u128 | 3 | 0x1_0000_0000_0000_0000);
|
||||
self.entries_frame
|
||||
|
@ -6,7 +6,8 @@ mod remapping;
|
||||
mod second_stage;
|
||||
|
||||
use log::info;
|
||||
use second_stage::{DeviceMode, PageTableEntry, PagingConsts};
|
||||
pub use second_stage::DeviceMode;
|
||||
use second_stage::{PageTableEntry, PagingConsts};
|
||||
use spin::Once;
|
||||
|
||||
use crate::{
|
||||
|
@ -13,7 +13,7 @@ use crate::vm::{
|
||||
/// The page table used by iommu maps the device address
|
||||
/// space to the physical address space.
|
||||
#[derive(Clone, Debug)]
|
||||
pub(super) struct DeviceMode {}
|
||||
pub struct DeviceMode {}
|
||||
|
||||
impl PageTableMode for DeviceMode {
|
||||
/// The device address space is 32-bit.
|
||||
@ -67,32 +67,23 @@ bitflags::bitflags! {
|
||||
pub struct PageTableEntry(u64);
|
||||
|
||||
impl PageTableEntry {
|
||||
const PHYS_MASK: usize = 0xFFFF_FFFF_F000;
|
||||
const PHYS_MASK: u64 = 0xFFFF_FFFF_F000;
|
||||
const PROP_MASK: u64 = !Self::PHYS_MASK & !PageTableFlags::LAST_PAGE.bits();
|
||||
}
|
||||
|
||||
impl PageTableEntryTrait for PageTableEntry {
|
||||
fn new(paddr: crate::vm::Paddr, prop: PageProperty, huge: bool, last: bool) -> Self {
|
||||
let mut flags = PageTableFlags::empty();
|
||||
if prop.flags.contains(PageFlags::W) {
|
||||
flags |= PageTableFlags::WRITABLE;
|
||||
}
|
||||
if prop.flags.contains(PageFlags::R) {
|
||||
flags |= PageTableFlags::READABLE;
|
||||
}
|
||||
if prop.cache != CachePolicy::Uncacheable {
|
||||
flags |= PageTableFlags::SNOOP;
|
||||
}
|
||||
if last {
|
||||
flags |= PageTableFlags::LAST_PAGE;
|
||||
}
|
||||
if huge {
|
||||
panic!("Huge page is not supported in iommu page table");
|
||||
}
|
||||
Self((paddr & Self::PHYS_MASK) as u64 | flags.bits)
|
||||
fn new_frame(paddr: Paddr, level: PagingLevel, prop: PageProperty) -> Self {
|
||||
let mut pte = Self(paddr as u64 & Self::PHYS_MASK | PageTableFlags::LAST_PAGE.bits());
|
||||
pte.set_prop(prop);
|
||||
pte
|
||||
}
|
||||
|
||||
fn new_pt(paddr: Paddr) -> Self {
|
||||
Self(paddr as u64 & Self::PHYS_MASK)
|
||||
}
|
||||
|
||||
fn paddr(&self) -> Paddr {
|
||||
(self.0 & Self::PHYS_MASK as u64) as usize
|
||||
(self.0 & Self::PHYS_MASK) as usize
|
||||
}
|
||||
|
||||
fn new_absent() -> Self {
|
||||
@ -131,7 +122,21 @@ impl PageTableEntryTrait for PageTableEntry {
|
||||
}
|
||||
}
|
||||
|
||||
fn is_huge(&self) -> bool {
|
||||
false
|
||||
fn set_prop(&mut self, prop: PageProperty) {
|
||||
let mut flags = PageTableFlags::empty();
|
||||
if prop.flags.contains(PageFlags::W) {
|
||||
flags |= PageTableFlags::WRITABLE;
|
||||
}
|
||||
if prop.flags.contains(PageFlags::R) {
|
||||
flags |= PageTableFlags::READABLE;
|
||||
}
|
||||
if prop.cache != CachePolicy::Uncacheable {
|
||||
flags |= PageTableFlags::SNOOP;
|
||||
}
|
||||
self.0 = self.0 & !Self::PROP_MASK | flags.bits();
|
||||
}
|
||||
|
||||
fn is_last(&self, level: PagingLevel) -> bool {
|
||||
level == 1
|
||||
}
|
||||
}
|
||||
|
@ -127,6 +127,7 @@ impl PageTableEntry {
|
||||
const PHYS_ADDR_MASK: usize = 0xF_FFFF_FFFF_F000;
|
||||
#[cfg(feature = "intel_tdx")]
|
||||
const PHYS_ADDR_MASK: usize = 0x7_FFFF_FFFF_F000;
|
||||
const PROP_MASK: usize = !Self::PHYS_ADDR_MASK & !PageTableFlags::HUGE.bits();
|
||||
}
|
||||
|
||||
/// Parse a bit-flag bits `val` in the representation of `from` to `to` in bits.
|
||||
@ -145,61 +146,30 @@ impl PageTableEntryTrait for PageTableEntry {
|
||||
self.0 & PageTableFlags::PRESENT.bits() != 0
|
||||
}
|
||||
|
||||
fn new(paddr: Paddr, prop: PageProperty, huge: bool, last: bool) -> Self {
|
||||
let mut flags =
|
||||
PageTableFlags::PRESENT.bits() | (huge as usize) << PageTableFlags::HUGE.bits().ilog2();
|
||||
if !huge && !last {
|
||||
// In x86 if it's an intermediate PTE, it's better to have the same permissions
|
||||
// as the most permissive child (to reduce hardware page walk accesses). But we
|
||||
// don't have a mechanism to keep it generic across architectures, thus just
|
||||
// setting it to be the most permissive.
|
||||
flags |= PageTableFlags::WRITABLE.bits() | PageTableFlags::USER.bits();
|
||||
#[cfg(feature = "intel_tdx")]
|
||||
{
|
||||
flags |= parse_flags!(
|
||||
prop.priv_flags.bits(),
|
||||
PrivFlags::SHARED,
|
||||
PageTableFlags::SHARED
|
||||
);
|
||||
}
|
||||
} else {
|
||||
flags |= parse_flags!(prop.flags.bits(), PageFlags::W, PageTableFlags::WRITABLE)
|
||||
| parse_flags!(!prop.flags.bits(), PageFlags::X, PageTableFlags::NO_EXECUTE)
|
||||
| parse_flags!(
|
||||
prop.flags.bits(),
|
||||
PageFlags::ACCESSED,
|
||||
PageTableFlags::ACCESSED
|
||||
)
|
||||
| parse_flags!(prop.flags.bits(), PageFlags::DIRTY, PageTableFlags::DIRTY)
|
||||
| parse_flags!(
|
||||
prop.priv_flags.bits(),
|
||||
PrivFlags::USER,
|
||||
PageTableFlags::USER
|
||||
)
|
||||
| parse_flags!(
|
||||
prop.priv_flags.bits(),
|
||||
PrivFlags::GLOBAL,
|
||||
PageTableFlags::GLOBAL
|
||||
);
|
||||
#[cfg(feature = "intel_tdx")]
|
||||
{
|
||||
flags |= parse_flags!(
|
||||
prop.priv_flags.bits(),
|
||||
PrivFlags::SHARED,
|
||||
PageTableFlags::SHARED
|
||||
);
|
||||
}
|
||||
}
|
||||
match prop.cache {
|
||||
CachePolicy::Writeback => {}
|
||||
CachePolicy::Writethrough => {
|
||||
flags |= PageTableFlags::WRITE_THROUGH.bits();
|
||||
}
|
||||
CachePolicy::Uncacheable => {
|
||||
flags |= PageTableFlags::NO_CACHE.bits();
|
||||
}
|
||||
_ => panic!("unsupported cache policy"),
|
||||
}
|
||||
fn new_frame(paddr: Paddr, level: PagingLevel, prop: PageProperty) -> Self {
|
||||
let mut pte = Self(
|
||||
paddr & Self::PHYS_ADDR_MASK
|
||||
| ((level != 1) as usize) << PageTableFlags::HUGE.bits().ilog2(),
|
||||
);
|
||||
pte.set_prop(prop);
|
||||
pte
|
||||
}
|
||||
|
||||
fn new_pt(paddr: Paddr) -> Self {
|
||||
// In x86 if it's an intermediate PTE, it's better to have the same permissions
|
||||
// as the most permissive child (to reduce hardware page walk accesses). But we
|
||||
// don't have a mechanism to keep it generic across architectures, thus just
|
||||
// setting it to be the most permissive.
|
||||
let flags = PageTableFlags::PRESENT.bits()
|
||||
| PageTableFlags::WRITABLE.bits()
|
||||
| PageTableFlags::USER.bits();
|
||||
#[cfg(feature = "intel_tdx")]
|
||||
let flags = flags
|
||||
| parse_flags!(
|
||||
prop.priv_flags.bits(),
|
||||
PrivFlags::SHARED,
|
||||
PageTableFlags::SHARED
|
||||
);
|
||||
Self(paddr & Self::PHYS_ADDR_MASK | flags)
|
||||
}
|
||||
|
||||
@ -232,8 +202,49 @@ impl PageTableEntryTrait for PageTableEntry {
|
||||
}
|
||||
}
|
||||
|
||||
fn is_huge(&self) -> bool {
|
||||
self.0 & PageTableFlags::HUGE.bits() != 0
|
||||
fn set_prop(&mut self, prop: PageProperty) {
|
||||
let mut flags = PageTableFlags::PRESENT.bits();
|
||||
flags |= parse_flags!(prop.flags.bits(), PageFlags::W, PageTableFlags::WRITABLE)
|
||||
| parse_flags!(!prop.flags.bits(), PageFlags::X, PageTableFlags::NO_EXECUTE)
|
||||
| parse_flags!(
|
||||
prop.flags.bits(),
|
||||
PageFlags::ACCESSED,
|
||||
PageTableFlags::ACCESSED
|
||||
)
|
||||
| parse_flags!(prop.flags.bits(), PageFlags::DIRTY, PageTableFlags::DIRTY)
|
||||
| parse_flags!(
|
||||
prop.priv_flags.bits(),
|
||||
PrivFlags::USER,
|
||||
PageTableFlags::USER
|
||||
)
|
||||
| parse_flags!(
|
||||
prop.priv_flags.bits(),
|
||||
PrivFlags::GLOBAL,
|
||||
PageTableFlags::GLOBAL
|
||||
);
|
||||
#[cfg(feature = "intel_tdx")]
|
||||
{
|
||||
flags |= parse_flags!(
|
||||
prop.priv_flags.bits(),
|
||||
PrivFlags::SHARED,
|
||||
PageTableFlags::SHARED
|
||||
);
|
||||
}
|
||||
match prop.cache {
|
||||
CachePolicy::Writeback => {}
|
||||
CachePolicy::Writethrough => {
|
||||
flags |= PageTableFlags::WRITE_THROUGH.bits();
|
||||
}
|
||||
CachePolicy::Uncacheable => {
|
||||
flags |= PageTableFlags::NO_CACHE.bits();
|
||||
}
|
||||
_ => panic!("unsupported cache policy"),
|
||||
}
|
||||
self.0 = self.0 & !Self::PROP_MASK | flags;
|
||||
}
|
||||
|
||||
fn is_last(&self, level: PagingLevel) -> bool {
|
||||
level == 1 || (self.0 & PageTableFlags::HUGE.bits() != 0)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2,7 +2,7 @@
|
||||
|
||||
use core::{
|
||||
ops::Deref,
|
||||
sync::atomic::{AtomicU32, AtomicU8},
|
||||
sync::atomic::{AtomicU16, AtomicU32, AtomicU8},
|
||||
};
|
||||
|
||||
use static_assertions::const_assert_eq;
|
||||
@ -132,17 +132,18 @@ impl Deref for FrameMetaRef {
|
||||
pub struct FrameMeta {
|
||||
pub frame_type: FrameType, // 1 byte
|
||||
/// The first 8-bit counter.
|
||||
/// Currently unused.
|
||||
/// - For [`FrameType::Anonymous`], it is not used.
|
||||
/// - For [`FrameType::PageTable`], it is used as a spinlock.
|
||||
pub counter8_1: AtomicU8, // 1 byte
|
||||
/// The second 8-bit counter.
|
||||
/// Currently unused.
|
||||
pub counter8_2: AtomicU8, // 1 byte
|
||||
/// The third 8-bit counter.
|
||||
/// Currently unused.
|
||||
pub counter8_3: AtomicU8, // 1 byte
|
||||
/// The first 16-bit counter.
|
||||
/// - For [`FrameType::Anonymous`], it is not used.
|
||||
/// - For [`FrameType::PageTable`], it is used as the map count. The map
|
||||
/// count is the number of present children.
|
||||
pub counter16_1: AtomicU16, // 2 bytes
|
||||
/// The first 32-bit counter.
|
||||
/// It is used in different types of frames with different semantics.
|
||||
/// - For [`FrameType::Anonymous`], it is the handle count.
|
||||
/// - For [`FrameType::PageTable`], it is used as the reference count. The referencer
|
||||
/// can be either a handle, a PTE or a CPU that loads it.
|
||||
pub counter32_1: AtomicU32, // 4 bytes
|
||||
}
|
||||
|
||||
@ -155,4 +156,6 @@ pub enum FrameType {
|
||||
Meta,
|
||||
Anonymous,
|
||||
PageTable,
|
||||
/// Frames that contain kernel code.
|
||||
KernelCode,
|
||||
}
|
||||
|
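The counter roles documented above (a map count in `counter16_1`, a reference count in `counter32_1`) can be illustrated with a stand-alone sketch of a metadata-backed handle. The types and names below (`ToyFrameMeta`, `ToyFrameHandle`) are hypothetical, not the Asterinas implementation; the fence mirrors the `Arc::drop` reasoning cited in the `VmFrame`/`VmSegment` drop code later in this diff.

```rust
use core::sync::atomic::{fence, AtomicU32, Ordering};

// A stand-alone sketch of a frame handle whose lifetime is governed by an
// atomic reference counter stored in per-frame metadata, mirroring the
// `counter32_1` role described above.
struct ToyFrameMeta {
    ref_count: AtomicU32,
}

struct ToyFrameHandle<'a> {
    meta: &'a ToyFrameMeta,
}

impl<'a> ToyFrameHandle<'a> {
    fn clone_handle(&self) -> Self {
        // Relaxed is enough for an increment: the handle we clone from keeps
        // the frame alive, so no ordering with other memory is required.
        self.meta.ref_count.fetch_add(1, Ordering::Relaxed);
        Self { meta: self.meta }
    }
}

impl Drop for ToyFrameHandle<'_> {
    fn drop(&mut self) {
        if self.meta.ref_count.fetch_sub(1, Ordering::Release) == 1 {
            // Same reasoning as `Arc::drop`: the Acquire fence makes all prior
            // uses of the frame visible before it is reused or freed.
            fence(Ordering::Acquire);
            // ... free the physical frame here ...
        }
    }
}

fn main() {
    static META: ToyFrameMeta = ToyFrameMeta { ref_count: AtomicU32::new(1) };
    let h1 = ToyFrameHandle { meta: &META };
    let h2 = h1.clone_handle();
    drop(h1);
    drop(h2); // last handle; the frame would be freed here
}
```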
@ -239,6 +239,10 @@ impl VmFrame {
|
||||
self.meta.size()
|
||||
}
|
||||
|
||||
pub fn level(&self) -> PagingLevel {
|
||||
self.meta.level()
|
||||
}
|
||||
|
||||
pub fn end_paddr(&self) -> Paddr {
|
||||
self.start_paddr() + self.size()
|
||||
}
|
||||
@ -258,7 +262,7 @@ impl VmFrame {
|
||||
if self.size() != src.size() {
|
||||
panic!("The size of the source frame is different from the destination frame");
|
||||
}
|
||||
// Safety: the source and the destination does not overlap.
|
||||
// SAFETY: the source and the destination does not overlap.
|
||||
unsafe {
|
||||
core::ptr::copy_nonoverlapping(src.as_ptr(), self.as_mut_ptr(), self.size());
|
||||
}
|
||||
@ -268,13 +272,13 @@ impl VmFrame {
|
||||
impl<'a> VmFrame {
|
||||
/// Returns a reader to read data from it.
|
||||
pub fn reader(&'a self) -> VmReader<'a> {
|
||||
// Safety: the memory of the page is contiguous and is valid during `'a`.
|
||||
// SAFETY: the memory of the page is contiguous and is valid during `'a`.
|
||||
unsafe { VmReader::from_raw_parts(self.as_ptr(), self.size()) }
|
||||
}
|
||||
|
||||
/// Returns a writer to write data into it.
|
||||
pub fn writer(&'a self) -> VmWriter<'a> {
|
||||
// Safety: the memory of the page is contiguous and is valid during `'a`.
|
||||
// SAFETY: the memory of the page is contiguous and is valid during `'a`.
|
||||
unsafe { VmWriter::from_raw_parts_mut(self.as_mut_ptr(), self.size()) }
|
||||
}
|
||||
}
|
||||
@ -309,10 +313,10 @@ impl Drop for VmFrame {
|
||||
// A fence is needed here with the same reasons stated in the implementation of
|
||||
// `Arc::drop`: <https://doc.rust-lang.org/std/sync/struct.Arc.html#method.drop>.
|
||||
atomic::fence(Ordering::Acquire);
|
||||
// Safety: the reference counter is 1 before decremented, so this is the only
|
||||
// SAFETY: the reference counter is 1 before decremented, so this is the only
|
||||
// (exclusive) handle.
|
||||
unsafe { self.meta.deref_mut().frame_type = FrameType::Free };
|
||||
// Safety: the page frame is valid.
|
||||
// SAFETY: the page frame is valid.
|
||||
unsafe {
|
||||
allocator::dealloc_contiguous(self.paddr() / PAGE_SIZE, self.size() / PAGE_SIZE);
|
||||
}
|
||||
@ -460,13 +464,13 @@ impl VmSegment {
|
||||
impl<'a> VmSegment {
|
||||
/// Returns a reader to read data from it.
|
||||
pub fn reader(&'a self) -> VmReader<'a> {
|
||||
// Safety: the memory of the page frames is contiguous and is valid during `'a`.
|
||||
// SAFETY: the memory of the page frames is contiguous and is valid during `'a`.
|
||||
unsafe { VmReader::from_raw_parts(self.as_ptr(), self.nbytes()) }
|
||||
}
|
||||
|
||||
/// Returns a writer to write data into it.
|
||||
pub fn writer(&'a self) -> VmWriter<'a> {
|
||||
// Safety: the memory of the page frames is contiguous and is valid during `'a`.
|
||||
// SAFETY: the memory of the page frames is contiguous and is valid during `'a`.
|
||||
unsafe { VmWriter::from_raw_parts_mut(self.as_mut_ptr(), self.nbytes()) }
|
||||
}
|
||||
}
|
||||
@ -501,10 +505,10 @@ impl Drop for VmSegment {
|
||||
// A fence is needed here with the same reasons stated in the implementation of
|
||||
// `Arc::drop`: <https://doc.rust-lang.org/std/sync/struct.Arc.html#method.drop>.
|
||||
atomic::fence(Ordering::Acquire);
|
||||
// Safety: the reference counter is 1 before decremented, so this is the only
|
||||
// SAFETY: the reference counter is 1 before decremented, so this is the only
|
||||
// (exclusive) handle.
|
||||
unsafe { self.inner.meta.deref_mut().frame_type = FrameType::Free };
|
||||
// Safety: the range of contiguous page frames is valid.
|
||||
// SAFETY: the range of contiguous page frames is valid.
|
||||
unsafe {
|
||||
allocator::dealloc_contiguous(self.inner.start_frame_index(), self.inner.nframes);
|
||||
}
|
||||
|
@ -174,7 +174,7 @@ impl<'a> VmReader<'a> {
|
||||
|
||||
/// Returns the number of bytes for the remaining data.
|
||||
pub const fn remain(&self) -> usize {
|
||||
// Safety: the end is equal to or greater than the cursor.
|
||||
// SAFETY: the end is equal to or greater than the cursor.
|
||||
unsafe { self.end.sub_ptr(self.cursor) }
|
||||
}
|
||||
|
||||
@ -193,7 +193,7 @@ impl<'a> VmReader<'a> {
|
||||
/// This method ensures the postcondition of `self.remain() <= max_remain`.
|
||||
pub const fn limit(mut self, max_remain: usize) -> Self {
|
||||
if max_remain < self.remain() {
|
||||
// Safety: the new end is less than the old end.
|
||||
// SAFETY: the new end is less than the old end.
|
||||
unsafe { self.end = self.cursor.add(max_remain) };
|
||||
}
|
||||
self
|
||||
@ -208,7 +208,7 @@ impl<'a> VmReader<'a> {
|
||||
pub fn skip(mut self, nbytes: usize) -> Self {
|
||||
assert!(nbytes <= self.remain());
|
||||
|
||||
// Safety: the new cursor is less than or equal to the end.
|
||||
// SAFETY: the new cursor is less than or equal to the end.
|
||||
unsafe { self.cursor = self.cursor.add(nbytes) };
|
||||
self
|
||||
}
|
||||
@ -227,7 +227,7 @@ impl<'a> VmReader<'a> {
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Safety: the memory range is valid since `copy_len` is the minimum
|
||||
// SAFETY: the memory range is valid since `copy_len` is the minimum
|
||||
// of the reader's remaining data and the writer's available space.
|
||||
unsafe {
|
||||
core::ptr::copy(self.cursor, writer.cursor, copy_len);
|
||||
@ -255,7 +255,7 @@ impl<'a> VmReader<'a> {
|
||||
|
||||
impl<'a> From<&'a [u8]> for VmReader<'a> {
|
||||
fn from(slice: &'a [u8]) -> Self {
|
||||
// Safety: the range of memory is contiguous and is valid during `'a`.
|
||||
// SAFETY: the range of memory is contiguous and is valid during `'a`.
|
||||
unsafe { Self::from_raw_parts(slice.as_ptr(), slice.len()) }
|
||||
}
|
||||
}
|
||||
@ -284,7 +284,7 @@ impl<'a> VmWriter<'a> {
|
||||
|
||||
/// Returns the number of bytes for the available space.
|
||||
pub const fn avail(&self) -> usize {
|
||||
// Safety: the end is equal to or greater than the cursor.
|
||||
// SAFETY: the end is equal to or greater than the cursor.
|
||||
unsafe { self.end.sub_ptr(self.cursor) }
|
||||
}
|
||||
|
||||
@ -303,7 +303,7 @@ impl<'a> VmWriter<'a> {
|
||||
/// This method ensures the postcondition of `self.avail() <= max_avail`.
|
||||
pub const fn limit(mut self, max_avail: usize) -> Self {
|
||||
if max_avail < self.avail() {
|
||||
// Safety: the new end is less than the old end.
|
||||
// SAFETY: the new end is less than the old end.
|
||||
unsafe { self.end = self.cursor.add(max_avail) };
|
||||
}
|
||||
self
|
||||
@ -318,7 +318,7 @@ impl<'a> VmWriter<'a> {
|
||||
pub fn skip(mut self, nbytes: usize) -> Self {
|
||||
assert!(nbytes <= self.avail());
|
||||
|
||||
// Safety: the new cursor is less than or equal to the end.
|
||||
// SAFETY: the new cursor is less than or equal to the end.
|
||||
unsafe { self.cursor = self.cursor.add(nbytes) };
|
||||
self
|
||||
}
|
||||
@ -337,7 +337,7 @@ impl<'a> VmWriter<'a> {
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Safety: the memory range is valid since `copy_len` is the minimum
|
||||
// SAFETY: the memory range is valid since `copy_len` is the minimum
|
||||
// of the reader's remaining data and the writer's available space.
|
||||
unsafe {
|
||||
core::ptr::copy(reader.cursor, self.cursor, copy_len);
|
||||
@ -364,7 +364,7 @@ impl<'a> VmWriter<'a> {
|
||||
let written_num = avail / core::mem::size_of::<T>();
|
||||
|
||||
for i in 0..written_num {
|
||||
// Safety: `written_num` is calculated by the avail size and the size of the type `T`,
|
||||
// SAFETY: `written_num` is calculated by the avail size and the size of the type `T`,
|
||||
// hence the `add` operation and `write` operation are valid and will only manipulate
|
||||
// the memory managed by this writer.
|
||||
unsafe {
|
||||
@ -380,7 +380,7 @@ impl<'a> VmWriter<'a> {
|
||||
|
||||
impl<'a> From<&'a mut [u8]> for VmWriter<'a> {
|
||||
fn from(slice: &'a mut [u8]) -> Self {
|
||||
// Safety: the range of memory is contiguous and is valid during `'a`.
|
||||
// SAFETY: the range of memory is contiguous and is valid during `'a`.
|
||||
unsafe { Self::from_raw_parts_mut(slice.as_mut_ptr(), slice.len()) }
|
||||
}
|
||||
}
|
||||
|
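The `remain`/`limit`/`skip` contracts documented in the `VmReader`/`VmWriter` changes above can be restated over a plain byte slice. This is only a sketch of the stated postconditions (`SliceReader` is a hypothetical type), not the pointer-based implementation:

```rust
// A minimal, safe re-statement of the `limit`/`skip` semantics, using a byte
// slice instead of raw cursor pointers.
struct SliceReader<'a> {
    data: &'a [u8],
}

impl<'a> SliceReader<'a> {
    fn remain(&self) -> usize {
        self.data.len()
    }

    /// Postcondition: `self.remain() <= max_remain`.
    fn limit(mut self, max_remain: usize) -> Self {
        if max_remain < self.data.len() {
            self.data = &self.data[..max_remain];
        }
        self
    }

    /// Panics if `nbytes > self.remain()`, like the pointer-based version.
    fn skip(mut self, nbytes: usize) -> Self {
        assert!(nbytes <= self.remain());
        self.data = &self.data[nbytes..];
        self
    }
}

fn main() {
    let buf = [0u8; 16];
    let reader = SliceReader { data: &buf }.limit(8).skip(3);
    assert_eq!(reader.remain(), 5);
}
```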
@ -7,7 +7,7 @@
|
||||
//!
|
||||
//! ```text
|
||||
//! +-+ <- the highest used address (0xffff_ffff_ffff_0000)
|
||||
//! | | For the kernel code, 1 GiB.
|
||||
//! | | For the kernel code, 1 GiB. Mapped frames are tracked with handles.
|
||||
//! +-+ <- 0xffff_ffff_8000_0000
|
||||
//! | |
|
||||
//! | | Unused hole.
|
||||
@ -42,14 +42,13 @@ use spin::Once;
|
||||
use super::{
|
||||
frame::{
|
||||
allocator::FRAME_ALLOCATOR,
|
||||
meta,
|
||||
meta::{FrameMeta, FrameType},
|
||||
meta::{self, FrameMeta, FrameType},
|
||||
},
|
||||
nr_subpage_per_huge,
|
||||
page_prop::{CachePolicy, PageFlags, PageProperty, PrivilegedPageFlags},
|
||||
page_size,
|
||||
page_table::{boot_pt::BootPageTable, KernelMode, PageTable},
|
||||
MemoryRegionType, Paddr, PagingConstsTrait, Vaddr, VmFrame, PAGE_SIZE,
|
||||
FrameMetaRef, MemoryRegionType, Paddr, PagingConstsTrait, Vaddr, VmFrame, PAGE_SIZE,
|
||||
};
|
||||
use crate::{
|
||||
arch::mm::{PageTableEntry, PagingConsts},
|
||||
@ -161,7 +160,7 @@ pub fn init_kernel_page_table() {
|
||||
};
|
||||
let mut cursor = kpt.cursor_mut(&from).unwrap();
|
||||
for frame in meta_frames {
|
||||
// Safety: we are doing the metadata mappings for the kernel.
|
||||
// SAFETY: we are doing the metadata mappings for the kernel.
|
||||
unsafe {
|
||||
cursor.map(frame, prop);
|
||||
}
|
||||
@ -201,9 +200,18 @@ pub fn init_kernel_page_table() {
|
||||
cache: CachePolicy::Writeback,
|
||||
priv_flags: PrivilegedPageFlags::GLOBAL,
|
||||
};
|
||||
// SAFETY: we are doing mappings for the kernel.
|
||||
unsafe {
|
||||
kpt.map(&from, &to, prop).unwrap();
|
||||
let mut cursor = kpt.cursor_mut(&from).unwrap();
|
||||
for frame_paddr in to.step_by(PAGE_SIZE) {
|
||||
let mut meta = unsafe { FrameMetaRef::from_raw(frame_paddr, 1) };
|
||||
// SAFETY: we are marking the type of the frame containing loaded kernel code.
|
||||
unsafe {
|
||||
meta.deref_mut().frame_type = FrameType::KernelCode;
|
||||
}
|
||||
let frame = VmFrame { meta };
|
||||
// SAFETY: we are doing mappings for the kernel.
|
||||
unsafe {
|
||||
cursor.map(frame, prop);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -211,7 +219,7 @@ pub fn init_kernel_page_table() {
|
||||
}
|
||||
|
||||
pub fn activate_kernel_page_table() {
|
||||
// Safety: the kernel page table is initialized properly.
|
||||
// SAFETY: the kernel page table is initialized properly.
|
||||
unsafe {
|
||||
KERNEL_PAGE_TABLE.get().unwrap().activate_unchecked();
|
||||
crate::arch::mm::tlb_flush_all_including_global();
|
||||
@ -252,9 +260,9 @@ fn init_boot_page_table_and_page_meta(
|
||||
let meta_frames = meta_frames
|
||||
.into_iter()
|
||||
.map(|paddr| {
|
||||
// Safety: the frame is allocated but not initialized thus not referenced.
|
||||
// SAFETY: the frame is allocated but not initialized thus not referenced.
|
||||
let mut frame = unsafe { VmFrame::from_free_raw(paddr, 1) };
|
||||
// Safety: this is the only reference to the frame so it's exclusive.
|
||||
// SAFETY: this is the only reference to the frame so it's exclusive.
|
||||
unsafe { frame.meta.deref_mut().frame_type = FrameType::Meta };
|
||||
frame
|
||||
})
|
||||
|
@ -33,7 +33,9 @@ pub use self::{
|
||||
space::{VmMapOptions, VmSpace},
|
||||
};
|
||||
pub(crate) use self::{
|
||||
frame::meta::FrameMetaRef, kspace::paddr_to_vaddr, page_prop::PrivilegedPageFlags,
|
||||
frame::meta::{FrameMetaRef, FrameType},
|
||||
kspace::paddr_to_vaddr,
|
||||
page_prop::PrivilegedPageFlags,
|
||||
page_table::PageTable,
|
||||
};
|
||||
use crate::{
|
||||
@ -46,7 +48,7 @@ pub type PagingLevel = u8;
|
||||
|
||||
/// A minimal set of constants that determines the paging system.
|
||||
/// This provides an abstraction over most paging modes in common architectures.
|
||||
pub(crate) trait PagingConstsTrait: Debug + 'static {
|
||||
pub(crate) trait PagingConstsTrait: Clone + Debug + 'static {
|
||||
/// The smallest page size.
|
||||
/// This is also the page size at level 1 page tables.
|
||||
const BASE_PAGE_SIZE: usize;
|
||||
|
@ -49,10 +49,9 @@ impl<E: PageTableEntryTrait, C: PagingConstsTrait> BootPageTable<E, C> {
|
||||
let pte = unsafe { pte_ptr.read() };
|
||||
pt = if !pte.is_present() {
|
||||
let frame = self.alloc_frame();
|
||||
let new_pte = E::new(frame * C::BASE_PAGE_SIZE, pte.prop(), false, false);
|
||||
unsafe { pte_ptr.write(new_pte) };
|
||||
unsafe { pte_ptr.write(E::new_pt(frame * C::BASE_PAGE_SIZE)) };
|
||||
frame
|
||||
} else if pte.is_huge() {
|
||||
} else if pte.is_last(level) {
|
||||
panic!("mapping an already mapped huge page in the boot page table");
|
||||
} else {
|
||||
pte.paddr() / C::BASE_PAGE_SIZE
|
||||
@ -66,8 +65,7 @@ impl<E: PageTableEntryTrait, C: PagingConstsTrait> BootPageTable<E, C> {
|
||||
if pte.is_present() {
|
||||
panic!("mapping an already mapped page in the boot page table");
|
||||
}
|
||||
let new_pte = E::new(to * C::BASE_PAGE_SIZE, prop, false, true);
|
||||
unsafe { pte_ptr.write(new_pte) };
|
||||
unsafe { pte_ptr.write(E::new_frame(to * C::BASE_PAGE_SIZE, 1, prop)) };
|
||||
}
|
||||
|
||||
fn alloc_frame(&mut self) -> FrameNumber {
|
||||
|
@ -50,49 +50,59 @@
|
||||
//! required. The cursor unlocks all locks, then locks all the way down to `B`, then
|
||||
//! checks if `B` is empty, and finally recycles all the resources on the way back.
|
||||
|
||||
use alloc::sync::Arc;
|
||||
use core::{any::TypeId, ops::Range};
|
||||
|
||||
use align_ext::AlignExt;
|
||||
|
||||
use super::{
|
||||
nr_subpage_per_huge, page_size, pte_index, Child, KernelMode, PageTable, PageTableEntryTrait,
|
||||
PageTableError, PageTableFrame, PageTableMode, PagingConstsTrait,
|
||||
};
|
||||
use crate::{
|
||||
sync::{ArcSpinLockGuard, SpinLock},
|
||||
vm::{Paddr, PageProperty, PagingLevel, Vaddr, VmFrame},
|
||||
page_size, pte_index, Child, KernelMode, PageTable, PageTableEntryTrait, PageTableError,
|
||||
PageTableFrame, PageTableMode, PagingConstsTrait, PagingLevel,
|
||||
};
|
||||
use crate::vm::{Paddr, PageProperty, Vaddr, VmFrame};
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub(crate) enum PageTableQueryResult {
|
||||
NotMapped {
|
||||
va: Vaddr,
|
||||
len: usize,
|
||||
},
|
||||
Mapped {
|
||||
va: Vaddr,
|
||||
frame: VmFrame,
|
||||
prop: PageProperty,
|
||||
},
|
||||
MappedUntracked {
|
||||
va: Vaddr,
|
||||
pa: Paddr,
|
||||
len: usize,
|
||||
prop: PageProperty,
|
||||
},
|
||||
}
|
||||
|
||||
/// The cursor for traversal over the page table.
|
||||
///
|
||||
/// Efficient methods are provided to move the cursor forward by a slot,
|
||||
/// doing mapping, unmaping, or querying for the traversed slot. Also you
|
||||
/// can jump forward or backward by re-walking without releasing the lock.
|
||||
///
|
||||
/// A slot is a PTE at any level, which corresponds to a certain virtual
|
||||
/// memory range sized by the "page size" of the current level.
|
||||
///
|
||||
/// Doing mapping is somewhat like a depth-first search on a tree, except
|
||||
/// that we modify the tree while traversing it. We use a guard stack to
|
||||
/// A cursor is able to move to the next slot, to read page properties,
|
||||
/// and even to jump to a virtual address directly. We use a guard stack to
|
||||
/// simulate the recursion, and adopt a page table locking protocol to
|
||||
/// provide concurrency.
|
||||
pub(crate) struct CursorMut<'a, M: PageTableMode, E: PageTableEntryTrait, C: PagingConstsTrait>
|
||||
#[derive(Debug)]
|
||||
pub(crate) struct Cursor<'a, M: PageTableMode, E: PageTableEntryTrait, C: PagingConstsTrait>
|
||||
where
|
||||
[(); nr_subpage_per_huge::<C>()]:,
|
||||
[(); C::NR_LEVELS as usize]:,
|
||||
{
|
||||
pt: &'a PageTable<M, E, C>,
|
||||
guards: [Option<ArcSpinLockGuard<PageTableFrame<E, C>>>; C::NR_LEVELS as usize],
|
||||
guards: [Option<PageTableFrame<E, C>>; C::NR_LEVELS as usize],
|
||||
level: PagingLevel, // current level
|
||||
guard_level: PagingLevel, // from guard_level to level, the locks are held
|
||||
va: Vaddr, // current virtual address
|
||||
barrier_va: Range<Vaddr>, // virtual address range that is locked
|
||||
}
|
||||
|
||||
impl<'a, M: PageTableMode, E: PageTableEntryTrait, C: PagingConstsTrait> CursorMut<'a, M, E, C>
|
||||
impl<'a, M: PageTableMode, E: PageTableEntryTrait, C: PagingConstsTrait> Cursor<'a, M, E, C>
|
||||
where
|
||||
[(); nr_subpage_per_huge::<C>()]:,
|
||||
[(); C::NR_LEVELS as usize]:,
|
||||
{
|
||||
/// Create a cursor exclusively owning the locks for the given range.
|
||||
@ -112,7 +122,7 @@ where
|
||||
// Create a guard array that only holds the root node lock.
|
||||
let guards = core::array::from_fn(|i| {
|
||||
if i == 0 {
|
||||
Some(pt.root_frame.lock_arc())
|
||||
Some(pt.root.copy_handle().lock())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
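The guard array created above (initially holding only the root node lock) is the guard stack described in the cursor documentation. Below is a toy model of just that bookkeeping, assuming four paging levels; the names are hypothetical and no real locking is performed:

```rust
// A toy model of the guard-stack indexing used by the cursor: one optional
// lock guard per paging level, where `guards[NR_LEVELS - level]` is the guard
// of the node currently visited at `level`.
const NR_LEVELS: usize = 4;

// Stands in for a held spin-lock guard on one page table node.
struct ToyGuard;

struct ToyCursor {
    guards: [Option<ToyGuard>; NR_LEVELS],
    level: usize, // current level: NR_LEVELS at the root, 1 at the leaves
}

impl ToyCursor {
    fn new_at_root() -> Self {
        let mut guards: [Option<ToyGuard>; NR_LEVELS] = [None, None, None, None];
        guards[0] = Some(ToyGuard); // only the root node is locked initially
        Self {
            guards,
            level: NR_LEVELS,
        }
    }

    // Going down one level locks the child node and pushes its guard.
    fn level_down(&mut self) {
        assert!(self.level > 1);
        self.level -= 1;
        self.guards[NR_LEVELS - self.level] = Some(ToyGuard);
    }

    // Going up one level drops the guard of the node being left.
    fn level_up(&mut self) {
        assert!(self.level < NR_LEVELS);
        self.guards[NR_LEVELS - self.level] = None;
        self.level += 1;
    }
}

fn main() {
    let mut cursor = ToyCursor::new_at_root();
    cursor.level_down(); // level 3
    cursor.level_down(); // level 2
    assert_eq!(cursor.level, 2);
    assert!(cursor.guards[NR_LEVELS - 2].is_some());
    cursor.level_up(); // back to level 3, the level-2 guard is dropped
    assert!(cursor.guards[NR_LEVELS - 2].is_none());
}
```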
@ -130,227 +140,24 @@ where
|
||||
//
|
||||
// While going down, previous guards of too-high levels will be released.
|
||||
loop {
|
||||
let cur_pte = cursor.read_cur_pte();
|
||||
let level_too_high = {
|
||||
let start_idx = pte_index::<C>(va.start, cursor.level);
|
||||
let end_idx = pte_index::<C>(va.end - 1, cursor.level);
|
||||
start_idx == end_idx
|
||||
};
|
||||
if !level_too_high || !cursor.cur_child().is_pt() {
|
||||
if !level_too_high || !cur_pte.is_present() || cur_pte.is_last(cursor.level) {
|
||||
break;
|
||||
}
|
||||
cursor.level_down(None);
|
||||
cursor.level_down();
|
||||
// Release the guard of the previous level.
|
||||
cursor.guards[(C::NR_LEVELS - cursor.level) as usize - 1] = None;
|
||||
cursor.guard_level -= 1;
|
||||
}
|
||||
Ok(cursor)
|
||||
}
|
||||
|
||||
/// Jump to the given virtual address.
|
||||
///
|
||||
/// It panics if the address is out of the range where the cursor is required to operate,
|
||||
/// or has bad alignment.
|
||||
pub(crate) fn jump(&mut self, va: Vaddr) {
|
||||
assert!(self.barrier_va.contains(&va));
|
||||
assert!(va % C::BASE_PAGE_SIZE == 0);
|
||||
loop {
|
||||
let cur_node_start = self.va & !(page_size::<C>(self.level + 1) - 1);
|
||||
let cur_node_end = cur_node_start + page_size::<C>(self.level + 1);
|
||||
// If the address is within the current node, we can jump directly.
|
||||
if cur_node_start <= va && va < cur_node_end {
|
||||
self.va = va;
|
||||
return;
|
||||
}
|
||||
// There is a corner case that the cursor is depleted, sitting at the start of the
|
||||
// next node but the next node is not locked because the parent is not locked.
|
||||
if self.va >= self.barrier_va.end && self.level == self.guard_level {
|
||||
self.va = va;
|
||||
return;
|
||||
}
|
||||
debug_assert!(self.level < self.guard_level);
|
||||
self.level_up();
|
||||
}
|
||||
}
|
||||
|
||||
/// Map the range starting from the current address to a `VmFrame`.
|
||||
///
|
||||
/// # Panic
|
||||
///
|
||||
/// This function will panic if
|
||||
/// - the virtual address range to be mapped is out of the range;
|
||||
/// - it is already mapped to a huge page while the caller wants to map a smaller one.
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// The caller should ensure that the virtual range being mapped does
|
||||
/// not affect kernel's memory safety.
|
||||
pub(crate) unsafe fn map(&mut self, frame: VmFrame, prop: PageProperty) {
|
||||
let end = self.va + C::BASE_PAGE_SIZE;
|
||||
assert!(end <= self.barrier_va.end);
|
||||
// Go down if not applicable.
|
||||
while self.level > C::HIGHEST_TRANSLATION_LEVEL
|
||||
|| self.va % page_size::<C>(self.level) != 0
|
||||
|| self.va + page_size::<C>(self.level) > end
|
||||
{
|
||||
self.level_down(Some(prop));
|
||||
continue;
|
||||
}
|
||||
// Map the current page.
|
||||
let idx = self.cur_idx();
|
||||
let level = self.level;
|
||||
self.cur_node_mut()
|
||||
.set_child(idx, Child::Frame(frame), Some(prop), level > 1);
|
||||
self.move_forward();
|
||||
}
|
||||
|
||||
/// Map the range starting from the current address to a physical address range.
|
||||
///
|
||||
/// The function will map as many huge pages as possible, and it will split
|
||||
/// the huge pages into smaller pages if necessary. If the input range is
|
||||
/// large, the resulting mappings may look like this (if very huge pages
|
||||
/// supported):
|
||||
///
|
||||
/// ```text
|
||||
/// start end
|
||||
/// |----|----------------|--------------------------------|----|----|
|
||||
/// base huge very huge base base
|
||||
/// 4KiB 2MiB 1GiB 4KiB 4KiB
|
||||
/// ```
|
||||
///
|
||||
/// For safety and conciseness, using this method is not recommended in practice.
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// The caller should ensure that
|
||||
/// - the range being mapped does not affect kernel's memory safety;
|
||||
/// - the physical address to be mapped is valid and safe to use.
|
||||
pub(crate) unsafe fn map_pa(&mut self, pa: &Range<Paddr>, prop: PageProperty) {
|
||||
let end = self.va + pa.len();
|
||||
let mut pa = pa.start;
|
||||
assert!(end <= self.barrier_va.end);
|
||||
while self.va < end {
|
||||
// We ensure not mapping in reserved kernel shared tables or releasing it.
|
||||
// Although it may be an invariant for all architectures and will be optimized
|
||||
// out by the compiler since `C::NR_LEVELS - 1 > C::HIGHEST_TRANSLATION_LEVEL`.
|
||||
let is_kernel_shared_node =
|
||||
TypeId::of::<M>() == TypeId::of::<KernelMode>() && self.level >= C::NR_LEVELS - 1;
|
||||
if self.level > C::HIGHEST_TRANSLATION_LEVEL
|
||||
|| is_kernel_shared_node
|
||||
|| self.va % page_size::<C>(self.level) != 0
|
||||
|| self.va + page_size::<C>(self.level) > end
|
||||
|| pa % page_size::<C>(self.level) != 0
|
||||
{
|
||||
self.level_down(Some(prop));
|
||||
continue;
|
||||
}
|
||||
// Map the current page.
|
||||
let idx = self.cur_idx();
|
||||
let level = self.level;
|
||||
self.cur_node_mut()
|
||||
.set_child(idx, Child::Untracked(pa), Some(prop), level > 1);
|
||||
pa += page_size::<C>(level);
|
||||
self.move_forward();
|
||||
}
|
||||
}
|
||||
|
||||
/// Unmap the range starting from the current address with the given length of virtual address.
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// The caller should ensure that the range being unmapped does not affect kernel's memory safety.
|
||||
///
|
||||
/// # Panic
|
||||
///
|
||||
/// This function will panic if:
|
||||
/// - the range to be unmapped is out of the range where the cursor is required to operate;
|
||||
/// - the range covers only a part of a page.
|
||||
pub(crate) unsafe fn unmap(&mut self, len: usize) {
|
||||
let end = self.va + len;
|
||||
assert!(end <= self.barrier_va.end);
|
||||
assert!(end % C::BASE_PAGE_SIZE == 0);
|
||||
while self.va < end {
|
||||
// Skip if it is already invalid.
|
||||
if self.cur_child().is_none() {
|
||||
if self.va + page_size::<C>(self.level) > end {
|
||||
break;
|
||||
}
|
||||
self.move_forward();
|
||||
continue;
|
||||
}
|
||||
|
||||
// We check among the conditions that may lead to a level down.
|
||||
// We ensure not unmapping in reserved kernel shared tables or releasing it.
|
||||
let is_kernel_shared_node =
|
||||
TypeId::of::<M>() == TypeId::of::<KernelMode>() && self.level >= C::NR_LEVELS - 1;
|
||||
if is_kernel_shared_node
|
||||
|| self.va % page_size::<C>(self.level) != 0
|
||||
|| self.va + page_size::<C>(self.level) > end
|
||||
{
|
||||
self.level_down(Some(PageProperty::new_absent()));
|
||||
continue;
|
||||
}
|
||||
|
||||
// Unmap the current page.
|
||||
let idx = self.cur_idx();
|
||||
self.cur_node_mut().set_child(idx, Child::None, None, false);
|
||||
self.move_forward();
|
||||
}
|
||||
}
|
||||
|
||||
/// Apply the given operation to all the mappings within the range.
|
||||
///
|
||||
/// The function will return an error if it is not allowed to protect an invalid range and
|
||||
/// it does so, or if the range to be protected only covers a part of a page.
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// The caller should ensure that the range being protected does not affect kernel's memory safety.
|
||||
///
|
||||
/// # Panic
|
||||
///
|
||||
/// This function will panic if:
|
||||
/// - the range to be protected is out of the range where the cursor is required to operate.
|
||||
pub(crate) unsafe fn protect(
|
||||
&mut self,
|
||||
len: usize,
|
||||
mut op: impl FnMut(&mut PageProperty),
|
||||
allow_protect_invalid: bool,
|
||||
) -> Result<(), PageTableError> {
|
||||
let end = self.va + len;
|
||||
assert!(end <= self.barrier_va.end);
|
||||
while self.va < end {
|
||||
if self.cur_child().is_none() {
|
||||
if !allow_protect_invalid {
|
||||
return Err(PageTableError::ProtectingInvalid);
|
||||
}
|
||||
self.move_forward();
|
||||
continue;
|
||||
}
|
||||
// Go down if it's not a last node.
|
||||
if self.cur_child().is_pt() {
|
||||
self.level_down(None);
|
||||
continue;
|
||||
}
|
||||
let vaddr_not_fit = self.va % page_size::<C>(self.level) != 0
|
||||
|| self.va + page_size::<C>(self.level) > end;
|
||||
let mut pte_prop = self.read_cur_pte_prop();
|
||||
op(&mut pte_prop);
|
||||
// Go down if the page size is too big and we are protecting part
|
||||
// of untyped huge pages.
|
||||
if self.cur_child().is_untyped() && vaddr_not_fit {
|
||||
self.level_down(Some(pte_prop));
|
||||
continue;
|
||||
} else if vaddr_not_fit {
|
||||
return Err(PageTableError::ProtectingPartial);
|
||||
}
|
||||
let idx = self.cur_idx();
|
||||
let level = self.level;
|
||||
self.cur_node_mut().protect(idx, pte_prop, level);
|
||||
self.move_forward();
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Get the information of the current slot and move to the next slot.
|
||||
/// Get the information of the current slot.
|
||||
pub(crate) fn query(&mut self) -> Option<PageTableQueryResult> {
|
||||
if self.va >= self.barrier_va.end {
|
||||
return None;
|
||||
@ -358,56 +165,40 @@ where
|
||||
loop {
|
||||
let level = self.level;
|
||||
let va = self.va;
|
||||
let map_prop = self.read_cur_pte_prop();
|
||||
match self.cur_child().clone() {
|
||||
let pte = self.read_cur_pte();
|
||||
if !pte.is_present() {
|
||||
return Some(PageTableQueryResult::NotMapped {
|
||||
va,
|
||||
len: page_size::<C>(level),
|
||||
});
|
||||
}
|
||||
if !pte.is_last(level) {
|
||||
self.level_down();
|
||||
continue;
|
||||
}
|
||||
match self.cur_child() {
|
||||
Child::Frame(frame) => {
|
||||
self.move_forward();
|
||||
return Some(PageTableQueryResult::Mapped {
|
||||
va,
|
||||
frame,
|
||||
prop: map_prop,
|
||||
prop: pte.prop(),
|
||||
});
|
||||
}
|
||||
Child::PageTable(_) => {
|
||||
// Go down if it's not a last node.
|
||||
self.level_down(None);
|
||||
continue;
|
||||
}
|
||||
Child::Untracked(pa) => {
|
||||
self.move_forward();
|
||||
return Some(PageTableQueryResult::MappedUntyped {
|
||||
return Some(PageTableQueryResult::MappedUntracked {
|
||||
va,
|
||||
pa,
|
||||
len: page_size::<C>(level),
|
||||
prop: map_prop,
|
||||
prop: pte.prop(),
|
||||
});
|
||||
}
|
||||
Child::None => {
|
||||
self.move_forward();
|
||||
return Some(PageTableQueryResult::NotMapped {
|
||||
va,
|
||||
len: page_size::<C>(level),
|
||||
});
|
||||
Child::None | Child::PageTable(_) => {
|
||||
unreachable!(); // Already checked with the PTE.
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Consume itself and leak the root guard for the caller if it locked the root level.
|
||||
///
|
||||
/// It is useful when the caller wants to keep the root guard while the cursor should be dropped.
|
||||
pub(super) fn leak_root_guard(mut self) -> Option<ArcSpinLockGuard<PageTableFrame<E, C>>> {
|
||||
if self.guard_level != C::NR_LEVELS {
|
||||
return None;
|
||||
}
|
||||
while self.level < C::NR_LEVELS {
|
||||
self.level_up();
|
||||
}
|
||||
self.guards[0].take()
|
||||
// Ok to drop self here because we ensure not to access the page table if the current
|
||||
// level is the root level when running the dropping method.
|
||||
}
|
||||
|
||||
/// Traverse forward in the current level to the next PTE.
|
||||
///
|
||||
/// If reached the end of a page table frame, it leads itself up to the next frame of the parent
|
||||
@ -437,92 +228,75 @@ where
|
||||
TypeId::of::<M>() == TypeId::of::<KernelMode>() && self.level < C::NR_LEVELS;
|
||||
if can_release_child && last_node_all_unmapped {
|
||||
let idx = self.cur_idx();
|
||||
self.cur_node_mut().set_child(idx, Child::None, None, false);
|
||||
let untracked = self.in_untracked_range();
|
||||
self.cur_node_mut().unset_child(idx, false, untracked);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A level down operation during traversal. It may create a new child frame if the
|
||||
/// current frame does not have one. It may also split an untyped huge page into
|
||||
/// smaller pages if we have an end address within the next mapped untyped huge page.
|
||||
///
|
||||
/// If creation may happen the map property of intermediate level `prop` should be
|
||||
/// passed in correctly. Whether the map property matters in an intermediate
|
||||
/// level is architecture-dependent.
|
||||
///
|
||||
/// Also, the staticness of the page table is guaranteed if the caller make sure
|
||||
/// that there is a child node for the current node.
|
||||
fn level_down(&mut self, prop: Option<PageProperty>) {
|
||||
/// Go down a level assuming a child page table exists.
|
||||
fn level_down(&mut self) {
|
||||
debug_assert!(self.level > 1);
|
||||
// Check if the child frame exists.
|
||||
let nxt_lvl_frame = {
|
||||
let idx = pte_index::<C>(self.va, self.level);
|
||||
let child = self.cur_child();
|
||||
if let Child::PageTable(nxt_lvl_frame) = child {
|
||||
Some(nxt_lvl_frame.clone())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
};
|
||||
// Create a new child frame if it does not exist. Sure it could be done only if
|
||||
// it is allowed to modify the page table.
|
||||
let nxt_lvl_frame = nxt_lvl_frame.unwrap_or_else(|| {
|
||||
// If it already maps an untyped huge page, we should split it.
|
||||
if self.cur_child().is_untyped() {
|
||||
let level = self.level;
|
||||
let idx = self.cur_idx();
|
||||
self.cur_node_mut().split_untracked_huge(level, idx);
|
||||
let Child::PageTable(nxt_lvl_frame) = self.cur_child() else {
|
||||
unreachable!()
|
||||
};
|
||||
nxt_lvl_frame.clone()
|
||||
} else if self.cur_child().is_none() {
|
||||
let new_frame = Arc::new(SpinLock::new(PageTableFrame::<E, C>::new()));
|
||||
let idx = self.cur_idx();
|
||||
self.cur_node_mut().set_child(
|
||||
idx,
|
||||
Child::PageTable(new_frame.clone()),
|
||||
prop,
|
||||
false,
|
||||
);
|
||||
new_frame
|
||||
} else {
|
||||
panic!("Trying to level down when it is mapped to a typed frame");
|
||||
}
|
||||
});
|
||||
self.guards[(C::NR_LEVELS - self.level) as usize + 1] = Some(nxt_lvl_frame.lock_arc());
|
||||
self.level -= 1;
|
||||
let idx = pte_index::<C>(self.va, self.level);
|
||||
if let Child::PageTable(nxt_lvl_frame) = self.cur_child() {
|
||||
self.level -= 1;
|
||||
self.guards[(C::NR_LEVELS - self.level) as usize] = Some(nxt_lvl_frame.lock());
|
||||
} else {
|
||||
panic!("Trying to level down when it is not mapped to a page table");
|
||||
}
|
||||
}
|
||||
|
||||
fn cur_node(&self) -> &ArcSpinLockGuard<PageTableFrame<E, C>> {
|
||||
fn cur_node(&self) -> &PageTableFrame<E, C> {
|
||||
self.guards[(C::NR_LEVELS - self.level) as usize]
|
||||
.as_ref()
|
||||
.unwrap()
|
||||
}
|
||||
|
||||
fn cur_node_mut(&mut self) -> &mut ArcSpinLockGuard<PageTableFrame<E, C>> {
|
||||
self.guards[(C::NR_LEVELS - self.level) as usize]
|
||||
.as_mut()
|
||||
.unwrap()
|
||||
}
|
||||
|
||||
fn cur_idx(&self) -> usize {
|
||||
pte_index::<C>(self.va, self.level)
|
||||
}
|
||||
|
||||
fn cur_child(&self) -> &Child<E, C> {
|
||||
self.cur_node().child(self.cur_idx())
|
||||
fn cur_child(&self) -> Child<E, C> {
|
||||
self.cur_node()
|
||||
.child(self.cur_idx(), !self.in_untracked_range())
|
||||
}
|
||||
|
||||
fn read_cur_pte_prop(&self) -> PageProperty {
|
||||
self.cur_node().read_pte_prop(self.cur_idx())
|
||||
fn read_cur_pte(&self) -> E {
|
||||
self.cur_node().read_pte(self.cur_idx())
|
||||
}
|
||||
|
||||
/// Tell if the current virtual range must contain untracked mappings.
|
||||
///
|
||||
/// In the kernel mode, this is aligned with the definition in [`crate::vm::kspace`].
|
||||
/// Only linear mappings in the kernel are considered as untracked mappings.
|
||||
///
|
||||
/// All mappings in the user mode are tracked. And all mappings in the IOMMU
|
||||
/// page table are untracked.
|
||||
fn in_untracked_range(&self) -> bool {
|
||||
TypeId::of::<M>() == TypeId::of::<crate::arch::iommu::DeviceMode>()
|
||||
|| crate::vm::kspace::LINEAR_MAPPING_VADDR_RANGE.contains(&self.va)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, M: PageTableMode, E: PageTableEntryTrait, C: PagingConstsTrait> Iterator
|
||||
for Cursor<'a, M, E, C>
|
||||
where
|
||||
[(); C::NR_LEVELS as usize]:,
|
||||
{
|
||||
type Item = PageTableQueryResult;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
let result = self.query();
|
||||
if result.is_some() {
|
||||
self.move_forward();
|
||||
}
|
||||
result
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(feature = "page_table_recycle")]
|
||||
impl<M: PageTableMode, E: PageTableEntryTrait, C: PagingConstsTrait> Drop for CursorMut<'_, M, E, C>
|
||||
impl<M: PageTableMode, E: PageTableEntryTrait, C: PagingConstsTrait> Drop for Cursor<'_, M, E, C>
|
||||
where
|
||||
[(); nr_subpage_per_huge::<C>()]:,
|
||||
[(); C::NR_LEVELS as usize]:,
|
||||
{
|
||||
fn drop(&mut self) {
|
||||
@ -538,12 +312,14 @@ where
|
||||
// Drop the lock on the guard level.
|
||||
self.guards[C::NR_LEVELS - self.guard_level] = None;
|
||||
// Re-walk the page table to retreive the locks.
|
||||
self.guards[0] = Some(self.pt.root_frame.lock_arc());
|
||||
self.guards[0] = Some(self.pt.root.copy_handle().lock());
|
||||
self.level = C::NR_LEVELS;
|
||||
let cur_pte = self.read_cur_pte();
|
||||
let cur_child_is_pt = cur_pte.is_present() && !cur_pte.is_last(self.level);
|
||||
// Another cursor can unmap the guard level node before this cursor
|
||||
// is dropped, we can just do our best here when re-walking.
|
||||
while self.level > self.guard_level && self.cur_child().is_pt() {
|
||||
self.level_down(None);
|
||||
while self.level > self.guard_level && cur_child_is_pt {
|
||||
self.level_down();
|
||||
}
|
||||
// Doing final cleanup by [`CursorMut::level_up`] to the root.
|
||||
while self.level < C::NR_LEVELS {
|
||||
@ -552,58 +328,320 @@ where
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub(crate) enum PageTableQueryResult {
|
||||
NotMapped {
|
||||
va: Vaddr,
|
||||
len: usize,
|
||||
},
|
||||
Mapped {
|
||||
va: Vaddr,
|
||||
frame: VmFrame,
|
||||
prop: PageProperty,
|
||||
},
|
||||
MappedUntyped {
|
||||
va: Vaddr,
|
||||
pa: Paddr,
|
||||
len: usize,
|
||||
prop: PageProperty,
|
||||
},
|
||||
}
|
||||
|
||||
/// The read-only cursor for traversal over the page table.
|
||||
/// The cursor of a page table that is capable of map, unmap or protect pages.
|
||||
///
|
||||
/// It implements the `Iterator` trait to provide a convenient way to query over the page table.
|
||||
pub(crate) struct Cursor<'a, M: PageTableMode, E: PageTableEntryTrait, C: PagingConstsTrait>
|
||||
/// Also, it has all the capabilities of a [`Cursor`]. A virtual address range
|
||||
/// in a page table can only be accessed by one cursor whether it is mutable or not.
|
||||
#[derive(Debug)]
|
||||
pub(crate) struct CursorMut<'a, M: PageTableMode, E: PageTableEntryTrait, C: PagingConstsTrait>(
|
||||
Cursor<'a, M, E, C>,
|
||||
)
|
||||
where
|
||||
[(); nr_subpage_per_huge::<C>()]:,
|
||||
[(); C::NR_LEVELS as usize]:,
|
||||
{
|
||||
inner: CursorMut<'a, M, E, C>,
|
||||
}
|
||||
[(); C::NR_LEVELS as usize]:;
|
||||
|
||||
impl<'a, M: PageTableMode, E: PageTableEntryTrait, C: PagingConstsTrait> Cursor<'a, M, E, C>
|
||||
impl<'a, M: PageTableMode, E: PageTableEntryTrait, C: PagingConstsTrait> CursorMut<'a, M, E, C>
|
||||
where
|
||||
[(); nr_subpage_per_huge::<C>()]:,
|
||||
[(); C::NR_LEVELS as usize]:,
|
||||
{
|
||||
pub(super) fn new(
|
||||
pt: &'a PageTable<M, E, C>,
|
||||
va: &Range<Vaddr>,
|
||||
) -> Result<Self, PageTableError> {
|
||||
CursorMut::new(pt, va).map(|inner| Self { inner })
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, M: PageTableMode, E: PageTableEntryTrait, C: PagingConstsTrait> Iterator
|
||||
for Cursor<'a, M, E, C>
|
||||
where
|
||||
[(); nr_subpage_per_huge::<C>()]:,
|
||||
[(); C::NR_LEVELS as usize]:,
|
||||
{
|
||||
type Item = PageTableQueryResult;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
self.inner.query()
|
||||
Cursor::new(pt, va).map(|inner| Self(inner))
|
||||
}
|
||||
|
||||
/// Get the information of the current slot and go to the next slot.
|
||||
///
|
||||
/// We choose not to implement `Iterator` or `IterMut` for [`CursorMut`]
|
||||
/// because the mutable cursor is indeed not an iterator.
|
||||
pub(crate) fn next(&mut self) -> Option<PageTableQueryResult> {
|
||||
self.0.next()
|
||||
}
|
||||
|
||||
/// Jump to the given virtual address.
|
||||
///
|
||||
/// It panics if the address is out of the range where the cursor is required to operate,
|
||||
/// or has bad alignment.
|
||||
pub(crate) fn jump(&mut self, va: Vaddr) {
|
||||
assert!(self.0.barrier_va.contains(&va));
|
||||
assert!(va % C::BASE_PAGE_SIZE == 0);
|
||||
loop {
|
||||
let cur_node_start = self.0.va & !(page_size::<C>(self.0.level + 1) - 1);
|
||||
let cur_node_end = cur_node_start + page_size::<C>(self.0.level + 1);
|
||||
// If the address is within the current node, we can jump directly.
|
||||
if cur_node_start <= va && va < cur_node_end {
|
||||
self.0.va = va;
|
||||
return;
|
||||
}
|
||||
// There is a corner case that the cursor is depleted, sitting at the start of the
|
||||
// next node but the next node is not locked because the parent is not locked.
|
||||
if self.0.va >= self.0.barrier_va.end && self.0.level == self.0.guard_level {
|
||||
self.0.va = va;
|
||||
return;
|
||||
}
|
||||
debug_assert!(self.0.level < self.0.guard_level);
|
||||
self.0.level_up();
|
||||
}
|
||||
}
|
||||
|
||||
/// Map the range starting from the current address to a `VmFrame`.
|
||||
///
|
||||
/// # Panic
|
||||
///
|
||||
/// This function will panic if
|
||||
/// - the virtual address range to be mapped is out of the range;
|
||||
/// - the alignment of the frame is not satisfied by the virtual address;
|
||||
/// - it is already mapped to a huge page while the caller wants to map a smaller one.
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// The caller should ensure that the virtual range being mapped does
|
||||
/// not affect kernel's memory safety.
|
||||
pub(crate) unsafe fn map(&mut self, frame: VmFrame, prop: PageProperty) {
|
||||
let end = self.0.va + frame.size();
|
||||
assert!(end <= self.0.barrier_va.end);
|
||||
debug_assert!(!self.0.in_untracked_range());
|
||||
// Go down if not applicable.
|
||||
while self.0.level > C::HIGHEST_TRANSLATION_LEVEL
|
||||
|| self.0.va % page_size::<C>(self.0.level) != 0
|
||||
|| self.0.va + page_size::<C>(self.0.level) > end
|
||||
{
|
||||
let pte = self.0.read_cur_pte();
|
||||
if pte.is_present() && !pte.is_last(self.0.level) {
|
||||
self.0.level_down();
|
||||
} else if !pte.is_present() {
|
||||
self.level_down_create();
|
||||
} else {
|
||||
panic!("Mapping a smaller page in an already mapped huge page");
|
||||
}
|
||||
continue;
|
||||
}
|
||||
debug_assert_eq!(self.0.level, frame.level());
|
||||
// Map the current page.
|
||||
let idx = self.0.cur_idx();
|
||||
let level = self.0.level;
|
||||
self.cur_node_mut().set_child_frame(idx, frame, prop);
|
||||
self.0.move_forward();
|
||||
}
|
||||
|
||||
/// Map the range starting from the current address to a physical address range.
|
||||
///
|
||||
/// The function will map as many huge pages as possible, and it will split
|
||||
/// the huge pages into smaller pages if necessary. If the input range is
|
||||
/// large, the resulting mappings may look like this (if very huge pages
|
||||
/// supported):
|
||||
///
|
||||
/// ```text
|
||||
/// start end
|
||||
/// |----|----------------|--------------------------------|----|----|
|
||||
/// base huge very huge base base
|
||||
/// 4KiB 2MiB 1GiB 4KiB 4KiB
|
||||
/// ```
|
||||
///
|
||||
/// For safety and conciseness, using this method is not recommended in practice.
|
||||
///
|
||||
/// # Panic
|
||||
///
|
||||
/// This function will panic if
|
||||
/// - the virtual address range to be mapped is out of the range.
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// The caller should ensure that
|
||||
/// - the range being mapped does not affect kernel's memory safety;
|
||||
/// - the physical address to be mapped is valid and safe to use;
|
||||
/// - it is allowed to map untracked pages in this virtual address range.
|
||||
pub(crate) unsafe fn map_pa(&mut self, pa: &Range<Paddr>, prop: PageProperty) {
|
||||
let end = self.0.va + pa.len();
|
||||
let mut pa = pa.start;
|
||||
assert!(end <= self.0.barrier_va.end);
|
||||
while self.0.va < end {
|
||||
// We ensure not mapping in reserved kernel shared tables or releasing it.
|
||||
// Although it may be an invariant for all architectures and will be optimized
|
||||
// out by the compiler since `C::NR_LEVELS - 1 > C::HIGHEST_TRANSLATION_LEVEL`.
|
||||
let is_kernel_shared_node =
|
||||
TypeId::of::<M>() == TypeId::of::<KernelMode>() && self.0.level >= C::NR_LEVELS - 1;
|
||||
if self.0.level > C::HIGHEST_TRANSLATION_LEVEL
|
||||
|| is_kernel_shared_node
|
||||
|| self.0.va % page_size::<C>(self.0.level) != 0
|
||||
|| self.0.va + page_size::<C>(self.0.level) > end
|
||||
|| pa % page_size::<C>(self.0.level) != 0
|
||||
{
|
||||
let pte = self.0.read_cur_pte();
|
||||
if pte.is_present() && !pte.is_last(self.0.level) {
|
||||
self.0.level_down();
|
||||
} else if !pte.is_present() {
|
||||
self.level_down_create();
|
||||
} else {
|
||||
self.level_down_split();
|
||||
}
|
||||
continue;
|
||||
}
|
||||
// Map the current page.
|
||||
debug_assert!(self.0.in_untracked_range());
|
||||
let idx = self.0.cur_idx();
|
||||
let level = self.0.level;
|
||||
self.cur_node_mut().set_child_untracked(idx, pa, prop);
|
||||
pa += page_size::<C>(level);
|
||||
self.0.move_forward();
|
||||
}
|
||||
}
|
||||
|
||||
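The mixed page sizes pictured in the `map_pa` documentation above fall out of simple alignment arithmetic. The helper below is a hypothetical sketch assuming x86-64-style 4 KiB / 2 MiB / 1 GiB sizes: it greedily picks the largest page that both addresses are aligned to and that still fits, which is essentially the choice the cursor makes when deciding whether to level down.

```rust
// Greedy page-size selection for mapping [va, va + remaining) to pa: pick the
// largest page size that both addresses are aligned to and that does not
// overshoot the end. Sizes are the usual x86-64 ones, for illustration only.
const PAGE_SIZES: [usize; 3] = [1 << 30, 2 << 20, 4 << 10]; // 1 GiB, 2 MiB, 4 KiB

fn pick_page_size(va: usize, pa: usize, remaining: usize) -> usize {
    *PAGE_SIZES
        .iter()
        .find(|&&size| va % size == 0 && pa % size == 0 && size <= remaining)
        .expect("remaining is at least one base page")
}

fn main() {
    let (mut va, mut pa) = (0x20_0000usize, 0x4020_0000usize); // both 2 MiB aligned
    let mut remaining = 0x8050_0000usize; // ~2 GiB, forces mixed sizes
    let mut counts = [0usize; 3];
    while remaining > 0 {
        let size = pick_page_size(va, pa, remaining);
        counts[PAGE_SIZES.iter().position(|&s| s == size).unwrap()] += 1;
        va += size;
        pa += size;
        remaining -= size;
    }
    // 2 MiB pages lead in until 1 GiB alignment, then a 1 GiB page, then
    // 2 MiB and 4 KiB pages finish the tail.
    println!("1GiB: {}, 2MiB: {}, 4KiB: {}", counts[0], counts[1], counts[2]);
}
```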
/// Unmap the range starting from the current address with the given length of virtual address.
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// The caller should ensure that the range being unmapped does not affect kernel's memory safety.
|
||||
///
|
||||
/// # Panic
|
||||
///
|
||||
/// This function will panic if:
|
||||
/// - the range to be unmapped is out of the range where the cursor is required to operate;
|
||||
/// - the range covers only a part of a page.
|
||||
pub(crate) unsafe fn unmap(&mut self, len: usize) {
|
||||
let end = self.0.va + len;
|
||||
assert!(end <= self.0.barrier_va.end);
|
||||
assert!(end % C::BASE_PAGE_SIZE == 0);
|
||||
while self.0.va < end {
|
||||
let cur_pte = self.0.read_cur_pte();
|
||||
let untracked = self.0.in_untracked_range();
|
||||
|
||||
// Skip if it is already invalid.
|
||||
if !cur_pte.is_present() {
|
||||
if self.0.va + page_size::<C>(self.0.level) > end {
|
||||
break;
|
||||
}
|
||||
self.0.move_forward();
|
||||
continue;
|
||||
}
|
||||
|
||||
// We check among the conditions that may lead to a level down.
|
||||
// We ensure not unmapping in reserved kernel shared tables or releasing it.
|
||||
let is_kernel_shared_node =
|
||||
TypeId::of::<M>() == TypeId::of::<KernelMode>() && self.0.level >= C::NR_LEVELS - 1;
|
||||
if is_kernel_shared_node
|
||||
|| self.0.va % page_size::<C>(self.0.level) != 0
|
||||
|| self.0.va + page_size::<C>(self.0.level) > end
|
||||
{
|
||||
if cur_pte.is_present() && !cur_pte.is_last(self.0.level) {
|
||||
self.0.level_down();
|
||||
} else if untracked {
|
||||
self.level_down_split();
|
||||
} else {
|
||||
unreachable!();
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
// Unmap the current page.
|
||||
let idx = self.0.cur_idx();
|
||||
self.cur_node_mut().unset_child(idx, untracked);
|
||||
self.0.move_forward();
|
||||
}
|
||||
}
|
||||
|
||||
/// Apply the given operation to all the mappings within the range.
|
||||
///
|
||||
/// The function will return an error if it is not allowed to protect an invalid range and
|
||||
/// it does so, or if the range to be protected only covers a part of a page.
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// The caller should ensure that the range being protected does not affect kernel's memory safety.
|
||||
///
|
||||
/// # Panic
|
||||
///
|
||||
/// This function will panic if:
|
||||
/// - the range to be protected is out of the range where the cursor is required to operate.
|
||||
pub(crate) unsafe fn protect(
|
||||
&mut self,
|
||||
len: usize,
|
||||
mut op: impl FnMut(&mut PageProperty),
|
||||
allow_protect_absent: bool,
|
||||
) -> Result<(), PageTableError> {
|
||||
let end = self.0.va + len;
|
||||
assert!(end <= self.0.barrier_va.end);
|
||||
while self.0.va < end {
|
||||
let cur_pte = self.0.read_cur_pte();
|
||||
if !cur_pte.is_present() {
|
||||
if !allow_protect_absent {
|
||||
return Err(PageTableError::ProtectingAbsent);
|
||||
}
|
||||
self.0.move_forward();
|
||||
continue;
|
||||
}
|
||||
// Go down if it's not a last node.
|
||||
if !cur_pte.is_last(self.0.level) {
|
||||
self.0.level_down();
|
||||
continue;
|
||||
}
|
||||
// Go down if the page size is too big and we are protecting part
|
||||
// of untracked huge pages.
|
||||
let vaddr_not_fit = self.0.va % page_size::<C>(self.0.level) != 0
|
||||
|| self.0.va + page_size::<C>(self.0.level) > end;
|
||||
if self.0.in_untracked_range() && vaddr_not_fit {
|
||||
self.level_down_split();
|
||||
continue;
|
||||
} else if vaddr_not_fit {
|
||||
return Err(PageTableError::ProtectingPartial);
|
||||
}
|
||||
let idx = self.0.cur_idx();
|
||||
let level = self.0.level;
|
||||
let mut pte_prop = cur_pte.prop();
|
||||
op(&mut pte_prop);
|
||||
self.cur_node_mut().protect(idx, pte_prop);
|
||||
self.0.move_forward();
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Consume itself and leak the root guard for the caller if it locked the root level.
|
||||
///
|
||||
/// It is useful when the caller wants to keep the root guard while the cursor should be dropped.
|
||||
pub(super) fn leak_root_guard(mut self) -> Option<PageTableFrame<E, C>> {
|
||||
if self.0.guard_level != C::NR_LEVELS {
|
||||
return None;
|
||||
}
|
||||
while self.0.level < C::NR_LEVELS {
|
||||
self.0.level_up();
|
||||
}
|
||||
self.0.guards[0].take()
|
||||
// It is OK to drop the cursor here because the dropping method is ensured not to
// access the page table when the current level is the root level.
|
||||
}
|
||||
|
||||
/// Go down a level assuming the current slot is absent.
|
||||
///
|
||||
/// This method will create a new child frame and go down to it.
|
||||
fn level_down_create(&mut self) {
|
||||
debug_assert!(self.0.level > 1);
|
||||
let new_frame = PageTableFrame::<E, C>::alloc(self.0.level - 1);
|
||||
let idx = self.0.cur_idx();
|
||||
let untracked = self.0.in_untracked_range();
|
||||
self.cur_node_mut()
|
||||
.set_child_pt(idx, new_frame.clone_raw(), untracked);
|
||||
self.0.level -= 1;
|
||||
self.0.guards[(C::NR_LEVELS - self.0.level) as usize] = Some(new_frame);
|
||||
}
|
||||
|
||||
/// Go down a level assuming the current slot is an untracked huge page.
|
||||
///
|
||||
/// This method will split the huge page and go down to the next level.
|
||||
fn level_down_split(&mut self) {
|
||||
debug_assert!(self.0.level > 1);
|
||||
debug_assert!(self.0.in_untracked_range());
|
||||
let idx = self.0.cur_idx();
|
||||
self.cur_node_mut().split_untracked_huge(idx);
|
||||
let Child::PageTable(new_frame) = self.0.cur_child() else {
|
||||
unreachable!();
|
||||
};
|
||||
self.0.level -= 1;
|
||||
self.0.guards[(C::NR_LEVELS - self.0.level) as usize] = Some(new_frame.lock());
|
||||
}
|
||||
|
||||
fn cur_node_mut(&mut self) -> &mut PageTableFrame<E, C> {
|
||||
self.0.guards[(C::NR_LEVELS - self.0.level) as usize]
|
||||
.as_mut()
|
||||
.unwrap()
|
||||
}
|
||||
}
|
||||
|
@ -1,125 +1,413 @@
|
||||
// SPDX-License-Identifier: MPL-2.0
|
||||
|
||||
use alloc::{boxed::Box, sync::Arc};
|
||||
//! This module defines page table frame abstractions and the handle.
|
||||
//!
|
||||
//! The page table frame is also frequently referred to as a page table in architectural
//! documentation. We also call it a page table node when emphasizing the tree structure.
|
||||
//!
|
||||
//! This module leverages the frame metadata to manage the page table frames, which makes it
|
||||
//! easier to provide the following guarantees:
|
||||
//!
|
||||
//! The page table frame is not freed when it is still in use by:
|
||||
//! - a parent page table frame,
|
||||
//! - or a handle to a page table frame,
|
||||
//! - or a processor.
|
||||
//! This is implemented by using a reference counter in the frame metadata. Once none of the
//! above conditions hold, the page table frame is guaranteed to be freed upon dropping the
//! last reference.
|
||||
//!
|
||||
//! One can acquire exclusive access to a page table frame using merely the physical address of
|
||||
//! the page table frame. This is implemented by a lock in the frame metadata. Here the
|
||||
//! exclusiveness is only ensured for kernel code, and the processor's MMU is able to access the
|
||||
//! page table frame while a lock is held. So the modification to the PTEs should be done after
|
||||
//! the initialization of the entity that the PTE points to. This is taken care of in this module.
|
||||
//!
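To make the counters referenced below easier to follow, here is a conceptual sketch of the per-frame metadata fields this module relies on. It is not the actual `FrameMetaRef` layout; the names and widths merely mirror how `counter32_1`, `counter8_1`, and `counter16_1` are used in this file:

```rust
use core::sync::atomic::{AtomicU16, AtomicU32, AtomicU8};

// Conceptual only: not the real `FrameMetaRef` definition.
struct PtFrameMetaSketch {
    /// `counter32_1`: reference count (raw handles + parent PTEs + activated CPUs).
    ref_count: AtomicU32,
    /// `counter8_1`: spin lock bit, 0 = unlocked, 1 = locked.
    lock: AtomicU8,
    /// `counter16_1`: number of valid (present) children in this node.
    nr_children: AtomicU16,
}
```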
|
||||
|
||||
use core::{marker::PhantomData, mem::ManuallyDrop, ops::Range, panic, sync::atomic::Ordering};
|
||||
|
||||
use super::{nr_subpage_per_huge, page_size, PageTableEntryTrait};
|
||||
use crate::{
|
||||
sync::SpinLock,
|
||||
vm::{page_prop::PageProperty, Paddr, PagingConstsTrait, PagingLevel, VmAllocOptions, VmFrame},
|
||||
arch::mm::{PageTableEntry, PagingConsts},
|
||||
vm::{
|
||||
frame::allocator::FRAME_ALLOCATOR, paddr_to_vaddr, page_prop::PageProperty, FrameMetaRef,
|
||||
FrameType, Paddr, PagingConstsTrait, PagingLevel, VmFrame, PAGE_SIZE,
|
||||
},
|
||||
};
|
||||
|
||||
/// A page table frame.
|
||||
/// It's also frequently referred to as a page table in many architectural documentations.
|
||||
/// Cloning a page table frame will create a deep copy of the page table.
|
||||
/// The raw handle to a page table frame.
|
||||
///
|
||||
/// This handle is a reference to a page table frame, so creating and dropping it will affect
/// the frame's reference count. If the raw handle is dropped as the last reference, the page
/// table frame and its children will be freed.
|
||||
///
|
||||
/// Only the CPU or a PTE can access a page table frame using a raw handle. To access the page
|
||||
/// table frame from the kernel code, use the handle [`PageTableFrame`].
|
||||
#[derive(Debug)]
|
||||
pub(super) struct PageTableFrame<E: PageTableEntryTrait, C: PagingConstsTrait>
|
||||
pub(super) struct RawPageTableFrame<E: PageTableEntryTrait, C: PagingConstsTrait>(
|
||||
Paddr,
|
||||
PagingLevel,
|
||||
PhantomData<(E, C)>,
|
||||
)
|
||||
where
|
||||
[(); C::NR_LEVELS as usize]:;
|
||||
|
||||
impl<E: PageTableEntryTrait, C: PagingConstsTrait> RawPageTableFrame<E, C>
|
||||
where
|
||||
[(); nr_subpage_per_huge::<C>()]:,
|
||||
[(); C::NR_LEVELS as usize]:,
|
||||
{
|
||||
inner: VmFrame,
|
||||
/// TODO: all the following fields can be removed if frame metadata is introduced.
|
||||
/// Here we allow 2x space overhead each frame temporarily.
|
||||
#[allow(clippy::type_complexity)]
|
||||
children: Box<[Child<E, C>; nr_subpage_per_huge::<C>()]>,
|
||||
nr_valid_children: usize,
|
||||
pub(super) fn paddr(&self) -> Paddr {
|
||||
self.0
|
||||
}
|
||||
|
||||
/// Convert a raw handle to an accessible handle by acquiring the lock.
|
||||
pub(super) fn lock(self) -> PageTableFrame<E, C> {
|
||||
let meta = unsafe { FrameMetaRef::from_raw(self.0, 1) };
|
||||
let level = self.1;
|
||||
// Acquire the lock.
|
||||
while meta
|
||||
.counter8_1
|
||||
.compare_exchange(0, 1, Ordering::Acquire, Ordering::Relaxed)
|
||||
.is_err()
|
||||
{
|
||||
core::hint::spin_loop();
|
||||
}
|
||||
// Prevent dropping the handle.
|
||||
let _ = ManuallyDrop::new(self);
|
||||
PageTableFrame::<E, C> {
|
||||
meta,
|
||||
newly_created: false,
|
||||
level,
|
||||
_phantom: PhantomData,
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a copy of the handle.
|
||||
pub(super) fn copy_handle(&self) -> Self {
|
||||
let meta = unsafe { FrameMetaRef::from_raw(self.0, 1) };
|
||||
// Increment the reference count.
|
||||
meta.counter32_1.fetch_add(1, Ordering::Relaxed);
|
||||
Self(self.0, self.1, PhantomData)
|
||||
}
|
||||
|
||||
pub(super) fn nr_valid_children(&self) -> u16 {
|
||||
let meta = unsafe { FrameMetaRef::from_raw(self.0, 1) };
|
||||
meta.counter16_1.load(Ordering::Relaxed)
|
||||
}
|
||||
|
||||
/// Activate the page table assuming it is a root page table.
|
||||
///
|
||||
/// Here we ensure that an active page table is not dropped by making the
/// processor a page table owner. When activating a page table, the
|
||||
/// reference count of the last activated page table is decremented.
|
||||
/// And that of the current page table is incremented.
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// The caller must ensure that the page table to be activated has
|
||||
/// proper mappings for the kernel and has the correct const parameters
|
||||
/// matching the current CPU.
|
||||
pub(crate) unsafe fn activate(&self) {
|
||||
use core::sync::atomic::AtomicBool;
|
||||
|
||||
use crate::{
|
||||
arch::mm::{activate_page_table, current_page_table_paddr},
|
||||
vm::CachePolicy,
|
||||
};
|
||||
|
||||
debug_assert_eq!(self.1, PagingConsts::NR_LEVELS);
|
||||
|
||||
let last_activated_paddr = current_page_table_paddr();
|
||||
|
||||
activate_page_table(self.0, CachePolicy::Writeback);
|
||||
|
||||
if last_activated_paddr == self.0 {
|
||||
return;
|
||||
}
|
||||
|
||||
// Increment the reference count of the current page table.
|
||||
|
||||
FrameMetaRef::from_raw(self.0, 1)
|
||||
.counter32_1
|
||||
.fetch_add(1, Ordering::Relaxed);
|
||||
|
||||
// Decrement the reference count of the last activated page table.
|
||||
|
||||
// Boot page tables are not tracked with [`PageTableFrame`], but
|
||||
// all page tables after the boot stage are tracked.
|
||||
//
|
||||
// TODO: the `cpu_local` implementation is currently underpowered;
// there should be no need to use `AtomicBool` here.
|
||||
crate::cpu_local! {
|
||||
static CURRENT_IS_BOOT_PT: AtomicBool = AtomicBool::new(true);
|
||||
}
|
||||
if !CURRENT_IS_BOOT_PT.load(Ordering::Acquire) {
|
||||
// Restore and drop the last activated page table.
|
||||
let _last_activated_pt =
|
||||
Self(last_activated_paddr, PagingConsts::NR_LEVELS, PhantomData);
|
||||
} else {
|
||||
CURRENT_IS_BOOT_PT.store(false, Ordering::Release);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(super) type PtfRef<E, C> = Arc<SpinLock<PageTableFrame<E, C>>>;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub(super) enum Child<E: PageTableEntryTrait, C: PagingConstsTrait>
|
||||
impl<E: PageTableEntryTrait, C: PagingConstsTrait> Drop for RawPageTableFrame<E, C>
|
||||
where
|
||||
[(); nr_subpage_per_huge::<C>()]:,
|
||||
[(); C::NR_LEVELS as usize]:,
|
||||
{
|
||||
PageTable(PtfRef<E, C>),
|
||||
fn drop(&mut self) {
|
||||
let mut meta = unsafe { FrameMetaRef::from_raw(self.0, 1) };
|
||||
if meta.counter32_1.fetch_sub(1, Ordering::Release) == 1 {
|
||||
// A fence is needed here for the same reasons stated in the implementation of
|
||||
// `Arc::drop`: <https://doc.rust-lang.org/std/sync/struct.Arc.html#method.drop>.
|
||||
core::sync::atomic::fence(Ordering::Acquire);
|
||||
// Drop the children.
|
||||
for i in 0..nr_subpage_per_huge::<C>() {
|
||||
// SAFETY: the index is within the bound and PTE is plain-old-data. The
|
||||
// address is aligned as well. We also have an exclusive access ensured
|
||||
// by reference counting.
|
||||
let pte_ptr = unsafe { (paddr_to_vaddr(self.paddr()) as *const E).add(i) };
|
||||
let pte = unsafe { pte_ptr.read() };
|
||||
if pte.is_present() {
|
||||
// Just restore the handle and drop the handle.
|
||||
if !pte.is_last(self.1) {
|
||||
// This is a page table.
|
||||
let _dropping_raw = Self(pte.paddr(), self.1 - 1, PhantomData);
|
||||
} else {
|
||||
// This is a frame. You cannot drop a page table node that maps to
|
||||
// untracked frames. This must be verified.
|
||||
let frame_meta = unsafe { FrameMetaRef::from_raw(pte.paddr(), self.1) };
|
||||
let _dropping_frame = VmFrame { meta: frame_meta };
|
||||
}
|
||||
}
|
||||
}
|
||||
// SAFETY: the frame is initialized and the physical address points to initialized memory.
|
||||
// We also have exclusive access ensured by reference counting.
|
||||
unsafe {
|
||||
meta.deref_mut().frame_type = FrameType::Free;
|
||||
}
|
||||
// Recycle this page table frame.
|
||||
FRAME_ALLOCATOR
|
||||
.get()
|
||||
.unwrap()
|
||||
.lock()
|
||||
.dealloc(self.0 / PAGE_SIZE, 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A mutable handle to a page table frame.
|
||||
///
|
||||
/// The page table frame can own a set of handles to children, ensuring that the children
|
||||
/// don't outlive the page table frame. Cloning a page table frame will create a deep copy
|
||||
/// of the page table. Dropping the page table frame will also drop all handles if the page
|
||||
/// table frame has no references. You can set the page table frame as a child of another
|
||||
/// page table frame.
|
||||
#[derive(Debug)]
|
||||
pub(super) struct PageTableFrame<
|
||||
E: PageTableEntryTrait = PageTableEntry,
|
||||
C: PagingConstsTrait = PagingConsts,
|
||||
> where
|
||||
[(); C::NR_LEVELS as usize]:,
|
||||
{
|
||||
pub(super) meta: FrameMetaRef,
|
||||
/// This is an optimization to save a few atomic operations on the lock.
|
||||
///
|
||||
/// If the handle is newly created using [`Self::alloc`], this is true and there's no need
|
||||
/// to acquire the lock since the handle is exclusive. However if the handle is acquired
|
||||
/// from a [`RawPageTableFrame`], this is false and the lock should be acquired.
|
||||
newly_created: bool,
|
||||
/// The level of the page table frame. This is needed because we cannot tell from a PTE
|
||||
/// alone if it is a page table or a frame.
|
||||
level: PagingLevel,
|
||||
_phantom: core::marker::PhantomData<(E, C)>,
|
||||
}
|
||||
|
||||
/// A child of a page table frame.
|
||||
#[derive(Debug)]
|
||||
pub(super) enum Child<E: PageTableEntryTrait = PageTableEntry, C: PagingConstsTrait = PagingConsts>
|
||||
where
|
||||
[(); C::NR_LEVELS as usize]:,
|
||||
{
|
||||
PageTable(RawPageTableFrame<E, C>),
|
||||
Frame(VmFrame),
|
||||
/// Frames not tracked by the frame allocator.
|
||||
/// Frames not tracked by handles.
|
||||
Untracked(Paddr),
|
||||
None,
|
||||
}
|
||||
|
||||
impl<E: PageTableEntryTrait, C: PagingConstsTrait> Child<E, C>
|
||||
where
|
||||
[(); nr_subpage_per_huge::<C>()]:,
|
||||
[(); C::NR_LEVELS as usize]:,
|
||||
{
|
||||
pub(super) fn is_pt(&self) -> bool {
|
||||
matches!(self, Child::PageTable(_))
|
||||
}
|
||||
pub(super) fn is_frame(&self) -> bool {
|
||||
matches!(self, Child::Frame(_))
|
||||
}
|
||||
pub(super) fn is_none(&self) -> bool {
|
||||
matches!(self, Child::None)
|
||||
}
|
||||
pub(super) fn is_some(&self) -> bool {
|
||||
!self.is_none()
|
||||
}
|
||||
pub(super) fn is_untyped(&self) -> bool {
|
||||
matches!(self, Child::Untracked(_))
|
||||
}
|
||||
/// Is a last entry that maps to a physical address.
|
||||
pub(super) fn is_last(&self) -> bool {
|
||||
matches!(self, Child::Frame(_) | Child::Untracked(_))
|
||||
}
|
||||
fn paddr(&self) -> Option<Paddr> {
|
||||
match self {
|
||||
Child::PageTable(node) => {
|
||||
// The chance of deadlock is zero because it is only called by [`PageTableFrame::protect`],
|
||||
// and the cursor will not protect a node while holding the lock.
|
||||
Some(node.lock().start_paddr())
|
||||
}
|
||||
Child::Frame(frame) => Some(frame.start_paddr()),
|
||||
Child::Untracked(pa) => Some(*pa),
|
||||
Child::None => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<E: PageTableEntryTrait, C: PagingConstsTrait> Clone for Child<E, C>
|
||||
where
|
||||
[(); nr_subpage_per_huge::<C>()]:,
|
||||
[(); C::NR_LEVELS as usize]:,
|
||||
{
|
||||
/// This is a shallow copy.
|
||||
fn clone(&self) -> Self {
|
||||
match self {
|
||||
Child::PageTable(ptf) => Child::PageTable(ptf.clone()),
|
||||
Child::Frame(frame) => Child::Frame(frame.clone()),
|
||||
Child::Untracked(pa) => Child::Untracked(*pa),
|
||||
Child::None => Child::None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<E: PageTableEntryTrait, C: PagingConstsTrait> PageTableFrame<E, C>
|
||||
where
|
||||
[(); nr_subpage_per_huge::<C>()]:,
|
||||
[(); C::NR_LEVELS as usize]:,
|
||||
{
|
||||
pub(super) fn new() -> Self {
|
||||
/// Allocate a new empty page table frame.
|
||||
///
|
||||
/// This function returns an owning handle. For performance, the newly created handle
/// does not set the lock bit, since it is exclusive and unlocking would be an extra,
/// unnecessary and expensive operation.
|
||||
pub(super) fn alloc(level: PagingLevel) -> Self {
|
||||
let frame = FRAME_ALLOCATOR.get().unwrap().lock().alloc(1).unwrap() * PAGE_SIZE;
|
||||
let mut meta = unsafe { FrameMetaRef::from_raw(frame, 1) };
|
||||
// The reference count is initialized to 1.
|
||||
meta.counter32_1.store(1, Ordering::Relaxed);
|
||||
// The lock is initialized to 0.
|
||||
meta.counter8_1.store(0, Ordering::Release);
|
||||
// SAFETY: here we have exclusive access since the frame has just been initialized.
|
||||
unsafe {
|
||||
meta.deref_mut().frame_type = FrameType::PageTable;
|
||||
}
|
||||
|
||||
// Zero out the page table frame.
|
||||
let ptr = paddr_to_vaddr(meta.paddr()) as *mut u8;
|
||||
unsafe { core::ptr::write_bytes(ptr, 0, PAGE_SIZE) };
|
||||
|
||||
Self {
|
||||
inner: VmAllocOptions::new(1).alloc_single().unwrap(),
|
||||
children: Box::new(core::array::from_fn(|_| Child::None)),
|
||||
nr_valid_children: 0,
|
||||
meta,
|
||||
newly_created: true,
|
||||
level,
|
||||
_phantom: PhantomData,
|
||||
}
|
||||
}
|
||||
|
||||
pub(super) fn start_paddr(&self) -> Paddr {
|
||||
self.inner.start_paddr()
|
||||
/// Convert the handle into a raw handle to be stored in a PTE or CPU.
|
||||
pub(super) fn into_raw(mut self) -> RawPageTableFrame<E, C> {
|
||||
if !self.newly_created {
|
||||
self.meta.counter8_1.store(0, Ordering::Release);
|
||||
} else {
|
||||
self.newly_created = false;
|
||||
}
|
||||
let raw = RawPageTableFrame(self.start_paddr(), self.level, PhantomData);
|
||||
let _ = ManuallyDrop::new(self);
|
||||
raw
|
||||
}
|
||||
|
||||
pub(super) fn child(&self, idx: usize) -> &Child<E, C> {
|
||||
/// Get a raw handle while still preserving the original handle.
|
||||
pub(super) fn clone_raw(&self) -> RawPageTableFrame<E, C> {
|
||||
self.meta.counter32_1.fetch_add(1, Ordering::Relaxed);
|
||||
RawPageTableFrame(self.start_paddr(), self.level, PhantomData)
|
||||
}
|
||||
|
||||
/// Get an extra reference of the child at the given index.
|
||||
pub(super) fn child(&self, idx: usize, tracked: bool) -> Child<E, C> {
|
||||
debug_assert!(idx < nr_subpage_per_huge::<C>());
|
||||
&self.children[idx]
|
||||
let pte = self.read_pte(idx);
|
||||
if !pte.is_present() {
|
||||
Child::None
|
||||
} else {
|
||||
let paddr = pte.paddr();
|
||||
if !pte.is_last(self.level) {
|
||||
let meta = unsafe { FrameMetaRef::from_raw(paddr, 1) };
|
||||
// This is the handle count. We are creating a new handle thus increment the counter.
|
||||
meta.counter32_1.fetch_add(1, Ordering::Relaxed);
|
||||
Child::PageTable(RawPageTableFrame(paddr, self.level - 1, PhantomData))
|
||||
} else if tracked {
|
||||
let meta = unsafe { FrameMetaRef::from_raw(paddr, self.level) };
|
||||
// This is the handle count. We are creating a new handle thus increment the counter.
|
||||
meta.counter32_1.fetch_add(1, Ordering::Relaxed);
|
||||
Child::Frame(VmFrame { meta })
|
||||
} else {
|
||||
Child::Untracked(paddr)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Make a copy of the page table frame.
|
||||
///
|
||||
/// This function allows you to control the way the children are copied.
|
||||
/// For indexes in `deep`, the children are deep copied and this function will be recursively called.
|
||||
/// For indexes in `shallow`, the children are shallow copied as new references.
|
||||
///
|
||||
/// You cannot shallow copy a child that is mapped to a frame. Deep copying a frame child will not
|
||||
/// copy the mapped frame but will copy the handle to the frame.
|
||||
///
|
||||
/// You can neither deep copy nor shallow copy a child that is mapped to an untracked frame.
|
||||
///
|
||||
/// The ranges must be disjoint.
|
||||
pub(super) unsafe fn make_copy(&self, deep: Range<usize>, shallow: Range<usize>) -> Self {
|
||||
let mut new_frame = Self::alloc(self.level);
|
||||
debug_assert!(deep.end <= nr_subpage_per_huge::<C>());
|
||||
debug_assert!(shallow.end <= nr_subpage_per_huge::<C>());
|
||||
debug_assert!(deep.end <= shallow.start || deep.start >= shallow.end);
|
||||
for i in deep {
|
||||
match self.child(i, /*meaningless*/ true) {
|
||||
Child::PageTable(pt) => {
|
||||
let guard = pt.copy_handle().lock();
|
||||
let new_child = guard.make_copy(0..nr_subpage_per_huge::<C>(), 0..0);
|
||||
new_frame.set_child_pt(i, new_child.into_raw(), /*meaningless*/ true);
|
||||
}
|
||||
Child::Frame(frame) => {
|
||||
let prop = self.read_pte_prop(i);
|
||||
new_frame.set_child_frame(i, frame.clone(), prop);
|
||||
}
|
||||
Child::None => {}
|
||||
Child::Untracked(_) => {
|
||||
unreachable!();
|
||||
}
|
||||
}
|
||||
}
|
||||
for i in shallow {
|
||||
debug_assert_eq!(self.level, C::NR_LEVELS);
|
||||
match self.child(i, /*meaningless*/ true) {
|
||||
Child::PageTable(pt) => {
|
||||
new_frame.set_child_pt(i, pt.copy_handle(), /*meaningless*/ true);
|
||||
}
|
||||
Child::None => {}
|
||||
Child::Frame(_) | Child::Untracked(_) => {
|
||||
unreachable!();
|
||||
}
|
||||
}
|
||||
}
|
||||
new_frame
|
||||
}
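For reference, the copy-on-write fork path later in this diff calls this method with the lower (user) half deep-copied and the upper (kernel) half shared shallowly. A minimal sketch of that call pattern, assuming the constant equals `nr_subpage_per_huge::<PagingConsts>()`:

```rust
// Sketch of the COW-fork call pattern: deep-copy the user half, share the kernel half.
const NR_PTES_PER_NODE: usize = 512; // assumed value of nr_subpage_per_huge::<PagingConsts>()
let new_root = unsafe {
    root_frame.make_copy(
        0..NR_PTES_PER_NODE / 2,
        NR_PTES_PER_NODE / 2..NR_PTES_PER_NODE,
    )
};
```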
|
||||
|
||||
/// Remove a child if the child at the given index is present.
|
||||
pub(super) fn unset_child(&self, idx: usize, in_untracked_range: bool) {
|
||||
debug_assert!(idx < nr_subpage_per_huge::<C>());
|
||||
self.overwrite_pte(idx, None, in_untracked_range);
|
||||
}
|
||||
|
||||
/// Set a child page table at a given index.
|
||||
pub(super) fn set_child_pt(
|
||||
&mut self,
|
||||
idx: usize,
|
||||
pt: RawPageTableFrame<E, C>,
|
||||
in_untracked_range: bool,
|
||||
) {
|
||||
// They should be ensured by the cursor.
|
||||
debug_assert!(idx < nr_subpage_per_huge::<C>());
|
||||
debug_assert_eq!(pt.1, self.level - 1);
|
||||
let pte = Some(E::new_pt(pt.paddr()));
|
||||
self.overwrite_pte(idx, pte, in_untracked_range);
|
||||
// The ownership is transferred to a raw PTE. Don't drop the handle.
|
||||
let _ = ManuallyDrop::new(pt);
|
||||
}
|
||||
|
||||
/// Map a frame at a given index.
|
||||
pub(super) fn set_child_frame(&mut self, idx: usize, frame: VmFrame, prop: PageProperty) {
|
||||
// They should be ensured by the cursor.
|
||||
debug_assert!(idx < nr_subpage_per_huge::<C>());
|
||||
debug_assert_eq!(frame.level(), self.level);
|
||||
let pte = Some(E::new_frame(frame.start_paddr(), self.level, prop));
|
||||
self.overwrite_pte(idx, pte, false);
|
||||
// The ownership is transferred to a raw PTE. Don't drop the handle.
|
||||
let _ = ManuallyDrop::new(frame);
|
||||
}
|
||||
|
||||
/// Set an untracked child frame at a given index.
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// The caller must ensure that the physical address is valid and safe to map.
|
||||
pub(super) unsafe fn set_child_untracked(&mut self, idx: usize, pa: Paddr, prop: PageProperty) {
|
||||
// It should be ensured by the cursor.
|
||||
debug_assert!(idx < nr_subpage_per_huge::<C>());
|
||||
let pte = Some(E::new_frame(pa, self.level, prop));
|
||||
self.overwrite_pte(idx, pte, true);
|
||||
}
|
||||
|
||||
/// The number of mapped frames or page tables.
|
||||
/// This is used to track whether the frame itself can be freed.
|
||||
pub(super) fn nr_valid_children(&self) -> usize {
|
||||
self.nr_valid_children
|
||||
pub(super) fn nr_valid_children(&self) -> u16 {
|
||||
self.meta.counter16_1.load(Ordering::Relaxed)
|
||||
}
|
||||
|
||||
/// Read the info from a page table entry at a given index.
|
||||
@ -128,142 +416,104 @@ where
|
||||
}
|
||||
|
||||
/// Split the untracked huge page mapped at `idx` to smaller pages.
|
||||
pub(super) fn split_untracked_huge(&mut self, cur_level: PagingLevel, idx: usize) {
|
||||
pub(super) fn split_untracked_huge(&mut self, idx: usize) {
|
||||
// These should be ensured by the cursor.
|
||||
debug_assert!(idx < nr_subpage_per_huge::<C>());
|
||||
debug_assert!(cur_level > 1);
|
||||
let Child::Untracked(pa) = self.children[idx] else {
|
||||
panic!("split_untracked_huge: not an untyped huge page");
|
||||
debug_assert!(self.level > 1);
|
||||
|
||||
let Child::Untracked(pa) = self.child(idx, false) else {
|
||||
panic!("`split_untracked_huge` not called on an untracked huge page");
|
||||
};
|
||||
let prop = self.read_pte_prop(idx);
|
||||
let mut new_frame = Self::new();
|
||||
let mut new_frame = PageTableFrame::<E, C>::alloc(self.level - 1);
|
||||
for i in 0..nr_subpage_per_huge::<C>() {
|
||||
let small_pa = pa + i * page_size::<C>(cur_level - 1);
|
||||
new_frame.set_child(i, Child::Untracked(small_pa), Some(prop), cur_level - 1 > 1);
|
||||
let small_pa = pa + i * page_size::<C>(self.level - 1);
|
||||
unsafe { new_frame.set_child_untracked(i, small_pa, prop) };
|
||||
}
|
||||
self.set_child(
|
||||
idx,
|
||||
Child::PageTable(Arc::new(SpinLock::new(new_frame))),
|
||||
Some(prop),
|
||||
false,
|
||||
);
|
||||
}
|
||||
|
||||
/// Map a child at a given index.
|
||||
/// If mapping a non-none child, please give the property to map the child.
|
||||
pub(super) fn set_child(
|
||||
&mut self,
|
||||
idx: usize,
|
||||
child: Child<E, C>,
|
||||
prop: Option<PageProperty>,
|
||||
huge: bool,
|
||||
) {
|
||||
assert!(idx < nr_subpage_per_huge::<C>());
|
||||
// SAFETY: the index is within the bound and the PTE to be written is valid.
|
||||
// And the physical address of PTE points to initialized memory.
|
||||
// This applies to all the following `write_pte` invocations.
|
||||
unsafe {
|
||||
match &child {
|
||||
Child::PageTable(node) => {
|
||||
debug_assert!(!huge);
|
||||
let frame = node.lock();
|
||||
self.write_pte(
|
||||
idx,
|
||||
E::new(frame.inner.start_paddr(), prop.unwrap(), false, false),
|
||||
);
|
||||
self.nr_valid_children += 1;
|
||||
}
|
||||
Child::Frame(frame) => {
|
||||
debug_assert!(!huge); // `VmFrame` currently can only be a regular page.
|
||||
self.write_pte(idx, E::new(frame.start_paddr(), prop.unwrap(), false, true));
|
||||
self.nr_valid_children += 1;
|
||||
}
|
||||
Child::Untracked(pa) => {
|
||||
self.write_pte(idx, E::new(*pa, prop.unwrap(), huge, true));
|
||||
self.nr_valid_children += 1;
|
||||
}
|
||||
Child::None => {
|
||||
self.write_pte(idx, E::new_absent());
|
||||
}
|
||||
}
|
||||
}
|
||||
if self.children[idx].is_some() {
|
||||
self.nr_valid_children -= 1;
|
||||
}
|
||||
self.children[idx] = child;
|
||||
self.set_child_pt(idx, new_frame.into_raw(), true);
|
||||
}
|
||||
|
||||
/// Protect an already mapped child at a given index.
|
||||
pub(super) fn protect(&mut self, idx: usize, prop: PageProperty, level: PagingLevel) {
|
||||
debug_assert!(self.children[idx].is_some());
|
||||
let paddr = self.children[idx].paddr().unwrap();
|
||||
pub(super) fn protect(&mut self, idx: usize, prop: PageProperty) {
|
||||
let mut pte = self.read_pte(idx);
|
||||
debug_assert!(pte.is_present()); // This should be ensured by the cursor.
|
||||
pte.set_prop(prop);
|
||||
// SAFETY: the index is within the bound and the PTE is valid.
|
||||
unsafe {
|
||||
self.write_pte(
|
||||
idx,
|
||||
E::new(paddr, prop, level > 1, self.children[idx].is_last()),
|
||||
);
|
||||
(self.as_ptr() as *mut E).add(idx).write(pte);
|
||||
}
|
||||
}
|
||||
|
||||
fn read_pte(&self, idx: usize) -> E {
|
||||
assert!(idx < nr_subpage_per_huge::<C>());
|
||||
pub(super) fn read_pte(&self, idx: usize) -> E {
|
||||
// It should be ensured by the cursor.
|
||||
debug_assert!(idx < nr_subpage_per_huge::<C>());
|
||||
// SAFETY: the index is within the bound and PTE is plain-old-data.
|
||||
unsafe { (self.inner.as_ptr() as *const E).add(idx).read() }
|
||||
unsafe { self.as_ptr().add(idx).read() }
|
||||
}
|
||||
|
||||
/// Write a page table entry at a given index.
|
||||
fn start_paddr(&self) -> Paddr {
|
||||
self.meta.paddr()
|
||||
}
|
||||
|
||||
/// Replace a page table entry at a given index.
|
||||
///
|
||||
/// # Safety
|
||||
/// This method will ensure that the child represented by the overwritten
|
||||
/// PTE is dropped, and the child count is updated.
|
||||
///
|
||||
/// The caller must ensure that:
|
||||
/// - the index is within bounds;
|
||||
/// - the PTE is valid and the physical address in the PTE points to initialized memory.
|
||||
unsafe fn write_pte(&mut self, idx: usize, pte: E) {
|
||||
(self.inner.as_mut_ptr() as *mut E).add(idx).write(pte);
|
||||
/// The caller in this module will ensure that the PTE points to initialized
|
||||
/// memory if the child is a page table.
|
||||
fn overwrite_pte(&self, idx: usize, pte: Option<E>, in_untracked_range: bool) {
|
||||
let existing_pte = self.read_pte(idx);
|
||||
if existing_pte.is_present() {
|
||||
// SAFETY: The index is within the bound and the address is aligned.
|
||||
// The validity of the PTE is checked within this module.
|
||||
// The same safety reasoning also holds in the following branch.
|
||||
unsafe {
|
||||
(self.as_ptr() as *mut E)
|
||||
.add(idx)
|
||||
.write(pte.unwrap_or(E::new_absent()))
|
||||
};
|
||||
|
||||
// Drop the child. We must set the PTE before dropping the child. To
// drop the child, just restore the handle and let it be dropped.
|
||||
|
||||
let paddr = existing_pte.paddr();
|
||||
if !existing_pte.is_last(self.level) {
|
||||
// This is a page table.
|
||||
let _dropping_raw = RawPageTableFrame::<E, C>(paddr, self.level - 1, PhantomData);
|
||||
} else if !in_untracked_range {
|
||||
// This is a frame.
|
||||
let meta = unsafe { FrameMetaRef::from_raw(paddr, self.level) };
|
||||
let _dropping_frame = VmFrame { meta };
|
||||
}
|
||||
|
||||
if pte.is_none() {
|
||||
// Decrement the child count.
|
||||
self.meta.counter16_1.fetch_sub(1, Ordering::Relaxed);
|
||||
}
|
||||
} else if let Some(e) = pte {
|
||||
unsafe { (self.as_ptr() as *mut E).add(idx).write(e) };
|
||||
|
||||
// Increment the child count.
|
||||
self.meta.counter16_1.fetch_add(1, Ordering::Relaxed);
|
||||
}
|
||||
}
|
||||
|
||||
fn as_ptr(&self) -> *const E {
|
||||
paddr_to_vaddr(self.start_paddr()) as *const E
|
||||
}
|
||||
}
|
||||
|
||||
impl<E: PageTableEntryTrait, C: PagingConstsTrait> Clone for PageTableFrame<E, C>
|
||||
impl<E: PageTableEntryTrait, C: PagingConstsTrait> Drop for PageTableFrame<E, C>
|
||||
where
|
||||
[(); nr_subpage_per_huge::<C>()]:,
|
||||
[(); C::NR_LEVELS as usize]:,
|
||||
{
|
||||
/// Make a deep copy of the page table.
|
||||
/// The child page tables are also being deep copied.
|
||||
fn clone(&self) -> Self {
|
||||
let new_frame = VmAllocOptions::new(1).alloc_single().unwrap();
|
||||
let new_ptr = new_frame.as_mut_ptr() as *mut E;
|
||||
let children = Box::new(core::array::from_fn(|i| match self.child(i) {
|
||||
Child::PageTable(node) => unsafe {
|
||||
let frame = node.lock();
|
||||
// Possibly a cursor is waiting for the root lock to recycle this node.
|
||||
// We can skip copying empty page table nodes.
|
||||
if frame.nr_valid_children() != 0 {
|
||||
let cloned = frame.clone();
|
||||
let pte = self.read_pte(i);
|
||||
new_ptr.add(i).write(E::new(
|
||||
cloned.inner.start_paddr(),
|
||||
pte.prop(),
|
||||
false,
|
||||
false,
|
||||
));
|
||||
Child::PageTable(Arc::new(SpinLock::new(cloned)))
|
||||
} else {
|
||||
Child::None
|
||||
}
|
||||
},
|
||||
Child::Frame(_) | Child::Untracked(_) => {
|
||||
unsafe {
|
||||
new_ptr.add(i).write(self.read_pte(i));
|
||||
}
|
||||
self.children[i].clone()
|
||||
}
|
||||
Child::None => Child::None,
|
||||
}));
|
||||
Self {
|
||||
inner: new_frame,
|
||||
children,
|
||||
nr_valid_children: self.nr_valid_children,
|
||||
fn drop(&mut self) {
|
||||
// Release the lock.
|
||||
if !self.newly_created {
|
||||
self.meta.counter8_1.store(0, Ordering::Release);
|
||||
}
|
||||
// Drop the frame by `RawPageTableFrame::drop`.
|
||||
let _dropping_raw = RawPageTableFrame::<E, C>(self.start_paddr(), self.level, PhantomData);
|
||||
}
|
||||
}
|
||||
|
@ -1,19 +1,15 @@
|
||||
// SPDX-License-Identifier: MPL-2.0
|
||||
|
||||
use alloc::sync::Arc;
|
||||
use core::{fmt::Debug, marker::PhantomData, ops::Range, panic};
|
||||
use core::{fmt::Debug, marker::PhantomData, ops::Range};
|
||||
|
||||
use pod::Pod;
|
||||
|
||||
use super::{
|
||||
nr_subpage_per_huge, paddr_to_vaddr,
|
||||
page_prop::{CachePolicy, PageFlags, PageProperty, PrivilegedPageFlags},
|
||||
page_prop::{PageFlags, PageProperty},
|
||||
page_size, Paddr, PagingConstsTrait, PagingLevel, Vaddr,
|
||||
};
|
||||
use crate::{
|
||||
arch::mm::{activate_page_table, PageTableEntry, PagingConsts},
|
||||
sync::SpinLock,
|
||||
};
|
||||
use crate::arch::mm::{PageTableEntry, PagingConsts};
|
||||
|
||||
mod frame;
|
||||
use frame::*;
|
||||
@ -31,7 +27,7 @@ pub enum PageTableError {
|
||||
/// Using virtual address not aligned.
|
||||
UnalignedVaddr,
|
||||
/// Protecting a mapping that does not exist.
|
||||
ProtectingInvalid,
|
||||
ProtectingAbsent,
|
||||
/// Protecting a part of an already mapped page.
|
||||
ProtectingPartial,
|
||||
}
|
||||
@ -84,23 +80,18 @@ pub(crate) struct PageTable<
|
||||
E: PageTableEntryTrait = PageTableEntry,
|
||||
C: PagingConstsTrait = PagingConsts,
|
||||
> where
|
||||
[(); nr_subpage_per_huge::<C>()]:,
|
||||
[(); C::NR_LEVELS as usize]:,
|
||||
{
|
||||
root_frame: PtfRef<E, C>,
|
||||
root: RawPageTableFrame<E, C>,
|
||||
_phantom: PhantomData<M>,
|
||||
}
|
||||
|
||||
impl<E: PageTableEntryTrait, C: PagingConstsTrait> PageTable<UserMode, E, C>
|
||||
where
|
||||
[(); nr_subpage_per_huge::<C>()]:,
|
||||
[(); C::NR_LEVELS as usize]:,
|
||||
{
|
||||
impl PageTable<UserMode> {
|
||||
pub(crate) fn activate(&self) {
|
||||
// SAFETY: The usermode page table is safe to activate since the kernel
|
||||
// mappings are shared.
|
||||
unsafe {
|
||||
self.activate_unchecked();
|
||||
self.root.activate();
|
||||
}
|
||||
}
|
||||
|
||||
@ -121,48 +112,21 @@ where
|
||||
.unwrap();
|
||||
};
|
||||
let root_frame = cursor.leak_root_guard().unwrap();
|
||||
let mut new_root_frame = PageTableFrame::<E, C>::new();
|
||||
let half_of_entries = nr_subpage_per_huge::<C>() / 2;
|
||||
for i in 0..half_of_entries {
|
||||
// This is user space, deep copy the child.
|
||||
match root_frame.child(i) {
|
||||
Child::PageTable(node) => {
|
||||
let frame = node.lock();
|
||||
// Possibly a cursor is waiting for the root lock to recycle this node.
|
||||
// We can skip copying empty page table nodes.
|
||||
if frame.nr_valid_children() != 0 {
|
||||
let cloned = frame.clone();
|
||||
let pt = Child::PageTable(Arc::new(SpinLock::new(cloned)));
|
||||
new_root_frame.set_child(i, pt, Some(root_frame.read_pte_prop(i)), false);
|
||||
}
|
||||
}
|
||||
Child::None => {}
|
||||
Child::Frame(_) | Child::Untracked(_) => {
|
||||
panic!("Unexpected map child.");
|
||||
}
|
||||
}
|
||||
}
|
||||
for i in half_of_entries..nr_subpage_per_huge::<C>() {
|
||||
// This is kernel space, share the child.
|
||||
new_root_frame.set_child(
|
||||
i,
|
||||
root_frame.child(i).clone(),
|
||||
Some(root_frame.read_pte_prop(i)),
|
||||
false,
|
||||
const NR_PTES_PER_NODE: usize = nr_subpage_per_huge::<PagingConsts>();
|
||||
let new_root_frame = unsafe {
|
||||
root_frame.make_copy(
|
||||
0..NR_PTES_PER_NODE / 2,
|
||||
NR_PTES_PER_NODE / 2..NR_PTES_PER_NODE,
|
||||
)
|
||||
}
|
||||
PageTable::<UserMode, E, C> {
|
||||
root_frame: Arc::new(SpinLock::new(new_root_frame)),
|
||||
};
|
||||
PageTable::<UserMode> {
|
||||
root: new_root_frame.into_raw(),
|
||||
_phantom: PhantomData,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<E: PageTableEntryTrait, C: PagingConstsTrait> PageTable<KernelMode, E, C>
|
||||
where
|
||||
[(); nr_subpage_per_huge::<C>()]:,
|
||||
[(); C::NR_LEVELS as usize]:,
|
||||
{
|
||||
impl PageTable<KernelMode> {
|
||||
/// Create a new user page table.
|
||||
///
|
||||
/// This should be the only way to create the first user page table, that is
|
||||
@ -170,19 +134,13 @@ where
|
||||
///
|
||||
/// Then, one can use a user page table to call [`fork_copy_on_write`], creating
|
||||
/// other child page tables.
|
||||
pub(crate) fn create_user_page_table(&self) -> PageTable<UserMode, E, C> {
|
||||
let mut new_root_frame = PageTableFrame::<E, C>::new();
|
||||
let root_frame = self.root_frame.lock();
|
||||
for i in nr_subpage_per_huge::<C>() / 2..nr_subpage_per_huge::<C>() {
|
||||
new_root_frame.set_child(
|
||||
i,
|
||||
root_frame.child(i).clone(),
|
||||
Some(root_frame.read_pte_prop(i)),
|
||||
false,
|
||||
)
|
||||
}
|
||||
PageTable::<UserMode, E, C> {
|
||||
root_frame: Arc::new(SpinLock::new(new_root_frame)),
|
||||
pub(crate) fn create_user_page_table(&self) -> PageTable<UserMode> {
|
||||
let root_frame = self.root.copy_handle().lock();
|
||||
const NR_PTES_PER_NODE: usize = nr_subpage_per_huge::<PagingConsts>();
|
||||
let new_root_frame =
|
||||
unsafe { root_frame.make_copy(0..0, NR_PTES_PER_NODE / 2..NR_PTES_PER_NODE) };
|
||||
PageTable::<UserMode> {
|
||||
root: new_root_frame.into_raw(),
|
||||
_phantom: PhantomData,
|
||||
}
|
||||
}
|
||||
@ -193,26 +151,17 @@ where
|
||||
/// usize overflows, the caller should provide the index range of the root level pages
|
||||
/// instead of the virtual address range.
|
||||
pub(crate) fn make_shared_tables(&self, root_index: Range<usize>) {
|
||||
const NR_PTES_PER_NODE: usize = nr_subpage_per_huge::<PagingConsts>();
|
||||
let start = root_index.start;
|
||||
debug_assert!(start >= nr_subpage_per_huge::<C>() / 2);
|
||||
debug_assert!(start < nr_subpage_per_huge::<C>());
|
||||
debug_assert!(start >= NR_PTES_PER_NODE / 2);
|
||||
debug_assert!(start < NR_PTES_PER_NODE);
|
||||
let end = root_index.end;
|
||||
debug_assert!(end <= nr_subpage_per_huge::<C>());
|
||||
let mut root_frame = self.root_frame.lock();
|
||||
debug_assert!(end <= NR_PTES_PER_NODE);
|
||||
let mut root_frame = self.root.copy_handle().lock();
|
||||
for i in start..end {
|
||||
let no_such_child = root_frame.child(i).is_none();
|
||||
if no_such_child {
|
||||
let frame = Arc::new(SpinLock::new(PageTableFrame::<E, C>::new()));
|
||||
root_frame.set_child(
|
||||
i,
|
||||
Child::PageTable(frame),
|
||||
Some(PageProperty {
|
||||
flags: PageFlags::RWX,
|
||||
cache: CachePolicy::Writeback,
|
||||
priv_flags: PrivilegedPageFlags::GLOBAL,
|
||||
}),
|
||||
false,
|
||||
)
|
||||
if !root_frame.read_pte(i).is_present() {
|
||||
let frame = PageTableFrame::alloc(PagingConsts::NR_LEVELS - 1);
|
||||
root_frame.set_child_pt(i, frame.into_raw(), i < NR_PTES_PER_NODE * 3 / 4);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -220,20 +169,26 @@ where
|
||||
|
||||
impl<'a, M: PageTableMode, E: PageTableEntryTrait, C: PagingConstsTrait> PageTable<M, E, C>
|
||||
where
|
||||
[(); nr_subpage_per_huge::<C>()]:,
|
||||
[(); C::NR_LEVELS as usize]:,
|
||||
{
|
||||
/// Create a new empty page table. Useful for the kernel page table and IOMMU page tables only.
|
||||
pub(crate) fn empty() -> Self {
|
||||
PageTable {
|
||||
root_frame: Arc::new(SpinLock::new(PageTableFrame::<E, C>::new())),
|
||||
root: PageTableFrame::<E, C>::alloc(C::NR_LEVELS).into_raw(),
|
||||
_phantom: PhantomData,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) unsafe fn activate_unchecked(&self) {
|
||||
self.root.activate();
|
||||
}
|
||||
|
||||
/// The physical address of the root page table.
|
||||
pub(crate) fn root_paddr(&self) -> Paddr {
|
||||
self.root_frame.lock().start_paddr()
|
||||
///
|
||||
/// It is dangerous to directly provide the physical address of the root page table to the
|
||||
/// hardware since the page table frame may be dropped, resulting in UAF.
|
||||
pub(crate) unsafe fn root_paddr(&self) -> Paddr {
|
||||
self.root.paddr()
|
||||
}
|
||||
|
||||
pub(crate) unsafe fn map(
|
||||
@ -272,10 +227,6 @@ where
|
||||
unsafe { page_walk::<E, C>(self.root_paddr(), vaddr) }
|
||||
}
|
||||
|
||||
pub(crate) unsafe fn activate_unchecked(&self) {
|
||||
activate_page_table(self.root_paddr(), CachePolicy::Writeback);
|
||||
}
|
||||
|
||||
/// Create a new cursor exclusively accessing the virtual address range for mapping.
|
||||
///
|
||||
/// If another cursor is already accessing the range, the new cursor will wait until the
|
||||
@ -303,21 +254,7 @@ where
|
||||
/// This is only useful for IOMMU page tables. Think twice before using it in other cases.
|
||||
pub(crate) unsafe fn shallow_copy(&self) -> Self {
|
||||
PageTable {
|
||||
root_frame: self.root_frame.clone(),
|
||||
_phantom: PhantomData,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<M: PageTableMode, E: PageTableEntryTrait, C: PagingConstsTrait> Clone for PageTable<M, E, C>
|
||||
where
|
||||
[(); nr_subpage_per_huge::<C>()]:,
|
||||
[(); C::NR_LEVELS as usize]:,
|
||||
{
|
||||
fn clone(&self) -> Self {
|
||||
let frame = self.root_frame.lock();
|
||||
PageTable {
|
||||
root_frame: Arc::new(SpinLock::new(frame.clone())),
|
||||
root: self.root.copy_handle(),
|
||||
_phantom: PhantomData,
|
||||
}
|
||||
}
|
||||
@ -361,7 +298,7 @@ pub(super) unsafe fn page_walk<E: PageTableEntryTrait, C: PagingConstsTrait>(
|
||||
if !cur_pte.is_present() {
|
||||
return None;
|
||||
}
|
||||
if cur_pte.is_huge() {
|
||||
if cur_pte.is_last(cur_level) {
|
||||
debug_assert!(cur_level <= C::HIGHEST_TRANSLATION_LEVEL);
|
||||
break;
|
||||
}
|
||||
@ -393,12 +330,11 @@ pub(crate) trait PageTableEntryTrait: Clone + Copy + Sized + Pod + Debug {
|
||||
/// If the PTE is present with a valid mapping.
|
||||
fn is_present(&self) -> bool;
|
||||
|
||||
/// Create a new PTE with the given physical address and flags.
|
||||
/// The huge flag indicates that the PTE maps a huge page.
|
||||
/// The last flag indicates that the PTE is the last level page table.
|
||||
/// If the huge and last flags are both false, the PTE maps a page
|
||||
/// table node.
|
||||
fn new(paddr: Paddr, prop: PageProperty, huge: bool, last: bool) -> Self;
|
||||
/// Create a new PTE that maps to a frame, with the given physical address and page property.
|
||||
fn new_frame(paddr: Paddr, level: PagingLevel, prop: PageProperty) -> Self;
|
||||
|
||||
/// Create a new PTE that maps to a child page table.
|
||||
fn new_pt(paddr: Paddr) -> Self;
|
||||
|
||||
/// Get the physical address from the PTE.
|
||||
/// The physical address recorded in the PTE is either:
|
||||
@ -408,6 +344,11 @@ pub(crate) trait PageTableEntryTrait: Clone + Copy + Sized + Pod + Debug {
|
||||
|
||||
fn prop(&self) -> PageProperty;
|
||||
|
||||
/// If the PTE maps a huge page or a page table frame.
|
||||
fn is_huge(&self) -> bool;
|
||||
fn set_prop(&mut self, prop: PageProperty);
|
||||
|
||||
/// If the PTE maps a page rather than a child page table.
|
||||
///
|
||||
/// The level of the page table in which the entry resides is given, since architectures
/// like amd64 use a huge bit only in intermediate levels.
|
||||
fn is_last(&self, level: PagingLevel) -> bool;
|
||||
}
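To illustrate the `is_last` contract, here is a hedged sketch (not the crate's real `arch::mm::PageTableEntry`): on x86-64-style hardware, level-1 entries always map pages, while intermediate levels map a huge page only when the PS bit is set.

```rust
// Sketch only: an x86-64-flavored entry deciding whether it maps a page or a child table.
struct SketchPte(u64);

impl SketchPte {
    const HUGE_BIT: u64 = 1 << 7; // the PS bit, meaningful only at intermediate levels

    fn is_last(&self, level: u8) -> bool {
        // Level 1 always maps a base page; higher levels map a (huge) page only when
        // the PS bit is set, otherwise the entry points to a child page table.
        level == 1 || (self.0 & Self::HUGE_BIT) != 0
    }
}
```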
|
||||
|
@ -1,5 +1,7 @@
|
||||
// SPDX-License-Identifier: MPL-2.0
|
||||
|
||||
use core::mem::ManuallyDrop;
|
||||
|
||||
use super::*;
|
||||
use crate::vm::{
|
||||
kspace::LINEAR_MAPPING_BASE_VADDR,
|
||||
@ -25,8 +27,9 @@ fn test_range_check() {
|
||||
}
|
||||
|
||||
#[ktest]
|
||||
fn test_map_unmap() {
|
||||
fn test_tracked_map_unmap() {
|
||||
let pt = PageTable::<UserMode>::empty();
|
||||
|
||||
let from = PAGE_SIZE..PAGE_SIZE * 2;
|
||||
let frame = VmAllocOptions::new(1).alloc_single().unwrap();
|
||||
let start_paddr = frame.start_paddr();
|
||||
@ -35,17 +38,25 @@ fn test_map_unmap() {
|
||||
assert_eq!(pt.query(from.start + 10).unwrap().0, start_paddr + 10);
|
||||
unsafe { pt.unmap(&from).unwrap() };
|
||||
assert!(pt.query(from.start + 10).is_none());
|
||||
}
|
||||
|
||||
#[ktest]
|
||||
fn test_untracked_map_unmap() {
|
||||
let pt = PageTable::<KernelMode>::empty();
|
||||
const UNTRACKED_OFFSET: usize = crate::vm::kspace::LINEAR_MAPPING_BASE_VADDR;
|
||||
|
||||
let from_ppn = 13245..512 * 512 + 23456;
|
||||
let to_ppn = from_ppn.start - 11010..from_ppn.end - 11010;
|
||||
let from = PAGE_SIZE * from_ppn.start..PAGE_SIZE * from_ppn.end;
|
||||
let from =
|
||||
UNTRACKED_OFFSET + PAGE_SIZE * from_ppn.start..UNTRACKED_OFFSET + PAGE_SIZE * from_ppn.end;
|
||||
let to = PAGE_SIZE * to_ppn.start..PAGE_SIZE * to_ppn.end;
|
||||
let prop = PageProperty::new(PageFlags::RW, CachePolicy::Writeback);
|
||||
unsafe { pt.map(&from, &to, prop).unwrap() };
|
||||
for i in 0..100 {
|
||||
let offset = i * (PAGE_SIZE + 1000);
|
||||
assert_eq!(pt.query(from.start + offset).unwrap().0, to.start + offset);
|
||||
}
|
||||
let unmap = PAGE_SIZE * 123..PAGE_SIZE * 3434;
|
||||
let unmap = UNTRACKED_OFFSET + PAGE_SIZE * 123..UNTRACKED_OFFSET + PAGE_SIZE * 3434;
|
||||
unsafe { pt.unmap(&unmap).unwrap() };
|
||||
for i in 0..100 {
|
||||
let offset = i * (PAGE_SIZE + 10);
|
||||
@ -55,6 +66,9 @@ fn test_map_unmap() {
|
||||
assert_eq!(pt.query(from.start + offset).unwrap().0, to.start + offset);
|
||||
}
|
||||
}
|
||||
|
||||
// Since untracked mappings cannot be dropped, we just leak it here.
|
||||
let _ = ManuallyDrop::new(pt);
|
||||
}
|
||||
|
||||
#[ktest]
|
||||
@ -77,11 +91,30 @@ fn test_user_copy_on_write() {
|
||||
unsafe { pt.unmap(&from).unwrap() };
|
||||
assert!(pt.query(from.start + 10).is_none());
|
||||
assert_eq!(child_pt.query(from.start + 10).unwrap().0, start_paddr + 10);
|
||||
|
||||
let sibling_pt = pt.fork_copy_on_write();
|
||||
assert!(sibling_pt.query(from.start + 10).is_none());
|
||||
assert_eq!(child_pt.query(from.start + 10).unwrap().0, start_paddr + 10);
|
||||
drop(pt);
|
||||
assert_eq!(child_pt.query(from.start + 10).unwrap().0, start_paddr + 10);
|
||||
unsafe { child_pt.unmap(&from).unwrap() };
|
||||
assert!(child_pt.query(from.start + 10).is_none());
|
||||
unsafe {
|
||||
sibling_pt
|
||||
.cursor_mut(&from)
|
||||
.unwrap()
|
||||
.map(frame.clone(), prop)
|
||||
};
|
||||
assert_eq!(
|
||||
sibling_pt.query(from.start + 10).unwrap().0,
|
||||
start_paddr + 10
|
||||
);
|
||||
assert!(child_pt.query(from.start + 10).is_none());
|
||||
}
|
||||
|
||||
type Qr = PageTableQueryResult;
|
||||
|
||||
#[derive(Debug)]
|
||||
#[derive(Clone, Debug)]
|
||||
struct BasePagingConsts {}
|
||||
|
||||
impl PagingConstsTrait for BasePagingConsts {
|
||||
@ -94,32 +127,38 @@ impl PagingConstsTrait for BasePagingConsts {
|
||||
|
||||
#[ktest]
|
||||
fn test_base_protect_query() {
|
||||
let pt = PageTable::<UserMode, PageTableEntry, BasePagingConsts>::empty();
|
||||
let pt = PageTable::<UserMode>::empty();
|
||||
|
||||
let from_ppn = 1..1000;
|
||||
let from = PAGE_SIZE * from_ppn.start..PAGE_SIZE * from_ppn.end;
|
||||
let to = PAGE_SIZE * 1000..PAGE_SIZE * 1999;
|
||||
let to = VmAllocOptions::new(999).alloc().unwrap();
|
||||
let prop = PageProperty::new(PageFlags::RW, CachePolicy::Writeback);
|
||||
unsafe { pt.map(&from, &to, prop).unwrap() };
|
||||
unsafe {
|
||||
let mut cursor = pt.cursor_mut(&from).unwrap();
|
||||
for frame in to {
|
||||
cursor.map(frame.clone(), prop);
|
||||
}
|
||||
}
|
||||
for (qr, i) in pt.cursor(&from).unwrap().zip(from_ppn) {
|
||||
let Qr::MappedUntyped { va, pa, len, prop } = qr else {
|
||||
panic!("Expected MappedUntyped, got {:#x?}", qr);
|
||||
let Qr::Mapped { va, frame, prop } = qr else {
|
||||
panic!("Expected Mapped, got {:#x?}", qr);
|
||||
};
|
||||
assert_eq!(prop.flags, PageFlags::RW);
|
||||
assert_eq!(prop.cache, CachePolicy::Writeback);
|
||||
assert_eq!(va..va + len, i * PAGE_SIZE..(i + 1) * PAGE_SIZE);
|
||||
assert_eq!(va..va + frame.size(), i * PAGE_SIZE..(i + 1) * PAGE_SIZE);
|
||||
}
|
||||
let prot = PAGE_SIZE * 18..PAGE_SIZE * 20;
|
||||
unsafe { pt.protect(&prot, |p| p.flags -= PageFlags::W).unwrap() };
|
||||
for (qr, i) in pt.cursor(&prot).unwrap().zip(18..20) {
|
||||
let Qr::MappedUntyped { va, pa, len, prop } = qr else {
|
||||
panic!("Expected MappedUntyped, got {:#x?}", qr);
|
||||
let Qr::Mapped { va, frame, prop } = qr else {
|
||||
panic!("Expected Mapped, got {:#x?}", qr);
|
||||
};
|
||||
assert_eq!(prop.flags, PageFlags::R);
|
||||
assert_eq!(va..va + len, i * PAGE_SIZE..(i + 1) * PAGE_SIZE);
|
||||
assert_eq!(va..va + frame.size(), i * PAGE_SIZE..(i + 1) * PAGE_SIZE);
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
#[derive(Clone, Debug)]
|
||||
struct VeryHugePagingConsts {}
|
||||
|
||||
impl PagingConstsTrait for VeryHugePagingConsts {
|
||||
@ -131,8 +170,10 @@ impl PagingConstsTrait for VeryHugePagingConsts {
|
||||
}
|
||||
|
||||
#[ktest]
|
||||
fn test_large_protect_query() {
|
||||
let pt = PageTable::<UserMode, PageTableEntry, VeryHugePagingConsts>::empty();
|
||||
fn test_untracked_large_protect_query() {
|
||||
let pt = PageTable::<KernelMode, PageTableEntry, VeryHugePagingConsts>::empty();
|
||||
const UNTRACKED_OFFSET: usize = crate::vm::kspace::LINEAR_MAPPING_BASE_VADDR;
|
||||
|
||||
let gmult = 512 * 512;
|
||||
let from_ppn = gmult - 512..gmult + gmult + 514;
|
||||
let to_ppn = gmult - 512 - 512..gmult + gmult - 512 + 514;
|
||||
@ -141,13 +182,14 @@ fn test_large_protect_query() {
|
||||
// from: |--2M--|-------------1G-------------|--2M--|-|
|
||||
// to: |--2M--|--2M--|-------------1G-------------|-|
|
||||
// Thus all mappings except the last few pages are mapped in 2M huge pages
|
||||
let from = PAGE_SIZE * from_ppn.start..PAGE_SIZE * from_ppn.end;
|
||||
let from =
|
||||
UNTRACKED_OFFSET + PAGE_SIZE * from_ppn.start..UNTRACKED_OFFSET + PAGE_SIZE * from_ppn.end;
|
||||
let to = PAGE_SIZE * to_ppn.start..PAGE_SIZE * to_ppn.end;
|
||||
let prop = PageProperty::new(PageFlags::RW, CachePolicy::Writeback);
|
||||
unsafe { pt.map(&from, &to, prop).unwrap() };
|
||||
for (qr, i) in pt.cursor(&from).unwrap().zip(0..512 + 2 + 2) {
|
||||
let Qr::MappedUntyped { va, pa, len, prop } = qr else {
|
||||
panic!("Expected MappedUntyped, got {:#x?}", qr);
|
||||
let Qr::MappedUntracked { va, pa, len, prop } = qr else {
|
||||
panic!("Expected MappedUntracked, got {:#x?}", qr);
|
||||
};
|
||||
assert_eq!(prop.flags, PageFlags::RW);
|
||||
assert_eq!(prop.cache, CachePolicy::Writeback);
|
||||
@ -166,24 +208,26 @@ fn test_large_protect_query() {
|
||||
}
|
||||
}
|
||||
let ppn = from_ppn.start + 18..from_ppn.start + 20;
|
||||
let va = PAGE_SIZE * ppn.start..PAGE_SIZE * ppn.end;
|
||||
let va = UNTRACKED_OFFSET + PAGE_SIZE * ppn.start..UNTRACKED_OFFSET + PAGE_SIZE * ppn.end;
|
||||
unsafe { pt.protect(&va, |p| p.flags -= PageFlags::W).unwrap() };
|
||||
for (qr, i) in pt
|
||||
.cursor(&(va.start - PAGE_SIZE..va.start))
|
||||
.unwrap()
|
||||
.zip(ppn.start - 1..ppn.start)
|
||||
{
|
||||
let Qr::MappedUntyped { va, pa, len, prop } = qr else {
|
||||
panic!("Expected MappedUntyped, got {:#x?}", qr);
|
||||
let Qr::MappedUntracked { va, pa, len, prop } = qr else {
|
||||
panic!("Expected MappedUntracked, got {:#x?}", qr);
|
||||
};
|
||||
assert_eq!(prop.flags, PageFlags::RW);
|
||||
let va = va - UNTRACKED_OFFSET;
|
||||
assert_eq!(va..va + len, i * PAGE_SIZE..(i + 1) * PAGE_SIZE);
|
||||
}
|
||||
for (qr, i) in pt.cursor(&va).unwrap().zip(ppn.clone()) {
|
||||
let Qr::MappedUntyped { va, pa, len, prop } = qr else {
|
||||
panic!("Expected MappedUntyped, got {:#x?}", qr);
|
||||
let Qr::MappedUntracked { va, pa, len, prop } = qr else {
|
||||
panic!("Expected MappedUntracked, got {:#x?}", qr);
|
||||
};
|
||||
assert_eq!(prop.flags, PageFlags::R);
|
||||
let va = va - UNTRACKED_OFFSET;
|
||||
assert_eq!(va..va + len, i * PAGE_SIZE..(i + 1) * PAGE_SIZE);
|
||||
}
|
||||
for (qr, i) in pt
|
||||
@ -191,10 +235,14 @@ fn test_large_protect_query() {
|
||||
.unwrap()
|
||||
.zip(ppn.end..ppn.end + 1)
|
||||
{
|
||||
let Qr::MappedUntyped { va, pa, len, prop } = qr else {
|
||||
panic!("Expected MappedUntyped, got {:#x?}", qr);
|
||||
let Qr::MappedUntracked { va, pa, len, prop } = qr else {
|
||||
panic!("Expected MappedUntracked, got {:#x?}", qr);
|
||||
};
|
||||
assert_eq!(prop.flags, PageFlags::RW);
|
||||
let va = va - UNTRACKED_OFFSET;
|
||||
assert_eq!(va..va + len, i * PAGE_SIZE..(i + 1) * PAGE_SIZE);
|
||||
}
|
||||
|
||||
// Since untracked mappings cannot be dropped, we just leak it here.
|
||||
let _ = ManuallyDrop::new(pt);
|
||||
}
|
||||
|
@ -90,7 +90,7 @@ impl VmSpace {
|
||||
|
||||
// If overwrite is forbidden, we should check if there are existing mappings
|
||||
if !options.can_overwrite {
|
||||
while let Some(qr) = cursor.query() {
|
||||
while let Some(qr) = cursor.next() {
|
||||
if matches!(qr, PtQr::Mapped { .. }) {
|
||||
return Err(Error::MapAlreadyMappedVaddr);
|
||||
}
|
||||
@ -350,7 +350,7 @@ impl Iterator for VmQueryIter<'_> {
|
||||
PtQr::NotMapped { va, len } => VmQueryResult::NotMapped { va, len },
|
||||
PtQr::Mapped { va, frame, prop } => VmQueryResult::Mapped { va, frame, prop },
|
||||
// It is not possible to map untyped memory in user space.
|
||||
PtQr::MappedUntyped { .. } => unreachable!(),
|
||||
PtQr::MappedUntracked { .. } => unreachable!(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|