From 69d464fc6b1411eb5364affefc39989eb8fb5f23 Mon Sep 17 00:00:00 2001 From: Zhang Junyang Date: Wed, 15 May 2024 05:41:30 +0000 Subject: [PATCH] Use metadata to track `VmFrame`s In this commit, the frame metadata storage schema is implemented. The bootstrap process is refactored and a boot page table is introduced to perform early stage metadata mapping. The metadata is then used to track `VmFrame`s instead of the former `Arc` approach. --- .../src/arch/x86/iommu/second_stage.rs | 9 +- framework/aster-frame/src/arch/x86/mm/mod.rs | 9 +- .../aster-frame/src/collections/xarray.rs | 11 +- framework/aster-frame/src/lib.rs | 12 +- .../allocator.rs} | 58 +- framework/aster-frame/src/vm/frame/meta.rs | 158 ++++++ framework/aster-frame/src/vm/frame/mod.rs | 527 ++++++++++++++++++ .../aster-frame/src/vm/heap_allocator.rs | 2 +- framework/aster-frame/src/vm/io.rs | 237 ++++++++ framework/aster-frame/src/vm/kspace.rs | 239 ++++++-- framework/aster-frame/src/vm/mod.rs | 44 +- framework/aster-frame/src/vm/options.rs | 45 +- framework/aster-frame/src/vm/page_prop.rs | 4 +- .../aster-frame/src/vm/page_table/boot_pt.rs | 123 ++++ .../aster-frame/src/vm/page_table/cursor.rs | 48 +- .../aster-frame/src/vm/page_table/frame.rs | 44 +- .../aster-frame/src/vm/page_table/mod.rs | 56 +- .../aster-frame/src/vm/page_table/test.rs | 10 +- kernel/aster-nix/src/vm/vmo/mod.rs | 2 +- 19 files changed, 1433 insertions(+), 205 deletions(-) rename framework/aster-frame/src/vm/{frame_allocator.rs => frame/allocator.rs} (62%) create mode 100644 framework/aster-frame/src/vm/frame/meta.rs create mode 100644 framework/aster-frame/src/vm/frame/mod.rs create mode 100644 framework/aster-frame/src/vm/page_table/boot_pt.rs diff --git a/framework/aster-frame/src/arch/x86/iommu/second_stage.rs b/framework/aster-frame/src/arch/x86/iommu/second_stage.rs index 188897a23..acf910946 100644 --- a/framework/aster-frame/src/arch/x86/iommu/second_stage.rs +++ b/framework/aster-frame/src/arch/x86/iommu/second_stage.rs @@ -7,7 +7,7 @@ use pod::Pod; use crate::vm::{ page_prop::{CachePolicy, PageFlags, PrivilegedPageFlags as PrivFlags}, page_table::{PageTableEntryTrait, PageTableMode}, - Paddr, PageProperty, PagingConstsTrait, Vaddr, + Paddr, PageProperty, PagingConstsTrait, PagingLevel, Vaddr, }; /// The page table used by iommu maps the device address @@ -20,13 +20,14 @@ impl PageTableMode for DeviceMode { const VADDR_RANGE: Range = 0..0x1_0000_0000; } -#[derive(Debug)] +#[derive(Clone, Debug)] pub(super) struct PagingConsts {} impl PagingConstsTrait for PagingConsts { const BASE_PAGE_SIZE: usize = 4096; - const NR_LEVELS: usize = 3; - const HIGHEST_TRANSLATION_LEVEL: usize = 1; + const NR_LEVELS: PagingLevel = 3; + const ADDRESS_WIDTH: usize = 39; + const HIGHEST_TRANSLATION_LEVEL: PagingLevel = 1; const PTE_SIZE: usize = core::mem::size_of::(); } diff --git a/framework/aster-frame/src/arch/x86/mm/mod.rs b/framework/aster-frame/src/arch/x86/mm/mod.rs index 34deb3c9d..6f68e060b 100644 --- a/framework/aster-frame/src/arch/x86/mm/mod.rs +++ b/framework/aster-frame/src/arch/x86/mm/mod.rs @@ -9,18 +9,19 @@ use x86_64::{instructions::tlb, structures::paging::PhysFrame, VirtAddr}; use crate::vm::{ page_prop::{CachePolicy, PageFlags, PageProperty, PrivilegedPageFlags as PrivFlags}, page_table::PageTableEntryTrait, - Paddr, PagingConstsTrait, Vaddr, PAGE_SIZE, + Paddr, PagingConstsTrait, PagingLevel, Vaddr, PAGE_SIZE, }; pub(crate) const NR_ENTRIES_PER_PAGE: usize = 512; -#[derive(Debug)] +#[derive(Clone, Debug)] pub struct PagingConsts {} impl 
PagingConstsTrait for PagingConsts { const BASE_PAGE_SIZE: usize = 4096; - const NR_LEVELS: usize = 4; - const HIGHEST_TRANSLATION_LEVEL: usize = 2; + const NR_LEVELS: PagingLevel = 4; + const ADDRESS_WIDTH: usize = 48; + const HIGHEST_TRANSLATION_LEVEL: PagingLevel = 2; const PTE_SIZE: usize = core::mem::size_of::(); } diff --git a/framework/aster-frame/src/collections/xarray.rs b/framework/aster-frame/src/collections/xarray.rs index 07f11ef43..67464a59d 100644 --- a/framework/aster-frame/src/collections/xarray.rs +++ b/framework/aster-frame/src/collections/xarray.rs @@ -3,13 +3,12 @@ //! This module introduces the xarray crate and provides relevant support and interfaces for `XArray`. extern crate xarray as xarray_crate; -use alloc::sync::Arc; use core::{marker::PhantomData, mem::ManuallyDrop, ops::Deref}; use xarray_crate::ItemEntry; pub use xarray_crate::{Cursor, CursorMut, XArray, XMark}; -use crate::vm::VmFrame; +use crate::vm::{FrameMetaRef, VmFrame}; /// `VmFrameRef` is a struct that can work as `&'a VmFrame`. pub struct VmFrameRef<'a> { @@ -25,20 +24,20 @@ impl<'a> Deref for VmFrameRef<'a> { } } -// SAFETY: `VmFrame` is essentially an `Arc` smart pointer that points to a location which is aligned to 4, -// meeting the requirements of the `ItemEntry` for `XArray`. +// SAFETY: `VmFrame` is essentially an `*const FrameMeta` that could be used as a `*const` pointer. +// The pointer is also aligned to 4. unsafe impl ItemEntry for VmFrame { type Ref<'a> = VmFrameRef<'a> where Self: 'a; fn into_raw(self) -> *const () { - let ptr = Arc::as_ptr(&self.frame_index); + let ptr = self.meta.inner; let _ = ManuallyDrop::new(self); ptr.cast() } unsafe fn from_raw(raw: *const ()) -> Self { Self { - frame_index: Arc::from_raw(raw.cast()), + meta: FrameMetaRef { inner: raw.cast() }, } } diff --git a/framework/aster-frame/src/lib.rs b/framework/aster-frame/src/lib.rs index a05f23cac..573475ae6 100644 --- a/framework/aster-frame/src/lib.rs +++ b/framework/aster-frame/src/lib.rs @@ -68,17 +68,7 @@ pub fn init() { trap::init(); arch::after_all_init(); bus::init(); - // TODO: We activate the kernel page table here because the new kernel page table - // has mappings for MMIO which is required for the components initialization. We - // should refactor the initialization process to avoid this. - // SAFETY: we are activating the unique kernel page table. 
- unsafe { - vm::kspace::KERNEL_PAGE_TABLE - .get() - .unwrap() - .activate_unchecked(); - crate::arch::mm::tlb_flush_all_including_global(); - } + vm::kspace::activate_kernel_page_table(); invoke_ffi_init_funcs(); } diff --git a/framework/aster-frame/src/vm/frame_allocator.rs b/framework/aster-frame/src/vm/frame/allocator.rs similarity index 62% rename from framework/aster-frame/src/vm/frame_allocator.rs rename to framework/aster-frame/src/vm/frame/allocator.rs index 8fdf5cc14..7e73023b7 100644 --- a/framework/aster-frame/src/vm/frame_allocator.rs +++ b/framework/aster-frame/src/vm/frame/allocator.rs @@ -1,22 +1,23 @@ // SPDX-License-Identifier: MPL-2.0 -use alloc::vec::Vec; +use alloc::{alloc::Layout, vec::Vec}; use align_ext::AlignExt; use buddy_system_allocator::FrameAllocator; use log::info; use spin::Once; -use super::{frame::VmFrameFlags, VmFrame, VmFrameVec, VmSegment}; +use super::{VmFrame, VmFrameVec, VmSegment}; use crate::{ + arch::mm::PagingConsts, boot::memory_region::{MemoryRegion, MemoryRegionType}, sync::SpinLock, - vm::PAGE_SIZE, + vm::{nr_base_per_page, PagingLevel, PAGE_SIZE}, }; -pub(super) static FRAME_ALLOCATOR: Once> = Once::new(); +pub(in crate::vm) static FRAME_ALLOCATOR: Once> = Once::new(); -pub(crate) fn alloc(nframes: usize, flags: VmFrameFlags) -> Option { +pub(crate) fn alloc(nframes: usize) -> Option { FRAME_ALLOCATOR .get() .unwrap() @@ -24,27 +25,36 @@ pub(crate) fn alloc(nframes: usize, flags: VmFrameFlags) -> Option { .alloc(nframes) .map(|start| { let mut vector = Vec::new(); - // SAFETY: The frame index is valid. - unsafe { - for i in 0..nframes { - let frame = VmFrame::new( - (start + i) * PAGE_SIZE, - flags.union(VmFrameFlags::NEED_DEALLOC), - ); - vector.push(frame); - } + for i in 0..nframes { + let paddr = (start + i) * PAGE_SIZE; + // SAFETY: The frame index is valid. + let frame = unsafe { VmFrame::from_free_raw(paddr, 1) }; + vector.push(frame); } VmFrameVec(vector) }) } -pub(crate) fn alloc_single(flags: VmFrameFlags) -> Option { - FRAME_ALLOCATOR.get().unwrap().lock().alloc(1).map(|idx| +pub(crate) fn alloc_single(level: PagingLevel) -> Option { + FRAME_ALLOCATOR + .get() + .unwrap() + .lock() + .alloc_aligned( + Layout::from_size_align( + nr_base_per_page::(level), + nr_base_per_page::(level), + ) + .unwrap(), + ) + .map(|idx| { + let paddr = idx * PAGE_SIZE; // SAFETY: The frame index is valid. - unsafe { VmFrame::new(idx * PAGE_SIZE, flags.union(VmFrameFlags::NEED_DEALLOC)) }) + unsafe { VmFrame::from_free_raw(paddr, level) } + }) } -pub(crate) fn alloc_contiguous(nframes: usize, flags: VmFrameFlags) -> Option { +pub(crate) fn alloc_contiguous(nframes: usize) -> Option { FRAME_ALLOCATOR .get() .unwrap() @@ -56,21 +66,10 @@ pub(crate) fn alloc_contiguous(nframes: usize, flags: VmFrameFlags) -> Option(paddr: Paddr, level: PagingLevel) -> Vaddr { + let kvspace = FRAME_METADATA_CAP_VADDR - FRAME_METADATA_BASE_VADDR; + let base = FRAME_METADATA_CAP_VADDR - (kvspace >> (level - 1)); + let offset = paddr / page_size::(level); + base + offset * size_of::() + } + + /// Convert a virtual address of the metadata container to the physical address of the page. + pub const fn meta_to_page(vaddr: Vaddr) -> Paddr { + let kvspace = FRAME_METADATA_CAP_VADDR - FRAME_METADATA_BASE_VADDR; + let level = level_of_meta(vaddr); + let base = FRAME_METADATA_CAP_VADDR - (kvspace >> (level - 1)); + let offset = (vaddr - base) / size_of::(); + offset * page_size::(level) + } + + /// Get the level of the page from the address of the metadata container. 
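+    // A worked example of this schema, assuming the x86-64 layout defined in
+    // `kspace.rs` (a 2-TiB metadata area starting at 0xffff_e000_0000_0000)
+    // and the 8-byte `FrameMeta` defined below:
+    // - level 1, paddr 0x1000: offset = 0x1000 / 0x1000 = 1, so its metadata
+    //   lives at 0xffff_e000_0000_0000 + 1 * 8 = 0xffff_e000_0000_0008;
+    // - level 2, paddr 0x20_0000: the base for that level is
+    //   0xffff_e100_0000_0000 (the cap minus half the area) and the offset is
+    //   again 1, giving 0xffff_e100_0000_0008.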
+    pub const fn level_of_meta(vaddr: Vaddr) -> PagingLevel {
+        let kvspace = FRAME_METADATA_CAP_VADDR - FRAME_METADATA_BASE_VADDR;
+        (kvspace.ilog2() - (FRAME_METADATA_CAP_VADDR - (vaddr + 1)).ilog2()) as PagingLevel
+    }
+
+    #[cfg(ktest)]
+    #[ktest]
+    fn test_meta_mapping() {
+        use crate::arch::mm::PagingConsts;
+        for level in 1..=3 {
+            let meta = page_to_meta::<PagingConsts>(0, level);
+            assert_eq!(meta_to_page::<PagingConsts>(meta), 0);
+            assert_eq!(level_of_meta(meta), level);
+            let paddr = 123456 * page_size::<PagingConsts>(level);
+            let meta = page_to_meta::<PagingConsts>(paddr, level);
+            assert_eq!(meta_to_page::<PagingConsts>(meta), paddr);
+            assert_eq!(level_of_meta(meta), level);
+        }
+    }
+}
+
+/// A reference to the metadata of a physical frame.
+#[derive(Copy, Clone, Debug, PartialEq, Eq)]
+pub struct FrameMetaRef {
+    // FIXME: this shouldn't be public but XArray needs it.
+    pub(crate) inner: *const FrameMeta,
+}
+
+impl FrameMetaRef {
+    /// Create a new reference to the metadata of a raw frame.
+    ///
+    /// # Safety
+    ///
+    /// The caller must ensure that
+    /// - the metadata is initialized before any access;
+    /// - the super page, if it would be used, must be split.
+    pub unsafe fn from_raw(paddr: Paddr, level: PagingLevel) -> Self {
+        debug_assert_eq!(paddr % page_size::<PagingConsts>(level), 0);
+        let vaddr = mapping::page_to_meta::<PagingConsts>(paddr, level);
+        Self {
+            inner: vaddr as *const FrameMeta,
+        }
+    }
+
+    /// # Safety
+    ///
+    /// The caller must ensure that the reference is exclusive.
+    pub unsafe fn deref_mut(&mut self) -> &mut FrameMeta {
+        &mut *(self.inner as *mut FrameMeta)
+    }
+
+    /// Get the physical address of the frame.
+    pub fn paddr(&self) -> Paddr {
+        mapping::meta_to_page::<PagingConsts>(self.inner as usize)
+    }
+
+    /// Get the level of the page.
+    pub fn level(&self) -> PagingLevel {
+        mapping::level_of_meta(self.inner as usize)
+    }
+
+    /// Get the size of the frame.
+    pub fn size(&self) -> usize {
+        page_size::<PagingConsts>(self.level())
+    }
+}
+
+impl Deref for FrameMetaRef {
+    type Target = FrameMeta;
+
+    fn deref(&self) -> &Self::Target {
+        // SAFETY: The metadata container is ensured to be initialized before any access.
+        unsafe { &*self.inner }
+    }
+}
+
+/// The metadata of a physical frame.
+///
+/// If a physical frame exists, the unique metadata must be initialized somewhere
+/// just for it. The place is decided by the schema defined in [`mapping`].
+///
+/// The zero value of the metadata must be valid and it's used as the initial state
+/// of a frame.
+#[repr(C)]
+pub struct FrameMeta {
+    pub frame_type: FrameType, // 1 byte
+    /// The first 8-bit counter.
+    /// Currently unused.
+    pub counter8_1: AtomicU8, // 1 byte
+    /// The second 8-bit counter.
+    /// Currently unused.
+    pub counter8_2: AtomicU8, // 1 byte
+    /// The third 8-bit counter.
+    /// Currently unused.
+    pub counter8_3: AtomicU8, // 1 byte
+    /// The first 32-bit counter.
+    /// It is used in different types of frames with different semantics.
+    /// - For [`FrameType::Anonymous`], it is the handle count.
+    pub counter32_1: AtomicU32, // 4 bytes
+}
+
+const_assert_eq!(core::mem::size_of::<FrameMeta>(), 8);
+
+#[repr(u8)]
+pub enum FrameType {
+    Free = 0,
+    /// The frame allocated to store metadata.
+ Meta, + Anonymous, + PageTable, +} diff --git a/framework/aster-frame/src/vm/frame/mod.rs b/framework/aster-frame/src/vm/frame/mod.rs new file mode 100644 index 000000000..390e289a2 --- /dev/null +++ b/framework/aster-frame/src/vm/frame/mod.rs @@ -0,0 +1,527 @@ +// SPDX-License-Identifier: MPL-2.0 + +pub(crate) mod allocator; +pub(in crate::vm) mod meta; + +use alloc::vec; +use core::{ + mem::ManuallyDrop, + ops::Range, + sync::atomic::{self, Ordering}, +}; + +use meta::{FrameMetaRef, FrameType}; + +use crate::{ + prelude::*, + vm::{HasPaddr, PagingLevel, VmIo, VmReader, VmWriter, PAGE_SIZE}, + Error, +}; + +/// A collection of base page frames (regular physical memory pages). +/// +/// For the most parts, `VmFrameVec` is like `Vec`. But the +/// implementation may or may not be based on `Vec`. Having a dedicated +/// type to represent a series of page frames is convenient because, +/// more often than not, one needs to operate on a batch of frames rather +/// a single frame. +#[derive(Debug, Clone)] +pub struct VmFrameVec(pub(crate) Vec); + +impl VmFrameVec { + pub fn get(&self, index: usize) -> Option<&VmFrame> { + self.0.get(index) + } + + /// returns an empty VmFrame vec + pub fn empty() -> Self { + Self(Vec::new()) + } + + pub fn new_with_capacity(capacity: usize) -> Self { + Self(Vec::with_capacity(capacity)) + } + + /// Pushs a new frame to the collection. + pub fn push(&mut self, new_frame: VmFrame) { + self.0.push(new_frame); + } + + /// Pop a frame from the collection. + pub fn pop(&mut self) -> Option { + self.0.pop() + } + + /// Removes a frame at a position. + pub fn remove(&mut self, at: usize) -> VmFrame { + self.0.remove(at) + } + + /// Append some frames. + pub fn append(&mut self, more: &mut VmFrameVec) -> Result<()> { + self.0.append(&mut more.0); + Ok(()) + } + + /// Truncate some frames. + /// + /// If `new_len >= self.len()`, then this method has no effect. + pub fn truncate(&mut self, new_len: usize) { + if new_len >= self.0.len() { + return; + } + self.0.truncate(new_len) + } + + /// Returns an iterator + pub fn iter(&self) -> core::slice::Iter<'_, VmFrame> { + self.0.iter() + } + + /// Returns the number of frames. + pub fn len(&self) -> usize { + self.0.len() + } + + /// Returns whether the frame collection is empty. + pub fn is_empty(&self) -> bool { + self.0.is_empty() + } + + /// Returns the number of bytes. + /// + /// This method is equivalent to `self.len() * BASE_PAGE_SIZE`. 
+ pub fn nbytes(&self) -> usize { + self.0.len() * PAGE_SIZE + } + + pub fn from_one_frame(frame: VmFrame) -> Self { + Self(vec![frame]) + } +} + +impl IntoIterator for VmFrameVec { + type Item = VmFrame; + + type IntoIter = alloc::vec::IntoIter; + + fn into_iter(self) -> Self::IntoIter { + self.0.into_iter() + } +} + +impl VmIo for VmFrameVec { + fn read_bytes(&self, offset: usize, buf: &mut [u8]) -> Result<()> { + // Do bound check with potential integer overflow in mind + let max_offset = offset.checked_add(buf.len()).ok_or(Error::Overflow)?; + if max_offset > self.nbytes() { + return Err(Error::InvalidArgs); + } + + let num_unread_pages = offset / PAGE_SIZE; + let mut start = offset % PAGE_SIZE; + let mut buf_writer: VmWriter = buf.into(); + for frame in self.0.iter().skip(num_unread_pages) { + let read_len = frame.reader().skip(start).read(&mut buf_writer); + if read_len == 0 { + break; + } + start = 0; + } + Ok(()) + } + + fn write_bytes(&self, offset: usize, buf: &[u8]) -> Result<()> { + // Do bound check with potential integer overflow in mind + let max_offset = offset.checked_add(buf.len()).ok_or(Error::Overflow)?; + if max_offset > self.nbytes() { + return Err(Error::InvalidArgs); + } + + let num_unwrite_pages = offset / PAGE_SIZE; + let mut start = offset % PAGE_SIZE; + let mut buf_reader: VmReader = buf.into(); + for frame in self.0.iter().skip(num_unwrite_pages) { + let write_len = frame.writer().skip(start).write(&mut buf_reader); + if write_len == 0 { + break; + } + start = 0; + } + Ok(()) + } +} + +/// An iterator for frames. +pub struct FrameVecIter<'a> { + frames: &'a VmFrameVec, + current: usize, +} + +impl<'a> FrameVecIter<'a> { + pub fn new(frames: &'a VmFrameVec) -> Self { + Self { frames, current: 0 } + } +} + +impl<'a> Iterator for FrameVecIter<'a> { + type Item = &'a VmFrame; + + fn next(&mut self) -> Option { + if self.current >= self.frames.0.len() { + return None; + } + Some(self.frames.0.get(self.current).unwrap()) + } +} + +#[derive(Debug)] +/// A handle to a page frame. +/// +/// The referenced page frame could either be huge or regular, which can be +/// told by the [`VmFrame::size`] method. It is ensured that there would be +/// only one TLB entry for such a frame if it is mapped to a virtual address +/// and the architecture supports huge TLB entries. +/// +/// An instance of `VmFrame` is a handle to a page frame (a physical memory +/// page). A cloned `VmFrame` refers to the same page frame as the original. +/// As the original and cloned instances point to the same physical address, +/// they are treated as equal to each other. Behind the scene, a reference +/// counter is maintained for each page frame so that when all instances of +/// `VmFrame` that refer to the same page frame are dropped, the page frame +/// will be globally freed. +pub struct VmFrame { + pub(crate) meta: FrameMetaRef, +} + +unsafe impl Send for VmFrame {} +unsafe impl Sync for VmFrame {} + +impl Clone for VmFrame { + fn clone(&self) -> Self { + self.meta.counter32_1.fetch_add(1, Ordering::Relaxed); + Self { meta: self.meta } + } +} + +impl HasPaddr for VmFrame { + fn paddr(&self) -> Paddr { + self.start_paddr() + } +} + +impl VmFrame { + /// Creates a new `VmFrame` from the given physical address and level. + /// + /// # Panic + /// + /// The function panics if the given frame is not free or is managed + /// by a non-free super page. 
+ /// + /// # Safety + /// + /// The caller must ensure that the given physical address is valid, and + /// the page is free thus not accessed by any other objects or handles. + pub(crate) unsafe fn from_free_raw(paddr: Paddr, level: PagingLevel) -> Self { + let mut meta = FrameMetaRef::from_raw(paddr, level); + assert!(matches!(meta.frame_type, FrameType::Free)); + meta.deref_mut().frame_type = FrameType::Anonymous; + meta.counter32_1.fetch_add(1, Ordering::Relaxed); + Self { meta } + } + + /// Returns the physical address of the page frame. + pub fn start_paddr(&self) -> Paddr { + self.meta.paddr() + } + + pub fn size(&self) -> usize { + self.meta.size() + } + + pub fn end_paddr(&self) -> Paddr { + self.start_paddr() + self.size() + } + + pub fn as_ptr(&self) -> *const u8 { + super::paddr_to_vaddr(self.start_paddr()) as *const u8 + } + + pub fn as_mut_ptr(&self) -> *mut u8 { + super::paddr_to_vaddr(self.start_paddr()) as *mut u8 + } + + pub fn copy_from(&self, src: &VmFrame) { + if self.meta == src.meta { + return; + } + if self.size() != src.size() { + panic!("The size of the source frame is different from the destination frame"); + } + // Safety: the source and the destination does not overlap. + unsafe { + core::ptr::copy_nonoverlapping(src.as_ptr(), self.as_mut_ptr(), self.size()); + } + } +} + +impl<'a> VmFrame { + /// Returns a reader to read data from it. + pub fn reader(&'a self) -> VmReader<'a> { + // Safety: the memory of the page is contiguous and is valid during `'a`. + unsafe { VmReader::from_raw_parts(self.as_ptr(), self.size()) } + } + + /// Returns a writer to write data into it. + pub fn writer(&'a self) -> VmWriter<'a> { + // Safety: the memory of the page is contiguous and is valid during `'a`. + unsafe { VmWriter::from_raw_parts_mut(self.as_mut_ptr(), self.size()) } + } +} + +impl VmIo for VmFrame { + fn read_bytes(&self, offset: usize, buf: &mut [u8]) -> Result<()> { + // Do bound check with potential integer overflow in mind + let max_offset = offset.checked_add(buf.len()).ok_or(Error::Overflow)?; + if max_offset > self.size() { + return Err(Error::InvalidArgs); + } + let len = self.reader().skip(offset).read(&mut buf.into()); + debug_assert!(len == buf.len()); + Ok(()) + } + + fn write_bytes(&self, offset: usize, buf: &[u8]) -> Result<()> { + // Do bound check with potential integer overflow in mind + let max_offset = offset.checked_add(buf.len()).ok_or(Error::Overflow)?; + if max_offset > self.size() { + return Err(Error::InvalidArgs); + } + let len = self.writer().skip(offset).write(&mut buf.into()); + debug_assert!(len == buf.len()); + Ok(()) + } +} + +impl Drop for VmFrame { + fn drop(&mut self) { + if self.meta.counter32_1.fetch_sub(1, Ordering::Release) == 1 { + // A fence is needed here with the same reasons stated in the implementation of + // `Arc::drop`: . + atomic::fence(Ordering::Acquire); + // Safety: the reference counter is 1 before decremented, so this is the only + // (exclusive) handle. + unsafe { self.meta.deref_mut().frame_type = FrameType::Free }; + // Safety: the page frame is valid. + unsafe { + allocator::dealloc_contiguous(self.paddr() / PAGE_SIZE, self.size() / PAGE_SIZE); + } + } + } +} + +/// A handle to a contiguous range of page frames (physical memory pages). +/// +/// The biggest difference between `VmSegment` and `VmFrameVec` is that +/// the page frames must be contiguous for `VmSegment`. +/// +/// A cloned `VmSegment` refers to the same page frames as the original. 
+/// As the original and cloned instances point to the same physical address, +/// they are treated as equal to each other. +/// +/// #Example +/// +/// ```rust +/// let vm_segment = VmAllocOptions::new(2) +/// .is_contiguous(true) +/// .alloc_contiguous()?; +/// vm_segment.write_bytes(0, buf)?; +/// ``` +#[derive(Debug, Clone)] +pub struct VmSegment { + inner: VmSegmentInner, + range: Range, +} + +unsafe impl Send for VmSegment {} +unsafe impl Sync for VmSegment {} + +#[derive(Debug)] +struct VmSegmentInner { + meta: FrameMetaRef, + nframes: usize, +} + +impl Clone for VmSegmentInner { + fn clone(&self) -> Self { + self.meta.counter32_1.fetch_add(1, Ordering::Relaxed); + Self { + meta: self.meta, + nframes: self.nframes, + } + } +} + +impl VmSegmentInner { + /// Creates the inner part of 'VmSegment'. + /// + /// # Safety + /// + /// The constructor of 'VmSegment' ensures the safety. + unsafe fn new(paddr: Paddr, nframes: usize) -> Self { + assert_eq!(paddr % PAGE_SIZE, 0); + let mut meta = FrameMetaRef::from_raw(paddr, 1); + assert!(matches!(meta.frame_type, FrameType::Free)); + meta.deref_mut().frame_type = FrameType::Anonymous; + meta.counter32_1.fetch_add(1, Ordering::Relaxed); + Self { meta, nframes } + } + + fn start_frame_index(&self) -> usize { + self.start_paddr() / PAGE_SIZE + } + + fn start_paddr(&self) -> Paddr { + self.meta.paddr() + } +} + +impl HasPaddr for VmSegment { + fn paddr(&self) -> Paddr { + self.start_paddr() + } +} + +impl VmSegment { + /// Creates a new `VmSegment`. + /// + /// # Safety + /// + /// The given range of page frames must be contiguous and valid for use. + /// The given range of page frames must not have been allocated before, + /// as part of either a `VmFrame` or `VmSegment`. + pub(crate) unsafe fn new(paddr: Paddr, nframes: usize) -> Self { + Self { + inner: VmSegmentInner::new(paddr, nframes), + range: 0..nframes, + } + } + + /// Returns a part of the `VmSegment`. + /// + /// # Panic + /// + /// If `range` is not within the range of this `VmSegment`, + /// then the method panics. + pub fn range(&self, range: Range) -> Self { + let orig_range = &self.range; + let adj_range = (range.start + orig_range.start)..(range.end + orig_range.start); + assert!(!adj_range.is_empty() && adj_range.end <= orig_range.end); + + Self { + inner: self.inner.clone(), + range: adj_range, + } + } + + /// Returns the start physical address. + pub fn start_paddr(&self) -> Paddr { + self.start_frame_index() * PAGE_SIZE + } + + /// Returns the end physical address. + pub fn end_paddr(&self) -> Paddr { + (self.start_frame_index() + self.nframes()) * PAGE_SIZE + } + + /// Returns the number of page frames. + pub fn nframes(&self) -> usize { + self.range.len() + } + + /// Returns the number of bytes. + pub fn nbytes(&self) -> usize { + self.nframes() * PAGE_SIZE + } + + fn start_frame_index(&self) -> usize { + self.inner.start_frame_index() + self.range.start + } + + pub fn as_ptr(&self) -> *const u8 { + super::paddr_to_vaddr(self.start_paddr()) as *const u8 + } + + pub fn as_mut_ptr(&self) -> *mut u8 { + super::paddr_to_vaddr(self.start_paddr()) as *mut u8 + } +} + +impl<'a> VmSegment { + /// Returns a reader to read data from it. + pub fn reader(&'a self) -> VmReader<'a> { + // Safety: the memory of the page frames is contiguous and is valid during `'a`. + unsafe { VmReader::from_raw_parts(self.as_ptr(), self.nbytes()) } + } + + /// Returns a writer to write data into it. 
+ pub fn writer(&'a self) -> VmWriter<'a> { + // Safety: the memory of the page frames is contiguous and is valid during `'a`. + unsafe { VmWriter::from_raw_parts_mut(self.as_mut_ptr(), self.nbytes()) } + } +} + +impl VmIo for VmSegment { + fn read_bytes(&self, offset: usize, buf: &mut [u8]) -> Result<()> { + // Do bound check with potential integer overflow in mind + let max_offset = offset.checked_add(buf.len()).ok_or(Error::Overflow)?; + if max_offset > self.nbytes() { + return Err(Error::InvalidArgs); + } + let len = self.reader().skip(offset).read(&mut buf.into()); + debug_assert!(len == buf.len()); + Ok(()) + } + + fn write_bytes(&self, offset: usize, buf: &[u8]) -> Result<()> { + // Do bound check with potential integer overflow in mind + let max_offset = offset.checked_add(buf.len()).ok_or(Error::Overflow)?; + if max_offset > self.nbytes() { + return Err(Error::InvalidArgs); + } + let len = self.writer().skip(offset).write(&mut buf.into()); + debug_assert!(len == buf.len()); + Ok(()) + } +} + +impl Drop for VmSegment { + fn drop(&mut self) { + if self.inner.meta.counter32_1.fetch_sub(1, Ordering::Release) == 1 { + // A fence is needed here with the same reasons stated in the implementation of + // `Arc::drop`: . + atomic::fence(Ordering::Acquire); + // Safety: the reference counter is 1 before decremented, so this is the only + // (exclusive) handle. + unsafe { self.inner.meta.deref_mut().frame_type = FrameType::Free }; + // Safety: the range of contiguous page frames is valid. + unsafe { + allocator::dealloc_contiguous(self.inner.start_frame_index(), self.inner.nframes); + } + } + } +} + +impl From for VmSegment { + fn from(frame: VmFrame) -> Self { + let segment = Self { + inner: VmSegmentInner { + meta: frame.meta, + nframes: 1, + }, + range: 0..1, + }; + let _ = ManuallyDrop::new(frame); + segment + } +} diff --git a/framework/aster-frame/src/vm/heap_allocator.rs b/framework/aster-frame/src/vm/heap_allocator.rs index 33b0b25fa..88d96270d 100644 --- a/framework/aster-frame/src/vm/heap_allocator.rs +++ b/framework/aster-frame/src/vm/heap_allocator.rs @@ -14,7 +14,7 @@ use crate::{ prelude::*, sync::SpinLock, trap::disable_local, - vm::{frame_allocator::FRAME_ALLOCATOR, PAGE_SIZE}, + vm::{frame::allocator::FRAME_ALLOCATOR, PAGE_SIZE}, Error, }; diff --git a/framework/aster-frame/src/vm/io.rs b/framework/aster-frame/src/vm/io.rs index f687b54e3..3aae6deb7 100644 --- a/framework/aster-frame/src/vm/io.rs +++ b/framework/aster-frame/src/vm/io.rs @@ -1,5 +1,7 @@ // SPDX-License-Identifier: MPL-2.0 +use core::marker::PhantomData; + use align_ext::AlignExt; use inherit_methods_macro::inherit_methods; use pod::Pod; @@ -147,3 +149,238 @@ impl_vmio_pointer!(&T, "(**self)"); impl_vmio_pointer!(&mut T, "(**self)"); impl_vmio_pointer!(Box, "(**self)"); impl_vmio_pointer!(Arc, "(**self)"); + +/// VmReader is a reader for reading data from a contiguous range of memory. +pub struct VmReader<'a> { + cursor: *const u8, + end: *const u8, + phantom: PhantomData<&'a [u8]>, +} + +impl<'a> VmReader<'a> { + /// Constructs a VmReader from a pointer and a length. + /// + /// # Safety + /// + /// User must ensure the memory from `ptr` to `ptr.add(len)` is contiguous. + /// User must ensure the memory is valid during the entire period of `'a`. + pub const unsafe fn from_raw_parts(ptr: *const u8, len: usize) -> Self { + Self { + cursor: ptr, + end: ptr.add(len), + phantom: PhantomData, + } + } + + /// Returns the number of bytes for the remaining data. 
+ pub const fn remain(&self) -> usize { + // Safety: the end is equal to or greater than the cursor. + unsafe { self.end.sub_ptr(self.cursor) } + } + + /// Returns the cursor pointer, which refers to the address of the next byte to read. + pub const fn cursor(&self) -> *const u8 { + self.cursor + } + + /// Returns if it has remaining data to read. + pub const fn has_remain(&self) -> bool { + self.remain() > 0 + } + + /// Limits the length of remaining data. + /// + /// This method ensures the postcondition of `self.remain() <= max_remain`. + pub const fn limit(mut self, max_remain: usize) -> Self { + if max_remain < self.remain() { + // Safety: the new end is less than the old end. + unsafe { self.end = self.cursor.add(max_remain) }; + } + self + } + + /// Skips the first `nbytes` bytes of data. + /// The length of remaining data is decreased accordingly. + /// + /// # Panic + /// + /// If `nbytes` is greater than `self.remain()`, then the method panics. + pub fn skip(mut self, nbytes: usize) -> Self { + assert!(nbytes <= self.remain()); + + // Safety: the new cursor is less than or equal to the end. + unsafe { self.cursor = self.cursor.add(nbytes) }; + self + } + + /// Reads all data into the writer until one of the two conditions is met: + /// 1. The reader has no remaining data. + /// 2. The writer has no available space. + /// + /// Returns the number of bytes read. + /// + /// It pulls the number of bytes data from the reader and + /// fills in the writer with the number of bytes. + pub fn read(&mut self, writer: &mut VmWriter<'_>) -> usize { + let copy_len = self.remain().min(writer.avail()); + if copy_len == 0 { + return 0; + } + + // Safety: the memory range is valid since `copy_len` is the minimum + // of the reader's remaining data and the writer's available space. + unsafe { + core::ptr::copy(self.cursor, writer.cursor, copy_len); + self.cursor = self.cursor.add(copy_len); + writer.cursor = writer.cursor.add(copy_len); + } + copy_len + } + + /// Read a value of `Pod` type. + /// + /// # Panic + /// + /// If the length of the `Pod` type exceeds `self.remain()`, then this method will panic. + pub fn read_val(&mut self) -> T { + assert!(self.remain() >= core::mem::size_of::()); + + let mut val = T::new_uninit(); + let mut writer = VmWriter::from(val.as_bytes_mut()); + let read_len = self.read(&mut writer); + + val + } +} + +impl<'a> From<&'a [u8]> for VmReader<'a> { + fn from(slice: &'a [u8]) -> Self { + // Safety: the range of memory is contiguous and is valid during `'a`. + unsafe { Self::from_raw_parts(slice.as_ptr(), slice.len()) } + } +} + +/// VmWriter is a writer for writing data to a contiguous range of memory. +pub struct VmWriter<'a> { + cursor: *mut u8, + end: *mut u8, + phantom: PhantomData<&'a mut [u8]>, +} + +impl<'a> VmWriter<'a> { + /// Constructs a VmWriter from a pointer and a length. + /// + /// # Safety + /// + /// User must ensure the memory from `ptr` to `ptr.add(len)` is contiguous. + /// User must ensure the memory is valid during the entire period of `'a`. + pub const unsafe fn from_raw_parts_mut(ptr: *mut u8, len: usize) -> Self { + Self { + cursor: ptr, + end: ptr.add(len), + phantom: PhantomData, + } + } + + /// Returns the number of bytes for the available space. + pub const fn avail(&self) -> usize { + // Safety: the end is equal to or greater than the cursor. + unsafe { self.end.sub_ptr(self.cursor) } + } + + /// Returns the cursor pointer, which refers to the address of the next byte to write. 
+ pub const fn cursor(&self) -> *mut u8 { + self.cursor + } + + /// Returns if it has avaliable space to write. + pub const fn has_avail(&self) -> bool { + self.avail() > 0 + } + + /// Limits the length of available space. + /// + /// This method ensures the postcondition of `self.avail() <= max_avail`. + pub const fn limit(mut self, max_avail: usize) -> Self { + if max_avail < self.avail() { + // Safety: the new end is less than the old end. + unsafe { self.end = self.cursor.add(max_avail) }; + } + self + } + + /// Skips the first `nbytes` bytes of data. + /// The length of available space is decreased accordingly. + /// + /// # Panic + /// + /// If `nbytes` is greater than `self.avail()`, then the method panics. + pub fn skip(mut self, nbytes: usize) -> Self { + assert!(nbytes <= self.avail()); + + // Safety: the new cursor is less than or equal to the end. + unsafe { self.cursor = self.cursor.add(nbytes) }; + self + } + + /// Writes data from the reader until one of the two conditions is met: + /// 1. The writer has no available space. + /// 2. The reader has no remaining data. + /// + /// Returns the number of bytes written. + /// + /// It pulls the number of bytes data from the reader and + /// fills in the writer with the number of bytes. + pub fn write(&mut self, reader: &mut VmReader<'_>) -> usize { + let copy_len = self.avail().min(reader.remain()); + if copy_len == 0 { + return 0; + } + + // Safety: the memory range is valid since `copy_len` is the minimum + // of the reader's remaining data and the writer's available space. + unsafe { + core::ptr::copy(reader.cursor, self.cursor, copy_len); + self.cursor = self.cursor.add(copy_len); + reader.cursor = reader.cursor.add(copy_len); + } + copy_len + } + + /// Fills the available space by repeating `value`. + /// + /// Returns the number of values written. + /// + /// # Panic + /// + /// The size of the available space must be a multiple of the size of `value`. + /// Otherwise, the method would panic. + pub fn fill(&mut self, value: T) -> usize { + let avail = self.avail(); + + assert!((self.cursor as *mut T).is_aligned()); + assert!(avail % core::mem::size_of::() == 0); + + let written_num = avail / core::mem::size_of::(); + + for i in 0..written_num { + // Safety: `written_num` is calculated by the avail size and the size of the type `T`, + // hence the `add` operation and `write` operation are valid and will only manipulate + // the memory managed by this writer. + unsafe { + (self.cursor as *mut T).add(i).write(value); + } + } + + // The available space has been filled so this cursor can be moved to the end. + self.cursor = self.end; + written_num + } +} + +impl<'a> From<&'a mut [u8]> for VmWriter<'a> { + fn from(slice: &'a mut [u8]) -> Self { + // Safety: the range of memory is contiguous and is valid during `'a`. + unsafe { Self::from_raw_parts_mut(slice.as_mut_ptr(), slice.len()) } + } +} diff --git a/framework/aster-frame/src/vm/kspace.rs b/framework/aster-frame/src/vm/kspace.rs index 7a2c6dddb..ac6c1e320 100644 --- a/framework/aster-frame/src/vm/kspace.rs +++ b/framework/aster-frame/src/vm/kspace.rs @@ -1,62 +1,103 @@ // SPDX-License-Identifier: MPL-2.0 //! Kernel memory space management. +//! +//! The kernel memory space is currently managed as follows, if the +//! address width is 48 bits (with 47 bits kernel space). +//! +//! ```text +//! +-+ <- the highest used address (0xffff_ffff_ffff_0000) +//! | | For the kernel code, 1 GiB. +//! +-+ <- 0xffff_ffff_8000_0000 +//! | | +//! | | Unused hole. +//! 
+-+ <- 0xffff_e200_0000_0000
+//! | | For frame metadata, 2 TiB. Mapped frames are tracked with handles.
+//! +-+ <- 0xffff_e000_0000_0000
+//! | |
+//! | | For vm alloc/io mappings, 32 TiB.
+//! | | Mapped frames are tracked with handles.
+//! | |
+//! +-+ <- the middle of the higher half (0xffff_c000_0000_0000)
+//! | |
+//! | |
+//! | |
+//! | | For linear mappings, 64 TiB.
+//! | | Mapped physical addresses are untracked.
+//! | |
+//! | |
+//! | |
+//! +-+ <- the base of high canonical address (0xffff_8000_0000_0000)
+//! ```
+//!
+//! If the address width is (according to [`crate::arch::mm::PagingConsts`])
+//! 39 bits or 57 bits, the memory space is just adjusted proportionally.
 
-use core::ops::Range;
+use alloc::vec::Vec;
+use core::{mem::size_of, ops::Range};
 
 use align_ext::AlignExt;
 use spin::Once;
-use static_assertions::const_assert;
 
 use super::{
-    page_table::{nr_ptes_per_node, KernelMode, PageTable},
-    CachePolicy, MemoryRegionType, Paddr, PageFlags, PageProperty, PrivilegedPageFlags, Vaddr,
-    PAGE_SIZE,
+    frame::{
+        allocator::FRAME_ALLOCATOR,
+        meta,
+        meta::{FrameMeta, FrameType},
+    },
+    nr_subpage_per_huge,
+    page_prop::{CachePolicy, PageFlags, PageProperty, PrivilegedPageFlags},
+    page_size,
+    page_table::{boot_pt::BootPageTable, KernelMode, PageTable},
+    MemoryRegionType, Paddr, PagingConstsTrait, Vaddr, VmFrame, PAGE_SIZE,
+};
+use crate::{
+    arch::mm::{PageTableEntry, PagingConsts},
+    sync::SpinLock,
 };
-use crate::arch::mm::{PageTableEntry, PagingConsts};
 
-/// The base address of the linear mapping of all physical
-/// memory in the kernel address space.
-pub(crate) const LINEAR_MAPPING_BASE_VADDR: Vaddr = 0xffff_8000_0000_0000;
+/// The shortest supported address width is 39 bits. And the literal
+/// values are written for 48 bits address width. Adjust the values
+/// by arithmetic left shift.
+const ADDR_WIDTH_SHIFT: isize = PagingConsts::ADDRESS_WIDTH as isize - 48;
 
-/// The maximum size of the direct mapping of physical memory.
-///
-/// This size acts as a cap. If the actual memory size exceeds this value,
-/// the remaining memory cannot be included in the direct mapping because
-/// the maximum size of the direct mapping is limited by this value. On
-/// the other hand, if the actual memory size is smaller, the direct
-/// mapping can shrink to save memory consumption due to the page table.
-///
-/// We do not currently have APIs to manually map MMIO pages, so we have
-/// to rely on the direct mapping to perform MMIO operations. Therefore,
-/// we set the maximum size to 127 TiB, which makes some surprisingly
-/// high MMIO addresses usable (e.g., `0x7000_0000_7004` for VirtIO
-/// devices in the TDX environment) and leaves the last 1 TiB for other
-/// uses (e.g., the kernel code starting at [`kernel_loaded_offset()`]).
-pub(crate) const LINEAR_MAPPING_MAX_SIZE: usize = 127 << 40;
-
-/// The address range of the direct mapping of physical memory.
-///
-/// This range is constructed based on [`PHYS_MEM_BASE_VADDR`] and
-/// [`PHYS_MEM_MAPPING_MAX_SIZE`].
-pub(crate) const LINEAR_MAPPING_VADDR_RANGE: Range<Vaddr> =
-    LINEAR_MAPPING_BASE_VADDR..(LINEAR_MAPPING_BASE_VADDR + LINEAR_MAPPING_MAX_SIZE);
+/// Start of the kernel address space.
+/// This is the _lowest_ address of the x86-64's _high_ canonical addresses.
+pub const KERNEL_BASE_VADDR: Vaddr = 0xffff_8000_0000_0000 << ADDR_WIDTH_SHIFT;
+/// End of the kernel address space (non inclusive).
+pub const KERNEL_END_VADDR: Vaddr = 0xffff_ffff_ffff_0000 << ADDR_WIDTH_SHIFT;
 
 /// The kernel code is linear mapped to this address.
 ///
 /// FIXME: This offset should be randomly chosen by the loader or the
 /// boot compatibility layer. But we disabled it because the framework
 /// doesn't support relocatable kernel yet.
-pub const fn kernel_loaded_offset() -> usize {
-    0xffff_ffff_8000_0000
+pub fn kernel_loaded_offset() -> usize {
+    KERNEL_CODE_BASE_VADDR
 }
-const_assert!(LINEAR_MAPPING_VADDR_RANGE.end < kernel_loaded_offset());
+
+const KERNEL_CODE_BASE_VADDR: usize = 0xffff_ffff_8000_0000 << ADDR_WIDTH_SHIFT;
+
+pub(in crate::vm) const FRAME_METADATA_CAP_VADDR: Vaddr = 0xffff_e200_0000_0000 << ADDR_WIDTH_SHIFT;
+pub(in crate::vm) const FRAME_METADATA_BASE_VADDR: Vaddr =
+    0xffff_e000_0000_0000 << ADDR_WIDTH_SHIFT;
+
+const VMALLOC_BASE_VADDR: Vaddr = 0xffff_c000_0000_0000 << ADDR_WIDTH_SHIFT;
+
+/// The base address of the linear mapping of all physical
+/// memory in the kernel address space.
+pub const LINEAR_MAPPING_BASE_VADDR: Vaddr = 0xffff_8000_0000_0000 << ADDR_WIDTH_SHIFT;
+pub const LINEAR_MAPPING_VADDR_RANGE: Range<Vaddr> = LINEAR_MAPPING_BASE_VADDR..VMALLOC_BASE_VADDR;
 
 /// Convert physical address to virtual address using offset, only available inside aster-frame
-pub(crate) fn paddr_to_vaddr(pa: Paddr) -> usize {
+pub fn paddr_to_vaddr(pa: Paddr) -> usize {
+    debug_assert!(pa < VMALLOC_BASE_VADDR - LINEAR_MAPPING_BASE_VADDR);
     pa + LINEAR_MAPPING_BASE_VADDR
 }
 
+/// This is for destructing the boot page table.
+static BOOT_PAGE_TABLE: SpinLock<Option<BootPageTable<PageTableEntry, PagingConsts>>> =
+    SpinLock::new(None);
 pub static KERNEL_PAGE_TABLE: Once<PageTable<KernelMode, PageTableEntry, PagingConsts>> = Once::new();
@@ -69,23 +110,35 @@ pub static KERNEL_PAGE_TABLE: Once::empty();
-    kpt.make_shared_tables(
-        nr_ptes_per_node::<PagingConsts>() / 2..nr_ptes_per_node::<PagingConsts>(),
-    );
     let regions = crate::boot::memory_regions();
+    let phys_mem_cap = {
+        let mut end = 0;
+        for r in regions {
+            end = end.max(r.base() + r.len());
+        }
+        end.align_up(PAGE_SIZE)
+    };
+
+    // The kernel page table should be built after the metadata pages are initialized.
+    let (boot_pt, meta_frames) = init_boot_page_table_and_page_meta(phys_mem_cap);
+    // Move it to the global static to prolong its life.
+    // There are identity mappings in it, so we can't drop it and activate the kernel page table
+    // immediately in this function.
+    *BOOT_PAGE_TABLE.lock() = Some(boot_pt);
+
+    // Starting to initialize the kernel page table.
+    let kpt = PageTable::<KernelMode>::empty();
+
+    // Make shared the page tables mapped by the root table in the kernel space.
+    {
+        let pte_index_max = nr_subpage_per_huge::<PagingConsts>();
+        kpt.make_shared_tables(pte_index_max / 2..pte_index_max);
+    }
     // Do linear mappings for the kernel.
     {
-        let linear_mapping_size = {
-            let mut end = 0;
-            for r in regions {
-                end = end.max(r.base() + r.len());
-            }
-            end.align_up(PAGE_SIZE)
-        };
-        let from = LINEAR_MAPPING_BASE_VADDR..LINEAR_MAPPING_BASE_VADDR + linear_mapping_size;
-        let to = 0..linear_mapping_size;
+        let from = LINEAR_MAPPING_BASE_VADDR..LINEAR_MAPPING_BASE_VADDR + phys_mem_cap;
+        let to = 0..phys_mem_cap;
         let prop = PageProperty {
            flags: PageFlags::RW,
            cache: CachePolicy::Writeback,
@@ -97,6 +150,24 @@ pub fn init_kernel_page_table() {
         }
     }
 
+    // Map the metadata pages.
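+    // The target virtual addresses come from the schema in `meta::mapping`,
+    // and the frames being mapped are the ones that were typed
+    // `FrameType::Meta` by `init_boot_page_table_and_page_meta` below.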
+ { + let start_va = meta::mapping::page_to_meta::(0, 1); + let from = start_va..start_va + meta_frames.len() * PAGE_SIZE; + let prop = PageProperty { + flags: PageFlags::RW, + cache: CachePolicy::Writeback, + priv_flags: PrivilegedPageFlags::GLOBAL, + }; + let mut cursor = kpt.cursor_mut(&from).unwrap(); + for frame in meta_frames { + // Safety: we are doing the metadata mappings for the kernel. + unsafe { + cursor.map(frame, prop); + } + } + } + // Map for the I/O area. // TODO: we need to have an allocator to allocate kernel space for // the I/O areas, rather than doing it using the linear mappings. @@ -138,3 +209,75 @@ pub fn init_kernel_page_table() { KERNEL_PAGE_TABLE.call_once(|| kpt); } + +pub fn activate_kernel_page_table() { + // Safety: the kernel page table is initialized properly. + unsafe { + KERNEL_PAGE_TABLE.get().unwrap().activate_unchecked(); + crate::arch::mm::tlb_flush_all_including_global(); + } + // Drop the boot page table. + *BOOT_PAGE_TABLE.lock() = None; +} + +/// Initialize the boot page table and the page metadata for all physical memories. +/// The boot page table created should be dropped after the kernel page table is initialized. +/// +/// It returns the metadata frames for each level of the page table. +fn init_boot_page_table_and_page_meta( + phys_mem_cap: usize, +) -> (BootPageTable, Vec) { + let mut boot_pt = { + let cur_pt_paddr = crate::arch::mm::current_page_table_paddr(); + BootPageTable::from_anonymous_boot_pt(cur_pt_paddr) + }; + + let num_pages = phys_mem_cap / page_size::(1); + let num_meta_pages = (num_pages * size_of::()).div_ceil(PAGE_SIZE); + let meta_frames = alloc_meta_frames(num_meta_pages); + + // Map the metadata pages. + for (i, frame_paddr) in meta_frames.iter().enumerate() { + let vaddr = meta::mapping::page_to_meta::(0, 1) + i * PAGE_SIZE; + let prop = PageProperty { + flags: PageFlags::RW, + cache: CachePolicy::Writeback, + priv_flags: PrivilegedPageFlags::GLOBAL, + }; + boot_pt.map_base_page(vaddr, frame_paddr / PAGE_SIZE, prop); + } + + // Now the metadata pages are mapped, we can initialize the metadata and + // turn meta frame addresses into `VmFrame`s. + let meta_frames = meta_frames + .into_iter() + .map(|paddr| { + // Safety: the frame is allocated but not initialized thus not referenced. + let mut frame = unsafe { VmFrame::from_free_raw(paddr, 1) }; + // Safety: this is the only reference to the frame so it's exclusive. + unsafe { frame.meta.deref_mut().frame_type = FrameType::Meta }; + frame + }) + .collect(); + + (boot_pt, meta_frames) +} + +fn alloc_meta_frames(nframes: usize) -> Vec { + let mut meta_pages = Vec::new(); + let start_frame = FRAME_ALLOCATOR + .get() + .unwrap() + .lock() + .alloc(nframes) + .unwrap() + * PAGE_SIZE; + // Zero them out as initialization. 
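+    // A zeroed `FrameMeta` is a valid initial state: `FrameType::Free` is the
+    // zero variant and the zero value of the metadata is defined to be valid
+    // (see `meta.rs`).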
+ let vaddr = paddr_to_vaddr(start_frame) as *mut u8; + unsafe { core::ptr::write_bytes(vaddr, 0, PAGE_SIZE * nframes) }; + for i in 0..nframes { + let paddr = start_frame + i * PAGE_SIZE; + meta_pages.push(paddr); + } + meta_pages +} diff --git a/framework/aster-frame/src/vm/mod.rs b/framework/aster-frame/src/vm/mod.rs index 5a4f9fe84..47fbfa501 100644 --- a/framework/aster-frame/src/vm/mod.rs +++ b/framework/aster-frame/src/vm/mod.rs @@ -10,7 +10,6 @@ pub type Paddr = usize; pub(crate) mod dma; mod frame; -mod frame_allocator; pub(crate) mod heap_allocator; mod io; pub(crate) mod kspace; @@ -27,19 +26,23 @@ use spin::Once; pub use self::{ dma::{Daddr, DmaCoherent, DmaDirection, DmaStream, DmaStreamSlice, HasDaddr}, - frame::{VmFrame, VmFrameVec, VmFrameVecIter, VmReader, VmSegment, VmWriter}, - io::VmIo, + frame::{FrameVecIter, VmFrame, VmFrameVec, VmSegment}, + io::{VmIo, VmReader, VmWriter}, options::VmAllocOptions, page_prop::{CachePolicy, PageFlags, PageProperty}, space::{VmMapOptions, VmSpace}, }; pub(crate) use self::{ - kspace::paddr_to_vaddr, page_prop::PrivilegedPageFlags, page_table::PageTable, + frame::meta::FrameMetaRef, kspace::paddr_to_vaddr, page_prop::PrivilegedPageFlags, + page_table::PageTable, +}; +use crate::{ + arch::mm::PagingConsts, + boot::memory_region::{MemoryRegion, MemoryRegionType}, }; -use crate::boot::memory_region::{MemoryRegion, MemoryRegionType}; -/// The size of a [`VmFrame`]. -pub const PAGE_SIZE: usize = 0x1000; +/// The level of a page table node or a frame. +pub type PagingLevel = u8; /// A minimal set of constants that determines the paging system. /// This provides an abstraction over most paging modes in common architectures. @@ -53,14 +56,35 @@ pub(crate) trait PagingConstsTrait: Debug + 'static { /// the level 1 to 5 on AMD64 corresponds to Page Tables, Page Directory Tables, /// Page Directory Pointer Tables, Page-Map Level-4 Table, and Page-Map Level-5 /// Table, respectively. - const NR_LEVELS: usize; + const NR_LEVELS: PagingLevel; /// The highest level that a PTE can be directly used to translate a VA. /// This affects the the largest page size supported by the page table. - const HIGHEST_TRANSLATION_LEVEL: usize; + const HIGHEST_TRANSLATION_LEVEL: PagingLevel; /// The size of a PTE. const PTE_SIZE: usize; + + /// The address width may be BASE_PAGE_SIZE.ilog2() + NR_LEVELS * IN_FRAME_INDEX_BITS. + /// If it is shorter than that, the higher bits in the highest level are ignored. + const ADDRESS_WIDTH: usize; +} + +pub const PAGE_SIZE: usize = page_size::(1); + +/// The page size at a given level. +pub(crate) const fn page_size(level: PagingLevel) -> usize { + C::BASE_PAGE_SIZE << (nr_subpage_per_huge::().ilog2() as usize * (level as usize - 1)) +} + +/// The number of sub pages in a huge page. +pub(crate) const fn nr_subpage_per_huge() -> usize { + C::BASE_PAGE_SIZE / C::PTE_SIZE +} + +/// The number of base pages in a huge page at a given level. +pub(crate) const fn nr_base_per_page(level: PagingLevel) -> usize { + page_size::(level) / C::BASE_PAGE_SIZE } /// The maximum virtual address of user space (non inclusive). 
@@ -96,7 +120,7 @@ pub static FRAMEBUFFER_REGIONS: Once> = Once::new(); pub(crate) fn init() { let memory_regions = crate::boot::memory_regions().to_owned(); - frame_allocator::init(&memory_regions); + frame::allocator::init(&memory_regions); kspace::init_kernel_page_table(); dma::init(); diff --git a/framework/aster-frame/src/vm/options.rs b/framework/aster-frame/src/vm/options.rs index 898d58040..c9785ef9d 100644 --- a/framework/aster-frame/src/vm/options.rs +++ b/framework/aster-frame/src/vm/options.rs @@ -1,6 +1,6 @@ // SPDX-License-Identifier: MPL-2.0 -use super::{frame::VmFrameFlags, frame_allocator, VmFrame, VmFrameVec, VmSegment}; +use super::{frame::allocator, PagingLevel, VmFrame, VmFrameVec, VmSegment}; use crate::{prelude::*, Error}; /// Options for allocating physical memory pages (or frames). @@ -11,6 +11,7 @@ use crate::{prelude::*, Error}; /// the code and data segments of the OS kernel, the stack and heap /// allocated for the OS kernel. pub struct VmAllocOptions { + level: PagingLevel, nframes: usize, is_contiguous: bool, uninit: bool, @@ -20,12 +21,19 @@ impl VmAllocOptions { /// Creates new options for allocating the specified number of frames. pub fn new(nframes: usize) -> Self { Self { + level: 1, nframes, is_contiguous: false, uninit: false, } } + /// Sets the paging level for the allocated frames. + pub fn level(&mut self, level: PagingLevel) -> &mut Self { + self.level = level; + self + } + /// Sets whether the allocated frames should be contiguous. /// /// The default value is `false`. @@ -47,13 +55,12 @@ impl VmAllocOptions { /// Allocate a collection of page frames according to the given options. pub fn alloc(&self) -> Result { - let flags = self.flags(); let frames = if self.is_contiguous { - frame_allocator::alloc(self.nframes, flags).ok_or(Error::NoMemory)? + allocator::alloc(self.nframes).ok_or(Error::NoMemory)? } else { let mut frame_list = Vec::new(); for _ in 0..self.nframes { - frame_list.push(frame_allocator::alloc_single(flags).ok_or(Error::NoMemory)?); + frame_list.push(allocator::alloc_single(self.level).ok_or(Error::NoMemory)?); } VmFrameVec(frame_list) }; @@ -72,7 +79,7 @@ impl VmAllocOptions { return Err(Error::InvalidArgs); } - let frame = frame_allocator::alloc_single(self.flags()).ok_or(Error::NoMemory)?; + let frame = allocator::alloc_single(self.level).ok_or(Error::NoMemory)?; if !self.uninit { frame.writer().fill(0); } @@ -89,16 +96,36 @@ impl VmAllocOptions { return Err(Error::InvalidArgs); } - let segment = - frame_allocator::alloc_contiguous(self.nframes, self.flags()).ok_or(Error::NoMemory)?; + let segment = allocator::alloc_contiguous(self.nframes).ok_or(Error::NoMemory)?; if !self.uninit { segment.writer().fill(0); } Ok(segment) } +} - fn flags(&self) -> VmFrameFlags { - VmFrameFlags::empty() +#[cfg(ktest)] +#[ktest] +fn test_alloc_dealloc() { + // Here we allocate and deallocate frames in random orders to test the allocator. + // We expect the test to fail if the underlying implementation panics. 
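+    // Frames that are not pushed into `remember_vec` are dropped (and thus
+    // deallocated) right away, while the remembered ones outlive the loop,
+    // so both early and delayed deallocation paths are exercised.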
+ let single_options = VmAllocOptions::new(1); + let multi_options = VmAllocOptions::new(10); + let mut contiguous_options = VmAllocOptions::new(10); + contiguous_options.is_contiguous(true); + let mut remember_vec = Vec::new(); + for i in 0..10 { + for i in 0..10 { + let single_frame = single_options.alloc_single().unwrap(); + if i % 3 == 0 { + remember_vec.push(single_frame); + } + } + let contiguous_segment = contiguous_options.alloc_contiguous().unwrap(); + drop(contiguous_segment); + let multi_frames = multi_options.alloc().unwrap(); + remember_vec.extend(multi_frames.into_iter()); + remember_vec.pop(); } } diff --git a/framework/aster-frame/src/vm/page_prop.rs b/framework/aster-frame/src/vm/page_prop.rs index ddcf544f0..996925be5 100644 --- a/framework/aster-frame/src/vm/page_prop.rs +++ b/framework/aster-frame/src/vm/page_prop.rs @@ -7,7 +7,7 @@ use core::fmt::Debug; use bitflags::bitflags; /// The property of a mapped virtual memory page. -#[derive(Clone, Copy, Debug)] +#[derive(Clone, Copy, Debug, PartialEq, Eq)] pub struct PageProperty { pub flags: PageFlags, pub cache: CachePolicy, @@ -37,7 +37,7 @@ impl PageProperty { /// A type to control the cacheability of the main memory. /// /// The type currently follows the definition as defined by the AMD64 manual. -#[derive(Clone, Copy, Debug, PartialEq)] +#[derive(Clone, Copy, Debug, PartialEq, Eq)] pub enum CachePolicy { /// Uncacheable (UC). /// diff --git a/framework/aster-frame/src/vm/page_table/boot_pt.rs b/framework/aster-frame/src/vm/page_table/boot_pt.rs new file mode 100644 index 000000000..a4284c9b1 --- /dev/null +++ b/framework/aster-frame/src/vm/page_table/boot_pt.rs @@ -0,0 +1,123 @@ +// SPDX-License-Identifier: MPL-2.0 + +//! Because that the page table implementation requires metadata initialized +//! and mapped, the boot page table is needed to do early stage page table setup +//! in order to initialize the running phase page tables. + +use alloc::vec::Vec; + +use super::{pte_index, PageTableEntryTrait}; +use crate::vm::{ + frame::allocator::FRAME_ALLOCATOR, paddr_to_vaddr, Paddr, PageProperty, PagingConstsTrait, + Vaddr, PAGE_SIZE, +}; + +type FrameNumber = usize; + +/// A simple boot page table for boot stage mapping management. +/// If applicable, the boot page table could track the lifetime of page table +/// frames that are set up by the firmware, loader or the setup code. +pub struct BootPageTable { + root_pt: FrameNumber, + // The frames allocated for this page table are not tracked with + // metadata [`crate::vm::frame::meta`]. Here is a record of it + // for deallocation. + frames: Vec, + _pretend_to_use: core::marker::PhantomData<(E, C)>, +} + +impl BootPageTable { + /// Create a new boot page table from the a page table root physical address. + /// The anonymous page table may be set up by the firmware, loader or the setup code. + pub fn from_anonymous_boot_pt(root_paddr: Paddr) -> Self { + Self { + root_pt: root_paddr / C::BASE_PAGE_SIZE, + frames: Vec::new(), + _pretend_to_use: core::marker::PhantomData, + } + } + + /// Map a base page to a frame. + /// This function will panic if the page is already mapped. + pub fn map_base_page(&mut self, from: Vaddr, to: FrameNumber, prop: PageProperty) { + let mut pt = self.root_pt; + let mut level = C::NR_LEVELS; + // Walk to the last level of the page table. 
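+        // Intermediate page tables that are missing along the way are
+        // allocated (and zeroed) on demand; encountering an existing
+        // huge-page mapping is not supported here and panics.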
+ while level > 1 { + let index = pte_index::(from, level); + let pte_ptr = unsafe { (paddr_to_vaddr(pt * C::BASE_PAGE_SIZE) as *mut E).add(index) }; + let pte = unsafe { pte_ptr.read() }; + pt = if !pte.is_present() { + let frame = self.alloc_frame(); + let new_pte = E::new(frame * C::BASE_PAGE_SIZE, pte.prop(), false, false); + unsafe { pte_ptr.write(new_pte) }; + frame + } else if pte.is_huge() { + panic!("mapping an already mapped huge page in the boot page table"); + } else { + pte.paddr() / C::BASE_PAGE_SIZE + }; + level -= 1; + } + // Map the page in the last level page table. + let index = pte_index::(from, 1); + let pte_ptr = unsafe { (paddr_to_vaddr(pt * C::BASE_PAGE_SIZE) as *mut E).add(index) }; + let pte = unsafe { pte_ptr.read() }; + if pte.is_present() { + panic!("mapping an already mapped page in the boot page table"); + } + let new_pte = E::new(to * C::BASE_PAGE_SIZE, prop, false, true); + unsafe { pte_ptr.write(new_pte) }; + } + + fn alloc_frame(&mut self) -> FrameNumber { + let frame = FRAME_ALLOCATOR.get().unwrap().lock().alloc(1).unwrap(); + self.frames.push(frame); + // Zero it out. + let vaddr = paddr_to_vaddr(frame * PAGE_SIZE) as *mut u8; + unsafe { core::ptr::write_bytes(vaddr, 0, PAGE_SIZE) }; + frame + } +} + +impl Drop for BootPageTable { + fn drop(&mut self) { + for frame in &self.frames { + FRAME_ALLOCATOR.get().unwrap().lock().dealloc(*frame, 1); + } + } +} + +#[cfg(ktest)] +#[ktest] +fn test_boot_pt() { + use super::page_walk; + use crate::{ + arch::mm::{PageTableEntry, PagingConsts}, + vm::{CachePolicy, PageFlags, VmAllocOptions}, + }; + + let root_frame = VmAllocOptions::new(1).alloc_single().unwrap(); + let root_paddr = root_frame.start_paddr(); + + let mut boot_pt = + BootPageTable::::from_anonymous_boot_pt(root_paddr); + + let from1 = 0x1000; + let to1 = 0x2; + let prop1 = PageProperty::new(PageFlags::RW, CachePolicy::Writeback); + boot_pt.map_base_page(from1, to1, prop1); + assert_eq!( + unsafe { page_walk::(root_paddr, from1 + 1) }, + Some((to1 * PAGE_SIZE + 1, prop1)) + ); + + let from2 = 0x2000; + let to2 = 0x3; + let prop2 = PageProperty::new(PageFlags::RX, CachePolicy::Uncacheable); + boot_pt.map_base_page(from2, to2, prop2); + assert_eq!( + unsafe { page_walk::(root_paddr, from2 + 2) }, + Some((to2 * PAGE_SIZE + 2, prop2)) + ); +} diff --git a/framework/aster-frame/src/vm/page_table/cursor.rs b/framework/aster-frame/src/vm/page_table/cursor.rs index 525385e54..e25367e0e 100644 --- a/framework/aster-frame/src/vm/page_table/cursor.rs +++ b/framework/aster-frame/src/vm/page_table/cursor.rs @@ -56,12 +56,12 @@ use core::{any::TypeId, ops::Range}; use align_ext::AlignExt; use super::{ - nr_ptes_per_node, page_size, pte_index, Child, KernelMode, PageTable, PageTableEntryTrait, + nr_subpage_per_huge, page_size, pte_index, Child, KernelMode, PageTable, PageTableEntryTrait, PageTableError, PageTableFrame, PageTableMode, PagingConstsTrait, }; use crate::{ sync::{ArcSpinLockGuard, SpinLock}, - vm::{Paddr, PageProperty, Vaddr, VmFrame}, + vm::{Paddr, PageProperty, PagingLevel, Vaddr, VmFrame}, }; /// The cursor for traversal over the page table. @@ -79,21 +79,21 @@ use crate::{ /// provide concurrency. 
pub(crate) struct CursorMut<'a, M: PageTableMode, E: PageTableEntryTrait, C: PagingConstsTrait> where - [(); nr_ptes_per_node::()]:, - [(); C::NR_LEVELS]:, + [(); nr_subpage_per_huge::()]:, + [(); C::NR_LEVELS as usize]:, { pt: &'a PageTable, - guards: [Option>>; C::NR_LEVELS], - level: usize, // current level - guard_level: usize, // from guard_level to level, the locks are held + guards: [Option>>; C::NR_LEVELS as usize], + level: PagingLevel, // current level + guard_level: PagingLevel, // from guard_level to level, the locks are held va: Vaddr, // current virtual address barrier_va: Range, // virtual address range that is locked } impl<'a, M: PageTableMode, E: PageTableEntryTrait, C: PagingConstsTrait> CursorMut<'a, M, E, C> where - [(); nr_ptes_per_node::()]:, - [(); C::NR_LEVELS]:, + [(); nr_subpage_per_huge::()]:, + [(); C::NR_LEVELS as usize]:, { /// Create a cursor exclusively owning the locks for the given range. /// @@ -139,7 +139,7 @@ where break; } cursor.level_down(None); - cursor.guards[C::NR_LEVELS - cursor.level - 1] = None; + cursor.guards[(C::NR_LEVELS - cursor.level) as usize - 1] = None; cursor.guard_level -= 1; } Ok(cursor) @@ -429,7 +429,7 @@ where fn level_up(&mut self) { #[cfg(feature = "page_table_recycle")] let last_node_all_unmapped = self.cur_node().nr_valid_children() == 0; - self.guards[C::NR_LEVELS - self.level] = None; + self.guards[(C::NR_LEVELS - self.level) as usize] = None; self.level += 1; #[cfg(feature = "page_table_recycle")] { @@ -490,16 +490,20 @@ where panic!("Trying to level down when it is mapped to a typed frame"); } }); - self.guards[C::NR_LEVELS - self.level + 1] = Some(nxt_lvl_frame.lock_arc()); + self.guards[(C::NR_LEVELS - self.level) as usize + 1] = Some(nxt_lvl_frame.lock_arc()); self.level -= 1; } fn cur_node(&self) -> &ArcSpinLockGuard> { - self.guards[C::NR_LEVELS - self.level].as_ref().unwrap() + self.guards[(C::NR_LEVELS - self.level) as usize] + .as_ref() + .unwrap() } fn cur_node_mut(&mut self) -> &mut ArcSpinLockGuard> { - self.guards[C::NR_LEVELS - self.level].as_mut().unwrap() + self.guards[(C::NR_LEVELS - self.level) as usize] + .as_mut() + .unwrap() } fn cur_idx(&self) -> usize { @@ -518,8 +522,8 @@ where #[cfg(feature = "page_table_recycle")] impl Drop for CursorMut<'_, M, E, C> where - [(); nr_ptes_per_node::()]:, - [(); C::NR_LEVELS]:, + [(); nr_subpage_per_huge::()]:, + [(); C::NR_LEVELS as usize]:, { fn drop(&mut self) { // Recycle what we can recycle now. @@ -572,16 +576,16 @@ pub(crate) enum PageTableQueryResult { /// It implements the `Iterator` trait to provide a convenient way to query over the page table. 
pub(crate) struct Cursor<'a, M: PageTableMode, E: PageTableEntryTrait, C: PagingConstsTrait> where - [(); nr_ptes_per_node::()]:, - [(); C::NR_LEVELS]:, + [(); nr_subpage_per_huge::()]:, + [(); C::NR_LEVELS as usize]:, { inner: CursorMut<'a, M, E, C>, } impl<'a, M: PageTableMode, E: PageTableEntryTrait, C: PagingConstsTrait> Cursor<'a, M, E, C> where - [(); nr_ptes_per_node::()]:, - [(); C::NR_LEVELS]:, + [(); nr_subpage_per_huge::()]:, + [(); C::NR_LEVELS as usize]:, { pub(super) fn new( pt: &'a PageTable, @@ -594,8 +598,8 @@ where impl<'a, M: PageTableMode, E: PageTableEntryTrait, C: PagingConstsTrait> Iterator for Cursor<'a, M, E, C> where - [(); nr_ptes_per_node::()]:, - [(); C::NR_LEVELS]:, + [(); nr_subpage_per_huge::()]:, + [(); C::NR_LEVELS as usize]:, { type Item = PageTableQueryResult; diff --git a/framework/aster-frame/src/vm/page_table/frame.rs b/framework/aster-frame/src/vm/page_table/frame.rs index 0d85203a3..a7ee48ef1 100644 --- a/framework/aster-frame/src/vm/page_table/frame.rs +++ b/framework/aster-frame/src/vm/page_table/frame.rs @@ -2,10 +2,10 @@ use alloc::{boxed::Box, sync::Arc}; -use super::{nr_ptes_per_node, page_size, PageTableEntryTrait}; +use super::{nr_subpage_per_huge, page_size, PageTableEntryTrait}; use crate::{ sync::SpinLock, - vm::{page_prop::PageProperty, Paddr, PagingConstsTrait, VmAllocOptions, VmFrame}, + vm::{page_prop::PageProperty, Paddr, PagingConstsTrait, PagingLevel, VmAllocOptions, VmFrame}, }; /// A page table frame. @@ -14,14 +14,14 @@ use crate::{ #[derive(Debug)] pub(super) struct PageTableFrame where - [(); nr_ptes_per_node::()]:, - [(); C::NR_LEVELS]:, + [(); nr_subpage_per_huge::()]:, + [(); C::NR_LEVELS as usize]:, { inner: VmFrame, /// TODO: all the following fields can be removed if frame metadata is introduced. /// Here we allow 2x space overhead each frame temporarily. #[allow(clippy::type_complexity)] - children: Box<[Child; nr_ptes_per_node::()]>, + children: Box<[Child; nr_subpage_per_huge::()]>, nr_valid_children: usize, } @@ -30,8 +30,8 @@ pub(super) type PtfRef = Arc>>; #[derive(Debug)] pub(super) enum Child where - [(); nr_ptes_per_node::()]:, - [(); C::NR_LEVELS]:, + [(); nr_subpage_per_huge::()]:, + [(); C::NR_LEVELS as usize]:, { PageTable(PtfRef), Frame(VmFrame), @@ -42,8 +42,8 @@ where impl Child where - [(); nr_ptes_per_node::()]:, - [(); C::NR_LEVELS]:, + [(); nr_subpage_per_huge::()]:, + [(); C::NR_LEVELS as usize]:, { pub(super) fn is_pt(&self) -> bool { matches!(self, Child::PageTable(_)) @@ -80,8 +80,8 @@ where impl Clone for Child where - [(); nr_ptes_per_node::()]:, - [(); C::NR_LEVELS]:, + [(); nr_subpage_per_huge::()]:, + [(); C::NR_LEVELS as usize]:, { /// This is a shallow copy. fn clone(&self) -> Self { @@ -96,8 +96,8 @@ where impl PageTableFrame where - [(); nr_ptes_per_node::()]:, - [(); C::NR_LEVELS]:, + [(); nr_subpage_per_huge::()]:, + [(); C::NR_LEVELS as usize]:, { pub(super) fn new() -> Self { Self { @@ -112,7 +112,7 @@ where } pub(super) fn child(&self, idx: usize) -> &Child { - debug_assert!(idx < nr_ptes_per_node::()); + debug_assert!(idx < nr_subpage_per_huge::()); &self.children[idx] } @@ -128,15 +128,15 @@ where } /// Split the untracked huge page mapped at `idx` to smaller pages. 
- pub(super) fn split_untracked_huge(&mut self, cur_level: usize, idx: usize) { - debug_assert!(idx < nr_ptes_per_node::()); + pub(super) fn split_untracked_huge(&mut self, cur_level: PagingLevel, idx: usize) { + debug_assert!(idx < nr_subpage_per_huge::()); debug_assert!(cur_level > 1); let Child::Untracked(pa) = self.children[idx] else { panic!("split_untracked_huge: not an untyped huge page"); }; let prop = self.read_pte_prop(idx); let mut new_frame = Self::new(); - for i in 0..nr_ptes_per_node::() { + for i in 0..nr_subpage_per_huge::() { let small_pa = pa + i * page_size::(cur_level - 1); new_frame.set_child(i, Child::Untracked(small_pa), Some(prop), cur_level - 1 > 1); } @@ -157,7 +157,7 @@ where prop: Option, huge: bool, ) { - assert!(idx < nr_ptes_per_node::()); + assert!(idx < nr_subpage_per_huge::()); // SAFETY: the index is within the bound and the PTE to be written is valid. // And the physical address of PTE points to initialized memory. // This applies to all the following `write_pte` invocations. @@ -193,7 +193,7 @@ where } /// Protect an already mapped child at a given index. - pub(super) fn protect(&mut self, idx: usize, prop: PageProperty, level: usize) { + pub(super) fn protect(&mut self, idx: usize, prop: PageProperty, level: PagingLevel) { debug_assert!(self.children[idx].is_some()); let paddr = self.children[idx].paddr().unwrap(); // SAFETY: the index is within the bound and the PTE is valid. @@ -206,7 +206,7 @@ where } fn read_pte(&self, idx: usize) -> E { - assert!(idx < nr_ptes_per_node::()); + assert!(idx < nr_subpage_per_huge::()); // SAFETY: the index is within the bound and PTE is plain-old-data. unsafe { (self.inner.as_ptr() as *const E).add(idx).read() } } @@ -225,8 +225,8 @@ where impl Clone for PageTableFrame where - [(); nr_ptes_per_node::()]:, - [(); C::NR_LEVELS]:, + [(); nr_subpage_per_huge::()]:, + [(); C::NR_LEVELS as usize]:, { /// Make a deep copy of the page table. /// The child page tables are also being deep copied. diff --git a/framework/aster-frame/src/vm/page_table/mod.rs b/framework/aster-frame/src/vm/page_table/mod.rs index e9f00ad22..b730641a0 100644 --- a/framework/aster-frame/src/vm/page_table/mod.rs +++ b/framework/aster-frame/src/vm/page_table/mod.rs @@ -6,9 +6,9 @@ use core::{fmt::Debug, marker::PhantomData, ops::Range, panic}; use pod::Pod; use super::{ - paddr_to_vaddr, + nr_subpage_per_huge, paddr_to_vaddr, page_prop::{CachePolicy, PageFlags, PageProperty, PrivilegedPageFlags}, - Paddr, PagingConstsTrait, Vaddr, + page_size, Paddr, PagingConstsTrait, PagingLevel, Vaddr, }; use crate::{ arch::mm::{activate_page_table, PageTableEntry, PagingConsts}, @@ -22,6 +22,8 @@ pub(crate) use cursor::{Cursor, CursorMut, PageTableQueryResult}; #[cfg(ktest)] mod test; +pub(in crate::vm) mod boot_pt; + #[derive(Clone, Copy, PartialEq, Eq, Debug)] pub enum PageTableError { /// The virtual address range is invalid. @@ -63,25 +65,15 @@ impl PageTableMode for KernelMode { // Here are some const values that are determined by the paging constants. -/// The page size at a given level. -pub(crate) const fn page_size(level: usize) -> usize { - C::BASE_PAGE_SIZE << (nr_pte_index_bits::() * (level - 1)) -} - -/// The number of page table entries per page table frame. -pub(crate) const fn nr_ptes_per_node() -> usize { - C::BASE_PAGE_SIZE / C::PTE_SIZE -} - /// The number of virtual address bits used to index a PTE in a frame. 
const fn nr_pte_index_bits() -> usize { - nr_ptes_per_node::().ilog2() as usize + nr_subpage_per_huge::().ilog2() as usize } /// The index of a VA's PTE in a page table frame at the given level. -const fn pte_index(va: Vaddr, level: usize) -> usize { - va >> (C::BASE_PAGE_SIZE.ilog2() as usize + nr_pte_index_bits::() * (level - 1)) - & (nr_ptes_per_node::() - 1) +const fn pte_index(va: Vaddr, level: PagingLevel) -> usize { + va >> (C::BASE_PAGE_SIZE.ilog2() as usize + nr_pte_index_bits::() * (level as usize - 1)) + & (nr_subpage_per_huge::() - 1) } /// A handle to a page table. @@ -92,8 +84,8 @@ pub(crate) struct PageTable< E: PageTableEntryTrait = PageTableEntry, C: PagingConstsTrait = PagingConsts, > where - [(); nr_ptes_per_node::()]:, - [(); C::NR_LEVELS]:, + [(); nr_subpage_per_huge::()]:, + [(); C::NR_LEVELS as usize]:, { root_frame: PtfRef, _phantom: PhantomData, @@ -101,8 +93,8 @@ pub(crate) struct PageTable< impl PageTable where - [(); nr_ptes_per_node::()]:, - [(); C::NR_LEVELS]:, + [(); nr_subpage_per_huge::()]:, + [(); C::NR_LEVELS as usize]:, { pub(crate) fn activate(&self) { // SAFETY: The usermode page table is safe to activate since the kernel @@ -130,7 +122,7 @@ where }; let root_frame = cursor.leak_root_guard().unwrap(); let mut new_root_frame = PageTableFrame::::new(); - let half_of_entries = nr_ptes_per_node::() / 2; + let half_of_entries = nr_subpage_per_huge::() / 2; for i in 0..half_of_entries { // This is user space, deep copy the child. match root_frame.child(i) { @@ -150,7 +142,7 @@ where } } } - for i in half_of_entries..nr_ptes_per_node::() { + for i in half_of_entries..nr_subpage_per_huge::() { // This is kernel space, share the child. new_root_frame.set_child( i, @@ -168,8 +160,8 @@ where impl PageTable where - [(); nr_ptes_per_node::()]:, - [(); C::NR_LEVELS]:, + [(); nr_subpage_per_huge::()]:, + [(); C::NR_LEVELS as usize]:, { /// Create a new user page table. /// @@ -181,7 +173,7 @@ where pub(crate) fn create_user_page_table(&self) -> PageTable { let mut new_root_frame = PageTableFrame::::new(); let root_frame = self.root_frame.lock(); - for i in nr_ptes_per_node::() / 2..nr_ptes_per_node::() { + for i in nr_subpage_per_huge::() / 2..nr_subpage_per_huge::() { new_root_frame.set_child( i, root_frame.child(i).clone(), @@ -202,10 +194,10 @@ where /// instead of the virtual address range. pub(crate) fn make_shared_tables(&self, root_index: Range) { let start = root_index.start; - debug_assert!(start >= nr_ptes_per_node::() / 2); - debug_assert!(start < nr_ptes_per_node::()); + debug_assert!(start >= nr_subpage_per_huge::() / 2); + debug_assert!(start < nr_subpage_per_huge::()); let end = root_index.end; - debug_assert!(end <= nr_ptes_per_node::()); + debug_assert!(end <= nr_subpage_per_huge::()); let mut root_frame = self.root_frame.lock(); for i in start..end { let no_such_child = root_frame.child(i).is_none(); @@ -228,8 +220,8 @@ where impl<'a, M: PageTableMode, E: PageTableEntryTrait, C: PagingConstsTrait> PageTable where - [(); nr_ptes_per_node::()]:, - [(); C::NR_LEVELS]:, + [(); nr_subpage_per_huge::()]:, + [(); C::NR_LEVELS as usize]:, { /// Create a new empty page table. Useful for the kernel page table and IOMMU page tables only. 
pub(crate) fn empty() -> Self { @@ -319,8 +311,8 @@ where impl Clone for PageTable where - [(); nr_ptes_per_node::()]:, - [(); C::NR_LEVELS]:, + [(); nr_subpage_per_huge::()]:, + [(); C::NR_LEVELS as usize]:, { fn clone(&self) -> Self { let frame = self.root_frame.lock(); diff --git a/framework/aster-frame/src/vm/page_table/test.rs b/framework/aster-frame/src/vm/page_table/test.rs index de39db0d2..97c715b9a 100644 --- a/framework/aster-frame/src/vm/page_table/test.rs +++ b/framework/aster-frame/src/vm/page_table/test.rs @@ -85,9 +85,10 @@ type Qr = PageTableQueryResult; struct BasePagingConsts {} impl PagingConstsTrait for BasePagingConsts { - const NR_LEVELS: usize = 4; + const NR_LEVELS: PagingLevel = 4; const BASE_PAGE_SIZE: usize = PAGE_SIZE; - const HIGHEST_TRANSLATION_LEVEL: usize = 1; + const ADDRESS_WIDTH: usize = 48; + const HIGHEST_TRANSLATION_LEVEL: PagingLevel = 1; const PTE_SIZE: usize = core::mem::size_of::(); } @@ -122,9 +123,10 @@ fn test_base_protect_query() { struct VeryHugePagingConsts {} impl PagingConstsTrait for VeryHugePagingConsts { - const NR_LEVELS: usize = 4; + const NR_LEVELS: PagingLevel = 4; const BASE_PAGE_SIZE: usize = PAGE_SIZE; - const HIGHEST_TRANSLATION_LEVEL: usize = 3; + const ADDRESS_WIDTH: usize = 48; + const HIGHEST_TRANSLATION_LEVEL: PagingLevel = 3; const PTE_SIZE: usize = core::mem::size_of::(); } diff --git a/kernel/aster-nix/src/vm/vmo/mod.rs b/kernel/aster-nix/src/vm/vmo/mod.rs index 8c4b3625f..34f9ce660 100644 --- a/kernel/aster-nix/src/vm/vmo/mod.rs +++ b/kernel/aster-nix/src/vm/vmo/mod.rs @@ -196,7 +196,7 @@ pub(super) struct Vmo_ { fn clone_page(page: &VmFrame) -> Result { let new_page = VmAllocOptions::new(1).alloc_single()?; - new_page.copy_from_frame(page); + new_page.copy_from(page); Ok(new_page) }
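A minimal usage sketch of the boot page table introduced in this patch, assuming crate-internal code (the `boot_pt` module is visible only within `crate::vm`); the helper name `map_meta_page_early` and its address parameters are hypothetical, while `BootPageTable::from_anonymous_boot_pt`, `map_base_page`, and the page property types are the ones added above:

    // Crate-internal sketch (e.g. under `crate::vm`): adopt the page table left by
    // the loader and map one extra base page before the metadata-backed page tables exist.
    use crate::{
        arch::mm::{PageTableEntry, PagingConsts},
        vm::{
            page_table::boot_pt::BootPageTable, CachePolicy, PageFlags, PageProperty, Paddr,
            Vaddr, PAGE_SIZE,
        },
    };

    fn map_meta_page_early(root_paddr: Paddr, meta_va: Vaddr, meta_pa: Paddr) {
        let mut boot_pt =
            BootPageTable::<PageTableEntry, PagingConsts>::from_anonymous_boot_pt(root_paddr);
        // `map_base_page` takes a frame number, i.e. the physical address divided by the page size.
        let prop = PageProperty::new(PageFlags::RW, CachePolicy::Writeback);
        boot_pt.map_base_page(meta_va, meta_pa / PAGE_SIZE, prop);
        // Dropping `boot_pt` frees only the intermediate table frames it allocated itself.
    }

Because the drop handler deallocates only the frames recorded in `frames`, any page table frames set up by the firmware or loader are left untouched when the boot page table goes out of scope.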