diff --git a/framework/aster-frame/src/arch/x86/boot/boot.S b/framework/aster-frame/src/arch/x86/boot/boot.S index cf758ec16..7dd1d5c2a 100644 --- a/framework/aster-frame/src/arch/x86/boot/boot.S +++ b/framework/aster-frame/src/arch/x86/boot/boot.S @@ -163,13 +163,6 @@ PTE_GLOBAL = (1 << 8) mov dword ptr [edi], eax mov dword ptr [edi + 4], 0 - // 1000 00000|000 100000|00 0000000|0 00000000 000 - // PDPT: 0xffff8008_00000000 ~ 0xffff8008_3fffffff - lea edi, [boot_pdpt + 0x20 * 8] - lea eax, [boot_pd_32g + (PTE_PRESENT | PTE_WRITE | PTE_GLOBAL)] - mov dword ptr [edi], eax - mov dword ptr [edi + 4], 0 - // PDPT: 0xffffffff_80000000 ~ 0xffffffff_bfffffff lea edi, [boot_pdpt + 0x1fe * 8] lea eax, [boot_pd_0g_1g + (PTE_PRESENT | PTE_WRITE | PTE_GLOBAL)] @@ -184,48 +177,15 @@ PTE_GLOBAL = (1 << 8) // Page Directory: map to low 1 GiB * 4 space lea edi, [boot_pd] - lea eax, [boot_pt + (PTE_PRESENT | PTE_WRITE | PTE_GLOBAL)] + mov eax, PTE_PRESENT | PTE_WRITE | PTE_GLOBAL | PTE_HUGE mov ecx, 512 * 4 // (of entries in PD) * (number of PD) write_pd_entry: mov dword ptr [edi], eax mov dword ptr [edi + 4], 0 - add eax, 0x1000 // +4KiB to next table + add eax, 0x200000 // +2MiB add edi, 8 loop write_pd_entry - // Page Directory: map to 1 GiB space offset 32GiB - lea edi, [boot_pd_32g] - lea eax, [boot_pt_32g + (PTE_PRESENT | PTE_WRITE | PTE_GLOBAL)] - mov ecx, 512 // (of entries in PD) -write_pd_32g_entry: - mov dword ptr [edi], eax - mov dword ptr [edi + 4], 0x0 - add eax, 0x1000 // +4KiB to next table - add edi, 8 - loop write_pd_32g_entry - - // Page Table: map to low 1 GiB * 4 space - lea edi, [boot_pt] - mov eax, (PTE_PRESENT | PTE_WRITE | PTE_GLOBAL) // Offset 0 - mov ecx, 512 * 512 * 4 // (of entries in PT) * (number of PT) * (number of PD) -write_pt_entry: - mov dword ptr [edi], eax - mov dword ptr [edi + 4], 0 - add eax, 0x1000 // +4KiB - add edi, 8 - loop write_pt_entry - - // Page Table: map to 1 GiB space offset 32GiB - lea edi, [boot_pt_32g] - mov eax, (PTE_PRESENT | PTE_WRITE | PTE_GLOBAL) // Offset 0x8_00000000 but should write to high 32bits - mov ecx, 512 * 512 // (of entries in PT) * (number of PT) -write_pt_32g_entry: - mov dword ptr [edi], eax - mov dword ptr [edi + 4], 0x8 // Offset 0x8_00000000 - add eax, 0x1000 // +4KiB - add edi, 8 - loop write_pt_32g_entry - jmp enable_long_mode enable_long_mode: @@ -291,12 +251,6 @@ boot_pd_2g_3g: .skip 4096 boot_pd_3g_4g: .skip 4096 -boot_pt: - .skip 4096 * 512 * 4 -boot_pd_32g: - .skip 4096 -boot_pt_32g: - .skip 4096 * 512 boot_page_table_end: .global boot_stack_top diff --git a/framework/aster-frame/src/arch/x86/boot/linux_boot/mod.rs b/framework/aster-frame/src/arch/x86/boot/linux_boot/mod.rs index bfd72d7a9..c2e18bb6a 100644 --- a/framework/aster-frame/src/arch/x86/boot/linux_boot/mod.rs +++ b/framework/aster-frame/src/arch/x86/boot/linux_boot/mod.rs @@ -15,7 +15,7 @@ use crate::{ memory_region::{non_overlapping_regions_from, MemoryRegion, MemoryRegionType}, BootloaderAcpiArg, BootloaderFramebufferArg, }, - vm::{paddr_to_vaddr, PHYS_MEM_BASE_VADDR}, + vm::kspace::{paddr_to_vaddr, LINEAR_MAPPING_BASE_VADDR}, }; static BOOT_PARAMS: Once = Once::new(); @@ -73,7 +73,7 @@ fn init_initramfs(initramfs: &'static Once<&'static [u8]>) { return; } // We must return a slice composed by VA since kernel should read everything in VA. 
- let base_va = if ptr < PHYS_MEM_BASE_VADDR { + let base_va = if ptr < LINEAR_MAPPING_BASE_VADDR { paddr_to_vaddr(ptr) } else { ptr diff --git a/framework/aster-frame/src/arch/x86/boot/multiboot/mod.rs b/framework/aster-frame/src/arch/x86/boot/multiboot/mod.rs index 96f9dd04d..ee686c9ec 100644 --- a/framework/aster-frame/src/arch/x86/boot/multiboot/mod.rs +++ b/framework/aster-frame/src/arch/x86/boot/multiboot/mod.rs @@ -12,7 +12,7 @@ use crate::{ memory_region::{non_overlapping_regions_from, MemoryRegion, MemoryRegionType}, BootloaderAcpiArg, BootloaderFramebufferArg, }, - vm::{paddr_to_vaddr, PHYS_MEM_BASE_VADDR}, + vm::kspace::{paddr_to_vaddr, LINEAR_MAPPING_BASE_VADDR}, }; global_asm!(include_str!("header.S")); @@ -76,7 +76,7 @@ fn init_initramfs(initramfs: &'static Once<&'static [u8]>) { ) }; // We must return a slice composed by VA since kernel should read every in VA. - let base_va = if start < PHYS_MEM_BASE_VADDR { + let base_va = if start < LINEAR_MAPPING_BASE_VADDR { paddr_to_vaddr(start) } else { start diff --git a/framework/aster-frame/src/arch/x86/boot/multiboot2/mod.rs b/framework/aster-frame/src/arch/x86/boot/multiboot2/mod.rs index b70f7a65f..40c6fb739 100644 --- a/framework/aster-frame/src/arch/x86/boot/multiboot2/mod.rs +++ b/framework/aster-frame/src/arch/x86/boot/multiboot2/mod.rs @@ -9,16 +9,17 @@ use core::arch::global_asm; use multiboot2::{BootInformation, BootInformationHeader, MemoryAreaType}; use spin::Once; -use crate::boot::{ - kcmdline::KCmdlineArg, - memory_region::{non_overlapping_regions_from, MemoryRegion, MemoryRegionType}, - BootloaderAcpiArg, BootloaderFramebufferArg, +use crate::{ + boot::{ + kcmdline::KCmdlineArg, + memory_region::{non_overlapping_regions_from, MemoryRegion, MemoryRegionType}, + BootloaderAcpiArg, BootloaderFramebufferArg, + }, + vm::kspace::paddr_to_vaddr, }; global_asm!(include_str!("header.S")); -use crate::vm::{paddr_to_vaddr, PHYS_MEM_BASE_VADDR}; - pub(super) const MULTIBOOT2_ENTRY_MAGIC: u32 = 0x36d76289; static MB2_INFO: Once = Once::new(); @@ -54,12 +55,8 @@ fn init_initramfs(initramfs: &'static Once<&'static [u8]>) { return; }; let base_addr = mb2_module_tag.start_address() as usize; - // We must return a slice composed by VA since kernel should read every in VA. - let base_va = if base_addr < PHYS_MEM_BASE_VADDR { - paddr_to_vaddr(base_addr) - } else { - base_addr - }; + // We must return a slice composed by VA since kernel should read everything in VA. 
+ let base_va = paddr_to_vaddr(base_addr); let length = mb2_module_tag.module_size() as usize; initramfs.call_once(|| unsafe { core::slice::from_raw_parts(base_va as *const u8, length) }); } diff --git a/framework/aster-frame/src/arch/x86/iommu/context_table.rs b/framework/aster-frame/src/arch/x86/iommu/context_table.rs index 8314641ad..43be7f505 100644 --- a/framework/aster-frame/src/arch/x86/iommu/context_table.rs +++ b/framework/aster-frame/src/arch/x86/iommu/context_table.rs @@ -6,13 +6,13 @@ use core::mem::size_of; use log::warn; use pod::Pod; -use super::second_stage::{PageTableEntry, PageTableFlags}; +use super::second_stage::{DeviceMode, PageTableConsts, PageTableEntry}; use crate::{ bus::pci::PciDeviceLocation, vm::{ dma::Daddr, - page_table::{DeviceMode, PageTableConfig, PageTableError}, - Paddr, PageTable, VmAllocOptions, VmFrame, VmIo, + page_table::{CachePolicy, MapProperty, PageTableError}, + Paddr, PageTable, VmAllocOptions, VmFrame, VmIo, VmPerm, PAGE_SIZE, }, }; @@ -123,7 +123,7 @@ impl RootTable { pub fn specify_device_page_table( &mut self, device_id: PciDeviceLocation, - page_table: PageTable, + page_table: PageTable, ) { let context_table = self.get_or_create_context_table(device_id); @@ -233,7 +233,7 @@ pub enum AddressWidth { pub struct ContextTable { /// Total 32 devices, each device has 8 functions. entries_frame: VmFrame, - page_tables: BTreeMap>, + page_tables: BTreeMap>, } impl ContextTable { @@ -251,7 +251,7 @@ impl ContextTable { fn get_or_create_page_table( &mut self, device: PciDeviceLocation, - ) -> &mut PageTable { + ) -> &mut PageTable { let bus_entry = self .entries_frame .read_val::( @@ -260,10 +260,7 @@ impl ContextTable { .unwrap(); if !bus_entry.is_present() { - let table: PageTable = - PageTable::::new(PageTableConfig { - address_width: crate::vm::page_table::AddressWidth::Level3, - }); + let table = PageTable::::empty(); let address = table.root_paddr(); self.page_tables.insert(address, table); let entry = ContextEntry(address as u128 | 3 | 0x1_0000_0000_0000_0000); @@ -282,10 +279,9 @@ impl ContextTable { } } - /// /// # Safety /// - /// User must ensure the given paddr is a valid one. + /// User must ensure that the given physical address is valid. 
unsafe fn map( &mut self, device: PciDeviceLocation, @@ -295,22 +291,25 @@ impl ContextTable { if device.device >= 32 || device.function >= 8 { return Err(ContextTableError::InvalidDeviceId); } - self.get_or_create_page_table(device) - .map_with_paddr( - daddr, - paddr, - PageTableFlags::WRITABLE | PageTableFlags::READABLE | PageTableFlags::LAST_PAGE, - ) - .map_err(ContextTableError::ModificationError) + self.get_or_create_page_table(device).map_unchecked( + &(daddr..daddr + PAGE_SIZE), + &(paddr..paddr + PAGE_SIZE), + MapProperty { + perm: VmPerm::RW, + cache: CachePolicy::Uncacheable, + }, + ); + Ok(()) } fn unmap(&mut self, device: PciDeviceLocation, daddr: Daddr) -> Result<(), ContextTableError> { if device.device >= 32 || device.function >= 8 { return Err(ContextTableError::InvalidDeviceId); } - - self.get_or_create_page_table(device) - .unmap(daddr) - .map_err(ContextTableError::ModificationError) + unsafe { + self.get_or_create_page_table(device) + .unmap_unchecked(&(daddr..daddr + PAGE_SIZE)); + } + Ok(()) } } diff --git a/framework/aster-frame/src/arch/x86/iommu/mod.rs b/framework/aster-frame/src/arch/x86/iommu/mod.rs index a40e3a072..e6c6e08c6 100644 --- a/framework/aster-frame/src/arch/x86/iommu/mod.rs +++ b/framework/aster-frame/src/arch/x86/iommu/mod.rs @@ -6,17 +6,14 @@ mod remapping; mod second_stage; use log::info; +use second_stage::{DeviceMode, PageTableConsts, PageTableEntry}; use spin::Once; use crate::{ - arch::iommu::{context_table::RootTable, second_stage::PageTableEntry}, + arch::iommu::context_table::RootTable, bus::pci::PciDeviceLocation, sync::Mutex, - vm::{ - dma::Daddr, - page_table::{DeviceMode, PageTableConfig, PageTableError}, - Paddr, PageTable, - }, + vm::{dma::Daddr, page_table::PageTableError, Paddr, PageTable}, }; #[derive(Debug)] @@ -64,12 +61,9 @@ pub(crate) fn unmap(daddr: Daddr) -> Result<(), IommuError> { pub(crate) fn init() -> Result<(), IommuError> { let mut root_table = RootTable::new(); // For all PCI Device, use the same page table. - let page_table: PageTable = - PageTable::::new(PageTableConfig { - address_width: crate::vm::page_table::AddressWidth::Level3, - }); + let page_table = PageTable::::empty(); for table in PciDeviceLocation::all() { - root_table.specify_device_page_table(table, page_table.clone()) + root_table.specify_device_page_table(table, unsafe { page_table.shallow_copy() }) } remapping::init(&root_table)?; PAGE_TABLE.call_once(|| Mutex::new(root_table)); diff --git a/framework/aster-frame/src/arch/x86/iommu/second_stage.rs b/framework/aster-frame/src/arch/x86/iommu/second_stage.rs index cd485b993..dcdf10b1f 100644 --- a/framework/aster-frame/src/arch/x86/iommu/second_stage.rs +++ b/framework/aster-frame/src/arch/x86/iommu/second_stage.rs @@ -1,12 +1,37 @@ // SPDX-License-Identifier: MPL-2.0 +use core::ops::Range; + use pod::Pod; -use crate::{ - arch::x86::mm::NR_ENTRIES_PER_PAGE, - vm::page_table::{PageTableEntryTrait, PageTableFlagsTrait}, +use crate::vm::{ + page_table::{ + CachePolicy, MapInfo, MapProperty, MapStatus, PageTableConstsTrait, PageTableEntryTrait, + PageTableMode, + }, + Paddr, Vaddr, VmPerm, }; +/// The page table used by iommu maps the device address +/// space to the physical address space. +#[derive(Clone)] +pub(super) struct DeviceMode {} + +impl PageTableMode for DeviceMode { + /// The device address space is 32-bit. 
+ const VADDR_RANGE: Range = 0..0x1_0000_0000; +} + +#[derive(Debug)] +pub(super) struct PageTableConsts {} + +impl PageTableConstsTrait for PageTableConsts { + const BASE_PAGE_SIZE: usize = 4096; + const NR_LEVELS: usize = 3; + const HIGHEST_TRANSLATION_LEVEL: usize = 1; + const ENTRY_SIZE: usize = core::mem::size_of::(); +} + bitflags::bitflags! { #[derive(Pod)] #[repr(C)] @@ -42,117 +67,67 @@ bitflags::bitflags! { #[repr(C)] pub struct PageTableEntry(u64); -impl PageTableFlagsTrait for PageTableFlags { - fn new() -> Self { - Self::empty() - } - - fn set_present(self, present: bool) -> Self { - self - } - - fn set_writable(mut self, writable: bool) -> Self { - self.set(Self::WRITABLE, writable); - self - } - - fn set_readable(mut self, readable: bool) -> Self { - self.set(Self::READABLE, readable); - self - } - - fn set_accessible_by_user(self, accessible: bool) -> Self { - self - } - - fn set_executable(self, executable: bool) -> Self { - self - } - - fn is_present(&self) -> bool { - self.contains(Self::WRITABLE) || self.contains(Self::READABLE) - } - - fn writable(&self) -> bool { - self.contains(Self::WRITABLE) - } - - fn readable(&self) -> bool { - self.contains(Self::READABLE) - } - - fn executable(&self) -> bool { - true - } - - fn has_accessed(&self) -> bool { - self.contains(Self::ACCESSED) - } - - fn is_dirty(&self) -> bool { - self.contains(Self::DIRTY) - } - - fn accessible_by_user(&self) -> bool { - true - } - - fn union(&self, other: &Self) -> Self { - (*self).union(*other) - } - - fn remove(&mut self, flags: &Self) { - self.remove(*flags) - } - - fn insert(&mut self, flags: &Self) { - self.insert(*flags) - } - - fn is_huge(&self) -> bool { - // FIXME: this is super bad - false - } - - fn set_huge(self, huge: bool) -> Self { - // FIXME: this is super bad - self - } -} - impl PageTableEntry { const PHYS_MASK: usize = 0xFFFF_FFFF_F000; } impl PageTableEntryTrait for PageTableEntry { - // bit 47~12 - type F = PageTableFlags; - fn new(paddr: crate::vm::Paddr, flags: PageTableFlags) -> Self { + fn new(paddr: crate::vm::Paddr, prop: MapProperty, huge: bool, last: bool) -> Self { + let mut flags = PageTableFlags::empty(); + if prop.perm.contains(VmPerm::W) { + flags |= PageTableFlags::WRITABLE; + } + if prop.perm.contains(VmPerm::R) { + flags |= PageTableFlags::READABLE; + } + if last { + flags |= PageTableFlags::LAST_PAGE; + } + if huge { + panic!("Huge page is not supported in iommu page table"); + } Self((paddr & Self::PHYS_MASK) as u64 | flags.bits) } - fn paddr(&self) -> crate::vm::Paddr { + fn paddr(&self) -> Paddr { (self.0 & Self::PHYS_MASK as u64) as usize } - fn flags(&self) -> PageTableFlags { - PageTableFlags::from_bits_truncate(self.0) + fn new_invalid() -> Self { + Self(0) } - fn is_used(&self) -> bool { - self.paddr() != 0 + fn is_valid(&self) -> bool { + self.0 & (PageTableFlags::READABLE | PageTableFlags::WRITABLE).bits() != 0 } - fn update(&mut self, paddr: crate::vm::Paddr, flags: Self::F) { - self.0 = (paddr & Self::PHYS_MASK) as u64 | flags.bits + fn info(&self) -> MapInfo { + let mut perm = VmPerm::empty(); + if self.0 & PageTableFlags::READABLE.bits() != 0 { + perm |= VmPerm::R; + } + if self.0 & PageTableFlags::WRITABLE.bits() != 0 { + perm |= VmPerm::W; + } + let cache = if self.0 & PageTableFlags::SNOOP.bits() != 0 { + CachePolicy::Writeback + } else { + CachePolicy::Uncacheable + }; + let mut status = MapStatus::empty(); + if self.0 & PageTableFlags::ACCESSED.bits() != 0 { + status |= MapStatus::ACCESSED; + } + if self.0 & PageTableFlags::DIRTY.bits() != 
0 { + status |= MapStatus::DIRTY; + } + MapInfo { + prop: MapProperty { perm, cache }, + status, + } } - fn clear(&mut self) { - self.0 = 0; - } - - fn page_index(va: crate::vm::Vaddr, level: usize) -> usize { - debug_assert!((1..=5).contains(&level)); - va >> (12 + 9 * (level - 1)) & (NR_ENTRIES_PER_PAGE - 1) + fn is_huge(&self) -> bool { + false } } diff --git a/framework/aster-frame/src/arch/x86/mm/mod.rs b/framework/aster-frame/src/arch/x86/mm/mod.rs index 3e3fa3c29..9e6222787 100644 --- a/framework/aster-frame/src/arch/x86/mm/mod.rs +++ b/framework/aster-frame/src/arch/x86/mm/mod.rs @@ -1,18 +1,29 @@ // SPDX-License-Identifier: MPL-2.0 -use alloc::{collections::BTreeMap, fmt}; +use alloc::fmt; use pod::Pod; -use spin::Once; use x86_64::{instructions::tlb, structures::paging::PhysFrame, VirtAddr}; use crate::vm::{ - page_table::{table_of, PageTableEntryTrait, PageTableFlagsTrait}, - Paddr, Vaddr, + page_table::{ + CachePolicy, MapInfo, MapProperty, MapStatus, PageTableConstsTrait, PageTableEntryTrait, + }, + Paddr, Vaddr, VmPerm, }; pub(crate) const NR_ENTRIES_PER_PAGE: usize = 512; +#[derive(Debug)] +pub struct PageTableConsts {} + +impl PageTableConstsTrait for PageTableConsts { + const BASE_PAGE_SIZE: usize = 4096; + const NR_LEVELS: usize = 4; + const HIGHEST_TRANSLATION_LEVEL: usize = 2; + const ENTRY_SIZE: usize = core::mem::size_of::(); +} + bitflags::bitflags! { #[derive(Pod)] #[repr(C)] @@ -50,133 +61,38 @@ pub fn tlb_flush(vaddr: Vaddr) { tlb::flush(VirtAddr::new(vaddr as u64)); } -pub const fn is_user_vaddr(vaddr: Vaddr) -> bool { - // FIXME: Support 3/5 level page table. - // 47 = 12(offset) + 4 * 9(index) - 1 - (vaddr >> 47) == 0 -} - -pub const fn is_kernel_vaddr(vaddr: Vaddr) -> bool { - // FIXME: Support 3/5 level page table. - // 47 = 12(offset) + 4 * 9(index) - 1 - ((vaddr >> 47) & 0x1) == 1 -} - #[derive(Clone, Copy, Pod)] #[repr(C)] pub struct PageTableEntry(usize); +/// Activate the given level 4 page table. +/// The cache policy of the root page table frame is controlled by `root_pt_cache`. +/// /// ## Safety /// /// Changing the level 4 page table is unsafe, because it's possible to violate memory safety by /// changing the page mapping. -pub unsafe fn activate_page_table(root_paddr: Paddr, flags: x86_64::registers::control::Cr3Flags) { +pub unsafe fn activate_page_table(root_paddr: Paddr, root_pt_cache: CachePolicy) { x86_64::registers::control::Cr3::write( PhysFrame::from_start_address(x86_64::PhysAddr::new(root_paddr as u64)).unwrap(), - flags, + match root_pt_cache { + CachePolicy::Writeback => x86_64::registers::control::Cr3Flags::empty(), + CachePolicy::Writethrough => { + x86_64::registers::control::Cr3Flags::PAGE_LEVEL_WRITETHROUGH + } + CachePolicy::Uncacheable => { + x86_64::registers::control::Cr3Flags::PAGE_LEVEL_CACHE_DISABLE + } + _ => panic!("unsupported cache policy for the root page table"), + }, ); } -pub(crate) static INIT_MAPPED_PTE: Once> = Once::new(); - -pub(crate) fn init() { - let (page_directory_base, _) = x86_64::registers::control::Cr3::read(); - let page_directory_base = page_directory_base.start_address().as_u64() as usize; - - // Safety: page_directory_base is read from Cr3, the address is valid. - let p4 = unsafe { table_of::(page_directory_base).unwrap() }; - // Cancel mapping in lowest addresses. 
- p4[0].clear(); - INIT_MAPPED_PTE.call_once(|| { - let mut mapped_pte = BTreeMap::new(); - for (i, p4_i) in p4.iter().enumerate().take(512) { - if p4_i.flags().contains(PageTableFlags::PRESENT) { - mapped_pte.insert(i, *p4_i); - } - } - mapped_pte - }); -} - -impl PageTableFlagsTrait for PageTableFlags { - fn new() -> Self { - Self::empty() - } - - fn set_present(mut self, present: bool) -> Self { - self.set(Self::PRESENT, present); - self - } - - fn set_writable(mut self, writable: bool) -> Self { - self.set(Self::WRITABLE, writable); - self - } - - fn set_readable(self, readable: bool) -> Self { - // do nothing - self - } - - fn set_accessible_by_user(mut self, accessible: bool) -> Self { - self.set(Self::USER, accessible); - self - } - - fn is_present(&self) -> bool { - self.contains(Self::PRESENT) - } - - fn writable(&self) -> bool { - self.contains(Self::WRITABLE) - } - - fn readable(&self) -> bool { - // always true - true - } - - fn accessible_by_user(&self) -> bool { - self.contains(Self::USER) - } - - fn set_executable(mut self, executable: bool) -> Self { - self.set(Self::NO_EXECUTE, !executable); - self - } - - fn executable(&self) -> bool { - !self.contains(Self::NO_EXECUTE) - } - - fn has_accessed(&self) -> bool { - self.contains(Self::ACCESSED) - } - - fn is_dirty(&self) -> bool { - self.contains(Self::DIRTY) - } - - fn union(&self, flags: &Self) -> Self { - (*self).union(*flags) - } - - fn remove(&mut self, flags: &Self) { - self.remove(*flags) - } - - fn insert(&mut self, flags: &Self) { - self.insert(*flags) - } - - fn is_huge(&self) -> bool { - self.contains(Self::HUGE) - } - - fn set_huge(mut self, huge: bool) -> Self { - self.set(Self::HUGE, huge); - self - } +pub fn current_page_table_paddr() -> Paddr { + x86_64::registers::control::Cr3::read() + .0 + .start_address() + .as_u64() as Paddr } impl PageTableEntry { @@ -188,40 +104,105 @@ impl PageTableEntry { } impl PageTableEntryTrait for PageTableEntry { - type F = PageTableFlags; - fn new(paddr: Paddr, flags: PageTableFlags) -> Self { - Self((paddr & Self::PHYS_ADDR_MASK) | flags.bits) + fn new_invalid() -> Self { + Self(0) } + + fn is_valid(&self) -> bool { + self.0 & PageTableFlags::PRESENT.bits() != 0 + } + + fn new(paddr: Paddr, prop: MapProperty, huge: bool, last: bool) -> Self { + let mut flags = PageTableFlags::PRESENT; + if !huge && !last { + // In x86 if it's an intermediate PTE, it's better to have the same permissions + // as the most permissive child (to reduce hardware page walk accesses). But we + // don't have a mechanism to keep it generic across architectures, thus just + // setting it to be the most permissive. 
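+            // Note: the hardware combines the permissions of every level along the walk
+            // (the most restrictive one wins), so the leaf entries still enforce the
+            // intended permissions even though intermediate entries are fully permissive.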
+ flags = PageTableFlags::PRESENT | PageTableFlags::WRITABLE | PageTableFlags::USER; + } else { + if prop.perm.contains(VmPerm::W) { + flags |= PageTableFlags::WRITABLE; + } + if !prop.perm.contains(VmPerm::X) { + flags |= PageTableFlags::NO_EXECUTE; + } + if prop.perm.contains(VmPerm::U) { + flags |= PageTableFlags::USER; + } + if prop.perm.contains(VmPerm::G) { + flags |= PageTableFlags::GLOBAL; + } + } + if prop.cache == CachePolicy::Uncacheable { + flags |= PageTableFlags::NO_CACHE; + } + if prop.cache == CachePolicy::Writethrough { + flags |= PageTableFlags::WRITE_THROUGH; + } + if huge { + flags |= PageTableFlags::HUGE; + } + Self(paddr & Self::PHYS_ADDR_MASK | flags.bits()) + } + fn paddr(&self) -> Paddr { self.0 & Self::PHYS_ADDR_MASK } - fn flags(&self) -> PageTableFlags { - PageTableFlags::from_bits_truncate(self.0) - } - fn is_used(&self) -> bool { - self.0 != 0 + + fn info(&self) -> MapInfo { + let mut perm = VmPerm::empty(); + if self.0 & PageTableFlags::PRESENT.bits() != 0 { + perm |= VmPerm::R; + } + if self.0 & PageTableFlags::WRITABLE.bits() != 0 { + perm |= VmPerm::W; + } + if self.0 & PageTableFlags::NO_EXECUTE.bits() == 0 { + perm |= VmPerm::X; + } + if self.0 & PageTableFlags::USER.bits() != 0 { + perm |= VmPerm::U; + } + if self.0 & PageTableFlags::GLOBAL.bits() != 0 { + perm |= VmPerm::G; + } + let cache = if self.0 & PageTableFlags::NO_CACHE.bits() != 0 { + CachePolicy::Uncacheable + } else if self.0 & PageTableFlags::WRITE_THROUGH.bits() != 0 { + CachePolicy::Writethrough + } else { + CachePolicy::Writeback + }; + let mut status = MapStatus::empty(); + if self.0 & PageTableFlags::ACCESSED.bits() != 0 { + status |= MapStatus::ACCESSED; + } + if self.0 & PageTableFlags::DIRTY.bits() != 0 { + status |= MapStatus::DIRTY; + } + MapInfo { + prop: MapProperty { perm, cache }, + status, + } } - fn update(&mut self, paddr: Paddr, flags: Self::F) { - self.0 = (paddr & Self::PHYS_ADDR_MASK) | flags.bits; - } - - fn clear(&mut self) { - self.0 = 0; - } - - fn page_index(va: crate::vm::Vaddr, level: usize) -> usize { - debug_assert!((1..=5).contains(&level)); - va >> (12 + 9 * (level - 1)) & (NR_ENTRIES_PER_PAGE - 1) + fn is_huge(&self) -> bool { + self.0 & PageTableFlags::HUGE.bits() != 0 } } impl fmt::Debug for PageTableEntry { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { let mut f = f.debug_struct("PageTableEntry"); - f.field("raw", &self.0) - .field("paddr", &self.paddr()) - .field("flags", &self.flags()) + f.field("raw", &format_args!("{:#x}", self.0)) + .field("paddr", &format_args!("{:#x}", self.paddr())) + .field("valid", &self.is_valid()) + .field( + "flags", + &PageTableFlags::from_bits_truncate(self.0 & !Self::PHYS_ADDR_MASK), + ) + .field("info", &self.info()) .finish() } } diff --git a/framework/aster-frame/src/arch/x86/mod.rs b/framework/aster-frame/src/arch/x86/mod.rs index bf95e93a3..5d6437685 100644 --- a/framework/aster-frame/src/arch/x86/mod.rs +++ b/framework/aster-frame/src/arch/x86/mod.rs @@ -28,7 +28,6 @@ pub(crate) fn before_all_init() { pub(crate) fn after_all_init() { irq::init(); - mm::init(); kernel::acpi::init(); match kernel::apic::init() { Ok(_) => { diff --git a/framework/aster-frame/src/arch/x86/tdx_guest.rs b/framework/aster-frame/src/arch/x86/tdx_guest.rs index e2c6d6cce..68ad1b297 100644 --- a/framework/aster-frame/src/arch/x86/tdx_guest.rs +++ b/framework/aster-frame/src/arch/x86/tdx_guest.rs @@ -11,10 +11,11 @@ use tdx_guest::{ }; use crate::{ - arch::mm::{is_kernel_vaddr, PageTableFlags}, + arch::mm::PageTableFlags, vm::{ 
paddr_to_vaddr, page_table::{PageTableError, KERNEL_PAGE_TABLE}, + KERNEL_BASE_VADDR, KERNEL_END_VADDR, }, PAGE_SIZE, }; @@ -323,7 +324,7 @@ fn handle_mmio(trapframe: &mut dyn TdxTrapFrame, ve_info: &TdgVeInfo) -> Result< } fn decode_instr(rip: usize) -> Result { - if !is_kernel_vaddr(rip) { + if !(KERNEL_BASE_VADDR..KERNEL_END_VADDR).contains(rip) { return Err(MmioError::InvalidAddress); } let code_data = { diff --git a/framework/aster-frame/src/io_mem.rs b/framework/aster-frame/src/io_mem.rs index 7cb6d913b..289013aa7 100644 --- a/framework/aster-frame/src/io_mem.rs +++ b/framework/aster-frame/src/io_mem.rs @@ -61,7 +61,7 @@ impl HasPaddr for IoMem { impl IoMem { /// # Safety /// - /// User must ensure the range is in the I/O memory region. + /// User must ensure the given physical range is in the I/O memory region. pub(crate) unsafe fn new(range: Range) -> IoMem { IoMem { virtual_address: paddr_to_vaddr(range.start), diff --git a/framework/aster-frame/src/lib.rs b/framework/aster-frame/src/lib.rs index 1c1f8ee89..b6d021d3a 100644 --- a/framework/aster-frame/src/lib.rs +++ b/framework/aster-frame/src/lib.rs @@ -7,6 +7,7 @@ #![feature(const_trait_impl)] #![feature(coroutines)] #![feature(fn_traits)] +#![feature(generic_const_exprs)] #![feature(iter_from_coroutine)] #![feature(let_chains)] #![feature(negative_impls)] @@ -15,8 +16,12 @@ #![feature(ptr_sub_ptr)] #![feature(strict_provenance)] #![feature(pointer_is_aligned)] +#![feature(unboxed_closures)] #![allow(dead_code)] #![allow(unused_variables)] +// The `generic_const_exprs` feature is incomplete however required for the page table +// const generic implementation. We are using this feature in a conservative manner. +#![allow(incomplete_features)] #![no_std] extern crate alloc; @@ -66,9 +71,23 @@ pub fn init() { trap::init(); arch::after_all_init(); bus::init(); + // TODO: We activate the kernel page table here because the new kernel page table + // has mappings for MMIO which is required for the components initialization. We + // should refactor the initialization process to avoid this. + // Safety: we are activating the unique kernel page table. + unsafe { + vm::kspace::KERNEL_PAGE_TABLE + .get() + .unwrap() + .lock() + .activate_unchecked(); + } invoke_ffi_init_funcs(); } +/// Invoke the initialization functions defined in the FFI. +/// The component system uses this function to call the initialization functions of +/// the components. 
fn invoke_ffi_init_funcs() { extern "C" { fn __sinit_array(); diff --git a/framework/aster-frame/src/task/task.rs b/framework/aster-frame/src/task/task.rs index 926840f63..bfbcab677 100644 --- a/framework/aster-frame/src/task/task.rs +++ b/framework/aster-frame/src/task/task.rs @@ -8,12 +8,14 @@ use super::{ processor::{current_task, schedule}, }; use crate::{ - arch::mm::PageTableFlags, cpu::CpuSet, prelude::*, sync::{SpinLock, SpinLockGuard}, user::UserSpace, - vm::{page_table::KERNEL_PAGE_TABLE, VmAllocOptions, VmSegment, PAGE_SIZE}, + vm::{ + kspace::KERNEL_PAGE_TABLE, page_table::MapProperty, VmAllocOptions, VmPerm, VmSegment, + PAGE_SIZE, + }, }; pub const KERNEL_STACK_SIZE: usize = PAGE_SIZE * 64; @@ -45,14 +47,14 @@ extern "C" { pub struct KernelStack { segment: VmSegment, - old_guard_page_flag: Option, + has_guard_page: bool, } impl KernelStack { pub fn new() -> Result { Ok(Self { segment: VmAllocOptions::new(KERNEL_STACK_SIZE / PAGE_SIZE).alloc_contiguous()?, - old_guard_page_flag: None, + has_guard_page: false, }) } @@ -61,37 +63,62 @@ impl KernelStack { pub fn new_with_guard_page() -> Result { let stack_segment = VmAllocOptions::new(KERNEL_STACK_SIZE / PAGE_SIZE + 1).alloc_contiguous()?; - let unpresent_flag = PageTableFlags::empty(); - let old_guard_page_flag = Self::protect_guard_page(&stack_segment, unpresent_flag); + // FIXME: modifying the the linear mapping is bad. + let mut page_table = KERNEL_PAGE_TABLE.get().unwrap().lock(); + let guard_page_vaddr = { + let guard_page_paddr = stack_segment.start_paddr(); + crate::vm::paddr_to_vaddr(guard_page_paddr) + }; + // Safety: the physical guard page address is exclusively used since we allocated it. + unsafe { + page_table + .protect_unchecked(&(guard_page_vaddr..guard_page_vaddr + PAGE_SIZE), |info| { + assert!( + info.prop.perm.contains(VmPerm::RW), + "linear mapping shoud be readable and writable" + ); + MapProperty { + perm: info.prop.perm - VmPerm::RW, + cache: info.prop.cache, + } + }) + .unwrap(); + } Ok(Self { segment: stack_segment, - old_guard_page_flag: Some(old_guard_page_flag), + has_guard_page: true, }) } pub fn end_paddr(&self) -> Paddr { self.segment.end_paddr() } - - pub fn has_guard_page(&self) -> bool { - self.old_guard_page_flag.is_some() - } - - fn protect_guard_page(stack_segment: &VmSegment, flags: PageTableFlags) -> PageTableFlags { - let mut kernel_pt = KERNEL_PAGE_TABLE.get().unwrap().lock(); - let guard_page_vaddr = { - let guard_page_paddr = stack_segment.start_paddr(); - crate::vm::paddr_to_vaddr(guard_page_paddr) - }; - // Safety: The protected address must be the address of guard page hence it should be safe and valid. - unsafe { kernel_pt.protect(guard_page_vaddr, flags).unwrap() } - } } impl Drop for KernelStack { fn drop(&mut self) { - if self.has_guard_page() { - Self::protect_guard_page(&self.segment, self.old_guard_page_flag.unwrap()); + if self.has_guard_page { + // FIXME: modifying the the linear mapping is bad. + let mut page_table = KERNEL_PAGE_TABLE.get().unwrap().lock(); + let guard_page_vaddr = { + let guard_page_paddr = self.segment.start_paddr(); + crate::vm::paddr_to_vaddr(guard_page_paddr) + }; + // Safety: the physical guard page address is exclusively used since we allocated it. 
+ unsafe { + page_table + .protect_unchecked(&(guard_page_vaddr..guard_page_vaddr + PAGE_SIZE), |info| { + assert!( + !info.prop.perm.contains(VmPerm::RW), + "we should have removed the permission of the guard page" + ); + MapProperty { + perm: info.prop.perm | VmPerm::RW, + cache: info.prop.cache, + } + }) + .unwrap(); + } } } } diff --git a/framework/aster-frame/src/trap/handler.rs b/framework/aster-frame/src/trap/handler.rs index 7f3a16156..86400fcc1 100644 --- a/framework/aster-frame/src/trap/handler.rs +++ b/framework/aster-frame/src/trap/handler.rs @@ -2,6 +2,7 @@ use core::sync::atomic::{AtomicBool, Ordering}; +use align_ext::AlignExt; use log::debug; #[cfg(feature = "intel_tdx")] use tdx_guest::tdcall; @@ -15,13 +16,14 @@ use crate::arch::{ tdx_guest::{handle_virtual_exception, TdxTrapFrame}, }; use crate::{ - arch::{ - irq::IRQ_LIST, - mm::{PageTableEntry, PageTableFlags}, - }, + arch::irq::IRQ_LIST, cpu::{CpuException, PageFaultErrorCode, PAGE_FAULT}, cpu_local, - vm::{PageTable, PHYS_MEM_BASE_VADDR, PHYS_MEM_VADDR_RANGE}, + vm::{ + kspace::{KERNEL_PAGE_TABLE, LINEAR_MAPPING_BASE_VADDR}, + page_table::{CachePolicy, MapProperty}, + VmPerm, PAGE_SIZE, PHYS_MEM_VADDR_RANGE, + }, }; #[cfg(feature = "intel_tdx")] @@ -180,6 +182,8 @@ pub fn in_interrupt_context() -> bool { IN_INTERRUPT_CONTEXT.load(Ordering::Acquire) } +/// FIXME: this is a hack because we don't allocate kernel space for IO memory. We are currently +/// using the linear mapping for IO memory. This is not a good practice. fn handle_kernel_page_fault(f: &TrapFrame) { let page_fault_vaddr = x86_64::registers::control::Cr2::read().as_u64(); let error_code = PageFaultErrorCode::from_bits_truncate(f.error_code); @@ -210,21 +214,28 @@ fn handle_kernel_page_fault(f: &TrapFrame) { "kernel page fault: the direct mapping already exists", ); - // FIXME: Is it safe to call `PageTable::from_root_register` here? - let mut page_table: PageTable = - unsafe { PageTable::from_root_register() }; - - let paddr = page_fault_vaddr as usize - PHYS_MEM_BASE_VADDR; - let flags = PageTableFlags::PRESENT | PageTableFlags::WRITABLE; + // Do the mapping + let mut page_table = KERNEL_PAGE_TABLE + .get() + .expect("The kernel page table is not initialized when kernel page fault happens") + .lock(); + let vaddr = (page_fault_vaddr as usize).align_down(PAGE_SIZE); + let paddr = vaddr - LINEAR_MAPPING_BASE_VADDR; // SAFETY: // 1. We have checked that the page fault address falls within the address range of the direct // mapping of physical memory. // 2. We map the address to the correct physical page with the correct flags, where the // correctness follows the semantics of the direct mapping of physical memory. + // Do the mapping unsafe { - page_table - .map(page_fault_vaddr as usize, paddr, flags) - .unwrap(); + page_table.map_unchecked( + &(vaddr..vaddr + PAGE_SIZE), + &(paddr..paddr + PAGE_SIZE), + MapProperty { + perm: VmPerm::RW | VmPerm::G, + cache: CachePolicy::Uncacheable, + }, + ) } } diff --git a/framework/aster-frame/src/user.rs b/framework/aster-frame/src/user.rs index cdcee2de3..be122f0d3 100644 --- a/framework/aster-frame/src/user.rs +++ b/framework/aster-frame/src/user.rs @@ -138,9 +138,7 @@ impl<'a> UserMode<'a> { /// After handling the user event and updating the user-mode CPU context, /// this method can be invoked again to go back to the user space. 
pub fn execute(&mut self) -> UserEvent { - unsafe { - self.user_space.vm_space().activate(); - } + self.user_space.vm_space().activate(); debug_assert!(Arc::ptr_eq(&self.current, &Task::current())); self.context.execute() } diff --git a/framework/aster-frame/src/vm/dma/dma_coherent.rs b/framework/aster-frame/src/vm/dma/dma_coherent.rs index 3d2f419ca..a0db9a717 100644 --- a/framework/aster-frame/src/vm/dma/dma_coherent.rs +++ b/framework/aster-frame/src/vm/dma/dma_coherent.rs @@ -10,11 +10,11 @@ use super::{check_and_insert_dma_mapping, remove_dma_mapping, DmaError, HasDaddr #[cfg(feature = "intel_tdx")] use crate::arch::tdx_guest; use crate::{ - arch::{iommu, mm::PageTableFlags}, + arch::iommu, vm::{ dma::{dma_type, Daddr, DmaType}, - paddr_to_vaddr, - page_table::KERNEL_PAGE_TABLE, + kspace::{paddr_to_vaddr, KERNEL_PAGE_TABLE}, + page_table::{CachePolicy, MapProperty}, HasPaddr, Paddr, VmIo, VmReader, VmSegment, VmWriter, PAGE_SIZE, }, }; @@ -57,16 +57,16 @@ impl DmaCoherent { start_paddr.checked_add(frame_count * PAGE_SIZE).unwrap(); if !is_cache_coherent { let mut page_table = KERNEL_PAGE_TABLE.get().unwrap().lock(); - for i in 0..frame_count { - let paddr = start_paddr + (i * PAGE_SIZE); - let vaddr = paddr_to_vaddr(paddr); - let flags = page_table.flags(vaddr).unwrap(); - // Safety: the address is in the range of `vm_segment`. - unsafe { - page_table - .protect(vaddr, flags.union(PageTableFlags::NO_CACHE)) - .unwrap(); - } + let vaddr = paddr_to_vaddr(start_paddr); + let va_range = vaddr..vaddr + (frame_count * PAGE_SIZE); + // Safety: the address is in the range of `vm_segment`. + unsafe { + page_table + .protect_unchecked(&va_range, |info| MapProperty { + perm: info.prop.perm, + cache: CachePolicy::Uncacheable, + }) + .unwrap(); } } let start_daddr = match dma_type() { @@ -147,15 +147,16 @@ impl Drop for DmaCoherentInner { } if !self.is_cache_coherent { let mut page_table = KERNEL_PAGE_TABLE.get().unwrap().lock(); - for i in 0..frame_count { - let paddr = start_paddr + (i * PAGE_SIZE); - let vaddr = paddr_to_vaddr(paddr); - let mut flags = page_table.flags(vaddr).unwrap(); - flags.remove(PageTableFlags::NO_CACHE); - // Safety: the address is in the range of `vm_segment`. - unsafe { - page_table.protect(vaddr, flags).unwrap(); - } + let vaddr = paddr_to_vaddr(start_paddr); + let va_range = vaddr..vaddr + (frame_count * PAGE_SIZE); + // Safety: the address is in the range of `vm_segment`. + unsafe { + page_table + .protect_unchecked(&va_range, |info| MapProperty { + perm: info.prop.perm, + cache: CachePolicy::Writeback, + }) + .unwrap(); } } remove_dma_mapping(start_paddr, frame_count); @@ -215,11 +216,19 @@ mod test { .unwrap(); let dma_coherent = DmaCoherent::map(vm_segment.clone(), false).unwrap(); assert!(dma_coherent.paddr() == vm_segment.paddr()); - let mut page_table = KERNEL_PAGE_TABLE.get().unwrap().lock(); - assert!(page_table - .flags(paddr_to_vaddr(vm_segment.paddr())) - .unwrap() - .contains(PageTableFlags::NO_CACHE)) + let page_table = KERNEL_PAGE_TABLE.get().unwrap().lock(); + let vaddr = paddr_to_vaddr(vm_segment.paddr()); + assert!( + page_table + .query(vaddr..vaddr + PAGE_SIZE) + .unwrap() + .next() + .unwrap() + .info + .prop + .cache + == CachePolicy::Uncacheable + ); } #[ktest] diff --git a/framework/aster-frame/src/vm/kspace.rs b/framework/aster-frame/src/vm/kspace.rs new file mode 100644 index 000000000..41ec420e6 --- /dev/null +++ b/framework/aster-frame/src/vm/kspace.rs @@ -0,0 +1,116 @@ +// SPDX-License-Identifier: MPL-2.0 + +//! 
Kernel memory space management. + +use align_ext::AlignExt; +use spin::Once; + +use super::page_table::PageTableConstsTrait; +use crate::{ + arch::mm::{PageTableConsts, PageTableEntry}, + sync::SpinLock, + vm::{ + page_table::{page_walk, CachePolicy, KernelMode, MapProperty, PageTable}, + space::VmPerm, + MemoryRegionType, Paddr, Vaddr, PAGE_SIZE, + }, +}; + +/// The base address of the linear mapping of all physical +/// memory in the kernel address space. +pub(crate) const LINEAR_MAPPING_BASE_VADDR: Vaddr = 0xffff_8000_0000_0000; + +/// The kernel code is linear mapped to this address. +/// +/// FIXME: This offset should be randomly chosen by the loader or the +/// boot compatibility layer. But we disabled it because the framework +/// doesn't support relocatable kernel yet. +pub fn kernel_loaded_offset() -> usize { + 0xffff_ffff_8000_0000 +} + +pub fn vaddr_to_paddr(va: Vaddr) -> Option { + if (LINEAR_MAPPING_BASE_VADDR..=kernel_loaded_offset()).contains(&va) { + // can use offset to get the physical address + Some(va - LINEAR_MAPPING_BASE_VADDR) + } else { + let root_paddr = crate::arch::mm::current_page_table_paddr(); + // Safety: the root page table is valid since we read it from the register. + unsafe { page_walk::(root_paddr, va) } + } +} + +/// Convert physical address to virtual address using offset, only available inside aster-frame +pub(crate) fn paddr_to_vaddr(pa: Paddr) -> usize { + pa + LINEAR_MAPPING_BASE_VADDR +} + +pub static KERNEL_PAGE_TABLE: Once< + SpinLock>, +> = Once::new(); + +/// Initialize the kernel page table. +/// +/// This function should be called after: +/// - the page allocator and the heap allocator are initialized; +/// - the memory regions are initialized. +/// +/// This function should be called before: +/// - any initializer that modifies the kernel page table. +pub fn init_kernel_page_table() { + let mut kpt = PageTable::::empty(); + kpt.make_shared_tables( + PageTableConsts::NR_ENTRIES_PER_FRAME / 2..PageTableConsts::NR_ENTRIES_PER_FRAME, + ); + let regions = crate::boot::memory_regions(); + // Do linear mappings for the kernel. + let linear_mapping_size = { + let mut end = 0; + for r in regions { + end = end.max(r.base() + r.len()); + } + end.align_up(PAGE_SIZE) + }; + let from = LINEAR_MAPPING_BASE_VADDR..LINEAR_MAPPING_BASE_VADDR + linear_mapping_size; + let to = 0..linear_mapping_size; + let prop = MapProperty { + perm: VmPerm::RW | VmPerm::G, + cache: CachePolicy::Writeback, + }; + // Safety: we are doing the linear mapping for the kernel. + unsafe { + kpt.map_unchecked(&from, &to, prop); + } + // Map for the I/O area. + // TODO: we need to have an allocator to allocate kernel space for + // the I/O areas, rather than doing it using the linear mappings. + let to = 0x8_0000_0000..0x9_0000_0000; + let from = LINEAR_MAPPING_BASE_VADDR + to.start..LINEAR_MAPPING_BASE_VADDR + to.end; + let prop = MapProperty { + perm: VmPerm::RW | VmPerm::G, + cache: CachePolicy::Uncacheable, + }; + // Safety: we are doing I/O mappings for the kernel. + unsafe { + kpt.map_unchecked(&from, &to, prop); + } + // Map for the kernel code itself. + // TODO: set separated permissions for each segments in the kernel. 
+ let region = regions + .iter() + .find(|r| r.typ() == MemoryRegionType::Kernel) + .unwrap(); + let offset = kernel_loaded_offset(); + let to = + region.base().align_down(PAGE_SIZE)..(region.base() + region.len()).align_up(PAGE_SIZE); + let from = to.start + offset..to.end + offset; + let prop = MapProperty { + perm: VmPerm::RWX | VmPerm::G, + cache: CachePolicy::Writeback, + }; + // Safety: we are doing mappings for the kernel. + unsafe { + kpt.map_unchecked(&from, &to, prop); + } + KERNEL_PAGE_TABLE.call_once(|| SpinLock::new(kpt)); +} diff --git a/framework/aster-frame/src/vm/memory_set.rs b/framework/aster-frame/src/vm/memory_set.rs index eb0eec79b..246c09095 100644 --- a/framework/aster-frame/src/vm/memory_set.rs +++ b/framework/aster-frame/src/vm/memory_set.rs @@ -3,27 +3,31 @@ use alloc::collections::{btree_map::Entry, BTreeMap}; use core::fmt; -use super::page_table::{PageTable, PageTableConfig, UserMode}; +use align_ext::AlignExt; + +use super::{ + kspace::KERNEL_PAGE_TABLE, + page_table::{MapInfo, MapOp, MapProperty, PageTable, UserMode}, +}; use crate::{ - arch::mm::{PageTableEntry, PageTableFlags, INIT_MAPPED_PTE}, prelude::*, vm::{ - is_page_aligned, VmAllocOptions, VmFrame, VmFrameVec, VmReader, VmWriter, PAGE_SIZE, - PHYS_MEM_BASE_VADDR, + is_page_aligned, page_table::MapStatus, VmAllocOptions, VmFrame, VmFrameVec, VmPerm, + VmReader, VmWriter, PAGE_SIZE, }, Error, }; #[derive(Debug, Clone)] pub struct MapArea { - pub flags: PageTableFlags, + pub info: MapInfo, pub start_va: Vaddr, pub size: usize, pub mapper: BTreeMap, } pub struct MemorySet { - pub pt: PageTable, + pub pt: PageTable, /// all the map area, sort by the start virtual address areas: BTreeMap, } @@ -37,7 +41,7 @@ impl MapArea { pub fn new( start_va: Vaddr, size: usize, - flags: PageTableFlags, + prop: MapProperty, physical_frames: VmFrameVec, ) -> Self { assert!( @@ -47,7 +51,10 @@ impl MapArea { ); let mut map_area = Self { - flags, + info: MapInfo { + prop, + status: MapStatus::empty(), + }, start_va, size, mapper: BTreeMap::new(), @@ -56,7 +63,7 @@ impl MapArea { let page_size = size / PAGE_SIZE; let mut phy_frame_iter = physical_frames.iter(); - for i in 0..page_size { + for _ in 0..page_size { let vm_frame = phy_frame_iter.next().unwrap(); map_area.map_with_physical_address(current_va, vm_frame.clone()); current_va += PAGE_SIZE; @@ -133,8 +140,7 @@ impl MemorySet { if let Entry::Vacant(e) = self.areas.entry(area.start_va) { let area = e.insert(area); for (va, frame) in area.mapper.iter() { - debug_assert!(frame.start_paddr() < PHYS_MEM_BASE_VADDR); - self.pt.map(*va, frame, area.flags).unwrap(); + self.pt.map_frame(*va, frame, area.info.prop).unwrap(); } } else { panic!( @@ -147,26 +153,24 @@ impl MemorySet { /// Determine whether a Vaddr is in a mapped area pub fn is_mapped(&self, vaddr: Vaddr) -> bool { - self.pt.is_mapped(vaddr) + let vaddr = vaddr.align_down(PAGE_SIZE); + self.pt + .query(&(vaddr..vaddr + PAGE_SIZE)) + .map(|mut i| i.next().is_some()) + .unwrap_or(false) } - /// Return the flags of the PTE for the target virtual memory address. - /// If the PTE does not exist, return `None`. - pub fn flags(&self, vaddr: Vaddr) -> Option { - self.pt.flags(vaddr) + /// Return the information of the PTE for the target virtual memory address. 
+ pub fn info(&self, vaddr: Vaddr) -> Option { + let vaddr = vaddr.align_down(PAGE_SIZE); + self.pt + .query(&(vaddr..vaddr + PAGE_SIZE)) + .map(|mut i| i.next().unwrap().info) + .ok() } pub fn new() -> Self { - let mut page_table = PageTable::::new(PageTableConfig { - address_width: super::page_table::AddressWidth::Level4, - }); - let mapped_pte = INIT_MAPPED_PTE.get().unwrap(); - for (index, pte) in mapped_pte.iter() { - // Safety: These PTEs are all valid PTEs fetched from the initial page table during memory initialization. - unsafe { - page_table.add_root_mapping(*index, pte); - } - } + let page_table = KERNEL_PAGE_TABLE.get().unwrap().lock().fork(); Self { pt: page_table, areas: BTreeMap::new(), @@ -176,7 +180,7 @@ impl MemorySet { pub fn unmap(&mut self, va: Vaddr) -> Result<()> { if let Some(area) = self.areas.remove(&va) { for (va, _) in area.mapper.iter() { - self.pt.unmap(*va).unwrap(); + self.pt.unmap(&(*va..*va + PAGE_SIZE)).unwrap(); } Ok(()) } else { @@ -187,7 +191,7 @@ impl MemorySet { pub fn clear(&mut self) { for area in self.areas.values_mut() { for (va, _) in area.mapper.iter() { - self.pt.unmap(*va).unwrap(); + self.pt.unmap(&(*va..*va + PAGE_SIZE)).unwrap(); } } self.areas.clear(); @@ -200,7 +204,7 @@ impl MemorySet { let mut offset = 0usize; for (va, area) in self.areas.iter_mut() { if current_addr >= *va && current_addr < area.size + va { - if !area.flags.contains(PageTableFlags::WRITABLE) { + if !area.info.prop.perm.contains(VmPerm::W) { return Err(Error::PageFault); } let write_len = remain.min(area.size + va - current_addr); @@ -242,14 +246,14 @@ impl MemorySet { Err(Error::PageFault) } - pub fn protect(&mut self, addr: Vaddr, flags: PageTableFlags) { - let va = addr; + pub fn protect(&mut self, addr: Vaddr, op: impl MapOp) { + let va = addr..addr + PAGE_SIZE; // Temporary solution, since the `MapArea` currently only represents // a single `VmFrame`. - if let Some(areas) = self.areas.get_mut(&va) { - areas.flags = flags; + if let Some(areas) = self.areas.get_mut(&addr) { + areas.info.prop = op(areas.info); } - self.pt.protect(va, flags).unwrap(); + self.pt.protect(&va, op).unwrap(); } } diff --git a/framework/aster-frame/src/vm/mod.rs b/framework/aster-frame/src/vm/mod.rs index 2e7b7d1ae..77d27a55d 100644 --- a/framework/aster-frame/src/vm/mod.rs +++ b/framework/aster-frame/src/vm/mod.rs @@ -13,6 +13,7 @@ mod frame; mod frame_allocator; pub(crate) mod heap_allocator; mod io; +pub(crate) mod kspace; mod memory_set; mod offset; mod options; @@ -24,10 +25,12 @@ use core::ops::Range; use spin::Once; +pub(crate) use self::kspace::paddr_to_vaddr; pub use self::{ dma::{Daddr, DmaCoherent, DmaDirection, DmaStream, DmaStreamSlice, HasDaddr}, frame::{VmFrame, VmFrameVec, VmFrameVecIter, VmReader, VmSegment, VmWriter}, io::VmIo, + kspace::vaddr_to_paddr, memory_set::{MapArea, MemorySet}, options::VmAllocOptions, page_table::PageTable, @@ -85,29 +88,21 @@ pub const fn kernel_loaded_offset() -> usize { } const_assert!(PHYS_MEM_VADDR_RANGE.end < kernel_loaded_offset()); +/// Start of the kernel address space. +/// This is the _lowest_ address of the x86-64's _high_ canonical addresses. +pub(crate) const KERNEL_BASE_VADDR: Vaddr = 0xffff_8000_0000_0000; +/// End of the kernel address space (non inclusive). 
+pub(crate) const KERNEL_END_VADDR: Vaddr = 0xffff_ffff_ffff_0000; + /// Get physical address trait pub trait HasPaddr { fn paddr(&self) -> Paddr; } -pub fn vaddr_to_paddr(va: Vaddr) -> Option { - if PHYS_MEM_VADDR_RANGE.contains(&va) { - // can use offset to get the physical address - Some(va - PHYS_MEM_BASE_VADDR) - } else { - page_table::vaddr_to_paddr(va) - } -} - pub const fn is_page_aligned(p: usize) -> bool { (p & (PAGE_SIZE - 1)) == 0 } -/// Convert physical address to virtual address using offset, only available inside aster-frame -pub(crate) fn paddr_to_vaddr(pa: usize) -> usize { - pa + PHYS_MEM_BASE_VADDR -} - /// Only available inside aster-frame pub(crate) static MEMORY_REGIONS: Once> = Once::new(); @@ -116,7 +111,7 @@ pub static FRAMEBUFFER_REGIONS: Once> = Once::new(); pub(crate) fn init() { let memory_regions = crate::boot::memory_regions().to_owned(); frame_allocator::init(&memory_regions); - page_table::init(); + kspace::init_kernel_page_table(); dma::init(); let mut framebuffer_regions = Vec::new(); diff --git a/framework/aster-frame/src/vm/page_table.rs b/framework/aster-frame/src/vm/page_table.rs deleted file mode 100644 index 10eec2cc7..000000000 --- a/framework/aster-frame/src/vm/page_table.rs +++ /dev/null @@ -1,487 +0,0 @@ -// SPDX-License-Identifier: MPL-2.0 - -use alloc::{vec, vec::Vec}; -use core::{fmt::Debug, marker::PhantomData, mem::size_of}; - -use log::trace; -use pod::Pod; -use spin::Once; - -use super::{paddr_to_vaddr, Paddr, Vaddr, VmAllocOptions}; -use crate::{ - arch::mm::{is_kernel_vaddr, is_user_vaddr, tlb_flush, PageTableEntry, NR_ENTRIES_PER_PAGE}, - sync::SpinLock, - vm::{VmFrame, PAGE_SIZE}, -}; - -pub trait PageTableFlagsTrait: Clone + Copy + Sized + Pod + Debug { - fn new() -> Self; - - fn set_present(self, present: bool) -> Self; - - fn set_writable(self, writable: bool) -> Self; - - fn set_readable(self, readable: bool) -> Self; - - fn set_accessible_by_user(self, accessible: bool) -> Self; - - fn set_executable(self, executable: bool) -> Self; - - fn set_huge(self, huge: bool) -> Self; - - fn is_present(&self) -> bool; - - fn writable(&self) -> bool; - - fn readable(&self) -> bool; - - fn executable(&self) -> bool; - - fn has_accessed(&self) -> bool; - - fn is_dirty(&self) -> bool; - - fn is_huge(&self) -> bool; - - fn accessible_by_user(&self) -> bool; - - /// Returns a new set of flags, containing any flags present in either self or other. It is similar to the OR operation. - fn union(&self, other: &Self) -> Self; - - /// Remove the specified flags. - fn remove(&mut self, flags: &Self); - - /// Insert the specified flags. - fn insert(&mut self, flags: &Self); -} - -pub trait PageTableEntryTrait: Clone + Copy + Sized + Pod + Debug { - type F: PageTableFlagsTrait; - - fn new(paddr: Paddr, flags: Self::F) -> Self; - - fn paddr(&self) -> Paddr; - - fn flags(&self) -> Self::F; - - fn update(&mut self, paddr: Paddr, flags: Self::F); - - /// To determine whether the PTE is used, it usually checks whether it is 0. - /// - /// The page table will first use this value to determine whether a new page needs to be created to complete the mapping. - fn is_used(&self) -> bool; - - /// Clear the PTE and reset it to the initial state, which is usually 0. - fn clear(&mut self); - - /// The index of the next PTE is determined based on the virtual address and the current level, and the level range is [1,5]. 
- /// - /// For example, in x86 we use the following expression to get the index (NR_ENTRIES_PER_PAGE is 512): - /// ``` - /// va >> (12 + 9 * (level - 1)) & (NR_ENTRIES_PER_PAGE - 1) - /// ``` - /// - fn page_index(va: Vaddr, level: usize) -> usize; -} - -#[derive(Debug, Clone, Copy)] -pub struct PageTableConfig { - pub address_width: AddressWidth, -} - -#[derive(Debug, Clone, Copy)] -#[repr(usize)] -pub enum AddressWidth { - Level3 = 3, - Level4 = 4, - Level5 = 5, -} - -#[derive(Debug)] -pub enum PageTableError { - /// Modifications to page tables (map, unmap, protect, etc.) are invalid for the following reasons: - /// - /// 1. The mapping is present before map operation. - /// 2. The mapping is already invalid before unmap operation. - /// 3. The mapping is not exists before protect operation. - InvalidModification, - InvalidVaddr, -} - -pub static KERNEL_PAGE_TABLE: Once>> = Once::new(); - -#[derive(Clone)] -pub struct UserMode {} - -#[derive(Clone)] -pub struct KernelMode {} - -/// The page table used by iommu maps the device address -/// space to the physical address space. -#[derive(Clone)] -pub struct DeviceMode {} - -#[derive(Clone, Debug)] -pub struct PageTable { - root_paddr: Paddr, - /// store all the physical frame that the page table need to map all the frame e.g. the frame of the root_pa - tables: Vec, - config: PageTableConfig, - _phantom: PhantomData<(T, M)>, -} - -impl PageTable { - pub fn new(config: PageTableConfig) -> Self { - let root_frame = VmAllocOptions::new(1).alloc_single().unwrap(); - Self { - root_paddr: root_frame.start_paddr(), - tables: vec![root_frame], - config, - _phantom: PhantomData, - } - } - - pub fn map( - &mut self, - vaddr: Vaddr, - frame: &VmFrame, - flags: T::F, - ) -> Result<(), PageTableError> { - if is_kernel_vaddr(vaddr) { - return Err(PageTableError::InvalidVaddr); - } - // Safety: - // 1. The vaddr belongs to user mode program and does not affect the kernel mapping. - // 2. The area where the physical address islocated at untyped memory and does not affect kernel security. - unsafe { self.do_map(vaddr, frame.start_paddr(), flags) } - } - - pub fn unmap(&mut self, vaddr: Vaddr) -> Result<(), PageTableError> { - if is_kernel_vaddr(vaddr) { - return Err(PageTableError::InvalidVaddr); - } - // Safety: The vaddr belongs to user mode program and does not affect the kernel mapping. - unsafe { self.do_unmap(vaddr) } - } - - pub fn protect(&mut self, vaddr: Vaddr, flags: T::F) -> Result { - if is_kernel_vaddr(vaddr) { - return Err(PageTableError::InvalidVaddr); - } - // Safety: The vaddr belongs to user mode program and does not affect the kernel mapping. - unsafe { self.do_protect(vaddr, flags) } - } - - /// Add a new mapping directly in the root page table. - /// - /// # Safety - /// - /// User must guarantee the validity of the PTE. - pub(crate) unsafe fn add_root_mapping(&mut self, index: usize, pte: &T) { - debug_assert!((index + 1) * size_of::() <= PAGE_SIZE); - // Safety: The root_paddr is refer to the root of a valid page table. - let root_ptes: &mut [T] = table_of(self.root_paddr).unwrap(); - root_ptes[index] = *pte; - } -} - -impl PageTable { - /// Mapping `vaddr` to `paddr` with flags. The `vaddr` should not be at the low address - /// (memory belonging to the user mode program). - /// - /// # Safety - /// - /// Modifying kernel mappings is considered unsafe, and incorrect operation may cause crashes. - /// User must take care of the consequences when using this API. 
- pub unsafe fn map( - &mut self, - vaddr: Vaddr, - paddr: Paddr, - flags: T::F, - ) -> Result<(), PageTableError> { - if is_user_vaddr(vaddr) { - return Err(PageTableError::InvalidVaddr); - } - self.do_map(vaddr, paddr, flags) - } - - /// Unmap `vaddr`. The `vaddr` should not be at the low address - /// (memory belonging to the user mode program). - /// - /// # Safety - /// - /// Modifying kernel mappings is considered unsafe, and incorrect operation may cause crashes. - /// User must take care of the consequences when using this API. - pub unsafe fn unmap(&mut self, vaddr: Vaddr) -> Result<(), PageTableError> { - if is_user_vaddr(vaddr) { - return Err(PageTableError::InvalidVaddr); - } - self.do_unmap(vaddr) - } - - /// Modify the flags mapped at `vaddr`. The `vaddr` should not be at the low address - /// (memory belonging to the user mode program). - /// If the modification succeeds, it will return the old flags of `vaddr`. - /// - /// # Safety - /// - /// Modifying kernel mappings is considered unsafe, and incorrect operation may cause crashes. - /// User must take care of the consequences when using this API. - pub unsafe fn protect(&mut self, vaddr: Vaddr, flags: T::F) -> Result { - if is_user_vaddr(vaddr) { - return Err(PageTableError::InvalidVaddr); - } - self.do_protect(vaddr, flags) - } -} - -impl PageTable { - pub fn new(config: PageTableConfig) -> Self { - let root_frame = VmAllocOptions::new(1).alloc_single().unwrap(); - Self { - root_paddr: root_frame.start_paddr(), - tables: vec![root_frame], - config, - _phantom: PhantomData, - } - } - - /// Mapping directly from a virtual address to a physical address. - /// The virtual address should be in the device address space. - /// - /// # Safety - /// - /// User must ensure the given paddr is a valid one (e.g. from the VmSegment). - pub unsafe fn map_with_paddr( - &mut self, - vaddr: Vaddr, - paddr: Paddr, - flags: T::F, - ) -> Result<(), PageTableError> { - self.do_map(vaddr, paddr, flags) - } - - pub fn unmap(&mut self, vaddr: Vaddr) -> Result<(), PageTableError> { - // Safety: the `vaddr` is in the device address space. - unsafe { self.do_unmap(vaddr) } - } -} - -impl PageTable { - /// Mapping `vaddr` to `paddr` with flags. - /// - /// # Safety - /// - /// This function allows arbitrary modifications to the page table. - /// Incorrect modifications may cause the kernel to crash (e.g., changing the linear mapping.). - unsafe fn do_map( - &mut self, - vaddr: Vaddr, - paddr: Paddr, - flags: T::F, - ) -> Result<(), PageTableError> { - let last_entry = self.do_page_walk_mut(vaddr, true).unwrap(); - trace!( - "Page Table: Map vaddr:{:x?}, paddr:{:x?}, flags:{:x?}", - vaddr, - paddr, - flags - ); - if last_entry.is_used() && last_entry.flags().is_present() { - return Err(PageTableError::InvalidModification); - } - last_entry.update(paddr, flags); - tlb_flush(vaddr); - Ok(()) - } - - /// Find the last PTE and return its mutable reference. - /// - /// If create is set, it will create the next table until the last PTE. - /// If not, it will return `None` if it cannot reach the last PTE. - fn do_page_walk_mut(&mut self, vaddr: Vaddr, create: bool) -> Option<&mut T> { - let mut level = self.config.address_width as usize; - // Safety: The offset does not exceed the value of PAGE_SIZE. - // It only change the memory controlled by page table. 
- let mut current: &mut T = - unsafe { &mut *(calculate_pte_vaddr::(self.root_paddr, vaddr, level) as *mut T) }; - - while level > 1 { - if !current.flags().is_present() { - if !create { - return None; - } - // Create next table - let frame = VmAllocOptions::new(1).alloc_single().unwrap(); - // Default flags: read, write, user, present - let flags = T::F::new() - .set_present(true) - .set_accessible_by_user(true) - .set_readable(true) - .set_writable(true); - current.update(frame.start_paddr(), flags); - self.tables.push(frame); - } - if current.flags().is_huge() { - break; - } - level -= 1; - // Safety: The offset does not exceed the value of PAGE_SIZE. - // It only change the memory controlled by page table. - current = unsafe { - &mut *(calculate_pte_vaddr::(current.paddr(), vaddr, level) as *mut T) - }; - } - Some(current) - } - - /// Find the last PTE and return its immutable reference. - /// - /// This function will return `None` if it cannot reach the last PTE. - /// Note that finding an entry does not mean the corresponding virtual memory address is mapped - /// since the entry may be empty. - fn do_page_walk(&self, vaddr: Vaddr) -> Option<&T> { - let mut level = self.config.address_width as usize; - // Safety: The offset does not exceed the value of PAGE_SIZE. - // It only change the memory controlled by page table. - let mut current: &T = - unsafe { &*(calculate_pte_vaddr::(self.root_paddr, vaddr, level) as *const T) }; - - while level > 1 { - if !current.flags().is_present() { - return None; - } - if current.flags().is_huge() { - break; - } - level -= 1; - // Safety: The offset does not exceed the value of PAGE_SIZE. - // It only change the memory controlled by page table. - current = - unsafe { &*(calculate_pte_vaddr::(current.paddr(), vaddr, level) as *const T) }; - } - Some(current) - } - - /// Unmap `vaddr`. - /// - /// # Safety - /// - /// This function allows arbitrary modifications to the page table. - /// Incorrect modifications may cause the kernel to crash (e.g., unmap the linear mapping.). - unsafe fn do_unmap(&mut self, vaddr: Vaddr) -> Result<(), PageTableError> { - let last_entry = self - .do_page_walk_mut(vaddr, false) - .ok_or(PageTableError::InvalidModification)?; - trace!("Page Table: Unmap vaddr:{:x?}", vaddr); - if !last_entry.is_used() || !last_entry.flags().is_present() { - return Err(PageTableError::InvalidModification); - } - last_entry.clear(); - tlb_flush(vaddr); - Ok(()) - } - - /// Modify the flags mapped at `vaddr`. - /// If the modification succeeds, it will return the old flags of `vaddr`. - /// - /// # Safety - /// - /// This function allows arbitrary modifications to the page table. - /// Incorrect modifications may cause the kernel to crash - /// (e.g., make the linear mapping visible to the user mode applications.). - unsafe fn do_protect(&mut self, vaddr: Vaddr, new_flags: T::F) -> Result { - let last_entry = self - .do_page_walk_mut(vaddr, false) - .ok_or(PageTableError::InvalidModification)?; - let old_flags = last_entry.flags(); - trace!( - "Page Table: Protect vaddr:{:x?}, flags:{:x?}", - vaddr, - new_flags - ); - if !last_entry.is_used() || !old_flags.is_present() { - return Err(PageTableError::InvalidModification); - } - last_entry.update(last_entry.paddr(), new_flags); - tlb_flush(vaddr); - Ok(old_flags) - } - - /// Construct a page table instance from root registers (CR3 in x86) - /// - /// # Safety - /// - /// This function bypasses Rust's ownership model and directly constructs an instance of a - /// page table. 
- pub(crate) unsafe fn from_root_register() -> Self { - #[cfg(target_arch = "x86_64")] - let (page_directory_base, _) = x86_64::registers::control::Cr3::read(); - PageTable { - root_paddr: page_directory_base.start_address().as_u64() as usize, - tables: Vec::new(), - config: PageTableConfig { - address_width: AddressWidth::Level4, - }, - _phantom: PhantomData, - } - } - - /// Return the flags of the PTE for the target virtual memory address. - /// If the PTE does not exist, return `None`. - pub fn flags(&self, vaddr: Vaddr) -> Option { - self.do_page_walk(vaddr).map(|entry| entry.flags()) - } - - /// Return the root physical address of current `PageTable`. - pub fn root_paddr(&self) -> Paddr { - self.root_paddr - } - - /// Determine whether the target virtual memory address is mapped. - pub fn is_mapped(&self, vaddr: Vaddr) -> bool { - self.do_page_walk(vaddr) - .is_some_and(|last_entry| last_entry.is_used() && last_entry.flags().is_present()) - } -} - -/// Read `NR_ENTRIES_PER_PAGE` of PageTableEntry from an address -/// -/// # Safety -/// -/// User must ensure that the physical address refers to the root of a valid page table. -/// -pub unsafe fn table_of<'a, T: PageTableEntryTrait>(pa: Paddr) -> Option<&'a mut [T]> { - if pa == 0 { - return None; - } - let ptr = super::paddr_to_vaddr(pa) as *mut _; - Some(core::slice::from_raw_parts_mut(ptr, NR_ENTRIES_PER_PAGE)) -} - -/// translate a virtual address to physical address which cannot use offset to get physical address -pub fn vaddr_to_paddr(vaddr: Vaddr) -> Option { - let page_table = KERNEL_PAGE_TABLE.get().unwrap().lock(); - // Although we bypass the unsafe APIs provided by KernelMode, the purpose here is - // only to obtain the corresponding physical address according to the mapping. - let last_entry = page_table.do_page_walk(vaddr)?; - // FIXME: Support huge page - Some(last_entry.paddr() + (vaddr & (PAGE_SIZE - 1))) -} - -fn calculate_pte_vaddr( - root_pa: Paddr, - target_va: Vaddr, - level: usize, -) -> Vaddr { - debug_assert!(size_of::() * (T::page_index(target_va, level) + 1) <= PAGE_SIZE); - paddr_to_vaddr(root_pa + size_of::() * T::page_index(target_va, level)) -} - -pub fn init() { - KERNEL_PAGE_TABLE.call_once(|| { - // Safety: The `KERENL_PAGE_TABLE` is the only page table that is used to modify the initialize - // mapping. - SpinLock::new(unsafe { PageTable::from_root_register() }) - }); -} diff --git a/framework/aster-frame/src/vm/page_table/cursor.rs b/framework/aster-frame/src/vm/page_table/cursor.rs new file mode 100644 index 000000000..eae42ae87 --- /dev/null +++ b/framework/aster-frame/src/vm/page_table/cursor.rs @@ -0,0 +1,364 @@ +// SPDX-License-Identifier: MPL-2.0 + +use alloc::{boxed::Box, sync::Arc}; +use core::{any::TypeId, marker::PhantomData, mem::size_of, ops::Range}; + +use super::{ + KernelMode, MapInfo, MapOp, MapProperty, PageTable, PageTableConstsTrait, PageTableEntryTrait, + PageTableError, PageTableFrame, PageTableMode, PtfRef, +}; +use crate::{ + sync::SpinLock, + vm::{paddr_to_vaddr, Paddr, Vaddr}, +}; + +/// The cursor for forward traversal over the page table. +/// +/// Doing mapping is somewhat like a depth-first search on a tree, except +/// that we modify the tree while traversing it. We use a stack to simulate +/// the recursion. 
+pub(super) struct PageTableCursor< + 'a, + M: PageTableMode, + E: PageTableEntryTrait, + C: PageTableConstsTrait, +> where + [(); C::NR_ENTRIES_PER_FRAME]:, + [(); C::NR_LEVELS]:, +{ + stack: [Option>; C::NR_LEVELS], + level: usize, + va: Vaddr, + _phantom_ref: PhantomData<&'a PageTable>, +} + +impl PageTableCursor<'_, M, E, C> +where + [(); C::NR_ENTRIES_PER_FRAME]:, + [(); C::NR_LEVELS]:, +{ + pub(super) fn new(pt: &PageTable, va: Vaddr) -> Self { + let mut stack = core::array::from_fn(|_| None); + stack[0] = Some(pt.root_frame.clone()); + Self { + stack, + level: C::NR_LEVELS, + va, + _phantom_ref: PhantomData, + } + } + + /// Map or unmap the range starting from the current address. + /// + /// The argument `create` allows you to map the continuous range to a physical + /// range with the given map property. + /// + /// The function will map as more huge pages as possible, and it will split + /// the huge pages into smaller pages if necessary. If the input range is large, + /// the resulting mappings may look like this (if very huge pages supported): + /// + /// ```text + /// start end + /// |----|----------------|--------------------------------|----|----| + /// base huge very huge base base + /// 4KiB 2MiB 1GiB 4KiB 4KiB + /// ``` + /// + /// In practice it is suggested to use simple wrappers for this API that maps + /// frames for safety and conciseness. + /// + /// # Safety + /// + /// This function manipulates the page table directly, and it is unsafe because + /// it may cause undefined behavior if the caller does not ensure that the + /// mapped address is valid and the page table is not corrupted if it is used + /// by the kernel. + pub(super) unsafe fn map(&mut self, len: usize, create: Option<(Paddr, MapProperty)>) { + let end = self.va + len; + let mut create = create; + while self.va != end { + let top_spin = self.stack[C::NR_LEVELS - self.level].clone().unwrap(); + let mut top_ptf = top_spin.lock(); + // Go down if the page size is too big or alignment is not satisfied. + let is_pa_not_aligned = create + .map(|(pa, _)| pa % C::page_size(self.level) != 0) + .unwrap_or(false); + // We ensure not mapping in reserved kernel shared tables or releasing it. + // Although it may be an invariant for all architectures and will be optimized + // out by the compiler since `C::NR_LEVELS - 1 > C::HIGHEST_TRANSLATION_LEVEL`. + let kshared_lvl_down = + TypeId::of::() == TypeId::of::() && self.level >= C::NR_LEVELS - 1; + if self.level > C::HIGHEST_TRANSLATION_LEVEL + || kshared_lvl_down + || self.va % C::page_size(self.level) != 0 + || self.va + C::page_size(self.level) > end + || is_pa_not_aligned + { + let ld_prop = create + .map(|(pa, prop)| prop) + .unwrap_or(MapProperty::new_invalid()); + self.level_down(&mut top_ptf, Some(ld_prop)); + continue; + } + self.map_page(&mut top_ptf, create); + create = create.map(|(pa, prop)| (pa + C::page_size(self.level), prop)); + drop(top_ptf); + self.next_slot(); + } + } + + /// Apply the given operation to all the mappings within the range. + pub(super) unsafe fn protect( + &mut self, + len: usize, + op: impl MapOp, + ) -> Result<(), PageTableError> { + let end = self.va + len; + while self.va != end { + let top_spin = self.stack[C::NR_LEVELS - self.level].clone().unwrap(); + let mut top_ptf = top_spin.lock(); + let cur_pte = unsafe { self.cur_pte_ptr(&top_ptf).read() }; + if !cur_pte.is_valid() { + return Err(PageTableError::ProtectingInvalid); + } + // Go down if it's not a last node or if the page size is too big. 
+ if !(cur_pte.is_huge() || self.level == 1) + || (self.va % C::page_size(self.level)) != 0 + || self.va + C::page_size(self.level) > end + { + self.level_down(&mut top_ptf, Some(op(cur_pte.info()))); + continue; + } + // Apply the operation. + unsafe { + self.cur_pte_ptr(&top_ptf).write(E::new( + cur_pte.paddr(), + op(cur_pte.info()), + cur_pte.is_huge(), + true, + )) + }; + drop(top_ptf); + self.next_slot(); + } + Ok(()) + } + + fn cur_pte_ptr(&self, ptf: &PageTableFrame) -> *mut E { + let frame_addr = paddr_to_vaddr(ptf.inner.start_paddr()); + let offset = C::in_frame_index(self.va, self.level); + (frame_addr + offset * size_of::()) as *mut E + } + + /// Traverse forward in the current level to the next PTE. + /// If reached the end of a page table frame, it leads itself up to the next frame of the parent frame. + fn next_slot(&mut self) { + let page_size = C::page_size(self.level); + while self.level < C::NR_LEVELS && C::in_frame_index(self.va + page_size, self.level) == 0 { + self.level_up(); + } + self.va += page_size; + } + + /// Go up a level. We release the current frame if it has no mappings since the cursor only moves + /// forward. And we will do the final cleanup using `level_up` when the cursor is dropped. + fn level_up(&mut self) { + let last_map_cnt_is_zero = { + let top_ptf_ref = self.stack[C::NR_LEVELS - self.level].clone().unwrap(); + let top_ptf = top_ptf_ref.lock(); + top_ptf.map_count == 0 + }; + self.stack[C::NR_LEVELS - self.level] = None; + self.level += 1; + let can_release_child = + TypeId::of::() == TypeId::of::() && self.level < C::NR_LEVELS; + if can_release_child && last_map_cnt_is_zero { + let top_ptf_ref = self.stack[C::NR_LEVELS - self.level].clone().unwrap(); + let mut top_ptf = top_ptf_ref.lock(); + let frame_addr = paddr_to_vaddr(top_ptf.inner.start_paddr()); + let offset = C::in_frame_index(self.va, self.level); + unsafe { ((frame_addr + offset) as *mut E).write(E::new_invalid()) } + let idx = C::in_frame_index(self.va, self.level); + top_ptf.child.as_mut().unwrap()[idx] = None; + top_ptf.map_count -= 1; + } + } + + /// A level down operation during traversal. It may split a huge page into + /// smaller pages if we have an end address within the next mapped huge page. + /// It may also create a new child frame if the current frame does not have one. + /// If that may happen the map property of intermediate level `prop` should be + /// passed in correctly. Whether the map property matters in an intermediate + /// level is architecture-dependent. + unsafe fn level_down(&mut self, top_ptf: &mut PageTableFrame, prop: Option) { + if top_ptf.child.is_none() { + top_ptf.child = Some(Box::new(core::array::from_fn(|_| None))); + }; + let nxt_lvl_frame = if let Some(nxt_lvl_frame) = + top_ptf.child.as_ref().unwrap()[C::in_frame_index(self.va, self.level)].clone() + { + nxt_lvl_frame + } else { + let mut new_frame = PageTableFrame::::new(); + // If it already maps a huge page, we should split it. 
+            let pte = unsafe { self.cur_pte_ptr(top_ptf).read() };
+            if pte.is_valid() && pte.is_huge() {
+                let pa = pte.paddr();
+                let prop = pte.info().prop;
+                for i in 0..C::NR_ENTRIES_PER_FRAME {
+                    let nxt_level = self.level - 1;
+                    let nxt_pte = {
+                        let frame_addr = paddr_to_vaddr(new_frame.inner.start_paddr());
+                        &mut *(frame_addr as *mut E).add(i)
+                    };
+                    *nxt_pte = E::new(pa + i * C::page_size(nxt_level), prop, nxt_level > 1, true);
+                }
+                new_frame.map_count = C::NR_ENTRIES_PER_FRAME;
+                unsafe {
+                    self.cur_pte_ptr(top_ptf).write(E::new(
+                        new_frame.inner.start_paddr(),
+                        prop,
+                        false,
+                        false,
+                    ))
+                }
+            } else {
+                // The child PTE cannot be valid here, because the child is `None` and the PTE is not huge.
+                debug_assert!(!pte.is_valid());
+                unsafe {
+                    self.cur_pte_ptr(top_ptf).write(E::new(
+                        new_frame.inner.start_paddr(),
+                        prop.unwrap(),
+                        false,
+                        false,
+                    ))
+                }
+            }
+            top_ptf.map_count += 1;
+            let new_frame_ref = Arc::new(SpinLock::new(new_frame));
+            top_ptf.child.as_mut().unwrap()[C::in_frame_index(self.va, self.level)] =
+                Some(new_frame_ref.clone());
+            new_frame_ref
+        };
+        self.stack[C::NR_LEVELS - self.level + 1] = Some(nxt_lvl_frame);
+        self.level -= 1;
+    }
+
+    /// Map or unmap the page pointed to by the cursor (which could be large).
+    /// If the physical address and the map property are not provided, it unmaps
+    /// the current page.
+    unsafe fn map_page(
+        &mut self,
+        top_ptf: &mut PageTableFrame<E, C>,
+        create: Option<(Paddr, MapProperty)>,
+    ) {
+        let already_mapped = unsafe { self.cur_pte_ptr(top_ptf).read().is_valid() };
+        if let Some((pa, prop)) = create {
+            unsafe {
+                self.cur_pte_ptr(top_ptf)
+                    .write(E::new(pa, prop, self.level > 1, true))
+            }
+            if !already_mapped {
+                top_ptf.map_count += 1;
+            }
+        } else {
+            unsafe { self.cur_pte_ptr(top_ptf).write(E::new_invalid()) }
+            if already_mapped {
+                top_ptf.map_count -= 1;
+            }
+        }
+        // If mapping a huge page dismantles a child page table frame,
+        // make sure that the frame is released.
+        if let Some(child) = &mut top_ptf.child {
+            let idx = C::in_frame_index(self.va, self.level);
+            if child[idx].is_some() {
+                child[idx] = None;
+            }
+        };
+    }
+}
+
+impl<'a, M: PageTableMode, E: PageTableEntryTrait, C: PageTableConstsTrait> Drop
+    for PageTableCursor<'a, M, E, C>
+where
+    [(); C::NR_ENTRIES_PER_FRAME]:,
+    [(); C::NR_LEVELS]:,
+{
+    fn drop(&mut self) {
+        while self.level < C::NR_LEVELS {
+            self.level_up();
+        }
+    }
+}
+
+/// The iterator for querying over the page table without modifying it.
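As an illustrative aside (not part of the patch): the cursor's `map` keeps mapping at the current level only while the virtual address, the physical address, and the remaining length all fit a page of that level; otherwise it goes one level down. Below is a minimal sketch of that fit test, with a local `page_size` standing in for `C::page_size` and the kernel-shared-table and `HIGHEST_TRANSLATION_LEVEL` checks omitted.

```rust
// Simplified fit test behind the cursor's "map as many huge pages as possible" behavior.
fn page_size(level: usize) -> usize {
    4096 << (9 * (level - 1)) // 4 KiB, 2 MiB, 1 GiB on x86-64
}

fn can_map_at(level: usize, va: usize, pa: usize, end: usize) -> bool {
    let size = page_size(level);
    va % size == 0 && pa % size == 0 && va + size <= end
}

fn main() {
    // A 4 MiB range with 2 MiB-aligned addresses can use 2 MiB pages (level 2),
    // but not 1 GiB pages (level 3).
    let (va, pa, len) = (0x40_0000usize, 0x80_0000usize, 0x40_0000usize);
    assert!(can_map_at(2, va, pa, va + len));
    assert!(!can_map_at(3, va, pa, va + len));
}
```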
+pub struct PageTableIter<'a, M: PageTableMode, E: PageTableEntryTrait, C: PageTableConstsTrait> +where + [(); C::NR_ENTRIES_PER_FRAME]:, + [(); C::NR_LEVELS]:, +{ + cursor: PageTableCursor<'a, M, E, C>, + end_va: Vaddr, +} + +impl<'a, M: PageTableMode, E: PageTableEntryTrait, C: PageTableConstsTrait> + PageTableIter<'a, M, E, C> +where + [(); C::NR_ENTRIES_PER_FRAME]:, + [(); C::NR_LEVELS]:, +{ + pub(super) fn new(pt: &'a PageTable, va: &Range) -> Self { + Self { + cursor: PageTableCursor::new(pt, va.start), + end_va: va.end, + } + } +} + +pub struct PageTableQueryResult { + pub va: Range, + pub info: MapInfo, +} + +impl<'a, M: PageTableMode, E: PageTableEntryTrait, C: PageTableConstsTrait> Iterator + for PageTableIter<'a, M, E, C> +where + [(); C::NR_ENTRIES_PER_FRAME]:, + [(); C::NR_LEVELS]:, +{ + type Item = PageTableQueryResult; + + fn next(&mut self) -> Option { + if self.cursor.va >= self.end_va { + return None; + } + loop { + let level = self.cursor.level; + let va = self.cursor.va; + let top_spin = self.cursor.stack[C::NR_LEVELS - level].clone().unwrap(); + let mut top_ptf = top_spin.lock(); + let cur_pte = unsafe { self.cursor.cur_pte_ptr(&top_ptf).read() }; + // Yeild if it's not a valid node. + if !cur_pte.is_valid() { + return None; + } + // Go down if it's not a last node. + if !(cur_pte.is_huge() || level == 1) { + // Safety: alignment checked and there should be a child frame here. + unsafe { + self.cursor.level_down(&mut top_ptf, None); + } + continue; + } + // Yield the current mapping. + let mapped_range = self.cursor.va..self.cursor.va + C::page_size(self.cursor.level); + let map_info = cur_pte.info(); + drop(top_ptf); + self.cursor.next_slot(); + return Some(PageTableQueryResult { + va: mapped_range, + info: map_info, + }); + } + } +} diff --git a/framework/aster-frame/src/vm/page_table/mod.rs b/framework/aster-frame/src/vm/page_table/mod.rs new file mode 100644 index 000000000..3f49ab28a --- /dev/null +++ b/framework/aster-frame/src/vm/page_table/mod.rs @@ -0,0 +1,421 @@ +// SPDX-License-Identifier: MPL-2.0 + +use alloc::{boxed::Box, sync::Arc}; +use core::{fmt::Debug, marker::PhantomData, mem::size_of, ops::Range}; + +use crate::{ + arch::mm::{activate_page_table, PageTableConsts, PageTableEntry}, + sync::SpinLock, + vm::{paddr_to_vaddr, Paddr, Vaddr, VmAllocOptions, VmFrame, VmFrameVec, VmPerm, PAGE_SIZE}, +}; + +mod properties; +pub use properties::*; +mod cursor; +use cursor::*; + +#[derive(Debug)] +pub enum PageTableError { + InvalidVaddr(Vaddr), + InvalidVaddrRange(Range), + VaddrNotAligned(Vaddr), + VaddrRangeNotAligned(Range), + PaddrNotAligned(Paddr), + PaddrRangeNotAligned(Range), + // Protecting a mapping that does not exist. + ProtectingInvalid, +} + +/// This is a compile-time technique to force the frame developers to distinguish +/// between the kernel global page table instance, process specific user page table +/// instance, and device page table instances. +pub trait PageTableMode: 'static { + /// The range of virtual addresses that the page table can manage. + const VADDR_RANGE: Range; +} + +#[derive(Clone)] +pub struct UserMode {} + +impl PageTableMode for UserMode { + const VADDR_RANGE: Range = 0..super::MAX_USERSPACE_VADDR; +} + +#[derive(Clone)] +pub struct KernelMode {} + +impl PageTableMode for KernelMode { + const VADDR_RANGE: Range = super::KERNEL_BASE_VADDR..super::KERNEL_END_VADDR; +} + +/// A page table instance. 
+pub struct PageTable< + M: PageTableMode, + E: PageTableEntryTrait = PageTableEntry, + C: PageTableConstsTrait = PageTableConsts, +> where + [(); C::NR_ENTRIES_PER_FRAME]:, + [(); C::NR_LEVELS]:, +{ + root_frame: PtfRef, + _phantom: PhantomData, +} + +/// A page table frame. +/// It's also frequently referred to as a page table in many architectural documentations. +#[derive(Debug)] +struct PageTableFrame +where + [(); C::NR_ENTRIES_PER_FRAME]:, + [(); C::NR_LEVELS]:, +{ + pub inner: VmFrame, + #[allow(clippy::type_complexity)] + pub child: Option>; C::NR_ENTRIES_PER_FRAME]>>, + /// The number of mapped frames or page tables. + /// This is to track if we can free itself. + pub map_count: usize, +} + +type PtfRef = Arc>>; + +impl PageTableFrame +where + [(); C::NR_ENTRIES_PER_FRAME]:, + [(); C::NR_LEVELS]:, +{ + pub(crate) fn new() -> Self { + Self { + inner: VmAllocOptions::new(1).alloc_single().unwrap(), + child: None, + map_count: 0, + } + } +} + +impl PageTable +where + [(); C::NR_ENTRIES_PER_FRAME]:, + [(); C::NR_LEVELS]:, +{ + pub(crate) fn map_frame( + &mut self, + vaddr: Vaddr, + frame: &VmFrame, + prop: MapProperty, + ) -> Result<(), PageTableError> { + if vaddr % C::BASE_PAGE_SIZE != 0 { + return Err(PageTableError::VaddrNotAligned(vaddr)); + } + let va_range = vaddr + ..vaddr + .checked_add(PAGE_SIZE) + .ok_or(PageTableError::InvalidVaddr(vaddr))?; + if !range_contains(&UserMode::VADDR_RANGE, &va_range) { + return Err(PageTableError::InvalidVaddrRange(va_range)); + } + // Safety: modification to the user page table is safe. + unsafe { + self.map_frame_unchecked(vaddr, frame, prop); + } + Ok(()) + } + + pub(crate) fn map_frames( + &mut self, + vaddr: Vaddr, + frames: &VmFrameVec, + prop: MapProperty, + ) -> Result<(), PageTableError> { + if vaddr % C::BASE_PAGE_SIZE != 0 { + return Err(PageTableError::VaddrNotAligned(vaddr)); + } + let va_range = vaddr + ..vaddr + .checked_add(frames.nbytes()) + .ok_or(PageTableError::InvalidVaddr(vaddr))?; + if !range_contains(&UserMode::VADDR_RANGE, &va_range) { + return Err(PageTableError::InvalidVaddrRange(va_range)); + } + // Safety: modification to the user page table is safe. + unsafe { + self.map_frames_unchecked(vaddr, frames, prop); + } + Ok(()) + } + + pub(crate) fn map( + &mut self, + vaddr: &Range, + paddr: &Range, + prop: MapProperty, + ) -> Result<(), PageTableError> { + if vaddr.start % C::BASE_PAGE_SIZE != 0 || vaddr.end % C::BASE_PAGE_SIZE != 0 { + return Err(PageTableError::VaddrRangeNotAligned(vaddr.clone())); + } + if paddr.start % C::BASE_PAGE_SIZE != 0 || paddr.end % C::BASE_PAGE_SIZE != 0 { + return Err(PageTableError::PaddrRangeNotAligned(paddr.clone())); + } + if !range_contains(&UserMode::VADDR_RANGE, vaddr) { + return Err(PageTableError::InvalidVaddrRange(vaddr.clone())); + } + // Safety: modification to the user page table is safe. + unsafe { + self.map_unchecked(vaddr, paddr, prop); + } + Ok(()) + } + + pub(crate) fn unmap(&mut self, vaddr: &Range) -> Result<(), PageTableError> { + if vaddr.start % C::BASE_PAGE_SIZE != 0 || vaddr.end % C::BASE_PAGE_SIZE != 0 { + return Err(PageTableError::VaddrRangeNotAligned(vaddr.clone())); + } + if !range_contains(&UserMode::VADDR_RANGE, vaddr) { + return Err(PageTableError::InvalidVaddrRange(vaddr.clone())); + } + // Safety: modification to the user page table is safe. 
+ unsafe { + self.unmap_unchecked(vaddr); + } + Ok(()) + } + + pub(crate) fn protect( + &mut self, + vaddr: &Range, + op: impl MapOp, + ) -> Result<(), PageTableError> { + if vaddr.start % C::BASE_PAGE_SIZE != 0 || vaddr.end % C::BASE_PAGE_SIZE != 0 { + return Err(PageTableError::VaddrRangeNotAligned(vaddr.clone())); + } + if !range_contains(&UserMode::VADDR_RANGE, vaddr) { + return Err(PageTableError::InvalidVaddrRange(vaddr.clone())); + } + // Safety: modification to the user page table is safe. + unsafe { self.protect_unchecked(vaddr, op) } + } + + pub(crate) fn activate(&self) { + // Safety: The usermode page table is safe to activate since the kernel + // mappings are shared. + unsafe { + self.activate_unchecked(); + } + } +} + +impl PageTable +where + [(); C::NR_ENTRIES_PER_FRAME]:, + [(); C::NR_LEVELS]:, +{ + /// Create a new user page table. + /// + /// This should be the only way to create a user page table, that is + /// to fork the kernel page table with all the kernel mappings shared. + pub(crate) fn fork(&self) -> PageTable { + let new_root_frame = VmAllocOptions::new(1).alloc_single().unwrap(); + let root_frame = self.root_frame.lock(); + // Safety: The root_paddr is the root of a valid page table and + // it does not overlap with the new page. + unsafe { + let src = paddr_to_vaddr(root_frame.inner.start_paddr()) as *const E; + let dst = paddr_to_vaddr(new_root_frame.start_paddr()) as *mut E; + core::ptr::copy_nonoverlapping(src, dst, C::NR_ENTRIES_PER_FRAME); + } + PageTable:: { + root_frame: Arc::new(SpinLock::new(PageTableFrame:: { + inner: new_root_frame, + child: root_frame.child.clone(), + map_count: root_frame.map_count, + })), + _phantom: PhantomData, + } + } + + /// Explicitly make a range of virtual addresses shared between the kernel and user + /// page tables. Mapped pages before generating user page tables are shared either. + /// The virtual address range should be aligned to the root level page size. Considering + /// usize overflows, the caller should provide the index range of the root level pages + /// instead of the virtual address range. + pub(crate) fn make_shared_tables(&self, root_index: Range) { + let start = root_index.start; + assert!(start < C::NR_ENTRIES_PER_FRAME); + let end = root_index.end; + assert!(end <= C::NR_ENTRIES_PER_FRAME); + let mut root_frame = self.root_frame.lock(); + if root_frame.child.is_none() { + root_frame.child = Some(Box::new(core::array::from_fn(|_| None))); + } + for i in start..end { + let no_such_child = root_frame.child.as_ref().unwrap()[i].is_none(); + if no_such_child { + let frame = PageTableFrame::::new(); + let pte_ptr = (root_frame.inner.start_paddr() + i * size_of::()) as *mut E; + unsafe { + pte_ptr.write(E::new( + frame.inner.start_paddr(), + MapProperty { + perm: VmPerm::RWX, + cache: CachePolicy::Uncacheable, + }, + false, + false, + )); + } + let child_array = root_frame.child.as_mut().unwrap(); + child_array[i] = Some(Arc::new(SpinLock::new(frame))); + root_frame.map_count += 1; + } + } + } +} + +impl<'a, M: PageTableMode, E: PageTableEntryTrait, C: PageTableConstsTrait> PageTable +where + [(); C::NR_ENTRIES_PER_FRAME]:, + [(); C::NR_LEVELS]:, +{ + /// Create a new empty page table. Useful for the kernel page table and IOMMU page tables only. + pub(crate) fn empty() -> Self { + PageTable { + root_frame: Arc::new(SpinLock::new(PageTableFrame::::new())), + _phantom: PhantomData, + } + } + + /// The physical address of the root page table. 
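As an illustrative aside (not part of the patch): `make_shared_tables` takes root-table indices rather than virtual addresses. Assuming 4-level paging and a higher-half kernel starting at 0xffff_8000_0000_0000, the shared range would be root entries 256..512; the helper and the call shown below are hypothetical.

```rust
const NR_ENTRIES_PER_FRAME: usize = 512;

/// Hypothetical helper: index of a virtual address in the 512-entry root table.
fn root_index(va: usize) -> usize {
    (va >> 39) & (NR_ENTRIES_PER_FRAME - 1)
}

fn main() {
    assert_eq!(root_index(0xffff_8000_0000_0000), 256);
    // A kernel could then share the whole upper half with user page tables:
    // kernel_pt.make_shared_tables(256..512);  // hypothetical call
}
```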
+ pub(crate) fn root_paddr(&self) -> Paddr { + self.root_frame.lock().inner.start_paddr() + } + + /// Translate a virtual address to a physical address using the page table. + pub(crate) fn translate(&self, vaddr: Vaddr) -> Option { + // Safety: The root frame is a valid page table frame so the address is valid. + unsafe { page_walk::(self.root_paddr(), vaddr) } + } + + pub(crate) unsafe fn map_frame_unchecked( + &mut self, + vaddr: Vaddr, + frame: &VmFrame, + prop: MapProperty, + ) { + self.cursor(vaddr) + .map(PAGE_SIZE, Some((frame.start_paddr(), prop))); + } + + pub(crate) unsafe fn map_frames_unchecked( + &mut self, + vaddr: Vaddr, + frames: &VmFrameVec, + prop: MapProperty, + ) { + let mut cursor = self.cursor(vaddr); + for frame in frames.iter() { + cursor.map(PAGE_SIZE, Some((frame.start_paddr(), prop))); + } + } + + pub(crate) unsafe fn map_unchecked( + &mut self, + vaddr: &Range, + paddr: &Range, + prop: MapProperty, + ) { + self.cursor(vaddr.start) + .map(vaddr.len(), Some((paddr.start, prop))); + } + + pub(crate) unsafe fn unmap_unchecked(&mut self, vaddr: &Range) { + self.cursor(vaddr.start).map(vaddr.len(), None); + } + + pub(crate) unsafe fn protect_unchecked( + &mut self, + vaddr: &Range, + op: impl MapOp, + ) -> Result<(), PageTableError> { + self.cursor(vaddr.start).protect(vaddr.len(), op) + } + + pub(crate) fn query( + &'a self, + vaddr: &Range, + ) -> Result, PageTableError> { + if vaddr.start % C::BASE_PAGE_SIZE != 0 || vaddr.end % C::BASE_PAGE_SIZE != 0 { + return Err(PageTableError::InvalidVaddrRange(vaddr.clone())); + } + if !range_contains(&M::VADDR_RANGE, vaddr) { + return Err(PageTableError::InvalidVaddrRange(vaddr.clone())); + } + Ok(PageTableIter::new(self, vaddr)) + } + + pub(crate) unsafe fn activate_unchecked(&self) { + activate_page_table(self.root_paddr(), CachePolicy::Writeback); + } + + /// Create a new cursor for the page table initialized at the given virtual address. + fn cursor(&self, va: usize) -> PageTableCursor<'a, M, E, C> { + PageTableCursor::new(self, va) + } + + /// Create a new reference to the same page table. + /// The caller must ensure that the kernel page table is not copied. + /// This is only useful for IOMMU page tables. Think twice before using it in other cases. + pub(crate) unsafe fn shallow_copy(&self) -> Self { + PageTable { + root_frame: self.root_frame.clone(), + _phantom: PhantomData, + } + } +} + +/// A software emulation of the MMU address translation process. +/// It returns the physical address of the given virtual address if a valid mapping +/// exists for the given virtual address. +/// +/// # Safety +/// +/// The caller must ensure that the root_paddr is a valid pointer to the root +/// page table frame. +pub(super) unsafe fn page_walk( + root_paddr: Paddr, + vaddr: Vaddr, +) -> Option { + let mut cur_level = C::NR_LEVELS; + let mut cur_pte = { + let frame_addr = paddr_to_vaddr(root_paddr); + let offset = C::in_frame_index(vaddr, cur_level); + // Safety: The offset does not exceed the value of PAGE_SIZE. + unsafe { &*(frame_addr as *const E).add(offset) } + }; + + while cur_level > 1 { + if !cur_pte.is_valid() { + return None; + } + if cur_pte.is_huge() { + debug_assert!(cur_level <= C::HIGHEST_TRANSLATION_LEVEL); + break; + } + cur_level -= 1; + cur_pte = { + let frame_addr = paddr_to_vaddr(cur_pte.paddr()); + let offset = C::in_frame_index(vaddr, cur_level); + // Safety: The offset does not exceed the value of PAGE_SIZE. 
+ unsafe { &*(frame_addr as *const E).add(offset) } + }; + } + + if cur_pte.is_valid() { + Some(cur_pte.paddr() + (vaddr & (C::page_size(cur_level) - 1))) + } else { + None + } +} + +fn range_contains>(parent: &Range, child: &Range) -> bool { + parent.start <= child.start && parent.end >= child.end +} diff --git a/framework/aster-frame/src/vm/page_table/properties.rs b/framework/aster-frame/src/vm/page_table/properties.rs new file mode 100644 index 000000000..2aef17b02 --- /dev/null +++ b/framework/aster-frame/src/vm/page_table/properties.rs @@ -0,0 +1,184 @@ +// SPDX-License-Identifier: MPL-2.0 + +use core::fmt::Debug; + +use pod::Pod; + +use crate::vm::{Paddr, Vaddr, VmPerm}; + +/// A minimal set of constants that determines the flags of the page table. +/// This provides an abstraction over most paging modes in common architectures. +pub trait PageTableConstsTrait: Debug + 'static { + /// The smallest page size. + const BASE_PAGE_SIZE: usize; + + /// The number of levels in the page table. + /// The level 1 is the leaf level, and the level `NR_LEVELS` is the root level. + const NR_LEVELS: usize; + + /// The highest level that a PTE can be directly used to translate a VA. + /// This affects the the largest page size supported by the page table. + const HIGHEST_TRANSLATION_LEVEL: usize; + + /// The size of a PTE. + const ENTRY_SIZE: usize; + + // Here are some const values that are determined by the page table constants. + + /// The number of PTEs per page table frame. + const NR_ENTRIES_PER_FRAME: usize = Self::BASE_PAGE_SIZE / Self::ENTRY_SIZE; + + /// The number of bits used to index a PTE in a page table frame. + const IN_FRAME_INDEX_BITS: usize = Self::NR_ENTRIES_PER_FRAME.ilog2() as usize; + + /// The index of a VA's PTE in a page table frame at the given level. + fn in_frame_index(va: Vaddr, level: usize) -> usize { + va >> (Self::BASE_PAGE_SIZE.ilog2() as usize + Self::IN_FRAME_INDEX_BITS * (level - 1)) + & (Self::NR_ENTRIES_PER_FRAME - 1) + } + + /// The page size at a given level. + fn page_size(level: usize) -> usize { + Self::BASE_PAGE_SIZE << (Self::IN_FRAME_INDEX_BITS * (level - 1)) + } +} + +bitflags::bitflags! { + /// The status of a memory mapping recorded by the hardware. + pub struct MapStatus: u32 { + const ACCESSED = 0b0000_0001; + const DIRTY = 0b0000_0010; + } +} + +// TODO: Make it more abstract when supporting other architectures. +/// A type to control the cacheability of the main memory. +/// +/// The type currently follows the definition as defined by the AMD64 manual. +#[derive(Clone, Copy, Debug, PartialEq)] +pub enum CachePolicy { + /// Uncacheable (UC). + /// + /// Reads from, and writes to, UC memory are not cacheable. + /// Reads from UC memory cannot be speculative. + /// Write-combining to UC memory is not allowed. + /// Reads from or writes to UC memory cause the write buffers to be written to memory + /// and be invalidated prior to the access to UC memory. + /// + /// The UC memory type is useful for memory-mapped I/O devices + /// where strict ordering of reads and writes is important. + Uncacheable, + /// Write-Combining (WC). + /// + /// Reads from, and writes to, WC memory are not cacheable. + /// Reads from WC memory can be speculative. + /// + /// Writes to this memory type can be combined internally by the processor + /// and written to memory as a single write operation to reduce memory accesses. + /// + /// The WC memory type is useful for graphics-display memory buffers + /// where the order of writes is not important. 
+ WriteCombining, + /// Write-Protect (WP). + /// + /// Reads from WP memory are cacheable and allocate cache lines on a read miss. + /// Reads from WP memory can be speculative. + /// + /// Writes to WP memory that hit in the cache do not update the cache. + /// Instead, all writes update memory (write to memory), + /// and writes that hit in the cache invalidate the cache line. + /// Write buffering of WP memory is allowed. + /// + /// The WP memory type is useful for shadowed-ROM memory + /// where updates must be immediately visible to all devices that read the shadow locations. + WriteProtected, + /// Writethrough (WT). + /// + /// Reads from WT memory are cacheable and allocate cache lines on a read miss. + /// Reads from WT memory can be speculative. + /// + /// All writes to WT memory update main memory, + /// and writes that hit in the cache update the cache line. + /// Writes that miss the cache do not allocate a cache line. + /// Write buffering of WT memory is allowed. + Writethrough, + /// Writeback (WB). + /// + /// The WB memory is the "normal" memory. See detailed descriptions in the manual. + /// + /// This type of memory provides the highest-possible performance + /// and is useful for most software and data stored in system memory (DRAM). + Writeback, +} + +#[derive(Clone, Copy, Debug)] +pub struct MapProperty { + pub perm: VmPerm, + pub cache: CachePolicy, +} + +/// Any functions that could be used to modify the map property of a memory mapping. +pub trait MapOp: Fn(MapInfo) -> MapProperty {} +impl MapOp for F where F: Fn(MapInfo) -> MapProperty {} + +// These implementations allow a property to be used as an overriding map operation. +// Other usages seems pointless. +impl FnOnce<(MapInfo,)> for MapProperty { + type Output = MapProperty; + extern "rust-call" fn call_once(self, _: (MapInfo,)) -> MapProperty { + self + } +} +impl FnMut<(MapInfo,)> for MapProperty { + extern "rust-call" fn call_mut(&mut self, _: (MapInfo,)) -> MapProperty { + *self + } +} +impl Fn<(MapInfo,)> for MapProperty { + extern "rust-call" fn call(&self, _: (MapInfo,)) -> MapProperty { + *self + } +} + +impl MapProperty { + pub fn new_invalid() -> Self { + Self { + perm: VmPerm::empty(), + cache: CachePolicy::Uncacheable, + } + } +} + +#[derive(Clone, Copy, Debug)] +pub struct MapInfo { + pub prop: MapProperty, + pub status: MapStatus, +} + +pub trait PageTableEntryTrait: Clone + Copy + Sized + Pod + Debug { + /// Create a new invalid page table flags that causes page faults + /// when the MMU meets them. + fn new_invalid() -> Self; + /// If the flags are valid. + /// Note that the invalid PTE may be _valid_ in representation, but + /// just causing page faults when the MMU meets them. + fn is_valid(&self) -> bool; + + /// Create a new PTE with the given physical address and flags. + /// The huge flag indicates that the PTE maps a huge page. + /// The last flag indicates that the PTE is the last level page table. + /// If the huge and last flags are both false, the PTE maps a page + /// table frame. + fn new(paddr: Paddr, prop: MapProperty, huge: bool, last: bool) -> Self; + + /// Get the physical address from the PTE. + /// The physical address recorded in the PTE is either: + /// - the physical address of the next level page table; + /// - or the physical address of the page frame it maps to. + fn paddr(&self) -> Paddr; + + fn info(&self) -> MapInfo; + + /// If the PTE maps a huge page or a page table frame. 
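As an illustrative aside (not part of the patch): `protect` accepts any `Fn(MapInfo) -> MapProperty`, so callers can derive the new property from the existing mapping, and the `Fn` impls above additionally let a bare `MapProperty` act as an unconditional override. A minimal sketch of the closure form, using simplified stand-in types rather than the ones defined in this file:

```rust
// Simplified stand-ins for MapProperty/MapInfo, just to show the closure-based MapOp pattern.
#[derive(Clone, Copy, Debug, PartialEq)]
struct MapProperty {
    writable: bool,
    user: bool,
}

#[derive(Clone, Copy)]
struct MapInfo {
    prop: MapProperty,
}

fn protect(info: MapInfo, op: impl Fn(MapInfo) -> MapProperty) -> MapProperty {
    op(info)
}

fn main() {
    let info = MapInfo {
        prop: MapProperty { writable: true, user: true },
    };
    // Revoke write access while keeping the other attributes of the old mapping.
    let new_prop = protect(info, |i| MapProperty { writable: false, ..i.prop });
    assert_eq!(new_prop, MapProperty { writable: false, user: true });
}
```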
+ fn is_huge(&self) -> bool; +} diff --git a/framework/aster-frame/src/vm/space.rs b/framework/aster-frame/src/vm/space.rs index 9727bfad0..b510bd183 100644 --- a/framework/aster-frame/src/vm/space.rs +++ b/framework/aster-frame/src/vm/space.rs @@ -4,8 +4,14 @@ use core::ops::Range; use bitflags::bitflags; -use super::{is_page_aligned, MapArea, MemorySet, VmFrameVec, VmIo}; -use crate::{arch::mm::PageTableFlags, prelude::*, sync::Mutex, vm::PAGE_SIZE, Error}; +use super::{is_page_aligned, page_table::CachePolicy, MapArea, MemorySet, VmFrameVec, VmIo}; +use crate::{ + arch::mm::PageTableFlags, + prelude::*, + sync::Mutex, + vm::{page_table::MapProperty, PAGE_SIZE}, + Error, +}; /// Virtual memory space. /// @@ -31,15 +37,9 @@ impl VmSpace { memory_set: Arc::new(Mutex::new(MemorySet::new())), } } - - /// Activate the page table, load root physical address to cr3 - #[allow(clippy::missing_safety_doc)] - pub unsafe fn activate(&self) { - #[cfg(target_arch = "x86_64")] - crate::arch::x86::mm::activate_page_table( - self.memory_set.lock().pt.root_paddr(), - x86_64::registers::control::Cr3Flags::PAGE_LEVEL_CACHE_DISABLE, - ); + /// Activate the page table. + pub fn activate(&self) { + self.memory_set.lock().pt.activate(); } /// Maps some physical memory pages into the VM space according to the given @@ -49,7 +49,6 @@ impl VmSpace { /// /// For more information, see `VmMapOptions`. pub fn map(&self, frames: VmFrameVec, options: &VmMapOptions) -> Result { - let flags = PageTableFlags::from(options.perm); if options.addr.is_none() { return Err(Error::InvalidArgs); } @@ -75,7 +74,15 @@ impl VmSpace { for (idx, frame) in frames.into_iter().enumerate() { let addr = base_addr + idx * PAGE_SIZE; let frames = VmFrameVec::from_one_frame(frame); - memory_set.map(MapArea::new(addr, PAGE_SIZE, flags, frames)); + memory_set.map(MapArea::new( + addr, + PAGE_SIZE, + MapProperty { + perm: options.perm, + cache: CachePolicy::Writeback, + }, + frames, + )); } Ok(base_addr) @@ -90,15 +97,15 @@ impl VmSpace { /// Determine whether the target `vaddr` is writable based on the page table. pub fn is_writable(&self, vaddr: Vaddr) -> bool { let memory_set = self.memory_set.lock(); - let flags = memory_set.flags(vaddr); - flags.is_some_and(|flags| flags.contains(PageTableFlags::WRITABLE)) + let info = memory_set.info(vaddr); + info.is_some_and(|info| info.prop.perm.contains(VmPerm::W)) } /// Determine whether the target `vaddr` is executable based on the page table. pub fn is_executable(&self, vaddr: Vaddr) -> bool { let memory_set = self.memory_set.lock(); - let flags = memory_set.flags(vaddr); - flags.is_some_and(|flags| !flags.contains(PageTableFlags::NO_EXECUTE)) + let info = memory_set.info(vaddr); + info.is_some_and(|info| info.prop.perm.contains(VmPerm::X)) } /// Unmaps the physical memory pages within the VM address range. @@ -136,10 +143,15 @@ impl VmSpace { debug_assert!(range.end % PAGE_SIZE == 0); let start_page = range.start / PAGE_SIZE; let end_page = range.end / PAGE_SIZE; - let flags = PageTableFlags::from(perm); for page_idx in start_page..end_page { let addr = page_idx * PAGE_SIZE; - self.memory_set.lock().protect(addr, flags) + self.memory_set.lock().protect( + addr, + MapProperty { + perm, + cache: CachePolicy::Writeback, + }, + ) } Ok(()) } @@ -253,8 +265,11 @@ bitflags! { const W = 0b00000010; /// Executable. const X = 0b00000100; - /// User + /// User accessible. const U = 0b00001000; + /// Global. + /// A global page is not evicted from the TLB when TLB is flushed. 
+        const G = 0b00010000;
         /// Readable + writable.
         const RW = Self::R.bits | Self::W.bits;
         /// Readable + execuable.
diff --git a/kernel/aster-nix/src/vm/perms.rs b/kernel/aster-nix/src/vm/perms.rs
index fdb2c69c2..cfef199b8 100644
--- a/kernel/aster-nix/src/vm/perms.rs
+++ b/kernel/aster-nix/src/vm/perms.rs
@@ -66,7 +66,7 @@ impl From<VmPerm> for VmPerms {
 impl From<VmPerms> for VmPerm {
     fn from(perms: VmPerms) -> Self {
-        let mut perm = VmPerm::empty();
+        let mut perm = VmPerm::U;
         if perms.contains(VmPerms::READ) {
             perm |= VmPerm::R;
         }
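As an illustrative aside (not part of the patch): with the perms.rs change above, every `VmPerms`-to-`VmPerm` conversion now starts from `VmPerm::U`, so user mappings always carry the user-accessible bit. The sketch below mirrors the `VmPerm` flag values from space.rs; the `VmPerms` values are illustrative only.

```rust
bitflags::bitflags! {
    struct VmPerm: u8 {
        const R = 0b0000_0001;
        const W = 0b0000_0010;
        const X = 0b0000_0100;
        const U = 0b0000_1000;
    }
}

bitflags::bitflags! {
    struct VmPerms: u32 {
        const READ = 1; // illustrative values
        const WRITE = 2;
    }
}

fn to_vm_perm(perms: VmPerms) -> VmPerm {
    let mut perm = VmPerm::U; // was VmPerm::empty() before this patch
    if perms.contains(VmPerms::READ) {
        perm |= VmPerm::R;
    }
    if perms.contains(VmPerms::WRITE) {
        perm |= VmPerm::W;
    }
    perm
}

fn main() {
    let p = to_vm_perm(VmPerms::READ | VmPerms::WRITE);
    assert!(p.contains(VmPerm::U | VmPerm::R | VmPerm::W));
    assert!(!p.contains(VmPerm::X));
}
```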