From e7e613db4ed8dedc088e3424e7f2677bffdae166 Mon Sep 17 00:00:00 2001
From: Yuke Peng
Date: Fri, 22 Mar 2024 12:19:42 +0800
Subject: [PATCH] Implement kernel page fault handler

---
 framework/aster-frame/src/arch/x86/cpu.rs    |  28 +++++
 framework/aster-frame/src/arch/x86/mm/mod.rs |  27 ++---
 framework/aster-frame/src/trap/handler.rs    | 114 ++++++++++++++-----
 framework/aster-frame/src/vm/memory_set.rs   |   4 +-
 framework/aster-frame/src/vm/page_table.rs   |  57 ++++++----
 5 files changed, 165 insertions(+), 65 deletions(-)

diff --git a/framework/aster-frame/src/arch/x86/cpu.rs b/framework/aster-frame/src/arch/x86/cpu.rs
index fb2ad1199..e7d9d2101 100644
--- a/framework/aster-frame/src/arch/x86/cpu.rs
+++ b/framework/aster-frame/src/arch/x86/cpu.rs
@@ -8,6 +8,7 @@ use core::{
     fmt::Debug,
 };
 
+use bitflags::bitflags;
 use bitvec::{
     prelude::{BitVec, Lsb0},
     slice::IterOnes,
@@ -398,6 +399,33 @@ define_cpu_exception!(
     [RESERVED_31 = 31, Reserved]
 );
 
+bitflags! {
+    /// The page fault error code, as defined in the Intel Architectures Software
+    /// Developer's Manual, Volume 3.
+    pub struct PageFaultErrorCode: usize {
+        /// 0 if there is no translation for the linear address;
+        /// 1 if the fault was caused by a page-level protection violation.
+        const PRESENT = 1 << 0;
+        /// 1 if the access was a write.
+        const WRITE = 1 << 1;
+        /// 1 if the access was a user-mode access.
+        const USER = 1 << 2;
+        /// 1 if there is no translation for the linear address
+        /// because a reserved bit was set.
+        const RESERVED = 1 << 3;
+        /// 1 if the access was an instruction fetch.
+        const INSTRUCTION = 1 << 4;
+        /// 1 if the access was a data access to a linear address with a protection key for which
+        /// the protection-key rights registers disallow access.
+        const PROTECTION = 1 << 5;
+        /// 1 if the access was a shadow-stack access.
+        const SHADOW_STACK = 1 << 6;
+        /// 1 if there is no translation for the linear address using HLAT paging.
+        const HLAT = 1 << 7;
+        /// 1 if the exception is unrelated to paging and resulted from a violation of
+        /// SGX-specific access-control requirements.
+        const SGX = 1 << 15;
+    }
+}
+
 impl CpuException {
     pub fn is_cpu_exception(trap_num: u16) -> bool {
         trap_num < EXCEPTION_LIST.len() as u16
diff --git a/framework/aster-frame/src/arch/x86/mm/mod.rs b/framework/aster-frame/src/arch/x86/mm/mod.rs
index dc52a9bb4..3e3fa3c29 100644
--- a/framework/aster-frame/src/arch/x86/mm/mod.rs
+++ b/framework/aster-frame/src/arch/x86/mm/mod.rs
@@ -3,14 +3,12 @@
 use alloc::{collections::BTreeMap, fmt};
 
 use pod::Pod;
+use spin::Once;
 use x86_64::{instructions::tlb, structures::paging::PhysFrame, VirtAddr};
 
-use crate::{
-    sync::Mutex,
-    vm::{
-        page_table::{table_of, PageTableEntryTrait, PageTableFlagsTrait},
-        Paddr, Vaddr,
-    },
+use crate::vm::{
+    page_table::{table_of, PageTableEntryTrait, PageTableFlagsTrait},
+    Paddr, Vaddr,
 };
 
 pub(crate) const NR_ENTRIES_PER_PAGE: usize = 512;
@@ -79,9 +77,9 @@ pub unsafe fn activate_page_table(root_paddr: Paddr, flags: x86_64::registers::c
     );
 }
 
-pub static ALL_MAPPED_PTE: Mutex<BTreeMap<usize, PageTableEntry>> = Mutex::new(BTreeMap::new());
+pub(crate) static INIT_MAPPED_PTE: Once<BTreeMap<usize, PageTableEntry>> = Once::new();
 
-pub fn init() {
+pub(crate) fn init() {
     let (page_directory_base, _) = x86_64::registers::control::Cr3::read();
     let page_directory_base = page_directory_base.start_address().as_u64() as usize;
 
     let p4 = unsafe { table_of::<PageTableEntry>(page_directory_base).unwrap() };
 
     // Cancel mapping in lowest addresses.
     p4[0].clear();
-    let mut map_pte = ALL_MAPPED_PTE.lock();
-    for (i, p4_i) in p4.iter().enumerate().take(512) {
-        if p4_i.flags().contains(PageTableFlags::PRESENT) {
-            map_pte.insert(i, *p4_i);
+    INIT_MAPPED_PTE.call_once(|| {
+        let mut mapped_pte = BTreeMap::new();
+        for (i, p4_i) in p4.iter().enumerate().take(512) {
+            if p4_i.flags().contains(PageTableFlags::PRESENT) {
+                mapped_pte.insert(i, *p4_i);
+            }
         }
-    }
+        mapped_pte
+    });
 }
 
 impl PageTableFlagsTrait for PageTableFlags {
diff --git a/framework/aster-frame/src/trap/handler.rs b/framework/aster-frame/src/trap/handler.rs
index dbea33f89..a3df0c563 100644
--- a/framework/aster-frame/src/trap/handler.rs
+++ b/framework/aster-frame/src/trap/handler.rs
@@ -2,6 +2,7 @@
 
 use core::sync::atomic::{AtomicBool, Ordering};
 
+use log::debug;
 #[cfg(feature = "intel_tdx")]
 use tdx_guest::tdcall;
 use trapframe::TrapFrame;
@@ -13,9 +14,19 @@
 #[cfg(feature = "intel_tdx")]
-use crate::arch::{
-    mm::PageTableFlags,
-    tdx_guest::{handle_virtual_exception, TdxTrapFrame},
-};
-#[cfg(feature = "intel_tdx")]
-use crate::vm::{page_table::KERNEL_PAGE_TABLE, vaddr_to_paddr};
-use crate::{arch::irq::IRQ_LIST, cpu::CpuException, cpu_local};
+use crate::arch::tdx_guest::{handle_virtual_exception, TdxTrapFrame};
+use crate::{
+    arch::{
+        irq::IRQ_LIST,
+        mm::{is_kernel_vaddr, PageTableEntry, PageTableFlags},
+    },
+    boot::memory_region::MemoryRegion,
+    cpu::{CpuException, PageFaultErrorCode, PAGE_FAULT},
+    cpu_local,
+    vm::{
+        page_table::{KernelMode, PageTableFlagsTrait},
+        PageTable, PHYS_MEM_BASE_VADDR,
+    },
+};
 
 #[cfg(feature = "intel_tdx")]
 impl TdxTrapFrame for TrapFrame {
@@ -121,32 +132,22 @@ impl TdxTrapFrame for TrapFrame {
 #[no_mangle]
 extern "sysv64" fn trap_handler(f: &mut TrapFrame) {
     if CpuException::is_cpu_exception(f.trap_num as u16) {
-        const VIRTUALIZATION_EXCEPTION: u16 = 20;
-        const PAGE_FAULT: u16 = 14;
-        #[cfg(feature = "intel_tdx")]
-        if f.trap_num as u16 == VIRTUALIZATION_EXCEPTION {
-            let ve_info = tdcall::get_veinfo().expect("#VE handler: fail to get VE info\n");
-            handle_virtual_exception(f, &ve_info);
-            return;
-        }
-        #[cfg(feature = "intel_tdx")]
-        if f.trap_num as u16 == PAGE_FAULT {
-            let mut pt = KERNEL_PAGE_TABLE.get().unwrap().lock();
-            // Safety: Map virtio addr when set shared bit in a TD. Only add the `PageTableFlags::SHARED` flag.
-            unsafe {
-                let page_fault_vaddr = x86::controlregs::cr2();
-                let _ = pt.map(
-                    page_fault_vaddr,
-                    vaddr_to_paddr(page_fault_vaddr).unwrap(),
-                    PageTableFlags::SHARED | PageTableFlags::PRESENT | PageTableFlags::WRITABLE,
-                );
-            };
-            return;
-        }
-        panic!(
-            "cannot handle this kernel cpu fault now, information:{:#x?}",
-            f
-        );
+        match CpuException::to_cpu_exception(f.trap_num as u16).unwrap() {
+            #[cfg(feature = "intel_tdx")]
+            &VIRTUALIZATION_EXCEPTION => {
+                let ve_info = tdcall::get_veinfo().expect("#VE handler: fail to get VE info\n");
+                handle_virtual_exception(f, &ve_info);
+            }
+            &PAGE_FAULT => {
+                handle_kernel_page_fault(f);
+            }
+            exception => {
+                panic!(
+                    "Cannot handle kernel CPU exception: {:?}. Error code: {:x?}; trap frame: {:#x?}.",
+                    exception, f.error_code, f
+                );
+            }
+        }
     } else {
         call_irq_callback_functions(f);
     }
@@ -182,3 +183,62 @@ cpu_local! {
 pub fn in_interrupt_context() -> bool {
     IN_INTERRUPT_CONTEXT.load(Ordering::Acquire)
 }
+
+fn handle_kernel_page_fault(f: &TrapFrame) {
+    // We only create the mapping `vaddr = paddr + PHYS_MEM_BASE_VADDR` in the kernel
+    // page fault handler.
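+    // That is, a page fault in the kernel's linear mapping of physical memory is
+    // resolved by lazily mapping the faulting page to `vaddr - PHYS_MEM_BASE_VADDR`;
+    // the assertions below reject faults that cannot be handled this way.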
+    let page_fault_vaddr = x86_64::registers::control::Cr2::read().as_u64();
+    debug_assert!(is_kernel_vaddr(page_fault_vaddr as usize));
+
+    // Check that the fault did not happen in the kernel image region.
+    // FIXME: Modifying the offset mapping of the kernel code and data should not be permitted.
+    debug_assert!({
+        let kernel_region = MemoryRegion::kernel();
+        let start = kernel_region.base();
+        let end = start + kernel_region.len();
+        !((start..end).contains(&(page_fault_vaddr as usize)))
+    });
+
+    // Check the error code and construct the page table flags.
+    // FIXME: The `PageFaultErrorCode` may not be the same on other platforms such as RISC-V.
+    let error_code = PageFaultErrorCode::from_bits_truncate(f.error_code);
+    debug!(
+        "Handling kernel page fault. Page fault address: {:x?}; error code: {:?}",
+        page_fault_vaddr, error_code
+    );
+    debug_assert!(!error_code.contains(PageFaultErrorCode::USER));
+    debug_assert!(!error_code.contains(PageFaultErrorCode::INSTRUCTION));
+    let mut flags = PageTableFlags::empty()
+        .set_present(true)
+        .set_executable(false);
+    #[cfg(feature = "intel_tdx")]
+    {
+        // FIXME: Adding the shared bit directly will have security issues.
+        flags = flags | PageTableFlags::SHARED;
+    }
+    if error_code.contains(PageFaultErrorCode::WRITE) {
+        flags = flags.set_writable(true);
+    }
+
+    // Handle the page fault.
+    let mut page_table: PageTable<PageTableEntry, KernelMode> =
+        unsafe { PageTable::from_root_register() };
+    if error_code.contains(PageFaultErrorCode::PRESENT) {
+        // FIXME: We should define the initial mapping, and the `protect` method here should
+        // not change the permissions of the initial mapping.
+        //
+        // Safety: The page fault address has been checked and the flags are constructed
+        // based on the error code.
+        unsafe {
+            page_table
+                .protect(page_fault_vaddr as usize, flags)
+                .unwrap();
+        }
+    } else {
+        // Safety: The page fault address has been checked and the flags are constructed
+        // based on the error code.
+        let paddr = page_fault_vaddr as usize - PHYS_MEM_BASE_VADDR;
+        unsafe {
+            page_table
+                .map(page_fault_vaddr as usize, paddr, flags)
+                .unwrap();
+        }
+    }
+}
diff --git a/framework/aster-frame/src/vm/memory_set.rs b/framework/aster-frame/src/vm/memory_set.rs
index 7c2a27cac..36323fd6b 100644
--- a/framework/aster-frame/src/vm/memory_set.rs
+++ b/framework/aster-frame/src/vm/memory_set.rs
@@ -5,7 +5,7 @@ use core::fmt;
 
 use super::page_table::{PageTable, PageTableConfig, UserMode};
 use crate::{
-    arch::mm::{PageTableEntry, PageTableFlags},
+    arch::mm::{INIT_MAPPED_PTE, PageTableEntry, PageTableFlags},
     prelude::*,
     vm::{
         is_page_aligned, VmAllocOptions, VmFrame, VmFrameVec, VmReader, VmWriter, PAGE_SIZE,
@@ -179,7 +179,7 @@ impl MemorySet {
         let mut page_table = PageTable::<PageTableEntry, UserMode>::new(PageTableConfig {
             address_width: super::page_table::AddressWidth::Level4,
         });
-        let mapped_pte = crate::arch::mm::ALL_MAPPED_PTE.lock();
+        let mapped_pte = INIT_MAPPED_PTE.get().unwrap();
         for (index, pte) in mapped_pte.iter() {
             // Safety: These PTEs are all valid PTEs fetched from the initial page table during memory initialization.
             unsafe {
diff --git a/framework/aster-frame/src/vm/page_table.rs b/framework/aster-frame/src/vm/page_table.rs
index ab0ad9bae..030de390f 100644
--- a/framework/aster-frame/src/vm/page_table.rs
+++ b/framework/aster-frame/src/vm/page_table.rs
@@ -171,6 +171,18 @@ impl<T: PageTableEntryTrait> PageTable<T, UserMode> {
         // Safety: The vaddr belongs to user mode program and does not affect the kernel mapping.
         unsafe { self.do_protect(vaddr, flags) }
     }
+
+    /// Add a new mapping directly in the root page table.
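+    ///
+    /// This is currently used when creating a new user page table: the kernel's
+    /// initial mappings recorded in `INIT_MAPPED_PTE` are copied into the new
+    /// root page table (see `MemorySet` in `vm/memory_set.rs`).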
+    ///
+    /// # Safety
+    ///
+    /// The user must guarantee the validity of the PTE.
+    pub(crate) unsafe fn add_root_mapping(&mut self, index: usize, pte: &T) {
+        debug_assert!((index + 1) * size_of::<T>() <= PAGE_SIZE);
+        // Safety: The root_paddr refers to the root of a valid page table.
+        let root_ptes: &mut [T] = table_of(self.root_paddr).unwrap();
+        root_ptes[index] = *pte;
+    }
 }
 
 impl<T: PageTableEntryTrait> PageTable<T, KernelMode> {
@@ -256,19 +268,6 @@ impl<T: PageTableEntryTrait> PageTable<T, KernelMode> {
-    /// Add a new mapping directly in the root page table.
-    ///
-    /// # Safety
-    ///
-    /// User must guarantee the validity of the PTE.
-    ///
-    pub unsafe fn add_root_mapping(&mut self, index: usize, pte: &T) {
-        debug_assert!((index + 1) * size_of::<T>() <= PAGE_SIZE);
-        // Safety: The root_paddr is refer to the root of a valid page table.
-        let root_ptes: &mut [T] = table_of(self.root_paddr).unwrap();
-        root_ptes[index] = *pte;
-    }
-
     /// Mapping `vaddr` to `paddr` with flags.
     ///
     /// # Safety
     ///
@@ -384,6 +383,25 @@ impl<T: PageTableEntryTrait> PageTable<T, KernelMode> {
         Ok(old_flags)
     }
 
+    /// Constructs a page table instance from the root register (CR3 on x86).
+    ///
+    /// # Safety
+    ///
+    /// This function bypasses Rust's ownership model and directly constructs an instance of a
+    /// page table.
+    pub(crate) unsafe fn from_root_register() -> Self {
+        #[cfg(target_arch = "x86_64")]
+        let (page_directory_base, _) = x86_64::registers::control::Cr3::read();
+        PageTable {
+            root_paddr: page_directory_base.start_address().as_u64() as usize,
+            tables: Vec::new(),
+            config: PageTableConfig {
+                address_width: AddressWidth::Level4,
+            },
+            _phantom: PhantomData,
+        }
+    }
+
     pub fn flags(&mut self, vaddr: Vaddr) -> Option<T::F> {
         let last_entry = self.page_walk(vaddr, false)?;
         Some(last_entry.flags())
     }
@@ -420,15 +438,8 @@ pub fn vaddr_to_paddr(vaddr: Vaddr) -> Option<Paddr> {
 
 pub fn init() {
     KERNEL_PAGE_TABLE.call_once(|| {
-        #[cfg(target_arch = "x86_64")]
-        let (page_directory_base, _) = x86_64::registers::control::Cr3::read();
-        SpinLock::new(PageTable {
-            root_paddr: page_directory_base.start_address().as_u64() as usize,
-            tables: Vec::new(),
-            config: PageTableConfig {
-                address_width: AddressWidth::Level4,
-            },
-            _phantom: PhantomData,
-        })
+        // Safety: `KERNEL_PAGE_TABLE` is the only page table that is used to modify the
+        // initial mapping.
+        SpinLock::new(unsafe { PageTable::from_root_register() })
     });
 }
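
Note: the decision logic in handle_kernel_page_fault() boils down to this: a fault
on a non-present page in the kernel's linear-mapping region is resolved by mapping
`vaddr` to `paddr = vaddr - PHYS_MEM_BASE_VADDR`, while a fault on a present page is
treated as a permission upgrade via `protect`. The following minimal, self-contained
sketch (not part of the patch; `PHYS_MEM_BASE_VADDR` is a placeholder value here, and
`FaultAction` is a hypothetical type introduced only for illustration) models that
dispatch:

    /// Placeholder for the kernel's linear-mapping base; the real constant
    /// lives in aster-frame's vm module.
    const PHYS_MEM_BASE_VADDR: usize = 0xffff_8000_0000_0000;
    const PRESENT: usize = 1 << 0; // PageFaultErrorCode::PRESENT
    const WRITE: usize = 1 << 1; // PageFaultErrorCode::WRITE

    /// Hypothetical summary of what the handler does for each fault kind.
    #[derive(Debug)]
    enum FaultAction {
        /// The page is mapped but the access was disallowed: upgrade permissions.
        Protect { vaddr: usize, writable: bool },
        /// The page is not mapped yet: lazily map the linear-mapping frame.
        Map { vaddr: usize, paddr: usize, writable: bool },
    }

    fn decide(vaddr: usize, error_code: usize) -> FaultAction {
        let writable = error_code & WRITE != 0;
        if error_code & PRESENT != 0 {
            FaultAction::Protect { vaddr, writable }
        } else {
            FaultAction::Map {
                vaddr,
                paddr: vaddr - PHYS_MEM_BASE_VADDR,
                writable,
            }
        }
    }

    fn main() {
        // A write to a not-yet-mapped linear-mapping address ends in `Map`...
        println!("{:?}", decide(PHYS_MEM_BASE_VADDR + 0x1000, WRITE));
        // ...while a write to a present but read-only page ends in `Protect`.
        println!("{:?}", decide(PHYS_MEM_BASE_VADDR + 0x2000, PRESENT | WRITE));
    }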