Implement kernel page fault handler

Yuke Peng 2024-03-22 12:19:42 +08:00 committed by Tate, Hongliang Tian
parent 97974c9cdc
commit e7e613db4e
5 changed files with 165 additions and 65 deletions

View File

@@ -8,6 +8,7 @@ use core::{
fmt::Debug,
};
use bitflags::bitflags;
use bitvec::{
prelude::{BitVec, Lsb0},
slice::IterOnes,
@@ -398,6 +399,33 @@ define_cpu_exception!(
[RESERVED_31 = 31, Reserved]
);
bitflags! {
/// Page fault error code, as defined in the Intel 64 and IA-32 Architectures Software Developer's Manual, Volume 3.
pub struct PageFaultErrorCode: usize {
/// 1 if the fault was caused by a page-level protection violation; 0 if it was caused by a non-present page.
const PRESENT = 1 << 0;
/// 1 if the access was a write.
const WRITE = 1 << 1;
/// 1 if the access was a user-mode access.
const USER = 1 << 2;
/// 1 if the fault was caused by a reserved bit set to 1 in some paging-structure entry.
const RESERVED = 1 << 3;
/// 1 if the access was an instruction fetch.
const INSTRUCTION = 1 << 4;
/// 1 if the access was a data access to a linear address with a protection key for which
/// the protection-key rights registers disallow access.
const PROTECTION = 1 << 5;
/// 1 if the access was a shadow-stack access.
const SHADOW_STACK = 1 << 6;
/// 1 if there is no translation for the linear address using HLAT paging.
const HLAT = 1 << 7;
/// 1 if the exception is unrelated to paging and resulted from violation of SGX-specific
/// access-control requirements.
const SGX = 1 << 15;
}
}
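
As an aside, a minimal sketch of how such an error code might be decoded with the `bitflags`-generated API (the raw value below is a made-up example, not one taken from this commit):

// Hypothetical raw error code 0b11: a kernel-mode write to a present page.
let error_code = PageFaultErrorCode::from_bits_truncate(0b11);
assert!(error_code.contains(PageFaultErrorCode::PRESENT));
assert!(error_code.contains(PageFaultErrorCode::WRITE));
assert!(!error_code.contains(PageFaultErrorCode::USER));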
impl CpuException {
pub fn is_cpu_exception(trap_num: u16) -> bool {
trap_num < EXCEPTION_LIST.len() as u16

View File

@@ -3,14 +3,12 @@
use alloc::{collections::BTreeMap, fmt};
use pod::Pod;
use spin::Once;
use x86_64::{instructions::tlb, structures::paging::PhysFrame, VirtAddr};
use crate::{
sync::Mutex,
vm::{
page_table::{table_of, PageTableEntryTrait, PageTableFlagsTrait},
Paddr, Vaddr,
},
use crate::vm::{
page_table::{table_of, PageTableEntryTrait, PageTableFlagsTrait},
Paddr, Vaddr,
};
pub(crate) const NR_ENTRIES_PER_PAGE: usize = 512;
@@ -79,9 +77,9 @@ pub unsafe fn activate_page_table(root_paddr: Paddr, flags: x86_64::registers::c
);
}
pub static ALL_MAPPED_PTE: Mutex<BTreeMap<usize, PageTableEntry>> = Mutex::new(BTreeMap::new());
pub(crate) static INIT_MAPPED_PTE: Once<BTreeMap<usize, PageTableEntry>> = Once::new();
pub fn init() {
pub(crate) fn init() {
let (page_directory_base, _) = x86_64::registers::control::Cr3::read();
let page_directory_base = page_directory_base.start_address().as_u64() as usize;
@@ -89,12 +87,15 @@ pub fn init() {
let p4 = unsafe { table_of::<PageTableEntry>(page_directory_base).unwrap() };
// Unmap the lowest addresses by clearing the first P4 entry.
p4[0].clear();
let mut map_pte = ALL_MAPPED_PTE.lock();
for (i, p4_i) in p4.iter().enumerate().take(512) {
if p4_i.flags().contains(PageTableFlags::PRESENT) {
map_pte.insert(i, *p4_i);
INIT_MAPPED_PTE.call_once(|| {
let mut mapped_pte = BTreeMap::new();
for (i, p4_i) in p4.iter().enumerate().take(512) {
if p4_i.flags().contains(PageTableFlags::PRESENT) {
mapped_pte.insert(i, *p4_i);
}
}
}
mapped_pte
});
}
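
The switch from `Mutex<BTreeMap>` to `spin::Once` fits the access pattern here: the map is written exactly once during boot and only read afterwards, so readers need no lock. A minimal sketch of the pattern (names are hypothetical, not from this crate):

use spin::Once;

static EXAMPLE: Once<usize> = Once::new();

fn boot_init() {
    // The closure runs only on the first call; later calls are no-ops.
    EXAMPLE.call_once(|| 42);
}

fn reader() -> usize {
    // `get` returns `None` until `call_once` has completed, without locking.
    *EXAMPLE.get().expect("initialized during boot")
}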
impl PageTableFlagsTrait for PageTableFlags {

View File

@@ -2,6 +2,7 @@
use core::sync::atomic::{AtomicBool, Ordering};
use log::debug;
#[cfg(feature = "intel_tdx")]
use tdx_guest::tdcall;
use trapframe::TrapFrame;
@@ -13,9 +14,19 @@ use crate::arch::{
mm::PageTableFlags,
tdx_guest::{handle_virtual_exception, TdxTrapFrame},
};
#[cfg(feature = "intel_tdx")]
use crate::vm::{page_table::KERNEL_PAGE_TABLE, vaddr_to_paddr};
use crate::{arch::irq::IRQ_LIST, cpu::CpuException, cpu_local};
use crate::{
arch::{
irq::IRQ_LIST,
mm::{is_kernel_vaddr, PageTableEntry, PageTableFlags},
},
boot::memory_region::MemoryRegion,
cpu::{CpuException, PageFaultErrorCode, PAGE_FAULT},
cpu_local,
vm::{
page_table::PageTableFlagsTrait,
PageTable, PHYS_MEM_BASE_VADDR,
},
};
#[cfg(feature = "intel_tdx")]
impl TdxTrapFrame for TrapFrame {
@@ -121,32 +132,22 @@ impl TdxTrapFrame for TrapFrame {
#[no_mangle]
extern "sysv64" fn trap_handler(f: &mut TrapFrame) {
if CpuException::is_cpu_exception(f.trap_num as u16) {
const VIRTUALIZATION_EXCEPTION: u16 = 20;
const PAGE_FAULT: u16 = 14;
#[cfg(feature = "intel_tdx")]
if f.trap_num as u16 == VIRTUALIZATION_EXCEPTION {
let ve_info = tdcall::get_veinfo().expect("#VE handler: fail to get VE info\n");
handle_virtual_exception(f, &ve_info);
return;
}
#[cfg(feature = "intel_tdx")]
if f.trap_num as u16 == PAGE_FAULT {
let mut pt = KERNEL_PAGE_TABLE.get().unwrap().lock();
// Safety: This maps the virtio address when the shared bit is set in a TD; only the `PageTableFlags::SHARED` flag is added.
unsafe {
let page_fault_vaddr = x86::controlregs::cr2();
let _ = pt.map(
page_fault_vaddr,
vaddr_to_paddr(page_fault_vaddr).unwrap(),
PageTableFlags::SHARED | PageTableFlags::PRESENT | PageTableFlags::WRITABLE,
match CpuException::to_cpu_exception(f.trap_num as u16).unwrap() {
#[cfg(feature = "intel_tdx")]
&VIRTUALIZATION_EXCEPTION => {
let ve_info = tdcall::get_veinfo().expect("#VE handler: fail to get VE info\n");
handle_virtual_exception(f, &ve_info);
}
&PAGE_FAULT => {
handle_kernel_page_fault(f);
}
exception => {
panic!(
"Cannot handle kernel cpu exception:{:?}. Error code:{:x?}; Trapframe:{:#x?}.",
exception, f.error_code, f
);
};
return;
}
}
panic!(
"cannot handle this kernel cpu fault now, information:{:#x?}",
f
);
} else {
call_irq_callback_functions(f);
}
@@ -182,3 +183,62 @@ cpu_local! {
pub fn in_interrupt_context() -> bool {
IN_INTERRUPT_CONTEXT.load(Ordering::Acquire)
}
fn handle_kernel_page_fault(f: &TrapFrame) {
// The kernel page fault handler only creates mappings of the form `vaddr = paddr + PHYS_OFFSET`.
let page_fault_vaddr = x86_64::registers::control::Cr2::read().as_u64();
debug_assert!(is_kernel_vaddr(page_fault_vaddr as usize));
// Check kernel region
// FIXME: Modifying the offset mapping of the kernel code and data should not be permitted.
debug_assert!({
let kernel_region = MemoryRegion::kernel();
let start = kernel_region.base();
let end = start + kernel_region.len();
!((start..end).contains(&(page_fault_vaddr as usize)))
});
// Check error code and construct flags
// FIXME: The `PageFaultErrorCode` may not be the same on other platforms such as RISC-V.
let error_code = PageFaultErrorCode::from_bits_truncate(f.error_code);
debug!(
"Handling kernel page fault. Page fault address:{:x?}; Error code:{:?}",
page_fault_vaddr, error_code
);
debug_assert!(!error_code.contains(PageFaultErrorCode::USER));
debug_assert!(!error_code.contains(PageFaultErrorCode::INSTRUCTION));
let mut flags = PageTableFlags::empty()
.set_present(true)
.set_executable(false);
#[cfg(feature = "intel_tdx")]
{
// FIXME: Adding shared bit directly will have security issues.
flags = flags | PageTableFlags::SHARED;
}
if error_code.contains(PageFaultErrorCode::WRITE) {
flags = flags.set_writable(true);
}
// Handle page fault
let mut page_table: PageTable<PageTableEntry, crate::vm::page_table::KernelMode> =
unsafe { PageTable::from_root_register() };
if error_code.contains(PageFaultErrorCode::PRESENT) {
// FIXME: We should define the initial mapping, and the protect method here should not change the
// permissions of the initial mapping.
//
// Safety: The page fault address has been checked and the flags are constructed based on the error code.
unsafe {
page_table
.protect(page_fault_vaddr as usize, flags)
.unwrap();
}
} else {
// Safety: The page fault address has been checked and the flags are constructed based on the error code.
let paddr = page_fault_vaddr as usize - PHYS_MEM_BASE_VADDR;
unsafe {
page_table
.map(page_fault_vaddr as usize, paddr, flags)
.unwrap();
}
}
}
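
The handler relies on the linear-mapping invariant stated at the top of the function: a faulting kernel address is resolved by undoing a fixed offset. A sketch of the arithmetic, assuming a hypothetical base of 0xffff_8000_0000_0000 (the real value of `PHYS_MEM_BASE_VADDR` is defined elsewhere in the crate):

// Hypothetical base of the kernel's linear (offset) mapping.
const PHYS_MEM_BASE_VADDR: usize = 0xffff_8000_0000_0000;

let page_fault_vaddr: usize = 0xffff_8000_0010_0000;
// Undo the offset to recover the physical address to map.
let paddr = page_fault_vaddr - PHYS_MEM_BASE_VADDR;
assert_eq!(paddr, 0x10_0000);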

View File

@@ -5,7 +5,7 @@ use core::fmt;
use super::page_table::{PageTable, PageTableConfig, UserMode};
use crate::{
arch::mm::{PageTableEntry, PageTableFlags},
arch::mm::{PageTableEntry, PageTableFlags, INIT_MAPPED_PTE},
prelude::*,
vm::{
is_page_aligned, VmAllocOptions, VmFrame, VmFrameVec, VmReader, VmWriter, PAGE_SIZE,
@@ -179,7 +179,7 @@ impl MemorySet {
let mut page_table = PageTable::<PageTableEntry, UserMode>::new(PageTableConfig {
address_width: super::page_table::AddressWidth::Level4,
});
let mapped_pte = crate::arch::mm::ALL_MAPPED_PTE.lock();
let mapped_pte = INIT_MAPPED_PTE.get().unwrap();
for (index, pte) in mapped_pte.iter() {
// Safety: These PTEs are all valid PTEs fetched from the initial page table during memory initialization.
unsafe {

View File

@@ -171,6 +171,18 @@ impl<T: PageTableEntryTrait> PageTable<T, UserMode> {
// Safety: The vaddr belongs to user mode program and does not affect the kernel mapping.
unsafe { self.do_protect(vaddr, flags) }
}
/// Add a new mapping directly in the root page table.
///
/// # Safety
///
/// The caller must guarantee the validity of the PTE.
pub(crate) unsafe fn add_root_mapping(&mut self, index: usize, pte: &T) {
debug_assert!((index + 1) * size_of::<T>() <= PAGE_SIZE);
// Safety: The root_paddr refers to the root of a valid page table.
let root_ptes: &mut [T] = table_of(self.root_paddr).unwrap();
root_ptes[index] = *pte;
}
}
impl<T: PageTableEntryTrait> PageTable<T, KernelMode> {
@@ -256,19 +268,6 @@ impl<T: PageTableEntryTrait> PageTable<T, DeviceMode> {
}
impl<T: PageTableEntryTrait, M> PageTable<T, M> {
/// Add a new mapping directly in the root page table.
///
/// # Safety
///
/// The caller must guarantee the validity of the PTE.
///
pub unsafe fn add_root_mapping(&mut self, index: usize, pte: &T) {
debug_assert!((index + 1) * size_of::<T>() <= PAGE_SIZE);
// Safety: The root_paddr refers to the root of a valid page table.
let root_ptes: &mut [T] = table_of(self.root_paddr).unwrap();
root_ptes[index] = *pte;
}
/// Mapping `vaddr` to `paddr` with flags.
///
/// # Safety
@@ -384,6 +383,25 @@ impl<T: PageTableEntryTrait, M> PageTable<T, M> {
Ok(old_flags)
}
/// Constructs a page table instance from the root register (CR3 on x86).
///
/// # Safety
///
/// This function bypasses Rust's ownership model and directly constructs an instance of a
/// page table.
pub(crate) unsafe fn from_root_register() -> Self {
#[cfg(target_arch = "x86_64")]
let (page_directory_base, _) = x86_64::registers::control::Cr3::read();
PageTable {
root_paddr: page_directory_base.start_address().as_u64() as usize,
tables: Vec::new(),
config: PageTableConfig {
address_width: AddressWidth::Level4,
},
_phantom: PhantomData,
}
}
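
Usage mirrors the call site in `handle_kernel_page_fault` above; a brief sketch (the `KernelMode` marker type comes from this crate's page-table module):

// Safety: CR3 points to a valid page table set up during boot, and the caller
// must not create aliasing mutable access to the same tables.
let mut pt: PageTable<PageTableEntry, KernelMode> =
    unsafe { PageTable::from_root_register() };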
pub fn flags(&mut self, vaddr: Vaddr) -> Option<T::F> {
let last_entry = self.page_walk(vaddr, false)?;
Some(last_entry.flags())
@@ -420,15 +438,8 @@ pub fn vaddr_to_paddr(vaddr: Vaddr) -> Option<Paddr> {
pub fn init() {
KERNEL_PAGE_TABLE.call_once(|| {
#[cfg(target_arch = "x86_64")]
let (page_directory_base, _) = x86_64::registers::control::Cr3::read();
SpinLock::new(PageTable {
root_paddr: page_directory_base.start_address().as_u64() as usize,
tables: Vec::new(),
config: PageTableConfig {
address_width: AddressWidth::Level4,
},
_phantom: PhantomData,
})
// Safety: The `KERNEL_PAGE_TABLE` is the only page table that is used to modify the initial
// mapping.
SpinLock::new(unsafe { PageTable::from_root_register() })
});
}