Retire page table implementation in EFI stub

This commit is contained in:
Ruihan Li 2025-03-04 23:52:09 +08:00 committed by Tate, Hongliang Tian
parent eb74d87bed
commit ca9ec119e4
6 changed files with 64 additions and 329 deletions

18
Cargo.lock generated
View File

@ -1038,7 +1038,6 @@ dependencies = [
name = "linux-bzimage-setup"
version = "0.12.0"
dependencies = [
"bitflags 2.6.0",
"cfg-if",
"core2",
"libflate",
@ -1048,7 +1047,6 @@ dependencies = [
"uart_16550",
"uefi",
"uefi-raw",
"x86_64 0.15.2",
"xmas-elf 0.9.1",
]
@ -1305,7 +1303,7 @@ dependencies = [
"unwinding",
"volatile 0.6.1",
"x86",
"x86_64 0.14.13",
"x86_64",
"xarray",
]
@ -1691,7 +1689,7 @@ dependencies = [
"bitflags 1.3.2",
"iced-x86",
"raw-cpuid",
"x86_64 0.14.13",
"x86_64",
]
[[package]]
@ -1949,18 +1947,6 @@ dependencies = [
"volatile 0.4.6",
]
[[package]]
name = "x86_64"
version = "0.15.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0f042214de98141e9c8706e8192b73f56494087cc55ebec28ce10f26c5c364ae"
dependencies = [
"bit_field",
"bitflags 2.6.0",
"rustversion",
"volatile 0.4.6",
]
[[package]]
name = "xarray"
version = "0.1.0"

View File

@ -21,11 +21,9 @@ uart_16550 = "0.3.0"
xmas-elf = "0.9.1"
[target.x86_64-unknown-none.dependencies]
bitflags = "2.4.1"
log = "0.4.20"
uefi = { version = "0.32.0", features = ["global_allocator", "panic_handler", "logger", "qemu"]}
uefi-raw = "0.8.0"
x86_64 = "0.15.1"
tdx-guest = { version = "0.2.1", optional = true }
[features]

View File

@ -9,11 +9,7 @@ use uefi::{
};
use uefi_raw::table::system::SystemTable;
use super::{
decoder::decode_payload,
paging::{Ia32eFlags, PageNumber, PageTableCreator},
relocation::apply_rela_relocations,
};
use super::{decoder::decode_payload, relocation::apply_rela_relocations};
const PAGE_SIZE: u64 = 4096;
@ -170,64 +166,6 @@ fn efi_phase_runtime(memory_map: MemoryMapOwned, boot_params: &mut BootParams) -
}
boot_params.e820_entries = e820_entries as u8;
unsafe {
crate::console::print_str("[EFI stub] Setting up the page table.\n");
}
// Make a new linear page table. The linear page table will be stored at
// 0x4000000, hoping that the firmware will not use this area.
let mut creator = unsafe {
PageTableCreator::new(
PageNumber::from_addr(0x4000000),
PageNumber::from_addr(0x8000000),
)
};
// Map the following regions:
// - 0x0: identity map the first 4GiB;
// - 0xffff8000_00000000: linear map 4GiB to low 4 GiB;
// - 0xffffffff_80000000: linear map 2GiB to low 2 GiB;
// - 0xffff8008_00000000: linear map 1GiB to 0x00000008_00000000.
let flags = Ia32eFlags::PRESENT | Ia32eFlags::WRITABLE;
for i in 0..4 * 1024 * 1024 * 1024 / PAGE_SIZE {
let from_vpn = PageNumber::from_addr(i * PAGE_SIZE);
let from_vpn2 = PageNumber::from_addr(i * PAGE_SIZE + 0xffff8000_00000000);
let to_low_pfn = PageNumber::from_addr(i * PAGE_SIZE);
creator.map(from_vpn, to_low_pfn, flags);
creator.map(from_vpn2, to_low_pfn, flags);
}
for i in 0..2 * 1024 * 1024 * 1024 / PAGE_SIZE {
let from_vpn = PageNumber::from_addr(i * PAGE_SIZE + 0xffffffff_80000000);
let to_low_pfn = PageNumber::from_addr(i * PAGE_SIZE);
creator.map(from_vpn, to_low_pfn, flags);
}
for i in 0..1024 * 1024 * 1024 / PAGE_SIZE {
let from_vpn = PageNumber::from_addr(i * PAGE_SIZE + 0xffff8008_00000000);
let to_pfn = PageNumber::from_addr(i * PAGE_SIZE + 0x00000008_00000000);
creator.map(from_vpn, to_pfn, flags);
}
// Mark this as reserved in e820 table.
e820_table[e820_entries] = linux_boot_params::BootE820Entry {
addr: 0x4000000,
size: creator.nr_frames_used() as u64 * PAGE_SIZE,
typ: linux_boot_params::E820Type::Reserved,
};
e820_entries += 1;
boot_params.e820_entries = e820_entries as u8;
#[cfg(feature = "debug_print")]
unsafe {
crate::console::print_str("[EFI stub] Activating the new page table.\n");
}
unsafe {
creator.activate(x86_64::registers::control::Cr3Flags::PAGE_LEVEL_CACHE_DISABLE);
}
#[cfg(feature = "debug_print")]
unsafe {
crate::console::print_str("[EFI stub] Page table activated.\n");
}
unsafe {
use crate::console::{print_hex, print_str};
print_str("[EFI stub] Entering Asterinas entrypoint at ");

View File

@ -2,7 +2,6 @@
mod decoder;
mod efi;
mod paging;
mod relocation;
use core::arch::{asm, global_asm};

View File

@ -1,206 +0,0 @@
// SPDX-License-Identifier: MPL-2.0
//! This module provides an abstraction over the Intel IA32E paging mechanism, and
//! offers methods to create linear page tables.
//!
//! Notably, the 4-level page table has a paging structure named as follows:
//! - Level-4: Page Map Level 4 (PML4), or "the root page table";
//! - Level-3: Page Directory Pointer Table (PDPT);
//! - Level-2: Page Directory (PD);
//! - Level-1: Page Table (PT).
//!
//! We sometimes use "level-n" page table to refer to the page table described
//! above, avoiding the use of complicated names in the Intel manual.
use x86_64::structures::paging::PhysFrame;
// Each IA32E paging structure occupies one 4 KiB frame holding 512 64-bit entries.
const TABLE_ENTRY_COUNT: usize = 512;
bitflags::bitflags! {
/// Flag bits shared by IA32E page-table entries at every paging level.
#[derive(Clone, Copy)]
#[repr(C)]
pub struct Ia32eFlags: u64 {
const PRESENT = 1 << 0;
const WRITABLE = 1 << 1;
const USER = 1 << 2;
const WRITE_THROUGH = 1 << 3;
const NO_CACHE = 1 << 4;
const ACCESSED = 1 << 5;
const DIRTY = 1 << 6;
// In a level-1 (PT) entry, bit 7 is the PAT bit rather than "huge page";
// `PageTableCreator::map` reuses it as a "valid" marker for leaf entries.
const HUGE = 1 << 7;
const GLOBAL = 1 << 8;
const NO_EXECUTE = 1 << 63;
}
}
/// A 64-bit IA32E page-table entry: a physical address combined with `Ia32eFlags` bits.
#[repr(C)]
pub struct Ia32eEntry(u64);
/// The table in the IA32E paging specification that occupies a physical page frame.
#[repr(C)]
pub struct Ia32eTable([Ia32eEntry; TABLE_ENTRY_COUNT]);
/// A page number. It could be either a physical page number or a virtual page number.
#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct PageNumber(u64);
/// Returns whether `addr` lies exactly on a 4 KiB page boundary.
fn is_4k_page_aligned(addr: u64) -> bool {
    addr % 0x1000 == 0
}
impl PageNumber {
/// Creates a new page number from the given address.
///
/// # Panics
/// Panics if `addr` is not 4 KiB-aligned.
pub fn from_addr(addr: u64) -> Self {
assert!(is_4k_page_aligned(addr));
Self(addr >> 12)
}
/// Returns the address of the page.
pub fn addr(&self) -> u64 {
self.0 << 12
}
/// Get the physical page frame as slice.
///
/// # Safety
/// The caller must ensure that the page number is a physical page number and
/// it is identically mapped when running the code.
unsafe fn get_page_frame(&self) -> &'static mut [u8] {
// The identity mapping guaranteed by the caller makes the physical
// address directly dereferenceable as a 4 KiB byte slice.
core::slice::from_raw_parts_mut(self.addr() as *mut u8, 4096)
}
}
impl core::ops::Add<usize> for PageNumber {
type Output = Self;
fn add(self, rhs: usize) -> Self::Output {
Self(self.0 + rhs as u64)
}
}
impl core::ops::AddAssign<usize> for PageNumber {
fn add_assign(&mut self, rhs: usize) {
self.0 += rhs as u64;
}
}
impl core::ops::Sub<PageNumber> for PageNumber {
type Output = u64;
fn sub(self, rhs: PageNumber) -> Self::Output {
self.0 - rhs.0
}
}
/// A creator for a page table.
///
/// It allocates page frames from the given physical memory range. And the first
/// page frame is always used for the PML4 table (root page table).
pub struct PageTableCreator {
// The frame holding the PML4 (root) table.
first_pfn: PageNumber,
// The next free frame to hand out for lower-level tables.
next_pfn: PageNumber,
// One past the last usable frame; `allocate` asserts against this bound.
end_pfn: PageNumber,
}
/// Fills the given slice with the given value.
///
/// TODO: use `slice::fill` instead. But it currently will fail with "invalid opcode".
///
/// # Safety
/// The caller must ensure that the memory backing `dst` is writable in the
/// current address space; the `rep stosb` writes `dst.len()` bytes starting at
/// `dst.as_mut_ptr()`.
unsafe fn memset(dst: &mut [u8], val: u8) {
// `rep stosb` stores AL into [RDI], RCX times, advancing RDI.
// NOTE(review): this assumes the direction flag is clear (forward stores);
// confirm the caller environment guarantees CLD.
core::arch::asm!(
"rep stosb",
inout("rcx") dst.len() => _,
inout("rdi") dst.as_mut_ptr() => _,
in("al") val,
options(nostack),
);
}
impl PageTableCreator {
/// Creates a new page table creator.
///
/// The input physical memory range must be at least 4 page frames. New
/// mappings will be written into the given physical memory range.
///
/// # Safety
/// The caller must ensure that the given physical memory range is valid.
pub unsafe fn new(first_pfn: PageNumber, end_pfn: PageNumber) -> Self {
// Four frames is the minimum to map a single page: PML4 + PDPT + PD + PT.
assert!(end_pfn - first_pfn >= 4);
// Clear the first page for the PML4 table.
memset(first_pfn.get_page_frame(), 0);
Self {
first_pfn,
next_pfn: first_pfn + 1,
end_pfn,
}
}
// Hands out the next free frame, zeroed so all its entries start non-present.
// Panics (via assert) when the provided range is exhausted.
fn allocate(&mut self) -> PageNumber {
assert!(self.next_pfn < self.end_pfn);
let pfn = self.next_pfn;
self.next_pfn += 1;
unsafe {
memset(pfn.get_page_frame(), 0);
}
pfn
}
/// Maps the virtual page `from` to the physical page `to` with `flags`,
/// walking the four paging levels and allocating any missing intermediate
/// tables along the way.
///
/// Note that intermediate (PML4E/PDPTE/PDE) entries are created with the
/// same `flags` as the leaf mapping.
pub fn map(&mut self, from: PageNumber, to: PageNumber, flags: Ia32eFlags) {
// Level 4: the root table lives in the first frame.
let pml4 = unsafe { &mut *(self.first_pfn.addr() as *mut Ia32eTable) };
let pml4e = pml4.index(4, from.addr());
if !pml4e.flags().contains(Ia32eFlags::PRESENT) {
let pdpt_pfn = self.allocate();
pml4e.update(pdpt_pfn.addr(), flags);
}
// Level 3. NOTE(review): dereferencing `paddr()` assumes physical memory
// is identity-mapped while this code runs — confirm at the call site.
let pdpt = unsafe { &mut *(pml4e.paddr() as *mut Ia32eTable) };
let pdpte = pdpt.index(3, from.addr());
if !pdpte.flags().contains(Ia32eFlags::PRESENT) {
let pd_pfn = self.allocate();
pdpte.update(pd_pfn.addr(), flags);
}
// Level 2.
let pd = unsafe { &mut *(pdpte.paddr() as *mut Ia32eTable) };
let pde = pd.index(2, from.addr());
if !pde.flags().contains(Ia32eFlags::PRESENT) {
let pt_pfn = self.allocate();
pde.update(pt_pfn.addr(), flags);
}
// Level 1: write the leaf entry.
let pt = unsafe { &mut *(pde.paddr() as *mut Ia32eTable) };
let pte = pt.index(1, from.addr());
// In level-1 PTE, the HUGE bit is the PAT bit (page attribute table).
// We use it as the "valid" bit for the page table entry.
pte.update(to.addr(), flags | Ia32eFlags::HUGE);
}
/// Returns how many frames have been consumed so far, including the PML4 frame.
pub fn nr_frames_used(&self) -> usize {
(self.next_pfn - self.first_pfn).try_into().unwrap()
}
/// Activates the created page table.
///
/// # Safety
/// The caller must ensure that the page table is valid.
pub unsafe fn activate(&self, flags: x86_64::registers::control::Cr3Flags) {
// Writing CR3 switches the address space; the currently executing code
// and stack must remain mapped in the new page table.
x86_64::registers::control::Cr3::write(
PhysFrame::from_start_address(x86_64::PhysAddr::new(self.first_pfn.addr())).unwrap(),
flags,
);
}
}
impl Ia32eTable {
/// Returns the entry of this table selected by virtual address `va` at the
/// given paging `level` (4 = PML4 ... 1 = PT): the 9-bit field starting at
/// bit `12 + 9 * (level - 1)` of the address.
fn index(&mut self, level: usize, va: u64) -> &mut Ia32eEntry {
// NOTE(review): level 5 is accepted here although this module only
// builds 4-level tables — confirm whether 5-level paging was intended.
debug_assert!((1..=5).contains(&level));
let index = (va as usize >> (12 + 9 * (level - 1))) & (TABLE_ENTRY_COUNT - 1);
&mut self.0[index]
}
}
impl Ia32eEntry {
/// 51:12
const PHYS_ADDR_MASK: u64 = 0xF_FFFF_FFFF_F000;
/// Returns the physical address stored in bits 51:12 of the entry.
fn paddr(&self) -> u64 {
self.0 & Self::PHYS_ADDR_MASK
}
/// Returns the flag bits of the entry; bits not defined in `Ia32eFlags` are dropped.
fn flags(&self) -> Ia32eFlags {
Ia32eFlags::from_bits_truncate(self.0)
}
/// Overwrites the entry with `paddr` (masked to bits 51:12) combined with `flags`.
fn update(&mut self, paddr: u64, flags: Ia32eFlags) {
self.0 = (paddr & Self::PHYS_ADDR_MASK) | flags.bits();
}
}

View File

@ -44,15 +44,31 @@ __linux32_boot:
// Must be located at 0x8001200, ABI immutable!
.code64
.org 0x200
.global __linux64_boot_tag
__linux64_boot_tag:
.global __linux64_boot
__linux64_boot:
cli
cld
// Set the kernel call stack.
lea rsp, [boot_stack_top]
lea rsp, [rip + boot_stack_top]
push rsi // boot_params ptr from the loader
push ENTRYTYPE_LINUX_64
// Here RSP/RIP are still using low address.
jmp long_mode_in_low_address
// Set up the page table and load it.
call page_table_setup_64
lea rdx, [rip + boot_pml4]
mov cr3, rdx
// Prepare far return. The default operation size of
// far returns is 32 bits even in long mode.
lea edx, [rip + long_mode_in_low_address]
mov rax, (8 << 32)
or rdx, rax
push rdx
// Switch to our own temporary GDT.
lgdt [boot_gdtr]
retf
// The multiboot & multiboot2 entry point.
.code32
@ -85,8 +101,7 @@ magic_is_mb2:
initial_boot_setup:
// Prepare for far return. We use a far return as a fence after setting GDT.
mov eax, 24
push eax
push 24
lea edx, [protected_mode]
push edx
@ -102,13 +117,44 @@ protected_mode:
mov fs, ax
mov gs, ax
page_table_setup:
// Set up the page table.
call page_table_setup_32
// Enable PAE and PGE.
mov eax, cr4
or eax, 0xa0
mov cr4, eax
// Set the page table address.
lea eax, [boot_pml4]
mov cr3, eax
// Enable long mode.
mov ecx, 0xc0000080
rdmsr
or eax, 0x0100
wrmsr
// Prepare for far return.
push 8
lea edx, [long_mode_in_low_address]
push edx
// Enable paging.
mov eax, cr0
or eax, 0x80000000
mov cr0, eax
retf
.macro define_page_table_setup bits
.code\bits
page_table_setup_\bits:
// Zero out the page table.
mov al, 0x00
lea edi, [boot_page_table_start]
lea ecx, [boot_page_table_end]
sub ecx, edi
cld
rep stosb
// PTE flags used in this file.
@ -183,43 +229,18 @@ PTE_GLOBAL = (1 << 8)
lea edi, [boot_pd]
mov eax, PTE_PRESENT | PTE_WRITE | PTE_GLOBAL | PTE_HUGE
mov ecx, 512 * 4 // (of entries in PD) * (number of PD)
write_pd_entry:
write_pd_entry_\bits:
mov dword ptr [edi], eax
mov dword ptr [edi + 4], 0
add eax, 0x200000 // +2MiB
add edi, 8
loop write_pd_entry
loop write_pd_entry_\bits
jmp enable_long_mode
ret
.endm
enable_long_mode:
// Enable PAE and PGE.
mov eax, cr4
or eax, 0xa0
mov cr4, eax
// Set the page table address.
lea eax, [boot_pml4]
mov cr3, eax
// Enable long mode.
mov ecx, 0xc0000080
rdmsr
or eax, 0x0100
wrmsr
// Prepare for far return.
mov eax, 8
push eax
lea edx, [long_mode_in_low_address]
push edx
// Enable paging.
mov eax, cr0
or eax, 0x80000000
mov cr0, eax
retf
define_page_table_setup 32
define_page_table_setup 64
// Temporary GDTR/GDT entries. This must be located in the .boot section as its
// address (gdt) must be physical to load.
@ -286,7 +307,6 @@ long_mode:
lea rdi, [rip + __bss]
lea rcx, [rip + __bss_end]
sub rcx, rdi
cld
rep stosb
// Call the corresponding Rust entrypoint according to the boot entrypoint