diff --git a/Cargo.lock b/Cargo.lock index cf3551361..e1b2f18d5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1038,7 +1038,6 @@ dependencies = [ name = "linux-bzimage-setup" version = "0.12.0" dependencies = [ - "bitflags 2.6.0", "cfg-if", "core2", "libflate", @@ -1048,7 +1047,6 @@ dependencies = [ "uart_16550", "uefi", "uefi-raw", - "x86_64 0.15.2", "xmas-elf 0.9.1", ] @@ -1305,7 +1303,7 @@ dependencies = [ "unwinding", "volatile 0.6.1", "x86", - "x86_64 0.14.13", + "x86_64", "xarray", ] @@ -1691,7 +1689,7 @@ dependencies = [ "bitflags 1.3.2", "iced-x86", "raw-cpuid", - "x86_64 0.14.13", + "x86_64", ] [[package]] @@ -1949,18 +1947,6 @@ dependencies = [ "volatile 0.4.6", ] -[[package]] -name = "x86_64" -version = "0.15.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0f042214de98141e9c8706e8192b73f56494087cc55ebec28ce10f26c5c364ae" -dependencies = [ - "bit_field", - "bitflags 2.6.0", - "rustversion", - "volatile 0.4.6", -] - [[package]] name = "xarray" version = "0.1.0" diff --git a/ostd/libs/linux-bzimage/setup/Cargo.toml b/ostd/libs/linux-bzimage/setup/Cargo.toml index 5071af8e1..24e2face1 100644 --- a/ostd/libs/linux-bzimage/setup/Cargo.toml +++ b/ostd/libs/linux-bzimage/setup/Cargo.toml @@ -21,11 +21,9 @@ uart_16550 = "0.3.0" xmas-elf = "0.9.1" [target.x86_64-unknown-none.dependencies] -bitflags = "2.4.1" log = "0.4.20" uefi = { version = "0.32.0", features = ["global_allocator", "panic_handler", "logger", "qemu"]} uefi-raw = "0.8.0" -x86_64 = "0.15.1" tdx-guest = { version = "0.2.1", optional = true } [features] diff --git a/ostd/libs/linux-bzimage/setup/src/x86/amd64_efi/efi.rs b/ostd/libs/linux-bzimage/setup/src/x86/amd64_efi/efi.rs index dc1428b3c..7589b9bf5 100644 --- a/ostd/libs/linux-bzimage/setup/src/x86/amd64_efi/efi.rs +++ b/ostd/libs/linux-bzimage/setup/src/x86/amd64_efi/efi.rs @@ -9,11 +9,7 @@ use uefi::{ }; use uefi_raw::table::system::SystemTable; -use super::{ - decoder::decode_payload, - paging::{Ia32eFlags, PageNumber, PageTableCreator}, - relocation::apply_rela_relocations, -}; +use super::{decoder::decode_payload, relocation::apply_rela_relocations}; const PAGE_SIZE: u64 = 4096; @@ -170,64 +166,6 @@ fn efi_phase_runtime(memory_map: MemoryMapOwned, boot_params: &mut BootParams) - } boot_params.e820_entries = e820_entries as u8; - unsafe { - crate::console::print_str("[EFI stub] Setting up the page table.\n"); - } - - // Make a new linear page table. The linear page table will be stored at - // 0x4000000, hoping that the firmware will not use this area. - let mut creator = unsafe { - PageTableCreator::new( - PageNumber::from_addr(0x4000000), - PageNumber::from_addr(0x8000000), - ) - }; - // Map the following regions: - // - 0x0: identity map the first 4GiB; - // - 0xffff8000_00000000: linear map 4GiB to low 4 GiB; - // - 0xffffffff_80000000: linear map 2GiB to low 2 GiB; - // - 0xffff8008_00000000: linear map 1GiB to 0x00000008_00000000. - let flags = Ia32eFlags::PRESENT | Ia32eFlags::WRITABLE; - for i in 0..4 * 1024 * 1024 * 1024 / PAGE_SIZE { - let from_vpn = PageNumber::from_addr(i * PAGE_SIZE); - let from_vpn2 = PageNumber::from_addr(i * PAGE_SIZE + 0xffff8000_00000000); - let to_low_pfn = PageNumber::from_addr(i * PAGE_SIZE); - creator.map(from_vpn, to_low_pfn, flags); - creator.map(from_vpn2, to_low_pfn, flags); - } - for i in 0..2 * 1024 * 1024 * 1024 / PAGE_SIZE { - let from_vpn = PageNumber::from_addr(i * PAGE_SIZE + 0xffffffff_80000000); - let to_low_pfn = PageNumber::from_addr(i * PAGE_SIZE); - creator.map(from_vpn, to_low_pfn, flags); - } - for i in 0..1024 * 1024 * 1024 / PAGE_SIZE { - let from_vpn = PageNumber::from_addr(i * PAGE_SIZE + 0xffff8008_00000000); - let to_pfn = PageNumber::from_addr(i * PAGE_SIZE + 0x00000008_00000000); - creator.map(from_vpn, to_pfn, flags); - } - // Mark this as reserved in e820 table. - e820_table[e820_entries] = linux_boot_params::BootE820Entry { - addr: 0x4000000, - size: creator.nr_frames_used() as u64 * PAGE_SIZE, - typ: linux_boot_params::E820Type::Reserved, - }; - e820_entries += 1; - boot_params.e820_entries = e820_entries as u8; - - #[cfg(feature = "debug_print")] - unsafe { - crate::console::print_str("[EFI stub] Activating the new page table.\n"); - } - - unsafe { - creator.activate(x86_64::registers::control::Cr3Flags::PAGE_LEVEL_CACHE_DISABLE); - } - - #[cfg(feature = "debug_print")] - unsafe { - crate::console::print_str("[EFI stub] Page table activated.\n"); - } - unsafe { use crate::console::{print_hex, print_str}; print_str("[EFI stub] Entering Asterinas entrypoint at "); diff --git a/ostd/libs/linux-bzimage/setup/src/x86/amd64_efi/mod.rs b/ostd/libs/linux-bzimage/setup/src/x86/amd64_efi/mod.rs index 7620e1b50..2b15a197f 100644 --- a/ostd/libs/linux-bzimage/setup/src/x86/amd64_efi/mod.rs +++ b/ostd/libs/linux-bzimage/setup/src/x86/amd64_efi/mod.rs @@ -2,7 +2,6 @@ mod decoder; mod efi; -mod paging; mod relocation; use core::arch::{asm, global_asm}; diff --git a/ostd/libs/linux-bzimage/setup/src/x86/amd64_efi/paging.rs b/ostd/libs/linux-bzimage/setup/src/x86/amd64_efi/paging.rs deleted file mode 100644 index ae5a7d662..000000000 --- a/ostd/libs/linux-bzimage/setup/src/x86/amd64_efi/paging.rs +++ /dev/null @@ -1,206 +0,0 @@ -// SPDX-License-Identifier: MPL-2.0 - -//! This module provides abstraction over the Intel IA32E paging mechanism. And -//! offers method to create linear page tables. -//! -//! Notebly, the 4-level page table has a paging structure named as follows: -//! - Level-4: Page Map Level 4 (PML4), or "the root page table"; -//! - Level-3: Page Directory Pointer Table (PDPT); -//! - Level-2: Page Directory (PD); -//! - Level-1: Page Table (PT). -//! -//! We sometimes use "level-n" page table to refer to the page table described -//! above, avoiding the use of complicated names in the Intel manual. - -use x86_64::structures::paging::PhysFrame; - -const TABLE_ENTRY_COUNT: usize = 512; - -bitflags::bitflags! { - #[derive(Clone, Copy)] - #[repr(C)] - pub struct Ia32eFlags: u64 { - const PRESENT = 1 << 0; - const WRITABLE = 1 << 1; - const USER = 1 << 2; - const WRITE_THROUGH = 1 << 3; - const NO_CACHE = 1 << 4; - const ACCESSED = 1 << 5; - const DIRTY = 1 << 6; - const HUGE = 1 << 7; - const GLOBAL = 1 << 8; - const NO_EXECUTE = 1 << 63; - } -} - -#[repr(C)] -pub struct Ia32eEntry(u64); - -/// The table in the IA32E paging specification that occupies a physical page frame. -#[repr(C)] -pub struct Ia32eTable([Ia32eEntry; TABLE_ENTRY_COUNT]); - -/// A page number. It could be either a physical page number or a virtual page number. -#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] -pub struct PageNumber(u64); - -fn is_4k_page_aligned(addr: u64) -> bool { - addr & 0xfff == 0 -} - -impl PageNumber { - /// Creates a new page number from the given address. - pub fn from_addr(addr: u64) -> Self { - assert!(is_4k_page_aligned(addr)); - Self(addr >> 12) - } - /// Returns the address of the page. - pub fn addr(&self) -> u64 { - self.0 << 12 - } - /// Get the physical page frame as slice. - /// - /// # Safety - /// The caller must ensure that the page number is a physical page number and - /// it is identically mapped when running the code. - unsafe fn get_page_frame(&self) -> &'static mut [u8] { - core::slice::from_raw_parts_mut(self.addr() as *mut u8, 4096) - } -} - -impl core::ops::Add for PageNumber { - type Output = Self; - fn add(self, rhs: usize) -> Self::Output { - Self(self.0 + rhs as u64) - } -} - -impl core::ops::AddAssign for PageNumber { - fn add_assign(&mut self, rhs: usize) { - self.0 += rhs as u64; - } -} - -impl core::ops::Sub for PageNumber { - type Output = u64; - fn sub(self, rhs: PageNumber) -> Self::Output { - self.0 - rhs.0 - } -} - -/// A creator for a page table. -/// -/// It allocates page frames from the given physical memory range. And the first -/// page frame is always used for the PML4 table (root page table). -pub struct PageTableCreator { - first_pfn: PageNumber, - next_pfn: PageNumber, - end_pfn: PageNumber, -} - -/// Fills the given slice with the given value. -/// -/// TODO: use `Slice::fill` instead. But it currently will fail with "invalid opcode". -unsafe fn memset(dst: &mut [u8], val: u8) { - core::arch::asm!( - "rep stosb", - inout("rcx") dst.len() => _, - inout("rdi") dst.as_mut_ptr() => _, - in("al") val, - options(nostack), - ); -} - -impl PageTableCreator { - /// Creates a new page table creator. - /// - /// The input physical memory range must be at least 4 page frames. New - /// mappings will be written into the given physical memory range. - /// - /// # Safety - /// The caller must ensure that the given physical memory range is valid. - pub unsafe fn new(first_pfn: PageNumber, end_pfn: PageNumber) -> Self { - assert!(end_pfn - first_pfn >= 4); - // Clear the first page for the PML4 table. - memset(first_pfn.get_page_frame(), 0); - Self { - first_pfn, - next_pfn: first_pfn + 1, - end_pfn, - } - } - - fn allocate(&mut self) -> PageNumber { - assert!(self.next_pfn < self.end_pfn); - let pfn = self.next_pfn; - self.next_pfn += 1; - unsafe { - memset(pfn.get_page_frame(), 0); - } - pfn - } - - pub fn map(&mut self, from: PageNumber, to: PageNumber, flags: Ia32eFlags) { - let pml4 = unsafe { &mut *(self.first_pfn.addr() as *mut Ia32eTable) }; - let pml4e = pml4.index(4, from.addr()); - if !pml4e.flags().contains(Ia32eFlags::PRESENT) { - let pdpt_pfn = self.allocate(); - pml4e.update(pdpt_pfn.addr(), flags); - } - let pdpt = unsafe { &mut *(pml4e.paddr() as *mut Ia32eTable) }; - let pdpte = pdpt.index(3, from.addr()); - if !pdpte.flags().contains(Ia32eFlags::PRESENT) { - let pd_pfn = self.allocate(); - pdpte.update(pd_pfn.addr(), flags); - } - let pd = unsafe { &mut *(pdpte.paddr() as *mut Ia32eTable) }; - let pde = pd.index(2, from.addr()); - if !pde.flags().contains(Ia32eFlags::PRESENT) { - let pt_pfn = self.allocate(); - pde.update(pt_pfn.addr(), flags); - } - let pt = unsafe { &mut *(pde.paddr() as *mut Ia32eTable) }; - let pte = pt.index(1, from.addr()); - // In level-1 PTE, the HUGE bit is the PAT bit (page attribute table). - // We use it as the "valid" bit for the page table entry. - pte.update(to.addr(), flags | Ia32eFlags::HUGE); - } - - pub fn nr_frames_used(&self) -> usize { - (self.next_pfn - self.first_pfn).try_into().unwrap() - } - - /// Activates the created page table. - /// - /// # Safety - /// The caller must ensure that the page table is valid. - pub unsafe fn activate(&self, flags: x86_64::registers::control::Cr3Flags) { - x86_64::registers::control::Cr3::write( - PhysFrame::from_start_address(x86_64::PhysAddr::new(self.first_pfn.addr())).unwrap(), - flags, - ); - } -} - -impl Ia32eTable { - fn index(&mut self, level: usize, va: u64) -> &mut Ia32eEntry { - debug_assert!((1..=5).contains(&level)); - let index = (va as usize >> (12 + 9 * (level - 1))) & (TABLE_ENTRY_COUNT - 1); - &mut self.0[index] - } -} - -impl Ia32eEntry { - /// 51:12 - const PHYS_ADDR_MASK: u64 = 0xF_FFFF_FFFF_F000; - - fn paddr(&self) -> u64 { - self.0 & Self::PHYS_ADDR_MASK - } - fn flags(&self) -> Ia32eFlags { - Ia32eFlags::from_bits_truncate(self.0) - } - fn update(&mut self, paddr: u64, flags: Ia32eFlags) { - self.0 = (paddr & Self::PHYS_ADDR_MASK) | flags.bits(); - } -} diff --git a/ostd/src/arch/x86/boot/bsp_boot.S b/ostd/src/arch/x86/boot/bsp_boot.S index b38cbb650..667da2151 100644 --- a/ostd/src/arch/x86/boot/bsp_boot.S +++ b/ostd/src/arch/x86/boot/bsp_boot.S @@ -44,15 +44,31 @@ __linux32_boot: // Must be located at 0x8001200, ABI immutable! .code64 .org 0x200 -.global __linux64_boot_tag -__linux64_boot_tag: +.global __linux64_boot +__linux64_boot: + cli + cld + // Set the kernel call stack. - lea rsp, [boot_stack_top] + lea rsp, [rip + boot_stack_top] push rsi // boot_params ptr from the loader push ENTRYTYPE_LINUX_64 - // Here RSP/RIP are still using low address. - jmp long_mode_in_low_address + // Set up the page table and load it. + call page_table_setup_64 + lea rdx, [rip + boot_pml4] + mov cr3, rdx + + // Prepare far return. The default operation size of + // far returns is 32 bits even in long mode. + lea edx, [rip + long_mode_in_low_address] + mov rax, (8 << 32) + or rdx, rax + push rdx + + // Switch to our own temporary GDT. + lgdt [boot_gdtr] + retf // The multiboot & multiboot2 entry point. .code32 @@ -85,8 +101,7 @@ magic_is_mb2: initial_boot_setup: // Prepare for far return. We use a far return as a fence after setting GDT. - mov eax, 24 - push eax + push 24 lea edx, [protected_mode] push edx @@ -102,13 +117,44 @@ protected_mode: mov fs, ax mov gs, ax -page_table_setup: + // Set up the page table. + call page_table_setup_32 + + // Enable PAE and PGE. + mov eax, cr4 + or eax, 0xa0 + mov cr4, eax + + // Set the page table address. + lea eax, [boot_pml4] + mov cr3, eax + + // Enable long mode. + mov ecx, 0xc0000080 + rdmsr + or eax, 0x0100 + wrmsr + + // Prepare for far return. + push 8 + lea edx, [long_mode_in_low_address] + push edx + + // Enable paging. + mov eax, cr0 + or eax, 0x80000000 + mov cr0, eax + + retf + +.macro define_page_table_setup bits +.code\bits +page_table_setup_\bits: // Zero out the page table. mov al, 0x00 lea edi, [boot_page_table_start] lea ecx, [boot_page_table_end] sub ecx, edi - cld rep stosb // PTE flags used in this file. @@ -183,43 +229,18 @@ PTE_GLOBAL = (1 << 8) lea edi, [boot_pd] mov eax, PTE_PRESENT | PTE_WRITE | PTE_GLOBAL | PTE_HUGE mov ecx, 512 * 4 // (of entries in PD) * (number of PD) -write_pd_entry: +write_pd_entry_\bits: mov dword ptr [edi], eax mov dword ptr [edi + 4], 0 add eax, 0x200000 // +2MiB add edi, 8 - loop write_pd_entry + loop write_pd_entry_\bits - jmp enable_long_mode + ret +.endm -enable_long_mode: - // Enable PAE and PGE. - mov eax, cr4 - or eax, 0xa0 - mov cr4, eax - - // Set the page table address. - lea eax, [boot_pml4] - mov cr3, eax - - // Enable long mode. - mov ecx, 0xc0000080 - rdmsr - or eax, 0x0100 - wrmsr - - // Prepare for far return. - mov eax, 8 - push eax - lea edx, [long_mode_in_low_address] - push edx - - // Enable paging. - mov eax, cr0 - or eax, 0x80000000 - mov cr0, eax - - retf +define_page_table_setup 32 +define_page_table_setup 64 // Temporary GDTR/GDT entries. This must be located in the .boot section as its // address (gdt) must be physical to load. @@ -286,7 +307,6 @@ long_mode: lea rdi, [rip + __bss] lea rcx, [rip + __bss_end] sub rcx, rdi - cld rep stosb // Call the corresponding Rust entrypoint according to the boot entrypoint