From a17651b14b86dd70655090381db4a2f710853aa1 Mon Sep 17 00:00:00 2001 From: MemoryShore <105195940+MemoryShore@users.noreply.github.com> Date: Mon, 22 Apr 2024 15:10:47 +0800 Subject: [PATCH] =?UTF-8?q?feat(mm):=20=E5=AE=9E=E7=8E=B0=E7=BC=BA?= =?UTF-8?q?=E9=A1=B5=E4=B8=AD=E6=96=AD=E5=A4=84=E7=90=86=EF=BC=8C=E6=94=AF?= =?UTF-8?q?=E6=8C=81=E9=A1=B5=E9=9D=A2=E5=BB=B6=E8=BF=9F=E5=88=86=E9=85=8D?= =?UTF-8?q?=E5=92=8C=E5=86=99=E6=97=B6=E6=8B=B7=E8=B4=9D=EF=BC=8C=E4=BB=A5?= =?UTF-8?q?=E5=8F=8A=E7=94=A8=E6=88=B7=E6=A0=88=E8=87=AA=E5=8A=A8=E6=8B=93?= =?UTF-8?q?=E5=B1=95=20(#715)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * 实现缺页中断处理 * 完善页表拷贝逻辑 * 优化代码结构 * 完善缺页异常信息 * 修改大页映射逻辑 * 修正大页映射错误 * 添加缺页中断支持标志 * 实现用户栈自动拓展功能 --- kernel/crates/klog_types/src/lib.rs | 1 + kernel/src/arch/riscv64/mm/mod.rs | 17 +- kernel/src/arch/x86_64/interrupt/trap.rs | 129 +++++-- kernel/src/arch/x86_64/mm/fault.rs | 300 ++++++++++++++++ kernel/src/arch/x86_64/mm/mod.rs | 36 +- kernel/src/arch/x86_64/mm/pkru.rs | 56 +++ kernel/src/arch/x86_64/process/syscall.rs | 18 +- kernel/src/libs/elf.rs | 5 +- kernel/src/mm/fault.rs | 395 ++++++++++++++++++++++ kernel/src/mm/madvise.rs | 84 +++++ kernel/src/mm/mod.rs | 55 ++- kernel/src/mm/page.rs | 292 +++++++++++++++- kernel/src/mm/syscall.rs | 101 ++++++ kernel/src/mm/ucontext.rs | 290 ++++++++++++---- kernel/src/syscall/mod.rs | 11 +- user/dadk/config/nova_shell-0.1.0.dadk | 2 +- 16 files changed, 1657 insertions(+), 135 deletions(-) create mode 100644 kernel/src/arch/x86_64/mm/fault.rs create mode 100644 kernel/src/arch/x86_64/mm/pkru.rs create mode 100644 kernel/src/mm/fault.rs create mode 100644 kernel/src/mm/madvise.rs diff --git a/kernel/crates/klog_types/src/lib.rs b/kernel/crates/klog_types/src/lib.rs index 0598616c..a9b180d7 100644 --- a/kernel/crates/klog_types/src/lib.rs +++ b/kernel/crates/klog_types/src/lib.rs @@ -121,6 +121,7 @@ pub enum AllocatorLogType { Alloc(AllocLogItem), AllocZeroed(AllocLogItem), Free(AllocLogItem), + LazyAlloc(AllocLogItem), } #[repr(C)] diff --git a/kernel/src/arch/riscv64/mm/mod.rs b/kernel/src/arch/riscv64/mm/mod.rs index ea4a8baf..44baed13 100644 --- a/kernel/src/arch/riscv64/mm/mod.rs +++ b/kernel/src/arch/riscv64/mm/mod.rs @@ -42,8 +42,6 @@ pub(self) static INNER_ALLOCATOR: SpinLock>> = Spi pub struct RiscV64MMArch; impl RiscV64MMArch { - pub const ENTRY_FLAG_GLOBAL: usize = 1 << 5; - /// 使远程cpu的TLB中,指定地址范围的页失效 pub fn remote_invalidate_page( cpu: ProcessorId, @@ -85,6 +83,9 @@ const KERNEL_TOP_PAGE_ENTRY_NO: usize = (RiscV64MMArch::PHYS_OFFSET >> (RiscV64MMArch::ENTRY_ADDRESS_SHIFT - RiscV64MMArch::PAGE_ENTRY_SHIFT); impl MemoryManagementArch for RiscV64MMArch { + /// riscv64暂不支持缺页中断 + const PAGE_FAULT_ENABLED: bool = false; + const PAGE_SHIFT: usize = 12; const PAGE_ENTRY_SHIFT: usize = 9; @@ -119,6 +120,7 @@ impl MemoryManagementArch for RiscV64MMArch { const ENTRY_FLAG_EXEC: usize = (1 << 3); const ENTRY_FLAG_ACCESSED: usize = (1 << 6); const ENTRY_FLAG_DIRTY: usize = (1 << 7); + const ENTRY_FLAG_GLOBAL: usize = (1 << 5); const PHYS_OFFSET: usize = 0xffff_ffc0_0000_0000; const KERNEL_LINK_OFFSET: usize = 0x1000000; @@ -139,6 +141,8 @@ impl MemoryManagementArch for RiscV64MMArch { /// 设置1g的MMIO空间 const MMIO_SIZE: usize = 1 << PAGE_1G_SHIFT; + const ENTRY_FLAG_HUGE_PAGE: usize = Self::ENTRY_FLAG_PRESENT | Self::ENTRY_FLAG_READWRITE; + #[inline(never)] unsafe fn init() { riscv_mm_init().expect("init kernel memory management architecture failed"); @@ -239,6 +243,15 @@ impl MemoryManagementArch 
for RiscV64MMArch { let r = ((ppn & ((1 << 54) - 1)) << 10) | page_flags; return r; } + + fn vma_access_permitted( + _vma: alloc::sync::Arc, + _write: bool, + _execute: bool, + _foreign: bool, + ) -> bool { + true + } } impl VirtAddr { diff --git a/kernel/src/arch/x86_64/interrupt/trap.rs b/kernel/src/arch/x86_64/interrupt/trap.rs index 4dfa54c2..a75c7371 100644 --- a/kernel/src/arch/x86_64/interrupt/trap.rs +++ b/kernel/src/arch/x86_64/interrupt/trap.rs @@ -1,8 +1,12 @@ use system_error::SystemError; use crate::{ - arch::CurrentIrqArch, exception::InterruptArch, kerror, kwarn, mm::VirtAddr, print, - process::ProcessManager, smp::core::smp_get_processor_id, + arch::{CurrentIrqArch, MMArch}, + exception::InterruptArch, + kerror, kwarn, + mm::VirtAddr, + process::ProcessManager, + smp::core::smp_get_processor_id, }; use super::{ @@ -33,6 +37,46 @@ extern "C" { fn trap_virtualization_exception(); } +bitflags! { + pub struct TrapNr: u64 { + const X86_TRAP_DE = 0; + const X86_TRAP_DB = 1; + const X86_TRAP_NMI = 2; + const X86_TRAP_BP = 3; + const X86_TRAP_OF = 4; + const X86_TRAP_BR = 5; + const X86_TRAP_UD = 6; + const X86_TRAP_NM = 7; + const X86_TRAP_DF = 8; + const X86_TRAP_OLD_MF = 9; + const X86_TRAP_TS = 10; + const X86_TRAP_NP = 11; + const X86_TRAP_SS = 12; + const X86_TRAP_GP = 13; + const X86_TRAP_PF = 14; + const X86_TRAP_SPURIOUS = 15; + const X86_TRAP_MF = 16; + const X86_TRAP_AC = 17; + const X86_TRAP_MC = 18; + const X86_TRAP_XF = 19; + const X86_TRAP_VE = 20; + const X86_TRAP_CP = 21; + const X86_TRAP_VC = 29; + const X86_TRAP_IRET = 32; + } + + pub struct X86PfErrorCode : u32{ + const X86_PF_PROT = 1 << 0; + const X86_PF_WRITE = 1 << 1; + const X86_PF_USER = 1 << 2; + const X86_PF_RSVD = 1 << 3; + const X86_PF_INSTR = 1 << 4; + const X86_PF_PK = 1 << 5; + const X86_PF_SHSTK = 1 << 6; + const X86_PF_SGX = 1 << 15; + } +} + #[inline(never)] pub fn arch_trap_init() -> Result<(), SystemError> { unsafe { @@ -319,42 +363,59 @@ Segment Selector Index: {:#x}\n /// 处理页错误 14 #PF #[no_mangle] unsafe extern "C" fn do_page_fault(regs: &'static TrapFrame, error_code: u64) { - kerror!( - "do_page_fault(14), \tError code: {:#x},\trsp: {:#x},\trip: {:#x},\t CPU: {}, \tpid: {:?}, \nFault Address: {:#x}", - error_code, - regs.rsp, - regs.rip, - smp_get_processor_id().data(), - ProcessManager::current_pid(), - x86::controlregs::cr2() - ); + // kerror!( + // "do_page_fault(14), \tError code: {:#x},\trsp: {:#x},\trip: {:#x},\t CPU: {}, \tpid: {:?}, \nFault Address: {:#x}", + // error_code, + // regs.rsp, + // regs.rip, + // smp_get_processor_id().data(), + // ProcessManager::current_pid(), + // x86::controlregs::cr2() + // ); - if (error_code & 0x01) == 0 { - print!("Page Not Present,\t"); - } - if (error_code & 0x02) != 0 { - print!("Write Access,\t"); + // if (error_code & 0x01) == 0 { + // print!("Page Not Present,\t"); + // } + // if (error_code & 0x02) != 0 { + // print!("Write Access,\t"); + // } else { + // print!("Read Access,\t"); + // } + + // if (error_code & 0x04) != 0 { + // print!("Fault in user(3),\t"); + // } else { + // print!("Fault in supervisor(0,1,2),\t"); + // } + + // if (error_code & 0x08) != 0 { + // print!("Reserved bit violation cause fault,\t"); + // } + + // if (error_code & 0x10) != 0 { + // print!("Instruction fetch cause fault,\t"); + // } + // print!("\n"); + + // CurrentIrqArch::interrupt_enable(); + // panic!("Page Fault"); + CurrentIrqArch::interrupt_disable(); + let address = x86::controlregs::cr2(); + // crate::kinfo!( + // "fault address: {:#x}, error_code: 
{:#b}, pid: {}\n", + // address, + // error_code, + // ProcessManager::current_pid().data() + // ); + + let address = VirtAddr::new(address); + let error_code = X86PfErrorCode::from_bits_truncate(error_code as u32); + if address.check_user() { + MMArch::do_user_addr_fault(regs, error_code, address); } else { - print!("Read Access,\t"); + MMArch::do_kern_addr_fault(regs, error_code, address); } - - if (error_code & 0x04) != 0 { - print!("Fault in user(3),\t"); - } else { - print!("Fault in supervisor(0,1,2),\t"); - } - - if (error_code & 0x08) != 0 { - print!("Reserved bit violation cause fault,\t"); - } - - if (error_code & 0x10) != 0 { - print!("Instruction fetch cause fault,\t"); - } - print!("\n"); - CurrentIrqArch::interrupt_enable(); - panic!("Page Fault"); } /// 处理x87 FPU错误 16 #MF diff --git a/kernel/src/arch/x86_64/mm/fault.rs b/kernel/src/arch/x86_64/mm/fault.rs new file mode 100644 index 00000000..02f00cbd --- /dev/null +++ b/kernel/src/arch/x86_64/mm/fault.rs @@ -0,0 +1,300 @@ +use core::{ + intrinsics::{likely, unlikely}, + panic, +}; + +use alloc::sync::Arc; +use x86::{bits64::rflags::RFlags, controlregs::Cr4}; + +use crate::{ + arch::{ + interrupt::{trap::X86PfErrorCode, TrapFrame}, + mm::{MemoryManagementArch, X86_64MMArch}, + CurrentIrqArch, MMArch, + }, + exception::InterruptArch, + kerror, + mm::{ + fault::{FaultFlags, PageFaultHandler, PageFaultMessage}, + ucontext::{AddressSpace, LockedVMA}, + VirtAddr, VmFaultReason, VmFlags, + }, +}; + +use super::LockedFrameAllocator; + +pub type PageMapper = + crate::mm::page::PageMapper; + +impl X86_64MMArch { + pub fn vma_access_error(vma: Arc, error_code: X86PfErrorCode) -> bool { + let vm_flags = *vma.lock().vm_flags(); + let foreign = false; + if error_code.contains(X86PfErrorCode::X86_PF_PK) { + return true; + } + + if unlikely(error_code.contains(X86PfErrorCode::X86_PF_SGX)) { + return true; + } + + if !Self::vma_access_permitted( + vma.clone(), + error_code.contains(X86PfErrorCode::X86_PF_WRITE), + error_code.contains(X86PfErrorCode::X86_PF_INSTR), + foreign, + ) { + return true; + } + + if error_code.contains(X86PfErrorCode::X86_PF_WRITE) { + if unlikely(!vm_flags.contains(VmFlags::VM_WRITE)) { + return true; + } + return false; + } + + if unlikely(error_code.contains(X86PfErrorCode::X86_PF_PROT)) { + return true; + } + + if unlikely(!vma.is_accessible()) { + return true; + } + false + } + + pub fn show_fault_oops( + regs: &'static TrapFrame, + error_code: X86PfErrorCode, + address: VirtAddr, + ) { + let mapper = + unsafe { PageMapper::current(crate::mm::PageTableKind::User, LockedFrameAllocator) }; + if let Some(entry) = mapper.get_entry(address, 0) { + if entry.present() { + if !entry.flags().has_execute() { + kerror!("kernel tried to execute NX-protected page - exploit attempt?"); + } else if mapper.table().phys().data() & MMArch::ENTRY_FLAG_USER != 0 + && unsafe { x86::controlregs::cr4().contains(Cr4::CR4_ENABLE_SMEP) } + { + kerror!("unable to execute userspace code (SMEP?)"); + } + } + } + if address.data() < X86_64MMArch::PAGE_SIZE && !regs.is_from_user() { + kerror!( + "BUG: kernel NULL pointer dereference, address: {:#x}", + address.data() + ); + } else { + kerror!( + "BUG: unable to handle page fault for address: {:#x}", + address.data() + ); + } + + kerror!( + "#PF: {} {} in {} mode\n", + if error_code.contains(X86PfErrorCode::X86_PF_USER) { + "user" + } else { + "supervisor" + }, + if error_code.contains(X86PfErrorCode::X86_PF_INSTR) { + "instruction fetch" + } else if 
error_code.contains(X86PfErrorCode::X86_PF_WRITE) { + "write access" + } else { + "read access" + }, + if regs.is_from_user() { + "user" + } else { + "kernel" + } + ); + kerror!( + "#PF: error_code({:#04x}) - {}\n", + error_code, + if !error_code.contains(X86PfErrorCode::X86_PF_PROT) { + "not-present page" + } else if error_code.contains(X86PfErrorCode::X86_PF_RSVD) { + "reserved bit violation" + } else if error_code.contains(X86PfErrorCode::X86_PF_PK) { + "protection keys violation" + } else { + "permissions violation" + } + ); + } + + pub fn page_fault_oops( + regs: &'static TrapFrame, + error_code: X86PfErrorCode, + address: VirtAddr, + ) { + if regs.is_from_user() { + Self::show_fault_oops(regs, error_code, address); + } + panic!() + } + + /// 内核态缺页异常处理 + /// ## 参数 + /// + /// - `regs`: 中断栈帧 + /// - `error_code`: 错误标志 + /// - `address`: 发生缺页异常的虚拟地址 + pub fn do_kern_addr_fault( + _regs: &'static TrapFrame, + error_code: X86PfErrorCode, + address: VirtAddr, + ) { + panic!( + "do_kern_addr_fault has not yet been implemented, + fault address: {:#x}, + error_code: {:#b}, + pid: {}\n", + address.data(), + error_code, + crate::process::ProcessManager::current_pid().data() + ); + //TODO https://code.dragonos.org.cn/xref/linux-6.6.21/arch/x86/mm/fault.c#do_kern_addr_fault + } + + /// 用户态缺页异常处理 + /// ## 参数 + /// + /// - `regs`: 中断栈帧 + /// - `error_code`: 错误标志 + /// - `address`: 发生缺页异常的虚拟地址 + pub unsafe fn do_user_addr_fault( + regs: &'static TrapFrame, + error_code: X86PfErrorCode, + address: VirtAddr, + ) { + let rflags = RFlags::from_bits_truncate(regs.rflags); + let mut flags: FaultFlags = FaultFlags::FAULT_FLAG_ALLOW_RETRY + | FaultFlags::FAULT_FLAG_KILLABLE + | FaultFlags::FAULT_FLAG_INTERRUPTIBLE; + + if error_code & (X86PfErrorCode::X86_PF_USER | X86PfErrorCode::X86_PF_INSTR) + == X86PfErrorCode::X86_PF_INSTR + { + Self::page_fault_oops(regs, error_code, address); + } + + let feature = x86::cpuid::CpuId::new() + .get_extended_feature_info() + .unwrap(); + if unlikely( + feature.has_smap() + && !error_code.contains(X86PfErrorCode::X86_PF_USER) + && rflags.contains(RFlags::FLAGS_AC), + ) { + Self::page_fault_oops(regs, error_code, address); + } + + if unlikely(error_code.contains(X86PfErrorCode::X86_PF_RSVD)) { + // TODO https://code.dragonos.org.cn/xref/linux-6.6.21/arch/x86/mm/fault.c#pgtable_bad + panic!( + "Reserved bits are never expected to be set, error_code: {:#b}, address: {:#x}", + error_code, + address.data() + ); + } + + if regs.is_from_user() { + unsafe { CurrentIrqArch::interrupt_enable() }; + flags |= FaultFlags::FAULT_FLAG_USER; + } else if rflags.contains(RFlags::FLAGS_IF) { + unsafe { CurrentIrqArch::interrupt_enable() }; + } + + if error_code.contains(X86PfErrorCode::X86_PF_SHSTK) { + flags |= FaultFlags::FAULT_FLAG_WRITE; + } + if error_code.contains(X86PfErrorCode::X86_PF_WRITE) { + flags |= FaultFlags::FAULT_FLAG_WRITE; + } + if error_code.contains(X86PfErrorCode::X86_PF_INSTR) { + flags |= FaultFlags::FAULT_FLAG_INSTRUCTION; + } + + let current_address_space: Arc = AddressSpace::current().unwrap(); + let mut space_guard = current_address_space.write(); + let mut fault; + loop { + let vma = space_guard.mappings.find_nearest(address); + // let vma = space_guard.mappings.contains(address); + + let vma = vma.unwrap_or_else(|| { + panic!( + "can not find nearest vma, error_code: {:#b}, address: {:#x}", + error_code, + address.data(), + ) + }); + let guard = vma.lock(); + let region = *guard.region(); + let vm_flags = *guard.vm_flags(); + drop(guard); + + if 
!region.contains(address) { + if vm_flags.contains(VmFlags::VM_GROWSDOWN) { + space_guard + .extend_stack(region.start() - address) + .unwrap_or_else(|_| { + panic!( + "user stack extend failed, error_code: {:#b}, address: {:#x}", + error_code, + address.data(), + ) + }); + } else { + panic!( + "No mapped vma, error_code: {:#b}, address: {:#x}", + error_code, + address.data(), + ) + } + } + + if unlikely(Self::vma_access_error(vma.clone(), error_code)) { + panic!( + "vma access error, error_code: {:#b}, address: {:#x}", + error_code, + address.data(), + ); + } + let mapper = &mut space_guard.user_mapper.utable; + + fault = PageFaultHandler::handle_mm_fault( + PageFaultMessage::new(vma.clone(), address, flags), + mapper, + ); + + if fault.contains(VmFaultReason::VM_FAULT_COMPLETED) { + return; + } + + if unlikely(fault.contains(VmFaultReason::VM_FAULT_RETRY)) { + flags |= FaultFlags::FAULT_FLAG_TRIED; + } else { + break; + } + } + + let vm_fault_error = VmFaultReason::VM_FAULT_OOM + | VmFaultReason::VM_FAULT_SIGBUS + | VmFaultReason::VM_FAULT_SIGSEGV + | VmFaultReason::VM_FAULT_HWPOISON + | VmFaultReason::VM_FAULT_HWPOISON_LARGE + | VmFaultReason::VM_FAULT_FALLBACK; + + if likely(!fault.contains(vm_fault_error)) { + panic!("fault error: {:?}", fault) + } + } +} diff --git a/kernel/src/arch/x86_64/mm/mod.rs b/kernel/src/arch/x86_64/mm/mod.rs index b9519596..7d545758 100644 --- a/kernel/src/arch/x86_64/mm/mod.rs +++ b/kernel/src/arch/x86_64/mm/mod.rs @@ -1,6 +1,9 @@ pub mod barrier; pub mod bump; +pub mod fault; +pub mod pkru; +use alloc::sync::Arc; use alloc::vec::Vec; use hashbrown::HashSet; use x86::time::rdtsc; @@ -17,6 +20,7 @@ use crate::libs::spinlock::SpinLock; use crate::mm::allocator::page_frame::{FrameAllocator, PageFrameCount, PageFrameUsage}; use crate::mm::memblock::mem_block_manager; +use crate::mm::ucontext::LockedVMA; use crate::{ arch::MMArch, mm::allocator::{buddy::BuddyAllocator, bump::BumpAllocator}, @@ -44,10 +48,6 @@ pub type PageMapper = /// 初始的CR3寄存器的值,用于内存管理初始化时,创建的第一个内核页表的位置 static mut INITIAL_CR3_VALUE: PhysAddr = PhysAddr::new(0); -/// 内核的第一个页表在pml4中的索引 -/// 顶级页表的[256, 512)项是内核的页表 -static KERNEL_PML4E_NO: usize = (X86_64MMArch::PHYS_OFFSET & ((1 << 48) - 1)) >> 39; - static INNER_ALLOCATOR: SpinLock>> = SpinLock::new(None); #[derive(Clone, Copy, Debug)] @@ -70,6 +70,8 @@ pub struct X86_64MMArch; static XD_RESERVED: AtomicBool = AtomicBool::new(false); impl MemoryManagementArch for X86_64MMArch { + /// X86目前支持缺页中断 + const PAGE_FAULT_ENABLED: bool = true; /// 4K页 const PAGE_SHIFT: usize = 12; @@ -104,8 +106,10 @@ impl MemoryManagementArch for X86_64MMArch { /// x86_64不存在EXEC标志位,只有NO_EXEC(XD)标志位 const ENTRY_FLAG_EXEC: usize = 0; - const ENTRY_FLAG_ACCESSED: usize = 0; - const ENTRY_FLAG_DIRTY: usize = 0; + const ENTRY_FLAG_ACCESSED: usize = 1 << 5; + const ENTRY_FLAG_DIRTY: usize = 1 << 6; + const ENTRY_FLAG_HUGE_PAGE: usize = 1 << 7; + const ENTRY_FLAG_GLOBAL: usize = 1 << 8; /// 物理地址与虚拟地址的偏移量 /// 0xffff_8000_0000_0000 @@ -237,7 +241,7 @@ impl MemoryManagementArch for X86_64MMArch { }; // 复制内核的映射 - for pml4_entry_no in KERNEL_PML4E_NO..512 { + for pml4_entry_no in MMArch::PAGE_KERNEL_INDEX..MMArch::PAGE_ENTRY_NUM { copy_mapping(pml4_entry_no); } @@ -262,6 +266,9 @@ impl MemoryManagementArch for X86_64MMArch { const PAGE_ENTRY_MASK: usize = Self::PAGE_ENTRY_NUM - 1; + const PAGE_KERNEL_INDEX: usize = (Self::PHYS_OFFSET & Self::PAGE_ADDRESS_MASK) + >> (Self::PAGE_ADDRESS_SHIFT - Self::PAGE_ENTRY_SHIFT); + const PAGE_NEGATIVE_MASK: usize = !((Self::PAGE_ADDRESS_SIZE) - 
1); const ENTRY_ADDRESS_SIZE: usize = 1 << Self::ENTRY_ADDRESS_SHIFT; @@ -302,6 +309,21 @@ impl MemoryManagementArch for X86_64MMArch { fn make_entry(paddr: PhysAddr, page_flags: usize) -> usize { return paddr.data() | page_flags; } + + fn vma_access_permitted( + vma: Arc, + write: bool, + execute: bool, + foreign: bool, + ) -> bool { + if execute { + return true; + } + if foreign | vma.is_foreign() { + return true; + } + pkru::pkru_allows_pkey(pkru::vma_pkey(vma), write) + } } impl X86_64MMArch { diff --git a/kernel/src/arch/x86_64/mm/pkru.rs b/kernel/src/arch/x86_64/mm/pkru.rs new file mode 100644 index 00000000..f467f8d1 --- /dev/null +++ b/kernel/src/arch/x86_64/mm/pkru.rs @@ -0,0 +1,56 @@ +use alloc::sync::Arc; + +use crate::mm::ucontext::LockedVMA; + +const VM_PKEY_SHIFT: usize = 32; + +/// X86_64架构的ProtectionKey使用32、33、34、35四个比特位 +const PKEY_MASK: usize = 1 << 32 | 1 << 33 | 1 << 34 | 1 << 35; + +/// 获取vma的protection_key +/// +/// ## 参数 +/// +/// - `vma`: VMA +/// +/// ## 返回值 +/// - `u16`: vma的protection_key +pub fn vma_pkey(vma: Arc) -> u16 { + let guard = vma.lock(); + ((guard.vm_flags().bits() & PKEY_MASK as u64) >> VM_PKEY_SHIFT) as u16 +} + +// TODO pkru实现参考:https://code.dragonos.org.cn/xref/linux-6.6.21/arch/x86/include/asm/pkru.h + +const PKRU_AD_BIT: u16 = 0x1; +const PKRU_WD_BIT: u16 = 0x2; +const PKRU_BITS_PER_PKEY: u32 = 2; + +pub fn pkru_allows_pkey(pkey: u16, write: bool) -> bool { + let pkru = read_pkru(); + + if !pkru_allows_read(pkru, pkey) { + return false; + } + if write & !pkru_allows_write(pkru, pkey) { + return false; + } + + true +} + +pub fn pkru_allows_read(pkru: u32, pkey: u16) -> bool { + let pkru_pkey_bits: u32 = pkey as u32 * PKRU_BITS_PER_PKEY; + pkru & ((PKRU_AD_BIT as u32) << pkru_pkey_bits) > 0 +} + +pub fn pkru_allows_write(pkru: u32, pkey: u16) -> bool { + let pkru_pkey_bits: u32 = pkey as u32 * PKRU_BITS_PER_PKEY; + pkru & (((PKRU_AD_BIT | PKRU_WD_BIT) as u32) << pkru_pkey_bits) > 0 +} + +pub fn read_pkru() -> u32 { + // TODO 实现读取pkru逻辑 + // https://code.dragonos.org.cn/xref/linux-6.6.21/arch/x86/include/asm/pkru.h?fi=read_pkru#read_pkru + 0 +} diff --git a/kernel/src/arch/x86_64/process/syscall.rs b/kernel/src/arch/x86_64/process/syscall.rs index ec488bbd..42a7f3ad 100644 --- a/kernel/src/arch/x86_64/process/syscall.rs +++ b/kernel/src/arch/x86_64/process/syscall.rs @@ -74,18 +74,26 @@ impl Syscall { param.init_info_mut().envs = envp; // 把proc_init_info写到用户栈上 - + let mut ustack_message = unsafe { + address_space + .write() + .user_stack_mut() + .expect("No user stack found") + .clone_info_only() + }; let (user_sp, argv_ptr) = unsafe { param .init_info() .push_at( - address_space - .write() - .user_stack_mut() - .expect("No user stack found"), + // address_space + // .write() + // .user_stack_mut() + // .expect("No user stack found"), + &mut ustack_message, ) .expect("Failed to push proc_init_info to user stack") }; + address_space.write().user_stack = Some(ustack_message); // kdebug!("write proc_init_info to user stack done"); diff --git a/kernel/src/libs/elf.rs b/kernel/src/libs/elf.rs index 2cb4f7e3..7469814c 100644 --- a/kernel/src/libs/elf.rs +++ b/kernel/src/libs/elf.rs @@ -131,6 +131,7 @@ impl ElfLoader { prot_flags, MapFlags::MAP_ANONYMOUS | MapFlags::MAP_FIXED_NOREPLACE, false, + true, ); if r.is_err() { kerror!("set_elf_brk: map_anonymous failed, err={:?}", r); @@ -256,7 +257,7 @@ impl ElfLoader { // kdebug!("total_size={}", total_size); map_addr = user_vm_guard - .map_anonymous(addr_to_map, total_size, tmp_prot, *map_flags, false) + 
.map_anonymous(addr_to_map, total_size, tmp_prot, *map_flags, false, true) .map_err(map_err_handler)? .virt_address(); // kdebug!("map ok: addr_to_map={:?}", addr_to_map); @@ -288,7 +289,7 @@ impl ElfLoader { // kdebug!("total size = 0"); map_addr = user_vm_guard - .map_anonymous(addr_to_map, map_size, tmp_prot, *map_flags, false)? + .map_anonymous(addr_to_map, map_size, tmp_prot, *map_flags, false, true)? .virt_address(); // kdebug!( // "map ok: addr_to_map={:?}, map_addr={map_addr:?},beginning_page_offset={beginning_page_offset:?}", diff --git a/kernel/src/mm/fault.rs b/kernel/src/mm/fault.rs new file mode 100644 index 00000000..9f045ac5 --- /dev/null +++ b/kernel/src/mm/fault.rs @@ -0,0 +1,395 @@ +use core::{alloc::Layout, intrinsics::unlikely, panic}; + +use alloc::sync::Arc; + +use crate::{ + arch::{mm::PageMapper, MMArch}, + mm::{ + page::{page_manager_lock_irqsave, PageFlags}, + ucontext::LockedVMA, + VirtAddr, VmFaultReason, VmFlags, + }, + process::{ProcessManager, ProcessState}, +}; + +use crate::mm::MemoryManagementArch; + +bitflags! { + pub struct FaultFlags: u64{ + const FAULT_FLAG_WRITE = 1 << 0; + const FAULT_FLAG_MKWRITE = 1 << 1; + const FAULT_FLAG_ALLOW_RETRY = 1 << 2; + const FAULT_FLAG_RETRY_NOWAIT = 1 << 3; + const FAULT_FLAG_KILLABLE = 1 << 4; + const FAULT_FLAG_TRIED = 1 << 5; + const FAULT_FLAG_USER = 1 << 6; + const FAULT_FLAG_REMOTE = 1 << 7; + const FAULT_FLAG_INSTRUCTION = 1 << 8; + const FAULT_FLAG_INTERRUPTIBLE =1 << 9; + const FAULT_FLAG_UNSHARE = 1 << 10; + const FAULT_FLAG_ORIG_PTE_VALID = 1 << 11; + const FAULT_FLAG_VMA_LOCK = 1 << 12; + } +} + +/// # 缺页异常信息结构体 +/// 包含了页面错误处理的相关信息,例如出错的地址、VMA等 +#[derive(Debug)] +pub struct PageFaultMessage { + vma: Arc, + address: VirtAddr, + flags: FaultFlags, +} + +impl PageFaultMessage { + pub fn new(vma: Arc, address: VirtAddr, flags: FaultFlags) -> Self { + Self { + vma: vma.clone(), + address, + flags, + } + } + + #[inline(always)] + #[allow(dead_code)] + pub fn vma(&self) -> Arc { + self.vma.clone() + } + + #[inline(always)] + #[allow(dead_code)] + pub fn address(&self) -> VirtAddr { + self.address + } + + #[inline(always)] + #[allow(dead_code)] + pub fn address_aligned_down(&self) -> VirtAddr { + VirtAddr::new(crate::libs::align::page_align_down(self.address.data())) + } + + #[inline(always)] + #[allow(dead_code)] + pub fn flags(&self) -> FaultFlags { + self.flags + } +} + +/// 缺页中断处理结构体 +pub struct PageFaultHandler; + +impl PageFaultHandler { + /// 处理缺页异常 + /// ## 参数 + /// + /// - `pfm`: 缺页异常信息 + /// - `mapper`: 页表映射器 + /// + /// ## 返回值 + /// - VmFaultReason: 页面错误处理信息标志 + pub unsafe fn handle_mm_fault(pfm: PageFaultMessage, mapper: &mut PageMapper) -> VmFaultReason { + let flags = pfm.flags(); + let vma = pfm.vma(); + let current_pcb = ProcessManager::current_pcb(); + let mut guard = current_pcb.sched_info().inner_lock_write_irqsave(); + guard.set_state(ProcessState::Runnable); + + if !MMArch::vma_access_permitted( + vma.clone(), + flags.contains(FaultFlags::FAULT_FLAG_WRITE), + flags.contains(FaultFlags::FAULT_FLAG_INSTRUCTION), + flags.contains(FaultFlags::FAULT_FLAG_REMOTE), + ) { + return VmFaultReason::VM_FAULT_SIGSEGV; + } + + let guard = vma.lock(); + let vm_flags = *guard.vm_flags(); + drop(guard); + if unlikely(vm_flags.contains(VmFlags::VM_HUGETLB)) { + //TODO: 添加handle_hugetlb_fault处理大页缺页异常 + } else { + Self::handle_normal_fault(pfm, mapper); + } + + VmFaultReason::VM_FAULT_COMPLETED + } + + /// 处理普通页缺页异常 + /// ## 参数 + /// + /// - `pfm`: 缺页异常信息 + /// - `mapper`: 页表映射器 + /// + /// ## 返回值 + /// - 
VmFaultReason: 页面错误处理信息标志 + pub unsafe fn handle_normal_fault( + pfm: PageFaultMessage, + mapper: &mut PageMapper, + ) -> VmFaultReason { + let address = pfm.address_aligned_down(); + let vma = pfm.vma.clone(); + if mapper.get_entry(address, 3).is_none() { + mapper + .allocate_table(address, 2) + .expect("failed to allocate PUD table"); + } + let page_flags = vma.lock().flags(); + + for level in 2..=3 { + let level = MMArch::PAGE_LEVELS - level; + if mapper.get_entry(address, level).is_none() { + if vma.is_hugepage() { + if vma.is_anonymous() { + mapper.map_huge_page(address, page_flags); + } + } else if mapper.allocate_table(address, level - 1).is_none() { + return VmFaultReason::VM_FAULT_OOM; + } + } + } + + Self::handle_pte_fault(pfm, mapper) + } + + /// 处理页表项异常 + /// ## 参数 + /// + /// - `pfm`: 缺页异常信息 + /// - `mapper`: 页表映射器 + /// + /// ## 返回值 + /// - VmFaultReason: 页面错误处理信息标志 + pub unsafe fn handle_pte_fault( + pfm: PageFaultMessage, + mapper: &mut PageMapper, + ) -> VmFaultReason { + let address = pfm.address_aligned_down(); + let flags = pfm.flags; + let vma = pfm.vma.clone(); + if let Some(mut entry) = mapper.get_entry(address, 0) { + if !entry.present() { + return Self::do_swap_page(pfm, mapper); + } + if entry.protnone() && vma.is_accessible() { + return Self::do_numa_page(pfm, mapper); + } + if flags.intersects(FaultFlags::FAULT_FLAG_WRITE | FaultFlags::FAULT_FLAG_UNSHARE) { + if !entry.write() { + return Self::do_wp_page(pfm, mapper); + } else { + entry.set_flags(PageFlags::from_data(MMArch::ENTRY_FLAG_DIRTY)); + } + } + } else if vma.is_anonymous() { + return Self::do_anonymous_page(pfm, mapper); + } else { + return Self::do_fault(pfm, mapper); + } + + VmFaultReason::VM_FAULT_COMPLETED + } + + /// 处理匿名映射页缺页异常 + /// ## 参数 + /// + /// - `pfm`: 缺页异常信息 + /// - `mapper`: 页表映射器 + /// + /// ## 返回值 + /// - VmFaultReason: 页面错误处理信息标志 + pub unsafe fn do_anonymous_page( + pfm: PageFaultMessage, + mapper: &mut PageMapper, + ) -> VmFaultReason { + let address = pfm.address_aligned_down(); + let vma = pfm.vma.clone(); + let guard = vma.lock(); + if let Some(flush) = mapper.map(address, guard.flags()) { + flush.flush(); + crate::debug::klog::mm::mm_debug_log( + klog_types::AllocatorLogType::LazyAlloc(klog_types::AllocLogItem::new( + Layout::from_size_align(MMArch::PAGE_SIZE, MMArch::PAGE_SIZE).unwrap(), + Some(address.data()), + Some(mapper.translate(address).unwrap().0.data()), + )), + klog_types::LogSource::Buddy, + ); + let paddr = mapper.translate(address).unwrap().0; + let mut anon_vma_guard = page_manager_lock_irqsave(); + let page = anon_vma_guard.get_mut(&paddr); + page.insert_vma(vma.clone()); + VmFaultReason::VM_FAULT_COMPLETED + } else { + VmFaultReason::VM_FAULT_OOM + } + } + + /// 处理文件映射页的缺页异常 + /// ## 参数 + /// + /// - `pfm`: 缺页异常信息 + /// - `mapper`: 页表映射器 + /// + /// ## 返回值 + /// - VmFaultReason: 页面错误处理信息标志 + #[allow(unused_variables)] + pub unsafe fn do_fault(pfm: PageFaultMessage, mapper: &mut PageMapper) -> VmFaultReason { + panic!( + "do_fault has not yet been implemented, + fault message: {:?}, + pid: {}\n", + pfm, + crate::process::ProcessManager::current_pid().data() + ); + // TODO https://code.dragonos.org.cn/xref/linux-6.6.21/mm/memory.c#do_fault + } + + /// 处理私有文件映射的写时复制 + /// ## 参数 + /// + /// - `pfm`: 缺页异常信息 + /// - `mapper`: 页表映射器 + /// + /// ## 返回值 + /// - VmFaultReason: 页面错误处理信息标志 + #[allow(dead_code, unused_variables)] + pub unsafe fn do_cow_fault(pfm: PageFaultMessage, mapper: &mut PageMapper) -> VmFaultReason { + panic!( + "do_cow_fault has not yet been 
implemented, + fault message: {:?}, + pid: {}\n", + pfm, + crate::process::ProcessManager::current_pid().data() + ); + // TODO https://code.dragonos.org.cn/xref/linux-6.6.21/mm/memory.c#do_cow_fault + } + + /// 处理文件映射页的缺页异常 + /// ## 参数 + /// + /// - `pfm`: 缺页异常信息 + /// - `mapper`: 页表映射器 + /// + /// ## 返回值 + /// - VmFaultReason: 页面错误处理信息标志 + #[allow(dead_code, unused_variables)] + pub unsafe fn do_read_fault(pfm: PageFaultMessage, mapper: &mut PageMapper) -> VmFaultReason { + panic!( + "do_read_fault has not yet been implemented, + fault message: {:?}, + pid: {}\n", + pfm, + crate::process::ProcessManager::current_pid().data() + ); + // TODO https://code.dragonos.org.cn/xref/linux-6.6.21/mm/memory.c#do_read_fault + } + + /// 处理对共享文件映射区写入引起的缺页 + /// ## 参数 + /// + /// - `pfm`: 缺页异常信息 + /// - `mapper`: 页表映射器 + /// + /// ## 返回值 + /// - VmFaultReason: 页面错误处理信息标志 + #[allow(dead_code, unused_variables)] + pub unsafe fn do_shared_fault(pfm: PageFaultMessage, mapper: &mut PageMapper) -> VmFaultReason { + panic!( + "do_shared_fault has not yet been implemented, + fault message: {:?}, + pid: {}\n", + pfm, + crate::process::ProcessManager::current_pid().data() + ); + // TODO https://code.dragonos.org.cn/xref/linux-6.6.21/mm/memory.c#do_shared_fault + } + + /// 处理被置换页面的缺页异常 + /// ## 参数 + /// + /// - `pfm`: 缺页异常信息 + /// - `mapper`: 页表映射器 + /// + /// ## 返回值 + /// - VmFaultReason: 页面错误处理信息标志 + #[allow(unused_variables)] + pub unsafe fn do_swap_page(pfm: PageFaultMessage, mapper: &mut PageMapper) -> VmFaultReason { + panic!( + "do_swap_page has not yet been implemented, + fault message: {:?}, + pid: {}\n", + pfm, + crate::process::ProcessManager::current_pid().data() + ); + // TODO https://code.dragonos.org.cn/xref/linux-6.6.21/mm/memory.c#do_swap_page + } + + /// 处理NUMA的缺页异常 + /// ## 参数 + /// + /// - `pfm`: 缺页异常信息 + /// - `mapper`: 页表映射器 + /// + /// ## 返回值 + /// - VmFaultReason: 页面错误处理信息标志 + #[allow(unused_variables)] + pub unsafe fn do_numa_page(pfm: PageFaultMessage, mapper: &mut PageMapper) -> VmFaultReason { + panic!( + "do_numa_page has not yet been implemented, + fault message: {:?}, + pid: {}\n", + pfm, + crate::process::ProcessManager::current_pid().data() + ); + // TODO https://code.dragonos.org.cn/xref/linux-6.6.21/mm/memory.c#do_numa_page + } + + /// 处理写保护页面的写保护异常 + /// ## 参数 + /// + /// - `pfm`: 缺页异常信息 + /// - `mapper`: 页表映射器 + /// + /// ## 返回值 + /// - VmFaultReason: 页面错误处理信息标志 + pub unsafe fn do_wp_page(pfm: PageFaultMessage, mapper: &mut PageMapper) -> VmFaultReason { + let address = pfm.address_aligned_down(); + let vma = pfm.vma.clone(); + let old_paddr = mapper.translate(address).unwrap().0; + let mut page_manager = page_manager_lock_irqsave(); + let map_count = page_manager.get_mut(&old_paddr).map_count(); + drop(page_manager); + + let mut entry = mapper.get_entry(address, 0).unwrap(); + let new_flags = entry.flags().set_write(true); + + if map_count == 1 { + let table = mapper.get_table(address, 0).unwrap(); + let i = table.index_of(address).unwrap(); + entry.set_flags(new_flags); + table.set_entry(i, entry); + VmFaultReason::VM_FAULT_COMPLETED + } else if let Some(flush) = mapper.map(address, new_flags) { + let mut page_manager = page_manager_lock_irqsave(); + let old_page = page_manager.get_mut(&old_paddr); + old_page.remove_vma(&vma); + drop(page_manager); + + flush.flush(); + let paddr = mapper.translate(address).unwrap().0; + let mut anon_vma_guard = page_manager_lock_irqsave(); + let page = anon_vma_guard.get_mut(&paddr); + page.insert_vma(vma.clone()); + + 
(MMArch::phys_2_virt(paddr).unwrap().data() as *mut u8).copy_from_nonoverlapping( + MMArch::phys_2_virt(old_paddr).unwrap().data() as *mut u8, + MMArch::PAGE_SIZE, + ); + + VmFaultReason::VM_FAULT_COMPLETED + } else { + VmFaultReason::VM_FAULT_OOM + } + } +} diff --git a/kernel/src/mm/madvise.rs b/kernel/src/mm/madvise.rs new file mode 100644 index 00000000..5e9587a4 --- /dev/null +++ b/kernel/src/mm/madvise.rs @@ -0,0 +1,84 @@ +use system_error::SystemError; + +use crate::arch::{mm::PageMapper, MMArch}; + +use super::{page::Flusher, syscall::MadvFlags, ucontext::LockedVMA, VmFlags}; + +impl LockedVMA { + pub fn do_madvise( + &self, + behavior: MadvFlags, + _mapper: &mut PageMapper, + _flusher: impl Flusher, + ) -> Result<(), SystemError> { + //TODO https://code.dragonos.org.cn/xref/linux-6.6.21/mm/madvise.c?fi=madvise#do_madvise + let mut vma = self.lock(); + let mut new_flags = *vma.vm_flags(); + match behavior { + MadvFlags::MADV_REMOVE => { + // TODO + } + + MadvFlags::MADV_WILLNEED => { + // TODO + } + + MadvFlags::MADV_COLD => { + // TODO + } + + MadvFlags::MADV_PAGEOUT => { + // TODO + } + + MadvFlags::MADV_FREE => { + // TODO + } + + MadvFlags::MADV_POPULATE_READ | MadvFlags::MADV_POPULATE_WRITE => { + // TODO + } + + MadvFlags::MADV_NORMAL => { + new_flags = new_flags & !VmFlags::VM_RAND_READ & !VmFlags::VM_SEQ_READ + } + + MadvFlags::MADV_SEQUENTIAL => { + new_flags = (new_flags & !VmFlags::VM_RAND_READ) | VmFlags::VM_SEQ_READ + } + MadvFlags::MADV_RANDOM => { + new_flags = (new_flags & !VmFlags::VM_SEQ_READ) | VmFlags::VM_RAND_READ + } + + MadvFlags::MADV_DONTFORK => new_flags |= VmFlags::VM_DONTCOPY, + + MadvFlags::MADV_DOFORK => { + if vma.vm_flags().contains(VmFlags::VM_IO) { + return Err(SystemError::EINVAL); + } + new_flags &= !VmFlags::VM_DONTCOPY; + } + + MadvFlags::MADV_WIPEONFORK => { + //MADV_WIPEONFORK仅支持匿名映射,后续实现其他映射方式后要在此处添加判断条件 + new_flags |= VmFlags::VM_WIPEONFORK; + } + + MadvFlags::MADV_KEEPONFORK => new_flags &= !VmFlags::VM_WIPEONFORK, + + MadvFlags::MADV_DONTDUMP => new_flags |= VmFlags::VM_DONTDUMP, + + //MADV_DODUMP不支持巨页映射,后续需要添加判断条件 + MadvFlags::MADV_DODUMP => new_flags &= !VmFlags::VM_DONTDUMP, + + MadvFlags::MADV_MERGEABLE | MadvFlags::MADV_UNMERGEABLE => {} + + MadvFlags::MADV_HUGEPAGE | MadvFlags::MADV_NOHUGEPAGE => {} + + MadvFlags::MADV_COLLAPSE => {} + _ => {} + } + vma.set_vm_flags(new_flags); + Ok(()) + } +} diff --git a/kernel/src/mm/mod.rs b/kernel/src/mm/mod.rs index c2dae170..b37ba5a6 100644 --- a/kernel/src/mm/mod.rs +++ b/kernel/src/mm/mod.rs @@ -16,14 +16,16 @@ use self::{ allocator::page_frame::{VirtPageFrame, VirtPageFrameIter}, memblock::MemoryAreaAttr, page::round_up_to_page_size, - ucontext::{AddressSpace, UserMapper}, + ucontext::{AddressSpace, LockedVMA, UserMapper}, }; pub mod allocator; pub mod c_adapter; pub mod early_ioremap; +pub mod fault; pub mod init; pub mod kernel_mapper; +pub mod madvise; pub mod memblock; pub mod mmio_buddy; pub mod no_init; @@ -38,7 +40,7 @@ static mut __IDLE_PROCESS_ADDRESS_SPACE: Option> = None; bitflags! { /// Virtual memory flags #[allow(clippy::bad_bit_mask)] - pub struct VmFlags:u32{ + pub struct VmFlags:u64{ const VM_NONE = 0x00000000; const VM_READ = 0x00000001; @@ -73,6 +75,25 @@ bitflags! 
{ const VM_WIPEONFORK = 0x02000000; const VM_DONTDUMP = 0x04000000; } + + /// 描述页面错误处理过程中发生的不同情况或结果 + pub struct VmFaultReason:u32 { + const VM_FAULT_OOM = 0x000001; + const VM_FAULT_SIGBUS = 0x000002; + const VM_FAULT_MAJOR = 0x000004; + const VM_FAULT_WRITE = 0x000008; + const VM_FAULT_HWPOISON = 0x000010; + const VM_FAULT_HWPOISON_LARGE = 0x000020; + const VM_FAULT_SIGSEGV = 0x000040; + const VM_FAULT_NOPAGE = 0x000100; + const VM_FAULT_LOCKED = 0x000200; + const VM_FAULT_RETRY = 0x000400; + const VM_FAULT_FALLBACK = 0x000800; + const VM_FAULT_DONE_COW = 0x001000; + const VM_FAULT_NEEDDSYNC = 0x002000; + const VM_FAULT_COMPLETED = 0x004000; + const VM_FAULT_HINDEX_MASK = 0x0f0000; + } } /// 获取内核IDLE进程的用户地址空间结构体 @@ -407,6 +428,8 @@ impl Default for PhysMemoryArea { } pub trait MemoryManagementArch: Clone + Copy + Debug { + /// 是否支持缺页中断 + const PAGE_FAULT_ENABLED: bool; /// 页面大小的shift(假如页面4K,那么这个值就是12,因为2^12=4096) const PAGE_SHIFT: usize; /// 每个页表的页表项数目。(以2^n次幂来表示)假如有512个页表项,那么这个值就是9 @@ -440,6 +463,10 @@ pub trait MemoryManagementArch: Clone + Copy + Debug { const ENTRY_FLAG_DIRTY: usize; /// 当该位为1时,代表这个页面被处理器访问过 const ENTRY_FLAG_ACCESSED: usize; + /// 标记该页表项指向的页是否为大页 + const ENTRY_FLAG_HUGE_PAGE: usize; + /// 当该位为1时,代表该页表项是全局的 + const ENTRY_FLAG_GLOBAL: usize; /// 虚拟地址与物理地址的偏移量 const PHYS_OFFSET: usize; @@ -468,6 +495,9 @@ pub trait MemoryManagementArch: Clone + Copy + Debug { const PAGE_ENTRY_NUM: usize = 1 << Self::PAGE_ENTRY_SHIFT; /// 该字段用于根据虚拟地址,获取该虚拟地址在对应的页表中是第几个页表项 const PAGE_ENTRY_MASK: usize = Self::PAGE_ENTRY_NUM - 1; + /// 内核页表在顶级页表的第一个页表项的索引 + const PAGE_KERNEL_INDEX: usize = (Self::PHYS_OFFSET & Self::PAGE_ADDRESS_MASK) + >> (Self::PAGE_ADDRESS_SHIFT - Self::PAGE_ENTRY_SHIFT); const PAGE_NEGATIVE_MASK: usize = !((Self::PAGE_ADDRESS_SIZE) - 1); @@ -589,6 +619,27 @@ pub trait MemoryManagementArch: Clone + Copy + Debug { /// /// 页表项的值 fn make_entry(paddr: PhysAddr, page_flags: usize) -> usize; + + /// 判断一个VMA是否允许访问 + /// + /// ## 参数 + /// + /// - `vma`: 进行判断的VMA + /// - `write`: 是否需要写入权限(true 表示需要写权限) + /// - `execute`: 是否需要执行权限(true 表示需要执行权限) + /// - `foreign`: 是否是外部的(即非当前进程的)VMA + /// + /// ## 返回值 + /// - `true`: VMA允许访问 + /// - `false`: 错误的说明 + fn vma_access_permitted( + _vma: Arc, + _write: bool, + _execute: bool, + _foreign: bool, + ) -> bool { + true + } } /// @brief 虚拟地址范围 diff --git a/kernel/src/mm/page.rs b/kernel/src/mm/page.rs index bc203c58..5785ed4e 100644 --- a/kernel/src/mm/page.rs +++ b/kernel/src/mm/page.rs @@ -13,12 +13,14 @@ use crate::{ arch::{interrupt::ipi::send_ipi, MMArch}, exception::ipi::{IpiKind, IpiTarget}, ipc::shm::ShmId, - kerror, kwarn, + kerror, libs::spinlock::{SpinLock, SpinLockGuard}, }; use super::{ - allocator::page_frame::FrameAllocator, syscall::ProtFlags, ucontext::LockedVMA, + allocator::page_frame::{FrameAllocator, PageFrameCount}, + syscall::ProtFlags, + ucontext::LockedVMA, MemoryManagementArch, PageTableKind, PhysAddr, VirtAddr, }; @@ -70,7 +72,9 @@ impl PageManager { } pub fn get_mut(&mut self, paddr: &PhysAddr) -> &mut Page { - self.phys2page.get_mut(paddr).unwrap() + self.phys2page + .get_mut(paddr) + .unwrap_or_else(|| panic!("{:?}", paddr)) } pub fn insert(&mut self, paddr: PhysAddr, page: Page) { @@ -141,9 +145,15 @@ impl Page { self.free_when_zero = dealloc_when_zero; } + #[inline(always)] pub fn anon_vma(&self) -> &HashSet> { &self.anon_vma } + + #[inline(always)] + pub fn map_count(&self) -> usize { + self.map_count + } } #[derive(Debug)] @@ -265,7 +275,7 @@ impl PageTable { /// ## 返回值 /// /// 
页表项在页表中的下标。如果addr不在当前页表所表示的虚拟地址空间中,则返回None - pub unsafe fn index_of(&self, addr: VirtAddr) -> Option { + pub fn index_of(&self, addr: VirtAddr) -> Option { let addr = VirtAddr::new(addr.data() & Arch::PAGE_ADDRESS_MASK); let shift = self.level * Arch::PAGE_ENTRY_SHIFT + Arch::PAGE_SHIFT; @@ -290,6 +300,61 @@ impl PageTable { self.level - 1, )); } + + /// 拷贝页表 + /// ## 参数 + /// + /// - `allocator`: 物理页框分配器 + /// - `copy_on_write`: 是否写时复制 + pub unsafe fn clone( + &self, + allocator: &mut impl FrameAllocator, + copy_on_write: bool, + ) -> Option> { + // 分配新页面作为新的页表 + let phys = allocator.allocate_one()?; + let frame = MMArch::phys_2_virt(phys).unwrap(); + MMArch::write_bytes(frame, 0, MMArch::PAGE_SIZE); + let new_table = PageTable::new(self.base, phys, self.level); + if self.level == 0 { + for i in 0..Arch::PAGE_ENTRY_NUM { + if let Some(mut entry) = self.entry(i) { + if entry.present() { + if copy_on_write { + let mut new_flags = entry.flags().set_write(false); + entry.set_flags(new_flags); + self.set_entry(i, entry); + new_flags = new_flags.set_dirty(false); + entry.set_flags(new_flags); + new_table.set_entry(i, entry); + } else { + let phys = allocator.allocate_one()?; + let mut anon_vma_guard = page_manager_lock_irqsave(); + anon_vma_guard.insert(phys, Page::new(false)); + let old_phys = entry.address().unwrap(); + let frame = MMArch::phys_2_virt(phys).unwrap().data() as *mut u8; + frame.copy_from_nonoverlapping( + MMArch::phys_2_virt(old_phys).unwrap().data() as *mut u8, + MMArch::PAGE_SIZE, + ); + new_table.set_entry(i, PageEntry::new(phys, entry.flags())); + } + } + } + } + } else { + // 非一级页表拷贝时,对每个页表项对应的页表都进行拷贝 + for i in 0..MMArch::PAGE_ENTRY_NUM { + if let Some(next_table) = self.next_level_table(i) { + let table = next_table.clone(allocator, copy_on_write)?; + let old_entry = self.entry(i).unwrap(); + let entry = PageEntry::new(table.phys(), old_entry.flags()); + new_table.set_entry(i, entry); + } + } + } + Some(new_table) + } } /// 页表项 @@ -368,6 +433,22 @@ impl PageEntry { pub fn present(&self) -> bool { return self.data & Arch::ENTRY_FLAG_PRESENT != 0; } + + #[inline(always)] + pub fn empty(&self) -> bool { + self.data & !(Arch::ENTRY_FLAG_DIRTY & Arch::ENTRY_FLAG_ACCESSED) == 0 + } + + #[inline(always)] + pub fn protnone(&self) -> bool { + return self.data & (Arch::ENTRY_FLAG_PRESENT | Arch::ENTRY_FLAG_GLOBAL) + == Arch::ENTRY_FLAG_GLOBAL; + } + + #[inline(always)] + pub fn write(&self) -> bool { + return self.data & Arch::ENTRY_FLAG_READWRITE != 0; + } } /// 页表项的标志位 @@ -605,6 +686,36 @@ impl PageFlags { return self.has_flag(Arch::ENTRY_FLAG_WRITE_THROUGH); } + /// 设置当前页表是否为脏页 + /// + /// ## 参数 + /// + /// - value: 如果为true,那么将当前页表项的写穿策略设置为写穿。 + #[inline(always)] + pub fn set_dirty(self, value: bool) -> Self { + return self.update_flags(Arch::ENTRY_FLAG_DIRTY, value); + } + + /// 设置当前页表被访问 + /// + /// ## 参数 + /// + /// - value: 如果为true,那么将当前页表项的访问标志设置为已访问。 + #[inline(always)] + pub fn set_access(self, value: bool) -> Self { + return self.update_flags(Arch::ENTRY_FLAG_ACCESSED, value); + } + + /// 设置指向的页是否为大页 + /// + /// ## 参数 + /// + /// - value: 如果为true,那么将当前页表项的访问标志设置为已访问。 + #[inline(always)] + pub fn set_huge_page(self, value: bool) -> Self { + return self.update_flags(Arch::ENTRY_FLAG_HUGE_PAGE, value); + } + /// MMIO内存的页表项标志 #[inline(always)] pub fn mmio_flags() -> Self { @@ -758,12 +869,6 @@ impl PageMapper { let i = table.index_of(virt)?; assert!(i < Arch::PAGE_ENTRY_NUM); if table.level() == 0 { - // todo: 检查是否已经映射 - // 现在不检查的原因是,刚刚启动系统时,内核会映射一些页。 - if 
table.entry_mapped(i)? { - kwarn!("Page {:?} already mapped", virt); - } - compiler_fence(Ordering::SeqCst); table.set_entry(i, entry); @@ -797,6 +902,173 @@ impl PageMapper { } } + /// 进行大页映射 + pub unsafe fn map_huge_page( + &mut self, + virt: VirtAddr, + flags: PageFlags, + ) -> Option> { + // 验证虚拟地址是否对齐 + if !(virt.check_aligned(Arch::PAGE_SIZE)) { + kerror!("Try to map unaligned page: virt={:?}", virt); + return None; + } + + let virt = VirtAddr::new(virt.data() & (!Arch::PAGE_NEGATIVE_MASK)); + + let mut table = self.table(); + loop { + let i = table.index_of(virt)?; + assert!(i < Arch::PAGE_ENTRY_NUM); + let next_table = table.next_level_table(i); + if let Some(next_table) = next_table { + table = next_table; + } else { + break; + } + } + + // 支持2M、1G大页,即页表层级为1、2级的页表可以映射大页 + if table.level == 0 || table.level > 2 { + return None; + } + + let (phys, count) = self.frame_allocator.allocate(PageFrameCount::new( + Arch::PAGE_ENTRY_NUM.pow(table.level as u32), + ))?; + + MMArch::write_bytes( + MMArch::phys_2_virt(phys).unwrap(), + 0, + MMArch::PAGE_SIZE * count.data(), + ); + + table.set_entry( + table.index_of(virt)?, + PageEntry::new(phys, flags.set_huge_page(true)), + )?; + Some(PageFlush::new(virt)) + } + + /// 为虚拟地址分配指定层级的页表 + /// ## 参数 + /// + /// - `virt`: 虚拟地址 + /// - `level`: 指定页表层级 + /// + /// ## 返回值 + /// - Some(PageTable): 虚拟地址对应层级的页表 + /// - None: 对应页表不存在 + pub unsafe fn allocate_table( + &mut self, + virt: VirtAddr, + level: usize, + ) -> Option> { + let table = self.get_table(virt, level + 1)?; + let i = table.index_of(virt)?; + let frame = self.frame_allocator.allocate_one()?; + + // 清空这个页帧 + MMArch::write_bytes(MMArch::phys_2_virt(frame).unwrap(), 0, MMArch::PAGE_SIZE); + + // 设置页表项的flags + let flags: PageFlags = PageFlags::new_page_table(virt.kind() == PageTableKind::User); + + table.set_entry(i, PageEntry::new(frame, flags)); + table.next_level_table(i) + } + + /// 获取虚拟地址的指定层级页表 + /// ## 参数 + /// + /// - `virt`: 虚拟地址 + /// - `level`: 指定页表层级 + /// + /// ## 返回值 + /// - Some(PageTable): 虚拟地址对应层级的页表 + /// - None: 对应页表不存在 + pub fn get_table(&self, virt: VirtAddr, level: usize) -> Option> { + let mut table = self.table(); + if level > Arch::PAGE_LEVELS - 1 { + return None; + } + + unsafe { + loop { + if table.level == level { + return Some(table); + } + let i = table.index_of(virt)?; + assert!(i < Arch::PAGE_ENTRY_NUM); + + table = table.next_level_table(i)?; + } + } + } + + /// 获取虚拟地址在指定层级页表的PageEntry + /// ## 参数 + /// + /// - `virt`: 虚拟地址 + /// - `level`: 指定页表层级 + /// + /// ## 返回值 + /// - Some(PageEntry): 虚拟地址在指定层级的页表的有效PageEntry + /// - None: 无对应的有效PageEntry + pub fn get_entry(&self, virt: VirtAddr, level: usize) -> Option> { + let table = self.get_table(virt, level)?; + let i = table.index_of(virt)?; + let entry = unsafe { table.entry(i) }?; + + if !entry.empty() { + Some(entry) + } else { + None + } + + // let mut table = self.table(); + // if level > Arch::PAGE_LEVELS - 1 { + // return None; + // } + // unsafe { + // loop { + // let i = table.index_of(virt)?; + // assert!(i < Arch::PAGE_ENTRY_NUM); + + // if table.level == level { + // let entry = table.entry(i)?; + // if !entry.empty() { + // return Some(entry); + // } else { + // return None; + // } + // } + + // table = table.next_level_table(i)?; + // } + // } + } + + /// 拷贝用户空间映射 + /// ## 参数 + /// + /// - `umapper`: 要拷贝的用户空间 + /// - `copy_on_write`: 是否写时复制 + pub unsafe fn clone_user_mapping(&mut self, umapper: &mut Self, copy_on_write: bool) { + let old_table = umapper.table(); + let new_table = self.table(); + let 
allocator = self.allocator_mut(); + // 顶级页表的[0, PAGE_KERNEL_INDEX)项为用户空间映射 + for entry_index in 0..Arch::PAGE_KERNEL_INDEX { + if let Some(next_table) = old_table.next_level_table(entry_index) { + let table = next_table.clone(allocator, copy_on_write).unwrap(); + let old_entry = old_table.entry(entry_index).unwrap(); + let entry = PageEntry::new(table.phys(), old_entry.flags()); + new_table.set_entry(entry_index, entry); + } + } + } + /// 将物理地址映射到具有线性偏移量的虚拟地址 #[allow(dead_code)] pub unsafe fn map_linearly( diff --git a/kernel/src/mm/syscall.rs b/kernel/src/mm/syscall.rs index 743db840..99997ee5 100644 --- a/kernel/src/mm/syscall.rs +++ b/kernel/src/mm/syscall.rs @@ -72,6 +72,70 @@ bitflags! { const MREMAP_FIXED = 2; const MREMAP_DONTUNMAP = 4; } + + + pub struct MadvFlags: u64 { + /// 默认行为,系统会进行一定的预读和预写,适用于一般读取场景 + const MADV_NORMAL = 0; + /// 随机访问模式,系统会尽量最小化数据读取量,适用于随机访问的场景 + const MADV_RANDOM = 1; + /// 顺序访问模式,系统会进行积极的预读,访问后的页面可以尽快释放,适用于顺序读取场景 + const MADV_SEQUENTIAL = 2; + /// 通知系统预读某些页面,用于应用程序提前准备数据 + const MADV_WILLNEED = 3; + /// 通知系统应用程序不再需要某些页面,内核可以释放相关资源 + const MADV_DONTNEED = 4; + + /// 将指定范围的页面标记为延迟释放,真正的释放会延迟至内存压力发生时 + const MADV_FREE = 8; + /// 应用程序请求释放指定范围的页面和相关的后备存储 + const MADV_REMOVE = 9; + /// 在 fork 时排除指定区域 + const MADV_DONTFORK = 10; + /// 取消 MADV_DONTFORK 的效果,不再在 fork 时排除指定区域 + const MADV_DOFORK = 11; + /// 模拟内存硬件错误,触发内存错误处理器处理 + const MADV_HWPOISON = 100; + /// 尝试软下线指定的内存范围 + const MADV_SOFT_OFFLINE = 101; + + /// 应用程序建议内核尝试合并指定范围内内容相同的页面 + const MADV_MERGEABLE = 12; + /// 取消 MADV_MERGEABLE 的效果,不再合并页面 + const MADV_UNMERGEABLE = 13; + + /// 应用程序希望将指定范围以透明大页方式支持 + const MADV_HUGEPAGE = 14; + /// 将指定范围标记为不值得用透明大页支持 + const MADV_NOHUGEPAGE = 15; + + /// 应用程序请求在核心转储时排除指定范围内的页面 + const MADV_DONTDUMP = 16; + /// 取消 MADV_DONTDUMP 的效果,不再排除核心转储时的页面 + const MADV_DODUMP = 17; + + /// 在 fork 时将子进程的该区域内存填充为零 + const MADV_WIPEONFORK = 18; + /// 取消 `MADV_WIPEONFORK` 的效果,不再在 fork 时填充子进程的内存 + const MADV_KEEPONFORK = 19; + + /// 应用程序不会立刻使用这些内存,内核将页面设置为非活动状态以便在内存压力发生时轻松回收 + const MADV_COLD = 20; + /// 应用程序不会立刻使用这些内存,内核立即将这些页面换出 + const MADV_PAGEOUT = 21; + + /// 预先填充页面表,可读,通过触发读取故障 + const MADV_POPULATE_READ = 22; + /// 预先填充页面表,可写,通过触发写入故障 + const MADV_POPULATE_WRITE = 23; + + /// 与 `MADV_DONTNEED` 类似,会将被锁定的页面释放 + const MADV_DONTNEED_LOCKED = 24; + + /// 同步将页面合并为新的透明大页 + const MADV_COLLAPSE = 25; + + } } impl From for VmFlags { @@ -265,6 +329,7 @@ impl Syscall { prot_flags, map_flags, true, + true, )?; return Ok(start_page.virt_address().data()); } @@ -423,4 +488,40 @@ impl Syscall { .map_err(|_| SystemError::EINVAL)?; return Ok(0); } + + /// ## madvise系统调用 + /// + /// ## 参数 + /// + /// - `start_vaddr`:起始地址(已经对齐到页) + /// - `len`:长度(已经对齐到页) + /// - `madv_flags`:建议标志 + pub fn madvise( + start_vaddr: VirtAddr, + len: usize, + madv_flags: usize, + ) -> Result { + if !start_vaddr.check_aligned(MMArch::PAGE_SIZE) || !check_aligned(len, MMArch::PAGE_SIZE) { + return Err(SystemError::EINVAL); + } + + if unlikely(verify_area(start_vaddr, len).is_err()) { + return Err(SystemError::EINVAL); + } + if unlikely(len == 0) { + return Err(SystemError::EINVAL); + } + + let madv_flags = MadvFlags::from_bits(madv_flags as u64).ok_or(SystemError::EINVAL)?; + + let current_address_space: Arc = AddressSpace::current()?; + let start_frame = VirtPageFrame::new(start_vaddr); + let page_count = PageFrameCount::new(len / MMArch::PAGE_SIZE); + + current_address_space + .write() + .madvise(start_frame, page_count, madv_flags) + .map_err(|_| SystemError::EINVAL)?; + return Ok(0); + } } diff --git 
a/kernel/src/mm/ucontext.rs b/kernel/src/mm/ucontext.rs index a26beee6..49c3408b 100644 --- a/kernel/src/mm/ucontext.rs +++ b/kernel/src/mm/ucontext.rs @@ -22,7 +22,7 @@ use crate::{ exception::InterruptArch, libs::{ align::page_align_up, - rwlock::{RwLock, RwLockWriteGuard}, + rwlock::RwLock, spinlock::{SpinLock, SpinLockGuard}, }, mm::page::page_manager_lock_irqsave, @@ -35,7 +35,7 @@ use super::{ deallocate_page_frames, PageFrameCount, PhysPageFrame, VirtPageFrame, VirtPageFrameIter, }, page::{Flusher, InactiveFlusher, PageFlags, PageFlushAll}, - syscall::{MapFlags, MremapFlags, ProtFlags}, + syscall::{MadvFlags, MapFlags, MremapFlags, ProtFlags}, MemoryManagementArch, PageTableKind, VirtAddr, VirtRegion, VmFlags, }; @@ -160,6 +160,11 @@ impl InnerAddressSpace { let irq_guard = unsafe { CurrentIrqArch::save_and_disable_irq() }; let new_addr_space = AddressSpace::new(false)?; let mut new_guard = new_addr_space.write(); + unsafe { + new_guard + .user_mapper + .clone_from(&mut self.user_mapper, MMArch::PAGE_FAULT_ENABLED) + }; // 拷贝用户栈的结构体信息,但是不拷贝用户栈的内容(因为后面VMA的拷贝会拷贝用户栈的内容) unsafe { @@ -167,8 +172,6 @@ impl InnerAddressSpace { } let _current_stack_size = self.user_stack.as_ref().unwrap().stack_size(); - let current_mapper = &mut self.user_mapper.utable; - // 拷贝空洞 new_guard.mappings.vm_holes = self.mappings.vm_holes.clone(); @@ -176,55 +179,23 @@ impl InnerAddressSpace { // TODO: 增加对VMA是否为文件映射的判断,如果是的话,就跳过 let vma_guard: SpinLockGuard<'_, VMA> = vma.lock(); - let old_flags = vma_guard.flags(); - let tmp_flags: PageFlags = PageFlags::new().set_write(true); - // 分配内存页并创建新的VMA - let new_vma = VMA::zeroed( - VirtPageFrame::new(vma_guard.region.start()), - PageFrameCount::new(vma_guard.region.size() / MMArch::PAGE_SIZE), - *vma_guard.vm_flags(), - tmp_flags, - &mut new_guard.user_mapper.utable, - (), - )?; + // 仅拷贝VMA信息并添加反向映射,因为UserMapper克隆时已经分配了新的物理页 + let new_vma = LockedVMA::new(vma_guard.clone_info_only()); new_guard.mappings.vmas.insert(new_vma.clone()); // kdebug!("new vma: {:x?}", new_vma); - let mut new_vma_guard = new_vma.lock(); + let new_vma_guard = new_vma.lock(); + let new_mapper = &new_guard.user_mapper.utable; + let mut anon_vma_guard = page_manager_lock_irqsave(); for page in new_vma_guard.pages().map(|p| p.virt_address()) { - // kdebug!("page: {:x?}", page); - let current_frame = unsafe { - MMArch::phys_2_virt( - current_mapper - .translate(page) - .expect("VMA page not mapped") - .0, - ) - } - .expect("Phys2Virt: vaddr overflow.") - .data() as *mut u8; - - let new_frame = unsafe { - MMArch::phys_2_virt( - new_guard - .user_mapper - .utable - .translate(page) - .expect("VMA page not mapped") - .0, - ) - } - .expect("Phys2Virt: vaddr overflow.") - .data() as *mut u8; - - unsafe { - // 拷贝数据 - new_frame.copy_from_nonoverlapping(current_frame, MMArch::PAGE_SIZE); + if let Some((paddr, _)) = new_mapper.translate(page) { + let page = anon_vma_guard.get_mut(&paddr); + page.insert_vma(new_vma.clone()); } } - drop(vma_guard); - new_vma_guard.remap(old_flags, &mut new_guard.user_mapper.utable, ())?; + drop(anon_vma_guard); + drop(vma_guard); drop(new_vma_guard); } drop(new_guard); @@ -232,6 +203,24 @@ impl InnerAddressSpace { return Ok(new_addr_space); } + /// 拓展用户栈 + /// ## 参数 + /// + /// - `bytes`: 拓展大小 + #[allow(dead_code)] + pub fn extend_stack(&mut self, mut bytes: usize) -> Result<(), SystemError> { + // kdebug!("extend user stack"); + let prot_flags = ProtFlags::PROT_READ | ProtFlags::PROT_WRITE | ProtFlags::PROT_EXEC; + let map_flags = MapFlags::MAP_PRIVATE | 
MapFlags::MAP_ANONYMOUS | MapFlags::MAP_GROWSDOWN; + let stack = self.user_stack.as_mut().unwrap(); + + bytes = page_align_up(bytes); + stack.mapped_size += bytes; + let len = stack.stack_bottom - stack.mapped_size; + self.map_anonymous(len, bytes, prot_flags, map_flags, false, false)?; + return Ok(()); + } + /// 判断当前的地址空间是否是当前进程的地址空间 #[inline] pub fn is_current(&self) -> bool { @@ -247,6 +236,7 @@ impl InnerAddressSpace { /// - `prot_flags`:保护标志 /// - `map_flags`:映射标志 /// - `round_to_min`:是否将`start_vaddr`对齐到`mmap_min`,如果为`true`,则当`start_vaddr`不为0时,会对齐到`mmap_min`,否则仅向下对齐到页边界 + /// - `allocate_at_once`:是否立即分配物理空间 /// /// ## 返回 /// @@ -258,7 +248,13 @@ impl InnerAddressSpace { prot_flags: ProtFlags, map_flags: MapFlags, round_to_min: bool, + allocate_at_once: bool, ) -> Result { + let allocate_at_once = if MMArch::PAGE_FAULT_ENABLED { + allocate_at_once + } else { + true + }; // 用于对齐hint的函数 let round_hint_to_min = |hint: VirtAddr| { // 先把hint向下对齐到页边界 @@ -286,15 +282,38 @@ impl InnerAddressSpace { // kdebug!("map_anonymous: len = {}", len); - let start_page: VirtPageFrame = self.mmap( - round_hint_to_min(start_vaddr), - PageFrameCount::from_bytes(len).unwrap(), - prot_flags, - map_flags, - move |page, count, flags, mapper, flusher| { - VMA::zeroed(page, count, vm_flags, flags, mapper, flusher) - }, - )?; + let start_page: VirtPageFrame = if allocate_at_once { + self.mmap( + round_hint_to_min(start_vaddr), + PageFrameCount::from_bytes(len).unwrap(), + prot_flags, + map_flags, + move |page, count, flags, mapper, flusher| { + VMA::zeroed(page, count, vm_flags, flags, mapper, flusher) + }, + )? + } else { + self.mmap( + round_hint_to_min(start_vaddr), + PageFrameCount::from_bytes(len).unwrap(), + prot_flags, + map_flags, + move |page, count, flags, _mapper, _flusher| { + Ok(LockedVMA::new(VMA { + region: VirtRegion::new( + page.virt_address(), + count.data() * MMArch::PAGE_SIZE, + ), + vm_flags, + flags, + mapped: true, + user_address_space: None, + self_ref: Weak::default(), + provider: Provider::Allocated, + })) + }, + )? 
+ }; return Ok(start_page); } @@ -428,7 +447,7 @@ impl InnerAddressSpace { } // 获取映射后的新内存页面 - let new_page = self.map_anonymous(new_vaddr, new_len, prot_flags, map_flags, true)?; + let new_page = self.map_anonymous(new_vaddr, new_len, prot_flags, map_flags, true, true)?; let new_page_vaddr = new_page.virt_address(); // 拷贝旧内存区域内容到新内存区域 @@ -556,6 +575,47 @@ impl InnerAddressSpace { return Ok(()); } + pub fn madvise( + &mut self, + start_page: VirtPageFrame, + page_count: PageFrameCount, + behavior: MadvFlags, + ) -> Result<(), SystemError> { + let (mut active, mut inactive); + let mut flusher = if self.is_current() { + active = PageFlushAll::new(); + &mut active as &mut dyn Flusher + } else { + inactive = InactiveFlusher::new(); + &mut inactive as &mut dyn Flusher + }; + + let mapper = &mut self.user_mapper.utable; + + let region = VirtRegion::new(start_page.virt_address(), page_count.bytes()); + let regions = self.mappings.conflicts(region).collect::>(); + + for r in regions { + let r = *r.lock().region(); + let r = self.mappings.remove_vma(&r).unwrap(); + + let intersection = r.lock().region().intersect(®ion).unwrap(); + let split_result = r + .extract(intersection, mapper) + .expect("Failed to extract VMA"); + + if let Some(before) = split_result.prev { + self.mappings.insert_vma(before); + } + if let Some(after) = split_result.after { + self.mappings.insert_vma(after); + } + r.do_madvise(behavior, mapper, &mut flusher)?; + self.mappings.insert_vma(r); + } + Ok(()) + } + /// 创建新的用户栈 /// /// ## 参数 @@ -605,7 +665,7 @@ impl InnerAddressSpace { let len = new_brk - self.brk; let prot_flags = ProtFlags::PROT_READ | ProtFlags::PROT_WRITE | ProtFlags::PROT_EXEC; let map_flags = MapFlags::MAP_PRIVATE | MapFlags::MAP_ANONYMOUS | MapFlags::MAP_FIXED; - self.map_anonymous(old_brk, len, prot_flags, map_flags, true)?; + self.map_anonymous(old_brk, len, prot_flags, map_flags, true, false)?; self.brk = new_brk; return Ok(old_brk); @@ -658,6 +718,16 @@ impl UserMapper { pub fn new(utable: PageMapper) -> Self { return Self { utable }; } + + /// 拷贝用户空间映射 + /// ## 参数 + /// + /// - `umapper`: 要拷贝的用户空间 + /// - `copy_on_write`: 是否写时复制 + pub unsafe fn clone_from(&mut self, umapper: &mut Self, copy_on_write: bool) { + self.utable + .clone_user_mapping(&mut umapper.utable, copy_on_write); + } } impl Drop for UserMapper { @@ -710,6 +780,35 @@ impl UserMappings { return None; } + /// 向下寻找距离虚拟地址最近的VMA + /// ## 参数 + /// + /// - `vaddr`: 虚拟地址 + /// + /// ## 返回值 + /// - Some(Arc): 虚拟地址所在的或最近的下一个VMA + /// - None: 未找到VMA + #[allow(dead_code)] + pub fn find_nearest(&self, vaddr: VirtAddr) -> Option> { + let mut nearest: Option> = None; + for v in self.vmas.iter() { + let guard = v.lock(); + if guard.region.contains(vaddr) { + return Some(v.clone()); + } + if guard.region.start > vaddr + && if let Some(ref nearest) = nearest { + guard.region.start < nearest.lock().region.start + } else { + true + } + { + nearest = Some(v.clone()); + } + } + return nearest; + } + /// 获取当前进程的地址空间中,与给定虚拟地址范围有重叠的VMA的迭代器。 pub fn conflicts(&self, request: VirtRegion) -> impl Iterator> + '_ { let r = self @@ -959,6 +1058,9 @@ impl LockedVMA { let mut page_manager_guard: SpinLockGuard<'_, crate::mm::page::PageManager> = page_manager_lock_irqsave(); for page in guard.region.pages() { + if mapper.translate(page.virt_address()).is_none() { + continue; + } let (paddr, _, flush) = unsafe { mapper.unmap_phys(page.virt_address(), true) } .expect("Failed to unmap, beacuse of some page is not mapped"); @@ -1065,6 +1167,39 @@ impl LockedVMA { after, )); } + 
+ /// 判断VMA是否为外部(非当前进程空间)的VMA + pub fn is_foreign(&self) -> bool { + let guard = self.lock(); + if let Some(space) = guard.user_address_space.clone() { + if let Some(space) = space.upgrade() { + return AddressSpace::is_current(&space); + } else { + return true; + } + } else { + return true; + } + } + + /// 判断VMA是否可访问 + pub fn is_accessible(&self) -> bool { + let guard = self.lock(); + let vm_access_flags: VmFlags = VmFlags::VM_READ | VmFlags::VM_WRITE | VmFlags::VM_EXEC; + guard.vm_flags().intersects(vm_access_flags) + } + + /// 判断VMA是否为匿名映射 + pub fn is_anonymous(&self) -> bool { + //TODO: 实现匿名映射判断逻辑,目前仅支持匿名映射 + true + } + + /// 判断VMA是否为大页映射 + pub fn is_hugepage(&self) -> bool { + //TODO: 实现巨页映射判断逻辑,目前不支持巨页映射 + false + } } impl Drop for LockedVMA { @@ -1182,6 +1317,18 @@ impl VMA { }; } + pub fn clone_info_only(&self) -> Self { + return Self { + region: self.region, + vm_flags: self.vm_flags, + flags: self.flags, + mapped: self.mapped, + user_address_space: None, + self_ref: Weak::default(), + provider: Provider::Allocated, + }; + } + #[inline(always)] pub fn flags(&self) -> PageFlags { return self.flags; @@ -1203,15 +1350,15 @@ impl VMA { assert!(self.mapped); for page in self.region.pages() { // kdebug!("remap page {:?}", page.virt_address()); - // 暂时要求所有的页帧都已经映射到页表 - // TODO: 引入Lazy Mapping, 通过缺页中断来映射页帧,这里就不必要求所有的页帧都已经映射到页表了 - let r = unsafe { - mapper - .remap(page.virt_address(), flags) - .expect("Failed to remap, beacuse of some page is not mapped") - }; + if mapper.translate(page.virt_address()).is_some() { + let r = unsafe { + mapper + .remap(page.virt_address(), flags) + .expect("Failed to remap") + }; + flusher.consume(r); + } // kdebug!("consume page {:?}", page.virt_address()); - flusher.consume(r); // kdebug!("remap page {:?} done", page.virt_address()); } self.flags = flags; @@ -1426,8 +1573,10 @@ impl UserStack { let actual_stack_bottom = stack_bottom - guard_size; let mut prot_flags = ProtFlags::PROT_READ | ProtFlags::PROT_WRITE; - let map_flags = - MapFlags::MAP_PRIVATE | MapFlags::MAP_ANONYMOUS | MapFlags::MAP_FIXED_NOREPLACE; + let map_flags = MapFlags::MAP_PRIVATE + | MapFlags::MAP_ANONYMOUS + | MapFlags::MAP_FIXED_NOREPLACE + | MapFlags::MAP_GROWSDOWN; // kdebug!( // "map anonymous stack: {:?} {}", // actual_stack_bottom, @@ -1439,6 +1588,7 @@ impl UserStack { prot_flags, map_flags, false, + false, )?; // test_buddy(); // 设置保护页只读 @@ -1479,7 +1629,7 @@ impl UserStack { mut bytes: usize, ) -> Result<(), SystemError> { let prot_flags = ProtFlags::PROT_READ | ProtFlags::PROT_WRITE | ProtFlags::PROT_EXEC; - let map_flags = MapFlags::MAP_PRIVATE | MapFlags::MAP_ANONYMOUS; + let map_flags = MapFlags::MAP_PRIVATE | MapFlags::MAP_ANONYMOUS | MapFlags::MAP_GROWSDOWN; bytes = page_align_up(bytes); self.mapped_size += bytes; @@ -1490,6 +1640,7 @@ impl UserStack { prot_flags, map_flags, false, + false, )?; return Ok(()); @@ -1509,7 +1660,7 @@ impl UserStack { #[allow(dead_code)] pub fn extend( &mut self, - vm: &mut RwLockWriteGuard, + vm: &mut InnerAddressSpace, mut bytes: usize, ) -> Result<(), SystemError> { let prot_flags = ProtFlags::PROT_READ | ProtFlags::PROT_WRITE | ProtFlags::PROT_EXEC; @@ -1524,6 +1675,7 @@ impl UserStack { prot_flags, map_flags, false, + false, )?; return Ok(()); diff --git a/kernel/src/syscall/mod.rs b/kernel/src/syscall/mod.rs index 02dfb121..3f54948f 100644 --- a/kernel/src/syscall/mod.rs +++ b/kernel/src/syscall/mod.rs @@ -855,10 +855,15 @@ impl Syscall { } SYS_MADVISE => { - // 这个太吵了,总是打印,先注释掉 - // kwarn!("SYS_MADVISE has not yet been implemented"); 
- Ok(0) + let addr = args[0]; + let len = page_align_up(args[1]); + if addr & (MMArch::PAGE_SIZE - 1) != 0 { + Err(SystemError::EINVAL) + } else { + Self::madvise(VirtAddr::new(addr), len, args[2]) + } } + SYS_GETTID => Self::gettid().map(|tid| tid.into()), SYS_GETUID => Self::getuid(), diff --git a/user/dadk/config/nova_shell-0.1.0.dadk b/user/dadk/config/nova_shell-0.1.0.dadk index 42afa961..a4a3aacb 100644 --- a/user/dadk/config/nova_shell-0.1.0.dadk +++ b/user/dadk/config/nova_shell-0.1.0.dadk @@ -6,7 +6,7 @@ "BuildFromSource": { "Git": { "url": "https://git.mirrors.dragonos.org.cn/DragonOS-Community/NovaShell.git", - "revision": "c6454d3220" + "revision": "dcf45035c1" } } },
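
Usage note (illustrative sketch, not part of the patch): the new trailing `allocate_at_once` parameter on `InnerAddressSpace::map_anonymous` selects eager versus lazy backing for an anonymous mapping. Passing `false` leaves the range unbacked, so the first user-mode access is resolved by the page-fault path added here (`do_user_addr_fault` -> `PageFaultHandler::handle_mm_fault` -> `do_anonymous_page`); on architectures where `MMArch::PAGE_FAULT_ENABLED` is `false` the implementation silently falls back to eager allocation. The caller, hint address, and length below are placeholders invented for the example, not taken from the diff.

    // Hypothetical in-kernel caller: map 16 anonymous pages lazily.
    // Physical frames are allocated on first touch by the #PF handler.
    let addr_space = AddressSpace::current()?;
    let start_page = addr_space.write().map_anonymous(
        VirtAddr::new(0),                                 // address hint (placeholder)
        16 * MMArch::PAGE_SIZE,                           // length, page aligned (placeholder)
        ProtFlags::PROT_READ | ProtFlags::PROT_WRITE,     // protection
        MapFlags::MAP_PRIVATE | MapFlags::MAP_ANONYMOUS,  // mapping flags
        true,                                             // round_to_min
        false,                                            // allocate_at_once = false -> lazy
    )?;
    // start_page.virt_address() can be handed to user space; the first write to
    // each page faults and do_anonymous_page maps and zeroes a fresh frame.
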
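Similarly, the fork path no longer copies anonymous page contents eagerly. A condensed sketch of the flow introduced by this patch (names taken from the diff; locking and error handling omitted):

    // In InnerAddressSpace::try_clone(): share the parent's user mappings,
    // write-protected, so pages are copied only on the first write.
    unsafe {
        new_guard
            .user_mapper
            .clone_from(&mut self.user_mapper, MMArch::PAGE_FAULT_ENABLED)
    };
    // clone_from() -> PageMapper::clone_user_mapping() -> PageTable::clone(allocator,
    // copy_on_write), which clears the write bit on present leaf entries in both
    // page tables. The first write in either process then raises #PF, and
    // PageFaultHandler::do_wp_page() either re-enables write access in place
    // (map_count == 1) or allocates a new frame and copies the old page into it.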