Z Fan 597315b04d
feat(virtualization): kernel virtualization support (#1073)
* Several structs

* vmx_init passes; parts of create_vm and create_vcpu still TODO

* kvm_run half done

* vmlaunch succeeds, but some vmexit issues remain undiagnosed

* Fixed the cpu_reset problem caused by vmlaunch

* Clean up code

* Temporary push to the hyc repo

* Adjust parameter passing in the memory-virtualization code; fix a deadlock

* Initial EPT mapping done, but EPT_VIOLATION keeps firing

* EPT mapping mostly working, but memory reads/writes still have issues

* fixme

* Updated some implementations from truncate to from_bits_unchecked

* Completed the EPT_VIOLATION mapping for memory virtualization

* fmt

* Remove /fixme from .gitignore

* Remove /fixme file

* Update kernel/src/init/init.rs

Co-authored-by: Samuel Dai <samuka007@dragonos.org>

* Update kernel/src/init/init.rs

Co-authored-by: Samuel Dai <samuka007@dragonos.org>

* Fixed comment formatting; removed some incidental file operations

* feat(syscall): implement syscall restart (#1075)

When a syscall returns ERESTARTSYS, the syscall is automatically restarted after signal handling completes.

TODO: implement restarting for syscalls such as wait that require restart_block

Signed-off-by: longjin <longjin@DragonOS.org>

* chore: update docker image version in script && update doc (#1076)

* chore: update docker image version in script

* chore: replace lots of spaces with newline in doc

* fix: fix wait4 syscall semantics that were inconsistent with Linux (#1080)

* fix: fix wait4 syscall semantics that were inconsistent with Linux

Fixes bugs where wait would fail to block or would hang afterwards.

---------

Signed-off-by: longjin <longjin@DragonOS.org>

* feat(fs/syscall): implement the fchdir syscall (#1081)

Signed-off-by: longjin <longjin@DragonOS.org>

* fix(mm): fix PageCache synchronization for the FAT filesystem (#1005)


---------

Co-authored-by: longjin <longjin@DragonOS.org>

* fix: fix console logs not being written to file when booting with nographic (#1082)

Signed-off-by: longjin <longjin@DragonOS.org>

* fix(process): fix some copy_process bugs & support passing arguments to the default init process (#1083)

- Fix incorrect flag handling in `copy_process`
- Support passing arguments to the default init program in the init search list

Signed-off-by: longjin <longjin@DragonOS.org>

* feat: improve sys_reboot (#1084)

* fix(process): fix some copy_process bugs & support passing arguments to the default init process

- Fix incorrect flag handling in `copy_process`
- Support passing arguments to the default init program in the init search list

Signed-off-by: longjin <longjin@DragonOS.org>

* feat: improve sys_reboot

- Validate the magic number
- Support multiple cmds (specifics not yet implemented)

Signed-off-by: longjin <longjin@DragonOS.org>

---------

Signed-off-by: longjin <longjin@DragonOS.org>

* fix: fix a bug where do_wait slept without releasing the lock when waiting on all child processes (#1089)

Signed-off-by: longjin <longjin@DragonOS.org>

* Pull mainline and run fmt

---------

Signed-off-by: longjin <longjin@DragonOS.org>
Co-authored-by: GnoCiYeH <heyicong@dragonos.org>
Co-authored-by: Samuel Dai <samuka007@dragonos.org>
Co-authored-by: LoGin <longjin@DragonOS.org>
Co-authored-by: LIU Yuwei <22045841+Marsman1996@users.noreply.github.com>
Co-authored-by: MemoryShore <1353318529@qq.com>
2025-03-04 10:56:20 +08:00

use crate::{
    arch::kvm::vmx::ept::EptMapper,
    libs::mutex::Mutex,
    mm::{page::EntryFlags, syscall::ProtFlags},
    virt::kvm::host_mem::{__gfn_to_pfn, kvm_vcpu_gfn_to_memslot, PAGE_MASK, PAGE_SHIFT},
};
use bitfield_struct::bitfield;
use log::debug;
use system_error::SystemError;

use super::{
    ept::check_ept_features,
    vcpu::VmxVcpu,
    vmcs::VmcsFields,
    vmx_asm_wrapper::{vmx_vmread, vmx_vmwrite},
};
use crate::arch::kvm::vmx::mmu::VmcsFields::CTRL_EPTP_PTR;

// pub const PT64_ROOT_LEVEL: u32 = 4;
// pub const PT32_ROOT_LEVEL: u32 = 2;
// pub const PT32E_ROOT_LEVEL: u32 = 3;

// pub struct KvmMmuPage{
//     gfn: u64,             // gfn of the start of the address range managed by this page
//     role: KvmMmuPageRole, // basic info: hardware features, level in the hierarchy, etc.
//     // spt: *mut u64, // spt: shadow page table; points to the struct page holding all
//     //                // page-table entries (ptes). page->private points back to this kvm_mmu_page
// }
#[bitfield(u32)]
pub struct KvmMmuPageRole {
    #[bits(4)]
    level: usize, // level of this page in the paging hierarchy
    cr4_pae: bool, // cr4.pae = 1 means 64-bit gptes are in use
    #[bits(2)]
    quadrant: usize, // with cr4.pae = 0 a gpte is 32-bit but an spte is 64-bit, so several sptes represent one gpte; this field says which part of the gpte this is
    direct: bool,
    #[bits(3)]
    access: usize, // access permissions
    invalid: bool, // invalidated; destroyed as soon as it is unpinned
    nxe: bool,     // efer.nxe: no-execute
    cr0_wp: bool,  // cr0.wp: write protection
    smep_andnot_wp: bool, // smep && !cr0.wp; with SMEP enabled, supervisor-mode code cannot execute instructions from user-mode pages
    smap_andnot_wp: bool, // smap && !cr0.wp
    #[bits(8)]
    unused: usize,
    #[bits(8)]
    smm: usize, // 1 if in system management mode, 0 otherwise
}
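// Hedged sketch (illustrative only, not used elsewhere in this file):
// `bitfield_struct` generates `new()` plus `with_*`/getter accessors for the
// fields above, so a role for a 4-level direct map can be built like this:
#[allow(dead_code)]
fn example_mmu_page_role() -> KvmMmuPageRole {
    KvmMmuPageRole::new().with_level(4).with_direct(true)
}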
// We don't want allocation failures within the mmu code, so we preallocate
// enough memory for a single page fault in a cache.
// pub struct KvmMmuMemoryCache {
//     num_objs: u32,
//     objs: [*mut u8; KVM_NR_MEM_OBJS as usize],
// }
pub type KvmMmuPageFaultHandler =
    fn(vcpu: &mut VmxVcpu, gpa: u64, error_code: u32, prefault: bool) -> Result<(), SystemError>;

#[derive(Default)]
pub struct KvmMmu {
    pub root_hpa: u64,
    pub root_level: u32,
    pub base_role: KvmMmuPageRole,
    // ... a few more fields whose purpose is not yet clear
    pub get_cr3: Option<fn(&VmxVcpu) -> u64>,
    pub set_eptp: Option<fn(u64) -> Result<(), SystemError>>,
    pub page_fault: Option<KvmMmuPageFaultHandler>,
    // get_pdptr: Option<fn(&VmxVcpu, index: u32) -> u64>, // Page Directory Pointer Table Register? Not yet clear how it differs from CR3
    // inject_page_fault: Option<fn(&mut VmxVcpu, fault: &X86Exception)>,
    // gva_to_gpa: Option<fn(&mut VmxVcpu, gva: u64, access: u32, exception: &X86Exception) -> u64>,
    // translate_gpa: Option<fn(&mut VmxVcpu, gpa: u64, access: u32, exception: &X86Exception) -> u64>,
    // sync_page: Option<fn(&mut VmxVcpu, &mut KvmMmuPage)>,
    // invlpg: Option<fn(&mut VmxVcpu, gva: u64)>, // invalidate entry
    // update_pte: Option<fn(&mut VmxVcpu, sp: &KvmMmuPage, spte: u64, pte: u64)>,
}
impl core::fmt::Debug for KvmMmu {
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        f.debug_struct("KvmMmu")
            .field("root_hpa", &self.root_hpa)
            .field("root_level", &self.root_level)
            .field("base_role", &self.base_role)
            .finish()
    }
}
fn tdp_get_cr3(_vcpu: &VmxVcpu) -> u64 {
    let guest_cr3 = vmx_vmread(VmcsFields::GUEST_CR3 as u32).expect("Failed to read GUEST_CR3");
    return guest_cr3;
}
pub fn tdp_set_eptp(root_hpa: u64) -> Result<(), SystemError> {
    // Permission bits are currently hard-coded: readable/writable/executable.
    // EPT paging-structure memory type: Uncacheable
    let mut eptp = 0x0_u64;
    // This value is 1 less than the EPT page-walk length. 3 means 4-level paging.
    eptp |= 0x3 << 3;
    eptp |= root_hpa & (PAGE_MASK as u64);
    vmx_vmwrite(CTRL_EPTP_PTR as u32, eptp)?;
    Ok(())
}
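// Hedged sketch (illustrative only): a worked instance of the encoding above.
// For a root table at HPA 0x1000, eptp = (0x3 << 3) | 0x1000 = 0x1018:
// bits 5:3 = 3 (EPT page-walk length 4) and bits 2:0 = 0 (uncacheable memory
// type). The address 0x1000 is an assumed example value, not from this file.
#[allow(dead_code)]
const EPTP_EXAMPLE: u64 = (0x3 << 3) | 0x1000; // == 0x1018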
fn tdp_page_fault(
    vcpu: &mut VmxVcpu,
    gpa: u64,
    error_code: u32,
    prefault: bool,
) -> Result<(), SystemError> {
    debug!("tdp_page_fault");
    let gfn = gpa >> PAGE_SHIFT; // shift the guest-physical address right by 12 bits to get the (guest-relative) page frame number
    // Top up the cache pool: preallocate/refill enough space here so that
    // allocations cannot fail at run time.
    mmu_topup_memory_caches(vcpu)?;
    // TODO: get the level used by this gfn and handle hugepages
    let level = 1; // 4KB page
    // TODO: fast-path violations caused by plain read/write accesses,
    // i.e. non-MMIO page faults that are present and writable
    // fast_page_fault(vcpu, gpa, level, error_code)
    // gfn -> pfn
    let mut map_writable = false;
    let write = error_code & ((1_u32) << 1);
    // The write-fault flag is set when the access was a write (bit 1 set).
    let pfn = mmu_gfn_to_pfn_fast(vcpu, gpa, prefault, gfn, write != 0, &mut map_writable)?;
    // direct map: install the translation in the EPT page tables
    __direct_map(vcpu, gpa, write, map_writable, level, gfn, pfn, prefault)?;
    Ok(())
}
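// Hedged sketch: per the Intel SDM, the low bits of an EPT-violation exit
// qualification (passed in here as `error_code`) encode the access type:
// bit 0 = read, bit 1 = write, bit 2 = instruction fetch. `tdp_page_fault`
// above only inspects bit 1. This helper is illustrative and unused:
#[allow(dead_code)]
fn is_write_fault(error_code: u32) -> bool {
    error_code & (1 << 1) != 0
}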
/*
 * Calculate mmu pages needed for kvm.
 */
// pub fn kvm_mmu_calculate_mmu_pages() -> u32 {
//     let mut nr_mmu_pages: u32;
//     let mut nr_pages = 0;
//     let kvm = vm(0).unwrap();
//     for as_id in 0..KVM_ADDRESS_SPACE_NUM {
//         let slots = kvm.memslots[as_id];
//         for i in 0..KVM_MEM_SLOTS_NUM {
//             let memslot = slots.memslots[i as usize];
//             nr_pages += memslot.npages;
//         }
//     }
//     nr_mmu_pages = (nr_pages as u32) * KVM_PERMILLE_MMU_PAGES / 1000;
//     nr_mmu_pages = nr_mmu_pages.max(KVM_MIN_ALLOC_MMU_PAGES);
//     return nr_mmu_pages;
// }
// pub fn kvm_mmu_change_mmu_pages(mut goal_nr_mmu_pages: u32) {
//     let kvm = KVM();
//     // free the excess mmu pages
//     if kvm.lock().arch.n_used_mmu_pages > goal_nr_mmu_pages {
//         while kvm.lock().arch.n_used_mmu_pages > goal_nr_mmu_pages {
//             if !prepare_zap_oldest_mmu_page() {
//                 break;
//             }
//         }
//         // kvm_mmu_commit_zap_page();
//         goal_nr_mmu_pages = kvm.lock().arch.n_used_mmu_pages;
//     }
//     kvm.lock().arch.n_max_mmu_pages = goal_nr_mmu_pages;
// }

// pub fn prepare_zap_oldest_mmu_page() -> bool {
//     return false;
// }
pub fn kvm_mmu_setup(vcpu: &Mutex<VmxVcpu>) {
    // TODO: init_kvm_softmmu(vcpu), init_kvm_nested_mmu(vcpu)
    init_kvm_tdp_mmu(vcpu);
}

pub fn kvm_vcpu_mtrr_init(_vcpu: &Mutex<VmxVcpu>) -> Result<(), SystemError> {
    check_ept_features()?;
    Ok(())
}
pub fn init_kvm_tdp_mmu(vcpu: &Mutex<VmxVcpu>) {
    let context = &mut vcpu.lock().mmu;
    context.page_fault = Some(tdp_page_fault);
    context.get_cr3 = Some(tdp_get_cr3);
    context.set_eptp = Some(tdp_set_eptp);
    // context.inject_page_fault = kvm_inject_page_fault; TODO: inject_page_fault
    // context.invlpg = nonpaging_invlpg;
    // context.sync_page = nonpaging_sync_page;
    // context.update_pte = nonpaging_update_pte;

    // TODO: gva to gpa in kvm
    // if !is_paging(vcpu) { // vcpu has paging disabled
    //     context.gva_to_gpa = nonpaging_gva_to_gpa;
    //     context.root_level = 0;
    // } else if is_long_mode(vcpu) {
    //     context.gva_to_gpa = paging64_gva_to_gpa;
    //     context.root_level = PT64_ROOT_LEVEL;
    //     // TODO: different paging strategy
    // } else if is_pae(vcpu) {
    //     context.gva_to_gpa = paging64_gva_to_gpa;
    //     context.root_level = PT32E_ROOT_LEVEL;
    // } else {
    //     context.gva_to_gpa = paging32_gva_to_gpa;
    //     context.root_level = PT32_ROOT_LEVEL;
    // }
}
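// Hedged sketch (illustrative only, not called from this file): once
// `init_kvm_tdp_mmu` has filled in the function-pointer table, a vmexit
// handler can dispatch an EPT violation through it roughly like this;
// `gpa` and `error_code` are assumed to come from the exit qualification:
#[allow(dead_code)]
fn example_dispatch_page_fault(
    vcpu: &mut VmxVcpu,
    gpa: u64,
    error_code: u32,
) -> Result<(), SystemError> {
    // fn pointers are `Copy`, so take the handler out first and then call it.
    let handler = vcpu.mmu.page_fault;
    match handler {
        Some(page_fault) => page_fault(vcpu, gpa, error_code, false),
        None => Err(SystemError::EINVAL),
    }
}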
#[allow(clippy::too_many_arguments)]
pub fn __direct_map(
    vcpu: &mut VmxVcpu,
    gpa: u64,
    _write: u32,
    _map_writable: bool,
    _level: i32,
    _gfn: u64,
    pfn: u64,
    _prefault: bool,
) -> Result<u32, SystemError> {
    debug!("gpa={}, pfn={}, root_hpa={:x}", gpa, pfn, vcpu.mmu.root_hpa);
    // Check whether vcpu.mmu.root_hpa is valid
    if vcpu.mmu.root_hpa == 0 {
        return Err(SystemError::KVM_HVA_ERR_BAD);
    }
    // Map the gpa to the hpa
    let mut ept_mapper = EptMapper::lock();
    // 0x7: read | write | execute
    let page_flags = EntryFlags::from_prot_flags(ProtFlags::from_bits_truncate(0x7_u64), false);
    unsafe {
        assert!(ept_mapper.walk(gpa, pfn << PAGE_SHIFT, page_flags).is_ok());
    }
    return Ok(0);
}
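// Hedged sketch: the walk above installs a translation for a whole 4KiB
// frame. For an illustrative gpa of 0x1234_5678 (an assumed value, not from
// this file):
//     gfn = gpa >> PAGE_SHIFT            = 0x12345
//     hpa = (pfn << PAGE_SHIFT) | (gpa & 0xfff)
// i.e. the low 12 bits of the address pass through untranslated.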
pub fn mmu_gfn_to_pfn_fast(
    vcpu: &mut VmxVcpu,
    _gpa: u64,
    _prefault: bool,
    gfn: u64,
    write: bool,
    writable: &mut bool,
) -> Result<u64, SystemError> {
    let slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
    let pfn = __gfn_to_pfn(slot, gfn, false, write, writable)?;
    Ok(pfn)
}
// TODO: add caches
pub fn mmu_topup_memory_caches(_vcpu: &mut VmxVcpu) -> Result<(), SystemError> {
    // If vcpu->arch.mmu_page_header_cache runs low, allocate from mmu_page_header_cache.
    // The two global slab caches, pte_list_desc_cache and mmu_page_header_cache,
    // are created in kvm_mmu_module_init.
    // mmu_topup_memory_cache(vcpu.mmu_page_header_cache,
    //                        mmu_page_header_cache, 4);
    Ok(())
}