diff --git a/kernel/Cargo.toml b/kernel/Cargo.toml index 4c539f15..d7e577b6 100644 --- a/kernel/Cargo.toml +++ b/kernel/Cargo.toml @@ -17,6 +17,7 @@ members = [ "src/libs/intertrait" ] x86 = "0.52.0" x86_64 = "0.14.10" bitflags = "1.3.2" +bitfield-struct = "0.5.3" virtio-drivers = { git = "https://git.mirrors.dragonos.org/DragonOS-Community/virtio-drivers.git", rev = "f1d1cbb" } # 一个无锁MPSC队列 thingbuf = { version = "0.1.3", default-features = false, features = ["alloc"] } @@ -46,7 +47,10 @@ version = "1.4.0" # 由于在no_std环境,而lazy_static依赖了spin库,因此需要指定其使用no_std features = ["spin_no_std"] - +# The development profile, used for `cargo build` +[profile.dev] +# opt-level = 0 # Controls the --opt-level the compiler builds with +debug = true # Controls whether the compiler passes `-g` # The release profile, used for `cargo build --release` [profile.release] debug = false diff --git a/kernel/src/arch/x86_64/kvm/mod.rs b/kernel/src/arch/x86_64/kvm/mod.rs new file mode 100644 index 00000000..f622a198 --- /dev/null +++ b/kernel/src/arch/x86_64/kvm/mod.rs @@ -0,0 +1,117 @@ +use crate::arch::kvm::vmx::vmcs::VmcsFields; +use crate::arch::kvm::vmx::vmx_asm_wrapper::{vmx_vmlaunch, vmx_vmread}; +use crate::libs::mutex::Mutex; +use crate::virt::kvm::vm; +use crate::{ + kdebug, + kerror, + // libs::spinlock::{SpinLock, SpinLockGuard}, + syscall::SystemError, +}; +use alloc::sync::Arc; +use core::arch::asm; +use raw_cpuid::CpuId; +// use crate::virt::kvm::guest_code; +use self::vmx::mmu::{kvm_mmu_setup, kvm_vcpu_mtrr_init}; +use self::vmx::vcpu::VmxVcpu; +pub mod vmx; + +#[derive(Default, Debug, Clone)] +pub struct X86_64KVMArch { + // n_used_mmu_pages: u32, + // n_requested_mmu_pages: u32, + // n_max_mmu_pages: u32, + // mmu_valid_gen: u64, + // // mmu_page_hash:[], + // active_mmu_pages: LinkedList, // 所有分配的mmu page都挂到active_mmu_pages上 + // zapped_obsolete_pages: LinkedList, // 释放的mmu page都挂到zapped_obsolete_pages上,一个全局的invalid_list +} + +impl X86_64KVMArch { + /// @brief 查看CPU是否支持虚拟化 + pub fn kvm_arch_cpu_supports_vm() -> Result<(), SystemError> { + let cpuid = CpuId::new(); + // Check to see if CPU is Intel (“GenuineIntel”). 
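+        // (Illustrative note: raw_cpuid reads the vendor string that CPUID leaf 0 returns
+        // in EBX/EDX/ECX and the VMX flag from CPUID.1:ECX bit 5, so the two checks below
+        // follow the Intel manual's procedure without any inline assembly.)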
+        if let Some(vi) = cpuid.get_vendor_info() {
+            if vi.as_str() != "GenuineIntel" {
+                return Err(SystemError::EOPNOTSUPP_OR_ENOTSUP);
+            }
+        }
+        // Check processor support for Virtual Machine Extension (VMX) technology
+        // CPUID.1:ECX.VMX[bit 5] = 1 (Intel Manual: 24.6 Discovering Support for VMX)
+        if let Some(fi) = cpuid.get_feature_info() {
+            if !fi.has_vmx() {
+                return Err(SystemError::EOPNOTSUPP_OR_ENOTSUP);
+            }
+        }
+        Ok(())
+    }
+
+    /// @brief Initialize KVM
+    pub fn kvm_arch_init() -> Result<(), SystemError> {
+        Ok(())
+    }
+
+    pub fn kvm_arch_dev_ioctl(cmd: u32, _arg: usize) -> Result<usize, SystemError> {
+        match cmd {
+            _ => {
+                kerror!("unknown kvm ioctl cmd: {}", cmd);
+                return Err(SystemError::EINVAL);
+            }
+        }
+    }
+
+    pub fn kvm_arch_vcpu_create(id: u32) -> Result<Arc<Mutex<VmxVcpu>>, SystemError> {
+        // let guest_rip = current_kvm.lock().memslots[0].memslots[0].userspace_addr;
+        let vcpu = VmxVcpu::new(id, vm(0).unwrap()).unwrap();
+        return Ok(Arc::new(Mutex::new(vcpu)));
+    }
+
+    pub fn kvm_arch_vcpu_setup(vcpu: &Mutex<VmxVcpu>) -> Result<(), SystemError> {
+        kvm_vcpu_mtrr_init(vcpu)?;
+        kvm_mmu_setup(vcpu);
+        Ok(())
+    }
+    pub fn kvm_arch_vcpu_ioctl_run(_vcpu: &Mutex<VmxVcpu>) -> Result<(), SystemError> {
+        match vmx_vmlaunch() {
+            Ok(_) => {}
+            Err(e) => {
+                let vmx_err = vmx_vmread(VmcsFields::VMEXIT_INSTR_ERR as u32).unwrap();
+                kdebug!("vmlaunch failed: {:?}", vmx_err);
+                return Err(e);
+            }
+        }
+        Ok(())
+    }
+
+    // pub fn kvm_arch_create_memslot(_slot: &mut KvmMemorySlot, _npages: u64) {
+
+    // }
+
+    // pub fn kvm_arch_commit_memory_region(
+    //     _mem: &KvmUserspaceMemoryRegion,
+    //     _new_slot: &KvmMemorySlot,
+    //     _old_slot: &KvmMemorySlot,
+    //     _change: KvmMemoryChange) {
+    //     // let kvm = KVM();
+    //     // let mut num_mmu_pages = 0;
+    //     // if kvm.lock().arch.n_requested_mmu_pages == 0{
+    //     //     num_mmu_pages = kvm_mmu_calculate_mmu_pages();
+    //     // }
+    //     // if num_mmu_pages != 0 {
+    //     //     // kvm_mmu_change_mmu_pages(num_mmu_pages);
+    //     // }
+    // }
+}
+
+#[no_mangle]
+pub extern "C" fn guest_code() {
+    kdebug!("guest_code");
+    loop {
+        unsafe {
+            asm!("mov rax, 0", "mov rcx, 0", "cpuid");
+        }
+        unsafe { asm!("nop") };
+        kdebug!("guest_code");
+    }
+}
diff --git a/kernel/src/arch/x86_64/kvm/vmx/ept.rs b/kernel/src/arch/x86_64/kvm/vmx/ept.rs
new file mode 100644
index 00000000..be62ab1a
--- /dev/null
+++ b/kernel/src/arch/x86_64/kvm/vmx/ept.rs
@@ -0,0 +1,112 @@
+use crate::arch::mm::PageMapper;
+use crate::arch::MMArch;
+use crate::mm::page::PageFlags;
+use crate::mm::{PageTableKind, PhysAddr, VirtAddr};
+use crate::smp::core::smp_get_processor_id;
+use crate::{arch::mm::LockedFrameAllocator, syscall::SystemError};
+use core::sync::atomic::{compiler_fence, AtomicUsize, Ordering};
+use x86::msr;
+
+/// Check if MTRR is supported
+pub fn check_ept_features() -> Result<(), SystemError> {
+    const MTRR_ENABLE_BIT: u64 = 1 << 11;
+    let ia32_mtrr_def_type = unsafe { msr::rdmsr(msr::IA32_MTRR_DEF_TYPE) };
+    if (ia32_mtrr_def_type & MTRR_ENABLE_BIT) == 0 {
+        return Err(SystemError::EOPNOTSUPP_OR_ENOTSUP);
+    }
+    Ok(())
+}
+
+// pub fn ept_build_mtrr_map() -> Result<(), SystemError> {
+//     let ia32_mtrr_cap = unsafe { msr::rdmsr(msr::IA32_MTRRCAP) };
+//     Ok(())
+// }
+
+/// Marks that no processor currently holds the kernel mapper lock.
+/// The flag is needed because AtomicUsize::new(0) would treat 0 as a valid processor id.
+const EPT_MAPPER_NO_PROCESSOR: usize = !0;
+/// The processor that currently holds the kernel mapper lock
+static EPT_MAPPER_LOCK_OWNER: AtomicUsize = AtomicUsize::new(EPT_MAPPER_NO_PROCESSOR);
+/// Lock counter of the kernel mapper
+static EPT_MAPPER_LOCK_COUNT: AtomicUsize = AtomicUsize::new(0);
+
+pub struct EptMapper {
+    /// The EPT page table mapper
+    mapper: PageMapper,
+    /// Marks whether the current mapper is read-only
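+    /// A nested lock_cpu() call on the processor that already owns the lock yields a
+    /// read-only mapper (lock count > 0); walk() refuses to map through such a mapper
+    /// and returns EAGAIN_OR_EWOULDBLOCK, so only the outermost holder may modify the EPT.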
+ readonly: bool, + // EPT页表根地址 + // root_hpa: PhysAddr, +} + +impl EptMapper { + fn lock_cpu(cpuid: usize, mapper: PageMapper) -> Self { + loop { + match EPT_MAPPER_LOCK_OWNER.compare_exchange_weak( + EPT_MAPPER_NO_PROCESSOR, + cpuid, + Ordering::Acquire, + Ordering::Relaxed, + ) { + Ok(_) => break, + // 当前处理器已经持有了锁 + Err(id) if id == cpuid => break, + // either CAS failed, or some other hardware thread holds the lock + Err(_) => core::hint::spin_loop(), + } + } + + let prev_count = EPT_MAPPER_LOCK_COUNT.fetch_add(1, Ordering::Relaxed); + compiler_fence(Ordering::Acquire); + + // 本地核心已经持有过锁,因此标记当前加锁获得的映射器为只读 + let readonly = prev_count > 0; + + return Self { mapper, readonly }; + } + + /// @brief 锁定内核映射器, 并返回一个内核映射器对象 + #[inline(always)] + pub fn lock() -> Self { + let cpuid = smp_get_processor_id() as usize; + let mapper = unsafe { PageMapper::current(PageTableKind::EPT, LockedFrameAllocator) }; + return Self::lock_cpu(cpuid, mapper); + } + + /// 映射guest physical addr(gpa)到指定的host physical addr(hpa)。 + /// + /// ## 参数 + /// + /// - `gpa`: 要映射的guest physical addr + /// - `hpa`: 要映射的host physical addr + /// - `flags`: 页面标志 + /// + /// ## 返回 + /// + /// - 成功:返回Ok(()) + /// - 失败: 如果当前映射器为只读,则返回EAGAIN_OR_EWOULDBLOCK + pub unsafe fn walk( + &mut self, + gpa: u64, + hpa: u64, + flags: PageFlags, + ) -> Result<(), SystemError> { + if self.readonly { + return Err(SystemError::EAGAIN_OR_EWOULDBLOCK); + } + self.mapper + .map_phys( + VirtAddr::new(gpa as usize), + PhysAddr::new(hpa as usize), + flags, + ) + .unwrap() + .flush(); + return Ok(()); + } + + // fn get_ept_index(addr: u64, level: usize) -> u64 { + // let pt64_level_shift = PAGE_SHIFT + (level - 1) * PT64_LEVEL_BITS; + // (addr >> pt64_level_shift) & ((1 << PT64_LEVEL_BITS) - 1) + // } +} diff --git a/kernel/src/arch/x86_64/kvm/vmx/kvm_emulation.rs b/kernel/src/arch/x86_64/kvm/vmx/kvm_emulation.rs new file mode 100644 index 00000000..6be1b456 --- /dev/null +++ b/kernel/src/arch/x86_64/kvm/vmx/kvm_emulation.rs @@ -0,0 +1,7 @@ +// pub struct X86Exception { +// vector: u8, +// error_code_valid: bool, +// error_code: u16, +// // bool nested_page_fault; +// address: u64, /* cr2 or nested page fault gpa */ +// } diff --git a/kernel/src/arch/x86_64/kvm/vmx/mmu.rs b/kernel/src/arch/x86_64/kvm/vmx/mmu.rs new file mode 100644 index 00000000..764841a9 --- /dev/null +++ b/kernel/src/arch/x86_64/kvm/vmx/mmu.rs @@ -0,0 +1,254 @@ +use crate::{ + arch::kvm::vmx::ept::EptMapper, + kdebug, + libs::mutex::Mutex, + mm::{page::PageFlags, syscall::ProtFlags}, + syscall::SystemError, + virt::kvm::host_mem::{__gfn_to_pfn, kvm_vcpu_gfn_to_memslot, PAGE_MASK, PAGE_SHIFT}, +}; +use bitfield_struct::bitfield; + +use super::{ + ept::check_ept_features, + vcpu::VmxVcpu, + vmcs::VmcsFields, + vmx_asm_wrapper::{vmx_vmread, vmx_vmwrite}, +}; +use crate::arch::kvm::vmx::mmu::VmcsFields::CTRL_EPTP_PTR; + +// pub const PT64_ROOT_LEVEL: u32 = 4; +// pub const PT32_ROOT_LEVEL: u32 = 2; +// pub const PT32E_ROOT_LEVEL: u32 = 3; + +// pub struct KvmMmuPage{ +// gfn: u64, // 管理地址范围的起始地址对应的 gfn +// role: KvmMmuPageRole, // 基本信息,包括硬件特性和所属层级等 +// // spt: *mut u64, // spt: shadow page table,指向 struct page 的地址,其包含了所有页表项 (pte)。同时 page->private 会指向该 kvm_mmu_page +// } + +#[bitfield(u32)] +pub struct KvmMmuPageRole { + #[bits(4)] + level: usize, // 页所处的层级 + cr4_pae: bool, // cr4.pae,1 表示使用 64bit gpte + #[bits(2)] + quadrant: usize, // 如果 cr4.pae=0,则 gpte 为 32bit,但 spte 为 64bit,因此需要用多个 spte 来表示一个 gpte,该字段指示是 gpte 的第几块 + direct: bool, + #[bits(3)] + access: usize, // 访问权限 + invalid: bool, 
+    // invalidated; destroyed as soon as it is unpinned
+    nxe: bool,            // efer.nxe, no-execute
+    cr0_wp: bool,         // cr0.wp, write protect
+    smep_andnot_wp: bool, // smep && !cr0.wp; with SMEP enabled, user-mode code cannot execute instructions located in kernel address space
+    smap_andnot_wp: bool, // smap && !cr0.wp
+    #[bits(8)]
+    unused: usize,
+    #[bits(8)]
+    smm: usize, // 1 means the vcpu is in system management mode, 0 means it is not
+}
+
+// We don't want allocation failures within the mmu code, so we preallocate
+// enough memory for a single page fault in a cache.
+// pub struct KvmMmuMemoryCache {
+//     num_objs: u32,
+//     objs: [*mut u8; KVM_NR_MEM_OBJS as usize],
+// }
+
+#[derive(Default)]
+pub struct KvmMmu {
+    pub root_hpa: u64,
+    pub root_level: u32,
+    pub base_role: KvmMmuPageRole,
+    // ... Linux KVM keeps more fields here whose purpose is not yet clear for this port
+    pub get_cr3: Option<fn(&VmxVcpu) -> u64>,
+    pub set_eptp: Option<fn(u64) -> Result<(), SystemError>>,
+    pub page_fault: Option<
+        fn(
+            vcpu: &mut VmxVcpu,
+            gpa: u64,
+            error_code: u32,
+            prefault: bool,
+        ) -> Result<(), SystemError>,
+    >,
+    // get_pdptr: Option<fn(&VmxVcpu) -> u64>, // Page Directory Pointer Table Register? How it differs from CR3 is not yet clear
+    // inject_page_fault: Option,
+    // gva_to_gpa: Option u64>,
+    // translate_gpa: Option u64>,
+    // sync_page: Option,
+    // invlpg: Option, // invalid entry
+    // update_pte: Option,
+}
+
+impl core::fmt::Debug for KvmMmu {
+    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        f.debug_struct("KvmMmu")
+            .field("root_hpa", &self.root_hpa)
+            .field("root_level", &self.root_level)
+            .field("base_role", &self.base_role)
+            .finish()
+    }
+}
+
+fn tdp_get_cr3(_vcpu: &VmxVcpu) -> u64 {
+    let guest_cr3 = vmx_vmread(VmcsFields::GUEST_CR3 as u32).expect("Failed to read GUEST_CR3");
+    return guest_cr3;
+}
+
+fn tdp_set_eptp(root_hpa: u64) -> Result<(), SystemError> {
+    // The access bits are currently hard-coded: readable, writable and executable.
+    // EPT paging-structure memory type: Uncacheable
+    let mut eptp = 0x0 as u64;
+    // This value is 1 less than the EPT page-walk length. 3 means 4-level paging.
+    eptp |= 0x3 << 3;
+    eptp |= root_hpa & (PAGE_MASK as u64);
+    vmx_vmwrite(CTRL_EPTP_PTR as u32, eptp)?;
+    Ok(())
+}
+
+fn tdp_page_fault(
+    vcpu: &mut VmxVcpu,
+    gpa: u64,
+    error_code: u32,
+    prefault: bool,
+) -> Result<(), SystemError> {
+    kdebug!("tdp_page_fault");
+    let gfn = gpa >> PAGE_SHIFT; // shift the guest physical address right by 12 bits to get the (guest) page frame number
+    // Top up the cache pool in advance so that allocation cannot fail while handling the fault
+    mmu_topup_memory_caches(vcpu)?;
+    // TODO: determine the level used by this gfn and handle hugepages
+    let level = 1; // 4KB page
+    // TODO: fast path for violations caused by read/write, i.e. non-MMIO page faults that are present and writable
+    // fast_page_fault(vcpu, gpa, level, error_code)
+    // gfn->pfn
+    let mut map_writable = false;
+    let write = error_code & ((1 as u32) << 1);
+    let pfn = mmu_gfn_to_pfn_fast(vcpu, gpa, prefault, gfn, write == 0, &mut map_writable)?;
+    // "direct map" is the process of filling in the EPT page tables
+    __direct_map(vcpu, gpa, write, map_writable, level, gfn, pfn, prefault)?;
+    Ok(())
+}
+
+/*
+ * Calculate mmu pages needed for kvm.
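+ * (The commented-out port below keeps Linux KVM's sizing heuristic: reserve
+ * KVM_PERMILLE_MMU_PAGES MMU pages per 1000 guest pages, summed over all
+ * memslots, with KVM_MIN_ALLOC_MMU_PAGES as the lower bound.)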
+ */ +// pub fn kvm_mmu_calculate_mmu_pages() -> u32 { +// let mut nr_mmu_pages:u32; +// let mut nr_pages = 0; + +// let kvm = vm(0).unwrap(); +// for as_id in 0..KVM_ADDRESS_SPACE_NUM { +// let slots = kvm.memslots[as_id]; +// for i in 0..KVM_MEM_SLOTS_NUM { +// let memslot = slots.memslots[i as usize]; +// nr_pages += memslot.npages; +// } +// } + +// nr_mmu_pages = (nr_pages as u32)* KVM_PERMILLE_MMU_PAGES / 1000; +// nr_mmu_pages = nr_mmu_pages.max(KVM_MIN_ALLOC_MMU_PAGES); +// return nr_mmu_pages; +// } + +// pub fn kvm_mmu_change_mmu_pages(mut goal_nr_mmu_pages: u32){ +// let kvm = KVM(); +// // 释放多余的mmu page +// if kvm.lock().arch.n_used_mmu_pages > goal_nr_mmu_pages { +// while kvm.lock().arch.n_used_mmu_pages > goal_nr_mmu_pages { +// if !prepare_zap_oldest_mmu_page() { +// break; +// } +// } +// // kvm_mmu_commit_zap_page(); +// goal_nr_mmu_pages = kvm.lock().arch.n_used_mmu_pages; + +// } +// kvm.lock().arch.n_max_mmu_pages = goal_nr_mmu_pages; +// } + +// pub fn prepare_zap_oldest_mmu_page() -> bool { +// return false; +// } + +pub fn kvm_mmu_setup(vcpu: &Mutex) { + // TODO: init_kvm_softmmu(vcpu), init_kvm_nested_mmu(vcpu) + init_kvm_tdp_mmu(vcpu); +} + +pub fn kvm_vcpu_mtrr_init(_vcpu: &Mutex) -> Result<(), SystemError> { + check_ept_features()?; + Ok(()) +} + +pub fn init_kvm_tdp_mmu(vcpu: &Mutex) { + let context = &mut vcpu.lock().mmu; + context.page_fault = Some(tdp_page_fault); + context.get_cr3 = Some(tdp_get_cr3); + context.set_eptp = Some(tdp_set_eptp); + // context.inject_page_fault = kvm_inject_page_fault; TODO: inject_page_fault + // context.invlpg = nonpaging_invlpg; + // context.sync_page = nonpaging_sync_page; + // context.update_pte = nonpaging_update_pte; + + // TODO: gva to gpa in kvm + // if !is_paging(vcpu) { // vcpu不分页 + // context.gva_to_gpa = nonpaging_gva_to_gpa; + // context.root_level = 0; + // } else if (is_long_mode(vcpu)) { + // context.gva_to_gpa = paging64_gva_to_gpa; + // context.root_level = PT64_ROOT_LEVEL; + // TODO:: different paging strategy + // } else if (is_pae(vcpu)) { + // context.gva_to_gpa = paging64_gva_to_gpa; + // context.root_level = PT32E_ROOT_LEVEL; + // } else { + // context.gva_to_gpa = paging32_gva_to_gpa; + // context.root_level = PT32_ROOT_LEVEL; + // } +} + +pub fn __direct_map( + vcpu: &mut VmxVcpu, + gpa: u64, + _write: u32, + _map_writable: bool, + _level: i32, + _gfn: u64, + pfn: u64, + _prefault: bool, +) -> Result { + kdebug!("gpa={}, pfn={}, root_hpa={:x}", gpa, pfn, vcpu.mmu.root_hpa); + // 判断vcpu.mmu.root_hpa是否有效 + if vcpu.mmu.root_hpa == 0 { + return Err(SystemError::KVM_HVA_ERR_BAD); + } + // 把gpa映射到hpa + let mut ept_mapper = EptMapper::lock(); + let page_flags = PageFlags::from_prot_flags(ProtFlags::from_bits_truncate(0x7 as u64), false); + unsafe { + assert!(ept_mapper.walk(gpa, pfn << PAGE_SHIFT, page_flags).is_ok()); + } + drop(ept_mapper); + return Ok(0); +} + +pub fn mmu_gfn_to_pfn_fast( + vcpu: &mut VmxVcpu, + _gpa: u64, + _prefault: bool, + gfn: u64, + write: bool, + writable: &mut bool, +) -> Result { + let slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); + let pfn = __gfn_to_pfn(slot, gfn, false, write, writable)?; + Ok(pfn) +} + +// TODO: 添加cache +pub fn mmu_topup_memory_caches(_vcpu: &mut VmxVcpu) -> Result<(), SystemError> { + // 如果 vcpu->arch.mmu_page_header_cache 不足,从 mmu_page_header_cache 中分配 + // pte_list_desc_cache 和 mmu_page_header_cache 两块全局 slab cache 在 kvm_mmu_module_init 中被创建 + // mmu_topup_memory_cache(vcpu.mmu_page_header_cache, + // mmu_page_header_cache, 4); + Ok(()) +} diff --git 
a/kernel/src/arch/x86_64/kvm/vmx/mod.rs b/kernel/src/arch/x86_64/kvm/vmx/mod.rs new file mode 100644 index 00000000..79353c24 --- /dev/null +++ b/kernel/src/arch/x86_64/kvm/vmx/mod.rs @@ -0,0 +1,45 @@ +pub mod ept; +pub mod kvm_emulation; +pub mod mmu; +pub mod seg; +pub mod vcpu; +pub mod vmcs; +pub mod vmexit; +pub mod vmx_asm_wrapper; + +#[allow(dead_code)] +pub enum VcpuRegIndex { + Rax = 0, + Rbx = 1, + Rcx = 2, + Rdx = 3, + Rsi = 4, + Rdi = 5, + Rsp = 6, + Rbp = 7, + R8 = 8, + R9 = 9, + R10 = 10, + R11 = 11, + R12 = 12, + R13 = 13, + R14 = 14, + R15 = 15, +} + +bitflags! { + #[allow(non_camel_case_types)] + pub struct X86_CR0: u32{ + const CR0_PE = 1 << 0; /* Protection Enable */ + const CR0_MP = 1 << 1; /* Monitor Coprocessor */ + const CR0_EM = 1 << 2; /* Emulation */ + const CR0_TS = 1 << 3; /* Task Switched */ + const CR0_ET = 1 << 4; /* Extension Type */ + const CR0_NE = 1 << 5; /* Numeric Error */ + const CR0_WP = 1 << 16; /* Write Protect */ + const CR0_AM = 1 << 18; /* Alignment Mask */ + const CR0_NW = 1 << 29; /* Not Write-through */ + const CR0_CD = 1 << 30; /* Cache Disable */ + const CR0_PG = 1 << 31; /* Paging */ + } +} diff --git a/kernel/src/arch/x86_64/kvm/vmx/seg.rs b/kernel/src/arch/x86_64/kvm/vmx/seg.rs new file mode 100644 index 00000000..4c5ad05d --- /dev/null +++ b/kernel/src/arch/x86_64/kvm/vmx/seg.rs @@ -0,0 +1,89 @@ +use crate::arch::kvm::VmcsFields::{ + GUEST_CS_ACCESS_RIGHTS, GUEST_CS_BASE, GUEST_CS_LIMIT, GUEST_CS_SELECTOR, +}; +use crate::arch::kvm::VmcsFields::{ + GUEST_DS_ACCESS_RIGHTS, GUEST_DS_BASE, GUEST_DS_LIMIT, GUEST_DS_SELECTOR, +}; +use crate::arch::kvm::VmcsFields::{ + GUEST_ES_ACCESS_RIGHTS, GUEST_ES_BASE, GUEST_ES_LIMIT, GUEST_ES_SELECTOR, +}; +use crate::arch::kvm::VmcsFields::{ + GUEST_FS_ACCESS_RIGHTS, GUEST_FS_BASE, GUEST_FS_LIMIT, GUEST_FS_SELECTOR, +}; +use crate::arch::kvm::VmcsFields::{ + GUEST_GS_ACCESS_RIGHTS, GUEST_GS_BASE, GUEST_GS_LIMIT, GUEST_GS_SELECTOR, +}; +use crate::arch::kvm::VmcsFields::{ + GUEST_LDTR_ACCESS_RIGHTS, GUEST_LDTR_BASE, GUEST_LDTR_LIMIT, GUEST_LDTR_SELECTOR, +}; +use crate::arch::kvm::VmcsFields::{ + GUEST_SS_ACCESS_RIGHTS, GUEST_SS_BASE, GUEST_SS_LIMIT, GUEST_SS_SELECTOR, +}; +use crate::arch::kvm::VmcsFields::{ + GUEST_TR_ACCESS_RIGHTS, GUEST_TR_BASE, GUEST_TR_LIMIT, GUEST_TR_SELECTOR, +}; +use crate::syscall::SystemError; + +use super::vmx_asm_wrapper::vmx_vmwrite; + +// pub const TSS_IOPB_BASE_OFFSET: usize = 0x66; +// pub const TSS_BASE_SIZE: usize = 0x68; +// pub const TSS_IOPB_SIZE: usize = 65536 / 8; +// pub const TSS_REDIRECTION_SIZE: usize = 256 / 8; +// pub const RMODE_TSS_SIZE: usize = TSS_BASE_SIZE + TSS_REDIRECTION_SIZE + TSS_IOPB_SIZE + 1; + +#[derive(Debug)] +pub struct KvmVmxSegmentField { + selector: u32, + base: u32, + limit: u32, + access_rights: u32, +} + +macro_rules! 
VMX_SEGMENT_FIELD { + ($struct_name: ident) => { + KvmVmxSegmentField { + selector: concat_idents!(GUEST_, $struct_name, _SELECTOR) as u32, + base: concat_idents!(GUEST_, $struct_name, _BASE) as u32, + limit: concat_idents!(GUEST_, $struct_name, _LIMIT) as u32, + access_rights: concat_idents!(GUEST_, $struct_name, _ACCESS_RIGHTS) as u32, + } + }; +} +#[derive(FromPrimitive)] +pub enum Sreg { + ES = 0, + CS = 1, + SS = 2, + DS = 3, + FS = 4, + GS = 5, + TR = 6, + LDTR = 7, +} + +static KVM_VMX_SEGMENT_FIELDS: [KvmVmxSegmentField; 8] = [ + VMX_SEGMENT_FIELD!(ES), + VMX_SEGMENT_FIELD!(CS), + VMX_SEGMENT_FIELD!(SS), + VMX_SEGMENT_FIELD!(DS), + VMX_SEGMENT_FIELD!(FS), + VMX_SEGMENT_FIELD!(GS), + VMX_SEGMENT_FIELD!(TR), + VMX_SEGMENT_FIELD!(LDTR), +]; + +pub fn seg_setup(seg: usize) -> Result<(), SystemError> { + let seg_field = &KVM_VMX_SEGMENT_FIELDS[seg]; + let mut access_rigt = 0x0093; + if seg == Sreg::CS as usize { + access_rigt |= 0x08; + } + // setup segment fields + vmx_vmwrite(seg_field.selector, 0)?; + vmx_vmwrite(seg_field.base, 0)?; + vmx_vmwrite(seg_field.limit, 0x0000_FFFF)?; + vmx_vmwrite(seg_field.access_rights, access_rigt)?; + + Ok(()) +} diff --git a/kernel/src/arch/x86_64/kvm/vmx/vcpu.rs b/kernel/src/arch/x86_64/kvm/vmx/vcpu.rs new file mode 100644 index 00000000..2136f8a7 --- /dev/null +++ b/kernel/src/arch/x86_64/kvm/vmx/vcpu.rs @@ -0,0 +1,653 @@ +use super::vmcs::{ + VMCSRegion, VmcsFields, VmxEntryCtrl, VmxPrimaryExitCtrl, VmxPrimaryProcessBasedExecuteCtrl, + VmxSecondaryProcessBasedExecuteCtrl, +}; +use super::vmx_asm_wrapper::{vmx_vmclear, vmx_vmptrld, vmx_vmread, vmx_vmwrite, vmxoff, vmxon}; +use crate::arch::kvm::vmx::mmu::KvmMmu; +use crate::arch::kvm::vmx::seg::{seg_setup, Sreg}; +use crate::arch::kvm::vmx::{VcpuRegIndex, X86_CR0}; +use crate::arch::mm::{LockedFrameAllocator, PageMapper}; +use crate::arch::x86_64::mm::X86_64MMArch; +use crate::arch::MMArch; +use crate::kdebug; +use crate::mm::{phys_2_virt, VirtAddr}; +use crate::mm::{MemoryManagementArch, PageTableKind}; +use crate::syscall::SystemError; +use crate::virt::kvm::vcpu::Vcpu; +use crate::virt::kvm::vm::Vm; +use alloc::alloc::Global; +use alloc::boxed::Box; +use core::slice; +use raw_cpuid::CpuId; +use x86; +use x86::{controlregs, msr, segmentation}; +// use crate::arch::kvm::vmx::seg::RMODE_TSS_SIZE; +// use crate::virt::kvm::{KVM}; + +// KERNEL_ALLOCATOR +pub const PAGE_SIZE: usize = 0x1000; +pub const NR_VCPU_REGS: usize = 16; + +#[repr(C, align(4096))] +#[derive(Debug)] +pub struct VmxonRegion { + pub revision_id: u32, + pub data: [u8; PAGE_SIZE - 4], +} + +#[repr(C, align(4096))] +#[derive(Debug)] +pub struct MSRBitmap { + pub data: [u8; PAGE_SIZE], +} + +#[derive(Debug)] +pub struct VcpuData { + /// The virtual and physical address of the Vmxon naturally aligned 4-KByte region of memory + pub vmxon_region: Box, + pub vmxon_region_physical_address: u64, // vmxon需要该地址 + /// The virtual and physical address of the Vmcs naturally aligned 4-KByte region of memory + /// holds the complete CPU state of both the host and the guest. 
+ /// includes the segment registers, GDT, IDT, TR, various MSR’s + /// and control field structures for handling exit and entry operations + pub vmcs_region: Box, + pub vmcs_region_physical_address: u64, // vmptrld, vmclear需要该地址 + pub msr_bitmap: Box, + pub msr_bitmap_physical_address: u64, +} + +#[derive(Default, Debug)] +#[repr(C)] +pub struct VcpuContextFrame { + pub regs: [usize; NR_VCPU_REGS], // 通用寄存器 + pub rip: usize, + pub rflags: usize, +} + +#[derive(Debug)] +#[allow(dead_code)] +pub enum VcpuState { + VcpuInv = 0, + VcpuPend = 1, + VcpuAct = 2, +} + +#[derive(Debug)] +pub struct VmxVcpu { + pub vcpu_id: u32, + pub vcpu_ctx: VcpuContextFrame, // 保存vcpu切换时的上下文,如通用寄存器等 + pub vcpu_state: VcpuState, // vcpu当前运行状态 + pub mmu: KvmMmu, // vcpu的内存管理单元 + pub data: VcpuData, // vcpu的数据 + pub parent_vm: Vm, // parent KVM +} + +impl VcpuData { + pub fn alloc() -> Result { + let vmxon_region: Box = unsafe { + Box::try_new_zeroed_in(Global) + .expect("Try new zeroed fail!") + .assume_init() + }; + let vmcs_region: Box = unsafe { + Box::try_new_zeroed_in(Global) + .expect("Try new zeroed fail!") + .assume_init() + }; + let msr_bitmap: Box = unsafe { + Box::try_new_zeroed_in(Global) + .expect("Try new zeroed fail!") + .assume_init() + }; + // FIXME: virt_2_phys的转换正确性存疑 + let vmxon_region_physical_address = { + let vaddr = VirtAddr::new(vmxon_region.as_ref() as *const _ as _); + unsafe { MMArch::virt_2_phys(vaddr).unwrap().data() as u64 } + }; + let vmcs_region_physical_address = { + let vaddr = VirtAddr::new(vmcs_region.as_ref() as *const _ as _); + unsafe { MMArch::virt_2_phys(vaddr).unwrap().data() as u64 } + }; + let msr_bitmap_physical_address = { + let vaddr = VirtAddr::new(msr_bitmap.as_ref() as *const _ as _); + unsafe { MMArch::virt_2_phys(vaddr).unwrap().data() as u64 } + }; + + let mut instance = Self { + // Allocate a naturally aligned 4-KByte VMXON region of memory to enable VMX operation (Intel Manual: 25.11.5 VMXON Region) + vmxon_region, + vmxon_region_physical_address, + // Allocate a naturally aligned 4-KByte VMCS region of memory + vmcs_region, + vmcs_region_physical_address, + msr_bitmap, + msr_bitmap_physical_address, + }; + // printk_color!(GREEN, BLACK, "[+] init_region\n"); + instance.init_region()?; + Ok(instance) + } + + pub fn init_region(&mut self) -> Result<(), SystemError> { + // Get the Virtual Machine Control Structure revision identifier (VMCS revision ID) + // (Intel Manual: 25.11.5 VMXON Region) + let revision_id = unsafe { (msr::rdmsr(msr::IA32_VMX_BASIC) as u32) & 0x7FFF_FFFF }; + kdebug!("[+] VMXON Region Virtual Address: {:p}", self.vmxon_region); + kdebug!( + "[+] VMXON Region Physical Addresss: 0x{:x}", + self.vmxon_region_physical_address + ); + kdebug!("[+] VMCS Region Virtual Address: {:p}", self.vmcs_region); + kdebug!( + "[+] VMCS Region Physical Address1: 0x{:x}", + self.vmcs_region_physical_address + ); + self.vmxon_region.revision_id = revision_id; + self.vmcs_region.revision_id = revision_id; + return Ok(()); + } +} + +impl VmxVcpu { + pub fn new(vcpu_id: u32, parent_vm: Vm) -> Result { + kdebug!("Creating processor {}", vcpu_id); + let instance = Self { + vcpu_id, + vcpu_ctx: VcpuContextFrame { + regs: [0; NR_VCPU_REGS], + rip: 0, + rflags: 0, + }, + vcpu_state: VcpuState::VcpuInv, + mmu: KvmMmu::default(), + data: VcpuData::alloc()?, + parent_vm, + }; + Ok(instance) + } + + pub fn vmx_set_cr0(cr0: X86_CR0) -> Result<(), SystemError> { + let mut hw_cr0 = cr0 & !(X86_CR0::CR0_NW | X86_CR0::CR0_CD); + hw_cr0 |= X86_CR0::CR0_WP | X86_CR0::CR0_NE; + 
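+        // hw_cr0 is the value intended for the hardware CR0 (CD/NW cleared, WP/NE forced
+        // on); only the caller's cr0 is written to GUEST_CR0 below. A fuller implementation
+        // would load hw_cr0 into GUEST_CR0 and expose cr0 through CTRL_CR0_READ_SHADOW,
+        // as Linux KVM does.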
+ vmx_vmwrite(VmcsFields::GUEST_CR0 as u32, cr0.bits() as u64)?; + Ok(()) + } + + pub fn vmcs_init_guest(&self) -> Result<(), SystemError> { + // https://www.sandpile.org/x86/initial.htm + // segment field initialization + seg_setup(Sreg::CS as usize)?; + vmx_vmwrite(VmcsFields::GUEST_CS_SELECTOR as u32, 0xf000)?; + vmx_vmwrite(VmcsFields::GUEST_CS_BASE as u32, 0xffff0000)?; + + seg_setup(Sreg::DS as usize)?; + seg_setup(Sreg::ES as usize)?; + seg_setup(Sreg::FS as usize)?; + seg_setup(Sreg::GS as usize)?; + seg_setup(Sreg::SS as usize)?; + + vmx_vmwrite(VmcsFields::GUEST_TR_SELECTOR as u32, 0)?; + vmx_vmwrite(VmcsFields::GUEST_TR_BASE as u32, 0)?; + vmx_vmwrite(VmcsFields::GUEST_TR_LIMIT as u32, 0xffff)?; + vmx_vmwrite(VmcsFields::GUEST_TR_ACCESS_RIGHTS as u32, 0x008b)?; + + vmx_vmwrite(VmcsFields::GUEST_LDTR_SELECTOR as u32, 0)?; + vmx_vmwrite(VmcsFields::GUEST_LDTR_BASE as u32, 0)?; + vmx_vmwrite(VmcsFields::GUEST_LDTR_LIMIT as u32, 0xffff)?; + vmx_vmwrite(VmcsFields::GUEST_LDTR_ACCESS_RIGHTS as u32, 0x00082)?; + + vmx_vmwrite(VmcsFields::GUEST_RFLAGS as u32, 2)?; + + vmx_vmwrite(VmcsFields::GUEST_GDTR_BASE as u32, 0)?; + vmx_vmwrite(VmcsFields::GUEST_GDTR_LIMIT as u32, 0x0000_FFFF as u64)?; + + vmx_vmwrite(VmcsFields::GUEST_IDTR_BASE as u32, 0)?; + vmx_vmwrite(VmcsFields::GUEST_IDTR_LIMIT as u32, 0x0000_FFFF as u64)?; + + vmx_vmwrite(VmcsFields::GUEST_ACTIVITY_STATE as u32, 0)?; // State = Active + vmx_vmwrite(VmcsFields::GUEST_INTERRUPTIBILITY_STATE as u32, 0)?; + vmx_vmwrite(VmcsFields::GUEST_PENDING_DBG_EXCEPTIONS as u32, 0)?; + + vmx_vmwrite(VmcsFields::CTRL_VM_ENTRY_INTR_INFO_FIELD as u32, 0)?; + + let cr0 = X86_CR0::CR0_NW | X86_CR0::CR0_CD | X86_CR0::CR0_ET; + Self::vmx_set_cr0(cr0)?; + + vmx_vmwrite(VmcsFields::GUEST_CR0 as u32, cr0.bits() as u64)?; + + vmx_vmwrite( + VmcsFields::GUEST_SYSENTER_CS as u32, + vmx_vmread(VmcsFields::HOST_SYSENTER_CS as u32).unwrap(), + )?; + vmx_vmwrite(VmcsFields::GUEST_VMX_PREEMPT_TIMER_VALUE as u32, 0)?; + + vmx_vmwrite(VmcsFields::GUEST_INTR_STATUS as u32, 0)?; + vmx_vmwrite(VmcsFields::GUEST_PML_INDEX as u32, 0)?; + + vmx_vmwrite(VmcsFields::GUEST_VMCS_LINK_PTR as u32, u64::MAX)?; + vmx_vmwrite(VmcsFields::GUEST_DEBUGCTL as u32, unsafe { + msr::rdmsr(msr::IA32_DEBUGCTL) + })?; + + vmx_vmwrite( + VmcsFields::GUEST_SYSENTER_ESP as u32, + vmx_vmread(VmcsFields::HOST_SYSENTER_ESP as u32).unwrap(), + )?; + vmx_vmwrite( + VmcsFields::GUEST_SYSENTER_EIP as u32, + vmx_vmread(VmcsFields::HOST_SYSENTER_EIP as u32).unwrap(), + )?; + + // Self::vmx_set_cr0(); + vmx_vmwrite(VmcsFields::GUEST_CR3 as u32, 0)?; + vmx_vmwrite( + VmcsFields::GUEST_CR4 as u32, + 1, // enable vme + )?; + vmx_vmwrite(VmcsFields::GUEST_DR7 as u32, 0x0000_0000_0000_0400)?; + vmx_vmwrite( + VmcsFields::GUEST_RSP as u32, + self.vcpu_ctx.regs[VcpuRegIndex::Rsp as usize] as u64, + )?; + vmx_vmwrite(VmcsFields::GUEST_RIP as u32, self.vcpu_ctx.rip as u64)?; + kdebug!("vmcs init guest rip: {:#x}", self.vcpu_ctx.rip as u64); + kdebug!( + "vmcs init guest rsp: {:#x}", + self.vcpu_ctx.regs[VcpuRegIndex::Rsp as usize] as u64 + ); + + // vmx_vmwrite(VmcsFields::GUEST_RFLAGS as u32, x86::bits64::rflags::read().bits())?; + Ok(()) + } + + #[allow(deprecated)] + pub fn vmcs_init_host(&self) -> Result<(), SystemError> { + vmx_vmwrite(VmcsFields::HOST_CR0 as u32, unsafe { + controlregs::cr0().bits().try_into().unwrap() + })?; + vmx_vmwrite(VmcsFields::HOST_CR3 as u32, unsafe { controlregs::cr3() })?; + vmx_vmwrite(VmcsFields::HOST_CR4 as u32, unsafe { + controlregs::cr4().bits().try_into().unwrap() 
+ })?; + vmx_vmwrite( + VmcsFields::HOST_ES_SELECTOR as u32, + (segmentation::es().bits() & (!0x07)).into(), + )?; + vmx_vmwrite( + VmcsFields::HOST_CS_SELECTOR as u32, + (segmentation::cs().bits() & (!0x07)).into(), + )?; + vmx_vmwrite( + VmcsFields::HOST_SS_SELECTOR as u32, + (segmentation::ss().bits() & (!0x07)).into(), + )?; + vmx_vmwrite( + VmcsFields::HOST_DS_SELECTOR as u32, + (segmentation::ds().bits() & (!0x07)).into(), + )?; + vmx_vmwrite( + VmcsFields::HOST_FS_SELECTOR as u32, + (segmentation::fs().bits() & (!0x07)).into(), + )?; + vmx_vmwrite( + VmcsFields::HOST_GS_SELECTOR as u32, + (segmentation::gs().bits() & (!0x07)).into(), + )?; + vmx_vmwrite(VmcsFields::HOST_TR_SELECTOR as u32, unsafe { + (x86::task::tr().bits() & (!0x07)).into() + })?; + vmx_vmwrite(VmcsFields::HOST_FS_BASE as u32, unsafe { + msr::rdmsr(msr::IA32_FS_BASE) + })?; + vmx_vmwrite(VmcsFields::HOST_GS_BASE as u32, unsafe { + msr::rdmsr(msr::IA32_GS_BASE) + })?; + + let mut pseudo_descriptpr: x86::dtables::DescriptorTablePointer = Default::default(); + unsafe { + x86::dtables::sgdt(&mut pseudo_descriptpr); + }; + + vmx_vmwrite( + VmcsFields::HOST_TR_BASE as u32, + get_segment_base(pseudo_descriptpr.base, pseudo_descriptpr.limit, unsafe { + x86::task::tr().bits().into() + }), + )?; + vmx_vmwrite( + VmcsFields::HOST_GDTR_BASE as u32, + pseudo_descriptpr.base.to_bits() as u64, + )?; + vmx_vmwrite(VmcsFields::HOST_IDTR_BASE as u32, unsafe { + let mut pseudo_descriptpr: x86::dtables::DescriptorTablePointer = + Default::default(); + x86::dtables::sidt(&mut pseudo_descriptpr); + pseudo_descriptpr.base.to_bits() as u64 + })?; + + // fast entry into the kernel + vmx_vmwrite(VmcsFields::HOST_SYSENTER_ESP as u32, unsafe { + msr::rdmsr(msr::IA32_SYSENTER_ESP) + })?; + vmx_vmwrite(VmcsFields::HOST_SYSENTER_EIP as u32, unsafe { + msr::rdmsr(msr::IA32_SYSENTER_EIP) + })?; + vmx_vmwrite(VmcsFields::HOST_SYSENTER_CS as u32, unsafe { + msr::rdmsr(msr::IA32_SYSENTER_CS) + })?; + + // vmx_vmwrite(VmcsFields::HOST_RIP as u32, vmx_return as *const () as u64)?; + // kdebug!("vmcs init host rip: {:#x}", vmx_return as *const () as u64); + + Ok(()) + } + + // Intel SDM Volume 3C Chapter 25.3 “Organization of VMCS Data” + pub fn vmcs_init(&self) -> Result<(), SystemError> { + vmx_vmwrite(VmcsFields::CTRL_PAGE_FAULT_ERR_CODE_MASK as u32, 0)?; + vmx_vmwrite(VmcsFields::CTRL_PAGE_FAULT_ERR_CODE_MATCH as u32, 0)?; + vmx_vmwrite(VmcsFields::CTRL_CR3_TARGET_COUNT as u32, 0)?; + + vmx_vmwrite( + VmcsFields::CTRL_PIN_BASED_VM_EXEC_CTRLS as u32, + adjust_vmx_pinbased_controls() as u64, + )?; + + vmx_vmwrite( + VmcsFields::CTRL_MSR_BITMAP_ADDR as u32, + self.data.msr_bitmap_physical_address, + )?; + + vmx_vmwrite(VmcsFields::CTRL_CR0_READ_SHADOW as u32, unsafe { + controlregs::cr0().bits().try_into().unwrap() + })?; + vmx_vmwrite(VmcsFields::CTRL_CR4_READ_SHADOW as u32, unsafe { + controlregs::cr4().bits().try_into().unwrap() + })?; + vmx_vmwrite( + VmcsFields::CTRL_VM_ENTRY_CTRLS as u32, + adjust_vmx_entry_controls() as u64, + )?; + vmx_vmwrite( + VmcsFields::CTRL_PRIMARY_VM_EXIT_CTRLS as u32, + adjust_vmx_exit_controls() as u64, + )?; + vmx_vmwrite( + VmcsFields::CTRL_PRIMARY_PROCESSOR_VM_EXEC_CTRLS as u32, + adjust_vmx_primary_process_exec_controls() as u64, + )?; + vmx_vmwrite( + VmcsFields::CTRL_SECONDARY_PROCESSOR_VM_EXEC_CTRLS as u32, + adjust_vmx_secondary_process_exec_controls() as u64, + )?; + + self.vmcs_init_host()?; + self.vmcs_init_guest()?; + Ok(()) + } + + fn kvm_mmu_load(&mut self) -> Result<(), SystemError> { + 
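+        // Build the guest's EPT here: allocate a fresh top-level EPT page table, program
+        // its physical address into the VMCS EPT pointer through mmu.set_eptp (tdp_set_eptp
+        // encodes the memory type and a 4-level walk into the low bits), and remember it in
+        // mmu.root_hpa so that __direct_map can tell the MMU has been loaded.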
kdebug!("kvm_mmu_load!"); + // 申请并创建新的页表 + let mapper: crate::mm::page::PageMapper = unsafe { + PageMapper::create(PageTableKind::EPT, LockedFrameAllocator) + .ok_or(SystemError::ENOMEM)? + }; + + let ept_root_hpa = mapper.table().phys(); + let set_eptp_fn = self.mmu.set_eptp.unwrap(); + set_eptp_fn(ept_root_hpa.data() as u64)?; + self.mmu.root_hpa = ept_root_hpa.data() as u64; + kdebug!("ept_root_hpa:{:x}!", ept_root_hpa.data() as u64); + + return Ok(()); + } + + pub fn set_regs(&mut self, regs: VcpuContextFrame) -> Result<(), SystemError> { + self.vcpu_ctx = regs; + Ok(()) + } +} + +impl Vcpu for VmxVcpu { + /// Virtualize the CPU + fn virtualize_cpu(&mut self) -> Result<(), SystemError> { + match has_intel_vmx_support() { + Ok(_) => { + kdebug!("[+] CPU supports Intel VMX"); + } + Err(e) => { + kdebug!("[-] CPU does not support Intel VMX: {:?}", e); + return Err(SystemError::EOPNOTSUPP_OR_ENOTSUP); + } + }; + + match enable_vmx_operation() { + Ok(_) => { + kdebug!("[+] Enabling Virtual Machine Extensions (VMX)"); + } + Err(_) => { + kdebug!("[-] VMX operation is not supported on this processor."); + return Err(SystemError::EOPNOTSUPP_OR_ENOTSUP); + } + } + + vmxon(self.data.vmxon_region_physical_address)?; + kdebug!("[+] VMXON successful!"); + vmx_vmclear(self.data.vmcs_region_physical_address)?; + vmx_vmptrld(self.data.vmcs_region_physical_address)?; + kdebug!("[+] VMPTRLD successful!"); + self.vmcs_init().expect("vncs_init fail"); + kdebug!("[+] VMCS init!"); + // kdebug!("vmcs init host rip: {:#x}", vmx_return as *const () as u64); + // kdebug!("vmcs init host rsp: {:#x}", x86::bits64::registers::rsp()); + // vmx_vmwrite(VmcsFields::HOST_RSP as u32, x86::bits64::registers::rsp())?; + // vmx_vmwrite(VmcsFields::HOST_RIP as u32, vmx_return as *const () as u64)?; + // vmx_vmwrite(VmcsFields::HOST_RSP as u32, x86::bits64::registers::rsp())?; + self.kvm_mmu_load()?; + Ok(()) + } + + fn devirtualize_cpu(&self) -> Result<(), SystemError> { + vmxoff()?; + Ok(()) + } + + /// Gets the index of the current logical/virtual processor + fn id(&self) -> u32 { + self.vcpu_id + } +} + +pub fn get_segment_base(gdt_base: *const u64, gdt_size: u16, segment_selector: u16) -> u64 { + let table = segment_selector & 0x0004; // get table indicator in selector + let index = (segment_selector >> 3) as usize; // get index in selector + if table == 0 && index == 0 { + return 0; + } + let descriptor_table = unsafe { slice::from_raw_parts(gdt_base, gdt_size.into()) }; + let descriptor = descriptor_table[index]; + + let base_high = (descriptor & 0xFF00_0000_0000_0000) >> 32; + let base_mid = (descriptor & 0x0000_00FF_0000_0000) >> 16; + let base_low = (descriptor & 0x0000_0000_FFFF_0000) >> 16; + let segment_base = (base_high | base_mid | base_low) & 0xFFFFFFFF; + let virtaddr = phys_2_virt(segment_base.try_into().unwrap()) + .try_into() + .unwrap(); + kdebug!( + "segment_base={:x}", + phys_2_virt(segment_base.try_into().unwrap()) + ); + return virtaddr; +} + +// FIXME: may have bug +// pub fn read_segment_access_rights(segement_selector: u16) -> u32{ +// let table = segement_selector & 0x0004; // get table indicator in selector +// let index = segement_selector & 0xFFF8; // get index in selector +// let mut flag: u16; +// if table==0 && index==0 { +// return 0; +// } +// unsafe{ +// asm!( +// "lar {0:r}, rcx", +// "mov {1:r}, {0:r}", +// in(reg) segement_selector, +// out(reg) flag, +// ); +// } +// return (flag >> 8) as u32; +// } +pub fn adjust_vmx_controls(ctl_min: u32, ctl_opt: u32, msr: u32, result: &mut u32) 
{ + let vmx_msr_low: u32 = unsafe { (msr::rdmsr(msr) & 0x0000_0000_FFFF_FFFF) as u32 }; + let vmx_msr_high: u32 = unsafe { (msr::rdmsr(msr) << 32) as u32 }; + let mut ctl: u32 = ctl_min | ctl_opt; + ctl &= vmx_msr_high; /* bit == 0 in high word ==> must be zero */ + ctl |= vmx_msr_low; /* bit == 1 in low word ==> must be one */ + *result = ctl; +} + +pub fn adjust_vmx_entry_controls() -> u32 { + let mut entry_controls: u32 = 0; + adjust_vmx_controls( + VmxEntryCtrl::LOAD_DBG_CTRLS.bits(), + VmxEntryCtrl::IA32E_MODE_GUEST.bits(), + msr::IA32_VMX_ENTRY_CTLS, //Capability Reporting Register of VM-entry Controls (R/O) + &mut entry_controls, + ); + return entry_controls; + // msr::IA32_VMX_TRUE_ENTRY_CTLS//Capability Reporting Register of VM-entry Flex Controls (R/O) See Table 35-2 +} + +pub fn adjust_vmx_exit_controls() -> u32 { + let mut exit_controls: u32 = 0; + adjust_vmx_controls( + VmxPrimaryExitCtrl::SAVE_DBG_CTRLS.bits(), + VmxPrimaryExitCtrl::HOST_ADDR_SPACE_SIZE.bits(), + msr::IA32_VMX_EXIT_CTLS, + &mut exit_controls, + ); + return exit_controls; +} + +pub fn adjust_vmx_pinbased_controls() -> u32 { + let mut controls: u32 = 0000_0016; + adjust_vmx_controls(0, 0, msr::IA32_VMX_TRUE_PINBASED_CTLS, &mut controls); + // kdebug!("adjust_vmx_pinbased_controls: {:x}", controls); + return controls; +} + +pub fn adjust_vmx_primary_process_exec_controls() -> u32 { + let mut controls: u32 = 0; + adjust_vmx_controls( + 0, + VmxPrimaryProcessBasedExecuteCtrl::USE_MSR_BITMAPS.bits() + | VmxPrimaryProcessBasedExecuteCtrl::ACTIVATE_SECONDARY_CONTROLS.bits(), + msr::IA32_VMX_PROCBASED_CTLS, + &mut controls, + ); + return controls; +} + +pub fn adjust_vmx_secondary_process_exec_controls() -> u32 { + let mut controls: u32 = 0; + adjust_vmx_controls( + 0, + VmxSecondaryProcessBasedExecuteCtrl::ENABLE_RDTSCP.bits() + | VmxSecondaryProcessBasedExecuteCtrl::ENABLE_XSAVES_XRSTORS.bits() + | VmxSecondaryProcessBasedExecuteCtrl::ENABLE_INVPCID.bits() + | VmxSecondaryProcessBasedExecuteCtrl::ENABLE_EPT.bits() + | VmxSecondaryProcessBasedExecuteCtrl::UNRESTRICTED_GUEST.bits(), + msr::IA32_VMX_PROCBASED_CTLS2, + &mut controls, + ); + return controls; +} + +/// Check to see if CPU is Intel (“GenuineIntel”). 
+/// Check processor supports for Virtual Machine Extension (VMX) technology +// CPUID.1:ECX.VMX[bit 5] = 1 (Intel Manual: 24.6 Discovering Support for VMX) +pub fn has_intel_vmx_support() -> Result<(), SystemError> { + let cpuid = CpuId::new(); + if let Some(vi) = cpuid.get_vendor_info() { + if vi.as_str() != "GenuineIntel" { + return Err(SystemError::EOPNOTSUPP_OR_ENOTSUP); + } + } + if let Some(fi) = cpuid.get_feature_info() { + if !fi.has_vmx() { + return Err(SystemError::EOPNOTSUPP_OR_ENOTSUP); + } + } + Ok(()) +} + +/// Enables Virtual Machine Extensions +// - CR4.VMXE[bit 13] = 1 (Intel Manual: 24.7 Enabling and Entering VMX Operation) +pub fn enable_vmx_operation() -> Result<(), SystemError> { + let mut cr4 = unsafe { controlregs::cr4() }; + cr4.set(controlregs::Cr4::CR4_ENABLE_VMX, true); + unsafe { controlregs::cr4_write(cr4) }; + + set_lock_bit()?; + kdebug!("[+] Lock bit set via IA32_FEATURE_CONTROL"); + set_cr0_bits(); + kdebug!("[+] Mandatory bits in CR0 set/cleared"); + set_cr4_bits(); + kdebug!("[+] Mandatory bits in CR4 set/cleared"); + + Ok(()) +} + +/// Check if we need to set bits in IA32_FEATURE_CONTROL +// (Intel Manual: 24.7 Enabling and Entering VMX Operation) +fn set_lock_bit() -> Result<(), SystemError> { + const VMX_LOCK_BIT: u64 = 1 << 0; + const VMXON_OUTSIDE_SMX: u64 = 1 << 2; + + let ia32_feature_control = unsafe { msr::rdmsr(msr::IA32_FEATURE_CONTROL) }; + + if (ia32_feature_control & VMX_LOCK_BIT) == 0 { + unsafe { + msr::wrmsr( + msr::IA32_FEATURE_CONTROL, + VMXON_OUTSIDE_SMX | VMX_LOCK_BIT | ia32_feature_control, + ) + }; + } else if (ia32_feature_control & VMXON_OUTSIDE_SMX) == 0 { + return Err(SystemError::EPERM); + } + + Ok(()) +} + +/// Set the mandatory bits in CR0 and clear bits that are mandatory zero +/// (Intel Manual: 24.8 Restrictions on VMX Operation) +fn set_cr0_bits() { + let ia32_vmx_cr0_fixed0 = unsafe { msr::rdmsr(msr::IA32_VMX_CR0_FIXED0) }; + let ia32_vmx_cr0_fixed1 = unsafe { msr::rdmsr(msr::IA32_VMX_CR0_FIXED1) }; + + let mut cr0 = unsafe { controlregs::cr0() }; + + cr0 |= controlregs::Cr0::from_bits_truncate(ia32_vmx_cr0_fixed0 as usize); + cr0 &= controlregs::Cr0::from_bits_truncate(ia32_vmx_cr0_fixed1 as usize); + + unsafe { controlregs::cr0_write(cr0) }; +} + +/// Set the mandatory bits in CR4 and clear bits that are mandatory zero +/// (Intel Manual: 24.8 Restrictions on VMX Operation) +fn set_cr4_bits() { + let ia32_vmx_cr4_fixed0 = unsafe { msr::rdmsr(msr::IA32_VMX_CR4_FIXED0) }; + let ia32_vmx_cr4_fixed1 = unsafe { msr::rdmsr(msr::IA32_VMX_CR4_FIXED1) }; + + let mut cr4 = unsafe { controlregs::cr4() }; + + cr4 |= controlregs::Cr4::from_bits_truncate(ia32_vmx_cr4_fixed0 as usize); + cr4 &= controlregs::Cr4::from_bits_truncate(ia32_vmx_cr4_fixed1 as usize); + + unsafe { controlregs::cr4_write(cr4) }; +} diff --git a/kernel/src/arch/x86_64/kvm/vmx/vmcs.rs b/kernel/src/arch/x86_64/kvm/vmx/vmcs.rs new file mode 100644 index 00000000..abbfe540 --- /dev/null +++ b/kernel/src/arch/x86_64/kvm/vmx/vmcs.rs @@ -0,0 +1,539 @@ +use bitflags::bitflags; +use num_derive::FromPrimitive; + +pub const PAGE_SIZE: usize = 0x1000; + +#[repr(C, align(4096))] +#[derive(Clone, Debug)] +pub struct VMCSRegion { + pub revision_id: u32, + pub abort_indicator: u32, + data: [u8; PAGE_SIZE - 8], +} + +// (Intel Manual: 25.11.2 VMREAD, VMWRITE, and Encodings of VMCS Fields) +#[derive(FromPrimitive)] +enum VmcsAccessType { + FULL = 0, + HIGH = 1, +} + +#[derive(FromPrimitive)] +enum VmcsType { + CONTROL = 0, + VMEXIT = 1, + GUEST = 2, + HOST = 3, +} + 
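+// Every constant in VmcsFields below is produced by encode_vmcs_field_full, which is not
+// visible in this excerpt. As an illustrative sketch only (the real helper may differ), the
+// Intel SDM Appendix B encoding it has to produce is:
+//   bit 0       access type (0 = full, 1 = high half of a 64-bit field)
+//   bits 9:1    index of the field within its group
+//   bits 11:10  field type (control / VM-exit information / guest state / host state)
+//   bits 14:13  field width (16-bit / 64-bit / 32-bit / natural)
+//
+// const fn encode_vmcs_field(access: VmcsAccessType, t: VmcsType, w: VmcsWidth, index: u32) -> u32 {
+//     (access as u32) | (index << 1) | ((t as u32) << 10) | ((w as u32) << 13)
+// }
+// encode_vmcs_field_full(t, w, index) would then fix the access type to FULL.
+// Sanity check: GUEST (2 << 10) | BIT32 (2 << 13) | index 23 (23 << 1) == 0x482E, which matches
+// the hardcoded GUEST_VMX_PREEMPT_TIMER_VALUE below.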
+#[derive(FromPrimitive)] +enum VmcsWidth { + BIT16 = 0, + BIT64 = 1, + BIT32 = 2, + NATURAL = 3, +} + +#[derive(FromPrimitive)] +#[allow(non_camel_case_types)] +// (Intel Manual: APPENDIX B FIELD ENCODING IN VMCS) +pub enum VmcsFields { + // [CONTROL] fields + // 16-bit control fields + CTRL_VIRT_PROC_ID = encode_vmcs_field_full(VmcsType::CONTROL, VmcsWidth::BIT16, 0) as isize, + CTRL_POSTED_INTR_N_VECTOR = + encode_vmcs_field_full(VmcsType::CONTROL, VmcsWidth::BIT16, 1) as isize, + CTRL_EPTP_INDEX = encode_vmcs_field_full(VmcsType::CONTROL, VmcsWidth::BIT16, 2) as isize, + // 64-bit control fields + CTRL_IO_BITMAP_A_ADDR = encode_vmcs_field_full(VmcsType::CONTROL, VmcsWidth::BIT64, 0) as isize, + CTRL_IO_BITMAP_B_ADDR = encode_vmcs_field_full(VmcsType::CONTROL, VmcsWidth::BIT64, 1) as isize, + CTRL_MSR_BITMAP_ADDR = encode_vmcs_field_full(VmcsType::CONTROL, VmcsWidth::BIT64, 2) as isize, // control whether RDMSR or WRMSR cause VM exit + CTRL_VMEXIT_MSR_STORE_ADDR = + encode_vmcs_field_full(VmcsType::CONTROL, VmcsWidth::BIT64, 3) as isize, + CTRL_VMEXIT_MSR_LOAD_ADDR = + encode_vmcs_field_full(VmcsType::CONTROL, VmcsWidth::BIT64, 4) as isize, + CTRL_VMENTRY_MSR_LOAD_ADDR = + encode_vmcs_field_full(VmcsType::CONTROL, VmcsWidth::BIT64, 5) as isize, + CTRL_EXECUTIVE_VMCS_PTR = + encode_vmcs_field_full(VmcsType::CONTROL, VmcsWidth::BIT64, 6) as isize, + CTRL_PML_ADDR = encode_vmcs_field_full(VmcsType::CONTROL, VmcsWidth::BIT64, 7) as isize, + CTRL_TSC_ADDR = encode_vmcs_field_full(VmcsType::CONTROL, VmcsWidth::BIT64, 8) as isize, + CTRL_VIRT_APIC_ADDR = encode_vmcs_field_full(VmcsType::CONTROL, VmcsWidth::BIT64, 9) as isize, + CTRL_APIC_ACCESS_ADDR = + encode_vmcs_field_full(VmcsType::CONTROL, VmcsWidth::BIT64, 10) as isize, + CTRL_POSTED_INTR_DESC_ADDR = + encode_vmcs_field_full(VmcsType::CONTROL, VmcsWidth::BIT64, 11) as isize, + CTRL_VMFUNC_CTRL = encode_vmcs_field_full(VmcsType::CONTROL, VmcsWidth::BIT64, 12) as isize, + CTRL_EPTP_PTR = encode_vmcs_field_full(VmcsType::CONTROL, VmcsWidth::BIT64, 13) as isize, + CTRL_EOI_EXIT_BITMAP_0 = + encode_vmcs_field_full(VmcsType::CONTROL, VmcsWidth::BIT64, 14) as isize, + CTRL_EOI_EXIT_BITMAP_1 = + encode_vmcs_field_full(VmcsType::CONTROL, VmcsWidth::BIT64, 15) as isize, + CTRL_EOI_EXIT_BITMAP_2 = + encode_vmcs_field_full(VmcsType::CONTROL, VmcsWidth::BIT64, 16) as isize, + CTRL_EOI_EXIT_BITMAP_3 = + encode_vmcs_field_full(VmcsType::CONTROL, VmcsWidth::BIT64, 17) as isize, + CTRL_EPT_LIST_ADDR = encode_vmcs_field_full(VmcsType::CONTROL, VmcsWidth::BIT64, 18) as isize, + CTRL_VMREAD_BITMAP_ADDR = + encode_vmcs_field_full(VmcsType::CONTROL, VmcsWidth::BIT64, 19) as isize, + CTRL_VMWRITE_BITMAP_ADDR = + encode_vmcs_field_full(VmcsType::CONTROL, VmcsWidth::BIT64, 20) as isize, + CTRL_VIRT_EXECPT_INFO_ADDR = + encode_vmcs_field_full(VmcsType::CONTROL, VmcsWidth::BIT64, 21) as isize, + CTRL_XSS_EXITING_BITMAP = + encode_vmcs_field_full(VmcsType::CONTROL, VmcsWidth::BIT64, 22) as isize, + CTRL_ENCLS_EXITING_BITMAP = + encode_vmcs_field_full(VmcsType::CONTROL, VmcsWidth::BIT64, 23) as isize, + CTRL_TSC_MULTIPLIER = encode_vmcs_field_full(VmcsType::CONTROL, VmcsWidth::BIT64, 25) as isize, + // 32-bit control fields + CTRL_PIN_BASED_VM_EXEC_CTRLS = + encode_vmcs_field_full(VmcsType::CONTROL, VmcsWidth::BIT32, 0) as isize, // control async event handling (i.e. interrupts) + CTRL_PRIMARY_PROCESSOR_VM_EXEC_CTRLS = + encode_vmcs_field_full(VmcsType::CONTROL, VmcsWidth::BIT32, 1) as isize, // control sync event handling (i.e. 
instruction exits) + CTRL_EXPECTION_BITMAP = encode_vmcs_field_full(VmcsType::CONTROL, VmcsWidth::BIT32, 2) as isize, // bitmap to control exceptions that cause a VM exit + CTRL_PAGE_FAULT_ERR_CODE_MASK = + encode_vmcs_field_full(VmcsType::CONTROL, VmcsWidth::BIT32, 3) as isize, + CTRL_PAGE_FAULT_ERR_CODE_MATCH = + encode_vmcs_field_full(VmcsType::CONTROL, VmcsWidth::BIT32, 4) as isize, + CTRL_CR3_TARGET_COUNT = encode_vmcs_field_full(VmcsType::CONTROL, VmcsWidth::BIT32, 5) as isize, + CTRL_PRIMARY_VM_EXIT_CTRLS = + encode_vmcs_field_full(VmcsType::CONTROL, VmcsWidth::BIT32, 6) as isize, + CTRL_VM_EXIT_MSR_STORE_COUNT = + encode_vmcs_field_full(VmcsType::CONTROL, VmcsWidth::BIT32, 7) as isize, + CTRL_VM_EXIT_MSR_LOAD_COUNT = + encode_vmcs_field_full(VmcsType::CONTROL, VmcsWidth::BIT32, 8) as isize, + CTRL_VM_ENTRY_CTRLS = encode_vmcs_field_full(VmcsType::CONTROL, VmcsWidth::BIT32, 9) as isize, + CTRL_VM_ENTRY_MSR_LOAD_COUNT = + encode_vmcs_field_full(VmcsType::CONTROL, VmcsWidth::BIT32, 10) as isize, + CTRL_VM_ENTRY_INTR_INFO_FIELD = + encode_vmcs_field_full(VmcsType::CONTROL, VmcsWidth::BIT32, 11) as isize, + CTRL_VM_ENTRY_EXCEPTION_ERR_CODE = + encode_vmcs_field_full(VmcsType::CONTROL, VmcsWidth::BIT32, 12) as isize, + CTRL_VM_ENTRY_INSTR_LEN = + encode_vmcs_field_full(VmcsType::CONTROL, VmcsWidth::BIT32, 13) as isize, + CTRL_TPR_THRESHOLD = encode_vmcs_field_full(VmcsType::CONTROL, VmcsWidth::BIT32, 14) as isize, + CTRL_SECONDARY_PROCESSOR_VM_EXEC_CTRLS = + encode_vmcs_field_full(VmcsType::CONTROL, VmcsWidth::BIT32, 15) as isize, + CTRL_PLE_GAP = encode_vmcs_field_full(VmcsType::CONTROL, VmcsWidth::BIT32, 16) as isize, + CTRL_PLE_WINDOW = encode_vmcs_field_full(VmcsType::CONTROL, VmcsWidth::BIT32, 17) as isize, + // natural control fields + CTRL_CR0_GUEST_HOST_MASK = + encode_vmcs_field_full(VmcsType::CONTROL, VmcsWidth::NATURAL, 0) as isize, // control executions of insts that access cr0 + CTRL_CR4_GUEST_HOST_MASK = + encode_vmcs_field_full(VmcsType::CONTROL, VmcsWidth::NATURAL, 1) as isize, + CTRL_CR0_READ_SHADOW = + encode_vmcs_field_full(VmcsType::CONTROL, VmcsWidth::NATURAL, 2) as isize, // control executions of insts that access cr0 + CTRL_CR4_READ_SHADOW = + encode_vmcs_field_full(VmcsType::CONTROL, VmcsWidth::NATURAL, 3) as isize, + CTRL_CR3_TARGET_VALUE_0 = + encode_vmcs_field_full(VmcsType::CONTROL, VmcsWidth::NATURAL, 4) as isize, + CTRL_CR3_TARGET_VALUE_1 = + encode_vmcs_field_full(VmcsType::CONTROL, VmcsWidth::NATURAL, 5) as isize, + CTRL_CR3_TARGET_VALUE_2 = + encode_vmcs_field_full(VmcsType::CONTROL, VmcsWidth::NATURAL, 6) as isize, + CTRL_CR3_TARGET_VALUE_3 = + encode_vmcs_field_full(VmcsType::CONTROL, VmcsWidth::NATURAL, 7) as isize, + + // [VMEXIT] fields read-only + // No 16-bit vmexit fields + // 64-bit vmexit fields + VMEXIT_GUEST_PHY_ADDR = encode_vmcs_field_full(VmcsType::VMEXIT, VmcsWidth::BIT64, 0) as isize, + // 32-bit vmexit fields + VMEXIT_INSTR_ERR = encode_vmcs_field_full(VmcsType::VMEXIT, VmcsWidth::BIT32, 0) as isize, + VMEXIT_EXIT_REASON = encode_vmcs_field_full(VmcsType::VMEXIT, VmcsWidth::BIT32, 1) as isize, + VMEXIT_INT_INFO = encode_vmcs_field_full(VmcsType::VMEXIT, VmcsWidth::BIT32, 2) as isize, + VMEXIT_INT_ERR_CODE = encode_vmcs_field_full(VmcsType::VMEXIT, VmcsWidth::BIT32, 3) as isize, + VMEXIT_IDT_VECTOR_INFO = encode_vmcs_field_full(VmcsType::VMEXIT, VmcsWidth::BIT32, 4) as isize, + VMEXIT_IDT_VECTOR_ERR_CODE = + encode_vmcs_field_full(VmcsType::VMEXIT, VmcsWidth::BIT32, 5) as isize, + VMEXIT_INSTR_LEN = 
encode_vmcs_field_full(VmcsType::VMEXIT, VmcsWidth::BIT32, 6) as isize, + VMEXIT_INSTR_INFO = encode_vmcs_field_full(VmcsType::VMEXIT, VmcsWidth::BIT32, 7) as isize, + // natural vmexit fields + VMEXIT_QUALIFICATION = encode_vmcs_field_full(VmcsType::VMEXIT, VmcsWidth::NATURAL, 0) as isize, + VMEXIT_IO_RCX = encode_vmcs_field_full(VmcsType::VMEXIT, VmcsWidth::NATURAL, 1) as isize, + VMEXIT_IO_RSX = encode_vmcs_field_full(VmcsType::VMEXIT, VmcsWidth::NATURAL, 2) as isize, + VMEXIT_IO_RDI = encode_vmcs_field_full(VmcsType::VMEXIT, VmcsWidth::NATURAL, 3) as isize, + VMEXIT_IO_RIP = encode_vmcs_field_full(VmcsType::VMEXIT, VmcsWidth::NATURAL, 4) as isize, + VMEXIT_GUEST_LINEAR_ADDR = + encode_vmcs_field_full(VmcsType::VMEXIT, VmcsWidth::NATURAL, 5) as isize, + + // [GUEST] fields + // 16-bit guest fields + GUEST_ES_SELECTOR = encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::BIT16, 0) as isize, + GUEST_CS_SELECTOR = encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::BIT16, 1) as isize, + GUEST_SS_SELECTOR = encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::BIT16, 2) as isize, + GUEST_DS_SELECTOR = encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::BIT16, 3) as isize, + GUEST_FS_SELECTOR = encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::BIT16, 4) as isize, + GUEST_GS_SELECTOR = encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::BIT16, 5) as isize, + GUEST_LDTR_SELECTOR = encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::BIT16, 6) as isize, + GUEST_TR_SELECTOR = encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::BIT16, 7) as isize, + GUEST_INTR_STATUS = encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::BIT16, 8) as isize, + GUEST_PML_INDEX = encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::BIT16, 9) as isize, + // 64-bit guest fields + GUEST_VMCS_LINK_PTR = encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::BIT64, 0) as isize, + GUEST_DEBUGCTL = encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::BIT64, 1) as isize, + GUEST_PAT = encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::BIT64, 2) as isize, + GUEST_EFER = encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::BIT64, 3) as isize, + GUEST_PERF_GLOBAL_CTRL = encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::BIT64, 4) as isize, + GUEST_PDPTE0 = encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::BIT64, 5) as isize, + GUEST_PDPTE1 = encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::BIT64, 6) as isize, + GUEST_PDPTE2 = encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::BIT64, 7) as isize, + GUEST_PDPTE3 = encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::BIT64, 8) as isize, + // 32-bit guest fields + GUEST_ES_LIMIT = encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::BIT32, 0) as isize, + GUEST_CS_LIMIT = encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::BIT32, 1) as isize, + GUEST_SS_LIMIT = encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::BIT32, 2) as isize, + GUEST_DS_LIMIT = encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::BIT32, 3) as isize, + GUEST_FS_LIMIT = encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::BIT32, 4) as isize, + GUEST_GS_LIMIT = encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::BIT32, 5) as isize, + GUEST_LDTR_LIMIT = encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::BIT32, 6) as isize, + GUEST_TR_LIMIT = encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::BIT32, 7) as isize, + GUEST_GDTR_LIMIT = encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::BIT32, 8) as isize, + GUEST_IDTR_LIMIT = encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::BIT32, 9) as isize, + GUEST_ES_ACCESS_RIGHTS = 
encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::BIT32, 10) as isize, + GUEST_CS_ACCESS_RIGHTS = encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::BIT32, 11) as isize, + GUEST_SS_ACCESS_RIGHTS = encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::BIT32, 12) as isize, + GUEST_DS_ACCESS_RIGHTS = encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::BIT32, 13) as isize, + GUEST_FS_ACCESS_RIGHTS = encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::BIT32, 14) as isize, + GUEST_GS_ACCESS_RIGHTS = encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::BIT32, 15) as isize, + GUEST_LDTR_ACCESS_RIGHTS = + encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::BIT32, 16) as isize, + GUEST_TR_ACCESS_RIGHTS = encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::BIT32, 17) as isize, + GUEST_INTERRUPTIBILITY_STATE = + encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::BIT32, 18) as isize, + GUEST_ACTIVITY_STATE = encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::BIT32, 19) as isize, + GUEST_SMBASE = encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::BIT32, 20) as isize, + GUEST_SYSENTER_CS = encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::BIT32, 21) as isize, + GUEST_VMX_PREEMPT_TIMER_VALUE = 0x482E as isize, + // natural guest fields + GUEST_CR0 = encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::NATURAL, 0) as isize, + GUEST_CR3 = encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::NATURAL, 1) as isize, + GUEST_CR4 = encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::NATURAL, 2) as isize, + GUEST_ES_BASE = encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::NATURAL, 3) as isize, + GUEST_CS_BASE = encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::NATURAL, 4) as isize, + GUEST_SS_BASE = encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::NATURAL, 5) as isize, + GUEST_DS_BASE = encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::NATURAL, 6) as isize, + GUEST_FS_BASE = encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::NATURAL, 7) as isize, + GUEST_GS_BASE = encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::NATURAL, 8) as isize, + GUEST_LDTR_BASE = encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::NATURAL, 9) as isize, + GUEST_TR_BASE = encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::NATURAL, 10) as isize, + GUEST_GDTR_BASE = encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::NATURAL, 11) as isize, + GUEST_IDTR_BASE = encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::NATURAL, 12) as isize, + GUEST_DR7 = encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::NATURAL, 13) as isize, + GUEST_RSP = encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::NATURAL, 14) as isize, + GUEST_RIP = encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::NATURAL, 15) as isize, + GUEST_RFLAGS = encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::NATURAL, 16) as isize, + GUEST_PENDING_DBG_EXCEPTIONS = + encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::NATURAL, 17) as isize, + GUEST_SYSENTER_ESP = encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::NATURAL, 18) as isize, + GUEST_SYSENTER_EIP = encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::NATURAL, 19) as isize, + + // [HOST] fields + // host 16 bit fields + HOST_ES_SELECTOR = encode_vmcs_field_full(VmcsType::HOST, VmcsWidth::BIT16, 0) as isize, + HOST_CS_SELECTOR = encode_vmcs_field_full(VmcsType::HOST, VmcsWidth::BIT16, 1) as isize, + HOST_SS_SELECTOR = encode_vmcs_field_full(VmcsType::HOST, VmcsWidth::BIT16, 2) as isize, + HOST_DS_SELECTOR = encode_vmcs_field_full(VmcsType::HOST, VmcsWidth::BIT16, 3) as isize, + HOST_FS_SELECTOR = encode_vmcs_field_full(VmcsType::HOST, 
VmcsWidth::BIT16, 4) as isize, + HOST_GS_SELECTOR = encode_vmcs_field_full(VmcsType::HOST, VmcsWidth::BIT16, 5) as isize, + HOST_TR_SELECTOR = encode_vmcs_field_full(VmcsType::HOST, VmcsWidth::BIT16, 6) as isize, + // host 64 bit fields + HOST_PAT = encode_vmcs_field_full(VmcsType::HOST, VmcsWidth::BIT64, 0) as isize, + HOST_EFER = encode_vmcs_field_full(VmcsType::HOST, VmcsWidth::BIT64, 1) as isize, + HOST_PERF_GLOBAL_CTRL = encode_vmcs_field_full(VmcsType::HOST, VmcsWidth::BIT64, 2) as isize, + // host 32 bit fields + HOST_SYSENTER_CS = encode_vmcs_field_full(VmcsType::HOST, VmcsWidth::BIT32, 0) as isize, + // host natural fields + HOST_CR0 = encode_vmcs_field_full(VmcsType::HOST, VmcsWidth::NATURAL, 0) as isize, + HOST_CR3 = encode_vmcs_field_full(VmcsType::HOST, VmcsWidth::NATURAL, 1) as isize, + HOST_CR4 = encode_vmcs_field_full(VmcsType::HOST, VmcsWidth::NATURAL, 2) as isize, + HOST_FS_BASE = encode_vmcs_field_full(VmcsType::HOST, VmcsWidth::NATURAL, 3) as isize, + HOST_GS_BASE = encode_vmcs_field_full(VmcsType::HOST, VmcsWidth::NATURAL, 4) as isize, + HOST_TR_BASE = encode_vmcs_field_full(VmcsType::HOST, VmcsWidth::NATURAL, 5) as isize, + HOST_GDTR_BASE = encode_vmcs_field_full(VmcsType::HOST, VmcsWidth::NATURAL, 6) as isize, + HOST_IDTR_BASE = encode_vmcs_field_full(VmcsType::HOST, VmcsWidth::NATURAL, 7) as isize, + HOST_SYSENTER_ESP = encode_vmcs_field_full(VmcsType::HOST, VmcsWidth::NATURAL, 8) as isize, + HOST_SYSENTER_EIP = encode_vmcs_field_full(VmcsType::HOST, VmcsWidth::NATURAL, 9) as isize, + HOST_RSP = encode_vmcs_field_full(VmcsType::HOST, VmcsWidth::NATURAL, 10) as isize, + HOST_RIP = encode_vmcs_field_full(VmcsType::HOST, VmcsWidth::NATURAL, 11) as isize, +} + +// (Intel Manual: 25.6 VM-EXECUTION CONTROL FIELDS) +bitflags! { + // (Intel Manual: 25.6.1 Pin-Based VM-Execution Controls) + #[allow(non_camel_case_types)] + pub struct VmxPinBasedExecuteCtrl: u32 { + const EXTERNAL_INTERRUPT_EXITING = 1 << 0; // external interrupts cause VM exits + const NMI_EXITING = 1 << 3; // non-maskable interrupts (NMIs) cause VM exits. 
+ const VIRTUAL_NMIS = 1 << 5; // NMIs are never blocked and the “blocking by NMI” bit (bit 3) in the interruptibility-state field indicates “virtual-NMI blocking” + const VMX_PREEMPTION_TIMER = 1 << 6; // the VMX-preemption timer counts down in VMX non-root operation + const PROCESS_POSTED_INTERRUPTS = 1 << 7; // he processor treats interrupts with the posted-interrupt notification vector + } + + // (Intel Manual: 25.6.2 Processor-Based VM-Execution Controls) + #[allow(non_camel_case_types)] + pub struct VmxPrimaryProcessBasedExecuteCtrl: u32{ + const INTERRUPT_WINDOW_EXITING = 1 << 2; // VM exits on interrupt window RFLAGS.IF = 1 + const USE_TSC_OFFSETTING = 1 << 3; // TSC offsetting is enabled + const HLT_EXITING = 1 << 7; + const INVLPG_EXITING = 1 << 9; + const MWAIT_EXITING = 1 << 10; + const RDPMC_EXITING = 1 << 11; + const RDTSC_EXITING = 1 << 12; + const CR3_LOAD_EXITING = 1 << 15; + const CR3_STR_EXITING = 1 << 16; + const CR8_LOAD_EXITING = 1 << 19; + const CR8_STR_EXITING = 1 << 20; + const USE_TPR_SHADOW = 1 << 21; + const NMI_WINDOW_EXITING = 1 << 22; + const MOV_DR_EXITING = 1 << 23; + const UNCOND_IO_EXITING = 1 << 24; + const USE_IO_BITMAPS = 1 << 25; + const MONITOR_TRAP_FLAG = 1 << 27; + const USE_MSR_BITMAPS = 1 << 28; + const MONITOR_EXITING = 1 << 29; + const PAUSE_EXITING = 1 << 30; + const ACTIVATE_SECONDARY_CONTROLS = 1 << 31; + } + + // (Intel Manual: 25.6.2 Processor-Based VM-Execution Controls) + pub struct VmxSecondaryProcessBasedExecuteCtrl: u32{ + const VIRT_APIC_ACCESS = 1 << 0; + const ENABLE_EPT = 1 << 1; + const DESCRIPTOR_TABLE_EXITING = 1 << 2; + const ENABLE_RDTSCP = 1 << 3; + const VIRT_X2APIC_MODE = 1 << 4; + const ENABLE_VPID = 1 << 5; + const WBINVD_EXITING = 1 << 6; + const UNRESTRICTED_GUEST = 1 << 7; + const APCI_REGISTER_VIRT = 1 << 8; + const VIRT_INTR_DELIVERY = 1 << 9; + const PAUSE_LOOP_EXITING = 1 << 10; + const RDRAND_EXITING = 1 << 11; + const ENABLE_INVPCID = 1 << 12; + const ENABLE_VM_FUNCTIONS = 1 << 13; + const VMCS_SHADOWING = 1 << 14; + const ENABLE_ENCLS_EXITING = 1 << 15; + const RDSEED_EXITING = 1 << 16; + const ENABLE_PML = 1 << 17; + const EPT_VIOLATION_VE = 1 << 18; + const CONCEAL_VMX_FROM_PT = 1 << 19; + const ENABLE_XSAVES_XRSTORS = 1 << 20; + const PASID_TRANSLATION = 1 << 21; + const MODE_BASED_EPT_EXEC = 1 << 22; + const SUB_PAGE_WRITE_PERM = 1 << 23; + const PT_USE_GUEST_PYH_ADDR = 1 << 24; + const USE_TSC_SCALING = 1 << 25; + const ENABLE_USER_WAIT_PAUSE = 1 << 26; + const ENABLE_PCONFIG = 1 << 27; + const ENABLE_ENCLV_EXITING = 1 << 28; + const VMM_BUS_LOCK_DETECTION = 1 << 30; + const INST_TIMEOUT = 1 << 31; + } + + // (Intel Manual: 25.7.1 VM-Exit Controls) + #[allow(non_camel_case_types)] + pub struct VmxPrimaryExitCtrl: u32 { + const SAVE_DBG_CTRLS = 1 << 2; + const HOST_ADDR_SPACE_SIZE = 1 << 9; // determines if a virtual processor will be in 64-bit mode after a VM exit + const LOAD_IA32_PERF_GLOBAL_CTRL = 1 << 12; + const ACK_INTERRUPT_ON_EXIT = 1 << 15; + const SAVE_IA32_PAT = 1 << 18; + const LOAD_IA32_PAT = 1 << 19; + const SAVE_IA32_EFER = 1 << 20; + const LOAD_IA32_EFER = 1 << 21; + const SAVE_VMX_PREEMPT_TIMER_VALUE = 1 << 22; + const CLEAR_IA32_BNDCFGS = 1 << 23; + const CONCEAL_VMX_FROM_PT = 1 << 24; + const CLEAR_IA32_RTIT_CTL = 1 << 25; + const CLEAR_IA32_LBR_CTL = 1 << 26; + const CLEAR_UINV = 1 << 27; + const LOAD_CET_STATE = 1 << 28; + const LOAD_PKRS = 1 << 29; + const SAVE_IA32_PERF_GLOBAL_CTL = 1 << 30; + const ACTIVATE_SECONDARY_CONTROLS = 1 << 31; + } + + // (Intel Manual: 25.8.1 VM-Entry 
Controls) + #[allow(non_camel_case_types)] + pub struct VmxEntryCtrl: u32 { + const LOAD_DBG_CTRLS = 1 << 2; + const IA32E_MODE_GUEST = 1 << 9; + const ENTRY_TO_SMM = 1 << 10; + const DEACTIVATE_DUAL_MONITOR = 1 << 11; + const LOAD_IA32_PERF_GLOBAL_CTRL = 1 << 13; + const LOAD_IA32_PAT = 1 << 14; + const LOAD_IA32_EFER = 1 << 15; + const LOAD_IA32_BNDCFGS = 1 << 16; + const CONCEAL_VMX_FROM_PT = 1 << 17; + const LOAD_IA32_RTIT_CTL = 1 << 18; + const LOAD_UINV = 1 << 19; + const LOAD_CET_STATE = 1 << 20; + const LOAD_PKRS = 1 << 21; + const LOAD_IA32_PERF_GLOBAL_CTL = 1 << 22; + } + +} + +#[derive(FromPrimitive)] +#[allow(non_camel_case_types)] +pub enum VmxExitReason { + EXCEPTION_OR_NMI = 0, + EXTERNAL_INTERRUPT = 1, + TRIPLE_FAULT = 2, + INIT_SIGNAL = 3, + SIPI = 4, + IO_SMI = 5, + OTHER_SMI = 6, + INTERRUPT_WINDOW = 7, + NMI_WINDOW = 8, + TASK_SWITCH = 9, + CPUID = 10, + GETSEC = 11, + HLT = 12, + INVD = 13, + INVLPG = 14, + RDPMC = 15, + RDTSC = 16, + RSM = 17, + VMCALL = 18, + VMCLEAR = 19, + VMLAUNCH = 20, + VMPTRLD = 21, + VMPTRST = 22, + VMREAD = 23, + VMRESUME = 24, + VMWRITE = 25, + VMXOFF = 26, + VMXON = 27, + CR_ACCESS = 28, + DR_ACCESS = 29, + IO_INSTRUCTION = 30, + RDMSR = 31, + WRMSR = 32, + VM_ENTRY_FAILURE_INVALID_GUEST_STATE = 33, + VM_ENTRY_FAILURE_MSR_LOADING = 34, + MWAIT = 36, + MONITOR_TRAP_FLAG = 37, + MONITOR = 39, + PAUSE = 40, + VM_ENTRY_FAILURE_MACHINE_CHECK_EVENT = 41, + TPR_BELOW_THRESHOLD = 43, + APIC_ACCESS = 44, + VIRTUALIZED_EOI = 45, + ACCESS_GDTR_OR_IDTR = 46, + ACCESS_LDTR_OR_TR = 47, + EPT_VIOLATION = 48, + EPT_MISCONFIG = 49, + INVEPT = 50, + RDTSCP = 51, + VMX_PREEMPTION_TIMER_EXPIRED = 52, + INVVPID = 53, + WBINVD = 54, + XSETBV = 55, + APIC_WRITE = 56, + RDRAND = 57, + INVPCID = 58, + VMFUNC = 59, + ENCLS = 60, + RDSEED = 61, + PML_FULL = 62, + XSAVES = 63, + XRSTORS = 64, +} + +impl From for VmxExitReason { + fn from(num: i32) -> Self { + match num { + 0 => VmxExitReason::EXCEPTION_OR_NMI, + 1 => VmxExitReason::EXTERNAL_INTERRUPT, + 2 => VmxExitReason::TRIPLE_FAULT, + 3 => VmxExitReason::INIT_SIGNAL, + 4 => VmxExitReason::SIPI, + 5 => VmxExitReason::IO_SMI, + 6 => VmxExitReason::OTHER_SMI, + 7 => VmxExitReason::INTERRUPT_WINDOW, + 8 => VmxExitReason::NMI_WINDOW, + 9 => VmxExitReason::TASK_SWITCH, + 10 => VmxExitReason::CPUID, + 11 => VmxExitReason::GETSEC, + 12 => VmxExitReason::HLT, + 13 => VmxExitReason::INVD, + 14 => VmxExitReason::INVLPG, + 15 => VmxExitReason::RDPMC, + 16 => VmxExitReason::RDTSC, + 17 => VmxExitReason::RSM, + 18 => VmxExitReason::VMCALL, + 19 => VmxExitReason::VMCLEAR, + 20 => VmxExitReason::VMLAUNCH, + 21 => VmxExitReason::VMPTRLD, + 22 => VmxExitReason::VMPTRST, + 23 => VmxExitReason::VMREAD, + 24 => VmxExitReason::VMRESUME, + 25 => VmxExitReason::VMWRITE, + 26 => VmxExitReason::VMXOFF, + 27 => VmxExitReason::VMXON, + 28 => VmxExitReason::CR_ACCESS, + 29 => VmxExitReason::DR_ACCESS, + 30 => VmxExitReason::IO_INSTRUCTION, + 31 => VmxExitReason::RDMSR, + 32 => VmxExitReason::WRMSR, + 33 => VmxExitReason::VM_ENTRY_FAILURE_INVALID_GUEST_STATE, + 34 => VmxExitReason::VM_ENTRY_FAILURE_MSR_LOADING, + 36 => VmxExitReason::MWAIT, + 37 => VmxExitReason::MONITOR_TRAP_FLAG, + 39 => VmxExitReason::MONITOR, + 40 => VmxExitReason::PAUSE, + 41 => VmxExitReason::VM_ENTRY_FAILURE_MACHINE_CHECK_EVENT, + 43 => VmxExitReason::TPR_BELOW_THRESHOLD, + 44 => VmxExitReason::APIC_ACCESS, + 45 => VmxExitReason::VIRTUALIZED_EOI, + 46 => VmxExitReason::ACCESS_GDTR_OR_IDTR, + 47 => VmxExitReason::ACCESS_LDTR_OR_TR, + 48 => VmxExitReason::EPT_VIOLATION, + 
49 => VmxExitReason::EPT_MISCONFIG, + 50 => VmxExitReason::INVEPT, + 51 => VmxExitReason::RDTSCP, + 52 => VmxExitReason::VMX_PREEMPTION_TIMER_EXPIRED, + 53 => VmxExitReason::INVVPID, + 54 => VmxExitReason::WBINVD, + 55 => VmxExitReason::XSETBV, + 56 => VmxExitReason::APIC_WRITE, + 57 => VmxExitReason::RDRAND, + 58 => VmxExitReason::INVPCID, + 59 => VmxExitReason::VMFUNC, + 60 => VmxExitReason::ENCLS, + 61 => VmxExitReason::RDSEED, + 62 => VmxExitReason::PML_FULL, + 63 => VmxExitReason::XSAVES, + 64 => VmxExitReason::XRSTORS, + _ => panic!("Invalid VmxExitReason number: {}", num), + } + } +} + +const fn encode_vmcs_field( + access_type: VmcsAccessType, + vmcs_type: VmcsType, + vmcs_width: VmcsWidth, + index: u32, +) -> u32 { + let mut encoding: u32 = 0; + encoding |= (access_type as u32) + | (index as u32) << 1 + | (vmcs_type as u32) << 10 + | (vmcs_width as u32) << 13; + return encoding; +} + +const fn encode_vmcs_field_full(vmcs_type: VmcsType, vmcs_width: VmcsWidth, index: u32) -> u32 { + encode_vmcs_field(VmcsAccessType::FULL, vmcs_type, vmcs_width, index) +} + +// fn decode_vmcs_field(field: u32) -> (VmcsAccessType, VmcsType, VmcsWidth, u16){ +// (FromPrimitive::from_u32(field & 1).unwrap() , +// FromPrimitive::from_u32((field>>10) & 0x3).unwrap(), +// FromPrimitive::from_u32((field>>13) & 0x3).unwrap(), +// ((field>>1) & 0x1ff) as u16 +// ) +// } diff --git a/kernel/src/arch/x86_64/kvm/vmx/vmexit.rs b/kernel/src/arch/x86_64/kvm/vmx/vmexit.rs new file mode 100644 index 00000000..ead82cd2 --- /dev/null +++ b/kernel/src/arch/x86_64/kvm/vmx/vmexit.rs @@ -0,0 +1,269 @@ +use super::vmcs::{VmcsFields, VmxExitReason}; +use super::vmx_asm_wrapper::{vmx_vmread, vmx_vmwrite}; +use crate::kdebug; +use crate::{syscall::SystemError, virt::kvm::vm}; +use core::arch::asm; +use x86::vmx::vmcs::ro::GUEST_PHYSICAL_ADDR_FULL; + +#[derive(FromPrimitive)] +#[allow(non_camel_case_types)] +pub enum APICExceptionVectors { + EXCEPTION_DIVIDE_ERROR, + EXCEPTION_DEBUG_BREAKPOINT, + EXCEPTION_NMI, + EXCEPTION_BREAKPOINT, + EXCEPTION_OVERFLOW, + EXCEPTION_BOUND_RANGE_EXCEEDED, + EXCEPTION_UNDEFINED_OPCODE, + EXCEPTION_NO_MATH_COPROCESSOR, + EXCEPTION_DOUBLE_FAULT, + EXCEPTION_RESERVED0, + EXCEPTION_INVALID_TASK_SEGMENT_SELECTOR, + EXCEPTION_SEGMENT_NOT_PRESENT, + EXCEPTION_STACK_SEGMENT_FAULT, + EXCEPTION_GENERAL_PROTECTION_FAULT, + EXCEPTION_PAGE_FAULT, + EXCEPTION_RESERVED1, + EXCEPTION_MATH_FAULT, + EXCEPTION_ALIGNMENT_CHECK, + EXCEPTION_MACHINE_CHECK, + EXCEPTION_SIMD_FLOATING_POINT_NUMERIC_ERROR, + EXCEPTION_VIRTUAL_EXCEPTION, + EXCEPTION_RESERVED2, + EXCEPTION_RESERVED3, + EXCEPTION_RESERVED4, + EXCEPTION_RESERVED5, + EXCEPTION_RESERVED6, + EXCEPTION_RESERVED7, + EXCEPTION_RESERVED8, + EXCEPTION_RESERVED9, + EXCEPTION_RESERVED10, + EXCEPTION_RESERVED11, + EXCEPTION_RESERVED12, +} + +#[derive(FromPrimitive)] +#[allow(non_camel_case_types)] +pub enum InterruptType { + INTERRUPT_TYPE_EXTERNAL_INTERRUPT = 0, + INTERRUPT_TYPE_RESERVED = 1, + INTERRUPT_TYPE_NMI = 2, + INTERRUPT_TYPE_HARDWARE_EXCEPTION = 3, + INTERRUPT_TYPE_SOFTWARE_INTERRUPT = 4, + INTERRUPT_TYPE_PRIVILEGED_SOFTWARE_INTERRUPT = 5, + INTERRUPT_TYPE_SOFTWARE_EXCEPTION = 6, + INTERRUPT_TYPE_OTHER_EVENT = 7, +} + +pub fn vmexit_vmx_instruction_executed() -> Result<(), SystemError> { + let valid: u32 = 1; + let vector: u32 = APICExceptionVectors::EXCEPTION_UNDEFINED_OPCODE as u32; + let interrupt_type = InterruptType::INTERRUPT_TYPE_HARDWARE_EXCEPTION as u32; + let deliver_code: u32 = 0; + let interrupt_info = valid << 31 | interrupt_type << 8 | 
deliver_code << 11 | vector; + vmx_vmwrite( + VmcsFields::CTRL_VM_ENTRY_INTR_INFO_FIELD as u32, + interrupt_info as u64, + )?; + vmx_vmwrite(VmcsFields::CTRL_VM_ENTRY_INSTR_LEN as u32, 0)?; + let rflags: u64 = vmx_vmread(VmcsFields::GUEST_RFLAGS as u32).unwrap() | 0x0001_0000; // set RF flags + vmx_vmwrite(VmcsFields::GUEST_RFLAGS as u32, rflags)?; + Ok(()) +} + +// pub fn vmexit_cpuid_handler(guest_cpu_context: &mut GuestCpuContext) -> Result<(), SystemError>{ +// let rax = guest_cpu_context.rax; +// let rcx = guest_cpu_context.rcx; +// // let rdx = guest_cpu_context.rdx; +// // let rbx = guest_cpu_context.rbx; +// cpuid!(rax, rcx); +// unsafe{asm!("mov {}, rax", out(reg) guest_cpu_context.rax)}; +// unsafe{asm!("mov {}, rcx", out(reg) guest_cpu_context.rcx)}; +// unsafe{asm!("mov {}, rdx", out(reg) guest_cpu_context.rdx)}; +// unsafe{asm!("mov {}, rbx", out(reg) guest_cpu_context.rbx)}; +// Ok(()) +// } + +unsafe fn save_rpg() { + asm!( + "push rax", + "push rcx", + "push rdx", + "push rbx", + "push rbp", + "push rsi", + "push rdi", + "push r8", + "push r9", + "push r10", + "push r11", + "push r12", + "push r13", + "push r14", + "push r15", + ); +} + +unsafe fn restore_rpg() { + asm!( + "pop r15", + "pop r14", + "pop r13", + "pop r12", + "pop r11", + "pop r10", + "pop r9", + "pop r8", + "pop rdi", + "pop rsi", + "pop rbp", + "pop rbx", + "pop rdx", + "pop rcx", + "pop rax", + ); +} + +#[repr(C)] +#[allow(dead_code)] +pub struct GuestCpuContext { + pub r15: u64, + pub r14: u64, + pub r13: u64, + pub r12: u64, + pub r11: u64, + pub r10: u64, + pub r9: u64, + pub r8: u64, + pub rdi: u64, + pub rsi: u64, + pub rbp: u64, + pub rbx: u64, + pub rdx: u64, + pub rcx: u64, + pub rax: u64, +} + +#[no_mangle] +pub extern "C" fn vmx_return() { + kdebug!("vmx_return!"); + unsafe { save_rpg() }; + vmexit_handler(); + // XMM registers are vector registers. 
They're renamed onto the FP/SIMD register file + // unsafe {asm!( + // "sub rsp, 60h", + // "movaps xmmword ptr [rsp + 0h], xmm0", + // "movaps xmmword ptr [rsp + 10h], xmm1", + // "movaps xmmword ptr [rsp + 20h], xmm2", + // "movaps xmmword ptr [rsp + 30h], xmm3", + // "movaps xmmword ptr [rsp + 40h], xmm4", + // "movaps xmmword ptr [rsp + 50h], xmm5", + + // "mov rdi, rsp", + // "sub rsp, 20h", + // "call vmexit_handler", + // "add rsp, 20h", + + // "movaps xmm0, xmmword ptr [rsp + 0h]", + // "movaps xmm1, xmmword ptr [rsp + 10h]", + // "movaps xmm2, xmmword ptr [rsp + 20h]", + // "movaps xmm3, xmmword ptr [rsp + 30h]", + // "movaps xmm4, xmmword ptr [rsp + 40h]", + // "movaps xmm5, xmmword ptr [rsp + 50h]", + // "add rsp, 60h", + // clobber_abi("C"), + // )}; + unsafe { restore_rpg() }; + unsafe { asm!("vmresume",) }; +} + +#[no_mangle] +extern "C" fn vmexit_handler() { + // let guest_cpu_context = unsafe { guest_cpu_context_ptr.as_mut().unwrap() }; + // kdebug!("guest_cpu_context_ptr={:p}",guest_cpu_context_ptr); + kdebug!("vmexit handler!"); + + let exit_reason = vmx_vmread(VmcsFields::VMEXIT_EXIT_REASON as u32).unwrap() as u32; + let exit_basic_reason = exit_reason & 0x0000_ffff; + let guest_rip = vmx_vmread(VmcsFields::GUEST_RIP as u32).unwrap(); + // let guest_rsp = vmx_vmread(VmcsFields::GUEST_RSP as u32).unwrap(); + kdebug!("guest_rip={:x}", guest_rip); + let _guest_rflags = vmx_vmread(VmcsFields::GUEST_RFLAGS as u32).unwrap(); + + match VmxExitReason::from(exit_basic_reason as i32) { + VmxExitReason::VMCALL + | VmxExitReason::VMCLEAR + | VmxExitReason::VMLAUNCH + | VmxExitReason::VMPTRLD + | VmxExitReason::VMPTRST + | VmxExitReason::VMREAD + | VmxExitReason::VMRESUME + | VmxExitReason::VMWRITE + | VmxExitReason::VMXOFF + | VmxExitReason::VMXON + | VmxExitReason::VMFUNC + | VmxExitReason::INVEPT + | VmxExitReason::INVVPID => { + kdebug!("vmexit handler: vmx instruction!"); + vmexit_vmx_instruction_executed().expect("previledge instruction handle error"); + } + VmxExitReason::CPUID => { + kdebug!("vmexit handler: cpuid instruction!"); + // vmexit_cpuid_handler(guest_cpu_context); + adjust_rip(guest_rip).unwrap(); + } + VmxExitReason::RDMSR => { + kdebug!("vmexit handler: rdmsr instruction!"); + adjust_rip(guest_rip).unwrap(); + } + VmxExitReason::WRMSR => { + kdebug!("vmexit handler: wrmsr instruction!"); + adjust_rip(guest_rip).unwrap(); + } + VmxExitReason::TRIPLE_FAULT => { + kdebug!("vmexit handler: triple fault!"); + adjust_rip(guest_rip).unwrap(); + } + VmxExitReason::EPT_VIOLATION => { + kdebug!("vmexit handler: ept violation!"); + let gpa = vmx_vmread(GUEST_PHYSICAL_ADDR_FULL as u32).unwrap(); + let exit_qualification = vmx_vmread(VmcsFields::VMEXIT_QUALIFICATION as u32).unwrap(); + /* It is a write fault? */ + let mut error_code = exit_qualification & (1 << 1); + /* It is a fetch fault? */ + error_code |= (exit_qualification << 2) & (1 << 4); + /* ept page table is present? 
*/ + error_code |= (exit_qualification >> 3) & (1 << 0); + + let kvm = vm(0).unwrap(); + let vcpu = kvm.vcpu[0].clone(); + // Dispatch to the vcpu's MMU page-fault handler to resolve the EPT violation + let kvm_ept_page_fault = vcpu.lock().mmu.page_fault.unwrap(); + kvm_ept_page_fault(&mut (*vcpu.lock()), gpa, error_code as u32, false) + .expect("ept page fault error"); + } + _ => { + kdebug!( + "vmexit handler: unhandled vmexit reason: {}!", + exit_basic_reason + ); + + let info = vmx_vmread(VmcsFields::VMEXIT_INSTR_LEN as u32).unwrap() as u32; + kdebug!("vmexit handler: VMEXIT_INSTR_LEN: {}!", info); + let info = vmx_vmread(VmcsFields::VMEXIT_INSTR_INFO as u32).unwrap() as u32; + kdebug!("vmexit handler: VMEXIT_INSTR_INFO: {}!", info); + let info = vmx_vmread(VmcsFields::CTRL_EXPECTION_BITMAP as u32).unwrap() as u32; + kdebug!("vmexit handler: CTRL_EXPECTION_BITMAP: {}!", info); + + adjust_rip(guest_rip).unwrap(); + // panic!(); + } + } +} + +#[no_mangle] +fn adjust_rip(rip: u64) -> Result<(), SystemError> { + let instruction_length = vmx_vmread(VmcsFields::VMEXIT_INSTR_LEN as u32)?; + vmx_vmwrite(VmcsFields::GUEST_RIP as u32, rip + instruction_length)?; + Ok(()) +}
diff --git a/kernel/src/arch/x86_64/kvm/vmx/vmx_asm_wrapper.rs b/kernel/src/arch/x86_64/kvm/vmx/vmx_asm_wrapper.rs new file mode 100644 index 00000000..1d7b66fb --- /dev/null +++ b/kernel/src/arch/x86_64/kvm/vmx/vmx_asm_wrapper.rs @@ -0,0 +1,96 @@ +use super::vmcs::VmcsFields; +use crate::kdebug; +use crate::syscall::SystemError; +use core::arch::asm; +use x86; +/// Enable VMX operation. +pub fn vmxon(vmxon_pa: u64) -> Result<(), SystemError> { + match unsafe { x86::bits64::vmx::vmxon(vmxon_pa) } { + Ok(_) => Ok(()), + Err(e) => { + kdebug!("vmxon fail: {:?}", e); + Err(SystemError::EVMXONFailed) + } + } +} + +/// Disable VMX operation. +pub fn vmxoff() -> Result<(), SystemError> { + match unsafe { x86::bits64::vmx::vmxoff() } { + Ok(_) => Ok(()), + Err(_) => Err(SystemError::EVMXOFFFailed), + } +} + +/// vmwrite to the current VMCS. +pub fn vmx_vmwrite(vmcs_field: u32, value: u64) -> Result<(), SystemError> { + match unsafe { x86::bits64::vmx::vmwrite(vmcs_field, value) } { + Ok(_) => Ok(()), + Err(e) => { + kdebug!("vmx_write fail: {:?}", e); + kdebug!("vmcs_field: {:x}", vmcs_field); + Err(SystemError::EVMWRITEFailed) + } + } +} + +/// vmread from the current VMCS.
+pub fn vmx_vmread(vmcs_field: u32) -> Result<u64, SystemError> { + match unsafe { x86::bits64::vmx::vmread(vmcs_field) } { + Ok(value) => Ok(value), + Err(e) => { + kdebug!("vmx_read fail: {:?}", e); + Err(SystemError::EVMREADFailed) + } + } +} + +pub fn vmx_vmptrld(vmcs_pa: u64) -> Result<(), SystemError> { + match unsafe { x86::bits64::vmx::vmptrld(vmcs_pa) } { + Ok(_) => Ok(()), + Err(_) => Err(SystemError::EVMPRTLDFailed), + } +} + +pub fn vmx_vmlaunch() -> Result<(), SystemError> { + let host_rsp = VmcsFields::HOST_RSP as u32; + let host_rip = VmcsFields::HOST_RIP as u32; + unsafe { + asm!( + "push rbp", + "push rcx", + "push rdx", + "push rsi", + "push rdi", + "vmwrite {0:r}, rsp", + "lea rax, 1f[rip]", + "vmwrite {1:r}, rax", + "vmlaunch", + "1:", + "pop rdi", + "pop rsi", + "pop rdx", + "pop rcx", + "pop rbp", + "call vmx_return", + in(reg) host_rsp, + in(reg) host_rip, + clobber_abi("C"), + ) + } + Ok(()) + // match unsafe { x86::bits64::vmx::vmlaunch() } { + // Ok(_) => Ok(()), + // Err(e) => { + // kdebug!("vmx_launch fail: {:?}", e); + // Err(SystemError::EVMLAUNCHFailed) + // }, + // } +} + +pub fn vmx_vmclear(vmcs_pa: u64) -> Result<(), SystemError> { + match unsafe { x86::bits64::vmx::vmclear(vmcs_pa) } { + Ok(_) => Ok(()), + Err(_) => Err(SystemError::EVMPRTLDFailed), + } +}
diff --git a/kernel/src/arch/x86_64/mm/mod.rs b/kernel/src/arch/x86_64/mm/mod.rs index ab9a86ea..39a59355 100644 --- a/kernel/src/arch/x86_64/mm/mod.rs +++ b/kernel/src/arch/x86_64/mm/mod.rs @@ -34,6 +34,9 @@ use core::mem::{self}; use core::sync::atomic::{compiler_fence, AtomicBool, Ordering}; +use super::kvm::vmx::vmcs::VmcsFields; +use super::kvm::vmx::vmx_asm_wrapper::vmx_vmread; + pub type PageMapper = crate::mm::page::PageMapper; @@ -169,12 +172,21 @@ impl MemoryManagementArch for X86_64MMArch { } /// @brief 获取顶级页表的物理地址 - unsafe fn table(_table_kind: PageTableKind) -> PhysAddr { - let paddr: usize; - compiler_fence(Ordering::SeqCst); - asm!("mov {}, cr3", out(reg) paddr, options(nomem, nostack, preserves_flags)); - compiler_fence(Ordering::SeqCst); - return PhysAddr::new(paddr); + unsafe fn table(table_kind: PageTableKind) -> PhysAddr { + match table_kind { + PageTableKind::Kernel | PageTableKind::User => { + let paddr: usize; + compiler_fence(Ordering::SeqCst); + asm!("mov {}, cr3", out(reg) paddr, options(nomem, nostack, preserves_flags)); + compiler_fence(Ordering::SeqCst); + return PhysAddr::new(paddr); + } + PageTableKind::EPT => { + let eptp = + vmx_vmread(VmcsFields::CTRL_EPTP_PTR as u32).expect("Failed to read eptp"); + return PhysAddr::new(eptp as usize); + } + } } /// @brief 设置顶级页表的物理地址到处理器中
diff --git a/kernel/src/arch/x86_64/mod.rs b/kernel/src/arch/x86_64/mod.rs index 3be7b192..a0fcbfb9 100644 --- a/kernel/src/arch/x86_64/mod.rs +++ b/kernel/src/arch/x86_64/mod.rs @@ -6,6 +6,7 @@ pub mod cpu; pub mod fpu; pub mod interrupt; pub mod ipc; +pub mod kvm; pub mod libs; pub mod mm; pub mod msi; @@ -25,4 +26,6 @@ pub use self::mm::X86_64MMArch as MMArch; pub use interrupt::X86_64InterruptArch as CurrentIrqArch; pub use crate::arch::asm::pio::X86_64PortIOArch as CurrentPortIOArch; +pub use kvm::X86_64KVMArch as KVMArch; + pub use crate::arch::ipc::signal::X86_64SignalArch as CurrentSignalArch;
diff --git a/kernel/src/filesystem/devfs/mod.rs b/kernel/src/filesystem/devfs/mod.rs index 89be49fd..a5aec8ff 100644 --- a/kernel/src/filesystem/devfs/mod.rs +++ b/kernel/src/filesystem/devfs/mod.rs @@ -149,6 +149,11 @@ impl DevFS { dev_block_inode.add_dev(name, device.clone())?;
device.set_fs(dev_block_inode.0.lock().fs.clone()); } + FileType::KvmDevice => { + dev_root_inode + .add_dev(name, device.clone()) + .expect("DevFS: Failed to register /dev/kvm"); + } _ => { return Err(SystemError::EOPNOTSUPP_OR_ENOTSUP); }
diff --git a/kernel/src/filesystem/vfs/file.rs b/kernel/src/filesystem/vfs/file.rs index c6495d61..67073af0 100644 --- a/kernel/src/filesystem/vfs/file.rs +++ b/kernel/src/filesystem/vfs/file.rs @@ -136,6 +136,7 @@ impl File { }; // kdebug!("inode:{:?}",f.inode); f.inode.open(&mut f.private_data, &mode)?; + return Ok(f); }
diff --git a/kernel/src/filesystem/vfs/mod.rs b/kernel/src/filesystem/vfs/mod.rs index f01d69b8..0197a6b8 100644 --- a/kernel/src/filesystem/vfs/mod.rs +++ b/kernel/src/filesystem/vfs/mod.rs @@ -39,6 +39,8 @@ pub enum FileType { BlockDevice, /// 字符设备 CharDevice, + /// kvm设备 + KvmDevice, /// 管道文件 Pipe, /// 符号链接 @@ -88,6 +90,7 @@ impl FileType { FileType::Dir => DT_DIR, FileType::BlockDevice => DT_BLK, FileType::CharDevice => DT_CHR, + FileType::KvmDevice => DT_CHR, FileType::Pipe => DT_FIFO, FileType::SymLink => DT_LNK, FileType::Socket => DT_SOCK,
diff --git a/kernel/src/filesystem/vfs/syscall.rs b/kernel/src/filesystem/vfs/syscall.rs index 6a39570d..e4ca340f 100644 --- a/kernel/src/filesystem/vfs/syscall.rs +++ b/kernel/src/filesystem/vfs/syscall.rs @@ -25,6 +25,7 @@ use super::{ utils::rsplit_path, Dirent, FileType, IndexNode, MAX_PATHLEN, ROOT_INODE, VFS_MAX_FOLLOW_SYMLINK_TIMES, }; +// use crate::kdebug; pub const SEEK_SET: u32 = 0; pub const SEEK_CUR: u32 = 1; @@ -207,7 +208,6 @@ impl Syscall { if mode.contains(FileMode::O_APPEND) { file.lseek(SeekFrom::SeekEnd(0))?; } - // 把文件对象存入pcb let r = ProcessManager::current_pcb() .fd_table() @@ -232,6 +232,27 @@ impl Syscall { return res; } + /// @brief 发送命令到文件描述符对应的设备, + /// + /// @param fd 文件描述符编号 + /// @param cmd 设备相关的请求类型 + /// + /// @return Ok(usize) 成功返回0 + /// @return Err(SystemError) 操作失败,返回posix错误码 + pub fn ioctl(fd: usize, cmd: u32, data: usize) -> Result<usize, SystemError> { + let binding = ProcessManager::current_pcb().fd_table(); + let fd_table_guard = binding.read(); + + let file = fd_table_guard + .get_file_by_fd(fd as i32) + .ok_or(SystemError::EBADF)?; + + // drop guard 以避免无法调度的问题 + drop(fd_table_guard); + let r = file.lock_no_preempt().inode().ioctl(cmd, data); + return r; + } + /// @brief 根据文件描述符,读取文件数据。尝试读取的数据长度与buf的长度相同。 /// /// @param fd 文件描述符编号 @@ -700,6 +721,7 @@ impl Syscall { FileType::SymLink => kstat.mode.insert(ModeType::S_IFLNK), FileType::Socket => kstat.mode.insert(ModeType::S_IFSOCK), FileType::Pipe => kstat.mode.insert(ModeType::S_IFIFO), + FileType::KvmDevice => kstat.mode.insert(ModeType::S_IFCHR), } return Ok(kstat);
diff --git a/kernel/src/ktest/ktest.h b/kernel/src/ktest/ktest.h index cd6b041f..3b4b54b1 100644 --- a/kernel/src/ktest/ktest.h +++ b/kernel/src/ktest/ktest.h @@ -5,6 +5,7 @@ int ktest_test_bitree(void* arg); int ktest_test_kfifo(void* arg); int ktest_test_mutex(void* arg); int ktest_test_idr(void* arg); +int ktest_test_kvm(void* arg); /** * @brief 开启一个新的内核线程以进行测试
diff --git a/kernel/src/ktest/test-kvm.c b/kernel/src/ktest/test-kvm.c new file mode 100644 index 00000000..66cc4f63 --- /dev/null +++ b/kernel/src/ktest/test-kvm.c @@ -0,0 +1,23 @@ +#include "ktest.h" +#include "ktest_utils.h" + +static long ktest_kvm_case0_1(uint64_t arg0, uint64_t arg1){ + kTEST("Testing /dev/kvm device..."); + return 0; +} + +static ktest_case_table kt_kvm_func_table[] = { + ktest_kvm_case0_1, +}; + +int ktest_test_kvm(void* arg) +{ + kTEST("Testing kvm..."); + for (int i = 0; i <
sizeof(kt_kvm_func_table) / sizeof(ktest_case_table); ++i) + { + kTEST("Testing case %d", i); + kt_kvm_func_table[i](i, 0); + } + kTEST("kvm Test done."); + return 0; +} diff --git a/kernel/src/lib.rs b/kernel/src/lib.rs index 5f2caac8..7aacda35 100644 --- a/kernel/src/lib.rs +++ b/kernel/src/lib.rs @@ -16,6 +16,9 @@ #![feature(trait_upcasting)] #![feature(slice_ptr_get)] #![feature(vec_into_raw_parts)] +#![feature(new_uninit)] +#![feature(ptr_to_from_bits)] +#![feature(concat_idents)] #![cfg_attr(target_os = "none", no_std)] #[cfg(test)] @@ -46,6 +49,7 @@ mod sched; mod smp; mod syscall; mod time; +mod virt; #[macro_use] extern crate alloc; diff --git a/kernel/src/main.c b/kernel/src/main.c index d911d1b4..eb86248f 100644 --- a/kernel/src/main.c +++ b/kernel/src/main.c @@ -30,6 +30,7 @@ #include