Z Fan 3364e0424f feat(virtualization): Kernel virtualization support (#1073)
* Added several structs

* Got through vmx_init and create_vm; parts of create_vcpu still TODO

* kvm_run half finished

* vmlaunch now succeeds, but some issues at vmexit remain undiagnosed

* Fixed the cpu_reset problem caused by vmlaunch

* Cleaned up the code

* Temporarily pushed to the hyc repository

* Changed parameter passing in the memory virtualization code; fixed a deadlock

* Preliminary EPT mapping done, but EPT_VIOLATION keeps firing

* Preliminary EPT mapping done, but reading/writing guest memory still has issues

* fixme

* Updated several truncate implementations to from_bits_unchecked

* Completed the EPT_VIOLATION mapping for memory virtualization

* fmt

* Remove /fixme from .gitignore

* Remove /fixme file

* Update kernel/src/init/init.rs

Co-authored-by: Samuel Dai <samuka007@dragonos.org>

* Update kernel/src/init/init.rs

Co-authored-by: Samuel Dai <samuka007@dragonos.org>

* Fixed comment formatting; removed some incidental file operations

* feat(syscall): Implement syscall restart (#1075)

When a system call returns ERESTARTSYS, it is now automatically restarted after signal handling completes.

TODO: implement restarting for syscalls such as wait that require restart_block

Signed-off-by: longjin <longjin@DragonOS.org>

* chore: update docker image version in script && update doc (#1076)

* chore: update docker image version in script

* chore: replace lots of spaces with newline in doc

* fix: Fix wait4 syscall semantics that were inconsistent with Linux (#1080)

* fix: Fix wait4 syscall semantics that were inconsistent with Linux

Fixes bugs where wait would fail to block, or would hang after waiting.

---------

Signed-off-by: longjin <longjin@DragonOS.org>

* feat(fs/syscall): Implement the fchdir syscall (#1081)

Signed-off-by: longjin <longjin@DragonOS.org>

* fix(mm): Fix PageCache synchronization for the FAT filesystem (#1005)


---------

Co-authored-by: longjin <longjin@DragonOS.org>

* fix: Fix console logs not being written to file when booting with nographic (#1082)

Signed-off-by: longjin <longjin@DragonOS.org>

* fix(process): Fix some copy_process bugs & support passing arguments to the default init process (#1083)

- Fix a bug where `copy_process` mishandled its flag bits
- In the init-process search list, support passing arguments to the default init program

Signed-off-by: longjin <longjin@DragonOS.org>

* feat: Improve sys_reboot (#1084)

* fix(process): Fix some copy_process bugs & support passing arguments to the default init process

- Fix a bug where `copy_process` mishandled its flag bits
- In the init-process search list, support passing arguments to the default init program

Signed-off-by: longjin <longjin@DragonOS.org>

* feat: Improve sys_reboot

- Validate the magic number
- Support multiple cmds (specific handling not yet implemented)

Signed-off-by: longjin <longjin@DragonOS.org>

---------

Signed-off-by: longjin <longjin@DragonOS.org>

* fix: Fix a bug where do_wait slept without releasing the lock while waiting on all child processes (#1089)

Signed-off-by: longjin <longjin@DragonOS.org>

* Pulled mainline and ran fmt

---------

Signed-off-by: longjin <longjin@DragonOS.org>
Co-authored-by: GnoCiYeH <heyicong@dragonos.org>
Co-authored-by: Samuel Dai <samuka007@dragonos.org>
Co-authored-by: LoGin <longjin@DragonOS.org>
Co-authored-by: LIU Yuwei <22045841+Marsman1996@users.noreply.github.com>
Co-authored-by: MemoryShore <1353318529@qq.com>
2025-03-11 09:06:54 +08:00


use core::intrinsics::likely;
use core::intrinsics::unlikely;
use core::sync::atomic::{AtomicBool, Ordering};
use exit::VmxExitHandlers;
use log::debug;
use log::error;
use log::warn;
use x86_64::registers::control::Cr3Flags;
use x86_64::structures::paging::PhysFrame;
use crate::arch::process::table::USER_DS;
use crate::arch::vm::mmu::kvm_mmu::KvmMmu;
use crate::arch::vm::uapi::kvm_exit;
use crate::arch::vm::uapi::{
AC_VECTOR, BP_VECTOR, DB_VECTOR, GP_VECTOR, MC_VECTOR, NM_VECTOR, PF_VECTOR, UD_VECTOR,
};
use crate::arch::vm::vmx::vmcs::VmcsIntrHelper;
use crate::libs::spinlock::SpinLockGuard;
use crate::mm::VirtAddr;
use crate::process::ProcessManager;
use crate::virt::vm::kvm_host::vcpu::GuestDebug;
use crate::{
arch::{
vm::{
asm::KvmX86Asm,
kvm_host::{vcpu::VirtCpuRequest, X86KvmArch},
vmx::vmcs::vmx_area,
},
CurrentIrqArch, MMArch, VirtCpuArch,
},
exception::InterruptArch,
libs::spinlock::SpinLock,
mm::{
percpu::{PerCpu, PerCpuVar},
MemoryManagementArch,
},
smp::{core::smp_get_processor_id, cpu::ProcessorId},
virt::vm::{kvm_dev::kvm_init, kvm_host::vcpu::VirtCpu, user_api::UapiKvmSegment},
};
use alloc::{alloc::Global, boxed::Box, collections::LinkedList, sync::Arc, vec::Vec};
use asm::VMX_EPTP_AD_ENABLE_BIT;
use asm::VMX_EPTP_MT_WB;
use asm::VMX_EPTP_PWL_4;
use asm::VMX_EPTP_PWL_5;
use bitfield_struct::bitfield;
use bitmap::{traits::BitMapOps, AllocBitmap};
use raw_cpuid::CpuId;
use system_error::SystemError;
use x86::controlregs::{cr2, cr2_write};
use x86::dtables::ldtr;
use x86::msr::wrmsr;
use x86::segmentation::load_ds;
use x86::segmentation::load_es;
use x86::segmentation::{ds, es, fs, gs};
use x86::vmx::vmcs::ro;
use x86::{
bits64::rflags::RFlags,
controlregs::{cr0, cr4, Cr0, Cr4, Xcr0},
msr::{self, rdmsr},
segmentation::{self},
vmx::vmcs::{
control::{
self, EntryControls, ExitControls, PinbasedControls, PrimaryControls, SecondaryControls,
},
guest, host,
},
};
use x86_64::registers::control::Cr3;
use x86_64::{instructions::tables::sidt, registers::control::EferFlags};
use crate::{
arch::{
vm::{vmx::vmcs::feat::VmxFeat, x86_kvm_manager_mut, McgCap},
KvmArch,
},
libs::rwlock::RwLock,
virt::vm::kvm_host::Vm,
};
use self::exit::ExitFastpathCompletion;
use self::exit::VmxExitReason;
use self::exit::VmxExitReasonBasic;
use self::vmcs::LoadedVmcs;
use self::{
capabilities::{ProcessorTraceMode, VmcsConfig, VmxCapability},
vmcs::{
current_loaded_vmcs_list_mut, current_vmcs, current_vmcs_mut, ControlsType,
LockedLoadedVmcs, VMControlStructure, VmxMsrBitmapAccess, VmxMsrBitmapAction,
PERCPU_LOADED_VMCS_LIST, PERCPU_VMCS, VMXAREA,
},
};
use super::asm::IntrInfo;
use super::asm::SegmentCacheField;
use super::kvm_host::vcpu::KvmIntrType;
use super::kvm_host::RMODE_TSS_SIZE;
use super::x86_kvm_ops;
use super::{
asm::{VcpuSegment, VmxAsm, VmxMsrEntry},
init_kvm_arch,
kvm_host::{KvmFunc, KvmInitFunc, KvmIrqChipMode, KvmReg, MsrFilterType, NotifyVmExitFlags},
x86_kvm_manager, KvmArchManager,
};
pub mod asm;
pub mod capabilities;
pub mod ept;
pub mod exit;
pub mod vmcs;
extern "C" {
fn vmx_vmexit();
}
pub struct VmxKvmInitFunc;
impl VmxKvmInitFunc {
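/// Allocate the per-CPU VMX state used before any vCPU runs: one VMXON/VMCS
/// area per CPU (`VMXAREA`), the per-CPU "current VMCS" pointer (`PERCPU_VMCS`),
/// and the per-CPU list of loaded VMCS structures (`PERCPU_LOADED_VMCS_LIST`).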
pub fn setup_per_cpu(&self) {
let mut vmcs_areas = Vec::new();
vmcs_areas.resize(PerCpu::MAX_CPU_NUM as usize, VMControlStructure::new());
unsafe { VMXAREA = PerCpuVar::new(vmcs_areas) };
let mut percpu_current_vmcs = Vec::new();
percpu_current_vmcs.resize(PerCpu::MAX_CPU_NUM as usize, None);
unsafe { PERCPU_VMCS = PerCpuVar::new(percpu_current_vmcs) }
let mut percpu_loaded_vmcs_lists = Vec::new();
percpu_loaded_vmcs_lists.resize(PerCpu::MAX_CPU_NUM as usize, LinkedList::new());
unsafe { PERCPU_LOADED_VMCS_LIST = PerCpuVar::new(percpu_loaded_vmcs_lists) }
}
}
impl KvmInitFunc for VmxKvmInitFunc {
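/// One-time VMX hardware setup, largely mirroring Linux's `hardware_setup()`
/// in arch/x86/kvm/vmx/vmx.c: probe the VMCS config and the EPT/VPID
/// capabilities, then progressively disable features (VPID, EPT, unrestricted
/// guest, APICv, PML, ...) whose hardware prerequisites are missing.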
#[allow(clippy::borrow_interior_mutable_const)]
#[inline(never)]
fn hardware_setup(&self) -> Result<(), SystemError> {
let idt = sidt();
let cpuid = CpuId::new();
let cpu_extend_feature = cpuid
.get_extended_processor_and_feature_identifiers()
.ok_or(SystemError::ENOSYS)?;
let mut vmx_init: Box<Vmx> = unsafe {
Box::try_new_zeroed_in(Global)
.map_err(|_| SystemError::ENOMEM)?
.assume_init()
};
vmx_init.init();
vmx_init.host_idt_base = idt.base.as_u64();
Vmx::set_up_user_return_msrs();
Vmx::setup_vmcs_config(&mut vmx_init.vmcs_config, &mut vmx_init.vmx_cap)?;
let manager = x86_kvm_manager_mut();
let kvm_cap = &mut manager.kvm_caps;
if vmx_init.has_mpx() {
kvm_cap.supported_xcr0 &= !(Xcr0::XCR0_BNDREG_STATE | Xcr0::XCR0_BNDCSR_STATE);
}
// Decide whether VPID should stay enabled
if !vmx_init.has_vpid()
|| !vmx_init.has_invvpid()
|| !vmx_init.has_invvpid_single()
|| !vmx_init.has_invvpid_global()
{
vmx_init.enable_vpid = false;
}
if !vmx_init.has_ept()
|| !vmx_init.has_ept_4levels()
|| !vmx_init.has_ept_mt_wb()
|| !vmx_init.has_invept_global()
{
vmx_init.enable_ept = false;
}
// If EPT is not enabled, the CPU must support Execute Disable (NX).
// Execute Disable is a CPU feature that prevents code from executing in data memory regions.
if !vmx_init.enable_ept && !cpu_extend_feature.has_execute_disable() {
error!("[KVM] NX (Execute Disable) not supported");
return Err(SystemError::ENOSYS);
}
if !vmx_init.has_ept_ad_bits() || !vmx_init.enable_ept {
vmx_init.enable_ept_ad = false;
}
if !vmx_init.has_unrestricted_guest() || !vmx_init.enable_ept {
vmx_init.enable_unrestricted_guest = false;
}
if !vmx_init.has_flexproirity() {
vmx_init.enable_flexpriority = false;
}
if !vmx_init.has_virtual_nmis() {
vmx_init.enable_vnmi = false;
}
if !vmx_init.has_encls_vmexit() {
vmx_init.enable_sgx = false;
}
if !vmx_init.enable_flexpriority {
VmxKvmFunc::CONFIG.write().have_set_apic_access_page_addr = false;
}
if !vmx_init.has_tpr_shadow() {
VmxKvmFunc::CONFIG.write().have_update_cr8_intercept = false;
}
// TODO:https://code.dragonos.org.cn/xref/linux-6.6.21/arch/x86/kvm/vmx/vmx.c#8501 - 8513
if !vmx_init.has_ple() {
vmx_init.ple_gap = 0;
vmx_init.ple_window = 0;
vmx_init.ple_window_grow = 0;
vmx_init.ple_window_max = 0;
vmx_init.ple_window_shrink = 0;
}
if !vmx_init.has_apicv() {
vmx_init.enable_apicv = false;
}
if !vmx_init.enable_apicv {
// TODO: set up sync_pir_to_irr
}
if !vmx_init.enable_apicv || !vmx_init.has_ipiv() {
vmx_init.enable_ipiv = false;
}
if vmx_init.has_tsc_scaling() {
kvm_cap.has_tsc_control = true;
}
kvm_cap.max_tsc_scaling_ratio = u64::MAX;
kvm_cap.tsc_scaling_ratio_frac_bits = 48;
kvm_cap.has_bus_lock_exit = vmx_init.has_bus_lock_detection();
kvm_cap.has_notify_vmexit = vmx_init.has_notify_vmexit();
// vmx_init.vpid_bitmap.lock().set_all(false);
if vmx_init.enable_ept {
// TODO: mmu_set_ept_masks
warn!("mmu_set_ept_masks TODO!");
}
warn!("vmx_setup_me_spte_mask TODO!");
KvmMmu::kvm_configure_mmu(
vmx_init.enable_ept,
0,
vmx_init.get_max_ept_level(),
vmx_init.ept_cap_to_lpage_level(),
);
if !vmx_init.enable_ept || !vmx_init.enable_ept_ad || !vmx_init.has_pml() {
vmx_init.enable_pml = false;
}
if !vmx_init.enable_pml {
// TODO: Set cpu dirty log size
}
if !vmx_init.has_preemption_timer() {
vmx_init.enable_preemption_timer = false;
}
if vmx_init.enable_preemption_timer {
// TODO
}
if !vmx_init.enable_preemption_timer {
// TODO
}
kvm_cap
.supported_mce_cap
.insert(McgCap::MCG_LMCE_P | McgCap::MCG_CMCI_P);
// TODO: pt_mode
// TODO: setup_default_sgx_lepubkeyhash
// TODO: nested
// TODO: vmx_set_cpu_caps
init_vmx(vmx_init);
self.setup_per_cpu();
warn!("hardware setup finish");
Ok(())
}
fn handle_intel_pt_intr(&self) -> u32 {
todo!()
}
fn runtime_funcs(&self) -> &'static dyn super::kvm_host::KvmFunc {
&VmxKvmFunc
}
}
#[derive(Debug)]
pub struct VmxKvmFunc;
pub struct VmxKvmFuncConfig {
pub have_set_apic_access_page_addr: bool,
pub have_update_cr8_intercept: bool,
}
impl VmxKvmFunc {
#[allow(clippy::declare_interior_mutable_const)]
pub const CONFIG: RwLock<VmxKvmFuncConfig> = RwLock::new(VmxKvmFuncConfig {
have_set_apic_access_page_addr: true,
have_update_cr8_intercept: true,
});
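/// Load this vCPU's VMCS on `cpu`: migrate the loaded-VMCS off its previous
/// CPU if needed, VMPTRLD the VMCS if it is not already current, and refresh
/// the host-state fields (TR/GDTR base, SYSENTER_ESP) that differ per CPU.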
pub fn vcpu_load_vmcs(
vcpu: &mut VirtCpu,
cpu: ProcessorId,
_buddy: Option<Arc<LockedLoadedVmcs>>,
) {
let vmx = vcpu.vmx();
let already_loaded = vmx.loaded_vmcs.lock().cpu == cpu;
if !already_loaded {
Self::loaded_vmcs_clear(&vmx.loaded_vmcs);
let _irq_guard = unsafe { CurrentIrqArch::save_and_disable_irq() };
current_loaded_vmcs_list_mut().push_back(vmx.loaded_vmcs.clone());
}
if let Some(prev) = current_vmcs() {
let vmcs = vmx.loaded_vmcs.lock().vmcs.clone();
if !Arc::ptr_eq(&vmcs, prev) {
VmxAsm::vmcs_load(vmcs.phys_addr());
*current_vmcs_mut() = Some(vmcs);
// TODO:buddy barrier?
}
} else {
let vmcs = vmx.loaded_vmcs.lock().vmcs.clone();
VmxAsm::vmcs_load(vmcs.phys_addr());
*current_vmcs_mut() = Some(vmcs);
// TODO:buddy barrier?
}
if !already_loaded {
let mut pseudo_descriptor: x86::dtables::DescriptorTablePointer<u64> =
Default::default();
unsafe {
x86::dtables::sgdt(&mut pseudo_descriptor);
};
vmx.loaded_vmcs.lock().cpu = cpu;
let id = vmx.loaded_vmcs.lock().vmcs.lock().revision_id();
debug!(
"revision_id {id} req {:?}",
VirtCpuRequest::KVM_REQ_TLB_FLUSH
);
vcpu.request(VirtCpuRequest::KVM_REQ_TLB_FLUSH);
VmxAsm::vmx_vmwrite(
host::TR_BASE,
KvmX86Asm::get_segment_base(
pseudo_descriptor.base,
pseudo_descriptor.limit,
unsafe { x86::task::tr().bits() },
),
);
VmxAsm::vmx_vmwrite(host::GDTR_BASE, pseudo_descriptor.base as usize as u64);
VmxAsm::vmx_vmwrite(host::IA32_SYSENTER_ESP, unsafe {
rdmsr(msr::IA32_SYSENTER_ESP)
});
}
}
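/// VMCLEAR a loaded VMCS and detach it from its CPU. Only the owning CPU can
/// clear it; clearing from another CPU would need an IPI (still a todo!()).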
pub fn loaded_vmcs_clear(loaded_vmcs: &Arc<LockedLoadedVmcs>) {
let mut guard = loaded_vmcs.lock();
if guard.cpu == ProcessorId::INVALID {
return;
}
if guard.cpu == smp_get_processor_id() {
if let Some(vmcs) = current_vmcs() {
if Arc::ptr_eq(vmcs, &guard.vmcs) {
*current_vmcs_mut() = None;
}
}
VmxAsm::vmclear(guard.vmcs.phys_addr());
if let Some(shadow) = &guard.shadow_vmcs {
if guard.launched {
VmxAsm::vmclear(shadow.phys_addr());
}
}
let _ = current_loaded_vmcs_list_mut().extract_if(|x| Arc::ptr_eq(x, loaded_vmcs));
guard.cpu = ProcessorId::INVALID;
guard.launched = false;
} else {
// Defer to the CPU that owns this VMCS
todo!()
}
}
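/// Reset a guest segment to its architectural post-RESET state: selector and
/// base 0, limit 0xffff, access rights 0x93 (present, DPL 0, writable data
/// segment), with the code bit (0x08) added for CS.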
pub fn seg_setup(&self, seg: VcpuSegment) {
let seg_field = &KVM_VMX_SEGMENT_FIELDS[seg as usize];
VmxAsm::vmx_vmwrite(seg_field.selector, 0);
VmxAsm::vmx_vmwrite(seg_field.base, 0);
VmxAsm::vmx_vmwrite(seg_field.limit, 0xffff);
let mut ar = 0x93;
if seg == VcpuSegment::CS {
ar |= 0x08;
}
VmxAsm::vmx_vmwrite(seg_field.ar_bytes, ar);
}
}
impl KvmFunc for VmxKvmFunc {
fn name(&self) -> &'static str {
"VMX"
}
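/// Enable VMX operation on the current CPU by executing VMXON on the
/// physical address of this CPU's VMXON region.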
fn hardware_enable(&self) -> Result<(), SystemError> {
let vmcs = vmx_area().get().as_ref();
debug!("vmcs idx {}", vmcs.abort);
let phys_addr =
unsafe { MMArch::virt_2_phys(VirtAddr::new(vmcs as *const _ as usize)).unwrap() };
// TODO: intel_pt_handle_vmx(1);
VmxAsm::kvm_cpu_vmxon(phys_addr)?;
Ok(())
}
fn vm_init(&self) -> X86KvmArch {
let vmx_init = vmx_info();
let mut arch = X86KvmArch::default();
if vmx_init.ple_gap == 0 {
arch.pause_in_guest = true;
}
return arch;
}
fn vcpu_create(&self, vcpu: &mut VirtCpu, vm: &Vm) {
VmxVCpuPriv::init(vcpu, vm);
}
fn vcpu_load(&self, vcpu: &mut VirtCpu, cpu: crate::smp::cpu::ProcessorId) {
Self::vcpu_load_vmcs(vcpu, cpu, None);
// TODO: vmx_vcpu_pi_load
}
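/// Pull a lazily-cached guest register out of the VMCS. For CR0/CR4, only the
/// guest-owned bits are re-read from hardware; the remaining bits are kept
/// from the shadow values that KVM itself maintains.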
fn cache_reg(&self, vcpu: &mut VirtCpuArch, reg: KvmReg) {
vcpu.mark_register_available(reg);
match reg {
KvmReg::VcpuRegsRsp => {
vcpu.regs[reg as usize] = VmxAsm::vmx_vmread(guest::RSP);
}
KvmReg::VcpuRegsRip => {
vcpu.regs[reg as usize] = VmxAsm::vmx_vmread(guest::RIP);
}
// VCPU_EXREG_PDPTR
KvmReg::NrVcpuRegs => {
if vmx_info().enable_ept {
todo!()
}
}
KvmReg::VcpuExregCr0 => {
let guest_owned = vcpu.cr0_guest_owned_bits;
vcpu.cr0.remove(guest_owned);
vcpu.cr0.insert(
Cr0::from_bits_truncate(VmxAsm::vmx_vmread(guest::CR0) as usize) & guest_owned,
);
}
KvmReg::VcpuExregCr3 => {
// When CR3 loads are intercepted (e.g. for shadow paging), KVM's (the
// Kernel-based Virtual Machine's) CR3 is loaded into hardware instead of
// the guest's CR3.
// For now, just read the register directly.
vcpu.cr3 = VmxAsm::vmx_vmread(guest::CR3);
//todo!()
}
KvmReg::VcpuExregCr4 => {
let guest_owned = vcpu.cr4_guest_owned_bits;
vcpu.cr4.remove(guest_owned);
vcpu.cr4.insert(
Cr4::from_bits_truncate(VmxAsm::vmx_vmread(guest::CR4) as usize) & guest_owned,
);
}
_ => {
todo!()
}
}
}
fn apicv_pre_state_restore(&self, _vcpu: &mut VirtCpu) {
// https://code.dragonos.org.cn/xref/linux-6.6.21/arch/x86/kvm/vmx/vmx.c#6924
// TODO: pi
// todo!()
}
fn set_msr(&self, vcpu: &mut VirtCpu, msr: super::asm::MsrData) -> Result<(), SystemError> {
let vmx = vcpu.vmx_mut();
let msr_index = msr.index;
let data = msr.data;
match msr_index {
msr::IA32_EFER => {
todo!("IA32_EFER")
}
msr::IA32_FS_BASE => {
todo!("IA32_FS_BASE")
}
msr::IA32_GS_BASE => {
todo!("IA32_GS_BASE")
}
msr::IA32_KERNEL_GSBASE => {
todo!("IA32_KERNEL_GSBASE")
}
0x000001c4 => {
todo!("MSR_IA32_XFD")
}
msr::IA32_SYSENTER_CS => {
todo!("IA32_SYSENTER_CS")
}
msr::IA32_SYSENTER_EIP => {
todo!("IA32_SYSENTER_EIP")
}
msr::IA32_SYSENTER_ESP => {
todo!("IA32_SYSENTER_ESP")
}
msr::IA32_DEBUGCTL => {
todo!("IA32_DEBUGCTL")
}
msr::MSR_C1_PMON_EVNT_SEL0 => {
todo!("MSR_IA32_BNDCFGS")
}
0xe1 => {
todo!("MSR_IA32_UMWAIT_CONTROL ")
}
0x48 => {
todo!("MSR_IA32_SPEC_CTRL")
}
msr::MSR_IA32_TSX_CTRL => {
todo!("MSR_IA32_TSX_CTRL")
}
msr::IA32_PAT => {
todo!("IA32_PAT")
}
0x4d0 => {
todo!("MSR_IA32_MCG_EXT_CTL")
}
msr::IA32_FEATURE_CONTROL => {
todo!("IA32_FEATURE_CONTROL")
}
0x8c..=0x8f => {
todo!("MSR_IA32_SGXLEPUBKEYHASH0 ... MSR_IA32_SGXLEPUBKEYHASH3 {msr_index}")
}
msr::IA32_VMX_BASIC..=msr::IA32_VMX_VMFUNC => {
todo!("msr::IA32_VMX_BASIC..=msr::IA32_VMX_VMFUNC")
}
msr::MSR_IA32_RTIT_CTL => {
todo!("MSR_IA32_RTIT_CTL")
}
msr::MSR_IA32_RTIT_STATUS => {
todo!("MSR_IA32_RTIT_STATUS")
}
msr::MSR_IA32_RTIT_OUTPUT_BASE => {
todo!("MSR_IA32_RTIT_OUTPUT_BASE")
}
0x572 => {
todo!("MSR_IA32_RTIT_CR3_MATCH")
}
msr::MSR_IA32_RTIT_OUTPUT_MASK_PTRS => {
todo!("MSR_IA32_RTIT_OUTPUT_MASK_PTRS")
}
msr::MSR_IA32_ADDR0_START..=msr::MSR_IA32_ADDR3_END => {
todo!("msr::MSR_IA32_ADDR0_START..=msr::MSR_IA32_ADDR3_END")
}
msr::MSR_PERF_CAPABILITIES => {
todo!("MSR_PERF_CAPABILITIES")
}
_ => {
let uret_msr = vmx.find_uret_msr(msr_index);
if let Some((idx, _msr)) = uret_msr {
vmx.set_guest_uret_msr(idx, data)?;
vmx.set_uret_msr(msr_index, data);
} else {
vcpu.arch.set_msr_common(&msr);
};
}
}
if msr_index == 0x10a {
// MSR_IA32_ARCH_CAPABILITIES
todo!()
}
Ok(())
}
fn vcpu_reset(&self, vcpu: &mut VirtCpu, vm: &Vm, init_event: bool) {
if !init_event {
vmx_info_mut().vmx_reset_vcpu(vcpu, vm)
}
vcpu.kvm_set_cr8(0);
let vmx = vcpu.vmx_mut();
vmx.rmode.vm86_active = false;
vmx.spec_ctrl = 0;
vmx.msr_ia32_umwait_control = 0;
vmx.hv_deadline_tsc = u64::MAX;
vmx.segment_cache_clear();
vcpu.arch.mark_register_available(KvmReg::VcpuExregSegments);
self.seg_setup(VcpuSegment::CS);
VmxAsm::vmx_vmwrite(guest::CS_SELECTOR, 0xf000);
VmxAsm::vmx_vmwrite(guest::CS_BASE, 0xffff0000);
self.seg_setup(VcpuSegment::DS);
self.seg_setup(VcpuSegment::ES);
self.seg_setup(VcpuSegment::FS);
self.seg_setup(VcpuSegment::GS);
self.seg_setup(VcpuSegment::SS);
VmxAsm::vmx_vmwrite(guest::TR_SELECTOR, 0);
VmxAsm::vmx_vmwrite(guest::TR_BASE, 0);
VmxAsm::vmx_vmwrite(guest::TR_LIMIT, 0xffff);
VmxAsm::vmx_vmwrite(guest::TR_ACCESS_RIGHTS, 0x008b);
VmxAsm::vmx_vmwrite(guest::LDTR_SELECTOR, 0);
VmxAsm::vmx_vmwrite(guest::LDTR_BASE, 0);
VmxAsm::vmx_vmwrite(guest::LDTR_LIMIT, 0xffff);
VmxAsm::vmx_vmwrite(guest::LDTR_ACCESS_RIGHTS, 0x00082);
VmxAsm::vmx_vmwrite(guest::GDTR_BASE, 0);
VmxAsm::vmx_vmwrite(guest::GDTR_LIMIT, 0xffff);
VmxAsm::vmx_vmwrite(guest::IDTR_BASE, 0);
VmxAsm::vmx_vmwrite(guest::IDTR_LIMIT, 0xffff);
VmxAsm::vmx_vmwrite(guest::ACTIVITY_STATE, 0);
VmxAsm::vmx_vmwrite(guest::INTERRUPTIBILITY_STATE, 0);
VmxAsm::vmx_vmwrite(guest::PENDING_DBG_EXCEPTIONS, 0);
if x86_kvm_manager().mpx_supported() {
VmxAsm::vmx_vmwrite(guest::IA32_BNDCFGS_FULL, 0);
}
VmxAsm::vmx_vmwrite(control::VMENTRY_INTERRUPTION_INFO_FIELD, 0);
vcpu.request(VirtCpuRequest::MAKE_KVM_REQ_APIC_PAGE_RELOAD);
vmx_info().vpid_sync_context(vcpu.vmx().vpid);
warn!("TODO: vmx_update_fb_clear_dis");
}
fn set_rflags(&self, vcpu: &mut VirtCpu, mut rflags: x86::bits64::rflags::RFlags) {
if vcpu.is_unrestricted_guest() {
vcpu.arch.mark_register_available(KvmReg::VcpuExregRflags);
vcpu.vmx_mut().rflags = rflags;
VmxAsm::vmx_vmwrite(guest::RFLAGS, rflags.bits());
return;
}
let old_rflags = self.get_rflags(vcpu);
let vmx = vcpu.vmx_mut();
vmx.rflags = rflags;
if vmx.rmode.vm86_active {
vmx.rmode.save_rflags = rflags;
rflags.insert(RFlags::FLAGS_IOPL3 | RFlags::FLAGS_VM);
}
VmxAsm::vmx_vmwrite(guest::RFLAGS, rflags.bits());
if (old_rflags ^ vmx.rflags).contains(RFlags::FLAGS_VM) {
let emulation_required = vmx_info().emulation_required(vcpu);
vcpu.vmx_mut().emulation_required = emulation_required;
}
}
fn set_cr0(&self, vm: &Vm, vcpu: &mut VirtCpu, cr0: x86::controlregs::Cr0) {
let old_cr0_pg = vcpu.arch.read_cr0_bits(Cr0::CR0_ENABLE_PAGING);
let mut hw_cr0 = cr0 & (!(Cr0::CR0_NOT_WRITE_THROUGH | Cr0::CR0_CACHE_DISABLE));
if vmx_info().enable_unrestricted_guest {
hw_cr0.insert(Cr0::CR0_NUMERIC_ERROR);
} else {
hw_cr0
.insert(Cr0::CR0_NUMERIC_ERROR | Cr0::CR0_ENABLE_PAGING | Cr0::CR0_PROTECTED_MODE);
if !vmx_info().enable_ept {
hw_cr0.insert(Cr0::CR0_WRITE_PROTECT);
}
if vcpu.vmx().rmode.vm86_active && cr0.contains(Cr0::CR0_PROTECTED_MODE) {
vmx_info().enter_pmode(vcpu);
}
if !vcpu.vmx().rmode.vm86_active && !cr0.contains(Cr0::CR0_PROTECTED_MODE) {
vmx_info().enter_rmode(vcpu, vm);
}
}
VmxAsm::vmx_vmwrite(control::CR0_READ_SHADOW, cr0.bits() as u64);
VmxAsm::vmx_vmwrite(guest::CR0, hw_cr0.bits() as u64);
vcpu.arch.cr0 = cr0;
vcpu.arch.mark_register_available(KvmReg::VcpuExregCr0);
if vcpu.arch.efer.contains(EferFlags::LONG_MODE_ENABLE) {
if old_cr0_pg.is_empty() && cr0.contains(Cr0::CR0_ENABLE_PAGING) {
todo!("enter lmode todo");
} else if !old_cr0_pg.is_empty() && !cr0.contains(Cr0::CR0_ENABLE_PAGING) {
todo!("exit lmode todo");
}
}
if vmx_info().enable_ept && !vmx_info().enable_unrestricted_guest {
todo!()
}
vcpu.vmx_mut().emulation_required = vmx_info().emulation_required(vcpu);
}
fn set_cr4(&self, vcpu: &mut VirtCpu, cr4_flags: x86::controlregs::Cr4) {
let old_cr4 = vcpu.arch.read_cr4_bits(Cr4::all());
let mut hw_cr4 = (unsafe { cr4() } & Cr4::CR4_ENABLE_MACHINE_CHECK)
| (cr4_flags & (!Cr4::CR4_ENABLE_MACHINE_CHECK));
if vmx_info().enable_unrestricted_guest {
hw_cr4.insert(Cr4::CR4_ENABLE_VMX);
} else if vcpu.vmx().rmode.vm86_active {
hw_cr4.insert(Cr4::CR4_ENABLE_PAE | Cr4::CR4_ENABLE_VMX | Cr4::CR4_ENABLE_VME);
} else {
hw_cr4.insert(Cr4::CR4_ENABLE_PAE | Cr4::CR4_ENABLE_VMX);
}
if vmx_info().vmx_umip_emulated() {
if cr4_flags.contains(Cr4::CR4_ENABLE_UMIP) {
vcpu.vmx().loaded_vmcs().controls_set(
ControlsType::SecondaryExec,
SecondaryControls::DTABLE_EXITING.bits() as u64,
);
hw_cr4.remove(Cr4::CR4_ENABLE_UMIP);
} else if !vcpu.arch.is_guest_mode() {
vcpu.vmx().loaded_vmcs().controls_clearbit(
ControlsType::SecondaryExec,
SecondaryControls::DTABLE_EXITING.bits() as u64,
);
}
}
vcpu.arch.cr4 = cr4_flags;
vcpu.arch.mark_register_available(KvmReg::VcpuExregCr4);
if !vmx_info().enable_unrestricted_guest {
if vmx_info().enable_ept {
if vcpu.arch.read_cr0_bits(Cr0::CR0_ENABLE_PAGING).is_empty() {
hw_cr4.remove(Cr4::CR4_ENABLE_PAE);
hw_cr4.insert(Cr4::CR4_ENABLE_PSE);
} else if !cr4_flags.contains(Cr4::CR4_ENABLE_PAE) {
hw_cr4.remove(Cr4::CR4_ENABLE_PAE);
}
}
if vcpu.arch.read_cr0_bits(Cr0::CR0_ENABLE_PAGING).is_empty() {
hw_cr4.remove(
Cr4::CR4_ENABLE_SMEP | Cr4::CR4_ENABLE_SMAP | Cr4::CR4_ENABLE_PROTECTION_KEY,
);
}
}
VmxAsm::vmx_vmwrite(control::CR4_READ_SHADOW, cr4_flags.bits() as u64);
VmxAsm::vmx_vmwrite(guest::CR4, hw_cr4.bits() as u64);
if (cr4_flags ^ old_cr4).contains(Cr4::CR4_ENABLE_OS_XSAVE | Cr4::CR4_ENABLE_PROTECTION_KEY)
{
// TODO: update_cpuid_runtime
}
}
fn set_efer(&self, vcpu: &mut VirtCpu, efer: x86_64::registers::control::EferFlags) {
if vcpu.vmx().find_uret_msr(msr::IA32_EFER).is_none() {
return;
}
vcpu.arch.efer = efer;
if efer.contains(EferFlags::LONG_MODE_ACTIVE) {
vcpu.vmx().loaded_vmcs().controls_setbit(
ControlsType::VmEntry,
EntryControls::IA32E_MODE_GUEST.bits().into(),
);
} else {
vcpu.vmx().loaded_vmcs().controls_clearbit(
ControlsType::VmEntry,
EntryControls::IA32E_MODE_GUEST.bits().into(),
);
}
vmx_info().setup_uret_msrs(vcpu);
}
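/// Recompute the exception bitmap: exceptions whose bit is set cause a VM
/// exit. #PF interception is narrowed via the PFEC mask/match fields when EPT
/// is enabled, and real mode (vm86_active) intercepts everything.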
fn update_exception_bitmap(&self, vcpu: &mut VirtCpu) {
let mut eb = (1u32 << PF_VECTOR)
| (1 << UD_VECTOR)
| (1 << MC_VECTOR)
| (1 << DB_VECTOR)
| (1 << AC_VECTOR);
if vmx_info().enable_vmware_backdoor {
eb |= 1 << GP_VECTOR;
}
if vcpu.guest_debug & (GuestDebug::ENABLE | GuestDebug::USE_SW_BP)
== (GuestDebug::ENABLE | GuestDebug::USE_SW_BP)
{
eb |= 1 << BP_VECTOR;
}
if vcpu.vmx().rmode.vm86_active {
eb = !0;
}
if !vmx_info().vmx_need_pf_intercept(vcpu) {
eb &= !(1 << PF_VECTOR);
}
if vcpu.arch.is_guest_mode() {
todo!()
} else {
let mut mask = PageFaultErr::empty();
let mut match_code = PageFaultErr::empty();
if vmx_info().enable_ept && (eb & (1 << PF_VECTOR) != 0) {
mask = PageFaultErr::PFERR_PRESENT | PageFaultErr::PFERR_RSVD;
match_code = PageFaultErr::PFERR_PRESENT;
}
VmxAsm::vmx_vmwrite(control::PAGE_FAULT_ERR_CODE_MASK, mask.bits);
VmxAsm::vmx_vmwrite(control::PAGE_FAULT_ERR_CODE_MATCH, match_code.bits);
}
if vcpu.arch.xfd_no_write_intercept {
eb |= 1 << NM_VECTOR;
}
VmxAsm::vmx_vmwrite(control::EXCEPTION_BITMAP, eb as u64);
}
fn has_emulated_msr(&self, msr: u32) -> bool {
match msr {
msr::IA32_SMBASE => {
return vmx_info().enable_unrestricted_guest
|| vmx_info().emulate_invalid_guest_state;
}
msr::IA32_VMX_BASIC..=msr::IA32_VMX_VMFUNC => {
return vmx_info().nested;
}
0xc001011f | 0xc0000104 => {
// MSR_AMD64_VIRT_SPEC_CTRL | MSR_AMD64_TSC_RATIO
return false;
}
_ => {
return true;
}
}
}
fn get_msr_feature(&self, msr: &mut super::asm::VmxMsrEntry) -> bool {
match msr.index {
msr::IA32_VMX_BASIC..=msr::IA32_VMX_VMFUNC => {
if !vmx_info().nested {
return false;
}
match vmx_info().vmcs_config.nested.get_vmx_msr(msr.index) {
Some(data) => {
msr.data = data;
return true;
}
None => {
return false;
}
}
}
_ => {
return false;
}
}
}
fn get_rflags(&self, vcpu: &mut VirtCpu) -> x86::bits64::rflags::RFlags {
if !vcpu.arch.is_register_available(KvmReg::VcpuExregRflags) {
vcpu.arch.mark_register_available(KvmReg::VcpuExregRflags);
let mut rflags = RFlags::from_bits_truncate(VmxAsm::vmx_vmread(guest::RFLAGS));
if vcpu.vmx_mut().rmode.vm86_active {
rflags.remove(RFlags::FLAGS_IOPL3 | RFlags::FLAGS_VM);
let save_rflags = vcpu.vmx_mut().rmode.save_rflags;
rflags.insert(save_rflags & !(RFlags::FLAGS_IOPL3 | RFlags::FLAGS_VM));
}
vcpu.vmx_mut().rflags = rflags;
}
return vcpu.vmx_mut().rflags;
}
fn vcpu_precreate(&self, vm: &mut Vm) -> Result<(), SystemError> {
if vm.arch.irqchip_mode != KvmIrqChipMode::None || !vmx_info().enable_ipiv {
return Ok(());
}
let kvm_vmx = vm.kvm_vmx_mut();
if kvm_vmx.pid_table.is_some() {
return Ok(());
}
kvm_vmx.pid_table = Some(unsafe { Box::new_zeroed().assume_init() });
Ok(())
}
fn set_segment(&self, vcpu: &mut VirtCpu, var: &mut UapiKvmSegment, seg: VcpuSegment) {
vcpu.vmx_mut().emulation_required = vmx_info().emulation_required(vcpu);
*var = vmx_info()._vmx_set_segment(vcpu, *var, seg);
}
fn get_segment(
&self,
vcpu: &mut VirtCpu,
var: UapiKvmSegment,
seg: VcpuSegment,
) -> UapiKvmSegment {
return vmx_info().vmx_get_segment(vcpu, var, seg);
}
fn get_idt(&self, _vcpu: &mut VirtCpu, dt: &mut x86::dtables::DescriptorTablePointer<u8>) {
dt.limit = VmxAsm::vmx_vmread(guest::IDTR_LIMIT) as u16;
dt.base = VmxAsm::vmx_vmread(guest::IDTR_BASE) as usize as *const _;
}
fn set_idt(&self, _vcpu: &mut VirtCpu, dt: &x86::dtables::DescriptorTablePointer<u8>) {
VmxAsm::vmx_vmwrite(guest::IDTR_LIMIT, dt.limit as u64);
VmxAsm::vmx_vmwrite(guest::IDTR_BASE, dt.base as usize as u64);
}
fn get_gdt(&self, _vcpu: &mut VirtCpu, dt: &mut x86::dtables::DescriptorTablePointer<u8>) {
dt.limit = VmxAsm::vmx_vmread(guest::GDTR_LIMIT) as u16;
dt.base = VmxAsm::vmx_vmread(guest::GDTR_BASE) as usize as *const _;
}
fn set_gdt(&self, _vcpu: &mut VirtCpu, dt: &x86::dtables::DescriptorTablePointer<u8>) {
VmxAsm::vmx_vmwrite(guest::GDTR_LIMIT, dt.limit as u64);
VmxAsm::vmx_vmwrite(guest::GDTR_BASE, dt.base as usize as u64);
}
fn is_vaild_cr0(&self, vcpu: &VirtCpu, _cr0: Cr0) -> bool {
if vcpu.arch.is_guest_mode() {
todo!()
}
// TODO: check vmx->nested->vmxon
true
}
fn is_vaild_cr4(&self, vcpu: &VirtCpu, cr4: Cr4) -> bool {
if cr4.contains(Cr4::CR4_ENABLE_VMX) && vcpu.arch.is_smm() {
return false;
}
// TODO: check vmx->nested->vmxon
return true;
}
fn post_set_cr3(&self, _vcpu: &VirtCpu, _cr3: u64) {
// Do Nothing
}
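/// The per-entry run path: flush dirty RSP/RIP into the VMCS, re-sync host
/// CR3/CR4 if they changed since the last entry, enter the guest via
/// `vmx_vcpu_enter_exit`, then classify the exit for fastpath handling.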
fn vcpu_run(&self, vcpu: &mut VirtCpu) -> ExitFastpathCompletion {
if unlikely(vmx_info().enable_vnmi && vcpu.vmx().loaded_vmcs().soft_vnmi_blocked) {
todo!()
}
if unlikely(vcpu.vmx().emulation_required) {
todo!()
}
if vcpu.vmx().ple_window_dirty {
vcpu.vmx_mut().ple_window_dirty = false;
VmxAsm::vmx_vmwrite(control::PLE_WINDOW, vcpu.vmx().ple_window as u64);
}
if vcpu.arch.is_register_dirty(KvmReg::VcpuRegsRsp) {
VmxAsm::vmx_vmwrite(guest::RSP, vcpu.arch.regs[KvmReg::VcpuRegsRsp as usize]);
}
if vcpu.arch.is_register_dirty(KvmReg::VcpuRegsRip) {
VmxAsm::vmx_vmwrite(guest::RIP, vcpu.arch.regs[KvmReg::VcpuRegsRip as usize]);
}
vcpu.arch.clear_dirty();
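// Host CR3/CR4 can change between entries (e.g. after a context switch), so
// re-read them and update the VMCS host-state fields only when they differ
// from the cached copies.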
let cr3: (PhysFrame, Cr3Flags) = Cr3::read();
if unlikely(cr3 != vcpu.vmx().loaded_vmcs().host_state.cr3) {
let cr3_combined: u64 =
(cr3.0.start_address().as_u64() & 0xFFFF_FFFF_FFFF_F000) | (cr3.1.bits() & 0xFFF);
VmxAsm::vmx_vmwrite(host::CR3, cr3_combined);
vcpu.vmx().loaded_vmcs().host_state.cr3 = cr3;
}
let cr4 = unsafe { cr4() };
if unlikely(cr4 != vcpu.vmx().loaded_vmcs().host_state.cr4) {
VmxAsm::vmx_vmwrite(host::CR4, cr4.bits() as u64);
vcpu.vmx().loaded_vmcs().host_state.cr4 = cr4;
}
// TODO: set_debugreg
if vcpu.guest_debug.contains(GuestDebug::SINGLESTEP) {
todo!()
}
vcpu.load_guest_xsave_state();
// TODO: pt_guest_enter
// TODO: atomic_switch_perf_msrs
if vmx_info().enable_preemption_timer {
// todo!()
warn!("vmx_update_hv_timer TODO");
}
Vmx::vmx_vcpu_enter_exit(vcpu, vcpu.vmx().vmx_vcpu_run_flags());
unsafe {
load_ds(USER_DS);
load_es(USER_DS);
};
// TODO: pt_guest_exit
// TODO: kvm_load_host_xsave_state
if vcpu.arch.is_guest_mode() {
todo!()
}
if unlikely(vcpu.vmx().fail != 0) {
return ExitFastpathCompletion::None;
}
if unlikely(
vcpu.vmx().exit_reason.basic()
== VmxExitReasonBasic::VM_ENTRY_FAILURE_MACHINE_CHECK_EVENT as u16,
) {
todo!()
}
if unlikely(vcpu.vmx().exit_reason.failed_vmentry()) {
return ExitFastpathCompletion::None;
}
vcpu.vmx().loaded_vmcs().launched = true;
// TODO: 处理中断
if vcpu.arch.is_guest_mode() {
return ExitFastpathCompletion::None;
}
return Vmx::vmx_exit_handlers_fastpath(vcpu);
}
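/// Save the host segment state that VM entry will clobber (FS/GS selectors
/// and bases, LDT/DS/ES selectors, kernel GS base) and install the guest's
/// kernel GS base, so the eventual VM exit can restore the host correctly.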
fn prepare_switch_to_guest(&self, vcpu: &mut VirtCpu) {
// let cpu = smp_get_processor_id();
let vmx = vcpu.vmx_mut();
vmx.req_immediate_exit = false;
if !vmx.guest_uret_msrs_loaded {
vmx.guest_uret_msrs_loaded = true;
for (idx, msr) in vmx.guest_uret_msrs.iter().enumerate() {
if msr.load_into_hardware {
x86_kvm_manager().kvm_set_user_return_msr(idx, msr.data, msr.mask);
}
}
}
// TODO: nested
if vmx.guest_state_loaded {
return;
}
// FIXME: This reads the current CPU's gsbase; the correct, safe approach is to keep gsbase in a per-CPU variable.
let gs_base = unsafe { rdmsr(msr::IA32_KERNEL_GSBASE) };
let current = ProcessManager::current_pcb();
let mut pcb_arch = current.arch_info_irqsave();
let fs_sel = fs().bits();
let gs_sel = gs().bits();
unsafe {
pcb_arch.save_fsbase();
pcb_arch.save_gsbase();
}
let fs_base = pcb_arch.fsbase();
vmx.msr_host_kernel_gs_base = pcb_arch.gsbase() as u64;
unsafe { wrmsr(msr::IA32_KERNEL_GSBASE, vmx.msr_guest_kernel_gs_base) };
let mut loaded_vmcs = vmx.loaded_vmcs();
let host_state = &mut loaded_vmcs.host_state;
host_state.ldt_sel = unsafe { ldtr() }.bits();
host_state.ds_sel = ds().bits();
host_state.es_sel = es().bits();
host_state.set_host_fsgs(fs_sel, gs_sel, fs_base, gs_base as usize);
drop(loaded_vmcs);
vmx.guest_state_loaded = true;
}
fn flush_tlb_all(&self, vcpu: &mut VirtCpu) {
if vmx_info().enable_ept {
VmxAsm::ept_sync_global();
} else if vmx_info().has_invvpid_global() {
VmxAsm::sync_vcpu_global();
} else {
VmxAsm::sync_vcpu_single(vcpu.vmx().vpid);
// TODO: nested: VmxAsm::sync_vcpu_single(vcpu.vmx().nested.vpid02);
}
}
fn handle_exit_irqoff(&self, vcpu: &mut VirtCpu) {
if vcpu.vmx().emulation_required {
return;
}
let basic = VmxExitReasonBasic::from(vcpu.vmx().exit_reason.basic());
if basic == VmxExitReasonBasic::EXTERNAL_INTERRUPT {
Vmx::handle_external_interrupt_irqoff(vcpu);
} else if basic == VmxExitReasonBasic::EXCEPTION_OR_NMI {
//todo!()
}
}
fn handle_exit(
//vmx_handle_exit
&self,
vcpu: &mut VirtCpu,
vm: &Vm,
fastpath: ExitFastpathCompletion,
) -> Result<i32, SystemError> {
let r = vmx_info().vmx_handle_exit(vcpu, vm, fastpath);
if vcpu.vmx().exit_reason.bus_lock_detected() {
todo!()
}
r
}
fn load_mmu_pgd(&self, vcpu: &mut VirtCpu, _vm: &Vm, root_hpa: u64, root_level: u32) {
let guest_cr3;
let eptp;
if vmx_info().enable_ept {
eptp = vmx_info().construct_eptp(vcpu, root_hpa, root_level);
VmxAsm::vmx_vmwrite(control::EPTP_FULL, eptp);
if !vmx_info().enable_unrestricted_guest
&& !vcpu.arch.cr0.contains(Cr0::CR0_ENABLE_PAGING)
{
todo!()
} else if vcpu.arch.is_register_dirty(KvmReg::VcpuExregCr3) {
guest_cr3 = vcpu.arch.cr3;
debug!("load_mmu_pgd: guest_cr3 = {:#x}", guest_cr3);
} else {
return;
}
} else {
todo!();
}
vcpu.load_pdptrs();
VmxAsm::vmx_vmwrite(guest::CR3, guest_cr3);
}
}
static mut VMX: Option<Vmx> = None;
#[inline]
pub fn vmx_info() -> &'static Vmx {
unsafe { VMX.as_ref().unwrap() }
}
#[inline]
pub fn vmx_info_mut() -> &'static mut Vmx {
unsafe { VMX.as_mut().unwrap() }
}
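/// Install the global `Vmx` instance. Guarded by an atomic flag so the global
/// is written exactly once; any second call panics.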
#[inline(never)]
pub fn init_vmx(vmx: Box<Vmx>) {
static INIT_ONCE: AtomicBool = AtomicBool::new(false);
if INIT_ONCE
.compare_exchange(false, true, Ordering::SeqCst, Ordering::SeqCst)
.is_ok()
{
unsafe { VMX = Some(*vmx) };
} else {
panic!("init_vmx can only be called once");
}
}
#[derive(Debug)]
pub struct Vmx {
pub host_idt_base: u64,
pub vmcs_config: VmcsConfig,
pub vmx_cap: VmxCapability,
pub vpid_bitmap: SpinLock<AllocBitmap>,
pub enable_vpid: bool,
pub enable_ept: bool,
pub enable_ept_ad: bool,
pub enable_unrestricted_guest: bool,
pub emulate_invalid_guest_state: bool,
pub enable_flexpriority: bool,
pub enable_vnmi: bool,
pub enable_sgx: bool,
pub enable_apicv: bool,
pub enable_ipiv: bool,
pub enable_pml: bool,
pub enable_preemption_timer: bool,
pub enable_vmware_backdoor: bool,
pub nested: bool,
pub ple_gap: u32,
pub ple_window: u32,
pub ple_window_grow: u32,
pub ple_window_max: u32,
pub ple_window_shrink: u32,
pub pt_mode: ProcessorTraceMode,
}
impl Vmx {
fn init(&mut self) {
let mut bitmap = AllocBitmap::new(1 << 16);
// 0 is an invalid VPID value
bitmap.set(0, true);
self.host_idt_base = Default::default();
self.vmcs_config = Default::default();
self.vmx_cap = Default::default();
self.vpid_bitmap = SpinLock::new(bitmap);
self.enable_vpid = true;
self.enable_ept = true;
self.enable_ept_ad = true;
self.enable_unrestricted_guest = true;
self.enable_flexpriority = true;
self.enable_vnmi = true;
self.enable_sgx = true;
self.ple_gap = 128;
self.ple_window = 4096;
self.ple_window_grow = 2;
self.ple_window_max = u32::MAX;
self.ple_window_shrink = 0;
self.enable_apicv = true;
self.enable_ipiv = true;
self.enable_pml = true;
self.enable_preemption_timer = true;
self.pt_mode = ProcessorTraceMode::System;
self.emulate_invalid_guest_state = true;
// Ignore nested virtualization for now; implement it later
self.nested = false;
self.enable_vmware_backdoor = false;
}
/*
* Internal error codes that are used to indicate that MSR emulation encountered
* an error that should result in #GP in the guest, unless userspace
* handles it.
*/
#[allow(dead_code)]
pub const KVM_MSR_RET_INVALID: u32 = 2; /* in-kernel MSR emulation #GP condition */
#[allow(dead_code)]
pub const KVM_MSR_RET_FILTERED: u32 = 3; /* #GP due to userspace MSR filter */
pub const MAX_POSSIBLE_PASSTHROUGH_MSRS: usize = 16;
pub const VMX_POSSIBLE_PASSTHROUGH_MSRS: [u32; Self::MAX_POSSIBLE_PASSTHROUGH_MSRS] = [
0x48, // MSR_IA32_SPEC_CTRL
0x49, // MSR_IA32_PRED_CMD
0x10b, // MSR_IA32_FLUSH_CMD
msr::IA32_TIME_STAMP_COUNTER,
msr::IA32_FS_BASE,
msr::IA32_GS_BASE,
msr::IA32_KERNEL_GSBASE,
0x1c4, // MSR_IA32_XFD
0x1c5, // MSR_IA32_XFD_ERR
msr::IA32_SYSENTER_CS,
msr::IA32_SYSENTER_ESP,
msr::IA32_SYSENTER_EIP,
msr::MSR_CORE_C1_RESIDENCY,
msr::MSR_CORE_C3_RESIDENCY,
msr::MSR_CORE_C6_RESIDENCY,
msr::MSR_CORE_C7_RESIDENCY,
];
/// ### Check whether the CPU supports virtualization (VMX)
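///
/// A minimal usage sketch (hypothetical caller):
/// ```ignore
/// if !Vmx::check_vmx_support() {
///     return Err(SystemError::ENOSYS);
/// }
/// ```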
#[allow(dead_code)]
pub fn check_vmx_support() -> bool {
let cpuid = CpuId::new();
// Check to see if CPU is Intel (“GenuineIntel”).
if let Some(vi) = cpuid.get_vendor_info() {
if vi.as_str() != "GenuineIntel" {
return false;
}
}
// Check processor supports for Virtual Machine Extension (VMX) technology
// CPUID.1:ECX.VMX[bit 5] = 1 (Intel Manual: 24.6 Discovering Support for VMX)
if let Some(fi) = cpuid.get_feature_info() {
if !fi.has_vmx() {
return false;
}
}
return true;
}
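/// Register the MSRs that must hold guest values while in the guest but only
/// need restoring to host values before returning to user space (the
/// "user-return MSR" mechanism), e.g. STAR/LSTAR/FMASK/EFER for syscalls.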
#[inline(never)]
pub fn set_up_user_return_msrs() {
const VMX_URET_MSRS_LIST: &[u32] = &[
msr::IA32_FMASK,
msr::IA32_LSTAR,
msr::IA32_CSTAR,
msr::IA32_EFER,
msr::IA32_TSC_AUX,
msr::IA32_STAR,
// This MSR causes errors; commented out for now
// MSR_IA32_TSX_CTRL,
];
let manager = x86_kvm_manager_mut();
for msr in VMX_URET_MSRS_LIST {
manager.add_user_return_msr(*msr);
}
}
/// Initialize the VMCS configuration
#[inline(never)]
pub fn setup_vmcs_config(
vmcs_config: &mut VmcsConfig,
vmx_cap: &mut VmxCapability,
) -> Result<(), SystemError> {
const VMCS_ENTRY_EXIT_PAIRS: &[VmcsEntryExitPair] = &[
VmcsEntryExitPair::new(
EntryControls::LOAD_IA32_PERF_GLOBAL_CTRL,
ExitControls::LOAD_IA32_PERF_GLOBAL_CTRL,
),
VmcsEntryExitPair::new(EntryControls::LOAD_IA32_PAT, ExitControls::LOAD_IA32_PAT),
VmcsEntryExitPair::new(EntryControls::LOAD_IA32_EFER, ExitControls::LOAD_IA32_EFER),
VmcsEntryExitPair::new(
EntryControls::LOAD_IA32_BNDCFGS,
ExitControls::CLEAR_IA32_BNDCFGS,
),
VmcsEntryExitPair::new(
EntryControls::LOAD_IA32_RTIT_CTL,
ExitControls::CLEAR_IA32_RTIT_CTL,
),
];
let mut cpu_based_exec_control = VmxFeat::adjust_primary_controls()?;
let mut cpu_based_2nd_exec_control =
if cpu_based_exec_control.contains(PrimaryControls::SECONDARY_CONTROLS) {
VmxFeat::adjust_secondary_controls()?
} else {
SecondaryControls::empty()
};
if cpu_based_2nd_exec_control.contains(SecondaryControls::VIRTUALIZE_APIC) {
cpu_based_exec_control.remove(PrimaryControls::USE_TPR_SHADOW)
}
if !cpu_based_exec_control.contains(PrimaryControls::USE_TPR_SHADOW) {
cpu_based_2nd_exec_control.remove(
SecondaryControls::VIRTUALIZE_APIC_REGISTER
| SecondaryControls::VIRTUALIZE_X2APIC
| SecondaryControls::VIRTUAL_INTERRUPT_DELIVERY,
)
}
let cap = unsafe { rdmsr(msr::IA32_VMX_EPT_VPID_CAP) };
vmx_cap.set_val_from_msr_val(cap);
// EPT capability bits were read even though EPT is not supported
if !cpu_based_2nd_exec_control.contains(SecondaryControls::ENABLE_EPT)
&& !vmx_cap.ept.is_empty()
{
warn!("EPT CAP should not exist if not support. 1-setting enable EPT VM-execution control");
return Err(SystemError::EIO);
}
if !cpu_based_2nd_exec_control.contains(SecondaryControls::ENABLE_VPID)
&& !vmx_cap.vpid.is_empty()
{
warn!("VPID CAP should not exist if not support. 1-setting enable VPID VM-execution control");
return Err(SystemError::EIO);
}
let cpuid = CpuId::new();
let cpu_extend_feat = cpuid
.get_extended_feature_info()
.ok_or(SystemError::ENOSYS)?;
if !cpu_extend_feat.has_sgx() {
cpu_based_2nd_exec_control.remove(SecondaryControls::ENCLS_EXITING);
}
let cpu_based_3rd_exec_control = 0;
// if cpu_based_exec_control.contains(SecondaryControls::TERTIARY_CONTROLS) {
// // Self::adjust_vmx_controls64(VmxFeature::IPI_VIRT, IA32_CTLS3)
// todo!()
// } else {
// 0
// };
let vmxexit_control = VmxFeat::adjust_exit_controls()?;
let pin_based_exec_control = VmxFeat::adjust_pin_based_controls()?;
// TODO: broken timer?
// https://code.dragonos.org.cn/xref/linux-6.6.21/arch/x86/kvm/vmx/vmx.c#2676
let vmentry_control = VmxFeat::adjust_entry_controls()?;
for pair in VMCS_ENTRY_EXIT_PAIRS {
let n_ctrl = pair.entry;
let x_ctrl = pair.exit;
// if !(vmentry_control.bits() & n_ctrl.bits) == !(vmxexit_control.bits() & x_ctrl.bits) {
// continue;
// }
if (vmentry_control.contains(n_ctrl)) == (vmxexit_control.contains(x_ctrl)) {
continue;
}
warn!(
"Inconsistent VM-Entry/VM-Exit pair, entry = {:?}, exit = {:?}",
vmentry_control & n_ctrl,
vmxexit_control & x_ctrl,
);
return Err(SystemError::EIO);
}
let basic = unsafe { rdmsr(msr::IA32_VMX_BASIC) };
let vmx_msr_high = (basic >> 32) as u32;
let vmx_msr_low = basic as u32;
// On 64-bit CPUs, VMX_BASIC[48] must be 0
if vmx_msr_high & (1 << 16) != 0 {
return Err(SystemError::EIO);
}
// Check that the VMCS memory type is write-back (WB)
if (vmx_msr_high >> 18) & 15 != 6 {
return Err(SystemError::EIO);
}
let misc_msr = unsafe { rdmsr(msr::IA32_VMX_MISC) };
vmcs_config.size = vmx_msr_high & 0x1fff;
vmcs_config.basic_cap = vmx_msr_high & !0x1fff;
vmcs_config.revision_id = vmx_msr_low;
vmcs_config.pin_based_exec_ctrl = pin_based_exec_control;
vmcs_config.cpu_based_exec_ctrl = cpu_based_exec_control;
vmcs_config.cpu_based_2nd_exec_ctrl = cpu_based_2nd_exec_control;
vmcs_config.cpu_based_3rd_exec_ctrl = cpu_based_3rd_exec_control;
vmcs_config.vmentry_ctrl = vmentry_control;
vmcs_config.vmexit_ctrl = vmxexit_control;
vmcs_config.misc = misc_msr;
Ok(())
}
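/// Adjust a VMX control field against its capability MSR. Per the Intel SDM,
/// the MSR's low 32 bits are the allowed-0 settings (bits that must be 1) and
/// the high 32 bits are the allowed-1 settings (bits that may be 1). For
/// example, with low = 0x16 and high = 0xffff_ffff, any requested `ctl` gets
/// 0x16 forced on; if a required `ctl_min` bit is cleared by the allowed-1
/// mask, the hardware cannot support it and EIO is returned.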
fn adjust_vmx_controls(ctl_min: u32, ctl_opt: u32, msr: u32) -> Result<u32, SystemError> {
let mut ctl = ctl_min | ctl_opt;
let val = unsafe { rdmsr(msr) };
let low = val as u32;
let high = (val >> 32) as u32;
ctl &= high;
ctl |= low;
if ctl_min & !ctl != 0 {
return Err(SystemError::EIO);
}
return Ok(ctl);
}
#[allow(dead_code)]
fn adjust_vmx_controls64(ctl_opt: u32, msr: u32) -> u32 {
let allow = unsafe { rdmsr(msr) } as u32;
ctl_opt & allow
}
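/// Allocate a VPID from the global bitmap. VPID 0 is reserved (bit 0 is
/// pre-set in `init()`), so `None` means VPIDs are disabled or the 16-bit
/// space is exhausted.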
pub fn alloc_vpid(&self) -> Option<usize> {
if !self.enable_vpid {
return None;
}
let mut bitmap_guard = self.vpid_bitmap.lock();
let idx = bitmap_guard.first_false_index();
if let Some(idx) = idx {
bitmap_guard.set(idx, true);
}
return idx;
}
#[allow(dead_code)]
pub fn free_vpid(&self, vpid: Option<usize>) {
if !self.enable_vpid || vpid.is_none() {
return;
}
self.vpid_bitmap.lock().set(vpid.unwrap(), false);
}
pub fn is_valid_passthrough_msr(msr: u32) -> bool {
match msr {
0x800..0x8ff => {
// x2APIC MSR range
return true;
}
msr::MSR_IA32_RTIT_STATUS
| msr::MSR_IA32_RTIT_OUTPUT_BASE
| msr::MSR_IA32_RTIT_OUTPUT_MASK_PTRS
| msr::MSR_IA32_CR3_MATCH
| msr::MSR_LBR_SELECT
| msr::MSR_LASTBRANCH_TOS => {
return true;
}
msr::MSR_IA32_ADDR0_START..msr::MSR_IA32_ADDR3_END => {
return true;
}
0xdc0..0xddf => {
// MSR_LBR_INFO_0 ... MSR_LBR_INFO_0 + 31
return true;
}
0x680..0x69f => {
// MSR_LBR_NHM_FROM ... MSR_LBR_NHM_FROM + 31
return true;
}
0x6c0..0x6df => {
// MSR_LBR_NHM_TO ... MSR_LBR_NHM_TO + 31
return true;
}
0x40..0x48 => {
// MSR_LBR_CORE_FROM ... MSR_LBR_CORE_FROM + 8
return true;
}
0x60..0x68 => {
// MSR_LBR_CORE_TO ... MSR_LBR_CORE_TO + 8
return true;
}
_ => {
return Self::possible_passthrough_msr_slot(msr).is_some();
}
}
}
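/// Flush TLB entries tagged with `vpid`. Uses single-context INVVPID when the
/// CPU supports it, otherwise falls back to a global flush (vpid 0 needs no
/// flush here).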
pub fn vpid_sync_context(&self, vpid: u16) {
if self.has_invvpid_single() {
VmxAsm::sync_vcpu_single(vpid);
} else if vpid != 0 {
VmxAsm::sync_vcpu_global();
}
}
pub fn possible_passthrough_msr_slot(msr: u32) -> Option<usize> {
for (idx, val) in Self::VMX_POSSIBLE_PASSTHROUGH_MSRS.iter().enumerate() {
if *val == msr {
return Some(idx);
}
}
return None;
}
pub fn tdp_enabled(&self) -> bool {
self.enable_ept
}
fn setup_l1d_flush(&self) {
// TODO: keep it this way for now
*L1TF_VMX_MITIGATION.write() = VmxL1dFlushState::NotRequired;
}
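/// Build the EPT pointer (EPTP) written to the `EPTP_FULL` VMCS field.
/// Assuming the `VMX_EPTP_*` constants follow the Intel SDM encoding:
/// bits 2:0 = memory type (6 = write-back), bits 5:3 = page-walk length minus
/// one, bit 6 = enable accessed/dirty flags, upper bits = physical address of
/// the root EPT table. E.g. root_hpa = 0x1000 at 4 levels with A/D enabled
/// gives 0x1000 | 6 | (3 << 3) | (1 << 6) = 0x105e.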
pub fn construct_eptp(&self, vcpu: &mut VirtCpu, root_hpa: u64, root_level: u32) -> u64 {
let mut eptp = VMX_EPTP_MT_WB;
eptp |= if root_level == 5 {
VMX_EPTP_PWL_5
} else {
VMX_EPTP_PWL_4
};
if self.enable_ept_ad && !vcpu.arch.is_guest_mode() {
eptp |= VMX_EPTP_AD_ENABLE_BIT;
}
eptp |= root_hpa;
return eptp;
}
fn vmx_reset_vcpu(&mut self, vcpu: &mut VirtCpu, vm: &Vm) {
self.init_vmcs(vcpu, vm);
if self.nested {
todo!()
}
// TODO: vcpu_setup_sgx_lepubkeyhash
// TODO: nested
vcpu.arch.microcode_version = 0x100000000;
let vmx = vcpu.vmx_mut();
vmx.msr_ia32_feature_control_valid_bits = 1 << 0;
vmx.post_intr_desc.control.set_nv(0xf2);
vmx.post_intr_desc.control.set_sn(true);
}
fn init_vmcs(&mut self, vcpu: &mut VirtCpu, vm: &Vm) {
let kvm_vmx = vm.kvm_vmx();
if vmx_info().nested {
todo!()
}
if vmx_info().has_msr_bitmap() {
debug!(
"msr_bitmap addr 0x{:x}",
vcpu.vmx().vmcs01.lock().msr_bitmap.phys_addr() as u64
);
VmxAsm::vmx_vmwrite(
control::MSR_BITMAPS_ADDR_FULL,
vcpu.vmx().vmcs01.lock().msr_bitmap.phys_addr() as u64,
)
}
VmxAsm::vmx_vmwrite(guest::LINK_PTR_FULL, u64::MAX);
let mut loaded_vmcs = vcpu.vmx().loaded_vmcs.lock();
loaded_vmcs.controls_set(
ControlsType::Pin,
self.get_pin_based_exec_controls(vcpu).bits() as u64,
);
loaded_vmcs.controls_set(
ControlsType::Exec,
self.get_exec_controls(vcpu, &vm.arch).bits() as u64,
);
if self.has_sceondary_exec_ctrls() {
loaded_vmcs.controls_set(
ControlsType::SecondaryExec,
self.get_secondary_exec_controls(vcpu, vm).bits() as u64,
)
}
if self.has_tertiary_exec_ctrls() {
todo!()
}
drop(loaded_vmcs);
if self.enable_apicv && vcpu.arch.lapic_in_kernel() {
VmxAsm::vmx_vmwrite(control::EOI_EXIT0_FULL, 0);
VmxAsm::vmx_vmwrite(control::EOI_EXIT1_FULL, 0);
VmxAsm::vmx_vmwrite(control::EOI_EXIT2_FULL, 0);
VmxAsm::vmx_vmwrite(control::EOI_EXIT3_FULL, 0);
VmxAsm::vmx_vmwrite(guest::INTERRUPT_STATUS, 0);
VmxAsm::vmx_vmwrite(control::POSTED_INTERRUPT_NOTIFICATION_VECTOR, 0xf2);
VmxAsm::vmx_vmwrite(control::POSTED_INTERRUPT_DESC_ADDR_FULL, unsafe {
MMArch::virt_2_phys(VirtAddr::new(
&vcpu.vmx().post_intr_desc as *const _ as usize,
))
.unwrap()
.data() as u64
})
}
if self.enable_apicv && vcpu.arch.lapic_in_kernel() {
// PID_POINTER_TABLE
VmxAsm::vmx_vmwrite(0x2042, unsafe {
MMArch::virt_2_phys(VirtAddr::new(kvm_vmx.pid_table().as_ptr() as usize))
.unwrap()
.data() as u64
});
// LAST_PID_POINTER_INDEX
VmxAsm::vmx_vmwrite(0x08, vm.arch.max_vcpu_ids as u64 - 1);
}
if !vm.arch.pause_in_guest {
VmxAsm::vmx_vmwrite(control::PLE_GAP, self.ple_gap as u64);
vcpu.vmx_mut().ple_window = self.ple_window;
vcpu.vmx_mut().ple_window_dirty = true;
}
if vm
.arch
.notify_vmexit_flags
.contains(NotifyVmExitFlags::KVM_X86_NOTIFY_VMEXIT_ENABLED)
{
// NOTIFY_WINDOW
VmxAsm::vmx_vmwrite(0x4024, vm.arch.notify_window as u64);
}
VmxAsm::vmx_vmwrite(control::PAGE_FAULT_ERR_CODE_MASK, 0);
VmxAsm::vmx_vmwrite(control::PAGE_FAULT_ERR_CODE_MATCH, 0);
VmxAsm::vmx_vmwrite(control::CR3_TARGET_COUNT, 0);
VmxAsm::vmx_vmwrite(host::FS_SELECTOR, 0);
VmxAsm::vmx_vmwrite(host::GS_SELECTOR, 0);
self.set_constant_host_state(vcpu);
VmxAsm::vmx_vmwrite(host::FS_BASE, 0);
VmxAsm::vmx_vmwrite(host::GS_BASE, 0);
if self.has_vmfunc() {
VmxAsm::vmx_vmwrite(control::VM_FUNCTION_CONTROLS_FULL, 0);
}
VmxAsm::vmx_vmwrite(control::VMEXIT_MSR_STORE_COUNT, 0);
VmxAsm::vmx_vmwrite(control::VMEXIT_MSR_LOAD_COUNT, 0);
VmxAsm::vmx_vmwrite(control::VMEXIT_MSR_LOAD_ADDR_FULL, unsafe {
MMArch::virt_2_phys(VirtAddr::new(
vcpu.vmx().msr_autoload.host.val.as_ptr() as *const _ as usize,
))
.unwrap()
.data() as u64
});
VmxAsm::vmx_vmwrite(control::VMENTRY_MSR_LOAD_COUNT, 0);
VmxAsm::vmx_vmwrite(control::VMENTRY_MSR_LOAD_ADDR_FULL, unsafe {
MMArch::virt_2_phys(VirtAddr::new(
vcpu.vmx().msr_autoload.guest.val.as_ptr() as usize
))
.unwrap()
.data() as u64
});
if self
.vmcs_config
.vmentry_ctrl
.contains(EntryControls::LOAD_IA32_PAT)
{
VmxAsm::vmx_vmwrite(guest::IA32_PAT_FULL, vcpu.arch.pat) //todo
}
let mut loaded_vmcs = vcpu.vmx().loaded_vmcs.lock();
loaded_vmcs.controls_set(
ControlsType::VmExit,
self.get_vmexit_controls().bits() as u64,
);
loaded_vmcs.controls_set(
ControlsType::VmEntry,
self.get_vmentry_controls().bits() as u64,
);
drop(loaded_vmcs);
vcpu.arch.cr0_guest_owned_bits = self.l1_guest_owned_cr0_bits();
VmxAsm::vmx_vmwrite(
control::CR0_GUEST_HOST_MASK,
(!vcpu.arch.cr0_guest_owned_bits).bits() as u64,
);
self.set_cr4_guest_host_mask(&mut vcpu.arch);
if vcpu.vmx().vpid != 0 {
VmxAsm::vmx_vmwrite(control::VPID, vcpu.vmx().vpid as u64);
}
if self.has_xsaves() {
VmxAsm::vmx_vmwrite(control::XSS_EXITING_BITMAP_FULL, 0);
}
if self.enable_pml {
VmxAsm::vmx_vmwrite(control::PML_ADDR_FULL, unsafe {
MMArch::virt_2_phys(VirtAddr::new(vcpu.vmx().pml_pg.as_ref().as_ptr() as usize))
.unwrap()
.data() as u64
});
VmxAsm::vmx_vmwrite(guest::PML_INDEX, VmxVCpuPriv::PML_ENTITY_NUM as u64 - 1);
}
// TODO: vmx_write_encls_bitmap
if self.pt_mode == ProcessorTraceMode::HostGuest {
todo!()
}
VmxAsm::vmx_vmwrite(guest::IA32_SYSENTER_CS, 0);
VmxAsm::vmx_vmwrite(guest::IA32_SYSENTER_ESP, 0);
VmxAsm::vmx_vmwrite(guest::IA32_SYSENTER_EIP, 0);
VmxAsm::vmx_vmwrite(guest::IA32_DEBUGCTL_FULL, 0);
if self.has_tpr_shadow() {
VmxAsm::vmx_vmwrite(control::VIRT_APIC_ADDR_FULL, 0);
if vcpu.arch.lapic_in_kernel() {
VmxAsm::vmx_vmwrite(control::VIRT_APIC_ADDR_FULL, unsafe {
MMArch::virt_2_phys(VirtAddr::new(vcpu.arch.lapic().regs.as_ptr() as usize))
.unwrap()
.data() as u64
});
}
VmxAsm::vmx_vmwrite(control::TPR_THRESHOLD, 0);
}
self.setup_uret_msrs(vcpu);
}
/// Dump VMCS contents for debugging
pub fn dump_vmcs(&self, vcpu: &VirtCpu) {
let vmentry_ctl = unsafe {
EntryControls::from_bits_unchecked(self.vmread(control::VMENTRY_CONTROLS) as u32)
};
let vmexit_ctl = unsafe {
ExitControls::from_bits_unchecked(self.vmread(control::VMEXIT_CONTROLS) as u32)
};
let cpu_based_exec_ctl = PrimaryControls::from_bits_truncate(
self.vmread(control::PRIMARY_PROCBASED_EXEC_CONTROLS) as u32,
);
let pin_based_exec_ctl = PinbasedControls::from_bits_truncate(
self.vmread(control::PINBASED_EXEC_CONTROLS) as u32,
);
// let cr4 = Cr4::from_bits_truncate(self.vmread(guest::CR4) as usize);
let secondary_exec_control = if self.has_sceondary_exec_ctrls() {
unsafe {
SecondaryControls::from_bits_unchecked(
self.vmread(control::SECONDARY_PROCBASED_EXEC_CONTROLS) as u32,
)
}
} else {
SecondaryControls::empty()
};
if self.has_tertiary_exec_ctrls() {
todo!()
}
error!(
"VMCS addr: 0x{:x}, last attempted VM-entry on CPU {:?}",
vcpu.vmx().loaded_vmcs().vmcs.lock().as_ref() as *const _ as usize,
vcpu.arch.last_vmentry_cpu
);
error!("--- GUEST STATE ---");
error!(
"CR0: actual = 0x{:x}, shadow = 0x{:x}, gh_mask = 0x{:x}",
self.vmread(guest::CR0),
self.vmread(control::CR0_READ_SHADOW),
self.vmread(control::CR0_GUEST_HOST_MASK)
);
error!(
"CR4: actual = 0x{:x}, shadow = 0x{:x}, gh_mask = 0x{:x}",
self.vmread(guest::CR4),
self.vmread(control::CR4_READ_SHADOW),
self.vmread(control::CR4_GUEST_HOST_MASK)
);
error!("CR3: actual = 0x{:x}", self.vmread(guest::CR3));
if self.has_ept() {
error!(
"PDPTR0 = 0x{:x}, PDPTR1 = 0x{:x}",
self.vmread(guest::PDPTE0_FULL),
self.vmread(guest::PDPTE1_FULL)
);
error!(
"PDPTR2 = 0x{:x}, PDPTR3 = 0x{:x}",
self.vmread(guest::PDPTE2_FULL),
self.vmread(guest::PDPTE3_FULL)
);
}
error!(
"RSP = 0x{:x}, RIP = 0x{:x}",
self.vmread(guest::RSP),
self.vmread(guest::RIP)
);
error!(
"RFLAGS = 0x{:x}, DR7 = 0x{:x}",
self.vmread(guest::RFLAGS),
self.vmread(guest::DR7)
);
error!(
"Sysenter RSP = 0x{:x}, CS:RIP = 0x{:x}:0x{:x}",
self.vmread(guest::IA32_SYSENTER_ESP),
self.vmread(guest::IA32_SYSENTER_CS),
self.vmread(guest::IA32_SYSENTER_EIP),
);
self.dump_sel("CS: ", guest::CS_SELECTOR);
self.dump_sel("DS: ", guest::DS_SELECTOR);
self.dump_sel("SS: ", guest::SS_SELECTOR);
self.dump_sel("ES: ", guest::ES_SELECTOR);
self.dump_sel("FS: ", guest::FS_SELECTOR);
self.dump_sel("GS: ", guest::GS_SELECTOR);
self.dump_dtsel("GDTR: ", guest::GDTR_LIMIT);
self.dump_sel("LDTR: ", guest::LDTR_SELECTOR);
self.dump_dtsel("IDTR: ", guest::IDTR_LIMIT);
self.dump_sel("TR: ", guest::TR_SELECTOR);
let efer_slot = vcpu
.vmx()
.msr_autoload
.guest
.find_loadstore_msr_slot(msr::IA32_EFER);
if vmentry_ctl.contains(EntryControls::LOAD_IA32_EFER) {
error!("EFER = 0x{:x}", self.vmread(guest::IA32_EFER_FULL));
} else if let Some(slot) = efer_slot {
error!(
"EFER = 0x{:x} (autoload)",
vcpu.vmx().msr_autoload.guest.val[slot].data
);
} else if vmentry_ctl.contains(EntryControls::IA32E_MODE_GUEST) {
error!(
"EFER = 0x{:x} (effective)",
vcpu.arch.efer | (EferFlags::LONG_MODE_ACTIVE | EferFlags::LONG_MODE_ENABLE)
);
} else {
error!(
"EFER = 0x{:x} (effective)",
vcpu.arch.efer & !(EferFlags::LONG_MODE_ACTIVE | EferFlags::LONG_MODE_ENABLE)
);
}
if vmentry_ctl.contains(EntryControls::LOAD_IA32_PAT) {
error!("PAT = 0x{:x}", self.vmread(guest::IA32_PAT_FULL));
}
error!(
"DebugCtl = 0x{:x}, DebugExceptions = 0x{:x}",
self.vmread(guest::IA32_DEBUGCTL_FULL),
self.vmread(guest::PENDING_DBG_EXCEPTIONS)
);
if self.has_load_perf_global_ctrl()
&& vmentry_ctl.contains(EntryControls::LOAD_IA32_PERF_GLOBAL_CTRL)
{
error!(
"PerfGlobCtl = 0x{:x}",
self.vmread(guest::IA32_PERF_GLOBAL_CTRL_FULL)
);
}
if vmentry_ctl.contains(EntryControls::LOAD_IA32_BNDCFGS) {
error!("BndCfgS = 0x{:x}", self.vmread(guest::IA32_BNDCFGS_FULL));
}
error!(
"Interruptibility = 0x{:x}, ActivityState = 0x{:x}",
self.vmread(guest::INTERRUPT_STATUS),
self.vmread(guest::ACTIVITY_STATE)
);
if secondary_exec_control.contains(SecondaryControls::VIRTUAL_INTERRUPT_DELIVERY) {
error!(
"InterruptStatus = 0x{:x}",
self.vmread(guest::INTERRUPT_STATUS)
);
}
if self.vmread(control::VMENTRY_MSR_LOAD_COUNT) > 0 {
self.dump_msrs("guest autoload", &vcpu.vmx().msr_autoload.guest);
}
if self.vmread(control::VMEXIT_MSR_LOAD_COUNT) > 0 {
self.dump_msrs("guest autostore", &vcpu.vmx().msr_autostore);
}
error!("\n--- HOST STATE ---");
error!(
"RIP = 0x{:x}, RSP = 0x{:x}",
self.vmread(host::RIP),
self.vmread(host::RSP)
);
error!(
"CS = 0x{:x}, SS = 0x{:x}, DS = 0x{:x}, ES = 0x{:x}, FS = 0x{:x}, GS = 0x{:x}, TR = 0x{:x}",
self.vmread(host::CS_SELECTOR),
self.vmread(host::SS_SELECTOR),
self.vmread(host::DS_SELECTOR),
self.vmread(host::ES_SELECTOR),
self.vmread(host::FS_SELECTOR),
self.vmread(host::GS_SELECTOR),
self.vmread(host::TR_SELECTOR)
);
error!(
"FSBase = 0x{:x}, GSBase = 0x{:x}, TRBase = 0x{:x}",
self.vmread(host::FS_BASE),
self.vmread(host::GS_BASE),
self.vmread(host::TR_BASE),
);
error!(
"GDTBase = 0x{:x}, IDTBase = 0x{:x}",
self.vmread(host::GDTR_BASE),
self.vmread(host::IDTR_BASE),
);
error!(
"CR0 = 0x{:x}, CR3 = 0x{:x}, CR4 = 0x{:x}",
self.vmread(host::CR0),
self.vmread(host::CR3),
self.vmread(host::CR4),
);
error!(
"Sysenter RSP = 0x{:x}, CS:RIP=0x{:x}:0x{:x}",
self.vmread(host::IA32_SYSENTER_ESP),
self.vmread(host::IA32_SYSENTER_CS),
self.vmread(host::IA32_SYSENTER_EIP),
);
if vmexit_ctl.contains(ExitControls::LOAD_IA32_EFER) {
error!("EFER = 0x{:x}", self.vmread(host::IA32_EFER_FULL));
}
if vmexit_ctl.contains(ExitControls::LOAD_IA32_PAT) {
error!("PAT = 0x{:x}", self.vmread(host::IA32_PAT_FULL));
}
if self.has_load_perf_global_ctrl()
&& vmexit_ctl.contains(ExitControls::LOAD_IA32_PERF_GLOBAL_CTRL)
{
error!(
"PerfGlobCtl = 0x{:x}",
self.vmread(host::IA32_PERF_GLOBAL_CTRL_FULL)
);
}
if self.vmread(control::VMEXIT_MSR_LOAD_COUNT) > 0 {
self.dump_msrs("host autoload", &vcpu.vmx().msr_autoload.host);
}
error!("\n--- CONTROL STATE ---");
error!(
"\nCPUBased = {:?},\nSecondaryExec = 0x{:x},\nTertiaryExec = 0(Unused)",
cpu_based_exec_ctl, secondary_exec_control,
);
error!(
"\nPinBased = {:?},\nEntryControls = {:?},\nExitControls = {:?}",
pin_based_exec_ctl, vmentry_ctl, vmexit_ctl,
);
error!(
"ExceptionBitmap = 0x{:x}, PFECmask = 0x{:x}, PFECmatch = 0x{:x}",
self.vmread(control::EXCEPTION_BITMAP),
self.vmread(control::PAGE_FAULT_ERR_CODE_MASK),
self.vmread(control::PAGE_FAULT_ERR_CODE_MATCH),
);
error!(
"VMEntry: intr_info = 0x{:x}, errcode = 0x{:x}, ilen = 0x{:x}",
self.vmread(control::VMENTRY_INTERRUPTION_INFO_FIELD),
self.vmread(control::VMENTRY_EXCEPTION_ERR_CODE),
self.vmread(control::VMENTRY_INSTRUCTION_LEN),
);
error!(
"VMExit: intr_info = 0x{:x}, errcode = 0x{:x}, ilen = 0x{:x}",
self.vmread(ro::VMEXIT_INSTRUCTION_INFO),
self.vmread(ro::VMEXIT_INTERRUPTION_ERR_CODE),
self.vmread(ro::VMEXIT_INSTRUCTION_LEN),
);
error!(
" reason = 0x{:x}, qualification = 0x{:x}",
self.vmread(ro::EXIT_REASON),
self.vmread(ro::EXIT_QUALIFICATION),
);
error!(
"IDTVectoring: info = 0x{:x}, errcode = 0x{:x}",
self.vmread(ro::IDT_VECTORING_INFO),
self.vmread(ro::IDT_VECTORING_ERR_CODE),
);
error!("TSC Offset = 0x{:x}", self.vmread(control::TSC_OFFSET_FULL));
if secondary_exec_control.contains(SecondaryControls::USE_TSC_SCALING) {
error!(
"TSC Multiplier = 0x{:x}",
self.vmread(control::TSC_MULTIPLIER_FULL)
);
}
if cpu_based_exec_ctl.contains(PrimaryControls::USE_TPR_SHADOW) {
if secondary_exec_control.contains(SecondaryControls::VIRTUAL_INTERRUPT_DELIVERY) {
let status = self.vmread(guest::INTERRUPT_STATUS);
error!("SVI|RVI = 0x{:x}|0x{:x}", status >> 8, status & 0xff);
}
error!(
"TPR Threshold = 0x{:x}",
self.vmread(control::TPR_THRESHOLD)
);
if secondary_exec_control.contains(SecondaryControls::VIRTUALIZE_APIC) {
error!(
"APIC-access addr = 0x{:x}",
self.vmread(control::APIC_ACCESS_ADDR_FULL)
);
}
error!(
"virt-APIC addr = 0x{:x}",
self.vmread(control::VIRT_APIC_ADDR_FULL)
);
}
if pin_based_exec_ctl.contains(PinbasedControls::POSTED_INTERRUPTS) {
error!(
"PostedIntrVec = 0x{:x}",
self.vmread(control::POSTED_INTERRUPT_NOTIFICATION_VECTOR)
);
}
if secondary_exec_control.contains(SecondaryControls::ENABLE_EPT) {
error!("EPT pointer = 0x{:x}", self.vmread(control::EPTP_FULL));
}
if secondary_exec_control.contains(SecondaryControls::PAUSE_LOOP_EXITING) {
error!(
"PLE Gap = 0x{:x}, Window = 0x{:x}",
self.vmread(control::PLE_GAP),
self.vmread(control::PLE_WINDOW)
);
}
if secondary_exec_control.contains(SecondaryControls::ENABLE_VPID) {
error!("Virtual processor ID = 0x{:x}", self.vmread(control::VPID));
}
}
pub fn dump_sel(&self, name: &'static str, sel: u32) {
error!(
"{name} sel = 0x{:x}, attr = 0x{:x}, limit = 0x{:x}, base = 0x{:x}",
self.vmread(sel),
self.vmread(sel + guest::ES_ACCESS_RIGHTS - guest::ES_SELECTOR),
self.vmread(sel + guest::ES_LIMIT - guest::ES_SELECTOR),
self.vmread(sel + guest::ES_BASE - guest::ES_SELECTOR),
);
}
pub fn dump_dtsel(&self, name: &'static str, limit: u32) {
error!(
"{name} limit = 0x{:x}, base = 0x{:x}",
self.vmread(limit),
self.vmread(limit + guest::GDTR_BASE - guest::GDTR_LIMIT)
);
}
pub fn dump_msrs(&self, name: &'static str, msr: &VmxMsrs) {
error!("MSR {name}:");
for (idx, msr) in msr.val.iter().enumerate() {
error!("{idx}: msr = 0x{:x}, value = 0x{:x}", msr.index, msr.data);
}
}
#[inline]
pub fn vmread(&self, field: u32) -> u64 {
VmxAsm::vmx_vmread(field)
}
fn setup_uret_msrs(&self, vcpu: &mut VirtCpu) {
// Whether to load the syscall-related MSRs
let load_syscall_msrs =
vcpu.arch.is_long_mode() && vcpu.arch.efer.contains(EferFlags::SYSTEM_CALL_EXTENSIONS);
self.setup_uret_msr(vcpu, msr::IA32_STAR, load_syscall_msrs);
self.setup_uret_msr(vcpu, msr::IA32_LSTAR, load_syscall_msrs);
self.setup_uret_msr(vcpu, msr::IA32_FMASK, load_syscall_msrs);
let load_efer = self.update_transition_efer(vcpu);
self.setup_uret_msr(vcpu, msr::IA32_EFER, load_efer);
// TODO: MSR_TSC_AUX
self.setup_uret_msr(
vcpu,
msr::MSR_IA32_TSX_CTRL,
CpuId::default()
.get_extended_feature_info()
.unwrap()
.has_rtm(),
);
vcpu.vmx_mut().guest_uret_msrs_loaded = false;
}
fn setup_uret_msr(&self, vcpu: &mut VirtCpu, msr: u32, load_into_hardware: bool) {
let uret_msr = vcpu.vmx_mut().find_uret_msr_mut(msr);
if let Some((_idx, msr)) = uret_msr {
msr.load_into_hardware = load_into_hardware;
}
}
fn update_transition_efer(&self, vcpu: &mut VirtCpu) -> bool {
let mut guest_efer = vcpu.arch.efer;
let mut ignore_efer = EferFlags::empty();
if !self.enable_ept {
guest_efer.insert(EferFlags::NO_EXECUTE_ENABLE);
}
ignore_efer.insert(EferFlags::SYSTEM_CALL_EXTENSIONS);
ignore_efer.insert(EferFlags::LONG_MODE_ACTIVE | EferFlags::LONG_MODE_ENABLE);
if guest_efer.contains(EferFlags::LONG_MODE_ACTIVE) {
ignore_efer.remove(EferFlags::SYSTEM_CALL_EXTENSIONS);
}
if self.has_load_ia32_efer()
|| (self.enable_ept
&& (vcpu.arch.efer ^ x86_kvm_manager().host_efer)
.contains(EferFlags::NO_EXECUTE_ENABLE))
{
if !guest_efer.contains(EferFlags::LONG_MODE_ACTIVE) {
guest_efer.remove(EferFlags::LONG_MODE_ENABLE);
}
if guest_efer != x86_kvm_manager().host_efer {
vcpu.vmx_mut().add_atomic_switch_msr(
msr::IA32_EFER,
guest_efer.bits(),
x86_kvm_manager().host_efer.bits(),
false,
);
} else {
vcpu.vmx_mut().clear_atomic_switch_msr(msr::IA32_EFER);
}
return false;
}
let idx = x86_kvm_manager().find_user_return_msr_idx(msr::IA32_EFER);
if let Some(i) = idx {
vcpu.vmx_mut().clear_atomic_switch_msr(msr::IA32_EFER);
guest_efer.remove(ignore_efer);
guest_efer.insert(x86_kvm_manager().host_efer & ignore_efer);
vcpu.vmx_mut().guest_uret_msrs[i].data = guest_efer.bits();
vcpu.vmx_mut().guest_uret_msrs[i].mask = (!ignore_efer).bits();
return true;
} else {
return false;
}
}
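/// Hedged illustration (not called anywhere) of the XOR test used in
/// `update_transition_efer` above: XOR-ing guest and host EFER leaves a bit
/// set exactly where the two disagree, so checking NO_EXECUTE_ENABLE on the
/// XOR result fires only when guest and host differ on NX.
#[allow(dead_code)]
fn example_efer_nx_differs(guest_efer: EferFlags, host_efer: EferFlags) -> bool {
(guest_efer ^ host_efer).contains(EferFlags::NO_EXECUTE_ENABLE)
}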
fn set_cr4_guest_host_mask(&self, arch: &mut VirtCpuArch) {
arch.cr4_guest_owned_bits =
x86_kvm_manager().possible_cr4_guest & (!arch.cr4_guest_rsvd_bits);
if !self.enable_ept {
arch.cr4_guest_owned_bits
.remove(x86_kvm_manager().cr4_tlbflush_bits);
arch.cr4_guest_owned_bits
.remove(x86_kvm_manager().cr4_pdptr_bits);
}
if arch.is_guest_mode() {
// nested virtualization: TODO
todo!()
}
VmxAsm::vmx_vmwrite(
control::CR4_GUEST_HOST_MASK,
(!arch.cr4_guest_owned_bits).bits() as u64,
);
}
fn l1_guest_owned_cr0_bits(&self) -> Cr0 {
let mut cr0 = x86_kvm_manager().possible_cr0_guest;
if !self.enable_ept {
cr0.remove(Cr0::CR0_WRITE_PROTECT)
}
return cr0;
}
/// Set up the parts of host state that remain constant across the guest's lifetime
fn set_constant_host_state(&self, vcpu: &mut VirtCpu) {
let loaded_vmcs_host_state = &mut vcpu.vmx().loaded_vmcs.lock().host_state;
VmxAsm::vmx_vmwrite(host::CR0, unsafe { cr0() }.bits() as u64);
let cr3: (PhysFrame, Cr3Flags) = Cr3::read();
let cr3_combined: u64 =
(cr3.0.start_address().as_u64() & 0xFFFF_FFFF_FFFF_F000) | (cr3.1.bits() & 0xFFF);
VmxAsm::vmx_vmwrite(host::CR3, cr3_combined);
loaded_vmcs_host_state.cr3 = cr3;
let cr4 = unsafe { cr4() };
VmxAsm::vmx_vmwrite(host::CR4, cr4.bits() as u64);
loaded_vmcs_host_state.cr4 = cr4;
VmxAsm::vmx_vmwrite(
host::CS_SELECTOR,
(segmentation::cs().bits() & (!0x07)).into(),
);
VmxAsm::vmx_vmwrite(host::DS_SELECTOR, 0);
VmxAsm::vmx_vmwrite(host::ES_SELECTOR, 0);
VmxAsm::vmx_vmwrite(
host::SS_SELECTOR,
(segmentation::ds().bits() & (!0x07)).into(),
);
VmxAsm::vmx_vmwrite(
host::TR_SELECTOR,
(unsafe { x86::task::tr().bits() } & (!0x07)).into(),
);
VmxAsm::vmx_vmwrite(host::IDTR_BASE, self.host_idt_base);
VmxAsm::vmx_vmwrite(host::RIP, vmx_vmexit as usize as u64);
let val = unsafe { rdmsr(msr::IA32_SYSENTER_CS) };
// keep only the low 32 bits
VmxAsm::vmx_vmwrite(host::IA32_SYSENTER_CS, (val << 32) >> 32);
// VmxAsm::vmx_vmwrite(host::IA32_SYSENTER_ESP, 0);
let tmp = unsafe { rdmsr(msr::IA32_SYSENTER_EIP) };
VmxAsm::vmx_vmwrite(host::IA32_SYSENTER_EIP, (tmp << 32) >> 32);
if self
.vmcs_config
.vmexit_ctrl
.contains(ExitControls::LOAD_IA32_PAT)
{
VmxAsm::vmx_vmwrite(host::IA32_PAT_FULL, unsafe { rdmsr(msr::IA32_PAT) });
}
if self.has_load_ia32_efer() {
VmxAsm::vmx_vmwrite(
host::IA32_EFER_FULL,
x86_kvm_manager().host_efer.bits() as u64,
);
}
}
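/// Minimal sketch (not called anywhere) of the CR3 recombination performed in
/// `set_constant_host_state` above: the physical frame owns bits 12 and up,
/// the flag bits own the low 12 bits, so OR-ing the masked halves rebuilds
/// the raw register value, e.g. (0x1234_5000, 0x18) -> 0x1234_5018.
#[allow(dead_code)]
fn example_combine_cr3(frame_addr: u64, flag_bits: u64) -> u64 {
(frame_addr & 0xFFFF_FFFF_FFFF_F000) | (flag_bits & 0xFFF)
}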
fn get_pin_based_exec_controls(&self, vcpu: &VirtCpu) -> PinbasedControls {
let mut ctrls = self.vmcs_config.pin_based_exec_ctrl;
if !vcpu.arch.vcpu_apicv_active() {
ctrls.remove(PinbasedControls::POSTED_INTERRUPTS);
}
if !self.enable_vnmi {
ctrls.remove(PinbasedControls::VIRTUAL_NMIS);
}
if !self.enable_preemption_timer {
ctrls.remove(PinbasedControls::VMX_PREEMPTION_TIMER);
}
return ctrls;
}
fn get_exec_controls(&self, vcpu: &VirtCpu, vmarch: &KvmArch) -> PrimaryControls {
let mut ctrls = self.vmcs_config.cpu_based_exec_ctrl;
ctrls.remove(
PrimaryControls::RDTSC_EXITING
| PrimaryControls::USE_IO_BITMAPS
| PrimaryControls::MONITOR_TRAP_FLAG
| PrimaryControls::PAUSE_EXITING,
);
ctrls.remove(
PrimaryControls::NMI_WINDOW_EXITING | PrimaryControls::INTERRUPT_WINDOW_EXITING,
);
ctrls.remove(PrimaryControls::MOV_DR_EXITING);
// Matches Linux's cpu_need_tpr_shadow(): drop the TPR shadow when the
// local APIC is not emulated in-kernel or the CPU lacks TPR-shadow support.
if !(vcpu.arch.lapic_in_kernel() && self.has_tpr_shadow()) {
ctrls.remove(PrimaryControls::USE_TPR_SHADOW);
}
if ctrls.contains(PrimaryControls::USE_TPR_SHADOW) {
ctrls.remove(PrimaryControls::CR8_LOAD_EXITING | PrimaryControls::CR8_STORE_EXITING);
} else {
ctrls.insert(PrimaryControls::CR8_LOAD_EXITING | PrimaryControls::CR8_STORE_EXITING);
}
if self.enable_ept {
ctrls.remove(
PrimaryControls::CR3_LOAD_EXITING
| PrimaryControls::CR3_STORE_EXITING
| PrimaryControls::INVLPG_EXITING,
);
}
if vmarch.mwait_in_guest {
ctrls.remove(PrimaryControls::MWAIT_EXITING | PrimaryControls::MONITOR_EXITING);
}
if vmarch.hlt_in_guest {
ctrls.remove(PrimaryControls::HLT_EXITING);
}
return ctrls;
}
fn get_secondary_exec_controls(&mut self, vcpu: &VirtCpu, vm: &Vm) -> SecondaryControls {
let mut ctrls = self.vmcs_config.cpu_based_2nd_exec_ctrl;
if self.pt_mode == ProcessorTraceMode::System {
ctrls.remove(
SecondaryControls::INTEL_PT_GUEST_PHYSICAL | SecondaryControls::CONCEAL_VMX_FROM_PT,
);
}
if !(self.enable_flexpriority && vcpu.arch.lapic_in_kernel()) {
ctrls.remove(SecondaryControls::VIRTUALIZE_APIC)
}
if vcpu.vmx().vpid == 0 {
ctrls.remove(SecondaryControls::ENABLE_VPID);
}
if !self.enable_ept {
ctrls.remove(SecondaryControls::ENABLE_EPT);
self.enable_unrestricted_guest = false;
}
if !self.enable_unrestricted_guest {
ctrls.remove(SecondaryControls::UNRESTRICTED_GUEST);
}
if vm.arch.pause_in_guest {
ctrls.remove(SecondaryControls::PAUSE_LOOP_EXITING);
}
if !vcpu.arch.vcpu_apicv_active() {
ctrls.remove(
SecondaryControls::VIRTUALIZE_APIC_REGISTER
| SecondaryControls::VIRTUAL_INTERRUPT_DELIVERY,
);
}
ctrls.remove(SecondaryControls::VIRTUALIZE_X2APIC);
ctrls.remove(SecondaryControls::ENABLE_VM_FUNCTIONS);
ctrls.remove(SecondaryControls::DTABLE_EXITING);
ctrls.remove(SecondaryControls::VMCS_SHADOWING);
if !self.enable_pml || vm.nr_memslots_dirty_logging == 0 {
ctrls.remove(SecondaryControls::ENABLE_PML);
}
// TODO: vmx_adjust_sec_exec_feature
if self.has_rdtscp() {
warn!("adjust RDTSCP todo!");
// todo!()
}
return ctrls;
}
fn get_vmexit_controls(&self) -> ExitControls {
let mut ctrls = self.vmcs_config.vmexit_ctrl;
ctrls.remove(
ExitControls::SAVE_IA32_PAT
| ExitControls::SAVE_IA32_EFER
| ExitControls::SAVE_VMX_PREEMPTION_TIMER,
);
if self.pt_mode == ProcessorTraceMode::System {
ctrls.remove(ExitControls::CONCEAL_VMX_FROM_PT | ExitControls::CLEAR_IA32_RTIT_CTL);
}
// todo: cpu_has_perf_global_ctrl_bug
ctrls.remove(ExitControls::LOAD_IA32_PERF_GLOBAL_CTRL | ExitControls::LOAD_IA32_EFER);
ctrls
}
fn get_vmentry_controls(&self) -> EntryControls {
let mut ctrls = self.vmcs_config.vmentry_ctrl;
if self.pt_mode == ProcessorTraceMode::System {
ctrls.remove(EntryControls::CONCEAL_VMX_FROM_PT | EntryControls::LOAD_IA32_RTIT_CTL);
}
ctrls.remove(
EntryControls::LOAD_IA32_PERF_GLOBAL_CTRL
| EntryControls::LOAD_IA32_EFER
| EntryControls::IA32E_MODE_GUEST,
);
// todo: cpu_has_perf_global_ctrl_bug
ctrls
}
pub fn emulation_required(&self, vcpu: &mut VirtCpu) -> bool {
return self.emulate_invalid_guest_state && !self.guest_state_valid(vcpu);
}
pub fn guest_state_valid(&self, vcpu: &mut VirtCpu) -> bool {
return vcpu.is_unrestricted_guest() || self.__guest_state_valid(vcpu);
}
pub fn __guest_state_valid(&self, vcpu: &mut VirtCpu) -> bool {
// The real-mode (vm86) segment checks below apply when the guest is
// *not* in protected mode, or when RFLAGS.VM is set
// (cf. Linux __vmx_guest_state_valid).
if !vcpu.arch.is_portected_mode()
|| x86_kvm_ops().get_rflags(vcpu).contains(RFlags::FLAGS_VM)
{
if !self.rmode_segment_valid(vcpu, VcpuSegment::CS) {
return false;
}
if !self.rmode_segment_valid(vcpu, VcpuSegment::SS) {
return false;
}
if !self.rmode_segment_valid(vcpu, VcpuSegment::DS) {
return false;
}
if !self.rmode_segment_valid(vcpu, VcpuSegment::ES) {
return false;
}
if !self.rmode_segment_valid(vcpu, VcpuSegment::FS) {
return false;
}
if !self.rmode_segment_valid(vcpu, VcpuSegment::GS) {
return false;
}
} else {
todo!("protected mode guest state checks todo");
}
return true;
}
pub fn vmx_get_segment(
&self,
vcpu: &mut VirtCpu,
mut var: UapiKvmSegment,
seg: VcpuSegment,
) -> UapiKvmSegment {
if vcpu.vmx().rmode.vm86_active && seg != VcpuSegment::LDTR {
var = vcpu.vmx().rmode.segs[seg as usize];
if seg == VcpuSegment::TR || var.selector == Vmx::vmx_read_guest_seg_selector(vcpu, seg)
{
return var;
}
var.base = Vmx::vmx_read_guest_seg_base(vcpu, seg);
var.selector = Vmx::vmx_read_guest_seg_selector(vcpu, seg);
return var;
}
var.base = Vmx::vmx_read_guest_seg_base(vcpu, seg);
var.limit = Vmx::vmx_read_guest_seg_limit(vcpu, seg);
var.selector = Vmx::vmx_read_guest_seg_selector(vcpu, seg);
let ar = Vmx::vmx_read_guest_seg_ar(vcpu, seg);
var.unusable = ((ar >> 16) & 1) as u8;
var.type_ = (ar & 15) as u8;
var.s = ((ar >> 4) & 1) as u8;
var.dpl = ((ar >> 5) & 3) as u8;
// `!` on a u8 is bitwise NOT in Rust, so compute the flag explicitly.
var.present = (var.unusable == 0) as u8;
var.avl = ((ar >> 12) & 1) as u8;
var.l = ((ar >> 13) & 1) as u8;
var.db = ((ar >> 14) & 1) as u8;
var.g = ((ar >> 15) & 1) as u8;
return var;
}
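/// Illustrative sketch only (not called anywhere): unpacks a raw VMX
/// access-rights word the same way `vmx_get_segment` above does. For the
/// sample value 0xa09b this yields a DPL-0, 64-bit code segment
/// (type 0xb, s = 1, dpl = 0, l = 1).
#[allow(dead_code)]
fn example_unpack_access_rights(ar: u32) -> (u8, u8, u8, u8) {
let type_ = (ar & 15) as u8; // bits 0..3: segment type
let s = ((ar >> 4) & 1) as u8; // bit 4: code/data descriptor
let dpl = ((ar >> 5) & 3) as u8; // bits 5..6: privilege level
let l = ((ar >> 13) & 1) as u8; // bit 13: 64-bit code segment
(type_, s, dpl, l)
}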
pub fn _vmx_set_segment(
&self,
vcpu: &mut VirtCpu,
mut var: UapiKvmSegment,
seg: VcpuSegment,
) -> UapiKvmSegment {
let sf = &KVM_VMX_SEGMENT_FIELDS[seg as usize];
vcpu.vmx_mut().segment_cache_clear();
if vcpu.vmx().rmode.vm86_active && seg != VcpuSegment::LDTR {
vcpu.vmx_mut().rmode.segs[seg as usize] = var;
if seg == VcpuSegment::TR {
VmxAsm::vmx_vmwrite(sf.selector, var.selector as u64);
} else if var.s != 0 {
Vmx::fix_rmode_seg(seg, &vcpu.vmx().rmode.segs[seg as usize]);
}
return var;
}
VmxAsm::vmx_vmwrite(sf.base, var.base);
VmxAsm::vmx_vmwrite(sf.limit, var.limit as u64);
VmxAsm::vmx_vmwrite(sf.selector, var.selector as u64);
if vcpu.is_unrestricted_guest() && seg != VcpuSegment::LDTR {
var.type_ |= 0x1;
}
VmxAsm::vmx_vmwrite(sf.ar_bytes, var.vmx_segment_access_rights() as u64);
return var;
}
pub fn rmode_segment_valid(&self, vcpu: &mut VirtCpu, seg: VcpuSegment) -> bool {
let mut var = UapiKvmSegment::default();
var = self.vmx_get_segment(vcpu, var, seg);
var.dpl = 0x3;
if seg == VcpuSegment::CS {
var.type_ = 0x3;
}
let ar = var.vmx_segment_access_rights();
if var.base != ((var.selector as u64) << 4) {
return false;
}
if var.limit != 0xffff {
return false;
}
if ar != 0xf3 {
return false;
}
true
}
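/// Hedged sketch (not called anywhere) of the vm86 invariant enforced by
/// `rmode_segment_valid` above: in real/vm86 mode the segment base must equal
/// selector << 4, e.g. selector 0x1234 implies base 0x12340; limit and access
/// rights are additionally pinned to 0xffff and 0xf3.
#[allow(dead_code)]
fn example_rmode_base_matches_selector(selector: u16, base: u64) -> bool {
base == (selector as u64) << 4
}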
pub fn fix_rmode_seg(seg: VcpuSegment, save: &UapiKvmSegment) {
let sf = &KVM_VMX_SEGMENT_FIELDS[seg as usize];
let mut var = *save;
var.dpl = 0x3;
if seg == VcpuSegment::CS {
var.type_ = 0x3;
}
if !vmx_info().emulate_invalid_guest_state {
var.selector = (var.base >> 4) as u16;
var.base &= 0xffff0;
var.limit = 0xffff;
var.g = 0;
var.db = 0;
var.present = 1;
var.s = 1;
var.l = 0;
var.unusable = 0;
var.type_ = 0x3;
var.avl = 0;
if save.base & 0xf != 0 {
warn!("segment base is not paragraph aligned when entering protected mode (seg={seg:?})");
}
}
VmxAsm::vmx_vmwrite(sf.selector, var.selector as u64);
VmxAsm::vmx_vmwrite(sf.base, var.base);
VmxAsm::vmx_vmwrite(sf.limit, var.limit as u64);
VmxAsm::vmx_vmwrite(sf.ar_bytes, var.vmx_segment_access_rights() as u64);
}
pub fn fix_pmode_seg(
&self,
vcpu: &mut VirtCpu,
seg: VcpuSegment,
mut save: UapiKvmSegment,
) -> UapiKvmSegment {
if self.emulate_invalid_guest_state {
if seg == VcpuSegment::CS || seg == VcpuSegment::SS {
save.selector &= !0x3;
}
save.dpl = (save.selector & 0x3) as u8;
save.s = 1;
}
self._vmx_set_segment(vcpu, save, seg);
return save;
}
pub fn enter_pmode(&self, vcpu: &mut VirtCpu) {
self.get_segment_with_rmode(vcpu, VcpuSegment::ES);
self.get_segment_with_rmode(vcpu, VcpuSegment::DS);
self.get_segment_with_rmode(vcpu, VcpuSegment::FS);
self.get_segment_with_rmode(vcpu, VcpuSegment::GS);
self.get_segment_with_rmode(vcpu, VcpuSegment::SS);
self.get_segment_with_rmode(vcpu, VcpuSegment::CS);
vcpu.vmx_mut().rmode.vm86_active = false;
self.set_segment_with_rmode(vcpu, VcpuSegment::TR);
let mut flags = RFlags::from_bits_truncate(VmxAsm::vmx_vmread(guest::RFLAGS));
flags.remove(RFlags::FLAGS_IOPL3 | RFlags::FLAGS_VM);
flags.insert(vcpu.vmx().rmode.save_rflags & (RFlags::FLAGS_IOPL3 | RFlags::FLAGS_VM));
VmxAsm::vmx_vmwrite(guest::RFLAGS, flags.bits());
let cr4 = (Cr4::from_bits_truncate(VmxAsm::vmx_vmread(guest::CR4) as usize)
& (!Cr4::CR4_ENABLE_VME))
| (Cr4::from_bits_truncate(VmxAsm::vmx_vmread(control::CR4_READ_SHADOW) as usize)
& Cr4::CR4_ENABLE_VME);
VmxAsm::vmx_vmwrite(guest::CR4, cr4.bits() as u64);
VmxKvmFunc.update_exception_bitmap(vcpu);
self.fix_pmode_seg_with_rmode(vcpu, VcpuSegment::CS);
self.fix_pmode_seg_with_rmode(vcpu, VcpuSegment::SS);
self.fix_pmode_seg_with_rmode(vcpu, VcpuSegment::ES);
self.fix_pmode_seg_with_rmode(vcpu, VcpuSegment::DS);
self.fix_pmode_seg_with_rmode(vcpu, VcpuSegment::FS);
self.fix_pmode_seg_with_rmode(vcpu, VcpuSegment::GS);
}
fn fix_pmode_seg_with_rmode(&self, vcpu: &mut VirtCpu, seg: VcpuSegment) {
let segment = vcpu.vmx().rmode.segs[seg as usize];
vcpu.vmx_mut().rmode.segs[seg as usize] = self.fix_pmode_seg(vcpu, seg, segment);
}
fn get_segment_with_rmode(&self, vcpu: &mut VirtCpu, seg: VcpuSegment) {
let segment = vcpu.vmx().rmode.segs[seg as usize];
vcpu.vmx_mut().rmode.segs[seg as usize] = self.vmx_get_segment(vcpu, segment, seg);
}
fn set_segment_with_rmode(&self, vcpu: &mut VirtCpu, seg: VcpuSegment) {
let segment = vcpu.vmx().rmode.segs[seg as usize];
vcpu.vmx_mut().rmode.segs[seg as usize] = self._vmx_set_segment(vcpu, segment, seg);
}
pub fn enter_rmode(&self, vcpu: &mut VirtCpu, vm: &Vm) {
let kvm_vmx = vm.kvm_vmx();
self.get_segment_with_rmode(vcpu, VcpuSegment::TR);
self.get_segment_with_rmode(vcpu, VcpuSegment::ES);
self.get_segment_with_rmode(vcpu, VcpuSegment::DS);
self.get_segment_with_rmode(vcpu, VcpuSegment::FS);
self.get_segment_with_rmode(vcpu, VcpuSegment::GS);
self.get_segment_with_rmode(vcpu, VcpuSegment::SS);
self.get_segment_with_rmode(vcpu, VcpuSegment::CS);
vcpu.vmx_mut().rmode.vm86_active = true;
vcpu.vmx_mut().segment_cache_clear();
VmxAsm::vmx_vmwrite(guest::TR_BASE, kvm_vmx.tss_addr as u64);
VmxAsm::vmx_vmwrite(guest::TR_LIMIT, RMODE_TSS_SIZE as u64 - 1);
VmxAsm::vmx_vmwrite(guest::TR_ACCESS_RIGHTS, 0x008b);
let mut flags = RFlags::from_bits_truncate(VmxAsm::vmx_vmread(guest::RFLAGS));
vcpu.vmx_mut().rmode.save_rflags = flags;
flags.insert(RFlags::FLAGS_IOPL3 | RFlags::FLAGS_VM);
VmxAsm::vmx_vmwrite(guest::RFLAGS, flags.bits());
VmxAsm::vmx_vmwrite(
guest::CR4,
VmxAsm::vmx_vmread(guest::CR4) | Cr4::CR4_ENABLE_VME.bits() as u64,
);
VmxKvmFunc.update_exception_bitmap(vcpu);
self.fix_rmode_seg_with_rmode(vcpu, VcpuSegment::SS);
self.fix_rmode_seg_with_rmode(vcpu, VcpuSegment::CS);
self.fix_rmode_seg_with_rmode(vcpu, VcpuSegment::ES);
self.fix_rmode_seg_with_rmode(vcpu, VcpuSegment::DS);
self.fix_rmode_seg_with_rmode(vcpu, VcpuSegment::GS);
self.fix_rmode_seg_with_rmode(vcpu, VcpuSegment::FS);
}
fn fix_rmode_seg_with_rmode(&self, vcpu: &VirtCpu, seg: VcpuSegment) {
Vmx::fix_rmode_seg(seg, &vcpu.vmx().rmode.segs[seg as usize]);
}
pub fn vmx_read_guest_seg_ar(vcpu: &mut VirtCpu, seg: VcpuSegment) -> u32 {
if !Vmx::vmx_segment_cache_test_set(vcpu, seg, SegmentCacheField::AR) {
vcpu.vmx_mut().segment_cache.seg[seg as usize].ar =
VmxAsm::vmx_vmread(KVM_VMX_SEGMENT_FIELDS[seg as usize].ar_bytes) as u32;
}
return vcpu.vmx().segment_cache.seg[seg as usize].ar;
}
pub fn vmx_read_guest_seg_selector(vcpu: &mut VirtCpu, seg: VcpuSegment) -> u16 {
if !Vmx::vmx_segment_cache_test_set(vcpu, seg, SegmentCacheField::SEL) {
vcpu.vmx_mut().segment_cache.seg[seg as usize].selector =
VmxAsm::vmx_vmread(KVM_VMX_SEGMENT_FIELDS[seg as usize].selector) as u16;
}
return vcpu.vmx().segment_cache.seg[seg as usize].selector;
}
pub fn vmx_read_guest_seg_base(vcpu: &mut VirtCpu, seg: VcpuSegment) -> u64 {
if !Vmx::vmx_segment_cache_test_set(vcpu, seg, SegmentCacheField::BASE) {
vcpu.vmx_mut().segment_cache.seg[seg as usize].base =
VmxAsm::vmx_vmread(KVM_VMX_SEGMENT_FIELDS[seg as usize].base);
}
return vcpu.vmx().segment_cache.seg[seg as usize].base;
}
pub fn vmx_read_guest_seg_limit(vcpu: &mut VirtCpu, seg: VcpuSegment) -> u32 {
if !Vmx::vmx_segment_cache_test_set(vcpu, seg, SegmentCacheField::LIMIT) {
vcpu.vmx_mut().segment_cache.seg[seg as usize].limit =
VmxAsm::vmx_vmread(KVM_VMX_SEGMENT_FIELDS[seg as usize].limit) as u32;
}
return vcpu.vmx().segment_cache.seg[seg as usize].limit;
}
fn vmx_segment_cache_test_set(
vcpu: &mut VirtCpu,
seg: VcpuSegment,
field: SegmentCacheField,
) -> bool {
let mask = 1u32 << (seg as usize * SegmentCacheField::NR as usize + field as usize);
if !vcpu.arch.is_register_available(KvmReg::VcpuExregSegments) {
vcpu.arch.mark_register_available(KvmReg::VcpuExregSegments);
vcpu.vmx_mut().segment_cache_clear();
}
let ret = vcpu.vmx().segment_cache.bitmask & mask;
vcpu.vmx_mut().segment_cache.bitmask |= mask;
return ret != 0;
}
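/// Illustration (not called anywhere) of the bitmask layout used by
/// `vmx_segment_cache_test_set` above: each (segment, field) pair owns one
/// bit at position seg * SegmentCacheField::NR + field. Assuming four cached
/// fields per segment (SEL, BASE, LIMIT, AR), segment 2's field 1 maps to
/// bit 9.
#[allow(dead_code)]
fn example_segment_cache_bit(seg: usize, field: usize, nr_fields: usize) -> u32 {
1u32 << (seg * nr_fields + field)
}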
pub fn vmx_vcpu_enter_exit(vcpu: &mut VirtCpu, flags: VmxRunFlag) {
// TODO: vmx_l1d_should_flush and mmio_stale_data_clear
// TODO: vmx_disable_fb_clear
if vcpu.arch.cr2 != unsafe { cr2() } as u64 {
unsafe { cr2_write(vcpu.arch.cr2) };
}
let fail =
unsafe { __vmx_vcpu_run(vcpu.vmx(), vcpu.arch.regs.as_ptr(), flags.bits as u32) };
vcpu.vmx_mut().fail = fail as u8;
vcpu.arch.cr2 = unsafe { cr2() } as u64;
vcpu.arch.regs_avail.set_all(true);
// These registers' cached values must be refreshed lazily
for reg_idx in Vmx::VMX_REGS_LAZY_LOAD_SET {
vcpu.arch.regs_avail.set(*reg_idx, false);
}
vcpu.vmx_mut().idt_vectoring_info = IntrInfo::empty();
// TODO: enable_fb_clear
if unlikely(vcpu.vmx().fail != 0) {
vcpu.vmx_mut().exit_reason = VmxExitReason::from(0xdead);
return;
}
vcpu.vmx_mut().exit_reason =
VmxExitReason::from(VmxAsm::vmx_vmread(ro::EXIT_REASON) as u32);
if likely(!vcpu.vmx().exit_reason.failed_vmentry()) {
vcpu.vmx_mut().idt_vectoring_info =
IntrInfo::from_bits_truncate(VmxAsm::vmx_vmread(ro::IDT_VECTORING_INFO) as u32);
}
if VmxExitReasonBasic::from(vcpu.vmx().exit_reason.basic())
== VmxExitReasonBasic::EXCEPTION_OR_NMI
&& VmcsIntrHelper::is_nmi(&Vmx::vmx_get_intr_info(vcpu))
{
todo!()
}
}
fn vmx_get_intr_info(vcpu: &mut VirtCpu) -> IntrInfo {
if !vcpu
.arch
.test_and_mark_available(KvmReg::VcpuExregExitInfo2)
{
vcpu.vmx_mut().exit_intr_info = IntrInfo::from_bits_truncate(VmxAsm::vmx_vmread(
ro::VMEXIT_INTERRUPTION_INFO,
) as u32);
}
return vcpu.vmx_mut().exit_intr_info;
}
pub fn vmx_exit_handlers_fastpath(vcpu: &mut VirtCpu) -> ExitFastpathCompletion {
match VmxExitReasonBasic::from(vcpu.vmx().exit_reason.basic()) {
VmxExitReasonBasic::WRMSR => {
todo!()
}
VmxExitReasonBasic::VMX_PREEMPTION_TIMER_EXPIRED => {
todo!()
}
_ => ExitFastpathCompletion::None,
}
}
pub fn vmx_handle_exit(
&self,
vcpu: &mut VirtCpu,
vm: &Vm,
exit_fastpath: ExitFastpathCompletion,
) -> Result<i32, SystemError> {
let exit_reason = vcpu.vmx().exit_reason;
// self.dump_vmcs(vcpu);
{
let reason = self.vmread(ro::EXIT_REASON);
debug!("vm_exit reason 0x{:x}\n", reason);
}
let unexpected_vmexit = |vcpu: &mut VirtCpu| -> Result<i32, SystemError> {
error!("vmx: unexpected exit reason {:?}\n", exit_reason);
self.dump_vmcs(vcpu);
let cpu = vcpu.arch.last_vmentry_cpu.into() as u64;
let run = vcpu.kvm_run_mut();
run.exit_reason = kvm_exit::KVM_EXIT_INTERNAL_ERROR;
unsafe {
run.__bindgen_anon_1.internal.ndata = 2;
run.__bindgen_anon_1.internal.data[0] = Into::<u32>::into(exit_reason) as u64;
run.__bindgen_anon_1.internal.data[1] = cpu;
}
return Ok(0);
};
let vectoring_info = vcpu.vmx().idt_vectoring_info;
if self.enable_pml && !vcpu.arch.is_guest_mode() {
todo!()
}
if vcpu.arch.is_guest_mode() {
if exit_reason.basic() == VmxExitReasonBasic::PML_FULL as u16 {
return unexpected_vmexit(vcpu);
}
todo!()
}
if vcpu.vmx().emulation_required {
todo!()
}
if exit_reason.failed_vmentry() {
self.dump_vmcs(vcpu);
todo!()
}
if unlikely(vcpu.vmx().fail != 0) {
self.dump_vmcs(vcpu);
todo!()
}
let basic = VmxExitReasonBasic::from(exit_reason.basic());
if vectoring_info.contains(IntrInfo::INTR_INFO_VALID_MASK)
&& basic != VmxExitReasonBasic::EXCEPTION_OR_NMI
&& basic != VmxExitReasonBasic::EPT_VIOLATION
&& basic != VmxExitReasonBasic::PML_FULL
&& basic != VmxExitReasonBasic::APIC_ACCESS
&& basic != VmxExitReasonBasic::TASK_SWITCH
&& basic != VmxExitReasonBasic::NOTIFY
{
todo!()
}
if unlikely(!self.enable_pml && vcpu.vmx().loaded_vmcs().soft_vnmi_blocked) {
todo!()
}
if exit_fastpath != ExitFastpathCompletion::None {
return Err(SystemError::EINVAL);
}
match VmxExitHandlers::try_handle_exit(
vcpu,
vm,
VmxExitReasonBasic::from(exit_reason.basic()),
) {
Some(Ok(r)) => {
debug!("vmx: handled exit return {:?}\n", r);
return Ok(r);
}
Some(Err(_)) | None => unexpected_vmexit(vcpu),
}
}
#[allow(unreachable_code)]
pub fn handle_external_interrupt_irqoff(vcpu: &mut VirtCpu) {
let intr_info = Vmx::vmx_get_intr_info(vcpu);
let _vector = intr_info & IntrInfo::INTR_INFO_VECTOR_MASK;
// let desc = vmx_info().host_idt_base + vector.bits() as u64;
if !VmcsIntrHelper::is_external_intr(&intr_info) {
error!("unexpected VM-Exit interrupt info: {:?}", intr_info);
return;
}
vcpu.arch.kvm_before_interrupt(KvmIntrType::Irq);
// TODO
warn!("handle_external_interrupt_irqoff TODO");
vcpu.arch.kvm_after_interrupt();
vcpu.arch.at_instruction_boundary = true;
}
/// The set of registers whose cached values must be refreshed lazily. Any register not listed here is synced into the cache immediately after VM-exit.
pub const VMX_REGS_LAZY_LOAD_SET: &'static [usize] = &[
KvmReg::VcpuRegsRip as usize,
KvmReg::VcpuRegsRsp as usize,
KvmReg::VcpuExregRflags as usize,
KvmReg::NrVcpuRegs as usize,
KvmReg::VcpuExregSegments as usize,
KvmReg::VcpuExregCr0 as usize,
KvmReg::VcpuExregCr3 as usize,
KvmReg::VcpuExregCr4 as usize,
KvmReg::VcpuExregExitInfo1 as usize,
KvmReg::VcpuExregExitInfo2 as usize,
];
}
extern "C" {
/// #[allow(improper_ctypes)] because this is only called internally and never crosses into C
#[allow(improper_ctypes)]
fn __vmx_vcpu_run(vmx: &VmxVCpuPriv, regs: *const u64, flags: u32) -> i32;
}
struct VmcsEntryExitPair {
entry: EntryControls,
exit: ExitControls,
}
impl VmcsEntryExitPair {
pub const fn new(entry: EntryControls, exit: ExitControls) -> Self {
Self { entry, exit }
}
}
#[derive(Debug, Default)]
#[repr(C, align(64))]
pub struct PostedIntrDesc {
pir: [u32; 8],
control: PostedIntrDescControl,
// reserved bits
rsvd: [u32; 6],
}
#[bitfield(u64)]
pub struct PostedIntrDescControl {
#[bits(1)]
on: bool,
#[bits(1)]
sn: bool,
#[bits(14)]
rsvd_1: u16,
nv: u8,
rsvd_2: u8,
ndst: u32,
}
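// Hedged layout sketch for the control word declared above: ON at bit 0, SN
// at bit 1, 14 reserved bits, the notification vector (nv) in bits 16..24,
// 8 more reserved bits, and the notification destination (ndst) in the high
// 32 bits. The raw value below is synthetic and only illustrates the math.
#[cfg(test)]
mod posted_intr_layout_example {
#[test]
fn nv_occupies_bits_16_to_23() {
let raw: u64 = (0x20u64 << 16) | 1; // ON set, vector 0x20
assert_eq!(raw & 1, 1); // ON
assert_eq!((raw >> 16) & 0xff, 0x20); // nv
assert_eq!(raw >> 32, 0); // ndst untouched
}
}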
#[derive(Debug, Default, Clone, Copy)]
pub struct VmxUretMsr {
load_into_hardware: bool,
data: u64,
mask: u64,
}
#[derive(Debug, Default)]
pub struct VmxMsrs {
nr: usize,
val: [VmxMsrEntry; Self::MAX_NR_LOADSTORE_MSRS],
}
impl VmxMsrs {
pub const MAX_NR_LOADSTORE_MSRS: usize = 8;
pub fn find_loadstore_msr_slot(&self, msr: u32) -> Option<usize> {
return (0..self.nr).find(|&i| self.val[i].index == msr);
}
}
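// Minimal sketch of the swap-remove idiom applied to these load/store lists
// by `clear_atomic_switch_msr` further below: the freed slot is overwritten
// with the last live entry, keeping the array dense without shifting. The
// numbers are synthetic.
#[cfg(test)]
mod loadstore_swap_remove_example {
#[test]
fn removal_keeps_list_dense() {
let mut vals = [10u32, 20, 30, 40];
let mut nr = 4usize;
let victim = 1usize; // drop the entry holding 20
nr -= 1;
vals[victim] = vals[nr];
assert_eq!(&vals[..nr], &[10, 40, 30]);
}
}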
#[derive(Debug, Default)]
pub struct VmxMsrAutoLoad {
guest: VmxMsrs,
host: VmxMsrs,
}
#[derive(Debug)]
pub struct VmxRMode {
pub vm86_active: bool,
pub save_rflags: RFlags,
pub segs: [UapiKvmSegment; 8],
}
impl Default for VmxRMode {
fn default() -> Self {
Self {
vm86_active: false,
save_rflags: RFlags::empty(),
segs: [UapiKvmSegment::default(); 8],
}
}
}
#[derive(Debug, Clone, Copy, Default)]
pub struct VmxSaveSegment {
selector: u16,
base: u64,
limit: u32,
ar: u32,
}
#[derive(Debug, Default)]
pub struct VmxSegmentCache {
pub bitmask: u32,
pub seg: [VmxSaveSegment; 8],
}
#[derive(Debug)]
#[allow(dead_code)]
pub struct VmxVCpuPriv {
vpid: u16,
fail: u8,
exit_reason: VmxExitReason,
exit_intr_info: IntrInfo,
idt_vectoring_info: IntrInfo,
vmcs01: Arc<LockedLoadedVmcs>,
loaded_vmcs: Arc<LockedLoadedVmcs>,
guest_uret_msrs: [VmxUretMsr; KvmArchManager::KVM_MAX_NR_USER_RETURN_MSRS],
guest_uret_msrs_loaded: bool,
post_intr_desc: PostedIntrDesc,
shadow_msr_intercept_read: AllocBitmap,
shadow_msr_intercept_write: AllocBitmap,
msr_ia32_feature_control: u64,
msr_ia32_feature_control_valid_bits: u64,
msr_host_kernel_gs_base: u64,
msr_guest_kernel_gs_base: u64,
emulation_required: bool,
rflags: RFlags,
ple_window: u32,
ple_window_dirty: bool,
msr_autoload: VmxMsrAutoLoad,
msr_autostore: VmxMsrs,
pml_pg: Box<[u8; MMArch::PAGE_SIZE]>,
rmode: VmxRMode,
spec_ctrl: u64,
msr_ia32_umwait_control: u32,
hv_deadline_tsc: u64,
segment_cache: VmxSegmentCache,
req_immediate_exit: bool,
guest_state_loaded: bool,
exit_qualification: u64, // purpose unclear for now (fztodo)
}
#[derive(Debug, Default)]
#[allow(dead_code)]
pub struct KvmVmx {
tss_addr: usize,
ept_identity_pagetable_done: bool,
ept_identity_map_addr: u64,
pid_table: Option<Box<[u64; MMArch::PAGE_SIZE]>>,
}
impl KvmVmx {
pub fn pid_table(&self) -> &[u64; MMArch::PAGE_SIZE] {
self.pid_table.as_ref().unwrap().as_ref()
}
}
impl VmxVCpuPriv {
pub const PML_ENTITY_NUM: usize = 512;
pub fn loaded_vmcs(&self) -> SpinLockGuard<LoadedVmcs> {
self.loaded_vmcs.lock()
}
/// Reference: https://code.dragonos.org.cn/xref/linux-6.6.21/arch/x86/kvm/vmx/vmx.c#7452
pub fn init(vcpu: &mut VirtCpu, vm: &Vm) {
let vmcs = LockedLoadedVmcs::new();
// TODO: switch to heap allocation
let mut vmx = Self {
vpid: 0,
fail: 0,
vmcs01: vmcs.clone(),
loaded_vmcs: vmcs,
guest_uret_msrs: [VmxUretMsr::default(); KvmArchManager::KVM_MAX_NR_USER_RETURN_MSRS],
shadow_msr_intercept_read: AllocBitmap::new(16),
shadow_msr_intercept_write: AllocBitmap::new(16),
post_intr_desc: PostedIntrDesc::default(),
ple_window: 0,
ple_window_dirty: false,
msr_autoload: VmxMsrAutoLoad::default(),
pml_pg: unsafe { Box::new_zeroed().assume_init() },
guest_uret_msrs_loaded: false,
msr_ia32_feature_control: 0,
msr_ia32_feature_control_valid_bits: 0,
rmode: VmxRMode::default(),
spec_ctrl: 0,
msr_ia32_umwait_control: 0,
hv_deadline_tsc: u64::MAX,
segment_cache: VmxSegmentCache::default(),
emulation_required: false,
rflags: RFlags::empty(),
req_immediate_exit: false,
guest_state_loaded: false,
msr_host_kernel_gs_base: 0,
msr_guest_kernel_gs_base: 0,
idt_vectoring_info: IntrInfo::empty(),
exit_reason: VmxExitReason::new(),
exit_intr_info: IntrInfo::empty(),
msr_autostore: VmxMsrs::default(),
exit_qualification: 0, //fztodo
};
vmx.vpid = vmx_info().alloc_vpid().unwrap_or_default() as u16;
for i in 0..x86_kvm_manager().kvm_uret_msrs_list.len() {
vmx.guest_uret_msrs[i].mask = u64::MAX;
}
if CpuId::new().get_extended_feature_info().unwrap().has_rtm() {
let tsx_ctrl = vmx.find_uret_msr_mut(msr::MSR_IA32_TSX_CTRL);
if let Some((_idx, tsx_ctrl)) = tsx_ctrl {
// Disable TSX enumeration
tsx_ctrl.mask = !(1 << 1);
}
}
vmx.shadow_msr_intercept_read.set_all(true);
vmx.shadow_msr_intercept_write.set_all(true);
let arch = &vm.arch;
vmx.disable_intercept_for_msr(arch, msr::IA32_TIME_STAMP_COUNTER, MsrType::READ);
vmx.disable_intercept_for_msr(arch, msr::IA32_FS_BASE, MsrType::RW);
vmx.disable_intercept_for_msr(arch, msr::IA32_GS_BASE, MsrType::RW);
vmx.disable_intercept_for_msr(arch, msr::IA32_KERNEL_GSBASE, MsrType::RW);
vmx.disable_intercept_for_msr(arch, msr::IA32_SYSENTER_CS, MsrType::RW);
vmx.disable_intercept_for_msr(arch, msr::IA32_SYSENTER_ESP, MsrType::RW);
vmx.disable_intercept_for_msr(arch, msr::IA32_SYSENTER_EIP, MsrType::RW);
if arch.pause_in_guest {
vmx.disable_intercept_for_msr(arch, msr::MSR_CORE_C1_RESIDENCY, MsrType::READ);
vmx.disable_intercept_for_msr(arch, msr::MSR_CORE_C3_RESIDENCY, MsrType::READ);
vmx.disable_intercept_for_msr(arch, msr::MSR_CORE_C6_RESIDENCY, MsrType::READ);
vmx.disable_intercept_for_msr(arch, msr::MSR_CORE_C7_RESIDENCY, MsrType::READ);
}
if vmx_info().enable_flexpriority && vcpu.arch.lapic_in_kernel() {
todo!()
}
if vmx_info().enable_ept && !vmx_info().enable_unrestricted_guest {
todo!()
}
if vcpu.arch.lapic_in_kernel() && vmx_info().enable_ipiv {
todo!()
}
// Install the vmx private data on the vcpu
vcpu.private = Some(vmx);
}
pub fn find_uret_msr(&self, msr: u32) -> Option<(usize, &VmxUretMsr)> {
let idx = x86_kvm_manager().find_user_return_msr_idx(msr);
if let Some(index) = idx {
return Some((index, &self.guest_uret_msrs[index]));
} else {
return None;
}
}
fn set_uret_msr(&mut self, msr: u32, data: u64) {
if let Some((_idx, msr)) = self.find_uret_msr_mut(msr) {
msr.data = data;
}
}
pub fn find_uret_msr_mut(&mut self, msr: u32) -> Option<(usize, &mut VmxUretMsr)> {
let idx = x86_kvm_manager().find_user_return_msr_idx(msr);
if let Some(index) = idx {
return Some((index, &mut self.guest_uret_msrs[index]));
} else {
return None;
}
}
fn set_guest_uret_msr(&mut self, slot: usize, data: u64) -> Result<(), SystemError> {
let msr = &mut self.guest_uret_msrs[slot];
if msr.load_into_hardware {
x86_kvm_manager().kvm_set_user_return_msr(slot, data, msr.mask);
}
msr.data = data;
Ok(())
}
/// ## Disable interception for the given MSR
fn disable_intercept_for_msr(&mut self, arch: &KvmArch, msr: u32, mut msr_type: MsrType) {
if !vmx_info().has_msr_bitmap() {
return;
}
let msr_bitmap = &mut self.vmcs01.lock().msr_bitmap;
// TODO: https://code.dragonos.org.cn/xref/linux-6.6.21/arch/x86/kvm/vmx/vmx.c#3974
// nested VMX handling
if Vmx::is_valid_passthrough_msr(msr) {
if let Some(idx) = Vmx::possible_passthrough_msr_slot(msr) {
if msr_type.contains(MsrType::READ) {
self.shadow_msr_intercept_read.set(idx, false);
}
if msr_type.contains(MsrType::WRITE) {
self.shadow_msr_intercept_write.set(idx, false);
}
}
}
if msr_type.contains(MsrType::READ)
&& !arch.msr_allowed(msr, MsrFilterType::KVM_MSR_FILTER_READ)
{
msr_bitmap.ctl(msr, VmxMsrBitmapAction::Set, VmxMsrBitmapAccess::Read);
msr_type.remove(MsrType::READ);
}
if msr_type.contains(MsrType::WRITE)
&& !arch.msr_allowed(msr, MsrFilterType::KVM_MSR_FILTER_WRITE)
{
msr_bitmap.ctl(msr, VmxMsrBitmapAction::Set, VmxMsrBitmapAccess::Write);
msr_type.remove(MsrType::WRITE);
}
if msr_type.contains(MsrType::READ) {
msr_bitmap.ctl(msr, VmxMsrBitmapAction::Clear, VmxMsrBitmapAccess::Read);
}
if msr_type.contains(MsrType::WRITE) {
msr_bitmap.ctl(msr, VmxMsrBitmapAction::Clear, VmxMsrBitmapAccess::Write);
}
}
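/// Hedged sketch (not called anywhere) of the bitmap convention that
/// `disable_intercept_for_msr` above relies on: a set bit means "intercept"
/// (VM-exit on access), a cleared bit means "pass through". The toy
/// one-bit-per-MSR map here is an illustration, not the real VMX bitmap
/// layout.
#[allow(dead_code)]
fn example_msr_bit(bitmap: &mut [u8], msr: usize, intercept: bool) {
if intercept {
bitmap[msr / 8] |= 1 << (msr % 8);
} else {
bitmap[msr / 8] &= !(1 << (msr % 8));
}
}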
#[inline]
pub fn segment_cache_clear(&mut self) {
self.segment_cache.bitmask = 0;
}
pub fn clear_atomic_switch_msr(&mut self, msr: u32) {
match msr {
msr::IA32_EFER => {
if vmx_info().has_load_ia32_efer() {
self.clear_atomic_switch_msr_special(
EntryControls::LOAD_IA32_EFER.bits().into(),
ExitControls::LOAD_IA32_EFER.bits().into(),
);
return;
}
}
msr::MSR_PERF_GLOBAL_CTRL => {
if vmx_info().has_load_perf_global_ctrl() {
self.clear_atomic_switch_msr_special(
EntryControls::LOAD_IA32_PERF_GLOBAL_CTRL.bits().into(),
ExitControls::LOAD_IA32_PERF_GLOBAL_CTRL.bits().into(),
);
return;
}
}
_ => {}
}
let m = &mut self.msr_autoload;
let i = m.guest.find_loadstore_msr_slot(msr);
if let Some(i) = i {
m.guest.nr -= 1;
m.guest.val[i] = m.guest.val[m.guest.nr];
VmxAsm::vmx_vmwrite(control::VMENTRY_MSR_LOAD_COUNT, m.guest.nr as u64);
}
let i = m.host.find_loadstore_msr_slot(msr);
if let Some(i) = i {
m.host.nr -= 1;
m.host.val[i] = m.host.val[m.host.nr];
VmxAsm::vmx_vmwrite(control::VMEXIT_MSR_LOAD_COUNT, m.host.nr as u64);
}
}
fn clear_atomic_switch_msr_special(&self, entry: u64, exit: u64) {
let mut guard = self.loaded_vmcs.lock();
guard.controls_clearbit(ControlsType::VmEntry, entry);
guard.controls_clearbit(ControlsType::VmExit, exit);
}
pub fn add_atomic_switch_msr(
&mut self,
msr: u32,
guest_val: u64,
host_val: u64,
entry_only: bool,
) {
match msr {
msr::IA32_EFER => {
if vmx_info().has_load_ia32_efer() {
self.add_atomic_switch_msr_special(
EntryControls::LOAD_IA32_EFER.bits() as u64,
ExitControls::LOAD_IA32_EFER.bits() as u64,
guest::IA32_EFER_FULL,
host::IA32_EFER_FULL,
guest_val,
host_val,
);
return;
}
}
msr::MSR_PERF_GLOBAL_CTRL => {
if vmx_info().has_load_perf_global_ctrl() {
self.add_atomic_switch_msr_special(
EntryControls::LOAD_IA32_PERF_GLOBAL_CTRL.bits().into(),
ExitControls::LOAD_IA32_PERF_GLOBAL_CTRL.bits().into(),
guest::IA32_PERF_GLOBAL_CTRL_FULL,
host::IA32_PERF_GLOBAL_CTRL_FULL,
guest_val,
host_val,
);
return;
}
}
msr::MSR_PEBS_ENABLE => {
unsafe { wrmsr(msr::MSR_PEBS_ENABLE, 0) };
}
_ => {}
}
let m = &mut self.msr_autoload;
let i = m.guest.find_loadstore_msr_slot(msr);
let j = if !entry_only {
m.host.find_loadstore_msr_slot(msr)
} else {
Some(0)
};
if (i.is_none() && m.guest.nr == VmxMsrs::MAX_NR_LOADSTORE_MSRS)
|| (j.is_none() && m.host.nr == VmxMsrs::MAX_NR_LOADSTORE_MSRS)
{
warn!("Not enough msr switch entries. Can't add msr 0x{:x}", msr);
return;
}
let i = if let Some(i) = i {
i
} else {
// Claim the next free slot *before* bumping the count; indexing with
// the incremented count would write one slot past the intended entry
// (and out of bounds once the list is full).
let slot = m.guest.nr;
m.guest.nr += 1;
VmxAsm::vmx_vmwrite(control::VMENTRY_MSR_LOAD_COUNT, m.guest.nr as u64);
slot
};
m.guest.val[i].index = msr;
m.guest.val[i].data = guest_val;
if entry_only {
return;
}
let j = if let Some(j) = j {
j
} else {
// Same slot-before-increment ordering as the guest list above.
let slot = m.host.nr;
m.host.nr += 1;
VmxAsm::vmx_vmwrite(control::VMEXIT_MSR_LOAD_COUNT, m.host.nr as u64);
slot
};
m.host.val[j].index = msr;
m.host.val[j].data = host_val;
}
fn add_atomic_switch_msr_special(
&self,
entry: u64,
exit: u64,
guest_val_vmcs: u32,
host_val_vmcs: u32,
guest_val: u64,
host_val: u64,
) {
VmxAsm::vmx_vmwrite(guest_val_vmcs, guest_val);
if host_val_vmcs != host::IA32_EFER_FULL {
VmxAsm::vmx_vmwrite(host_val_vmcs, host_val);
}
let mut guard = self.loaded_vmcs.lock();
guard.controls_setbit(ControlsType::VmEntry, entry);
guard.controls_setbit(ControlsType::VmExit, exit);
}
pub fn vmx_vcpu_run_flags(&self) -> VmxRunFlag {
let mut flags = VmxRunFlag::empty();
if self.loaded_vmcs().launched {
flags.insert(VmxRunFlag::VMRESUME);
}
// MSR_IA32_SPEC_CTRL
if !self.loaded_vmcs().msr_write_intercepted(0x48) {
flags.insert(VmxRunFlag::SAVE_SPEC_CTRL);
}
flags
}
pub fn get_exit_qual(&self) -> u64 {
self.exit_qualification
}
pub fn vmread_exit_qual(&mut self) {
self.exit_qualification = VmxAsm::vmx_vmread(ro::EXIT_QUALIFICATION);
}
}
bitflags! {
pub struct MsrType: u8 {
const READ = 1;
const WRITE = 2;
const RW = 3;
}
//https://code.dragonos.org.cn/xref/linux-6.6.21/arch/x86/include/asm/kvm_host.h#249
pub struct PageFaultErr: u64 {
const PFERR_PRESENT = 1 << 0;
const PFERR_WRITE = 1 << 1;
const PFERR_USER = 1 << 2;
const PFERR_RSVD = 1 << 3;
const PFERR_FETCH = 1 << 4;
const PFERR_PK = 1 << 5;
const PFERR_SGX = 1 << 15;
const PFERR_GUEST_FINAL = 1 << 32;
const PFERR_GUEST_PAGE = 1 << 33;
const PFERR_IMPLICIT_ACCESS = 1 << 48;
}
pub struct VmxRunFlag: u8 {
const VMRESUME = 1 << 0;
const SAVE_SPEC_CTRL = 1 << 1;
}
}
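// Hedged example of composing the PageFaultErr bits declared above: a
// user-mode write to a present page carries PRESENT | WRITE | USER. The
// assertion only demonstrates the bit arithmetic.
#[cfg(test)]
mod page_fault_err_example {
use super::PageFaultErr;
#[test]
fn user_write_to_present_page() {
let err =
PageFaultErr::PFERR_PRESENT | PageFaultErr::PFERR_WRITE | PageFaultErr::PFERR_USER;
assert_eq!(err.bits(), 0b111);
}
}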
#[derive(Debug, PartialEq)]
#[allow(dead_code)]
pub enum VmxL1dFlushState {
Auto,
Never,
Cond,
Always,
EptDisabled,
NotRequired,
}
#[derive(Debug, PartialEq)]
pub struct VmxSegmentField {
selector: u32,
base: u32,
limit: u32,
ar_bytes: u32,
}
pub const KVM_VMX_SEGMENT_FIELDS: &[VmxSegmentField] = &[
// ES
VmxSegmentField {
selector: guest::ES_SELECTOR,
base: guest::ES_BASE,
limit: guest::ES_LIMIT,
ar_bytes: guest::ES_ACCESS_RIGHTS,
},
// CS
VmxSegmentField {
selector: guest::CS_SELECTOR,
base: guest::CS_BASE,
limit: guest::CS_LIMIT,
ar_bytes: guest::CS_ACCESS_RIGHTS,
},
// SS
VmxSegmentField {
selector: guest::SS_SELECTOR,
base: guest::SS_BASE,
limit: guest::SS_LIMIT,
ar_bytes: guest::SS_ACCESS_RIGHTS,
},
// DS
VmxSegmentField {
selector: guest::DS_SELECTOR,
base: guest::DS_BASE,
limit: guest::DS_LIMIT,
ar_bytes: guest::DS_ACCESS_RIGHTS,
},
// FS
VmxSegmentField {
selector: guest::FS_SELECTOR,
base: guest::FS_BASE,
limit: guest::FS_LIMIT,
ar_bytes: guest::FS_ACCESS_RIGHTS,
},
// GS
VmxSegmentField {
selector: guest::GS_SELECTOR,
base: guest::GS_BASE,
limit: guest::GS_LIMIT,
ar_bytes: guest::GS_ACCESS_RIGHTS,
},
// TR
VmxSegmentField {
selector: guest::TR_SELECTOR,
base: guest::TR_BASE,
limit: guest::TR_LIMIT,
ar_bytes: guest::TR_ACCESS_RIGHTS,
},
// LDTR
VmxSegmentField {
selector: guest::LDTR_SELECTOR,
base: guest::LDTR_BASE,
limit: guest::LDTR_LIMIT,
ar_bytes: guest::LDTR_ACCESS_RIGHTS,
},
];
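// Sketch of how the table above is consumed: callers index it with
// `VcpuSegment as usize`, so the enum's discriminant order is assumed to
// match the array order (ES = 0, CS = 1, ...). The test below checks the CS
// slot under that assumption.
#[cfg(test)]
mod segment_fields_example {
use super::*;
#[test]
fn cs_slot_holds_cs_fields() {
let sf = &KVM_VMX_SEGMENT_FIELDS[1]; // VcpuSegment::CS, assumed discriminant 1
assert_eq!(sf.selector, guest::CS_SELECTOR);
}
}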
pub static L1TF_VMX_MITIGATION: RwLock<VmxL1dFlushState> = RwLock::new(VmxL1dFlushState::Auto);
pub fn vmx_init() -> Result<(), SystemError> {
let cpuid = CpuId::new();
let cpu_feat = cpuid.get_feature_info().ok_or(SystemError::ENOSYS)?;
if !cpu_feat.has_vmx() {
return Err(SystemError::ENOSYS);
}
init_kvm_arch();
x86_kvm_manager_mut().vendor_init(&VmxKvmInitFunc)?;
vmx_info().setup_l1d_flush();
kvm_init()?;
Ok(())
}
#[no_mangle]
unsafe extern "C" fn vmx_update_host_rsp(vcpu_vmx: &VmxVCpuPriv, host_rsp: usize) {
warn!("vmx_update_host_rsp");
let mut guard = vcpu_vmx.loaded_vmcs.lock();
if unlikely(host_rsp != guard.host_state.rsp) {
guard.host_state.rsp = host_rsp;
VmxAsm::vmx_vmwrite(host::RSP, host_rsp as u64);
}
}
#[no_mangle]
unsafe extern "C" fn vmx_spec_ctrl_restore_host(_vcpu_vmx: &VmxVCpuPriv, _flags: u32) {
// TODO
warn!("vmx_spec_ctrl_restore_host todo!");
}