DragonOS虚拟化 (#389)

* try some ioctl flow & kvm device

* add sys ioctl

* 删掉一些debug信息

* 修改run-qemu.sh脚本,在QEMU中enable vmx

* 修改cr0,cr4,msr寄存器enable VMX operations

* enable vmx operation

* allocate memory for vmcs with bug

* allocate memory for vmcs

* cpu virt-50%

* single vcpu virt

* add vmcs fields

* CPU virt overall flow with bug

* run vmlaunch success

* run CPU virt with bug

* 成功运行non-root模式的guest

* 成功运行vmexit,进入vmx_return函数

* 成功运行vmlaunch, vmexit, vmresume

* vmexit handler with bug

* 完成vmexit cpuid handler

* fix vmresume guest状态恢复的bug

* 增加vm ioctl

* refactor kvm 50%

* refactor kvm 80%

* FIXME: kvm vmlaunch failed

* vmlaunch success

* FIXME: output error

* update guest_rsp

* cpu virt refactor

* add mmu related struct

* add usermemory region workflow

* add mem-virt workflow

* add mem-virt

* refactor code

* add vcpu ioctl set_regs

* rename hypervisor to vm & solve some deadlock bugs

* workout mem pipeline

* fix vmcs control setting bugs

* refactor segment regs initialization

* resolve conflicts

* resolve conflicts

* format code
This commit is contained in:
Xiaoye Zheng 2023-10-24 14:31:56 +08:00 committed by GitHub
parent 485e248761
commit 40314b30ab
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
45 changed files with 3652 additions and 12 deletions

View File

@ -17,6 +17,7 @@ members = [ "src/libs/intertrait" ]
x86 = "0.52.0"
x86_64 = "0.14.10"
bitflags = "1.3.2"
bitfield-struct = "0.5.3"
virtio-drivers = { git = "https://git.mirrors.dragonos.org/DragonOS-Community/virtio-drivers.git", rev = "f1d1cbb" }
# 一个无锁MPSC队列
thingbuf = { version = "0.1.3", default-features = false, features = ["alloc"] }
@ -46,7 +47,10 @@ version = "1.4.0"
# 由于在no_std环境而lazy_static依赖了spin库因此需要指定其使用no_std
features = ["spin_no_std"]
# The development profile, used for `cargo build`
[profile.dev]
# opt-level = 0 # Controls the --opt-level the compiler builds with
debug = true # Controls whether the compiler passes `-g`
# The release profile, used for `cargo build --release`
[profile.release]
debug = false

View File

@ -0,0 +1,117 @@
use crate::arch::kvm::vmx::vmcs::VmcsFields;
use crate::arch::kvm::vmx::vmx_asm_wrapper::{vmx_vmlaunch, vmx_vmread};
use crate::libs::mutex::Mutex;
use crate::virt::kvm::vm;
use crate::{
kdebug,
kerror,
// libs::spinlock::{SpinLock, SpinLockGuard},
syscall::SystemError,
};
use alloc::sync::Arc;
use core::arch::asm;
use raw_cpuid::CpuId;
// use crate::virt::kvm::guest_code;
use self::vmx::mmu::{kvm_mmu_setup, kvm_vcpu_mtrr_init};
use self::vmx::vcpu::VmxVcpu;
pub mod vmx;
/// Architecture-specific (x86_64) KVM state.
///
/// The struct has no fields yet; the commented-out members below sketch the MMU page
/// bookkeeping that has not been implemented.
#[derive(Default, Debug, Clone)]
pub struct X86_64KVMArch {
    // n_used_mmu_pages: u32,
    // n_requested_mmu_pages: u32,
    // n_max_mmu_pages: u32,
    // mmu_valid_gen: u64,
    // // mmu_page_hash:[],
    // active_mmu_pages: LinkedList<KvmMmuPage>, // every allocated mmu page is linked onto active_mmu_pages
    // zapped_obsolete_pages: LinkedList<KvmMmuPage>, // released mmu pages are linked onto zapped_obsolete_pages, a global invalid_list
}
impl X86_64KVMArch {
    /// Checks whether the CPU supports virtualization.
    ///
    /// # Errors
    ///
    /// Returns `EOPNOTSUPP_OR_ENOTSUP` when CPUID reports a vendor other than
    /// "GenuineIntel", or reports that VMX is not available. If CPUID yields no vendor or
    /// feature information at all, the corresponding check is skipped.
    pub fn kvm_arch_cpu_supports_vm() -> Result<(), SystemError> {
        let cpuid = CpuId::new();
        // Check to see if CPU is Intel ("GenuineIntel").
        if let Some(vi) = cpuid.get_vendor_info() {
            if vi.as_str() != "GenuineIntel" {
                return Err(SystemError::EOPNOTSUPP_OR_ENOTSUP);
            }
        }
        // Check processor supports for Virtual Machine Extension (VMX) technology
        // CPUID.1:ECX.VMX[bit 5] = 1 (Intel Manual: 24.6 Discovering Support for VMX)
        if let Some(fi) = cpuid.get_feature_info() {
            if !fi.has_vmx() {
                return Err(SystemError::EOPNOTSUPP_OR_ENOTSUP);
            }
        }
        Ok(())
    }

    /// Initializes the architecture-specific part of KVM. Currently a no-op.
    pub fn kvm_arch_init() -> Result<(), SystemError> {
        Ok(())
    }

    /// Handles an architecture-specific ioctl on the KVM device.
    ///
    /// No command is implemented yet, so every `cmd` is logged and rejected with `EINVAL`.
    pub fn kvm_arch_dev_ioctl(cmd: u32, _arg: usize) -> Result<usize, SystemError> {
        match cmd {
            _ => {
                kerror!("unknown kvm ioctl cmd: {}", cmd);
                return Err(SystemError::EINVAL);
            }
        }
    }

    /// Creates the vcpu with the given `id`, attached to VM 0.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by `VmxVcpu::new` instead of panicking.
    pub fn kvm_arch_vcpu_create(id: u32) -> Result<Arc<Mutex<VmxVcpu>>, SystemError> {
        // let guest_rip = current_kvm.lock().memslots[0].memslots[0].userspace_addr;
        // NOTE(review): `vm(0).unwrap()` still panics when VM 0 does not exist; convert it to
        // an error once the return type of `vm` is confirmed.
        let vcpu = VmxVcpu::new(id, vm(0).unwrap())?;
        Ok(Arc::new(Mutex::new(vcpu)))
    }

    /// Prepares a freshly created vcpu: runs the MTRR check, then installs the MMU callbacks.
    pub fn kvm_arch_vcpu_setup(vcpu: &Mutex<VmxVcpu>) -> Result<(), SystemError> {
        kvm_vcpu_mtrr_init(vcpu)?;
        kvm_mmu_setup(vcpu);
        Ok(())
    }

    /// Enters the guest with `vmlaunch`.
    ///
    /// # Errors
    ///
    /// Returns the error reported by `vmx_vmlaunch`. The VM-instruction error field is logged
    /// on a best-effort basis: a failing `vmread` is logged too rather than causing a panic.
    pub fn kvm_arch_vcpu_ioctl_run(_vcpu: &Mutex<VmxVcpu>) -> Result<(), SystemError> {
        if let Err(e) = vmx_vmlaunch() {
            match vmx_vmread(VmcsFields::VMEXIT_INSTR_ERR as u32) {
                Ok(vmx_err) => {
                    kdebug!("vmlaunch failed: {:?}", vmx_err);
                }
                Err(read_err) => {
                    kdebug!(
                        "vmlaunch failed; could not read VM-instruction error: {:?}",
                        read_err
                    );
                }
            }
            return Err(e);
        }
        Ok(())
    }
    // pub fn kvm_arch_create_memslot(_slot: &mut KvmMemorySlot, _npages: u64) {
    // }
    // pub fn kvm_arch_commit_memory_region(
    // _mem: &KvmUserspaceMemoryRegion,
    // _new_slot: &KvmMemorySlot,
    // _old_slot: &KvmMemorySlot,
    // _change: KvmMemoryChange) {
    // // let kvm = KVM();
    // // let mut num_mmu_pages = 0;
    // // if kvm.lock().arch.n_requested_mmu_pages == 0{
    // // num_mmu_pages = kvm_mmu_calculate_mmu_pages();
    // // }
    // // if num_mmu_pages != 0 {
    // // // kvm_mmu_change_mmu_pages(num_mmu_pages);
    // // }
    // }
}
/// Demo guest entry point: logs once, then loops forever executing `cpuid`
/// (leaf 0, sub-leaf 0) followed by a `nop` and another log line.
#[no_mangle]
pub extern "C" fn guest_code() {
    kdebug!("guest_code");
    loop {
        // SAFETY: `cpuid` only writes eax/ebx/ecx/edx. eax, ecx and edx are declared as
        // clobbered operands; rbx cannot be named as an operand, so it is saved to and
        // restored from a scratch register around the instruction.
        unsafe {
            asm!(
                "mov {saved_rbx:r}, rbx",
                "cpuid",
                "mov rbx, {saved_rbx:r}",
                saved_rbx = out(reg) _,
                inout("eax") 0 => _,
                inout("ecx") 0 => _,
                out("edx") _,
            );
        }
        // SAFETY: `nop` has no architectural effect.
        unsafe { asm!("nop") };
        kdebug!("guest_code");
    }
}

View File

@ -0,0 +1,112 @@
use crate::arch::mm::PageMapper;
use crate::arch::MMArch;
use crate::mm::page::PageFlags;
use crate::mm::{PageTableKind, PhysAddr, VirtAddr};
use crate::smp::core::smp_get_processor_id;
use crate::{arch::mm::LockedFrameAllocator, syscall::SystemError};
use core::sync::atomic::{compiler_fence, AtomicUsize, Ordering};
use x86::msr;
/// Checks that MTRRs are enabled by testing bit 11 of the `IA32_MTRR_DEF_TYPE` MSR.
///
/// # Errors
///
/// Returns `EOPNOTSUPP_OR_ENOTSUP` when that bit is clear.
pub fn check_ept_features() -> Result<(), SystemError> {
    const MTRR_ENABLE_BIT: u64 = 1 << 11;
    let mtrr_def_type = unsafe { msr::rdmsr(msr::IA32_MTRR_DEF_TYPE) };
    if mtrr_def_type & MTRR_ENABLE_BIT != 0 {
        Ok(())
    } else {
        Err(SystemError::EOPNOTSUPP_OR_ENOTSUP)
    }
}
// pub fn ept_build_mtrr_map() -> Result<(), SystemError> {
// let ia32_mtrr_cap = unsafe { msr::rdmsr(msr::IA32_MTRRCAP) };
// Ok(())
// }
/// Sentinel meaning that no processor currently holds the mapper lock.
/// It is needed because `AtomicUsize::new(0)` would treat 0 as a processor id.
const EPT_MAPPER_NO_PROCESSOR: usize = !0;
/// The processor that currently holds the mapper lock.
static EPT_MAPPER_LOCK_OWNER: AtomicUsize = AtomicUsize::new(EPT_MAPPER_NO_PROCESSOR);
/// Lock counter of the mapper.
static EPT_MAPPER_LOCK_COUNT: AtomicUsize = AtomicUsize::new(0);
/// Handle to the EPT page-table mapper, obtained through `EptMapper::lock`.
pub struct EptMapper {
    /// The EPT page-table mapper.
    mapper: PageMapper,
    /// Whether this handle is read-only.
    readonly: bool,
    // EPT page-table root address
    // root_hpa: PhysAddr,
}
impl EptMapper {
    /// Acquires the mapper lock on behalf of processor `cpuid` and wraps `mapper`.
    ///
    /// Spins until the owner slot is either free or already owned by `cpuid`. If the lock
    /// count was already non-zero, the returned handle is marked read-only.
    fn lock_cpu(cpuid: usize, mapper: PageMapper) -> Self {
        loop {
            match EPT_MAPPER_LOCK_OWNER.compare_exchange_weak(
                EPT_MAPPER_NO_PROCESSOR,
                cpuid,
                Ordering::Acquire,
                Ordering::Relaxed,
            ) {
                Ok(_) => break,
                // The current processor already holds the lock.
                Err(id) if id == cpuid => break,
                // either CAS failed, or some other hardware thread holds the lock
                Err(_) => core::hint::spin_loop(),
            }
        }
        let prev_count = EPT_MAPPER_LOCK_COUNT.fetch_add(1, Ordering::Relaxed);
        compiler_fence(Ordering::Acquire);
        // This core already held the lock, so the handle obtained now is read-only.
        let readonly = prev_count > 0;
        Self { mapper, readonly }
    }

    /// Locks the mapper for the current processor and returns a handle to it.
    #[inline(always)]
    pub fn lock() -> Self {
        let cpuid = smp_get_processor_id() as usize;
        let mapper = unsafe { PageMapper::current(PageTableKind::EPT, LockedFrameAllocator) };
        Self::lock_cpu(cpuid, mapper)
    }

    /// Maps a guest physical address (gpa) to the given host physical address (hpa).
    ///
    /// ## Parameters
    ///
    /// - `gpa`: guest physical address to map
    /// - `hpa`: host physical address to map it to
    /// - `flags`: page flags
    ///
    /// ## Returns
    ///
    /// - `Ok(())` on success
    /// - `EAGAIN_OR_EWOULDBLOCK` if this handle is read-only
    /// - `ENOMEM` if the underlying mapper could not create the mapping (previously a panic)
    pub unsafe fn walk(
        &mut self,
        gpa: u64,
        hpa: u64,
        flags: PageFlags<MMArch>,
    ) -> Result<(), SystemError> {
        if self.readonly {
            return Err(SystemError::EAGAIN_OR_EWOULDBLOCK);
        }
        self.mapper
            .map_phys(
                VirtAddr::new(gpa as usize),
                PhysAddr::new(hpa as usize),
                flags,
            )
            .ok_or(SystemError::ENOMEM)?
            .flush();
        Ok(())
    }
    // fn get_ept_index(addr: u64, level: usize) -> u64 {
    // let pt64_level_shift = PAGE_SHIFT + (level - 1) * PT64_LEVEL_BITS;
    // (addr >> pt64_level_shift) & ((1 << PT64_LEVEL_BITS) - 1)
    // }
}

/// Releases the mapper lock taken in `lock_cpu`.
///
/// Without this, the lock count never returned to zero and the owner slot was never cleared:
/// every handle after the first was read-only and other processors spun forever.
impl Drop for EptMapper {
    fn drop(&mut self) {
        // NOTE(review): if an interrupt handler on the same CPU can take this lock, consider
        // disabling interrupts around the decrement/store pair.
        let prev_count = EPT_MAPPER_LOCK_COUNT.fetch_sub(1, Ordering::Relaxed);
        if prev_count == 1 {
            // Last handle on this processor: give up ownership.
            EPT_MAPPER_LOCK_OWNER.store(EPT_MAPPER_NO_PROCESSOR, Ordering::Release);
        }
        compiler_fence(Ordering::Release);
    }
}

View File

@ -0,0 +1,7 @@
// pub struct X86Exception {
// vector: u8,
// error_code_valid: bool,
// error_code: u16,
// // bool nested_page_fault;
// address: u64, /* cr2 or nested page fault gpa */
// }

View File

@ -0,0 +1,254 @@
use crate::{
arch::kvm::vmx::ept::EptMapper,
kdebug,
libs::mutex::Mutex,
mm::{page::PageFlags, syscall::ProtFlags},
syscall::SystemError,
virt::kvm::host_mem::{__gfn_to_pfn, kvm_vcpu_gfn_to_memslot, PAGE_MASK, PAGE_SHIFT},
};
use bitfield_struct::bitfield;
use super::{
ept::check_ept_features,
vcpu::VmxVcpu,
vmcs::VmcsFields,
vmx_asm_wrapper::{vmx_vmread, vmx_vmwrite},
};
use crate::arch::kvm::vmx::mmu::VmcsFields::CTRL_EPTP_PTR;
// pub const PT64_ROOT_LEVEL: u32 = 4;
// pub const PT32_ROOT_LEVEL: u32 = 2;
// pub const PT32E_ROOT_LEVEL: u32 = 3;
// pub struct KvmMmuPage{
// gfn: u64, // 管理地址范围的起始地址对应的 gfn
// role: KvmMmuPageRole, // 基本信息,包括硬件特性和所属层级等
// // spt: *mut u64, // spt: shadow page table,指向 struct page 的地址,其包含了所有页表项 (pte)。同时 page->private 会指向该 kvm_mmu_page
// }
// Role descriptor of an MMU page, packed into 32 bits.
#[bitfield(u32)]
pub struct KvmMmuPageRole {
    #[bits(4)]
    level: usize, // level at which the page sits
    cr4_pae: bool, // cr4.pae; 1 means 64-bit gptes are used
    #[bits(2)]
    quadrant: usize, // if cr4.pae=0 a gpte is 32-bit while an spte is 64-bit, so several sptes represent one gpte; this field says which part of the gpte this is
    direct: bool,
    #[bits(3)]
    access: usize, // access permissions
    invalid: bool, // invalidated; destroyed as soon as it is unpinned
    nxe: bool, // efer.nxe, no-execute
    cr0_wp: bool, // cr0.wp, write protect
    smep_andnot_wp: bool, // smep && !cr0.wp (original note: with SMEP enabled, user-mode code cannot execute instructions located in the kernel address space — NOTE(review): verify this description)
    smap_andnot_wp: bool, // smap && !cr0.wp
    #[bits(8)]
    unused: usize,
    #[bits(8)]
    smm: usize, // 1 means system management mode, 0 means not SMM
}
// We don't want allocation failures within the mmu code, so we preallocate
// enough memory for a single page fault in a cache.
// pub struct KvmMmuMemoryCache {
// num_objs: u32,
// objs: [*mut u8; KVM_NR_MEM_OBJS as usize],
// }
/// Per-vcpu MMU state: the page-table root plus the callbacks installed by
/// `init_kvm_tdp_mmu`. All callbacks default to `None`.
#[derive(Default)]
pub struct KvmMmu {
    /// Host physical address of the page-table root; `__direct_map` treats 0 as invalid.
    pub root_hpa: u64,
    pub root_level: u32,
    pub base_role: KvmMmuPageRole,
    // ...there are further variables whose purpose is not known yet
    /// Callback returning the guest CR3.
    pub get_cr3: Option<fn(&VmxVcpu) -> u64>,
    /// Callback that programs the EPT pointer from a root host physical address.
    pub set_eptp: Option<fn(u64) -> Result<(), SystemError>>,
    /// Callback that handles a guest page fault at `gpa`.
    pub page_fault: Option<
        fn(
            vcpu: &mut VmxVcpu,
            gpa: u64,
            error_code: u32,
            prefault: bool,
        ) -> Result<(), SystemError>,
    >,
    // get_pdptr: Option<fn(& VmxVcpu, index:u32) -> u64>, // Page Directory Pointer Table Register? Difference from CR3 not understood yet
    // inject_page_fault: Option<fn(&mut VmxVcpu, fault: &X86Exception)>,
    // gva_to_gpa: Option<fn(&mut VmxVcpu, gva: u64, access: u32, exception: &X86Exception) -> u64>,
    // translate_gpa: Option<fn(&mut VmxVcpu, gpa: u64, access: u32, exception: &X86Exception) -> u64>,
    // sync_page: Option<fn(&mut VmxVcpu, &mut KvmMmuPage)>,
    // invlpg: Option<fn(&mut VmxVcpu, gva: u64)>, // invalid entry
    // update_pte: Option<fn(&mut VmxVcpu, sp: &KvmMmuPage, spte: u64, pte: u64)>,
}
/// Debug output for `KvmMmu`; only the plain data fields are printed, the callback
/// pointers are left out.
impl core::fmt::Debug for KvmMmu {
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        let mut out = f.debug_struct("KvmMmu");
        out.field("root_hpa", &self.root_hpa);
        out.field("root_level", &self.root_level);
        out.field("base_role", &self.base_role);
        out.finish()
    }
}
/// Returns the guest CR3 read from the `GUEST_CR3` VMCS field.
///
/// # Panics
///
/// Panics if the VMCS read fails (the callback signature has no way to report an error).
fn tdp_get_cr3(_vcpu: &VmxVcpu) -> u64 {
    // The message used to say "eptp", which mislabelled the field being read.
    vmx_vmread(VmcsFields::GUEST_CR3 as u32).expect("Failed to read guest CR3")
}
/// Builds the EPT pointer for `root_hpa` and writes it to the `CTRL_EPTP_PTR` VMCS field.
///
/// The low three bits are left at zero (the original author's note: paging-structure
/// memory type Uncacheable) and no other attribute bits are set.
fn tdp_set_eptp(root_hpa: u64) -> Result<(), SystemError> {
    // This value is 1 less than the EPT page-walk length. 3 means 4-level paging.
    let walk_length_field: u64 = 0x3 << 3;
    // NOTE(review): `PAGE_MASK` is widened with `as u64`; if it is declared narrower than
    // 64 bits, high bits of `root_hpa` are dropped here — confirm against its definition.
    let table_base = root_hpa & (PAGE_MASK as u64);
    vmx_vmwrite(CTRL_EPTP_PTR as u32, walk_length_field | table_base)?;
    Ok(())
}
/// Handles a guest page fault at `gpa` by resolving the backing host frame and mapping it.
///
/// ## Parameters
///
/// - `vcpu`: the faulting vcpu
/// - `gpa`: faulting guest physical address
/// - `error_code`: fault error code; bit 1 is treated as the "write access" flag
/// - `prefault`: forwarded unchanged to the helpers
///
/// ## Errors
///
/// Propagates errors from the gfn→pfn lookup and from `__direct_map`.
fn tdp_page_fault(
    vcpu: &mut VmxVcpu,
    gpa: u64,
    error_code: u32,
    prefault: bool,
) -> Result<(), SystemError> {
    kdebug!("tdp_page_fault");
    // Shift the guest physical address right to get the guest frame number.
    let gfn = gpa >> PAGE_SHIFT;
    // Top up the caches ahead of time so allocation cannot fail while handling the fault.
    mmu_topup_memory_caches(vcpu)?;
    // TODO: determine the level used by this gfn (huge page handling)
    let level = 1; // 4KB page
    // TODO: fast path for violations caused by reads/writes, i.e. present non-mmio page
    // faults that already have write permission
    // fast_page_fault(vcpu, gpa, level, error_code)
    // gfn->pfn
    let mut map_writable = false;
    let write = error_code & ((1 as u32) << 1);
    // The `write` argument must be true for a write fault. The previous `write == 0`
    // passed the inverted value.
    let pfn = mmu_gfn_to_pfn_fast(vcpu, gpa, prefault, gfn, write != 0, &mut map_writable)?;
    // direct map: install the translation into the EPT page table
    __direct_map(vcpu, gpa, write, map_writable, level, gfn, pfn, prefault)?;
    Ok(())
}
/*
* Caculate mmu pages needed for kvm.
*/
// pub fn kvm_mmu_calculate_mmu_pages() -> u32 {
// let mut nr_mmu_pages:u32;
// let mut nr_pages = 0;
// let kvm = vm(0).unwrap();
// for as_id in 0..KVM_ADDRESS_SPACE_NUM {
// let slots = kvm.memslots[as_id];
// for i in 0..KVM_MEM_SLOTS_NUM {
// let memslot = slots.memslots[i as usize];
// nr_pages += memslot.npages;
// }
// }
// nr_mmu_pages = (nr_pages as u32)* KVM_PERMILLE_MMU_PAGES / 1000;
// nr_mmu_pages = nr_mmu_pages.max(KVM_MIN_ALLOC_MMU_PAGES);
// return nr_mmu_pages;
// }
// pub fn kvm_mmu_change_mmu_pages(mut goal_nr_mmu_pages: u32){
// let kvm = KVM();
// // 释放多余的mmu page
// if kvm.lock().arch.n_used_mmu_pages > goal_nr_mmu_pages {
// while kvm.lock().arch.n_used_mmu_pages > goal_nr_mmu_pages {
// if !prepare_zap_oldest_mmu_page() {
// break;
// }
// }
// // kvm_mmu_commit_zap_page();
// goal_nr_mmu_pages = kvm.lock().arch.n_used_mmu_pages;
// }
// kvm.lock().arch.n_max_mmu_pages = goal_nr_mmu_pages;
// }
// pub fn prepare_zap_oldest_mmu_page() -> bool {
// return false;
// }
/// Sets up the MMU of `vcpu`; at present this only installs the TDP callbacks.
pub fn kvm_mmu_setup(vcpu: &Mutex<VmxVcpu>) {
    // TODO: init_kvm_softmmu(vcpu), init_kvm_nested_mmu(vcpu)
    init_kvm_tdp_mmu(vcpu);
}
/// MTRR initialization hook for a vcpu; currently it only runs `check_ept_features`
/// and forwards its result. The vcpu itself is not touched.
pub fn kvm_vcpu_mtrr_init(_vcpu: &Mutex<VmxVcpu>) -> Result<(), SystemError> {
    check_ept_features()
}
/// Installs the TDP callbacks (`page_fault`, `get_cr3`, `set_eptp`) into the vcpu's MMU
/// while holding the vcpu lock.
pub fn init_kvm_tdp_mmu(vcpu: &Mutex<VmxVcpu>) {
    let mut guard = vcpu.lock();
    let mmu = &mut guard.mmu;
    mmu.page_fault = Some(tdp_page_fault);
    mmu.get_cr3 = Some(tdp_get_cr3);
    mmu.set_eptp = Some(tdp_set_eptp);
    // context.inject_page_fault = kvm_inject_page_fault; TODO: inject_page_fault
    // context.invlpg = nonpaging_invlpg;
    // context.sync_page = nonpaging_sync_page;
    // context.update_pte = nonpaging_update_pte;
    // TODO: gva to gpa in kvm
    // if !is_paging(vcpu) { // vcpu without paging
    // context.gva_to_gpa = nonpaging_gva_to_gpa;
    // context.root_level = 0;
    // } else if (is_long_mode(vcpu)) {
    // context.gva_to_gpa = paging64_gva_to_gpa;
    // context.root_level = PT64_ROOT_LEVEL;
    // TODO:: different paging strategy
    // } else if (is_pae(vcpu)) {
    // context.gva_to_gpa = paging64_gva_to_gpa;
    // context.root_level = PT32E_ROOT_LEVEL;
    // } else {
    // context.gva_to_gpa = paging32_gva_to_gpa;
    // context.root_level = PT32_ROOT_LEVEL;
    // }
}
/// Maps `gpa` to the host frame `pfn` in the EPT page table.
///
/// Only `vcpu`, `gpa` and `pfn` are used; the remaining parameters are placeholders.
/// The mapping is always created with protection bits `0x7`.
///
/// ## Errors
///
/// - `KVM_HVA_ERR_BAD` if `vcpu.mmu.root_hpa` is 0
/// - any error returned by `EptMapper::walk` (previously this was a kernel panic through
///   `assert!`)
pub fn __direct_map(
    vcpu: &mut VmxVcpu,
    gpa: u64,
    _write: u32,
    _map_writable: bool,
    _level: i32,
    _gfn: u64,
    pfn: u64,
    _prefault: bool,
) -> Result<u32, SystemError> {
    kdebug!("gpa={}, pfn={}, root_hpa={:x}", gpa, pfn, vcpu.mmu.root_hpa);
    // Check that vcpu.mmu.root_hpa is valid.
    if vcpu.mmu.root_hpa == 0 {
        return Err(SystemError::KVM_HVA_ERR_BAD);
    }
    // Map gpa to hpa.
    let mut ept_mapper = EptMapper::lock();
    let page_flags = PageFlags::from_prot_flags(ProtFlags::from_bits_truncate(0x7 as u64), false);
    // A failed mapping is reported to the caller; the mapper handle is dropped on the way out.
    unsafe {
        ept_mapper.walk(gpa, pfn << PAGE_SHIFT, page_flags)?;
    }
    drop(ept_mapper);
    Ok(0)
}
/// Resolves guest frame number `gfn` to a host frame number.
///
/// Looks up the memory slot for `gfn` on this vcpu and hands it to `__gfn_to_pfn`
/// together with `write` and `writable`. `_gpa` and `_prefault` are unused.
pub fn mmu_gfn_to_pfn_fast(
    vcpu: &mut VmxVcpu,
    _gpa: u64,
    _prefault: bool,
    gfn: u64,
    write: bool,
    writable: &mut bool,
) -> Result<u64, SystemError> {
    let host_frame = __gfn_to_pfn(
        kvm_vcpu_gfn_to_memslot(vcpu, gfn),
        gfn,
        false,
        write,
        writable,
    )?;
    Ok(host_frame)
}
// TODO: add the cache
/// Placeholder for refilling the MMU memory caches; it currently does nothing and
/// always returns `Ok(())`.
pub fn mmu_topup_memory_caches(_vcpu: &mut VmxVcpu) -> Result<(), SystemError> {
    // If vcpu->arch.mmu_page_header_cache runs short, allocate from mmu_page_header_cache.
    // pte_list_desc_cache and mmu_page_header_cache are two global slab caches created in kvm_mmu_module_init.
    // mmu_topup_memory_cache(vcpu.mmu_page_header_cache,
    // mmu_page_header_cache, 4);
    Ok(())
}

View File

@ -0,0 +1,45 @@
pub mod ept;
pub mod kvm_emulation;
pub mod mmu;
pub mod seg;
pub mod vcpu;
pub mod vmcs;
pub mod vmexit;
pub mod vmx_asm_wrapper;
/// Index of each general-purpose register inside a vcpu register array
/// (for example `Rsp` is used to index `vcpu_ctx.regs`).
#[allow(dead_code)]
pub enum VcpuRegIndex {
    Rax = 0,
    Rbx = 1,
    Rcx = 2,
    Rdx = 3,
    Rsi = 4,
    Rdi = 5,
    Rsp = 6,
    Rbp = 7,
    R8 = 8,
    R9 = 9,
    R10 = 10,
    R11 = 11,
    R12 = 12,
    R13 = 13,
    R14 = 14,
    R15 = 15,
}
// Bit flags of the x86 CR0 control register.
bitflags! {
    #[allow(non_camel_case_types)]
    pub struct X86_CR0: u32{
        const CR0_PE = 1 << 0; /* Protection Enable */
        const CR0_MP = 1 << 1; /* Monitor Coprocessor */
        const CR0_EM = 1 << 2; /* Emulation */
        const CR0_TS = 1 << 3; /* Task Switched */
        const CR0_ET = 1 << 4; /* Extension Type */
        const CR0_NE = 1 << 5; /* Numeric Error */
        const CR0_WP = 1 << 16; /* Write Protect */
        const CR0_AM = 1 << 18; /* Alignment Mask */
        const CR0_NW = 1 << 29; /* Not Write-through */
        const CR0_CD = 1 << 30; /* Cache Disable */
        const CR0_PG = 1 << 31; /* Paging */
    }
}

View File

@ -0,0 +1,89 @@
use crate::arch::kvm::VmcsFields::{
GUEST_CS_ACCESS_RIGHTS, GUEST_CS_BASE, GUEST_CS_LIMIT, GUEST_CS_SELECTOR,
};
use crate::arch::kvm::VmcsFields::{
GUEST_DS_ACCESS_RIGHTS, GUEST_DS_BASE, GUEST_DS_LIMIT, GUEST_DS_SELECTOR,
};
use crate::arch::kvm::VmcsFields::{
GUEST_ES_ACCESS_RIGHTS, GUEST_ES_BASE, GUEST_ES_LIMIT, GUEST_ES_SELECTOR,
};
use crate::arch::kvm::VmcsFields::{
GUEST_FS_ACCESS_RIGHTS, GUEST_FS_BASE, GUEST_FS_LIMIT, GUEST_FS_SELECTOR,
};
use crate::arch::kvm::VmcsFields::{
GUEST_GS_ACCESS_RIGHTS, GUEST_GS_BASE, GUEST_GS_LIMIT, GUEST_GS_SELECTOR,
};
use crate::arch::kvm::VmcsFields::{
GUEST_LDTR_ACCESS_RIGHTS, GUEST_LDTR_BASE, GUEST_LDTR_LIMIT, GUEST_LDTR_SELECTOR,
};
use crate::arch::kvm::VmcsFields::{
GUEST_SS_ACCESS_RIGHTS, GUEST_SS_BASE, GUEST_SS_LIMIT, GUEST_SS_SELECTOR,
};
use crate::arch::kvm::VmcsFields::{
GUEST_TR_ACCESS_RIGHTS, GUEST_TR_BASE, GUEST_TR_LIMIT, GUEST_TR_SELECTOR,
};
use crate::syscall::SystemError;
use super::vmx_asm_wrapper::vmx_vmwrite;
// pub const TSS_IOPB_BASE_OFFSET: usize = 0x66;
// pub const TSS_BASE_SIZE: usize = 0x68;
// pub const TSS_IOPB_SIZE: usize = 65536 / 8;
// pub const TSS_REDIRECTION_SIZE: usize = 256 / 8;
// pub const RMODE_TSS_SIZE: usize = TSS_BASE_SIZE + TSS_REDIRECTION_SIZE + TSS_IOPB_SIZE + 1;
/// The four VMCS field identifiers that describe one guest segment register.
/// Each member stores a `VmcsFields` value cast to `u32` (see `VMX_SEGMENT_FIELD!`),
/// not the segment's actual selector/base/limit/access-rights contents.
#[derive(Debug)]
pub struct KvmVmxSegmentField {
    selector: u32,
    base: u32,
    limit: u32,
    access_rights: u32,
}
// Builds the `KvmVmxSegmentField` for segment `$struct_name` by pasting the name into
// `GUEST_<name>_SELECTOR`, `GUEST_<name>_BASE`, `GUEST_<name>_LIMIT` and
// `GUEST_<name>_ACCESS_RIGHTS` and casting each identifier to `u32`.
macro_rules! VMX_SEGMENT_FIELD {
    ($struct_name: ident) => {
        KvmVmxSegmentField {
            selector: concat_idents!(GUEST_, $struct_name, _SELECTOR) as u32,
            base: concat_idents!(GUEST_, $struct_name, _BASE) as u32,
            limit: concat_idents!(GUEST_, $struct_name, _LIMIT) as u32,
            access_rights: concat_idents!(GUEST_, $struct_name, _ACCESS_RIGHTS) as u32,
        }
    };
}
/// Segment register identifiers. The discriminants are used as indices into
/// `KVM_VMX_SEGMENT_FIELDS`, so the two must stay in the same order.
#[derive(FromPrimitive)]
pub enum Sreg {
    ES = 0,
    CS = 1,
    SS = 2,
    DS = 3,
    FS = 4,
    GS = 5,
    TR = 6,
    LDTR = 7,
}
/// VMCS field identifiers for every segment register, indexed by `Sreg as usize`.
static KVM_VMX_SEGMENT_FIELDS: [KvmVmxSegmentField; 8] = [
    VMX_SEGMENT_FIELD!(ES),
    VMX_SEGMENT_FIELD!(CS),
    VMX_SEGMENT_FIELD!(SS),
    VMX_SEGMENT_FIELD!(DS),
    VMX_SEGMENT_FIELD!(FS),
    VMX_SEGMENT_FIELD!(GS),
    VMX_SEGMENT_FIELD!(TR),
    VMX_SEGMENT_FIELD!(LDTR),
];
pub fn seg_setup(seg: usize) -> Result<(), SystemError> {
let seg_field = &KVM_VMX_SEGMENT_FIELDS[seg];
let mut access_rigt = 0x0093;
if seg == Sreg::CS as usize {
access_rigt |= 0x08;
}
// setup segment fields
vmx_vmwrite(seg_field.selector, 0)?;
vmx_vmwrite(seg_field.base, 0)?;
vmx_vmwrite(seg_field.limit, 0x0000_FFFF)?;
vmx_vmwrite(seg_field.access_rights, access_rigt)?;
Ok(())
}

View File

@ -0,0 +1,653 @@
use super::vmcs::{
VMCSRegion, VmcsFields, VmxEntryCtrl, VmxPrimaryExitCtrl, VmxPrimaryProcessBasedExecuteCtrl,
VmxSecondaryProcessBasedExecuteCtrl,
};
use super::vmx_asm_wrapper::{vmx_vmclear, vmx_vmptrld, vmx_vmread, vmx_vmwrite, vmxoff, vmxon};
use crate::arch::kvm::vmx::mmu::KvmMmu;
use crate::arch::kvm::vmx::seg::{seg_setup, Sreg};
use crate::arch::kvm::vmx::{VcpuRegIndex, X86_CR0};
use crate::arch::mm::{LockedFrameAllocator, PageMapper};
use crate::arch::x86_64::mm::X86_64MMArch;
use crate::arch::MMArch;
use crate::kdebug;
use crate::mm::{phys_2_virt, VirtAddr};
use crate::mm::{MemoryManagementArch, PageTableKind};
use crate::syscall::SystemError;
use crate::virt::kvm::vcpu::Vcpu;
use crate::virt::kvm::vm::Vm;
use alloc::alloc::Global;
use alloc::boxed::Box;
use core::slice;
use raw_cpuid::CpuId;
use x86;
use x86::{controlregs, msr, segmentation};
// use crate::arch::kvm::vmx::seg::RMODE_TSS_SIZE;
// use crate::virt::kvm::{KVM};
// KERNEL_ALLOCATOR
/// Size in bytes of the page-sized VMX regions declared in this file.
pub const PAGE_SIZE: usize = 0x1000;
/// Number of general-purpose register slots in `VcpuContextFrame::regs`.
pub const NR_VCPU_REGS: usize = 16;
/// 4096-byte aligned, page-sized VMXON region: a revision identifier followed by the
/// rest of the page.
#[repr(C, align(4096))]
#[derive(Debug)]
pub struct VmxonRegion {
    pub revision_id: u32,
    pub data: [u8; PAGE_SIZE - 4],
}
/// 4096-byte aligned, page-sized MSR bitmap whose physical address is written to the
/// `CTRL_MSR_BITMAP_ADDR` VMCS field.
#[repr(C, align(4096))]
#[derive(Debug)]
pub struct MSRBitmap {
    pub data: [u8; PAGE_SIZE],
}
/// Heap-allocated VMX regions owned by one vcpu, each paired with its physical address.
#[derive(Debug)]
pub struct VcpuData {
    /// The virtual and physical address of the Vmxon naturally aligned 4-KByte region of memory
    pub vmxon_region: Box<VmxonRegion>,
    pub vmxon_region_physical_address: u64, // required by vmxon
    /// The virtual and physical address of the Vmcs naturally aligned 4-KByte region of memory
    /// holds the complete CPU state of both the host and the guest.
    /// includes the segment registers, GDT, IDT, TR, various MSRs
    /// and control field structures for handling exit and entry operations
    pub vmcs_region: Box<VMCSRegion>,
    pub vmcs_region_physical_address: u64, // required by vmptrld and vmclear
    pub msr_bitmap: Box<MSRBitmap>,
    pub msr_bitmap_physical_address: u64,
}
/// Saved register state of a vcpu.
#[derive(Default, Debug)]
#[repr(C)]
pub struct VcpuContextFrame {
    pub regs: [usize; NR_VCPU_REGS], // general-purpose registers
    pub rip: usize,
    pub rflags: usize,
}
/// Run state of a vcpu. A new vcpu starts as `VcpuInv`.
// NOTE(review): the names suggest invalid / pending / active — confirm with the author.
#[derive(Debug)]
#[allow(dead_code)]
pub enum VcpuState {
    VcpuInv = 0,
    VcpuPend = 1,
    VcpuAct = 2,
}
/// A virtual CPU backed by Intel VMX.
#[derive(Debug)]
pub struct VmxVcpu {
    pub vcpu_id: u32,
    pub vcpu_ctx: VcpuContextFrame, // context saved across vcpu switches, e.g. general-purpose registers
    pub vcpu_state: VcpuState, // current run state of the vcpu
    pub mmu: KvmMmu, // memory management unit of the vcpu
    pub data: VcpuData, // VMX regions of the vcpu
    pub parent_vm: Vm, // parent KVM
}
impl VcpuData {
pub fn alloc() -> Result<Self, SystemError> {
let vmxon_region: Box<VmxonRegion> = unsafe {
Box::try_new_zeroed_in(Global)
.expect("Try new zeroed fail!")
.assume_init()
};
let vmcs_region: Box<VMCSRegion> = unsafe {
Box::try_new_zeroed_in(Global)
.expect("Try new zeroed fail!")
.assume_init()
};
let msr_bitmap: Box<MSRBitmap> = unsafe {
Box::try_new_zeroed_in(Global)
.expect("Try new zeroed fail!")
.assume_init()
};
// FIXME: virt_2_phys的转换正确性存疑
let vmxon_region_physical_address = {
let vaddr = VirtAddr::new(vmxon_region.as_ref() as *const _ as _);
unsafe { MMArch::virt_2_phys(vaddr).unwrap().data() as u64 }
};
let vmcs_region_physical_address = {
let vaddr = VirtAddr::new(vmcs_region.as_ref() as *const _ as _);
unsafe { MMArch::virt_2_phys(vaddr).unwrap().data() as u64 }
};
let msr_bitmap_physical_address = {
let vaddr = VirtAddr::new(msr_bitmap.as_ref() as *const _ as _);
unsafe { MMArch::virt_2_phys(vaddr).unwrap().data() as u64 }
};
let mut instance = Self {
// Allocate a naturally aligned 4-KByte VMXON region of memory to enable VMX operation (Intel Manual: 25.11.5 VMXON Region)
vmxon_region,
vmxon_region_physical_address,
// Allocate a naturally aligned 4-KByte VMCS region of memory
vmcs_region,
vmcs_region_physical_address,
msr_bitmap,
msr_bitmap_physical_address,
};
// printk_color!(GREEN, BLACK, "[+] init_region\n");
instance.init_region()?;
Ok(instance)
}
pub fn init_region(&mut self) -> Result<(), SystemError> {
// Get the Virtual Machine Control Structure revision identifier (VMCS revision ID)
// (Intel Manual: 25.11.5 VMXON Region)
let revision_id = unsafe { (msr::rdmsr(msr::IA32_VMX_BASIC) as u32) & 0x7FFF_FFFF };
kdebug!("[+] VMXON Region Virtual Address: {:p}", self.vmxon_region);
kdebug!(
"[+] VMXON Region Physical Addresss: 0x{:x}",
self.vmxon_region_physical_address
);
kdebug!("[+] VMCS Region Virtual Address: {:p}", self.vmcs_region);
kdebug!(
"[+] VMCS Region Physical Address1: 0x{:x}",
self.vmcs_region_physical_address
);
self.vmxon_region.revision_id = revision_id;
self.vmcs_region.revision_id = revision_id;
return Ok(());
}
}
impl VmxVcpu {
pub fn new(vcpu_id: u32, parent_vm: Vm) -> Result<Self, SystemError> {
kdebug!("Creating processor {}", vcpu_id);
let instance = Self {
vcpu_id,
vcpu_ctx: VcpuContextFrame {
regs: [0; NR_VCPU_REGS],
rip: 0,
rflags: 0,
},
vcpu_state: VcpuState::VcpuInv,
mmu: KvmMmu::default(),
data: VcpuData::alloc()?,
parent_vm,
};
Ok(instance)
}
/// Writes the guest CR0 derived from `cr0` into the `GUEST_CR0` VMCS field.
///
/// The value written has `CR0_NW` and `CR0_CD` cleared and `CR0_WP` and `CR0_NE`
/// forced on. Previously this adjusted value was computed but the raw `cr0` was written.
pub fn vmx_set_cr0(cr0: X86_CR0) -> Result<(), SystemError> {
    let mut hw_cr0 = cr0 & !(X86_CR0::CR0_NW | X86_CR0::CR0_CD);
    hw_cr0 |= X86_CR0::CR0_WP | X86_CR0::CR0_NE;
    vmx_vmwrite(VmcsFields::GUEST_CR0 as u32, hw_cr0.bits() as u64)?;
    Ok(())
}
/// Fills the guest-state area of the current VMCS.
///
/// Segment registers get their defaults through `seg_setup` (CS is then overridden with
/// selector `0xf000` / base `0xffff0000`), and RIP/RSP come from `self.vcpu_ctx`.
///
/// # Errors
///
/// Propagates every `vmx_vmwrite` / `vmx_vmread` failure. Reads of the host SYSENTER
/// fields used to panic on failure; they are now propagated as well.
pub fn vmcs_init_guest(&self) -> Result<(), SystemError> {
    // https://www.sandpile.org/x86/initial.htm
    // segment field initialization
    seg_setup(Sreg::CS as usize)?;
    vmx_vmwrite(VmcsFields::GUEST_CS_SELECTOR as u32, 0xf000)?;
    vmx_vmwrite(VmcsFields::GUEST_CS_BASE as u32, 0xffff0000)?;
    seg_setup(Sreg::DS as usize)?;
    seg_setup(Sreg::ES as usize)?;
    seg_setup(Sreg::FS as usize)?;
    seg_setup(Sreg::GS as usize)?;
    seg_setup(Sreg::SS as usize)?;
    vmx_vmwrite(VmcsFields::GUEST_TR_SELECTOR as u32, 0)?;
    vmx_vmwrite(VmcsFields::GUEST_TR_BASE as u32, 0)?;
    vmx_vmwrite(VmcsFields::GUEST_TR_LIMIT as u32, 0xffff)?;
    vmx_vmwrite(VmcsFields::GUEST_TR_ACCESS_RIGHTS as u32, 0x008b)?;
    vmx_vmwrite(VmcsFields::GUEST_LDTR_SELECTOR as u32, 0)?;
    vmx_vmwrite(VmcsFields::GUEST_LDTR_BASE as u32, 0)?;
    vmx_vmwrite(VmcsFields::GUEST_LDTR_LIMIT as u32, 0xffff)?;
    vmx_vmwrite(VmcsFields::GUEST_LDTR_ACCESS_RIGHTS as u32, 0x00082)?;
    vmx_vmwrite(VmcsFields::GUEST_RFLAGS as u32, 2)?;
    vmx_vmwrite(VmcsFields::GUEST_GDTR_BASE as u32, 0)?;
    vmx_vmwrite(VmcsFields::GUEST_GDTR_LIMIT as u32, 0x0000_FFFF as u64)?;
    vmx_vmwrite(VmcsFields::GUEST_IDTR_BASE as u32, 0)?;
    vmx_vmwrite(VmcsFields::GUEST_IDTR_LIMIT as u32, 0x0000_FFFF as u64)?;
    vmx_vmwrite(VmcsFields::GUEST_ACTIVITY_STATE as u32, 0)?; // State = Active
    vmx_vmwrite(VmcsFields::GUEST_INTERRUPTIBILITY_STATE as u32, 0)?;
    vmx_vmwrite(VmcsFields::GUEST_PENDING_DBG_EXCEPTIONS as u32, 0)?;
    vmx_vmwrite(VmcsFields::CTRL_VM_ENTRY_INTR_INFO_FIELD as u32, 0)?;
    let cr0 = X86_CR0::CR0_NW | X86_CR0::CR0_CD | X86_CR0::CR0_ET;
    Self::vmx_set_cr0(cr0)?;
    // NOTE(review): this write replaces whatever `vmx_set_cr0` just stored in GUEST_CR0
    // with the raw `cr0` value — confirm that is intended.
    vmx_vmwrite(VmcsFields::GUEST_CR0 as u32, cr0.bits() as u64)?;
    vmx_vmwrite(
        VmcsFields::GUEST_SYSENTER_CS as u32,
        vmx_vmread(VmcsFields::HOST_SYSENTER_CS as u32)?,
    )?;
    vmx_vmwrite(VmcsFields::GUEST_VMX_PREEMPT_TIMER_VALUE as u32, 0)?;
    vmx_vmwrite(VmcsFields::GUEST_INTR_STATUS as u32, 0)?;
    vmx_vmwrite(VmcsFields::GUEST_PML_INDEX as u32, 0)?;
    vmx_vmwrite(VmcsFields::GUEST_VMCS_LINK_PTR as u32, u64::MAX)?;
    vmx_vmwrite(VmcsFields::GUEST_DEBUGCTL as u32, unsafe {
        msr::rdmsr(msr::IA32_DEBUGCTL)
    })?;
    vmx_vmwrite(
        VmcsFields::GUEST_SYSENTER_ESP as u32,
        vmx_vmread(VmcsFields::HOST_SYSENTER_ESP as u32)?,
    )?;
    vmx_vmwrite(
        VmcsFields::GUEST_SYSENTER_EIP as u32,
        vmx_vmread(VmcsFields::HOST_SYSENTER_EIP as u32)?,
    )?;
    // Self::vmx_set_cr0();
    vmx_vmwrite(VmcsFields::GUEST_CR3 as u32, 0)?;
    vmx_vmwrite(
        VmcsFields::GUEST_CR4 as u32,
        1, // enable vme
    )?;
    vmx_vmwrite(VmcsFields::GUEST_DR7 as u32, 0x0000_0000_0000_0400)?;
    vmx_vmwrite(
        VmcsFields::GUEST_RSP as u32,
        self.vcpu_ctx.regs[VcpuRegIndex::Rsp as usize] as u64,
    )?;
    vmx_vmwrite(VmcsFields::GUEST_RIP as u32, self.vcpu_ctx.rip as u64)?;
    kdebug!("vmcs init guest rip: {:#x}", self.vcpu_ctx.rip as u64);
    kdebug!(
        "vmcs init guest rsp: {:#x}",
        self.vcpu_ctx.regs[VcpuRegIndex::Rsp as usize] as u64
    );
    // vmx_vmwrite(VmcsFields::GUEST_RFLAGS as u32, x86::bits64::rflags::read().bits())?;
    Ok(())
}
/// Fills the host-state area of the current VMCS from the live CPU state: control
/// registers, segment selectors (low three bits masked off), FS/GS bases, TR/GDTR/IDTR
/// bases and the SYSENTER MSRs. `HOST_RIP` is not written here.
///
/// # Errors
///
/// Propagates any `vmx_vmwrite` failure.
#[allow(deprecated)]
pub fn vmcs_init_host(&self) -> Result<(), SystemError> {
    vmx_vmwrite(VmcsFields::HOST_CR0 as u32, unsafe {
        controlregs::cr0().bits().try_into().unwrap()
    })?;
    vmx_vmwrite(VmcsFields::HOST_CR3 as u32, unsafe { controlregs::cr3() })?;
    vmx_vmwrite(VmcsFields::HOST_CR4 as u32, unsafe {
        controlregs::cr4().bits().try_into().unwrap()
    })?;
    // Selectors are written with bits 0..=2 cleared.
    vmx_vmwrite(
        VmcsFields::HOST_ES_SELECTOR as u32,
        (segmentation::es().bits() & (!0x07)).into(),
    )?;
    vmx_vmwrite(
        VmcsFields::HOST_CS_SELECTOR as u32,
        (segmentation::cs().bits() & (!0x07)).into(),
    )?;
    vmx_vmwrite(
        VmcsFields::HOST_SS_SELECTOR as u32,
        (segmentation::ss().bits() & (!0x07)).into(),
    )?;
    vmx_vmwrite(
        VmcsFields::HOST_DS_SELECTOR as u32,
        (segmentation::ds().bits() & (!0x07)).into(),
    )?;
    vmx_vmwrite(
        VmcsFields::HOST_FS_SELECTOR as u32,
        (segmentation::fs().bits() & (!0x07)).into(),
    )?;
    vmx_vmwrite(
        VmcsFields::HOST_GS_SELECTOR as u32,
        (segmentation::gs().bits() & (!0x07)).into(),
    )?;
    vmx_vmwrite(VmcsFields::HOST_TR_SELECTOR as u32, unsafe {
        (x86::task::tr().bits() & (!0x07)).into()
    })?;
    vmx_vmwrite(VmcsFields::HOST_FS_BASE as u32, unsafe {
        msr::rdmsr(msr::IA32_FS_BASE)
    })?;
    vmx_vmwrite(VmcsFields::HOST_GS_BASE as u32, unsafe {
        msr::rdmsr(msr::IA32_GS_BASE)
    })?;
    // Read the GDT pointer once; it is used for both the TR base lookup and HOST_GDTR_BASE.
    let mut pseudo_descriptpr: x86::dtables::DescriptorTablePointer<u64> = Default::default();
    unsafe {
        x86::dtables::sgdt(&mut pseudo_descriptpr);
    };
    vmx_vmwrite(
        VmcsFields::HOST_TR_BASE as u32,
        get_segment_base(pseudo_descriptpr.base, pseudo_descriptpr.limit, unsafe {
            x86::task::tr().bits().into()
        }),
    )?;
    vmx_vmwrite(
        VmcsFields::HOST_GDTR_BASE as u32,
        pseudo_descriptpr.base.to_bits() as u64,
    )?;
    vmx_vmwrite(VmcsFields::HOST_IDTR_BASE as u32, unsafe {
        let mut pseudo_descriptpr: x86::dtables::DescriptorTablePointer<u64> =
            Default::default();
        x86::dtables::sidt(&mut pseudo_descriptpr);
        pseudo_descriptpr.base.to_bits() as u64
    })?;
    // fast entry into the kernel
    vmx_vmwrite(VmcsFields::HOST_SYSENTER_ESP as u32, unsafe {
        msr::rdmsr(msr::IA32_SYSENTER_ESP)
    })?;
    vmx_vmwrite(VmcsFields::HOST_SYSENTER_EIP as u32, unsafe {
        msr::rdmsr(msr::IA32_SYSENTER_EIP)
    })?;
    vmx_vmwrite(VmcsFields::HOST_SYSENTER_CS as u32, unsafe {
        msr::rdmsr(msr::IA32_SYSENTER_CS)
    })?;
    // vmx_vmwrite(VmcsFields::HOST_RIP as u32, vmx_return as *const () as u64)?;
    // kdebug!("vmcs init host rip: {:#x}", vmx_return as *const () as u64);
    Ok(())
}
// Intel SDM Volume 3C Chapter 25.3 “Organization of VMCS Data”
/// Initializes the current VMCS: writes the control fields (page-fault mask/match,
/// CR3 target count, pin-based / processor-based / entry / exit controls, MSR bitmap
/// address, CR0/CR4 read shadows), then the host state, then the guest state.
///
/// # Errors
///
/// Propagates any failure from `vmx_vmwrite`, `vmcs_init_host` or `vmcs_init_guest`.
pub fn vmcs_init(&self) -> Result<(), SystemError> {
    vmx_vmwrite(VmcsFields::CTRL_PAGE_FAULT_ERR_CODE_MASK as u32, 0)?;
    vmx_vmwrite(VmcsFields::CTRL_PAGE_FAULT_ERR_CODE_MATCH as u32, 0)?;
    vmx_vmwrite(VmcsFields::CTRL_CR3_TARGET_COUNT as u32, 0)?;
    vmx_vmwrite(
        VmcsFields::CTRL_PIN_BASED_VM_EXEC_CTRLS as u32,
        adjust_vmx_pinbased_controls() as u64,
    )?;
    vmx_vmwrite(
        VmcsFields::CTRL_MSR_BITMAP_ADDR as u32,
        self.data.msr_bitmap_physical_address,
    )?;
    // The read shadows are seeded with the host's current CR0 / CR4 values.
    vmx_vmwrite(VmcsFields::CTRL_CR0_READ_SHADOW as u32, unsafe {
        controlregs::cr0().bits().try_into().unwrap()
    })?;
    vmx_vmwrite(VmcsFields::CTRL_CR4_READ_SHADOW as u32, unsafe {
        controlregs::cr4().bits().try_into().unwrap()
    })?;
    vmx_vmwrite(
        VmcsFields::CTRL_VM_ENTRY_CTRLS as u32,
        adjust_vmx_entry_controls() as u64,
    )?;
    vmx_vmwrite(
        VmcsFields::CTRL_PRIMARY_VM_EXIT_CTRLS as u32,
        adjust_vmx_exit_controls() as u64,
    )?;
    vmx_vmwrite(
        VmcsFields::CTRL_PRIMARY_PROCESSOR_VM_EXEC_CTRLS as u32,
        adjust_vmx_primary_process_exec_controls() as u64,
    )?;
    vmx_vmwrite(
        VmcsFields::CTRL_SECONDARY_PROCESSOR_VM_EXEC_CTRLS as u32,
        adjust_vmx_secondary_process_exec_controls() as u64,
    )?;
    self.vmcs_init_host()?;
    self.vmcs_init_guest()?;
    Ok(())
}
/// Creates a new EPT page table, programs its root into the VMCS through the `set_eptp`
/// callback and records the root in `self.mmu.root_hpa`.
///
/// # Errors
///
/// - `EINVAL` if no `set_eptp` callback has been installed (previously a panic)
/// - `ENOMEM` if the page table cannot be created
/// - any error returned by the callback
fn kvm_mmu_load(&mut self) -> Result<(), SystemError> {
    kdebug!("kvm_mmu_load!");
    // Check the callback first so nothing is allocated when it is missing.
    let set_eptp_fn = self.mmu.set_eptp.ok_or(SystemError::EINVAL)?;
    // Allocate and create the new page table.
    let mapper: crate::mm::page::PageMapper<X86_64MMArch, LockedFrameAllocator> = unsafe {
        PageMapper::create(PageTableKind::EPT, LockedFrameAllocator)
            .ok_or(SystemError::ENOMEM)?
    };
    let ept_root_hpa = mapper.table().phys();
    set_eptp_fn(ept_root_hpa.data() as u64)?;
    self.mmu.root_hpa = ept_root_hpa.data() as u64;
    kdebug!("ept_root_hpa:{:x}!", ept_root_hpa.data() as u64);
    Ok(())
}
/// Replaces the saved register context of this vcpu with `regs`. Always succeeds.
pub fn set_regs(&mut self, regs: VcpuContextFrame) -> Result<(), SystemError> {
    // The previous context is discarded.
    let _previous = core::mem::replace(&mut self.vcpu_ctx, regs);
    Ok(())
}
}
impl Vcpu for VmxVcpu {
    /// Virtualize the CPU
    ///
    /// Checks VMX support, enables VMX operation, executes VMXON, clears and loads this
    /// vcpu's VMCS, initializes it and finally loads the MMU.
    ///
    /// # Errors
    ///
    /// - `EOPNOTSUPP_OR_ENOTSUP` if VMX is unsupported or cannot be enabled
    /// - any error from `vmxon`, `vmx_vmclear`, `vmx_vmptrld`, `vmcs_init` or
    ///   `kvm_mmu_load`; a VMCS initialization failure used to panic and is now propagated
    fn virtualize_cpu(&mut self) -> Result<(), SystemError> {
        if let Err(e) = has_intel_vmx_support() {
            kdebug!("[-] CPU does not support Intel VMX: {:?}", e);
            return Err(SystemError::EOPNOTSUPP_OR_ENOTSUP);
        }
        kdebug!("[+] CPU supports Intel VMX");

        if enable_vmx_operation().is_err() {
            kdebug!("[-] VMX operation is not supported on this processor.");
            return Err(SystemError::EOPNOTSUPP_OR_ENOTSUP);
        }
        kdebug!("[+] Enabling Virtual Machine Extensions (VMX)");

        vmxon(self.data.vmxon_region_physical_address)?;
        kdebug!("[+] VMXON successful!");
        vmx_vmclear(self.data.vmcs_region_physical_address)?;
        vmx_vmptrld(self.data.vmcs_region_physical_address)?;
        kdebug!("[+] VMPTRLD successful!");
        self.vmcs_init()?;
        kdebug!("[+] VMCS init!");
        // kdebug!("vmcs init host rip: {:#x}", vmx_return as *const () as u64);
        // kdebug!("vmcs init host rsp: {:#x}", x86::bits64::registers::rsp());
        // vmx_vmwrite(VmcsFields::HOST_RSP as u32, x86::bits64::registers::rsp())?;
        // vmx_vmwrite(VmcsFields::HOST_RIP as u32, vmx_return as *const () as u64)?;
        // vmx_vmwrite(VmcsFields::HOST_RSP as u32, x86::bits64::registers::rsp())?;
        self.kvm_mmu_load()?;
        Ok(())
    }

    /// Leaves VMX operation by executing VMXOFF.
    fn devirtualize_cpu(&self) -> Result<(), SystemError> {
        vmxoff()?;
        Ok(())
    }

    /// Gets the index of the current logical/virtual processor
    fn id(&self) -> u32 {
        self.vcpu_id
    }
}
/// Extracts the 32-bit base address from the descriptor that `segment_selector` refers to
/// and returns the result of passing that base through `phys_2_virt`.
///
/// * `gdt_base` - pointer to the first 8-byte descriptor of the table.
/// * `gdt_size` - number of `u64` entries the table slice is built with.
/// * `segment_selector` - selector; bit 2 is the table indicator, bits 3.. the index.
///
/// Returns 0 for a selector whose table indicator and index are both zero.
/// Panics if the selector index is outside the slice.
pub fn get_segment_base(gdt_base: *const u64, gdt_size: u16, segment_selector: u16) -> u64 {
    let table_indicator = segment_selector & 0x0004;
    let slot = usize::from(segment_selector >> 3);
    if table_indicator == 0 && slot == 0 {
        return 0;
    }

    let gdt = unsafe { slice::from_raw_parts(gdt_base, usize::from(gdt_size)) };
    let raw = gdt[slot];

    // Descriptor layout: base[31:24] lives in bits 63:56, base[23:0] in bits 39:16.
    let upper_byte = (raw >> 32) & 0xFF00_0000;
    let lower_24 = (raw >> 16) & 0x00FF_FFFF;
    let segment_base = (upper_byte | lower_24) & 0xFFFFFFFF;

    let virtaddr: u64 = phys_2_virt(segment_base.try_into().unwrap())
        .try_into()
        .unwrap();
    kdebug!(
        "segment_base={:x}",
        phys_2_virt(segment_base.try_into().unwrap())
    );
    virtaddr
}
// FIXME: may have bug
// pub fn read_segment_access_rights(segement_selector: u16) -> u32{
// let table = segement_selector & 0x0004; // get table indicator in selector
// let index = segement_selector & 0xFFF8; // get index in selector
// let mut flag: u16;
// if table==0 && index==0 {
// return 0;
// }
// unsafe{
// asm!(
// "lar {0:r}, rcx",
// "mov {1:r}, {0:r}",
// in(reg) segement_selector,
// out(reg) flag,
// );
// }
// return (flag >> 8) as u32;
// }
pub fn adjust_vmx_controls(ctl_min: u32, ctl_opt: u32, msr: u32, result: &mut u32) {
let vmx_msr_low: u32 = unsafe { (msr::rdmsr(msr) & 0x0000_0000_FFFF_FFFF) as u32 };
let vmx_msr_high: u32 = unsafe { (msr::rdmsr(msr) << 32) as u32 };
let mut ctl: u32 = ctl_min | ctl_opt;
ctl &= vmx_msr_high; /* bit == 0 in high word ==> must be zero */
ctl |= vmx_msr_low; /* bit == 1 in low word ==> must be one */
*result = ctl;
}
/// Builds the VM-entry control word.
///
/// Requests `LOAD_DBG_CTRLS` as the minimum and `IA32E_MODE_GUEST` as the optional set,
/// and lets `adjust_vmx_controls` reconcile them with `IA32_VMX_ENTRY_CTLS`
/// (Capability Reporting Register of VM-entry Controls, R/O).
pub fn adjust_vmx_entry_controls() -> u32 {
    let required = VmxEntryCtrl::LOAD_DBG_CTRLS.bits();
    let optional = VmxEntryCtrl::IA32E_MODE_GUEST.bits();
    let mut adjusted: u32 = 0;
    adjust_vmx_controls(required, optional, msr::IA32_VMX_ENTRY_CTLS, &mut adjusted);
    // msr::IA32_VMX_TRUE_ENTRY_CTLS is the Capability Reporting Register of
    // VM-entry Flex Controls (R/O), see Table 35-2; it is not used here.
    adjusted
}
/// Builds the primary VM-exit control word.
///
/// Requests `SAVE_DBG_CTRLS` as the minimum and `HOST_ADDR_SPACE_SIZE` as the optional
/// set, reconciled by `adjust_vmx_controls` against `IA32_VMX_EXIT_CTLS`.
pub fn adjust_vmx_exit_controls() -> u32 {
    let required = VmxPrimaryExitCtrl::SAVE_DBG_CTRLS.bits();
    let optional = VmxPrimaryExitCtrl::HOST_ADDR_SPACE_SIZE.bits();
    let mut adjusted: u32 = 0;
    adjust_vmx_controls(required, optional, msr::IA32_VMX_EXIT_CTLS, &mut adjusted);
    adjusted
}
/// Builds the pin-based VM-execution control word.
///
/// No minimum or optional bits are requested; the value is whatever
/// `adjust_vmx_controls` derives from `IA32_VMX_TRUE_PINBASED_CTLS`.
pub fn adjust_vmx_pinbased_controls() -> u32 {
    // `adjust_vmx_controls` overwrites `controls` unconditionally, so the initial value is
    // irrelevant. Start from 0 like the sibling helpers: the former `0000_0016` literal
    // was decimal 16 (not hex) and only looked meaningful.
    let mut controls: u32 = 0;
    adjust_vmx_controls(0, 0, msr::IA32_VMX_TRUE_PINBASED_CTLS, &mut controls);
    // kdebug!("adjust_vmx_pinbased_controls: {:x}", controls);
    controls
}
/// Builds the primary processor-based VM-execution control word.
///
/// Nothing is required; `USE_MSR_BITMAPS` and `ACTIVATE_SECONDARY_CONTROLS` are requested
/// as optional bits and reconciled against `IA32_VMX_PROCBASED_CTLS`.
pub fn adjust_vmx_primary_process_exec_controls() -> u32 {
    let optional = (VmxPrimaryProcessBasedExecuteCtrl::USE_MSR_BITMAPS
        | VmxPrimaryProcessBasedExecuteCtrl::ACTIVATE_SECONDARY_CONTROLS)
        .bits();
    let mut adjusted: u32 = 0;
    adjust_vmx_controls(0, optional, msr::IA32_VMX_PROCBASED_CTLS, &mut adjusted);
    adjusted
}
/// Builds the secondary processor-based VM-execution control word.
///
/// Nothing is required; RDTSCP, XSAVES/XRSTORS, INVPCID, EPT and unrestricted-guest are
/// requested as optional bits and reconciled against `IA32_VMX_PROCBASED_CTLS2`.
pub fn adjust_vmx_secondary_process_exec_controls() -> u32 {
    let optional = (VmxSecondaryProcessBasedExecuteCtrl::ENABLE_RDTSCP
        | VmxSecondaryProcessBasedExecuteCtrl::ENABLE_XSAVES_XRSTORS
        | VmxSecondaryProcessBasedExecuteCtrl::ENABLE_INVPCID
        | VmxSecondaryProcessBasedExecuteCtrl::ENABLE_EPT
        | VmxSecondaryProcessBasedExecuteCtrl::UNRESTRICTED_GUEST)
        .bits();
    let mut adjusted: u32 = 0;
    adjust_vmx_controls(0, optional, msr::IA32_VMX_PROCBASED_CTLS2, &mut adjusted);
    adjusted
}
/// Check to see if CPU is Intel (“GenuineIntel”).
/// Check processor supports for Virtual Machine Extension (VMX) technology
///
/// # Errors
/// Returns `SystemError::EOPNOTSUPP_OR_ENOTSUP` when the vendor is not "GenuineIntel",
/// when the feature flags do not report VMX, or when either CPUID leaf is unavailable
/// (an unreadable leaf is treated as "not supported" rather than silently accepted).
// CPUID.1:ECX.VMX[bit 5] = 1 (Intel Manual: 24.6 Discovering Support for VMX)
pub fn has_intel_vmx_support() -> Result<(), SystemError> {
    let cpuid = CpuId::new();

    let is_intel = cpuid
        .get_vendor_info()
        .map_or(false, |vi| vi.as_str() == "GenuineIntel");
    if !is_intel {
        return Err(SystemError::EOPNOTSUPP_OR_ENOTSUP);
    }

    let has_vmx = cpuid.get_feature_info().map_or(false, |fi| fi.has_vmx());
    if !has_vmx {
        return Err(SystemError::EOPNOTSUPP_OR_ENOTSUP);
    }

    Ok(())
}
/// Enables Virtual Machine Extensions.
///
/// Sets CR4.VMXE, then makes sure IA32_FEATURE_CONTROL permits VMXON, and finally forces
/// the CR0/CR4 bits that the VMX fixed-bit MSRs mandate.
///
/// # Errors
/// Propagates the error from `set_lock_bit`.
// - CR4.VMXE[bit 13] = 1 (Intel Manual: 24.7 Enabling and Entering VMX Operation)
pub fn enable_vmx_operation() -> Result<(), SystemError> {
    let cr4_with_vmxe = unsafe { controlregs::cr4() } | controlregs::Cr4::CR4_ENABLE_VMX;
    unsafe { controlregs::cr4_write(cr4_with_vmxe) };

    set_lock_bit()?;
    kdebug!("[+] Lock bit set via IA32_FEATURE_CONTROL");

    set_cr0_bits();
    kdebug!("[+] Mandatory bits in CR0 set/cleared");

    set_cr4_bits();
    kdebug!("[+] Mandatory bits in CR4 set/cleared");

    Ok(())
}
/// Ensures IA32_FEATURE_CONTROL allows VMXON outside SMX.
///
/// * Lock bit clear: writes the MSR back with both the lock bit and the
///   "VMXON outside SMX" bit added to its current value.
/// * Lock bit set but "VMXON outside SMX" clear: returns `SystemError::EPERM`.
/// * Both already set: nothing to do.
// (Intel Manual: 24.7 Enabling and Entering VMX Operation)
fn set_lock_bit() -> Result<(), SystemError> {
    const VMX_LOCK_BIT: u64 = 1 << 0;
    const VMXON_OUTSIDE_SMX: u64 = 1 << 2;

    let feature_control = unsafe { msr::rdmsr(msr::IA32_FEATURE_CONTROL) };
    let locked = (feature_control & VMX_LOCK_BIT) != 0;
    let vmxon_allowed = (feature_control & VMXON_OUTSIDE_SMX) != 0;

    match (locked, vmxon_allowed) {
        (false, _) => {
            unsafe {
                msr::wrmsr(
                    msr::IA32_FEATURE_CONTROL,
                    VMXON_OUTSIDE_SMX | VMX_LOCK_BIT | feature_control,
                )
            };
            Ok(())
        }
        (true, false) => Err(SystemError::EPERM),
        (true, true) => Ok(()),
    }
}
/// Forces CR0 to satisfy the VMX fixed-bit MSRs: bits set in IA32_VMX_CR0_FIXED0 are
/// turned on, bits clear in IA32_VMX_CR0_FIXED1 are turned off.
/// (Intel Manual: 24.8 Restrictions on VMX Operation)
fn set_cr0_bits() {
    let must_be_one = unsafe { msr::rdmsr(msr::IA32_VMX_CR0_FIXED0) };
    let may_be_one = unsafe { msr::rdmsr(msr::IA32_VMX_CR0_FIXED1) };
    let current = unsafe { controlregs::cr0() };

    let adjusted = (current | controlregs::Cr0::from_bits_truncate(must_be_one as usize))
        & controlregs::Cr0::from_bits_truncate(may_be_one as usize);

    unsafe { controlregs::cr0_write(adjusted) };
}
/// Forces CR4 to satisfy the VMX fixed-bit MSRs: bits set in IA32_VMX_CR4_FIXED0 are
/// turned on, bits clear in IA32_VMX_CR4_FIXED1 are turned off.
/// (Intel Manual: 24.8 Restrictions on VMX Operation)
fn set_cr4_bits() {
    let must_be_one = unsafe { msr::rdmsr(msr::IA32_VMX_CR4_FIXED0) };
    let may_be_one = unsafe { msr::rdmsr(msr::IA32_VMX_CR4_FIXED1) };
    let current = unsafe { controlregs::cr4() };

    let adjusted = (current | controlregs::Cr4::from_bits_truncate(must_be_one as usize))
        & controlregs::Cr4::from_bits_truncate(may_be_one as usize);

    unsafe { controlregs::cr4_write(adjusted) };
}

View File

@ -0,0 +1,539 @@
use bitflags::bitflags;
use num_derive::FromPrimitive;
/// Size in bytes (4 KiB) used to size the `VMCSRegion` payload below.
pub const PAGE_SIZE: usize = 0x1000;
/// In-memory image of a VMCS region: two 32-bit header words followed by opaque data,
/// `PAGE_SIZE` bytes in total and aligned to 4096 bytes.
#[repr(C, align(4096))]
#[derive(Clone, Debug)]
pub struct VMCSRegion {
// First header word: revision identifier.
pub revision_id: u32,
// Second header word: abort indicator.
pub abort_indicator: u32,
// Remainder of the page (PAGE_SIZE minus the 8 header bytes); not interpreted here.
data: [u8; PAGE_SIZE - 8],
}
// (Intel Manual: 25.11.2 VMREAD, VMWRITE, and Encodings of VMCS Fields)
/// Access-type component of a VMCS field encoding; `encode_vmcs_field` places it in bit 0.
#[derive(FromPrimitive)]
enum VmcsAccessType {
FULL = 0,
HIGH = 1,
}
/// Field-type component of a VMCS field encoding; `encode_vmcs_field` places it at bit 10.
#[derive(FromPrimitive)]
enum VmcsType {
CONTROL = 0,
VMEXIT = 1,
GUEST = 2,
HOST = 3,
}
/// Width component of a VMCS field encoding; `encode_vmcs_field` places it at bit 13.
/// Note the numbering: 64-bit is 1 and 32-bit is 2.
#[derive(FromPrimitive)]
enum VmcsWidth {
BIT16 = 0,
BIT64 = 1,
BIT32 = 2,
NATURAL = 3,
}
/// VMCS field encodings, usable as `VmcsFields::X as u32` with VMREAD/VMWRITE wrappers.
///
/// Every discriminant except `GUEST_VMX_PREEMPT_TIMER_VALUE` (a literal) is computed by
/// `encode_vmcs_field_full(type, width, index)`, i.e. access type FULL with the index
/// placed at bit 1, the type at bit 10 and the width at bit 13. Variants are grouped by
/// field type (control / VM-exit / guest / host) and, within each, by width.
#[derive(FromPrimitive)]
#[allow(non_camel_case_types)]
// (Intel Manual: APPENDIX B FIELD ENCODING IN VMCS)
pub enum VmcsFields {
// [CONTROL] fields
// 16-bit control fields
CTRL_VIRT_PROC_ID = encode_vmcs_field_full(VmcsType::CONTROL, VmcsWidth::BIT16, 0) as isize,
CTRL_POSTED_INTR_N_VECTOR =
encode_vmcs_field_full(VmcsType::CONTROL, VmcsWidth::BIT16, 1) as isize,
CTRL_EPTP_INDEX = encode_vmcs_field_full(VmcsType::CONTROL, VmcsWidth::BIT16, 2) as isize,
// 64-bit control fields
CTRL_IO_BITMAP_A_ADDR = encode_vmcs_field_full(VmcsType::CONTROL, VmcsWidth::BIT64, 0) as isize,
CTRL_IO_BITMAP_B_ADDR = encode_vmcs_field_full(VmcsType::CONTROL, VmcsWidth::BIT64, 1) as isize,
CTRL_MSR_BITMAP_ADDR = encode_vmcs_field_full(VmcsType::CONTROL, VmcsWidth::BIT64, 2) as isize, // control whether RDMSR or WRMSR cause VM exit
CTRL_VMEXIT_MSR_STORE_ADDR =
encode_vmcs_field_full(VmcsType::CONTROL, VmcsWidth::BIT64, 3) as isize,
CTRL_VMEXIT_MSR_LOAD_ADDR =
encode_vmcs_field_full(VmcsType::CONTROL, VmcsWidth::BIT64, 4) as isize,
CTRL_VMENTRY_MSR_LOAD_ADDR =
encode_vmcs_field_full(VmcsType::CONTROL, VmcsWidth::BIT64, 5) as isize,
CTRL_EXECUTIVE_VMCS_PTR =
encode_vmcs_field_full(VmcsType::CONTROL, VmcsWidth::BIT64, 6) as isize,
CTRL_PML_ADDR = encode_vmcs_field_full(VmcsType::CONTROL, VmcsWidth::BIT64, 7) as isize,
// NOTE(review): index 8 in this group is the "TSC offset" field in the Intel SDM.
CTRL_TSC_ADDR = encode_vmcs_field_full(VmcsType::CONTROL, VmcsWidth::BIT64, 8) as isize,
CTRL_VIRT_APIC_ADDR = encode_vmcs_field_full(VmcsType::CONTROL, VmcsWidth::BIT64, 9) as isize,
CTRL_APIC_ACCESS_ADDR =
encode_vmcs_field_full(VmcsType::CONTROL, VmcsWidth::BIT64, 10) as isize,
CTRL_POSTED_INTR_DESC_ADDR =
encode_vmcs_field_full(VmcsType::CONTROL, VmcsWidth::BIT64, 11) as isize,
CTRL_VMFUNC_CTRL = encode_vmcs_field_full(VmcsType::CONTROL, VmcsWidth::BIT64, 12) as isize,
CTRL_EPTP_PTR = encode_vmcs_field_full(VmcsType::CONTROL, VmcsWidth::BIT64, 13) as isize,
CTRL_EOI_EXIT_BITMAP_0 =
encode_vmcs_field_full(VmcsType::CONTROL, VmcsWidth::BIT64, 14) as isize,
CTRL_EOI_EXIT_BITMAP_1 =
encode_vmcs_field_full(VmcsType::CONTROL, VmcsWidth::BIT64, 15) as isize,
CTRL_EOI_EXIT_BITMAP_2 =
encode_vmcs_field_full(VmcsType::CONTROL, VmcsWidth::BIT64, 16) as isize,
CTRL_EOI_EXIT_BITMAP_3 =
encode_vmcs_field_full(VmcsType::CONTROL, VmcsWidth::BIT64, 17) as isize,
CTRL_EPT_LIST_ADDR = encode_vmcs_field_full(VmcsType::CONTROL, VmcsWidth::BIT64, 18) as isize,
CTRL_VMREAD_BITMAP_ADDR =
encode_vmcs_field_full(VmcsType::CONTROL, VmcsWidth::BIT64, 19) as isize,
CTRL_VMWRITE_BITMAP_ADDR =
encode_vmcs_field_full(VmcsType::CONTROL, VmcsWidth::BIT64, 20) as isize,
CTRL_VIRT_EXECPT_INFO_ADDR =
encode_vmcs_field_full(VmcsType::CONTROL, VmcsWidth::BIT64, 21) as isize,
CTRL_XSS_EXITING_BITMAP =
encode_vmcs_field_full(VmcsType::CONTROL, VmcsWidth::BIT64, 22) as isize,
CTRL_ENCLS_EXITING_BITMAP =
encode_vmcs_field_full(VmcsType::CONTROL, VmcsWidth::BIT64, 23) as isize,
// Index 24 is intentionally skipped; no variant is defined for it here.
CTRL_TSC_MULTIPLIER = encode_vmcs_field_full(VmcsType::CONTROL, VmcsWidth::BIT64, 25) as isize,
// 32-bit control fields
CTRL_PIN_BASED_VM_EXEC_CTRLS =
encode_vmcs_field_full(VmcsType::CONTROL, VmcsWidth::BIT32, 0) as isize, // control async event handling (i.e. interrupts)
CTRL_PRIMARY_PROCESSOR_VM_EXEC_CTRLS =
encode_vmcs_field_full(VmcsType::CONTROL, VmcsWidth::BIT32, 1) as isize, // control sync event handling (i.e. instruction exits)
CTRL_EXPECTION_BITMAP = encode_vmcs_field_full(VmcsType::CONTROL, VmcsWidth::BIT32, 2) as isize, // bitmap to control exceptions that cause a VM exit
CTRL_PAGE_FAULT_ERR_CODE_MASK =
encode_vmcs_field_full(VmcsType::CONTROL, VmcsWidth::BIT32, 3) as isize,
CTRL_PAGE_FAULT_ERR_CODE_MATCH =
encode_vmcs_field_full(VmcsType::CONTROL, VmcsWidth::BIT32, 4) as isize,
CTRL_CR3_TARGET_COUNT = encode_vmcs_field_full(VmcsType::CONTROL, VmcsWidth::BIT32, 5) as isize,
CTRL_PRIMARY_VM_EXIT_CTRLS =
encode_vmcs_field_full(VmcsType::CONTROL, VmcsWidth::BIT32, 6) as isize,
CTRL_VM_EXIT_MSR_STORE_COUNT =
encode_vmcs_field_full(VmcsType::CONTROL, VmcsWidth::BIT32, 7) as isize,
CTRL_VM_EXIT_MSR_LOAD_COUNT =
encode_vmcs_field_full(VmcsType::CONTROL, VmcsWidth::BIT32, 8) as isize,
CTRL_VM_ENTRY_CTRLS = encode_vmcs_field_full(VmcsType::CONTROL, VmcsWidth::BIT32, 9) as isize,
CTRL_VM_ENTRY_MSR_LOAD_COUNT =
encode_vmcs_field_full(VmcsType::CONTROL, VmcsWidth::BIT32, 10) as isize,
CTRL_VM_ENTRY_INTR_INFO_FIELD =
encode_vmcs_field_full(VmcsType::CONTROL, VmcsWidth::BIT32, 11) as isize,
CTRL_VM_ENTRY_EXCEPTION_ERR_CODE =
encode_vmcs_field_full(VmcsType::CONTROL, VmcsWidth::BIT32, 12) as isize,
CTRL_VM_ENTRY_INSTR_LEN =
encode_vmcs_field_full(VmcsType::CONTROL, VmcsWidth::BIT32, 13) as isize,
CTRL_TPR_THRESHOLD = encode_vmcs_field_full(VmcsType::CONTROL, VmcsWidth::BIT32, 14) as isize,
CTRL_SECONDARY_PROCESSOR_VM_EXEC_CTRLS =
encode_vmcs_field_full(VmcsType::CONTROL, VmcsWidth::BIT32, 15) as isize,
CTRL_PLE_GAP = encode_vmcs_field_full(VmcsType::CONTROL, VmcsWidth::BIT32, 16) as isize,
CTRL_PLE_WINDOW = encode_vmcs_field_full(VmcsType::CONTROL, VmcsWidth::BIT32, 17) as isize,
// natural control fields
CTRL_CR0_GUEST_HOST_MASK =
encode_vmcs_field_full(VmcsType::CONTROL, VmcsWidth::NATURAL, 0) as isize, // control executions of insts that access cr0
CTRL_CR4_GUEST_HOST_MASK =
encode_vmcs_field_full(VmcsType::CONTROL, VmcsWidth::NATURAL, 1) as isize,
CTRL_CR0_READ_SHADOW =
encode_vmcs_field_full(VmcsType::CONTROL, VmcsWidth::NATURAL, 2) as isize, // control executions of insts that access cr0
CTRL_CR4_READ_SHADOW =
encode_vmcs_field_full(VmcsType::CONTROL, VmcsWidth::NATURAL, 3) as isize,
CTRL_CR3_TARGET_VALUE_0 =
encode_vmcs_field_full(VmcsType::CONTROL, VmcsWidth::NATURAL, 4) as isize,
CTRL_CR3_TARGET_VALUE_1 =
encode_vmcs_field_full(VmcsType::CONTROL, VmcsWidth::NATURAL, 5) as isize,
CTRL_CR3_TARGET_VALUE_2 =
encode_vmcs_field_full(VmcsType::CONTROL, VmcsWidth::NATURAL, 6) as isize,
CTRL_CR3_TARGET_VALUE_3 =
encode_vmcs_field_full(VmcsType::CONTROL, VmcsWidth::NATURAL, 7) as isize,
// [VMEXIT] fields read-only
// No 16-bit vmexit fields
// 64-bit vmexit fields
VMEXIT_GUEST_PHY_ADDR = encode_vmcs_field_full(VmcsType::VMEXIT, VmcsWidth::BIT64, 0) as isize,
// 32-bit vmexit fields
VMEXIT_INSTR_ERR = encode_vmcs_field_full(VmcsType::VMEXIT, VmcsWidth::BIT32, 0) as isize,
VMEXIT_EXIT_REASON = encode_vmcs_field_full(VmcsType::VMEXIT, VmcsWidth::BIT32, 1) as isize,
VMEXIT_INT_INFO = encode_vmcs_field_full(VmcsType::VMEXIT, VmcsWidth::BIT32, 2) as isize,
VMEXIT_INT_ERR_CODE = encode_vmcs_field_full(VmcsType::VMEXIT, VmcsWidth::BIT32, 3) as isize,
VMEXIT_IDT_VECTOR_INFO = encode_vmcs_field_full(VmcsType::VMEXIT, VmcsWidth::BIT32, 4) as isize,
VMEXIT_IDT_VECTOR_ERR_CODE =
encode_vmcs_field_full(VmcsType::VMEXIT, VmcsWidth::BIT32, 5) as isize,
VMEXIT_INSTR_LEN = encode_vmcs_field_full(VmcsType::VMEXIT, VmcsWidth::BIT32, 6) as isize,
VMEXIT_INSTR_INFO = encode_vmcs_field_full(VmcsType::VMEXIT, VmcsWidth::BIT32, 7) as isize,
// natural vmexit fields
VMEXIT_QUALIFICATION = encode_vmcs_field_full(VmcsType::VMEXIT, VmcsWidth::NATURAL, 0) as isize,
VMEXIT_IO_RCX = encode_vmcs_field_full(VmcsType::VMEXIT, VmcsWidth::NATURAL, 1) as isize,
// NOTE(review): index 2 in this group is "I/O RSI" in the Intel SDM; "RSX" looks like a typo in the name.
VMEXIT_IO_RSX = encode_vmcs_field_full(VmcsType::VMEXIT, VmcsWidth::NATURAL, 2) as isize,
VMEXIT_IO_RDI = encode_vmcs_field_full(VmcsType::VMEXIT, VmcsWidth::NATURAL, 3) as isize,
VMEXIT_IO_RIP = encode_vmcs_field_full(VmcsType::VMEXIT, VmcsWidth::NATURAL, 4) as isize,
VMEXIT_GUEST_LINEAR_ADDR =
encode_vmcs_field_full(VmcsType::VMEXIT, VmcsWidth::NATURAL, 5) as isize,
// [GUEST] fields
// 16-bit guest fields
GUEST_ES_SELECTOR = encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::BIT16, 0) as isize,
GUEST_CS_SELECTOR = encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::BIT16, 1) as isize,
GUEST_SS_SELECTOR = encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::BIT16, 2) as isize,
GUEST_DS_SELECTOR = encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::BIT16, 3) as isize,
GUEST_FS_SELECTOR = encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::BIT16, 4) as isize,
GUEST_GS_SELECTOR = encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::BIT16, 5) as isize,
GUEST_LDTR_SELECTOR = encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::BIT16, 6) as isize,
GUEST_TR_SELECTOR = encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::BIT16, 7) as isize,
GUEST_INTR_STATUS = encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::BIT16, 8) as isize,
GUEST_PML_INDEX = encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::BIT16, 9) as isize,
// 64-bit guest fields
GUEST_VMCS_LINK_PTR = encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::BIT64, 0) as isize,
GUEST_DEBUGCTL = encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::BIT64, 1) as isize,
GUEST_PAT = encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::BIT64, 2) as isize,
GUEST_EFER = encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::BIT64, 3) as isize,
GUEST_PERF_GLOBAL_CTRL = encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::BIT64, 4) as isize,
GUEST_PDPTE0 = encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::BIT64, 5) as isize,
GUEST_PDPTE1 = encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::BIT64, 6) as isize,
GUEST_PDPTE2 = encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::BIT64, 7) as isize,
GUEST_PDPTE3 = encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::BIT64, 8) as isize,
// 32-bit guest fields
GUEST_ES_LIMIT = encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::BIT32, 0) as isize,
GUEST_CS_LIMIT = encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::BIT32, 1) as isize,
GUEST_SS_LIMIT = encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::BIT32, 2) as isize,
GUEST_DS_LIMIT = encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::BIT32, 3) as isize,
GUEST_FS_LIMIT = encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::BIT32, 4) as isize,
GUEST_GS_LIMIT = encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::BIT32, 5) as isize,
GUEST_LDTR_LIMIT = encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::BIT32, 6) as isize,
GUEST_TR_LIMIT = encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::BIT32, 7) as isize,
GUEST_GDTR_LIMIT = encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::BIT32, 8) as isize,
GUEST_IDTR_LIMIT = encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::BIT32, 9) as isize,
GUEST_ES_ACCESS_RIGHTS = encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::BIT32, 10) as isize,
GUEST_CS_ACCESS_RIGHTS = encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::BIT32, 11) as isize,
GUEST_SS_ACCESS_RIGHTS = encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::BIT32, 12) as isize,
GUEST_DS_ACCESS_RIGHTS = encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::BIT32, 13) as isize,
GUEST_FS_ACCESS_RIGHTS = encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::BIT32, 14) as isize,
GUEST_GS_ACCESS_RIGHTS = encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::BIT32, 15) as isize,
GUEST_LDTR_ACCESS_RIGHTS =
encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::BIT32, 16) as isize,
GUEST_TR_ACCESS_RIGHTS = encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::BIT32, 17) as isize,
GUEST_INTERRUPTIBILITY_STATE =
encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::BIT32, 18) as isize,
GUEST_ACTIVITY_STATE = encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::BIT32, 19) as isize,
GUEST_SMBASE = encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::BIT32, 20) as isize,
GUEST_SYSENTER_CS = encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::BIT32, 21) as isize,
// Literal encoding; equals what the encoder would give for (GUEST, BIT32, index 23).
GUEST_VMX_PREEMPT_TIMER_VALUE = 0x482E as isize,
// natural guest fields
GUEST_CR0 = encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::NATURAL, 0) as isize,
GUEST_CR3 = encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::NATURAL, 1) as isize,
GUEST_CR4 = encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::NATURAL, 2) as isize,
GUEST_ES_BASE = encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::NATURAL, 3) as isize,
GUEST_CS_BASE = encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::NATURAL, 4) as isize,
GUEST_SS_BASE = encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::NATURAL, 5) as isize,
GUEST_DS_BASE = encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::NATURAL, 6) as isize,
GUEST_FS_BASE = encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::NATURAL, 7) as isize,
GUEST_GS_BASE = encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::NATURAL, 8) as isize,
GUEST_LDTR_BASE = encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::NATURAL, 9) as isize,
GUEST_TR_BASE = encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::NATURAL, 10) as isize,
GUEST_GDTR_BASE = encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::NATURAL, 11) as isize,
GUEST_IDTR_BASE = encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::NATURAL, 12) as isize,
GUEST_DR7 = encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::NATURAL, 13) as isize,
GUEST_RSP = encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::NATURAL, 14) as isize,
GUEST_RIP = encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::NATURAL, 15) as isize,
GUEST_RFLAGS = encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::NATURAL, 16) as isize,
GUEST_PENDING_DBG_EXCEPTIONS =
encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::NATURAL, 17) as isize,
GUEST_SYSENTER_ESP = encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::NATURAL, 18) as isize,
GUEST_SYSENTER_EIP = encode_vmcs_field_full(VmcsType::GUEST, VmcsWidth::NATURAL, 19) as isize,
// [HOST] fields
// host 16 bit fields
HOST_ES_SELECTOR = encode_vmcs_field_full(VmcsType::HOST, VmcsWidth::BIT16, 0) as isize,
HOST_CS_SELECTOR = encode_vmcs_field_full(VmcsType::HOST, VmcsWidth::BIT16, 1) as isize,
HOST_SS_SELECTOR = encode_vmcs_field_full(VmcsType::HOST, VmcsWidth::BIT16, 2) as isize,
HOST_DS_SELECTOR = encode_vmcs_field_full(VmcsType::HOST, VmcsWidth::BIT16, 3) as isize,
HOST_FS_SELECTOR = encode_vmcs_field_full(VmcsType::HOST, VmcsWidth::BIT16, 4) as isize,
HOST_GS_SELECTOR = encode_vmcs_field_full(VmcsType::HOST, VmcsWidth::BIT16, 5) as isize,
HOST_TR_SELECTOR = encode_vmcs_field_full(VmcsType::HOST, VmcsWidth::BIT16, 6) as isize,
// host 64 bit fields
HOST_PAT = encode_vmcs_field_full(VmcsType::HOST, VmcsWidth::BIT64, 0) as isize,
HOST_EFER = encode_vmcs_field_full(VmcsType::HOST, VmcsWidth::BIT64, 1) as isize,
HOST_PERF_GLOBAL_CTRL = encode_vmcs_field_full(VmcsType::HOST, VmcsWidth::BIT64, 2) as isize,
// host 32 bit fields
HOST_SYSENTER_CS = encode_vmcs_field_full(VmcsType::HOST, VmcsWidth::BIT32, 0) as isize,
// host natural fields
HOST_CR0 = encode_vmcs_field_full(VmcsType::HOST, VmcsWidth::NATURAL, 0) as isize,
HOST_CR3 = encode_vmcs_field_full(VmcsType::HOST, VmcsWidth::NATURAL, 1) as isize,
HOST_CR4 = encode_vmcs_field_full(VmcsType::HOST, VmcsWidth::NATURAL, 2) as isize,
HOST_FS_BASE = encode_vmcs_field_full(VmcsType::HOST, VmcsWidth::NATURAL, 3) as isize,
HOST_GS_BASE = encode_vmcs_field_full(VmcsType::HOST, VmcsWidth::NATURAL, 4) as isize,
HOST_TR_BASE = encode_vmcs_field_full(VmcsType::HOST, VmcsWidth::NATURAL, 5) as isize,
HOST_GDTR_BASE = encode_vmcs_field_full(VmcsType::HOST, VmcsWidth::NATURAL, 6) as isize,
HOST_IDTR_BASE = encode_vmcs_field_full(VmcsType::HOST, VmcsWidth::NATURAL, 7) as isize,
HOST_SYSENTER_ESP = encode_vmcs_field_full(VmcsType::HOST, VmcsWidth::NATURAL, 8) as isize,
HOST_SYSENTER_EIP = encode_vmcs_field_full(VmcsType::HOST, VmcsWidth::NATURAL, 9) as isize,
HOST_RSP = encode_vmcs_field_full(VmcsType::HOST, VmcsWidth::NATURAL, 10) as isize,
HOST_RIP = encode_vmcs_field_full(VmcsType::HOST, VmcsWidth::NATURAL, 11) as isize,
}
// (Intel Manual: 25.6 VM-EXECUTION CONTROL FIELDS)
bitflags! {
// (Intel Manual: 25.6.1 Pin-Based VM-Execution Controls)
// Bit flags for the 32-bit pin-based VM-execution control field.
#[allow(non_camel_case_types)]
pub struct VmxPinBasedExecuteCtrl: u32 {
const EXTERNAL_INTERRUPT_EXITING = 1 << 0; // external interrupts cause VM exits
const NMI_EXITING = 1 << 3; // non-maskable interrupts (NMIs) cause VM exits.
const VIRTUAL_NMIS = 1 << 5; // NMIs are never blocked and the "blocking by NMI" bit (bit 3) in the interruptibility-state field indicates "virtual-NMI blocking"
const VMX_PREEMPTION_TIMER = 1 << 6; // the VMX-preemption timer counts down in VMX non-root operation
const PROCESS_POSTED_INTERRUPTS = 1 << 7; // the processor treats interrupts with the posted-interrupt notification vector specially
}
// (Intel Manual: 25.6.2 Processor-Based VM-Execution Controls)
// Bit flags for the 32-bit primary processor-based VM-execution control field.
#[allow(non_camel_case_types)]
pub struct VmxPrimaryProcessBasedExecuteCtrl: u32{
const INTERRUPT_WINDOW_EXITING = 1 << 2; // VM exits on interrupt window RFLAGS.IF = 1
const USE_TSC_OFFSETTING = 1 << 3; // TSC offsetting is enabled
const HLT_EXITING = 1 << 7;
const INVLPG_EXITING = 1 << 9;
const MWAIT_EXITING = 1 << 10;
const RDPMC_EXITING = 1 << 11;
const RDTSC_EXITING = 1 << 12;
const CR3_LOAD_EXITING = 1 << 15;
const CR3_STR_EXITING = 1 << 16;
const CR8_LOAD_EXITING = 1 << 19;
const CR8_STR_EXITING = 1 << 20;
const USE_TPR_SHADOW = 1 << 21;
const NMI_WINDOW_EXITING = 1 << 22;
const MOV_DR_EXITING = 1 << 23;
const UNCOND_IO_EXITING = 1 << 24;
const USE_IO_BITMAPS = 1 << 25;
const MONITOR_TRAP_FLAG = 1 << 27;
const USE_MSR_BITMAPS = 1 << 28;
const MONITOR_EXITING = 1 << 29;
const PAUSE_EXITING = 1 << 30;
const ACTIVATE_SECONDARY_CONTROLS = 1 << 31; // enables use of the secondary processor-based controls
}
// (Intel Manual: 25.6.2 Processor-Based VM-Execution Controls)
// Bit flags for the 32-bit secondary processor-based VM-execution control field.
pub struct VmxSecondaryProcessBasedExecuteCtrl: u32{
const VIRT_APIC_ACCESS = 1 << 0;
const ENABLE_EPT = 1 << 1;
const DESCRIPTOR_TABLE_EXITING = 1 << 2;
const ENABLE_RDTSCP = 1 << 3;
const VIRT_X2APIC_MODE = 1 << 4;
const ENABLE_VPID = 1 << 5;
const WBINVD_EXITING = 1 << 6;
const UNRESTRICTED_GUEST = 1 << 7;
// NOTE(review): "APCI" looks like a typo for "APIC" (APIC-register virtualization).
const APCI_REGISTER_VIRT = 1 << 8;
const VIRT_INTR_DELIVERY = 1 << 9;
const PAUSE_LOOP_EXITING = 1 << 10;
const RDRAND_EXITING = 1 << 11;
const ENABLE_INVPCID = 1 << 12;
const ENABLE_VM_FUNCTIONS = 1 << 13;
const VMCS_SHADOWING = 1 << 14;
const ENABLE_ENCLS_EXITING = 1 << 15;
const RDSEED_EXITING = 1 << 16;
const ENABLE_PML = 1 << 17;
const EPT_VIOLATION_VE = 1 << 18;
const CONCEAL_VMX_FROM_PT = 1 << 19;
const ENABLE_XSAVES_XRSTORS = 1 << 20;
const PASID_TRANSLATION = 1 << 21;
const MODE_BASED_EPT_EXEC = 1 << 22;
const SUB_PAGE_WRITE_PERM = 1 << 23;
// NOTE(review): "PYH" looks like a typo for "PHY" (guest physical addresses).
const PT_USE_GUEST_PYH_ADDR = 1 << 24;
const USE_TSC_SCALING = 1 << 25;
const ENABLE_USER_WAIT_PAUSE = 1 << 26;
const ENABLE_PCONFIG = 1 << 27;
const ENABLE_ENCLV_EXITING = 1 << 28;
// Bit 29 has no flag defined here.
const VMM_BUS_LOCK_DETECTION = 1 << 30;
const INST_TIMEOUT = 1 << 31;
}
// (Intel Manual: 25.7.1 VM-Exit Controls)
// Bit flags for the 32-bit primary VM-exit control field.
#[allow(non_camel_case_types)]
pub struct VmxPrimaryExitCtrl: u32 {
const SAVE_DBG_CTRLS = 1 << 2;
const HOST_ADDR_SPACE_SIZE = 1 << 9; // determines if a virtual processor will be in 64-bit mode after a VM exit
const LOAD_IA32_PERF_GLOBAL_CTRL = 1 << 12;
const ACK_INTERRUPT_ON_EXIT = 1 << 15;
const SAVE_IA32_PAT = 1 << 18;
const LOAD_IA32_PAT = 1 << 19;
const SAVE_IA32_EFER = 1 << 20;
const LOAD_IA32_EFER = 1 << 21;
const SAVE_VMX_PREEMPT_TIMER_VALUE = 1 << 22;
const CLEAR_IA32_BNDCFGS = 1 << 23;
const CONCEAL_VMX_FROM_PT = 1 << 24;
const CLEAR_IA32_RTIT_CTL = 1 << 25;
const CLEAR_IA32_LBR_CTL = 1 << 26;
const CLEAR_UINV = 1 << 27;
const LOAD_CET_STATE = 1 << 28;
const LOAD_PKRS = 1 << 29;
const SAVE_IA32_PERF_GLOBAL_CTL = 1 << 30;
const ACTIVATE_SECONDARY_CONTROLS = 1 << 31;
}
// (Intel Manual: 25.8.1 VM-Entry Controls)
// Bit flags for the 32-bit VM-entry control field.
#[allow(non_camel_case_types)]
pub struct VmxEntryCtrl: u32 {
    const LOAD_DBG_CTRLS = 1 << 2;
    const IA32E_MODE_GUEST = 1 << 9; // the guest is in IA-32e mode after VM entry
    const ENTRY_TO_SMM = 1 << 10;
    const DEACTIVATE_DUAL_MONITOR = 1 << 11;
    const LOAD_IA32_PERF_GLOBAL_CTRL = 1 << 13;
    const LOAD_IA32_PAT = 1 << 14;
    const LOAD_IA32_EFER = 1 << 15;
    const LOAD_IA32_BNDCFGS = 1 << 16;
    const CONCEAL_VMX_FROM_PT = 1 << 17;
    const LOAD_IA32_RTIT_CTL = 1 << 18;
    const LOAD_UINV = 1 << 19;
    const LOAD_CET_STATE = 1 << 20;
    // Bit 21 is "load guest IA32_LBR_CTL" in the SDM table; it was previously mislabelled
    // as LOAD_PKRS.
    const LOAD_GUEST_IA32_LBR_CTL = 1 << 21;
    // "Load PKRS" is bit 22 for VM entry (bit 29 is the VM-exit control of the same name).
    const LOAD_PKRS = 1 << 22;
    // Legacy spelling kept for source compatibility. The VM-entry control that loads
    // IA32_PERF_GLOBAL_CTRL is bit 13, so this is an alias of LOAD_IA32_PERF_GLOBAL_CTRL;
    // it used to be defined as bit 22, which is LOAD_PKRS.
    const LOAD_IA32_PERF_GLOBAL_CTL = 1 << 13;
}
}
/// Basic VM-exit reason numbers.
///
/// The numbering has gaps: 35, 38 and 42 have no variant here, and `From<i32>` below
/// panics for any number that is not listed.
#[derive(FromPrimitive)]
#[allow(non_camel_case_types)]
pub enum VmxExitReason {
EXCEPTION_OR_NMI = 0,
EXTERNAL_INTERRUPT = 1,
TRIPLE_FAULT = 2,
INIT_SIGNAL = 3,
SIPI = 4,
IO_SMI = 5,
OTHER_SMI = 6,
INTERRUPT_WINDOW = 7,
NMI_WINDOW = 8,
TASK_SWITCH = 9,
CPUID = 10,
GETSEC = 11,
HLT = 12,
INVD = 13,
INVLPG = 14,
RDPMC = 15,
RDTSC = 16,
RSM = 17,
VMCALL = 18,
VMCLEAR = 19,
VMLAUNCH = 20,
VMPTRLD = 21,
VMPTRST = 22,
VMREAD = 23,
VMRESUME = 24,
VMWRITE = 25,
VMXOFF = 26,
VMXON = 27,
CR_ACCESS = 28,
DR_ACCESS = 29,
IO_INSTRUCTION = 30,
RDMSR = 31,
WRMSR = 32,
VM_ENTRY_FAILURE_INVALID_GUEST_STATE = 33,
VM_ENTRY_FAILURE_MSR_LOADING = 34,
MWAIT = 36,
MONITOR_TRAP_FLAG = 37,
MONITOR = 39,
PAUSE = 40,
VM_ENTRY_FAILURE_MACHINE_CHECK_EVENT = 41,
TPR_BELOW_THRESHOLD = 43,
APIC_ACCESS = 44,
VIRTUALIZED_EOI = 45,
ACCESS_GDTR_OR_IDTR = 46,
ACCESS_LDTR_OR_TR = 47,
EPT_VIOLATION = 48,
EPT_MISCONFIG = 49,
INVEPT = 50,
RDTSCP = 51,
VMX_PREEMPTION_TIMER_EXPIRED = 52,
INVVPID = 53,
WBINVD = 54,
XSETBV = 55,
APIC_WRITE = 56,
RDRAND = 57,
INVPCID = 58,
VMFUNC = 59,
ENCLS = 60,
RDSEED = 61,
PML_FULL = 62,
XSAVES = 63,
XRSTORS = 64,
}
impl From<i32> for VmxExitReason {
fn from(num: i32) -> Self {
match num {
0 => VmxExitReason::EXCEPTION_OR_NMI,
1 => VmxExitReason::EXTERNAL_INTERRUPT,
2 => VmxExitReason::TRIPLE_FAULT,
3 => VmxExitReason::INIT_SIGNAL,
4 => VmxExitReason::SIPI,
5 => VmxExitReason::IO_SMI,
6 => VmxExitReason::OTHER_SMI,
7 => VmxExitReason::INTERRUPT_WINDOW,
8 => VmxExitReason::NMI_WINDOW,
9 => VmxExitReason::TASK_SWITCH,
10 => VmxExitReason::CPUID,
11 => VmxExitReason::GETSEC,
12 => VmxExitReason::HLT,
13 => VmxExitReason::INVD,
14 => VmxExitReason::INVLPG,
15 => VmxExitReason::RDPMC,
16 => VmxExitReason::RDTSC,
17 => VmxExitReason::RSM,
18 => VmxExitReason::VMCALL,
19 => VmxExitReason::VMCLEAR,
20 => VmxExitReason::VMLAUNCH,
21 => VmxExitReason::VMPTRLD,
22 => VmxExitReason::VMPTRST,
23 => VmxExitReason::VMREAD,
24 => VmxExitReason::VMRESUME,
25 => VmxExitReason::VMWRITE,
26 => VmxExitReason::VMXOFF,
27 => VmxExitReason::VMXON,
28 => VmxExitReason::CR_ACCESS,
29 => VmxExitReason::DR_ACCESS,
30 => VmxExitReason::IO_INSTRUCTION,
31 => VmxExitReason::RDMSR,
32 => VmxExitReason::WRMSR,
33 => VmxExitReason::VM_ENTRY_FAILURE_INVALID_GUEST_STATE,
34 => VmxExitReason::VM_ENTRY_FAILURE_MSR_LOADING,
36 => VmxExitReason::MWAIT,
37 => VmxExitReason::MONITOR_TRAP_FLAG,
39 => VmxExitReason::MONITOR,
40 => VmxExitReason::PAUSE,
41 => VmxExitReason::VM_ENTRY_FAILURE_MACHINE_CHECK_EVENT,
43 => VmxExitReason::TPR_BELOW_THRESHOLD,
44 => VmxExitReason::APIC_ACCESS,
45 => VmxExitReason::VIRTUALIZED_EOI,
46 => VmxExitReason::ACCESS_GDTR_OR_IDTR,
47 => VmxExitReason::ACCESS_LDTR_OR_TR,
48 => VmxExitReason::EPT_VIOLATION,
49 => VmxExitReason::EPT_MISCONFIG,
50 => VmxExitReason::INVEPT,
51 => VmxExitReason::RDTSCP,
52 => VmxExitReason::VMX_PREEMPTION_TIMER_EXPIRED,
53 => VmxExitReason::INVVPID,
54 => VmxExitReason::WBINVD,
55 => VmxExitReason::XSETBV,
56 => VmxExitReason::APIC_WRITE,
57 => VmxExitReason::RDRAND,
58 => VmxExitReason::INVPCID,
59 => VmxExitReason::VMFUNC,
60 => VmxExitReason::ENCLS,
61 => VmxExitReason::RDSEED,
62 => VmxExitReason::PML_FULL,
63 => VmxExitReason::XSAVES,
64 => VmxExitReason::XRSTORS,
_ => panic!("Invalid VmxExitReason number: {}", num),
}
}
}
/// Packs the four components of a VMCS field encoding into one `u32`.
///
/// Layout produced: access type at bit 0, `index` starting at bit 1, field type starting
/// at bit 10 and width starting at bit 13. The components are OR-ed together without
/// masking, so an `index` wider than 9 bits spills into the type/width positions.
const fn encode_vmcs_field(
    access_type: VmcsAccessType,
    vmcs_type: VmcsType,
    vmcs_width: VmcsWidth,
    index: u32,
) -> u32 {
    let access_bits = access_type as u32;
    let index_bits = index << 1;
    let type_bits = (vmcs_type as u32) << 10;
    let width_bits = (vmcs_width as u32) << 13;
    access_bits | index_bits | type_bits | width_bits
}
/// Shorthand for `encode_vmcs_field` with the access type fixed to `VmcsAccessType::FULL`.
const fn encode_vmcs_field_full(vmcs_type: VmcsType, vmcs_width: VmcsWidth, index: u32) -> u32 {
    let access = VmcsAccessType::FULL;
    encode_vmcs_field(access, vmcs_type, vmcs_width, index)
}
// fn decode_vmcs_field(field: u32) -> (VmcsAccessType, VmcsType, VmcsWidth, u16){
// (FromPrimitive::from_u32(field & 1).unwrap() ,
// FromPrimitive::from_u32((field>>10) & 0x3).unwrap(),
// FromPrimitive::from_u32((field>>13) & 0x3).unwrap(),
// ((field>>1) & 0x1ff) as u16
// )
// }

View File

@ -0,0 +1,269 @@
use super::vmcs::{VmcsFields, VmxExitReason};
use super::vmx_asm_wrapper::{vmx_vmread, vmx_vmwrite};
use crate::kdebug;
use crate::{syscall::SystemError, virt::kvm::vm};
use core::arch::asm;
use x86::vmx::vmcs::ro::GUEST_PHYSICAL_ADDR_FULL;
/// Exception vector numbers 0 through 31.
///
/// No explicit discriminants are given, so each variant's value is its position in the
/// list (e.g. `EXCEPTION_UNDEFINED_OPCODE` is 6, `EXCEPTION_PAGE_FAULT` is 14).
/// Reordering or inserting variants would silently change those numbers.
#[derive(FromPrimitive)]
#[allow(non_camel_case_types)]
pub enum APICExceptionVectors {
EXCEPTION_DIVIDE_ERROR,
EXCEPTION_DEBUG_BREAKPOINT,
EXCEPTION_NMI,
EXCEPTION_BREAKPOINT,
EXCEPTION_OVERFLOW,
EXCEPTION_BOUND_RANGE_EXCEEDED,
EXCEPTION_UNDEFINED_OPCODE,
EXCEPTION_NO_MATH_COPROCESSOR,
EXCEPTION_DOUBLE_FAULT,
EXCEPTION_RESERVED0,
EXCEPTION_INVALID_TASK_SEGMENT_SELECTOR,
EXCEPTION_SEGMENT_NOT_PRESENT,
EXCEPTION_STACK_SEGMENT_FAULT,
EXCEPTION_GENERAL_PROTECTION_FAULT,
EXCEPTION_PAGE_FAULT,
EXCEPTION_RESERVED1,
EXCEPTION_MATH_FAULT,
EXCEPTION_ALIGNMENT_CHECK,
EXCEPTION_MACHINE_CHECK,
EXCEPTION_SIMD_FLOATING_POINT_NUMERIC_ERROR,
EXCEPTION_VIRTUAL_EXCEPTION,
EXCEPTION_RESERVED2,
EXCEPTION_RESERVED3,
EXCEPTION_RESERVED4,
EXCEPTION_RESERVED5,
EXCEPTION_RESERVED6,
EXCEPTION_RESERVED7,
EXCEPTION_RESERVED8,
EXCEPTION_RESERVED9,
EXCEPTION_RESERVED10,
EXCEPTION_RESERVED11,
EXCEPTION_RESERVED12,
}
/// Interruption-type codes (0..=7). `vmexit_vmx_instruction_executed` shifts one of these
/// to bit 8 when building the VM-entry interruption-information value.
#[derive(FromPrimitive)]
#[allow(non_camel_case_types)]
pub enum InterruptType {
INTERRUPT_TYPE_EXTERNAL_INTERRUPT = 0,
INTERRUPT_TYPE_RESERVED = 1,
INTERRUPT_TYPE_NMI = 2,
INTERRUPT_TYPE_HARDWARE_EXCEPTION = 3,
INTERRUPT_TYPE_SOFTWARE_INTERRUPT = 4,
INTERRUPT_TYPE_PRIVILEGED_SOFTWARE_INTERRUPT = 5,
INTERRUPT_TYPE_SOFTWARE_EXCEPTION = 6,
INTERRUPT_TYPE_OTHER_EVENT = 7,
}
/// Queues an undefined-opcode hardware exception for injection on the next VM entry.
///
/// Writes the VM-entry interruption-information field (valid bit 31, type at bits 10:8,
/// deliver-error-code bit 11 clear, vector in the low bits), sets the VM-entry instruction
/// length to 0 and sets bit 16 (RF) in the guest RFLAGS field.
///
/// # Errors
/// Propagates any failure of the underlying VMREAD/VMWRITE wrappers; a failed VMREAD is
/// returned to the caller instead of panicking.
pub fn vmexit_vmx_instruction_executed() -> Result<(), SystemError> {
    let valid: u32 = 1;
    let vector: u32 = APICExceptionVectors::EXCEPTION_UNDEFINED_OPCODE as u32;
    let interrupt_type = InterruptType::INTERRUPT_TYPE_HARDWARE_EXCEPTION as u32;
    let deliver_code: u32 = 0;
    let interrupt_info = (valid << 31) | (deliver_code << 11) | (interrupt_type << 8) | vector;

    vmx_vmwrite(
        VmcsFields::CTRL_VM_ENTRY_INTR_INFO_FIELD as u32,
        interrupt_info as u64,
    )?;
    vmx_vmwrite(VmcsFields::CTRL_VM_ENTRY_INSTR_LEN as u32, 0)?;

    // Set the RF flag (bit 16) in the guest's RFLAGS.
    let rflags: u64 = vmx_vmread(VmcsFields::GUEST_RFLAGS as u32)? | 0x0001_0000;
    vmx_vmwrite(VmcsFields::GUEST_RFLAGS as u32, rflags)?;
    Ok(())
}
// pub fn vmexit_cpuid_handler(guest_cpu_context: &mut GuestCpuContext) -> Result<(), SystemError>{
// let rax = guest_cpu_context.rax;
// let rcx = guest_cpu_context.rcx;
// // let rdx = guest_cpu_context.rdx;
// // let rbx = guest_cpu_context.rbx;
// cpuid!(rax, rcx);
// unsafe{asm!("mov {}, rax", out(reg) guest_cpu_context.rax)};
// unsafe{asm!("mov {}, rcx", out(reg) guest_cpu_context.rcx)};
// unsafe{asm!("mov {}, rdx", out(reg) guest_cpu_context.rdx)};
// unsafe{asm!("mov {}, rbx", out(reg) guest_cpu_context.rbx)};
// Ok(())
// }
/// Pushes fifteen general-purpose registers onto the current stack.
///
/// Push order: rax, rcx, rdx, rbx, rbp, rsi, rdi, r8..r15 (rsp itself is not saved).
/// `restore_rpg` pops them in the exact reverse order.
///
/// # Safety
///
/// The `asm!` block ends with rsp lowered by 15 * 8 bytes and declares no operands,
/// clobbers or options.
///
/// NOTE(review): the Rust inline-assembly rules require the stack pointer to be
/// restored before an `asm!` block ends; leaving pushes outstanding across a normal
/// function return looks unsound unless this is always inlined — confirm.
/// NOTE(review): "rpg" is presumably a typo for "gpr" — confirm before renaming.
unsafe fn save_rpg() {
asm!(
"push rax",
"push rcx",
"push rdx",
"push rbx",
"push rbp",
"push rsi",
"push rdi",
"push r8",
"push r9",
"push r10",
"push r11",
"push r12",
"push r13",
"push r14",
"push r15",
);
}
/// Pops the fifteen general-purpose registers pushed by `save_rpg`.
///
/// Pop order is r15..r8, rdi, rsi, rbp, rbx, rdx, rcx, rax — the exact reverse of the
/// push order in `save_rpg`.
///
/// # Safety
///
/// The `asm!` block raises rsp by 15 * 8 bytes and overwrites every popped register
/// without declaring any clobbers.
///
/// NOTE(review): this only restores what `save_rpg` stored if rsp at entry equals
/// rsp at the end of `save_rpg`; confirm nothing in between moves the stack pointer.
unsafe fn restore_rpg() {
asm!(
"pop r15",
"pop r14",
"pop r13",
"pop r12",
"pop r11",
"pop r10",
"pop r9",
"pop r8",
"pop rdi",
"pop rsi",
"pop rbp",
"pop rbx",
"pop rdx",
"pop rcx",
"pop rax",
);
}
/// In-memory image of the general-purpose registers as laid out by `save_rpg`.
///
/// `#[repr(C)]` keeps the fields in declaration order. That order (r15 first, rax
/// last) is the reverse of the push order in `save_rpg`, i.e. the last-pushed
/// register sits at the lowest address.
///
/// The only uses visible in this file are in commented-out code, hence
/// `#[allow(dead_code)]`.
#[repr(C)]
#[allow(dead_code)]
pub struct GuestCpuContext {
pub r15: u64,
pub r14: u64,
pub r13: u64,
pub r12: u64,
pub r11: u64,
pub r10: u64,
pub r9: u64,
pub r8: u64,
pub rdi: u64,
pub rsi: u64,
pub rbp: u64,
pub rbx: u64,
pub rdx: u64,
pub rcx: u64,
pub rax: u64,
}
/// Host-side entry reached after a VM exit; called by name from the inline assembly
/// in `vmx_vmlaunch` (`call vmx_return`), which is why it is `#[no_mangle]`.
///
/// Sequence: log, push the general-purpose registers (`save_rpg`), run
/// `vmexit_handler`, pop the registers (`restore_rpg`), then execute `vmresume`.
///
/// NOTE(review): `kdebug!` runs before `save_rpg`, so the values pushed are whatever
/// the registers hold after that call — confirm these are meant to be guest values.
/// NOTE(review): if `vmresume` fails, execution falls through and the function
/// returns to its caller; no error is reported.
#[no_mangle]
pub extern "C" fn vmx_return() {
kdebug!("vmx_return!");
unsafe { save_rpg() };
vmexit_handler();
// XMM registers are vector registers. They're renamed onto the FP/SIMD register file
// unsafe {asm!(
// "sub rsp, 60h",
// "movaps xmmword ptr [rsp + 0h], xmm0",
// "movaps xmmword ptr [rsp + 10h], xmm1",
// "movaps xmmword ptr [rsp + 20h], xmm2",
// "movaps xmmword ptr [rsp + 30h], xmm3",
// "movaps xmmword ptr [rsp + 40h], xmm4",
// "movaps xmmword ptr [rsp + 50h], xmm5",
// "mov rdi, rsp",
// "sub rsp, 20h",
// "call vmexit_handler",
// "add rsp, 20h",
// "movaps xmm0, xmmword ptr [rsp + 0h]",
// "movaps xmm1, xmmword ptr [rsp + 10h]",
// "movaps xmm2, xmmword ptr [rsp + 20h]",
// "movaps xmm3, xmmword ptr [rsp + 30h]",
// "movaps xmm4, xmmword ptr [rsp + 40h]",
// "movaps xmm5, xmmword ptr [rsp + 50h]",
// "add rsp, 60h",
// clobber_abi("C"),
// )};
unsafe { restore_rpg() };
// Re-enter the guest.
unsafe { asm!("vmresume",) };
}
/// Dispatches a VM exit according to the basic exit reason read from the VMCS.
///
/// - VMX instructions executed by the guest: an exception is queued via
///   `vmexit_vmx_instruction_executed`.
/// - CPUID / RDMSR / WRMSR / triple fault: only logged, then the guest RIP is
///   advanced past the exiting instruction with `adjust_rip`.
/// - EPT violation: builds an error code from the exit qualification and calls the
///   vCPU's `mmu.page_fault` callback for VM 0 / vCPU 0.
/// - Anything else: logs a few VMCS fields and advances the guest RIP.
///
/// # Panics
///
/// Every VMCS access and handler result is unwrapped, so any VMREAD/VMWRITE failure,
/// a missing VM 0, an unset `page_fault` callback or a failing page-fault handler
/// panics.
#[no_mangle]
extern "C" fn vmexit_handler() {
// let guest_cpu_context = unsafe { guest_cpu_context_ptr.as_mut().unwrap() };
// kdebug!("guest_cpu_context_ptr={:p}",guest_cpu_context_ptr);
kdebug!("vmexit handler!");
let exit_reason = vmx_vmread(VmcsFields::VMEXIT_EXIT_REASON as u32).unwrap() as u32;
// Only the low 16 bits are used to select the handler.
let exit_basic_reason = exit_reason & 0x0000_ffff;
let guest_rip = vmx_vmread(VmcsFields::GUEST_RIP as u32).unwrap();
// let guest_rsp = vmx_vmread(VmcsFields::GUEST_RSP as u32).unwrap();
kdebug!("guest_rip={:x}", guest_rip);
let _guest_rflags = vmx_vmread(VmcsFields::GUEST_RFLAGS as u32).unwrap();
match VmxExitReason::from(exit_basic_reason as i32) {
VmxExitReason::VMCALL
| VmxExitReason::VMCLEAR
| VmxExitReason::VMLAUNCH
| VmxExitReason::VMPTRLD
| VmxExitReason::VMPTRST
| VmxExitReason::VMREAD
| VmxExitReason::VMRESUME
| VmxExitReason::VMWRITE
| VmxExitReason::VMXOFF
| VmxExitReason::VMXON
| VmxExitReason::VMFUNC
| VmxExitReason::INVEPT
| VmxExitReason::INVVPID => {
kdebug!("vmexit handler: vmx instruction!");
vmexit_vmx_instruction_executed().expect("previledge instruction handle error");
}
VmxExitReason::CPUID => {
kdebug!("vmexit handler: cpuid instruction!");
// vmexit_cpuid_handler(guest_cpu_context);
adjust_rip(guest_rip).unwrap();
}
VmxExitReason::RDMSR => {
kdebug!("vmexit handler: rdmsr instruction!");
adjust_rip(guest_rip).unwrap();
}
VmxExitReason::WRMSR => {
kdebug!("vmexit handler: wrmsr instruction!");
adjust_rip(guest_rip).unwrap();
}
VmxExitReason::TRIPLE_FAULT => {
kdebug!("vmexit handler: triple fault!");
// NOTE(review): a triple fault is treated like an emulated instruction here
// (RIP is simply advanced) — confirm this is intended.
adjust_rip(guest_rip).unwrap();
}
VmxExitReason::EPT_VIOLATION => {
kdebug!("vmexit handler: ept violation!");
let gpa = vmx_vmread(GUEST_PHYSICAL_ADDR_FULL as u32).unwrap();
let exit_qualification = vmx_vmread(VmcsFields::VMEXIT_QUALIFICATION as u32).unwrap();
/* It is a write fault? */
// Qualification bit 1 is kept in place as error-code bit 1.
let mut error_code = exit_qualification & (1 << 1);
/* It is a fetch fault? */
// Qualification bit 2 is moved to error-code bit 4.
error_code |= (exit_qualification << 2) & (1 << 4);
/* ept page table is present? */
// Qualification bit 3 is moved to error-code bit 0.
error_code |= (exit_qualification >> 3) & (1 << 0);
let kvm = vm(0).unwrap();
let vcpu = kvm.vcpu[0].clone();
// Use the data
// The callback is fetched in its own statement so the first lock guard is
// released before the vCPU is locked again for the call below.
let kvm_ept_page_fault = vcpu.lock().mmu.page_fault.unwrap();
kvm_ept_page_fault(&mut (*vcpu.lock()), gpa, error_code as u32, false)
.expect("ept page fault error");
}
_ => {
kdebug!(
"vmexit handler: unhandled vmexit reason: {}!",
exit_basic_reason
);
let info = vmx_vmread(VmcsFields::VMEXIT_INSTR_LEN as u32).unwrap() as u32;
kdebug!("vmexit handler: VMEXIT_INSTR_LEN: {}!", info);
let info = vmx_vmread(VmcsFields::VMEXIT_INSTR_INFO as u32).unwrap() as u32;
kdebug!("vmexit handler: VMEXIT_INSTR_INFO: {}!", info);
let info = vmx_vmread(VmcsFields::CTRL_EXPECTION_BITMAP as u32).unwrap() as u32;
kdebug!("vmexit handler: CTRL_EXPECTION_BITMAP: {}!", info);
adjust_rip(guest_rip).unwrap();
// panic!();
}
}
}
/// Advances the guest instruction pointer past the instruction that caused the exit.
///
/// Reads `VMEXIT_INSTR_LEN` from the VMCS, adds it to `rip` and writes the sum to
/// `GUEST_RIP`.
///
/// # Errors
///
/// Propagates a failing `vmx_vmread` or `vmx_vmwrite`.
#[no_mangle]
fn adjust_rip(rip: u64) -> Result<(), SystemError> {
    let next_rip = rip + vmx_vmread(VmcsFields::VMEXIT_INSTR_LEN as u32)?;
    vmx_vmwrite(VmcsFields::GUEST_RIP as u32, next_rip)
}

View File

@ -0,0 +1,96 @@
use super::vmcs::VmcsFields;
use crate::kdebug;
use crate::syscall::SystemError;
use core::arch::asm;
use x86;
/// Enters VMX operation using the VMXON region at physical address `vmxon_pa`.
///
/// # Errors
///
/// Logs the hardware failure and returns `SystemError::EVMXONFailed` when the
/// instruction reports an error.
pub fn vmxon(vmxon_pa: u64) -> Result<(), SystemError> {
    if let Err(e) = unsafe { x86::bits64::vmx::vmxon(vmxon_pa) } {
        kdebug!("vmxon fail: {:?}", e);
        return Err(SystemError::EVMXONFailed);
    }
    Ok(())
}
/// Leaves VMX operation.
///
/// # Errors
///
/// Returns `SystemError::EVMXOFFFailed` when the instruction reports an error.
pub fn vmxoff() -> Result<(), SystemError> {
    let failed = unsafe { x86::bits64::vmx::vmxoff() }.is_err();
    if failed {
        return Err(SystemError::EVMXOFFFailed);
    }
    Ok(())
}
/// Writes `value` into field `vmcs_field` of the current VMCS (VMWRITE).
///
/// # Errors
///
/// Logs the failure together with the field encoding and returns
/// `SystemError::EVMWRITEFailed`.
pub fn vmx_vmwrite(vmcs_field: u32, value: u64) -> Result<(), SystemError> {
    if let Err(e) = unsafe { x86::bits64::vmx::vmwrite(vmcs_field, value) } {
        kdebug!("vmx_write fail: {:?}", e);
        kdebug!("vmcs_field: {:x}", vmcs_field);
        return Err(SystemError::EVMWRITEFailed);
    }
    Ok(())
}
/// Reads field `vmcs_field` from the current VMCS (VMREAD) and returns its value.
///
/// # Errors
///
/// Logs the failure and returns `SystemError::EVMREADFailed`.
pub fn vmx_vmread(vmcs_field: u32) -> Result<u64, SystemError> {
    let outcome = unsafe { x86::bits64::vmx::vmread(vmcs_field) };
    outcome.map_err(|e| {
        kdebug!("vmx_read fail: {:?}", e);
        SystemError::EVMREADFailed
    })
}
/// Loads the VMCS at physical address `vmcs_pa` as the current VMCS (VMPTRLD).
///
/// # Errors
///
/// Returns `SystemError::EVMPRTLDFailed` when the instruction reports an error.
pub fn vmx_vmptrld(vmcs_pa: u64) -> Result<(), SystemError> {
    let loaded = unsafe { x86::bits64::vmx::vmptrld(vmcs_pa) }.is_ok();
    if loaded {
        Ok(())
    } else {
        Err(SystemError::EVMPRTLDFailed)
    }
}
/// Launches the guest with VMLAUNCH and sets up the host resume point.
///
/// The inline assembly:
/// 1. pushes rbp, rcx, rdx, rsi, rdi;
/// 2. writes the resulting rsp into the VMCS `HOST_RSP` field;
/// 3. writes the address of local label `1:` into the VMCS `HOST_RIP` field;
/// 4. executes `vmlaunch`;
/// 5. at label `1:` pops the five registers again and calls `vmx_return`.
///
/// Label `1:` directly follows `vmlaunch`, so it is reached both through `HOST_RIP`
/// and by plain fall-through.
///
/// Always returns `Ok(())` once the assembly block completes; a failed `vmlaunch` is
/// not detected here (the commented-out code below shows the checked variant).
///
/// NOTE(review): `rax` is overwritten by the `lea` before operand `{1}` is used, and
/// `rax` is only covered by `clobber_abi("C")`; confirm the register allocator can
/// never assign `host_rip` (or `host_rsp`) to `rax`.
pub fn vmx_vmlaunch() -> Result<(), SystemError> {
let host_rsp = VmcsFields::HOST_RSP as u32;
let host_rip = VmcsFields::HOST_RIP as u32;
unsafe {
asm!(
"push rbp",
"push rcx",
"push rdx",
"push rsi",
"push rdi",
"vmwrite {0:r}, rsp",
"lea rax, 1f[rip]",
"vmwrite {1:r}, rax",
"vmlaunch",
"1:",
"pop rdi",
"pop rsi",
"pop rdx",
"pop rcx",
"pop rbp",
"call vmx_return",
in(reg) host_rsp,
in(reg) host_rip,
clobber_abi("C"),
)
}
Ok(())
// match unsafe { x86::bits64::vmx::vmlaunch() } {
// Ok(_) => Ok(()),
// Err(e) => {
// kdebug!("vmx_launch fail: {:?}", e);
// Err(SystemError::EVMLAUNCHFailed)
// },
// }
}
/// Clears the VMCS at physical address `vmcs_pa` (VMCLEAR).
///
/// # Errors
///
/// Returns `SystemError::EVMPRTLDFailed` when the instruction reports an error.
///
/// NOTE(review): this reuses the VMPTRLD error code; confirm whether a dedicated
/// VMCLEAR error code is wanted.
pub fn vmx_vmclear(vmcs_pa: u64) -> Result<(), SystemError> {
    let cleared = unsafe { x86::bits64::vmx::vmclear(vmcs_pa) }.is_ok();
    if cleared {
        Ok(())
    } else {
        Err(SystemError::EVMPRTLDFailed)
    }
}

View File

@ -34,6 +34,9 @@ use core::mem::{self};
use core::sync::atomic::{compiler_fence, AtomicBool, Ordering};
use super::kvm::vmx::vmcs::VmcsFields;
use super::kvm::vmx::vmx_asm_wrapper::vmx_vmread;
pub type PageMapper =
crate::mm::page::PageMapper<crate::arch::x86_64::mm::X86_64MMArch, LockedFrameAllocator>;
@ -169,12 +172,21 @@ impl MemoryManagementArch for X86_64MMArch {
}
/// @brief 获取顶级页表的物理地址
unsafe fn table(_table_kind: PageTableKind) -> PhysAddr {
let paddr: usize;
compiler_fence(Ordering::SeqCst);
asm!("mov {}, cr3", out(reg) paddr, options(nomem, nostack, preserves_flags));
compiler_fence(Ordering::SeqCst);
return PhysAddr::new(paddr);
/// Returns the physical address of the top-level page table for `table_kind`.
///
/// - `Kernel` / `User`: the current value of CR3.
/// - `EPT`: the value read from the `CTRL_EPTP_PTR` VMCS field.
///
/// # Panics
///
/// Panics with "Failed to read eptp" if the VMCS read fails.
///
/// NOTE(review): the EPT branch passes the raw field value to `PhysAddr::new`;
/// confirm whether any non-address bits of that field need masking first.
unsafe fn table(table_kind: PageTableKind) -> PhysAddr {
    let raw_addr: usize = match table_kind {
        PageTableKind::Kernel | PageTableKind::User => {
            let paddr: usize;
            // Fences keep the compiler from reordering memory accesses around the read.
            compiler_fence(Ordering::SeqCst);
            asm!("mov {}, cr3", out(reg) paddr, options(nomem, nostack, preserves_flags));
            compiler_fence(Ordering::SeqCst);
            paddr
        }
        PageTableKind::EPT => {
            vmx_vmread(VmcsFields::CTRL_EPTP_PTR as u32).expect("Failed to read eptp") as usize
        }
    };
    PhysAddr::new(raw_addr)
}
/// @brief 设置顶级页表的物理地址到处理器中

View File

@ -6,6 +6,7 @@ pub mod cpu;
pub mod fpu;
pub mod interrupt;
pub mod ipc;
pub mod kvm;
pub mod libs;
pub mod mm;
pub mod msi;
@ -25,4 +26,6 @@ pub use self::mm::X86_64MMArch as MMArch;
pub use interrupt::X86_64InterruptArch as CurrentIrqArch;
pub use crate::arch::asm::pio::X86_64PortIOArch as CurrentPortIOArch;
pub use kvm::X86_64KVMArch as KVMArch;
pub use crate::arch::ipc::signal::X86_64SignalArch as CurrentSignalArch;

View File

@ -149,6 +149,11 @@ impl DevFS {
dev_block_inode.add_dev(name, device.clone())?;
device.set_fs(dev_block_inode.0.lock().fs.clone());
}
FileType::KvmDevice => {
dev_root_inode
.add_dev(name, device.clone())
.expect("DevFS: Failed to register /dev/kvm");
}
_ => {
return Err(SystemError::EOPNOTSUPP_OR_ENOTSUP);
}

View File

@ -136,6 +136,7 @@ impl File {
};
// kdebug!("inode:{:?}",f.inode);
f.inode.open(&mut f.private_data, &mode)?;
return Ok(f);
}

View File

@ -39,6 +39,8 @@ pub enum FileType {
BlockDevice,
/// 字符设备
CharDevice,
/// kvm设备
KvmDevice,
/// 管道文件
Pipe,
/// 符号链接
@ -88,6 +90,7 @@ impl FileType {
FileType::Dir => DT_DIR,
FileType::BlockDevice => DT_BLK,
FileType::CharDevice => DT_CHR,
FileType::KvmDevice => DT_CHR,
FileType::Pipe => DT_FIFO,
FileType::SymLink => DT_LNK,
FileType::Socket => DT_SOCK,

View File

@ -25,6 +25,7 @@ use super::{
utils::rsplit_path,
Dirent, FileType, IndexNode, MAX_PATHLEN, ROOT_INODE, VFS_MAX_FOLLOW_SYMLINK_TIMES,
};
// use crate::kdebug;
pub const SEEK_SET: u32 = 0;
pub const SEEK_CUR: u32 = 1;
@ -207,7 +208,6 @@ impl Syscall {
if mode.contains(FileMode::O_APPEND) {
file.lseek(SeekFrom::SeekEnd(0))?;
}
// 把文件对象存入pcb
let r = ProcessManager::current_pcb()
.fd_table()
@ -232,6 +232,27 @@ impl Syscall {
return res;
}
/// Sends a device-specific command to the device behind a file descriptor.
///
/// - `fd`: file descriptor number
/// - `cmd`: device-specific request code
/// - `data`: request argument, forwarded to the inode unchanged
///
/// Returns whatever the inode's `ioctl` returns.
///
/// # Errors
///
/// Returns `SystemError::EBADF` when `fd` does not name an open file; otherwise any
/// error produced by the inode's `ioctl`.
pub fn ioctl(fd: usize, cmd: u32, data: usize) -> Result<usize, SystemError> {
    let fd_table = ProcessManager::current_pcb().fd_table();
    let table_guard = fd_table.read();
    let target = table_guard
        .get_file_by_fd(fd as i32)
        .ok_or(SystemError::EBADF)?;
    // Release the fd-table guard before calling into the device, to avoid the
    // "cannot schedule" problem.
    drop(table_guard);

    // Bound to a local so the temporary file-lock guard is released before `target`
    // goes out of scope.
    let outcome = target.lock_no_preempt().inode().ioctl(cmd, data);
    outcome
}
/// @brief 根据文件描述符读取文件数据。尝试读取的数据长度与buf的长度相同。
///
/// @param fd 文件描述符编号
@ -700,6 +721,7 @@ impl Syscall {
FileType::SymLink => kstat.mode.insert(ModeType::S_IFLNK),
FileType::Socket => kstat.mode.insert(ModeType::S_IFSOCK),
FileType::Pipe => kstat.mode.insert(ModeType::S_IFIFO),
FileType::KvmDevice => kstat.mode.insert(ModeType::S_IFCHR),
}
return Ok(kstat);

View File

@ -5,6 +5,7 @@ int ktest_test_bitree(void* arg);
int ktest_test_kfifo(void* arg);
int ktest_test_mutex(void* arg);
int ktest_test_idr(void* arg);
int ktest_test_kvm(void* arg);
/**
* @brief 线

View File

@ -0,0 +1,23 @@
#include "ktest.h"
#include "ktest_utils.h"
/**
 * @brief KVM test case 0.1: placeholder for the /dev/kvm device test.
 *
 * Only prints a banner for now. Both arguments are unused.
 *
 * @return 0 (the function is declared to return long, so it must not fall off the end)
 */
static long ktest_kvm_case0_1(uint64_t arg0, uint64_t arg1){
    kTEST("Testing /dev/kvm device...");
    return 0;
}
static ktest_case_table kt_kvm_func_table[] = {
ktest_kvm_case0_1,
};
/**
 * @brief Runs every case in kt_kvm_func_table in order.
 *
 * Each case is called with its table index as the first argument and 0 as the second.
 *
 * @param arg unused
 * @return always 0
 */
int ktest_test_kvm(void* arg)
{
    const int case_count = sizeof(kt_kvm_func_table) / sizeof(ktest_case_table);
    int idx = 0;

    kTEST("Testing kvm...");
    while (idx < case_count)
    {
        kTEST("Testing case %d", idx);
        kt_kvm_func_table[idx](idx, 0);
        ++idx;
    }
    kTEST("kvm Test done.");
    return 0;
}

View File

@ -16,6 +16,9 @@
#![feature(trait_upcasting)]
#![feature(slice_ptr_get)]
#![feature(vec_into_raw_parts)]
#![feature(new_uninit)]
#![feature(ptr_to_from_bits)]
#![feature(concat_idents)]
#![cfg_attr(target_os = "none", no_std)]
#[cfg(test)]
@ -46,6 +49,7 @@ mod sched;
mod smp;
mod syscall;
mod time;
mod virt;
#[macro_use]
extern crate alloc;

View File

@ -30,6 +30,7 @@
#include <time/timer.h>
#include <driver/interrupt/apic/apic_timer.h>
#include <virt/kvm/kvm.h>
extern int rs_driver_init();
extern void rs_softirq_init();
@ -158,6 +159,10 @@ void system_initialize()
cli();
HPET_enable();
io_mfence();
kvm_init();
io_mfence();
// 系统初始化到此结束,剩下的初始化功能应当放在初始内核线程中执行

View File

@ -94,6 +94,33 @@ unsafe impl GlobalAlloc for KernelAllocator {
}
}
// (Disabled) Implement the `Allocator` trait for the kernel slab allocator.
// unsafe impl Allocator for KernelAllocator {
// fn allocate(&self, layout: Layout) -> Result<NonNull<[u8]>, AllocError> {
// let memory = unsafe {self.local_alloc(layout)};
// if memory.is_null() {
// Err(AllocError)
// } else {
// let slice = unsafe { core::slice::from_raw_parts_mut(memory, layout.size()) };
// Ok(unsafe { NonNull::new_unchecked(slice) })
// }
// }
// fn allocate_zeroed(&self, layout: Layout) -> Result<NonNull<[u8]>, AllocError> {
// let memory = unsafe {self.local_alloc_zeroed(layout)};
// if memory.is_null() {
// Err(AllocError)
// } else {
// let slice = unsafe { core::slice::from_raw_parts_mut(memory, layout.size()) };
// Ok(unsafe { NonNull::new_unchecked(slice) })
// }
// }
// unsafe fn deallocate(&self, ptr: NonNull<u8>, layout: Layout) {
// self.local_dealloc(ptr.cast().as_ptr(), layout);
// }
// }
/// 内存分配错误处理函数
#[cfg(target_os = "none")]
#[alloc_error_handler]

View File

@ -73,6 +73,8 @@ pub enum PageTableKind {
User,
/// 内核页表
Kernel,
/// 内存虚拟化中使用的EPT
EPT,
}
/// 物理内存地址
@ -365,6 +367,8 @@ pub trait MemoryManagementArch: Clone + Copy + Debug {
const PAGE_SIZE: usize = 1 << Self::PAGE_SHIFT;
/// 通过这个mask获取地址的页内偏移量
const PAGE_OFFSET_MASK: usize = Self::PAGE_SIZE - 1;
/// 通过这个mask获取页的首地址
const PAGE_MASK: usize = !(Self::PAGE_OFFSET_MASK);
/// 页表项的地址、数据部分的shift。
/// 打个比方如果这个值为52,那么意味着页表项的[0, 52)位,用于表示地址以及其他的标志位
const PAGE_ADDRESS_SHIFT: usize = Self::PAGE_LEVELS * Self::PAGE_ENTRY_SHIFT + Self::PAGE_SHIFT;

View File

@ -3,6 +3,8 @@ use core::{
sync::atomic::{AtomicBool, Ordering},
};
use crate::kdebug;
use num_traits::{FromPrimitive, ToPrimitive};
use crate::{
@ -297,6 +299,16 @@ pub enum SystemError {
EOWNERDEAD = 129,
/// 状态不可恢复 State not recoverable.
ENOTRECOVERABLE = 130,
// VMX on 虚拟化开启指令出错
EVMXONFailed = 131,
// VMX off 虚拟化关闭指令出错
EVMXOFFFailed = 132,
// VMX VMWRITE 写入虚拟化VMCS内存出错
EVMWRITEFailed = 133,
EVMREADFailed = 134,
EVMPRTLDFailed = 135,
EVMLAUNCHFailed = 136,
KVM_HVA_ERR_BAD = 137,
}
impl SystemError {
@ -377,6 +389,8 @@ pub const SYS_FCNTL: usize = 51;
pub const SYS_FTRUNCATE: usize = 52;
pub const SYS_MKNOD: usize = 53;
pub const SYS_IOCTL: usize = 54;
#[derive(Debug)]
pub struct Syscall;
@ -477,6 +491,13 @@ impl Syscall {
Self::lseek(fd, w)
}
SYS_IOCTL => {
kdebug!("SYS_IOCTL");
let fd = args[0];
let cmd = args[1];
let data = args[2];
Self::ioctl(fd, cmd as u32, data)
}
SYS_FORK => Self::fork(frame),
SYS_VFORK => Self::vfork(frame),

View File

@ -0,0 +1,190 @@
use super::{vcpu::Vcpu, vm};
use crate::{
kdebug,
mm::{kernel_mapper::KernelMapper, page::PageFlags, VirtAddr},
syscall::SystemError,
};
/*
* Address types:
*
* gva - guest virtual address
* gpa - guest physical address
* gfn - guest frame number
* hva - host virtual address
* hpa - host physical address
* hfn - host frame number
*/
pub const KVM_USER_MEM_SLOTS: u32 = 16;
pub const KVM_PRIVATE_MEM_SLOTS: u32 = 3;
pub const KVM_MEM_SLOTS_NUM: u32 = KVM_USER_MEM_SLOTS + KVM_PRIVATE_MEM_SLOTS;
pub const KVM_ADDRESS_SPACE_NUM: usize = 2;
pub const KVM_MEM_LOG_DIRTY_PAGES: u32 = 1 << 0;
pub const KVM_MEM_READONLY: u32 = 1 << 1;
pub const KVM_MEM_MAX_NR_PAGES: u32 = (1 << 31) - 1;
/*
* The bit 16 ~ bit 31 of kvm_memory_region::flags are internally used
* in kvm, other bits are visible for userspace which are defined in
* include/linux/kvm_h.
*/
pub const KVM_MEMSLOT_INVALID: u32 = 1 << 16;
// pub const KVM_MEMSLOT_INCOHERENT:u32 = 1 << 17;
// pub const KVM_PERMILLE_MMU_PAGES: u32 = 20; // the proportion of MMU pages required per thousand (out of 1000) memory pages.
// pub const KVM_MIN_ALLOC_MMU_PAGES: u32 = 64;
pub const PAGE_SHIFT: u32 = 12;
pub const PAGE_SIZE: u32 = 1 << PAGE_SHIFT;
pub const PAGE_MASK: u32 = !(PAGE_SIZE - 1);
#[repr(C)]
/// Maps a range of guest physical memory onto virtual memory of the user process.
/// Describes one contiguous piece of the guest's physical memory.
pub struct KvmUserspaceMemoryRegion {
pub slot: u32, // slot in which this memory range is registered
// `flags` takes the values KVM_MEM_LOG_DIRTY_PAGES and KVM_MEM_READONLY, which tell
// KVM how to treat this memory: KVM_MEM_LOG_DIRTY_PAGES enables dirty-page logging,
// KVM_MEM_READONLY makes the memory read-only.
pub flags: u32,
pub guest_phys_addr: u64, // guest physical address at which the range starts
pub memory_size: u64, // size of the guest memory range
pub userspace_addr: u64, // host virtual address backing the guest memory range
}
/// One registered guest memory range, expressed in guest frame numbers.
#[derive(Default, Clone, Copy, Debug)]
pub struct KvmMemorySlot {
pub base_gfn: u64, // first guest physical frame number of the range
pub npages: u64, // number of pages in the range, i.e. its size
pub userspace_addr: u64, // host virtual address backing the range
pub flags: u32, // attributes of the range
pub id: u16, // id of the range
// Dirty-page tracking for the range: one bit per page, 1 = dirty, 0 = clean.
// pub dirty_bitmap: *mut u8,
// unsigned long *rmap[KVM_NR_PAGE_SIZES]; reverse-mapping structure: records the address of the page-table entry for a GPA when the EPT entry is created (GPA --> entry address); not needed for now
}
/// Fixed-size table of memory slots plus the count of entries in use.
///
/// `__gfn_to_memslot` only scans the first `used_slots` entries of `memslots`.
#[derive(Default, Clone, Copy, Debug)]
pub struct KvmMemorySlots {
pub memslots: [KvmMemorySlot; KVM_MEM_SLOTS_NUM as usize], // array of guest memory ranges
pub used_slots: u32, // number of slots currently in use
}
/// Kind of change being applied to a memory slot.
///
/// NOTE(review): the variant meanings are taken from their names (a slot is created,
/// deleted, moved, or only has its flags changed); the code that interprets them is
/// not part of this file — confirm against the caller.
#[derive(PartialEq, Eq, Debug)]
pub enum KvmMemoryChange {
Create,
Delete,
Move,
FlagsOnly,
}
impl Default for KvmUserspaceMemoryRegion {
fn default() -> KvmUserspaceMemoryRegion {
KvmUserspaceMemoryRegion {
slot: 0,
flags: 0,
guest_phys_addr: 0,
memory_size: 0,
userspace_addr: 0,
}
}
}
/// Returns a copy of the memory-slot table of address space 0 of VM 0.
///
/// The `_vcpu` argument is currently ignored.
///
/// # Panics
///
/// Panics if VM 0 is not registered.
pub fn kvm_vcpu_memslots(_vcpu: &mut dyn Vcpu) -> KvmMemorySlots {
    const ADDRESS_SPACE_ID: usize = 0;
    vm(0).unwrap().memslots[ADDRESS_SPACE_ID]
}
/// Finds the first in-use slot whose frame range `[base_gfn, base_gfn + npages)`
/// contains `gfn`, or `None` if no in-use slot covers it.
///
/// Only the first `used_slots` entries are examined, in index order.
fn __gfn_to_memslot(slots: KvmMemorySlots, gfn: u64) -> Option<KvmMemorySlot> {
    kdebug!("__gfn_to_memslot");
    // TODO: optimize with a binary search.
    (0..slots.used_slots)
        .map(|idx| slots.memslots[idx as usize])
        .find(|candidate| {
            candidate.base_gfn <= gfn && gfn < candidate.base_gfn + candidate.npages
        })
}
/// Translates a guest frame number into the host virtual address backing it.
///
/// The result is `slot.userspace_addr` plus the byte offset of `gfn` from the slot's
/// first frame. `gfn` is not checked against the slot's range here.
fn __gfn_to_hva(slot: KvmMemorySlot, gfn: u64) -> u64 {
    let page_index = gfn - slot.base_gfn;
    let byte_offset = page_index * (PAGE_SIZE as u64);
    slot.userspace_addr + byte_offset
}
/// Translates `gfn` to a host virtual address within `slot`, with access checks.
///
/// When `nr_pages` is supplied, it receives the number of pages from `gfn` to the end
/// of the slot.
///
/// # Errors
///
/// Returns `SystemError::KVM_HVA_ERR_BAD` when `slot` is `None`, when the slot is
/// flagged `KVM_MEMSLOT_INVALID`, or when `write` is requested on a slot flagged
/// `KVM_MEM_READONLY`.
fn __gfn_to_hva_many(
    slot: Option<KvmMemorySlot>,
    gfn: u64,
    nr_pages: Option<&mut u64>,
    write: bool,
) -> Result<u64, SystemError> {
    kdebug!("__gfn_to_hva_many");
    let slot = slot.ok_or(SystemError::KVM_HVA_ERR_BAD)?;

    let slot_invalid = slot.flags & KVM_MEMSLOT_INVALID != 0;
    let write_to_readonly = write && slot.flags & KVM_MEM_READONLY != 0;
    if slot_invalid || write_to_readonly {
        return Err(SystemError::KVM_HVA_ERR_BAD);
    }

    if let Some(remaining) = nr_pages {
        *remaining = slot.npages - (gfn - slot.base_gfn);
    }
    Ok(__gfn_to_hva(slot, gfn))
}
/* From Linux kernel
* Pin guest page in memory and return its pfn.
* @addr: host virtual address which maps memory to the guest
* @atomic: whether this function can sleep
* @async: whether this function need to wait IO complete if the
* host page is not in the memory
* @write_fault: whether we should get a writable host page
* @writable: whether it allows to map a writable host page for !@write_fault
*
* The function will map a writable host page for these two cases:
* 1): @write_fault = true
* 2): @write_fault = false && @writable, @writable will tell the caller
* whether the mapping is writable.
*/
// Computes the pfn for a host virtual address (HVA) while making sure the physical
// page is present in memory.
// (In Linux the host-side virtual-to-physical conversion has two paths,
// hva_to_pfn_fast and hva_to_pfn_slow.)
// Correctness of this implementation still has to be verified.
//
// Returns the host physical frame number, i.e. the translated physical address
// shifted right by PAGE_SHIFT. `_atomic` and `_writable` are currently unused.
//
// Panics if the kernel mapper is unavailable or if the address still cannot be
// translated after the mapping attempt.
fn hva_to_pfn(addr: u64, _atomic: bool, _writable: &mut bool) -> Result<u64, SystemError> {
kdebug!("hva_to_pfn");
// NOTE(review): this dereferences `addr` as a `*const i32` purely for a debug print,
// before any translation or mapping has been checked — confirm the address is always
// readable here (it may be relied on to touch the page).
unsafe {
let raw = addr as *const i32;
kdebug!("raw={:x}", *raw);
}
// let hpa = MMArch::virt_2_phys(VirtAddr::new(addr)).unwrap().data() as u64;
let hva = VirtAddr::new(addr as usize);
let mut mapper = KernelMapper::lock();
let mapper = mapper.as_mut().unwrap();
// Fast path: the address is already mapped.
if let Some((hpa, _)) = mapper.translate(hva) {
return Ok(hpa.data() as u64 >> PAGE_SHIFT);
}
// Not mapped yet: create a mapping, then translate again.
// NOTE(review): the result of `map` is discarded and MMIO page flags are used for what
// looks like guest RAM — confirm both are intended.
unsafe {
mapper.map(hva, PageFlags::mmio_flags());
}
let (hpa, _) = mapper.translate(hva).unwrap();
return Ok(hpa.data() as u64 >> PAGE_SHIFT);
}
/// Translates a guest frame number into a host physical frame number.
///
/// First resolves the host virtual address with `__gfn_to_hva_many`, then converts it
/// with `hva_to_pfn`. `atomic` and `writable` are forwarded to `hva_to_pfn` unchanged.
///
/// # Errors
///
/// Propagates the error of either translation step.
pub fn __gfn_to_pfn(
    slot: Option<KvmMemorySlot>,
    gfn: u64,
    atomic: bool,
    write: bool,
    writable: &mut bool,
) -> Result<u64, SystemError> {
    kdebug!("__gfn_to_pfn");
    // Filled in by `__gfn_to_hva_many`; not used afterwards.
    let mut remaining_pages = 0;
    let hva = __gfn_to_hva_many(slot, gfn, Some(&mut remaining_pages), write)?;
    let pfn = hva_to_pfn(hva, atomic, writable)?;
    kdebug!("hva={}, pfn={}", hva, pfn);
    Ok(pfn)
}
/// Looks up the memory slot that contains `gfn` in the slot table returned by
/// `kvm_vcpu_memslots` for `vcpu`.
///
/// Returns `None` when no in-use slot covers `gfn`.
pub fn kvm_vcpu_gfn_to_memslot(vcpu: &mut dyn Vcpu, gfn: u64) -> Option<KvmMemorySlot> {
    let slots = kvm_vcpu_memslots(vcpu);
    __gfn_to_memslot(slots, gfn)
}

View File

@ -0,0 +1,2 @@
// ================= Rust 实现 =============
extern void kvm_init();

View File

@ -0,0 +1,188 @@
use crate::filesystem::devfs::{DevFS, DeviceINode};
use crate::filesystem::vfs::{
core::generate_inode_id,
file::{File, FileMode},
make_rawdev, FilePrivateData, FileSystem, FileType, IndexNode, Metadata, PollStatus,
};
use crate::process::ProcessManager;
use crate::{arch::KVMArch, libs::spinlock::SpinLock, syscall::SystemError, time::TimeSpec};
use crate::{filesystem, kdebug};
// use crate::virt::kvm::{host_stack};
use super::push_vm;
use crate::virt::kvm::vm_dev::LockedVmInode;
use alloc::{
string::String,
sync::{Arc, Weak},
vec::Vec,
};
pub const KVM_API_VERSION: u32 = 12;
// use crate::virt::kvm::kvm_dev_ioctl_create_vm;
/*
* ioctls for /dev/kvm fds:
*/
pub const KVM_GET_API_VERSION: u32 = 0x00;
pub const KVM_CREATE_VM: u32 = 0x01;
pub const KVM_CHECK_EXTENSION: u32 = 0x03;
pub const KVM_GET_VCPU_MMAP_SIZE: u32 = 0x04; // Get size for mmap(vcpu_fd) in bytes
pub const KVM_TRACE_ENABLE: u32 = 0x05;
pub const KVM_TRACE_PAUSE: u32 = 0x06;
pub const KVM_TRACE_DISABLE: u32 = 0x07;
/// Inode state for the `/dev/kvm` device.
#[derive(Debug)]
pub struct KvmInode {
/// uuid: purpose not clear yet
// uuid: Uuid,
/// Weak reference to this inode itself
self_ref: Weak<LockedKvmInode>,
/// Pointer to the filesystem object this inode belongs to
fs: Weak<DevFS>,
/// Inode metadata
metadata: Metadata,
}
/// `KvmInode` wrapped in a spinlock; this is the type registered as the device inode.
#[derive(Debug)]
pub struct LockedKvmInode(SpinLock<KvmInode>);
impl LockedKvmInode {
    /// Creates the `/dev/kvm` inode and stores a weak self-reference inside it.
    ///
    /// The filesystem pointer starts out empty and is filled in later by `set_fs`.
    pub fn new() -> Arc<Self> {
        let metadata = Metadata {
            dev_id: 1,
            inode_id: generate_inode_id(),
            size: 0,
            blk_size: 0,
            blocks: 0,
            atime: TimeSpec::default(),
            mtime: TimeSpec::default(),
            ctime: TimeSpec::default(),
            // Kind of file: directory, block device, char device, ...
            file_type: FileType::KvmDevice,
            mode: filesystem::vfs::syscall::ModeType::S_IALLUGO,
            nlinks: 1,
            uid: 0,
            gid: 0,
            // Used here as the device number.
            raw_dev: make_rawdev(1, 4),
        };

        let locked = Arc::new(LockedKvmInode(SpinLock::new(KvmInode {
            self_ref: Weak::default(),
            fs: Weak::default(),
            metadata,
        })));
        locked.0.lock().self_ref = Arc::downgrade(&locked);
        locked
    }
}
impl DeviceINode for LockedKvmInode {
/// Records the devfs instance this inode belongs to (stored as a weak reference).
fn set_fs(&self, fs: Weak<DevFS>) {
self.0.lock().fs = fs;
}
}
impl IndexNode for LockedKvmInode {
fn as_any_ref(&self) -> &dyn core::any::Any {
self
}
fn open(&self, _data: &mut FilePrivateData, _mode: &FileMode) -> Result<(), SystemError> {
kdebug!("file private data:{:?}", _data);
return Ok(());
}
fn close(&self, _data: &mut FilePrivateData) -> Result<(), SystemError> {
return Ok(());
}
fn metadata(&self) -> Result<Metadata, SystemError> {
return Ok(self.0.lock().metadata.clone());
}
fn fs(&self) -> Arc<dyn FileSystem> {
return self.0.lock().fs.upgrade().unwrap();
}
fn list(&self) -> Result<Vec<String>, SystemError> {
Err(SystemError::EOPNOTSUPP_OR_ENOTSUP)
}
fn set_metadata(&self, metadata: &Metadata) -> Result<(), SystemError> {
let mut inode = self.0.lock();
inode.metadata.atime = metadata.atime;
inode.metadata.mtime = metadata.mtime;
inode.metadata.ctime = metadata.ctime;
inode.metadata.mode = metadata.mode;
inode.metadata.uid = metadata.uid;
inode.metadata.gid = metadata.gid;
return Ok(());
}
fn poll(&self) -> Result<PollStatus, SystemError> {
return Ok(PollStatus::READ | PollStatus::WRITE);
}
/// @brief io control接口
///
/// @param cmd 命令
/// @param data 数据
///
/// @return 成功Ok()
/// 失败Err(错误码)
fn ioctl(&self, cmd: u32, data: usize) -> Result<usize, SystemError> {
match cmd {
0xdeadbeef => {
kdebug!("kvm ioctl");
Ok(0)
}
KVM_GET_API_VERSION => Ok(KVM_API_VERSION as usize),
KVM_CREATE_VM => {
kdebug!("kvm KVM_CREATE_VM");
kvm_dev_ioctl_create_vm(data)
}
KVM_CHECK_EXTENSION
| KVM_GET_VCPU_MMAP_SIZE
| KVM_TRACE_ENABLE
| KVM_TRACE_PAUSE
| KVM_TRACE_DISABLE => Err(SystemError::EOPNOTSUPP_OR_ENOTSUP),
_ => KVMArch::kvm_arch_dev_ioctl(cmd, data),
}
}
/// 读设备 - 应该调用设备的函数读写,而不是通过文件系统读写
fn read_at(
&self,
_offset: usize,
_len: usize,
_buf: &mut [u8],
_data: &mut FilePrivateData,
) -> Result<usize, SystemError> {
Err(SystemError::EOPNOTSUPP_OR_ENOTSUP)
}
/// 写设备 - 应该调用设备的函数读写,而不是通过文件系统读写
fn write_at(
&self,
_offset: usize,
_len: usize,
_buf: &[u8],
_data: &mut FilePrivateData,
) -> Result<usize, SystemError> {
Err(SystemError::EOPNOTSUPP_OR_ENOTSUP)
}
}
#[no_mangle]
pub fn kvm_dev_ioctl_create_vm(_vmtype: usize) -> Result<usize, SystemError> {
push_vm(0).expect("need a valid vm!");
// 创建vm文件返回文件描述符
let vm_inode = LockedVmInode::new();
let file: File = File::new(vm_inode, FileMode::O_RDWR)?;
let r = ProcessManager::current_pcb()
.fd_table()
.write()
.alloc_fd(file, None)
.map(|fd| fd as usize);
return r;
}

View File

@ -0,0 +1,85 @@
use self::kvm_dev::LockedKvmInode;
use crate::arch::KVMArch;
use crate::filesystem::devfs::devfs_register;
use crate::kdebug;
use crate::libs::mutex::Mutex;
use alloc::vec::Vec;
use vm::Vm;
pub mod host_mem;
mod kvm_dev;
pub mod vcpu;
mod vcpu_dev;
pub mod vm;
mod vm_dev;
// pub const KVM_MAX_VCPUS:u32 = 255;
// pub const GUEST_STACK_SIZE:usize = 1024;
// pub const HOST_STACK_SIZE:usize = 0x1000 * 6;
/// Global list of all VMs, protected by a mutex.
pub static VM_LIST: Mutex<Vec<Vm>> = Mutex::new(Vec::new());
/// Creates a VM with the given `id` and appends it to `VM_LIST`.
///
/// # Errors
///
/// Returns `Err(())` (after logging) when a VM with this id is already registered.
///
/// # Panics
///
/// Panics if `Vm::new` fails.
pub fn push_vm(id: usize) -> Result<(), ()> {
    let mut vm_list = VM_LIST.lock();
    let already_registered = vm_list.iter().any(|existing| existing.id == id);
    if already_registered {
        kdebug!("push_vm: vm {} already exists", id);
        return Err(());
    }
    vm_list.push(Vm::new(id).unwrap());
    Ok(())
}
/// Removes the VM with the given `id` from `VM_LIST` and returns it.
///
/// # Panics
///
/// Panics if no VM with this id is registered.
pub fn remove_vm(id: usize) -> Vm {
    let mut vm_list = VM_LIST.lock();
    let idx = vm_list
        .iter()
        .position(|existing| existing.id == id)
        .unwrap_or_else(|| panic!("VM[{}] not exist in VM LIST", id));
    vm_list.remove(idx)
}
/// Replaces the registered VM `id` with `new_vm`.
///
/// The old entry is removed and `new_vm` is appended to the end of the list while
/// holding the `VM_LIST` lock once, so a concurrent `vm(id)` lookup can no longer
/// observe the window in which the VM is missing.
///
/// # Panics
///
/// Panics if no VM with this id is registered.
pub fn update_vm(id: usize, new_vm: Vm) {
    // The replaced VM is moved out of the locked scope so that it is dropped only
    // after the list lock has been released, as before.
    let _replaced = {
        let mut vm_list = VM_LIST.lock();
        let idx = vm_list
            .iter()
            .position(|existing| existing.id == id)
            .unwrap_or_else(|| panic!("VM[{}] not exist in VM LIST", id));
        let old_vm = vm_list.remove(idx);
        vm_list.push(new_vm);
        old_vm
    };
}
/// Returns a clone of the registered VM with the given `id`, or `None` if there is
/// no such VM.
pub fn vm(id: usize) -> Option<Vm> {
    VM_LIST
        .lock()
        .iter()
        .find(|candidate| candidate.id == id)
        .cloned()
}
/// Initializes the KVM subsystem and registers the `/dev/kvm` device.
///
/// Called from the C boot path (`system_initialize`), hence `extern "C"` and
/// `#[no_mangle]`.
///
/// When the CPU does not report VM support, the reason is logged and the function
/// returns without initializing anything. Previously the result of the check was
/// only logged and initialization continued regardless.
///
/// # Panics
///
/// Panics if the architecture-specific initialization or the devfs registration fails.
#[no_mangle]
pub extern "C" fn kvm_init() {
    kdebug!("kvm init");
    if let Err(e) = KVMArch::kvm_arch_cpu_supports_vm() {
        kdebug!("[-] CPU does not support Intel VMX: {:?}", e);
        // Without hardware support there is nothing to initialize or expose.
        return;
    }
    kdebug!("[+] CPU supports Intel VMX");

    KVMArch::kvm_arch_init().expect("kvm arch init");
    devfs_register("kvm", LockedKvmInode::new()).expect("Failed to register /dev/kvm");
}

View File

@ -0,0 +1,9 @@
use crate::syscall::SystemError;
/// Architecture-independent interface of a virtual CPU.
///
/// Implementors must be `Send + Sync`.
pub trait Vcpu: Send + Sync {
/// Virtualize the CPU
fn virtualize_cpu(&mut self) -> Result<(), SystemError>;
/// NOTE(review): presumably the inverse of `virtualize_cpu` — confirm against the
/// implementors, none of which are visible in this file.
fn devirtualize_cpu(&self) -> Result<(), SystemError>;
/// Gets the index of the current logical/virtual processor
fn id(&self) -> u32;
}

View File

@ -0,0 +1,212 @@
use crate::arch::kvm::vmx::vcpu::VcpuContextFrame;
use crate::arch::KVMArch;
use crate::filesystem::devfs::DevFS;
use crate::filesystem::vfs::{
core::generate_inode_id, file::FileMode, make_rawdev, FilePrivateData, FileSystem, FileType,
IndexNode, Metadata, PollStatus,
};
use crate::mm::VirtAddr;
use crate::syscall::user_access::copy_from_user;
use crate::virt::kvm::vcpu::Vcpu;
use crate::virt::kvm::vm;
use crate::{filesystem, kdebug};
use crate::{libs::spinlock::SpinLock, syscall::SystemError, time::TimeSpec};
use alloc::{
string::String,
sync::{Arc, Weak},
vec::Vec,
};
// pub const KVM_API_VERSION:u32 = 12;
pub const KVM_RUN: u32 = 0x00;
// pub const KVM_GET_REGS: u32 = 0x01;
pub const KVM_SET_REGS: u32 = 0x02;
// pub const GUEST_STACK_SIZE:usize = 1024;
// pub const HOST_STACK_SIZE:usize = 0x1000 * 6;
/*
* ioctls for /dev/vm fds:
*/
// pub const KVM_CREATE_VCPU: u32 = 0x00;
// pub const KVM_SET_USER_MEMORY_REGION: u32 = 0x01;
// pub const KVM_GET_DIRTY_LOG: u32 = 0x02;
// pub const KVM_IRQFD: u32 = 0x03;
// pub const KVM_IOEVENTFD: u32 = 0x04;
// pub const KVM_IRQ_LINE_STATUS: u32 = 0x05;
// #[derive(Debug)]
// pub struct InodeInfo {
// kvm: Arc<Hypervisor>,
// }
/// Inode state for a vCPU device file.
#[derive(Debug)]
pub struct VcpuInode {
/// uuid: purpose not clear yet
// uuid: Uuid,
/// Weak reference to this inode itself
self_ref: Weak<LockedVcpuInode>,
/// Pointer to the filesystem object this inode belongs to
fs: Weak<DevFS>,
/// Inode metadata
metadata: Metadata,
// fdata: InodeInfo,
}
/// `VcpuInode` wrapped in a spinlock; this is the type that implements `IndexNode`.
#[derive(Debug)]
pub struct LockedVcpuInode(SpinLock<VcpuInode>);
impl LockedVcpuInode {
    /// Creates a vCPU inode and stores a weak self-reference inside it.
    ///
    /// The filesystem pointer starts out empty.
    pub fn new() -> Arc<Self> {
        let metadata = Metadata {
            dev_id: 1,
            inode_id: generate_inode_id(),
            size: 0,
            blk_size: 0,
            blocks: 0,
            atime: TimeSpec::default(),
            mtime: TimeSpec::default(),
            ctime: TimeSpec::default(),
            // Kind of file: directory, block device, char device, ...
            file_type: FileType::KvmDevice,
            mode: filesystem::vfs::syscall::ModeType::S_IALLUGO,
            nlinks: 1,
            uid: 0,
            gid: 0,
            // Used here as the device number.
            raw_dev: make_rawdev(1, 4),
        };

        let locked = Arc::new(LockedVcpuInode(SpinLock::new(VcpuInode {
            self_ref: Weak::default(),
            fs: Weak::default(),
            metadata,
        })));
        locked.0.lock().self_ref = Arc::downgrade(&locked);
        locked
    }
}
/// Filesystem-node behavior of a vCPU device file; `ioctl` is the only operation
/// that does real work.
impl IndexNode for LockedVcpuInode {
fn as_any_ref(&self) -> &dyn core::any::Any {
self
}
/// Opening the file only logs its private data.
fn open(&self, _data: &mut FilePrivateData, _mode: &FileMode) -> Result<(), SystemError> {
kdebug!("file private data:{:?}", _data);
return Ok(());
}
/// Closing the file is a no-op.
fn close(&self, _data: &mut FilePrivateData) -> Result<(), SystemError> {
return Ok(());
}
/// Returns a copy of the inode metadata.
fn metadata(&self) -> Result<Metadata, SystemError> {
return Ok(self.0.lock().metadata.clone());
}
/// Returns the owning filesystem; panics if the weak pointer cannot be upgraded.
fn fs(&self) -> Arc<dyn FileSystem> {
return self.0.lock().fs.upgrade().unwrap();
}
/// Directory listing is not supported.
fn list(&self) -> Result<Vec<String>, SystemError> {
Err(SystemError::EOPNOTSUPP_OR_ENOTSUP)
}
/// Updates the timestamps, mode, uid and gid from `metadata`; other fields are kept.
fn set_metadata(&self, metadata: &Metadata) -> Result<(), SystemError> {
let mut inode = self.0.lock();
inode.metadata.atime = metadata.atime;
inode.metadata.mtime = metadata.mtime;
inode.metadata.ctime = metadata.ctime;
inode.metadata.mode = metadata.mode;
inode.metadata.uid = metadata.uid;
inode.metadata.gid = metadata.gid;
return Ok(());
}
/// The file always reports itself as readable and writable.
fn poll(&self) -> Result<PollStatus, SystemError> {
return Ok(PollStatus::READ | PollStatus::WRITE);
}
/// I/O control interface of the vCPU file.
///
/// - `cmd`: the command (`KVM_RUN`, `KVM_SET_REGS`, or the `0xdeadbeef` probe)
/// - `data`: the command argument; for `KVM_SET_REGS` a user-space address from
///   which a `VcpuContextFrame` is copied
///
/// Returns `Ok(..)` on success and `Err(error code)` on failure.
///
/// NOTE(review): an unknown `cmd` returns `Ok(usize::MAX)` rather than an error —
/// confirm callers expect that.
/// NOTE(review): `vm(0).unwrap().vcpu[0]` panics when VM 0 or its first vCPU does
/// not exist, and user space can reach this path — confirm.
fn ioctl(&self, cmd: u32, data: usize) -> Result<usize, SystemError> {
match cmd {
0xdeadbeef => {
kdebug!("kvm_cpu ioctl");
Ok(0)
}
KVM_RUN => {
kdebug!("kvm_cpu ioctl");
// let guest_stack = vec![0xCC; GUEST_STACK_SIZE];
// let host_stack = vec![0xCC; HOST_STACK_SIZE];
// let guest_rsp = guest_stack.as_ptr() as u64 + GUEST_STACK_SIZE as u64;
// let host_rsp = (host_stack.as_ptr() as u64) + HOST_STACK_SIZE as u64;
// let hypervisor = Hypervisor::new(1, host_rsp, 0).expect("Cannot create hypervisor");
// let vcpu = VmxVcpu::new(1, Arc::new(Mutex::new(hypervisor)), host_rsp, guest_rsp, guest_code as *const () as u64).expect("Cannot create VcpuData");
// vcpu.virtualize_cpu().expect("Cannot virtualize cpu");
let vcpu = vm(0).unwrap().vcpu[0].clone();
// The lock guard is a temporary of this statement, so it is released before the
// run call below.
vcpu.lock().virtualize_cpu()?;
KVMArch::kvm_arch_vcpu_ioctl_run(vcpu.as_ref())?;
Ok(0)
}
KVM_SET_REGS => {
let mut kvm_regs = VcpuContextFrame::default();
// View the register frame as raw bytes and fill it from user memory.
// NOTE(review): assumes every byte pattern is a valid `VcpuContextFrame` — confirm
// against its definition.
unsafe {
copy_from_user(
core::slice::from_raw_parts_mut(
(&mut kvm_regs as *mut _) as *mut u8,
core::mem::size_of::<VcpuContextFrame>(),
),
VirtAddr::new(data),
)?;
}
kdebug!(
"rip={:x}, rflags={:x}, rsp={:x}, rax={:x}",
kvm_regs.rip,
kvm_regs.rflags,
kvm_regs.regs[6],
kvm_regs.regs[0],
);
let vcpu = vm(0).unwrap().vcpu[0].clone();
vcpu.lock().set_regs(kvm_regs)?;
Ok(0)
}
_ => {
kdebug!("kvm_cpu ioctl");
Ok(usize::MAX)
}
}
}
/// Reading the device: should go through the device's own functions rather than the
/// filesystem, so this is not supported.
fn read_at(
&self,
_offset: usize,
_len: usize,
_buf: &mut [u8],
_data: &mut FilePrivateData,
) -> Result<usize, SystemError> {
Err(SystemError::EOPNOTSUPP_OR_ENOTSUP)
}
/// Writing the device: should go through the device's own functions rather than the
/// filesystem, so this is not supported.
fn write_at(
&self,
_offset: usize,
_len: usize,
_buf: &[u8],
_data: &mut FilePrivateData,
) -> Result<usize, SystemError> {
Err(SystemError::EOPNOTSUPP_OR_ENOTSUP)
}
}

175
kernel/src/virt/kvm/vm.rs Normal file
View File

@ -0,0 +1,175 @@
use crate::arch::kvm::vmx::vcpu::VmxVcpu;
use crate::libs::mutex::Mutex;
use crate::syscall::SystemError;
use crate::{arch::KVMArch, kdebug};
use alloc::sync::Arc;
use alloc::vec::Vec;
// use super::HOST_STACK_SIZE;
use super::host_mem::{
KvmMemoryChange, KvmMemorySlot, KvmMemorySlots, KvmUserspaceMemoryRegion,
KVM_ADDRESS_SPACE_NUM, KVM_MEM_LOG_DIRTY_PAGES, KVM_MEM_MAX_NR_PAGES, KVM_MEM_READONLY,
KVM_MEM_SLOTS_NUM, KVM_USER_MEM_SLOTS, PAGE_SHIFT,
};
use crate::arch::kvm::vmx::vmcs::PAGE_SIZE;
// use crate::kdebug;
/// Description of one virtual machine: its vCPUs, its guest memory slots and its
/// architecture-specific state.
#[derive(Debug, Clone)]
pub struct Vm {
    /// Identifier of this VM.
    pub id: usize,
    // vcpu config
    /// Number of vCPUs to run.
    pub nr_vcpus: u32,
    /// The vCPUs that belong to this VM, each behind its own mutex.
    pub vcpu: Vec<Arc<Mutex<VmxVcpu>>>,
    // memory config
    /// Number of memory slots in each address space.
    pub nr_mem_slots: u32,
    /// Memory slot tables, one per guest address space.
    pub memslots: [KvmMemorySlots; KVM_ADDRESS_SPACE_NUM],
    // arch related config
    /// Architecture-specific state.
    pub arch: KVMArch,
}
impl Vm {
    /// Creates an empty VM description with the given `id`.
    ///
    /// The VM starts without vCPUs, with default-initialised memory slot tables and with
    /// the default architecture state. This constructor currently always returns `Ok`.
    pub fn new(id: usize) -> Result<Self, SystemError> {
        Ok(Self {
            id,
            nr_vcpus: 0,
            vcpu: Vec::new(),
            nr_mem_slots: KVM_MEM_SLOTS_NUM,
            memslots: [KvmMemorySlots::default(); KVM_ADDRESS_SPACE_NUM],
            arch: Default::default(),
        })
    }

    /// Registers a region of host user memory as guest physical memory.
    ///
    /// `mem.slot` encodes the slot id in its low 16 bits and the address space id in its
    /// high 16 bits. Depending on the current content of the slot, the request is
    /// classified as create, move, flags-only change or delete; only *create* is applied
    /// so far, the other kinds are validated and then ignored.
    ///
    /// # Errors
    ///
    /// - `EINVAL`: slot id or address space id out of range, unsupported flags,
    ///   size/guest address not page aligned, too many pages, an attempt to change the
    ///   size, host address or read-only attribute of an existing slot, or an attempt to
    ///   delete an empty slot.
    /// - `EEXIST`: the new guest physical range overlaps another slot.
    pub fn set_user_memory_region(
        &mut self,
        mem: &KvmUserspaceMemoryRegion,
    ) -> Result<(), SystemError> {
        kdebug!("set_user_memory_region");
        let id: u16 = mem.slot as u16; // slot id (low 16 bits)
        let as_id = mem.slot >> 16; // address space id (high 16 bits)
        kdebug!("id={}, as_id={}", id, as_id);

        // The per-VM limit applies to the slot id only, not to the whole encoded value
        // (which also carries the address space id).
        if u32::from(id) >= self.nr_mem_slots {
            return Err(SystemError::EINVAL);
        }
        self.check_memory_region_flag(mem)?;

        // Size and guest physical address must both be page aligned.
        let page_mask = (PAGE_SIZE - 1) as u64;
        if (mem.memory_size & page_mask) != 0 || (mem.guest_phys_addr & page_mask) != 0 {
            return Err(SystemError::EINVAL);
        }
        if as_id >= (KVM_ADDRESS_SPACE_NUM as u32) || u32::from(id) >= KVM_MEM_SLOTS_NUM {
            return Err(SystemError::EINVAL);
        }
        let as_idx = as_id as usize;
        let slot_idx = usize::from(id);

        let base_gfn = mem.guest_phys_addr >> PAGE_SHIFT;
        let npages = mem.memory_size >> PAGE_SHIFT;
        if npages > KVM_MEM_MAX_NR_PAGES as u64 {
            return Err(SystemError::EINVAL);
        }

        let old_slot = &self.memslots[as_idx].memslots[slot_idx];
        let mut new_slot = KvmMemorySlot {
            base_gfn,                           // first guest page frame number of the region
            npages,                             // size of the region in pages
            userspace_addr: mem.userspace_addr, // host virtual address backing the region
            flags: mem.flags,                   // region attributes
            id,                                 // slot id
        };

        // Classify the request by comparing it with the slot's current content.
        let change = if npages != 0 {
            if old_slot.npages == 0 {
                // The slot is empty: a new region is being created.
                KvmMemoryChange::Create
            } else {
                // An existing region may only be moved or have its flags changed; its
                // size, host address and read-only attribute are fixed. The XOR must be
                // parenthesised because `&` binds tighter than `^`.
                if mem.userspace_addr != old_slot.userspace_addr
                    || npages != old_slot.npages
                    || ((new_slot.flags ^ old_slot.flags) & KVM_MEM_READONLY) != 0
                {
                    return Err(SystemError::EINVAL);
                }
                if new_slot.base_gfn != old_slot.base_gfn {
                    KvmMemoryChange::Move
                } else if new_slot.flags != old_slot.flags {
                    KvmMemoryChange::FlagsOnly
                } else {
                    // Nothing differs from the existing slot.
                    return Ok(());
                }
            }
        } else {
            // A zero-sized request deletes the slot, which must therefore be populated.
            if old_slot.npages == 0 {
                return Err(SystemError::EINVAL);
            }
            new_slot.base_gfn = 0;
            new_slot.flags = 0;
            KvmMemoryChange::Delete
        };

        if matches!(change, KvmMemoryChange::Create | KvmMemoryChange::Move) {
            // The new guest physical range must not intersect any other user slot.
            let overlaps = self.memslots[as_idx]
                .memslots
                .iter()
                .take(KVM_MEM_SLOTS_NUM as usize)
                .any(|other| {
                    let comparable =
                        other.id != id && (other.id as u32) < KVM_USER_MEM_SLOTS;
                    comparable
                        && base_gfn + npages > other.base_gfn
                        && other.base_gfn + other.npages > base_gfn
                });
            if overlaps {
                return Err(SystemError::EEXIST);
            }
        }

        if matches!(change, KvmMemoryChange::Create) {
            let slots = &mut self.memslots[as_idx];
            slots.memslots[slot_idx] = new_slot;
            slots.used_slots += 1;
        }
        // TODO: apply KvmMemoryChange::Delete, Move and FlagsOnly, and set up dirty-page
        // logging when KVM_MEM_LOG_DIRTY_PAGES is requested.
        Ok(())
    }

    /// Rejects a region whose `flags` contain anything other than
    /// `KVM_MEM_LOG_DIRTY_PAGES`.
    fn check_memory_region_flag(&self, mem: &KvmUserspaceMemoryRegion) -> Result<(), SystemError> {
        let valid_flags = KVM_MEM_LOG_DIRTY_PAGES;
        if mem.flags & !valid_flags != 0 {
            return Err(SystemError::EINVAL);
        }
        Ok(())
    }
}

View File

@ -0,0 +1,224 @@
use crate::filesystem::devfs::DevFS;
use crate::filesystem::vfs::{
core::generate_inode_id,
file::{File, FileMode},
make_rawdev, FilePrivateData, FileSystem, FileType, IndexNode, Metadata, PollStatus,
};
use crate::mm::VirtAddr;
use crate::process::ProcessManager;
use crate::syscall::user_access::copy_from_user;
use crate::virt::kvm::host_mem::KvmUserspaceMemoryRegion;
use crate::virt::kvm::update_vm;
use crate::virt::kvm::vcpu_dev::LockedVcpuInode;
use crate::virt::kvm::vm;
use crate::{arch::KVMArch, libs::spinlock::SpinLock, syscall::SystemError, time::TimeSpec};
use crate::{filesystem, kdebug};
use alloc::{
string::String,
sync::{Arc, Weak},
vec::Vec,
};
// pub const KVM_API_VERSION:u32 = 12;
// pub const GUEST_STACK_SIZE:usize = 1024;
// pub const HOST_STACK_SIZE:usize = 0x1000 * 6;
/*
 * ioctl command numbers accepted on VM file descriptors (dispatched in
 * `LockedVmInode::ioctl`). User space must use the same values.
 */
pub const KVM_CREATE_VCPU: u32 = 0x00;
pub const KVM_SET_USER_MEMORY_REGION: u32 = 0x01;
pub const KVM_GET_DIRTY_LOG: u32 = 0x02;
pub const KVM_IRQFD: u32 = 0x03;
pub const KVM_IOEVENTFD: u32 = 0x04;
pub const KVM_IRQ_LINE_STATUS: u32 = 0x05;
// #[derive(Debug)]
// pub struct InodeInfo {
// kvm: Arc<Hypervisor>,
// }
/// Inode state of a VM device file.
#[derive(Debug)]
pub struct VmInode {
    /// uuid (its purpose is unclear for now)
    // uuid: Uuid,
    /// Weak reference to the `LockedVmInode` wrapping this inode
    self_ref: Weak<LockedVmInode>,
    /// Weak reference to the filesystem object this inode belongs to
    fs: Weak<DevFS>,
    /// Inode metadata
    metadata: Metadata,
    // fdata: InodeInfo,
}
/// A `VmInode` protected by a spinlock; this is the type that implements `IndexNode`.
#[derive(Debug)]
pub struct LockedVmInode(SpinLock<VmInode>);
impl LockedVmInode {
    /// Creates a VM device inode.
    ///
    /// The inode is not attached to a filesystem yet (`fs` is an empty weak reference);
    /// its `self_ref` is pointed back at the returned `Arc` before returning.
    pub fn new() -> Arc<Self> {
        let metadata = Metadata {
            dev_id: 1,
            inode_id: generate_inode_id(),
            size: 0,
            blk_size: 0,
            blocks: 0,
            atime: TimeSpec::default(),
            mtime: TimeSpec::default(),
            ctime: TimeSpec::default(),
            file_type: FileType::KvmDevice, // directory / block device / char device, ...
            mode: filesystem::vfs::syscall::ModeType::S_IALLUGO,
            nlinks: 1,
            uid: 0,
            gid: 0,
            raw_dev: make_rawdev(1, 4), // used as the device number
        };
        let locked = Arc::new(LockedVmInode(SpinLock::new(VmInode {
            self_ref: Weak::default(),
            fs: Weak::default(),
            metadata,
        })));
        locked.0.lock().self_ref = Arc::downgrade(&locked);
        locked
    }
}
impl IndexNode for LockedVmInode {
    /// Exposes this inode as `&dyn Any` so that callers can downcast it.
    fn as_any_ref(&self) -> &dyn core::any::Any {
        self
    }

    /// Opens the VM device. Nothing is set up here; the private data is only logged.
    fn open(&self, _data: &mut FilePrivateData, _mode: &FileMode) -> Result<(), SystemError> {
        kdebug!("file private data:{:?}", _data);
        Ok(())
    }

    /// Closes the VM device. There is no per-open state to release.
    fn close(&self, _data: &mut FilePrivateData) -> Result<(), SystemError> {
        Ok(())
    }

    /// Returns a copy of the inode metadata.
    fn metadata(&self) -> Result<Metadata, SystemError> {
        let guard = self.0.lock();
        Ok(guard.metadata.clone())
    }

    /// Returns the filesystem this inode belongs to.
    ///
    /// Panics if the weak filesystem reference can no longer be upgraded.
    fn fs(&self) -> Arc<dyn FileSystem> {
        let guard = self.0.lock();
        guard.fs.upgrade().unwrap()
    }

    /// A VM device is not a directory, so listing is not supported.
    fn list(&self) -> Result<Vec<String>, SystemError> {
        Err(SystemError::EOPNOTSUPP_OR_ENOTSUP)
    }

    /// Updates the timestamps, mode and ownership from `metadata`; other fields are left as-is.
    fn set_metadata(&self, metadata: &Metadata) -> Result<(), SystemError> {
        let mut guard = self.0.lock();
        let target = &mut guard.metadata;
        target.atime = metadata.atime;
        target.mtime = metadata.mtime;
        target.ctime = metadata.ctime;
        target.mode = metadata.mode;
        target.uid = metadata.uid;
        target.gid = metadata.gid;
        Ok(())
    }

    /// The device always reports itself as readable and writable.
    fn poll(&self) -> Result<PollStatus, SystemError> {
        Ok(PollStatus::READ | PollStatus::WRITE)
    }

    /// I/O control entry point of the VM device.
    ///
    /// - `cmd`: command number
    /// - `data`: command argument; the vCPU id for `KVM_CREATE_VCPU`, the user-space
    ///   address of a `KvmUserspaceMemoryRegion` for `KVM_SET_USER_MEMORY_REGION`
    ///
    /// Returns the new vCPU file descriptor for `KVM_CREATE_VCPU`, `Ok(0)` for the other
    /// handled commands, `Ok(usize::MAX)` for unknown commands and `Err(..)` on failure
    /// or for the commands that are recognised but not implemented.
    fn ioctl(&self, cmd: u32, data: usize) -> Result<usize, SystemError> {
        match cmd {
            0xdeadbeef => {
                kdebug!("kvm_vm ioctl");
                Ok(0)
            }
            KVM_CREATE_VCPU => {
                kdebug!("kvm_vcpu ioctl KVM_CREATE_VCPU");
                kvm_vm_ioctl_create_vcpu(data as u32)
            }
            KVM_SET_USER_MEMORY_REGION => {
                kdebug!("kvm_vcpu ioctl KVM_SET_USER_MEMORY_REGION data={:x}", data);
                let mut region = KvmUserspaceMemoryRegion::default();
                let region_len = core::mem::size_of::<KvmUserspaceMemoryRegion>();
                // SAFETY: `region` is a live local and the slice covers exactly its
                // `region_len` bytes. NOTE(review): this assumes every byte pattern is a
                // valid `KvmUserspaceMemoryRegion` (plain integer fields) -- confirm
                // against its definition.
                unsafe {
                    let region_bytes = core::slice::from_raw_parts_mut(
                        &mut region as *mut KvmUserspaceMemoryRegion as *mut u8,
                        region_len,
                    );
                    copy_from_user(region_bytes, VirtAddr::new(data))?;
                }
                kdebug!(
                    "slot={}, flag={}, memory_size={:x}, guest_phys_addr={}, userspace_addr={}",
                    region.slot,
                    region.flags,
                    region.memory_size,
                    region.guest_phys_addr, // start of the region as seen by the guest
                    region.userspace_addr   // host linear address backing the region
                );
                // Work on a copy of VM 0 and store it back once the region is accepted.
                let mut current_vm = vm(0).unwrap();
                current_vm.set_user_memory_region(&region)?;
                update_vm(0, current_vm);
                Ok(0)
            }
            KVM_GET_DIRTY_LOG | KVM_IRQFD | KVM_IOEVENTFD | KVM_IRQ_LINE_STATUS => {
                Err(SystemError::EOPNOTSUPP_OR_ENOTSUP)
            }
            _ => {
                kdebug!("kvm_vm ioctl");
                Ok(usize::MAX)
            }
        }
    }

    /// Reading through the filesystem interface is not supported; the device is driven
    /// through `ioctl` instead.
    fn read_at(
        &self,
        _offset: usize,
        _len: usize,
        _buf: &mut [u8],
        _data: &mut FilePrivateData,
    ) -> Result<usize, SystemError> {
        Err(SystemError::EOPNOTSUPP_OR_ENOTSUP)
    }

    /// Writing through the filesystem interface is not supported; the device is driven
    /// through `ioctl` instead.
    fn write_at(
        &self,
        _offset: usize,
        _len: usize,
        _buf: &[u8],
        _data: &mut FilePrivateData,
    ) -> Result<usize, SystemError> {
        Err(SystemError::EOPNOTSUPP_OR_ENOTSUP)
    }
}
/// Creates vCPU `id`, attaches it to VM 0 and returns a new file descriptor in the
/// current process that refers to a vCPU device inode.
///
/// # Errors
///
/// Propagates failures from vCPU creation/setup, from opening the vCPU file and from
/// allocating the file descriptor.
///
/// # Panics
///
/// Panics if VM 0 does not exist.
fn kvm_vm_ioctl_create_vcpu(id: u32) -> Result<usize, SystemError> {
    // A failed creation is reported to the caller (like the setup step below) instead
    // of panicking the kernel from a user-triggered ioctl.
    let vcpu = KVMArch::kvm_arch_vcpu_create(id)?;
    KVMArch::kvm_arch_vcpu_setup(vcpu.as_ref())?;

    // `vm(0)` hands out a copy of the VM, so the modified copy is stored back.
    let mut current_vm = vm(0).unwrap();
    current_vm.vcpu.push(vcpu);
    current_vm.nr_vcpus += 1;
    update_vm(0, current_vm);

    let file: File = File::new(LockedVcpuInode::new(), FileMode::O_RDWR)?;
    let fd = ProcessManager::current_pcb()
        .fd_table()
        .write()
        .alloc_fd(file, None)?;
    Ok(fd as usize)
}

1
kernel/src/virt/mod.rs Normal file
View File

@ -0,0 +1 @@
pub mod kvm;

View File

@ -1,3 +1,3 @@
target remote localhost:1234
file bin/kernel/kernel.elf
set follow-fork-mode child
set follow-fork-mode child

View File

@ -54,7 +54,7 @@ QEMU_MEMORY="512M"
QEMU_SMP="2,cores=2,threads=1,sockets=1"
QEMU_MONITOR="stdio"
QEMU_TRACE="${qemu_trace_std}"
QEMU_CPU_FEATURES="IvyBridge,apic,x2apic,+fpu,check,${allflags}"
QEMU_CPU_FEATURES="IvyBridge,apic,x2apic,+fpu,check,+vmx,${allflags}"
QEMU_RTC_CLOCK="clock=host,base=localtime"
QEMU_SERIAL="file:../serial_opt.txt"
QEMU_DRIVE="id=disk,file=${QEMU_DISK_IMAGE},if=none"
@ -66,7 +66,7 @@ QEMU_DRIVE="id=disk,file=${QEMU_DISK_IMAGE},if=none"
QEMU_DEVICES="-device ahci,id=ahci -device ide-hd,drive=disk,bus=ahci.0 -netdev user,id=hostnet0,hostfwd=tcp::12580-:12580 -device virtio-net-pci,vectors=5,netdev=hostnet0,id=net0 -usb -device qemu-xhci,id=xhci,p2=8,p3=4 -machine accel=${qemu_accel} -machine q35 "
QEMU_ARGUMENT="-d ${QEMU_DISK_IMAGE} -m ${QEMU_MEMORY} -smp ${QEMU_SMP} -boot order=d -monitor ${QEMU_MONITOR} -d ${qemu_trace_std} "
QEMU_ARGUMENT+="-s -S -cpu ${QEMU_CPU_FEATURES} -rtc ${QEMU_RTC_CLOCK} -serial ${QEMU_SERIAL} -drive ${QEMU_DRIVE} ${QEMU_DEVICES}"
QEMU_ARGUMENT+="-s -S -enable-kvm -cpu ${QEMU_CPU_FEATURES} -rtc ${QEMU_RTC_CLOCK} -serial ${QEMU_SERIAL} -drive ${QEMU_DRIVE} ${QEMU_DEVICES}"
if [ $flag_can_run -eq 1 ]; then
while true;do

View File

@ -1,5 +1,5 @@
user_apps_sub_dirs=shell about
user_apps_sub_dirs=shell about test_kvm
ECHO:
@echo "$@"

View File

@ -0,0 +1,9 @@
# Location of the legacy libc; its static archive is linked into the test program.
OLD_LIBC_INSTALL_PATH=$(ROOT_PATH)/bin/sysroot/usr/old_libc
# Link every object file found under this directory against libc.a using link.lds,
# then copy the result to the output directory without .eh_frame and .comment.
all: main.o
	$(LD) -b elf64-x86-64 -z muldefs -o $(tmp_output_dir)/test_kvm $(shell find . -name "*.o") $(OLD_LIBC_INSTALL_PATH)/lib/libc.a -T link.lds
	$(OBJCOPY) -I elf64-x86-64 -R ".eh_frame" -R ".comment" -O elf64-x86-64 $(tmp_output_dir)/test_kvm $(output_dir)/test_kvm.elf
# Compile the test program.
main.o: main.c
	$(CC) $(CFLAGS) -c main.c -o main.o

View File

@ -0,0 +1,3 @@
# Assemble boot.s into a flat binary and also write a hex dump of it to boot.hex.
boot.bin: boot.s
	nasm boot.s -o boot.bin
	xxd boot.bin > boot.hex

Binary file not shown.

View File

@ -0,0 +1,32 @@
00000000: 8cc8 8ed8 8ec0 e802 00eb feb8 1e00 89c5 ................
00000010: b910 00b8 0113 bb0c 00b2 00cd 10c3 4865 ..............He
00000020: 6c6c 6f2c 204f 5320 776f 726c 6421 0000 llo, OS world!..
00000030: 0000 0000 0000 0000 0000 0000 0000 0000 ................
00000040: 0000 0000 0000 0000 0000 0000 0000 0000 ................
00000050: 0000 0000 0000 0000 0000 0000 0000 0000 ................
00000060: 0000 0000 0000 0000 0000 0000 0000 0000 ................
00000070: 0000 0000 0000 0000 0000 0000 0000 0000 ................
00000080: 0000 0000 0000 0000 0000 0000 0000 0000 ................
00000090: 0000 0000 0000 0000 0000 0000 0000 0000 ................
000000a0: 0000 0000 0000 0000 0000 0000 0000 0000 ................
000000b0: 0000 0000 0000 0000 0000 0000 0000 0000 ................
000000c0: 0000 0000 0000 0000 0000 0000 0000 0000 ................
000000d0: 0000 0000 0000 0000 0000 0000 0000 0000 ................
000000e0: 0000 0000 0000 0000 0000 0000 0000 0000 ................
000000f0: 0000 0000 0000 0000 0000 0000 0000 0000 ................
00000100: 0000 0000 0000 0000 0000 0000 0000 0000 ................
00000110: 0000 0000 0000 0000 0000 0000 0000 0000 ................
00000120: 0000 0000 0000 0000 0000 0000 0000 0000 ................
00000130: 0000 0000 0000 0000 0000 0000 0000 0000 ................
00000140: 0000 0000 0000 0000 0000 0000 0000 0000 ................
00000150: 0000 0000 0000 0000 0000 0000 0000 0000 ................
00000160: 0000 0000 0000 0000 0000 0000 0000 0000 ................
00000170: 0000 0000 0000 0000 0000 0000 0000 0000 ................
00000180: 0000 0000 0000 0000 0000 0000 0000 0000 ................
00000190: 0000 0000 0000 0000 0000 0000 0000 0000 ................
000001a0: 0000 0000 0000 0000 0000 0000 0000 0000 ................
000001b0: 0000 0000 0000 0000 0000 0000 0000 0000 ................
000001c0: 0000 0000 0000 0000 0000 0000 0000 0000 ................
000001d0: 0000 0000 0000 0000 0000 0000 0000 0000 ................
000001e0: 0000 0000 0000 0000 0000 0000 0000 0000 ................
000001f0: 0000 0000 0000 0000 0000 0000 0000 55aa ..............U.

View File

@ -0,0 +1,54 @@
/* Produce a 64-bit x86-64 ELF image whose entry point is _start. */
OUTPUT_FORMAT("elf64-x86-64","elf64-x86-64","elf64-x86-64")
OUTPUT_ARCH(i386:x86-64)
ENTRY(_start)
SECTIONS
{
	/* The first output section starts at address 0x800000. */
	. = 0x800000;
	/* Code; _text/_etext mark its start and end. */
	.text :
	{
		_text = .;
		*(.text)
		*(.text.*)
		_etext = .;
	}
	. = ALIGN(8);
	/* Initialised data; _data/_edata mark its start and end. */
	.data :
	{
		_data = .;
		*(.data)
		*(.data.*)
		_edata = .;
	}
	/* Address at which the read-only data begins. */
	rodata_start_pa = .;
	/* Read-only data; _rodata/_erodata mark its start and end. */
	.rodata :
	{
		_rodata = .;
		*(.rodata)
		*(.rodata.*)
		_erodata = .;
	}
	/* Zero-initialised data; _bss/_ebss mark its start and end. */
	.bss :
	{
		_bss = .;
		*(.bss)
		*(.bss.*)
		_ebss = .;
	}
	/* End of the image. */
	_end = .;
}

114
user/apps/test_kvm/main.c Normal file
View File

@ -0,0 +1,114 @@
/**
 * @file main.c
 * @author xiaoyez (xiaoyez@zju.edu.cn)
 * @brief Test program for KVM
 * @version 0.1
 * @date 2023-07-13
 *
 * @copyright Copyright (c) 2023
 *
 */
/**
 * How to run the KVM test:
 * 1. In the DragonOS console, enter: exec bin/test_kvm.elf
 *
 */
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
/* ioctl commands issued on the VM file descriptor; they must match the kernel side. */
#define KVM_CREATE_VCPU 0x00
#define KVM_SET_USER_MEMORY_REGION 0x01
/*
 * ioctl commands issued on the vCPU file descriptor. They are numbered independently
 * of the VM commands above, which is why KVM_RUN and KVM_CREATE_VCPU share a value.
 */
#define KVM_RUN 0x00
#define KVM_GET_REGS 0x01
#define KVM_SET_REGS 0x02
/* Argument of KVM_SET_USER_MEMORY_REGION: describes one guest memory region. */
struct kvm_userspace_memory_region {
    uint32_t slot; // slot in which the memory region is registered
    // flags can take two values, KVM_MEM_LOG_DIRTY_PAGES and KVM_MEM_READONLY, telling kvm how to treat this memory.
    // KVM_MEM_LOG_DIRTY_PAGES enables dirty-page logging; KVM_MEM_READONLY makes the memory read-only.
    uint32_t flags;
    uint64_t guest_phys_addr; // guest physical address at which the region starts
    uint64_t memory_size; // size of the region in bytes
    uint64_t userspace_addr; // host virtual address backing the region
};
/* General-purpose register block exchanged with a vCPU: 16 GPRs, then rip and rflags. */
struct kvm_regs {
    /* out (KVM_GET_REGS) / in (KVM_SET_REGS) */
    uint64_t rax, rbx, rcx, rdx;
    uint64_t rsi, rdi, rsp, rbp;
    uint64_t r8, r9, r10, r11;
    uint64_t r12, r13, r14, r15;
    uint64_t rip, rflags;
};
/**
 * @brief Sample payload that executes CPUID (leaf 0, sub-leaf 0) in an endless loop.
 *
 * The function never returns in practice; the trailing return only satisfies the
 * signature. It is not referenced by main() in this file.
 *
 * @return int 0 (unreachable)
 */
int guest_code(){
    while (1)
    {
        // AT&T syntax puts the source first: "mov $0, %rax" loads 0 into rax, whereas
        // "mov %rax, 0" would store rax to memory address 0.
        // cpuid overwrites rax, rbx, rcx and rdx, so they are declared as clobbered.
        __asm__ __volatile__ (
            "mov $0, %%rax\n\t"
            "mov $0, %%rcx\n\t"
            "cpuid\n\t"
            :
            :
            : "rax", "rbx", "rcx", "rdx"
        );
    }
    return 0;
}
/**
 * @brief Exercise the KVM interface end to end: obtain a VM fd from /dev/kvm, register
 *        one guest memory region, create a vCPU, set its registers and run it.
 *
 * @return int 0 on success, -1 as soon as one step fails
 */
int main()
{
    printf("Test kvm running...\n");
    printf("Open /dev/kvm\n");
    int kvm_fd = open("/dev/kvm", O_RDWR|O_CLOEXEC);
    if (kvm_fd < 0)
    {
        printf("open /dev/kvm failed: %d\n", kvm_fd);
        return -1;
    }
    // Command 0x01 on the /dev/kvm fd yields the VM fd.
    // NOTE(review): presumably the kernel's create-VM command -- confirm against the kvm device.
    int vmfd = ioctl(kvm_fd, 0x01, 0);
    printf("vmfd=%d\n", vmfd);
    if (vmfd < 0)
    {
        return -1;
    }

    // Guest program: prints the sum of al and bl as a digit plus a newline to port 0x3f8, then halts.
    const uint8_t code[] = {
        0xba, 0xf8, 0x03, /* mov $0x3f8, %dx */
        0x00, 0xd8,       /* add %bl, %al */
        0x04, '0',        /* add $'0', %al */
        0xee,             /* out %al, (%dx) */
        0xb0, '\n',       /* mov $'\n', %al */
        0xee,             /* out %al, (%dx) */
        0xf4,             /* hlt */
    };

    // The registered region must be backed by memory this program owns for its whole
    // length, so the code is copied into a dedicated buffer instead of registering the
    // 12-byte array itself. The buffer is aligned to 0x1000 (assumed page size).
    static uint8_t guest_mem[0x4000] __attribute__((aligned(0x1000)));
    size_t mem_size = sizeof(guest_mem); // size of user memory you want to assign
    for (size_t i = 0; i < sizeof(code); ++i)
    {
        guest_mem[i] = code[i];
    }
    printf("code=%p\n", (void *)guest_mem);

    struct kvm_userspace_memory_region region = {
        .slot = 0,
        .flags = 0,
        .guest_phys_addr = 0,
        .memory_size = mem_size,
        .userspace_addr = (uint64_t)guest_mem
    };
    if (ioctl(vmfd, KVM_SET_USER_MEMORY_REGION, (uint64_t)&region) < 0)
    {
        printf("KVM_SET_USER_MEMORY_REGION failed\n");
        return -1;
    }

    int vcpufd = ioctl(vmfd, KVM_CREATE_VCPU, 0);
    printf("vcpufd=%d\n", vcpufd);
    if (vcpufd < 0)
    {
        return -1;
    }

    int user_entry = 0x0;

    struct kvm_regs regs = {0};
    regs.rip = user_entry;
    regs.rsp = 0x3000; // stack address
    regs.rflags = 0x2; // in x86 the 0x2 bit should always be set
    if (ioctl(vcpufd, KVM_SET_REGS, (uint64_t)&regs) < 0) // set registers
    {
        printf("KVM_SET_REGS failed\n");
        return -1;
    }

    if (ioctl(vcpufd, KVM_RUN, 0) < 0)
    {
        printf("KVM_RUN failed\n");
        return -1;
    }

    return 0;
}

View File

@ -12,4 +12,17 @@
int open(const char *path, int options, ...)
{
    // Only path and options are handed to SYS_OPEN; the variadic arguments are not forwarded.
    return syscall_invoke(SYS_OPEN, (uint64_t)path, options, 0, 0, 0, 0, 0, 0);
}
/**
 * @brief ioctl interface: send a control command to an open file descriptor
 *
 * @param fd file descriptor the command is sent to
 * @param cmd command number
 * @param data command argument (a plain value or an address, depending on cmd)
 * @param ... further variadic arguments; they are not forwarded to the system call
 * @return int the value returned by the SYS_IOCTL system call
 */
int ioctl(int fd, int cmd, uint64_t data, ...)
{
    return syscall_invoke(SYS_IOCTL, fd, cmd, data, 0, 0, 0, 0, 0);
}

View File

@ -14,6 +14,8 @@
extern "C" {
#endif
#include <stdint.h>
#define O_RDONLY 00000000 // Open Read-only
#define O_WRONLY 00000001 // Open Write-only
#define O_RDWR 00000002 // Open read/write
@ -72,6 +74,16 @@ extern "C" {
*/
int open(const char * path, int options, ...);
/**
 * @brief ioctl interface: send a control command to an open file descriptor
 *
 * @param fd file descriptor the command is sent to
 * @param cmd command number
 * @param data command argument (a plain value or an address, depending on cmd)
 * @param ... further variadic arguments
 * @return int result of the ioctl system call
 */
int ioctl(int fd, int cmd, uint64_t data, ...);
#if defined(__cplusplus)
} /* extern "C" */
#endif

View File

@ -49,6 +49,7 @@
#define SYS_ACCEPT 40 // 接受一个socket连接
#define SYS_GETSOCKNAME 41 // 获取socket的名字
#define SYS_GETPEERNAME 42 // 获取socket的对端名字
#define SYS_IOCTL 54
#define SYS_GETCWD 48