mirror of https://github.com/asterinas/asterinas.git (synced 2025-06-10 13:56:48 +00:00)

Improve the VmSpace forking API

parent d4036d1e9d
commit ac6d9256ef
@@ -16,7 +16,7 @@ use align_ext::AlignExt;
 use aster_rights::Rights;
 use ostd::{
     cpu::CpuExceptionInfo,
-    mm::{VmSpace, MAX_USERSPACE_VADDR},
+    mm::{PageFlags, PageProperty, VmSpace, MAX_USERSPACE_VADDR},
 };
 
 use self::{
@@ -220,13 +220,6 @@ impl Vmar_ {
     }
 
     fn new_root() -> Arc<Self> {
-        fn handle_page_fault_wrapper(
-            vm_space: &VmSpace,
-            trap_info: &CpuExceptionInfo,
-        ) -> core::result::Result<(), ()> {
-            handle_page_fault_from_vm_space(vm_space, &trap_info.try_into().unwrap())
-        }
-
         let mut free_regions = BTreeMap::new();
         let root_region = FreeRegion::new(ROOT_VMAR_LOWEST_ADDR..ROOT_VMAR_CAP_ADDR);
         free_regions.insert(root_region.start(), root_region);
@@ -668,7 +661,9 @@ impl Vmar_ {
         let vm_space = if let Some(parent) = parent {
             parent.vm_space().clone()
         } else {
-            Arc::new(self.vm_space().fork_copy_on_write())
+            let new_space = VmSpace::new();
+            new_space.register_page_fault_handler(handle_page_fault_wrapper);
+            Arc::new(new_space)
         };
         Vmar_::new(vmar_inner, vm_space, self.base, self.size, parent)
     };
@@ -694,18 +689,43 @@ impl Vmar_ {
         }
 
-        // Clone mappings.
-        for (vm_mapping_base, vm_mapping) in &inner.vm_mappings {
-            let new_mapping = Arc::new(vm_mapping.new_fork(&new_vmar_)?);
-            new_vmar_
-                .inner
-                .lock()
-                .vm_mappings
-                .insert(*vm_mapping_base, new_mapping);
-        }
+        {
+            let new_vmspace = new_vmar_.vm_space();
+            let range = self.base..(self.base + self.size);
+            let mut new_cursor = new_vmspace.cursor_mut(&range).unwrap();
+            let cur_vmspace = self.vm_space();
+            let mut cur_cursor = cur_vmspace.cursor_mut(&range).unwrap();
+            for (vm_mapping_base, vm_mapping) in &inner.vm_mappings {
+                // Clone the `VmMapping` to the new VMAR.
+                let new_mapping = Arc::new(vm_mapping.new_fork(&new_vmar_)?);
+                new_vmar_
+                    .inner
+                    .lock()
+                    .vm_mappings
+                    .insert(*vm_mapping_base, new_mapping);
+
+                // Protect the mapping and copy to the new page table for COW.
+                cur_cursor.jump(*vm_mapping_base).unwrap();
+                new_cursor.jump(*vm_mapping_base).unwrap();
+                let mut op = |page: &mut PageProperty| {
+                    page.flags -= PageFlags::W;
+                };
+                new_cursor.copy_from(&mut cur_cursor, vm_mapping.map_size(), &mut op);
+            }
+        }
 
         Ok(new_vmar_)
     }
 }
 
+/// This is for fallible user space write handling.
+fn handle_page_fault_wrapper(
+    vm_space: &VmSpace,
+    trap_info: &CpuExceptionInfo,
+) -> core::result::Result<(), ()> {
+    handle_page_fault_from_vm_space(vm_space, &trap_info.try_into().unwrap())
+}
+
 impl<R> Vmar<R> {
     /// The base address, i.e., the offset relative to the root VMAR.
     ///
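The two hunks above replace the old one-call `fork_copy_on_write` with an explicit pass that write-protects each `VmMapping` while copying it into the child's page table. A minimal, runnable sketch of this protect-then-duplicate idea, using simplified stand-in types rather than the ostd API:

use std::collections::BTreeMap;

#[derive(Clone, Copy, Debug, PartialEq)]
struct PageProp {
    writable: bool,
}

// Simplified stand-in for a per-process mapping table.
type Mappings = BTreeMap<usize, PageProp>;

// Protect-then-duplicate: strip the write bit from the parent's mappings
// while cloning them into the child, so that the first write in either
// space faults and can trigger a page copy.
fn fork_cow(parent: &mut Mappings) -> Mappings {
    let mut child = Mappings::new();
    for (va, prop) in parent.iter_mut() {
        prop.writable = false; // protect the parent
        child.insert(*va, *prop); // duplicate the now read-only mapping
    }
    child
}

fn main() {
    let mut parent = Mappings::from([(0x1000, PageProp { writable: true })]);
    let child = fork_cow(&mut parent);
    assert!(!parent[&0x1000].writable);
    assert_eq!(child[&0x1000], parent[&0x1000]);
}

The key property is that after the pass, both address spaces are read-only over the copied range, so the first write in either one traps into the registered page fault handler.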
@@ -734,26 +734,97 @@ where
         None
     }
 
-    pub fn preempt_guard(&self) -> &DisabledPreemptGuard {
-        &self.0.preempt_guard
-    }
-
+    /// Copies the mapping from the given cursor to the current cursor.
+    ///
+    /// All the mappings in the current cursor's range must be empty. The
+    /// function allows the source cursor to operate on the mapping before
+    /// the copy happens. So it is equivalent to protect then duplicate.
+    /// Only the mapping is copied, the mapped pages are not copied.
+    ///
+    /// It can only copy tracked mappings since we consider the untracked
+    /// mappings not useful to be copied.
+    ///
+    /// After the operation, both cursors will advance by the specified length.
+    ///
+    /// # Safety
+    ///
+    /// The caller should ensure that
+    /// - the range being copied with the operation does not affect kernel's
+    ///   memory safety.
+    /// - both of the cursors are in tracked mappings.
+    ///
+    /// # Panics
+    ///
+    /// This function will panic if:
+    /// - either one of the range to be copied is out of the range where any
+    ///   of the cursor is required to operate;
+    /// - either one of the specified virtual address ranges only covers a
+    ///   part of a page.
+    /// - the current cursor's range contains mapped pages.
+    pub unsafe fn copy_from(
+        &mut self,
+        src: &mut Self,
+        len: usize,
+        op: &mut impl FnMut(&mut PageProperty),
+    ) {
+        assert!(len % page_size::<C>(1) == 0);
+        let this_end = self.0.va + len;
+        assert!(this_end <= self.0.barrier_va.end);
+        let src_end = src.0.va + len;
+        assert!(src_end <= src.0.barrier_va.end);
+
+        while self.0.va < this_end && src.0.va < src_end {
+            let cur_pte = src.0.read_cur_pte();
+            if !cur_pte.is_present() {
+                src.0.move_forward();
+                continue;
+            }
+
+            // Go down if it's not a last node.
+            if !cur_pte.is_last(src.0.level) {
+                src.0.level_down();
+
+                // We have got down a level. If there's no mapped PTEs in
+                // the current node, we can go back and skip to save time.
+                if src.0.guards[(src.0.level - 1) as usize]
+                    .as_ref()
+                    .unwrap()
+                    .nr_children()
+                    == 0
+                {
+                    src.0.level_up();
+                    src.0.move_forward();
+                }
+
+                continue;
+            }
+
+            // Do protection.
+            let mut pte_prop = cur_pte.prop();
+            op(&mut pte_prop);
+
+            let idx = src.0.cur_idx();
+            src.cur_node_mut().protect(idx, pte_prop);
+
+            // Do copy.
+            let child = src.cur_node_mut().child(idx, true);
+            let Child::<E, C>::Page(page, prop) = child else {
+                panic!("Unexpected child for source mapping: {:#?}", child);
+            };
+            self.jump(src.0.va).unwrap();
+            let mapped_page_size = page.size();
+            let original = self.map(page, prop);
+            debug_assert!(original.is_none());
+
+            // Only move the source cursor forward since `Self::map` will do it.
+            // This assertion is to ensure that they move by the same length.
+            debug_assert_eq!(mapped_page_size, page_size::<C>(src.0.level));
+            src.0.move_forward();
+        }
+    }
+
-    /// Consumes itself and leak the root guard for the caller if it locked the root level.
-    ///
-    /// It is useful when the caller wants to keep the root guard while the cursor should be dropped.
-    pub(super) fn leak_root_guard(mut self) -> Option<PageTableNode<E, C>> {
-        if self.0.guard_level != C::NR_LEVELS {
-            return None;
-        }
-
-        while self.0.level < C::NR_LEVELS {
-            self.0.level_up();
-        }
-
-        // Ok to drop the cursor here because we ensure not to access the page table if the current
-        // level is the root level when running the dropping method.
-        self.0.guards[(C::NR_LEVELS - 1) as usize].take()
-    }
-
+    pub fn preempt_guard(&self) -> &DisabledPreemptGuard {
+        &self.0.preempt_guard
+    }
 
     /// Goes down a level assuming the current slot is absent.
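The `op` closure passed to `copy_from` strips `PageFlags::W`, so any later write to either space faults. A hedged sketch of how a handler might then resolve such a copy-on-write fault; `Page`, `Mapping`, and `resolve_cow_fault` are illustrative names, not the Asterinas handler:

use std::rc::Rc;

// Illustrative COW fault resolution; these types are hypothetical
// stand-ins, not ostd APIs.
struct Page {
    bytes: Vec<u8>,
}

struct Mapping {
    page: Rc<Page>,
    writable: bool,
}

// On a write fault against a read-only COW mapping: copy the shared
// page, remap the private copy writable, and leave the sibling's
// handle to the original page untouched.
fn resolve_cow_fault(m: &mut Mapping) {
    if !m.writable {
        let private = Page {
            bytes: m.page.bytes.clone(), // copy the frame contents
        };
        m.page = Rc::new(private); // drop this space's shared handle
        m.writable = true; // restore write access
    }
}

fn main() {
    let shared = Rc::new(Page { bytes: vec![0u8; 4096] });
    let mut child = Mapping {
        page: Rc::clone(&shared),
        writable: false,
    };
    resolve_cow_fault(&mut child);
    assert!(child.writable);
    // The child now owns a private copy; only `shared` references the original.
    assert_eq!(Rc::strong_count(&shared), 1);
}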
@@ -92,53 +92,29 @@ impl PageTable<UserMode> {
         self.root.activate();
     }
-
-    /// Create a cloned new page table.
-    ///
-    /// This method takes a mutable cursor to the old page table that locks the
-    /// entire virtual address range. The caller may implement the copy-on-write
-    /// mechanism by first protecting the old page table and then clone it using
-    /// this method.
-    ///
-    /// TODO: We may consider making the page table itself copy-on-write.
-    pub fn clone_with(
-        &self,
-        cursor: CursorMut<'_, UserMode, PageTableEntry, PagingConsts>,
-    ) -> Self {
-        let root_node = cursor.leak_root_guard().unwrap();
-
-        const NR_PTES_PER_NODE: usize = nr_subpage_per_huge::<PagingConsts>();
-        let new_root_node = unsafe {
-            root_node.make_copy(
-                0..NR_PTES_PER_NODE / 2,
-                NR_PTES_PER_NODE / 2..NR_PTES_PER_NODE,
-            )
-        };
-
-        PageTable::<UserMode> {
-            root: new_root_node.into_raw(),
-            _phantom: PhantomData,
-        }
-    }
 }
 
 impl PageTable<KernelMode> {
     /// Create a new user page table.
     ///
-    /// This should be the only way to create the first user page table, that is
-    /// to fork the kernel page table with all the kernel mappings shared.
-    ///
-    /// Then, one can use a user page table to call [`fork_copy_on_write`], creating
-    /// other child page tables.
+    /// This should be the only way to create the user page table, that is to
+    /// duplicate the kernel page table with all the kernel mappings shared.
    pub fn create_user_page_table(&self) -> PageTable<UserMode> {
        let root_node = self.root.clone_shallow().lock();
+       let mut new_node = PageTableNode::alloc(PagingConsts::NR_LEVELS);
 
        // Make a shallow copy of the root node in the kernel space range.
        // The user space range is not copied.
        const NR_PTES_PER_NODE: usize = nr_subpage_per_huge::<PagingConsts>();
-       let new_root_node =
-           unsafe { root_node.make_copy(0..0, NR_PTES_PER_NODE / 2..NR_PTES_PER_NODE) };
+       for i in NR_PTES_PER_NODE / 2..NR_PTES_PER_NODE {
+           let child = root_node.child(i, /* meaningless */ true);
+           if !child.is_none() {
+               let _ = new_node.replace_child(i, child, /* meaningless */ true);
+           }
+       }
 
        PageTable::<UserMode> {
-           root: new_root_node.into_raw(),
+           root: new_node.into_raw(),
            _phantom: PhantomData,
        }
    }
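`create_user_page_table` now copies only the upper half of the root node's entries by reference, so every user page table shares the kernel's mappings. A small sketch of the index arithmetic behind that half-split, assuming a 512-entry root node and 48-bit virtual addresses (the usual x86-64 configuration):

// Assumes a 512-entry root node (2^9), where the root index is taken
// from bits 47..39 of the virtual address.
const NR_PTES_PER_NODE: usize = 512;

fn root_index(va: usize) -> usize {
    (va >> 39) & (NR_PTES_PER_NODE - 1)
}

fn main() {
    // User addresses land in the lower half, which the new user page
    // table leaves empty...
    assert!(root_index(0x0000_7fff_ffff_f000) < NR_PTES_PER_NODE / 2);
    // ...while kernel addresses land in the upper half, whose children
    // are copied by reference from the kernel root node.
    assert!(root_index(0xffff_8000_0000_0000) >= NR_PTES_PER_NODE / 2);
}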
@@ -25,9 +25,7 @@
 //! the initialization of the entity that the PTE points to. This is taken care in this module.
 //!
 
-use core::{
-    fmt, marker::PhantomData, mem::ManuallyDrop, ops::Range, panic, sync::atomic::Ordering,
-};
+use core::{fmt, marker::PhantomData, mem::ManuallyDrop, panic, sync::atomic::Ordering};
 
 use super::{nr_subpage_per_huge, page_size, PageTableEntryTrait};
 use crate::{
@@ -374,74 +372,6 @@ where
         }
     }
 
-    /// Makes a copy of the page table node.
-    ///
-    /// This function allows you to control about the way to copy the children.
-    /// For indexes in `deep`, the children are deep copied and this function will be recursively called.
-    /// For indexes in `shallow`, the children are shallow copied as new references.
-    ///
-    /// You cannot shallow copy a child that is mapped to a page. Deep copying a page child will not
-    /// copy the mapped page but will copy the handle to the page.
-    ///
-    /// You cannot either deep copy or shallow copy a child that is mapped to an untracked page.
-    ///
-    /// The ranges must be disjoint.
-    pub(super) unsafe fn make_copy(&self, deep: Range<usize>, shallow: Range<usize>) -> Self {
-        debug_assert!(deep.end <= nr_subpage_per_huge::<C>());
-        debug_assert!(shallow.end <= nr_subpage_per_huge::<C>());
-        debug_assert!(deep.end <= shallow.start || deep.start >= shallow.end);
-
-        let mut new_pt = Self::alloc(self.level());
-        let mut copied_child_count = self.nr_children();
-        for i in deep {
-            if copied_child_count == 0 {
-                return new_pt;
-            }
-            match self.child(i, true) {
-                Child::PageTable(pt) => {
-                    let guard = pt.clone_shallow().lock();
-                    let new_child = guard.make_copy(0..nr_subpage_per_huge::<C>(), 0..0);
-                    let old = new_pt.replace_child(i, Child::PageTable(new_child.into_raw()), true);
-                    debug_assert!(old.is_none());
-                    copied_child_count -= 1;
-                }
-                Child::Page(page, prop) => {
-                    let old = new_pt.replace_child(i, Child::Page(page.clone(), prop), true);
-                    debug_assert!(old.is_none());
-                    copied_child_count -= 1;
-                }
-                Child::None => {}
-                Child::Untracked(_, _) => {
-                    unreachable!();
-                }
-            }
-        }
-
-        for i in shallow {
-            if copied_child_count == 0 {
-                return new_pt;
-            }
-            debug_assert_eq!(self.level(), C::NR_LEVELS);
-            match self.child(i, /* meaningless */ true) {
-                Child::PageTable(pt) => {
-                    let old = new_pt.replace_child(
-                        i,
-                        Child::PageTable(pt.clone_shallow()),
-                        /* meaningless */ true,
-                    );
-                    debug_assert!(old.is_none());
-                    copied_child_count -= 1;
-                }
-                Child::None => {}
-                Child::Page(_, _) | Child::Untracked(_, _) => {
-                    unreachable!();
-                }
-            }
-        }
-
-        new_pt
-    }
-
     /// Splits the untracked huge page mapped at `idx` to smaller pages.
     pub(super) fn split_untracked_huge(&mut self, idx: usize) {
         // These should be ensured by the cursor.
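The removed `make_copy` distinguished deep copies, which recurse into child tables, from shallow copies, which merely take new references to the same child node. A toy model of that distinction; the `Node` and `Child` types here are illustrative, not the ostd page-table node:

use std::rc::Rc;

enum Child {
    None,
    Table(Rc<Node>),
}

struct Node {
    children: Vec<Child>,
}

impl Node {
    // Shallow copy: the new parent references the same child table.
    fn copy_shallow(child: &Rc<Node>) -> Child {
        Child::Table(Rc::clone(child))
    }

    // Deep copy: recursively duplicate the child table itself.
    fn copy_deep(child: &Rc<Node>) -> Child {
        let copied = child
            .children
            .iter()
            .map(|c| match c {
                Child::None => Child::None,
                Child::Table(t) => Self::copy_deep(t),
            })
            .collect();
        Child::Table(Rc::new(Node { children: copied }))
    }
}

fn main() {
    let leaf = Rc::new(Node { children: vec![Child::None] });
    let shallow = Node::copy_shallow(&leaf);
    let _deep = Node::copy_deep(&leaf);
    if let Child::Table(t) = &shallow {
        assert!(Rc::ptr_eq(t, &leaf)); // the shallow copy shares the node
    }
    assert_eq!(Rc::strong_count(&leaf), 2); // leaf + shallow; the deep copy owns its own node
}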
@@ -81,6 +81,10 @@ fn test_untracked_map_unmap() {
 
 #[ktest]
 fn test_user_copy_on_write() {
+    fn prot_op(prop: &mut PageProperty) {
+        prop.flags -= PageFlags::W;
+    }
+
     let pt = PageTable::<UserMode>::empty();
     let from = PAGE_SIZE..PAGE_SIZE * 2;
     let page = allocator::alloc_single(FrameMeta::default()).unwrap();
@@ -96,7 +100,14 @@ fn test_user_copy_on_write() {
     unsafe { pt.cursor_mut(&from).unwrap().map(page.clone().into(), prop) };
     assert_eq!(pt.query(from.start + 10).unwrap().0, start_paddr + 10);
 
-    let child_pt = pt.clone_with(pt.cursor_mut(&(0..MAX_USERSPACE_VADDR)).unwrap());
+    let child_pt = {
+        let child_pt = PageTable::<UserMode>::empty();
+        let range = 0..MAX_USERSPACE_VADDR;
+        let mut child_cursor = child_pt.cursor_mut(&range).unwrap();
+        let mut parent_cursor = pt.cursor_mut(&range).unwrap();
+        unsafe { child_cursor.copy_from(&mut parent_cursor, range.len(), &mut prot_op) };
+        child_pt
+    };
     assert_eq!(pt.query(from.start + 10).unwrap().0, start_paddr + 10);
     assert_eq!(child_pt.query(from.start + 10).unwrap().0, start_paddr + 10);
     assert!(matches!(
@@ -106,7 +117,14 @@ fn test_user_copy_on_write() {
     assert!(pt.query(from.start + 10).is_none());
     assert_eq!(child_pt.query(from.start + 10).unwrap().0, start_paddr + 10);
 
-    let sibling_pt = pt.clone_with(pt.cursor_mut(&(0..MAX_USERSPACE_VADDR)).unwrap());
+    let sibling_pt = {
+        let sibling_pt = PageTable::<UserMode>::empty();
+        let range = 0..MAX_USERSPACE_VADDR;
+        let mut sibling_cursor = sibling_pt.cursor_mut(&range).unwrap();
+        let mut parent_cursor = pt.cursor_mut(&range).unwrap();
+        unsafe { sibling_cursor.copy_from(&mut parent_cursor, range.len(), &mut prot_op) };
+        sibling_pt
+    };
     assert!(sibling_pt.query(from.start + 10).is_none());
     assert_eq!(child_pt.query(from.start + 10).unwrap().0, start_paddr + 10);
     drop(pt);
@@ -22,7 +22,7 @@ use super::{
     kspace::KERNEL_PAGE_TABLE,
     page::DynPage,
     page_table::{PageTable, UserMode},
-    PageFlags, PageProperty, VmReader, VmWriter, PAGE_SIZE,
+    PageProperty, VmReader, VmWriter, PAGE_SIZE,
 };
 use crate::{
     arch::mm::{current_page_table_paddr, PageTableEntry, PagingConsts},
@@ -173,48 +173,6 @@ impl VmSpace {
         self.page_fault_handler.call_once(|| func);
     }
 
-    /// Forks a new VM space with copy-on-write semantics.
-    ///
-    /// Both the parent and the newly forked VM space will be marked as
-    /// read-only. And both the VM space will take handles to the same
-    /// physical memory pages.
-    pub fn fork_copy_on_write(&self) -> Self {
-        // Protect the parent VM space as read-only.
-        let end = MAX_USERSPACE_VADDR;
-        let mut cursor = self.cursor_mut(&(0..end)).unwrap();
-        let mut op = |prop: &mut PageProperty| {
-            prop.flags -= PageFlags::W;
-        };
-
-        cursor.protect(end, &mut op);
-
-        let page_fault_handler = {
-            let new_handler = Once::new();
-            if let Some(handler) = self.page_fault_handler.get() {
-                new_handler.call_once(|| *handler);
-            }
-            new_handler
-        };
-
-        let CursorMut {
-            pt_cursor,
-            activation_lock,
-            ..
-        } = cursor;
-
-        let new_pt = self.pt.clone_with(pt_cursor);
-
-        // Release the activation lock after the page table is cloned to
-        // prevent modification to the parent page table while cloning.
-        drop(activation_lock);
-
-        Self {
-            pt: new_pt,
-            page_fault_handler,
-            activation_lock: RwLock::new(()),
-        }
-    }
-
     /// Creates a reader to read data from the user space of the current task.
     ///
     /// Returns `Err` if this `VmSpace` does not belong to the user space of the current task
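With `fork_copy_on_write` removed, callers compose the same behavior from `VmSpace::new`, `register_page_fault_handler`, and the new cursor `copy_from`, as the VMAR hunks above do. A condensed restatement of that call sequence (a fragment, not compilable on its own; `old_space`, `base`, and `size` stand for the caller's values):

// Create the child space and give it the same fault handler.
let new_space = VmSpace::new();
new_space.register_page_fault_handler(handle_page_fault_wrapper);

let range = base..(base + size);
let mut new_cursor = new_space.cursor_mut(&range).unwrap();
let mut cur_cursor = old_space.cursor_mut(&range).unwrap();

// Write-protect the source while copying, so both spaces end up
// read-only and the first write in either one faults for COW.
let mut op = |prop: &mut PageProperty| {
    prop.flags -= PageFlags::W;
};
new_cursor.copy_from(&mut cur_cursor, size, &mut op);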
@@ -433,6 +391,44 @@ impl CursorMut<'_, '_> {
         self.dispatch_tlb_flush();
     }
 
+    /// Copies the mapping from the given cursor to the current cursor.
+    ///
+    /// All the mappings in the current cursor's range must be empty. The
+    /// function allows the source cursor to operate on the mapping before
+    /// the copy happens. So it is equivalent to protect then duplicate.
+    /// Only the mapping is copied, the mapped pages are not copied.
+    ///
+    /// After the operation, both cursors will advance by the specified length.
+    ///
+    /// # Panics
+    ///
+    /// This function will panic if:
+    /// - either one of the range to be copied is out of the range where any
+    ///   of the cursor is required to operate;
+    /// - either one of the specified virtual address ranges only covers a
+    ///   part of a page.
+    /// - the current cursor's range contains mapped pages.
+    pub fn copy_from(
+        &mut self,
+        src: &mut Self,
+        len: usize,
+        op: &mut impl FnMut(&mut PageProperty),
+    ) {
+        let va = src.virt_addr();
+
+        // SAFETY: Operations on user memory spaces are safe if it doesn't
+        // involve dropping any pages.
+        unsafe { self.pt_cursor.copy_from(&mut src.pt_cursor, len, op) };
+
+        if len > TLB_FLUSH_ALL_THRESHOLD * PAGE_SIZE {
+            src.issue_tlb_flush(TlbFlushOp::All, None);
+        } else {
+            src.issue_tlb_flush(TlbFlushOp::Range(va..va + len), None);
+        }
+
+        src.dispatch_tlb_flush();
+    }
+
     fn issue_tlb_flush(&self, op: TlbFlushOp, drop_after_flush: Option<DynPage>) {
         let request = TlbFlushRequest {
             op,
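The flush at the end of the new `copy_from` switches from per-range invalidation to a full TLB flush once the copied length crosses a threshold. A runnable sketch of that decision; the value of `TLB_FLUSH_ALL_THRESHOLD` is assumed here, since the diff does not show it:

// Illustrative version of the flush-op choice at the end of the new
// `copy_from`. The threshold value is an assumption for this sketch;
// ostd defines the real one elsewhere.
const PAGE_SIZE: usize = 4096;
const TLB_FLUSH_ALL_THRESHOLD: usize = 32;

#[derive(Debug, PartialEq)]
enum TlbFlushOp {
    All,
    Range(core::ops::Range<usize>),
}

fn flush_op_for(va: usize, len: usize) -> TlbFlushOp {
    if len > TLB_FLUSH_ALL_THRESHOLD * PAGE_SIZE {
        // Past the threshold, one global flush beats per-page invalidation.
        TlbFlushOp::All
    } else {
        TlbFlushOp::Range(va..va + len)
    }
}

fn main() {
    assert_eq!(
        flush_op_for(0x1000, 2 * PAGE_SIZE),
        TlbFlushOp::Range(0x1000..0x3000)
    );
    assert_eq!(flush_op_for(0x1000, 64 * PAGE_SIZE), TlbFlushOp::All);
}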