Improve the VmSpace forking API

Zhang Junyang 2024-09-23 22:16:09 +08:00 committed by Tate, Hongliang Tian
parent d4036d1e9d
commit ac6d9256ef
6 changed files with 197 additions and 186 deletions


@@ -16,7 +16,7 @@ use align_ext::AlignExt;
use aster_rights::Rights;
use ostd::{
cpu::CpuExceptionInfo,
mm::{VmSpace, MAX_USERSPACE_VADDR},
mm::{PageFlags, PageProperty, VmSpace, MAX_USERSPACE_VADDR},
};
use self::{
@@ -220,13 +220,6 @@ impl Vmar_ {
}
fn new_root() -> Arc<Self> {
fn handle_page_fault_wrapper(
vm_space: &VmSpace,
trap_info: &CpuExceptionInfo,
) -> core::result::Result<(), ()> {
handle_page_fault_from_vm_space(vm_space, &trap_info.try_into().unwrap())
}
let mut free_regions = BTreeMap::new();
let root_region = FreeRegion::new(ROOT_VMAR_LOWEST_ADDR..ROOT_VMAR_CAP_ADDR);
free_regions.insert(root_region.start(), root_region);
@@ -668,7 +661,9 @@ impl Vmar_ {
let vm_space = if let Some(parent) = parent {
parent.vm_space().clone()
} else {
Arc::new(self.vm_space().fork_copy_on_write())
let new_space = VmSpace::new();
new_space.register_page_fault_handler(handle_page_fault_wrapper);
Arc::new(new_space)
};
Vmar_::new(vmar_inner, vm_space, self.base, self.size, parent)
};
@@ -694,18 +689,43 @@ impl Vmar_ {
}
// Clone mappings.
for (vm_mapping_base, vm_mapping) in &inner.vm_mappings {
let new_mapping = Arc::new(vm_mapping.new_fork(&new_vmar_)?);
new_vmar_
.inner
.lock()
.vm_mappings
.insert(*vm_mapping_base, new_mapping);
{
let new_vmspace = new_vmar_.vm_space();
let range = self.base..(self.base + self.size);
let mut new_cursor = new_vmspace.cursor_mut(&range).unwrap();
let cur_vmspace = self.vm_space();
let mut cur_cursor = cur_vmspace.cursor_mut(&range).unwrap();
for (vm_mapping_base, vm_mapping) in &inner.vm_mappings {
// Clone the `VmMapping` to the new VMAR.
let new_mapping = Arc::new(vm_mapping.new_fork(&new_vmar_)?);
new_vmar_
.inner
.lock()
.vm_mappings
.insert(*vm_mapping_base, new_mapping);
// Protect the mapping and copy it to the new page table for COW.
cur_cursor.jump(*vm_mapping_base).unwrap();
new_cursor.jump(*vm_mapping_base).unwrap();
let mut op = |page: &mut PageProperty| {
page.flags -= PageFlags::W;
};
new_cursor.copy_from(&mut cur_cursor, vm_mapping.map_size(), &mut op);
}
}
Ok(new_vmar_)
}
}
/// This is for fallible user space write handling.
fn handle_page_fault_wrapper(
vm_space: &VmSpace,
trap_info: &CpuExceptionInfo,
) -> core::result::Result<(), ()> {
handle_page_fault_from_vm_space(vm_space, &trap_info.try_into().unwrap())
}
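// A minimal sketch (not part of this commit) of how the pieces above fit
// together when forking an address space: the child gets a fresh `VmSpace`
// with the page fault handler registered, and the parent's mappings are
// write-protected while being copied into the child, so both sides share
// the same pages copy-on-write. The function name and parameters are
// illustrative only; the real forking code above works mapping by mapping
// rather than copying the whole range at once.
fn fork_vm_space_sketch(parent: &VmSpace, base: usize, size: usize) -> VmSpace {
    let child = VmSpace::new();
    child.register_page_fault_handler(handle_page_fault_wrapper);
    {
        let range = base..(base + size);
        let mut child_cursor = child.cursor_mut(&range).unwrap();
        let mut parent_cursor = parent.cursor_mut(&range).unwrap();
        // Drop the write permission while copying, so the first write on
        // either side faults and can be resolved by copying the page.
        child_cursor.copy_from(&mut parent_cursor, size, &mut |prop: &mut PageProperty| {
            prop.flags -= PageFlags::W;
        });
    }
    child
}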
impl<R> Vmar<R> {
/// The base address, i.e., the offset relative to the root VMAR.
///


@@ -734,26 +734,97 @@ where
None
}
pub fn preempt_guard(&self) -> &DisabledPreemptGuard {
&self.0.preempt_guard
/// Copies the mapping from the given cursor to the current cursor.
///
/// The range covered by the current cursor must contain no existing
/// mappings. The function applies the given operation to the source
/// mappings before the copy happens, so it is equivalent to protecting
/// and then duplicating them. Only the mappings are copied; the mapped
/// pages themselves are not.
///
/// It can only copy tracked mappings, since copying untracked mappings
/// is not considered useful.
///
/// After the operation, both cursors will advance by the specified length.
///
/// # Safety
///
/// The caller should ensure that:
/// - copying the range with the given operation does not affect the
/// kernel's memory safety;
/// - both of the cursors are in tracked mappings.
///
/// # Panics
///
/// This function will panic if:
/// - either of the ranges to be copied is out of the range where the
/// corresponding cursor is allowed to operate;
/// - either of the specified virtual address ranges covers only part of
/// a page;
/// - the current cursor's range contains mapped pages.
pub unsafe fn copy_from(
&mut self,
src: &mut Self,
len: usize,
op: &mut impl FnMut(&mut PageProperty),
) {
assert!(len % page_size::<C>(1) == 0);
let this_end = self.0.va + len;
assert!(this_end <= self.0.barrier_va.end);
let src_end = src.0.va + len;
assert!(src_end <= src.0.barrier_va.end);
while self.0.va < this_end && src.0.va < src_end {
let cur_pte = src.0.read_cur_pte();
if !cur_pte.is_present() {
src.0.move_forward();
continue;
}
// Go down if this is not a last-level (leaf) entry.
if !cur_pte.is_last(src.0.level) {
src.0.level_down();
// We have gone down a level. If there are no mapped PTEs in
// the current node, we can go back up and skip it to save time.
if src.0.guards[(src.0.level - 1) as usize]
.as_ref()
.unwrap()
.nr_children()
== 0
{
src.0.level_up();
src.0.move_forward();
}
continue;
}
// Do protection.
let mut pte_prop = cur_pte.prop();
op(&mut pte_prop);
let idx = src.0.cur_idx();
src.cur_node_mut().protect(idx, pte_prop);
// Do copy.
let child = src.cur_node_mut().child(idx, true);
let Child::<E, C>::Page(page, prop) = child else {
panic!("Unexpected child for source mapping: {:#?}", child);
};
self.jump(src.0.va).unwrap();
let mapped_page_size = page.size();
let original = self.map(page, prop);
debug_assert!(original.is_none());
// Only move the source cursor forward, since `Self::map` has already
// moved the destination cursor. This assertion ensures that both
// cursors advance by the same length.
debug_assert_eq!(mapped_page_size, page_size::<C>(src.0.level));
src.0.move_forward();
}
}
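// Usage sketch (illustrative; it mirrors the `test_user_copy_on_write` test
// updated by this commit): duplicate a whole user range into an empty page
// table while write-protecting the source, which is the protect-then-copy
// primitive behind copy-on-write forking. `src_pt` and `dst_pt` stand for two
// hypothetical `PageTable<UserMode>` values.
//
//     let range = 0..MAX_USERSPACE_VADDR;
//     let mut dst = dst_pt.cursor_mut(&range).unwrap();
//     let mut src = src_pt.cursor_mut(&range).unwrap();
//     // SAFETY: both cursors cover only tracked user-space mappings.
//     unsafe { dst.copy_from(&mut src, range.len(), &mut |p: &mut PageProperty| p.flags -= PageFlags::W) };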
/// Consumes itself and leaks the root guard for the caller if it has locked the root level.
///
/// It is useful when the caller wants to keep the root guard while the cursor should be dropped.
pub(super) fn leak_root_guard(mut self) -> Option<PageTableNode<E, C>> {
if self.0.guard_level != C::NR_LEVELS {
return None;
}
while self.0.level < C::NR_LEVELS {
self.0.level_up();
}
self.0.guards[(C::NR_LEVELS - 1) as usize].take()
// It is OK to drop the cursor here because we ensure not to access the page
// table if the current level is the root level when running the dropping method.
pub fn preempt_guard(&self) -> &DisabledPreemptGuard {
&self.0.preempt_guard
}
/// Goes down a level assuming the current slot is absent.


@@ -92,53 +92,29 @@ impl PageTable<UserMode> {
self.root.activate();
}
}
/// Creates a cloned new page table.
///
/// This method takes a mutable cursor to the old page table that locks the
/// entire virtual address range. The caller may implement the copy-on-write
/// mechanism by first protecting the old page table and then clone it using
/// this method.
///
/// TODO: We may consider making the page table itself copy-on-write.
pub fn clone_with(
&self,
cursor: CursorMut<'_, UserMode, PageTableEntry, PagingConsts>,
) -> Self {
let root_node = cursor.leak_root_guard().unwrap();
const NR_PTES_PER_NODE: usize = nr_subpage_per_huge::<PagingConsts>();
let new_root_node = unsafe {
root_node.make_copy(
0..NR_PTES_PER_NODE / 2,
NR_PTES_PER_NODE / 2..NR_PTES_PER_NODE,
)
};
PageTable::<UserMode> {
root: new_root_node.into_raw(),
_phantom: PhantomData,
}
}
}
impl PageTable<KernelMode> {
/// Creates a new user page table.
///
/// This should be the only way to create the first user page table, that is,
/// to fork the kernel page table with all the kernel mappings shared.
///
/// Then, one can use a user page table to call [`fork_copy_on_write`], creating
/// other child page tables.
/// This should be the only way to create a user page table, that is, to
/// duplicate the kernel page table with all the kernel mappings shared.
pub fn create_user_page_table(&self) -> PageTable<UserMode> {
let root_node = self.root.clone_shallow().lock();
let mut new_node = PageTableNode::alloc(PagingConsts::NR_LEVELS);
// Make a shallow copy of the root node in the kernel space range.
// The user space range is not copied.
const NR_PTES_PER_NODE: usize = nr_subpage_per_huge::<PagingConsts>();
let new_root_node =
unsafe { root_node.make_copy(0..0, NR_PTES_PER_NODE / 2..NR_PTES_PER_NODE) };
for i in NR_PTES_PER_NODE / 2..NR_PTES_PER_NODE {
let child = root_node.child(i, /* meaningless */ true);
if !child.is_none() {
let _ = new_node.replace_child(i, child, /* meaningless */ true);
}
}
PageTable::<UserMode> {
root: new_root_node.into_raw(),
root: new_node.into_raw(),
_phantom: PhantomData,
}
}
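// Illustrative assumption (not shown in this diff): a fresh `VmSpace` is
// expected to obtain its page table through this method, so that every user
// address space shares the kernel half of the root page table:
//
//     let user_pt = KERNEL_PAGE_TABLE.get().unwrap().create_user_page_table();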


@@ -25,9 +25,7 @@
//! the initialization of the entity that the PTE points to. This is taken care of in this module.
//!
use core::{
fmt, marker::PhantomData, mem::ManuallyDrop, ops::Range, panic, sync::atomic::Ordering,
};
use core::{fmt, marker::PhantomData, mem::ManuallyDrop, panic, sync::atomic::Ordering};
use super::{nr_subpage_per_huge, page_size, PageTableEntryTrait};
use crate::{
@@ -374,74 +372,6 @@ where
}
}
/// Makes a copy of the page table node.
///
/// This function allows you to control the way the children are copied.
/// For indexes in `deep`, the children are deep copied and this function is called recursively.
/// For indexes in `shallow`, the children are shallow copied as new references.
///
/// You cannot shallow copy a child that is mapped to a page. Deep copying a page child will not
/// copy the mapped page but will copy the handle to the page.
///
/// You can neither deep copy nor shallow copy a child that is mapped to an untracked page.
///
/// The ranges must be disjoint.
pub(super) unsafe fn make_copy(&self, deep: Range<usize>, shallow: Range<usize>) -> Self {
debug_assert!(deep.end <= nr_subpage_per_huge::<C>());
debug_assert!(shallow.end <= nr_subpage_per_huge::<C>());
debug_assert!(deep.end <= shallow.start || deep.start >= shallow.end);
let mut new_pt = Self::alloc(self.level());
let mut copied_child_count = self.nr_children();
for i in deep {
if copied_child_count == 0 {
return new_pt;
}
match self.child(i, true) {
Child::PageTable(pt) => {
let guard = pt.clone_shallow().lock();
let new_child = guard.make_copy(0..nr_subpage_per_huge::<C>(), 0..0);
let old = new_pt.replace_child(i, Child::PageTable(new_child.into_raw()), true);
debug_assert!(old.is_none());
copied_child_count -= 1;
}
Child::Page(page, prop) => {
let old = new_pt.replace_child(i, Child::Page(page.clone(), prop), true);
debug_assert!(old.is_none());
copied_child_count -= 1;
}
Child::None => {}
Child::Untracked(_, _) => {
unreachable!();
}
}
}
for i in shallow {
if copied_child_count == 0 {
return new_pt;
}
debug_assert_eq!(self.level(), C::NR_LEVELS);
match self.child(i, /*meaningless*/ true) {
Child::PageTable(pt) => {
let old = new_pt.replace_child(
i,
Child::PageTable(pt.clone_shallow()),
/*meaningless*/ true,
);
debug_assert!(old.is_none());
copied_child_count -= 1;
}
Child::None => {}
Child::Page(_, _) | Child::Untracked(_, _) => {
unreachable!();
}
}
}
new_pt
}
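// Illustrative note: with user space occupying the lower half of the root
// node and the kernel the upper half, a copy-on-write fork deep-copies the
// user half and shares the kernel half, as in the call sites shown in this
// diff:
//
//     root_node.make_copy(0..NR_PTES_PER_NODE / 2, NR_PTES_PER_NODE / 2..NR_PTES_PER_NODE)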
/// Splits the untracked huge page mapped at `idx` to smaller pages.
pub(super) fn split_untracked_huge(&mut self, idx: usize) {
// These should be ensured by the cursor.


@@ -81,6 +81,10 @@ fn test_untracked_map_unmap() {
#[ktest]
fn test_user_copy_on_write() {
fn prot_op(prop: &mut PageProperty) {
prop.flags -= PageFlags::W;
}
let pt = PageTable::<UserMode>::empty();
let from = PAGE_SIZE..PAGE_SIZE * 2;
let page = allocator::alloc_single(FrameMeta::default()).unwrap();
@@ -96,7 +100,14 @@ fn test_user_copy_on_write() {
unsafe { pt.cursor_mut(&from).unwrap().map(page.clone().into(), prop) };
assert_eq!(pt.query(from.start + 10).unwrap().0, start_paddr + 10);
let child_pt = pt.clone_with(pt.cursor_mut(&(0..MAX_USERSPACE_VADDR)).unwrap());
let child_pt = {
let child_pt = PageTable::<UserMode>::empty();
let range = 0..MAX_USERSPACE_VADDR;
let mut child_cursor = child_pt.cursor_mut(&range).unwrap();
let mut parent_cursor = pt.cursor_mut(&range).unwrap();
unsafe { child_cursor.copy_from(&mut parent_cursor, range.len(), &mut prot_op) };
child_pt
};
assert_eq!(pt.query(from.start + 10).unwrap().0, start_paddr + 10);
assert_eq!(child_pt.query(from.start + 10).unwrap().0, start_paddr + 10);
assert!(matches!(
@@ -106,7 +117,14 @@ fn test_user_copy_on_write() {
assert!(pt.query(from.start + 10).is_none());
assert_eq!(child_pt.query(from.start + 10).unwrap().0, start_paddr + 10);
let sibling_pt = pt.clone_with(pt.cursor_mut(&(0..MAX_USERSPACE_VADDR)).unwrap());
let sibling_pt = {
let sibling_pt = PageTable::<UserMode>::empty();
let range = 0..MAX_USERSPACE_VADDR;
let mut sibling_cursor = sibling_pt.cursor_mut(&range).unwrap();
let mut parent_cursor = pt.cursor_mut(&range).unwrap();
unsafe { sibling_cursor.copy_from(&mut parent_cursor, range.len(), &mut prot_op) };
sibling_pt
};
assert!(sibling_pt.query(from.start + 10).is_none());
assert_eq!(child_pt.query(from.start + 10).unwrap().0, start_paddr + 10);
drop(pt);


@@ -22,7 +22,7 @@ use super::{
kspace::KERNEL_PAGE_TABLE,
page::DynPage,
page_table::{PageTable, UserMode},
PageFlags, PageProperty, VmReader, VmWriter, PAGE_SIZE,
PageProperty, VmReader, VmWriter, PAGE_SIZE,
};
use crate::{
arch::mm::{current_page_table_paddr, PageTableEntry, PagingConsts},
@@ -173,48 +173,6 @@ impl VmSpace {
self.page_fault_handler.call_once(|| func);
}
/// Forks a new VM space with copy-on-write semantics.
///
/// Both the parent and the newly forked VM space will be marked as
/// read-only, and both VM spaces will take handles to the same
/// physical memory pages.
pub fn fork_copy_on_write(&self) -> Self {
// Protect the parent VM space as read-only.
let end = MAX_USERSPACE_VADDR;
let mut cursor = self.cursor_mut(&(0..end)).unwrap();
let mut op = |prop: &mut PageProperty| {
prop.flags -= PageFlags::W;
};
cursor.protect(end, &mut op);
let page_fault_handler = {
let new_handler = Once::new();
if let Some(handler) = self.page_fault_handler.get() {
new_handler.call_once(|| *handler);
}
new_handler
};
let CursorMut {
pt_cursor,
activation_lock,
..
} = cursor;
let new_pt = self.pt.clone_with(pt_cursor);
// Release the activation lock after the page table is cloned to
// prevent modification to the parent page table while cloning.
drop(activation_lock);
Self {
pt: new_pt,
page_fault_handler,
activation_lock: RwLock::new(()),
}
}
/// Creates a reader to read data from the user space of the current task.
///
/// Returns `Err` if this `VmSpace` is not belonged to the user space of the current task
@@ -433,6 +391,44 @@ impl CursorMut<'_, '_> {
self.dispatch_tlb_flush();
}
/// Copies the mapping from the given cursor to the current cursor.
///
/// The range covered by the current cursor must contain no existing
/// mappings. The function applies the given operation to the source
/// mappings before the copy happens, so it is equivalent to protecting
/// and then duplicating them. Only the mappings are copied; the mapped
/// pages themselves are not.
///
/// After the operation, both cursors will advance by the specified length.
///
/// # Panics
///
/// This function will panic if:
/// - either of the ranges to be copied is out of the range where the
/// corresponding cursor is allowed to operate;
/// - either of the specified virtual address ranges covers only part of
/// a page;
/// - the current cursor's range contains mapped pages.
pub fn copy_from(
&mut self,
src: &mut Self,
len: usize,
op: &mut impl FnMut(&mut PageProperty),
) {
let va = src.virt_addr();
// SAFETY: Operations on user memory spaces are safe as long as they do
// not involve dropping any pages.
unsafe { self.pt_cursor.copy_from(&mut src.pt_cursor, len, op) };
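// The source's page properties were just changed by `op`, so its stale TLB
// entries must be flushed. For a large range it is cheaper to flush the
// whole TLB than to invalidate it page by page.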
if len > TLB_FLUSH_ALL_THRESHOLD * PAGE_SIZE {
src.issue_tlb_flush(TlbFlushOp::All, None);
} else {
src.issue_tlb_flush(TlbFlushOp::Range(va..va + len), None);
}
src.dispatch_tlb_flush();
}
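// Usage sketch (illustrative; this is how the VMAR forking code earlier in
// this commit drives `copy_from`, one `VmMapping` at a time):
//
//     cur_cursor.jump(mapping_base).unwrap();
//     new_cursor.jump(mapping_base).unwrap();
//     new_cursor.copy_from(&mut cur_cursor, mapping_size, &mut |prop: &mut PageProperty| {
//         prop.flags -= PageFlags::W;
//     });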
fn issue_tlb_flush(&self, op: TlbFlushOp, drop_after_flush: Option<DynPage>) {
let request = TlbFlushRequest {
op,