diff --git a/Cargo.lock b/Cargo.lock index 0c7aed129..f8aa75572 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -128,6 +128,7 @@ dependencies = [ "volatile", "x86", "x86_64", + "xarray", ] [[package]] @@ -1266,6 +1267,12 @@ dependencies = [ "serde", ] +[[package]] +name = "smallvec" +version = "1.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6ecd384b10a64542d77071bd64bd7b231f4ed5940fba55e98c3de13824cf3d7" + [[package]] name = "smoltcp" version = "0.9.1" @@ -1650,6 +1657,14 @@ dependencies = [ "volatile", ] +[[package]] +name = "xarray" +version = "0.1.0" +source = "git+https://github.com/asterinas/xarray?rev=72a4067#72a4067a65e8f94cfc193f5f19ebc981c1de9de7" +dependencies = [ + "smallvec", +] + [[package]] name = "xmas-elf" version = "0.8.0" diff --git a/framework/aster-frame/Cargo.toml b/framework/aster-frame/Cargo.toml index 944168a03..d7a757f06 100644 --- a/framework/aster-frame/Cargo.toml +++ b/framework/aster-frame/Cargo.toml @@ -16,6 +16,7 @@ buddy_system_allocator = "0.9.0" cfg-if = "1.0" gimli = { version = "0.28", default-features = false, features = ["read-core"] } inherit-methods-macro = { git = "https://github.com/asterinas/inherit-methods-macro", rev = "98f7e3e" } +xarray = { git = "https://github.com/asterinas/xarray", rev = "72a4067" } int-to-c-enum = { path = "../../kernel/libs/int-to-c-enum" } # instrusive-collections of version 0.9.6 fails to compile with current rust toolchain, # So we set a fixed version 0.9.5 for this crate diff --git a/framework/aster-frame/src/collections/mod.rs b/framework/aster-frame/src/collections/mod.rs new file mode 100644 index 000000000..8cd48ebb6 --- /dev/null +++ b/framework/aster-frame/src/collections/mod.rs @@ -0,0 +1,4 @@ +// SPDX-License-Identifier: MPL-2.0 + +//! This module provides some advanced collections. +pub mod xarray; diff --git a/framework/aster-frame/src/collections/xarray.rs b/framework/aster-frame/src/collections/xarray.rs new file mode 100644 index 000000000..07f11ef43 --- /dev/null +++ b/framework/aster-frame/src/collections/xarray.rs @@ -0,0 +1,51 @@ +// SPDX-License-Identifier: MPL-2.0 + +//! This module introduces the xarray crate and provides relevant support and interfaces for `XArray`. +extern crate xarray as xarray_crate; + +use alloc::sync::Arc; +use core::{marker::PhantomData, mem::ManuallyDrop, ops::Deref}; + +use xarray_crate::ItemEntry; +pub use xarray_crate::{Cursor, CursorMut, XArray, XMark}; + +use crate::vm::VmFrame; + +/// `VmFrameRef` is a struct that can work as `&'a VmFrame`. +pub struct VmFrameRef<'a> { + inner: ManuallyDrop, + _marker: PhantomData<&'a VmFrame>, +} + +impl<'a> Deref for VmFrameRef<'a> { + type Target = VmFrame; + + fn deref(&self) -> &Self::Target { + &self.inner + } +} + +// SAFETY: `VmFrame` is essentially an `Arc` smart pointer that points to a location which is aligned to 4, +// meeting the requirements of the `ItemEntry` for `XArray`. +unsafe impl ItemEntry for VmFrame { + type Ref<'a> = VmFrameRef<'a> where Self: 'a; + + fn into_raw(self) -> *const () { + let ptr = Arc::as_ptr(&self.frame_index); + let _ = ManuallyDrop::new(self); + ptr.cast() + } + + unsafe fn from_raw(raw: *const ()) -> Self { + Self { + frame_index: Arc::from_raw(raw.cast()), + } + } + + unsafe fn raw_as_ref<'a>(raw: *const ()) -> Self::Ref<'a> { + VmFrameRef { + inner: ManuallyDrop::new(VmFrame::from_raw(raw.cast())), + _marker: PhantomData, + } + } +} diff --git a/framework/aster-frame/src/lib.rs b/framework/aster-frame/src/lib.rs index 18334ada1..8a7cb8102 100644 --- a/framework/aster-frame/src/lib.rs +++ b/framework/aster-frame/src/lib.rs @@ -28,6 +28,7 @@ extern crate static_assertions; pub mod arch; pub mod boot; pub mod bus; +pub mod collections; pub mod console; pub mod cpu; mod error; diff --git a/kernel/aster-nix/src/vm/vmar/mod.rs b/kernel/aster-nix/src/vm/vmar/mod.rs index 2cfd1d3e4..d494b293e 100644 --- a/kernel/aster-nix/src/vm/vmar/mod.rs +++ b/kernel/aster-nix/src/vm/vmar/mod.rs @@ -674,16 +674,33 @@ impl Vmar_ { self.new_cow(None) } + /// Set the entries in the page table associated with the current `Vmar` to read-only. + fn set_pt_read_only(&self) -> Result<()> { + let inner = self.inner.lock(); + for (map_addr, vm_mapping) in &inner.vm_mappings { + vm_mapping.set_pt_read_only(self.vm_space())?; + } + Ok(()) + } + /// Create a new vmar by creating cow child for all mapped vmos. fn new_cow(&self, parent: Option<&Arc>) -> Result> { let new_vmar_ = { let vmar_inner = VmarInner::new(); - // If this is a root vmar, we create a new vmspace, - // Otherwise, we clone the vm space from parent. + // If this is not a root `Vmar`, we clone the `VmSpace` from parent. + // + // If this is a root `Vmar`, we leverage Copy-On-Write (COW) mechanism to + // clone the `VmSpace` to the child. We set all the page table entries + // in current `VmSpace` to be read-only, then clone the `VmSpace` to the child. + // In this way, initially, the child shares the same page table contents + // as the current `Vmar`. Later on, whether the current `Vmar` or the child + // `Vmar` needs to perform a write operation, the COW mechanism will be triggered, + // creating a new page for writing. let vm_space = if let Some(parent) = parent { parent.vm_space().clone() } else { - VmSpace::new() + self.set_pt_read_only()?; + self.vm_space().deep_copy() }; Vmar_::new(vmar_inner, vm_space, self.base, self.size, parent) }; diff --git a/kernel/aster-nix/src/vm/vmar/vm_mapping.rs b/kernel/aster-nix/src/vm/vmar/vm_mapping.rs index c22c76b02..1b771d388 100644 --- a/kernel/aster-nix/src/vm/vmar/vm_mapping.rs +++ b/kernel/aster-nix/src/vm/vmar/vm_mapping.rs @@ -131,6 +131,22 @@ impl VmMapping { &self.vmo } + /// Set the entries in the page table associated with the current `VmMapping` to read-only. + pub(super) fn set_pt_read_only(&self, vm_space: &VmSpace) -> Result<()> { + let map_inner = self.inner.lock(); + let mapped_addr = &map_inner.mapped_pages; + let perm = map_inner.perm; + if !perm.contains(VmPerm::W) { + return Ok(()); + } + + for page_idx in mapped_addr { + let map_addr = map_inner.page_map_addr(*page_idx); + vm_space.protect(&(map_addr..map_addr + PAGE_SIZE), perm - VmPerm::W)?; + } + Ok(()) + } + /// Add a new committed page and map it to vmspace. If copy on write is set, it's allowed to unmap the page at the same address. /// FIXME: This implementation based on the truth that we map one page at a time. If multiple pages are mapped together, this implementation may have problems pub(super) fn map_one_page( @@ -167,6 +183,7 @@ impl VmMapping { pub fn vmo_offset(&self) -> usize { self.inner.lock().vmo_offset } + pub fn read_bytes(&self, offset: usize, buf: &mut [u8]) -> Result<()> { let vmo_read_offset = self.vmo_offset() + offset; @@ -186,8 +203,23 @@ impl VmMapping { let page_idx_range = get_page_idx_range(&(vmo_write_offset..vmo_write_offset + buf.len())); let write_perm = VmPerm::W; + + let mut page_addr = + self.map_to_addr() - self.vmo_offset() + page_idx_range.start * PAGE_SIZE; for page_idx in page_idx_range { self.check_perm(&page_idx, &write_perm)?; + + let parent = self.parent.upgrade().unwrap(); + let vm_space = parent.vm_space(); + + // The `VmMapping` has the write permission but the corresponding PTE is present and is read-only. + // This means this PTE is set to read-only due to the COW mechanism. In this situation we need to trigger a + // page fault before writing at the VMO to guarantee the consistency between VMO and the page table. + let need_page_fault = vm_space.is_mapped(page_addr) && !vm_space.is_writable(page_addr); + if need_page_fault { + self.handle_page_fault(page_addr, false, true)?; + } + page_addr += PAGE_SIZE; } self.vmo.write_bytes(vmo_write_offset, buf)?; @@ -229,7 +261,7 @@ impl VmMapping { // If read access to cow vmo triggers page fault, the map should be readonly. // If user next tries to write to the frame, another page fault will be triggered. - let is_readonly = self.vmo.is_cow_child() && !write; + let is_readonly = self.vmo.is_cow_vmo() && !write; self.map_one_page(page_idx, frame, is_readonly) } @@ -429,7 +461,7 @@ impl VmMappingInner { let vm_perm = { let mut perm = self.perm; if is_readonly { - debug_assert!(vmo.is_cow_child()); + debug_assert!(vmo.is_cow_vmo()); perm -= VmPerm::W; } perm @@ -443,7 +475,7 @@ impl VmMappingInner { }; // Cow child allows unmapping the mapped page. - if vmo.is_cow_child() && vm_space.is_mapped(map_addr) { + if vmo.is_cow_vmo() && vm_space.is_mapped(map_addr) { vm_space.unmap(&(map_addr..(map_addr + PAGE_SIZE))).unwrap(); } diff --git a/kernel/aster-nix/src/vm/vmo/dyn_cap.rs b/kernel/aster-nix/src/vm/vmo/dyn_cap.rs index 513c67045..abcae42f4 100644 --- a/kernel/aster-nix/src/vm/vmo/dyn_cap.rs +++ b/kernel/aster-nix/src/vm/vmo/dyn_cap.rs @@ -2,7 +2,7 @@ use core::ops::Range; -use aster_frame::vm::VmIo; +use aster_frame::vm::{VmFrame, VmIo}; use aster_rights::{Rights, TRights}; use super::{ @@ -68,9 +68,9 @@ impl Vmo { } /// commit a page at specific offset - pub fn commit_page(&self, offset: usize) -> Result<()> { + pub fn commit_page(&self, offset: usize) -> Result { self.check_rights(Rights::WRITE)?; - self.0.commit_page(offset) + self.0.commit_page(offset, false) } /// Commits the pages specified in the range (in bytes). @@ -84,7 +84,8 @@ impl Vmo { /// The method requires the Write right. pub fn commit(&self, range: Range) -> Result<()> { self.check_rights(Rights::WRITE)?; - self.0.commit(range) + self.0.commit(range, false)?; + Ok(()) } /// Decommits the pages specified in the range (in bytes). diff --git a/kernel/aster-nix/src/vm/vmo/mod.rs b/kernel/aster-nix/src/vm/vmo/mod.rs index 67bad9b24..add62a277 100644 --- a/kernel/aster-nix/src/vm/vmo/mod.rs +++ b/kernel/aster-nix/src/vm/vmo/mod.rs @@ -5,7 +5,10 @@ use core::ops::Range; use align_ext::AlignExt; -use aster_frame::vm::{VmAllocOptions, VmFrame, VmFrameVec, VmIo}; +use aster_frame::{ + collections::xarray::{CursorMut, XArray, XMark}, + vm::{VmAllocOptions, VmFrame, VmFrameVec, VmIo}, +}; use aster_rights::Rights; use crate::prelude::*; @@ -18,6 +21,8 @@ mod static_cap; pub use options::{VmoChildOptions, VmoOptions}; pub use pager::Pager; +use self::options::ChildType; + /// Virtual Memory Objects (VMOs) are a type of capability that represents a /// range of memory pages. /// @@ -95,7 +100,7 @@ pub trait VmoRightsOp { Self: Sized; } -// We implement this trait for Vmo, so we can use functions on type like Vmo without trait bounds. +// We implement this trait for VMO, so we can use functions on type like Vmo without trait bounds. // FIXME: This requires the imcomplete feature specialization, which should be fixed further. impl VmoRightsOp for Vmo { default fn rights(&self) -> Rights { @@ -127,182 +132,230 @@ bitflags! { } } +/// Marks used for the `XArray` in `Vmo_`. +#[derive(Copy, Clone)] +pub(super) enum VmoMark { + /// Marks used for the VMO's `pages` which is managed by `XArray`. + /// The VMO whose `pages` is marked as `CowVmo` may require a Copy-On-Write (COW) operation + /// when performing a write action. + CowVmo, + /// Marks used for the `VmFrame` stored within the pages marked as `CowVmo`, + /// `VmFrame`s marked as `ExclusivePage` are newly created through the COW mechanism + /// and do not require further COW operations. + ExclusivePage, +} + +impl From for XMark { + fn from(val: VmoMark) -> Self { + match val { + VmoMark::CowVmo => XMark::Mark0, + VmoMark::ExclusivePage => XMark::Mark1, + } + } +} + +/// `Pages` is the struct that manages the `VmFrame`s stored in `Vmo_`. +pub(super) enum Pages { + /// `Pages` that cannot be resized. This kind of `Pages` will have a constant size. + Nonresizable(Arc>>, usize), + /// `Pages` that can be resized and have a variable size, and such `Pages` cannot + /// be shared between different VMOs. + Resizable(Mutex<(XArray, usize)>), +} + +impl Pages { + fn with(&self, func: F) -> R + where + F: FnOnce(&mut XArray, usize) -> R, + { + match self { + Self::Nonresizable(pages, size) => func(&mut pages.lock(), *size), + Self::Resizable(pages) => { + let mut lock = pages.lock(); + let size = lock.1; + func(&mut lock.0, size) + } + } + } +} + +/// `Vmo_` is the structure that actually manages the content of VMO. +/// Broadly speaking, there are two types of VMO: +/// 1. File-backed VMO: the VMO backed by a file and resides in the `PageCache`, +/// which includes a pager to provide it with actual pages. +/// 2. Anonymous VMO: the VMO without a file backup, which does not have a pager. pub(super) struct Vmo_ { + pager: Option>, /// Flags flags: VmoFlags, - /// VmoInner - inner: Mutex, + /// The offset of the range of pages corresponding to the VMO within `pages`. + page_idx_offset: usize, + /// The virtual pages where the VMO resides. + pages: Pages, } -struct VmoInner { - pager: Option>, - /// size, in bytes - size: usize, - /// The pages committed. The key is the page index, the value is the committed frame. - committed_pages: BTreeMap, - /// The pages from the parent that current vmo can access. The pages can only be inherited when create childs vmo. - /// We store the page index range - inherited_pages: Option>, - /// Whether the vmo is copy on write child. - is_cow: bool, -} - -impl VmoInner { - fn commit_page(&mut self, offset: usize) -> Result<()> { - let page_idx = offset / PAGE_SIZE; - // Fast path: the page is already committed. - if self.committed_pages.contains_key(&page_idx) { - return Ok(()); - } - let frame = match &self.pager { - None => VmAllocOptions::new(1).alloc_single()?, - Some(pager) => pager.commit_page(page_idx)?, - }; - self.insert_frame(page_idx, frame); - Ok(()) - } - - fn decommit_page(&mut self, offset: usize) -> Result<()> { - let page_idx = offset / PAGE_SIZE; - if self.committed_pages.remove(&page_idx).is_some() { - if let Some(pager) = &self.pager { - pager.decommit_page(page_idx)?; - } - } - Ok(()) - } - - fn insert_frame(&mut self, page_idx: usize, frame: VmFrame) { - debug_assert!(!self.committed_pages.contains_key(&page_idx)); - self.committed_pages.insert(page_idx, frame); - } - - fn get_committed_frame(&mut self, page_idx: usize, write_page: bool) -> Result { - // if the page is already commit, return the committed page. - if let Some(frames) = self.committed_pages.get(&page_idx) { - return Ok(frames.clone()); - } - - // The vmo is not child - if self.inherited_pages.is_none() { - self.commit_page(page_idx * PAGE_SIZE)?; - let frame = self.committed_pages.get(&page_idx).unwrap().clone(); - return Ok(frame); - } - - let frame = self.get_inherited_frame_or_alloc(page_idx, write_page)?; - - if !self.should_share_frame_with_parent(write_page) { - self.insert_frame(page_idx, frame.clone()); - } - - Ok(frame) - } - - fn get_inherited_frame_or_alloc(&self, page_idx: usize, write_page: bool) -> Result { - let inherited_frames = self.inherited_pages.as_ref().unwrap(); - - if page_idx >= inherited_frames.len() { - if self.is_cow { - return Ok(VmAllocOptions::new(1).alloc_single()?); - } - return_errno_with_message!(Errno::EINVAL, "the page is not inherited from parent"); - } - - let inherited_frame = inherited_frames.get(page_idx).unwrap().clone(); - - if self.should_share_frame_with_parent(write_page) { - return Ok(inherited_frame); - } - - let frame = VmAllocOptions::new(1).alloc_single()?; - frame.copy_from_frame(&inherited_frame); - Ok(frame) - } - - fn is_cow_child(&self) -> bool { - self.is_cow - } - - fn should_share_frame_with_parent(&self, write_page: bool) -> bool { - !self.is_cow || !write_page - } +fn clone_page(page: &VmFrame) -> Result { + let new_page = VmAllocOptions::new(1).alloc_single()?; + new_page.copy_from_frame(page); + Ok(new_page) } impl Vmo_ { - pub fn commit_page(&self, offset: usize) -> Result<()> { - self.inner.lock().commit_page(offset) + /// Prepare a new `VmFrame` for the target index in pages, returning the new page as well as + /// whether this page needs to be marked as exclusive. + /// + /// Based on the type of VMO and the impending operation on the prepared page, there are 3 conditions: + /// 1. For an Anonymous VMO, provide a new page directly. If the VMO requires copy-on-write (COW), + /// the prepared page can be directly set to exclusive. + /// 2. For a File-backed VMO that does not need to trigger the COW mechanism, + /// obtain a page from the pager directly without the need to be set as exclusive. + /// 3. For a File-backed VMO that requires triggering the COW mechanism, obtain a page + /// from the pager and then copy it. This page can be set as exclusive. + fn prepare_page( + &self, + page_idx: usize, + is_cow_vmo: bool, + will_write: bool, + ) -> Result<(VmFrame, bool)> { + let (page, should_mark_exclusive) = match &self.pager { + None => { + // Condition 1. The new anonymous page only need to be marked as `ExclusivePage` + // when current VMO is a cow VMO, otherwise this mark is meaningless. + (VmAllocOptions::new(1).alloc_single()?, is_cow_vmo) + } + Some(pager) => { + let page = pager.commit_page(page_idx)?; + // The prerequisite for triggering the COW mechanism here is that the current + // VMO requires COW and the prepared page is about to undergo a write operation. + // At this point, the `VmFrame` obtained from the pager needs to be cloned to + // avoid subsequent modifications affecting the content of the `VmFrame` in the pager. + let trigger_cow = is_cow_vmo && will_write; + if trigger_cow { + // Condition 3. + (clone_page(&page)?, true) + } else { + // Condition 2. + (page, false) + } + } + }; + Ok((page, should_mark_exclusive)) } - pub fn decommit_page(&self, offset: usize) -> Result<()> { - self.inner.lock().decommit_page(offset) - } + fn commit_with_cursor( + &self, + cursor: &mut CursorMut<'_, VmFrame, VmoMark>, + is_cow_vmo: bool, + will_write: bool, + ) -> Result { + let (new_page, is_exclusive) = { + let is_exclusive = cursor.is_marked(VmoMark::ExclusivePage); + if let Some(committed_page) = cursor.load() { + // The necessary and sufficient condition for triggering the COW mechanism is that + // the current VMO requires copy-on-write, there is an impending write operation to the page, + // and the page is not exclusive. + let trigger_cow = is_cow_vmo && will_write && !is_exclusive; + if !trigger_cow { + // Fast path: return the page directly. + return Ok(committed_page.clone()); + } - pub fn commit(&self, range: Range) -> Result<()> { - let page_idx_range = get_page_idx_range(&range); - for page_idx in page_idx_range { - let offset = page_idx * PAGE_SIZE; - self.commit_page(offset)?; + (clone_page(&committed_page)?, true) + } else { + self.prepare_page(cursor.index() as usize, is_cow_vmo, will_write)? + } + }; + + cursor.store(new_page.clone()); + if is_exclusive { + cursor.set_mark(VmoMark::ExclusivePage).unwrap(); } - - Ok(()) + Ok(new_page) } + /// Commit the page corresponding to the target offset in the VMO and return that page. + /// If the current offset has already been committed, the page will be returned directly. + /// During the commit process, the Copy-On-Write (COW) mechanism may be triggered depending on the circumstances. + pub fn commit_page(&self, offset: usize, will_write: bool) -> Result { + let page_idx = offset / PAGE_SIZE + self.page_idx_offset; + self.pages.with(|pages, size| { + let is_cow_vmo = pages.is_marked(VmoMark::CowVmo); + let mut cursor = pages.cursor_mut(page_idx as u64); + self.commit_with_cursor(&mut cursor, is_cow_vmo, will_write) + }) + } + + /// Decommit the page corresponding to the target offset in the VMO. + fn decommit_page(&mut self, offset: usize) -> Result<()> { + let page_idx = offset / PAGE_SIZE + self.page_idx_offset; + self.pages.with(|pages, size| { + let is_cow_vmo = pages.is_marked(VmoMark::CowVmo); + let mut cursor = pages.cursor_mut(page_idx as u64); + if cursor.remove().is_some() + && let Some(pager) = &self.pager + && !is_cow_vmo + { + pager.decommit_page(page_idx)?; + } + Ok(()) + }) + } + + /// Commit a range of pages in the VMO, returns the pages in this range. + pub fn commit(&self, range: Range, will_write: bool) -> Result { + self.pages.with(|pages, size| { + if range.end > size { + return_errno_with_message!(Errno::EINVAL, "operated range exceeds the vmo size"); + } + + let raw_page_idx_range = get_page_idx_range(&range); + let page_idx_range = (raw_page_idx_range.start + self.page_idx_offset) + ..(raw_page_idx_range.end + self.page_idx_offset); + let mut frames = VmFrameVec::new_with_capacity(page_idx_range.len()); + + let is_cow_vmo = pages.is_marked(VmoMark::CowVmo); + let mut cursor = pages.cursor_mut(page_idx_range.start as u64); + for page_idx in page_idx_range { + let committed_page = + self.commit_with_cursor(&mut cursor, is_cow_vmo, will_write)?; + frames.push(committed_page); + cursor.next(); + } + Ok(frames) + }) + } + + /// Decommit a range of pages in the VMO. pub fn decommit(&self, range: Range) -> Result<()> { - let page_idx_range = get_page_idx_range(&range); - for page_idx in page_idx_range { - let offset = page_idx * PAGE_SIZE; - self.decommit_page(offset)?; - } - Ok(()) - } - - /// determine whether a page is commited - pub fn page_commited(&self, page_idx: usize) -> bool { - self.inner.lock().committed_pages.contains_key(&page_idx) + self.pages.with(|pages, size| { + self.decommit_pages(pages, range)?; + Ok(()) + }) } + /// Read the specified amount of buffer content starting from the target offset in the VMO. pub fn read_bytes(&self, offset: usize, buf: &mut [u8]) -> Result<()> { let read_len = buf.len(); - if offset + read_len > self.size() { - return_errno_with_message!(Errno::EINVAL, "read range exceeds vmo size"); - } let read_range = offset..(offset + read_len); - let frames = self.ensure_all_pages_exist(&read_range, false)?; + let frames = self.commit(read_range, false)?; let read_offset = offset % PAGE_SIZE; Ok(frames.read_bytes(read_offset, buf)?) } - /// Ensure all pages inside range are backed up vm frames, returns the frames. - fn ensure_all_pages_exist(&self, range: &Range, write_page: bool) -> Result { - let page_idx_range = get_page_idx_range(range); - let mut frames = VmFrameVec::new_with_capacity(page_idx_range.len()); - for page_idx in page_idx_range { - let page_frame = self.get_committed_frame(page_idx, write_page)?; - frames.push(page_frame); - } - Ok(frames) - } - - /// Get the frame for a page. If commit_if_none is set, we will commit a new page for the page - /// if the page is not committed. - fn get_committed_frame(&self, page_idx: usize, write_page: bool) -> Result { - self.inner.lock().get_committed_frame(page_idx, write_page) - } - + /// Write the specified amount of buffer content starting from the target offset in the VMO. pub fn write_bytes(&self, offset: usize, buf: &[u8]) -> Result<()> { let write_len = buf.len(); - debug_assert!(offset + write_len <= self.size()); - if offset + write_len > self.size() { - return_errno_with_message!(Errno::EINVAL, "write range exceeds the vmo size"); - } - let write_range = offset..(offset + write_len); - let frames = self.ensure_all_pages_exist(&write_range, true)?; + let frames = self.commit(write_range.clone(), true)?; let write_offset = offset % PAGE_SIZE; frames.write_bytes(write_offset, buf)?; - if let Some(pager) = &self.inner.lock().pager { - let page_idx_range = get_page_idx_range(&write_range); + let is_cow_vmo = self.is_cow_vmo(); + if let Some(pager) = &self.pager + && !is_cow_vmo + { + let raw_page_idx_range = get_page_idx_range(&write_range); + let page_idx_range = (raw_page_idx_range.start + self.page_idx_offset) + ..(raw_page_idx_range.end + self.page_idx_offset); for page_idx in page_idx_range { pager.update_page(page_idx)?; } @@ -310,36 +363,179 @@ impl Vmo_ { Ok(()) } + /// Clear the target range in current VMO. pub fn clear(&self, range: Range) -> Result<()> { let buffer = vec![0u8; range.end - range.start]; - self.write_bytes(range.start, &buffer) - } - - pub fn size(&self) -> usize { - self.inner.lock().size - } - - pub fn resize(&self, new_size: usize) -> Result<()> { - assert!(self.flags.contains(VmoFlags::RESIZABLE)); - let new_size = new_size.align_up(PAGE_SIZE); - let old_size = self.size(); - if new_size == old_size { - return Ok(()); - } - - if new_size < old_size { - self.decommit(new_size..old_size)?; - self.inner.lock().size = new_size; - } else { - self.inner.lock().size = new_size; - } - + self.write_bytes(range.start, &buffer)?; Ok(()) } + /// Return the size of current VMO. + pub fn size(&self) -> usize { + self.pages.with(|pages, size| size) + } + + /// Return the page index offset of current VMO in corresponding pages. + pub fn page_idx_offset(&self) -> usize { + self.page_idx_offset + } + + /// Clone the current `pages` to the child VMO. + /// + /// Depending on the type of the VMO and the child, there are 4 conditions: + /// 1. For a slice child, directly share the current `pages` with that child. + /// 2. For a COW child, and the current VMO requires COW, it is necessary to clear the + /// ExclusivePage mark in the current `pages` and clone a new `pages` to the child. + /// 3. For a COW child, where the current VMO does not require COW and is a File-backed VMO. + /// In this case, a new `pages` needs to be cloned to the child, and the child's `pages` + /// require COW. The current `pages` do not need COW as they need to remain consistent with the pager. + /// 4. For a COW child, where the current VMO does not require COW and is an Anonymous VMO. + /// In this case, a new `pages` needs to be cloned to the child, and both the current `pages` and + /// the child's `pages` require COW. + pub fn clone_pages_for_child( + &self, + child_type: ChildType, + child_flags: VmoFlags, + range: &Range, + ) -> Result { + let child_vmo_start = range.start; + let child_vmo_end = range.end; + debug_assert!(child_vmo_start % PAGE_SIZE == 0); + debug_assert!(child_vmo_end % PAGE_SIZE == 0); + if child_vmo_start % PAGE_SIZE != 0 || child_vmo_end % PAGE_SIZE != 0 { + return_errno_with_message!(Errno::EINVAL, "VMO range does not aligned with PAGE_SIZE"); + } + + match child_type { + ChildType::Slice => { + if child_flags.contains(VmoFlags::RESIZABLE) { + return_errno_with_message!( + Errno::EINVAL, + "a slice child VMO cannot be resizable" + ); + } + + let Pages::Nonresizable(ref pages, size) = self.pages else { + return_errno_with_message!( + Errno::EINVAL, + "a resizable VMO cannot have a slice child" + ); + }; + + // A slice child should be inside parent VMO's range + debug_assert!(child_vmo_end <= size); + if child_vmo_end > size { + return_errno_with_message!( + Errno::EINVAL, + "a slice child VMO cannot exceed its parent VMO's size" + ); + } + // Condition 1. + Ok(Pages::Nonresizable(pages.clone(), range.len())) + } + ChildType::Cow => { + let new_pages = self.pages.with(|pages, size| { + // A Copy-on-Write child should intersect with parent VMO + debug_assert!(child_vmo_start <= size); + if child_vmo_start > size { + return_errno_with_message!( + Errno::EINVAL, + "a COW VMO should overlap with its parent" + ); + } + + let self_is_cow = pages.is_marked(VmoMark::CowVmo); + if self_is_cow { + // Condition 2. + pages.unset_mark_all(VmoMark::ExclusivePage); + return Ok(pages.clone()); + } + + if self.pager.is_some() { + // Condition 3. + let mut cloned_pages = pages.clone(); + cloned_pages.set_mark(VmoMark::CowVmo); + return Ok(cloned_pages); + } + + // Condition 4. + pages.set_mark(VmoMark::CowVmo); + Ok(pages.clone()) + })?; + if child_flags.contains(VmoFlags::RESIZABLE) { + Ok(Pages::Resizable(Mutex::new((new_pages, range.len())))) + } else { + Ok(Pages::Nonresizable( + Arc::new(Mutex::new(new_pages)), + range.len(), + )) + } + } + } + } + + /// Resize current VMO to target size. + pub fn resize(&self, new_size: usize) -> Result<()> { + assert!(self.flags.contains(VmoFlags::RESIZABLE)); + let new_size = new_size.align_up(PAGE_SIZE); + + let Pages::Resizable(ref pages) = self.pages else { + return_errno_with_message!(Errno::EINVAL, "current VMO is not resizable"); + }; + + let mut lock = pages.lock(); + let old_size = lock.1; + if new_size == old_size { + return Ok(()); + } + if new_size < old_size { + self.decommit_pages(&mut lock.0, new_size..old_size)?; + } + lock.1 = new_size; + Ok(()) + } + + fn decommit_pages( + &self, + pages: &mut XArray, + range: Range, + ) -> Result<()> { + let raw_page_idx_range = get_page_idx_range(&range); + let page_idx_range = (raw_page_idx_range.start + self.page_idx_offset) + ..(raw_page_idx_range.end + self.page_idx_offset); + let is_cow_vmo = pages.is_marked(VmoMark::CowVmo); + let mut cursor = pages.cursor_mut(page_idx_range.start as u64); + for page_idx in page_idx_range { + if cursor.remove().is_some() + && let Some(pager) = &self.pager + && !is_cow_vmo + { + pager.decommit_page(page_idx)?; + } + cursor.next(); + } + Ok(()) + } + + /// Determine whether a page is committed. + pub fn is_page_committed(&self, page_idx: usize) -> bool { + self.pages.with(|pages, size| { + pages + .load((page_idx + self.page_idx_offset) as u64) + .is_some() + }) + } + + /// Return the flags of current VMO. pub fn flags(&self) -> VmoFlags { self.flags } + + /// Determine whether the VMO is need COW mechanism. + pub fn is_cow_vmo(&self) -> bool { + self.pages + .with(|pages, size| pages.is_marked(VmoMark::CowVmo)) + } } impl Vmo { @@ -355,15 +551,15 @@ impl Vmo { /// return whether a page is already committed pub fn is_page_committed(&self, page_idx: usize) -> bool { - self.0.page_commited(page_idx) + self.0.is_page_committed(page_idx) } pub fn get_committed_frame(&self, page_idx: usize, write_page: bool) -> Result { - self.0.get_committed_frame(page_idx, write_page) + self.0.commit_page(page_idx * PAGE_SIZE, write_page) } - pub fn is_cow_child(&self) -> bool { - self.0.inner.lock().is_cow_child() + pub fn is_cow_vmo(&self) -> bool { + self.0.is_cow_vmo() } } @@ -373,20 +569,3 @@ pub fn get_page_idx_range(vmo_offset_range: &Range) -> Range { let end = vmo_offset_range.end.align_up(PAGE_SIZE); (start / PAGE_SIZE)..(end / PAGE_SIZE) } - -pub(super) fn get_inherited_frames_from_parent( - parent: Arc, - num_pages: usize, - parent_page_idx_offset: usize, - is_cow: bool, -) -> Vec { - let mut inherited_frames = Vec::with_capacity(num_pages); - for page_idx in 0..num_pages { - let parent_page_idx = page_idx + parent_page_idx_offset; - let inherited_frame = parent - .get_committed_frame(parent_page_idx, !is_cow) - .unwrap(); - inherited_frames.push(inherited_frame); - } - inherited_frames -} diff --git a/kernel/aster-nix/src/vm/vmo/options.rs b/kernel/aster-nix/src/vm/vmo/options.rs index 5d29d5768..5328baddf 100644 --- a/kernel/aster-nix/src/vm/vmo/options.rs +++ b/kernel/aster-nix/src/vm/vmo/options.rs @@ -5,16 +5,16 @@ use core::{marker::PhantomData, ops::Range}; use align_ext::AlignExt; -use aster_frame::vm::{VmAllocOptions, VmFrame}; +use aster_frame::{ + collections::xarray::XArray, + vm::{VmAllocOptions, VmFrame}, +}; use aster_rights::{Dup, Rights, TRightSet, TRights, Write}; use aster_rights_proc::require; use typeflags_util::{SetExtend, SetExtendOp}; -use super::{Pager, Vmo, VmoFlags, VmoRightsOp}; -use crate::{ - prelude::*, - vm::vmo::{get_inherited_frames_from_parent, VmoInner, Vmo_}, -}; +use super::{Pager, Pages, Vmo, VmoFlags, VmoMark, VmoRightsOp}; +use crate::{prelude::*, vm::vmo::Vmo_}; /// Options for allocating a root VMO. /// @@ -124,35 +124,40 @@ impl VmoOptions> { fn alloc_vmo_(size: usize, flags: VmoFlags, pager: Option>) -> Result { let size = size.align_up(PAGE_SIZE); - let committed_pages = committed_pages_if_continuous(flags, size)?; - let vmo_inner = VmoInner { - pager, - size, - committed_pages, - inherited_pages: None, - is_cow: false, + let pages = { + let pages = committed_pages_if_continuous(flags, size)?; + if flags.contains(VmoFlags::RESIZABLE) { + Pages::Resizable(Mutex::new((pages, size))) + } else { + Pages::Nonresizable(Arc::new(Mutex::new(pages)), size) + } }; Ok(Vmo_ { + pager, flags, - inner: Mutex::new(vmo_inner), + page_idx_offset: 0, + pages, }) } -fn committed_pages_if_continuous(flags: VmoFlags, size: usize) -> Result> { +fn committed_pages_if_continuous(flags: VmoFlags, size: usize) -> Result> { if flags.contains(VmoFlags::CONTIGUOUS) { // if the vmo is continuous, we need to allocate frames for the vmo let frames_num = size / PAGE_SIZE; let frames = VmAllocOptions::new(frames_num) .is_contiguous(true) .alloc()?; - let mut committed_pages = BTreeMap::new(); - for (idx, frame) in frames.into_iter().enumerate() { - committed_pages.insert(idx * PAGE_SIZE, frame); + let mut committed_pages = XArray::new(); + let mut cursor = committed_pages.cursor_mut(0); + for frame in frames { + cursor.store(frame); + cursor.next(); } + drop(cursor); Ok(committed_pages) } else { // otherwise, we wait for the page is read or write - Ok(BTreeMap::new()) + Ok(XArray::new()) } } @@ -280,7 +285,7 @@ impl VmoChildOptions { .check_rights(Rights::DUP) .expect("function new_slice_rights should called with rights Dup"); Self { - flags: parent.flags() & Self::PARENT_FLAGS_MASK, + flags: parent.flags(), parent, range, marker: PhantomData, @@ -327,7 +332,7 @@ impl VmoChildOptions { /// Any pages that are beyond the parent's range are initially all zeros. pub fn new_cow(parent: Vmo, range: Range) -> Self { Self { - flags: parent.flags() & Self::PARENT_FLAGS_MASK, + flags: parent.flags(), parent, range, marker: PhantomData, @@ -432,7 +437,7 @@ impl VmoChildOptions, VmoCowChild> { } #[derive(Debug, Clone, Copy)] -enum ChildType { +pub(crate) enum ChildType { Cow, Slice, } @@ -443,63 +448,15 @@ fn alloc_child_vmo_( child_flags: VmoFlags, child_type: ChildType, ) -> Result { - let child_vmo_start = range.start; - let child_vmo_end = range.end; - debug_assert!(child_vmo_start % PAGE_SIZE == 0); - debug_assert!(child_vmo_end % PAGE_SIZE == 0); - if child_vmo_start % PAGE_SIZE != 0 || child_vmo_end % PAGE_SIZE != 0 { - return_errno_with_message!(Errno::EINVAL, "vmo range does not aligned with PAGE_SIZE"); - } - let parent_vmo_size = parent_vmo_.size(); - - let is_cow = { - let parent_vmo_inner = parent_vmo_.inner.lock(); - match child_type { - ChildType::Slice => { - // A slice child should be inside parent vmo's range - debug_assert!(child_vmo_end <= parent_vmo_inner.size); - if child_vmo_end > parent_vmo_inner.size { - return_errno_with_message!( - Errno::EINVAL, - "slice child vmo cannot exceed parent vmo's size" - ); - } - false - } - ChildType::Cow => { - // A copy on Write child should intersect with parent vmo - debug_assert!(range.start <= parent_vmo_inner.size); - if range.start > parent_vmo_inner.size { - return_errno_with_message!( - Errno::EINVAL, - "COW vmo should overlap with its parent" - ); - } - true - } - } - }; let parent_page_idx_offset = range.start / PAGE_SIZE; - let inherited_end = range.end.min(parent_vmo_size); - let cow_size = if inherited_end >= range.start { - inherited_end - range.start - } else { - 0 - }; - let num_pages = cow_size / PAGE_SIZE; - let inherited_pages = - get_inherited_frames_from_parent(parent_vmo_, num_pages, parent_page_idx_offset, is_cow); - let vmo_inner = VmoInner { - pager: None, - size: child_vmo_end - child_vmo_start, - committed_pages: BTreeMap::new(), - inherited_pages: Some(inherited_pages), - is_cow, - }; - Ok(Vmo_ { + let child_pages = parent_vmo_.clone_pages_for_child(child_type, child_flags, &range)?; + let new_vmo = Vmo_ { + pager: parent_vmo_.pager.clone(), flags: child_flags, - inner: Mutex::new(vmo_inner), - }) + pages: child_pages, + page_idx_offset: parent_page_idx_offset + parent_vmo_.page_idx_offset(), + }; + Ok(new_vmo) } /// A type to specify the "type" of a child, which is either a slice or a COW. @@ -525,9 +482,9 @@ mod test { #[ktest] fn alloc_vmo() { let vmo = VmoOptions::::new(PAGE_SIZE).alloc().unwrap(); - assert!(vmo.size() == PAGE_SIZE); + assert_eq!(vmo.size(), PAGE_SIZE); // the vmo is zeroed once allocated - assert!(vmo.read_val::(0).unwrap() == 0); + assert_eq!(vmo.read_val::(0).unwrap(), 0); } #[ktest] @@ -536,7 +493,7 @@ mod test { .flags(VmoFlags::CONTIGUOUS) .alloc() .unwrap(); - assert!(vmo.size() == 10 * PAGE_SIZE); + assert_eq!(vmo.size(), 10 * PAGE_SIZE); } #[ktest] @@ -546,11 +503,11 @@ mod test { // write val vmo.write_val(111, &val).unwrap(); let read_val: u8 = vmo.read_val(111).unwrap(); - assert!(val == read_val); + assert_eq!(val, read_val); // bit endian vmo.write_bytes(222, &[0x12, 0x34, 0x56, 0x78]).unwrap(); let read_val: u32 = vmo.read_val(222).unwrap(); - assert!(read_val == 0x78563412) + assert_eq!(read_val, 0x78563412) } #[ktest] @@ -562,36 +519,42 @@ mod test { .unwrap(); // write parent, read child parent.write_val(1, &42u8).unwrap(); - assert!(slice_child.read_val::(1).unwrap() == 42); + assert_eq!(slice_child.read_val::(1).unwrap(), 42); // write child, read parent slice_child.write_val(99, &0x1234u32).unwrap(); - assert!(parent.read_val::(99).unwrap() == 0x1234); + assert_eq!(parent.read_val::(99).unwrap(), 0x1234); } #[ktest] fn cow_child() { let parent = VmoOptions::::new(2 * PAGE_SIZE).alloc().unwrap(); + parent.write_val(1, &42u8).unwrap(); + parent.write_val(2, &16u8).unwrap(); let parent_dup = parent.dup().unwrap(); let cow_child = VmoChildOptions::new_cow(parent_dup, 0..10 * PAGE_SIZE) .alloc() .unwrap(); - // write parent, read child - parent.write_val(1, &42u8).unwrap(); - assert!(cow_child.read_val::(1).unwrap() == 42); - // write child to trigger copy on write, read child and parent - cow_child.write_val(99, &0x1234u32).unwrap(); - assert!(cow_child.read_val::(99).unwrap() == 0x1234); - assert!(cow_child.read_val::(1).unwrap() == 42); - assert!(parent.read_val::(99).unwrap() == 0); - assert!(parent.read_val::(1).unwrap() == 42); - // write parent on already-copied page - parent.write_val(10, &123u8).unwrap(); - assert!(parent.read_val::(10).unwrap() == 123); - assert!(cow_child.read_val::(10).unwrap() == 0); - // write parent on not-copied page - parent.write_val(PAGE_SIZE + 10, &12345u32).unwrap(); - assert!(parent.read_val::(PAGE_SIZE + 10).unwrap() == 12345); - assert!(cow_child.read_val::(PAGE_SIZE + 10).unwrap() == 12345); + // Read child. + assert_eq!(cow_child.read_val::(1).unwrap(), 42); + assert_eq!(cow_child.read_val::(2).unwrap(), 16); + // Write parent to trigger copy-on-write. read child and parent. + parent.write_val(1, &64u8).unwrap(); + assert_eq!(parent.read_val::(1).unwrap(), 64); + assert_eq!(cow_child.read_val::(1).unwrap(), 42); + // Write child to trigger copy on write, read child and parent + cow_child.write_val(2, &0x1234u32).unwrap(); + assert_eq!(cow_child.read_val::(2).unwrap(), 0x1234); + assert_eq!(cow_child.read_val::(1).unwrap(), 42); + assert_eq!(parent.read_val::(2).unwrap(), 16); + assert_eq!(parent.read_val::(1).unwrap(), 64); + // Write parent on already-copied page + parent.write_val(1, &123u8).unwrap(); + assert_eq!(parent.read_val::(1).unwrap(), 123); + assert_eq!(cow_child.read_val::(1).unwrap(), 42); + // Write parent on not-copied page + parent.write_val(2, &12345u32).unwrap(); + assert_eq!(parent.read_val::(2).unwrap(), 12345); + assert_eq!(cow_child.read_val::(2).unwrap(), 0x1234); } #[ktest] @@ -602,10 +565,10 @@ mod test { .unwrap(); vmo.write_val(10, &42u8).unwrap(); vmo.resize(2 * PAGE_SIZE).unwrap(); - assert!(vmo.size() == 2 * PAGE_SIZE); - assert!(vmo.read_val::(10).unwrap() == 42); + assert_eq!(vmo.size(), 2 * PAGE_SIZE); + assert_eq!(vmo.read_val::(10).unwrap(), 42); vmo.write_val(PAGE_SIZE + 20, &123u8).unwrap(); vmo.resize(PAGE_SIZE).unwrap(); - assert!(vmo.read_val::(10).unwrap() == 42); + assert_eq!(vmo.read_val::(10).unwrap(), 42); } } diff --git a/kernel/aster-nix/src/vm/vmo/static_cap.rs b/kernel/aster-nix/src/vm/vmo/static_cap.rs index d5ea92774..ffa23f877 100644 --- a/kernel/aster-nix/src/vm/vmo/static_cap.rs +++ b/kernel/aster-nix/src/vm/vmo/static_cap.rs @@ -2,7 +2,7 @@ use core::ops::Range; -use aster_frame::vm::VmIo; +use aster_frame::vm::{VmFrame, VmIo}; use aster_rights::{Dup, Rights, TRightSet, TRights, Write}; use aster_rights_proc::require; @@ -71,9 +71,9 @@ impl Vmo> { } /// commit a page at specific offset - pub fn commit_page(&self, offset: usize) -> Result<()> { + pub fn commit_page(&self, offset: usize) -> Result { self.check_rights(Rights::WRITE)?; - self.0.commit_page(offset) + self.0.commit_page(offset, false) } /// Commit the pages specified in the range (in bytes). @@ -87,7 +87,8 @@ impl Vmo> { /// The method requires the Write right. #[require(R > Write)] pub fn commit(&self, range: Range) -> Result<()> { - self.0.commit(range) + self.0.commit(range, false)?; + Ok(()) } /// Decommit the pages specified in the range (in bytes).