Introduce XArray and refactor the COW mechanism of Vmo

Chen Chengjun 2024-04-08 14:26:36 +08:00 committed by Tate, Hongliang Tian
parent 233e1fac98
commit 33c8727a13
11 changed files with 575 additions and 310 deletions

Cargo.lock

@@ -128,6 +128,7 @@ dependencies = [
"volatile",
"x86",
"x86_64",
"xarray",
]
[[package]]
@@ -1266,6 +1267,12 @@ dependencies = [
"serde",
]
[[package]]
name = "smallvec"
version = "1.13.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e6ecd384b10a64542d77071bd64bd7b231f4ed5940fba55e98c3de13824cf3d7"
[[package]]
name = "smoltcp"
version = "0.9.1"
@@ -1650,6 +1657,14 @@ dependencies = [
"volatile",
]
[[package]]
name = "xarray"
version = "0.1.0"
source = "git+https://github.com/asterinas/xarray?rev=72a4067#72a4067a65e8f94cfc193f5f19ebc981c1de9de7"
dependencies = [
"smallvec",
]
[[package]]
name = "xmas-elf"
version = "0.8.0"


@@ -16,6 +16,7 @@ buddy_system_allocator = "0.9.0"
cfg-if = "1.0"
gimli = { version = "0.28", default-features = false, features = ["read-core"] }
inherit-methods-macro = { git = "https://github.com/asterinas/inherit-methods-macro", rev = "98f7e3e" }
xarray = { git = "https://github.com/asterinas/xarray", rev = "72a4067" }
int-to-c-enum = { path = "../../kernel/libs/int-to-c-enum" }
# intrusive-collections version 0.9.6 fails to compile with the current Rust toolchain,
# so we pin this crate to version 0.9.5


@@ -0,0 +1,4 @@
// SPDX-License-Identifier: MPL-2.0
//! This module provides some advanced collections.
pub mod xarray;


@@ -0,0 +1,51 @@
// SPDX-License-Identifier: MPL-2.0
//! This module introduces the xarray crate and provides relevant support and interfaces for `XArray`.
extern crate xarray as xarray_crate;
use alloc::sync::Arc;
use core::{marker::PhantomData, mem::ManuallyDrop, ops::Deref};
use xarray_crate::ItemEntry;
pub use xarray_crate::{Cursor, CursorMut, XArray, XMark};
use crate::vm::VmFrame;
/// `VmFrameRef` is a struct that can work as `&'a VmFrame`.
pub struct VmFrameRef<'a> {
inner: ManuallyDrop<VmFrame>,
_marker: PhantomData<&'a VmFrame>,
}
impl<'a> Deref for VmFrameRef<'a> {
type Target = VmFrame;
fn deref(&self) -> &Self::Target {
&self.inner
}
}
// SAFETY: `VmFrame` is essentially an `Arc` smart pointer that points to a location aligned to 4,
// meeting the requirements that `ItemEntry` imposes for `XArray`.
unsafe impl ItemEntry for VmFrame {
type Ref<'a> = VmFrameRef<'a> where Self: 'a;
fn into_raw(self) -> *const () {
let ptr = Arc::as_ptr(&self.frame_index);
let _ = ManuallyDrop::new(self);
ptr.cast()
}
unsafe fn from_raw(raw: *const ()) -> Self {
Self {
frame_index: Arc::from_raw(raw.cast()),
}
}
unsafe fn raw_as_ref<'a>(raw: *const ()) -> Self::Ref<'a> {
VmFrameRef {
inner: ManuallyDrop::new(VmFrame::from_raw(raw.cast())),
_marker: PhantomData,
}
}
}
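
The alignment requirement exists because `XArray` stores each item as a raw `*const ()` and reuses the low bits of that pointer for internal tagging. Below is a minimal userspace sketch (plain `std` Rust, not Asterinas code) of the `into_raw`/`from_raw` round-trip contract that the impl above relies on:

use std::sync::Arc;

fn main() {
    let item: Arc<u64> = Arc::new(42); // stands in for the `frame_index` inside `VmFrame`
    let raw: *const () = Arc::into_raw(item).cast();
    // The pointee is aligned to at least 4, so the two low bits are zero
    // and remain free for `XArray`'s internal tags.
    assert_eq!(raw as usize & 0b11, 0);
    // SAFETY: `raw` came from `Arc::into_raw` and is reconstructed exactly once.
    let item: Arc<u64> = unsafe { Arc::from_raw(raw.cast()) };
    assert_eq!(*item, 42);
}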


@@ -28,6 +28,7 @@ extern crate static_assertions;
pub mod arch;
pub mod boot;
pub mod bus;
pub mod collections;
pub mod console;
pub mod cpu;
mod error;


@@ -674,16 +674,33 @@ impl Vmar_ {
self.new_cow(None)
}
/// Set the entries in the page table associated with the current `Vmar` to read-only.
fn set_pt_read_only(&self) -> Result<()> {
let inner = self.inner.lock();
for vm_mapping in inner.vm_mappings.values() {
vm_mapping.set_pt_read_only(self.vm_space())?;
}
Ok(())
}
/// Create a new vmar by creating cow child for all mapped vmos.
fn new_cow(&self, parent: Option<&Arc<Vmar_>>) -> Result<Arc<Self>> {
let new_vmar_ = {
let vmar_inner = VmarInner::new();
// If this is a root vmar, we create a new vmspace,
// Otherwise, we clone the vm space from parent.
// If this is not a root `Vmar`, we clone the `VmSpace` from parent.
//
// If this is a root `Vmar`, we leverage Copy-On-Write (COW) mechanism to
// clone the `VmSpace` to the child. We set all the page table entries
// in current `VmSpace` to be read-only, then clone the `VmSpace` to the child.
// In this way, initially, the child shares the same page table contents
// as the current `Vmar`. Later on, whenever the current `Vmar` or the child
// `Vmar` performs a write operation, the COW mechanism will be triggered,
// creating a new page for the write.
let vm_space = if let Some(parent) = parent {
parent.vm_space().clone()
} else {
VmSpace::new()
self.set_pt_read_only()?;
self.vm_space().deep_copy()
};
Vmar_::new(vmar_inner, vm_space, self.base, self.size, parent)
};
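
For intuition, here is a self-contained toy model of the flow described above (plain `std` Rust with hypothetical `Pte`/`Space` types, not real paging code): write-protect every entry, share the frames with the child, and copy a frame only when a write hits a read-only entry.

use std::sync::Arc;

#[derive(Clone)]
struct Pte {
    frame: Arc<Vec<u8>>, // a shared "physical frame"
    writable: bool,
}

struct Space {
    ptes: Vec<Pte>,
}

impl Space {
    // Mirrors `set_pt_read_only` + `deep_copy`: both sides lose write access
    // and initially share every frame.
    fn new_cow_child(&mut self) -> Space {
        for pte in &mut self.ptes {
            pte.writable = false;
        }
        Space { ptes: self.ptes.clone() }
    }

    // Mirrors the write page fault: copy the shared frame once, then restore
    // write access for the faulting side only.
    fn write(&mut self, idx: usize, off: usize, byte: u8) {
        let pte = &mut self.ptes[idx];
        if !pte.writable {
            pte.frame = Arc::new((*pte.frame).clone()); // the actual copy
            pte.writable = true;
        }
        Arc::get_mut(&mut pte.frame).unwrap()[off] = byte;
    }
}

fn main() {
    let mut parent = Space {
        ptes: vec![Pte { frame: Arc::new(vec![0u8; 16]), writable: true }],
    };
    let mut child = parent.new_cow_child();
    parent.write(0, 0, 7); // the parent's write faults and copies
    child.write(0, 0, 9); // the child's copy is independent
    assert_eq!(parent.ptes[0].frame[0], 7);
    assert_eq!(child.ptes[0].frame[0], 9);
}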


@@ -131,6 +131,22 @@ impl VmMapping {
&self.vmo
}
/// Set the entries in the page table associated with the current `VmMapping` to read-only.
pub(super) fn set_pt_read_only(&self, vm_space: &VmSpace) -> Result<()> {
let map_inner = self.inner.lock();
let mapped_addr = &map_inner.mapped_pages;
let perm = map_inner.perm;
if !perm.contains(VmPerm::W) {
return Ok(());
}
for page_idx in mapped_addr {
let map_addr = map_inner.page_map_addr(*page_idx);
vm_space.protect(&(map_addr..map_addr + PAGE_SIZE), perm - VmPerm::W)?;
}
Ok(())
}
/// Add a new committed page and map it to the vmspace. If copy-on-write is set, it's allowed to unmap the page at the same address.
/// FIXME: This implementation is based on the assumption that we map one page at a time. If multiple pages are mapped together, it may misbehave.
pub(super) fn map_one_page(
@@ -167,6 +183,7 @@ impl VmMapping {
pub fn vmo_offset(&self) -> usize {
self.inner.lock().vmo_offset
}
pub fn read_bytes(&self, offset: usize, buf: &mut [u8]) -> Result<()> {
let vmo_read_offset = self.vmo_offset() + offset;
@@ -186,8 +203,23 @@
let page_idx_range = get_page_idx_range(&(vmo_write_offset..vmo_write_offset + buf.len()));
let write_perm = VmPerm::W;
let mut page_addr =
self.map_to_addr() - self.vmo_offset() + page_idx_range.start * PAGE_SIZE;
for page_idx in page_idx_range {
self.check_perm(&page_idx, &write_perm)?;
let parent = self.parent.upgrade().unwrap();
let vm_space = parent.vm_space();
// The `VmMapping` has write permission, but the corresponding PTE is present and read-only.
// This means the PTE was set to read-only by the COW mechanism. In this situation we need to trigger a
// page fault before writing to the VMO to guarantee consistency between the VMO and the page table.
let need_page_fault = vm_space.is_mapped(page_addr) && !vm_space.is_writable(page_addr);
if need_page_fault {
self.handle_page_fault(page_addr, false, true)?;
}
page_addr += PAGE_SIZE;
}
self.vmo.write_bytes(vmo_write_offset, buf)?;
@@ -229,7 +261,7 @@ impl VmMapping {
// If a read access to a COW VMO triggers a page fault, the mapping should be read-only.
// If the user later tries to write to the frame, another page fault will be triggered.
let is_readonly = self.vmo.is_cow_child() && !write;
let is_readonly = self.vmo.is_cow_vmo() && !write;
self.map_one_page(page_idx, frame, is_readonly)
}
@@ -429,7 +461,7 @@ impl VmMappingInner {
let vm_perm = {
let mut perm = self.perm;
if is_readonly {
debug_assert!(vmo.is_cow_child());
debug_assert!(vmo.is_cow_vmo());
perm -= VmPerm::W;
}
perm
@@ -443,7 +475,7 @@
};
// A COW VMO allows unmapping the mapped page.
if vmo.is_cow_child() && vm_space.is_mapped(map_addr) {
if vmo.is_cow_vmo() && vm_space.is_mapped(map_addr) {
vm_space.unmap(&(map_addr..(map_addr + PAGE_SIZE))).unwrap();
}


@@ -2,7 +2,7 @@
use core::ops::Range;
use aster_frame::vm::VmIo;
use aster_frame::vm::{VmFrame, VmIo};
use aster_rights::{Rights, TRights};
use super::{
@@ -68,9 +68,9 @@ impl Vmo<Rights> {
}
/// Commit a page at a specific offset.
pub fn commit_page(&self, offset: usize) -> Result<()> {
pub fn commit_page(&self, offset: usize) -> Result<VmFrame> {
self.check_rights(Rights::WRITE)?;
self.0.commit_page(offset)
self.0.commit_page(offset, false)
}
/// Commits the pages specified in the range (in bytes).
@@ -84,7 +84,8 @@ impl Vmo<Rights> {
/// The method requires the Write right.
pub fn commit(&self, range: Range<usize>) -> Result<()> {
self.check_rights(Rights::WRITE)?;
self.0.commit(range)
self.0.commit(range, false)?;
Ok(())
}
/// Decommits the pages specified in the range (in bytes).


@@ -5,7 +5,10 @@
use core::ops::Range;
use align_ext::AlignExt;
use aster_frame::vm::{VmAllocOptions, VmFrame, VmFrameVec, VmIo};
use aster_frame::{
collections::xarray::{CursorMut, XArray, XMark},
vm::{VmAllocOptions, VmFrame, VmFrameVec, VmIo},
};
use aster_rights::Rights;
use crate::prelude::*;
@@ -18,6 +21,8 @@ mod static_cap;
pub use options::{VmoChildOptions, VmoOptions};
pub use pager::Pager;
use self::options::ChildType;
/// Virtual Memory Objects (VMOs) are a type of capability that represents a
/// range of memory pages.
///
@@ -95,7 +100,7 @@ pub trait VmoRightsOp {
Self: Sized;
}
// We implement this trait for Vmo, so we can use functions on type like Vmo<R> without trait bounds.
// We implement this trait for VMO, so we can use functions on types like Vmo<R> without trait bounds.
// FIXME: This requires the incomplete feature `specialization`, which should be fixed later.
impl<R> VmoRightsOp for Vmo<R> {
default fn rights(&self) -> Rights {
@@ -127,182 +132,230 @@ bitflags! {
}
}
/// Marks used for the `XArray` in `Vmo_`.
#[derive(Copy, Clone)]
pub(super) enum VmoMark {
/// A mark for the VMO's `pages`, which are managed by `XArray`.
/// A VMO whose `pages` is marked as `CowVmo` may require a Copy-On-Write (COW) operation
/// when performing a write action.
CowVmo,
/// A mark for a `VmFrame` stored within `pages` marked as `CowVmo`.
/// `VmFrame`s marked as `ExclusivePage` are newly created through the COW mechanism
/// and do not require further COW operations.
ExclusivePage,
}
impl From<VmoMark> for XMark {
fn from(val: VmoMark) -> Self {
match val {
VmoMark::CowVmo => XMark::Mark0,
VmoMark::ExclusivePage => XMark::Mark1,
}
}
}
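
Putting the two mark levels together, the call pattern used later in this commit looks like the following condensed sketch (it assumes this module's imports; `new_frame` stands in for a freshly allocated `VmFrame`):

fn mark_sketch(pages: &mut XArray<VmFrame, VmoMark>, new_frame: VmFrame) {
    // Array-level mark: the whole VMO requires copy-on-write.
    pages.set_mark(VmoMark::CowVmo);
    debug_assert!(pages.is_marked(VmoMark::CowVmo));

    // Entry-level mark: the frame stored at index 0 is exclusively owned,
    // so later writes to it need no further copying.
    let mut cursor = pages.cursor_mut(0);
    cursor.store(new_frame);
    cursor.set_mark(VmoMark::ExclusivePage).unwrap();
    debug_assert!(cursor.is_marked(VmoMark::ExclusivePage));
}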
/// `Pages` is the struct that manages the `VmFrame`s stored in `Vmo_`.
pub(super) enum Pages {
/// `Pages` that cannot be resized. This kind of `Pages` will have a constant size.
Nonresizable(Arc<Mutex<XArray<VmFrame, VmoMark>>>, usize),
/// `Pages` that can be resized, i.e., that have a variable size. Such `Pages` cannot
/// be shared between different VMOs.
Resizable(Mutex<(XArray<VmFrame, VmoMark>, usize)>),
}
impl Pages {
fn with<R, F>(&self, func: F) -> R
where
F: FnOnce(&mut XArray<VmFrame, VmoMark>, usize) -> R,
{
match self {
Self::Nonresizable(pages, size) => func(&mut pages.lock(), *size),
Self::Resizable(pages) => {
let mut lock = pages.lock();
let size = lock.1;
func(&mut lock.0, size)
}
}
}
}
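
Every accessor below funnels through `with`, so locking and the resizable/nonresizable split are handled in one place. For example, a hypothetical helper that checks whether the first page is committed:

fn first_page_committed(pages: &Pages) -> bool {
    // The closure receives the locked `XArray` plus the current size and never
    // needs to know which `Pages` variant it is operating on.
    pages.with(|xarray, _size| xarray.load(0).is_some())
}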
/// `Vmo_` is the structure that actually manages the content of VMO.
/// Broadly speaking, there are two types of VMO:
/// 1. File-backed VMO: the VMO backed by a file and resides in the `PageCache`,
/// which includes a pager to provide it with actual pages.
/// 2. Anonymous VMO: the VMO without a file backup, which does not have a pager.
pub(super) struct Vmo_ {
pager: Option<Arc<dyn Pager>>,
/// Flags
flags: VmoFlags,
/// VmoInner
inner: Mutex<VmoInner>,
/// The offset of the range of pages corresponding to the VMO within `pages`.
page_idx_offset: usize,
/// The virtual pages where the VMO resides.
pages: Pages,
}
struct VmoInner {
pager: Option<Arc<dyn Pager>>,
/// size, in bytes
size: usize,
/// The pages committed. The key is the page index, the value is the committed frame.
committed_pages: BTreeMap<usize, VmFrame>,
/// The pages from the parent that the current VMO can access. The pages can only be inherited when creating a child VMO.
/// We store the page index range
inherited_pages: Option<Vec<VmFrame>>,
/// Whether the VMO is a copy-on-write child.
is_cow: bool,
}
impl VmoInner {
fn commit_page(&mut self, offset: usize) -> Result<()> {
let page_idx = offset / PAGE_SIZE;
// Fast path: the page is already committed.
if self.committed_pages.contains_key(&page_idx) {
return Ok(());
}
let frame = match &self.pager {
None => VmAllocOptions::new(1).alloc_single()?,
Some(pager) => pager.commit_page(page_idx)?,
};
self.insert_frame(page_idx, frame);
Ok(())
}
fn decommit_page(&mut self, offset: usize) -> Result<()> {
let page_idx = offset / PAGE_SIZE;
if self.committed_pages.remove(&page_idx).is_some() {
if let Some(pager) = &self.pager {
pager.decommit_page(page_idx)?;
}
}
Ok(())
}
fn insert_frame(&mut self, page_idx: usize, frame: VmFrame) {
debug_assert!(!self.committed_pages.contains_key(&page_idx));
self.committed_pages.insert(page_idx, frame);
}
fn get_committed_frame(&mut self, page_idx: usize, write_page: bool) -> Result<VmFrame> {
// If the page is already committed, return it.
if let Some(frames) = self.committed_pages.get(&page_idx) {
return Ok(frames.clone());
}
// The VMO is not a child.
if self.inherited_pages.is_none() {
self.commit_page(page_idx * PAGE_SIZE)?;
let frame = self.committed_pages.get(&page_idx).unwrap().clone();
return Ok(frame);
}
let frame = self.get_inherited_frame_or_alloc(page_idx, write_page)?;
if !self.should_share_frame_with_parent(write_page) {
self.insert_frame(page_idx, frame.clone());
}
Ok(frame)
}
fn get_inherited_frame_or_alloc(&self, page_idx: usize, write_page: bool) -> Result<VmFrame> {
let inherited_frames = self.inherited_pages.as_ref().unwrap();
if page_idx >= inherited_frames.len() {
if self.is_cow {
return Ok(VmAllocOptions::new(1).alloc_single()?);
}
return_errno_with_message!(Errno::EINVAL, "the page is not inherited from parent");
}
let inherited_frame = inherited_frames.get(page_idx).unwrap().clone();
if self.should_share_frame_with_parent(write_page) {
return Ok(inherited_frame);
}
let frame = VmAllocOptions::new(1).alloc_single()?;
frame.copy_from_frame(&inherited_frame);
Ok(frame)
}
fn is_cow_child(&self) -> bool {
self.is_cow
}
fn should_share_frame_with_parent(&self, write_page: bool) -> bool {
!self.is_cow || !write_page
}
fn clone_page(page: &VmFrame) -> Result<VmFrame> {
let new_page = VmAllocOptions::new(1).alloc_single()?;
new_page.copy_from_frame(page);
Ok(new_page)
}
impl Vmo_ {
pub fn commit_page(&self, offset: usize) -> Result<()> {
self.inner.lock().commit_page(offset)
/// Prepare a new `VmFrame` for the target index in pages, returning the new page as well as
/// whether this page needs to be marked as exclusive.
///
/// Based on the type of VMO and the impending operation on the prepared page, there are 3 conditions:
/// 1. For an Anonymous VMO, provide a new page directly. If the VMO requires copy-on-write (COW),
/// the prepared page can be directly set to exclusive.
/// 2. For a File-backed VMO that does not need to trigger the COW mechanism,
/// obtain a page from the pager directly; it does not need to be marked as exclusive.
/// 3. For a File-backed VMO that requires triggering the COW mechanism, obtain a page
/// from the pager and then copy it. This page can be set as exclusive.
fn prepare_page(
&self,
page_idx: usize,
is_cow_vmo: bool,
will_write: bool,
) -> Result<(VmFrame, bool)> {
let (page, should_mark_exclusive) = match &self.pager {
None => {
// Condition 1. The new anonymous page only needs to be marked as `ExclusivePage`
// when the current VMO is a COW VMO; otherwise this mark is meaningless.
(VmAllocOptions::new(1).alloc_single()?, is_cow_vmo)
}
Some(pager) => {
let page = pager.commit_page(page_idx)?;
// The prerequisite for triggering the COW mechanism here is that the current
// VMO requires COW and the prepared page is about to undergo a write operation.
// At this point, the `VmFrame` obtained from the pager needs to be cloned to
// avoid subsequent modifications affecting the content of the `VmFrame` in the pager.
let trigger_cow = is_cow_vmo && will_write;
if trigger_cow {
// Condition 3.
(clone_page(&page)?, true)
} else {
// Condition 2.
(page, false)
}
}
};
Ok((page, should_mark_exclusive))
}
pub fn decommit_page(&self, offset: usize) -> Result<()> {
self.inner.lock().decommit_page(offset)
}
fn commit_with_cursor(
&self,
cursor: &mut CursorMut<'_, VmFrame, VmoMark>,
is_cow_vmo: bool,
will_write: bool,
) -> Result<VmFrame> {
let (new_page, is_exclusive) = {
let is_exclusive = cursor.is_marked(VmoMark::ExclusivePage);
if let Some(committed_page) = cursor.load() {
// The necessary and sufficient condition for triggering the COW mechanism is that
// the current VMO requires copy-on-write, there is an impending write operation to the page,
// and the page is not exclusive.
let trigger_cow = is_cow_vmo && will_write && !is_exclusive;
if !trigger_cow {
// Fast path: return the page directly.
return Ok(committed_page.clone());
}
pub fn commit(&self, range: Range<usize>) -> Result<()> {
let page_idx_range = get_page_idx_range(&range);
for page_idx in page_idx_range {
let offset = page_idx * PAGE_SIZE;
self.commit_page(offset)?;
(clone_page(&committed_page)?, true)
} else {
self.prepare_page(cursor.index() as usize, is_cow_vmo, will_write)?
}
};
cursor.store(new_page.clone());
if is_exclusive {
cursor.set_mark(VmoMark::ExclusivePage).unwrap();
}
Ok(())
Ok(new_page)
}
/// Commit the page corresponding to the target offset in the VMO and return that page.
/// If the current offset has already been committed, the page will be returned directly.
/// During the commit process, the Copy-On-Write (COW) mechanism may be triggered depending on the circumstances.
pub fn commit_page(&self, offset: usize, will_write: bool) -> Result<VmFrame> {
let page_idx = offset / PAGE_SIZE + self.page_idx_offset;
self.pages.with(|pages, size| {
let is_cow_vmo = pages.is_marked(VmoMark::CowVmo);
let mut cursor = pages.cursor_mut(page_idx as u64);
self.commit_with_cursor(&mut cursor, is_cow_vmo, will_write)
})
}
/// Decommit the page corresponding to the target offset in the VMO.
fn decommit_page(&mut self, offset: usize) -> Result<()> {
let page_idx = offset / PAGE_SIZE + self.page_idx_offset;
self.pages.with(|pages, size| {
let is_cow_vmo = pages.is_marked(VmoMark::CowVmo);
let mut cursor = pages.cursor_mut(page_idx as u64);
if cursor.remove().is_some()
&& let Some(pager) = &self.pager
&& !is_cow_vmo
{
pager.decommit_page(page_idx)?;
}
Ok(())
})
}
/// Commit a range of pages in the VMO, returning the pages in this range.
pub fn commit(&self, range: Range<usize>, will_write: bool) -> Result<VmFrameVec> {
self.pages.with(|pages, size| {
if range.end > size {
return_errno_with_message!(Errno::EINVAL, "operated range exceeds the vmo size");
}
let raw_page_idx_range = get_page_idx_range(&range);
let page_idx_range = (raw_page_idx_range.start + self.page_idx_offset)
..(raw_page_idx_range.end + self.page_idx_offset);
let mut frames = VmFrameVec::new_with_capacity(page_idx_range.len());
let is_cow_vmo = pages.is_marked(VmoMark::CowVmo);
let mut cursor = pages.cursor_mut(page_idx_range.start as u64);
for page_idx in page_idx_range {
let committed_page =
self.commit_with_cursor(&mut cursor, is_cow_vmo, will_write)?;
frames.push(committed_page);
cursor.next();
}
Ok(frames)
})
}
/// Decommit a range of pages in the VMO.
pub fn decommit(&self, range: Range<usize>) -> Result<()> {
let page_idx_range = get_page_idx_range(&range);
for page_idx in page_idx_range {
let offset = page_idx * PAGE_SIZE;
self.decommit_page(offset)?;
}
Ok(())
}
/// Determine whether a page is committed.
pub fn page_commited(&self, page_idx: usize) -> bool {
self.inner.lock().committed_pages.contains_key(&page_idx)
self.pages.with(|pages, size| {
self.decommit_pages(pages, range)?;
Ok(())
})
}
/// Read the specified amount of buffer content starting from the target offset in the VMO.
pub fn read_bytes(&self, offset: usize, buf: &mut [u8]) -> Result<()> {
let read_len = buf.len();
if offset + read_len > self.size() {
return_errno_with_message!(Errno::EINVAL, "read range exceeds vmo size");
}
let read_range = offset..(offset + read_len);
let frames = self.ensure_all_pages_exist(&read_range, false)?;
let frames = self.commit(read_range, false)?;
let read_offset = offset % PAGE_SIZE;
Ok(frames.read_bytes(read_offset, buf)?)
}
/// Ensure all pages inside the range are backed by VM frames; returns the frames.
fn ensure_all_pages_exist(&self, range: &Range<usize>, write_page: bool) -> Result<VmFrameVec> {
let page_idx_range = get_page_idx_range(range);
let mut frames = VmFrameVec::new_with_capacity(page_idx_range.len());
for page_idx in page_idx_range {
let page_frame = self.get_committed_frame(page_idx, write_page)?;
frames.push(page_frame);
}
Ok(frames)
}
/// Get the frame for a page, committing a new frame for it
/// if the page is not yet committed.
fn get_committed_frame(&self, page_idx: usize, write_page: bool) -> Result<VmFrame> {
self.inner.lock().get_committed_frame(page_idx, write_page)
}
/// Write the specified amount of buffer content starting from the target offset in the VMO.
pub fn write_bytes(&self, offset: usize, buf: &[u8]) -> Result<()> {
let write_len = buf.len();
debug_assert!(offset + write_len <= self.size());
if offset + write_len > self.size() {
return_errno_with_message!(Errno::EINVAL, "write range exceeds the vmo size");
}
let write_range = offset..(offset + write_len);
let frames = self.ensure_all_pages_exist(&write_range, true)?;
let frames = self.commit(write_range.clone(), true)?;
let write_offset = offset % PAGE_SIZE;
frames.write_bytes(write_offset, buf)?;
if let Some(pager) = &self.inner.lock().pager {
let page_idx_range = get_page_idx_range(&write_range);
let is_cow_vmo = self.is_cow_vmo();
if let Some(pager) = &self.pager
&& !is_cow_vmo
{
let raw_page_idx_range = get_page_idx_range(&write_range);
let page_idx_range = (raw_page_idx_range.start + self.page_idx_offset)
..(raw_page_idx_range.end + self.page_idx_offset);
for page_idx in page_idx_range {
pager.update_page(page_idx)?;
}
@@ -310,36 +363,179 @@ impl Vmo_ {
Ok(())
}
/// Clear the target range in the current VMO.
pub fn clear(&self, range: Range<usize>) -> Result<()> {
let buffer = vec![0u8; range.end - range.start];
self.write_bytes(range.start, &buffer)
}
pub fn size(&self) -> usize {
self.inner.lock().size
}
pub fn resize(&self, new_size: usize) -> Result<()> {
assert!(self.flags.contains(VmoFlags::RESIZABLE));
let new_size = new_size.align_up(PAGE_SIZE);
let old_size = self.size();
if new_size == old_size {
return Ok(());
}
if new_size < old_size {
self.decommit(new_size..old_size)?;
self.inner.lock().size = new_size;
} else {
self.inner.lock().size = new_size;
}
self.write_bytes(range.start, &buffer)?;
Ok(())
}
/// Return the size of the current VMO.
pub fn size(&self) -> usize {
self.pages.with(|pages, size| size)
}
/// Return the page index offset of the current VMO within the corresponding `pages`.
pub fn page_idx_offset(&self) -> usize {
self.page_idx_offset
}
/// Clone the current `pages` to the child VMO.
///
/// Depending on the type of the VMO and the child, there are 4 conditions:
/// 1. For a slice child, directly share the current `pages` with that child.
/// 2. For a COW child where the current VMO requires COW, it is necessary to clear the
/// `ExclusivePage` marks in the current `pages` and clone a new `pages` for the child.
/// 3. For a COW child where the current VMO does not require COW and is a File-backed VMO,
/// a new `pages` needs to be cloned to the child, and the child's `pages`
/// require COW. The current `pages` do not need COW, as they must remain consistent with the pager.
/// 4. For a COW child where the current VMO does not require COW and is an Anonymous VMO,
/// a new `pages` needs to be cloned to the child, and both the current `pages` and
/// the child's `pages` require COW.
pub fn clone_pages_for_child(
&self,
child_type: ChildType,
child_flags: VmoFlags,
range: &Range<usize>,
) -> Result<Pages> {
let child_vmo_start = range.start;
let child_vmo_end = range.end;
debug_assert!(child_vmo_start % PAGE_SIZE == 0);
debug_assert!(child_vmo_end % PAGE_SIZE == 0);
if child_vmo_start % PAGE_SIZE != 0 || child_vmo_end % PAGE_SIZE != 0 {
return_errno_with_message!(Errno::EINVAL, "VMO range is not aligned with PAGE_SIZE");
}
match child_type {
ChildType::Slice => {
if child_flags.contains(VmoFlags::RESIZABLE) {
return_errno_with_message!(
Errno::EINVAL,
"a slice child VMO cannot be resizable"
);
}
let Pages::Nonresizable(ref pages, size) = self.pages else {
return_errno_with_message!(
Errno::EINVAL,
"a resizable VMO cannot have a slice child"
);
};
// A slice child should be inside the parent VMO's range
debug_assert!(child_vmo_end <= size);
if child_vmo_end > size {
return_errno_with_message!(
Errno::EINVAL,
"a slice child VMO cannot exceed its parent VMO's size"
);
}
// Condition 1.
Ok(Pages::Nonresizable(pages.clone(), range.len()))
}
ChildType::Cow => {
let new_pages = self.pages.with(|pages, size| {
// A Copy-on-Write child should intersect with the parent VMO
debug_assert!(child_vmo_start <= size);
if child_vmo_start > size {
return_errno_with_message!(
Errno::EINVAL,
"a COW VMO should overlap with its parent"
);
}
let self_is_cow = pages.is_marked(VmoMark::CowVmo);
if self_is_cow {
// Condition 2.
pages.unset_mark_all(VmoMark::ExclusivePage);
return Ok(pages.clone());
}
if self.pager.is_some() {
// Condition 3.
let mut cloned_pages = pages.clone();
cloned_pages.set_mark(VmoMark::CowVmo);
return Ok(cloned_pages);
}
// Condition 4.
pages.set_mark(VmoMark::CowVmo);
Ok(pages.clone())
})?;
if child_flags.contains(VmoFlags::RESIZABLE) {
Ok(Pages::Resizable(Mutex::new((new_pages, range.len()))))
} else {
Ok(Pages::Nonresizable(
Arc::new(Mutex::new(new_pages)),
range.len(),
))
}
}
}
}
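
As a usage sketch, a hypothetical test mirroring the ones at the bottom of this commit (and assuming the test-side imports): creating a COW child of an anonymous parent exercises condition 4, after which both sides report `is_cow_vmo()`.

#[ktest]
fn cow_child_marks_parent() {
    let parent = VmoOptions::<Full>::new(2 * PAGE_SIZE).alloc().unwrap();
    let cow_child = VmoChildOptions::new_cow(parent.dup().unwrap(), 0..10 * PAGE_SIZE)
        .alloc()
        .unwrap();
    // Condition 4: the anonymous parent's `pages` were marked `CowVmo` and cloned.
    assert!(parent.is_cow_vmo() && cow_child.is_cow_vmo());
}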
/// Resize the current VMO to the target size.
pub fn resize(&self, new_size: usize) -> Result<()> {
assert!(self.flags.contains(VmoFlags::RESIZABLE));
let new_size = new_size.align_up(PAGE_SIZE);
let Pages::Resizable(ref pages) = self.pages else {
return_errno_with_message!(Errno::EINVAL, "current VMO is not resizable");
};
let mut lock = pages.lock();
let old_size = lock.1;
if new_size == old_size {
return Ok(());
}
if new_size < old_size {
self.decommit_pages(&mut lock.0, new_size..old_size)?;
}
lock.1 = new_size;
Ok(())
}
fn decommit_pages(
&self,
pages: &mut XArray<VmFrame, VmoMark>,
range: Range<usize>,
) -> Result<()> {
let raw_page_idx_range = get_page_idx_range(&range);
let page_idx_range = (raw_page_idx_range.start + self.page_idx_offset)
..(raw_page_idx_range.end + self.page_idx_offset);
let is_cow_vmo = pages.is_marked(VmoMark::CowVmo);
let mut cursor = pages.cursor_mut(page_idx_range.start as u64);
for page_idx in page_idx_range {
if cursor.remove().is_some()
&& let Some(pager) = &self.pager
&& !is_cow_vmo
{
pager.decommit_page(page_idx)?;
}
cursor.next();
}
Ok(())
}
/// Determine whether a page is committed.
pub fn is_page_committed(&self, page_idx: usize) -> bool {
self.pages.with(|pages, size| {
pages
.load((page_idx + self.page_idx_offset) as u64)
.is_some()
})
}
/// Return the flags of the current VMO.
pub fn flags(&self) -> VmoFlags {
self.flags
}
/// Determine whether the VMO needs the COW mechanism.
pub fn is_cow_vmo(&self) -> bool {
self.pages
.with(|pages, size| pages.is_marked(VmoMark::CowVmo))
}
}
impl<R> Vmo<R> {
@@ -355,15 +551,15 @@ impl<R> Vmo<R> {
/// Return whether a page is already committed.
pub fn is_page_committed(&self, page_idx: usize) -> bool {
self.0.page_commited(page_idx)
self.0.is_page_committed(page_idx)
}
pub fn get_committed_frame(&self, page_idx: usize, write_page: bool) -> Result<VmFrame> {
self.0.get_committed_frame(page_idx, write_page)
self.0.commit_page(page_idx * PAGE_SIZE, write_page)
}
pub fn is_cow_child(&self) -> bool {
self.0.inner.lock().is_cow_child()
pub fn is_cow_vmo(&self) -> bool {
self.0.is_cow_vmo()
}
}
@@ -373,20 +569,3 @@ pub fn get_page_idx_range(vmo_offset_range: &Range<usize>) -> Range<usize> {
let end = vmo_offset_range.end.align_up(PAGE_SIZE);
(start / PAGE_SIZE)..(end / PAGE_SIZE)
}
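
A quick worked example of the rounding behavior, assuming `PAGE_SIZE == 4096`:

// Bytes 100..8292 touch pages 0, 1, and 2: the start is aligned down to 0
// and the end is aligned up to 12288, giving page indices 0..3.
assert_eq!(get_page_idx_range(&(100..8292)), 0..3);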
pub(super) fn get_inherited_frames_from_parent(
parent: Arc<Vmo_>,
num_pages: usize,
parent_page_idx_offset: usize,
is_cow: bool,
) -> Vec<VmFrame> {
let mut inherited_frames = Vec::with_capacity(num_pages);
for page_idx in 0..num_pages {
let parent_page_idx = page_idx + parent_page_idx_offset;
let inherited_frame = parent
.get_committed_frame(parent_page_idx, !is_cow)
.unwrap();
inherited_frames.push(inherited_frame);
}
inherited_frames
}


@@ -5,16 +5,16 @@
use core::{marker::PhantomData, ops::Range};
use align_ext::AlignExt;
use aster_frame::vm::{VmAllocOptions, VmFrame};
use aster_frame::{
collections::xarray::XArray,
vm::{VmAllocOptions, VmFrame},
};
use aster_rights::{Dup, Rights, TRightSet, TRights, Write};
use aster_rights_proc::require;
use typeflags_util::{SetExtend, SetExtendOp};
use super::{Pager, Vmo, VmoFlags, VmoRightsOp};
use crate::{
prelude::*,
vm::vmo::{get_inherited_frames_from_parent, VmoInner, Vmo_},
};
use super::{Pager, Pages, Vmo, VmoFlags, VmoMark, VmoRightsOp};
use crate::{prelude::*, vm::vmo::Vmo_};
/// Options for allocating a root VMO.
///
@@ -124,35 +124,40 @@ impl<R: TRights> VmoOptions<TRightSet<R>> {
fn alloc_vmo_(size: usize, flags: VmoFlags, pager: Option<Arc<dyn Pager>>) -> Result<Vmo_> {
let size = size.align_up(PAGE_SIZE);
let committed_pages = committed_pages_if_continuous(flags, size)?;
let vmo_inner = VmoInner {
pager,
size,
committed_pages,
inherited_pages: None,
is_cow: false,
let pages = {
let pages = committed_pages_if_continuous(flags, size)?;
if flags.contains(VmoFlags::RESIZABLE) {
Pages::Resizable(Mutex::new((pages, size)))
} else {
Pages::Nonresizable(Arc::new(Mutex::new(pages)), size)
}
};
Ok(Vmo_ {
pager,
flags,
inner: Mutex::new(vmo_inner),
page_idx_offset: 0,
pages,
})
}
fn committed_pages_if_continuous(flags: VmoFlags, size: usize) -> Result<BTreeMap<usize, VmFrame>> {
fn committed_pages_if_continuous(flags: VmoFlags, size: usize) -> Result<XArray<VmFrame, VmoMark>> {
if flags.contains(VmoFlags::CONTIGUOUS) {
// If the VMO is contiguous, we need to allocate frames for it upfront
let frames_num = size / PAGE_SIZE;
let frames = VmAllocOptions::new(frames_num)
.is_contiguous(true)
.alloc()?;
let mut committed_pages = BTreeMap::new();
for (idx, frame) in frames.into_iter().enumerate() {
committed_pages.insert(idx * PAGE_SIZE, frame);
let mut committed_pages = XArray::new();
let mut cursor = committed_pages.cursor_mut(0);
for frame in frames {
cursor.store(frame);
cursor.next();
}
drop(cursor);
Ok(committed_pages)
} else {
// Otherwise, frame allocation is deferred until the page is first read or written
Ok(BTreeMap::new())
Ok(XArray::new())
}
}
@@ -280,7 +285,7 @@ impl VmoChildOptions<Rights, VmoSliceChild> {
.check_rights(Rights::DUP)
.expect("function new_slice_rights should be called with rights Dup");
Self {
flags: parent.flags() & Self::PARENT_FLAGS_MASK,
flags: parent.flags(),
parent,
range,
marker: PhantomData,
@@ -327,7 +332,7 @@ impl<R> VmoChildOptions<R, VmoCowChild> {
/// Any pages that are beyond the parent's range are initially all zeros.
pub fn new_cow(parent: Vmo<R>, range: Range<usize>) -> Self {
Self {
flags: parent.flags() & Self::PARENT_FLAGS_MASK,
flags: parent.flags(),
parent,
range,
marker: PhantomData,
@@ -432,7 +437,7 @@ impl<R: TRights> VmoChildOptions<TRightSet<R>, VmoCowChild> {
}
#[derive(Debug, Clone, Copy)]
enum ChildType {
pub(crate) enum ChildType {
Cow,
Slice,
}
@@ -443,63 +448,15 @@ fn alloc_child_vmo_(
child_flags: VmoFlags,
child_type: ChildType,
) -> Result<Vmo_> {
let child_vmo_start = range.start;
let child_vmo_end = range.end;
debug_assert!(child_vmo_start % PAGE_SIZE == 0);
debug_assert!(child_vmo_end % PAGE_SIZE == 0);
if child_vmo_start % PAGE_SIZE != 0 || child_vmo_end % PAGE_SIZE != 0 {
return_errno_with_message!(Errno::EINVAL, "vmo range is not aligned with PAGE_SIZE");
}
let parent_vmo_size = parent_vmo_.size();
let is_cow = {
let parent_vmo_inner = parent_vmo_.inner.lock();
match child_type {
ChildType::Slice => {
// A slice child should be inside parent vmo's range
debug_assert!(child_vmo_end <= parent_vmo_inner.size);
if child_vmo_end > parent_vmo_inner.size {
return_errno_with_message!(
Errno::EINVAL,
"slice child vmo cannot exceed parent vmo's size"
);
}
false
}
ChildType::Cow => {
// A copy on Write child should intersect with parent vmo
debug_assert!(range.start <= parent_vmo_inner.size);
if range.start > parent_vmo_inner.size {
return_errno_with_message!(
Errno::EINVAL,
"COW vmo should overlap with its parent"
);
}
true
}
}
};
let parent_page_idx_offset = range.start / PAGE_SIZE;
let inherited_end = range.end.min(parent_vmo_size);
let cow_size = if inherited_end >= range.start {
inherited_end - range.start
} else {
0
};
let num_pages = cow_size / PAGE_SIZE;
let inherited_pages =
get_inherited_frames_from_parent(parent_vmo_, num_pages, parent_page_idx_offset, is_cow);
let vmo_inner = VmoInner {
pager: None,
size: child_vmo_end - child_vmo_start,
committed_pages: BTreeMap::new(),
inherited_pages: Some(inherited_pages),
is_cow,
};
Ok(Vmo_ {
let child_pages = parent_vmo_.clone_pages_for_child(child_type, child_flags, &range)?;
let new_vmo = Vmo_ {
pager: parent_vmo_.pager.clone(),
flags: child_flags,
inner: Mutex::new(vmo_inner),
})
pages: child_pages,
page_idx_offset: parent_page_idx_offset + parent_vmo_.page_idx_offset(),
};
Ok(new_vmo)
}
/// A type to specify the "type" of a child, which is either a slice or a COW.
@@ -525,9 +482,9 @@ mod test {
#[ktest]
fn alloc_vmo() {
let vmo = VmoOptions::<Full>::new(PAGE_SIZE).alloc().unwrap();
assert!(vmo.size() == PAGE_SIZE);
assert_eq!(vmo.size(), PAGE_SIZE);
// the vmo is zeroed once allocated
assert!(vmo.read_val::<usize>(0).unwrap() == 0);
assert_eq!(vmo.read_val::<usize>(0).unwrap(), 0);
}
#[ktest]
@@ -536,7 +493,7 @@ mod test {
.flags(VmoFlags::CONTIGUOUS)
.alloc()
.unwrap();
assert!(vmo.size() == 10 * PAGE_SIZE);
assert_eq!(vmo.size(), 10 * PAGE_SIZE);
}
#[ktest]
@@ -546,11 +503,11 @@ mod test {
// write val
vmo.write_val(111, &val).unwrap();
let read_val: u8 = vmo.read_val(111).unwrap();
assert!(val == read_val);
assert_eq!(val, read_val);
// little endian
vmo.write_bytes(222, &[0x12, 0x34, 0x56, 0x78]).unwrap();
let read_val: u32 = vmo.read_val(222).unwrap();
assert!(read_val == 0x78563412)
assert_eq!(read_val, 0x78563412)
}
#[ktest]
@@ -562,36 +519,42 @@ mod test {
.unwrap();
// write parent, read child
parent.write_val(1, &42u8).unwrap();
assert!(slice_child.read_val::<u8>(1).unwrap() == 42);
assert_eq!(slice_child.read_val::<u8>(1).unwrap(), 42);
// write child, read parent
slice_child.write_val(99, &0x1234u32).unwrap();
assert!(parent.read_val::<u32>(99).unwrap() == 0x1234);
assert_eq!(parent.read_val::<u32>(99).unwrap(), 0x1234);
}
#[ktest]
fn cow_child() {
let parent = VmoOptions::<Full>::new(2 * PAGE_SIZE).alloc().unwrap();
parent.write_val(1, &42u8).unwrap();
parent.write_val(2, &16u8).unwrap();
let parent_dup = parent.dup().unwrap();
let cow_child = VmoChildOptions::new_cow(parent_dup, 0..10 * PAGE_SIZE)
.alloc()
.unwrap();
// write parent, read child
parent.write_val(1, &42u8).unwrap();
assert!(cow_child.read_val::<u8>(1).unwrap() == 42);
// write child to trigger copy on write, read child and parent
cow_child.write_val(99, &0x1234u32).unwrap();
assert!(cow_child.read_val::<u32>(99).unwrap() == 0x1234);
assert!(cow_child.read_val::<u32>(1).unwrap() == 42);
assert!(parent.read_val::<u32>(99).unwrap() == 0);
assert!(parent.read_val::<u32>(1).unwrap() == 42);
// write parent on already-copied page
parent.write_val(10, &123u8).unwrap();
assert!(parent.read_val::<u32>(10).unwrap() == 123);
assert!(cow_child.read_val::<u32>(10).unwrap() == 0);
// write parent on not-copied page
parent.write_val(PAGE_SIZE + 10, &12345u32).unwrap();
assert!(parent.read_val::<u32>(PAGE_SIZE + 10).unwrap() == 12345);
assert!(cow_child.read_val::<u32>(PAGE_SIZE + 10).unwrap() == 12345);
// Read child.
assert_eq!(cow_child.read_val::<u8>(1).unwrap(), 42);
assert_eq!(cow_child.read_val::<u8>(2).unwrap(), 16);
// Write parent to trigger copy-on-write; read child and parent.
parent.write_val(1, &64u8).unwrap();
assert_eq!(parent.read_val::<u8>(1).unwrap(), 64);
assert_eq!(cow_child.read_val::<u8>(1).unwrap(), 42);
// Write child to trigger copy-on-write; read child and parent.
cow_child.write_val(2, &0x1234u32).unwrap();
assert_eq!(cow_child.read_val::<u32>(2).unwrap(), 0x1234);
assert_eq!(cow_child.read_val::<u8>(1).unwrap(), 42);
assert_eq!(parent.read_val::<u8>(2).unwrap(), 16);
assert_eq!(parent.read_val::<u8>(1).unwrap(), 64);
// Write parent on already-copied page
parent.write_val(1, &123u8).unwrap();
assert_eq!(parent.read_val::<u8>(1).unwrap(), 123);
assert_eq!(cow_child.read_val::<u8>(1).unwrap(), 42);
// Write parent on not-copied page
parent.write_val(2, &12345u32).unwrap();
assert_eq!(parent.read_val::<u32>(2).unwrap(), 12345);
assert_eq!(cow_child.read_val::<u32>(2).unwrap(), 0x1234);
}
#[ktest]
@@ -602,10 +565,10 @@ mod test {
.unwrap();
vmo.write_val(10, &42u8).unwrap();
vmo.resize(2 * PAGE_SIZE).unwrap();
assert!(vmo.size() == 2 * PAGE_SIZE);
assert!(vmo.read_val::<u8>(10).unwrap() == 42);
assert_eq!(vmo.size(), 2 * PAGE_SIZE);
assert_eq!(vmo.read_val::<u8>(10).unwrap(), 42);
vmo.write_val(PAGE_SIZE + 20, &123u8).unwrap();
vmo.resize(PAGE_SIZE).unwrap();
assert!(vmo.read_val::<u8>(10).unwrap() == 42);
assert_eq!(vmo.read_val::<u8>(10).unwrap(), 42);
}
}


@@ -2,7 +2,7 @@
use core::ops::Range;
use aster_frame::vm::VmIo;
use aster_frame::vm::{VmFrame, VmIo};
use aster_rights::{Dup, Rights, TRightSet, TRights, Write};
use aster_rights_proc::require;
@@ -71,9 +71,9 @@ impl<R: TRights> Vmo<TRightSet<R>> {
}
/// Commit a page at a specific offset.
pub fn commit_page(&self, offset: usize) -> Result<()> {
pub fn commit_page(&self, offset: usize) -> Result<VmFrame> {
self.check_rights(Rights::WRITE)?;
self.0.commit_page(offset)
self.0.commit_page(offset, false)
}
/// Commit the pages specified in the range (in bytes).
@@ -87,7 +87,8 @@ impl<R: TRights> Vmo<TRightSet<R>> {
/// The method requires the Write right.
#[require(R > Write)]
pub fn commit(&self, range: Range<usize>) -> Result<()> {
self.0.commit(range)
self.0.commit(range, false)?;
Ok(())
}
/// Decommit the pages specified in the range (in bytes).