Implement system call msync

Author: Zhang Junyang
Date: 2025-06-10 20:45:52 +08:00
Committed by: Tate, Hongliang Tian
Parent: a2aedcfb3a
Commit: 525085ab86
9 changed files with 227 additions and 54 deletions

View File

@@ -15,7 +15,7 @@ support the loading of Linux kernel modules.
 ## System Calls
 At the time of writing,
-Asterinas implements 204 out of the 336 system calls
+Asterinas implements 213 out of the 336 system calls
 provided by Linux on x86-64 architecture.
 | Numbers | Names | Is Implemented |
@@ -46,7 +46,7 @@ provided by Linux on x86-64 architecture.
 | 23 | select | ✅ |
 | 24 | sched_yield | ✅ |
 | 25 | mremap | ❌ |
-| 26 | msync | ❌ |
+| 26 | msync | ✅ |
 | 27 | mincore | ❌ |
 | 28 | madvise | ✅ |
 | 29 | shmget | ❌ |

View File

@@ -12,11 +12,7 @@ use crate::{
         file_table::{get_file_fast, FileDesc},
     },
     prelude::*,
-    vm::{
-        perms::VmPerms,
-        vmar::is_userspace_vaddr,
-        vmo::{VmoOptions, VmoRightsOp},
-    },
+    vm::{perms::VmPerms, vmar::is_userspace_vaddr, vmo::VmoOptions},
 };
 
 pub fn sys_mmap(
@@ -124,34 +120,28 @@ fn do_sys_mmap(
             options = options.vmo(shared_vmo);
         }
     } else {
-        let vmo = {
-            let mut file_table = ctx.thread_local.borrow_file_table_mut();
-            let file = get_file_fast!(&mut file_table, fd);
-            let inode_handle = file.as_inode_or_err()?;
+        let mut file_table = ctx.thread_local.borrow_file_table_mut();
+        let file = get_file_fast!(&mut file_table, fd);
+        let inode_handle = file.as_inode_or_err()?;
 
-            let access_mode = inode_handle.access_mode();
-            if vm_perms.contains(VmPerms::READ) && !access_mode.is_readable() {
-                return_errno!(Errno::EACCES);
-            }
-            if option.typ() == MMapType::Shared
-                && vm_perms.contains(VmPerms::WRITE)
-                && !access_mode.is_writable()
-            {
-                return_errno!(Errno::EACCES);
-            }
+        let access_mode = inode_handle.access_mode();
+        if vm_perms.contains(VmPerms::READ) && !access_mode.is_readable() {
+            return_errno!(Errno::EACCES);
+        }
+        if option.typ() == MMapType::Shared
+            && vm_perms.contains(VmPerms::WRITE)
+            && !access_mode.is_writable()
+        {
+            return_errno!(Errno::EACCES);
+        }
 
-            let inode = inode_handle.dentry().inode();
-            inode
-                .page_cache()
-                .ok_or(Error::with_message(
-                    Errno::EBADF,
-                    "File does not have page cache",
-                ))?
-                .to_dyn()
-        };
+        let inode = inode_handle.dentry().inode();
+        if inode.page_cache().is_none() {
+            return_errno_with_message!(Errno::EBADF, "File does not have page cache");
+        }
 
         options = options
-            .vmo(vmo)
+            .inode(inode.clone())
             .vmo_offset(offset)
             .handle_page_faults_around();
     }

View File

@@ -1,9 +1,99 @@
 // SPDX-License-Identifier: MPL-2.0
 
-use super::SyscallReturn;
-use crate::prelude::*;
+use align_ext::AlignExt;
+
+use super::SyscallReturn;
+use crate::{prelude::*, thread::kernel_thread::ThreadOptions};
+
+bitflags! {
+    /// Flags for `msync`.
+    ///
+    /// See <https://elixir.bootlin.com/linux/v6.15.1/source/include/uapi/asm-generic/mman-common.h#L42>.
+    pub struct MsyncFlags: i32 {
+        /// Performs `msync` asynchronously.
+        const MS_ASYNC = 0x01;
+        /// Invalidates cache so that other processes mapping the same file
+        /// will immediately see the changes before this `msync` call.
+        ///
+        /// Should be a no-op since we use the same page cache for all processes.
+        const MS_INVALIDATE = 0x02;
+        /// Performs `msync` synchronously.
+        const MS_SYNC = 0x04;
+    }
+}
+
+macro_rules! return_partially_mapped {
+    () => {
+        return_errno_with_message!(Errno::ENOMEM, "`msync` called on a partially mapped range")
+    };
+}
+
+pub fn sys_msync(start: Vaddr, size: usize, flag: i32, ctx: &Context) -> Result<SyscallReturn> {
+    let flags = MsyncFlags::from_bits(flag).ok_or_else(|| Error::new(Errno::EINVAL))?;
+    debug!("msync: start = {start:#x}, size = {size}, flags = {flags:?}");
+
+    if start % PAGE_SIZE != 0 || flags.contains(MsyncFlags::MS_ASYNC | MsyncFlags::MS_SYNC) {
+        return_errno!(Errno::EINVAL);
+    }
+    if size == 0 {
+        return Ok(SyscallReturn::Return(0));
+    }
+
+    let range = {
+        let end = start
+            .checked_add(size)
+            .ok_or(Error::with_message(
+                Errno::EINVAL,
+                "`msync` `size` overflows",
+            ))?
+            .align_up(PAGE_SIZE);
+        start..end
+    };
+
+    let user_space = ctx.user_space();
+    let root_vmar = user_space.root_vmar().dup()?;
+    let guard = root_vmar.query(range.clone());
+    let mut mappings_iter = guard.iter();
+
+    // Check if the range is fully mapped.
+    let Some(first) = mappings_iter.next() else {
+        return_errno_with_message!(Errno::ENOMEM, "`msync` called on a not mapped range");
+    };
+    if first.map_to_addr() > range.start {
+        return_partially_mapped!();
+    }
+    let mut last_end = first.map_end();
+    for mapping in mappings_iter {
+        let start = mapping.map_to_addr();
+        if start != last_end {
+            return_partially_mapped!();
+        }
+        last_end = mapping.map_end();
+    }
+    if last_end < range.end {
+        return_partially_mapped!();
+    }
+
+    // Do nothing if not file-backed, as <https://pubs.opengroup.org/onlinepubs/9699919799/> says.
+    let inodes = guard
+        .iter()
+        .filter_map(|m| m.inode().cloned())
+        .collect::<Vec<_>>();
+
+    let task_fn = move || {
+        for inode in inodes {
+            // TODO: Sync a necessary range instead of syncing the whole inode.
+            let _ = inode.sync_all();
+        }
+    };
+
+    if flags.contains(MsyncFlags::MS_ASYNC) {
+        ThreadOptions::new(task_fn).spawn();
+    } else {
+        task_fn();
+    }
+
-pub fn sys_msync(_start: Vaddr, _size: usize, _flag: i32, _ctx: &Context) -> Result<SyscallReturn> {
-    // TODO: implement real `msync`.
     Ok(SyscallReturn::Return(0))
 }
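
For context, the user-visible behavior that the new `sys_msync` aims for can be summarized with a small user-space sketch. This snippet is illustrative only and not part of the commit; it assumes the `libc` crate and a writable `data.bin` file.

```rust
use std::{fs::OpenOptions, os::fd::AsRawFd, ptr};

fn main() {
    let file = OpenOptions::new()
        .read(true)
        .write(true)
        .open("data.bin")
        .unwrap();
    let len = 4096usize;
    file.set_len(len as u64).unwrap();

    unsafe {
        // Create a shared, file-backed mapping; msync writes its dirty pages back.
        let addr = libc::mmap(
            ptr::null_mut(),
            len,
            libc::PROT_READ | libc::PROT_WRITE,
            libc::MAP_SHARED,
            file.as_raw_fd(),
            0,
        );
        assert_ne!(addr, libc::MAP_FAILED);

        // Dirty the mapping.
        *(addr as *mut u8) = 42;

        // MS_SYNC waits for the writeback; MS_ASYNC only schedules it.
        // Per the implementation above, passing MS_SYNC | MS_ASYNC together
        // yields EINVAL, and a range that is not fully mapped yields ENOMEM.
        assert_eq!(libc::msync(addr, len, libc::MS_SYNC), 0);

        libc::munmap(addr, len);
    }
}
```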

View File

@@ -16,6 +16,7 @@ use ostd::{
     mm::{
         tlb::TlbFlushOp, vm_space::CursorMut, PageFlags, PageProperty, VmSpace, MAX_USERSPACE_VADDR,
     },
+    sync::RwMutexReadGuard,
     task::disable_preempt,
 };
@@ -24,6 +25,7 @@ use self::{
     vm_mapping::{MappedVmo, VmMapping},
 };
 use crate::{
+    fs::utils::Inode,
     prelude::*,
     process::{Process, ResourceType},
     thread::exception::PageFaultInfo,
@@ -164,6 +166,11 @@ impl VmarInner
         Some(vm_mapping)
     }
 
+    /// Finds a set of [`VmMapping`]s that intersect with the provided range.
+    fn query(&self, range: &Range<Vaddr>) -> impl Iterator<Item = &VmMapping> {
+        self.vm_mappings.find(range)
+    }
+
     /// Calculates the total amount of overlap between `VmMapping`s
     /// and the provided range.
     fn count_overlap_size(&self, range: Range<Vaddr>) -> usize {
@@ -315,6 +322,13 @@ impl Vmar_
         )
     }
 
+    fn query(&self, range: Range<usize>) -> VmarQueryGuard<'_> {
+        VmarQueryGuard {
+            vmar: self.inner.read(),
+            range,
+        }
+    }
+
     fn protect(&self, perms: VmPerms, range: Range<usize>) -> Result<()> {
         assert!(range.start % PAGE_SIZE == 0);
         assert!(range.end % PAGE_SIZE == 0);
@@ -586,6 +600,7 @@ impl<R> Vmar<R>
 pub struct VmarMapOptions<'a, R1, R2> {
     parent: &'a Vmar<R1>,
     vmo: Option<Vmo<R2>>,
+    inode: Option<Arc<dyn Inode>>,
     perms: VmPerms,
     vmo_offset: usize,
     vmo_limit: usize,
@@ -610,6 +625,7 @@ impl<'a, R1, R2> VmarMapOptions<'a, R1, R2> {
         Self {
             parent,
             vmo: None,
+            inode: None,
             perms,
             vmo_offset: 0,
             vmo_limit: usize::MAX,
@@ -622,22 +638,30 @@
         }
     }
 
-    /// Binds a VMO to the mapping.
+    /// Binds a [`Vmo`] to the mapping.
     ///
-    /// If the mapping is a private mapping, its size may not be equal to that of the VMO.
-    /// For example, it is ok to create a mapping whose size is larger than
-    /// that of the VMO, although one cannot read from or write to the
-    /// part of the mapping that is not backed by the VMO.
+    /// If the mapping is a private mapping, its size may not be equal to that
+    /// of the [`Vmo`]. For example, it is OK to create a mapping whose size is
+    /// larger than that of the [`Vmo`], although one cannot read from or write
+    /// to the part of the mapping that is not backed by the [`Vmo`].
     ///
-    /// So you may wonder: what is the point of supporting such _oversized_
-    /// mappings? The reason is two-fold.
-    /// 1. VMOs are resizable. So even if a mapping is backed by a VMO whose
-    ///    size is equal to that of the mapping initially, we cannot prevent
-    ///    the VMO from shrinking.
+    /// Such _oversized_ mappings are useful for two reasons:
+    /// 1. [`Vmo`]s are resizable. So even if a mapping is backed by a VMO
+    ///    whose size is equal to that of the mapping initially, we cannot
+    ///    prevent the VMO from shrinking.
     /// 2. Mappings are not allowed to overlap by default. As a result,
-    ///    oversized mappings can serve as a placeholder to prevent future
-    ///    mappings from occupying some particular address ranges accidentally.
+    ///    oversized mappings can reserve space for future expansions.
+    ///
+    /// The [`Vmo`] of a mapping will be implicitly set if [`Self::inode`] is
+    /// set.
+    ///
+    /// # Panics
+    ///
+    /// This function panics if an [`Inode`] is already provided.
     pub fn vmo(mut self, vmo: Vmo<R2>) -> Self {
+        if self.inode.is_some() {
+            panic!("Cannot set `vmo` when `inode` is already set");
+        }
+
         self.vmo = Some(vmo);
         self
@@ -712,6 +736,36 @@
     }
 }
 
+impl<R1> VmarMapOptions<'_, R1, Rights> {
+    /// Binds an [`Inode`] to the mapping.
+    ///
+    /// This is used for file-backed mappings. The provided file inode will be
+    /// mapped. See [`Self::vmo`] for details on the map size.
+    ///
+    /// If an [`Inode`] is provided, the [`Self::vmo`] must not be provided
+    /// again. The actually mapped [`Vmo`] will be the [`Inode`]'s page cache.
+    ///
+    /// # Panics
+    ///
+    /// This function panics if:
+    /// - a [`Vmo`] or [`Inode`] is already provided;
+    /// - the provided [`Inode`] does not have a page cache.
+    pub fn inode(mut self, inode: Arc<dyn Inode>) -> Self {
+        if self.vmo.is_some() {
+            panic!("Cannot set `inode` when `vmo` is already set");
+        }
+
+        self.vmo = Some(
+            inode
+                .page_cache()
+                .expect("Map an inode without page cache")
+                .to_dyn(),
+        );
+        self.inode = Some(inode);
+        self
+    }
+}
+
 impl<R1, R2> VmarMapOptions<'_, R1, R2>
 where
     Vmo<R2>: VmoRightsOp,
@@ -726,6 +780,7 @@ where
         let Self {
             parent,
            vmo,
+            inode,
             perms,
             vmo_offset,
             vmo_limit,
@@ -784,6 +839,7 @@
             NonZeroUsize::new(map_size).unwrap(),
             map_to_addr,
             vmo,
+            inode,
             is_shared,
             handle_page_faults_around,
             perms,
@@ -832,6 +888,20 @@
     }
 }
 
+/// A guard that allows querying a [`Vmar`] for its mappings.
+pub struct VmarQueryGuard<'a> {
+    vmar: RwMutexReadGuard<'a, VmarInner>,
+    range: Range<usize>,
+}
+
+impl VmarQueryGuard<'_> {
+    /// Returns an iterator over the [`VmMapping`]s that intersect with the
+    /// provided range when calling [`Vmar::query`].
+    pub fn iter(&self) -> impl Iterator<Item = &VmMapping> {
+        self.vmar.query(&self.range)
+    }
+}
+
 /// Determines whether two ranges are intersected.
 /// returns false if one of the ranges has a length of 0
 pub fn is_intersected(range1: &Range<usize>, range2: &Range<usize>) -> bool {
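
Tying the new `Vmar` APIs together: a file-backed mapping now carries its `Inode`, and `query` hands out a read-guard over the intersecting mappings. Below is a minimal sketch of how the two are driven, mirroring `do_sys_mmap` and `sys_msync` above; the surrounding variables (`options`, `inode`, `offset`, `root_vmar`, `range`) are assumed to be in scope.

```rust
// File-backed mmap path: handing over the inode makes its page cache the
// backing VMO implicitly (and records the inode on the resulting VmMapping).
options = options
    .inode(inode.clone())
    .vmo_offset(offset)
    .handle_page_faults_around();

// msync path: hold the query guard while walking the intersecting mappings,
// then collect the inodes of the file-backed ones for writeback.
let guard = root_vmar.query(range.clone());
let inodes: Vec<_> = guard.iter().filter_map(|m| m.inode().cloned()).collect();
```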

View File

@@ -5,7 +5,7 @@ use core::ops::Range;
 use aster_rights::{Dup, Read, Rights, TRightSet, TRights, Write};
 use aster_rights_proc::require;
 
-use super::{VmPerms, Vmar, VmarMapOptions, VmarRightsOp, Vmar_};
+use super::{VmPerms, Vmar, VmarMapOptions, VmarQueryGuard, VmarRightsOp, Vmar_};
 use crate::{
     prelude::*, thread::exception::PageFaultInfo, vm::page_fault_handler::PageFaultHandler,
 };
@@ -85,6 +85,11 @@ impl<R: TRights> Vmar<TRightSet<R>>
         self.0.protect(perms, range)
     }
 
+    /// Finds all the mapped regions that intersect with the specified range.
+    pub fn query(&self, range: Range<usize>) -> VmarQueryGuard<'_> {
+        self.0.query(range)
+    }
+
     /// Clears all mappings.
     ///
     /// After being cleared, this vmar will become an empty vmar

View File

@@ -16,6 +16,7 @@ use ostd::{
 use super::{interval_set::Interval, RssType};
 use crate::{
+    fs::utils::Inode,
     prelude::*,
     thread::exception::PageFaultInfo,
     vm::{
@@ -42,7 +43,7 @@ use crate::{
 /// This type controls the actual mapping in the [`VmSpace`]. It is a linear
 /// type and cannot be [`Drop`]. To remove a mapping, use [`Self::unmap`].
 #[derive(Debug)]
-pub(super) struct VmMapping {
+pub struct VmMapping {
     /// The size of mapping, in bytes. The map size can even be larger than the
     /// size of VMO. Those pages outside VMO range cannot be read or write.
     ///
@@ -56,6 +57,11 @@
     /// The start of the virtual address maps to the start of the range
     /// specified in [`MappedVmo`].
     vmo: Option<MappedVmo>,
+    /// The inode of the file that backs the mapping.
+    ///
+    /// If the inode is `Some`, it means that the mapping is file-backed.
+    /// And the `vmo` field must be the page cache of the inode.
+    inode: Option<Arc<dyn Inode>>,
     /// Whether the mapping is shared.
     ///
     /// The updates to a shared mapping are visible among processes, or carried
@@ -83,6 +89,7 @@ impl VmMapping {
         map_size: NonZeroUsize,
         map_to_addr: Vaddr,
         vmo: Option<MappedVmo>,
+        inode: Option<Arc<dyn Inode>>,
         is_shared: bool,
         handle_page_faults_around: bool,
         perms: VmPerms,
@@ -91,6 +98,7 @@
             map_size,
             map_to_addr,
             vmo,
+            inode,
             is_shared,
             handle_page_faults_around,
             perms,
@@ -100,6 +108,7 @@
     pub(super) fn new_fork(&self) -> Result<VmMapping> {
         Ok(VmMapping {
             vmo: self.vmo.as_ref().map(|vmo| vmo.dup()).transpose()?,
+            inode: self.inode.clone(),
             ..*self
         })
     }
@@ -119,12 +128,17 @@
         self.map_size.get()
     }
 
-    // Returns the permissions of pages in the mapping.
+    /// Returns the permissions of pages in the mapping.
     pub fn perms(&self) -> VmPerms {
         self.perms
     }
 
-    // Returns the mapping's RSS type.
+    /// Returns the inode of the file that backs the mapping.
+    pub fn inode(&self) -> Option<&Arc<dyn Inode>> {
+        self.inode.as_ref()
+    }
+
+    /// Returns the mapping's RSS type.
     pub fn rss_type(&self) -> RssType {
         if self.vmo.is_none() {
             RssType::RSS_ANONPAGES
@@ -407,12 +421,14 @@
             map_to_addr: self.map_to_addr,
             map_size: NonZeroUsize::new(left_size).unwrap(),
             vmo: l_vmo,
+            inode: self.inode.clone(),
             ..self
         };
         let right = Self {
             map_to_addr: at,
             map_size: NonZeroUsize::new(right_size).unwrap(),
             vmo: r_vmo,
+            inode: self.inode,
             ..self
         };
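
The invariant documented on the new `inode` field (if `inode` is `Some`, the `vmo` is that inode's page cache) is what lets `msync` write a mapping back by syncing its inode. A hypothetical helper illustrating that use (not part of the commit):

```rust
/// Writes back the file that backs `mapping`, if any (hypothetical helper).
fn sync_mapping(mapping: &VmMapping) -> Result<()> {
    if let Some(inode) = mapping.inode() {
        // TODO (as noted in `sys_msync` above): sync only the mapped range
        // instead of the whole inode.
        inode.sync_all()?;
    }
    Ok(())
}
```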

View File

@@ -25,6 +25,7 @@ TESTS ?= \
	mknod_test \
	mmap_test \
	mount_test \
+	msync_test \
	open_create_test \
	open_test \
	ppoll_test \
@@ -33,9 +34,9 @@ TESTS ?= \
	preadv2_test \
	proc_test \
	pselect_test \
+	pty_test \
	pwrite64_test \
	pwritev2_test \
-	pty_test \
	read_test \
	readv_test \
	rename_test \

View File

@@ -0,0 +1 @@
+All/MsyncFullParamTest.InvalidateUnlockedSucceeds/*

View File

@@ -924,8 +924,8 @@ mprotect05
 msync01
 msync02
-# msync03
-# msync04
+# msync03 # FIXME: This fails because the heap place holder is treated as mapped. Fix after https://github.com/asterinas/asterinas/pull/1999.
+# msync04 # TODO: support `/proc/self/pagemap` and `/proc/kpageflags`.
 # munlock01
 # munlock02