Support the system call mremap

Wang Siyuan
2025-06-16 07:39:56 +00:00
committed by Tate, Hongliang Tian
parent 33345f184a
commit 11f9675f37
13 changed files with 421 additions and 52 deletions

View File

@ -15,7 +15,7 @@ support the loading of Linux kernel modules.
## System Calls
At the time of writing,
Asterinas implements 213 out of the 336 system calls
Asterinas implements 214 out of the 336 system calls
provided by Linux on x86-64 architecture.
| Numbers | Names | Is Implemented |
@ -45,7 +45,7 @@ provided by Linux on x86-64 architecture.
| 22 | pipe | ✅ |
| 23 | select | ✅ |
| 24 | sched_yield | ✅ |
| 25 | mremap | ❌ |
| 25 | mremap | ✅ |
| 26 | msync | ✅ |
| 27 | mincore | ❌ |
| 28 | madvise | ✅ |
@ -321,10 +321,10 @@ provided by Linux on x86-64 architecture.
| 298 | perf_event_open | ❌ |
| 299 | recvmmsg | ❌ |
| 300 | fanotify_init | ❌ |
| 301 | fanotify_mark | ❌ |
| 302 | prlimit64 | ✅ |
| 303 | name_to_handle_at | ❌ |
| 304 | open_by_handle_at | ❌ |
| 305 | clock_adjtime | ❌ |
| 306 | syncfs | ❌ |
| 307 | sendmmsg | ❌ |

View File

@ -91,7 +91,7 @@ impl Heap {
let new_size = new_heap_end - self.base;
// Expand the heap.
root_vmar.resize_mapping(self.base, old_size, new_size)?;
root_vmar.resize_mapping(self.base, old_size, new_size, false)?;
self.current_heap_end.store(new_heap_end, Ordering::Release);
Ok(new_heap_end)

View File

@ -60,6 +60,7 @@ use crate::syscall::{
mmap::sys_mmap,
mount::sys_mount,
mprotect::sys_mprotect,
mremap::sys_mremap,
msync::sys_msync,
munmap::sys_munmap,
nanosleep::{sys_clock_nanosleep, sys_nanosleep},
@ -277,6 +278,7 @@ impl_syscall_nums_and_dispatch_fn! {
SYS_RECVMSG = 212 => sys_recvmsg(args[..3]);
SYS_BRK = 214 => sys_brk(args[..1]);
SYS_MUNMAP = 215 => sys_munmap(args[..2]);
SYS_MREMAP = 216 => sys_mremap(args[..5]);
SYS_CLONE = 220 => sys_clone(args[..5], &user_ctx);
SYS_EXECVE = 221 => sys_execve(args[..3], &mut user_ctx);
SYS_MMAP = 222 => sys_mmap(args[..6]);

View File

@ -69,6 +69,7 @@ use crate::syscall::{
mmap::sys_mmap,
mount::sys_mount,
mprotect::sys_mprotect,
mremap::sys_mremap,
msync::sys_msync,
munmap::sys_munmap,
nanosleep::{sys_clock_nanosleep, sys_nanosleep},
@ -184,6 +185,7 @@ impl_syscall_nums_and_dispatch_fn! {
SYS_ACCESS = 21 => sys_access(args[..2]);
SYS_PIPE = 22 => sys_pipe(args[..1]);
SYS_SELECT = 23 => sys_select(args[..5]);
SYS_MREMAP = 25 => sys_mremap(args[..5]);
SYS_MSYNC = 26 => sys_msync(args[..3]);
SYS_SCHED_YIELD = 24 => sys_sched_yield(args[..0]);
SYS_MADVISE = 28 => sys_madvise(args[..3]);

View File

@ -74,6 +74,7 @@ mod mknod;
mod mmap;
mod mount;
mod mprotect;
mod mremap;
mod msync;
mod munmap;
mod nanosleep;

View File

@ -0,0 +1,95 @@
// SPDX-License-Identifier: MPL-2.0
use align_ext::AlignExt;
use super::SyscallReturn;
use crate::prelude::*;
pub fn sys_mremap(
old_addr: Vaddr,
old_size: usize,
new_size: usize,
flags: i32,
new_addr: Vaddr,
ctx: &Context,
) -> Result<SyscallReturn> {
let flags = MremapFlags::from_bits(flags).ok_or(Errno::EINVAL)?;
let new_addr = do_sys_mremap(old_addr, old_size, new_size, flags, new_addr, ctx)?;
Ok(SyscallReturn::Return(new_addr as _))
}
fn do_sys_mremap(
old_addr: Vaddr,
old_size: usize,
new_size: usize,
flags: MremapFlags,
new_addr: Vaddr,
ctx: &Context,
) -> Result<Vaddr> {
debug!(
"mremap: old_addr = 0x{:x}, old_size = {}, new_size = {}, flags = {:?}, new_addr = 0x{:x}",
old_addr, old_size, new_size, flags, new_addr,
);
if old_addr % PAGE_SIZE != 0 {
return_errno_with_message!(Errno::EINVAL, "mremap: `old_addr` must be page-aligned");
}
if new_size == 0 {
return_errno_with_message!(Errno::EINVAL, "mremap: `new_size` cannot be zero");
}
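// A zero `old_size` requests the mremap(2) "copy" mode, which duplicates a
// shareable mapping instead of resizing it; this mode is not supported yet.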
if old_size == 0 {
return_errno_with_message!(
Errno::EINVAL,
"mremap: copying shareable mapping is not supported"
);
}
let old_size = old_size.align_up(PAGE_SIZE);
let new_size = new_size.align_up(PAGE_SIZE);
let user_space = ctx.user_space();
let root_vmar = user_space.root_vmar();
if !flags.contains(MremapFlags::MREMAP_FIXED) && new_size <= old_size {
if new_size < old_size {
// We can shrink an old range that spans multiple mappings. See
// <https://github.com/google/gvisor/blob/95d875276806484f974ce9e95556a561331f8e22/test/syscalls/linux/mremap.cc#L100-L117>.
root_vmar.resize_mapping(old_addr, old_size, new_size, false)?;
}
return Ok(old_addr);
}
if flags.contains(MremapFlags::MREMAP_MAYMOVE) {
if flags.contains(MremapFlags::MREMAP_FIXED) {
root_vmar.remap(old_addr, old_size, Some(new_addr), new_size)
} else {
root_vmar.remap(old_addr, old_size, None, new_size)
}
} else {
if flags.contains(MremapFlags::MREMAP_FIXED) {
return_errno_with_message!(
Errno::EINVAL,
"mremap: `MREMAP_FIXED` specified without also specifying `MREMAP_MAYMOVE`"
);
}
// At this point, `new_size > old_size` is guaranteed. Since we are
// enlarging the old mapping, we must check whether the old range lies
// in a single mapping.
//
// FIXME: According to <https://man7.org/linux/man-pages/man2/mremap.2.html>,
// if the `MREMAP_MAYMOVE` flag is not set and the mapping cannot
// be expanded at the current `Vaddr`, we should return an `ENOMEM`.
// However, `resize_mapping` returns an `EACCES` in this case.
root_vmar.resize_mapping(old_addr, old_size, new_size, true)?;
Ok(old_addr)
}
}
bitflags! {
struct MremapFlags: i32 {
const MREMAP_MAYMOVE = 1 << 0;
const MREMAP_FIXED = 1 << 1;
// TODO: Add support for this flag, which exists since Linux 5.7.
// const MREMAP_DONTUNMAP = 1 << 2;
}
}
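For reference, a minimal userspace sketch (not part of this commit, assuming a Linux-style libc `mremap`) that exercises the two main branches handled above: the in-place shrink fast path and an expansion with `MREMAP_MAYMOVE`:

```c
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>

#define PG 4096

int main(void)
{
    char *p = mmap(NULL, 4 * PG, PROT_READ | PROT_WRITE,
                   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if (p == MAP_FAILED) {
        perror("mmap");
        return EXIT_FAILURE;
    }

    // Shrink in place: new_size < old_size with no flags. Only the tail is
    // unmapped, so the returned address equals the old one.
    char *q = mremap(p, 4 * PG, 2 * PG, 0);
    if (q == MAP_FAILED) {
        perror("mremap (shrink)");
        return EXIT_FAILURE;
    }

    // Grow with MREMAP_MAYMOVE: the kernel may relocate the mapping if it
    // cannot be expanded at its current address.
    char *r = mremap(q, 2 * PG, 8 * PG, MREMAP_MAYMOVE);
    if (r == MAP_FAILED) {
        perror("mremap (grow)");
        return EXIT_FAILURE;
    }

    munmap(r, 8 * PG);
    return 0;
}
```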

View File

@ -86,16 +86,48 @@ impl<R> Vmar<R> {
/// The range of the mapping goes from `map_addr..map_addr + old_size` to
/// `map_addr..map_addr + new_size`.
///
/// The range of the original mapping does not have to solely map to a
/// whole [`VmMapping`], but it must ensure that all existing ranges have a
/// mapping. Otherwise, this method will return `Err`.
///
/// If the new mapping size is smaller than the original mapping size, the
/// extra part will be unmapped. If the new mapping is larger than the old
/// mapping and the extra part overlaps with existing mapping, resizing
/// will fail and return `Err`.
pub fn resize_mapping(&self, map_addr: Vaddr, old_size: usize, new_size: usize) -> Result<()> {
self.0.resize_mapping(map_addr, old_size, new_size)
///
/// - When `check_single_mapping` is `true`, this method will check whether
/// the range of the original mapping is covered by a single [`VmMapping`].
/// If not, this method will return an `Err`.
/// - When `check_single_mapping` is `false`, the range of the original
/// mapping does not have to solely map to a whole [`VmMapping`], but it
/// must be fully covered by existing mappings. Otherwise, this method
/// will return an `Err`.
pub fn resize_mapping(
&self,
map_addr: Vaddr,
old_size: usize,
new_size: usize,
check_single_mapping: bool,
) -> Result<()> {
self.0
.resize_mapping(map_addr, old_size, new_size, check_single_mapping)
}
/// Remaps the original mapping to a new address and/or size.
///
/// If the new mapping size is smaller than the original mapping size, the
/// extra part will be unmapped.
///
/// - If `new_addr` is `Some(new_addr)`, this method attempts to move the
/// mapping from `old_addr..old_addr + old_size` to `new_addr..new_addr +
/// new_size`. If any existing mappings lie within the target range,
/// they will be unmapped before the move.
/// - If `new_addr` is `None`, a new range of size `new_size` will be
/// allocated, and the original mapping will be moved there.
pub fn remap(
&self,
old_addr: Vaddr,
old_size: usize,
new_addr: Option<Vaddr>,
new_size: usize,
) -> Result<Vaddr> {
self.0.remap(old_addr, old_size, new_addr, new_size)
}
}
@ -129,7 +161,7 @@ impl VmarInner {
/// Returns `Ok` if the calling process may expand its mapped
/// memory by the passed size.
fn check_expand_size(&mut self, expand_size: usize) -> Result<()> {
fn check_extra_size_fits_rlimit(&self, expand_size: usize) -> Result<()> {
let Some(process) = Process::current() else {
// When building a `Process`, the kernel task needs to build
// some `VmMapping`s, in which case this branch is reachable.
@ -151,6 +183,23 @@ impl VmarInner {
Ok(())
}
/// Checks whether `addr..addr + size` is covered by a single `VmMapping`,
/// and returns the address of the single `VmMapping` if successful.
fn check_lies_in_single_mapping(&self, addr: Vaddr, size: usize) -> Result<Vaddr> {
if let Some(vm_mapping) = self
.vm_mappings
.find_one(&addr)
.filter(|vm_mapping| vm_mapping.map_end() - addr >= size)
{
Ok(vm_mapping.map_to_addr())
} else {
// FIXME: In Linux, two adjacent mappings created by `mmap` with
// identical properties can be `mremap`ed together. Fix this by
// adding an auto-merge mechanism for adjacent `VmMapping`s.
return_errno_with_message!(Errno::EFAULT, "The range must lie in a single mapping");
}
}
/// Inserts a `VmMapping` into the `Vmar`.
///
/// Make sure the insertion doesn't exceed address space limit.
@ -277,6 +326,61 @@ impl VmarInner {
return_errno_with_message!(Errno::ENOMEM, "Cannot find free region for mapping");
}
/// Splits and unmaps the found mapping if the new size is smaller.
/// Enlarges the last mapping if the new size is larger.
fn resize_mapping(
&mut self,
vm_space: &VmSpace,
map_addr: Vaddr,
old_size: usize,
new_size: usize,
rss_delta: &mut RssDelta,
) -> Result<()> {
debug_assert_eq!(map_addr % PAGE_SIZE, 0);
debug_assert_eq!(old_size % PAGE_SIZE, 0);
debug_assert_eq!(new_size % PAGE_SIZE, 0);
// FIXME: We should check whether all existing ranges in
// `map_addr..map_addr + old_size` have a mapping. If not,
// we should return an `Err`.
if new_size == 0 {
return_errno_with_message!(Errno::EINVAL, "can not resize a mapping to 0 size");
}
if new_size == old_size {
return Ok(());
}
let old_map_end = map_addr + old_size;
let new_map_end = map_addr.checked_add(new_size).ok_or(Errno::EINVAL)?;
if !is_userspace_vaddr(new_map_end) {
return_errno_with_message!(Errno::EINVAL, "resize to a invalid new size");
}
if new_size < old_size {
self.alloc_free_region_exact_truncate(
vm_space,
new_map_end,
old_map_end - new_map_end,
rss_delta,
)?;
return Ok(());
}
self.alloc_free_region_exact(old_map_end, new_map_end - old_map_end)?;
let last_mapping = self.vm_mappings.find_one(&(old_map_end - 1)).unwrap();
let last_mapping_addr = last_mapping.map_to_addr();
debug_assert_eq!(last_mapping.map_end(), old_map_end);
self.check_extra_size_fits_rlimit(new_map_end - old_map_end)?;
let last_mapping = self.remove(&last_mapping_addr).unwrap();
let last_mapping = last_mapping.enlarge(new_map_end - old_map_end);
self.insert(last_mapping);
Ok(())
}
}
pub const ROOT_VMAR_LOWEST_ADDR: Vaddr = 0x001_0000; // 64 KiB is the Linux configurable default
@ -422,43 +526,114 @@ impl Vmar_ {
Ok(())
}
// Split and unmap the found mapping if resize smaller.
// Enlarge the last mapping if resize larger.
fn resize_mapping(&self, map_addr: Vaddr, old_size: usize, new_size: usize) -> Result<()> {
debug_assert!(map_addr % PAGE_SIZE == 0);
debug_assert!(old_size % PAGE_SIZE == 0);
debug_assert!(new_size % PAGE_SIZE == 0);
if new_size == 0 {
return_errno_with_message!(Errno::EINVAL, "can not resize a mapping to 0 size");
}
if new_size == old_size {
return Ok(());
}
let old_map_end = map_addr + old_size;
let new_map_end = map_addr + new_size;
if new_size < old_size {
self.remove_mapping(new_map_end..old_map_end)?;
return Ok(());
}
/// Splits and unmaps the found mapping if the new size is smaller.
/// Enlarges the last mapping if the new size is larger.
fn resize_mapping(
&self,
map_addr: Vaddr,
old_size: usize,
new_size: usize,
check_single_mapping: bool,
) -> Result<()> {
let mut rss_delta = RssDelta::new(self);
let mut inner = self.inner.write();
let last_mapping = inner.vm_mappings.find_one(&(old_map_end - 1)).unwrap();
let last_mapping_addr = last_mapping.map_to_addr();
let extra_mapping_start = last_mapping.map_end();
inner.check_expand_size(new_map_end - extra_mapping_start)?;
let last_mapping = inner.remove(&last_mapping_addr).unwrap();
inner.alloc_free_region_exact(extra_mapping_start, new_map_end - extra_mapping_start)?;
let last_mapping = last_mapping.enlarge(new_map_end - extra_mapping_start);
inner.insert(last_mapping);
if check_single_mapping {
inner.check_lies_in_single_mapping(map_addr, old_size)?;
}
inner.resize_mapping(&self.vm_space, map_addr, old_size, new_size, &mut rss_delta)?;
Ok(())
}
fn remap(
&self,
old_addr: Vaddr,
old_size: usize,
new_addr: Option<Vaddr>,
new_size: usize,
) -> Result<Vaddr> {
let mut inner = self.inner.write();
let old_mapping_addr = inner.check_lies_in_single_mapping(old_addr, old_size)?;
let mut old_range = old_addr..old_addr + old_size;
let mut old_size = old_size;
let mut rss_delta = RssDelta::new(self);
// Allocate a new free region that does not overlap with the old range.
let new_range = if let Some(new_addr) = new_addr {
let new_range = new_addr..new_addr.checked_add(new_size).ok_or(Errno::EINVAL)?;
if new_addr % PAGE_SIZE != 0
|| !is_userspace_vaddr(new_addr)
|| !is_userspace_vaddr(new_range.end)
{
return_errno_with_message!(Errno::EINVAL, "remap: invalid fixed new addr");
}
if is_intersected(&old_range, &new_range) {
return_errno_with_message!(
Errno::EINVAL,
"remap: the new range overlaps with the old one"
);
}
inner.alloc_free_region_exact_truncate(
&self.vm_space,
new_addr,
new_size,
&mut rss_delta,
)?
} else {
inner.alloc_free_region(new_size, PAGE_SIZE)?
};
// Create a new `VmMapping`.
let old_mapping = {
let vm_mapping = inner.remove(&old_mapping_addr).unwrap();
let (left, old_mapping, right) = vm_mapping.split_range(&old_range)?;
if let Some(left) = left {
inner.insert(left);
}
if let Some(right) = right {
inner.insert(right);
}
if new_size < old_size {
let (old_mapping, taken) = old_mapping.split(old_range.start + new_size)?;
rss_delta.add(taken.rss_type(), -(taken.unmap(&self.vm_space)? as isize));
old_size = new_size;
old_range = old_range.start..(old_range.start + old_size);
old_mapping
} else {
old_mapping
}
};
// At this point, `new_size >= old_size` is guaranteed.
let new_mapping = old_mapping.clone_for_remap_at(new_range.start)?;
inner.insert(new_mapping.enlarge(new_size - old_size));
// Move the mapping.
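// Walk the old range page by page: for each mapped page, unmap the frame at
// its old virtual address and map the same frame at the matching offset in
// the new range; the TLB is flushed once after the loop.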
let preempt_guard = disable_preempt();
let total_range = old_range.start.min(new_range.start)..old_range.end.max(new_range.end);
let vmspace = self.vm_space();
let mut cursor = vmspace.cursor_mut(&preempt_guard, &total_range).unwrap();
let mut current_offset = 0;
cursor.jump(old_range.start).unwrap();
while let Some(mapped_va) = cursor.find_next(old_size - current_offset) {
let (va, Some((frame, prop))) = cursor.query().unwrap() else {
panic!("Found mapped page but query failed");
};
debug_assert_eq!(mapped_va, va.start);
cursor.unmap(PAGE_SIZE);
let offset = mapped_va - old_range.start;
cursor.jump(new_range.start + offset).unwrap();
cursor.map(frame, prop);
current_offset = offset + PAGE_SIZE;
cursor.jump(old_range.start + current_offset).unwrap();
}
cursor.flusher().dispatch_tlb_flush();
cursor.flusher().sync_tlb_flush();
Ok(new_range.start)
}
/// Returns the attached `VmSpace`.
fn vm_space(&self) -> &Arc<VmSpace> {
&self.vm_space
@ -783,7 +958,7 @@ where
let mut inner = parent.0.inner.write();
inner.check_expand_size(map_size).or_else(|e| {
inner.check_extra_size_fits_rlimit(map_size).or_else(|e| {
if can_overwrite {
let offset = offset.ok_or(Error::with_message(
Errno::EINVAL,
@ -791,7 +966,7 @@ where
))?;
// MAP_FIXED may remove pages overlapped with requested mapping.
let expand_size = map_size - inner.count_overlap_size(offset..offset + map_size);
inner.check_expand_size(expand_size)
inner.check_extra_size_fits_rlimit(expand_size)
} else {
Err(e)
}
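To illustrate the overlap check in the new `remap` above, a small sketch (not part of this commit) that requests a `MREMAP_FIXED` destination intersecting the source range and expects `EINVAL`, matching both this implementation and Linux:

```c
#define _GNU_SOURCE
#include <errno.h>
#include <stdio.h>
#include <sys/mman.h>

#define PG 4096

int main(void)
{
    char *p = mmap(NULL, 2 * PG, PROT_READ | PROT_WRITE,
                   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if (p == MAP_FAILED)
        return 1;

    // The requested destination (p + PG .. p + 3*PG) overlaps the source
    // (p .. p + 2*PG), so mremap must fail with EINVAL.
    void *r = mremap(p, 2 * PG, 2 * PG,
                     MREMAP_MAYMOVE | MREMAP_FIXED, p + PG);
    if (r == MAP_FAILED && errno == EINVAL)
        printf("overlapping MREMAP_FIXED target rejected as expected\n");

    munmap(p, 2 * PG);
    return 0;
}
```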

View File

@ -113,6 +113,12 @@ impl VmMapping {
})
}
pub(super) fn clone_for_remap_at(&self, va: Vaddr) -> Result<VmMapping> {
let mut vm_mapping = self.new_fork()?;
vm_mapping.map_to_addr = va;
Ok(vm_mapping)
}
/// Returns the mapping's start address.
pub fn map_to_addr(&self) -> Vaddr {
self.map_to_addr
@ -388,7 +394,7 @@ impl VmMapping {
///
/// The address must be within the mapping and page-aligned. The address
/// must not be either the start or the end of the mapping.
fn split(self, at: Vaddr) -> Result<(Self, Self)> {
pub fn split(self, at: Vaddr) -> Result<(Self, Self)> {
debug_assert!(self.map_to_addr < at && at < self.map_end());
debug_assert!(at % PAGE_SIZE == 0);

View File

@ -0,0 +1,80 @@
// SPDX-License-Identifier: MPL-2.0
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <unistd.h>
#include <string.h>
#include <errno.h>
#include "../network/test.h"
#define PAGE_SIZE 4096
const char *content = "kjfkljk*wigo&h";
void *x_mmap(void *addr, size_t length, int prot, int flags, int fd,
off_t offset)
{
void *result = mmap(addr, length, prot, flags, fd, offset);
if (result == MAP_FAILED) {
perror("mmap");
exit(EXIT_FAILURE);
}
return result;
}
FN_TEST(mmap_and_mremap)
{
char *addr = x_mmap(NULL, 3 * PAGE_SIZE, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
TEST_SUCC(munmap(addr, 3 * PAGE_SIZE));
addr = x_mmap(addr, PAGE_SIZE, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
strcpy(addr, content);
char *addr2 = x_mmap(addr + 2 * PAGE_SIZE, PAGE_SIZE,
PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
char *new_addr = mremap(addr, PAGE_SIZE, 3 * PAGE_SIZE, MREMAP_MAYMOVE);
if (new_addr == MAP_FAILED) {
perror("mremap");
exit(EXIT_FAILURE);
}
// The following operation (if uncommented) would cause a segmentation fault.
// strcpy(addr, "Writing to old address");
TEST_RES(strcmp(new_addr, content), _ret == 0);
strcpy(new_addr + PAGE_SIZE, "Writing to page 2 (new)");
strcpy(new_addr + 2 * PAGE_SIZE, "Writing to page 3 (new)");
TEST_SUCC(munmap(new_addr, 3 * PAGE_SIZE));
TEST_SUCC(munmap(addr2, PAGE_SIZE));
}
END_TEST()
FN_TEST(mmap_and_mremap_fixed)
{
char *addr = x_mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
strcpy(addr, content);
// Map and unmap a target region to ensure we know it's free
char *fixed_addr = x_mmap(NULL, PAGE_SIZE, PROT_NONE,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
TEST_SUCC(munmap(fixed_addr, PAGE_SIZE)); // free it for mremap
char *new_addr = mremap(addr, PAGE_SIZE, PAGE_SIZE,
MREMAP_MAYMOVE | MREMAP_FIXED, fixed_addr);
if (new_addr != fixed_addr) {
perror("mremap");
exit(EXIT_FAILURE);
}
TEST_RES(strcmp(new_addr, content), _ret == 0);
TEST_SUCC(munmap(new_addr, PAGE_SIZE));
}
END_TEST()
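A possible follow-on case in the same style (a sketch, not part of this commit), reusing the `x_mmap` helper, `content` string, and test macros above, to cover the in-place shrink path that the kernel handles without moving the mapping:

```c
FN_TEST(mmap_and_mremap_shrink)
{
    char *addr = x_mmap(NULL, 2 * PAGE_SIZE, PROT_READ | PROT_WRITE,
                        MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    strcpy(addr, content);

    // Shrinking with no flags must keep the mapping at the same address.
    char *new_addr = mremap(addr, 2 * PAGE_SIZE, PAGE_SIZE, 0);
    if (new_addr != addr) {
        perror("mremap");
        exit(EXIT_FAILURE);
    }

    // Data in the surviving first page is untouched.
    TEST_RES(strcmp(new_addr, content), _ret == 0);

    TEST_SUCC(munmap(new_addr, PAGE_SIZE));
}
END_TEST()
```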

View File

@ -28,6 +28,7 @@ hello_world/hello_world
itimer/setitimer
itimer/timer_create
mmap/mmap_and_fork
mmap/mmap_and_mremap
mmap/mmap_shared_filebacked
mmap/mmap_readahead
mmap/mmap_vmrss

View File

@ -25,6 +25,7 @@ TESTS ?= \
mknod_test \
mmap_test \
mount_test \
mremap_test \
msync_test \
open_create_test \
open_test \

View File

@ -0,0 +1,6 @@
MremapDeathTest.SharedAnon
MremapTest.InPlace_Copy
MremapTest.MayMove_Copy
MremapTest.MustMove_Copy
PrivateShared/MremapParamTest.InPlace_ExpansionFailure/*
PrivateShared/MremapParamTest.Fixed_ShrinkingAcrossVMAs/*
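The excluded `*_Copy` cases exercise the mremap "copy" form (an `old_size` of zero on a shared mapping), which the new `sys_mremap` rejects with `EINVAL`. A hedged sketch of what those cases do on Linux (not part of this commit):

```c
#define _GNU_SOURCE
#include <sys/mman.h>

#define PG 4096

int main(void)
{
    char *shared = mmap(NULL, PG, PROT_READ | PROT_WRITE,
                        MAP_SHARED | MAP_ANONYMOUS, -1, 0);
    if (shared == MAP_FAILED)
        return 1;

    // On Linux, old_size == 0 with MREMAP_MAYMOVE creates a second mapping
    // of the same shared pages; Asterinas returns EINVAL after this commit.
    void *copy = mremap(shared, 0, PG, MREMAP_MAYMOVE);
    (void)copy;
    return 0;
}
```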

View File

@ -883,11 +883,11 @@ mprotect05
# mq_timedsend01
# mq_unlink01
# mremap01
# mremap02
# mremap03
mremap01
mremap02
mremap03
# mremap04
# mremap05
mremap05
# mremap06
# mseal01