mirror of
https://github.com/asterinas/asterinas.git
synced 2025-06-18 12:06:43 +00:00
Add `fast_copy` and `fast_copy_nonoverlapping` in frame
This commit is contained in:
committed by
Tate, Hongliang Tian
parent
2002db5481
commit
657d6719c7
@ -1,8 +1,11 @@
|
||||
// SPDX-License-Identifier: MPL-2.0
|
||||
|
||||
mod util;
|
||||
|
||||
use alloc::fmt;
|
||||
|
||||
use pod::Pod;
|
||||
pub use util::{fast_copy, fast_copy_nonoverlapping};
|
||||
use x86_64::{instructions::tlb, structures::paging::PhysFrame, VirtAddr};
|
||||
|
||||
use crate::vm::{
|
||||
|
174
framework/aster-frame/src/arch/x86/mm/util.rs
Normal file
174
framework/aster-frame/src/arch/x86/mm/util.rs
Normal file
@ -0,0 +1,174 @@
|
||||
// SPDX-License-Identifier: MPL-2.0
|
||||
|
||||
/// Copies `count * size_of::<T>()` bytes from `src` to `dst`.
|
||||
/// The source and destination may overlap.
|
||||
///
|
||||
/// If the source and destination will never overlap, `fast_copy_nonoverlapping` can be used instead.
|
||||
///
|
||||
/// # Performance
|
||||
///
|
||||
/// This function is provided as a fast alternative to `core::ptr::copy` by
|
||||
/// utilizing the CPU's `rep movsq` and `rep movsb` instructions for bulk memory copying.
|
||||
/// These instructions can result in more efficient data transfers by moving larger blocks
|
||||
/// of memory in a single operation, leading to fewer CPU cycles and better performance
|
||||
/// in certain scenarios.
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// The safety requirements of this function are consistent with `core::ptr::copy`.
|
||||
#[inline]
|
||||
pub unsafe fn fast_copy<T>(src: *const T, dst: *mut T, count: usize) {
|
||||
if src == dst || count == 0 {
|
||||
return;
|
||||
}
|
||||
|
||||
if src < dst && src.add(count) > dst {
|
||||
// Overlap and src is before dst
|
||||
backward_copy(src, dst, count);
|
||||
} else {
|
||||
// No overlap, or src is after dst
|
||||
forward_copy(src, dst, count);
|
||||
}
|
||||
}
|
||||
|
||||
/// Copies `count * size_of::<T>()` bytes from `src` to `dst`.
|
||||
/// The source and destination must not overlap.
|
||||
///
|
||||
/// For regions of memory which might overlap, use `fast_copy` instead.
|
||||
///
|
||||
/// # Performance
|
||||
///
|
||||
/// This function is provided as a fast alternative to `core::ptr::copy_nonoverlapping` by
|
||||
/// utilizing the CPU's `rep movsq` and `rep movsb` instructions for bulk memory copying.
|
||||
/// These instructions can result in more efficient data transfers by moving larger blocks
|
||||
/// of memory in a single operation, leading to fewer CPU cycles and better performance
|
||||
/// in certain scenarios.
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// The safety requirements of this function are consistent with `core::ptr::copy_nonoverlapping`.
|
||||
#[inline]
|
||||
pub unsafe fn fast_copy_nonoverlapping<T>(src: *const T, dst: *mut T, count: usize) {
|
||||
if count == 0 {
|
||||
return;
|
||||
}
|
||||
|
||||
forward_copy(src, dst, count);
|
||||
}
|
||||
|
||||
/// Copies `count * size_of::<T>()` bytes from `src` to `dst` in ascending
/// address order.
///
/// # Safety
///
/// The `src` and `dst` must point to valid memory regions.
/// If the memory regions of `src` and `dst` overlap, `src` must be higher than `dst`.
#[inline]
unsafe fn forward_copy<T>(src: *const T, dst: *mut T, count: usize) {
    let total_bytes = core::mem::size_of::<T>() * count;

    // String instructions such as `rep movsb` copy in the direction selected by
    // the DF flag: ascending addresses when `DF = 0`, descending otherwise.
    // The System V ABI requires `DF = 0` on function entry and nothing in this
    // function changes DF before the string instruction runs, so the copy is
    // guaranteed to proceed from low to high addresses, as required here.

    if total_bytes % 8 != 0 {
        // The length is not a whole number of quadwords: copy byte by byte.
        core::arch::asm!(
            "rep movsb",
            inlateout("rcx") total_bytes => _,
            inlateout("rsi") src => _,
            inlateout("rdi") dst => _
        );
        return;
    }

    // In most cases `movsq` is faster than `movsb` because every iteration
    // transfers eight bytes instead of one.
    let qword_count = total_bytes / 8;
    core::arch::asm!(
        "rep movsq",
        inlateout("rcx") qword_count => _,
        inlateout("rsi") src => _,
        inlateout("rdi") dst => _
    );
}
|
||||
|
||||
/// Copies `count * size_of::<T>()` bytes from `src` to `dst` in descending
/// address order, starting with the last byte of each region.
///
/// Copying from the end makes the transfer correct when the regions overlap
/// and the source starts below the destination. A request of zero bytes
/// (either `count == 0` or a zero-sized `T`) is a no-op.
///
/// # Safety
///
/// The `src` and `dst` must point to valid memory regions.
/// If the memory regions of `src` and `dst` overlap, `src` must be lower than `dst`.
#[inline]
unsafe fn backward_copy<T>(src: *const T, dst: *mut T, count: usize) {
    let bytes_count = count * core::mem::size_of::<T>();
    if bytes_count == 0 {
        // Nothing to copy. Returning here also avoids forming a pointer one
        // byte before the start of the regions below.
        return;
    }

    // With DF set, `rep movsb` starts at the addresses in RSI/RDI and walks
    // downwards, so both registers must point at the last byte to transfer.
    let last_src = (src as *const u8).add(bytes_count - 1);
    let last_dst = (dst as *mut u8).add(bytes_count - 1);

    // `std` and `cld` must live in the same `asm!` block: Rust's inline
    // assembly rules require the direction flag (DF) to be clear on exit from
    // every asm block, because the compiler is free to place DF-dependent code
    // between two separate blocks. Clearing DF here also satisfies the
    // System V ABI for AMD64, which requires DF to be clear on function exit.
    core::arch::asm!(
        "std", // Set the direction flag (DF): copy towards lower addresses
        "rep movsb",
        "cld", // Restore DF = 0 before leaving the asm block
        in("rcx") bytes_count,
        in("rsi") last_src,
        in("rdi") last_dst,
        lateout("rcx") _,
        lateout("rsi") _,
        lateout("rdi") _
    );
}
|
||||
|
||||
#[cfg(ktest)]
mod test {
    use alloc::vec;

    use super::*;

    /// Copying between two separate buffers must make the destination equal
    /// to the source.
    #[ktest]
    fn test_fast_copy_nonoverlapping() {
        let zeros = vec![0u8; 8];
        let mut target = vec![1u8; 8];

        unsafe {
            fast_copy_nonoverlapping(zeros.as_ptr(), target.as_mut_ptr(), 8);
        }

        assert_eq!(zeros, target);
    }

    /// Overlapping copy inside one buffer where the source starts 4 bytes
    /// above the destination.
    #[ktest]
    fn test_fast_copy_src_after_dst() {
        // Layout: eight 0s followed by eight 1s.
        let mut buf = vec![0u8; 8];
        buf.extend_from_slice(&[1u8; 8]);

        unsafe {
            let base = buf.as_mut_ptr();
            fast_copy(base.add(4) as *const u8, base, 8);
        }

        // Bytes 4..12 of the original layout now occupy bytes 0..8.
        let expected_left = [0u8, 0, 0, 0, 1, 1, 1, 1];
        assert_eq!(expected_left, buf[..8]);
    }

    /// Overlapping copy inside one buffer where the source starts 4 bytes
    /// below the destination.
    #[ktest]
    fn test_fast_copy_src_before_dst() {
        // Layout: eight 0s followed by eight 1s.
        let mut buf = vec![0u8; 8];
        buf.extend_from_slice(&[1u8; 8]);

        unsafe {
            let base = buf.as_mut_ptr();
            fast_copy(base.add(4) as *const u8, base.add(8), 8);
        }

        // Bytes 4..12 of the original layout now occupy bytes 8..16.
        let expected_right = [0u8, 0, 0, 0, 1, 1, 1, 1];
        assert_eq!(expected_right, buf[8..]);
    }
}
|
@ -19,7 +19,7 @@ impl VmIo for IoMem {
|
||||
fn read_bytes(&self, offset: usize, buf: &mut [u8]) -> crate::Result<()> {
|
||||
self.check_range(offset, buf.len())?;
|
||||
unsafe {
|
||||
core::ptr::copy(
|
||||
crate::arch::mm::fast_copy(
|
||||
(self.virtual_address + offset) as *const u8,
|
||||
buf.as_mut_ptr(),
|
||||
buf.len(),
|
||||
@ -31,7 +31,7 @@ impl VmIo for IoMem {
|
||||
fn write_bytes(&self, offset: usize, buf: &[u8]) -> crate::Result<()> {
|
||||
self.check_range(offset, buf.len())?;
|
||||
unsafe {
|
||||
core::ptr::copy(
|
||||
crate::arch::mm::fast_copy(
|
||||
buf.as_ptr(),
|
||||
(self.virtual_address + offset) as *mut u8,
|
||||
buf.len(),
|
||||
|
@ -249,7 +249,7 @@ impl VmFrame {
|
||||
|
||||
// Safety: src and dst is not overlapped.
|
||||
unsafe {
|
||||
core::ptr::copy_nonoverlapping(src.as_ptr(), self.as_mut_ptr(), PAGE_SIZE);
|
||||
crate::arch::mm::fast_copy_nonoverlapping(src.as_ptr(), self.as_mut_ptr(), PAGE_SIZE);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -589,7 +589,7 @@ impl<'a> VmReader<'a> {
|
||||
// Safety: the memory range is valid since `copy_len` is the minimum
|
||||
// of the reader's remaining data and the writer's available space.
|
||||
unsafe {
|
||||
core::ptr::copy(self.cursor, writer.cursor, copy_len);
|
||||
crate::arch::mm::fast_copy(self.cursor, writer.cursor, copy_len);
|
||||
self.cursor = self.cursor.add(copy_len);
|
||||
writer.cursor = writer.cursor.add(copy_len);
|
||||
}
|
||||
@ -714,7 +714,7 @@ impl<'a> VmWriter<'a> {
|
||||
// Safety: the memory range is valid since `copy_len` is the minimum
|
||||
// of the reader's remaining data and the writer's available space.
|
||||
unsafe {
|
||||
core::ptr::copy(reader.cursor, self.cursor, copy_len);
|
||||
crate::arch::mm::fast_copy(reader.cursor, self.cursor, copy_len);
|
||||
self.cursor = self.cursor.add(copy_len);
|
||||
reader.cursor = reader.cursor.add(copy_len);
|
||||
}
|
||||
|
Reference in New Issue
Block a user