mirror of
https://github.com/asterinas/asterinas.git
synced 2025-06-19 12:36:46 +00:00
Add the fast_copy
and fast_copy_nonoverlapping
in frame
This commit is contained in:
committed by
Tate, Hongliang Tian
parent
2002db5481
commit
657d6719c7
@ -1,8 +1,11 @@
|
|||||||
// SPDX-License-Identifier: MPL-2.0
|
// SPDX-License-Identifier: MPL-2.0
|
||||||
|
|
||||||
|
mod util;
|
||||||
|
|
||||||
use alloc::fmt;
|
use alloc::fmt;
|
||||||
|
|
||||||
use pod::Pod;
|
use pod::Pod;
|
||||||
|
pub use util::{fast_copy, fast_copy_nonoverlapping};
|
||||||
use x86_64::{instructions::tlb, structures::paging::PhysFrame, VirtAddr};
|
use x86_64::{instructions::tlb, structures::paging::PhysFrame, VirtAddr};
|
||||||
|
|
||||||
use crate::vm::{
|
use crate::vm::{
|
||||||
|
174
framework/aster-frame/src/arch/x86/mm/util.rs
Normal file
174
framework/aster-frame/src/arch/x86/mm/util.rs
Normal file
@ -0,0 +1,174 @@
|
|||||||
|
// SPDX-License-Identifier: MPL-2.0
|
||||||
|
|
||||||
|
/// Copies `count * size_of::<T>()` bytes from `src` to `dst`.
|
||||||
|
/// The source and destination may overlap.
|
||||||
|
///
|
||||||
|
/// If the source and destination will never overlap, `fast_copy_nonoverlapping` can be used instead.
|
||||||
|
///
|
||||||
|
/// # Performance
|
||||||
|
///
|
||||||
|
/// This function is provided as a fast alternative to `core::ptr::copy` by
|
||||||
|
/// utilizing the CPU's `rep movsq` and `rep movsb` instructions for bulk memory copying.
|
||||||
|
/// These instructions can result in more efficient data transfers by moving larger blocks
|
||||||
|
/// of memory in a single operation, leading to fewer CPU cycles and better performance
|
||||||
|
/// in certain scenarios.
|
||||||
|
///
|
||||||
|
/// # Safety
|
||||||
|
///
|
||||||
|
/// The safety requirements of this function are consistent with `core::ptr::copy`.
|
||||||
|
#[inline]
|
||||||
|
pub unsafe fn fast_copy<T>(src: *const T, dst: *mut T, count: usize) {
|
||||||
|
if src == dst || count == 0 {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if src < dst && src.add(count) > dst {
|
||||||
|
// Overlap and src is before dst
|
||||||
|
backward_copy(src, dst, count);
|
||||||
|
} else {
|
||||||
|
// No overlap, or src is after dst
|
||||||
|
forward_copy(src, dst, count);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Copies `count * size_of::<T>()` bytes from `src` to `dst`.
|
||||||
|
/// The source and destination must not overlap.
|
||||||
|
///
|
||||||
|
/// For regions of memory which might overlap, use `fast_copy` instead.
|
||||||
|
///
|
||||||
|
/// # Performance
|
||||||
|
///
|
||||||
|
/// This function is provided as a fast alternative to `core::ptr::copy_nonoverlapping` by
|
||||||
|
/// utilizing the CPU's `rep movsq` and `rep movsb` instructions for bulk memory copying.
|
||||||
|
/// These instructions can result in more efficient data transfers by moving larger blocks
|
||||||
|
/// of memory in a single operation, leading to fewer CPU cycles and better performance
|
||||||
|
/// in certain scenarios.
|
||||||
|
///
|
||||||
|
/// # Safety
|
||||||
|
///
|
||||||
|
/// The safety requirements of this function are consistent with `core::ptr::copy_nonoverlapping`.
|
||||||
|
#[inline]
|
||||||
|
pub unsafe fn fast_copy_nonoverlapping<T>(src: *const T, dst: *mut T, count: usize) {
|
||||||
|
if count == 0 {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
forward_copy(src, dst, count);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Copies `count` elements from `src` to `dst`, walking from the lowest
/// address to the highest.
///
/// # Safety
///
/// Both `src` and `dst` must point to valid memory regions of
/// `count * size_of::<T>()` bytes.
/// If the two regions overlap, `src` must be at a higher address than `dst`.
#[inline]
unsafe fn forward_copy<T>(src: *const T, dst: *mut T, count: usize) {
    const QWORD_BYTES: usize = 8;

    let total_bytes = count * core::mem::size_of::<T>();

    // String instructions such as `rep movsb` copy upwards when the direction
    // flag (DF) is 0 and downwards when it is 1. The System V ABI requires
    // `DF = 0` on function entry and nothing in this function sets it before
    // the copy, so the instructions below run low-to-high as intended.
    if total_bytes % QWORD_BYTES != 0 {
        // Length is not a whole number of quadwords: copy byte by byte.
        core::arch::asm!(
            "rep movsb",
            inout("rcx") total_bytes => _,
            inout("rsi") src => _,
            inout("rdi") dst => _,
        );
        return;
    }

    // `movsq` moves eight bytes per iteration, which is usually faster than
    // `movsb` for the same amount of data.
    core::arch::asm!(
        "rep movsq",
        inout("rcx") total_bytes / QWORD_BYTES => _,
        inout("rsi") src => _,
        inout("rdi") dst => _,
    );
}
|
||||||
|
|
||||||
|
/// Copies `count` elements from `src` to `dst`, walking from the highest
/// address down to the lowest.
///
/// This is the direction required when the regions overlap and `src` starts
/// below `dst`: the tail of the source is moved out of the way before the
/// copy reaches the bytes that would otherwise overwrite it.
///
/// # Safety
///
/// Both `src` and `dst` must point to valid memory regions of
/// `count * size_of::<T>()` bytes.
/// If the two regions overlap, `src` must be at a lower address than `dst`.
#[inline]
unsafe fn backward_copy<T>(src: *const T, dst: *mut T, count: usize) {
    let bytes_count = count * core::mem::size_of::<T>();
    if bytes_count == 0 {
        // Nothing to copy. Returning here also avoids forming a pointer one
        // byte *before* the start of the regions below.
        return;
    }

    // With DF set, `rep movsb` starts at the addresses in RSI/RDI and
    // decrements them, so both must point at the last byte of their region.
    let last_src = (src as *const u8).add(bytes_count - 1);
    let last_dst = (dst as *mut u8).add(bytes_count - 1);

    // `std` and `cld` must live in the SAME `asm!` block: Rust requires the
    // direction flag to be clear whenever an asm block exits, and the compiler
    // is free to place DF-sensitive code (e.g. an inlined `memcpy`) between
    // two separate blocks. Keeping DF set only inside this block also
    // satisfies the System V AMD64 ABI requirement that DF is clear on
    // function exit.
    //
    // NOTE(review): an interrupt taken between `std` and `cld` would observe
    // DF = 1 — confirm that the interrupt entry path clears DF before running
    // compiled code.
    core::arch::asm!(
        "std",       // Set the direction flag (DF): copy high-to-low
        "rep movsb",
        "cld",       // Restore DF = 0 before leaving the asm block
        inout("rcx") bytes_count => _,
        inout("rsi") last_src => _,
        inout("rdi") last_dst => _,
    );
}
|
||||||
|
|
||||||
|
#[cfg(ktest)]
mod test {
    use alloc::vec;

    use super::*;

    /// Disjoint buffers: the destination ends up equal to the source.
    #[ktest]
    fn test_fast_copy_nonoverlapping() {
        let zeros = vec![0u8; 8];
        let mut ones = vec![1u8; 8];

        unsafe {
            fast_copy_nonoverlapping(zeros.as_ptr(), ones.as_mut_ptr(), 8);
        }

        assert_eq!(zeros, ones);
    }

    /// Overlapping regions in one buffer, with the source starting above the
    /// destination.
    #[ktest]
    fn test_fast_copy_src_after_dst() {
        // Eight zero bytes followed by eight one bytes.
        let mut buf = vec![0u8; 16];
        buf[8..].fill(1);

        unsafe {
            fast_copy(buf.as_ptr().add(4), buf.as_mut_ptr(), 8);
        }

        // Bytes 4..12 of the original buffer now sit at the front.
        let expected_left = [0u8, 0, 0, 0, 1, 1, 1, 1];
        assert_eq!(&expected_left[..], &buf[0..8]);
    }

    /// Overlapping regions in one buffer, with the source starting below the
    /// destination.
    #[ktest]
    fn test_fast_copy_src_before_dst() {
        // Eight zero bytes followed by eight one bytes.
        let mut buf = vec![0u8; 16];
        buf[8..].fill(1);

        unsafe {
            fast_copy(buf.as_ptr().add(4), buf.as_mut_ptr().add(8), 8);
        }

        // Bytes 4..12 of the original buffer now sit in the second half.
        let expected_right = [0u8, 0, 0, 0, 1, 1, 1, 1];
        assert_eq!(&expected_right[..], &buf[8..]);
    }
}
|
@ -19,7 +19,7 @@ impl VmIo for IoMem {
|
|||||||
fn read_bytes(&self, offset: usize, buf: &mut [u8]) -> crate::Result<()> {
|
fn read_bytes(&self, offset: usize, buf: &mut [u8]) -> crate::Result<()> {
|
||||||
self.check_range(offset, buf.len())?;
|
self.check_range(offset, buf.len())?;
|
||||||
unsafe {
|
unsafe {
|
||||||
core::ptr::copy(
|
crate::arch::mm::fast_copy(
|
||||||
(self.virtual_address + offset) as *const u8,
|
(self.virtual_address + offset) as *const u8,
|
||||||
buf.as_mut_ptr(),
|
buf.as_mut_ptr(),
|
||||||
buf.len(),
|
buf.len(),
|
||||||
@ -31,7 +31,7 @@ impl VmIo for IoMem {
|
|||||||
fn write_bytes(&self, offset: usize, buf: &[u8]) -> crate::Result<()> {
|
fn write_bytes(&self, offset: usize, buf: &[u8]) -> crate::Result<()> {
|
||||||
self.check_range(offset, buf.len())?;
|
self.check_range(offset, buf.len())?;
|
||||||
unsafe {
|
unsafe {
|
||||||
core::ptr::copy(
|
crate::arch::mm::fast_copy(
|
||||||
buf.as_ptr(),
|
buf.as_ptr(),
|
||||||
(self.virtual_address + offset) as *mut u8,
|
(self.virtual_address + offset) as *mut u8,
|
||||||
buf.len(),
|
buf.len(),
|
||||||
|
@ -249,7 +249,7 @@ impl VmFrame {
|
|||||||
|
|
||||||
// Safety: src and dst is not overlapped.
|
// Safety: src and dst is not overlapped.
|
||||||
unsafe {
|
unsafe {
|
||||||
core::ptr::copy_nonoverlapping(src.as_ptr(), self.as_mut_ptr(), PAGE_SIZE);
|
crate::arch::mm::fast_copy_nonoverlapping(src.as_ptr(), self.as_mut_ptr(), PAGE_SIZE);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -589,7 +589,7 @@ impl<'a> VmReader<'a> {
|
|||||||
// Safety: the memory range is valid since `copy_len` is the minimum
|
// Safety: the memory range is valid since `copy_len` is the minimum
|
||||||
// of the reader's remaining data and the writer's available space.
|
// of the reader's remaining data and the writer's available space.
|
||||||
unsafe {
|
unsafe {
|
||||||
core::ptr::copy(self.cursor, writer.cursor, copy_len);
|
crate::arch::mm::fast_copy(self.cursor, writer.cursor, copy_len);
|
||||||
self.cursor = self.cursor.add(copy_len);
|
self.cursor = self.cursor.add(copy_len);
|
||||||
writer.cursor = writer.cursor.add(copy_len);
|
writer.cursor = writer.cursor.add(copy_len);
|
||||||
}
|
}
|
||||||
@ -714,7 +714,7 @@ impl<'a> VmWriter<'a> {
|
|||||||
// Safety: the memory range is valid since `copy_len` is the minimum
|
// Safety: the memory range is valid since `copy_len` is the minimum
|
||||||
// of the reader's remaining data and the writer's available space.
|
// of the reader's remaining data and the writer's available space.
|
||||||
unsafe {
|
unsafe {
|
||||||
core::ptr::copy(reader.cursor, self.cursor, copy_len);
|
crate::arch::mm::fast_copy(reader.cursor, self.cursor, copy_len);
|
||||||
self.cursor = self.cursor.add(copy_len);
|
self.cursor = self.cursor.add(copy_len);
|
||||||
reader.cursor = reader.cursor.add(copy_len);
|
reader.cursor = reader.cursor.add(copy_len);
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user