Add `fast_copy` and `fast_copy_nonoverlapping` to the frame crate

This commit is contained in:
LI Qing
2024-05-15 15:56:10 +08:00
committed by Tate, Hongliang Tian
parent 2002db5481
commit 657d6719c7
4 changed files with 182 additions and 5 deletions

View File

@ -1,8 +1,11 @@
// SPDX-License-Identifier: MPL-2.0
mod util;
use alloc::fmt;
use pod::Pod;
pub use util::{fast_copy, fast_copy_nonoverlapping};
use x86_64::{instructions::tlb, structures::paging::PhysFrame, VirtAddr};
use crate::vm::{

View File

@ -0,0 +1,174 @@
// SPDX-License-Identifier: MPL-2.0
/// Copies `count * size_of::<T>()` bytes from `src` to `dst`.
/// The source and destination may overlap.
///
/// If the source and destination will never overlap, `fast_copy_nonoverlapping` can be used instead.
///
/// # Performance
///
/// This function is provided as a fast alternative to `core::ptr::copy` by
/// utilizing the CPU's `rep movsq` and `rep movsb` instructions for bulk memory copying.
/// These instructions can result in more efficient data transfers by moving larger blocks
/// of memory in a single operation, leading to fewer CPU cycles and better performance
/// in certain scenarios.
///
/// # Safety
///
/// The safety requirements of this function are consistent with `core::ptr::copy`.
#[inline]
pub unsafe fn fast_copy<T>(src: *const T, dst: *mut T, count: usize) {
    // Raw pointers of different mutability cannot be compared directly;
    // compare through a `*const T` view of `dst`.
    let dst_const = dst as *const T;
    // Nothing to do for empty or exactly-aliasing copies.
    if src == dst_const || count == 0 {
        return;
    }
    if src < dst_const && src.add(count) > dst_const {
        // The regions overlap and `src` is below `dst`: a forward copy would
        // clobber source bytes before they are read, so copy backwards.
        backward_copy(src, dst, count);
    } else {
        // No overlap, or `src` is above `dst`: a forward copy is safe.
        forward_copy(src, dst, count);
    }
}
/// Copies `count * size_of::<T>()` bytes from `src` to `dst`.
/// The source and destination must not overlap.
///
/// For regions of memory which might overlap, use `fast_copy` instead.
///
/// # Performance
///
/// This function is provided as a fast alternative to `core::ptr::copy_nonoverlapping` by
/// utilizing the CPU's `rep movsq` and `rep movsb` instructions for bulk memory copying.
/// These instructions can result in more efficient data transfers by moving larger blocks
/// of memory in a single operation, leading to fewer CPU cycles and better performance
/// in certain scenarios.
///
/// # Safety
///
/// The safety requirements of this function are consistent with `core::ptr::copy_nonoverlapping`.
#[inline]
pub unsafe fn fast_copy_nonoverlapping<T>(src: *const T, dst: *mut T, count: usize) {
    // Since the caller guarantees the regions are disjoint, a plain forward
    // copy is always correct; only skip the call for an empty copy.
    if count > 0 {
        forward_copy(src, dst, count);
    }
}
/// Copies `count * size_of::<T>()` bytes from `src` to `dst`, moving from
/// lower addresses to higher ones via `rep movsq`/`rep movsb`.
///
/// # Safety
///
/// The `src` and `dst` must point to valid memory regions.
/// If the memory regions of `src` and `dst` overlap, `src` must be higher than `dst`.
#[inline]
unsafe fn forward_copy<T>(src: *const T, dst: *mut T, count: usize) {
    let bytes_count = count * core::mem::size_of::<T>();
    // The direction of string copy instructions such as `rep movsb` is controlled by DF flag.
    // If `DF = 0`, then data copy is repeated from lower addresses to higher ones;
    // Otherwise, the data copy will be done in the reversed direction.
    // The System V ABI manual requires `DF = 0` on function entry
    // and all code before the `rep movsb` instruction in this function do not change DF flag.
    // Thus, we can safely assume `DF = 0`, which is exactly what we want.
    if bytes_count % 8 == 0 {
        // In most cases, `movsq` is faster than `movsb`
        // because it transfers larger chunks of data in a single operation.
        core::arch::asm!(
            "rep movsq",
            // RCX holds the repeat count: the number of 8-byte quadwords.
            in("rcx") bytes_count / 8,
            in("rsi") src,
            in("rdi") dst,
            // `rep movs*` advances RCX/RSI/RDI as it runs, so their final
            // values are meaningless; mark them as clobbered.
            lateout("rcx") _,
            lateout("rsi") _,
            lateout("rdi") _
        );
    } else {
        // Byte count is not a multiple of 8: fall back to a byte-wise copy.
        core::arch::asm!(
            "rep movsb",
            // RCX holds the repeat count: the number of bytes.
            in("rcx") bytes_count,
            in("rsi") src,
            in("rdi") dst,
            lateout("rcx") _,
            lateout("rsi") _,
            lateout("rdi") _
        );
    }
}
/// Copies `count * size_of::<T>()` bytes from `src` to `dst`, moving from the
/// highest byte down to the lowest (`rep movsb` with `DF = 1`).
///
/// # Safety
///
/// The `src` and `dst` must point to valid memory regions.
/// If the memory regions of `src` and `dst` overlap, `src` must be lower than `dst`.
#[inline]
unsafe fn backward_copy<T>(src: *const T, dst: *mut T, count: usize) {
    let bytes_count = count * core::mem::size_of::<T>();
    if bytes_count == 0 {
        // Guard: the "last byte" pointers below would otherwise be computed
        // one byte before the start of the regions.
        return;
    }
    // With `DF = 1`, `rep movsb` decrements RSI/RDI after each byte, so they
    // must initially point at the *last* byte of each region.
    let last_src = (src as *const u8).add(bytes_count - 1);
    let last_dst = (dst as *mut u8).add(bytes_count - 1);
    // `std`, the copy, and `cld` must stay in ONE `asm!` block. If `cld` were
    // issued from a separate block, the compiler could schedule code between
    // the two blocks that assumes `DF = 0` (as the System V ABI guarantees on
    // function entry), which would be unsound. Ending with `cld` restores the
    // ABI-required `DF = 0` before control leaves the inline assembly.
    core::arch::asm!(
        "std",        // Set the direction flag (DF): copy downwards
        "rep movsb",
        "cld",        // Clear DF again, as the System V AMD64 ABI requires
        inout("rcx") bytes_count => _,
        inout("rsi") last_src => _,
        inout("rdi") last_dst => _,
    );
}
#[cfg(ktest)]
mod test {
    use alloc::vec;

    use super::*;

    #[ktest]
    fn test_fast_copy_nonoverlapping() {
        // Disjoint buffers: the destination must become equal to the source.
        let from = vec![0u8; 8];
        let mut to = vec![1u8; 8];
        unsafe {
            fast_copy_nonoverlapping(from.as_ptr(), to.as_mut_ptr(), 8);
        }
        assert_eq!(from, to);
    }

    #[ktest]
    fn test_fast_copy_src_after_dst() {
        // Buffer layout: eight 0s followed by eight 1s.
        let mut buf = vec![0u8; 8];
        buf.extend(vec![1u8; 8]);
        // Overlapping copy of buf[4..12] down into buf[0..8] (src above dst).
        unsafe {
            fast_copy(buf.as_ptr().add(4), buf.as_mut_ptr(), 8);
        }
        // The low half must now be four 0s followed by four 1s.
        let expected = {
            let mut e = vec![0u8; 4];
            e.extend(vec![1u8; 4]);
            e
        };
        assert_eq!(expected, buf[0..8]);
    }

    #[ktest]
    fn test_fast_copy_src_before_dst() {
        // Buffer layout: eight 0s followed by eight 1s.
        let mut buf = vec![0u8; 8];
        buf.extend(vec![1u8; 8]);
        // Overlapping copy of buf[4..12] up into buf[8..16] (src below dst).
        unsafe {
            fast_copy(buf.as_ptr().add(4), buf.as_mut_ptr().add(8), 8);
        }
        // The high half must now be four 0s followed by four 1s.
        let expected = {
            let mut e = vec![0u8; 4];
            e.extend(vec![1u8; 4]);
            e
        };
        assert_eq!(expected, buf[8..]);
    }
}

View File

@ -19,7 +19,7 @@ impl VmIo for IoMem {
fn read_bytes(&self, offset: usize, buf: &mut [u8]) -> crate::Result<()> {
self.check_range(offset, buf.len())?;
unsafe {
core::ptr::copy(
crate::arch::mm::fast_copy(
(self.virtual_address + offset) as *const u8,
buf.as_mut_ptr(),
buf.len(),
@ -31,7 +31,7 @@ impl VmIo for IoMem {
fn write_bytes(&self, offset: usize, buf: &[u8]) -> crate::Result<()> {
self.check_range(offset, buf.len())?;
unsafe {
core::ptr::copy(
crate::arch::mm::fast_copy(
buf.as_ptr(),
(self.virtual_address + offset) as *mut u8,
buf.len(),

View File

@ -249,7 +249,7 @@ impl VmFrame {
// Safety: src and dst is not overlapped.
unsafe {
core::ptr::copy_nonoverlapping(src.as_ptr(), self.as_mut_ptr(), PAGE_SIZE);
crate::arch::mm::fast_copy_nonoverlapping(src.as_ptr(), self.as_mut_ptr(), PAGE_SIZE);
}
}
}
@ -589,7 +589,7 @@ impl<'a> VmReader<'a> {
// Safety: the memory range is valid since `copy_len` is the minimum
// of the reader's remaining data and the writer's available space.
unsafe {
core::ptr::copy(self.cursor, writer.cursor, copy_len);
crate::arch::mm::fast_copy(self.cursor, writer.cursor, copy_len);
self.cursor = self.cursor.add(copy_len);
writer.cursor = writer.cursor.add(copy_len);
}
@ -714,7 +714,7 @@ impl<'a> VmWriter<'a> {
// Safety: the memory range is valid since `copy_len` is the minimum
// of the reader's remaining data and the writer's available space.
unsafe {
core::ptr::copy(reader.cursor, self.cursor, copy_len);
crate::arch::mm::fast_copy(reader.cursor, self.cursor, copy_len);
self.cursor = self.cursor.add(copy_len);
reader.cursor = reader.cursor.add(copy_len);
}