Revert fast_copy and fast_copy_nonoverlapping in the framework

This reverts commit 657d6719c7.
Committed by: Tate, Hongliang Tian
Parent: 2636bb0838
Commit: 56a4909211
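With this revert, the hand-written `rep movs`-based helpers are removed and the call sites in the diff below fall back to the standard `core::ptr::copy` and `core::ptr::copy_nonoverlapping` routines. The following is a minimal, self-contained sketch (not part of the commit; the buffers and the `demo` function are made up for illustration) of the two standard routines the call sites switch to:

fn demo() {
    // Non-overlapping copy, as in the VmFrame change below.
    let src = [0u8; 8];
    let mut dst = [1u8; 8];
    // SAFETY: both buffers are valid for 8 bytes and do not overlap.
    unsafe { core::ptr::copy_nonoverlapping(src.as_ptr(), dst.as_mut_ptr(), 8) };
    assert_eq!(src, dst);

    // Possibly-overlapping copy, as in the IoMem and VmReader/VmWriter changes;
    // `core::ptr::copy` has memmove semantics, so the overlap is handled correctly.
    let mut buf = [0u8, 1, 2, 3, 4, 5, 6, 7];
    // SAFETY: both ranges lie within `buf` and are valid for 4 bytes.
    unsafe { core::ptr::copy(buf.as_ptr(), buf.as_mut_ptr().add(2), 4) };
    assert_eq!(buf, [0, 1, 0, 1, 2, 3, 6, 7]);
}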
@@ -1,12 +1,9 @@
 // SPDX-License-Identifier: MPL-2.0
 
-mod util;
-
 use alloc::fmt;
 use core::ops::Range;
 
 use pod::Pod;
-pub use util::{fast_copy, fast_copy_nonoverlapping};
 use x86_64::{instructions::tlb, structures::paging::PhysFrame, VirtAddr};
 
 use crate::vm::{
@@ -1,174 +0,0 @@
-// SPDX-License-Identifier: MPL-2.0
-
-/// Copies `count * size_of::<T>()` bytes from `src` to `dst`.
-/// The source and destination may overlap.
-///
-/// If the source and destination will never overlap, `fast_copy_nonoverlapping` can be used instead.
-///
-/// # Performance
-///
-/// This function is provided as a fast alternative to `core::ptr::copy` by
-/// utilizing the CPU's `rep movsq` and `rep movsb` instructions for bulk memory copying.
-/// These instructions can result in more efficient data transfers by moving larger blocks
-/// of memory in a single operation, leading to fewer CPU cycles and better performance
-/// in certain scenarios.
-///
-/// # Safety
-///
-/// The safety requirements of this function are consistent with `core::ptr::copy`.
-#[inline]
-pub unsafe fn fast_copy<T>(src: *const T, dst: *mut T, count: usize) {
-    if src == dst || count == 0 {
-        return;
-    }
-
-    if src < dst && src.add(count) > dst {
-        // Overlap and src is before dst
-        backward_copy(src, dst, count);
-    } else {
-        // No overlap, or src is after dst
-        forward_copy(src, dst, count);
-    }
-}
-
-/// Copies `count * size_of::<T>()` bytes from `src` to `dst`.
-/// The source and destination must not overlap.
-///
-/// For regions of memory which might overlap, use `fast_copy` instead.
-///
-/// # Performance
-///
-/// This function is provided as a fast alternative to `core::ptr::copy_nonoverlapping` by
-/// utilizing the CPU's `rep movsq` and `rep movsb` instructions for bulk memory copying.
-/// These instructions can result in more efficient data transfers by moving larger blocks
-/// of memory in a single operation, leading to fewer CPU cycles and better performance
-/// in certain scenarios.
-///
-/// # Safety
-///
-/// The safety requirements of this function are consistent with `core::ptr::copy_nonoverlapping`.
-#[inline]
-pub unsafe fn fast_copy_nonoverlapping<T>(src: *const T, dst: *mut T, count: usize) {
-    if count == 0 {
-        return;
-    }
-
-    forward_copy(src, dst, count);
-}
-
-/// # Safety
-///
-/// The `src` and `dst` must point to valid memory regions.
-/// If the memory regions of `src` and `dst` overlap, `src` must be higher than `dst`.
-#[inline]
-unsafe fn forward_copy<T>(src: *const T, dst: *mut T, count: usize) {
-    let bytes_count = count * core::mem::size_of::<T>();
-
-    // The direction of string copy instructions such as `rep movsb` is controlled by DF flag.
-    // If `DF = 0`, then data copy is repeated from lower addresses to higher ones;
-    // Otherwise, the data copy will be done in the reversed direction.
-    // The System V ABI manual requires `DF = 0` on function entry
-    // and all code before the `rep movsb` instruction in this function do not change DF flag.
-    // Thus, we can safely assume `DF = 0`, which is exactly what we want.
-    if bytes_count % 8 == 0 {
-        // In most cases, `movsq` is faster than `movsb`
-        // because it transfers larger chunks of data in a single operation.
-        core::arch::asm!(
-            "rep movsq",
-            in("rcx") bytes_count / 8,
-            in("rsi") src,
-            in("rdi") dst,
-            lateout("rcx") _,
-            lateout("rsi") _,
-            lateout("rdi") _
-        );
-    } else {
-        core::arch::asm!(
-            "rep movsb",
-            in("rcx") bytes_count,
-            in("rsi") src,
-            in("rdi") dst,
-            lateout("rcx") _,
-            lateout("rsi") _,
-            lateout("rdi") _
-        );
-    }
-}
-
-/// # Safety
-///
-/// The `src` and `dst` must point to valid memory regions.
-/// If the memory regions of `src` and `dst` overlap, `src` must be lower than `dst`.
-#[inline]
-unsafe fn backward_copy<T>(src: *const T, dst: *mut T, count: usize) {
-    let bytes_count = count * core::mem::size_of::<T>();
-    let last_src = (src as *const u8).add(bytes_count).offset(-1);
-    let last_dst = (dst as *mut u8).add(bytes_count).offset(-1);
-
-    core::arch::asm!(
-        "std", // Set the direction flag (DF)
-        "rep movsb",
-        in("rcx") bytes_count,
-        in("rsi") last_src,
-        in("rdi") last_dst,
-        lateout("rcx") _,
-        lateout("rsi") _,
-        lateout("rdi") _
-    );
-
-    // System V ABI for AMD64 requires direction flag (DF) to be clear on function exit
-    core::arch::asm!("cld");
-}
-
-#[cfg(ktest)]
-mod test {
-    use alloc::vec;
-
-    use super::*;
-    #[ktest]
-    fn test_fast_copy_nonoverlapping() {
-        let src = vec![0u8; 8];
-        let mut dst = vec![1u8; 8];
-
-        unsafe {
-            fast_copy_nonoverlapping(src.as_ptr(), dst.as_mut_ptr(), 8);
-        }
-        assert_eq!(src, dst);
-    }
-
-    #[ktest]
-    fn test_fast_copy_src_after_dst() {
-        let mut src = vec![0u8; 8];
-        src.extend(vec![1u8; 8]);
-
-        unsafe {
-            fast_copy(src.as_ptr().add(4), src.as_mut_ptr(), 8);
-        }
-
-        let expected_left = {
-            let mut vec = vec![0u8; 4];
-            vec.extend(vec![1u8; 4]);
-            vec
-        };
-
-        assert_eq!(expected_left, src[0..8]);
-    }
-
-    #[ktest]
-    fn test_fast_copy_src_before_dst() {
-        let mut src = vec![0u8; 8];
-        src.extend(vec![1u8; 8]);
-
-        unsafe {
-            fast_copy(src.as_ptr().add(4), src.as_mut_ptr().add(8), 8);
-        }
-
-        let expected_right = {
-            let mut vec = vec![0u8; 4];
-            vec.extend(vec![1u8; 4]);
-            vec
-        };
-
-        assert_eq!(expected_right, src[8..]);
-    }
-}
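The deleted helpers above relied on the x86 direction flag (DF) to choose between a forward `rep movs` copy and a backward copy for overlapping regions. Below is a rough, hedged sketch of that overlap-direction rule, using simplified pointer loops instead of inline assembly (this is not the kernel code; the function name is made up). It is the same decision that `core::ptr::copy`'s memmove semantics now make internally:

/// Sketch only: chooses the copy direction based on whether the regions
/// overlap, mirroring the deleted `fast_copy`/`backward_copy` logic above.
unsafe fn copy_choosing_direction(src: *const u8, dst: *mut u8, count: usize) {
    if src as usize == dst as usize || count == 0 {
        return;
    }
    let overlaps_from_below =
        (src as usize) < (dst as usize) && (src as usize) + count > (dst as usize);
    if overlaps_from_below {
        // The source starts below the destination and the regions overlap:
        // copy from the end so each byte is read before it is overwritten
        // (the deleted `backward_copy` used `std` to set DF for this case).
        for i in (0..count).rev() {
            unsafe { dst.add(i).write(src.add(i).read()) };
        }
    } else {
        // No overlap, or the source starts above the destination:
        // a plain low-to-high copy is safe (DF = 0, forward `rep movs`).
        for i in 0..count {
            unsafe { dst.add(i).write(src.add(i).read()) };
        }
    }
}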
@@ -19,7 +19,7 @@ impl VmIo for IoMem
     fn read_bytes(&self, offset: usize, buf: &mut [u8]) -> crate::Result<()> {
         self.check_range(offset, buf.len())?;
        unsafe {
-            crate::arch::mm::fast_copy(
+            core::ptr::copy(
                 (self.virtual_address + offset) as *const u8,
                 buf.as_mut_ptr(),
                 buf.len(),
@@ -31,7 +31,7 @@ impl VmIo for IoMem
     fn write_bytes(&self, offset: usize, buf: &[u8]) -> crate::Result<()> {
         self.check_range(offset, buf.len())?;
        unsafe {
-            crate::arch::mm::fast_copy(
+            core::ptr::copy(
                 buf.as_ptr(),
                 (self.virtual_address + offset) as *mut u8,
                 buf.len(),
@@ -249,7 +249,7 @@ impl VmFrame
 
         // SAFETY: src and dst is not overlapped.
         unsafe {
-            crate::arch::mm::fast_copy_nonoverlapping(src.as_ptr(), self.as_mut_ptr(), PAGE_SIZE);
+            core::ptr::copy_nonoverlapping(src.as_ptr(), self.as_mut_ptr(), PAGE_SIZE);
         }
     }
 }
@@ -589,7 +589,7 @@ impl<'a> VmReader<'a>
         // SAFETY: the memory range is valid since `copy_len` is the minimum
         // of the reader's remaining data and the writer's available space.
         unsafe {
-            crate::arch::mm::fast_copy(self.cursor, writer.cursor, copy_len);
+            core::ptr::copy(self.cursor, writer.cursor, copy_len);
             self.cursor = self.cursor.add(copy_len);
             writer.cursor = writer.cursor.add(copy_len);
         }
@@ -714,7 +714,7 @@ impl<'a> VmWriter<'a>
         // SAFETY: the memory range is valid since `copy_len` is the minimum
         // of the reader's remaining data and the writer's available space.
        unsafe {
-            crate::arch::mm::fast_copy(reader.cursor, self.cursor, copy_len);
+            core::ptr::copy(reader.cursor, self.cursor, copy_len);
             self.cursor = self.cursor.add(copy_len);
             reader.cursor = reader.cursor.add(copy_len);
         }
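The two hunks above keep the same cursor discipline; only the copy routine changes. A hedged sketch (hypothetical `Cursor` type and `transfer` function, not the actual `VmReader`/`VmWriter` code) of the pattern the safety comments describe: `copy_len` is clamped to the minimum of the source's remaining data and the destination's available space before the raw copy and the cursor advances:

struct Cursor {
    cursor: *mut u8, // next byte to read or write
    end: *mut u8,    // one past the last valid byte
}

impl Cursor {
    fn remaining(&self) -> usize {
        self.end as usize - self.cursor as usize
    }
}

/// Copies as many bytes as both sides allow and advances both cursors.
fn transfer(reader: &mut Cursor, writer: &mut Cursor) -> usize {
    let copy_len = reader.remaining().min(writer.remaining());
    // SAFETY: both ranges are valid for `copy_len` bytes by construction,
    // and `core::ptr::copy` tolerates overlapping regions (memmove semantics).
    unsafe {
        core::ptr::copy(reader.cursor as *const u8, writer.cursor, copy_len);
        reader.cursor = reader.cursor.add(copy_len);
        writer.cursor = writer.cursor.add(copy_len);
    }
    copy_len
}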