diff --git a/framework/aster-frame/src/arch/x86/mm/mod.rs b/framework/aster-frame/src/arch/x86/mm/mod.rs index b149c3bc1..34deb3c9d 100644 --- a/framework/aster-frame/src/arch/x86/mm/mod.rs +++ b/framework/aster-frame/src/arch/x86/mm/mod.rs @@ -1,12 +1,9 @@ // SPDX-License-Identifier: MPL-2.0 -mod util; - use alloc::fmt; use core::ops::Range; use pod::Pod; -pub use util::{fast_copy, fast_copy_nonoverlapping}; use x86_64::{instructions::tlb, structures::paging::PhysFrame, VirtAddr}; use crate::vm::{ diff --git a/framework/aster-frame/src/arch/x86/mm/util.rs b/framework/aster-frame/src/arch/x86/mm/util.rs deleted file mode 100644 index e83159acd..000000000 --- a/framework/aster-frame/src/arch/x86/mm/util.rs +++ /dev/null @@ -1,174 +0,0 @@ -// SPDX-License-Identifier: MPL-2.0 - -/// Copies `count * size_of::()` bytes from `src` to `dst`. -/// The source and destination may overlap. -/// -/// If the source and destination will never overlap, `fast_copy_nonoverlapping` can be used instead. -/// -/// # Performance -/// -/// This function is provided as a fast alternative to `core::ptr::copy` by -/// utilizing the CPU's `rep movsq` and `rep movsb` instructions for bulk memory copying. -/// These instructions can result in more efficient data transfers by moving larger blocks -/// of memory in a single operation, leading to fewer CPU cycles and better performance -/// in certain scenarios. -/// -/// # Safety -/// -/// The safety requirements of this function are consistent with `core::ptr::copy`. -#[inline] -pub unsafe fn fast_copy(src: *const T, dst: *mut T, count: usize) { - if src == dst || count == 0 { - return; - } - - if src < dst && src.add(count) > dst { - // Overlap and src is before dst - backward_copy(src, dst, count); - } else { - // No overlap, or src is after dst - forward_copy(src, dst, count); - } -} - -/// Copies `count * size_of::()` bytes from `src` to `dst`. -/// The source and destination must not overlap. -/// -/// For regions of memory which might overlap, use `fast_copy` instead. -/// -/// # Performance -/// -/// This function is provided as a fast alternative to `core::ptr::copy_nonoverlapping` by -/// utilizing the CPU's `rep movsq` and `rep movsb` instructions for bulk memory copying. -/// These instructions can result in more efficient data transfers by moving larger blocks -/// of memory in a single operation, leading to fewer CPU cycles and better performance -/// in certain scenarios. -/// -/// # Safety -/// -/// The safety requirements of this function are consistent with `core::ptr::copy_nonoverlapping`. -#[inline] -pub unsafe fn fast_copy_nonoverlapping(src: *const T, dst: *mut T, count: usize) { - if count == 0 { - return; - } - - forward_copy(src, dst, count); -} - -/// # Safety -/// -/// The `src` and `dst` must point to valid memory regions. -/// If the memory regions of `src` and `dst` overlap, `src` must be higher than `dst`. -#[inline] -unsafe fn forward_copy(src: *const T, dst: *mut T, count: usize) { - let bytes_count = count * core::mem::size_of::(); - - // The direction of string copy instructions such as `rep movsb` is controlled by DF flag. - // If `DF = 0`, then data copy is repeated from lower addresses to higher ones; - // Otherwise, the data copy will be done in the reversed direction. - // The System V ABI manual requires `DF = 0` on function entry - // and all code before the `rep movsb` instruction in this function do not change DF flag. - // Thus, we can safely assume `DF = 0`, which is exactly what we want. - if bytes_count % 8 == 0 { - // In most cases, `movsq` is faster than `movsb` - // because it transfers larger chunks of data in a single operation. - core::arch::asm!( - "rep movsq", - in("rcx") bytes_count / 8, - in("rsi") src, - in("rdi") dst, - lateout("rcx") _, - lateout("rsi") _, - lateout("rdi") _ - ); - } else { - core::arch::asm!( - "rep movsb", - in("rcx") bytes_count, - in("rsi") src, - in("rdi") dst, - lateout("rcx") _, - lateout("rsi") _, - lateout("rdi") _ - ); - } -} - -/// # Safety -/// -/// The `src` and `dst` must point to valid memory regions. -/// If the memory regions of `src` and `dst` overlap, `src` must be lower than `dst`. -#[inline] -unsafe fn backward_copy(src: *const T, dst: *mut T, count: usize) { - let bytes_count = count * core::mem::size_of::(); - let last_src = (src as *const u8).add(bytes_count).offset(-1); - let last_dst = (dst as *mut u8).add(bytes_count).offset(-1); - - core::arch::asm!( - "std", // Set the direction flag (DF) - "rep movsb", - in("rcx") bytes_count, - in("rsi") last_src, - in("rdi") last_dst, - lateout("rcx") _, - lateout("rsi") _, - lateout("rdi") _ - ); - - // System V ABI for AMD64 requires direction flag (DF) to be clear on function exit - core::arch::asm!("cld"); -} - -#[cfg(ktest)] -mod test { - use alloc::vec; - - use super::*; - #[ktest] - fn test_fast_copy_nonoverlapping() { - let src = vec![0u8; 8]; - let mut dst = vec![1u8; 8]; - - unsafe { - fast_copy_nonoverlapping(src.as_ptr(), dst.as_mut_ptr(), 8); - } - assert_eq!(src, dst); - } - - #[ktest] - fn test_fast_copy_src_after_dst() { - let mut src = vec![0u8; 8]; - src.extend(vec![1u8; 8]); - - unsafe { - fast_copy(src.as_ptr().add(4), src.as_mut_ptr(), 8); - } - - let expected_left = { - let mut vec = vec![0u8; 4]; - vec.extend(vec![1u8; 4]); - vec - }; - - assert_eq!(expected_left, src[0..8]); - } - - #[ktest] - fn test_fast_copy_src_before_dst() { - let mut src = vec![0u8; 8]; - src.extend(vec![1u8; 8]); - - unsafe { - fast_copy(src.as_ptr().add(4), src.as_mut_ptr().add(8), 8); - } - - let expected_right = { - let mut vec = vec![0u8; 4]; - vec.extend(vec![1u8; 4]); - vec - }; - - assert_eq!(expected_right, src[8..]); - } -} diff --git a/framework/aster-frame/src/io_mem.rs b/framework/aster-frame/src/io_mem.rs index 5b7f3e734..2c5b3b035 100644 --- a/framework/aster-frame/src/io_mem.rs +++ b/framework/aster-frame/src/io_mem.rs @@ -19,7 +19,7 @@ impl VmIo for IoMem { fn read_bytes(&self, offset: usize, buf: &mut [u8]) -> crate::Result<()> { self.check_range(offset, buf.len())?; unsafe { - crate::arch::mm::fast_copy( + core::ptr::copy( (self.virtual_address + offset) as *const u8, buf.as_mut_ptr(), buf.len(), @@ -31,7 +31,7 @@ impl VmIo for IoMem { fn write_bytes(&self, offset: usize, buf: &[u8]) -> crate::Result<()> { self.check_range(offset, buf.len())?; unsafe { - crate::arch::mm::fast_copy( + core::ptr::copy( buf.as_ptr(), (self.virtual_address + offset) as *mut u8, buf.len(), diff --git a/framework/aster-frame/src/vm/frame.rs b/framework/aster-frame/src/vm/frame.rs index 63cab272e..6710c746f 100644 --- a/framework/aster-frame/src/vm/frame.rs +++ b/framework/aster-frame/src/vm/frame.rs @@ -249,7 +249,7 @@ impl VmFrame { // SAFETY: src and dst is not overlapped. unsafe { - crate::arch::mm::fast_copy_nonoverlapping(src.as_ptr(), self.as_mut_ptr(), PAGE_SIZE); + core::ptr::copy_nonoverlapping(src.as_ptr(), self.as_mut_ptr(), PAGE_SIZE); } } } @@ -589,7 +589,7 @@ impl<'a> VmReader<'a> { // SAFETY: the memory range is valid since `copy_len` is the minimum // of the reader's remaining data and the writer's available space. unsafe { - crate::arch::mm::fast_copy(self.cursor, writer.cursor, copy_len); + core::ptr::copy(self.cursor, writer.cursor, copy_len); self.cursor = self.cursor.add(copy_len); writer.cursor = writer.cursor.add(copy_len); } @@ -714,7 +714,7 @@ impl<'a> VmWriter<'a> { // SAFETY: the memory range is valid since `copy_len` is the minimum // of the reader's remaining data and the writer's available space. unsafe { - crate::arch::mm::fast_copy(reader.cursor, self.cursor, copy_len); + core::ptr::copy(reader.cursor, self.cursor, copy_len); self.cursor = self.cursor.add(copy_len); reader.cursor = reader.cursor.add(copy_len); }