From aae9fdb3317b72ac8f912fed3ec58e6fc65f00bc Mon Sep 17 00:00:00 2001
From: Shaowei Song
Date: Wed, 28 Aug 2024 09:38:43 +0000
Subject: [PATCH] Add assembly `memset` for fast filling zeros

---
 kernel/src/device/zero.rs              |  6 +---
 ostd/src/arch/x86/mm/memcpy_fallible.S |  4 +--
 ostd/src/arch/x86/mm/memset_fallible.S | 27 ++++++++++++++++
 ostd/src/arch/x86/mm/mod.rs            |  2 +-
 ostd/src/arch/x86/mm/util.rs           |  5 +++
 ostd/src/mm/io.rs                      | 45 +++++++++++++++++++++++++-
 6 files changed, 80 insertions(+), 9 deletions(-)
 create mode 100644 ostd/src/arch/x86/mm/memset_fallible.S

diff --git a/kernel/src/device/zero.rs b/kernel/src/device/zero.rs
index e883296ca..51449eadb 100644
--- a/kernel/src/device/zero.rs
+++ b/kernel/src/device/zero.rs
@@ -20,11 +20,7 @@ impl Device for Zero {
 
 impl FileIo for Zero {
     fn read(&self, writer: &mut VmWriter) -> Result<usize> {
-        // TODO: Use more efficient way when need to read a bunch of zeros once.
-        let read_len = writer.avail();
-        for _ in 0..read_len {
-            writer.write_val(&0u8)?;
-        }
+        let read_len = writer.fill_zeros(writer.avail())?;
         Ok(read_len)
     }
 
diff --git a/ostd/src/arch/x86/mm/memcpy_fallible.S b/ostd/src/arch/x86/mm/memcpy_fallible.S
index 6249d0e47..5a117f3ab 100644
--- a/ostd/src/arch/x86/mm/memcpy_fallible.S
+++ b/ostd/src/arch/x86/mm/memcpy_fallible.S
@@ -15,12 +15,12 @@ __memcpy_fallible: # (dst: *mut u8, src: *const u8, size: usize) -> usize
 .move:
     rep movsb
 
-.exit:
+.memcpy_exit:
     mov rax, rcx
     ret
 
 .pushsection .ex_table, "a"
     .align 8
     .quad [.move]
-    .quad [.exit]
+    .quad [.memcpy_exit]
 .popsection
\ No newline at end of file
diff --git a/ostd/src/arch/x86/mm/memset_fallible.S b/ostd/src/arch/x86/mm/memset_fallible.S
new file mode 100644
index 000000000..f76bbef26
--- /dev/null
+++ b/ostd/src/arch/x86/mm/memset_fallible.S
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: MPL-2.0 */
+
+// Sets `size` bytes of memory at `dst` to the byte value given by `value`.
+// This function works with exception handling and can recover from a page fault.
+//
+// Returns number of bytes that failed to set.
+//
+// Ref: [https://github.com/torvalds/linux/blob/2ab79514109578fc4b6df90633d500cf281eb689/arch/x86/lib/memset_64.S]
+.text
+.global __memset_fallible
+.code64
+__memset_fallible: # (dst: *mut u8, value: u8, size: usize) -> usize
+    mov rcx, rdx         # Move the size to rcx for counting
+    mov al, sil          # Move the value to al
+
+.set:
+    rep stosb            # Store the value byte repeatedly
+
+.memset_exit:
+    mov rax, rcx         # Return the size remaining
+    ret
+
+.pushsection .ex_table, "a"
+    .align 8
+    .quad [.set]
+    .quad [.memset_exit]
+.popsection
\ No newline at end of file
diff --git a/ostd/src/arch/x86/mm/mod.rs b/ostd/src/arch/x86/mm/mod.rs
index c9903e09e..e36b5f80f 100644
--- a/ostd/src/arch/x86/mm/mod.rs
+++ b/ostd/src/arch/x86/mm/mod.rs
@@ -6,7 +6,7 @@ use alloc::fmt;
 use core::ops::Range;
 
 use cfg_if::cfg_if;
-pub(crate) use util::__memcpy_fallible;
+pub(crate) use util::{__memcpy_fallible, __memset_fallible};
 use x86_64::{instructions::tlb, structures::paging::PhysFrame, VirtAddr};
 
 use crate::{
diff --git a/ostd/src/arch/x86/mm/util.rs b/ostd/src/arch/x86/mm/util.rs
index a68955c4e..e2c51ef4c 100644
--- a/ostd/src/arch/x86/mm/util.rs
+++ b/ostd/src/arch/x86/mm/util.rs
@@ -1,10 +1,15 @@
 // SPDX-License-Identifier: MPL-2.0
 
 core::arch::global_asm!(include_str!("memcpy_fallible.S"));
+core::arch::global_asm!(include_str!("memset_fallible.S"));
 
 extern "C" {
     /// Copies `size` bytes from `src` to `dst`. This function works with exception handling
     /// and can recover from page fault.
     /// Returns number of bytes that failed to copy.
     pub(crate) fn __memcpy_fallible(dst: *mut u8, src: *const u8, size: usize) -> usize;
+    /// Fills `size` bytes in the memory pointed to by `dst` with the value `value`.
+    /// This function works with exception handling and can recover from page fault.
+    /// Returns number of bytes that failed to set.
+    pub(crate) fn __memset_fallible(dst: *mut u8, value: u8, size: usize) -> usize;
 }
diff --git a/ostd/src/mm/io.rs b/ostd/src/mm/io.rs
index 53356da69..67717db98 100644
--- a/ostd/src/mm/io.rs
+++ b/ostd/src/mm/io.rs
@@ -48,7 +48,7 @@ use const_assert::{Assert, IsTrue};
 use inherit_methods_macro::inherit_methods;
 
 use crate::{
-    arch::mm::__memcpy_fallible,
+    arch::mm::{__memcpy_fallible, __memset_fallible},
     mm::{
         kspace::{KERNEL_BASE_VADDR, KERNEL_END_VADDR},
         MAX_USERSPACE_VADDR,
@@ -326,6 +326,21 @@ unsafe fn memcpy_fallible(dst: *mut u8, src: *const u8, len: usize) -> usize {
     len - failed_bytes
 }
 
+/// Fills `len` bytes of memory at `dst` with the specified `value`.
+/// This function will early stop filling if encountering an unresolvable page fault.
+///
+/// Returns the number of successfully set bytes.
+///
+/// # Safety
+///
+/// - `dst` must either be [valid] for writes of `len` bytes or be in user space for `len` bytes.
+///
+/// [valid]: crate::mm::io#safety
+unsafe fn memset_fallible(dst: *mut u8, value: u8, len: usize) -> usize {
+    let failed_bytes = __memset_fallible(dst, value, len);
+    len - failed_bytes
+}
+
 /// Fallible memory read from a `VmWriter`.
 pub trait FallibleVmRead {
     /// Reads all data into the writer until one of the three conditions is met:
@@ -825,6 +840,34 @@ impl<'a> VmWriter<'a, Fallible> {
         })?;
         Ok(())
     }
+
+    /// Writes `len` zeros to the target memory.
+    ///
+    /// This method attempts to fill up to `len` bytes with zeros. If the available
+    /// memory from the current cursor position is less than `len`, it will only fill
+    /// the available space.
+    ///
+    /// If the memory write failed due to an unresolvable page fault, this method
+    /// will return `Err` with the length set so far.
+    pub fn fill_zeros(&mut self, len: usize) -> core::result::Result<usize, (Error, usize)> {
+        let len_to_set = self.avail().min(len);
+        if len_to_set == 0 {
+            return Ok(0);
+        }
+
+        // SAFETY: The destination is a subset of the memory range specified by
+        // the current writer, so it is either valid for writing or in user space.
+        let set_len = unsafe {
+            let set_len = memset_fallible(self.cursor, 0u8, len_to_set);
+            self.cursor = self.cursor.add(set_len);
+            set_len
+        };
+        if set_len < len_to_set {
+            Err((Error::PageFault, set_len))
+        } else {
+            Ok(len_to_set)
+        }
+    }
 }
 
 impl<'a, Fallibility> VmWriter<'a, Fallibility> {