From b1cec501d81ebdbaeb6cdff4ca6aa1aeb720bd3e Mon Sep 17 00:00:00 2001 From: Shaowei Song Date: Tue, 2 Jul 2024 09:29:22 +0000 Subject: [PATCH] [page_cache] Gap within one page should be filled with zeros in shrink case --- kernel/aster-nix/src/fs/exfat/inode.rs | 12 +-- kernel/aster-nix/src/fs/ext2/dir.rs | 4 +- kernel/aster-nix/src/fs/ext2/inode.rs | 6 +- kernel/aster-nix/src/fs/path/dentry.rs | 4 +- kernel/aster-nix/src/fs/ramfs/fs.rs | 4 +- kernel/aster-nix/src/fs/utils/page_cache.rs | 89 ++++++++++++++++----- kernel/aster-nix/src/lib.rs | 1 + kernel/aster-nix/src/vm/vmo/dyn_cap.rs | 1 + kernel/comps/block/src/impl_block_device.rs | 1 + 9 files changed, 85 insertions(+), 37 deletions(-) diff --git a/kernel/aster-nix/src/fs/exfat/inode.rs b/kernel/aster-nix/src/fs/exfat/inode.rs index 0b437b00b..d22b950ea 100644 --- a/kernel/aster-nix/src/fs/exfat/inode.rs +++ b/kernel/aster-nix/src/fs/exfat/inode.rs @@ -613,7 +613,7 @@ impl ExfatInode { let fs = inner.fs(); let fs_guard = fs.lock(); self.inner.write().resize(0, &fs_guard)?; - self.inner.read().page_cache.pages().resize(0)?; + self.inner.read().page_cache.resize(0)?; Ok(()) } @@ -860,7 +860,7 @@ impl ExfatInode { inner.size_allocated = new_size_allocated; inner.size = new_size_allocated; - inner.page_cache.pages().resize(new_size_allocated)?; + inner.page_cache.resize(new_size_allocated)?; } let inner = self.inner.read(); @@ -1040,7 +1040,7 @@ impl ExfatInode { if delete_contents { if is_dir { inode.inner.write().resize(0, fs_guard)?; - inode.inner.read().page_cache.pages().resize(0)?; + inode.inner.read().page_cache.resize(0)?; } // Set the delete flag. inode.inner.write().is_deleted = true; @@ -1110,7 +1110,7 @@ impl Inode for ExfatInode { // We will delay updating the page_cache size when enlarging an inode until the real write. if new_size < file_size { - self.inner.read().page_cache.pages().resize(new_size)?; + self.inner.read().page_cache.resize(new_size)?; } // Sync this inode since size has changed. 
@@ -1310,7 +1310,7 @@ impl Inode for ExfatInode { if new_size > file_allocated_size { inner.resize(new_size, &fs_guard)?; } - inner.page_cache.pages().resize(new_size)?; + inner.page_cache.resize(new_size)?; } new_size.max(file_size) }; @@ -1364,7 +1364,7 @@ impl Inode for ExfatInode { if end_offset > file_allocated_size { inner.resize(end_offset, &fs_guard)?; } - inner.page_cache.pages().resize(end_offset)?; + inner.page_cache.resize(end_offset)?; } file_size.max(end_offset) }; diff --git a/kernel/aster-nix/src/fs/ext2/dir.rs b/kernel/aster-nix/src/fs/ext2/dir.rs index f144fce3b..84e6dc5f0 100644 --- a/kernel/aster-nix/src/fs/ext2/dir.rs +++ b/kernel/aster-nix/src/fs/ext2/dir.rs @@ -250,7 +250,7 @@ impl<'a> DirEntryWriter<'a> { // Resize and append it at the new block. let old_size = self.page_cache.pages().size(); let new_size = old_size + BLOCK_SIZE; - self.page_cache.pages().resize(new_size)?; + self.page_cache.resize(new_size)?; new_entry.set_record_len(BLOCK_SIZE); self.offset = old_size; self.write_entry(&new_entry)?; @@ -285,7 +285,7 @@ impl<'a> DirEntryWriter<'a> { { // Shrink the size. 
let new_size = pre_offset.align_up(BLOCK_SIZE); - self.page_cache.pages().resize(new_size)?; + self.page_cache.resize(new_size)?; pre_entry.set_record_len(new_size - pre_offset); self.offset = pre_offset; self.write_entry(&pre_entry)?; diff --git a/kernel/aster-nix/src/fs/ext2/inode.rs b/kernel/aster-nix/src/fs/ext2/inode.rs index 19d1bdfcd..97e47b0d8 100644 --- a/kernel/aster-nix/src/fs/ext2/inode.rs +++ b/kernel/aster-nix/src/fs/ext2/inode.rs @@ -856,8 +856,8 @@ impl Inner { } pub fn resize(&mut self, new_size: usize) -> Result<()> { + self.page_cache.resize(new_size)?; self.inode_impl.resize(new_size)?; - self.page_cache.pages().resize(new_size)?; Ok(()) } @@ -905,7 +905,7 @@ impl Inner { pub fn extend_write_at(&mut self, offset: usize, buf: &[u8]) -> Result { let new_size = offset + buf.len(); - self.page_cache.pages().resize(new_size)?; + self.page_cache.resize(new_size)?; self.page_cache.pages().write_bytes(offset, buf)?; self.inode_impl.resize(new_size)?; Ok(buf.len()) @@ -947,7 +947,7 @@ impl Inner { return self.inode_impl.write_link(target); } - self.page_cache.pages().resize(target.len())?; + self.page_cache.resize(target.len())?; self.page_cache.pages().write_bytes(0, target.as_bytes())?; let file_size = self.inode_impl.file_size(); if file_size != target.len() { diff --git a/kernel/aster-nix/src/fs/path/dentry.rs b/kernel/aster-nix/src/fs/path/dentry.rs index 12bfa8a33..0f5e00982 100644 --- a/kernel/aster-nix/src/fs/path/dentry.rs +++ b/kernel/aster-nix/src/fs/path/dentry.rs @@ -165,7 +165,7 @@ impl Dentry_ { } /// Lookup a Dentry_ from filesystem. 
- pub fn lookuop_via_fs(&self, name: &str) -> Result> { + pub fn lookup_via_fs(&self, name: &str) -> Result> { let mut children = self.children.lock(); let inode = self.inode.lookup(name)?; let inner = Self::new( @@ -483,7 +483,7 @@ impl Dentry { match children_inner { Some(inner) => Self::new(self.mount_node().clone(), inner.clone()), None => { - let fs_inner = self.inner.lookuop_via_fs(name)?; + let fs_inner = self.inner.lookup_via_fs(name)?; Self::new(self.mount_node().clone(), fs_inner.clone()) } } diff --git a/kernel/aster-nix/src/fs/ramfs/fs.rs b/kernel/aster-nix/src/fs/ramfs/fs.rs index f1181c2b0..f0a978b29 100644 --- a/kernel/aster-nix/src/fs/ramfs/fs.rs +++ b/kernel/aster-nix/src/fs/ramfs/fs.rs @@ -557,7 +557,7 @@ impl Inode for RamInode { let new_size = offset + buf.len(); let should_expand_size = new_size > file_size; if should_expand_size { - page_cache.pages().resize(new_size)?; + page_cache.resize(new_size)?; } page_cache.pages().write_bytes(offset, buf)?; @@ -599,7 +599,7 @@ impl Inode for RamInode { let self_inode = self_inode.downgrade(); let page_cache = self_inode.inner.as_file().unwrap(); - page_cache.pages().resize(new_size)?; + page_cache.resize(new_size)?; Ok(()) } diff --git a/kernel/aster-nix/src/fs/utils/page_cache.rs b/kernel/aster-nix/src/fs/utils/page_cache.rs index 39b138efc..1675a461f 100644 --- a/kernel/aster-nix/src/fs/utils/page_cache.rs +++ b/kernel/aster-nix/src/fs/utils/page_cache.rs @@ -2,12 +2,13 @@ #![allow(dead_code)] -use core::ops::Range; +use core::{iter, ops::Range}; +use align_ext::AlignExt; use aster_block::bio::{BioStatus, BioWaiter}; use aster_rights::Full; use lru::LruCache; -use ostd::mm::{Frame, FrameAllocOptions}; +use ostd::mm::{Frame, FrameAllocOptions, VmIo}; use crate::{ prelude::*, @@ -66,6 +67,51 @@ impl PageCache { pub fn backend(&self) -> Arc { self.manager.backend() } + + /// Resizes the current page cache to a target size. 
+ pub fn resize(&self, new_size: usize) -> Result<()> { + // If the new size is smaller and not page-aligned, + // first zero the gap between the new size and the + // next page boundary (or the old size), if such a gap exists. + let old_size = self.pages.size(); + if old_size > new_size && new_size % PAGE_SIZE != 0 { + let gap_size = old_size.min(new_size.align_up(PAGE_SIZE)) - new_size; + if gap_size > 0 { + self.fill_zeros(new_size..new_size + gap_size)?; + } + } + self.pages.resize(new_size) + } + + /// Fill the specified range with zeros in the page cache. + pub fn fill_zeros(&self, range: Range) -> Result<()> { + if range.is_empty() { + return Ok(()); + } + let (start, end) = (range.start, range.end); + + // Write zeros to the first partial page if any + let first_page_end = start.align_up(PAGE_SIZE); + if first_page_end > start { + let zero_len = first_page_end.min(end) - start; + self.pages() + .write_vals(start, iter::repeat_n(&0, zero_len), 0)?; + } + + // Write zeros to the last partial page if any + let last_page_start = end.align_down(PAGE_SIZE); + if last_page_start < end && last_page_start >= start { + let zero_len = end - last_page_start; + self.pages() + .write_vals(last_page_start, iter::repeat_n(&0, zero_len), 0)?; + } + + for offset in (first_page_end..last_page_start).step_by(PAGE_SIZE) { + self.pages() + .write_vals(offset, iter::repeat_n(&0, PAGE_SIZE), 0)?; + } + Ok(()) + } + } impl Drop for PageCache { @@ -303,31 +349,30 @@ impl PageCacheManager { pub fn evict_range(&self, range: Range) -> Result<()> { let page_idx_range = get_page_idx_range(&range); - //TODO: When there are many pages, we should submit them in batches of folios rather than all at once. 
- let mut indices_and_waiters: Vec<(usize, BioWaiter)> = Vec::new(); - - for idx in page_idx_range { - if let Some(page) = self.pages.lock().get_mut(&idx) { - if let PageState::Dirty = page.state() { - let backend = self.backend(); - if idx < backend.npages() { - indices_and_waiters.push((idx, backend.write_page(idx, page.frame())?)); - } + let mut bio_waiter = BioWaiter::new(); + let mut pages = self.pages.lock(); + let backend = self.backend(); + let backend_npages = backend.npages(); + for idx in page_idx_range.start..page_idx_range.end { + if let Some(page) = pages.peek(&idx) { + if *page.state() == PageState::Dirty && idx < backend_npages { + let waiter = backend.write_page(idx, page.frame())?; + bio_waiter.concat(waiter); } } } - for (idx, waiter) in indices_and_waiters.iter() { - if matches!(waiter.wait(), Some(BioStatus::Complete)) { - if let Some(page) = self.pages.lock().get_mut(idx) { - page.set_state(PageState::UpToDate) - } - } else { - // TODO: We may need an error handler here. - return_errno!(Errno::EIO) - } + if !matches!(bio_waiter.wait(), Some(BioStatus::Complete)) { + // Do not allow partial failure + return_errno!(Errno::EIO); } + for (_, page) in pages + .iter_mut() + .filter(|(idx, _)| page_idx_range.contains(idx)) + { + page.set_state(PageState::UpToDate); + } Ok(()) } @@ -467,7 +512,7 @@ impl Page { } } -#[derive(Debug)] +#[derive(Debug, Clone, Copy, PartialEq, Eq)] enum PageState { /// `Uninit` indicates a new allocated page which content has not been initialized. /// The page is available to write, not available to read. 
diff --git a/kernel/aster-nix/src/lib.rs b/kernel/aster-nix/src/lib.rs index e1764b326..7ef0fc890 100644 --- a/kernel/aster-nix/src/lib.rs +++ b/kernel/aster-nix/src/lib.rs @@ -11,6 +11,7 @@ #![feature(fn_traits)] #![feature(format_args_nl)] #![feature(int_roundings)] +#![feature(iter_repeat_n)] #![feature(let_chains)] #![feature(linked_list_remove)] #![feature(register_tool)] diff --git a/kernel/aster-nix/src/vm/vmo/dyn_cap.rs b/kernel/aster-nix/src/vm/vmo/dyn_cap.rs index f3db6e669..c1a00eef0 100644 --- a/kernel/aster-nix/src/vm/vmo/dyn_cap.rs +++ b/kernel/aster-nix/src/vm/vmo/dyn_cap.rs @@ -162,6 +162,7 @@ impl VmIo for Vmo { self.0.write_bytes(offset, buf)?; Ok(()) } + // TODO: Support efficient `write_vals()` } impl VmoRightsOp for Vmo { diff --git a/kernel/comps/block/src/impl_block_device.rs b/kernel/comps/block/src/impl_block_device.rs index 604d6d6ab..f4764a6a0 100644 --- a/kernel/comps/block/src/impl_block_device.rs +++ b/kernel/comps/block/src/impl_block_device.rs @@ -11,6 +11,7 @@ use crate::prelude::*; /// Implements several commonly used APIs for the block device to conveniently /// read and write block(s). +// TODO: Add API to submit bio with multiple segments in scatter/gather manner. impl dyn BlockDevice { /// Synchronously reads contiguous blocks starting from the `bid`. pub fn read_blocks_sync(