From 725a46fe72546af880ea183cf6d4b35349b709c6 Mon Sep 17 00:00:00 2001
From: Chen Chengjun
Date: Wed, 30 Apr 2025 10:09:38 +0800
Subject: [PATCH] Modify VMO usages and fix the atomic-mode issue when
 handling page faults

---
 .../process/program_loader/elf/load_elf.rs | 109 ++++---
 kernel/src/vdso.rs                         |   2 +-
 kernel/src/vm/vmar/vm_mapping.rs           | 265 +++++++++++-------
 3 files changed, 226 insertions(+), 150 deletions(-)

diff --git a/kernel/src/process/program_loader/elf/load_elf.rs b/kernel/src/process/program_loader/elf/load_elf.rs
index dd1b79dd0..211137811 100644
--- a/kernel/src/process/program_loader/elf/load_elf.rs
+++ b/kernel/src/process/program_loader/elf/load_elf.rs
@@ -7,7 +7,7 @@
 use align_ext::AlignExt;
 use aster_rights::Full;
-use ostd::mm::VmIo;
+use ostd::mm::{CachePolicy, PageFlags, PageProperty, VmIo};
 use xmas_elf::program::{self, ProgramHeader64};
 
 use super::elf_file::Elf;
@@ -23,7 +23,12 @@ use crate::{
         TermStatus,
     },
     vdso::{vdso_vmo, VDSO_VMO_SIZE},
-    vm::{perms::VmPerms, util::duplicate_frame, vmar::Vmar, vmo::VmoRightsOp},
+    vm::{
+        perms::VmPerms,
+        util::duplicate_frame,
+        vmar::Vmar,
+        vmo::{CommitFlags, VmoRightsOp},
+    },
 };
 
 /// Loads elf to the process vm.
@@ -272,7 +277,7 @@ fn map_segment_vmo(
                 "executable has no page cache",
             ))?
             .to_dyn()
-            .dup_independent()?
+            .dup()?
     };
 
     let total_map_size = {
@@ -288,60 +293,74 @@ fn map_segment_vmo(
         (start, end - start)
     };
 
-    // Write zero as paddings. There are head padding and tail padding.
-    // Head padding: if the segment's virtual address is not page-aligned,
-    // then the bytes in first page from start to virtual address should be padded zeros.
-    // Tail padding: If the segment's mem_size is larger than file size,
-    // then the bytes that are not backed up by file content should be zeros.(usually .data/.bss sections).
-
-    // Head padding.
-    let page_offset = file_offset % PAGE_SIZE;
-    if page_offset != 0 {
-        let new_frame = {
-            let head_frame = segment_vmo.commit_page(segment_offset)?;
-            let new_frame = duplicate_frame(&head_frame)?;
-
-            let buffer = vec![0u8; page_offset];
-            new_frame.write_bytes(0, &buffer).unwrap();
-            new_frame
-        };
-        let head_idx = segment_offset / PAGE_SIZE;
-        segment_vmo.replace(new_frame.into(), head_idx)?;
-    }
-
-    // Tail padding.
-    let tail_padding_offset = program_header.file_size as usize + page_offset;
-    if segment_size > tail_padding_offset {
-        let new_frame = {
-            let tail_frame = segment_vmo.commit_page(segment_offset + tail_padding_offset)?;
-            let new_frame = duplicate_frame(&tail_frame)?;
-
-            let buffer = vec![0u8; (segment_size - tail_padding_offset) % PAGE_SIZE];
-            new_frame
-                .write_bytes(tail_padding_offset % PAGE_SIZE, &buffer)
-                .unwrap();
-            new_frame
-        };
-
-        let tail_idx = (segment_offset + tail_padding_offset) / PAGE_SIZE;
-        segment_vmo.replace(new_frame.into(), tail_idx).unwrap();
-    }
-
     let perms = parse_segment_perm(program_header.flags);
     let offset = base_addr + (program_header.virtual_addr as Vaddr).align_down(PAGE_SIZE);
     if segment_size != 0 {
         let mut vm_map_options = root_vmar
             .new_map(segment_size, perms)?
-            .vmo(segment_vmo)
+            .vmo(segment_vmo.dup()?)
            .vmo_offset(segment_offset)
             .vmo_limit(segment_offset + segment_size)
             .can_overwrite(true);
         vm_map_options = vm_map_options.offset(offset).handle_page_faults_around();
-        vm_map_options.build()?;
+        let map_addr = vm_map_options.build()?;
+
+        // Write zeros as padding. There are two kinds: head padding and tail padding.
+        // Head padding: if the segment's virtual address is not page-aligned,
+        // then the bytes in the first page, from the page start to the virtual address, should be zero-filled.
+        // Tail padding: if the segment's mem_size is larger than its file_size,
+        // then the bytes that are not backed by file content (usually the .data/.bss sections) should be zeros.
+
+        let mut cursor = root_vmar
+            .vm_space()
+            .cursor_mut(&(map_addr..map_addr + segment_size))?;
+        let page_flags = PageFlags::from(perms) | PageFlags::ACCESSED;
+
+        // Head padding.
+        let page_offset = file_offset % PAGE_SIZE;
+        if page_offset != 0 {
+            let new_frame = {
+                let head_frame =
+                    segment_vmo.commit_on(segment_offset / PAGE_SIZE, CommitFlags::empty())?;
+                let new_frame = duplicate_frame(&head_frame)?;
+
+                let buffer = vec![0u8; page_offset];
+                new_frame.write_bytes(0, &buffer).unwrap();
+                new_frame
+            };
+            cursor.map(
+                new_frame.into(),
+                PageProperty::new(page_flags, CachePolicy::Writeback),
+            );
+        }
+
+        // Tail padding.
+        let tail_padding_offset = program_header.file_size as usize + page_offset;
+        if segment_size > tail_padding_offset {
+            let new_frame = {
+                let tail_frame = {
+                    let offset_index = (segment_offset + tail_padding_offset) / PAGE_SIZE;
+                    segment_vmo.commit_on(offset_index, CommitFlags::empty())?
+                };
+                let new_frame = duplicate_frame(&tail_frame)?;
+
+                let buffer = vec![0u8; (segment_size - tail_padding_offset) % PAGE_SIZE];
+                new_frame
+                    .write_bytes(tail_padding_offset % PAGE_SIZE, &buffer)
+                    .unwrap();
+                new_frame
+            };
+
+            let tail_page_addr = map_addr + tail_padding_offset.align_down(PAGE_SIZE);
+            cursor.jump(tail_page_addr)?;
+            cursor.map(
+                new_frame.into(),
+                PageProperty::new(page_flags, CachePolicy::Writeback),
+            );
+        }
     }
 
     let anonymous_map_size: usize = total_map_size.saturating_sub(segment_size);
-
     if anonymous_map_size > 0 {
         let mut anonymous_map_options = root_vmar
             .new_map(anonymous_map_size, perms)?
diff --git a/kernel/src/vdso.rs b/kernel/src/vdso.rs
index 7ad4cb9c9..aef020c33 100644
--- a/kernel/src/vdso.rs
+++ b/kernel/src/vdso.rs
@@ -233,7 +233,7 @@ impl Vdso {
             // Write VDSO library to VDSO VMO.
             vdso_vmo.write_bytes(0x4000, &*vdso_text).unwrap();
 
-            let data_frame = vdso_vmo.commit_page(0).unwrap();
+            let data_frame = vdso_vmo.try_commit_page(0).unwrap();
             (vdso_vmo, data_frame)
         };
         Self {
diff --git a/kernel/src/vm/vmar/vm_mapping.rs b/kernel/src/vm/vmar/vm_mapping.rs
index e029ee30b..3c8ceb8bf 100644
--- a/kernel/src/vm/vmar/vm_mapping.rs
+++ b/kernel/src/vm/vmar/vm_mapping.rs
@@ -16,7 +16,11 @@ use super::interval_set::Interval;
 use crate::{
     prelude::*,
     thread::exception::PageFaultInfo,
-    vm::{perms::VmPerms, util::duplicate_frame, vmo::Vmo},
+    vm::{
+        perms::VmPerms,
+        util::duplicate_frame,
+        vmo::{CommitFlags, Vmo, VmoCommitError},
+    },
 };
 
 /// Mapping a range of physical pages into a `Vmar`.
@@ -143,99 +147,121 @@ impl VmMapping {
         let is_write = page_fault_info.required_perms.contains(VmPerms::WRITE);
 
         if !is_write && self.vmo.is_some() && self.handle_page_faults_around {
-            self.handle_page_faults_around(vm_space, address)?;
-            return Ok(());
+            let res = self.handle_page_faults_around(vm_space, address);
+
+            // Errors caused by the "around" pages should be ignored, so here we
+            // only return the error if the faulting page is still not mapped.
+            if res.is_err() {
+                let mut cursor =
+                    vm_space.cursor(&(page_aligned_addr..page_aligned_addr + PAGE_SIZE))?;
+                if let VmItem::Mapped { .. } = cursor.query().unwrap() {
+                    return Ok(());
+                }
+            }
+
+            return res;
         }
 
-        let mut cursor =
-            vm_space.cursor_mut(&(page_aligned_addr..page_aligned_addr + PAGE_SIZE))?;
+        'retry: loop {
+            let mut cursor =
+                vm_space.cursor_mut(&(page_aligned_addr..page_aligned_addr + PAGE_SIZE))?;
 
-        match cursor.query().unwrap() {
-            VmItem::Mapped {
-                va,
-                frame,
-                mut prop,
-            } => {
-                if VmPerms::from(prop.flags).contains(page_fault_info.required_perms) {
-                    // The page fault is already handled maybe by other threads.
-                    // Just flush the TLB and return.
-                    TlbFlushOp::Address(va).perform_on_current();
-                    return Ok(());
-                }
-                assert!(is_write);
-                // Perform COW if it is a write access to a shared mapping.
-
-                // Skip if the page fault is already handled.
-                if prop.flags.contains(PageFlags::W) {
-                    return Ok(());
-                }
-
-                // If the forked child or parent immediately unmaps the page after
-                // the fork without accessing it, we are the only reference to the
-                // frame. We can directly map the frame as writable without
-                // copying. In this case, the reference count of the frame is 2 (
-                // one for the mapping and one for the frame handle itself).
-                let only_reference = frame.reference_count() == 2;
-
-                let new_flags = PageFlags::W | PageFlags::ACCESSED | PageFlags::DIRTY;
-
-                if self.is_shared || only_reference {
-                    cursor.protect_next(PAGE_SIZE, |p| p.flags |= new_flags);
-                    cursor.flusher().issue_tlb_flush(TlbFlushOp::Address(va));
-                    cursor.flusher().dispatch_tlb_flush();
-                } else {
-                    let new_frame = duplicate_frame(&frame)?;
-                    prop.flags |= new_flags;
-                    cursor.map(new_frame.into(), prop);
-                }
-                cursor.flusher().sync_tlb_flush();
-            }
-            VmItem::NotMapped { .. } => {
-                // Map a new frame to the page fault address.
-
-                let (frame, is_readonly) = self.prepare_page(address, is_write)?;
-
-                let vm_perms = {
-                    let mut perms = self.perms;
-                    if is_readonly {
-                        // COW pages are forced to be read-only.
-                        perms -= VmPerms::WRITE;
+            match cursor.query().unwrap() {
+                VmItem::Mapped {
+                    va,
+                    frame,
+                    mut prop,
+                } => {
+                    if VmPerms::from(prop.flags).contains(page_fault_info.required_perms) {
+                        // The page fault may have already been handled by other threads.
+                        // Just flush the TLB and return.
+                        TlbFlushOp::Address(va).perform_on_current();
+                        return Ok(());
                     }
-                    perms
-                };
+                    assert!(is_write);
+                    // Perform COW if it is a write access to a shared mapping.
 
-                let mut page_flags = vm_perms.into();
-                page_flags |= PageFlags::ACCESSED;
-                if is_write {
-                    page_flags |= PageFlags::DIRTY;
+                    // Skip if the page fault is already handled.
+                    if prop.flags.contains(PageFlags::W) {
+                        return Ok(());
+                    }
+
+                    // If the forked child or parent immediately unmaps the page after
+                    // the fork without accessing it, we are the only reference to the
+                    // frame. We can directly map the frame as writable without
+                    // copying. In this case, the reference count of the frame is 2 (
+                    // one for the mapping and one for the frame handle itself).
+                    let only_reference = frame.reference_count() == 2;
+
+                    let new_flags = PageFlags::W | PageFlags::ACCESSED | PageFlags::DIRTY;
+
+                    if self.is_shared || only_reference {
+                        cursor.protect_next(PAGE_SIZE, |p| p.flags |= new_flags);
+                        cursor.flusher().issue_tlb_flush(TlbFlushOp::Address(va));
+                        cursor.flusher().dispatch_tlb_flush();
+                    } else {
+                        let new_frame = duplicate_frame(&frame)?;
+                        prop.flags |= new_flags;
+                        cursor.map(new_frame.into(), prop);
+                    }
+                    cursor.flusher().sync_tlb_flush();
                 }
-                let map_prop = PageProperty::new(page_flags, CachePolicy::Writeback);
+                VmItem::NotMapped { .. } => {
+                    // Map a new frame to the page fault address.
+                    let (frame, is_readonly) = match self.prepare_page(address, is_write) {
+                        Ok((frame, is_readonly)) => (frame, is_readonly),
+                        Err(VmoCommitError::Err(e)) => return Err(e),
+                        Err(VmoCommitError::NeedIo(index)) => {
+                            drop(cursor);
+                            self.vmo
+                                .as_ref()
+                                .unwrap()
+                                .commit_on(index, CommitFlags::empty())?;
+                            continue 'retry;
+                        }
+                    };
 
-                cursor.map(frame, map_prop);
+                    let vm_perms = {
+                        let mut perms = self.perms;
+                        if is_readonly {
+                            // COW pages are forced to be read-only.
+                            perms -= VmPerms::WRITE;
+                        }
+                        perms
+                    };
+
+                    let mut page_flags = vm_perms.into();
+                    page_flags |= PageFlags::ACCESSED;
+                    if is_write {
+                        page_flags |= PageFlags::DIRTY;
+                    }
+                    let map_prop = PageProperty::new(page_flags, CachePolicy::Writeback);
+
+                    cursor.map(frame, map_prop);
+                }
             }
+            break 'retry;
         }
 
         Ok(())
     }
 
-    fn prepare_page(&self, page_fault_addr: Vaddr, write: bool) -> Result<(UFrame, bool)> {
+    fn prepare_page(
+        &self,
+        page_fault_addr: Vaddr,
+        write: bool,
+    ) -> core::result::Result<(UFrame, bool), VmoCommitError> {
         let mut is_readonly = false;
         let Some(vmo) = &self.vmo else {
             return Ok((FrameAllocOptions::new().alloc_frame()?.into(), is_readonly));
         };
 
         let page_offset = page_fault_addr.align_down(PAGE_SIZE) - self.map_to_addr;
-        let Ok(page) = vmo.get_committed_frame(page_offset) else {
-            if !self.is_shared {
-                // The page index is outside the VMO. This is only allowed in private mapping.
-                return Ok((FrameAllocOptions::new().alloc_frame()?.into(), is_readonly));
-            } else {
-                return_errno_with_message!(
-                    Errno::EFAULT,
-                    "could not find a corresponding physical page"
-                );
-            }
-        };
+        if !self.is_shared && page_offset >= vmo.size() {
+            // The page index is outside the VMO. This is only allowed in a private mapping.
+            return Ok((FrameAllocOptions::new().alloc_frame()?.into(), is_readonly));
+        }
+        let page = vmo.get_committed_frame(page_offset)?;
         if !self.is_shared && write {
             // Write access to private VMO-backed mapping. Performs COW directly.
             Ok((duplicate_frame(&page)?.into(), is_readonly))
@@ -257,37 +283,47 @@ impl VmMapping {
         let around_page_addr = page_fault_addr & SURROUNDING_PAGE_ADDR_MASK;
         let size = min(vmo.size(), self.map_size.get());
 
-        let start_addr = max(around_page_addr, self.map_to_addr);
+        let mut start_addr = max(around_page_addr, self.map_to_addr);
         let end_addr = min(
             start_addr + SURROUNDING_PAGE_NUM * PAGE_SIZE,
             self.map_to_addr + size,
         );
 
         let vm_perms = self.perms - VmPerms::WRITE;
-        let mut cursor = vm_space.cursor_mut(&(start_addr..end_addr))?;
-        let operate = move |commit_fn: &mut dyn FnMut() -> Result<UFrame>| {
-            if let VmItem::NotMapped { .. } = cursor.query().unwrap() {
-                // We regard all the surrounding pages as accessed, no matter
-                // if it is really so. Then the hardware won't bother to update
-                // the accessed bit of the page table on following accesses.
-                let page_flags = PageFlags::from(vm_perms) | PageFlags::ACCESSED;
-                let page_prop = PageProperty::new(page_flags, CachePolicy::Writeback);
-                let frame = commit_fn()?;
-                cursor.map(frame, page_prop);
-            } else {
-                let next_addr = cursor.virt_addr() + PAGE_SIZE;
-                if next_addr < end_addr {
-                    let _ = cursor.jump(next_addr);
+        'retry: loop {
+            let mut cursor = vm_space.cursor_mut(&(start_addr..end_addr))?;
+            let operate =
+                move |commit_fn: &mut dyn FnMut()
+                    -> core::result::Result<UFrame, VmoCommitError>| {
+                    if let VmItem::NotMapped { .. } = cursor.query().unwrap() {
+                        // We regard all the surrounding pages as accessed, no matter
+                        // if it is really so. Then the hardware won't bother to update
+                        // the accessed bit of the page table on following accesses.
+                        let page_flags = PageFlags::from(vm_perms) | PageFlags::ACCESSED;
+                        let page_prop = PageProperty::new(page_flags, CachePolicy::Writeback);
+                        let frame = commit_fn()?;
+                        cursor.map(frame, page_prop);
+                    } else {
+                        let next_addr = cursor.virt_addr() + PAGE_SIZE;
+                        if next_addr < end_addr {
+                            let _ = cursor.jump(next_addr);
+                        }
+                    }
+                    Ok(())
+                };
+
+            let start_offset = start_addr - self.map_to_addr;
+            let end_offset = end_addr - self.map_to_addr;
+            match vmo.try_operate_on_range(&(start_offset..end_offset), operate) {
+                Ok(_) => return Ok(()),
+                Err(VmoCommitError::NeedIo(index)) => {
+                    vmo.commit_on(index, CommitFlags::empty())?;
+                    start_addr = index * PAGE_SIZE + self.map_to_addr;
+                    continue 'retry;
+                }
+                Err(VmoCommitError::Err(e)) => return Err(e),
             }
-            Ok(())
-        };
-
-        let start_offset = start_addr - self.map_to_addr;
-        let end_offset = end_addr - self.map_to_addr;
-        vmo.operate_on_range(&(start_offset..end_offset), operate)?;
-
-        Ok(())
+        }
     }
 }
@@ -435,27 +471,48 @@ impl MappedVmo {
     /// Gets the committed frame at the input offset in the mapped VMO.
     ///
     /// If the VMO has not committed a frame at this index, it will commit
-    /// one first and return it.
-    fn get_committed_frame(&self, page_offset: usize) -> Result<UFrame> {
+    /// one first and return it. If the commit operation needs to perform I/O,
+    /// it will return a [`VmoCommitError::NeedIo`].
+    fn get_committed_frame(
+        &self,
+        page_offset: usize,
+    ) -> core::result::Result<UFrame, VmoCommitError> {
         debug_assert!(page_offset < self.range.len());
         debug_assert!(page_offset % PAGE_SIZE == 0);
-        self.vmo.commit_page(self.range.start + page_offset)
+        self.vmo.try_commit_page(self.range.start + page_offset)
+    }
+
+    /// Commits a page at a specific page index.
+    ///
+    /// This method may involve I/O operations if the VMO needs to fetch
+    /// a page from the underlying page cache.
+    pub fn commit_on(&self, page_idx: usize, commit_flags: CommitFlags) -> Result<UFrame> {
+        debug_assert!(page_idx * PAGE_SIZE < self.range.len());
+        self.vmo.commit_on(page_idx, commit_flags)
     }
 
     /// Traverses the indices within a specified range of a VMO sequentially.
     ///
     /// For each index position, you have the option to commit the page as well as
     /// perform other operations.
-    fn operate_on_range<F>(&self, range: &Range<usize>, operate: F) -> Result<()>
+    ///
+    /// If a commit operation needs to perform I/O, it will return a [`VmoCommitError::NeedIo`].
+    fn try_operate_on_range<F>(
+        &self,
+        range: &Range<usize>,
+        operate: F,
+    ) -> core::result::Result<(), VmoCommitError>
     where
-        F: FnMut(&mut dyn FnMut() -> Result<UFrame>) -> Result<()>,
+        F: FnMut(
+            &mut dyn FnMut() -> core::result::Result<UFrame, VmoCommitError>,
+        ) -> core::result::Result<(), VmoCommitError>,
     {
         debug_assert!(range.start < self.range.len());
         debug_assert!(range.end <= self.range.len());
 
         let range = self.range.start + range.start..self.range.start + range.end;
-        self.vmo.operate_on_range(&range, operate)
+        self.vmo.try_operate_on_range(&range, operate)
     }
 
     /// Duplicates the capability.
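
As a quick sanity check of the head/tail padding arithmetic that map_segment_vmo relies on, the following stand-alone sketch recomputes the zero-filled ranges for one concrete segment. The padding_ranges helper and the example numbers are illustrative only; they merely restate the formulas from the patch (page_offset = file_offset % PAGE_SIZE, tail_padding_offset = file_size + page_offset).

    // Illustrative only: restates the padding formulas from map_segment_vmo.
    const PAGE_SIZE: usize = 4096;

    /// Returns (head_zero_len, tail_zero_start, tail_zero_len).
    /// The first `head_zero_len` bytes of the segment's first page are zeroed;
    /// the page holding the end of the file content is zeroed from
    /// `tail_zero_start` for `tail_zero_len` bytes (up to the page boundary).
    fn padding_ranges(
        file_offset: usize,
        file_size: usize,
        segment_size: usize, // page-aligned size of the VMO-backed mapping
    ) -> (usize, usize, usize) {
        let page_offset = file_offset % PAGE_SIZE;
        let tail_padding_offset = file_size + page_offset;
        let tail_zero_start = tail_padding_offset % PAGE_SIZE;
        let tail_zero_len = (segment_size - tail_padding_offset) % PAGE_SIZE;
        (page_offset, tail_zero_start, tail_zero_len)
    }

    fn main() {
        // A segment whose file content starts at file offset 0x1234 and spans
        // 0x2000 bytes, mapped over four pages (0x4000 bytes).
        let (head, tail_start, tail_len) = padding_ranges(0x1234, 0x2000, 0x4000);
        assert_eq!(head, 0x234); // bytes before the segment start are zeroed
        assert_eq!(tail_start, 0x234); // file content ends 0x234 bytes into a page
        assert_eq!(tail_len, PAGE_SIZE - 0x234); // zero-fill to the page boundary
    }

Note that tail_zero_start + tail_zero_len always lands exactly on a page boundary whenever segment_size is page-aligned, which is why the patch can write the tail padding into a single duplicated frame.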
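
The heart of the atomic-mode fix in vm_mapping.rs is a retry protocol: while a page-table cursor is alive the code must not sleep, so a commit that would require I/O fails fast with VmoCommitError::NeedIo; the handler then drops the cursor, performs the blocking commit outside of it, and retries from the top. Below is a minimal stand-alone sketch of that protocol; Cursor, Vmo, CommitError, try_commit, and commit_blocking are hypothetical stand-ins, not the real OSTD/Asterinas APIs.

    use std::cell::RefCell;
    use std::collections::HashMap;

    // Stand-in for VmoCommitError::NeedIo(index): committing this page
    // index would require blocking I/O.
    enum CommitError {
        NeedIo(usize),
    }

    // Stand-in for an atomic-mode page-table cursor: while one is alive,
    // the code must not sleep (in the kernel it would hold a spinlock).
    struct Cursor;

    // Stand-in VMO: a page is either resident or must be "fetched" with I/O.
    struct Vmo {
        resident: RefCell<HashMap<usize, u64>>,
    }

    impl Vmo {
        // Never blocks: succeeds only if the page is already resident.
        fn try_commit(&self, idx: usize) -> Result<u64, CommitError> {
            self.resident
                .borrow()
                .get(&idx)
                .copied()
                .ok_or(CommitError::NeedIo(idx))
        }

        // May block on I/O; must only be called with no cursor alive.
        fn commit_blocking(&self, idx: usize) -> u64 {
            *self.resident.borrow_mut().entry(idx).or_insert(0xF00D)
        }
    }

    fn handle_fault(vmo: &Vmo, idx: usize) -> u64 {
        loop {
            let cursor = Cursor; // enter atomic mode
            match vmo.try_commit(idx) {
                // Here the real code would map `frame` under `cursor`.
                Ok(frame) => return frame,
                Err(CommitError::NeedIo(i)) => {
                    drop(cursor); // leave atomic mode before doing I/O
                    vmo.commit_blocking(i); // the page is now resident
                    continue; // retry with a fresh cursor
                }
            }
        }
    }

    fn main() {
        let vmo = Vmo { resident: RefCell::new(HashMap::new()) };
        assert_eq!(handle_fault(&vmo, 3), 0xF00D);
    }

The cost of this design is that a fault may be processed twice (once to discover the missing page, once after the commit), but correctness is preserved because each retry re-queries the page table from scratch, exactly as the 'retry loop in handle_page_fault does.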