diff --git a/Cargo.lock b/Cargo.lock index 591a040cf..d953a7984 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -121,9 +121,12 @@ dependencies = [ "aster-util", "bitflags 1.3.2", "component", + "int-to-c-enum", "lazy_static", "log", + "pod", "spin 0.9.8", + "static_assertions", ] [[package]] @@ -270,10 +273,12 @@ dependencies = [ "aster-util", "aster-virtio", "bitflags 1.3.2", + "bitvec", "controlled", "core2", "cpio-decoder", "getrandom", + "inherit-methods-macro", "int-to-c-enum", "intrusive-collections", "keyable-arc", @@ -287,6 +292,7 @@ dependencies = [ "ringbuf", "smoltcp", "spin 0.9.8", + "static_assertions", "tdx-guest", "time", "typeflags", diff --git a/framework/aster-frame/src/vm/frame.rs b/framework/aster-frame/src/vm/frame.rs index 98a1b668b..35855adf9 100644 --- a/framework/aster-frame/src/vm/frame.rs +++ b/framework/aster-frame/src/vm/frame.rs @@ -170,7 +170,7 @@ impl<'a> Iterator for VmFrameVecIter<'a> { } bitflags::bitflags! { - pub(crate) struct VmFrameFlags : usize{ + pub(crate) struct VmFrameFlags : usize { const NEED_DEALLOC = 1 << 63; } } @@ -543,7 +543,7 @@ impl<'a> VmReader<'a> { /// Limits the length of remaining data. /// /// This method ensures the postcondition of `self.remain() <= max_remain`. - pub const fn limit(&mut self, max_remain: usize) -> &mut Self { + pub const fn limit(mut self, max_remain: usize) -> Self { if max_remain < self.remain() { // Safety: the new end is less than the old end. unsafe { self.end = self.cursor.add(max_remain) }; @@ -557,7 +557,7 @@ impl<'a> VmReader<'a> { /// # Panic /// /// If `nbytes` is greater than `self.remain()`, then the method panics. - pub fn skip(&mut self, nbytes: usize) -> &mut Self { + pub fn skip(mut self, nbytes: usize) -> Self { assert!(nbytes <= self.remain()); // Safety: the new cursor is less than or equal to the end. @@ -653,7 +653,7 @@ impl<'a> VmWriter<'a> { /// Limits the length of available space. /// /// This method ensures the postcondition of `self.avail() <= max_avail`. 
- pub const fn limit(&mut self, max_avail: usize) -> &mut Self { + pub const fn limit(mut self, max_avail: usize) -> Self { if max_avail < self.avail() { // Safety: the new end is less than the old end. unsafe { self.end = self.cursor.add(max_avail) }; @@ -667,7 +667,7 @@ impl<'a> VmWriter<'a> { /// # Panic /// /// If `nbytes` is greater than `self.avail()`, then the method panics. - pub fn skip(&mut self, nbytes: usize) -> &mut Self { + pub fn skip(mut self, nbytes: usize) -> Self { assert!(nbytes <= self.avail()); // Safety: the new cursor is less than or equal to the end. diff --git a/regression/Makefile b/regression/Makefile index dc21bb1af..64da74930 100644 --- a/regression/Makefile +++ b/regression/Makefile @@ -3,6 +3,7 @@ CUR_DIR := $(patsubst %/,%,$(dir $(MKFILE_PATH))) BUILD_DIR := $(CUR_DIR)/build INITRAMFS := $(BUILD_DIR)/initramfs INITRAMFS_IMAGE := $(BUILD_DIR)/initramfs.cpio.gz +EXT2_IMAGE := $(BUILD_DIR)/ext2.img SHELL := /bin/bash INITRAMFS_EMPTY_DIRS := \ $(INITRAMFS)/sbin \ @@ -10,7 +11,8 @@ INITRAMFS_EMPTY_DIRS := \ $(INITRAMFS)/tmp \ $(INITRAMFS)/opt \ $(INITRAMFS)/proc \ - $(INITRAMFS)/dev + $(INITRAMFS)/dev \ + $(INITRAMFS)/ext2 INITRAMFS_ALL_DIRS := \ $(INITRAMFS)/etc \ $(INITRAMFS)/lib/x86_64-linux-gnu \ @@ -90,7 +92,11 @@ endif @echo "Generating the initramfs image..." @(cd $(INITRAMFS); find . 
| cpio -o -H newc | gzip) > $@ -build: $(INITRAMFS_IMAGE) +$(EXT2_IMAGE): + @dd if=/dev/zero of=$(EXT2_IMAGE) bs=2G count=1 + @mke2fs $(EXT2_IMAGE) + +build: $(INITRAMFS_IMAGE) $(EXT2_IMAGE) clean: @rm -rf $(BUILD_DIR) diff --git a/runner/src/main.rs b/runner/src/main.rs index 5cf11734b..bca0d69e6 100644 --- a/runner/src/main.rs +++ b/runner/src/main.rs @@ -14,7 +14,6 @@ pub mod gdb; pub mod machine; use std::{ - fs::OpenOptions, path::{Path, PathBuf}, process::Command, }; @@ -175,9 +174,11 @@ fn main() { qemu_cmd.args(qemu_grub_efi::NOIOMMU_DEVICE_ARGS); } - let fs_image = create_fs_image(args.path.as_path()); + // TODO: Add arguments to the runner CLI tool so that the user can specify + // a list of disk drives, each of which may be in a different FS format. + let ext2_image = get_fs_image(&PathBuf::from("regression/build/ext2.img"), 0); qemu_cmd.arg("-drive"); - qemu_cmd.arg(fs_image); + qemu_cmd.arg(ext2_image); if args.boot_method == BootMethod::Microvm { let image = microvm::create_bootdev_image(args.path); @@ -221,20 +222,14 @@ fn main() { } } -pub fn create_fs_image(path: &Path) -> String { - let mut fs_img_path = path.parent().unwrap().to_str().unwrap().to_string(); - fs_img_path.push_str("/fs.img"); - let path = Path::new(fs_img_path.as_str()); - if path.exists() { - return format!("file={},if=none,format=raw,id=x0", fs_img_path.as_str()); +pub fn get_fs_image(path: &Path, drive_id: u32) -> String { + if !path.exists() { + panic!("can not find the fs image") } - let f = OpenOptions::new() - .read(true) - .write(true) - .create(true) - .open(fs_img_path.as_str()) - .unwrap(); - // 32MiB - f.set_len(64 * 1024 * 1024).unwrap(); - format!("file={},if=none,format=raw,id=x0", fs_img_path.as_str()) + + format!( + "file={},if=none,format=raw,id=x{}", + path.to_string_lossy(), + drive_id + ) } diff --git a/services/comps/block/Cargo.toml b/services/comps/block/Cargo.toml index a3393222e..342719d13 100644 --- a/services/comps/block/Cargo.toml +++ 
b/services/comps/block/Cargo.toml @@ -8,10 +8,13 @@ edition = "2021" [dependencies] bitflags = "1.3" spin = "0.9.4" +pod = { git = "https://github.com/asterinas/pod", rev = "d7dba56" } aster-frame = { path = "../../../framework/aster-frame" } aster-util = { path = "../../libs/aster-util" } +int-to-c-enum = { path = "../../libs/int-to-c-enum" } component = { path = "../../libs/comp-sys/component" } log = "0.4" +static_assertions = "1.1.0" [features] diff --git a/services/comps/block/src/bio.rs b/services/comps/block/src/bio.rs new file mode 100644 index 000000000..dbdc2a7b9 --- /dev/null +++ b/services/comps/block/src/bio.rs @@ -0,0 +1,475 @@ +use crate::prelude::*; + +use super::{id::Sid, BlockDevice}; + +use aster_frame::{ + sync::WaitQueue, + vm::{VmFrame, VmReader, VmSegment, VmWriter}, +}; +use int_to_c_enum::TryFromInt; + +/// The unit for block I/O. +/// +/// Each `Bio` packs the following information: +/// (1) The type of the I/O, +/// (2) The target sectors on the device for doing I/O, +/// (3) The memory locations (`BioSegment`) from/to which data are read/written, +/// (4) The optional callback function that will be invoked when the I/O is completed. +pub struct Bio(Arc); + +impl Bio { + /// Constructs a new `Bio`. + /// + /// The `type_` describes the type of the I/O. + /// The `start_sid` is the starting sector id on the device. + /// The `segments` describes the memory segments. + /// The `complete_fn` is the optional callback function. + pub fn new( + type_: BioType, + start_sid: Sid, + segments: Vec, + complete_fn: Option, + ) -> Self { + let nsectors = segments + .iter() + .map(|segment| segment.nsectors().to_raw()) + .sum(); + + let inner = Arc::new(BioInner { + type_, + sid_range: start_sid..start_sid + nsectors, + segments, + complete_fn, + status: AtomicU32::new(BioStatus::Init as u32), + wait_queue: WaitQueue::new(), + }); + Self(inner) + } + + /// Returns the type. 
+ pub fn type_(&self) -> BioType { + self.0.type_() + } + + /// Returns the range of target sectors on the device. + pub fn sid_range(&self) -> &Range { + self.0.sid_range() + } + + /// Returns the slice to the memory segments. + pub fn segments(&self) -> &[BioSegment] { + self.0.segments() + } + + /// Returns the status. + pub fn status(&self) -> BioStatus { + self.0.status() + } + + /// Submits self to the `block_device` asynchronously. + /// + /// Returns a `BioWaiter` to the caller to wait for its completion. + /// + /// # Panic + /// + /// The caller must not submit a `Bio` more than once. Otherwise, a panic shall be triggered. + pub fn submit(&self, block_device: &dyn BlockDevice) -> Result { + // Change the status from "Init" to "Submit". + let result = self.0.status.compare_exchange( + BioStatus::Init as u32, + BioStatus::Submit as u32, + Ordering::Release, + Ordering::Relaxed, + ); + assert!(result.is_ok()); + + if let Err(e) = block_device + .request_queue() + .enqueue(SubmittedBio(self.0.clone())) + { + // Fail to submit, revert the status. + let result = self.0.status.compare_exchange( + BioStatus::Submit as u32, + BioStatus::Init as u32, + Ordering::Release, + Ordering::Relaxed, + ); + assert!(result.is_ok()); + return Err(e); + } + + Ok(BioWaiter { + bios: vec![self.0.clone()], + }) + } + + /// Submits self to the `block_device` and waits for the result synchronously. + /// + /// Returns the result status of the `Bio`. + /// + /// # Panic + /// + /// The caller must not submit a `Bio` more than once. Otherwise, a panic shall be triggered. + pub fn submit_sync( + &self, + block_device: &dyn BlockDevice, + ) -> Result { + let waiter = self.submit(block_device)?; + match waiter.wait() { + Some(status) => { + assert!(status == BioStatus::Complete); + Ok(status) + } + None => { + let status = self.status(); + assert!(status != BioStatus::Complete); + Ok(status) + } + } + } +} + +/// The error type returned when enqueueing the `Bio`. 
+#[derive(Clone, Copy, PartialEq, Eq, Debug)] +pub enum BioEnqueueError { + /// The request queue is full + IsFull, + /// Refuse to enqueue the bio + Refused, +} + +impl From for aster_frame::Error { + fn from(_error: BioEnqueueError) -> Self { + aster_frame::Error::NotEnoughResources + } +} + +/// A waiter for `Bio` submissions. +/// +/// This structure holds a list of `Bio` requests and provides functionality to +/// wait for their completion and retrieve their statuses. +#[must_use] +pub struct BioWaiter { + bios: Vec>, +} + +impl BioWaiter { + /// Constructs a new `BioWaiter` instance with no `Bio` requests. + pub fn new() -> Self { + Self { bios: Vec::new() } + } + + /// Returns the number of `Bio` requests associated with `self`. + pub fn nreqs(&self) -> usize { + self.bios.len() + } + + /// Gets the `index`-th `Bio` request associated with `self`. + /// + /// # Panic + /// + /// If the `index` is out of bounds, this method will panic. + pub fn req(&self, index: usize) -> Bio { + Bio(self.bios[index].clone()) + } + + /// Returns the status of the `index`-th `Bio` request associated with `self`. + /// + /// # Panic + /// + /// If the `index` is out of bounds, this method will panic. + pub fn status(&self, index: usize) -> BioStatus { + self.bios[index].status() + } + + /// Merges the `Bio` requests from another `BioWaiter` into this one. + /// + /// The another `BioWaiter`'s `Bio` requests are appended to the end of + /// the `Bio` list of `self`, effectively concatenating the two lists. + pub fn concat(&mut self, mut other: Self) { + self.bios.append(&mut other.bios); + } + + /// Waits for the completion of all `Bio` requests. + /// + /// This method iterates through each `Bio` in the list, waiting for their + /// completion. + /// + /// The return value is an option indicating whether all the requests in the list + /// have successfully completed. + /// On success this value is guaranteed to be equal to `Some(BioStatus::Complete)`. 
+ pub fn wait(&self) -> Option { + let mut ret = Some(BioStatus::Complete); + + for bio in self.bios.iter() { + let status = bio.wait_queue.wait_until(|| { + let status = bio.status(); + if status != BioStatus::Submit { + Some(status) + } else { + None + } + }); + if status != BioStatus::Complete && ret.is_some() { + ret = None; + } + } + + ret + } +} + +impl Default for BioWaiter { + fn default() -> Self { + Self::new() + } +} + +/// A submitted `Bio` object. +/// +/// The request queue of block device only accepts a `SubmittedBio` into the queue. +pub struct SubmittedBio(Arc); + +impl SubmittedBio { + /// Returns the type. + pub fn type_(&self) -> BioType { + self.0.type_() + } + + /// Returns the range of target sectors on the device. + pub fn sid_range(&self) -> &Range { + self.0.sid_range() + } + + /// Returns the slice to the memory segments. + pub fn segments(&self) -> &[BioSegment] { + self.0.segments() + } + + /// Returns the status. + pub fn status(&self) -> BioStatus { + self.0.status() + } + + /// Completes the `Bio` with the `status` and invokes the callback function. + /// + /// When the driver finishes the request for this `Bio`, it will call this method. + pub fn complete(&self, status: BioStatus) { + assert!(status != BioStatus::Init && status != BioStatus::Submit); + + // Set the status. + let result = self.0.status.compare_exchange( + BioStatus::Submit as u32, + status as u32, + Ordering::Release, + Ordering::Relaxed, + ); + assert!(result.is_ok()); + + self.0.wait_queue.wake_all(); + if let Some(complete_fn) = self.0.complete_fn { + complete_fn(self); + } + } +} + +/// The common inner part of `Bio`. 
+struct BioInner { + /// The type of the I/O + type_: BioType, + /// The range of the sector id on device + sid_range: Range, + /// The memory segments in this `Bio` + segments: Vec, + /// The I/O completion method + complete_fn: Option, + /// The I/O status + status: AtomicU32, + /// The wait queue for I/O completion + wait_queue: WaitQueue, +} + +impl BioInner { + pub fn type_(&self) -> BioType { + self.type_ + } + + pub fn sid_range(&self) -> &Range { + &self.sid_range + } + + pub fn segments(&self) -> &[BioSegment] { + &self.segments + } + + pub fn status(&self) -> BioStatus { + BioStatus::try_from(self.status.load(Ordering::Relaxed)).unwrap() + } +} + +/// The type of `Bio`. +#[derive(Clone, Copy, Debug, PartialEq, TryFromInt)] +#[repr(u8)] +pub enum BioType { + /// Read sectors from the device. + Read = 0, + /// Write sectors into the device. + Write = 1, + /// Flush the volatile write cache. + Flush = 2, + /// Discard sectors. + Discard = 3, +} + +/// The status of `Bio`. +#[derive(Clone, Copy, PartialEq, Eq, Debug, TryFromInt)] +#[repr(u32)] +pub enum BioStatus { + /// The initial status for a newly created `Bio`. + Init = 0, + /// After a `Bio` is submitted, its status will be changed to "Submit". + Submit = 1, + /// The I/O operation has been successfully completed. + Complete = 2, + /// The I/O operation is not supported. + NotSupported = 3, + /// Insufficient space is available to perform the I/O operation. + NoSpace = 4, + /// An error occurred while doing I/O. + IoError = 5, +} + +/// `BioSegment` is a smallest memory unit in block I/O. +/// +/// It is a contiguous memory region that contains multiple sectors. +#[derive(Debug, Clone)] +pub struct BioSegment { + /// The contiguous pages on which this segment resides. + pages: Pages, + /// The offset (in bytes) relative to the first page. + offset: AlignedUsize, + // The length (in bytes), may cross pages. 
+ len: AlignedUsize, +} + +const SECTOR_SIZE: u16 = super::SECTOR_SIZE as u16; + +#[derive(Debug, Clone)] +enum Pages { + Frame(VmFrame), + Segment(VmSegment), +} + +impl<'a> BioSegment { + /// Constructs a new `BioSegment` from `VmSegment`. + pub fn from_segment(segment: VmSegment, offset: usize, len: usize) -> Self { + assert!(offset + len <= segment.nbytes()); + + Self { + pages: Pages::Segment(segment), + offset: AlignedUsize::::new(offset).unwrap(), + len: AlignedUsize::::new(len).unwrap(), + } + } + + /// Constructs a new `BioSegment` from `VmFrame`. + pub fn from_frame(frame: VmFrame, offset: usize, len: usize) -> Self { + assert!(offset + len <= super::BLOCK_SIZE); + + Self { + pages: Pages::Frame(frame), + offset: AlignedUsize::::new(offset).unwrap(), + len: AlignedUsize::::new(len).unwrap(), + } + } + + /// Returns the number of sectors. + pub fn nsectors(&self) -> Sid { + Sid::from_offset(self.len.value()) + } + + /// Returns the number of bytes. + pub fn nbytes(&self) -> usize { + self.len.value() + } + + /// Returns a reader to read data from it. + pub fn reader(&'a self) -> VmReader<'a> { + let reader = match &self.pages { + Pages::Segment(segment) => segment.reader(), + Pages::Frame(frame) => frame.reader(), + }; + reader.skip(self.offset.value()).limit(self.len.value()) + } + + /// Returns a writer to write data into it. + pub fn writer(&'a self) -> VmWriter<'a> { + let writer = match &self.pages { + Pages::Segment(segment) => segment.writer(), + Pages::Frame(frame) => frame.writer(), + }; + writer.skip(self.offset.value()).limit(self.len.value()) + } +} + +/// An aligned unsigned integer number. +/// +/// An instance of `AlignedUsize` is guaranteed to have a value that is a multiple +/// of `N`, a predetermined const value. It is preferable to express an unsigned integer value +/// in type `AlignedUsize<_>` instead of `usize` if the value must satisfy an alignment requirement. +/// This helps readability and prevents bugs. 
+/// +/// # Examples +/// +/// ```rust +/// const SECTOR_SIZE: u16 = 512; +/// +/// let sector_num = 1234; // The 1234-th sector +/// let sector_offset: AlignedUsize = { +/// let sector_offset = sector_num * (SECTOR_SIZE as usize); +/// AlignedUsize::::new(sector_offset).unwrap() +/// }; +/// assert!(sector_offset.value() % sector_offset.align() == 0); +/// ``` +/// +/// # Limitation +/// +/// Currently, the alignment const value must be expressed in `u16`; +/// it is not possible to use a larger or smaller type. +/// This limitation is inherited from that of Rust's const generics: +/// your code can be generic over the _value_ of a const, but not the _type_ of the const. +/// We choose `u16` because it is reasonably large to represent any alignment value +/// used in practice. +#[derive(Debug, Clone)] +pub struct AlignedUsize(usize); + +impl AlignedUsize { + /// Constructs a new instance of aligned integer if the given value is aligned. + pub fn new(val: usize) -> Option { + if val % (N as usize) == 0 { + Some(Self(val)) + } else { + None + } + } + + /// Returns the value. + pub fn value(&self) -> usize { + self.0 + } + + /// Returns the corresponding ID. + /// + /// The so-called "ID" of an aligned integer is defined to be `self.value() / self.align()`. + /// This value is named ID because one common use case is using `Aligned` to express + /// the byte offset of a sector, block, or page. In this case, the `id` method returns + /// the ID of the corresponding sector, block, or page. + pub fn id(&self) -> usize { + self.value() / self.align() + } + + /// Returns the alignment. 
+ pub fn align(&self) -> usize { + N as usize + } +} diff --git a/services/comps/block/src/id.rs b/services/comps/block/src/id.rs new file mode 100644 index 000000000..a193cfd33 --- /dev/null +++ b/services/comps/block/src/id.rs @@ -0,0 +1,100 @@ +use core::{ + iter::Step, + ops::{Add, Sub}, +}; +use pod::Pod; +use static_assertions::const_assert; + +/// The block index used in the filesystem. +pub type Bid = BlockId; +/// The sector index used in the device. +pub type Sid = BlockId; + +impl From for Sid { + fn from(bid: Bid) -> Self { + Self::new(bid.to_raw() * (BLOCK_SIZE / SECTOR_SIZE) as u64) + } +} + +const BLOCK_SIZE: u16 = super::BLOCK_SIZE as u16; +const SECTOR_SIZE: u16 = super::SECTOR_SIZE as u16; +const_assert!(BLOCK_SIZE / SECTOR_SIZE >= 1); + +/// An index of a block. +/// +/// The `BlockId` is a generic type that is parameterized by a constant `N`, which +/// represents the size of each block in bytes. The `BlockId<_>` provides a type-safe way of handling +/// block indices. +/// An Instance of `BlockId<_>` is guaranteed to represent valid block index, derived from byte offset +/// and the specified block size `N`. +/// +/// # Examples +/// +/// ```rust +/// const BLOCK_SIZE: u16 = 512; +/// +/// let bytes_offset = 2048; +/// let block_id = BlockId::from_offset(bytes_offset); +/// assert!(block_id == (bytes_offset / BLOCK_SIZE)); +/// ``` +/// +/// # Limitation +/// +/// Currently, the block size is expressed in `u16`. We choose `u16` because +/// it is reasonably large to represent the common block size used in practice. +#[repr(C)] +#[derive(Copy, Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord, Pod)] +pub struct BlockId(u64); + +impl BlockId { + /// Constructs an id from a raw id. + pub const fn new(raw_id: u64) -> Self { + Self(raw_id) + } + + /// Constructs an id from a byte offset. + pub const fn from_offset(offset: usize) -> Self { + Self((offset / (N as usize)) as _) + } + + /// Converts to a byte offset. 
+ pub fn to_offset(self) -> usize { + (self.0 as usize) * (N as usize) + } + + /// Converts to raw id. + pub fn to_raw(self) -> u64 { + self.0 + } +} + +impl Add for BlockId { + type Output = Self; + + fn add(self, other: u64) -> Self::Output { + Self(self.0 + other) + } +} + +impl Sub for BlockId { + type Output = Self; + + fn sub(self, other: u64) -> Self::Output { + Self(self.0 - other) + } +} + +/// Implements the `Step` trait to iterate over `Range`. +impl Step for BlockId { + fn steps_between(start: &Self, end: &Self) -> Option { + u64::steps_between(&start.0, &end.0) + } + + fn forward_checked(start: Self, count: usize) -> Option { + u64::forward_checked(start.0, count).map(Self::new) + } + + fn backward_checked(start: Self, count: usize) -> Option { + u64::backward_checked(start.0, count).map(Self::new) + } +} diff --git a/services/comps/block/src/impl_block_device.rs b/services/comps/block/src/impl_block_device.rs new file mode 100644 index 000000000..6bc00e4d2 --- /dev/null +++ b/services/comps/block/src/impl_block_device.rs @@ -0,0 +1,237 @@ +use crate::prelude::*; + +use super::{ + bio::{Bio, BioEnqueueError, BioSegment, BioStatus, BioType, BioWaiter, SubmittedBio}, + id::{Bid, Sid}, + BlockDevice, BLOCK_SIZE, SECTOR_SIZE, +}; + +use aster_frame::vm::{VmAllocOptions, VmFrame, VmIo, VmSegment}; + +/// Implements several commonly used APIs for the block device to conveniently +/// read and write block(s). +impl dyn BlockDevice { + /// Synchronously reads contiguous blocks starting from the `bid`. + pub fn read_blocks_sync( + &self, + bid: Bid, + segment: &VmSegment, + ) -> Result { + let bio = create_bio_from_segment(BioType::Read, bid, segment); + let status = bio.submit_sync(self)?; + Ok(status) + } + + /// Asynchronously reads contiguous blocks starting from the `bid`. 
+ pub fn read_blocks(&self, bid: Bid, segment: &VmSegment) -> Result { + let bio = create_bio_from_segment(BioType::Read, bid, segment); + bio.submit(self) + } + + /// Synchronously reads one block indicated by the `bid`. + pub fn read_block_sync(&self, bid: Bid, frame: &VmFrame) -> Result { + let bio = create_bio_from_frame(BioType::Read, bid, frame); + let status = bio.submit_sync(self)?; + Ok(status) + } + + /// Asynchronously reads one block indicated by the `bid`. + pub fn read_block(&self, bid: Bid, frame: &VmFrame) -> Result { + let bio = create_bio_from_frame(BioType::Read, bid, frame); + bio.submit(self) + } + + /// Synchronously writes contiguous blocks starting from the `bid`. + pub fn write_blocks_sync( + &self, + bid: Bid, + segment: &VmSegment, + ) -> Result { + let bio = create_bio_from_segment(BioType::Write, bid, segment); + let status = bio.submit_sync(self)?; + Ok(status) + } + + /// Asynchronously writes contiguous blocks starting from the `bid`. + pub fn write_blocks( + &self, + bid: Bid, + segment: &VmSegment, + ) -> Result { + let bio = create_bio_from_segment(BioType::Write, bid, segment); + bio.submit(self) + } + + /// Synchronously writes one block indicated by the `bid`. + pub fn write_block_sync( + &self, + bid: Bid, + frame: &VmFrame, + ) -> Result { + let bio = create_bio_from_frame(BioType::Write, bid, frame); + let status = bio.submit_sync(self)?; + Ok(status) + } + + /// Asynchronously writes one block indicated by the `bid`. + pub fn write_block(&self, bid: Bid, frame: &VmFrame) -> Result { + let bio = create_bio_from_frame(BioType::Write, bid, frame); + bio.submit(self) + } +} + +impl VmIo for dyn BlockDevice { + /// Reads consecutive bytes of several sectors in size. 
+ fn read_bytes(&self, offset: usize, buf: &mut [u8]) -> aster_frame::Result<()> { + if offset % SECTOR_SIZE != 0 || buf.len() % SECTOR_SIZE != 0 { + return Err(aster_frame::Error::InvalidArgs); + } + if buf.is_empty() { + return Ok(()); + } + + let (bio, bio_segment) = { + let num_blocks = { + let first = Bid::from_offset(offset).to_raw(); + let last = Bid::from_offset(offset + buf.len() - 1).to_raw(); + last - first + 1 + }; + let segment = VmAllocOptions::new(num_blocks as usize) + .uninit(true) + .is_contiguous(true) + .alloc_contiguous()?; + let bio_segment = BioSegment::from_segment(segment, offset % BLOCK_SIZE, buf.len()); + + ( + Bio::new( + BioType::Read, + Sid::from_offset(offset), + vec![bio_segment.clone()], + None, + ), + bio_segment, + ) + }; + + let status = bio.submit_sync(self)?; + match status { + BioStatus::Complete => { + let _ = bio_segment.reader().read(&mut buf.into()); + Ok(()) + } + _ => Err(aster_frame::Error::IoError), + } + } + + /// Writes consecutive bytes of several sectors in size. 
+ fn write_bytes(&self, offset: usize, buf: &[u8]) -> aster_frame::Result<()> { + if offset % SECTOR_SIZE != 0 || buf.len() % SECTOR_SIZE != 0 { + return Err(aster_frame::Error::InvalidArgs); + } + if buf.is_empty() { + return Ok(()); + } + + let bio = { + let num_blocks = { + let first = Bid::from_offset(offset).to_raw(); + let last = Bid::from_offset(offset + buf.len() - 1).to_raw(); + last - first + 1 + }; + let segment = VmAllocOptions::new(num_blocks as usize) + .uninit(true) + .is_contiguous(true) + .alloc_contiguous()?; + segment.write_bytes(offset % BLOCK_SIZE, buf)?; + let len = segment + .writer() + .skip(offset % BLOCK_SIZE) + .write(&mut buf.into()); + let bio_segment = BioSegment::from_segment(segment, offset % BLOCK_SIZE, len); + Bio::new( + BioType::Write, + Sid::from_offset(offset), + vec![bio_segment], + None, + ) + }; + + let status = bio.submit_sync(self)?; + match status { + BioStatus::Complete => Ok(()), + _ => Err(aster_frame::Error::IoError), + } + } +} + +impl dyn BlockDevice { + /// Asynchronously writes consecutive bytes of several sectors in size. 
+ pub fn write_bytes_async(&self, offset: usize, buf: &[u8]) -> aster_frame::Result { + if offset % SECTOR_SIZE != 0 || buf.len() % SECTOR_SIZE != 0 { + return Err(aster_frame::Error::InvalidArgs); + } + if buf.is_empty() { + return Ok(BioWaiter::new()); + } + + let bio = { + let num_blocks = { + let first = Bid::from_offset(offset).to_raw(); + let last = Bid::from_offset(offset + buf.len() - 1).to_raw(); + last - first + 1 + }; + let segment = VmAllocOptions::new(num_blocks as usize) + .uninit(true) + .is_contiguous(true) + .alloc_contiguous()?; + segment.write_bytes(offset % BLOCK_SIZE, buf)?; + let len = segment + .writer() + .skip(offset % BLOCK_SIZE) + .write(&mut buf.into()); + let bio_segment = BioSegment::from_segment(segment, offset % BLOCK_SIZE, len); + Bio::new( + BioType::Write, + Sid::from_offset(offset), + vec![bio_segment], + Some(general_complete_fn), + ) + }; + + let complete = bio.submit(self)?; + Ok(complete) + } +} + +// TODO: Maybe we should have a builder for `Bio`. +fn create_bio_from_segment(type_: BioType, bid: Bid, segment: &VmSegment) -> Bio { + let bio_segment = BioSegment::from_segment(segment.clone(), 0, segment.nbytes()); + Bio::new( + type_, + Sid::from(bid), + vec![bio_segment], + Some(general_complete_fn), + ) +} + +// TODO: Maybe we should have a builder for `Bio`. 
+fn create_bio_from_frame(type_: BioType, bid: Bid, frame: &VmFrame) -> Bio { + let bio_segment = BioSegment::from_frame(frame.clone(), 0, BLOCK_SIZE); + Bio::new( + type_, + Sid::from(bid), + vec![bio_segment], + Some(general_complete_fn), + ) +} + +fn general_complete_fn(bio: &SubmittedBio) { + match bio.status() { + BioStatus::Complete => (), + err_status => log::error!( + "failed to do {:?} on the device with error status: {:?}", + bio.type_(), + err_status + ), + } +} diff --git a/services/comps/block/src/lib.rs b/services/comps/block/src/lib.rs index 1c1c8a055..5f40d994c 100644 --- a/services/comps/block/src/lib.rs +++ b/services/comps/block/src/lib.rs @@ -1,32 +1,67 @@ //! The block devices of Asterinas. +//! +//! This crate provides a number of base components for block devices, including +//! an abstraction of block devices, as well as the registration and lookup of block devices. +//! +//! Block devices use a queue-based model for asynchronous I/O operations. It is necessary +//! for a block device to maintain a queue to handle I/O requests. The users (e.g., fs) +//! submit I/O requests to this queue and wait for their completion. Drivers implementing +//! block devices can create their own queues as needed, with the possibility to reorder +//! and merge requests within the queue. +//! +//! This crate also offers the `Bio` related data structures and APIs to accomplish +//! safe and convenient block I/O operations, for example: +//! +//! ```no_run +//! // Creates a bio request. +//! let bio = Bio::new(BioType::Write, sid, segments, None); +//! // Submits to the block device. +//! let bio_waiter = bio.submit(block_device)?; +//! // Waits for the completion. +//! let Some(status) = bio_waiter.wait() else { +//! return Err(IoError); +//! }; +//! assert!(status == BioStatus::Complete); +//! ``` +//! 
#![no_std] #![forbid(unsafe_code)] #![feature(fn_traits)] +#![feature(step_trait)] +#![feature(trait_upcasting)] +#![allow(dead_code)] extern crate alloc; -use core::any::Any; -use core::fmt::Debug; +pub mod bio; +pub mod id; +mod impl_block_device; +mod prelude; +pub mod request_queue; + +use self::{prelude::*, request_queue::BioRequestQueue}; -use alloc::collections::BTreeMap; -use alloc::string::String; -use alloc::sync::Arc; -use alloc::vec::Vec; use aster_frame::sync::SpinLock; -use aster_frame::vm::VmReader; -use aster_frame::vm::VmWriter; use component::init_component; use component::ComponentInitError; + use spin::Once; -pub const BLK_SIZE: usize = 512; +pub const BLOCK_SIZE: usize = aster_frame::config::PAGE_SIZE; +pub const SECTOR_SIZE: usize = 512; pub trait BlockDevice: Send + Sync + Any + Debug { - fn read_block(&self, block_id: usize, buf: &[VmWriter]); - fn write_block(&self, block_id: usize, buf: &[VmReader]); + /// Returns this block device's request queue, to which block I/O requests may be submitted. 
+ fn request_queue(&self) -> &dyn BioRequestQueue; fn handle_irq(&self); } +impl dyn BlockDevice { + pub fn downcast_ref(&self) -> Option<&T> { + (self as &dyn Any).downcast_ref::() + } +} + pub fn register_device(name: String, device: Arc) { COMPONENT .get() diff --git a/services/comps/block/src/prelude.rs b/services/comps/block/src/prelude.rs new file mode 100644 index 000000000..dca1f8ff2 --- /dev/null +++ b/services/comps/block/src/prelude.rs @@ -0,0 +1,9 @@ +pub(crate) use alloc::collections::{BTreeMap, VecDeque}; +pub(crate) use alloc::string::String; +pub(crate) use alloc::sync::Arc; +pub(crate) use alloc::vec; +pub(crate) use alloc::vec::Vec; +pub(crate) use core::any::Any; +pub(crate) use core::fmt::Debug; +pub(crate) use core::ops::Range; +pub(crate) use core::sync::atomic::{AtomicU32, Ordering}; diff --git a/services/comps/block/src/request_queue.rs b/services/comps/block/src/request_queue.rs new file mode 100644 index 000000000..d0403e9c4 --- /dev/null +++ b/services/comps/block/src/request_queue.rs @@ -0,0 +1,93 @@ +use crate::prelude::*; + +use super::{ + bio::{BioEnqueueError, BioType, SubmittedBio}, + id::Sid, +}; + +/// Represents the software staging queue for the `BioRequest` objects. +pub trait BioRequestQueue { + /// Enqueues a `SubmittedBio` to this queue. + /// + /// This `SubmittedBio` will be merged into an existing `BioRequest`, or a new + /// `BioRequest` will be created from the `SubmittedBio` before being placed + /// into the queue. + /// + /// This method will wake up the waiter if a new `BioRequest` is enqueued. + fn enqueue(&self, bio: SubmittedBio) -> Result<(), BioEnqueueError>; + + /// Dequeues a `BioRequest` from this queue. + /// + /// This method will wait until one request can be retrieved. + fn dequeue(&self) -> BioRequest; +} + +/// The block I/O request. 
+pub struct BioRequest { + /// The type of the I/O + type_: BioType, + /// The range of target sectors on the device + sid_range: Range, + /// The submitted bios + bios: VecDeque, +} + +impl BioRequest { + /// Returns the type of the I/O. + pub fn type_(&self) -> BioType { + self.type_ + } + + /// Returns the range of sector id on device. + pub fn sid_range(&self) -> &Range { + &self.sid_range + } + + /// Returns an iterator to the `SubmittedBio`s. + pub fn bios(&self) -> impl Iterator { + self.bios.iter() + } + + /// Returns `true` if can merge the `SubmittedBio`, `false` otherwise. + pub fn can_merge(&self, rq_bio: &SubmittedBio) -> bool { + if rq_bio.type_() != self.type_ { + return false; + } + + rq_bio.sid_range().start == self.sid_range.end + || rq_bio.sid_range().end == self.sid_range.start + } + + /// Merges the `SubmittedBio` into this request. + /// + /// The merged `SubmittedBio` can only be placed at the front or back. + /// + /// # Panic + /// + /// If the `SubmittedBio` can not be merged, this method will panic. 
+ pub fn merge_bio(&mut self, rq_bio: SubmittedBio) { + assert!(self.can_merge(&rq_bio)); + + if rq_bio.sid_range().start == self.sid_range.end { + self.sid_range.end = rq_bio.sid_range().end; + self.bios.push_back(rq_bio); + } else { + self.sid_range.start = rq_bio.sid_range().start; + self.bios.push_front(rq_bio); + } + } +} + +impl From for BioRequest { + fn from(bio: SubmittedBio) -> Self { + Self { + type_: bio.type_(), + sid_range: bio.sid_range().clone(), + bios: { + let mut bios = VecDeque::with_capacity(1); + bios.push_front(bio); + bios + }, + } + } +} diff --git a/services/comps/virtio/src/device/block/device.rs b/services/comps/virtio/src/device/block/device.rs index 181265eaf..6ff7cf7d6 100644 --- a/services/comps/virtio/src/device/block/device.rs +++ b/services/comps/virtio/src/device/block/device.rs @@ -1,28 +1,132 @@ -use core::{hint::spin_loop, mem::size_of}; +use core::{ + fmt::Debug, + hint::spin_loop, + mem::size_of, + sync::atomic::{AtomicUsize, Ordering}, +}; -use alloc::{boxed::Box, string::ToString, sync::Arc}; -use alloc::{boxed::Box, string::ToString, sync::Arc, vec::Vec}; +use alloc::{boxed::Box, collections::VecDeque, string::ToString, sync::Arc, vec::Vec}; +use aster_block::{ + bio::{BioEnqueueError, BioStatus, BioType, SubmittedBio}, + id::Sid, + request_queue::{BioRequest, BioRequestQueue}, +}; use aster_frame::{ io_mem::IoMem, sync::SpinLock, + sync::{Mutex, WaitQueue}, trap::TrapFrame, vm::{VmAllocOptions, VmFrame, VmIo, VmReader, VmWriter}, }; use aster_util::safe_ptr::SafePtr; use log::info; +use pod::Pod; use crate::{ - device::block::{BlkReq, BlkResp, ReqType, RespStatus}, + device::block::{ReqType, RespStatus}, device::VirtioDeviceError, queue::VirtQueue, transport::VirtioTransport, }; -use super::{BlkFeatures, VirtioBlkConfig}; +use super::{BlockFeatures, VirtioBlockConfig}; #[derive(Debug)] pub struct BlockDevice { - config: SafePtr, + device: DeviceInner, + /// The software staging queue. 
+ queue: BioSingleQueue, +} + +impl BlockDevice { + /// Creates a new VirtIO-Block driver and registers it. + pub(crate) fn init(transport: Box) -> Result<(), VirtioDeviceError> { + let block_device = { + let device = DeviceInner::init(transport)?; + Self { + device, + queue: BioSingleQueue::new(), + } + }; + aster_block::register_device(super::DEVICE_NAME.to_string(), Arc::new(block_device)); + Ok(()) + } + + /// Dequeues a `BioRequest` from the software staging queue and + /// processes the request. + /// + /// TODO: Current read and write operations are still synchronous, + /// it needs to be modified to use the queue-based asynchronous programming pattern. + pub fn handle_requests(&self) { + let request = self.queue.dequeue(); + match request.type_() { + BioType::Read => self.do_read(&request), + BioType::Write => self.do_write(&request), + BioType::Flush | BioType::Discard => todo!(), + } + } + + fn do_read(&self, request: &BioRequest) { + let start_sid = request.sid_range().start; + + let writers = { + let mut writers = Vec::new(); + for bio in request.bios() { + for segment in bio.segments() { + writers.push(segment.writer()); + } + } + writers + }; + + self.device.read(start_sid, writers.as_slice()); + + for bio in request.bios() { + bio.complete(BioStatus::Complete); + } + } + + fn do_write(&self, request: &BioRequest) { + let start_sid = request.sid_range().start; + + let readers = { + let mut readers = Vec::new(); + for bio in request.bios() { + for segment in bio.segments() { + readers.push(segment.reader()); + } + } + readers + }; + + self.device.write(start_sid, readers.as_slice()); + + for bio in request.bios() { + bio.complete(BioStatus::Complete); + } + } + + /// Negotiate features for the device specified bits 0~23 + pub(crate) fn negotiate_features(features: u64) -> u64 { + let feature = BlockFeatures::from_bits(features).unwrap(); + let support_features = BlockFeatures::from_bits(features).unwrap(); + (feature & support_features).bits + } +} + 
+impl aster_block::BlockDevice for BlockDevice { + fn request_queue(&self) -> &dyn BioRequestQueue { + &self.queue + } + + fn handle_irq(&self) { + info!("Virtio block device handle irq"); + } +} + +#[derive(Debug)] +struct DeviceInner { + config: SafePtr, queue: SpinLock, transport: Box, /// Block requests, we use VmFrame to store the requests so that @@ -34,109 +138,10 @@ pub struct BlockDevice { id_allocator: SpinLock>, } -impl BlockDevice { - /// read data from block device, this function is blocking - /// FIEME: replace slice with a more secure data structure to use dma mapping. - pub fn read(&self, block_id: usize, buf: &[VmWriter]) { - // FIXME: Handling cases without id. - let id = self.id_allocator.lock().pop().unwrap() as usize; - let req = BlkReq { - type_: ReqType::In as _, - reserved: 0, - sector: block_id as u64, - }; - let resp = BlkResp::default(); - self.block_requests - .write_val(id * size_of::(), &req) - .unwrap(); - self.block_responses - .write_val(id * size_of::(), &resp) - .unwrap(); - let req = self - .block_requests - .reader() - .skip(id * size_of::()) - .limit(size_of::()); - let resp = self - .block_responses - .writer() - .skip(id * size_of::()) - .limit(size_of::()); - - let mut outputs: Vec<&VmWriter<'_>> = buf.iter().collect(); - outputs.push(&resp); - let mut queue = self.queue.lock_irq_disabled(); - let token = queue - .add_vm(&[&req], outputs.as_slice()) - .expect("add queue failed"); - queue.notify(); - while !queue.can_pop() { - spin_loop(); - } - queue.pop_used_with_token(token).expect("pop used failed"); - let resp: BlkResp = self - .block_responses - .read_val(id * size_of::()) - .unwrap(); - self.id_allocator.lock().push(id as u8); - match RespStatus::try_from(resp.status).unwrap() { - RespStatus::Ok => {} - _ => panic!("io error in block device"), - }; - } - /// write data to block device, this function is blocking - /// FIEME: replace slice with a more secure data structure to use dma mapping. 
- pub fn write(&self, block_id: usize, buf: &[VmReader]) { - // FIXME: Handling cases without id. - let id = self.id_allocator.lock().pop().unwrap() as usize; - let req = BlkReq { - type_: ReqType::Out as _, - reserved: 0, - sector: block_id as u64, - }; - let resp = BlkResp::default(); - self.block_requests - .write_val(id * size_of::(), &req) - .unwrap(); - self.block_responses - .write_val(id * size_of::(), &resp) - .unwrap(); - let req = self - .block_requests - .reader() - .skip(id * size_of::()) - .limit(size_of::()); - let resp = self - .block_responses - .writer() - .skip(id * size_of::()) - .limit(size_of::()); - - let mut queue = self.queue.lock_irq_disabled(); - let mut inputs: Vec<&VmReader<'_>> = buf.iter().collect(); - inputs.insert(0, &req); - let token = queue - .add_vm(inputs.as_slice(), &[&resp]) - .expect("add queue failed"); - queue.notify(); - while !queue.can_pop() { - spin_loop(); - } - queue.pop_used_with_token(token).expect("pop used failed"); - let resp: BlkResp = self - .block_responses - .read_val(id * size_of::()) - .unwrap(); - self.id_allocator.lock().push(id as u8); - match RespStatus::try_from(resp.status).unwrap() { - RespStatus::Ok => {} - _ => panic!("io error in block device:{:?}", resp.status), - }; - } - - /// Create a new VirtIO-Block driver. - pub(crate) fn init(mut transport: Box) -> Result<(), VirtioDeviceError> { - let config = VirtioBlkConfig::new(transport.as_mut()); +impl DeviceInner { + /// Creates and inits the device. 
+ pub fn init(mut transport: Box) -> Result { + let config = VirtioBlockConfig::new(transport.as_mut()); let num_queues = transport.num_queues(); if num_queues != 1 { return Err(VirtioDeviceError::QueuesAmountDoNotMatch(num_queues, 1)); @@ -170,30 +175,225 @@ impl BlockDevice { info!("Virtio block device config space change"); } device.transport.finish_init(); + Ok(device) + } - aster_block::register_device(super::DEVICE_NAME.to_string(), Arc::new(device)); + /// Reads data from the block device, this function is blocking. + /// FIEME: replace slice with a more secure data structure to use dma mapping. + pub fn read(&self, sector_id: Sid, buf: &[VmWriter]) { + // FIXME: Handling cases without id. + let id = self.id_allocator.lock().pop().unwrap() as usize; + let req = BlockReq { + type_: ReqType::In as _, + reserved: 0, + sector: sector_id.to_raw(), + }; + let resp = BlockResp::default(); + self.block_requests + .write_val(id * size_of::(), &req) + .unwrap(); + self.block_responses + .write_val(id * size_of::(), &resp) + .unwrap(); + let req_reader = self + .block_requests + .reader() + .skip(id * size_of::()) + .limit(size_of::()); + let resp_writer = self + .block_responses + .writer() + .skip(id * size_of::()) + .limit(size_of::()); + let mut outputs: Vec<&VmWriter<'_>> = buf.iter().collect(); + outputs.push(&resp_writer); + let mut queue = self.queue.lock_irq_disabled(); + let token = queue + .add_vm(&[&req_reader], outputs.as_slice()) + .expect("add queue failed"); + queue.notify(); + while !queue.can_pop() { + spin_loop(); + } + queue.pop_used_with_token(token).expect("pop used failed"); + let resp: BlockResp = self + .block_responses + .read_val(id * size_of::()) + .unwrap(); + self.id_allocator.lock().push(id as u8); + match RespStatus::try_from(resp.status).unwrap() { + RespStatus::Ok => {} + _ => panic!("io error in block device"), + }; + } + + /// Writes data to the block device, this function is blocking. 
+ /// FIEME: replace slice with a more secure data structure to use dma mapping. + pub fn write(&self, sector_id: Sid, buf: &[VmReader]) { + // FIXME: Handling cases without id. + let id = self.id_allocator.lock().pop().unwrap() as usize; + let req = BlockReq { + type_: ReqType::Out as _, + reserved: 0, + sector: sector_id.to_raw(), + }; + let resp = BlockResp::default(); + self.block_requests + .write_val(id * size_of::(), &req) + .unwrap(); + self.block_responses + .write_val(id * size_of::(), &resp) + .unwrap(); + let req_reader = self + .block_requests + .reader() + .skip(id * size_of::()) + .limit(size_of::()); + let resp_writer = self + .block_responses + .writer() + .skip(id * size_of::()) + .limit(size_of::()); + + let mut queue = self.queue.lock_irq_disabled(); + let mut inputs: Vec<&VmReader<'_>> = buf.iter().collect(); + inputs.insert(0, &req_reader); + let token = queue + .add_vm(inputs.as_slice(), &[&resp_writer]) + .expect("add queue failed"); + queue.notify(); + while !queue.can_pop() { + spin_loop(); + } + queue.pop_used_with_token(token).expect("pop used failed"); + let resp: BlockResp = self + .block_responses + .read_val(id * size_of::()) + .unwrap(); + self.id_allocator.lock().push(id as u8); + match RespStatus::try_from(resp.status).unwrap() { + RespStatus::Ok => {} + _ => panic!("io error in block device:{:?}", resp.status), + }; + } +} + +#[repr(C)] +#[derive(Debug, Copy, Clone, Pod)] +struct BlockReq { + pub type_: u32, + pub reserved: u32, + pub sector: u64, +} + +/// Response of a VirtIOBlock request. +#[repr(C)] +#[derive(Debug, Copy, Clone, Pod)] +struct BlockResp { + pub status: u8, +} + +impl Default for BlockResp { + fn default() -> Self { + Self { + status: RespStatus::_NotReady as _, + } + } +} + +/// A simple block I/O request queue with a single queue. 
+/// +/// It is a FIFO producer-consumer queue, where the producer (e.g., filesystem) +/// submits requests to the queue, and the consumer (e.g., block device driver) +/// continuously consumes and processes these requests from the queue. +pub struct BioSingleQueue { + queue: Mutex>, + num_requests: AtomicUsize, + wait_queue: WaitQueue, +} + +impl BioSingleQueue { + /// Creates an empty queue. + pub fn new() -> Self { + Self { + queue: Mutex::new(VecDeque::new()), + num_requests: AtomicUsize::new(0), + wait_queue: WaitQueue::new(), + } + } + + /// Returns the number of requests currently in this queue. + pub fn num_requests(&self) -> usize { + self.num_requests.load(Ordering::Relaxed) + } + + fn dec_num_requests(&self) { + self.num_requests.fetch_sub(1, Ordering::Relaxed); + } + + fn inc_num_requests(&self) { + self.num_requests.fetch_add(1, Ordering::Relaxed); + } +} + +impl Default for BioSingleQueue { + fn default() -> Self { + Self::new() + } +} + +impl Debug for BioSingleQueue { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + f.debug_struct("BioSingleQueue") + .field("num_requests", &self.num_requests()) + .finish() + } +} + +impl BioRequestQueue for BioSingleQueue { + /// Enqueues a `SubmittedBio` to this queue. + /// + /// When enqueueing the `SubmittedBio`, try to insert it into the last request if the + /// type is same and the sector range is contiguous. + /// Otherwise, creates and inserts a new request for the `SubmittedBio`. 
+ fn enqueue(&self, bio: SubmittedBio) -> Result<(), BioEnqueueError> { + let mut queue = self.queue.lock(); + if let Some(request) = queue.front_mut() { + if request.can_merge(&bio) { + request.merge_bio(bio); + return Ok(()); + } + } + + let new_request = BioRequest::from(bio); + queue.push_front(new_request); + drop(queue); + self.inc_num_requests(); + self.wait_queue.wake_all(); Ok(()) } - /// Negotiate features for the device specified bits 0~23 - pub(crate) fn negotiate_features(features: u64) -> u64 { - let feature = BlkFeatures::from_bits(features).unwrap(); - let support_features = BlkFeatures::from_bits(features).unwrap(); - (feature & support_features).bits - } -} - -impl aster_block::BlockDevice for BlockDevice { - fn read_block(&self, block_id: usize, buf: &[VmWriter]) { - self.read(block_id, buf); - } - - fn write_block(&self, block_id: usize, buf: &[VmReader]) { - self.write(block_id, buf); - } - - fn handle_irq(&self) { - info!("Virtio block device handle irq"); + /// Dequeues a `BioRequest` from this queue. + fn dequeue(&self) -> BioRequest { + let mut num_requests = self.num_requests(); + + loop { + if num_requests > 0 { + if let Some(request) = self.queue.lock().pop_back() { + self.dec_num_requests(); + return request; + } + } + + num_requests = self.wait_queue.wait_until(|| { + let num_requests = self.num_requests(); + if num_requests > 0 { + Some(num_requests) + } else { + None + } + }); + } } } diff --git a/services/comps/virtio/src/device/block/mod.rs b/services/comps/virtio/src/device/block/mod.rs index 62763d723..8473d1cce 100644 --- a/services/comps/virtio/src/device/block/mod.rs +++ b/services/comps/virtio/src/device/block/mod.rs @@ -8,12 +8,11 @@ use pod::Pod; use crate::transport::VirtioTransport; -pub const BLK_SIZE: usize = 512; pub static DEVICE_NAME: &str = "Virtio-Block"; bitflags! 
{ /// features for virtio block device - pub(crate) struct BlkFeatures : u64{ + pub(crate) struct BlockFeatures : u64 { const BARRIER = 1 << 0; const SIZE_MAX = 1 << 1; const SEG_MAX = 1 << 2; @@ -29,29 +28,6 @@ bitflags! { } } -#[repr(C)] -#[derive(Debug, Copy, Clone, Pod)] -pub struct BlkReq { - pub type_: u32, - pub reserved: u32, - pub sector: u64, -} - -/// Response of a VirtIOBlk request. -#[repr(C)] -#[derive(Debug, Copy, Clone, Pod)] -pub struct BlkResp { - pub status: u8, -} - -impl Default for BlkResp { - fn default() -> Self { - BlkResp { - status: RespStatus::_NotReady as _, - } - } -} - #[repr(u32)] #[derive(Debug, Copy, Clone, TryFromInt)] pub enum ReqType { @@ -77,12 +53,12 @@ pub enum RespStatus { #[derive(Debug, Copy, Clone, Pod)] #[repr(C)] -pub struct VirtioBlkConfig { +pub struct VirtioBlockConfig { capacity: u64, size_max: u64, - geometry: VirtioBlkGeometry, + geometry: VirtioBlockGeometry, blk_size: u32, - topology: VirtioBlkTopology, + topology: VirtioBlockTopology, writeback: u8, unused0: [u8; 3], max_discard_sectors: u32, @@ -96,7 +72,7 @@ pub struct VirtioBlkConfig { #[derive(Debug, Copy, Clone, Pod)] #[repr(C)] -pub struct VirtioBlkGeometry { +pub struct VirtioBlockGeometry { cylinders: u16, heads: u8, sectors: u8, @@ -104,14 +80,14 @@ pub struct VirtioBlkGeometry { #[derive(Debug, Copy, Clone, Pod)] #[repr(C)] -pub struct VirtioBlkTopology { +pub struct VirtioBlockTopology { physical_block_exp: u8, alignment_offset: u8, min_io_size: u16, opt_io_size: u32, } -impl VirtioBlkConfig { +impl VirtioBlockConfig { pub(self) fn new(transport: &dyn VirtioTransport) -> SafePtr { let memory = transport.device_config_memory(); SafePtr::new(memory, 0) diff --git a/services/libs/aster-std/Cargo.toml b/services/libs/aster-std/Cargo.toml index e09364dc1..8b36f9f5f 100644 --- a/services/libs/aster-std/Cargo.toml +++ b/services/libs/aster-std/Cargo.toml @@ -64,6 +64,9 @@ log = "0.4" getrandom = { version = "0.2.10", default-features = false, features = [ 
"rdrand", ] } +bitvec = { version = "1.0", default-features = false, features = ["alloc"] } +static_assertions = "1.1.0" +inherit-methods-macro = { git = "https://github.com/asterinas/inherit-methods-macro", rev = "98f7e3e" } [dependencies.lazy_static] version = "1.0" diff --git a/services/libs/aster-std/src/driver/mod.rs b/services/libs/aster-std/src/driver/mod.rs index d829a0592..2ef474470 100644 --- a/services/libs/aster-std/src/driver/mod.rs +++ b/services/libs/aster-std/src/driver/mod.rs @@ -1,10 +1,3 @@ -use core::mem::size_of; - -use alloc::vec::Vec; -use aster_frame::{ - println, - vm::{VmAllocOptions, VmIo}, -}; use log::info; pub fn init() { @@ -13,43 +6,3 @@ pub fn init() { info!("Found Input device, name:{}", name); } } - -#[allow(unused)] -fn block_device_test() { - for (_, device) in aster_block::all_devices() { - let write_frame = VmAllocOptions::new(1).alloc_single().unwrap(); - let read_frame = VmAllocOptions::new(1).alloc_single().unwrap(); - info!("write_buffer address:{:x}", write_frame.start_paddr()); - info!("read_buffer address:{:x}", read_frame.start_paddr()); - - // init write frame - for i in 0..=8 { - let slice: [u8; 512] = [i; 512]; - write_frame.write_slice(i as usize * 512, &slice); - } - - // Test multiple Writer & Reader - let mut writers = Vec::with_capacity(8); - for i in 0..8 { - let writer = read_frame.writer().skip(i * 512).limit(512); - writers.push(writer); - } - - let mut readers = Vec::with_capacity(8); - for i in 0..8 { - let reader = write_frame.reader().skip(i * 512).limit(512); - readers.push(reader); - } - - device.write_block(0, readers.as_slice()); - device.read_block(0, writers.as_slice()); - let mut read_slice = [0u8; 512]; - let mut write_slice = [0u8; 512]; - for i in 0..8 { - read_frame.read_bytes(i * size_of::<[u8; 512]>(), &mut read_slice); - write_frame.read_bytes(i * size_of::<[u8; 512]>(), &mut write_slice); - assert_eq!(read_slice, write_slice); - } - println!("block device test passed!"); - } -} diff --git 
a/services/libs/aster-std/src/error.rs b/services/libs/aster-std/src/error.rs index 679bd83a4..7394149b6 100644 --- a/services/libs/aster-std/src/error.rs +++ b/services/libs/aster-std/src/error.rs @@ -191,12 +191,48 @@ impl From for Error { } } +impl From for Error { + fn from(error: aster_block::bio::BioEnqueueError) -> Self { + match error { + aster_block::bio::BioEnqueueError::IsFull => { + Error::with_message(Errno::EBUSY, "The request queue is full") + } + aster_block::bio::BioEnqueueError::Refused => { + Error::with_message(Errno::EBUSY, "Refuse to enqueue the bio") + } + } + } +} + +impl From for Error { + fn from(err_status: aster_block::bio::BioStatus) -> Self { + match err_status { + aster_block::bio::BioStatus::NotSupported => { + Error::with_message(Errno::EIO, "I/O operation is not supported") + } + aster_block::bio::BioStatus::NoSpace => { + Error::with_message(Errno::ENOSPC, "Insufficient space on device") + } + aster_block::bio::BioStatus::IoError => { + Error::with_message(Errno::EIO, "I/O operation fails") + } + status => panic!("Can not convert the status: {:?} to an error", status), + } + } +} + impl From for Error { fn from(_: core::str::Utf8Error) -> Self { Error::with_message(Errno::EINVAL, "Invalid utf-8 string") } } +impl From for Error { + fn from(_: alloc::string::FromUtf8Error) -> Self { + Error::with_message(Errno::EINVAL, "Invalid utf-8 string") + } +} + impl From for Error { fn from(_: core::ffi::FromBytesUntilNulError) -> Self { Error::with_message(Errno::E2BIG, "Cannot find null in cstring") diff --git a/services/libs/aster-std/src/fs/devpts/mod.rs b/services/libs/aster-std/src/fs/devpts/mod.rs index 7b84d3eff..99f75ff86 100644 --- a/services/libs/aster-std/src/fs/devpts/mod.rs +++ b/services/libs/aster-std/src/fs/devpts/mod.rs @@ -6,7 +6,6 @@ use crate::fs::utils::{ }; use crate::prelude::*; -use aster_frame::vm::VmFrame; use aster_util::{id_allocator::IdAlloc, slot_vec::SlotVec}; use core::time::Duration; @@ -140,12 +139,18 @@ 
impl Inode for RootInode { self.metadata.size } - fn resize(&self, new_size: usize) {} + fn resize(&self, new_size: usize) -> Result<()> { + Err(Error::new(Errno::EISDIR)) + } fn metadata(&self) -> Metadata { self.metadata.clone() } + fn ino(&self) -> u64 { + self.metadata.ino as _ + } + fn type_(&self) -> InodeType { self.metadata.type_ } diff --git a/services/libs/aster-std/src/fs/devpts/ptmx.rs b/services/libs/aster-std/src/fs/devpts/ptmx.rs index c8031d997..398d86adc 100644 --- a/services/libs/aster-std/src/fs/devpts/ptmx.rs +++ b/services/libs/aster-std/src/fs/devpts/ptmx.rs @@ -65,12 +65,18 @@ impl Inode for Ptmx { self.metadata.size } - fn resize(&self, new_size: usize) {} + fn resize(&self, new_size: usize) -> Result<()> { + Ok(()) + } fn metadata(&self) -> Metadata { self.metadata.clone() } + fn ino(&self) -> u64 { + self.metadata.ino as _ + } + fn type_(&self) -> InodeType { self.metadata.type_ } @@ -93,14 +99,6 @@ impl Inode for Ptmx { fn set_mtime(&self, time: Duration) {} - fn read_page(&self, idx: usize, frame: &VmFrame) -> Result<()> { - Ok(()) - } - - fn write_page(&self, idx: usize, frame: &VmFrame) -> Result<()> { - Ok(()) - } - fn read_at(&self, offset: usize, buf: &mut [u8]) -> Result { Ok(0) } diff --git a/services/libs/aster-std/src/fs/devpts/slave.rs b/services/libs/aster-std/src/fs/devpts/slave.rs index 408bdb15e..6bb34fbfd 100644 --- a/services/libs/aster-std/src/fs/devpts/slave.rs +++ b/services/libs/aster-std/src/fs/devpts/slave.rs @@ -45,12 +45,18 @@ impl Inode for PtySlaveInode { self.metadata.size } - fn resize(&self, new_size: usize) {} + fn resize(&self, new_size: usize) -> Result<()> { + Err(Error::new(Errno::EPERM)) + } fn metadata(&self) -> Metadata { self.metadata.clone() } + fn ino(&self) -> u64 { + self.metadata.ino as _ + } + fn type_(&self) -> InodeType { self.metadata.type_ } @@ -73,14 +79,6 @@ impl Inode for PtySlaveInode { fn set_mtime(&self, time: Duration) {} - fn read_page(&self, idx: usize, frame: &VmFrame) -> 
Result<()> { - Ok(()) - } - - fn write_page(&self, idx: usize, frame: &VmFrame) -> Result<()> { - Ok(()) - } - fn read_at(&self, offset: usize, buf: &mut [u8]) -> Result { self.device.read(buf) } diff --git a/services/libs/aster-std/src/fs/ext2/block_group.rs b/services/libs/aster-std/src/fs/ext2/block_group.rs new file mode 100644 index 000000000..1941328df --- /dev/null +++ b/services/libs/aster-std/src/fs/ext2/block_group.rs @@ -0,0 +1,476 @@ +use super::fs::Ext2; +use super::inode::{Inode, InodeDesc, RawInode}; +use super::prelude::*; +use super::super_block::SuperBlock; + +use aster_util::id_allocator::IdAlloc; + +/// Blocks are clustered into block groups in order to reduce fragmentation and minimise +/// the amount of head seeking when reading a large amount of consecutive data. +pub(super) struct BlockGroup { + idx: usize, + bg_impl: Arc, + raw_inodes_cache: PageCache, +} + +struct BlockGroupImpl { + inode_table_bid: Bid, + raw_inodes_size: usize, + inner: RwMutex, + fs: Weak, +} + +impl BlockGroup { + /// Loads and constructs a block group. + pub fn load( + group_descriptors_segment: &VmSegment, + idx: usize, + block_device: &dyn BlockDevice, + super_block: &SuperBlock, + fs: Weak, + ) -> Result { + let raw_inodes_size = (super_block.inodes_per_group() as usize) * super_block.inode_size(); + + let bg_impl = { + let metadata = { + let descriptor = { + // Read the block group descriptor + // TODO: if the main is corrupted, should we load the backup? 
+ let offset = idx * core::mem::size_of::(); + let raw_descriptor = group_descriptors_segment + .read_val::(offset) + .unwrap(); + GroupDescriptor::from(raw_descriptor) + }; + + let get_bitmap = |bid: Bid, capacity: usize| -> Result { + if capacity > BLOCK_SIZE * 8 { + return_errno_with_message!(Errno::EINVAL, "bad bitmap"); + } + let mut buf = vec![0u8; BLOCK_SIZE]; + block_device.read_bytes(bid.to_offset(), &mut buf)?; + Ok(IdAlloc::from_bytes_with_capacity(&buf, capacity)) + }; + + let block_bitmap = get_bitmap( + descriptor.block_bitmap_bid, + super_block.blocks_per_group() as usize, + )?; + let inode_bitmap = get_bitmap( + descriptor.inode_bitmap_bid, + super_block.inodes_per_group() as usize, + )?; + + GroupMetadata { + descriptor, + block_bitmap, + inode_bitmap, + } + }; + + Arc::new(BlockGroupImpl { + inode_table_bid: metadata.descriptor.inode_table_bid, + raw_inodes_size, + inner: RwMutex::new(Inner { + metadata: Dirty::new(metadata), + inode_cache: BTreeMap::new(), + }), + fs, + }) + }; + + let raw_inodes_cache = + PageCache::with_capacity(raw_inodes_size, Arc::downgrade(&bg_impl) as _)?; + + Ok(Self { + idx, + bg_impl, + raw_inodes_cache, + }) + } + + /// Finds and returns the inode. + pub fn lookup_inode(&self, inode_idx: u32) -> Result> { + // The fast path + let inner = self.bg_impl.inner.read(); + if !inner.metadata.is_inode_allocated(inode_idx) { + return_errno!(Errno::ENOENT); + } + if let Some(inode) = inner.inode_cache.get(&inode_idx) { + return Ok(inode.clone()); + } + + // The slow path + drop(inner); + let mut inner = self.bg_impl.inner.write(); + if !inner.metadata.is_inode_allocated(inode_idx) { + return_errno!(Errno::ENOENT); + } + if let Some(inode) = inner.inode_cache.get(&inode_idx) { + return Ok(inode.clone()); + } + + // Loads the inode, then inserts it into the inode cache. + let inode = self.load_inode(inode_idx)?; + inner.inode_cache.insert(inode_idx, inode.clone()); + Ok(inode) + } + + /// Loads an existing inode. 
+ /// + /// This method may load the raw inode metadata from block device. + fn load_inode(&self, inode_idx: u32) -> Result> { + let fs = self.fs(); + let raw_inode = { + let offset = (inode_idx as usize) * fs.inode_size(); + self.raw_inodes_cache + .pages() + .read_val::(offset) + .unwrap() + }; + let inode_desc = Dirty::new(InodeDesc::try_from(raw_inode)?); + let ino = inode_idx + self.idx as u32 * fs.inodes_per_group() + 1; + + Ok(Inode::new(ino, self.idx, inode_desc, Arc::downgrade(&fs))) + } + + /// Inserts the inode into the inode cache. + /// + /// # Panic + /// + /// If `inode_idx` has not been allocated before, then the method panics. + pub fn insert_cache(&self, inode_idx: u32, inode: Arc) { + let mut inner = self.bg_impl.inner.write(); + assert!(inner.metadata.is_inode_allocated(inode_idx)); + inner.inode_cache.insert(inode_idx, inode); + } + + /// Allocates and returns an inode index. + pub fn alloc_inode(&self, is_dir: bool) -> Option { + // The fast path + if self.bg_impl.inner.read().metadata.free_inodes_count() == 0 { + return None; + } + + // The slow path + self.bg_impl.inner.write().metadata.alloc_inode(is_dir) + } + + /// Frees the allocated inode idx. + /// + /// # Panic + /// + /// If `inode_idx` has not been allocated before, then the method panics. + pub fn free_inode(&self, inode_idx: u32, is_dir: bool) { + let mut inner = self.bg_impl.inner.write(); + assert!(inner.metadata.is_inode_allocated(inode_idx)); + + inner.metadata.free_inode(inode_idx, is_dir); + inner.inode_cache.remove(&inode_idx); + } + + /// Allocates and returns a block index. + pub fn alloc_block(&self) -> Option { + // The fast path + if self.bg_impl.inner.read().metadata.free_blocks_count() == 0 { + return None; + } + + // The slow path + self.bg_impl.inner.write().metadata.alloc_block() + } + + /// Frees the allocated block idx. + /// + /// # Panic + /// + /// If `block_idx` has not been allocated before, then the method panics. 
+ pub fn free_block(&self, block_idx: u32) { + let mut inner = self.bg_impl.inner.write(); + assert!(inner.metadata.is_block_allocated(block_idx)); + + inner.metadata.free_block(block_idx); + } + + /// Writes back the raw inode metadata to the raw inode metadata cache. + pub fn sync_raw_inode(&self, inode_idx: u32, raw_inode: &RawInode) { + let offset = (inode_idx as usize) * self.fs().inode_size(); + self.raw_inodes_cache + .pages() + .write_val(offset, raw_inode) + .unwrap(); + } + + /// Writes back the metadata of this group. + pub fn sync_metadata(&self, super_block: &SuperBlock) -> Result<()> { + if !self.bg_impl.inner.read().metadata.is_dirty() { + return Ok(()); + } + + let mut inner = self.bg_impl.inner.write(); + let fs = self.fs(); + // Writes back the descriptor. + let raw_descriptor = RawGroupDescriptor::from(&inner.metadata.descriptor); + self.fs().sync_group_descriptor(self.idx, &raw_descriptor)?; + + let mut bio_waiter = BioWaiter::new(); + // Writes back the inode bitmap. + let inode_bitmap_bid = inner.metadata.descriptor.inode_bitmap_bid; + bio_waiter.concat(fs.block_device().write_bytes_async( + inode_bitmap_bid.to_offset(), + inner.metadata.inode_bitmap.as_bytes(), + )?); + + // Writes back the block bitmap. + let block_bitmap_bid = inner.metadata.descriptor.block_bitmap_bid; + bio_waiter.concat(fs.block_device().write_bytes_async( + block_bitmap_bid.to_offset(), + inner.metadata.block_bitmap.as_bytes(), + )?); + + // Waits for the completion of all submitted bios. + bio_waiter.wait().ok_or_else(|| { + Error::with_message(Errno::EIO, "failed to sync metadata of block group") + })?; + + inner.metadata.clear_dirty(); + Ok(()) + } + + /// Writes back all of the cached inodes. + /// + /// The `sync_all` method of inode may modify the data of this block group, + /// so we should not hold the lock while syncing the inodes. + pub fn sync_all_inodes(&self) -> Result<()> { + // Removes the inodes that is unused from the inode cache. 
+ let unused_inodes: Vec> = self + .bg_impl + .inner + .write() + .inode_cache + .extract_if(|_, inode| Arc::strong_count(inode) == 1) + .map(|(_, inode)| inode) + .collect(); + + // Writes back the unused inodes. + for inode in unused_inodes.iter() { + inode.sync_all()?; + } + drop(unused_inodes); + + // Writes back the remaining inodes in the inode cache. + let remaining_inodes: Vec> = self + .bg_impl + .inner + .read() + .inode_cache + .values() + .cloned() + .collect(); + for inode in remaining_inodes.iter() { + inode.sync_all()?; + } + drop(remaining_inodes); + + // Writes back the raw inode metadata. + self.raw_inodes_cache + .pages() + .decommit(0..self.bg_impl.raw_inodes_size)?; + Ok(()) + } + + fn fs(&self) -> Arc { + self.bg_impl.fs.upgrade().unwrap() + } +} + +impl Debug for BlockGroup { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + f.debug_struct("BlockGroup") + .field("idx", &self.idx) + .field("descriptor", &self.bg_impl.inner.read().metadata.descriptor) + .field( + "block_bitmap", + &self.bg_impl.inner.read().metadata.block_bitmap, + ) + .field( + "inode_bitmap", + &self.bg_impl.inner.read().metadata.inode_bitmap, + ) + .finish() + } +} + +impl PageCacheBackend for BlockGroupImpl { + fn read_page(&self, idx: usize, frame: &VmFrame) -> Result<()> { + let bid = self.inode_table_bid + idx as u64; + self.fs.upgrade().unwrap().read_block(bid, frame)?; + Ok(()) + } + + fn write_page(&self, idx: usize, frame: &VmFrame) -> Result<()> { + let bid = self.inode_table_bid + idx as u64; + self.fs.upgrade().unwrap().write_block(bid, frame)?; + Ok(()) + } + + fn npages(&self) -> usize { + self.raw_inodes_size.div_ceil(BLOCK_SIZE) + } +} + +#[derive(Debug)] +struct Inner { + metadata: Dirty, + inode_cache: BTreeMap>, +} + +#[derive(Clone, Debug)] +struct GroupMetadata { + descriptor: GroupDescriptor, + block_bitmap: IdAlloc, + inode_bitmap: IdAlloc, +} + +impl GroupMetadata { + pub fn is_inode_allocated(&self, inode_idx: u32) -> bool { + 
self.inode_bitmap.is_allocated(inode_idx as usize) + } + + pub fn alloc_inode(&mut self, is_dir: bool) -> Option { + let Some(inode_idx) = self.inode_bitmap.alloc() else { + return None; + }; + self.dec_free_inodes(); + if is_dir { + self.inc_dirs(); + } + Some(inode_idx as u32) + } + + pub fn free_inode(&mut self, inode_idx: u32, is_dir: bool) { + self.inode_bitmap.free(inode_idx as usize); + self.inc_free_inodes(); + if is_dir { + self.dec_dirs(); + } + } + + pub fn is_block_allocated(&self, block_idx: u32) -> bool { + self.block_bitmap.is_allocated(block_idx as usize) + } + + pub fn alloc_block(&mut self) -> Option { + let Some(block_idx) = self.block_bitmap.alloc() else { + return None; + }; + self.dec_free_blocks(); + Some(block_idx as u32) + } + + pub fn free_block(&mut self, block_idx: u32) { + self.block_bitmap.free(block_idx as usize); + self.inc_free_blocks(); + } + + pub fn free_inodes_count(&self) -> u16 { + self.descriptor.free_inodes_count + } + + pub fn free_blocks_count(&self) -> u16 { + self.descriptor.free_blocks_count + } + + pub fn inc_free_inodes(&mut self) { + self.descriptor.free_inodes_count += 1; + } + + pub fn dec_free_inodes(&mut self) { + debug_assert!(self.descriptor.free_inodes_count > 0); + self.descriptor.free_inodes_count -= 1; + } + + pub fn inc_free_blocks(&mut self) { + self.descriptor.free_blocks_count += 1; + } + + pub fn dec_free_blocks(&mut self) { + debug_assert!(self.descriptor.free_blocks_count > 0); + self.descriptor.free_blocks_count -= 1; + } + + pub fn inc_dirs(&mut self) { + self.descriptor.dirs_count += 1; + } + + pub fn dec_dirs(&mut self) { + debug_assert!(self.descriptor.dirs_count > 0); + self.descriptor.dirs_count -= 1; + } +} + +/// The in-memory rust block group descriptor. +/// +/// The block group descriptor contains information regarding where important data +/// structures for that group are located. 
+#[derive(Clone, Copy, Debug)] +struct GroupDescriptor { + /// Blocks usage bitmap block + block_bitmap_bid: Bid, + /// Inodes usage bitmap block + inode_bitmap_bid: Bid, + /// Starting block of inode table + inode_table_bid: Bid, + /// Number of free blocks in group + free_blocks_count: u16, + /// Number of free inodes in group + free_inodes_count: u16, + /// Number of directories in group + dirs_count: u16, +} + +impl From for GroupDescriptor { + fn from(desc: RawGroupDescriptor) -> Self { + Self { + block_bitmap_bid: Bid::new(desc.block_bitmap as _), + inode_bitmap_bid: Bid::new(desc.inode_bitmap as _), + inode_table_bid: Bid::new(desc.inode_table as _), + free_blocks_count: desc.free_blocks_count, + free_inodes_count: desc.free_inodes_count, + dirs_count: desc.dirs_count, + } + } +} + +const_assert!(core::mem::size_of::() == 32); + +/// The raw block group descriptor. +/// +/// The table starts on the first block following the superblock. +#[repr(C)] +#[derive(Clone, Copy, Debug, Pod)] +pub(super) struct RawGroupDescriptor { + pub block_bitmap: u32, + pub inode_bitmap: u32, + pub inode_table: u32, + pub free_blocks_count: u16, + pub free_inodes_count: u16, + pub dirs_count: u16, + pad: u16, + reserved: [u32; 3], +} + +impl From<&GroupDescriptor> for RawGroupDescriptor { + fn from(desc: &GroupDescriptor) -> Self { + Self { + block_bitmap: desc.block_bitmap_bid.to_raw() as _, + inode_bitmap: desc.inode_bitmap_bid.to_raw() as _, + inode_table: desc.inode_table_bid.to_raw() as _, + free_blocks_count: desc.free_blocks_count, + free_inodes_count: desc.free_inodes_count, + dirs_count: desc.dirs_count, + pad: 0u16, + reserved: [0u32; 3], + } + } +} diff --git a/services/libs/aster-std/src/fs/ext2/blocks_hole.rs b/services/libs/aster-std/src/fs/ext2/blocks_hole.rs new file mode 100644 index 000000000..da924b651 --- /dev/null +++ b/services/libs/aster-std/src/fs/ext2/blocks_hole.rs @@ -0,0 +1,56 @@ +use bitvec::prelude::BitVec; + +/// A blocks hole descriptor implemented 
by the `BitVec`. +/// +/// The true bit implies that the block is a hole, and conversely. +pub(super) struct BlocksHoleDesc(BitVec); + +impl BlocksHoleDesc { + /// Constructs a blocks hole descriptor with initial size. + /// + /// The `initial_size` usually is the number of blocks for a file. + pub fn new(initial_size: usize) -> Self { + let mut bit_vec = BitVec::with_capacity(initial_size); + bit_vec.resize(initial_size, false); + Self(bit_vec) + } + + /// Returns the size. + pub fn size(&self) -> usize { + self.0.len() + } + + /// Resizes the blocks hole to a new size. + /// + /// If `new_size` is greater than current size, the new blocks are all marked as hole. + pub fn resize(&mut self, new_size: usize) { + self.0.resize(new_size, true); + } + + /// Returns if the block `idx` is a hole. + /// + /// # Panic + /// + /// If the `idx` is out of bounds, this method will panic. + pub fn is_hole(&self, idx: usize) -> bool { + self.0[idx] + } + + /// Marks the block `idx` as a hole. + /// + /// # Panic + /// + /// If the `idx` is out of bounds, this method will panic. + pub fn set(&mut self, idx: usize) { + self.0.set(idx, true); + } + + /// Unmarks the block `idx` as a hole. + /// + /// # Panic + /// + /// If the `idx` is out of bounds, this method will panic. + pub fn unset(&mut self, idx: usize) { + self.0.set(idx, false); + } +} diff --git a/services/libs/aster-std/src/fs/ext2/dir.rs b/services/libs/aster-std/src/fs/ext2/dir.rs new file mode 100644 index 000000000..5f60bcdb2 --- /dev/null +++ b/services/libs/aster-std/src/fs/ext2/dir.rs @@ -0,0 +1,321 @@ +use super::inode::{FileType, MAX_FNAME_LEN}; +use super::prelude::*; + +use core::iter::Iterator; + +/// The data structure in a directory's data block. It is stored in a linked list. +/// +/// Each entry contains the name of the entry, the inode number, the file type, +/// and the distance within the directory file to the next entry. +#[derive(Clone, Debug)] +pub struct DirEntry { + /// The header part. 
+ header: DirEntryHeader, + /// Name of the entry, up to 255 bytes (excluding the null terminator). + name: CStr256, +} + +impl DirEntry { + /// Constructs a new `DirEntry` object with the specified inode (`ino`), + /// name (`name`), and file type (`file_type`). + pub(super) fn new(ino: u32, name: &str, file_type: FileType) -> Self { + debug_assert!(name.len() <= MAX_FNAME_LEN); + + let record_len = (Self::header_len() + name.len()).align_up(4) as u16; + Self { + header: DirEntryHeader { + ino, + record_len, + name_len: name.len() as u8, + file_type: DirEntryFileType::from(file_type) as _, + }, + name: CStr256::from(name), + } + } + + /// Constructs a `DirEntry` with the name "." and `self_ino` as its inode. + pub(super) fn self_entry(self_ino: u32) -> Self { + Self::new(self_ino, ".", FileType::Dir) + } + + /// Constructs a `DirEntry` with the name ".." and `parent_ino` as its inode. + pub(super) fn parent_entry(parent_ino: u32) -> Self { + Self::new(parent_ino, "..", FileType::Dir) + } + + /// Returns a reference to the header. + fn header(&self) -> &DirEntryHeader { + &self.header + } + + /// Returns the length of the header. + fn header_len() -> usize { + core::mem::size_of::() + } + + /// Returns the inode number. + pub fn ino(&self) -> u32 { + self.header.ino + } + + /// Modifies the inode number. + pub fn set_ino(&mut self, ino: u32) { + self.header.ino = ino; + } + + /// Returns the name. + pub fn name(&self) -> &str { + self.name.as_str().unwrap() + } + + /// Returns the type. + pub fn type_(&self) -> FileType { + FileType::from(DirEntryFileType::try_from(self.header.file_type).unwrap()) + } + + /// Returns the distance to the next entry. + pub fn record_len(&self) -> usize { + self.header.record_len as _ + } + + /// Modifies the distance to the next entry. 
+ pub(super) fn set_record_len(&mut self, record_len: usize) { + debug_assert!(record_len >= self.actual_len()); + self.header.record_len = record_len as _; + } + + /// Returns the actual length of the current entry. + pub(super) fn actual_len(&self) -> usize { + (Self::header_len() + self.name.len()).align_up(4) + } + + /// Returns the length of the gap between the current entry and the next entry. + pub(super) fn gap_len(&self) -> usize { + self.record_len() - self.actual_len() + } +} + +/// The header of `DirEntry`. +#[repr(C)] +#[derive(Clone, Copy, Debug, Pod)] +struct DirEntryHeader { + /// Inode number + ino: u32, + /// Directory entry length + record_len: u16, + /// Name Length + name_len: u8, + /// Type indicator + file_type: u8, +} + +/// The type indicator in the `DirEntry`. +#[repr(u8)] +#[derive(Copy, Clone, Debug, Eq, PartialEq, TryFromInt)] +enum DirEntryFileType { + Unknown = 0, + File = 1, + Dir = 2, + Char = 3, + Block = 4, + Fifo = 5, + Socket = 6, + Symlink = 7, +} + +impl From for DirEntryFileType { + fn from(file_type: FileType) -> Self { + match file_type { + FileType::Fifo => Self::Fifo, + FileType::Char => Self::Char, + FileType::Dir => Self::Dir, + FileType::Block => Self::Block, + FileType::File => Self::File, + FileType::Symlink => Self::Symlink, + FileType::Socket => Self::Socket, + } + } +} + +impl From for FileType { + fn from(file_type: DirEntryFileType) -> Self { + match file_type { + DirEntryFileType::Fifo => Self::Fifo, + DirEntryFileType::Char => Self::Char, + DirEntryFileType::Dir => Self::Dir, + DirEntryFileType::Block => Self::Block, + DirEntryFileType::File => Self::File, + DirEntryFileType::Symlink => Self::Symlink, + DirEntryFileType::Socket => Self::Socket, + DirEntryFileType::Unknown => panic!("unknown file type"), + } + } +} + +/// A reader for reading `DirEntry` from the page cache. 
+pub struct DirEntryReader<'a> { + page_cache: &'a PageCache, + offset: usize, +} + +impl<'a> DirEntryReader<'a> { + /// Constructs a reader with the given page cache and offset. + pub(super) fn new(page_cache: &'a PageCache, from_offset: usize) -> Self { + Self { + page_cache, + offset: from_offset, + } + } + + /// Reads one `DirEntry` from the current offset. + pub fn read_entry(&mut self) -> Result { + let header = self + .page_cache + .pages() + .read_val::(self.offset)?; + if header.ino == 0 { + return_errno!(Errno::ENOENT); + } + + let mut name = vec![0u8; header.name_len as _]; + self.page_cache + .pages() + .read_bytes(self.offset + DirEntry::header_len(), &mut name)?; + let entry = DirEntry { + header, + name: CStr256::from(name.as_slice()), + }; + self.offset += entry.record_len(); + + Ok(entry) + } +} + +impl<'a> Iterator for DirEntryReader<'a> { + type Item = (usize, DirEntry); + + fn next(&mut self) -> Option { + let offset = self.offset; + let entry = match self.read_entry() { + Ok(entry) => entry, + Err(_) => { + return None; + } + }; + + Some((offset, entry)) + } +} + +/// A writer for modifying `DirEntry` of the page cache. +pub struct DirEntryWriter<'a> { + page_cache: &'a PageCache, + offset: usize, +} + +impl<'a> DirEntryWriter<'a> { + /// Constructs a writer with the given page cache and offset. + pub(super) fn new(page_cache: &'a PageCache, from_offset: usize) -> Self { + Self { + page_cache, + offset: from_offset, + } + } + + /// Writes a `DirEntry` at the current offset. + pub fn write_entry(&mut self, entry: &DirEntry) -> Result<()> { + self.page_cache + .pages() + .write_val(self.offset, entry.header())?; + self.page_cache.pages().write_bytes( + self.offset + DirEntry::header_len(), + entry.name().as_bytes(), + )?; + self.offset += entry.record_len(); + Ok(()) + } + + /// Appends a new `DirEntry` starting from the current offset. 
+ /// + /// If there is a gap between existing entries, inserts the new entry into the gap; + /// If there is no available space, expands the size and appends the new entry at the end. + pub fn append_entry(&mut self, mut new_entry: DirEntry) -> Result<()> { + let Some((offset, mut entry)) = DirEntryReader::new(self.page_cache, self.offset) + .find(|(_, entry)| entry.gap_len() >= new_entry.record_len()) + else { + // Resize and append it at the new block. + let old_size = self.page_cache.pages().size(); + let new_size = old_size + BLOCK_SIZE; + self.page_cache.pages().resize(new_size)?; + new_entry.set_record_len(BLOCK_SIZE); + self.offset = old_size; + self.write_entry(&new_entry)?; + return Ok(()); + }; + + // Write in the gap between existing entries. + new_entry.set_record_len(entry.gap_len()); + entry.set_record_len(entry.actual_len()); + self.offset = offset; + self.write_entry(&entry)?; + self.write_entry(&new_entry)?; + Ok(()) + } + + /// Removes and returns an existing `DirEntry` indicated by `name`. + pub fn remove_entry(&mut self, name: &str) -> Result { + let self_entry_record_len = DirEntry::self_entry(0).record_len(); + let reader = DirEntryReader::new(self.page_cache, 0); + let next_reader = DirEntryReader::new(self.page_cache, self_entry_record_len); + let Some(((pre_offset, mut pre_entry), (offset, entry))) = reader + .zip(next_reader) + .find(|((offset, _), (_, dir_entry))| dir_entry.name() == name) + else { + return_errno!(Errno::ENOENT); + }; + + if DirEntryReader::new(self.page_cache, offset) + .next() + .is_none() + && Bid::from_offset(pre_offset) != Bid::from_offset(offset) + { + // Shrink the size. + let new_size = pre_offset.align_up(BLOCK_SIZE); + self.page_cache.pages().resize(new_size)?; + pre_entry.set_record_len(new_size - pre_offset); + self.offset = pre_offset; + self.write_entry(&pre_entry)?; + } else { + // Update the previous entry. 
+ pre_entry.set_record_len(pre_entry.record_len() + entry.record_len()); + self.offset = pre_offset; + self.write_entry(&pre_entry)?; + } + + Ok(entry) + } + + /// Renames the `DirEntry` from `old_name` to the `new_name` from the current offset. + /// + /// It will moves the `DirEntry` to another position, + /// if the record length is not big enough. + pub fn rename_entry(&mut self, old_name: &str, new_name: &str) -> Result<()> { + let (offset, entry) = DirEntryReader::new(self.page_cache, self.offset) + .find(|(offset, entry)| entry.name() == old_name) + .ok_or(Error::new(Errno::ENOENT))?; + + let mut new_entry = DirEntry::new(entry.ino(), new_name, entry.type_()); + if new_entry.record_len() <= entry.record_len() { + // Just rename the entry. + new_entry.set_record_len(entry.record_len()); + self.offset = offset; + self.write_entry(&new_entry)?; + } else { + // Move to another position. + self.remove_entry(old_name)?; + self.offset = 0; + self.append_entry(new_entry)?; + } + Ok(()) + } +} diff --git a/services/libs/aster-std/src/fs/ext2/fs.rs b/services/libs/aster-std/src/fs/ext2/fs.rs new file mode 100644 index 000000000..45b47c0a9 --- /dev/null +++ b/services/libs/aster-std/src/fs/ext2/fs.rs @@ -0,0 +1,366 @@ +use super::block_group::{BlockGroup, RawGroupDescriptor}; +use super::inode::{FilePerm, FileType, Inode, InodeDesc, RawInode}; +use super::prelude::*; +use super::super_block::{RawSuperBlock, SuperBlock, SUPER_BLOCK_OFFSET}; + +/// The root inode number. +const ROOT_INO: u32 = 2; + +/// The Ext2 filesystem. +#[derive(Debug)] +pub struct Ext2 { + block_device: Arc, + super_block: RwMutex>, + block_groups: Vec, + inodes_per_group: u32, + blocks_per_group: u32, + inode_size: usize, + block_size: usize, + group_descriptors_segment: VmSegment, + self_ref: Weak, +} + +impl Ext2 { + /// Opens and loads an Ext2 from the `block_device`. 
+ pub fn open(block_device: Arc) -> Result> { + // Load the superblock + // TODO: if the main superblock is corrupted, should we load the backup? + let super_block = { + let raw_super_block = block_device.read_val::(SUPER_BLOCK_OFFSET)?; + SuperBlock::try_from(raw_super_block)? + }; + assert!(super_block.block_size() == BLOCK_SIZE); + + let group_descriptors_segment = { + let npages = ((super_block.block_groups_count() as usize) + * core::mem::size_of::()) + .div_ceil(BLOCK_SIZE); + let segment = VmAllocOptions::new(npages) + .uninit(true) + .is_contiguous(true) + .alloc_contiguous()?; + match block_device.read_blocks_sync(super_block.group_descriptors_bid(0), &segment)? { + BioStatus::Complete => (), + err_status => { + return Err(Error::from(err_status)); + } + } + segment + }; + + // Load the block groups information + let load_block_groups = |fs: Weak, + block_device: &dyn BlockDevice, + group_descriptors_segment: &VmSegment| + -> Result> { + let block_groups_count = super_block.block_groups_count() as usize; + let mut block_groups = Vec::with_capacity(block_groups_count); + for idx in 0..block_groups_count { + let block_group = BlockGroup::load( + group_descriptors_segment, + idx, + block_device, + &super_block, + fs.clone(), + )?; + block_groups.push(block_group); + } + Ok(block_groups) + }; + + let ext2 = Arc::new_cyclic(|weak_ref| Self { + inodes_per_group: super_block.inodes_per_group(), + blocks_per_group: super_block.blocks_per_group(), + inode_size: super_block.inode_size(), + block_size: super_block.block_size(), + block_groups: load_block_groups( + weak_ref.clone(), + block_device.as_ref(), + &group_descriptors_segment, + ) + .unwrap(), + block_device, + super_block: RwMutex::new(Dirty::new(super_block)), + group_descriptors_segment, + self_ref: weak_ref.clone(), + }); + Ok(ext2) + } + + /// Returns the block device. + pub fn block_device(&self) -> &dyn BlockDevice { + self.block_device.as_ref() + } + + /// Returns the size of block. 
+ pub fn block_size(&self) -> usize { + self.block_size + } + + /// Returns the size of inode. + pub fn inode_size(&self) -> usize { + self.inode_size + } + + /// Returns the number of inodes in each block group. + pub fn inodes_per_group(&self) -> u32 { + self.inodes_per_group + } + + /// Returns the number of blocks in each block group. + pub fn blocks_per_group(&self) -> u32 { + self.blocks_per_group + } + + /// Returns the super block. + pub fn super_block(&self) -> RwMutexReadGuard<'_, Dirty> { + self.super_block.read() + } + + /// Returns the root inode. + pub fn root_inode(&self) -> Result> { + self.lookup_inode(ROOT_INO) + } + + /// Finds and returns the inode by `ino`. + pub(super) fn lookup_inode(&self, ino: u32) -> Result> { + let (_, block_group) = self.block_group_of_ino(ino)?; + let inode_idx = self.inode_idx(ino); + block_group.lookup_inode(inode_idx) + } + + /// Creates a new inode. + pub(super) fn create_inode( + &self, + dir_block_group_idx: usize, + file_type: FileType, + file_perm: FilePerm, + ) -> Result> { + let (block_group_idx, ino) = + self.alloc_ino(dir_block_group_idx, file_type == FileType::Dir)?; + let inode = { + let inode_desc = InodeDesc::new(file_type, file_perm); + Inode::new(ino, block_group_idx, inode_desc, self.self_ref.clone()) + }; + let block_group = &self.block_groups[block_group_idx]; + block_group.insert_cache(self.inode_idx(ino), inode.clone()); + Ok(inode) + } + + /// Allocates a new inode number, internally used by `new_inode`. + /// + /// Attempts to allocate from the `dir_block_group_idx` group first. + /// If allocation is not possible from this group, then search the remaining groups. 
+ fn alloc_ino(&self, dir_block_group_idx: usize, is_dir: bool) -> Result<(usize, u32)> { + let mut block_group_idx = dir_block_group_idx; + if block_group_idx >= self.block_groups.len() { + return_errno_with_message!(Errno::EINVAL, "invalid block group idx"); + } + + for _ in 0..self.block_groups.len() { + if block_group_idx >= self.block_groups.len() { + block_group_idx = 0; + } + let block_group = &self.block_groups[block_group_idx]; + if let Some(inode_idx) = block_group.alloc_inode(is_dir) { + let ino = block_group_idx as u32 * self.inodes_per_group + inode_idx + 1; + self.super_block.write().dec_free_inodes(); + return Ok((block_group_idx, ino)); + } + block_group_idx += 1; + } + + return_errno_with_message!(Errno::ENOSPC, "no space on device"); + } + + /// Frees an inode. + pub(super) fn free_inode(&self, ino: u32, is_dir: bool) -> Result<()> { + let (_, block_group) = self.block_group_of_ino(ino)?; + let inode_idx = self.inode_idx(ino); + // In order to prevent value underflow, it is necessary to increment + // the free inode counter prior to freeing the inode. + self.super_block.write().inc_free_inodes(); + block_group.free_inode(inode_idx, is_dir); + Ok(()) + } + + /// Writes back the metadata of inode. + pub(super) fn sync_inode(&self, ino: u32, inode: &InodeDesc) -> Result<()> { + let (_, block_group) = self.block_group_of_ino(ino)?; + let inode_idx = self.inode_idx(ino); + block_group.sync_raw_inode(inode_idx, &RawInode::from(inode)); + Ok(()) + } + + /// Writes back the block group descriptor to the descriptors table. + pub(super) fn sync_group_descriptor( + &self, + block_group_idx: usize, + raw_descriptor: &RawGroupDescriptor, + ) -> Result<()> { + let offset = block_group_idx * core::mem::size_of::(); + self.group_descriptors_segment + .write_val(offset, raw_descriptor)?; + Ok(()) + } + + /// Allocates a new block. + /// + /// Attempts to allocate from the `block_group_idx` group first. 
+ /// If allocation is not possible from this group, then search the remaining groups. + pub(super) fn alloc_block(&self, block_group_idx: usize) -> Result { + let mut block_group_idx = block_group_idx; + if block_group_idx >= self.block_groups.len() { + return_errno_with_message!(Errno::EINVAL, "invalid block group idx"); + } + + for _ in 0..self.block_groups.len() { + if block_group_idx >= self.block_groups.len() { + block_group_idx = 0; + } + let block_group = &self.block_groups[block_group_idx]; + if let Some(block_idx) = block_group.alloc_block() { + let bid = block_group_idx as u32 * self.blocks_per_group + block_idx; + self.super_block.write().dec_free_blocks(); + return Ok(Bid::new(bid as _)); + } + block_group_idx += 1; + } + + return_errno_with_message!(Errno::ENOSPC, "no space on device"); + } + + /// Frees a block. + pub(super) fn free_block(&self, bid: Bid) -> Result<()> { + let (_, block_group) = self.block_group_of_bid(bid)?; + let block_idx = self.block_idx(bid); + // In order to prevent value underflow, it is necessary to increment + // the free block counter prior to freeing the block. + self.super_block.write().inc_free_blocks(); + block_group.free_block(block_idx); + Ok(()) + } + + /// Reads contiguous blocks starting from the `bid` synchronously. + pub(super) fn read_blocks(&self, bid: Bid, segment: &VmSegment) -> Result<()> { + let status = self.block_device.read_blocks_sync(bid, segment)?; + match status { + BioStatus::Complete => Ok(()), + err_status => Err(Error::from(err_status)), + } + } + + /// Reads one block indicated by the `bid` synchronously. + pub(super) fn read_block(&self, bid: Bid, frame: &VmFrame) -> Result<()> { + let status = self.block_device.read_block_sync(bid, frame)?; + match status { + BioStatus::Complete => Ok(()), + err_status => Err(Error::from(err_status)), + } + } + + /// Writes contiguous blocks starting from the `bid` synchronously. 
+ pub(super) fn write_blocks(&self, bid: Bid, segment: &VmSegment) -> Result<()> { + let status = self.block_device.write_blocks_sync(bid, segment)?; + match status { + BioStatus::Complete => Ok(()), + err_status => Err(Error::from(err_status)), + } + } + + /// Writes one block indicated by the `bid` synchronously. + pub(super) fn write_block(&self, bid: Bid, frame: &VmFrame) -> Result<()> { + let status = self.block_device.write_block_sync(bid, frame)?; + match status { + BioStatus::Complete => Ok(()), + err_status => Err(Error::from(err_status)), + } + } + + /// Writes back the metadata to the block device. + pub fn sync_metadata(&self) -> Result<()> { + // If the superblock is clean, the block groups must be clean. + if !self.super_block.read().is_dirty() { + return Ok(()); + } + + let mut super_block = self.super_block.write(); + // Writes back the metadata of block groups + for block_group in &self.block_groups { + block_group.sync_metadata(&super_block)?; + } + + let mut bio_waiter = BioWaiter::new(); + // Writes back the main superblock and group descriptor table. + let raw_super_block = RawSuperBlock::from((*super_block).deref()); + bio_waiter.concat( + self.block_device + .write_bytes_async(SUPER_BLOCK_OFFSET, raw_super_block.as_bytes())?, + ); + bio_waiter.concat(self.block_device.write_blocks( + super_block.group_descriptors_bid(0), + &self.group_descriptors_segment, + )?); + + // Writes back the backups of superblock and group descriptor table. 
+ let mut raw_super_block_backup = raw_super_block; + for idx in 1..super_block.block_groups_count() { + if super_block.is_backup_group(idx as usize) { + raw_super_block_backup.block_group_idx = idx as u16; + bio_waiter.concat(self.block_device.write_bytes_async( + super_block.bid(idx as usize).to_offset(), + raw_super_block_backup.as_bytes(), + )?); + bio_waiter.concat(self.block_device.write_blocks( + super_block.group_descriptors_bid(idx as usize), + &self.group_descriptors_segment, + )?); + } + } + + // Waits for the completion of all submitted bios. + bio_waiter + .wait() + .ok_or_else(|| Error::with_message(Errno::EIO, "failed to sync metadata of fs"))?; + + // Reset to clean. + super_block.clear_dirty(); + Ok(()) + } + + /// Writes back all the cached inodes to the block device. + pub fn sync_all_inodes(&self) -> Result<()> { + for block_group in &self.block_groups { + block_group.sync_all_inodes()?; + } + Ok(()) + } + + #[inline] + fn block_group_of_bid(&self, bid: Bid) -> Result<(usize, &BlockGroup)> { + let block_group_idx = (bid.to_raw() / (self.blocks_per_group as u64)) as usize; + if block_group_idx >= self.block_groups.len() { + return_errno!(Errno::ENOENT); + } + Ok((block_group_idx, &self.block_groups[block_group_idx])) + } + + #[inline] + fn block_group_of_ino(&self, ino: u32) -> Result<(usize, &BlockGroup)> { + let block_group_idx = ((ino - 1) / self.inodes_per_group) as usize; + if block_group_idx >= self.block_groups.len() { + return_errno!(Errno::ENOENT); + } + Ok((block_group_idx, &self.block_groups[block_group_idx])) + } + + #[inline] + fn inode_idx(&self, ino: u32) -> u32 { + (ino - 1) % self.inodes_per_group + } + + #[inline] + fn block_idx(&self, bid: Bid) -> u32 { + (bid.to_raw() as u32) % self.blocks_per_group + } +} diff --git a/services/libs/aster-std/src/fs/ext2/impl_for_vfs/fs.rs b/services/libs/aster-std/src/fs/ext2/impl_for_vfs/fs.rs new file mode 100644 index 000000000..e65f43194 --- /dev/null +++ 
b/services/libs/aster-std/src/fs/ext2/impl_for_vfs/fs.rs
@@ -0,0 +1,43 @@
+use crate::fs::ext2::{utils::Dirty, Ext2, SuperBlock as Ext2SuperBlock, MAGIC_NUM as EXT2_MAGIC};
+use crate::fs::utils::{FileSystem, FsFlags, Inode, SuperBlock, NAME_MAX};
+use crate::prelude::*;
+
+use aster_frame::sync::RwMutexReadGuard;
+
+impl FileSystem for Ext2 {
+    fn sync(&self) -> Result<()> {
+        // Inodes first, then metadata: syncing inodes may dirty bitmaps and
+        // group descriptors that the metadata pass writes back.
+        self.sync_all_inodes()?;
+        self.sync_metadata()?;
+        Ok(())
+    }
+
+    fn root_inode(&self) -> Arc<dyn Inode> {
+        self.root_inode().unwrap()
+    }
+
+    fn sb(&self) -> SuperBlock {
+        SuperBlock::from(self.super_block())
+    }
+
+    fn flags(&self) -> FsFlags {
+        FsFlags::empty()
+    }
+}
+
+impl From<RwMutexReadGuard<'_, Dirty<Ext2SuperBlock>>> for SuperBlock {
+    fn from(ext2_sb: RwMutexReadGuard<'_, Dirty<Ext2SuperBlock>>) -> Self {
+        Self {
+            magic: EXT2_MAGIC as _,
+            bsize: ext2_sb.block_size(),
+            blocks: ext2_sb.total_blocks() as _,
+            bfree: ext2_sb.free_blocks() as _,
+            bavail: ext2_sb.free_blocks() as _,
+            files: ext2_sb.total_inodes() as _,
+            ffree: ext2_sb.free_inodes() as _,
+            fsid: 0, // TODO
+            namelen: NAME_MAX,
+            frsize: ext2_sb.fragment_size(),
+            flags: 0, // TODO
+        }
+    }
+}
diff --git a/services/libs/aster-std/src/fs/ext2/impl_for_vfs/inode.rs b/services/libs/aster-std/src/fs/ext2/impl_for_vfs/inode.rs
new file mode 100644
index 000000000..fd4ae9212
--- /dev/null
+++ b/services/libs/aster-std/src/fs/ext2/impl_for_vfs/inode.rs
@@ -0,0 +1,175 @@
+use crate::fs::device::Device;
+use crate::fs::ext2::{FilePerm, FileType, Inode as Ext2Inode};
+use crate::fs::utils::{
+    DirentVisitor, FileSystem, Inode, InodeMode, InodeType, IoctlCmd, Metadata,
+};
+use crate::prelude::*;
+use crate::vm::vmo::Vmo;
+
+use aster_rights::Full;
+use core::time::Duration;
+
+impl Inode for Ext2Inode {
+    fn len(&self) -> usize {
+        self.file_size() as _
+    }
+
+    fn resize(&self, new_size: usize) -> Result<()> {
+        self.resize(new_size)
+    }
+
+    fn metadata(&self) -> Metadata {
+        Metadata {
+            dev: 0, // TODO: ID of block device
+            ino: self.ino() as _,
+            size: self.file_size() as _,
+            blk_size:
self.fs().super_block().block_size(), + blocks: self.blocks_count() as _, + atime: self.atime(), + mtime: self.mtime(), + ctime: self.ctime(), + type_: InodeType::from(self.file_type()), + mode: InodeMode::from(self.file_perm()), + nlinks: self.hard_links() as _, + uid: self.uid() as _, + gid: self.gid() as _, + rdev: self.device_id(), + } + } + + fn atime(&self) -> Duration { + self.atime() + } + + fn set_atime(&self, time: Duration) { + self.set_atime(time) + } + + fn mtime(&self) -> Duration { + self.mtime() + } + + fn set_mtime(&self, time: Duration) { + self.set_mtime(time) + } + + fn ino(&self) -> u64 { + self.ino() as _ + } + + fn type_(&self) -> InodeType { + InodeType::from(self.file_type()) + } + + fn mode(&self) -> InodeMode { + InodeMode::from(self.file_perm()) + } + + fn set_mode(&self, mode: InodeMode) { + self.set_file_perm(mode.into()); + } + + fn page_cache(&self) -> Option> { + Some(self.page_cache()) + } + + fn read_at(&self, offset: usize, buf: &mut [u8]) -> Result { + self.read_at(offset, buf) + } + + fn read_direct_at(&self, offset: usize, buf: &mut [u8]) -> Result { + self.read_direct_at(offset, buf) + } + + fn write_at(&self, offset: usize, buf: &[u8]) -> Result { + self.write_at(offset, buf) + } + + fn write_direct_at(&self, offset: usize, buf: &[u8]) -> Result { + self.write_direct_at(offset, buf) + } + + fn create(&self, name: &str, type_: InodeType, mode: InodeMode) -> Result> { + Ok(self.create(name, type_.into(), mode.into())?) + } + + fn mknod(&self, name: &str, mode: InodeMode, dev: Arc) -> Result> { + let inode = self.create(name, InodeType::from(dev.type_()).into(), mode.into())?; + inode.set_device_id(dev.id().into()).unwrap(); + Ok(inode) + } + + fn lookup(&self, name: &str) -> Result> { + Ok(self.lookup(name)?) 
+ } + + fn readdir_at(&self, offset: usize, visitor: &mut dyn DirentVisitor) -> Result { + self.readdir_at(offset, visitor) + } + + fn link(&self, old: &Arc, name: &str) -> Result<()> { + let old = old + .downcast_ref::() + .ok_or_else(|| Error::with_message(Errno::EXDEV, "not same fs"))?; + self.link(old, name) + } + + fn unlink(&self, name: &str) -> Result<()> { + self.unlink(name) + } + + fn rmdir(&self, name: &str) -> Result<()> { + self.rmdir(name) + } + + fn rename(&self, old_name: &str, target: &Arc, new_name: &str) -> Result<()> { + let target = target + .downcast_ref::() + .ok_or_else(|| Error::with_message(Errno::EXDEV, "not same fs"))?; + self.rename(old_name, target, new_name) + } + + fn read_link(&self) -> Result { + self.read_link() + } + + fn write_link(&self, target: &str) -> Result<()> { + self.write_link(target) + } + + fn ioctl(&self, cmd: IoctlCmd, arg: usize) -> Result { + Err(Error::new(Errno::EINVAL)) + } + + fn sync(&self) -> Result<()> { + self.sync_all() + } + + fn fs(&self) -> Arc { + self.fs() + } +} + +impl From for InodeMode { + fn from(perm: FilePerm) -> Self { + Self::from_bits_truncate(perm.bits() as _) + } +} + +impl From for FilePerm { + fn from(mode: InodeMode) -> Self { + Self::from_bits_truncate(mode.bits() as _) + } +} + +impl From for InodeType { + fn from(type_: FileType) -> Self { + Self::try_from(type_ as u32).unwrap() + } +} + +impl From for FileType { + fn from(type_: InodeType) -> Self { + Self::try_from(type_ as u16).unwrap() + } +} diff --git a/services/libs/aster-std/src/fs/ext2/impl_for_vfs/mod.rs b/services/libs/aster-std/src/fs/ext2/impl_for_vfs/mod.rs new file mode 100644 index 000000000..249746aab --- /dev/null +++ b/services/libs/aster-std/src/fs/ext2/impl_for_vfs/mod.rs @@ -0,0 +1,2 @@ +mod fs; +mod inode; diff --git a/services/libs/aster-std/src/fs/ext2/inode.rs b/services/libs/aster-std/src/fs/ext2/inode.rs new file mode 100644 index 000000000..61bdf58d5 --- /dev/null +++ 
b/services/libs/aster-std/src/fs/ext2/inode.rs @@ -0,0 +1,1407 @@ +use super::blocks_hole::BlocksHoleDesc; +use super::dir::{DirEntry, DirEntryReader, DirEntryWriter}; +use super::fs::Ext2; +use super::prelude::*; + +use core::cmp::Ordering; +use inherit_methods_macro::inherit_methods; + +mod field { + pub type Field = core::ops::Range; + + /// Direct pointer to blocks. + pub const DIRECT: Field = 0..12; + /// Indirect pointer to blocks. + pub const INDIRECT: Field = 12..13; + /// Doubly indirect pointer to blocks. + pub const DB_INDIRECT: Field = 13..14; + /// Trebly indirect pointer to blocks. + pub const TB_INDIRECT: Field = 14..15; +} + +/// The number of block pointers. +pub const BLOCK_PTR_CNT: usize = field::TB_INDIRECT.end; +/// Max length of file name. +pub const MAX_FNAME_LEN: usize = 255; +/// Max path length of the fast symlink. +pub const FAST_SYMLINK_MAX_LEN: usize = BLOCK_PTR_CNT * core::mem::size_of::(); + +/// The Ext2 inode. +pub struct Inode { + ino: u32, + block_group_idx: usize, + inner: RwMutex, + fs: Weak, +} + +impl Inode { + pub(super) fn new( + ino: u32, + block_group_idx: usize, + desc: Dirty, + fs: Weak, + ) -> Arc { + Arc::new_cyclic(|weak_self| Self { + ino, + block_group_idx, + inner: RwMutex::new(Inner::new(desc, weak_self.clone())), + fs, + }) + } + + pub fn ino(&self) -> u32 { + self.ino + } + + pub(super) fn block_group_idx(&self) -> usize { + self.block_group_idx + } + + pub fn fs(&self) -> Arc { + self.fs.upgrade().unwrap() + } + + pub fn resize(&self, new_size: usize) -> Result<()> { + let inner = self.inner.upread(); + if inner.file_type() != FileType::File { + return_errno!(Errno::EISDIR); + } + if new_size == inner.file_size() { + return Ok(()); + } + + let mut inner = inner.upgrade(); + inner.resize(new_size)?; + Ok(()) + } + + pub fn page_cache(&self) -> Vmo { + self.inner.read().page_cache.pages() + } + + pub fn create( + &self, + name: &str, + file_type: FileType, + file_perm: FilePerm, + ) -> Result> { + let inner = 
self.inner.upread(); + if inner.file_type() != FileType::Dir { + return_errno!(Errno::ENOTDIR); + } + if inner.hard_links() == 0 { + return_errno_with_message!(Errno::ENOENT, "dir removed"); + } + if name.len() > MAX_FNAME_LEN { + return_errno!(Errno::ENAMETOOLONG); + } + + if inner.get_entry(name, 0).is_some() { + return_errno!(Errno::EEXIST); + } + + let inode = self + .fs() + .create_inode(self.block_group_idx, file_type, file_perm)?; + let is_dir = file_type == FileType::Dir; + if let Err(e) = inode.init(self.ino) { + self.fs().free_inode(inode.ino, is_dir).unwrap(); + return Err(e); + } + let new_entry = DirEntry::new(inode.ino, name, file_type); + + let mut inner = inner.upgrade(); + if let Err(e) = inner.append_entry(new_entry, 0) { + self.fs().free_inode(inode.ino, is_dir).unwrap(); + return Err(e); + } + Ok(inode) + } + + pub fn lookup(&self, name: &str) -> Result> { + let inner = self.inner.read(); + if inner.file_type() != FileType::Dir { + return_errno!(Errno::ENOTDIR); + } + if inner.hard_links() == 0 { + return_errno_with_message!(Errno::ENOENT, "dir removed"); + } + if name.len() > MAX_FNAME_LEN { + return_errno!(Errno::ENAMETOOLONG); + } + + let ino = inner + .get_entry_ino(name, 0) + .ok_or(Error::new(Errno::ENOENT))?; + drop(inner); + self.fs().lookup_inode(ino) + } + + pub fn link(&self, inode: &Inode, name: &str) -> Result<()> { + let inner = self.inner.upread(); + if inner.file_type() != FileType::Dir { + return_errno!(Errno::ENOTDIR); + } + if inner.hard_links() == 0 { + return_errno_with_message!(Errno::ENOENT, "dir removed"); + } + if name.len() > MAX_FNAME_LEN { + return_errno!(Errno::ENAMETOOLONG); + } + let inode_type = inode.file_type(); + if inode_type == FileType::Dir { + return_errno!(Errno::EPERM); + } + + if inner.get_entry(name, 0).is_some() { + return_errno!(Errno::EEXIST); + } + + let new_entry = DirEntry::new(inode.ino, name, inode_type); + let mut inner = inner.upgrade(); + inner.append_entry(new_entry, 0)?; + drop(inner); + + 
inode.inner.write().inc_hard_links(); + Ok(()) + } + + pub fn unlink(&self, name: &str) -> Result<()> { + let inner = self.inner.upread(); + if inner.file_type() != FileType::Dir { + return_errno!(Errno::ENOTDIR); + } + if inner.hard_links() == 0 { + return_errno_with_message!(Errno::ENOENT, "dir removed"); + } + if name == "." || name == ".." { + return_errno!(Errno::EISDIR); + } + if name.len() > MAX_FNAME_LEN { + return_errno!(Errno::ENAMETOOLONG); + } + + let inode = { + let ino = inner + .get_entry_ino(name, 0) + .ok_or(Error::new(Errno::ENOENT))?; + self.fs().lookup_inode(ino)? + }; + if inode.file_type() == FileType::Dir { + return_errno!(Errno::EISDIR); + } + + let mut inner = inner.upgrade(); + inner.remove_entry(name, 0)?; + drop(inner); + + inode.inner.write().dec_hard_links(); + Ok(()) + } + + pub fn rmdir(&self, name: &str) -> Result<()> { + let self_inner = self.inner.upread(); + if self_inner.file_type() != FileType::Dir { + return_errno!(Errno::ENOTDIR); + } + if self_inner.hard_links() == 0 { + return_errno_with_message!(Errno::ENOENT, "dir removed"); + } + if name == "." { + return_errno_with_message!(Errno::EINVAL, "rmdir on ."); + } + if name == ".." { + return_errno_with_message!(Errno::ENOTEMPTY, "rmdir on .."); + } + if name.len() > MAX_FNAME_LEN { + return_errno!(Errno::ENAMETOOLONG); + } + + let dir_inode = { + let ino = self_inner + .get_entry_ino(name, 0) + .ok_or(Error::new(Errno::ENOENT))?; + self.fs().lookup_inode(ino)? + }; + + // FIXME: There may be a deadlock here. + let dir_inner = dir_inode.inner.upread(); + if dir_inner.file_type() != FileType::Dir { + return_errno!(Errno::ENOTDIR); + } + if dir_inner.entry_count() > 2 { + return_errno!(Errno::ENOTEMPTY); + } + + let mut self_inner = self_inner.upgrade(); + self_inner.remove_entry(name, 0)?; + drop(self_inner); + + let mut dir_inner = dir_inner.upgrade(); + dir_inner.dec_hard_links(); + dir_inner.dec_hard_links(); // For "." + Ok(()) + } + + /// Rename within its own directory. 
+ fn rename_within(&self, old_name: &str, new_name: &str) -> Result<()> { + let self_inner = self.inner.upread(); + if self_inner.file_type() != FileType::Dir { + return_errno!(Errno::ENOTDIR); + } + if self_inner.hard_links() == 0 { + return_errno_with_message!(Errno::ENOENT, "dir removed"); + } + + let (src_offset, src_inode) = { + let (offset, entry) = self_inner + .get_entry(old_name, 0) + .ok_or(Error::new(Errno::ENOENT))?; + (offset, self.fs().lookup_inode(entry.ino())?) + }; + + let Some((dst_offset, dst_entry)) = self_inner.get_entry(new_name, 0) else { + let mut self_inner = self_inner.upgrade(); + self_inner.rename_entry(old_name, new_name, src_offset)?; + return Ok(()); + }; + + if src_inode.ino == dst_entry.ino() { + // Same inode, do nothing + return Ok(()); + } + + let dst_inode = self.fs().lookup_inode(dst_entry.ino())?; + // FIXME: There may be a deadlock here. + let dst_inner = dst_inode.inner.upread(); + let dst_inode_type = dst_inner.file_type(); + match (src_inode.file_type(), dst_inode_type) { + (FileType::Dir, FileType::Dir) => { + if dst_inner.entry_count() > 2 { + return_errno!(Errno::ENOTEMPTY); + } + } + (FileType::Dir, _) => { + return_errno!(Errno::ENOTDIR); + } + (_, FileType::Dir) => { + return_errno!(Errno::EISDIR); + } + _ => {} + } + let dst_is_dir = dst_inode_type == FileType::Dir; + + let mut self_inner = self_inner.upgrade(); + self_inner.remove_entry(new_name, dst_offset)?; + self_inner.rename_entry(old_name, new_name, src_offset)?; + drop(self_inner); + + let mut dst_inner = dst_inner.upgrade(); + dst_inner.dec_hard_links(); + if dst_is_dir { + dst_inner.dec_hard_links(); // For "." + } + + Ok(()) + } + + pub fn rename(&self, old_name: &str, target: &Inode, new_name: &str) -> Result<()> { + if old_name == "." || old_name == ".." || new_name == "." || new_name == ".." 
{ + return_errno!(Errno::EISDIR); + } + if new_name.len() > MAX_FNAME_LEN || new_name.len() > MAX_FNAME_LEN { + return_errno!(Errno::ENAMETOOLONG); + } + + // Rename inside the inode + if self.ino == target.ino { + return self.rename_within(old_name, new_name); + } + + // FIXME: There may be a deadlock here. + let self_inner = self.inner.upread(); + let target_inner = target.inner.upread(); + if self_inner.file_type() != FileType::Dir || target_inner.file_type() != FileType::Dir { + return_errno!(Errno::ENOTDIR); + } + if self_inner.hard_links() == 0 || target_inner.hard_links() == 0 { + return_errno_with_message!(Errno::ENOENT, "dir removed"); + } + + let (src_offset, src_inode) = { + let (offset, entry) = self_inner + .get_entry(old_name, 0) + .ok_or(Error::new(Errno::ENOENT))?; + (offset, self.fs().lookup_inode(entry.ino())?) + }; + // Avoid renaming a directory to a subdirectory of itself + if src_inode.ino == target.ino { + return_errno!(Errno::EINVAL); + } + let src_inode_type = src_inode.file_type(); + let is_dir = src_inode_type == FileType::Dir; + + let Some((dst_offset, dst_entry)) = target_inner.get_entry(new_name, 0) else { + let mut self_inner = self_inner.upgrade(); + let mut target_inner = target_inner.upgrade(); + self_inner.remove_entry(old_name, src_offset)?; + let new_entry = DirEntry::new(src_inode.ino, new_name, src_inode_type); + target_inner.append_entry(new_entry, 0)?; + drop(self_inner); + drop(target_inner); + + if is_dir { + src_inode.inner.write().set_parent_ino(target.ino)?; + } + return Ok(()); + }; + + if src_inode.ino == dst_entry.ino() { + // Same inode, do nothing + return Ok(()); + } + + // Avoid renaming a subdirectory to a directory. + if self.ino == dst_entry.ino() { + return_errno!(Errno::ENOTEMPTY); + } + + let dst_inode = self.fs().lookup_inode(dst_entry.ino())?; + // FIXME: There may be a deadlock here. 
+ let dst_inner = dst_inode.inner.upread(); + let dst_inode_type = dst_inner.file_type(); + match (src_inode_type, dst_inode_type) { + (FileType::Dir, FileType::Dir) => { + if dst_inner.entry_count() > 2 { + return_errno!(Errno::ENOTEMPTY); + } + } + (FileType::Dir, _) => { + return_errno!(Errno::ENOTDIR); + } + (_, FileType::Dir) => { + return_errno!(Errno::EISDIR); + } + _ => {} + } + let mut self_inner = self_inner.upgrade(); + let mut target_inner = target_inner.upgrade(); + self_inner.remove_entry(old_name, src_offset)?; + target_inner.remove_entry(new_name, dst_offset)?; + let new_entry = DirEntry::new(src_inode.ino, new_name, src_inode_type); + target_inner.append_entry(new_entry, 0)?; + drop(self_inner); + drop(target_inner); + + let mut dst_inner = dst_inner.upgrade(); + dst_inner.dec_hard_links(); + if is_dir { + dst_inner.dec_hard_links(); // For "." + } + drop(dst_inner); + + if is_dir { + src_inode.inner.write().set_parent_ino(target.ino)?; + } + + Ok(()) + } + + pub fn readdir_at(&self, offset: usize, visitor: &mut dyn DirentVisitor) -> Result { + let inner = self.inner.read(); + if inner.file_type() != FileType::Dir { + return_errno!(Errno::ENOTDIR); + } + if inner.hard_links() == 0 { + return_errno_with_message!(Errno::ENOENT, "dir removed"); + } + + let try_readdir = |offset: &mut usize, visitor: &mut dyn DirentVisitor| -> Result<()> { + let dir_entry_reader = DirEntryReader::new(&inner.page_cache, *offset); + for (_, dir_entry) in dir_entry_reader { + visitor.visit( + dir_entry.name(), + dir_entry.ino() as u64, + InodeType::from(dir_entry.type_()), + dir_entry.record_len(), + )?; + *offset += dir_entry.record_len(); + } + + Ok(()) + }; + + let mut iterate_offset = offset; + match try_readdir(&mut iterate_offset, visitor) { + Err(e) if iterate_offset == offset => Err(e), + _ => Ok(iterate_offset - offset), + } + } + + pub fn write_link(&self, target: &str) -> Result<()> { + let mut inner = self.inner.write(); + if inner.file_type() != 
FileType::Symlink { + return_errno!(Errno::EISDIR); + } + + inner.write_link(target)?; + Ok(()) + } + + pub fn read_link(&self) -> Result { + let inner = self.inner.read(); + if inner.file_type() != FileType::Symlink { + return_errno!(Errno::EISDIR); + } + + inner.read_link() + } + + pub fn set_device_id(&self, device_id: u64) -> Result<()> { + let mut inner = self.inner.write(); + let file_type = inner.file_type(); + if file_type != FileType::Block && file_type != FileType::Char { + return_errno!(Errno::EISDIR); + } + + inner.set_device_id(device_id); + Ok(()) + } + + pub fn device_id(&self) -> u64 { + let inner = self.inner.read(); + let file_type = inner.file_type(); + if file_type != FileType::Block && file_type != FileType::Char { + return 0; + } + inner.device_id() + } + + pub fn read_at(&self, offset: usize, buf: &mut [u8]) -> Result { + let inner = self.inner.read(); + if inner.file_type() != FileType::File { + return_errno!(Errno::EISDIR); + } + + inner.read_at(offset, buf) + } + + // The offset and the length of buffer must be multiples of the block size. + pub fn read_direct_at(&self, offset: usize, buf: &mut [u8]) -> Result { + let inner = self.inner.read(); + if inner.file_type() != FileType::File { + return_errno!(Errno::EISDIR); + } + if !is_block_aligned(offset) || !is_block_aligned(buf.len()) { + return_errno_with_message!(Errno::EINVAL, "not block-aligned"); + } + + inner.read_direct_at(offset, buf) + } + + pub fn write_at(&self, offset: usize, buf: &[u8]) -> Result { + let inner = self.inner.upread(); + if inner.file_type() != FileType::File { + return_errno!(Errno::EISDIR); + } + + let file_size = inner.file_size(); + let new_size = offset + buf.len(); + if new_size > file_size { + let mut inner = inner.upgrade(); + inner.extend_write_at(offset, buf)?; + } else { + inner.write_at(offset, buf)?; + } + + Ok(buf.len()) + } + + // The offset and the length of buffer must be multiples of the block size. 
+ pub fn write_direct_at(&self, offset: usize, buf: &[u8]) -> Result { + let inner = self.inner.upread(); + if inner.file_type() != FileType::File { + return_errno!(Errno::EISDIR); + } + if !is_block_aligned(offset) || !is_block_aligned(buf.len()) { + return_errno_with_message!(Errno::EINVAL, "not block aligned"); + } + + let mut inner = inner.upgrade(); + inner.write_direct_at(offset, buf)?; + Ok(buf.len()) + } + + fn init(&self, dir_ino: u32) -> Result<()> { + let mut inner = self.inner.write(); + match inner.file_type() { + FileType::Dir => { + inner.init_dir(self.ino, dir_ino)?; + } + _ => { + // TODO: Reserve serval blocks for regular file ? + } + } + Ok(()) + } + + pub fn sync_all(&self) -> Result<()> { + let inner = self.inner.read(); + inner.sync_data()?; + inner.sync_metadata()?; + Ok(()) + } +} + +#[inherit_methods(from = "self.inner.read()")] +impl Inode { + pub fn file_size(&self) -> usize; + pub fn file_type(&self) -> FileType; + pub fn file_perm(&self) -> FilePerm; + pub fn uid(&self) -> u32; + pub fn gid(&self) -> u32; + pub fn file_flags(&self) -> FileFlags; + pub fn hard_links(&self) -> u16; + pub fn blocks_count(&self) -> u32; + pub fn acl(&self) -> Option; + pub fn atime(&self) -> Duration; + pub fn mtime(&self) -> Duration; + pub fn ctime(&self) -> Duration; + pub fn sync_data(&self) -> Result<()>; + pub fn sync_metadata(&self) -> Result<()>; +} + +#[inherit_methods(from = "self.inner.write()")] +impl Inode { + pub fn set_file_perm(&self, perm: FilePerm); + pub fn set_atime(&self, time: Duration); + pub fn set_mtime(&self, time: Duration); +} + +impl Debug for Inode { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + f.debug_struct("Inode") + .field("ino", &self.ino) + .field("block_group_idx", &self.block_group_idx) + .finish() + } +} + +struct Inner { + inode_impl: Arc, + page_cache: PageCache, +} + +#[inherit_methods(from = "self.inode_impl")] +impl Inner { + pub fn file_size(&self) -> usize; + pub fn file_type(&self) -> 
FileType; + pub fn file_perm(&self) -> FilePerm; + pub fn set_file_perm(&mut self, perm: FilePerm); + pub fn uid(&self) -> u32; + pub fn gid(&self) -> u32; + pub fn file_flags(&self) -> FileFlags; + pub fn hard_links(&self) -> u16; + pub fn inc_hard_links(&mut self); + pub fn dec_hard_links(&mut self); + pub fn blocks_count(&self) -> u32; + pub fn acl(&self) -> Option; + pub fn atime(&self) -> Duration; + pub fn set_atime(&mut self, time: Duration); + pub fn mtime(&self) -> Duration; + pub fn set_mtime(&mut self, time: Duration); + pub fn ctime(&self) -> Duration; + pub fn set_device_id(&mut self, device_id: u64); + pub fn device_id(&self) -> u64; + pub fn sync_metadata(&self) -> Result<()>; +} + +impl Inner { + pub fn new(desc: Dirty, weak_self: Weak) -> Self { + let num_page_bytes = desc.num_page_bytes(); + let inode_impl = InodeImpl::new(desc, weak_self); + Self { + page_cache: PageCache::with_capacity(num_page_bytes, Arc::downgrade(&inode_impl) as _) + .unwrap(), + inode_impl, + } + } + + pub fn resize(&mut self, new_size: usize) -> Result<()> { + self.page_cache.pages().resize(new_size)?; + self.inode_impl.resize(new_size)?; + Ok(()) + } + + pub fn read_at(&self, offset: usize, buf: &mut [u8]) -> Result { + let (offset, read_len) = { + let file_size = self.inode_impl.file_size(); + let start = file_size.min(offset); + let end = file_size.min(offset + buf.len()); + (start, end - start) + }; + + self.page_cache + .pages() + .read_bytes(offset, &mut buf[..read_len])?; + Ok(read_len) + } + + pub fn read_direct_at(&self, offset: usize, buf: &mut [u8]) -> Result { + let (offset, read_len) = { + let file_size = self.inode_impl.file_size(); + let start = file_size.min(offset).align_down(BLOCK_SIZE); + let end = file_size.min(offset + buf.len()).align_down(BLOCK_SIZE); + (start, end - start) + }; + self.page_cache + .pages() + .decommit(offset..offset + read_len)?; + + let mut buf_offset = 0; + for bid in Bid::from_offset(offset)..Bid::from_offset(offset + read_len) { 
+ let frame = VmAllocOptions::new(1).uninit(true).alloc_single().unwrap(); + self.inode_impl.read_block(bid, &frame)?; + frame.read_bytes(0, &mut buf[buf_offset..buf_offset + BLOCK_SIZE])?; + buf_offset += BLOCK_SIZE; + } + Ok(read_len) + } + + pub fn write_at(&self, offset: usize, buf: &[u8]) -> Result<()> { + self.page_cache.pages().write_bytes(offset, buf)?; + Ok(()) + } + + pub fn extend_write_at(&mut self, offset: usize, buf: &[u8]) -> Result<()> { + let new_size = offset + buf.len(); + self.page_cache.pages().resize(new_size)?; + self.page_cache.pages().write_bytes(offset, buf)?; + self.inode_impl.resize(new_size)?; + Ok(()) + } + + pub fn write_direct_at(&mut self, offset: usize, buf: &[u8]) -> Result<()> { + let file_size = self.inode_impl.file_size(); + let end_offset = offset + buf.len(); + + let start = offset.min(file_size); + let end = end_offset.min(file_size); + self.page_cache.pages().decommit(start..end)?; + + if end_offset > file_size { + self.page_cache.pages().resize(end_offset)?; + self.inode_impl.resize(end_offset)?; + } + + let mut buf_offset = 0; + for bid in Bid::from_offset(offset)..Bid::from_offset(end_offset) { + let frame = { + let frame = VmAllocOptions::new(1).uninit(true).alloc_single().unwrap(); + frame.write_bytes(0, &buf[buf_offset..buf_offset + BLOCK_SIZE])?; + frame + }; + self.inode_impl.write_block(bid, &frame)?; + buf_offset += BLOCK_SIZE; + } + + Ok(()) + } + + pub fn write_link(&mut self, target: &str) -> Result<()> { + if target.len() <= FAST_SYMLINK_MAX_LEN { + return self.inode_impl.write_link(target); + } + + self.page_cache.pages().resize(target.len())?; + self.page_cache.pages().write_bytes(0, target.as_bytes())?; + let file_size = self.inode_impl.file_size(); + if file_size != target.len() { + self.inode_impl.resize(target.len())?; + } + Ok(()) + } + + pub fn read_link(&self) -> Result { + let file_size = self.inode_impl.file_size(); + if file_size <= FAST_SYMLINK_MAX_LEN { + return self.inode_impl.read_link(); + } + 
+ let mut symlink = vec![0u8; file_size]; + self.page_cache + .pages() + .read_bytes(0, symlink.as_mut_slice())?; + + Ok(String::from_utf8(symlink)?) + } + + fn init_dir(&mut self, self_ino: u32, parent_ino: u32) -> Result<()> { + self.append_entry(DirEntry::self_entry(self_ino), 0)?; + self.append_entry(DirEntry::parent_entry(parent_ino), 0)?; + Ok(()) + } + + pub fn get_entry_ino(&self, name: &str, offset: usize) -> Option { + self.get_entry(name, offset).map(|(_, entry)| entry.ino()) + } + + pub fn get_entry(&self, name: &str, offset: usize) -> Option<(usize, DirEntry)> { + DirEntryReader::new(&self.page_cache, offset).find(|(offset, entry)| entry.name() == name) + } + + pub fn entry_count(&self) -> usize { + DirEntryReader::new(&self.page_cache, 0).count() + } + + pub fn append_entry(&mut self, entry: DirEntry, offset: usize) -> Result<()> { + let is_dir = entry.type_() == FileType::Dir; + let is_parent = entry.name() == ".."; + + DirEntryWriter::new(&self.page_cache, offset).append_entry(entry)?; + let file_size = self.inode_impl.file_size(); + let page_cache_size = self.page_cache.pages().size(); + if page_cache_size > file_size { + self.inode_impl.resize(page_cache_size)?; + } + if is_dir && !is_parent { + self.inc_hard_links(); // for ".." + } + Ok(()) + } + + pub fn remove_entry(&mut self, name: &str, offset: usize) -> Result<()> { + let entry = DirEntryWriter::new(&self.page_cache, offset).remove_entry(name)?; + let is_dir = entry.type_() == FileType::Dir; + let file_size = self.inode_impl.file_size(); + let page_cache_size = self.page_cache.pages().size(); + if page_cache_size < file_size { + self.inode_impl.resize(page_cache_size)?; + } + if is_dir { + self.dec_hard_links(); // for ".." 
+ } + Ok(()) + } + + pub fn rename_entry(&mut self, old_name: &str, new_name: &str, offset: usize) -> Result<()> { + DirEntryWriter::new(&self.page_cache, offset).rename_entry(old_name, new_name)?; + let file_size = self.inode_impl.file_size(); + let page_cache_size = self.page_cache.pages().size(); + if page_cache_size != file_size { + self.inode_impl.resize(page_cache_size)?; + } + Ok(()) + } + + pub fn set_parent_ino(&mut self, parent_ino: u32) -> Result<()> { + let (offset, mut entry) = self.get_entry("..", 0).unwrap(); + entry.set_ino(parent_ino); + DirEntryWriter::new(&self.page_cache, offset).write_entry(&entry)?; + Ok(()) + } + + pub fn sync_data(&self) -> Result<()> { + // Writes back the data in page cache. + let file_size = self.inode_impl.file_size(); + self.page_cache.evict_range(0..file_size)?; + + // Writes back the data holes + self.inode_impl.sync_data_holes()?; + Ok(()) + } +} + +struct InodeImpl(RwMutex); + +struct InodeImpl_ { + desc: Dirty, + blocks_hole_desc: BlocksHoleDesc, + is_freed: bool, + weak_self: Weak, +} + +impl InodeImpl_ { + pub fn new(desc: Dirty, weak_self: Weak) -> Self { + Self { + blocks_hole_desc: BlocksHoleDesc::new(desc.blocks_count() as usize), + desc, + is_freed: false, + weak_self, + } + } + + pub fn inode(&self) -> Arc { + self.weak_self.upgrade().unwrap() + } + + pub fn read_block(&self, bid: Bid, block: &VmFrame) -> Result<()> { + let bid = bid.to_raw() as u32; + if bid >= self.desc.blocks_count() { + return_errno!(Errno::EINVAL); + } + + debug_assert!(field::DIRECT.contains(&(bid as usize))); + if self.blocks_hole_desc.is_hole(bid as usize) { + block.zero(); + return Ok(()); + } + let device_bid = Bid::new(self.desc.data[bid as usize] as _); + self.inode().fs().read_block(device_bid, block)?; + Ok(()) + } + + pub fn write_block(&self, bid: Bid, block: &VmFrame) -> Result<()> { + let bid = bid.to_raw() as u32; + if bid >= self.desc.blocks_count() { + return_errno!(Errno::EINVAL); + } + + 
debug_assert!(field::DIRECT.contains(&(bid as usize))); + let device_bid = Bid::new(self.desc.data[bid as usize] as _); + self.inode().fs().write_block(device_bid, block)?; + Ok(()) + } + + pub fn resize(&mut self, new_size: usize) -> Result<()> { + let new_blocks = if self.desc.type_ == FileType::Symlink && new_size <= FAST_SYMLINK_MAX_LEN + { + 0 + } else { + new_size.div_ceil(BLOCK_SIZE) as u32 + }; + let old_blocks = self.desc.blocks_count(); + + match new_blocks.cmp(&old_blocks) { + Ordering::Greater => { + // Allocate blocks + for file_bid in old_blocks..new_blocks { + debug_assert!(field::DIRECT.contains(&(file_bid as usize))); + let device_bid = self + .inode() + .fs() + .alloc_block(self.inode().block_group_idx)?; + self.desc.data[file_bid as usize] = device_bid.to_raw() as u32; + } + self.desc.blocks_count = new_blocks; + } + Ordering::Equal => (), + Ordering::Less => { + // Free blocks + for file_bid in new_blocks..old_blocks { + debug_assert!(field::DIRECT.contains(&(file_bid as usize))); + let device_bid = Bid::new(self.desc.data[file_bid as usize] as _); + self.inode().fs().free_block(device_bid)?; + } + self.desc.blocks_count = new_blocks; + } + } + + self.desc.size = new_size; + self.blocks_hole_desc.resize(new_blocks as usize); + Ok(()) + } +} + +impl InodeImpl { + pub fn new(desc: Dirty, weak_self: Weak) -> Arc { + let inner = InodeImpl_::new(desc, weak_self); + Arc::new(Self(RwMutex::new(inner))) + } + + pub fn file_size(&self) -> usize { + self.0.read().desc.size + } + + pub fn resize(&self, new_size: usize) -> Result<()> { + self.0.write().resize(new_size) + } + + pub fn file_type(&self) -> FileType { + self.0.read().desc.type_ + } + + pub fn file_perm(&self) -> FilePerm { + self.0.read().desc.perm + } + + pub fn set_file_perm(&self, perm: FilePerm) { + let mut inner = self.0.write(); + inner.desc.perm = perm; + } + + pub fn uid(&self) -> u32 { + self.0.read().desc.uid + } + + pub fn gid(&self) -> u32 { + self.0.read().desc.gid + } + + pub fn 
file_flags(&self) -> FileFlags { + self.0.read().desc.flags + } + + pub fn hard_links(&self) -> u16 { + self.0.read().desc.hard_links + } + + pub fn inc_hard_links(&self) { + let mut inner = self.0.write(); + inner.desc.hard_links += 1; + } + + pub fn dec_hard_links(&self) { + let mut inner = self.0.write(); + debug_assert!(inner.desc.hard_links > 0); + inner.desc.hard_links -= 1; + } + + pub fn blocks_count(&self) -> u32 { + self.0.read().desc.blocks_count() + } + + pub fn acl(&self) -> Option { + self.0.read().desc.acl + } + + pub fn atime(&self) -> Duration { + self.0.read().desc.atime + } + + pub fn set_atime(&self, time: Duration) { + let mut inner = self.0.write(); + inner.desc.atime = time; + } + + pub fn mtime(&self) -> Duration { + self.0.read().desc.mtime + } + + pub fn set_mtime(&self, time: Duration) { + let mut inner = self.0.write(); + inner.desc.mtime = time; + } + + pub fn ctime(&self) -> Duration { + self.0.read().desc.ctime + } + + pub fn read_block(&self, bid: Bid, block: &VmFrame) -> Result<()> { + self.0.read().read_block(bid, block) + } + + pub fn write_block(&self, bid: Bid, block: &VmFrame) -> Result<()> { + let inner = self.0.read(); + inner.write_block(bid, block)?; + + let bid = bid.to_raw() as usize; + if inner.blocks_hole_desc.is_hole(bid) { + drop(inner); + let mut inner = self.0.write(); + if bid < inner.blocks_hole_desc.size() && inner.blocks_hole_desc.is_hole(bid) { + inner.blocks_hole_desc.unset(bid); + } + } + Ok(()) + } + + pub fn set_device_id(&self, device_id: u64) { + self.0.write().desc.data.as_bytes_mut()[..core::mem::size_of::()] + .copy_from_slice(device_id.as_bytes()); + } + + pub fn device_id(&self) -> u64 { + let mut device_id: u64 = 0; + device_id + .as_bytes_mut() + .copy_from_slice(&self.0.read().desc.data.as_bytes()[..core::mem::size_of::()]); + device_id + } + + pub fn write_link(&self, target: &str) -> Result<()> { + let mut inner = self.0.write(); + 
inner.desc.data.as_bytes_mut()[..target.len()].copy_from_slice(target.as_bytes()); + if inner.desc.size != target.len() { + inner.resize(target.len())?; + } + Ok(()) + } + + pub fn read_link(&self) -> Result { + let inner = self.0.read(); + let mut symlink = vec![0u8; inner.desc.size]; + symlink.copy_from_slice(&inner.desc.data.as_bytes()[..inner.desc.size]); + Ok(String::from_utf8(symlink)?) + } + + pub fn sync_data_holes(&self) -> Result<()> { + let mut inner = self.0.write(); + let zero_frame = VmAllocOptions::new(1).alloc_single().unwrap(); + for bid in 0..inner.desc.blocks_count() { + if inner.blocks_hole_desc.is_hole(bid as usize) { + inner.write_block(Bid::new(bid as _), &zero_frame)?; + inner.blocks_hole_desc.unset(bid as usize); + } + } + Ok(()) + } + + pub fn sync_metadata(&self) -> Result<()> { + if !self.0.read().desc.is_dirty() { + return Ok(()); + } + + let mut inner = self.0.write(); + if !inner.desc.is_dirty() { + return Ok(()); + } + + let inode = inner.inode(); + if inner.desc.hard_links == 0 { + inner.resize(0)?; + // Adds the check here to prevent double-free. + if !inner.is_freed { + inode + .fs() + .free_inode(inode.ino(), inner.desc.type_ == FileType::Dir)?; + inner.is_freed = true; + } + } + + inode.fs().sync_inode(inode.ino(), &inner.desc)?; + inner.desc.clear_dirty(); + Ok(()) + } +} + +impl PageCacheBackend for InodeImpl { + fn read_page(&self, idx: usize, frame: &VmFrame) -> Result<()> { + let bid = Bid::new(idx as _); + self.read_block(bid, frame)?; + Ok(()) + } + + fn write_page(&self, idx: usize, frame: &VmFrame) -> Result<()> { + let bid = Bid::new(idx as _); + self.write_block(bid, frame)?; + Ok(()) + } + + fn npages(&self) -> usize { + self.blocks_count() as _ + } +} + +/// The in-memory rust inode descriptor. +/// +/// It represents a file, directory, symbolic link, etc. +/// It contains pointers to the filesystem blocks which contain the data held in the +/// object and all of the metadata about an object except its name. 
+/// +/// Each block group has an inode table it is responsible for. +#[derive(Clone, Copy, Debug)] +pub(super) struct InodeDesc { + /// Type. + type_: FileType, + /// Permission. + perm: FilePerm, + /// User Id. + uid: u32, + /// Group Id. + gid: u32, + /// Size in bytes. + size: usize, + /// Access time. + atime: Duration, + /// Creation time. + ctime: Duration, + /// Modification time. + mtime: Duration, + /// Deletion time. + dtime: Duration, + /// Hard links count. + hard_links: u16, + /// Number of blocks. + blocks_count: u32, + /// File flags. + flags: FileFlags, + /// Pointers to blocks. + data: [u32; BLOCK_PTR_CNT], + /// File or directory acl block. + acl: Option, +} + +impl TryFrom for InodeDesc { + type Error = crate::error::Error; + + fn try_from(inode: RawInode) -> Result { + let file_type = FileType::from_raw_mode(inode.mode)?; + Ok(Self { + type_: file_type, + perm: FilePerm::from_raw_mode(inode.mode)?, + uid: (inode.os_dependent_2.uid_high as u32) << 16 | inode.uid as u32, + gid: (inode.os_dependent_2.gid_high as u32) << 16 | inode.gid as u32, + size: if file_type == FileType::File { + (inode.size_high as usize) << 32 | inode.size_low as usize + } else { + inode.size_low as usize + }, + atime: Duration::from(inode.atime), + ctime: Duration::from(inode.ctime), + mtime: Duration::from(inode.mtime), + dtime: Duration::from(inode.dtime), + hard_links: inode.hard_links, + blocks_count: inode.blocks_count, + flags: FileFlags::from_bits(inode.flags) + .ok_or(Error::with_message(Errno::EINVAL, "invalid file flags"))?, + data: inode.data, + acl: match file_type { + FileType::File => Some(Bid::new(inode.file_acl as _)), + FileType::Dir => Some(Bid::new(inode.size_high as _)), + _ => None, + }, + }) + } +} + +impl InodeDesc { + pub fn new(type_: FileType, perm: FilePerm) -> Dirty { + Dirty::new_dirty(Self { + type_, + perm, + uid: 0, + gid: 0, + size: 0, + atime: Duration::ZERO, + ctime: Duration::ZERO, + mtime: Duration::ZERO, + dtime: Duration::ZERO, + 
hard_links: 1, + blocks_count: 0, + flags: FileFlags::empty(), + data: [0; BLOCK_PTR_CNT], + acl: match type_ { + FileType::File | FileType::Dir => Some(Bid::new(0)), + _ => None, + }, + }) + } + + pub fn num_page_bytes(&self) -> usize { + (self.blocks_count() as usize) * BLOCK_SIZE + } + + pub fn blocks_count(&self) -> u32 { + if self.type_ == FileType::Dir { + let real_blocks = (self.size / BLOCK_SIZE) as u32; + assert!(real_blocks <= self.blocks_count); + return real_blocks; + } + self.blocks_count + } +} + +#[repr(u16)] +#[derive(Copy, Clone, Debug, Eq, PartialEq, TryFromInt)] +pub enum FileType { + /// FIFO special file + Fifo = 0o010000, + /// Character device + Char = 0o020000, + /// Directory + Dir = 0o040000, + /// Block device + Block = 0o060000, + /// Regular file + File = 0o100000, + /// Symbolic link + Symlink = 0o120000, + /// Socket + Socket = 0o140000, +} + +impl FileType { + pub fn from_raw_mode(mode: u16) -> Result { + const TYPE_MASK: u16 = 0o170000; + Self::try_from(mode & TYPE_MASK) + .map_err(|_| Error::with_message(Errno::EINVAL, "invalid file type")) + } +} + +bitflags! { + pub struct FilePerm: u16 { + /// set-user-ID + const S_ISUID = 0o4000; + /// set-group-ID + const S_ISGID = 0o2000; + /// sticky bit + const S_ISVTX = 0o1000; + /// read by owner + const S_IRUSR = 0o0400; + /// write by owner + const S_IWUSR = 0o0200; + /// execute/search by owner + const S_IXUSR = 0o0100; + /// read by group + const S_IRGRP = 0o0040; + /// write by group + const S_IWGRP = 0o0020; + /// execute/search by group + const S_IXGRP = 0o0010; + /// read by others + const S_IROTH = 0o0004; + /// write by others + const S_IWOTH = 0o0002; + /// execute/search by others + const S_IXOTH = 0o0001; + } +} + +impl FilePerm { + pub fn from_raw_mode(mode: u16) -> Result { + const PERM_MASK: u16 = 0o7777; + Self::from_bits(mode & PERM_MASK) + .ok_or(Error::with_message(Errno::EINVAL, "invalid file perm")) + } +} + +bitflags! 
{ + pub struct FileFlags: u32 { + /// Secure deletion. + const SECURE_DEL = 1 << 0; + /// Undelete. + const UNDELETE = 1 << 1; + /// Compress file. + const COMPRESS = 1 << 2; + /// Synchronous updates. + const SYNC_UPDATE = 1 << 3; + /// Immutable file. + const IMMUTABLE = 1 << 4; + /// Append only. + const APPEND_ONLY = 1 << 5; + /// Do not dump file. + const NO_DUMP = 1 << 6; + /// Do not update atime. + const NO_ATIME = 1 << 7; + /// Dirty. + const DIRTY = 1 << 8; + /// One or more compressed clusters. + const COMPRESS_BLK = 1 << 9; + /// Do not compress. + const NO_COMPRESS = 1 << 10; + /// Encrypted file. + const ENCRYPT = 1 << 11; + /// Hash-indexed directory. + const INDEX_DIR = 1 << 12; + /// AFS directory. + const IMAGIC = 1 << 13; + /// Journal file data. + const JOURNAL_DATA = 1 << 14; + /// File tail should not be merged. + const NO_TAIL = 1 << 15; + /// Dirsync behaviour (directories only). + const DIR_SYNC = 1 << 16; + /// Top of directory hierarchies. + const TOP_DIR = 1 << 17; + /// Reserved for ext2 lib. + const RESERVED = 1 << 31; + } +} + +const_assert!(core::mem::size_of::() == 128); + +/// The raw inode on device. +#[repr(C)] +#[derive(Clone, Copy, Default, Debug, Pod)] +pub(super) struct RawInode { + /// File mode (type and permissions). + pub mode: u16, + /// Low 16 bits of User Id. + pub uid: u16, + /// Lower 32 bits of size in bytes. + pub size_low: u32, + /// Access time. + pub atime: UnixTime, + /// Creation time. + pub ctime: UnixTime, + /// Modification time. + pub mtime: UnixTime, + /// Deletion time. + pub dtime: UnixTime, + /// Low 16 bits of Group Id. + pub gid: u16, + pub hard_links: u16, + pub blocks_count: u32, + /// File flags. + pub flags: u32, + /// OS dependent Value 1. + reserved1: u32, + pub data: [u32; BLOCK_PTR_CNT], + /// File version (for NFS). + pub generation: u32, + /// In revision 0, this field is reserved. + /// In revision 1, File ACL. + pub file_acl: u32, + /// In revision 0, this field is reserved. 
+ /// In revision 1, Upper 32 bits of file size (if feature bit set) + /// if it's a file, Directory ACL if it's a directory. + pub size_high: u32, + /// Fragment address. + pub frag_addr: u32, + /// OS dependent 2. + pub os_dependent_2: Osd2, +} + +impl From<&InodeDesc> for RawInode { + fn from(inode: &InodeDesc) -> Self { + Self { + mode: inode.type_ as u16 | inode.perm.bits(), + uid: inode.uid as u16, + size_low: inode.size as u32, + atime: UnixTime::from(inode.atime), + ctime: UnixTime::from(inode.ctime), + mtime: UnixTime::from(inode.mtime), + dtime: UnixTime::from(inode.dtime), + gid: inode.gid as u16, + hard_links: inode.hard_links, + blocks_count: inode.blocks_count, + flags: inode.flags.bits(), + data: inode.data, + file_acl: match inode.acl { + Some(acl) if inode.type_ == FileType::File => acl.to_raw() as u32, + _ => Default::default(), + }, + size_high: match inode.acl { + Some(acl) if inode.type_ == FileType::Dir => acl.to_raw() as u32, + _ => Default::default(), + }, + os_dependent_2: Osd2 { + uid_high: (inode.uid >> 16) as u16, + gid_high: (inode.gid >> 16) as u16, + ..Default::default() + }, + ..Default::default() + } + } +} + +/// OS dependent Value 2 +#[repr(C)] +#[derive(Clone, Copy, Default, Debug, Pod)] +pub(super) struct Osd2 { + /// Fragment number. + pub frag_num: u8, + /// Fragment size. + pub frag_size: u8, + pad1: u16, + /// High 16 bits of User Id. + pub uid_high: u16, + /// High 16 bits of Group Id. + pub gid_high: u16, + reserved2: u32, +} + +fn is_block_aligned(offset: usize) -> bool { + offset % BLOCK_SIZE == 0 +} diff --git a/services/libs/aster-std/src/fs/ext2/mod.rs b/services/libs/aster-std/src/fs/ext2/mod.rs new file mode 100644 index 000000000..ff7774871 --- /dev/null +++ b/services/libs/aster-std/src/fs/ext2/mod.rs @@ -0,0 +1,50 @@ +//! A safe Rust Ext2 filesystem. +//! +//! The Second Extended File System(Ext2) is a major rewrite of the Ext filesystem. +//! 
It is the predominant filesystem in use by Linux from the early 1990s to the early 2000s. +//! The structures of Ext3 and Ext4 are based on Ext2 and add some additional options +//! such as journaling. +//! +//! The features of this version of Ext2 are as follows: +//! 1. No unsafe Rust. The filesystem is written is Rust without any unsafe code, +//! ensuring that there are no memory safety issues in the code. +//! 2. Deep integration with PageCache. The data and metadata of the filesystem are +//! stored in PageCache, which accelerates the performance of data access. +//! 3. Compatible with queue-based block device. The filesystem can submits multiple +//! BIO requests to be block device at once, thereby enhancing I/O performance. +//! +//! # Example +//! +//! ```no_run +//! // Opens an Ext2 from the block device. +//! let ext2 = Ext2::open(block_device)?; +//! // Lookup the root inode. +//! let root = ext2.root_inode()?; +//! // Create a file inside root directory. +//! let file = root.create("file", FileType::File, FilePerm::from_bits_truncate(0o666))?; +//! // Write data into the file. +//! const WRITE_DATA: &[u8] = b"Hello, World"; +//! let len = file.write_at(0, WRITE_DATA)?; +//! assert!(len == WRITE_DATA.len()); +//! ``` +//! +//! # Limitation +//! +//! Here we summarizes the features that need to be implemented in the future. +//! 1. Supports large file. +//! 2. Supports merging small read/write operations. +//! 3. Handles the intermediate failure status correctly. 
+ +pub use fs::Ext2; +pub use inode::{FilePerm, FileType, Inode}; +pub use super_block::{SuperBlock, MAGIC_NUM}; + +mod block_group; +mod blocks_hole; +mod dir; +mod fs; +mod impl_for_vfs; +mod inode; +mod prelude; +mod super_block; +mod utils; diff --git a/services/libs/aster-std/src/fs/ext2/prelude.rs b/services/libs/aster-std/src/fs/ext2/prelude.rs new file mode 100644 index 000000000..6fc487b7a --- /dev/null +++ b/services/libs/aster-std/src/fs/ext2/prelude.rs @@ -0,0 +1,23 @@ +pub(super) use super::utils::{Dirty, IsPowerOf}; + +pub(super) use crate::fs::utils::{ + CStr256, DirentVisitor, InodeType, PageCache, PageCacheBackend, Str16, Str64, +}; +pub(super) use crate::prelude::*; +pub(super) use crate::time::UnixTime; +pub(super) use crate::vm::vmo::Vmo; + +pub(super) use align_ext::AlignExt; +pub(super) use aster_block::{ + bio::{BioStatus, BioWaiter}, + id::Bid, + BlockDevice, BLOCK_SIZE, +}; +pub(super) use aster_frame::sync::{RwMutex, RwMutexReadGuard}; +pub(super) use aster_frame::vm::VmAllocOptions; +pub(super) use aster_frame::vm::VmIo; +pub(super) use aster_frame::vm::{VmFrame, VmSegment}; +pub(super) use aster_rights::Full; +pub(super) use core::ops::{Deref, DerefMut}; +pub(super) use core::time::Duration; +pub(super) use static_assertions::const_assert; diff --git a/services/libs/aster-std/src/fs/ext2/super_block.rs b/services/libs/aster-std/src/fs/ext2/super_block.rs new file mode 100644 index 000000000..631b96ee0 --- /dev/null +++ b/services/libs/aster-std/src/fs/ext2/super_block.rs @@ -0,0 +1,542 @@ +use super::inode::RawInode; +use super::prelude::*; + +/// The magic number of Ext2. +pub const MAGIC_NUM: u16 = 0xef53; + +/// The main superblock is located at byte 1024 from the beginning of the device. +pub const SUPER_BLOCK_OFFSET: usize = 1024; + +const SUPER_BLOCK_SIZE: usize = 1024; + +/// The in-memory rust superblock. +/// +/// It contains all information about the layout of the Ext2. 
+#[derive(Clone, Copy, Debug)] +pub struct SuperBlock { + /// Total number of inodes. + inodes_count: u32, + /// Total number of blocks. + blocks_count: u32, + /// Total number of reserved blocks. + reserved_blocks_count: u32, + /// Total number of free blocks. + free_blocks_count: u32, + /// Total number of free inodes. + free_inodes_count: u32, + /// First data block. + first_data_block: Bid, + /// Block size. + block_size: usize, + /// Fragment size. + frag_size: usize, + /// Number of blocks in each block group. + blocks_per_group: u32, + /// Number of fragments in each block group. + frags_per_group: u32, + /// Number of inodes in each block group. + inodes_per_group: u32, + /// Mount time. + mtime: UnixTime, + /// Write time. + wtime: UnixTime, + /// Mount count. + mnt_count: u16, + /// Maximal mount count. + max_mnt_count: u16, + /// Magic signature. + magic: u16, + /// Filesystem state. + state: FsState, + /// Behaviour when detecting errors. + errors_behaviour: ErrorsBehaviour, + /// Time of last check. + last_check_time: UnixTime, + /// Interval between checks. + check_interval: Duration, + /// Creator OS ID. + creator_os: OsId, + /// Revision level. + rev_level: RevLevel, + /// Default uid for reserved blocks. + def_resuid: u32, + /// Default gid for reserved blocks. + def_resgid: u32, + // + // These fields are valid for RevLevel::Dynamic only. + // + /// First non-reserved inode number. + first_ino: u32, + /// Size of inode structure. + inode_size: usize, + /// Block group that this superblock is part of (if backup copy). + block_group_idx: usize, + /// Compatible feature set. + feature_compat: FeatureCompatSet, + /// Incompatible feature set. + feature_incompat: FeatureInCompatSet, + /// Readonly-compatible feature set. + feature_ro_compat: FeatureRoCompatSet, + /// 128-bit uuid for volume. + uuid: [u8; 16], + /// Volume name. + volume_name: Str16, + /// Directory where last mounted. 
+ last_mounted_dir: Str64, + /// + /// This fields are valid if the FeatureCompatSet::DIR_PREALLOC is set. + /// + /// Number of blocks to preallocate for files. + prealloc_file_blocks: u8, + /// Number of blocks to preallocate for directories. + prealloc_dir_blocks: u8, +} + +impl TryFrom for SuperBlock { + type Error = crate::error::Error; + + fn try_from(sb: RawSuperBlock) -> Result { + Ok(Self { + inodes_count: sb.inodes_count, + blocks_count: sb.blocks_count, + reserved_blocks_count: sb.reserved_blocks_count, + free_blocks_count: sb.free_blocks_count, + free_inodes_count: sb.free_inodes_count, + first_data_block: Bid::new(sb.first_data_block as _), + block_size: 1024 << sb.log_block_size, + frag_size: 1024 << sb.log_frag_size, + blocks_per_group: sb.blocks_per_group, + frags_per_group: sb.frags_per_group, + inodes_per_group: sb.inodes_per_group, + mtime: sb.mtime, + wtime: sb.wtime, + mnt_count: sb.mnt_count, + max_mnt_count: sb.max_mnt_count, + magic: { + if sb.magic != MAGIC_NUM { + return_errno_with_message!(Errno::EINVAL, "bad ext2 magic number"); + } + MAGIC_NUM + }, + state: { + let state = FsState::try_from(sb.state) + .map_err(|_| Error::with_message(Errno::EINVAL, "invalid fs state"))?; + if state == FsState::Corrupted { + return_errno_with_message!(Errno::EUCLEAN, "fs is corrupted"); + } + state + }, + errors_behaviour: ErrorsBehaviour::try_from(sb.errors) + .map_err(|_| Error::with_message(Errno::EINVAL, "invalid errors behaviour"))?, + last_check_time: sb.last_check_time, + check_interval: Duration::from_secs(sb.check_interval as _), + creator_os: { + let os_id = OsId::try_from(sb.creator_os) + .map_err(|_| Error::with_message(Errno::EINVAL, "invalid creater os"))?; + if os_id != OsId::Linux { + return_errno_with_message!(Errno::EINVAL, "not supported os id"); + } + OsId::Linux + }, + rev_level: { + let rev_level = RevLevel::try_from(sb.rev_level) + .map_err(|_| Error::with_message(Errno::EINVAL, "invalid revision level"))?; + if rev_level != 
RevLevel::Dynamic { + return_errno_with_message!(Errno::EINVAL, "not supported rev level"); + } + RevLevel::Dynamic + }, + def_resuid: sb.def_resuid as _, + def_resgid: sb.def_resgid as _, + first_ino: sb.first_ino, + inode_size: { + let inode_size = sb.inode_size as _; + if inode_size < core::mem::size_of::() { + return_errno_with_message!(Errno::EINVAL, "inode size is too small"); + } + inode_size + }, + block_group_idx: sb.block_group_idx as _, + feature_compat: FeatureCompatSet::from_bits(sb.feature_compat).ok_or( + Error::with_message(Errno::EINVAL, "invalid feature compat set"), + )?, + feature_incompat: FeatureInCompatSet::from_bits(sb.feature_incompat).ok_or( + Error::with_message(Errno::EINVAL, "invalid feature incompat set"), + )?, + feature_ro_compat: FeatureRoCompatSet::from_bits(sb.feature_ro_compat).ok_or( + Error::with_message(Errno::EINVAL, "invalid feature ro compat set"), + )?, + uuid: sb.uuid, + volume_name: sb.volume_name, + last_mounted_dir: sb.last_mounted_dir, + prealloc_file_blocks: sb.prealloc_file_blocks, + prealloc_dir_blocks: sb.prealloc_dir_blocks, + }) + } +} + +impl SuperBlock { + /// Returns the block size. + pub fn block_size(&self) -> usize { + self.block_size + } + + /// Returns the size of inode structure. + pub fn inode_size(&self) -> usize { + self.inode_size + } + + /// Returns the fragment size. + pub fn fragment_size(&self) -> usize { + self.frag_size + } + + /// Returns total number of inodes. + pub fn total_inodes(&self) -> u32 { + self.inodes_count + } + + /// Returns total number of blocks. + pub fn total_blocks(&self) -> u32 { + self.blocks_count + } + + /// Returns the number of blocks in each block group. + pub fn blocks_per_group(&self) -> u32 { + self.blocks_per_group + } + + /// Returns the number of inodes in each block group. + pub fn inodes_per_group(&self) -> u32 { + self.inodes_per_group + } + + /// Returns the number of block groups. 
+ pub fn block_groups_count(&self) -> u32 { + self.blocks_count / self.blocks_per_group + } + + /// Returns the filesystem state. + pub fn state(&self) -> FsState { + self.state + } + + /// Returns the revision level. + pub fn rev_level(&self) -> RevLevel { + self.rev_level + } + + /// Returns the compatible feature set. + pub fn feature_compat(&self) -> FeatureCompatSet { + self.feature_compat + } + + /// Returns the incompatible feature set. + pub fn feature_incompat(&self) -> FeatureInCompatSet { + self.feature_incompat + } + + /// Returns the readonly-compatible feature set. + pub fn feature_ro_compat(&self) -> FeatureRoCompatSet { + self.feature_ro_compat + } + + /// Returns the number of free blocks. + pub fn free_blocks(&self) -> u32 { + self.free_blocks_count + } + + /// Increase the number of free blocks. + pub(super) fn inc_free_blocks(&mut self) { + self.free_blocks_count += 1; + } + + /// Decrease the number of free blocks. + pub(super) fn dec_free_blocks(&mut self) { + debug_assert!(self.free_blocks_count > 0); + self.free_blocks_count -= 1; + } + + /// Returns the number of free inodes. + pub fn free_inodes(&self) -> u32 { + self.free_inodes_count + } + + /// Increase the number of free inodes. + pub(super) fn inc_free_inodes(&mut self) { + self.free_inodes_count += 1; + } + + /// Decrease the number of free inodes. + pub(super) fn dec_free_inodes(&mut self) { + debug_assert!(self.free_inodes_count > 0); + self.free_inodes_count -= 1; + } + + /// Checks if the block group will backup the super block. + pub(super) fn is_backup_group(&self, block_group_idx: usize) -> bool { + if block_group_idx == 0 { + false + } else if self + .feature_ro_compat + .contains(FeatureRoCompatSet::SPARSE_SUPER) + { + // The backup groups chosen are 1 and powers of 3, 5 and 7. 
+ block_group_idx == 1 + || block_group_idx.is_power_of(3) + || block_group_idx.is_power_of(5) + || block_group_idx.is_power_of(7) + } else { + true + } + } + + /// Returns the starting block id of the super block + /// inside the block group pointed by `block_group_idx`. + /// + /// # Panic + /// + /// If `block_group_idx` is neither 0 nor a backup block group index, + /// then the method panics. + pub(super) fn bid(&self, block_group_idx: usize) -> Bid { + if block_group_idx == 0 { + let bid = (SUPER_BLOCK_OFFSET / self.block_size) as u64; + return Bid::new(bid); + } + + assert!(self.is_backup_group(block_group_idx)); + let super_block_bid = block_group_idx * (self.blocks_per_group as usize); + Bid::new(super_block_bid as u64) + } + + /// Returns the starting block id of the block group descripter table + /// inside the block group pointed by `block_group_idx`. + /// + /// # Panic + /// + /// If `block_group_idx` is neither 0 nor a backup block group index, + /// then the method panics. + pub(super) fn group_descriptors_bid(&self, block_group_idx: usize) -> Bid { + let super_block_bid = self.bid(block_group_idx); + super_block_bid + (SUPER_BLOCK_SIZE.div_ceil(self.block_size) as u64) + } +} + +bitflags! { + /// Compatible feature set. + pub struct FeatureCompatSet: u32 { + /// Preallocate some number of blocks to a directory when creating a new one + const DIR_PREALLOC = 1 << 0; + /// AFS server inodes exist + const IMAGIC_INODES = 1 << 1; + /// File system has a journal + const HAS_JOURNAL = 1 << 2; + /// Inodes have extended attributes + const EXT_ATTR = 1 << 3; + /// File system can resize itself for larger partitions + const RESIZE_INO = 1 << 4; + /// Directories use hash index + const DIR_INDEX = 1 << 5; + } +} + +bitflags! { + /// Incompatible feature set. 
+ pub struct FeatureInCompatSet: u32 { + /// Compression is used + const COMPRESSION = 1 << 0; + /// Directory entries contain a type field + const FILETYPE = 1 << 1; + /// File system needs to replay its journal + const RECOVER = 1 << 2; + /// File system uses a journal device + const JOURNAL_DEV = 1 << 3; + /// Metablock block group + const META_BG = 1 << 4; + } +} + +bitflags! { + /// Readonly-compatible feature set. + pub struct FeatureRoCompatSet: u32 { + /// Sparse superblocks and group descriptor tables + const SPARSE_SUPER = 1 << 0; + /// File system uses a 64-bit file size + const LARGE_FILE = 1 << 1; + /// Directory contents are stored in the form of a Binary Tree + const BTREE_DIR = 1 << 2; + } +} + +#[repr(u16)] +#[derive(Copy, Clone, Debug, Eq, PartialEq, TryFromInt)] +pub enum FsState { + /// Unmounted cleanly + Valid = 1, + /// Errors detected + Err = 2, + /// Filesystem is corrupted (EUCLEAN) + Corrupted = 117, +} + +#[repr(u16)] +#[derive(Copy, Clone, Debug, Eq, PartialEq, TryFromInt)] +pub enum ErrorsBehaviour { + /// Continue execution + Continue = 1, + // Remount fs read-only + RemountReadonly = 2, + // Should panic + Panic = 3, +} + +impl Default for ErrorsBehaviour { + fn default() -> Self { + Self::Continue + } +} + +#[repr(u32)] +#[derive(Copy, Clone, Debug, Eq, PartialEq, TryFromInt)] +pub enum OsId { + Linux = 0, + Hurd = 1, + Masix = 2, + FreeBSD = 3, + Lites = 4, +} + +#[repr(u32)] +#[derive(Copy, Clone, Debug, Eq, PartialEq, TryFromInt)] +pub enum RevLevel { + /// The good old (original) format. + GoodOld = 0, + /// V2 format with dynamic inode size. + Dynamic = 1, +} + +const_assert!(core::mem::size_of::() == SUPER_BLOCK_SIZE); + +/// The raw superblock, it must be exactly 1024 bytes in length. 
+#[repr(C)] +#[derive(Clone, Copy, Debug, Pod, Default)] +pub(super) struct RawSuperBlock { + pub inodes_count: u32, + pub blocks_count: u32, + pub reserved_blocks_count: u32, + pub free_blocks_count: u32, + pub free_inodes_count: u32, + pub first_data_block: u32, + /// The number to left-shift 1024 to obtain the block size. + pub log_block_size: u32, + /// The number to left-shift 1024 to obtain the fragment size. + pub log_frag_size: u32, + pub blocks_per_group: u32, + pub frags_per_group: u32, + pub inodes_per_group: u32, + /// Mount time. + pub mtime: UnixTime, + /// Write time. + pub wtime: UnixTime, + pub mnt_count: u16, + pub max_mnt_count: u16, + pub magic: u16, + pub state: u16, + pub errors: u16, + pub min_rev_level: u16, + /// Time of last check. + pub last_check_time: UnixTime, + pub check_interval: u32, + pub creator_os: u32, + pub rev_level: u32, + pub def_resuid: u16, + pub def_resgid: u16, + pub first_ino: u32, + pub inode_size: u16, + pub block_group_idx: u16, + pub feature_compat: u32, + pub feature_incompat: u32, + pub feature_ro_compat: u32, + pub uuid: [u8; 16], + pub volume_name: Str16, + pub last_mounted_dir: Str64, + pub algorithm_usage_bitmap: u32, + pub prealloc_file_blocks: u8, + pub prealloc_dir_blocks: u8, + padding1: u16, + /// + /// This fileds are for journaling support in Ext3. + /// + /// Uuid of journal superblock. + pub journal_uuid: [u8; 16], + /// Inode number of journal file. + pub journal_ino: u32, + /// Device number of journal file. + pub journal_dev: u32, + /// Start of list of inodes to delete. + pub last_orphan: u32, + /// HTREE hash seed. + pub hash_seed: [u32; 4], + /// Default hash version to use + pub def_hash_version: u8, + reserved_char_pad: u8, + reserved_word_pad: u16, + /// Default mount options. + pub default_mount_opts: u32, + /// First metablock block group. 
+ pub first_meta_bg: u32, + reserved: Reserved, +} + +impl From<&SuperBlock> for RawSuperBlock { + fn from(sb: &SuperBlock) -> Self { + Self { + inodes_count: sb.inodes_count, + blocks_count: sb.blocks_count, + reserved_blocks_count: sb.reserved_blocks_count, + free_blocks_count: sb.free_blocks_count, + free_inodes_count: sb.free_inodes_count, + first_data_block: sb.first_data_block.to_raw() as u32, + log_block_size: (sb.block_size >> 11) as u32, + log_frag_size: (sb.frag_size >> 11) as u32, + blocks_per_group: sb.blocks_per_group, + frags_per_group: sb.frags_per_group, + inodes_per_group: sb.inodes_per_group, + mtime: sb.mtime, + wtime: sb.wtime, + mnt_count: sb.mnt_count, + max_mnt_count: sb.max_mnt_count, + magic: sb.magic, + state: sb.state as u16, + errors: sb.errors_behaviour as u16, + last_check_time: sb.last_check_time, + check_interval: sb.check_interval.as_secs() as u32, + creator_os: sb.creator_os as u32, + rev_level: sb.rev_level as u32, + def_resuid: sb.def_resuid as u16, + def_resgid: sb.def_resgid as u16, + first_ino: sb.first_ino, + inode_size: sb.inode_size as u16, + block_group_idx: sb.block_group_idx as u16, + feature_compat: sb.feature_compat.bits(), + feature_incompat: sb.feature_incompat.bits(), + feature_ro_compat: sb.feature_ro_compat.bits(), + uuid: sb.uuid, + volume_name: sb.volume_name, + last_mounted_dir: sb.last_mounted_dir, + prealloc_file_blocks: sb.prealloc_file_blocks, + prealloc_dir_blocks: sb.prealloc_dir_blocks, + ..Default::default() + } + } +} + +#[repr(C)] +#[derive(Clone, Copy, Debug, Pod)] +struct Reserved([u32; 190]); + +impl Default for Reserved { + fn default() -> Self { + Self([0u32; 190]) + } +} diff --git a/services/libs/aster-std/src/fs/ext2/utils.rs b/services/libs/aster-std/src/fs/ext2/utils.rs new file mode 100644 index 000000000..8d2870319 --- /dev/null +++ b/services/libs/aster-std/src/fs/ext2/utils.rs @@ -0,0 +1,93 @@ +use super::prelude::*; + +use core::ops::MulAssign; + +pub trait IsPowerOf: Copy + Sized + 
MulAssign + PartialOrd { + /// Returns true if and only if `self == x^k` for some `k` where `k > 0`. + /// + /// The `x` must be a positive value. + fn is_power_of(&self, x: Self) -> bool { + let mut power = x; + while power < *self { + power *= x; + } + + power == *self + } +} + +macro_rules! impl_ipo_for { + ($($ipo_ty:ty),*) => { + $(impl IsPowerOf for $ipo_ty {})* + }; +} + +impl_ipo_for!(u8, u16, u32, u64, u128, i8, i16, i32, i64, i128, isize, usize); + +/// The `Dirty` wraps a value of type `T` with functions similar to that of a rw-lock, +/// but simply sets a dirty flag on `write()`. +pub struct Dirty { + value: T, + dirty: bool, +} + +impl Dirty { + /// Creates a new Dirty without setting the dirty flag. + pub fn new(val: T) -> Dirty { + Dirty { + value: val, + dirty: false, + } + } + + /// Creates a new Dirty with setting the dirty flag. + pub fn new_dirty(val: T) -> Dirty { + Dirty { + value: val, + dirty: true, + } + } + + /// Returns true if dirty, false otherwise. + pub fn is_dirty(&self) -> bool { + self.dirty + } + + /// Clears the dirty flag. + pub fn clear_dirty(&mut self) { + self.dirty = false; + } +} + +impl Deref for Dirty { + type Target = T; + + /// Returns the imutable value. + fn deref(&self) -> &T { + &self.value + } +} + +impl DerefMut for Dirty { + /// Returns the mutable value, sets the dirty flag. + fn deref_mut(&mut self) -> &mut T { + self.dirty = true; + &mut self.value + } +} + +impl Drop for Dirty { + /// Guards if it is dirty when dropping. 
+ fn drop(&mut self) { + if self.is_dirty() { + warn!("[{:?}] is dirty then dropping", self.value); + } + } +} + +impl Debug for Dirty { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + let tag = if self.dirty { "Dirty" } else { "Clean" }; + write!(f, "[{}] {:?}", tag, self.value) + } +} diff --git a/services/libs/aster-std/src/fs/mod.rs b/services/libs/aster-std/src/fs/mod.rs index 973229eca..644e8d170 100644 --- a/services/libs/aster-std/src/fs/mod.rs +++ b/services/libs/aster-std/src/fs/mod.rs @@ -1,6 +1,7 @@ pub mod device; pub mod devpts; pub mod epoll; +pub mod ext2; pub mod file_handle; pub mod file_table; pub mod fs_resolver; @@ -10,3 +11,28 @@ pub mod procfs; pub mod ramfs; pub mod rootfs; pub mod utils; + +use crate::fs::{ext2::Ext2, fs_resolver::FsPath}; +use crate::prelude::*; +use crate::thread::kernel_thread::KernelThreadExt; +use aster_virtio::device::block::device::BlockDevice as VirtIoBlockDevice; +use aster_virtio::device::block::DEVICE_NAME as VIRTIO_BLOCK_NAME; + +pub fn lazy_init() { + let block_device = aster_block::get_device(VIRTIO_BLOCK_NAME).unwrap(); + let cloned_block_device = block_device.clone(); + + let task_fn = move || { + info!("spawn the virt-io-block thread"); + let virtio_block_device = block_device.downcast_ref::().unwrap(); + loop { + virtio_block_device.handle_requests(); + } + }; + crate::Thread::spawn_kernel_thread(crate::ThreadOptions::new(task_fn)); + + let ext2_fs = Ext2::open(cloned_block_device).unwrap(); + let target_path = FsPath::try_from("/ext2").unwrap(); + println!("[kernel] Mount Ext2 fs at {:?} ", target_path); + self::rootfs::mount_fs_at(ext2_fs, &target_path).unwrap(); +} diff --git a/services/libs/aster-std/src/fs/procfs/template/dir.rs b/services/libs/aster-std/src/fs/procfs/template/dir.rs index f14d38a14..251c62312 100644 --- a/services/libs/aster-std/src/fs/procfs/template/dir.rs +++ b/services/libs/aster-std/src/fs/procfs/template/dir.rs @@ -58,12 +58,18 @@ impl Inode for ProcDir { 
self.info.size() } - fn resize(&self, _new_size: usize) {} + fn resize(&self, _new_size: usize) -> Result<()> { + Err(Error::new(Errno::EISDIR)) + } fn metadata(&self) -> Metadata { self.info.metadata() } + fn ino(&self) -> u64 { + self.info.ino() + } + fn type_(&self) -> InodeType { InodeType::Dir } diff --git a/services/libs/aster-std/src/fs/procfs/template/file.rs b/services/libs/aster-std/src/fs/procfs/template/file.rs index f583d9d69..ad810575c 100644 --- a/services/libs/aster-std/src/fs/procfs/template/file.rs +++ b/services/libs/aster-std/src/fs/procfs/template/file.rs @@ -1,4 +1,3 @@ -use aster_frame::vm::VmFrame; use core::time::Duration; use crate::fs::utils::{FileSystem, Inode, InodeMode, InodeType, IoctlCmd, Metadata}; @@ -31,12 +30,18 @@ impl Inode for ProcFile { self.info.size() } - fn resize(&self, _new_size: usize) {} + fn resize(&self, _new_size: usize) -> Result<()> { + Err(Error::new(Errno::EPERM)) + } fn metadata(&self) -> Metadata { self.info.metadata() } + fn ino(&self) -> u64 { + self.info.ino() + } + fn type_(&self) -> InodeType { InodeType::File } @@ -65,14 +70,6 @@ impl Inode for ProcFile { self.info.set_mtime(time) } - fn read_page(&self, _idx: usize, _frame: &VmFrame) -> Result<()> { - unreachable!() - } - - fn write_page(&self, _idx: usize, _frame: &VmFrame) -> Result<()> { - unreachable!() - } - fn read_at(&self, offset: usize, buf: &mut [u8]) -> Result { let data = self.inner.data()?; let start = data.len().min(offset); diff --git a/services/libs/aster-std/src/fs/procfs/template/mod.rs b/services/libs/aster-std/src/fs/procfs/template/mod.rs index cc189746c..d15fa01a8 100644 --- a/services/libs/aster-std/src/fs/procfs/template/mod.rs +++ b/services/libs/aster-std/src/fs/procfs/template/mod.rs @@ -38,6 +38,10 @@ impl ProcInodeInfo { self.metadata.read().clone() } + pub fn ino(&self) -> u64 { + self.metadata.read().ino as _ + } + pub fn size(&self) -> usize { self.metadata.read().size } diff --git 
a/services/libs/aster-std/src/fs/procfs/template/sym.rs b/services/libs/aster-std/src/fs/procfs/template/sym.rs index 01a9867e0..289438e0a 100644 --- a/services/libs/aster-std/src/fs/procfs/template/sym.rs +++ b/services/libs/aster-std/src/fs/procfs/template/sym.rs @@ -1,4 +1,3 @@ -use aster_frame::vm::VmFrame; use core::time::Duration; use crate::fs::utils::{FileSystem, Inode, InodeMode, InodeType, IoctlCmd, Metadata}; @@ -31,12 +30,18 @@ impl Inode for ProcSym { self.info.size() } - fn resize(&self, _new_size: usize) {} + fn resize(&self, _new_size: usize) -> Result<()> { + Err(Error::new(Errno::EPERM)) + } fn metadata(&self) -> Metadata { self.info.metadata() } + fn ino(&self) -> u64 { + self.info.ino() + } + fn type_(&self) -> InodeType { InodeType::SymLink } @@ -65,14 +70,6 @@ impl Inode for ProcSym { self.info.set_mtime(time) } - fn read_page(&self, _idx: usize, _frame: &VmFrame) -> Result<()> { - Err(Error::new(Errno::EPERM)) - } - - fn write_page(&self, _idx: usize, _frame: &VmFrame) -> Result<()> { - Err(Error::new(Errno::EPERM)) - } - fn read_at(&self, _offset: usize, _buf: &mut [u8]) -> Result { Err(Error::new(Errno::EPERM)) } diff --git a/services/libs/aster-std/src/fs/ramfs/fs.rs b/services/libs/aster-std/src/fs/ramfs/fs.rs index 93467c71d..efdc601ce 100644 --- a/services/libs/aster-std/src/fs/ramfs/fs.rs +++ b/services/libs/aster-std/src/fs/ramfs/fs.rs @@ -1,6 +1,6 @@ -use alloc::str; -use aster_frame::sync::{RwLock, RwLockWriteGuard}; -use aster_frame::vm::{VmFrame, VmIo}; +use aster_frame::sync::RwLockWriteGuard; +use aster_frame::vm::VmFrame; +use aster_frame::vm::VmIo; use aster_rights::Full; use aster_util::slot_vec::SlotVec; use core::sync::atomic::{AtomicUsize, Ordering}; @@ -10,8 +10,8 @@ use super::*; use crate::events::IoEvents; use crate::fs::device::Device; use crate::fs::utils::{ - DirentVisitor, FileSystem, FsFlags, Inode, InodeMode, InodeType, IoctlCmd, Metadata, PageCache, - SuperBlock, + CStr256, DirentVisitor, FileSystem, FsFlags, 
Inode, InodeMode, InodeType, IoctlCmd, Metadata, + PageCache, PageCacheBackend, SuperBlock, }; use crate::prelude::*; use crate::process::signal::Poller; @@ -219,7 +219,7 @@ impl Inner { } struct DirEntry { - children: SlotVec<(Str256, Arc)>, + children: SlotVec<(CStr256, Arc)>, this: Weak, parent: Weak, } @@ -248,7 +248,7 @@ impl DirEntry { } else { self.children .iter() - .any(|(child, _)| child.as_ref() == name) + .any(|(child, _)| child.as_str().unwrap() == name) } } @@ -260,16 +260,16 @@ impl DirEntry { } else { self.children .idxes_and_items() - .find(|(_, (child, _))| child.as_ref() == name) + .find(|(_, (child, _))| child.as_str().unwrap() == name) .map(|(idx, (_, inode))| (idx + 2, inode.clone())) } } fn append_entry(&mut self, name: &str, inode: Arc) -> usize { - self.children.put((Str256::from(name), inode)) + self.children.put((CStr256::from(name), inode)) } - fn remove_entry(&mut self, idx: usize) -> Option<(Str256, Arc)> { + fn remove_entry(&mut self, idx: usize) -> Option<(CStr256, Arc)> { assert!(idx >= 2); self.children.remove(idx - 2) } @@ -277,8 +277,8 @@ impl DirEntry { fn substitute_entry( &mut self, idx: usize, - new_entry: (Str256, Arc), - ) -> Option<(Str256, Arc)> { + new_entry: (CStr256, Arc), + ) -> Option<(CStr256, Arc)> { assert!(idx >= 2); self.children.put_at(idx - 2, new_entry) } @@ -315,7 +315,7 @@ impl DirEntry { .skip_while(|(offset, _)| offset < &start_idx) { visitor.visit( - name.as_ref(), + name.as_str().unwrap(), child.metadata().ino as u64, child.metadata().type_, offset, @@ -337,36 +337,6 @@ impl DirEntry { } } -#[repr(C)] -#[derive(Clone, PartialEq, PartialOrd, Eq, Ord)] -pub struct Str256([u8; 256]); - -impl AsRef for Str256 { - fn as_ref(&self) -> &str { - let len = self.0.iter().enumerate().find(|(_, &b)| b == 0).unwrap().0; - str::from_utf8(&self.0[0..len]).unwrap() - } -} - -impl<'a> From<&'a str> for Str256 { - fn from(s: &'a str) -> Self { - let mut inner = [0u8; 256]; - let len = if s.len() > NAME_MAX { - NAME_MAX - 
} else { - s.len() - }; - inner[0..len].copy_from_slice(&s.as_bytes()[0..len]); - Self(inner) - } -} - -impl core::fmt::Debug for Str256 { - fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { - write!(f, "{}", self.as_ref()) - } -} - impl RamInode { fn new_dir(fs: &Arc, mode: InodeMode, parent: &Weak) -> Arc { Arc::new_cyclic(|weak_self| { @@ -439,7 +409,7 @@ impl RamInode { } } -impl Inode for RamInode { +impl PageCacheBackend for RamInode { fn read_page(&self, _idx: usize, _frame: &VmFrame) -> Result<()> { // do nothing Ok(()) @@ -450,6 +420,12 @@ impl Inode for RamInode { Ok(()) } + fn npages(&self) -> usize { + self.0.read().metadata.blocks + } +} + +impl Inode for RamInode { fn page_cache(&self) -> Option> { self.0 .read() @@ -515,8 +491,9 @@ impl Inode for RamInode { self.0.read().metadata.size } - fn resize(&self, new_size: usize) { - self.0.write().resize(new_size) + fn resize(&self, new_size: usize) -> Result<()> { + self.0.write().resize(new_size); + Ok(()) } fn atime(&self) -> Duration { @@ -535,6 +512,10 @@ impl Inode for RamInode { self.0.write().metadata.mtime = time; } + fn ino(&self) -> u64 { + self.0.read().metadata.ino as _ + } + fn type_(&self) -> InodeType { self.0.read().metadata.type_ } @@ -780,7 +761,7 @@ impl Inode for RamInode { let (idx, inode) = self_dir .get_entry(old_name) .ok_or(Error::new(Errno::ENOENT))?; - self_dir.substitute_entry(idx, (Str256::from(new_name), inode)); + self_dir.substitute_entry(idx, (CStr256::from(new_name), inode)); } else { let (mut self_inode, mut target_inode) = write_lock_two_inodes(self, target); let self_dir = self_inode.inner.as_direntry_mut().unwrap(); diff --git a/services/libs/aster-std/src/fs/rootfs.rs b/services/libs/aster-std/src/fs/rootfs.rs index 3feb36f0f..ea6113580 100644 --- a/services/libs/aster-std/src/fs/rootfs.rs +++ b/services/libs/aster-std/src/fs/rootfs.rs @@ -3,7 +3,7 @@ use crate::prelude::*; use super::fs_resolver::{FsPath, FsResolver}; use super::procfs::ProcFS; use 
super::ramfs::RamFS; -use super::utils::{InodeMode, InodeType, MountNode}; +use super::utils::{FileSystem, InodeMode, InodeType, MountNode}; use cpio_decoder::{CpioDecoder, FileType}; use lending_iterator::LendingIterator; @@ -77,11 +77,18 @@ pub fn init(initramfs_buf: &[u8]) -> Result<()> { // Mount DevFS let dev_dentry = fs.lookup(&FsPath::try_from("/dev")?)?; dev_dentry.mount(RamFS::new())?; + println!("[kernel] rootfs is ready"); Ok(()) } +pub fn mount_fs_at(fs: Arc, fs_path: &FsPath) -> Result<()> { + let target_dentry = FsResolver::new().lookup(fs_path)?; + target_dentry.mount(fs)?; + Ok(()) +} + static ROOT_MOUNT: Once> = Once::new(); pub fn init_root_mount() { diff --git a/services/libs/aster-std/src/fs/utils/inode.rs b/services/libs/aster-std/src/fs/utils/inode.rs index 24b06a9de..01143dc47 100644 --- a/services/libs/aster-std/src/fs/utils/inode.rs +++ b/services/libs/aster-std/src/fs/utils/inode.rs @@ -1,4 +1,3 @@ -use aster_frame::vm::VmFrame; use aster_rights::Full; use core::time::Duration; use core2::io::{Error as IoError, ErrorKind as IoErrorKind, Result as IoResult, Write}; @@ -232,10 +231,12 @@ pub trait Inode: Any + Sync + Send { self.len() == 0 } - fn resize(&self, new_size: usize); + fn resize(&self, new_size: usize) -> Result<()>; fn metadata(&self) -> Metadata; + fn ino(&self) -> u64; + fn type_(&self) -> InodeType; fn mode(&self) -> InodeMode; @@ -250,14 +251,6 @@ pub trait Inode: Any + Sync + Send { fn set_mtime(&self, time: Duration); - fn read_page(&self, idx: usize, frame: &VmFrame) -> Result<()> { - Err(Error::new(Errno::EISDIR)) - } - - fn write_page(&self, idx: usize, frame: &VmFrame) -> Result<()> { - Err(Error::new(Errno::EISDIR)) - } - fn page_cache(&self) -> Option> { None } diff --git a/services/libs/aster-std/src/fs/utils/mod.rs b/services/libs/aster-std/src/fs/utils/mod.rs index 441ed6674..f5595834c 100644 --- a/services/libs/aster-std/src/fs/utils/mod.rs +++ b/services/libs/aster-std/src/fs/utils/mod.rs @@ -11,7 +11,7 @@ pub 
use fs::{FileSystem, FsFlags, SuperBlock}; pub use inode::{Inode, InodeMode, InodeType, Metadata}; pub use ioctl::IoctlCmd; pub use mount::MountNode; -pub use page_cache::PageCache; +pub use page_cache::{PageCache, PageCacheBackend}; pub use status_flags::StatusFlags; mod access_mode; @@ -28,6 +28,8 @@ mod mount; mod page_cache; mod status_flags; +use crate::prelude::*; + #[derive(Copy, PartialEq, Eq, Clone, Debug)] pub enum SeekFrom { Start(usize), @@ -43,3 +45,152 @@ pub const NAME_MAX: usize = 255; /// The upper limit for resolving symbolic links pub const SYMLINKS_MAX: usize = 40; + +pub type CStr256 = FixedCStr<256>; +pub type Str16 = FixedStr<16>; +pub type Str64 = FixedStr<64>; + +/// An owned C-compatible string with a fixed capacity of `N`. +/// +/// The string is terminated with a null byte. +#[repr(C)] +#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Pod)] +pub struct FixedCStr([u8; N]); + +impl FixedCStr { + pub fn len(&self) -> usize { + self.0.iter().position(|&b| b == 0).unwrap() + } + + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + + pub fn as_str(&self) -> Result<&str> { + Ok(alloc::str::from_utf8(self.as_bytes())?) + } + + pub fn as_cstr(&self) -> Result<&CStr> { + Ok(CStr::from_bytes_with_nul(self.as_bytes_with_nul())?) 
+ } + + pub fn as_bytes(&self) -> &[u8] { + &self.0[0..self.len()] + } + + pub fn as_bytes_with_nul(&self) -> &[u8] { + &self.0[0..=self.len()] + } +} + +impl<'a, const N: usize> From<&'a [u8]> for FixedCStr { + fn from(bytes: &'a [u8]) -> Self { + assert!(N > 0); + + let mut inner = [0u8; N]; + let len = { + let mut nul_byte_idx = match bytes.iter().position(|&b| b == 0) { + Some(idx) => idx, + None => bytes.len(), + }; + if nul_byte_idx >= N { + nul_byte_idx = N - 1; + } + nul_byte_idx + }; + inner[0..len].copy_from_slice(&bytes[0..len]); + Self(inner) + } +} + +impl<'a, const N: usize> From<&'a str> for FixedCStr { + fn from(string: &'a str) -> Self { + let bytes = string.as_bytes(); + Self::from(bytes) + } +} + +impl<'a, const N: usize> From<&'a CStr> for FixedCStr { + fn from(cstr: &'a CStr) -> Self { + let bytes = cstr.to_bytes_with_nul(); + Self::from(bytes) + } +} + +impl Default for FixedCStr { + fn default() -> Self { + Self([0u8; N]) + } +} + +impl Debug for FixedCStr { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + match self.as_cstr() { + Ok(cstr) => write!(f, "{:?}", cstr), + Err(_) => write!(f, "{:?}", self.as_bytes()), + } + } +} + +/// An owned string with a fixed capacity of `N`. +#[repr(C)] +#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Pod)] +pub struct FixedStr([u8; N]); + +impl FixedStr { + pub fn len(&self) -> usize { + self.0.iter().position(|&b| b == 0).unwrap_or(N) + } + + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + + pub fn as_str(&self) -> Result<&str> { + Ok(alloc::str::from_utf8(self.as_bytes())?) 
+ } + + pub fn as_bytes(&self) -> &[u8] { + &self.0[0..self.len()] + } +} + +impl<'a, const N: usize> From<&'a [u8]> for FixedStr { + fn from(bytes: &'a [u8]) -> Self { + let mut inner = [0u8; N]; + let len = { + let mut nul_byte_idx = match bytes.iter().position(|&b| b == 0) { + Some(idx) => idx, + None => bytes.len(), + }; + if nul_byte_idx > N { + nul_byte_idx = N; + } + nul_byte_idx + }; + inner[0..len].copy_from_slice(&bytes[0..len]); + Self(inner) + } +} + +impl<'a, const N: usize> From<&'a str> for FixedStr { + fn from(string: &'a str) -> Self { + let bytes = string.as_bytes(); + Self::from(bytes) + } +} + +impl Default for FixedStr { + fn default() -> Self { + Self([0u8; N]) + } +} + +impl Debug for FixedStr { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + match self.as_str() { + Ok(string) => write!(f, "{}", string), + Err(_) => write!(f, "{:?}", self.as_bytes()), + } + } +} diff --git a/services/libs/aster-std/src/fs/utils/page_cache.rs b/services/libs/aster-std/src/fs/utils/page_cache.rs index f22f9ab98..e685ab1db 100644 --- a/services/libs/aster-std/src/fs/utils/page_cache.rs +++ b/services/libs/aster-std/src/fs/utils/page_cache.rs @@ -1,4 +1,3 @@ -use super::Inode; use crate::prelude::*; use crate::vm::vmo::{get_page_idx_range, Pager, Vmo, VmoFlags, VmoOptions}; use aster_rights::Full; @@ -13,9 +12,9 @@ pub struct PageCache { } impl PageCache { - /// Creates an empty size page cache associated with a new inode. - pub fn new(backed_inode: Weak) -> Result { - let manager = Arc::new(PageCacheManager::new(backed_inode)); + /// Creates an empty size page cache associated with a new backend. + pub fn new(backend: Weak) -> Result { + let manager = Arc::new(PageCacheManager::new(backend)); let pages = VmoOptions::::new(0) .flags(VmoFlags::RESIZABLE) .pager(manager.clone()) @@ -23,12 +22,12 @@ impl PageCache { Ok(Self { pages, manager }) } - /// Creates a page cache associated with an existing inode. 
+ /// Creates a page cache associated with an existing backend. /// - /// The `capacity` is the initial cache size required by the inode. - /// It is usually used the same size as the inode. - pub fn with_capacity(capacity: usize, backed_inode: Weak) -> Result { - let manager = Arc::new(PageCacheManager::new(backed_inode)); + /// The `capacity` is the initial cache size required by the backend. + /// This size usually corresponds to the size of the backend. + pub fn with_capacity(capacity: usize, backend: Weak) -> Result { + let manager = Arc::new(PageCacheManager::new(backend)); let pages = VmoOptions::::new(capacity) .flags(VmoFlags::RESIZABLE) .pager(manager.clone()) @@ -36,7 +35,7 @@ impl PageCache { Ok(Self { pages, manager }) } - /// Returns the Vmo object backed by inode. + /// Returns the Vmo object. // TODO: The capability is too high, restrict it to eliminate the possibility of misuse. // For example, the `resize` api should be forbidden. pub fn pages(&self) -> Vmo { @@ -44,10 +43,15 @@ impl PageCache { } /// Evict the data within a specified range from the page cache and persist - /// them to the disk. + /// them to the backend. pub fn evict_range(&self, range: Range) -> Result<()> { self.manager.evict_range(range) } + + /// Returns the backend. 
+ pub fn backend(&self) -> Arc { + self.manager.backend() + } } impl Debug for PageCache { @@ -61,33 +65,36 @@ impl Debug for PageCache { struct PageCacheManager { pages: Mutex>, - backed_inode: Weak, + backend: Weak, } impl PageCacheManager { - pub fn new(backed_inode: Weak) -> Self { + pub fn new(backend: Weak) -> Self { Self { pages: Mutex::new(LruCache::unbounded()), - backed_inode, + backend, } } + pub fn backend(&self) -> Arc { + self.backend.upgrade().unwrap() + } + pub fn evict_range(&self, range: Range) -> Result<()> { let page_idx_range = get_page_idx_range(&range); let mut pages = self.pages.lock(); - for page_idx in page_idx_range { - if let Some(page) = pages.get_mut(&page_idx) { + for idx in page_idx_range { + if let Some(page) = pages.get_mut(&idx) { if let PageState::Dirty = page.state() { - self.backed_inode - .upgrade() - .unwrap() - .write_page(page_idx, page.frame())?; - page.set_state(PageState::UpToDate); + let backend = self.backend(); + if idx < backend.npages() { + backend.write_page(idx, page.frame())?; + page.set_state(PageState::UpToDate); + } } - } else { - warn!("page {} is not in page cache, do nothing", page_idx); } } + Ok(()) } } @@ -101,53 +108,50 @@ impl Debug for PageCacheManager { } impl Pager for PageCacheManager { - fn commit_page(&self, offset: usize) -> Result { - let page_idx = offset / PAGE_SIZE; + fn commit_page(&self, idx: usize) -> Result { let mut pages = self.pages.lock(); - let frame = if let Some(page) = pages.get(&page_idx) { + let frame = if let Some(page) = pages.get(&idx) { page.frame().clone() } else { - let backed_inode = self.backed_inode.upgrade().unwrap(); - let page = if offset < backed_inode.len() { + let backend = self.backend(); + let page = if idx < backend.npages() { let mut page = Page::alloc()?; - backed_inode.read_page(page_idx, page.frame())?; + backend.read_page(idx, page.frame())?; page.set_state(PageState::UpToDate); page } else { Page::alloc_zero()? 
}; let frame = page.frame().clone(); - pages.put(page_idx, page); + pages.put(idx, page); frame }; + Ok(frame) } - fn update_page(&self, offset: usize) -> Result<()> { - let page_idx = offset / PAGE_SIZE; + fn update_page(&self, idx: usize) -> Result<()> { let mut pages = self.pages.lock(); - if let Some(page) = pages.get_mut(&page_idx) { + if let Some(page) = pages.get_mut(&idx) { page.set_state(PageState::Dirty); } else { - error!("page {} is not in page cache", page_idx); - panic!(); + warn!("The page {} is not in page cache", idx); } + Ok(()) } - fn decommit_page(&self, offset: usize) -> Result<()> { - let page_idx = offset / PAGE_SIZE; + fn decommit_page(&self, idx: usize) -> Result<()> { let mut pages = self.pages.lock(); - if let Some(page) = pages.pop(&page_idx) { + if let Some(page) = pages.pop(&idx) { if let PageState::Dirty = page.state() { - self.backed_inode - .upgrade() - .unwrap() - .write_page(page_idx, page.frame())? + let backend = self.backend(); + if idx < backend.npages() { + backend.write_page(idx, page.frame())?; + } } - } else { - warn!("page {} is not in page cache, do nothing", page_idx); } + Ok(()) } } @@ -200,3 +204,13 @@ enum PageState { /// The page is available to read and write. Dirty, } + +/// This trait represents the backend for the page cache. +pub trait PageCacheBackend: Sync + Send { + /// Reads a page from the backend. + fn read_page(&self, idx: usize, frame: &VmFrame) -> Result<()>; + /// Writes a page to the backend. + fn write_page(&self, idx: usize, frame: &VmFrame) -> Result<()>; + /// Returns the number of pages in the backend. 
+ fn npages(&self) -> usize; +} diff --git a/services/libs/aster-std/src/lib.rs b/services/libs/aster-std/src/lib.rs index 5a1b9c2cb..0186ece53 100644 --- a/services/libs/aster-std/src/lib.rs +++ b/services/libs/aster-std/src/lib.rs @@ -19,6 +19,8 @@ #![feature(register_tool)] #![feature(trait_upcasting)] #![feature(format_args_nl)] +#![feature(int_roundings)] +#![feature(step_trait)] #![register_tool(component_access_control)] use crate::{ @@ -73,6 +75,7 @@ fn init_thread() { current_thread!().tid() ); net::lazy_init(); + fs::lazy_init(); // driver::pci::virtio::block::block_device_test(); let thread = Thread::spawn_kernel_thread(ThreadOptions::new(|| { println!("[kernel] Hello world from kernel!"); diff --git a/services/libs/aster-std/src/prelude.rs b/services/libs/aster-std/src/prelude.rs index 13465285b..538603340 100644 --- a/services/libs/aster-std/src/prelude.rs +++ b/services/libs/aster-std/src/prelude.rs @@ -13,7 +13,7 @@ pub(crate) use alloc::sync::Weak; pub(crate) use alloc::vec; pub(crate) use alloc::vec::Vec; pub(crate) use aster_frame::config::PAGE_SIZE; -pub(crate) use aster_frame::sync::{Mutex, MutexGuard, RwLock, SpinLock, SpinLockGuard}; +pub(crate) use aster_frame::sync::{Mutex, MutexGuard, RwLock, RwMutex, SpinLock, SpinLockGuard}; pub(crate) use aster_frame::vm::Vaddr; pub(crate) use bitflags::bitflags; pub(crate) use core::any::Any; diff --git a/services/libs/aster-std/src/process/clone.rs b/services/libs/aster-std/src/process/clone.rs index 4f901de00..9ca391e71 100644 --- a/services/libs/aster-std/src/process/clone.rs +++ b/services/libs/aster-std/src/process/clone.rs @@ -379,13 +379,13 @@ fn clone_cpu_context( } fn clone_fs( - parent_fs: &Arc>, + parent_fs: &Arc>, clone_flags: CloneFlags, -) -> Arc> { +) -> Arc> { if clone_flags.contains(CloneFlags::CLONE_FS) { parent_fs.clone() } else { - Arc::new(RwLock::new(parent_fs.read().clone())) + Arc::new(RwMutex::new(parent_fs.read().clone())) } } diff --git 
a/services/libs/aster-std/src/process/process/builder.rs b/services/libs/aster-std/src/process/process/builder.rs index 7d4683566..7bbe12448 100644 --- a/services/libs/aster-std/src/process/process/builder.rs +++ b/services/libs/aster-std/src/process/process/builder.rs @@ -23,7 +23,7 @@ pub struct ProcessBuilder<'a> { envp: Option>, process_vm: Option, file_table: Option>>, - fs: Option>>, + fs: Option>>, umask: Option>>, resource_limits: Option, sig_dispositions: Option>>, @@ -64,7 +64,7 @@ impl<'a> ProcessBuilder<'a> { self } - pub fn fs(&mut self, fs: Arc>) -> &mut Self { + pub fn fs(&mut self, fs: Arc>) -> &mut Self { self.fs = Some(fs); self } @@ -142,7 +142,7 @@ impl<'a> ProcessBuilder<'a> { .unwrap(); let fs = fs - .or_else(|| Some(Arc::new(RwLock::new(FsResolver::new())))) + .or_else(|| Some(Arc::new(RwMutex::new(FsResolver::new())))) .unwrap(); let umask = umask diff --git a/services/libs/aster-std/src/process/process/mod.rs b/services/libs/aster-std/src/process/process/mod.rs index 64cdeaf2a..6ec298178 100644 --- a/services/libs/aster-std/src/process/process/mod.rs +++ b/services/libs/aster-std/src/process/process/mod.rs @@ -64,7 +64,7 @@ pub struct Process { /// File table file_table: Arc>, /// FsResolver - fs: Arc>, + fs: Arc>, /// umask umask: Arc>, /// resource limits @@ -84,7 +84,7 @@ impl Process { executable_path: String, process_vm: ProcessVm, file_table: Arc>, - fs: Arc>, + fs: Arc>, umask: Arc>, sig_dispositions: Arc>, resource_limits: ResourceLimits, @@ -496,7 +496,7 @@ impl Process { &self.file_table } - pub fn fs(&self) -> &Arc> { + pub fn fs(&self) -> &Arc> { &self.fs } @@ -595,7 +595,7 @@ mod test { String::new(), ProcessVm::alloc(), Arc::new(Mutex::new(FileTable::new())), - Arc::new(RwLock::new(FsResolver::new())), + Arc::new(RwMutex::new(FsResolver::new())), Arc::new(RwLock::new(FileCreationMask::default())), Arc::new(Mutex::new(SigDispositions::default())), ResourceLimits::default(), diff --git a/services/libs/aster-std/src/time/mod.rs 
b/services/libs/aster-std/src/time/mod.rs index 7c9e737da..f9c8682ed 100644 --- a/services/libs/aster-std/src/time/mod.rs +++ b/services/libs/aster-std/src/time/mod.rs @@ -104,3 +104,25 @@ pub fn now_as_duration(clock_id: &ClockID) -> Result { } } } + +/// Unix time measures time by the number of seconds that have elapsed since +/// the Unix epoch, without adjustments made due to leap seconds. +#[repr(C)] +#[derive(Debug, Default, Copy, Clone, Pod)] +pub struct UnixTime { + sec: u32, +} + +impl From for UnixTime { + fn from(duration: Duration) -> Self { + Self { + sec: duration.as_secs() as u32, + } + } +} + +impl From for Duration { + fn from(time: UnixTime) -> Self { + Duration::from_secs(time.sec as _) + } +} diff --git a/services/libs/aster-std/src/vm/vmo/mod.rs b/services/libs/aster-std/src/vm/vmo/mod.rs index 3d37eecf7..b84b2ccd9 100644 --- a/services/libs/aster-std/src/vm/vmo/mod.rs +++ b/services/libs/aster-std/src/vm/vmo/mod.rs @@ -154,7 +154,7 @@ impl VmoInner { } let frame = match &self.pager { None => VmAllocOptions::new(1).alloc_single()?, - Some(pager) => pager.commit_page(offset)?, + Some(pager) => pager.commit_page(page_idx)?, }; self.insert_frame(page_idx, frame); Ok(()) @@ -164,7 +164,7 @@ impl VmoInner { let page_idx = offset / PAGE_SIZE; if self.committed_pages.remove(&page_idx).is_some() { if let Some(pager) = &self.pager { - pager.decommit_page(offset)?; + pager.decommit_page(page_idx)?; } } Ok(()) @@ -302,7 +302,7 @@ impl Vmo_ { if let Some(pager) = &self.inner.lock().pager { let page_idx_range = get_page_idx_range(&write_range); for page_idx in page_idx_range { - pager.update_page(page_idx * PAGE_SIZE)?; + pager.update_page(page_idx)?; } } Ok(()) diff --git a/services/libs/aster-std/src/vm/vmo/pager.rs b/services/libs/aster-std/src/vm/vmo/pager.rs index f73858689..d3893c350 100644 --- a/services/libs/aster-std/src/vm/vmo/pager.rs +++ b/services/libs/aster-std/src/vm/vmo/pager.rs @@ -11,7 +11,7 @@ use aster_frame::vm::VmFrame; /// Finally, 
when a frame is no longer needed (i.e., on decommits), /// the frame pager will also be notified. pub trait Pager: Send + Sync { - /// Ask the pager to provide a frame at a specified offset (in bytes). + /// Ask the pager to provide a frame at a specified index. /// /// After a page of a VMO is committed, the VMO shall not call this method /// again until the page is decommitted. But a robust implementation of @@ -22,13 +22,10 @@ pub trait Pager: Send + Sync { /// and is to be committed again, then the pager is free to return /// whatever frame that may or may not be the same as the last time. /// - /// It is up to the pager to decide the range of valid offsets. - /// - /// The offset will be rounded down to page boundary. - fn commit_page(&self, offset: usize) -> Result; + /// It is up to the pager to decide the range of valid indices. + fn commit_page(&self, idx: usize) -> Result; - /// Notify the pager that the frame at a specified offset (in bytes) - /// has been updated. + /// Notify the pager that the frame at a specified index has been updated. /// /// Being aware of the updates allow the pager (e.g., an inode) to /// know which pages are dirty and only write back the _dirty_ pages back @@ -38,12 +35,9 @@ pub trait Pager: Send + Sync { /// But a robust implementation of `Pager` should not make /// such an assumption for its correctness; instead, it should simply ignore the /// call or return an error. - /// - /// The offset will be rounded down to page boundary. - fn update_page(&self, offset: usize) -> Result<()>; + fn update_page(&self, idx: usize) -> Result<()>; - /// Notify the pager that the frame at the specified offset (in bytes) - /// has been decommitted. + /// Notify the pager that the frame at the specified index has been decommitted. /// /// Knowing that a frame is no longer needed, the pager (e.g., an inode) /// can free the frame after writing back its data to the disk. 
@@ -52,7 +46,5 @@ pub trait Pager: Send + Sync { /// But a robust implementation of `Pager` should not make /// such an assumption for its correctness; instead, it should simply ignore the /// call or return an error. - /// - /// The offset will be rounded down to page boundary. - fn decommit_page(&self, offset: usize) -> Result<()>; + fn decommit_page(&self, idx: usize) -> Result<()>; } diff --git a/services/libs/aster-util/src/id_allocator.rs b/services/libs/aster-util/src/id_allocator.rs index d147caf0d..2938b084b 100644 --- a/services/libs/aster-util/src/id_allocator.rs +++ b/services/libs/aster-util/src/id_allocator.rs @@ -1,15 +1,16 @@ use bitvec::prelude::BitVec; +use core::fmt::Debug; -/// An id allocator with BitVec. -/// The true bit means the id is allocated and vice versa. -#[derive(Clone, Debug)] +/// An id allocator implemented by the bitmap. +/// The true bit implies that the id is allocated, and vice versa. +#[derive(Clone)] pub struct IdAlloc { - bitset: BitVec, + bitset: BitVec, first_available_id: usize, } impl IdAlloc { - /// Constructs a new id allocator with the maximum capacity. + /// Constructs a new id allocator with a maximum capacity. pub fn with_capacity(capacity: usize) -> Self { let mut bitset = BitVec::with_capacity(capacity); bitset.resize(capacity, false); @@ -19,9 +20,33 @@ impl IdAlloc { } } - /// Allocates and returns an id. + /// Constructs a new id allocator from a slice of `u8` bytes and a maximum capacity. /// - /// Returns None if can not allocate. + /// The slice of `u8` bytes is the raw data of a bitmap. 
+ pub fn from_bytes_with_capacity(slice: &[u8], capacity: usize) -> Self { + let bitset = if capacity > slice.len() * 8 { + let mut bitset = BitVec::from_slice(slice); + bitset.resize(capacity, false); + bitset + } else { + let mut bitset = BitVec::from_slice(&slice[..capacity.div_ceil(8)]); + bitset.truncate(capacity); + bitset + }; + + let first_available_id = (0..bitset.len()) + .find(|&i| !bitset[i]) + .map_or(bitset.len(), |i| i); + + Self { + bitset, + first_available_id, + } + } + + /// Allocates and returns a new `id`. + /// + /// If allocation is not possible, it returns `None`. pub fn alloc(&mut self) -> Option { if self.first_available_id < self.bitset.len() { let id = self.first_available_id; @@ -35,9 +60,11 @@ impl IdAlloc { } } - /// Frees the allocated id. + /// Releases the allocated `id`. /// - /// This panics if the id is out of bounds. + /// # Panic + /// + /// If the `id` is out of bounds, this method will panic. pub fn free(&mut self, id: usize) { debug_assert!(self.is_allocated(id)); @@ -47,10 +74,26 @@ impl IdAlloc { } } - /// Returns true is the id is allocated. + /// Returns true if the `id` is allocated. /// - /// This panics if the id is out of bounds. + /// # Panic + /// + /// If the `id` is out of bounds, this method will panic. pub fn is_allocated(&self, id: usize) -> bool { self.bitset[id] } + + /// Views the id allocator as a slice of `u8` bytes. + pub fn as_bytes(&self) -> &[u8] { + self.bitset.as_raw_slice() + } +} + +impl Debug for IdAlloc { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + f.debug_struct("IdAlloc") + .field("len", &self.bitset.len()) + .field("first_available_id", &self.first_available_id) + .finish() + } } diff --git a/services/libs/aster-util/src/lib.rs b/services/libs/aster-util/src/lib.rs index 27f75cc71..783f59b22 100644 --- a/services/libs/aster-util/src/lib.rs +++ b/services/libs/aster-util/src/lib.rs @@ -1,6 +1,7 @@ //! The util of Asterinas. 
#![no_std] #![forbid(unsafe_code)] +#![feature(int_roundings)] extern crate alloc;