feat(ebpf):[WIP] add eBPF support (#948)

* feat(kprobe): Add basic kprobe support for x86_64

* feat: add ebpf support (#912)

- 实现bpf()一部分命令,包括几种基本map,相关的helper函数
- 实现部分perf相关的数据结构
- 暂时为文件实现简单mmap
- 实现一个使用kprobe统计syscall 调用次数的ebpf程序

对eBPF支持程度(基本):

- 简单的eBPF程序(没有指定特殊的Map)
- 使用内核已经实现的Map的eBPF程序
- 可以和kprobe配合使用
- 内核Map相关的接口定义已经实现,添加新的Map较为简单

不支持的功能:
- 区分不同的eBPF程序类型(Network/Cgroup)并限定可调用的helper函数集
- 与内核其它跟踪机制配合(tracepoint)
- 其它helper和Map


todo

- [ ]  修改mmap,需要讨论,因为这个和块缓存层相关
- [x]  添加文档
- [x]  修复可能的错误
- [x] 增加rbpf版本信息

* feat: add /sys/devices/system/cpu/possible file

* feat: add /sys/devices/system/cpu/online
This commit is contained in:
linfeng
2024-10-25 15:59:57 +08:00
committed by GitHub
parent 80c9e8f8f0
commit fae6e9ade4
126 changed files with 29529 additions and 62 deletions

View File

@ -3,12 +3,12 @@
//! 架构相关的处理逻辑参考: https://code.dragonos.org.cn/xref/linux-6.6.21/arch/riscv/kernel/traps.c
use core::hint::spin_loop;
use log::error;
use log::{error, trace};
use system_error::SystemError;
use crate::{arch::syscall::syscall_handler, driver::irqchip::riscv_intc::riscv_intc_irq};
use super::TrapFrame;
use crate::exception::ebreak::EBreak;
use crate::{arch::syscall::syscall_handler, driver::irqchip::riscv_intc::riscv_intc_irq};
type ExceptionHandler = fn(&mut TrapFrame) -> Result<(), SystemError>;
@ -93,11 +93,10 @@ fn do_trap_insn_illegal(_trap_frame: &mut TrapFrame) -> Result<(), SystemError>
}
/// 处理断点异常 #3
fn do_trap_break(_trap_frame: &mut TrapFrame) -> Result<(), SystemError> {
error!("riscv64_do_irq: do_trap_break");
loop {
spin_loop();
}
fn do_trap_break(trap_frame: &mut TrapFrame) -> Result<(), SystemError> {
trace!("riscv64_do_irq: do_trap_break");
// handle breakpoint
EBreak::handle(trap_frame)
}
/// 处理加载地址不对齐异常 #4

View File

@ -1,3 +1,5 @@
use core::any::Any;
use kprobe::ProbeArgs;
use riscv::register::{scause::Scause, sstatus::Sstatus};
use system_error::SystemError;
@ -160,4 +162,21 @@ impl TrapFrame {
pub fn set_return_value(&mut self, value: usize) {
self.a0 = value;
}
/// 设置当前的程序计数器
pub fn set_pc(&mut self, pc: usize) {
self.epc = pc;
}
}
impl ProbeArgs for TrapFrame {
fn as_any(&self) -> &dyn Any {
self
}
fn break_address(&self) -> usize {
self.epc
}
fn debug_address(&self) -> usize {
self.epc
}
}

View File

@ -0,0 +1,85 @@
use crate::arch::interrupt::TrapFrame;
/// Redirect execution to `step_addr` as the "single-step" setup.
///
/// NOTE(review): unlike the x86 version there is no hardware trap flag set
/// here — presumably the riscv kprobe path single-steps by executing a copied
/// instruction followed by another ebreak; confirm against the kprobe crate.
pub fn setup_single_step(trap_frame: &mut TrapFrame, step_addr: usize) {
    trap_frame.set_pc(step_addr);
}

/// Finish the emulated single step by pointing the pc back at `return_addr`.
pub fn clear_single_step(trap_frame: &mut TrapFrame, return_addr: usize) {
    trap_frame.set_pc(return_addr);
}
/// Snapshot of the RISC-V 64 general-purpose register file at a probe hit.
///
/// `#[repr(C)]` keeps a stable, C-compatible layout so the struct can be
/// handed to eBPF programs as their probe context.
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct KProbeContext {
    pub pc: usize, // program counter (taken from TrapFrame::epc)
    pub ra: usize, // return address
    pub sp: usize, // stack pointer
    pub gp: usize, // global pointer
    pub tp: usize, // thread pointer
    // temporaries t0-t2
    pub t0: usize,
    pub t1: usize,
    pub t2: usize,
    // callee-saved s0-s1
    pub s0: usize,
    pub s1: usize,
    // argument / return registers a0-a7
    pub a0: usize,
    pub a1: usize,
    pub a2: usize,
    pub a3: usize,
    pub a4: usize,
    pub a5: usize,
    pub a6: usize,
    pub a7: usize,
    // callee-saved s2-s11
    pub s2: usize,
    pub s3: usize,
    pub s4: usize,
    pub s5: usize,
    pub s6: usize,
    pub s7: usize,
    pub s8: usize,
    pub s9: usize,
    pub s10: usize,
    pub s11: usize,
    // temporaries t3-t6
    pub t3: usize,
    pub t4: usize,
    pub t5: usize,
    pub t6: usize,
}
// Build a KProbeContext by copying every general-purpose register out of the
// trap frame; `pc` is sourced from `epc`, the address of the probed
// instruction.
impl From<&TrapFrame> for KProbeContext {
    fn from(trap_frame: &TrapFrame) -> Self {
        Self {
            pc: trap_frame.epc, // epc holds the pc at the trap site
            ra: trap_frame.ra,
            sp: trap_frame.sp,
            gp: trap_frame.gp,
            tp: trap_frame.tp,
            t0: trap_frame.t0,
            t1: trap_frame.t1,
            t2: trap_frame.t2,
            s0: trap_frame.s0,
            s1: trap_frame.s1,
            a0: trap_frame.a0,
            a1: trap_frame.a1,
            a2: trap_frame.a2,
            a3: trap_frame.a3,
            a4: trap_frame.a4,
            a5: trap_frame.a5,
            a6: trap_frame.a6,
            a7: trap_frame.a7,
            s2: trap_frame.s2,
            s3: trap_frame.s3,
            s4: trap_frame.s4,
            s5: trap_frame.s5,
            s6: trap_frame.s6,
            s7: trap_frame.s7,
            s8: trap_frame.s8,
            s9: trap_frame.s9,
            s10: trap_frame.s10,
            s11: trap_frame.s11,
            t3: trap_frame.t3,
            t4: trap_frame.t4,
            t5: trap_frame.t5,
            t6: trap_frame.t6,
        }
    }
}

View File

@ -5,6 +5,7 @@ pub mod elf;
pub mod init;
pub mod interrupt;
pub mod ipc;
pub mod kprobe;
mod kvm;
pub mod mm;
pub mod msi;

View File

@ -4,11 +4,12 @@ pub mod ipi;
pub mod msi;
pub mod trap;
use core::any::Any;
use core::{
arch::asm,
sync::atomic::{compiler_fence, Ordering},
};
use kprobe::ProbeArgs;
use log::error;
use system_error::SystemError;
@ -177,4 +178,21 @@ impl TrapFrame {
pub fn is_from_user(&self) -> bool {
return (self.cs & 0x3) != 0;
}
/// 设置当前的程序计数器
pub fn set_pc(&mut self, pc: usize) {
self.rip = pc as u64;
}
}
impl ProbeArgs for TrapFrame {
fn as_any(&self) -> &dyn Any {
self
}
fn break_address(&self) -> usize {
(self.rip - 1) as usize
}
fn debug_address(&self) -> usize {
self.rip as usize
}
}

View File

@ -1,6 +1,12 @@
use log::{error, warn};
use log::{error, trace, warn};
use system_error::SystemError;
use super::{
entry::{set_intr_gate, set_system_trap_gate},
TrapFrame,
};
use crate::exception::debug::DebugException;
use crate::exception::ebreak::EBreak;
use crate::{
arch::{CurrentIrqArch, MMArch},
exception::InterruptArch,
@ -9,11 +15,6 @@ use crate::{
smp::core::smp_get_processor_id,
};
use super::{
entry::{set_intr_gate, set_system_trap_gate},
TrapFrame,
};
extern "C" {
fn trap_divide_error();
fn trap_debug();
@ -125,8 +126,8 @@ unsafe extern "C" fn do_divide_error(regs: &'static TrapFrame, error_code: u64)
/// 处理调试异常 1 #DB
#[no_mangle]
unsafe extern "C" fn do_debug(regs: &'static TrapFrame, error_code: u64) {
error!(
unsafe extern "C" fn do_debug(regs: &'static mut TrapFrame, error_code: u64) {
trace!(
"do_debug(1), \tError code: {:#x},\trsp: {:#x},\trip: {:#x},\t CPU: {}, \tpid: {:?}",
error_code,
regs.rsp,
@ -134,7 +135,7 @@ unsafe extern "C" fn do_debug(regs: &'static TrapFrame, error_code: u64) {
smp_get_processor_id().data(),
ProcessManager::current_pid()
);
panic!("Debug Exception");
DebugException::handle(regs).unwrap();
}
/// 处理NMI中断 2 NMI
@ -153,8 +154,8 @@ unsafe extern "C" fn do_nmi(regs: &'static TrapFrame, error_code: u64) {
/// 处理断点异常 3 #BP
#[no_mangle]
unsafe extern "C" fn do_int3(regs: &'static TrapFrame, error_code: u64) {
error!(
unsafe extern "C" fn do_int3(regs: &'static mut TrapFrame, error_code: u64) {
trace!(
"do_int3(3), \tError code: {:#x},\trsp: {:#x},\trip: {:#x},\t CPU: {}, \tpid: {:?}",
error_code,
regs.rsp,
@ -162,7 +163,7 @@ unsafe extern "C" fn do_int3(regs: &'static TrapFrame, error_code: u64) {
smp_get_processor_id().data(),
ProcessManager::current_pid()
);
panic!("Int3");
EBreak::handle(regs).unwrap();
}
/// 处理溢出异常 4 #OF

View File

@ -0,0 +1,65 @@
use crate::arch::interrupt::TrapFrame;
/// x86_64 Trap Flag (TF) bit in RFLAGS; while set, the CPU raises a #DB
/// exception after every executed instruction.
const RFLAGS_TF: u64 = 0x100;

/// Arm hardware single-stepping and jump to `step_addr`.
///
/// Sets RFLAGS.TF so the very next instruction (the relocated probed
/// instruction at `step_addr`) traps into the debug handler.
pub fn setup_single_step(frame: &mut TrapFrame, step_addr: usize) {
    frame.rflags |= RFLAGS_TF;
    frame.set_pc(step_addr);
}

/// Disarm single-stepping and resume normal execution at `return_addr`.
pub fn clear_single_step(frame: &mut TrapFrame, return_addr: usize) {
    frame.rflags &= !RFLAGS_TF;
    frame.set_pc(return_addr);
}
/// Snapshot of the x86_64 register state at a probe hit, exposed to eBPF
/// programs as their probe context.
///
/// NOTE(review): field order appears to mirror the Linux x86_64 `pt_regs`
/// layout (r15 first, ss last) — confirm against the uapi bindings before
/// relying on byte-for-byte compatibility.
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct KProbeContext {
    pub r15: ::core::ffi::c_ulong,
    pub r14: ::core::ffi::c_ulong,
    pub r13: ::core::ffi::c_ulong,
    pub r12: ::core::ffi::c_ulong,
    pub rbp: ::core::ffi::c_ulong,
    pub rbx: ::core::ffi::c_ulong,
    pub r11: ::core::ffi::c_ulong,
    pub r10: ::core::ffi::c_ulong,
    pub r9: ::core::ffi::c_ulong,
    pub r8: ::core::ffi::c_ulong,
    pub rax: ::core::ffi::c_ulong,
    pub rcx: ::core::ffi::c_ulong,
    pub rdx: ::core::ffi::c_ulong,
    pub rsi: ::core::ffi::c_ulong,
    pub rdi: ::core::ffi::c_ulong,
    pub orig_rax: ::core::ffi::c_ulong, // syscall-entry rax; see From impl below
    pub rip: ::core::ffi::c_ulong,
    pub cs: ::core::ffi::c_ulong,
    pub eflags: ::core::ffi::c_ulong,
    pub rsp: ::core::ffi::c_ulong,
    pub ss: ::core::ffi::c_ulong,
}
// Copy the trap frame's registers into the probe context. `eflags` is filled
// from `rflags`; `orig_rax` is hard-coded to 0 because the trap frame does
// not carry the pre-syscall rax value.
impl From<&TrapFrame> for KProbeContext {
    fn from(trap_frame: &TrapFrame) -> Self {
        Self {
            r15: trap_frame.r15,
            r14: trap_frame.r14,
            r13: trap_frame.r13,
            r12: trap_frame.r12,
            rbp: trap_frame.rbp,
            rbx: trap_frame.rbx,
            r11: trap_frame.r11,
            r10: trap_frame.r10,
            r9: trap_frame.r9,
            r8: trap_frame.r8,
            rax: trap_frame.rax,
            rcx: trap_frame.rcx,
            rdx: trap_frame.rdx,
            rsi: trap_frame.rsi,
            rdi: trap_frame.rdi,
            orig_rax: 0, // not preserved by TrapFrame
            rip: trap_frame.rip,
            cs: trap_frame.cs,
            eflags: trap_frame.rflags,
            rsp: trap_frame.rsp,
            ss: trap_frame.ss,
        }
    }
}

View File

@ -8,6 +8,7 @@ pub mod fpu;
pub mod init;
pub mod interrupt;
pub mod ipc;
pub mod kprobe;
pub mod kvm;
pub mod libs;
pub mod mm;

View File

@ -0,0 +1,11 @@
// BPF helper-function IDs. The values match the Linux UAPI helper numbering
// (see bpf-helpers(7)); loaders emit calls by ID, so these must stay in sync
// with the toolchain.
pub const HELPER_MAP_LOOKUP_ELEM: u32 = 1; // bpf_map_lookup_elem
pub const HELPER_MAP_UPDATE_ELEM: u32 = 2; // bpf_map_update_elem
pub const HELPER_MAP_DELETE_ELEM: u32 = 3; // bpf_map_delete_elem
pub const HELPER_MAP_FOR_EACH_ELEM: u32 = 164; // bpf_for_each_map_elem
pub const HELPER_MAP_LOOKUP_PERCPU_ELEM: u32 = 195; // bpf_map_lookup_percpu_elem
pub const HELPER_PERF_EVENT_OUTPUT: u32 = 25; // bpf_perf_event_output
pub const HELPER_BPF_PROBE_READ: u32 = 4; // bpf_probe_read
pub const HELPER_TRACE_PRINTF: u32 = 6; // bpf_trace_printk
pub const HELPER_MAP_PUSH_ELEM: u32 = 87; // bpf_map_push_elem
pub const HELPER_MAP_POP_ELEM: u32 = 88; // bpf_map_pop_elem
pub const HELPER_MAP_PEEK_ELEM: u32 = 89; // bpf_map_peek_elem

View File

@ -0,0 +1,340 @@
mod consts;
mod print;
use crate::bpf::helper::print::trace_printf;
use crate::bpf::map::{BpfCallBackFn, BpfMap};
use crate::include::bindings::linux_bpf::BPF_F_CURRENT_CPU;
use crate::libs::lazy_init::Lazy;
use crate::smp::core::smp_get_processor_id;
use alloc::{collections::BTreeMap, sync::Arc};
use core::ffi::c_void;
use system_error::SystemError;
type RawBPFHelperFn = fn(u64, u64, u64, u64, u64) -> u64;
type Result<T> = core::result::Result<T, SystemError>;
/// Cast a helper function to the uniform `fn(u64,u64,u64,u64,u64) -> u64`
/// ABI stored in the helper table.
///
/// SAFETY (for users of this macro): the BPF runtime always calls the result
/// with five u64 arguments, so `$name` must accept at most five word-sized
/// parameters and return a word-sized value, or the call is UB.
macro_rules! define_func {
    ($name:ident) => {
        core::mem::transmute::<usize, RawBPFHelperFn>($name as usize)
    };
}
/// See https://ebpf-docs.dylanreimerink.nl/linux/helper-function/bpf_map_lookup_elem/
/// Raw `bpf_map_lookup_elem(map, key)` entry point: returns a value pointer
/// or NULL.
///
/// SAFETY: `map` must have been produced by `Arc::into_raw` on an
/// `Arc<BpfMap>`, and `key` must point to at least `key_size` readable bytes.
/// The Arc is rebuilt only to borrow the map; `Arc::into_raw` at the end
/// returns the refcount so the caller's reference stays alive.
unsafe fn raw_map_lookup_elem(map: *mut c_void, key: *const c_void) -> *const c_void {
    let map = Arc::from_raw(map as *const BpfMap);
    let key_size = map.key_size();
    let key = core::slice::from_raw_parts(key as *const u8, key_size);
    let value = map_lookup_elem(&map, key);
    // log::info!("<raw_map_lookup_elem>: {:x?}", value);
    // warning: We need to keep the map alive, so we don't drop it here.
    let _ = Arc::into_raw(map);
    // NOTE(review): the returned pointer refers to map-internal storage after
    // the inner lock is released; a concurrent update could invalidate it —
    // verify the locking strategy.
    match value {
        // Both a miss and an internal error surface as NULL, per the helper
        // contract.
        Ok(Some(value)) => value as *const c_void,
        _ => core::ptr::null_mut(),
    }
}
/// Look up `key` in `map`, returning a raw pointer to the stored value.
///
/// Internal errors are collapsed into `Ok(None)`, matching the
/// "NULL on miss" contract of the BPF helper.
pub fn map_lookup_elem(map: &Arc<BpfMap>, key: &[u8]) -> Result<Option<*const u8>> {
    let mut guard = map.inner_map().lock();
    let ptr = guard.lookup_elem(key).ok().flatten().map(|v| v.as_ptr());
    Ok(ptr)
}
/// See https://ebpf-docs.dylanreimerink.nl/linux/helper-function/bpf_perf_event_output/
///
/// See https://man7.org/linux/man-pages/man7/bpf-helpers.7.html
/// Raw `bpf_perf_event_output(ctx, map, flags, data, size)` entry point.
///
/// SAFETY: `map` must come from `Arc::into_raw` on an `Arc<BpfMap>` and
/// `data` must point to `size` readable bytes. The Arc's refcount is given
/// back via `into_raw` before returning.
unsafe fn raw_perf_event_output(
    ctx: *mut c_void,
    map: *mut c_void,
    flags: u64,
    data: *mut c_void,
    size: u64,
) -> i64 {
    // log::info!("<raw_perf_event_output>: {:x?}", data);
    let map = Arc::from_raw(map as *const BpfMap);
    let data = core::slice::from_raw_parts(data as *const u8, size as usize);
    let res = perf_event_output(ctx, &map, flags, data);
    // warning: We need to keep the map alive, so we don't drop it here.
    let _ = Arc::into_raw(map);
    match res {
        Ok(_) => 0,
        // SystemError values are returned directly as the helper's error code.
        Err(e) => e as i64,
    }
}
/// Shared implementation of `bpf_perf_event_output`.
///
/// `flags` packs two fields: the low 32 bits select the perf-event-array
/// index (or `BPF_F_CURRENT_CPU` for "this CPU"), the high 32 bits are
/// forwarded as helper flags.
///
/// # Errors
/// `ENOENT` if the selected slot holds no perf-event fd; `EINVAL` if the
/// stored slot is not a 4-byte fd.
pub fn perf_event_output(
    ctx: *mut c_void,
    map: &Arc<BpfMap>,
    flags: u64,
    data: &[u8],
) -> Result<()> {
    let mut binding = map.inner_map().lock();
    let index = flags as u32;
    let flags = (flags >> 32) as u32;
    // Resolve the target ring: explicit index, or the current CPU id.
    let key = if index == BPF_F_CURRENT_CPU as u32 {
        smp_get_processor_id().data()
    } else {
        index
    };
    // The perf-event-array map stores one perf-event fd per slot.
    let fd = binding
        .lookup_elem(&key.to_ne_bytes())?
        .ok_or(SystemError::ENOENT)?;
    let fd = u32::from_ne_bytes(fd.try_into().map_err(|_| SystemError::EINVAL)?);
    crate::perf::perf_event_output(ctx, fd as usize, flags, data)?;
    Ok(())
}
/// See https://ebpf-docs.dylanreimerink.nl/linux/helper-function/bpf_probe_read/
/// Raw `bpf_probe_read(dst, size, unsafe_ptr)` entry point.
///
/// NOTE(review): this function dereferences both raw pointers but is not
/// marked `unsafe` and performs no address validation — an invalid
/// `unsafe_ptr` from a buggy/verifier-bypassing program will fault the
/// kernel. Consider validating the range before copying.
fn raw_bpf_probe_read(dst: *mut c_void, size: u32, unsafe_ptr: *const c_void) -> i64 {
    log::info!(
        "raw_bpf_probe_read, dst:{:x}, size:{}, unsafe_ptr: {:x}",
        dst as usize,
        size,
        unsafe_ptr as usize
    );
    // Both slices are built with the same `size`, so the copy below cannot
    // panic on a length mismatch.
    let (dst, src) = unsafe {
        let dst = core::slice::from_raw_parts_mut(dst as *mut u8, size as usize);
        let src = core::slice::from_raw_parts(unsafe_ptr as *const u8, size as usize);
        (dst, src)
    };
    let res = bpf_probe_read(dst, src);
    match res {
        Ok(_) => 0,
        Err(e) => e as i64,
    }
}
/// For tracing programs, safely attempt to read size
/// bytes from kernel space address unsafe_ptr and
/// store the data in dst.
pub fn bpf_probe_read(dst: &mut [u8], src: &[u8]) -> Result<()> {
    log::info!("bpf_probe_read: len: {}", dst.len());
    // Plain memcpy; panics if the slices differ in length (callers construct
    // both from the same `size`). No fault handling is performed here.
    dst.copy_from_slice(src);
    Ok(())
}
/// Raw `bpf_map_update_elem(map, key, value, flags)` entry point.
///
/// SAFETY: `map` must come from `Arc::into_raw` on an `Arc<BpfMap>`; `key`
/// and `value` must point to at least `key_size` / `value_size` readable
/// bytes respectively. The refcount is handed back via `into_raw`.
unsafe fn raw_map_update_elem(
    map: *mut c_void,
    key: *const c_void,
    value: *const c_void,
    flags: u64,
) -> i64 {
    let map = Arc::from_raw(map as *const BpfMap);
    let key_size = map.key_size();
    let value_size = map.value_size();
    // log::info!("<raw_map_update_elem>: flags: {:x?}", flags);
    let key = core::slice::from_raw_parts(key as *const u8, key_size);
    let value = core::slice::from_raw_parts(value as *const u8, value_size);
    let res = map_update_elem(&map, key, value, flags);
    let _ = Arc::into_raw(map);
    match res {
        Ok(_) => 0,
        Err(e) => e as _,
    }
}
/// Insert or overwrite `value` for `key` in `map`, forwarding `flags` to the
/// concrete map implementation.
pub fn map_update_elem(map: &Arc<BpfMap>, key: &[u8], value: &[u8], flags: u64) -> Result<()> {
    map.inner_map().lock().update_elem(key, value, flags)
}
/// Delete entry with key from map.
///
/// The delete map element helper call is used to delete values from maps.
/// Raw `bpf_map_delete_elem(map, key)` entry point.
///
/// SAFETY: `map` must come from `Arc::into_raw` on an `Arc<BpfMap>` and
/// `key` must point to `key_size` readable bytes; the refcount is handed
/// back via `into_raw` before returning.
unsafe fn raw_map_delete_elem(map: *mut c_void, key: *const c_void) -> i64 {
    let map = Arc::from_raw(map as *const BpfMap);
    let key_size = map.key_size();
    let key = core::slice::from_raw_parts(key as *const u8, key_size);
    let res = map_delete_elem(&map, key);
    let _ = Arc::into_raw(map);
    match res {
        Ok(_) => 0,
        Err(e) => e as i64,
    }
}
/// Remove the entry for `key` from `map`; the concrete map decides whether a
/// missing key is an error.
pub fn map_delete_elem(map: &Arc<BpfMap>, key: &[u8]) -> Result<()> {
    map.inner_map().lock().delete_elem(key)
}
/// For each element in map, call callback_fn function with map, callback_ctx and other map-specific
/// parameters. The callback_fn should be a static function and the callback_ctx should be a pointer
/// to the stack. The flags is used to control certain aspects of the helper. Currently, the flags must
/// be 0.
///
/// The following are a list of supported map types and their respective expected callback signatures:
/// - BPF_MAP_TYPE_HASH
/// - BPF_MAP_TYPE_PERCPU_HASH
/// - BPF_MAP_TYPE_LRU_HASH
/// - BPF_MAP_TYPE_LRU_PERCPU_HASH
/// - BPF_MAP_TYPE_ARRAY
/// - BPF_MAP_TYPE_PERCPU_ARRAY
///
/// `long (*callback_fn)(struct bpf_map *map, const void key, void *value, void *ctx);`
///
/// For per_cpu maps, the map_value is the value on the cpu where the bpf_prog is running.
/// Raw `bpf_for_each_map_elem(map, callback_fn, callback_ctx, flags)` entry
/// point.
///
/// SAFETY: `map` must come from `Arc::into_raw` on an `Arc<BpfMap>`; the
/// refcount is handed back via `into_raw` before returning.
///
/// NOTE(review): `cb` is transmuted to `*const BpfCallBackFn` and then
/// *dereferenced*, i.e. it is treated as a pointer to a function pointer. If
/// callers actually pass the callback's code address itself, this reads the
/// callback's machine code as a pointer — confirm the rbpf calling
/// convention before trusting this path.
unsafe fn raw_map_for_each_elem(
    map: *mut c_void,
    cb: *const c_void,
    ctx: *const c_void,
    flags: u64,
) -> i64 {
    let map = Arc::from_raw(map as *const BpfMap);
    let cb = *core::mem::transmute::<*const c_void, *const BpfCallBackFn>(cb);
    let res = map_for_each_elem(&map, cb, ctx as _, flags);
    let _ = Arc::into_raw(map);
    match res {
        Ok(v) => v as i64,
        Err(e) => e as i64,
    }
}
/// Run `cb` over every element of `map` with user context `ctx`; returns the
/// number of elements visited (semantics delegated to the concrete map).
pub fn map_for_each_elem(
    map: &Arc<BpfMap>,
    cb: BpfCallBackFn,
    ctx: *const u8,
    flags: u64,
) -> Result<u32> {
    map.inner_map().lock().for_each_elem(cb, ctx, flags)
}
/// Perform a lookup in percpu map for an entry associated to key on cpu.
///
/// See https://ebpf-docs.dylanreimerink.nl/linux/helper-function/bpf_map_lookup_percpu_elem/
/// Raw `bpf_map_lookup_percpu_elem(map, key, cpu)` entry point.
///
/// SAFETY: `map` must come from `Arc::into_raw` on an `Arc<BpfMap>` and
/// `key` must point to `key_size` readable bytes; the refcount is handed
/// back via `into_raw` before returning.
unsafe fn raw_map_lookup_percpu_elem(
    map: *mut c_void,
    key: *const c_void,
    cpu: u32,
) -> *const c_void {
    let map = Arc::from_raw(map as *const BpfMap);
    let key_size = map.key_size();
    let key = core::slice::from_raw_parts(key as *const u8, key_size);
    let value = map_lookup_percpu_elem(&map, key, cpu);
    // warning: We need to keep the map alive, so we don't drop it here.
    let _ = Arc::into_raw(map);
    match value {
        // Miss and error both become NULL, per the helper contract.
        Ok(Some(value)) => value as *const c_void,
        _ => core::ptr::null_mut(),
    }
}
/// Look up `key` in the per-CPU slice of `map` belonging to `cpu`.
///
/// Internal errors are collapsed into `Ok(None)`, mirroring
/// [`map_lookup_elem`].
pub fn map_lookup_percpu_elem(
    map: &Arc<BpfMap>,
    key: &[u8],
    cpu: u32,
) -> Result<Option<*const u8>> {
    let mut guard = map.inner_map().lock();
    let ptr = guard
        .lookup_percpu_elem(key, cpu)
        .ok()
        .flatten()
        .map(|v| v.as_ptr());
    Ok(ptr)
}
/// Push an element value in map.
///
/// See https://ebpf-docs.dylanreimerink.nl/linux/helper-function/bpf_map_push_elem/
/// Raw `bpf_map_push_elem(map, value, flags)` entry point (queue/stack maps).
///
/// SAFETY: `map` must come from `Arc::into_raw` on an `Arc<BpfMap>` and
/// `value` must point to `value_size` readable bytes; the refcount is handed
/// back via `into_raw` before returning.
unsafe fn raw_map_push_elem(map: *mut c_void, value: *const c_void, flags: u64) -> i64 {
    let map = Arc::from_raw(map as *const BpfMap);
    let value_size = map.value_size();
    let value = core::slice::from_raw_parts(value as *const u8, value_size);
    let res = map_push_elem(&map, value, flags);
    let _ = Arc::into_raw(map);
    match res {
        Ok(_) => 0,
        Err(e) => e as i64,
    }
}
/// Push `value` onto a queue/stack map, forwarding `flags` to the map.
pub fn map_push_elem(map: &Arc<BpfMap>, value: &[u8], flags: u64) -> Result<()> {
    map.inner_map().lock().push_elem(value, flags)
}
/// Pop an element from map.
///
/// See https://ebpf-docs.dylanreimerink.nl/linux/helper-function/bpf_map_pop_elem/
/// Raw `bpf_map_pop_elem(map, value)` entry point (queue/stack maps).
///
/// SAFETY: `map` must come from `Arc::into_raw` on an `Arc<BpfMap>` and
/// `value` must point to `value_size` writable bytes; the refcount is handed
/// back via `into_raw` before returning.
unsafe fn raw_map_pop_elem(map: *mut c_void, value: *mut c_void) -> i64 {
    let map = Arc::from_raw(map as *const BpfMap);
    let value_size = map.value_size();
    let value = core::slice::from_raw_parts_mut(value as *mut u8, value_size);
    let res = map_pop_elem(&map, value);
    let _ = Arc::into_raw(map);
    match res {
        Ok(_) => 0,
        Err(e) => e as i64,
    }
}
/// Pop an element from a queue/stack map into `value`.
pub fn map_pop_elem(map: &Arc<BpfMap>, value: &mut [u8]) -> Result<()> {
    map.inner_map().lock().pop_elem(value)
}
/// Get an element from map without removing it.
///
/// See https://ebpf-docs.dylanreimerink.nl/linux/helper-function/bpf_map_peek_elem/
/// Raw `bpf_map_peek_elem(map, value)` entry point (queue/stack maps).
///
/// SAFETY: `map` must come from `Arc::into_raw` on an `Arc<BpfMap>` and
/// `value` must point to `value_size` writable bytes; the refcount is handed
/// back via `into_raw` before returning.
unsafe fn raw_map_peek_elem(map: *mut c_void, value: *mut c_void) -> i64 {
    let map = Arc::from_raw(map as *const BpfMap);
    let value_size = map.value_size();
    let value = core::slice::from_raw_parts_mut(value as *mut u8, value_size);
    let res = map_peek_elem(&map, value);
    let _ = Arc::into_raw(map);
    match res {
        Ok(_) => 0,
        Err(e) => e as i64,
    }
}
/// Copy the head element of a queue/stack map into `value` without removing
/// it.
pub fn map_peek_elem(map: &Arc<BpfMap>, value: &mut [u8]) -> Result<()> {
    map.inner_map().lock().peek_elem(value)
}
pub static BPF_HELPER_FUN_SET: Lazy<BTreeMap<u32, RawBPFHelperFn>> = Lazy::new();
/// Initialize the helper functions.
/// Populate [`BPF_HELPER_FUN_SET`] with every implemented helper.
///
/// Must run once during kernel init, before any eBPF program executes.
pub fn init_helper_functions() {
    use consts::*;
    let mut table = BTreeMap::new();
    unsafe {
        let entries: [(u32, RawBPFHelperFn); 11] = [
            // Map helpers: generic map access
            (HELPER_MAP_LOOKUP_ELEM, define_func!(raw_map_lookup_elem)),
            (HELPER_MAP_UPDATE_ELEM, define_func!(raw_map_update_elem)),
            (HELPER_MAP_DELETE_ELEM, define_func!(raw_map_delete_elem)),
            (HELPER_MAP_FOR_EACH_ELEM, define_func!(raw_map_for_each_elem)),
            (
                HELPER_MAP_LOOKUP_PERCPU_ELEM,
                define_func!(raw_map_lookup_percpu_elem),
            ),
            // Map helpers: perf event array
            (HELPER_PERF_EVENT_OUTPUT, define_func!(raw_perf_event_output)),
            // Probe and trace helpers: memory access
            (HELPER_BPF_PROBE_READ, define_func!(raw_bpf_probe_read)),
            // Print helpers
            (HELPER_TRACE_PRINTF, define_func!(trace_printf)),
            // Map helpers: queue and stack
            (HELPER_MAP_PUSH_ELEM, define_func!(raw_map_push_elem)),
            (HELPER_MAP_POP_ELEM, define_func!(raw_map_pop_elem)),
            (HELPER_MAP_PEEK_ELEM, define_func!(raw_map_peek_elem)),
        ];
        table.extend(entries);
    }
    BPF_HELPER_FUN_SET.init(table);
}

View File

@ -0,0 +1,25 @@
use core::{
ffi::{c_char, c_int},
fmt::Write,
};
use printf_compat::{format, output};
/// Printf according to the format string, function will return the number of bytes written(including '\0')
/// printf-style formatter backing `bpf_trace_printk`.
///
/// Returns the number of bytes written plus one (accounting for a trailing
/// '\0', as stated in the original comment above).
///
/// # Safety
/// `str` must point to a valid, NUL-terminated C format string and the
/// variadic arguments must match its conversion specifiers.
pub unsafe extern "C" fn printf(w: &mut impl Write, str: *const c_char, mut args: ...) -> c_int {
    let bytes_written = format(str as _, args.as_va_list(), output::fmt_write(w));
    bytes_written + 1
}
/// Zero-sized `fmt::Write` sink that forwards formatted text to the kernel
/// console through the crate-wide `print!` macro.
struct TerminalOut;
impl Write for TerminalOut {
    fn write_str(&mut self, s: &str) -> core::fmt::Result {
        print!("{}", s);
        Ok(())
    }
}
/// See https://ebpf-docs.dylanreimerink.nl/linux/helper-function/bpf_trace_printk/
/// Implementation of the `bpf_trace_printk` helper.
///
/// Forwards at most three format arguments (`arg3..arg5`) — the same limit
/// the Linux helper imposes. `_fmt_len` is accepted for ABI compatibility
/// but unused; the format string is read as NUL-terminated instead.
pub fn trace_printf(fmt_ptr: u64, _fmt_len: u64, arg3: u64, arg4: u64, arg5: u64) -> u64 {
    unsafe { printf(&mut TerminalOut, fmt_ptr as _, arg3, arg4, arg5) as u64 }
}

View File

@ -0,0 +1,283 @@
//! BPF_MAP_TYPE_ARRAY and BPF_MAP_TYPE_PERCPU_ARRAY
//!
//!
//! See https://docs.kernel.org/bpf/map_array.html
use super::super::Result;
use crate::bpf::map::util::round_up;
use crate::bpf::map::{BpfCallBackFn, BpfMapCommonOps, BpfMapMeta};
use crate::mm::percpu::{PerCpu, PerCpuVar};
use crate::smp::cpu::{smp_cpu_manager, ProcessorId};
use alloc::{vec, vec::Vec};
use core::{
fmt::{Debug, Formatter},
ops::{Index, IndexMut},
};
use log::info;
use system_error::SystemError;
/// The array map type is a generic map type with no restrictions on the structure of the value.
/// Like a normal array, the array map has a numeric key starting at 0 and incrementing.
///
/// See https://ebpf-docs.dylanreimerink.nl/linux/map-type/BPF_MAP_TYPE_ARRAY/
#[derive(Debug)]
pub struct ArrayMap {
    /// Number of slots; keys are u32 indices in `0..max_entries`.
    max_entries: u32,
    /// Flat backing storage for all slots.
    data: ArrayMapData,
}

/// Contiguous backing store for array-style maps: fixed-size slots of
/// `elem_size` bytes each, packed into a single allocation.
struct ArrayMapData {
    elem_size: u32,
    /// The data is stored in a Vec<u8> with the size of elem_size * max_entries.
    data: Vec<u8>,
}
// Manual Debug impl: report sizes instead of dumping the raw byte buffer.
impl Debug for ArrayMapData {
    fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result {
        f.debug_struct("ArrayMapData")
            .field("elem_size", &self.elem_size)
            .field("data_len", &self.data.len())
            .finish()
    }
}
impl ArrayMapData {
    /// Allocate zero-initialised storage for `max_entries` slots of
    /// `elem_size` bytes each.
    ///
    /// `elem_size` must already be rounded up to a multiple of 8 by the
    /// caller (enforced in debug builds).
    pub fn new(elem_size: u32, max_entries: u32) -> Self {
        debug_assert!(elem_size % 8 == 0);
        // Multiply in usize: a u32 * u32 product can overflow for large
        // attribute values, which would wrap in release builds and allocate
        // a buffer that is too small for the indexing below.
        let total_size = elem_size as usize * max_entries as usize;
        let data = vec![0; total_size];
        ArrayMapData { elem_size, data }
    }
}
// Slot accessors: slice out the `index`-th element. No explicit bounds check
// is done here — an out-of-range index panics on the slice operation, so
// callers must validate `index < max_entries` first.
impl Index<u32> for ArrayMapData {
    type Output = [u8];
    fn index(&self, index: u32) -> &Self::Output {
        let start = index * self.elem_size;
        &self.data[start as usize..(start + self.elem_size) as usize]
    }
}
impl IndexMut<u32> for ArrayMapData {
    fn index_mut(&mut self, index: u32) -> &mut Self::Output {
        let start = index * self.elem_size;
        &mut self.data[start as usize..(start + self.elem_size) as usize]
    }
}
impl ArrayMap {
    /// Validate map attributes and allocate the backing storage.
    ///
    /// Keys must be 4-byte u32 indices; values are padded up to a multiple
    /// of 8 bytes.
    ///
    /// # Errors
    /// `EINVAL` for zero value size, zero capacity, or a key size other
    /// than 4.
    pub fn new(attr: &BpfMapMeta) -> Result<Self> {
        if attr.value_size == 0 || attr.max_entries == 0 || attr.key_size != 4 {
            return Err(SystemError::EINVAL);
        }
        let elem_size = round_up(attr.value_size as usize, 8);
        let data = ArrayMapData::new(elem_size as u32, attr.max_entries);
        Ok(ArrayMap {
            max_entries: attr.max_entries,
            data,
        })
    }
}
impl BpfMapCommonOps for ArrayMap {
    /// Return a borrow of the slot at index `key` (a native-endian u32).
    ///
    /// # Errors
    /// `EINVAL` if the key is not 4 bytes or the index is out of range.
    fn lookup_elem(&mut self, key: &[u8]) -> Result<Option<&[u8]>> {
        if key.len() != 4 {
            return Err(SystemError::EINVAL);
        }
        let index = u32::from_ne_bytes(key.try_into().map_err(|_| SystemError::EINVAL)?);
        if index >= self.max_entries {
            return Err(SystemError::EINVAL);
        }
        let val = self.data.index(index);
        Ok(Some(val))
    }
    /// Overwrite the slot at index `key`. Values shorter than the (8-byte
    /// padded) slot only overwrite the slot prefix.
    fn update_elem(&mut self, key: &[u8], value: &[u8], _flags: u64) -> Result<()> {
        if key.len() != 4 {
            return Err(SystemError::EINVAL);
        }
        let index = u32::from_ne_bytes(key.try_into().map_err(|_| SystemError::EINVAL)?);
        if index >= self.max_entries {
            return Err(SystemError::EINVAL);
        }
        if value.len() > self.data.elem_size as usize {
            return Err(SystemError::EINVAL);
        }
        let old_value = self.data.index_mut(index);
        old_value[..value.len()].copy_from_slice(value);
        Ok(())
    }
    /// For ArrayMap, delete_elem is not supported (array slots always exist).
    fn delete_elem(&mut self, _key: &[u8]) -> Result<()> {
        Err(SystemError::EINVAL)
    }
    /// Call `cb` on every slot until it returns non-zero; returns the number
    /// of slots visited (including the one that stopped iteration).
    fn for_each_elem(&mut self, cb: BpfCallBackFn, ctx: *const u8, flags: u64) -> Result<u32> {
        // Per the helper contract, flags must currently be 0.
        if flags != 0 {
            return Err(SystemError::EINVAL);
        }
        let mut total_used = 0;
        for i in 0..self.max_entries {
            let key = i.to_ne_bytes();
            let value = self.data.index(i);
            total_used += 1;
            let res = cb(&key, value, ctx);
            // return value: 0 - continue, 1 - stop and return
            if res != 0 {
                break;
            }
        }
        Ok(total_used)
    }
    /// Not supported for arrays.
    fn lookup_and_delete_elem(&mut self, _key: &[u8], _value: &mut [u8]) -> Result<()> {
        Err(SystemError::EINVAL)
    }
    /// Write the key following `key` into `next_key`; `None` yields the
    /// first key (index 0).
    fn get_next_key(&self, key: Option<&[u8]>, next_key: &mut [u8]) -> Result<()> {
        if let Some(key) = key {
            if key.len() != 4 {
                return Err(SystemError::EINVAL);
            }
            let index = u32::from_ne_bytes(key.try_into().map_err(|_| SystemError::EINVAL)?);
            // The last valid index — and any out-of-range index — has no
            // successor. (The previous `==` check let indices beyond
            // max_entries produce a bogus out-of-range "next" key.)
            // NOTE(review): Linux instead returns the *first* key for an
            // invalid input key; switch to that if userspace relies on it.
            if index >= self.max_entries - 1 {
                return Err(SystemError::ENOENT);
            }
            let next_index = index + 1;
            next_key.copy_from_slice(&next_index.to_ne_bytes());
        } else {
            next_key.copy_from_slice(&0u32.to_ne_bytes());
        }
        Ok(())
    }
    /// No copy-on-write remapping yet; freezing is accepted but a no-op.
    fn freeze(&self) -> Result<()> {
        info!("fake freeze done for ArrayMap");
        Ok(())
    }
    /// Raw pointer to slot 0 of the backing storage.
    fn first_value_ptr(&self) -> Result<*const u8> {
        Ok(self.data.data.as_ptr())
    }
}
/// This is the per-CPU variant of the [ArrayMap] map type.
///
/// See https://ebpf-docs.dylanreimerink.nl/linux/map-type/BPF_MAP_TYPE_PERCPU_ARRAY/
pub struct PerCpuArrayMap {
    // One independent ArrayMap per CPU; non-percpu trait methods operate on
    // the current CPU's copy.
    per_cpu_data: PerCpuVar<ArrayMap>,
}
impl Debug for PerCpuArrayMap {
    fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result {
        f.debug_struct("PerCpuArrayMap")
            .field("data", &self.per_cpu_data)
            .finish()
    }
}
impl PerCpuArrayMap {
    /// Allocate one [`ArrayMap`] per possible CPU from the same attributes.
    pub fn new(attr: &BpfMapMeta) -> Result<Self> {
        let num_cpus = PerCpu::MAX_CPU_NUM;
        let mut data = Vec::with_capacity(num_cpus as usize);
        for _ in 0..num_cpus {
            let array_map = ArrayMap::new(attr)?;
            data.push(array_map);
        }
        let per_cpu_data = PerCpuVar::new(data).ok_or(SystemError::EINVAL)?;
        Ok(PerCpuArrayMap { per_cpu_data })
    }
}
impl BpfMapCommonOps for PerCpuArrayMap {
    // All single-CPU operations delegate to the calling CPU's ArrayMap.
    fn lookup_elem(&mut self, key: &[u8]) -> Result<Option<&[u8]>> {
        self.per_cpu_data.get_mut().lookup_elem(key)
    }
    fn update_elem(&mut self, key: &[u8], value: &[u8], flags: u64) -> Result<()> {
        self.per_cpu_data.get_mut().update_elem(key, value, flags)
    }
    fn delete_elem(&mut self, key: &[u8]) -> Result<()> {
        self.per_cpu_data.get_mut().delete_elem(key)
    }
    fn for_each_elem(&mut self, cb: BpfCallBackFn, ctx: *const u8, flags: u64) -> Result<u32> {
        self.per_cpu_data.get_mut().for_each_elem(cb, ctx, flags)
    }
    fn lookup_and_delete_elem(&mut self, _key: &[u8], _value: &mut [u8]) -> Result<()> {
        Err(SystemError::EINVAL)
    }
    /// Cross-CPU lookup: read `cpu`'s copy of the map.
    fn lookup_percpu_elem(&mut self, key: &[u8], cpu: u32) -> Result<Option<&[u8]>> {
        // SAFETY (as per force_get_mut's contract): reading another CPU's
        // slot without synchronization; callers accept the race.
        unsafe {
            self.per_cpu_data
                .force_get_mut(ProcessorId::new(cpu))
                .lookup_elem(key)
        }
    }
    fn get_next_key(&self, key: Option<&[u8]>, next_key: &mut [u8]) -> Result<()> {
        self.per_cpu_data.get_mut().get_next_key(key, next_key)
    }
    fn first_value_ptr(&self) -> Result<*const u8> {
        self.per_cpu_data.get_mut().first_value_ptr()
    }
}
/// See https://ebpf-docs.dylanreimerink.nl/linux/map-type/BPF_MAP_TYPE_PERF_EVENT_ARRAY/
pub struct PerfEventArrayMap {
    // The value is the file descriptor of the perf event.
    // One 4-byte fd slot per possible CPU.
    fds: ArrayMapData,
}
impl Debug for PerfEventArrayMap {
    fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result {
        f.debug_struct("PerfEventArrayMap")
            .field("fds", &self.fds)
            .finish()
    }
}
impl PerfEventArrayMap {
    /// Create the fd table, requiring exactly one 4-byte slot per possible
    /// CPU (key = CPU id, value = perf-event fd).
    pub fn new(attr: &BpfMapMeta) -> Result<Self> {
        let num_cpus = smp_cpu_manager().possible_cpus_count();
        if attr.key_size != 4 || attr.value_size != 4 || attr.max_entries != num_cpus {
            return Err(SystemError::EINVAL);
        }
        let fds = ArrayMapData::new(4, num_cpus);
        Ok(PerfEventArrayMap { fds })
    }
}
impl BpfMapCommonOps for PerfEventArrayMap {
    /// Return the 4-byte perf-event fd slot for the CPU id encoded in `key`.
    fn lookup_elem(&mut self, key: &[u8]) -> Result<Option<&[u8]>> {
        let cpu_id = u32::from_ne_bytes(key.try_into().map_err(|_| SystemError::EINVAL)?);
        // Bounds-check the CPU id instead of panicking on the slice index.
        if (cpu_id as usize + 1) * self.fds.elem_size as usize > self.fds.data.len() {
            return Err(SystemError::EINVAL);
        }
        let value = self.fds.index(cpu_id);
        Ok(Some(value))
    }
    /// Store a perf-event fd (exactly 4 bytes) in the CPU's slot.
    fn update_elem(&mut self, key: &[u8], value: &[u8], _flags: u64) -> Result<()> {
        // A wrong value length is a caller error: report EINVAL instead of
        // asserting (the previous assert_eq! panicked the kernel).
        if value.len() != 4 {
            return Err(SystemError::EINVAL);
        }
        let cpu_id = u32::from_ne_bytes(key.try_into().map_err(|_| SystemError::EINVAL)?);
        if (cpu_id as usize + 1) * self.fds.elem_size as usize > self.fds.data.len() {
            return Err(SystemError::EINVAL);
        }
        let old_value = self.fds.index_mut(cpu_id);
        old_value.copy_from_slice(value);
        Ok(())
    }
    /// Zero the CPU's slot (fd 0 acts as the "empty" marker).
    fn delete_elem(&mut self, key: &[u8]) -> Result<()> {
        let cpu_id = u32::from_ne_bytes(key.try_into().map_err(|_| SystemError::EINVAL)?);
        if (cpu_id as usize + 1) * self.fds.elem_size as usize > self.fds.data.len() {
            return Err(SystemError::EINVAL);
        }
        self.fds.index_mut(cpu_id).copy_from_slice(&[0; 4]);
        Ok(())
    }
    /// Visit every CPU slot until `cb` returns non-zero; returns the number
    /// of slots visited (including the one that stopped iteration).
    fn for_each_elem(&mut self, cb: BpfCallBackFn, ctx: *const u8, _flags: u64) -> Result<u32> {
        let mut total_used = 0;
        let num_cpus = smp_cpu_manager().possible_cpus_count();
        for i in 0..num_cpus {
            let key = i.to_ne_bytes();
            let value = self.fds.index(i);
            total_used += 1;
            let res = cb(&key, value, ctx);
            if res != 0 {
                break;
            }
        }
        Ok(total_used)
    }
    /// Not supported for perf-event arrays.
    fn lookup_and_delete_elem(&mut self, _key: &[u8], _value: &mut [u8]) -> Result<()> {
        Err(SystemError::EINVAL)
    }
    /// Raw pointer to the start of the fd table.
    fn first_value_ptr(&self) -> Result<*const u8> {
        Ok(self.fds.data.as_ptr())
    }
}

View File

@ -0,0 +1,156 @@
use super::Result;
use crate::bpf::map::util::{round_up, BpfMapUpdateElemFlags};
use crate::bpf::map::{BpfCallBackFn, BpfMapCommonOps, BpfMapMeta};
use crate::mm::percpu::{PerCpu, PerCpuVar};
use crate::smp::cpu::ProcessorId;
use alloc::{collections::BTreeMap, vec::Vec};
use core::fmt::Debug;
use system_error::SystemError;
// Keys and values are stored as owned, length-preserving byte vectors.
type BpfHashMapKey = Vec<u8>;
type BpfHashMapValue = Vec<u8>;

/// The hash map type is a generic map type with no restrictions on the structure of the key and value.
/// Hash-maps are implemented using a hash table, allowing for lookups with arbitrary keys.
///
/// See https://ebpf-docs.dylanreimerink.nl/linux/map-type/BPF_MAP_TYPE_HASH/
#[derive(Debug)]
pub struct BpfHashMap {
    // Capacity/size attributes kept for bookkeeping; the BTreeMap itself is
    // currently unbounded (max_entries is not enforced on insert).
    _max_entries: u32,
    _key_size: u32,
    _value_size: u32,
    // NOTE: a BTreeMap stands in for a real hash table; lookups are
    // O(log n) rather than O(1).
    data: BTreeMap<BpfHashMapKey, BpfHashMapValue>,
}
impl BpfHashMap {
    /// Create a hash map from user-supplied attributes.
    ///
    /// The stored value size is padded to a multiple of 8 bytes.
    ///
    /// # Errors
    /// `EINVAL` if the key size, value size, or capacity is zero (a hash map
    /// with zero-length keys is meaningless and matches the Linux
    /// requirement that key_size > 0).
    pub fn new(attr: &BpfMapMeta) -> Result<Self> {
        if attr.key_size == 0 || attr.value_size == 0 || attr.max_entries == 0 {
            return Err(SystemError::EINVAL);
        }
        let value_size = round_up(attr.value_size as usize, 8);
        Ok(Self {
            _max_entries: attr.max_entries,
            _key_size: attr.key_size,
            _value_size: value_size as u32,
            data: BTreeMap::new(),
        })
    }
}
impl BpfMapCommonOps for BpfHashMap {
    /// Return a borrow of the stored value for `key`, or `None` on miss.
    fn lookup_elem(&mut self, key: &[u8]) -> Result<Option<&[u8]>> {
        let value = self.data.get(key).map(|v| v.as_slice());
        Ok(value)
    }
    /// Insert or overwrite the value for `key`.
    ///
    /// NOTE(review): the update flags (BPF_NOEXIST/BPF_EXIST) are parsed but
    /// not enforced — every call currently behaves like BPF_ANY.
    fn update_elem(&mut self, key: &[u8], value: &[u8], flags: u64) -> Result<()> {
        let _flags = BpfMapUpdateElemFlags::from_bits_truncate(flags);
        self.data.insert(key.to_vec(), value.to_vec());
        Ok(())
    }
    /// Remove `key`; deleting a missing key is not an error.
    fn delete_elem(&mut self, key: &[u8]) -> Result<()> {
        self.data.remove(key);
        Ok(())
    }
    /// Call `cb` for each (key, value) pair until it returns non-zero;
    /// returns the number of pairs for which the callback completed with 0.
    fn for_each_elem(&mut self, cb: BpfCallBackFn, ctx: *const u8, flags: u64) -> Result<u32> {
        if flags != 0 {
            return Err(SystemError::EINVAL);
        }
        let mut total_used = 0;
        for (key, value) in self.data.iter() {
            let res = cb(key, value, ctx);
            // return value: 0 - continue, 1 - stop and return
            if res != 0 {
                break;
            }
            total_used += 1;
        }
        Ok(total_used)
    }
    /// Copy the value for `key` into `value`, then remove the entry.
    fn lookup_and_delete_elem(&mut self, key: &[u8], value: &mut [u8]) -> Result<()> {
        let v = self
            .data
            .get(key)
            .map(|v| v.as_slice())
            .ok_or(SystemError::ENOENT)?;
        // Copy only as many bytes as both buffers can hold: stored values
        // keep their original (un-padded) length, so an unconditional
        // copy_from_slice could panic on a length mismatch.
        let n = v.len().min(value.len());
        value[..n].copy_from_slice(&v[..n]);
        self.data.remove(key);
        Ok(())
    }
    /// Iterate the key space: `None` yields the first key; otherwise the key
    /// stored after `key`. `ENOENT` once the end is reached.
    fn get_next_key(&self, key: Option<&[u8]>, next_key: &mut [u8]) -> Result<()> {
        let mut iter = self.data.iter();
        if let Some(key) = key {
            // Advance the iterator just past the supplied key; an unknown
            // key drains the iterator and reports ENOENT below.
            for (k, _) in iter.by_ref() {
                if k.as_slice() == key {
                    break;
                }
            }
        }
        match iter.next() {
            Some((k, _)) => {
                // Length-checked copy, for the same reason as in
                // lookup_and_delete_elem above.
                let n = k.len().min(next_key.len());
                next_key[..n].copy_from_slice(&k[..n]);
                Ok(())
            }
            None => Err(SystemError::ENOENT),
        }
    }
}
/// This is the per-CPU variant of the [BpfHashMap] map type.
///
/// See https://ebpf-docs.dylanreimerink.nl/linux/map-type/BPF_MAP_TYPE_PERCPU_HASH/
pub struct PerCpuHashMap {
    // One independent BpfHashMap per CPU; non-percpu trait methods operate
    // on the current CPU's copy.
    per_cpu_maps: PerCpuVar<BpfHashMap>,
}
impl Debug for PerCpuHashMap {
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        f.debug_struct("PerCpuHashMap")
            .field("maps", &self.per_cpu_maps)
            .finish()
    }
}
impl PerCpuHashMap {
    /// Allocate one [`BpfHashMap`] per possible CPU from the same attributes.
    pub fn new(attr: &BpfMapMeta) -> Result<Self> {
        let num_cpus = PerCpu::MAX_CPU_NUM;
        let mut data = Vec::with_capacity(num_cpus as usize);
        for _ in 0..num_cpus {
            let array_map = BpfHashMap::new(attr)?;
            data.push(array_map);
        }
        let per_cpu_maps = PerCpuVar::new(data).ok_or(SystemError::EINVAL)?;
        Ok(PerCpuHashMap { per_cpu_maps })
    }
}
impl BpfMapCommonOps for PerCpuHashMap {
    // All single-CPU operations delegate to the calling CPU's map.
    fn lookup_elem(&mut self, key: &[u8]) -> Result<Option<&[u8]>> {
        self.per_cpu_maps.get_mut().lookup_elem(key)
    }
    fn update_elem(&mut self, key: &[u8], value: &[u8], flags: u64) -> Result<()> {
        self.per_cpu_maps.get_mut().update_elem(key, value, flags)
    }
    fn delete_elem(&mut self, key: &[u8]) -> Result<()> {
        self.per_cpu_maps.get_mut().delete_elem(key)
    }
    fn for_each_elem(&mut self, cb: BpfCallBackFn, ctx: *const u8, flags: u64) -> Result<u32> {
        self.per_cpu_maps.get_mut().for_each_elem(cb, ctx, flags)
    }
    fn lookup_and_delete_elem(&mut self, key: &[u8], value: &mut [u8]) -> Result<()> {
        self.per_cpu_maps
            .get_mut()
            .lookup_and_delete_elem(key, value)
    }
    /// Cross-CPU lookup: read `cpu`'s copy of the map.
    fn lookup_percpu_elem(&mut self, key: &[u8], cpu: u32) -> Result<Option<&[u8]>> {
        // SAFETY (as per force_get_mut's contract): reading another CPU's
        // slot without synchronization; callers accept the race.
        unsafe {
            self.per_cpu_maps
                .force_get_mut(ProcessorId::new(cpu))
                .lookup_elem(key)
        }
    }
    fn get_next_key(&self, key: Option<&[u8]>, next_key: &mut [u8]) -> Result<()> {
        self.per_cpu_maps.get_mut().get_next_key(key, next_key)
    }
    fn first_value_ptr(&self) -> Result<*const u8> {
        self.per_cpu_maps.get_mut().first_value_ptr()
    }
}

151
kernel/src/bpf/map/lru.rs Normal file
View File

@ -0,0 +1,151 @@
use super::{BpfCallBackFn, BpfMapCommonOps, Result};
use crate::bpf::map::util::BpfMapMeta;
use crate::mm::percpu::{PerCpu, PerCpuVar};
use crate::smp::cpu::ProcessorId;
use alloc::vec::Vec;
use core::fmt::Debug;
use core::num::NonZero;
use lru::LruCache;
use system_error::SystemError;
type BpfHashMapKey = Vec<u8>;
type BpfHashMapValue = Vec<u8>;
/// This map is the LRU (Least Recently Used) variant of the BPF_MAP_TYPE_HASH.
/// It is a generic map type that stores a fixed maximum number of key/value pairs.
/// When the map starts to get at capacity, the approximately least recently
/// used elements is removed to make room for new elements.
///
/// See https://docs.ebpf.io/linux/map-type/BPF_MAP_TYPE_LRU_HASH/
#[derive(Debug)]
pub struct LruMap {
    // Capacity requested at creation; kept for reference only — eviction is
    // enforced by the LruCache itself.
    _max_entries: u32,
    data: LruCache<BpfHashMapKey, BpfHashMapValue>,
}
impl LruMap {
    /// Create an LRU hash map sized to `attr.max_entries`.
    ///
    /// Rejects zero-sized values or a zero capacity with `EINVAL`.
    pub fn new(attr: &BpfMapMeta) -> Result<Self> {
        if attr.value_size == 0 || attr.max_entries == 0 {
            return Err(SystemError::EINVAL);
        }
        let capacity =
            NonZero::new(attr.max_entries as usize).ok_or(SystemError::EINVAL)?;
        Ok(Self {
            _max_entries: attr.max_entries,
            data: LruCache::new(capacity),
        })
    }
}
impl BpfMapCommonOps for LruMap {
    /// Look up a value; `get` also marks the entry as most-recently-used.
    fn lookup_elem(&mut self, key: &[u8]) -> Result<Option<&[u8]>> {
        let value = self.data.get(key).map(|v| v.as_slice());
        Ok(value)
    }
    /// Insert or overwrite; `put` evicts the least-recently-used entry when
    /// the cache is at capacity.
    fn update_elem(&mut self, key: &[u8], value: &[u8], _flags: u64) -> Result<()> {
        self.data.put(key.to_vec(), value.to_vec());
        Ok(())
    }
    /// Remove an entry; deleting a missing key is not an error.
    fn delete_elem(&mut self, key: &[u8]) -> Result<()> {
        self.data.pop(key);
        Ok(())
    }
    /// Visit entries from most- to least-recently-used until `cb` returns
    /// non-zero; returns the number of entries for which `cb` returned 0.
    fn for_each_elem(&mut self, cb: BpfCallBackFn, ctx: *const u8, flags: u64) -> Result<u32> {
        if flags != 0 {
            return Err(SystemError::EINVAL);
        }
        let mut total_used = 0;
        for (key, value) in self.data.iter() {
            let res = cb(key, value, ctx);
            // return value: 0 - continue, 1 - stop and return
            if res != 0 {
                break;
            }
            total_used += 1;
        }
        Ok(total_used)
    }
    /// Copy the value for `key` into `value` and remove the entry.
    ///
    /// Uses a single `pop` instead of `get` + `pop`: the old двух-lookup
    /// version also promoted the entry in the LRU order right before
    /// deleting it, which is wasted work.
    fn lookup_and_delete_elem(&mut self, key: &[u8], value: &mut [u8]) -> Result<()> {
        let v = self.data.pop(key).ok_or(SystemError::ENOENT)?;
        value.copy_from_slice(&v);
        Ok(())
    }
    /// Return the key following `key` in LRU iteration order (MRU first).
    ///
    /// NOTE(review): if `key` is `Some` but absent, the iterator is
    /// exhausted and this returns `ENOENT`; Linux restarts from the first
    /// key in that case — TODO confirm which is intended.
    fn get_next_key(&self, key: Option<&[u8]>, next_key: &mut [u8]) -> Result<()> {
        let mut iter = self.data.iter();
        if let Some(key) = key {
            for (k, _) in iter.by_ref() {
                if k.as_slice() == key {
                    break;
                }
            }
        }
        let res = iter.next();
        match res {
            Some((k, _)) => {
                next_key.copy_from_slice(k.as_slice());
                Ok(())
            }
            None => Err(SystemError::ENOENT),
        }
    }
}
/// Per-CPU variant of [LruMap]: one independent LRU hash map per CPU.
///
/// See https://ebpf-docs.dylanreimerink.nl/linux/map-type/BPF_MAP_TYPE_LRU_PERCPU_HASH/
pub struct PerCpuLruMap {
    per_cpu_maps: PerCpuVar<LruMap>,
}
impl Debug for PerCpuLruMap {
    /// Render as `PerCpuLruMap { maps: .. }`.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        let mut builder = f.debug_struct("PerCpuLruMap");
        builder.field("maps", &self.per_cpu_maps);
        builder.finish()
    }
}
impl PerCpuLruMap {
    /// Build one independent [LruMap] for every possible CPU.
    pub fn new(attr: &BpfMapMeta) -> Result<Self> {
        let maps = (0..PerCpu::MAX_CPU_NUM)
            .map(|_| LruMap::new(attr))
            .collect::<Result<Vec<_>>>()?;
        let per_cpu_maps = PerCpuVar::new(maps).ok_or(SystemError::EINVAL)?;
        Ok(PerCpuLruMap { per_cpu_maps })
    }
}
// Every operation except `lookup_percpu_elem` is delegated to the map that
// belongs to the CPU this code is currently running on.
impl BpfMapCommonOps for PerCpuLruMap {
    fn lookup_elem(&mut self, key: &[u8]) -> Result<Option<&[u8]>> {
        self.per_cpu_maps.get_mut().lookup_elem(key)
    }
    fn update_elem(&mut self, key: &[u8], value: &[u8], flags: u64) -> Result<()> {
        self.per_cpu_maps.get_mut().update_elem(key, value, flags)
    }
    fn delete_elem(&mut self, key: &[u8]) -> Result<()> {
        self.per_cpu_maps.get_mut().delete_elem(key)
    }
    fn for_each_elem(&mut self, cb: BpfCallBackFn, ctx: *const u8, flags: u64) -> Result<u32> {
        self.per_cpu_maps.get_mut().for_each_elem(cb, ctx, flags)
    }
    fn lookup_and_delete_elem(&mut self, key: &[u8], value: &mut [u8]) -> Result<()> {
        self.per_cpu_maps
            .get_mut()
            .lookup_and_delete_elem(key, value)
    }
    fn lookup_percpu_elem(&mut self, key: &[u8], cpu: u32) -> Result<Option<&[u8]>> {
        // NOTE(review): `force_get_mut` reaches into another CPU's map;
        // presumably safe under the enclosing BpfMap SpinLock — TODO confirm.
        unsafe {
            self.per_cpu_maps
                .force_get_mut(ProcessorId::new(cpu))
                .lookup_elem(key)
        }
    }
    fn get_next_key(&self, key: Option<&[u8]>, next_key: &mut [u8]) -> Result<()> {
        self.per_cpu_maps.get_mut().get_next_key(key, next_key)
    }
}

416
kernel/src/bpf/map/mod.rs Normal file
View File

@ -0,0 +1,416 @@
mod array_map;
mod hash_map;
mod lru;
mod queue;
mod util;
use super::Result;
use crate::bpf::map::array_map::{ArrayMap, PerCpuArrayMap, PerfEventArrayMap};
use crate::bpf::map::hash_map::PerCpuHashMap;
use crate::bpf::map::util::{BpfMapGetNextKeyArg, BpfMapMeta, BpfMapUpdateArg};
use crate::filesystem::vfs::file::{File, FileMode};
use crate::filesystem::vfs::syscall::ModeType;
use crate::filesystem::vfs::{FilePrivateData, FileSystem, FileType, IndexNode, Metadata};
use crate::include::bindings::linux_bpf::{bpf_attr, bpf_map_type};
use crate::libs::casting::DowncastArc;
use crate::libs::spinlock::{SpinLock, SpinLockGuard};
use crate::process::ProcessManager;
use crate::syscall::user_access::{UserBufferReader, UserBufferWriter};
use alloc::boxed::Box;
use alloc::string::String;
use alloc::sync::Arc;
use alloc::vec::Vec;
use core::any::Any;
use core::fmt::Debug;
use intertrait::CastFromSync;
use log::{error, info};
use system_error::SystemError;
/// A BPF map exposed to user space as a file descriptor (its inode).
#[derive(Debug)]
pub struct BpfMap {
    // The concrete map implementation, serialized by a spin lock.
    inner_map: SpinLock<Box<dyn BpfMapCommonOps>>,
    // Creation-time attributes (type, key/value sizes, capacity).
    meta: BpfMapMeta,
}

// Callback used by `for_each_elem`; returning non-zero stops iteration.
pub type BpfCallBackFn = fn(key: &[u8], value: &[u8], ctx: *const u8) -> i32;
/// Operations shared by all BPF map types.
///
/// Every method has a default `ENOSYS` body, so a concrete map only
/// implements the operations its type actually supports.
pub trait BpfMapCommonOps: Send + Sync + Debug + CastFromSync {
    /// Lookup an element in the map.
    ///
    /// See https://ebpf-docs.dylanreimerink.nl/linux/helper-function/bpf_map_lookup_elem/
    fn lookup_elem(&mut self, _key: &[u8]) -> Result<Option<&[u8]>> {
        Err(SystemError::ENOSYS)
    }
    /// Update an element in the map.
    ///
    /// See https://ebpf-docs.dylanreimerink.nl/linux/helper-function/bpf_map_update_elem/
    fn update_elem(&mut self, _key: &[u8], _value: &[u8], _flags: u64) -> Result<()> {
        Err(SystemError::ENOSYS)
    }
    /// Delete an element from the map.
    ///
    /// See https://ebpf-docs.dylanreimerink.nl/linux/helper-function/bpf_map_delete_elem/
    fn delete_elem(&mut self, _key: &[u8]) -> Result<()> {
        Err(SystemError::ENOSYS)
    }
    /// For each element in map, call callback_fn function with map,
    /// callback_ctx and other map-specific parameters.
    ///
    /// See https://ebpf-docs.dylanreimerink.nl/linux/helper-function/bpf_for_each_map_elem/
    fn for_each_elem(&mut self, _cb: BpfCallBackFn, _ctx: *const u8, _flags: u64) -> Result<u32> {
        Err(SystemError::ENOSYS)
    }
    /// Look up an element with the given key in the map referred to by the file descriptor fd,
    /// and if found, delete the element.
    fn lookup_and_delete_elem(&mut self, _key: &[u8], _value: &mut [u8]) -> Result<()> {
        Err(SystemError::ENOSYS)
    }
    /// Perform a lookup in percpu map for an entry associated to key on cpu.
    fn lookup_percpu_elem(&mut self, _key: &[u8], _cpu: u32) -> Result<Option<&[u8]>> {
        Err(SystemError::ENOSYS)
    }
    /// Get the next key in the map. If key is None, get the first key.
    ///
    /// Called from syscall
    fn get_next_key(&self, _key: Option<&[u8]>, _next_key: &mut [u8]) -> Result<()> {
        Err(SystemError::ENOSYS)
    }
    /// Push an element value in map.
    fn push_elem(&mut self, _value: &[u8], _flags: u64) -> Result<()> {
        Err(SystemError::ENOSYS)
    }
    /// Pop an element value from map.
    fn pop_elem(&mut self, _value: &mut [u8]) -> Result<()> {
        Err(SystemError::ENOSYS)
    }
    /// Peek an element value from map.
    fn peek_elem(&self, _value: &mut [u8]) -> Result<()> {
        Err(SystemError::ENOSYS)
    }
    /// Freeze the map.
    ///
    /// It's useful for .rodata maps.
    fn freeze(&self) -> Result<()> {
        Err(SystemError::ENOSYS)
    }
    /// Get the first value pointer.
    fn first_value_ptr(&self) -> Result<*const u8> {
        Err(SystemError::ENOSYS)
    }
}
// Allows `Arc<dyn BpfMapCommonOps>` to be downcast to a concrete map type.
impl DowncastArc for dyn BpfMapCommonOps {
    fn as_any_arc(self: Arc<Self>) -> Arc<dyn Any> {
        self
    }
}
impl BpfMap {
    /// Wrap a concrete map implementation together with its metadata.
    ///
    /// `meta.key_size` is intentionally NOT asserted non-zero here: queue
    /// and stack maps (BPF_MAP_TYPE_QUEUE / BPF_MAP_TYPE_STACK) require
    /// `key_size == 0` (see `QueueMap::new`), so a blanket
    /// `assert_ne!(meta.key_size, 0)` would panic the kernel on a valid,
    /// user-supplied map definition. Each constructor validates its own
    /// metadata instead.
    pub fn new(map: Box<dyn BpfMapCommonOps>, meta: BpfMapMeta) -> Self {
        BpfMap {
            inner_map: SpinLock::new(map),
            meta,
        }
    }
    /// Lock guarding the underlying map implementation.
    pub fn inner_map(&self) -> &SpinLock<Box<dyn BpfMapCommonOps>> {
        &self.inner_map
    }
    /// Key size in bytes (0 for keyless maps such as queue/stack).
    pub fn key_size(&self) -> usize {
        self.meta.key_size as usize
    }
    /// Value size in bytes.
    pub fn value_size(&self) -> usize {
        self.meta.value_size as usize
    }
}
// A BpfMap is exposed to user space only as a file descriptor: open/close
// are no-ops, regular read/write are rejected, and all real access goes
// through the bpf() syscall commands.
impl IndexNode for BpfMap {
    fn open(&self, _data: SpinLockGuard<FilePrivateData>, _mode: &FileMode) -> Result<()> {
        Ok(())
    }
    fn close(&self, _data: SpinLockGuard<FilePrivateData>) -> Result<()> {
        Ok(())
    }
    // Byte-stream reads are not meaningful for a map fd.
    fn read_at(
        &self,
        _offset: usize,
        _len: usize,
        _buf: &mut [u8],
        _data: SpinLockGuard<FilePrivateData>,
    ) -> Result<usize> {
        Err(SystemError::ENOSYS)
    }
    // Byte-stream writes are not meaningful for a map fd.
    fn write_at(
        &self,
        _offset: usize,
        _len: usize,
        _buf: &[u8],
        _data: SpinLockGuard<FilePrivateData>,
    ) -> Result<usize> {
        Err(SystemError::ENOSYS)
    }
    fn metadata(&self) -> Result<Metadata> {
        let meta = Metadata {
            mode: ModeType::from_bits_truncate(0o755),
            file_type: FileType::File,
            ..Default::default()
        };
        Ok(meta)
    }
    fn resize(&self, _len: usize) -> Result<()> {
        Ok(())
    }
    // NOTE: calling fs() on a map fd panics — maps live outside any
    // filesystem.
    fn fs(&self) -> Arc<dyn FileSystem> {
        todo!("BpfMap does not have a filesystem")
    }
    fn as_any_ref(&self) -> &dyn Any {
        self
    }
    fn list(&self) -> Result<Vec<String>> {
        Err(SystemError::ENOSYS)
    }
}
/// Create a map and return a file descriptor that refers to
/// the map. The close-on-exec file descriptor flag
/// is automatically enabled for the new file descriptor.
///
/// See https://ebpf-docs.dylanreimerink.nl/linux/syscall/BPF_MAP_CREATE/
pub fn bpf_map_create(attr: &bpf_attr) -> Result<usize> {
let map_meta = BpfMapMeta::try_from(attr)?;
info!("The map attr is {:#?}", map_meta);
let map: Box<dyn BpfMapCommonOps> = match map_meta.map_type {
bpf_map_type::BPF_MAP_TYPE_ARRAY => {
let array_map = ArrayMap::new(&map_meta)?;
Box::new(array_map)
}
bpf_map_type::BPF_MAP_TYPE_PERCPU_ARRAY => {
let per_cpu_array_map = PerCpuArrayMap::new(&map_meta)?;
Box::new(per_cpu_array_map)
}
bpf_map_type::BPF_MAP_TYPE_PERF_EVENT_ARRAY => {
let perf_event_array_map = PerfEventArrayMap::new(&map_meta)?;
Box::new(perf_event_array_map)
}
bpf_map_type::BPF_MAP_TYPE_CPUMAP
| bpf_map_type::BPF_MAP_TYPE_DEVMAP
| bpf_map_type::BPF_MAP_TYPE_DEVMAP_HASH => {
error!("bpf map type {:?} not implemented", map_meta.map_type);
Err(SystemError::EINVAL)?
}
bpf_map_type::BPF_MAP_TYPE_HASH => {
let hash_map = hash_map::BpfHashMap::new(&map_meta)?;
Box::new(hash_map)
}
bpf_map_type::BPF_MAP_TYPE_PERCPU_HASH => {
let per_cpu_hash_map = PerCpuHashMap::new(&map_meta)?;
Box::new(per_cpu_hash_map)
}
bpf_map_type::BPF_MAP_TYPE_QUEUE => {
let queue_map = queue::QueueMap::new(&map_meta)?;
Box::new(queue_map)
}
bpf_map_type::BPF_MAP_TYPE_STACK => {
let stack_map = queue::StackMap::new(&map_meta)?;
Box::new(stack_map)
}
bpf_map_type::BPF_MAP_TYPE_LRU_HASH => {
let lru_hash_map = lru::LruMap::new(&map_meta)?;
Box::new(lru_hash_map)
}
bpf_map_type::BPF_MAP_TYPE_LRU_PERCPU_HASH => {
let lru_per_cpu_hash_map = lru::PerCpuLruMap::new(&map_meta)?;
Box::new(lru_per_cpu_hash_map)
}
_ => {
unimplemented!("bpf map type {:?} not implemented", map_meta.map_type)
}
};
let bpf_map = BpfMap::new(map, map_meta);
let fd_table = ProcessManager::current_pcb().fd_table();
let file = File::new(Arc::new(bpf_map), FileMode::O_RDWR | FileMode::O_CLOEXEC)?;
let fd = fd_table.write().alloc_fd(file, None).map(|x| x as usize)?;
info!("create map with fd: [{}]", fd);
Ok(fd)
}
/// Create or update an element (key/value pair) in a specified map.
///
/// Reads `key_size` bytes of key and `value_size` bytes of value from the
/// user-space pointers in `attr`, then forwards to the map's `update_elem`.
///
/// See https://ebpf-docs.dylanreimerink.nl/linux/syscall/BPF_MAP_UPDATE_ELEM/
pub fn bpf_map_update_elem(attr: &bpf_attr) -> Result<usize> {
    let arg = BpfMapUpdateArg::from(attr);
    info!("<bpf_map_update_elem>: {:#x?}", arg);
    let map = get_map_file(arg.map_fd as i32)?;
    let meta = &map.meta;
    let key_size = meta.key_size as usize;
    let value_size = meta.value_size as usize;

    // Validate both user buffers before touching the map.
    let key_buf = UserBufferReader::new(arg.key as *const u8, key_size, true)?;
    let value_buf = UserBufferReader::new(arg.value as *const u8, value_size, true)?;

    let key = key_buf.read_from_user(0)?;
    let value = value_buf.read_from_user(0)?;
    map.inner_map.lock().update_elem(key, value, arg.flags)?;
    info!("bpf_map_update_elem ok");
    Ok(0)
}
/// Freeze the map referenced by `attr.map_fd` (BPF_MAP_FREEZE).
///
/// After freezing, further writes from user space are rejected by maps that
/// support the operation (e.g. .rodata maps).
pub fn bpf_map_freeze(attr: &bpf_attr) -> Result<usize> {
    let map_fd = BpfMapUpdateArg::from(attr).map_fd;
    info!("<bpf_map_freeze>: map_fd: {:}", map_fd);
    get_map_file(map_fd as i32)?.inner_map.lock().freeze()?;
    Ok(0)
}
/// Look up an element by key in a specified map and return its value.
///
/// Copies the found value into the user-space buffer at `attr.value`;
/// returns `ENOENT` when the key is absent.
///
/// See https://ebpf-docs.dylanreimerink.nl/linux/syscall/BPF_MAP_LOOKUP_ELEM/
pub fn bpf_lookup_elem(attr: &bpf_attr) -> Result<usize> {
    let arg = BpfMapUpdateArg::from(attr);
    let map = get_map_file(arg.map_fd as _)?;
    let key_size = map.meta.key_size as usize;
    let value_size = map.meta.value_size as usize;

    let key_buf = UserBufferReader::new(arg.key as *const u8, key_size, true)?;
    let mut value_buf = UserBufferWriter::new(arg.value as *mut u8, value_size, true)?;
    let key = key_buf.read_from_user(0)?;

    let mut inner = map.inner_map.lock();
    match inner.lookup_elem(key)? {
        Some(found) => {
            value_buf.copy_to_user(found, 0)?;
            Ok(0)
        }
        None => Err(SystemError::ENOENT),
    }
}
/// Look up an element by key in a specified map and return the key of the next element.
///
/// - If key is `None`, the operation returns zero and sets the next_key pointer to the key of the first element.
/// - If key is `Some(T)`, the operation returns zero and sets the next_key pointer to the key of the next element.
/// - If key is the last element, returns -1 and errno is set to ENOENT.
///
/// See https://ebpf-docs.dylanreimerink.nl/linux/syscall/BPF_MAP_GET_NEXT_KEY/
pub fn bpf_map_get_next_key(attr: &bpf_attr) -> Result<usize> {
    let arg = BpfMapGetNextKeyArg::from(attr);
    // info!("<bpf_map_get_next_key>: {:#x?}", arg);
    let map = get_map_file(arg.map_fd as i32)?;
    let meta = &map.meta;
    let key_size = meta.key_size as usize;
    // A null key pointer means "give me the first key".
    let key = if let Some(key_ptr) = arg.key {
        let key_buf = UserBufferReader::new(key_ptr as *const u8, key_size, true)?;
        let key = key_buf.read_from_user(0)?.to_vec();
        Some(key)
    } else {
        None
    };
    let key = key.as_deref();
    // The map writes the next key directly into the user buffer.
    let mut next_key_buf = UserBufferWriter::new(arg.next_key as *mut u8, key_size, true)?;
    let inner = map.inner_map.lock();
    let next_key = next_key_buf.buffer(0)?;
    inner.get_next_key(key, next_key)?;
    // info!("next_key: {:?}", next_key);
    Ok(0)
}
/// Delete the element with the given key from the specified map.
///
/// See https://ebpf-docs.dylanreimerink.nl/linux/syscall/BPF_MAP_DELETE_ELEM/
pub fn bpf_map_delete_elem(attr: &bpf_attr) -> Result<usize> {
    let arg = BpfMapUpdateArg::from(attr);
    let map = get_map_file(arg.map_fd as i32)?;
    let key_size = map.meta.key_size as usize;
    let key_buf = UserBufferReader::new(arg.key as *const u8, key_size, true)?;
    let key = key_buf.read_from_user(0)?;
    map.inner_map.lock().delete_elem(key)?;
    Ok(0)
}
/// Iterate and fetch multiple elements in a map.
///
/// See https://ebpf-docs.dylanreimerink.nl/linux/syscall/BPF_MAP_LOOKUP_BATCH/
pub fn bpf_map_lookup_batch(_attr: &bpf_attr) -> Result<usize> {
todo!()
}
/// Look up an element with the given key in the map referred to by the file descriptor fd,
/// and if found, delete the element.
///
/// For BPF_MAP_TYPE_QUEUE and BPF_MAP_TYPE_STACK map types, the flags argument needs to be set to 0,
/// but for other map types, it may be specified as:
/// - BPF_F_LOCK : If this flag is set, the command will acquire the spin-lock of the map value we are looking up.
///
/// If the map contains no spin-lock in its value, -EINVAL will be returned by the command.
///
/// The BPF_MAP_TYPE_QUEUE and BPF_MAP_TYPE_STACK map types implement this command as a “pop” operation,
/// deleting the top element rather than one corresponding to key.
/// The key and key_len parameters should be zeroed when issuing this operation for these map types.
///
/// This command is only valid for the following map types:
/// - BPF_MAP_TYPE_QUEUE
/// - BPF_MAP_TYPE_STACK
/// - BPF_MAP_TYPE_HASH
/// - BPF_MAP_TYPE_PERCPU_HASH
/// - BPF_MAP_TYPE_LRU_HASH
/// - BPF_MAP_TYPE_LRU_PERCPU_HASH
///
///
/// See https://ebpf-docs.dylanreimerink.nl/linux/syscall/BPF_MAP_LOOKUP_AND_DELETE_ELEM/
pub fn bpf_map_lookup_and_delete_elem(attr: &bpf_attr) -> Result<usize> {
    let arg = BpfMapUpdateArg::from(attr);
    // info!("<bpf_map_lookup_and_delete_elem>: {:#x?}", arg);
    let map = get_map_file(arg.map_fd as i32)?;
    let meta = &map.meta;
    let key_size = meta.key_size as usize;
    let value_size = meta.value_size as usize;
    let key_buf = UserBufferReader::new(arg.key as *const u8, key_size, true)?;
    // The map writes the deleted value directly into the user buffer.
    let mut value_buf = UserBufferWriter::new(arg.value as *mut u8, value_size, true)?;
    let value = value_buf.buffer(0)?;
    let key = key_buf.read_from_user(0)?;
    let mut inner = map.inner_map.lock();
    inner.lookup_and_delete_elem(key, value)?;
    Ok(0)
}
/// Resolve a user fd to the [BpfMap] behind it.
///
/// Returns `EBADF` for an unknown fd and `EINVAL` when the fd refers to
/// something that is not a BPF map.
fn get_map_file(fd: i32) -> Result<Arc<BpfMap>> {
    let file = ProcessManager::current_pcb()
        .fd_table()
        .read()
        .get_file_by_fd(fd)
        .ok_or(SystemError::EBADF)?;
    file.inode()
        .downcast_arc::<BpfMap>()
        .ok_or(SystemError::EINVAL)
}

154
kernel/src/bpf/map/queue.rs Normal file
View File

@ -0,0 +1,154 @@
use super::{BpfMapCommonOps, Result};
use crate::bpf::map::util::{BpfMapMeta, BpfMapUpdateElemFlags};
use alloc::vec::Vec;
use core::fmt::Debug;
use core::ops::Deref;
use core::ops::DerefMut;
use system_error::SystemError;
type BpfQueueValue = Vec<u8>;
/// BPF_MAP_TYPE_QUEUE provides FIFO storage and BPF_MAP_TYPE_STACK provides LIFO storage for BPF programs.
/// These maps support peek, pop and push operations that are exposed to BPF programs through the respective helpers.
/// These operations are exposed to userspace applications using the existing bpf syscall in the following way:
/// - `BPF_MAP_LOOKUP_ELEM` -> `peek`
/// - `BPF_MAP_UPDATE_ELEM` -> `push`
/// - `BPF_MAP_LOOKUP_AND_DELETE_ELEM ` -> `pop`
///
/// See https://docs.kernel.org/bpf/map_queue_stack.html
pub trait SpecialMap: Debug + Send + Sync + 'static {
    /// Insert `value`; behavior on a full map depends on `flags`
    /// (BPF_EXIST evicts an element, otherwise ENOSPC).
    fn push(&mut self, value: BpfQueueValue, flags: BpfMapUpdateElemFlags) -> Result<()>;
    /// Removes the first element and returns it.
    fn pop(&mut self) -> Option<BpfQueueValue>;
    /// Returns the first element without removing it.
    fn peek(&self) -> Option<&BpfQueueValue>;
}
/// The queue map type is a generic map type, resembling a FIFO (First-In First-Out) queue.
///
/// This map type has no keys, only values. The size and type of the values can be specified by the user
/// to fit a large variety of use cases. The typical use-case for this map type is to keep track of
/// a pool of elements such as available network ports when implementing NAT (network address translation).
///
/// As apposed to most map types, this map type uses a custom set of helpers to pop, peek and push elements.
///
/// See https://ebpf-docs.dylanreimerink.nl/linux/map-type/BPF_MAP_TYPE_QUEUE/
#[derive(Debug)]
pub struct QueueMap {
    // Fixed capacity; push fails (or evicts, with BPF_EXIST) beyond this.
    max_entries: u32,
    // Front of the queue is index 0. NOTE(review): `Vec::remove(0)` is O(n)
    // per pop; a VecDeque would make pops O(1) — consider if this becomes hot.
    data: Vec<BpfQueueValue>,
}
impl QueueMap {
    /// Create a queue map.
    ///
    /// Queue maps are keyless: `key_size` must be 0, while the value size
    /// and capacity must be non-zero; otherwise `EINVAL`.
    pub fn new(attr: &BpfMapMeta) -> Result<Self> {
        let valid = attr.value_size != 0 && attr.max_entries != 0 && attr.key_size == 0;
        if !valid {
            return Err(SystemError::EINVAL);
        }
        Ok(Self {
            max_entries: attr.max_entries,
            data: Vec::with_capacity(attr.max_entries as usize),
        })
    }
}
impl SpecialMap for QueueMap {
    /// Append at the back; when full, BPF_EXIST evicts the front (oldest)
    /// element, otherwise the push fails with ENOSPC.
    fn push(&mut self, value: BpfQueueValue, flags: BpfMapUpdateElemFlags) -> Result<()> {
        if self.data.len() == self.max_entries as usize {
            if !flags.contains(BpfMapUpdateElemFlags::BPF_EXIST) {
                return Err(SystemError::ENOSPC);
            }
            // remove the first element
            self.data.remove(0);
        }
        self.data.push(value);
        Ok(())
    }
    /// FIFO: remove and return the front element, if any.
    fn pop(&mut self) -> Option<BpfQueueValue> {
        if self.data.is_empty() {
            None
        } else {
            Some(self.data.remove(0))
        }
    }
    /// FIFO: the front element is the next one to be popped.
    fn peek(&self) -> Option<&BpfQueueValue> {
        self.data.first()
    }
}
/// The stack map type is a generic map type, resembling a stack data structure.
///
/// Reuses [QueueMap]'s storage via the newtype pattern; only push/pop/peek
/// change to LIFO semantics.
///
/// See https://ebpf-docs.dylanreimerink.nl/linux/map-type/BPF_MAP_TYPE_STACK/
#[derive(Debug)]
pub struct StackMap(QueueMap);
impl StackMap {
pub fn new(attr: &BpfMapMeta) -> Result<Self> {
QueueMap::new(attr).map(StackMap)
}
}
// Expose the inner QueueMap's fields (`data`, `max_entries`) directly.
impl Deref for StackMap {
    type Target = QueueMap;
    fn deref(&self) -> &Self::Target {
        &self.0
    }
}
// Mutable counterpart of the Deref impl above.
impl DerefMut for StackMap {
    fn deref_mut(&mut self) -> &mut Self::Target {
        &mut self.0
    }
}
impl SpecialMap for StackMap {
    /// Push on top of the stack; when full, BPF_EXIST evicts the current
    /// top element before pushing.
    ///
    /// NOTE(review): Linux's queue_stack map drops the *oldest* (bottom)
    /// element when a full stack is pushed with BPF_EXIST, whereas this
    /// replaces the newest (top) — TODO confirm intended semantics.
    fn push(&mut self, value: BpfQueueValue, flags: BpfMapUpdateElemFlags) -> Result<()> {
        if self.data.len() == self.max_entries as usize {
            if flags.contains(BpfMapUpdateElemFlags::BPF_EXIST) {
                // remove the last element
                self.data.pop();
            } else {
                return Err(SystemError::ENOSPC);
            }
        }
        self.data.push(value);
        Ok(())
    }
    // LIFO: the back of the Vec is the top of the stack.
    fn pop(&mut self) -> Option<BpfQueueValue> {
        self.data.pop()
    }
    fn peek(&self) -> Option<&BpfQueueValue> {
        self.data.last()
    }
}
// Blanket impl: any queue/stack-style map automatically gets the common map
// interface. Keys are ignored — these map types are keyless.
impl<T: SpecialMap> BpfMapCommonOps for T {
    /// Equal to [QueueMap::peek]
    fn lookup_elem(&mut self, _key: &[u8]) -> Result<Option<&[u8]>> {
        Ok(self.peek().map(|v| v.as_slice()))
    }
    /// Equal to [QueueMap::push]
    fn update_elem(&mut self, _key: &[u8], value: &[u8], flags: u64) -> Result<()> {
        // Unknown flag bits are silently dropped by from_bits_truncate.
        let flag = BpfMapUpdateElemFlags::from_bits_truncate(flags);
        self.push(value.to_vec(), flag)
    }
    /// Equal to [QueueMap::pop]
    fn lookup_and_delete_elem(&mut self, _key: &[u8], value: &mut [u8]) -> Result<()> {
        // copy_from_slice panics if `value` is not exactly the element's
        // length; the syscall layer sizes it to value_size.
        if let Some(v) = self.pop() {
            value.copy_from_slice(&v);
            Ok(())
        } else {
            Err(SystemError::ENOENT)
        }
    }
    fn push_elem(&mut self, value: &[u8], flags: u64) -> Result<()> {
        self.update_elem(&[], value, flags)
    }
    fn pop_elem(&mut self, value: &mut [u8]) -> Result<()> {
        self.lookup_and_delete_elem(&[], value)
    }
    fn peek_elem(&self, value: &mut [u8]) -> Result<()> {
        self.peek()
            .map(|v| value.copy_from_slice(v))
            .ok_or(SystemError::ENOENT)
    }
}

100
kernel/src/bpf/map/util.rs Normal file
View File

@ -0,0 +1,100 @@
use crate::include::bindings::linux_bpf::{bpf_attr, bpf_map_type};
use alloc::string::{String, ToString};
use core::ffi::CStr;
use num_traits::FromPrimitive;
use system_error::SystemError;
/// Validated map-creation attributes, decoded from the raw `bpf_attr` union.
#[derive(Debug, Clone)]
pub struct BpfMapMeta {
    pub map_type: bpf_map_type,
    // Key size in bytes; 0 for keyless maps (queue/stack).
    pub key_size: u32,
    // Value size in bytes.
    pub value_size: u32,
    // Maximum number of elements the map may hold.
    pub max_entries: u32,
    // Creation flags; currently unused by the map implementations.
    pub _map_flags: u32,
    // Human-readable name supplied by user space; informational only.
    pub _map_name: String,
}
impl TryFrom<&bpf_attr> for BpfMapMeta {
    type Error = SystemError;
    /// Decode the BPF_MAP_CREATE variant of the `bpf_attr` union.
    ///
    /// Fails with `EINVAL` if the map name is not NUL-terminated valid
    /// UTF-8, or the map type number is unknown.
    fn try_from(value: &bpf_attr) -> Result<Self, Self::Error> {
        // SAFETY-relevant: `__bindgen_anon_1` is the map-create member of
        // the bindgen union; callers must only use this on BPF_MAP_CREATE.
        let u = unsafe { &value.__bindgen_anon_1 };
        let map_name_slice = unsafe {
            core::slice::from_raw_parts(u.map_name.as_ptr() as *const u8, u.map_name.len())
        };
        let map_name = CStr::from_bytes_until_nul(map_name_slice)
            .map_err(|_| SystemError::EINVAL)?
            .to_str()
            .map_err(|_| SystemError::EINVAL)?
            .to_string();
        let map_type = bpf_map_type::from_u32(u.map_type).ok_or(SystemError::EINVAL)?;
        Ok(BpfMapMeta {
            map_type,
            key_size: u.key_size,
            value_size: u.value_size,
            max_entries: u.max_entries,
            _map_flags: u.map_flags,
            _map_name: map_name,
        })
    }
}
/// Arguments for the element-level map commands (update/lookup/delete/...).
#[derive(Debug)]
pub struct BpfMapUpdateArg {
    pub map_fd: u32,
    // User-space pointer to the key buffer.
    pub key: u64,
    // User-space pointer to the value buffer.
    pub value: u64,
    pub flags: u64,
}
impl From<&bpf_attr> for BpfMapUpdateArg {
    /// Decode the element-operation variant of the `bpf_attr` union.
    fn from(value: &bpf_attr) -> Self {
        // SAFETY-relevant: `__bindgen_anon_2` is only meaningful for the
        // element-level map commands that call this conversion.
        unsafe {
            let u = &value.__bindgen_anon_2;
            BpfMapUpdateArg {
                map_fd: u.map_fd,
                key: u.key,
                value: u.__bindgen_anon_1.value,
                flags: u.flags,
            }
        }
    }
}
/// Arguments for BPF_MAP_GET_NEXT_KEY.
#[derive(Debug)]
pub struct BpfMapGetNextKeyArg {
    pub map_fd: u32,
    // User-space key pointer; `None` (NULL) requests the first key.
    pub key: Option<u64>,
    // User-space buffer the next key is written into.
    pub next_key: u64,
}
impl From<&bpf_attr> for BpfMapGetNextKeyArg {
    /// Decode the get-next-key variant of the `bpf_attr` union; a NULL key
    /// pointer becomes `None` ("return the first key").
    fn from(value: &bpf_attr) -> Self {
        unsafe {
            let u = &value.__bindgen_anon_2;
            BpfMapGetNextKeyArg {
                map_fd: u.map_fd,
                key: if u.key != 0 { Some(u.key) } else { None },
                next_key: u.__bindgen_anon_1.next_key,
            }
        }
    }
}
/// Round up `x` to the nearest multiple of `align`.
///
/// `align` must be a non-zero power of two — the bit trick below is only
/// correct in that case (e.g. `round_up(9, 4) == 12`, `round_up(8, 8) == 8`).
/// Passing a non-power-of-two silently produced wrong results before; it is
/// now caught in debug builds.
#[inline]
pub fn round_up(x: usize, align: usize) -> usize {
    debug_assert!(align.is_power_of_two(), "round_up: align must be a power of two");
    (x + align - 1) & !(align - 1)
}
bitflags! {
    /// flags for BPF_MAP_UPDATE_ELEM command
    ///
    /// Mirrors the kernel's BPF_ANY/BPF_NOEXIST/BPF_EXIST/BPF_F_LOCK values.
    pub struct BpfMapUpdateElemFlags: u64 {
        /// create new element or update existing
        const BPF_ANY = 0;
        /// create new element if it didn't exist
        const BPF_NOEXIST = 1;
        /// update existing element
        const BPF_EXIST = 2;
        /// spin_lock-ed map_lookup/map_update
        const BPF_F_LOCK = 4;
    }
}

50
kernel/src/bpf/mod.rs Normal file
View File

@ -0,0 +1,50 @@
pub mod helper;
pub mod map;
pub mod prog;
use crate::include::bindings::linux_bpf::{bpf_attr, bpf_cmd};
use crate::syscall::user_access::UserBufferReader;
use crate::syscall::Syscall;
use log::error;
use num_traits::FromPrimitive;
use system_error::SystemError;
type Result<T> = core::result::Result<T, SystemError>;
impl Syscall {
    /// Entry point of the bpf() syscall.
    ///
    /// Copies the `size`-byte attribute union from user space, validates the
    /// command number, and dispatches to [bpf].
    pub fn sys_bpf(cmd: u32, attr: *mut u8, size: u32) -> Result<usize> {
        let buf = UserBufferReader::new(attr, size as usize, true)?;
        let attr = buf.read_one_from_user::<bpf_attr>(0)?;
        let cmd = bpf_cmd::from_u32(cmd).ok_or(SystemError::EINVAL)?;
        bpf(cmd, attr)
    }
}
pub fn bpf(cmd: bpf_cmd, attr: &bpf_attr) -> Result<usize> {
let res = match cmd {
// Map related commands
bpf_cmd::BPF_MAP_CREATE => map::bpf_map_create(attr),
bpf_cmd::BPF_MAP_UPDATE_ELEM => map::bpf_map_update_elem(attr),
bpf_cmd::BPF_MAP_LOOKUP_ELEM => map::bpf_lookup_elem(attr),
bpf_cmd::BPF_MAP_GET_NEXT_KEY => map::bpf_map_get_next_key(attr),
bpf_cmd::BPF_MAP_DELETE_ELEM => map::bpf_map_delete_elem(attr),
bpf_cmd::BPF_MAP_LOOKUP_AND_DELETE_ELEM => map::bpf_map_lookup_and_delete_elem(attr),
bpf_cmd::BPF_MAP_LOOKUP_BATCH => map::bpf_map_lookup_batch(attr),
bpf_cmd::BPF_MAP_FREEZE => map::bpf_map_freeze(attr),
// Program related commands
bpf_cmd::BPF_PROG_LOAD => prog::bpf_prog_load(attr),
// Object creation commands
bpf_cmd::BPF_BTF_LOAD => {
error!("bpf cmd {:?} not implemented", cmd);
return Err(SystemError::ENOSYS);
}
ty => {
unimplemented!("bpf cmd {:?} not implemented", ty)
}
};
res
}
/// Initialize the BPF system
///
/// Currently this only registers the eBPF helper-function table; map and
/// program state is created lazily per syscall.
pub fn init_bpf_system() {
    helper::init_helper_functions();
}

123
kernel/src/bpf/prog/mod.rs Normal file
View File

@ -0,0 +1,123 @@
mod util;
mod verifier;
use super::Result;
use crate::bpf::map::BpfMap;
use crate::bpf::prog::util::{BpfProgMeta, BpfProgVerifierInfo};
use crate::bpf::prog::verifier::BpfProgVerifier;
use crate::filesystem::vfs::file::{File, FileMode};
use crate::filesystem::vfs::syscall::ModeType;
use crate::filesystem::vfs::{FilePrivateData, FileSystem, FileType, IndexNode, Metadata};
use crate::include::bindings::linux_bpf::bpf_attr;
use crate::libs::spinlock::SpinLockGuard;
use crate::process::ProcessManager;
use alloc::string::String;
use alloc::sync::Arc;
use alloc::vec::Vec;
use core::any::Any;
use system_error::SystemError;
/// A loaded (and verified) eBPF program, exposed to user space as an fd.
#[derive(Debug)]
pub struct BpfProg {
    meta: BpfProgMeta,
    // Raw pointers to the Arc<BpfMap>s this program references after
    // relocation; each is released exactly once in Drop.
    raw_file_ptr: Vec<usize>,
}
impl BpfProg {
    /// Wrap verified program metadata; no maps are referenced yet.
    pub fn new(meta: BpfProgMeta) -> Self {
        Self {
            meta,
            raw_file_ptr: Vec::new(),
        }
    }
    /// Raw instruction bytes (8 bytes per eBPF instruction).
    pub fn insns(&self) -> &[u8] {
        &self.meta.insns
    }
    /// Mutable instruction bytes; used by relocation to patch map addresses.
    pub fn insns_mut(&mut self) -> &mut [u8] {
        &mut self.meta.insns
    }
    /// Record a raw map pointer so the reference can be released in Drop.
    /// The pointer must stay valid for the lifetime of this program.
    pub fn insert_map(&mut self, map_ptr: usize) {
        self.raw_file_ptr.push(map_ptr);
    }
}
// Like BpfMap, a program only exists as a file descriptor: open/close are
// no-ops and byte-stream read/write are rejected.
impl IndexNode for BpfProg {
    fn open(&self, _data: SpinLockGuard<FilePrivateData>, _mode: &FileMode) -> Result<()> {
        Ok(())
    }
    fn close(&self, _data: SpinLockGuard<FilePrivateData>) -> Result<()> {
        Ok(())
    }
    fn read_at(
        &self,
        _offset: usize,
        _len: usize,
        _buf: &mut [u8],
        _data: SpinLockGuard<FilePrivateData>,
    ) -> Result<usize> {
        Err(SystemError::ENOSYS)
    }
    fn write_at(
        &self,
        _offset: usize,
        _len: usize,
        _buf: &[u8],
        _data: SpinLockGuard<FilePrivateData>,
    ) -> Result<usize> {
        Err(SystemError::ENOSYS)
    }
    fn metadata(&self) -> Result<Metadata> {
        let meta = Metadata {
            mode: ModeType::from_bits_truncate(0o755),
            file_type: FileType::File,
            ..Default::default()
        };
        Ok(meta)
    }
    fn resize(&self, _len: usize) -> Result<()> {
        Ok(())
    }
    // NOTE: calling fs() on a program fd panics — programs live outside any
    // filesystem.
    fn fs(&self) -> Arc<dyn FileSystem> {
        panic!("BpfProg does not have a filesystem")
    }
    fn as_any_ref(&self) -> &dyn Any {
        self
    }
    fn list(&self) -> Result<Vec<String>> {
        Err(SystemError::ENOSYS)
    }
}
impl Drop for BpfProg {
    /// Release the map references the program took during relocation.
    fn drop(&mut self) {
        // SAFETY: each entry in `raw_file_ptr` is presumably a pointer
        // produced by `Arc::into_raw` on an `Arc<BpfMap>` during relocation
        // (see `insert_map`), and is reconstructed exactly once here —
        // TODO confirm against the verifier's relocation code.
        unsafe {
            for ptr in self.raw_file_ptr.iter() {
                let file = Arc::from_raw(*ptr as *const u8 as *const BpfMap);
                drop(file)
            }
        }
    }
}
/// Load a BPF program into the kernel.
///
/// Decodes the program attributes, runs the verifier (which also performs
/// map relocation), and installs the verified program as a new fd in the
/// current process's fd table.
///
/// See https://ebpf-docs.dylanreimerink.nl/linux/syscall/BPF_PROG_LOAD/
pub fn bpf_prog_load(attr: &bpf_attr) -> Result<usize> {
    let args = BpfProgMeta::try_from(attr)?;
    // info!("bpf_prog_load: {:#?}", args);
    let log_info = BpfProgVerifierInfo::from(attr);
    let prog = BpfProg::new(args);
    let fd_table = ProcessManager::current_pcb().fd_table();
    // NOTE: the user's verifier log buffer is not wired through yet; an
    // empty slice is passed instead.
    let prog = BpfProgVerifier::new(prog, log_info.log_level, &mut []).verify(&fd_table)?;
    let file = File::new(Arc::new(prog), FileMode::O_RDWR)?;
    let fd = fd_table.write().alloc_fd(file, None).map(|x| x as usize)?;
    Ok(fd)
}

112
kernel/src/bpf/prog/util.rs Normal file
View File

@ -0,0 +1,112 @@
use crate::include::bindings::linux_bpf::{bpf_attach_type, bpf_attr, bpf_prog_type};
use crate::syscall::user_access::{check_and_clone_cstr, UserBufferReader};
use alloc::string::{String, ToString};
use alloc::vec::Vec;
use core::ffi::CStr;
use core::fmt::Debug;
use num_traits::FromPrimitive;
use system_error::SystemError;
bitflags::bitflags! {
    /// Verbosity flags for the verifier log (mirrors the kernel's
    /// BPF_LOG_LEVEL* values).
    pub struct VerifierLogLevel: u32 {
        /// Sets no verifier logging.
        const DISABLE = 0;
        /// Enables debug verifier logging.
        const DEBUG = 1;
        /// Enables verbose verifier logging (implies DEBUG).
        const VERBOSE = 2 | Self::DEBUG.bits();
        /// Enables verifier stats.
        const STATS = 4;
    }
}
/// Verifier-log parameters supplied by user space with BPF_PROG_LOAD.
#[derive(Debug)]
pub struct BpfProgVerifierInfo {
    /// This attribute specifies the level/detail of the log output. Valid values are.
    pub log_level: VerifierLogLevel,
    /// This attributes indicates the size of the memory region in bytes
    /// indicated by `log_buf` which can safely be written to by the kernel.
    pub _log_buf_size: u32,
    /// This attributes can be set to a pointer to a memory region
    /// allocated/reserved by the loader process where the verifier log will
    /// be written to.
    /// The detail of the log is set by log_level. The verifier log
    /// is often the only indication in addition to the error code of
    /// why the syscall command failed to load the program.
    ///
    /// The log is also written to on success. If the kernel runs out of
    /// space in the buffer while loading, the loading process will fail
    /// and the command will return with an error code of -ENOSPC. So it
    /// is important to correctly size the buffer when enabling logging.
    pub _log_buf_ptr: usize,
}
impl From<&bpf_attr> for BpfProgVerifierInfo {
    /// Decode the BPF_PROG_LOAD variant of the `bpf_attr` union.
    fn from(attr: &bpf_attr) -> Self {
        // SAFETY-relevant: `__bindgen_anon_3` is the prog-load member of the
        // bindgen union; only the BPF_PROG_LOAD path calls this conversion.
        unsafe {
            let u = &attr.__bindgen_anon_3;
            Self {
                log_level: VerifierLogLevel::from_bits_truncate(u.log_level),
                _log_buf_size: u.log_size,
                _log_buf_ptr: u.log_buf as usize,
            }
        }
    }
}
/// Decoded BPF_PROG_LOAD attributes: program type, instructions and
/// identifying metadata.
pub struct BpfProgMeta {
    pub prog_flags: u32,
    pub prog_type: bpf_prog_type,
    pub expected_attach_type: bpf_attach_type,
    // Raw instruction bytes; 8 bytes per eBPF instruction.
    pub insns: Vec<u8>,
    pub license: String,
    pub kern_version: u32,
    pub name: String,
}
// Manual Debug: the raw instruction bytes are summarized as an instruction
// count (len / 8, one eBPF instruction being 8 bytes) instead of dumped.
impl Debug for BpfProgMeta {
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        f.debug_struct("BpfProgMeta")
            .field("prog_flags", &self.prog_flags)
            .field("prog_type", &self.prog_type)
            .field("expected_attach_type", &self.expected_attach_type)
            .field("insns_len", &(self.insns.len() / 8))
            .field("license", &self.license)
            .field("kern_version", &self.kern_version)
            .field("name", &self.name)
            .finish()
    }
}
impl TryFrom<&bpf_attr> for BpfProgMeta {
    type Error = SystemError;
    /// Decode and copy-in the BPF_PROG_LOAD attributes.
    ///
    /// Fails with `EINVAL` on unknown program/attach types, a bad name, or
    /// a non-UTF-8 license string.
    fn try_from(attr: &bpf_attr) -> Result<Self, Self::Error> {
        let u = unsafe { &attr.__bindgen_anon_3 };
        let prog_type = bpf_prog_type::from_u32(u.prog_type).ok_or(SystemError::EINVAL)?;
        let expected_attach_type =
            bpf_attach_type::from_u32(u.expected_attach_type).ok_or(SystemError::EINVAL)?;
        unsafe {
            // Each eBPF instruction is 8 bytes, so insn_cnt * 8 bytes are
            // copied from user space.
            // NOTE(review): `u.insn_cnt as usize * 8` could overflow for a
            // hostile insn_cnt on 32-bit targets — TODO confirm the buffer
            // reader rejects such sizes.
            let insns_buf =
                UserBufferReader::new(u.insns as *mut u8, u.insn_cnt as usize * 8, true)?;
            let insns = insns_buf.read_from_user::<u8>(0)?.to_vec();
            let name_slice =
                core::slice::from_raw_parts(u.prog_name.as_ptr() as *const u8, u.prog_name.len());
            let prog_name = CStr::from_bytes_until_nul(name_slice)
                .map_err(|_| SystemError::EINVAL)?
                .to_str()
                .map_err(|_| SystemError::EINVAL)?
                .to_string();
            let license = check_and_clone_cstr(u.license as *const u8, None)?;
            Ok(Self {
                prog_flags: u.prog_flags,
                prog_type,
                expected_attach_type,
                insns,
                license: license.into_string().map_err(|_| SystemError::EINVAL)?,
                kern_version: u.kern_version,
                name: prog_name,
            })
        }
    }
}

View File

@ -0,0 +1,131 @@
use super::super::Result;
use crate::bpf::map::BpfMap;
use crate::bpf::prog::util::VerifierLogLevel;
use crate::bpf::prog::BpfProg;
use crate::filesystem::vfs::file::FileDescriptorVec;
use crate::include::bindings::linux_bpf::*;
use crate::libs::casting::DowncastArc;
use crate::libs::rwlock::RwLock;
use alloc::{sync::Arc, vec::Vec};
use log::{error, info};
use rbpf::ebpf;
use rbpf::ebpf::to_insn_vec;
use system_error::SystemError;
/// The BPF program verifier.
///
/// See https://docs.kernel.org/bpf/verifier.html
#[derive(Debug)]
pub struct BpfProgVerifier<'a> {
    // Program being verified; returned to the caller on success.
    prog: BpfProg,
    // Requested verbosity of the verifier log (not emitted yet).
    _log_level: VerifierLogLevel,
    // User-supplied buffer for the verifier log (not written yet).
    _log_buf: &'a mut [u8],
}
impl<'a> BpfProgVerifier<'a> {
pub fn new(prog: BpfProg, log_level: VerifierLogLevel, log_buf: &'a mut [u8]) -> Self {
Self {
prog,
_log_level: log_level,
_log_buf: log_buf,
}
}
/// Relocate the program.
///
/// This function will relocate the program, and update the program's instructions.
fn relocation(&mut self, fd_table: &Arc<RwLock<FileDescriptorVec>>) -> Result<()> {
let instructions = self.prog.insns_mut();
let mut fmt_insn = to_insn_vec(instructions);
let mut index = 0;
let mut raw_file_ptr = vec![];
loop {
if index >= fmt_insn.len() {
break;
}
let mut insn = fmt_insn[index].clone();
if insn.opc == ebpf::LD_DW_IMM {
// relocate the instruction
let mut next_insn = fmt_insn[index + 1].clone();
// the imm is the map_fd because user lib has already done the relocation
let map_fd = insn.imm as usize;
let src_reg = insn.src;
// See https://www.kernel.org/doc/html/latest/bpf/standardization/instruction-set.html#id23
let ptr = match src_reg as u32 {
BPF_PSEUDO_MAP_VALUE => {
// dst = map_val(map_by_fd(imm)) + next_imm
// map_val(map) gets the address of the first value in a given map
let file = fd_table
.read()
.get_file_by_fd(map_fd as i32)
.ok_or(SystemError::EBADF)?;
let bpf_map = file
.inode()
.downcast_arc::<BpfMap>()
.ok_or(SystemError::EINVAL)?;
let first_value_ptr =
bpf_map.inner_map().lock().first_value_ptr()? as usize;
let offset = next_insn.imm as usize;
info!(
"Relocate for BPF_PSEUDO_MAP_VALUE, instruction index: {}, map_fd: {}",
index, map_fd
);
Some(first_value_ptr + offset)
}
BPF_PSEUDO_MAP_FD => {
// dst = map_by_fd(imm)
// map_by_fd(imm) means to convert a 32-bit file descriptor into an address of a map
let bpf_map = fd_table
.read()
.get_file_by_fd(map_fd as i32)
.ok_or(SystemError::EBADF)?
.inode()
.downcast_arc::<BpfMap>()
.ok_or(SystemError::EINVAL)?;
// todo!(warning: We need release after prog unload)
let map_ptr = Arc::into_raw(bpf_map) as usize;
info!(
"Relocate for BPF_PSEUDO_MAP_FD, instruction index: {}, map_fd: {}, ptr: {:#x}",
index, map_fd, map_ptr
);
raw_file_ptr.push(map_ptr);
Some(map_ptr)
}
ty => {
error!(
"relocation for ty: {} not implemented, instruction index: {}",
ty, index
);
None
}
};
if let Some(ptr) = ptr {
// The current ins store the map_data_ptr low 32 bits,
// the next ins store the map_data_ptr high 32 bits
insn.imm = ptr as i32;
next_insn.imm = (ptr >> 32) as i32;
fmt_insn[index] = insn;
fmt_insn[index + 1] = next_insn;
index += 2;
} else {
index += 1;
}
} else {
index += 1;
}
}
let fmt_insn = fmt_insn
.iter()
.flat_map(|ins| ins.to_vec())
.collect::<Vec<u8>>();
instructions.copy_from_slice(&fmt_insn);
for ptr in raw_file_ptr {
self.prog.insert_map(ptr);
}
Ok(())
}
pub fn verify(mut self, fd_table: &Arc<RwLock<FileDescriptorVec>>) -> Result<BpfProg> {
self.relocation(fd_table)?;
Ok(self.prog)
}
}

View File

@ -11,3 +11,12 @@ static inline int strlen(const char *s) {
}
return __res;
}
// Compare two NUL-terminated strings.
//
// Matches the C standard's strcmp(): bytes are compared as unsigned char,
// so the sign of the result is well-defined even for bytes >= 0x80 on
// platforms where plain `char` is signed.
static inline int strcmp(const char *s1, const char *s2) {
    // If *s1 is non-zero and equal to *s2, then *s2 is non-zero too,
    // so a single test per iteration suffices.
    while (*s1 && *s1 == *s2) {
        ++s1;
        ++s2;
    }
    return (int)(unsigned char)*s1 - (int)(unsigned char)*s2;
}

View File

@ -0,0 +1,66 @@
use alloc::boxed::Box;
use alloc::string::String;
use kprobe::{CallBackFunc, KprobeBuilder, ProbeArgs};
use log::warn;
use system_error::SystemError;
/// Everything needed to build and install a kprobe.
pub struct KprobeInfo {
    // Invoked before the probed instruction executes.
    pub pre_handler: fn(&dyn ProbeArgs),
    // Invoked after the single-stepped instruction completes.
    pub post_handler: fn(&dyn ProbeArgs),
    // Optional handler for faults raised while the probe is active.
    pub fault_handler: Option<fn(&dyn ProbeArgs)>,
    // Optional callback fired together with the post-handler.
    pub event_callback: Option<Box<dyn CallBackFunc>>,
    // Symbol to probe; mutually exclusive with `addr`.
    pub symbol: Option<String>,
    // Explicit address to probe; mutually exclusive with `symbol`.
    pub addr: Option<usize>,
    // Byte offset added to the resolved probe address.
    pub offset: usize,
    // Whether the probe starts in the enabled state.
    pub enable: bool,
}
extern "C" {
fn addr_from_symbol(symbol: *const u8) -> usize;
}
impl TryFrom<KprobeInfo> for KprobeBuilder {
type Error = SystemError;
fn try_from(kprobe_info: KprobeInfo) -> Result<Self, Self::Error> {
// 检查参数: symbol和addr必须有一个但不能同时有
if kprobe_info.symbol.is_none() && kprobe_info.addr.is_none() {
return Err(SystemError::EINVAL);
}
if kprobe_info.symbol.is_some() && kprobe_info.addr.is_some() {
return Err(SystemError::EINVAL);
}
let func_addr = if let Some(symbol) = kprobe_info.symbol.clone() {
let mut symbol_sting = symbol;
if !symbol_sting.ends_with("\0") {
symbol_sting.push('\0');
}
let symbol = symbol_sting.as_ptr();
let func_addr = unsafe { addr_from_symbol(symbol) };
if func_addr == 0 {
warn!(
"register_kprobe: the symbol: {:?} not found",
kprobe_info.symbol
);
return Err(SystemError::ENXIO);
}
func_addr
} else {
kprobe_info.addr.unwrap()
};
let mut builder = KprobeBuilder::new(
kprobe_info.symbol,
func_addr,
kprobe_info.offset,
kprobe_info.pre_handler,
kprobe_info.post_handler,
kprobe_info.enable,
);
if let Some(fault_handler) = kprobe_info.fault_handler {
builder = builder.with_fault_handler(fault_handler);
}
if let Some(event_callback) = kprobe_info.event_callback {
builder = builder.with_event_callback(event_callback);
}
Ok(builder)
}
}

View File

@ -0,0 +1,183 @@
use crate::debug::kprobe::args::KprobeInfo;
use crate::libs::rwlock::RwLock;
use crate::libs::spinlock::SpinLock;
use alloc::collections::BTreeMap;
use alloc::sync::Arc;
use alloc::vec::Vec;
use kprobe::{Kprobe, KprobeBuilder, KprobeOps, KprobePoint};
use system_error::SystemError;
pub mod args;
#[cfg(feature = "kprobe_test")]
mod test;
/// A kprobe shared behind a read-write lock.
pub type LockKprobe = Arc<RwLock<Kprobe>>;
/// Global registry of installed kprobes, keyed by their break/debug addresses.
pub static KPROBE_MANAGER: SpinLock<KprobeManager> = SpinLock::new(KprobeManager::new());
/// One shared probe point per instrumented address, reused by every kprobe on it.
static KPROBE_POINT_LIST: SpinLock<BTreeMap<usize, Arc<KprobePoint>>> =
    SpinLock::new(BTreeMap::new());
/// Manages every installed kprobe probe point.
#[derive(Debug, Default)]
pub struct KprobeManager {
    // Keyed by break_address(): kprobes fired by the breakpoint exception.
    break_list: BTreeMap<usize, Vec<LockKprobe>>,
    // Keyed by debug_address(): kprobes resumed by the single-step exception.
    debug_list: BTreeMap<usize, Vec<LockKprobe>>,
}
impl KprobeManager {
    pub const fn new() -> Self {
        KprobeManager {
            break_list: BTreeMap::new(),
            debug_list: BTreeMap::new(),
        }
    }
    /// Register `kprobe` under both of its probe-point addresses.
    ///
    /// ## Parameters
    /// - `kprobe`: the kprobe instance
    pub fn insert_kprobe(&mut self, kprobe: LockKprobe) {
        let probe_point = kprobe.read().probe_point().clone();
        self.insert_break_point(probe_point.break_address(), kprobe.clone());
        self.insert_debug_point(probe_point.debug_address(), kprobe);
    }
    /// Append `kprobe` to the list for the breakpoint address
    /// (`KprobePoint::break_address()` / `KprobeBuilder::probe_addr()`).
    fn insert_break_point(&mut self, address: usize, kprobe: LockKprobe) {
        self.break_list.entry(address).or_default().push(kprobe);
    }
    /// Append `kprobe` to the list for its single-step address
    /// (`KprobePoint::debug_address()`).
    fn insert_debug_point(&mut self, address: usize, kprobe: LockKprobe) {
        self.debug_list.entry(address).or_default().push(kprobe);
    }
    /// Kprobes registered on breakpoint `address`, if any.
    pub fn get_break_list(&self, address: usize) -> Option<&Vec<LockKprobe>> {
        self.break_list.get(&address)
    }
    /// Kprobes registered on single-step `address`, if any.
    pub fn get_debug_list(&self, address: usize) -> Option<&Vec<LockKprobe>> {
        self.debug_list.get(&address)
    }
    /// Number of kprobes registered on breakpoint `address`.
    pub fn kprobe_num(&self, address: usize) -> usize {
        self.break_list_len(address)
    }
    #[inline]
    fn break_list_len(&self, address: usize) -> usize {
        self.break_list
            .get(&address)
            .map(|list| list.len())
            .unwrap_or(0)
    }
    #[inline]
    fn debug_list_len(&self, address: usize) -> usize {
        self.debug_list
            .get(&address)
            .map(|list| list.len())
            .unwrap_or(0)
    }
    /// Unregister `kprobe` from both of its probe-point addresses.
    ///
    /// ## Parameters
    /// - `kprobe`: the kprobe instance
    pub fn remove_kprobe(&mut self, kprobe: &LockKprobe) {
        let probe_point = kprobe.read().probe_point().clone();
        self.remove_one_break(probe_point.break_address(), kprobe);
        self.remove_one_debug(probe_point.debug_address(), kprobe);
    }
    /// Remove `kprobe` from the break list at `address`, dropping the map
    /// entry once the list becomes empty.
    ///
    /// Uses a single `get_mut` lookup instead of a retain followed by a
    /// second length query.
    fn remove_one_break(&mut self, address: usize, kprobe: &LockKprobe) {
        if let Some(list) = self.break_list.get_mut(&address) {
            list.retain(|x| !Arc::ptr_eq(x, kprobe));
            if list.is_empty() {
                self.break_list.remove(&address);
            }
        }
    }
    /// Remove `kprobe` from the debug list at `address`, dropping the map
    /// entry once no kprobe remains there.
    fn remove_one_debug(&mut self, address: usize, kprobe: &LockKprobe) {
        if let Some(list) = self.debug_list.get_mut(&address) {
            list.retain(|x| !Arc::ptr_eq(x, kprobe));
            if list.is_empty() {
                self.debug_list.remove(&address);
            }
        }
    }
}
#[cfg(feature = "kprobe_test")]
#[allow(unused)]
/// This function is only used for testing kprobe.
/// Delegates to the `test` module's self-test routine.
pub fn kprobe_test() {
    test::kprobe_test();
}
/// # Register a kprobe
///
/// Resolves `symbol` to an address (or uses the explicit address) and
/// installs a probe there. If a probe point already exists at that address it
/// is shared instead of instrumenting the instruction again.
///
/// ## Parameters
/// - `kprobe_info`: description of the probe to install
pub fn register_kprobe(kprobe_info: KprobeInfo) -> Result<LockKprobe, SystemError> {
    let kprobe_builder = KprobeBuilder::try_from(kprobe_info)?;
    let address = kprobe_builder.probe_addr();
    // Reuse the probe point if this address is already instrumented.
    let existed_point = KPROBE_POINT_LIST.lock().get(&address).map(Clone::clone);
    let kprobe = match existed_point {
        Some(existed_point) => {
            kprobe_builder
                .with_probe_point(existed_point.clone())
                .install()
                .0
        }
        None => {
            let (kprobe, probe_point) = kprobe_builder.install();
            KPROBE_POINT_LIST.lock().insert(address, probe_point);
            kprobe
        }
    };
    let kprobe = Arc::new(RwLock::new(kprobe));
    KPROBE_MANAGER.lock().insert_kprobe(kprobe.clone());
    Ok(kprobe)
}
/// # Unregister a kprobe
///
/// Removes `kprobe` from the global manager; if it was the last kprobe on its
/// address, the shared probe point is freed as well.
///
/// ## Parameters
/// - `kprobe`: the installed kprobe
pub fn unregister_kprobe(kprobe: LockKprobe) {
    let kprobe_addr = kprobe.read().probe_point().break_address();
    // Hold the manager lock across both the removal and the count check so a
    // concurrent register_kprobe() cannot slip in between and then have its
    // probe point removed from KPROBE_POINT_LIST underneath it.
    let mut manager = KPROBE_MANAGER.lock();
    manager.remove_kprobe(&kprobe);
    // If no other kprobe remains on this address, drop the probe point.
    if manager.kprobe_num(kprobe_addr) == 0 {
        KPROBE_POINT_LIST.lock().remove(&kprobe_addr);
    }
}

View File

@ -0,0 +1,84 @@
use crate::arch::interrupt::TrapFrame;
use crate::debug::kprobe::{register_kprobe, unregister_kprobe, KprobeInfo};
use alloc::string::ToString;
use kprobe::ProbeArgs;
use log::info;
/// Probe target for the kprobe self-test; `#[inline(never)]` guarantees a
/// real, probe-able function symbol exists in the binary.
#[inline(never)]
fn detect_func(x: usize, y: usize) -> usize {
    let hart = 0;
    info!("detect_func: hart_id: {}, x: {}, y:{}", hart, x, y);
    hart
}
/// Test pre-handler: runs before the probed instruction, logging the
/// trap-frame address.
fn pre_handler(regs: &dyn ProbeArgs) {
    let pt_regs = regs.as_any().downcast_ref::<TrapFrame>().unwrap();
    info!(
        "call pre_handler, the sp is {:#x}",
        pt_regs as *const _ as usize
    );
}
/// Test post-handler: runs after the single-stepped instruction completes.
fn post_handler(regs: &dyn ProbeArgs) {
    let pt_regs = regs.as_any().downcast_ref::<TrapFrame>().unwrap();
    info!(
        "call post_handler, the sp is {:#x}",
        pt_regs as *const _ as usize
    );
}
/// Test fault-handler: logs the trap-frame address when a fault occurs while
/// the probe is active.
fn fault_handler(regs: &dyn ProbeArgs) {
    let pt_regs = regs.as_any().downcast_ref::<TrapFrame>().unwrap();
    info!(
        "call fault_handler, the sp is {:#x}",
        pt_regs as *const _ as usize
    );
}
/// End-to-end smoke test: install two kprobes (one by raw address, one by
/// symbol name) on `detect_func`, trigger them, unregister both, and trigger
/// again to confirm the handlers no longer fire.
pub fn kprobe_test() {
    info!("kprobe test for [detect_func]: {:#x}", detect_func as usize);
    // First probe: located by raw address.
    let kprobe_info = KprobeInfo {
        pre_handler,
        post_handler,
        fault_handler: Some(fault_handler),
        event_callback: None,
        symbol: None,
        addr: Some(detect_func as usize),
        offset: 0,
        enable: true,
    };
    let kprobe = register_kprobe(kprobe_info).unwrap();
    let new_pre_handler = |regs: &dyn ProbeArgs| {
        let pt_regs = regs.as_any().downcast_ref::<TrapFrame>().unwrap();
        info!(
            "call new pre_handler, the sp is {:#x}",
            pt_regs as *const _ as usize
        );
    };
    // Second probe: located by symbol name; shares the same probe point.
    let kprobe_info = KprobeInfo {
        pre_handler: new_pre_handler,
        post_handler,
        fault_handler: Some(fault_handler),
        event_callback: None,
        symbol: Some("dragonos_kernel::debug::kprobe::test::detect_func".to_string()),
        addr: None,
        offset: 0,
        enable: true,
    };
    let kprobe2 = register_kprobe(kprobe_info).unwrap();
    info!(
        "install 2 kprobes at [detect_func]: {:#x}",
        detect_func as usize
    );
    // Both probes' handlers should fire on this call.
    detect_func(1, 2);
    unregister_kprobe(kprobe);
    unregister_kprobe(kprobe2);
    info!(
        "uninstall 2 kprobes at [detect_func]: {:#x}",
        detect_func as usize
    );
    // No handlers should fire after unregistration.
    detect_func(1, 2);
    info!("kprobe test end");
}

View File

@ -1 +1,2 @@
pub mod klog;
pub mod kprobe;

View File

@ -1,5 +1,6 @@
#include "traceback.h"
#include <common/printk.h>
#include <common/string.h>
#include <process/process.h>
int lookup_kallsyms(uint64_t addr, int level)
@ -26,6 +27,18 @@ int lookup_kallsyms(uint64_t addr, int level)
return -1;
}
// Resolve a kernel symbol name to its address via the kallsyms tables.
// Returns 0 when the symbol is not found.
uint64_t addr_from_symbol(const char *symbol)
{
    const char *str = (const char *)&kallsyms_names;
    // Linear scan: kallsyms_names_index[i] is the offset of the i-th name.
    for (uint64_t i = 0; i < kallsyms_num; ++i)
    {
        if (strcmp(&str[kallsyms_names_index[i]], symbol) == 0)
            return kallsyms_address[i];
    }
    return 0;
}
/**
* @brief 追溯内核栈调用情况
*

View File

@ -14,4 +14,5 @@ extern const char *kallsyms_names __attribute__((weak));
*
* @param regs 内核栈结构体
*/
void traceback(struct pt_regs *regs);
void traceback(struct pt_regs *regs);
uint64_t addr_from_symbol(const char *symbol);

View File

@ -11,8 +11,6 @@ use crate::{
libs::rwlock::{RwLock, RwLockReadGuard, RwLockWriteGuard},
};
use system_error::SystemError;
use super::{
class::Class,
device::{
@ -24,29 +22,36 @@ use super::{
kset::KSet,
subsys::SubSysPrivate,
};
use crate::filesystem::sysfs::file::sysfs_emit_str;
use crate::filesystem::sysfs::{Attribute, AttributeGroup, SysFSOpsSupport};
use crate::filesystem::vfs::syscall::ModeType;
use crate::libs::lazy_init::Lazy;
use system_error::SystemError;
#[inline(always)]
pub fn cpu_device_manager() -> &'static CpuDeviceManager {
return &CpuDeviceManager;
}
static CPU_DEVICE_MANAGER: Lazy<CpuDeviceManager> = Lazy::new();
#[derive(Debug)]
pub struct CpuDeviceManager;
pub struct CpuDeviceManager {
_root_device: Arc<CpuSubSystemFakeRootDevice>,
}
impl CpuDeviceManager {
/// 初始化设备驱动模型的CPU子系统
///
/// 参考 https://code.dragonos.org.cn/xref/linux-6.1.9/drivers/base/cpu.c?fi=get_cpu_device#622
pub fn init(&self) -> Result<(), SystemError> {
pub fn init() -> Result<(), SystemError> {
let cpu_subsys = CpuSubSystem::new();
let root_device = CpuSubSystemFakeRootDevice::new();
subsystem_manager()
.subsys_system_register(
&(cpu_subsys as Arc<dyn Bus>),
&(root_device as Arc<dyn Device>),
&(root_device.clone() as Arc<dyn Device>),
)
.expect("register cpu subsys failed");
let manager = Self {
_root_device: root_device,
};
CPU_DEVICE_MANAGER.init(manager);
return Ok(());
}
}
@ -190,6 +195,10 @@ impl Device for CpuSubSystemFakeRootDevice {
fn set_dev_parent(&self, dev_parent: Option<Weak<dyn Device>>) {
self.inner.write().device_common.parent = dev_parent;
}
fn attribute_groups(&self) -> Option<&'static [&'static dyn AttributeGroup]> {
Some(&[&AttrGroupCpu])
}
}
impl KObject for CpuSubSystemFakeRootDevice {
@ -249,3 +258,70 @@ impl KObject for CpuSubSystemFakeRootDevice {
*self.kobj_state_mut() = state;
}
}
/// sysfs attribute group attached to the CPU subsystem root device
/// (/sys/devices/system/cpu).
#[derive(Debug)]
pub struct AttrGroupCpu;
impl AttributeGroup for AttrGroupCpu {
    // No sub-directory: attributes appear directly under the cpu node.
    fn name(&self) -> Option<&str> {
        None
    }
    fn attrs(&self) -> &[&'static dyn Attribute] {
        &[&AttrCpuPossible, &AttrCpuOnline]
    }
    // Returning None keeps each attribute's own mode().
    fn is_visible(
        &self,
        _kobj: Arc<dyn KObject>,
        _attr: &'static dyn Attribute,
    ) -> Option<ModeType> {
        None
    }
}
/// /sys/devices/system/cpu/possible — the range of CPUs that could ever be
/// brought up, formatted Linux-style as "0-N".
#[derive(Debug)]
pub struct AttrCpuPossible;
impl Attribute for AttrCpuPossible {
    fn name(&self) -> &str {
        "possible"
    }
    fn mode(&self) -> ModeType {
        ModeType::S_IRUGO
    }
    fn support(&self) -> SysFSOpsSupport {
        SysFSOpsSupport::ATTR_SHOW
    }
    fn show(&self, _kobj: Arc<dyn KObject>, buf: &mut [u8]) -> Result<usize, SystemError> {
        let cpu_manager = crate::smp::cpu::smp_cpu_manager();
        // NOTE(review): assumes at least one possible CPU — `cpus - 1` would
        // underflow on zero; confirm the cpu manager guarantees this.
        let cpus = cpu_manager.possible_cpus_count();
        let data = format!("0-{}", cpus - 1);
        sysfs_emit_str(buf, &data)
    }
}
/// /sys/devices/system/cpu/online — formatted Linux-style as "0-N".
#[derive(Debug)]
pub struct AttrCpuOnline;
impl Attribute for AttrCpuOnline {
    fn name(&self) -> &str {
        "online"
    }
    fn mode(&self) -> ModeType {
        ModeType::S_IRUGO
    }
    fn support(&self) -> SysFSOpsSupport {
        SysFSOpsSupport::ATTR_SHOW
    }
    fn show(&self, _kobj: Arc<dyn KObject>, buf: &mut [u8]) -> Result<usize, SystemError> {
        let cpu_manager = crate::smp::cpu::smp_cpu_manager();
        // NOTE(review): this reports present_cpus_count, not a true "online"
        // count — acceptable only if present == online here; verify.
        let cpus = cpu_manager.present_cpus_count();
        let data = format!("0-{}", cpus - 1);
        sysfs_emit_str(buf, &data)
    }
}

View File

@ -3,7 +3,7 @@ use system_error::SystemError;
use super::{
class::classes_init,
cpu::cpu_device_manager,
cpu::CpuDeviceManager,
device::{bus::buses_init, init::devices_init},
firmware::firmware_init,
hypervisor::hypervisor_init,
@ -20,7 +20,7 @@ pub fn driver_init() -> Result<(), SystemError> {
hypervisor_init()?;
platform_bus_init()?;
serio_bus_init()?;
cpu_device_manager().init()?;
CpuDeviceManager::init()?;
// 至此,已完成设备驱动模型的初始化
return Ok(());

View File

@ -0,0 +1,33 @@
use crate::arch::interrupt::TrapFrame;
use crate::arch::kprobe::clear_single_step;
use crate::debug::kprobe::KPROBE_MANAGER;
use kprobe::{KprobeOps, ProbeArgs};
use log::debug;
use system_error::SystemError;
/// Handler for the single-step (debug) exception raised after a kprobe's
/// breakpoint has been serviced.
#[derive(Debug)]
pub struct DebugException;
impl DebugException {
    /// Entry point from the architecture trap handler.
    pub fn handle(frame: &mut TrapFrame) -> Result<(), SystemError> {
        Self::post_kprobe_handler(frame)
    }
    /// Run every enabled kprobe's post-handler and event callback registered
    /// on the faulting address, then clear single-step state and resume.
    fn post_kprobe_handler(frame: &mut TrapFrame) -> Result<(), SystemError> {
        let pc = frame.debug_address();
        if let Some(kprobe_list) = KPROBE_MANAGER.lock().get_debug_list(pc) {
            for kprobe in kprobe_list {
                let guard = kprobe.read();
                if guard.is_enabled() {
                    guard.call_post_handler(frame);
                    guard.call_event_callback(frame);
                }
            }
            // All kprobes at this address share one probe point, so the
            // return address can be taken from the first entry.
            let return_address = kprobe_list[0].read().probe_point().return_address();
            clear_single_step(frame, return_address);
        } else {
            debug!("There is no kprobe on pc {:#x}", pc);
        }
        Ok(())
    }
}

View File

@ -0,0 +1,37 @@
use crate::arch::interrupt::TrapFrame;
use crate::arch::kprobe::setup_single_step;
use crate::debug::kprobe::KPROBE_MANAGER;
use crate::exception::debug::DebugException;
use kprobe::{KprobeOps, ProbeArgs};
use system_error::SystemError;
/// Handler for the breakpoint exception planted by kprobes.
#[derive(Debug)]
pub struct EBreak;
impl EBreak {
    /// Entry point from the architecture trap handler.
    pub fn handle(frame: &mut TrapFrame) -> Result<(), SystemError> {
        Self::kprobe_handler(frame)
    }
    /// Run the pre-handlers of every enabled kprobe at the faulting address,
    /// then arrange for the displaced instruction to be single-stepped.
    fn kprobe_handler(frame: &mut TrapFrame) -> Result<(), SystemError> {
        let break_addr = frame.break_address();
        let guard = KPROBE_MANAGER.lock();
        let kprobe_list = guard.get_break_list(break_addr);
        if let Some(kprobe_list) = kprobe_list {
            for kprobe in kprobe_list {
                let guard = kprobe.read();
                if guard.is_enabled() {
                    guard.call_pre_handler(frame);
                }
            }
            // All kprobes on this address share one probe point.
            let single_step_address = kprobe_list[0].read().probe_point().single_step_address();
            // setup_single_step
            setup_single_step(frame, single_step_address);
        } else {
            // For some architectures, they do not support single step execution,
            // and we need to use breakpoint exceptions to simulate
            drop(guard);
            DebugException::handle(frame)?;
        }
        Ok(())
    }
}

View File

@ -4,7 +4,9 @@ use system_error::SystemError;
use crate::arch::CurrentIrqArch;
pub mod debug;
pub mod dummychip;
pub mod ebreak;
pub mod handle;
pub mod init;
pub mod ipi;

View File

@ -11,6 +11,8 @@ use system_error::SystemError;
use super::{Dirent, FileType, IndexNode, InodeId, Metadata, SpecialNodeData};
use crate::filesystem::eventfd::EventFdInode;
use crate::libs::lazy_init::Lazy;
use crate::perf::PerfEventInode;
use crate::{
arch::MMArch,
driver::{
@ -125,7 +127,7 @@ impl FileMode {
/// 页面缓存
pub struct PageCache {
xarray: SpinLock<XArray<Arc<Page>>>,
inode: Option<Weak<dyn IndexNode>>,
inode: Lazy<Weak<dyn IndexNode>>,
}
impl core::fmt::Debug for PageCache {
@ -148,13 +150,19 @@ impl PageCache {
pub fn new(inode: Option<Weak<dyn IndexNode>>) -> Arc<PageCache> {
let page_cache = Self {
xarray: SpinLock::new(XArray::new()),
inode,
inode: {
let v: Lazy<Weak<dyn IndexNode>> = Lazy::new();
if let Some(inode) = inode {
v.init(inode);
}
v
},
};
Arc::new(page_cache)
}
pub fn inode(&self) -> Option<Weak<dyn IndexNode>> {
self.inode.clone()
self.inode.try_get().cloned()
}
pub fn add_page(&self, offset: usize, page: &Arc<Page>) {
@ -176,8 +184,12 @@ impl PageCache {
cursor.remove();
}
pub fn set_inode(&mut self, inode: Weak<dyn IndexNode>) {
self.inode = Some(inode)
pub fn set_inode(&self, inode: Weak<dyn IndexNode>) -> Result<(), SystemError> {
if self.inode.initialized() {
return Err(SystemError::EINVAL);
}
self.inode.init(inode);
Ok(())
}
}
@ -603,11 +615,15 @@ impl File {
inode.inner().lock().remove_epoll(epoll)
}
_ => {
let inode = self.inode.downcast_ref::<EventFdInode>();
if let Some(inode) = inode {
return inode.remove_epoll(epoll);
}
let inode = self
.inode
.downcast_ref::<EventFdInode>()
.downcast_ref::<PerfEventInode>()
.ok_or(SystemError::ENOSYS)?;
inode.remove_epoll(epoll)
return inode.remove_epoll(epoll);
}
}
}
@ -745,7 +761,6 @@ impl FileDescriptorVec {
// 把文件描述符数组对应位置设置为空
let file = self.fds[fd as usize].take().unwrap();
return Ok(file);
}

View File

@ -125,6 +125,9 @@ bitflags! {
}
pub trait IndexNode: Any + Sync + Send + Debug + CastFromSync {
fn mmap(&self, _start: usize, _len: usize, _offset: usize) -> Result<(), SystemError> {
return Err(SystemError::ENOSYS);
}
/// @brief 打开文件
///
/// @return 成功Ok()

File diff suppressed because it is too large Load Diff

View File

@ -1,2 +1,10 @@
#[allow(clippy::module_inception)]
#![allow(
dead_code,
non_camel_case_types,
non_snake_case,
clippy::all,
missing_docs,
clippy::module_inception
)]
pub mod bindings;
pub mod linux_bpf;

View File

@ -1,5 +1,3 @@
use log::warn;
use crate::{
arch::{
init::{early_setup_arch, setup_arch, setup_arch_post},
@ -30,6 +28,7 @@ use crate::{
clocksource::clocksource_boot_finish, timekeeping::timekeeping_init, timer::timer_init,
},
};
use log::warn;
use super::{
boot::{boot_callback_except_early, boot_callbacks},
@ -89,9 +88,8 @@ fn do_start_kernel() {
kthread_init();
setup_arch_post().expect("setup_arch_post failed");
clocksource_boot_finish();
Futex::init();
crate::bpf::init_bpf_system();
#[cfg(all(target_arch = "x86_64", feature = "kvm"))]
crate::virt::kvm::kvm_init();
}

View File

@ -21,6 +21,7 @@
#![feature(slice_ptr_get)]
#![feature(sync_unsafe_cell)]
#![feature(vec_into_raw_parts)]
#![feature(c_variadic)]
#![cfg_attr(target_os = "none", no_std)]
#![allow(internal_features)]
// clippy的配置
@ -46,6 +47,7 @@ mod arch;
mod libs;
#[macro_use]
mod include;
mod bpf;
mod debug;
mod driver; // 如果driver依赖了libs应该在libs后面导出
mod exception;
@ -55,12 +57,12 @@ mod ipc;
mod misc;
mod mm;
mod net;
mod perf;
mod process;
mod sched;
mod smp;
mod syscall;
mod time;
#[cfg(target_arch = "x86_64")]
mod virt;

View File

@ -272,16 +272,16 @@ impl PageFaultHandler {
/// - VmFaultReason: 页面错误处理信息标志
pub unsafe fn do_fault(pfm: &mut PageFaultMessage) -> VmFaultReason {
if !pfm.flags().contains(FaultFlags::FAULT_FLAG_WRITE) {
return Self::do_read_fault(pfm);
Self::do_read_fault(pfm)
} else if !pfm
.vma()
.lock_irqsave()
.vm_flags()
.contains(VmFlags::VM_SHARED)
{
return Self::do_cow_fault(pfm);
Self::do_cow_fault(pfm)
} else {
return Self::do_shared_fault(pfm);
Self::do_shared_fault(pfm)
}
}

View File

@ -377,7 +377,7 @@ impl InnerAddressSpace {
PageFrameCount::from_bytes(len).unwrap(),
prot_flags,
map_flags,
move |page, count, vm_flags, flags, mapper, flusher| {
|page, count, vm_flags, flags, mapper, flusher| {
if allocate_at_once {
VMA::zeroed(
page,
@ -386,7 +386,7 @@ impl InnerAddressSpace {
flags,
mapper,
flusher,
file,
file.clone(),
Some(pgoff),
)
} else {
@ -394,13 +394,17 @@ impl InnerAddressSpace {
VirtRegion::new(page.virt_address(), count.data() * MMArch::PAGE_SIZE),
vm_flags,
flags,
file,
file.clone(),
Some(pgoff),
false,
)))
}
},
)?;
// todo!(impl mmap for other file)
// https://github.com/DragonOS-Community/DragonOS/pull/912#discussion_r1765334272
let file = file.unwrap();
let _ = file.inode().mmap(start_vaddr.data(), len, offset);
return Ok(start_page);
}

333
kernel/src/perf/bpf.rs Normal file
View File

@ -0,0 +1,333 @@
use super::{PerfEventOps, Result};
use crate::arch::mm::LockedFrameAllocator;
use crate::arch::MMArch;
use crate::filesystem::vfs::file::PageCache;
use crate::filesystem::vfs::{FilePrivateData, FileSystem, IndexNode};
use crate::include::bindings::linux_bpf::{
perf_event_header, perf_event_mmap_page, perf_event_type,
};
use crate::libs::spinlock::{SpinLock, SpinLockGuard};
use crate::mm::allocator::page_frame::{FrameAllocator, PageFrameCount, PhysPageFrame};
use crate::mm::page::{page_manager_lock_irqsave, Page};
use crate::mm::{MemoryManagementArch, PhysAddr};
use crate::perf::util::{LostSamples, PerfProbeArgs, PerfSample, SampleHeader};
use alloc::string::String;
use alloc::sync::Arc;
use alloc::vec::Vec;
use core::any::Any;
use core::fmt::Debug;
use system_error::SystemError;
const PAGE_SIZE: usize = MMArch::PAGE_SIZE;
/// Perf event backed by a user-mmap'ed ring buffer, used to stream eBPF
/// program output to user space.
#[derive(Debug)]
pub struct BpfPerfEvent {
    // Original perf_event_open arguments (unused after construction).
    _args: PerfProbeArgs,
    // Mutable event state, guarded by a spinlock.
    data: SpinLock<BpfPerfEventData>,
}
#[derive(Debug)]
pub struct BpfPerfEventData {
    // Toggled via PerfEventOps::enable / disable.
    enabled: bool,
    // The mmap'ed ring buffer (control page + data region).
    mmap_page: RingPage,
    // Pages backing the mmap, shared with the VFS page cache.
    page_cache: Arc<PageCache>,
    // Offset passed to mmap (recorded; not otherwise used here).
    offset: usize,
}
/// A perf ring buffer: one control page (`perf_event_mmap_page`) followed by
/// a data region of `size - PAGE_SIZE` bytes.
#[derive(Debug)]
pub struct RingPage {
    // Total mapping size in bytes (multiple of PAGE_SIZE, at least 2 pages).
    size: usize,
    // Kernel virtual address of the mapping.
    ptr: usize,
    // Bytes usable for records: size - PAGE_SIZE.
    data_region_size: usize,
    // Samples dropped since the last LOST record was emitted.
    lost: usize,
    // Physical address of the first frame (needed to free the frames on drop).
    phys_addr: PhysAddr,
}
impl RingPage {
    /// Placeholder ring used before `BpfPerfEvent::do_mmap` allocates memory.
    pub fn empty() -> Self {
        RingPage {
            ptr: 0,
            size: 0,
            data_region_size: 0,
            lost: 0,
            phys_addr: PhysAddr::new(0),
        }
    }
    /// Initialise a ring over the mapped region starting at virtual `start`.
    pub fn new_init(start: usize, len: usize, phys_addr: PhysAddr) -> Self {
        Self::init(start as _, len, phys_addr)
    }
    /// Lay out the control page and record the data-region geometry.
    fn init(ptr: *mut u8, size: usize, phys_addr: PhysAddr) -> Self {
        assert_eq!(size % PAGE_SIZE, 0);
        assert!(size / PAGE_SIZE >= 2);
        // The first page will be filled with perf_event_mmap_page
        unsafe {
            // SAFETY: the caller maps at least two writable pages at `ptr`.
            let perf_event_mmap_page = &mut *(ptr as *mut perf_event_mmap_page);
            perf_event_mmap_page.data_offset = PAGE_SIZE as u64;
            perf_event_mmap_page.data_size = (size - PAGE_SIZE) as u64;
            // user will read sample or lost record from data_tail
            perf_event_mmap_page.data_tail = 0;
            // kernel will write sample or lost record from data_head
            perf_event_mmap_page.data_head = 0;
            // It is a ring buffer.
        }
        RingPage {
            ptr: ptr as usize,
            size,
            data_region_size: size - PAGE_SIZE,
            lost: 0,
            phys_addr,
        }
    }
    /// Whether `data_size` bytes fit in the ring given the current head/tail.
    ///
    /// NOTE(review): the "full" test reserves one slot ((head+1)%size == tail)
    /// but the capacity formula below lets head catch up to tail, which is
    /// indistinguishable from an empty ring — confirm against the consumer.
    fn can_write(&self, data_size: usize, data_tail: usize, data_head: usize) -> bool {
        if (data_head + 1) % self.data_region_size == data_tail {
            // The buffer is full
            return false;
        }
        let capacity = if data_head >= data_tail {
            self.data_region_size - data_head + data_tail
        } else {
            data_tail - data_head
        };
        data_size <= capacity
    }
    /// Publish one sample into the ring, emitting a LOST record first when
    /// earlier samples were dropped. A sample that does not fit is counted in
    /// `self.lost` and silently discarded (the call still returns Ok).
    pub fn write_event(&mut self, data: &[u8]) -> Result<()> {
        // SAFETY assumption: `ptr` points at the control page set up in init().
        let data_tail = unsafe { &mut (*(self.ptr as *mut perf_event_mmap_page)).data_tail };
        let data_head = unsafe { &mut (*(self.ptr as *mut perf_event_mmap_page)).data_head };
        // data_tail..data_head is the region that can be written
        // check if there is enough space to write the event
        let sample_size = PerfSample::calculate_size(data.len());
        let can_write_sample =
            self.can_write(sample_size, *data_tail as usize, *data_head as usize);
        if !can_write_sample {
            //we need record it to the lost record
            self.lost += 1;
            Ok(())
        } else {
            // we can write the sample to the page
            // If the lost record is not zero, we need to write the lost record first.
            let can_write_lost_record = self.can_write(
                size_of::<LostSamples>(),
                *data_tail as usize,
                *data_head as usize,
            );
            if self.lost > 0 && can_write_lost_record {
                let new_data_head = self.write_lost(*data_head as usize)?;
                *data_head = new_data_head as u64;
                self.lost = 0;
                // Retry now that the backlog marker has been flushed.
                self.write_event(data)
            } else {
                let new_data_head = self.write_sample(data, *data_head as usize)?;
                *data_head = new_data_head as u64;
                Ok(())
            }
        }
    }
    /// Write any data to the page.
    ///
    /// Return the new data_head
    fn write_any(&mut self, data: &[u8], data_head: usize) -> Result<usize> {
        let data_region_len = self.data_region_size;
        // Skip the control page; the rest of the mapping is the data region.
        let data_region = self.as_mut_slice()[PAGE_SIZE..].as_mut();
        let data_len = data.len();
        let end = (data_head + data_len) % data_region_len;
        let start = data_head;
        if start < end {
            data_region[start..end].copy_from_slice(data);
        } else {
            // Wrap-around: split the copy at the end of the region.
            let first_len = data_region_len - start;
            data_region[start..start + first_len].copy_from_slice(&data[..first_len]);
            data_region[0..end].copy_from_slice(&data[first_len..]);
        }
        Ok(end)
    }
    /// Write a sample to the page.
    fn write_sample(&mut self, data: &[u8], data_head: usize) -> Result<usize> {
        let perf_sample = PerfSample {
            s_hdr: SampleHeader {
                header: perf_event_header {
                    type_: perf_event_type::PERF_RECORD_SAMPLE as u32,
                    misc: 0,
                    size: size_of::<SampleHeader>() as u16 + data.len() as u16,
                },
                size: data.len() as u32,
            },
            value: data,
        };
        // Header first, then the payload; each copy may wrap around the ring.
        let new_head = self.write_any(perf_sample.s_hdr.as_bytes(), data_head)?;
        self.write_any(perf_sample.value, new_head)
    }
    /// Write a lost record to the page.
    ///
    /// Return the new data_head
    fn write_lost(&mut self, data_head: usize) -> Result<usize> {
        let lost = LostSamples {
            header: perf_event_header {
                type_: perf_event_type::PERF_RECORD_LOST as u32,
                misc: 0,
                size: size_of::<LostSamples>() as u16,
            },
            id: 0,
            count: self.lost as u64,
        };
        self.write_any(lost.as_bytes(), data_head)
    }
    /// True when the consumer has unread records (tail lags head).
    pub fn readable(&self) -> bool {
        let data_tail = unsafe { &(*(self.ptr as *mut perf_event_mmap_page)).data_tail };
        let data_head = unsafe { &(*(self.ptr as *mut perf_event_mmap_page)).data_head };
        data_tail != data_head
    }
    /// The whole mapping (control page + data region) as bytes.
    pub fn as_slice(&self) -> &[u8] {
        unsafe { core::slice::from_raw_parts(self.ptr as *const u8, self.size) }
    }
    /// The whole mapping as mutable bytes.
    pub fn as_mut_slice(&mut self) -> &mut [u8] {
        unsafe { core::slice::from_raw_parts_mut(self.ptr as *mut u8, self.size) }
    }
}
impl BpfPerfEvent {
    /// Create a disabled event with an empty ring; the buffer is allocated
    /// lazily when user space mmaps the event fd.
    pub fn new(args: PerfProbeArgs) -> Self {
        BpfPerfEvent {
            _args: args,
            data: SpinLock::new(BpfPerfEventData {
                enabled: false,
                mmap_page: RingPage::empty(),
                page_cache: PageCache::new(None),
                offset: 0,
            }),
        }
    }
    /// Allocate backing frames for an mmap request, register them with the
    /// page cache, and initialise the ring-buffer header in the first page.
    pub fn do_mmap(&self, _start: usize, len: usize, offset: usize) -> Result<()> {
        let mut data = self.data.lock();
        // alloc page frame
        let (phy_addr, page_count) =
            unsafe { LockedFrameAllocator.allocate(PageFrameCount::new(len / PAGE_SIZE)) }
                .ok_or(SystemError::ENOSPC)?;
        let mut page_manager_guard = page_manager_lock_irqsave();
        let mut cur_phys = PhysPageFrame::new(phy_addr);
        for i in 0..page_count.data() {
            let page = Arc::new(Page::new(true, cur_phys.phys_address()));
            let paddr = cur_phys.phys_address();
            page_manager_guard.insert(paddr, &page);
            data.page_cache.add_page(i, &page);
            cur_phys = cur_phys.next();
        }
        let virt_addr = unsafe { MMArch::phys_2_virt(phy_addr) }.ok_or(SystemError::EFAULT)?;
        // create mmap page
        let mmap_page = RingPage::new_init(virt_addr.data(), len, phy_addr);
        data.mmap_page = mmap_page;
        data.offset = offset;
        Ok(())
    }
    /// Append one sample (or lost record) to the ring buffer.
    pub fn write_event(&self, data: &[u8]) -> Result<()> {
        let mut inner_data = self.data.lock();
        inner_data.mmap_page.write_event(data)?;
        Ok(())
    }
}
impl Drop for BpfPerfEvent {
    /// Tear down the ring-buffer frames when the event is destroyed.
    // NOTE(review): assumes remove_page drops the last reference so the frame
    // is actually freed — confirm against the page manager's semantics.
    fn drop(&mut self) {
        let mut page_manager_guard = page_manager_lock_irqsave();
        let data = self.data.lock();
        let phy_addr = data.mmap_page.phys_addr;
        let len = data.mmap_page.size;
        let page_count = PageFrameCount::new(len / PAGE_SIZE);
        let mut cur_phys = PhysPageFrame::new(phy_addr);
        for _ in 0..page_count.data() {
            page_manager_guard.remove_page(&cur_phys.phys_address());
            cur_phys = cur_phys.next();
        }
    }
}
impl IndexNode for BpfPerfEvent {
    /// mmap hands the request to `do_mmap`, which allocates the ring buffer.
    fn mmap(&self, start: usize, len: usize, offset: usize) -> Result<()> {
        self.do_mmap(start, len, offset)
    }
    // Records are consumed through the mmap'ed ring, never via read(2).
    fn read_at(
        &self,
        _offset: usize,
        _len: usize,
        _buf: &mut [u8],
        _data: SpinLockGuard<FilePrivateData>,
    ) -> Result<usize> {
        panic!("PerfEventInode does not support read")
    }
    // Records are produced by the kernel, never via write(2).
    fn write_at(
        &self,
        _offset: usize,
        _len: usize,
        _buf: &[u8],
        _data: SpinLockGuard<FilePrivateData>,
    ) -> Result<usize> {
        panic!("PerfEventInode does not support write")
    }
    // This inode lives outside any mounted filesystem.
    fn fs(&self) -> Arc<dyn FileSystem> {
        panic!("PerfEventInode does not have a filesystem")
    }
    fn as_any_ref(&self) -> &dyn Any {
        self
    }
    // Not a directory.
    fn list(&self) -> Result<Vec<String>> {
        Err(SystemError::ENOSYS)
    }
    /// Expose the ring-buffer pages so the generic mmap path can map them.
    fn page_cache(&self) -> Option<Arc<PageCache>> {
        Some(self.data.lock().page_cache.clone())
    }
}
impl PerfEventOps for BpfPerfEvent {
    /// Mark the event as enabled so incoming samples are recorded.
    fn enable(&self) -> Result<()> {
        let mut guard = self.data.lock();
        guard.enabled = true;
        Ok(())
    }
    /// Mark the event as disabled.
    fn disable(&self) -> Result<()> {
        let mut guard = self.data.lock();
        guard.enabled = false;
        Ok(())
    }
    /// The event is readable when the ring buffer holds unread samples.
    fn readable(&self) -> bool {
        let guard = self.data.lock();
        guard.mmap_page.readable()
    }
}
/// Create a BPF-output perf event (`PERF_COUNT_SW_BPF_OUTPUT`) from the
/// parsed `perf_event_open` arguments.
pub fn perf_event_open_bpf(args: PerfProbeArgs) -> BpfPerfEvent {
    BpfPerfEvent::new(args)
}

159
kernel/src/perf/kprobe.rs Normal file
View File

@ -0,0 +1,159 @@
use super::Result;
use crate::arch::interrupt::TrapFrame;
use crate::arch::kprobe::KProbeContext;
use crate::bpf::helper::BPF_HELPER_FUN_SET;
use crate::bpf::prog::BpfProg;
use crate::debug::kprobe::args::KprobeInfo;
use crate::debug::kprobe::{register_kprobe, unregister_kprobe, LockKprobe};
use crate::filesystem::vfs::file::{File, PageCache};
use crate::filesystem::vfs::{FilePrivateData, FileSystem, IndexNode};
use crate::libs::casting::DowncastArc;
use crate::libs::spinlock::SpinLockGuard;
use crate::perf::util::PerfProbeArgs;
use crate::perf::PerfEventOps;
use alloc::boxed::Box;
use alloc::string::String;
use alloc::sync::Arc;
use alloc::vec::Vec;
use core::any::Any;
use core::fmt::Debug;
use kprobe::{CallBackFunc, ProbeArgs};
use rbpf::EbpfVmRawOwned;
use system_error::SystemError;
/// A perf event backed by a kernel probe (kprobe).
#[derive(Debug)]
pub struct KprobePerfEvent {
    /// Parsed `perf_event_open` arguments, kept for debugging/inspection.
    _args: PerfProbeArgs,
    /// The registered kprobe this event drives.
    kprobe: LockKprobe,
}
impl Drop for KprobePerfEvent {
    /// Remove the kprobe from the kernel when the event fd is closed.
    fn drop(&mut self) {
        unregister_kprobe(self.kprobe.clone());
    }
}
impl KprobePerfEvent {
    /// Attach an eBPF program (given as an open program fd's `File`) to this
    /// kprobe: build a VM over the program's instructions, register the kernel
    /// helper set, and install the VM as the probe's event callback.
    ///
    /// # Errors
    /// Returns `EINVAL` if the file is not a BPF program or the VM rejects it.
    pub fn do_set_bpf_prog(&self, prog_file: Arc<File>) -> Result<()> {
        let bpf_prog = prog_file
            .inode()
            .downcast_arc::<BpfProg>()
            .ok_or(SystemError::EINVAL)?;
        let insns = bpf_prog.insns();
        let mut vm =
            EbpfVmRawOwned::new(Some(insns.to_vec())).map_err(|_| SystemError::EINVAL)?;
        vm.register_helper_set(BPF_HELPER_FUN_SET.get())
            .map_err(|_| SystemError::EINVAL)?;
        // Wrap the VM in a callback object and hand it to the kprobe.
        let callback = Box::new(KprobePerfCallBack::new(bpf_prog, vm));
        self.kprobe.write().update_event_callback(callback);
        Ok(())
    }
}
/// Callback installed on a kprobe that runs an eBPF program on every hit.
pub struct KprobePerfCallBack {
    /// Keeps the BPF program file alive while the callback is installed.
    _bpf_prog_file: Arc<BpfProg>,
    /// VM holding the loaded program; executed on each probe hit.
    vm: EbpfVmRawOwned,
}
impl KprobePerfCallBack {
    /// Bundle the BPF program file (held to keep its instructions alive) with
    /// the VM that executes it.
    fn new(bpf_prog_file: Arc<BpfProg>, vm: EbpfVmRawOwned) -> Self {
        Self {
            _bpf_prog_file: bpf_prog_file,
            vm,
        }
    }
}
impl CallBackFunc for KprobePerfCallBack {
    /// Run the attached eBPF program, passing it a `pt_regs`-style context
    /// built from the trap frame as the program's raw data buffer.
    fn call(&self, trap_frame: &dyn ProbeArgs) {
        let trap_frame = trap_frame.as_any().downcast_ref::<TrapFrame>().unwrap();
        let mut pt_regs = KProbeContext::from(trap_frame);
        // SAFETY: `pt_regs` is a live local we hold exclusively; viewing it as
        // a byte slice of exactly its own size is sound. (The previous code
        // cast a *shared* reference to a mutable pointer before calling
        // `from_raw_parts_mut`, which is undefined behavior.)
        let probe_context = unsafe {
            core::slice::from_raw_parts_mut(
                &mut pt_regs as *mut KProbeContext as *mut u8,
                size_of::<KProbeContext>(),
            )
        };
        // The program's result is intentionally ignored: a probe callback has
        // no way to report failure.
        let _res = self
            .vm
            .execute_program(probe_context)
            .map_err(|_| SystemError::EINVAL);
    }
}
impl IndexNode for KprobePerfEvent {
    /// Reading a kprobe event fd is not supported; return `ENOSYS` instead of
    /// panicking, since user space can always call `read(2)` on the fd.
    fn read_at(
        &self,
        _offset: usize,
        _len: usize,
        _buf: &mut [u8],
        _data: SpinLockGuard<FilePrivateData>,
    ) -> Result<usize> {
        Err(SystemError::ENOSYS)
    }
    /// Writing is likewise unsupported; see [`Self::read_at`].
    fn write_at(
        &self,
        _offset: usize,
        _len: usize,
        _buf: &[u8],
        _data: SpinLockGuard<FilePrivateData>,
    ) -> Result<usize> {
        Err(SystemError::ENOSYS)
    }
    fn fs(&self) -> Arc<dyn FileSystem> {
        // The trait forces a value and no filesystem exists for this inode.
        panic!("fs not implemented for PerfEvent");
    }
    fn as_any_ref(&self) -> &dyn Any {
        self
    }
    fn list(&self) -> Result<Vec<String>> {
        Err(SystemError::ENOSYS)
    }
    /// Kprobe events have no mmap ring buffer, hence no page cache.
    fn page_cache(&self) -> Option<Arc<PageCache>> {
        None
    }
}
impl PerfEventOps for KprobePerfEvent {
    /// Install the given eBPF program as this kprobe's event callback.
    fn set_bpf_prog(&self, bpf_prog: Arc<File>) -> Result<()> {
        self.do_set_bpf_prog(bpf_prog)
    }
    /// Arm the underlying kprobe.
    fn enable(&self) -> Result<()> {
        let mut guard = self.kprobe.write();
        guard.enable();
        Ok(())
    }
    /// Disarm the underlying kprobe.
    fn disable(&self) -> Result<()> {
        let mut guard = self.kprobe.write();
        guard.disable();
        Ok(())
    }
    /// Kprobe events report themselves as always readable.
    fn readable(&self) -> bool {
        true
    }
}
/// Create a [`KprobePerfEvent`] for the symbol named in `args`.
///
/// The probe is registered disabled; it is armed later via
/// `PERF_EVENT_IOC_ENABLE` on the returned event's fd.
pub fn perf_event_open_kprobe(args: PerfProbeArgs) -> KprobePerfEvent {
    let symbol = args.name.clone();
    log::info!("create kprobe for symbol: {symbol}");
    let info = KprobeInfo {
        symbol: Some(symbol),
        addr: None,
        offset: 0,
        enable: false,
        // No-op handlers: the real work happens in the event callback that
        // `set_bpf_prog` installs later.
        pre_handler: |_| {},
        post_handler: |_| {},
        fault_handler: None,
        event_callback: None,
    };
    let kprobe = register_kprobe(info).expect("create kprobe failed");
    KprobePerfEvent {
        _args: args,
        kprobe,
    }
}

337
kernel/src/perf/mod.rs Normal file
View File

@ -0,0 +1,337 @@
mod bpf;
mod kprobe;
mod util;
use crate::filesystem::vfs::file::{File, FileMode, PageCache};
use crate::filesystem::vfs::syscall::ModeType;
use crate::filesystem::vfs::{
FilePrivateData, FileSystem, FileType, FsInfo, IndexNode, Metadata, SuperBlock,
};
use crate::include::bindings::linux_bpf::{
perf_event_attr, perf_event_sample_format, perf_sw_ids, perf_type_id,
};
use crate::libs::casting::DowncastArc;
use crate::libs::spinlock::{SpinLock, SpinLockGuard};
use crate::mm::fault::{PageFaultHandler, PageFaultMessage};
use crate::mm::VmFaultReason;
use crate::net::event_poll::{EPollEventType, EPollItem, EventPoll, KernelIoctlData};
use crate::perf::bpf::BpfPerfEvent;
use crate::perf::util::{PerfEventIoc, PerfEventOpenFlags, PerfProbeArgs};
use crate::process::ProcessManager;
use crate::syscall::user_access::UserBufferReader;
use crate::syscall::Syscall;
use alloc::boxed::Box;
use alloc::collections::LinkedList;
use alloc::string::String;
use alloc::sync::{Arc, Weak};
use alloc::vec::Vec;
use core::any::Any;
use core::ffi::c_void;
use core::fmt::Debug;
use core::ops::Deref;
use intertrait::{CastFrom, CastFromSync};
use log::info;
use num_traits::FromPrimitive;
use system_error::SystemError;
type Result<T> = core::result::Result<T, SystemError>;
/// Common interface of all perf event kinds (kprobe, BPF output, ...).
pub trait PerfEventOps: Send + Sync + Debug + CastFromSync + CastFrom + IndexNode {
    /// Set the bpf program for the perf event.
    ///
    /// Default: `ENOSYS`; only event types that can run a BPF program
    /// (e.g. kprobe events) override this.
    fn set_bpf_prog(&self, _bpf_prog: Arc<File>) -> Result<()> {
        Err(SystemError::ENOSYS)
    }
    /// Enable the perf event so it starts producing/recording samples.
    fn enable(&self) -> Result<()> {
        Err(SystemError::ENOSYS)
    }
    /// Disable the perf event.
    fn disable(&self) -> Result<()> {
        Err(SystemError::ENOSYS)
    }
    /// Whether the perf event currently has data to read (drives poll/epoll).
    fn readable(&self) -> bool;
}
/// VFS inode wrapper around a concrete perf event, giving it fd semantics
/// (ioctl, poll/epoll, mmap).
#[derive(Debug)]
pub struct PerfEventInode {
    /// The concrete perf event implementation.
    event: Box<dyn PerfEventOps>,
    /// Epoll registrations on this fd, woken when new samples arrive.
    epitems: SpinLock<LinkedList<Arc<EPollItem>>>,
}
impl PerfEventInode {
pub fn new(event: Box<dyn PerfEventOps>) -> Self {
Self {
event,
epitems: SpinLock::new(LinkedList::new()),
}
}
pub fn remove_epoll(
&self,
epoll: &Weak<SpinLock<EventPoll>>,
) -> core::result::Result<(), SystemError> {
let is_remove = !self
.epitems
.lock_irqsave()
.extract_if(|x| x.epoll().ptr_eq(epoll))
.collect::<Vec<_>>()
.is_empty();
if is_remove {
return Ok(());
}
Err(SystemError::ENOENT)
}
fn do_poll(&self) -> Result<usize> {
let mut events = EPollEventType::empty();
if self.event.readable() {
events |= EPollEventType::EPOLLIN | EPollEventType::EPOLLRDNORM;
}
return Ok(events.bits() as usize);
}
fn epoll_callback(&self) -> Result<()> {
let pollflag = EPollEventType::from_bits_truncate(self.do_poll()? as u32);
// 唤醒epoll中等待的进程
EventPoll::wakeup_epoll(&self.epitems, Some(pollflag))
}
}
impl Deref for PerfEventInode {
    type Target = Box<dyn PerfEventOps>;
    /// Forward calls on the inode to the wrapped perf event.
    fn deref(&self) -> &Self::Target {
        &self.event
    }
}
impl IndexNode for PerfEventInode {
    /// Delegate mmap to the concrete event (only BPF events support it).
    fn mmap(&self, start: usize, len: usize, offset: usize) -> Result<()> {
        self.event.mmap(start, len, offset)
    }
    fn open(&self, _data: SpinLockGuard<FilePrivateData>, _mode: &FileMode) -> Result<()> {
        Ok(())
    }
    fn close(&self, _data: SpinLockGuard<FilePrivateData>) -> Result<()> {
        Ok(())
    }
    /// Reading perf event counters is not implemented yet; return `ENOSYS`
    /// instead of panicking, since user space can call `read(2)` on any fd.
    fn read_at(
        &self,
        _offset: usize,
        _len: usize,
        _buf: &mut [u8],
        _data: SpinLockGuard<FilePrivateData>,
    ) -> Result<usize> {
        Err(SystemError::ENOSYS)
    }
    /// Writing is likewise unsupported; see [`Self::read_at`].
    fn write_at(
        &self,
        _offset: usize,
        _len: usize,
        _buf: &[u8],
        _data: SpinLockGuard<FilePrivateData>,
    ) -> Result<usize> {
        Err(SystemError::ENOSYS)
    }
    fn poll(&self, _private_data: &FilePrivateData) -> Result<usize> {
        self.do_poll()
    }
    fn metadata(&self) -> Result<Metadata> {
        let meta = Metadata {
            mode: ModeType::from_bits_truncate(0o755),
            file_type: FileType::File,
            ..Default::default()
        };
        Ok(meta)
    }
    fn resize(&self, _len: usize) -> Result<()> {
        Ok(())
    }
    /// Dispatch the perf ioctl commands: enable, disable, and attach-BPF.
    fn ioctl(&self, cmd: u32, data: usize, _private_data: &FilePrivateData) -> Result<usize> {
        let req = PerfEventIoc::from_u32(cmd).ok_or(SystemError::EINVAL)?;
        info!("perf_event_ioctl: request: {:?}, arg: {}", req, data);
        match req {
            PerfEventIoc::Enable => {
                self.event.enable()?;
                Ok(0)
            }
            PerfEventIoc::Disable => {
                self.event.disable()?;
                Ok(0)
            }
            PerfEventIoc::SetBpf => {
                info!("perf_event_ioctl: PERF_EVENT_IOC_SET_BPF, arg: {}", data);
                // `data` is the fd of an already-loaded BPF program.
                let bpf_prog_fd = data;
                let fd_table = ProcessManager::current_pcb().fd_table();
                let file = fd_table
                    .read()
                    .get_file_by_fd(bpf_prog_fd as _)
                    .ok_or(SystemError::EBADF)?;
                self.event.set_bpf_prog(file)?;
                Ok(0)
            }
        }
    }
    /// In-kernel ioctl used by epoll to register an `EPollItem` on this fd.
    fn kernel_ioctl(
        &self,
        arg: Arc<dyn KernelIoctlData>,
        _data: &FilePrivateData,
    ) -> core::result::Result<usize, SystemError> {
        let epitem = arg
            .arc_any()
            .downcast::<EPollItem>()
            .map_err(|_| SystemError::EFAULT)?;
        self.epitems.lock().push_back(epitem);
        Ok(0)
    }
    /// Hand out the fake perf filesystem so page faults on perf mappings are
    /// routed through [`PerfFakeFs`]'s filemap handlers.
    fn fs(&self) -> Arc<dyn FileSystem> {
        Arc::new(PerfFakeFs)
    }
    fn as_any_ref(&self) -> &dyn Any {
        self
    }
    fn list(&self) -> Result<Vec<String>> {
        Err(SystemError::ENOSYS)
    }
    fn page_cache(&self) -> Option<Arc<PageCache>> {
        self.event.page_cache()
    }
}
/// Stub filesystem for perf event inodes. It exists only so that page faults
/// on perf mmap regions can be routed through the generic filemap handlers;
/// every other `FileSystem` method is unsupported.
#[derive(Debug)]
struct PerfFakeFs;
impl FileSystem for PerfFakeFs {
    fn root_inode(&self) -> Arc<dyn IndexNode> {
        panic!("PerfFakeFs does not have a root inode")
    }
    fn info(&self) -> FsInfo {
        panic!("PerfFakeFs does not have a filesystem info")
    }
    fn as_any_ref(&self) -> &dyn Any {
        self
    }
    fn name(&self) -> &str {
        "perf"
    }
    fn super_block(&self) -> SuperBlock {
        panic!("PerfFakeFs does not have a super block")
    }
    /// Resolve a page fault on a perf mmap region via the generic filemap
    /// handler, which pulls pages from the event's page cache.
    unsafe fn fault(&self, pfm: &mut PageFaultMessage) -> VmFaultReason {
        // (was: bound to a temporary then returned — clippy::let_and_return)
        PageFaultHandler::filemap_fault(pfm)
    }
    /// Pre-map a range of already-cached pages around a fault.
    unsafe fn map_pages(
        &self,
        pfm: &mut PageFaultMessage,
        start_pgoff: usize,
        end_pgoff: usize,
    ) -> VmFaultReason {
        PageFaultHandler::filemap_map_pages(pfm, start_pgoff, end_pgoff)
    }
}
impl Syscall {
    /// Entry point of the `perf_event_open(2)` system call: copy the
    /// `perf_event_attr` struct from user space and hand it to
    /// [`perf_event_open`].
    pub fn sys_perf_event_open(
        attr: *const u8,
        pid: i32,
        cpu: i32,
        group_fd: i32,
        flags: u32,
    ) -> Result<usize> {
        let reader = UserBufferReader::new(
            attr as *const perf_event_attr,
            size_of::<perf_event_attr>(),
            true,
        )?;
        let attr = reader.read_one_from_user(0)?;
        perf_event_open(attr, pid, cpu, group_fd, flags)
    }
}
pub fn perf_event_open(
attr: &perf_event_attr,
pid: i32,
cpu: i32,
group_fd: i32,
flags: u32,
) -> Result<usize> {
let args = PerfProbeArgs::try_from(attr, pid, cpu, group_fd, flags)?;
log::info!("perf_event_process: {:#?}", args);
let file_mode = if args
.flags
.contains(PerfEventOpenFlags::PERF_FLAG_FD_CLOEXEC)
{
FileMode::O_RDWR | FileMode::O_CLOEXEC
} else {
FileMode::O_RDWR
};
let event: Box<dyn PerfEventOps> = match args.type_ {
// Kprobe
// See /sys/bus/event_source/devices/kprobe/type
perf_type_id::PERF_TYPE_MAX => {
let kprobe_event = kprobe::perf_event_open_kprobe(args);
Box::new(kprobe_event)
}
perf_type_id::PERF_TYPE_SOFTWARE => {
// For bpf prog output
assert_eq!(args.config, perf_sw_ids::PERF_COUNT_SW_BPF_OUTPUT);
assert_eq!(
args.sample_type,
Some(perf_event_sample_format::PERF_SAMPLE_RAW)
);
let bpf_event = bpf::perf_event_open_bpf(args);
Box::new(bpf_event)
}
_ => {
unimplemented!("perf_event_process: unknown type: {:?}", args);
}
};
let page_cache = event.page_cache();
let perf_event = Arc::new(PerfEventInode::new(event));
if let Some(cache) = page_cache {
cache.set_inode(Arc::downgrade(&(perf_event.clone() as _)))?;
}
let file = File::new(perf_event, file_mode)?;
let fd_table = ProcessManager::current_pcb().fd_table();
let fd = fd_table.write().alloc_fd(file, None).map(|x| x as usize)?;
Ok(fd)
}
/// Write `data` as one raw sample into the BPF perf event referred to by
/// `fd`, then wake any epoll waiters on that fd.
///
/// Backs the `bpf_perf_event_output` helper; `_ctx` and `_flags` are
/// currently unused.
pub fn perf_event_output(_ctx: *mut c_void, fd: usize, _flags: u32, data: &[u8]) -> Result<()> {
    let file = get_perf_event_file(fd)?;
    // Peel Arc<PerfEventInode> -> PerfEventInode -> Box<dyn PerfEventOps> ->
    // dyn PerfEventOps, then downcast to the concrete BPF event; only BPF
    // output events accept raw samples.
    let bpf_event_file = file.deref().deref();
    let bpf_event_file = bpf_event_file
        .deref()
        .ref_any()
        .downcast_ref::<BpfPerfEvent>()
        .ok_or(SystemError::EINVAL)?;
    bpf_event_file.write_event(data)?;
    file.epoll_callback()?;
    Ok(())
}
/// Resolve `fd` in the current process's fd table to a perf event inode.
///
/// # Errors
/// `EBADF` if the fd is not open, `EINVAL` if it is not a perf event fd.
fn get_perf_event_file(fd: usize) -> Result<Arc<PerfEventInode>> {
    let fd_table = ProcessManager::current_pcb().fd_table();
    let guard = fd_table.read();
    let file = guard.get_file_by_fd(fd as _).ok_or(SystemError::EBADF)?;
    drop(guard);
    file.inode()
        .downcast_arc::<PerfEventInode>()
        .ok_or(SystemError::EINVAL)
}

123
kernel/src/perf/util.rs Normal file
View File

@ -0,0 +1,123 @@
use crate::include::bindings::linux_bpf::{
perf_event_attr, perf_event_header, perf_event_sample_format, perf_sw_ids, perf_type_id,
};
use crate::syscall::user_access::check_and_clone_cstr;
use alloc::string::String;
use num_traits::FromPrimitive;
use system_error::SystemError;
bitflags! {
    /// `flags` argument of `perf_event_open(2)`; values mirror the Linux UAPI
    /// `PERF_FLAG_*` constants.
    pub struct PerfEventOpenFlags: u32 {
        const PERF_FLAG_FD_NO_GROUP = 1;
        const PERF_FLAG_FD_OUTPUT = 2;
        const PERF_FLAG_PID_CGROUP = 4;
        const PERF_FLAG_FD_CLOEXEC = 8;
    }
}
/// The `PerfEventIoc` enum is used to define the ioctl commands for perf events.
///
/// See https://elixir.bootlin.com/linux/v6.1/source/include/uapi/linux/perf_event.h#L544
#[repr(u32)]
#[derive(Debug, Copy, Clone, FromPrimitive)]
pub enum PerfEventIoc {
    /// Equivalent to [crate::include::bindings::linux_bpf::AYA_PERF_EVENT_IOC_ENABLE].
    Enable = 9216,
    /// Equivalent to [crate::include::bindings::linux_bpf::AYA_PERF_EVENT_IOC_DISABLE].
    Disable = 9217,
    /// Equivalent to [crate::include::bindings::linux_bpf::AYA_PERF_EVENT_IOC_SET_BPF].
    SetBpf = 1074013192,
}
#[derive(Debug, Clone)]
#[allow(unused)]
/// `perf_event_open` syscall arguments, decoded from the raw attr struct.
pub struct PerfProbeArgs {
    // Software event id taken from `attr.config`.
    pub config: perf_sw_ids,
    // Probed symbol name for kprobe events; empty otherwise.
    pub name: String,
    // Probe offset, from `attr.config2`.
    pub offset: u64,
    // Size of the attr struct as reported by user space.
    pub size: u32,
    // Event type (kprobe events arrive as PERF_TYPE_MAX).
    pub type_: perf_type_id,
    pub pid: i32,
    pub cpu: i32,
    pub group_fd: i32,
    pub flags: PerfEventOpenFlags,
    // Requested sample format, if `attr.sample_type` mapped to a known value.
    pub sample_type: Option<perf_event_sample_format>,
}
impl PerfProbeArgs {
    /// Parse the raw user-supplied `perf_event_attr` into kernel-side args.
    ///
    /// # Errors
    /// Returns `EINVAL` if `type_`/`config` do not map to known enum values,
    /// or if the probed symbol name is not valid UTF-8.
    pub fn try_from(
        attr: &perf_event_attr,
        pid: i32,
        cpu: i32,
        group_fd: i32,
        flags: u32,
    ) -> Result<Self, SystemError> {
        let ty = perf_type_id::from_u32(attr.type_).ok_or(SystemError::EINVAL)?;
        let config = perf_sw_ids::from_u32(attr.config as u32).ok_or(SystemError::EINVAL)?;
        // PERF_TYPE_MAX is used here as the kprobe event type; `config1` then
        // carries a user-space pointer to the symbol name.
        let name = if ty == perf_type_id::PERF_TYPE_MAX {
            let name_ptr = unsafe { attr.__bindgen_anon_3.config1 } as *const u8;
            let name = check_and_clone_cstr(name_ptr, None)?;
            name.into_string().map_err(|_| SystemError::EINVAL)?
        } else {
            String::new()
        };
        let sample_ty = perf_event_sample_format::from_u32(attr.sample_type as u32);
        let args = PerfProbeArgs {
            config,
            name,
            // `config2` holds the probe offset for kprobe events.
            offset: unsafe { attr.__bindgen_anon_4.config2 },
            size: attr.size,
            type_: ty,
            pid,
            cpu,
            group_fd,
            flags: PerfEventOpenFlags::from_bits_truncate(flags),
            sample_type: sample_ty,
        };
        Ok(args)
    }
}
/// The event type in our particular use case will be `PERF_RECORD_SAMPLE` or `PERF_RECORD_LOST`.
/// `PERF_RECORD_SAMPLE` indicating that there is an actual sample after this header.
/// And `PERF_RECORD_LOST` indicating that there is a record lost header following the perf event header.
#[repr(C)]
#[derive(Debug)]
pub struct LostSamples {
    // Common perf record header (type `PERF_RECORD_LOST`).
    pub header: perf_event_header,
    // Sample stream id — presumably unused here; TODO confirm against readers.
    pub id: u64,
    // Number of records that were dropped.
    pub count: u64,
}
impl LostSamples {
    /// View this record as raw bytes for copying into the ring buffer.
    pub fn as_bytes(&self) -> &[u8] {
        // SAFETY: `Self` is `#[repr(C)]` and fully initialized; reading its
        // exact size in bytes from `self` is sound for the borrow's lifetime.
        unsafe { core::slice::from_raw_parts(self as *const Self as *const u8, size_of::<Self>()) }
    }
}
/// Header preceding a `PERF_RECORD_SAMPLE` payload in the ring buffer.
#[repr(C)]
#[derive(Debug)]
pub struct SampleHeader {
    // Common perf record header (type `PERF_RECORD_SAMPLE`).
    pub header: perf_event_header,
    // Size in bytes of the raw payload that follows.
    pub size: u32,
}
impl SampleHeader {
    /// View this header as raw bytes for copying into the ring buffer.
    pub fn as_bytes(&self) -> &[u8] {
        // SAFETY: `Self` is `#[repr(C)]` and fully initialized; reading its
        // exact size in bytes from `self` is sound for the borrow's lifetime.
        unsafe { core::slice::from_raw_parts(self as *const Self as *const u8, size_of::<Self>()) }
    }
}
/// A complete sample record: header plus borrowed raw payload.
#[repr(C)]
#[derive(Debug)]
pub struct PerfSample<'a> {
    pub s_hdr: SampleHeader,
    pub value: &'a [u8],
}
impl<'a> PerfSample<'a> {
    /// Total number of bytes a serialized sample occupies in the ring buffer:
    /// the fixed header plus the payload of `value_size` bytes.
    pub fn calculate_size(value_size: usize) -> usize {
        value_size + size_of::<SampleHeader>()
    }
}

View File

@ -134,7 +134,6 @@ impl SmpCpuManager {
&self.possible_cpus
}
#[allow(dead_code)]
pub fn possible_cpus_count(&self) -> u32 {
self.possible_cnt.load(core::sync::atomic::Ordering::SeqCst)
}

View File

@ -1159,6 +1159,20 @@ impl Syscall {
let flags = args[1] as u32;
Self::sys_eventfd(initval, flags)
}
SYS_BPF => {
let cmd = args[0] as u32;
let attr = args[1] as *mut u8;
let size = args[2] as u32;
Self::sys_bpf(cmd, attr, size)
}
SYS_PERF_EVENT_OPEN => {
let attr = args[0] as *const u8;
let pid = args[1] as i32;
let cpu = args[2] as i32;
let group_fd = args[3] as i32;
let flags = args[4] as u32;
Self::sys_perf_event_open(attr, pid, cpu, group_fd, flags)
}
_ => panic!("Unsupported syscall ID: {}", syscall_num),
};

View File

@ -2,8 +2,6 @@ use core::{
ffi::{c_int, c_longlong},
time::Duration,
};
use log::warn;
use num_traits::FromPrimitive;
use system_error::SystemError;
@ -139,7 +137,7 @@ impl Syscall {
pub fn clock_gettime(clock_id: c_int, tp: *mut PosixTimeSpec) -> Result<usize, SystemError> {
let clock_id = PosixClockID::try_from(clock_id)?;
if clock_id != PosixClockID::Realtime {
warn!("clock_gettime: currently only support Realtime clock, but got {:?}. Defaultly return realtime!!!\n", clock_id);
// warn!("clock_gettime: currently only support Realtime clock, but got {:?}. Defaultly return realtime!!!\n", clock_id);
}
if tp.is_null() {
return Err(SystemError::EFAULT);