feat(ebpf): support Aya framework. (#1070)

* feat(ebpf): support Aya framework.

1. fix the rbpf bug
2. use new Aya template
3. add kprobe related device files and attributes to sysfs

---
Signed-off-by: chenlinfeng <chenlinfeng25@outlook.com>
This commit is contained in:
linfeng
2024-12-07 16:41:37 +08:00
committed by GitHub
parent c09af909c5
commit 72423f90bb
53 changed files with 849 additions and 511 deletions

View File

@ -660,9 +660,9 @@ pub fn execute_program(
// Save the callee saved registers
pre_stack.save_registers(&reg[6..=9]);
// Save the return address
pre_stack.save_return_address(insn_ptr as u16);
pre_stack.save_return_address(insn_ptr as u64);
// save the stack pointer
pre_stack.save_sp(reg[10] as u16);
pre_stack.save_sp(reg[10]);
let mut stack = StackFrame::new();
log::trace!("BPF TO BPF CALL: new pc: {} + {} = {}",insn_ptr ,insn.imm,insn_ptr + insn.imm as usize);
reg[10] = stack.as_ptr() as u64 + stack.len() as u64;
@ -695,7 +695,7 @@ pub fn execute_program(
// Restore the return address
insn_ptr = stack.get_return_address() as usize;
// Restore the stack pointer
reg[10] = stack.get_sp() as u64;
reg[10] = stack.get_sp();
log::trace!("EXIT: new pc: {}", insn_ptr);
}
}

View File

@ -1,9 +1,9 @@
use crate::{ebpf::STACK_SIZE, vec, Vec};
pub struct StackFrame {
return_address: u16,
return_address: u64,
saved_registers: [u64; 4],
sp: u16,
sp: u64,
frame: Vec<u8>,
}
@ -54,22 +54,22 @@ impl StackFrame {
}
/// Save the return address
pub fn save_return_address(&mut self, address: u16) {
pub fn save_return_address(&mut self, address: u64) {
self.return_address = address;
}
/// Get the return address
pub fn get_return_address(&self) -> u16 {
pub fn get_return_address(&self) -> u64 {
self.return_address
}
/// Save the stack pointer
pub fn save_sp(&mut self, sp: u16) {
pub fn save_sp(&mut self, sp: u64) {
self.sp = sp;
}
/// Get the stack pointer
pub fn get_sp(&self) -> u16 {
pub fn get_sp(&self) -> u64 {
self.sp
}
}

View File

@ -267,9 +267,10 @@ impl X86_64MMArch {
});
} else {
log::error!(
"No mapped vma, error_code: {:#b}, address: {:#x}",
"No mapped vma, error_code: {:#b}, address: {:#x}, flags: {:?}",
error_code,
address.data(),
flags
);
let pid = ProcessManager::current_pid();
let mut info =

View File

@ -1,6 +1,7 @@
// Numeric IDs of the eBPF helper functions known to this kernel.
// Each ID is the key under which the corresponding helper is registered in
// the helper table built by `init_helper_functions`.
// NOTE(review): the values presumably mirror Linux's `enum bpf_func_id`
// (UAPI `linux/bpf.h`) so that unmodified eBPF programs resolve the same
// helpers — confirm against the UAPI header before adding new entries.
pub const HELPER_MAP_LOOKUP_ELEM: u32 = 1;
pub const HELPER_MAP_UPDATE_ELEM: u32 = 2;
pub const HELPER_MAP_DELETE_ELEM: u32 = 3;
// Helper that returns a nanosecond timestamp (see `bpf_ktime_get_ns`).
pub const HELPER_KTIME_GET_NS: u32 = 5;
pub const HELPER_MAP_FOR_EACH_ELEM: u32 = 164;
pub const HELPER_MAP_LOOKUP_PERCPU_ELEM: u32 = 195;
pub const HELPER_PERF_EVENT_OUTPUT: u32 = 25;

View File

@ -6,6 +6,7 @@ use crate::bpf::map::{BpfCallBackFn, BpfMap};
use crate::include::bindings::linux_bpf::BPF_F_CURRENT_CPU;
use crate::libs::lazy_init::Lazy;
use crate::smp::core::smp_get_processor_id;
use crate::time::Instant;
use alloc::{collections::BTreeMap, sync::Arc};
use core::ffi::c_void;
use system_error::SystemError;
@ -300,6 +301,10 @@ pub fn map_peek_elem(map: &Arc<BpfMap>, value: &mut [u8]) -> Result<()> {
value
}
/// eBPF helper: returns the current kernel time in nanoseconds.
///
/// The value is derived from `Instant::now().total_micros()`, so although the
/// unit is nanoseconds the effective resolution is one microsecond.
pub fn bpf_ktime_get_ns() -> u64 {
    let now_micros = Instant::now().total_micros();
    // Scale microseconds to nanoseconds before converting to the u64 the
    // helper ABI returns.
    let now_nanos = now_micros * 1000;
    now_nanos as u64
}
pub static BPF_HELPER_FUN_SET: Lazy<BTreeMap<u32, RawBPFHelperFn>> = Lazy::new();
/// Initialize the helper functions.
@ -311,6 +316,7 @@ pub fn init_helper_functions() {
map.insert(HELPER_MAP_LOOKUP_ELEM, define_func!(raw_map_lookup_elem));
map.insert(HELPER_MAP_UPDATE_ELEM, define_func!(raw_map_update_elem));
map.insert(HELPER_MAP_DELETE_ELEM, define_func!(raw_map_delete_elem));
map.insert(HELPER_KTIME_GET_NS, define_func!(bpf_ktime_get_ns));
map.insert(
HELPER_MAP_FOR_EACH_ELEM,
define_func!(raw_map_for_each_elem),

View File

@ -33,7 +33,7 @@ pub fn bpf(cmd: bpf_cmd, attr: &bpf_attr) -> Result<usize> {
// Program related commands
bpf_cmd::BPF_PROG_LOAD => prog::bpf_prog_load(attr),
// Object creation commands
bpf_cmd::BPF_BTF_LOAD => {
bpf_cmd::BPF_BTF_LOAD | bpf_cmd::BPF_LINK_CREATE | bpf_cmd::BPF_OBJ_GET_INFO_BY_FD => {
error!("bpf cmd {:?} not implemented", cmd);
return Err(SystemError::ENOSYS);
}

View File

@ -310,6 +310,7 @@ pub enum DeviceType {
PlatformDev,
Char,
Pci,
Other,
}
/// @brief: 设备标识符类型

View File

@ -21,7 +21,6 @@ pub fn driver_init() -> Result<(), SystemError> {
platform_bus_init()?;
serio_bus_init()?;
CpuDeviceManager::init()?;
// 至此,已完成设备驱动模型的初始化
return Ok(());
}

View File

@ -394,7 +394,31 @@ impl ProcFS {
} else {
panic!("create ksmg error");
}
// 这个文件是用来欺骗Aya框架识别内核版本
/* On Ubuntu LINUX_VERSION_CODE doesn't correspond to info.release,
* but Ubuntu provides /proc/version_signature file, as described at
* https://ubuntu.com/kernel, with an example contents below, which we
* can use to get a proper LINUX_VERSION_CODE.
*
* Ubuntu 5.4.0-12.15-generic 5.4.8
*
* In the above, 5.4.8 is what kernel is actually expecting, while
* uname() call will return 5.4.0 in info.release.
*/
let binding = inode.create("version_signature", FileType::File, ModeType::S_IRUGO);
if let Ok(version_signature) = binding {
let version_signature = version_signature
.as_any_ref()
.downcast_ref::<LockedProcFSInode>()
.unwrap();
version_signature.0.lock().fdata.ftype = ProcFileType::Default;
version_signature.0.lock().data = "DragonOS 6.0.0-generic 6.0.0\n"
.to_string()
.as_bytes()
.to_vec();
} else {
panic!("create version_signature error");
}
return result;
}
@ -466,6 +490,7 @@ impl IndexNode for LockedProcFSInode {
let file_size = match inode.fdata.ftype {
ProcFileType::ProcStatus => inode.open_status(&mut private_data)?,
ProcFileType::ProcMeminfo => inode.open_meminfo(&mut private_data)?,
ProcFileType::Default => inode.data.len() as i64,
_ => {
todo!()
}

View File

@ -0,0 +1,189 @@
use crate::driver::base::class::Class;
use crate::driver::base::device::bus::Bus;
use crate::driver::base::device::driver::Driver;
use crate::driver::base::device::{Device, DeviceCommonData, DeviceType, IdTable};
use crate::driver::base::kobject::{
KObjType, KObject, KObjectCommonData, KObjectState, LockedKObjectState,
};
use crate::driver::base::kset::KSet;
use crate::filesystem::kernfs::KernFSInode;
use crate::filesystem::sysfs::{Attribute, SysFSOpsSupport};
use crate::filesystem::vfs::syscall::ModeType;
use crate::libs::rwlock::{RwLockReadGuard, RwLockWriteGuard};
use crate::libs::spinlock::{SpinLock, SpinLockGuard};
use alloc::string::{String, ToString};
use alloc::sync::{Arc, Weak};
use core::fmt::Debug;
use system_error::SystemError;
/// Device object that represents the kprobe event source in the device model.
///
/// It implements both `KObject` and `Device`; the mutable kobject/device
/// bookkeeping lives in `inner` behind a spinlock.
#[derive(Debug)]
#[cast_to([sync] Device)]
pub struct KprobeDevice {
// Mutable kobject and device state, guarded by a spinlock.
inner: SpinLock<InnerKprobeDevice>,
// Lifecycle state of the kobject, guarded by its own lock.
kobj_state: LockedKObjectState,
// Device name; set to "kprobe" by `new` and never changed afterwards
// (`set_name` is a no-op).
name: String,
}
/// Lock-protected state of a `KprobeDevice`.
#[derive(Debug)]
struct InnerKprobeDevice {
// Generic kobject data: kernfs inode, parent, kset and kobject type.
kobject_common: KObjectCommonData,
// Generic device data: bus, class, driver and parent device.
device_common: DeviceCommonData,
}
impl KprobeDevice {
    /// Creates the kprobe device with default kobject/device data, the fixed
    /// name `"kprobe"`, and the given kobject `parent`.
    pub fn new(parent: Option<Weak<dyn KObject>>) -> Arc<Self> {
        let state = InnerKprobeDevice {
            kobject_common: KObjectCommonData::default(),
            device_common: DeviceCommonData::default(),
        };
        let device = Self {
            inner: SpinLock::new(state),
            kobj_state: LockedKObjectState::new(None),
            name: "kprobe".to_string(),
        };
        // Record the parent through the regular KObject setter before the
        // device is shared behind an `Arc`.
        device.set_parent(parent);
        Arc::new(device)
    }

    /// Locks and returns the mutable kobject/device state.
    fn inner(&self) -> SpinLockGuard<InnerKprobeDevice> {
        self.inner.lock()
    }
}
/// `KObject` plumbing for the kprobe device: every accessor reads or writes
/// the corresponding field of the lock-protected `kobject_common` data.
impl KObject for KprobeDevice {
    fn as_any_ref(&self) -> &dyn core::any::Any {
        self
    }

    fn set_inode(&self, inode: Option<Arc<KernFSInode>>) {
        let mut state = self.inner();
        state.kobject_common.kern_inode = inode;
    }

    fn inode(&self) -> Option<Arc<KernFSInode>> {
        let state = self.inner();
        state.kobject_common.kern_inode.clone()
    }

    fn parent(&self) -> Option<Weak<dyn KObject>> {
        let state = self.inner();
        state.kobject_common.parent.clone()
    }

    fn set_parent(&self, parent: Option<Weak<dyn KObject>>) {
        let mut state = self.inner();
        state.kobject_common.parent = parent;
    }

    fn kset(&self) -> Option<Arc<KSet>> {
        let state = self.inner();
        state.kobject_common.kset.clone()
    }

    fn set_kset(&self, kset: Option<Arc<KSet>>) {
        let mut state = self.inner();
        state.kobject_common.kset = kset;
    }

    fn kobj_type(&self) -> Option<&'static dyn KObjType> {
        let state = self.inner();
        state.kobject_common.kobj_type
    }

    fn set_kobj_type(&self, ktype: Option<&'static dyn KObjType>) {
        let mut state = self.inner();
        state.kobject_common.kobj_type = ktype;
    }

    /// Returns a copy of the fixed device name.
    fn name(&self) -> String {
        self.name.clone()
    }

    /// Intentionally a no-op: the name is fixed at construction time.
    fn set_name(&self, _name: String) {}

    fn kobj_state(&self) -> RwLockReadGuard<KObjectState> {
        self.kobj_state.read()
    }

    fn kobj_state_mut(&self) -> RwLockWriteGuard<KObjectState> {
        self.kobj_state.write()
    }

    fn set_kobj_state(&self, state: KObjectState) {
        let mut current = self.kobj_state.write();
        *current = state;
    }
}
/// `Device` implementation for the kprobe device.
///
/// Bus, class, driver and parent are stored in the lock-protected
/// `device_common` data. The driver-matching hooks (`can_match`,
/// `set_can_match`, `state_synced`) are not implemented yet and panic via
/// `todo!()` if they are ever called.
impl Device for KprobeDevice {
    #[inline]
    #[allow(dead_code)]
    fn dev_type(&self) -> DeviceType {
        DeviceType::Other
    }

    #[inline]
    fn id_table(&self) -> IdTable {
        IdTable::new(String::from("kprobe"), None)
    }

    fn bus(&self) -> Option<Weak<dyn Bus>> {
        let state = self.inner();
        state.device_common.bus.clone()
    }

    fn set_bus(&self, bus: Option<Weak<dyn Bus>>) {
        let mut state = self.inner();
        state.device_common.bus = bus;
    }

    fn set_class(&self, class: Option<Weak<dyn Class>>) {
        let mut state = self.inner();
        state.device_common.class = class;
    }

    /// Returns the bound driver, or `None` when no driver is set or the
    /// driver has already been dropped.
    fn driver(&self) -> Option<Arc<dyn Driver>> {
        let state = self.inner();
        state
            .device_common
            .driver
            .as_ref()
            .and_then(|weak| weak.upgrade())
    }

    fn set_driver(&self, driver: Option<Weak<dyn Driver>>) {
        let mut state = self.inner();
        state.device_common.driver = driver;
    }

    /// The kprobe device is never reported as dead.
    #[inline]
    fn is_dead(&self) -> bool {
        false
    }

    fn can_match(&self) -> bool {
        todo!()
    }

    fn set_can_match(&self, _can_match: bool) {
        todo!()
    }

    fn state_synced(&self) -> bool {
        todo!()
    }

    fn dev_parent(&self) -> Option<Weak<dyn Device>> {
        self.inner().device_common.get_parent_weak_or_clear()
    }

    fn set_dev_parent(&self, dev_parent: Option<Weak<dyn Device>>) {
        let mut state = self.inner();
        state.device_common.parent = dev_parent;
    }
}
/// Read-only sysfs attribute named `type` that is attached to the kprobe
/// device.
#[derive(Debug)]
pub struct KprobeAttr;

impl Attribute for KprobeAttr {
    fn name(&self) -> &str {
        "type"
    }

    fn mode(&self) -> ModeType {
        ModeType::S_IRUGO
    }

    fn support(&self) -> SysFSOpsSupport {
        SysFSOpsSupport::ATTR_SHOW
    }

    /// Writes the single ASCII digit `6` into `buf` and returns the number of
    /// bytes written (always 1).
    ///
    /// # Errors
    ///
    /// Returns `SystemError::EINVAL` when `buf` is empty.
    fn show(&self, _kobj: Arc<dyn KObject>, buf: &mut [u8]) -> Result<usize, SystemError> {
        match buf.first_mut() {
            Some(slot) => {
                // The reported value is perf_type_id::PERF_TYPE_MAX.
                *slot = b'6';
                Ok(1)
            }
            None => Err(SystemError::EINVAL),
        }
    }
}

View File

@ -0,0 +1,31 @@
use crate::driver::base::device::bus::Bus;
use crate::driver::base::device::{device_manager, device_register, sys_devices_kset, Device};
use crate::driver::base::kobject::KObject;
use crate::init::initcall::INITCALL_DEVICE;
use crate::misc::events::get_event_source_bus;
use crate::misc::events::kprobe::device::{KprobeAttr, KprobeDevice};
use alloc::sync::Arc;
use system_error::SystemError;
use unified_init::macros::unified_init;
pub mod device;
// Global handle to the kprobe device; `None` until `kprobe_subsys_init`
// has registered the device and stored it here.
// NOTE(review): this is a `static mut` written without synchronization —
// presumably safe because initcalls run once during early boot; confirm.
static mut KPROBE_DEVICE: Option<Arc<KprobeDevice>> = None;
/// Device-level initcall that creates the kprobe device, attaches it to the
/// event-source bus, registers it, and exposes its `type` attribute.
///
/// # Errors
///
/// Returns `SystemError::EINVAL` when the event-source bus has not been set
/// up, and propagates any error from device registration or attribute file
/// creation.
#[unified_init(INITCALL_DEVICE)]
pub fn kprobe_subsys_init() -> Result<(), SystemError> {
    // The device is parented to the /sys/devices kset.
    let devices_root = sys_devices_kset() as Arc<dyn KObject>;
    let device = KprobeDevice::new(Some(Arc::downgrade(&devices_root)));

    let bus = get_event_source_bus().ok_or(SystemError::EINVAL)? as Arc<dyn Bus>;
    device.set_bus(Some(Arc::downgrade(&bus)));

    // Register under /sys/devices.
    device_register(device.clone())?;

    // Keep a global handle to the registered device.
    unsafe {
        KPROBE_DEVICE = Some(device.clone());
    }

    let as_device = device as Arc<dyn Device>;
    device_manager().create_file(&as_device, &KprobeAttr)?;
    Ok(())
}

View File

@ -0,0 +1,28 @@
use crate::driver::base::device::bus::{bus_register, Bus};
use crate::init::initcall::INITCALL_SUBSYS;
use crate::misc::events::subsys::EventSourceBus;
use alloc::sync::Arc;
use system_error::SystemError;
use unified_init::macros::unified_init;
mod kprobe;
mod subsys;
// Global handle to the `event_source` bus; `None` until
// `init_event_source_bus` has registered the bus successfully.
static mut EVENT_SOURCE_BUS: Option<Arc<EventSourceBus>> = None;
/// Returns a new reference to the `event_source` bus, or `None` if the bus
/// has not been registered.
// NOTE(review): reads a `static mut` without synchronization — presumably
// sound because the static is only written from the init path before any
// concurrent reader exists; confirm.
fn get_event_source_bus() -> Option<Arc<EventSourceBus>> {
unsafe { EVENT_SOURCE_BUS.clone() }
}
/// Subsystem-level initcall that creates and registers the `event_source`
/// bus and publishes it through the module-level `EVENT_SOURCE_BUS` handle.
///
/// The kprobe device is not created here; it has its own device-level
/// initcall.
///
/// # Errors
///
/// Propagates the error from `bus_register`; in that case the global handle
/// is reset to `None`.
#[unified_init(INITCALL_SUBSYS)]
pub fn init_event_source_bus() -> Result<(), SystemError> {
    let bus = EventSourceBus::new();
    match bus_register(bus.clone() as Arc<dyn Bus>) {
        Ok(()) => {
            unsafe { EVENT_SOURCE_BUS = Some(bus) };
            Ok(())
        }
        Err(err) => {
            unsafe { EVENT_SOURCE_BUS = None };
            Err(err)
        }
    }
}

View File

@ -0,0 +1,52 @@
use crate::driver::base::device::bus::Bus;
use crate::driver::base::device::Device;
use crate::driver::base::subsys::SubSysPrivate;
use alloc::string::{String, ToString};
use alloc::sync::{Arc, Weak};
use system_error::SystemError;
/// The `event_source` bus, to which event-source devices such as the kprobe
/// device are attached.
#[derive(Debug)]
pub struct EventSourceBus {
// Subsystem bookkeeping for this bus, exposed via `Bus::subsystem`.
private: SubSysPrivate,
}
impl EventSourceBus {
    /// Creates the `event_source` bus.
    ///
    /// The subsystem data is first built with an empty `Weak` placeholder for
    /// its bus back-reference, because the `Arc` does not exist yet; the real
    /// back-reference is installed right after the `Arc` is created.
    pub fn new() -> Arc<Self> {
        let placeholder: Weak<Self> = Weak::new();
        let private = SubSysPrivate::new(String::from("event_source"), Some(placeholder), None, &[]);
        let this = Arc::new(Self { private });

        let as_bus = this.clone() as Arc<dyn Bus>;
        this.subsystem().set_bus(Some(Arc::downgrade(&as_bus)));
        this
    }
}
impl Bus for EventSourceBus {
fn name(&self) -> String {
"event_source".to_string()
}
fn dev_name(&self) -> String {
self.name()
}
fn root_device(&self) -> Option<Weak<dyn Device>> {
None
}
fn remove(&self, _device: &Arc<dyn Device>) -> Result<(), SystemError> {
todo!()
}
fn shutdown(&self, _device: &Arc<dyn Device>) {
todo!()
}
fn resume(&self, _device: &Arc<dyn Device>) -> Result<(), SystemError> {
todo!()
}
fn subsystem(&self) -> &SubSysPrivate {
&self.private
}
}

View File

@ -1 +1,2 @@
pub mod events;
pub mod ksysfs;

View File

@ -79,54 +79,28 @@ impl RingPage {
}
}
#[inline]
fn can_write(&self, data_size: usize, data_tail: usize, data_head: usize) -> bool {
if (data_head + 1) % self.data_region_size == data_tail {
// The buffer is full
return false;
}
let capacity = if data_head >= data_tail {
self.data_region_size - data_head + data_tail
} else {
data_tail - data_head
};
let capacity = self.data_region_size - data_head + data_tail;
data_size <= capacity
}
pub fn write_event(&mut self, data: &[u8]) -> Result<()> {
let data_tail = unsafe { &mut (*(self.ptr as *mut perf_event_mmap_page)).data_tail };
let data_head = unsafe { &mut (*(self.ptr as *mut perf_event_mmap_page)).data_head };
// data_tail..data_head is the region that can be written
// check if there is enough space to write the event
let sample_size = PerfSample::calculate_size(data.len());
let can_write_sample =
self.can_write(sample_size, *data_tail as usize, *data_head as usize);
// log::error!(
// "can_write_sample: {}, data_tail: {}, data_head: {}, data.len(): {}, region_size: {}",
// can_write_sample,
// *data_tail,
// *data_head,
// data.len(),
// self.data_region_size
// );
if !can_write_sample {
//we need record it to the lost record
self.lost += 1;
// log::error!(
// "Lost record: {}, data_tail: {}, data_head: {}",
// self.lost,
// *data_tail,
// *data_head
// );
Ok(())
} else {
// we can write the sample to the page
// If the lost record is not zero, we need to write the lost record first.
// user lib will update the tail after read the data,but it will not % data_region_size
let perf_header_size = size_of::<perf_event_header>();
let can_write_perf_header =
self.can_write(perf_header_size, *data_tail as usize, *data_head as usize);
if can_write_perf_header {
let can_write_lost_record = self.can_write(
size_of::<LostSamples>(),
*data_tail as usize,
*data_head as usize,
);
// if there is lost record, we need to write the lost record first
if self.lost > 0 && can_write_lost_record {
let new_data_head = self.write_lost(*data_head as usize)?;
*data_head = new_data_head as u64;
@ -137,8 +111,21 @@ impl RingPage {
// *data_head
// );
self.lost = 0;
self.write_event(data)
} else {
// try to write the event again
return self.write_event(data);
}
let sample_size = PerfSample::calculate_size(data.len());
let can_write_sample =
self.can_write(sample_size, *data_tail as usize, *data_head as usize);
// log::error!(
// "can_write_sample: {}, data_tail: {}, data_head: {}, data.len(): {}, region_size: {}",
// can_write_sample,
// *data_tail,
// *data_head,
// data.len(),
// self.data_region_size
// );
if can_write_sample {
let new_data_head = self.write_sample(data, *data_head as usize)?;
*data_head = new_data_head as u64;
// log::info!(
@ -146,20 +133,24 @@ impl RingPage {
// *data_tail,
// *data_head
// );
Ok(())
} else {
self.lost += 1;
}
} else {
self.lost += 1;
}
Ok(())
}
/// Write any data to the page.
///
/// Return the new data_head
fn write_any(&mut self, data: &[u8], data_head: usize) -> Result<usize> {
fn write_any(&mut self, data: &[u8], data_head: usize) -> Result<()> {
let data_region_len = self.data_region_size;
let data_region = self.as_mut_slice()[PAGE_SIZE..].as_mut();
let data_len = data.len();
let start = data_head % data_region_len;
let end = (data_head + data_len) % data_region_len;
let start = data_head;
if start < end {
data_region[start..end].copy_from_slice(data);
} else {
@ -167,40 +158,57 @@ impl RingPage {
data_region[start..start + first_len].copy_from_slice(&data[..first_len]);
data_region[0..end].copy_from_slice(&data[first_len..]);
}
Ok(end)
Ok(())
}
/// Returns how many padding bytes are needed after a record ending at
/// `data_head_mod`.
///
/// If the space left between `data_head_mod` and the end of the data region
/// is too small to hold a `perf_event_header`, that whole remainder is
/// returned so the caller can pad it out; otherwise no padding is needed and
/// `0` is returned.
#[inline]
fn fill_size(&self, data_head_mod: usize) -> usize {
    let remaining = self.data_region_size - data_head_mod;
    if remaining >= size_of::<perf_event_header>() {
        0
    } else {
        // Too small for another header: the tail of the region is padding.
        remaining
    }
}
/// Write a sample to the page.
fn write_sample(&mut self, data: &[u8], data_head: usize) -> Result<usize> {
let sample_size = PerfSample::calculate_size(data.len());
let maybe_end = (data_head + sample_size) % self.data_region_size;
let fill_size = self.fill_size(maybe_end);
let perf_sample = PerfSample {
s_hdr: SampleHeader {
header: perf_event_header {
type_: perf_event_type::PERF_RECORD_SAMPLE as u32,
misc: 0,
size: size_of::<SampleHeader>() as u16 + data.len() as u16,
size: size_of::<SampleHeader>() as u16 + data.len() as u16 + fill_size as u16,
},
size: data.len() as u32,
},
value: data,
};
let new_head = self.write_any(perf_sample.s_hdr.as_bytes(), data_head)?;
self.write_any(perf_sample.value, new_head)
self.write_any(perf_sample.s_hdr.as_bytes(), data_head)?;
self.write_any(perf_sample.value, data_head + size_of::<SampleHeader>())?;
Ok(data_head + sample_size + fill_size)
}
/// Write a lost record to the page.
///
/// Return the new data_head
fn write_lost(&mut self, data_head: usize) -> Result<usize> {
let maybe_end = (data_head + size_of::<LostSamples>()) % self.data_region_size;
let fill_size = self.fill_size(maybe_end);
let lost = LostSamples {
header: perf_event_header {
type_: perf_event_type::PERF_RECORD_LOST as u32,
misc: 0,
size: size_of::<LostSamples>() as u16,
size: size_of::<LostSamples>() as u16 + fill_size as u16,
},
id: 0,
count: self.lost as u64,
};
self.write_any(lost.as_bytes(), data_head)
self.write_any(lost.as_bytes(), data_head)?;
Ok(data_head + size_of::<LostSamples>() + fill_size)
}
pub fn readable(&self) -> bool {

View File

@ -39,8 +39,10 @@ impl KprobePerfEvent {
.downcast_arc::<BpfProg>()
.ok_or(SystemError::EINVAL)?;
let prog_slice = file.insns();
let mut vm =
EbpfVmRawOwned::new(Some(prog_slice.to_vec())).map_err(|_| SystemError::EINVAL)?;
let mut vm = EbpfVmRawOwned::new(Some(prog_slice.to_vec())).map_err(|e| {
log::error!("create ebpf vm failed: {:?}", e);
SystemError::EINVAL
})?;
vm.register_helper_set(BPF_HELPER_FUN_SET.get())
.map_err(|_| SystemError::EINVAL)?;
// create a callback to execute the ebpf prog
@ -75,10 +77,10 @@ impl CallBackFunc for KprobePerfCallBack {
size_of::<KProbeContext>(),
)
};
let _res = self
.vm
.execute_program(probe_context)
.map_err(|_| SystemError::EINVAL);
let res = self.vm.execute_program(probe_context);
if res.is_err() {
log::error!("kprobe callback error: {:?}", res);
}
}
}

View File

@ -1216,6 +1216,7 @@ impl Syscall {
let flags = args[4] as u32;
Self::sys_perf_event_open(attr, pid, cpu, group_fd, flags)
}
SYS_SETRLIMIT => Ok(0),
_ => panic!("Unsupported syscall ID: {}", syscall_num),
};