feat(ebpf):[WIP] add eBPF support (#948)

* feat(kprobe): Add basic kprobe support for x86_64

* feat: add ebpf support (#912)

- Implement part of the bpf() commands, including several basic map types and the related helper functions
- Implement some of the perf-related data structures
- Implement a simple mmap for files for the time being
- Implement an eBPF program that uses kprobe to count syscall invocations

Current (basic) level of eBPF support:

- Simple eBPF programs (with no special maps specified)
- eBPF programs that use the maps already implemented in the kernel
- Can be used together with kprobe
- The kernel-side map interface is defined and implemented, so adding new map types is fairly simple

Unsupported features:
- Distinguishing the different eBPF program types (network/cgroup) and restricting the set of callable helper functions accordingly
- Integration with the kernel's other tracing mechanisms (tracepoint)
- Other helpers and maps


TODO

- [ ] Rework mmap; needs discussion because it interacts with the block cache layer
- [x] Add documentation
- [x] Fix potential bugs
- [x] Add rbpf version information

* feat: add /sys/devices/system/cpu/possible file

* feat: add /sys/devices/system/cpu/online
linfeng 2024-10-25 15:59:57 +08:00 committed by GitHub
parent 80c9e8f8f0
commit fae6e9ade4
126 changed files with 29529 additions and 62 deletions

.github/workflows/cache-toolchain.yml (new file, 94 lines)

@ -0,0 +1,94 @@
name: Cache toolchain

on: workflow_call

jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3

      - name: Cache DragonOS GCC
        id: cache-dragonos-gcc
        uses: actions/cache@v3
        env:
          cache-name: cache-dragonos-gcc
        with:
          path: |
            ~/opt
            ~/.bashrc
          key: ${{ runner.os }}-build-${{ env.cache-name }}-${{ hashFiles('tools/build_gcc_toolchain.sh') }}

      - if: ${{ steps.cache-dragonos-gcc.outputs.cache-hit != 'true' }}
        name: build dragonos-gcc
        continue-on-error: true
        run: |
          bash tools/build_gcc_toolchain.sh -f

      - uses: ./.github/actions/install-apt-packages

      - name: Cache build tools
        id: cache-build-tools
        uses: actions/cache@v3
        env:
          cache-name: cache-build-tools
          dadk_version: 0.1.11
        with:
          path: |
            ~/.cargo
            ~/.rustup
            ~/.bashrc
            ~/opt
          key: ${{ runner.os }}-build-${{ env.cache-name }}-${{ env.dadk_version }}-${{ hashFiles('.github/workflows/cache-toolchain.yml') }}-${{ hashFiles('tools/install_musl_gcc.sh') }}

      - if: ${{ steps.cache-build-tools.outputs.cache-hit != 'true' }}
        name: Install toolchain
        continue-on-error: false
        run: |
          USE_GITHUB=1 bash tools/install_musl_gcc.sh
          cargo install cargo-binutils
          rustup toolchain install nightly-x86_64-unknown-linux-gnu
          rustup toolchain install nightly-2024-07-23-x86_64-unknown-linux-gnu
          rustup toolchain install nightly-2023-08-15-x86_64-unknown-linux-gnu
          rustup component add rust-src --toolchain nightly-2024-07-23-x86_64-unknown-linux-gnu
          rustup component add rust-src --toolchain nightly-2023-08-15-x86_64-unknown-linux-gnu
          rustup target add x86_64-unknown-none --toolchain nightly-2024-07-23-x86_64-unknown-linux-gnu
          rustup target add x86_64-unknown-none --toolchain nightly-2023-08-15-x86_64-unknown-linux-gnu
          rustup toolchain install nightly-2024-07-23-riscv64gc-unknown-linux-gnu --force-non-host
          rustup toolchain install nightly-2023-08-15-riscv64gc-unknown-linux-gnu --force-non-host
          rustup target add riscv64gc-unknown-none-elf --toolchain nightly-2024-07-23-riscv64gc-unknown-linux-gnu
          rustup target add riscv64imac-unknown-none-elf --toolchain nightly-2024-07-23-riscv64gc-unknown-linux-gnu
          rustup target add riscv64gc-unknown-none-elf --toolchain nightly-2023-08-15-riscv64gc-unknown-linux-gnu
          rustup target add riscv64imac-unknown-none-elf --toolchain nightly-2023-08-15-riscv64gc-unknown-linux-gnu
          rustup component add rust-src --toolchain nightly-x86_64-unknown-linux-gnu
          rustup component add rust-src
          rustup component add llvm-tools-preview
          rustup component add rustfmt
          rustup component add rustfmt --toolchain nightly-x86_64-unknown-linux-gnu
          rustup component add rustfmt --toolchain nightly-2024-07-23-x86_64-unknown-linux-gnu
          rustup component add rustfmt --toolchain nightly-2023-08-15-x86_64-unknown-linux-gnu
          rustup component add rustfmt --toolchain nightly-2024-07-23-riscv64gc-unknown-linux-gnu
          rustup component add rustfmt --toolchain nightly-2023-08-15-riscv64gc-unknown-linux-gnu
          rustup default nightly-2024-07-23
          cargo install dadk --version 0.1.11

          userapp_musl_toolchain="nightly-2023-08-15-x86_64-unknown-linux-gnu"
          rustup toolchain install ${userapp_musl_toolchain}
          rustup component add --toolchain ${userapp_musl_toolchain} rust-src
          rustup target add --toolchain ${userapp_musl_toolchain} x86_64-unknown-linux-musl

          rustup target add x86_64-unknown-linux-musl --toolchain nightly-2024-07-23-x86_64-unknown-linux-gnu
          rustup component add rust-src --toolchain nightly-2024-07-23-x86_64-unknown-linux-gnu
          cargo install bpf-linker

View File

@@ -51,7 +51,7 @@ jobs:
       env:
         ARCH: ${{ matrix.arch }}
         HOME: /root
-      run: bash -c "source /root/.cargo/env && cd kernel && make test"
+      run: bash -c "source /root/.cargo/env && cd kernel && make test && make test-rbpf"
   build-x86_64:

View File

@@ -31,6 +31,7 @@
    kernel/ktest/index
    kernel/cpu_arch/index
    kernel/libs/index
+   kernel/trace/index
 .. toctree::

docs/kernel/trace/eBPF.md (new file, 324 lines)

@ -0,0 +1,324 @@
# eBPF

> Author: Chen Linfeng
>
> Email: chenlinfeng25@outlook.com

## Overview

eBPF is a revolutionary technology that originated in the Linux kernel. It allows sandboxed programs to run in a privileged context such as the operating system kernel, and is used to extend the kernel's capabilities safely and efficiently without changing kernel source code or loading kernel modules.

Historically, the operating system has been the ideal place to implement observability, security, and networking functionality, because the kernel has the privilege to supervise and control the entire system. At the same time, because of its central role and the high demands on stability and security, the kernel is hard to iterate on quickly, so innovation at the OS level has traditionally been slower than for functionality built outside the OS.

eBPF fundamentally changes this. By allowing sandboxed programs to run inside the operating system, application developers can run eBPF programs that add functionality to the OS at runtime. With the help of a JIT compiler and a verification engine, the OS then guarantees that these programs are as safe and efficient as natively compiled code. This has triggered a wave of eBPF-based projects covering a wide range of use cases, including next-generation networking, observability, and security.
## eBPF in DragonOS

Adding eBPF support to a new OS requires understanding how eBPF runs: it normally takes user-space tooling and kernel infrastructure working together for eBPF to be useful. A new OS usually aims to be compatible with Linux applications, which further simplifies porting the user-space tools: as long as the kernel implements the relevant system calls and features, the existing tooling can be reused to complete the eBPF support.

## How an eBPF program runs

![image-20240909165945192](./ebpf_flow.png)

As shown in the figure, running an eBPF program involves three main steps:

1. Source code -> binary
   1. Users can write eBPF programs in Python/C/Rust and compile the source into a binary with the corresponding toolchain.
   2. In this step, users need to make good use of helper functions to enrich the eBPF program's functionality.
2. Loading the eBPF program
   1. A user-space library wraps the system call interface exposed by the kernel to simplify the user's work. After preprocessing the eBPF program, the user-space tool issues a system call asking the kernel to load it.
   2. The kernel first verifies the eBPF program, checking its correctness and legality, and performs further processing on it.
   3. According to the user's request, the kernel attaches the eBPF program to one of its attachment points (kprobe/uprobe/tracepoint).
   4. While the kernel runs, the eBPF program is executed whenever these attachment points are triggered by the corresponding events.
3. Data exchange
   1. An eBPF program can collect information from the kernel, and user-space tools can selectively retrieve it.
   2. An eBPF program can write information directly to a file, and user-space tools obtain it by reading and parsing the file's contents.
   3. eBPF programs share and exchange data between the kernel and user space through maps.
## User-space support

There are many user-space eBPF libraries, such as libbpf for C, bcc for Python, and Aya for Rust; overall they follow roughly the same workflow. DragonOS currently supports eBPF programs written with the [Aya](https://github.com/aya-rs/aya) framework. Taking Aya as an example, the user-space tooling does the following:

1. Provides the helper-function and map abstractions used by eBPF, making it easier to implement eBPF programs
2. Processes the compiled eBPF program and issues system calls to create maps, obtaining the corresponding file descriptors
3. Updates map values as needed (.data)
4. Patches the relevant instructions of the eBPF program according to its relocation information
5. Handles bpf-to-bpf calls in the eBPF program according to the kernel version
6. Loads the eBPF program into the kernel
7. Wraps the system calls and provides a large set of functions for accessing eBPF information and interacting with the kernel

DragonOS does not support the Aya library completely. By trimming Aya down, we implemented a smaller [tiny-aya](https://github.com/DragonOS-Community/tiny-aya). To remain compatible with Aya later on, tiny-aya only modifies aya, the core crate of Aya; **some functions are disabled because the system calls or files they require have not been implemented in DragonOS yet**.
### Tokio

Aya requires an async runtime. By adding a few system calls and fixing some bugs, DragonOS now supports a basic tokio runtime.

### Creating an eBPF program with Aya

As described in Aya's official [documentation](https://aya-rs.dev/book/start/development/), you only need to install the corresponding Rust toolchains following that guide, and you can then create an eBPF project from the template. Take the currently implemented `syscall_ebpf` as an example: this program counts how many times system calls are invoked and stores the counts in a HashMap.
```
├── Cargo.toml
├── README.md
├── syscall_ebpf
├── syscall_ebpf-common
├── syscall_ebpf-ebpf
└── xtask
```
The project structure inside the `user/app` directory is shown above:

- `syscall_ebpf-ebpf` contains the eBPF code itself, which is compiled to bytecode (a sketch of such a program follows this list)
- `syscall_ebpf-common` is a shared library that makes it easy for the kernel and user space to exchange information
- `syscall_ebpf` is the user-space program; it loads the eBPF program and retrieves the data the eBPF program produces
- `xtask` is a command-line tool that makes it convenient to build and run the user-space program
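
For orientation, here is a minimal sketch of what the eBPF side in `syscall_ebpf-ebpf` can look like. It is written against the `aya-ebpf` crate API and is not the exact code shipped in this PR; the map name, the single counting key, and the panic handler are illustrative, and a real program would typically key by syscall number.

```rust
#![no_std]
#![no_main]

use aya_ebpf::{
    macros::{kprobe, map},
    maps::HashMap,
    programs::ProbeContext,
};

// Illustrative map: key -> hit count; user space reads it back out later.
#[map]
static SYSCALL_COUNT: HashMap<u32, u32> = HashMap::with_max_entries(256, 0);

#[kprobe]
pub fn syscall_ebpf(_ctx: ProbeContext) -> u32 {
    // For simplicity this sketch counts every hit under a single key.
    let key: u32 = 0;
    unsafe {
        let next = SYSCALL_COUNT.get(&key).copied().unwrap_or(0) + 1;
        let _ = SYSCALL_COUNT.insert(&key, &next, 0);
    }
    0
}

#[panic_handler]
fn panic(_info: &core::panic::PanicInfo) -> ! {
    loop {}
}
```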
To run the user-space program on DragonOS, the project created from the template cannot be used directly yet:

1. The project does not match the layout DragonOS expects for user programs; this can be fixed with minor changes.
2. Because DragonOS's tokio support is not yet complete, the way the runtime is started needs a small adjustment:
```
#[tokio::main(flavor = "current_thread")]
async fn main() -> Result<(), Box<dyn Error>> {
```
3. Because the Aya support is not complete, the project's aya and aya-log dependencies need to be swapped for the tiny-aya implementations:
```
[dependencies]
aya = { git = "https://github.com/DragonOS-Community/tiny-aya.git" }
aya-log = { git = "https://github.com/DragonOS-Community/tiny-aya.git" }
```
With only these small modifications, the existing Aya tooling can be used to implement the eBPF program.
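
On the user-space side, `syscall_ebpf` follows the usual Aya loading pattern. The sketch below is illustrative rather than the code in this PR: the bytecode path, program name, and probed symbol are assumptions, and tiny-aya may expose a smaller API surface than upstream aya.

```rust
use aya::{include_bytes_aligned, programs::KProbe, Bpf};
use std::error::Error;

#[tokio::main(flavor = "current_thread")]
async fn main() -> Result<(), Box<dyn Error>> {
    // Bytecode produced by the syscall_ebpf-ebpf crate (path is illustrative).
    let mut bpf = Bpf::load(include_bytes_aligned!(
        "../../target/bpfel-unknown-none/release/syscall_ebpf"
    ))?;

    // Look the program up by the name of its #[kprobe] function,
    // load it into the kernel, and attach it to a kernel symbol
    // (the symbol name here is a placeholder).
    let program: &mut KProbe = bpf.program_mut("syscall_ebpf").unwrap().try_into()?;
    program.load()?;
    program.attach("sys_call_handler", 0)?;

    // ... periodically read the counters back out of the map here ...
    Ok(())
}
```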
## Kernel-side support

Kernel-side support consists of the following parts:

1. The kprobe implementation, located in `kernel/crates/kprobe`
2. The rbpf runtime, located in `kernel/crates/rbpf`
3. System call support
4. Helper function support

### rbpf

rbpf was previously only used to run fairly simple eBPF programs, so it needed several modifications to run more complex ones:

1. Added support for bpf-to-bpf calls, by introducing a new stack abstraction and saving/restoring the necessary registers
2. Disabled unnecessary internal memory checks, which are normally performed by the kernel verifier
3. Added owned data structures to avoid lifetime restrictions

### System calls

All eBPF-related system calls are funneled through `bpf()`, with the cmd argument selecting the operation. Current support looks like this:
```rust
pub fn bpf(cmd: bpf_cmd, attr: &bpf_attr) -> Result<usize> {
let res = match cmd {
// Map related commands
bpf_cmd::BPF_MAP_CREATE => map::bpf_map_create(attr),
bpf_cmd::BPF_MAP_UPDATE_ELEM => map::bpf_map_update_elem(attr),
bpf_cmd::BPF_MAP_LOOKUP_ELEM => map::bpf_lookup_elem(attr),
bpf_cmd::BPF_MAP_GET_NEXT_KEY => map::bpf_map_get_next_key(attr),
bpf_cmd::BPF_MAP_DELETE_ELEM => map::bpf_map_delete_elem(attr),
bpf_cmd::BPF_MAP_LOOKUP_AND_DELETE_ELEM => map::bpf_map_lookup_and_delete_elem(attr),
bpf_cmd::BPF_MAP_LOOKUP_BATCH => map::bpf_map_lookup_batch(attr),
bpf_cmd::BPF_MAP_FREEZE => map::bpf_map_freeze(attr),
// Program related commands
bpf_cmd::BPF_PROG_LOAD => prog::bpf_prog_load(attr),
// Object creation commands
bpf_cmd::BPF_BTF_LOAD => {
error!("bpf cmd {:?} not implemented", cmd);
return Err(SystemError::ENOSYS);
}
ty => {
unimplemented!("bpf cmd {:?} not implemented", ty)
}
};
res
}
```
The map-creation command is further dispatched on the specific map type; support has been added for most of the generic map types:
```rust
bpf_map_type::BPF_MAP_TYPE_ARRAY
bpf_map_type::BPF_MAP_TYPE_PERCPU_ARRAY
bpf_map_type::BPF_MAP_TYPE_PERF_EVENT_ARRAY
bpf_map_type::BPF_MAP_TYPE_HASH
bpf_map_type::BPF_MAP_TYPE_PERCPU_HASH
bpf_map_type::BPF_MAP_TYPE_QUEUE
bpf_map_type::BPF_MAP_TYPE_STACK
bpf_map_type::BPF_MAP_TYPE_LRU_HASH
bpf_map_type::BPF_MAP_TYPE_LRU_PERCPU_HASH
bpf_map_type::BPF_MAP_TYPE_CPUMAP
| bpf_map_type::BPF_MAP_TYPE_DEVMAP
| bpf_map_type::BPF_MAP_TYPE_DEVMAP_HASH => {
error!("bpf map type {:?} not implemented", map_meta.map_type);
Err(SystemError::EINVAL)?
}
```
Every map implements a common interface whose definition follows the Linux implementation:
```rust
pub trait BpfMapCommonOps: Send + Sync + Debug + CastFromSync {
/// Lookup an element in the map.
///
/// See https://ebpf-docs.dylanreimerink.nl/linux/helper-function/bpf_map_lookup_elem/
fn lookup_elem(&mut self, _key: &[u8]) -> Result<Option<&[u8]>> {
Err(SystemError::ENOSYS)
}
/// Update an element in the map.
///
/// See https://ebpf-docs.dylanreimerink.nl/linux/helper-function/bpf_map_update_elem/
fn update_elem(&mut self, _key: &[u8], _value: &[u8], _flags: u64) -> Result<()> {
Err(SystemError::ENOSYS)
}
/// Delete an element from the map.
///
/// See https://ebpf-docs.dylanreimerink.nl/linux/helper-function/bpf_map_delete_elem/
fn delete_elem(&mut self, _key: &[u8]) -> Result<()> {
Err(SystemError::ENOSYS)
}
/// For each element in map, call callback_fn function with map,
/// callback_ctx and other map-specific parameters.
///
/// See https://ebpf-docs.dylanreimerink.nl/linux/helper-function/bpf_for_each_map_elem/
fn for_each_elem(&mut self, _cb: BpfCallBackFn, _ctx: *const u8, _flags: u64) -> Result<u32> {
Err(SystemError::ENOSYS)
}
/// Look up an element with the given key in the map referred to by the file descriptor fd,
/// and if found, delete the element.
fn lookup_and_delete_elem(&mut self, _key: &[u8], _value: &mut [u8]) -> Result<()> {
Err(SystemError::ENOSYS)
}
/// perform a lookup in percpu map for an entry associated to key on cpu.
fn lookup_percpu_elem(&mut self, _key: &[u8], cpu: u32) -> Result<Option<&[u8]>> {
Err(SystemError::ENOSYS)
}
/// Get the next key in the map. If key is None, get the first key.
///
/// Called from syscall
fn get_next_key(&self, _key: Option<&[u8]>, _next_key: &mut [u8]) -> Result<()> {
Err(SystemError::ENOSYS)
}
/// Push an element value in map.
fn push_elem(&mut self, _value: &[u8], _flags: u64) -> Result<()> {
Err(SystemError::ENOSYS)
}
/// Pop an element value from map.
fn pop_elem(&mut self, _value: &mut [u8]) -> Result<()> {
Err(SystemError::ENOSYS)
}
/// Peek an element value from map.
fn peek_elem(&self, _value: &mut [u8]) -> Result<()> {
Err(SystemError::ENOSYS)
}
/// Freeze the map.
///
/// It's useful for .rodata maps.
fn freeze(&self) -> Result<()> {
Err(SystemError::ENOSYS)
}
/// Get the first value pointer.
fn first_value_ptr(&self) -> *const u8 {
panic!("value_ptr not implemented")
}
}
```
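
To show how a new map type slots into this interface, here is a minimal sketch of an array-style map that implements a subset of `BpfMapCommonOps`. It is not the in-tree implementation: it assumes the kernel's `Result`/`SystemError` aliases are in scope, that the `CastFromSync` supertrait is satisfied elsewhere, and that keys are native-endian `u32` indices as in Linux array maps.

```rust
use alloc::vec::Vec;

/// Illustrative array map: a flat buffer of max_entries * value_size bytes.
#[derive(Debug)]
pub struct SimpleArrayMap {
    value_size: usize,
    data: Vec<u8>,
}

impl SimpleArrayMap {
    pub fn new(value_size: usize, max_entries: usize) -> Self {
        Self {
            value_size,
            data: alloc::vec![0u8; value_size * max_entries],
        }
    }

    /// Translate a 4-byte key into a byte offset, rejecting out-of-range indices.
    fn offset_of(&self, key: &[u8]) -> Result<usize> {
        let idx = u32::from_ne_bytes(key.try_into().map_err(|_| SystemError::EINVAL)?) as usize;
        let offset = idx * self.value_size;
        if offset + self.value_size > self.data.len() {
            return Err(SystemError::EINVAL);
        }
        Ok(offset)
    }
}

impl BpfMapCommonOps for SimpleArrayMap {
    fn lookup_elem(&mut self, key: &[u8]) -> Result<Option<&[u8]>> {
        let off = self.offset_of(key)?;
        Ok(Some(&self.data[off..off + self.value_size]))
    }
    fn update_elem(&mut self, key: &[u8], value: &[u8], _flags: u64) -> Result<()> {
        if value.len() < self.value_size {
            return Err(SystemError::EINVAL);
        }
        let off = self.offset_of(key)?;
        self.data[off..off + self.value_size].copy_from_slice(&value[..self.value_size]);
        Ok(())
    }
    fn delete_elem(&mut self, _key: &[u8]) -> Result<()> {
        // Linux array maps do not support deleting elements; mirror that here.
        Err(SystemError::EINVAL)
    }
}
```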
The system call that ties eBPF and kprobe together is [`perf_event_open`](https://man7.org/linux/man-pages/man2/perf_event_open.2.html). This system call is very complex in Linux, so DragonOS does not mirror the Linux implementation; only two of its modes are currently supported:
```rust
match args.type_ {
// Kprobe
// See /sys/bus/event_source/devices/kprobe/type
perf_type_id::PERF_TYPE_MAX => {
let kprobe_event = kprobe::perf_event_open_kprobe(args);
Box::new(kprobe_event)
}
perf_type_id::PERF_TYPE_SOFTWARE => {
// For bpf prog output
assert_eq!(args.config, perf_sw_ids::PERF_COUNT_SW_BPF_OUTPUT);
assert_eq!(
args.sample_type,
Some(perf_event_sample_format::PERF_SAMPLE_RAW)
);
let bpf_event = bpf::perf_event_open_bpf(args);
Box::new(bpf_event)
}
}
```
- `PERF_TYPE_SOFTWARE` creates a software-defined event; `PERF_COUNT_SW_BPF_OUTPUT` ensures the event is used to collect bpf output.
- `PERF_TYPE_MAX` usually indicates creating a kprobe/uprobe event, which is one of the ways user programs use kprobes; user programs can attach an eBPF program to such an event.

Likewise, the different perf event types implement a common interface:
```rust
pub trait PerfEventOps: Send + Sync + Debug + CastFromSync + CastFrom {
fn mmap(&self, _start: usize, _len: usize, _offset: usize) -> Result<()> {
panic!("mmap not implemented for PerfEvent");
}
fn set_bpf_prog(&self, _bpf_prog: Arc<File>) -> Result<()> {
panic!("set_bpf_prog not implemented for PerfEvent");
}
fn enable(&self) -> Result<()> {
panic!("enable not implemented");
}
fn disable(&self) -> Result<()> {
panic!("disable not implemented");
}
fn readable(&self) -> bool {
panic!("readable not implemented");
}
}
```
This interface is not yet stable.
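
As a rough illustration of the intended shape, an event type only needs to override the operations that make sense for it and can leave the rest to the panicking defaults. The sketch below is not the in-tree kprobe/bpf event implementation; it assumes the kernel's `SpinLock`, `Arc<File>`, and `Result` types are in scope and that the `CastFrom*` supertraits are satisfied elsewhere.

```rust
#[derive(Debug)]
pub struct ExamplePerfEvent {
    enabled: core::sync::atomic::AtomicBool,
    bpf_prog: SpinLock<Option<Arc<File>>>,
}

impl PerfEventOps for ExamplePerfEvent {
    fn set_bpf_prog(&self, bpf_prog: Arc<File>) -> Result<()> {
        // Remember the eBPF program attached to this event.
        *self.bpf_prog.lock() = Some(bpf_prog);
        Ok(())
    }
    fn enable(&self) -> Result<()> {
        self.enabled.store(true, core::sync::atomic::Ordering::Relaxed);
        Ok(())
    }
    fn disable(&self) -> Result<()> {
        self.enabled.store(false, core::sync::atomic::Ordering::Relaxed);
        Ok(())
    }
    fn readable(&self) -> bool {
        // This sketch has no ring buffer of its own, so there is never data to read.
        false
    }
}
```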
### Helper function support

User-space tools talk to the kernel through system calls to set up and exchange eBPF data. Inside the kernel, a running eBPF program also needs the kernel's help: an eBPF program on its own is not very useful, so it calls the kernel-provided `helper` functions to access kernel resources.

Most of the `helper` functions supported so far are related to map operations:
```rust
/// Initialize the helper functions.
pub fn init_helper_functions() {
let mut map = BTreeMap::new();
unsafe {
// Map helpers::Generic map helpers
map.insert(1, define_func!(raw_map_lookup_elem));
map.insert(2, define_func!(raw_map_update_elem));
map.insert(3, define_func!(raw_map_delete_elem));
map.insert(164, define_func!(raw_map_for_each_elem));
map.insert(195, define_func!(raw_map_lookup_percpu_elem));
// map.insert(93,define_func!(raw_bpf_spin_lock);
// map.insert(94,define_func!(raw_bpf_spin_unlock);
// Map helpers::Perf event array helpers
map.insert(25, define_func!(raw_perf_event_output));
// Probe and trace helpers::Memory helpers
map.insert(4, define_func!(raw_bpf_probe_read));
// Print helpers
map.insert(6, define_func!(trace_printf));
// Map helpers::Queue and stack helpers
map.insert(87, define_func!(raw_map_push_elem));
map.insert(88, define_func!(raw_map_pop_elem));
map.insert(89, define_func!(raw_map_peek_elem));
}
BPF_HELPER_FUN_SET.init(map);
}
```
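
These helpers still have to be visible to the program at execution time. A minimal sketch of wiring such a helper into an rbpf VM, using the `register_helper` API documented in the vendored rbpf README further down, could look like this; the helper body is a stub, and only the ID 6 follows the numbering in the table above.

```rust
// A helper must have rbpf's `Helper` signature: fn(u64, u64, u64, u64, u64) -> u64.
fn trace_printf_stub(fmt_ptr: u64, fmt_len: u64, a1: u64, a2: u64, a3: u64) -> u64 {
    // A real implementation formats and prints; this stub just ignores its arguments.
    let _ = (fmt_ptr, fmt_len, a1, a2, a3);
    0
}

// Run raw eBPF bytecode against a context buffer with one helper registered.
fn run_prog(prog: &[u8], ctx: &mut [u8]) -> u64 {
    let mut vm = rbpf::EbpfVmRaw::new(Some(prog)).unwrap();
    // The ID must match what the bytecode was compiled against (6 => trace_printf above).
    vm.register_helper(6, trace_printf_stub).unwrap();
    vm.execute_program(ctx).unwrap()
}
```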

(new binary image file, 65 KiB; not shown)


@ -0,0 +1,11 @@
Kernel tracing mechanisms
====================================

Kernel tracing is made up of many features, such as kprobe/uprobe/tracepoint/ftrace, as well as eBPF, which extends kernel observability. The kernel currently supports kprobe and eBPF; this chapter introduces these two mechanisms.

.. toctree::
   :maxdepth: 1
   :caption: Contents

   eBPF
   kprobe


@ -0,0 +1,57 @@
# kprobe

> Author: Chen Linfeng
>
> Email: chenlinfeng25@outlook.com

## Overview

Linux kprobes is a lightweight kernel debugging technique designed to make it easy to trace the execution of kernel functions. With kprobes, kernel developers can dynamically insert probe points into most kernel functions to collect the debugging and status information they need, with essentially no impact on the kernel's normal execution flow.

kprobes relies on architecture-specific hardware support, mainly the CPU's exception handling and single-step debugging mechanisms: the former traps the execution flow into the user-registered callbacks, and the latter single-steps the instruction at the probe point. Note that on some architectures the hardware does not support single stepping; this can be worked around by emulating it in software (for example on riscv).

## How kprobe works
<img src="./kprobe_flow.png" style="zoom: 67%;" alt="xxx"/>
1. When a kprobe is registered, each kprobe corresponds to a kprobe structure that records the location of the probe point and the original instruction at that location.
2. The instruction at the probe point is replaced with an exception-raising instruction, so that when the CPU reaches the probe point it traps into an exception (on x86_64 the instruction is int3, or jmp if the kprobe has been optimized).
3. When the exception instruction executes, the system checks whether the exception was installed by kprobe. If so, it runs the kprobe's pre_handler, then uses the CPU's single-step facility to set the relevant registers, arranges for the next instruction to be the original instruction saved from the probe point, and returns from the exception.
4. Execution traps into the exception state again: because single stepping was armed in the previous step, the original instruction triggers another exception as soon as it has executed. At this point single stepping is cleared, post_handler runs, and execution returns safely from the exception state.
5. When the kprobe is unregistered, the original instruction is restored at the probe point.

The kernel currently supports both x86 and riscv64. Since riscv64 has no single-step mode, we emulate it with a break exception: when saving the instruction at the probe point, we append an extra break instruction, so that on riscv64 another break exception is triggered right after the original instruction has executed.

## The kprobe interface
```rust
pub fn register_kprobe(kprobe_info: KprobeInfo) -> Result<LockKprobe, SystemError>;
pub fn unregister_kprobe(kprobe: LockKprobe) -> Result<(), SystemError>;
impl KprobeBasic {
pub fn call_pre_handler(&self, trap_frame: &dyn ProbeArgs)
pub fn call_post_handler(&self, trap_frame: &dyn ProbeArgs)
pub fn call_fault_handler(&self, trap_frame: &dyn ProbeArgs)
pub fn call_event_callback(&self, trap_frame: &dyn ProbeArgs)
pub fn update_event_callback(&mut self, callback: Box<dyn CallBackFunc>)
pub fn disable(&mut self)
pub fn enable(&mut self)
pub fn is_enabled(&self) -> bool
pub fn symbol(&self) -> Option<&str>
}
```
- `call_pre_handler` invokes the user-defined callback before the probed instruction is executed
- `call_post_handler` invokes the user-defined callback after the probed instruction has been single-stepped
- `call_fault_handler` is invoked when either of the two callbacks above fails
- `call_event_callback` invokes the eBPF-related callback; like `call_post_handler`, it is called after the probed instruction has been single-stepped
- `update_event_callback` replaces the callback at run time
- `disable` and `enable` toggle the kprobe dynamically; after `disable` is called, the callbacks are not run when the kprobe fires
- `symbol` returns the name of the function at the probe point (a usage sketch based on the `KprobeBuilder` from `kernel/crates/kprobe` follows this list)
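
For reference, registering a probe through the `KprobeBuilder` defined in `kernel/crates/kprobe` (shown further down in this diff) looks roughly like the following. The symbol name, resolved address, and handlers are placeholders, and the `register_kprobe` wrapper used by the kernel proper may differ in detail.

```rust
use alloc::string::ToString;
use kprobe::{KprobeBuilder, ProbeArgs};

fn pre_handler(_frame: &dyn ProbeArgs) {
    log::info!("about to execute the probed instruction");
}

fn post_handler(_frame: &dyn ProbeArgs) {
    log::info!("probed instruction has been single-stepped");
}

fn install_example(symbol_addr: usize) {
    // Build a kprobe on a (placeholder) symbol at the resolved address, enabled immediately.
    let builder = KprobeBuilder::new(
        Some("sys_call_handler".to_string()),
        symbol_addr,
        0,    // offset into the symbol
        pre_handler,
        post_handler,
        true, // enabled
    );
    // install() swaps in the breakpoint instruction and returns the kprobe
    // together with its probe-point descriptor.
    let (_kprobe, _point) = builder.install();
}
```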

(new binary image file, 46 KiB; not shown)


@@ -24,6 +24,8 @@ kvm = []
 fatfs = []
 fatfs-secure = ["fatfs"]
+# kprobe
+kprobe_test = []
 # Runtime dependencies
 [dependencies]
@@ -58,8 +60,12 @@ wait_queue_macros = { path = "crates/wait_queue_macros" }
 paste = "=1.0.14"
 slabmalloc = { path = "crates/rust-slabmalloc" }
 log = "0.4.21"
+kprobe = { path = "crates/kprobe" }
+xarray = "0.1.0"
 lru = "0.12.3"
+rbpf = { path = "crates/rbpf" }
+printf-compat = { version = "0.1.1", default-features = false }
 # Dependencies below are used when the target is x86_64
 [target.'cfg(target_arch = "x86_64")'.dependencies]
 mini-backtrace = { git = "https://git.mirrors.dragonos.org.cn/DragonOS-Community/mini-backtrace.git", rev = "e0b1d90940" }


@@ -43,5 +43,7 @@ endif
 test:
 # Test the kernel library
-	RUSTFLAGS="$(RUSTFLAGS)" cargo +nightly-2024-07-23 test --workspace --exclude dragonos_kernel
+	RUSTFLAGS="$(RUSTFLAGS)" cargo +nightly-2024-07-23 test --workspace --exclude dragonos_kernel rbpf
+test-rbpf:
+	cd crates/rbpf && RUSTFLAGS="$(RUSTFLAGS)" cargo +nightly-2024-07-23 test --features=std,user,cranelift


@ -0,0 +1,11 @@
[package]
name = "kprobe"
version = "0.1.0"
edition = "2021"
[dependencies]
log = "0.4.21"
[target.'cfg(target_arch = "x86_64")'.dependencies]
yaxpeax-x86 = { version = "2", default-features = false, features = ["fmt"] }
yaxpeax-arch = { version = "0", default-features = false }


@ -0,0 +1,112 @@
use alloc::sync::Arc;
use core::ops::{Deref, DerefMut};
use crate::{KprobeBasic, KprobeBuilder, KprobeOps};
const BRK_KPROBE_BP: u64 = 10;
const BRK_KPROBE_SSTEPBP: u64 = 11;
const EBREAK_INST: u32 = 0x002a0000;
#[derive(Debug)]
pub struct Kprobe {
basic: KprobeBasic,
point: Arc<LA64KprobePoint>,
}
#[derive(Debug)]
pub struct LA64KprobePoint {
addr: usize,
inst_tmp: [u8; 8],
}
impl Deref for Kprobe {
type Target = KprobeBasic;
fn deref(&self) -> &Self::Target {
&self.basic
}
}
impl DerefMut for Kprobe {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.basic
}
}
impl Kprobe {
pub fn probe_point(&self) -> &Arc<LA64KprobePoint> {
&self.point
}
}
impl Drop for LA64KprobePoint {
fn drop(&mut self) {
let address = self.addr;
let inst_tmp_ptr = self.inst_tmp.as_ptr() as usize;
let inst_32 = unsafe { core::ptr::read(inst_tmp_ptr as *const u32) };
unsafe {
core::ptr::write(address as *mut u32, inst_32);
}
log::trace!(
"Kprobe::uninstall: address: {:#x}, old_instruction: {:?}",
address,
inst_32
);
}
}
impl KprobeBuilder {
pub fn install(self) -> (Kprobe, Arc<LA64KprobePoint>) {
let probe_point = match &self.probe_point {
Some(point) => point.clone(),
None => self.replace_inst(),
};
let kprobe = Kprobe {
basic: KprobeBasic::from(self),
point: probe_point.clone(),
};
(kprobe, probe_point)
}
/// # Install the kprobe
///
/// On every architecture the original instruction is saved and then replaced with a breakpoint instruction
fn replace_inst(&self) -> Arc<LA64KprobePoint> {
let address = self.symbol_addr + self.offset;
let point = LA64KprobePoint {
addr: address,
inst_tmp: [0u8; 8],
};
let inst_tmp_ptr = point.inst_tmp.as_ptr() as usize;
let inst_32 = unsafe { core::ptr::read(address as *const u32) };
unsafe {
core::ptr::write(address as *mut u32, EBREAK_INST);
// inst_32 :0-32
// ebreak :32-64
core::ptr::write(inst_tmp_ptr as *mut u32, inst_32);
core::ptr::write((inst_tmp_ptr + 4) as *mut u32, EBREAK_INST);
}
log::trace!(
"Kprobe::install: address: {:#x}, func_name: {:?}, opcode: {:x?}",
address,
self.symbol,
inst_32
);
Arc::new(point)
}
}
impl KprobeOps for LA64KprobePoint {
fn return_address(&self) -> usize {
self.addr + 4
}
fn single_step_address(&self) -> usize {
self.inst_tmp.as_ptr() as usize
}
fn debug_address(&self) -> usize {
self.inst_tmp.as_ptr() as usize + 4
}
fn break_address(&self) -> usize {
self.addr
}
}


@ -0,0 +1,211 @@
use alloc::boxed::Box;
use alloc::string::String;
use alloc::sync::Arc;
use core::{any::Any, fmt::Debug};
#[cfg(target_arch = "loongarch64")]
mod loongarch64;
#[cfg(target_arch = "riscv64")]
mod rv64;
#[cfg(target_arch = "x86_64")]
mod x86;
#[cfg(target_arch = "loongarch64")]
pub use loongarch64::*;
#[cfg(target_arch = "riscv64")]
pub use rv64::*;
#[cfg(target_arch = "x86_64")]
pub use x86::*;
#[cfg(target_arch = "x86_64")]
pub type KprobePoint = X86KprobePoint;
#[cfg(target_arch = "riscv64")]
pub type KprobePoint = Rv64KprobePoint;
#[cfg(target_arch = "loongarch64")]
pub type KprobePoint = LA64KprobePoint;
pub trait ProbeArgs: Send {
/// Lets the caller downcast to the architecture-specific TrapFrame
fn as_any(&self) -> &dyn Any;
/// Returns the address that caused the break exception
fn break_address(&self) -> usize;
/// Returns the address that caused the single-step exception
fn debug_address(&self) -> usize;
}
pub trait KprobeOps: Send {
/// # Return the address of the instruction following the probe point
///
/// The execution flow must get back onto its normal path: after the probed
/// instruction has been executed, control returns to the next instruction.
fn return_address(&self) -> usize;
/// # Return the address of the instruction to single-step
///
/// The original instruction at the probe point is usually saved into an array.
/// Depending on the architecture, extra instructions may be appended after it:
/// for example, x86 supports hardware single stepping, while other architectures
/// usually do not, so we emulate it with a break exception and therefore append
/// a breakpoint instruction.
fn single_step_address(&self) -> usize;
/// # Return the address at which the single-stepped instruction raises its exception
///
/// Equal to `single_step_address` plus the length of the probed instruction.
fn debug_address(&self) -> usize;
/// # Return the address where the break breakpoint is installed
///
/// Equal to the probe point address.
fn break_address(&self) -> usize;
}
struct ProbeHandler {
func: fn(&dyn ProbeArgs),
}
impl ProbeHandler {
pub fn new(func: fn(&dyn ProbeArgs)) -> Self {
ProbeHandler { func }
}
/// Invoke the probe-point handler
pub fn call(&self, trap_frame: &dyn ProbeArgs) {
(self.func)(trap_frame);
}
}
pub struct KprobeBuilder {
symbol: Option<String>,
symbol_addr: usize,
offset: usize,
pre_handler: ProbeHandler,
post_handler: ProbeHandler,
fault_handler: Option<ProbeHandler>,
event_callback: Option<Box<dyn CallBackFunc>>,
probe_point: Option<Arc<KprobePoint>>,
enable: bool,
}
pub trait EventCallback: Send {
fn call(&self, trap_frame: &dyn ProbeArgs);
}
impl KprobeBuilder {
pub fn new(
symbol: Option<String>,
symbol_addr: usize,
offset: usize,
pre_handler: fn(&dyn ProbeArgs),
post_handler: fn(&dyn ProbeArgs),
enable: bool,
) -> Self {
KprobeBuilder {
symbol,
symbol_addr,
offset,
pre_handler: ProbeHandler::new(pre_handler),
post_handler: ProbeHandler::new(post_handler),
event_callback: None,
fault_handler: None,
probe_point: None,
enable,
}
}
pub fn with_fault_handler(mut self, func: fn(&dyn ProbeArgs)) -> Self {
self.fault_handler = Some(ProbeHandler::new(func));
self
}
pub fn with_probe_point(mut self, point: Arc<KprobePoint>) -> Self {
self.probe_point = Some(point);
self
}
pub fn with_event_callback(mut self, event_callback: Box<dyn CallBackFunc>) -> Self {
self.event_callback = Some(event_callback);
self
}
/// Get the address of the probe point
///
/// The probe point address == the address of the break instruction
pub fn probe_addr(&self) -> usize {
self.symbol_addr + self.offset
}
}
pub struct KprobeBasic {
symbol: Option<String>,
symbol_addr: usize,
offset: usize,
pre_handler: ProbeHandler,
post_handler: ProbeHandler,
fault_handler: ProbeHandler,
event_callback: Option<Box<dyn CallBackFunc>>,
enable: bool,
}
pub trait CallBackFunc: Send + Sync {
fn call(&self, trap_frame: &dyn ProbeArgs);
}
impl Debug for KprobeBasic {
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
f.debug_struct("Kprobe")
.field("symbol", &self.symbol)
.field("symbol_addr", &self.symbol_addr)
.field("offset", &self.offset)
.finish()
}
}
impl KprobeBasic {
pub fn call_pre_handler(&self, trap_frame: &dyn ProbeArgs) {
self.pre_handler.call(trap_frame);
}
pub fn call_post_handler(&self, trap_frame: &dyn ProbeArgs) {
self.post_handler.call(trap_frame);
}
pub fn call_fault_handler(&self, trap_frame: &dyn ProbeArgs) {
self.fault_handler.call(trap_frame);
}
pub fn call_event_callback(&self, trap_frame: &dyn ProbeArgs) {
if let Some(ref call_back) = self.event_callback {
call_back.call(trap_frame);
}
}
pub fn update_event_callback(&mut self, callback: Box<dyn CallBackFunc>) {
self.event_callback = Some(callback);
}
pub fn disable(&mut self) {
self.enable = false;
}
pub fn enable(&mut self) {
self.enable = true;
}
pub fn is_enabled(&self) -> bool {
self.enable
}
/// Returns the name of the function at the probe point
pub fn symbol(&self) -> Option<&str> {
self.symbol.as_deref()
}
}
impl From<KprobeBuilder> for KprobeBasic {
fn from(value: KprobeBuilder) -> Self {
let fault_handler = value.fault_handler.unwrap_or(ProbeHandler::new(|_| {}));
KprobeBasic {
symbol: value.symbol,
symbol_addr: value.symbol_addr,
offset: value.offset,
pre_handler: value.pre_handler,
post_handler: value.post_handler,
event_callback: value.event_callback,
fault_handler,
enable: value.enable,
}
}
}


@ -0,0 +1,157 @@
use alloc::sync::Arc;
use core::{
arch::riscv64::sfence_vma_all,
fmt::Debug,
ops::{Deref, DerefMut},
};
use crate::{KprobeBasic, KprobeBuilder, KprobeOps};
const EBREAK_INST: u32 = 0x00100073; // ebreak
const C_EBREAK_INST: u32 = 0x9002; // c.ebreak
const INSN_LENGTH_MASK: u16 = 0x3;
const INSN_LENGTH_32: u16 = 0x3;
#[derive(Debug)]
pub struct Kprobe {
basic: KprobeBasic,
point: Arc<Rv64KprobePoint>,
}
#[derive(Debug)]
enum OpcodeTy {
Inst16(u16),
Inst32(u32),
}
#[derive(Debug)]
pub struct Rv64KprobePoint {
addr: usize,
old_instruction: OpcodeTy,
inst_tmp: [u8; 8],
}
impl Deref for Kprobe {
type Target = KprobeBasic;
fn deref(&self) -> &Self::Target {
&self.basic
}
}
impl DerefMut for Kprobe {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.basic
}
}
impl Kprobe {
pub fn probe_point(&self) -> &Arc<Rv64KprobePoint> {
&self.point
}
}
impl Drop for Rv64KprobePoint {
fn drop(&mut self) {
let address = self.addr;
match self.old_instruction {
OpcodeTy::Inst16(inst_16) => unsafe {
core::ptr::write(address as *mut u16, inst_16);
},
OpcodeTy::Inst32(inst_32) => unsafe {
core::ptr::write(address as *mut u32, inst_32);
},
}
unsafe {
sfence_vma_all();
}
log::trace!(
"Kprobe::uninstall: address: {:#x}, old_instruction: {:?}",
address,
self.old_instruction
);
}
}
impl KprobeBuilder {
pub fn install(self) -> (Kprobe, Arc<Rv64KprobePoint>) {
let probe_point = match &self.probe_point {
Some(point) => point.clone(),
None => self.replace_inst(),
};
let kprobe = Kprobe {
basic: KprobeBasic::from(self),
point: probe_point.clone(),
};
(kprobe, probe_point)
}
/// # Install the kprobe
///
/// On every architecture the original instruction is saved and then replaced with a breakpoint instruction
fn replace_inst(&self) -> Arc<Rv64KprobePoint> {
let address = self.symbol_addr + self.offset;
let inst_16 = unsafe { core::ptr::read(address as *const u16) };
// See https://elixir.bootlin.com/linux/v6.10.2/source/arch/riscv/kernel/probes/kprobes.c#L68
let is_inst_16 = if (inst_16 & INSN_LENGTH_MASK) == INSN_LENGTH_32 {
false
} else {
true
};
let mut point = Rv64KprobePoint {
old_instruction: OpcodeTy::Inst16(0),
inst_tmp: [0; 8],
addr: address,
};
let inst_tmp_ptr = point.inst_tmp.as_ptr() as usize;
if is_inst_16 {
point.old_instruction = OpcodeTy::Inst16(inst_16);
unsafe {
core::ptr::write(address as *mut u16, C_EBREAK_INST as u16);
// inst_16 :0-16
// c.ebreak:16-32
core::ptr::write(inst_tmp_ptr as *mut u16, inst_16);
core::ptr::write((inst_tmp_ptr + 2) as *mut u16, C_EBREAK_INST as u16);
}
} else {
let inst_32 = unsafe { core::ptr::read(address as *const u32) };
point.old_instruction = OpcodeTy::Inst32(inst_32);
unsafe {
core::ptr::write(address as *mut u32, EBREAK_INST);
// inst_32 :0-32
// ebreak :32-64
core::ptr::write(inst_tmp_ptr as *mut u32, inst_32);
core::ptr::write((inst_tmp_ptr + 4) as *mut u32, EBREAK_INST);
}
}
unsafe {
sfence_vma_all();
}
log::trace!(
"Kprobe::install: address: {:#x}, func_name: {:?}, opcode: {:x?}",
address,
self.symbol,
point.old_instruction
);
Arc::new(point)
}
}
impl KprobeOps for Rv64KprobePoint {
fn return_address(&self) -> usize {
let address = self.addr;
match self.old_instruction {
OpcodeTy::Inst16(_) => address + 2,
OpcodeTy::Inst32(_) => address + 4,
}
}
fn single_step_address(&self) -> usize {
self.inst_tmp.as_ptr() as usize
}
fn debug_address(&self) -> usize {
match self.old_instruction {
OpcodeTy::Inst16(_) => self.inst_tmp.as_ptr() as usize + 2,
OpcodeTy::Inst32(_) => self.inst_tmp.as_ptr() as usize + 4,
}
}
fn break_address(&self) -> usize {
self.addr
}
}


@ -0,0 +1,135 @@
use crate::{KprobeBasic, KprobeBuilder, KprobeOps};
use alloc::string::ToString;
use alloc::sync::Arc;
use core::{
fmt::Debug,
ops::{Deref, DerefMut},
};
use yaxpeax_arch::LengthedInstruction;
const EBREAK_INST: u8 = 0xcc; // x86_64: 0xcc
const MAX_INSTRUCTION_SIZE: usize = 15; // x86_64 max instruction length
pub struct Kprobe {
basic: KprobeBasic,
point: Arc<X86KprobePoint>,
}
#[derive(Debug)]
pub struct X86KprobePoint {
addr: usize,
old_instruction: [u8; MAX_INSTRUCTION_SIZE],
old_instruction_len: usize,
}
impl Drop for X86KprobePoint {
fn drop(&mut self) {
let address = self.addr;
unsafe {
core::ptr::copy(
self.old_instruction.as_ptr(),
address as *mut u8,
self.old_instruction_len,
);
core::arch::x86_64::_mm_mfence();
}
let decoder = yaxpeax_x86::amd64::InstDecoder::default();
let inst = decoder.decode_slice(&self.old_instruction).unwrap();
log::trace!(
"Kprobe::uninstall: address: {:#x}, old_instruction: {:?}",
address,
inst.to_string()
);
}
}
impl Debug for Kprobe {
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
f.debug_struct("Kprobe")
.field("basic", &self.basic)
.field("point", &self.point)
.finish()
}
}
impl Deref for Kprobe {
type Target = KprobeBasic;
fn deref(&self) -> &Self::Target {
&self.basic
}
}
impl DerefMut for Kprobe {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.basic
}
}
impl KprobeBuilder {
pub fn install(self) -> (Kprobe, Arc<X86KprobePoint>) {
let probe_point = match &self.probe_point {
Some(point) => point.clone(),
None => self.replace_inst(),
};
let kprobe = Kprobe {
basic: KprobeBasic::from(self),
point: probe_point.clone(),
};
(kprobe, probe_point)
}
/// # Install the kprobe
///
/// On every architecture the original instruction is saved and then replaced with a breakpoint instruction
fn replace_inst(&self) -> Arc<X86KprobePoint> {
let address = self.symbol_addr + self.offset;
let mut inst_tmp = [0u8; MAX_INSTRUCTION_SIZE];
unsafe {
core::ptr::copy(
address as *const u8,
inst_tmp.as_mut_ptr(),
MAX_INSTRUCTION_SIZE,
);
}
let decoder = yaxpeax_x86::amd64::InstDecoder::default();
let inst = decoder.decode_slice(&inst_tmp).unwrap();
let len = inst.len().to_const();
log::trace!("inst: {:?}, len: {:?}", inst.to_string(), len);
let point = Arc::new(X86KprobePoint {
addr: address,
old_instruction: inst_tmp,
old_instruction_len: len as usize,
});
unsafe {
core::ptr::write_volatile(address as *mut u8, EBREAK_INST);
core::arch::x86_64::_mm_mfence();
}
log::trace!(
"Kprobe::install: address: {:#x}, func_name: {:?}",
address,
self.symbol
);
point
}
}
impl Kprobe {
pub fn probe_point(&self) -> &Arc<X86KprobePoint> {
&self.point
}
}
impl KprobeOps for X86KprobePoint {
fn return_address(&self) -> usize {
self.addr + self.old_instruction_len
}
fn single_step_address(&self) -> usize {
self.old_instruction.as_ptr() as usize
}
fn debug_address(&self) -> usize {
self.old_instruction.as_ptr() as usize + self.old_instruction_len
}
fn break_address(&self) -> usize {
self.addr
}
}


@ -0,0 +1,7 @@
#![cfg_attr(target_arch = "riscv64", feature(riscv_ext_intrinsics))]
#![no_std]
extern crate alloc;
mod arch;
pub use arch::*;


@ -0,0 +1,21 @@
version: 1.0.{build}

branches:
  only:
    - main

os:
  - Visual Studio 2015

clone_depth: 1

configuration:
  - Debug

platform:
  - x64

environment:
  matrix:
    - TOOLCHAIN_VERSION: 14.0
      RUST: 1.76.0
    - TOOLCHAIN_VERSION: 14.0
      RUST: beta
    - TOOLCHAIN_VERSION: 14.0
      RUST: nightly

build_script: mk/appveyor.bat

kernel/crates/rbpf/.gitignore (new file, 2 lines)

@ -0,0 +1,2 @@
target
Cargo.lock


@ -0,0 +1,78 @@
[package]
# Project metadata
name = "rbpf"
version = "0.2.0"
authors = ["Quentin <quentin@isovalent.com>"]
# Additional metadata for packaging
description = "Virtual machine and JIT compiler for eBPF programs"
repository = "https://github.com/qmonnet/rbpf"
readme = "README.md"
keywords = ["BPF", "eBPF", "interpreter", "JIT", "filtering"]
license = "Apache-2.0/MIT"
edition = "2021"
# Packaging directives
include = [
"src/**",
"examples/**",
"tests/**",
"bench/**",
"LICENSE*",
"Cargo.toml",
]
[dependencies]
# Default features (std) are disabled so that the dependencies don't pull in the
# standard library when the crate is compiled for no_std
byteorder = { version = "1.2", default-features = false }
log = {version = "0.4.21", default-features = false }
combine = { version = "4.6", default-features = false }
# Optional Dependencies when using the standard library
libc = { version = "0.2", optional = true }
time = { version = "0.2", optional = true }
# Optional Dependencies for the CraneLift JIT
cranelift-codegen = { version = "0.99", optional = true }
cranelift-frontend = { version = "0.99", optional = true }
cranelift-jit = { version = "0.99", optional = true }
cranelift-native = { version = "0.99", optional = true }
cranelift-module = { version = "0.99", optional = true }
[dev-dependencies]
elf = "0.0.10"
json = "0.11"
hex = "0.4.3"
[features]
#default = ["std", "user", "cranelift"]
cargo-clippy = []
std = ["dep:time", "dep:libc", "combine/std"]
cranelift = [
"dep:cranelift-codegen",
"dep:cranelift-frontend",
"dep:cranelift-jit",
"dep:cranelift-native",
"dep:cranelift-module",
]
user = []
# Examples that depend on the standard library should be disabled when
# testing the `no_std` configuration.
[[example]]
name = "disassemble"
required-features = ["std"]
[[example]]
name = "uptime"
required-features = ["std"]
[[example]]
name = "to_json"
[[example]]
name = "rbpf_plugin"


@ -0,0 +1,202 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.


@ -0,0 +1,25 @@
Copyright (c) 2016 6WIND S.A.
Permission is hereby granted, free of charge, to any
person obtaining a copy of this software and associated
documentation files (the "Software"), to deal in the
Software without restriction, including without
limitation the rights to use, copy, modify, merge,
publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software
is furnished to do so, subject to the following
conditions:
The above copyright notice and this permission notice
shall be included in all copies or substantial portions
of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.


@ -0,0 +1,743 @@
# rbpf
<picture>
<source media="(prefers-color-scheme: dark)" srcset="misc/rbpf_256_border.png">
<img src="misc/rbpf_256.png">
</picture>
Rust (user-space) virtual machine for eBPF
[![Build Status](https://github.com/qmonnet/rbpf/actions/workflows/test.yaml/badge.svg)](https://github.com/qmonnet/rbpf/actions/workflows/test.yaml)
[![Build status](https://ci.appveyor.com/api/projects/status/ia74coeuhxtrcvsk/branch/main?svg=true)](https://ci.appveyor.com/project/qmonnet/rbpf/branch/main)
[![Coverage Status](https://coveralls.io/repos/github/qmonnet/rbpf/badge.svg?branch=main)](https://coveralls.io/github/qmonnet/rbpf?branch=main)
[![Crates.io](https://img.shields.io/crates/v/rbpf.svg)](https://crates.io/crates/rbpf)
* [Description](#description)
* [Link to the crate](#link-to-the-crate)
* [API](#api)
* [Example uses](#example-uses)
* [Building eBPF programs](#building-ebpf-programs)
* [Build Features](#build-features)
* [Feedback welcome!](#feedback-welcome)
* [Questions / Answers](#questions--answers)
* [Caveats](#caveats)
* [_To do_ list](#to-do-list)
* [License](#license)
* [Inspired by](#inspired-by)
* [Other resources](#other-resources)
## Description
This crate contains a virtual machine for eBPF program execution. BPF, as in
_Berkeley Packet Filter_, is an assembly-like language initially developed for
BSD systems, in order to filter packets in the kernel with tools such as
tcpdump so as to avoid useless copies to user-space. It was ported to Linux,
where it evolved into eBPF (_extended_ BPF), a faster version with more
features. While BPF programs are originally intended to run in the kernel, the
virtual machine of this crate enables running it in user-space applications;
it contains an interpreter, an x86_64 JIT-compiler for eBPF programs, as well as
a disassembler.
It is based on Rich Lane's [uBPF software](https://github.com/iovisor/ubpf/),
which does nearly the same, but is written in C.
The crate is supposed to compile and run on Linux, MacOS X, and Windows,
although the JIT-compiler does not work with Windows at this time.
## Link to the crate
This crate is available from [crates.io](https://crates.io/crates/rbpf), so it
should work out of the box by adding it as a dependency in your `Cargo.toml`
file:
```toml
[dependencies]
rbpf = "0.2.0"
```
You can also use the development version from this GitHub repository. This
should be as simple as putting this inside your `Cargo.toml`:
```toml
[dependencies]
rbpf = { git = "https://github.com/qmonnet/rbpf" }
```
Of course, if you prefer, you can clone it locally, possibly hack the crate,
and then indicate the path of your local version in `Cargo.toml`:
```toml
[dependencies]
rbpf = { path = "path/to/rbpf" }
```
Then indicate in your source code that you want to use the crate:
```rust,ignore
extern crate rbpf;
```
## API
The API is pretty well documented inside the source code. You should also be
able to access [an online version of the documentation from
here](https://docs.rs/rbpf/), automatically generated from the
[crates.io](https://crates.io/crates/rbpf) version (may not be up-to-date with
the main branch). [Examples](../../tree/main/examples) and [unit
tests](../../tree/main/tests) should also prove helpful. Here is a summary of
how to use the crate.
Here are the steps to follow to run an eBPF program with rbpf:
1. Create a virtual machine. There are several kinds of machines, we will come
back on this later. When creating the VM, pass the eBPF program as an
argument to the constructor.
2. If you want to use some helper functions, register them into the virtual
machine.
3. If you want a JIT-compiled program, compile it.
4. Execute your program: either run the interpreter or call the JIT-compiled
function.
eBPF has been initially designed to filter packets (now it has some other hooks
in the Linux kernel, such as kprobes, but this is not covered by rbpf). As a
consequence, most of the load and store instructions of the program are
performed on a memory area representing the packet data. However, in the Linux
kernel, the eBPF program does not immediately access this data area: initially,
it has access to a C `struct sk_buff` instead, which is a buffer containing
metadata about the packet—including memory addresses of the beginning and of
the end of the packet data area. So the program first loads those pointers from
the `sk_buff`, and then can access the packet data.
This behavior can be replicated with rbpf, but it is not mandatory. For this
reason, we have several structs representing different kinds of virtual
machines:
* `struct EbpfVmMbuffer` mimics the kernel. When the program is run, the
address provided to its first eBPF register will be the address of a metadata
buffer provided by the user, and that is expected to contain pointers to the
start and the end of the packet data memory area.
* `struct EbpfVmFixedMbuff` has one purpose: enabling the execution of programs
created to be compatible with the kernel, while saving the effort to manually
handle the metadata buffer for the user. In fact, this struct has a static
internal buffer that is passed to the program. The user has to indicate the
offset values at which the eBPF program expects to find the start and the end
of packet data in the buffer. On calling the function that runs the program
(JITted or not), the struct automatically updates the addresses in this
static buffer, at the appointed offsets, for the start and the end of the
packet data the program is called upon.
* `struct EbpfVmRaw` is for programs that want to run directly on packet data.
No metadata buffer is involved, the eBPF program directly receives the
address of the packet data in its first register. This is the behavior of
uBPF.
* `struct EbpfVmNoData` does not take any data. The eBPF program takes no
argument whatsoever and its return value is deterministic. Not so sure there
is a valid use case for that, but if nothing else, this is very useful for
unit tests.
All these structs implement the same public functions:
```rust,ignore
// called with EbpfVmMbuff:: prefix
pub fn new(prog: &'a [u8]) -> Result<EbpfVmMbuff<'a>, Error>
// called with EbpfVmFixedMbuff:: prefix
pub fn new(prog: &'a [u8],
data_offset: usize,
data_end_offset: usize) -> Result<EbpfVmFixedMbuff<'a>, Error>
// called with EbpfVmRaw:: prefix
pub fn new(prog: &'a [u8]) -> Result<EbpfVmRaw<'a>, Error>
// called with EbpfVmNoData:: prefix
pub fn new(prog: &'a [u8]) -> Result<EbpfVmNoData<'a>, Error>
```
This is used to create a new instance of a VM. The return type is dependent of
the struct from which the function is called. For instance,
`rbpf::EbpfVmRaw::new(Some(my_program))` would return an instance of `struct
rbpf::EbpfVmRaw` (wrapped in a `Result`). When a program is loaded, it is
checked with a very simple verifier (nothing close to the one for Linux
kernel). Users are also able to replace it with a custom verifier.
For `struct EbpfVmFixedMbuff`, two additional arguments must be passed to the
constructor: `data_offset` and `data_end_offset`. They are the offset (byte
number) at which the pointers to the beginning and to the end, respectively, of
the memory area of packet data are to be stored in the internal metadata buffer
each time the program is executed. Other structs do not use this mechanism and
do not need those offsets.
```rust,ignore
// for struct EbpfVmMbuff, struct EbpfVmRaw and struct EbpfVmRawData
pub fn set_program(&mut self, prog: &'a [u8]) -> Result<(), Error>
// for struct EbpfVmFixedMbuff
pub fn set_program(&mut self, prog: &'a [u8],
data_offset: usize,
data_end_offset: usize) -> Result<(), Error>
```
You can use for example `my_vm.set_program(my_program);` to change the loaded
program after the VM instance creation. This program is checked with the
verifier attached to the VM. The verifying function of the VM can be changed at
any moment.
```rust,ignore
pub type Verifier = fn(prog: &[u8]) -> Result<(), Error>;
pub fn set_verifier(&mut self,
verifier: Verifier) -> Result<(), Error>
```
Note that if a program has already been loaded into the VM, setting a new
verifier also immediately runs it on the loaded program. However, the verifier
is not run if no program has been loaded (if `None` was passed to the `new()`
method when creating the VM).
```rust,ignore
pub type Helper = fn (u64, u64, u64, u64, u64) -> u64;
pub fn register_helper(&mut self,
key: u32,
function: Helper) -> Result<(), Error>
```
This function is used to register a helper function. The VM stores its
registers in a hashmap, so the key can be any `u32` value you want. It may be
useful for programs that should be compatible with the Linux kernel and
therefore must use specific helper numbers.
```rust,ignore
// for struct EbpfVmMbuff
pub fn execute_program(&self,
mem: &'a mut [u8],
mbuff: &'a mut [u8]) -> Result<(u64), Error>
// for struct EbpfVmFixedMbuff and struct EbpfVmRaw
pub fn execute_program(&self,
mem: &'a mut [u8]) -> Result<(u64), Error>
// for struct EbpfVmNoData
pub fn execute_program(&self) -> Result<(u64), Error>
```
Interprets the loaded program. The function takes a reference to the packet
data and the metadata buffer, or only to the packet data, or nothing at all,
depending on the kind of the VM used. The value returned is the result of the
eBPF program.
```rust,ignore
pub fn jit_compile(&mut self) -> Result<(), Error>
```
JIT-compile the loaded program, for x86_64 architecture. If the program is to
use helper functions, they must be registered into the VM before this function
is called. The generated assembly function is internally stored in the VM.
```rust,ignore
// for struct EbpfVmMbuff
pub unsafe fn execute_program_jit(&self, mem: &'a mut [u8],
mbuff: &'a mut [u8]) -> Result<(u64), Error>
// for struct EbpfVmFixedMbuff and struct EbpfVmRaw
pub unsafe fn execute_program_jit(&self, mem: &'a mut [u8]) -> Result<(u64), Error>
// for struct EbpfVmNoData
pub unsafe fn execute_program_jit(&self) -> Result<(u64), Error>
```
Calls the JIT-compiled program. The arguments to provide are the same as for
`execute_program()`, again depending on the kind of VM that is used. The result of
the JIT-compiled program should be the same as with the interpreter, but it
should run faster. Note that if errors occur during the program execution, the
JIT-compiled version does not handle it as well as the interpreter, and the
program may crash. For this reason, the functions are marked as `unsafe`.
## Example uses
### Simple example
This comes from the unit test `test_vm_add`.
```rust
extern crate rbpf;
fn main() {
// This is the eBPF program, in the form of bytecode instructions.
let prog = &[
0xb4, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // mov32 r0, 0
0xb4, 0x01, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, // mov32 r1, 2
0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, // add32 r0, 1
0x0c, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // add32 r0, r1
0x95, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 // exit
];
// Instantiate a struct EbpfVmNoData. This is an eBPF VM for programs that
// takes no packet data in argument.
// The eBPF program is passed to the constructor.
let vm = rbpf::EbpfVmNoData::new(Some(prog)).unwrap();
// Execute (interpret) the program. No argument required for this VM.
assert_eq!(vm.execute_program().unwrap(), 0x3);
}
```
### With JIT, on packet data
This comes from the unit test `test_jit_ldxh`.
```rust
extern crate rbpf;
fn main() {
let prog = &[
0x71, 0x10, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, // ldxh r0, [r1+2]
0x95, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 // exit
];
// Let's use some data.
let mem = &mut [
0xaa, 0xbb, 0x11, 0xcc, 0xdd
];
// This is an eBPF VM for programs reading from a given memory area (it
// directly reads from packet data)
let mut vm = rbpf::EbpfVmRaw::new(Some(prog)).unwrap();
#[cfg(any(windows, not(feature = "std")))] {
assert_eq!(vm.execute_program(mem).unwrap(), 0x11);
}
#[cfg(all(not(windows), feature = "std"))] {
// This time we JIT-compile the program.
vm.jit_compile().unwrap();
// Then we execute it. For this kind of VM, a reference to the packet
// data must be passed to the function that executes the program.
unsafe { assert_eq!(vm.execute_program_jit(mem).unwrap(), 0x11); }
}
}
```
### Using a metadata buffer
This comes from the unit test `test_jit_mbuff` and derives from the unit test
`test_jit_ldxh`.
```rust
extern crate rbpf;
fn main() {
let prog = &[
// Load mem from mbuff at offset 8 into R1
0x79, 0x11, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00,
// ldxh r0, [r1+2]
0x69, 0x10, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00,
0x95, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
];
let mem = &mut [
0xaa, 0xbb, 0x11, 0x22, 0xcc, 0xdd
];
// Just for the example we create our metadata buffer from scratch, and
// we store the pointers to packet data start and end in it.
let mut mbuff = &mut [0u8; 32];
unsafe {
let mut data = mbuff.as_ptr().offset(8) as *mut u64;
let mut data_end = mbuff.as_ptr().offset(24) as *mut u64;
*data = mem.as_ptr() as u64;
*data_end = mem.as_ptr() as u64 + mem.len() as u64;
}
// This eBPF VM is for programs that use a metadata buffer.
let mut vm = rbpf::EbpfVmMbuff::new(Some(prog)).unwrap();
#[cfg(any(windows, not(feature = "std")))] {
assert_eq!(vm.execute_program(mem, mbuff).unwrap(), 0x2211);
}
#[cfg(all(not(windows), feature = "std"))] {
// Here again we JIT-compile the program.
vm.jit_compile().unwrap();
// Here we must provide both a reference to the packet data, and to the
// metadata buffer we use.
unsafe {
assert_eq!(vm.execute_program_jit(mem, mbuff).unwrap(), 0x2211);
}
}
}
```
### Loading code from an object file; and using a virtual metadata buffer
This comes from unit test `test_vm_block_port`.
This example requires the following additional crates; you may have to add them
to your `Cargo.toml` file.
```toml
[dependencies]
rbpf = "0.2.0"
elf = "0.0.10"
```
It also uses a kind of VM with an internal buffer that simulates the `sk_buff`
used by eBPF programs in the kernel, so that you do not have to manually create
a new buffer for each packet. It may be useful for programs compiled for the
kernel that assume the data they receive is a `sk_buff` pointing to the packet
data start and end addresses. So here we just provide the offsets at which the
eBPF program expects to find those pointers, and the VM handles the buffer
updates so that we only have to provide a reference to the packet data for each
run of the program.
```rust
extern crate elf;
use std::path::PathBuf;
extern crate rbpf;
use rbpf::helpers;
fn main() {
// Load a program from an ELF file, e.g. compiled from C to eBPF with
// clang/LLVM. Some minor modification to the bytecode may be required.
let filename = "examples/load_elf__block_a_port.elf";
let path = PathBuf::from(filename);
let file = match elf::File::open_path(&path) {
Ok(f) => f,
Err(e) => panic!("Error: {:?}", e),
};
// Here we assume the eBPF program is in the ELF section called
// ".classifier".
let text_scn = match file.get_section(".classifier") {
Some(s) => s,
None => panic!("Failed to look up .classifier section"),
};
let prog = &text_scn.data;
// This is our data: a real packet, starting with Ethernet header
let packet = &mut [
0x01, 0x23, 0x45, 0x67, 0x89, 0xab,
0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54,
0x08, 0x00, // ethertype
0x45, 0x00, 0x00, 0x3b, // start ip_hdr
0xa6, 0xab, 0x40, 0x00,
0x40, 0x06, 0x96, 0x0f,
0x7f, 0x00, 0x00, 0x01,
0x7f, 0x00, 0x00, 0x01,
0x99, 0x99, 0xc6, 0xcc, // start tcp_hdr
0xd1, 0xe5, 0xc4, 0x9d,
0xd4, 0x30, 0xb5, 0xd2,
0x80, 0x18, 0x01, 0x56,
0xfe, 0x2f, 0x00, 0x00,
0x01, 0x01, 0x08, 0x0a, // start data
0x00, 0x23, 0x75, 0x89,
0x00, 0x23, 0x63, 0x2d,
0x71, 0x64, 0x66, 0x73,
0x64, 0x66, 0x0a
];
// This is an eBPF VM for programs using a virtual metadata buffer, similar
// to the sk_buff that eBPF programs use with tc and in Linux kernel.
// We must provide the offsets at which the pointers to packet data start
// and end must be stored: these are the offsets at which the program will
// load the packet data from the metadata buffer.
let mut vm = rbpf::EbpfVmFixedMbuff::new(Some(prog), 0x40, 0x50).unwrap();
// We register a helper function, that can be called by the program, into
// the VM. The `bpf_trace_printf` is only available when we have access to
// the standard library.
#[cfg(feature = "std")] {
vm.register_helper(helpers::BPF_TRACE_PRINTK_IDX,
helpers::bpf_trace_printf).unwrap();
}
// This kind of VM takes a reference to the packet data, but does not need
// any reference to the metadata buffer: a fixed buffer is handled
// internally by the VM.
let res = vm.execute_program(packet).unwrap();
println!("Program returned: {:?} ({:#x})", res, res);
}
```
## Building eBPF programs
Besides writing the raw hexadecimal bytecode by hand, two other methods are
available for building eBPF programs.
### Assembler
The first method is to use the assembler provided by the crate.
```rust
extern crate rbpf;
use rbpf::assembler::assemble;
let prog = assemble("add64 r1, 0x605
mov64 r2, 0x32
mov64 r1, r0
be16 r0
neg64 r2
exit").unwrap();
#[cfg(feature = "std")] {
println!("{:?}", prog);
}
```
The above snippet will produce:
```rust,ignore
Ok([0x07, 0x01, 0x00, 0x00, 0x05, 0x06, 0x00, 0x00,
0xb7, 0x02, 0x00, 0x00, 0x32, 0x00, 0x00, 0x00,
0xbf, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xdc, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
0x87, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x95, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00])
```
Conversely, a disassembler is also available to dump instruction names from
bytecode in a human-friendly format.
```rust
extern crate rbpf;
use rbpf::disassembler::disassemble;
let prog = &[
0x07, 0x01, 0x00, 0x00, 0x05, 0x06, 0x00, 0x00,
0xb7, 0x02, 0x00, 0x00, 0x32, 0x00, 0x00, 0x00,
0xbf, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xdc, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
0x87, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x95, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
];
disassemble(prog);
```
This will produce the following output:
```txt
add64 r1, 0x605
mov64 r2, 0x32
mov64 r1, r0
be16 r0
neg64 r2
exit
```
Please refer to [source code](src/assembler.rs) and [tests](tests/assembler.rs)
for the syntax and the list of instruction names.
### Building API
The other way to build programs is to chain calls to the instruction builder
API. It looks less like assembly and more like high-level function calls. The
result is certainly more verbose, but if you prefer to build programs this way,
it works just as well. If we take the same sample as above, it would be
constructed as follows.
```rust
extern crate rbpf;
use rbpf::insn_builder::*;
let mut program = BpfCode::new();
program.add(Source::Imm, Arch::X64).set_dst(1).set_imm(0x605).push()
.mov(Source::Imm, Arch::X64).set_dst(2).set_imm(0x32).push()
.mov(Source::Reg, Arch::X64).set_src(0).set_dst(1).push()
.swap_bytes(Endian::Big).set_dst(0).set_imm(0x10).push()
.negate(Arch::X64).set_dst(2).push()
.exit().push();
```
Again, please refer to [the source and related tests](src/insn_builder.rs) to
get more information and examples on how to use it.
## Build features
### `no_std`
The `rbpf` crate has a Cargo feature named "std" that is enabled by default. To
use `rbpf` in `no_std` environments, this feature needs to be disabled. To do
this, modify your dependency on `rbpf` in `Cargo.toml` to disable the
enabled-by-default features.
```toml
[dependencies]
rbpf = { version = "1.0", default-features = false }
```
Note that when using this crate in `no_std` environments, the `jit` module
isn't available. This is because it depends on functions provided by `libc`
(`libc::posix_memalign()`, `libc::mprotect()`) which aren't available on
`no_std`.
The `assembler` module is available, albeit with reduced debugging features. It
depends on the `combine` crate providing parser combinators. Under `no_std`
this crate only provides simple parsers which generate less descriptive error
messages.
## Feedback welcome!
This is the author's first try at writing Rust code. He learned a lot in the
process, but there remains a feeling that this crate has a kind of C-ish style
in some places instead of the Rusty look the author would like it to have. So
feedback (or PRs) is welcome, including suggestions on ways to take better
advantage of Rust features.
Note that the project expects new commits to be covered by the
[Developer's Certificate of Origin](https://wiki.linuxfoundation.org/dco).
When contributing Pull Requests, please sign off your commits accordingly.
## Questions / Answers
### Why implementing an eBPF virtual machine in Rust?
As of this writing, there is no particular use case for this crate, to the best
of the author's knowledge. The author happens to work with BPF on Linux and to
know how uBPF works, and he wanted to learn and experiment with Rust, no more
than that.
### What are the differences with uBPF?
Other than the language, obviously? Well, there are some differences:
* Some constants, such as the maximum length for programs or the length of the
stack, differ between uBPF and rbpf. The latter uses the same values as the
Linux kernel, while uBPF has its own values.
* When an error occurs while a program is run by uBPF, the function running the
program silently returns the maximum value as an error code, while rbpf
returns a Rust `Error` type.
* The registration of helper functions, that can be called from within an eBPF
program, is not handled in the same way.
* The distinct structs for running programs either on packet data or with a
metadata buffer (simulated or not) are specific to rbpf.
* As for performance: in theory the JIT-compiled programs are expected to run at
the same speed, while the C interpreter of uBPF should be slightly faster than
the one in rbpf. But this has not been measured yet. Benchmarking the two
would be an interesting thing to do.
### Can I use it with the “classic” BPF (a.k.a cBPF) version?
No. This crate only works with extended BPF (eBPF) programs. For cBPF programs,
such as used by tcpdump (as of this writing) for example, you may be interested
in the [bpfjit crate](https://crates.io/crates/bpfjit) written by Alexander
Polakov instead.
### What functionalities are implemented?
Running and JIT-compiling eBPF programs work. There is also a mechanism to
register user-defined helper functions. The eBPF implementation of the Linux
kernel comes with [some additional
features](https://github.com/iovisor/bcc/blob/master/docs/kernel-versions.md):
a high number of helpers, several kinds of maps, tail calls.
* Additional helpers should be easy to add, but very few of the existing Linux
helpers have been replicated in rbpf so far.
* Tail calls (“long jumps” from an eBPF program into another) are not
implemented. This is probably not trivial to design and implement.
* The interaction with maps is done through specific helpers, so this should
not be difficult to add. The maps themselves can reuse the maps in the kernel
(if on Linux), to communicate with in-kernel eBPF programs for instance; or
they can be handled in user space. Rust has arrays and hashmaps, so their
implementation should be pretty straightforward (and may be added to rbpf in
the future); a user-space sketch is shown after this list.
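As a rough illustration of the user-space option, here is a sketch (nothing in
it is an rbpf API: the single-cell "map" and the helper keys 100 and 101 are
made up for this example) of how state held by the host program can already be
exposed to eBPF code through registered helpers:
```rust,ignore
extern crate rbpf;
use std::sync::atomic::{AtomicU64, Ordering};

// A single-cell, user-space "map", exposed to eBPF programs through helpers.
static CELL: AtomicU64 = AtomicU64::new(0);

fn map_lookup(_a: u64, _b: u64, _c: u64, _d: u64, _e: u64) -> u64 {
    CELL.load(Ordering::SeqCst)
}

fn map_update(value: u64, _b: u64, _c: u64, _d: u64, _e: u64) -> u64 {
    CELL.store(value, Ordering::SeqCst);
    0
}

fn main() {
    let prog = &[
        0x95, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 // exit
    ];
    let mut vm = rbpf::EbpfVmNoData::new(Some(prog)).unwrap();
    // Keys 100 and 101 are arbitrary; a program reaches them with
    // `call 100` and `call 101`.
    vm.register_helper(100, map_lookup).unwrap();
    vm.register_helper(101, map_update).unwrap();
}
```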
### What about program validation?
The “verifier” of this crate is very short and has nothing to do with the
kernel verifier, which means that it accepts programs that may not be safe. On
the other hand, you probably do not run this in a kernel here, so it will not
crash your system. Implementing a verifier similar to the one in the kernel is
not trivial, and we cannot “copy” it since it is under the GPL license.
### What about safety then?
Rust has a strong emphasis on safety. Yet to have the eBPF VM work, some
`unsafe` blocks of code are used. The VM, taken as an eBPF interpreter, can
return an error but should not crash. Please file an issue otherwise.
As for the JIT-compiler, it is a different story, since runtime memory checks
are more complicated to implement in assembly. It _will_ crash if your
JIT-compiled program tries to perform unauthorized memory accesses. It is
usually a good idea to test your program with the interpreter first.
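For instance, reusing the packet-data example above (a sketch that assumes the
`std` feature on a non-Windows x86_64 host, where the JIT is available), one
can interpret the program first and check that the JIT-compiled version agrees
before relying on it:
```rust,ignore
let prog = &[
    0x71, 0x10, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, // ldxb r0, [r1+2]
    0x95, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  // exit
];
let mem = &mut [0xaa, 0xbb, 0x11, 0xcc, 0xdd];
let mut vm = rbpf::EbpfVmRaw::new(Some(prog)).unwrap();
// Interpret first: the interpreter reports bad memory accesses as errors.
let expected = vm.execute_program(mem).unwrap();
// Then JIT-compile and make sure both versions agree.
vm.jit_compile().unwrap();
let jitted = unsafe { vm.execute_program_jit(mem).unwrap() };
assert_eq!(jitted, expected);
```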
Oh, and if your program has infinite loops, even with the interpreter, you're
on your own.
## Caveats
* This crate is **under development** and the API may be subject to change.
* The JIT compiler produces an unsafe program: memory accesses are not checked at
runtime (yet). Use with caution.
* A small number of eBPF instructions have not been implemented yet. This
should not be a problem for the majority of eBPF programs.
* Beware of turnips. Turnips are disgusting.
## _To do_ list
* Implement some traits (`Clone`, `Drop`, `Debug` are good candidates).
* Provide built-in support for user-space array and hash BPF maps.
* Improve safety of JIT-compiled programs with runtime memory checks.
* Add helpers (some of those supported in the kernel, such as checksum update,
could be helpful).
* Improve verifier. Could we find a way to directly support programs compiled
with clang?
* Maybe one day, tail calls?
* JIT-compilers for other architectures?
* …
## License
Following the effort of the Rust language project itself in order to ease
integration with other projects, the rbpf crate is distributed under the terms
of both the MIT license and the Apache License (Version 2.0).
See
[LICENSE-APACHE](https://github.com/qmonnet/rbpf/blob/main/LICENSE-APACHE)
and [LICENSE-MIT](https://github.com/qmonnet/rbpf/blob/main/LICENSE-MIT) for
details.
## Version
This code is based on the upstream rbpf repository, at commit
[fe7021b07b](https://github.com/qmonnet/rbpf/commit/fe7021b07b08a43b836743a77796d07ce1f4902e).
## Inspired by
* [uBPF](https://github.com/iovisor/ubpf), a C user-space implementation of an
eBPF virtual machine, with a JIT-compiler and disassembler (and also
including the assembler from the human-readable form of the instructions,
such as in `mov r0, 0x1337`), by Rich Lane for Big Switch Networks (2015)
* [_Building a simple JIT in
Rust_](https://www.sophiajt.com/building-a-simple-jit-in-rust),
by Sophia Turner (2015)
* [bpfjit](https://github.com/polachok/bpfjit) (also [on
crates.io](https://crates.io/crates/bpfjit)), a Rust crate exporting the cBPF
JIT compiler from FreeBSD 10 tree to Rust, by Alexander Polakov (2016)
## Other resources
* Cilium project documentation about BPF: [_BPF and XDP Reference
Guide_](http://docs.cilium.io/en/latest/bpf/)
* [Kernel documentation about BPF](https://docs.kernel.org/bpf/)
* [_Dive into BPF: a list of reading
material_](https://qmonnet.github.io/whirl-offload/2016/09/01/dive-into-bpf),
a blog article listing documentation for BPF and related technologies (2016)
* [The Rust programming language](https://www.rust-lang.org)

View File

@ -0,0 +1 @@
doc-valid-idents = ["eBPF", "uBPF"]

View File

@ -0,0 +1,26 @@
// SPDX-License-Identifier: (Apache-2.0 OR MIT)
// Copyright 2017 6WIND S.A. <quentin.monnet@6wind.com>
extern crate rbpf;
use rbpf::disassembler;
// Simply disassemble a program into human-readable instructions.
fn main() {
let prog = &[
0xb7, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x79, 0x12, 0x50, 0x00, 0x00, 0x00, 0x00,
0x00, 0x79, 0x11, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0xbf, 0x13, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x07, 0x03, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0x2d, 0x23, 0x12, 0x00, 0x00,
0x00, 0x00, 0x00, 0x69, 0x12, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x55, 0x02, 0x10, 0x00,
0x08, 0x00, 0x00, 0x00, 0x71, 0x12, 0x17, 0x00, 0x00, 0x00, 0x00, 0x00, 0x55, 0x02, 0x0e,
0x00, 0x06, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x79, 0x11, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0xbf,
0x12, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x57, 0x02, 0x00, 0x00, 0xff, 0xff, 0x00, 0x00,
0x15, 0x02, 0x08, 0x00, 0x99, 0x99, 0x00, 0x00, 0x18, 0x02, 0x00, 0x00, 0x00, 0x00, 0xff,
0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x5f, 0x21, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0xb7, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0x18, 0x02, 0x00, 0x00, 0x00,
0x00, 0x99, 0x99, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1d, 0x21, 0x01, 0x00,
0x00, 0x00, 0x00, 0x00, 0xb7, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x95, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00,
];
disassembler::disassemble(prog);
}

View File

@ -0,0 +1,3 @@
fn main() {
rbpf::helpers::show_helper();
}

View File

@ -0,0 +1,115 @@
// SPDX-License-Identifier: (Apache-2.0 OR MIT)
// Copyright 2016 6WIND S.A. <quentin.monnet@6wind.com>
#![allow(clippy::unreadable_literal)]
extern crate elf;
use std::path::PathBuf;
extern crate rbpf;
use rbpf::helpers;
// The following example uses an ELF file that has been compiled from the C program available in
// `load_elf__block_a_port.c` in the same directory.
//
// It was compiled with the following command:
//
// ```bash
// clang -O2 -emit-llvm -c load_elf__block_a_port.c -o - | \
// llc -march=bpf -filetype=obj -o load_elf__block_a_port.o
// ```
//
// Once compiled, this program can be injected into the Linux kernel, with tc for instance. Sadly, we
// need to make some modifications to the generated bytecode in order to run it: the three
// instructions with opcode 0x61 load data from a packet area as 4-byte words, where we need to
// load it as 8-bytes double words (0x79). The kernel does the same kind of translation before
// running the program, but rbpf does not implement this.
//
// In addition, the offset at which the pointer to the packet data is stored must be changed: since
// we use 8 bytes instead of 4 for the start and end addresses of the data packet, we cannot use
// the offsets produced by clang (0x4c and 0x50): the addresses would overlap. Instead we can use,
// for example, 0x40 and 0x50.
//
// These changes were applied with the following script:
//
// ```bash
// xxd load_elf__block_a_port.o | sed '
// s/6112 5000 0000 0000/7912 5000 0000 0000/ ;
// s/6111 4c00 0000 0000/7911 4000 0000 0000/ ;
// s/6111 2200 0000 0000/7911 2200 0000 0000/' | xxd -r > load_elf__block_a_port.tmp
// mv load_elf__block_a_port.tmp load_elf__block_a_port.o
// ```
//
// The eBPF program was placed into the `.classifier` ELF section (see C code above), which means
// that you can retrieve the raw bytecode with `readelf -x .classifier load_elf__block_a_port.o` or
// with `objdump -s -j .classifier load_elf__block_a_port.o`.
//
// Once the bytecode has been edited, we can load the bytecode directly from the ELF object file.
fn main() {
let filename = "examples/load_elf__block_a_port.elf";
let path = PathBuf::from(filename);
let file = match elf::File::open_path(path) {
Ok(f) => f,
Err(e) => panic!("Error: {:?}", e),
};
let text_scn = match file.get_section(".classifier") {
Some(s) => s,
None => panic!("Failed to look up .classifier section"),
};
let prog = &text_scn.data;
let packet1 = &mut [
0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54, 0x08,
0x00, // ethertype
0x45, 0x00, 0x00, 0x3b, // start ip_hdr
0xa6, 0xab, 0x40, 0x00, 0x40, 0x06, 0x96, 0x0f, 0x7f, 0x00, 0x00, 0x01, 0x7f, 0x00, 0x00,
0x01,
// Program matches the next two bytes: 0x9999 returns 0xffffffff, else return 0.
0x99, 0x99, 0xc6, 0xcc, // start tcp_hdr
0xd1, 0xe5, 0xc4, 0x9d, 0xd4, 0x30, 0xb5, 0xd2, 0x80, 0x18, 0x01, 0x56, 0xfe, 0x2f, 0x00,
0x00, 0x01, 0x01, 0x08, 0x0a, // start data
0x00, 0x23, 0x75, 0x89, 0x00, 0x23, 0x63, 0x2d, 0x71, 0x64, 0x66, 0x73, 0x64, 0x66, 0x0au8,
];
let packet2 = &mut [
0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54, 0x08,
0x00, // ethertype
0x45, 0x00, 0x00, 0x3b, // start ip_hdr
0xa6, 0xab, 0x40, 0x00, 0x40, 0x06, 0x96, 0x0f, 0x7f, 0x00, 0x00, 0x01, 0x7f, 0x00, 0x00,
0x01,
// Program matches the next two bytes: 0x9999 returns 0xffffffff, else return 0.
0x98, 0x76, 0xc6, 0xcc, // start tcp_hdr
0xd1, 0xe5, 0xc4, 0x9d, 0xd4, 0x30, 0xb5, 0xd2, 0x80, 0x18, 0x01, 0x56, 0xfe, 0x2f, 0x00,
0x00, 0x01, 0x01, 0x08, 0x0a, // start data
0x00, 0x23, 0x75, 0x89, 0x00, 0x23, 0x63, 0x2d, 0x71, 0x64, 0x66, 0x73, 0x64, 0x66, 0x0au8,
];
let mut vm = rbpf::EbpfVmFixedMbuff::new(Some(prog), 0x40, 0x50).unwrap();
vm.register_helper(helpers::BPF_TRACE_PRINTK_IDX, helpers::bpf_trace_printf)
.unwrap();
let res = vm.execute_program(packet1).unwrap();
println!("Packet #1, program returned: {res:?} ({res:#x})");
assert_eq!(res, 0xffffffff);
#[cfg(not(windows))]
{
vm.jit_compile().unwrap();
let res = unsafe { vm.execute_program_jit(packet2).unwrap() };
println!("Packet #2, program returned: {res:?} ({res:#x})");
assert_eq!(res, 0);
}
#[cfg(windows)]
{
let res = vm.execute_program(packet2).unwrap();
println!("Packet #2, program returned: {:?} ({:#x})", res, res);
assert_eq!(res, 0);
}
}

View File

@ -0,0 +1,43 @@
// SPDX-License-Identifier: (APACHE-2.0 OR MIT)
// Copyright 2016 6WIND S.A. <quentin.monnet@6wind.com>
// Block TCP packets on source or destination port 0x9999.
#include <linux/ip.h>
#include <linux/in.h>
#include <linux/tcp.h>
#include <linux/bpf.h>
#define ETH_ALEN 6
#define ETH_P_IP 0x0008 /* htons(0x0800) */
#define TCP_HDR_LEN 20
#define BLOCKED_TCP_PORT 0x9999
struct eth_hdr {
unsigned char h_dest[ETH_ALEN];
unsigned char h_source[ETH_ALEN];
unsigned short h_proto;
};
#define SEC(NAME) __attribute__((section(NAME), used))
SEC(".classifier")
int handle_ingress(struct __sk_buff *skb)
{
void *data = (void *)(long)skb->data;
void *data_end = (void *)(long)skb->data_end;
struct eth_hdr *eth = data;
struct iphdr *iph = data + sizeof(*eth);
struct tcphdr *tcp = data + sizeof(*eth) + sizeof(*iph);
/* single length check */
if (data + sizeof(*eth) + sizeof(*iph) + sizeof(*tcp) > data_end)
return 0;
if (eth->h_proto != ETH_P_IP)
return 0;
if (iph->protocol != IPPROTO_TCP)
return 0;
if (tcp->source == BLOCKED_TCP_PORT || tcp->dest == BLOCKED_TCP_PORT)
return -1;
return 0;
}

View File

@ -0,0 +1,126 @@
// Copyright Microsoft Corporation
// SPDX-License-Identifier: (Apache-2.0 OR MIT)
// Path: examples/rbpf_plugin.rs
use std::io::Read;
// Helper function used by https://github.com/Alan-Jowett/bpf_conformance/blob/main/tests/call_unwind_fail.data
fn _unwind(a: u64, _b: u64, _c: u64, _d: u64, _e: u64) -> u64 {
a
}
// This is a plugin for the bpf_conformance test suite (https://github.com/Alan-Jowett/bpf_conformance)
// It accepts a single argument, the memory contents to pass to the VM.
// It reads the program from stdin.
fn main() {
let mut args: Vec<String> = std::env::args().collect();
#[allow(unused_mut)] // In no_std the jit variable isn't mutated.
let mut jit: bool = false;
let mut cranelift: bool = false;
let mut program_text = String::new();
let mut memory_text = String::new();
args.remove(0);
// Memory is always the first argument.
if !args.is_empty() {
memory_text.clone_from(&args[0]);
// Strip whitespace
memory_text.retain(|c| !c.is_whitespace());
args.remove(0);
}
// Process the rest of the arguments.
while !args.is_empty() {
match args[0].as_str() {
"--help" => {
println!("Usage: rbpf_plugin [memory] < program");
return;
}
"--jit" => {
#[cfg(any(windows, not(feature = "std")))]
{
println!("JIT not supported");
return;
}
#[cfg(all(not(windows), feature = "std"))]
{
jit = true;
}
}
"--cranelift" => {
cranelift = true;
#[cfg(not(feature = "cranelift"))]
{
let _ = cranelift;
println!("Cranelift is not enabled");
return;
}
}
"--program" => {
if args.len() < 2 {
println!("Missing argument to --program");
return;
}
args.remove(0);
if !args.is_empty() {
program_text.clone_from(&args[0]);
args.remove(0);
}
}
_ => panic!("Unknown argument {}", args[0]),
}
args.remove(0);
}
if program_text.is_empty() {
// Read program text from stdin
std::io::stdin().read_to_string(&mut program_text).unwrap();
}
// Strip whitespace
program_text.retain(|c| !c.is_whitespace());
// Convert program from hex to bytecode
let bytecode = hex::decode(program_text).unwrap();
// Convert memory from hex to bytes
let mut memory: Vec<u8> = hex::decode(memory_text).unwrap();
// Create rbpf vm
let mut vm = rbpf::EbpfVmRaw::new(Some(&bytecode)).unwrap();
// Register the helper function used by call_unwind_fail.data test.
vm.register_helper(5, _unwind).unwrap();
let result: u64;
if jit {
#[cfg(any(windows, not(feature = "std")))]
{
println!("JIT not supported");
return;
}
#[cfg(all(not(windows), feature = "std"))]
{
unsafe {
vm.jit_compile().unwrap();
result = vm.execute_program_jit(&mut memory).unwrap();
}
}
} else if cranelift {
#[cfg(not(feature = "cranelift"))]
{
println!("Cranelift is not enabled");
return;
}
#[cfg(feature = "cranelift")]
{
vm.cranelift_compile().unwrap();
result = vm.execute_program_cranelift(&mut memory).unwrap();
}
} else {
result = vm.execute_program(&mut memory).unwrap();
}
println!("{result:x}");
}

View File

@ -0,0 +1,74 @@
// SPDX-License-Identifier: (Apache-2.0 OR MIT)
// Copyright 2017 6WIND S.A. <quentin.monnet@6wind.com>
#[macro_use]
extern crate json;
extern crate elf;
use std::path::PathBuf;
extern crate rbpf;
use rbpf::disassembler;
// Turn a program into a JSON string.
//
// Relies on `json` crate.
//
// You may copy this function and adapt it according to your needs. For instance, you may want to:
//
// * Remove the "desc" (description) attributes from the output.
// * Print integers as integers, and not as strings containing their hexadecimal representation
// (just replace the relevant `format!()` calls with the commented values).
fn to_json(prog: &[u8]) -> String {
// This call returns a high-level representation of the instructions, with the two parts of
// `LD_DW_IMM` instructions merged, and name and descriptions of the instructions.
// If you prefer to use a lower-level representation, use `ebpf::to_insn_vec()` function
// instead.
let insns = disassembler::to_insn_vec(prog);
let mut json_insns = vec![];
for insn in insns {
json_insns.push(object!(
"opc" => format!("{:#x}", insn.opc), // => insn.opc,
"dst" => format!("{:#x}", insn.dst), // => insn.dst,
"src" => format!("{:#x}", insn.src), // => insn.src,
"off" => format!("{:#x}", insn.off), // => insn.off,
// Warning: for imm we use an i64 instead of an i32 (to have correct values for
// the `lddw` operation). If we print a number in the JSON this is not a problem:
// the internal i64 has the same value, with the sign extended over the 32 most
// significant bits. If we print the hexadecimal value as a string however, we
// want to cast it to an i32, to prevent other instructions from printing a
// spurious `ffffffff` prefix when the number is negative. When values take more
// than 32 bits with `lddw`, the cast has no effect and the complete value is
// printed anyway.
"imm" => format!("{:#x}", insn.imm as i32), // => insn.imm,
"desc" => insn.desc
));
}
json::stringify_pretty(
object!(
"size" => json_insns.len(),
"insns" => json_insns
),
4,
)
}
// Load a program from an object file, and prints it to standard output as a JSON string.
fn main() {
// Let's reuse this file from `load_elf/example`.
let filename = "examples/load_elf__block_a_port.elf";
let path = PathBuf::from(filename);
let file = match elf::File::open_path(path) {
Ok(f) => f,
Err(e) => panic!("Error: {:?}", e),
};
let text_scn = match file.get_section(".classifier") {
Some(s) => s,
None => panic!("Failed to look up .classifier section"),
};
let prog = &text_scn.data;
println!("{}", to_json(prog));
}

View File

@ -0,0 +1,78 @@
// SPDX-License-Identifier: (Apache-2.0 OR MIT)
// Copyright 2017 6WIND S.A. <quentin.monnet@6wind.com>
extern crate rbpf;
use rbpf::helpers;
// The main objectives of this example are to show:
//
// * the use of EbpfVmNoData function,
// * and the use of a helper.
//
// The two eBPF programs are independent and are not related to one another.
fn main() {
let prog1 = &[
0xb4, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // mov32 r0, 0
0xb4, 0x01, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, // mov32 r1, 2
0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, // add32 r0, 1
0x0c, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // add32 r0, r1
0x95, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // exit and return r0
];
// We use helper `bpf_time_getns()`, which is similar to the helper `bpf_ktime_getns()` from the
// Linux kernel. Hence the rbpf::helpers module provides the index of this in-kernel helper as a
// constant, so that we can remain compatible with programs written for the kernel. Here we also
// cast it to a u8 so as to use it directly in program instructions.
let hkey = helpers::BPF_KTIME_GETNS_IDX as u8;
let prog2 = &[
0xb7, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // mov64 r1, 0
0xb7, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // mov64 r2, 0
0xb7, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // mov64 r3, 0
0xb7, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // mov64 r4, 0
0xb7, 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // mov64 r5, 0
0x85, 0x00, 0x00, 0x00, hkey, 0x00, 0x00, 0x00, // call helper <hkey>
0x95, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // exit and return r0
];
// Create a VM: this one takes no data. Load prog1 in it.
let mut vm = rbpf::EbpfVmNoData::new(Some(prog1)).unwrap();
// Execute prog1.
assert_eq!(vm.execute_program().unwrap(), 0x3);
// As struct EbpfVmNoData does not take any memory area, its return value is mostly
// deterministic. So we know prog1 will always return 3. There is an exception: when it uses
// helpers, the latter may have non-deterministic values, and all calls may not return the same
// value.
//
// In the following example we use a helper to get the elapsed time since boot time: we
// reimplement uptime in eBPF, in Rust. Because why not.
vm.set_program(prog2).unwrap();
vm.register_helper(helpers::BPF_KTIME_GETNS_IDX, helpers::bpf_time_getns)
.unwrap();
let time;
#[cfg(all(not(windows), feature = "std"))]
{
vm.jit_compile().unwrap();
time = unsafe { vm.execute_program_jit().unwrap() };
}
#[cfg(any(windows, not(feature = "std")))]
{
time = vm.execute_program().unwrap();
}
let days = time / 10u64.pow(9) / 60 / 60 / 24;
let hours = (time / 10u64.pow(9) / 60 / 60) % 24;
let minutes = (time / 10u64.pow(9) / 60) % 60;
let seconds = (time / 10u64.pow(9)) % 60;
let nanosec = time % 10u64.pow(9);
println!(
"Uptime: {:#x} ns == {} days {:02}:{:02}:{:02}, {} ns",
time, days, hours, minutes, seconds, nanosec
);
}

View File

@ -0,0 +1,72 @@
echo on
SetLocal EnableDelayedExpansion
REM This is the recommended way to choose the toolchain version, according to
REM Appveyor's documentation.
SET PATH=C:\Program Files (x86)\MSBuild\%TOOLCHAIN_VERSION%\Bin;%PATH%
set VCVARSALL="C:\Program Files (x86)\Microsoft Visual Studio %TOOLCHAIN_VERSION%\VC\vcvarsall.bat"
if [%Platform%] NEQ [x64] goto win32
set TARGET_ARCH=x86_64
set TARGET_PROGRAM_FILES=%ProgramFiles%
call %VCVARSALL% amd64
if %ERRORLEVEL% NEQ 0 exit 1
goto download
:win32
echo on
if [%Platform%] NEQ [Win32] exit 1
set TARGET_ARCH=i686
set TARGET_PROGRAM_FILES=%ProgramFiles(x86)%
call %VCVARSALL% amd64_x86
if %ERRORLEVEL% NEQ 0 exit 1
goto download
:download
REM vcvarsall turns echo off
echo on
mkdir windows_build_tools
mkdir windows_build_tools\
echo Downloading Yasm...
powershell -Command "(New-Object Net.WebClient).DownloadFile('http://www.tortall.net/projects/yasm/releases/yasm-1.3.0-win64.exe', 'windows_build_tools\yasm.exe')"
if %ERRORLEVEL% NEQ 0 (
echo ...downloading Yasm failed.
exit 1
)
set RUST_URL=https://static.rust-lang.org/dist/rust-%RUST%-%TARGET_ARCH%-pc-windows-msvc.msi
echo Downloading %RUST_URL%...
mkdir build
powershell -Command "(New-Object Net.WebClient).DownloadFile('%RUST_URL%', 'build\rust-%RUST%-%TARGET_ARCH%-pc-windows-msvc.msi')"
if %ERRORLEVEL% NEQ 0 (
echo ...downloading Rust failed.
exit 1
)
start /wait msiexec /i build\rust-%RUST%-%TARGET_ARCH%-pc-windows-msvc.msi INSTALLDIR="%TARGET_PROGRAM_FILES%\Rust %RUST%" /quiet /qn /norestart
if %ERRORLEVEL% NEQ 0 exit 1
set PATH="%TARGET_PROGRAM_FILES%\Rust %RUST%\bin";%cd%\windows_build_tools;%PATH%
if [%Configuration%] == [Release] set CARGO_MODE=--release
set
link /?
cl /?
rustc --version
cargo --version
cargo test --all-features -vv %CARGO_MODE%
if %ERRORLEVEL% NEQ 0 exit 1
REM Verify that `cargo build`, independent from `cargo test`, works; i.e.
REM verify that non-test builds aren't trying to use test-only features.
cargo build -vv %CARGO_MODE%
if %ERRORLEVEL% NEQ 0 exit 1
REM Verify that we can build with all features
cargo build --all-features -vv %CARGO_MODE%
if %ERRORLEVEL% NEQ 0 exit 1

View File

@ -0,0 +1,3 @@
group_imports="StdExternalCrate"
reorder_imports=true
imports_granularity="Crate"

View File

@ -0,0 +1,642 @@
// SPDX-License-Identifier: (Apache-2.0 OR MIT)
// Copyright 2017 Rich Lane <lanerl@gmail.com>
// Rust-doc comments were left in the module, but it is no longer publicly exposed from the root
// file of the crate. Do not expect to find those comments in the documentation of the crate.
//! This module parses eBPF assembly language source code.
use alloc::{
string::{String, ToString},
vec::Vec,
};
#[cfg(feature = "std")]
use combine::EasyParser;
use combine::{
attempt, between, eof, many, many1, one_of, optional,
parser::char::{alpha_num, char, digit, hex_digit, spaces, string},
sep_by,
stream::position::{self},
ParseError, Parser, Stream,
};
/// Operand of an instruction.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Operand {
/// Register number.
Register(i64),
/// Jump offset or immediate.
Integer(i64),
/// Register number and offset.
Memory(i64, i64),
/// Used for pattern matching.
Nil,
}
/// Parsed instruction.
#[derive(Debug, PartialEq, Eq)]
pub struct Instruction {
/// Instruction name.
pub name: String,
/// Operands.
pub operands: Vec<Operand>,
}
fn ident<I>() -> impl Parser<I, Output = String>
where
I: Stream<Token = char>,
I::Error: ParseError<I::Token, I::Range, I::Position>,
{
many1(alpha_num())
}
fn integer<I>() -> impl Parser<I, Output = i64>
where
I: Stream<Token = char>,
I::Error: ParseError<I::Token, I::Range, I::Position>,
{
let sign = optional(one_of("-+".chars())).map(|x| match x {
Some('-') => -1,
_ => 1,
});
let hex = string("0x")
.with(many1(hex_digit()))
.map(|x: String| u64::from_str_radix(&x, 16).unwrap() as i64);
let dec = many1(digit()).map(|x: String| x.parse::<i64>().unwrap());
(sign, attempt(hex).or(dec)).map(|(s, x)| s * x)
}
fn register<I>() -> impl Parser<I, Output = i64>
where
I: Stream<Token = char>,
I::Error: ParseError<I::Token, I::Range, I::Position>,
{
char('r')
.with(many1(digit()))
.map(|x: String| x.parse::<i64>().unwrap())
}
fn operand<I>() -> impl Parser<I, Output = Operand>
where
I: Stream<Token = char>,
I::Error: ParseError<I::Token, I::Range, I::Position>,
{
let register_operand = register().map(Operand::Register);
let immediate = integer().map(Operand::Integer);
let memory = between(char('['), char(']'), (register(), optional(integer())))
.map(|t| Operand::Memory(t.0, t.1.unwrap_or(0)));
register_operand.or(immediate).or(memory)
}
fn instruction<I>() -> impl Parser<I, Output = Instruction>
where
I: Stream<Token = char>,
I::Error: ParseError<I::Token, I::Range, I::Position>,
{
let operands = sep_by(operand(), char(',').skip(spaces()));
(ident().skip(spaces()), operands, spaces()).map(|t| Instruction {
name: t.0,
operands: t.1,
})
}
/// Parse a string into a list of instructions.
///
/// The instructions are not validated and may have invalid names and operand types.
pub fn parse(input: &str) -> Result<Vec<Instruction>, String> {
let mut with = spaces().with(many(instruction()).skip(eof()));
#[cfg(feature = "std")]
{
match with.easy_parse(position::Stream::new(input)) {
Ok((insts, _)) => Ok(insts),
Err(err) => Err(err.to_string()),
}
}
#[cfg(not(feature = "std"))]
{
match with.parse(position::Stream::new(input)) {
Ok((insts, _)) => Ok(insts),
Err(err) => Err(err.to_string()),
}
}
}
#[cfg(test)]
mod tests {
use alloc::{string::ToString, vec};
use combine::Parser;
use super::{ident, instruction, integer, operand, parse, register, Instruction, Operand};
// Unit tests for the different kinds of parsers.
#[test]
fn test_ident() {
assert_eq!(ident().parse("nop"), Ok(("nop".to_string(), "")));
assert_eq!(ident().parse("add32"), Ok(("add32".to_string(), "")));
assert_eq!(ident().parse("add32*"), Ok(("add32".to_string(), "*")));
}
#[test]
fn test_integer() {
assert_eq!(integer().parse("0"), Ok((0, "")));
assert_eq!(integer().parse("42"), Ok((42, "")));
assert_eq!(integer().parse("+42"), Ok((42, "")));
assert_eq!(integer().parse("-42"), Ok((-42, "")));
assert_eq!(integer().parse("0x0"), Ok((0, "")));
assert_eq!(
integer().parse("0x123456789abcdef0"),
Ok((0x123456789abcdef0, ""))
);
assert_eq!(integer().parse("-0x1f"), Ok((-31, "")));
}
#[test]
fn test_register() {
assert_eq!(register().parse("r0"), Ok((0, "")));
assert_eq!(register().parse("r15"), Ok((15, "")));
}
#[test]
fn test_operand() {
assert_eq!(operand().parse("r0"), Ok((Operand::Register(0), "")));
assert_eq!(operand().parse("r15"), Ok((Operand::Register(15), "")));
assert_eq!(operand().parse("0"), Ok((Operand::Integer(0), "")));
assert_eq!(operand().parse("42"), Ok((Operand::Integer(42), "")));
assert_eq!(operand().parse("[r1]"), Ok((Operand::Memory(1, 0), "")));
assert_eq!(operand().parse("[r3+5]"), Ok((Operand::Memory(3, 5), "")));
assert_eq!(
operand().parse("[r3+0x1f]"),
Ok((Operand::Memory(3, 31), ""))
);
assert_eq!(
operand().parse("[r3-0x1f]"),
Ok((Operand::Memory(3, -31), ""))
);
}
#[test]
fn test_instruction() {
assert_eq!(
instruction().parse("exit"),
Ok((
Instruction {
name: "exit".to_string(),
operands: vec![],
},
""
))
);
assert_eq!(
instruction().parse("call 2"),
Ok((
Instruction {
name: "call".to_string(),
operands: vec![Operand::Integer(2)],
},
""
))
);
assert_eq!(
instruction().parse("addi r1, 2"),
Ok((
Instruction {
name: "addi".to_string(),
operands: vec![Operand::Register(1), Operand::Integer(2)],
},
""
))
);
assert_eq!(
instruction().parse("ldxb r2, [r1+12]"),
Ok((
Instruction {
name: "ldxb".to_string(),
operands: vec![Operand::Register(2), Operand::Memory(1, 12)],
},
""
))
);
assert_eq!(
instruction().parse("lsh r3, 0x8"),
Ok((
Instruction {
name: "lsh".to_string(),
operands: vec![Operand::Register(3), Operand::Integer(8)],
},
""
))
);
assert_eq!(
instruction().parse("jne r3, 0x8, +37"),
Ok((
Instruction {
name: "jne".to_string(),
operands: vec![
Operand::Register(3),
Operand::Integer(8),
Operand::Integer(37)
],
},
""
))
);
// Whitespace between operands is optional.
assert_eq!(
instruction().parse("jne r3,0x8,+37"),
Ok((
Instruction {
name: "jne".to_string(),
operands: vec![
Operand::Register(3),
Operand::Integer(8),
Operand::Integer(37)
],
},
""
))
);
}
// Other unit tests: try to parse various set of instructions.
#[test]
fn test_empty() {
assert_eq!(parse(""), Ok(vec![]));
}
#[test]
fn test_exit() {
// No operands.
assert_eq!(
parse("exit"),
Ok(vec![Instruction {
name: "exit".to_string(),
operands: vec![],
}])
);
}
#[test]
fn test_lsh() {
// Register and immediate operands.
assert_eq!(
parse("lsh r3, 0x20"),
Ok(vec![Instruction {
name: "lsh".to_string(),
operands: vec![Operand::Register(3), Operand::Integer(0x20)],
}])
);
}
#[test]
fn test_ja() {
// Jump offset operand.
assert_eq!(
parse("ja +1"),
Ok(vec![Instruction {
name: "ja".to_string(),
operands: vec![Operand::Integer(1)],
}])
);
}
#[test]
fn test_ldxh() {
// Register and memory operands.
assert_eq!(
parse("ldxh r4, [r1+12]"),
Ok(vec![Instruction {
name: "ldxh".to_string(),
operands: vec![Operand::Register(4), Operand::Memory(1, 12)],
}])
);
}
#[test]
fn test_tcp_sack() {
// Sample program from ubpf.
// We could technically indent the instructions since the parser supports whitespace at
// the beginning, but there is another test for that.
let src = "\
ldxb r2, [r1+12]
ldxb r3, [r1+13]
lsh r3, 0x8
or r3, r2
mov r0, 0x0
jne r3, 0x8, +37
ldxb r2, [r1+23]
jne r2, 0x6, +35
ldxb r2, [r1+14]
add r1, 0xe
and r2, 0xf
lsh r2, 0x2
add r1, r2
mov r0, 0x0
ldxh r4, [r1+12]
add r1, 0x14
rsh r4, 0x2
and r4, 0x3c
mov r2, r4
add r2, 0xffffffec
mov r5, 0x15
mov r3, 0x0
jgt r5, r4, +20
mov r5, r3
lsh r5, 0x20
arsh r5, 0x20
mov r4, r1
add r4, r5
ldxb r5, [r4]
jeq r5, 0x1, +4
jeq r5, 0x0, +12
mov r6, r3
jeq r5, 0x5, +9
ja +2
add r3, 0x1
mov r6, r3
ldxb r3, [r4+1]
add r3, r6
lsh r3, 0x20
arsh r3, 0x20
jsgt r2, r3, -18
ja +1
mov r0, 0x1
exit
";
assert_eq!(
parse(src),
Ok(vec![
Instruction {
name: "ldxb".to_string(),
operands: vec![Operand::Register(2), Operand::Memory(1, 12)],
},
Instruction {
name: "ldxb".to_string(),
operands: vec![Operand::Register(3), Operand::Memory(1, 13)],
},
Instruction {
name: "lsh".to_string(),
operands: vec![Operand::Register(3), Operand::Integer(8)],
},
Instruction {
name: "or".to_string(),
operands: vec![Operand::Register(3), Operand::Register(2)],
},
Instruction {
name: "mov".to_string(),
operands: vec![Operand::Register(0), Operand::Integer(0)],
},
Instruction {
name: "jne".to_string(),
operands: vec![
Operand::Register(3),
Operand::Integer(8),
Operand::Integer(37)
],
},
Instruction {
name: "ldxb".to_string(),
operands: vec![Operand::Register(2), Operand::Memory(1, 23)],
},
Instruction {
name: "jne".to_string(),
operands: vec![
Operand::Register(2),
Operand::Integer(6),
Operand::Integer(35)
],
},
Instruction {
name: "ldxb".to_string(),
operands: vec![Operand::Register(2), Operand::Memory(1, 14)],
},
Instruction {
name: "add".to_string(),
operands: vec![Operand::Register(1), Operand::Integer(14)],
},
Instruction {
name: "and".to_string(),
operands: vec![Operand::Register(2), Operand::Integer(15)],
},
Instruction {
name: "lsh".to_string(),
operands: vec![Operand::Register(2), Operand::Integer(2)],
},
Instruction {
name: "add".to_string(),
operands: vec![Operand::Register(1), Operand::Register(2)],
},
Instruction {
name: "mov".to_string(),
operands: vec![Operand::Register(0), Operand::Integer(0)],
},
Instruction {
name: "ldxh".to_string(),
operands: vec![Operand::Register(4), Operand::Memory(1, 12)],
},
Instruction {
name: "add".to_string(),
operands: vec![Operand::Register(1), Operand::Integer(20)],
},
Instruction {
name: "rsh".to_string(),
operands: vec![Operand::Register(4), Operand::Integer(2)],
},
Instruction {
name: "and".to_string(),
operands: vec![Operand::Register(4), Operand::Integer(60)],
},
Instruction {
name: "mov".to_string(),
operands: vec![Operand::Register(2), Operand::Register(4)],
},
Instruction {
name: "add".to_string(),
operands: vec![Operand::Register(2), Operand::Integer(4294967276)],
},
Instruction {
name: "mov".to_string(),
operands: vec![Operand::Register(5), Operand::Integer(21)],
},
Instruction {
name: "mov".to_string(),
operands: vec![Operand::Register(3), Operand::Integer(0)],
},
Instruction {
name: "jgt".to_string(),
operands: vec![
Operand::Register(5),
Operand::Register(4),
Operand::Integer(20)
],
},
Instruction {
name: "mov".to_string(),
operands: vec![Operand::Register(5), Operand::Register(3)],
},
Instruction {
name: "lsh".to_string(),
operands: vec![Operand::Register(5), Operand::Integer(32)],
},
Instruction {
name: "arsh".to_string(),
operands: vec![Operand::Register(5), Operand::Integer(32)],
},
Instruction {
name: "mov".to_string(),
operands: vec![Operand::Register(4), Operand::Register(1)],
},
Instruction {
name: "add".to_string(),
operands: vec![Operand::Register(4), Operand::Register(5)],
},
Instruction {
name: "ldxb".to_string(),
operands: vec![Operand::Register(5), Operand::Memory(4, 0)],
},
Instruction {
name: "jeq".to_string(),
operands: vec![
Operand::Register(5),
Operand::Integer(1),
Operand::Integer(4)
],
},
Instruction {
name: "jeq".to_string(),
operands: vec![
Operand::Register(5),
Operand::Integer(0),
Operand::Integer(12)
],
},
Instruction {
name: "mov".to_string(),
operands: vec![Operand::Register(6), Operand::Register(3)],
},
Instruction {
name: "jeq".to_string(),
operands: vec![
Operand::Register(5),
Operand::Integer(5),
Operand::Integer(9)
],
},
Instruction {
name: "ja".to_string(),
operands: vec![Operand::Integer(2)],
},
Instruction {
name: "add".to_string(),
operands: vec![Operand::Register(3), Operand::Integer(1)],
},
Instruction {
name: "mov".to_string(),
operands: vec![Operand::Register(6), Operand::Register(3)],
},
Instruction {
name: "ldxb".to_string(),
operands: vec![Operand::Register(3), Operand::Memory(4, 1)],
},
Instruction {
name: "add".to_string(),
operands: vec![Operand::Register(3), Operand::Register(6)],
},
Instruction {
name: "lsh".to_string(),
operands: vec![Operand::Register(3), Operand::Integer(32)],
},
Instruction {
name: "arsh".to_string(),
operands: vec![Operand::Register(3), Operand::Integer(32)],
},
Instruction {
name: "jsgt".to_string(),
operands: vec![
Operand::Register(2),
Operand::Register(3),
Operand::Integer(-18)
],
},
Instruction {
name: "ja".to_string(),
operands: vec![Operand::Integer(1)],
},
Instruction {
name: "mov".to_string(),
operands: vec![Operand::Register(0), Operand::Integer(1)],
},
Instruction {
name: "exit".to_string(),
operands: vec![],
}
])
);
}
/// When running without `std` the `EasyParser` provided by `combine`
/// cannot be used. Because of this we need to use the `Parser` and the
/// error messages are different.
#[test]
fn test_error_eof() {
let expected_error;
#[cfg(feature = "std")]
{
expected_error = Err(
"Parse error at line: 1, column: 6\nUnexpected end of input\nExpected digit\n"
.to_string(),
);
}
#[cfg(not(feature = "std"))]
{
expected_error = Err("unexpected parse".to_string());
}
// Unexpected end of input in a register name.
assert_eq!(parse("lsh r"), expected_error);
}
/// When running without `std` the `EasyParser` provided by `combine`
/// cannot be used. Because of this we need to use the `Parser` and the
/// error messages are different.
#[test]
fn test_error_unexpected_character() {
let expected_error;
#[cfg(feature = "std")]
{
expected_error = Err(
"Parse error at line: 2, column: 1\nUnexpected `^`\nExpected letter or digit, whitespaces, `r`, `-`, `+`, `[` or end of input\n".to_string()
);
}
#[cfg(not(feature = "std"))]
{
expected_error = Err("unexpected parse".to_string());
}
// Unexpected character at end of input.
assert_eq!(parse("exit\n^"), expected_error);
}
#[test]
fn test_initial_whitespace() {
assert_eq!(
parse(
"
exit"
),
Ok(vec![Instruction {
name: "exit".to_string(),
operands: vec![],
}])
);
}
}

View File

@ -0,0 +1,277 @@
// SPDX-License-Identifier: (Apache-2.0 OR MIT)
// Copyright 2017 Rich Lane <lanerl@gmail.com>
//! This module translates eBPF assembly language to binary.
use alloc::{
collections::BTreeMap,
format,
string::{String, ToString},
vec,
vec::Vec,
};
use self::InstructionType::{
AluBinary, AluUnary, Call, Endian, JumpConditional, JumpUnconditional, LoadAbs, LoadImm,
LoadInd, LoadReg, NoOperand, StoreImm, StoreReg,
};
use crate::{
asm_parser::{
parse, Instruction, Operand,
Operand::{Integer, Memory, Nil, Register},
},
ebpf::{self, Insn},
};
#[derive(Clone, Copy, Debug, PartialEq)]
enum InstructionType {
AluBinary,
AluUnary,
LoadImm,
LoadAbs,
LoadInd,
LoadReg,
StoreImm,
StoreReg,
JumpUnconditional,
JumpConditional,
Call,
Endian(i64),
NoOperand,
}
fn make_instruction_map() -> BTreeMap<String, (InstructionType, u8)> {
let mut result = BTreeMap::new();
let alu_binary_ops = [
("add", ebpf::BPF_ADD),
("sub", ebpf::BPF_SUB),
("mul", ebpf::BPF_MUL),
("div", ebpf::BPF_DIV),
("or", ebpf::BPF_OR),
("and", ebpf::BPF_AND),
("lsh", ebpf::BPF_LSH),
("rsh", ebpf::BPF_RSH),
("mod", ebpf::BPF_MOD),
("xor", ebpf::BPF_XOR),
("mov", ebpf::BPF_MOV),
("arsh", ebpf::BPF_ARSH),
];
let mem_sizes = [
("w", ebpf::BPF_W),
("h", ebpf::BPF_H),
("b", ebpf::BPF_B),
("dw", ebpf::BPF_DW),
];
let jump_conditions = [
("jeq", ebpf::BPF_JEQ),
("jgt", ebpf::BPF_JGT),
("jge", ebpf::BPF_JGE),
("jlt", ebpf::BPF_JLT),
("jle", ebpf::BPF_JLE),
("jset", ebpf::BPF_JSET),
("jne", ebpf::BPF_JNE),
("jsgt", ebpf::BPF_JSGT),
("jsge", ebpf::BPF_JSGE),
("jslt", ebpf::BPF_JSLT),
("jsle", ebpf::BPF_JSLE),
];
{
let mut entry = |name: &str, inst_type: InstructionType, opc: u8| {
result.insert(name.to_string(), (inst_type, opc))
};
// Miscellaneous.
entry("exit", NoOperand, ebpf::EXIT);
entry("ja", JumpUnconditional, ebpf::JA);
entry("call", Call, ebpf::CALL);
entry("lddw", LoadImm, ebpf::LD_DW_IMM);
// AluUnary.
entry("neg", AluUnary, ebpf::NEG64);
entry("neg32", AluUnary, ebpf::NEG32);
entry("neg64", AluUnary, ebpf::NEG64);
// AluBinary.
for &(name, opc) in &alu_binary_ops {
entry(name, AluBinary, ebpf::BPF_ALU64 | opc);
entry(&format!("{name}32"), AluBinary, ebpf::BPF_ALU | opc);
entry(&format!("{name}64"), AluBinary, ebpf::BPF_ALU64 | opc);
}
// LoadAbs, LoadInd, LoadReg, StoreImm, and StoreReg.
for &(suffix, size) in &mem_sizes {
entry(
&format!("ldabs{suffix}"),
LoadAbs,
ebpf::BPF_ABS | ebpf::BPF_LD | size,
);
entry(
&format!("ldind{suffix}"),
LoadInd,
ebpf::BPF_IND | ebpf::BPF_LD | size,
);
entry(
&format!("ldx{suffix}"),
LoadReg,
ebpf::BPF_MEM | ebpf::BPF_LDX | size,
);
entry(
&format!("st{suffix}"),
StoreImm,
ebpf::BPF_MEM | ebpf::BPF_ST | size,
);
entry(
&format!("stx{suffix}"),
StoreReg,
ebpf::BPF_MEM | ebpf::BPF_STX | size,
);
}
// JumpConditional.
for &(name, condition) in &jump_conditions {
entry(name, JumpConditional, ebpf::BPF_JMP | condition);
entry(
&format!("{name}32"),
JumpConditional,
ebpf::BPF_JMP32 | condition,
);
}
// Endian.
for &size in &[16, 32, 64] {
entry(&format!("be{size}"), Endian(size), ebpf::BE);
entry(&format!("le{size}"), Endian(size), ebpf::LE);
}
}
result
}
fn insn(opc: u8, dst: i64, src: i64, off: i64, imm: i64) -> Result<Insn, String> {
if !(0..16).contains(&dst) {
return Err(format!("Invalid destination register {dst}"));
}
if !(0..16).contains(&src) {
return Err(format!("Invalid source register {src}"));
}
if !(-32768..32768).contains(&off) {
return Err(format!("Invalid offset {off}"));
}
if !(-2147483648..2147483648).contains(&imm) {
return Err(format!("Invalid immediate {imm}"));
}
Ok(Insn {
opc,
dst: dst as u8,
src: src as u8,
off: off as i16,
imm: imm as i32,
})
}
// TODO Use slice patterns when available and remove this function.
fn operands_tuple(operands: &[Operand]) -> Result<(Operand, Operand, Operand), String> {
match operands.len() {
0 => Ok((Nil, Nil, Nil)),
1 => Ok((operands[0], Nil, Nil)),
2 => Ok((operands[0], operands[1], Nil)),
3 => Ok((operands[0], operands[1], operands[2])),
_ => Err("Too many operands".to_string()),
}
}
fn encode(inst_type: InstructionType, opc: u8, operands: &[Operand]) -> Result<Insn, String> {
let (a, b, c) = (operands_tuple(operands))?;
match (inst_type, a, b, c) {
(AluBinary, Register(dst), Register(src), Nil) => insn(opc | ebpf::BPF_X, dst, src, 0, 0),
(AluBinary, Register(dst), Integer(imm), Nil) => insn(opc | ebpf::BPF_K, dst, 0, 0, imm),
(AluUnary, Register(dst), Nil, Nil) => insn(opc, dst, 0, 0, 0),
(LoadAbs, Integer(imm), Nil, Nil) => insn(opc, 0, 0, 0, imm),
(LoadInd, Register(src), Integer(imm), Nil) => insn(opc, 0, src, 0, imm),
(LoadReg, Register(dst), Memory(src, off), Nil)
| (StoreReg, Memory(dst, off), Register(src), Nil) => insn(opc, dst, src, off, 0),
(StoreImm, Memory(dst, off), Integer(imm), Nil) => insn(opc, dst, 0, off, imm),
(NoOperand, Nil, Nil, Nil) => insn(opc, 0, 0, 0, 0),
(JumpUnconditional, Integer(off), Nil, Nil) => insn(opc, 0, 0, off, 0),
(JumpConditional, Register(dst), Register(src), Integer(off)) => {
insn(opc | ebpf::BPF_X, dst, src, off, 0)
}
(JumpConditional, Register(dst), Integer(imm), Integer(off)) => {
insn(opc | ebpf::BPF_K, dst, 0, off, imm)
}
(Call, Integer(imm), Nil, Nil) => insn(opc, 0, 0, 0, imm),
(Endian(size), Register(dst), Nil, Nil) => insn(opc, dst, 0, 0, size),
(LoadImm, Register(dst), Integer(imm), Nil) => insn(opc, dst, 0, 0, (imm << 32) >> 32),
_ => Err(format!("Unexpected operands: {operands:?}")),
}
}
fn assemble_internal(parsed: &[Instruction]) -> Result<Vec<Insn>, String> {
let instruction_map = make_instruction_map();
let mut result: Vec<Insn> = vec![];
for instruction in parsed {
let name = instruction.name.as_str();
match instruction_map.get(name) {
Some(&(inst_type, opc)) => {
match encode(inst_type, opc, &instruction.operands) {
Ok(insn) => result.push(insn),
Err(msg) => return Err(format!("Failed to encode {name}: {msg}")),
}
// Special case for lddw.
if let LoadImm = inst_type {
if let Integer(imm) = instruction.operands[1] {
result.push(insn(0, 0, 0, 0, imm >> 32).unwrap());
}
}
}
None => return Err(format!("Invalid instruction {name:?}")),
}
}
Ok(result)
}
/// Parse assembly source and translate to binary.
///
/// # Examples
///
/// ```
/// use rbpf::assembler::assemble;
/// let prog = assemble("add64 r1, 0x605
/// mov64 r2, 0x32
/// mov64 r1, r0
/// be16 r0
/// neg64 r2
/// exit");
/// println!("{:?}", prog);
/// # assert_eq!(prog,
/// # Ok(vec![0x07, 0x01, 0x00, 0x00, 0x05, 0x06, 0x00, 0x00,
/// # 0xb7, 0x02, 0x00, 0x00, 0x32, 0x00, 0x00, 0x00,
/// # 0xbf, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
/// # 0xdc, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
/// # 0x87, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
/// # 0x95, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]));
/// ```
///
/// This will produce the following output:
///
/// ```text
/// Ok([0x07, 0x01, 0x00, 0x00, 0x05, 0x06, 0x00, 0x00,
/// 0xb7, 0x02, 0x00, 0x00, 0x32, 0x00, 0x00, 0x00,
/// 0xbf, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
/// 0xdc, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
/// 0x87, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
/// 0x95, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00])
/// ```
pub fn assemble(src: &str) -> Result<Vec<u8>, String> {
let parsed = (parse(src))?;
let insns = (assemble_internal(&parsed))?;
let mut result: Vec<u8> = vec![];
for insn in insns {
result.extend_from_slice(&insn.to_array());
}
Ok(result)
}

File diff suppressed because it is too large

View File

@ -0,0 +1,807 @@
// SPDX-License-Identifier: (Apache-2.0 OR MIT)
// Copyright 2017 6WIND S.A. <quentin.monnet@6wind.com>
//! Functions in this module are used to handle eBPF programs with a higher level representation,
//! for example to disassemble the code into a human-readable format.
use alloc::{
format,
string::{String, ToString},
vec,
vec::Vec,
};
use log::warn;
use crate::ebpf;
#[inline]
fn alu_imm_str(name: &str, insn: &ebpf::Insn) -> String {
format!("{name} r{}, {:#x}", insn.dst, insn.imm)
}
#[inline]
fn alu_reg_str(name: &str, insn: &ebpf::Insn) -> String {
format!("{name} r{}, r{}", insn.dst, insn.src)
}
#[inline]
fn byteswap_str(name: &str, insn: &ebpf::Insn) -> String {
match insn.imm {
16 | 32 | 64 => {}
_ => warn!("[Disassembler] Warning: Invalid imm value for {name} insn"),
}
format!("{name}{} r{}", insn.imm, insn.dst)
}
#[inline]
fn ld_st_imm_str(name: &str, insn: &ebpf::Insn) -> String {
if insn.off >= 0 {
format!("{name} [r{}+{:#x}], {:#x}", insn.dst, insn.off, insn.imm)
} else {
format!(
"{name} [r{}-{:#x}], {:#x}",
insn.dst,
-(insn.off as isize),
insn.imm
)
}
}
#[inline]
fn ld_reg_str(name: &str, insn: &ebpf::Insn) -> String {
if insn.off >= 0 {
format!("{name} r{}, [r{}+{:#x}]", insn.dst, insn.src, insn.off)
} else {
format!(
"{name} r{}, [r{}-{:#x}]",
insn.dst,
insn.src,
-(insn.off as isize)
)
}
}
#[inline]
fn st_reg_str(name: &str, insn: &ebpf::Insn) -> String {
if insn.off >= 0 {
format!("{name} [r{}+{:#x}], r{}", insn.dst, insn.off, insn.src)
} else {
format!(
"{name} [r{}-{:#x}], r{}",
insn.dst,
-(insn.off as isize),
insn.src
)
}
}
#[inline]
fn ldabs_str(name: &str, insn: &ebpf::Insn) -> String {
format!("{name} {:#x}", insn.imm)
}
#[inline]
fn ldind_str(name: &str, insn: &ebpf::Insn) -> String {
format!("{name} r{}, {:#x}", insn.src, insn.imm)
}
#[inline]
fn jmp_imm_str(name: &str, insn: &ebpf::Insn) -> String {
if insn.off >= 0 {
format!("{name} r{}, {:#x}, +{:#x}", insn.dst, insn.imm, insn.off)
} else {
format!(
"{name} r{}, {:#x}, -{:#x}",
insn.dst,
insn.imm,
-(insn.off as isize)
)
}
}
#[inline]
fn jmp_reg_str(name: &str, insn: &ebpf::Insn) -> String {
if insn.off >= 0 {
format!("{name} r{}, r{}, +{:#x}", insn.dst, insn.src, insn.off)
} else {
format!(
"{name} r{}, r{}, -{:#x}",
insn.dst,
insn.src,
-(insn.off as isize)
)
}
}
/// High-level representation of an eBPF instruction.
///
/// In addition to standard operation code and various operand, this struct has the following
/// properties:
///
/// * It stores a name, corresponding to a mnemonic for the operation code.
/// * It also stores a description, which is a mnemonic for the full instruction, using the actual
/// values of the relevant operands, and which can be used to disassemble the eBPF program, for
/// example.
/// * Immediate values are stored in an `i64` instead of a traditional i32, in order to merge the
/// two parts of (otherwise double-length) `LD_DW_IMM` instructions.
///
/// See <https://www.kernel.org/doc/Documentation/networking/filter.txt> for the Linux kernel
/// documentation about eBPF, or <https://github.com/iovisor/bpf-docs/blob/master/eBPF.md> for a
/// more concise version.
#[derive(Debug, PartialEq, Eq)]
pub struct HLInsn {
/// Operation code.
pub opc: u8,
/// Name (mnemonic). This name is not canon.
pub name: String,
/// Description of the instruction. This is not canon.
pub desc: String,
/// Destination register operand.
pub dst: u8,
/// Source register operand.
pub src: u8,
/// Offset operand.
pub off: i16,
/// Immediate value operand. For `LD_DW_IMM` instructions, contains the whole value merged from
/// the two 8-bytes parts of the instruction.
pub imm: i64,
}
/// Return a vector of `struct HLInsn` built from an eBPF program.
///
/// This is made public to provide a way to manipulate a program as a vector of instructions, in a
/// high-level format, for example for dumping the program instruction after instruction with a
/// custom format.
///
/// Note that the two parts of `LD_DW_IMM` instructions (which have the size of two standard
/// instructions) are merged into a single immediate value. As a consequence, the number of
/// instructions stored in the vector may not be equal to the size in bytes of the program
/// divided by the length of an instruction.
///
/// To make this possible, the immediate value operand is stored as an `i64` instead of an `i32`,
/// so be careful when you use it (see example `examples/to_json.rs`).
///
/// This is in contrast to the `ebpf::to_insn_vec()` function, which treats instructions at a
/// lower level and does not merge the parts of `LD_DW_IMM`. Also, the version in the `ebpf`
/// module does not store names or descriptions with the instructions.
///
/// # Examples
///
/// ```
/// use rbpf::disassembler;
///
/// let prog = &[
/// 0x18, 0x00, 0x00, 0x00, 0x88, 0x77, 0x66, 0x55,
/// 0x00, 0x00, 0x00, 0x00, 0x44, 0x33, 0x22, 0x11,
/// 0x95, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
/// ];
///
/// let v = disassembler::to_insn_vec(prog);
/// assert_eq!(v, vec![
/// disassembler::HLInsn {
/// opc: 0x18,
/// name: "lddw".to_string(),
/// desc: "lddw r0, 0x1122334455667788".to_string(),
/// dst: 0,
/// src: 0,
/// off: 0,
/// imm: 0x1122334455667788
/// },
/// disassembler::HLInsn {
/// opc: 0x95,
/// name: "exit".to_string(),
/// desc: "exit".to_string(),
/// dst: 0,
/// src: 0,
/// off: 0,
/// imm: 0
/// },
/// ]);
/// ```
pub fn to_insn_vec(prog: &[u8]) -> Vec<HLInsn> {
if prog.len() % ebpf::INSN_SIZE != 0 {
panic!(
"[Disassembler] Error: eBPF program length must be a multiple of {:?} octets",
ebpf::INSN_SIZE
);
}
if prog.is_empty() {
return vec![];
}
let mut res = vec![];
let mut insn_ptr: usize = 0;
while insn_ptr * ebpf::INSN_SIZE < prog.len() {
let insn = ebpf::get_insn(prog, insn_ptr);
let name;
let desc;
let mut imm = insn.imm as i64;
match insn.opc {
// BPF_LD class
ebpf::LD_ABS_B => {
name = "ldabsb";
desc = ldabs_str(name, &insn);
}
ebpf::LD_ABS_H => {
name = "ldabsh";
desc = ldabs_str(name, &insn);
}
ebpf::LD_ABS_W => {
name = "ldabsw";
desc = ldabs_str(name, &insn);
}
ebpf::LD_ABS_DW => {
name = "ldabsdw";
desc = ldabs_str(name, &insn);
}
ebpf::LD_IND_B => {
name = "ldindb";
desc = ldind_str(name, &insn);
}
ebpf::LD_IND_H => {
name = "ldindh";
desc = ldind_str(name, &insn);
}
ebpf::LD_IND_W => {
name = "ldindw";
desc = ldind_str(name, &insn);
}
ebpf::LD_IND_DW => {
name = "ldinddw";
desc = ldind_str(name, &insn);
}
ebpf::LD_DW_IMM => {
insn_ptr += 1;
let next_insn = ebpf::get_insn(prog, insn_ptr);
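                // Merge the two halves: the first slot supplies the low 32 bits (zero-extended)
                // and the second slot supplies the high 32 bits of the 64-bit immediate.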
imm = ((insn.imm as u32) as u64 + ((next_insn.imm as u64) << 32)) as i64;
name = "lddw";
desc = format!("{name} r{:}, {imm:#x}", insn.dst);
}
// BPF_LDX class
ebpf::LD_B_REG => {
name = "ldxb";
desc = ld_reg_str(name, &insn);
}
ebpf::LD_H_REG => {
name = "ldxh";
desc = ld_reg_str(name, &insn);
}
ebpf::LD_W_REG => {
name = "ldxw";
desc = ld_reg_str(name, &insn);
}
ebpf::LD_DW_REG => {
name = "ldxdw";
desc = ld_reg_str(name, &insn);
}
// BPF_ST class
ebpf::ST_B_IMM => {
name = "stb";
desc = ld_st_imm_str(name, &insn);
}
ebpf::ST_H_IMM => {
name = "sth";
desc = ld_st_imm_str(name, &insn);
}
ebpf::ST_W_IMM => {
name = "stw";
desc = ld_st_imm_str(name, &insn);
}
ebpf::ST_DW_IMM => {
name = "stdw";
desc = ld_st_imm_str(name, &insn);
}
// BPF_STX class
ebpf::ST_B_REG => {
name = "stxb";
desc = st_reg_str(name, &insn);
}
ebpf::ST_H_REG => {
name = "stxh";
desc = st_reg_str(name, &insn);
}
ebpf::ST_W_REG => {
name = "stxw";
desc = st_reg_str(name, &insn);
}
ebpf::ST_DW_REG => {
name = "stxdw";
desc = st_reg_str(name, &insn);
}
ebpf::ST_W_XADD => {
name = "stxxaddw";
desc = st_reg_str(name, &insn);
}
ebpf::ST_DW_XADD => {
name = "stxxadddw";
desc = st_reg_str(name, &insn);
}
// BPF_ALU class
ebpf::ADD32_IMM => {
name = "add32";
desc = alu_imm_str(name, &insn);
}
ebpf::ADD32_REG => {
name = "add32";
desc = alu_reg_str(name, &insn);
}
ebpf::SUB32_IMM => {
name = "sub32";
desc = alu_imm_str(name, &insn);
}
ebpf::SUB32_REG => {
name = "sub32";
desc = alu_reg_str(name, &insn);
}
ebpf::MUL32_IMM => {
name = "mul32";
desc = alu_imm_str(name, &insn);
}
ebpf::MUL32_REG => {
name = "mul32";
desc = alu_reg_str(name, &insn);
}
ebpf::DIV32_IMM => {
name = "div32";
desc = alu_imm_str(name, &insn);
}
ebpf::DIV32_REG => {
name = "div32";
desc = alu_reg_str(name, &insn);
}
ebpf::OR32_IMM => {
name = "or32";
desc = alu_imm_str(name, &insn);
}
ebpf::OR32_REG => {
name = "or32";
desc = alu_reg_str(name, &insn);
}
ebpf::AND32_IMM => {
name = "and32";
desc = alu_imm_str(name, &insn);
}
ebpf::AND32_REG => {
name = "and32";
desc = alu_reg_str(name, &insn);
}
ebpf::LSH32_IMM => {
name = "lsh32";
desc = alu_imm_str(name, &insn);
}
ebpf::LSH32_REG => {
name = "lsh32";
desc = alu_reg_str(name, &insn);
}
ebpf::RSH32_IMM => {
name = "rsh32";
desc = alu_imm_str(name, &insn);
}
ebpf::RSH32_REG => {
name = "rsh32";
desc = alu_reg_str(name, &insn);
}
ebpf::NEG32 => {
name = "neg32";
desc = format!("{name} r{:}", insn.dst);
}
ebpf::MOD32_IMM => {
name = "mod32";
desc = alu_imm_str(name, &insn);
}
ebpf::MOD32_REG => {
name = "mod32";
desc = alu_reg_str(name, &insn);
}
ebpf::XOR32_IMM => {
name = "xor32";
desc = alu_imm_str(name, &insn);
}
ebpf::XOR32_REG => {
name = "xor32";
desc = alu_reg_str(name, &insn);
}
ebpf::MOV32_IMM => {
name = "mov32";
desc = alu_imm_str(name, &insn);
}
ebpf::MOV32_REG => {
name = "mov32";
desc = alu_reg_str(name, &insn);
}
ebpf::ARSH32_IMM => {
name = "arsh32";
desc = alu_imm_str(name, &insn);
}
ebpf::ARSH32_REG => {
name = "arsh32";
desc = alu_reg_str(name, &insn);
}
ebpf::LE => {
name = "le";
desc = byteswap_str(name, &insn);
}
ebpf::BE => {
name = "be";
desc = byteswap_str(name, &insn);
}
// BPF_ALU64 class
ebpf::ADD64_IMM => {
name = "add64";
desc = alu_imm_str(name, &insn);
}
ebpf::ADD64_REG => {
name = "add64";
desc = alu_reg_str(name, &insn);
}
ebpf::SUB64_IMM => {
name = "sub64";
desc = alu_imm_str(name, &insn);
}
ebpf::SUB64_REG => {
name = "sub64";
desc = alu_reg_str(name, &insn);
}
ebpf::MUL64_IMM => {
name = "mul64";
desc = alu_imm_str(name, &insn);
}
ebpf::MUL64_REG => {
name = "mul64";
desc = alu_reg_str(name, &insn);
}
ebpf::DIV64_IMM => {
name = "div64";
desc = alu_imm_str(name, &insn);
}
ebpf::DIV64_REG => {
name = "div64";
desc = alu_reg_str(name, &insn);
}
ebpf::OR64_IMM => {
name = "or64";
desc = alu_imm_str(name, &insn);
}
ebpf::OR64_REG => {
name = "or64";
desc = alu_reg_str(name, &insn);
}
ebpf::AND64_IMM => {
name = "and64";
desc = alu_imm_str(name, &insn);
}
ebpf::AND64_REG => {
name = "and64";
desc = alu_reg_str(name, &insn);
}
ebpf::LSH64_IMM => {
name = "lsh64";
desc = alu_imm_str(name, &insn);
}
ebpf::LSH64_REG => {
name = "lsh64";
desc = alu_reg_str(name, &insn);
}
ebpf::RSH64_IMM => {
name = "rsh64";
desc = alu_imm_str(name, &insn);
}
ebpf::RSH64_REG => {
name = "rsh64";
desc = alu_reg_str(name, &insn);
}
ebpf::NEG64 => {
name = "neg64";
desc = format!("{name} r{:}", insn.dst);
}
ebpf::MOD64_IMM => {
name = "mod64";
desc = alu_imm_str(name, &insn);
}
ebpf::MOD64_REG => {
name = "mod64";
desc = alu_reg_str(name, &insn);
}
ebpf::XOR64_IMM => {
name = "xor64";
desc = alu_imm_str(name, &insn);
}
ebpf::XOR64_REG => {
name = "xor64";
desc = alu_reg_str(name, &insn);
}
ebpf::MOV64_IMM => {
name = "mov64";
desc = alu_imm_str(name, &insn);
}
ebpf::MOV64_REG => {
name = "mov64";
desc = alu_reg_str(name, &insn);
}
ebpf::ARSH64_IMM => {
name = "arsh64";
desc = alu_imm_str(name, &insn);
}
ebpf::ARSH64_REG => {
name = "arsh64";
desc = alu_reg_str(name, &insn);
}
// BPF_JMP class
ebpf::JA => {
name = "ja";
desc = if insn.off >= 0 {
format!("{name} +{:#x}", insn.off)
} else {
format!("{name} -{:#x}", -insn.off)
}
}
ebpf::JEQ_IMM => {
name = "jeq";
desc = jmp_imm_str(name, &insn);
}
ebpf::JEQ_REG => {
name = "jeq";
desc = jmp_reg_str(name, &insn);
}
ebpf::JGT_IMM => {
name = "jgt";
desc = jmp_imm_str(name, &insn);
}
ebpf::JGT_REG => {
name = "jgt";
desc = jmp_reg_str(name, &insn);
}
ebpf::JGE_IMM => {
name = "jge";
desc = jmp_imm_str(name, &insn);
}
ebpf::JGE_REG => {
name = "jge";
desc = jmp_reg_str(name, &insn);
}
ebpf::JLT_IMM => {
name = "jlt";
desc = jmp_imm_str(name, &insn);
}
ebpf::JLT_REG => {
name = "jlt";
desc = jmp_reg_str(name, &insn);
}
ebpf::JLE_IMM => {
name = "jle";
desc = jmp_imm_str(name, &insn);
}
ebpf::JLE_REG => {
name = "jle";
desc = jmp_reg_str(name, &insn);
}
ebpf::JSET_IMM => {
name = "jset";
desc = jmp_imm_str(name, &insn);
}
ebpf::JSET_REG => {
name = "jset";
desc = jmp_reg_str(name, &insn);
}
ebpf::JNE_IMM => {
name = "jne";
desc = jmp_imm_str(name, &insn);
}
ebpf::JNE_REG => {
name = "jne";
desc = jmp_reg_str(name, &insn);
}
ebpf::JSGT_IMM => {
name = "jsgt";
desc = jmp_imm_str(name, &insn);
}
ebpf::JSGT_REG => {
name = "jsgt";
desc = jmp_reg_str(name, &insn);
}
ebpf::JSGE_IMM => {
name = "jsge";
desc = jmp_imm_str(name, &insn);
}
ebpf::JSGE_REG => {
name = "jsge";
desc = jmp_reg_str(name, &insn);
}
ebpf::JSLT_IMM => {
name = "jslt";
desc = jmp_imm_str(name, &insn);
}
ebpf::JSLT_REG => {
name = "jslt";
desc = jmp_reg_str(name, &insn);
}
ebpf::JSLE_IMM => {
name = "jsle";
desc = jmp_imm_str(name, &insn);
}
ebpf::JSLE_REG => {
name = "jsle";
desc = jmp_reg_str(name, &insn);
}
ebpf::CALL => {
name = "call";
desc = format!("{name} {:#x}", insn.imm);
}
ebpf::TAIL_CALL => {
name = "tail_call";
desc = name.to_string();
}
ebpf::EXIT => {
name = "exit";
desc = name.to_string();
}
// BPF_JMP32 class
ebpf::JEQ_IMM32 => {
name = "jeq32";
desc = jmp_imm_str(name, &insn);
}
ebpf::JEQ_REG32 => {
name = "jeq32";
desc = jmp_reg_str(name, &insn);
}
ebpf::JGT_IMM32 => {
name = "jgt32";
desc = jmp_imm_str(name, &insn);
}
ebpf::JGT_REG32 => {
name = "jgt32";
desc = jmp_reg_str(name, &insn);
}
ebpf::JGE_IMM32 => {
name = "jge32";
desc = jmp_imm_str(name, &insn);
}
ebpf::JGE_REG32 => {
name = "jge32";
desc = jmp_reg_str(name, &insn);
}
ebpf::JLT_IMM32 => {
name = "jlt32";
desc = jmp_imm_str(name, &insn);
}
ebpf::JLT_REG32 => {
name = "jlt32";
desc = jmp_reg_str(name, &insn);
}
ebpf::JLE_IMM32 => {
name = "jle32";
desc = jmp_imm_str(name, &insn);
}
ebpf::JLE_REG32 => {
name = "jle32";
desc = jmp_reg_str(name, &insn);
}
ebpf::JSET_IMM32 => {
name = "jset32";
desc = jmp_imm_str(name, &insn);
}
ebpf::JSET_REG32 => {
name = "jset32";
desc = jmp_reg_str(name, &insn);
}
ebpf::JNE_IMM32 => {
name = "jne32";
desc = jmp_imm_str(name, &insn);
}
ebpf::JNE_REG32 => {
name = "jne32";
desc = jmp_reg_str(name, &insn);
}
ebpf::JSGT_IMM32 => {
name = "jsgt32";
desc = jmp_imm_str(name, &insn);
}
ebpf::JSGT_REG32 => {
name = "jsgt32";
desc = jmp_reg_str(name, &insn);
}
ebpf::JSGE_IMM32 => {
name = "jsge32";
desc = jmp_imm_str(name, &insn);
}
ebpf::JSGE_REG32 => {
name = "jsge32";
desc = jmp_reg_str(name, &insn);
}
ebpf::JSLT_IMM32 => {
name = "jslt32";
desc = jmp_imm_str(name, &insn);
}
ebpf::JSLT_REG32 => {
name = "jslt32";
desc = jmp_reg_str(name, &insn);
}
ebpf::JSLE_IMM32 => {
name = "jsle32";
desc = jmp_imm_str(name, &insn);
}
ebpf::JSLE_REG32 => {
name = "jsle32";
desc = jmp_reg_str(name, &insn);
}
_ => {
panic!(
"[Disassembler] Error: unknown eBPF opcode {:#2x} (insn #{:?})",
insn.opc, insn_ptr
);
}
};
let hl_insn = HLInsn {
opc: insn.opc,
name: name.to_string(),
desc,
dst: insn.dst,
src: insn.src,
off: insn.off,
imm,
};
res.push(hl_insn);
insn_ptr += 1;
}
res
}
/// Disassemble an eBPF program into human-readable instructions and prints it to standard output.
///
/// The program is not checked for errors or inconsistencies.
///
/// # Examples
///
/// ```
/// use rbpf::disassembler;
/// let prog = &[
/// 0x07, 0x01, 0x00, 0x00, 0x05, 0x06, 0x00, 0x00,
/// 0xb7, 0x02, 0x00, 0x00, 0x32, 0x00, 0x00, 0x00,
/// 0xbf, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
/// 0xdc, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
/// 0x87, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
/// 0x95, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
/// ];
/// disassembler::disassemble(prog);
/// # // "\nadd64 r1, 0x605\nmov64 r2, 0x32\nmov64 r1, r0\nbe16 r0\nneg64 r2\nexit"
/// ```
///
/// This will produce the following output:
///
/// ```text
/// add64 r1, 0x605
/// mov64 r2, 0x32
/// mov64 r1, r0
/// be16 r0
/// neg64 r2
/// exit
/// ```
pub fn disassemble(prog: &[u8]) {
#[cfg(feature = "std")]
{
for insn in to_insn_vec(prog) {
println!("{}", insn.desc);
}
}
#[cfg(not(feature = "std"))]
{
for insn in to_insn_vec(prog) {
log::info!("{}", insn.desc);
}
}
}


@@ -0,0 +1,635 @@
// SPDX-License-Identifier: (Apache-2.0 OR MIT)
// Copyright 2016 6WIND S.A. <quentin.monnet@6wind.com>
//! This module contains all the definitions related to eBPF, and some functions for
//! manipulating eBPF instructions.
//!
//! The number of bytes in an instruction, the maximum number of instructions in a program, and
//! also all operation codes are defined here as constants.
//!
//! The structure for an instruction used by this crate, as well as the function to extract it from
//! a program, is also defined in the module.
//!
//! To learn more about these instructions, see the Linux kernel documentation:
//! <https://www.kernel.org/doc/Documentation/networking/filter.txt>, or for a shorter version of
//! the list of the operation codes: <https://github.com/iovisor/bpf-docs/blob/master/eBPF.md>
use alloc::{vec, vec::Vec};
use byteorder::{ByteOrder, LittleEndian};
/// The maximum call depth is 8
pub const RBPF_MAX_CALL_DEPTH: usize = 8;
/// Maximum number of instructions in an eBPF program.
pub const PROG_MAX_INSNS: usize = 1000000;
/// Size of an eBPF instruction, in bytes.
pub const INSN_SIZE: usize = 8;
/// Maximum size of an eBPF program, in bytes.
pub const PROG_MAX_SIZE: usize = PROG_MAX_INSNS * INSN_SIZE;
/// Size of the eBPF stack, in bytes.
pub const STACK_SIZE: usize = 512;
// eBPF op codes.
// See also https://www.kernel.org/doc/Documentation/networking/filter.txt
// Three least significant bits are operation class:
/// BPF operation class: load from immediate.
pub const BPF_LD: u8 = 0x00;
/// BPF operation class: load from register.
pub const BPF_LDX: u8 = 0x01;
/// BPF operation class: store immediate.
pub const BPF_ST: u8 = 0x02;
/// BPF operation class: store value from register.
pub const BPF_STX: u8 = 0x03;
/// BPF operation class: 32-bit arithmetic operation.
pub const BPF_ALU: u8 = 0x04;
/// BPF operation class: jump (64-bit wide operands for comparisons).
pub const BPF_JMP: u8 = 0x05;
/// BPF operation class: jump (32-bit wide operands for comparisons).
pub const BPF_JMP32: u8 = 0x06;
/// BPF operation class: 64-bit arithmetic operation.
pub const BPF_ALU64: u8 = 0x07;
// For load and store instructions:
// +------------+--------+------------+
// | 3 bits | 2 bits | 3 bits |
// | mode | size | insn class |
// +------------+--------+------------+
// (MSB) (LSB)
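// For example, the `ldxdw` opcode defined below (LD_DW_REG) combines the three fields:
//   BPF_MEM (0x60) | BPF_DW (0x18) | BPF_LDX (0x01) = 0x79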
// Size modifiers:
/// BPF size modifier: word (4 bytes).
pub const BPF_W: u8 = 0x00;
/// BPF size modifier: half-word (2 bytes).
pub const BPF_H: u8 = 0x08;
/// BPF size modifier: byte (1 byte).
pub const BPF_B: u8 = 0x10;
/// BPF size modifier: double word (8 bytes).
pub const BPF_DW: u8 = 0x18;
// Mode modifiers:
/// BPF mode modifier: immediate value.
pub const BPF_IMM: u8 = 0x00;
/// BPF mode modifier: absolute load.
pub const BPF_ABS: u8 = 0x20;
/// BPF mode modifier: indirect load.
pub const BPF_IND: u8 = 0x40;
/// BPF mode modifier: load from / store to memory.
pub const BPF_MEM: u8 = 0x60;
// [ 0x80 reserved ]
// [ 0xa0 reserved ]
/// BPF mode modifier: exclusive add.
pub const BPF_XADD: u8 = 0xc0;
// For arithmetic (BPF_ALU/BPF_ALU64) and jump (BPF_JMP) instructions:
// +----------------+--------+--------+
// | 4 bits |1 b.| 3 bits |
// | operation code | src| insn class |
// +----------------+----+------------+
// (MSB) (LSB)
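// For example, `add64 dst, src` (ADD64_REG below) combines:
//   BPF_ADD (0x00) | BPF_X (0x08) | BPF_ALU64 (0x07) = 0x0f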
// Source modifiers:
/// BPF source operand modifier: 32-bit immediate value.
pub const BPF_K: u8 = 0x00;
/// BPF source operand modifier: `src` register.
pub const BPF_X: u8 = 0x08;
// Operation codes -- BPF_ALU or BPF_ALU64 classes:
/// BPF ALU/ALU64 operation code: addition.
pub const BPF_ADD: u8 = 0x00;
/// BPF ALU/ALU64 operation code: subtraction.
pub const BPF_SUB: u8 = 0x10;
/// BPF ALU/ALU64 operation code: multiplication.
pub const BPF_MUL: u8 = 0x20;
/// BPF ALU/ALU64 operation code: division.
pub const BPF_DIV: u8 = 0x30;
/// BPF ALU/ALU64 operation code: or.
pub const BPF_OR: u8 = 0x40;
/// BPF ALU/ALU64 operation code: and.
pub const BPF_AND: u8 = 0x50;
/// BPF ALU/ALU64 operation code: left shift.
pub const BPF_LSH: u8 = 0x60;
/// BPF ALU/ALU64 operation code: right shift.
pub const BPF_RSH: u8 = 0x70;
/// BPF ALU/ALU64 operation code: negation.
pub const BPF_NEG: u8 = 0x80;
/// BPF ALU/ALU64 operation code: modulus.
pub const BPF_MOD: u8 = 0x90;
/// BPF ALU/ALU64 operation code: exclusive or.
pub const BPF_XOR: u8 = 0xa0;
/// BPF ALU/ALU64 operation code: move.
pub const BPF_MOV: u8 = 0xb0;
/// BPF ALU/ALU64 operation code: sign extending right shift.
pub const BPF_ARSH: u8 = 0xc0;
/// BPF ALU/ALU64 operation code: endianness conversion.
pub const BPF_END: u8 = 0xd0;
// Operation codes -- BPF_JMP or BPF_JMP32 classes:
/// BPF JMP operation code: jump.
pub const BPF_JA: u8 = 0x00;
/// BPF JMP operation code: jump if equal.
pub const BPF_JEQ: u8 = 0x10;
/// BPF JMP operation code: jump if greater than.
pub const BPF_JGT: u8 = 0x20;
/// BPF JMP operation code: jump if greater or equal.
pub const BPF_JGE: u8 = 0x30;
/// BPF JMP operation code: jump if `src` & `reg`.
pub const BPF_JSET: u8 = 0x40;
/// BPF JMP operation code: jump if not equal.
pub const BPF_JNE: u8 = 0x50;
/// BPF JMP operation code: jump if greater than (signed).
pub const BPF_JSGT: u8 = 0x60;
/// BPF JMP operation code: jump if greater or equal (signed).
pub const BPF_JSGE: u8 = 0x70;
/// BPF JMP operation code: helper function call.
pub const BPF_CALL: u8 = 0x80;
/// BPF JMP operation code: return from program.
pub const BPF_EXIT: u8 = 0x90;
/// BPF JMP operation code: jump if lower than.
pub const BPF_JLT: u8 = 0xa0;
/// BPF JMP operation code: jump if lower or equal.
pub const BPF_JLE: u8 = 0xb0;
/// BPF JMP operation code: jump if lower than (signed).
pub const BPF_JSLT: u8 = 0xc0;
/// BPF JMP operation code: jump if lower or equal (signed).
pub const BPF_JSLE: u8 = 0xd0;
// Op codes
// (The following operation names are not "official", but may be specific to rbpf; the Linux
// kernel only combines the above flags and does not attribute a name to each operation.)
/// BPF opcode: `ldabsb src, dst, imm`.
pub const LD_ABS_B: u8 = BPF_LD | BPF_ABS | BPF_B;
/// BPF opcode: `ldabsh src, dst, imm`.
pub const LD_ABS_H: u8 = BPF_LD | BPF_ABS | BPF_H;
/// BPF opcode: `ldabsw src, dst, imm`.
pub const LD_ABS_W: u8 = BPF_LD | BPF_ABS | BPF_W;
/// BPF opcode: `ldabsdw src, dst, imm`.
pub const LD_ABS_DW: u8 = BPF_LD | BPF_ABS | BPF_DW;
/// BPF opcode: `ldindb src, dst, imm`.
pub const LD_IND_B: u8 = BPF_LD | BPF_IND | BPF_B;
/// BPF opcode: `ldindh src, dst, imm`.
pub const LD_IND_H: u8 = BPF_LD | BPF_IND | BPF_H;
/// BPF opcode: `ldindw src, dst, imm`.
pub const LD_IND_W: u8 = BPF_LD | BPF_IND | BPF_W;
/// BPF opcode: `ldinddw src, dst, imm`.
pub const LD_IND_DW: u8 = BPF_LD | BPF_IND | BPF_DW;
#[allow(unknown_lints)]
#[allow(clippy::eq_op)]
/// BPF opcode: `lddw dst, imm` /// `dst = imm`.
pub const LD_DW_IMM: u8 = BPF_LD | BPF_IMM | BPF_DW;
/// BPF opcode: `ldxb dst, [src + off]` /// `dst = (src + off) as u8`.
pub const LD_B_REG: u8 = BPF_LDX | BPF_MEM | BPF_B;
/// BPF opcode: `ldxh dst, [src + off]` /// `dst = (src + off) as u16`.
pub const LD_H_REG: u8 = BPF_LDX | BPF_MEM | BPF_H;
/// BPF opcode: `ldxw dst, [src + off]` /// `dst = (src + off) as u32`.
pub const LD_W_REG: u8 = BPF_LDX | BPF_MEM | BPF_W;
/// BPF opcode: `ldxdw dst, [src + off]` /// `dst = (src + off) as u64`.
pub const LD_DW_REG: u8 = BPF_LDX | BPF_MEM | BPF_DW;
/// BPF opcode: `stb [dst + off], imm` /// `(dst + offset) as u8 = imm`.
pub const ST_B_IMM: u8 = BPF_ST | BPF_MEM | BPF_B;
/// BPF opcode: `sth [dst + off], imm` /// `(dst + offset) as u16 = imm`.
pub const ST_H_IMM: u8 = BPF_ST | BPF_MEM | BPF_H;
/// BPF opcode: `stw [dst + off], imm` /// `(dst + offset) as u32 = imm`.
pub const ST_W_IMM: u8 = BPF_ST | BPF_MEM | BPF_W;
/// BPF opcode: `stdw [dst + off], imm` /// `(dst + offset) as u64 = imm`.
pub const ST_DW_IMM: u8 = BPF_ST | BPF_MEM | BPF_DW;
/// BPF opcode: `stxb [dst + off], src` /// `(dst + offset) as u8 = src`.
pub const ST_B_REG: u8 = BPF_STX | BPF_MEM | BPF_B;
/// BPF opcode: `stxh [dst + off], src` /// `(dst + offset) as u16 = src`.
pub const ST_H_REG: u8 = BPF_STX | BPF_MEM | BPF_H;
/// BPF opcode: `stxw [dst + off], src` /// `(dst + offset) as u32 = src`.
pub const ST_W_REG: u8 = BPF_STX | BPF_MEM | BPF_W;
/// BPF opcode: `stxdw [dst + off], src` /// `(dst + offset) as u64 = src`.
pub const ST_DW_REG: u8 = BPF_STX | BPF_MEM | BPF_DW;
/// BPF opcode: `stxxaddw [dst + off], src`.
pub const ST_W_XADD: u8 = BPF_STX | BPF_XADD | BPF_W;
/// BPF opcode: `stxxadddw [dst + off], src`.
pub const ST_DW_XADD: u8 = BPF_STX | BPF_XADD | BPF_DW;
/// BPF opcode: `add32 dst, imm` /// `dst += imm`.
pub const ADD32_IMM: u8 = BPF_ALU | BPF_K | BPF_ADD;
/// BPF opcode: `add32 dst, src` /// `dst += src`.
pub const ADD32_REG: u8 = BPF_ALU | BPF_X | BPF_ADD;
/// BPF opcode: `sub32 dst, imm` /// `dst -= imm`.
pub const SUB32_IMM: u8 = BPF_ALU | BPF_K | BPF_SUB;
/// BPF opcode: `sub32 dst, src` /// `dst -= src`.
pub const SUB32_REG: u8 = BPF_ALU | BPF_X | BPF_SUB;
/// BPF opcode: `mul32 dst, imm` /// `dst *= imm`.
pub const MUL32_IMM: u8 = BPF_ALU | BPF_K | BPF_MUL;
/// BPF opcode: `mul32 dst, src` /// `dst *= src`.
pub const MUL32_REG: u8 = BPF_ALU | BPF_X | BPF_MUL;
/// BPF opcode: `div32 dst, imm` /// `dst /= imm`.
pub const DIV32_IMM: u8 = BPF_ALU | BPF_K | BPF_DIV;
/// BPF opcode: `div32 dst, src` /// `dst /= src`.
pub const DIV32_REG: u8 = BPF_ALU | BPF_X | BPF_DIV;
/// BPF opcode: `or32 dst, imm` /// `dst |= imm`.
pub const OR32_IMM: u8 = BPF_ALU | BPF_K | BPF_OR;
/// BPF opcode: `or32 dst, src` /// `dst |= src`.
pub const OR32_REG: u8 = BPF_ALU | BPF_X | BPF_OR;
/// BPF opcode: `and32 dst, imm` /// `dst &= imm`.
pub const AND32_IMM: u8 = BPF_ALU | BPF_K | BPF_AND;
/// BPF opcode: `and32 dst, src` /// `dst &= src`.
pub const AND32_REG: u8 = BPF_ALU | BPF_X | BPF_AND;
/// BPF opcode: `lsh32 dst, imm` /// `dst <<= imm`.
pub const LSH32_IMM: u8 = BPF_ALU | BPF_K | BPF_LSH;
/// BPF opcode: `lsh32 dst, src` /// `dst <<= src`.
pub const LSH32_REG: u8 = BPF_ALU | BPF_X | BPF_LSH;
/// BPF opcode: `rsh32 dst, imm` /// `dst >>= imm`.
pub const RSH32_IMM: u8 = BPF_ALU | BPF_K | BPF_RSH;
/// BPF opcode: `rsh32 dst, src` /// `dst >>= src`.
pub const RSH32_REG: u8 = BPF_ALU | BPF_X | BPF_RSH;
/// BPF opcode: `neg32 dst` /// `dst = -dst`.
pub const NEG32: u8 = BPF_ALU | BPF_NEG;
/// BPF opcode: `mod32 dst, imm` /// `dst %= imm`.
pub const MOD32_IMM: u8 = BPF_ALU | BPF_K | BPF_MOD;
/// BPF opcode: `mod32 dst, src` /// `dst %= src`.
pub const MOD32_REG: u8 = BPF_ALU | BPF_X | BPF_MOD;
/// BPF opcode: `xor32 dst, imm` /// `dst ^= imm`.
pub const XOR32_IMM: u8 = BPF_ALU | BPF_K | BPF_XOR;
/// BPF opcode: `xor32 dst, src` /// `dst ^= src`.
pub const XOR32_REG: u8 = BPF_ALU | BPF_X | BPF_XOR;
/// BPF opcode: `mov32 dst, imm` /// `dst = imm`.
pub const MOV32_IMM: u8 = BPF_ALU | BPF_K | BPF_MOV;
/// BPF opcode: `mov32 dst, src` /// `dst = src`.
pub const MOV32_REG: u8 = BPF_ALU | BPF_X | BPF_MOV;
/// BPF opcode: `arsh32 dst, imm` /// `dst >>= imm (arithmetic)`.
///
/// <https://en.wikipedia.org/wiki/Arithmetic_shift>
pub const ARSH32_IMM: u8 = BPF_ALU | BPF_K | BPF_ARSH;
/// BPF opcode: `arsh32 dst, src` /// `dst >>= src (arithmetic)`.
///
/// <https://en.wikipedia.org/wiki/Arithmetic_shift>
pub const ARSH32_REG: u8 = BPF_ALU | BPF_X | BPF_ARSH;
/// BPF opcode: `le dst` /// `dst = htole<imm>(dst), with imm in {16, 32, 64}`.
pub const LE: u8 = BPF_ALU | BPF_K | BPF_END;
/// BPF opcode: `be dst` /// `dst = htobe<imm>(dst), with imm in {16, 32, 64}`.
pub const BE: u8 = BPF_ALU | BPF_X | BPF_END;
/// BPF opcode: `add64 dst, imm` /// `dst += imm`.
pub const ADD64_IMM: u8 = BPF_ALU64 | BPF_K | BPF_ADD;
/// BPF opcode: `add64 dst, src` /// `dst += src`.
pub const ADD64_REG: u8 = BPF_ALU64 | BPF_X | BPF_ADD;
/// BPF opcode: `sub64 dst, imm` /// `dst -= imm`.
pub const SUB64_IMM: u8 = BPF_ALU64 | BPF_K | BPF_SUB;
/// BPF opcode: `sub64 dst, src` /// `dst -= src`.
pub const SUB64_REG: u8 = BPF_ALU64 | BPF_X | BPF_SUB;
/// BPF opcode: `mul64 dst, imm` /// `dst *= imm`.
pub const MUL64_IMM: u8 = BPF_ALU64 | BPF_K | BPF_MUL;
/// BPF opcode: `mul64 dst, src` /// `dst *= src`.
pub const MUL64_REG: u8 = BPF_ALU64 | BPF_X | BPF_MUL;
/// BPF opcode: `div64 dst, imm` /// `dst /= imm`.
pub const DIV64_IMM: u8 = BPF_ALU64 | BPF_K | BPF_DIV;
/// BPF opcode: `div64 dst, src` /// `dst /= src`.
pub const DIV64_REG: u8 = BPF_ALU64 | BPF_X | BPF_DIV;
/// BPF opcode: `or64 dst, imm` /// `dst |= imm`.
pub const OR64_IMM: u8 = BPF_ALU64 | BPF_K | BPF_OR;
/// BPF opcode: `or64 dst, src` /// `dst |= src`.
pub const OR64_REG: u8 = BPF_ALU64 | BPF_X | BPF_OR;
/// BPF opcode: `and64 dst, imm` /// `dst &= imm`.
pub const AND64_IMM: u8 = BPF_ALU64 | BPF_K | BPF_AND;
/// BPF opcode: `and64 dst, src` /// `dst &= src`.
pub const AND64_REG: u8 = BPF_ALU64 | BPF_X | BPF_AND;
/// BPF opcode: `lsh64 dst, imm` /// `dst <<= imm`.
pub const LSH64_IMM: u8 = BPF_ALU64 | BPF_K | BPF_LSH;
/// BPF opcode: `lsh64 dst, src` /// `dst <<= src`.
pub const LSH64_REG: u8 = BPF_ALU64 | BPF_X | BPF_LSH;
/// BPF opcode: `rsh64 dst, imm` /// `dst >>= imm`.
pub const RSH64_IMM: u8 = BPF_ALU64 | BPF_K | BPF_RSH;
/// BPF opcode: `rsh64 dst, src` /// `dst >>= src`.
pub const RSH64_REG: u8 = BPF_ALU64 | BPF_X | BPF_RSH;
/// BPF opcode: `neg64 dst` /// `dst = -dst`.
pub const NEG64: u8 = BPF_ALU64 | BPF_NEG;
/// BPF opcode: `mod64 dst, imm` /// `dst %= imm`.
pub const MOD64_IMM: u8 = BPF_ALU64 | BPF_K | BPF_MOD;
/// BPF opcode: `mod64 dst, src` /// `dst %= src`.
pub const MOD64_REG: u8 = BPF_ALU64 | BPF_X | BPF_MOD;
/// BPF opcode: `xor64 dst, imm` /// `dst ^= imm`.
pub const XOR64_IMM: u8 = BPF_ALU64 | BPF_K | BPF_XOR;
/// BPF opcode: `xor64 dst, src` /// `dst ^= src`.
pub const XOR64_REG: u8 = BPF_ALU64 | BPF_X | BPF_XOR;
/// BPF opcode: `mov64 dst, imm` /// `dst = imm`.
pub const MOV64_IMM: u8 = BPF_ALU64 | BPF_K | BPF_MOV;
/// BPF opcode: `mov64 dst, src` /// `dst = src`.
pub const MOV64_REG: u8 = BPF_ALU64 | BPF_X | BPF_MOV;
/// BPF opcode: `arsh64 dst, imm` /// `dst >>= imm (arithmetic)`.
///
/// <https://en.wikipedia.org/wiki/Arithmetic_shift>
pub const ARSH64_IMM: u8 = BPF_ALU64 | BPF_K | BPF_ARSH;
/// BPF opcode: `arsh64 dst, src` /// `dst >>= src (arithmetic)`.
///
/// <https://en.wikipedia.org/wiki/Arithmetic_shift>
pub const ARSH64_REG: u8 = BPF_ALU64 | BPF_X | BPF_ARSH;
/// BPF opcode: `ja +off` /// `PC += off`.
pub const JA: u8 = BPF_JMP | BPF_JA;
/// BPF opcode: `jeq dst, imm, +off` /// `PC += off if dst == imm`.
pub const JEQ_IMM: u8 = BPF_JMP | BPF_K | BPF_JEQ;
/// BPF opcode: `jeq dst, src, +off` /// `PC += off if dst == src`.
pub const JEQ_REG: u8 = BPF_JMP | BPF_X | BPF_JEQ;
/// BPF opcode: `jgt dst, imm, +off` /// `PC += off if dst > imm`.
pub const JGT_IMM: u8 = BPF_JMP | BPF_K | BPF_JGT;
/// BPF opcode: `jgt dst, src, +off` /// `PC += off if dst > src`.
pub const JGT_REG: u8 = BPF_JMP | BPF_X | BPF_JGT;
/// BPF opcode: `jge dst, imm, +off` /// `PC += off if dst >= imm`.
pub const JGE_IMM: u8 = BPF_JMP | BPF_K | BPF_JGE;
/// BPF opcode: `jge dst, src, +off` /// `PC += off if dst >= src`.
pub const JGE_REG: u8 = BPF_JMP | BPF_X | BPF_JGE;
/// BPF opcode: `jlt dst, imm, +off` /// `PC += off if dst < imm`.
pub const JLT_IMM: u8 = BPF_JMP | BPF_K | BPF_JLT;
/// BPF opcode: `jlt dst, src, +off` /// `PC += off if dst < src`.
pub const JLT_REG: u8 = BPF_JMP | BPF_X | BPF_JLT;
/// BPF opcode: `jle dst, imm, +off` /// `PC += off if dst <= imm`.
pub const JLE_IMM: u8 = BPF_JMP | BPF_K | BPF_JLE;
/// BPF opcode: `jle dst, src, +off` /// `PC += off if dst <= src`.
pub const JLE_REG: u8 = BPF_JMP | BPF_X | BPF_JLE;
/// BPF opcode: `jset dst, imm, +off` /// `PC += off if dst & imm`.
pub const JSET_IMM: u8 = BPF_JMP | BPF_K | BPF_JSET;
/// BPF opcode: `jset dst, src, +off` /// `PC += off if dst & src`.
pub const JSET_REG: u8 = BPF_JMP | BPF_X | BPF_JSET;
/// BPF opcode: `jne dst, imm, +off` /// `PC += off if dst != imm`.
pub const JNE_IMM: u8 = BPF_JMP | BPF_K | BPF_JNE;
/// BPF opcode: `jne dst, src, +off` /// `PC += off if dst != src`.
pub const JNE_REG: u8 = BPF_JMP | BPF_X | BPF_JNE;
/// BPF opcode: `jsgt dst, imm, +off` /// `PC += off if dst > imm (signed)`.
pub const JSGT_IMM: u8 = BPF_JMP | BPF_K | BPF_JSGT;
/// BPF opcode: `jsgt dst, src, +off` /// `PC += off if dst > src (signed)`.
pub const JSGT_REG: u8 = BPF_JMP | BPF_X | BPF_JSGT;
/// BPF opcode: `jsge dst, imm, +off` /// `PC += off if dst >= imm (signed)`.
pub const JSGE_IMM: u8 = BPF_JMP | BPF_K | BPF_JSGE;
/// BPF opcode: `jsge dst, src, +off` /// `PC += off if dst >= src (signed)`.
pub const JSGE_REG: u8 = BPF_JMP | BPF_X | BPF_JSGE;
/// BPF opcode: `jslt dst, imm, +off` /// `PC += off if dst < imm (signed)`.
pub const JSLT_IMM: u8 = BPF_JMP | BPF_K | BPF_JSLT;
/// BPF opcode: `jslt dst, src, +off` /// `PC += off if dst < src (signed)`.
pub const JSLT_REG: u8 = BPF_JMP | BPF_X | BPF_JSLT;
/// BPF opcode: `jsle dst, imm, +off` /// `PC += off if dst <= imm (signed)`.
pub const JSLE_IMM: u8 = BPF_JMP | BPF_K | BPF_JSLE;
/// BPF opcode: `jsle dst, src, +off` /// `PC += off if dst <= src (signed)`.
pub const JSLE_REG: u8 = BPF_JMP | BPF_X | BPF_JSLE;
/// BPF opcode: `jeq dst, imm, +off` /// `PC += off if (dst as u32) == imm`.
pub const JEQ_IMM32: u8 = BPF_JMP32 | BPF_K | BPF_JEQ;
/// BPF opcode: `jeq dst, src, +off` /// `PC += off if (dst as u32) == (src as u32)`.
pub const JEQ_REG32: u8 = BPF_JMP32 | BPF_X | BPF_JEQ;
/// BPF opcode: `jgt dst, imm, +off` /// `PC += off if (dst as u32) > imm`.
pub const JGT_IMM32: u8 = BPF_JMP32 | BPF_K | BPF_JGT;
/// BPF opcode: `jgt dst, src, +off` /// `PC += off if (dst as u32) > (src as u32)`.
pub const JGT_REG32: u8 = BPF_JMP32 | BPF_X | BPF_JGT;
/// BPF opcode: `jge dst, imm, +off` /// `PC += off if (dst as u32) >= imm`.
pub const JGE_IMM32: u8 = BPF_JMP32 | BPF_K | BPF_JGE;
/// BPF opcode: `jge dst, src, +off` /// `PC += off if (dst as u32) >= (src as u32)`.
pub const JGE_REG32: u8 = BPF_JMP32 | BPF_X | BPF_JGE;
/// BPF opcode: `jlt dst, imm, +off` /// `PC += off if (dst as u32) < imm`.
pub const JLT_IMM32: u8 = BPF_JMP32 | BPF_K | BPF_JLT;
/// BPF opcode: `jlt dst, src, +off` /// `PC += off if (dst as u32) < (src as u32)`.
pub const JLT_REG32: u8 = BPF_JMP32 | BPF_X | BPF_JLT;
/// BPF opcode: `jle dst, imm, +off` /// `PC += off if (dst as u32) <= imm`.
pub const JLE_IMM32: u8 = BPF_JMP32 | BPF_K | BPF_JLE;
/// BPF opcode: `jle dst, src, +off` /// `PC += off if (dst as u32) <= (src as u32)`.
pub const JLE_REG32: u8 = BPF_JMP32 | BPF_X | BPF_JLE;
/// BPF opcode: `jset dst, imm, +off` /// `PC += off if (dst as u32) & imm`.
pub const JSET_IMM32: u8 = BPF_JMP32 | BPF_K | BPF_JSET;
/// BPF opcode: `jset dst, src, +off` /// `PC += off if (dst as u32) & (src as u32)`.
pub const JSET_REG32: u8 = BPF_JMP32 | BPF_X | BPF_JSET;
/// BPF opcode: `jne dst, imm, +off` /// `PC += off if (dst as u32) != imm`.
pub const JNE_IMM32: u8 = BPF_JMP32 | BPF_K | BPF_JNE;
/// BPF opcode: `jne dst, src, +off` /// `PC += off if (dst as u32) != (src as u32)`.
pub const JNE_REG32: u8 = BPF_JMP32 | BPF_X | BPF_JNE;
/// BPF opcode: `jsgt dst, imm, +off` /// `PC += off if (dst as i32) > imm (signed)`.
pub const JSGT_IMM32: u8 = BPF_JMP32 | BPF_K | BPF_JSGT;
/// BPF opcode: `jsgt dst, src, +off` /// `PC += off if (dst as i32) > (src as i32) (signed)`.
pub const JSGT_REG32: u8 = BPF_JMP32 | BPF_X | BPF_JSGT;
/// BPF opcode: `jsge dst, imm, +off` /// `PC += off if (dst as i32) >= imm (signed)`.
pub const JSGE_IMM32: u8 = BPF_JMP32 | BPF_K | BPF_JSGE;
/// BPF opcode: `jsge dst, src, +off` /// `PC += off if (dst as i32) >= (src as i32) (signed)`.
pub const JSGE_REG32: u8 = BPF_JMP32 | BPF_X | BPF_JSGE;
/// BPF opcode: `jslt dst, imm, +off` /// `PC += off if (dst as i32) < imm (signed)`.
pub const JSLT_IMM32: u8 = BPF_JMP32 | BPF_K | BPF_JSLT;
/// BPF opcode: `jslt dst, src, +off` /// `PC += off if (dst as i32) < (src as i32) (signed)`.
pub const JSLT_REG32: u8 = BPF_JMP32 | BPF_X | BPF_JSLT;
/// BPF opcode: `jsle dst, imm, +off` /// `PC += off if (dst as i32) <= imm (signed)`.
pub const JSLE_IMM32: u8 = BPF_JMP32 | BPF_K | BPF_JSLE;
/// BPF opcode: `jsle dst, src, +off` /// `PC += off if (dst as i32) <= (src as i32) (signed)`.
pub const JSLE_REG32: u8 = BPF_JMP32 | BPF_X | BPF_JSLE;
/// BPF opcode: `call imm` /// helper function call to helper with key `imm`.
pub const CALL: u8 = BPF_JMP | BPF_CALL;
/// BPF opcode: tail call.
pub const TAIL_CALL: u8 = BPF_JMP | BPF_X | BPF_CALL;
/// BPF opcode: `exit` /// `return r0`.
pub const EXIT: u8 = BPF_JMP | BPF_EXIT;
// Used in JIT
/// Mask to extract the operation class from an operation code.
pub const BPF_CLS_MASK: u8 = 0x07;
/// Mask to extract the arithmetic operation code from an instruction operation code.
pub const BPF_ALU_OP_MASK: u8 = 0xf0;
/// Prototype of an eBPF helper function.
pub type Helper = fn(u64, u64, u64, u64, u64) -> u64;
/// An eBPF instruction.
///
/// See <https://www.kernel.org/doc/Documentation/networking/filter.txt> for the Linux kernel
/// documentation about eBPF, or <https://github.com/iovisor/bpf-docs/blob/master/eBPF.md> for a
/// more concise version.
#[derive(Debug, PartialEq, Eq, Clone)]
pub struct Insn {
/// Operation code.
pub opc: u8,
/// Destination register operand.
pub dst: u8,
/// Source register operand.
pub src: u8,
/// Offset operand.
pub off: i16,
/// Immediate value operand.
pub imm: i32,
}
impl Insn {
/// Turn an `Insn` back into an array of bytes.
///
/// # Examples
///
/// ```
/// use rbpf::ebpf;
///
/// let prog: &[u8] = &[
/// 0xb7, 0x12, 0x56, 0x34, 0xde, 0xbc, 0x9a, 0x78,
/// ];
/// let insn = ebpf::Insn {
/// opc: 0xb7,
/// dst: 2,
/// src: 1,
/// off: 0x3456,
/// imm: 0x789abcde
/// };
/// assert_eq!(insn.to_array(), prog);
/// ```
pub fn to_array(&self) -> [u8; INSN_SIZE] {
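        // Layout: opcode, then src in the high nibble and dst in the low nibble of the second
        // byte, then the 16-bit offset and the 32-bit immediate, both in little-endian order.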
[
self.opc,
self.src.wrapping_shl(4) | self.dst,
(self.off & 0xff) as u8,
self.off.wrapping_shr(8) as u8,
(self.imm & 0xff) as u8,
(self.imm & 0xff_00).wrapping_shr(8) as u8,
(self.imm as u32 & 0xff_00_00).wrapping_shr(16) as u8,
(self.imm as u32 & 0xff_00_00_00).wrapping_shr(24) as u8,
]
}
/// Turn an `Insn` into an vector of bytes.
///
/// # Examples
///
/// ```
/// use rbpf::ebpf;
///
/// let prog: Vec<u8> = vec![
/// 0xb7, 0x12, 0x56, 0x34, 0xde, 0xbc, 0x9a, 0x78,
/// ];
/// let insn = ebpf::Insn {
/// opc: 0xb7,
/// dst: 2,
/// src: 1,
/// off: 0x3456,
/// imm: 0x789abcde
/// };
/// assert_eq!(insn.to_vec(), prog);
/// ```
pub fn to_vec(&self) -> Vec<u8> {
vec![
self.opc,
self.src.wrapping_shl(4) | self.dst,
(self.off & 0xff) as u8,
self.off.wrapping_shr(8) as u8,
(self.imm & 0xff) as u8,
(self.imm & 0xff_00).wrapping_shr(8) as u8,
(self.imm as u32 & 0xff_00_00).wrapping_shr(16) as u8,
(self.imm as u32 & 0xff_00_00_00).wrapping_shr(24) as u8,
]
}
}
/// Get the instruction at `idx` of an eBPF program. `idx` is the index (number) of the
/// instruction (not a byte offset). The first instruction has index 0.
///
/// # Panics
///
/// Panics if it is not possible to get the instruction (if idx is too high, or last instruction is
/// incomplete).
///
/// # Examples
///
/// ```
/// use rbpf::ebpf;
///
/// let prog = &[
/// 0xb7, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
/// 0x95, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
/// ];
/// let insn = ebpf::get_insn(prog, 1);
/// assert_eq!(insn.opc, 0x95);
/// ```
///
/// The example below will panic, since the last instruction is not complete and cannot be loaded.
///
/// ```rust,should_panic
/// use rbpf::ebpf;
///
/// let prog = &[
/// 0xb7, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
/// 0x95, 0x00, 0x00, 0x00, 0x00, 0x00 // two bytes missing
/// ];
/// let insn = ebpf::get_insn(prog, 1);
/// ```
pub fn get_insn(prog: &[u8], idx: usize) -> Insn {
// This guard should not be needed in most cases, since the verifier already checks the program
// size, and indexes should be fine in the interpreter/JIT. But this function is publicly
// available and user can call it with any `idx`, so we have to check anyway.
if (idx + 1) * INSN_SIZE > prog.len() {
panic!(
"Error: cannot reach instruction at index {:?} in program containing {:?} bytes",
idx,
prog.len()
);
}
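    // The second byte packs the registers: dst in the low nibble, src in the high nibble;
    // `off` and `imm` are stored in little-endian byte order.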
Insn {
opc: prog[INSN_SIZE * idx],
dst: prog[INSN_SIZE * idx + 1] & 0x0f,
src: (prog[INSN_SIZE * idx + 1] & 0xf0) >> 4,
off: LittleEndian::read_i16(&prog[(INSN_SIZE * idx + 2)..]),
imm: LittleEndian::read_i32(&prog[(INSN_SIZE * idx + 4)..]),
}
}
/// Return a vector of `struct Insn` built from a program.
///
/// This is provided as a convenience for users wishing to manipulate a vector of instructions, for
/// example for dumping the program instruction after instruction with a custom format.
///
/// Note that the two parts of `LD_DW_IMM` instructions (spanning 64 bits) are considered as two
/// distinct instructions.
///
/// # Examples
///
/// ```
/// use rbpf::ebpf;
///
/// let prog = &[
/// 0x18, 0x00, 0x00, 0x00, 0x88, 0x77, 0x66, 0x55,
/// 0x00, 0x00, 0x00, 0x00, 0x44, 0x33, 0x22, 0x11,
/// 0x95, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
/// ];
///
/// let v = ebpf::to_insn_vec(prog);
/// assert_eq!(v, vec![
/// ebpf::Insn {
/// opc: 0x18,
/// dst: 0,
/// src: 0,
/// off: 0,
/// imm: 0x55667788
/// },
/// ebpf::Insn {
/// opc: 0,
/// dst: 0,
/// src: 0,
/// off: 0,
/// imm: 0x11223344
/// },
/// ebpf::Insn {
/// opc: 0x95,
/// dst: 0,
/// src: 0,
/// off: 0,
/// imm: 0
/// },
/// ]);
/// ```
pub fn to_insn_vec(prog: &[u8]) -> Vec<Insn> {
if prog.len() % INSN_SIZE != 0 {
panic!(
"Error: eBPF program length must be a multiple of {:?} octets",
INSN_SIZE
);
}
let mut res = vec![];
let mut insn_ptr: usize = 0;
while insn_ptr * INSN_SIZE < prog.len() {
let insn = get_insn(prog, insn_ptr);
res.push(insn);
insn_ptr += 1;
}
res
}


@@ -0,0 +1,488 @@
// SPDX-License-Identifier: (Apache-2.0 OR MIT)
// Copyright 2015 Big Switch Networks, Inc
// (Algorithms for uBPF helpers, originally in C)
// Copyright 2016 6WIND S.A. <quentin.monnet@6wind.com>
// (Translation to Rust, other helpers)
//! This module implements some built-in helpers that can be called from within an eBPF program.
//!
//! These helpers may originate from several places:
//!
//! * Some of them mimic the helpers available in the Linux kernel.
//! * Some of them were proposed as example helpers in uBPF and they were adapted here.
//! * Other helpers may be specific to rbpf.
//!
//! The prototype for helpers is always the same: five `u64` as arguments, and a `u64` as a return
//! value. Hence some helpers have unused arguments, or return a 0 value in all cases, in order to
//! respect this convention.
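//!
//! As an illustration, a custom helper respecting this convention could look like the minimal
//! sketch below; it still has to be registered with the virtual machine (for example through a
//! map of helper indices to functions) before an eBPF program can call it:
//!
//! ```
//! // Return the sum of the first two arguments; the other three are ignored.
//! fn sum2(a: u64, b: u64, _c: u64, _d: u64, _e: u64) -> u64 {
//!     a + b
//! }
//!
//! let f: rbpf::ebpf::Helper = sum2;
//! assert_eq!(f(1, 2, 0, 0, 0), 3);
//! ```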
// Helpers associated to kernel helpers
// See also linux/include/uapi/linux/bpf.h in Linux kernel sources.
// bpf_ktime_getns()
/// Index of helper `bpf_ktime_getns()`, equivalent to `bpf_time_getns()`, in Linux kernel, see
/// <https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/include/uapi/linux/bpf.h>.
pub const BPF_KTIME_GETNS_IDX: u32 = 5;
/// Get monotonic time (since boot time) in nanoseconds. All arguments are unused.
///
/// # Examples
///
/// ```
/// use rbpf::helpers;
///
/// let t = helpers::bpf_time_getns(0, 0, 0, 0, 0);
/// let d = t / 10u64.pow(9) / 60 / 60 / 24;
/// let h = (t / 10u64.pow(9) / 60 / 60) % 24;
/// let m = (t / 10u64.pow(9) / 60 ) % 60;
/// let s = (t / 10u64.pow(9)) % 60;
/// let ns = t % 10u64.pow(9);
/// println!("Uptime: {:#x} == {} days {}:{}:{}, {} ns", t, d, h, m, s, ns);
/// ```
#[allow(dead_code)]
#[allow(unused_variables)]
#[allow(deprecated)]
#[cfg(feature = "std")]
pub fn bpf_time_getns(unused1: u64, unused2: u64, unused3: u64, unused4: u64, unused5: u64) -> u64 {
time::precise_time_ns()
}
// bpf_trace_printk()
/// Index of helper `bpf_trace_printk()`, equivalent to `bpf_trace_printf()`, in Linux kernel, see
/// <https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/include/uapi/linux/bpf.h>.
pub const BPF_TRACE_PRINTK_IDX: u32 = 6;
/// Prints its **last three** arguments to standard output. The **first two** arguments are
/// **unused**. Returns the number of bytes written.
///
/// By ignoring the first two arguments, it creates a helper with a behavior similar to that of
/// the equivalent `bpf_trace_printk()` helper from the Linux kernel.
///
/// # Examples
///
/// ```
/// use rbpf::helpers;
///
/// let res = helpers::bpf_trace_printf(0, 0, 1, 15, 32);
/// assert_eq!(res as usize, "bpf_trace_printf: 0x1, 0xf, 0x20\n".len());
/// ```
///
/// This will print `bpf_trace_printf: 0x1, 0xf, 0x20`.
///
/// The eBPF code needed to perform the call in this example would be nearly identical to the code
/// obtained by compiling the following code from C to eBPF with clang:
///
/// ```c
/// #include <linux/bpf.h>
/// #include "path/to/linux/samples/bpf/bpf_helpers.h"
///
/// int main(struct __sk_buff *skb)
/// {
/// // Only %d %u %x %ld %lu %lx %lld %llu %llx %p %s conversion specifiers allowed.
/// // See <https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/kernel/trace/bpf_trace.c>.
/// char *fmt = "bpf_trace_printk %llx, %llx, %llx\n";
/// return bpf_trace_printk(fmt, sizeof(fmt), 1, 15, 32);
/// }
/// ```
///
/// This would equally print the three numbers in `/sys/kernel/debug/tracing` file each time the
/// program is run.
#[allow(dead_code)]
#[allow(unused_variables)]
#[cfg(feature = "std")]
pub fn bpf_trace_printf(unused1: u64, unused2: u64, arg3: u64, arg4: u64, arg5: u64) -> u64 {
println!("bpf_trace_printf: {arg3:#x}, {arg4:#x}, {arg5:#x}");
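    // Compute how many hexadecimal digits are needed to print `x` (at least one, for 0), so
    // that the returned byte count matches what was actually written above.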
let size_arg = |x| {
if x == 0 {
1
} else {
(x as f64).log(16.0).floor() as u64 + 1
}
};
"bpf_trace_printf: 0x, 0x, 0x\n".len() as u64 + size_arg(arg3) + size_arg(arg4) + size_arg(arg5)
}
// Helpers coming from uBPF <https://github.com/iovisor/ubpf/blob/master/vm/test.c>
/// The idea is to assemble five bytes into a single `u64`. For compatibility with the helpers API,
/// each argument must be a `u64`.
///
/// # Examples
///
/// ```
/// use rbpf::helpers;
///
/// let gathered = helpers::gather_bytes(0x11, 0x22, 0x33, 0x44, 0x55);
/// assert_eq!(gathered, 0x1122334455);
/// ```
pub fn gather_bytes(arg1: u64, arg2: u64, arg3: u64, arg4: u64, arg5: u64) -> u64 {
arg1.wrapping_shl(32)
| arg2.wrapping_shl(24)
| arg3.wrapping_shl(16)
| arg4.wrapping_shl(8)
| arg5
}
/// Same as `void *memfrob(void *s, size_t n);` in `string.h` in C. See the GNU manual page (in
/// section 3) for `memfrob`. The memory is directly modified, and the helper returns 0 in all
/// cases. Arguments 3 to 5 are unused.
///
/// # Examples
///
/// ```
/// use rbpf::helpers;
///
/// let val: u64 = 0x112233;
/// let val_ptr = &val as *const u64;
///
/// helpers::memfrob(val_ptr as u64, 8, 0, 0, 0);
/// assert_eq!(val, 0x2a2a2a2a2a3b0819);
/// helpers::memfrob(val_ptr as u64, 8, 0, 0, 0);
/// assert_eq!(val, 0x112233);
/// ```
#[allow(unused_variables)]
pub fn memfrob(ptr: u64, len: u64, unused3: u64, unused4: u64, unused5: u64) -> u64 {
for i in 0..len {
unsafe {
            let p = (ptr + i) as *mut u8;
*p ^= 0b101010;
}
}
0
}
// TODO: Try again when asm!() is available in stable Rust.
// #![feature(asm)]
// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
// #[allow(unused_variables)]
// pub fn memfrob (ptr: u64, len: u64, arg3: u64, arg4: u64, arg5: u64) -> u64 {
// unsafe {
// asm!(
// "mov $0xf0, %rax"
// ::: "mov $0xf1, %rcx"
// ::: "mov $0xf2, %rdx"
// ::: "mov $0xf3, %rsi"
// ::: "mov $0xf4, %rdi"
// ::: "mov $0xf5, %r8"
// ::: "mov $0xf6, %r9"
// ::: "mov $0xf7, %r10"
// ::: "mov $0xf8, %r11"
// );
// }
// 0
// }
/// Compute and return the square root of argument 1, cast as a float. Arguments 2 to 5 are
/// unused.
///
/// # Examples
///
/// ```
/// use rbpf::helpers;
///
/// let x = helpers::sqrti(9, 0, 0, 0, 0);
/// assert_eq!(x, 3);
/// ```
#[allow(dead_code)]
#[allow(unused_variables)]
#[cfg(feature = "std")] // sqrt is only available when using `std`
pub fn sqrti(arg1: u64, unused2: u64, unused3: u64, unused4: u64, unused5: u64) -> u64 {
(arg1 as f64).sqrt() as u64
}
/// C-like `strcmp`, return 0 if the strings are equal, and a non-zero value otherwise.
///
/// # Examples
///
/// ```
/// use rbpf::helpers;
///
/// let foo = "This is a string.\0".as_ptr() as u64;
/// let bar = "This is another sting.\0".as_ptr() as u64;
///
/// assert!(helpers::strcmp(foo, foo, 0, 0, 0) == 0);
/// assert!(helpers::strcmp(foo, bar, 0, 0, 0) != 0);
/// ```
#[allow(dead_code)]
#[allow(unused_variables)]
pub fn strcmp(arg1: u64, arg2: u64, arg3: u64, unused4: u64, unused5: u64) -> u64 {
// C-like strcmp, maybe shorter than converting the bytes to string and comparing?
if arg1 == 0 || arg2 == 0 {
return u64::MAX;
}
let mut a = arg1;
let mut b = arg2;
unsafe {
let mut a_val = *(a as *const u8);
let mut b_val = *(b as *const u8);
while a_val == b_val && a_val != 0 && b_val != 0 {
a += 1;
b += 1;
a_val = *(a as *const u8);
b_val = *(b as *const u8);
}
if a_val >= b_val {
(a_val - b_val) as u64
} else {
(b_val - a_val) as u64
}
}
}
// Some additional helpers
/// Returns a random `u64` value between `min` and `max` (inclusive). Arguments 3 to 5 are
/// unused.
///
/// Relies on `rand()` function from libc, so `libc::srand()` should be called once before this
/// helper is used.
///
/// # Examples
///
/// ```
/// extern crate libc;
/// extern crate rbpf;
/// extern crate time;
///
/// unsafe {
/// libc::srand(time::precise_time_ns() as u32)
/// }
///
/// let n = rbpf::helpers::rand(3, 6, 0, 0, 0);
/// assert!(3 <= n && n <= 6);
/// ```
#[allow(dead_code)]
#[allow(unused_variables)]
#[cfg(feature = "std")]
pub fn rand(min: u64, max: u64, unused3: u64, unused4: u64, unused5: u64) -> u64 {
let mut n = unsafe { (libc::rand() as u64).wrapping_shl(32) + libc::rand() as u64 };
if min < max {
n = n % (max + 1 - min) + min;
};
n
}
/// Prints each helper function's name and its index.
#[cfg(feature = "std")]
pub fn show_helper() {
for (index, name) in BPF_FUNC_MAPPER.iter().enumerate() {
println!("{}:{}", index, name);
}
}
/// See <https://github.com/torvalds/linux/blob/master/include/uapi/linux/bpf.h>.
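///
/// The table is indexed by helper number, so the `*_IDX` constants defined above can be used to
/// look up a helper's name:
///
/// ```
/// use rbpf::helpers::{BPF_FUNC_MAPPER, BPF_TRACE_PRINTK_IDX};
///
/// assert_eq!(BPF_FUNC_MAPPER[BPF_TRACE_PRINTK_IDX as usize], "trace_printk");
/// ```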
pub const BPF_FUNC_MAPPER: &[&str] = &[
"unspec",
"map_lookup_elem",
"map_update_elem",
"map_delete_elem",
"probe_read",
"ktime_get_ns",
"trace_printk",
"get_prandom_u32",
"get_smp_processor_id",
"skb_store_bytes",
"l3_csum_replace",
"l4_csum_replace",
"tail_call",
"clone_redirect",
"get_current_pid_tgid",
"get_current_uid_gid",
"get_current_comm",
"get_cgroup_classid",
"skb_vlan_push",
"skb_vlan_pop",
"skb_get_tunnel_key",
"skb_set_tunnel_key",
"perf_event_read",
"redirect",
"get_route_realm",
"perf_event_output",
"skb_load_bytes",
"get_stackid",
"csum_diff",
"skb_get_tunnel_opt",
"skb_set_tunnel_opt",
"skb_change_proto",
"skb_change_type",
"skb_under_cgroup",
"get_hash_recalc",
"get_current_task",
"probe_write_user",
"current_task_under_cgroup",
"skb_change_tail",
"skb_pull_data",
"csum_update",
"set_hash_invalid",
"get_numa_node_id",
"skb_change_head",
"xdp_adjust_head",
"probe_read_str",
"get_socket_cookie",
"get_socket_uid",
"set_hash",
"setsockopt",
"skb_adjust_room",
"redirect_map",
"sk_redirect_map",
"sock_map_update",
"xdp_adjust_meta",
"perf_event_read_value",
"perf_prog_read_value",
"getsockopt",
"override_return",
"sock_ops_cb_flags_set",
"msg_redirect_map",
"msg_apply_bytes",
"msg_cork_bytes",
"msg_pull_data",
"bind",
"xdp_adjust_tail",
"skb_get_xfrm_state",
"get_stack",
"skb_load_bytes_relative",
"fib_lookup",
"sock_hash_update",
"msg_redirect_hash",
"sk_redirect_hash",
"lwt_push_encap",
"lwt_seg6_store_bytes",
"lwt_seg6_adjust_srh",
"lwt_seg6_action",
"rc_repeat",
"rc_keydown",
"skb_cgroup_id",
"get_current_cgroup_id",
"get_local_storage",
"sk_select_reuseport",
"skb_ancestor_cgroup_id",
"sk_lookup_tcp",
"sk_lookup_udp",
"sk_release",
"map_push_elem",
"map_pop_elem",
"map_peek_elem",
"msg_push_data",
"msg_pop_data",
"rc_pointer_rel",
"spin_lock",
"spin_unlock",
"sk_fullsock",
"tcp_sock",
"skb_ecn_set_ce",
"get_listener_sock",
"skc_lookup_tcp",
"tcp_check_syncookie",
"sysctl_get_name",
"sysctl_get_current_value",
"sysctl_get_new_value",
"sysctl_set_new_value",
"strtol",
"strtoul",
"sk_storage_get",
"sk_storage_delete",
"send_signal",
"tcp_gen_syncookie",
"skb_output",
"probe_read_user",
"probe_read_kernel",
"probe_read_user_str",
"probe_read_kernel_str",
"tcp_send_ack",
"send_signal_thread",
"jiffies64",
"read_branch_records",
"get_ns_current_pid_tgid",
"xdp_output",
"get_netns_cookie",
"get_current_ancestor_cgroup_id",
"sk_assign",
"ktime_get_boot_ns",
"seq_printf",
"seq_write",
"sk_cgroup_id",
"sk_ancestor_cgroup_id",
"ringbuf_output",
"ringbuf_reserve",
"ringbuf_submit",
"ringbuf_discard",
"ringbuf_query",
"csum_level",
"skc_to_tcp6_sock",
"skc_to_tcp_sock",
"skc_to_tcp_timewait_sock",
"skc_to_tcp_request_sock",
"skc_to_udp6_sock",
"get_task_stack",
"load_hdr_opt",
"store_hdr_opt",
"reserve_hdr_opt",
"inode_storage_get",
"inode_storage_delete",
"d_path",
"copy_from_user",
"snprintf_btf",
"seq_printf_btf",
"skb_cgroup_classid",
"redirect_neigh",
"per_cpu_ptr",
"this_cpu_ptr",
"redirect_peer",
"task_storage_get",
"task_storage_delete",
"get_current_task_btf",
"bprm_opts_set",
"ktime_get_coarse_ns",
"ima_inode_hash",
"sock_from_file",
"check_mtu",
"for_each_map_elem",
"snprintf",
"sys_bpf",
"btf_find_by_name_kind",
"sys_close",
"timer_init",
"timer_set_callback",
"timer_start",
"timer_cancel",
"get_func_ip",
"get_attach_cookie",
"task_pt_regs",
"get_branch_snapshot",
"trace_vprintk",
"skc_to_unix_sock",
"kallsyms_lookup_name",
"find_vma",
"loop",
"strncmp",
"get_func_arg",
"get_func_ret",
"get_func_arg_cnt",
"get_retval",
"set_retval",
"xdp_get_buff_len",
"xdp_load_bytes",
"xdp_store_bytes",
"copy_from_user_task",
"skb_set_tstamp",
"ima_file_hash",
"kptr_xchg",
"map_lookup_percpu_elem",
"skc_to_mptcp_sock",
"dynptr_from_mem",
"ringbuf_reserve_dynptr",
"ringbuf_submit_dynptr",
"ringbuf_discard_dynptr",
"dynptr_read",
"dynptr_write",
"dynptr_data",
"tcp_raw_gen_syncookie_ipv4",
"tcp_raw_gen_syncookie_ipv6",
"tcp_raw_check_syncookie_ipv4",
"tcp_raw_check_syncookie_ipv6",
"ktime_get_tai_ns",
"user_ringbuf_drain",
"cgrp_storage_get",
"cgrp_storage_delete",
];

File diff suppressed because it is too large


@@ -0,0 +1,708 @@
// SPDX-License-Identifier: (Apache-2.0 OR MIT)
// Derived from uBPF <https://github.com/iovisor/ubpf>
// Copyright 2015 Big Switch Networks, Inc
// (uBPF: VM architecture, parts of the interpreter, originally in C)
// Copyright 2016 6WIND S.A. <quentin.monnet@6wind.com>
// (Translation to Rust, MetaBuff/multiple classes addition, hashmaps for helpers)
use crate::{
ebpf::{self, Insn},
helpers::BPF_FUNC_MAPPER,
stack::StackFrame,
*,
};
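// There are two versions of `check_mem`: the kernel-side build (without the "user" feature)
// only traces the access and performs no bounds check, while the "user" build below rejects
// any access that falls outside the metadata buffer, the packet memory and the eBPF stack.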
#[cfg(not(feature = "user"))]
#[allow(unused)]
fn check_mem(
addr: u64,
len: usize,
access_type: &str,
insn_ptr: usize,
mbuff: &[u8],
mem: &[u8],
stack: &[u8],
) -> Result<(), Error> {
log::trace!(
"check_mem: addr {:#x}, len {}, access_type {}, insn_ptr {}",
addr,
len,
access_type,
insn_ptr
);
log::trace!(
"check_mem: mbuff: {:#x}/{:#x}, mem: {:#x}/{:#x}, stack: {:#x}/{:#x}",
mbuff.as_ptr() as u64,
mbuff.len(),
mem.as_ptr() as u64,
mem.len(),
stack.as_ptr() as u64,
stack.len()
);
Ok(())
}
#[cfg(feature = "user")]
fn check_mem(
addr: u64,
len: usize,
access_type: &str,
insn_ptr: usize,
mbuff: &[u8],
mem: &[u8],
stack: &[u8],
) -> Result<(), Error> {
if let Some(addr_end) = addr.checked_add(len as u64) {
if mbuff.as_ptr() as u64 <= addr && addr_end <= mbuff.as_ptr() as u64 + mbuff.len() as u64 {
return Ok(());
}
if mem.as_ptr() as u64 <= addr && addr_end <= mem.as_ptr() as u64 + mem.len() as u64 {
return Ok(());
}
if stack.as_ptr() as u64 <= addr && addr_end <= stack.as_ptr() as u64 + stack.len() as u64 {
return Ok(());
}
}
Err(Error::new(ErrorKind::Other, format!(
"Error: out of bounds memory {} (insn #{:?}), addr {:#x}, size {:?}\nmbuff: {:#x}/{:#x}, mem: {:#x}/{:#x}, stack: {:#x}/{:#x}",
access_type, insn_ptr, addr, len,
mbuff.as_ptr() as u64, mbuff.len(),
mem.as_ptr() as u64, mem.len(),
stack.as_ptr() as u64, stack.len()
)))
}
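// Jump offsets are relative to the instruction following the jump; the caller has already
// advanced `insn_ptr` past the current instruction, so adding `insn.off` lands on the target.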
#[inline]
fn do_jump(insn_ptr: &mut usize, insn: &Insn) {
*insn_ptr = (*insn_ptr as i16 + insn.off) as usize;
}
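/// Run the eBPF program `prog_` against the memory area `mem` and the metadata buffer `mbuff`,
/// resolving helper calls through `helpers` (a map from helper index to function).
///
/// Returns the value left in register r0 when the program exits, or an `Error`, for example
/// when no program was provided or a memory access is rejected.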
#[allow(unknown_lints)]
#[allow(cyclomatic_complexity)]
pub fn execute_program(
prog_: Option<&[u8]>,
mem: &[u8],
mbuff: &[u8],
helpers: &HashMap<u32, ebpf::Helper>,
) -> Result<u64, Error> {
const U32MAX: u64 = u32::MAX as u64;
const SHIFT_MASK_64: u64 = 0x3f;
let prog = match prog_ {
Some(prog) => prog,
None => Err(Error::new(
ErrorKind::Other,
"Error: No program set, call prog_set() to load one",
))?,
};
let mut stacks = Vec::new();
let stack = StackFrame::new();
// R1 points to beginning of memory area, R10 to stack
let mut reg: [u64; 11] = [
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
stack.as_ptr() as u64 + stack.len() as u64,
];
stacks.push(stack);
if !mbuff.is_empty() {
reg[1] = mbuff.as_ptr() as u64;
} else if !mem.is_empty() {
reg[1] = mem.as_ptr() as u64;
}
let check_mem_load =
|stack: &[u8], addr: u64, len: usize, insn_ptr: usize| -> Result<(), Error> {
check_mem(addr, len, "load", insn_ptr, mbuff, mem, stack)
};
let check_mem_store =
|stack: &[u8], addr: u64, len: usize, insn_ptr: usize| -> Result<(), Error> {
check_mem(addr, len, "store", insn_ptr, mbuff, mem, stack)
};
// Loop on instructions
let mut insn_ptr: usize = 0;
while insn_ptr * ebpf::INSN_SIZE < prog.len() {
let insn = ebpf::get_insn(prog, insn_ptr);
insn_ptr += 1;
let _dst = insn.dst as usize;
let _src = insn.src as usize;
match insn.opc {
// BPF_LD class
            // LD_ABS_* and LD_IND_* are supposed to load a pointer to data from the metadata
            // buffer. Since this pointer is constant, and since we already know it (mem), do not
            // bother re-fetching it, just use `mem` directly.
ebpf::LD_ABS_B => {
reg[0] = unsafe {
let x = (mem.as_ptr() as u64 + (insn.imm as u32) as u64) as *const u8;
check_mem_load(stacks.last().unwrap().as_slice(), x as u64, 8, insn_ptr)?;
x.read_unaligned() as u64
}
}
ebpf::LD_ABS_H => {
reg[0] = unsafe {
let x = (mem.as_ptr() as u64 + (insn.imm as u32) as u64) as *const u16;
check_mem_load(stacks.last().unwrap().as_slice(), x as u64, 8, insn_ptr)?;
x.read_unaligned() as u64
}
}
ebpf::LD_ABS_W => {
reg[0] = unsafe {
let x = (mem.as_ptr() as u64 + (insn.imm as u32) as u64) as *const u32;
check_mem_load(stacks.last().unwrap().as_slice(), x as u64, 8, insn_ptr)?;
x.read_unaligned() as u64
}
}
ebpf::LD_ABS_DW => {
log::info!("executing LD_ABS_DW, set reg[{}] to {:#x}", _dst, insn.imm);
reg[0] = unsafe {
let x = (mem.as_ptr() as u64 + (insn.imm as u32) as u64) as *const u64;
check_mem_load(stacks.last().unwrap().as_slice(), x as u64, 8, insn_ptr)?;
x.read_unaligned()
}
}
ebpf::LD_IND_B => {
reg[0] = unsafe {
let x =
(mem.as_ptr() as u64 + reg[_src] + (insn.imm as u32) as u64) as *const u8;
check_mem_load(stacks.last().unwrap().as_slice(), x as u64, 8, insn_ptr)?;
x.read_unaligned() as u64
}
}
ebpf::LD_IND_H => {
reg[0] = unsafe {
let x =
(mem.as_ptr() as u64 + reg[_src] + (insn.imm as u32) as u64) as *const u16;
check_mem_load(stacks.last().unwrap().as_slice(), x as u64, 8, insn_ptr)?;
x.read_unaligned() as u64
}
}
ebpf::LD_IND_W => {
reg[0] = unsafe {
let x =
(mem.as_ptr() as u64 + reg[_src] + (insn.imm as u32) as u64) as *const u32;
check_mem_load(stacks.last().unwrap().as_slice(), x as u64, 8, insn_ptr)?;
x.read_unaligned() as u64
}
}
ebpf::LD_IND_DW => {
reg[0] = unsafe {
let x =
(mem.as_ptr() as u64 + reg[_src] + (insn.imm as u32) as u64) as *const u64;
check_mem_load(stacks.last().unwrap().as_slice(), x as u64, 8, insn_ptr)?;
x.read_unaligned()
}
}
ebpf::LD_DW_IMM => {
let next_insn = ebpf::get_insn(prog, insn_ptr);
insn_ptr += 1;
// log::warn!(
// "executing LD_DW_IMM, set reg[{}] to {:#x}",
// _dst,
// ((insn.imm as u32) as u64) + ((next_insn.imm as u64) << 32)
// );
reg[_dst] = ((insn.imm as u32) as u64) + ((next_insn.imm as u64) << 32);
}
// BPF_LDX class
ebpf::LD_B_REG => {
reg[_dst] = unsafe {
#[allow(clippy::cast_ptr_alignment)]
let x = (reg[_src] as *const u8).offset(insn.off as isize);
check_mem_load(stacks.last().unwrap().as_slice(), x as u64, 1, insn_ptr)?;
x.read_unaligned() as u64
}
}
ebpf::LD_H_REG => {
reg[_dst] = unsafe {
#[allow(clippy::cast_ptr_alignment)]
let x = (reg[_src] as *const u8).offset(insn.off as isize) as *const u16;
check_mem_load(stacks.last().unwrap().as_slice(), x as u64, 2, insn_ptr)?;
x.read_unaligned() as u64
}
}
ebpf::LD_W_REG => {
reg[_dst] = unsafe {
#[allow(clippy::cast_ptr_alignment)]
let x = (reg[_src] as *const u8).offset(insn.off as isize) as *const u32;
check_mem_load(stacks.last().unwrap().as_slice(), x as u64, 4, insn_ptr)?;
// log::warn!(
// "executing LD_W_REG, the ptr is REG:{} -> [{:#x}] + {:#x}",
// _src,
// reg[_src],
// insn.off
// );
x.read_unaligned() as u64
}
}
ebpf::LD_DW_REG => {
reg[_dst] = unsafe {
#[allow(clippy::cast_ptr_alignment)]
let x = (reg[_src] as *const u8).offset(insn.off as isize) as *const u64;
check_mem_load(stacks.last().unwrap().as_slice(), x as u64, 8, insn_ptr)?;
x.read_unaligned()
}
}
// BPF_ST class
ebpf::ST_B_IMM => unsafe {
let x = (reg[_dst] as *const u8).offset(insn.off as isize) as *mut u8;
check_mem_store(stacks.last().unwrap().as_slice(), x as u64, 1, insn_ptr)?;
x.write_unaligned(insn.imm as u8);
},
ebpf::ST_H_IMM => unsafe {
#[allow(clippy::cast_ptr_alignment)]
let x = (reg[_dst] as *const u8).offset(insn.off as isize) as *mut u16;
check_mem_store(stacks.last().unwrap().as_slice(), x as u64, 2, insn_ptr)?;
x.write_unaligned(insn.imm as u16);
},
ebpf::ST_W_IMM => unsafe {
#[allow(clippy::cast_ptr_alignment)]
let x = (reg[_dst] as *const u8).offset(insn.off as isize) as *mut u32;
check_mem_store(stacks.last().unwrap().as_slice(), x as u64, 4, insn_ptr)?;
x.write_unaligned(insn.imm as u32);
},
ebpf::ST_DW_IMM => unsafe {
#[allow(clippy::cast_ptr_alignment)]
let x = (reg[_dst] as *const u8).offset(insn.off as isize) as *mut u64;
check_mem_store(stacks.last().unwrap().as_slice(), x as u64, 8, insn_ptr)?;
x.write_unaligned(insn.imm as u64);
},
// BPF_STX class
ebpf::ST_B_REG => unsafe {
let x = (reg[_dst] as *const u8).offset(insn.off as isize) as *mut u8;
check_mem_store(stacks.last().unwrap().as_slice(), x as u64, 1, insn_ptr)?;
x.write_unaligned(reg[_src] as u8);
},
ebpf::ST_H_REG => unsafe {
#[allow(clippy::cast_ptr_alignment)]
let x = (reg[_dst] as *const u8).offset(insn.off as isize) as *mut u16;
check_mem_store(stacks.last().unwrap().as_slice(), x as u64, 2, insn_ptr)?;
x.write_unaligned(reg[_src] as u16);
},
ebpf::ST_W_REG => unsafe {
#[allow(clippy::cast_ptr_alignment)]
let x = (reg[_dst] as *const u8).offset(insn.off as isize) as *mut u32;
check_mem_store(stacks.last().unwrap().as_slice(), x as u64, 4, insn_ptr)?;
x.write_unaligned(reg[_src] as u32);
},
ebpf::ST_DW_REG => unsafe {
#[allow(clippy::cast_ptr_alignment)]
let x = (reg[_dst] as *const u8).offset(insn.off as isize) as *mut u64;
check_mem_store(stacks.last().unwrap().as_slice(), x as u64, 8, insn_ptr)?;
x.write_unaligned(reg[_src]);
},
ebpf::ST_W_XADD => unimplemented!(),
ebpf::ST_DW_XADD => unimplemented!(),
// BPF_ALU class
// TODO: check how overflow works in the kernel. Should we mask every src register value
// with U32MAX before performing the operation?
// Cf. ((0x11 << 32) - (0x1 << 32)) as u32 vs ((0x11 << 32) as u32 - (0x1 << 32) as u32)
ebpf::ADD32_IMM => reg[_dst] = (reg[_dst] as i32).wrapping_add(insn.imm) as u64, //((reg[_dst] & U32MAX) + insn.imm as u64) & U32MAX,
ebpf::ADD32_REG => reg[_dst] = (reg[_dst] as i32).wrapping_add(reg[_src] as i32) as u64, //((reg[_dst] & U32MAX) + (reg[_src] & U32MAX)) & U32MAX,
ebpf::SUB32_IMM => reg[_dst] = (reg[_dst] as i32).wrapping_sub(insn.imm) as u64,
ebpf::SUB32_REG => reg[_dst] = (reg[_dst] as i32).wrapping_sub(reg[_src] as i32) as u64,
ebpf::MUL32_IMM => reg[_dst] = (reg[_dst] as i32).wrapping_mul(insn.imm) as u64,
ebpf::MUL32_REG => reg[_dst] = (reg[_dst] as i32).wrapping_mul(reg[_src] as i32) as u64,
ebpf::DIV32_IMM if insn.imm as u32 == 0 => reg[_dst] = 0,
ebpf::DIV32_IMM => reg[_dst] = (reg[_dst] as u32 / insn.imm as u32) as u64,
ebpf::DIV32_REG if reg[_src] as u32 == 0 => reg[_dst] = 0,
ebpf::DIV32_REG => reg[_dst] = (reg[_dst] as u32 / reg[_src] as u32) as u64,
ebpf::OR32_IMM => reg[_dst] = (reg[_dst] as u32 | insn.imm as u32) as u64,
ebpf::OR32_REG => reg[_dst] = (reg[_dst] as u32 | reg[_src] as u32) as u64,
ebpf::AND32_IMM => reg[_dst] = (reg[_dst] as u32 & insn.imm as u32) as u64,
ebpf::AND32_REG => reg[_dst] = (reg[_dst] as u32 & reg[_src] as u32) as u64,
// As in the 64-bit case, the number of bits to shift should be masked with 0x1f,
// but .wrapping_shl()/.wrapping_shr() already take care of that for us.
ebpf::LSH32_IMM => reg[_dst] = (reg[_dst] as u32).wrapping_shl(insn.imm as u32) as u64,
ebpf::LSH32_REG => reg[_dst] = (reg[_dst] as u32).wrapping_shl(reg[_src] as u32) as u64,
ebpf::RSH32_IMM => reg[_dst] = (reg[_dst] as u32).wrapping_shr(insn.imm as u32) as u64,
ebpf::RSH32_REG => reg[_dst] = (reg[_dst] as u32).wrapping_shr(reg[_src] as u32) as u64,
ebpf::NEG32 => {
reg[_dst] = (reg[_dst] as i32).wrapping_neg() as u64;
reg[_dst] &= U32MAX;
}
ebpf::MOD32_IMM if insn.imm as u32 == 0 => (),
ebpf::MOD32_IMM => reg[_dst] = (reg[_dst] as u32 % insn.imm as u32) as u64,
ebpf::MOD32_REG if reg[_src] as u32 == 0 => (),
ebpf::MOD32_REG => reg[_dst] = (reg[_dst] as u32 % reg[_src] as u32) as u64,
ebpf::XOR32_IMM => reg[_dst] = (reg[_dst] as u32 ^ insn.imm as u32) as u64,
ebpf::XOR32_REG => reg[_dst] = (reg[_dst] as u32 ^ reg[_src] as u32) as u64,
ebpf::MOV32_IMM => reg[_dst] = insn.imm as u32 as u64,
ebpf::MOV32_REG => reg[_dst] = (reg[_src] as u32) as u64,
// As in the 64-bit case, the number of bits to shift should be masked with 0x1f,
// but .wrapping_shr() already takes care of that for us.
ebpf::ARSH32_IMM => {
reg[_dst] = (reg[_dst] as i32).wrapping_shr(insn.imm as u32) as u64;
reg[_dst] &= U32MAX;
}
ebpf::ARSH32_REG => {
reg[_dst] = (reg[_dst] as i32).wrapping_shr(reg[_src] as u32) as u64;
reg[_dst] &= U32MAX;
}
ebpf::LE => {
reg[_dst] = match insn.imm {
16 => (reg[_dst] as u16).to_le() as u64,
32 => (reg[_dst] as u32).to_le() as u64,
64 => reg[_dst].to_le(),
_ => unreachable!(),
};
}
ebpf::BE => {
reg[_dst] = match insn.imm {
16 => (reg[_dst] as u16).to_be() as u64,
32 => (reg[_dst] as u32).to_be() as u64,
64 => reg[_dst].to_be(),
_ => unreachable!(),
};
}
// BPF_ALU64 class
ebpf::ADD64_IMM => reg[_dst] = reg[_dst].wrapping_add(insn.imm as u64),
ebpf::ADD64_REG => reg[_dst] = reg[_dst].wrapping_add(reg[_src]),
ebpf::SUB64_IMM => reg[_dst] = reg[_dst].wrapping_sub(insn.imm as u64),
ebpf::SUB64_REG => reg[_dst] = reg[_dst].wrapping_sub(reg[_src]),
ebpf::MUL64_IMM => reg[_dst] = reg[_dst].wrapping_mul(insn.imm as u64),
ebpf::MUL64_REG => reg[_dst] = reg[_dst].wrapping_mul(reg[_src]),
ebpf::DIV64_IMM if insn.imm == 0 => reg[_dst] = 0,
ebpf::DIV64_IMM => reg[_dst] /= insn.imm as u64,
ebpf::DIV64_REG if reg[_src] == 0 => reg[_dst] = 0,
ebpf::DIV64_REG => reg[_dst] /= reg[_src],
ebpf::OR64_IMM => reg[_dst] |= insn.imm as u64,
ebpf::OR64_REG => reg[_dst] |= reg[_src],
ebpf::AND64_IMM => reg[_dst] &= insn.imm as u64,
ebpf::AND64_REG => reg[_dst] &= reg[_src],
ebpf::LSH64_IMM => reg[_dst] <<= insn.imm as u64 & SHIFT_MASK_64,
ebpf::LSH64_REG => reg[_dst] <<= reg[_src] & SHIFT_MASK_64,
ebpf::RSH64_IMM => reg[_dst] >>= insn.imm as u64 & SHIFT_MASK_64,
ebpf::RSH64_REG => reg[_dst] >>= reg[_src] & SHIFT_MASK_64,
ebpf::NEG64 => reg[_dst] = -(reg[_dst] as i64) as u64,
ebpf::MOD64_IMM if insn.imm == 0 => (),
ebpf::MOD64_IMM => reg[_dst] %= insn.imm as u64,
ebpf::MOD64_REG if reg[_src] == 0 => (),
ebpf::MOD64_REG => reg[_dst] %= reg[_src],
ebpf::XOR64_IMM => reg[_dst] ^= insn.imm as u64,
ebpf::XOR64_REG => reg[_dst] ^= reg[_src],
ebpf::MOV64_IMM => reg[_dst] = insn.imm as u64,
ebpf::MOV64_REG => reg[_dst] = reg[_src],
ebpf::ARSH64_IMM => {
reg[_dst] = (reg[_dst] as i64 >> (insn.imm as u64 & SHIFT_MASK_64)) as u64
}
ebpf::ARSH64_REG => {
reg[_dst] = (reg[_dst] as i64 >> (reg[_src] as u64 & SHIFT_MASK_64)) as u64
}
// BPF_JMP class
// TODO: check this actually works as expected for signed / unsigned ops
ebpf::JA => do_jump(&mut insn_ptr, &insn),
ebpf::JEQ_IMM => {
if reg[_dst] == insn.imm as u64 {
do_jump(&mut insn_ptr, &insn);
}
}
ebpf::JEQ_REG => {
if reg[_dst] == reg[_src] {
do_jump(&mut insn_ptr, &insn);
}
}
ebpf::JGT_IMM => {
if reg[_dst] > insn.imm as u64 {
do_jump(&mut insn_ptr, &insn);
}
}
ebpf::JGT_REG => {
if reg[_dst] > reg[_src] {
do_jump(&mut insn_ptr, &insn);
}
}
ebpf::JGE_IMM => {
if reg[_dst] >= insn.imm as u64 {
do_jump(&mut insn_ptr, &insn);
}
}
ebpf::JGE_REG => {
if reg[_dst] >= reg[_src] {
do_jump(&mut insn_ptr, &insn);
}
}
ebpf::JLT_IMM => {
if reg[_dst] < insn.imm as u64 {
do_jump(&mut insn_ptr, &insn);
}
}
ebpf::JLT_REG => {
if reg[_dst] < reg[_src] {
do_jump(&mut insn_ptr, &insn);
}
}
ebpf::JLE_IMM => {
if reg[_dst] <= insn.imm as u64 {
do_jump(&mut insn_ptr, &insn);
}
}
ebpf::JLE_REG => {
if reg[_dst] <= reg[_src] {
do_jump(&mut insn_ptr, &insn);
}
}
ebpf::JSET_IMM => {
if reg[_dst] & insn.imm as u64 != 0 {
do_jump(&mut insn_ptr, &insn);
}
}
ebpf::JSET_REG => {
if reg[_dst] & reg[_src] != 0 {
do_jump(&mut insn_ptr, &insn);
}
}
ebpf::JNE_IMM => {
if reg[_dst] != insn.imm as u64 {
do_jump(&mut insn_ptr, &insn);
}
}
ebpf::JNE_REG => {
if reg[_dst] != reg[_src] {
do_jump(&mut insn_ptr, &insn);
}
}
ebpf::JSGT_IMM => {
if reg[_dst] as i64 > insn.imm as i64 {
do_jump(&mut insn_ptr, &insn);
}
}
ebpf::JSGT_REG => {
if reg[_dst] as i64 > reg[_src] as i64 {
do_jump(&mut insn_ptr, &insn);
}
}
ebpf::JSGE_IMM => {
if reg[_dst] as i64 >= insn.imm as i64 {
do_jump(&mut insn_ptr, &insn);
}
}
ebpf::JSGE_REG => {
if reg[_dst] as i64 >= reg[_src] as i64 {
do_jump(&mut insn_ptr, &insn);
}
}
ebpf::JSLT_IMM => {
if (reg[_dst] as i64) < insn.imm as i64 {
do_jump(&mut insn_ptr, &insn);
}
}
ebpf::JSLT_REG => {
if (reg[_dst] as i64) < reg[_src] as i64 {
do_jump(&mut insn_ptr, &insn);
}
}
ebpf::JSLE_IMM => {
if reg[_dst] as i64 <= insn.imm as i64 {
do_jump(&mut insn_ptr, &insn);
}
}
ebpf::JSLE_REG => {
if reg[_dst] as i64 <= reg[_src] as i64 {
do_jump(&mut insn_ptr, &insn);
}
}
// BPF_JMP32 class
ebpf::JEQ_IMM32 => {
if reg[_dst] as u32 == insn.imm as u32 {
do_jump(&mut insn_ptr, &insn);
}
}
ebpf::JEQ_REG32 => {
if reg[_dst] as u32 == reg[_src] as u32 {
do_jump(&mut insn_ptr, &insn);
}
}
ebpf::JGT_IMM32 => {
if reg[_dst] as u32 > insn.imm as u32 {
do_jump(&mut insn_ptr, &insn);
}
}
ebpf::JGT_REG32 => {
if reg[_dst] as u32 > reg[_src] as u32 {
do_jump(&mut insn_ptr, &insn);
}
}
ebpf::JGE_IMM32 => {
if reg[_dst] as u32 >= insn.imm as u32 {
do_jump(&mut insn_ptr, &insn);
}
}
ebpf::JGE_REG32 => {
if reg[_dst] as u32 >= reg[_src] as u32 {
do_jump(&mut insn_ptr, &insn);
}
}
ebpf::JLT_IMM32 => {
if (reg[_dst] as u32) < insn.imm as u32 {
do_jump(&mut insn_ptr, &insn);
}
}
ebpf::JLT_REG32 => {
if (reg[_dst] as u32) < reg[_src] as u32 {
do_jump(&mut insn_ptr, &insn);
}
}
ebpf::JLE_IMM32 => {
if reg[_dst] as u32 <= insn.imm as u32 {
do_jump(&mut insn_ptr, &insn);
}
}
ebpf::JLE_REG32 => {
if reg[_dst] as u32 <= reg[_src] as u32 {
do_jump(&mut insn_ptr, &insn);
}
}
ebpf::JSET_IMM32 => {
if reg[_dst] as u32 & insn.imm as u32 != 0 {
do_jump(&mut insn_ptr, &insn);
}
}
ebpf::JSET_REG32 => {
if reg[_dst] as u32 & reg[_src] as u32 != 0 {
do_jump(&mut insn_ptr, &insn);
}
}
ebpf::JNE_IMM32 => {
if reg[_dst] as u32 != insn.imm as u32 {
do_jump(&mut insn_ptr, &insn);
}
}
ebpf::JNE_REG32 => {
if reg[_dst] as u32 != reg[_src] as u32 {
do_jump(&mut insn_ptr, &insn);
}
}
ebpf::JSGT_IMM32 => {
if reg[_dst] as i32 > insn.imm {
do_jump(&mut insn_ptr, &insn);
}
}
ebpf::JSGT_REG32 => {
if reg[_dst] as i32 > reg[_src] as i32 {
do_jump(&mut insn_ptr, &insn);
}
}
ebpf::JSGE_IMM32 => {
if reg[_dst] as i32 >= insn.imm {
do_jump(&mut insn_ptr, &insn);
}
}
ebpf::JSGE_REG32 => {
if reg[_dst] as i32 >= reg[_src] as i32 {
do_jump(&mut insn_ptr, &insn);
}
}
ebpf::JSLT_IMM32 => {
if (reg[_dst] as i32) < insn.imm {
do_jump(&mut insn_ptr, &insn);
}
}
ebpf::JSLT_REG32 => {
if (reg[_dst] as i32) < reg[_src] as i32 {
do_jump(&mut insn_ptr, &insn);
}
}
ebpf::JSLE_IMM32 => {
if reg[_dst] as i32 <= insn.imm {
do_jump(&mut insn_ptr, &insn);
}
}
ebpf::JSLE_REG32 => {
if reg[_dst] as i32 <= reg[_src] as i32 {
do_jump(&mut insn_ptr, &insn);
}
}
// Do not delegate the check to the verifier, since registered functions can be
// changed after the program has been verified.
ebpf::CALL => {
// See https://www.kernel.org/doc/html/latest/bpf/standardization/instruction-set.html#id16
let src_reg = _src;
let call_func_res = match src_reg {
0 => {
// Handle call by address to external function.
if let Some(function) = helpers.get(&(insn.imm as u32)) {
reg[0] = function(reg[1], reg[2], reg[3], reg[4], reg[5]);
Ok(())
} else {
Err(format!(
"Error: unknown helper function (id: {:#x}) [{}], (instruction #{})",
insn.imm as u32,
BPF_FUNC_MAPPER.get(insn.imm as usize).unwrap_or(&"unknown"),
insn_ptr
))
}
}
1 => {
// BPF-to-BPF call: the callee is in the same program, so we can simply
// jump to its address after saving the caller's state.
if stacks.len() >= ebpf::RBPF_MAX_CALL_DEPTH {
Err(format!(
"Error: bpf to bpf call stack limit reached (instruction #{}) max depth: {}",
insn_ptr, ebpf::RBPF_MAX_CALL_DEPTH
))
} else {
let pre_stack = stacks.last_mut().unwrap();
// Save the callee-saved registers
pre_stack.save_registers(&reg[6..=9]);
// Save the return address
pre_stack.save_return_address(insn_ptr as u16);
// Save the stack pointer
pre_stack.save_sp(reg[10] as u16);
let stack = StackFrame::new();
log::trace!(
"BPF TO BPF CALL: new pc: {} + {} = {}",
insn_ptr,
insn.imm,
insn_ptr + insn.imm as usize
);
reg[10] = stack.as_ptr() as u64 + stack.len() as u64;
stacks.push(stack);
insn_ptr += insn.imm as usize;
Ok(())
}
}
_ => {
Err(format!(
"Error: the function call type (id: {:#x}) [{}], (instruction #{}) not supported",
insn.imm as u32,
BPF_FUNC_MAPPER.get(insn.imm as usize).unwrap_or(&"unknown"),
insn_ptr
))
}
};
if let Err(e) = call_func_res {
Err(Error::new(ErrorKind::Other, e))?;
}
}
ebpf::TAIL_CALL => unimplemented!(),
ebpf::EXIT => {
if stacks.len() == 1 {
return Ok(reg[0]);
} else {
// Pop the stack
stacks.pop();
let stack = stacks.last().unwrap();
// Restore the callee saved registers
reg[6..=9].copy_from_slice(&stack.get_registers());
// Restore the return address
insn_ptr = stack.get_return_address() as usize;
// Restore the stack pointer
reg[10] = stack.get_sp() as u64;
log::trace!("EXIT: new pc: {}", insn_ptr);
}
}
_ => unreachable!(),
}
}
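// Reaching this point would mean the program ran off its end without executing EXIT,
// which the verifier is expected to prevent.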
unreachable!()
}

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@ -0,0 +1,41 @@
// SPDX-License-Identifier: (Apache-2.0 OR MIT)
//! This module provides a simple implementation of the Error struct that is
//! used as a drop-in replacement for `std::io::Error` when using `rbpf` in `no_std`.
use alloc::string::String;
/// Implementation of Error for no_std applications.
/// Ensures that the existing code can use it with the same interface
/// as the Error from std::io::Error.
#[derive(Debug)]
pub struct Error {
#[allow(dead_code)]
kind: ErrorKind,
#[allow(dead_code)]
error: String,
}
impl Error {
/// New function exposing the same signature as `std::io::Error::new`.
#[allow(dead_code)]
pub fn new<S: Into<String>>(kind: ErrorKind, error: S) -> Error {
Error {
kind,
error: error.into(),
}
}
}
/// The current version of `rbpf` only uses the [`Other`](ErrorKind::Other) variant
/// from the [std::io::ErrorKind] enum. If a dependency on other variants is
/// introduced in the future, this enum will need to be updated accordingly to maintain
/// compatibility with the real `ErrorKind`. The reason all available variants
/// aren't included in the first place is that [std::io::ErrorKind] exposes
/// 40 variants, and not all of them are meaningful under `no_std`.
#[derive(Debug)]
pub enum ErrorKind {
/// The no_std code only uses this variant.
#[allow(dead_code)]
Other,
}
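// Illustrative sketch (not part of this diff): call sites construct this drop-in type
// exactly as they would `std::io::Error`; the function below is hypothetical.
#[allow(dead_code)]
fn example_reject(reason: &str) -> Result<(), Error> {
// `reason` is converted into a String through the generic `Into<String>` bound.
Err(Error::new(ErrorKind::Other, reason))
}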

View File

@ -0,0 +1,75 @@
use crate::{ebpf::STACK_SIZE, vec, Vec};
pub struct StackFrame {
return_address: u16,
saved_registers: [u64; 4],
sp: u16,
frame: Vec<u8>,
}
impl StackFrame {
/// Create a new stack frame
///
/// The stack frame is created with a capacity of `STACK_SIZE` == 512 bytes
pub fn new() -> Self {
Self {
sp: 0,
return_address: 0,
saved_registers: [0; 4],
frame: vec![0; STACK_SIZE],
}
}
/// Create a new stack frame with a given capacity
#[allow(unused)]
pub fn with_capacity(capacity: usize) -> Self {
Self {
sp: 0,
return_address: 0,
saved_registers: [0; 4],
frame: vec![0; capacity],
}
}
/// The size of the stack frame, in bytes
pub fn len(&self) -> usize {
self.frame.len()
}
pub fn as_ptr(&self) -> *const u8 {
self.frame.as_ptr()
}
pub fn as_slice(&self) -> &[u8] {
self.frame.as_slice()
}
/// Save the callee-saved registers
pub fn save_registers(&mut self, regs: &[u64]) {
self.saved_registers.copy_from_slice(regs);
}
/// Get the callee-saved registers
pub fn get_registers(&self) -> [u64; 4] {
self.saved_registers
}
/// Save the return address
pub fn save_return_address(&mut self, address: u16) {
self.return_address = address;
}
/// Get the return address
pub fn get_return_address(&self) -> u16 {
self.return_address
}
/// Save the stack pointer
pub fn save_sp(&mut self, sp: u16) {
self.sp = sp;
}
/// Get the stack pointer
pub fn get_sp(&self) -> u16 {
self.sp
}
}
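// Illustrative sketch (not part of this diff): how the interpreter pairs a CALL with the
// matching EXIT using StackFrame; `regs` stands in for the interpreter's r6..r9 window.
#[allow(dead_code)]
fn example_call_and_return() {
let mut caller = StackFrame::new();
let regs = [1u64, 2, 3, 4];
// On CALL: preserve the callee-saved registers, the return address and the frame pointer.
caller.save_registers(&regs);
caller.save_return_address(42);
caller.save_sp(512);
// On EXIT: everything saved above can be read back to restore the caller.
assert_eq!(caller.get_registers(), regs);
assert_eq!(caller.get_return_address(), 42);
assert_eq!(caller.get_sp(), 512);
}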

View File

@ -0,0 +1,386 @@
// SPDX-License-Identifier: (Apache-2.0 OR MIT)
// Derived from uBPF <https://github.com/iovisor/ubpf>
// Copyright 2015 Big Switch Networks, Inc
// (uBPF: safety checks, originally in C)
// Copyright 2016 6WIND S.A. <quentin.monnet@6wind.com>
// (Translation to Rust)
// This “verifier” performs simple checks when the eBPF program is loaded into the VM (before it is
// interpreted or JIT-compiled). It has nothing to do with the much more elaborate verifier inside the
// Linux kernel. There is no verification of the program's control flow (which should form a Directed
// Acyclic Graph) or of the consistency of register usage (the kernel's verifier assigns types to the
// registers and is much stricter).
//
// On the other hand, rbpf is not expected to run in kernel space.
//
// Improving the verifier would be nice, but this is not trivial (and the Linux kernel is under the GPL
// license, so we cannot copy it).
//
// Contrary to the verifier of the Linux kernel, this one does not modify the bytecode at all.
use alloc::format;
use crate::{ebpf, Error, ErrorKind};
fn reject<S: AsRef<str>>(msg: S) -> Result<(), Error> {
let full_msg = format!("[Verifier] Error: {}", msg.as_ref());
Err(Error::new(ErrorKind::Other, full_msg))
}
fn check_prog_len(prog: &[u8]) -> Result<(), Error> {
if prog.len() % ebpf::INSN_SIZE != 0 {
reject(format!(
"eBPF program length must be a multiple of {:?} octets",
ebpf::INSN_SIZE
))?;
}
if prog.len() > ebpf::PROG_MAX_SIZE {
reject(format!(
"eBPF program length limited to {:?}, here {:?}",
ebpf::PROG_MAX_INSNS,
prog.len() / ebpf::INSN_SIZE
))?;
}
if prog.is_empty() {
reject("no program set, call set_program() to load one")?;
}
let last_opc = ebpf::get_insn(prog, (prog.len() / ebpf::INSN_SIZE) - 1).opc;
if last_opc & ebpf::BPF_CLS_MASK != ebpf::BPF_JMP {
reject("program does not end with “EXIT” instruction")?;
}
Ok(())
}
fn check_imm_endian(insn: &ebpf::Insn, insn_ptr: usize) -> Result<(), Error> {
match insn.imm {
16 | 32 | 64 => Ok(()),
_ => reject(format!(
"unsupported argument for LE/BE (insn #{insn_ptr:?})"
)),
}
}
fn check_load_dw(prog: &[u8], insn_ptr: usize) -> Result<(), Error> {
// We know we can reach the next insn since we enforce an EXIT insn at the end of the program,
// while this function should only be called for an LD_DW insn, which cannot be the last one.
let next_insn = ebpf::get_insn(prog, insn_ptr + 1);
if next_insn.opc != 0 {
reject(format!("incomplete LD_DW instruction (insn #{insn_ptr:?})"))?;
}
Ok(())
}
fn check_jmp_offset(prog: &[u8], insn_ptr: usize) -> Result<(), Error> {
let insn = ebpf::get_insn(prog, insn_ptr);
if insn.off == -1 {
reject(format!("infinite loop (insn #{insn_ptr:?})"))?;
}
let dst_insn_ptr = insn_ptr as isize + 1 + insn.off as isize;
if dst_insn_ptr < 0 || dst_insn_ptr as usize >= (prog.len() / ebpf::INSN_SIZE) {
reject(format!(
"jump out of code to #{dst_insn_ptr:?} (insn #{insn_ptr:?})"
))?;
}
let dst_insn = ebpf::get_insn(prog, dst_insn_ptr as usize);
if dst_insn.opc == 0 {
reject(format!(
"jump to middle of LD_DW at #{dst_insn_ptr:?} (insn #{insn_ptr:?})"
))?;
}
Ok(())
}
fn check_registers(insn: &ebpf::Insn, store: bool, insn_ptr: usize) -> Result<(), Error> {
if insn.src > 10 {
reject(format!("invalid source register (insn #{insn_ptr:?})"))?;
}
match (insn.dst, store) {
(0..=9, _) | (10, true) => Ok(()),
(10, false) => reject(format!(
"cannot write into register r10 (insn #{insn_ptr:?})"
)),
(_, _) => reject(format!("invalid destination register (insn #{insn_ptr:?})")),
}
}
pub fn check(prog: &[u8]) -> Result<(), Error> {
check_prog_len(prog)?;
let mut insn_ptr: usize = 0;
while insn_ptr * ebpf::INSN_SIZE < prog.len() {
let insn = ebpf::get_insn(prog, insn_ptr);
let mut store = false;
match insn.opc {
// BPF_LD class
ebpf::LD_ABS_B => {}
ebpf::LD_ABS_H => {}
ebpf::LD_ABS_W => {}
ebpf::LD_ABS_DW => {}
ebpf::LD_IND_B => {}
ebpf::LD_IND_H => {}
ebpf::LD_IND_W => {}
ebpf::LD_IND_DW => {}
ebpf::LD_DW_IMM => {
store = true;
check_load_dw(prog, insn_ptr)?;
insn_ptr += 1;
}
// BPF_LDX class
ebpf::LD_B_REG => {}
ebpf::LD_H_REG => {}
ebpf::LD_W_REG => {}
ebpf::LD_DW_REG => {}
// BPF_ST class
ebpf::ST_B_IMM => store = true,
ebpf::ST_H_IMM => store = true,
ebpf::ST_W_IMM => store = true,
ebpf::ST_DW_IMM => store = true,
// BPF_STX class
ebpf::ST_B_REG => store = true,
ebpf::ST_H_REG => store = true,
ebpf::ST_W_REG => store = true,
ebpf::ST_DW_REG => store = true,
ebpf::ST_W_XADD => {
unimplemented!();
}
ebpf::ST_DW_XADD => {
unimplemented!();
}
// BPF_ALU class
ebpf::ADD32_IMM => {}
ebpf::ADD32_REG => {}
ebpf::SUB32_IMM => {}
ebpf::SUB32_REG => {}
ebpf::MUL32_IMM => {}
ebpf::MUL32_REG => {}
ebpf::DIV32_IMM => {}
ebpf::DIV32_REG => {}
ebpf::OR32_IMM => {}
ebpf::OR32_REG => {}
ebpf::AND32_IMM => {}
ebpf::AND32_REG => {}
ebpf::LSH32_IMM => {}
ebpf::LSH32_REG => {}
ebpf::RSH32_IMM => {}
ebpf::RSH32_REG => {}
ebpf::NEG32 => {}
ebpf::MOD32_IMM => {}
ebpf::MOD32_REG => {}
ebpf::XOR32_IMM => {}
ebpf::XOR32_REG => {}
ebpf::MOV32_IMM => {}
ebpf::MOV32_REG => {}
ebpf::ARSH32_IMM => {}
ebpf::ARSH32_REG => {}
ebpf::LE => {
check_imm_endian(&insn, insn_ptr)?;
}
ebpf::BE => {
check_imm_endian(&insn, insn_ptr)?;
}
// BPF_ALU64 class
ebpf::ADD64_IMM => {}
ebpf::ADD64_REG => {}
ebpf::SUB64_IMM => {}
ebpf::SUB64_REG => {}
ebpf::MUL64_IMM => {}
ebpf::MUL64_REG => {}
ebpf::DIV64_IMM => {}
ebpf::DIV64_REG => {}
ebpf::OR64_IMM => {}
ebpf::OR64_REG => {}
ebpf::AND64_IMM => {}
ebpf::AND64_REG => {}
ebpf::LSH64_IMM => {}
ebpf::LSH64_REG => {}
ebpf::RSH64_IMM => {}
ebpf::RSH64_REG => {}
ebpf::NEG64 => {}
ebpf::MOD64_IMM => {}
ebpf::MOD64_REG => {}
ebpf::XOR64_IMM => {}
ebpf::XOR64_REG => {}
ebpf::MOV64_IMM => {}
ebpf::MOV64_REG => {}
ebpf::ARSH64_IMM => {}
ebpf::ARSH64_REG => {}
// BPF_JMP class
ebpf::JA => {
check_jmp_offset(prog, insn_ptr)?;
}
ebpf::JEQ_IMM => {
check_jmp_offset(prog, insn_ptr)?;
}
ebpf::JEQ_REG => {
check_jmp_offset(prog, insn_ptr)?;
}
ebpf::JGT_IMM => {
check_jmp_offset(prog, insn_ptr)?;
}
ebpf::JGT_REG => {
check_jmp_offset(prog, insn_ptr)?;
}
ebpf::JGE_IMM => {
check_jmp_offset(prog, insn_ptr)?;
}
ebpf::JGE_REG => {
check_jmp_offset(prog, insn_ptr)?;
}
ebpf::JLT_IMM => {
check_jmp_offset(prog, insn_ptr)?;
}
ebpf::JLT_REG => {
check_jmp_offset(prog, insn_ptr)?;
}
ebpf::JLE_IMM => {
check_jmp_offset(prog, insn_ptr)?;
}
ebpf::JLE_REG => {
check_jmp_offset(prog, insn_ptr)?;
}
ebpf::JSET_IMM => {
check_jmp_offset(prog, insn_ptr)?;
}
ebpf::JSET_REG => {
check_jmp_offset(prog, insn_ptr)?;
}
ebpf::JNE_IMM => {
check_jmp_offset(prog, insn_ptr)?;
}
ebpf::JNE_REG => {
check_jmp_offset(prog, insn_ptr)?;
}
ebpf::JSGT_IMM => {
check_jmp_offset(prog, insn_ptr)?;
}
ebpf::JSGT_REG => {
check_jmp_offset(prog, insn_ptr)?;
}
ebpf::JSGE_IMM => {
check_jmp_offset(prog, insn_ptr)?;
}
ebpf::JSGE_REG => {
check_jmp_offset(prog, insn_ptr)?;
}
ebpf::JSLT_IMM => {
check_jmp_offset(prog, insn_ptr)?;
}
ebpf::JSLT_REG => {
check_jmp_offset(prog, insn_ptr)?;
}
ebpf::JSLE_IMM => {
check_jmp_offset(prog, insn_ptr)?;
}
ebpf::JSLE_REG => {
check_jmp_offset(prog, insn_ptr)?;
}
// BPF_JMP32 class
ebpf::JEQ_IMM32 => {
check_jmp_offset(prog, insn_ptr)?;
}
ebpf::JEQ_REG32 => {
check_jmp_offset(prog, insn_ptr)?;
}
ebpf::JGT_IMM32 => {
check_jmp_offset(prog, insn_ptr)?;
}
ebpf::JGT_REG32 => {
check_jmp_offset(prog, insn_ptr)?;
}
ebpf::JGE_IMM32 => {
check_jmp_offset(prog, insn_ptr)?;
}
ebpf::JGE_REG32 => {
check_jmp_offset(prog, insn_ptr)?;
}
ebpf::JLT_IMM32 => {
check_jmp_offset(prog, insn_ptr)?;
}
ebpf::JLT_REG32 => {
check_jmp_offset(prog, insn_ptr)?;
}
ebpf::JLE_IMM32 => {
check_jmp_offset(prog, insn_ptr)?;
}
ebpf::JLE_REG32 => {
check_jmp_offset(prog, insn_ptr)?;
}
ebpf::JSET_IMM32 => {
check_jmp_offset(prog, insn_ptr)?;
}
ebpf::JSET_REG32 => {
check_jmp_offset(prog, insn_ptr)?;
}
ebpf::JNE_IMM32 => {
check_jmp_offset(prog, insn_ptr)?;
}
ebpf::JNE_REG32 => {
check_jmp_offset(prog, insn_ptr)?;
}
ebpf::JSGT_IMM32 => {
check_jmp_offset(prog, insn_ptr)?;
}
ebpf::JSGT_REG32 => {
check_jmp_offset(prog, insn_ptr)?;
}
ebpf::JSGE_IMM32 => {
check_jmp_offset(prog, insn_ptr)?;
}
ebpf::JSGE_REG32 => {
check_jmp_offset(prog, insn_ptr)?;
}
ebpf::JSLT_IMM32 => {
check_jmp_offset(prog, insn_ptr)?;
}
ebpf::JSLT_REG32 => {
check_jmp_offset(prog, insn_ptr)?;
}
ebpf::JSLE_IMM32 => {
check_jmp_offset(prog, insn_ptr)?;
}
ebpf::JSLE_REG32 => {
check_jmp_offset(prog, insn_ptr)?;
}
ebpf::CALL => {}
ebpf::TAIL_CALL => {
unimplemented!()
}
ebpf::EXIT => {}
_ => {
reject(format!(
"unknown eBPF opcode {:#2x} (insn #{insn_ptr:?})",
insn.opc
))?;
}
}
check_registers(&insn, store, insn_ptr)?;
insn_ptr += 1;
}
// insn_ptr should now be equal to number of instructions.
if insn_ptr != prog.len() / ebpf::INSN_SIZE {
reject(format!("jumped out of code to #{insn_ptr:?}"))?;
}
Ok(())
}
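// Illustrative sketch (not part of this diff): the smallest program this verifier accepts,
// "mov64 r0, 0; exit", encoded by hand. The function name is hypothetical.
#[allow(dead_code)]
fn example_minimal_program() -> Result<(), Error> {
let prog: &[u8] = &[
0xb7, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // mov64 r0, 0
0x95, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // exit
];
// Every per-instruction check above passes, and the program ends with EXIT.
check(prog)
}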

View File

@ -0,0 +1,655 @@
// SPDX-License-Identifier: (Apache-2.0 OR MIT)
// Copyright 2017 Rich Lane <lanerl@gmail.com>
#![allow(clippy::unreadable_literal)]
extern crate rbpf;
mod common;
use common::{TCP_SACK_ASM, TCP_SACK_BIN};
use rbpf::{assembler::assemble, ebpf};
fn asm(src: &str) -> Result<Vec<ebpf::Insn>, String> {
Ok(ebpf::to_insn_vec(&(assemble(src))?))
}
fn insn(opc: u8, dst: u8, src: u8, off: i16, imm: i32) -> ebpf::Insn {
ebpf::Insn {
opc,
dst,
src,
off,
imm,
}
}
#[test]
fn test_empty() {
assert_eq!(asm(""), Ok(vec![]));
}
// Example for InstructionType::NoOperand.
#[test]
fn test_exit() {
assert_eq!(asm("exit"), Ok(vec![insn(ebpf::EXIT, 0, 0, 0, 0)]));
}
// Example for InstructionType::AluBinary.
#[test]
fn test_add64() {
assert_eq!(
asm("add64 r1, r3"),
Ok(vec![insn(ebpf::ADD64_REG, 1, 3, 0, 0)])
);
assert_eq!(
asm("add64 r1, 5"),
Ok(vec![insn(ebpf::ADD64_IMM, 1, 0, 0, 5)])
);
}
// Example for InstructionType::AluUnary.
#[test]
fn test_neg64() {
assert_eq!(asm("neg64 r1"), Ok(vec![insn(ebpf::NEG64, 1, 0, 0, 0)]));
}
// Example for InstructionType::LoadReg.
#[test]
fn test_ldxw() {
assert_eq!(
asm("ldxw r1, [r2+5]"),
Ok(vec![insn(ebpf::LD_W_REG, 1, 2, 5, 0)])
);
}
// Example for InstructionType::StoreImm.
#[test]
fn test_stw() {
assert_eq!(
asm("stw [r2+5], 7"),
Ok(vec![insn(ebpf::ST_W_IMM, 2, 0, 5, 7)])
);
}
// Example for InstructionType::StoreReg.
#[test]
fn test_stxw() {
assert_eq!(
asm("stxw [r2+5], r8"),
Ok(vec![insn(ebpf::ST_W_REG, 2, 8, 5, 0)])
);
}
// Example for InstructionType::JumpUnconditional.
#[test]
fn test_ja() {
assert_eq!(asm("ja +8"), Ok(vec![insn(ebpf::JA, 0, 0, 8, 0)]));
assert_eq!(asm("ja -3"), Ok(vec![insn(ebpf::JA, 0, 0, -3, 0)]));
}
// Example for InstructionType::JumpConditional.
#[test]
fn test_jeq() {
assert_eq!(
asm("jeq r1, 4, +8"),
Ok(vec![insn(ebpf::JEQ_IMM, 1, 0, 8, 4)])
);
assert_eq!(
asm("jeq r1, r3, +8"),
Ok(vec![insn(ebpf::JEQ_REG, 1, 3, 8, 0)])
);
}
// Example for InstructionType::Call.
#[test]
fn test_call() {
assert_eq!(asm("call 300"), Ok(vec![insn(ebpf::CALL, 0, 0, 0, 300)]));
}
// Example for InstructionType::Endian.
#[test]
fn test_be32() {
assert_eq!(asm("be32 r1"), Ok(vec![insn(ebpf::BE, 1, 0, 0, 32)]));
}
// Example for InstructionType::LoadImm.
#[test]
fn test_lddw() {
assert_eq!(
asm("lddw r1, 0x1234abcd5678eeff"),
Ok(vec![
insn(ebpf::LD_DW_IMM, 1, 0, 0, 0x5678eeff),
insn(0, 0, 0, 0, 0x1234abcd)
])
);
assert_eq!(
asm("lddw r1, 0xff11ee22dd33cc44"),
Ok(vec![
insn(ebpf::LD_DW_IMM, 1, 0, 0, 0xdd33cc44u32 as i32),
insn(0, 0, 0, 0, 0xff11ee22u32 as i32)
])
);
}
// Example for InstructionType::LoadAbs.
#[test]
fn test_ldabsw() {
assert_eq!(asm("ldabsw 1"), Ok(vec![insn(ebpf::LD_ABS_W, 0, 0, 0, 1)]));
}
// Example for InstructionType::LoadInd.
#[test]
fn test_ldindw() {
assert_eq!(
asm("ldindw r1, 2"),
Ok(vec![insn(ebpf::LD_IND_W, 0, 1, 0, 2)])
);
}
// Example for InstructionType::LoadReg.
#[test]
fn test_ldxdw() {
assert_eq!(
asm("ldxdw r1, [r2+3]"),
Ok(vec![insn(ebpf::LD_DW_REG, 1, 2, 3, 0)])
);
}
// Example for InstructionType::StoreImm.
#[test]
fn test_sth() {
assert_eq!(
asm("sth [r1+2], 3"),
Ok(vec![insn(ebpf::ST_H_IMM, 1, 0, 2, 3)])
);
}
// Example for InstructionType::StoreReg.
#[test]
fn test_stxh() {
assert_eq!(
asm("stxh [r1+2], r3"),
Ok(vec![insn(ebpf::ST_H_REG, 1, 3, 2, 0)])
);
}
// Test all supported AluBinary mnemonics.
#[test]
fn test_alu_binary() {
assert_eq!(
asm("add r1, r2
sub r1, r2
mul r1, r2
div r1, r2
or r1, r2
and r1, r2
lsh r1, r2
rsh r1, r2
mod r1, r2
xor r1, r2
mov r1, r2
arsh r1, r2"),
Ok(vec![
insn(ebpf::ADD64_REG, 1, 2, 0, 0),
insn(ebpf::SUB64_REG, 1, 2, 0, 0),
insn(ebpf::MUL64_REG, 1, 2, 0, 0),
insn(ebpf::DIV64_REG, 1, 2, 0, 0),
insn(ebpf::OR64_REG, 1, 2, 0, 0),
insn(ebpf::AND64_REG, 1, 2, 0, 0),
insn(ebpf::LSH64_REG, 1, 2, 0, 0),
insn(ebpf::RSH64_REG, 1, 2, 0, 0),
insn(ebpf::MOD64_REG, 1, 2, 0, 0),
insn(ebpf::XOR64_REG, 1, 2, 0, 0),
insn(ebpf::MOV64_REG, 1, 2, 0, 0),
insn(ebpf::ARSH64_REG, 1, 2, 0, 0)
])
);
assert_eq!(
asm("add r1, 2
sub r1, 2
mul r1, 2
div r1, 2
or r1, 2
and r1, 2
lsh r1, 2
rsh r1, 2
mod r1, 2
xor r1, 2
mov r1, 2
arsh r1, 2"),
Ok(vec![
insn(ebpf::ADD64_IMM, 1, 0, 0, 2),
insn(ebpf::SUB64_IMM, 1, 0, 0, 2),
insn(ebpf::MUL64_IMM, 1, 0, 0, 2),
insn(ebpf::DIV64_IMM, 1, 0, 0, 2),
insn(ebpf::OR64_IMM, 1, 0, 0, 2),
insn(ebpf::AND64_IMM, 1, 0, 0, 2),
insn(ebpf::LSH64_IMM, 1, 0, 0, 2),
insn(ebpf::RSH64_IMM, 1, 0, 0, 2),
insn(ebpf::MOD64_IMM, 1, 0, 0, 2),
insn(ebpf::XOR64_IMM, 1, 0, 0, 2),
insn(ebpf::MOV64_IMM, 1, 0, 0, 2),
insn(ebpf::ARSH64_IMM, 1, 0, 0, 2)
])
);
assert_eq!(
asm("add64 r1, r2
sub64 r1, r2
mul64 r1, r2
div64 r1, r2
or64 r1, r2
and64 r1, r2
lsh64 r1, r2
rsh64 r1, r2
mod64 r1, r2
xor64 r1, r2
mov64 r1, r2
arsh64 r1, r2"),
Ok(vec![
insn(ebpf::ADD64_REG, 1, 2, 0, 0),
insn(ebpf::SUB64_REG, 1, 2, 0, 0),
insn(ebpf::MUL64_REG, 1, 2, 0, 0),
insn(ebpf::DIV64_REG, 1, 2, 0, 0),
insn(ebpf::OR64_REG, 1, 2, 0, 0),
insn(ebpf::AND64_REG, 1, 2, 0, 0),
insn(ebpf::LSH64_REG, 1, 2, 0, 0),
insn(ebpf::RSH64_REG, 1, 2, 0, 0),
insn(ebpf::MOD64_REG, 1, 2, 0, 0),
insn(ebpf::XOR64_REG, 1, 2, 0, 0),
insn(ebpf::MOV64_REG, 1, 2, 0, 0),
insn(ebpf::ARSH64_REG, 1, 2, 0, 0)
])
);
assert_eq!(
asm("add64 r1, 2
sub64 r1, 2
mul64 r1, 2
div64 r1, 2
or64 r1, 2
and64 r1, 2
lsh64 r1, 2
rsh64 r1, 2
mod64 r1, 2
xor64 r1, 2
mov64 r1, 2
arsh64 r1, 2"),
Ok(vec![
insn(ebpf::ADD64_IMM, 1, 0, 0, 2),
insn(ebpf::SUB64_IMM, 1, 0, 0, 2),
insn(ebpf::MUL64_IMM, 1, 0, 0, 2),
insn(ebpf::DIV64_IMM, 1, 0, 0, 2),
insn(ebpf::OR64_IMM, 1, 0, 0, 2),
insn(ebpf::AND64_IMM, 1, 0, 0, 2),
insn(ebpf::LSH64_IMM, 1, 0, 0, 2),
insn(ebpf::RSH64_IMM, 1, 0, 0, 2),
insn(ebpf::MOD64_IMM, 1, 0, 0, 2),
insn(ebpf::XOR64_IMM, 1, 0, 0, 2),
insn(ebpf::MOV64_IMM, 1, 0, 0, 2),
insn(ebpf::ARSH64_IMM, 1, 0, 0, 2)
])
);
assert_eq!(
asm("add32 r1, r2
sub32 r1, r2
mul32 r1, r2
div32 r1, r2
or32 r1, r2
and32 r1, r2
lsh32 r1, r2
rsh32 r1, r2
mod32 r1, r2
xor32 r1, r2
mov32 r1, r2
arsh32 r1, r2"),
Ok(vec![
insn(ebpf::ADD32_REG, 1, 2, 0, 0),
insn(ebpf::SUB32_REG, 1, 2, 0, 0),
insn(ebpf::MUL32_REG, 1, 2, 0, 0),
insn(ebpf::DIV32_REG, 1, 2, 0, 0),
insn(ebpf::OR32_REG, 1, 2, 0, 0),
insn(ebpf::AND32_REG, 1, 2, 0, 0),
insn(ebpf::LSH32_REG, 1, 2, 0, 0),
insn(ebpf::RSH32_REG, 1, 2, 0, 0),
insn(ebpf::MOD32_REG, 1, 2, 0, 0),
insn(ebpf::XOR32_REG, 1, 2, 0, 0),
insn(ebpf::MOV32_REG, 1, 2, 0, 0),
insn(ebpf::ARSH32_REG, 1, 2, 0, 0)
])
);
assert_eq!(
asm("add32 r1, 2
sub32 r1, 2
mul32 r1, 2
div32 r1, 2
or32 r1, 2
and32 r1, 2
lsh32 r1, 2
rsh32 r1, 2
mod32 r1, 2
xor32 r1, 2
mov32 r1, 2
arsh32 r1, 2"),
Ok(vec![
insn(ebpf::ADD32_IMM, 1, 0, 0, 2),
insn(ebpf::SUB32_IMM, 1, 0, 0, 2),
insn(ebpf::MUL32_IMM, 1, 0, 0, 2),
insn(ebpf::DIV32_IMM, 1, 0, 0, 2),
insn(ebpf::OR32_IMM, 1, 0, 0, 2),
insn(ebpf::AND32_IMM, 1, 0, 0, 2),
insn(ebpf::LSH32_IMM, 1, 0, 0, 2),
insn(ebpf::RSH32_IMM, 1, 0, 0, 2),
insn(ebpf::MOD32_IMM, 1, 0, 0, 2),
insn(ebpf::XOR32_IMM, 1, 0, 0, 2),
insn(ebpf::MOV32_IMM, 1, 0, 0, 2),
insn(ebpf::ARSH32_IMM, 1, 0, 0, 2)
])
);
}
// Test all supported AluUnary mnemonics.
#[test]
fn test_alu_unary() {
assert_eq!(
asm("neg r1
neg64 r1
neg32 r1"),
Ok(vec![
insn(ebpf::NEG64, 1, 0, 0, 0),
insn(ebpf::NEG64, 1, 0, 0, 0),
insn(ebpf::NEG32, 1, 0, 0, 0)
])
);
}
// Test all supported LoadAbs mnemonics.
#[test]
fn test_load_abs() {
assert_eq!(
asm("ldabsw 1
ldabsh 1
ldabsb 1
ldabsdw 1"),
Ok(vec![
insn(ebpf::LD_ABS_W, 0, 0, 0, 1),
insn(ebpf::LD_ABS_H, 0, 0, 0, 1),
insn(ebpf::LD_ABS_B, 0, 0, 0, 1),
insn(ebpf::LD_ABS_DW, 0, 0, 0, 1)
])
);
}
// Test all supported LoadInd mnemonics.
#[test]
fn test_load_ind() {
assert_eq!(
asm("ldindw r1, 2
ldindh r1, 2
ldindb r1, 2
ldinddw r1, 2"),
Ok(vec![
insn(ebpf::LD_IND_W, 0, 1, 0, 2),
insn(ebpf::LD_IND_H, 0, 1, 0, 2),
insn(ebpf::LD_IND_B, 0, 1, 0, 2),
insn(ebpf::LD_IND_DW, 0, 1, 0, 2)
])
);
}
// Test all supported LoadReg mnemonics.
#[test]
fn test_load_reg() {
assert_eq!(
asm("ldxw r1, [r2+3]
ldxh r1, [r2+3]
ldxb r1, [r2+3]
ldxdw r1, [r2+3]"),
Ok(vec![
insn(ebpf::LD_W_REG, 1, 2, 3, 0),
insn(ebpf::LD_H_REG, 1, 2, 3, 0),
insn(ebpf::LD_B_REG, 1, 2, 3, 0),
insn(ebpf::LD_DW_REG, 1, 2, 3, 0)
])
);
}
// Test all supported StoreImm mnemonics.
#[test]
fn test_store_imm() {
assert_eq!(
asm("stw [r1+2], 3
sth [r1+2], 3
stb [r1+2], 3
stdw [r1+2], 3"),
Ok(vec![
insn(ebpf::ST_W_IMM, 1, 0, 2, 3),
insn(ebpf::ST_H_IMM, 1, 0, 2, 3),
insn(ebpf::ST_B_IMM, 1, 0, 2, 3),
insn(ebpf::ST_DW_IMM, 1, 0, 2, 3)
])
);
}
// Test all supported StoreReg mnemonics.
#[test]
fn test_store_reg() {
assert_eq!(
asm("stxw [r1+2], r3
stxh [r1+2], r3
stxb [r1+2], r3
stxdw [r1+2], r3"),
Ok(vec![
insn(ebpf::ST_W_REG, 1, 3, 2, 0),
insn(ebpf::ST_H_REG, 1, 3, 2, 0),
insn(ebpf::ST_B_REG, 1, 3, 2, 0),
insn(ebpf::ST_DW_REG, 1, 3, 2, 0)
])
);
}
// Test all supported JumpConditional mnemonics.
#[test]
fn test_jump_conditional() {
assert_eq!(
asm("jeq r1, r2, +3
jgt r1, r2, +3
jge r1, r2, +3
jlt r1, r2, +3
jle r1, r2, +3
jset r1, r2, +3
jne r1, r2, +3
jsgt r1, r2, +3
jsge r1, r2, +3
jslt r1, r2, +3
jsle r1, r2, +3"),
Ok(vec![
insn(ebpf::JEQ_REG, 1, 2, 3, 0),
insn(ebpf::JGT_REG, 1, 2, 3, 0),
insn(ebpf::JGE_REG, 1, 2, 3, 0),
insn(ebpf::JLT_REG, 1, 2, 3, 0),
insn(ebpf::JLE_REG, 1, 2, 3, 0),
insn(ebpf::JSET_REG, 1, 2, 3, 0),
insn(ebpf::JNE_REG, 1, 2, 3, 0),
insn(ebpf::JSGT_REG, 1, 2, 3, 0),
insn(ebpf::JSGE_REG, 1, 2, 3, 0),
insn(ebpf::JSLT_REG, 1, 2, 3, 0),
insn(ebpf::JSLE_REG, 1, 2, 3, 0)
])
);
assert_eq!(
asm("jeq r1, 2, +3
jgt r1, 2, +3
jge r1, 2, +3
jlt r1, 2, +3
jle r1, 2, +3
jset r1, 2, +3
jne r1, 2, +3
jsgt r1, 2, +3
jsge r1, 2, +3
jslt r1, 2, +3
jsle r1, 2, +3"),
Ok(vec![
insn(ebpf::JEQ_IMM, 1, 0, 3, 2),
insn(ebpf::JGT_IMM, 1, 0, 3, 2),
insn(ebpf::JGE_IMM, 1, 0, 3, 2),
insn(ebpf::JLT_IMM, 1, 0, 3, 2),
insn(ebpf::JLE_IMM, 1, 0, 3, 2),
insn(ebpf::JSET_IMM, 1, 0, 3, 2),
insn(ebpf::JNE_IMM, 1, 0, 3, 2),
insn(ebpf::JSGT_IMM, 1, 0, 3, 2),
insn(ebpf::JSGE_IMM, 1, 0, 3, 2),
insn(ebpf::JSLT_IMM, 1, 0, 3, 2),
insn(ebpf::JSLE_IMM, 1, 0, 3, 2)
])
);
assert_eq!(
asm("jeq32 r1, r2, +3
jgt32 r1, r2, +3
jge32 r1, r2, +3
jlt32 r1, r2, +3
jle32 r1, r2, +3
jset32 r1, r2, +3
jne32 r1, r2, +3
jsgt32 r1, r2, +3
jsge32 r1, r2, +3
jslt32 r1, r2, +3
jsle32 r1, r2, +3"),
Ok(vec![
insn(ebpf::JEQ_REG32, 1, 2, 3, 0),
insn(ebpf::JGT_REG32, 1, 2, 3, 0),
insn(ebpf::JGE_REG32, 1, 2, 3, 0),
insn(ebpf::JLT_REG32, 1, 2, 3, 0),
insn(ebpf::JLE_REG32, 1, 2, 3, 0),
insn(ebpf::JSET_REG32, 1, 2, 3, 0),
insn(ebpf::JNE_REG32, 1, 2, 3, 0),
insn(ebpf::JSGT_REG32, 1, 2, 3, 0),
insn(ebpf::JSGE_REG32, 1, 2, 3, 0),
insn(ebpf::JSLT_REG32, 1, 2, 3, 0),
insn(ebpf::JSLE_REG32, 1, 2, 3, 0)
])
);
assert_eq!(
asm("jeq32 r1, 2, +3
jgt32 r1, 2, +3
jge32 r1, 2, +3
jlt32 r1, 2, +3
jle32 r1, 2, +3
jset32 r1, 2, +3
jne32 r1, 2, +3
jsgt32 r1, 2, +3
jsge32 r1, 2, +3
jslt32 r1, 2, +3
jsle32 r1, 2, +3"),
Ok(vec![
insn(ebpf::JEQ_IMM32, 1, 0, 3, 2),
insn(ebpf::JGT_IMM32, 1, 0, 3, 2),
insn(ebpf::JGE_IMM32, 1, 0, 3, 2),
insn(ebpf::JLT_IMM32, 1, 0, 3, 2),
insn(ebpf::JLE_IMM32, 1, 0, 3, 2),
insn(ebpf::JSET_IMM32, 1, 0, 3, 2),
insn(ebpf::JNE_IMM32, 1, 0, 3, 2),
insn(ebpf::JSGT_IMM32, 1, 0, 3, 2),
insn(ebpf::JSGE_IMM32, 1, 0, 3, 2),
insn(ebpf::JSLT_IMM32, 1, 0, 3, 2),
insn(ebpf::JSLE_IMM32, 1, 0, 3, 2)
])
);
}
// Test all supported Endian mnemonics.
#[test]
fn test_endian() {
assert_eq!(
asm("be16 r1
be32 r1
be64 r1
le16 r1
le32 r1
le64 r1"),
Ok(vec![
insn(ebpf::BE, 1, 0, 0, 16),
insn(ebpf::BE, 1, 0, 0, 32),
insn(ebpf::BE, 1, 0, 0, 64),
insn(ebpf::LE, 1, 0, 0, 16),
insn(ebpf::LE, 1, 0, 0, 32),
insn(ebpf::LE, 1, 0, 0, 64)
])
);
}
#[test]
fn test_large_immediate() {
assert_eq!(
asm("add64 r1, 2147483647"),
Ok(vec![insn(ebpf::ADD64_IMM, 1, 0, 0, 2147483647)])
);
assert_eq!(
asm("add64 r1, -2147483648"),
Ok(vec![insn(ebpf::ADD64_IMM, 1, 0, 0, -2147483648)])
);
}
#[test]
fn test_tcp_sack() {
assert_eq!(assemble(TCP_SACK_ASM), Ok(TCP_SACK_BIN.to_vec()));
}
#[test]
fn test_error_invalid_instruction() {
assert_eq!(asm("abcd"), Err("Invalid instruction \"abcd\"".to_string()));
}
#[test]
fn test_error_unexpected_operands() {
assert_eq!(
asm("add 1, 2"),
Err("Failed to encode add: Unexpected operands: [Integer(1), Integer(2)]".to_string())
);
}
#[test]
fn test_error_too_many_operands() {
assert_eq!(
asm("add 1, 2, 3, 4"),
Err("Failed to encode add: Too many operands".to_string())
);
}
#[test]
fn test_error_operands_out_of_range() {
assert_eq!(
asm("add r16, r2"),
Err("Failed to encode add: Invalid destination register 16".to_string())
);
assert_eq!(
asm("add r1, r16"),
Err("Failed to encode add: Invalid source register 16".to_string())
);
assert_eq!(
asm("ja -32769"),
Err("Failed to encode ja: Invalid offset -32769".to_string())
);
assert_eq!(
asm("ja 32768"),
Err("Failed to encode ja: Invalid offset 32768".to_string())
);
assert_eq!(
asm("add r1, 4294967296"),
Err("Failed to encode add: Invalid immediate 4294967296".to_string())
);
assert_eq!(
asm("add r1, 2147483648"),
Err("Failed to encode add: Invalid immediate 2147483648".to_string())
);
assert_eq!(
asm("add r1, -2147483649"),
Err("Failed to encode add: Invalid immediate -2147483649".to_string())
);
}

View File

@ -0,0 +1,97 @@
// SPDX-License-Identifier: (Apache-2.0 OR MIT)
// Converted from the tests for uBPF <https://github.com/iovisor/ubpf>
// Copyright 2015 Big Switch Networks, Inc
// Copyright 2016 6WIND S.A. <quentin.monnet@6wind.com>
// Assembly code and data for tcp_sack testcases.
#[allow(dead_code)]
pub const TCP_SACK_ASM: &str = "
ldxb r2, [r1+12]
ldxb r3, [r1+13]
lsh r3, 0x8
or r3, r2
mov r0, 0x0
jne r3, 0x8, +37
ldxb r2, [r1+23]
jne r2, 0x6, +35
ldxb r2, [r1+14]
add r1, 0xe
and r2, 0xf
lsh r2, 0x2
add r1, r2
mov r0, 0x0
ldxh r4, [r1+12]
add r1, 0x14
rsh r4, 0x2
and r4, 0x3c
mov r2, r4
add r2, -20
mov r5, 0x15
mov r3, 0x0
jgt r5, r4, +20
mov r5, r3
lsh r5, 0x20
arsh r5, 0x20
mov r4, r1
add r4, r5
ldxb r5, [r4]
jeq r5, 0x1, +4
jeq r5, 0x0, +12
mov r6, r3
jeq r5, 0x5, +9
ja +2
add r3, 0x1
mov r6, r3
ldxb r3, [r4+1]
add r3, r6
lsh r3, 0x20
arsh r3, 0x20
jsgt r2, r3, -18
ja +1
mov r0, 0x1
exit";
#[allow(dead_code)]
pub const TCP_SACK_BIN: [u8; 352] = [
0x71, 0x12, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x71, 0x13, 0x0d, 0x00, 0x00, 0x00, 0x00, 0x00,
0x67, 0x03, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x4f, 0x23, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xb7, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x55, 0x03, 0x25, 0x00, 0x08, 0x00, 0x00, 0x00,
0x71, 0x12, 0x17, 0x00, 0x00, 0x00, 0x00, 0x00, 0x55, 0x02, 0x23, 0x00, 0x06, 0x00, 0x00, 0x00,
0x71, 0x12, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, 0x01, 0x00, 0x00, 0x0e, 0x00, 0x00, 0x00,
0x57, 0x02, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x67, 0x02, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
0x0f, 0x21, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xb7, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x69, 0x14, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, 0x01, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
0x77, 0x04, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x57, 0x04, 0x00, 0x00, 0x3c, 0x00, 0x00, 0x00,
0xbf, 0x42, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, 0x02, 0x00, 0x00, 0xec, 0xff, 0xff, 0xff,
0xb7, 0x05, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, 0xb7, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x2d, 0x45, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0xbf, 0x35, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x67, 0x05, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0xc7, 0x05, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00,
0xbf, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0f, 0x54, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x71, 0x45, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x15, 0x05, 0x04, 0x00, 0x01, 0x00, 0x00, 0x00,
0x15, 0x05, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0xbf, 0x36, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x15, 0x05, 0x09, 0x00, 0x05, 0x00, 0x00, 0x00, 0x05, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00,
0x07, 0x03, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0xbf, 0x36, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x71, 0x43, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0f, 0x63, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x67, 0x03, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0xc7, 0x03, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00,
0x6d, 0x32, 0xee, 0xff, 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00,
0xb7, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x95, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
];
#[allow(dead_code)]
pub const TCP_SACK_MATCH: [u8; 78] = [
0x00, 0x26, 0x62, 0x2f, 0x47, 0x87, 0x00, 0x1d, 0x60, 0xb3, 0x01, 0x84, 0x08, 0x00, 0x45, 0x00,
0x00, 0x40, 0xa8, 0xde, 0x40, 0x00, 0x40, 0x06, 0x9d, 0x58, 0xc0, 0xa8, 0x01, 0x03, 0x3f, 0x74,
0xf3, 0x61, 0xe5, 0xc0, 0x00, 0x50, 0xe5, 0x94, 0x3f, 0x77, 0xa3, 0xc4, 0xc4, 0x80, 0xb0, 0x10,
0x01, 0x3e, 0x34, 0xb6, 0x00, 0x00, 0x01, 0x01, 0x08, 0x0a, 0x00, 0x17, 0x95, 0x6f, 0x8d, 0x9d,
0x9e, 0x27, 0x01, 0x01, 0x05, 0x0a, 0xa3, 0xc4, 0xca, 0x28, 0xa3, 0xc4, 0xcf, 0xd0,
];
#[allow(dead_code)]
pub const TCP_SACK_NOMATCH: [u8; 66] = [
0x00, 0x26, 0x62, 0x2f, 0x47, 0x87, 0x00, 0x1d, 0x60, 0xb3, 0x01, 0x84, 0x08, 0x00, 0x45, 0x00,
0x00, 0x40, 0xa8, 0xde, 0x40, 0x00, 0x40, 0x06, 0x9d, 0x58, 0xc0, 0xa8, 0x01, 0x03, 0x3f, 0x74,
0xf3, 0x61, 0xe5, 0xc0, 0x00, 0x50, 0xe5, 0x94, 0x3f, 0x77, 0xa3, 0xc4, 0xc4, 0x80, 0x80, 0x10,
0x01, 0x3e, 0x34, 0xb6, 0x00, 0x00, 0x01, 0x01, 0x08, 0x0a, 0x00, 0x17, 0x95, 0x6f, 0x8d, 0x9d,
0x9e, 0x27,
];

File diff suppressed because it is too large

View File

@ -0,0 +1,377 @@
// SPDX-License-Identifier: (Apache-2.0 OR MIT)
// Copyright 2017 Jan-Erik Rediger <badboy@archlinux.us>
//
// Adopted from tests in `tests/assembler.rs`
extern crate rbpf;
mod common;
use rbpf::{assembler::assemble, disassembler::to_insn_vec};
// Using a macro to keep actual line numbers in failure output
macro_rules! disasm {
($src:expr) => {{
let src = $src;
let asm = assemble(src).expect("Can't assemble from string");
let insn = to_insn_vec(&asm);
let reasm = insn
.into_iter()
.map(|ins| ins.desc)
.collect::<Vec<_>>()
.join("\n");
assert_eq!(src, reasm);
}};
}
#[test]
fn test_empty() {
disasm!("");
}
// Example for InstructionType::NoOperand.
#[test]
fn test_exit() {
disasm!("exit");
}
// Example for InstructionType::AluBinary.
#[test]
fn test_add64() {
disasm!("add64 r1, r3");
disasm!("add64 r1, 0x5");
}
// Example for InstructionType::AluUnary.
#[test]
fn test_neg64() {
disasm!("neg64 r1");
}
// Example for InstructionType::LoadReg.
#[test]
fn test_ldxw() {
disasm!("ldxw r1, [r2+0x5]");
}
// Example for InstructionType::StoreImm.
#[test]
fn test_stw() {
disasm!("stw [r2+0x5], 0x7");
}
// Example for InstructionType::StoreReg.
#[test]
fn test_stxw() {
disasm!("stxw [r2+0x5], r8");
}
// Example for InstructionType::JumpUnconditional.
#[test]
fn test_ja() {
disasm!("ja +0x8");
}
// Example for InstructionType::JumpConditional.
#[test]
fn test_jeq() {
disasm!("jeq r1, 0x4, +0x8");
disasm!("jeq r1, r3, +0x8");
}
// Example for InstructionType::Call.
#[test]
fn test_call() {
disasm!("call 0x3");
}
// Example for InstructionType::Endian.
#[test]
fn test_be32() {
disasm!("be32 r1");
}
// Example for InstructionType::LoadImm.
#[test]
fn test_lddw() {
disasm!("lddw r1, 0x1234abcd5678eeff");
disasm!("lddw r1, 0xff11ee22dd33cc44");
}
// Example for InstructionType::LoadAbs.
#[test]
fn test_ldabsw() {
disasm!("ldabsw 0x1");
}
// Example for InstructionType::LoadInd.
#[test]
fn test_ldindw() {
disasm!("ldindw r1, 0x2");
}
// Example for InstructionType::LoadReg.
#[test]
fn test_ldxdw() {
disasm!("ldxdw r1, [r2+0x3]");
}
// Example for InstructionType::StoreImm.
#[test]
fn test_sth() {
disasm!("sth [r1+0x2], 0x3");
}
// Example for InstructionType::StoreReg.
#[test]
fn test_stxh() {
disasm!("stxh [r1+0x2], r3");
}
// Test all supported AluBinary mnemonics.
#[test]
fn test_alu_binary() {
disasm!(
"add64 r1, r2
sub64 r1, r2
mul64 r1, r2
div64 r1, r2
or64 r1, r2
and64 r1, r2
lsh64 r1, r2
rsh64 r1, r2
mod64 r1, r2
xor64 r1, r2
mov64 r1, r2
arsh64 r1, r2"
);
disasm!(
"add64 r1, 0x2
sub64 r1, 0x2
mul64 r1, 0x2
div64 r1, 0x2
or64 r1, 0x2
and64 r1, 0x2
lsh64 r1, 0x2
rsh64 r1, 0x2
mod64 r1, 0x2
xor64 r1, 0x2
mov64 r1, 0x2
arsh64 r1, 0x2"
);
disasm!(
"add32 r1, r2
sub32 r1, r2
mul32 r1, r2
div32 r1, r2
or32 r1, r2
and32 r1, r2
lsh32 r1, r2
rsh32 r1, r2
mod32 r1, r2
xor32 r1, r2
mov32 r1, r2
arsh32 r1, r2"
);
disasm!(
"add32 r1, 0x2
sub32 r1, 0x2
mul32 r1, 0x2
div32 r1, 0x2
or32 r1, 0x2
and32 r1, 0x2
lsh32 r1, 0x2
rsh32 r1, 0x2
mod32 r1, 0x2
xor32 r1, 0x2
mov32 r1, 0x2
arsh32 r1, 0x2"
);
}
// Test all supported AluUnary mnemonics.
#[test]
fn test_alu_unary() {
disasm!(
"neg64 r1
neg32 r1"
);
}
// Test all supported LoadAbs mnemonics.
#[test]
fn test_load_abs() {
disasm!(
"ldabsw 0x1
ldabsh 0x1
ldabsb 0x1
ldabsdw 0x1"
);
}
// Test all supported LoadInd mnemonics.
#[test]
fn test_load_ind() {
disasm!(
"ldindw r1, 0x2
ldindh r1, 0x2
ldindb r1, 0x2
ldinddw r1, 0x2"
);
}
// Test all supported LoadReg mnemonics.
#[test]
fn test_load_reg() {
disasm!(
r"ldxw r1, [r2+0x3]
ldxh r1, [r2+0x3]
ldxb r1, [r2+0x3]
ldxdw r1, [r2+0x3]"
);
}
// Test all supported StoreImm mnemonics.
#[test]
fn test_store_imm() {
disasm!(
"stw [r1+0x2], 0x3
sth [r1+0x2], 0x3
stb [r1+0x2], 0x3
stdw [r1+0x2], 0x3"
);
}
// Test all supported StoreReg mnemonics.
#[test]
fn test_store_reg() {
disasm!(
"stxw [r1+0x2], r3
stxh [r1+0x2], r3
stxb [r1+0x2], r3
stxdw [r1+0x2], r3"
);
}
// Test all supported JumpConditional mnemonics.
#[test]
fn test_jump_conditional() {
disasm!(
"jeq r1, r2, +0x3
jgt r1, r2, +0x3
jge r1, r2, +0x3
jlt r1, r2, +0x3
jle r1, r2, +0x3
jset r1, r2, +0x3
jne r1, r2, +0x3
jsgt r1, r2, +0x3
jsge r1, r2, -0x3
jslt r1, r2, +0x3
jsle r1, r2, -0x3"
);
disasm!(
"jeq r1, 0x2, +0x3
jgt r1, 0x2, +0x3
jge r1, 0x2, +0x3
jlt r1, 0x2, +0x3
jle r1, 0x2, +0x3
jset r1, 0x2, +0x3
jne r1, 0x2, +0x3
jsgt r1, 0x2, +0x3
jsge r1, 0x2, -0x3
jslt r1, 0x2, +0x3
jsle r1, 0x2, -0x3"
);
disasm!(
"jeq32 r1, r2, +0x3
jgt32 r1, r2, +0x3
jge32 r1, r2, +0x3
jlt32 r1, r2, +0x3
jle32 r1, r2, +0x3
jset32 r1, r2, +0x3
jne32 r1, r2, +0x3
jsgt32 r1, r2, +0x3
jsge32 r1, r2, -0x3
jslt32 r1, r2, +0x3
jsle32 r1, r2, -0x3"
);
disasm!(
"jeq32 r1, 0x2, +0x3
jgt32 r1, 0x2, +0x3
jge32 r1, 0x2, +0x3
jlt32 r1, 0x2, +0x3
jle32 r1, 0x2, +0x3
jset32 r1, 0x2, +0x3
jne32 r1, 0x2, +0x3
jsgt32 r1, 0x2, +0x3
jsge32 r1, 0x2, -0x3
jslt32 r1, 0x2, +0x3
jsle32 r1, 0x2, -0x3"
);
}
// Test all supported Endian mnemonics.
#[test]
fn test_endian() {
disasm!(
"be16 r1
be32 r1
be64 r1
le16 r1
le32 r1
le64 r1"
);
}
#[test]
fn test_large_immediate() {
disasm!("add64 r1, 0x7fffffff");
disasm!("add64 r1, 0x7fffffff");
}
// Non-regression test for overflow when trying to negate the i16 offset -0x8000 (i16::MIN).
#[test]
fn test_offset_overflow() {
let insns = [
0x62, 0x01, 0x00, 0x80, 0x01, 0x00, 0x00, 0x00, // stw
0x6a, 0x01, 0x00, 0x80, 0x01, 0x00, 0x00, 0x00, // sth
0x72, 0x01, 0x00, 0x80, 0x01, 0x00, 0x00, 0x00, // stb
0x7a, 0x01, 0x00, 0x80, 0x01, 0x00, 0x00, 0x00, // stdw
0x61, 0x01, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00, // ldxw
0x69, 0x01, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00, // ldxh
0x71, 0x01, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00, // ldxb
0x79, 0x01, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00, // ldxdw
0x15, 0x01, 0x00, 0x80, 0x02, 0x00, 0x00, 0x00, // jeq (imm)
0x1d, 0x21, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00, // jeq (reg)
0x16, 0x01, 0x00, 0x80, 0x02, 0x00, 0x00, 0x00, // jeq32 (imm)
0x1e, 0x21, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00, // jeq32 (reg)
];
let expected_output = "stw [r1-0x8000], 0x1
sth [r1-0x8000], 0x1
stb [r1-0x8000], 0x1
stdw [r1-0x8000], 0x1
ldxw r1, [r0-0x8000]
ldxh r1, [r0-0x8000]
ldxb r1, [r0-0x8000]
ldxdw r1, [r0-0x8000]
jeq r1, 0x2, -0x8000
jeq r1, r2, -0x8000
jeq32 r1, 0x2, -0x8000
jeq32 r1, r2, -0x8000";
let prog = to_insn_vec(&insns);
let asm = prog
.into_iter()
.map(|ins| ins.desc)
.collect::<Vec<_>>()
.join("\n");
assert_eq!(asm, expected_output);
}

View File

@ -0,0 +1,571 @@
// SPDX-License-Identifier: (Apache-2.0 OR MIT)
// Copyright 2016 6WIND S.A. <quentin.monnet@6wind.com>
// There are unused mut warnings due to unsafe code.
#![allow(unused_mut)]
#![allow(clippy::unreadable_literal)]
// This crate would be needed to load bytecode from a BPF-compiled object file. Since the crate
// is not used anywhere else in the library, it is deactivated: we do not want to load and compile
// it just for the tests. If you want to use it, do not forget to add the following
// dependency to your Cargo.toml file:
//
// ---
// elf = "0.0.10"
// ---
//
// extern crate elf;
// use std::path::PathBuf;
extern crate rbpf;
#[cfg(feature = "std")]
use rbpf::helpers;
use rbpf::{assembler::assemble, Error, ErrorKind};
// The following two examples have been compiled from C with the following command:
//
// ```bash
// clang -O2 -emit-llvm -c <file.c> -o - | llc -march=bpf -filetype=obj -o <file.o>
// ```
//
// The C source code was the following:
//
// ```c
// #include <linux/ip.h>
// #include <linux/in.h>
// #include <linux/tcp.h>
// #include <linux/bpf.h>
//
// #define ETH_ALEN 6
// #define ETH_P_IP 0x0008 /* htons(0x0800) */
// #define TCP_HDR_LEN 20
//
// #define BLOCKED_TCP_PORT 0x9999
//
// struct eth_hdr {
// unsigned char h_dest[ETH_ALEN];
// unsigned char h_source[ETH_ALEN];
// unsigned short h_proto;
// };
//
// #define SEC(NAME) __attribute__((section(NAME), used))
// SEC(".classifier")
// int handle_ingress(struct __sk_buff *skb)
// {
// void *data = (void *)(long)skb->data;
// void *data_end = (void *)(long)skb->data_end;
// struct eth_hdr *eth = data;
// struct iphdr *iph = data + sizeof(*eth);
// struct tcphdr *tcp = data + sizeof(*eth) + sizeof(*iph);
//
// /* single length check */
// if (data + sizeof(*eth) + sizeof(*iph) + sizeof(*tcp) > data_end)
// return 0;
// if (eth->h_proto != ETH_P_IP)
// return 0;
// if (iph->protocol != IPPROTO_TCP)
// return 0;
// if (tcp->source == BLOCKED_TCP_PORT || tcp->dest == BLOCKED_TCP_PORT)
// return -1;
// return 0;
// }
// char _license[] SEC(".license") = "GPL";
// ```
//
// This program, once compiled, can be injected into the Linux kernel with tc, for instance. Sadly, we
// need to make some modifications to the generated bytecode in order to run it: the three
// instructions with opcode 0x61 load data from the packet area as 4-byte words, whereas we need to
// load it as 8-byte double words (0x79). The kernel performs the same kind of translation before
// running the program, but rbpf does not implement this.
//
// In addition, the offset at which the pointer to the packet data is stored must be changed: since
// we use 8 bytes instead of 4 for the start and end addresses of the packet data, we cannot use
// the offsets produced by clang (0x4c and 0x50), as the addresses would overlap. Instead we can use,
// for example, 0x40 and 0x50. See the comments on the bytecode below for the modifications.
//
// Once the bytecode has been (manually, in our case) edited, we can load the bytecode directly
// from the ELF object file. This is easy to do, but requires the addition of two crates in the
// Cargo.toml file (see comments above), so here we use just the hardcoded bytecode instructions
// instead.
#[test]
#[cfg(feature = "std")]
fn test_vm_block_port() {
// To load the bytecode from an object file instead of using the hardcoded instructions,
// use the additional crates commented at the beginning of this file (and also add them to your
// Cargo.toml). See comments above.
//
// ---
// let filename = "my_ebpf_object_file.o";
//
// let path = PathBuf::from(filename);
// let file = match elf::File::open_path(&path) {
// Ok(f) => f,
// Err(e) => panic!("Error: {:?}", e),
// };
//
// let text_scn = match file.get_section(".classifier") {
// Some(s) => s,
// None => panic!("Failed to look up .classifier section"),
// };
//
// let prog = &text_scn.data;
// ---
let prog = &[
0xb7, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x79, 0x12, 0x50, 0x00, 0x00, 0x00, 0x00,
0x00, // 0x79 instead of 0x61
0x79, 0x11, 0x40, 0x00, 0x00, 0x00, 0x00,
0x00, // 0x79 instead of 0x61, 0x40 i.o. 0x4c
0xbf, 0x13, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, 0x03, 0x00, 0x00, 0x36, 0x00, 0x00,
0x00, 0x2d, 0x23, 0x12, 0x00, 0x00, 0x00, 0x00, 0x00, 0x69, 0x12, 0x0c, 0x00, 0x00, 0x00,
0x00, 0x00, 0x55, 0x02, 0x10, 0x00, 0x08, 0x00, 0x00, 0x00, 0x71, 0x12, 0x17, 0x00, 0x00,
0x00, 0x00, 0x00, 0x55, 0x02, 0x0e, 0x00, 0x06, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00,
0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x79, 0x11, 0x22,
0x00, 0x00, 0x00, 0x00, 0x00, // 0x79 instead of 0x61
0xbf, 0x12, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x57, 0x02, 0x00, 0x00, 0xff, 0xff, 0x00,
0x00, 0x15, 0x02, 0x08, 0x00, 0x99, 0x99, 0x00, 0x00, 0x18, 0x02, 0x00, 0x00, 0x00, 0x00,
0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x5f, 0x21, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0xb7, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0x18, 0x02, 0x00, 0x00,
0x00, 0x00, 0x99, 0x99, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1d, 0x21, 0x01,
0x00, 0x00, 0x00, 0x00, 0x00, 0xb7, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x95, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
];
let packet = &mut [
0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54, 0x08,
0x00, // ethertype
0x45, 0x00, 0x00, 0x3b, // start ip_hdr
0xa6, 0xab, 0x40, 0x00, 0x40, 0x06, 0x96, 0x0f, 0x7f, 0x00, 0x00, 0x01, 0x7f, 0x00, 0x00,
0x01,
// Program matches the next two bytes: 0x9999 returns 0xffffffff, else return 0.
0x99, 0x99, 0xc6, 0xcc, // start tcp_hdr
0xd1, 0xe5, 0xc4, 0x9d, 0xd4, 0x30, 0xb5, 0xd2, 0x80, 0x18, 0x01, 0x56, 0xfe, 0x2f, 0x00,
0x00, 0x01, 0x01, 0x08, 0x0a, // start data
0x00, 0x23, 0x75, 0x89, 0x00, 0x23, 0x63, 0x2d, 0x71, 0x64, 0x66, 0x73, 0x64, 0x66, 0x0au8,
];
let mut vm = rbpf::EbpfVmFixedMbuff::new(Some(prog), 0x40, 0x50).unwrap();
vm.register_helper(helpers::BPF_TRACE_PRINTK_IDX, helpers::bpf_trace_printf)
.unwrap();
let res = vm.execute_program(packet).unwrap();
println!("Program returned: {res:?} ({res:#x})");
assert_eq!(res, 0xffffffff);
}
#[test]
#[cfg(all(not(windows), feature = "std"))]
fn test_jit_block_port() {
// To load the bytecode from an object file instead of using the hardcoded instructions,
// use the additional crates commented at the beginning of this file (and also add them to your
// Cargo.toml). See comments above.
//
// ---
// let filename = "my_ebpf_object_file.o";
//
// let path = PathBuf::from(filename);
// let file = match elf::File::open_path(&path) {
// Ok(f) => f,
// Err(e) => panic!("Error: {:?}", e),
// };
//
// let text_scn = match file.get_section(".classifier") {
// Some(s) => s,
// None => panic!("Failed to look up .classifier section"),
// };
//
// let prog = &text_scn.data;
// ---
let prog = &[
0xb7, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x79, 0x12, 0x50, 0x00, 0x00, 0x00, 0x00,
0x00, // 0x79 instead of 0x61
0x79, 0x11, 0x40, 0x00, 0x00, 0x00, 0x00,
0x00, // 0x79 instead of 0x61, 0x40 i.o. 0x4c
0xbf, 0x13, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, 0x03, 0x00, 0x00, 0x36, 0x00, 0x00,
0x00, 0x2d, 0x23, 0x12, 0x00, 0x00, 0x00, 0x00, 0x00, 0x69, 0x12, 0x0c, 0x00, 0x00, 0x00,
0x00, 0x00, 0x55, 0x02, 0x10, 0x00, 0x08, 0x00, 0x00, 0x00, 0x71, 0x12, 0x17, 0x00, 0x00,
0x00, 0x00, 0x00, 0x55, 0x02, 0x0e, 0x00, 0x06, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00,
0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x79, 0x11, 0x22,
0x00, 0x00, 0x00, 0x00, 0x00, // 0x79 instead of 0x61
0xbf, 0x12, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x57, 0x02, 0x00, 0x00, 0xff, 0xff, 0x00,
0x00, 0x15, 0x02, 0x08, 0x00, 0x99, 0x99, 0x00, 0x00, 0x18, 0x02, 0x00, 0x00, 0x00, 0x00,
0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x5f, 0x21, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0xb7, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0x18, 0x02, 0x00, 0x00,
0x00, 0x00, 0x99, 0x99, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1d, 0x21, 0x01,
0x00, 0x00, 0x00, 0x00, 0x00, 0xb7, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x95, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
];
let packet = &mut [
0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54, 0x08,
0x00, // ethertype
0x45, 0x00, 0x00, 0x3b, // start ip_hdr
0xa6, 0xab, 0x40, 0x00, 0x40, 0x06, 0x96, 0x0f, 0x7f, 0x00, 0x00, 0x01, 0x7f, 0x00, 0x00,
0x01,
// Program matches the next two bytes: 0x9999 returns 0xffffffff, else return 0.
0x99, 0x99, 0xc6, 0xcc, // start tcp_hdr
0xd1, 0xe5, 0xc4, 0x9d, 0xd4, 0x30, 0xb5, 0xd2, 0x80, 0x18, 0x01, 0x56, 0xfe, 0x2f, 0x00,
0x00, 0x01, 0x01, 0x08, 0x0a, // start data
0x00, 0x23, 0x75, 0x89, 0x00, 0x23, 0x63, 0x2d, 0x71, 0x64, 0x66, 0x73, 0x64, 0x66, 0x0au8,
];
let mut vm = rbpf::EbpfVmFixedMbuff::new(Some(prog), 0x40, 0x50).unwrap();
vm.register_helper(helpers::BPF_TRACE_PRINTK_IDX, helpers::bpf_trace_printf)
.unwrap();
vm.jit_compile().unwrap();
unsafe {
let res = vm.execute_program_jit(packet).unwrap();
println!("Program returned: {res:?} ({res:#x})");
assert_eq!(res, 0xffffffff);
}
}
// Program and memory come from uBPF test ldxh.
#[test]
fn test_vm_mbuff() {
let prog = &[
// Load mem from mbuff into R1
0x79, 0x11, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, // ldhx r1[2], r0
0x69, 0x10, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x95, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00,
];
let mem = &[0xaa, 0xbb, 0x11, 0x22, 0xcc, 0xdd];
let mbuff = [0u8; 32];
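// Comment added for clarity (not in the upstream test): the 32-byte mbuff stands in for the
// kernel metadata area. The unsafe block below stores the packet's data pointer at mbuff
// offset 8 and its data_end pointer at offset 24; the program's first instruction
// (`ldxdw r1, [r1+0x08]`) fetches that data pointer and the second (`ldxh r0, [r1+2]`)
// reads the bytes 0x11 0x22, giving the expected result 0x2211.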
unsafe {
let mut data = mbuff.as_ptr().offset(8) as *mut u64;
let mut data_end = mbuff.as_ptr().offset(24) as *mut u64;
data.write_unaligned(mem.as_ptr() as u64);
data_end.write_unaligned(mem.as_ptr() as u64 + mem.len() as u64);
}
let vm = rbpf::EbpfVmMbuff::new(Some(prog)).unwrap();
assert_eq!(vm.execute_program(mem, &mbuff).unwrap(), 0x2211);
}
// Program and memory come from uBPF test ldxh.
#[test]
fn test_vm_mbuff_with_rust_api() {
use rbpf::insn_builder::*;
let mut program = BpfCode::new();
program
.load_x(MemSize::DoubleWord)
.set_dst(0x01)
.set_src(0x01)
.set_off(0x00_08)
.push()
.load_x(MemSize::HalfWord)
.set_dst(0x00)
.set_src(0x01)
.set_off(0x00_02)
.push()
.exit()
.push();
let mem = &[0xaa, 0xbb, 0x11, 0x22, 0xcc, 0xdd];
let mbuff = [0u8; 32];
unsafe {
let mut data = mbuff.as_ptr().offset(8) as *mut u64;
let mut data_end = mbuff.as_ptr().offset(24) as *mut u64;
*data = mem.as_ptr() as u64;
*data_end = mem.as_ptr() as u64 + mem.len() as u64;
}
let vm = rbpf::EbpfVmMbuff::new(Some(program.into_bytes())).unwrap();
assert_eq!(vm.execute_program(mem, &mbuff).unwrap(), 0x2211);
}
// Program and memory come from uBPF test ldxh.
#[test]
#[cfg(all(not(windows), feature = "std"))]
fn test_jit_mbuff() {
let prog = &[
// Load mem from mbuff into R1
0x79, 0x11, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, // ldhx r1[2], r0
0x69, 0x10, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x95, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00,
];
let mem = &mut [0xaa, 0xbb, 0x11, 0x22, 0xcc, 0xdd];
let mut mbuff = [0u8; 32];
unsafe {
let mut data = mbuff.as_ptr().offset(8) as *mut u64;
let mut data_end = mbuff.as_ptr().offset(24) as *mut u64;
*data = mem.as_ptr() as u64;
*data_end = mem.as_ptr() as u64 + mem.len() as u64;
}
unsafe {
let mut vm = rbpf::EbpfVmMbuff::new(Some(prog)).unwrap();
vm.jit_compile().unwrap();
assert_eq!(vm.execute_program_jit(mem, &mut mbuff).unwrap(), 0x2211);
}
}
#[cfg(all(not(windows), feature = "std"))]
#[test]
fn test_vm_jit_ldabsb() {
let prog = &[
0x30, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x95, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00,
];
let mem = &mut [
0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee,
0xff,
];
let mut vm = rbpf::EbpfVmRaw::new(Some(prog)).unwrap();
assert_eq!(vm.execute_program(mem).unwrap(), 0x33);
vm.jit_compile().unwrap();
unsafe {
assert_eq!(vm.execute_program_jit(mem).unwrap(), 0x33);
};
}
#[cfg(all(not(windows), feature = "std"))]
#[test]
fn test_vm_jit_ldabsh() {
let prog = &[
0x28, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x95, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00,
];
let mem = &mut [
0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee,
0xff,
];
let mut vm = rbpf::EbpfVmRaw::new(Some(prog)).unwrap();
assert_eq!(vm.execute_program(mem).unwrap(), 0x4433);
vm.jit_compile().unwrap();
unsafe {
assert_eq!(vm.execute_program_jit(mem).unwrap(), 0x4433);
};
}
#[cfg(all(not(windows), feature = "std"))]
#[test]
fn test_vm_jit_ldabsw() {
let prog = &[
0x20, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x95, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00,
];
let mem = &mut [
0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee,
0xff,
];
let mut vm = rbpf::EbpfVmRaw::new(Some(prog)).unwrap();
assert_eq!(vm.execute_program(mem).unwrap(), 0x66554433);
vm.jit_compile().unwrap();
unsafe {
assert_eq!(vm.execute_program_jit(mem).unwrap(), 0x66554433);
};
}
#[cfg(all(not(windows), feature = "std"))]
#[test]
fn test_vm_jit_ldabsdw() {
let prog = &[
0x38, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x95, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00,
];
let mem = &mut [
0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee,
0xff,
];
let mut vm = rbpf::EbpfVmRaw::new(Some(prog)).unwrap();
assert_eq!(vm.execute_program(mem).unwrap(), 0xaa99887766554433);
vm.jit_compile().unwrap();
unsafe {
assert_eq!(vm.execute_program_jit(mem).unwrap(), 0xaa99887766554433);
};
}
#[test]
#[should_panic(expected = "Error: out of bounds memory load (insn #1),")]
fn test_vm_err_ldabsb_oob() {
let prog = &[
0x38, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, 0x95, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00,
];
let mem = &mut [
0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee,
0xff,
];
let vm = rbpf::EbpfVmRaw::new(Some(prog)).unwrap();
vm.execute_program(mem).unwrap();
// Memory check not implemented for JIT yet.
}
#[test]
#[should_panic(expected = "Error: out of bounds memory load (insn #1),")]
fn test_vm_err_ldabsb_nomem() {
let prog = &[
0x38, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x95, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00,
];
let vm = rbpf::EbpfVmNoData::new(Some(prog)).unwrap();
vm.execute_program().unwrap();
// Memory check not implemented for JIT yet.
}
#[cfg(all(not(windows), feature = "std"))]
#[test]
fn test_vm_jit_ldindb() {
let prog = &[
0xb7, 0x01, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x50, 0x10, 0x00, 0x00, 0x03, 0x00, 0x00,
0x00, 0x95, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
];
let mem = &mut [
0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee,
0xff,
];
let mut vm = rbpf::EbpfVmRaw::new(Some(prog)).unwrap();
assert_eq!(vm.execute_program(mem).unwrap(), 0x88);
vm.jit_compile().unwrap();
unsafe {
assert_eq!(vm.execute_program_jit(mem).unwrap(), 0x88);
};
}
#[cfg(all(not(windows), feature = "std"))]
#[test]
fn test_vm_jit_ldindh() {
let prog = &[
0xb7, 0x01, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x48, 0x10, 0x00, 0x00, 0x03, 0x00, 0x00,
0x00, 0x95, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
];
let mem = &mut [
0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee,
0xff,
];
let mut vm = rbpf::EbpfVmRaw::new(Some(prog)).unwrap();
assert_eq!(vm.execute_program(mem).unwrap(), 0x9988);
vm.jit_compile().unwrap();
unsafe {
assert_eq!(vm.execute_program_jit(mem).unwrap(), 0x9988);
};
}
#[cfg(all(not(windows), feature = "std"))]
#[test]
fn test_vm_jit_ldindw() {
let prog = &[
0xb7, 0x01, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x40, 0x10, 0x00, 0x00, 0x01, 0x00, 0x00,
0x00, 0x95, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
];
let mem = &mut [
0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee,
0xff,
];
let mut vm = rbpf::EbpfVmRaw::new(Some(prog)).unwrap();
assert_eq!(vm.execute_program(mem).unwrap(), 0x88776655);
vm.jit_compile().unwrap();
unsafe {
assert_eq!(vm.execute_program_jit(mem).unwrap(), 0x88776655);
};
}
#[cfg(all(not(windows), feature = "std"))]
#[test]
fn test_vm_jit_ldinddw() {
let prog = &[
0xb7, 0x01, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x58, 0x10, 0x00, 0x00, 0x03, 0x00, 0x00,
0x00, 0x95, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
];
let mem = &mut [
0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee,
0xff,
];
let mut vm = rbpf::EbpfVmRaw::new(Some(prog)).unwrap();
assert_eq!(vm.execute_program(mem).unwrap(), 0xccbbaa9988776655);
vm.jit_compile().unwrap();
unsafe {
assert_eq!(vm.execute_program_jit(mem).unwrap(), 0xccbbaa9988776655);
};
}
#[test]
#[should_panic(expected = "Error: out of bounds memory load (insn #2),")]
fn test_vm_err_ldindb_oob() {
let prog = &[
0xb7, 0x01, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x38, 0x10, 0x00, 0x00, 0x33, 0x00, 0x00,
0x00, 0x95, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
];
let mem = &mut [
0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee,
0xff,
];
let vm = rbpf::EbpfVmRaw::new(Some(prog)).unwrap();
vm.execute_program(mem).unwrap();
// Memory check not implemented for JIT yet.
}
#[test]
#[should_panic(expected = "Error: out of bounds memory load (insn #2),")]
fn test_vm_err_ldindb_nomem() {
let prog = &[
0xb7, 0x01, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x38, 0x10, 0x00, 0x00, 0x03, 0x00, 0x00,
0x00, 0x95, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
];
let vm = rbpf::EbpfVmNoData::new(Some(prog)).unwrap();
vm.execute_program().unwrap();
// Memory check not implemented for JIT yet.
}
#[test]
#[should_panic(expected = "Error: No program set, call prog_set() to load one")]
fn test_vm_exec_no_program() {
let vm = rbpf::EbpfVmNoData::new(None).unwrap();
assert_eq!(vm.execute_program().unwrap(), 0xBEE);
}
fn verifier_success(_prog: &[u8]) -> Result<(), Error> {
Ok(())
}
fn verifier_fail(_prog: &[u8]) -> Result<(), Error> {
Err(Error::new(ErrorKind::Other, "Gaggablaghblagh!"))
}
#[test]
fn test_verifier_success() {
let prog = assemble(
"mov32 r0, 0xBEE
exit",
)
.unwrap();
let mut vm = rbpf::EbpfVmNoData::new(None).unwrap();
vm.set_verifier(verifier_success).unwrap();
vm.set_program(&prog).unwrap();
assert_eq!(vm.execute_program().unwrap(), 0xBEE);
}
#[test]
#[should_panic(expected = "Gaggablaghblagh!")]
fn test_verifier_fail() {
let prog = assemble(
"mov32 r0, 0xBEE
exit",
)
.unwrap();
let mut vm = rbpf::EbpfVmNoData::new(None).unwrap();
vm.set_verifier(verifier_fail).unwrap();
vm.set_program(&prog).unwrap();
assert_eq!(vm.execute_program().unwrap(), 0xBEE);
}

File diff suppressed because it is too large.


@ -0,0 +1,177 @@
// SPDX-License-Identifier: (Apache-2.0 OR MIT)
// Converted from the tests for uBPF <https://github.com/iovisor/ubpf>
// Copyright 2015 Big Switch Networks, Inc
// Copyright 2016 6WIND S.A. <quentin.monnet@6wind.com>
// The tests contained in this file are extracted from the unit tests of uBPF software. Each test
// in this file has a name in the form `test_verifier_<name>`, and corresponds to the
// (human-readable) code in `ubpf/tree/master/tests/<name>`, available at
// <https://github.com/iovisor/ubpf/tree/master/tests> (hyphens had to be replaced with underscores
// as Rust will not accept them in function names). It is strongly advised to refer to the uBPF
// version to understand what these programs do.
//
// Each program was assembled from the uBPF version with the assembler provided by uBPF itself, and
// available at <https://github.com/iovisor/ubpf/tree/master/ubpf>.
// The very few modifications that have been made are indicated.
// These are unit tests for the eBPF “verifier”.
extern crate rbpf;
use rbpf::{assembler::assemble, ebpf};
#[test]
#[should_panic(expected = "[Verifier] Error: unsupported argument for LE/BE (insn #0)")]
fn test_verifier_err_endian_size() {
let prog = &[
0xdc, 0x01, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0xb7, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x95, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
];
let vm = rbpf::EbpfVmNoData::new(Some(prog)).unwrap();
vm.execute_program().unwrap();
}
#[test]
#[should_panic(expected = "[Verifier] Error: incomplete LD_DW instruction (insn #0)")]
fn test_verifier_err_incomplete_lddw() {
// Note: ubpf has test-err-incomplete-lddw2, which is the same
let prog = &[
0x18, 0x00, 0x00, 0x00, 0x88, 0x77, 0x66, 0x55, 0x95, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00,
];
let vm = rbpf::EbpfVmNoData::new(Some(prog)).unwrap();
vm.execute_program().unwrap();
}
#[test]
#[should_panic(expected = "[Verifier] Error: infinite loop")]
fn test_verifier_err_infinite_loop() {
let prog = assemble(
"
ja -1
exit",
)
.unwrap();
let vm = rbpf::EbpfVmNoData::new(Some(&prog)).unwrap();
vm.execute_program().unwrap();
}
#[test]
#[should_panic(expected = "[Verifier] Error: invalid destination register (insn #0)")]
fn test_verifier_err_invalid_reg_dst() {
let prog = assemble(
"
mov r11, 1
exit",
)
.unwrap();
let vm = rbpf::EbpfVmNoData::new(Some(&prog)).unwrap();
vm.execute_program().unwrap();
}
#[test]
#[should_panic(expected = "[Verifier] Error: invalid source register (insn #0)")]
fn test_verifier_err_invalid_reg_src() {
let prog = assemble(
"
mov r0, r11
exit",
)
.unwrap();
let vm = rbpf::EbpfVmNoData::new(Some(&prog)).unwrap();
vm.execute_program().unwrap();
}
#[test]
#[should_panic(expected = "[Verifier] Error: jump to middle of LD_DW at #2 (insn #0)")]
fn test_verifier_err_jmp_lddw() {
let prog = assemble(
"
ja +1
lddw r0, 0x1122334455667788
exit",
)
.unwrap();
let vm = rbpf::EbpfVmNoData::new(Some(&prog)).unwrap();
vm.execute_program().unwrap();
}
#[test]
#[should_panic(expected = "[Verifier] Error: jump out of code to #3 (insn #0)")]
fn test_verifier_err_jmp_out() {
let prog = assemble(
"
ja +2
exit",
)
.unwrap();
let vm = rbpf::EbpfVmNoData::new(Some(&prog)).unwrap();
vm.execute_program().unwrap();
}
#[test]
#[should_panic(expected = "[Verifier] Error: program does not end with “EXIT” instruction")]
fn test_verifier_err_no_exit() {
let prog = assemble(
"
mov32 r0, 0",
)
.unwrap();
let vm = rbpf::EbpfVmNoData::new(Some(&prog)).unwrap();
vm.execute_program().unwrap();
}
#[test]
fn test_verifier_err_no_exit_backward_jump() {
let prog = assemble(
"
ja +1
exit
ja -2",
)
.unwrap();
let vm = rbpf::EbpfVmNoData::new(Some(&prog)).unwrap();
vm.execute_program().unwrap();
}
#[test]
#[should_panic(expected = "[Verifier] Error: eBPF program length limited to 1000000, here 1000001")]
fn test_verifier_err_too_many_instructions() {
    // uBPF uses 65637 instructions, because it sets its limit at 65636.
    // This implementation caps programs at 1_000_000 instructions, so generate one instruction more than that.
let mut prog = (0..(1_000_000 * ebpf::INSN_SIZE))
.map(|x| match x % 8 {
0 => 0xb7,
1 => 0x01,
_ => 0,
})
.collect::<Vec<u8>>();
prog.append(&mut vec![0x95, 0, 0, 0, 0, 0, 0, 0]);
let vm = rbpf::EbpfVmNoData::new(Some(&prog)).unwrap();
vm.execute_program().unwrap();
}
#[test]
#[should_panic(expected = "[Verifier] Error: unknown eBPF opcode 0x6 (insn #0)")]
fn test_verifier_err_unknown_opcode() {
let prog = &[
0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x95, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00,
];
let vm = rbpf::EbpfVmNoData::new(Some(prog)).unwrap();
vm.execute_program().unwrap();
}
#[test]
#[should_panic(expected = "[Verifier] Error: cannot write into register r10 (insn #0)")]
fn test_verifier_err_write_r10() {
let prog = assemble(
"
mov r10, 1
exit",
)
.unwrap();
let vm = rbpf::EbpfVmNoData::new(Some(&prog)).unwrap();
vm.execute_program().unwrap();
}

File diff suppressed because it is too large.


@ -3,12 +3,12 @@
 //! Architecture-related handling logic; reference: https://code.dragonos.org.cn/xref/linux-6.6.21/arch/riscv/kernel/traps.c
 use core::hint::spin_loop;
-use log::error;
+use log::{error, trace};
 use system_error::SystemError;
-use crate::{arch::syscall::syscall_handler, driver::irqchip::riscv_intc::riscv_intc_irq};
 use super::TrapFrame;
+use crate::exception::ebreak::EBreak;
+use crate::{arch::syscall::syscall_handler, driver::irqchip::riscv_intc::riscv_intc_irq};
 type ExceptionHandler = fn(&mut TrapFrame) -> Result<(), SystemError>;
@ -93,11 +93,10 @@ fn do_trap_insn_illegal(_trap_frame: &mut TrapFrame) -> Result<(), SystemError>
 }
 /// Handle the breakpoint exception #3
-fn do_trap_break(_trap_frame: &mut TrapFrame) -> Result<(), SystemError> {
-    error!("riscv64_do_irq: do_trap_break");
-    loop {
-        spin_loop();
-    }
+fn do_trap_break(trap_frame: &mut TrapFrame) -> Result<(), SystemError> {
+    trace!("riscv64_do_irq: do_trap_break");
+    // handle breakpoint
+    EBreak::handle(trap_frame)
 }
 /// Handle the misaligned load address exception #4


@ -1,3 +1,5 @@
+use core::any::Any;
+use kprobe::ProbeArgs;
 use riscv::register::{scause::Scause, sstatus::Sstatus};
 use system_error::SystemError;
@ -160,4 +162,21 @@ impl TrapFrame {
     pub fn set_return_value(&mut self, value: usize) {
         self.a0 = value;
     }
+
+    /// Set the current program counter.
+    pub fn set_pc(&mut self, pc: usize) {
+        self.epc = pc;
+    }
+}
+
+impl ProbeArgs for TrapFrame {
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+    fn break_address(&self) -> usize {
+        self.epc
+    }
+    fn debug_address(&self) -> usize {
+        self.epc
+    }
 }


@ -0,0 +1,85 @@
use crate::arch::interrupt::TrapFrame;
pub fn setup_single_step(frame: &mut TrapFrame, step_addr: usize) {
frame.set_pc(step_addr);
}
pub fn clear_single_step(frame: &mut TrapFrame, return_addr: usize) {
frame.set_pc(return_addr);
}
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct KProbeContext {
pub pc: usize,
pub ra: usize,
pub sp: usize,
pub gp: usize,
pub tp: usize,
pub t0: usize,
pub t1: usize,
pub t2: usize,
pub s0: usize,
pub s1: usize,
pub a0: usize,
pub a1: usize,
pub a2: usize,
pub a3: usize,
pub a4: usize,
pub a5: usize,
pub a6: usize,
pub a7: usize,
pub s2: usize,
pub s3: usize,
pub s4: usize,
pub s5: usize,
pub s6: usize,
pub s7: usize,
pub s8: usize,
pub s9: usize,
pub s10: usize,
pub s11: usize,
pub t3: usize,
pub t4: usize,
pub t5: usize,
pub t6: usize,
}
impl From<&TrapFrame> for KProbeContext {
fn from(trap_frame: &TrapFrame) -> Self {
Self {
pc: trap_frame.epc,
ra: trap_frame.ra,
sp: trap_frame.sp,
gp: trap_frame.gp,
tp: trap_frame.tp,
t0: trap_frame.t0,
t1: trap_frame.t1,
t2: trap_frame.t2,
s0: trap_frame.s0,
s1: trap_frame.s1,
a0: trap_frame.a0,
a1: trap_frame.a1,
a2: trap_frame.a2,
a3: trap_frame.a3,
a4: trap_frame.a4,
a5: trap_frame.a5,
a6: trap_frame.a6,
a7: trap_frame.a7,
s2: trap_frame.s2,
s3: trap_frame.s3,
s4: trap_frame.s4,
s5: trap_frame.s5,
s6: trap_frame.s6,
s7: trap_frame.s7,
s8: trap_frame.s8,
s9: trap_frame.s9,
s10: trap_frame.s10,
s11: trap_frame.s11,
t3: trap_frame.t3,
t4: trap_frame.t4,
t5: trap_frame.t5,
t6: trap_frame.t6,
}
}
}


@ -5,6 +5,7 @@ pub mod elf;
 pub mod init;
 pub mod interrupt;
 pub mod ipc;
+pub mod kprobe;
 mod kvm;
 pub mod mm;
 pub mod msi;


@ -4,11 +4,12 @@ pub mod ipi;
 pub mod msi;
 pub mod trap;
+use core::any::Any;
 use core::{
     arch::asm,
     sync::atomic::{compiler_fence, Ordering},
 };
+use kprobe::ProbeArgs;
 use log::error;
 use system_error::SystemError;
@ -177,4 +178,21 @@ impl TrapFrame {
     pub fn is_from_user(&self) -> bool {
         return (self.cs & 0x3) != 0;
     }
+
+    /// Set the current program counter.
+    pub fn set_pc(&mut self, pc: usize) {
+        self.rip = pc as u64;
+    }
+}
+
+impl ProbeArgs for TrapFrame {
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+    fn break_address(&self) -> usize {
+        (self.rip - 1) as usize
+    }
+    fn debug_address(&self) -> usize {
+        self.rip as usize
+    }
 }


@ -1,6 +1,12 @@
-use log::{error, warn};
+use log::{error, trace, warn};
 use system_error::SystemError;
+use super::{
+    entry::{set_intr_gate, set_system_trap_gate},
+    TrapFrame,
+};
+use crate::exception::debug::DebugException;
+use crate::exception::ebreak::EBreak;
 use crate::{
     arch::{CurrentIrqArch, MMArch},
     exception::InterruptArch,
@ -9,11 +15,6 @@ use crate::{
     smp::core::smp_get_processor_id,
 };
-use super::{
-    entry::{set_intr_gate, set_system_trap_gate},
-    TrapFrame,
-};
 extern "C" {
     fn trap_divide_error();
     fn trap_debug();
@ -125,8 +126,8 @@ unsafe extern "C" fn do_divide_error(regs: &'static TrapFrame, error_code: u64)
 /// Handle the debug exception 1 #DB
 #[no_mangle]
-unsafe extern "C" fn do_debug(regs: &'static TrapFrame, error_code: u64) {
-    error!(
+unsafe extern "C" fn do_debug(regs: &'static mut TrapFrame, error_code: u64) {
+    trace!(
         "do_debug(1), \tError code: {:#x},\trsp: {:#x},\trip: {:#x},\t CPU: {}, \tpid: {:?}",
         error_code,
         regs.rsp,
@ -134,7 +135,7 @@ unsafe extern "C" fn do_debug(regs: &'static TrapFrame, error_code: u64) {
         smp_get_processor_id().data(),
         ProcessManager::current_pid()
     );
-    panic!("Debug Exception");
+    DebugException::handle(regs).unwrap();
 }
 /// Handle the NMI interrupt 2 NMI
@ -153,8 +154,8 @@ unsafe extern "C" fn do_nmi(regs: &'static TrapFrame, error_code: u64) {
 /// Handle the breakpoint exception 3 #BP
 #[no_mangle]
-unsafe extern "C" fn do_int3(regs: &'static TrapFrame, error_code: u64) {
-    error!(
+unsafe extern "C" fn do_int3(regs: &'static mut TrapFrame, error_code: u64) {
+    trace!(
         "do_int3(3), \tError code: {:#x},\trsp: {:#x},\trip: {:#x},\t CPU: {}, \tpid: {:?}",
         error_code,
         regs.rsp,
@ -162,7 +163,7 @@ unsafe extern "C" fn do_int3(regs: &'static TrapFrame, error_code: u64) {
         smp_get_processor_id().data(),
         ProcessManager::current_pid()
     );
-    panic!("Int3");
+    EBreak::handle(regs).unwrap();
 }
 /// Handle the overflow exception 4 #OF


@ -0,0 +1,65 @@
use crate::arch::interrupt::TrapFrame;
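// Comment added for clarity: 0x100 is bit 8 of RFLAGS, the Trap Flag (TF). Setting it below
// makes the CPU raise a debug exception (#DB) after executing the next instruction, which is
// how the probed instruction is single-stepped; clearing it restores normal execution.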
pub fn setup_single_step(frame: &mut TrapFrame, step_addr: usize) {
frame.rflags |= 0x100;
frame.set_pc(step_addr);
}
pub fn clear_single_step(frame: &mut TrapFrame, return_addr: usize) {
frame.rflags &= !0x100;
frame.set_pc(return_addr);
}
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct KProbeContext {
pub r15: ::core::ffi::c_ulong,
pub r14: ::core::ffi::c_ulong,
pub r13: ::core::ffi::c_ulong,
pub r12: ::core::ffi::c_ulong,
pub rbp: ::core::ffi::c_ulong,
pub rbx: ::core::ffi::c_ulong,
pub r11: ::core::ffi::c_ulong,
pub r10: ::core::ffi::c_ulong,
pub r9: ::core::ffi::c_ulong,
pub r8: ::core::ffi::c_ulong,
pub rax: ::core::ffi::c_ulong,
pub rcx: ::core::ffi::c_ulong,
pub rdx: ::core::ffi::c_ulong,
pub rsi: ::core::ffi::c_ulong,
pub rdi: ::core::ffi::c_ulong,
pub orig_rax: ::core::ffi::c_ulong,
pub rip: ::core::ffi::c_ulong,
pub cs: ::core::ffi::c_ulong,
pub eflags: ::core::ffi::c_ulong,
pub rsp: ::core::ffi::c_ulong,
pub ss: ::core::ffi::c_ulong,
}
impl From<&TrapFrame> for KProbeContext {
fn from(trap_frame: &TrapFrame) -> Self {
Self {
r15: trap_frame.r15,
r14: trap_frame.r14,
r13: trap_frame.r13,
r12: trap_frame.r12,
rbp: trap_frame.rbp,
rbx: trap_frame.rbx,
r11: trap_frame.r11,
r10: trap_frame.r10,
r9: trap_frame.r9,
r8: trap_frame.r8,
rax: trap_frame.rax,
rcx: trap_frame.rcx,
rdx: trap_frame.rdx,
rsi: trap_frame.rsi,
rdi: trap_frame.rdi,
orig_rax: 0,
rip: trap_frame.rip,
cs: trap_frame.cs,
eflags: trap_frame.rflags,
rsp: trap_frame.rsp,
ss: trap_frame.ss,
}
}
}


@ -8,6 +8,7 @@ pub mod fpu;
 pub mod init;
 pub mod interrupt;
 pub mod ipc;
+pub mod kprobe;
 pub mod kvm;
 pub mod libs;
 pub mod mm;


@ -0,0 +1,11 @@
pub const HELPER_MAP_LOOKUP_ELEM: u32 = 1;
pub const HELPER_MAP_UPDATE_ELEM: u32 = 2;
pub const HELPER_MAP_DELETE_ELEM: u32 = 3;
pub const HELPER_MAP_FOR_EACH_ELEM: u32 = 164;
pub const HELPER_MAP_LOOKUP_PERCPU_ELEM: u32 = 195;
pub const HELPER_PERF_EVENT_OUTPUT: u32 = 25;
pub const HELPER_BPF_PROBE_READ: u32 = 4;
pub const HELPER_TRACE_PRINTF: u32 = 6;
pub const HELPER_MAP_PUSH_ELEM: u32 = 87;
pub const HELPER_MAP_POP_ELEM: u32 = 88;
pub const HELPER_MAP_PEEK_ELEM: u32 = 89;
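// Comment added for clarity: these IDs follow the Linux `bpf_func_id` UAPI numbering
// (e.g. bpf_map_lookup_elem = 1, bpf_trace_printk = 6, bpf_perf_event_output = 25), so
// programs compiled against the standard Linux helper IDs resolve to the same helpers here.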


@ -0,0 +1,340 @@
mod consts;
mod print;
use crate::bpf::helper::print::trace_printf;
use crate::bpf::map::{BpfCallBackFn, BpfMap};
use crate::include::bindings::linux_bpf::BPF_F_CURRENT_CPU;
use crate::libs::lazy_init::Lazy;
use crate::smp::core::smp_get_processor_id;
use alloc::{collections::BTreeMap, sync::Arc};
use core::ffi::c_void;
use system_error::SystemError;
type RawBPFHelperFn = fn(u64, u64, u64, u64, u64) -> u64;
type Result<T> = core::result::Result<T, SystemError>;
macro_rules! define_func {
($name:ident) => {
core::mem::transmute::<usize, RawBPFHelperFn>($name as usize)
};
}
/// See https://ebpf-docs.dylanreimerink.nl/linux/helper-function/bpf_map_lookup_elem/
unsafe fn raw_map_lookup_elem(map: *mut c_void, key: *const c_void) -> *const c_void {
let map = Arc::from_raw(map as *const BpfMap);
let key_size = map.key_size();
let key = core::slice::from_raw_parts(key as *const u8, key_size);
let value = map_lookup_elem(&map, key);
// log::info!("<raw_map_lookup_elem>: {:x?}", value);
// warning: We need to keep the map alive, so we don't drop it here.
let _ = Arc::into_raw(map);
match value {
Ok(Some(value)) => value as *const c_void,
_ => core::ptr::null_mut(),
}
}
pub fn map_lookup_elem(map: &Arc<BpfMap>, key: &[u8]) -> Result<Option<*const u8>> {
let mut binding = map.inner_map().lock();
let value = binding.lookup_elem(key);
match value {
Ok(Some(value)) => Ok(Some(value.as_ptr())),
_ => Ok(None),
}
}
/// See https://ebpf-docs.dylanreimerink.nl/linux/helper-function/bpf_perf_event_output/
///
/// See https://man7.org/linux/man-pages/man7/bpf-helpers.7.html
unsafe fn raw_perf_event_output(
ctx: *mut c_void,
map: *mut c_void,
flags: u64,
data: *mut c_void,
size: u64,
) -> i64 {
// log::info!("<raw_perf_event_output>: {:x?}", data);
let map = Arc::from_raw(map as *const BpfMap);
let data = core::slice::from_raw_parts(data as *const u8, size as usize);
let res = perf_event_output(ctx, &map, flags, data);
// warning: We need to keep the map alive, so we don't drop it here.
let _ = Arc::into_raw(map);
match res {
Ok(_) => 0,
Err(e) => e as i64,
}
}
pub fn perf_event_output(
ctx: *mut c_void,
map: &Arc<BpfMap>,
flags: u64,
data: &[u8],
) -> Result<()> {
let mut binding = map.inner_map().lock();
let index = flags as u32;
let flags = (flags >> 32) as u32;
let key = if index == BPF_F_CURRENT_CPU as u32 {
smp_get_processor_id().data()
} else {
index
};
let fd = binding
.lookup_elem(&key.to_ne_bytes())?
.ok_or(SystemError::ENOENT)?;
let fd = u32::from_ne_bytes(fd.try_into().map_err(|_| SystemError::EINVAL)?);
crate::perf::perf_event_output(ctx, fd as usize, flags, data)?;
Ok(())
}
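// Comment added for clarity: as on Linux, the 64-bit `flags` value is split above into a low
// 32-bit index (or BPF_F_CURRENT_CPU to select the CPU the program is running on) used to look
// up the perf event fd, and a high 32-bit part that is forwarded to the perf subsystem.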
/// See https://ebpf-docs.dylanreimerink.nl/linux/helper-function/bpf_probe_read/
fn raw_bpf_probe_read(dst: *mut c_void, size: u32, unsafe_ptr: *const c_void) -> i64 {
log::info!(
"raw_bpf_probe_read, dst:{:x}, size:{}, unsafe_ptr: {:x}",
dst as usize,
size,
unsafe_ptr as usize
);
let (dst, src) = unsafe {
let dst = core::slice::from_raw_parts_mut(dst as *mut u8, size as usize);
let src = core::slice::from_raw_parts(unsafe_ptr as *const u8, size as usize);
(dst, src)
};
let res = bpf_probe_read(dst, src);
match res {
Ok(_) => 0,
Err(e) => e as i64,
}
}
/// For tracing programs, safely attempt to read size
/// bytes from kernel space address unsafe_ptr and
/// store the data in dst.
pub fn bpf_probe_read(dst: &mut [u8], src: &[u8]) -> Result<()> {
log::info!("bpf_probe_read: len: {}", dst.len());
dst.copy_from_slice(src);
Ok(())
}
unsafe fn raw_map_update_elem(
map: *mut c_void,
key: *const c_void,
value: *const c_void,
flags: u64,
) -> i64 {
let map = Arc::from_raw(map as *const BpfMap);
let key_size = map.key_size();
let value_size = map.value_size();
// log::info!("<raw_map_update_elem>: flags: {:x?}", flags);
let key = core::slice::from_raw_parts(key as *const u8, key_size);
let value = core::slice::from_raw_parts(value as *const u8, value_size);
let res = map_update_elem(&map, key, value, flags);
let _ = Arc::into_raw(map);
match res {
Ok(_) => 0,
Err(e) => e as _,
}
}
pub fn map_update_elem(map: &Arc<BpfMap>, key: &[u8], value: &[u8], flags: u64) -> Result<()> {
let mut binding = map.inner_map().lock();
let value = binding.update_elem(key, value, flags);
value
}
/// Delete entry with key from map.
///
/// The delete map element helper call is used to delete values from maps.
unsafe fn raw_map_delete_elem(map: *mut c_void, key: *const c_void) -> i64 {
let map = Arc::from_raw(map as *const BpfMap);
let key_size = map.key_size();
let key = core::slice::from_raw_parts(key as *const u8, key_size);
let res = map_delete_elem(&map, key);
let _ = Arc::into_raw(map);
match res {
Ok(_) => 0,
Err(e) => e as i64,
}
}
pub fn map_delete_elem(map: &Arc<BpfMap>, key: &[u8]) -> Result<()> {
let mut binding = map.inner_map().lock();
let value = binding.delete_elem(key);
value
}
/// For each element in map, call callback_fn function with map, callback_ctx and other map-specific
/// parameters. The callback_fn should be a static function and the callback_ctx should be a pointer
/// to the stack. The flags is used to control certain aspects of the helper. Currently, the flags must
/// be 0.
///
/// The following are a list of supported map types and their respective expected callback signatures:
/// - BPF_MAP_TYPE_HASH
/// - BPF_MAP_TYPE_PERCPU_HASH
/// - BPF_MAP_TYPE_LRU_HASH
/// - BPF_MAP_TYPE_LRU_PERCPU_HASH
/// - BPF_MAP_TYPE_ARRAY
/// - BPF_MAP_TYPE_PERCPU_ARRAY
///
/// `long (*callback_fn)(struct bpf_map *map, const void *key, void *value, void *ctx);`
///
/// For per_cpu maps, the map_value is the value on the cpu where the bpf_prog is running.
unsafe fn raw_map_for_each_elem(
map: *mut c_void,
cb: *const c_void,
ctx: *const c_void,
flags: u64,
) -> i64 {
let map = Arc::from_raw(map as *const BpfMap);
let cb = *core::mem::transmute::<*const c_void, *const BpfCallBackFn>(cb);
let res = map_for_each_elem(&map, cb, ctx as _, flags);
let _ = Arc::into_raw(map);
match res {
Ok(v) => v as i64,
Err(e) => e as i64,
}
}
pub fn map_for_each_elem(
map: &Arc<BpfMap>,
cb: BpfCallBackFn,
ctx: *const u8,
flags: u64,
) -> Result<u32> {
let mut binding = map.inner_map().lock();
let value = binding.for_each_elem(cb, ctx, flags);
value
}
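// Illustrative sketch (not part of this patch): a callback matching `BpfCallBackFn` that counts
// visited elements; returning 0 continues the walk, any non-zero value stops it, as documented
// above. The counter setup is an assumption made up for the example.
//
// fn count_elems(_key: &[u8], _value: &[u8], ctx: *const u8) -> i32 {
//     let counter = unsafe { &mut *(ctx as *mut u32) };
//     *counter += 1;
//     0 // keep iterating
// }
//
// let mut counter = 0u32;
// let visited = map_for_each_elem(&map, count_elems, &mut counter as *mut u32 as *const u8, 0)?;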
/// Perform a lookup in a percpu map for the entry associated with `key` on the given `cpu`.
///
/// See https://ebpf-docs.dylanreimerink.nl/linux/helper-function/bpf_map_lookup_percpu_elem/
unsafe fn raw_map_lookup_percpu_elem(
map: *mut c_void,
key: *const c_void,
cpu: u32,
) -> *const c_void {
let map = Arc::from_raw(map as *const BpfMap);
let key_size = map.key_size();
let key = core::slice::from_raw_parts(key as *const u8, key_size);
let value = map_lookup_percpu_elem(&map, key, cpu);
// warning: We need to keep the map alive, so we don't drop it here.
let _ = Arc::into_raw(map);
match value {
Ok(Some(value)) => value as *const c_void,
_ => core::ptr::null_mut(),
}
}
pub fn map_lookup_percpu_elem(
map: &Arc<BpfMap>,
key: &[u8],
cpu: u32,
) -> Result<Option<*const u8>> {
let mut binding = map.inner_map().lock();
let value = binding.lookup_percpu_elem(key, cpu);
match value {
Ok(Some(value)) => Ok(Some(value.as_ptr())),
_ => Ok(None),
}
}
/// Push an element value in map.
///
/// See https://ebpf-docs.dylanreimerink.nl/linux/helper-function/bpf_map_push_elem/
unsafe fn raw_map_push_elem(map: *mut c_void, value: *const c_void, flags: u64) -> i64 {
let map = Arc::from_raw(map as *const BpfMap);
let value_size = map.value_size();
let value = core::slice::from_raw_parts(value as *const u8, value_size);
let res = map_push_elem(&map, value, flags);
let _ = Arc::into_raw(map);
match res {
Ok(_) => 0,
Err(e) => e as i64,
}
}
pub fn map_push_elem(map: &Arc<BpfMap>, value: &[u8], flags: u64) -> Result<()> {
let mut binding = map.inner_map().lock();
let value = binding.push_elem(value, flags);
value
}
/// Pop an element from map.
///
/// See https://ebpf-docs.dylanreimerink.nl/linux/helper-function/bpf_map_pop_elem/
unsafe fn raw_map_pop_elem(map: *mut c_void, value: *mut c_void) -> i64 {
let map = Arc::from_raw(map as *const BpfMap);
let value_size = map.value_size();
let value = core::slice::from_raw_parts_mut(value as *mut u8, value_size);
let res = map_pop_elem(&map, value);
let _ = Arc::into_raw(map);
match res {
Ok(_) => 0,
Err(e) => e as i64,
}
}
pub fn map_pop_elem(map: &Arc<BpfMap>, value: &mut [u8]) -> Result<()> {
let mut binding = map.inner_map().lock();
let value = binding.pop_elem(value);
value
}
/// Get an element from map without removing it.
///
/// See https://ebpf-docs.dylanreimerink.nl/linux/helper-function/bpf_map_peek_elem/
unsafe fn raw_map_peek_elem(map: *mut c_void, value: *mut c_void) -> i64 {
let map = Arc::from_raw(map as *const BpfMap);
let value_size = map.value_size();
let value = core::slice::from_raw_parts_mut(value as *mut u8, value_size);
let res = map_peek_elem(&map, value);
let _ = Arc::into_raw(map);
match res {
Ok(_) => 0,
Err(e) => e as i64,
}
}
pub fn map_peek_elem(map: &Arc<BpfMap>, value: &mut [u8]) -> Result<()> {
let binding = map.inner_map().lock();
let value = binding.peek_elem(value);
value
}
pub static BPF_HELPER_FUN_SET: Lazy<BTreeMap<u32, RawBPFHelperFn>> = Lazy::new();
/// Initialize the helper functions.
pub fn init_helper_functions() {
use consts::*;
let mut map = BTreeMap::new();
unsafe {
// Map helpers::Generic map helpers
map.insert(HELPER_MAP_LOOKUP_ELEM, define_func!(raw_map_lookup_elem));
map.insert(HELPER_MAP_UPDATE_ELEM, define_func!(raw_map_update_elem));
map.insert(HELPER_MAP_DELETE_ELEM, define_func!(raw_map_delete_elem));
map.insert(
HELPER_MAP_FOR_EACH_ELEM,
define_func!(raw_map_for_each_elem),
);
map.insert(
HELPER_MAP_LOOKUP_PERCPU_ELEM,
define_func!(raw_map_lookup_percpu_elem),
);
// map.insert(93,define_func!(raw_bpf_spin_lock);
// map.insert(94,define_func!(raw_bpf_spin_unlock);
// Map helpers::Perf event array helpers
map.insert(
HELPER_PERF_EVENT_OUTPUT,
define_func!(raw_perf_event_output),
);
// Probe and trace helpers::Memory helpers
map.insert(HELPER_BPF_PROBE_READ, define_func!(raw_bpf_probe_read));
// Print helpers
map.insert(HELPER_TRACE_PRINTF, define_func!(trace_printf));
// Map helpers::Queue and stack helpers
map.insert(HELPER_MAP_PUSH_ELEM, define_func!(raw_map_push_elem));
map.insert(HELPER_MAP_POP_ELEM, define_func!(raw_map_pop_elem));
map.insert(HELPER_MAP_PEEK_ELEM, define_func!(raw_map_peek_elem));
}
BPF_HELPER_FUN_SET.init(map);
}
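// Comment added for clarity (an assumption about intended use): when the kernel loads an eBPF
// program, its rbpf VM is expected to resolve `call <helper id>` instructions through
// BPF_HELPER_FUN_SET, mirroring how the user-space tests register helpers with
// `vm.register_helper(...)`.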


@ -0,0 +1,25 @@
use core::{
ffi::{c_char, c_int},
fmt::Write,
};
use printf_compat::{format, output};
/// Printf according to the format string; returns the number of bytes written (including the terminating '\0').
pub unsafe extern "C" fn printf(w: &mut impl Write, str: *const c_char, mut args: ...) -> c_int {
let bytes_written = format(str as _, args.as_va_list(), output::fmt_write(w));
bytes_written + 1
}
struct TerminalOut;
impl Write for TerminalOut {
fn write_str(&mut self, s: &str) -> core::fmt::Result {
print!("{}", s);
Ok(())
}
}
/// See https://ebpf-docs.dylanreimerink.nl/linux/helper-function/bpf_trace_printk/
pub fn trace_printf(fmt_ptr: u64, _fmt_len: u64, arg3: u64, arg4: u64, arg5: u64) -> u64 {
unsafe { printf(&mut TerminalOut, fmt_ptr as _, arg3, arg4, arg5) as u64 }
}
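// Comment added for clarity: helper 6 (bpf_trace_printk) passes the format string pointer and
// length in its first two arguments and up to three u64 values to format; the length is ignored
// here and the three values are handed to the printf-compat formatter, which writes through
// `TerminalOut` to the kernel console output.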


@ -0,0 +1,283 @@
//! BPF_MAP_TYPE_ARRAY and BPF_MAP_TYPE_PERCPU_ARRAY
//!
//!
//! See https://docs.kernel.org/bpf/map_array.html
use super::super::Result;
use crate::bpf::map::util::round_up;
use crate::bpf::map::{BpfCallBackFn, BpfMapCommonOps, BpfMapMeta};
use crate::mm::percpu::{PerCpu, PerCpuVar};
use crate::smp::cpu::{smp_cpu_manager, ProcessorId};
use alloc::{vec, vec::Vec};
use core::{
fmt::{Debug, Formatter},
ops::{Index, IndexMut},
};
use log::info;
use system_error::SystemError;
/// The array map type is a generic map type with no restrictions on the structure of the value.
/// Like a normal array, the array map has a numeric key starting at 0 and incrementing.
///
/// See https://ebpf-docs.dylanreimerink.nl/linux/map-type/BPF_MAP_TYPE_ARRAY/
#[derive(Debug)]
pub struct ArrayMap {
max_entries: u32,
data: ArrayMapData,
}
struct ArrayMapData {
elem_size: u32,
/// The data is stored in a Vec<u8> with the size of elem_size * max_entries.
data: Vec<u8>,
}
impl Debug for ArrayMapData {
fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result {
f.debug_struct("ArrayMapData")
.field("elem_size", &self.elem_size)
.field("data_len", &self.data.len())
.finish()
}
}
impl ArrayMapData {
pub fn new(elem_size: u32, max_entries: u32) -> Self {
debug_assert!(elem_size % 8 == 0);
let total_size = elem_size * max_entries;
let data = vec![0; total_size as usize];
ArrayMapData { elem_size, data }
}
}
impl Index<u32> for ArrayMapData {
type Output = [u8];
fn index(&self, index: u32) -> &Self::Output {
let start = index * self.elem_size;
&self.data[start as usize..(start + self.elem_size) as usize]
}
}
impl IndexMut<u32> for ArrayMapData {
fn index_mut(&mut self, index: u32) -> &mut Self::Output {
let start = index * self.elem_size;
&mut self.data[start as usize..(start + self.elem_size) as usize]
}
}
impl ArrayMap {
pub fn new(attr: &BpfMapMeta) -> Result<Self> {
if attr.value_size == 0 || attr.max_entries == 0 || attr.key_size != 4 {
return Err(SystemError::EINVAL);
}
let elem_size = round_up(attr.value_size as usize, 8);
let data = ArrayMapData::new(elem_size as u32, attr.max_entries);
Ok(ArrayMap {
max_entries: attr.max_entries,
data,
})
}
}
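// Layout example added for clarity (numbers are made up): with value_size = 6 and max_entries = 4,
// round_up(6, 8) = 8, so the backing Vec<u8> is 8 * 4 = 32 bytes and element i occupies
// data[i * 8 .. i * 8 + 8]; the key is simply the native-endian u32 index into that flat buffer.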
impl BpfMapCommonOps for ArrayMap {
fn lookup_elem(&mut self, key: &[u8]) -> Result<Option<&[u8]>> {
if key.len() != 4 {
return Err(SystemError::EINVAL);
}
let index = u32::from_ne_bytes(key.try_into().map_err(|_| SystemError::EINVAL)?);
if index >= self.max_entries {
return Err(SystemError::EINVAL);
}
let val = self.data.index(index);
Ok(Some(val))
}
fn update_elem(&mut self, key: &[u8], value: &[u8], _flags: u64) -> Result<()> {
if key.len() != 4 {
return Err(SystemError::EINVAL);
}
let index = u32::from_ne_bytes(key.try_into().map_err(|_| SystemError::EINVAL)?);
if index >= self.max_entries {
return Err(SystemError::EINVAL);
}
if value.len() > self.data.elem_size as usize {
return Err(SystemError::EINVAL);
}
let old_value = self.data.index_mut(index);
old_value[..value.len()].copy_from_slice(value);
Ok(())
}
/// For ArrayMap, delete_elem is not supported.
fn delete_elem(&mut self, _key: &[u8]) -> Result<()> {
Err(SystemError::EINVAL)
}
fn for_each_elem(&mut self, cb: BpfCallBackFn, ctx: *const u8, flags: u64) -> Result<u32> {
if flags != 0 {
return Err(SystemError::EINVAL);
}
let mut total_used = 0;
for i in 0..self.max_entries {
let key = i.to_ne_bytes();
let value = self.data.index(i);
total_used += 1;
let res = cb(&key, value, ctx);
// return value: 0 - continue, 1 - stop and return
if res != 0 {
break;
}
}
Ok(total_used)
}
fn lookup_and_delete_elem(&mut self, _key: &[u8], _value: &mut [u8]) -> Result<()> {
Err(SystemError::EINVAL)
}
fn get_next_key(&self, key: Option<&[u8]>, next_key: &mut [u8]) -> Result<()> {
if let Some(key) = key {
if key.len() != 4 {
return Err(SystemError::EINVAL);
}
let index = u32::from_ne_bytes(key.try_into().map_err(|_| SystemError::EINVAL)?);
if index == self.max_entries - 1 {
return Err(SystemError::ENOENT);
}
let next_index = index + 1;
next_key.copy_from_slice(&next_index.to_ne_bytes());
} else {
next_key.copy_from_slice(&0u32.to_ne_bytes());
}
Ok(())
}
fn freeze(&self) -> Result<()> {
info!("fake freeze done for ArrayMap");
Ok(())
}
fn first_value_ptr(&self) -> Result<*const u8> {
Ok(self.data.data.as_ptr())
}
}
/// This is the per-CPU variant of the [ArrayMap] map type.
///
/// See https://ebpf-docs.dylanreimerink.nl/linux/map-type/BPF_MAP_TYPE_PERCPU_ARRAY/
pub struct PerCpuArrayMap {
per_cpu_data: PerCpuVar<ArrayMap>,
}
impl Debug for PerCpuArrayMap {
fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result {
f.debug_struct("PerCpuArrayMap")
.field("data", &self.per_cpu_data)
.finish()
}
}
impl PerCpuArrayMap {
pub fn new(attr: &BpfMapMeta) -> Result<Self> {
let num_cpus = PerCpu::MAX_CPU_NUM;
let mut data = Vec::with_capacity(num_cpus as usize);
for _ in 0..num_cpus {
let array_map = ArrayMap::new(attr)?;
data.push(array_map);
}
let per_cpu_data = PerCpuVar::new(data).ok_or(SystemError::EINVAL)?;
Ok(PerCpuArrayMap { per_cpu_data })
}
}
impl BpfMapCommonOps for PerCpuArrayMap {
fn lookup_elem(&mut self, key: &[u8]) -> Result<Option<&[u8]>> {
self.per_cpu_data.get_mut().lookup_elem(key)
}
fn update_elem(&mut self, key: &[u8], value: &[u8], flags: u64) -> Result<()> {
self.per_cpu_data.get_mut().update_elem(key, value, flags)
}
fn delete_elem(&mut self, key: &[u8]) -> Result<()> {
self.per_cpu_data.get_mut().delete_elem(key)
}
fn for_each_elem(&mut self, cb: BpfCallBackFn, ctx: *const u8, flags: u64) -> Result<u32> {
self.per_cpu_data.get_mut().for_each_elem(cb, ctx, flags)
}
fn lookup_and_delete_elem(&mut self, _key: &[u8], _value: &mut [u8]) -> Result<()> {
Err(SystemError::EINVAL)
}
fn lookup_percpu_elem(&mut self, key: &[u8], cpu: u32) -> Result<Option<&[u8]>> {
unsafe {
self.per_cpu_data
.force_get_mut(ProcessorId::new(cpu))
.lookup_elem(key)
}
}
fn get_next_key(&self, key: Option<&[u8]>, next_key: &mut [u8]) -> Result<()> {
self.per_cpu_data.get_mut().get_next_key(key, next_key)
}
fn first_value_ptr(&self) -> Result<*const u8> {
self.per_cpu_data.get_mut().first_value_ptr()
}
}
/// See https://ebpf-docs.dylanreimerink.nl/linux/map-type/BPF_MAP_TYPE_PERF_EVENT_ARRAY/
pub struct PerfEventArrayMap {
// The value is the file descriptor of the perf event.
fds: ArrayMapData,
}
impl Debug for PerfEventArrayMap {
fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result {
f.debug_struct("PerfEventArrayMap")
.field("fds", &self.fds)
.finish()
}
}
impl PerfEventArrayMap {
pub fn new(attr: &BpfMapMeta) -> Result<Self> {
let num_cpus = smp_cpu_manager().possible_cpus_count();
if attr.key_size != 4 || attr.value_size != 4 || attr.max_entries != num_cpus {
return Err(SystemError::EINVAL);
}
let fds = ArrayMapData::new(4, num_cpus);
Ok(PerfEventArrayMap { fds })
}
}
impl BpfMapCommonOps for PerfEventArrayMap {
fn lookup_elem(&mut self, key: &[u8]) -> Result<Option<&[u8]>> {
let cpu_id = u32::from_ne_bytes(key.try_into().map_err(|_| SystemError::EINVAL)?);
let value = self.fds.index(cpu_id);
Ok(Some(value))
}
fn update_elem(&mut self, key: &[u8], value: &[u8], _flags: u64) -> Result<()> {
assert_eq!(value.len(), 4);
let cpu_id = u32::from_ne_bytes(key.try_into().map_err(|_| SystemError::EINVAL)?);
let old_value = self.fds.index_mut(cpu_id);
old_value.copy_from_slice(value);
Ok(())
}
fn delete_elem(&mut self, key: &[u8]) -> Result<()> {
let cpu_id = u32::from_ne_bytes(key.try_into().map_err(|_| SystemError::EINVAL)?);
self.fds.index_mut(cpu_id).copy_from_slice(&[0; 4]);
Ok(())
}
fn for_each_elem(&mut self, cb: BpfCallBackFn, ctx: *const u8, _flags: u64) -> Result<u32> {
let mut total_used = 0;
let num_cpus = smp_cpu_manager().possible_cpus_count();
for i in 0..num_cpus {
let key = i.to_ne_bytes();
let value = self.fds.index(i);
total_used += 1;
let res = cb(&key, value, ctx);
if res != 0 {
break;
}
}
Ok(total_used)
}
fn lookup_and_delete_elem(&mut self, _key: &[u8], _value: &mut [u8]) -> Result<()> {
Err(SystemError::EINVAL)
}
fn first_value_ptr(&self) -> Result<*const u8> {
Ok(self.fds.data.as_ptr())
}
}


@ -0,0 +1,156 @@
use super::Result;
use crate::bpf::map::util::{round_up, BpfMapUpdateElemFlags};
use crate::bpf::map::{BpfCallBackFn, BpfMapCommonOps, BpfMapMeta};
use crate::mm::percpu::{PerCpu, PerCpuVar};
use crate::smp::cpu::ProcessorId;
use alloc::{collections::BTreeMap, vec::Vec};
use core::fmt::Debug;
use system_error::SystemError;
type BpfHashMapKey = Vec<u8>;
type BpfHashMapValue = Vec<u8>;
/// The hash map type is a generic map type with no restrictions on the structure of the key and value.
/// Hash-maps are implemented using a hash table, allowing for lookups with arbitrary keys.
///
/// See https://ebpf-docs.dylanreimerink.nl/linux/map-type/BPF_MAP_TYPE_HASH/
#[derive(Debug)]
pub struct BpfHashMap {
_max_entries: u32,
_key_size: u32,
_value_size: u32,
data: BTreeMap<BpfHashMapKey, BpfHashMapValue>,
}
impl BpfHashMap {
pub fn new(attr: &BpfMapMeta) -> Result<Self> {
if attr.value_size == 0 || attr.max_entries == 0 {
return Err(SystemError::EINVAL);
}
let value_size = round_up(attr.value_size as usize, 8);
Ok(Self {
_max_entries: attr.max_entries,
_key_size: attr.key_size,
_value_size: value_size as u32,
data: BTreeMap::new(),
})
}
}
impl BpfMapCommonOps for BpfHashMap {
fn lookup_elem(&mut self, key: &[u8]) -> Result<Option<&[u8]>> {
let value = self.data.get(key).map(|v| v.as_slice());
Ok(value)
}
fn update_elem(&mut self, key: &[u8], value: &[u8], flags: u64) -> Result<()> {
let _flags = BpfMapUpdateElemFlags::from_bits_truncate(flags);
self.data.insert(key.to_vec(), value.to_vec());
Ok(())
}
fn delete_elem(&mut self, key: &[u8]) -> Result<()> {
self.data.remove(key);
Ok(())
}
fn for_each_elem(&mut self, cb: BpfCallBackFn, ctx: *const u8, flags: u64) -> Result<u32> {
if flags != 0 {
return Err(SystemError::EINVAL);
}
let mut total_used = 0;
for (key, value) in self.data.iter() {
let res = cb(key, value, ctx);
// return value: 0 - continue, 1 - stop and return
if res != 0 {
break;
}
total_used += 1;
}
Ok(total_used)
}
fn lookup_and_delete_elem(&mut self, key: &[u8], value: &mut [u8]) -> Result<()> {
let v = self
.data
.get(key)
.map(|v| v.as_slice())
.ok_or(SystemError::ENOENT)?;
value.copy_from_slice(v);
self.data.remove(key);
Ok(())
}
fn get_next_key(&self, key: Option<&[u8]>, next_key: &mut [u8]) -> Result<()> {
let mut iter = self.data.iter();
if let Some(key) = key {
for (k, _) in iter.by_ref() {
if k.as_slice() == key {
break;
}
}
}
let res = iter.next();
match res {
Some((k, _)) => {
next_key.copy_from_slice(k.as_slice());
Ok(())
}
None => Err(SystemError::ENOENT),
}
}
}
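// Usage sketch added for clarity (not part of this patch): `get_next_key` is what backs
// BPF_MAP_GET_NEXT_KEY-style iteration, so a caller can walk every key roughly like this
// (the surrounding setup, including `key_size`, is assumed for the example):
//
// let mut key: Option<Vec<u8>> = None;
// let mut next = vec![0u8; key_size];
// while hash_map.get_next_key(key.as_deref(), &mut next).is_ok() {
//     // ... use `next` ...
//     key = Some(next.clone());
// }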
/// This is the per-CPU variant of the [BpfHashMap] map type.
///
/// See https://ebpf-docs.dylanreimerink.nl/linux/map-type/BPF_MAP_TYPE_PERCPU_HASH/
pub struct PerCpuHashMap {
per_cpu_maps: PerCpuVar<BpfHashMap>,
}
impl Debug for PerCpuHashMap {
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
f.debug_struct("PerCpuHashMap")
.field("maps", &self.per_cpu_maps)
.finish()
}
}
impl PerCpuHashMap {
pub fn new(attr: &BpfMapMeta) -> Result<Self> {
let num_cpus = PerCpu::MAX_CPU_NUM;
let mut data = Vec::with_capacity(num_cpus as usize);
for _ in 0..num_cpus {
let array_map = BpfHashMap::new(attr)?;
data.push(array_map);
}
let per_cpu_maps = PerCpuVar::new(data).ok_or(SystemError::EINVAL)?;
Ok(PerCpuHashMap { per_cpu_maps })
}
}
impl BpfMapCommonOps for PerCpuHashMap {
fn lookup_elem(&mut self, key: &[u8]) -> Result<Option<&[u8]>> {
self.per_cpu_maps.get_mut().lookup_elem(key)
}
fn update_elem(&mut self, key: &[u8], value: &[u8], flags: u64) -> Result<()> {
self.per_cpu_maps.get_mut().update_elem(key, value, flags)
}
fn delete_elem(&mut self, key: &[u8]) -> Result<()> {
self.per_cpu_maps.get_mut().delete_elem(key)
}
fn for_each_elem(&mut self, cb: BpfCallBackFn, ctx: *const u8, flags: u64) -> Result<u32> {
self.per_cpu_maps.get_mut().for_each_elem(cb, ctx, flags)
}
fn lookup_and_delete_elem(&mut self, key: &[u8], value: &mut [u8]) -> Result<()> {
self.per_cpu_maps
.get_mut()
.lookup_and_delete_elem(key, value)
}
fn lookup_percpu_elem(&mut self, key: &[u8], cpu: u32) -> Result<Option<&[u8]>> {
unsafe {
self.per_cpu_maps
.force_get_mut(ProcessorId::new(cpu))
.lookup_elem(key)
}
}
fn get_next_key(&self, key: Option<&[u8]>, next_key: &mut [u8]) -> Result<()> {
self.per_cpu_maps.get_mut().get_next_key(key, next_key)
}
fn first_value_ptr(&self) -> Result<*const u8> {
self.per_cpu_maps.get_mut().first_value_ptr()
}
}

kernel/src/bpf/map/lru.rs (new file, 151 lines)

@ -0,0 +1,151 @@
use super::{BpfCallBackFn, BpfMapCommonOps, Result};
use crate::bpf::map::util::BpfMapMeta;
use crate::mm::percpu::{PerCpu, PerCpuVar};
use crate::smp::cpu::ProcessorId;
use alloc::vec::Vec;
use core::fmt::Debug;
use core::num::NonZero;
use lru::LruCache;
use system_error::SystemError;
type BpfHashMapKey = Vec<u8>;
type BpfHashMapValue = Vec<u8>;
/// This map is the LRU (Least Recently Used) variant of the BPF_MAP_TYPE_HASH.
/// It is a generic map type that stores a fixed maximum number of key/value pairs.
/// When the map approaches capacity, the approximately least recently
/// used element is removed to make room for new elements.
///
/// See https://docs.ebpf.io/linux/map-type/BPF_MAP_TYPE_LRU_HASH/
#[derive(Debug)]
pub struct LruMap {
_max_entries: u32,
data: LruCache<BpfHashMapKey, BpfHashMapValue>,
}
impl LruMap {
pub fn new(attr: &BpfMapMeta) -> Result<Self> {
if attr.value_size == 0 || attr.max_entries == 0 {
return Err(SystemError::EINVAL);
}
Ok(Self {
_max_entries: attr.max_entries,
data: LruCache::new(
NonZero::new(attr.max_entries as usize).ok_or(SystemError::EINVAL)?,
),
})
}
}
impl BpfMapCommonOps for LruMap {
fn lookup_elem(&mut self, key: &[u8]) -> Result<Option<&[u8]>> {
let value = self.data.get(key).map(|v| v.as_slice());
Ok(value)
}
fn update_elem(&mut self, key: &[u8], value: &[u8], _flags: u64) -> Result<()> {
self.data.put(key.to_vec(), value.to_vec());
Ok(())
}
fn delete_elem(&mut self, key: &[u8]) -> Result<()> {
self.data.pop(key);
Ok(())
}
fn for_each_elem(&mut self, cb: BpfCallBackFn, ctx: *const u8, flags: u64) -> Result<u32> {
if flags != 0 {
return Err(SystemError::EINVAL);
}
let mut total_used = 0;
for (key, value) in self.data.iter() {
let res = cb(key, value, ctx);
// return value: 0 - continue, 1 - stop and return
if res != 0 {
break;
}
total_used += 1;
}
Ok(total_used)
}
fn lookup_and_delete_elem(&mut self, key: &[u8], value: &mut [u8]) -> Result<()> {
let v = self
.data
.get(key)
.map(|v| v.as_slice())
.ok_or(SystemError::ENOENT)?;
value.copy_from_slice(v);
self.data.pop(key);
Ok(())
}
fn get_next_key(&self, key: Option<&[u8]>, next_key: &mut [u8]) -> Result<()> {
let mut iter = self.data.iter();
if let Some(key) = key {
for (k, _) in iter.by_ref() {
if k.as_slice() == key {
break;
}
}
}
let res = iter.next();
match res {
Some((k, _)) => {
next_key.copy_from_slice(k.as_slice());
Ok(())
}
None => Err(SystemError::ENOENT),
}
}
}
/// See https://ebpf-docs.dylanreimerink.nl/linux/map-type/BPF_MAP_TYPE_LRU_PERCPU_HASH/
pub struct PerCpuLruMap {
per_cpu_maps: PerCpuVar<LruMap>,
}
impl Debug for PerCpuLruMap {
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
f.debug_struct("PerCpuLruMap")
.field("maps", &self.per_cpu_maps)
.finish()
}
}
impl PerCpuLruMap {
pub fn new(attr: &BpfMapMeta) -> Result<Self> {
let num_cpus = PerCpu::MAX_CPU_NUM;
let mut data = Vec::with_capacity(num_cpus as usize);
for _ in 0..num_cpus {
let array_map = LruMap::new(attr)?;
data.push(array_map);
}
let per_cpu_maps = PerCpuVar::new(data).ok_or(SystemError::EINVAL)?;
Ok(PerCpuLruMap { per_cpu_maps })
}
}
impl BpfMapCommonOps for PerCpuLruMap {
fn lookup_elem(&mut self, key: &[u8]) -> Result<Option<&[u8]>> {
self.per_cpu_maps.get_mut().lookup_elem(key)
}
fn update_elem(&mut self, key: &[u8], value: &[u8], flags: u64) -> Result<()> {
self.per_cpu_maps.get_mut().update_elem(key, value, flags)
}
fn delete_elem(&mut self, key: &[u8]) -> Result<()> {
self.per_cpu_maps.get_mut().delete_elem(key)
}
fn for_each_elem(&mut self, cb: BpfCallBackFn, ctx: *const u8, flags: u64) -> Result<u32> {
self.per_cpu_maps.get_mut().for_each_elem(cb, ctx, flags)
}
fn lookup_and_delete_elem(&mut self, key: &[u8], value: &mut [u8]) -> Result<()> {
self.per_cpu_maps
.get_mut()
.lookup_and_delete_elem(key, value)
}
fn lookup_percpu_elem(&mut self, key: &[u8], cpu: u32) -> Result<Option<&[u8]>> {
unsafe {
self.per_cpu_maps
.force_get_mut(ProcessorId::new(cpu))
.lookup_elem(key)
}
}
fn get_next_key(&self, key: Option<&[u8]>, next_key: &mut [u8]) -> Result<()> {
self.per_cpu_maps.get_mut().get_next_key(key, next_key)
}
}

kernel/src/bpf/map/mod.rs (new file, 416 lines)

@ -0,0 +1,416 @@
mod array_map;
mod hash_map;
mod lru;
mod queue;
mod util;
use super::Result;
use crate::bpf::map::array_map::{ArrayMap, PerCpuArrayMap, PerfEventArrayMap};
use crate::bpf::map::hash_map::PerCpuHashMap;
use crate::bpf::map::util::{BpfMapGetNextKeyArg, BpfMapMeta, BpfMapUpdateArg};
use crate::filesystem::vfs::file::{File, FileMode};
use crate::filesystem::vfs::syscall::ModeType;
use crate::filesystem::vfs::{FilePrivateData, FileSystem, FileType, IndexNode, Metadata};
use crate::include::bindings::linux_bpf::{bpf_attr, bpf_map_type};
use crate::libs::casting::DowncastArc;
use crate::libs::spinlock::{SpinLock, SpinLockGuard};
use crate::process::ProcessManager;
use crate::syscall::user_access::{UserBufferReader, UserBufferWriter};
use alloc::boxed::Box;
use alloc::string::String;
use alloc::sync::Arc;
use alloc::vec::Vec;
use core::any::Any;
use core::fmt::Debug;
use intertrait::CastFromSync;
use log::{error, info};
use system_error::SystemError;
#[derive(Debug)]
pub struct BpfMap {
inner_map: SpinLock<Box<dyn BpfMapCommonOps>>,
meta: BpfMapMeta,
}
pub type BpfCallBackFn = fn(key: &[u8], value: &[u8], ctx: *const u8) -> i32;
pub trait BpfMapCommonOps: Send + Sync + Debug + CastFromSync {
/// Lookup an element in the map.
///
/// See https://ebpf-docs.dylanreimerink.nl/linux/helper-function/bpf_map_lookup_elem/
fn lookup_elem(&mut self, _key: &[u8]) -> Result<Option<&[u8]>> {
Err(SystemError::ENOSYS)
}
/// Update an element in the map.
///
/// See https://ebpf-docs.dylanreimerink.nl/linux/helper-function/bpf_map_update_elem/
fn update_elem(&mut self, _key: &[u8], _value: &[u8], _flags: u64) -> Result<()> {
Err(SystemError::ENOSYS)
}
/// Delete an element from the map.
///
/// See https://ebpf-docs.dylanreimerink.nl/linux/helper-function/bpf_map_delete_elem/
fn delete_elem(&mut self, _key: &[u8]) -> Result<()> {
Err(SystemError::ENOSYS)
}
/// For each element in map, call callback_fn function with map,
/// callback_ctx and other map-specific parameters.
///
/// See https://ebpf-docs.dylanreimerink.nl/linux/helper-function/bpf_for_each_map_elem/
fn for_each_elem(&mut self, _cb: BpfCallBackFn, _ctx: *const u8, _flags: u64) -> Result<u32> {
Err(SystemError::ENOSYS)
}
/// Look up an element with the given key in the map referred to by the file descriptor fd,
/// and if found, delete the element.
fn lookup_and_delete_elem(&mut self, _key: &[u8], _value: &mut [u8]) -> Result<()> {
Err(SystemError::ENOSYS)
}
    /// Perform a lookup in a percpu map for the entry associated with `key` on the given `cpu`.
fn lookup_percpu_elem(&mut self, _key: &[u8], _cpu: u32) -> Result<Option<&[u8]>> {
Err(SystemError::ENOSYS)
}
/// Get the next key in the map. If key is None, get the first key.
///
/// Called from syscall
fn get_next_key(&self, _key: Option<&[u8]>, _next_key: &mut [u8]) -> Result<()> {
Err(SystemError::ENOSYS)
}
/// Push an element value in map.
fn push_elem(&mut self, _value: &[u8], _flags: u64) -> Result<()> {
Err(SystemError::ENOSYS)
}
/// Pop an element value from map.
fn pop_elem(&mut self, _value: &mut [u8]) -> Result<()> {
Err(SystemError::ENOSYS)
}
/// Peek an element value from map.
fn peek_elem(&self, _value: &mut [u8]) -> Result<()> {
Err(SystemError::ENOSYS)
}
/// Freeze the map.
///
/// It's useful for .rodata maps.
fn freeze(&self) -> Result<()> {
Err(SystemError::ENOSYS)
}
/// Get the first value pointer.
fn first_value_ptr(&self) -> Result<*const u8> {
Err(SystemError::ENOSYS)
}
}
impl DowncastArc for dyn BpfMapCommonOps {
fn as_any_arc(self: Arc<Self>) -> Arc<dyn Any> {
self
}
}
impl BpfMap {
pub fn new(map: Box<dyn BpfMapCommonOps>, meta: BpfMapMeta) -> Self {
assert_ne!(meta.key_size, 0);
BpfMap {
inner_map: SpinLock::new(map),
meta,
}
}
pub fn inner_map(&self) -> &SpinLock<Box<dyn BpfMapCommonOps>> {
&self.inner_map
}
pub fn key_size(&self) -> usize {
self.meta.key_size as usize
}
pub fn value_size(&self) -> usize {
self.meta.value_size as usize
}
}
impl IndexNode for BpfMap {
fn open(&self, _data: SpinLockGuard<FilePrivateData>, _mode: &FileMode) -> Result<()> {
Ok(())
}
fn close(&self, _data: SpinLockGuard<FilePrivateData>) -> Result<()> {
Ok(())
}
fn read_at(
&self,
_offset: usize,
_len: usize,
_buf: &mut [u8],
_data: SpinLockGuard<FilePrivateData>,
) -> Result<usize> {
Err(SystemError::ENOSYS)
}
fn write_at(
&self,
_offset: usize,
_len: usize,
_buf: &[u8],
_data: SpinLockGuard<FilePrivateData>,
) -> Result<usize> {
Err(SystemError::ENOSYS)
}
fn metadata(&self) -> Result<Metadata> {
let meta = Metadata {
mode: ModeType::from_bits_truncate(0o755),
file_type: FileType::File,
..Default::default()
};
Ok(meta)
}
fn resize(&self, _len: usize) -> Result<()> {
Ok(())
}
fn fs(&self) -> Arc<dyn FileSystem> {
todo!("BpfMap does not have a filesystem")
}
fn as_any_ref(&self) -> &dyn Any {
self
}
fn list(&self) -> Result<Vec<String>> {
Err(SystemError::ENOSYS)
}
}
/// Create a map and return a file descriptor that refers to
/// the map. The close-on-exec file descriptor flag
/// is automatically enabled for the new file descriptor.
///
/// See https://ebpf-docs.dylanreimerink.nl/linux/syscall/BPF_MAP_CREATE/
pub fn bpf_map_create(attr: &bpf_attr) -> Result<usize> {
let map_meta = BpfMapMeta::try_from(attr)?;
info!("The map attr is {:#?}", map_meta);
let map: Box<dyn BpfMapCommonOps> = match map_meta.map_type {
bpf_map_type::BPF_MAP_TYPE_ARRAY => {
let array_map = ArrayMap::new(&map_meta)?;
Box::new(array_map)
}
bpf_map_type::BPF_MAP_TYPE_PERCPU_ARRAY => {
let per_cpu_array_map = PerCpuArrayMap::new(&map_meta)?;
Box::new(per_cpu_array_map)
}
bpf_map_type::BPF_MAP_TYPE_PERF_EVENT_ARRAY => {
let perf_event_array_map = PerfEventArrayMap::new(&map_meta)?;
Box::new(perf_event_array_map)
}
bpf_map_type::BPF_MAP_TYPE_CPUMAP
| bpf_map_type::BPF_MAP_TYPE_DEVMAP
| bpf_map_type::BPF_MAP_TYPE_DEVMAP_HASH => {
error!("bpf map type {:?} not implemented", map_meta.map_type);
Err(SystemError::EINVAL)?
}
bpf_map_type::BPF_MAP_TYPE_HASH => {
let hash_map = hash_map::BpfHashMap::new(&map_meta)?;
Box::new(hash_map)
}
bpf_map_type::BPF_MAP_TYPE_PERCPU_HASH => {
let per_cpu_hash_map = PerCpuHashMap::new(&map_meta)?;
Box::new(per_cpu_hash_map)
}
bpf_map_type::BPF_MAP_TYPE_QUEUE => {
let queue_map = queue::QueueMap::new(&map_meta)?;
Box::new(queue_map)
}
bpf_map_type::BPF_MAP_TYPE_STACK => {
let stack_map = queue::StackMap::new(&map_meta)?;
Box::new(stack_map)
}
bpf_map_type::BPF_MAP_TYPE_LRU_HASH => {
let lru_hash_map = lru::LruMap::new(&map_meta)?;
Box::new(lru_hash_map)
}
bpf_map_type::BPF_MAP_TYPE_LRU_PERCPU_HASH => {
let lru_per_cpu_hash_map = lru::PerCpuLruMap::new(&map_meta)?;
Box::new(lru_per_cpu_hash_map)
}
_ => {
unimplemented!("bpf map type {:?} not implemented", map_meta.map_type)
}
};
let bpf_map = BpfMap::new(map, map_meta);
let fd_table = ProcessManager::current_pcb().fd_table();
let file = File::new(Arc::new(bpf_map), FileMode::O_RDWR | FileMode::O_CLOEXEC)?;
let fd = fd_table.write().alloc_fd(file, None).map(|x| x as usize)?;
info!("create map with fd: [{}]", fd);
Ok(fd)
}
/// Create or update an element (key/value pair) in a specified map.
///
/// See https://ebpf-docs.dylanreimerink.nl/linux/syscall/BPF_MAP_UPDATE_ELEM/
pub fn bpf_map_update_elem(attr: &bpf_attr) -> Result<usize> {
let arg = BpfMapUpdateArg::from(attr);
info!("<bpf_map_update_elem>: {:#x?}", arg);
let map = get_map_file(arg.map_fd as i32)?;
let meta = &map.meta;
let key_size = meta.key_size as usize;
let value_size = meta.value_size as usize;
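// copy the key and value from user space before taking the map lock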
let key_buf = UserBufferReader::new(arg.key as *const u8, key_size, true)?;
let value_buf = UserBufferReader::new(arg.value as *const u8, value_size, true)?;
let key = key_buf.read_from_user(0)?;
let value = value_buf.read_from_user(0)?;
map.inner_map.lock().update_elem(key, value, arg.flags)?;
info!("bpf_map_update_elem ok");
Ok(0)
}
pub fn bpf_map_freeze(attr: &bpf_attr) -> Result<usize> {
let arg = BpfMapUpdateArg::from(attr);
let map_fd = arg.map_fd;
info!("<bpf_map_freeze>: map_fd: {:}", map_fd);
let map = get_map_file(map_fd as i32)?;
map.inner_map.lock().freeze()?;
Ok(0)
}
/// Look up an element by key in a specified map and return its value.
///
/// See https://ebpf-docs.dylanreimerink.nl/linux/syscall/BPF_MAP_LOOKUP_ELEM/
pub fn bpf_lookup_elem(attr: &bpf_attr) -> Result<usize> {
let arg = BpfMapUpdateArg::from(attr);
// info!("<bpf_lookup_elem>: {:#x?}", arg);
let map = get_map_file(arg.map_fd as _)?;
let meta = &map.meta;
let key_size = meta.key_size as usize;
let value_size = meta.value_size as usize;
let key_buf = UserBufferReader::new(arg.key as *const u8, key_size, true)?;
let mut value_buf = UserBufferWriter::new(arg.value as *mut u8, value_size, true)?;
let key = key_buf.read_from_user(0)?;
let mut inner = map.inner_map.lock();
let r_value = inner.lookup_elem(key)?;
if let Some(r_value) = r_value {
value_buf.copy_to_user(r_value, 0)?;
Ok(0)
} else {
Err(SystemError::ENOENT)
}
}
/// Look up an element by key in a specified map and return the key of the next element.
///
/// - If key is `None`, the operation returns zero and sets the next_key pointer to the key of the first element.
/// - If key is `Some(T)`, the operation returns zero and sets the next_key pointer to the key of the next element.
/// - If key is the last element, returns -1 and errno is set to ENOENT.
///
/// See https://ebpf-docs.dylanreimerink.nl/linux/syscall/BPF_MAP_GET_NEXT_KEY/
pub fn bpf_map_get_next_key(attr: &bpf_attr) -> Result<usize> {
let arg = BpfMapGetNextKeyArg::from(attr);
// info!("<bpf_map_get_next_key>: {:#x?}", arg);
let map = get_map_file(arg.map_fd as i32)?;
let meta = &map.meta;
let key_size = meta.key_size as usize;
let key = if let Some(key_ptr) = arg.key {
let key_buf = UserBufferReader::new(key_ptr as *const u8, key_size, true)?;
let key = key_buf.read_from_user(0)?.to_vec();
Some(key)
} else {
None
};
let key = key.as_deref();
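// the next key is written directly into the caller's next_key buffer through a mutable user-buffer view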
let mut next_key_buf = UserBufferWriter::new(arg.next_key as *mut u8, key_size, true)?;
let inner = map.inner_map.lock();
let next_key = next_key_buf.buffer(0)?;
inner.get_next_key(key, next_key)?;
// info!("next_key: {:?}", next_key);
Ok(0)
}
/// Look up and delete an element by key in a specified map.
///
/// # WARN
///
/// Not all map types (particularly array maps) support this operation;
/// for those, a zero value can be written to the map value instead. Check the map types page for support.
///
/// See https://ebpf-docs.dylanreimerink.nl/linux/syscall/BPF_MAP_DELETE_ELEM/
pub fn bpf_map_delete_elem(attr: &bpf_attr) -> Result<usize> {
let arg = BpfMapUpdateArg::from(attr);
// info!("<bpf_map_delete_elem>: {:#x?}", arg);
let map = get_map_file(arg.map_fd as i32)?;
let meta = &map.meta;
let key_size = meta.key_size as usize;
let key_buf = UserBufferReader::new(arg.key as *const u8, key_size, true)?;
let key = key_buf.read_from_user(0)?;
map.inner_map.lock().delete_elem(key)?;
Ok(0)
}
/// Iterate and fetch multiple elements in a map.
///
/// See https://ebpf-docs.dylanreimerink.nl/linux/syscall/BPF_MAP_LOOKUP_BATCH/
pub fn bpf_map_lookup_batch(_attr: &bpf_attr) -> Result<usize> {
todo!()
}
/// Look up an element with the given key in the map referred to by the file descriptor fd,
/// and if found, delete the element.
///
/// For BPF_MAP_TYPE_QUEUE and BPF_MAP_TYPE_STACK map types, the flags argument needs to be set to 0,
/// but for other map types, it may be specified as:
/// - BPF_F_LOCK : If this flag is set, the command will acquire the spin-lock of the map value we are looking up.
///
/// If the map contains no spin-lock in its value, -EINVAL will be returned by the command.
///
/// The BPF_MAP_TYPE_QUEUE and BPF_MAP_TYPE_STACK map types implement this command as a “pop” operation,
/// deleting the top element rather than one corresponding to key.
/// The key and key_len parameters should be zeroed when issuing this operation for these map types.
///
/// This command is only valid for the following map types:
/// - BPF_MAP_TYPE_QUEUE
/// - BPF_MAP_TYPE_STACK
/// - BPF_MAP_TYPE_HASH
/// - BPF_MAP_TYPE_PERCPU_HASH
/// - BPF_MAP_TYPE_LRU_HASH
/// - BPF_MAP_TYPE_LRU_PERCPU_HASH
///
///
/// See https://ebpf-docs.dylanreimerink.nl/linux/syscall/BPF_MAP_LOOKUP_AND_DELETE_ELEM/
pub fn bpf_map_lookup_and_delete_elem(attr: &bpf_attr) -> Result<usize> {
let arg = BpfMapUpdateArg::from(attr);
// info!("<bpf_map_lookup_and_delete_elem>: {:#x?}", arg);
let map = get_map_file(arg.map_fd as i32)?;
let meta = &map.meta;
let key_size = meta.key_size as usize;
let value_size = meta.value_size as usize;
let key_buf = UserBufferReader::new(arg.key as *const u8, key_size, true)?;
let mut value_buf = UserBufferWriter::new(arg.value as *mut u8, value_size, true)?;
let value = value_buf.buffer(0)?;
let key = key_buf.read_from_user(0)?;
let mut inner = map.inner_map.lock();
inner.lookup_and_delete_elem(key, value)?;
Ok(0)
}
fn get_map_file(fd: i32) -> Result<Arc<BpfMap>> {
let fd_table = ProcessManager::current_pcb().fd_table();
let map = fd_table
.read()
.get_file_by_fd(fd)
.ok_or(SystemError::EBADF)?;
let map = map
.inode()
.downcast_arc::<BpfMap>()
.ok_or(SystemError::EINVAL)?;
Ok(map)
}

154
kernel/src/bpf/map/queue.rs Normal file
View File

@ -0,0 +1,154 @@
use super::{BpfMapCommonOps, Result};
use crate::bpf::map::util::{BpfMapMeta, BpfMapUpdateElemFlags};
use alloc::vec::Vec;
use core::fmt::Debug;
use core::ops::Deref;
use core::ops::DerefMut;
use system_error::SystemError;
type BpfQueueValue = Vec<u8>;
/// BPF_MAP_TYPE_QUEUE provides FIFO storage and BPF_MAP_TYPE_STACK provides LIFO storage for BPF programs.
/// These maps support peek, pop and push operations that are exposed to BPF programs through the respective helpers.
/// These operations are exposed to userspace applications using the existing bpf syscall in the following way:
/// - `BPF_MAP_LOOKUP_ELEM` -> `peek`
/// - `BPF_MAP_UPDATE_ELEM` -> `push`
/// - `BPF_MAP_LOOKUP_AND_DELETE_ELEM` -> `pop`
///
/// See https://docs.kernel.org/bpf/map_queue_stack.html
pub trait SpecialMap: Debug + Send + Sync + 'static {
/// Push a value into the map, honoring the update `flags`.
fn push(&mut self, value: BpfQueueValue, flags: BpfMapUpdateElemFlags) -> Result<()>;
/// Removes the first element and returns it.
fn pop(&mut self) -> Option<BpfQueueValue>;
/// Returns the first element without removing it.
fn peek(&self) -> Option<&BpfQueueValue>;
}
/// The queue map type is a generic map type, resembling a FIFO (First-In First-Out) queue.
///
/// This map type has no keys, only values. The size and type of the values can be specified by the user
/// to fit a large variety of use cases. The typical use-case for this map type is to keep track of
/// a pool of elements such as available network ports when implementing NAT (network address translation).
///
/// As opposed to most map types, this map type uses a custom set of helpers to pop, peek and push elements.
///
/// See https://ebpf-docs.dylanreimerink.nl/linux/map-type/BPF_MAP_TYPE_QUEUE/
#[derive(Debug)]
pub struct QueueMap {
max_entries: u32,
data: Vec<BpfQueueValue>,
}
impl QueueMap {
pub fn new(attr: &BpfMapMeta) -> Result<Self> {
if attr.value_size == 0 || attr.max_entries == 0 || attr.key_size != 0 {
return Err(SystemError::EINVAL);
}
let data = Vec::with_capacity(attr.max_entries as usize);
Ok(Self {
max_entries: attr.max_entries,
data,
})
}
}
impl SpecialMap for QueueMap {
fn push(&mut self, value: BpfQueueValue, flags: BpfMapUpdateElemFlags) -> Result<()> {
if self.data.len() == self.max_entries as usize {
if flags.contains(BpfMapUpdateElemFlags::BPF_EXIST) {
// remove the first element
self.data.remove(0);
} else {
return Err(SystemError::ENOSPC);
}
}
self.data.push(value);
Ok(())
}
fn pop(&mut self) -> Option<BpfQueueValue> {
if self.data.is_empty() {
return None;
}
Some(self.data.remove(0))
}
fn peek(&self) -> Option<&BpfQueueValue> {
self.data.first()
}
}
/// The stack map type is a generic map type, resembling a stack data structure.
///
/// See https://ebpf-docs.dylanreimerink.nl/linux/map-type/BPF_MAP_TYPE_STACK/
#[derive(Debug)]
pub struct StackMap(QueueMap);
impl StackMap {
pub fn new(attr: &BpfMapMeta) -> Result<Self> {
QueueMap::new(attr).map(StackMap)
}
}
impl Deref for StackMap {
type Target = QueueMap;
fn deref(&self) -> &Self::Target {
&self.0
}
}
impl DerefMut for StackMap {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.0
}
}
impl SpecialMap for StackMap {
fn push(&mut self, value: BpfQueueValue, flags: BpfMapUpdateElemFlags) -> Result<()> {
if self.data.len() == self.max_entries as usize {
if flags.contains(BpfMapUpdateElemFlags::BPF_EXIST) {
// remove the last element
self.data.pop();
} else {
return Err(SystemError::ENOSPC);
}
}
self.data.push(value);
Ok(())
}
fn pop(&mut self) -> Option<BpfQueueValue> {
self.data.pop()
}
fn peek(&self) -> Option<&BpfQueueValue> {
self.data.last()
}
}
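// Blanket impl: queue and stack maps have no keys, so the generic map operations forward to push/pop/peek.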
impl<T: SpecialMap> BpfMapCommonOps for T {
/// Equal to [QueueMap::peek]
fn lookup_elem(&mut self, _key: &[u8]) -> Result<Option<&[u8]>> {
Ok(self.peek().map(|v| v.as_slice()))
}
/// Equal to [QueueMap::push]
fn update_elem(&mut self, _key: &[u8], value: &[u8], flags: u64) -> Result<()> {
let flag = BpfMapUpdateElemFlags::from_bits_truncate(flags);
self.push(value.to_vec(), flag)
}
/// Equal to [QueueMap::pop]
fn lookup_and_delete_elem(&mut self, _key: &[u8], value: &mut [u8]) -> Result<()> {
if let Some(v) = self.pop() {
value.copy_from_slice(&v);
Ok(())
} else {
Err(SystemError::ENOENT)
}
}
fn push_elem(&mut self, value: &[u8], flags: u64) -> Result<()> {
self.update_elem(&[], value, flags)
}
fn pop_elem(&mut self, value: &mut [u8]) -> Result<()> {
self.lookup_and_delete_elem(&[], value)
}
fn peek_elem(&self, value: &mut [u8]) -> Result<()> {
self.peek()
.map(|v| value.copy_from_slice(v))
.ok_or(SystemError::ENOENT)
}
}

100
kernel/src/bpf/map/util.rs Normal file
View File

@ -0,0 +1,100 @@
use crate::include::bindings::linux_bpf::{bpf_attr, bpf_map_type};
use alloc::string::{String, ToString};
use core::ffi::CStr;
use num_traits::FromPrimitive;
use system_error::SystemError;
#[derive(Debug, Clone)]
pub struct BpfMapMeta {
pub map_type: bpf_map_type,
pub key_size: u32,
pub value_size: u32,
pub max_entries: u32,
pub _map_flags: u32,
pub _map_name: String,
}
impl TryFrom<&bpf_attr> for BpfMapMeta {
type Error = SystemError;
fn try_from(value: &bpf_attr) -> Result<Self, Self::Error> {
let u = unsafe { &value.__bindgen_anon_1 };
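// map_name is a fixed-size, NUL-padded byte array; keep everything up to the first NUL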
let map_name_slice = unsafe {
core::slice::from_raw_parts(u.map_name.as_ptr() as *const u8, u.map_name.len())
};
let map_name = CStr::from_bytes_until_nul(map_name_slice)
.map_err(|_| SystemError::EINVAL)?
.to_str()
.map_err(|_| SystemError::EINVAL)?
.to_string();
let map_type = bpf_map_type::from_u32(u.map_type).ok_or(SystemError::EINVAL)?;
Ok(BpfMapMeta {
map_type,
key_size: u.key_size,
value_size: u.value_size,
max_entries: u.max_entries,
_map_flags: u.map_flags,
_map_name: map_name,
})
}
}
#[derive(Debug)]
pub struct BpfMapUpdateArg {
pub map_fd: u32,
pub key: u64,
pub value: u64,
pub flags: u64,
}
impl From<&bpf_attr> for BpfMapUpdateArg {
fn from(value: &bpf_attr) -> Self {
unsafe {
let u = &value.__bindgen_anon_2;
BpfMapUpdateArg {
map_fd: u.map_fd,
key: u.key,
value: u.__bindgen_anon_1.value,
flags: u.flags,
}
}
}
}
#[derive(Debug)]
pub struct BpfMapGetNextKeyArg {
pub map_fd: u32,
pub key: Option<u64>,
pub next_key: u64,
}
impl From<&bpf_attr> for BpfMapGetNextKeyArg {
fn from(value: &bpf_attr) -> Self {
unsafe {
let u = &value.__bindgen_anon_2;
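// a null (0) key pointer means the caller wants the first key in the map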
BpfMapGetNextKeyArg {
map_fd: u.map_fd,
key: if u.key != 0 { Some(u.key) } else { None },
next_key: u.__bindgen_anon_1.next_key,
}
}
}
}
#[inline]
/// Round up `x` to the nearest multiple of `align`; `align` must be a power of two.
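/// e.g. `round_up(10, 8) == 16`.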
pub fn round_up(x: usize, align: usize) -> usize {
(x + align - 1) & !(align - 1)
}
bitflags! {
/// flags for BPF_MAP_UPDATE_ELEM command
pub struct BpfMapUpdateElemFlags: u64 {
/// create new element or update existing
const BPF_ANY = 0;
/// create new element if it didn't exist
const BPF_NOEXIST = 1;
/// update existing element
const BPF_EXIST = 2;
/// spin_lock-ed map_lookup/map_update
const BPF_F_LOCK = 4;
}
}

50
kernel/src/bpf/mod.rs Normal file
View File

@ -0,0 +1,50 @@
pub mod helper;
pub mod map;
pub mod prog;
use crate::include::bindings::linux_bpf::{bpf_attr, bpf_cmd};
use crate::syscall::user_access::UserBufferReader;
use crate::syscall::Syscall;
use log::error;
use num_traits::FromPrimitive;
use system_error::SystemError;
type Result<T> = core::result::Result<T, SystemError>;
impl Syscall {
pub fn sys_bpf(cmd: u32, attr: *mut u8, size: u32) -> Result<usize> {
let buf = UserBufferReader::new(attr, size as usize, true)?;
let attr = buf.read_one_from_user::<bpf_attr>(0)?;
let cmd = bpf_cmd::from_u32(cmd).ok_or(SystemError::EINVAL)?;
bpf(cmd, attr)
}
}
pub fn bpf(cmd: bpf_cmd, attr: &bpf_attr) -> Result<usize> {
let res = match cmd {
// Map related commands
bpf_cmd::BPF_MAP_CREATE => map::bpf_map_create(attr),
bpf_cmd::BPF_MAP_UPDATE_ELEM => map::bpf_map_update_elem(attr),
bpf_cmd::BPF_MAP_LOOKUP_ELEM => map::bpf_lookup_elem(attr),
bpf_cmd::BPF_MAP_GET_NEXT_KEY => map::bpf_map_get_next_key(attr),
bpf_cmd::BPF_MAP_DELETE_ELEM => map::bpf_map_delete_elem(attr),
bpf_cmd::BPF_MAP_LOOKUP_AND_DELETE_ELEM => map::bpf_map_lookup_and_delete_elem(attr),
bpf_cmd::BPF_MAP_LOOKUP_BATCH => map::bpf_map_lookup_batch(attr),
bpf_cmd::BPF_MAP_FREEZE => map::bpf_map_freeze(attr),
// Program related commands
bpf_cmd::BPF_PROG_LOAD => prog::bpf_prog_load(attr),
// Object creation commands
bpf_cmd::BPF_BTF_LOAD => {
error!("bpf cmd {:?} not implemented", cmd);
return Err(SystemError::ENOSYS);
}
ty => {
unimplemented!("bpf cmd {:?} not implemented", ty)
}
};
res
}
/// Initialize the BPF system
pub fn init_bpf_system() {
helper::init_helper_functions();
}

123
kernel/src/bpf/prog/mod.rs Normal file
View File

@ -0,0 +1,123 @@
mod util;
mod verifier;
use super::Result;
use crate::bpf::map::BpfMap;
use crate::bpf::prog::util::{BpfProgMeta, BpfProgVerifierInfo};
use crate::bpf::prog::verifier::BpfProgVerifier;
use crate::filesystem::vfs::file::{File, FileMode};
use crate::filesystem::vfs::syscall::ModeType;
use crate::filesystem::vfs::{FilePrivateData, FileSystem, FileType, IndexNode, Metadata};
use crate::include::bindings::linux_bpf::bpf_attr;
use crate::libs::spinlock::SpinLockGuard;
use crate::process::ProcessManager;
use alloc::string::String;
use alloc::sync::Arc;
use alloc::vec::Vec;
use core::any::Any;
use system_error::SystemError;
#[derive(Debug)]
pub struct BpfProg {
meta: BpfProgMeta,
raw_file_ptr: Vec<usize>,
}
impl BpfProg {
pub fn new(meta: BpfProgMeta) -> Self {
Self {
meta,
raw_file_ptr: Vec::new(),
}
}
pub fn insns(&self) -> &[u8] {
&self.meta.insns
}
pub fn insns_mut(&mut self) -> &mut [u8] {
&mut self.meta.insns
}
pub fn insert_map(&mut self, map_ptr: usize) {
self.raw_file_ptr.push(map_ptr);
}
}
impl IndexNode for BpfProg {
fn open(&self, _data: SpinLockGuard<FilePrivateData>, _mode: &FileMode) -> Result<()> {
Ok(())
}
fn close(&self, _data: SpinLockGuard<FilePrivateData>) -> Result<()> {
Ok(())
}
fn read_at(
&self,
_offset: usize,
_len: usize,
_buf: &mut [u8],
_data: SpinLockGuard<FilePrivateData>,
) -> Result<usize> {
Err(SystemError::ENOSYS)
}
fn write_at(
&self,
_offset: usize,
_len: usize,
_buf: &[u8],
_data: SpinLockGuard<FilePrivateData>,
) -> Result<usize> {
Err(SystemError::ENOSYS)
}
fn metadata(&self) -> Result<Metadata> {
let meta = Metadata {
mode: ModeType::from_bits_truncate(0o755),
file_type: FileType::File,
..Default::default()
};
Ok(meta)
}
fn resize(&self, _len: usize) -> Result<()> {
Ok(())
}
fn fs(&self) -> Arc<dyn FileSystem> {
panic!("BpfProg does not have a filesystem")
}
fn as_any_ref(&self) -> &dyn Any {
self
}
fn list(&self) -> Result<Vec<String>> {
Err(SystemError::ENOSYS)
}
}
impl Drop for BpfProg {
fn drop(&mut self) {
unsafe {
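// rebuild the Arc<BpfMap> that was leaked via Arc::into_raw during relocation so its refcount is released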
for ptr in self.raw_file_ptr.iter() {
let file = Arc::from_raw(*ptr as *const u8 as *const BpfMap);
drop(file)
}
}
}
}
/// Load a BPF program into the kernel.
///
/// See https://ebpf-docs.dylanreimerink.nl/linux/syscall/BPF_PROG_LOAD/
pub fn bpf_prog_load(attr: &bpf_attr) -> Result<usize> {
let args = BpfProgMeta::try_from(attr)?;
// info!("bpf_prog_load: {:#?}", args);
let log_info = BpfProgVerifierInfo::from(attr);
let prog = BpfProg::new(args);
let fd_table = ProcessManager::current_pcb().fd_table();
let prog = BpfProgVerifier::new(prog, log_info.log_level, &mut []).verify(&fd_table)?;
let file = File::new(Arc::new(prog), FileMode::O_RDWR)?;
let fd = fd_table.write().alloc_fd(file, None).map(|x| x as usize)?;
Ok(fd)
}

112
kernel/src/bpf/prog/util.rs Normal file
View File

@ -0,0 +1,112 @@
use crate::include::bindings::linux_bpf::{bpf_attach_type, bpf_attr, bpf_prog_type};
use crate::syscall::user_access::{check_and_clone_cstr, UserBufferReader};
use alloc::string::{String, ToString};
use alloc::vec::Vec;
use core::ffi::CStr;
use core::fmt::Debug;
use num_traits::FromPrimitive;
use system_error::SystemError;
bitflags::bitflags! {
pub struct VerifierLogLevel: u32 {
/// Sets no verifier logging.
const DISABLE = 0;
/// Enables debug verifier logging.
const DEBUG = 1;
/// Enables verbose verifier logging.
const VERBOSE = 2 | Self::DEBUG.bits();
/// Enables verifier stats.
const STATS = 4;
}
}
#[derive(Debug)]
pub struct BpfProgVerifierInfo {
/// This attribute specifies the level/detail of the log output; see [`VerifierLogLevel`] for valid values.
pub log_level: VerifierLogLevel,
/// This attribute indicates the size, in bytes, of the memory region pointed to
/// by `log_buf` which can safely be written to by the kernel.
pub _log_buf_size: u32,
/// This attribute can be set to a pointer to a memory region
/// allocated/reserved by the loader process, where the verifier log will
/// be written.
/// The detail of the log is set by log_level. The verifier log
/// is often the only indication in addition to the error code of
/// why the syscall command failed to load the program.
///
/// The log is also written to on success. If the kernel runs out of
/// space in the buffer while loading, the loading process will fail
/// and the command will return with an error code of -ENOSPC. So it
/// is important to correctly size the buffer when enabling logging.
pub _log_buf_ptr: usize,
}
impl From<&bpf_attr> for BpfProgVerifierInfo {
fn from(attr: &bpf_attr) -> Self {
unsafe {
let u = &attr.__bindgen_anon_3;
Self {
log_level: VerifierLogLevel::from_bits_truncate(u.log_level),
_log_buf_size: u.log_size,
_log_buf_ptr: u.log_buf as usize,
}
}
}
}
pub struct BpfProgMeta {
pub prog_flags: u32,
pub prog_type: bpf_prog_type,
pub expected_attach_type: bpf_attach_type,
pub insns: Vec<u8>,
pub license: String,
pub kern_version: u32,
pub name: String,
}
impl Debug for BpfProgMeta {
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
f.debug_struct("BpfProgMeta")
.field("prog_flags", &self.prog_flags)
.field("prog_type", &self.prog_type)
.field("expected_attach_type", &self.expected_attach_type)
.field("insns_len", &(self.insns.len() / 8))
.field("license", &self.license)
.field("kern_version", &self.kern_version)
.field("name", &self.name)
.finish()
}
}
impl TryFrom<&bpf_attr> for BpfProgMeta {
type Error = SystemError;
fn try_from(attr: &bpf_attr) -> Result<Self, Self::Error> {
let u = unsafe { &attr.__bindgen_anon_3 };
let prog_type = bpf_prog_type::from_u32(u.prog_type).ok_or(SystemError::EINVAL)?;
let expected_attach_type =
bpf_attach_type::from_u32(u.expected_attach_type).ok_or(SystemError::EINVAL)?;
unsafe {
let insns_buf =
UserBufferReader::new(u.insns as *mut u8, u.insn_cnt as usize * 8, true)?;
let insns = insns_buf.read_from_user::<u8>(0)?.to_vec();
let name_slice =
core::slice::from_raw_parts(u.prog_name.as_ptr() as *const u8, u.prog_name.len());
let prog_name = CStr::from_bytes_until_nul(name_slice)
.map_err(|_| SystemError::EINVAL)?
.to_str()
.map_err(|_| SystemError::EINVAL)?
.to_string();
let license = check_and_clone_cstr(u.license as *const u8, None)?;
Ok(Self {
prog_flags: u.prog_flags,
prog_type,
expected_attach_type,
insns,
license: license.into_string().map_err(|_| SystemError::EINVAL)?,
kern_version: u.kern_version,
name: prog_name,
})
}
}
}

View File

@ -0,0 +1,131 @@
use super::super::Result;
use crate::bpf::map::BpfMap;
use crate::bpf::prog::util::VerifierLogLevel;
use crate::bpf::prog::BpfProg;
use crate::filesystem::vfs::file::FileDescriptorVec;
use crate::include::bindings::linux_bpf::*;
use crate::libs::casting::DowncastArc;
use crate::libs::rwlock::RwLock;
use alloc::{sync::Arc, vec::Vec};
use log::{error, info};
use rbpf::ebpf;
use rbpf::ebpf::to_insn_vec;
use system_error::SystemError;
/// The BPF program verifier.
///
/// See https://docs.kernel.org/bpf/verifier.html
#[derive(Debug)]
pub struct BpfProgVerifier<'a> {
prog: BpfProg,
_log_level: VerifierLogLevel,
_log_buf: &'a mut [u8],
}
impl<'a> BpfProgVerifier<'a> {
pub fn new(prog: BpfProg, log_level: VerifierLogLevel, log_buf: &'a mut [u8]) -> Self {
Self {
prog,
_log_level: log_level,
_log_buf: log_buf,
}
}
/// Relocate the program.
///
/// Resolves the map file descriptors referenced by the instructions and patches the
/// instruction stream in place.
fn relocation(&mut self, fd_table: &Arc<RwLock<FileDescriptorVec>>) -> Result<()> {
let instructions = self.prog.insns_mut();
let mut fmt_insn = to_insn_vec(instructions);
let mut index = 0;
let mut raw_file_ptr = vec![];
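// `LD_DW_IMM` (lddw) occupies two instruction slots: the 64-bit immediate is split
// across the `imm` fields of this instruction and the next one, so both are patched.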
loop {
if index >= fmt_insn.len() {
break;
}
let mut insn = fmt_insn[index].clone();
if insn.opc == ebpf::LD_DW_IMM {
// relocate the instruction
let mut next_insn = fmt_insn[index + 1].clone();
// the imm is the map_fd because user lib has already done the relocation
let map_fd = insn.imm as usize;
let src_reg = insn.src;
// See https://www.kernel.org/doc/html/latest/bpf/standardization/instruction-set.html#id23
let ptr = match src_reg as u32 {
BPF_PSEUDO_MAP_VALUE => {
// dst = map_val(map_by_fd(imm)) + next_imm
// map_val(map) gets the address of the first value in a given map
let file = fd_table
.read()
.get_file_by_fd(map_fd as i32)
.ok_or(SystemError::EBADF)?;
let bpf_map = file
.inode()
.downcast_arc::<BpfMap>()
.ok_or(SystemError::EINVAL)?;
let first_value_ptr =
bpf_map.inner_map().lock().first_value_ptr()? as usize;
let offset = next_insn.imm as usize;
info!(
"Relocate for BPF_PSEUDO_MAP_VALUE, instruction index: {}, map_fd: {}",
index, map_fd
);
Some(first_value_ptr + offset)
}
BPF_PSEUDO_MAP_FD => {
// dst = map_by_fd(imm)
// map_by_fd(imm) means to convert a 32-bit file descriptor into an address of a map
let bpf_map = fd_table
.read()
.get_file_by_fd(map_fd as i32)
.ok_or(SystemError::EBADF)?
.inode()
.downcast_arc::<BpfMap>()
.ok_or(SystemError::EINVAL)?;
// TODO: this leaked reference must be released when the program is unloaded
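// `Arc::into_raw` leaks a strong reference so the map stays alive while the program stores its raw address in the instruction stream.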
let map_ptr = Arc::into_raw(bpf_map) as usize;
info!(
"Relocate for BPF_PSEUDO_MAP_FD, instruction index: {}, map_fd: {}, ptr: {:#x}",
index, map_fd, map_ptr
);
raw_file_ptr.push(map_ptr);
Some(map_ptr)
}
ty => {
error!(
"relocation for ty: {} not implemented, instruction index: {}",
ty, index
);
None
}
};
if let Some(ptr) = ptr {
// The current ins store the map_data_ptr low 32 bits,
// the next ins store the map_data_ptr high 32 bits
insn.imm = ptr as i32;
next_insn.imm = (ptr >> 32) as i32;
fmt_insn[index] = insn;
fmt_insn[index + 1] = next_insn;
index += 2;
} else {
index += 1;
}
} else {
index += 1;
}
}
let fmt_insn = fmt_insn
.iter()
.flat_map(|ins| ins.to_vec())
.collect::<Vec<u8>>();
instructions.copy_from_slice(&fmt_insn);
for ptr in raw_file_ptr {
self.prog.insert_map(ptr);
}
Ok(())
}
pub fn verify(mut self, fd_table: &Arc<RwLock<FileDescriptorVec>>) -> Result<BpfProg> {
self.relocation(fd_table)?;
Ok(self.prog)
}
}

View File

@ -11,3 +11,12 @@ static inline int strlen(const char *s) {
}
return __res;
}
static inline int strcmp(const char *s1, const char *s2) {
while (*s1 && *s2 && *s1 == *s2) {
++s1;
++s2;
}
return *s1 - *s2;
}

View File

@ -0,0 +1,66 @@
use alloc::boxed::Box;
use alloc::string::String;
use kprobe::{CallBackFunc, KprobeBuilder, ProbeArgs};
use log::warn;
use system_error::SystemError;
pub struct KprobeInfo {
pub pre_handler: fn(&dyn ProbeArgs),
pub post_handler: fn(&dyn ProbeArgs),
pub fault_handler: Option<fn(&dyn ProbeArgs)>,
pub event_callback: Option<Box<dyn CallBackFunc>>,
pub symbol: Option<String>,
pub addr: Option<usize>,
pub offset: usize,
pub enable: bool,
}
extern "C" {
fn addr_from_symbol(symbol: *const u8) -> usize;
}
impl TryFrom<KprobeInfo> for KprobeBuilder {
type Error = SystemError;
fn try_from(kprobe_info: KprobeInfo) -> Result<Self, Self::Error> {
// Validate arguments: exactly one of `symbol` and `addr` must be provided, not both
if kprobe_info.symbol.is_none() && kprobe_info.addr.is_none() {
return Err(SystemError::EINVAL);
}
if kprobe_info.symbol.is_some() && kprobe_info.addr.is_some() {
return Err(SystemError::EINVAL);
}
let func_addr = if let Some(symbol) = kprobe_info.symbol.clone() {
let mut symbol_string = symbol;
if !symbol_string.ends_with("\0") {
symbol_string.push('\0');
}
let symbol = symbol_string.as_ptr();
let func_addr = unsafe { addr_from_symbol(symbol) };
if func_addr == 0 {
warn!(
"register_kprobe: the symbol: {:?} not found",
kprobe_info.symbol
);
return Err(SystemError::ENXIO);
}
func_addr
} else {
kprobe_info.addr.unwrap()
};
let mut builder = KprobeBuilder::new(
kprobe_info.symbol,
func_addr,
kprobe_info.offset,
kprobe_info.pre_handler,
kprobe_info.post_handler,
kprobe_info.enable,
);
if let Some(fault_handler) = kprobe_info.fault_handler {
builder = builder.with_fault_handler(fault_handler);
}
if let Some(event_callback) = kprobe_info.event_callback {
builder = builder.with_event_callback(event_callback);
}
Ok(builder)
}
}

View File

@ -0,0 +1,183 @@
use crate::debug::kprobe::args::KprobeInfo;
use crate::libs::rwlock::RwLock;
use crate::libs::spinlock::SpinLock;
use alloc::collections::BTreeMap;
use alloc::sync::Arc;
use alloc::vec::Vec;
use kprobe::{Kprobe, KprobeBuilder, KprobeOps, KprobePoint};
use system_error::SystemError;
pub mod args;
#[cfg(feature = "kprobe_test")]
mod test;
pub type LockKprobe = Arc<RwLock<Kprobe>>;
pub static KPROBE_MANAGER: SpinLock<KprobeManager> = SpinLock::new(KprobeManager::new());
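// Each instrumented address owns a single KprobePoint; kprobes registered at the same address share it (see `register_kprobe`).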
static KPROBE_POINT_LIST: SpinLock<BTreeMap<usize, Arc<KprobePoint>>> =
SpinLock::new(BTreeMap::new());
/// Manages all kprobe probe points
#[derive(Debug, Default)]
pub struct KprobeManager {
break_list: BTreeMap<usize, Vec<LockKprobe>>,
debug_list: BTreeMap<usize, Vec<LockKprobe>>,
}
impl KprobeManager {
pub const fn new() -> Self {
KprobeManager {
break_list: BTreeMap::new(),
debug_list: BTreeMap::new(),
}
}
/// # Insert a kprobe
///
/// ## Parameters
/// - `kprobe`: the kprobe instance
pub fn insert_kprobe(&mut self, kprobe: LockKprobe) {
let probe_point = kprobe.read().probe_point().clone();
self.insert_break_point(probe_point.break_address(), kprobe.clone());
self.insert_debug_point(probe_point.debug_address(), kprobe);
}
/// # Insert a kprobe into break_list
///
/// ## Parameters
/// - `address`: the kprobe address, returned by `KprobePoint::break_address()` or `KprobeBuilder::probe_addr()`
/// - `kprobe`: the kprobe instance
fn insert_break_point(&mut self, address: usize, kprobe: LockKprobe) {
let list = self.break_list.entry(address).or_default();
list.push(kprobe);
}
/// # Insert a kprobe into debug_list
///
/// ## Parameters
/// - `address`: the kprobe single-step address, returned by `KprobePoint::debug_address()`
/// - `kprobe`: the kprobe instance
fn insert_debug_point(&mut self, address: usize, kprobe: LockKprobe) {
let list = self.debug_list.entry(address).or_default();
list.push(kprobe);
}
pub fn get_break_list(&self, address: usize) -> Option<&Vec<LockKprobe>> {
self.break_list.get(&address)
}
pub fn get_debug_list(&self, address: usize) -> Option<&Vec<LockKprobe>> {
self.debug_list.get(&address)
}
/// # Return the number of kprobes registered at an address
///
/// ## Parameters
/// - `address`: the kprobe address, returned by `KprobePoint::break_address()` or `KprobeBuilder::probe_addr()`
pub fn kprobe_num(&self, address: usize) -> usize {
self.break_list_len(address)
}
#[inline]
fn break_list_len(&self, address: usize) -> usize {
self.break_list
.get(&address)
.map(|list| list.len())
.unwrap_or(0)
}
#[inline]
fn debug_list_len(&self, address: usize) -> usize {
self.debug_list
.get(&address)
.map(|list| list.len())
.unwrap_or(0)
}
/// # Remove a kprobe
///
/// ## Parameters
/// - `kprobe`: the kprobe instance
pub fn remove_kprobe(&mut self, kprobe: &LockKprobe) {
let probe_point = kprobe.read().probe_point().clone();
self.remove_one_break(probe_point.break_address(), kprobe);
self.remove_one_debug(probe_point.debug_address(), kprobe);
}
/// # Remove a kprobe from break_list
///
/// If no other kprobe is registered at this address, the list is deleted.
///
/// ## Parameters
/// - `address`: the kprobe address, returned by `KprobePoint::break_address()` or `KprobeBuilder::probe_addr()`
/// - `kprobe`: the kprobe instance
fn remove_one_break(&mut self, address: usize, kprobe: &LockKprobe) {
if let Some(list) = self.break_list.get_mut(&address) {
list.retain(|x| !Arc::ptr_eq(x, kprobe));
}
if self.break_list_len(address) == 0 {
self.break_list.remove(&address);
}
}
/// # Remove a kprobe from debug_list
///
/// If no other kprobe is registered at this address, the list is deleted.
///
/// ## Parameters
/// - `address`: the kprobe single-step address, returned by `KprobePoint::debug_address()`
/// - `kprobe`: the kprobe instance
fn remove_one_debug(&mut self, address: usize, kprobe: &LockKprobe) {
if let Some(list) = self.debug_list.get_mut(&address) {
list.retain(|x| !Arc::ptr_eq(x, kprobe));
}
if self.debug_list_len(address) == 0 {
self.debug_list.remove(&address);
}
}
}
#[cfg(feature = "kprobe_test")]
#[allow(unused)]
/// This function is only used for testing kprobe
pub fn kprobe_test() {
test::kprobe_test();
}
/// # Register a kprobe
///
/// This function looks up the function address by `symbol`; if the symbol cannot be found, an error is returned.
///
/// ## Parameters
/// - `kprobe_info`: the kprobe information
pub fn register_kprobe(kprobe_info: KprobeInfo) -> Result<LockKprobe, SystemError> {
let kprobe_builder = KprobeBuilder::try_from(kprobe_info)?;
let address = kprobe_builder.probe_addr();
let existed_point = KPROBE_POINT_LIST.lock().get(&address).map(Clone::clone);
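// reuse the existing probe point if another kprobe has already patched this address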
let kprobe = match existed_point {
Some(existed_point) => {
kprobe_builder
.with_probe_point(existed_point.clone())
.install()
.0
}
None => {
let (kprobe, probe_point) = kprobe_builder.install();
KPROBE_POINT_LIST.lock().insert(address, probe_point);
kprobe
}
};
let kprobe = Arc::new(RwLock::new(kprobe));
KPROBE_MANAGER.lock().insert_kprobe(kprobe.clone());
Ok(kprobe)
}
/// # Unregister a kprobe
///
/// ## Parameters
/// - `kprobe`: the installed kprobe
pub fn unregister_kprobe(kprobe: LockKprobe) {
let kprobe_addr = kprobe.read().probe_point().break_address();
KPROBE_MANAGER.lock().remove_kprobe(&kprobe);
// If no other kprobe is registered at this address, remove the probe point
if KPROBE_MANAGER.lock().kprobe_num(kprobe_addr) == 0 {
KPROBE_POINT_LIST.lock().remove(&kprobe_addr);
}
}

View File

@ -0,0 +1,84 @@
use crate::arch::interrupt::TrapFrame;
use crate::debug::kprobe::{register_kprobe, unregister_kprobe, KprobeInfo};
use alloc::string::ToString;
use kprobe::ProbeArgs;
use log::info;
#[inline(never)]
fn detect_func(x: usize, y: usize) -> usize {
let hart = 0;
info!("detect_func: hart_id: {}, x: {}, y:{}", hart, x, y);
hart
}
fn pre_handler(regs: &dyn ProbeArgs) {
let pt_regs = regs.as_any().downcast_ref::<TrapFrame>().unwrap();
info!(
"call pre_handler, the sp is {:#x}",
pt_regs as *const _ as usize
);
}
fn post_handler(regs: &dyn ProbeArgs) {
let pt_regs = regs.as_any().downcast_ref::<TrapFrame>().unwrap();
info!(
"call post_handler, the sp is {:#x}",
pt_regs as *const _ as usize
);
}
fn fault_handler(regs: &dyn ProbeArgs) {
let pt_regs = regs.as_any().downcast_ref::<TrapFrame>().unwrap();
info!(
"call fault_handler, the sp is {:#x}",
pt_regs as *const _ as usize
);
}
pub fn kprobe_test() {
info!("kprobe test for [detect_func]: {:#x}", detect_func as usize);
let kprobe_info = KprobeInfo {
pre_handler,
post_handler,
fault_handler: Some(fault_handler),
event_callback: None,
symbol: None,
addr: Some(detect_func as usize),
offset: 0,
enable: true,
};
let kprobe = register_kprobe(kprobe_info).unwrap();
let new_pre_handler = |regs: &dyn ProbeArgs| {
let pt_regs = regs.as_any().downcast_ref::<TrapFrame>().unwrap();
info!(
"call new pre_handler, the sp is {:#x}",
pt_regs as *const _ as usize
);
};
let kprobe_info = KprobeInfo {
pre_handler: new_pre_handler,
post_handler,
fault_handler: Some(fault_handler),
event_callback: None,
symbol: Some("dragonos_kernel::debug::kprobe::test::detect_func".to_string()),
addr: None,
offset: 0,
enable: true,
};
let kprobe2 = register_kprobe(kprobe_info).unwrap();
info!(
"install 2 kprobes at [detect_func]: {:#x}",
detect_func as usize
);
detect_func(1, 2);
unregister_kprobe(kprobe);
unregister_kprobe(kprobe2);
info!(
"uninstall 2 kprobes at [detect_func]: {:#x}",
detect_func as usize
);
detect_func(1, 2);
info!("kprobe test end");
}

View File

@ -1 +1,2 @@
pub mod klog;
pub mod kprobe;

View File

@ -1,5 +1,6 @@
#include "traceback.h" #include "traceback.h"
#include <common/printk.h> #include <common/printk.h>
#include <common/string.h>
#include <process/process.h> #include <process/process.h>
int lookup_kallsyms(uint64_t addr, int level) int lookup_kallsyms(uint64_t addr, int level)
@ -26,6 +27,18 @@ int lookup_kallsyms(uint64_t addr, int level)
return -1; return -1;
} }
uint64_t addr_from_symbol(const char *symbol)
{
const char *str = (const char *)&kallsyms_names;
for (uint64_t i = 0; i < kallsyms_num; ++i)
{
if (strcmp(&str[kallsyms_names_index[i]], symbol) == 0)
return kallsyms_address[i];
}
return 0;
}
/**
* @brief
*

View File

@ -14,4 +14,5 @@ extern const char *kallsyms_names __attribute__((weak));
*
* @param regs
*/
void traceback(struct pt_regs *regs);
uint64_t addr_from_symbol(const char *symbol);

View File

@ -11,8 +11,6 @@ use crate::{
libs::rwlock::{RwLock, RwLockReadGuard, RwLockWriteGuard}, libs::rwlock::{RwLock, RwLockReadGuard, RwLockWriteGuard},
}; };
use system_error::SystemError;
use super::{ use super::{
class::Class, class::Class,
device::{ device::{
@ -24,29 +22,36 @@ use super::{
kset::KSet, kset::KSet,
subsys::SubSysPrivate, subsys::SubSysPrivate,
}; };
use crate::filesystem::sysfs::file::sysfs_emit_str;
use crate::filesystem::sysfs::{Attribute, AttributeGroup, SysFSOpsSupport};
use crate::filesystem::vfs::syscall::ModeType;
use crate::libs::lazy_init::Lazy;
use system_error::SystemError;
#[inline(always)] static CPU_DEVICE_MANAGER: Lazy<CpuDeviceManager> = Lazy::new();
pub fn cpu_device_manager() -> &'static CpuDeviceManager {
return &CpuDeviceManager;
}
#[derive(Debug)] #[derive(Debug)]
pub struct CpuDeviceManager; pub struct CpuDeviceManager {
_root_device: Arc<CpuSubSystemFakeRootDevice>,
}
impl CpuDeviceManager { impl CpuDeviceManager {
/// 初始化设备驱动模型的CPU子系统 /// 初始化设备驱动模型的CPU子系统
/// ///
/// 参考 https://code.dragonos.org.cn/xref/linux-6.1.9/drivers/base/cpu.c?fi=get_cpu_device#622 /// 参考 https://code.dragonos.org.cn/xref/linux-6.1.9/drivers/base/cpu.c?fi=get_cpu_device#622
pub fn init(&self) -> Result<(), SystemError> { pub fn init() -> Result<(), SystemError> {
let cpu_subsys = CpuSubSystem::new(); let cpu_subsys = CpuSubSystem::new();
let root_device = CpuSubSystemFakeRootDevice::new(); let root_device = CpuSubSystemFakeRootDevice::new();
subsystem_manager() subsystem_manager()
.subsys_system_register( .subsys_system_register(
&(cpu_subsys as Arc<dyn Bus>), &(cpu_subsys as Arc<dyn Bus>),
&(root_device as Arc<dyn Device>), &(root_device.clone() as Arc<dyn Device>),
) )
.expect("register cpu subsys failed"); .expect("register cpu subsys failed");
let manager = Self {
_root_device: root_device,
};
CPU_DEVICE_MANAGER.init(manager);
return Ok(()); return Ok(());
} }
} }
@ -190,6 +195,10 @@ impl Device for CpuSubSystemFakeRootDevice {
fn set_dev_parent(&self, dev_parent: Option<Weak<dyn Device>>) { fn set_dev_parent(&self, dev_parent: Option<Weak<dyn Device>>) {
self.inner.write().device_common.parent = dev_parent; self.inner.write().device_common.parent = dev_parent;
} }
fn attribute_groups(&self) -> Option<&'static [&'static dyn AttributeGroup]> {
Some(&[&AttrGroupCpu])
}
} }
impl KObject for CpuSubSystemFakeRootDevice { impl KObject for CpuSubSystemFakeRootDevice {
@ -249,3 +258,70 @@ impl KObject for CpuSubSystemFakeRootDevice {
*self.kobj_state_mut() = state; *self.kobj_state_mut() = state;
} }
} }
#[derive(Debug)]
pub struct AttrGroupCpu;
impl AttributeGroup for AttrGroupCpu {
fn name(&self) -> Option<&str> {
None
}
fn attrs(&self) -> &[&'static dyn Attribute] {
&[&AttrCpuPossible, &AttrCpuOnline]
}
fn is_visible(
&self,
_kobj: Arc<dyn KObject>,
_attr: &'static dyn Attribute,
) -> Option<ModeType> {
None
}
}
#[derive(Debug)]
pub struct AttrCpuPossible;
impl Attribute for AttrCpuPossible {
fn name(&self) -> &str {
"possible"
}
fn mode(&self) -> ModeType {
ModeType::S_IRUGO
}
fn support(&self) -> SysFSOpsSupport {
SysFSOpsSupport::ATTR_SHOW
}
fn show(&self, _kobj: Arc<dyn KObject>, buf: &mut [u8]) -> Result<usize, SystemError> {
let cpu_manager = crate::smp::cpu::smp_cpu_manager();
let cpus = cpu_manager.possible_cpus_count();
let data = format!("0-{}", cpus - 1);
sysfs_emit_str(buf, &data)
}
}
#[derive(Debug)]
pub struct AttrCpuOnline;
impl Attribute for AttrCpuOnline {
fn name(&self) -> &str {
"online"
}
fn mode(&self) -> ModeType {
ModeType::S_IRUGO
}
fn support(&self) -> SysFSOpsSupport {
SysFSOpsSupport::ATTR_SHOW
}
fn show(&self, _kobj: Arc<dyn KObject>, buf: &mut [u8]) -> Result<usize, SystemError> {
let cpu_manager = crate::smp::cpu::smp_cpu_manager();
let cpus = cpu_manager.present_cpus_count();
let data = format!("0-{}", cpus - 1);
sysfs_emit_str(buf, &data)
}
}

View File

@ -3,7 +3,7 @@ use system_error::SystemError;
use super::{ use super::{
class::classes_init, class::classes_init,
cpu::cpu_device_manager, cpu::CpuDeviceManager,
device::{bus::buses_init, init::devices_init}, device::{bus::buses_init, init::devices_init},
firmware::firmware_init, firmware::firmware_init,
hypervisor::hypervisor_init, hypervisor::hypervisor_init,
@ -20,7 +20,7 @@ pub fn driver_init() -> Result<(), SystemError> {
hypervisor_init()?; hypervisor_init()?;
platform_bus_init()?; platform_bus_init()?;
serio_bus_init()?; serio_bus_init()?;
cpu_device_manager().init()?; CpuDeviceManager::init()?;
// 至此,已完成设备驱动模型的初始化 // 至此,已完成设备驱动模型的初始化
return Ok(()); return Ok(());

View File

@ -0,0 +1,33 @@
use crate::arch::interrupt::TrapFrame;
use crate::arch::kprobe::clear_single_step;
use crate::debug::kprobe::KPROBE_MANAGER;
use kprobe::{KprobeOps, ProbeArgs};
use log::debug;
use system_error::SystemError;
#[derive(Debug)]
pub struct DebugException;
impl DebugException {
pub fn handle(frame: &mut TrapFrame) -> Result<(), SystemError> {
Self::post_kprobe_handler(frame)
}
fn post_kprobe_handler(frame: &mut TrapFrame) -> Result<(), SystemError> {
let pc = frame.debug_address();
if let Some(kprobe_list) = KPROBE_MANAGER.lock().get_debug_list(pc) {
for kprobe in kprobe_list {
let guard = kprobe.read();
if guard.is_enabled() {
guard.call_post_handler(frame);
guard.call_event_callback(frame);
}
}
let return_address = kprobe_list[0].read().probe_point().return_address();
clear_single_step(frame, return_address);
} else {
debug!("There is no kprobe on pc {:#x}", pc);
}
Ok(())
}
}

View File

@ -0,0 +1,37 @@
use crate::arch::interrupt::TrapFrame;
use crate::arch::kprobe::setup_single_step;
use crate::debug::kprobe::KPROBE_MANAGER;
use crate::exception::debug::DebugException;
use kprobe::{KprobeOps, ProbeArgs};
use system_error::SystemError;
#[derive(Debug)]
pub struct EBreak;
impl EBreak {
pub fn handle(frame: &mut TrapFrame) -> Result<(), SystemError> {
Self::kprobe_handler(frame)
}
fn kprobe_handler(frame: &mut TrapFrame) -> Result<(), SystemError> {
let break_addr = frame.break_address();
let guard = KPROBE_MANAGER.lock();
let kprobe_list = guard.get_break_list(break_addr);
if let Some(kprobe_list) = kprobe_list {
for kprobe in kprobe_list {
let guard = kprobe.read();
if guard.is_enabled() {
guard.call_pre_handler(frame);
}
}
let single_step_address = kprobe_list[0].read().probe_point().single_step_address();
// setup_single_step
setup_single_step(frame, single_step_address);
} else {
// For some architectures, they do not support single step execution,
// and we need to use breakpoint exceptions to simulate
drop(guard);
DebugException::handle(frame)?;
}
Ok(())
}
}

View File

@ -4,7 +4,9 @@ use system_error::SystemError;
use crate::arch::CurrentIrqArch;
pub mod debug;
pub mod dummychip;
pub mod ebreak;
pub mod handle;
pub mod init;
pub mod ipi;

View File

@ -11,6 +11,8 @@ use system_error::SystemError;
use super::{Dirent, FileType, IndexNode, InodeId, Metadata, SpecialNodeData}; use super::{Dirent, FileType, IndexNode, InodeId, Metadata, SpecialNodeData};
use crate::filesystem::eventfd::EventFdInode; use crate::filesystem::eventfd::EventFdInode;
use crate::libs::lazy_init::Lazy;
use crate::perf::PerfEventInode;
use crate::{ use crate::{
arch::MMArch, arch::MMArch,
driver::{ driver::{
@ -125,7 +127,7 @@ impl FileMode {
/// 页面缓存 /// 页面缓存
pub struct PageCache { pub struct PageCache {
xarray: SpinLock<XArray<Arc<Page>>>, xarray: SpinLock<XArray<Arc<Page>>>,
inode: Option<Weak<dyn IndexNode>>, inode: Lazy<Weak<dyn IndexNode>>,
} }
impl core::fmt::Debug for PageCache { impl core::fmt::Debug for PageCache {
@ -148,13 +150,19 @@ impl PageCache {
pub fn new(inode: Option<Weak<dyn IndexNode>>) -> Arc<PageCache> { pub fn new(inode: Option<Weak<dyn IndexNode>>) -> Arc<PageCache> {
let page_cache = Self { let page_cache = Self {
xarray: SpinLock::new(XArray::new()), xarray: SpinLock::new(XArray::new()),
inode, inode: {
let v: Lazy<Weak<dyn IndexNode>> = Lazy::new();
if let Some(inode) = inode {
v.init(inode);
}
v
},
}; };
Arc::new(page_cache) Arc::new(page_cache)
} }
pub fn inode(&self) -> Option<Weak<dyn IndexNode>> { pub fn inode(&self) -> Option<Weak<dyn IndexNode>> {
self.inode.clone() self.inode.try_get().cloned()
} }
pub fn add_page(&self, offset: usize, page: &Arc<Page>) { pub fn add_page(&self, offset: usize, page: &Arc<Page>) {
@ -176,8 +184,12 @@ impl PageCache {
cursor.remove(); cursor.remove();
} }
pub fn set_inode(&mut self, inode: Weak<dyn IndexNode>) { pub fn set_inode(&self, inode: Weak<dyn IndexNode>) -> Result<(), SystemError> {
self.inode = Some(inode) if self.inode.initialized() {
return Err(SystemError::EINVAL);
}
self.inode.init(inode);
Ok(())
} }
} }
@ -603,11 +615,15 @@ impl File {
inode.inner().lock().remove_epoll(epoll) inode.inner().lock().remove_epoll(epoll)
} }
_ => { _ => {
let inode = self.inode.downcast_ref::<EventFdInode>();
if let Some(inode) = inode {
return inode.remove_epoll(epoll);
}
let inode = self let inode = self
.inode .inode
.downcast_ref::<EventFdInode>() .downcast_ref::<PerfEventInode>()
.ok_or(SystemError::ENOSYS)?; .ok_or(SystemError::ENOSYS)?;
inode.remove_epoll(epoll) return inode.remove_epoll(epoll);
} }
} }
} }
@ -745,7 +761,6 @@ impl FileDescriptorVec {
// 把文件描述符数组对应位置设置为空 // 把文件描述符数组对应位置设置为空
let file = self.fds[fd as usize].take().unwrap(); let file = self.fds[fd as usize].take().unwrap();
return Ok(file); return Ok(file);
} }

View File

@ -125,6 +125,9 @@ bitflags! {
}
pub trait IndexNode: Any + Sync + Send + Debug + CastFromSync {
fn mmap(&self, _start: usize, _len: usize, _offset: usize) -> Result<(), SystemError> {
return Err(SystemError::ENOSYS);
}
/// @brief 打开文件
///
/// @return 成功Ok()

File diff suppressed because it is too large Load Diff

View File

@ -1,2 +1,10 @@
#[allow(clippy::module_inception)] #![allow(
dead_code,
non_camel_case_types,
non_snake_case,
clippy::all,
missing_docs,
clippy::module_inception
)]
pub mod bindings;
pub mod linux_bpf;

View File

@ -1,5 +1,3 @@
use log::warn;
use crate::{
arch::{
init::{early_setup_arch, setup_arch, setup_arch_post},
@ -30,6 +28,7 @@ use crate::{
clocksource::clocksource_boot_finish, timekeeping::timekeeping_init, timer::timer_init,
},
};
use log::warn;
use super::{
boot::{boot_callback_except_early, boot_callbacks},
@ -89,9 +88,8 @@ fn do_start_kernel() {
kthread_init();
setup_arch_post().expect("setup_arch_post failed");
clocksource_boot_finish();
Futex::init();
crate::bpf::init_bpf_system();
#[cfg(all(target_arch = "x86_64", feature = "kvm"))]
crate::virt::kvm::kvm_init();
}

View File

@ -21,6 +21,7 @@
#![feature(slice_ptr_get)]
#![feature(sync_unsafe_cell)]
#![feature(vec_into_raw_parts)]
#![feature(c_variadic)]
#![cfg_attr(target_os = "none", no_std)]
#![allow(internal_features)]
// clippy的配置
@ -46,6 +47,7 @@ mod arch;
mod libs;
#[macro_use]
mod include;
mod bpf;
mod debug;
mod driver; // 如果driver依赖了libs应该在libs后面导出
mod exception;
@ -55,12 +57,12 @@ mod ipc;
mod misc;
mod mm;
mod net;
mod perf;
mod process;
mod sched;
mod smp;
mod syscall;
mod time;
#[cfg(target_arch = "x86_64")]
mod virt;

View File

@ -272,16 +272,16 @@ impl PageFaultHandler {
/// - VmFaultReason: 页面错误处理信息标志 /// - VmFaultReason: 页面错误处理信息标志
pub unsafe fn do_fault(pfm: &mut PageFaultMessage) -> VmFaultReason { pub unsafe fn do_fault(pfm: &mut PageFaultMessage) -> VmFaultReason {
if !pfm.flags().contains(FaultFlags::FAULT_FLAG_WRITE) { if !pfm.flags().contains(FaultFlags::FAULT_FLAG_WRITE) {
return Self::do_read_fault(pfm); Self::do_read_fault(pfm)
} else if !pfm } else if !pfm
.vma() .vma()
.lock_irqsave() .lock_irqsave()
.vm_flags() .vm_flags()
.contains(VmFlags::VM_SHARED) .contains(VmFlags::VM_SHARED)
{ {
return Self::do_cow_fault(pfm); Self::do_cow_fault(pfm)
} else { } else {
return Self::do_shared_fault(pfm); Self::do_shared_fault(pfm)
} }
} }

View File

@ -377,7 +377,7 @@ impl InnerAddressSpace {
PageFrameCount::from_bytes(len).unwrap(), PageFrameCount::from_bytes(len).unwrap(),
prot_flags, prot_flags,
map_flags, map_flags,
move |page, count, vm_flags, flags, mapper, flusher| { |page, count, vm_flags, flags, mapper, flusher| {
if allocate_at_once { if allocate_at_once {
VMA::zeroed( VMA::zeroed(
page, page,
@ -386,7 +386,7 @@ impl InnerAddressSpace {
flags, flags,
mapper, mapper,
flusher, flusher,
file, file.clone(),
Some(pgoff), Some(pgoff),
) )
} else { } else {
@ -394,13 +394,17 @@ impl InnerAddressSpace {
VirtRegion::new(page.virt_address(), count.data() * MMArch::PAGE_SIZE), VirtRegion::new(page.virt_address(), count.data() * MMArch::PAGE_SIZE),
vm_flags, vm_flags,
flags, flags,
file, file.clone(),
Some(pgoff), Some(pgoff),
false, false,
))) )))
} }
}, },
)?; )?;
// todo!(impl mmap for other file)
// https://github.com/DragonOS-Community/DragonOS/pull/912#discussion_r1765334272
let file = file.unwrap();
let _ = file.inode().mmap(start_vaddr.data(), len, offset);
return Ok(start_page); return Ok(start_page);
} }

333
kernel/src/perf/bpf.rs Normal file
View File

@ -0,0 +1,333 @@
use super::{PerfEventOps, Result};
use crate::arch::mm::LockedFrameAllocator;
use crate::arch::MMArch;
use crate::filesystem::vfs::file::PageCache;
use crate::filesystem::vfs::{FilePrivateData, FileSystem, IndexNode};
use crate::include::bindings::linux_bpf::{
perf_event_header, perf_event_mmap_page, perf_event_type,
};
use crate::libs::spinlock::{SpinLock, SpinLockGuard};
use crate::mm::allocator::page_frame::{FrameAllocator, PageFrameCount, PhysPageFrame};
use crate::mm::page::{page_manager_lock_irqsave, Page};
use crate::mm::{MemoryManagementArch, PhysAddr};
use crate::perf::util::{LostSamples, PerfProbeArgs, PerfSample, SampleHeader};
use alloc::string::String;
use alloc::sync::Arc;
use alloc::vec::Vec;
use core::any::Any;
use core::fmt::Debug;
use system_error::SystemError;
const PAGE_SIZE: usize = MMArch::PAGE_SIZE;
#[derive(Debug)]
pub struct BpfPerfEvent {
_args: PerfProbeArgs,
data: SpinLock<BpfPerfEventData>,
}
#[derive(Debug)]
pub struct BpfPerfEventData {
enabled: bool,
mmap_page: RingPage,
page_cache: Arc<PageCache>,
offset: usize,
}
#[derive(Debug)]
pub struct RingPage {
size: usize,
ptr: usize,
data_region_size: usize,
lost: usize,
phys_addr: PhysAddr,
}
impl RingPage {
pub fn empty() -> Self {
RingPage {
ptr: 0,
size: 0,
data_region_size: 0,
lost: 0,
phys_addr: PhysAddr::new(0),
}
}
pub fn new_init(start: usize, len: usize, phys_addr: PhysAddr) -> Self {
Self::init(start as _, len, phys_addr)
}
fn init(ptr: *mut u8, size: usize, phys_addr: PhysAddr) -> Self {
assert_eq!(size % PAGE_SIZE, 0);
assert!(size / PAGE_SIZE >= 2);
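// layout: the first page is the control page (perf_event_mmap_page); the remaining pages form the data ring buffer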
// The first page will be filled with perf_event_mmap_page
unsafe {
let perf_event_mmap_page = &mut *(ptr as *mut perf_event_mmap_page);
perf_event_mmap_page.data_offset = PAGE_SIZE as u64;
perf_event_mmap_page.data_size = (size - PAGE_SIZE) as u64;
// user will read sample or lost record from data_tail
perf_event_mmap_page.data_tail = 0;
// kernel will write sample or lost record from data_head
perf_event_mmap_page.data_head = 0;
// It is a ring buffer.
}
RingPage {
ptr: ptr as usize,
size,
data_region_size: size - PAGE_SIZE,
lost: 0,
phys_addr,
}
}
fn can_write(&self, data_size: usize, data_tail: usize, data_head: usize) -> bool {
if (data_head + 1) % self.data_region_size == data_tail {
// The buffer is full
return false;
}
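// remaining capacity of the ring: the space outside the data_tail..data_head window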
let capacity = if data_head >= data_tail {
self.data_region_size - data_head + data_tail
} else {
data_tail - data_head
};
data_size <= capacity
}
pub fn write_event(&mut self, data: &[u8]) -> Result<()> {
let data_tail = unsafe { &mut (*(self.ptr as *mut perf_event_mmap_page)).data_tail };
let data_head = unsafe { &mut (*(self.ptr as *mut perf_event_mmap_page)).data_head };
// data_tail..data_head is the region that can be written
// check if there is enough space to write the event
let sample_size = PerfSample::calculate_size(data.len());
let can_write_sample =
self.can_write(sample_size, *data_tail as usize, *data_head as usize);
// log::error!(
// "can_write_sample: {}, data_tail: {}, data_head: {}, data.len(): {}, region_size: {}",
// can_write_sample,
// *data_tail,
// *data_head,
// data.len(),
// self.data_region_size
// );
if !can_write_sample {
// not enough space for the sample, so count it as a lost record
self.lost += 1;
// log::error!(
// "Lost record: {}, data_tail: {}, data_head: {}",
// self.lost,
// *data_tail,
// *data_head
// );
Ok(())
} else {
// we can write the sample to the page
// If the lost record is not zero, we need to write the lost record first.
let can_write_lost_record = self.can_write(
size_of::<LostSamples>(),
*data_tail as usize,
*data_head as usize,
);
if self.lost > 0 && can_write_lost_record {
let new_data_head = self.write_lost(*data_head as usize)?;
*data_head = new_data_head as u64;
// log::info!(
// "Write lost record: {}, data_tail: {}, new_data_head: {}",
// self.lost,
// *data_tail,
// *data_head
// );
self.lost = 0;
self.write_event(data)
} else {
let new_data_head = self.write_sample(data, *data_head as usize)?;
*data_head = new_data_head as u64;
// log::info!(
// "Write sample record, data_tail: {}, new_data_head: {}",
// *data_tail,
// *data_head
// );
Ok(())
}
}
}
/// Write any data to the page.
///
/// Return the new data_head
fn write_any(&mut self, data: &[u8], data_head: usize) -> Result<usize> {
let data_region_len = self.data_region_size;
let data_region = self.as_mut_slice()[PAGE_SIZE..].as_mut();
let data_len = data.len();
let end = (data_head + data_len) % data_region_len;
let start = data_head;
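// the data region is a ring: a record that crosses the end is split and wraps around to the start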
if start < end {
data_region[start..end].copy_from_slice(data);
} else {
let first_len = data_region_len - start;
data_region[start..start + first_len].copy_from_slice(&data[..first_len]);
data_region[0..end].copy_from_slice(&data[first_len..]);
}
Ok(end)
}
/// Write a sample to the page.
fn write_sample(&mut self, data: &[u8], data_head: usize) -> Result<usize> {
let perf_sample = PerfSample {
s_hdr: SampleHeader {
header: perf_event_header {
type_: perf_event_type::PERF_RECORD_SAMPLE as u32,
misc: 0,
size: size_of::<SampleHeader>() as u16 + data.len() as u16,
},
size: data.len() as u32,
},
value: data,
};
let new_head = self.write_any(perf_sample.s_hdr.as_bytes(), data_head)?;
self.write_any(perf_sample.value, new_head)
}
/// Write a lost record to the page.
///
/// Return the new data_head
fn write_lost(&mut self, data_head: usize) -> Result<usize> {
let lost = LostSamples {
header: perf_event_header {
type_: perf_event_type::PERF_RECORD_LOST as u32,
misc: 0,
size: size_of::<LostSamples>() as u16,
},
id: 0,
count: self.lost as u64,
};
self.write_any(lost.as_bytes(), data_head)
}
pub fn readable(&self) -> bool {
let data_tail = unsafe { &(*(self.ptr as *mut perf_event_mmap_page)).data_tail };
let data_head = unsafe { &(*(self.ptr as *mut perf_event_mmap_page)).data_head };
data_tail != data_head
}
pub fn as_slice(&self) -> &[u8] {
unsafe { core::slice::from_raw_parts(self.ptr as *const u8, self.size) }
}
pub fn as_mut_slice(&mut self) -> &mut [u8] {
unsafe { core::slice::from_raw_parts_mut(self.ptr as *mut u8, self.size) }
}
}
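// Illustrative only, not part of the patch: a sketch of the consumer side of this ring.
// A reader mapping the same pages walks the records between data_tail and data_head and
// then publishes the new data_tail, which is exactly the space can_write() treats as free.
// Record framing follows write_sample()/write_lost(); for brevity the sketch assumes no
// record wraps around the end of the data region, which a real reader must handle.
#[allow(dead_code)]
unsafe fn drain_ring_sketch(base: *mut u8, size: usize) {
    use crate::include::bindings::linux_bpf::perf_event_header;
    let meta = &mut *(base as *mut perf_event_mmap_page);
    let region = core::slice::from_raw_parts(base.add(PAGE_SIZE), size - PAGE_SIZE);
    let mut tail = meta.data_tail as usize;
    let head = meta.data_head as usize;
    while tail != head {
        // Every record starts with a perf_event_header whose `size` covers the whole
        // record (header + payload), so stepping by it lands on the next header.
        let hdr = &*(region.as_ptr().add(tail) as *const perf_event_header);
        tail = (tail + hdr.size as usize) % region.len();
    }
    // Publishing data_tail tells the writer that the consumed space is free again.
    meta.data_tail = tail as u64;
}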
impl BpfPerfEvent {
pub fn new(args: PerfProbeArgs) -> Self {
BpfPerfEvent {
_args: args,
data: SpinLock::new(BpfPerfEventData {
enabled: false,
mmap_page: RingPage::empty(),
page_cache: PageCache::new(None),
offset: 0,
}),
}
}
pub fn do_mmap(&self, _start: usize, len: usize, offset: usize) -> Result<()> {
let mut data = self.data.lock();
// Allocate physically contiguous page frames for the ring buffer mapping.
let (phy_addr, page_count) =
unsafe { LockedFrameAllocator.allocate(PageFrameCount::new(len / PAGE_SIZE)) }
.ok_or(SystemError::ENOSPC)?;
let mut page_manager_guard = page_manager_lock_irqsave();
let mut cur_phys = PhysPageFrame::new(phy_addr);
for i in 0..page_count.data() {
let page = Arc::new(Page::new(true, cur_phys.phys_address()));
let paddr = cur_phys.phys_address();
page_manager_guard.insert(paddr, &page);
data.page_cache.add_page(i, &page);
cur_phys = cur_phys.next();
}
let virt_addr = unsafe { MMArch::phys_2_virt(phy_addr) }.ok_or(SystemError::EFAULT)?;
// Initialize the ring-buffer metadata page over the newly mapped region.
let mmap_page = RingPage::new_init(virt_addr.data(), len, phy_addr);
data.mmap_page = mmap_page;
data.offset = offset;
Ok(())
}
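// Layout of the mapping built by do_mmap() above:
//   [ page 0: perf_event_mmap_page metadata ][ pages 1..len/PAGE_SIZE: ring data region ]
// which is why RingPage uses data_region_size = len - PAGE_SIZE.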
pub fn write_event(&self, data: &[u8]) -> Result<()> {
let mut inner_data = self.data.lock();
inner_data.mmap_page.write_event(data)?;
Ok(())
}
}
impl Drop for BpfPerfEvent {
fn drop(&mut self) {
let mut page_manager_guard = page_manager_lock_irqsave();
let data = self.data.lock();
let phy_addr = data.mmap_page.phys_addr;
let len = data.mmap_page.size;
let page_count = PageFrameCount::new(len / PAGE_SIZE);
let mut cur_phys = PhysPageFrame::new(phy_addr);
for _ in 0..page_count.data() {
page_manager_guard.remove_page(&cur_phys.phys_address());
cur_phys = cur_phys.next();
}
}
}
impl IndexNode for BpfPerfEvent {
fn mmap(&self, start: usize, len: usize, offset: usize) -> Result<()> {
self.do_mmap(start, len, offset)
}
fn read_at(
&self,
_offset: usize,
_len: usize,
_buf: &mut [u8],
_data: SpinLockGuard<FilePrivateData>,
) -> Result<usize> {
panic!("PerfEventInode does not support read")
}
fn write_at(
&self,
_offset: usize,
_len: usize,
_buf: &[u8],
_data: SpinLockGuard<FilePrivateData>,
) -> Result<usize> {
panic!("PerfEventInode does not support write")
}
fn fs(&self) -> Arc<dyn FileSystem> {
panic!("PerfEventInode does not have a filesystem")
}
fn as_any_ref(&self) -> &dyn Any {
self
}
fn list(&self) -> Result<Vec<String>> {
Err(SystemError::ENOSYS)
}
fn page_cache(&self) -> Option<Arc<PageCache>> {
Some(self.data.lock().page_cache.clone())
}
}
impl PerfEventOps for BpfPerfEvent {
fn enable(&self) -> Result<()> {
self.data.lock().enabled = true;
Ok(())
}
fn disable(&self) -> Result<()> {
self.data.lock().enabled = false;
Ok(())
}
fn readable(&self) -> bool {
self.data.lock().mmap_page.readable()
}
}
pub fn perf_event_open_bpf(args: PerfProbeArgs) -> BpfPerfEvent {
BpfPerfEvent::new(args)
}

159
kernel/src/perf/kprobe.rs Normal file

@ -0,0 +1,159 @@
use super::Result;
use crate::arch::interrupt::TrapFrame;
use crate::arch::kprobe::KProbeContext;
use crate::bpf::helper::BPF_HELPER_FUN_SET;
use crate::bpf::prog::BpfProg;
use crate::debug::kprobe::args::KprobeInfo;
use crate::debug::kprobe::{register_kprobe, unregister_kprobe, LockKprobe};
use crate::filesystem::vfs::file::{File, PageCache};
use crate::filesystem::vfs::{FilePrivateData, FileSystem, IndexNode};
use crate::libs::casting::DowncastArc;
use crate::libs::spinlock::SpinLockGuard;
use crate::perf::util::PerfProbeArgs;
use crate::perf::PerfEventOps;
use alloc::boxed::Box;
use alloc::string::String;
use alloc::sync::Arc;
use alloc::vec::Vec;
use core::any::Any;
use core::fmt::Debug;
use kprobe::{CallBackFunc, ProbeArgs};
use rbpf::EbpfVmRawOwned;
use system_error::SystemError;
#[derive(Debug)]
pub struct KprobePerfEvent {
_args: PerfProbeArgs,
kprobe: LockKprobe,
}
impl Drop for KprobePerfEvent {
fn drop(&mut self) {
unregister_kprobe(self.kprobe.clone());
}
}
impl KprobePerfEvent {
pub fn do_set_bpf_prog(&self, prog_file: Arc<File>) -> Result<()> {
let file = prog_file
.inode()
.downcast_arc::<BpfProg>()
.ok_or(SystemError::EINVAL)?;
let prog_slice = file.insns();
let mut vm =
EbpfVmRawOwned::new(Some(prog_slice.to_vec())).map_err(|_| SystemError::EINVAL)?;
vm.register_helper_set(BPF_HELPER_FUN_SET.get())
.map_err(|_| SystemError::EINVAL)?;
// Create a callback that runs the eBPF program when the kprobe fires.
let callback = Box::new(KprobePerfCallBack::new(file, vm));
// Install the callback on the kprobe.
self.kprobe.write().update_event_callback(callback);
Ok(())
}
}
pub struct KprobePerfCallBack {
_bpf_prog_file: Arc<BpfProg>,
vm: EbpfVmRawOwned,
}
impl KprobePerfCallBack {
fn new(bpf_prog_file: Arc<BpfProg>, vm: EbpfVmRawOwned) -> Self {
Self {
_bpf_prog_file: bpf_prog_file,
vm,
}
}
}
impl CallBackFunc for KprobePerfCallBack {
fn call(&self, trap_frame: &dyn ProbeArgs) {
let trap_frame = trap_frame.as_any().downcast_ref::<TrapFrame>().unwrap();
let pt_regs = KProbeContext::from(trap_frame);
let probe_context = unsafe {
core::slice::from_raw_parts_mut(
&pt_regs as *const KProbeContext as *mut u8,
size_of::<KProbeContext>(),
)
};
let _res = self
.vm
.execute_program(probe_context)
.map_err(|_| SystemError::EINVAL);
}
}
impl IndexNode for KprobePerfEvent {
fn read_at(
&self,
_offset: usize,
_len: usize,
_buf: &mut [u8],
_data: SpinLockGuard<FilePrivateData>,
) -> Result<usize> {
panic!("read_at not implemented for PerfEvent");
}
fn write_at(
&self,
_offset: usize,
_len: usize,
_buf: &[u8],
_data: SpinLockGuard<FilePrivateData>,
) -> Result<usize> {
panic!("write_at not implemented for PerfEvent");
}
fn fs(&self) -> Arc<dyn FileSystem> {
panic!("fs not implemented for PerfEvent");
}
fn as_any_ref(&self) -> &dyn Any {
self
}
fn list(&self) -> Result<Vec<String>> {
Err(SystemError::ENOSYS)
}
fn page_cache(&self) -> Option<Arc<PageCache>> {
None
}
}
impl PerfEventOps for KprobePerfEvent {
fn set_bpf_prog(&self, bpf_prog: Arc<File>) -> Result<()> {
self.do_set_bpf_prog(bpf_prog)
}
fn enable(&self) -> Result<()> {
self.kprobe.write().enable();
Ok(())
}
fn disable(&self) -> Result<()> {
self.kprobe.write().disable();
Ok(())
}
fn readable(&self) -> bool {
true
}
}
pub fn perf_event_open_kprobe(args: PerfProbeArgs) -> KprobePerfEvent {
let symbol = args.name.clone();
log::info!("create kprobe for symbol: {symbol}");
let kprobe_info = KprobeInfo {
pre_handler: |_| {},
post_handler: |_| {},
fault_handler: None,
event_callback: None,
symbol: Some(symbol),
addr: None,
offset: 0,
enable: false,
};
let kprobe = register_kprobe(kprobe_info).expect("create kprobe failed");
KprobePerfEvent {
_args: args,
kprobe,
}
}
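// Illustrative only, not part of the patch: KprobePerfCallBack::call hands the eBPF VM a
// raw byte view of KProbeContext; this is the inverse cast a host-side test or helper
// could use to recover the typed register snapshot from such a slice.
#[allow(dead_code)]
fn context_from_bytes(raw: &[u8]) -> Option<&KProbeContext> {
    if raw.len() < core::mem::size_of::<KProbeContext>() {
        return None;
    }
    // Safety (assumed for this sketch): the slice was produced from a live, properly
    // aligned KProbeContext, exactly as in KprobePerfCallBack::call.
    Some(unsafe { &*(raw.as_ptr() as *const KProbeContext) })
}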

337
kernel/src/perf/mod.rs Normal file

@ -0,0 +1,337 @@
mod bpf;
mod kprobe;
mod util;
use crate::filesystem::vfs::file::{File, FileMode, PageCache};
use crate::filesystem::vfs::syscall::ModeType;
use crate::filesystem::vfs::{
FilePrivateData, FileSystem, FileType, FsInfo, IndexNode, Metadata, SuperBlock,
};
use crate::include::bindings::linux_bpf::{
perf_event_attr, perf_event_sample_format, perf_sw_ids, perf_type_id,
};
use crate::libs::casting::DowncastArc;
use crate::libs::spinlock::{SpinLock, SpinLockGuard};
use crate::mm::fault::{PageFaultHandler, PageFaultMessage};
use crate::mm::VmFaultReason;
use crate::net::event_poll::{EPollEventType, EPollItem, EventPoll, KernelIoctlData};
use crate::perf::bpf::BpfPerfEvent;
use crate::perf::util::{PerfEventIoc, PerfEventOpenFlags, PerfProbeArgs};
use crate::process::ProcessManager;
use crate::syscall::user_access::UserBufferReader;
use crate::syscall::Syscall;
use alloc::boxed::Box;
use alloc::collections::LinkedList;
use alloc::string::String;
use alloc::sync::{Arc, Weak};
use alloc::vec::Vec;
use core::any::Any;
use core::ffi::c_void;
use core::fmt::Debug;
use core::ops::Deref;
use intertrait::{CastFrom, CastFromSync};
use log::info;
use num_traits::FromPrimitive;
use system_error::SystemError;
type Result<T> = core::result::Result<T, SystemError>;
pub trait PerfEventOps: Send + Sync + Debug + CastFromSync + CastFrom + IndexNode {
/// Set the bpf program for the perf event
fn set_bpf_prog(&self, _bpf_prog: Arc<File>) -> Result<()> {
Err(SystemError::ENOSYS)
}
/// Enable the perf event
fn enable(&self) -> Result<()> {
Err(SystemError::ENOSYS)
}
/// Disable the perf event
fn disable(&self) -> Result<()> {
Err(SystemError::ENOSYS)
}
/// Whether the perf event is readable
fn readable(&self) -> bool;
}
#[derive(Debug)]
pub struct PerfEventInode {
event: Box<dyn PerfEventOps>,
epitems: SpinLock<LinkedList<Arc<EPollItem>>>,
}
impl PerfEventInode {
pub fn new(event: Box<dyn PerfEventOps>) -> Self {
Self {
event,
epitems: SpinLock::new(LinkedList::new()),
}
}
pub fn remove_epoll(
&self,
epoll: &Weak<SpinLock<EventPoll>>,
) -> core::result::Result<(), SystemError> {
let is_remove = !self
.epitems
.lock_irqsave()
.extract_if(|x| x.epoll().ptr_eq(epoll))
.collect::<Vec<_>>()
.is_empty();
if is_remove {
return Ok(());
}
Err(SystemError::ENOENT)
}
fn do_poll(&self) -> Result<usize> {
let mut events = EPollEventType::empty();
if self.event.readable() {
events |= EPollEventType::EPOLLIN | EPollEventType::EPOLLRDNORM;
}
return Ok(events.bits() as usize);
}
fn epoll_callback(&self) -> Result<()> {
let pollflag = EPollEventType::from_bits_truncate(self.do_poll()? as u32);
// Wake up any processes waiting on this event via epoll.
EventPoll::wakeup_epoll(&self.epitems, Some(pollflag))
}
}
impl Deref for PerfEventInode {
type Target = Box<dyn PerfEventOps>;
fn deref(&self) -> &Self::Target {
&self.event
}
}
impl IndexNode for PerfEventInode {
fn mmap(&self, start: usize, len: usize, offset: usize) -> Result<()> {
self.event.mmap(start, len, offset)
}
fn open(&self, _data: SpinLockGuard<FilePrivateData>, _mode: &FileMode) -> Result<()> {
Ok(())
}
fn close(&self, _data: SpinLockGuard<FilePrivateData>) -> Result<()> {
Ok(())
}
fn read_at(
&self,
_offset: usize,
_len: usize,
_buf: &mut [u8],
_data: SpinLockGuard<FilePrivateData>,
) -> Result<usize> {
panic!("read_at not implemented for PerfEvent");
}
fn write_at(
&self,
_offset: usize,
_len: usize,
_buf: &[u8],
_data: SpinLockGuard<FilePrivateData>,
) -> Result<usize> {
panic!("write_at not implemented for PerfEvent");
}
fn poll(&self, _private_data: &FilePrivateData) -> Result<usize> {
self.do_poll()
}
fn metadata(&self) -> Result<Metadata> {
let meta = Metadata {
mode: ModeType::from_bits_truncate(0o755),
file_type: FileType::File,
..Default::default()
};
Ok(meta)
}
fn resize(&self, _len: usize) -> Result<()> {
Ok(())
}
fn ioctl(&self, cmd: u32, data: usize, _private_data: &FilePrivateData) -> Result<usize> {
let req = PerfEventIoc::from_u32(cmd).ok_or(SystemError::EINVAL)?;
info!("perf_event_ioctl: request: {:?}, arg: {}", req, data);
match req {
PerfEventIoc::Enable => {
self.event.enable()?;
Ok(0)
}
PerfEventIoc::Disable => {
self.event.disable()?;
Ok(0)
}
PerfEventIoc::SetBpf => {
info!("perf_event_ioctl: PERF_EVENT_IOC_SET_BPF, arg: {}", data);
let bpf_prog_fd = data;
let fd_table = ProcessManager::current_pcb().fd_table();
let file = fd_table
.read()
.get_file_by_fd(bpf_prog_fd as _)
.ok_or(SystemError::EBADF)?;
self.event.set_bpf_prog(file)?;
Ok(0)
}
}
}
fn kernel_ioctl(
&self,
arg: Arc<dyn KernelIoctlData>,
_data: &FilePrivateData,
) -> core::result::Result<usize, SystemError> {
let epitem = arg
.arc_any()
.downcast::<EPollItem>()
.map_err(|_| SystemError::EFAULT)?;
self.epitems.lock().push_back(epitem);
Ok(0)
}
fn fs(&self) -> Arc<dyn FileSystem> {
// panic!("PerfEvent does not have a filesystem")
Arc::new(PerfFakeFs)
}
fn as_any_ref(&self) -> &dyn Any {
self
}
fn list(&self) -> Result<Vec<String>> {
Err(SystemError::ENOSYS)
}
fn page_cache(&self) -> Option<Arc<PageCache>> {
self.event.page_cache()
}
}
#[derive(Debug)]
struct PerfFakeFs;
impl FileSystem for PerfFakeFs {
fn root_inode(&self) -> Arc<dyn IndexNode> {
panic!("PerfFakeFs does not have a root inode")
}
fn info(&self) -> FsInfo {
panic!("PerfFakeFs does not have a filesystem info")
}
fn as_any_ref(&self) -> &dyn Any {
self
}
fn name(&self) -> &str {
"perf"
}
fn super_block(&self) -> SuperBlock {
panic!("PerfFakeFs does not have a super block")
}
unsafe fn fault(&self, pfm: &mut PageFaultMessage) -> VmFaultReason {
PageFaultHandler::filemap_fault(pfm)
}
unsafe fn map_pages(
&self,
pfm: &mut PageFaultMessage,
start_pgoff: usize,
end_pgoff: usize,
) -> VmFaultReason {
PageFaultHandler::filemap_map_pages(pfm, start_pgoff, end_pgoff)
}
}
impl Syscall {
pub fn sys_perf_event_open(
attr: *const u8,
pid: i32,
cpu: i32,
group_fd: i32,
flags: u32,
) -> Result<usize> {
let buf = UserBufferReader::new(
attr as *const perf_event_attr,
size_of::<perf_event_attr>(),
true,
)?;
let attr = buf.read_one_from_user(0)?;
perf_event_open(attr, pid, cpu, group_fd, flags)
}
}
pub fn perf_event_open(
attr: &perf_event_attr,
pid: i32,
cpu: i32,
group_fd: i32,
flags: u32,
) -> Result<usize> {
let args = PerfProbeArgs::try_from(attr, pid, cpu, group_fd, flags)?;
log::info!("perf_event_process: {:#?}", args);
let file_mode = if args
.flags
.contains(PerfEventOpenFlags::PERF_FLAG_FD_CLOEXEC)
{
FileMode::O_RDWR | FileMode::O_CLOEXEC
} else {
FileMode::O_RDWR
};
let event: Box<dyn PerfEventOps> = match args.type_ {
// Kprobe
// See /sys/bus/event_source/devices/kprobe/type
perf_type_id::PERF_TYPE_MAX => {
let kprobe_event = kprobe::perf_event_open_kprobe(args);
Box::new(kprobe_event)
}
perf_type_id::PERF_TYPE_SOFTWARE => {
// For bpf prog output
assert_eq!(args.config, perf_sw_ids::PERF_COUNT_SW_BPF_OUTPUT);
assert_eq!(
args.sample_type,
Some(perf_event_sample_format::PERF_SAMPLE_RAW)
);
let bpf_event = bpf::perf_event_open_bpf(args);
Box::new(bpf_event)
}
_ => {
unimplemented!("perf_event_process: unknown type: {:?}", args);
}
};
let page_cache = event.page_cache();
let perf_event = Arc::new(PerfEventInode::new(event));
if let Some(cache) = page_cache {
cache.set_inode(Arc::downgrade(&(perf_event.clone() as _)))?;
}
let file = File::new(perf_event, file_mode)?;
let fd_table = ProcessManager::current_pcb().fd_table();
let fd = fd_table.write().alloc_fd(file, None).map(|x| x as usize)?;
Ok(fd)
}
pub fn perf_event_output(_ctx: *mut c_void, fd: usize, _flags: u32, data: &[u8]) -> Result<()> {
let file = get_perf_event_file(fd)?;
let bpf_event_file = file.deref().deref();
let bpf_event_file = bpf_event_file
.deref()
.ref_any()
.downcast_ref::<BpfPerfEvent>()
.ok_or(SystemError::EINVAL)?;
bpf_event_file.write_event(data)?;
file.epoll_callback()?;
Ok(())
}
fn get_perf_event_file(fd: usize) -> Result<Arc<PerfEventInode>> {
let fd_table = ProcessManager::current_pcb().fd_table();
let file = fd_table
.read()
.get_file_by_fd(fd as _)
.ok_or(SystemError::EBADF)?;
let event = file
.inode()
.downcast_arc::<PerfEventInode>()
.ok_or(SystemError::EINVAL)?;
Ok(event)
}
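// Illustrative only, not part of the patch: the sequence a loader is expected to drive
// against this module, written in terms of the functions defined above (error handling
// is reduced to `?`, and the fd numbers are assumed to be valid).
#[allow(dead_code)]
fn perf_attach_sketch(attr: &perf_event_attr, bpf_prog_fd: usize) -> Result<()> {
    // sys_perf_event_open() -> perf_event_open(): build the event and install it in the fd table.
    let fd = perf_event_open(attr, -1, 0, -1, 0)?;
    // ioctl(fd, PERF_EVENT_IOC_SET_BPF, bpf_prog_fd): resolve the program file and hand it over.
    let event = get_perf_event_file(fd)?;
    let prog_file = ProcessManager::current_pcb()
        .fd_table()
        .read()
        .get_file_by_fd(bpf_prog_fd as _)
        .ok_or(SystemError::EBADF)?;
    event.set_bpf_prog(prog_file)?;
    // ioctl(fd, PERF_EVENT_IOC_ENABLE, 0): arm the underlying kprobe / mark the event enabled.
    event.enable()?;
    Ok(())
}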

123
kernel/src/perf/util.rs Normal file

@ -0,0 +1,123 @@
use crate::include::bindings::linux_bpf::{
perf_event_attr, perf_event_header, perf_event_sample_format, perf_sw_ids, perf_type_id,
};
use crate::syscall::user_access::check_and_clone_cstr;
use alloc::string::String;
use num_traits::FromPrimitive;
use system_error::SystemError;
bitflags! {
pub struct PerfEventOpenFlags: u32 {
const PERF_FLAG_FD_NO_GROUP = 1;
const PERF_FLAG_FD_OUTPUT = 2;
const PERF_FLAG_PID_CGROUP = 4;
const PERF_FLAG_FD_CLOEXEC = 8;
}
}
/// The `PerfEventIoc` enum is used to define the ioctl commands for perf events.
///
/// See https://elixir.bootlin.com/linux/v6.1/source/include/uapi/linux/perf_event.h#L544
#[repr(u32)]
#[derive(Debug, Copy, Clone, FromPrimitive)]
pub enum PerfEventIoc {
/// Equivalent to [crate::include::bindings::linux_bpf::AYA_PERF_EVENT_IOC_ENABLE].
Enable = 9216,
/// Equivalent to [crate::include::bindings::linux_bpf::AYA_PERF_EVENT_IOC_DISABLE].
Disable = 9217,
/// Equivalent to [crate::include::bindings::linux_bpf::AYA_PERF_EVENT_IOC_SET_BPF].
SetBpf = 1074013192,
}
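// How these request values decode under the Linux ioctl encoding
// (dir << 30 | size << 16 | type << 8 | nr, with type '$' == 0x24). A small
// compile-time sanity sketch, not something this module relies on:
const _: () = {
    assert!(((0x24u32 << 8) | 0) == 9216); // PERF_EVENT_IOC_ENABLE  = _IO('$', 0)
    assert!(((0x24u32 << 8) | 1) == 9217); // PERF_EVENT_IOC_DISABLE = _IO('$', 1)
    assert!(((1u32 << 30) | (4 << 16) | (0x24 << 8) | 8) == 1074013192); // PERF_EVENT_IOC_SET_BPF = _IOW('$', 8, u32)
};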
#[derive(Debug, Clone)]
#[allow(unused)]
/// `perf_event_open` syscall arguments.
pub struct PerfProbeArgs {
pub config: perf_sw_ids,
pub name: String,
pub offset: u64,
pub size: u32,
pub type_: perf_type_id,
pub pid: i32,
pub cpu: i32,
pub group_fd: i32,
pub flags: PerfEventOpenFlags,
pub sample_type: Option<perf_event_sample_format>,
}
impl PerfProbeArgs {
pub fn try_from(
attr: &perf_event_attr,
pid: i32,
cpu: i32,
group_fd: i32,
flags: u32,
) -> Result<Self, SystemError> {
let ty = perf_type_id::from_u32(attr.type_).ok_or(SystemError::EINVAL)?;
let config = perf_sw_ids::from_u32(attr.config as u32).ok_or(SystemError::EINVAL)?;
let name = if ty == perf_type_id::PERF_TYPE_MAX {
let name_ptr = unsafe { attr.__bindgen_anon_3.config1 } as *const u8;
let name = check_and_clone_cstr(name_ptr, None)?;
name.into_string().map_err(|_| SystemError::EINVAL)?
} else {
String::new()
};
let sample_ty = perf_event_sample_format::from_u32(attr.sample_type as u32);
let args = PerfProbeArgs {
config,
name,
offset: unsafe { attr.__bindgen_anon_4.config2 },
size: attr.size,
type_: ty,
pid,
cpu,
group_fd,
flags: PerfEventOpenFlags::from_bits_truncate(flags),
sample_type: sample_ty,
};
Ok(args)
}
}
/// In this use case the event type is either `PERF_RECORD_SAMPLE` or `PERF_RECORD_LOST`.
/// `PERF_RECORD_SAMPLE` indicates that an actual sample follows the header,
/// while `PERF_RECORD_LOST` indicates that a lost-records payload follows the perf event header.
#[repr(C)]
#[derive(Debug)]
pub struct LostSamples {
pub header: perf_event_header,
pub id: u64,
pub count: u64,
}
impl LostSamples {
pub fn as_bytes(&self) -> &[u8] {
unsafe { core::slice::from_raw_parts(self as *const Self as *const u8, size_of::<Self>()) }
}
}
#[repr(C)]
#[derive(Debug)]
pub struct SampleHeader {
pub header: perf_event_header,
pub size: u32,
}
impl SampleHeader {
pub fn as_bytes(&self) -> &[u8] {
unsafe { core::slice::from_raw_parts(self as *const Self as *const u8, size_of::<Self>()) }
}
}
#[repr(C)]
#[derive(Debug)]
pub struct PerfSample<'a> {
pub s_hdr: SampleHeader,
pub value: &'a [u8],
}
impl<'a> PerfSample<'a> {
pub fn calculate_size(value_size: usize) -> usize {
size_of::<SampleHeader>() + value_size
}
}
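// A compile-time sanity sketch of the record sizes these repr(C) structs imply,
// assuming the bindgen perf_event_header matches the Linux UAPI layout
// (u32 type_, u16 misc, u16 size => 8 bytes). For a 16-byte payload,
// PerfSample::calculate_size(16) then evaluates to 12 + 16 == 28, which is the
// figure write_event() checks against the free space in the ring.
const _: () = {
    assert!(core::mem::size_of::<SampleHeader>() == 12);
    assert!(core::mem::size_of::<LostSamples>() == 24);
};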


@ -134,7 +134,6 @@ impl SmpCpuManager {
         &self.possible_cpus
     }
 
-    #[allow(dead_code)]
     pub fn possible_cpus_count(&self) -> u32 {
         self.possible_cnt.load(core::sync::atomic::Ordering::SeqCst)
     }


@ -1159,6 +1159,20 @@ impl Syscall {
                 let flags = args[1] as u32;
                 Self::sys_eventfd(initval, flags)
             }
+            SYS_BPF => {
+                let cmd = args[0] as u32;
+                let attr = args[1] as *mut u8;
+                let size = args[2] as u32;
+                Self::sys_bpf(cmd, attr, size)
+            }
+            SYS_PERF_EVENT_OPEN => {
+                let attr = args[0] as *const u8;
+                let pid = args[1] as i32;
+                let cpu = args[2] as i32;
+                let group_fd = args[3] as i32;
+                let flags = args[4] as u32;
+                Self::sys_perf_event_open(attr, pid, cpu, group_fd, flags)
+            }
             _ => panic!("Unsupported syscall ID: {}", syscall_num),
         };

Some files were not shown because too many files have changed in this diff.