feat(ebpf):[WIP] add eBPF support (#948)

* feat(kprobe): Add basic kprobe support for x86_64

* feat: add ebpf support (#912)

- 实现bpf()一部分命令,包括几种基本map,相关的helper函数
- 实现部分perf相关的数据结构
- 暂时为文件实现简单mmap
- 实现一个使用kprobe统计syscall 调用次数的ebpf程序

对eBPF支持程度(基本):

- 简单的eBPF程序(没有指定特殊的Map)
- 使用内核已经实现的Map的eBPF程序
- 可以和kprobe配合使用
- 内核Map相关的接口定义已经实现,添加新的Map较为简单

不支持的功能:
- 区分不同的eBPF程序类型(Network/Cgroup)并限定可调用的helper函数集
- 与内核其它跟踪机制配合(tracepoint)
- 其它helper和Map


todo

- [ ]  修改mmap,需要讨论,因为这个和块缓存层相关
- [x]  添加文档
- [x]  修复可能的错误
- [x] 增加rbpf版本信息

* feat: add /sys/devices/system/cpu/possible file

* feat: add /sys/devices/system/cpu/online
This commit is contained in:
linfeng
2024-10-25 15:59:57 +08:00
committed by GitHub
parent 80c9e8f8f0
commit fae6e9ade4
126 changed files with 29529 additions and 62 deletions

View File

@ -0,0 +1,26 @@
// SPDX-License-Identifier: (Apache-2.0 OR MIT)
// Copyright 2017 6WIND S.A. <quentin.monnet@6wind.com>
extern crate rbpf;
use rbpf::disassembler;
// Simply disassemble a program into human-readable instructions.
/// Entry point: hands a hard-coded eBPF program to `rbpf::disassembler::disassemble`.
fn main() {
    // Raw bytecode, laid out one 8-byte instruction slot per row.
    #[rustfmt::skip]
    let bytecode: &[u8] = &[
        0xb7, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x79, 0x12, 0x50, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x79, 0x11, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00,
        0xbf, 0x13, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x07, 0x03, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00,
        0x2d, 0x23, 0x12, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x69, 0x12, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x55, 0x02, 0x10, 0x00, 0x08, 0x00, 0x00, 0x00,
        0x71, 0x12, 0x17, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x55, 0x02, 0x0e, 0x00, 0x06, 0x00, 0x00, 0x00,
        0x18, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x79, 0x11, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00,
        0xbf, 0x12, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x57, 0x02, 0x00, 0x00, 0xff, 0xff, 0x00, 0x00,
        0x15, 0x02, 0x08, 0x00, 0x99, 0x99, 0x00, 0x00,
        0x18, 0x02, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x5f, 0x21, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0xb7, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
        0x18, 0x02, 0x00, 0x00, 0x00, 0x00, 0x99, 0x99,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x1d, 0x21, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00,
        0xb7, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x95, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    ];
    disassembler::disassemble(bytecode);
}

View File

@ -0,0 +1,3 @@
/// Entry point: calls `rbpf::helpers::show_helper` and exits.
// NOTE(review): presumably this lists the helpers rbpf provides; confirm against the helper's docs.
fn main() {
    use rbpf::helpers::show_helper;

    show_helper();
}

View File

@ -0,0 +1,115 @@
// SPDX-License-Identifier: (Apache-2.0 OR MIT)
// Copyright 2016 6WIND S.A. <quentin.monnet@6wind.com>
#![allow(clippy::unreadable_literal)]
extern crate elf;
use std::path::PathBuf;
extern crate rbpf;
use rbpf::helpers;
// The following example uses an ELF file that has been compiled from the C program available in
// `load_elf__block_a_port.c` in the same directory.
//
// It was compiled with the following command:
//
// ```bash
// clang -O2 -emit-llvm -c load_elf__block_a_port.c -o - | \
// llc -march=bpf -filetype=obj -o load_elf__block_a_port.o
// ```
//
// Once compiled, this program can be injected into Linux kernel, with tc for instance. Sadly, we
// need to bring some modifications to the generated bytecode in order to run it: the three
// instructions with opcode 0x61 load data from a packet area as 4-byte words, where we need to
// load it as 8-bytes double words (0x79). The kernel does the same kind of translation before
// running the program, but rbpf does not implement this.
//
// In addition, the offset at which the pointer to the packet data is stored must be changed: since
// we use 8 bytes instead of 4 for the start and end addresses of the data packet, we cannot use
// the offsets produced by clang (0x4c and 0x50), the addresses would overlap. Instead we can use,
// for example, 0x40 and 0x50.
//
// These changes were applied with the following script:
//
// ```bash
// xxd load_elf__block_a_port.o | sed '
// s/6112 5000 0000 0000/7912 5000 0000 0000/ ;
// s/6111 4c00 0000 0000/7911 4000 0000 0000/ ;
// s/6111 2200 0000 0000/7911 2200 0000 0000/' | xxd -r > load_elf__block_a_port.tmp
// mv load_elf__block_a_port.tmp load_elf__block_a_port.o
// ```
//
// The eBPF program was placed into the `.classifier` ELF section (see C code above), which means
// that you can retrieve the raw bytecode with `readelf -x .classifier load_elf__block_a_port.o` or
// with `objdump -s -j .classifier load_elf__block_a_port.o`.
//
// Once the bytecode has been edited, we can load the bytecode directly from the ELF object file.
fn main() {
let filename = "examples/load_elf__block_a_port.elf";
let path = PathBuf::from(filename);
let file = match elf::File::open_path(path) {
Ok(f) => f,
Err(e) => panic!("Error: {:?}", e),
};
let text_scn = match file.get_section(".classifier") {
Some(s) => s,
None => panic!("Failed to look up .classifier section"),
};
let prog = &text_scn.data;
let packet1 = &mut [
0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54, 0x08,
0x00, // ethertype
0x45, 0x00, 0x00, 0x3b, // start ip_hdr
0xa6, 0xab, 0x40, 0x00, 0x40, 0x06, 0x96, 0x0f, 0x7f, 0x00, 0x00, 0x01, 0x7f, 0x00, 0x00,
0x01,
// Program matches the next two bytes: 0x9999 returns 0xffffffff, else return 0.
0x99, 0x99, 0xc6, 0xcc, // start tcp_hdr
0xd1, 0xe5, 0xc4, 0x9d, 0xd4, 0x30, 0xb5, 0xd2, 0x80, 0x18, 0x01, 0x56, 0xfe, 0x2f, 0x00,
0x00, 0x01, 0x01, 0x08, 0x0a, // start data
0x00, 0x23, 0x75, 0x89, 0x00, 0x23, 0x63, 0x2d, 0x71, 0x64, 0x66, 0x73, 0x64, 0x66, 0x0au8,
];
let packet2 = &mut [
0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54, 0x08,
0x00, // ethertype
0x45, 0x00, 0x00, 0x3b, // start ip_hdr
0xa6, 0xab, 0x40, 0x00, 0x40, 0x06, 0x96, 0x0f, 0x7f, 0x00, 0x00, 0x01, 0x7f, 0x00, 0x00,
0x01,
// Program matches the next two bytes: 0x9999 returns 0xffffffff, else return 0.
0x98, 0x76, 0xc6, 0xcc, // start tcp_hdr
0xd1, 0xe5, 0xc4, 0x9d, 0xd4, 0x30, 0xb5, 0xd2, 0x80, 0x18, 0x01, 0x56, 0xfe, 0x2f, 0x00,
0x00, 0x01, 0x01, 0x08, 0x0a, // start data
0x00, 0x23, 0x75, 0x89, 0x00, 0x23, 0x63, 0x2d, 0x71, 0x64, 0x66, 0x73, 0x64, 0x66, 0x0au8,
];
let mut vm = rbpf::EbpfVmFixedMbuff::new(Some(prog), 0x40, 0x50).unwrap();
vm.register_helper(helpers::BPF_TRACE_PRINTK_IDX, helpers::bpf_trace_printf)
.unwrap();
let res = vm.execute_program(packet1).unwrap();
println!("Packet #1, program returned: {res:?} ({res:#x})");
assert_eq!(res, 0xffffffff);
#[cfg(not(windows))]
{
vm.jit_compile().unwrap();
let res = unsafe { vm.execute_program_jit(packet2).unwrap() };
println!("Packet #2, program returned: {res:?} ({res:#x})");
assert_eq!(res, 0);
}
#[cfg(windows)]
{
let res = vm.execute_program(packet2).unwrap();
println!("Packet #2, program returned: {:?} ({:#x})", res, res);
assert_eq!(res, 0);
}
}

View File

@ -0,0 +1,43 @@
// SPDX-License-Identifier: (APACHE-2.0 OR MIT)
// Copyright 2016 6WIND S.A. <quentin.monnet@6wind.com>
// Block TCP packets on source or destination port 0x9999.
#include <linux/ip.h>
#include <linux/in.h>
#include <linux/tcp.h>
#include <linux/bpf.h>
#define ETH_ALEN 6
#define ETH_P_IP 0x0008 /* htons(0x0800) */
#define TCP_HDR_LEN 20
#define BLOCKED_TCP_PORT 0x9999
/*
 * Header overlaid on the very start of the packet data by handle_ingress().
 * Field order and sizes determine where h_proto is read from, so they must not be rearranged.
 */
struct eth_hdr {
/* ETH_ALEN (6) bytes; presumably the destination hardware address -- not read by this program. */
unsigned char h_dest[ETH_ALEN];
/* ETH_ALEN (6) bytes; presumably the source hardware address -- not read by this program. */
unsigned char h_source[ETH_ALEN];
/* Protocol field; handle_ingress() compares it against ETH_P_IP as stored in the packet. */
unsigned short h_proto;
};
#define SEC(NAME) __attribute__((section(NAME), used))
/*
 * Classifier entry point, placed in the ".classifier" ELF section.
 *
 * Returns -1 when the packet is IP/TCP and either its source or destination
 * port equals BLOCKED_TCP_PORT; returns 0 in every other case, including when
 * the packet is too short to hold the three headers.
 */
SEC(".classifier")
int handle_ingress(struct __sk_buff *skb)
{
	void *pkt = (void *)(long)skb->data;
	void *pkt_end = (void *)(long)skb->data_end;
	struct eth_hdr *eth = pkt;
	struct iphdr *iph = pkt + sizeof(*eth);
	struct tcphdr *tcp = pkt + sizeof(*eth) + sizeof(*iph);

	/*
	 * The length check comes first; `||` short-circuits, so no header
	 * field is read unless all three headers fit inside the packet.
	 */
	if (pkt + sizeof(*eth) + sizeof(*iph) + sizeof(*tcp) > pkt_end ||
	    eth->h_proto != ETH_P_IP ||
	    iph->protocol != IPPROTO_TCP)
		return 0;

	return (tcp->source == BLOCKED_TCP_PORT || tcp->dest == BLOCKED_TCP_PORT) ? -1 : 0;
}

View File

@ -0,0 +1,126 @@
// Copyright Microsoft Corporation
// SPDX-License-Identifier: (Apache-2.0 OR MIT)
// Path: examples/rbpf_plugin.rs
use std::io::Read;
/// Helper registered with the VM for the bpf_conformance `call_unwind_fail` test
/// (<https://github.com/Alan-Jowett/bpf_conformance/blob/main/tests/call_unwind_fail.data>).
///
/// Returns its first argument unchanged; the remaining four arguments are ignored.
fn _unwind(a: u64, _b: u64, _c: u64, _d: u64, _e: u64) -> u64 {
    a
}
// This is a plugin for the bpf_conformance test suite (https://github.com/Alan-Jowett/bpf_conformance)
// It accepts a single argument, the memory contents to pass to the VM.
// It reads the program from stdin.
/// Plugin entry point for the bpf_conformance test suite.
///
/// Command line: `rbpf_plugin [memory] [--jit] [--cranelift] [--program <hex>] [--help]`
///
/// * The first argument, when present, is always taken as the hex-encoded memory contents.
/// * The program is hex-encoded bytecode, taken from `--program` or, if that option is absent,
///   read from stdin.
/// * On success the program's return value is printed in lowercase hex.
///
/// Panics on an unknown option, on invalid hex input, or when the VM reports an error.
fn main() {
    // Skip argv[0]. An iterator is used instead of repeatedly calling `Vec::remove(0)`: each
    // argument is consumed exactly once, so an option that takes a value (`--program`) can no
    // longer cause the following argument to be dropped or trigger a removal from an empty list.
    let mut args = std::env::args().skip(1);
    #[allow(unused_mut)] // In no_std the jit variable isn't mutated.
    let mut jit: bool = false;
    let mut cranelift: bool = false;
    let mut program_text = String::new();

    // Memory is always the first argument; it is treated as empty when no argument is given.
    let mut memory_text = args.next().unwrap_or_default();
    // Strip whitespace
    memory_text.retain(|c| !c.is_whitespace());

    // Process the rest of the arguments.
    while let Some(arg) = args.next() {
        match arg.as_str() {
            "--help" => {
                println!("Usage: rbpf_plugin [memory] < program");
                return;
            }
            "--jit" => {
                #[cfg(any(windows, not(feature = "std")))]
                {
                    println!("JIT not supported");
                    return;
                }
                #[cfg(all(not(windows), feature = "std"))]
                {
                    jit = true;
                }
            }
            "--cranelift" => {
                cranelift = true;
                #[cfg(not(feature = "cranelift"))]
                {
                    // Read the flag once so the assignment above is not reported as unused.
                    let _ = cranelift;
                    println!("Cranelift is not enabled");
                    return;
                }
            }
            "--program" => match args.next() {
                Some(text) => program_text = text,
                None => {
                    println!("Missing argument to --program");
                    return;
                }
            },
            _ => panic!("Unknown argument {}", arg),
        }
    }

    if program_text.is_empty() {
        // Read program text from stdin
        std::io::stdin()
            .read_to_string(&mut program_text)
            .expect("failed to read program text from stdin");
    }
    // Strip whitespace
    program_text.retain(|c| !c.is_whitespace());

    // Convert program from hex to bytecode
    let bytecode = hex::decode(program_text).expect("program text is not valid hex");
    // Convert memory from hex to bytes
    let mut memory: Vec<u8> = hex::decode(memory_text).expect("memory text is not valid hex");

    // Create rbpf vm
    let mut vm = rbpf::EbpfVmRaw::new(Some(&bytecode)).unwrap();
    // Register the helper function used by call_unwind_fail.data test.
    vm.register_helper(5, _unwind).unwrap();

    let result: u64;
    if jit {
        #[cfg(any(windows, not(feature = "std")))]
        {
            println!("JIT not supported");
            return;
        }
        #[cfg(all(not(windows), feature = "std"))]
        {
            // NOTE(review): the invariants required by the unsafe JIT entry point are not
            // visible from this file; confirm against the rbpf documentation.
            unsafe {
                vm.jit_compile().unwrap();
                result = vm.execute_program_jit(&mut memory).unwrap();
            }
        }
    } else if cranelift {
        #[cfg(not(feature = "cranelift"))]
        {
            println!("Cranelift is not enabled");
            return;
        }
        #[cfg(feature = "cranelift")]
        {
            vm.cranelift_compile().unwrap();
            result = vm.execute_program_cranelift(&mut memory).unwrap();
        }
    } else {
        result = vm.execute_program(&mut memory).unwrap();
    }
    println!("{result:x}");
}

View File

@ -0,0 +1,74 @@
// SPDX-License-Identifier: (Apache-2.0 OR MIT)
// Copyright 2017 6WIND S.A. <quentin.monnet@6wind.com>
#[macro_use]
extern crate json;
extern crate elf;
use std::path::PathBuf;
extern crate rbpf;
use rbpf::disassembler;
// Turn a program into a JSON string.
//
// Relies on `json` crate.
//
// You may copy this function and adapt it according to your needs. For instance, you may want to:
//
// * Remove the "desc" (description) attributes from the output.
// * Print integers as integers, and not as strings containing their hexadecimal representation
// (just replace the relevant `format!()` calls with the commented values).
/// Renders the eBPF program `prog` as a pretty-printed JSON string (4-space indent).
///
/// The document has two keys: `"size"`, the number of instructions returned by
/// `disassembler::to_insn_vec`, and `"insns"`, one object per instruction with its fields
/// formatted as hexadecimal strings plus its description.
fn to_json(prog: &[u8]) -> String {
    // `to_insn_vec` gives the high-level view of the program: the two halves of an `LD_DW_IMM`
    // instruction are merged, and each entry carries a name and a description. For a lower-level
    // representation, use the `ebpf::to_insn_vec()` function instead.
    let json_insns: Vec<_> = disassembler::to_insn_vec(prog)
        .into_iter()
        .map(|insn| {
            object!(
                "opc"  => format!("{:#x}", insn.opc), // => insn.opc,
                "dst"  => format!("{:#x}", insn.dst), // => insn.dst,
                "src"  => format!("{:#x}", insn.src), // => insn.src,
                "off"  => format!("{:#x}", insn.off), // => insn.off,
                // Warning: `imm` is stored as an i64 so that `lddw` values fit. Emitting it as a
                // JSON number would be fine as is, but when formatting it as a hexadecimal string
                // we cast to i32 so that negative immediates of ordinary instructions do not
                // pick up a spurious `ffffffff` prefix.
                "imm"  => format!("{:#x}", insn.imm as i32), // => insn.imm,
                "desc" => insn.desc
            )
        })
        .collect();

    // Take the length before the vector is moved into the document.
    let count = json_insns.len();
    let document = object!(
        "size"  => count,
        "insns" => json_insns
    );
    json::stringify_pretty(document, 4)
}
// Load a program from an object file, and prints it to standard output as a JSON string.
fn main() {
// Let's reuse this file from `load_elf/example`.
let filename = "examples/load_elf__block_a_port.elf";
let path = PathBuf::from(filename);
let file = match elf::File::open_path(path) {
Ok(f) => f,
Err(e) => panic!("Error: {:?}", e),
};
let text_scn = match file.get_section(".classifier") {
Some(s) => s,
None => panic!("Failed to look up .classifier section"),
};
let prog = &text_scn.data;
println!("{}", to_json(prog));
}

View File

@ -0,0 +1,78 @@
// SPDX-License-Identifier: (Apache-2.0 OR MIT)
// Copyright 2017 6WIND S.A. <quentin.monnet@6wind.com>
extern crate rbpf;
use rbpf::helpers;
// The main objectives of this example are to show:
//
// * the use of EbpfVmNoData function,
// * and the use of a helper.
//
// The two eBPF programs are independent and are not related to one another.
/// Runs two unrelated eBPF programs in an `EbpfVmNoData` VM: a small arithmetic program, then a
/// program that calls a time helper and whose result is printed as an uptime.
fn main() {
    #[rustfmt::skip]
    let prog1 = &[
        0xb4, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // mov32 r0, 0
        0xb4, 0x01, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, // mov32 r1, 2
        0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, // add32 r0, 1
        0x0c, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // add32 r0, r1
        0x95, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // exit and return r0
    ];

    // The helper we register below is `bpf_time_getns()`, which is similar to the Linux kernel
    // helper `bpf_ktime_getns()`. rbpf::helpers exposes the index of that in-kernel helper as a
    // constant so that programs stay compatible with the kernel; it is cast to a u8 here so it
    // can be placed directly in the instruction bytes.
    let helper_idx = helpers::BPF_KTIME_GETNS_IDX as u8;
    #[rustfmt::skip]
    let prog2 = &[
        0xb7, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // mov64 r1, 0
        0xb7, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // mov64 r2, 0
        0xb7, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // mov64 r3, 0
        0xb7, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // mov64 r4, 0
        0xb7, 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // mov64 r5, 0
        0x85, 0x00, 0x00, 0x00, helper_idx, 0x00, 0x00, 0x00, // call helper <helper_idx>
        0x95, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // exit and return r0
    ];

    // Create a VM: this one takes no data. Load prog1 in it and execute it.
    let mut vm = rbpf::EbpfVmNoData::new(Some(prog1)).unwrap();
    assert_eq!(vm.execute_program().unwrap(), 0x3);

    // Since EbpfVmNoData does not take any memory area, a program's return value is mostly
    // deterministic, which is why prog1 always returns 3. Helpers are the exception: they may
    // return a different value on every call.
    //
    // prog2 uses a helper to get the elapsed time since boot: uptime, reimplemented in eBPF,
    // in Rust.
    vm.set_program(prog2).unwrap();
    vm.register_helper(helpers::BPF_KTIME_GETNS_IDX, helpers::bpf_time_getns)
        .unwrap();

    // Use the JIT where it is available, otherwise fall back to the interpreter.
    #[cfg(all(not(windows), feature = "std"))]
    let time = {
        vm.jit_compile().unwrap();
        // NOTE(review): the invariants required by the unsafe JIT entry point are not visible
        // from this example; confirm against the rbpf documentation.
        unsafe { vm.execute_program_jit().unwrap() }
    };
    #[cfg(any(windows, not(feature = "std")))]
    let time = vm.execute_program().unwrap();

    // Break the nanosecond count into days, hh:mm:ss and the sub-second remainder.
    let nanos_per_sec = 10u64.pow(9);
    let total_secs = time / nanos_per_sec;
    let nanosec = time % nanos_per_sec;
    let days = total_secs / (60 * 60 * 24);
    let hours = total_secs / (60 * 60) % 24;
    let minutes = total_secs / 60 % 60;
    let seconds = total_secs % 60;
    println!(
        "Uptime: {:#x} ns == {} days {:02}:{:02}:{:02}, {} ns",
        time, days, hours, minutes, seconds, nanosec
    );
}