Introduce cargo osdk profile for flame graph profiling

This commit is contained in:
Zhang Junyang
2024-09-21 20:37:30 +08:00
committed by Tate, Hongliang Tian
parent ee1656ba35
commit eca9aacbf0
8 changed files with 781 additions and 27 deletions

View File

@ -5,13 +5,14 @@
mod build;
mod debug;
mod new;
mod profile;
mod run;
mod test;
mod util;
pub use self::{
build::execute_build_command, debug::execute_debug_command, new::execute_new_command,
run::execute_run_command, test::execute_test_command,
profile::execute_profile_command, run::execute_run_command, test::execute_test_command,
};
use crate::arch::get_default_arch;

View File

@ -0,0 +1,363 @@
// SPDX-License-Identifier: MPL-2.0
//! OSDK profile command implementation.
//!
//! The profile command is used to collect stack traces when running the target
//! kernel in QEMU. It attaches to the GDB server initiated with [`super::run`]
//! and collects the stack trace periodically. The collected data can be
//! further analyzed using tools like
//! [flame graph](https://github.com/brendangregg/FlameGraph).
use crate::{
cli::{ProfileArgs, ProfileFormat},
commands::util::bin_file_name,
util::{get_current_crate_info, get_target_directory},
};
use regex::Regex;
use std::{collections::HashMap, fs::File, io::Write, path::PathBuf, process::Command};
pub fn execute_profile_command(_profile: &str, args: &ProfileArgs) {
if let Some(parse_input) = &args.parse {
do_parse_stack_traces(parse_input, args);
} else {
do_collect_stack_traces(args);
}
}
fn do_parse_stack_traces(target_file: &PathBuf, args: &ProfileArgs) {
let out_args = &args.out_args;
let in_file = File::open(target_file).expect("Failed to open input file");
let profile: Profile =
serde_json::from_reader(in_file).expect("Failed to parse the input JSON file");
let out_file = File::create(out_args.output_path(Some(target_file)))
.expect("Failed to create output file");
let out_format = out_args.format();
if matches!(out_format, ProfileFormat::Json) {
println!("Warning: parsing JSON profile to the same format.");
return;
}
profile.serialize_to(out_format, out_args.cpu_mask, out_file);
}
fn do_collect_stack_traces(args: &ProfileArgs) {
let file_path = get_target_directory()
.join("osdk")
.join(get_current_crate_info().name)
.join(bin_file_name());
let remote = &args.remote;
let samples = &args.samples;
let interval = &args.interval;
let mut profile_buffer = ProfileBuffer::new();
println!("Profiling \"{}\" at \"{}\".", file_path.display(), remote);
use indicatif::{ProgressIterator, ProgressStyle};
let style = ProgressStyle::default_bar().progress_chars("#>-");
for _ in (0..*samples).progress_with_style(style) {
// Use GDB to halt the remote, get stack traces, and resume
let output = Command::new("gdb")
.args([
"-batch",
"-ex",
"set pagination 0",
"-ex",
&format!("file {}", file_path.display()),
"-ex",
&format!("target remote {}", remote),
"-ex",
"thread apply all bt -frame-arguments presence -frame-info short-location",
])
.output()
.expect("Failed to execute gdb");
for line in String::from_utf8_lossy(&output.stdout).lines() {
profile_buffer.append_raw_line(line);
}
// Sleep between samples
std::thread::sleep(std::time::Duration::from_secs_f64(*interval));
}
let out_args = &args.out_args;
let out_path = out_args.output_path(None);
println!(
"Profile data collected. Writing the output to \"{}\".",
out_path.display()
);
let out_file = File::create(out_path).expect("Failed to create output file");
profile_buffer
.cur_profile
.serialize_to(out_args.format(), out_args.cpu_mask, out_file);
}
#[derive(Debug, Default, Serialize, Deserialize)]
struct Profile {
// Index 0: capture; Index 1: CPU ID; Index 2: stack frame
stack_traces: Vec<HashMap<u32, Vec<String>>>,
}
impl Profile {
fn serialize_to<W: Write>(&self, format: ProfileFormat, cpu_mask: u128, mut target: W) {
match format {
ProfileFormat::Folded => {
let mut folded = HashMap::new();
// Process each stack trace and fold it for flame graph format
for capture in &self.stack_traces {
for (cpu_id, stack) in capture {
if *cpu_id >= 128 || cpu_mask & (1u128 << *cpu_id) == 0 {
continue;
}
// Fold the stack trace
let folded_key = stack.iter().rev().cloned().collect::<Vec<_>>().join(";");
*folded.entry(folded_key).or_insert(0) += 1;
}
}
// Write the folded traces
for (key, count) in folded {
writeln!(&mut target, "{} {}", key, count)
.expect("Failed to write folded output");
}
}
ProfileFormat::Json => {
// Filter out the stack traces based on the CPU mask
let filtered_traces = self
.stack_traces
.iter()
.map(|capture| {
capture
.iter()
.filter(|(cpu_id, _)| {
**cpu_id < 128 && cpu_mask & (1u128 << **cpu_id) != 0
})
.map(|(cpu_id, stack)| (*cpu_id, stack.clone()))
.collect::<HashMap<_, _>>()
})
.collect::<Vec<_>>();
let filtered = Profile {
stack_traces: filtered_traces,
};
serde_json::to_writer(target, &filtered).expect("Failed to write JSON output");
}
}
}
}
#[derive(Debug)]
struct ProfileBuffer {
cur_profile: Profile,
// Pre-compile regex patterns for cleaning the input.
hex_in_pattern: Regex,
impl_pattern: Regex,
// The state
cur_cpu: Option<u32>,
}
impl ProfileBuffer {
fn new() -> Self {
Self {
cur_profile: Profile::default(),
hex_in_pattern: Regex::new(r"0x[0-9a-f]+ in").unwrap(),
impl_pattern: Regex::new(r"::\{.*?\}").unwrap(),
cur_cpu: None,
}
}
fn append_raw_line(&mut self, line: &str) {
// Lines starting with '#' are stack frames
if !line.starts_with('#') {
// Otherwise it may initiate a new capture or a new CPU stack trace
// Check if this is a new CPU trace (starts with `Thread` and contains `CPU#N`)
if line.starts_with("Thread") {
let cpu_id_idx = line.find("CPU#").unwrap();
let cpu_id = line[cpu_id_idx + 4..]
.split_whitespace()
.next()
.unwrap()
.parse::<u32>()
.unwrap();
self.cur_cpu = Some(cpu_id);
// if the new CPU id is already in the stack traces, start a new capture
match self.cur_profile.stack_traces.last() {
Some(capture) => {
if capture.contains_key(&cpu_id) {
self.cur_profile.stack_traces.push(HashMap::new());
}
}
None => {
self.cur_profile.stack_traces.push(HashMap::new());
}
}
}
return;
}
// Clean the input line
let mut processed = line.trim().to_string();
// Remove everything between angle brackets '<...>'
processed = Self::remove_generics(&processed);
// Remove "::impl{}" and hex addresses
processed = self.impl_pattern.replace_all(&processed, "").to_string();
processed = self.hex_in_pattern.replace_all(&processed, "").to_string();
// Remove unnecessary parts like "()" and "(...)"
processed = processed.replace("(...)", "");
processed = processed.replace("()", "");
// Split the line by spaces and expect the second part to be the function name
let parts: Vec<&str> = processed.split_whitespace().collect();
if parts.len() > 1 {
let func_name = parts[1].to_string();
// Append the function name to the latest stack trace
let current_capture = self.cur_profile.stack_traces.last_mut().unwrap();
let cur_cpu = self.cur_cpu.unwrap();
current_capture.entry(cur_cpu).or_default().push(func_name);
}
}
fn remove_generics(line: &str) -> String {
let mut result = String::new();
let mut bracket_depth = 0;
for c in line.chars() {
match c {
'<' => bracket_depth += 1,
'>' => {
if bracket_depth > 0 {
bracket_depth -= 1;
}
}
_ => {
if bracket_depth == 0 {
result.push(c);
}
}
}
}
result
}
}
#[cfg(test)]
#[test]
fn test_profile_parse_raw() {
let test_case = r#"
0xffffffff880b0f6f in aster_nix::sched::priority_scheduler::{impl#4}::pick_next_current<ostd::task::Task> (self=0xffffffff88489808 <_ZN4ostd2mm14heap_allocator10HEAP_SPACE17h85a5340e6564f69dE.llvm.15305379556759765072+992480>) at src/sched/priority_scheduler.rs:156
156 let next_entity = if !self.real_time_entities.is_empty() {
Thread 2 (Thread 1.2 (CPU#1 [running])):
#0 ostd::sync::spin::SpinLock<aster_nix::sched::priority_scheduler::PreemptRunQueue<ostd::task::Task>, ostd::sync::spin::PreemptDisabled>::acquire_lock<aster_nix::sched::priority_scheduler::PreemptRunQueue<ostd::task::Task>, ostd::sync::spin::PreemptDisabled> (...)
#1 ostd::sync::spin::SpinLock<aster_nix::sched::priority_scheduler::PreemptRunQueue<ostd::task::Task>, ostd::sync::spin::PreemptDisabled>::lock<aster_nix::sched::priority_scheduler::PreemptRunQueue<ostd::task::Task>, ostd::sync::spin::PreemptDisabled> (...)
#2 aster_nix::sched::priority_scheduler::{impl#1}::local_mut_rq_with<ostd::task::Task> (...)
#3 0xffffffff8826b205 in ostd::task::scheduler::reschedule<ostd::task::scheduler::yield_now::{closure_env#0}> (...)
#4 ostd::task::scheduler::yield_now ()
#5 0xffffffff880a92c5 in ostd::task::Task::yield_now ()
#6 aster_nix::thread::Thread::yield_now ()
#7 aster_nix::ap_init::ap_idle_thread ()
#8 core::ops::function::Fn::call<fn(), ()> ()
#9 0xffffffff880b341e in alloc::boxed::{impl#50}::call<(), (dyn core::ops::function::Fn<(), Output=()> + core::marker::Send + core::marker::Sync), alloc::alloc::Global> (...)
#10 aster_nix::thread::kernel_thread::create_new_kernel_task::{closure#0} ()
#11 0xffffffff882a3ea8 in alloc::boxed::{impl#50}::call<(), (dyn core::ops::function::Fn<(), Output=()> + core::marker::Send + core::marker::Sync), alloc::alloc::Global> (...)
#12 ostd::task::{impl#2}::build::kernel_task_entry ()
#13 0x0000000000000000 in ?? ()
Thread 1 (Thread 1.1 (CPU#0 [running])):
#0 aster_nix::sched::priority_scheduler::{impl#1}::local_mut_rq_with<ostd::task::Task> (...)
#1 0xffffffff8826b205 in ostd::task::scheduler::reschedule<ostd::task::scheduler::yield_now::{closure_env#0}> (...)
#2 ostd::task::scheduler::yield_now ()
#3 0xffffffff880a92c5 in ostd::task::Task::yield_now ()
#4 aster_nix::thread::Thread::yield_now ()
#5 aster_nix::ap_init::ap_idle_thread ()
#6 core::ops::function::Fn::call<fn(), ()> ()
#7 0xffffffff880b341e in alloc::boxed::{impl#50}::call<(), (dyn core::ops::function::Fn<(), Output=()> + core::marker::Send + core::marker::Sync), alloc::alloc::Global> (...)
#8 aster_nix::thread::kernel_thread::create_new_kernel_task::{closure#0} ()
#9 0xffffffff882a3ea8 in alloc::boxed::{impl#50}::call<(), (dyn core::ops::function::Fn<(), Output=()> + core::marker::Send + core::marker::Sync), alloc::alloc::Global> (...)
#10 ostd::task::{impl#2}::build::kernel_task_entry ()
#11 0x0000000000000000 in ?? ()
[Inferior 1 (process 1) detached]
0xffffffff880b0f6f in aster_nix::sched::priority_scheduler::{impl#4}::pick_next_current<ostd::task::Task> (self=0xffffffff88489808 <_ZN4ostd2mm14heap_allocator10HEAP_SPACE17h85a5340e6564f69dE.llvm.15305379556759765072+992480>) at src/sched/priority_scheduler.rs:156
156 let next_entity = if !self.real_time_entities.is_empty() {
Thread 2 (Thread 1.2 (CPU#1 [running])):
#0 0xffffffff880b0f6f in aster_nix::sched::priority_scheduler::{impl#4}::pick_next_current<ostd::task::Task> (...)
#1 0xffffffff8826b3e0 in ostd::task::scheduler::yield_now::{closure#0} (...)
#2 ostd::task::scheduler::reschedule::{closure#0}<ostd::task::scheduler::yield_now::{closure_env#0}> (...)
#3 0xffffffff880b0cff in aster_nix::sched::priority_scheduler::{impl#1}::local_mut_rq_with<ostd::task::Task> (...)
#4 0xffffffff8826b205 in ostd::task::scheduler::reschedule<ostd::task::scheduler::yield_now::{closure_env#0}> (...)
#5 ostd::task::scheduler::yield_now ()
#6 0xffffffff880a92c5 in ostd::task::Task::yield_now ()
#7 aster_nix::thread::Thread::yield_now ()
#8 aster_nix::ap_init::ap_idle_thread ()
#9 core::ops::function::Fn::call<fn(), ()> ()
#10 0xffffffff880b341e in alloc::boxed::{impl#50}::call<(), (dyn core::ops::function::Fn<(), Output=()> + core::marker::Send + core::marker::Sync), alloc::alloc::Global> (...)
#11 aster_nix::thread::kernel_thread::create_new_kernel_task::{closure#0} ()
#12 0xffffffff882a3ea8 in alloc::boxed::{impl#50}::call<(), (dyn core::ops::function::Fn<(), Output=()> + core::marker::Send + core::marker::Sync), alloc::alloc::Global> (...)
#13 ostd::task::{impl#2}::build::kernel_task_entry ()
#14 0x0000000000000000 in ?? ()
Thread 1 (Thread 1.1 (CPU#0 [running])):
#0 ostd::arch::x86::interrupts_ack (...)
#1 0xffffffff8828d704 in ostd::trap::handler::call_irq_callback_functions (...)
#2 0xffffffff88268e48 in ostd::arch::x86::trap::trap_handler (...)
#3 0xffffffff88274db6 in __from_kernel ()
#4 0x0000000000000001 in ?? ()
#5 0x0000000000000001 in ?? ()
#6 0x00000000000001c4 in ?? ()
#7 0xffffffff882c8580 in ?? ()
#8 0x0000000000000002 in ?? ()
#9 0xffffffff88489808 in _ZN4ostd2mm14heap_allocator10HEAP_SPACE17h85a5340e6564f69dE.llvm.15305379556759765072 ()
#10 0x0000000000000000 in ?? ()
[Inferior 1 (process 1) detached]
"#;
let mut buffer = ProfileBuffer::new();
for line in test_case.lines() {
buffer.append_raw_line(line);
}
let profile = &buffer.cur_profile;
assert_eq!(profile.stack_traces.len(), 2);
assert_eq!(profile.stack_traces[0].len(), 2);
assert_eq!(profile.stack_traces[1].len(), 2);
let stack00 = profile.stack_traces[0].get(&0).unwrap();
assert_eq!(stack00.len(), 12);
assert_eq!(
stack00[0],
"aster_nix::sched::priority_scheduler::local_mut_rq_with"
);
assert_eq!(stack00[11], "??");
let stack01 = profile.stack_traces[0].get(&1).unwrap();
assert_eq!(stack01.len(), 14);
assert_eq!(stack01[9], "alloc::boxed::call");
let stack10 = profile.stack_traces[1].get(&0).unwrap();
assert_eq!(stack10.len(), 11);
assert_eq!(
stack10[9],
"_ZN4ostd2mm14heap_allocator10HEAP_SPACE17h85a5340e6564f69dE.llvm.15305379556759765072"
);
let stack11 = profile.stack_traces[1].get(&1).unwrap();
assert_eq!(stack11.len(), 15);
assert_eq!(
stack11[0],
"aster_nix::sched::priority_scheduler::pick_next_current"
);
assert_eq!(stack11[14], "??");
}

View File

@ -8,7 +8,7 @@ use crate::{
};
pub fn execute_run_command(config: &Config, gdb_server_args: &GdbServerArgs) {
if gdb_server_args.is_gdb_enabled {
if gdb_server_args.enabled {
use std::env;
env::set_var(
"RUSTFLAGS",
@ -21,19 +21,19 @@ pub fn execute_run_command(config: &Config, gdb_server_args: &GdbServerArgs) {
let target_name = get_current_crate_info().name;
let mut config = config.clone();
if gdb_server_args.is_gdb_enabled {
if gdb_server_args.enabled {
let qemu_gdb_args = {
let gdb_stub_addr = gdb_server_args.gdb_server_addr.as_str();
let gdb_stub_addr = gdb_server_args.host_addr.as_str();
match gdb::stub_type_of(gdb_stub_addr) {
gdb::StubAddrType::Unix => {
format!(
" -chardev socket,path={},server=on,wait=off,id=gdb0 -gdb chardev:gdb0 -S",
" -chardev socket,path={},server=on,wait=off,id=gdb0 -gdb chardev:gdb0",
gdb_stub_addr
)
}
gdb::StubAddrType::Tcp => {
format!(
" -gdb tcp:{} -S",
" -gdb tcp:{}",
gdb::tcp_addr_util::format_tcp_addr(gdb_stub_addr)
)
}
@ -41,6 +41,10 @@ pub fn execute_run_command(config: &Config, gdb_server_args: &GdbServerArgs) {
};
config.run.qemu.args += &qemu_gdb_args;
if gdb_server_args.wait_client {
config.run.qemu.args += " -S";
}
// Ensure debug info added when debugging in the release profile.
if config.run.build.profile.contains("release") {
config
@ -53,7 +57,7 @@ pub fn execute_run_command(config: &Config, gdb_server_args: &GdbServerArgs) {
let _vsc_launch_file = gdb_server_args.vsc_launch_file.then(|| {
vsc::check_gdb_config(gdb_server_args);
let profile = super::util::profile_name_adapter(&config.run.build.profile);
vsc::VscLaunchConfig::new(profile, &gdb_server_args.gdb_server_addr)
vsc::VscLaunchConfig::new(profile, &gdb_server_args.host_addr)
});
let default_bundle_directory = osdk_output_directory.join(target_name);
@ -205,7 +209,7 @@ mod vsc {
use crate::{error::Errno, error_msg};
use std::process::exit;
if !args.is_gdb_enabled {
if !args.enabled {
error_msg!(
"No need for a VSCode launch file without launching GDB server,\
pass '-h' for help"
@ -214,7 +218,7 @@ mod vsc {
}
// check GDB server address
let gdb_stub_addr = args.gdb_server_addr.as_str();
let gdb_stub_addr = args.host_addr.as_str();
if gdb_stub_addr.is_empty() {
error_msg!("GDB server address is required to generate a VSCode launch file");
exit(Errno::ParseMetadata as _);