diff --git a/Makefile b/Makefile index 768bcb6a..4f2185e9 100644 --- a/Makefile +++ b/Makefile @@ -21,7 +21,7 @@ OSTD_TASK_STACK_SIZE_IN_PAGES ?= 64 # GDB debugging and profiling options. GDB_TCP_PORT ?= 1234 -GDB_PROFILE_FORMAT ?= folded +GDB_PROFILE_FORMAT ?= flame-graph GDB_PROFILE_COUNT ?= 200 GDB_PROFILE_INTERVAL ?= 0.1 # End of GDB options. diff --git a/docs/src/osdk/reference/commands/profile.md b/docs/src/osdk/reference/commands/profile.md index 96de52ce..daf6d398 100644 --- a/docs/src/osdk/reference/commands/profile.md +++ b/docs/src/osdk/reference/commands/profile.md @@ -4,9 +4,9 @@ The profile command is used to collect stack traces when running the target kernel in QEMU. It attaches to the GDB server initiated with the run subcommand -and collects the stack trace periodically. The collected data can be -further analyzed using tools like -[flame graph](https://github.com/brendangregg/FlameGraph). +and collects the stack trace periodically. The collected information can be +used to directly generate a flame graph, or be stored for later analysis using +[the original flame graph tool](https://github.com/brendangregg/FlameGraph). ## Options @@ -33,10 +33,11 @@ Parse a collected JSON profile file into other formats. Possible values: - `json`: The parsed stack trace log from GDB in JSON. - `folded`: The folded stack trace for flame graph. + - `flame-graph`: An SVG flame graph. If the user does not specify the format, it will be inferred from the output file extension. If the output file does not have an extension, -the default format is folded stack traces. +the default format is flame graph. 
`--cpu-mask `: diff --git a/osdk/Cargo.lock b/osdk/Cargo.lock index 0594daf5..c50b84b4 100644 --- a/osdk/Cargo.lock +++ b/osdk/Cargo.lock @@ -15,6 +15,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011" dependencies = [ "cfg-if", + "getrandom", "once_cell", "version_check", "zerocopy", @@ -98,6 +99,12 @@ dependencies = [ "windows-sys", ] +[[package]] +name = "arrayvec" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" + [[package]] name = "assert_cmd" version = "2.0.14" @@ -125,6 +132,12 @@ version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" +[[package]] +name = "bitflags" +version = "2.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de" + [[package]] name = "block-buffer" version = "0.10.4" @@ -181,6 +194,7 @@ dependencies = [ "env_logger", "indexmap", "indicatif", + "inferno", "lazy_static", "linux-bzimage-builder", "log", @@ -316,6 +330,21 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "crossbeam-channel" +version = "0.5.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33480d6946193aa8033910124896ca395333cae7e2d1113d1fef6c3272217df2" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80" + [[package]] name = "crypto-common" version = "0.1.6" @@ -332,6 +361,20 @@ version = "0.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7762d17f1241643615821a8455a0b2c3e803784b058693d990b11f2dce25a0ca" 
+[[package]] +name = "dashmap" +version = "6.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5041cc499144891f3790297212f32a74fb938e5136a14943f338ef9e0ae276cf" +dependencies = [ + "cfg-if", + "crossbeam-utils", + "hashbrown", + "lock_api", + "once_cell", + "parking_lot_core", +] + [[package]] name = "difflib" version = "0.4.0" @@ -399,6 +442,17 @@ dependencies = [ "version_check", ] +[[package]] +name = "getrandom" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7" +dependencies = [ + "cfg-if", + "libc", + "wasi", +] + [[package]] name = "hashbrown" version = "0.14.3" @@ -415,6 +469,12 @@ version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" +[[package]] +name = "hermit-abi" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fbf6a919d6cf397374f7dfeeea91d974c7c0a7221d0d0f4f20d859d329e53fcc" + [[package]] name = "humantime" version = "2.1.0" @@ -467,6 +527,29 @@ dependencies = [ "unicode-width", ] +[[package]] +name = "inferno" +version = "0.11.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "232929e1d75fe899576a3d5c7416ad0d88dbfbb3c3d6aa00873a7408a50ddb88" +dependencies = [ + "ahash", + "clap", + "crossbeam-channel", + "crossbeam-utils", + "dashmap", + "env_logger", + "indexmap", + "is-terminal", + "itoa", + "log", + "num-format", + "once_cell", + "quick-xml", + "rgb", + "str_stack", +] + [[package]] name = "instant" version = "0.1.13" @@ -476,6 +559,17 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "is-terminal" +version = "0.4.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "261f68e344040fbd0edea105bef17c66edf46f984ddb1115b775ce31be948f4b" +dependencies = [ + "hermit-abi", + "libc", + 
"windows-sys", +] + [[package]] name = "itoa" version = "1.0.10" @@ -499,9 +593,9 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" [[package]] name = "libc" -version = "0.2.153" +version = "0.2.159" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd" +checksum = "561d97a539a36e26a9a5fad1ea11a3039a67714694aaa379433e580854bc3dc5" [[package]] name = "libflate" @@ -531,13 +625,23 @@ dependencies = [ name = "linux-bzimage-builder" version = "0.2.0" dependencies = [ - "bitflags", + "bitflags 1.3.2", "bytemuck", "libflate", "serde", "xmas-elf", ] +[[package]] +name = "lock_api" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17" +dependencies = [ + "autocfg", + "scopeguard", +] + [[package]] name = "log" version = "0.4.20" @@ -550,6 +654,16 @@ version = "2.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "523dc4f511e55ab87b694dc30d0f820d60906ef06413f93d4d7a1385599cc149" +[[package]] +name = "num-format" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a652d9771a63711fd3c3deb670acfbe5c30a4072e664d7a3bf5a9e1056ac72c3" +dependencies = [ + "arrayvec", + "itoa", +] + [[package]] name = "num-traits" version = "0.2.19" @@ -571,6 +685,19 @@ version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" +[[package]] +name = "parking_lot_core" +version = "0.9.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e401f977ab385c9e4e3ab30627d6f26d00e2c73eef317493c4ec6d468726cf8" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-targets", +] + [[package]] name = "portable-atomic" version = "1.8.0" @@ -613,6 
+740,15 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "quick-xml" +version = "0.26.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f50b1c63b38611e7d4d7f68b82d3ad0cc71a2ad2e7f61fc10f1328d917c93cd" +dependencies = [ + "memchr", +] + [[package]] name = "quote" version = "1.0.35" @@ -622,6 +758,15 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "redox_syscall" +version = "0.5.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "355ae415ccd3a04315d3f8246e86d67689ea74d88d915576e1589a351062a13b" +dependencies = [ + "bitflags 2.6.0", +] + [[package]] name = "regex" version = "1.10.4" @@ -660,6 +805,15 @@ dependencies = [ "memchr", ] +[[package]] +name = "rgb" +version = "0.8.50" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57397d16646700483b67d2dd6511d79318f9d057fdbd21a4066aeac8b41d310a" +dependencies = [ + "bytemuck", +] + [[package]] name = "rle-decode-fast" version = "1.0.3" @@ -672,6 +826,12 @@ version = "1.0.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e86697c916019a8588c99b5fac3cead74ec0b4b819707a682fd4d23fa0ce1ba1" +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + [[package]] name = "serde" version = "1.0.197" @@ -729,6 +889,18 @@ version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" +[[package]] +name = "smallvec" +version = "1.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" + +[[package]] +name = "str_stack" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"9091b6114800a5f2141aee1d1b9d6ca3592ac062dc5decb3764ec5895a47b4eb" + [[package]] name = "strsim" version = "0.11.0" @@ -826,6 +998,12 @@ dependencies = [ "libc", ] +[[package]] +name = "wasi" +version = "0.11.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" + [[package]] name = "wasm-bindgen" version = "0.2.93" diff --git a/osdk/Cargo.toml b/osdk/Cargo.toml index bc4943a0..2b00eb81 100644 --- a/osdk/Cargo.toml +++ b/osdk/Cargo.toml @@ -21,6 +21,7 @@ version = "0.2.0" clap = { version = "4.4.17", features = ["cargo", "derive"] } chrono = "0.4.38" env_logger = "0.11.0" +inferno = "0.11.21" indexmap = "2.2.1" indicatif = "0.17.8" # For a commandline progress bar lazy_static = "1.4.0" diff --git a/osdk/src/cli.rs b/osdk/src/cli.rs index f88abf31..14ec5575 100644 --- a/osdk/src/cli.rs +++ b/osdk/src/cli.rs @@ -254,9 +254,11 @@ pub struct ProfileArgs { pub enum ProfileFormat { /// The raw stack trace log parsed from GDB in JSON Json, - /// The folded stack trace for a - /// [flame graph](https://github.com/brendangregg/FlameGraph) + /// The folded stack trace for generating a flame graph later using + /// [the original tool](https://github.com/brendangregg/FlameGraph) Folded, + /// A SVG flame graph + FlameGraph, } impl ProfileFormat { @@ -264,6 +266,7 @@ impl ProfileFormat { match self { ProfileFormat::Json => "json", ProfileFormat::Folded => "folded", + ProfileFormat::FlameGraph => "svg", } } } @@ -291,17 +294,18 @@ impl DebugProfileOutArgs { /// /// If the user does not specify the format, it will be inferred from the /// output file extension. If the output file does not have an extension, - /// the default format is folded stack traces. + /// the default format is flame graph. 
pub fn format(&self) -> ProfileFormat { self.format.unwrap_or_else(|| { if self.output.is_some() { match self.output.as_ref().unwrap().extension() { Some(ext) if ext == "folded" => ProfileFormat::Folded, Some(ext) if ext == "json" => ProfileFormat::Json, - _ => ProfileFormat::Folded, + Some(ext) if ext == "svg" => ProfileFormat::FlameGraph, + _ => ProfileFormat::FlameGraph, } } else { - ProfileFormat::Folded + ProfileFormat::FlameGraph } }) } diff --git a/osdk/src/commands/profile.rs b/osdk/src/commands/profile.rs index 2e6bf00a..587a645d 100644 --- a/osdk/src/commands/profile.rs +++ b/osdk/src/commands/profile.rs @@ -8,6 +8,8 @@ //! further analyzed using tools like //! [flame graph](https://github.com/brendangregg/FlameGraph). +use inferno::flamegraph; + use crate::{ cli::{ProfileArgs, ProfileFormat}, commands::util::bin_file_name, @@ -103,51 +105,69 @@ impl Profile { fn serialize_to(&self, format: ProfileFormat, cpu_mask: u128, mut target: W) { match format { ProfileFormat::Folded => { - let mut folded = HashMap::new(); + let folded = self.fold(cpu_mask); - // Process each stack trace and fold it for flame graph format - for capture in &self.stack_traces { - for (cpu_id, stack) in capture { - if *cpu_id >= 128 || cpu_mask & (1u128 << *cpu_id) == 0 { - continue; - } - - // Fold the stack trace - let folded_key = stack.iter().rev().cloned().collect::>().join(";"); - *folded.entry(folded_key).or_insert(0) += 1; - } - } - - // Write the folded traces + // Write the folded traces to the target text writer. 
for (key, count) in folded { writeln!(&mut target, "{} {}", key, count) .expect("Failed to write folded output"); } } ProfileFormat::Json => { - // Filter out the stack traces based on the CPU mask - let filtered_traces = self - .stack_traces - .iter() - .map(|capture| { - capture - .iter() - .filter(|(cpu_id, _)| { - **cpu_id < 128 && cpu_mask & (1u128 << **cpu_id) != 0 - }) - .map(|(cpu_id, stack)| (*cpu_id, stack.clone())) - .collect::>() - }) - .collect::>(); - - let filtered = Profile { - stack_traces: filtered_traces, - }; + let filtered = self.filter_cpu(cpu_mask); serde_json::to_writer(target, &filtered).expect("Failed to write JSON output"); } + ProfileFormat::FlameGraph => { + let folded = self.fold(cpu_mask); + + // Generate the flame graph folded text lines. + let lines = folded + .iter() + .map(|(key, count)| format!("{} {}", key, count)) + .collect::>(); + + // Generate the flame graph to the target SVG writer. + let mut opt = flamegraph::Options::default(); + flamegraph::from_lines(&mut opt, lines.iter().map(|s| s.as_str()), target).unwrap(); + } } } + + fn filter_cpu(&self, cpu_mask: u128) -> Profile { + let filtered_traces = self + .stack_traces + .iter() + .map(|capture| { + capture + .iter() + .filter(|(cpu_id, _)| **cpu_id < 128 && cpu_mask & (1u128 << **cpu_id) != 0) + .map(|(cpu_id, stack)| (*cpu_id, stack.clone())) + .collect::>() + }) + .collect::>(); + + Self { + stack_traces: filtered_traces, + } + } + + fn fold(&self, cpu_mask: u128) -> HashMap { + let mut folded = HashMap::new(); + + for capture in &self.stack_traces { + for (cpu_id, stack) in capture { + if *cpu_id >= 128 || cpu_mask & (1u128 << *cpu_id) == 0 { + continue; + } + + let folded_key = stack.iter().rev().cloned().collect::>().join(";"); + *folded.entry(folded_key).or_insert(0) += 1; + } + } + + folded + } } #[derive(Debug)]