From 841ded3bee62c337cc4782f85cb80b4fe721c12c Mon Sep 17 00:00:00 2001 From: Simon Gellis Date: Sun, 31 Aug 2025 23:01:50 -0400 Subject: [PATCH] Include inline functions when profiling --- Cargo.lock | 16 +++- Cargo.toml | 1 + src/emulator.rs | 25 +++--- src/emulator/game_info.rs | 128 +++++++++++++++++++++++++++++-- src/emulator/inline_stack_map.rs | 87 +++++++++++++++++++++ src/emulator/shrooms_vb_core.rs | 61 +++++++++++---- src/profiler.rs | 19 ++++- src/profiler/recording.rs | 23 ++++-- src/profiler/state.rs | 34 +++++--- 9 files changed, 342 insertions(+), 52 deletions(-) create mode 100644 src/emulator/inline_stack_map.rs diff --git a/Cargo.lock b/Cargo.lock index fb7bbe3..f5fd8c9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -35,7 +35,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dfbe277e56a376000877090da837660b4427aad530e3028d44e0bffe4f89a1c1" dependencies = [ "fallible-iterator", - "gimli", + "gimli 0.31.1", ] [[package]] @@ -1620,6 +1620,17 @@ dependencies = [ "stable_deref_trait", ] +[[package]] +name = "gimli" +version = "0.32.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc6298e594375a7fead9efd5568f0a46e6a154fb6a9bdcbe3c06946ffd81a5f6" +dependencies = [ + "fallible-iterator", + "indexmap", + "stable_deref_trait", +] + [[package]] name = "gl_generator" version = "0.14.0" @@ -2173,6 +2184,7 @@ dependencies = [ "fixed", "fxprof-processed-profile", "gilrs", + "gimli 0.32.2", "hex", "image", "itertools 0.14.0", @@ -3868,7 +3880,7 @@ dependencies = [ "debugid", "elsa", "flate2", - "gimli", + "gimli 0.31.1", "linux-perf-data", "lzma-rs", "macho-unwind-info", diff --git a/Cargo.toml b/Cargo.toml index 449e0ad..b9bdac6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -24,6 +24,7 @@ egui-wgpu = { version = "0.32", features = ["winit"] } fxprof-processed-profile = "0.8" fixed = { version = "1.28", features = ["num-traits"] } gilrs = { version = "0.11", features = ["serde-serialize"] } +gimli = "0.32" hex = "0.4" image = { version = "0.25", default-features = false, features = ["png"] } itertools = "0.14" diff --git a/src/emulator.rs b/src/emulator.rs index 2c31bde..5c2c91c 100644 --- a/src/emulator.rs +++ b/src/emulator.rs @@ -18,17 +18,20 @@ use tracing::{error, warn}; use crate::{ audio::Audio, - emulator::cart::Cart, graphics::TextureSink, memory::{MemoryRange, MemoryRegion}, }; +use cart::Cart; pub use game_info::GameInfo; +pub use inline_stack_map::InlineStack; +use inline_stack_map::InlineStackMap; use shrooms_vb_core::{EXPECTED_FRAME_SIZE, Sim, StopReason}; pub use shrooms_vb_core::{SimEvent, VBKey, VBRegister, VBWatchpointType}; mod address_set; mod cart; mod game_info; +mod inline_stack_map; mod shrooms_vb_core; #[derive(PartialEq, Eq, Hash, Clone, Copy, Debug)] @@ -231,11 +234,11 @@ impl Emulator { }) .is_ok() { - sim.monitor_events(true); + sim.monitor_events(true, cart.info.inline_stack_map().clone()); profiling = true; } if !profiling { - sim.monitor_events(false); + sim.monitor_events(false, InlineStackMap::empty()); } if self.sim_state[index].load(Ordering::Acquire) == SimState::Ready { @@ -476,15 +479,18 @@ impl Emulator { if !running { continue; } - if let Some(p) = profiler - && p.send(ProfileEvent::Update { + if let Some(p) = profiler { + let (event, inline_stack) = sim.take_profiler_updates(); + if p.send(ProfileEvent::Update { cycles, - event: sim.take_sim_event(), + event, + inline_stack, }) .is_err() - { - sim.monitor_events(false); - *profiler = None; + { + sim.monitor_events(false, InlineStackMap::empty()); + *profiler = None; + } } } @@ -842,6 +848,7 @@ pub enum ProfileEvent { Update { cycles: u32, event: Option, + inline_stack: Option, }, } diff --git a/src/emulator/game_info.rs b/src/emulator/game_info.rs index 0ff679f..9f72e7c 100644 --- a/src/emulator/game_info.rs +++ b/src/emulator/game_info.rs @@ -1,13 +1,16 @@ -use std::{path::Path, sync::Arc}; +use std::{borrow::Cow, path::Path, sync::Arc}; -use anyhow::Result; +use anyhow::{Result, bail}; use fxprof_processed_profile::{LibraryInfo, Symbol, SymbolTable, debugid::DebugId}; -use object::{Object, ObjectSymbol}; +use object::{Object, ObjectSection, ObjectSymbol}; use wholesym::samply_symbols::{DebugIdExt, demangle_any}; +use crate::emulator::inline_stack_map::{InlineStackMap, InlineStackMapBuilder}; + #[derive(Debug)] pub struct GameInfo { library_info: LibraryInfo, + inline_stack_map: InlineStackMap, } impl GameInfo { @@ -29,6 +32,9 @@ impl GameInfo { }); } + let inline_stack_map = + build_inline_stack_map(file).unwrap_or_else(|_| InlineStackMap::empty()); + let library_info = LibraryInfo { name: name.clone(), debug_name: name, @@ -40,7 +46,10 @@ impl GameInfo { symbol_table: Some(Arc::new(SymbolTable::new(symbols))), }; - Ok(Self { library_info }) + Ok(Self { + library_info, + inline_stack_map, + }) } pub fn empty(file_path: &Path) -> Self { @@ -55,7 +64,11 @@ impl GameInfo { arch: None, symbol_table: None, }; - Self { library_info } + let inline_stack_map = InlineStackMap::empty(); + Self { + library_info, + inline_stack_map, + } } pub fn name(&self) -> &str { @@ -65,6 +78,111 @@ impl GameInfo { pub fn library_info(&self) -> &LibraryInfo { &self.library_info } + + pub fn inline_stack_map(&self) -> &InlineStackMap { + &self.inline_stack_map + } +} + +fn build_inline_stack_map(file: object::File) -> Result { + let endian = if file.is_little_endian() { + gimli::RunTimeEndian::Little + } else { + gimli::RunTimeEndian::Big + }; + fn load_section<'a>(file: &'a object::File, id: gimli::SectionId) -> Result> { + let input = match file.section_by_name(id.name()) { + Some(section) => section.uncompressed_data()?, + None => Cow::Owned(vec![]), + }; + Ok(input) + } + let dorf = gimli::DwarfSections::load(|id| load_section(&file, id))?; + let dorf = dorf.borrow(|sec| gimli::EndianSlice::new(sec, endian)); + let mut units = dorf.units(); + let mut frames = InlineStackMap::builder(); + while let Some(header) = units.next()? { + let unit = dorf.unit(header)?; + let mut entree = unit.entries_tree(None)?; + let root = entree.root()?; + let mut ctx = ParseContext { + dorf: &dorf, + unit: &unit, + frames: &mut frames, + }; + parse_inline(&mut ctx, root)?; + } + Ok(frames.build()) +} + +type Reader<'a> = gimli::EndianSlice<'a, gimli::RunTimeEndian>; + +struct ParseContext<'a> { + dorf: &'a gimli::Dwarf>, + unit: &'a gimli::Unit>, + frames: &'a mut InlineStackMapBuilder, +} +impl ParseContext<'_> { + fn name_attr(&self, attr: gimli::AttributeValue) -> Result> { + match attr { + gimli::AttributeValue::DebugInfoRef(offset) => { + let mut units = self.dorf.units(); + while let Some(header) = units.next()? { + if let Some(offset) = offset.to_unit_offset(&header) { + let unit = self.dorf.unit(header)?; + return self.name_entry(&unit, offset); + } + } + Ok(None) + } + gimli::AttributeValue::UnitRef(offset) => self.name_entry(self.unit, offset), + other => { + bail!("unrecognized attr {other:?}"); + } + } + } + + fn name_entry( + &self, + unit: &gimli::Unit, + offset: gimli::UnitOffset, + ) -> Result> { + let abbreviations = self.dorf.abbreviations(&unit.header)?; + let mut entries = unit.header.entries_raw(&abbreviations, Some(offset))?; + let Some(abbrev) = entries.read_abbreviation()? else { + return Ok(None); + }; + let mut name = None; + for spec in abbrev.attributes() { + let attr = entries.read_attribute(*spec)?; + if attr.name() == gimli::DW_AT_linkage_name + || (attr.name() == gimli::DW_AT_name && name.is_none()) + { + name = Some(self.dorf.attr_string(unit, attr.value())?) + } + } + Ok(name.map(|n| demangle_any(&String::from_utf8_lossy(&n)))) + } +} + +fn parse_inline(ctx: &mut ParseContext, node: gimli::EntriesTreeNode) -> Result<()> { + if node.entry().tag() == gimli::DW_TAG_inlined_subroutine + && let Some(attr) = node.entry().attr_value(gimli::DW_AT_abstract_origin)? + && let Some(name) = ctx.name_attr(attr)? + { + let name = Arc::new(name); + let mut ranges = ctx.dorf.die_ranges(ctx.unit, node.entry())?; + while let Some(range) = ranges.next()? { + let start = range.begin as u32; + let end = range.end as u32; + ctx.frames.add(start, end, name.clone()); + } + } + let mut children = node.children(); + while let Some(child) = children.next()? { + parse_inline(ctx, child)?; + } + Ok(()) } fn name_and_path(file_path: &Path) -> (String, String) { diff --git a/src/emulator/inline_stack_map.rs b/src/emulator/inline_stack_map.rs new file mode 100644 index 0000000..ca1f50c --- /dev/null +++ b/src/emulator/inline_stack_map.rs @@ -0,0 +1,87 @@ +use std::{ + collections::{BTreeMap, HashMap}, + sync::Arc, +}; + +pub type InlineStack = Arc>>; + +#[derive(Debug, Clone)] +pub struct InlineStackMap { + entries: Vec<(u32, InlineStack)>, + empty: InlineStack, +} + +impl InlineStackMap { + pub fn empty() -> Self { + Self { + entries: vec![], + empty: Arc::new(vec![]), + } + } + pub fn builder() -> InlineStackMapBuilder { + InlineStackMapBuilder { + events: BTreeMap::new(), + } + } + pub fn get(&self, address: u32) -> &InlineStack { + match self.entries.binary_search_by_key(&address, |(a, _)| *a) { + Ok(index) => self.entries.get(index), + Err(after) => after.checked_sub(1).and_then(|i| self.entries.get(i)), + } + .map(|(_, s)| s) + .unwrap_or(&self.empty) + } +} + +#[derive(Default)] +struct Event { + end: usize, + start: Vec>, +} + +pub struct InlineStackMapBuilder { + events: BTreeMap, +} + +impl InlineStackMapBuilder { + pub fn add(&mut self, start: u32, end: u32, name: Arc) { + self.events.entry(start).or_default().start.push(name); + self.events.entry(end).or_default().end += 1; + } + + pub fn build(self) -> InlineStackMap { + let empty = Arc::new(vec![]); + let mut entries = vec![]; + let mut stack_indexes = vec![]; + let mut stack = vec![]; + let mut string_dedup = HashMap::new(); + let mut stack_dedup = BTreeMap::new(); + stack_dedup.insert(vec![], empty.clone()); + + for (address, event) in self.events { + for _ in 0..event.end { + stack.pop(); + stack_indexes.pop(); + } + for call in event.start { + if let Some(index) = string_dedup.get(&call) { + stack.push(call); + stack_indexes.push(*index); + } else { + let index = string_dedup.len(); + string_dedup.insert(call.clone(), index); + stack.push(call); + stack_indexes.push(index); + } + } + if let Some(stack) = stack_dedup.get(&stack_indexes) { + entries.push((address, stack.clone())); + } else { + let stack = Arc::new(stack.clone()); + stack_dedup.insert(stack_indexes.clone(), stack.clone()); + entries.push((address, stack)); + } + } + InlineStackMap { entries, empty } + } +} diff --git a/src/emulator/shrooms_vb_core.rs b/src/emulator/shrooms_vb_core.rs index 56684b6..b00e9b6 100644 --- a/src/emulator/shrooms_vb_core.rs +++ b/src/emulator/shrooms_vb_core.rs @@ -1,10 +1,12 @@ -use std::{borrow::Cow, ffi::c_void, ptr, slice}; +use std::{borrow::Cow, ffi::c_void, ptr, slice, sync::Arc}; use anyhow::{Result, anyhow}; use bitflags::bitflags; use num_derive::{FromPrimitive, ToPrimitive}; use serde::{Deserialize, Serialize}; +use crate::emulator::inline_stack_map::{InlineStack, InlineStackMap}; + use super::address_set::AddressSet; #[repr(C)] @@ -206,11 +208,9 @@ extern "C" fn on_execute(sim: *mut VB, address: u32, code: *const u16, length: c if data.monitor.enabled { // SAFETY: length is the length of code, in elements let code = unsafe { slice::from_raw_parts(code, length as usize) }; - if data.monitor.detect_event(sim, address, code) { - // Something interesting will happen after this instruction is run. - // The on_fetch callback will fire when it does. - unsafe { vb_set_fetch_callback(sim, Some(on_fetch)) }; - } + data.monitor.detect_event(sim, address, code); + // Something interesting will happen after this instruction is run. + // We'll react in the on_fetch callback it does. } let mut stopped = data.stop_reason.is_some() || data.monitor.event.is_some(); @@ -231,7 +231,7 @@ extern "C" fn on_execute(sim: *mut VB, address: u32, code: *const u16, length: c extern "C" fn on_fetch( sim: *mut VB, _fetch: c_int, - _address: u32, + address: u32, _value: *mut i32, _cycles: *mut u32, ) -> c_int { @@ -239,9 +239,13 @@ extern "C" fn on_fetch( // There is no way for the userdata to be null or otherwise invalid. let data: &mut VBState = unsafe { &mut *vb_get_user_data(sim).cast() }; data.monitor.event = data.monitor.queued_event.take(); + data.monitor.new_inline_stack = data.monitor.detect_new_inline_stack(address); unsafe { vb_set_exception_callback(sim, Some(on_exception)) }; - unsafe { vb_set_fetch_callback(sim, None) }; - 1 + if data.monitor.event.is_some() || data.monitor.new_inline_stack.is_some() { + 1 + } else { + 0 + } } #[unsafe(no_mangle)] @@ -249,17 +253,21 @@ extern "C" fn on_exception(sim: *mut VB, cause: *mut u16) -> c_int { // SAFETY: the *mut VB owns its userdata. // There is no way for the userdata to be null or otherwise invalid. let data: &mut VBState = unsafe { &mut *vb_get_user_data(sim).cast() }; - data.monitor.event = data.monitor.queued_event.take(); let cause = unsafe { *cause }; let pc = if cause == 0xff70 { 0xffffff60 } else { (cause & 0xfff0) as u32 | 0xffff0000 }; + data.monitor.event = data.monitor.queued_event.take(); + data.monitor.new_inline_stack = data.monitor.detect_new_inline_stack(pc); data.monitor.queued_event = Some(SimEvent::Interrupt(cause, pc)); unsafe { vb_set_exception_callback(sim, None) }; - unsafe { vb_set_fetch_callback(sim, Some(on_fetch)) }; - if data.monitor.event.is_some() { 1 } else { 0 } + if data.monitor.event.is_some() || data.monitor.new_inline_stack.is_some() { + 1 + } else { + 0 + } } #[unsafe(no_mangle)] @@ -339,18 +347,35 @@ struct EventMonitor { event: Option, queued_event: Option, just_halted: bool, + inline_stack_map: InlineStackMap, + new_inline_stack: Option, + last_inline_stack: InlineStack, } impl EventMonitor { fn new() -> Self { + let inline_stack_map = InlineStackMap::empty(); + let last_inline_stack = inline_stack_map.get(0).clone(); Self { enabled: false, event: None, queued_event: None, just_halted: false, + inline_stack_map, + new_inline_stack: None, + last_inline_stack, } } + fn detect_new_inline_stack(&mut self, address: u32) -> Option { + let stack = self.inline_stack_map.get(address); + if Arc::ptr_eq(stack, &self.last_inline_stack) { + return None; + } + self.last_inline_stack = stack.clone(); + Some(stack.clone()) + } + fn detect_event(&mut self, sim: *mut VB, address: u32, code: &[u16]) -> bool { self.queued_event = self.do_detect_event(sim, address, code); self.queued_event.is_some() @@ -492,14 +517,18 @@ impl Sim { unsafe { vb_reset(self.sim) }; } - pub fn monitor_events(&mut self, enabled: bool) { + pub fn monitor_events(&mut self, enabled: bool, inline_stack_map: InlineStackMap) { let state = self.get_state(); state.monitor.enabled = enabled; state.monitor.event = None; state.monitor.queued_event = None; + state.monitor.new_inline_stack = None; + state.monitor.last_inline_stack = inline_stack_map.get(0).clone(); + state.monitor.inline_stack_map = inline_stack_map; if enabled { unsafe { vb_set_execute_callback(self.sim, Some(on_execute)) }; unsafe { vb_set_exception_callback(self.sim, Some(on_exception)) }; + unsafe { vb_set_fetch_callback(self.sim, Some(on_fetch)) }; } else { if !state.needs_execute_callback() { unsafe { vb_set_execute_callback(self.sim, None) }; @@ -816,9 +845,11 @@ impl Sim { reason } - pub fn take_sim_event(&mut self) -> Option { + pub fn take_profiler_updates(&mut self) -> (Option, Option) { let data = self.get_state(); - data.monitor.event.take() + let event = data.monitor.event.take(); + let inline_stack = data.monitor.new_inline_stack.take(); + (event, inline_stack) } fn get_state(&mut self) -> &mut VBState { diff --git a/src/profiler.rs b/src/profiler.rs index fe16404..8148aad 100644 --- a/src/profiler.rs +++ b/src/profiler.rs @@ -6,7 +6,9 @@ use std::{ use anyhow::Result; use tokio::{select, sync::mpsc}; -use crate::emulator::{EmulatorClient, EmulatorCommand, GameInfo, ProfileEvent, SimEvent, SimId}; +use crate::emulator::{ + EmulatorClient, EmulatorCommand, GameInfo, InlineStack, ProfileEvent, SimEvent, SimId, +}; use recording::Recording; use state::ProgramState; @@ -131,11 +133,18 @@ async fn run_profile( async fn handle_event(event: ProfileEvent, session: &mut ProfilerSession) -> Result<()> { match event { ProfileEvent::Start { info } => session.start_profiling(info).await, - ProfileEvent::Update { cycles, event } => { + ProfileEvent::Update { + cycles, + event, + inline_stack, + } => { session.track_elapsed_cycles(cycles); if let Some(event) = event { session.track_event(event)?; } + if let Some(stack) = inline_stack { + session.track_inline_stack(stack); + } } } Ok(()) @@ -233,6 +242,12 @@ impl ProfilerSession { } } + fn track_inline_stack(&mut self, inline_stack: InlineStack) { + if let Some(program) = &mut self.program { + program.track_inline_stack(inline_stack); + } + } + fn start_recording(&mut self) { if let Some(program) = &self.program { self.recording = Some(Recording::new(program)); diff --git a/src/profiler/recording.rs b/src/profiler/recording.rs index 0723be9..69ee13e 100644 --- a/src/profiler/recording.rs +++ b/src/profiler/recording.rs @@ -89,14 +89,21 @@ impl Recording { thread: ThreadHandle, frames: &[StackFrame], ) -> Option { - self.profile.intern_stack_frames( - thread, - frames.iter().map(|f| FrameInfo { - frame: Frame::InstructionPointer(f.address as u64), - category_pair: CategoryHandle::OTHER.into(), - flags: FrameFlags::empty(), - }), - ) + let frames = frames + .iter() + .map(|f| { + let frame = match f { + StackFrame::Address(address) => Frame::InstructionPointer(*address as u64), + StackFrame::Label(label) => Frame::Label(self.profile.intern_string(label)), + }; + FrameInfo { + frame, + category_pair: CategoryHandle::OTHER.into(), + flags: FrameFlags::empty(), + } + }) + .collect::>(); + self.profile.intern_stack_frames(thread, frames.into_iter()) } } diff --git a/src/profiler/state.rs b/src/profiler/state.rs index ed7e9e8..f305bbc 100644 --- a/src/profiler/state.rs +++ b/src/profiler/state.rs @@ -3,7 +3,7 @@ use std::{collections::HashMap, sync::Arc}; use anyhow::{Result, bail}; use fxprof_processed_profile::LibraryInfo; -use crate::emulator::GameInfo; +use crate::emulator::{GameInfo, InlineStack}; pub struct ProgramState { info: Arc, @@ -11,20 +11,16 @@ pub struct ProgramState { context_stack: Vec, } -pub struct StackFrame { - pub address: u32, +pub enum StackFrame { + Address(u32), + Label(Arc), } pub const RESET_CODE: u16 = 0xfff0; impl ProgramState { pub async fn new(info: Arc) -> Self { let mut call_stacks = HashMap::new(); - call_stacks.insert( - RESET_CODE, - vec![StackFrame { - address: 0xfffffff0, - }], - ); + call_stacks.insert(RESET_CODE, vec![StackFrame::Address(0xfffffff0)]); Self { info, call_stacks, @@ -53,7 +49,7 @@ impl ProgramState { let Some(stack) = self.call_stacks.get_mut(code) else { bail!("missing stack {code:04x}"); }; - stack.push(StackFrame { address }); + stack.push(StackFrame::Address(address)); Ok(()) } @@ -89,7 +85,7 @@ impl ProgramState { self.context_stack.push(code); if self .call_stacks - .insert(code, vec![StackFrame { address }]) + .insert(code, vec![StackFrame::Address(address)]) .is_some() { bail!("{code:04x} fired twice"); @@ -109,4 +105,20 @@ impl ProgramState { } Ok(()) } + + pub fn track_inline_stack(&mut self, inline_stack: InlineStack) { + let Some(code) = self.context_stack.last() else { + return; + }; + let Some(call_stack) = self.call_stacks.get_mut(code) else { + return; + }; + while call_stack + .pop_if(|f| matches!(f, StackFrame::Label(_))) + .is_some() + {} + for label in inline_stack.iter() { + call_stack.push(StackFrame::Label(label.clone())); + } + } }