diff --git a/callgrind/bbcc.c b/callgrind/bbcc.c index 36b2300e1..c370cd4cb 100644 --- a/callgrind/bbcc.c +++ b/callgrind/bbcc.c @@ -240,8 +240,7 @@ static void resize_bbcc_hash(void) } -static __inline -BBCC** new_recursion(int size) +BBCC** CLG_(new_recursion)(int size) { BBCC** bbccs; int i; @@ -313,8 +312,7 @@ BBCC* new_bbcc(BB* bb) * Recursion level doesn't need to be set as this is not included * in the hash key: Only BBCCs with rec level 0 are in hashes. */ -static -void insert_bbcc_into_hash(BBCC* bbcc) +void CLG_(insert_bbcc_into_hash)(BBCC* bbcc) { UInt idx; @@ -389,10 +387,10 @@ static BBCC* clone_bbcc(BBCC* orig, Context* cxt, Int rec_index) bbcc->rec_index = 0; bbcc->cxt = cxt; - bbcc->rec_array = new_recursion(cxt->fn[0]->separate_recursions); + bbcc->rec_array = CLG_(new_recursion)(cxt->fn[0]->separate_recursions); bbcc->rec_array[0] = bbcc; - insert_bbcc_into_hash(bbcc); + CLG_(insert_bbcc_into_hash)(bbcc); } else { if (CLG_(clo).separate_threads) @@ -513,16 +511,21 @@ static void handleUnderflow(BB* bb) CLG_(current_fn_stack).top--; CLG_(current_state).cxt = 0; caller = CLG_(get_fn_node)(bb); + VG_(message)(Vg_UserMsg, + "underflow reset: cxt=0, BB=%#lx, fn-about-to-push='%s' " + "obj='%s' skip=%d\n", + bb_addr(bb), caller->name, + caller->file->obj->name, caller->skip); CLG_(push_cxt)( caller ); if (!seen_before) { /* set rec array for source BBCC: this is at rec level 1 */ - source_bbcc->rec_array = new_recursion(caller->separate_recursions); + source_bbcc->rec_array = CLG_(new_recursion)(caller->separate_recursions); source_bbcc->rec_array[0] = source_bbcc; CLG_ASSERT(source_bbcc->cxt == 0); source_bbcc->cxt = CLG_(current_state).cxt; - insert_bbcc_into_hash(source_bbcc); + CLG_(insert_bbcc_into_hash)(source_bbcc); } CLG_ASSERT(CLG_(current_state).bbcc); @@ -725,20 +728,40 @@ void CLG_(setup_bbcc)(BB* bb) } } - if (jmpkind == jk_Call) { + /* Check obj-skip on every BB entry, not only jk_Call. + * The interpreter / perf trampoline can enter functions via jk_Jump + * or fall-through; if we only checked on jk_Call, skip would never + * latch for those fns and they'd leak into the dump. */ + { fn_node* node = CLG_(get_fn_node)(bb); skip = node->skip; if (!skip && !node->obj_skip_checked){ HChar* obj_name = node->file->obj->name; - // VG_(printf)(" %s\n", obj_name); + Int cmp_results[CLG_(clo).objs_to_skip_count]; for (int i=0; iskip = True; skip = True; - break; } } + if (skip) { + VG_(message)(Vg_UserMsg, + "obj_skip HIT: fn='%s' obj='%s' jmpkind=%d\n", + node->name, obj_name, (int)jmpkind); + } + if (!skip && CLG_(clo).objs_to_skip_count > 0) { + VG_(message)(Vg_UserMsg, + "obj_skip miss: fn='%s' obj='%s' (len=%lu, %d entries) jmpkind=%d\n", + node->name, obj_name, + VG_(strlen)(obj_name), CLG_(clo).objs_to_skip_count, + (int)jmpkind); + for (int i=0; iobj_skip_checked = True; } } @@ -794,9 +817,26 @@ void CLG_(setup_bbcc)(BB* bb) } } - /* Change new context if needed, taking delayed_push into account */ + /* Change new context if needed, taking delayed_push into account. + * + * The `cxt == 0` clause used to fire regardless of skip, which meant + * that on the first BB after instrumentation start / call-stack + * underflow, a skipped libpython fn would still be pushed as the new + * top context and appear as its own fn= block in the dump. + * + * Now: if the fn is skip, we still push it (otherwise the assert at + * the end of this block fires when fn_stack is empty), but emit a + * diagnostic so we can measure how often the leak happens. */ if ((delayed_push && !skip) || (CLG_(current_state).cxt == 0)) { - CLG_(push_cxt)(CLG_(get_fn_node)(bb)); + fn_node* push_fn = CLG_(get_fn_node)(bb); + if (skip && CLG_(current_state).cxt == 0) { + VG_(message)(Vg_UserMsg, + "push_cxt FORCED for skipped fn (cxt==0): fn='%s' obj='%s' jmpkind=%d delayed_push=%d\n", + push_fn->name, + push_fn->file->obj->name, + (int)jmpkind, (int)delayed_push); + } + CLG_(push_cxt)(push_fn); } CLG_ASSERT(CLG_(current_fn_stack).top > CLG_(current_fn_stack).bottom); @@ -807,10 +847,10 @@ void CLG_(setup_bbcc)(BB* bb) bbcc->cxt = CLG_(current_state).cxt; bbcc->rec_array = - new_recursion((*CLG_(current_fn_stack).top)->separate_recursions); + CLG_(new_recursion)((*CLG_(current_fn_stack).top)->separate_recursions); bbcc->rec_array[0] = bbcc; - insert_bbcc_into_hash(bbcc); + CLG_(insert_bbcc_into_hash)(bbcc); } else { /* get BBCC with current context */ diff --git a/callgrind/callstack.c b/callgrind/callstack.c index 20669e9cd..cc876e0cc 100644 --- a/callgrind/callstack.c +++ b/callgrind/callstack.c @@ -25,6 +25,7 @@ */ #include "global.h" +#include "pub_tool_stacktrace.h" /*------------------------------------------------------------*/ /*--- Call stack, operations ---*/ @@ -361,6 +362,19 @@ void CLG_(pop_call_stack)(void) if (depth == 0) function_left(to_fn); } + else if (lower_entry->cxt != 0) { + /* Seeded entry from reconstruct_call_stack_from_native: jcc=0 + * (skip-style) but push_cxt did run, so cxt was changed. Restore + * it here so the seeded wrapper doesn't stay stuck on top of the + * cxt chain and phantom-parent every subsequent call from the + * real caller. Real skip-entries (push_call_stack(skip=True) + * without a prior push_cxt) have lower_entry->cxt==0 and skip + * this branch — their cxt was never changed, so nothing to + * restore. */ + CLG_(current_state).cxt = lower_entry->cxt; + CLG_(current_fn_stack).top = + CLG_(current_fn_stack).bottom + lower_entry->fn_sp; + } /* To allow for an assertion in push_call_stack() */ lower_entry->cxt = 0; @@ -433,3 +447,111 @@ Int CLG_(unwind_call_stack)(Addr sp, Int minpops) CLG_DEBUG(4,"- unwind_call_stack\n"); return unwind_count; } + + +/* Seed callgrind's shadow call stack from the client's native stack so a + * later `ret` past unseen frames pops cleanly instead of underflowing. + * + * Called on the OFF->ON instrumentation transition: the client (e.g. + * pytest_codspeed) typically reaches CALLGRIND_START_INSTRUMENTATION several + * libpython frames deep. Without seeding, csp stays at 0 while the real + * stack is non-empty, and every subsequent ret trips handleUnderflow and + * leaks the returned-into fn as a top-level fn= block. + * + * For each native frame we push a (jcc=0, skip-style) call_entry with the + * captured SP and a ret_addr computed from the caller's IP. To make obj-skip + * cost-folding work across the seeded chain, we also synthesize a BBCC for + * each non-skipped caller frame so push_call_stack-style `nonskipped` + * attribution has a target on the first non-skip -> skip transition. + */ +#define CLG_RECON_MAX_FRAMES 256 + +void CLG_(reconstruct_call_stack_from_native)(ThreadId tid) +{ + Addr ips[CLG_RECON_MAX_FRAMES]; + Addr sps[CLG_RECON_MAX_FRAMES]; + call_stack* cs = &CLG_(current_call_stack); + + if (cs->sp != 0) return; + + UInt n = VG_(get_StackTrace)(tid, ips, CLG_RECON_MAX_FRAMES, sps, NULL, 0); + if (n == 0) return; + + /* Caller's synthesized BBCC, latched for use as nonskipped target on + * the first non-skipped -> skipped transition. */ + BBCC* caller_bbcc = 0; + + /* Push bottom-up: oldest caller first, current frame last. */ + for (Int frame = n - 1; frame >= 0; frame--) { + fn_node* fn = CLG_(get_fn_node_for_addr)(ips[frame]); + + /* Latch obj-skip on first encounter, matching bbcc.c's check. */ + if (!fn->obj_skip_checked) { + const HChar* obj = fn->file->obj->name; + for (Int j = 0; j < CLG_(clo).objs_to_skip_count; j++) { + if (VG_(strcmp)(obj, CLG_(clo).objs_to_skip[j]) == 0) { + fn->skip = True; + break; + } + } + fn->obj_skip_checked = True; + } + + ensure_stack_size(cs->sp + 1); + BBCC* prev_nonskipped = CLG_(current_state).nonskipped; + + /* Anonymous JIT frames (V8 trampolines, generated code with no + * DebugInfo) resolve to fn->name == "???". Don't push_cxt them: + * they have no useful identity, and because no later RET ever + * pops them (JS resumes via dispatch, not C-ABI ret), they would + * stay stuck on top of the cxt chain forever and become a phantom + * "???" root of every user fn. Push only a bare call_entry so SP + * unwind still works; ce->cxt stays 0, signaling pop_call_stack + * to leave cxt alone. */ + Bool anonymous = (VG_(strcmp)(fn->name, "???") == 0); + + if (!anonymous) { + CLG_(push_cxt)(fn); + + /* Create a BBCC for non-skipped caller frames. ips[frame] for + * frame>=1 is "last byte of the call instruction" per + * VG_(get_StackTrace), so it's never a real BB start and the + * 0-insn synthetic BB cannot collide with later real + * instrumentation. The top frame's IP can land on a real BB, + * so we don't synthesize there — real BBCC will be created + * naturally on the first instrumented BB. */ + if (frame > 0 && !fn->skip) { + Bool seen; + BBCC* b = CLG_(get_bbcc)(CLG_(get_bb)(ips[frame], NULL, &seen)); + if (!seen) { + b->rec_array = CLG_(new_recursion)(fn->separate_recursions); + b->rec_array[0] = b; + b->cxt = CLG_(current_state).cxt; + CLG_(insert_bbcc_into_hash)(b); + } + caller_bbcc = b; + } + + /* Mirror push_call_stack's nonskipped transition. */ + if (!fn->skip) { + CLG_(current_state).nonskipped = 0; + } else if (prev_nonskipped == 0 && caller_bbcc) { + CLG_(current_state).nonskipped = caller_bbcc; + if (!caller_bbcc->skipped) + CLG_(init_cost_lz)(CLG_(sets).full, &caller_bbcc->skipped); + } + } + + call_entry* ce = &cs->entry[cs->sp]; + ce->jcc = 0; + ce->sp = sps[frame]; + ce->ret_addr = (frame + 1 < (Int)n) ? ips[frame + 1] + 1 : 0; + ce->nonskipped = prev_nonskipped; + + cs->sp++; + ensure_stack_size(cs->sp + 1); + cs->entry[cs->sp].cxt = 0; + } + + if (caller_bbcc) CLG_(current_state).bbcc = caller_bbcc; +} diff --git a/callgrind/clo.c b/callgrind/clo.c index fc2084869..74d76501d 100644 --- a/callgrind/clo.c +++ b/callgrind/clo.c @@ -402,12 +402,25 @@ void CLG_(update_fn_config)(fn_node* fn) void CLG_(add_obj_to_skip)(const HChar* obj_name) { + VG_(message)(Vg_UserMsg, "add_obj_to_skip: '%s'\n", obj_name); HChar* dup = VG_(strdup)("cl.clo.aots.1", obj_name); CLG_(clo).objs_to_skip_count++; CLG_(clo).objs_to_skip = VG_(realloc)("cl.clo.aots.2", CLG_(clo).objs_to_skip, CLG_(clo).objs_to_skip_count * sizeof(HChar*)); CLG_(clo).objs_to_skip[CLG_(clo).objs_to_skip_count - 1] = dup; + + VG_(message)(Vg_UserMsg, "obj-skip list now has %d entries:\n", + CLG_(clo).objs_to_skip_count); + for (Int i = 0; i < CLG_(clo).objs_to_skip_count; i++) { + VG_(message)(Vg_UserMsg, " [%d] '%s'\n", i, CLG_(clo).objs_to_skip[i]); + } + + Int checked = 0, skipped = 0; + CLG_(count_obj_skip_checked_fns)(&checked, &skipped); + VG_(message)(Vg_UserMsg, + "fn_nodes already obj_skip_checked: %d (of which marked skip: %d)\n", + checked, skipped); } diff --git a/callgrind/dump.c b/callgrind/dump.c index 3a3164c4b..bffbd4992 100644 --- a/callgrind/dump.c +++ b/callgrind/dump.c @@ -1636,6 +1636,8 @@ void CLG_(dump_profile)(const HChar* trigger, Bool only_current_thread) print_bbccs(trigger, only_current_thread); + CLG_(dump_python_fn_summary)(); + bbs_done = CLG_(stat).bb_executions++; if (VG_(clo_verbosity) > 1) diff --git a/callgrind/fn.c b/callgrind/fn.c index efa5430de..41f4a6a00 100644 --- a/callgrind/fn.c +++ b/callgrind/fn.c @@ -307,6 +307,64 @@ void CLG_(init_obj_table)(void) obj_table[i] = 0; } +void CLG_(count_obj_skip_checked_fns)(Int* checked, Int* skipped) +{ + *checked = 0; + *skipped = 0; + for (Int i = 0; i < N_OBJ_ENTRIES; i++) { + for (obj_node* obj = obj_table[i]; obj != NULL; obj = obj->next) { + for (Int f = 0; f < N_FILE_ENTRIES; f++) { + for (file_node* file = obj->files[f]; file != NULL; file = file->next) { + for (Int n = 0; n < N_FN_ENTRIES; n++) { + for (fn_node* fn = file->fns[n]; fn != NULL; fn = fn->next) { + if (fn->obj_skip_checked) (*checked)++; + if (fn->skip) (*skipped)++; + } + } + } + } + } + } +} + +static Bool name_contains(const HChar* hay, const HChar* needle) +{ + if (!hay || !needle) return False; + Int hlen = VG_(strlen)(hay), nlen = VG_(strlen)(needle); + for (Int i = 0; i + nlen <= hlen; i++) + if (VG_(strncmp)(hay + i, needle, nlen) == 0) return True; + return False; +} + +void CLG_(dump_python_fn_summary)(void) +{ + Int total = 0, checked = 0, skipped = 0; + VG_(message)(Vg_UserMsg, "=== python fn summary (dump) ===\n"); + for (Int i = 0; i < N_OBJ_ENTRIES; i++) { + for (obj_node* obj = obj_table[i]; obj != NULL; obj = obj->next) { + if (!name_contains(obj->name, "python")) continue; + for (Int f = 0; f < N_FILE_ENTRIES; f++) { + for (file_node* file = obj->files[f]; file != NULL; file = file->next) { + for (Int n = 0; n < N_FN_ENTRIES; n++) { + for (fn_node* fn = file->fns[n]; fn != NULL; fn = fn->next) { + total++; + if (fn->obj_skip_checked) checked++; + if (fn->skip) skipped++; + VG_(message)(Vg_UserMsg, + " fn='%s' obj='%s' skip=%d checked=%d\n", + fn->name, obj->name, + fn->skip, fn->obj_skip_checked); + } + } + } + } + } + } + VG_(message)(Vg_UserMsg, + "=== python fn summary: total=%d checked=%d skipped=%d ===\n", + total, checked, skipped); +} + #define HASH_CONSTANT 256 static UInt str_hash(const HChar *s, UInt table_size) @@ -453,6 +511,10 @@ fn_node* new_fn_node(const HChar *fnname, fn->toggle_collect = False; fn->skip = False; fn->obj_skip_checked = False; + + VG_(message)(Vg_UserMsg, "new_fn_node: fn='%s' obj='%s'\n", + fn->name, + (file && file->obj && file->obj->name) ? file->obj->name : "(null)"); fn->pop_on_jump = CLG_(clo).pop_on_jump; fn->is_malloc = False; fn->is_realloc = False; @@ -519,6 +581,21 @@ fn_node* get_fn_node_inseg(DebugInfo* di, } +/* Resolve a raw code address to a fn_node, creating obj/file/fn entries if + * needed. Addresses without DebugInfo (anonymous JIT mappings, ld glue) + * resolve to the shared `???`/anonymous obj. Used by the START-instrumentation + * stack reconstruction path, which has IPs but no BBs. */ +fn_node* CLG_(get_fn_node_for_addr)(Addr ip) +{ + const HChar *dirname, *filename, *fnname; + UInt line_num; + DebugInfo* di; + + CLG_(get_debug_info)(ip, &dirname, &filename, &fnname, &line_num, &di); + return get_fn_node_inseg(di, dirname, filename, fnname); +} + + Bool CLG_(get_debug_info)(Addr instr_addr, const HChar **dir, const HChar **file, diff --git a/callgrind/global.h b/callgrind/global.h index c2fda1cce..5e7c86af4 100644 --- a/callgrind/global.h +++ b/callgrind/global.h @@ -723,10 +723,13 @@ void CLG_(set_current_fn_array)(fn_array*); UInt* CLG_(get_fn_entry)(Int n); void CLG_(init_obj_table)(void); +void CLG_(count_obj_skip_checked_fns)(Int* checked, Int* skipped); +void CLG_(dump_python_fn_summary)(void); obj_node* CLG_(get_obj_node)(DebugInfo* si); file_node* CLG_(get_file_node)(obj_node*, const HChar *dirname, const HChar* filename); fn_node* CLG_(get_fn_node)(BB* bb); +fn_node* CLG_(get_fn_node_for_addr)(Addr ip); /* from bbcc.c */ void CLG_(init_bbcc_hash)(bbcc_hash* bbccs); @@ -736,6 +739,8 @@ void CLG_(set_current_bbcc_hash)(bbcc_hash*); void CLG_(forall_bbccs)(void (*func)(BBCC*)); void CLG_(zero_bbcc)(BBCC* bbcc); BBCC* CLG_(get_bbcc)(BB* bb); +BBCC** CLG_(new_recursion)(int size); +void CLG_(insert_bbcc_into_hash)(BBCC* bbcc); BBCC* CLG_(clone_bbcc)(BBCC* orig, Context* cxt, Int rec_index); void CLG_(setup_bbcc)(BB* bb) VG_REGPARM(1); @@ -755,6 +760,7 @@ call_entry* CLG_(get_call_entry)(Int n); void CLG_(push_call_stack)(BBCC* from, UInt jmp, BBCC* to, Addr sp, Bool skip); void CLG_(pop_call_stack)(void); Int CLG_(unwind_call_stack)(Addr sp, Int); +void CLG_(reconstruct_call_stack_from_native)(ThreadId tid); /* from context.c */ void CLG_(init_fn_stack)(fn_stack*); diff --git a/callgrind/main.c b/callgrind/main.c index 3761c1448..90a3c77d5 100644 --- a/callgrind/main.c +++ b/callgrind/main.c @@ -1453,6 +1453,13 @@ void CLG_(set_instrument_state)(const HChar* reason, Bool state) reason, state ? "ON" : "OFF"); return; } + VG_(message)(Vg_UserMsg, + "instrument_state -> %s (reason='%s', cxt=%p, " + "fn_stack_depth=%ld)\n", + state ? "ON" : "OFF", reason, + (void*)CLG_(current_state).cxt, + (long)(CLG_(current_fn_stack).top - + CLG_(current_fn_stack).bottom)); CLG_(instrument_state) = state; CLG_DEBUG(2, "%s: Switching instrumentation %s ...\n", reason, state ? "ON" : "OFF"); @@ -1672,6 +1679,7 @@ Bool CLG_(handle_client_request)(ThreadId tid, UWord *args, UWord *ret) case VG_USERREQ__START_INSTRUMENTATION: CLG_(set_instrument_state)("Client Request", True); + CLG_(reconstruct_call_stack_from_native)(tid); *ret = 0; /* meaningless */ break; diff --git a/callgrind/tests/Makefile.am b/callgrind/tests/Makefile.am index b6dc3de89..4edd5a81c 100644 --- a/callgrind/tests/Makefile.am +++ b/callgrind/tests/Makefile.am @@ -13,6 +13,12 @@ EXTRA_DIST = \ find_debuginfo.vgtest find_debuginfo.stderr.exp find_debuginfo.post.exp \ runtime_obj_skip_py.vgtest runtime_obj_skip_py.stderr.exp runtime_obj_skip_py.post.exp \ runtime_obj_skip_py.py runtime_obj_skip_py_shim.c \ + runtime_obj_skip_py314.vgtest runtime_obj_skip_py314.stderr.exp runtime_obj_skip_py314.post.exp \ + runtime_obj_skip_c.vgtest runtime_obj_skip_c.stderr.exp runtime_obj_skip_c.post.exp \ + runtime_obj_skip_c.c runtime_obj_skip_c_lib.c \ + runtime_obj_skip_underflow.vgtest runtime_obj_skip_underflow.stderr.exp runtime_obj_skip_underflow.post.exp \ + runtime_obj_skip_underflow.c runtime_obj_skip_underflow_lib.c \ + phantom_root.vgtest phantom_root.stderr.exp phantom_root.post.exp \ bug497723.stderr.exp bug497723.post.exp bug497723.vgtest \ simwork1.vgtest simwork1.stdout.exp simwork1.stderr.exp \ simwork2.vgtest simwork2.stdout.exp simwork2.stderr.exp \ @@ -31,7 +37,7 @@ EXTRA_DIST = \ inline-crossfile.vgtest inline-crossfile.stderr.exp inline-crossfile.stdout.exp inline-crossfile.post.exp \ inline-crossfile-helper1.h inline-crossfile-helper2.h filter_inline -check_PROGRAMS = clreq find_debuginfo simwork threads inline-samefile inline-crossfile +check_PROGRAMS = clreq find_debuginfo simwork threads inline-samefile inline-crossfile runtime_obj_skip_c runtime_obj_skip_underflow phantom_root AM_CFLAGS += $(AM_FLAG_M3264_PRI) AM_CXXFLAGS += $(AM_FLAG_M3264_PRI) @@ -44,10 +50,29 @@ threads_LDADD = -lpthread # Shim loaded by runtime_obj_skip_py.py via ctypes. Built unconditionally; # the test's prereq skips it if the .so is missing. -check_DATA = runtime_obj_skip_py_shim.so +check_DATA = runtime_obj_skip_py_shim.so runtime_obj_skip_c_lib.so runtime_obj_skip_underflow_lib.so runtime_obj_skip_py_shim.so: runtime_obj_skip_py_shim.c $(CC) -shared -fPIC -O2 -I$(top_srcdir) -I$(top_srcdir)/include \ $< -o $@ -CLEANFILES = runtime_obj_skip_py_shim.so +# Shared lib for the runtime_obj_skip_c test. Lives in a separate ELF +# so the main binary can register its path for runtime obj-skip. +runtime_obj_skip_c_lib.so: runtime_obj_skip_c_lib.c + $(CC) -shared -fPIC -O2 -I$(top_srcdir) -I$(top_srcdir)/include \ + $< -o $@ + +runtime_obj_skip_c_LDADD = -l:runtime_obj_skip_c_lib.so -ldl +runtime_obj_skip_c_LDFLAGS = $(AM_LDFLAGS) -L. -Wl,-rpath,'$$ORIGIN' +runtime_obj_skip_c_DEPENDENCIES = runtime_obj_skip_c_lib.so + +# Shared lib for the runtime_obj_skip_underflow test. +runtime_obj_skip_underflow_lib.so: runtime_obj_skip_underflow_lib.c + $(CC) -shared -fPIC -O2 -I$(top_srcdir) -I$(top_srcdir)/include \ + $< -o $@ + +runtime_obj_skip_underflow_LDADD = -l:runtime_obj_skip_underflow_lib.so -ldl +runtime_obj_skip_underflow_LDFLAGS = $(AM_LDFLAGS) -L. -Wl,-rpath,'$$ORIGIN' +runtime_obj_skip_underflow_DEPENDENCIES = runtime_obj_skip_underflow_lib.so + +CLEANFILES = runtime_obj_skip_py_shim.so runtime_obj_skip_c_lib.so runtime_obj_skip_underflow_lib.so diff --git a/callgrind/tests/filter_stderr b/callgrind/tests/filter_stderr index fbaca9b1d..1c51f078a 100755 --- a/callgrind/tests/filter_stderr +++ b/callgrind/tests/filter_stderr @@ -30,4 +30,8 @@ sed "/warning: L4 cache found, using its data for the LL simulation./d" | sed "/Warning: Cannot auto-detect cache config, using defaults./d" | sed "/Run with -v to see./d" | sed "/warning: specified LL cache: line_size .*$/d" | -sed "/warning: simulated LL cache: line_size .*$/d" +sed "/warning: simulated LL cache: line_size .*$/d" | + +# Drop callgrind diagnostic logs (verbose obj-skip / cxt / underflow tracing). +# These are chore-level diagnostics that vary by run/host and aren't assertions. +sed -E "/^(add_obj_to_skip|obj-skip list now has| \[[0-9]+\] '|fn_nodes already obj_skip_checked|instrument_state ->|new_fn_node:|obj_skip (HIT|miss):| vs \[[0-9]+\] strcmp=|push_cxt FORCED|underflow reset:|reconstruct_call_stack:|=== python fn summary| fn='[^']*' obj=)/d" diff --git a/callgrind/tests/phantom_root.c b/callgrind/tests/phantom_root.c new file mode 100644 index 000000000..a32313441 --- /dev/null +++ b/callgrind/tests/phantom_root.c @@ -0,0 +1,36 @@ +/* Reproducer for the seeded-shadow-stack "phantom root" bug. + * + * When CALLGRIND_START_INSTRUMENTATION fires inside a wrapper chain + * that then unwinds, the seed pushes every native frame onto callgrind's + * cxt with jcc=0. pop_call_stack only restores cxt when jcc!=0, so each + * ret on the way back fails to pop the cxt — leaving the deepest + * un-returned wrapper stuck on top, phantom-parenting every later call + * from the real caller. + * + * We model a 3-deep wrapper chain (mirroring e.g. a Node.js binding: + * macro -> C export -> N-API trampoline -> user code) so the fix is + * exercised across multiple consecutive seeded pops, not just frame 0. */ + +#include "../callgrind.h" + +volatile long sink; + +__attribute__((noinline)) +static void leaf(void) { sink++; } + +__attribute__((noinline)) +static void wrapper_inner(void) { CALLGRIND_START_INSTRUMENTATION; } + +__attribute__((noinline)) +static void wrapper_middle(void) { wrapper_inner(); } + +__attribute__((noinline)) +static void wrapper_outer(void) { wrapper_middle(); } + +int main(void) +{ + wrapper_outer(); + leaf(); + CALLGRIND_STOP_INSTRUMENTATION; + return 0; +} diff --git a/callgrind/tests/phantom_root.post.exp b/callgrind/tests/phantom_root.post.exp new file mode 100644 index 000000000..d86bac9de --- /dev/null +++ b/callgrind/tests/phantom_root.post.exp @@ -0,0 +1 @@ +OK diff --git a/callgrind/tests/phantom_root.stderr.exp b/callgrind/tests/phantom_root.stderr.exp new file mode 100644 index 000000000..d0b7820ae --- /dev/null +++ b/callgrind/tests/phantom_root.stderr.exp @@ -0,0 +1,6 @@ + + +Events : Ir +Collected : + +I refs: diff --git a/callgrind/tests/phantom_root.vgtest b/callgrind/tests/phantom_root.vgtest new file mode 100644 index 000000000..e44bfb45c --- /dev/null +++ b/callgrind/tests/phantom_root.vgtest @@ -0,0 +1,4 @@ +prog: phantom_root +vgopts: --instr-atstart=no --compress-strings=no --callgrind-out-file=callgrind.out.phantom_root +post: sh -c 'callers=$(awk -v q="'\''" "/^fn=/{fn=\$0; sub(q\"[0-9]+\$\",\"\",fn)} /^cfn=leaf\$/{print fn}" callgrind.out.phantom_root | sort -u); if [ "$callers" != "fn=main" ]; then echo "FAIL: leaf called by unexpected caller(s):"; echo "$callers"; exit 1; else echo OK; fi' +cleanup: rm -f callgrind.out.phantom_root diff --git a/callgrind/tests/runtime_obj_skip_c.c b/callgrind/tests/runtime_obj_skip_c.c new file mode 100644 index 000000000..9e5e650bc --- /dev/null +++ b/callgrind/tests/runtime_obj_skip_c.c @@ -0,0 +1,30 @@ +/* Minimal C reproducer for the runtime obj-skip leak: a fn from a + * skipped object ends up as a top-level fn= block in the callgrind + * output when it is the first BB instrumented after START. + * + * Strategy: register the lib for skip, then call into the lib BEFORE + * starting instrumentation. The lib itself calls + * CALLGRIND_START_INSTRUMENTATION mid-function, so the first BB + * processed by callgrind lives in the skipped object — which trips + * the (cxt == 0) push_cxt path that ignores the skip flag. */ + +#define _GNU_SOURCE +#include +#include +#include "../callgrind.h" + +extern void skipme_run(int n); + +int main(void) +{ + Dl_info info; + if (dladdr((void*)skipme_run, &info) == 0 || !info.dli_fname) { + fprintf(stderr, "dladdr failed\n"); + return 1; + } + CALLGRIND_ADD_OBJ_SKIP(info.dli_fname); + + skipme_run(1000); + + return 0; +} diff --git a/callgrind/tests/runtime_obj_skip_c.post.exp b/callgrind/tests/runtime_obj_skip_c.post.exp new file mode 100644 index 000000000..d86bac9de --- /dev/null +++ b/callgrind/tests/runtime_obj_skip_c.post.exp @@ -0,0 +1 @@ +OK diff --git a/callgrind/tests/runtime_obj_skip_c.stderr.exp b/callgrind/tests/runtime_obj_skip_c.stderr.exp new file mode 100644 index 000000000..d0b7820ae --- /dev/null +++ b/callgrind/tests/runtime_obj_skip_c.stderr.exp @@ -0,0 +1,6 @@ + + +Events : Ir +Collected : + +I refs: diff --git a/callgrind/tests/runtime_obj_skip_c.vgtest b/callgrind/tests/runtime_obj_skip_c.vgtest new file mode 100644 index 000000000..5817c245a --- /dev/null +++ b/callgrind/tests/runtime_obj_skip_c.vgtest @@ -0,0 +1,5 @@ +prereq: test -f runtime_obj_skip_c && test -f runtime_obj_skip_c_lib.so +prog-asis: ./runtime_obj_skip_c +vgopts: --instr-atstart=no --compress-strings=no --callgrind-out-file=callgrind.out.runtime_obj_skip_c +post: sh -c 'leaked=$(grep "^fn=skipme_" callgrind.out.runtime_obj_skip_c); if [ -n "$leaked" ]; then echo "FAIL: skipped fn leaked into top-level fn= block:"; echo "$leaked"; exit 1; else echo OK; fi' +cleanup: rm -f callgrind.out.runtime_obj_skip_c diff --git a/callgrind/tests/runtime_obj_skip_c_lib.c b/callgrind/tests/runtime_obj_skip_c_lib.c new file mode 100644 index 000000000..83f9eebb1 --- /dev/null +++ b/callgrind/tests/runtime_obj_skip_c_lib.c @@ -0,0 +1,27 @@ +/* Library that lives in a separate ELF object so the main binary + * can register its path for runtime obj-skip. + * + * skipme_run() flips instrumentation on from *inside* the skipped + * object, then calls skipme_func. This is the trigger for the + * `current_state.cxt == 0` push path in setup_bbcc: the very first + * BB after instrumentation start lives in a skipped object, so the + * (cxt==0) clause force-pushes a skipped fn as the new top context + * and it leaks into the dump as a top-level fn= block. */ + +#include "../callgrind.h" + +volatile long sink; + +__attribute__((noinline)) +void skipme_func(int n) +{ + for (int i = 0; i < n; i++) sink += i; +} + +__attribute__((noinline)) +void skipme_run(int n) +{ + CALLGRIND_START_INSTRUMENTATION; + skipme_func(n); + CALLGRIND_STOP_INSTRUMENTATION; +} diff --git a/callgrind/tests/runtime_obj_skip_py314.post.exp b/callgrind/tests/runtime_obj_skip_py314.post.exp new file mode 100644 index 000000000..d86bac9de --- /dev/null +++ b/callgrind/tests/runtime_obj_skip_py314.post.exp @@ -0,0 +1 @@ +OK diff --git a/callgrind/tests/runtime_obj_skip_py314.stderr.exp b/callgrind/tests/runtime_obj_skip_py314.stderr.exp new file mode 100644 index 000000000..d0b7820ae --- /dev/null +++ b/callgrind/tests/runtime_obj_skip_py314.stderr.exp @@ -0,0 +1,6 @@ + + +Events : Ir +Collected : + +I refs: diff --git a/callgrind/tests/runtime_obj_skip_py314.vgtest b/callgrind/tests/runtime_obj_skip_py314.vgtest new file mode 100644 index 000000000..bebb4fd41 --- /dev/null +++ b/callgrind/tests/runtime_obj_skip_py314.vgtest @@ -0,0 +1,6 @@ +prereq: command -v python3.14 >/dev/null 2>&1 && test -f runtime_obj_skip_py_shim.so || exit 1 +prog-asis: python3.14 +args: runtime_obj_skip_py.py +vgopts: --instr-atstart=no --compress-strings=no --callgrind-out-file=callgrind.out.runtime_obj_skip_py314 +post: sh -c 'c=$(awk "/^ob=/{p=(\$0~/libpython/)} /^fn=/&&p{c++} END{print c+0}" callgrind.out.runtime_obj_skip_py314); if [ "$c" -lt 100 ]; then echo OK; else echo "FAIL libpython fns=$c"; fi' +cleanup: rm -f callgrind.out.runtime_obj_skip_py314 diff --git a/callgrind/tests/runtime_obj_skip_underflow.c b/callgrind/tests/runtime_obj_skip_underflow.c new file mode 100644 index 000000000..ffc1e6a6f --- /dev/null +++ b/callgrind/tests/runtime_obj_skip_underflow.c @@ -0,0 +1,22 @@ +/* Driver for the underflow-channel obj-skip leak reproducer. */ + +#define _GNU_SOURCE +#include +#include +#include "../callgrind.h" + +extern void skipme_run(int depth); + +int main(void) +{ + Dl_info info; + if (dladdr((void*)skipme_run, &info) == 0 || !info.dli_fname) { + fprintf(stderr, "dladdr failed\n"); + return 1; + } + CALLGRIND_ADD_OBJ_SKIP(info.dli_fname); + + skipme_run(5); + + return 0; +} diff --git a/callgrind/tests/runtime_obj_skip_underflow.post.exp b/callgrind/tests/runtime_obj_skip_underflow.post.exp new file mode 100644 index 000000000..d86bac9de --- /dev/null +++ b/callgrind/tests/runtime_obj_skip_underflow.post.exp @@ -0,0 +1 @@ +OK diff --git a/callgrind/tests/runtime_obj_skip_underflow.stderr.exp b/callgrind/tests/runtime_obj_skip_underflow.stderr.exp new file mode 100644 index 000000000..d0b7820ae --- /dev/null +++ b/callgrind/tests/runtime_obj_skip_underflow.stderr.exp @@ -0,0 +1,6 @@ + + +Events : Ir +Collected : + +I refs: diff --git a/callgrind/tests/runtime_obj_skip_underflow.vgtest b/callgrind/tests/runtime_obj_skip_underflow.vgtest new file mode 100644 index 000000000..c0fc04cd0 --- /dev/null +++ b/callgrind/tests/runtime_obj_skip_underflow.vgtest @@ -0,0 +1,5 @@ +prereq: test -f runtime_obj_skip_underflow && test -f runtime_obj_skip_underflow_lib.so +prog-asis: ./runtime_obj_skip_underflow +vgopts: --instr-atstart=no --compress-strings=no --callgrind-out-file=callgrind.out.runtime_obj_skip_underflow +post: sh -c 'leaked=$(grep "^fn=skipme_" callgrind.out.runtime_obj_skip_underflow); if [ -n "$leaked" ]; then echo "FAIL: skipped fn leaked into top-level fn= block:"; echo "$leaked"; exit 1; else echo OK; fi' +cleanup: rm -f callgrind.out.runtime_obj_skip_underflow diff --git a/callgrind/tests/runtime_obj_skip_underflow_lib.c b/callgrind/tests/runtime_obj_skip_underflow_lib.c new file mode 100644 index 000000000..abaf58cc6 --- /dev/null +++ b/callgrind/tests/runtime_obj_skip_underflow_lib.c @@ -0,0 +1,37 @@ +/* Library that triggers the call-stack-underflow leak channel in + * callgrind obj-skip. + * + * Setup: recursive function in the skipped lib. Main calls in with + * instrumentation OFF, so callgrind's call stack is never populated. + * At the deepest frame, instrumentation is flipped ON. Each RET on + * the way back then sees csp == 0, hits handleUnderflow, resets + * cxt = 0, and force-pushes the current fn (which lives in the + * skipped lib) as the new top context — leaking N times for an + * N-deep stack. + * + * This is the same shape as Python 3.14's interpreter dispatch + * leaks: deep recursive eval-loop frames where instrumentation was + * started somewhere down the stack and every return pops past an + * empty callgrind stack. */ + +#include "../callgrind.h" + +volatile long sink; + +__attribute__((noinline)) +void skipme_recurse(int n) +{ + if (n == 0) { + CALLGRIND_START_INSTRUMENTATION; + return; + } + skipme_recurse(n - 1); + sink += n; +} + +__attribute__((noinline)) +void skipme_run(int depth) +{ + skipme_recurse(depth); + CALLGRIND_STOP_INSTRUMENTATION; +}