diff --git a/.gitignore b/.gitignore index ea71bb0aa..47e003340 100644 --- a/.gitignore +++ b/.gitignore @@ -10,6 +10,7 @@ /autom4te.cache /bin /cachegrind.out.* +/callgrind.out.* /compile /config.guess /config.h* @@ -155,11 +156,18 @@ /callgrind/tests/Makefile /callgrind/tests/Makefile.in /callgrind/tests/callgrind.out.* +/callgrind/tests/vgcore.* /callgrind/tests/clreq /callgrind/tests/simwork /callgrind/tests/threads /callgrind/tests/inline-samefile /callgrind/tests/inline-crossfile +/callgrind/tests/find_debuginfo +/callgrind/tests/runtime_obj_skip_c +/callgrind/tests/runtime_obj_skip_underflow +/callgrind/tests/*.so +/callgrind/tests/out_dbg +/callgrind/tests/test-suite-overall.log # /coregrind/ /coregrind/*.a @@ -2550,3 +2558,6 @@ none/tests/freebsd/bug499212 *.vgtest*.log /test-suite-overall.log test-suite.log + +# autoconf backup +/configure~ diff --git a/callgrind/bbcc.c b/callgrind/bbcc.c index 36b2300e1..2289884ed 100644 --- a/callgrind/bbcc.c +++ b/callgrind/bbcc.c @@ -240,8 +240,7 @@ static void resize_bbcc_hash(void) } -static __inline -BBCC** new_recursion(int size) +BBCC** CLG_(new_recursion)(int size) { BBCC** bbccs; int i; @@ -313,8 +312,7 @@ BBCC* new_bbcc(BB* bb) * Recursion level doesn't need to be set as this is not included * in the hash key: Only BBCCs with rec level 0 are in hashes. 
*/ -static -void insert_bbcc_into_hash(BBCC* bbcc) +void CLG_(insert_bbcc_into_hash)(BBCC* bbcc) { UInt idx; @@ -389,10 +387,10 @@ static BBCC* clone_bbcc(BBCC* orig, Context* cxt, Int rec_index) bbcc->rec_index = 0; bbcc->cxt = cxt; - bbcc->rec_array = new_recursion(cxt->fn[0]->separate_recursions); + bbcc->rec_array = CLG_(new_recursion)(cxt->fn[0]->separate_recursions); bbcc->rec_array[0] = bbcc; - insert_bbcc_into_hash(bbcc); + CLG_(insert_bbcc_into_hash)(bbcc); } else { if (CLG_(clo).separate_threads) @@ -513,16 +511,20 @@ static void handleUnderflow(BB* bb) CLG_(current_fn_stack).top--; CLG_(current_state).cxt = 0; caller = CLG_(get_fn_node)(bb); + CLG_DEBUG(1, " underflow reset: cxt=0, BB=%#lx, fn-about-to-push='%s' " + "obj='%s' skip=%d\n", + bb_addr(bb), caller->name, + caller->file->obj->name, caller->skip); CLG_(push_cxt)( caller ); if (!seen_before) { /* set rec array for source BBCC: this is at rec level 1 */ - source_bbcc->rec_array = new_recursion(caller->separate_recursions); + source_bbcc->rec_array = CLG_(new_recursion)(caller->separate_recursions); source_bbcc->rec_array[0] = source_bbcc; CLG_ASSERT(source_bbcc->cxt == 0); source_bbcc->cxt = CLG_(current_state).cxt; - insert_bbcc_into_hash(source_bbcc); + CLG_(insert_bbcc_into_hash)(source_bbcc); } CLG_ASSERT(CLG_(current_state).bbcc); @@ -807,10 +809,10 @@ void CLG_(setup_bbcc)(BB* bb) bbcc->cxt = CLG_(current_state).cxt; bbcc->rec_array = - new_recursion((*CLG_(current_fn_stack).top)->separate_recursions); + CLG_(new_recursion)((*CLG_(current_fn_stack).top)->separate_recursions); bbcc->rec_array[0] = bbcc; - insert_bbcc_into_hash(bbcc); + CLG_(insert_bbcc_into_hash)(bbcc); } else { /* get BBCC with current context */ diff --git a/callgrind/callstack.c b/callgrind/callstack.c index 20669e9cd..8951639d7 100644 --- a/callgrind/callstack.c +++ b/callgrind/callstack.c @@ -25,6 +25,7 @@ */ #include "global.h" +#include "pub_tool_stacktrace.h" 
/*------------------------------------------------------------*/ /*--- Call stack, operations ---*/ @@ -361,6 +362,18 @@ void CLG_(pop_call_stack)(void) if (depth == 0) function_left(to_fn); } + else if (lower_entry->cxt != 0) { + /* Seeded entry from reconstruct_call_stack_from_native: jcc=0 + * (skip-style) but push_cxt was called, so cxt was changed. + * Restore it here so the seeded frame doesn't stay stuck on + * top of the cxt chain and phantom-parent every subsequent + * call from the real caller. Real skip-entries + * (push_call_stack(skip=True) without a prior push_cxt) have + * lower_entry->cxt==0 and skip this branch. */ + CLG_(current_state).cxt = lower_entry->cxt; + CLG_(current_fn_stack).top = + CLG_(current_fn_stack).bottom + lower_entry->fn_sp; + } /* To allow for an assertion in push_call_stack() */ lower_entry->cxt = 0; @@ -433,3 +446,84 @@ Int CLG_(unwind_call_stack)(Addr sp, Int minpops) CLG_DEBUG(4,"- unwind_call_stack\n"); return unwind_count; } + + +/* Seed callgrind's shadow call stack from the client's native stack so a + * later `ret` past unseen frames pops cleanly instead of underflowing. + * + * Called on the OFF->ON instrumentation transition: the client (e.g. + * pytest_codspeed) typically reaches CALLGRIND_START_INSTRUMENTATION several + * libpython frames deep. Without seeding, csp stays at 0 while the real + * stack is non-empty, and every subsequent ret trips handleUnderflow and + * leaks the returned-into fn as a top-level fn= block. + * + * We push a (jcc=0, skip-style) call_entry for every native frame so + * SP-based unwind works. For frames that should appear in the output + * (non-skipped, non-anonymous) we also call push_cxt to seed the context + * chain; pop_call_stack has an else-if branch to restore cxt from these + * entries when they are unwound. Skipped and anonymous (JIT) frames are + * deliberately excluded from the cxt chain — they get SP-only entries. 
*/ +#define CLG_RECON_MAX_FRAMES 256 + +void CLG_(reconstruct_call_stack_from_native)(ThreadId tid) +{ + Addr ips[CLG_RECON_MAX_FRAMES]; + Addr sps[CLG_RECON_MAX_FRAMES]; + call_stack* cs = &CLG_(current_call_stack); + + if (cs->sp != 0) return; + + UInt n = VG_(get_StackTrace)(tid, ips, CLG_RECON_MAX_FRAMES, sps, NULL, 0); + if (n == 0) return; + + /* Push bottom-up: oldest caller first, current frame last. */ + for (Int frame = n - 1; frame >= 0; frame--) { + fn_node* fn = CLG_(get_fn_node_for_addr)(ips[frame]); + + /* Latch obj-skip on first encounter, matching bbcc.c's check. */ + if (!fn->obj_skip_checked) { + const HChar* obj = fn->file->obj->name; + for (Int j = 0; j < CLG_(clo).objs_to_skip_count; j++) { + if (VG_(strcmp)(obj, CLG_(clo).objs_to_skip[j]) == 0) { + fn->skip = True; + break; + } + } + fn->obj_skip_checked = True; + } + + /* Grow the stack before push_cxt, which asserts cs->sp < cs->size + * and writes to entry[cs->sp] — matching push_call_stack's order so + * the invariant holds regardless of CLG_RECON_MAX_FRAMES. */ + ensure_stack_size(cs->sp + 1); + + /* Seed a cxt for every non-skipped frame. JIT frames are named via + * the perf-map resolver in fn.c (get_debug_info), so the root frame + * (__codspeed_root_frame__) gets a real name here instead of "???". + * Seeding a cxt also leaves current_state.cxt non-empty at START so + * the `cxt == 0` clause in setup_bbcc does not force-push the first + * (skipped) libpython/interpreter frame as a top-level node. + * Skipped (obj-skip) frames get SP-only entries — invisible in cxt. */ + if (!fn->skip) + CLG_(push_cxt)(fn); + + call_entry* ce = &cs->entry[cs->sp]; + ce->jcc = 0; + ce->nonskipped = 0; + + /* callgrind pops a frame when SP >= ce->sp, where ce->sp must be the + * frame's *entry* SP (the SP at which its caller made the call). 
The + * unwinder reports each frame's *own* SP (its call site into the next + * inner frame), which is lower; using sps[frame] would pop this frame + * the moment one of its own sub-calls returns (e.g. the START client + * request returning into __codspeed_root_frame__), re-parenting the + * workload onto the frame above. The entry SP is the caller's reported + * SP, sps[frame+1]; the outermost frame keeps its own SP as nothing + * returns past it during measurement. */ + ce->sp = (frame + 1 < (Int)n) ? sps[frame + 1] : sps[frame]; + ce->ret_addr = (frame + 1 < (Int)n) ? ips[frame + 1] : 0; + cs->sp++; + ensure_stack_size(cs->sp + 1); + cs->entry[cs->sp].cxt = 0; + } +} diff --git a/callgrind/fn.c b/callgrind/fn.c index efa5430de..e8b4ba03c 100644 --- a/callgrind/fn.c +++ b/callgrind/fn.c @@ -519,6 +519,41 @@ fn_node* get_fn_node_inseg(DebugInfo* di, } +/* Resolve a raw code address to a fn_node, creating obj/file/fn entries if + * needed. Addresses without DebugInfo (anonymous JIT mappings, ld glue) + * resolve to the shared `???`/anonymous obj. Used by the START-instrumentation + * stack reconstruction path, which has IPs but no BBs. */ +fn_node* CLG_(get_fn_node_for_addr)(Addr ip) +{ + const HChar *dirname, *filename, *fnname; + UInt line_num; + DebugInfo* di; + HChar buf[32]; /* function scope: fnname may point here until the final call; copied by get_fn_node_infile via strdup */ + + CLG_(get_debug_info)(ip, &dirname, &filename, &fnname, &line_num, &di); + + /* Mirror CLG_(get_fn_node)()'s BB-path fallback: when there is no symbol + * (anonymous JIT / stripped code) get_debug_info yields the literal "???". + * Emit the object-relative address instead — identical to the bb->offset + * string the execution path produces — so the frame is a distinct, + * backend-symbolicatable "0x..." node rather than collapsing into the one + * shared "???" node (which loses the address). For anonymous JIT code the + * text bias is 0, so this is the absolute address that perf-<pid>.map keys + * on; the backend resolves it from the perf map.
Used by the START stack + * reconstruction, which has the raw IP but no BB. */ + if (0 == VG_(strcmp)(fnname, "???")) { + Addr off = ip - (di ? VG_(DebugInfo_get_text_bias)(di) : 0); + if (sizeof(Addr) == 4) + VG_(sprintf)(buf, "%#08lx", (UWord)off); + else + VG_(sprintf)(buf, "%#016lx", (UWord)off); + fnname = buf; + } + + return get_fn_node_inseg(di, dirname, filename, fnname); +} + + Bool CLG_(get_debug_info)(Addr instr_addr, const HChar **dir, const HChar **file, diff --git a/callgrind/global.h b/callgrind/global.h index c2fda1cce..24d417732 100644 --- a/callgrind/global.h +++ b/callgrind/global.h @@ -727,6 +727,7 @@ obj_node* CLG_(get_obj_node)(DebugInfo* si); file_node* CLG_(get_file_node)(obj_node*, const HChar *dirname, const HChar* filename); fn_node* CLG_(get_fn_node)(BB* bb); +fn_node* CLG_(get_fn_node_for_addr)(Addr ip); /* from bbcc.c */ void CLG_(init_bbcc_hash)(bbcc_hash* bbccs); @@ -736,6 +737,8 @@ void CLG_(set_current_bbcc_hash)(bbcc_hash*); void CLG_(forall_bbccs)(void (*func)(BBCC*)); void CLG_(zero_bbcc)(BBCC* bbcc); BBCC* CLG_(get_bbcc)(BB* bb); +BBCC** CLG_(new_recursion)(int size); +void CLG_(insert_bbcc_into_hash)(BBCC* bbcc); BBCC* CLG_(clone_bbcc)(BBCC* orig, Context* cxt, Int rec_index); void CLG_(setup_bbcc)(BB* bb) VG_REGPARM(1); @@ -755,6 +758,7 @@ call_entry* CLG_(get_call_entry)(Int n); void CLG_(push_call_stack)(BBCC* from, UInt jmp, BBCC* to, Addr sp, Bool skip); void CLG_(pop_call_stack)(void); Int CLG_(unwind_call_stack)(Addr sp, Int); +void CLG_(reconstruct_call_stack_from_native)(ThreadId tid); /* from context.c */ void CLG_(init_fn_stack)(fn_stack*); diff --git a/callgrind/main.c b/callgrind/main.c index 3761c1448..cfc283455 100644 --- a/callgrind/main.c +++ b/callgrind/main.c @@ -1672,6 +1672,7 @@ Bool CLG_(handle_client_request)(ThreadId tid, UWord *args, UWord *ret) case VG_USERREQ__START_INSTRUMENTATION: CLG_(set_instrument_state)("Client Request", True); +
CLG_(reconstruct_call_stack_from_native)(tid); *ret = 0; /* meaningless */ break; diff --git a/callgrind/tests/Makefile.am b/callgrind/tests/Makefile.am index b6dc3de89..261d357b4 100644 --- a/callgrind/tests/Makefile.am +++ b/callgrind/tests/Makefile.am @@ -13,6 +13,10 @@ EXTRA_DIST = \ find_debuginfo.vgtest find_debuginfo.stderr.exp find_debuginfo.post.exp \ runtime_obj_skip_py.vgtest runtime_obj_skip_py.stderr.exp runtime_obj_skip_py.post.exp \ runtime_obj_skip_py.py runtime_obj_skip_py_shim.c \ + runtime_obj_skip_c.vgtest runtime_obj_skip_c.stderr.exp runtime_obj_skip_c.post.exp \ + runtime_obj_skip_c.c runtime_obj_skip_c_lib.c \ + runtime_obj_skip_underflow.vgtest runtime_obj_skip_underflow.stderr.exp runtime_obj_skip_underflow.post.exp \ + runtime_obj_skip_underflow.c runtime_obj_skip_underflow_lib.c \ bug497723.stderr.exp bug497723.post.exp bug497723.vgtest \ simwork1.vgtest simwork1.stdout.exp simwork1.stderr.exp \ simwork2.vgtest simwork2.stdout.exp simwork2.stderr.exp \ @@ -31,7 +35,7 @@ EXTRA_DIST = \ inline-crossfile.vgtest inline-crossfile.stderr.exp inline-crossfile.stdout.exp inline-crossfile.post.exp \ inline-crossfile-helper1.h inline-crossfile-helper2.h filter_inline -check_PROGRAMS = clreq find_debuginfo simwork threads inline-samefile inline-crossfile +check_PROGRAMS = clreq find_debuginfo simwork threads inline-samefile inline-crossfile runtime_obj_skip_c runtime_obj_skip_underflow AM_CFLAGS += $(AM_FLAG_M3264_PRI) AM_CXXFLAGS += $(AM_FLAG_M3264_PRI) @@ -44,10 +48,29 @@ threads_LDADD = -lpthread # Shim loaded by runtime_obj_skip_py.py via ctypes. Built unconditionally; # the test's prereq skips it if the .so is missing. 
-check_DATA = runtime_obj_skip_py_shim.so +check_DATA = runtime_obj_skip_py_shim.so runtime_obj_skip_c_lib.so runtime_obj_skip_underflow_lib.so runtime_obj_skip_py_shim.so: runtime_obj_skip_py_shim.c $(CC) -shared -fPIC -O2 -I$(top_srcdir) -I$(top_srcdir)/include \ $< -o $@ -CLEANFILES = runtime_obj_skip_py_shim.so +# Shared lib for the runtime_obj_skip_c test. Lives in a separate ELF +# so the main binary can register its path for runtime obj-skip. +runtime_obj_skip_c_lib.so: runtime_obj_skip_c_lib.c + $(CC) -shared -fPIC -O2 -I$(top_srcdir) -I$(top_srcdir)/include \ + $< -o $@ + +runtime_obj_skip_c_LDADD = -l:runtime_obj_skip_c_lib.so -ldl +runtime_obj_skip_c_LDFLAGS = $(AM_LDFLAGS) -L. -Wl,-rpath,'$$ORIGIN' +runtime_obj_skip_c_DEPENDENCIES = runtime_obj_skip_c_lib.so + +# Shared lib for the runtime_obj_skip_underflow test. +runtime_obj_skip_underflow_lib.so: runtime_obj_skip_underflow_lib.c + $(CC) -shared -fPIC -O2 -I$(top_srcdir) -I$(top_srcdir)/include \ + $< -o $@ + +runtime_obj_skip_underflow_LDADD = -l:runtime_obj_skip_underflow_lib.so -ldl +runtime_obj_skip_underflow_LDFLAGS = $(AM_LDFLAGS) -L. -Wl,-rpath,'$$ORIGIN' +runtime_obj_skip_underflow_DEPENDENCIES = runtime_obj_skip_underflow_lib.so + +CLEANFILES = runtime_obj_skip_py_shim.so runtime_obj_skip_c_lib.so runtime_obj_skip_underflow_lib.so diff --git a/callgrind/tests/filter_stderr b/callgrind/tests/filter_stderr index fbaca9b1d..f728486fe 100755 --- a/callgrind/tests/filter_stderr +++ b/callgrind/tests/filter_stderr @@ -30,4 +30,8 @@ sed "/warning: L4 cache found, using its data for the LL simulation./d" | sed "/Warning: Cannot auto-detect cache config, using defaults./d" | sed "/Run with -v to see./d" | sed "/warning: specified LL cache: line_size .*$/d" | -sed "/warning: simulated LL cache: line_size .*$/d" +sed "/warning: simulated LL cache: line_size .*$/d" | + +# Drop callgrind diagnostic logs (verbose obj-skip / cxt / underflow tracing). 
+# These are chore-level diagnostics that vary by run/host and aren't assertions. +sed -E "/^(add_obj_to_skip|obj-skip list now has| \[[0-9]+\] '|fn_nodes already obj_skip_checked|instrument_state ->|new_fn_node:|obj_skip (HIT|miss):| vs \[[0-9]+\] strcmp=|push_cxt FORCED|reconstruct_call_stack:|=== python fn summary| fn='[^']*' obj=)/d" diff --git a/callgrind/tests/runtime_obj_skip_c.c b/callgrind/tests/runtime_obj_skip_c.c new file mode 100644 index 000000000..9e5e650bc --- /dev/null +++ b/callgrind/tests/runtime_obj_skip_c.c @@ -0,0 +1,30 @@ +/* Minimal C reproducer for the runtime obj-skip leak: a fn from a + * skipped object ends up as a top-level fn= block in the callgrind + * output when it is the first BB instrumented after START. + * + * Strategy: register the lib for skip, then call into the lib BEFORE + * starting instrumentation. The lib itself calls + * CALLGRIND_START_INSTRUMENTATION mid-function, so the first BB + * processed by callgrind lives in the skipped object — which trips + * the (cxt == 0) push_cxt path that ignores the skip flag. 
*/ + +#define _GNU_SOURCE +#include <dlfcn.h> /* dladdr(), Dl_info */ +#include <stdio.h> /* fprintf(), stderr */ +#include "../callgrind.h" + +extern void skipme_run(int n); + +int main(void) +{ + Dl_info info; + if (dladdr((void*)skipme_run, &info) == 0 || !info.dli_fname) { /* look up the path of the object that defines skipme_run */ + fprintf(stderr, "dladdr failed\n"); + return 1; + } + CALLGRIND_ADD_OBJ_SKIP(info.dli_fname); /* register that object for runtime obj-skip */ + + skipme_run(1000); /* called with instrumentation still off; the lib starts it itself */ + + return 0; +} diff --git a/callgrind/tests/runtime_obj_skip_c.post.exp b/callgrind/tests/runtime_obj_skip_c.post.exp new file mode 100644 index 000000000..d86bac9de --- /dev/null +++ b/callgrind/tests/runtime_obj_skip_c.post.exp @@ -0,0 +1 @@ +OK diff --git a/callgrind/tests/runtime_obj_skip_c.stderr.exp b/callgrind/tests/runtime_obj_skip_c.stderr.exp new file mode 100644 index 000000000..d0b7820ae --- /dev/null +++ b/callgrind/tests/runtime_obj_skip_c.stderr.exp @@ -0,0 +1,6 @@ + + +Events : Ir +Collected : + +I refs: diff --git a/callgrind/tests/runtime_obj_skip_c.vgtest b/callgrind/tests/runtime_obj_skip_c.vgtest new file mode 100644 index 000000000..c5fffdc8f --- /dev/null +++ b/callgrind/tests/runtime_obj_skip_c.vgtest @@ -0,0 +1,5 @@ +prereq: test -f runtime_obj_skip_c && test -f runtime_obj_skip_c_lib.so +prog-asis: ./runtime_obj_skip_c +vgopts: --instr-atstart=no --compress-strings=no --callgrind-out-file=callgrind.out.runtime_obj_skip_c +post: sh -c 'test -f callgrind.out.runtime_obj_skip_c || { echo "FAIL: callgrind output file missing"; exit 1; }; leaked=$(grep "^fn=skipme_" callgrind.out.runtime_obj_skip_c); if [ -n "$leaked" ]; then echo "FAIL: skipped fn leaked into top-level fn= block:"; echo "$leaked"; exit 1; else echo OK; fi' +cleanup: rm -f callgrind.out.runtime_obj_skip_c diff --git a/callgrind/tests/runtime_obj_skip_c_lib.c b/callgrind/tests/runtime_obj_skip_c_lib.c new file mode 100644 index 000000000..83f9eebb1 --- /dev/null +++ b/callgrind/tests/runtime_obj_skip_c_lib.c @@ -0,0 +1,27 @@ +/* Library that lives in a separate ELF object so the main binary + * can register its path for runtime obj-skip.
+ * + * skipme_run() flips instrumentation on from *inside* the skipped + * object, then calls skipme_func. This is the trigger for the + * `current_state.cxt == 0` push path in setup_bbcc: the very first + * BB after instrumentation start lives in a skipped object, so the + * (cxt==0) clause force-pushes a skipped fn as the new top context + * and it leaks into the dump as a top-level fn= block. */ + +#include "../callgrind.h" + +volatile long sink; + +__attribute__((noinline)) +void skipme_func(int n) +{ + for (int i = 0; i < n; i++) sink += i; +} + +__attribute__((noinline)) +void skipme_run(int n) +{ + CALLGRIND_START_INSTRUMENTATION; + skipme_func(n); + CALLGRIND_STOP_INSTRUMENTATION; +} diff --git a/callgrind/tests/runtime_obj_skip_underflow.c b/callgrind/tests/runtime_obj_skip_underflow.c new file mode 100644 index 000000000..ffc1e6a6f --- /dev/null +++ b/callgrind/tests/runtime_obj_skip_underflow.c @@ -0,0 +1,22 @@ +/* Driver for the underflow-channel obj-skip leak reproducer. */ + +#define _GNU_SOURCE +#include <dlfcn.h> /* dladdr(), Dl_info */ +#include <stdio.h> /* fprintf(), stderr */ +#include "../callgrind.h" + +extern void skipme_run(int depth); + +int main(void) +{ + Dl_info info; + if (dladdr((void*)skipme_run, &info) == 0 || !info.dli_fname) { /* look up the path of the object that defines skipme_run */ + fprintf(stderr, "dladdr failed\n"); + return 1; + } + CALLGRIND_ADD_OBJ_SKIP(info.dli_fname); /* register that object for runtime obj-skip */ + + skipme_run(5); /* argument is the recursion depth */ + + return 0; +} diff --git a/callgrind/tests/runtime_obj_skip_underflow.post.exp b/callgrind/tests/runtime_obj_skip_underflow.post.exp new file mode 100644 index 000000000..d86bac9de --- /dev/null +++ b/callgrind/tests/runtime_obj_skip_underflow.post.exp @@ -0,0 +1 @@ +OK diff --git a/callgrind/tests/runtime_obj_skip_underflow.stderr.exp b/callgrind/tests/runtime_obj_skip_underflow.stderr.exp new file mode 100644 index 000000000..d0b7820ae --- /dev/null +++ b/callgrind/tests/runtime_obj_skip_underflow.stderr.exp @@ -0,0 +1,6 @@ + + +Events : Ir +Collected : + +I refs: diff --git a/callgrind/tests/runtime_obj_skip_underflow.vgtest
b/callgrind/tests/runtime_obj_skip_underflow.vgtest new file mode 100644 index 000000000..cbc3e65bf --- /dev/null +++ b/callgrind/tests/runtime_obj_skip_underflow.vgtest @@ -0,0 +1,5 @@ +prereq: test -f runtime_obj_skip_underflow && test -f runtime_obj_skip_underflow_lib.so +prog-asis: ./runtime_obj_skip_underflow +vgopts: --instr-atstart=no --compress-strings=no --callgrind-out-file=callgrind.out.runtime_obj_skip_underflow +post: sh -c 'test -f callgrind.out.runtime_obj_skip_underflow || { echo "FAIL: callgrind output file missing"; exit 1; }; leaked=$(grep "^fn=skipme_" callgrind.out.runtime_obj_skip_underflow); if [ -n "$leaked" ]; then echo "FAIL: skipped fn leaked into top-level fn= block:"; echo "$leaked"; exit 1; else echo OK; fi' +cleanup: rm -f callgrind.out.runtime_obj_skip_underflow diff --git a/callgrind/tests/runtime_obj_skip_underflow_lib.c b/callgrind/tests/runtime_obj_skip_underflow_lib.c new file mode 100644 index 000000000..abaf58cc6 --- /dev/null +++ b/callgrind/tests/runtime_obj_skip_underflow_lib.c @@ -0,0 +1,37 @@ +/* Library that triggers the call-stack-underflow leak channel in + * callgrind obj-skip. + * + * Setup: recursive function in the skipped lib. Main calls in with + * instrumentation OFF, so callgrind's call stack is never populated. + * At the deepest frame, instrumentation is flipped ON. Each RET on + * the way back then sees csp == 0, hits handleUnderflow, resets + * cxt = 0, and force-pushes the current fn (which lives in the + * skipped lib) as the new top context — leaking N times for an + * N-deep stack. + * + * This is the same shape as Python 3.14's interpreter dispatch + * leaks: deep recursive eval-loop frames where instrumentation was + * started somewhere down the stack and every return pops past an + * empty callgrind stack. 
*/ + +#include "../callgrind.h" + +volatile long sink; /* accumulator updated on the way back up the recursion */ + +__attribute__((noinline)) +void skipme_recurse(int n) /* recurses n levels, then turns instrumentation on at the bottom */ +{ + if (n == 0) { + CALLGRIND_START_INSTRUMENTATION; /* deepest frame: every caller frame was entered with instrumentation off */ + return; + } + skipme_recurse(n - 1); + sink += n; /* runs after the inner call returns, i.e. with instrumentation already on */ +} + +__attribute__((noinline)) +void skipme_run(int depth) /* entry point called by the driver; depth is the recursion depth */ +{ + skipme_recurse(depth); + CALLGRIND_STOP_INSTRUMENTATION; /* turn instrumentation off once the whole recursion has unwound */ +}