diff --git a/callgrind/bbcc.c b/callgrind/bbcc.c
index 36b2300e1..737f129b8 100644
--- a/callgrind/bbcc.c
+++ b/callgrind/bbcc.c
@@ -513,6 +513,13 @@ static void handleUnderflow(BB* bb)
   CLG_(current_fn_stack).top--;
   CLG_(current_state).cxt = 0;
   caller = CLG_(get_fn_node)(bb);
+
+  /* A (sentinel): if the fn we'd return into is itself skipped, push
+   * the (skipped) sentinel instead so the skipped fn doesn't surface
+   * as its own fn= block in the dump. */
+  if (caller->skip)
+    caller = CLG_(get_skipped_sentinel)();
+
   CLG_(push_cxt)( caller );
 
   if (!seen_before) {
@@ -725,7 +732,11 @@ void CLG_(setup_bbcc)(BB* bb)
     }
   }
 
-  if (jmpkind == jk_Call) {
+  /* Check obj-skip on every BB entry, not only jk_Call.
+   * The interpreter / perf trampoline can enter functions via jk_Jump
+   * or fall-through; if we only checked on jk_Call, skip would never
+   * latch for those fns and they'd leak into the dump. */
+  {
     fn_node* node = CLG_(get_fn_node)(bb);
     skip = node->skip;
     if (!skip && !node->obj_skip_checked){
@@ -794,9 +805,22 @@ void CLG_(setup_bbcc)(BB* bb)
     }
   }
 
-  /* Change new context if needed, taking delayed_push into account */
+  /* Change new context if needed, taking delayed_push into account.
+   *
+   * The `cxt == 0` clause used to fire regardless of skip, which meant
+   * that on the first BB after instrumentation start / call-stack
+   * underflow, a skipped libpython fn would still be pushed as the new
+   * top context and appear as its own fn= block in the dump.
+   *
+   * Now: if the fn is skip, we substitute the skipped sentinel so the
+   * skipped fn doesn't appear as its own fn= block in the dump. */
   if ((delayed_push && !skip) || (CLG_(current_state).cxt == 0)) {
-    CLG_(push_cxt)(CLG_(get_fn_node)(bb));
+    fn_node* push_fn = CLG_(get_fn_node)(bb);
+    /* A (sentinel): substitute the (skipped) sentinel so the
+     * skipped fn doesn't appear as its own fn= block in the dump. */
+    if (skip && CLG_(current_state).cxt == 0)
+      push_fn = CLG_(get_skipped_sentinel)();
+    CLG_(push_cxt)(push_fn);
   }
 
   CLG_ASSERT(CLG_(current_fn_stack).top > CLG_(current_fn_stack).bottom);
diff --git a/callgrind/dump.c b/callgrind/dump.c
index 3a3164c4b..4fee29af2 100644
--- a/callgrind/dump.c
+++ b/callgrind/dump.c
@@ -1553,7 +1553,17 @@ static void print_bbccs_of_thread(thread_info* ti)
             }
 
             if (*p == 0) break;
-            
+
+            /* Don't emit BBCCs whose top context fn is flagged for obj-skip.
+             * This happens when the (cxt == 0) clause in setup_bbcc force-
+             * pushes a skipped fn (first BB after instrumentation start that
+             * landed in a skipped object). Without this filter the skipped fn
+             * leaks into the dump as a top-level fn= block. */
+            if ((*p)->cxt->fn[0]->skip) {
+                p++;
+                continue;
+            }
+
             if (print_fn_pos(print_fp, &lastFnPos, *p)) {
 
                 /* new function */
diff --git a/callgrind/fn.c b/callgrind/fn.c
index efa5430de..2525063d8 100644
--- a/callgrind/fn.c
+++ b/callgrind/fn.c
@@ -307,6 +307,27 @@ void CLG_(init_obj_table)(void)
     obj_table[i] = 0;
 }
 
+static fn_node* new_fn_node(const HChar *fnname,
+                            file_node* file, fn_node* next);
+
+/* Singleton sentinel fn_node used as a placeholder cxt when we'd
+ * otherwise be forced to push a skipped fn into an empty (cxt == 0)
+ * context. Keeping skip == False on the sentinel itself is crucial:
+ * the (cxt == 0 && skip) check that would push it must NOT recurse
+ * on the sentinel. */
+static fn_node* skipped_sentinel = NULL;
+
+fn_node* CLG_(get_skipped_sentinel)(void)
+{
+  if (skipped_sentinel) return skipped_sentinel;
+
+  obj_node* obj = CLG_(get_obj_node)(NULL); /* anonymous "???" obj */
+  file_node* file = CLG_(get_file_node)(obj, "", "(callgrind-internal)");
+  skipped_sentinel = new_fn_node("(skipped)", file, NULL);
+  skipped_sentinel->skip = False;
+  return skipped_sentinel;
+}
+
 #define HASH_CONSTANT 256
 
 static UInt str_hash(const HChar *s, UInt table_size)
diff --git a/callgrind/global.h b/callgrind/global.h
index c2fda1cce..730e665c9 100644
--- a/callgrind/global.h
+++ b/callgrind/global.h
@@ -723,6 +723,7 @@ void CLG_(set_current_fn_array)(fn_array*);
 UInt* CLG_(get_fn_entry)(Int n);
 
 void CLG_(init_obj_table)(void);
+fn_node* CLG_(get_skipped_sentinel)(void);
 obj_node* CLG_(get_obj_node)(DebugInfo* si);
 file_node* CLG_(get_file_node)(obj_node*, const HChar *dirname,
                                const HChar* filename);
diff --git a/callgrind/tests/runtime_obj_skip_underflow.c b/callgrind/tests/runtime_obj_skip_underflow.c
new file mode 100644
index 000000000..ffc1e6a6f
--- /dev/null
+++ b/callgrind/tests/runtime_obj_skip_underflow.c
@@ -0,0 +1,22 @@
+/* Driver for the underflow-channel obj-skip leak reproducer. */
+
+#define _GNU_SOURCE
+#include <dlfcn.h>
+#include <stdio.h>
+#include "../callgrind.h"
+
+extern void skipme_run(int depth);
+
+int main(void)
+{
+    Dl_info info;
+    if (dladdr((void*)skipme_run, &info) == 0 || !info.dli_fname) {
+        fprintf(stderr, "dladdr failed\n");
+        return 1;
+    }
+    CALLGRIND_ADD_OBJ_SKIP(info.dli_fname);
+
+    skipme_run(5);
+
+    return 0;
+}
diff --git a/callgrind/tests/runtime_obj_skip_underflow_lib.c b/callgrind/tests/runtime_obj_skip_underflow_lib.c
new file mode 100644
index 000000000..abaf58cc6
--- /dev/null
+++ b/callgrind/tests/runtime_obj_skip_underflow_lib.c
@@ -0,0 +1,37 @@
+/* Library that triggers the call-stack-underflow leak channel in
+ * callgrind obj-skip.
+ *
+ * Setup: recursive function in the skipped lib. Main calls in with
+ * instrumentation OFF, so callgrind's call stack is never populated.
+ * At the deepest frame, instrumentation is flipped ON. Each RET on
+ * the way back then sees csp == 0, hits handleUnderflow, resets
+ * cxt = 0, and force-pushes the current fn (which lives in the
+ * skipped lib) as the new top context — leaking N times for an
+ * N-deep stack.
+ *
+ * This is the same shape as Python 3.14's interpreter dispatch
+ * leaks: deep recursive eval-loop frames where instrumentation was
+ * started somewhere down the stack and every return pops past an
+ * empty callgrind stack. */
+
+#include "../callgrind.h"
+
+volatile long sink;
+
+__attribute__((noinline))
+void skipme_recurse(int n)
+{
+    if (n == 0) {
+        CALLGRIND_START_INSTRUMENTATION;
+        return;
+    }
+    skipme_recurse(n - 1);
+    sink += n;
+}
+
+__attribute__((noinline))
+void skipme_run(int depth)
+{
+    skipme_recurse(depth);
+    CALLGRIND_STOP_INSTRUMENTATION;
+}