diff --git a/callgrind/bbcc.c b/callgrind/bbcc.c index 36b2300e1..c8d5b39a4 100644 --- a/callgrind/bbcc.c +++ b/callgrind/bbcc.c @@ -287,6 +287,7 @@ BBCC* new_bbcc(BB* bb) bbcc->jmp[i].jcc_list = 0; } bbcc->ecounter_sum = 0; + bbcc->from_underflow = False; /* Init pointer caches (LRU) */ bbcc->lru_next_bbcc = 0; @@ -502,17 +503,27 @@ static void handleUnderflow(BB* bb) source_bb = CLG_(get_bb)(bb_addr(bb)-1, 0, &seen_before); source_bbcc = CLG_(get_bbcc)(source_bb); + /* Mark this synthesized BBCC so dump.c discards it: an underflow + * means we never saw the real caller, so any phantom frame and its + * accumulated cost should not leak into the .out file. This also + * covers the case where the synthesized caller resolves to an + * --obj-skip fn (e.g. CPython's eval frame). */ + source_bbcc->from_underflow = True; + /* seen_before can be true if RET from a signal handler */ if (!seen_before) { - source_bbcc->ecounter_sum = CLG_(current_state).collect ? 1 : 0; + source_bbcc->ecounter_sum = 0; } - else if (CLG_(current_state).collect) - source_bbcc->ecounter_sum++; /* Force a new top context, will be set active by push_cxt() */ CLG_(current_fn_stack).top--; CLG_(current_state).cxt = 0; caller = CLG_(get_fn_node)(bb); + VG_(message)(Vg_UserMsg, + "underflow reset: cxt=0, BB=%#lx, fn-about-to-push='%s' " + "obj='%s' skip=%d\n", + bb_addr(bb), caller->name, + caller->file->obj->name, caller->skip); CLG_(push_cxt)( caller ); if (!seen_before) { @@ -542,13 +553,32 @@ static void handleUnderflow(BB* bb) (Addr)-1, False); call_entry_up = &(CLG_(current_call_stack).entry[CLG_(current_call_stack).sp -1]); - /* assume this call is lasting since last dump or - * for a signal handler since it's call */ - if (CLG_(current_state).sig == 0) - CLG_(copy_cost)( CLG_(sets).full, call_entry_up->enter_cost, - CLG_(get_current_thread)()->lastdump_cost ); - else - CLG_(zero_cost)( CLG_(sets).full, call_entry_up->enter_cost ); + /* Discard cost accumulated since last dump: attributing it to a + * phantom underflow caller produces bogus inclusive cost on a frame + * that won't be emitted anyway. */ + CLG_(zero_cost)( CLG_(sets).full, call_entry_up->enter_cost ); +} + + +/* Resolve --obj-skip for a fn lazily: if the fn's containing object + * was registered for skipping (via CLO or CALLGRIND_ADD_OBJ_SKIP) and + * we haven't yet propagated that to fn->skip, do so now. Returns the + * final skip state. Cheap on the hot path due to obj_skip_checked. */ +static Bool resolve_obj_skip(fn_node* node) +{ + if (node->skip) return True; + if (node->obj_skip_checked) return False; + + HChar* obj_name = node->file->obj->name; + for (int i = 0; i < CLG_(clo).objs_to_skip_count; i++) { + if (VG_(strcmp)(obj_name, CLG_(clo).objs_to_skip[i]) == 0) { + node->skip = True; + node->obj_skip_checked = True; + return True; + } + } + node->obj_skip_checked = True; + return False; } @@ -726,21 +756,7 @@ void CLG_(setup_bbcc)(BB* bb) } if (jmpkind == jk_Call) { - fn_node* node = CLG_(get_fn_node)(bb); - skip = node->skip; - if (!skip && !node->obj_skip_checked){ - HChar* obj_name = node->file->obj->name; - // VG_(printf)(" %s\n", obj_name); - for (int i=0; iskip = True; - skip = True; - break; - } - } - node->obj_skip_checked = True; - } + skip = resolve_obj_skip(CLG_(get_fn_node)(bb)); } CLG_DEBUGIF(1) { @@ -794,9 +810,17 @@ void CLG_(setup_bbcc)(BB* bb) } } - /* Change new context if needed, taking delayed_push into account */ + /* Change new context if needed, taking delayed_push into account. + * Also resolve --obj-skip on the force-push (cxt==0) path so that + * the BBCC ends up with fn->skip set and gets dropped at dump time. + * Without this the fn->skip propagation only runs from the jk_Call + * branch above, and any first BB after START_INSTRUMENTATION that + * lives in a skipped object leaks into the .out file. */ if ((delayed_push && !skip) || (CLG_(current_state).cxt == 0)) { - CLG_(push_cxt)(CLG_(get_fn_node)(bb)); + fn_node* node = CLG_(get_fn_node)(bb); + if (CLG_(current_state).cxt == 0) + resolve_obj_skip(node); + CLG_(push_cxt)(node); } CLG_ASSERT(CLG_(current_fn_stack).top > CLG_(current_fn_stack).bottom); diff --git a/callgrind/dump.c b/callgrind/dump.c index 3a3164c4b..bb8190877 100644 --- a/callgrind/dump.c +++ b/callgrind/dump.c @@ -1553,7 +1553,28 @@ static void print_bbccs_of_thread(thread_info* ti) } if (*p == 0) break; - + + /* Don't emit BBCCs whose top context fn is flagged for obj-skip. + * This happens when the (cxt == 0) clause in setup_bbcc force- + * pushes a skipped fn (first BB after instrumentation start that + * landed in a skipped object). Without this filter the skipped fn + * leaks into the dump as a top-level fn= block. */ + if ((*p)->cxt->fn[0]->skip) { + p++; + continue; + } + + /* Drop BBCCs synthesized by handleUnderflow. These are phantom + * callers fabricated when a RET happens with no matching CALL on + * the shadow stack; the caller fn is a guess (return addr - 1), + * the cost is whatever accumulated since the last dump, and the + * fn may even resolve to an --obj-skip target. None of that + * belongs in the output. */ + if ((*p)->from_underflow) { + p++; + continue; + } + if (print_fn_pos(print_fp, &lastFnPos, *p)) { /* new function */ diff --git a/callgrind/global.h b/callgrind/global.h index c2fda1cce..e7d98d0d0 100644 --- a/callgrind/global.h +++ b/callgrind/global.h @@ -389,6 +389,7 @@ struct _BBCC { BBCC* next; /* entry chain in hash */ ULong* cost; /* start of 64bit costs for this BBCC */ ULong ecounter_sum; /* execution counter for first instruction of BB */ + Bool from_underflow; /* synthesized by handleUnderflow: drop at dump */ JmpData jmp[0]; }; diff --git a/callgrind/main.c b/callgrind/main.c index 3761c1448..ee8aa2102 100644 --- a/callgrind/main.c +++ b/callgrind/main.c @@ -1453,6 +1453,13 @@ void CLG_(set_instrument_state)(const HChar* reason, Bool state) reason, state ? "ON" : "OFF"); return; } + VG_(message)(Vg_UserMsg, + "instrument_state -> %s (reason='%s', cxt=%p, " + "fn_stack_depth=%ld)\n", + state ? "ON" : "OFF", reason, + (void*)CLG_(current_state).cxt, + (long)(CLG_(current_fn_stack).top - + CLG_(current_fn_stack).bottom)); CLG_(instrument_state) = state; CLG_DEBUG(2, "%s: Switching instrumentation %s ...\n", reason, state ? "ON" : "OFF"); diff --git a/callgrind/tests/Makefile.am b/callgrind/tests/Makefile.am index b6dc3de89..e916bb97c 100644 --- a/callgrind/tests/Makefile.am +++ b/callgrind/tests/Makefile.am @@ -13,6 +13,8 @@ EXTRA_DIST = \ find_debuginfo.vgtest find_debuginfo.stderr.exp find_debuginfo.post.exp \ runtime_obj_skip_py.vgtest runtime_obj_skip_py.stderr.exp runtime_obj_skip_py.post.exp \ runtime_obj_skip_py.py runtime_obj_skip_py_shim.c \ + runtime_obj_skip_c.vgtest runtime_obj_skip_c.stderr.exp runtime_obj_skip_c.post.exp \ + runtime_obj_skip_c.c runtime_obj_skip_c_lib.c \ bug497723.stderr.exp bug497723.post.exp bug497723.vgtest \ simwork1.vgtest simwork1.stdout.exp simwork1.stderr.exp \ simwork2.vgtest simwork2.stdout.exp simwork2.stderr.exp \ @@ -31,7 +33,7 @@ EXTRA_DIST = \ inline-crossfile.vgtest inline-crossfile.stderr.exp inline-crossfile.stdout.exp inline-crossfile.post.exp \ inline-crossfile-helper1.h inline-crossfile-helper2.h filter_inline -check_PROGRAMS = clreq find_debuginfo simwork threads inline-samefile inline-crossfile +check_PROGRAMS = clreq find_debuginfo simwork threads inline-samefile inline-crossfile runtime_obj_skip_c AM_CFLAGS += $(AM_FLAG_M3264_PRI) AM_CXXFLAGS += $(AM_FLAG_M3264_PRI) @@ -44,10 +46,21 @@ threads_LDADD = -lpthread # Shim loaded by runtime_obj_skip_py.py via ctypes. Built unconditionally; # the test's prereq skips it if the .so is missing. -check_DATA = runtime_obj_skip_py_shim.so +check_DATA = runtime_obj_skip_py_shim.so runtime_obj_skip_c_lib.so runtime_obj_skip_py_shim.so: runtime_obj_skip_py_shim.c $(CC) -shared -fPIC -O2 -I$(top_srcdir) -I$(top_srcdir)/include \ $< -o $@ -CLEANFILES = runtime_obj_skip_py_shim.so +# Shared lib for the runtime_obj_skip_c test. Lives in a separate ELF +# so the main binary can register its path for runtime obj-skip. +runtime_obj_skip_c_lib.so: runtime_obj_skip_c_lib.c + $(CC) -shared -fPIC -O2 -I$(top_srcdir) -I$(top_srcdir)/include \ + $< -o $@ -ldl + +runtime_obj_skip_c_LDADD = -ldl +runtime_obj_skip_c_LDFLAGS = $(AM_LDFLAGS) -L. -l:runtime_obj_skip_c_lib.so \ + -Wl,-rpath,'$$ORIGIN' +runtime_obj_skip_c_DEPENDENCIES = runtime_obj_skip_c_lib.so + +CLEANFILES = runtime_obj_skip_py_shim.so runtime_obj_skip_c_lib.so diff --git a/callgrind/tests/runtime_obj_skip_c.c b/callgrind/tests/runtime_obj_skip_c.c new file mode 100644 index 000000000..543e74896 --- /dev/null +++ b/callgrind/tests/runtime_obj_skip_c.c @@ -0,0 +1,32 @@ +/* Minimal C reproducer for the runtime obj-skip leak: a fn from a + * skipped object ends up as a top-level fn= block in the callgrind + * output when it is the first BB instrumented after START. + * + * Strategy: register the lib for skip, then call into the lib BEFORE + * starting instrumentation. The lib itself calls + * CALLGRIND_START_INSTRUMENTATION mid-function, so the first BB + * processed by callgrind lives in the skipped object — which trips + * the (cxt == 0) push_cxt path that ignores the skip flag. */ + +#include +#include "../callgrind.h" + +extern void skipme_run(int n); +extern const char* skipme_self_path(void); + +int main(void) +{ + /* Resolve the lib's path from *inside* the lib: taking &skipme_run + * here gives a PLT stub in the main binary, whose dladdr returns + * the main binary path — registering the wrong object for skip. */ + const char* path = skipme_self_path(); + if (!path) { + fprintf(stderr, "skipme_self_path failed\n"); + return 1; + } + CALLGRIND_ADD_OBJ_SKIP(path); + + skipme_run(1000); + + return 0; +} diff --git a/callgrind/tests/runtime_obj_skip_c.post.exp b/callgrind/tests/runtime_obj_skip_c.post.exp new file mode 100644 index 000000000..d86bac9de --- /dev/null +++ b/callgrind/tests/runtime_obj_skip_c.post.exp @@ -0,0 +1 @@ +OK diff --git a/callgrind/tests/runtime_obj_skip_c.stderr.exp b/callgrind/tests/runtime_obj_skip_c.stderr.exp new file mode 100644 index 000000000..e69de29bb diff --git a/callgrind/tests/runtime_obj_skip_c.vgtest b/callgrind/tests/runtime_obj_skip_c.vgtest new file mode 100644 index 000000000..2895e5141 --- /dev/null +++ b/callgrind/tests/runtime_obj_skip_c.vgtest @@ -0,0 +1,5 @@ +prereq: test -f runtime_obj_skip_c && test -f runtime_obj_skip_c_lib.so +prog-asis: ./runtime_obj_skip_c +vgopts: --instr-atstart=no --compress-strings=no --callgrind-out-file=callgrind.out.runtime_obj_skip_c +post: sh -c 'if grep -q "^fn=skipme" callgrind.out.runtime_obj_skip_c; then echo "FAIL: skipme_* leaked into top-level fn= block"; else echo OK; fi' +cleanup: rm -f callgrind.out.runtime_obj_skip_c diff --git a/callgrind/tests/runtime_obj_skip_c_lib.c b/callgrind/tests/runtime_obj_skip_c_lib.c new file mode 100644 index 000000000..d7d9a8a23 --- /dev/null +++ b/callgrind/tests/runtime_obj_skip_c_lib.c @@ -0,0 +1,40 @@ +/* Library that lives in a separate ELF object so the main binary + * can register its path for runtime obj-skip. + * + * skipme_run() flips instrumentation on from *inside* the skipped + * object, then calls skipme_func. This is the trigger for the + * `current_state.cxt == 0` push path in setup_bbcc: the very first + * BB after instrumentation start lives in a skipped object, so the + * (cxt==0) clause force-pushes a skipped fn as the new top context + * and it leaks into the dump as a top-level fn= block. */ + +#define _GNU_SOURCE +#include +#include "../callgrind.h" + +volatile long sink; + +/* Returns the path of the .so this code is compiled into. Done inside + * the lib because taking &skipme_run from the main binary yields a + * PLT stub address whose dladdr resolves to the main binary, not the + * lib — which would register the wrong object for skip. */ +const char* skipme_self_path(void) +{ + Dl_info info; + if (dladdr((void*)&skipme_self_path, &info) == 0) return 0; + return info.dli_fname; +} + +__attribute__((noinline)) +void skipme_func(int n) +{ + for (int i = 0; i < n; i++) sink += i; +} + +__attribute__((noinline)) +void skipme_run(int n) +{ + CALLGRIND_START_INSTRUMENTATION; + skipme_func(n); + CALLGRIND_STOP_INSTRUMENTATION; +}