Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
/autom4te.cache
/bin
/cachegrind.out.*
/callgrind.out.*
/compile
/config.guess
/config.h*
Expand Down Expand Up @@ -155,11 +156,18 @@
/callgrind/tests/Makefile
/callgrind/tests/Makefile.in
/callgrind/tests/callgrind.out.*
/callgrind/tests/vgcore.*
/callgrind/tests/clreq
/callgrind/tests/simwork
/callgrind/tests/threads
/callgrind/tests/inline-samefile
/callgrind/tests/inline-crossfile
/callgrind/tests/find_debuginfo
/callgrind/tests/runtime_obj_skip_c
/callgrind/tests/runtime_obj_skip_underflow
/callgrind/tests/*.so
/callgrind/tests/out_dbg
/callgrind/tests/test-suite-overall.log

# /coregrind/
/coregrind/*.a
Expand Down Expand Up @@ -2550,3 +2558,6 @@ none/tests/freebsd/bug499212
*.vgtest*.log
/test-suite-overall.log
test-suite.log

# autoconf backup
/configure~
22 changes: 12 additions & 10 deletions callgrind/bbcc.c
Original file line number Diff line number Diff line change
Expand Up @@ -240,8 +240,7 @@ static void resize_bbcc_hash(void)
}


static __inline
BBCC** new_recursion(int size)
BBCC** CLG_(new_recursion)(int size)
{
BBCC** bbccs;
int i;
Expand Down Expand Up @@ -313,8 +312,7 @@ BBCC* new_bbcc(BB* bb)
* Recursion level doesn't need to be set as this is not included
* in the hash key: Only BBCCs with rec level 0 are in hashes.
*/
static
void insert_bbcc_into_hash(BBCC* bbcc)
void CLG_(insert_bbcc_into_hash)(BBCC* bbcc)
{
UInt idx;

Expand Down Expand Up @@ -389,10 +387,10 @@ static BBCC* clone_bbcc(BBCC* orig, Context* cxt, Int rec_index)

bbcc->rec_index = 0;
bbcc->cxt = cxt;
bbcc->rec_array = new_recursion(cxt->fn[0]->separate_recursions);
bbcc->rec_array = CLG_(new_recursion)(cxt->fn[0]->separate_recursions);
bbcc->rec_array[0] = bbcc;

insert_bbcc_into_hash(bbcc);
CLG_(insert_bbcc_into_hash)(bbcc);
}
else {
if (CLG_(clo).separate_threads)
Expand Down Expand Up @@ -513,16 +511,20 @@ static void handleUnderflow(BB* bb)
CLG_(current_fn_stack).top--;
CLG_(current_state).cxt = 0;
caller = CLG_(get_fn_node)(bb);
CLG_DEBUG(1, " underflow reset: cxt=0, BB=%#lx, fn-about-to-push='%s' "
"obj='%s' skip=%d\n",
bb_addr(bb), caller->name,
caller->file->obj->name, caller->skip);
CLG_(push_cxt)( caller );

if (!seen_before) {
/* set rec array for source BBCC: this is at rec level 1 */
source_bbcc->rec_array = new_recursion(caller->separate_recursions);
source_bbcc->rec_array = CLG_(new_recursion)(caller->separate_recursions);
source_bbcc->rec_array[0] = source_bbcc;

CLG_ASSERT(source_bbcc->cxt == 0);
source_bbcc->cxt = CLG_(current_state).cxt;
insert_bbcc_into_hash(source_bbcc);
CLG_(insert_bbcc_into_hash)(source_bbcc);
}
CLG_ASSERT(CLG_(current_state).bbcc);

Expand Down Expand Up @@ -807,10 +809,10 @@ void CLG_(setup_bbcc)(BB* bb)

bbcc->cxt = CLG_(current_state).cxt;
bbcc->rec_array =
new_recursion((*CLG_(current_fn_stack).top)->separate_recursions);
CLG_(new_recursion)((*CLG_(current_fn_stack).top)->separate_recursions);
bbcc->rec_array[0] = bbcc;

insert_bbcc_into_hash(bbcc);
CLG_(insert_bbcc_into_hash)(bbcc);
}
else {
/* get BBCC with current context */
Expand Down
94 changes: 94 additions & 0 deletions callgrind/callstack.c
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
*/

#include "global.h"
#include "pub_tool_stacktrace.h"

/*------------------------------------------------------------*/
/*--- Call stack, operations ---*/
Expand Down Expand Up @@ -361,6 +362,18 @@ void CLG_(pop_call_stack)(void)

if (depth == 0) function_left(to_fn);
}
else if (lower_entry->cxt != 0) {
/* Seeded entry from reconstruct_call_stack_from_native: jcc=0
* (skip-style) but push_cxt was called, so cxt was changed.
* Restore it here so the seeded frame doesn't stay stuck on
* top of the cxt chain and phantom-parent every subsequent
* call from the real caller. Real skip-entries
* (push_call_stack(skip=True) without a prior push_cxt) have
* lower_entry->cxt==0 and skip this branch. */
CLG_(current_state).cxt = lower_entry->cxt;
CLG_(current_fn_stack).top =
CLG_(current_fn_stack).bottom + lower_entry->fn_sp;
}

/* To allow for an assertion in push_call_stack() */
lower_entry->cxt = 0;
Expand Down Expand Up @@ -433,3 +446,84 @@ Int CLG_(unwind_call_stack)(Addr sp, Int minpops)
CLG_DEBUG(4,"- unwind_call_stack\n");
return unwind_count;
}


/* Seed callgrind's shadow call stack from the client's native stack so a
* later `ret` past unseen frames pops cleanly instead of underflowing.
*
* Called on the OFF->ON instrumentation transition: the client (e.g.
* pytest_codspeed) typically reaches CALLGRIND_START_INSTRUMENTATION several
* libpython frames deep. Without seeding, csp stays at 0 while the real
* stack is non-empty, and every subsequent ret trips handleUnderflow and
* leaks the returned-into fn as a top-level fn= block.
*
* We push a (jcc=0, skip-style) call_entry for every native frame so
* SP-based unwind works. For frames that should appear in the output
* (non-skipped, non-anonymous) we also call push_cxt to seed the context
* chain; pop_call_stack has an else-if branch to restore cxt from these
* entries when they are unwound. Skipped and anonymous (JIT) frames are
* deliberately excluded from the cxt chain — they get SP-only entries. */
#define CLG_RECON_MAX_FRAMES 256

void CLG_(reconstruct_call_stack_from_native)(ThreadId tid)
{
Addr ips[CLG_RECON_MAX_FRAMES];
Addr sps[CLG_RECON_MAX_FRAMES];
call_stack* cs = &CLG_(current_call_stack);

if (cs->sp != 0) return;

UInt n = VG_(get_StackTrace)(tid, ips, CLG_RECON_MAX_FRAMES, sps, NULL, 0);
if (n == 0) return;

/* Push bottom-up: oldest caller first, current frame last. */
for (Int frame = n - 1; frame >= 0; frame--) {
fn_node* fn = CLG_(get_fn_node_for_addr)(ips[frame]);

/* Latch obj-skip on first encounter, matching bbcc.c's check. */
if (!fn->obj_skip_checked) {
const HChar* obj = fn->file->obj->name;
for (Int j = 0; j < CLG_(clo).objs_to_skip_count; j++) {
if (VG_(strcmp)(obj, CLG_(clo).objs_to_skip[j]) == 0) {
fn->skip = True;
break;
}
}
fn->obj_skip_checked = True;
}

/* Grow the stack before push_cxt, which asserts cs->sp < cs->size
* and writes to entry[cs->sp] — matching push_call_stack's order so
* the invariant holds regardless of CLG_RECON_MAX_FRAMES. */
ensure_stack_size(cs->sp + 1);

/* Seed a cxt for every non-skipped frame. JIT frames are named via
* the perf-map resolver in fn.c (get_debug_info), so the root frame
* (__codspeed_root_frame__) gets a real name here instead of "???".
* Seeding a cxt also leaves current_state.cxt non-empty at START so
* the `cxt == 0` clause in setup_bbcc does not force-push the first
* (skipped) libpython/interpreter frame as a top-level node.
* Skipped (obj-skip) frames get SP-only entries — invisible in cxt. */
if (!fn->skip)
CLG_(push_cxt)(fn);

call_entry* ce = &cs->entry[cs->sp];
Comment thread
greptile-apps[bot] marked this conversation as resolved.
ce->jcc = 0;
ce->nonskipped = 0;

/* callgrind pops a frame when SP >= ce->sp, where ce->sp must be the
* frame's *entry* SP (the SP at which its caller made the call). The
* unwinder reports each frame's *own* SP (its call site into the next
* inner frame), which is lower; using sps[frame] would pop this frame
* the moment one of its own sub-calls returns (e.g. the START client
* request returning into __codspeed_root_frame__), re-parenting the
* workload onto the frame above. The entry SP is the caller's reported
* SP, sps[frame+1]; the outermost frame keeps its own SP as nothing
* returns past it during measurement. */
ce->sp = (frame + 1 < (Int)n) ? sps[frame + 1] : sps[frame];
ce->ret_addr = (frame + 1 < (Int)n) ? ips[frame + 1] : 0;
cs->sp++;
ensure_stack_size(cs->sp + 1);
cs->entry[cs->sp].cxt = 0;
}
}
35 changes: 35 additions & 0 deletions callgrind/fn.c
Original file line number Diff line number Diff line change
Expand Up @@ -519,6 +519,41 @@ fn_node* get_fn_node_inseg(DebugInfo* di,
}


/* Resolve a raw code address to a fn_node, creating obj/file/fn entries if
* needed. Addresses without DebugInfo (anonymous JIT mappings, ld glue)
* resolve to the shared `???`/anonymous obj. Used by the START-instrumentation
* stack reconstruction path, which has IPs but no BBs. */
fn_node* CLG_(get_fn_node_for_addr)(Addr ip)
{
const HChar *dirname, *filename, *fnname;
UInt line_num;
DebugInfo* di;

CLG_(get_debug_info)(ip, &dirname, &filename, &fnname, &line_num, &di);

/* Mirror CLG_(get_fn_node)()'s BB-path fallback: when there is no symbol
* (anonymous JIT / stripped code) get_debug_info yields the literal "???".
* Emit the object-relative address instead — identical to the bb->offset
* string the execution path produces — so the frame is a distinct,
* backend-symbolicatable "0x..." node rather than collapsing into the one
* shared "???" node (which loses the address). For anonymous JIT code the
* text bias is 0, so this is the absolute address that perf-<pid>.map keys
* on; the backend resolves it from the perf map. Used by the START stack
* reconstruction, which has the raw IP but no BB. */
if (0 == VG_(strcmp)(fnname, "???")) {
HChar buf[32]; /* copied by get_fn_node_infile via strdup */
Addr off = ip - (di ? VG_(DebugInfo_get_text_bias)(di) : 0);
if (sizeof(Addr) == 4)
VG_(sprintf)(buf, "%#08lx", (UWord)off);
else
VG_(sprintf)(buf, "%#016lx", (UWord)off);
fnname = buf;
}
Comment thread
greptile-apps[bot] marked this conversation as resolved.

return get_fn_node_inseg(di, dirname, filename, fnname);
}


Bool CLG_(get_debug_info)(Addr instr_addr,
const HChar **dir,
const HChar **file,
Expand Down
4 changes: 4 additions & 0 deletions callgrind/global.h
Original file line number Diff line number Diff line change
Expand Up @@ -727,6 +727,7 @@ obj_node* CLG_(get_obj_node)(DebugInfo* si);
file_node* CLG_(get_file_node)(obj_node*, const HChar *dirname,
const HChar* filename);
fn_node* CLG_(get_fn_node)(BB* bb);
fn_node* CLG_(get_fn_node_for_addr)(Addr ip);

/* from bbcc.c */
void CLG_(init_bbcc_hash)(bbcc_hash* bbccs);
Expand All @@ -736,6 +737,8 @@ void CLG_(set_current_bbcc_hash)(bbcc_hash*);
void CLG_(forall_bbccs)(void (*func)(BBCC*));
void CLG_(zero_bbcc)(BBCC* bbcc);
BBCC* CLG_(get_bbcc)(BB* bb);
BBCC** CLG_(new_recursion)(int size);
void CLG_(insert_bbcc_into_hash)(BBCC* bbcc);
BBCC* CLG_(clone_bbcc)(BBCC* orig, Context* cxt, Int rec_index);
void CLG_(setup_bbcc)(BB* bb) VG_REGPARM(1);

Expand All @@ -755,6 +758,7 @@ call_entry* CLG_(get_call_entry)(Int n);
void CLG_(push_call_stack)(BBCC* from, UInt jmp, BBCC* to, Addr sp, Bool skip);
void CLG_(pop_call_stack)(void);
Int CLG_(unwind_call_stack)(Addr sp, Int);
void CLG_(reconstruct_call_stack_from_native)(ThreadId tid);

/* from context.c */
void CLG_(init_fn_stack)(fn_stack*);
Expand Down
1 change: 1 addition & 0 deletions callgrind/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -1672,6 +1672,7 @@ Bool CLG_(handle_client_request)(ThreadId tid, UWord *args, UWord *ret)

case VG_USERREQ__START_INSTRUMENTATION:
CLG_(set_instrument_state)("Client Request", True);
CLG_(reconstruct_call_stack_from_native)(tid);
*ret = 0; /* meaningless */
break;

Expand Down
29 changes: 26 additions & 3 deletions callgrind/tests/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,10 @@ EXTRA_DIST = \
find_debuginfo.vgtest find_debuginfo.stderr.exp find_debuginfo.post.exp \
runtime_obj_skip_py.vgtest runtime_obj_skip_py.stderr.exp runtime_obj_skip_py.post.exp \
runtime_obj_skip_py.py runtime_obj_skip_py_shim.c \
runtime_obj_skip_c.vgtest runtime_obj_skip_c.stderr.exp runtime_obj_skip_c.post.exp \
runtime_obj_skip_c.c runtime_obj_skip_c_lib.c \
runtime_obj_skip_underflow.vgtest runtime_obj_skip_underflow.stderr.exp runtime_obj_skip_underflow.post.exp \
runtime_obj_skip_underflow.c runtime_obj_skip_underflow_lib.c \
bug497723.stderr.exp bug497723.post.exp bug497723.vgtest \
simwork1.vgtest simwork1.stdout.exp simwork1.stderr.exp \
simwork2.vgtest simwork2.stdout.exp simwork2.stderr.exp \
Expand All @@ -31,7 +35,7 @@ EXTRA_DIST = \
inline-crossfile.vgtest inline-crossfile.stderr.exp inline-crossfile.stdout.exp inline-crossfile.post.exp \
inline-crossfile-helper1.h inline-crossfile-helper2.h filter_inline

check_PROGRAMS = clreq find_debuginfo simwork threads inline-samefile inline-crossfile
check_PROGRAMS = clreq find_debuginfo simwork threads inline-samefile inline-crossfile runtime_obj_skip_c runtime_obj_skip_underflow

AM_CFLAGS += $(AM_FLAG_M3264_PRI)
AM_CXXFLAGS += $(AM_FLAG_M3264_PRI)
Expand All @@ -44,10 +48,29 @@ threads_LDADD = -lpthread

# Shim loaded by runtime_obj_skip_py.py via ctypes. Built unconditionally;
# the test's prereq skips it if the .so is missing.
check_DATA = runtime_obj_skip_py_shim.so
check_DATA = runtime_obj_skip_py_shim.so runtime_obj_skip_c_lib.so runtime_obj_skip_underflow_lib.so

runtime_obj_skip_py_shim.so: runtime_obj_skip_py_shim.c
$(CC) -shared -fPIC -O2 -I$(top_srcdir) -I$(top_srcdir)/include \
$< -o $@

CLEANFILES = runtime_obj_skip_py_shim.so
# Shared lib for the runtime_obj_skip_c test. Lives in a separate ELF
# so the main binary can register its path for runtime obj-skip.
runtime_obj_skip_c_lib.so: runtime_obj_skip_c_lib.c
$(CC) -shared -fPIC -O2 -I$(top_srcdir) -I$(top_srcdir)/include \
$< -o $@

runtime_obj_skip_c_LDADD = -l:runtime_obj_skip_c_lib.so -ldl
runtime_obj_skip_c_LDFLAGS = $(AM_LDFLAGS) -L. -Wl,-rpath,'$$ORIGIN'
runtime_obj_skip_c_DEPENDENCIES = runtime_obj_skip_c_lib.so

# Shared lib for the runtime_obj_skip_underflow test.
runtime_obj_skip_underflow_lib.so: runtime_obj_skip_underflow_lib.c
$(CC) -shared -fPIC -O2 -I$(top_srcdir) -I$(top_srcdir)/include \
$< -o $@

runtime_obj_skip_underflow_LDADD = -l:runtime_obj_skip_underflow_lib.so -ldl
runtime_obj_skip_underflow_LDFLAGS = $(AM_LDFLAGS) -L. -Wl,-rpath,'$$ORIGIN'
runtime_obj_skip_underflow_DEPENDENCIES = runtime_obj_skip_underflow_lib.so

CLEANFILES = runtime_obj_skip_py_shim.so runtime_obj_skip_c_lib.so runtime_obj_skip_underflow_lib.so
6 changes: 5 additions & 1 deletion callgrind/tests/filter_stderr
Original file line number Diff line number Diff line change
Expand Up @@ -30,4 +30,8 @@ sed "/warning: L4 cache found, using its data for the LL simulation./d" |
sed "/Warning: Cannot auto-detect cache config, using defaults./d" |
sed "/Run with -v to see./d" |
sed "/warning: specified LL cache: line_size .*$/d" |
sed "/warning: simulated LL cache: line_size .*$/d"
sed "/warning: simulated LL cache: line_size .*$/d" |

# Drop callgrind diagnostic logs (verbose obj-skip / cxt / underflow tracing).
# These are chore-level diagnostics that vary by run/host and aren't assertions.
sed -E "/^(add_obj_to_skip|obj-skip list now has| \[[0-9]+\] '|fn_nodes already obj_skip_checked|instrument_state ->|new_fn_node:|obj_skip (HIT|miss):| vs \[[0-9]+\] strcmp=|push_cxt FORCED|reconstruct_call_stack:|=== python fn summary| fn='[^']*' obj=)/d"
30 changes: 30 additions & 0 deletions callgrind/tests/runtime_obj_skip_c.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
/* Minimal C reproducer for the runtime obj-skip leak: a fn from a
* skipped object ends up as a top-level fn= block in the callgrind
* output when it is the first BB instrumented after START.
*
* Strategy: register the lib for skip, then call into the lib BEFORE
* starting instrumentation. The lib itself calls
* CALLGRIND_START_INSTRUMENTATION mid-function, so the first BB
* processed by callgrind lives in the skipped object — which trips
* the (cxt == 0) push_cxt path that ignores the skip flag. */

#define _GNU_SOURCE
#include <dlfcn.h>
#include <stdio.h>
#include "../callgrind.h"

extern void skipme_run(int n);

int main(void)
{
Dl_info info;
if (dladdr((void*)skipme_run, &info) == 0 || !info.dli_fname) {
fprintf(stderr, "dladdr failed\n");
return 1;
}
CALLGRIND_ADD_OBJ_SKIP(info.dli_fname);

skipme_run(1000);

return 0;
}
1 change: 1 addition & 0 deletions callgrind/tests/runtime_obj_skip_c.post.exp
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
OK
6 changes: 6 additions & 0 deletions callgrind/tests/runtime_obj_skip_c.stderr.exp
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@


Events : Ir
Collected :

I refs:
5 changes: 5 additions & 0 deletions callgrind/tests/runtime_obj_skip_c.vgtest
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
prereq: test -f runtime_obj_skip_c && test -f runtime_obj_skip_c_lib.so
prog-asis: ./runtime_obj_skip_c
vgopts: --instr-atstart=no --compress-strings=no --callgrind-out-file=callgrind.out.runtime_obj_skip_c
post: sh -c 'test -f callgrind.out.runtime_obj_skip_c || { echo "FAIL: callgrind output file missing"; exit 1; }; leaked=$(grep "^fn=skipme_" callgrind.out.runtime_obj_skip_c); if [ -n "$leaked" ]; then echo "FAIL: skipped fn leaked into top-level fn= block:"; echo "$leaked"; exit 1; else echo OK; fi'
cleanup: rm -f callgrind.out.runtime_obj_skip_c
Loading
Loading