mirror of
https://github.com/ioacademy-jikim/debugging
synced 2025-06-08 00:16:11 +00:00
918 lines
25 KiB
C
918 lines
25 KiB
C
/*--------------------------------------------------------------------*/
|
|
/*--- Callgrind ---*/
|
|
/*--- bbcc.c ---*/
|
|
/*--------------------------------------------------------------------*/
|
|
|
|
/*
|
|
This file is part of Callgrind, a Valgrind tool for call tracing.
|
|
|
|
Copyright (C) 2002-2015, Josef Weidendorfer (Josef.Weidendorfer@gmx.de)
|
|
|
|
This program is free software; you can redistribute it and/or
|
|
modify it under the terms of the GNU General Public License as
|
|
published by the Free Software Foundation; either version 2 of the
|
|
License, or (at your option) any later version.
|
|
|
|
This program is distributed in the hope that it will be useful, but
|
|
WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program; if not, write to the Free Software
|
|
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
|
02111-1307, USA.
|
|
|
|
The GNU General Public License is contained in the file COPYING.
|
|
*/
|
|
|
|
#include "global.h"
|
|
#include "costs.h"
|
|
|
|
#include "pub_tool_threadstate.h"
|
|
|
|
/*------------------------------------------------------------*/
|
|
/*--- BBCC operations ---*/
|
|
/*------------------------------------------------------------*/
|
|
|
|
#define N_BBCC_INITIAL_ENTRIES 10437
|
|
|
|
/* BBCC table (key is BB/Context), per thread, resizable */
|
|
bbcc_hash current_bbccs;
|
|
|
|
void CLG_(init_bbcc_hash)(bbcc_hash* bbccs)
|
|
{
|
|
Int i;
|
|
|
|
CLG_ASSERT(bbccs != 0);
|
|
|
|
bbccs->size = N_BBCC_INITIAL_ENTRIES;
|
|
bbccs->entries = 0;
|
|
bbccs->table = (BBCC**) CLG_MALLOC("cl.bbcc.ibh.1",
|
|
bbccs->size * sizeof(BBCC*));
|
|
|
|
for (i = 0; i < bbccs->size; i++) bbccs->table[i] = NULL;
|
|
}
|
|
|
|
void CLG_(copy_current_bbcc_hash)(bbcc_hash* dst)
|
|
{
|
|
CLG_ASSERT(dst != 0);
|
|
|
|
dst->size = current_bbccs.size;
|
|
dst->entries = current_bbccs.entries;
|
|
dst->table = current_bbccs.table;
|
|
}
|
|
|
|
bbcc_hash* CLG_(get_current_bbcc_hash)()
|
|
{
|
|
return ¤t_bbccs;
|
|
}
|
|
|
|
void CLG_(set_current_bbcc_hash)(bbcc_hash* h)
|
|
{
|
|
CLG_ASSERT(h != 0);
|
|
|
|
current_bbccs.size = h->size;
|
|
current_bbccs.entries = h->entries;
|
|
current_bbccs.table = h->table;
|
|
}
|
|
|
|
/*
|
|
* Zero all costs of a BBCC
|
|
*/
|
|
void CLG_(zero_bbcc)(BBCC* bbcc)
|
|
{
|
|
Int i;
|
|
jCC* jcc;
|
|
|
|
CLG_ASSERT(bbcc->cxt != 0);
|
|
CLG_DEBUG(1, " zero_bbcc: BB %#lx, Cxt %u "
|
|
"(fn '%s', rec %u)\n",
|
|
bb_addr(bbcc->bb),
|
|
bbcc->cxt->base_number + bbcc->rec_index,
|
|
bbcc->cxt->fn[0]->name,
|
|
bbcc->rec_index);
|
|
|
|
if ((bbcc->ecounter_sum ==0) &&
|
|
(bbcc->ret_counter ==0)) return;
|
|
|
|
for(i=0;i<bbcc->bb->cost_count;i++)
|
|
bbcc->cost[i] = 0;
|
|
for(i=0;i <= bbcc->bb->cjmp_count;i++) {
|
|
bbcc->jmp[i].ecounter = 0;
|
|
for(jcc=bbcc->jmp[i].jcc_list; jcc; jcc=jcc->next_from)
|
|
CLG_(init_cost)( CLG_(sets).full, jcc->cost );
|
|
}
|
|
bbcc->ecounter_sum = 0;
|
|
bbcc->ret_counter = 0;
|
|
}
|
|
|
|
|
|
|
|
void CLG_(forall_bbccs)(void (*func)(BBCC*))
|
|
{
|
|
BBCC *bbcc, *bbcc2;
|
|
int i, j;
|
|
|
|
for (i = 0; i < current_bbccs.size; i++) {
|
|
if ((bbcc=current_bbccs.table[i]) == NULL) continue;
|
|
while (bbcc) {
|
|
/* every bbcc should have a rec_array */
|
|
CLG_ASSERT(bbcc->rec_array != 0);
|
|
|
|
for(j=0;j<bbcc->cxt->fn[0]->separate_recursions;j++) {
|
|
if ((bbcc2 = bbcc->rec_array[j]) == 0) continue;
|
|
|
|
(*func)(bbcc2);
|
|
}
|
|
bbcc = bbcc->next;
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
/* All BBCCs for recursion level 0 are inserted into a
|
|
* thread specific hash table with key
|
|
* - address of BB structure (unique, as never freed)
|
|
* - current context (includes caller chain)
|
|
* BBCCs for other recursion levels are in bbcc->rec_array.
|
|
*
|
|
* The hash is used in setup_bb(), i.e. to find the cost
|
|
* counters to be changed in the execution of a BB.
|
|
*/
|
|
|
|
static __inline__
|
|
UInt bbcc_hash_idx(BB* bb, Context* cxt, UInt size)
|
|
{
|
|
CLG_ASSERT(bb != 0);
|
|
CLG_ASSERT(cxt != 0);
|
|
|
|
return ((Addr)bb + (Addr)cxt) % size;
|
|
}
|
|
|
|
|
|
/* Lookup for a BBCC in hash.
|
|
*/
|
|
static
|
|
BBCC* lookup_bbcc(BB* bb, Context* cxt)
|
|
{
|
|
BBCC* bbcc = bb->last_bbcc;
|
|
UInt idx;
|
|
|
|
/* check LRU */
|
|
if (bbcc->cxt == cxt) {
|
|
if (!CLG_(clo).separate_threads) {
|
|
/* if we don't dump threads separate, tid doesn't have to match */
|
|
return bbcc;
|
|
}
|
|
if (bbcc->tid == CLG_(current_tid)) return bbcc;
|
|
}
|
|
|
|
CLG_(stat).bbcc_lru_misses++;
|
|
|
|
idx = bbcc_hash_idx(bb, cxt, current_bbccs.size);
|
|
bbcc = current_bbccs.table[idx];
|
|
while (bbcc &&
|
|
(bb != bbcc->bb ||
|
|
cxt != bbcc->cxt)) {
|
|
bbcc = bbcc->next;
|
|
}
|
|
|
|
CLG_DEBUG(2," lookup_bbcc(BB %#lx, Cxt %u, fn '%s'): %p (tid %u)\n",
|
|
bb_addr(bb), cxt->base_number, cxt->fn[0]->name,
|
|
bbcc, bbcc ? bbcc->tid : 0);
|
|
|
|
CLG_DEBUGIF(2)
|
|
if (bbcc) CLG_(print_bbcc)(-2,bbcc);
|
|
|
|
return bbcc;
|
|
}
|
|
|
|
|
|
/* double size of hash table 1 (addr->BBCC) */
|
|
static void resize_bbcc_hash(void)
|
|
{
|
|
Int i, new_size, conflicts1 = 0, conflicts2 = 0;
|
|
BBCC** new_table;
|
|
UInt new_idx;
|
|
BBCC *curr_BBCC, *next_BBCC;
|
|
|
|
new_size = 2*current_bbccs.size+3;
|
|
new_table = (BBCC**) CLG_MALLOC("cl.bbcc.rbh.1",
|
|
new_size * sizeof(BBCC*));
|
|
|
|
for (i = 0; i < new_size; i++)
|
|
new_table[i] = NULL;
|
|
|
|
for (i = 0; i < current_bbccs.size; i++) {
|
|
if (current_bbccs.table[i] == NULL) continue;
|
|
|
|
curr_BBCC = current_bbccs.table[i];
|
|
while (NULL != curr_BBCC) {
|
|
next_BBCC = curr_BBCC->next;
|
|
|
|
new_idx = bbcc_hash_idx(curr_BBCC->bb,
|
|
curr_BBCC->cxt,
|
|
new_size);
|
|
|
|
curr_BBCC->next = new_table[new_idx];
|
|
new_table[new_idx] = curr_BBCC;
|
|
if (curr_BBCC->next) {
|
|
conflicts1++;
|
|
if (curr_BBCC->next->next)
|
|
conflicts2++;
|
|
}
|
|
|
|
curr_BBCC = next_BBCC;
|
|
}
|
|
}
|
|
|
|
VG_(free)(current_bbccs.table);
|
|
|
|
|
|
CLG_DEBUG(0,"Resize BBCC Hash: %u => %d (entries %u, conflicts %d/%d)\n",
|
|
current_bbccs.size, new_size,
|
|
current_bbccs.entries, conflicts1, conflicts2);
|
|
|
|
current_bbccs.size = new_size;
|
|
current_bbccs.table = new_table;
|
|
CLG_(stat).bbcc_hash_resizes++;
|
|
}
|
|
|
|
|
|
static __inline
|
|
BBCC** new_recursion(int size)
|
|
{
|
|
BBCC** bbccs;
|
|
int i;
|
|
|
|
bbccs = (BBCC**) CLG_MALLOC("cl.bbcc.nr.1", sizeof(BBCC*) * size);
|
|
for(i=0;i<size;i++)
|
|
bbccs[i] = 0;
|
|
|
|
CLG_DEBUG(3," new_recursion(size %d): %p\n", size, bbccs);
|
|
|
|
return bbccs;
|
|
}
|
|
|
|
|
|
/*
|
|
* Allocate a new BBCC
|
|
*
|
|
* Uninitialized:
|
|
* cxt, rec_index, rec_array, next_bbcc, next1, next2
|
|
*/
|
|
static __inline__
|
|
BBCC* new_bbcc(BB* bb)
|
|
{
|
|
BBCC* bbcc;
|
|
Int i;
|
|
|
|
/* We need cjmp_count+1 JmpData structs:
|
|
* the last is for the unconditional jump/call/ret at end of BB
|
|
*/
|
|
bbcc = (BBCC*)CLG_MALLOC("cl.bbcc.nb.1",
|
|
sizeof(BBCC) +
|
|
(bb->cjmp_count+1) * sizeof(JmpData));
|
|
bbcc->bb = bb;
|
|
bbcc->tid = CLG_(current_tid);
|
|
|
|
bbcc->ret_counter = 0;
|
|
bbcc->skipped = 0;
|
|
bbcc->cost = CLG_(get_costarray)(bb->cost_count);
|
|
for(i=0;i<bb->cost_count;i++)
|
|
bbcc->cost[i] = 0;
|
|
for(i=0; i<=bb->cjmp_count; i++) {
|
|
bbcc->jmp[i].ecounter = 0;
|
|
bbcc->jmp[i].jcc_list = 0;
|
|
}
|
|
bbcc->ecounter_sum = 0;
|
|
|
|
/* Init pointer caches (LRU) */
|
|
bbcc->lru_next_bbcc = 0;
|
|
bbcc->lru_from_jcc = 0;
|
|
bbcc->lru_to_jcc = 0;
|
|
|
|
CLG_(stat).distinct_bbccs++;
|
|
|
|
CLG_DEBUG(3, " new_bbcc(BB %#lx): %p (now %d)\n",
|
|
bb_addr(bb), bbcc, CLG_(stat).distinct_bbccs);
|
|
|
|
return bbcc;
|
|
}
|
|
|
|
|
|
/**
|
|
* Inserts a new BBCC into hashes.
|
|
* BBCC specific items must be set as this is used for the hash
|
|
* keys:
|
|
* fn : current function
|
|
* tid : current thread ID
|
|
* from : position where current function is called from
|
|
*
|
|
* Recursion level doesn't need to be set as this is not included
|
|
* in the hash key: Only BBCCs with rec level 0 are in hashes.
|
|
*/
|
|
static
|
|
void insert_bbcc_into_hash(BBCC* bbcc)
|
|
{
|
|
UInt idx;
|
|
|
|
CLG_ASSERT(bbcc->cxt != 0);
|
|
|
|
CLG_DEBUG(3,"+ insert_bbcc_into_hash(BB %#lx, fn '%s')\n",
|
|
bb_addr(bbcc->bb), bbcc->cxt->fn[0]->name);
|
|
|
|
/* check fill degree of hash and resize if needed (>90%) */
|
|
current_bbccs.entries++;
|
|
if (100 * current_bbccs.entries / current_bbccs.size > 90)
|
|
resize_bbcc_hash();
|
|
|
|
idx = bbcc_hash_idx(bbcc->bb, bbcc->cxt, current_bbccs.size);
|
|
bbcc->next = current_bbccs.table[idx];
|
|
current_bbccs.table[idx] = bbcc;
|
|
|
|
CLG_DEBUG(3,"- insert_bbcc_into_hash: %u entries\n",
|
|
current_bbccs.entries);
|
|
}
|
|
|
|
/* String is returned in a dynamically allocated buffer. Caller is
|
|
responsible for free'ing it. */
|
|
static HChar* mangled_cxt(const Context* cxt, Int rec_index)
|
|
{
|
|
Int i, p;
|
|
|
|
if (!cxt) return VG_(strdup)("cl.bbcc.mcxt", "(no context)");
|
|
|
|
/* Overestimate the number of bytes we need to hold the string. */
|
|
SizeT need = 20; // rec_index + nul-terminator
|
|
for (i = 0; i < cxt->size; ++i)
|
|
need += VG_(strlen)(cxt->fn[i]->name) + 1; // 1 for leading '
|
|
|
|
HChar *mangled = CLG_MALLOC("cl.bbcc.mcxt", need);
|
|
p = VG_(sprintf)(mangled, "%s", cxt->fn[0]->name);
|
|
if (rec_index >0)
|
|
p += VG_(sprintf)(mangled+p, "'%d", rec_index +1);
|
|
for(i=1;i<cxt->size;i++)
|
|
p += VG_(sprintf)(mangled+p, "'%s", cxt->fn[i]->name);
|
|
|
|
return mangled;
|
|
}
|
|
|
|
|
|
/* Create a new BBCC as a copy of an existing one,
|
|
* but with costs set to 0 and jcc chains empty.
|
|
*
|
|
* This is needed when a BB is executed in another context than
|
|
* the one at instrumentation time of the BB.
|
|
*
|
|
* Use cases:
|
|
* rec_index == 0: clone from a BBCC with differing tid/cxt
|
|
* and insert into hashes
|
|
* rec_index >0 : clone from a BBCC with same tid/cxt and rec_index 0
|
|
* don't insert into hashes
|
|
*/
|
|
static BBCC* clone_bbcc(BBCC* orig, Context* cxt, Int rec_index)
|
|
{
|
|
BBCC* bbcc;
|
|
|
|
CLG_DEBUG(3,"+ clone_bbcc(BB %#lx, rec %d, fn %s)\n",
|
|
bb_addr(orig->bb), rec_index, cxt->fn[0]->name);
|
|
|
|
bbcc = new_bbcc(orig->bb);
|
|
|
|
if (rec_index == 0) {
|
|
|
|
/* hash insertion is only allowed if tid or cxt is different */
|
|
CLG_ASSERT((orig->tid != CLG_(current_tid)) ||
|
|
(orig->cxt != cxt));
|
|
|
|
bbcc->rec_index = 0;
|
|
bbcc->cxt = cxt;
|
|
bbcc->rec_array = new_recursion(cxt->fn[0]->separate_recursions);
|
|
bbcc->rec_array[0] = bbcc;
|
|
|
|
insert_bbcc_into_hash(bbcc);
|
|
}
|
|
else {
|
|
if (CLG_(clo).separate_threads)
|
|
CLG_ASSERT(orig->tid == CLG_(current_tid));
|
|
|
|
CLG_ASSERT(orig->cxt == cxt);
|
|
CLG_ASSERT(orig->rec_array);
|
|
CLG_ASSERT(cxt->fn[0]->separate_recursions > rec_index);
|
|
CLG_ASSERT(orig->rec_array[rec_index] ==0);
|
|
|
|
/* new BBCC will only have differing recursion level */
|
|
bbcc->rec_index = rec_index;
|
|
bbcc->cxt = cxt;
|
|
bbcc->rec_array = orig->rec_array;
|
|
bbcc->rec_array[rec_index] = bbcc;
|
|
}
|
|
|
|
/* update list of BBCCs for same BB */
|
|
bbcc->next_bbcc = orig->bb->bbcc_list;
|
|
orig->bb->bbcc_list = bbcc;
|
|
|
|
|
|
CLG_DEBUGIF(3)
|
|
CLG_(print_bbcc)(-2, bbcc);
|
|
|
|
HChar *mangled_orig = mangled_cxt(orig->cxt, orig->rec_index);
|
|
HChar *mangled_bbcc = mangled_cxt(bbcc->cxt, bbcc->rec_index);
|
|
CLG_DEBUG(2,"- clone_BBCC(%p, %d) for BB %#lx\n"
|
|
" orig %s\n"
|
|
" new %s\n",
|
|
orig, rec_index, bb_addr(orig->bb),
|
|
mangled_orig,
|
|
mangled_bbcc);
|
|
CLG_FREE(mangled_orig);
|
|
CLG_FREE(mangled_bbcc);
|
|
|
|
CLG_(stat).bbcc_clones++;
|
|
|
|
return bbcc;
|
|
};
|
|
|
|
|
|
|
|
/* Get a pointer to the cost centre structure for given basic block
|
|
* address. If created, the BBCC is inserted into the BBCC hash.
|
|
* Also sets BB_seen_before by reference.
|
|
*
|
|
*/
|
|
BBCC* CLG_(get_bbcc)(BB* bb)
|
|
{
|
|
BBCC* bbcc;
|
|
|
|
CLG_DEBUG(3, "+ get_bbcc(BB %#lx)\n", bb_addr(bb));
|
|
|
|
bbcc = bb->bbcc_list;
|
|
|
|
if (!bbcc) {
|
|
bbcc = new_bbcc(bb);
|
|
|
|
/* initialize BBCC */
|
|
bbcc->cxt = 0;
|
|
bbcc->rec_array = 0;
|
|
bbcc->rec_index = 0;
|
|
|
|
bbcc->next_bbcc = bb->bbcc_list;
|
|
bb->bbcc_list = bbcc;
|
|
bb->last_bbcc = bbcc;
|
|
|
|
CLG_DEBUGIF(3)
|
|
CLG_(print_bbcc)(-2, bbcc);
|
|
}
|
|
|
|
CLG_DEBUG(3, "- get_bbcc(BB %#lx): BBCC %p\n",
|
|
bb_addr(bb), bbcc);
|
|
|
|
return bbcc;
|
|
}
|
|
|
|
|
|
/* Callgrind manages its own call stack for each thread.
|
|
* When leaving a function, a underflow can happen when
|
|
* Callgrind's tracing was switched on in the middle of
|
|
* a run, i.e. when Callgrind was not able to trace the
|
|
* call instruction.
|
|
* This function tries to reconstruct the original call.
|
|
* As we know the return address (the address following
|
|
* the CALL instruction), we can detect the function
|
|
* we return back to, but the original call site is unknown.
|
|
* We suppose a call site at return address - 1.
|
|
* (TODO: other heuristic: lookup info of instrumented BBs).
|
|
*/
|
|
static void handleUnderflow(BB* bb)
|
|
{
|
|
/* RET at top of call stack */
|
|
BBCC* source_bbcc;
|
|
BB* source_bb;
|
|
Bool seen_before;
|
|
fn_node* caller;
|
|
int fn_number;
|
|
unsigned *pactive;
|
|
call_entry* call_entry_up;
|
|
|
|
CLG_DEBUG(1," Callstack underflow !\n");
|
|
|
|
/* we emulate an old call from the function we return to
|
|
* by using (<return address> -1) */
|
|
source_bb = CLG_(get_bb)(bb_addr(bb)-1, 0, &seen_before);
|
|
source_bbcc = CLG_(get_bbcc)(source_bb);
|
|
|
|
/* seen_before can be true if RET from a signal handler */
|
|
if (!seen_before) {
|
|
source_bbcc->ecounter_sum = CLG_(current_state).collect ? 1 : 0;
|
|
}
|
|
else if (CLG_(current_state).collect)
|
|
source_bbcc->ecounter_sum++;
|
|
|
|
/* Force a new top context, will be set active by push_cxt() */
|
|
CLG_(current_fn_stack).top--;
|
|
CLG_(current_state).cxt = 0;
|
|
caller = CLG_(get_fn_node)(bb);
|
|
CLG_(push_cxt)( caller );
|
|
|
|
if (!seen_before) {
|
|
/* set rec array for source BBCC: this is at rec level 1 */
|
|
source_bbcc->rec_array = new_recursion(caller->separate_recursions);
|
|
source_bbcc->rec_array[0] = source_bbcc;
|
|
|
|
CLG_ASSERT(source_bbcc->cxt == 0);
|
|
source_bbcc->cxt = CLG_(current_state).cxt;
|
|
insert_bbcc_into_hash(source_bbcc);
|
|
}
|
|
CLG_ASSERT(CLG_(current_state).bbcc);
|
|
|
|
/* correct active counts */
|
|
fn_number = CLG_(current_state).bbcc->cxt->fn[0]->number;
|
|
pactive = CLG_(get_fn_entry)(fn_number);
|
|
(*pactive)--;
|
|
|
|
/* This assertion is not correct for reentrant
|
|
* signal handlers */
|
|
/* CLG_ASSERT(*pactive == 0); */
|
|
|
|
CLG_(current_state).nonskipped = 0; /* we didn't skip this function */
|
|
/* back to current context */
|
|
CLG_(push_cxt)( CLG_(current_state).bbcc->cxt->fn[0] );
|
|
CLG_(push_call_stack)(source_bbcc, 0, CLG_(current_state).bbcc,
|
|
(Addr)-1, False);
|
|
call_entry_up =
|
|
&(CLG_(current_call_stack).entry[CLG_(current_call_stack).sp -1]);
|
|
/* assume this call is lasting since last dump or
|
|
* for a signal handler since it's call */
|
|
if (CLG_(current_state).sig == 0)
|
|
CLG_(copy_cost)( CLG_(sets).full, call_entry_up->enter_cost,
|
|
CLG_(get_current_thread)()->lastdump_cost );
|
|
else
|
|
CLG_(zero_cost)( CLG_(sets).full, call_entry_up->enter_cost );
|
|
}
|
|
|
|
|
|
/*
|
|
* Helper function called at start of each instrumented BB to setup
|
|
* pointer to costs for current thread/context/recursion level
|
|
*/
|
|
|
|
VG_REGPARM(1)
|
|
void CLG_(setup_bbcc)(BB* bb)
|
|
{
|
|
BBCC *bbcc, *last_bbcc;
|
|
Bool call_emulation = False, delayed_push = False, skip = False;
|
|
Addr sp;
|
|
BB* last_bb;
|
|
ThreadId tid;
|
|
ClgJumpKind jmpkind;
|
|
Bool isConditionalJump;
|
|
Int passed = 0, csp;
|
|
Bool ret_without_call = False;
|
|
Int popcount_on_return = 1;
|
|
|
|
CLG_DEBUG(3,"+ setup_bbcc(BB %#lx)\n", bb_addr(bb));
|
|
|
|
/* This is needed because thread switches can not reliable be tracked
|
|
* with callback CLG_(run_thread) only: we have otherwise no way to get
|
|
* the thread ID after a signal handler returns.
|
|
* This could be removed again if that bug is fixed in Valgrind.
|
|
* This is in the hot path but hopefully not to costly.
|
|
*/
|
|
tid = VG_(get_running_tid)();
|
|
#if 1
|
|
/* CLG_(switch_thread) is a no-op when tid is equal to CLG_(current_tid).
|
|
* As this is on the hot path, we only call CLG_(switch_thread)(tid)
|
|
* if tid differs from the CLG_(current_tid).
|
|
*/
|
|
if (UNLIKELY(tid != CLG_(current_tid)))
|
|
CLG_(switch_thread)(tid);
|
|
#else
|
|
CLG_ASSERT(VG_(get_running_tid)() == CLG_(current_tid));
|
|
#endif
|
|
|
|
sp = VG_(get_SP)(tid);
|
|
last_bbcc = CLG_(current_state).bbcc;
|
|
last_bb = last_bbcc ? last_bbcc->bb : 0;
|
|
|
|
if (last_bb) {
|
|
passed = CLG_(current_state).jmps_passed;
|
|
CLG_ASSERT(passed <= last_bb->cjmp_count);
|
|
jmpkind = last_bb->jmp[passed].jmpkind;
|
|
isConditionalJump = (passed < last_bb->cjmp_count);
|
|
|
|
if (CLG_(current_state).collect) {
|
|
if (!CLG_(current_state).nonskipped) {
|
|
last_bbcc->ecounter_sum++;
|
|
last_bbcc->jmp[passed].ecounter++;
|
|
if (!CLG_(clo).simulate_cache) {
|
|
/* update Ir cost */
|
|
UInt instr_count = last_bb->jmp[passed].instr+1;
|
|
CLG_(current_state).cost[ fullOffset(EG_IR) ] += instr_count;
|
|
}
|
|
}
|
|
else {
|
|
/* do not increment exe counter of BBs in skipped functions, as it
|
|
* would fool dumping code */
|
|
if (!CLG_(clo).simulate_cache) {
|
|
/* update Ir cost */
|
|
UInt instr_count = last_bb->jmp[passed].instr+1;
|
|
CLG_(current_state).cost[ fullOffset(EG_IR) ] += instr_count;
|
|
CLG_(current_state).nonskipped->skipped[ fullOffset(EG_IR) ]
|
|
+= instr_count;
|
|
}
|
|
}
|
|
}
|
|
|
|
CLG_DEBUGIF(4) {
|
|
CLG_(print_execstate)(-2, &CLG_(current_state) );
|
|
CLG_(print_bbcc_cost)(-2, last_bbcc);
|
|
}
|
|
}
|
|
else {
|
|
jmpkind = jk_None;
|
|
isConditionalJump = False;
|
|
}
|
|
|
|
/* Manipulate JmpKind if needed, only using BB specific info */
|
|
|
|
csp = CLG_(current_call_stack).sp;
|
|
|
|
/* A return not matching the top call in our callstack is a jump */
|
|
if ( (jmpkind == jk_Return) && (csp >0)) {
|
|
Int csp_up = csp-1;
|
|
call_entry* top_ce = &(CLG_(current_call_stack).entry[csp_up]);
|
|
|
|
/* We have a real return if
|
|
* - the stack pointer (SP) left the current stack frame, or
|
|
* - SP has the same value as when reaching the current function
|
|
* and the address of this BB is the return address of last call
|
|
* (we even allow to leave multiple frames if the SP stays the
|
|
* same and we find a matching return address)
|
|
* The latter condition is needed because on PPC, SP can stay
|
|
* the same over CALL=b(c)l / RET=b(c)lr boundaries
|
|
*/
|
|
if (sp < top_ce->sp) popcount_on_return = 0;
|
|
else if (top_ce->sp == sp) {
|
|
while(1) {
|
|
if (top_ce->ret_addr == bb_addr(bb)) break;
|
|
if (csp_up>0) {
|
|
csp_up--;
|
|
top_ce = &(CLG_(current_call_stack).entry[csp_up]);
|
|
if (top_ce->sp == sp) {
|
|
popcount_on_return++;
|
|
continue;
|
|
}
|
|
}
|
|
popcount_on_return = 0;
|
|
break;
|
|
}
|
|
}
|
|
if (popcount_on_return == 0) {
|
|
jmpkind = jk_Jump;
|
|
ret_without_call = True;
|
|
}
|
|
}
|
|
|
|
/* Should this jump be converted to call or pop/call ? */
|
|
if (( jmpkind != jk_Return) &&
|
|
( jmpkind != jk_Call) && last_bb) {
|
|
|
|
/* We simulate a JMP/Cont to be a CALL if
|
|
* - jump is in another ELF object or section kind
|
|
* - jump is to first instruction of a function (tail recursion)
|
|
*/
|
|
if (ret_without_call ||
|
|
/* This is for detection of optimized tail recursion.
|
|
* On PPC, this is only detected as call when going to another
|
|
* function. The problem is that on PPC it can go wrong
|
|
* more easily (no stack frame setup needed)
|
|
*/
|
|
#if defined(VGA_ppc32)
|
|
(bb->is_entry && (last_bb->fn != bb->fn)) ||
|
|
#else
|
|
bb->is_entry ||
|
|
#endif
|
|
(last_bb->sect_kind != bb->sect_kind) ||
|
|
(last_bb->obj->number != bb->obj->number)) {
|
|
|
|
CLG_DEBUG(1," JMP: %s[%s] to %s[%s]%s!\n",
|
|
last_bb->fn->name, last_bb->obj->name,
|
|
bb->fn->name, bb->obj->name,
|
|
ret_without_call?" (RET w/o CALL)":"");
|
|
|
|
if (CLG_(get_fn_node)(last_bb)->pop_on_jump && (csp>0)) {
|
|
|
|
call_entry* top_ce = &(CLG_(current_call_stack).entry[csp-1]);
|
|
|
|
if (top_ce->jcc) {
|
|
|
|
CLG_DEBUG(1," Pop on Jump!\n");
|
|
|
|
/* change source for delayed push */
|
|
CLG_(current_state).bbcc = top_ce->jcc->from;
|
|
sp = top_ce->sp;
|
|
passed = top_ce->jcc->jmp;
|
|
CLG_(pop_call_stack)();
|
|
}
|
|
else {
|
|
CLG_ASSERT(CLG_(current_state).nonskipped != 0);
|
|
}
|
|
}
|
|
|
|
jmpkind = jk_Call;
|
|
call_emulation = True;
|
|
}
|
|
}
|
|
|
|
if (jmpkind == jk_Call)
|
|
skip = CLG_(get_fn_node)(bb)->skip;
|
|
|
|
CLG_DEBUGIF(1) {
|
|
if (isConditionalJump)
|
|
VG_(printf)("Cond-");
|
|
switch(jmpkind) {
|
|
case jk_None: VG_(printf)("Fall-through"); break;
|
|
case jk_Jump: VG_(printf)("Jump"); break;
|
|
case jk_Call: VG_(printf)("Call"); break;
|
|
case jk_Return: VG_(printf)("Return"); break;
|
|
default: tl_assert(0);
|
|
}
|
|
VG_(printf)(" %08lx -> %08lx, SP %08lx\n",
|
|
last_bb ? bb_jmpaddr(last_bb) : 0,
|
|
bb_addr(bb), sp);
|
|
}
|
|
|
|
/* Handle CALL/RET and update context to get correct BBCC */
|
|
|
|
if (jmpkind == jk_Return) {
|
|
|
|
if ((csp == 0) ||
|
|
((CLG_(current_fn_stack).top > CLG_(current_fn_stack).bottom) &&
|
|
( *(CLG_(current_fn_stack).top-1)==0)) ) {
|
|
|
|
/* On an empty call stack or at a signal separation marker,
|
|
* a RETURN generates an call stack underflow.
|
|
*/
|
|
handleUnderflow(bb);
|
|
CLG_(pop_call_stack)();
|
|
}
|
|
else {
|
|
CLG_ASSERT(popcount_on_return >0);
|
|
CLG_(unwind_call_stack)(sp, popcount_on_return);
|
|
}
|
|
}
|
|
else {
|
|
Int unwind_count = CLG_(unwind_call_stack)(sp, 0);
|
|
if (unwind_count > 0) {
|
|
/* if unwinding was done, this actually is a return */
|
|
jmpkind = jk_Return;
|
|
}
|
|
|
|
if (jmpkind == jk_Call) {
|
|
delayed_push = True;
|
|
|
|
csp = CLG_(current_call_stack).sp;
|
|
if (call_emulation && csp>0)
|
|
sp = CLG_(current_call_stack).entry[csp-1].sp;
|
|
|
|
}
|
|
}
|
|
|
|
/* Change new context if needed, taking delayed_push into account */
|
|
if ((delayed_push && !skip) || (CLG_(current_state).cxt == 0)) {
|
|
CLG_(push_cxt)(CLG_(get_fn_node)(bb));
|
|
}
|
|
CLG_ASSERT(CLG_(current_fn_stack).top > CLG_(current_fn_stack).bottom);
|
|
|
|
/* If there is a fresh instrumented BBCC, assign current context */
|
|
bbcc = CLG_(get_bbcc)(bb);
|
|
if (bbcc->cxt == 0) {
|
|
CLG_ASSERT(bbcc->rec_array == 0);
|
|
|
|
bbcc->cxt = CLG_(current_state).cxt;
|
|
bbcc->rec_array =
|
|
new_recursion((*CLG_(current_fn_stack).top)->separate_recursions);
|
|
bbcc->rec_array[0] = bbcc;
|
|
|
|
insert_bbcc_into_hash(bbcc);
|
|
}
|
|
else {
|
|
/* get BBCC with current context */
|
|
|
|
/* first check LRU of last bbcc executed */
|
|
|
|
if (last_bbcc) {
|
|
bbcc = last_bbcc->lru_next_bbcc;
|
|
if (bbcc &&
|
|
((bbcc->bb != bb) ||
|
|
(bbcc->cxt != CLG_(current_state).cxt)))
|
|
bbcc = 0;
|
|
}
|
|
else
|
|
bbcc = 0;
|
|
|
|
if (!bbcc)
|
|
bbcc = lookup_bbcc(bb, CLG_(current_state).cxt);
|
|
if (!bbcc)
|
|
bbcc = clone_bbcc(bb->bbcc_list, CLG_(current_state).cxt, 0);
|
|
|
|
bb->last_bbcc = bbcc;
|
|
}
|
|
|
|
/* save for fast lookup */
|
|
if (last_bbcc)
|
|
last_bbcc->lru_next_bbcc = bbcc;
|
|
|
|
if ((*CLG_(current_fn_stack).top)->separate_recursions >1) {
|
|
UInt level, idx;
|
|
fn_node* top = *(CLG_(current_fn_stack).top);
|
|
|
|
level = *CLG_(get_fn_entry)(top->number);
|
|
|
|
if (delayed_push && !skip) {
|
|
if (CLG_(clo).skip_direct_recursion) {
|
|
/* a call was detected, which means that the source BB != 0 */
|
|
CLG_ASSERT(CLG_(current_state).bbcc != 0);
|
|
/* only increment rec. level if called from different function */
|
|
if (CLG_(current_state).bbcc->cxt->fn[0] != bbcc->cxt->fn[0])
|
|
level++;
|
|
}
|
|
else level++;
|
|
}
|
|
if (level> top->separate_recursions)
|
|
level = top->separate_recursions;
|
|
|
|
if (level == 0) {
|
|
/* can only happen if instrumentation just was switched on */
|
|
level = 1;
|
|
*CLG_(get_fn_entry)(top->number) = 1;
|
|
}
|
|
|
|
idx = level -1;
|
|
if (bbcc->rec_array[idx])
|
|
bbcc = bbcc->rec_array[idx];
|
|
else
|
|
bbcc = clone_bbcc(bbcc, CLG_(current_state).cxt, idx);
|
|
|
|
CLG_ASSERT(bbcc->rec_array[bbcc->rec_index] == bbcc);
|
|
}
|
|
|
|
if (delayed_push) {
|
|
if (!skip && CLG_(current_state).nonskipped) {
|
|
/* a call from skipped to nonskipped */
|
|
CLG_(current_state).bbcc = CLG_(current_state).nonskipped;
|
|
/* FIXME: take the real passed count from shadow stack */
|
|
passed = CLG_(current_state).bbcc->bb->cjmp_count;
|
|
}
|
|
CLG_(push_call_stack)(CLG_(current_state).bbcc, passed,
|
|
bbcc, sp, skip);
|
|
}
|
|
|
|
if (CLG_(clo).collect_jumps && (jmpkind == jk_Jump)) {
|
|
|
|
/* Handle conditional jumps followed, i.e. trace arcs
|
|
* This uses JCC structures, too */
|
|
|
|
jCC* jcc = CLG_(get_jcc)(last_bbcc, passed, bbcc);
|
|
CLG_ASSERT(jcc != 0);
|
|
// Change from default, and check if already changed
|
|
if (jcc->jmpkind == jk_Call)
|
|
jcc->jmpkind = isConditionalJump ? jk_CondJump : jk_Jump;
|
|
else {
|
|
// FIXME: Why can this fail?
|
|
// CLG_ASSERT(jcc->jmpkind == jmpkind);
|
|
}
|
|
|
|
jcc->call_counter++;
|
|
if (isConditionalJump)
|
|
CLG_(stat).jcnd_counter++;
|
|
else
|
|
CLG_(stat).jump_counter++;
|
|
}
|
|
|
|
CLG_(current_state).bbcc = bbcc;
|
|
/* Even though this will be set in instrumented code directly before
|
|
* side exits, it needs to be set to 0 here in case an exception
|
|
* happens in first instructions of the BB */
|
|
CLG_(current_state).jmps_passed = 0;
|
|
// needed for log_* handlers called in this BB
|
|
CLG_(bb_base) = bb->obj->offset + bb->offset;
|
|
CLG_(cost_base) = bbcc->cost;
|
|
|
|
CLG_DEBUGIF(1) {
|
|
VG_(printf)(" ");
|
|
CLG_(print_bbcc_fn)(bbcc);
|
|
VG_(printf)("\n");
|
|
}
|
|
|
|
CLG_DEBUG(3,"- setup_bbcc (BB %#lx): Cost %p (Len %u), Instrs %u (Len %u)\n",
|
|
bb_addr(bb), bbcc->cost, bb->cost_count,
|
|
bb->instr_count, bb->instr_len);
|
|
CLG_DEBUGIF(3)
|
|
CLG_(print_cxt)(-8, CLG_(current_state).cxt, bbcc->rec_index);
|
|
CLG_DEBUG(3,"\n");
|
|
|
|
CLG_(stat).bb_executions++;
|
|
}
|