mirror of
https://github.com/ioacademy-jikim/debugging
synced 2025-07-03 12:46:17 +00:00
1858 lines
64 KiB
C
1858 lines
64 KiB
C
|
|
/*--------------------------------------------------------------------*/
|
|
/*--- Cachegrind: everything but the simulation itself. ---*/
|
|
/*--- cg_main.c ---*/
|
|
/*--------------------------------------------------------------------*/
|
|
|
|
/*
|
|
This file is part of Cachegrind, a Valgrind tool for cache
|
|
profiling programs.
|
|
|
|
Copyright (C) 2002-2015 Nicholas Nethercote
|
|
njn@valgrind.org
|
|
|
|
This program is free software; you can redistribute it and/or
|
|
modify it under the terms of the GNU General Public License as
|
|
published by the Free Software Foundation; either version 2 of the
|
|
License, or (at your option) any later version.
|
|
|
|
This program is distributed in the hope that it will be useful, but
|
|
WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program; if not, write to the Free Software
|
|
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
|
02111-1307, USA.
|
|
|
|
The GNU General Public License is contained in the file COPYING.
|
|
*/
|
|
|
|
#include "pub_tool_basics.h"
|
|
#include "pub_tool_debuginfo.h"
|
|
#include "pub_tool_libcbase.h"
|
|
#include "pub_tool_libcassert.h"
|
|
#include "pub_tool_libcfile.h"
|
|
#include "pub_tool_libcprint.h"
|
|
#include "pub_tool_libcproc.h"
|
|
#include "pub_tool_mallocfree.h"
|
|
#include "pub_tool_options.h"
|
|
#include "pub_tool_oset.h"
|
|
#include "pub_tool_tooliface.h"
|
|
#include "pub_tool_xarray.h"
|
|
#include "pub_tool_clientstate.h"
|
|
#include "pub_tool_machine.h" // VG_(fnptr_to_fnentry)
|
|
|
|
#include "cg_arch.h"
|
|
#include "cg_sim.c"
|
|
#include "cg_branchpred.c"
|
|
|
|
/*------------------------------------------------------------*/
/*--- Constants                                            ---*/
/*------------------------------------------------------------*/

/* Set to 1 for very verbose debugging */
#define DEBUG_CG 0

/*------------------------------------------------------------*/
/*--- Options                                              ---*/
/*------------------------------------------------------------*/

/* Command-line controlled behaviour:
   --cache-sim   : simulate I1/D1/LL caches (on by default)
   --branch-sim  : simulate conditional/indirect branch predictors (off) */
static Bool  clo_cache_sim  = True;  /* do cache simulation? */
static Bool  clo_branch_sim = False; /* do branch simulation? */
/* Output file name template; "%p" presumably expands to the client's
   PID when the file is opened — TODO confirm against VG_(expand_file_name). */
static const HChar* clo_cachegrind_out_file = "cachegrind.out.%p";

/*------------------------------------------------------------*/
/*--- Cachesim configuration                               ---*/
/*------------------------------------------------------------*/

/* Smallest line size among the simulated caches; used to bound the
   size of a single data access (see asserts in addEvent_Dr/Dw). */
static Int min_line_size = 0; /* min of L1 and LL cache line sizes */
|
|
|
|
/*------------------------------------------------------------*/
/*--- Types and Data Structures                            ---*/
/*------------------------------------------------------------*/

/* Cache cost-centre: access/miss counts for one access kind
   (instruction read, data read, or data write). */
typedef
   struct {
      ULong a;  /* total # memory accesses of this kind */
      ULong m1; /* misses in the first level cache */
      ULong mL; /* misses in the second level cache */
   }
   CacheCC;

/* Branch cost-centre: counts for one branch kind
   (conditional or indirect). */
typedef
   struct {
      ULong b;  /* total # branches of this kind */
      ULong mp; /* number of branches mispredicted */
   }
   BranchCC;

//------------------------------------------------------------
// Primary data structure #1: CC table
// - Holds the per-source-line hit/miss stats, grouped by file/function/line.
// - an ordered set of CCs.  CC indexing done by file/function/line (as
//   determined from the instrAddr).
// - Traversed for dumping stats at end in file/func/line hierarchy.

/* Key for the CC table: a source location.  'file' and 'fn' point into
   the string table once interned (see get_perm_string). */
typedef struct {
   HChar* file;
   const HChar* fn;
   Int    line;
}
CodeLoc;

/* One per distinct source line seen; aggregates all event counts
   attributed to that line. */
typedef struct {
   CodeLoc  loc; /* Source location that these counts pertain to */
   CacheCC  Ir;  /* Insn read counts */
   CacheCC  Dr;  /* Data read counts */
   CacheCC  Dw;  /* Data write/modify counts */
   BranchCC Bc;  /* Conditional branch counts */
   BranchCC Bi;  /* Indirect branch counts */
} LineCC;
|
|
|
|
// First compare file, then fn, then line.
|
|
static Word cmp_CodeLoc_LineCC(const void *vloc, const void *vcc)
|
|
{
|
|
Word res;
|
|
const CodeLoc* a = (const CodeLoc*)vloc;
|
|
const CodeLoc* b = &(((const LineCC*)vcc)->loc);
|
|
|
|
res = VG_(strcmp)(a->file, b->file);
|
|
if (0 != res)
|
|
return res;
|
|
|
|
res = VG_(strcmp)(a->fn, b->fn);
|
|
if (0 != res)
|
|
return res;
|
|
|
|
return a->line - b->line;
|
|
}
|
|
|
|
/* The CC table itself: OSet of LineCC, ordered by cmp_CodeLoc_LineCC. */
static OSet* CC_table;

//------------------------------------------------------------
// Primary data structure #2: InstrInfo table
// - Holds the cached info about each instr that is used for simulation.
// - table(SB_start_addr, list(InstrInfo))
// - For each SB, each InstrInfo in the list holds info about the
//   instruction (instrLen, instrAddr, etc), plus a pointer to its line
//   CC.  This node is what's passed to the simulation function.
// - When SBs are discarded the relevant list(instr_details) is freed.

typedef struct _InstrInfo InstrInfo;
struct _InstrInfo {
   Addr    instr_addr;  // guest address of the instruction
   UChar   instr_len;   // its length in bytes
   LineCC* parent;      // parent line-CC
};
|
|
|
|
typedef struct _SB_info SB_info;
|
|
struct _SB_info {
|
|
Addr SB_addr; // key; MUST BE FIRST
|
|
Int n_instrs;
|
|
InstrInfo instrs[0];
|
|
};
|
|
|
|
/* OSet of SB_info, keyed by SB_addr (hence "MUST BE FIRST" above). */
static OSet* instrInfoTable;

//------------------------------------------------------------
// Secondary data structure: string table
// - holds strings, avoiding dups
// - used for filenames and function names, each of which will be
//   pointed to by one or more CCs.
// - it also allows equality checks just by pointer comparison, which
//   is good when printing the output file at the end.

static OSet* stringTable;

//------------------------------------------------------------
// Stats — counters reported at shutdown with --verbose; purely
// informational, never read by the simulation itself.
static Int  distinct_files     = 0;
static Int  distinct_fns       = 0;
static Int  distinct_lines     = 0;
static Int  distinct_instrsGen = 0;   // generic (possibly line-crossing) Ir's
static Int  distinct_instrsNoX = 0;   // Ir's known not to cross cache lines

// How complete the debug info was for each queried instruction
// (see get_debug_info).
static Int  full_debugs      = 0;   // file+line and function known
static Int  file_line_debugs = 0;   // file+line only
static Int  fn_debugs        = 0;   // function only
static Int  no_debugs        = 0;   // nothing known
|
|
|
|
/*------------------------------------------------------------*/
|
|
/*--- String table operations ---*/
|
|
/*------------------------------------------------------------*/
|
|
|
|
static Word stringCmp( const void* key, const void* elem )
|
|
{
|
|
return VG_(strcmp)(*(const HChar *const *)key, *(const HChar *const *)elem);
|
|
}
|
|
|
|
// Get a permanent string; either pull it out of the string table if it's
|
|
// been encountered before, or dup it and put it into the string table.
|
|
static HChar* get_perm_string(const HChar* s)
|
|
{
|
|
HChar** s_ptr = VG_(OSetGen_Lookup)(stringTable, &s);
|
|
if (s_ptr) {
|
|
return *s_ptr;
|
|
} else {
|
|
HChar** s_node = VG_(OSetGen_AllocNode)(stringTable, sizeof(HChar*));
|
|
*s_node = VG_(strdup)("cg.main.gps.1", s);
|
|
VG_(OSetGen_Insert)(stringTable, s_node);
|
|
return *s_node;
|
|
}
|
|
}
|
|
|
|
/*------------------------------------------------------------*/
|
|
/*--- CC table operations ---*/
|
|
/*------------------------------------------------------------*/
|
|
|
|
/* Look up debug info for 'instr_addr': directory, file, function name
   and line number.  Missing pieces are filled with "???" / 0 so callers
   never see unset outputs.  Also bumps the debug-completeness stats. */
static void get_debug_info(Addr instr_addr, const HChar **dir,
                           const HChar **file, const HChar **fn, UInt* line)
{
   Bool have_loc = VG_(get_filename_linenum)(
                      instr_addr,
                      file, dir,
                      line
                   );
   Bool have_fn  = VG_(get_fnname)(instr_addr, fn);

   if (!have_loc) {
      *file = "???";
      *line = 0;
   }
   if (!have_fn) {
      *fn = "???";
   }

   // Track how complete the available debug info was.
   if (have_loc && have_fn)
      full_debugs++;
   else if (have_loc)
      file_line_debugs++;
   else if (have_fn)
      fn_debugs++;
   else
      no_debugs++;
}
|
|
|
|
// Do a three step traversal: by file, then fn, then line.
// Returns a pointer to the line CC, creates a new one if necessary.
static LineCC* get_lineCC(Addr origAddr)
{
   const HChar *fn, *file, *dir;
   UInt    line;
   CodeLoc loc;
   LineCC* lineCC;

   get_debug_info(origAddr, &dir, &file, &fn, &line);

   // Form an absolute pathname if a directory is available.
   // NB: absfile is a stack VLA, sized exactly for "dir/file\0"; it is
   // only safe to store in the table because get_perm_string() below
   // duplicates it before the function returns.
   HChar absfile[VG_(strlen)(dir) + 1 + VG_(strlen)(file) + 1];

   if (dir[0]) {
      VG_(sprintf)(absfile, "%s/%s", dir, file);
   } else {
      VG_(sprintf)(absfile, "%s", file);
   }

   // Build the lookup key (points at the transient absfile for now).
   loc.file = absfile;
   loc.fn   = fn;
   loc.line = line;

   lineCC = VG_(OSetGen_Lookup)(CC_table, &loc);
   if (!lineCC) {
      // Allocate and zero a new node.
      lineCC = VG_(OSetGen_AllocNode)(CC_table, sizeof(LineCC));
      // Intern the strings so the stored pointers are permanent.
      lineCC->loc.file = get_perm_string(loc.file);
      lineCC->loc.fn   = get_perm_string(loc.fn);
      lineCC->loc.line = loc.line;
      // Explicitly zero every counter (AllocNode is not assumed to zero).
      lineCC->Ir.a     = 0;
      lineCC->Ir.m1    = 0;
      lineCC->Ir.mL    = 0;
      lineCC->Dr.a     = 0;
      lineCC->Dr.m1    = 0;
      lineCC->Dr.mL    = 0;
      lineCC->Dw.a     = 0;
      lineCC->Dw.m1    = 0;
      lineCC->Dw.mL    = 0;
      lineCC->Bc.b     = 0;
      lineCC->Bc.mp    = 0;
      lineCC->Bi.b     = 0;
      lineCC->Bi.mp    = 0;
      VG_(OSetGen_Insert)(CC_table, lineCC);
   }

   return lineCC;
}
|
|
|
|
/*------------------------------------------------------------*/
|
|
/*--- Cache simulation functions ---*/
|
|
/*------------------------------------------------------------*/
|
|
|
|
/* A common case for an instruction read event is that the
|
|
* bytes read belong to the same cache line in both L1I and LL
|
|
* (if cache line sizes of L1 and LL are the same).
|
|
* As this can be detected at instrumentation time, and results
|
|
* in faster simulation, special-casing is benefical.
|
|
*
|
|
* Abbrevations used in var/function names:
|
|
* IrNoX - instruction read does not cross cache lines
|
|
* IrGen - generic instruction read; not detected as IrNoX
|
|
* Ir - not known / not important whether it is an IrNoX
|
|
*/
|
|
|
|
// Only used with --cache-sim=no.
|
|
static VG_REGPARM(1)
|
|
void log_1Ir(InstrInfo* n)
|
|
{
|
|
n->parent->Ir.a++;
|
|
}
|
|
|
|
// Only used with --cache-sim=no.
|
|
static VG_REGPARM(2)
|
|
void log_2Ir(InstrInfo* n, InstrInfo* n2)
|
|
{
|
|
n->parent->Ir.a++;
|
|
n2->parent->Ir.a++;
|
|
}
|
|
|
|
// Only used with --cache-sim=no.
|
|
static VG_REGPARM(3)
|
|
void log_3Ir(InstrInfo* n, InstrInfo* n2, InstrInfo* n3)
|
|
{
|
|
n->parent->Ir.a++;
|
|
n2->parent->Ir.a++;
|
|
n3->parent->Ir.a++;
|
|
}
|
|
|
|
// Generic case for instruction reads: may cross cache lines.
|
|
// All other Ir handlers expect IrNoX instruction reads.
|
|
static VG_REGPARM(1)
|
|
void log_1IrGen_0D_cache_access(InstrInfo* n)
|
|
{
|
|
//VG_(printf)("1IrGen_0D : CCaddr=0x%010lx, iaddr=0x%010lx, isize=%lu\n",
|
|
// n, n->instr_addr, n->instr_len);
|
|
cachesim_I1_doref_Gen(n->instr_addr, n->instr_len,
|
|
&n->parent->Ir.m1, &n->parent->Ir.mL);
|
|
n->parent->Ir.a++;
|
|
}
|
|
|
|
// One non-line-crossing instruction read, no data access.
static VG_REGPARM(1)
void log_1IrNoX_0D_cache_access(InstrInfo* n)
{
   LineCC* cc = n->parent;
   cachesim_I1_doref_NoX(n->instr_addr, n->instr_len,
                         &cc->Ir.m1, &cc->Ir.mL);
   cc->Ir.a++;
}
|
|
|
|
// Two non-line-crossing instruction reads, no data access.
// The two fetches are simulated in program order.
static VG_REGPARM(2)
void log_2IrNoX_0D_cache_access(InstrInfo* n, InstrInfo* n2)
{
   LineCC* cc1 = n->parent;
   LineCC* cc2 = n2->parent;

   cachesim_I1_doref_NoX(n->instr_addr, n->instr_len,
                         &cc1->Ir.m1, &cc1->Ir.mL);
   cc1->Ir.a++;

   cachesim_I1_doref_NoX(n2->instr_addr, n2->instr_len,
                         &cc2->Ir.m1, &cc2->Ir.mL);
   cc2->Ir.a++;
}
|
|
|
|
// Three non-line-crossing instruction reads, no data access.
// The three fetches are simulated in program order.
static VG_REGPARM(3)
void log_3IrNoX_0D_cache_access(InstrInfo* n, InstrInfo* n2, InstrInfo* n3)
{
   LineCC* cc1 = n->parent;
   LineCC* cc2 = n2->parent;
   LineCC* cc3 = n3->parent;

   cachesim_I1_doref_NoX(n->instr_addr, n->instr_len,
                         &cc1->Ir.m1, &cc1->Ir.mL);
   cc1->Ir.a++;

   cachesim_I1_doref_NoX(n2->instr_addr, n2->instr_len,
                         &cc2->Ir.m1, &cc2->Ir.mL);
   cc2->Ir.a++;

   cachesim_I1_doref_NoX(n3->instr_addr, n3->instr_len,
                         &cc3->Ir.m1, &cc3->Ir.mL);
   cc3->Ir.a++;
}
|
|
|
|
// One non-line-crossing instruction read plus one data read,
// simulated in that order; both counted against the same line CC.
static VG_REGPARM(3)
void log_1IrNoX_1Dr_cache_access(InstrInfo* n, Addr data_addr, Word data_size)
{
   LineCC* cc = n->parent;

   // Instruction fetch first ...
   cachesim_I1_doref_NoX(n->instr_addr, n->instr_len,
                         &cc->Ir.m1, &cc->Ir.mL);
   cc->Ir.a++;

   // ... then the data read it performs.
   cachesim_D1_doref(data_addr, data_size,
                     &cc->Dr.m1, &cc->Dr.mL);
   cc->Dr.a++;
}
|
|
|
|
// One non-line-crossing instruction read plus one data write,
// simulated in that order; both counted against the same line CC.
static VG_REGPARM(3)
void log_1IrNoX_1Dw_cache_access(InstrInfo* n, Addr data_addr, Word data_size)
{
   LineCC* cc = n->parent;

   // Instruction fetch first ...
   cachesim_I1_doref_NoX(n->instr_addr, n->instr_len,
                         &cc->Ir.m1, &cc->Ir.mL);
   cc->Ir.a++;

   // ... then the data write it performs.
   cachesim_D1_doref(data_addr, data_size,
                     &cc->Dw.m1, &cc->Dw.mL);
   cc->Dw.a++;
}
|
|
|
|
/* Note that addEvent_D_guarded assumes that log_0Ir_1Dr_cache_access
|
|
and log_0Ir_1Dw_cache_access have exactly the same prototype. If
|
|
you change them, you must change addEvent_D_guarded too. */
|
|
static VG_REGPARM(3)
|
|
void log_0Ir_1Dr_cache_access(InstrInfo* n, Addr data_addr, Word data_size)
|
|
{
|
|
//VG_(printf)("0Ir_1Dr: CCaddr=0x%010lx, daddr=0x%010lx, dsize=%lu\n",
|
|
// n, data_addr, data_size);
|
|
cachesim_D1_doref(data_addr, data_size,
|
|
&n->parent->Dr.m1, &n->parent->Dr.mL);
|
|
n->parent->Dr.a++;
|
|
}
|
|
|
|
/* See comment on log_0Ir_1Dr_cache_access. */
|
|
static VG_REGPARM(3)
|
|
void log_0Ir_1Dw_cache_access(InstrInfo* n, Addr data_addr, Word data_size)
|
|
{
|
|
//VG_(printf)("0Ir_1Dw: CCaddr=0x%010lx, daddr=0x%010lx, dsize=%lu\n",
|
|
// n, data_addr, data_size);
|
|
cachesim_D1_doref(data_addr, data_size,
|
|
&n->parent->Dw.m1, &n->parent->Dw.mL);
|
|
n->parent->Dw.a++;
|
|
}
|
|
|
|
/* For branches, we consult two different predictors, one which
|
|
predicts taken/untaken for conditional branches, and the other
|
|
which predicts the branch target address for indirect branches
|
|
(jump-to-register style ones). */
|
|
|
|
// Conditional branch: count it and feed it to the taken/untaken
// predictor; do_cond_branch_predict's low bit is the mispredict flag.
static VG_REGPARM(2)
void log_cond_branch(InstrInfo* n, Word taken)
{
   BranchCC* bc = &n->parent->Bc;
   bc->b++;
   bc->mp += (1 & do_cond_branch_predict(n->instr_addr, taken));
}
|
|
|
|
// Indirect branch: count it and feed it to the target predictor;
// do_ind_branch_predict's low bit is the mispredict flag.
static VG_REGPARM(2)
void log_ind_branch(InstrInfo* n, UWord actual_dst)
{
   BranchCC* bi = &n->parent->Bi;
   bi->b++;
   bi->mp += (1 & do_ind_branch_predict(n->instr_addr, actual_dst));
}
|
|
|
|
|
|
/*------------------------------------------------------------*/
|
|
/*--- Instrumentation types and structures ---*/
|
|
/*------------------------------------------------------------*/
|
|
|
|
/* Maintain an ordered list of memory events which are outstanding, in
|
|
the sense that no IR has yet been generated to do the relevant
|
|
helper calls. The BB is scanned top to bottom and memory events
|
|
are added to the end of the list, merging with the most recent
|
|
notified event where possible (Dw immediately following Dr and
|
|
having the same size and EA can be merged).
|
|
|
|
This merging is done so that for architectures which have
|
|
load-op-store instructions (x86, amd64), the insn is treated as if
|
|
it makes just one memory reference (a modify), rather than two (a
|
|
read followed by a write at the same address).
|
|
|
|
At various points the list will need to be flushed, that is, IR
|
|
generated from it. That must happen before any possible exit from
|
|
the block (the end, or an IRStmt_Exit). Flushing also takes place
|
|
when there is no space to add a new event.
|
|
|
|
If we require the simulation statistics to be up to date with
|
|
respect to possible memory exceptions, then the list would have to
|
|
be flushed before each memory reference. That would however lose
|
|
performance by inhibiting event-merging during flushing.
|
|
|
|
Flushing the list consists of walking it start to end and emitting
|
|
instrumentation IR for each event, in the order in which they
|
|
appear. It may be possible to emit a single call for two adjacent
|
|
events in order to reduce the number of helper function calls made.
|
|
For example, it could well be profitable to handle two adjacent Ir
|
|
events with a single helper call. */
|
|
|
|
/* Shorthand: an "atom" is an IRExpr that is flat (a constant or a
   temporary), as required by the helpers that consume them. */
typedef
   IRExpr
   IRAtom;

/* Discriminator for the queued-event union below. */
typedef
   enum {
      Ev_IrNoX,  // Instruction read not crossing cache lines
      Ev_IrGen,  // Generic Ir, not being detected as IrNoX
      Ev_Dr,     // Data read
      Ev_Dw,     // Data write
      Ev_Dm,     // Data modify (read then write)
      Ev_Bc,     // branch conditional
      Ev_Bi      // branch indirect (to unknown destination)
   }
   EventTag;
|
|
|
|
/* One outstanding (not-yet-instrumented) event: the instruction it
   belongs to, plus tag-specific payload.  IrGen/IrNoX carry no payload;
   the empty structs keep the union shape uniform. */
typedef
   struct {
      EventTag   tag;
      InstrInfo* inode;    // instruction this event pertains to
      union {
         struct {
         } IrGen;
         struct {
         } IrNoX;
         struct {
            IRAtom* ea;    // effective address of the access
            Int     szB;   // access size in bytes
         } Dr;
         struct {
            IRAtom* ea;
            Int     szB;
         } Dw;
         struct {
            IRAtom* ea;
            Int     szB;
         } Dm;
         struct {
            IRAtom* taken; /* :: Ity_I1 */
         } Bc;
         struct {
            IRAtom* dst;   // actual branch destination
         } Bi;
      } Ev;
   }
   Event;
|
|
|
|
// Zero out an Event (tag, inode and the whole payload union).
static void init_Event ( Event* ev ) {
   VG_(memset)(ev, 0, sizeof(*ev));
}
|
|
|
|
// Effective-address atom of a data event; asserts on non-data tags.
static IRAtom* get_Event_dea ( Event* ev ) {
   if (ev->tag == Ev_Dr) return ev->Ev.Dr.ea;
   if (ev->tag == Ev_Dw) return ev->Ev.Dw.ea;
   if (ev->tag == Ev_Dm) return ev->Ev.Dm.ea;
   tl_assert(0);
}
|
|
|
|
// Access size (bytes) of a data event; asserts on non-data tags.
static Int get_Event_dszB ( Event* ev ) {
   if (ev->tag == Ev_Dr) return ev->Ev.Dr.szB;
   if (ev->tag == Ev_Dw) return ev->Ev.Dw.szB;
   if (ev->tag == Ev_Dm) return ev->Ev.Dm.szB;
   tl_assert(0);
}
|
|
|
|
|
|
/* Up to this many unnotified events are allowed.  Number is
   arbitrary.  Larger numbers allow more event merging to occur, but
   potentially induce more spilling due to extending live ranges of
   address temporaries. */
#define N_EVENTS 16

/* A struct which holds all the running state during instrumentation.
   Mostly to avoid passing loads of parameters everywhere. */
typedef
   struct {
      /* The current outstanding-memory-event list. */
      Event events[N_EVENTS];
      Int   events_used;     // # valid entries at the front of 'events'

      /* The array of InstrInfo bins for the BB. */
      SB_info* sbInfo;

      /* Number InstrInfo bins 'used' so far. */
      Int sbInfo_i;

      /* The output SB being constructed. */
      IRSB* sbOut;
   }
   CgState;
|
|
|
|
|
|
/*------------------------------------------------------------*/
|
|
/*--- Instrumentation main ---*/
|
|
/*------------------------------------------------------------*/
|
|
|
|
// Note that origAddr is the real origAddr, not the address of the first
|
|
// instruction in the block (they can be different due to redirection).
|
|
static
|
|
SB_info* get_SB_info(IRSB* sbIn, Addr origAddr)
|
|
{
|
|
Int i, n_instrs;
|
|
IRStmt* st;
|
|
SB_info* sbInfo;
|
|
|
|
// Count number of original instrs in SB
|
|
n_instrs = 0;
|
|
for (i = 0; i < sbIn->stmts_used; i++) {
|
|
st = sbIn->stmts[i];
|
|
if (Ist_IMark == st->tag) n_instrs++;
|
|
}
|
|
|
|
// Check that we don't have an entry for this BB in the instr-info table.
|
|
// If this assertion fails, there has been some screwup: some
|
|
// translations must have been discarded but Cachegrind hasn't discarded
|
|
// the corresponding entries in the instr-info table.
|
|
sbInfo = VG_(OSetGen_Lookup)(instrInfoTable, &origAddr);
|
|
tl_assert(NULL == sbInfo);
|
|
|
|
// BB never translated before (at this address, at least; could have
|
|
// been unloaded and then reloaded elsewhere in memory)
|
|
sbInfo = VG_(OSetGen_AllocNode)(instrInfoTable,
|
|
sizeof(SB_info) + n_instrs*sizeof(InstrInfo));
|
|
sbInfo->SB_addr = origAddr;
|
|
sbInfo->n_instrs = n_instrs;
|
|
VG_(OSetGen_Insert)( instrInfoTable, sbInfo );
|
|
|
|
return sbInfo;
|
|
}
|
|
|
|
|
|
/* Debug-print one queued event (used from flushEvents when DEBUG_CG
   is set).  For data events the effective-address IR expression is
   printed; for branch events the guard/destination expression. */
static void showEvent ( Event* ev )
{
   switch (ev->tag) {
      case Ev_IrGen:
         VG_(printf)("IrGen %p\n", ev->inode);
         break;
      case Ev_IrNoX:
         VG_(printf)("IrNoX %p\n", ev->inode);
         break;
      case Ev_Dr:
         VG_(printf)("Dr %p %d EA=", ev->inode, ev->Ev.Dr.szB);
         ppIRExpr(ev->Ev.Dr.ea);
         VG_(printf)("\n");
         break;
      case Ev_Dw:
         VG_(printf)("Dw %p %d EA=", ev->inode, ev->Ev.Dw.szB);
         ppIRExpr(ev->Ev.Dw.ea);
         VG_(printf)("\n");
         break;
      case Ev_Dm:
         VG_(printf)("Dm %p %d EA=", ev->inode, ev->Ev.Dm.szB);
         ppIRExpr(ev->Ev.Dm.ea);
         VG_(printf)("\n");
         break;
      case Ev_Bc:
         VG_(printf)("Bc %p   GA=", ev->inode);
         ppIRExpr(ev->Ev.Bc.taken);
         VG_(printf)("\n");
         break;
      case Ev_Bi:
         VG_(printf)("Bi %p  DST=", ev->inode);
         ppIRExpr(ev->Ev.Bi.dst);
         VG_(printf)("\n");
         break;
      default:
         tl_assert(0);
         break;
   }
}
|
|
|
|
// Reserve and initialise an InstrInfo for the first mention of a new insn.
|
|
static
|
|
InstrInfo* setup_InstrInfo ( CgState* cgs, Addr instr_addr, UInt instr_len )
|
|
{
|
|
InstrInfo* i_node;
|
|
tl_assert(cgs->sbInfo_i >= 0);
|
|
tl_assert(cgs->sbInfo_i < cgs->sbInfo->n_instrs);
|
|
i_node = &cgs->sbInfo->instrs[ cgs->sbInfo_i ];
|
|
i_node->instr_addr = instr_addr;
|
|
i_node->instr_len = instr_len;
|
|
i_node->parent = get_lineCC(instr_addr);
|
|
cgs->sbInfo_i++;
|
|
return i_node;
|
|
}
|
|
|
|
|
|
/* Generate code for all outstanding memory events, and mark the queue
   empty.  Code is generated into cgs->bbOut, and this activity
   'consumes' slots in cgs->sbInfo.

   Walks the queue front to back; at each position it tries to merge the
   current event with the one or two that follow it (Ir+Dr/Dm, Ir+Dw,
   Ir+Ir+Ir, Ir+Ir), picks the matching helper, and emits one dirty
   call.  'i' is advanced by however many events the emitted call
   covers. */
static void flushEvents ( CgState* cgs )
{
   Int        i, regparms;
   const HChar* helperName;
   void*      helperAddr;
   IRExpr**   argv;
   IRExpr*    i_node_expr;
   IRDirty*   di;
   Event*     ev;
   Event*     ev2;   // lookahead: event i+1, or NULL
   Event*     ev3;   // lookahead: event i+2, or NULL

   i = 0;
   while (i < cgs->events_used) {

      helperName = NULL;
      helperAddr = NULL;
      argv       = NULL;
      regparms   = 0;

      /* generate IR to notify event i and possibly the ones
         immediately following it. */
      tl_assert(i >= 0 && i < cgs->events_used);

      ev  = &cgs->events[i];
      ev2 = ( i < cgs->events_used-1 ? &cgs->events[i+1] : NULL );
      ev3 = ( i < cgs->events_used-2 ? &cgs->events[i+2] : NULL );

      if (DEBUG_CG) {
         VG_(printf)("   flush ");
         showEvent( ev );
      }

      // The InstrInfo pointer is passed to the helper as a word constant.
      i_node_expr = mkIRExpr_HWord( (HWord)ev->inode );

      /* Decide on helper fn to call and args to pass it, and advance
         i appropriately. */
      switch (ev->tag) {
         case Ev_IrNoX:
            /* Merge an IrNoX with a following Dr/Dm. */
            if (ev2 && (ev2->tag == Ev_Dr || ev2->tag == Ev_Dm)) {
               /* Why is this true?  It's because we're merging an Ir
                  with a following Dr or Dm.  The Ir derives from the
                  instruction's IMark and the Dr/Dm from data
                  references which follow it.  In short it holds
                  because each insn starts with an IMark, hence an
                  Ev_Ir, and so these Dr/Dm must pertain to the
                  immediately preceding Ir.  Same applies to analogous
                  assertions in the subsequent cases. */
               tl_assert(ev2->inode == ev->inode);
               helperName = "log_1IrNoX_1Dr_cache_access";
               helperAddr = &log_1IrNoX_1Dr_cache_access;
               argv = mkIRExprVec_3( i_node_expr,
                                     get_Event_dea(ev2),
                                     mkIRExpr_HWord( get_Event_dszB(ev2) ) );
               regparms = 3;
               i += 2;
            }
            /* Merge an IrNoX with a following Dw. */
            else
            if (ev2 && ev2->tag == Ev_Dw) {
               tl_assert(ev2->inode == ev->inode);
               helperName = "log_1IrNoX_1Dw_cache_access";
               helperAddr = &log_1IrNoX_1Dw_cache_access;
               argv = mkIRExprVec_3( i_node_expr,
                                     get_Event_dea(ev2),
                                     mkIRExpr_HWord( get_Event_dszB(ev2) ) );
               regparms = 3;
               i += 2;
            }
            /* Merge an IrNoX with two following IrNoX's. */
            else
            if (ev2 && ev3 && ev2->tag == Ev_IrNoX && ev3->tag == Ev_IrNoX)
            {
               if (clo_cache_sim) {
                  helperName = "log_3IrNoX_0D_cache_access";
                  helperAddr = &log_3IrNoX_0D_cache_access;
               } else {
                  helperName = "log_3Ir";
                  helperAddr = &log_3Ir;
               }
               argv = mkIRExprVec_3( i_node_expr,
                                     mkIRExpr_HWord( (HWord)ev2->inode ),
                                     mkIRExpr_HWord( (HWord)ev3->inode ) );
               regparms = 3;
               i += 3;
            }
            /* Merge an IrNoX with one following IrNoX. */
            else
            if (ev2 && ev2->tag == Ev_IrNoX) {
               if (clo_cache_sim) {
                  helperName = "log_2IrNoX_0D_cache_access";
                  helperAddr = &log_2IrNoX_0D_cache_access;
               } else {
                  helperName = "log_2Ir";
                  helperAddr = &log_2Ir;
               }
               argv = mkIRExprVec_2( i_node_expr,
                                     mkIRExpr_HWord( (HWord)ev2->inode ) );
               regparms = 2;
               i += 2;
            }
            /* No merging possible; emit as-is. */
            else {
               if (clo_cache_sim) {
                  helperName = "log_1IrNoX_0D_cache_access";
                  helperAddr = &log_1IrNoX_0D_cache_access;
               } else {
                  helperName = "log_1Ir";
                  helperAddr = &log_1Ir;
               }
               argv = mkIRExprVec_1( i_node_expr );
               regparms = 1;
               i++;
            }
            break;
         case Ev_IrGen:
            // Line-crossing Ir: never merged with anything.
            if (clo_cache_sim) {
               helperName = "log_1IrGen_0D_cache_access";
               helperAddr = &log_1IrGen_0D_cache_access;
            } else {
               helperName = "log_1Ir";
               helperAddr = &log_1Ir;
            }
            argv = mkIRExprVec_1( i_node_expr );
            regparms = 1;
            i++;
            break;
         case Ev_Dr:
         case Ev_Dm:
            /* Data read or modify */
            helperName = "log_0Ir_1Dr_cache_access";
            helperAddr = &log_0Ir_1Dr_cache_access;
            argv = mkIRExprVec_3( i_node_expr,
                                  get_Event_dea(ev),
                                  mkIRExpr_HWord( get_Event_dszB(ev) ) );
            regparms = 3;
            i++;
            break;
         case Ev_Dw:
            /* Data write */
            helperName = "log_0Ir_1Dw_cache_access";
            helperAddr = &log_0Ir_1Dw_cache_access;
            argv = mkIRExprVec_3( i_node_expr,
                                  get_Event_dea(ev),
                                  mkIRExpr_HWord( get_Event_dszB(ev) ) );
            regparms = 3;
            i++;
            break;
         case Ev_Bc:
            /* Conditional branch */
            helperName = "log_cond_branch";
            helperAddr = &log_cond_branch;
            argv = mkIRExprVec_2( i_node_expr, ev->Ev.Bc.taken );
            regparms = 2;
            i++;
            break;
         case Ev_Bi:
            /* Branch to an unknown destination */
            helperName = "log_ind_branch";
            helperAddr = &log_ind_branch;
            argv = mkIRExprVec_2( i_node_expr, ev->Ev.Bi.dst );
            regparms = 2;
            i++;
            break;
         default:
            tl_assert(0);
      }

      /* Add the helper. */
      tl_assert(helperName);
      tl_assert(helperAddr);
      tl_assert(argv);
      di = unsafeIRDirty_0_N( regparms,
                              helperName, VG_(fnptr_to_fnentry)( helperAddr ),
                              argv );
      addStmtToIRSB( cgs->sbOut, IRStmt_Dirty(di) );
   }

   cgs->events_used = 0;
}
|
|
|
|
// Queue an instruction-read event, flushing first if the queue is full.
// Classified at instrumentation time as IrNoX (cannot cross cache
// lines) or IrGen, which lets flushEvents pick the cheaper helpers.
static void addEvent_Ir ( CgState* cgs, InstrInfo* inode )
{
   if (cgs->events_used == N_EVENTS)
      flushEvents(cgs);
   tl_assert(cgs->events_used >= 0 && cgs->events_used < N_EVENTS);

   Event* evt = &cgs->events[cgs->events_used];
   init_Event(evt);
   evt->inode = inode;
   if (cachesim_is_IrNoX(inode->instr_addr, inode->instr_len)) {
      evt->tag = Ev_IrNoX;
      distinct_instrsNoX++;
   } else {
      evt->tag = Ev_IrGen;
      distinct_instrsGen++;
   }
   cgs->events_used++;
}
|
|
|
|
static
|
|
void addEvent_Dr ( CgState* cgs, InstrInfo* inode, Int datasize, IRAtom* ea )
|
|
{
|
|
Event* evt;
|
|
tl_assert(isIRAtom(ea));
|
|
tl_assert(datasize >= 1 && datasize <= min_line_size);
|
|
if (!clo_cache_sim)
|
|
return;
|
|
if (cgs->events_used == N_EVENTS)
|
|
flushEvents(cgs);
|
|
tl_assert(cgs->events_used >= 0 && cgs->events_used < N_EVENTS);
|
|
evt = &cgs->events[cgs->events_used];
|
|
init_Event(evt);
|
|
evt->tag = Ev_Dr;
|
|
evt->inode = inode;
|
|
evt->Ev.Dr.szB = datasize;
|
|
evt->Ev.Dr.ea = ea;
|
|
cgs->events_used++;
|
|
}
|
|
|
|
static
|
|
void addEvent_Dw ( CgState* cgs, InstrInfo* inode, Int datasize, IRAtom* ea )
|
|
{
|
|
Event* lastEvt;
|
|
Event* evt;
|
|
|
|
tl_assert(isIRAtom(ea));
|
|
tl_assert(datasize >= 1 && datasize <= min_line_size);
|
|
|
|
if (!clo_cache_sim)
|
|
return;
|
|
|
|
/* Is it possible to merge this write with the preceding read? */
|
|
lastEvt = &cgs->events[cgs->events_used-1];
|
|
if (cgs->events_used > 0
|
|
&& lastEvt->tag == Ev_Dr
|
|
&& lastEvt->Ev.Dr.szB == datasize
|
|
&& lastEvt->inode == inode
|
|
&& eqIRAtom(lastEvt->Ev.Dr.ea, ea))
|
|
{
|
|
lastEvt->tag = Ev_Dm;
|
|
return;
|
|
}
|
|
|
|
/* No. Add as normal. */
|
|
if (cgs->events_used == N_EVENTS)
|
|
flushEvents(cgs);
|
|
tl_assert(cgs->events_used >= 0 && cgs->events_used < N_EVENTS);
|
|
evt = &cgs->events[cgs->events_used];
|
|
init_Event(evt);
|
|
evt->tag = Ev_Dw;
|
|
evt->inode = inode;
|
|
evt->Ev.Dw.szB = datasize;
|
|
evt->Ev.Dw.ea = ea;
|
|
cgs->events_used++;
|
|
}
|
|
|
|
/* Emit instrumentation for a guarded (conditional) load or store.
   Unlike the other addEvent_* functions this does not queue an event:
   it flushes the queue and emits the dirty call immediately, with the
   guard attached, so the helper only runs when the guard is true. */
static
void addEvent_D_guarded ( CgState* cgs, InstrInfo* inode,
                          Int datasize, IRAtom* ea, IRAtom* guard,
                          Bool isWrite )
{
   tl_assert(isIRAtom(ea));
   tl_assert(guard);
   tl_assert(isIRAtom(guard));
   tl_assert(datasize >= 1 && datasize <= min_line_size);

   if (!clo_cache_sim)
      return;

   /* Adding guarded memory actions and merging them with the existing
      queue is too complex.  Simply flush the queue and add this
      action immediately.  Since guarded loads and stores are pretty
      rare, this is not thought likely to cause any noticeable
      performance loss as a result of the loss of event-merging
      opportunities. */
   tl_assert(cgs->events_used >= 0);
   flushEvents(cgs);
   tl_assert(cgs->events_used == 0);
   /* Same as case Ev_Dw / case Ev_Dr in flushEvents, except with guard */
   IRExpr*      i_node_expr;
   const HChar* helperName;
   void*        helperAddr;
   IRExpr**     argv;
   Int          regparms;
   IRDirty*     di;
   i_node_expr = mkIRExpr_HWord( (HWord)inode );
   /* Relies on log_0Ir_1Dr/_1Dw having identical prototypes — see the
      comment at their definitions. */
   helperName = isWrite ? "log_0Ir_1Dw_cache_access"
                        : "log_0Ir_1Dr_cache_access";
   helperAddr = isWrite ? &log_0Ir_1Dw_cache_access
                        : &log_0Ir_1Dr_cache_access;
   argv = mkIRExprVec_3( i_node_expr,
                         ea, mkIRExpr_HWord( datasize ) );
   regparms = 3;
   di = unsafeIRDirty_0_N(
           regparms,
           helperName, VG_(fnptr_to_fnentry)( helperAddr ),
           argv );
   di->guard = guard;   // helper executes only when 'guard' is true
   addStmtToIRSB( cgs->sbOut, IRStmt_Dirty(di) );
}
|
|
|
|
|
|
static
|
|
void addEvent_Bc ( CgState* cgs, InstrInfo* inode, IRAtom* guard )
|
|
{
|
|
Event* evt;
|
|
tl_assert(isIRAtom(guard));
|
|
tl_assert(typeOfIRExpr(cgs->sbOut->tyenv, guard)
|
|
== (sizeof(HWord)==4 ? Ity_I32 : Ity_I64));
|
|
if (!clo_branch_sim)
|
|
return;
|
|
if (cgs->events_used == N_EVENTS)
|
|
flushEvents(cgs);
|
|
tl_assert(cgs->events_used >= 0 && cgs->events_used < N_EVENTS);
|
|
evt = &cgs->events[cgs->events_used];
|
|
init_Event(evt);
|
|
evt->tag = Ev_Bc;
|
|
evt->inode = inode;
|
|
evt->Ev.Bc.taken = guard;
|
|
cgs->events_used++;
|
|
}
|
|
|
|
static
|
|
void addEvent_Bi ( CgState* cgs, InstrInfo* inode, IRAtom* whereTo )
|
|
{
|
|
Event* evt;
|
|
tl_assert(isIRAtom(whereTo));
|
|
tl_assert(typeOfIRExpr(cgs->sbOut->tyenv, whereTo)
|
|
== (sizeof(HWord)==4 ? Ity_I32 : Ity_I64));
|
|
if (!clo_branch_sim)
|
|
return;
|
|
if (cgs->events_used == N_EVENTS)
|
|
flushEvents(cgs);
|
|
tl_assert(cgs->events_used >= 0 && cgs->events_used < N_EVENTS);
|
|
evt = &cgs->events[cgs->events_used];
|
|
init_Event(evt);
|
|
evt->tag = Ev_Bi;
|
|
evt->inode = inode;
|
|
evt->Ev.Bi.dst = whereTo;
|
|
cgs->events_used++;
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////
|
|
|
|
|
|
/* Main instrumentation callback.  Walks the incoming superblock 'sbIn'
   statement by statement, builds a per-instruction InstrInfo table, queues
   Ir/Dr/Dw/Bc/Bi events, and flushes queued events into calls to the cache
   and branch-predictor simulation helpers.  Returns the instrumented SB. */
static
IRSB* cg_instrument ( VgCallbackClosure* closure,
                      IRSB* sbIn,
                      const VexGuestLayout* layout,
                      const VexGuestExtents* vge,
                      const VexArchInfo* archinfo_host,
                      IRType gWordTy, IRType hWordTy )
{
   Int        i;
   UInt       isize;
   IRStmt*    st;
   Addr       cia; /* address of current insn */
   CgState    cgs;
   IRTypeEnv* tyenv = sbIn->tyenv;
   InstrInfo* curr_inode = NULL;

   if (gWordTy != hWordTy) {
      /* We don't currently support this case. */
      VG_(tool_panic)("host/guest word size mismatch");
   }

   // Set up new SB: same header as sbIn, but with an empty statement list
   // which we fill in below.
   cgs.sbOut = deepCopyIRSBExceptStmts(sbIn);

   // Copy verbatim any IR preamble preceding the first IMark
   i = 0;
   while (i < sbIn->stmts_used && sbIn->stmts[i]->tag != Ist_IMark) {
      addStmtToIRSB( cgs.sbOut, sbIn->stmts[i] );
      i++;
   }

   // Get the first statement, and initial cia from it
   tl_assert(sbIn->stmts_used > 0);
   tl_assert(i < sbIn->stmts_used);
   st = sbIn->stmts[i];
   tl_assert(Ist_IMark == st->tag);

   cia   = st->Ist.IMark.addr;
   isize = st->Ist.IMark.len;
   // If Vex fails to decode an instruction, the size will be zero.
   // Pretend otherwise.
   if (isize == 0) isize = VG_MIN_INSTR_SZB;

   // Set up running state and get block info
   tl_assert(closure->readdr == vge->base[0]);
   cgs.events_used = 0;
   cgs.sbInfo      = get_SB_info(sbIn, (Addr)closure->readdr);
   cgs.sbInfo_i    = 0;

   if (DEBUG_CG)
      VG_(printf)("\n\n---------- cg_instrument ----------\n");

   // Traverse the block, initialising inodes, adding events and flushing as
   // necessary.
   for (/*use current i*/; i < sbIn->stmts_used; i++) {

      st = sbIn->stmts[i];
      tl_assert(isFlatIRStmt(st));

      switch (st->tag) {
         // These statement kinds neither touch memory nor end an
         // instruction, so they need no instrumentation.
         case Ist_NoOp:
         case Ist_AbiHint:
         case Ist_Put:
         case Ist_PutI:
         case Ist_MBE:
            break;

         case Ist_IMark:
            // Start of a new guest instruction.
            cia   = st->Ist.IMark.addr;
            isize = st->Ist.IMark.len;

            // If Vex fails to decode an instruction, the size will be zero.
            // Pretend otherwise.
            if (isize == 0) isize = VG_MIN_INSTR_SZB;

            // Sanity-check size.
            tl_assert( (VG_MIN_INSTR_SZB <= isize && isize <= VG_MAX_INSTR_SZB)
                     || VG_CLREQ_SZB == isize );

            // Get space for and init the inode, record it as the current one.
            // Subsequent Dr/Dw/Dm events from the same instruction will
            // also use it.
            curr_inode = setup_InstrInfo(&cgs, cia, isize);

            addEvent_Ir( &cgs, curr_inode );
            break;

         case Ist_WrTmp: {
            // Only loads on the RHS of a WrTmp generate a data-read event.
            IRExpr* data = st->Ist.WrTmp.data;
            if (data->tag == Iex_Load) {
               IRExpr* aexpr = data->Iex.Load.addr;
               // Note also, endianness info is ignored.  I guess
               // that's not interesting.
               addEvent_Dr( &cgs, curr_inode, sizeofIRType(data->Iex.Load.ty),
                                  aexpr );
            }
            break;
         }

         case Ist_Store: {
            IRExpr* data = st->Ist.Store.data;
            IRExpr* aexpr = st->Ist.Store.addr;
            addEvent_Dw( &cgs, curr_inode,
                         sizeofIRType(typeOfIRExpr(tyenv, data)), aexpr );
            break;
         }

         case Ist_StoreG: {
            // Guarded store: logged via a guarded helper call so it only
            // counts when the guard is true at run time.
            IRStoreG* sg   = st->Ist.StoreG.details;
            IRExpr*   data = sg->data;
            IRExpr*   addr = sg->addr;
            IRType    type = typeOfIRExpr(tyenv, data);
            tl_assert(type != Ity_INVALID);
            addEvent_D_guarded( &cgs, curr_inode,
                                sizeofIRType(type), addr, sg->guard,
                                True/*isWrite*/ );
            break;
         }

         case Ist_LoadG: {
            // Guarded load: size is that of the loaded (pre-widening) type.
            IRLoadG* lg       = st->Ist.LoadG.details;
            IRType   type     = Ity_INVALID; /* loaded type */
            IRType   typeWide = Ity_INVALID; /* after implicit widening */
            IRExpr*  addr     = lg->addr;
            typeOfIRLoadGOp(lg->cvt, &typeWide, &type);
            tl_assert(type != Ity_INVALID);
            addEvent_D_guarded( &cgs, curr_inode,
                                sizeofIRType(type), addr, lg->guard,
                                False/*!isWrite*/ );
            break;
         }

         case Ist_Dirty: {
            Int      dataSize;
            IRDirty* d = st->Ist.Dirty.details;
            if (d->mFx != Ifx_None) {
               /* This dirty helper accesses memory.  Collect the details. */
               tl_assert(d->mAddr != NULL);
               tl_assert(d->mSize != 0);
               dataSize = d->mSize;
               // Large (eg. 28B, 108B, 512B on x86) data-sized
               // instructions will be done inaccurately, but they're
               // very rare and this avoids errors from hitting more
               // than two cache lines in the simulation.
               if (dataSize > min_line_size)
                  dataSize = min_line_size;
               if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify)
                  addEvent_Dr( &cgs, curr_inode, dataSize, d->mAddr );
               if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify)
                  addEvent_Dw( &cgs, curr_inode, dataSize, d->mAddr );
            } else {
               tl_assert(d->mAddr == NULL);
               tl_assert(d->mSize == 0);
            }
            break;
         }

         case Ist_CAS: {
            /* We treat it as a read and a write of the location.  I
               think that is the same behaviour as it was before IRCAS
               was introduced, since prior to that point, the Vex
               front ends would translate a lock-prefixed instruction
               into a (normal) read followed by a (normal) write. */
            Int    dataSize;
            IRCAS* cas = st->Ist.CAS.details;
            tl_assert(cas->addr != NULL);
            tl_assert(cas->dataLo != NULL);
            dataSize = sizeofIRType(typeOfIRExpr(tyenv, cas->dataLo));
            if (cas->dataHi != NULL)
               dataSize *= 2; /* since it's a doubleword-CAS */
            /* I don't think this can ever happen, but play safe. */
            if (dataSize > min_line_size)
               dataSize = min_line_size;
            addEvent_Dr( &cgs, curr_inode, dataSize, cas->addr );
            addEvent_Dw( &cgs, curr_inode, dataSize, cas->addr );
            break;
         }

         case Ist_LLSC: {
            IRType dataTy;
            if (st->Ist.LLSC.storedata == NULL) {
               /* LL */
               dataTy = typeOfIRTemp(tyenv, st->Ist.LLSC.result);
               addEvent_Dr( &cgs, curr_inode,
                            sizeofIRType(dataTy), st->Ist.LLSC.addr );
               /* flush events before LL, should help SC to succeed */
               flushEvents( &cgs );
            } else {
               /* SC */
               dataTy = typeOfIRExpr(tyenv, st->Ist.LLSC.storedata);
               addEvent_Dw( &cgs, curr_inode,
                            sizeofIRType(dataTy), st->Ist.LLSC.addr );
            }
            break;
         }

         case Ist_Exit: {
            // call branch predictor only if this is a branch in guest code
            if ( (st->Ist.Exit.jk == Ijk_Boring) ||
                 (st->Ist.Exit.jk == Ijk_Call) ||
                 (st->Ist.Exit.jk == Ijk_Ret) )
            {
               /* Stuff to widen the guard expression to a host word, so
                  we can pass it to the branch predictor simulation
                  functions easily. */
               Bool     inverted;
               Addr     nia, sea;
               IRConst* dst;
               IRType   tyW    = hWordTy;
               IROp     widen  = tyW==Ity_I32  ? Iop_1Uto32  : Iop_1Uto64;
               IROp     opXOR  = tyW==Ity_I32  ? Iop_Xor32   : Iop_Xor64;
               IRTemp   guard1 = newIRTemp(cgs.sbOut->tyenv, Ity_I1);
               IRTemp   guardW = newIRTemp(cgs.sbOut->tyenv, tyW);
               IRTemp   guard  = newIRTemp(cgs.sbOut->tyenv, tyW);
               IRExpr*  one    = tyW==Ity_I32 ? IRExpr_Const(IRConst_U32(1))
                                              : IRExpr_Const(IRConst_U64(1));

               /* First we need to figure out whether the side exit got
                  inverted by the ir optimiser.  To do that, figure out
                  the next (fallthrough) instruction's address and the
                  side exit address and see if they are the same. */
               nia = cia + isize;

               /* Side exit address */
               dst = st->Ist.Exit.dst;
               if (tyW == Ity_I32) {
                  tl_assert(dst->tag == Ico_U32);
                  sea = dst->Ico.U32;
               } else {
                  tl_assert(tyW == Ity_I64);
                  tl_assert(dst->tag == Ico_U64);
                  sea = dst->Ico.U64;
               }

               inverted = nia == sea;

               /* Widen the guard expression. */
               addStmtToIRSB( cgs.sbOut,
                              IRStmt_WrTmp( guard1, st->Ist.Exit.guard ));
               addStmtToIRSB( cgs.sbOut,
                              IRStmt_WrTmp( guardW,
                                            IRExpr_Unop(widen,
                                                        IRExpr_RdTmp(guard1))) );
               /* If the exit is inverted, invert the sense of the guard. */
               addStmtToIRSB(
                  cgs.sbOut,
                  IRStmt_WrTmp(
                     guard,
                     inverted ? IRExpr_Binop(opXOR, IRExpr_RdTmp(guardW), one)
                              : IRExpr_RdTmp(guardW)
                  ));
               /* And post the event. */
               addEvent_Bc( &cgs, curr_inode, IRExpr_RdTmp(guard) );
            }

            /* We may never reach the next statement, so need to flush
               all outstanding transactions now. */
            flushEvents( &cgs );
            break;
         }

         default:
            ppIRStmt(st);
            tl_assert(0);
            break;
      }

      /* Copy the original statement */
      addStmtToIRSB( cgs.sbOut, st );

      if (DEBUG_CG) {
         ppIRStmt(st);
         VG_(printf)("\n");
      }
   }

   /* Deal with branches to unknown destinations.  Except ignore ones
      which are function returns as we assume the return stack
      predictor never mispredicts. */
   if ((sbIn->jumpkind == Ijk_Boring) || (sbIn->jumpkind == Ijk_Call)) {
      if (0) { ppIRExpr( sbIn->next ); VG_(printf)("\n"); }
      switch (sbIn->next->tag) {
         case Iex_Const:
            break; /* boring - branch to known address */
         case Iex_RdTmp:
            /* looks like an indirect branch (branch to unknown) */
            addEvent_Bi( &cgs, curr_inode, sbIn->next );
            break;
         default:
            /* shouldn't happen - if the incoming IR is properly
               flattened, should only have tmp and const cases to
               consider. */
            tl_assert(0);
      }
   }

   /* At the end of the bb.  Flush outstandings. */
   flushEvents( &cgs );

   /* done.  stay sane ... */
   tl_assert(cgs.sbInfo_i == cgs.sbInfo->n_instrs);

   if (DEBUG_CG) {
      VG_(printf)( "goto {");
      ppIRJumpKind(sbIn->jumpkind);
      VG_(printf)( "} ");
      ppIRExpr( sbIn->next );
      VG_(printf)( "}\n");
   }

   return cgs.sbOut;
}
|
|
|
|
/*------------------------------------------------------------*/
|
|
/*--- Cache configuration ---*/
|
|
/*------------------------------------------------------------*/
|
|
|
|
// Cache geometries requested on the command line; filled in by
// cg_process_cmd_line_option and consumed by cg_post_clo_init, which
// passes them to VG_(post_clo_init_configure_caches).
static cache_t clo_I1_cache = UNDEFINED_CACHE;
static cache_t clo_D1_cache = UNDEFINED_CACHE;
static cache_t clo_LL_cache = UNDEFINED_CACHE;
|
|
|
|
/*------------------------------------------------------------*/
|
|
/*--- cg_fini() and related function ---*/
|
|
/*------------------------------------------------------------*/
|
|
|
|
// Total reads/writes/misses.  Calculated during CC traversal at the end
// (fprint_CC_table_and_calc_totals) and printed by cg_fini.
// All auto-zeroed.
static CacheCC  Ir_total;   // instruction reads
static CacheCC  Dr_total;   // data reads
static CacheCC  Dw_total;   // data writes
static BranchCC Bc_total;   // conditional branches
static BranchCC Bi_total;   // indirect branches
|
|
|
|
/* Write the cachegrind.out file: "desc:" cache descriptions, the client
   "cmd:" line, the "events:" legend, one cost line per source line, and a
   final "summary:" line.  As a side effect, accumulates the global
   Ir/Dr/Dw/Bc/Bi *_total counters during the traversal, so this must run
   before cg_fini prints them. */
static void fprint_CC_table_and_calc_totals(void)
{
   Int     i;
   VgFile  *fp;
   HChar   *currFile = NULL;
   const HChar *currFn = NULL;
   LineCC* lineCC;

   // Setup output filename.  Nb: it's important to do this now, ie. as late
   // as possible.  If we do it at start-up and the program forks and the
   // output file format string contains a %p (pid) specifier, both the
   // parent and child will incorrectly write to the same file; this
   // happened in 3.3.0.
   HChar* cachegrind_out_file =
      VG_(expand_file_name)("--cachegrind-out-file", clo_cachegrind_out_file);

   fp = VG_(fopen)(cachegrind_out_file, VKI_O_CREAT|VKI_O_TRUNC|VKI_O_WRONLY,
                                        VKI_S_IRUSR|VKI_S_IWUSR);
   if (fp == NULL) {
      // If the file can't be opened for whatever reason (conflict
      // between multiple cachegrinded processes?), give up now.
      VG_(umsg)("error: can't open cache simulation output file '%s'\n",
                cachegrind_out_file );
      VG_(umsg)("       ... so simulation results will be missing.\n");
      VG_(free)(cachegrind_out_file);
      return;
   } else {
      VG_(free)(cachegrind_out_file);
   }

   // "desc:" lines (giving I1/D1/LL cache configuration).  The spaces after
   // the 2nd colon makes cg_annotate's output look nicer.
   VG_(fprintf)(fp,  "desc: I1 cache:         %s\n"
                     "desc: D1 cache:         %s\n"
                     "desc: LL cache:         %s\n",
                     I1.desc_line, D1.desc_line, LL.desc_line);

   // "cmd:" line: the executable name followed by the client args.
   VG_(fprintf)(fp, "cmd: %s", VG_(args_the_exename));
   for (i = 0; i < VG_(sizeXA)( VG_(args_for_client) ); i++) {
      HChar* arg = * (HChar**) VG_(indexXA)( VG_(args_for_client), i );
      VG_(fprintf)(fp, " %s", arg);
   }
   // "events:" line: the column legend, depending on which sims are on.
   if (clo_cache_sim && clo_branch_sim) {
      VG_(fprintf)(fp, "\nevents: Ir I1mr ILmr Dr D1mr DLmr Dw D1mw DLmw "
                                  "Bc Bcm Bi Bim\n");
   }
   else if (clo_cache_sim && !clo_branch_sim) {
      VG_(fprintf)(fp, "\nevents: Ir I1mr ILmr Dr D1mr DLmr Dw D1mw DLmw "
                                  "\n");
   }
   else if (!clo_cache_sim && clo_branch_sim) {
      VG_(fprintf)(fp, "\nevents: Ir Bc Bcm Bi Bim\n");
   }
   else {
      VG_(fprintf)(fp, "\nevents: Ir\n");
   }

   // Traverse every lineCC
   VG_(OSetGen_ResetIter)(CC_table);
   while ( (lineCC = VG_(OSetGen_Next)(CC_table)) ) {
      Bool just_hit_a_new_file = False;
      // If we've hit a new file, print a "fl=" line.  Note that because
      // each string is stored exactly once in the string table, we can use
      // pointer comparison rather than strcmp() to test for equality, which
      // is good because most of the time the comparisons are equal and so
      // the whole strings would have to be checked.
      if ( lineCC->loc.file != currFile ) {
         currFile = lineCC->loc.file;
         VG_(fprintf)(fp, "fl=%s\n", currFile);
         distinct_files++;
         just_hit_a_new_file = True;
      }
      // If we've hit a new function, print a "fn=" line.  We know to do
      // this when the function name changes, and also every time we hit a
      // new file (in which case the new function name might be the same as
      // in the old file, hence the just_hit_a_new_file test).
      if ( just_hit_a_new_file || lineCC->loc.fn != currFn ) {
         currFn = lineCC->loc.fn;
         VG_(fprintf)(fp, "fn=%s\n", currFn);
         distinct_fns++;
      }

      // Print the LineCC: columns must match the "events:" legend above.
      if (clo_cache_sim && clo_branch_sim) {
         VG_(fprintf)(fp,  "%d %llu %llu %llu"
                             " %llu %llu %llu"
                             " %llu %llu %llu"
                             " %llu %llu %llu %llu\n",
                            lineCC->loc.line,
                            lineCC->Ir.a, lineCC->Ir.m1, lineCC->Ir.mL,
                            lineCC->Dr.a, lineCC->Dr.m1, lineCC->Dr.mL,
                            lineCC->Dw.a, lineCC->Dw.m1, lineCC->Dw.mL,
                            lineCC->Bc.b, lineCC->Bc.mp,
                            lineCC->Bi.b, lineCC->Bi.mp);
      }
      else if (clo_cache_sim && !clo_branch_sim) {
         VG_(fprintf)(fp,  "%d %llu %llu %llu"
                             " %llu %llu %llu"
                             " %llu %llu %llu\n",
                            lineCC->loc.line,
                            lineCC->Ir.a, lineCC->Ir.m1, lineCC->Ir.mL,
                            lineCC->Dr.a, lineCC->Dr.m1, lineCC->Dr.mL,
                            lineCC->Dw.a, lineCC->Dw.m1, lineCC->Dw.mL);
      }
      else if (!clo_cache_sim && clo_branch_sim) {
         VG_(fprintf)(fp,  "%d %llu"
                             " %llu %llu %llu %llu\n",
                            lineCC->loc.line,
                            lineCC->Ir.a,
                            lineCC->Bc.b, lineCC->Bc.mp,
                            lineCC->Bi.b, lineCC->Bi.mp);
      }
      else {
         VG_(fprintf)(fp,  "%d %llu\n",
                            lineCC->loc.line,
                            lineCC->Ir.a);
      }

      // Update summary stats
      Ir_total.a  += lineCC->Ir.a;
      Ir_total.m1 += lineCC->Ir.m1;
      Ir_total.mL += lineCC->Ir.mL;
      Dr_total.a  += lineCC->Dr.a;
      Dr_total.m1 += lineCC->Dr.m1;
      Dr_total.mL += lineCC->Dr.mL;
      Dw_total.a  += lineCC->Dw.a;
      Dw_total.m1 += lineCC->Dw.m1;
      Dw_total.mL += lineCC->Dw.mL;
      Bc_total.b  += lineCC->Bc.b;
      Bc_total.mp += lineCC->Bc.mp;
      Bi_total.b  += lineCC->Bi.b;
      Bi_total.mp += lineCC->Bi.mp;

      distinct_lines++;
   }

   // Summary stats must come after rest of table, since we calculate them
   // during traversal.
   if (clo_cache_sim && clo_branch_sim) {
      VG_(fprintf)(fp,  "summary:"
                        " %llu %llu %llu"
                        " %llu %llu %llu"
                        " %llu %llu %llu"
                        " %llu %llu %llu %llu\n",
                        Ir_total.a, Ir_total.m1, Ir_total.mL,
                        Dr_total.a, Dr_total.m1, Dr_total.mL,
                        Dw_total.a, Dw_total.m1, Dw_total.mL,
                        Bc_total.b, Bc_total.mp,
                        Bi_total.b, Bi_total.mp);
   }
   else if (clo_cache_sim && !clo_branch_sim) {
      VG_(fprintf)(fp,  "summary:"
                        " %llu %llu %llu"
                        " %llu %llu %llu"
                        " %llu %llu %llu\n",
                        Ir_total.a, Ir_total.m1, Ir_total.mL,
                        Dr_total.a, Dr_total.m1, Dr_total.mL,
                        Dw_total.a, Dw_total.m1, Dw_total.mL);
   }
   else if (!clo_cache_sim && clo_branch_sim) {
      VG_(fprintf)(fp,  "summary:"
                        " %llu"
                        " %llu %llu %llu %llu\n",
                        Ir_total.a,
                        Bc_total.b, Bc_total.mp,
                        Bi_total.b, Bi_total.mp);
   }
   else {
      VG_(fprintf)(fp, "summary:"
                        " %llu\n",
                        Ir_total.a);
   }

   VG_(fclose)(fp);
}
|
|
|
|
static UInt ULong_width(ULong n)
{
   /* Width, in characters, needed to print 'n' in decimal with a comma
      inserted between every group of three digits.  Zero needs one digit. */
   UInt digits = 1;
   for (n /= 10; n > 0; n /= 10)
      digits++;
   return digits + (digits-1)/3;   // add space for commas
}
|
|
|
|
/* Tool shutdown: dump the per-line CC table to the output file (which also
   computes the *_total counters), then print the I/D/LL cache summary,
   the branch-prediction summary, and optionally (-v --stats) internal
   table statistics. */
static void cg_fini(Int exitcode)
{
   static HChar fmt[128];   // OK; large enough

   CacheCC  D_total;
   BranchCC B_total;
   ULong LL_total_m, LL_total_mr, LL_total_mw,
         LL_total, LL_total_r, LL_total_w;
   Int l1, l2, l3;

   /* Must run first: fills in Ir/Dr/Dw/Bc/Bi totals as a side effect. */
   fprint_CC_table_and_calc_totals();

   if (VG_(clo_verbosity) == 0)
      return;

   // Nb: this isn't called "MAX" because that overshadows a global on Darwin.
   #define CG_MAX(a, b)  ((a) >= (b) ? (a) : (b))

   /* I cache results.  Use the I_refs value to determine the first column
    * width. */
   l1 = ULong_width(Ir_total.a);
   l2 = ULong_width(CG_MAX(Dr_total.a, Bc_total.b));
   l3 = ULong_width(CG_MAX(Dw_total.a, Bi_total.b));

   /* Make format string, getting width right for numbers */
   VG_(sprintf)(fmt, "%%s %%,%dllu\n", l1);

   /* Always print this */
   VG_(umsg)(fmt, "I   refs:      ", Ir_total.a);

   /* If cache profiling is enabled, show D access numbers and all
      miss numbers */
   if (clo_cache_sim) {
      VG_(umsg)(fmt, "I1  misses:    ", Ir_total.m1);
      VG_(umsg)(fmt, "LLi misses:    ", Ir_total.mL);

      // Avoid division by zero in the rate computations below.
      if (0 == Ir_total.a) Ir_total.a = 1;
      VG_(umsg)("I1  miss rate: %*.2f%%\n", l1,
                Ir_total.m1 * 100.0 / Ir_total.a);
      VG_(umsg)("LLi miss rate: %*.2f%%\n", l1,
                Ir_total.mL * 100.0 / Ir_total.a);
      VG_(umsg)("\n");

      /* D cache results.  Use the D_refs.rd and D_refs.wr values to
       * determine the width of columns 2 & 3. */
      D_total.a  = Dr_total.a  + Dw_total.a;
      D_total.m1 = Dr_total.m1 + Dw_total.m1;
      D_total.mL = Dr_total.mL + Dw_total.mL;

      /* Make format string, getting width right for numbers */
      VG_(sprintf)(fmt, "%%s %%,%dllu  (%%,%dllu rd   + %%,%dllu wr)\n",
                        l1, l2, l3);

      VG_(umsg)(fmt, "D   refs:     ",
                D_total.a, Dr_total.a, Dw_total.a);
      VG_(umsg)(fmt, "D1  misses:   ",
                D_total.m1, Dr_total.m1, Dw_total.m1);
      VG_(umsg)(fmt, "LLd misses:   ",
                D_total.mL, Dr_total.mL, Dw_total.mL);

      // Avoid division by zero in the rate computations below.
      if (0 == D_total.a)  D_total.a = 1;
      if (0 == Dr_total.a) Dr_total.a = 1;
      if (0 == Dw_total.a) Dw_total.a = 1;
      VG_(umsg)("D1  miss rate: %*.1f%% (%*.1f%%     + %*.1f%%  )\n",
                l1, D_total.m1  * 100.0 / D_total.a,
                l2, Dr_total.m1 * 100.0 / Dr_total.a,
                l3, Dw_total.m1 * 100.0 / Dw_total.a);
      VG_(umsg)("LLd miss rate: %*.1f%% (%*.1f%%     + %*.1f%%  )\n",
                l1, D_total.mL  * 100.0 / D_total.a,
                l2, Dr_total.mL * 100.0 / Dr_total.a,
                l3, Dw_total.mL * 100.0 / Dw_total.a);
      VG_(umsg)("\n");

      /* LL overall results */

      // LL refs are the L1 misses (both I and D sides).
      LL_total   = Dr_total.m1 + Dw_total.m1 + Ir_total.m1;
      LL_total_r = Dr_total.m1 + Ir_total.m1;
      LL_total_w = Dw_total.m1;
      VG_(umsg)(fmt, "LL refs:      ",
                LL_total, LL_total_r, LL_total_w);

      LL_total_m  = Dr_total.mL + Dw_total.mL + Ir_total.mL;
      LL_total_mr = Dr_total.mL + Ir_total.mL;
      LL_total_mw = Dw_total.mL;
      VG_(umsg)(fmt, "LL misses:    ",
                LL_total_m, LL_total_mr, LL_total_mw);

      // LL miss rate is relative to all accesses, not to LL refs.
      VG_(umsg)("LL miss rate:  %*.1f%% (%*.1f%%     + %*.1f%%  )\n",
                l1, LL_total_m  * 100.0 / (Ir_total.a + D_total.a),
                l2, LL_total_mr * 100.0 / (Ir_total.a + Dr_total.a),
                l3, LL_total_mw * 100.0 / Dw_total.a);
   }

   /* If branch profiling is enabled, show branch overall results. */
   if (clo_branch_sim) {
      /* Make format string, getting width right for numbers */
      VG_(sprintf)(fmt, "%%s %%,%dllu  (%%,%dllu cond + %%,%dllu ind)\n",
                        l1, l2, l3);

      // Avoid division by zero in the rate computations below.
      if (0 == Bc_total.b)  Bc_total.b = 1;
      if (0 == Bi_total.b)  Bi_total.b = 1;
      B_total.b  = Bc_total.b  + Bi_total.b;
      B_total.mp = Bc_total.mp + Bi_total.mp;

      VG_(umsg)("\n");
      VG_(umsg)(fmt, "Branches:     ",
                B_total.b, Bc_total.b, Bi_total.b);

      VG_(umsg)(fmt, "Mispredicts:  ",
                B_total.mp, Bc_total.mp, Bi_total.mp);

      VG_(umsg)("Mispred rate:  %*.1f%% (%*.1f%%     + %*.1f%%   )\n",
                l1, B_total.mp  * 100.0 / B_total.b,
                l2, Bc_total.mp * 100.0 / Bc_total.b,
                l3, Bi_total.mp * 100.0 / Bi_total.b);
   }

   // Various stats
   if (VG_(clo_stats)) {
      Int debug_lookups = full_debugs      + fn_debugs +
                          file_line_debugs + no_debugs;

      VG_(dmsg)("\n");
      VG_(dmsg)("cachegrind: distinct files     : %d\n", distinct_files);
      VG_(dmsg)("cachegrind: distinct functions : %d\n", distinct_fns);
      VG_(dmsg)("cachegrind: distinct lines     : %d\n", distinct_lines);
      VG_(dmsg)("cachegrind: distinct instrs NoX: %d\n", distinct_instrsNoX);
      VG_(dmsg)("cachegrind: distinct instrs Gen: %d\n", distinct_instrsGen);
      VG_(dmsg)("cachegrind: debug lookups      : %d\n", debug_lookups);

      VG_(dmsg)("cachegrind: with full      info:%6.1f%% (%d)\n",
                full_debugs * 100.0 / debug_lookups, full_debugs);
      VG_(dmsg)("cachegrind: with file/line info:%6.1f%% (%d)\n",
                file_line_debugs * 100.0 / debug_lookups, file_line_debugs);
      VG_(dmsg)("cachegrind: with fn name   info:%6.1f%% (%d)\n",
                fn_debugs * 100.0 / debug_lookups, fn_debugs);
      VG_(dmsg)("cachegrind: with zero      info:%6.1f%% (%d)\n",
                no_debugs * 100.0 / debug_lookups, no_debugs);

      VG_(dmsg)("cachegrind: string table size: %u\n",
                VG_(OSetGen_Size)(stringTable));
      VG_(dmsg)("cachegrind: CC table size: %u\n",
                VG_(OSetGen_Size)(CC_table));
      VG_(dmsg)("cachegrind: InstrInfo table size: %u\n",
                VG_(OSetGen_Size)(instrInfoTable));
   }
}
|
|
|
|
/*--------------------------------------------------------------------*/
|
|
/*--- Discarding BB info ---*/
|
|
/*--------------------------------------------------------------------*/
|
|
|
|
// Called when a translation is removed from the translation cache for
|
|
// any reason at all: to free up space, because the guest code was
|
|
// unmapped or modified, or for any arbitrary reason.
|
|
static
|
|
void cg_discard_superblock_info ( Addr orig_addr64, VexGuestExtents vge )
|
|
{
|
|
SB_info* sbInfo;
|
|
Addr orig_addr = vge.base[0];
|
|
|
|
tl_assert(vge.n_used > 0);
|
|
|
|
if (DEBUG_CG)
|
|
VG_(printf)( "discard_basic_block_info: %p, %p, %llu\n",
|
|
(void*)orig_addr,
|
|
(void*)vge.base[0], (ULong)vge.len[0]);
|
|
|
|
// Get BB info, remove from table, free BB info. Simple! Note that we
|
|
// use orig_addr, not the first instruction address in vge.
|
|
sbInfo = VG_(OSetGen_Remove)(instrInfoTable, &orig_addr);
|
|
tl_assert(NULL != sbInfo);
|
|
VG_(OSetGen_FreeNode)(instrInfoTable, sbInfo);
|
|
}
|
|
|
|
/*--------------------------------------------------------------------*/
|
|
/*--- Command line processing ---*/
|
|
/*--------------------------------------------------------------------*/
|
|
|
|
/* Handle one command-line option.  Returns True iff 'arg' was recognised
   as a Cachegrind option (cache geometry, output file, or sim toggles).
   Nb: VG_STR_CLO/VG_BOOL_CLO are macros that expand to include their own
   condition, hence the unusual "else if MACRO(...) {}" shape. */
static Bool cg_process_cmd_line_option(const HChar* arg)
{
   // --I1/--D1/--LL cache geometry options, shared with other tools.
   if (VG_(str_clo_cache_opt)(arg,
                              &clo_I1_cache,
                              &clo_D1_cache,
                              &clo_LL_cache)) {}

   else if VG_STR_CLO( arg, "--cachegrind-out-file", clo_cachegrind_out_file) {}
   else if VG_BOOL_CLO(arg, "--cache-sim",  clo_cache_sim)  {}
   else if VG_BOOL_CLO(arg, "--branch-sim", clo_branch_sim) {}
   else
      return False;

   return True;
}
|
|
|
|
static void cg_print_usage(void)
|
|
{
|
|
VG_(print_cache_clo_opts)();
|
|
VG_(printf)(
|
|
" --cache-sim=yes|no [yes] collect cache stats?\n"
|
|
" --branch-sim=yes|no [no] collect branch prediction stats?\n"
|
|
" --cachegrind-out-file=<file> output file name [cachegrind.out.%%p]\n"
|
|
);
|
|
}
|
|
|
|
static void cg_print_debug_usage(void)
|
|
{
|
|
VG_(printf)(
|
|
" (none)\n"
|
|
);
|
|
}
|
|
|
|
/*--------------------------------------------------------------------*/
|
|
/*--- Setup ---*/
|
|
/*--------------------------------------------------------------------*/
|
|
|
|
static void cg_post_clo_init(void); /* just below */
|
|
|
|
static void cg_pre_clo_init(void)
|
|
{
|
|
VG_(details_name) ("Cachegrind");
|
|
VG_(details_version) (NULL);
|
|
VG_(details_description) ("a cache and branch-prediction profiler");
|
|
VG_(details_copyright_author)(
|
|
"Copyright (C) 2002-2015, and GNU GPL'd, by Nicholas Nethercote et al.");
|
|
VG_(details_bug_reports_to) (VG_BUGS_TO);
|
|
VG_(details_avg_translation_sizeB) ( 500 );
|
|
|
|
VG_(clo_vex_control).iropt_register_updates_default
|
|
= VG_(clo_px_file_backed)
|
|
= VexRegUpdSpAtMemAccess; // overridable by the user.
|
|
|
|
VG_(basic_tool_funcs) (cg_post_clo_init,
|
|
cg_instrument,
|
|
cg_fini);
|
|
|
|
VG_(needs_superblock_discards)(cg_discard_superblock_info);
|
|
VG_(needs_command_line_options)(cg_process_cmd_line_option,
|
|
cg_print_usage,
|
|
cg_print_debug_usage);
|
|
}
|
|
|
|
static void cg_post_clo_init(void)
|
|
{
|
|
cache_t I1c, D1c, LLc;
|
|
|
|
CC_table =
|
|
VG_(OSetGen_Create)(offsetof(LineCC, loc),
|
|
cmp_CodeLoc_LineCC,
|
|
VG_(malloc), "cg.main.cpci.1",
|
|
VG_(free));
|
|
instrInfoTable =
|
|
VG_(OSetGen_Create)(/*keyOff*/0,
|
|
NULL,
|
|
VG_(malloc), "cg.main.cpci.2",
|
|
VG_(free));
|
|
stringTable =
|
|
VG_(OSetGen_Create)(/*keyOff*/0,
|
|
stringCmp,
|
|
VG_(malloc), "cg.main.cpci.3",
|
|
VG_(free));
|
|
|
|
VG_(post_clo_init_configure_caches)(&I1c, &D1c, &LLc,
|
|
&clo_I1_cache,
|
|
&clo_D1_cache,
|
|
&clo_LL_cache);
|
|
|
|
// min_line_size is used to make sure that we never feed
|
|
// accesses to the simulator straddling more than two
|
|
// cache lines at any cache level
|
|
min_line_size = (I1c.line_size < D1c.line_size) ? I1c.line_size : D1c.line_size;
|
|
min_line_size = (LLc.line_size < min_line_size) ? LLc.line_size : min_line_size;
|
|
|
|
Int largest_load_or_store_size
|
|
= VG_(machine_get_size_of_largest_guest_register)();
|
|
if (min_line_size < largest_load_or_store_size) {
|
|
/* We can't continue, because the cache simulation might
|
|
straddle more than 2 lines, and it will assert. So let's
|
|
just stop before we start. */
|
|
VG_(umsg)("Cachegrind: cannot continue: the minimum line size (%d)\n",
|
|
(Int)min_line_size);
|
|
VG_(umsg)(" must be equal to or larger than the maximum register size (%d)\n",
|
|
largest_load_or_store_size );
|
|
VG_(umsg)(" but it is not. Exiting now.\n");
|
|
VG_(exit)(1);
|
|
}
|
|
|
|
cachesim_initcaches(I1c, D1c, LLc);
|
|
}
|
|
|
|
VG_DETERMINE_INTERFACE_VERSION(cg_pre_clo_init)
|
|
|
|
/*--------------------------------------------------------------------*/
|
|
/*--- end ---*/
|
|
/*--------------------------------------------------------------------*/
|
|
|