/*--------------------------------------------------------------------*/
/*--- DHAT: a Dynamic Heap Analysis Tool                 dh_main.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of DHAT, a Valgrind tool for profiling the
   heap usage of programs.

   Copyright (C) 2010-2015 Mozilla Inc

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

/* Contributed by Julian Seward <jseward@acm.org> */

#include "pub_tool_basics.h"
|
|
#include "pub_tool_libcbase.h"
|
|
#include "pub_tool_libcassert.h"
|
|
#include "pub_tool_libcprint.h"
|
|
#include "pub_tool_machine.h" // VG_(fnptr_to_fnentry)
|
|
#include "pub_tool_mallocfree.h"
|
|
#include "pub_tool_options.h"
|
|
#include "pub_tool_replacemalloc.h"
|
|
#include "pub_tool_tooliface.h"
|
|
#include "pub_tool_wordfm.h"
|
|
|
|
#define HISTOGRAM_SIZE_LIMIT 1024
|
|
|
|
|
|
//------------------------------------------------------------//
//--- Globals                                              ---//
//------------------------------------------------------------//

// Number of guest instructions executed so far.  This is
// incremented directly from the generated code.
static ULong g_guest_instrs_executed = 0;

// Summary statistics for the entire run.
static ULong g_tot_blocks = 0;   // total blocks allocated
static ULong g_tot_bytes  = 0;   // total bytes allocated

static ULong g_cur_blocks_live = 0; // curr # blocks live
static ULong g_cur_bytes_live  = 0; // curr # bytes live

static ULong g_max_blocks_live = 0; // bytes and blocks at
static ULong g_max_bytes_live  = 0; // the max residency point

//------------------------------------------------------------//
//--- an Interval Tree of live blocks                      ---//
//------------------------------------------------------------//

/* Tracks information about live blocks. */
typedef
   struct {
      Addr        payload;
      SizeT       req_szB;
      ExeContext* ap;        /* allocation ec */
      ULong       allocd_at; /* instruction number */
      ULong       n_reads;
      ULong       n_writes;
      /* Approx histogram, one 16-bit counter per payload byte.  Counts
         therefore latch at 0xFFFF.  Can be NULL if the block is resized
         or if the block is larger than HISTOGRAM_SIZE_LIMIT. */
      UShort*     histoW; /* [0 .. req_szB-1] */
   }
   Block;

/* May not contain zero-sized blocks.  May not contain
   overlapping blocks. */
static WordFM* interval_tree = NULL;  /* WordFM* Block* void */

/* Here's the comparison function.  Since the tree is required
   to contain non-zero sized, non-overlapping blocks, it's good
   enough to consider any overlap as a match. */
static Word interval_tree_Cmp ( UWord k1, UWord k2 )
{
   Block* b1 = (Block*)k1;
   Block* b2 = (Block*)k2;
   tl_assert(b1->req_szB > 0);
   tl_assert(b2->req_szB > 0);
   if (b1->payload + b1->req_szB <= b2->payload) return -1;
   if (b2->payload + b2->req_szB <= b1->payload) return  1;
   return 0;
}
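
/* A worked example of the overlap-as-equality trick, with invented
   numbers: a live block at payload 0x5000 with req_szB 16 covers
   [0x5000, 0x5010).  Looking up a 1-byte "fake" block at 0x5008
   compares as 0 (a match) against it, because the two ranges overlap.
   This is what lets find_Block_containing and delete_Block_starting_at
   below do point queries on the tree using a 1-byte key. */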

// 2-entry cache for find_Block_containing
static Block* fbc_cache0 = NULL;
static Block* fbc_cache1 = NULL;

static UWord stats__n_fBc_cached   = 0;
static UWord stats__n_fBc_uncached = 0;
static UWord stats__n_fBc_notfound = 0;

static Block* find_Block_containing ( Addr a )
{
   if (LIKELY(fbc_cache0
              && fbc_cache0->payload <= a
              && a < fbc_cache0->payload + fbc_cache0->req_szB)) {
      // found at 0
      stats__n_fBc_cached++;
      return fbc_cache0;
   }
   if (LIKELY(fbc_cache1
              && fbc_cache1->payload <= a
              && a < fbc_cache1->payload + fbc_cache1->req_szB)) {
      // found at 1; swap 0 and 1
      Block* tmp = fbc_cache0;
      fbc_cache0 = fbc_cache1;
      fbc_cache1 = tmp;
      stats__n_fBc_cached++;
      return fbc_cache0;
   }
   Block fake;
   fake.payload = a;
   fake.req_szB = 1;
   UWord foundkey = 1;
   UWord foundval = 1;
   Bool found = VG_(lookupFM)( interval_tree,
                               &foundkey, &foundval, (UWord)&fake );
   if (!found) {
      stats__n_fBc_notfound++;
      return NULL;
   }
   tl_assert(foundval == 0); // we don't store vals in the interval tree
   tl_assert(foundkey != 1);
   Block* res = (Block*)foundkey;
   tl_assert(res != &fake);
   // put at the top position
   fbc_cache1 = fbc_cache0;
   fbc_cache0 = res;
   stats__n_fBc_uncached++;
   return res;
}
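
/* Design note on the cache above: it acts as a tiny two-entry MRU
   list -- a hit on fbc_cache1 promotes it to fbc_cache0 -- and every
   mutation of the interval tree (see new_block, renew_block and
   delete_Block_starting_at) must null both entries, since a cached
   Block* may have just been freed or moved. */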

// delete a block; asserts if not found.  (viz, 'a' must be
// known to be present.)
static void delete_Block_starting_at ( Addr a )
{
   Block fake;
   fake.payload = a;
   fake.req_szB = 1;
   Bool found = VG_(delFromFM)( interval_tree,
                                NULL, NULL, (Addr)&fake );
   tl_assert(found);
   fbc_cache0 = fbc_cache1 = NULL;
}

//------------------------------------------------------------//
//--- a FM of allocation points (APs)                      ---//
//------------------------------------------------------------//

typedef
   struct {
      // the allocation point that we're summarising stats for
      ExeContext* ap;
      // used when printing results
      Bool shown;
      // The current number of blocks and bytes live for this AP
      ULong cur_blocks_live;
      ULong cur_bytes_live;
      // The number of blocks and bytes live at the max-liveness
      // point.  Note this is a bit subtle.  max_blocks_live is not
      // the maximum number of live blocks, but rather the number of
      // blocks live at the point of maximum byte liveness.  These are
      // not necessarily the same thing.
      ULong max_blocks_live;
      ULong max_bytes_live;
      // Total number of blocks and bytes allocated by this AP.
      ULong tot_blocks;
      ULong tot_bytes;
      // Sum of death ages for all blocks allocated by this AP,
      // that have subsequently been freed.
      ULong death_ages_sum;
      ULong deaths;
      // Total number of reads and writes in all blocks allocated
      // by this AP.
      ULong n_reads;
      ULong n_writes;
      /* Histogram information.  We maintain a histogram aggregated for
         all retiring Blocks allocated by this AP, but only if:
         - this AP has only ever allocated objects of one size
         - that size is <= HISTOGRAM_SIZE_LIMIT
         What we need therefore is a mechanism to see if this AP
         has only ever allocated blocks of one size.

         3 states:
            Unknown        because no retirement yet
            Exactly xsize  all retiring blocks are of this size
            Mixed          multiple different sizes seen
      */
      enum { Unknown=999, Exactly, Mixed } xsize_tag;
      SizeT xsize;
      UInt* histo; /* [0 .. xsize-1] */
   }
   APInfo;
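
/* An illustrative lifecycle for the 3-state machine above (sizes
   invented): an AP starts in Unknown.  When its first block retires,
   say a 64-byte one, the state becomes Exactly with xsize == 64 and
   an aggregate histogram may be allocated.  If a block of any other
   size later retires from the same AP, the state drops to Mixed and
   the histogram is discarded for good.  The transitions live in
   retire_Block. */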

/* maps ExeContext*'s to APInfo*'s.  Note that the keys must match the
   .ap field in the values. */
static WordFM* apinfo = NULL;  /* WordFM* ExeContext* APInfo* */


/* 'bk' is being introduced (has just been allocated).  Find the
   relevant APInfo entry for it, or create one, based on the block's
   allocation EC.  Then, update the APInfo to the extent that we
   actually can, to reflect the allocation. */
static void intro_Block ( Block* bk )
{
   tl_assert(bk);
   tl_assert(bk->ap);

   APInfo* api   = NULL;
   UWord   keyW  = 0;
   UWord   valW  = 0;
   Bool    found = VG_(lookupFM)( apinfo,
                                  &keyW, &valW, (UWord)bk->ap );
   if (found) {
      api = (APInfo*)valW;
      tl_assert(keyW == (UWord)bk->ap);
   } else {
      api = VG_(malloc)( "dh.main.intro_Block.1", sizeof(APInfo) );
      VG_(memset)(api, 0, sizeof(*api));
      api->ap = bk->ap;
      Bool present = VG_(addToFM)( apinfo,
                                   (UWord)bk->ap, (UWord)api );
      tl_assert(!present);
      // histo stuff
      tl_assert(api->deaths == 0);
      api->xsize_tag = Unknown;
      api->xsize = 0;
      if (0) VG_(printf)("api %p --> Unknown\n", api);
   }

   tl_assert(api->ap == bk->ap);

   /* So: update stats to reflect an allocation */

   // # live blocks
   api->cur_blocks_live++;

   // # live bytes
   api->cur_bytes_live += bk->req_szB;
   if (api->cur_bytes_live > api->max_bytes_live) {
      api->max_bytes_live  = api->cur_bytes_live;
      api->max_blocks_live = api->cur_blocks_live;
   }

   // total blocks and bytes allocated here
   api->tot_blocks++;
   api->tot_bytes += bk->req_szB;

   // update summary globals
   g_tot_blocks++;
   g_tot_bytes += bk->req_szB;

   g_cur_blocks_live++;
   g_cur_bytes_live += bk->req_szB;
   if (g_cur_bytes_live > g_max_bytes_live) {
      g_max_bytes_live  = g_cur_bytes_live;
      g_max_blocks_live = g_cur_blocks_live;
   }
}


/* 'bk' is retiring (being freed).  Find the relevant APInfo entry for
   it, which must already exist.  Then, fold info from 'bk' into that
   entry.  'because_freed' is True if the block is retiring because
   the client has freed it.  If it is False then the block is retiring
   because the program has finished, in which case we want to skip the
   updates of the total blocks live etc for this AP, but still fold in
   the access counts and histo data that have so far accumulated for
   the block. */
static void retire_Block ( Block* bk, Bool because_freed )
{
   tl_assert(bk);
   tl_assert(bk->ap);

   APInfo* api   = NULL;
   UWord   keyW  = 0;
   UWord   valW  = 0;
   Bool    found = VG_(lookupFM)( apinfo,
                                  &keyW, &valW, (UWord)bk->ap );

   tl_assert(found);
   api = (APInfo*)valW;
   tl_assert(api->ap == bk->ap);

   // update stats following this free.
   if (0)
      VG_(printf)("ec %p  api->c_by_l %llu  bk->rszB %llu\n",
                  bk->ap, api->cur_bytes_live, (ULong)bk->req_szB);

   // update total blocks live etc for this AP
   if (because_freed) {
      tl_assert(api->cur_blocks_live >= 1);
      tl_assert(api->cur_bytes_live >= bk->req_szB);
      api->cur_blocks_live--;
      api->cur_bytes_live -= bk->req_szB;

      api->deaths++;

      tl_assert(bk->allocd_at <= g_guest_instrs_executed);
      api->death_ages_sum += (g_guest_instrs_executed - bk->allocd_at);

      // update global summary stats
      tl_assert(g_cur_blocks_live > 0);
      g_cur_blocks_live--;
      tl_assert(g_cur_bytes_live >= bk->req_szB);
      g_cur_bytes_live -= bk->req_szB;
   }

   // access counts
   api->n_reads  += bk->n_reads;
   api->n_writes += bk->n_writes;

   // histo stuff.  First, do state transitions for xsize/xsize_tag.
   switch (api->xsize_tag) {

      case Unknown:
         tl_assert(api->xsize == 0);
         tl_assert(api->deaths == 1 || api->deaths == 0);
         tl_assert(!api->histo);
         api->xsize_tag = Exactly;
         api->xsize = bk->req_szB;
         if (0) VG_(printf)("api %p --> Exactly(%lu)\n", api, api->xsize);
         // and allocate the histo
         if (bk->histoW) {
            api->histo = VG_(malloc)("dh.main.retire_Block.1",
                                     api->xsize * sizeof(UInt));
            VG_(memset)(api->histo, 0, api->xsize * sizeof(UInt));
         }
         break;

      case Exactly:
         //tl_assert(api->deaths > 1);
         if (bk->req_szB != api->xsize) {
            if (0) VG_(printf)("api %p --> Mixed(%lu -> %lu)\n",
                               api, api->xsize, bk->req_szB);
            api->xsize_tag = Mixed;
            api->xsize = 0;
            // deallocate the histo, if any
            if (api->histo) {
               VG_(free)(api->histo);
               api->histo = NULL;
            }
         }
         break;

      case Mixed:
         //tl_assert(api->deaths > 1);
         break;

      default:
         tl_assert(0);
   }

   // See if we can fold the histo data from this block into
   // the data for the AP
   if (api->xsize_tag == Exactly && api->histo && bk->histoW) {
      tl_assert(api->xsize == bk->req_szB);
      UWord i;
      for (i = 0; i < api->xsize; i++) {
         // FIXME: do something better in case of overflow of api->histo[..]
         // Right now, at least don't let it overflow/wrap around
         if (api->histo[i] <= 0xFFFE0000)
            api->histo[i] += (UInt)bk->histoW[i];
      }
      if (0) VG_(printf)("fold in, AP = %p\n", api);
   }

#if 0
   if (bk->histoW) {
      VG_(printf)("block retiring, histo %lu: ", bk->req_szB);
      UWord i;
      for (i = 0; i < bk->req_szB; i++)
         VG_(printf)("%u ", (UInt)bk->histoW[i]);
      VG_(printf)("\n");
   } else {
      VG_(printf)("block retiring, no histo %lu\n", bk->req_szB);
   }
#endif
}

/* This handles block resizing.  When a block with AP 'ec' has a
   size change of 'delta', call here to update the APInfo. */
static void apinfo_change_cur_bytes_live( ExeContext* ec, Long delta )
{
   APInfo* api   = NULL;
   UWord   keyW  = 0;
   UWord   valW  = 0;
   Bool    found = VG_(lookupFM)( apinfo,
                                  &keyW, &valW, (UWord)ec );

   tl_assert(found);
   api = (APInfo*)valW;
   tl_assert(api->ap == ec);

   if (delta < 0) {
      tl_assert(api->cur_bytes_live >= -delta);
      tl_assert(g_cur_bytes_live >= -delta);
   }

   // adjust current live size
   api->cur_bytes_live += delta;
   g_cur_bytes_live += delta;

   if (delta > 0 && api->cur_bytes_live > api->max_bytes_live) {
      api->max_bytes_live  = api->cur_bytes_live;
      api->max_blocks_live = api->cur_blocks_live;
   }

   // update global summary stats
   if (delta > 0 && g_cur_bytes_live > g_max_bytes_live) {
      g_max_bytes_live  = g_cur_bytes_live;
      g_max_blocks_live = g_cur_blocks_live;
   }
   if (delta > 0)
      g_tot_bytes += delta;

   // adjust total allocation size
   if (delta > 0)
      api->tot_bytes += delta;
}


//------------------------------------------------------------//
//--- update both Block and APInfos after {m,re}alloc/free ---//
//------------------------------------------------------------//

static
void* new_block ( ThreadId tid, void* p, SizeT req_szB, SizeT req_alignB,
                  Bool is_zeroed )
{
   tl_assert(p == NULL); // don't handle custom allocators right now
   SizeT actual_szB /*, slop_szB*/;

   if ((SSizeT)req_szB < 0) return NULL;

   if (req_szB == 0)
      req_szB = 1;  /* can't allow zero-sized blocks in the interval tree */

   // Allocate and zero if necessary
   if (!p) {
      p = VG_(cli_malloc)( req_alignB, req_szB );
      if (!p) {
         return NULL;
      }
      if (is_zeroed) VG_(memset)(p, 0, req_szB);
      actual_szB = VG_(cli_malloc_usable_size)(p);
      tl_assert(actual_szB >= req_szB);
      /* slop_szB = actual_szB - req_szB; */
   } else {
      /* slop_szB = 0; */
   }

   // Make new HP_Chunk node, add to malloc_list
   Block* bk = VG_(malloc)("dh.new_block.1", sizeof(Block));
   bk->payload   = (Addr)p;
   bk->req_szB   = req_szB;
   bk->ap        = VG_(record_ExeContext)(tid, 0/*first word delta*/);
   bk->allocd_at = g_guest_instrs_executed;
   bk->n_reads   = 0;
   bk->n_writes  = 0;
   // set up histogram array, if the block isn't too large
   bk->histoW = NULL;
   if (req_szB <= HISTOGRAM_SIZE_LIMIT) {
      bk->histoW = VG_(malloc)("dh.new_block.2", req_szB * sizeof(UShort));
      VG_(memset)(bk->histoW, 0, req_szB * sizeof(UShort));
   }

   Bool present = VG_(addToFM)( interval_tree, (UWord)bk, (UWord)0/*no val*/);
   tl_assert(!present);
   fbc_cache0 = fbc_cache1 = NULL;

   intro_Block(bk);

   if (0) VG_(printf)("ALLOC %lu -> %p\n", req_szB, p);

   return p;
}

static
void die_block ( void* p, Bool custom_free )
{
   tl_assert(!custom_free);  // at least for now

   Block* bk = find_Block_containing( (Addr)p );

   if (!bk) {
      return; // bogus free
   }

   tl_assert(bk->req_szB > 0);
   // assert the block finder is behaving sanely
   tl_assert(bk->payload <= (Addr)p);
   tl_assert( (Addr)p < bk->payload + bk->req_szB );

   if (bk->payload != (Addr)p) {
      return; // bogus free
   }

   if (0) VG_(printf)(" FREE %p %llu\n",
                      p, g_guest_instrs_executed - bk->allocd_at);

   retire_Block(bk, True/*because_freed*/);

   VG_(cli_free)( (void*)bk->payload );
   delete_Block_starting_at( bk->payload );
   if (bk->histoW) {
      VG_(free)( bk->histoW );
      bk->histoW = NULL;
   }
   VG_(free)( bk );
}


static
void* renew_block ( ThreadId tid, void* p_old, SizeT new_req_szB )
{
   if (0) VG_(printf)("REALL %p %lu\n", p_old, new_req_szB);
   void* p_new = NULL;

   tl_assert(new_req_szB > 0); // map 0 to 1

   // Find the old block.
   Block* bk = find_Block_containing( (Addr)p_old );
   if (!bk) {
      return NULL;   // bogus realloc
   }

   tl_assert(bk->req_szB > 0);
   // assert the block finder is behaving sanely
   tl_assert(bk->payload <= (Addr)p_old);
   tl_assert( (Addr)p_old < bk->payload + bk->req_szB );

   if (bk->payload != (Addr)p_old) {
      return NULL; // bogus realloc
   }

   // Keeping the histogram alive in any meaningful way across
   // block resizing is too darn complicated.  Just throw it away.
   if (bk->histoW) {
      VG_(free)(bk->histoW);
      bk->histoW = NULL;
   }

   // Actually do the allocation, if necessary.
   if (new_req_szB <= bk->req_szB) {

      // New size is smaller or same; block not moved.
      apinfo_change_cur_bytes_live(bk->ap,
                                   (Long)new_req_szB - (Long)bk->req_szB);
      bk->req_szB = new_req_szB;
      return p_old;

   } else {

      // New size is bigger; make new block, copy shared contents, free old.
      p_new = VG_(cli_malloc)(VG_(clo_alignment), new_req_szB);
      if (!p_new) {
         // Nb: if realloc fails, NULL is returned but the old block is not
         // touched.  What an awful function.
         return NULL;
      }
      tl_assert(p_new != p_old);

      VG_(memcpy)(p_new, p_old, bk->req_szB);
      VG_(cli_free)(p_old);

      // Since the block has moved, we need to re-insert it into the
      // interval tree at the new place.  Do this by removing
      // and re-adding it.
      delete_Block_starting_at( (Addr)p_old );
      // now 'bk' is no longer in the tree, but the Block itself
      // is still alive

      // Update the metadata.
      apinfo_change_cur_bytes_live(bk->ap,
                                   (Long)new_req_szB - (Long)bk->req_szB);
      bk->payload = (Addr)p_new;
      bk->req_szB = new_req_szB;

      // and re-add
      Bool present
         = VG_(addToFM)( interval_tree, (UWord)bk, (UWord)0/*no val*/);
      tl_assert(!present);
      fbc_cache0 = fbc_cache1 = NULL;

      return p_new;
   }
   /*NOTREACHED*/
   tl_assert(0);
}


//------------------------------------------------------------//
//--- malloc() et al replacement wrappers                  ---//
//------------------------------------------------------------//

static void* dh_malloc ( ThreadId tid, SizeT szB )
{
   return new_block( tid, NULL, szB, VG_(clo_alignment), /*is_zeroed*/False );
}

static void* dh___builtin_new ( ThreadId tid, SizeT szB )
{
   return new_block( tid, NULL, szB, VG_(clo_alignment), /*is_zeroed*/False );
}

static void* dh___builtin_vec_new ( ThreadId tid, SizeT szB )
{
   return new_block( tid, NULL, szB, VG_(clo_alignment), /*is_zeroed*/False );
}

static void* dh_calloc ( ThreadId tid, SizeT m, SizeT szB )
{
   return new_block( tid, NULL, m*szB, VG_(clo_alignment), /*is_zeroed*/True );
}

static void *dh_memalign ( ThreadId tid, SizeT alignB, SizeT szB )
{
   return new_block( tid, NULL, szB, alignB, False );
}

static void dh_free ( ThreadId tid __attribute__((unused)), void* p )
{
   die_block( p, /*custom_free*/False );
}

static void dh___builtin_delete ( ThreadId tid, void* p )
{
   die_block( p, /*custom_free*/False);
}

static void dh___builtin_vec_delete ( ThreadId tid, void* p )
{
   die_block( p, /*custom_free*/False );
}

static void* dh_realloc ( ThreadId tid, void* p_old, SizeT new_szB )
{
   if (p_old == NULL) {
      return dh_malloc(tid, new_szB);
   }
   if (new_szB == 0) {
      dh_free(tid, p_old);
      return NULL;
   }
   return renew_block(tid, p_old, new_szB);
}

static SizeT dh_malloc_usable_size ( ThreadId tid, void* p )
{
   Block* bk = find_Block_containing( (Addr)p );
   return bk ? bk->req_szB : 0;
}


//------------------------------------------------------------//
//--- memory references                                    ---//
//------------------------------------------------------------//

static
void inc_histo_for_block ( Block* bk, Addr addr, UWord szB )
{
   UWord i, offMin, offMax1;
   offMin = addr - bk->payload;
   tl_assert(offMin < bk->req_szB);
   offMax1 = offMin + szB;
   if (offMax1 > bk->req_szB)
      offMax1 = bk->req_szB;
   //VG_(printf)("%lu %lu   (size of block %lu)\n", offMin, offMax1, bk->req_szB);
   for (i = offMin; i < offMax1; i++) {
      UShort n = bk->histoW[i];
      if (n < 0xFFFF) n++;
      bk->histoW[i] = n;
   }
}
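
/* Saturation sketch (numbers invented): once a given payload byte has
   been touched 0xFFFF times, further accesses leave its histoW entry
   pinned at 0xFFFF rather than wrapping to zero.  The per-AP
   aggregation in retire_Block applies the same idea with a 32-bit
   ceiling, refusing to add once a counter exceeds 0xFFFE0000. */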

static VG_REGPARM(2)
void dh_handle_write ( Addr addr, UWord szB )
{
   Block* bk = find_Block_containing(addr);
   if (bk) {
      bk->n_writes += szB;
      if (bk->histoW)
         inc_histo_for_block(bk, addr, szB);
   }
}

static VG_REGPARM(2)
void dh_handle_read ( Addr addr, UWord szB )
{
   Block* bk = find_Block_containing(addr);
   if (bk) {
      bk->n_reads += szB;
      if (bk->histoW)
         inc_histo_for_block(bk, addr, szB);
   }
}


// Handle reads and writes by syscalls (read == kernel
// reads user space, write == kernel writes user space).
// Assumes no such read or write spans a heap block
// boundary and so we can treat it just as one giant
// read or write.
static
void dh_handle_noninsn_read ( CorePart part, ThreadId tid, const HChar* s,
                              Addr base, SizeT size )
{
   switch (part) {
      case Vg_CoreSysCall:
         dh_handle_read(base, size);
         break;
      case Vg_CoreSysCallArgInMem:
         break;
      case Vg_CoreTranslate:
         break;
      default:
         tl_assert(0);
   }
}

static
void dh_handle_noninsn_write ( CorePart part, ThreadId tid,
                               Addr base, SizeT size )
{
   switch (part) {
      case Vg_CoreSysCall:
         dh_handle_write(base, size);
         break;
      case Vg_CoreSignal:
         break;
      default:
         tl_assert(0);
   }
}


//------------------------------------------------------------//
//--- Instrumentation                                      ---//
//------------------------------------------------------------//

#define binop(_op, _arg1, _arg2) IRExpr_Binop((_op),(_arg1),(_arg2))
#define mkexpr(_tmp)             IRExpr_RdTmp((_tmp))
#define mkU32(_n)                IRExpr_Const(IRConst_U32(_n))
#define mkU64(_n)                IRExpr_Const(IRConst_U64(_n))
#define assign(_t, _e)           IRStmt_WrTmp((_t), (_e))

static
void add_counter_update(IRSB* sbOut, Int n)
{
   #if defined(VG_BIGENDIAN)
   # define END Iend_BE
   #elif defined(VG_LITTLEENDIAN)
   # define END Iend_LE
   #else
   # error "Unknown endianness"
   #endif
   // Add code to increment 'g_guest_instrs_executed' by 'n', like this:
   //   WrTmp(t1, Load64(&g_guest_instrs_executed))
   //   WrTmp(t2, Add64(RdTmp(t1), Const(n)))
   //   Store(&g_guest_instrs_executed, t2)
   IRTemp t1 = newIRTemp(sbOut->tyenv, Ity_I64);
   IRTemp t2 = newIRTemp(sbOut->tyenv, Ity_I64);
   IRExpr* counter_addr = mkIRExpr_HWord( (HWord)&g_guest_instrs_executed );

   IRStmt* st1 = assign(t1, IRExpr_Load(END, Ity_I64, counter_addr));
   IRStmt* st2 = assign(t2, binop(Iop_Add64, mkexpr(t1), mkU64(n)));
   IRStmt* st3 = IRStmt_Store(END, counter_addr, mkexpr(t2));

   addStmtToIRSB( sbOut, st1 );
   addStmtToIRSB( sbOut, st2 );
   addStmtToIRSB( sbOut, st3 );
}

static
void addMemEvent(IRSB* sbOut, Bool isWrite, Int szB, IRExpr* addr,
                 Int goff_sp)
{
   IRType   tyAddr    = Ity_INVALID;
   const HChar* hName = NULL;
   void*    hAddr     = NULL;
   IRExpr** argv      = NULL;
   IRDirty* di        = NULL;

   const Int THRESH = 4096 * 4; // somewhat arbitrary
   const Int rz_szB = VG_STACK_REDZONE_SZB;

   tyAddr = typeOfIRExpr( sbOut->tyenv, addr );
   tl_assert(tyAddr == Ity_I32 || tyAddr == Ity_I64);

   if (isWrite) {
      hName = "dh_handle_write";
      hAddr = &dh_handle_write;
   } else {
      hName = "dh_handle_read";
      hAddr = &dh_handle_read;
   }

   argv = mkIRExprVec_2( addr, mkIRExpr_HWord(szB) );

   /* Add the helper. */
   tl_assert(hName);
   tl_assert(hAddr);
   tl_assert(argv);
   di = unsafeIRDirty_0_N( 2/*regparms*/,
                           hName, VG_(fnptr_to_fnentry)( hAddr ),
                           argv );

   /* Generate the guard condition: "(addr - (SP - RZ)) >u N", for
      some arbitrary N.  If that fails then addr is in the range (SP -
      RZ .. SP + N - RZ).  If N is smallish (a page?) then we can say
      addr is within a page of SP and so can't possibly be a heap
      access, and so can be skipped. */
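   /* Worked example with invented numbers: suppose SP = 0x8000,
      RZ = 128 and THRESH = 16384.  Then SP - RZ = 0x7F80, and the
      helper call is skipped exactly when (addr - 0x7F80) <=u 16384,
      i.e. for addr in [0x7F80, 0xBF80].  Addresses below SP - RZ wrap
      around to huge unsigned differences, so they still reach the
      helper, as do all heap addresses far from the stack. */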
   IRTemp sp = newIRTemp(sbOut->tyenv, tyAddr);
   addStmtToIRSB( sbOut, assign(sp, IRExpr_Get(goff_sp, tyAddr)));

   IRTemp sp_minus_rz = newIRTemp(sbOut->tyenv, tyAddr);
   addStmtToIRSB(
      sbOut,
      assign(sp_minus_rz,
             tyAddr == Ity_I32
                ? binop(Iop_Sub32, mkexpr(sp), mkU32(rz_szB))
                : binop(Iop_Sub64, mkexpr(sp), mkU64(rz_szB)))
   );

   IRTemp diff = newIRTemp(sbOut->tyenv, tyAddr);
   addStmtToIRSB(
      sbOut,
      assign(diff,
             tyAddr == Ity_I32
                ? binop(Iop_Sub32, addr, mkexpr(sp_minus_rz))
                : binop(Iop_Sub64, addr, mkexpr(sp_minus_rz)))
   );

   IRTemp guard = newIRTemp(sbOut->tyenv, Ity_I1);
   addStmtToIRSB(
      sbOut,
      assign(guard,
             tyAddr == Ity_I32
                ? binop(Iop_CmpLT32U, mkU32(THRESH), mkexpr(diff))
                : binop(Iop_CmpLT64U, mkU64(THRESH), mkexpr(diff)))
   );
   di->guard = mkexpr(guard);

   addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
}

static
IRSB* dh_instrument ( VgCallbackClosure* closure,
                      IRSB* sbIn,
                      const VexGuestLayout* layout,
                      const VexGuestExtents* vge,
                      const VexArchInfo* archinfo_host,
                      IRType gWordTy, IRType hWordTy )
{
   Int   i, n = 0;
   IRSB* sbOut;
   IRTypeEnv* tyenv = sbIn->tyenv;

   const Int goff_sp = layout->offset_SP;

   // We increment the instruction count in two places:
   // - just before any Ist_Exit statements;
   // - just before the IRSB's end.
   // In the former case, we zero 'n' and then continue instrumenting.

   sbOut = deepCopyIRSBExceptStmts(sbIn);

   // Copy verbatim any IR preamble preceding the first IMark
   i = 0;
   while (i < sbIn->stmts_used && sbIn->stmts[i]->tag != Ist_IMark) {
      addStmtToIRSB( sbOut, sbIn->stmts[i] );
      i++;
   }

   for (/*use current i*/; i < sbIn->stmts_used; i++) {
      IRStmt* st = sbIn->stmts[i];

      if (!st || st->tag == Ist_NoOp) continue;

      switch (st->tag) {

         case Ist_IMark: {
            n++;
            break;
         }

         case Ist_Exit: {
            if (n > 0) {
               // Add an increment before the Exit statement, then reset 'n'.
               add_counter_update(sbOut, n);
               n = 0;
            }
            break;
         }

         case Ist_WrTmp: {
            IRExpr* data = st->Ist.WrTmp.data;
            if (data->tag == Iex_Load) {
               IRExpr* aexpr = data->Iex.Load.addr;
               // Note also, endianness info is ignored.  I guess
               // that's not interesting.
               addMemEvent( sbOut, False/*!isWrite*/,
                            sizeofIRType(data->Iex.Load.ty),
                            aexpr, goff_sp );
            }
            break;
         }

         case Ist_Store: {
            IRExpr* data  = st->Ist.Store.data;
            IRExpr* aexpr = st->Ist.Store.addr;
            addMemEvent( sbOut, True/*isWrite*/,
                         sizeofIRType(typeOfIRExpr(tyenv, data)),
                         aexpr, goff_sp );
            break;
         }

         case Ist_Dirty: {
            Int      dataSize;
            IRDirty* d = st->Ist.Dirty.details;
            if (d->mFx != Ifx_None) {
               /* This dirty helper accesses memory.  Collect the details. */
               tl_assert(d->mAddr != NULL);
               tl_assert(d->mSize != 0);
               dataSize = d->mSize;
               // Large (eg. 28B, 108B, 512B on x86) data-sized
               // instructions will be done inaccurately, but they're
               // very rare and this avoids errors from hitting more
               // than two cache lines in the simulation.
               if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify)
                  addMemEvent( sbOut, False/*!isWrite*/,
                               dataSize, d->mAddr, goff_sp );
               if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify)
                  addMemEvent( sbOut, True/*isWrite*/,
                               dataSize, d->mAddr, goff_sp );
            } else {
               tl_assert(d->mAddr == NULL);
               tl_assert(d->mSize == 0);
            }
            break;
         }

         case Ist_CAS: {
            /* We treat it as a read and a write of the location.  I
               think that is the same behaviour as it was before IRCAS
               was introduced, since prior to that point, the Vex
               front ends would translate a lock-prefixed instruction
               into a (normal) read followed by a (normal) write. */
            Int    dataSize;
            IRCAS* cas = st->Ist.CAS.details;
            tl_assert(cas->addr != NULL);
            tl_assert(cas->dataLo != NULL);
            dataSize = sizeofIRType(typeOfIRExpr(tyenv, cas->dataLo));
            if (cas->dataHi != NULL)
               dataSize *= 2; /* since it's a doubleword-CAS */
            addMemEvent( sbOut, False/*!isWrite*/,
                         dataSize, cas->addr, goff_sp );
            addMemEvent( sbOut, True/*isWrite*/,
                         dataSize, cas->addr, goff_sp );
            break;
         }

         case Ist_LLSC: {
            IRType dataTy;
            if (st->Ist.LLSC.storedata == NULL) {
               /* LL */
               dataTy = typeOfIRTemp(tyenv, st->Ist.LLSC.result);
               addMemEvent( sbOut, False/*!isWrite*/,
                            sizeofIRType(dataTy),
                            st->Ist.LLSC.addr, goff_sp );
            } else {
               /* SC */
               dataTy = typeOfIRExpr(tyenv, st->Ist.LLSC.storedata);
               addMemEvent( sbOut, True/*isWrite*/,
                            sizeofIRType(dataTy),
                            st->Ist.LLSC.addr, goff_sp );
            }
            break;
         }

         default:
            break;
      }

      addStmtToIRSB( sbOut, st );
   }

   if (n > 0) {
      // Add an increment before the SB end.
      add_counter_update(sbOut, n);
   }
   return sbOut;
}

#undef binop
#undef mkexpr
#undef mkU32
#undef mkU64
#undef assign


//------------------------------------------------------------//
//--- Command line args                                    ---//
//------------------------------------------------------------//

// FORWARDS
static Bool identify_metric ( /*OUT*/ULong(**get_metricP)(APInfo*),
                              /*OUT*/Bool* increasingP,
                              const HChar* metric_name );

static Int clo_show_top_n = 10;
static const HChar *clo_sort_by = "max-bytes-live";

static Bool dh_process_cmd_line_option(const HChar* arg)
{
   if VG_BINT_CLO(arg, "--show-top-n", clo_show_top_n, 1, 100000) {}

   else if VG_STR_CLO(arg, "--sort-by", clo_sort_by) {
      ULong (*dummyFn)(APInfo*);
      Bool   dummyB;
      Bool ok = identify_metric( &dummyFn, &dummyB, clo_sort_by);
      if (!ok)
         return False;
      // otherwise it's OK, in which case leave it alone.
      // show_top_n_apinfos will later convert the string by a
      // second call to identify_metric.
   }

   else
      return VG_(replacement_malloc_process_cmd_line_option)(arg);

   return True;
}


static void dh_print_usage(void)
{
   VG_(printf)(
"    --show-top-n=number       show the top <number> alloc points [10]\n"
"    --sort-by=string\n"
"            sort the allocation points by the metric\n"
"            defined by <string>, thusly:\n"
"                max-bytes-live    maximum live bytes [default]\n"
"                tot-bytes-allocd  total allocation (turnover)\n"
"                max-blocks-live   maximum live blocks\n"
   );
}

static void dh_print_debug_usage(void)
{
   VG_(printf)(
"    (none)\n"
   );
}


//------------------------------------------------------------//
//--- Finalisation                                         ---//
//------------------------------------------------------------//

static void show_N_div_100( /*OUT*/HChar* buf, ULong n )
{
   ULong nK = n / 100;
   ULong nR = n % 100;
   VG_(sprintf)(buf, "%llu.%s%llu", nK,
                nR < 10 ? "0" : "",
                nR);
}
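
/* For instance, show_N_div_100(buf, 1234) writes "12.34" and
   show_N_div_100(buf, 105) writes "1.05": the helper renders a value
   pre-scaled by 100 as a fixed-point number with two decimals. */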

static void show_APInfo ( APInfo* api )
{
   HChar bufA[80];   // large enough
   VG_(memset)(bufA, 0, sizeof(bufA));
   if (api->tot_blocks > 0) {
      show_N_div_100( bufA, ((ULong)api->tot_bytes * 100ULL)
                            / (ULong)api->tot_blocks );
   } else {
      bufA[0] = 'N'; bufA[1] = 'a'; bufA[2] = 'N';
   }

   VG_(umsg)("max-live:    %'llu in %'llu blocks\n",
             api->max_bytes_live, api->max_blocks_live);
   VG_(umsg)("tot-alloc:   %'llu in %'llu blocks (avg size %s)\n",
             api->tot_bytes, api->tot_blocks, bufA);

   tl_assert(api->tot_blocks >= api->max_blocks_live);
   tl_assert(api->tot_bytes >= api->max_bytes_live);

   if (api->deaths > 0) {
      // Average Age at Death
      ULong aad = api->deaths == 0
                  ? 0 : (api->death_ages_sum / api->deaths);
      // AAD as a fraction of the total program lifetime (so far),
      // measured in ten-thousand-ths (aad_frac_10k == 10000 means the
      // complete lifetime of the program).
      ULong aad_frac_10k
         = g_guest_instrs_executed == 0
           ? 0 : (10000ULL * aad) / g_guest_instrs_executed;
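      // Example with invented numbers: if this AP's blocks die on
      // average 5M instructions after allocation and the program has
      // executed 20M instructions, aad_frac_10k is
      // 10000 * 5M / 20M = 2500, shown by show_N_div_100 as "25.00".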
      HChar buf[80]; // large enough
      show_N_div_100(buf, aad_frac_10k);
      VG_(umsg)("deaths:      %'llu, at avg age %'llu "
                "(%s%% of prog lifetime)\n",
                api->deaths, aad, buf );
   } else {
      VG_(umsg)("deaths:      none (none of these blocks were freed)\n");
   }

   HChar bufR[80], bufW[80];   // large enough
   VG_(memset)(bufR, 0, sizeof(bufR));
   VG_(memset)(bufW, 0, sizeof(bufW));
   if (api->tot_bytes > 0) {
      show_N_div_100(bufR, (100ULL * api->n_reads) / api->tot_bytes);
      show_N_div_100(bufW, (100ULL * api->n_writes) / api->tot_bytes);
   } else {
      VG_(strcat)(bufR, "Inf");
      VG_(strcat)(bufW, "Inf");
   }

   VG_(umsg)("acc-ratios:  %s rd, %s wr "
             " (%'llu b-read, %'llu b-written)\n",
             bufR, bufW,
             api->n_reads, api->n_writes);

   VG_(pp_ExeContext)(api->ap);

   if (api->histo && api->xsize_tag == Exactly) {
      VG_(umsg)("\nAggregated access counts by offset:\n");
      VG_(umsg)("\n");
      UWord i;
      if (api->xsize > 0)
         VG_(umsg)("[   0]  ");
      for (i = 0; i < api->xsize; i++) {
         if (i > 0 && (i % 16) == 0 && i != api->xsize-1) {
            VG_(umsg)("\n");
            VG_(umsg)("[%4lu]  ", i);
         }
         VG_(umsg)("%u ", api->histo[i]);
      }
      VG_(umsg)("\n");
   }
}


/* Metric-access functions for APInfos. */
static ULong get_metric__max_bytes_live ( APInfo* api ) {
   return api->max_bytes_live;
}
static ULong get_metric__tot_bytes ( APInfo* api ) {
   return api->tot_bytes;
}
static ULong get_metric__max_blocks_live ( APInfo* api ) {
   return api->max_blocks_live;
}

/* Given a string, return the metric-access function and also a Bool
   indicating whether we want increasing or decreasing values of the
   metric.  This is used twice, once in command line processing, and
   then again in show_top_n_apinfos.  Returns False if the given
   string could not be identified. */
static Bool identify_metric ( /*OUT*/ULong(**get_metricP)(APInfo*),
                              /*OUT*/Bool* increasingP,
                              const HChar* metric_name )
{
   if (0 == VG_(strcmp)(metric_name, "max-bytes-live")) {
      *get_metricP = get_metric__max_bytes_live;
      *increasingP = False;
      return True;
   }
   if (0 == VG_(strcmp)(metric_name, "tot-bytes-allocd")) {
      *get_metricP = get_metric__tot_bytes;
      *increasingP = False;
      return True;
   }
   if (0 == VG_(strcmp)(metric_name, "max-blocks-live")) {
      *get_metricP = get_metric__max_blocks_live;
      *increasingP = False;
      return True;
   }
   return False;
}


static void show_top_n_apinfos ( void )
{
   Int   i;
   UWord keyW, valW;
   ULong (*get_metric)(APInfo*);
   Bool  increasing;

   const HChar* metric_name = clo_sort_by;
   tl_assert(metric_name); // ensured by clo processing

   Bool ok = identify_metric( &get_metric, &increasing, metric_name );
   tl_assert(ok); // ensured by clo processing

   VG_(umsg)("\n");
   VG_(umsg)("======== ORDERED BY %s \"%s\": "
             "top %d allocators ========\n",
             increasing ? "increasing" : "decreasing",
             metric_name, clo_show_top_n );

   // Clear all .shown bits
   VG_(initIterFM)( apinfo );
   while (VG_(nextIterFM)( apinfo, &keyW, &valW )) {
      APInfo* api = (APInfo*)valW;
      tl_assert(api && api->ap == (ExeContext*)keyW);
      api->shown = False;
   }
   VG_(doneIterFM)( apinfo );

   // Now print the top N entries.  Each one requires a
   // complete scan of the set.  Duh.
   for (i = 0; i < clo_show_top_n; i++) {
      ULong   best_metric = increasing ? ~0ULL : 0ULL;
      APInfo* best_api    = NULL;

      VG_(initIterFM)( apinfo );
      while (VG_(nextIterFM)( apinfo, &keyW, &valW )) {
         APInfo* api = (APInfo*)valW;
         if (api->shown)
            continue;
         ULong metric = get_metric(api);
         if (increasing ? (metric < best_metric) : (metric > best_metric)) {
            best_metric = metric;
            best_api = api;
         }
      }
      VG_(doneIterFM)( apinfo );

      if (!best_api)
         break; // all APInfos have been shown.  Stop.

      VG_(umsg)("\n");
      VG_(umsg)("-------------------- %d of %d --------------------\n",
                i+1, clo_show_top_n );
      show_APInfo(best_api);
      best_api->shown = True;
   }

   VG_(umsg)("\n");
}


static void dh_fini(Int exit_status)
{
   // Before printing statistics, we must harvest access counts for
   // all the blocks that are still alive.  Not doing so gives
   // access ratios which are too low (zero, in the worst case)
   // for such blocks, since the accesses that do get made will
   // (if we skip this step) not get folded into the AP summaries.
   UWord keyW, valW;
   VG_(initIterFM)( interval_tree );
   while (VG_(nextIterFM)( interval_tree, &keyW, &valW )) {
      Block* bk = (Block*)keyW;
      tl_assert(valW == 0);
      tl_assert(bk);
      retire_Block(bk, False/*!because_freed*/);
   }
   VG_(doneIterFM)( interval_tree );

   // show results
   VG_(umsg)("======== SUMMARY STATISTICS ========\n");
   VG_(umsg)("\n");
   VG_(umsg)("guest_insns:  %'llu\n", g_guest_instrs_executed);
   VG_(umsg)("\n");
   VG_(umsg)("max_live:     %'llu in %'llu blocks\n",
             g_max_bytes_live, g_max_blocks_live);
   VG_(umsg)("\n");
   VG_(umsg)("tot_alloc:    %'llu in %'llu blocks\n",
             g_tot_bytes, g_tot_blocks);
   VG_(umsg)("\n");
   if (g_tot_bytes > 0) {
      VG_(umsg)("insns per allocated byte: %'llu\n",
                g_guest_instrs_executed / g_tot_bytes);
      VG_(umsg)("\n");
   }

   show_top_n_apinfos();

   VG_(umsg)("\n");
   VG_(umsg)("\n");
   VG_(umsg)("==============================================================\n");
   VG_(umsg)("\n");
   VG_(umsg)("Some hints: (see --help for command line option details):\n");
   VG_(umsg)("\n");
   VG_(umsg)("* summary stats for whole program are at the top of this output\n");
   VG_(umsg)("\n");
   VG_(umsg)("* --show-top-n=  controls how many alloc points are shown.\n");
   VG_(umsg)("  You probably want to set it much higher than\n");
   VG_(umsg)("  the default value (10)\n");
   VG_(umsg)("\n");
   VG_(umsg)("* --sort-by=  specifies the sort key for output.\n");
   VG_(umsg)("  See --help for details.\n");
   VG_(umsg)("\n");
   VG_(umsg)("* Each allocation stack, by default 12 frames, counts as\n");
   VG_(umsg)("  a separate alloc point.  This causes the data to be spread out\n");
   VG_(umsg)("  over far too many alloc points.  I strongly suggest using\n");
   VG_(umsg)("  --num-callers=4 or some such, to reduce the spreading.\n");
   VG_(umsg)("\n");

   if (VG_(clo_stats)) {
      VG_(dmsg)(" dhat: find_Block_containing:\n");
      VG_(dmsg)("       found: %'lu (%'lu cached + %'lu uncached)\n",
                stats__n_fBc_cached + stats__n_fBc_uncached,
                stats__n_fBc_cached,
                stats__n_fBc_uncached);
      VG_(dmsg)("    notfound: %'lu\n", stats__n_fBc_notfound);
      VG_(dmsg)("\n");
   }
}


//------------------------------------------------------------//
//--- Initialisation                                       ---//
//------------------------------------------------------------//

static void dh_post_clo_init(void)
{
}

static void dh_pre_clo_init(void)
{
   VG_(details_name)            ("DHAT");
   VG_(details_version)         (NULL);
   VG_(details_description)     ("a dynamic heap analysis tool");
   VG_(details_copyright_author)(
      "Copyright (C) 2010-2015, and GNU GPL'd, by Mozilla Inc");
   VG_(details_bug_reports_to)  (VG_BUGS_TO);

   // Basic functions.
   VG_(basic_tool_funcs)          (dh_post_clo_init,
                                   dh_instrument,
                                   dh_fini);
//zz
   // Needs.
   VG_(needs_libc_freeres)();
   VG_(needs_command_line_options)(dh_process_cmd_line_option,
                                   dh_print_usage,
                                   dh_print_debug_usage);
//zz   VG_(needs_client_requests)     (dh_handle_client_request);
//zz   VG_(needs_sanity_checks)       (dh_cheap_sanity_check,
//zz                                   dh_expensive_sanity_check);
   VG_(needs_malloc_replacement)  (dh_malloc,
                                   dh___builtin_new,
                                   dh___builtin_vec_new,
                                   dh_memalign,
                                   dh_calloc,
                                   dh_free,
                                   dh___builtin_delete,
                                   dh___builtin_vec_delete,
                                   dh_realloc,
                                   dh_malloc_usable_size,
                                   0 );

   VG_(track_pre_mem_read)        ( dh_handle_noninsn_read );
   //VG_(track_pre_mem_read_asciiz) ( check_mem_is_defined_asciiz );
   VG_(track_post_mem_write)      ( dh_handle_noninsn_write );

   tl_assert(!interval_tree);
   tl_assert(!fbc_cache0);
   tl_assert(!fbc_cache1);

   interval_tree = VG_(newFM)( VG_(malloc),
                               "dh.main.interval_tree.1",
                               VG_(free),
                               interval_tree_Cmp );

   apinfo = VG_(newFM)( VG_(malloc),
                        "dh.main.apinfo.1",
                        VG_(free),
                        NULL/*unboxedcmp*/ );
}

VG_DETERMINE_INTERFACE_VERSION(dh_pre_clo_init)

/*--------------------------------------------------------------------*/
/*--- end                                                dh_main.c ---*/
/*--------------------------------------------------------------------*/