/*--------------------------------------------------------------------*/
/*--- Cachegrind: cache configuration.                   cg-arch.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Cachegrind, a Valgrind tool for cache
   profiling programs.

   Copyright (C) 2011-2015 Nicholas Nethercote
      njn@valgrind.org

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#include "pub_tool_basics.h"
#include "pub_tool_libcassert.h"
#include "pub_tool_libcbase.h"
#include "pub_tool_libcprint.h"
#include "pub_tool_options.h"
#include "pub_tool_machine.h"

#include "cg_arch.h"

static void configure_caches(cache_t* I1c, cache_t* D1c, cache_t* LLc,
                             Bool all_caches_clo_defined);

// Checks cache config is ok.  Returns NULL if ok, or a pointer to an error
// string otherwise.
static const HChar* check_cache(cache_t* cache)
{
   // Simulator requires set count to be a power of two.
   if ((cache->size % (cache->line_size * cache->assoc) != 0) ||
       (-1 == VG_(log2)(cache->size/cache->line_size/cache->assoc)))
   {
      return "Cache set count is not a power of two.\n";
   }

   // Simulator requires line size to be a power of two.
   if (-1 == VG_(log2)(cache->line_size)) {
      return "Cache line size is not a power of two.\n";
   }

   // Then check line size >= 16 -- any smaller and a single instruction could
   // straddle three cache lines, which breaks a simulation assertion and is
   // stupid anyway.
   if (cache->line_size < MIN_LINE_SIZE) {
      return "Cache line size is too small.\n";
   }

   /* Then check cache size > line size (causes seg faults if not). */
   if (cache->size <= cache->line_size) {
      return "Cache size <= line size.\n";
   }

   /* Then check assoc <= (size / line size) (seg faults otherwise). */
   if (cache->assoc > (cache->size / cache->line_size)) {
      return "Cache associativity > (size / line size).\n";
   }

   return NULL;
}
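
/* Illustrative walk-through of the checks above, using the "65536,2,64"
   example that appears in parse_cache_opt() below: a 64 KB, 2-way cache
   with 64 B lines has 65536 / (64 * 2) = 512 sets, a power of two; the
   line size 64 is a power of two and >= MIN_LINE_SIZE; the total size
   exceeds the line size; and the associativity 2 <= 65536 / 64 = 1024.
   So check_cache() returns NULL and the configuration is accepted. */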

static void parse_cache_opt ( cache_t* cache, const HChar* opt,
                              const HChar* optval )
{
   Long i1, i2, i3;
   HChar* endptr;
   const HChar* checkRes;

   // Option argument looks like "65536,2,64".  Extract them.
   i1 = VG_(strtoll10)(optval,   &endptr); if (*endptr != ',')  goto bad;
   i2 = VG_(strtoll10)(endptr+1, &endptr); if (*endptr != ',')  goto bad;
   i3 = VG_(strtoll10)(endptr+1, &endptr); if (*endptr != '\0') goto bad;

   // Check for overflow.
   cache->size      = (Int)i1;
   cache->assoc     = (Int)i2;
   cache->line_size = (Int)i3;
   if (cache->size      != i1) goto overflow;
   if (cache->assoc     != i2) goto overflow;
   if (cache->line_size != i3) goto overflow;

   checkRes = check_cache(cache);
   if (checkRes) {
      VG_(fmsg)("%s", checkRes);
      goto bad;
   }

   return;

  bad:
   VG_(fmsg_bad_option)(opt, "Bad argument '%s'\n", optval);

  overflow:
   VG_(fmsg_bad_option)(opt,
      "One of the cache parameters was too large and overflowed.\n");
}
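
/* For example, given optval "65536,2,64" the three fields parse to
   size = 65536, assoc = 2 and line_size = 64.  A malformed value such
   as "65536,2" fails the second comma check and takes the "bad" path;
   a field that does not fit in an Int takes the "overflow" path. */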

Bool VG_(str_clo_cache_opt)(const HChar *arg,
                            cache_t* clo_I1c,
                            cache_t* clo_D1c,
                            cache_t* clo_LLc)
{
   const HChar* tmp_str;

   if VG_STR_CLO(arg, "--I1", tmp_str) {
      parse_cache_opt(clo_I1c, arg, tmp_str);
      return True;
   } else if VG_STR_CLO(arg, "--D1", tmp_str) {
      parse_cache_opt(clo_D1c, arg, tmp_str);
      return True;
   } else if (VG_STR_CLO(arg, "--L2", tmp_str) || // for backwards compatibility
              VG_STR_CLO(arg, "--LL", tmp_str)) {
      parse_cache_opt(clo_LLc, arg, tmp_str);
      return True;
   } else
      return False;
}
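
/* Illustrative command line (the sizes are example values only, and
   "./myprog" is a placeholder for the program under test):

      valgrind --tool=cachegrind --I1=32768,8,64 --D1=32768,8,64 --LL=8388608,16,64 ./myprog

   "--L2=..." is still accepted as an alias for "--LL=..." for backwards
   compatibility. */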

static void umsg_cache_img(const HChar* desc, cache_t* c)
{
   VG_(umsg)("  %s: %'d B, %d-way, %d B lines\n", desc,
             c->size, c->assoc, c->line_size);
}
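
/* With the x86/amd64 fallback configuration used further down in this
   file (65536, 2, 64), this should print something like

      I1: 65,536 B, 2-way, 64 B lines

   where the ' flag in "%'d" requests thousands separators. */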

// Verifies if c is a valid cache.
// An invalid value causes an assert, unless clo_redefined is True.
static void check_cache_or_override(const HChar* desc, cache_t* c, Bool clo_redefined)
{
   const HChar* checkRes;

   checkRes = check_cache(c);
   if (checkRes) {
      VG_(umsg)("Auto-detected %s cache configuration not supported: %s",
                desc, checkRes);
      umsg_cache_img(desc, c);
      if (!clo_redefined) {
         VG_(umsg)("As it probably should be supported, please report a bug!\n");
         VG_(umsg)("Bypass this message by using option --%s=...\n", desc);
         tl_assert(0);
      }
   }
}


/* If the LL cache config isn't something the simulation functions
   can handle, try to adjust it so it is.  Caches are characterised
   by (total size T, line size L, associativity A), and then we
   have

      number of sets S = T / (L * A)

   The required constraints are:

   * L must be a power of 2, but it always is in practice, so
     no problem there

   * A can be any value >= 1

   * T can be any value, but ..

   * S must be a power of 2.

   That sometimes gives a problem.  For example, some Core iX based
   Intel CPUs have T = 12MB, A = 16, L = 64, which gives 12288
   sets.  Some AMD CPUs have T = 5MB, A = 48, L = 64, which gives
   1706.667 sets (!).

   The "fix" is to force S down to the nearest power of two below its
   original value, and increase A proportionately, so as to keep the
   total cache size the same.  In fact to be safe we recalculate the
   cache size afterwards anyway, to guarantee that it divides exactly
   between the new number of sets.

   The "fix" is "justified" (cough, cough) by alleging that
   increases of associativity above about 4 have very little effect
   on the actual miss rate.  It would be far more inaccurate to
   fudge this by changing the size of the simulated cache --
   changing the associativity is a much better option.
*/
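
/* Worked example for the Intel configuration mentioned above:
   T = 12MB, A = 16, L = 64 gives S = 12582912 / (64 * 16) = 12288 sets,
   which is not a power of two.  Forcing S down to 8192 scales A up by
   12288 / 8192 = 1.5, i.e. to 24 ways, and the recomputed total size
   64 * 24 * 8192 = 12582912 bytes is still exactly 12MB. */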

/* (Helper function) Returns the largest power of 2 that is <= |x|.
   Even works when |x| == 0. */
static UInt floor_power_of_2 ( UInt x )
{
   x = x | (x >> 1);
   x = x | (x >> 2);
   x = x | (x >> 4);
   x = x | (x >> 8);
   x = x | (x >> 16);
   return x - (x >> 1);
}
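
/* For example, floor_power_of_2(12288): 12288 is 0b11000000000000; the
   shift-and-or cascade smears the topmost set bit into every lower
   position, giving 0b11111111111111 = 16383, and the final step returns
   16383 - 8191 = 8192, the largest power of two <= 12288. */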

static void
maybe_tweak_LLc(cache_t *LLc)
{
   if (LLc->size == 0 || LLc->assoc == 0 || LLc->line_size == 0)
      return;

   tl_assert(LLc->size > 0 && LLc->assoc > 0 && LLc->line_size > 0);

   UInt old_size      = (UInt)LLc->size;
   UInt old_assoc     = (UInt)LLc->assoc;
   UInt old_line_size = (UInt)LLc->line_size;

   UInt new_size      = old_size;
   UInt new_assoc     = old_assoc;
   UInt new_line_size = old_line_size;

   UInt old_nSets = old_size / (old_assoc * old_line_size);
   if (old_nSets == 0) {
      /* This surely can't happen; but would cause chaos with the maths
       * below if it did.  Just give up if it does. */
      return;
   }

   if (-1 != VG_(log2_64)(old_nSets)) {
      /* The number of sets is already a power of 2.  Make sure that
         the size divides exactly between the sets.  Almost all of the
         time this will have no effect. */
      new_size = old_line_size * old_assoc * old_nSets;
   } else {
      /* The number of sets isn't a power of two.  Calculate some
         scale-down factor which causes the number of sets to become a
         power of two.  Then, increase the associativity by that
         factor.  Finally, re-calculate the total size so as to make
         sure it divides exactly between the sets. */
      tl_assert(old_nSets >= 0);
      UInt new_nSets = floor_power_of_2 ( old_nSets );
      tl_assert(new_nSets > 0 && new_nSets < old_nSets);
      Double factor = (Double)old_nSets / (Double)new_nSets;
      tl_assert(factor >= 1.0);

      new_assoc = (UInt)(0.5 + factor * (Double)old_assoc);
      tl_assert(new_assoc >= old_assoc);

      new_size = old_line_size * new_assoc * new_nSets;
   }

   tl_assert(new_line_size == old_line_size); /* we never change this */
   if (new_size == old_size && new_assoc == old_assoc)
      return;

   VG_(dmsg)("warning: "
             "specified LL cache: line_size %u assoc %u total_size %'u\n",
             old_line_size, old_assoc, old_size);
   VG_(dmsg)("warning: "
             "simulated LL cache: line_size %u assoc %u total_size %'u\n",
             new_line_size, new_assoc, new_size);

   LLc->size      = new_size;
   LLc->assoc     = new_assoc;
   LLc->line_size = new_line_size;
}
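
/* Worked example using the AMD configuration from the comment above
   (T = 5MB, A = 48, L = 64): old_nSets = 5242880 / (48 * 64) = 1706
   (integer division), not a power of two.  Then new_nSets =
   floor_power_of_2(1706) = 1024, factor = 1706.0 / 1024.0 = 1.666015625,
   new_assoc = (UInt)(0.5 + factor * 48) = 80, and new_size =
   64 * 80 * 1024 = 5242880.  The simulated LL cache therefore stays at
   5MB but becomes 80-way with 1024 sets, and the two warnings above are
   emitted because the associativity changed. */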

void VG_(post_clo_init_configure_caches)(cache_t* I1c,
                                         cache_t* D1c,
                                         cache_t* LLc,
                                         cache_t* clo_I1c,
                                         cache_t* clo_D1c,
                                         cache_t* clo_LLc)
{
#define DEFINED(L)   (-1 != L->size  || -1 != L->assoc || -1 != L->line_size)

   // Check whether all three caches were defined on the command line.
   Bool all_caches_clo_defined =
      (DEFINED(clo_I1c) &&
       DEFINED(clo_D1c) &&
       DEFINED(clo_LLc));

   // Set the cache config (using auto-detection, if supported by the
   // architecture).
   configure_caches( I1c, D1c, LLc, all_caches_clo_defined );

   maybe_tweak_LLc( LLc );

   // Check the default/auto-detected values.
   // Allow the user to override invalid auto-detected caches
   // on the command line.
   check_cache_or_override ("I1", I1c, DEFINED(clo_I1c));
   check_cache_or_override ("D1", D1c, DEFINED(clo_D1c));
   check_cache_or_override ("LL", LLc, DEFINED(clo_LLc));

   // Then replace with any defined on the command line.  (Already checked in
   // parse_cache_opt(), called from VG_(str_clo_cache_opt)().)
   if (DEFINED(clo_I1c)) { *I1c = *clo_I1c; }
   if (DEFINED(clo_D1c)) { *D1c = *clo_D1c; }
   if (DEFINED(clo_LLc)) { *LLc = *clo_LLc; }

   if (VG_(clo_verbosity) >= 2) {
      VG_(umsg)("Cache configuration used:\n");
      umsg_cache_img ("I1", I1c);
      umsg_cache_img ("D1", D1c);
      umsg_cache_img ("LL", LLc);
   }
#undef DEFINED
}

void VG_(print_cache_clo_opts)()
{
   VG_(printf)(
"    --I1=<size>,<assoc>,<line_size>  set I1 cache manually\n"
"    --D1=<size>,<assoc>,<line_size>  set D1 cache manually\n"
"    --LL=<size>,<assoc>,<line_size>  set LL cache manually\n"
   );
}


// Traverse the cache info and return a cache of the given kind and level.
// Return NULL if no such cache exists.
static const VexCache *
locate_cache(const VexCacheInfo *ci, VexCacheKind kind, UInt level)
{
   const VexCache *c;

   for (c = ci->caches; c != ci->caches + ci->num_caches; ++c) {
      if (c->level == level && c->kind == kind) {
         return c;
      }
   }
   return NULL;   // not found
}


// Gives the auto-detected configuration of I1, D1 and LL caches.  They get
// overridden by any cache configurations specified on the command line.
static void
configure_caches(cache_t *I1c, cache_t *D1c, cache_t *LLc,
                 Bool all_caches_clo_defined)
{
   VexArchInfo vai;
   const VexCacheInfo *ci;
   const VexCache *i1, *d1, *ll;

   VG_(machine_get_VexArchInfo)(NULL, &vai);
   ci = &vai.hwcache_info;

   // Extract what we need
   i1 = locate_cache(ci, INSN_CACHE, 1);
   d1 = locate_cache(ci, DATA_CACHE, 1);
   ll = locate_cache(ci, UNIFIED_CACHE, ci->num_levels);

   if (ci->num_caches > 0 && ll == NULL) {
      VG_(dmsg)("warning: L2 cache not installed, ignore LL results.\n");
   }

   if (ll && ci->num_levels > 2) {
      VG_(dmsg)("warning: L%u cache found, using its data for the "
                "LL simulation.\n", ci->num_levels);
   }

   if (i1 && d1 && ll) {
      if (i1->is_trace_cache) {
         /* HACK ALERT: Instruction trace cache -- capacity is micro-ops based.
          * Conversion to byte size is a total guess; treat the 12K and 16K
          * cases the same since the cache byte size must be a power of two for
          * everything to work!  Also guessing 32 bytes for the line size...
          */
         UInt adjusted_size, guessed_line_size = 32;

         if (i1->sizeB == 12 * 1024 || i1->sizeB == 16 * 1024) {
            adjusted_size = 16 * 1024;
         } else {
            adjusted_size = 32 * 1024;
         }
         VG_(dmsg)("warning: Pentium 4 with %u KB micro-op instruction trace cache\n",
                   i1->sizeB / 1024);
         VG_(dmsg)("         Simulating a %u KB I-cache with %u B lines\n",
                   adjusted_size / 1024, guessed_line_size);

         *I1c = (cache_t) { adjusted_size, i1->assoc, guessed_line_size };
      } else {
         *I1c = (cache_t) { i1->sizeB, i1->assoc, i1->line_sizeB };
      }
      *D1c = (cache_t) { d1->sizeB, d1->assoc, d1->line_sizeB };
      *LLc = (cache_t) { ll->sizeB, ll->assoc, ll->line_sizeB };

      return;
   }

   // Cache information could not be queried; fall back to an
   // architecture-specific default setting.

#if defined(VGA_ppc32)

   // Default cache configuration
   *I1c = (cache_t) {  65536, 2, 64 };
   *D1c = (cache_t) {  65536, 2, 64 };
   *LLc = (cache_t) { 262144, 8, 64 };

#elif defined(VGA_ppc64be) || defined(VGA_ppc64le)

   // Default cache configuration
   *I1c = (cache_t) {  65536, 2, 64 };
   *D1c = (cache_t) {  65536, 2, 64 };
   *LLc = (cache_t) { 262144, 8, 64 };

#elif defined(VGA_arm)

   // Set caches to default (for Cortex-A8 ?)
   *I1c = (cache_t) {  16384, 4, 64 };
   *D1c = (cache_t) {  16384, 4, 64 };
   *LLc = (cache_t) { 262144, 8, 64 };

#elif defined(VGA_arm64)

   // Copy the 32-bit ARM version until such time as we have
   // some real hardware to run on
   *I1c = (cache_t) {  16384, 4, 64 };
   *D1c = (cache_t) {  16384, 4, 64 };
   *LLc = (cache_t) { 262144, 8, 64 };

#elif defined(VGA_s390x)
   //
   // Here is the cache data from older machine models:
   //
   //           I1          D1        I/D L2
   // z900  256k/256/4  256k/256/4    16MB
   // z800  256k/256/4  256k/256/4     8MB
   // z990  256k/256/4  256k/256/4    32MB
   // z890  256k/256/4  256k/256/4    32MB
   // z9    256k/256/4  256k/256/4    40MB
   //
   // Sources:
   // (1) IBM System z9 109 Technical Introduction
   //     www.redbooks.ibm.com/redbooks/pdfs/sg246669.pdf
   // (2) The microarchitecture of the IBM eServer z900 processor
   //     IBM Journal of Research and Development
   //     Volume 46, Number 4/5, pp 381-395, July/September 2002
   // (3) The IBM eServer z990 microprocessor
   //     IBM Journal of Research and Development
   //     Volume 48, Number 3/4, pp 295-309, May/July 2004
   // (4) Charles Webb, IBM
   //
   // L2 data is unfortunately incomplete. Otherwise, we could support
   // machines without the ECAG insn by looking at VEX_S390X_MODEL(hwcaps).

   // Default cache configuration is z10-EC (Source: ECAG insn)
   *I1c = (cache_t) {    65536,  4, 256 };
   *D1c = (cache_t) {   131072,  8, 256 };
   *LLc = (cache_t) { 50331648, 24, 256 };

#elif defined(VGA_mips32)

   // Set caches to default (for MIPS32-r2(mips 74kc))
   *I1c = (cache_t) {  32768, 4, 32 };
   *D1c = (cache_t) {  32768, 4, 32 };
   *LLc = (cache_t) { 524288, 8, 32 };

#elif defined(VGA_mips64)

   // Set caches to default (for MIPS64 - 5kc)
   *I1c = (cache_t) {  32768, 4, 32 };
   *D1c = (cache_t) {  32768, 4, 32 };
   *LLc = (cache_t) { 524288, 8, 32 };

#elif defined(VGA_x86) || defined(VGA_amd64)

   *I1c = (cache_t) {  65536, 2, 64 };
   *D1c = (cache_t) {  65536, 2, 64 };
   *LLc = (cache_t) { 262144, 8, 64 };

#elif defined(VGA_tilegx)

   // Set caches to default for Tilegx.
   *I1c = (cache_t) { 0x8000, 2, 64 };
   *D1c = (cache_t) { 0x8000, 2, 64 };
   *LLc = (cache_t) { 0x40000, 8, 64 };

#else

#error "Unknown arch"

#endif

   if (!all_caches_clo_defined) {
      const HChar warning[] =
         "Warning: Cannot auto-detect cache config, using defaults.\n"
         "         Run with -v to see.\n";
      VG_(dmsg)("%s", warning);
   }
}

/*--------------------------------------------------------------------*/
/*--- end                                                          ---*/
/*--------------------------------------------------------------------*/