/*--------------------------------------------------------------------*/
/*--- Cache simulation                                    cg_sim.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Cachegrind, a Valgrind tool for cache
   profiling programs.

   Copyright (C) 2002-2015 Nicholas Nethercote
      njn@valgrind.org

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

/* Notes:
   - simulates a write-allocate cache
   - (block --> set) hash function uses simple bit selection
   - handling of references straddling two cache blocks:
      - counts as only one cache access (not two)
      - both blocks hit                  --> one hit
      - one block hits, the other misses --> one miss
      - both blocks miss                 --> one miss (not two)
*/
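/* Illustrative example of the straddling rule (not part of the original
 * file; the 64-byte line size is assumed): a 4-byte access at address
 * 0x0ffe covers bytes 0x0ffe..0x1001, i.e. block 0x3f (bytes
 * 0x0fc0..0x0fff) and block 0x40 (bytes 0x1000..0x103f).  It is counted
 * as a single access: a hit only if both blocks hit, otherwise one miss. */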
typedef struct {
   Int          size;                   /* bytes */
   Int          assoc;
   Int          line_size;              /* bytes */
   Int          sets;
   Int          sets_min_1;
   Int          line_size_bits;
   Int          tag_shift;
   HChar        desc_line[128];         /* large enough */
   UWord*       tags;
} cache_t2;
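/* Layout note (added for illustration, not part of the original file):
 * 'tags' is a flat array of sets*assoc words; set s occupies
 *    tags[s*assoc] .. tags[s*assoc + assoc - 1]
 * and the simulation functions below keep each set's entries in
 * most-recently-used-first order. */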
/* By this point, the size/assoc/line_size has been checked. */
static void cachesim_initcache(cache_t config, cache_t2* c)
{
   Int i;

   c->size      = config.size;
   c->assoc     = config.assoc;
   c->line_size = config.line_size;

   c->sets           = (c->size / c->line_size) / c->assoc;
   c->sets_min_1     = c->sets - 1;
   c->line_size_bits = VG_(log2)(c->line_size);
   c->tag_shift      = c->line_size_bits + VG_(log2)(c->sets);

   if (c->assoc == 1) {
      VG_(sprintf)(c->desc_line, "%d B, %d B, direct-mapped",
                                 c->size, c->line_size);
   } else {
      VG_(sprintf)(c->desc_line, "%d B, %d B, %d-way associative",
                                 c->size, c->line_size, c->assoc);
   }

   c->tags = VG_(malloc)("cg.sim.ci.1",
                         sizeof(UWord) * c->sets * c->assoc);

   for (i = 0; i < c->sets * c->assoc; i++)
      c->tags[i] = 0;
}
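/* Worked example (the 32 KB, 8-way, 64 B/line figures are assumed for
 * illustration, not taken from this file):
 *    sets           = (32768 / 64) / 8 = 64
 *    sets_min_1     = 63 (0x3f, a valid bit mask since sets is a power of 2)
 *    line_size_bits = log2(64) = 6
 *    tag_shift      = 6 + log2(64) = 12
 * so an address's block number is a >> 6 and its set is (a >> 6) & 0x3f. */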
/* This attribute forces GCC to inline the function, getting rid of a
 * lot of indirection around the cache_t2 pointer, if it is known to be
 * constant in the caller (the caller is inlined itself).
 * Without inlining of simulator functions, cachegrind can get 40% slower.
 */
__attribute__((always_inline))
static __inline__
Bool cachesim_setref_is_miss(cache_t2* c, UInt set_no, UWord tag)
{
   int i, j;
   UWord *set;

   set = &(c->tags[set_no * c->assoc]);

   /* This loop is unrolled for just the first case, which is the most */
   /* common.  We can't unroll any further because it would screw up   */
   /* if we have a direct-mapped (1-way) cache.                        */
   if (tag == set[0])
      return False;

   /* If the tag is one other than the MRU, move it into the MRU spot  */
   /* and shuffle the rest down.                                       */
   for (i = 1; i < c->assoc; i++) {
      if (tag == set[i]) {
         for (j = i; j > 0; j--) {
            set[j] = set[j - 1];
         }
         set[0] = tag;

         return False;
      }
   }

   /* A miss; install this tag as MRU, shuffle rest down. */
   for (j = c->assoc - 1; j > 0; j--) {
      set[j] = set[j - 1];
   }
   set[0] = tag;

   return True;
}
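/* Illustrative trace (not part of the original file): for a hypothetical
 * 4-way set stored MRU-first as { A, B, C, D },
 *    a reference to C (a hit)  leaves it as { C, A, B, D }
 *    a reference to E (a miss) leaves it as { E, A, B, C }, evicting D.
 * Keeping tags in recency order is what makes the "shuffle down" loops
 * above implement an LRU replacement policy. */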
__attribute__((always_inline))
static __inline__
Bool cachesim_ref_is_miss(cache_t2* c, Addr a, UChar size)
{
   /* A memory block has the size of a cache line */
   UWord block1 =  a         >> c->line_size_bits;
   UWord block2 = (a+size-1) >> c->line_size_bits;
   UInt  set1   = block1 & c->sets_min_1;

   /* Tags used in real caches are minimal to save space.
    * As the last bits of the block number of addresses mapping
    * into one cache set are the same, real caches use as tag
    *    tag = block >> log2(#sets)
    * But using the memory block number as a more specific tag is fine,
    * and saves instructions.
    */
   UWord tag1   = block1;

   /* Access entirely within line. */
   if (block1 == block2)
      return cachesim_setref_is_miss(c, set1, tag1);

   /* Access straddles two lines. */
   else if (block1 + 1 == block2) {
      UInt  set2 = block2 & c->sets_min_1;
      UWord tag2 = block2;

      /* always do both, as state is updated as side effect */
      if (cachesim_setref_is_miss(c, set1, tag1)) {
         cachesim_setref_is_miss(c, set2, tag2);
         return True;
      }
      return cachesim_setref_is_miss(c, set2, tag2);
   }
   VG_(printf)("addr: %lx  size: %u  blocks: %lu %lu",
               a, size, block1, block2);
   VG_(tool_panic)("item straddles more than two cache sets");
   /* not reached */
   return True;
}
static cache_t2 LL;
static cache_t2 I1;
static cache_t2 D1;

static void cachesim_initcaches(cache_t I1c, cache_t D1c, cache_t LLc)
{
   cachesim_initcache(I1c, &I1);
   cachesim_initcache(D1c, &D1);
   cachesim_initcache(LLc, &LL);
}
__attribute__((always_inline))
static __inline__
void cachesim_I1_doref_Gen(Addr a, UChar size, ULong* m1, ULong *mL)
{
   if (cachesim_ref_is_miss(&I1, a, size)) {
      (*m1)++;
      if (cachesim_ref_is_miss(&LL, a, size))
         (*mL)++;
   }
}
// common special case IrNoX
__attribute__((always_inline))
static __inline__
void cachesim_I1_doref_NoX(Addr a, UChar size, ULong* m1, ULong *mL)
{
   UWord block  = a >> I1.line_size_bits;
   UInt  I1_set = block & I1.sets_min_1;

   // use block as tag
   if (cachesim_setref_is_miss(&I1, I1_set, block)) {
      UInt LL_set = block & LL.sets_min_1;
      (*m1)++;
      // can use block as tag, as L1I and LL cache line sizes are equal
      if (cachesim_setref_is_miss(&LL, LL_set, block))
         (*mL)++;
   }
}
__attribute__((always_inline))
static __inline__
void cachesim_D1_doref(Addr a, UChar size, ULong* m1, ULong *mL)
{
   if (cachesim_ref_is_miss(&D1, a, size)) {
      (*m1)++;
      if (cachesim_ref_is_miss(&LL, a, size))
         (*mL)++;
   }
}
/* Check for special case IrNoX.  Called at instrumentation time.
 *
 * Does this Ir only touch one cache line, and are the L1I/LL cache
 * line sizes the same?  This allows us to get rid of a runtime check.
 *
 * Returning False is always fine, as the caller then simply falls back
 * to the generic case.
 */
static Bool cachesim_is_IrNoX(Addr a, UChar size)
{
   UWord block1, block2;

   if (I1.line_size_bits != LL.line_size_bits) return False;
   block1 =  a         >> I1.line_size_bits;
   block2 = (a+size-1) >> I1.line_size_bits;
   if (block1 != block2) return False;

   return True;
}
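/* Minimal sketch (assumed, not from this file) of how this check is meant
 * to be used: an instruction's address and size are already known at
 * instrumentation time, so the tool can decide once per instruction which
 * simulation helper the generated code will call at run time.  'ia',
 * 'isize', 'm1' and 'mL' are hypothetical names:
 *
 *    if (cachesim_is_IrNoX(ia, isize))
 *       // emit a run-time call to cachesim_I1_doref_NoX(ia, isize, m1, mL)
 *    else
 *       // emit a run-time call to cachesim_I1_doref_Gen(ia, isize, m1, mL)
 *
 * The NoX variant skips the two-block straddle handling in
 * cachesim_ref_is_miss entirely, which is the point of the special case. */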
/*--------------------------------------------------------------------*/
/*--- end                                                 cg_sim.c ---*/
/*--------------------------------------------------------------------*/