intel_perf_counters: Add support for Gen7 platforms.

We finally received permission to release this; the counters should be
properly documented in the Haswell PRMs.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
This commit is contained in:
Kenneth Graunke 2013-04-02 22:54:08 -07:00
parent 16b61932bb
commit 11d5859b28

View File

@ -137,6 +137,163 @@ const char *gen6_counter_names[GEN6_COUNTER_COUNT] = {
[28] = "SF active and stalled",
};
/* Number of entries in gen7_counter_names (valid indices are 0..43). */
#define GEN7_COUNTER_COUNT 44
/**
* Names for aggregating counters A0-A43, indexed by counter number.
* Entries without an initializer are NULL and denote "Reserved" counters;
* consumers test for NULL to skip them.
*
* NOTE(review): this comment used to say "A0-A44", but GEN7_COUNTER_COUNT
* is 44, so the last index is 43. The Ivybridge report layout also lists
* an A44 field -- presumably reserved; confirm against the PRM.
*/
const char *gen7_counter_names[GEN7_COUNTER_COUNT] = {
/* A0:
* The sum of all cycles on all cores actively executing instructions.
* This does not count the time taken to service Send instructions.
* This time is considered by shader active counters to give the result.
*/
[0] = "Aggregated Core Array Active",
/* A1:
* The sum of all cycles on all cores where the EU is not idle and is
* not actively executing ISA instructions. Generally this means that
* all loaded threads on the EU are stalled on some data dependency,
* but this also includes the time during which the TS is loading the
* thread dispatch header into the EU prior to thread execution and no
* other thread is fully loaded.
*/
[1] = "Aggregated Core Array Stalled",
/* A2:
* Total time in clocks the vertex shader spent active on all cores.
*/
[2] = "Vertex Shader Active Time",
/* A4:
* Total time in clocks the vertex shader spent stalled on all cores -
* and the entire core was stalled as well.
*/
[4] = "Vertex Shader Stall Time - Core Stall",
/* A5: Number of VS threads loaded at any given time in the EUs. */
[5] = "# VS threads loaded",
/* A7:
* Total time in clocks the hull shader spent active on all cores.
*/
[7] = "Hull Shader Active Time",
/* A9:
* Total time in clocks the hull shader spent stalled on all cores -
* and the entire core was stalled as well.
*/
[9] = "Hull Shader Stall Time - Core Stall",
/* A10: Number of HS threads loaded at any given time in the EUs. */
[10] = "# HS threads loaded",
/* A12:
* Total time in clocks the domain shader spent active on all cores.
*/
[12] = "Domain Shader Active Time",
/* A14:
* Total time in clocks the domain shader spent stalled on all cores -
* and the entire core was stalled as well.
*/
[14] = "Domain Shader Stall Time - Core Stall",
/* A15: Number of DS threads loaded at any given time in the EUs. */
[15] = "# DS threads loaded",
/* A17:
* Total time in clocks the compute shader spent active on all cores.
*/
[17] = "Compute Shader Active Time",
/* A19:
* Total time in clocks the compute shader spent stalled on all cores -
* and the entire core was stalled as well.
*/
[19] = "Compute Shader Stall Time - Core Stall",
/* A20: Number of CS threads loaded at any given time in the EUs. */
[20] = "# CS threads loaded",
/* A22:
* Total time in clocks the geometry shader spent active on all cores.
*/
[22] = "Geometry Shader Active Time",
/* A24:
* Total time in clocks the geometry shader spent stalled on all cores -
* and the entire core was stalled as well.
*/
[24] = "Geometry Shader Stall Time - Core Stall",
/* A25: Number of GS threads loaded at any given time in the EUs. */
[25] = "# GS threads loaded",
/* A27:
* Total time in clocks the pixel shader spent active on all cores.
*/
[27] = "Pixel Shader Active Time",
/* A29:
* Total time in clocks the pixel shader spent stalled on all cores -
* and the entire core was stalled as well.
*/
[29] = "Pixel Shader Stall Time - Core Stall",
/* A30: Number of PS threads loaded at any given time in the EUs. */
[30] = "# PS threads loaded",
/* A32: Count of pixels that pass the fast check (8x8). */
[32] = "HiZ Fast Z Test Pixels Passing",
/* A33: Count of pixels that fail the fast check (8x8). */
[33] = "HiZ Fast Z Test Pixels Failing",
/* A34: Count of pixels that pass the slow check (2x2). */
[34] = "Slow Z Test Pixels Passing",
/* A35: Count of pixels that fail the slow check (2x2). */
[35] = "Slow Z Test Pixels Failing",
/* A36: Number of pixels/samples killed in the pixel shader.
* Ivybridge/Baytrail Erratum: Count reported is 2X the actual count for
* dual source render target messages, i.e. when PS has two output colors.
*/
[36] = "Pixel Kill Count",
/* A37:
* Number of pixels/samples that fail alpha-test. Alpha to coverage
* may have some challenges in per-pixel invocation.
*/
[37] = "Alpha Test Pixels Failed",
/* A38:
* Number of pixels/samples that fail the stencil test after the pixel
* shader has executed.
*/
[38] = "Post PS Stencil Pixels Failed",
/* A39:
* Number of pixels/samples that fail the Z test after the pixel shader
* has executed.
*/
[39] = "Post PS Z buffer Pixels Failed",
/* A40:
* Number of render target writes. MRT scenarios will cause this
* counter to increment multiple times.
*/
[40] = "3D/GPGPU Render Target Writes",
/* A41: Render engine is not idle.
*
* GPU Busy aggregate counter doesn't increment under the following
* conditions:
*
* 1. Context Switch in Progress.
* 2. GPU stalled on executing MI_WAIT_FOR_EVENT.
* 3. GPU stalled on executing MI_SEMAPHORE_MBOX.
* 4. RCS idle but other parts of GPU active (e.g. only media engines
* active)
*/
[41] = "Render Engine Busy",
/* A42:
* VSunit is stalling VF (upstream unit) and starving HS (downstream
* unit).
*/
[42] = "VS bottleneck",
/* A43:
* GSunit is stalling DS (upstream unit) and starving SOL (downstream
* unit).
*/
[43] = "GS bottleneck",
};
/**
* Counter format value used for Gen7. The value 5 is binary 101, i.e. the
* "Counter Select = 101" report layout shown below.
*
* Each row of the layout is written with the highest field on the left;
* the reader code skips 3 dwords (ReportID, TIMESTAMP) to reach A0.
* NOTE(review): field widths are inferred from that skip, not stated here --
* confirm against the PRM.
*
* Ivybridge - Counter Select = 101
* A4 A3 A2 A1 A0 TIMESTAMP ReportID
* A12 A11 A10 A9 A8 A7 A6 A5
* A20 A19 A18 A17 A16 A15 A14 A13
* A28 A27 A26 A25 A24 A23 A22 A21
* A36 A35 A34 A33 A32 A31 A30 A29
* A44 A43 A42 A41 A40 A39 A38 A37
* C3 C2 C1 C0 B3 B2 B1 B0
* C11 C10 C9 C8 C7 C6 C5 C4
*/
const int gen7_counter_format = 5; /* 0b101 */
/* Flag tested in main() before the accumulated totals are printed. */
int have_totals = 0;
/* Per-counter sum of deltas between samples; zeroed after being printed. */
uint32_t *totals;
/* Raw value of each counter as read at the previous sample. */
uint32_t *last_counter;
@ -243,6 +400,40 @@ gen6_get_counters(void)
drm_intel_bo_unreference(stats_bo);
}
/**
 * Take one sample of the Gen7 aggregating counters and fold it into the
 * running totals.
 *
 * A 4096-byte scratch buffer object is allocated, a
 * GEN6_MI_REPORT_PERF_COUNT command targeting it is emitted and flushed
 * on the render ring, and the buffer is then mapped and read back. For
 * every named (non-reserved) counter the delta since the previous sample
 * is added to totals[] and the raw value is stored in last_counter[].
 *
 * If the buffer cannot be allocated or mapped, a message is printed and
 * the sample is skipped; totals[] and last_counter[] are left untouched.
 */
static void
gen7_get_counters(void)
{
	int i;
	drm_intel_bo *stats_bo;
	uint32_t *stats_result;

	stats_bo = drm_intel_bo_alloc(bufmgr, "stats", 4096, 4096);
	if (!stats_bo) {
		/* Bail out before OUT_RELOC would be handed a NULL buffer. */
		printf("Failed to allocate the perf counter stats buffer.\n");
		return;
	}

	BEGIN_BATCH(3);
	OUT_BATCH(GEN6_MI_REPORT_PERF_COUNT | (3 - 2));
	OUT_RELOC(stats_bo,
		  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0);
	OUT_BATCH(0);
	ADVANCE_BATCH();

	intel_batchbuffer_flush_on_ring(batch, I915_EXEC_RENDER);

	if (drm_intel_bo_map(stats_bo, 0) != 0 || !stats_bo->virtual) {
		/* Without a mapping there is nothing to read; drop the sample. */
		printf("Failed to map the perf counter stats buffer.\n");
		drm_intel_bo_unreference(stats_bo);
		return;
	}

	stats_result = stats_bo->virtual;
	/* Skip the 3 dwords (REPORT_ID, TIMESTAMP) that precede A0. */
	stats_result += 3;
	for (i = 0; i < GEN7_COUNTER_COUNT; i++) {
		/* Ignore "Reserved" counters */
		if (!gen7_counter_names[i])
			continue;

		/* Unsigned subtraction keeps the delta correct across wrap. */
		totals[i] += stats_result[i] - last_counter[i];
		last_counter[i] = stats_result[i];
	}

	drm_intel_bo_unmap(stats_bo);
	drm_intel_bo_unreference(stats_bo);
}
/*
 * main() prints the accumulated totals whenever its counter `l` is a
 * multiple of STATS_CHECK_FREQUENCY / STATS_REPORT_FREQUENCY.
 * NOTE(review): presumably both are rates per second -- confirm in main().
 */
#define STATS_CHECK_FREQUENCY 100
#define STATS_REPORT_FREQUENCY 2
@ -279,6 +470,11 @@ main(int argc, char **argv)
counter_count = GEN6_COUNTER_COUNT;
counter_format = gen6_counter_format;
get_counters = gen6_get_counters;
} else if (IS_GEN7(devid)) {
counter_name = gen7_counter_names;
counter_count = GEN7_COUNTER_COUNT;
counter_format = gen7_counter_format;
get_counters = gen7_get_counters;
} else {
printf("This tool is not yet supported on your platform.\n");
abort();
@ -304,6 +500,9 @@ main(int argc, char **argv)
if (l % (STATS_CHECK_FREQUENCY / STATS_REPORT_FREQUENCY) == 0) {
if (have_totals) {
for (i = 0; i < counter_count; i++) {
/* Ignore "Reserved" counters */
if (!counter_name[i])
continue;
printf("%s: %u\n", counter_name[i],
totals[i]);
totals[i] = 0;