mirror of
https://github.com/tiagovignatti/intel-gpu-tools.git
synced 2025-06-22 15:26:21 +00:00
intel_perf_counters: Add support for Gen7 platforms.
We finally received permission to release this; the counters should be properly documented in the Haswell PRMs. Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
This commit is contained in:
parent
16b61932bb
commit
11d5859b28
@ -137,6 +137,163 @@ const char *gen6_counter_names[GEN6_COUNTER_COUNT] = {
|
||||
[28] = "SF active and stalled",
|
||||
};
|
||||
|
||||
#define GEN7_COUNTER_COUNT 44

/**
 * Names for aggregating counters A0-A43, indexed by counter number.
 *
 * Entries that are not explicitly initialized stay NULL; those are the
 * "Reserved" counters, and code walking this table skips them.
 *
 * Note that the table has GEN7_COUNTER_COUNT (44) entries, so A44, which
 * appears in the hardware report layout, is not covered here.
 */
const char *gen7_counter_names[GEN7_COUNTER_COUNT] = {
	/* A0:
	 * The sum of all cycles on all cores actively executing instructions
	 * This does not count the time taken to service Send instructions.
	 * This time is considered by shader active counters to give the result.
	 */
	[0] = "Aggregated Core Array Active",
	/* A1:
	 * The sum of all cycles on all cores where the EU is not idle and is
	 * not actively executing ISA instructions.  Generally this means that
	 * all loaded threads on the EU are stalled on some data dependency,
	 * but this also includes the time during which the TS is loading the
	 * thread dispatch header into the EU prior to thread execution and no
	 * other thread is fully loaded.
	 */
	[1] = "Aggregated Core Array Stalled",
	/* A2:
	 * Total time in clocks the vertex shader spent active on all cores.
	 */
	[2] = "Vertex Shader Active Time",
	/* A4:
	 * Total time in clocks the vertex shader spent stalled on all cores -
	 * and the entire core was stalled as well.
	 */
	[4] = "Vertex Shader Stall Time - Core Stall",
	/* A5: Number of VS threads loaded at any given time in the EUs. */
	[5] = "# VS threads loaded",
	/* A7:
	 * Total time in clocks the Hull shader spent active on all cores.
	 */
	[7] = "Hull Shader Active Time",
	/* A9:
	 * Total time in clocks the Hull shader spent stalled on all cores -
	 * and the entire core was stalled as well.
	 */
	[9] = "Hull Shader Stall Time - Core Stall",
	/* A10: Number of HS threads loaded at any given time in the EUs. */
	[10] = "# HS threads loaded",
	/* A12:
	 * Total time in clocks the Domain shader spent active on all cores.
	 */
	[12] = "Domain Shader Active Time",
	/* A14:
	 * Total time in clocks the domain shader spent stalled on all cores -
	 * and the entire core was stalled as well.
	 */
	[14] = "Domain Shader Stall Time - Core Stall",
	/* A15: Number of DS threads loaded at any given time in the EUs. */
	[15] = "# DS threads loaded",
	/* A17:
	 * Total time in clocks the compute shader spent active on all cores.
	 */
	[17] = "Compute Shader Active Time",
	/* A19:
	 * Total time in clocks the compute shader spent stalled on all cores -
	 * and the entire core was stalled as well.
	 */
	[19] = "Compute Shader Stall Time - Core Stall",
	/* A20: Number of CS threads loaded at any given time in the EUs. */
	[20] = "# CS threads loaded",
	/* A22:
	 * Total time in clocks the geometry shader spent active on all cores.
	 */
	[22] = "Geometry Shader Active Time",
	/* A24:
	 * Total time in clocks the geometry shader spent stalled on all cores -
	 * and the entire core was stalled as well.
	 */
	[24] = "Geometry Shader Stall Time - Core Stall",
	/* A25: Number of GS threads loaded at any time in the EUs. */
	[25] = "# GS threads loaded",
	/* A27:
	 * Total time in clocks the pixel shader spent active on all cores.
	 */
	[27] = "Pixel Shader Active Time",
	/* A29:
	 * Total time in clocks the pixel shader spent stalled on all cores -
	 * and the entire core was stalled as well.
	 */
	[29] = "Pixel Shader Stall Time - Core Stall",
	/* A30: Number of PS threads loaded at any given time in the EUs. */
	[30] = "# PS threads loaded",
	/* A32: Count of pixels that pass the fast check (8x8). */
	[32] = "HiZ Fast Z Test Pixels Passing",
	/* A33: Count of pixels that fail the fast check (8x8). */
	[33] = "HiZ Fast Z Test Pixels Failing",
	/* A34: Count of pixels passing the slow check (2x2). */
	[34] = "Slow Z Test Pixels Passing",
	/* A35: Count of pixels that fail the slow check (2x2). */
	[35] = "Slow Z Test Pixels Failing",
	/* A36: Number of pixels/samples killed in the pixel shader.
	 * Ivybridge/Baytrail Erratum: Count reported is 2X the actual count for
	 * dual source render target messages i.e. when PS has two output colors.
	 */
	[36] = "Pixel Kill Count",
	/* A37:
	 * Number of pixels/samples that fail alpha-test.  Alpha to coverage
	 * may have some challenges in per-pixel invocation.
	 */
	[37] = "Alpha Test Pixels Failed",
	/* A38:
	 * Number of pixels/samples failing stencil test after the pixel shader
	 * has executed.
	 */
	[38] = "Post PS Stencil Pixels Failed",
	/* A39:
	 * Number of pixels/samples that fail Z test after the pixel shader has
	 * executed.
	 */
	[39] = "Post PS Z buffer Pixels Failed",
	/* A40:
	 * Number of render target writes.  MRT scenarios will cause this
	 * counter to increment multiple times.
	 */
	[40] = "3D/GPGPU Render Target Writes",
	/* A41: Render engine is not idle.
	 *
	 * GPU Busy aggregate counter doesn't increment under the following
	 * conditions:
	 *
	 * 1. Context Switch in Progress.
	 * 2. GPU stalled on executing MI_WAIT_FOR_EVENT.
	 * 3. GPU stalled on executing MI_SEMAPHORE_MBOX.
	 * 4. RCS idle but other parts of GPU active (e.g. only media engines
	 *    active)
	 */
	[41] = "Render Engine Busy",
	/* A42:
	 * VSunit is stalling VF (upstream unit) and starving HS (downstream
	 * unit).
	 */
	[42] = "VS bottleneck",
	/* A43:
	 * GSunit is stalling DS (upstream unit) and starving SOL (downstream
	 * unit).
	 */
	[43] = "GS bottleneck",
};
|
||||
|
||||
/**
 * Ivybridge - Counter Select = 101
 *
 * Layout of the report written for this counter format.  Each line is one
 * row of the report; within a row the lowest-addressed field is on the
 * right, so the report starts with ReportID, followed by TIMESTAMP.
 *
 *   A4  A3  A2  A1  A0 TIMESTAMP ReportID
 *  A12 A11 A10  A9  A8  A7  A6  A5
 *  A20 A19 A18 A17 A16 A15 A14 A13
 *  A28 A27 A26 A25 A24 A23 A22 A21
 *  A36 A35 A34 A33 A32 A31 A30 A29
 *  A44 A43 A42 A41 A40 A39 A38 A37
 *   C3  C2  C1  C0  B3  B2  B1  B0
 *  C11 C10  C9  C8  C7  C6  C5  C4
 *
 * NOTE(review): the first row names only seven fields while every other row
 * has eight, and gen7_get_counters() steps over three 32-bit words before
 * reading A0.  Presumably there is one unnamed word between TIMESTAMP and
 * A0 -- confirm against the PRM.
 */
const int gen7_counter_format = 5; /* 0b101 */
|
||||
|
||||
int have_totals = 0;
|
||||
uint32_t *totals;
|
||||
uint32_t *last_counter;
|
||||
@ -243,6 +400,40 @@ gen6_get_counters(void)
|
||||
drm_intel_bo_unreference(stats_bo);
|
||||
}
|
||||
|
||||
static void
|
||||
gen7_get_counters(void)
|
||||
{
|
||||
int i;
|
||||
drm_intel_bo *stats_bo;
|
||||
uint32_t *stats_result;
|
||||
|
||||
stats_bo = drm_intel_bo_alloc(bufmgr, "stats", 4096, 4096);
|
||||
|
||||
BEGIN_BATCH(3);
|
||||
OUT_BATCH(GEN6_MI_REPORT_PERF_COUNT | (3 - 2));
|
||||
OUT_RELOC(stats_bo,
|
||||
I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0);
|
||||
OUT_BATCH(0);
|
||||
ADVANCE_BATCH();
|
||||
|
||||
intel_batchbuffer_flush_on_ring(batch, I915_EXEC_RENDER);
|
||||
|
||||
drm_intel_bo_map(stats_bo, 0);
|
||||
stats_result = stats_bo->virtual;
|
||||
/* skip REPORT_ID, TIMESTAMP */
|
||||
stats_result += 3;
|
||||
for (i = 0; i < GEN7_COUNTER_COUNT; i++) {
|
||||
/* Ignore "Reserved" counters */
|
||||
if (!gen7_counter_names[i])
|
||||
continue;
|
||||
totals[i] += stats_result[i] - last_counter[i];
|
||||
last_counter[i] = stats_result[i];
|
||||
}
|
||||
|
||||
drm_intel_bo_unmap(stats_bo);
|
||||
drm_intel_bo_unreference(stats_bo);
|
||||
}
|
||||
|
||||
#define STATS_CHECK_FREQUENCY 100
|
||||
#define STATS_REPORT_FREQUENCY 2
|
||||
|
||||
@ -279,6 +470,11 @@ main(int argc, char **argv)
|
||||
counter_count = GEN6_COUNTER_COUNT;
|
||||
counter_format = gen6_counter_format;
|
||||
get_counters = gen6_get_counters;
|
||||
} else if (IS_GEN7(devid)) {
|
||||
counter_name = gen7_counter_names;
|
||||
counter_count = GEN7_COUNTER_COUNT;
|
||||
counter_format = gen7_counter_format;
|
||||
get_counters = gen7_get_counters;
|
||||
} else {
|
||||
printf("This tool is not yet supported on your platform.\n");
|
||||
abort();
|
||||
@ -304,6 +500,9 @@ main(int argc, char **argv)
|
||||
if (l % (STATS_CHECK_FREQUENCY / STATS_REPORT_FREQUENCY) == 0) {
|
||||
if (have_totals) {
|
||||
for (i = 0; i < counter_count; i++) {
|
||||
/* Ignore "Reserved" counters */
|
||||
if (!counter_name[i])
|
||||
continue;
|
||||
printf("%s: %u\n", counter_name[i],
|
||||
totals[i]);
|
||||
totals[i] = 0;
|
||||
|
Loading…
x
Reference in New Issue
Block a user