overlay: Monitor per-ring context switch rate

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
This commit is contained in:
Chris Wilson 2013-08-29 12:01:38 +01:00
parent 75ef36713a
commit 474ce5396e
3 changed files with 61 additions and 11 deletions

View File

@ -40,10 +40,12 @@
#if defined(__i386__) #if defined(__i386__)
#define rmb() asm volatile("lock; addl $0,0(%%esp)" ::: "memory") #define rmb() asm volatile("lock; addl $0,0(%%esp)" ::: "memory")
#define wmb() asm volatile("lock; addl $0,0(%%esp)" ::: "memory")
#endif #endif
#if defined(__x86_64__) #if defined(__x86_64__)
#define rmb() asm volatile("lfence" ::: "memory") #define rmb() asm volatile("lfence" ::: "memory")
#define wmb() asm volatile("sfence" ::: "memory")
#endif #endif
#define N_PAGES 32 #define N_PAGES 32
@ -228,6 +230,14 @@ static int flip_complete(struct gpu_perf *gp, const void *event)
return 1; return 1;
} }
/*
 * Tracepoint handler for i915_gem_ring_switch_context samples.
 *
 * Bumps the context-switch counter selected by the second raw word of the
 * sample (presumably the ring index -- confirm against the tracepoint's
 * record layout).
 *
 * @gp:    perf state holding the ctx_switch[] counters
 * @event: the sample record, interpreted as a struct sample_event
 *
 * Returns 1 when a counter was incremented, 0 when the sample was ignored
 * because its index does not fit inside ctx_switch[].
 */
static int ctx_switch(struct gpu_perf *gp, const void *event)
{
	const struct sample_event *sample = event;

	/*
	 * The index comes straight from the trace data; never let it write
	 * outside the counter array. The bound is derived from the array
	 * itself so it tracks whatever size the header declares.
	 */
	if (sample->raw[1] >= sizeof(gp->ctx_switch) / sizeof(gp->ctx_switch[0]))
		return 0;

	gp->ctx_switch[sample->raw[1]]++;
	return 1;
}
static int ring_sync(struct gpu_perf *gp, const void *event) static int ring_sync(struct gpu_perf *gp, const void *event)
{ {
const struct sample_event *sample = event; const struct sample_event *sample = event;
@ -293,6 +303,7 @@ void gpu_perf_init(struct gpu_perf *gp, unsigned flags)
perf_tracepoint_open(gp, "i915", "i915_gem_request_wait_end", wait_end); perf_tracepoint_open(gp, "i915", "i915_gem_request_wait_end", wait_end);
perf_tracepoint_open(gp, "i915", "i915_flip_complete", flip_complete); perf_tracepoint_open(gp, "i915", "i915_flip_complete", flip_complete);
perf_tracepoint_open(gp, "i915", "i915_gem_ring_sync_to", ring_sync); perf_tracepoint_open(gp, "i915", "i915_gem_ring_sync_to", ring_sync);
perf_tracepoint_open(gp, "i915", "i915_gem_ring_switch_context", ctx_switch);
if (gp->nr_events == 0) { if (gp->nr_events == 0) {
gp->error = "i915.ko tracepoints not available"; gp->error = "i915.ko tracepoints not available";
@ -303,20 +314,19 @@ void gpu_perf_init(struct gpu_perf *gp, unsigned flags)
return; return;
} }
static int process_sample(struct gpu_perf *gp, static int process_sample(struct gpu_perf *gp, int cpu,
const struct perf_event_header *header) const struct perf_event_header *header)
{ {
const struct sample_event *sample = (const struct sample_event *)header; const struct sample_event *sample = (const struct sample_event *)header;
int n, update = 0; int n, update = 0;
/* hash me! */ /* hash me! */
for (n = 0; n < gp->nr_cpus * gp->nr_events; n++) { for (n = 0; n < gp->nr_events; n++) {
if (gp->sample[n].id != sample->id) int m = n * gp->nr_cpus + cpu;
if (gp->sample[m].id != sample->id)
continue; continue;
update = 1; update = gp->sample[m].func(gp, sample);
if (gp->sample[n].func)
update = gp->sample[n].func(gp, sample);
break; break;
} }
@ -380,13 +390,14 @@ int gpu_perf_update(struct gpu_perf *gp)
} }
if (header->type == PERF_RECORD_SAMPLE) if (header->type == PERF_RECORD_SAMPLE)
update += process_sample(gp, header); update += process_sample(gp, n, header);
tail += header->size; tail += header->size;
} }
if (wrap) if (wrap)
tail &= mask; tail &= mask;
mmap->data_tail = tail; mmap->data_tail = tail;
wmb();
} }
free(buffer); free(buffer);

View File

@ -41,7 +41,9 @@ struct gpu_perf {
int (*func)(struct gpu_perf *, const void *); int (*func)(struct gpu_perf *, const void *);
} *sample; } *sample;
int flip_complete[4]; unsigned flip_complete[MAX_RINGS];
unsigned ctx_switch[MAX_RINGS];
struct gpu_perf_comm { struct gpu_perf_comm {
struct gpu_perf_comm *next; struct gpu_perf_comm *next;
char name[256]; char name[256];

View File

@ -102,6 +102,7 @@ struct overlay_gpu_top {
struct overlay_gpu_perf { struct overlay_gpu_perf {
struct gpu_perf gpu_perf; struct gpu_perf gpu_perf;
time_t show_ctx;
}; };
struct overlay_gpu_freq { struct overlay_gpu_freq {
@ -127,6 +128,8 @@ struct overlay_context {
cairo_t *cr; cairo_t *cr;
int width, height; int width, height;
time_t time;
struct overlay_gpu_top gpu_top; struct overlay_gpu_top gpu_top;
struct overlay_gpu_perf gpu_perf; struct overlay_gpu_perf gpu_perf;
struct overlay_gpu_freq gpu_freq; struct overlay_gpu_freq gpu_freq;
@ -270,6 +273,8 @@ static void init_gpu_perf(struct overlay_context *ctx,
struct overlay_gpu_perf *gp) struct overlay_gpu_perf *gp)
{ {
gpu_perf_init(&gp->gpu_perf, 0); gpu_perf_init(&gp->gpu_perf, 0);
gp->show_ctx = 0;
} }
static char *get_comm(pid_t pid, char *comm, int len) static char *get_comm(pid_t pid, char *comm, int len)
@ -310,6 +315,16 @@ static void show_gpu_perf(struct overlay_context *ctx, struct overlay_gpu_perf *
char buf[1024]; char buf[1024];
cairo_pattern_t *linear; cairo_pattern_t *linear;
int x, y, y1, y2, n; int x, y, y1, y2, n;
int has_ctx = 0;
gpu_perf_update(&gp->gpu_perf);
for (n = 4; n > 0; n--) {
if (gp->gpu_perf.ctx_switch[n-1]) {
has_ctx = n;
break;
}
}
cairo_rectangle(ctx->cr, ctx->width/2+HALF_PAD-.5, PAD-.5, ctx->width/2-SIZE_PAD+1, ctx->height/2-SIZE_PAD+1); cairo_rectangle(ctx->cr, ctx->width/2+HALF_PAD-.5, PAD-.5, ctx->width/2-SIZE_PAD+1, ctx->height/2-SIZE_PAD+1);
cairo_set_source_rgb(ctx->cr, .15, .15, .15); cairo_set_source_rgb(ctx->cr, .15, .15, .15);
@ -326,12 +341,9 @@ static void show_gpu_perf(struct overlay_context *ctx, struct overlay_gpu_perf *
return; return;
} }
gpu_perf_update(&gp->gpu_perf);
y = PAD + 12 - 2; y = PAD + 12 - 2;
x = ctx->width/2 + HALF_PAD; x = ctx->width/2 + HALF_PAD;
for (comm = gp->gpu_perf.comm; comm; comm = comm->next) { for (comm = gp->gpu_perf.comm; comm; comm = comm->next) {
int total; int total;
@ -369,6 +381,8 @@ static void show_gpu_perf(struct overlay_context *ctx, struct overlay_gpu_perf *
chart_draw(comm->user_data, ctx->cr); chart_draw(comm->user_data, ctx->cr);
y2 += 14; y2 += 14;
} }
if (has_ctx || gp->show_ctx)
y2 += 14;
y1 += -12 - 2; y1 += -12 - 2;
y2 += 14 - 14 + 4; y2 += 14 - 14 + 4;
@ -465,6 +479,27 @@ skip_comm:
cairo_move_to(ctx->cr, x, y); cairo_move_to(ctx->cr, x, y);
cairo_show_text(ctx->cr, buf); cairo_show_text(ctx->cr, buf);
y += 14; y += 14;
cairo_set_source_rgba(ctx->cr, 1, 1, 1, 1);
cairo_move_to(ctx->cr, x, y);
if (has_ctx) {
int len = sprintf(buf, "Contexts:");
for (n = 0; n < has_ctx; n++)
len += sprintf(buf + len, "%s %d",
n ? "," : "",
gp->gpu_perf.ctx_switch[n]);
memset(gp->gpu_perf.ctx_switch, 0, sizeof(gp->gpu_perf.ctx_switch));
gp->show_ctx = ctx->time;
cairo_show_text(ctx->cr, buf);
y += 14;
} else if (gp->show_ctx) {
cairo_show_text(ctx->cr, "Contexts: 0");
y += 14;
if (ctx->time - gp->show_ctx > 10)
gp->show_ctx = 0;
}
} }
static void init_gpu_freq(struct overlay_context *ctx, static void init_gpu_freq(struct overlay_context *ctx,
@ -841,6 +876,8 @@ int main(int argc, char **argv)
i = 0; i = 0;
while (1) { while (1) {
ctx.time = time(NULL);
ctx.cr = cairo_create(ctx.surface); ctx.cr = cairo_create(ctx.surface);
cairo_set_operator(ctx.cr, CAIRO_OPERATOR_CLEAR); cairo_set_operator(ctx.cr, CAIRO_OPERATOR_CLEAR);
cairo_paint(ctx.cr); cairo_paint(ctx.cr);