mirror of
				https://github.com/tiagovignatti/intel-gpu-tools.git
				synced 2025-11-04 03:58:27 +00:00 
			
		
		
		
	overlay: Monitor per-ring context switch rate
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
This commit is contained in:
		
							parent
							
								
									75ef36713a
								
							
						
					
					
						commit
						474ce5396e
					
				@ -40,10 +40,12 @@
 | 
			
		||||
 | 
			
		||||
#if defined(__i386__)
 | 
			
		||||
#define rmb()           asm volatile("lock; addl $0,0(%%esp)" ::: "memory")
 | 
			
		||||
#define wmb()           asm volatile("lock; addl $0,0(%%esp)" ::: "memory")
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#if defined(__x86_64__)
 | 
			
		||||
#define rmb()           asm volatile("lfence" ::: "memory")
 | 
			
		||||
#define wmb()           asm volatile("sfence" ::: "memory")
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#define N_PAGES 32
 | 
			
		||||
@ -228,6 +230,14 @@ static int flip_complete(struct gpu_perf *gp, const void *event)
 | 
			
		||||
	return 1;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static int ctx_switch(struct gpu_perf *gp, const void *event)
 | 
			
		||||
{
 | 
			
		||||
	const struct sample_event *sample = event;
 | 
			
		||||
 | 
			
		||||
	gp->ctx_switch[sample->raw[1]]++;
 | 
			
		||||
	return 1;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static int ring_sync(struct gpu_perf *gp, const void *event)
 | 
			
		||||
{
 | 
			
		||||
	const struct sample_event *sample = event;
 | 
			
		||||
@ -293,6 +303,7 @@ void gpu_perf_init(struct gpu_perf *gp, unsigned flags)
 | 
			
		||||
		perf_tracepoint_open(gp, "i915", "i915_gem_request_wait_end", wait_end);
 | 
			
		||||
	perf_tracepoint_open(gp, "i915", "i915_flip_complete", flip_complete);
 | 
			
		||||
	perf_tracepoint_open(gp, "i915", "i915_gem_ring_sync_to", ring_sync);
 | 
			
		||||
	perf_tracepoint_open(gp, "i915", "i915_gem_ring_switch_context", ctx_switch);
 | 
			
		||||
 | 
			
		||||
	if (gp->nr_events == 0) {
 | 
			
		||||
		gp->error = "i915.ko tracepoints not available";
 | 
			
		||||
@ -303,20 +314,19 @@ void gpu_perf_init(struct gpu_perf *gp, unsigned flags)
 | 
			
		||||
		return;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static int process_sample(struct gpu_perf *gp,
 | 
			
		||||
static int process_sample(struct gpu_perf *gp, int cpu,
 | 
			
		||||
			  const struct perf_event_header *header)
 | 
			
		||||
{
 | 
			
		||||
	const struct sample_event *sample = (const struct sample_event *)header;
 | 
			
		||||
	int n, update = 0;
 | 
			
		||||
 | 
			
		||||
	/* hash me! */
 | 
			
		||||
	for (n = 0; n < gp->nr_cpus * gp->nr_events; n++) {
 | 
			
		||||
		if (gp->sample[n].id != sample->id)
 | 
			
		||||
	for (n = 0; n < gp->nr_events; n++) {
 | 
			
		||||
		int m = n * gp->nr_cpus + cpu;
 | 
			
		||||
		if (gp->sample[m].id != sample->id)
 | 
			
		||||
			continue;
 | 
			
		||||
 | 
			
		||||
		update = 1;
 | 
			
		||||
		if (gp->sample[n].func)
 | 
			
		||||
			update = gp->sample[n].func(gp, sample);
 | 
			
		||||
		update = gp->sample[m].func(gp, sample);
 | 
			
		||||
		break;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
@ -380,13 +390,14 @@ int gpu_perf_update(struct gpu_perf *gp)
 | 
			
		||||
			}
 | 
			
		||||
 | 
			
		||||
			if (header->type == PERF_RECORD_SAMPLE)
 | 
			
		||||
				update += process_sample(gp, header);
 | 
			
		||||
				update += process_sample(gp, n, header);
 | 
			
		||||
			tail += header->size;
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		if (wrap)
 | 
			
		||||
			tail &= mask;
 | 
			
		||||
		mmap->data_tail = tail;
 | 
			
		||||
		wmb();
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	free(buffer);
 | 
			
		||||
 | 
			
		||||
@ -41,7 +41,9 @@ struct gpu_perf {
 | 
			
		||||
		int (*func)(struct gpu_perf *, const void *);
 | 
			
		||||
	} *sample;
 | 
			
		||||
 | 
			
		||||
	int flip_complete[4];
 | 
			
		||||
	unsigned flip_complete[MAX_RINGS];
 | 
			
		||||
	unsigned ctx_switch[MAX_RINGS];
 | 
			
		||||
 | 
			
		||||
	struct gpu_perf_comm {
 | 
			
		||||
		struct gpu_perf_comm *next;
 | 
			
		||||
		char name[256];
 | 
			
		||||
 | 
			
		||||
@ -102,6 +102,7 @@ struct overlay_gpu_top {
 | 
			
		||||
 | 
			
		||||
/*
 * Overlay state for the perf-event display: the gpu_perf sampler itself
 * plus the bookkeeping needed to decide whether the "Contexts:" line is
 * drawn.
 */
struct overlay_gpu_perf {
 | 
			
		||||
	/* Sampler state, initialised by gpu_perf_init() and refreshed by gpu_perf_update(). */
	struct gpu_perf gpu_perf;
 | 
			
		||||
	/*
	 * ctx->time of the last refresh that printed non-zero context-switch
	 * counts. While non-zero, "Contexts: 0" keeps being shown on idle
	 * refreshes; it is reset to 0 once more than 10 time units have passed
	 * since that refresh, and 0 means the line is hidden when idle.
	 */
	time_t show_ctx;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
struct overlay_gpu_freq {
 | 
			
		||||
@ -127,6 +128,8 @@ struct overlay_context {
 | 
			
		||||
	cairo_t *cr;
 | 
			
		||||
	int width, height;
 | 
			
		||||
 | 
			
		||||
	time_t time;
 | 
			
		||||
 | 
			
		||||
	struct overlay_gpu_top gpu_top;
 | 
			
		||||
	struct overlay_gpu_perf gpu_perf;
 | 
			
		||||
	struct overlay_gpu_freq gpu_freq;
 | 
			
		||||
@ -270,6 +273,8 @@ static void init_gpu_perf(struct overlay_context *ctx,
 | 
			
		||||
			  struct overlay_gpu_perf *gp)
 | 
			
		||||
{
 | 
			
		||||
	gpu_perf_init(&gp->gpu_perf, 0);
 | 
			
		||||
 | 
			
		||||
	gp->show_ctx = 0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static char *get_comm(pid_t pid, char *comm, int len)
 | 
			
		||||
@ -310,6 +315,16 @@ static void show_gpu_perf(struct overlay_context *ctx, struct overlay_gpu_perf *
 | 
			
		||||
	char buf[1024];
 | 
			
		||||
	cairo_pattern_t *linear;
 | 
			
		||||
	int x, y, y1, y2, n;
 | 
			
		||||
	int has_ctx = 0;
 | 
			
		||||
 | 
			
		||||
	gpu_perf_update(&gp->gpu_perf);
 | 
			
		||||
 | 
			
		||||
	for (n = 4; n > 0; n--) {
 | 
			
		||||
		if (gp->gpu_perf.ctx_switch[n-1]) {
 | 
			
		||||
			has_ctx = n;
 | 
			
		||||
			break;
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	cairo_rectangle(ctx->cr, ctx->width/2+HALF_PAD-.5, PAD-.5, ctx->width/2-SIZE_PAD+1, ctx->height/2-SIZE_PAD+1);
 | 
			
		||||
	cairo_set_source_rgb(ctx->cr, .15, .15, .15);
 | 
			
		||||
@ -326,12 +341,9 @@ static void show_gpu_perf(struct overlay_context *ctx, struct overlay_gpu_perf *
 | 
			
		||||
		return;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	gpu_perf_update(&gp->gpu_perf);
 | 
			
		||||
 | 
			
		||||
	y = PAD + 12 - 2;
 | 
			
		||||
	x = ctx->width/2 + HALF_PAD;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
	for (comm = gp->gpu_perf.comm; comm; comm = comm->next) {
 | 
			
		||||
		int total;
 | 
			
		||||
 | 
			
		||||
@ -369,6 +381,8 @@ static void show_gpu_perf(struct overlay_context *ctx, struct overlay_gpu_perf *
 | 
			
		||||
		chart_draw(comm->user_data, ctx->cr);
 | 
			
		||||
		y2 += 14;
 | 
			
		||||
	}
 | 
			
		||||
	if (has_ctx || gp->show_ctx)
 | 
			
		||||
		y2 += 14;
 | 
			
		||||
	y1 += -12 - 2;
 | 
			
		||||
	y2 += 14 - 14 + 4;
 | 
			
		||||
 | 
			
		||||
@ -465,6 +479,27 @@ skip_comm:
 | 
			
		||||
	cairo_move_to(ctx->cr, x, y);
 | 
			
		||||
	cairo_show_text(ctx->cr, buf);
 | 
			
		||||
	y += 14;
 | 
			
		||||
 | 
			
		||||
	cairo_set_source_rgba(ctx->cr, 1, 1, 1, 1);
 | 
			
		||||
	cairo_move_to(ctx->cr, x, y);
 | 
			
		||||
	if (has_ctx) {
 | 
			
		||||
		int len = sprintf(buf, "Contexts:");
 | 
			
		||||
		for (n = 0; n < has_ctx; n++)
 | 
			
		||||
			len += sprintf(buf + len, "%s %d",
 | 
			
		||||
				       n ? "," : "",
 | 
			
		||||
				       gp->gpu_perf.ctx_switch[n]);
 | 
			
		||||
 | 
			
		||||
		memset(gp->gpu_perf.ctx_switch, 0, sizeof(gp->gpu_perf.ctx_switch));
 | 
			
		||||
		gp->show_ctx = ctx->time;
 | 
			
		||||
 | 
			
		||||
		cairo_show_text(ctx->cr, buf);
 | 
			
		||||
		y += 14;
 | 
			
		||||
	} else if (gp->show_ctx) {
 | 
			
		||||
		cairo_show_text(ctx->cr, "Contexts: 0");
 | 
			
		||||
		y += 14;
 | 
			
		||||
		if (ctx->time - gp->show_ctx > 10)
 | 
			
		||||
			gp->show_ctx = 0;
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static void init_gpu_freq(struct overlay_context *ctx,
 | 
			
		||||
@ -841,6 +876,8 @@ int main(int argc, char **argv)
 | 
			
		||||
 | 
			
		||||
	i = 0;
 | 
			
		||||
	while (1) {
 | 
			
		||||
		ctx.time = time(NULL);
 | 
			
		||||
 | 
			
		||||
		ctx.cr = cairo_create(ctx.surface);
 | 
			
		||||
		cairo_set_operator(ctx.cr, CAIRO_OPERATOR_CLEAR);
 | 
			
		||||
		cairo_paint(ctx.cr);
 | 
			
		||||
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user