mirror of
https://github.com/tiagovignatti/intel-gpu-tools.git
synced 2025-06-21 14:56:18 +00:00
benchmark/gem_exec_trace: Inline everything
Avoid the globals and make the dispatch one huge function and hope GCC works some magic.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
This commit is contained in:
parent
a64e6c39b1
commit
77b8af218c
@ -84,24 +84,49 @@ static double elapsed(const struct timespec *start, const struct timespec *end)
|
||||
return 1e3*(end->tv_sec - start->tv_sec) + 1e-6*(end->tv_nsec - start->tv_nsec);
|
||||
}
|
||||
|
||||
int fd;
|
||||
|
||||
struct bo {
|
||||
static void replay(const char *filename)
|
||||
{
|
||||
struct timespec t_start, t_end;
|
||||
struct drm_i915_gem_execbuffer2 eb = {};
|
||||
struct bo {
|
||||
uint32_t handle;
|
||||
uint64_t offset;
|
||||
|
||||
struct drm_i915_gem_relocation_entry *relocs;
|
||||
uint32_t max_relocs;
|
||||
} *bo, **offsets;
|
||||
int num_bo;
|
||||
} *bo = NULL, **offsets = NULL;
|
||||
int num_bo = 0;
|
||||
struct drm_i915_gem_exec_object2 *exec_objects = NULL;
|
||||
int max_objects = 0;
|
||||
struct stat st;
|
||||
uint8_t *ptr, *end;
|
||||
int fd;
|
||||
|
||||
struct drm_i915_gem_exec_object2 *exec_objects;
|
||||
int max_objects;
|
||||
fd = open(filename, O_RDONLY);
|
||||
if (fd < 0)
|
||||
return;
|
||||
|
||||
static void *add_bo(void *ptr)
|
||||
{
|
||||
struct trace_add_bo *t = ptr;
|
||||
if (fstat(fd, &st) < 0)
|
||||
return;
|
||||
|
||||
ptr = mmap(0, st.st_size, PROT_READ, MAP_SHARED, fd, 0);
|
||||
close(fd);
|
||||
|
||||
if (ptr == MAP_FAILED)
|
||||
return;
|
||||
|
||||
madvise(ptr, st.st_size, MADV_SEQUENTIAL);
|
||||
|
||||
end = ptr + st.st_size;
|
||||
fd = drm_open_any();
|
||||
|
||||
clock_gettime(CLOCK_MONOTONIC, &t_start);
|
||||
do {
|
||||
switch (*ptr++) {
|
||||
case ADD_BO: {
|
||||
uint32_t bb = 0xa << 23;
|
||||
struct trace_add_bo *t = (void *)ptr;
|
||||
ptr = (void *)(t + 1);
|
||||
|
||||
if (t->handle >= num_bo) {
|
||||
int new_bo = (t->handle + 4096) & -4096;
|
||||
@ -112,13 +137,11 @@ static void *add_bo(void *ptr)
|
||||
|
||||
bo[t->handle].handle = gem_create(fd, t->size);
|
||||
gem_write(fd, bo[t->handle].handle, 0, &bb, sizeof(bb));
|
||||
|
||||
return t + 1;
|
||||
}
|
||||
|
||||
static void *del_bo(void *ptr)
|
||||
{
|
||||
struct trace_del_bo *t = ptr;
|
||||
break;
|
||||
}
|
||||
case DEL_BO: {
|
||||
struct trace_del_bo *t = (void *)ptr;
|
||||
ptr = (void *)(t + 1);
|
||||
|
||||
gem_close(fd, bo[t->handle].handle);
|
||||
bo[t->handle].handle = 0;
|
||||
@ -126,36 +149,32 @@ static void *del_bo(void *ptr)
|
||||
free(bo[t->handle].relocs);
|
||||
bo[t->handle].relocs = NULL;
|
||||
bo[t->handle].max_relocs = 0;
|
||||
|
||||
return t + 1;
|
||||
}
|
||||
|
||||
static void *exec(void *ptr)
|
||||
{
|
||||
struct trace_exec *t = ptr;
|
||||
struct drm_i915_gem_execbuffer2 eb;
|
||||
break;
|
||||
}
|
||||
case EXEC: {
|
||||
struct trace_exec *t = (void *)ptr;
|
||||
uint32_t i, j;
|
||||
ptr = (void *)(t + 1);
|
||||
|
||||
memset(&eb, 0, sizeof(eb));
|
||||
eb.buffer_count = t->object_count;
|
||||
eb.flags = t->flags & ~I915_EXEC_RING_MASK;
|
||||
|
||||
if (t->object_count > max_objects) {
|
||||
if (eb.buffer_count > max_objects) {
|
||||
free(exec_objects);
|
||||
free(offsets);
|
||||
|
||||
max_objects = ALIGN(t->object_count, 4096);
|
||||
max_objects = ALIGN(eb.buffer_count, 4096);
|
||||
|
||||
exec_objects = malloc(max_objects*sizeof(*exec_objects));
|
||||
offsets = malloc(max_objects*sizeof(*offsets));
|
||||
}
|
||||
eb.buffers_ptr = (uintptr_t)exec_objects;
|
||||
|
||||
ptr = t + 1;
|
||||
for (i = 0; i < t->object_count; i++) {
|
||||
eb.buffers_ptr = (uintptr_t)exec_objects;
|
||||
}
|
||||
|
||||
for (i = 0; i < eb.buffer_count; i++) {
|
||||
struct drm_i915_gem_relocation_entry *relocs;
|
||||
struct trace_exec_object *to = ptr;
|
||||
ptr = to + 1;
|
||||
struct trace_exec_object *to = (void *)ptr;
|
||||
ptr = (void *)(to + 1);
|
||||
|
||||
offsets[i] = &bo[to->handle];
|
||||
|
||||
@ -180,10 +199,10 @@ static void *exec(void *ptr)
|
||||
exec_objects[i].relocs_ptr = (uintptr_t)relocs;
|
||||
|
||||
for (j = 0; j < to->relocation_count; j++) {
|
||||
struct trace_exec_relocation *tr = ptr;
|
||||
ptr = tr + 1;
|
||||
struct trace_exec_relocation *tr = (void *)ptr;
|
||||
ptr = (void *)(tr + 1);
|
||||
|
||||
if (t->flags & I915_EXEC_HANDLE_LUT) {
|
||||
if (eb.flags & I915_EXEC_HANDLE_LUT) {
|
||||
uint32_t handle;
|
||||
|
||||
relocs[j].target_handle = tr->target_handle;
|
||||
@ -203,54 +222,22 @@ static void *exec(void *ptr)
|
||||
|
||||
gem_execbuf(fd, &eb);
|
||||
|
||||
for (i = 0; i < t->object_count; i++)
|
||||
for (i = 0; i < eb.buffer_count; i++)
|
||||
offsets[i]->offset = exec_objects[i].offset;
|
||||
|
||||
return ptr;
|
||||
}
|
||||
|
||||
static void replay(const char *filename)
|
||||
{
|
||||
struct timespec t_start, t_end;
|
||||
struct stat st;
|
||||
uint8_t *ptr, *end;
|
||||
|
||||
fd = open(filename, O_RDONLY);
|
||||
if (fd < 0)
|
||||
return;
|
||||
|
||||
if (fstat(fd, &st) < 0)
|
||||
return;
|
||||
|
||||
ptr = mmap(0, st.st_size, PROT_READ, MAP_SHARED, fd, 0);
|
||||
close(fd);
|
||||
|
||||
if (ptr == MAP_FAILED)
|
||||
return;
|
||||
|
||||
madvise(ptr, st.st_size, MADV_SEQUENTIAL);
|
||||
|
||||
end = ptr + st.st_size;
|
||||
fd = drm_open_any();
|
||||
|
||||
clock_gettime(CLOCK_MONOTONIC, &t_start);
|
||||
do {
|
||||
switch (*ptr++) {
|
||||
case ADD_BO:
|
||||
ptr = add_bo(ptr);
|
||||
break;
|
||||
case DEL_BO:
|
||||
ptr = del_bo(ptr);
|
||||
break;
|
||||
case EXEC:
|
||||
ptr = exec(ptr);
|
||||
break;
|
||||
}
|
||||
}
|
||||
} while (ptr < end);
|
||||
clock_gettime(CLOCK_MONOTONIC, &t_end);
|
||||
close(fd);
|
||||
munmap(end-st.st_size, st.st_size);
|
||||
|
||||
for (fd = 0; fd < num_bo; fd++)
|
||||
free(bo[fd].relocs);
|
||||
free(bo);
|
||||
free(offsets);
|
||||
|
||||
printf("%s: %.3f\n", filename, elapsed(&t_start, &t_end));
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user