benchmark/gem_exec_trace: Inline everything

Avoid the globals by folding add_bo(), del_bo() and exec() into replay() as
one huge function, and hope GCC works some magic.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
This commit is contained in:
Chris Wilson 2015-08-14 20:35:18 +01:00
parent a64e6c39b1
commit 77b8af218c

View File

@ -84,24 +84,49 @@ static double elapsed(const struct timespec *start, const struct timespec *end)
return 1e3*(end->tv_sec - start->tv_sec) + 1e-6*(end->tv_nsec - start->tv_nsec); return 1e3*(end->tv_sec - start->tv_sec) + 1e-6*(end->tv_nsec - start->tv_nsec);
} }
int fd; static void replay(const char *filename)
{
struct timespec t_start, t_end;
struct drm_i915_gem_execbuffer2 eb = {};
struct bo { struct bo {
uint32_t handle; uint32_t handle;
uint64_t offset; uint64_t offset;
struct drm_i915_gem_relocation_entry *relocs; struct drm_i915_gem_relocation_entry *relocs;
uint32_t max_relocs; uint32_t max_relocs;
} *bo, **offsets; } *bo = NULL, **offsets = NULL;
int num_bo; int num_bo = 0;
struct drm_i915_gem_exec_object2 *exec_objects = NULL;
int max_objects = 0;
struct stat st;
uint8_t *ptr, *end;
int fd;
struct drm_i915_gem_exec_object2 *exec_objects; fd = open(filename, O_RDONLY);
int max_objects; if (fd < 0)
return;
static void *add_bo(void *ptr) if (fstat(fd, &st) < 0)
{ return;
struct trace_add_bo *t = ptr;
ptr = mmap(0, st.st_size, PROT_READ, MAP_SHARED, fd, 0);
close(fd);
if (ptr == MAP_FAILED)
return;
madvise(ptr, st.st_size, MADV_SEQUENTIAL);
end = ptr + st.st_size;
fd = drm_open_any();
clock_gettime(CLOCK_MONOTONIC, &t_start);
do {
switch (*ptr++) {
case ADD_BO: {
uint32_t bb = 0xa << 23; uint32_t bb = 0xa << 23;
struct trace_add_bo *t = (void *)ptr;
ptr = (void *)(t + 1);
if (t->handle >= num_bo) { if (t->handle >= num_bo) {
int new_bo = (t->handle + 4096) & -4096; int new_bo = (t->handle + 4096) & -4096;
@ -112,13 +137,11 @@ static void *add_bo(void *ptr)
bo[t->handle].handle = gem_create(fd, t->size); bo[t->handle].handle = gem_create(fd, t->size);
gem_write(fd, bo[t->handle].handle, 0, &bb, sizeof(bb)); gem_write(fd, bo[t->handle].handle, 0, &bb, sizeof(bb));
break;
return t + 1;
} }
case DEL_BO: {
static void *del_bo(void *ptr) struct trace_del_bo *t = (void *)ptr;
{ ptr = (void *)(t + 1);
struct trace_del_bo *t = ptr;
gem_close(fd, bo[t->handle].handle); gem_close(fd, bo[t->handle].handle);
bo[t->handle].handle = 0; bo[t->handle].handle = 0;
@ -126,36 +149,32 @@ static void *del_bo(void *ptr)
free(bo[t->handle].relocs); free(bo[t->handle].relocs);
bo[t->handle].relocs = NULL; bo[t->handle].relocs = NULL;
bo[t->handle].max_relocs = 0; bo[t->handle].max_relocs = 0;
break;
return t + 1;
} }
case EXEC: {
static void *exec(void *ptr) struct trace_exec *t = (void *)ptr;
{
struct trace_exec *t = ptr;
struct drm_i915_gem_execbuffer2 eb;
uint32_t i, j; uint32_t i, j;
ptr = (void *)(t + 1);
memset(&eb, 0, sizeof(eb));
eb.buffer_count = t->object_count; eb.buffer_count = t->object_count;
eb.flags = t->flags & ~I915_EXEC_RING_MASK; eb.flags = t->flags & ~I915_EXEC_RING_MASK;
if (t->object_count > max_objects) { if (eb.buffer_count > max_objects) {
free(exec_objects); free(exec_objects);
free(offsets); free(offsets);
max_objects = ALIGN(t->object_count, 4096); max_objects = ALIGN(eb.buffer_count, 4096);
exec_objects = malloc(max_objects*sizeof(*exec_objects)); exec_objects = malloc(max_objects*sizeof(*exec_objects));
offsets = malloc(max_objects*sizeof(*offsets)); offsets = malloc(max_objects*sizeof(*offsets));
}
eb.buffers_ptr = (uintptr_t)exec_objects;
ptr = t + 1; eb.buffers_ptr = (uintptr_t)exec_objects;
for (i = 0; i < t->object_count; i++) { }
for (i = 0; i < eb.buffer_count; i++) {
struct drm_i915_gem_relocation_entry *relocs; struct drm_i915_gem_relocation_entry *relocs;
struct trace_exec_object *to = ptr; struct trace_exec_object *to = (void *)ptr;
ptr = to + 1; ptr = (void *)(to + 1);
offsets[i] = &bo[to->handle]; offsets[i] = &bo[to->handle];
@ -180,10 +199,10 @@ static void *exec(void *ptr)
exec_objects[i].relocs_ptr = (uintptr_t)relocs; exec_objects[i].relocs_ptr = (uintptr_t)relocs;
for (j = 0; j < to->relocation_count; j++) { for (j = 0; j < to->relocation_count; j++) {
struct trace_exec_relocation *tr = ptr; struct trace_exec_relocation *tr = (void *)ptr;
ptr = tr + 1; ptr = (void *)(tr + 1);
if (t->flags & I915_EXEC_HANDLE_LUT) { if (eb.flags & I915_EXEC_HANDLE_LUT) {
uint32_t handle; uint32_t handle;
relocs[j].target_handle = tr->target_handle; relocs[j].target_handle = tr->target_handle;
@ -203,54 +222,22 @@ static void *exec(void *ptr)
gem_execbuf(fd, &eb); gem_execbuf(fd, &eb);
for (i = 0; i < t->object_count; i++) for (i = 0; i < eb.buffer_count; i++)
offsets[i]->offset = exec_objects[i].offset; offsets[i]->offset = exec_objects[i].offset;
return ptr; break;
} }
static void replay(const char *filename)
{
struct timespec t_start, t_end;
struct stat st;
uint8_t *ptr, *end;
fd = open(filename, O_RDONLY);
if (fd < 0)
return;
if (fstat(fd, &st) < 0)
return;
ptr = mmap(0, st.st_size, PROT_READ, MAP_SHARED, fd, 0);
close(fd);
if (ptr == MAP_FAILED)
return;
madvise(ptr, st.st_size, MADV_SEQUENTIAL);
end = ptr + st.st_size;
fd = drm_open_any();
clock_gettime(CLOCK_MONOTONIC, &t_start);
do {
switch (*ptr++) {
case ADD_BO:
ptr = add_bo(ptr);
break;
case DEL_BO:
ptr = del_bo(ptr);
break;
case EXEC:
ptr = exec(ptr);
break;
} }
} while (ptr < end); } while (ptr < end);
clock_gettime(CLOCK_MONOTONIC, &t_end); clock_gettime(CLOCK_MONOTONIC, &t_end);
close(fd); close(fd);
munmap(end-st.st_size, st.st_size); munmap(end-st.st_size, st.st_size);
for (fd = 0; fd < num_bo; fd++)
free(bo[fd].relocs);
free(bo);
free(offsets);
printf("%s: %.3f\n", filename, elapsed(&t_start, &t_end)); printf("%s: %.3f\n", filename, elapsed(&t_start, &t_end));
} }