mirror of
https://github.com/tiagovignatti/intel-gpu-tools.git
synced 2025-06-21 14:56:18 +00:00
benchmark/gem_exec_trace: Inline everything
Avoid the globals and make the dispatch one huge function and hope GCC works some magic.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
This commit is contained in:
parent
a64e6c39b1
commit
77b8af218c
@ -84,24 +84,49 @@ static double elapsed(const struct timespec *start, const struct timespec *end)
|
|||||||
return 1e3*(end->tv_sec - start->tv_sec) + 1e-6*(end->tv_nsec - start->tv_nsec);
|
return 1e3*(end->tv_sec - start->tv_sec) + 1e-6*(end->tv_nsec - start->tv_nsec);
|
||||||
}
|
}
|
||||||
|
|
||||||
int fd;
|
static void replay(const char *filename)
|
||||||
|
{
|
||||||
struct bo {
|
struct timespec t_start, t_end;
|
||||||
|
struct drm_i915_gem_execbuffer2 eb = {};
|
||||||
|
struct bo {
|
||||||
uint32_t handle;
|
uint32_t handle;
|
||||||
uint64_t offset;
|
uint64_t offset;
|
||||||
|
|
||||||
struct drm_i915_gem_relocation_entry *relocs;
|
struct drm_i915_gem_relocation_entry *relocs;
|
||||||
uint32_t max_relocs;
|
uint32_t max_relocs;
|
||||||
} *bo, **offsets;
|
} *bo = NULL, **offsets = NULL;
|
||||||
int num_bo;
|
int num_bo = 0;
|
||||||
|
struct drm_i915_gem_exec_object2 *exec_objects = NULL;
|
||||||
|
int max_objects = 0;
|
||||||
|
struct stat st;
|
||||||
|
uint8_t *ptr, *end;
|
||||||
|
int fd;
|
||||||
|
|
||||||
struct drm_i915_gem_exec_object2 *exec_objects;
|
fd = open(filename, O_RDONLY);
|
||||||
int max_objects;
|
if (fd < 0)
|
||||||
|
return;
|
||||||
|
|
||||||
static void *add_bo(void *ptr)
|
if (fstat(fd, &st) < 0)
|
||||||
{
|
return;
|
||||||
struct trace_add_bo *t = ptr;
|
|
||||||
|
ptr = mmap(0, st.st_size, PROT_READ, MAP_SHARED, fd, 0);
|
||||||
|
close(fd);
|
||||||
|
|
||||||
|
if (ptr == MAP_FAILED)
|
||||||
|
return;
|
||||||
|
|
||||||
|
madvise(ptr, st.st_size, MADV_SEQUENTIAL);
|
||||||
|
|
||||||
|
end = ptr + st.st_size;
|
||||||
|
fd = drm_open_any();
|
||||||
|
|
||||||
|
clock_gettime(CLOCK_MONOTONIC, &t_start);
|
||||||
|
do {
|
||||||
|
switch (*ptr++) {
|
||||||
|
case ADD_BO: {
|
||||||
uint32_t bb = 0xa << 23;
|
uint32_t bb = 0xa << 23;
|
||||||
|
struct trace_add_bo *t = (void *)ptr;
|
||||||
|
ptr = (void *)(t + 1);
|
||||||
|
|
||||||
if (t->handle >= num_bo) {
|
if (t->handle >= num_bo) {
|
||||||
int new_bo = (t->handle + 4096) & -4096;
|
int new_bo = (t->handle + 4096) & -4096;
|
||||||
@ -112,13 +137,11 @@ static void *add_bo(void *ptr)
|
|||||||
|
|
||||||
bo[t->handle].handle = gem_create(fd, t->size);
|
bo[t->handle].handle = gem_create(fd, t->size);
|
||||||
gem_write(fd, bo[t->handle].handle, 0, &bb, sizeof(bb));
|
gem_write(fd, bo[t->handle].handle, 0, &bb, sizeof(bb));
|
||||||
|
break;
|
||||||
return t + 1;
|
}
|
||||||
}
|
case DEL_BO: {
|
||||||
|
struct trace_del_bo *t = (void *)ptr;
|
||||||
static void *del_bo(void *ptr)
|
ptr = (void *)(t + 1);
|
||||||
{
|
|
||||||
struct trace_del_bo *t = ptr;
|
|
||||||
|
|
||||||
gem_close(fd, bo[t->handle].handle);
|
gem_close(fd, bo[t->handle].handle);
|
||||||
bo[t->handle].handle = 0;
|
bo[t->handle].handle = 0;
|
||||||
@ -126,36 +149,32 @@ static void *del_bo(void *ptr)
|
|||||||
free(bo[t->handle].relocs);
|
free(bo[t->handle].relocs);
|
||||||
bo[t->handle].relocs = NULL;
|
bo[t->handle].relocs = NULL;
|
||||||
bo[t->handle].max_relocs = 0;
|
bo[t->handle].max_relocs = 0;
|
||||||
|
break;
|
||||||
return t + 1;
|
}
|
||||||
}
|
case EXEC: {
|
||||||
|
struct trace_exec *t = (void *)ptr;
|
||||||
static void *exec(void *ptr)
|
|
||||||
{
|
|
||||||
struct trace_exec *t = ptr;
|
|
||||||
struct drm_i915_gem_execbuffer2 eb;
|
|
||||||
uint32_t i, j;
|
uint32_t i, j;
|
||||||
|
ptr = (void *)(t + 1);
|
||||||
|
|
||||||
memset(&eb, 0, sizeof(eb));
|
|
||||||
eb.buffer_count = t->object_count;
|
eb.buffer_count = t->object_count;
|
||||||
eb.flags = t->flags & ~I915_EXEC_RING_MASK;
|
eb.flags = t->flags & ~I915_EXEC_RING_MASK;
|
||||||
|
|
||||||
if (t->object_count > max_objects) {
|
if (eb.buffer_count > max_objects) {
|
||||||
free(exec_objects);
|
free(exec_objects);
|
||||||
free(offsets);
|
free(offsets);
|
||||||
|
|
||||||
max_objects = ALIGN(t->object_count, 4096);
|
max_objects = ALIGN(eb.buffer_count, 4096);
|
||||||
|
|
||||||
exec_objects = malloc(max_objects*sizeof(*exec_objects));
|
exec_objects = malloc(max_objects*sizeof(*exec_objects));
|
||||||
offsets = malloc(max_objects*sizeof(*offsets));
|
offsets = malloc(max_objects*sizeof(*offsets));
|
||||||
}
|
|
||||||
eb.buffers_ptr = (uintptr_t)exec_objects;
|
|
||||||
|
|
||||||
ptr = t + 1;
|
eb.buffers_ptr = (uintptr_t)exec_objects;
|
||||||
for (i = 0; i < t->object_count; i++) {
|
}
|
||||||
|
|
||||||
|
for (i = 0; i < eb.buffer_count; i++) {
|
||||||
struct drm_i915_gem_relocation_entry *relocs;
|
struct drm_i915_gem_relocation_entry *relocs;
|
||||||
struct trace_exec_object *to = ptr;
|
struct trace_exec_object *to = (void *)ptr;
|
||||||
ptr = to + 1;
|
ptr = (void *)(to + 1);
|
||||||
|
|
||||||
offsets[i] = &bo[to->handle];
|
offsets[i] = &bo[to->handle];
|
||||||
|
|
||||||
@ -180,10 +199,10 @@ static void *exec(void *ptr)
|
|||||||
exec_objects[i].relocs_ptr = (uintptr_t)relocs;
|
exec_objects[i].relocs_ptr = (uintptr_t)relocs;
|
||||||
|
|
||||||
for (j = 0; j < to->relocation_count; j++) {
|
for (j = 0; j < to->relocation_count; j++) {
|
||||||
struct trace_exec_relocation *tr = ptr;
|
struct trace_exec_relocation *tr = (void *)ptr;
|
||||||
ptr = tr + 1;
|
ptr = (void *)(tr + 1);
|
||||||
|
|
||||||
if (t->flags & I915_EXEC_HANDLE_LUT) {
|
if (eb.flags & I915_EXEC_HANDLE_LUT) {
|
||||||
uint32_t handle;
|
uint32_t handle;
|
||||||
|
|
||||||
relocs[j].target_handle = tr->target_handle;
|
relocs[j].target_handle = tr->target_handle;
|
||||||
@ -203,54 +222,22 @@ static void *exec(void *ptr)
|
|||||||
|
|
||||||
gem_execbuf(fd, &eb);
|
gem_execbuf(fd, &eb);
|
||||||
|
|
||||||
for (i = 0; i < t->object_count; i++)
|
for (i = 0; i < eb.buffer_count; i++)
|
||||||
offsets[i]->offset = exec_objects[i].offset;
|
offsets[i]->offset = exec_objects[i].offset;
|
||||||
|
|
||||||
return ptr;
|
|
||||||
}
|
|
||||||
|
|
||||||
static void replay(const char *filename)
|
|
||||||
{
|
|
||||||
struct timespec t_start, t_end;
|
|
||||||
struct stat st;
|
|
||||||
uint8_t *ptr, *end;
|
|
||||||
|
|
||||||
fd = open(filename, O_RDONLY);
|
|
||||||
if (fd < 0)
|
|
||||||
return;
|
|
||||||
|
|
||||||
if (fstat(fd, &st) < 0)
|
|
||||||
return;
|
|
||||||
|
|
||||||
ptr = mmap(0, st.st_size, PROT_READ, MAP_SHARED, fd, 0);
|
|
||||||
close(fd);
|
|
||||||
|
|
||||||
if (ptr == MAP_FAILED)
|
|
||||||
return;
|
|
||||||
|
|
||||||
madvise(ptr, st.st_size, MADV_SEQUENTIAL);
|
|
||||||
|
|
||||||
end = ptr + st.st_size;
|
|
||||||
fd = drm_open_any();
|
|
||||||
|
|
||||||
clock_gettime(CLOCK_MONOTONIC, &t_start);
|
|
||||||
do {
|
|
||||||
switch (*ptr++) {
|
|
||||||
case ADD_BO:
|
|
||||||
ptr = add_bo(ptr);
|
|
||||||
break;
|
|
||||||
case DEL_BO:
|
|
||||||
ptr = del_bo(ptr);
|
|
||||||
break;
|
|
||||||
case EXEC:
|
|
||||||
ptr = exec(ptr);
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
} while (ptr < end);
|
} while (ptr < end);
|
||||||
clock_gettime(CLOCK_MONOTONIC, &t_end);
|
clock_gettime(CLOCK_MONOTONIC, &t_end);
|
||||||
close(fd);
|
close(fd);
|
||||||
munmap(end-st.st_size, st.st_size);
|
munmap(end-st.st_size, st.st_size);
|
||||||
|
|
||||||
|
for (fd = 0; fd < num_bo; fd++)
|
||||||
|
free(bo[fd].relocs);
|
||||||
|
free(bo);
|
||||||
|
free(offsets);
|
||||||
|
|
||||||
printf("%s: %.3f\n", filename, elapsed(&t_start, &t_end));
|
printf("%s: %.3f\n", filename, elapsed(&t_start, &t_end));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user