mirror of
https://github.com/tiagovignatti/intel-gpu-tools.git
synced 2025-06-10 09:26:10 +00:00
igt/gem_ringfill: Reduce ringfill to just filling the rings
The objective of this test is to check how the driver handles a full ring. To that end, we only need to fill the ring, which means submitting work faster than the GPU can execute it. If we are more careful in our batch construction, we can feed the rings much faster and achieve the same result much more quickly. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
This commit is contained in:
parent
c4bcffcd10
commit
e180bec7bc
@ -32,108 +32,24 @@
|
||||
*/
|
||||
|
||||
#include "igt.h"
|
||||
#include <stdbool.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <fcntl.h>
|
||||
#include <inttypes.h>
|
||||
#include <errno.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/time.h>
|
||||
#include "igt_gt.h"
|
||||
|
||||
#include <drm.h>
|
||||
|
||||
|
||||
/*
 * The set of buffer objects used by one ring-filling run.
 *
 * ring: name of the ring under test; only used to label failure messages.
 * src:  source surface, filled with the index of each pixel.
 * dst:  destination surface, initially filled with garbage and later
 *       compared against the pixel indexes.
 * tmp:  scratch surface that receives the dummy full-size copies.
 */
struct bo {
|
||||
const char *ring;
|
||||
drm_intel_bo *src, *dst, *tmp;
|
||||
};
|
||||
|
||||
static const int width = 512, height = 512;
|
||||
int fd;
|
||||
|
||||
static void create_bo(drm_intel_bufmgr *bufmgr,
|
||||
struct bo *b,
|
||||
const char *ring)
|
||||
static void check_bo(int fd, uint32_t handle)
|
||||
{
|
||||
int size = 4 * width * height, i;
|
||||
uint32_t *map;
|
||||
|
||||
b->ring = ring;
|
||||
b->src = drm_intel_bo_alloc(bufmgr, "src", size, 4096);
|
||||
b->dst = drm_intel_bo_alloc(bufmgr, "dst", size, 4096);
|
||||
b->tmp = drm_intel_bo_alloc(bufmgr, "tmp", size, 4096);
|
||||
|
||||
/* Fill the src with indexes of the pixels */
|
||||
drm_intel_bo_map(b->src, true);
|
||||
map = b->src->virtual;
|
||||
for (i = 0; i < width * height; i++)
|
||||
map[i] = i;
|
||||
drm_intel_bo_unmap(b->src);
|
||||
|
||||
/* Fill the dst with garbage. */
|
||||
drm_intel_bo_map(b->dst, true);
|
||||
map = b->dst->virtual;
|
||||
for (i = 0; i < width * height; i++)
|
||||
map[i] = 0xd0d0d0d0;
|
||||
drm_intel_bo_unmap(b->dst);
|
||||
}
|
||||
|
||||
/*
 * Verify the contents of b->dst after the copies have been submitted.
 *
 * Every one of the width * height dwords in b->dst is expected to hold its
 * own linear index. Each mismatch is counted, but only the first nine are
 * reported through igt_info(), together with the ring name and the x,y
 * position derived from the index.
 *
 * Returns the total number of mismatching dwords (0 when the buffer is
 * fully correct).
 */
static int check_bo(struct bo *b)
|
||||
{
|
||||
const uint32_t *map;
|
||||
int i, fails = 0;
|
||||
|
||||
igt_debug("verifying\n");
|
||||
|
||||
/* Map with the write flag cleared; presumably do_or_die() aborts the test if mapping fails -- confirm */
do_or_die(drm_intel_bo_map(b->dst, false));
|
||||
|
||||
map = b->dst->virtual;
|
||||
for (i = 0; i < width*height; i++) {
|
||||
/* ++fails only runs on a mismatch; logging stops after nine reports but counting continues */
if (map[i] != i && ++fails <= 9) {
|
||||
int x = i % width;
|
||||
int y = i / width;
|
||||
|
||||
igt_info("%s: copy #%d at %d,%d failed: read 0x%08x\n",
|
||||
b->ring, i, x, y, map[i]);
|
||||
}
|
||||
}
|
||||
drm_intel_bo_unmap(b->dst);
|
||||
|
||||
return fails;
|
||||
}
|
||||
|
||||
/*
 * Drop the references held on the three buffer objects in *b
 * (src, tmp and dst). The struct bo itself is not freed or cleared here.
 */
static void destroy_bo(struct bo *b)
|
||||
{
|
||||
drm_intel_bo_unreference(b->src);
|
||||
drm_intel_bo_unreference(b->tmp);
|
||||
drm_intel_bo_unreference(b->dst);
|
||||
}
|
||||
|
||||
static void fill_ring(drm_intel_bufmgr *bufmgr,
|
||||
const char *ring,
|
||||
igt_render_copyfunc_t copy)
|
||||
{
|
||||
struct intel_batchbuffer *batch;
|
||||
struct igt_buf src, tmp, dst;
|
||||
struct bo bo;
|
||||
int i;
|
||||
|
||||
batch = intel_batchbuffer_alloc(bufmgr, intel_get_drm_devid(fd));
|
||||
igt_assert(batch);
|
||||
igt_debug("Verifying result\n");
|
||||
map = gem_mmap__cpu(fd, handle, 0, 4096, PROT_READ);
|
||||
gem_set_domain(fd, handle, I915_GEM_DOMAIN_CPU, 0);
|
||||
for (i = 0; i < 1024; i++)
|
||||
igt_assert_eq(map[i], i);
|
||||
munmap(map, 4096);
|
||||
}
|
||||
|
||||
create_bo(bufmgr, &bo, ring);
|
||||
|
||||
src.stride = 4 * width;
|
||||
src.tiling = 0;
|
||||
src.size = 4 * width * height;
|
||||
src.num_tiles = 4 * width * height;
|
||||
dst = tmp = src;
|
||||
|
||||
src.bo = bo.src;
|
||||
tmp.bo = bo.tmp;
|
||||
dst.bo = bo.dst;
|
||||
static void fill_ring(int fd, struct drm_i915_gem_execbuffer2 *execbuf)
|
||||
{
|
||||
int i;
|
||||
|
||||
/* The ring we've been using is 128k, and each rendering op
|
||||
* will use at least 8 dwords:
|
||||
@ -150,120 +66,168 @@ static void fill_ring(drm_intel_bufmgr *bufmgr,
|
||||
* So iterate just a little more than that -- if we don't fill the ring
|
||||
* doing this, we aren't likely to with this test.
|
||||
*/
|
||||
for (i = 0; i < width * height; i++) {
|
||||
int x = i % width;
|
||||
int y = i / width;
|
||||
|
||||
igt_assert_lt(y, height);
|
||||
|
||||
/* Dummy load to fill the ring */
|
||||
copy(batch, NULL, &src, 0, 0, width, height, &tmp, 0, 0);
|
||||
/* And copy the src into dst, pixel by pixel */
|
||||
copy(batch, NULL, &src, x, y, 1, 1, &dst, x, y);
|
||||
}
|
||||
|
||||
/* verify */
|
||||
igt_assert_eq(check_bo(&bo), 0);
|
||||
destroy_bo(&bo);
|
||||
intel_batchbuffer_free(batch);
|
||||
igt_debug("Executing execbuf %d times\n", 128*1024/(8*4));
|
||||
for (i = 0; i < 128*1024 / (8 * 4); i++)
|
||||
gem_execbuf(fd, execbuf);
|
||||
}
|
||||
|
||||
static void blt_copy(struct intel_batchbuffer *batch,
|
||||
drm_intel_context *context,
|
||||
struct igt_buf *src, unsigned src_x, unsigned src_y,
|
||||
unsigned w, unsigned h,
|
||||
struct igt_buf *dst, unsigned dst_x, unsigned dst_y)
|
||||
static int __gem_execbuf(int fd, struct drm_i915_gem_execbuffer2 *execbuf)
|
||||
{
|
||||
BLIT_COPY_BATCH_START(0);
|
||||
OUT_BATCH((3 << 24) | /* 32 bits */
|
||||
(0xcc << 16) | /* copy ROP */
|
||||
dst->stride);
|
||||
OUT_BATCH((dst_y << 16) | dst_x); /* dst x1,y1 */
|
||||
OUT_BATCH(((dst_y + h) << 16) | (dst_x + w)); /* dst x2,y2 */
|
||||
OUT_RELOC_FENCED(dst->bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);
|
||||
OUT_BATCH((src_y << 16) | src_x); /* src x1,y1 */
|
||||
OUT_BATCH(src->stride);
|
||||
OUT_RELOC_FENCED(src->bo, I915_GEM_DOMAIN_RENDER, 0, 0);
|
||||
ADVANCE_BATCH();
|
||||
|
||||
intel_batchbuffer_flush(batch);
|
||||
int err = 0;
|
||||
if (drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, execbuf))
|
||||
err = -errno;
|
||||
return err;
|
||||
}
|
||||
|
||||
static void run_test(int ring, bool interruptible, int nchild) {
|
||||
drm_intel_bufmgr *bufmgr;
|
||||
igt_render_copyfunc_t copy;
|
||||
const char* ring_name;
|
||||
#define INTERRUPTIBLE 0x1
|
||||
#define HANG 0x2
|
||||
#define CHILD 0x8
|
||||
#define FORKED 0x8
|
||||
#define BOMB 0x10
|
||||
|
||||
bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
|
||||
igt_assert(bufmgr);
|
||||
drm_intel_bufmgr_gem_enable_reuse(bufmgr);
|
||||
static void run_test(int fd, unsigned ring, unsigned flags)
|
||||
{
|
||||
const int gen = intel_gen(intel_get_drm_devid(fd));
|
||||
const uint32_t bbe = MI_BATCH_BUFFER_END;
|
||||
struct drm_i915_gem_exec_object2 obj[2];
|
||||
struct drm_i915_gem_relocation_entry reloc[1024];
|
||||
struct drm_i915_gem_execbuffer2 execbuf;
|
||||
struct igt_hang_ring hang;
|
||||
uint32_t *batch, *b;
|
||||
int i;
|
||||
|
||||
if (ring == I915_EXEC_RENDER) {
|
||||
copy = igt_get_render_copyfunc(intel_get_drm_devid(fd));
|
||||
ring_name = "render";
|
||||
} else if (ring == I915_EXEC_BLT) {
|
||||
copy = blt_copy;
|
||||
ring_name = "blt";
|
||||
} else {
|
||||
igt_fail_on_f(true, "Unsupported ring.");
|
||||
}
|
||||
memset(&execbuf, 0, sizeof(execbuf));
|
||||
execbuf.buffers_ptr = (uintptr_t)obj;
|
||||
execbuf.buffer_count = 2;
|
||||
execbuf.flags = ring | (1 << 11);
|
||||
|
||||
/* Not all platforms have dedicated render ring. */
|
||||
igt_require(copy);
|
||||
memset(obj, 0, sizeof(obj));
|
||||
obj[0].handle = gem_create(fd, 4096);
|
||||
obj[1].handle = gem_create(fd, 1024*16 + 4096);
|
||||
gem_write(fd, obj[1].handle, 0, &bbe, sizeof(bbe));
|
||||
igt_require(__gem_execbuf(fd, &execbuf) == 0);
|
||||
|
||||
if (interruptible) {
|
||||
igt_fork_signal_helper();
|
||||
}
|
||||
obj[1].relocs_ptr = (uintptr_t)reloc;
|
||||
obj[1].relocation_count = 1024;
|
||||
|
||||
if (nchild) {
|
||||
igt_fork(child, nchild) {
|
||||
fill_ring(bufmgr, ring_name, copy);
|
||||
batch = gem_mmap__cpu(fd, obj[1].handle, 0, 16*1024 + 4096,
|
||||
PROT_WRITE | PROT_READ);
|
||||
gem_set_domain(fd, obj[1].handle,
|
||||
I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU);
|
||||
|
||||
memset(reloc, 0, sizeof(reloc));
|
||||
b = batch;
|
||||
for (i = 0; i < 1024; i++) {
|
||||
uint64_t offset;
|
||||
|
||||
reloc[i].target_handle = obj[0].handle;
|
||||
reloc[i].offset = (b - batch + 1) * sizeof(*batch);
|
||||
reloc[i].presumed_offset = obj[0].offset;
|
||||
reloc[i].delta = i * sizeof(uint32_t);
|
||||
reloc[i].read_domains = I915_GEM_DOMAIN_INSTRUCTION;
|
||||
reloc[i].write_domain = I915_GEM_DOMAIN_INSTRUCTION;
|
||||
|
||||
offset = obj[0].offset + reloc[i].delta;
|
||||
*b++ = MI_STORE_DWORD_IMM;
|
||||
if (gen >= 8) {
|
||||
*b++ = offset;
|
||||
*b++ = offset >> 32;
|
||||
} else if (gen >= 4) {
|
||||
*b++ = 0;
|
||||
*b++ = offset;
|
||||
reloc[i].offset += sizeof(*batch);
|
||||
} else {
|
||||
*b++ = offset;
|
||||
}
|
||||
*b++ = i;
|
||||
}
|
||||
*b++ = MI_BATCH_BUFFER_END;
|
||||
munmap(batch, 16*1024+4096);
|
||||
gem_execbuf(fd, &execbuf);
|
||||
|
||||
memset(&hang, 0, sizeof(hang));
|
||||
if (flags & HANG)
|
||||
hang = igt_hang_ring(fd, ring & ~(3<<13));
|
||||
|
||||
if (flags & INTERRUPTIBLE)
|
||||
igt_fork_signal_helper();
|
||||
|
||||
if (flags & (CHILD | FORKED | BOMB)) {
|
||||
int nchild;
|
||||
|
||||
if (flags & CHILD)
|
||||
nchild = 1;
|
||||
else if (flags & FORKED)
|
||||
nchild = sysconf(_SC_NPROCESSORS_ONLN);
|
||||
else
|
||||
nchild = 8*sysconf(_SC_NPROCESSORS_ONLN);
|
||||
|
||||
igt_debug("Forking %d children\n", nchild);
|
||||
igt_fork(child, nchild)
|
||||
fill_ring(fd, &execbuf);
|
||||
|
||||
igt_waitchildren();
|
||||
} else {
|
||||
fill_ring(bufmgr, ring_name, copy);
|
||||
}
|
||||
} else
|
||||
fill_ring(fd, &execbuf);
|
||||
|
||||
if (interruptible) {
|
||||
if (flags & INTERRUPTIBLE)
|
||||
igt_stop_signal_helper();
|
||||
}
|
||||
|
||||
drm_intel_bufmgr_destroy(bufmgr);
|
||||
if (flags & HANG)
|
||||
igt_post_hang_ring(fd, hang);
|
||||
else
|
||||
check_bo(fd, obj[0].handle);
|
||||
|
||||
gem_close(fd, obj[1].handle);
|
||||
gem_close(fd, obj[0].handle);
|
||||
}
|
||||
|
||||
igt_main
|
||||
{
|
||||
const struct {
|
||||
const char *prefix;
|
||||
const char *suffix;
|
||||
unsigned flags;
|
||||
} modes[] = {
|
||||
{ "basic-", "", 0 },
|
||||
{ "", "-interuptible", INTERRUPTIBLE },
|
||||
{ "", "-hang", HANG },
|
||||
{ "", "-child", CHILD },
|
||||
{ "", "-forked", FORKED },
|
||||
{ "", "-bomb", BOMB | INTERRUPTIBLE },
|
||||
{ NULL, NULL, 0 }
|
||||
}, *mode;
|
||||
const struct {
|
||||
const char *name;
|
||||
unsigned int flags;
|
||||
} rings[] = {
|
||||
{ "default", I915_EXEC_DEFAULT },
|
||||
{ "rcs", I915_EXEC_RENDER },
|
||||
{ "bcs", I915_EXEC_BLT },
|
||||
{ "bsd", I915_EXEC_BSD },
|
||||
{ "bsd1", I915_EXEC_BSD | 1 << 13 },
|
||||
{ "bsd2", I915_EXEC_BSD | 2 << 13 },
|
||||
{ "vebox", I915_EXEC_VEBOX },
|
||||
{ NULL, 0 }
|
||||
}, *ring;
|
||||
int fd;
|
||||
|
||||
igt_skip_on_simulation();
|
||||
|
||||
igt_fixture {
|
||||
igt_fixture
|
||||
fd = drm_open_driver(DRIVER_INTEL);
|
||||
|
||||
for (mode = modes; mode->prefix; mode++) {
|
||||
for (ring = rings; ring->name; ring++) {
|
||||
gem_quiescent_gpu(fd);
|
||||
igt_subtest_f("%s%s%s",
|
||||
ring->flags || mode->flags ? "" : mode->prefix,
|
||||
ring->name,
|
||||
mode->suffix)
|
||||
run_test(fd, ring->flags, mode->flags);
|
||||
}
|
||||
}
|
||||
|
||||
igt_subtest("blitter")
|
||||
run_test(I915_EXEC_BLT, false, 0);
|
||||
|
||||
igt_subtest("render")
|
||||
run_test(I915_EXEC_RENDER, false, 0);
|
||||
|
||||
igt_subtest("blitter-interruptible")
|
||||
run_test(I915_EXEC_BLT, true, 0);
|
||||
|
||||
igt_subtest("render-interruptible")
|
||||
run_test(I915_EXEC_RENDER, true, 0);
|
||||
|
||||
igt_subtest("blitter-forked-1")
|
||||
run_test(I915_EXEC_BLT, false, 1);
|
||||
|
||||
igt_subtest("render-forked-1")
|
||||
run_test(I915_EXEC_RENDER, false, 1);
|
||||
|
||||
igt_subtest("blitter-forked-4")
|
||||
run_test(I915_EXEC_BLT, false, 4);
|
||||
|
||||
igt_subtest("render-forked-4")
|
||||
run_test(I915_EXEC_RENDER, false, 4);
|
||||
|
||||
igt_fixture {
|
||||
igt_fixture
|
||||
close(fd);
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user