mirror of
https://github.com/tiagovignatti/intel-gpu-tools.git
synced 2025-06-10 17:36:11 +00:00
tests/drv_hangman: test for acthd increasing through invalid VM space
The hangcheck logic will not flag a hang if acthd keeps increasing.
However, if a malformed batch jumps to an invalid offset in the ppgtt it
can potentially continue executing through the whole address space
without triggering the hangcheck mechanism.

This patch adds a test to simulate the issue. I've kept the test running
for more than 10 minutes before killing it on a BDW and no hang occurred.
I've sampled i915_hangcheck_info a few times during the run and got the
following:

    Hangcheck active, fires in 468ms
    render ring:
        seqno = fffff55e [current fffff55e]
        ACTHD = 0x47df685ecc [current 0x4926b81d90]
        max ACTHD = 0x47df685ecc
        score = 0
        action = 2
        instdone read = 0xffd7ffff 0xffffffff 0xffffffff 0xffffffff
        instdone accu = 0x00000000 0x00000000 0x00000000 0x00000000

    Hangcheck active, fires in 424ms
    render ring:
        seqno = fffff55e [current fffff55e]
        ACTHD = 0x6c953d3a34 [current 0x6de5e76fa4]
        max ACTHD = 0x6c953d3a34
        score = 0
        action = 2
        instdone read = 0xffd7ffff 0xffffffff 0xffffffff 0xffffffff
        instdone accu = 0x00000000 0x00000000 0x00000000 0x00000000

    Hangcheck active, fires in 1692ms
    render ring:
        seqno = fffff55e [current fffff55e]
        ACTHD = 0x1f49b0366dc [current 0x1f4dcbd88ec]
        max ACTHD = 0x1f49b0366dc
        score = 0
        action = 2
        instdone read = 0xffd7ffff 0xffffffff 0xffffffff 0xffffffff
        instdone accu = 0x00000000 0x00000000 0x00000000 0x00000000

v2: use the new gem_wait() function (Chris)
v3: switch to unterminated batch and rename test, remove redundant check,
    update test requirements (Chris), update top comment
v4: force gpu reset if the hang detection fails (Mika)

Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Cc: Arun Siluvery <arun.siluvery@linux.intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
[Mika: removed batch_len=8]
Signed-off-by: Mika Kuoppala <mika.kuoppala@intel.com>
This commit is contained in:
parent
03c7f84eb1
commit
95ca7644db
@ -284,6 +284,46 @@ static void test_error_state_capture(unsigned ring_id,
|
||||
check_error_state(gen, cmd_parser, ring_name, offset);
|
||||
}
|
||||
|
||||
|
||||
/* This test covers the case where we end up in an uninitialised area of the
|
||||
* ppgtt and keep executing through it. This is particularly relevant if 48b
|
||||
* ppgtt is enabled because the ppgtt is massively bigger compared to the 32b
|
||||
* case and it takes a lot more time to wrap, so the acthd can potentially keep
|
||||
* increasing for a long time
|
||||
*/
|
||||
#define NSEC_PER_SEC 1000000000L
|
||||
static void hangcheck_unterminated(void)
|
||||
{
|
||||
int fd;
|
||||
/* timeout needs to be greater than ~5*hangcheck */
|
||||
int64_t timeout_ns = 100 * NSEC_PER_SEC; /* 100 seconds */
|
||||
struct drm_i915_gem_execbuffer2 execbuf;
|
||||
struct drm_i915_gem_exec_object2 gem_exec;
|
||||
uint32_t handle;
|
||||
|
||||
fd = drm_open_driver(DRIVER_INTEL);
|
||||
igt_require(gem_uses_full_ppgtt(fd));
|
||||
igt_require_hang_ring(fd, 0);
|
||||
|
||||
handle = gem_create(fd, 4096);
|
||||
|
||||
memset(&gem_exec, 0, sizeof(gem_exec));
|
||||
gem_exec.handle = handle;
|
||||
|
||||
memset(&execbuf, 0, sizeof(execbuf));
|
||||
execbuf.buffers_ptr = (uintptr_t)&gem_exec;
|
||||
execbuf.buffer_count = 1;
|
||||
|
||||
gem_execbuf(fd, &execbuf);
|
||||
if (gem_wait(fd, handle, &timeout_ns) != 0) {
|
||||
/* need to manually trigger an hang to clean before failing */
|
||||
igt_force_gpu_reset();
|
||||
igt_assert_f(0, "unterminated batch did not trigger an hang!");
|
||||
}
|
||||
|
||||
close(fd);
|
||||
}
|
||||
|
||||
igt_main
|
||||
{
|
||||
const struct intel_execution_engine *e;
|
||||
@ -310,4 +350,7 @@ igt_main
|
||||
test_error_state_capture(e->exec_id | e->flags,
|
||||
e->full_name);
|
||||
}
|
||||
|
||||
igt_subtest("hangcheck-unterminated")
|
||||
hangcheck_unterminated();
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user