mirror of
https://github.com/tiagovignatti/intel-gpu-tools.git
synced 2025-06-11 01:46:14 +00:00
lib: Add a GPU error detector
If we listen to the uevents from the kernel, we can detect when the GPU hangs. To do so, we fork a helper process that watches for those events and sends a signal back to the parent when one reports an error. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
This commit is contained in:
parent
eb572106b4
commit
756f3e0cb7
@ -3,7 +3,7 @@ include Makefile.sources
|
||||
|
||||
AM_CPPFLAGS = -I$(top_srcdir) -I$(top_srcdir)/lib
|
||||
AM_CFLAGS = $(DRM_CFLAGS) $(CWARNFLAGS) $(CAIRO_CFLAGS) $(LIBUNWIND_CFLAGS)
|
||||
LDADD = $(top_builddir)/lib/libintel_tools.la $(DRM_LIBS) $(PCIACCESS_LIBS) $(CAIRO_LIBS) $(LIBUNWIND_LIBS) $(TIMER_LIBS) -lm
|
||||
LDADD = $(top_builddir)/lib/libintel_tools.la
|
||||
|
||||
benchmarks_LTLIBRARIES = gem_exec_tracer.la
|
||||
gem_exec_tracer_la_LDFLAGS = -module -avoid-version -no-undefined
|
||||
|
@ -15,4 +15,4 @@ AM_CFLAGS = \
|
||||
$(LIBUNWIND_CFLAGS) \
|
||||
$(CWARNFLAGS)
|
||||
|
||||
LDADD = $(top_builddir)/lib/libintel_tools.la $(DRM_LIBS) $(PCIACCESS_LIBS) $(CAIRO_LIBS) $(LIBUNWIND_LIBS) $(TIMER_LIBS)
|
||||
LDADD = $(top_builddir)/lib/libintel_tools.la
|
||||
|
@ -4,4 +4,4 @@ bin_PROGRAMS = \
|
||||
|
||||
AM_CPPFLAGS = -I$(top_srcdir) -I$(top_srcdir)/lib
|
||||
AM_CFLAGS = $(DRM_CFLAGS) $(PCIACCESS_CFLAGS) $(CWARNFLAGS) $(CAIRO_CFLAGS) $(LIBUNWIND_CFLAGS)
|
||||
LDADD = $(top_builddir)/lib/libintel_tools.la $(DRM_LIBS) $(PCIACCESS_LIBS) $(CAIRO_LIBS) $(LIBUNWIND_LIBS) $(TIMER_LIBS)
|
||||
LDADD = $(top_builddir)/lib/libintel_tools.la
|
||||
|
@ -15,12 +15,20 @@ if HAVE_VC4
|
||||
endif
|
||||
|
||||
AM_CPPFLAGS = -I$(top_srcdir)
|
||||
AM_CFLAGS = $(DRM_CFLAGS) $(CWARNFLAGS) $(LIBUNWIND_CFLAGS) $(DEBUG_CFLAGS) \
|
||||
AM_CFLAGS = $(CWARNFLAGS) $(DRM_CFLAGS) $(PCIACCESS_CFLAGS) $(LIBUNWIND_CFLAGS) $(DEBUG_CFLAGS) \
|
||||
-DIGT_SRCDIR=\""$(abs_top_srcdir)/tests"\" \
|
||||
-DIGT_DATADIR=\""$(pkgdatadir)"\" \
|
||||
-DIGT_LOG_DOMAIN=\""$(subst _,-,$*)"\" \
|
||||
-pthread
|
||||
|
||||
LDADD = $(CAIRO_LIBS) $(LIBUNWIND_LIBS) $(TIMER_LIBS) -lm
|
||||
AM_CFLAGS += $(CAIRO_CFLAGS)
|
||||
|
||||
libintel_tools_la_LIBADD = \
|
||||
$(DRM_LIBS) \
|
||||
$(PCIACCESS_LIBS) \
|
||||
$(CAIRO_LIBS) \
|
||||
$(LIBUDEV_LIBS) \
|
||||
$(LIBUNWIND_LIBS) \
|
||||
$(TIMER_LIBS) \
|
||||
-lm
|
||||
|
||||
|
@ -42,6 +42,7 @@
|
||||
#include <stdlib.h>
|
||||
#include <time.h>
|
||||
#include <unistd.h>
|
||||
#include <sys/poll.h>
|
||||
#include <sys/wait.h>
|
||||
#include <sys/time.h>
|
||||
#include <sys/types.h>
|
||||
@ -359,6 +360,85 @@ void igt_stop_signal_helper(void)
|
||||
sig_stat = 0;
|
||||
}
|
||||
|
||||
#if HAVE_UDEV
|
||||
#include <libudev.h>
|
||||
|
||||
static struct igt_helper_process hang_detector;
|
||||
/*
 * hang_detector_process:
 * @pid: process to notify when a GPU error is reported
 * @rdev: device number of the DRM node to watch
 *
 * Main loop of the hang-detector helper. It listens on the "kernel" udev
 * netlink source for events from the "drm" subsystem and sends SIGRTMAX to
 * @pid whenever an event for @rdev carries the property ERROR=1.
 *
 * Never returns: the process exits once @pid can no longer be signalled,
 * poll() fails, or the monitor socket reports an error condition.
 */
static void __attribute__((noreturn))
hang_detector_process(pid_t pid, dev_t rdev)
{
	/*
	 * Wake up periodically even without uevents so that the death of
	 * the parent is noticed; otherwise an orphaned helper would sleep
	 * in poll() forever on a quiet system.
	 */
	enum { PARENT_CHECK_INTERVAL_MS = 2000 };
	struct udev_monitor *mon =
		udev_monitor_new_from_netlink(udev_new(), "kernel");
	struct pollfd pfd;
	int ret;

	udev_monitor_filter_add_match_subsystem_devtype(mon, "drm", NULL);
	udev_monitor_enable_receiving(mon);

	pfd.fd = udev_monitor_get_fd(mon);
	pfd.events = POLLIN;
	pfd.revents = 0;

	while ((ret = poll(&pfd, 1, PARENT_CHECK_INTERVAL_MS)) >= 0) {
		struct udev_device *dev;
		dev_t devnum;

		if (kill(pid, 0)) /* Parent has died, so must we. */
			break;

		if (ret == 0) /* Timeout: nothing to read, just re-arm. */
			continue;

		/* A broken socket would make poll() return at once forever. */
		if (pfd.revents & (POLLERR | POLLHUP | POLLNVAL))
			break;

		/*
		 * A NULL device only means this wakeup produced nothing
		 * usable; keep watching rather than silently disabling
		 * hang detection for the rest of the test.
		 */
		dev = udev_monitor_receive_device(mon);
		if (dev == NULL)
			continue;

		devnum = udev_device_get_devnum(dev);
		if (memcmp(&rdev, &devnum, sizeof(dev_t)) == 0) {
			const char *str;

			str = udev_device_get_property_value(dev, "ERROR");
			if (str && atoi(str) == 1)
				kill(pid, SIGRTMAX);
		}

		udev_device_unref(dev);
	}

	exit(0);
}
|
||||
|
||||
/*
 * Handler installed for SIGRTMAX by igt_fork_hang_detector(): fails the
 * current test with a "GPU hung" assertion. @sig is not examined.
 */
static void sig_abort(int sig)
{
	(void)sig;

	igt_assert(!"GPU hung");
}
|
||||
|
||||
void igt_fork_hang_detector(int fd)
|
||||
{
|
||||
struct stat st;
|
||||
|
||||
if (igt_only_list_subtests())
|
||||
return;
|
||||
|
||||
igt_assert(fstat(fd, &st) == 0);
|
||||
|
||||
signal(SIGRTMAX, sig_abort);
|
||||
igt_fork_helper(&hang_detector)
|
||||
hang_detector_process(getppid(), st.st_rdev);
|
||||
}
|
||||
|
||||
void igt_stop_hang_detector(void)
|
||||
{
|
||||
if (igt_only_list_subtests())
|
||||
return;
|
||||
|
||||
igt_stop_helper(&hang_detector);
|
||||
}
|
||||
#else
|
||||
/*
 * Stub for builds where HAVE_UDEV is false: no detector is started and
 * @fd is not used.
 */
void igt_fork_hang_detector(int fd)
{
	if (igt_only_list_subtests()) {
		/* Same early exit as the udev build; nothing follows either way. */
		return;
	}
}
|
||||
|
||||
/*
 * Stub for builds where HAVE_UDEV is false: no detector was started, so
 * there is nothing to stop.
 */
void igt_stop_hang_detector(void)
{
	return;
}
|
||||
#endif
|
||||
|
||||
/**
|
||||
* igt_check_boolean_env_var:
|
||||
* @env_var: environment variable name
|
||||
|
@ -40,6 +40,9 @@ extern int num_trash_bos;
|
||||
void igt_fork_signal_helper(void);
|
||||
void igt_stop_signal_helper(void);
|
||||
|
||||
void igt_fork_hang_detector(int fd);
|
||||
void igt_stop_hang_detector(void);
|
||||
|
||||
/*
 * Caller-owned state for the igt_sigiter helpers.
 * NOTE(review): @pass looks like a pass/iteration counter, but its users
 * are not visible here, so confirm against them.
 */
struct igt_sigiter {
	unsigned int pass;
};
|
||||
|
@ -56,9 +56,8 @@ AM_CFLAGS = $(DRM_CFLAGS) $(CWARNFLAGS) $(DEBUG_CFLAGS)\
|
||||
$(LIBUNWIND_CFLAGS) \
|
||||
$(NULL)
|
||||
|
||||
LDADD = ../lib/libintel_tools.la $(PCIACCESS_LIBS) $(DRM_LIBS) $(LIBUNWIND_LIBS) $(TIMER_LIBS)
|
||||
LDADD = ../lib/libintel_tools.la $(GLIB_LIBS)
|
||||
|
||||
LDADD += $(CAIRO_LIBS) $(LIBUDEV_LIBS) $(GLIB_LIBS) -lm
|
||||
AM_CFLAGS += $(CAIRO_CFLAGS) $(LIBUDEV_CFLAGS) $(GLIB_CFLAGS)
|
||||
AM_LDFLAGS = -Wl,--as-needed
|
||||
|
||||
|
@ -368,6 +368,8 @@ igt_main
|
||||
igt_fixture
|
||||
fd = drm_open_driver_master(DRIVER_INTEL);
|
||||
|
||||
igt_fork_hang_detector(fd);
|
||||
|
||||
for (const struct mode *m = modes; m->name; m++)
|
||||
igt_subtest_f("%s", *m->name ? m->name : "basic")
|
||||
whisper(fd, -1, m->flags);
|
||||
@ -382,6 +384,8 @@ igt_main
|
||||
whisper(fd, e->exec_id | e->flags, m->flags);
|
||||
}
|
||||
|
||||
igt_stop_hang_detector();
|
||||
|
||||
igt_fixture
|
||||
close(fd);
|
||||
}
|
||||
|
@ -4,7 +4,7 @@ SUBDIRS = null_state_gen registers
|
||||
|
||||
AM_CPPFLAGS = -I$(top_srcdir) -I$(top_srcdir)/lib
|
||||
AM_CFLAGS = $(DEBUG_CFLAGS) $(DRM_CFLAGS) $(PCIACCESS_CFLAGS) $(CWARNFLAGS) $(CAIRO_CFLAGS) $(LIBUNWIND_CFLAGS) -DPKGDATADIR=\"$(pkgdatadir)\"
|
||||
LDADD = $(top_builddir)/lib/libintel_tools.la $(DRM_LIBS) $(PCIACCESS_LIBS) $(CAIRO_LIBS) $(LIBUDEV_LIBS) $(LIBUNWIND_LIBS) $(TIMER_LIBS) -lm
|
||||
LDADD = $(top_builddir)/lib/libintel_tools.la
|
||||
AM_LDFLAGS = -Wl,--as-needed
|
||||
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user