mirror of
https://github.com/tiagovignatti/intel-gpu-tools.git
synced 2025-06-07 16:06:25 +00:00
intel_l3_parity: Support a daemonic mode
v2: Add a comment explaining the dangers of directly accessing the DFT register (Daniel) Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
This commit is contained in:
parent
bfa7a5906d
commit
799aeb6d00
@ -39,7 +39,7 @@ dist_bin_SCRIPTS = intel_gpu_abrt
|
||||
|
||||
AM_CPPFLAGS = -I$(top_srcdir) -I$(top_srcdir)/lib
|
||||
AM_CFLAGS = $(DRM_CFLAGS) $(PCIACCESS_CFLAGS) $(CWARNFLAGS) $(CAIRO_CFLAGS)
|
||||
LDADD = $(top_builddir)/lib/libintel_tools.la $(DRM_LIBS) $(PCIACCESS_LIBS) $(CAIRO_LIBS)
|
||||
LDADD = $(top_builddir)/lib/libintel_tools.la $(DRM_LIBS) $(PCIACCESS_LIBS) $(CAIRO_LIBS) $(LIBUDEV_LIBS)
|
||||
|
||||
intel_dump_decode_SOURCES = \
|
||||
intel_dump_decode.c
|
||||
@ -50,3 +50,7 @@ intel_error_decode_SOURCES = \
|
||||
intel_bios_reader_SOURCES = \
|
||||
intel_bios_reader.c \
|
||||
intel_bios.h
|
||||
|
||||
intel_l3_parity_SOURCES = \
|
||||
intel_l3_parity.c \
|
||||
intel_l3_udev_listener.c
|
||||
|
@ -37,6 +37,14 @@
|
||||
#include "intel_chipset.h"
|
||||
#include "intel_gpu_tools.h"
|
||||
#include "drmtest.h"
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
#if HAVE_UDEV
|
||||
#include <libudev.h>
|
||||
#include <syslog.h>
|
||||
#endif
|
||||
#include "intel_l3_parity.h"
|
||||
|
||||
static unsigned int devid;
|
||||
/* L3 size is always a function of banks. The number of banks cannot be
|
||||
@ -157,7 +165,8 @@ static void usage(const char *name)
|
||||
" -r, --row=[row] The row to act upon (default 0)\n"
|
||||
" -b, --bank=[bank] The bank to act upon (default 0)\n"
|
||||
" -s, --subbank=[subbank] The subbank to act upon (default 0)\n"
|
||||
" -w, --slice=[slice] Which slice to act on (default: -1 [all])"
|
||||
" -w, --slice=[slice] Which slice to act on (default: -1 [all])\n"
|
||||
" , --daemon Run the listener (-L) as a daemon\n"
|
||||
" ACTIONS (only 1 may be specified at a time):\n"
|
||||
" -h, --help Display this help\n"
|
||||
" -H, --hw-info Display the current L3 properties\n"
|
||||
@ -166,7 +175,8 @@ static void usage(const char *name)
|
||||
" -e, --enable Enable row, bank, subbank (undo -d)\n"
|
||||
" -d, --disable=<row,bank,subbank> Disable row, bank, subbank (inline arguments are deprecated. Please use -r, -b, -s instead\n"
|
||||
" -i, --inject [HSW only] Cause hardware to inject a row errors\n"
|
||||
" -u, --uninject [HSW only] Turn off hardware error injectection (undo -i)\n",
|
||||
" -u, --uninject [HSW only] Turn off hardware error injectection (undo -i)\n"
|
||||
" -L, --listen Listen for uevent errors\n",
|
||||
name);
|
||||
}
|
||||
|
||||
@ -179,6 +189,7 @@ int main(int argc, char *argv[])
|
||||
int fd[REAL_MAX_SLICES] = {0}, ret, i;
|
||||
int action = '0';
|
||||
int drm_fd = drm_open_any();
|
||||
int daemonize = 0;
|
||||
devid = intel_get_drm_devid(drm_fd);
|
||||
|
||||
if (intel_gen(devid) < 7 || IS_VALLEYVIEW(devid))
|
||||
@ -202,11 +213,18 @@ int main(int argc, char *argv[])
|
||||
assert(lseek(fd[i], 0, SEEK_SET) == 0);
|
||||
}
|
||||
|
||||
/* NB: It is potentially unsafe to read this register if the kernel is
|
||||
* actively using this register range, or we're running multiple
|
||||
* instances of this tool. Since neither of those cases should occur
|
||||
* (and the tool should be root only) we can safely ignore this for
|
||||
* now. Just be aware of this if for some reason a hang is reported
|
||||
* when using this tool.
|
||||
*/
|
||||
dft = intel_register_read(0xb038);
|
||||
|
||||
while (1) {
|
||||
int c, option_index = 0;
|
||||
static struct option long_options[] = {
|
||||
struct option long_options[] = {
|
||||
{ "help", no_argument, 0, 'h' },
|
||||
{ "list", no_argument, 0, 'l' },
|
||||
{ "clear-all", no_argument, 0, 'a' },
|
||||
@ -215,18 +233,23 @@ int main(int argc, char *argv[])
|
||||
{ "inject", no_argument, 0, 'i' },
|
||||
{ "uninject", no_argument, 0, 'u' },
|
||||
{ "hw-info", no_argument, 0, 'H' },
|
||||
{ "listen", no_argument, 0, 'L' },
|
||||
{ "row", required_argument, 0, 'r' },
|
||||
{ "bank", required_argument, 0, 'b' },
|
||||
{ "subbank", required_argument, 0, 's' },
|
||||
{ "slice", required_argument, 0, 'w' },
|
||||
{ "daemon", no_argument, &daemonize, 1 },
|
||||
{0, 0, 0, 0}
|
||||
};
|
||||
|
||||
c = getopt_long(argc, argv, "hHr:b:s:w:aled::iu", long_options,
|
||||
c = getopt_long(argc, argv, "hHr:b:s:w:aled::iuL", long_options,
|
||||
&option_index);
|
||||
if (c == -1)
|
||||
break;
|
||||
|
||||
if (c == 0)
|
||||
continue;
|
||||
|
||||
switch (c) {
|
||||
case '?':
|
||||
case 'h':
|
||||
@ -274,6 +297,7 @@ int main(int argc, char *argv[])
|
||||
case 'a':
|
||||
case 'l':
|
||||
case 'e':
|
||||
case 'L':
|
||||
if (action != '0') {
|
||||
fprintf(stderr, "Only one action may be specified\n");
|
||||
exit(EXIT_FAILURE);
|
||||
@ -299,6 +323,20 @@ int main(int argc, char *argv[])
|
||||
printf("warning: overwriting existing injections. This is very dangerous.\n");
|
||||
}
|
||||
|
||||
/* Daemon doesn't work like the other commands */
|
||||
if (action == 'L') {
|
||||
struct l3_parity par;
|
||||
struct l3_location loc;
|
||||
if (daemonize) {
|
||||
assert(daemon(0, 0) == 0);
|
||||
openlog(argv[0], LOG_CONS | LOG_PID, LOG_USER);
|
||||
}
|
||||
memset(&par, 0, sizeof(par));
|
||||
assert(l3_uevent_setup(&par) == 0);
|
||||
assert(l3_listen(&par, daemonize == 1, &loc) == 0);
|
||||
exit(EXIT_SUCCESS);
|
||||
}
|
||||
|
||||
if (action == 'l')
|
||||
decode_dft(dft);
|
||||
|
||||
|
31
tools/intel_l3_parity.h
Normal file
31
tools/intel_l3_parity.h
Normal file
@ -0,0 +1,31 @@
|
||||
#ifndef INTEL_L3_PARITY_H_
#define INTEL_L3_PARITY_H_

#include <stdint.h>
#include <stdbool.h>
#include <sys/select.h> /* fd_set */

/*
 * State of the uevent listener, filled in by l3_uevent_setup() and consumed
 * by l3_listen().
 */
struct l3_parity {
	struct udev *udev;			/* libudev context */
	struct udev_monitor *uevent_monitor;	/* monitor receiving drm uevents */
	int fd;					/* file descriptor of the monitor */
	fd_set fdset;				/* set holding only 'fd', passed to select() */
};

/* Location of an L3 parity error as reported by a uevent. */
struct l3_location {
	uint8_t slice;
	uint16_t row;
	uint8_t bank;
	uint8_t subbank;
};

#if HAVE_UDEV
/* Creates the udev monitor and fills in *par. Returns 0 on success, <0 on failure. */
int l3_uevent_setup(struct l3_parity *par);
/*
 * Listens (blocks) for an l3 parity event and stores the location of the
 * error in *loc. Returns 0 once an event has been reported, non-zero on
 * failure. When 'daemon' is true, events are logged to syslog and the call
 * keeps listening instead of returning after the first event.
 */
int l3_listen(struct l3_parity *par, bool daemon, struct l3_location *loc);
#else
/*
 * Stubs for builds without udev support: both calls always fail with -1.
 * They take the same arguments as the real functions so that callers compile
 * unchanged in either configuration.
 */
static inline int l3_uevent_setup(struct l3_parity *par)
{
	(void)par;
	return -1;
}

static inline int l3_listen(struct l3_parity *par, bool daemon,
			    struct l3_location *loc)
{
	(void)par;
	(void)daemon;
	(void)loc;
	return -1;
}
#endif

/* Currently a no-op; written as do/while so it behaves as a single statement. */
#define l3_uevent_teardown(par) do { (void)(par); } while (0)

#endif
|
108
tools/intel_l3_udev_listener.c
Normal file
108
tools/intel_l3_udev_listener.c
Normal file
@ -0,0 +1,108 @@
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#if HAVE_UDEV
|
||||
#include <libudev.h>
|
||||
#ifndef _GNU_SOURCE
|
||||
#define _GNU_SOURCE
|
||||
#endif
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
#include <assert.h>
|
||||
#include <syslog.h>
|
||||
#include "i915_drm.h"
|
||||
#include "intel_l3_parity.h"
|
||||
|
||||
#ifndef I915_L3_PARITY_UEVENT
|
||||
#define I915_L3_PARITY_UEVENT "L3_PARITY_ERROR"
|
||||
#endif
|
||||
|
||||
int l3_uevent_setup(struct l3_parity *par)
|
||||
{
|
||||
struct udev *udev;
|
||||
struct udev_monitor *uevent_monitor;
|
||||
fd_set fdset;
|
||||
int fd, ret = -1;
|
||||
|
||||
udev = udev_new();
|
||||
if (!udev) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
uevent_monitor = udev_monitor_new_from_netlink(udev, "udev");
|
||||
if (!uevent_monitor)
|
||||
goto err_out;
|
||||
|
||||
ret = udev_monitor_filter_add_match_subsystem_devtype(uevent_monitor, "drm", "drm_minor");
|
||||
if (ret < 0)
|
||||
goto err_out;
|
||||
|
||||
ret = udev_monitor_enable_receiving(uevent_monitor);
|
||||
if (ret < 0)
|
||||
goto err_out;
|
||||
|
||||
fd = udev_monitor_get_fd(uevent_monitor);
|
||||
FD_ZERO(&fdset);
|
||||
FD_SET(fd, &fdset);
|
||||
|
||||
par->udev = udev;
|
||||
par->fd = fd;
|
||||
par->fdset = fdset;
|
||||
par->uevent_monitor = uevent_monitor;
|
||||
return 0;
|
||||
|
||||
err_out:
|
||||
udev_unref(udev);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int l3_listen(struct l3_parity *par, bool daemon, struct l3_location *loc)
|
||||
{
|
||||
struct udev_device *udev_dev;
|
||||
const char *parity_status;
|
||||
char *err_msg;
|
||||
int ret;
|
||||
|
||||
again:
|
||||
ret = select(par->fd + 1, &par->fdset, NULL, NULL, NULL);
|
||||
/* Number of bits set is returned, must be >= 1 */
|
||||
if (ret <= 0) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
assert(FD_ISSET(par->fd, &par->fdset));
|
||||
|
||||
udev_dev = udev_monitor_receive_device(par->uevent_monitor);
|
||||
if (!udev_dev)
|
||||
return -1;
|
||||
|
||||
parity_status = udev_device_get_property_value(udev_dev, I915_L3_PARITY_UEVENT);
|
||||
if (strncmp(parity_status, "1", 1))
|
||||
goto again;
|
||||
|
||||
loc->slice = atoi(udev_device_get_property_value(udev_dev, "SLICE"));
|
||||
loc->row = atoi(udev_device_get_property_value(udev_dev, "ROW"));
|
||||
loc->bank = atoi(udev_device_get_property_value(udev_dev, "BANK"));
|
||||
loc->subbank = atoi(udev_device_get_property_value(udev_dev, "SUBBANK"));
|
||||
|
||||
udev_device_unref(udev_dev);
|
||||
|
||||
asprintf(&err_msg, "Parity error detected on: %d,%d,%d,%d. "
|
||||
"Try to run intel_l3_parity -r %d -b %d -s %d -w %d -d",
|
||||
loc->slice, loc->row, loc->bank, loc->subbank,
|
||||
loc->row, loc->bank, loc->subbank, loc->slice);
|
||||
if (daemon) {
|
||||
syslog(LOG_INFO, "%s\n", err_msg);
|
||||
goto again;
|
||||
}
|
||||
|
||||
fprintf(stderr, "%s\n", err_msg);
|
||||
|
||||
free(err_msg);
|
||||
|
||||
return 0;
|
||||
}
|
||||
#endif
|
Loading…
x
Reference in New Issue
Block a user