mirror of
https://github.com/tiagovignatti/intel-gpu-tools.git
synced 2025-06-08 08:26:10 +00:00
intel_l3_parity: Support a daemonic mode
v2: Add a comment explaining the dangers of directly accessing the DFT register (Daniel) Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
This commit is contained in:
parent
bfa7a5906d
commit
799aeb6d00
@ -39,7 +39,7 @@ dist_bin_SCRIPTS = intel_gpu_abrt
|
|||||||
|
|
||||||
AM_CPPFLAGS = -I$(top_srcdir) -I$(top_srcdir)/lib
|
AM_CPPFLAGS = -I$(top_srcdir) -I$(top_srcdir)/lib
|
||||||
AM_CFLAGS = $(DRM_CFLAGS) $(PCIACCESS_CFLAGS) $(CWARNFLAGS) $(CAIRO_CFLAGS)
|
AM_CFLAGS = $(DRM_CFLAGS) $(PCIACCESS_CFLAGS) $(CWARNFLAGS) $(CAIRO_CFLAGS)
|
||||||
LDADD = $(top_builddir)/lib/libintel_tools.la $(DRM_LIBS) $(PCIACCESS_LIBS) $(CAIRO_LIBS)
|
LDADD = $(top_builddir)/lib/libintel_tools.la $(DRM_LIBS) $(PCIACCESS_LIBS) $(CAIRO_LIBS) $(LIBUDEV_LIBS)
|
||||||
|
|
||||||
intel_dump_decode_SOURCES = \
|
intel_dump_decode_SOURCES = \
|
||||||
intel_dump_decode.c
|
intel_dump_decode.c
|
||||||
@ -50,3 +50,7 @@ intel_error_decode_SOURCES = \
|
|||||||
intel_bios_reader_SOURCES = \
|
intel_bios_reader_SOURCES = \
|
||||||
intel_bios_reader.c \
|
intel_bios_reader.c \
|
||||||
intel_bios.h
|
intel_bios.h
|
||||||
|
|
||||||
|
intel_l3_parity_SOURCES = \
|
||||||
|
intel_l3_parity.c \
|
||||||
|
intel_l3_udev_listener.c
|
||||||
|
@ -37,6 +37,14 @@
|
|||||||
#include "intel_chipset.h"
|
#include "intel_chipset.h"
|
||||||
#include "intel_gpu_tools.h"
|
#include "intel_gpu_tools.h"
|
||||||
#include "drmtest.h"
|
#include "drmtest.h"
|
||||||
|
#ifdef HAVE_CONFIG_H
|
||||||
|
#include "config.h"
|
||||||
|
#endif
|
||||||
|
#if HAVE_UDEV
|
||||||
|
#include <libudev.h>
|
||||||
|
#include <syslog.h>
|
||||||
|
#endif
|
||||||
|
#include "intel_l3_parity.h"
|
||||||
|
|
||||||
static unsigned int devid;
|
static unsigned int devid;
|
||||||
/* L3 size is always a function of banks. The number of banks cannot be
|
/* L3 size is always a function of banks. The number of banks cannot be
|
||||||
@ -157,7 +165,8 @@ static void usage(const char *name)
|
|||||||
" -r, --row=[row] The row to act upon (default 0)\n"
|
" -r, --row=[row] The row to act upon (default 0)\n"
|
||||||
" -b, --bank=[bank] The bank to act upon (default 0)\n"
|
" -b, --bank=[bank] The bank to act upon (default 0)\n"
|
||||||
" -s, --subbank=[subbank] The subbank to act upon (default 0)\n"
|
" -s, --subbank=[subbank] The subbank to act upon (default 0)\n"
|
||||||
" -w, --slice=[slice] Which slice to act on (default: -1 [all])"
|
" -w, --slice=[slice] Which slice to act on (default: -1 [all])\n"
|
||||||
|
" , --daemon Run the listener (-L) as a daemon\n"
|
||||||
" ACTIONS (only 1 may be specified at a time):\n"
|
" ACTIONS (only 1 may be specified at a time):\n"
|
||||||
" -h, --help Display this help\n"
|
" -h, --help Display this help\n"
|
||||||
" -H, --hw-info Display the current L3 properties\n"
|
" -H, --hw-info Display the current L3 properties\n"
|
||||||
@ -166,7 +175,8 @@ static void usage(const char *name)
|
|||||||
" -e, --enable Enable row, bank, subbank (undo -d)\n"
|
" -e, --enable Enable row, bank, subbank (undo -d)\n"
|
||||||
" -d, --disable=<row,bank,subbank> Disable row, bank, subbank (inline arguments are deprecated. Please use -r, -b, -s instead\n"
|
" -d, --disable=<row,bank,subbank> Disable row, bank, subbank (inline arguments are deprecated. Please use -r, -b, -s instead\n"
|
||||||
" -i, --inject [HSW only] Cause hardware to inject a row errors\n"
|
" -i, --inject [HSW only] Cause hardware to inject a row errors\n"
|
||||||
" -u, --uninject [HSW only] Turn off hardware error injectection (undo -i)\n",
|
" -u, --uninject [HSW only] Turn off hardware error injectection (undo -i)\n"
|
||||||
|
" -L, --listen Listen for uevent errors\n",
|
||||||
name);
|
name);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -179,6 +189,7 @@ int main(int argc, char *argv[])
|
|||||||
int fd[REAL_MAX_SLICES] = {0}, ret, i;
|
int fd[REAL_MAX_SLICES] = {0}, ret, i;
|
||||||
int action = '0';
|
int action = '0';
|
||||||
int drm_fd = drm_open_any();
|
int drm_fd = drm_open_any();
|
||||||
|
int daemonize = 0;
|
||||||
devid = intel_get_drm_devid(drm_fd);
|
devid = intel_get_drm_devid(drm_fd);
|
||||||
|
|
||||||
if (intel_gen(devid) < 7 || IS_VALLEYVIEW(devid))
|
if (intel_gen(devid) < 7 || IS_VALLEYVIEW(devid))
|
||||||
@ -202,11 +213,18 @@ int main(int argc, char *argv[])
|
|||||||
assert(lseek(fd[i], 0, SEEK_SET) == 0);
|
assert(lseek(fd[i], 0, SEEK_SET) == 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* NB: It is potentially unsafe to read this register if the kernel is
|
||||||
|
* actively using this register range, or we're running multiple
|
||||||
|
* instances of this tool. Since neither of those cases should occur
|
||||||
|
* (and the tool should be root only) we can safely ignore this for
|
||||||
|
* now. Just be aware of this if for some reason a hang is reported
|
||||||
|
* when using this tool.
|
||||||
|
*/
|
||||||
dft = intel_register_read(0xb038);
|
dft = intel_register_read(0xb038);
|
||||||
|
|
||||||
while (1) {
|
while (1) {
|
||||||
int c, option_index = 0;
|
int c, option_index = 0;
|
||||||
static struct option long_options[] = {
|
struct option long_options[] = {
|
||||||
{ "help", no_argument, 0, 'h' },
|
{ "help", no_argument, 0, 'h' },
|
||||||
{ "list", no_argument, 0, 'l' },
|
{ "list", no_argument, 0, 'l' },
|
||||||
{ "clear-all", no_argument, 0, 'a' },
|
{ "clear-all", no_argument, 0, 'a' },
|
||||||
@ -215,18 +233,23 @@ int main(int argc, char *argv[])
|
|||||||
{ "inject", no_argument, 0, 'i' },
|
{ "inject", no_argument, 0, 'i' },
|
||||||
{ "uninject", no_argument, 0, 'u' },
|
{ "uninject", no_argument, 0, 'u' },
|
||||||
{ "hw-info", no_argument, 0, 'H' },
|
{ "hw-info", no_argument, 0, 'H' },
|
||||||
|
{ "listen", no_argument, 0, 'L' },
|
||||||
{ "row", required_argument, 0, 'r' },
|
{ "row", required_argument, 0, 'r' },
|
||||||
{ "bank", required_argument, 0, 'b' },
|
{ "bank", required_argument, 0, 'b' },
|
||||||
{ "subbank", required_argument, 0, 's' },
|
{ "subbank", required_argument, 0, 's' },
|
||||||
{ "slice", required_argument, 0, 'w' },
|
{ "slice", required_argument, 0, 'w' },
|
||||||
|
{ "daemon", no_argument, &daemonize, 1 },
|
||||||
{0, 0, 0, 0}
|
{0, 0, 0, 0}
|
||||||
};
|
};
|
||||||
|
|
||||||
c = getopt_long(argc, argv, "hHr:b:s:w:aled::iu", long_options,
|
c = getopt_long(argc, argv, "hHr:b:s:w:aled::iuL", long_options,
|
||||||
&option_index);
|
&option_index);
|
||||||
if (c == -1)
|
if (c == -1)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
if (c == 0)
|
||||||
|
continue;
|
||||||
|
|
||||||
switch (c) {
|
switch (c) {
|
||||||
case '?':
|
case '?':
|
||||||
case 'h':
|
case 'h':
|
||||||
@ -274,6 +297,7 @@ int main(int argc, char *argv[])
|
|||||||
case 'a':
|
case 'a':
|
||||||
case 'l':
|
case 'l':
|
||||||
case 'e':
|
case 'e':
|
||||||
|
case 'L':
|
||||||
if (action != '0') {
|
if (action != '0') {
|
||||||
fprintf(stderr, "Only one action may be specified\n");
|
fprintf(stderr, "Only one action may be specified\n");
|
||||||
exit(EXIT_FAILURE);
|
exit(EXIT_FAILURE);
|
||||||
@ -299,6 +323,20 @@ int main(int argc, char *argv[])
|
|||||||
printf("warning: overwriting existing injections. This is very dangerous.\n");
|
printf("warning: overwriting existing injections. This is very dangerous.\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Daemon doesn't work like the other commands */
|
||||||
|
if (action == 'L') {
|
||||||
|
struct l3_parity par;
|
||||||
|
struct l3_location loc;
|
||||||
|
if (daemonize) {
|
||||||
|
assert(daemon(0, 0) == 0);
|
||||||
|
openlog(argv[0], LOG_CONS | LOG_PID, LOG_USER);
|
||||||
|
}
|
||||||
|
memset(&par, 0, sizeof(par));
|
||||||
|
assert(l3_uevent_setup(&par) == 0);
|
||||||
|
assert(l3_listen(&par, daemonize == 1, &loc) == 0);
|
||||||
|
exit(EXIT_SUCCESS);
|
||||||
|
}
|
||||||
|
|
||||||
if (action == 'l')
|
if (action == 'l')
|
||||||
decode_dft(dft);
|
decode_dft(dft);
|
||||||
|
|
||||||
|
31
tools/intel_l3_parity.h
Normal file
31
tools/intel_l3_parity.h
Normal file
@ -0,0 +1,31 @@
|
|||||||
|
#ifndef INTEL_L3_PARITY_H_
#define INTEL_L3_PARITY_H_

#include <stdint.h>
#include <stdbool.h>
#include <sys/select.h>	/* fd_set, so this header is self-contained */

/*
 * State of the uevent listener. Filled in by l3_uevent_setup() and consumed
 * by l3_listen().
 */
struct l3_parity {
	struct udev *udev;			/* udev context owning the monitor */
	struct udev_monitor *uevent_monitor;	/* monitor the events are read from */
	int fd;					/* file descriptor of the monitor */
	fd_set fdset;				/* descriptor set containing only fd */
};

/* Location of an L3 parity error as reported by a uevent. */
struct l3_location {
	uint8_t slice;
	uint16_t row;
	uint8_t bank;
	uint8_t subbank;
};

#if HAVE_UDEV
/* Initializes par for listening. Returns 0 on success, negative on failure. */
int l3_uevent_setup(struct l3_parity *par);
/* Listens (blocks) for an l3 parity event. Returns the location of the error. */
int l3_listen(struct l3_parity *par, bool daemon, struct l3_location *loc);
#define l3_uevent_teardown(par) {}
#else
/*
 * Stubs for builds without udev: they take the same arguments as the real
 * functions and always fail with -1.
 */
#define l3_uevent_setup(par) ((void)(par), -1)
#define l3_listen(par, daemon, loc) ((void)(par), (void)(daemon), (void)(loc), -1)
#define l3_uevent_teardown(par) {}
#endif

#endif
|
108
tools/intel_l3_udev_listener.c
Normal file
108
tools/intel_l3_udev_listener.c
Normal file
@ -0,0 +1,108 @@
|
|||||||
|
#ifdef HAVE_CONFIG_H
|
||||||
|
#include "config.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if HAVE_UDEV
|
||||||
|
#include <libudev.h>
|
||||||
|
#ifndef _GNU_SOURCE
|
||||||
|
#define _GNU_SOURCE
|
||||||
|
#endif
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
#include <assert.h>
|
||||||
|
#include <syslog.h>
|
||||||
|
#include "i915_drm.h"
|
||||||
|
#include "intel_l3_parity.h"
|
||||||
|
|
||||||
|
#ifndef I915_L3_PARITY_UEVENT
|
||||||
|
#define I915_L3_PARITY_UEVENT "L3_PARITY_ERROR"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
int l3_uevent_setup(struct l3_parity *par)
|
||||||
|
{
|
||||||
|
struct udev *udev;
|
||||||
|
struct udev_monitor *uevent_monitor;
|
||||||
|
fd_set fdset;
|
||||||
|
int fd, ret = -1;
|
||||||
|
|
||||||
|
udev = udev_new();
|
||||||
|
if (!udev) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
uevent_monitor = udev_monitor_new_from_netlink(udev, "udev");
|
||||||
|
if (!uevent_monitor)
|
||||||
|
goto err_out;
|
||||||
|
|
||||||
|
ret = udev_monitor_filter_add_match_subsystem_devtype(uevent_monitor, "drm", "drm_minor");
|
||||||
|
if (ret < 0)
|
||||||
|
goto err_out;
|
||||||
|
|
||||||
|
ret = udev_monitor_enable_receiving(uevent_monitor);
|
||||||
|
if (ret < 0)
|
||||||
|
goto err_out;
|
||||||
|
|
||||||
|
fd = udev_monitor_get_fd(uevent_monitor);
|
||||||
|
FD_ZERO(&fdset);
|
||||||
|
FD_SET(fd, &fdset);
|
||||||
|
|
||||||
|
par->udev = udev;
|
||||||
|
par->fd = fd;
|
||||||
|
par->fdset = fdset;
|
||||||
|
par->uevent_monitor = uevent_monitor;
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
err_out:
|
||||||
|
udev_unref(udev);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
int l3_listen(struct l3_parity *par, bool daemon, struct l3_location *loc)
|
||||||
|
{
|
||||||
|
struct udev_device *udev_dev;
|
||||||
|
const char *parity_status;
|
||||||
|
char *err_msg;
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
again:
|
||||||
|
ret = select(par->fd + 1, &par->fdset, NULL, NULL, NULL);
|
||||||
|
/* Number of bits set is returned, must be >= 1 */
|
||||||
|
if (ret <= 0) {
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
assert(FD_ISSET(par->fd, &par->fdset));
|
||||||
|
|
||||||
|
udev_dev = udev_monitor_receive_device(par->uevent_monitor);
|
||||||
|
if (!udev_dev)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
parity_status = udev_device_get_property_value(udev_dev, I915_L3_PARITY_UEVENT);
|
||||||
|
if (strncmp(parity_status, "1", 1))
|
||||||
|
goto again;
|
||||||
|
|
||||||
|
loc->slice = atoi(udev_device_get_property_value(udev_dev, "SLICE"));
|
||||||
|
loc->row = atoi(udev_device_get_property_value(udev_dev, "ROW"));
|
||||||
|
loc->bank = atoi(udev_device_get_property_value(udev_dev, "BANK"));
|
||||||
|
loc->subbank = atoi(udev_device_get_property_value(udev_dev, "SUBBANK"));
|
||||||
|
|
||||||
|
udev_device_unref(udev_dev);
|
||||||
|
|
||||||
|
asprintf(&err_msg, "Parity error detected on: %d,%d,%d,%d. "
|
||||||
|
"Try to run intel_l3_parity -r %d -b %d -s %d -w %d -d",
|
||||||
|
loc->slice, loc->row, loc->bank, loc->subbank,
|
||||||
|
loc->row, loc->bank, loc->subbank, loc->slice);
|
||||||
|
if (daemon) {
|
||||||
|
syslog(LOG_INFO, "%s\n", err_msg);
|
||||||
|
goto again;
|
||||||
|
}
|
||||||
|
|
||||||
|
fprintf(stderr, "%s\n", err_msg);
|
||||||
|
|
||||||
|
free(err_msg);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
#endif
|
Loading…
x
Reference in New Issue
Block a user