Subject: zpcictl: Introduce new tool zpcictl From: Jan Hoeppner Summary: zpcictl: Add tool to manage PCI devices Description: Use the zpcictl tool to manage PCI devices on the IBM Z platform. Initial functions include generating firmware error logs, resetting PCI devices, and preparing a device for further repair actions. Upstream-ID: 177cf8cfeb83f85bc164c462b5534f93be3bd979 Problem-ID: RAS1703 Upstream-Description: zpcictl: Introduce new tool zpcictl zpcictl is used to manage PCI devices on z Systems. In this first version it is mainly used to handle erroneous PCI devices by changing their state and make those changes known to the SE. Log data, such as S.M.A.R.T. data for NVMe devices, is sent alongside those state changes. The state change is issued by sending data via the PCI 'report_error' sysfs attribute. It's a binary attribute which will cause the host to send an Adapter Notification Event. Signed-off-by: Jan Höppner Signed-off-by: Jan Hoeppner --- .gitignore | 1 Makefile | 2 zpcictl/Makefile | 18 ++ zpcictl/zpcictl.8 | 80 +++++++++++ zpcictl/zpcictl.c | 378 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ zpcictl/zpcictl.h | 60 ++++++++ 6 files changed, 538 insertions(+), 1 deletion(-) --- a/.gitignore +++ b/.gitignore @@ -87,3 +87,4 @@ zipl/boot/data.h zipl/src/chreipl_helper.device-mapper zipl/src/zipl zkey/zkey +zpcictl/zpcictl --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ TOOL_DIRS = zipl zdump fdasd dasdfmt das tape390 osasnmpd qetharp ip_watcher qethconf scripts zconf \ vmconvert vmcp man mon_tools dasdinfo vmur cpuplugd ipl_tools \ ziomon iucvterm hyptop cmsfs-fuse qethqoat zfcpdump zdsfs cpumf \ - systemd hmcdrvfs cpacfstats zdev dump2tar zkey netboot + systemd hmcdrvfs cpacfstats zdev dump2tar zkey netboot zpcictl SUB_DIRS = $(LIB_DIRS) $(TOOL_DIRS) all: $(TOOL_DIRS) --- /dev/null +++ b/zpcictl/Makefile @@ -0,0 +1,18 @@ +include ../common.mak + +all: zpcictl + +libs = $(rootdir)/libutil/libutil.a + +zpcictl: zpcictl.o $(libs) + +install: all 
+ $(INSTALL) -d -m 755 $(DESTDIR)$(BINDIR) $(DESTDIR)$(MANDIR)/man8 + $(INSTALL) -g $(GROUP) -o $(OWNER) -m 755 zpcictl $(DESTDIR)$(BINDIR) + $(INSTALL) -g $(GROUP) -o $(OWNER) -m 644 zpcictl.8 \ + $(DESTDIR)$(MANDIR)/man8 + +clean: + rm -f *.o *~ zpcictl core + +.PHONY: all install clean --- /dev/null +++ b/zpcictl/zpcictl.8 @@ -0,0 +1,80 @@ +.\" Copyright 2017 IBM Corp. +.\" s390-tools is free software; you can redistribute it and/or modify +.\" it under the terms of the MIT license. See LICENSE for details. +.\" +.\" Macro for inserting an option description prologue. +.\" .OD <long> [<short>] [args] +.de OD +. ds args " +. if !'\\$3'' .as args \fI\\$3\fP +. if !'\\$4'' .as args \\$4 +. if !'\\$5'' .as args \fI\\$5\fP +. if !'\\$6'' .as args \\$6 +. if !'\\$7'' .as args \fI\\$7\fP +. PD 0 +. if !'\\$2'' .IP "\fB\-\\$2\fP \\*[args]" 4 +. if !'\\$1'' .IP "\fB\-\-\\$1\fP \\*[args]" 4 +. PD +.. +. +.TH zpcictl 8 "Oct 2018" s390-tools zpcictl +. +.SH NAME +zpcictl - Manage PCI devices on z Systems +. +. +.SH SYNOPSIS +.B "zpcictl" +.I "OPTIONS" +.I "DEVICE" +. +. +.SH DESCRIPTION +.B zpcictl +is a tool for managing PCI devices on the IBM z Systems platform. It is +especially used for reporting erroneous PCI devices to the service element. + +.B Note: +For NVMe devices additional data (such as S.M.A.R.T. data) is collected and sent +with any error handling action. The smartmontools are required to be installed +for this to work. +.PP +. +. +.SH DEVICE +.B DEVICE +can be either the PCI slot address (e.g. 0000:00:00.0) or the main device node +of an NVMe device (e.g. /dev/nvme0). +. +. +.SH OPTIONS +.SS Error Handling +.OD reset "" "DEVICE" +Reset +.I DEVICE +and initiate a re-initialisation of the adapter. +.PP +. +.OD deconfigure "" "DEVICE" +De-configure +.I DEVICE +and prepare for any repair action. This action will move the +PCI device from a configured to a reserved state. +.PP +. +.OD report-error "" "DEVICE" +Report any device error for +.IR DEVICE . 
+The
+.I DEVICE
+is marked as erroneous and no further action is initiated on it.
+.PP
+.
+.SS Misc
+.OD help "h" ""
+Print usage information, then exit.
+.PP
+.
+.OD version "v" ""
+Print version information, then exit.
+.PP
--- /dev/null
+++ b/zpcictl/zpcictl.c
@@ -0,0 +1,428 @@
+/*
+ * zpcictl - Manage PCI devices on z Systems
+ *
+ * Copyright IBM Corp. 2018
+ *
+ * s390-tools is free software; you can redistribute it and/or modify
+ * it under the terms of the MIT license. See LICENSE for details.
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <sys/sysmacros.h>
+#include <time.h>
+
+#include "lib/util_base.h"
+#include "lib/util_libc.h"
+#include "lib/util_opt.h"
+#include "lib/util_path.h"
+#include "lib/util_prg.h"
+#include "lib/util_proc.h"
+#include "lib/util_rec.h"
+#include "lib/util_scandir.h"
+
+#include "zpcictl.h"
+
+#define SMARTCTL_CMDLINE "smartctl -x %s 2>/dev/null"
+
+static const struct util_prg prg = {
+	.desc = "Use zpcictl to manage PCI devices on s390\n"
+		"DEVICE is the slot id or node of the device (e.g. /dev/nvme0)",
+	.args = "DEVICE",
+	.copyright_vec = {
+		{
+			.owner = "IBM Corp.",
+			.pub_first = 2018,
+			.pub_last = 2018,
+		},
+		UTIL_PRG_COPYRIGHT_END
+	}
+};
+
+/* Defines for options with no short command */
+#define OPT_RESET	128
+#define OPT_DECONF	129
+#define OPT_REPORT_ERR	130
+
+static struct util_opt opt_vec[] = {
+	UTIL_OPT_SECTION("ERROR HANDLING"),
+	{
+		.option = { "reset", no_argument, NULL, OPT_RESET },
+		.desc = "Reset device",
+		.flags = UTIL_OPT_FLAG_NOSHORT,
+	},
+	{
+		.option = { "deconfigure", no_argument, NULL, OPT_DECONF },
+		.desc = "De-configure device and prepare for any repair action",
+		.flags = UTIL_OPT_FLAG_NOSHORT,
+	},
+	{
+		.option = { "report-error", no_argument, NULL, OPT_REPORT_ERR },
+		.desc = "Report device error to service element (SE)",
+		.flags = UTIL_OPT_FLAG_NOSHORT,
+	},
+	UTIL_OPT_SECTION("MISC"),
+	UTIL_OPT_HELP,
+	UTIL_OPT_VERSION,
+	UTIL_OPT_END
+};
+
+/* Return non-zero if dev can be stat()ed and is a character device. */
+static int is_char_dev(const char *dev)
+{
+	struct stat s;
+
+	if (stat(dev, &s))
+		return 0;
+
+	return S_ISCHR(s.st_mode);
+}
+
+/* Return non-zero if dev can be stat()ed and is a block device. */
+static int is_blk_dev(const char *dev)
+{
+	struct stat s;
+
+	if (stat(dev, &s))
+		return 0;
+
+	return S_ISBLK(s.st_mode);
+}
+
+/* Report a failed open of path based on errno, free path and exit. */
+static void fopen_err(char *path)
+{
+	warnx("Could not open file %s: %s", path, strerror(errno));
+	free(path);
+	exit(EXIT_FAILURE);
+}
+
+#define READ_CHUNK_SIZE		512
+
+/*
+ * Run smartctl on the device node of pdev and return its output as a
+ * NUL-terminated string that the caller must free(). Returns NULL if no
+ * device node is known or if the output could not be collected.
+ */
+static char *collect_smart_data(struct zpci_device *pdev)
+{
+	char *buffer = NULL;
+	size_t count = 0;
+	char *cmd, *tmp;
+	FILE *fd;
+
+	if (!pdev->device)
+		return NULL;
+
+	util_asprintf(&cmd, SMARTCTL_CMDLINE, pdev->device);
+	fd = popen(cmd, "r");
+	if (!fd)
+		goto out_free;
+
+	while (!feof(fd)) {
+		/* Keep one spare byte for the terminating NUL */
+		tmp = realloc(buffer, count + READ_CHUNK_SIZE + 1);
+		if (!tmp) {
+			warnx("Could not collect S.M.A.R.T. data");
+			free(buffer);
+			buffer = NULL;
+			goto out_close;
+		}
+		buffer = tmp;
+		count += fread(&buffer[count], 1, READ_CHUNK_SIZE, fd);
+		if (ferror(fd)) {
+			free(buffer);
+			buffer = NULL;
+			goto out_close;
+		}
+	}
+	buffer[count] = '\0';
+
+out_close:
+	pclose(fd);
+out_free:
+	free(cmd);
+
+	return buffer;
+}
+
+/*
+ * Read the hexadecimal value of sysfs attribute attr of the PCI function
+ * in pdev->slot. Exits if the attribute cannot be opened; warns and returns
+ * 0 if it does not hold a hexadecimal number.
+ */
+static unsigned int sysfs_read_value(struct zpci_device *pdev, const char *attr)
+{
+	unsigned int val = 0;
+	char *path;
+	FILE *fp;
+
+	path = util_path_sysfs("bus/pci/devices/%s/%s", pdev->slot, attr);
+	fp = fopen(path, "r");
+	if (!fp)
+		fopen_err(path);
+	if (fscanf(fp, "%x", &val) != 1)
+		warnx("Could not read value from file %s", path);
+	fclose(fp);
+	free(path);
+
+	return val;
+}
+
+/*
+ * Write report to the report_error sysfs attribute of the PCI function in
+ * slot. Exits if the attribute cannot be opened.
+ */
+static void sysfs_write_data(struct zpci_report_error *report, char *slot)
+{
+	char *path;
+	int fd, rc;
+
+	path = util_path_sysfs("bus/pci/devices/%s/report_error", slot);
+	fd = open(path, O_WRONLY);
+	/* open() reports failure with -1; 0 is a valid file descriptor */
+	if (fd == -1)
+		fopen_err(path);
+	rc = write(fd, report, sizeof(*report));
+	if (rc == -1)
+		warnx("Could not write to file: %s: %s", path, strerror(errno));
+	if (close(fd))
+		warnx("Could not close file: %s: %s", path, strerror(errno));
+	free(path);
+}
+
+/*
+ * Look up the PCI slot address of character device node dev and copy it to
+ * slot, which must provide room for at least 13 bytes. Exits if the address
+ * cannot be determined.
+ */
+static void sysfs_get_slot_addr(const char *dev, char *slot)
+{
+	unsigned int major, minor;
+	struct stat dev_stat;
+	char addr[13];
+	char *path;
+	FILE *fp;
+
+	if (stat(dev, &dev_stat) != 0) {
+		errx(EXIT_FAILURE, "Could not get stat information for %s: %s",
+		     dev, strerror(errno));
+	}
+	major = major(dev_stat.st_rdev);
+	minor = minor(dev_stat.st_rdev);
+
+	path = util_path_sysfs("dev/char/%u:%u/address", major, minor);
+	fp = fopen(path, "r");
+	if (!fp)
+		fopen_err(path);
+	/* Limit the conversion to what fits into addr including the NUL */
+	if (fscanf(fp, "%12s", addr) != 1) {
+		fclose(fp);
+		errx(EXIT_FAILURE, "Could not read slot address from %s", path);
+	}
+	fclose(fp);
+	free(path);
+
+	strcpy(slot, addr);
+}
+
+/*
+ * Search the nvme directory of the PCI function in pdev->slot for a device
+ * node that maps back to that slot and store its path in pdev->device.
+ * Exits if the directory cannot be read.
+ */
+static void get_device_node(struct zpci_device *pdev)
+{
+	struct dirent **de_vec;
+	char *path, *dev;
+	char slot[13];
+	int count, i;
+
+	path = util_path_sysfs("bus/pci/devices/%s/nvme", pdev->slot);
+	count = util_scandir(&de_vec, alphasort, path, "nvme*");
+	if (count == -1) {
+		warnx("Could not read directory %s: %s", path, strerror(errno));
+		free(path);
+		exit(EXIT_FAILURE);
+	}
+
+	for (i = 0; i < count; i++) {
+		util_asprintf(&dev, "/dev/%s", de_vec[i]->d_name);
+		sysfs_get_slot_addr(dev, slot);
+		if (strcmp(slot, pdev->slot) == 0) {
+			/* pdev takes over ownership of dev */
+			pdev->device = dev;
+			break;
+		}
+		free(dev);
+	}
+
+	util_scandir_free(de_vec, count);
+	free(path);
+}
+
+/* Return 1 if dev is found as PCI slot in sysfs or as a path, else 0. */
+static int device_exists(char *dev)
+{
+	char *path;
+	int rc = 0;
+
+	path = util_path_sysfs("bus/pci/devices/%s", dev);
+	if (util_path_exists(path) || util_path_exists(dev))
+		rc = 1;
+	free(path);
+
+	return rc;
+}
+
+/*
+ * Fill pdev with the slot address, class, function ID and PCHID of dev,
+ * which is either a PCI slot address or a character device node. Exits if
+ * dev does not exist or cannot be used.
+ */
+static void get_device_info(struct zpci_device *pdev, char *dev)
+{
+	if (!device_exists(dev))
+		errx(EXIT_FAILURE, "Device %s not found", dev);
+	if (is_blk_dev(dev))
+		errx(EXIT_FAILURE, "Unsupported device type %s", dev);
+	if (is_char_dev(dev)) {
+		sysfs_get_slot_addr(dev, pdev->slot);
+		pdev->device = dev;
+	} else {
+		/* Anything that does not fit cannot be a slot address */
+		if (strlen(dev) >= sizeof(pdev->slot))
+			errx(EXIT_FAILURE, "Unsupported device type %s", dev);
+		strcpy(pdev->slot, dev);
+	}
+
+	pdev->class = sysfs_read_value(pdev, "class");
+	pdev->fid = sysfs_read_value(pdev, "function_id");
+	pdev->pchid = sysfs_read_value(pdev, "pchid");
+
+	/* In case a slot address was specified, we still need to figure out
+	 * the device node for NVMe devices. Otherwise we won't be able to
+	 * collect S.M.A.R.T. data at a later point.
+	 */
+	if (!pdev->device && pdev->class == PCI_CLASS_NVME)
+		get_device_node(pdev);
+}
+
+/*
+ * Issue an SCLP Adapter Error Notification event with a specific action
+ * qualifier.
+ *
+ * Collect additional information when possible (e.g. S.M.A.R.T. data for NVMe
+ * devices).
+ */
+static void sclp_issue_action(struct zpci_device *pdev, int action)
+{
+	struct zpci_report_error report = {
+		.header = { 0 },
+		.data = { 0 }
+	};
+	char *sdata = NULL;
+
+	report.header.version = 1;
+	report.header.action = action;
+	report.header.length = sizeof(report.data);
+	report.data.timestamp = (__u64)time(NULL);
+	report.data.err_log_id = 0x4713;
+
+	if (pdev->class == PCI_CLASS_NVME)
+		sdata = collect_smart_data(pdev);
+	if (sdata) {
+		/* log_data is a fixed-size field: longer output is truncated */
+		strncpy(report.data.log_data, sdata, sizeof(report.data.log_data));
+		free(sdata);
+	}
+	sysfs_write_data(&report, pdev->slot);
+}
+
+/*
+ * Reset the PCI device and initiate a re-initialization.
+ */
+static void sclp_reset_device(struct zpci_device *pdev)
+{
+	sclp_issue_action(pdev, SCLP_ERRNOTIFY_AQ_RESET);
+}
+
+/*
+ * De-Configure/repair PCI device. Moves the device from configured
+ * to reserved state.
+ */
+static void sclp_deconfigure(struct zpci_device *pdev)
+{
+	sclp_issue_action(pdev, SCLP_ERRNOTIFY_AQ_DECONF);
+}
+
+/*
+ * Report an error to the SE.
+ */
+static void sclp_report_error(struct zpci_device *pdev)
+{
+	sclp_issue_action(pdev, SCLP_ERRNOTIFY_AQ_REPORT_ERR);
+}
+
+/* Parse the command line and record the requested action in opts. */
+static void parse_cmdline(int argc, char *argv[], struct options *opts)
+{
+	int cmd;
+
+	util_prg_init(&prg);
+	util_opt_init(opt_vec, NULL);
+
+	do {
+		cmd = util_opt_getopt_long(argc, argv);
+
+		switch (cmd) {
+		case OPT_RESET:
+			opts->reset = 1;
+			break;
+		case OPT_DECONF:
+			opts->deconfigure = 1;
+			break;
+		case OPT_REPORT_ERR:
+			opts->report = 1;
+			break;
+		case 'h':
+			util_prg_print_help();
+			util_opt_print_help();
+			exit(EXIT_SUCCESS);
+		case 'v':
+			util_prg_print_version();
+			exit(EXIT_SUCCESS);
+		case -1:
+			/* End of options string */
+			if (argc == 1) {
+				errx(EXIT_FAILURE,
+				     "Use '%s --help' for more information",
+				     argv[0]);
+			}
+			break;
+		}
+	} while (cmd != -1);
+}
+
+int main(int argc, char *argv[])
+{
+	struct zpci_device pdev = { 0 };
+	struct options opts = { 0 };
+
+	parse_cmdline(argc, argv, &opts);
+
+	if (optind >= argc)
+		errx(EXIT_FAILURE, "No device specified");
+
+	get_device_info(&pdev, argv[optind]);
+
+	if (opts.reset)
+		sclp_reset_device(&pdev);
+	else if (opts.deconfigure)
+		sclp_deconfigure(&pdev);
+	else if (opts.report)
+		sclp_report_error(&pdev);
+
+	return 0;
+}
--- /dev/null
+++ b/zpcictl/zpcictl.h
@@ -0,0 +1,60 @@
+/*
+ * zpcictl - Manage PCI devices on z Systems
+ *
+ * Copyright IBM Corp. 2018
+ *
+ * s390-tools is free software; you can redistribute it and/or modify
+ * it under the terms of the MIT license. See LICENSE for details.
+ */
+
+#ifndef ZPCICTL_H
+#define ZPCICTL_H
+
+#include <linux/types.h>
+#include "lib/zt_common.h"
+
+#define SCLP_ERRNOTIFY_AQ_RESET			0
+#define SCLP_ERRNOTIFY_AQ_DECONF		1
+#define SCLP_ERRNOTIFY_AQ_REPORT_ERR		2
+
+#define PCI_CLASS_UNCLASSIFIED			0x000000U
+#define PCI_CLASS_NVME				0x010802U
+#define PCI_CLASS_NETWORK			0x020000U
+
+struct options {
+	unsigned int reset;
+	unsigned int deconfigure;
+	unsigned int report;
+};
+
+struct zpci_device {
+	u16 fid;
+	u16 pchid;
+	u32 class;
+	char slot[13];
+	char *device;
+};
+
+struct zpci_report_error_header {
+	__u8 version;	/* Interface version byte */
+	__u8 action;	/* Action qualifier byte
+			 * 0: Adapter Reset Request
+			 * 1: Deconfigure and repair action requested
+			 * 2: Informational Report
+			 */
+	__u16 length;	/* Length of Subsequent Data (up to 4K – SCLP header) */
+	__u8 data[0];	/* Subsequent Data passed verbatim to SCLP ET 24 */
+};
+
+struct zpci_report_error_data {
+	__u64 timestamp;
+	__u64 err_log_id;
+	char log_data[4054]; /* We cannot exceed a total of 4074 bytes (header + data) */
+};
+
+struct zpci_report_error {
+	struct zpci_report_error_header header;
+	struct zpci_report_error_data data;
+} __packed;
+
+#endif /* ZPCICTL_H */