From 09405115a26c2b4c7fb38a07e6fa15fde104c9be Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrian=20Schr=C3=B6ter?= Date: Fri, 1 Mar 2024 16:57:40 +0100 Subject: [PATCH] Sync from SUSE:ALP:Source:Standard:1.0 ppc64-diag revision 68babf57b28a3079f7d752c5d55dcd94 --- ...rovements-to-status-and-err-messages.patch | 42 +++++ ...call_home-command-fails-on-nvmf-driv.patch | 76 +++++++++ ...me-remove-d-and-f-as-visible-options.patch | 149 ++++++++++++++++++ ...me-remove-d-and-f-from-usage-message.patch | 29 ++++ ppc64-diag.changes | 9 ++ ppc64-diag.spec | 6 +- 6 files changed, 310 insertions(+), 1 deletion(-) create mode 100644 diag_nvme-improvements-to-status-and-err-messages.patch create mode 100644 diags-diag_nvme-call_home-command-fails-on-nvmf-driv.patch create mode 100644 nvme_call_home-remove-d-and-f-as-visible-options.patch create mode 100644 nvme_call_home-remove-d-and-f-from-usage-message.patch diff --git a/diag_nvme-improvements-to-status-and-err-messages.patch b/diag_nvme-improvements-to-status-and-err-messages.patch new file mode 100644 index 0000000..74a4363 --- /dev/null +++ b/diag_nvme-improvements-to-status-and-err-messages.patch @@ -0,0 +1,42 @@ +From 3ad587d20868f154bb7ab972ee7812add1380d7a Mon Sep 17 00:00:00 2001 +From: Greg Joyce +Date: Wed, 11 Oct 2023 12:10:40 -0500 +Subject: [PATCH] diag_nvme: improvements to status and err messages + +Signed-off-by: Greg Joyce +--- + diags/diag_nvme.c | 9 ++++++--- + 1 file changed, 6 insertions(+), 3 deletions(-) + +diff --git a/diags/diag_nvme.c b/diags/diag_nvme.c +index df191f2d1ac8..00efec212056 100644 +--- a/diags/diag_nvme.c ++++ b/diags/diag_nvme.c +@@ -166,9 +166,9 @@ int main(int argc, char *argv[]) { + } + + if (rc == 0) +- fprintf(stdout, "Command completed successfully\n"); ++ fprintf(stdout, "NVMe diag command completed successfully\n"); + else +- fprintf(stderr, "Command failed, exiting with rc %d\n", rc); ++ fprintf(stderr, "NVMe diag command failed with rc %d\n", rc); + + return rc; + } +@@ -724,8 
+724,11 @@ extern int get_smart_file(char *file_path, struct nvme_smart_log_page *log) { + int num_elements = 0; + struct dictionary dict[MAX_DICT_ELEMENTS]; + +- if ((num_elements = read_file_dict(file_path, dict, MAX_DICT_ELEMENTS)) < 0) ++ if ((num_elements = read_file_dict(file_path, dict, MAX_DICT_ELEMENTS)) < 0) { ++ fprintf(stderr, "read_file_dict failed: %s, rc % d\n", ++ file_path, num_elements); + return num_elements; ++ } + return set_smart_log_field(log, dict, num_elements); + } + +-- +2.43.2 + diff --git a/diags-diag_nvme-call_home-command-fails-on-nvmf-driv.patch b/diags-diag_nvme-call_home-command-fails-on-nvmf-driv.patch new file mode 100644 index 0000000..a6d726d --- /dev/null +++ b/diags-diag_nvme-call_home-command-fails-on-nvmf-driv.patch @@ -0,0 +1,76 @@ +From db0c6d7974d7f8909878384d77ec02457759d6df Mon Sep 17 00:00:00 2001 +From: Nilay Shroff +Date: Tue, 16 Jan 2024 13:55:03 +0530 +Subject: [PATCH] diags/diag_nvme: call_home command fails on nvmf drive + +The diag_nvme command needs to retrieve the VPD log page from NVMe for +filling in the product data while generating the call-home event. +However, call-home feature is supported for directly attached NVMe +module. In the current diag_nvme implementation, if user doesn't +provide NVMe device name for diagnostics then it(diag_nvme) loops +through each NVMe module (directly connected to the system/LPAR as +well as discovered over fabrics) and attempt retrieving the SMART log +page as well as VPD page. Unfortunately, diag_nvme fails to retrieve +the VPD page for NVMe connected over fabrics and that causes the +diag_nvme to print "not-so-nice" failure messages on console. + +Hence fixed the diag_nvme code so that for call-home event reporting, +it skips the NVMe which is connected over fabrics and prints a +"nice-message" informing the user that it's skipping diagnostics for +NVMe module connected over fabrics. 
In a nutshell, with this fix now +diag_nvme would only diagnose the NVMe module which is directly +attached (over PCIe) to the system. + +Signed-off-by: Nilay Shroff +--- + diags/diag_nvme.c | 31 +++++++++++++++++++++++++++++++ + 1 file changed, 31 insertions(+) + +diff --git a/diags/diag_nvme.c b/diags/diag_nvme.c +index c1c0a20ddf14..e86786ccdccd 100644 +--- a/diags/diag_nvme.c ++++ b/diags/diag_nvme.c +@@ -375,9 +375,40 @@ static int diagnose_nvme(char *device_name, struct notify *notify, char *file_pa + char endurance_s[sizeof(vpd.endurance) + 1], capacity_s[sizeof(vpd.capacity)+1]; + uint64_t event_id; + uint8_t severity; ++ FILE *fp; ++ char tr_file_path[PATH_MAX]; + uint32_t raw_data_len = 0; + unsigned char *raw_data = NULL; + ++ /* ++ * Skip diag test if NVMe is connected over fabric ++ */ ++ snprintf(tr_file_path, sizeof(tr_file_path), ++ NVME_SYS_PATH"/%s/%s", device_name, "transport"); ++ fp = fopen(tr_file_path, "r"); ++ if (fp) { ++ char buf[12]; ++ int n = fread(buf, 1, sizeof(buf), fp); ++ ++ if (n) { ++ /* ++ * If NVMe transport is anything but pcie then skip the diag test ++ */ ++ if (strncmp(buf, "pcie", 4) != 0) { ++ fprintf(stdout, "Skipping diagnostics for nvmf : %s\n", ++ device_name); ++ fclose(fp); ++ return 0; ++ } ++ } ++ fclose(fp); ++ } else { ++ fprintf(stderr, "Skipping diagnostics for %s:\n" ++ "Unable to find the nvme transport type\n", ++ device_name); ++ return -1; ++ } ++ + tmp_rc = regex_controller(controller_name, device_name); + if (tmp_rc != 0) + return -1; +-- +2.43.2 + diff --git a/nvme_call_home-remove-d-and-f-as-visible-options.patch b/nvme_call_home-remove-d-and-f-as-visible-options.patch new file mode 100644 index 0000000..d67c3b5 --- /dev/null +++ b/nvme_call_home-remove-d-and-f-as-visible-options.patch @@ -0,0 +1,149 @@ +From 11cb2a44a59b63bdc23c94e386c4e2f43ea7eb61 Mon Sep 17 00:00:00 2001 +From: Greg Joyce +Date: Fri, 22 Sep 2023 15:19:34 -0500 +Subject: [PATCH] nvme_call_home: remove -d and -f as visible options + 
+The dump(-d) and file(-f) options have been removed as option in usage +and also in the man page. The options are for internal test only and +should not have been exposed. Also added some helpful status messages. + +Signed-off-by: Greg Joyce +--- + diags/diag_nvme.c | 23 ++++++++++++++------- + diags/man/diag_nvme.8 | 48 +++---------------------------------------- + 2 files changed, 19 insertions(+), 52 deletions(-) + +diff --git a/diags/diag_nvme.c b/diags/diag_nvme.c +index 2a78034ecfd9..df191f2d1ac8 100644 +--- a/diags/diag_nvme.c ++++ b/diags/diag_nvme.c +@@ -165,6 +165,11 @@ int main(int argc, char *argv[]) { + optind++; + } + ++ if (rc == 0) ++ fprintf(stdout, "Command completed successfully\n"); ++ else ++ fprintf(stderr, "Command failed, exiting with rc %d\n", rc); ++ + return rc; + } + +@@ -310,6 +315,7 @@ extern int dump_smart_data(char *device_name, char *dump_path) { + int fd, rc; + FILE *fp; + struct nvme_smart_log_page smart_log = { 0 }; ++ char ans; + + /* Read SMART data from device */ + snprintf(dev_path,sizeof(dev_path), "/dev/%s", device_name); +@@ -331,8 +337,16 @@ extern int dump_smart_data(char *device_name, char *dump_path) { + } + fp = fopen(dump_path, "wx"); + if (fp == NULL) { +- fprintf(stderr, "%s open failed: %s\n", dump_path, strerror(errno)); +- return -1; ++ if (errno == EEXIST) { ++ fprintf(stdout, "File %s exists. Overwrite (y/n)? 
", dump_path); ++ rc = scanf("%c", &ans); ++ if (ans == 'y' || ans == 'Y') ++ fp = fopen(dump_path, "w"); ++ } ++ if (fp == NULL) { ++ fprintf(stderr, "%s open failed: %s\n", dump_path, strerror(errno)); ++ return -1; ++ } + } + write_smart_file(fp, &smart_log); + fclose(fp); +@@ -974,11 +988,6 @@ extern int open_nvme(char *dev_path) { + static void print_usage(char *command) { + printf("Usage: %s [-h] [-d ] [-f ] []\n" + "\t-h or --help: print this help message\n" +- "\t-d or --dump: dump SMART data to the specified path and file name \n" +- "\t one is expected with this option\n" +- "\t-f or --file: use SMART data from the specified path and file name \n" +- "\t instead of device, one is expected with\n" +- "\t this option\n" + "\t: the NVMe devices on which to operate, for\n" + "\t example nvme0; if not specified, all detected\n" + "\t nvme devices will be diagnosed\n", command); +diff --git a/diags/man/diag_nvme.8 b/diags/man/diag_nvme.8 +index 45567ac26bfc..c023ed6719eb 100644 +--- a/diags/man/diag_nvme.8 ++++ b/diags/man/diag_nvme.8 +@@ -1,7 +1,7 @@ + .\" +-.\" Copyright (C) 2022 IBM Corporation ++.\" Copyright (C) 2022, 2023 IBM Corporation + .\" +-.TH "DIAG_NVME" "8" "June 2022" "Linux" "PowerLinux Diagnostic Tools" ++.TH "DIAG_NVME" "8" "September 2023" "Linux" "PowerLinux Diagnostic Tools" + .hy + .SH NAME + .PP +@@ -12,7 +12,7 @@ diag_nvme \- diagnose NVMe devices + .PD 0 + .P + .PD +-\f[B]diag_nvme\f[] [\-d <\f[I]file\f[]>] [\-f <\f[I]file\f[]>] ++\f[B]diag_nvme\f[] + <\f[B]nvme\f[]\f[I]n\f[]> + .PD 0 + .P +@@ -29,52 +29,10 @@ go through the diagnostics procedure. + The user can control which events will be reported through the + configuration file \f[I]/etc/ppc64\-diag/diag_nvme.config\f[] + .SH OPTIONS +-.TP +-.B \f[B]\-d\f[], \f[B]--dump\f[] \f[I]file\f[] +-Dump SMART data to the specified path and file name \f[I]file\f[]. +-The SMART data is extracted from an NVMe device, so specifying one is +-mandatory if this option is selected. 
+-File created is in a simple key=value format. +-.RS +-.RE +-.TP +-.B \f[B]\-f\f[], \f[B]--file\f[] \f[I]file\f[] +-This option usage is for testing only. +-Use SMART data from the specified path and file name \f[I]file\f[] +-instead of device, one NVMe is mandatory if this option is selected. +-The expected format of the file is a simple key=value that is the same +-one provided with the \-d / --dump option. +-If \f[I]file\f[] is missing from the filesystem it will be treated as a +-failure to retrieve SMART data and an event will be reported. +-.RS +-.RE +-.TP +-.B \f[B]\-h\f[], \f[B]--help\f[] +-Print a help message and exit +-.RS +-.RE +-.SH EXAMPLES +-.TP +-.B \f[B]diag_nvme\f[] +-Run diagnostics in all NVMe devices detected in the system. +-.RS +-.RE +-.TP + .B \f[B]diag_nvme nvme0 nvme1\f[] + Run diagnostics only in nvme0 and nvme1 devices. + .RS + .RE +-.TP +-.B \f[B]diag_nvme \-d smart.txt nvme0\f[] +-Dump SMART data from nvme0 into file smart.txt. +-.RS +-.RE +-.TP +-.B \f[B]diag_nvme \-f smart.txt nvme0\f[] +-Read SMART data from file smart.txt and use it as health information for +-diagnostics of device nvme0. +-.RS +-.RE + .SH REPORTING BUGS + .PP + Patches and issues may be submitted at +-- +2.43.2 + diff --git a/nvme_call_home-remove-d-and-f-from-usage-message.patch b/nvme_call_home-remove-d-and-f-from-usage-message.patch new file mode 100644 index 0000000..c574c83 --- /dev/null +++ b/nvme_call_home-remove-d-and-f-from-usage-message.patch @@ -0,0 +1,29 @@ +From 316d2baf5dba0d00447a3ac49f2f95339dbdf5cd Mon Sep 17 00:00:00 2001 +From: Greg Joyce +Date: Wed, 15 Nov 2023 11:28:05 -0600 +Subject: [PATCH] nvme_call_home: remove -d and -f from usage message + +The -d and -f weren't removed from the first line of the +usage message. 
+ +Signed-off-by: Greg Joyce +--- + diags/diag_nvme.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/diags/diag_nvme.c b/diags/diag_nvme.c +index 00efec212056..164afe1cadd6 100644 +--- a/diags/diag_nvme.c ++++ b/diags/diag_nvme.c +@@ -989,7 +989,7 @@ extern int open_nvme(char *dev_path) { + } + + static void print_usage(char *command) { +- printf("Usage: %s [-h] [-d ] [-f ] []\n" ++ printf("Usage: %s [-h] []\n" + "\t-h or --help: print this help message\n" + "\t: the NVMe devices on which to operate, for\n" + "\t example nvme0; if not specified, all detected\n" +-- +2.43.2 + diff --git a/ppc64-diag.changes b/ppc64-diag.changes index 145ec17..141ef0e 100644 --- a/ppc64-diag.changes +++ b/ppc64-diag.changes @@ -1,3 +1,12 @@ +------------------------------------------------------------------- +Mon Feb 26 14:22:42 UTC 2024 - Michal Suchanek + +- Fix NVMe diagnostics support with nvmf drive (bsc#1220345 ltc#205706). + * nvme_call_home-remove-d-and-f-as-visible-options.patch + * nvme_call_home-remove-d-and-f-from-usage-message.patch + * diag_nvme-improvements-to-status-and-err-messages.patch + * diags-diag_nvme-call_home-command-fails-on-nvmf-driv.patch + ------------------------------------------------------------------- Fri Nov 17 18:18:03 UTC 2023 - Martin Schreiner diff --git a/ppc64-diag.spec b/ppc64-diag.spec index b91236a..bdd3164 100644 --- a/ppc64-diag.spec +++ b/ppc64-diag.spec @@ -1,7 +1,7 @@ # # spec file for package ppc64-diag # -# Copyright (c) 2023 SUSE LLC +# Copyright (c) 2024 SUSE LLC # # All modifications and additions to the file contributed by third parties # remain the property of their copyright owners, unless otherwise agreed @@ -36,6 +36,10 @@ Patch2: rtas_errd-Handle-multiple-platform-dumps.patch Patch3: 0001-ppc64-diag-Move-trim_trail_space-function-to-common-.patch Patch4: 0002-ppc64-diag-lp_diag-Utilize-trim_trail_space-function.patch Patch5: 0003-ppc64-diag-lp_diag-Enable-light-path-diagnostics-for.patch +Patch6: 
nvme_call_home-remove-d-and-f-as-visible-options.patch +Patch7: nvme_call_home-remove-d-and-f-from-usage-message.patch +Patch8: diag_nvme-improvements-to-status-and-err-messages.patch +Patch9: diags-diag_nvme-call_home-command-fails-on-nvmf-driv.patch BuildRequires: autoconf BuildRequires: automake BuildRequires: bison