Sync from SUSE:SLFO:Main ppc64-diag revision a6a86ef1aefcdcb4348c5cc3e9fd4e0b

This commit is contained in:
Adrian Schröter 2024-09-30 10:43:59 +02:00
parent 64f620dc3e
commit 8cd77f6191
12 changed files with 24 additions and 686 deletions

View File

@ -1,148 +0,0 @@
From c507319d1b5f0286d67e08a3598949ca4144f475 Mon Sep 17 00:00:00 2001
From: Sathvika Vasireddy <sv@linux.ibm.com>
Date: Fri, 8 Sep 2023 12:35:12 +0530
Subject: [PATCH 1/3] ppc64-diag: Move trim_trail_space() function to
common/utils.c
Upstream: merged, expected in 2.7.10
Git-commit: c507319d1b5f0286d67e08a3598949ca4144f475
Currently, trim_trail_space() function is used in diags/diag_nvme.c file
to be able to trim trailing white spaces from a given location code. Allow
code reusability by moving the trim_trail_space() function from
diags/diag_nvme.c to common/utils.c.
Signed-off-by: Sathvika Vasireddy <sv@linux.ibm.com>
Signed-off-by: Mahesh Salgaonkar <mahesh@linux.ibm.com>
---
common/utils.c | 25 +++++++++++++++++++++++++
common/utils.h | 1 +
diags/Makefile.am | 4 +++-
diags/diag_nvme.c | 24 +-----------------------
4 files changed, 30 insertions(+), 24 deletions(-)
diff --git a/common/utils.c b/common/utils.c
index 031294328fc3..2349878334b2 100644
--- a/common/utils.c
+++ b/common/utils.c
@@ -24,9 +24,34 @@
#include <fcntl.h>
#include <string.h>
#include <assert.h>
+#include <ctype.h>
#include "utils.h"
+/* trim_trail_space - Trim trailing white spaces from string
+ * @string - Null terminated string to remove white spaces from
+ *
+ * This function will alter the passed string by removing any trailing white spaces and null
+ * terminating it at that point.
+ */
+void trim_trail_space(char *string)
+{
+ char *end;
+ size_t length;
+
+ if (string == NULL)
+ return;
+
+ length = strlen(string);
+ if (length == 0)
+ return;
+
+ end = string + length - 1;
+ while (end >= string && isspace(*end))
+ end--;
+ *(end + 1) = '\0';
+}
+
static int process_child(char *argv[], int pipefd[])
{
int nullfd;
diff --git a/common/utils.h b/common/utils.h
index ec2072db5a5b..2459b5be330b 100644
--- a/common/utils.h
+++ b/common/utils.h
@@ -18,6 +18,7 @@
#ifndef UTILS_H
#define UTILS_H
+void trim_trail_space(char *string);
FILE *spopen(char **, pid_t *);
int spclose(FILE *, pid_t);
diff --git a/diags/Makefile.am b/diags/Makefile.am
index 4ac81b8160be..dea0a79e0d2d 100644
--- a/diags/Makefile.am
+++ b/diags/Makefile.am
@@ -13,7 +13,8 @@ encl_led_h_files = diags/encl_led.h \
$(diag_common_h_files)
diag_nvme_h_files = diags/diag_nvme.h \
- common/platform.h
+ common/platform.h \
+ common/utils.h
sbin_PROGRAMS += diags/diag_encl diags/encl_led diags/diag_nvme
@@ -41,6 +42,7 @@ diags_encl_led_SOURCES = diags/encl_led.c \
diags_diag_nvme_SOURCES = diags/diag_nvme.c \
common/platform.c \
+ common/utils.c \
$(diag_nvme_h_files)
diags_diag_nvme_LDADD = -lservicelog -lm
diags_diag_nvme_CFLAGS = $(AM_CFLAGS) -Wno-stringop-truncation
diff --git a/diags/diag_nvme.c b/diags/diag_nvme.c
index 2a78034ecfd9..2606f2cb7784 100644
--- a/diags/diag_nvme.c
+++ b/diags/diag_nvme.c
@@ -27,6 +27,7 @@
#include <sys/utsname.h>
#include "diag_nvme.h"
#include "platform.h"
+#include "utils.h"
#define ITEM_DATA_LENGTH 255
#define MIN_HOURS_ON 720
@@ -71,7 +72,6 @@ static int raw_data_smart(unsigned char **raw_data, uint32_t *raw_data_len, stru
static int raw_data_vpd(unsigned char **raw_data, uint32_t *raw_data_len, struct nvme_ibm_vpd *vpd);
static int regex_controller(char *controller_name, char *device_name);
static void set_notify(struct notify *notify, struct dictionary *dict, int num_elements);
-static void trim_trail_space(char *string);
static long double uint128_to_long_double(uint8_t *data);
int main(int argc, char *argv[]) {
@@ -1426,28 +1426,6 @@ extern void set_vpd_pcie_field(const char *keyword, const char *vpd_data, struct
strncpy(vpd->firmware_level, vpd_data, sizeof(vpd->firmware_level));
}
-/* trim_trail_space - Trim trailing white spaces from string
- * @string - Null terminated string to remove white spaces from
- *
- * This function will alter the passed string by removing any trailing white spaces and null
- * terminating it at that point.
- */
-static void trim_trail_space(char *string) {
- char *end;
- size_t length;
-
- if (string == NULL)
- return;
-
- if ((length = strlen(string)) == 0)
- return;
-
- end = string + length - 1;
- while (end >= string && isspace(*end))
- end--;
- *(end + 1) = '\0';
-}
-
static long double uint128_to_long_double(uint8_t *data) {
int i;
long double value = 0;
--
2.42.0

View File

@ -1,46 +0,0 @@
From 476b0af7516b86c4d98cfa229fb0c6b856eea31d Mon Sep 17 00:00:00 2001
From: Sathvika Vasireddy <sv@linux.ibm.com>
Date: Fri, 8 Sep 2023 12:35:13 +0530
Subject: [PATCH 2/3] ppc64-diag/lp_diag: Utilize trim_trail_space() function
in event_fru_callout()
Upstream: merged, expected in 2.7.10
Git-commit: 476b0af7516b86c4d98cfa229fb0c6b856eea31d
Update the event_fru_callout() function to use the trim_trail_space()
function to be able to remove any trailing spaces from the location code.
This change aims to address an issue where the presence of trailing spaces
in the location code results in failure to find an indicator for the given
location code. Use trim_trail_space() on the location to ensure that the
device location code is properly compared with the indicator list.
Signed-off-by: Sathvika Vasireddy <sv@linux.ibm.com>
Signed-off-by: Mahesh Salgaonkar <mahesh@linux.ibm.com>
---
lpd/lp_diag.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/lpd/lp_diag.c b/lpd/lp_diag.c
index 988a021168b9..e6f5d3cb64cf 100644
--- a/lpd/lp_diag.c
+++ b/lpd/lp_diag.c
@@ -35,6 +35,7 @@
#include "servicelog.h"
#include "indicator.h"
#include "lp_util.h"
+#include "utils.h"
/* FRU callout priority as defined in PAPR+
*
@@ -344,6 +345,8 @@ event_fru_callout(struct sl_callout *callouts, struct loc_code *list,
/* get FRUs nearest fault indicator */
strncpy(location, callout->location, LOCATION_LENGTH);
location[LOCATION_LENGTH - 1] = '\0';
+ trim_trail_space(location);
+
loc_led = get_fru_indicator(list, location, &truncated);
if (!loc_led) { /* No indicator found for the given loc code */
*attn_on = 1;
--
2.42.0

View File

@ -1,72 +0,0 @@
From 0fa486dbe800bea05c81fc33eee197873573fefb Mon Sep 17 00:00:00 2001
From: Sathvika Vasireddy <sv@linux.ibm.com>
Date: Fri, 8 Sep 2023 12:35:14 +0530
Subject: [PATCH 3/3] ppc64-diag/lp_diag: Enable light path diagnostics for
RTAS events
Upstream: merged, expected in 2.7.10
Git-commit: 0fa486dbe800bea05c81fc33eee197873573fefb
Currently, Light Path Diagnostics support is enabled only for OS and
Enclosure type events. Enable light path diagnostics support for RTAS
type events by turning on only the high priority FRU callouts.
Signed-off-by: Sathvika Vasireddy <sv@linux.ibm.com>
Signed-off-by: Mahesh Salgaonkar <mahesh@linux.ibm.com>
---
lpd/lp_diag.c | 20 +++++++++++++++-----
1 file changed, 15 insertions(+), 5 deletions(-)
diff --git a/lpd/lp_diag.c b/lpd/lp_diag.c
index e6f5d3cb64cf..e67db0250ef4 100644
--- a/lpd/lp_diag.c
+++ b/lpd/lp_diag.c
@@ -37,6 +37,8 @@
#include "lp_util.h"
#include "utils.h"
+static int rtas_event;
+
/* FRU callout priority as defined in PAPR+
*
* Note: Order of the priority is important!
@@ -173,8 +175,10 @@ service_event_supported(struct sl_event *event)
return 0;
}
break;
- case SL_TYPE_BMC:
case SL_TYPE_RTAS:
+ rtas_event = 1;
+ break;
+ case SL_TYPE_BMC:
case SL_TYPE_BASIC:
default:
return 0;
@@ -446,14 +450,20 @@ parse_service_event(int event_id)
attn_loc = &list[0];
if (operating_mode == LED_MODE_LIGHT_PATH) {
- if (event->callouts)
+ if (event->callouts) {
/* Run over FRU callout priority in order and
* enable fault indicator
*/
- for (i = 0; FRU_CALLOUT_PRIORITY[i]; i++)
+ if (!rtas_event) {
+ for (i = 0; FRU_CALLOUT_PRIORITY[i]; i++)
+ rc = event_fru_callout(event->callouts, list,
+ FRU_CALLOUT_PRIORITY[i],
+ &attn_on);
+ } else {
rc = event_fru_callout(event->callouts, list,
- FRU_CALLOUT_PRIORITY[i],
- &attn_on);
+ 'H', &attn_on);
+ }
+ }
else {
/* No callout list, enable check log indicator */
indicator_log_write("Empty callout list");
--
2.42.0

View File

@ -1,42 +0,0 @@
From 3ad587d20868f154bb7ab972ee7812add1380d7a Mon Sep 17 00:00:00 2001
From: Greg Joyce <gjoyce@linux.vnet.ibm.com>
Date: Wed, 11 Oct 2023 12:10:40 -0500
Subject: [PATCH] diag_nvme: improvements to status and err messages
Signed-off-by: Greg Joyce <gjoyce@linux.vnet.ibm.com>
---
diags/diag_nvme.c | 9 ++++++---
1 file changed, 6 insertions(+), 3 deletions(-)
diff --git a/diags/diag_nvme.c b/diags/diag_nvme.c
index df191f2d1ac8..00efec212056 100644
--- a/diags/diag_nvme.c
+++ b/diags/diag_nvme.c
@@ -166,9 +166,9 @@ int main(int argc, char *argv[]) {
}
if (rc == 0)
- fprintf(stdout, "Command completed successfully\n");
+ fprintf(stdout, "NVMe diag command completed successfully\n");
else
- fprintf(stderr, "Command failed, exiting with rc %d\n", rc);
+ fprintf(stderr, "NVMe diag command failed with rc %d\n", rc);
return rc;
}
@@ -724,8 +724,11 @@ extern int get_smart_file(char *file_path, struct nvme_smart_log_page *log) {
int num_elements = 0;
struct dictionary dict[MAX_DICT_ELEMENTS];
- if ((num_elements = read_file_dict(file_path, dict, MAX_DICT_ELEMENTS)) < 0)
+ if ((num_elements = read_file_dict(file_path, dict, MAX_DICT_ELEMENTS)) < 0) {
+ fprintf(stderr, "read_file_dict failed: %s, rc % d\n",
+ file_path, num_elements);
return num_elements;
+ }
return set_smart_log_field(log, dict, num_elements);
}
--
2.43.2

View File

@ -1,76 +0,0 @@
From db0c6d7974d7f8909878384d77ec02457759d6df Mon Sep 17 00:00:00 2001
From: Nilay Shroff <nilay@linux.ibm.com>
Date: Tue, 16 Jan 2024 13:55:03 +0530
Subject: [PATCH] diags/diag_nvme: call_home command fails on nvmf drive
The diag_nvme command needs to retrieve the VPD log page from NVMe for
filling in the product data while generating the call-home event.
However, call-home feature is supported for directly attached NVMe
module. In the current diag_nvme implementation, if user doesn't
provide NVMe device name for diagnostics then it(diag_nvme) loops
through each NVMe module (directly connected to the system/LPAR as
well as discovered over fabrics) and attempt retrieving the SMART log
page as well as VPD page. Unfortunately, diag_nvme fails to retrieve
the VPD page for NVMe connected over fabrics and that causes the
diag_nvme to print "not-so-nice" failure messages on console.
Hence fixed the diag_nvme code so that for call-home event reporting,
it skips the NVMe which is connected over fabrics and prints a
"nice-message" informing the user that it's skipping diagnostics for
NVMe module connected over fabrics. In a nutshell, with this fix now
diag_nvme would only diagnose the NVMe module which is directly
attached (over PCIe) to the system.
Signed-off-by: Nilay Shroff <nilay@linux.ibm.com>
---
diags/diag_nvme.c | 31 +++++++++++++++++++++++++++++++
1 file changed, 31 insertions(+)
diff --git a/diags/diag_nvme.c b/diags/diag_nvme.c
index c1c0a20ddf14..e86786ccdccd 100644
--- a/diags/diag_nvme.c
+++ b/diags/diag_nvme.c
@@ -375,9 +375,40 @@ static int diagnose_nvme(char *device_name, struct notify *notify, char *file_pa
char endurance_s[sizeof(vpd.endurance) + 1], capacity_s[sizeof(vpd.capacity)+1];
uint64_t event_id;
uint8_t severity;
+ FILE *fp;
+ char tr_file_path[PATH_MAX];
uint32_t raw_data_len = 0;
unsigned char *raw_data = NULL;
+ /*
+ * Skip diag test if NVMe is connected over fabric
+ */
+ snprintf(tr_file_path, sizeof(tr_file_path),
+ NVME_SYS_PATH"/%s/%s", device_name, "transport");
+ fp = fopen(tr_file_path, "r");
+ if (fp) {
+ char buf[12];
+ int n = fread(buf, 1, sizeof(buf), fp);
+
+ if (n) {
+ /*
+ * If NVMe transport is anything but pcie then skip the diag test
+ */
+ if (strncmp(buf, "pcie", 4) != 0) {
+ fprintf(stdout, "Skipping diagnostics for nvmf : %s\n",
+ device_name);
+ fclose(fp);
+ return 0;
+ }
+ }
+ fclose(fp);
+ } else {
+ fprintf(stderr, "Skipping diagnostics for %s:\n"
+ "Unable to find the nvme transport type\n",
+ device_name);
+ return -1;
+ }
+
tmp_rc = regex_controller(controller_name, device_name);
if (tmp_rc != 0)
return -1;
--
2.43.2

View File

@ -1,149 +0,0 @@
From 11cb2a44a59b63bdc23c94e386c4e2f43ea7eb61 Mon Sep 17 00:00:00 2001
From: Greg Joyce <gjoyce@linux.vnet.ibm.com>
Date: Fri, 22 Sep 2023 15:19:34 -0500
Subject: [PATCH] nvme_call_home: remove -d and -f as visible options
The dump(-d) and file(-f) options have been removed as option in usage
and also in the man page. The options are for internal test only and
should not have been exposed. Also added some helpful status messages.
Signed-off-by: Greg Joyce <gjoyce@linux.vnet.ibm.com>
---
diags/diag_nvme.c | 23 ++++++++++++++-------
diags/man/diag_nvme.8 | 48 +++----------------------------------------
2 files changed, 19 insertions(+), 52 deletions(-)
diff --git a/diags/diag_nvme.c b/diags/diag_nvme.c
index 2a78034ecfd9..df191f2d1ac8 100644
--- a/diags/diag_nvme.c
+++ b/diags/diag_nvme.c
@@ -165,6 +165,11 @@ int main(int argc, char *argv[]) {
optind++;
}
+ if (rc == 0)
+ fprintf(stdout, "Command completed successfully\n");
+ else
+ fprintf(stderr, "Command failed, exiting with rc %d\n", rc);
+
return rc;
}
@@ -310,6 +315,7 @@ extern int dump_smart_data(char *device_name, char *dump_path) {
int fd, rc;
FILE *fp;
struct nvme_smart_log_page smart_log = { 0 };
+ char ans;
/* Read SMART data from device */
snprintf(dev_path,sizeof(dev_path), "/dev/%s", device_name);
@@ -331,8 +337,16 @@ extern int dump_smart_data(char *device_name, char *dump_path) {
}
fp = fopen(dump_path, "wx");
if (fp == NULL) {
- fprintf(stderr, "%s open failed: %s\n", dump_path, strerror(errno));
- return -1;
+ if (errno == EEXIST) {
+ fprintf(stdout, "File %s exists. Overwrite (y/n)? ", dump_path);
+ rc = scanf("%c", &ans);
+ if (ans == 'y' || ans == 'Y')
+ fp = fopen(dump_path, "w");
+ }
+ if (fp == NULL) {
+ fprintf(stderr, "%s open failed: %s\n", dump_path, strerror(errno));
+ return -1;
+ }
}
write_smart_file(fp, &smart_log);
fclose(fp);
@@ -974,11 +988,6 @@ extern int open_nvme(char *dev_path) {
static void print_usage(char *command) {
printf("Usage: %s [-h] [-d <file>] [-f <file>] [<nvme_devices>]\n"
"\t-h or --help: print this help message\n"
- "\t-d or --dump: dump SMART data to the specified path and file name <file>\n"
- "\t one <nvme_device> is expected with this option\n"
- "\t-f or --file: use SMART data from the specified path and file name <file>\n"
- "\t instead of device, one <nvme_device> is expected with\n"
- "\t this option\n"
"\t<nvme_devices>: the NVMe devices on which to operate, for\n"
"\t example nvme0; if not specified, all detected\n"
"\t nvme devices will be diagnosed\n", command);
diff --git a/diags/man/diag_nvme.8 b/diags/man/diag_nvme.8
index 45567ac26bfc..c023ed6719eb 100644
--- a/diags/man/diag_nvme.8
+++ b/diags/man/diag_nvme.8
@@ -1,7 +1,7 @@
.\"
-.\" Copyright (C) 2022 IBM Corporation
+.\" Copyright (C) 2022, 2023 IBM Corporation
.\"
-.TH "DIAG_NVME" "8" "June 2022" "Linux" "PowerLinux Diagnostic Tools"
+.TH "DIAG_NVME" "8" "September 2023" "Linux" "PowerLinux Diagnostic Tools"
.hy
.SH NAME
.PP
@@ -12,7 +12,7 @@ diag_nvme \- diagnose NVMe devices
.PD 0
.P
.PD
-\f[B]diag_nvme\f[] [\-d <\f[I]file\f[]>] [\-f <\f[I]file\f[]>]
+\f[B]diag_nvme\f[]
<\f[B]nvme\f[]\f[I]n\f[]>
.PD 0
.P
@@ -29,52 +29,10 @@ go through the diagnostics procedure.
The user can control which events will be reported through the
configuration file \f[I]/etc/ppc64\-diag/diag_nvme.config\f[]
.SH OPTIONS
-.TP
-.B \f[B]\-d\f[], \f[B]--dump\f[] \f[I]file\f[]
-Dump SMART data to the specified path and file name \f[I]file\f[].
-The SMART data is extracted from an NVMe device, so specifying one is
-mandatory if this option is selected.
-File created is in a simple key=value format.
-.RS
-.RE
-.TP
-.B \f[B]\-f\f[], \f[B]--file\f[] \f[I]file\f[]
-This option usage is for testing only.
-Use SMART data from the specified path and file name \f[I]file\f[]
-instead of device, one NVMe is mandatory if this option is selected.
-The expected format of the file is a simple key=value that is the same
-one provided with the \-d / --dump option.
-If \f[I]file\f[] is missing from the filesystem it will be treated as a
-failure to retrieve SMART data and an event will be reported.
-.RS
-.RE
-.TP
-.B \f[B]\-h\f[], \f[B]--help\f[]
-Print a help message and exit
-.RS
-.RE
-.SH EXAMPLES
-.TP
-.B \f[B]diag_nvme\f[]
-Run diagnostics in all NVMe devices detected in the system.
-.RS
-.RE
-.TP
.B \f[B]diag_nvme nvme0 nvme1\f[]
Run diagnostics only in nvme0 and nvme1 devices.
.RS
.RE
-.TP
-.B \f[B]diag_nvme \-d smart.txt nvme0\f[]
-Dump SMART data from nvme0 into file smart.txt.
-.RS
-.RE
-.TP
-.B \f[B]diag_nvme \-f smart.txt nvme0\f[]
-Read SMART data from file smart.txt and use it as health information for
-diagnostics of device nvme0.
-.RS
-.RE
.SH REPORTING BUGS
.PP
Patches and issues may be submitted at
--
2.43.2

View File

@ -1,29 +0,0 @@
From 316d2baf5dba0d00447a3ac49f2f95339dbdf5cd Mon Sep 17 00:00:00 2001
From: Greg Joyce <gjoyce@linux.vnet.ibm.com>
Date: Wed, 15 Nov 2023 11:28:05 -0600
Subject: [PATCH] nvme_call_home: remove -d and -f from usage message
The -d and -f weren't removed from the first line of the
usage message.
Signed-off-by: Greg Joyce <gjoyce@linux.vnet.ibm.com>
---
diags/diag_nvme.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/diags/diag_nvme.c b/diags/diag_nvme.c
index 00efec212056..164afe1cadd6 100644
--- a/diags/diag_nvme.c
+++ b/diags/diag_nvme.c
@@ -989,7 +989,7 @@ extern int open_nvme(char *dev_path) {
}
static void print_usage(char *command) {
- printf("Usage: %s [-h] [-d <file>] [-f <file>] [<nvme_devices>]\n"
+ printf("Usage: %s [-h] [<nvme_devices>]\n"
"\t-h or --help: print this help message\n"
"\t<nvme_devices>: the NVMe devices on which to operate, for\n"
"\t example nvme0; if not specified, all detected\n"
--
2.43.2

BIN
ppc64-diag-2.7.10.tar.gz (Stored with Git LFS) Normal file

Binary file not shown.

BIN
ppc64-diag-2.7.9.tar.gz (Stored with Git LFS)

Binary file not shown.

View File

@ -1,3 +1,23 @@
-------------------------------------------------------------------
Tue Sep 17 14:28:38 UTC 2024 - Michal Suchanek <msuchanek@suse.com>
- Version 2.7.10 (jsc#PED-9918).
* Add support for multiple platform dumps
* Add support for light path diagnostics for rtas events
* Enable correct display of model and system-id for IPS Power systems
* Fix call home feature for nvmf devices
* Fix crash in rtas_errd due to invalid -f option values
* Fix build warnings with GCC-15
- Remove upstreamed patches
* rtas_errd-Handle-multiple-platform-dumps.patch
* 0001-ppc64-diag-Move-trim_trail_space-function-to-common-.patch
* 0002-ppc64-diag-lp_diag-Utilize-trim_trail_space-function.patch
* 0003-ppc64-diag-lp_diag-Enable-light-path-diagnostics-for.patch
* nvme_call_home-remove-d-and-f-as-visible-options.patch
* nvme_call_home-remove-d-and-f-from-usage-message.patch
* diag_nvme-improvements-to-status-and-err-messages.patch
* diags-diag_nvme-call_home-command-fails-on-nvmf-driv.patch
------------------------------------------------------------------- -------------------------------------------------------------------
Mon Feb 26 14:22:42 UTC 2024 - Michal Suchanek <msuchanek@suse.de> Mon Feb 26 14:22:42 UTC 2024 - Michal Suchanek <msuchanek@suse.de>

View File

@ -17,7 +17,7 @@
Name: ppc64-diag Name: ppc64-diag
Version: 2.7.9 Version: 2.7.10
Release: 0 Release: 0
Summary: Linux for Power Platform Diagnostics Summary: Linux for Power Platform Diagnostics
License: GPL-2.0-or-later License: GPL-2.0-or-later
@ -30,16 +30,6 @@ Source3: ppc64-diag-nvme.service
Source4: ppc64-diag-nvme.timer Source4: ppc64-diag-nvme.timer
#PATCH-FIX-OPENSUSE - ppc64-diag.varunused.patch - fix unused variables #PATCH-FIX-OPENSUSE - ppc64-diag.varunused.patch - fix unused variables
Patch1: ppc64-diag.varunused.patch Patch1: ppc64-diag.varunused.patch
#PATCH-FIX-UPSTREAM - rtas_errd-Handle-multiple-platform-dumps.patch - store multiple dumps
Patch2: rtas_errd-Handle-multiple-platform-dumps.patch
#PATCH-FIX-UPSTREAM - Enclosure fault LED support
Patch3: 0001-ppc64-diag-Move-trim_trail_space-function-to-common-.patch
Patch4: 0002-ppc64-diag-lp_diag-Utilize-trim_trail_space-function.patch
Patch5: 0003-ppc64-diag-lp_diag-Enable-light-path-diagnostics-for.patch
Patch6: nvme_call_home-remove-d-and-f-as-visible-options.patch
Patch7: nvme_call_home-remove-d-and-f-from-usage-message.patch
Patch8: diag_nvme-improvements-to-status-and-err-messages.patch
Patch9: diags-diag_nvme-call_home-command-fails-on-nvmf-driv.patch
BuildRequires: autoconf BuildRequires: autoconf
BuildRequires: automake BuildRequires: automake
BuildRequires: bison BuildRequires: bison

View File

@ -1,110 +0,0 @@
From d05654e5ec6f37cf6caa491fc7d95b336f9603e2 Mon Sep 17 00:00:00 2001
From: Sathvika Vasireddy <sv@linux.ibm.com>
Date: Mon, 10 Jul 2023 13:43:21 +0530
Subject: [PATCH] rtas_errd: Handle multiple platform dumps
References: bsc#1209274 ltc#198526
Upstream: merged, expected in 2.7.10
Git-commit: d05654e5ec6f37cf6caa491fc7d95b336f9603e2
Currently, whenever a new dump arrives, old dump file of that specific dump
type is removed before writing the new dump out. Any dump file with the
same prefix (dump type) gets deleted. This means only one set of dump files
is saved, since only one dump file per dump type is saved.
Handle multiple dumps on Linux by allowing as many dumps to be offloaded
until disk space is available. To do this, remove the function that checks
for prefix size and removes old dump files. In the event of not enough
disk space available, log an error to the user along with the dump tag.
User will free up space and run extract_platdump tool using the dump tag
provided in the error message to offload the dump. Error log can be viewed
by the user by issuing 'journalctl -p err -t rtas_errd' command.
Signed-off-by: Sathvika Vasireddy <sv@linux.ibm.com>
Signed-off-by: Mahesh Salgaonkar <mahesh@linux.ibm.com>
---
rtas_errd/dump.c | 29 ++++++++++++++++++++++++++++-
rtas_errd/extract_platdump.c | 6 ------
2 files changed, 28 insertions(+), 7 deletions(-)
diff --git a/rtas_errd/dump.c b/rtas_errd/dump.c
index cc50d91b593b..494c322c4164 100644
--- a/rtas_errd/dump.c
+++ b/rtas_errd/dump.c
@@ -30,8 +30,10 @@
#include <fcntl.h>
#include <librtas.h>
#include <librtasevent.h>
+#include <syslog.h>
#include <sys/stat.h>
#include <sys/wait.h>
+#include <sys/statvfs.h>
#include "utils.h"
#include "rtas_errd.h"
@@ -284,7 +286,9 @@ void
check_platform_dump(struct event *event)
{
struct rtas_dump_scn *dump_scn;
+ struct statvfs vfs;
uint64_t dump_tag;
+ uint64_t dump_size;
char filename[DUMP_MAX_FNAME_LEN + 20], *pos;
char *pathname = NULL;
FILE *f;
@@ -306,11 +310,34 @@ check_platform_dump(struct event *event)
return;
}
- /* Retrieve the dump */
+ /* Retrieve the dump tag */
dump_tag = dump_scn->id;
dump_tag |= ((uint64_t)dump_scn->v6hdr.subtype << 32);
dbg("Dump ID: 0x%016LX", dump_tag);
+ if (statvfs(d_cfg.platform_dump_path, &vfs) == -1) {
+ log_msg(event, "statvfs() failed on %s: %s",
+ d_cfg.platform_dump_path, strerror(errno));
+ return;
+ }
+
+ /* Retrieve the size of the platform dump */
+ dump_size = dump_scn->size_hi;
+ dump_size <<= 32;
+ dump_size |= dump_scn->size_lo;
+
+ /* Check if there is sufficient space in the file system to store the dump */
+ if (vfs.f_bavail * vfs.f_frsize < dump_size) {
+ syslog(LOG_ERR, "Insufficient space in %s to store platform dump for dump ID: "
+ "0x%016lX (required: %lu bytes, available: %lu bytes)",
+ d_cfg.platform_dump_path, dump_tag, dump_size,
+ (vfs.f_bavail * vfs.f_frsize));
+ syslog(LOG_ERR, "After clearing space, run 'extract_platdump "
+ "0x%016lX'.\n", dump_tag);
+ return;
+ }
+
+ /* Retrieve the dump */
snprintf(tmp_sys_arg, 60, "0x%016LX", (long long unsigned int)dump_tag);
system_args[0] = EXTRACT_PLATDUMP_CMD;
system_args[1] = tmp_sys_arg;
diff --git a/rtas_errd/extract_platdump.c b/rtas_errd/extract_platdump.c
index fbe65b2fe5c5..831e57ea8b69 100644
--- a/rtas_errd/extract_platdump.c
+++ b/rtas_errd/extract_platdump.c
@@ -290,12 +290,6 @@ extract_platform_dump(uint64_t dump_tag)
}
}
- /*
- * Before writing the new dump out, we need to see if any older
- * dumps need to be removed first
- */
- remove_old_dumpfiles(filename, prefix_size);
-
/* Copy the dump off to the filesystem */
pathname[0] = '\0';
strcpy(pathname, d_cfg.platform_dump_path);
--
2.41.0