Sync from SUSE:SLFO:Main ppc64-diag revision 68babf57b28a3079f7d752c5d55dcd94

This commit is contained in:
Adrian Schröter 2024-05-03 19:35:17 +02:00
commit 64f620dc3e
17 changed files with 1409 additions and 0 deletions

23
.gitattributes vendored Normal file
View File

@ -0,0 +1,23 @@
## Default LFS
*.7z filter=lfs diff=lfs merge=lfs -text
*.bsp filter=lfs diff=lfs merge=lfs -text
*.bz2 filter=lfs diff=lfs merge=lfs -text
*.gem filter=lfs diff=lfs merge=lfs -text
*.gz filter=lfs diff=lfs merge=lfs -text
*.jar filter=lfs diff=lfs merge=lfs -text
*.lz filter=lfs diff=lfs merge=lfs -text
*.lzma filter=lfs diff=lfs merge=lfs -text
*.obscpio filter=lfs diff=lfs merge=lfs -text
*.oxt filter=lfs diff=lfs merge=lfs -text
*.pdf filter=lfs diff=lfs merge=lfs -text
*.png filter=lfs diff=lfs merge=lfs -text
*.rpm filter=lfs diff=lfs merge=lfs -text
*.tbz filter=lfs diff=lfs merge=lfs -text
*.tbz2 filter=lfs diff=lfs merge=lfs -text
*.tgz filter=lfs diff=lfs merge=lfs -text
*.ttf filter=lfs diff=lfs merge=lfs -text
*.txz filter=lfs diff=lfs merge=lfs -text
*.whl filter=lfs diff=lfs merge=lfs -text
*.xz filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text

View File

@ -0,0 +1,148 @@
From c507319d1b5f0286d67e08a3598949ca4144f475 Mon Sep 17 00:00:00 2001
From: Sathvika Vasireddy <sv@linux.ibm.com>
Date: Fri, 8 Sep 2023 12:35:12 +0530
Subject: [PATCH 1/3] ppc64-diag: Move trim_trail_space() function to
common/utils.c
Upstream: merged, expected in 2.7.10
Git-commit: c507319d1b5f0286d67e08a3598949ca4144f475
Currently, trim_trail_space() function is used in diags/diag_nvme.c file
to be able to trim trailing white spaces from a given location code. Allow
code reusability by moving the trim_trail_space() function from
diags/diag_nvme.c to common/utils.c.
Signed-off-by: Sathvika Vasireddy <sv@linux.ibm.com>
Signed-off-by: Mahesh Salgaonkar <mahesh@linux.ibm.com>
---
common/utils.c | 25 +++++++++++++++++++++++++
common/utils.h | 1 +
diags/Makefile.am | 4 +++-
diags/diag_nvme.c | 24 +-----------------------
4 files changed, 30 insertions(+), 24 deletions(-)
diff --git a/common/utils.c b/common/utils.c
index 031294328fc3..2349878334b2 100644
--- a/common/utils.c
+++ b/common/utils.c
@@ -24,9 +24,34 @@
#include <fcntl.h>
#include <string.h>
#include <assert.h>
+#include <ctype.h>
#include "utils.h"
+/* trim_trail_space - Trim trailing white spaces from string
+ * @string - Null terminated string to remove white spaces from
+ *
+ * This function will alter the passed string by removing any trailing white spaces and null
+ * terminating it at that point.
+ */
+void trim_trail_space(char *string)
+{
+ char *end;
+ size_t length;
+
+ if (string == NULL)
+ return;
+
+ length = strlen(string);
+ if (length == 0)
+ return;
+
+ end = string + length - 1;
+ while (end >= string && isspace(*end))
+ end--;
+ *(end + 1) = '\0';
+}
+
static int process_child(char *argv[], int pipefd[])
{
int nullfd;
diff --git a/common/utils.h b/common/utils.h
index ec2072db5a5b..2459b5be330b 100644
--- a/common/utils.h
+++ b/common/utils.h
@@ -18,6 +18,7 @@
#ifndef UTILS_H
#define UTILS_H
+void trim_trail_space(char *string);
FILE *spopen(char **, pid_t *);
int spclose(FILE *, pid_t);
diff --git a/diags/Makefile.am b/diags/Makefile.am
index 4ac81b8160be..dea0a79e0d2d 100644
--- a/diags/Makefile.am
+++ b/diags/Makefile.am
@@ -13,7 +13,8 @@ encl_led_h_files = diags/encl_led.h \
$(diag_common_h_files)
diag_nvme_h_files = diags/diag_nvme.h \
- common/platform.h
+ common/platform.h \
+ common/utils.h
sbin_PROGRAMS += diags/diag_encl diags/encl_led diags/diag_nvme
@@ -41,6 +42,7 @@ diags_encl_led_SOURCES = diags/encl_led.c \
diags_diag_nvme_SOURCES = diags/diag_nvme.c \
common/platform.c \
+ common/utils.c \
$(diag_nvme_h_files)
diags_diag_nvme_LDADD = -lservicelog -lm
diags_diag_nvme_CFLAGS = $(AM_CFLAGS) -Wno-stringop-truncation
diff --git a/diags/diag_nvme.c b/diags/diag_nvme.c
index 2a78034ecfd9..2606f2cb7784 100644
--- a/diags/diag_nvme.c
+++ b/diags/diag_nvme.c
@@ -27,6 +27,7 @@
#include <sys/utsname.h>
#include "diag_nvme.h"
#include "platform.h"
+#include "utils.h"
#define ITEM_DATA_LENGTH 255
#define MIN_HOURS_ON 720
@@ -71,7 +72,6 @@ static int raw_data_smart(unsigned char **raw_data, uint32_t *raw_data_len, stru
static int raw_data_vpd(unsigned char **raw_data, uint32_t *raw_data_len, struct nvme_ibm_vpd *vpd);
static int regex_controller(char *controller_name, char *device_name);
static void set_notify(struct notify *notify, struct dictionary *dict, int num_elements);
-static void trim_trail_space(char *string);
static long double uint128_to_long_double(uint8_t *data);
int main(int argc, char *argv[]) {
@@ -1426,28 +1426,6 @@ extern void set_vpd_pcie_field(const char *keyword, const char *vpd_data, struct
strncpy(vpd->firmware_level, vpd_data, sizeof(vpd->firmware_level));
}
-/* trim_trail_space - Trim trailing white spaces from string
- * @string - Null terminated string to remove white spaces from
- *
- * This function will alter the passed string by removing any trailing white spaces and null
- * terminating it at that point.
- */
-static void trim_trail_space(char *string) {
- char *end;
- size_t length;
-
- if (string == NULL)
- return;
-
- if ((length = strlen(string)) == 0)
- return;
-
- end = string + length - 1;
- while (end >= string && isspace(*end))
- end--;
- *(end + 1) = '\0';
-}
-
static long double uint128_to_long_double(uint8_t *data) {
int i;
long double value = 0;
--
2.42.0

View File

@ -0,0 +1,46 @@
From 476b0af7516b86c4d98cfa229fb0c6b856eea31d Mon Sep 17 00:00:00 2001
From: Sathvika Vasireddy <sv@linux.ibm.com>
Date: Fri, 8 Sep 2023 12:35:13 +0530
Subject: [PATCH 2/3] ppc64-diag/lp_diag: Utilize trim_trail_space() function
in event_fru_callout()
Upstream: merged, expected in 2.7.10
Git-commit: 476b0af7516b86c4d98cfa229fb0c6b856eea31d
Update the event_fru_callout() function to use the trim_trail_space()
function to be able to remove any trailing spaces from the location code.
This change aims to address an issue where the presence of trailing spaces
in the location code results in failure to find an indicator for the given
location code. Use trim_trail_space() on the location to ensure that the
device location code is properly compared with the indicator list.
Signed-off-by: Sathvika Vasireddy <sv@linux.ibm.com>
Signed-off-by: Mahesh Salgaonkar <mahesh@linux.ibm.com>
---
lpd/lp_diag.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/lpd/lp_diag.c b/lpd/lp_diag.c
index 988a021168b9..e6f5d3cb64cf 100644
--- a/lpd/lp_diag.c
+++ b/lpd/lp_diag.c
@@ -35,6 +35,7 @@
#include "servicelog.h"
#include "indicator.h"
#include "lp_util.h"
+#include "utils.h"
/* FRU callout priority as defined in PAPR+
*
@@ -344,6 +345,8 @@ event_fru_callout(struct sl_callout *callouts, struct loc_code *list,
/* get FRUs nearest fault indicator */
strncpy(location, callout->location, LOCATION_LENGTH);
location[LOCATION_LENGTH - 1] = '\0';
+ trim_trail_space(location);
+
loc_led = get_fru_indicator(list, location, &truncated);
if (!loc_led) { /* No indicator found for the given loc code */
*attn_on = 1;
--
2.42.0

View File

@ -0,0 +1,72 @@
From 0fa486dbe800bea05c81fc33eee197873573fefb Mon Sep 17 00:00:00 2001
From: Sathvika Vasireddy <sv@linux.ibm.com>
Date: Fri, 8 Sep 2023 12:35:14 +0530
Subject: [PATCH 3/3] ppc64-diag/lp_diag: Enable light path diagnostics for
RTAS events
Upstream: merged, expected in 2.7.10
Git-commit: 0fa486dbe800bea05c81fc33eee197873573fefb
Currently, Light Path Diagnostics support is enabled only for OS and
Enclosure type events. Enable light path diagnostics support for RTAS
type events by turning on only the high priority FRU callouts.
Signed-off-by: Sathvika Vasireddy <sv@linux.ibm.com>
Signed-off-by: Mahesh Salgaonkar <mahesh@linux.ibm.com>
---
lpd/lp_diag.c | 20 +++++++++++++++-----
1 file changed, 15 insertions(+), 5 deletions(-)
diff --git a/lpd/lp_diag.c b/lpd/lp_diag.c
index e6f5d3cb64cf..e67db0250ef4 100644
--- a/lpd/lp_diag.c
+++ b/lpd/lp_diag.c
@@ -37,6 +37,8 @@
#include "lp_util.h"
#include "utils.h"
+static int rtas_event;
+
/* FRU callout priority as defined in PAPR+
*
* Note: Order of the priority is important!
@@ -173,8 +175,10 @@ service_event_supported(struct sl_event *event)
return 0;
}
break;
- case SL_TYPE_BMC:
case SL_TYPE_RTAS:
+ rtas_event = 1;
+ break;
+ case SL_TYPE_BMC:
case SL_TYPE_BASIC:
default:
return 0;
@@ -446,14 +450,20 @@ parse_service_event(int event_id)
attn_loc = &list[0];
if (operating_mode == LED_MODE_LIGHT_PATH) {
- if (event->callouts)
+ if (event->callouts) {
/* Run over FRU callout priority in order and
* enable fault indicator
*/
- for (i = 0; FRU_CALLOUT_PRIORITY[i]; i++)
+ if (!rtas_event) {
+ for (i = 0; FRU_CALLOUT_PRIORITY[i]; i++)
+ rc = event_fru_callout(event->callouts, list,
+ FRU_CALLOUT_PRIORITY[i],
+ &attn_on);
+ } else {
rc = event_fru_callout(event->callouts, list,
- FRU_CALLOUT_PRIORITY[i],
- &attn_on);
+ 'H', &attn_on);
+ }
+ }
else {
/* No callout list, enable check log indicator */
indicator_log_write("Empty callout list");
--
2.42.0

View File

@ -0,0 +1,42 @@
From 3ad587d20868f154bb7ab972ee7812add1380d7a Mon Sep 17 00:00:00 2001
From: Greg Joyce <gjoyce@linux.vnet.ibm.com>
Date: Wed, 11 Oct 2023 12:10:40 -0500
Subject: [PATCH] diag_nvme: improvements to status and err messages
Signed-off-by: Greg Joyce <gjoyce@linux.vnet.ibm.com>
---
diags/diag_nvme.c | 9 ++++++---
1 file changed, 6 insertions(+), 3 deletions(-)
diff --git a/diags/diag_nvme.c b/diags/diag_nvme.c
index df191f2d1ac8..00efec212056 100644
--- a/diags/diag_nvme.c
+++ b/diags/diag_nvme.c
@@ -166,9 +166,9 @@ int main(int argc, char *argv[]) {
}
if (rc == 0)
- fprintf(stdout, "Command completed successfully\n");
+ fprintf(stdout, "NVMe diag command completed successfully\n");
else
- fprintf(stderr, "Command failed, exiting with rc %d\n", rc);
+ fprintf(stderr, "NVMe diag command failed with rc %d\n", rc);
return rc;
}
@@ -724,8 +724,11 @@ extern int get_smart_file(char *file_path, struct nvme_smart_log_page *log) {
int num_elements = 0;
struct dictionary dict[MAX_DICT_ELEMENTS];
- if ((num_elements = read_file_dict(file_path, dict, MAX_DICT_ELEMENTS)) < 0)
+ if ((num_elements = read_file_dict(file_path, dict, MAX_DICT_ELEMENTS)) < 0) {
+ fprintf(stderr, "read_file_dict failed: %s, rc % d\n",
+ file_path, num_elements);
return num_elements;
+ }
return set_smart_log_field(log, dict, num_elements);
}
--
2.43.2

View File

@ -0,0 +1,76 @@
From db0c6d7974d7f8909878384d77ec02457759d6df Mon Sep 17 00:00:00 2001
From: Nilay Shroff <nilay@linux.ibm.com>
Date: Tue, 16 Jan 2024 13:55:03 +0530
Subject: [PATCH] diags/diag_nvme: call_home command fails on nvmf drive
The diag_nvme command needs to retrieve the VPD log page from NVMe for
filling in the product data while generating the call-home event.
However, call-home feature is supported for directly attached NVMe
module. In the current diag_nvme implementation, if user doesn't
provide an NVMe device name for diagnostics then it (diag_nvme) loops
through each NVMe module (directly connected to the system/LPAR as
well as discovered over fabrics) and attempts to retrieve the SMART log
page as well as the VPD page. Unfortunately, diag_nvme fails to retrieve
the VPD page for NVMe connected over fabrics and that causes the
diag_nvme to print "not-so-nice" failure messages on console.
Hence, fix the diag_nvme code so that for call-home event reporting,
it skips any NVMe which is connected over fabrics and prints a
"nice-message" informing the user that it's skipping diagnostics for
the NVMe module connected over fabrics. In a nutshell, with this fix
diag_nvme only diagnoses an NVMe module which is directly
attached (over PCIe) to the system.
Signed-off-by: Nilay Shroff <nilay@linux.ibm.com>
---
diags/diag_nvme.c | 31 +++++++++++++++++++++++++++++++
1 file changed, 31 insertions(+)
diff --git a/diags/diag_nvme.c b/diags/diag_nvme.c
index c1c0a20ddf14..e86786ccdccd 100644
--- a/diags/diag_nvme.c
+++ b/diags/diag_nvme.c
@@ -375,9 +375,40 @@ static int diagnose_nvme(char *device_name, struct notify *notify, char *file_pa
char endurance_s[sizeof(vpd.endurance) + 1], capacity_s[sizeof(vpd.capacity)+1];
uint64_t event_id;
uint8_t severity;
+ FILE *fp;
+ char tr_file_path[PATH_MAX];
uint32_t raw_data_len = 0;
unsigned char *raw_data = NULL;
+ /*
+ * Skip diag test if NVMe is connected over fabric
+ */
+ snprintf(tr_file_path, sizeof(tr_file_path),
+ NVME_SYS_PATH"/%s/%s", device_name, "transport");
+ fp = fopen(tr_file_path, "r");
+ if (fp) {
+ char buf[12];
+ int n = fread(buf, 1, sizeof(buf), fp);
+
+ if (n) {
+ /*
+ * If NVMe transport is anything but pcie then skip the diag test
+ */
+ if (strncmp(buf, "pcie", 4) != 0) {
+ fprintf(stdout, "Skipping diagnostics for nvmf : %s\n",
+ device_name);
+ fclose(fp);
+ return 0;
+ }
+ }
+ fclose(fp);
+ } else {
+ fprintf(stderr, "Skipping diagnostics for %s:\n"
+ "Unable to find the nvme transport type\n",
+ device_name);
+ return -1;
+ }
+
tmp_rc = regex_controller(controller_name, device_name);
if (tmp_rc != 0)
return -1;
--
2.43.2

View File

@ -0,0 +1,149 @@
From 11cb2a44a59b63bdc23c94e386c4e2f43ea7eb61 Mon Sep 17 00:00:00 2001
From: Greg Joyce <gjoyce@linux.vnet.ibm.com>
Date: Fri, 22 Sep 2023 15:19:34 -0500
Subject: [PATCH] nvme_call_home: remove -d and -f as visible options
The dump (-d) and file (-f) options have been removed from the usage
text and from the man page. The options are for internal testing only and
should not have been exposed. Also added some helpful status messages.
Signed-off-by: Greg Joyce <gjoyce@linux.vnet.ibm.com>
---
diags/diag_nvme.c | 23 ++++++++++++++-------
diags/man/diag_nvme.8 | 48 +++----------------------------------------
2 files changed, 19 insertions(+), 52 deletions(-)
diff --git a/diags/diag_nvme.c b/diags/diag_nvme.c
index 2a78034ecfd9..df191f2d1ac8 100644
--- a/diags/diag_nvme.c
+++ b/diags/diag_nvme.c
@@ -165,6 +165,11 @@ int main(int argc, char *argv[]) {
optind++;
}
+ if (rc == 0)
+ fprintf(stdout, "Command completed successfully\n");
+ else
+ fprintf(stderr, "Command failed, exiting with rc %d\n", rc);
+
return rc;
}
@@ -310,6 +315,7 @@ extern int dump_smart_data(char *device_name, char *dump_path) {
int fd, rc;
FILE *fp;
struct nvme_smart_log_page smart_log = { 0 };
+ char ans;
/* Read SMART data from device */
snprintf(dev_path,sizeof(dev_path), "/dev/%s", device_name);
@@ -331,8 +337,16 @@ extern int dump_smart_data(char *device_name, char *dump_path) {
}
fp = fopen(dump_path, "wx");
if (fp == NULL) {
- fprintf(stderr, "%s open failed: %s\n", dump_path, strerror(errno));
- return -1;
+ if (errno == EEXIST) {
+ fprintf(stdout, "File %s exists. Overwrite (y/n)? ", dump_path);
+ rc = scanf("%c", &ans);
+ if (ans == 'y' || ans == 'Y')
+ fp = fopen(dump_path, "w");
+ }
+ if (fp == NULL) {
+ fprintf(stderr, "%s open failed: %s\n", dump_path, strerror(errno));
+ return -1;
+ }
}
write_smart_file(fp, &smart_log);
fclose(fp);
@@ -974,11 +988,6 @@ extern int open_nvme(char *dev_path) {
static void print_usage(char *command) {
printf("Usage: %s [-h] [-d <file>] [-f <file>] [<nvme_devices>]\n"
"\t-h or --help: print this help message\n"
- "\t-d or --dump: dump SMART data to the specified path and file name <file>\n"
- "\t one <nvme_device> is expected with this option\n"
- "\t-f or --file: use SMART data from the specified path and file name <file>\n"
- "\t instead of device, one <nvme_device> is expected with\n"
- "\t this option\n"
"\t<nvme_devices>: the NVMe devices on which to operate, for\n"
"\t example nvme0; if not specified, all detected\n"
"\t nvme devices will be diagnosed\n", command);
diff --git a/diags/man/diag_nvme.8 b/diags/man/diag_nvme.8
index 45567ac26bfc..c023ed6719eb 100644
--- a/diags/man/diag_nvme.8
+++ b/diags/man/diag_nvme.8
@@ -1,7 +1,7 @@
.\"
-.\" Copyright (C) 2022 IBM Corporation
+.\" Copyright (C) 2022, 2023 IBM Corporation
.\"
-.TH "DIAG_NVME" "8" "June 2022" "Linux" "PowerLinux Diagnostic Tools"
+.TH "DIAG_NVME" "8" "September 2023" "Linux" "PowerLinux Diagnostic Tools"
.hy
.SH NAME
.PP
@@ -12,7 +12,7 @@ diag_nvme \- diagnose NVMe devices
.PD 0
.P
.PD
-\f[B]diag_nvme\f[] [\-d <\f[I]file\f[]>] [\-f <\f[I]file\f[]>]
+\f[B]diag_nvme\f[]
<\f[B]nvme\f[]\f[I]n\f[]>
.PD 0
.P
@@ -29,52 +29,10 @@ go through the diagnostics procedure.
The user can control which events will be reported through the
configuration file \f[I]/etc/ppc64\-diag/diag_nvme.config\f[]
.SH OPTIONS
-.TP
-.B \f[B]\-d\f[], \f[B]--dump\f[] \f[I]file\f[]
-Dump SMART data to the specified path and file name \f[I]file\f[].
-The SMART data is extracted from an NVMe device, so specifying one is
-mandatory if this option is selected.
-File created is in a simple key=value format.
-.RS
-.RE
-.TP
-.B \f[B]\-f\f[], \f[B]--file\f[] \f[I]file\f[]
-This option usage is for testing only.
-Use SMART data from the specified path and file name \f[I]file\f[]
-instead of device, one NVMe is mandatory if this option is selected.
-The expected format of the file is a simple key=value that is the same
-one provided with the \-d / --dump option.
-If \f[I]file\f[] is missing from the filesystem it will be treated as a
-failure to retrieve SMART data and an event will be reported.
-.RS
-.RE
-.TP
-.B \f[B]\-h\f[], \f[B]--help\f[]
-Print a help message and exit
-.RS
-.RE
-.SH EXAMPLES
-.TP
-.B \f[B]diag_nvme\f[]
-Run diagnostics in all NVMe devices detected in the system.
-.RS
-.RE
-.TP
.B \f[B]diag_nvme nvme0 nvme1\f[]
Run diagnostics only in nvme0 and nvme1 devices.
.RS
.RE
-.TP
-.B \f[B]diag_nvme \-d smart.txt nvme0\f[]
-Dump SMART data from nvme0 into file smart.txt.
-.RS
-.RE
-.TP
-.B \f[B]diag_nvme \-f smart.txt nvme0\f[]
-Read SMART data from file smart.txt and use it as health information for
-diagnostics of device nvme0.
-.RS
-.RE
.SH REPORTING BUGS
.PP
Patches and issues may be submitted at
--
2.43.2

View File

@ -0,0 +1,29 @@
From 316d2baf5dba0d00447a3ac49f2f95339dbdf5cd Mon Sep 17 00:00:00 2001
From: Greg Joyce <gjoyce@linux.vnet.ibm.com>
Date: Wed, 15 Nov 2023 11:28:05 -0600
Subject: [PATCH] nvme_call_home: remove -d and -f from usage message
The -d and -f weren't removed from the first line of the
usage message.
Signed-off-by: Greg Joyce <gjoyce@linux.vnet.ibm.com>
---
diags/diag_nvme.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/diags/diag_nvme.c b/diags/diag_nvme.c
index 00efec212056..164afe1cadd6 100644
--- a/diags/diag_nvme.c
+++ b/diags/diag_nvme.c
@@ -989,7 +989,7 @@ extern int open_nvme(char *dev_path) {
}
static void print_usage(char *command) {
- printf("Usage: %s [-h] [-d <file>] [-f <file>] [<nvme_devices>]\n"
+ printf("Usage: %s [-h] [<nvme_devices>]\n"
"\t-h or --help: print this help message\n"
"\t<nvme_devices>: the NVMe devices on which to operate, for\n"
"\t example nvme0; if not specified, all detected\n"
--
2.43.2

BIN
ppc64-diag-2.7.9.tar.gz (Stored with Git LFS) Normal file

Binary file not shown.

17
ppc64-diag-encl.service Normal file
View File

@ -0,0 +1,17 @@
[Unit]
Description=Execute ppc64-diag's enclosure diagnostic
[Service]
# added automatically, for details please see
# https://en.opensuse.org/openSUSE:Security_Features#Systemd_hardening_effort
ProtectSystem=full
ProtectHome=true
ProtectHostname=true
ProtectKernelTunables=true
ProtectKernelModules=true
ProtectControlGroups=true
RestrictRealtime=true
# end of automatic additions
Type=oneshot
ExecStart=/usr/sbin/diag_encl -c -s -l
ExecStart=/usr/sbin/diag_encl -d

9
ppc64-diag-encl.timer Normal file
View File

@ -0,0 +1,9 @@
[Unit]
Description=Daily execution of ppc64-diag's enclosure diagnostic
[Timer]
OnCalendar=Daily
Persistent=true
[Install]
WantedBy=timers.target

16
ppc64-diag-nvme.service Normal file
View File

@ -0,0 +1,16 @@
[Unit]
Description=Execute ppc64-diag's nvme diagnostic
[Service]
# added automatically, for details please see
# https://en.opensuse.org/openSUSE:Security_Features#Systemd_hardening_effort
ProtectSystem=full
ProtectHome=true
ProtectHostname=true
ProtectKernelTunables=true
ProtectKernelModules=true
ProtectControlGroups=true
RestrictRealtime=true
# end of automatic additions
Type=oneshot
ExecStart=/usr/sbin/diag_nvme

9
ppc64-diag-nvme.timer Normal file
View File

@ -0,0 +1,9 @@
[Unit]
Description=Daily execution of ppc64-diag's nvme diagnostic
[Timer]
OnCalendar=Daily
Persistent=true
[Install]
WantedBy=timers.target

437
ppc64-diag.changes Normal file
View File

@ -0,0 +1,437 @@
-------------------------------------------------------------------
Mon Feb 26 14:22:42 UTC 2024 - Michal Suchanek <msuchanek@suse.de>
- Fix NVMe diagnostics support with nvmf drive (bsc#1220345 ltc#205706).
* nvme_call_home-remove-d-and-f-as-visible-options.patch
* nvme_call_home-remove-d-and-f-from-usage-message.patch
* diag_nvme-improvements-to-status-and-err-messages.patch
* diags-diag_nvme-call_home-command-fails-on-nvmf-driv.patch
-------------------------------------------------------------------
Fri Nov 17 18:18:03 UTC 2023 - Martin Schreiner <martin.schreiner@suse.com>
- Migrate from cron to systemd timers.
-------------------------------------------------------------------
Fri Oct 20 16:43:03 UTC 2023 - Michal Suchanek <msuchanek@suse.de>
- Light path diagnostics: Support Enclosure Fault LEDs on new enclosures (bsc#1216074 ltc#201656)
+ 0001-ppc64-diag-Move-trim_trail_space-function-to-common-.patch
+ 0002-ppc64-diag-lp_diag-Utilize-trim_trail_space-function.patch
+ 0003-ppc64-diag-lp_diag-Enable-light-path-diagnostics-for.patch
-------------------------------------------------------------------
Wed Jul 19 10:12:11 UTC 2023 - Michal Suchanek <msuchanek@suse.de>
- Do not delete old system dumps offloaded from HMC (bsc#1209274 ltc#198526).
+ rtas_errd-Handle-multiple-platform-dumps.patch
-------------------------------------------------------------------
Mon Oct 17 14:35:43 UTC 2022 - Michal Suchanek <msuchanek@suse.com>
- Update to version 2.7.9 (jsc#PED-536)
- Add NVMe diagnostics support.
-------------------------------------------------------------------
Thu Sep 15 16:30:04 UTC 2022 - Michal Suchanek <msuchanek@suse.com>
- Update to version 2.7.8
- Remove upstreamed patch
- ppc64-diag-Directories-should-be-created-executable.patch
- Use new libvpd
-------------------------------------------------------------------
Thu Jul 15 16:07:50 UTC 2021 - Michal Suchanek <msuchanek@suse.com>
- Update to version 2.7.7 (jsc#SLE-18191).
- Man page for "rtas_errd"
- rtas_errd: Don't run the service in LXC
- New project location
- Remove upstreamed patch
- ppc64-diag-Drop-obsolete-logging-options-from-systemd-service-f.patch
- Fix installation
+ ppc64-diag-Directories-should-be-created-executable.patch
- Run tests
-------------------------------------------------------------------
Tue Mar 23 10:26:05 UTC 2021 - Michal Suchanek <msuchanek@suse.de>
- Fix systemd warning about obsolete logging options (bsc#1183700 ltc#192095).
+ ppc64-diag-Drop-obsolete-logging-options-from-systemd-service-f.patch
-------------------------------------------------------------------
Thu Sep 19 07:13:00 UTC 2019 - Josef Möllers <josef.moellers@suse.com>
- Upgrade to v2.7.6
- opal_errd: Enhance the purging logic of rotate_logs
- diags/cronjob: Include disk diagnostics
- CVE fixes
[jsc#SLE-8324, CVE-2014-4039, CVE-2014-4038]
-------------------------------------------------------------------
Tue Apr 30 12:31:54 UTC 2019 - josef.moellers@suse.com
- Upgrade to upstream version 2.7.5
* Obsoletes 6 patches:
+ ppc64-diag.bug-1072708_fix_service_scripts.patch
+ ppc64-diag.bug-1072708_create_diag_disk_path.patch
+ ppc64-diag.bug-1072708_increase_buffer_for_vpd.patch
+ ppc64-diag.bug-1072708_remove_timestamp.patch
+ ppc64-diag.bug-1072708_create_diag_disk_log_directory.patch
+ bsc1113097-run_diag_encl-exit-gracefully.patch
* rather than just fail, give more concise
error messages in configure script
* use bison rather than yacc
* probe for indicators on running platform
* usysattn, usysfault and usysident: add -P option to limit the
operations to only the platform indicators excluding the
enclosures.
[jsc:SLE-6081]
-------------------------------------------------------------------
Tue Jan 22 08:24:44 UTC 2019 - Tomáš Chvátal <tchvatal@suse.com>
- Drop pointless _service file
- Format with spec-cleaner
-------------------------------------------------------------------
Fri Jan 18 14:07:53 UTC 2019 - josef.moellers@suse.com
- If a system does not have any enclosures, let the daily cron
script "run_diag_encl" exit gracefully to prevent bogus
error messages in root's mailbox.
[bsc#1113097, bsc1113097-run_diag_encl-exit-gracefully.patch]
-------------------------------------------------------------------
Mon Dec 18 08:55:48 UTC 2017 - jloeser@suse.com
- fix 'diag_encl -d' call (bsc#1072708)
- added patches:
* ppc64-diag.bug-1072708_create_diag_disk_log_directory.patch
* ppc64-diag.bug-1072708_create_diag_disk_path.patch
* ppc64-diag.bug-1072708_fix_service_scripts.patch
* ppc64-diag.bug-1072708_increase_buffer_for_vpd.patch
* ppc64-diag.bug-1072708_remove_timestamp.patch
-------------------------------------------------------------------
Fri Oct 20 09:43:36 UTC 2017 - jloeser@suse.com
- version update to 2.7.4 (fate#322225, fate#326429)
- modified patches:
* ppc64-diag.varunused.patch
- removed patches:
* ppc64-diag.opal_errd.patch
* ppc64-diag.rtas_errd.patch
-------------------------------------------------------------------
Wed Sep 6 11:34:03 UTC 2017 - jloeser@suse.com
- add missing references for SLE merge:
* bsc#931001
-------------------------------------------------------------------
Wed Mar 1 16:33:06 UTC 2017 - jloeser@suse.com
- version update to 2.7.3 (fate#321444)
- LED support for Marvell HDD
- Added support to parse new drc-index device tree property
- ela: remove support on PowerVM LPAR
- modified patches:
* ppc64-diag.varunused.patch
-------------------------------------------------------------------
Wed Oct 19 07:48:42 UTC 2016 - jloeser@suse.com
- version update to 2.7.2
- Added slider enclosure diagnostics support
- Added support for eSEL parsing
- modified patches:
* ppc64-diag.opal_errd.patch
-------------------------------------------------------------------
Mon May 9 10:25:34 UTC 2016 - dvaleev@suse.com
- Update to 2.7.1 (bsc#979032)
- Fixed endianness issues in diagnostics code
-------------------------------------------------------------------
Tue Mar 22 15:31:40 UTC 2016 - jloeser@suse.com
- version update to 2.7.0 (FATE#319557)
- Move from EPL to the GNU GPL license
- LED support on FSP based PowerNV platform
- Few minor bugs fixes
- removed patches:
* ppc64-diag.bug-945385_create_dump_directory_on_startup.patch
* ppc64-diag.tmpraces.patch
- modified patches:
* ppc64-diag.opal_errd.patch
* ppc64-diag.rtas_errd.patch
* ppc64-diag.varunused.patch
-------------------------------------------------------------------
Wed Sep 16 16:48:35 UTC 2015 - jloeser@suse.com
- create /var/log/dump directory on startup (bsc#945385)
- added patches:
* ppc64-diag.bug-945385_create_dump_directory_on_startup.patch
-------------------------------------------------------------------
Thu Jul 23 11:45:16 UTC 2015 - jloeser@suse.com
- version update to 2.6.9 (FATE#318007)
- see Changelog under ppc64-diag-2.6.9/ppc64-diag.spec
- modified patches:
* ppc64-diag.varunused.patch
* ppc64-diag.rtas_errd.patch
* ppc64-diag.opal_errd.patch
* ppc64-diag.tmpraces.patch
- removed patches:
* ppc64-diag.add_ncurses_include_path.patch
* ppc64-diag.rtas_errd-LE-fixes-for-extract_platdump.patch
* ppc64-diag-tmpraces.patch
-------------------------------------------------------------------
Mon Jun 8 20:10:27 UTC 2015 - dvlaeev@suse.com
- Update to 2.6.8
- Cpu and memory hotplugging support for PowerKVM guest
- Various fixes to opal-dump-parse tool
- Few LE related fixes
- Several security fixes across tools
- Drop upstreamed patches
ppc64-diag.opal-dump-parse-Add-printk-log-in-the-list-of-dump-s.patch
ppc64-diag.rtas_errd-Fix-PRRN-Event-handling-on-LE.patch
-------------------------------------------------------------------
Fri Sep 26 09:20:31 UTC 2014 - stefan.fent@suse.com
- fix bsc #898480 ppc64-diag: fixes for LE support
- patches added:
- ppc64-diag.opal-dump-parse-Add-printk-log-in-the-list-of-dump-s.patch
- ppc64-diag.rtas_errd-Fix-PRRN-Event-handling-on-LE.patch
- ppc64-diag.rtas_errd-LE-fixes-for-extract_platdump.patch
-------------------------------------------------------------------
Tue Sep 9 11:44:11 UTC 2014 - dvlaeev@suse.com
- Fix download service
-------------------------------------------------------------------
Mon Sep 8 11:11:06 UTC 2014 - jloeser@suse.com
- version update to 2.6.7 (BNC#894846)
no upstream changelog provided
- modified patches:
* ppc64-diag-tmpraces.patch
* ppc64-diag.add_ncurses_include_path.patch
* ppc64-diag.opal_errd.patch
* ppc64-diag.rtas_errd.patch
* ppc64-diag.varunused.patch
-------------------------------------------------------------------
Tue Jul 22 12:22:13 UTC 2014 - dvlaeev@suse.com
- Fix download URL once again
-------------------------------------------------------------------
Tue Jul 22 11:50:30 UTC 2014 - meissner@suse.com
- Also run the systemd %post snippets within chroots, otherwise
we will enable the service (as we always install in chroots).
bnc#882450
- the %service snippets take argument lists.
-------------------------------------------------------------------
Fri Jul 18 13:32:25 UTC 2014 - jsegitz@suse.com
- updated ppc64-diag-tmpraces.patch to include proper permissions
(bnc#882667, CVE-2014-4039)
- added missing pclose and fclose in default path of get_dt_status()
-------------------------------------------------------------------
Mon Jun 23 13:47:37 UTC 2014 - jloeser@suse.com
- fix /tmp race (BNC#882667, CVE-2014-4038)
- added patches:
* ppc64-diag-tmpraces.patch
-------------------------------------------------------------------
Fri May 23 13:47:29 UTC 2014 - dvlaeev@suse.com
- Fix Source url
-------------------------------------------------------------------
Fri May 23 07:59:45 UTC 2014 - jsegitz@novell.com
- added necessary macros for systemd files
-------------------------------------------------------------------
Fri May 2 15:37:03 UTC 2014 - jloeser@suse.com
- version update to 2.6.6 (BNC#875775)
* No upstream changelog provided
* need to add include/ncurses path for menu.h in lpd/lp_diag.c
- added patches:
* ppc64-diag.add_ncurses_include_path.patch
-------------------------------------------------------------------
Fri Mar 28 11:41:49 UTC 2014 - jloeser@suse.com
- remove ppc64.stop_service.patch (already fixed by
ppc64.opal_errd.patch)
-------------------------------------------------------------------
Tue Mar 25 13:28:10 UTC 2014 - jloeser@suse.com
- version update to 2.6.4 (BNC#869845)
new opal error daemon added
No upstream changelog provided
- add opal_errd.service file
- add ppc64-diag.opal_errd.patch: fix opal_errd init script
- fix rtas_errd init script: $pid to correct pid file
(init scripts might be obsolete due to systemd)
-------------------------------------------------------------------
Mon Mar 10 12:41:50 UTC 2014 - jloeser@suse.com
- version update to 2.6.3 (BNC#867346, FATE#315459)
No upstream changelog provided
-------------------------------------------------------------------
Fri Jan 17 10:47:35 UTC 2014 - dvlaeev@suse.com
- drop ppc64-diag.unistd.patch
-------------------------------------------------------------------
Fri Jan 10 17:44:52 UTC 2014 - jloeser@suse.com
- version update to 2.6.2 (FATE#315459)
- Minor bug fix in diag_encl and encl_led
- drop ppc64-diag.makefile.patch
-------------------------------------------------------------------
Tue Dec 10 19:27:30 UTC 2013 - dvaleev@suse.com
- enable ppc64le
-------------------------------------------------------------------
Sun Feb 24 16:46:41 UTC 2013 - dvaleev@suse.com
- Update to 2.6.1:
* Add Handler to handle PRRN RTAS notification
* Drop upstreamed ppc64-diag.unistd.patch
* Update package description
-------------------------------------------------------------------
Wed Dec 19 11:54:00 UTC 2012 - dvaleev@suse.com
- update to 2.6.0:
* Added Light Path Diagnostics code. (moved from powerpc-utils)
* Introduced new options to diag_encl command (Jim).
* Added bluehawk enclosure diagnostics support (Jim).
* Introduced new command "encl_led" to modify identify/fault
indicators for SCSI enclosures (Jim).
- update ppc64-diag.rtas_errd.patch to not have references to
/var/lock/subsys
- add ncurses-devel and libvpd2-devel to BuildRequires
-------------------------------------------------------------------
Thu Dec 6 17:30:26 UTC 2012 - dvaleev@suse.com
- update to 2.5.0
No upstream changelog provided
- add systemd service for rtas_errd
-------------------------------------------------------------------
Thu Jun 14 10:10:28 UTC 2012 - cfarrell@suse.com
- license update: EPL-1.0
SPDX format (http://www.spdx.org/licenses)
-------------------------------------------------------------------
Tue Apr 24 12:13:09 UTC 2012 - dvaleev@suse.com
- Change upstream Url
- fix build with gcc 4.7
- update to 2.4.3
- Added message catalogs for the ipr, ixgb, lpfc, and qla2xxx
drivers
- Removed obsolete ppc64_diag_servagent script
-------------------------------------------------------------------
Wed Nov 2 21:11:06 UTC 2011 - dvaleev@suse.com
- update to 2.4.2
- Minor modifications to GPFS catalog files
and syslog_to_svclog.cpp
- Added gpfs files to the catalog, updated ppc64-diag-setup
notification commands
- Changed Makefiles and rules.mk to build for the default
architecture rather than -m32
- Added ELA code to the package, made changes to the rules.mk
and minor changes to the spec file
- Bug fix adding in support for -e and -l, so that root users can
be notified of serviceable events. Addresses bug #26192
- Added SIGCHLD handler to clean up servicelog notification
scripts
- Removed all absolute path references, specifically
to /sbin/lsvpd and lsvpd
-------------------------------------------------------------------
Fri Jan 16 14:03:19 CET 2009 - olh@suse.de
- fix Supplements string, it can't contain a comma, use * instead
(bnc#456695)
-------------------------------------------------------------------
Mon Dec 8 14:42:16 CET 2008 - olh@suse.de
- do not autoinstall on Cell blades
- add prereq aaa_base to runlevel script in post install
- do not abort on errors
-------------------------------------------------------------------
Fri Dec 5 09:49:03 CET 2008 - olh@suse.de
- update Supplements to match only pSeries and Cell blades
-------------------------------------------------------------------
Fri Dec 5 08:53:30 CET 2008 - olh@suse.de
- add PreReq libservicelog to get servicelog.db (bnc#417532,bnc#449382)
-------------------------------------------------------------------
Mon Oct 27 09:05:50 CET 2008 - olh@suse.de
- use supplements instead of Enhances
-------------------------------------------------------------------
Mon Oct 27 09:03:44 CET 2008 - olh@suse.de
- register with servicelog during first install or first boot (bnc#417532)
-------------------------------------------------------------------
Fri Sep 26 13:22:29 CEST 2008 - olh@suse.de
- add tag Enhances: kernel-ppc64
- make sure package gets selected/installed on a 64bit system
-------------------------------------------------------------------
Wed Sep 10 13:51:49 CEST 2008 - olh@suse.de
- new ppc64-diag 2.3.1 package (bnc#417532)

165
ppc64-diag.spec Normal file
View File

@ -0,0 +1,165 @@
#
# spec file for package ppc64-diag
#
# Copyright (c) 2024 SUSE LLC
#
# All modifications and additions to the file contributed by third parties
# remain the property of their copyright owners, unless otherwise agreed
# upon. The license for this file, and modifications and additions to the
# file, is the same license as for the pristine package itself (unless the
# license for the pristine package is not an Open Source License, in which
# case the license is the MIT License). An "Open Source License" is a
# license that conforms to the Open Source Definition (Version 1.9)
# published by the Open Source Initiative.
# Please submit bugfixes or comments via https://bugs.opensuse.org/
#
Name: ppc64-diag
Version: 2.7.9
Release: 0
Summary: Linux for Power Platform Diagnostics
License: GPL-2.0-or-later
Group: System/Monitoring
URL: https://github.com/power-ras/ppc64-diag
Source0: https://github.com/power-ras/ppc64-diag/archive/v%{version}.tar.gz#/%{name}-%{version}.tar.gz
Source1: ppc64-diag-encl.service
Source2: ppc64-diag-encl.timer
Source3: ppc64-diag-nvme.service
Source4: ppc64-diag-nvme.timer
#PATCH-FIX-OPENSUSE - ppc64-diag.varunused.patch - fix unused variables
Patch1: ppc64-diag.varunused.patch
#PATCH-FIX-UPSTREAM - rtas_errd-Handle-multiple-platform-dumps.patch - store multiple dumps
Patch2: rtas_errd-Handle-multiple-platform-dumps.patch
#PATCH-FIX-UPSTREAM - Enclosure fault LED support
Patch3: 0001-ppc64-diag-Move-trim_trail_space-function-to-common-.patch
Patch4: 0002-ppc64-diag-lp_diag-Utilize-trim_trail_space-function.patch
Patch5: 0003-ppc64-diag-lp_diag-Enable-light-path-diagnostics-for.patch
Patch6: nvme_call_home-remove-d-and-f-as-visible-options.patch
Patch7: nvme_call_home-remove-d-and-f-from-usage-message.patch
Patch8: diag_nvme-improvements-to-status-and-err-messages.patch
Patch9: diags-diag_nvme-call_home-command-fails-on-nvmf-driv.patch
BuildRequires: autoconf
BuildRequires: automake
BuildRequires: bison
BuildRequires: flex
BuildRequires: gcc-c++
BuildRequires: librtas-devel >= 1.4.0
BuildRequires: libservicelog-devel
BuildRequires: libtool
BuildRequires: libvpd-devel
BuildRequires: perl
BuildRequires: pkgconfig
BuildRequires: systemd-rpm-macros
BuildRequires: pkgconfig(libudev)
BuildRequires: pkgconfig(ncurses)
BuildRequires: pkgconfig(sqlite3)
# Light Path Diagnostics depends on below lsvpd version.
Requires: lsvpd >= 1.7.1
Requires: powerpc-utils >= 1.3.2
Requires: servicelog
Requires(post): aaa_base
# autoselect the package on systems which have the /vdevice/IBM,sp node
# All pSeries POWER5 and later have this property
Supplements: modalias(vio:TIBM*spS*)
ExclusiveArch: ppc ppc64 ppc64le
%description
This package contains various diagnostic tools for PowerLinux.
These tools capture diagnostic events from Power Systems
platform firmware, SES enclosures and device drivers, and
write the events to the servicelog database. They also provide automated
responses to urgent events such as environmental conditions and
predictive failures, modify the FRU fault indicator(s) where
appropriate, and provide event notification to system
administrators or connected service frameworks.
%prep
%setup -q
%autopatch -p1
%build
# Rewrite the hard-coded <prefix>/libexec/ppc64-diag path in both upstream
# systemd unit files to the distribution's libexec directory.
sed -i 's@%{_prefix}/libexec/ppc64-diag@%{_libexecdir}@g' scripts/opal_errd.service
sed -i 's@%{_prefix}/libexec/ppc64-diag@%{_libexecdir}@g' scripts/rtas_errd.service
# Regenerate the autotools files from scratch, then configure and build.
autoreconf -fvi
%configure
make %{?_smp_mflags}
%install
%make_install
# Give servevent_parse.pl mode 0644 (no execute permission).
chmod 644 %{buildroot}%{_sysconfdir}/ppc64-diag/servevent_parse.pl
# Create the ses_pages directory under the package's configuration directory.
mkdir %{buildroot}%{_sysconfdir}/ppc64-diag/ses_pages
# Provide usysfault as a symlink to usysattn.
ln -sf %{_sbindir}/usysattn %{buildroot}%{_sbindir}/usysfault
# Install the systemd units: the two unit files from the upstream scripts
# directory, then the four service/timer files carried as extra package sources.
install -D -m0644 scripts/rtas_errd.service %{buildroot}%{_unitdir}/rtas_errd.service
install -D -m0644 scripts/opal_errd.service %{buildroot}%{_unitdir}/opal_errd.service
install -D -m 0644 %{SOURCE1} %{buildroot}/%{_unitdir}/ppc64-diag-encl.service
install -D -m 0644 %{SOURCE2} %{buildroot}/%{_unitdir}/ppc64-diag-encl.timer
install -D -m 0644 %{SOURCE3} %{buildroot}/%{_unitdir}/ppc64-diag-nvme.service
install -D -m 0644 %{SOURCE4} %{buildroot}/%{_unitdir}/ppc64-diag-nvme.timer
# rc<name> links with the relative target "service".
# NOTE(review): presumably the SUSE rc* convenience-command convention - confirm.
ln -s service %{buildroot}%{_sbindir}/rcrtas_errd
ln -s service %{buildroot}%{_sbindir}/rcopal_errd
# Remove installed files that this package does not ship from these locations:
# the opal_errd/rtas_errd entries under <prefix>/libexec, the doc-dir copies of
# COPYING and README.md (the files section packages them from the source tree),
# and the cron.daily directory.
# NOTE(review): cron.daily is presumably superseded by the timer units - confirm.
rm %{buildroot}%{_prefix}/libexec/%{name}/opal_errd
rm %{buildroot}%{_prefix}/libexec/%{name}/rtas_errd
rm %{buildroot}%{_datadir}/doc/%{name}/COPYING
rm %{buildroot}%{_datadir}/doc/%{name}/README.md
rm -rf %{buildroot}/etc/cron.daily
%check
%make_build check
# In addition to the make check target, run each component's own run_tests
# driver from inside its directory.
for i in opal_errd common diags/test ; do
pushd $i
./run_tests
popd
done
%files
%license COPYING
%doc README.md
%{_sbindir}/*
%dir %{_sysconfdir}/ppc64-diag
%config %{_sysconfdir}/ppc64-diag/*
%config %{_sysconfdir}/rc.powerfail
%{_mandir}/man8/*.8%{?ext_man}
%{_unitdir}/rtas_errd.service
%{_unitdir}/opal_errd.service
%{_unitdir}/ppc64-diag-encl.service
%{_unitdir}/ppc64-diag-encl.timer
%{_unitdir}/ppc64-diag-nvme.service
%{_unitdir}/ppc64-diag-nvme.timer
%post
# Run both setup scripts with --register, discarding all of their output.
%{_sysconfdir}/ppc64-diag/ppc64_diag_setup --register >/dev/null 2>&1
%{_sysconfdir}/ppc64-diag/lp_diag_setup --register >/dev/null 2>&1
# systemd-rpm-macros post-install handling for every unit shipped here.
%service_add_post rtas_errd.service
%service_add_post opal_errd.service
%service_add_post ppc64-diag-encl.service ppc64-diag-encl.timer ppc64-diag-nvme.service ppc64-diag-nvme.timer
%preun
# Pre-uninstall script -------------------------------------------------
# Runs only on final removal (first argument 0), not on upgrade: hand the
# units to the systemd macros, then call both setup scripts with --unregister.
if [ "$1" = "0" ]; then # last uninstall
%service_del_preun rtas_errd.service
%service_del_preun opal_errd.service
%service_del_preun ppc64-diag-encl.service ppc64-diag-encl.timer ppc64-diag-nvme.service ppc64-diag-nvme.timer
# A failing unregister must not fail the scriptlet: a non-zero exit status
# from a pre-uninstall scriptlet makes rpm refuse to remove the package.
%{_sysconfdir}/ppc64-diag/ppc64_diag_setup --unregister >/dev/null || :
%{_sysconfdir}/ppc64-diag/lp_diag_setup --unregister >/dev/null || :
fi
%triggerin -- librtas
# trigger on librtas upgrades ------------------------------------------
# The second argument is the number of installed instances of the triggering
# package; 2 means a librtas upgrade is in progress. Use try-restart so the
# daemon is restarted only if it is already running (a stopped service stays
# stopped), and never let a systemctl failure fail the trigger.
if [ "$2" = "2" ]; then
systemctl try-restart rtas_errd.service >/dev/null || :
fi
%pre
# systemd-rpm-macros pre-install hook for every unit shipped by this package.
%service_add_pre rtas_errd.service
%service_add_pre opal_errd.service
%service_add_pre ppc64-diag-encl.service ppc64-diag-encl.timer ppc64-diag-nvme.service ppc64-diag-nvme.timer
%postun
# systemd-rpm-macros post-uninstall hook for every unit shipped by this package.
%service_del_postun rtas_errd.service
%service_del_postun opal_errd.service
%service_del_postun ppc64-diag-encl.service ppc64-diag-encl.timer ppc64-diag-nvme.service ppc64-diag-nvme.timer
%changelog

View File

@ -0,0 +1,58 @@
Index: ppc64-diag-2.7.4/rtas_errd/files.c
===================================================================
--- ppc64-diag-2.7.4.orig/rtas_errd/files.c
+++ ppc64-diag-2.7.4/rtas_errd/files.c
@@ -176,7 +176,7 @@ setup_rtas_event_scenario(void)
{
struct stat sbuf;
char *tmp;
- int fd, len;
+ int fd, __attribute__((unused)) len;
int i;
if (scenario_file == NULL)
Index: ppc64-diag-2.7.4/rtas_errd/epow.c
===================================================================
--- ppc64-diag-2.7.4.orig/rtas_errd/epow.c
+++ ppc64-diag-2.7.4/rtas_errd/epow.c
@@ -62,7 +62,7 @@ static int time_remaining = 0;
void
epow_timer_handler(int sig, siginfo_t siginfo, void *context)
{
- int rc, state;
+ int __attribute__((unused)) rc, state;
struct itimerval tv;
if (time_remaining <= 0) {
@@ -162,7 +162,7 @@ parse_epow(struct event *event)
struct rtas_epow_scn *epow;
struct itimerval tv;
char *event_type;
- int rc, state;
+ int __attribute__((unused)) rc, state;
/*
* Check the sensor state; this will be used to ensure
@@ -397,7 +397,7 @@ check_epow(struct event *event)
{
pid_t child;
char *childargs[2];
- int current_status;
+ int __attribute__((unused)) current_status;
/*
* Dissect the EPOW extended error information;
Index: ppc64-diag-2.7.4/diags/7031_D24_T24.c
===================================================================
--- ppc64-diag-2.7.4.orig/diags/7031_D24_T24.c
+++ ppc64-diag-2.7.4/diags/7031_D24_T24.c
@@ -370,7 +370,8 @@ int
diag_7031_D24_T24(int fd, struct dev_vpd *vpd)
{
struct pearl_diag_page2 dp;
- int failure = 0, rc, encl_id;
+ int failure = 0, encl_id;
+ __attribute__((unused)) int rc;
int buf_len = sizeof(dp);
int ps1, ps2, fan1, fan2, fan3, rpt, vpd_card;

View File

@ -0,0 +1,110 @@
From d05654e5ec6f37cf6caa491fc7d95b336f9603e2 Mon Sep 17 00:00:00 2001
From: Sathvika Vasireddy <sv@linux.ibm.com>
Date: Mon, 10 Jul 2023 13:43:21 +0530
Subject: [PATCH] rtas_errd: Handle multiple platform dumps
References: bsc#1209274 ltc#198526
Upstream: merged, expected in 2.7.10
Git-commit: d05654e5ec6f37cf6caa491fc7d95b336f9603e2
Currently, whenever a new dump arrives, old dump file of that specific dump
type is removed before writing the new dump out. Any dump file with the
same prefix (dump type) gets deleted. This means only one set of dump files
is saved, since only one dump file per dump type is saved.
Handle multiple dumps on Linux by allowing as many dumps to be offloaded
until disk space is available. To do this, remove the function that checks
for prefix size and removes old dump files. In the event of not enough
disk space available, log an error to the user along with the dump tag.
User will free up space and run extract_platdump tool using the dump tag
provided in the error message to offload the dump. Error log can be viewed
by the user by issuing 'journalctl -p err -t rtas_errd' command.
Signed-off-by: Sathvika Vasireddy <sv@linux.ibm.com>
Signed-off-by: Mahesh Salgaonkar <mahesh@linux.ibm.com>
---
rtas_errd/dump.c | 29 ++++++++++++++++++++++++++++-
rtas_errd/extract_platdump.c | 6 ------
2 files changed, 28 insertions(+), 7 deletions(-)
diff --git a/rtas_errd/dump.c b/rtas_errd/dump.c
index cc50d91b593b..494c322c4164 100644
--- a/rtas_errd/dump.c
+++ b/rtas_errd/dump.c
@@ -30,8 +30,10 @@
#include <fcntl.h>
#include <librtas.h>
#include <librtasevent.h>
+#include <syslog.h>
#include <sys/stat.h>
#include <sys/wait.h>
+#include <sys/statvfs.h>
#include "utils.h"
#include "rtas_errd.h"
@@ -284,7 +286,9 @@ void
check_platform_dump(struct event *event)
{
struct rtas_dump_scn *dump_scn;
+ struct statvfs vfs;
uint64_t dump_tag;
+ uint64_t dump_size;
char filename[DUMP_MAX_FNAME_LEN + 20], *pos;
char *pathname = NULL;
FILE *f;
@@ -306,11 +310,34 @@ check_platform_dump(struct event *event)
return;
}
- /* Retrieve the dump */
+ /* Retrieve the dump tag */
dump_tag = dump_scn->id;
dump_tag |= ((uint64_t)dump_scn->v6hdr.subtype << 32);
dbg("Dump ID: 0x%016LX", dump_tag);
+ if (statvfs(d_cfg.platform_dump_path, &vfs) == -1) {
+ log_msg(event, "statvfs() failed on %s: %s",
+ d_cfg.platform_dump_path, strerror(errno));
+ return;
+ }
+
+ /* Retrieve the size of the platform dump */
+ dump_size = dump_scn->size_hi;
+ dump_size <<= 32;
+ dump_size |= dump_scn->size_lo;
+
+ /* Check if there is sufficient space in the file system to store the dump */
+ if (vfs.f_bavail * vfs.f_frsize < dump_size) {
+ syslog(LOG_ERR, "Insufficient space in %s to store platform dump for dump ID: "
+ "0x%016lX (required: %lu bytes, available: %lu bytes)",
+ d_cfg.platform_dump_path, dump_tag, dump_size,
+ (vfs.f_bavail * vfs.f_frsize));
+ syslog(LOG_ERR, "After clearing space, run 'extract_platdump "
+ "0x%016lX'.\n", dump_tag);
+ return;
+ }
+
+ /* Retrieve the dump */
snprintf(tmp_sys_arg, 60, "0x%016LX", (long long unsigned int)dump_tag);
system_args[0] = EXTRACT_PLATDUMP_CMD;
system_args[1] = tmp_sys_arg;
diff --git a/rtas_errd/extract_platdump.c b/rtas_errd/extract_platdump.c
index fbe65b2fe5c5..831e57ea8b69 100644
--- a/rtas_errd/extract_platdump.c
+++ b/rtas_errd/extract_platdump.c
@@ -290,12 +290,6 @@ extract_platform_dump(uint64_t dump_tag)
}
}
- /*
- * Before writing the new dump out, we need to see if any older
- * dumps need to be removed first
- */
- remove_old_dumpfiles(filename, prefix_size);
-
/* Copy the dump off to the filesystem */
pathname[0] = '\0';
strcpy(pathname, d_cfg.platform_dump_path);
--
2.41.0