SHA256
1
0
forked from pool/grub2

Accepting request 1112505 from home:michael-chang:branches:Base:System

- Fix a boot delay regression in PowerPC PXE boot (bsc#1201300)
  * 0001-ieee1275-ofdisk-retry-on-open-and-read-failure.patch

OBS-URL: https://build.opensuse.org/request/show/1112505
OBS-URL: https://build.opensuse.org/package/show/Base:System/grub2?expand=0&rev=462
This commit is contained in:
Michael Chang 2023-09-20 09:38:55 +00:00 committed by Git OBS Bridge
parent 0a700a1789
commit e51d4315a7
2 changed files with 104 additions and 47 deletions

View File

@ -1,6 +1,6 @@
From b99c45820f228ff5b881700eda95a017abf2e198 Mon Sep 17 00:00:00 2001 From f4728ed5307b6be6377b7bdafcab55fd3676a761 Mon Sep 17 00:00:00 2001
From: Mukesh Kumar Chaurasiya <mchauras@linux.vnet.ibm.com> From: Mukesh Kumar Chaurasiya <mchauras@linux.ibm.com>
Date: Wed, 1 Mar 2023 15:08:05 +0530 Date: Mon, 17 Jul 2023 16:02:34 +0530
Subject: [PATCH] ieee1275/ofdisk: retry on open and read failure Subject: [PATCH] ieee1275/ofdisk: retry on open and read failure
Sometimes, when booting from a very busy SAN, the access to the Sometimes, when booting from a very busy SAN, the access to the
@ -8,15 +8,48 @@ disk can fail and then grub will eventually drop to grub prompt.
This scenario is more frequent when deploying many machines at This scenario is more frequent when deploying many machines at
the same time using the same SAN. the same time using the same SAN.
This patch aims to force the ofdisk module to retry the open or This patch aims to force the ofdisk module to retry the open or
read function after it fails. We use MAX_RETRIES to specify the read function for network disks (excluding nvme-of) after it fails. We use
amount of times it will try to access the disk before it DEFAULT_RETRY_TIMEOUT, which is 15 seconds to specify the time it'll
definitely fails. retry to access the disk before it definitely fails. The timeout can be
changed by setting the environment variable ofdisk_retry_timeout.
If the environment variable is unset, cannot be parsed, or is zero, grub
falls back to the default value of 15 seconds.
Signed-off-by: Mukesh Kumar Chaurasiya <mchauras@linux.vnet.ibm.com> Signed-off-by: Diego Domingos <diegodo@linux.vnet.ibm.com>
Signed-off-by: Mukesh Kumar Chaurasiya <mchauras@linux.ibm.com>
--- ---
grub-core/disk/ieee1275/ofdisk.c | 65 +++++++++++++++++++++++++++++++- docs/grub.texi | 8 ++++
1 file changed, 63 insertions(+), 2 deletions(-) grub-core/disk/ieee1275/ofdisk.c | 80 +++++++++++++++++++++++++++++++-
2 files changed, 86 insertions(+), 2 deletions(-)
diff --git a/docs/grub.texi b/docs/grub.texi
index d3f0f6577..c8ebc083d 100644
--- a/docs/grub.texi
+++ b/docs/grub.texi
@@ -3315,6 +3315,7 @@ These variables have special meaning to GRUB.
* net_default_ip::
* net_default_mac::
* net_default_server::
+* ofdisk_retry_timeout::
* pager::
* prefix::
* pxe_blksize::
@@ -3744,6 +3745,13 @@ The default is the value of @samp{color_normal} (@pxref{color_normal}).
@xref{Network}.
+@node ofdisk_retry_timeout
+@subsection ofdisk_retry_timeout
+
+The time in seconds for which GRUB keeps retrying to open or read a disk
+after such an operation fails. This value defaults to 15 seconds.
+
+
@node pager
@subsection pager
diff --git a/grub-core/disk/ieee1275/ofdisk.c b/grub-core/disk/ieee1275/ofdisk.c
index 7197d5401..f96bbb58c 100644
--- a/grub-core/disk/ieee1275/ofdisk.c --- a/grub-core/disk/ieee1275/ofdisk.c
+++ b/grub-core/disk/ieee1275/ofdisk.c +++ b/grub-core/disk/ieee1275/ofdisk.c
@@ -24,6 +24,9 @@ @@ -24,6 +24,9 @@
@ -25,11 +58,11 @@ Signed-off-by: Mukesh Kumar Chaurasiya <mchauras@linux.vnet.ibm.com>
#include <grub/time.h> #include <grub/time.h>
+#include <grub/env.h> +#include <grub/env.h>
+ +
+#define RETRY_DEFAULT_TIMEOUT 15000 +#define RETRY_DEFAULT_TIMEOUT 15
static char *last_devpath; static char *last_devpath;
static grub_ieee1275_ihandle_t last_ihandle; static grub_ieee1275_ihandle_t last_ihandle;
@@ -783,7 +786,7 @@ @@ -783,7 +786,7 @@ compute_dev_path (const char *name)
} }
static grub_err_t static grub_err_t
@ -38,20 +71,35 @@ Signed-off-by: Mukesh Kumar Chaurasiya <mchauras@linux.vnet.ibm.com>
{ {
grub_ieee1275_phandle_t dev; grub_ieee1275_phandle_t dev;
char *devpath; char *devpath;
@@ -879,6 +882,41 @@ @@ -879,6 +882,56 @@ grub_ofdisk_open (const char *name, grub_disk_t disk)
return 0; return 0;
} }
+static grub_uint64_t +static grub_uint64_t
+grub_ofdisk_disk_timeout(void) +grub_ofdisk_disk_timeout (grub_disk_t disk)
+{ +{
+ if(grub_env_get("ofdisk_retry_timeout") != NULL) + grub_uint64_t retry;
+ const char *timeout = grub_env_get ("ofdisk_retry_timeout");
+
+ if (!(grub_strstr (disk->name, "fibre-channel@") ||
+ grub_strstr (disk->name, "vfc-client")) ||
+ grub_strstr(disk->name, "nvme-of"))
+ { + {
+ grub_uint64_t retry = grub_strtoul(grub_env_get("ofdisk_retry_timeout"), 0, 10); + /* Do not retry in case of non network drives */
+ return 0;
+ }
+
+ if (timeout != NULL)
+ {
+ retry = grub_strtoul (timeout, 0, 10);
+ if (grub_errno != GRUB_ERR_NONE)
+ {
+ grub_errno = GRUB_ERR_NONE;
+ return RETRY_DEFAULT_TIMEOUT;
+ }
+ if (retry) + if (retry)
+ return retry; + return retry;
+ } + }
+
+ return RETRY_DEFAULT_TIMEOUT; + return RETRY_DEFAULT_TIMEOUT;
+} +}
+ +
@ -59,28 +107,28 @@ Signed-off-by: Mukesh Kumar Chaurasiya <mchauras@linux.vnet.ibm.com>
+grub_ofdisk_open (const char *name, grub_disk_t disk) +grub_ofdisk_open (const char *name, grub_disk_t disk)
+{ +{
+ grub_err_t err; + grub_err_t err;
+ grub_uint64_t timeout = grub_get_time_ms () + grub_ofdisk_disk_timeout(); + grub_uint64_t timeout = grub_get_time_ms () + (grub_ofdisk_disk_timeout (disk) * 1000);
+ + _Bool cont;
+ retry: + do
+ err = grub_ofdisk_open_real (name, disk);
+
+ if (err == GRUB_ERR_UNKNOWN_DEVICE)
+ { + {
+ if (grub_get_time_ms () < timeout) + err = grub_ofdisk_open_real (name, disk);
+ cont = grub_get_time_ms () < timeout;
+ if (err == GRUB_ERR_UNKNOWN_DEVICE && cont)
+ { + {
+ grub_dprintf ("ofdisk","Failed to open disk %s. Retrying...\n", name); + grub_dprintf ("ofdisk","Failed to open disk %s. Retrying...\n", name);
+ grub_errno = GRUB_ERR_NONE; + grub_errno = GRUB_ERR_NONE;
+ goto retry;
+ } + }
+ } + else
+ + break;
+ grub_millisleep (1000);
+ } while (cont);
+ return err; + return err;
+} +}
+ +
static void static void
grub_ofdisk_close (grub_disk_t disk) grub_ofdisk_close (grub_disk_t disk)
{ {
@@ -915,7 +953,7 @@ @@ -915,7 +968,7 @@ grub_ofdisk_prepare (grub_disk_t disk, grub_disk_addr_t sector)
} }
static grub_err_t static grub_err_t
@ -89,33 +137,36 @@ Signed-off-by: Mukesh Kumar Chaurasiya <mchauras@linux.vnet.ibm.com>
grub_size_t size, char *buf) grub_size_t size, char *buf)
{ {
grub_err_t err; grub_err_t err;
@@ -935,6 +973,29 @@ @@ -934,6 +987,29 @@ grub_ofdisk_read (grub_disk_t disk, grub_disk_addr_t sector,
return 0;
} }
static grub_err_t +static grub_err_t
+grub_ofdisk_read (grub_disk_t disk, grub_disk_addr_t sector, +grub_ofdisk_read (grub_disk_t disk, grub_disk_addr_t sector,
+ grub_size_t size, char *buf) + grub_size_t size, char *buf)
+{ +{
+ grub_err_t err; + grub_err_t err;
+ grub_uint64_t timeout = grub_get_time_ms () + grub_ofdisk_disk_timeout(); + grub_uint64_t timeout = grub_get_time_ms () + (grub_ofdisk_disk_timeout (disk) * 1000);
+ + _Bool cont;
+ retry: + do
+ err = grub_ofdisk_read_real (disk, sector, size, buf);
+
+ if (err == GRUB_ERR_READ_ERROR)
+ { + {
+ if (grub_get_time_ms () < timeout) + err = grub_ofdisk_read_real (disk, sector, size, buf);
+ cont = grub_get_time_ms () < timeout;
+ if (err == GRUB_ERR_UNKNOWN_DEVICE && cont)
+ { + {
+ grub_dprintf ("ofdisk","Failed to read disk %s. Retrying...\n", (char*)disk->data); + grub_dprintf ("ofdisk","Failed to read disk %s. Retrying...\n", (char*)disk->data);
+ grub_errno = GRUB_ERR_NONE; + grub_errno = GRUB_ERR_NONE;
+ goto retry;
+ } + }
+ } + else
+ + break;
+ grub_millisleep (1000);
+ } while (cont);
+ return err; + return err;
+} +}
+ +
+static grub_err_t static grub_err_t
grub_ofdisk_write (grub_disk_t disk, grub_disk_addr_t sector, grub_ofdisk_write (grub_disk_t disk, grub_disk_addr_t sector,
grub_size_t size, const char *buf) grub_size_t size, const char *buf)
{ --
2.41.0

View File

@ -1,3 +1,9 @@
-------------------------------------------------------------------
Wed Sep 20 07:54:05 UTC 2023 - Michael Chang <mchang@suse.com>
- Fix a boot delay regression in PowerPC PXE boot (bsc#1201300)
* 0001-ieee1275-ofdisk-retry-on-open-and-read-failure.patch
------------------------------------------------------------------- -------------------------------------------------------------------
Tue Sep 19 06:31:43 UTC 2023 - Gary Ching-Pang Lin <glin@suse.com> Tue Sep 19 06:31:43 UTC 2023 - Gary Ching-Pang Lin <glin@suse.com>