SHA256
3
0
forked from pool/glibc

Accepting request 1008197 from home:Andreas_Schwab:Factory

- get-nscd-addresses.patch: get_nscd_addresses: Fix subscript typos (BZ
  #29605)
- x86-64-avx2-string-functions.patch: check for required cpu features in
  AVX2 string functions (BZ #29611)
- nscd-aicache.patch: nscd: Drop local address tuple variable (BZ #29607)

OBS-URL: https://build.opensuse.org/request/show/1008197
OBS-URL: https://build.opensuse.org/package/show/Base:System/glibc?expand=0&rev=634
This commit is contained in:
Andreas Schwab 2022-10-05 12:19:14 +00:00 committed by Git OBS Bridge
parent 3ed180dddc
commit 0db4b59100
5 changed files with 686 additions and 1 deletions

37
get-nscd-addresses.patch Normal file
View File

@ -0,0 +1,37 @@
From c9226c03da0276593a0918eaa9a14835183343e8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=B6rg=20Sonnenberger?= <joerg@bec.de>
Date: Mon, 26 Sep 2022 13:59:16 -0400
Subject: [PATCH] get_nscd_addresses: Fix subscript typos [BZ #29605]
Fix the subscript on air->family, which was accidentally set to COUNT
when it should have remained as I.
Resolves: BZ #29605
Reviewed-by: Siddhesh Poyarekar <siddhesh@sourceware.org>
---
sysdeps/posix/getaddrinfo.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/sysdeps/posix/getaddrinfo.c b/sysdeps/posix/getaddrinfo.c
index bcff909b2f..5cda9bb072 100644
--- a/sysdeps/posix/getaddrinfo.c
+++ b/sysdeps/posix/getaddrinfo.c
@@ -540,11 +540,11 @@ get_nscd_addresses (const char *name, const struct addrinfo *req,
at[count].addr[2] = htonl (0xffff);
}
else if (req->ai_family == AF_UNSPEC
- || air->family[count] == req->ai_family)
+ || air->family[i] == req->ai_family)
{
- at[count].family = air->family[count];
+ at[count].family = air->family[i];
memcpy (at[count].addr, addrs, size);
- if (air->family[count] == AF_INET6)
+ if (air->family[i] == AF_INET6)
res->got_ipv6 = true;
}
at[count].next = at + count + 1;
--
2.37.3

View File

@ -1,3 +1,12 @@
-------------------------------------------------------------------
Wed Oct 5 07:53:24 UTC 2022 - Andreas Schwab <schwab@suse.de>
- get-nscd-addresses.patch: get_nscd_addresses: Fix subscript typos (BZ
#29605)
- x86-64-avx2-string-functions.patch: check for required cpu features in
AVX2 string functions (BZ #29611)
- nscd-aicache.patch: nscd: Drop local address tuple variable (BZ #29607)
------------------------------------------------------------------- -------------------------------------------------------------------
Wed Sep 21 08:47:52 UTC 2022 - Andreas Schwab <schwab@suse.de> Wed Sep 21 08:47:52 UTC 2022 - Andreas Schwab <schwab@suse.de>

View File

@ -20,7 +20,8 @@
# It will avoid building some parts of glibc # It will avoid building some parts of glibc
%bcond_with fast_build %bcond_with fast_build
%define build_snapshot 0 %bcond_with snapshot
%define build_snapshot %{with snapshot}
%bcond_with ringdisabled %bcond_with ringdisabled
%define flavor @BUILD_FLAVOR@%{nil} %define flavor @BUILD_FLAVOR@%{nil}
@ -299,6 +300,12 @@ Patch1007: syslog-extra-whitespace.patch
Patch1008: errlist-edeadlock.patch Patch1008: errlist-edeadlock.patch
# PATCH-FIX-UPSTREAM Makerules: fix MAKEFLAGS assignment for upcoming make-4.4 (BZ# 29564) # PATCH-FIX-UPSTREAM Makerules: fix MAKEFLAGS assignment for upcoming make-4.4 (BZ# 29564)
Patch1009: makeflags.patch Patch1009: makeflags.patch
# PATCH-FIX-UPSTREAM get_nscd_addresses: Fix subscript typos (BZ #29605)
Patch1010: get-nscd-addresses.patch
# PATCH-FIX-UPSTREAM check for required cpu features in AVX2 string functions (BZ #29611)
Patch1011: x86-64-avx2-string-functions.patch
# PATCH-FIX-UPSTREAM nscd: Drop local address tuple variable (BZ #29607)
Patch1012: nscd-aicache.patch
### ###
# Patches awaiting upstream approval # Patches awaiting upstream approval
@ -533,6 +540,9 @@ library in a cross compilation setting.
%patch1007 -p1 %patch1007 -p1
%patch1008 -p1 %patch1008 -p1
%patch1009 -p1 %patch1009 -p1
%patch1010 -p1
%patch1011 -p1
%patch1012 -p1
%endif %endif
%patch2000 -p1 %patch2000 -p1

53
nscd-aicache.patch Normal file
View File

@ -0,0 +1,53 @@
From 6e33e5c4b73cea7b8aa3de0947123db16200fb65 Mon Sep 17 00:00:00 2001
From: Siddhesh Poyarekar <siddhesh@sourceware.org>
Date: Tue, 4 Oct 2022 18:40:25 -0400
Subject: [PATCH] nscd: Drop local address tuple variable [BZ #29607]
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
When a request needs to be resent (e.g. due to insufficient buffer
space), the references to subsequent tuples in the local variable are
stale and should not be used. This used to work by accident before, but
since 1d495912a it no longer does. Instead of trying to reset it, just
let gethostbyname4_r write into TMPBUF6 for us, thus maintaining a
consistent state at all times. This is now consistent with what is done
in gaih_inet for getaddrinfo.
Resolves: BZ #29607
Reported-by: Holger Hoffstätte <holger@applied-asynchrony.com>
Tested-by: Holger Hoffstätte <holger@applied-asynchrony.com>
Reviewed-by: Carlos O'Donell <carlos@redhat.com>
---
nscd/aicache.c | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/nscd/aicache.c b/nscd/aicache.c
index 51e793199f..e0baed170b 100644
--- a/nscd/aicache.c
+++ b/nscd/aicache.c
@@ -110,11 +110,10 @@ addhstaiX (struct database_dyn *db, int fd, request_header *req,
"gethostbyname4_r");
if (fct4 != NULL)
{
- struct gaih_addrtuple atmem;
struct gaih_addrtuple *at;
while (1)
{
- at = &atmem;
+ at = NULL;
rc6 = 0;
herrno = 0;
status[1] = DL_CALL_FCT (fct4, (key, &at,
@@ -137,7 +136,7 @@ addhstaiX (struct database_dyn *db, int fd, request_header *req,
goto next_nip;
/* We found the data. Count the addresses and the size. */
- for (const struct gaih_addrtuple *at2 = at = &atmem; at2 != NULL;
+ for (const struct gaih_addrtuple *at2 = at; at2 != NULL;
at2 = at2->next)
{
++naddrs;
--
2.38.0

View File

@ -0,0 +1,576 @@
From 18bec23cbb4d530a2a8ce95353770661fabcd55f Mon Sep 17 00:00:00 2001
From: Aurelien Jarno <aurelien@aurel32.net>
Date: Mon, 3 Oct 2022 23:46:11 +0200
Subject: [PATCH 1/8] x86: include BMI1 and BMI2 in x86-64-v3 level
The "System V Application Binary Interface AMD64 Architecture Processor
Supplement" mandates the BMI1 and BMI2 CPU features for the x86-64-v3
level.
Reviewed-by: Noah Goldstein <goldstein.w.n@gmail.com>
(cherry picked from commit b80f16adbd979831bf25ea491e1261e81885c2b6)
---
sysdeps/x86/get-isa-level.h | 2 ++
1 file changed, 2 insertions(+)
diff --git a/sysdeps/x86/get-isa-level.h b/sysdeps/x86/get-isa-level.h
index 1ade78ab73..5b4dd5f062 100644
--- a/sysdeps/x86/get-isa-level.h
+++ b/sysdeps/x86/get-isa-level.h
@@ -47,6 +47,8 @@ get_isa_level (const struct cpu_features *cpu_features)
isa_level |= GNU_PROPERTY_X86_ISA_1_V2;
if (CPU_FEATURE_USABLE_P (cpu_features, AVX)
&& CPU_FEATURE_USABLE_P (cpu_features, AVX2)
+ && CPU_FEATURE_USABLE_P (cpu_features, BMI1)
+ && CPU_FEATURE_USABLE_P (cpu_features, BMI2)
&& CPU_FEATURE_USABLE_P (cpu_features, F16C)
&& CPU_FEATURE_USABLE_P (cpu_features, FMA)
&& CPU_FEATURE_USABLE_P (cpu_features, LZCNT)
--
2.38.0
From 46479e5d10ed87825aa277da158d6a687974518b Mon Sep 17 00:00:00 2001
From: Aurelien Jarno <aurelien@aurel32.net>
Date: Mon, 3 Oct 2022 23:46:11 +0200
Subject: [PATCH 2/8] x86-64: Require BMI2 for AVX2 str(n)casecmp
implementations
The AVX2 str(n)casecmp implementations use the 'bzhi' instruction, which
belongs to the BMI2 CPU feature.
NB: It also uses the 'tzcnt' BMI1 instruction, but it is executed
as BSF if the CPU doesn't support TZCNT, and produces the same result
for non-zero input.
Partially fixes: b77b06e0e296 ("x86: Optimize strcmp-avx2.S")
Partially resolves: BZ #29611
Reviewed-by: Noah Goldstein <goldstein.w.n@gmail.com>
(cherry picked from commit 10f79d3670b036925da63dc532b122d27ce65ff8)
---
sysdeps/x86_64/multiarch/ifunc-impl-list.c | 28 +++++++++++++++------
sysdeps/x86_64/multiarch/ifunc-strcasecmp.h | 1 +
2 files changed, 21 insertions(+), 8 deletions(-)
diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
index a71444eccb..d208fae4bf 100644
--- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
@@ -448,13 +448,16 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
IFUNC_IMPL (i, name, strcasecmp,
X86_IFUNC_IMPL_ADD_V4 (array, i, strcasecmp,
(CPU_FEATURE_USABLE (AVX512VL)
- && CPU_FEATURE_USABLE (AVX512BW)),
+ && CPU_FEATURE_USABLE (AVX512BW)
+ && CPU_FEATURE_USABLE (BMI2)),
__strcasecmp_evex)
X86_IFUNC_IMPL_ADD_V3 (array, i, strcasecmp,
- CPU_FEATURE_USABLE (AVX2),
+ (CPU_FEATURE_USABLE (AVX2)
+ && CPU_FEATURE_USABLE (BMI2)),
__strcasecmp_avx2)
X86_IFUNC_IMPL_ADD_V3 (array, i, strcasecmp,
(CPU_FEATURE_USABLE (AVX2)
+ && CPU_FEATURE_USABLE (BMI2)
&& CPU_FEATURE_USABLE (RTM)),
__strcasecmp_avx2_rtm)
X86_IFUNC_IMPL_ADD_V2 (array, i, strcasecmp,
@@ -470,13 +473,16 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
IFUNC_IMPL (i, name, strcasecmp_l,
X86_IFUNC_IMPL_ADD_V4 (array, i, strcasecmp,
(CPU_FEATURE_USABLE (AVX512VL)
- && CPU_FEATURE_USABLE (AVX512BW)),
+ && CPU_FEATURE_USABLE (AVX512BW)
+ && CPU_FEATURE_USABLE (BMI2)),
__strcasecmp_l_evex)
X86_IFUNC_IMPL_ADD_V3 (array, i, strcasecmp,
- CPU_FEATURE_USABLE (AVX2),
+ (CPU_FEATURE_USABLE (AVX2)
+ && CPU_FEATURE_USABLE (BMI2)),
__strcasecmp_l_avx2)
X86_IFUNC_IMPL_ADD_V3 (array, i, strcasecmp,
(CPU_FEATURE_USABLE (AVX2)
+ && CPU_FEATURE_USABLE (BMI2)
&& CPU_FEATURE_USABLE (RTM)),
__strcasecmp_l_avx2_rtm)
X86_IFUNC_IMPL_ADD_V2 (array, i, strcasecmp_l,
@@ -638,13 +644,16 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
IFUNC_IMPL (i, name, strncasecmp,
X86_IFUNC_IMPL_ADD_V4 (array, i, strncasecmp,
(CPU_FEATURE_USABLE (AVX512VL)
- && CPU_FEATURE_USABLE (AVX512BW)),
+ && CPU_FEATURE_USABLE (AVX512BW)
+ && CPU_FEATURE_USABLE (BMI2)),
__strncasecmp_evex)
X86_IFUNC_IMPL_ADD_V3 (array, i, strncasecmp,
- CPU_FEATURE_USABLE (AVX2),
+ (CPU_FEATURE_USABLE (AVX2)
+ && CPU_FEATURE_USABLE (BMI2)),
__strncasecmp_avx2)
X86_IFUNC_IMPL_ADD_V3 (array, i, strncasecmp,
(CPU_FEATURE_USABLE (AVX2)
+ && CPU_FEATURE_USABLE (BMI2)
&& CPU_FEATURE_USABLE (RTM)),
__strncasecmp_avx2_rtm)
X86_IFUNC_IMPL_ADD_V2 (array, i, strncasecmp,
@@ -660,13 +669,16 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
IFUNC_IMPL (i, name, strncasecmp_l,
X86_IFUNC_IMPL_ADD_V4 (array, i, strncasecmp,
(CPU_FEATURE_USABLE (AVX512VL)
- && CPU_FEATURE_USABLE (AVX512BW)),
+ && CPU_FEATURE_USABLE (AVX512BW)
+ && CPU_FEATURE_USABLE (BMI2)),
__strncasecmp_l_evex)
X86_IFUNC_IMPL_ADD_V3 (array, i, strncasecmp,
- CPU_FEATURE_USABLE (AVX2),
+ (CPU_FEATURE_USABLE (AVX2)
+ && CPU_FEATURE_USABLE (BMI2)),
__strncasecmp_l_avx2)
X86_IFUNC_IMPL_ADD_V3 (array, i, strncasecmp,
(CPU_FEATURE_USABLE (AVX2)
+ && CPU_FEATURE_USABLE (BMI2)
&& CPU_FEATURE_USABLE (RTM)),
__strncasecmp_l_avx2_rtm)
X86_IFUNC_IMPL_ADD_V2 (array, i, strncasecmp_l,
diff --git a/sysdeps/x86_64/multiarch/ifunc-strcasecmp.h b/sysdeps/x86_64/multiarch/ifunc-strcasecmp.h
index 68646ef199..7622af259c 100644
--- a/sysdeps/x86_64/multiarch/ifunc-strcasecmp.h
+++ b/sysdeps/x86_64/multiarch/ifunc-strcasecmp.h
@@ -34,6 +34,7 @@ IFUNC_SELECTOR (void)
const struct cpu_features *cpu_features = __get_cpu_features ();
if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX2)
+ && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2)
&& X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
AVX_Fast_Unaligned_Load, ))
{
--
2.38.0
From 7afbd1e56acb721031bffd876f275dcb1af7e530 Mon Sep 17 00:00:00 2001
From: Aurelien Jarno <aurelien@aurel32.net>
Date: Mon, 3 Oct 2022 23:46:11 +0200
Subject: [PATCH 3/8] x86-64: Require BMI2 for AVX2 strcmp implementation
The AVX2 strcmp implementation uses the 'bzhi' instruction, which
belongs to the BMI2 CPU feature.
NB: It also uses the 'tzcnt' BMI1 instruction, but it is executed
as BSF if the CPU doesn't support TZCNT, and produces the same result
for non-zero input.
Partially fixes: b77b06e0e296 ("x86: Optimize strcmp-avx2.S")
Partially resolves: BZ #29611
Reviewed-by: Noah Goldstein <goldstein.w.n@gmail.com>
(cherry picked from commit 4d64c6445735e9b34e2ac8e369312cbfc2f88e17)
---
sysdeps/x86_64/multiarch/ifunc-impl-list.c | 4 +++-
sysdeps/x86_64/multiarch/strcmp.c | 4 ++--
2 files changed, 5 insertions(+), 3 deletions(-)
diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
index d208fae4bf..a42b0a4620 100644
--- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
@@ -591,10 +591,12 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
&& CPU_FEATURE_USABLE (BMI2)),
__strcmp_evex)
X86_IFUNC_IMPL_ADD_V3 (array, i, strcmp,
- CPU_FEATURE_USABLE (AVX2),
+ (CPU_FEATURE_USABLE (AVX2)
+ && CPU_FEATURE_USABLE (BMI2)),
__strcmp_avx2)
X86_IFUNC_IMPL_ADD_V3 (array, i, strcmp,
(CPU_FEATURE_USABLE (AVX2)
+ && CPU_FEATURE_USABLE (BMI2)
&& CPU_FEATURE_USABLE (RTM)),
__strcmp_avx2_rtm)
X86_IFUNC_IMPL_ADD_V2 (array, i, strcmp,
diff --git a/sysdeps/x86_64/multiarch/strcmp.c b/sysdeps/x86_64/multiarch/strcmp.c
index fdd5afe3af..9d6c9f66ba 100644
--- a/sysdeps/x86_64/multiarch/strcmp.c
+++ b/sysdeps/x86_64/multiarch/strcmp.c
@@ -45,12 +45,12 @@ IFUNC_SELECTOR (void)
const struct cpu_features *cpu_features = __get_cpu_features ();
if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX2)
+ && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2)
&& X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
AVX_Fast_Unaligned_Load, ))
{
if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
- && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
- && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2))
+ && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW))
return OPTIMIZE (evex);
if (CPU_FEATURE_USABLE_P (cpu_features, RTM))
--
2.38.0
From 29c577e0f54fe6e70ceacb3659179781c5569903 Mon Sep 17 00:00:00 2001
From: Aurelien Jarno <aurelien@aurel32.net>
Date: Mon, 3 Oct 2022 23:46:11 +0200
Subject: [PATCH 4/8] x86-64: Require BMI2 for AVX2 strncmp implementation
The AVX2 strncmp implementation uses the 'bzhi' instruction, which
belongs to the BMI2 CPU feature.
NB: It also uses the 'tzcnt' BMI1 instruction, but it is executed
as BSF if the CPU doesn't support TZCNT, and produces the same result
for non-zero input.
Partially fixes: b77b06e0e296 ("x86: Optimize strcmp-avx2.S")
Partially resolves: BZ #29611
Reviewed-by: Noah Goldstein <goldstein.w.n@gmail.com>
(cherry picked from commit fc7de1d9b99ae1676bc626ddca422d7abee0eb48)
---
sysdeps/x86_64/multiarch/ifunc-impl-list.c | 7 +++++--
sysdeps/x86_64/multiarch/strncmp.c | 4 ++--
2 files changed, 7 insertions(+), 4 deletions(-)
diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
index a42b0a4620..aebef3daaf 100644
--- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
@@ -1176,13 +1176,16 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
IFUNC_IMPL (i, name, strncmp,
X86_IFUNC_IMPL_ADD_V4 (array, i, strncmp,
(CPU_FEATURE_USABLE (AVX512VL)
- && CPU_FEATURE_USABLE (AVX512BW)),
+ && CPU_FEATURE_USABLE (AVX512BW)
+ && CPU_FEATURE_USABLE (BMI2)),
__strncmp_evex)
X86_IFUNC_IMPL_ADD_V3 (array, i, strncmp,
- CPU_FEATURE_USABLE (AVX2),
+ (CPU_FEATURE_USABLE (AVX2)
+ && CPU_FEATURE_USABLE (BMI2)),
__strncmp_avx2)
X86_IFUNC_IMPL_ADD_V3 (array, i, strncmp,
(CPU_FEATURE_USABLE (AVX2)
+ && CPU_FEATURE_USABLE (BMI2)
&& CPU_FEATURE_USABLE (RTM)),
__strncmp_avx2_rtm)
X86_IFUNC_IMPL_ADD_V2 (array, i, strncmp,
diff --git a/sysdeps/x86_64/multiarch/strncmp.c b/sysdeps/x86_64/multiarch/strncmp.c
index 4ebe4bde30..c4f8b6bbb5 100644
--- a/sysdeps/x86_64/multiarch/strncmp.c
+++ b/sysdeps/x86_64/multiarch/strncmp.c
@@ -41,12 +41,12 @@ IFUNC_SELECTOR (void)
const struct cpu_features *cpu_features = __get_cpu_features ();
if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX2)
+ && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2)
&& X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
AVX_Fast_Unaligned_Load, ))
{
if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
- && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
- && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2))
+ && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW))
return OPTIMIZE (evex);
if (CPU_FEATURE_USABLE_P (cpu_features, RTM))
--
2.38.0
From d8bf4388df679fa5a3ae7889a649e573e3124530 Mon Sep 17 00:00:00 2001
From: Aurelien Jarno <aurelien@aurel32.net>
Date: Mon, 3 Oct 2022 23:46:11 +0200
Subject: [PATCH 5/8] x86-64: Require BMI2 for AVX2 wcs(n)cmp implementations
The AVX2 wcs(n)cmp implementations use the 'bzhi' instruction, which
belongs to the BMI2 CPU feature.
NB: It also uses the 'tzcnt' BMI1 instruction, but it is executed
as BSF if the CPU doesn't support TZCNT, and produces the same result
for non-zero input.
Partially fixes: b77b06e0e296 ("x86: Optimize strcmp-avx2.S")
Partially resolves: BZ #29611
Reviewed-by: Noah Goldstein <goldstein.w.n@gmail.com>
(cherry picked from commit f31a5a884ed84bd37032729d4d1eb9d06c9f3c29)
---
sysdeps/x86_64/multiarch/ifunc-impl-list.c | 8 ++++++--
1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
index aebef3daaf..fec8790c11 100644
--- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
@@ -810,10 +810,12 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
&& CPU_FEATURE_USABLE (BMI2)),
__wcscmp_evex)
X86_IFUNC_IMPL_ADD_V3 (array, i, wcscmp,
- CPU_FEATURE_USABLE (AVX2),
+ (CPU_FEATURE_USABLE (AVX2)
+ && CPU_FEATURE_USABLE (BMI2)),
__wcscmp_avx2)
X86_IFUNC_IMPL_ADD_V3 (array, i, wcscmp,
(CPU_FEATURE_USABLE (AVX2)
+ && CPU_FEATURE_USABLE (BMI2)
&& CPU_FEATURE_USABLE (RTM)),
__wcscmp_avx2_rtm)
/* ISA V2 wrapper for SSE2 implementation because the SSE2
@@ -830,10 +832,12 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
&& CPU_FEATURE_USABLE (BMI2)),
__wcsncmp_evex)
X86_IFUNC_IMPL_ADD_V3 (array, i, wcsncmp,
- CPU_FEATURE_USABLE (AVX2),
+ (CPU_FEATURE_USABLE (AVX2)
+ && CPU_FEATURE_USABLE (BMI2)),
__wcsncmp_avx2)
X86_IFUNC_IMPL_ADD_V3 (array, i, wcsncmp,
(CPU_FEATURE_USABLE (AVX2)
+ && CPU_FEATURE_USABLE (BMI2)
&& CPU_FEATURE_USABLE (RTM)),
__wcsncmp_avx2_rtm)
/* ISA V2 wrapper for GENERIC implementation because the
--
2.38.0
From d9196d4f3fa9997388655813ddd236426a16dd92 Mon Sep 17 00:00:00 2001
From: Aurelien Jarno <aurelien@aurel32.net>
Date: Mon, 3 Oct 2022 23:46:11 +0200
Subject: [PATCH 6/8] x86-64: Require BMI2 for AVX2 (raw|w)memchr
implementations
The AVX2 memchr, rawmemchr and wmemchr implementations use the 'bzhi'
and 'sarx' instructions, which belong to the BMI2 CPU feature.
Fixes: acfd088a1963 ("x86: Optimize memchr-avx2.S")
Partially resolves: BZ #29611
Reviewed-by: Noah Goldstein <goldstein.w.n@gmail.com>
(cherry picked from commit e3e7fab7fe5186d18ca2046d99ba321c27db30ad)
---
sysdeps/x86_64/multiarch/ifunc-impl-list.c | 12 +++++++++---
1 file changed, 9 insertions(+), 3 deletions(-)
diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
index fec8790c11..7c84963d92 100644
--- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
@@ -69,10 +69,12 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
&& CPU_FEATURE_USABLE (BMI2)),
__memchr_evex_rtm)
X86_IFUNC_IMPL_ADD_V3 (array, i, memchr,
- CPU_FEATURE_USABLE (AVX2),
+ (CPU_FEATURE_USABLE (AVX2)
+ && CPU_FEATURE_USABLE (BMI2)),
__memchr_avx2)
X86_IFUNC_IMPL_ADD_V3 (array, i, memchr,
(CPU_FEATURE_USABLE (AVX2)
+ && CPU_FEATURE_USABLE (BMI2)
&& CPU_FEATURE_USABLE (RTM)),
__memchr_avx2_rtm)
/* ISA V2 wrapper for SSE2 implementation because the SSE2
@@ -335,10 +337,12 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
&& CPU_FEATURE_USABLE (BMI2)),
__rawmemchr_evex_rtm)
X86_IFUNC_IMPL_ADD_V3 (array, i, rawmemchr,
- CPU_FEATURE_USABLE (AVX2),
+ (CPU_FEATURE_USABLE (AVX2)
+ && CPU_FEATURE_USABLE (BMI2)),
__rawmemchr_avx2)
X86_IFUNC_IMPL_ADD_V3 (array, i, rawmemchr,
(CPU_FEATURE_USABLE (AVX2)
+ && CPU_FEATURE_USABLE (BMI2)
&& CPU_FEATURE_USABLE (RTM)),
__rawmemchr_avx2_rtm)
/* ISA V2 wrapper for SSE2 implementation because the SSE2
@@ -927,10 +931,12 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
&& CPU_FEATURE_USABLE (BMI2)),
__wmemchr_evex_rtm)
X86_IFUNC_IMPL_ADD_V3 (array, i, wmemchr,
- CPU_FEATURE_USABLE (AVX2),
+ (CPU_FEATURE_USABLE (AVX2)
+ && CPU_FEATURE_USABLE (BMI2)),
__wmemchr_avx2)
X86_IFUNC_IMPL_ADD_V3 (array, i, wmemchr,
(CPU_FEATURE_USABLE (AVX2)
+ && CPU_FEATURE_USABLE (BMI2)
&& CPU_FEATURE_USABLE (RTM)),
__wmemchr_avx2_rtm)
/* ISA V2 wrapper for SSE2 implementation because the SSE2
--
2.38.0
From 923c3f3c373f499e62160e00831dda576443317b Mon Sep 17 00:00:00 2001
From: Aurelien Jarno <aurelien@aurel32.net>
Date: Mon, 3 Oct 2022 23:46:11 +0200
Subject: [PATCH 7/8] x86-64: Require BMI2 and LZCNT for AVX2 memrchr
implementation
The AVX2 memrchr implementation uses the 'shlxl' instruction, which
belongs to the BMI2 CPU feature and uses the 'lzcnt' instruction, which
belongs to the LZCNT CPU feature.
Fixes: af5306a735eb ("x86: Optimize memrchr-avx2.S")
Partially resolves: BZ #29611
Reviewed-by: Noah Goldstein <goldstein.w.n@gmail.com>
(cherry picked from commit 3c0c78afabfed4b6fc161c159e628fbf14ff370b)
---
sysdeps/x86/isa-level.h | 1 +
sysdeps/x86_64/multiarch/ifunc-avx2.h | 1 +
sysdeps/x86_64/multiarch/ifunc-impl-list.c | 10 ++++++++--
3 files changed, 10 insertions(+), 2 deletions(-)
diff --git a/sysdeps/x86/isa-level.h b/sysdeps/x86/isa-level.h
index 3c4480aba7..bbb90f5c5e 100644
--- a/sysdeps/x86/isa-level.h
+++ b/sysdeps/x86/isa-level.h
@@ -80,6 +80,7 @@
#define AVX_X86_ISA_LEVEL 3
#define AVX2_X86_ISA_LEVEL 3
#define BMI2_X86_ISA_LEVEL 3
+#define LZCNT_X86_ISA_LEVEL 3
#define MOVBE_X86_ISA_LEVEL 3
/* ISA level >= 2 guaranteed includes. */
diff --git a/sysdeps/x86_64/multiarch/ifunc-avx2.h b/sysdeps/x86_64/multiarch/ifunc-avx2.h
index a57a9952f3..f1741083fd 100644
--- a/sysdeps/x86_64/multiarch/ifunc-avx2.h
+++ b/sysdeps/x86_64/multiarch/ifunc-avx2.h
@@ -37,6 +37,7 @@ IFUNC_SELECTOR (void)
if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX2)
&& X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2)
+ && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, LZCNT)
&& X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
AVX_Fast_Unaligned_Load, ))
{
diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
index 7c84963d92..ec1c5b55fb 100644
--- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
@@ -209,13 +209,19 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
IFUNC_IMPL (i, name, memrchr,
X86_IFUNC_IMPL_ADD_V4 (array, i, memrchr,
(CPU_FEATURE_USABLE (AVX512VL)
- && CPU_FEATURE_USABLE (AVX512BW)),
+ && CPU_FEATURE_USABLE (AVX512BW)
+ && CPU_FEATURE_USABLE (BMI2)
+ && CPU_FEATURE_USABLE (LZCNT)),
__memrchr_evex)
X86_IFUNC_IMPL_ADD_V3 (array, i, memrchr,
- CPU_FEATURE_USABLE (AVX2),
+ (CPU_FEATURE_USABLE (AVX2)
+ && CPU_FEATURE_USABLE (BMI2)
+ && CPU_FEATURE_USABLE (LZCNT)),
__memrchr_avx2)
X86_IFUNC_IMPL_ADD_V3 (array, i, memrchr,
(CPU_FEATURE_USABLE (AVX2)
+ && CPU_FEATURE_USABLE (BMI2)
+ && CPU_FEATURE_USABLE (LZCNT)
&& CPU_FEATURE_USABLE (RTM)),
__memrchr_avx2_rtm)
/* ISA V2 wrapper for SSE2 implementation because the SSE2
--
2.38.0
From 2d8ef784bd6a784496a6fd460de6b6f57c70a501 Mon Sep 17 00:00:00 2001
From: Aurelien Jarno <aurelien@aurel32.net>
Date: Mon, 3 Oct 2022 23:46:11 +0200
Subject: [PATCH 8/8] x86-64: Require BMI1/BMI2 for AVX2 strrchr and wcsrchr
implementations
The AVX2 strrchr and wcsrchr implementations use the 'blsmsk'
instruction which belongs to the BMI1 CPU feature and the 'shrx'
instruction, which belongs to the BMI2 CPU feature.
Fixes: df7e295d18ff ("x86: Optimize {str|wcs}rchr-avx2")
Partially resolves: BZ #29611
Reviewed-by: Noah Goldstein <goldstein.w.n@gmail.com>
(cherry picked from commit 7e8283170c5d6805b609a040801d819e362a6292)
---
sysdeps/x86/isa-level.h | 1 +
sysdeps/x86_64/multiarch/ifunc-avx2.h | 1 +
sysdeps/x86_64/multiarch/ifunc-impl-list.c | 17 ++++++++++++++---
3 files changed, 16 insertions(+), 3 deletions(-)
diff --git a/sysdeps/x86/isa-level.h b/sysdeps/x86/isa-level.h
index bbb90f5c5e..06f6c9663e 100644
--- a/sysdeps/x86/isa-level.h
+++ b/sysdeps/x86/isa-level.h
@@ -79,6 +79,7 @@
/* ISA level >= 3 guaranteed includes. */
#define AVX_X86_ISA_LEVEL 3
#define AVX2_X86_ISA_LEVEL 3
+#define BMI1_X86_ISA_LEVEL 3
#define BMI2_X86_ISA_LEVEL 3
#define LZCNT_X86_ISA_LEVEL 3
#define MOVBE_X86_ISA_LEVEL 3
diff --git a/sysdeps/x86_64/multiarch/ifunc-avx2.h b/sysdeps/x86_64/multiarch/ifunc-avx2.h
index f1741083fd..f2f5e8a211 100644
--- a/sysdeps/x86_64/multiarch/ifunc-avx2.h
+++ b/sysdeps/x86_64/multiarch/ifunc-avx2.h
@@ -36,6 +36,7 @@ IFUNC_SELECTOR (void)
const struct cpu_features *cpu_features = __get_cpu_features ();
if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX2)
+ && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI1)
&& X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2)
&& X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, LZCNT)
&& X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
index ec1c5b55fb..00a91123d3 100644
--- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
@@ -578,13 +578,19 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
IFUNC_IMPL (i, name, strrchr,
X86_IFUNC_IMPL_ADD_V4 (array, i, strrchr,
(CPU_FEATURE_USABLE (AVX512VL)
- && CPU_FEATURE_USABLE (AVX512BW)),
+ && CPU_FEATURE_USABLE (AVX512BW)
+ && CPU_FEATURE_USABLE (BMI1)
+ && CPU_FEATURE_USABLE (BMI2)),
__strrchr_evex)
X86_IFUNC_IMPL_ADD_V3 (array, i, strrchr,
- CPU_FEATURE_USABLE (AVX2),
+ (CPU_FEATURE_USABLE (AVX2)
+ && CPU_FEATURE_USABLE (BMI1)
+ && CPU_FEATURE_USABLE (BMI2)),
__strrchr_avx2)
X86_IFUNC_IMPL_ADD_V3 (array, i, strrchr,
(CPU_FEATURE_USABLE (AVX2)
+ && CPU_FEATURE_USABLE (BMI1)
+ && CPU_FEATURE_USABLE (BMI2)
&& CPU_FEATURE_USABLE (RTM)),
__strrchr_avx2_rtm)
/* ISA V2 wrapper for SSE2 implementation because the SSE2
@@ -797,13 +803,18 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
X86_IFUNC_IMPL_ADD_V4 (array, i, wcsrchr,
(CPU_FEATURE_USABLE (AVX512VL)
&& CPU_FEATURE_USABLE (AVX512BW)
+ && CPU_FEATURE_USABLE (BMI1)
&& CPU_FEATURE_USABLE (BMI2)),
__wcsrchr_evex)
X86_IFUNC_IMPL_ADD_V3 (array, i, wcsrchr,
- CPU_FEATURE_USABLE (AVX2),
+ (CPU_FEATURE_USABLE (AVX2)
+ && CPU_FEATURE_USABLE (BMI1)
+ && CPU_FEATURE_USABLE (BMI2)),
__wcsrchr_avx2)
X86_IFUNC_IMPL_ADD_V3 (array, i, wcsrchr,
(CPU_FEATURE_USABLE (AVX2)
+ && CPU_FEATURE_USABLE (BMI1)
+ && CPU_FEATURE_USABLE (BMI2)
&& CPU_FEATURE_USABLE (RTM)),
__wcsrchr_avx2_rtm)
/* ISA V2 wrapper for SSE2 implementation because the SSE2
--
2.38.0