forked from pool/glibc
577 lines
23 KiB
Diff
577 lines
23 KiB
Diff
|
From 18bec23cbb4d530a2a8ce95353770661fabcd55f Mon Sep 17 00:00:00 2001
|
||
|
From: Aurelien Jarno <aurelien@aurel32.net>
|
||
|
Date: Mon, 3 Oct 2022 23:46:11 +0200
|
||
|
Subject: [PATCH 1/8] x86: include BMI1 and BMI2 in x86-64-v3 level
|
||
|
|
||
|
The "System V Application Binary Interface AMD64 Architecture Processor
|
||
|
Supplement" mandates the BMI1 and BMI2 CPU features for the x86-64-v3
|
||
|
level.
|
||
|
|
||
|
Reviewed-by: Noah Goldstein <goldstein.w.n@gmail.com>
|
||
|
(cherry picked from commit b80f16adbd979831bf25ea491e1261e81885c2b6)
|
||
|
---
|
||
|
sysdeps/x86/get-isa-level.h | 2 ++
|
||
|
1 file changed, 2 insertions(+)
|
||
|
|
||
|
diff --git a/sysdeps/x86/get-isa-level.h b/sysdeps/x86/get-isa-level.h
|
||
|
index 1ade78ab73..5b4dd5f062 100644
|
||
|
--- a/sysdeps/x86/get-isa-level.h
|
||
|
+++ b/sysdeps/x86/get-isa-level.h
|
||
|
@@ -47,6 +47,8 @@ get_isa_level (const struct cpu_features *cpu_features)
|
||
|
isa_level |= GNU_PROPERTY_X86_ISA_1_V2;
|
||
|
if (CPU_FEATURE_USABLE_P (cpu_features, AVX)
|
||
|
&& CPU_FEATURE_USABLE_P (cpu_features, AVX2)
|
||
|
+ && CPU_FEATURE_USABLE_P (cpu_features, BMI1)
|
||
|
+ && CPU_FEATURE_USABLE_P (cpu_features, BMI2)
|
||
|
&& CPU_FEATURE_USABLE_P (cpu_features, F16C)
|
||
|
&& CPU_FEATURE_USABLE_P (cpu_features, FMA)
|
||
|
&& CPU_FEATURE_USABLE_P (cpu_features, LZCNT)
|
||
|
--
|
||
|
2.38.0
|
||
|
|
||
|
|
||
|
From 46479e5d10ed87825aa277da158d6a687974518b Mon Sep 17 00:00:00 2001
|
||
|
From: Aurelien Jarno <aurelien@aurel32.net>
|
||
|
Date: Mon, 3 Oct 2022 23:46:11 +0200
|
||
|
Subject: [PATCH 2/8] x86-64: Require BMI2 for AVX2 str(n)casecmp
|
||
|
implementations
|
||
|
|
||
|
The AVX2 str(n)casecmp implementations use the 'bzhi' instruction, which
|
||
|
belongs to the BMI2 CPU feature.
|
||
|
|
||
|
NB: It also uses the 'tzcnt' BMI1 instruction, but it is executed as BSF
|
||
|
if the CPU doesn't support TZCNT, and produces the same result
|
||
|
for non-zero input.
|
||
|
|
||
|
Partially fixes: b77b06e0e296 ("x86: Optimize strcmp-avx2.S")
|
||
|
Partially resolves: BZ #29611
|
||
|
|
||
|
Reviewed-by: Noah Goldstein <goldstein.w.n@gmail.com>
|
||
|
(cherry picked from commit 10f79d3670b036925da63dc532b122d27ce65ff8)
|
||
|
---
|
||
|
sysdeps/x86_64/multiarch/ifunc-impl-list.c | 28 +++++++++++++++------
|
||
|
sysdeps/x86_64/multiarch/ifunc-strcasecmp.h | 1 +
|
||
|
2 files changed, 21 insertions(+), 8 deletions(-)
|
||
|
|
||
|
diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
|
||
|
index a71444eccb..d208fae4bf 100644
|
||
|
--- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
|
||
|
+++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
|
||
|
@@ -448,13 +448,16 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
|
||
|
IFUNC_IMPL (i, name, strcasecmp,
|
||
|
X86_IFUNC_IMPL_ADD_V4 (array, i, strcasecmp,
|
||
|
(CPU_FEATURE_USABLE (AVX512VL)
|
||
|
- && CPU_FEATURE_USABLE (AVX512BW)),
|
||
|
+ && CPU_FEATURE_USABLE (AVX512BW)
|
||
|
+ && CPU_FEATURE_USABLE (BMI2)),
|
||
|
__strcasecmp_evex)
|
||
|
X86_IFUNC_IMPL_ADD_V3 (array, i, strcasecmp,
|
||
|
- CPU_FEATURE_USABLE (AVX2),
|
||
|
+ (CPU_FEATURE_USABLE (AVX2)
|
||
|
+ && CPU_FEATURE_USABLE (BMI2)),
|
||
|
__strcasecmp_avx2)
|
||
|
X86_IFUNC_IMPL_ADD_V3 (array, i, strcasecmp,
|
||
|
(CPU_FEATURE_USABLE (AVX2)
|
||
|
+ && CPU_FEATURE_USABLE (BMI2)
|
||
|
&& CPU_FEATURE_USABLE (RTM)),
|
||
|
__strcasecmp_avx2_rtm)
|
||
|
X86_IFUNC_IMPL_ADD_V2 (array, i, strcasecmp,
|
||
|
@@ -470,13 +473,16 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
|
||
|
IFUNC_IMPL (i, name, strcasecmp_l,
|
||
|
X86_IFUNC_IMPL_ADD_V4 (array, i, strcasecmp,
|
||
|
(CPU_FEATURE_USABLE (AVX512VL)
|
||
|
- && CPU_FEATURE_USABLE (AVX512BW)),
|
||
|
+ && CPU_FEATURE_USABLE (AVX512BW)
|
||
|
+ && CPU_FEATURE_USABLE (BMI2)),
|
||
|
__strcasecmp_l_evex)
|
||
|
X86_IFUNC_IMPL_ADD_V3 (array, i, strcasecmp,
|
||
|
- CPU_FEATURE_USABLE (AVX2),
|
||
|
+ (CPU_FEATURE_USABLE (AVX2)
|
||
|
+ && CPU_FEATURE_USABLE (BMI2)),
|
||
|
__strcasecmp_l_avx2)
|
||
|
X86_IFUNC_IMPL_ADD_V3 (array, i, strcasecmp,
|
||
|
(CPU_FEATURE_USABLE (AVX2)
|
||
|
+ && CPU_FEATURE_USABLE (BMI2)
|
||
|
&& CPU_FEATURE_USABLE (RTM)),
|
||
|
__strcasecmp_l_avx2_rtm)
|
||
|
X86_IFUNC_IMPL_ADD_V2 (array, i, strcasecmp_l,
|
||
|
@@ -638,13 +644,16 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
|
||
|
IFUNC_IMPL (i, name, strncasecmp,
|
||
|
X86_IFUNC_IMPL_ADD_V4 (array, i, strncasecmp,
|
||
|
(CPU_FEATURE_USABLE (AVX512VL)
|
||
|
- && CPU_FEATURE_USABLE (AVX512BW)),
|
||
|
+ && CPU_FEATURE_USABLE (AVX512BW)
|
||
|
+ && CPU_FEATURE_USABLE (BMI2)),
|
||
|
__strncasecmp_evex)
|
||
|
X86_IFUNC_IMPL_ADD_V3 (array, i, strncasecmp,
|
||
|
- CPU_FEATURE_USABLE (AVX2),
|
||
|
+ (CPU_FEATURE_USABLE (AVX2)
|
||
|
+ && CPU_FEATURE_USABLE (BMI2)),
|
||
|
__strncasecmp_avx2)
|
||
|
X86_IFUNC_IMPL_ADD_V3 (array, i, strncasecmp,
|
||
|
(CPU_FEATURE_USABLE (AVX2)
|
||
|
+ && CPU_FEATURE_USABLE (BMI2)
|
||
|
&& CPU_FEATURE_USABLE (RTM)),
|
||
|
__strncasecmp_avx2_rtm)
|
||
|
X86_IFUNC_IMPL_ADD_V2 (array, i, strncasecmp,
|
||
|
@@ -660,13 +669,16 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
|
||
|
IFUNC_IMPL (i, name, strncasecmp_l,
|
||
|
X86_IFUNC_IMPL_ADD_V4 (array, i, strncasecmp,
|
||
|
(CPU_FEATURE_USABLE (AVX512VL)
|
||
|
- && CPU_FEATURE_USABLE (AVX512BW)),
|
||
|
+ && CPU_FEATURE_USABLE (AVX512BW)
|
||
|
+ && CPU_FEATURE_USABLE (BMI2)),
|
||
|
__strncasecmp_l_evex)
|
||
|
X86_IFUNC_IMPL_ADD_V3 (array, i, strncasecmp,
|
||
|
- CPU_FEATURE_USABLE (AVX2),
|
||
|
+ (CPU_FEATURE_USABLE (AVX2)
|
||
|
+ && CPU_FEATURE_USABLE (BMI2)),
|
||
|
__strncasecmp_l_avx2)
|
||
|
X86_IFUNC_IMPL_ADD_V3 (array, i, strncasecmp,
|
||
|
(CPU_FEATURE_USABLE (AVX2)
|
||
|
+ && CPU_FEATURE_USABLE (BMI2)
|
||
|
&& CPU_FEATURE_USABLE (RTM)),
|
||
|
__strncasecmp_l_avx2_rtm)
|
||
|
X86_IFUNC_IMPL_ADD_V2 (array, i, strncasecmp_l,
|
||
|
diff --git a/sysdeps/x86_64/multiarch/ifunc-strcasecmp.h b/sysdeps/x86_64/multiarch/ifunc-strcasecmp.h
|
||
|
index 68646ef199..7622af259c 100644
|
||
|
--- a/sysdeps/x86_64/multiarch/ifunc-strcasecmp.h
|
||
|
+++ b/sysdeps/x86_64/multiarch/ifunc-strcasecmp.h
|
||
|
@@ -34,6 +34,7 @@ IFUNC_SELECTOR (void)
|
||
|
const struct cpu_features *cpu_features = __get_cpu_features ();
|
||
|
|
||
|
if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX2)
|
||
|
+ && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2)
|
||
|
&& X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
|
||
|
AVX_Fast_Unaligned_Load, ))
|
||
|
{
|
||
|
--
|
||
|
2.38.0
|
||
|
|
||
|
|
||
|
From 7afbd1e56acb721031bffd876f275dcb1af7e530 Mon Sep 17 00:00:00 2001
|
||
|
From: Aurelien Jarno <aurelien@aurel32.net>
|
||
|
Date: Mon, 3 Oct 2022 23:46:11 +0200
|
||
|
Subject: [PATCH 3/8] x86-64: Require BMI2 for AVX2 strcmp implementation
|
||
|
|
||
|
The AVX2 strcmp implementation uses the 'bzhi' instruction, which
|
||
|
belongs to the BMI2 CPU feature.
|
||
|
|
||
|
NB: It also uses the 'tzcnt' BMI1 instruction, but it is executed as BSF
|
||
|
if the CPU doesn't support TZCNT, and produces the same result
|
||
|
for non-zero input.
|
||
|
|
||
|
Partially fixes: b77b06e0e296 ("x86: Optimize strcmp-avx2.S")
|
||
|
Partially resolves: BZ #29611
|
||
|
|
||
|
Reviewed-by: Noah Goldstein <goldstein.w.n@gmail.com>
|
||
|
(cherry picked from commit 4d64c6445735e9b34e2ac8e369312cbfc2f88e17)
|
||
|
---
|
||
|
sysdeps/x86_64/multiarch/ifunc-impl-list.c | 4 +++-
|
||
|
sysdeps/x86_64/multiarch/strcmp.c | 4 ++--
|
||
|
2 files changed, 5 insertions(+), 3 deletions(-)
|
||
|
|
||
|
diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
|
||
|
index d208fae4bf..a42b0a4620 100644
|
||
|
--- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
|
||
|
+++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
|
||
|
@@ -591,10 +591,12 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
|
||
|
&& CPU_FEATURE_USABLE (BMI2)),
|
||
|
__strcmp_evex)
|
||
|
X86_IFUNC_IMPL_ADD_V3 (array, i, strcmp,
|
||
|
- CPU_FEATURE_USABLE (AVX2),
|
||
|
+ (CPU_FEATURE_USABLE (AVX2)
|
||
|
+ && CPU_FEATURE_USABLE (BMI2)),
|
||
|
__strcmp_avx2)
|
||
|
X86_IFUNC_IMPL_ADD_V3 (array, i, strcmp,
|
||
|
(CPU_FEATURE_USABLE (AVX2)
|
||
|
+ && CPU_FEATURE_USABLE (BMI2)
|
||
|
&& CPU_FEATURE_USABLE (RTM)),
|
||
|
__strcmp_avx2_rtm)
|
||
|
X86_IFUNC_IMPL_ADD_V2 (array, i, strcmp,
|
||
|
diff --git a/sysdeps/x86_64/multiarch/strcmp.c b/sysdeps/x86_64/multiarch/strcmp.c
|
||
|
index fdd5afe3af..9d6c9f66ba 100644
|
||
|
--- a/sysdeps/x86_64/multiarch/strcmp.c
|
||
|
+++ b/sysdeps/x86_64/multiarch/strcmp.c
|
||
|
@@ -45,12 +45,12 @@ IFUNC_SELECTOR (void)
|
||
|
const struct cpu_features *cpu_features = __get_cpu_features ();
|
||
|
|
||
|
if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX2)
|
||
|
+ && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2)
|
||
|
&& X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
|
||
|
AVX_Fast_Unaligned_Load, ))
|
||
|
{
|
||
|
if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
|
||
|
- && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
|
||
|
- && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2))
|
||
|
+ && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW))
|
||
|
return OPTIMIZE (evex);
|
||
|
|
||
|
if (CPU_FEATURE_USABLE_P (cpu_features, RTM))
|
||
|
--
|
||
|
2.38.0
|
||
|
|
||
|
|
||
|
From 29c577e0f54fe6e70ceacb3659179781c5569903 Mon Sep 17 00:00:00 2001
|
||
|
From: Aurelien Jarno <aurelien@aurel32.net>
|
||
|
Date: Mon, 3 Oct 2022 23:46:11 +0200
|
||
|
Subject: [PATCH 4/8] x86-64: Require BMI2 for AVX2 strncmp implementation
|
||
|
|
||
|
The AVX2 strncmp implementation uses the 'bzhi' instruction, which
|
||
|
belongs to the BMI2 CPU feature.
|
||
|
|
||
|
NB: It also uses the 'tzcnt' BMI1 instruction, but it is executed as BSF
|
||
|
if the CPU doesn't support TZCNT, and produces the same result
|
||
|
for non-zero input.
|
||
|
|
||
|
Partially fixes: b77b06e0e296 ("x86: Optimize strcmp-avx2.S")
|
||
|
Partially resolves: BZ #29611
|
||
|
|
||
|
Reviewed-by: Noah Goldstein <goldstein.w.n@gmail.com>
|
||
|
(cherry picked from commit fc7de1d9b99ae1676bc626ddca422d7abee0eb48)
|
||
|
---
|
||
|
sysdeps/x86_64/multiarch/ifunc-impl-list.c | 7 +++++--
|
||
|
sysdeps/x86_64/multiarch/strncmp.c | 4 ++--
|
||
|
2 files changed, 7 insertions(+), 4 deletions(-)
|
||
|
|
||
|
diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
|
||
|
index a42b0a4620..aebef3daaf 100644
|
||
|
--- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
|
||
|
+++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
|
||
|
@@ -1176,13 +1176,16 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
|
||
|
IFUNC_IMPL (i, name, strncmp,
|
||
|
X86_IFUNC_IMPL_ADD_V4 (array, i, strncmp,
|
||
|
(CPU_FEATURE_USABLE (AVX512VL)
|
||
|
- && CPU_FEATURE_USABLE (AVX512BW)),
|
||
|
+ && CPU_FEATURE_USABLE (AVX512BW)
|
||
|
+ && CPU_FEATURE_USABLE (BMI2)),
|
||
|
__strncmp_evex)
|
||
|
X86_IFUNC_IMPL_ADD_V3 (array, i, strncmp,
|
||
|
- CPU_FEATURE_USABLE (AVX2),
|
||
|
+ (CPU_FEATURE_USABLE (AVX2)
|
||
|
+ && CPU_FEATURE_USABLE (BMI2)),
|
||
|
__strncmp_avx2)
|
||
|
X86_IFUNC_IMPL_ADD_V3 (array, i, strncmp,
|
||
|
(CPU_FEATURE_USABLE (AVX2)
|
||
|
+ && CPU_FEATURE_USABLE (BMI2)
|
||
|
&& CPU_FEATURE_USABLE (RTM)),
|
||
|
__strncmp_avx2_rtm)
|
||
|
X86_IFUNC_IMPL_ADD_V2 (array, i, strncmp,
|
||
|
diff --git a/sysdeps/x86_64/multiarch/strncmp.c b/sysdeps/x86_64/multiarch/strncmp.c
|
||
|
index 4ebe4bde30..c4f8b6bbb5 100644
|
||
|
--- a/sysdeps/x86_64/multiarch/strncmp.c
|
||
|
+++ b/sysdeps/x86_64/multiarch/strncmp.c
|
||
|
@@ -41,12 +41,12 @@ IFUNC_SELECTOR (void)
|
||
|
const struct cpu_features *cpu_features = __get_cpu_features ();
|
||
|
|
||
|
if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX2)
|
||
|
+ && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2)
|
||
|
&& X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
|
||
|
AVX_Fast_Unaligned_Load, ))
|
||
|
{
|
||
|
if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
|
||
|
- && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
|
||
|
- && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2))
|
||
|
+ && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW))
|
||
|
return OPTIMIZE (evex);
|
||
|
|
||
|
if (CPU_FEATURE_USABLE_P (cpu_features, RTM))
|
||
|
--
|
||
|
2.38.0
|
||
|
|
||
|
|
||
|
From d8bf4388df679fa5a3ae7889a649e573e3124530 Mon Sep 17 00:00:00 2001
|
||
|
From: Aurelien Jarno <aurelien@aurel32.net>
|
||
|
Date: Mon, 3 Oct 2022 23:46:11 +0200
|
||
|
Subject: [PATCH 5/8] x86-64: Require BMI2 for AVX2 wcs(n)cmp implementations
|
||
|
|
||
|
The AVX2 wcs(n)cmp implementations use the 'bzhi' instruction, which
|
||
|
belongs to the BMI2 CPU feature.
|
||
|
|
||
|
NB: It also uses the 'tzcnt' BMI1 instruction, but it is executed as BSF
|
||
|
if the CPU doesn't support TZCNT, and produces the same result
|
||
|
for non-zero input.
|
||
|
|
||
|
Partially fixes: b77b06e0e296 ("x86: Optimize strcmp-avx2.S")
|
||
|
Partially resolves: BZ #29611
|
||
|
|
||
|
Reviewed-by: Noah Goldstein <goldstein.w.n@gmail.com>
|
||
|
(cherry picked from commit f31a5a884ed84bd37032729d4d1eb9d06c9f3c29)
|
||
|
---
|
||
|
sysdeps/x86_64/multiarch/ifunc-impl-list.c | 8 ++++++--
|
||
|
1 file changed, 6 insertions(+), 2 deletions(-)
|
||
|
|
||
|
diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
|
||
|
index aebef3daaf..fec8790c11 100644
|
||
|
--- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
|
||
|
+++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
|
||
|
@@ -810,10 +810,12 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
|
||
|
&& CPU_FEATURE_USABLE (BMI2)),
|
||
|
__wcscmp_evex)
|
||
|
X86_IFUNC_IMPL_ADD_V3 (array, i, wcscmp,
|
||
|
- CPU_FEATURE_USABLE (AVX2),
|
||
|
+ (CPU_FEATURE_USABLE (AVX2)
|
||
|
+ && CPU_FEATURE_USABLE (BMI2)),
|
||
|
__wcscmp_avx2)
|
||
|
X86_IFUNC_IMPL_ADD_V3 (array, i, wcscmp,
|
||
|
(CPU_FEATURE_USABLE (AVX2)
|
||
|
+ && CPU_FEATURE_USABLE (BMI2)
|
||
|
&& CPU_FEATURE_USABLE (RTM)),
|
||
|
__wcscmp_avx2_rtm)
|
||
|
/* ISA V2 wrapper for SSE2 implementation because the SSE2
|
||
|
@@ -830,10 +832,12 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
|
||
|
&& CPU_FEATURE_USABLE (BMI2)),
|
||
|
__wcsncmp_evex)
|
||
|
X86_IFUNC_IMPL_ADD_V3 (array, i, wcsncmp,
|
||
|
- CPU_FEATURE_USABLE (AVX2),
|
||
|
+ (CPU_FEATURE_USABLE (AVX2)
|
||
|
+ && CPU_FEATURE_USABLE (BMI2)),
|
||
|
__wcsncmp_avx2)
|
||
|
X86_IFUNC_IMPL_ADD_V3 (array, i, wcsncmp,
|
||
|
(CPU_FEATURE_USABLE (AVX2)
|
||
|
+ && CPU_FEATURE_USABLE (BMI2)
|
||
|
&& CPU_FEATURE_USABLE (RTM)),
|
||
|
__wcsncmp_avx2_rtm)
|
||
|
/* ISA V2 wrapper for GENERIC implementation because the
|
||
|
--
|
||
|
2.38.0
|
||
|
|
||
|
|
||
|
From d9196d4f3fa9997388655813ddd236426a16dd92 Mon Sep 17 00:00:00 2001
|
||
|
From: Aurelien Jarno <aurelien@aurel32.net>
|
||
|
Date: Mon, 3 Oct 2022 23:46:11 +0200
|
||
|
Subject: [PATCH 6/8] x86-64: Require BMI2 for AVX2 (raw|w)memchr
|
||
|
implementations
|
||
|
|
||
|
The AVX2 memchr, rawmemchr and wmemchr implementations use the 'bzhi'
|
||
|
and 'sarx' instructions, which belong to the BMI2 CPU feature.
|
||
|
|
||
|
Fixes: acfd088a1963 ("x86: Optimize memchr-avx2.S")
|
||
|
Partially resolves: BZ #29611
|
||
|
|
||
|
Reviewed-by: Noah Goldstein <goldstein.w.n@gmail.com>
|
||
|
(cherry picked from commit e3e7fab7fe5186d18ca2046d99ba321c27db30ad)
|
||
|
---
|
||
|
sysdeps/x86_64/multiarch/ifunc-impl-list.c | 12 +++++++++---
|
||
|
1 file changed, 9 insertions(+), 3 deletions(-)
|
||
|
|
||
|
diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
|
||
|
index fec8790c11..7c84963d92 100644
|
||
|
--- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
|
||
|
+++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
|
||
|
@@ -69,10 +69,12 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
|
||
|
&& CPU_FEATURE_USABLE (BMI2)),
|
||
|
__memchr_evex_rtm)
|
||
|
X86_IFUNC_IMPL_ADD_V3 (array, i, memchr,
|
||
|
- CPU_FEATURE_USABLE (AVX2),
|
||
|
+ (CPU_FEATURE_USABLE (AVX2)
|
||
|
+ && CPU_FEATURE_USABLE (BMI2)),
|
||
|
__memchr_avx2)
|
||
|
X86_IFUNC_IMPL_ADD_V3 (array, i, memchr,
|
||
|
(CPU_FEATURE_USABLE (AVX2)
|
||
|
+ && CPU_FEATURE_USABLE (BMI2)
|
||
|
&& CPU_FEATURE_USABLE (RTM)),
|
||
|
__memchr_avx2_rtm)
|
||
|
/* ISA V2 wrapper for SSE2 implementation because the SSE2
|
||
|
@@ -335,10 +337,12 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
|
||
|
&& CPU_FEATURE_USABLE (BMI2)),
|
||
|
__rawmemchr_evex_rtm)
|
||
|
X86_IFUNC_IMPL_ADD_V3 (array, i, rawmemchr,
|
||
|
- CPU_FEATURE_USABLE (AVX2),
|
||
|
+ (CPU_FEATURE_USABLE (AVX2)
|
||
|
+ && CPU_FEATURE_USABLE (BMI2)),
|
||
|
__rawmemchr_avx2)
|
||
|
X86_IFUNC_IMPL_ADD_V3 (array, i, rawmemchr,
|
||
|
(CPU_FEATURE_USABLE (AVX2)
|
||
|
+ && CPU_FEATURE_USABLE (BMI2)
|
||
|
&& CPU_FEATURE_USABLE (RTM)),
|
||
|
__rawmemchr_avx2_rtm)
|
||
|
/* ISA V2 wrapper for SSE2 implementation because the SSE2
|
||
|
@@ -927,10 +931,12 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
|
||
|
&& CPU_FEATURE_USABLE (BMI2)),
|
||
|
__wmemchr_evex_rtm)
|
||
|
X86_IFUNC_IMPL_ADD_V3 (array, i, wmemchr,
|
||
|
- CPU_FEATURE_USABLE (AVX2),
|
||
|
+ (CPU_FEATURE_USABLE (AVX2)
|
||
|
+ && CPU_FEATURE_USABLE (BMI2)),
|
||
|
__wmemchr_avx2)
|
||
|
X86_IFUNC_IMPL_ADD_V3 (array, i, wmemchr,
|
||
|
(CPU_FEATURE_USABLE (AVX2)
|
||
|
+ && CPU_FEATURE_USABLE (BMI2)
|
||
|
&& CPU_FEATURE_USABLE (RTM)),
|
||
|
__wmemchr_avx2_rtm)
|
||
|
/* ISA V2 wrapper for SSE2 implementation because the SSE2
|
||
|
--
|
||
|
2.38.0
|
||
|
|
||
|
|
||
|
From 923c3f3c373f499e62160e00831dda576443317b Mon Sep 17 00:00:00 2001
|
||
|
From: Aurelien Jarno <aurelien@aurel32.net>
|
||
|
Date: Mon, 3 Oct 2022 23:46:11 +0200
|
||
|
Subject: [PATCH 7/8] x86-64: Require BMI2 and LZCNT for AVX2 memrchr
|
||
|
implementation
|
||
|
|
||
|
The AVX2 memrchr implementation uses the 'shlxl' instruction, which
|
||
|
belongs to the BMI2 CPU feature and uses the 'lzcnt' instruction, which
|
||
|
belongs to the LZCNT CPU feature.
|
||
|
|
||
|
Fixes: af5306a735eb ("x86: Optimize memrchr-avx2.S")
|
||
|
Partially resolves: BZ #29611
|
||
|
|
||
|
Reviewed-by: Noah Goldstein <goldstein.w.n@gmail.com>
|
||
|
(cherry picked from commit 3c0c78afabfed4b6fc161c159e628fbf14ff370b)
|
||
|
---
|
||
|
sysdeps/x86/isa-level.h | 1 +
|
||
|
sysdeps/x86_64/multiarch/ifunc-avx2.h | 1 +
|
||
|
sysdeps/x86_64/multiarch/ifunc-impl-list.c | 10 ++++++++--
|
||
|
3 files changed, 10 insertions(+), 2 deletions(-)
|
||
|
|
||
|
diff --git a/sysdeps/x86/isa-level.h b/sysdeps/x86/isa-level.h
|
||
|
index 3c4480aba7..bbb90f5c5e 100644
|
||
|
--- a/sysdeps/x86/isa-level.h
|
||
|
+++ b/sysdeps/x86/isa-level.h
|
||
|
@@ -80,6 +80,7 @@
|
||
|
#define AVX_X86_ISA_LEVEL 3
|
||
|
#define AVX2_X86_ISA_LEVEL 3
|
||
|
#define BMI2_X86_ISA_LEVEL 3
|
||
|
+#define LZCNT_X86_ISA_LEVEL 3
|
||
|
#define MOVBE_X86_ISA_LEVEL 3
|
||
|
|
||
|
/* ISA level >= 2 guaranteed includes. */
|
||
|
diff --git a/sysdeps/x86_64/multiarch/ifunc-avx2.h b/sysdeps/x86_64/multiarch/ifunc-avx2.h
|
||
|
index a57a9952f3..f1741083fd 100644
|
||
|
--- a/sysdeps/x86_64/multiarch/ifunc-avx2.h
|
||
|
+++ b/sysdeps/x86_64/multiarch/ifunc-avx2.h
|
||
|
@@ -37,6 +37,7 @@ IFUNC_SELECTOR (void)
|
||
|
|
||
|
if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX2)
|
||
|
&& X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2)
|
||
|
+ && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, LZCNT)
|
||
|
&& X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
|
||
|
AVX_Fast_Unaligned_Load, ))
|
||
|
{
|
||
|
diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
|
||
|
index 7c84963d92..ec1c5b55fb 100644
|
||
|
--- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
|
||
|
+++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
|
||
|
@@ -209,13 +209,19 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
|
||
|
IFUNC_IMPL (i, name, memrchr,
|
||
|
X86_IFUNC_IMPL_ADD_V4 (array, i, memrchr,
|
||
|
(CPU_FEATURE_USABLE (AVX512VL)
|
||
|
- && CPU_FEATURE_USABLE (AVX512BW)),
|
||
|
+ && CPU_FEATURE_USABLE (AVX512BW)
|
||
|
+ && CPU_FEATURE_USABLE (BMI2)
|
||
|
+ && CPU_FEATURE_USABLE (LZCNT)),
|
||
|
__memrchr_evex)
|
||
|
X86_IFUNC_IMPL_ADD_V3 (array, i, memrchr,
|
||
|
- CPU_FEATURE_USABLE (AVX2),
|
||
|
+ (CPU_FEATURE_USABLE (AVX2)
|
||
|
+ && CPU_FEATURE_USABLE (BMI2)
|
||
|
+ && CPU_FEATURE_USABLE (LZCNT)),
|
||
|
__memrchr_avx2)
|
||
|
X86_IFUNC_IMPL_ADD_V3 (array, i, memrchr,
|
||
|
(CPU_FEATURE_USABLE (AVX2)
|
||
|
+ && CPU_FEATURE_USABLE (BMI2)
|
||
|
+ && CPU_FEATURE_USABLE (LZCNT)
|
||
|
&& CPU_FEATURE_USABLE (RTM)),
|
||
|
__memrchr_avx2_rtm)
|
||
|
/* ISA V2 wrapper for SSE2 implementation because the SSE2
|
||
|
--
|
||
|
2.38.0
|
||
|
|
||
|
|
||
|
From 2d8ef784bd6a784496a6fd460de6b6f57c70a501 Mon Sep 17 00:00:00 2001
|
||
|
From: Aurelien Jarno <aurelien@aurel32.net>
|
||
|
Date: Mon, 3 Oct 2022 23:46:11 +0200
|
||
|
Subject: [PATCH 8/8] x86-64: Require BMI1/BMI2 for AVX2 strrchr and wcsrchr
|
||
|
implementations
|
||
|
|
||
|
The AVX2 strrchr and wcsrchr implementations use the 'blsmsk'
|
||
|
instruction which belongs to the BMI1 CPU feature and the 'shrx'
|
||
|
instruction, which belongs to the BMI2 CPU feature.
|
||
|
|
||
|
Fixes: df7e295d18ff ("x86: Optimize {str|wcs}rchr-avx2")
|
||
|
Partially resolves: BZ #29611
|
||
|
|
||
|
Reviewed-by: Noah Goldstein <goldstein.w.n@gmail.com>
|
||
|
(cherry picked from commit 7e8283170c5d6805b609a040801d819e362a6292)
|
||
|
---
|
||
|
sysdeps/x86/isa-level.h | 1 +
|
||
|
sysdeps/x86_64/multiarch/ifunc-avx2.h | 1 +
|
||
|
sysdeps/x86_64/multiarch/ifunc-impl-list.c | 17 ++++++++++++++---
|
||
|
3 files changed, 16 insertions(+), 3 deletions(-)
|
||
|
|
||
|
diff --git a/sysdeps/x86/isa-level.h b/sysdeps/x86/isa-level.h
|
||
|
index bbb90f5c5e..06f6c9663e 100644
|
||
|
--- a/sysdeps/x86/isa-level.h
|
||
|
+++ b/sysdeps/x86/isa-level.h
|
||
|
@@ -79,6 +79,7 @@
|
||
|
/* ISA level >= 3 guaranteed includes. */
|
||
|
#define AVX_X86_ISA_LEVEL 3
|
||
|
#define AVX2_X86_ISA_LEVEL 3
|
||
|
+#define BMI1_X86_ISA_LEVEL 3
|
||
|
#define BMI2_X86_ISA_LEVEL 3
|
||
|
#define LZCNT_X86_ISA_LEVEL 3
|
||
|
#define MOVBE_X86_ISA_LEVEL 3
|
||
|
diff --git a/sysdeps/x86_64/multiarch/ifunc-avx2.h b/sysdeps/x86_64/multiarch/ifunc-avx2.h
|
||
|
index f1741083fd..f2f5e8a211 100644
|
||
|
--- a/sysdeps/x86_64/multiarch/ifunc-avx2.h
|
||
|
+++ b/sysdeps/x86_64/multiarch/ifunc-avx2.h
|
||
|
@@ -36,6 +36,7 @@ IFUNC_SELECTOR (void)
|
||
|
const struct cpu_features *cpu_features = __get_cpu_features ();
|
||
|
|
||
|
if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX2)
|
||
|
+ && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI1)
|
||
|
&& X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2)
|
||
|
&& X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, LZCNT)
|
||
|
&& X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
|
||
|
diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
|
||
|
index ec1c5b55fb..00a91123d3 100644
|
||
|
--- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
|
||
|
+++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
|
||
|
@@ -578,13 +578,19 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
|
||
|
IFUNC_IMPL (i, name, strrchr,
|
||
|
X86_IFUNC_IMPL_ADD_V4 (array, i, strrchr,
|
||
|
(CPU_FEATURE_USABLE (AVX512VL)
|
||
|
- && CPU_FEATURE_USABLE (AVX512BW)),
|
||
|
+ && CPU_FEATURE_USABLE (AVX512BW)
|
||
|
+ && CPU_FEATURE_USABLE (BMI1)
|
||
|
+ && CPU_FEATURE_USABLE (BMI2)),
|
||
|
__strrchr_evex)
|
||
|
X86_IFUNC_IMPL_ADD_V3 (array, i, strrchr,
|
||
|
- CPU_FEATURE_USABLE (AVX2),
|
||
|
+ (CPU_FEATURE_USABLE (AVX2)
|
||
|
+ && CPU_FEATURE_USABLE (BMI1)
|
||
|
+ && CPU_FEATURE_USABLE (BMI2)),
|
||
|
__strrchr_avx2)
|
||
|
X86_IFUNC_IMPL_ADD_V3 (array, i, strrchr,
|
||
|
(CPU_FEATURE_USABLE (AVX2)
|
||
|
+ && CPU_FEATURE_USABLE (BMI1)
|
||
|
+ && CPU_FEATURE_USABLE (BMI2)
|
||
|
&& CPU_FEATURE_USABLE (RTM)),
|
||
|
__strrchr_avx2_rtm)
|
||
|
/* ISA V2 wrapper for SSE2 implementation because the SSE2
|
||
|
@@ -797,13 +803,18 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
|
||
|
X86_IFUNC_IMPL_ADD_V4 (array, i, wcsrchr,
|
||
|
(CPU_FEATURE_USABLE (AVX512VL)
|
||
|
&& CPU_FEATURE_USABLE (AVX512BW)
|
||
|
+ && CPU_FEATURE_USABLE (BMI1)
|
||
|
&& CPU_FEATURE_USABLE (BMI2)),
|
||
|
__wcsrchr_evex)
|
||
|
X86_IFUNC_IMPL_ADD_V3 (array, i, wcsrchr,
|
||
|
- CPU_FEATURE_USABLE (AVX2),
|
||
|
+ (CPU_FEATURE_USABLE (AVX2)
|
||
|
+ && CPU_FEATURE_USABLE (BMI1)
|
||
|
+ && CPU_FEATURE_USABLE (BMI2)),
|
||
|
__wcsrchr_avx2)
|
||
|
X86_IFUNC_IMPL_ADD_V3 (array, i, wcsrchr,
|
||
|
(CPU_FEATURE_USABLE (AVX2)
|
||
|
+ && CPU_FEATURE_USABLE (BMI1)
|
||
|
+ && CPU_FEATURE_USABLE (BMI2)
|
||
|
&& CPU_FEATURE_USABLE (RTM)),
|
||
|
__wcsrchr_avx2_rtm)
|
||
|
/* ISA V2 wrapper for SSE2 implementation because the SSE2
|
||
|
--
|
||
|
2.38.0
|
||
|
|