From 621320af4e05d916b0527aa655419f1e74938a2657e14c97d87add733b699f86 Mon Sep 17 00:00:00 2001 From: Andreas Schwab Date: Tue, 22 Aug 2023 12:33:42 +0000 Subject: [PATCH] Accepting request 1105262 from home:Andreas_Schwab:Factory - Require that elf/check-localplt does not fail - glibc-2.3.90-langpackdir.diff: add hidden alias for __strcpy_chk - cache-amd-legacy.patch: x86: Fix for cache computation on AMD legacy cpus - cache-intel-shared.patch: x86: Fix incorrect scope of setting `shared_per_thread` (BZ# 30745) OBS-URL: https://build.opensuse.org/request/show/1105262 OBS-URL: https://build.opensuse.org/package/show/Base:System/glibc?expand=0&rev=659 --- cache-amd-legacy.patch | 286 ++++++++++++++++++++++++++++++++++ cache-intel-shared.patch | 45 ++++++ glibc-2.3.90-langpackdir.diff | 21 +++ glibc.changes | 10 ++ glibc.spec | 7 + 5 files changed, 369 insertions(+) create mode 100644 cache-amd-legacy.patch create mode 100644 cache-intel-shared.patch diff --git a/cache-amd-legacy.patch b/cache-amd-legacy.patch new file mode 100644 index 0000000..22f9b40 --- /dev/null +++ b/cache-amd-legacy.patch @@ -0,0 +1,286 @@ +From ced101ed9d3b7cfd12d97ef24940cb00b8658c81 Mon Sep 17 00:00:00 2001 +From: Sajan Karumanchi +Date: Tue, 1 Aug 2023 15:20:55 +0000 +Subject: [PATCH] x86: Fix for cache computation on AMD legacy cpus. + +Some legacy AMD CPUs and hypervisors have the _cpuid_ '0x8000_001D' +set to Zero, thus resulting in zeroed-out computed cache values. +This patch reintroduces the old way of cache computation as a +fail-safe option to handle these exceptions. +Fixed 'level4_cache_size' value through handle_amd(). + +Reviewed-by: Premachandra Mallappa +Tested-by: Florian Weimer +--- + sysdeps/x86/dl-cacheinfo.h | 226 ++++++++++++++++++++++++++++++++----- + 1 file changed, 199 insertions(+), 27 deletions(-) + +diff --git a/sysdeps/x86/dl-cacheinfo.h b/sysdeps/x86/dl-cacheinfo.h +index cd4d0351ae..285773039f 100644 +--- a/sysdeps/x86/dl-cacheinfo.h ++++ b/sysdeps/x86/dl-cacheinfo.h +@@ -315,40 +315,206 @@ handle_amd (int name) + { + unsigned int eax; + unsigned int ebx; +- unsigned int ecx; ++ unsigned int ecx = 0; + unsigned int edx; +- unsigned int count = 0x1; ++ unsigned int max_cpuid = 0; ++ unsigned int fn = 0; + + /* No level 4 cache (yet). */ + if (name > _SC_LEVEL3_CACHE_LINESIZE) + return 0; + +- if (name >= _SC_LEVEL3_CACHE_SIZE) +- count = 0x3; +- else if (name >= _SC_LEVEL2_CACHE_SIZE) +- count = 0x2; +- else if (name >= _SC_LEVEL1_DCACHE_SIZE) +- count = 0x0; ++ __cpuid (0x80000000, max_cpuid, ebx, ecx, edx); ++ ++ if (max_cpuid >= 0x8000001D) ++ /* Use __cpuid__ '0x8000_001D' to compute cache details. 
*/ ++ { ++ unsigned int count = 0x1; ++ ++ if (name >= _SC_LEVEL3_CACHE_SIZE) ++ count = 0x3; ++ else if (name >= _SC_LEVEL2_CACHE_SIZE) ++ count = 0x2; ++ else if (name >= _SC_LEVEL1_DCACHE_SIZE) ++ count = 0x0; ++ ++ __cpuid_count (0x8000001D, count, eax, ebx, ecx, edx); ++ ++ if (ecx != 0) ++ { ++ switch (name) ++ { ++ case _SC_LEVEL1_ICACHE_ASSOC: ++ case _SC_LEVEL1_DCACHE_ASSOC: ++ case _SC_LEVEL2_CACHE_ASSOC: ++ case _SC_LEVEL3_CACHE_ASSOC: ++ return ((ebx >> 22) & 0x3ff) + 1; ++ case _SC_LEVEL1_ICACHE_LINESIZE: ++ case _SC_LEVEL1_DCACHE_LINESIZE: ++ case _SC_LEVEL2_CACHE_LINESIZE: ++ case _SC_LEVEL3_CACHE_LINESIZE: ++ return (ebx & 0xfff) + 1; ++ case _SC_LEVEL1_ICACHE_SIZE: ++ case _SC_LEVEL1_DCACHE_SIZE: ++ case _SC_LEVEL2_CACHE_SIZE: ++ case _SC_LEVEL3_CACHE_SIZE: ++ return (((ebx >> 22) & 0x3ff) + 1) * ((ebx & 0xfff) + 1) * (ecx + 1); ++ default: ++ __builtin_unreachable (); ++ } ++ return -1; ++ } ++ } ++ ++ /* Legacy cache computation for CPUs prior to Bulldozer family. ++ This is also a fail-safe mechanism for some hypervisors that ++ accidentally configure __cpuid__ '0x8000_001D' to Zero. */ + +- __cpuid_count (0x8000001D, count, eax, ebx, ecx, edx); ++ fn = 0x80000005 + (name >= _SC_LEVEL2_CACHE_SIZE); ++ ++ if (max_cpuid < fn) ++ return 0; ++ ++ __cpuid (fn, eax, ebx, ecx, edx); ++ ++ if (name < _SC_LEVEL1_DCACHE_SIZE) ++ { ++ name += _SC_LEVEL1_DCACHE_SIZE - _SC_LEVEL1_ICACHE_SIZE; ++ ecx = edx; ++ } + + switch (name) + { +- case _SC_LEVEL1_ICACHE_ASSOC: +- case _SC_LEVEL1_DCACHE_ASSOC: +- case _SC_LEVEL2_CACHE_ASSOC: ++ case _SC_LEVEL1_DCACHE_SIZE: ++ return (ecx >> 14) & 0x3fc00; ++ ++ case _SC_LEVEL1_DCACHE_ASSOC: ++ ecx >>= 16; ++ if ((ecx & 0xff) == 0xff) ++ { ++ /* Fully associative. */ ++ return (ecx << 2) & 0x3fc00; ++ } ++ return ecx & 0xff; ++ ++ case _SC_LEVEL1_DCACHE_LINESIZE: ++ return ecx & 0xff; ++ ++ case _SC_LEVEL2_CACHE_SIZE: ++ return (ecx & 0xf000) == 0 ? 0 : (ecx >> 6) & 0x3fffc00; ++ ++ case _SC_LEVEL2_CACHE_ASSOC: ++ switch ((ecx >> 12) & 0xf) ++ { ++ case 0: ++ case 1: ++ case 2: ++ case 4: ++ return (ecx >> 12) & 0xf; ++ case 6: ++ return 8; ++ case 8: ++ return 16; ++ case 10: ++ return 32; ++ case 11: ++ return 48; ++ case 12: ++ return 64; ++ case 13: ++ return 96; ++ case 14: ++ return 128; ++ case 15: ++ return ((ecx >> 6) & 0x3fffc00) / (ecx & 0xff); ++ default: ++ return 0; ++ } ++ ++ case _SC_LEVEL2_CACHE_LINESIZE: ++ return (ecx & 0xf000) == 0 ? 0 : ecx & 0xff; ++ ++ case _SC_LEVEL3_CACHE_SIZE: ++ { ++ long int total_l3_cache = 0, l3_cache_per_thread = 0; ++ unsigned int threads = 0; ++ const struct cpu_features *cpu_features; ++ ++ if ((edx & 0xf000) == 0) ++ return 0; ++ ++ total_l3_cache = (edx & 0x3ffc0000) << 1; ++ cpu_features = __get_cpu_features (); ++ ++ /* Figure out the number of logical threads that share L3. */ ++ if (max_cpuid >= 0x80000008) ++ { ++ /* Get width of APIC ID. */ ++ __cpuid (0x80000008, eax, ebx, ecx, edx); ++ threads = (ecx & 0xff) + 1; ++ } ++ ++ if (threads == 0) ++ { ++ /* If APIC ID width is not available, use logical ++ processor count. */ ++ __cpuid (0x00000001, eax, ebx, ecx, edx); ++ if ((edx & (1 << 28)) != 0) ++ threads = (ebx >> 16) & 0xff; ++ } ++ ++ /* Cap usage of highest cache level to the number of ++ supported threads. */ ++ if (threads > 0) ++ l3_cache_per_thread = total_l3_cache/threads; ++ ++ /* Get shared cache per ccx for Zen architectures. */ ++ if (cpu_features->basic.family >= 0x17) ++ { ++ long int l3_cache_per_ccx = 0; ++ /* Get number of threads share the L3 cache in CCX. 
*/ ++ __cpuid_count (0x8000001D, 0x3, eax, ebx, ecx, edx); ++ unsigned int threads_per_ccx = ((eax >> 14) & 0xfff) + 1; ++ l3_cache_per_ccx = l3_cache_per_thread * threads_per_ccx; ++ return l3_cache_per_ccx; ++ } ++ else ++ { ++ return l3_cache_per_thread; ++ } ++ } ++ + case _SC_LEVEL3_CACHE_ASSOC: +- return ecx ? ((ebx >> 22) & 0x3ff) + 1 : 0; +- case _SC_LEVEL1_ICACHE_LINESIZE: +- case _SC_LEVEL1_DCACHE_LINESIZE: +- case _SC_LEVEL2_CACHE_LINESIZE: ++ switch ((edx >> 12) & 0xf) ++ { ++ case 0: ++ case 1: ++ case 2: ++ case 4: ++ return (edx >> 12) & 0xf; ++ case 6: ++ return 8; ++ case 8: ++ return 16; ++ case 10: ++ return 32; ++ case 11: ++ return 48; ++ case 12: ++ return 64; ++ case 13: ++ return 96; ++ case 14: ++ return 128; ++ case 15: ++ return ((edx & 0x3ffc0000) << 1) / (edx & 0xff); ++ default: ++ return 0; ++ } ++ + case _SC_LEVEL3_CACHE_LINESIZE: +- return ecx ? (ebx & 0xfff) + 1 : 0; +- case _SC_LEVEL1_ICACHE_SIZE: +- case _SC_LEVEL1_DCACHE_SIZE: +- case _SC_LEVEL2_CACHE_SIZE: +- case _SC_LEVEL3_CACHE_SIZE: +- return ecx ? (((ebx >> 22) & 0x3ff) + 1) * ((ebx & 0xfff) + 1) * (ecx + 1): 0; ++ return (edx & 0xf000) == 0 ? 0 : edx & 0xff; ++ + default: + __builtin_unreachable (); + } +@@ -703,7 +869,6 @@ dl_init_cacheinfo (struct cpu_features *cpu_features) + data = handle_amd (_SC_LEVEL1_DCACHE_SIZE); + core = handle_amd (_SC_LEVEL2_CACHE_SIZE); + shared = handle_amd (_SC_LEVEL3_CACHE_SIZE); +- shared_per_thread = shared; + + level1_icache_size = handle_amd (_SC_LEVEL1_ICACHE_SIZE); + level1_icache_linesize = handle_amd (_SC_LEVEL1_ICACHE_LINESIZE); +@@ -716,13 +881,20 @@ dl_init_cacheinfo (struct cpu_features *cpu_features) + level3_cache_size = shared; + level3_cache_assoc = handle_amd (_SC_LEVEL3_CACHE_ASSOC); + level3_cache_linesize = handle_amd (_SC_LEVEL3_CACHE_LINESIZE); ++ level4_cache_size = handle_amd (_SC_LEVEL4_CACHE_SIZE); + + if (shared <= 0) +- /* No shared L3 cache. All we have is the L2 cache. */ +- shared = core; ++ { ++ /* No shared L3 cache. All we have is the L2 cache. */ ++ shared = core; ++ } ++ else if (cpu_features->basic.family < 0x17) ++ { ++ /* Account for exclusive L2 and L3 caches. */ ++ shared += core; ++ } + +- if (shared_per_thread <= 0) +- shared_per_thread = shared; ++ shared_per_thread = shared; + } + + cpu_features->level1_icache_size = level1_icache_size; +-- +2.41.0 + diff --git a/cache-intel-shared.patch b/cache-intel-shared.patch new file mode 100644 index 0000000..d06fdd7 --- /dev/null +++ b/cache-intel-shared.patch @@ -0,0 +1,45 @@ +From 5ea70cc02626d9b85f1570153873d8648a47bf95 Mon Sep 17 00:00:00 2001 +From: Noah Goldstein +Date: Thu, 10 Aug 2023 19:28:24 -0500 +Subject: [PATCH] x86: Fix incorrect scope of setting `shared_per_thread` [BZ# + 30745] + +The: + +``` + if (shared_per_thread > 0 && threads > 0) + shared_per_thread /= threads; +``` + +Code was accidentally moved to inside the else scope. This doesn't +match how it was previously (before af992e7abd). + +This patch fixes that by putting the division after the `else` block. + +(cherry picked from commit 084fb31bc2c5f95ae0b9e6df4d3cf0ff43471ede) +--- + sysdeps/x86/dl-cacheinfo.h | 7 +++---- + 1 file changed, 3 insertions(+), 4 deletions(-) + +diff --git a/sysdeps/x86/dl-cacheinfo.h b/sysdeps/x86/dl-cacheinfo.h +index 285773039f..5ddb35c9d9 100644 +--- a/sysdeps/x86/dl-cacheinfo.h ++++ b/sysdeps/x86/dl-cacheinfo.h +@@ -770,11 +770,10 @@ get_common_cache_info (long int *shared_ptr, long int * shared_per_thread_ptr, u + level. 
*/ + threads = ((cpu_features->features[CPUID_INDEX_1].cpuid.ebx >> 16) + & 0xff); +- +- /* Get per-thread size of highest level cache. */ +- if (shared_per_thread > 0 && threads > 0) +- shared_per_thread /= threads; + } ++ /* Get per-thread size of highest level cache. */ ++ if (shared_per_thread > 0 && threads > 0) ++ shared_per_thread /= threads; + } + + /* Account for non-inclusive L2 and L3 caches. */ +-- +2.41.0 + diff --git a/glibc-2.3.90-langpackdir.diff b/glibc-2.3.90-langpackdir.diff index 5cb7dab..802418b 100644 --- a/glibc-2.3.90-langpackdir.diff +++ b/glibc-2.3.90-langpackdir.diff @@ -1,3 +1,24 @@ +Index: glibc-2.38/debug/strcpy_chk.c +=================================================================== +--- glibc-2.38.orig/debug/strcpy_chk.c ++++ glibc-2.38/debug/strcpy_chk.c +@@ -31,3 +31,4 @@ __strcpy_chk (char *dest, const char *sr + + return memcpy (dest, src, len + 1); + } ++libc_hidden_builtin_def (__strcpy_chk) +Index: glibc-2.38/include/string.h +=================================================================== +--- glibc-2.38.orig/include/string.h ++++ glibc-2.38/include/string.h +@@ -213,6 +213,7 @@ libc_hidden_builtin_proto (__memcpy_chk) + libc_hidden_builtin_proto (__memmove_chk) + libc_hidden_builtin_proto (__mempcpy_chk) + libc_hidden_builtin_proto (__memset_chk) ++libc_hidden_builtin_proto (__strcpy_chk) + libc_hidden_builtin_proto (__stpcpy_chk) + + #endif Index: glibc-2.38/intl/loadmsgcat.c =================================================================== --- glibc-2.38.orig/intl/loadmsgcat.c diff --git a/glibc.changes b/glibc.changes index 917b58b..6215ade 100644 --- a/glibc.changes +++ b/glibc.changes @@ -1,3 +1,13 @@ +------------------------------------------------------------------- +Mon Aug 14 08:12:28 UTC 2023 - Andreas Schwab + +- Require that elf/check-localplt does not fail +- glibc-2.3.90-langpackdir.diff: add hidden alias for __strcpy_chk +- cache-amd-legacy.patch: x86: Fix for cache computation on AMD legacy + cpus +- cache-intel-shared.patch: x86: Fix incorrect scope of setting + `shared_per_thread` (BZ# 30745) + ------------------------------------------------------------------- Wed Aug 2 10:50:32 UTC 2023 - Andreas Schwab diff --git a/glibc.spec b/glibc.spec index 0ad3f00..381486e 100644 --- a/glibc.spec +++ b/glibc.spec @@ -291,6 +291,10 @@ Patch306: glibc-fix-double-loopback.diff ### # PATCH-FIX-OPENSUSE iconv: restore verbosity with unrecognized encoding names (BZ #30694) Patch1000: iconv-error-verbosity.patch +# PATCH-FIX-OPENSUSE x86: Fix for cache computation on AMD legacy cpus +Patch1001: cache-amd-legacy.patch +# PATCH-FIX-OPENSUSE x86: Fix incorrect scope of setting `shared_per_thread` (BZ# 30745) +Patch1002: cache-intel-shared.patch ### # Patches awaiting upstream approval @@ -515,6 +519,8 @@ library in a cross compilation setting. %if %{without snapshot} %patch1000 -p1 +%patch1001 -p1 +%patch1002 -p1 %endif %patch2000 -p1 @@ -754,6 +760,7 @@ make %{?_smp_mflags} %{?make_output_sync} -C cc-base -k check || { # Exceptions: # None! make %{?_smp_mflags} %{?make_output_sync} -C cc-base check-abi +make %{?_smp_mflags} %{?make_output_sync} -C cc-base test t=elf/check-localplt %endif %define rtldlib %{_lib}
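
The legacy fallback that cache-amd-legacy.patch reinstates decodes the classic AMD extended leaves 0x8000_0005 (L1) and 0x8000_0006 (L2/L3). The following standalone C sketch is not part of any patch above; it only mirrors the bit masks that handle_amd()'s fallback path applies, using GCC's <cpuid.h> helpers, so the values printed on an AMD machine (or under a hypervisor that zeroes leaf 0x8000_001D) can be compared with what the patched handle_amd() would return. The file name and output format are illustrative assumptions; only the field layouts come from the patch.

/* cpuid-legacy-cache.c - decode the legacy AMD cache leaves, following the
   field masks used by handle_amd()'s fallback path.
   Build: gcc -O2 cpuid-legacy-cache.c -o cpuid-legacy-cache  */
#include <cpuid.h>
#include <stdio.h>

int
main (void)
{
  unsigned int eax, ebx, ecx, edx;

  /* Highest supported extended leaf; the patched handle_amd() checks this
     before choosing between leaf 0x8000_001D and the legacy leaves.  */
  if (!__get_cpuid (0x80000000, &eax, &ebx, &ecx, &edx))
    return 1;
  unsigned int max_ext = eax;

  if (max_ext >= 0x80000005 && __get_cpuid (0x80000005, &eax, &ebx, &ecx, &edx))
    {
      /* Leaf 0x8000_0005: L1 data cache in ECX, L1 instruction cache in EDX.
	 Size in KiB in bits 31:24, associativity in bits 23:16 (0xff means
	 fully associative), line size in bits 7:0.  */
      printf ("L1d: %u KiB, assoc 0x%02x, line %u B\n",
	      (ecx >> 24) & 0xff, (ecx >> 16) & 0xff, ecx & 0xff);
      printf ("L1i: %u KiB, assoc 0x%02x, line %u B\n",
	      (edx >> 24) & 0xff, (edx >> 16) & 0xff, edx & 0xff);
    }

  if (max_ext >= 0x80000006 && __get_cpuid (0x80000006, &eax, &ebx, &ecx, &edx))
    {
      /* Leaf 0x8000_0006: L2 in ECX (size in KiB in bits 31:16, encoded
	 associativity in bits 15:12, line size in bits 7:0); L3 in EDX
	 (size in 512 KiB units in bits 29:18, matching the mask the patch
	 uses, encoded associativity in bits 15:12, line size in bits 7:0).  */
      printf ("L2:  %u KiB, assoc code %u, line %u B\n",
	      ecx >> 16, (ecx >> 12) & 0xf, ecx & 0xff);
      printf ("L3:  %u KiB total, assoc code %u, line %u B\n",
	      ((edx >> 18) & 0xfff) * 512, (edx >> 12) & 0xf, edx & 0xff);
    }

  return 0;
}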
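
The primary path, and the per-CCX L3 sharing computation that the patch keeps for Zen (family 0x17 and newer), both read leaf 0x8000_001D. The sketch below again stands outside the patches and follows the patch's formulas: cache size as ways * line size * sets from the subleaf for that cache level, and the number of logical threads sharing the L3 from bits 25:14 of EAX of subleaf 3. The use of <cpuid.h>'s __get_cpuid and __cpuid_count is an assumption of this sketch; when the leaf reads as zero, that is exactly the case the legacy fallback above now handles.

/* cpuid-amd-l3-sharing.c - decode leaf 0x8000_001D the way the patched
   handle_amd() does and report how many threads share the L3.
   Build: gcc -O2 cpuid-amd-l3-sharing.c -o cpuid-amd-l3-sharing  */
#include <cpuid.h>
#include <stdio.h>

int
main (void)
{
  unsigned int eax, ebx, ecx, edx;

  if (!__get_cpuid (0x80000000, &eax, &ebx, &ecx, &edx) || eax < 0x8000001D)
    {
      /* Leaf not reported as supported: the legacy path applies.  */
      puts ("leaf 0x8000001D not available");
      return 1;
    }

  /* Subleaf 3 describes the L3 cache (0 = L1D, 1 = L1I, 2 = L2, 3 = L3),
     matching the 'count' values used in handle_amd().  */
  __cpuid_count (0x8000001D, 0x3, eax, ebx, ecx, edx);
  if (ecx == 0)
    {
      /* Zeroed-out leaf, e.g. under some hypervisors: legacy path again.  */
      puts ("leaf 0x8000001D is zero");
      return 1;
    }

  /* Same formula as the patch: ways * line size * sets (the partitions
     field in EBX is treated as 1, as the patch does).  */
  unsigned int ways = ((ebx >> 22) & 0x3ff) + 1;
  unsigned int line = (ebx & 0xfff) + 1;
  unsigned int sets = ecx + 1;
  unsigned long l3_total = (unsigned long) ways * line * sets;

  /* EAX bits 25:14: logical threads sharing this cache, minus 1.  On Zen
     the patch multiplies the per-thread share of the package-wide L3 by
     this count to obtain the L3 visible to one CCX.  */
  unsigned int sharing = ((eax >> 14) & 0xfff) + 1;

  printf ("L3: %lu KiB, shared by %u logical threads\n",
	  l3_total / 1024, sharing);
  return 0;
}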