Compare commits
4 Commits
| Author | SHA256 | Date | |
|---|---|---|---|
|
|
f023b8a12d | ||
| 06582d78bf | |||
| 97fbaaf262 | |||
| 07c51ebfb6 |
12
README.md
Normal file
12
README.md
Normal file
@@ -0,0 +1,12 @@
|
||||
|
||||
## Build Results
|
||||
|
||||
The current state of libpsm2 in openSUSE:Factory is
|
||||
|
||||

|
||||
|
||||
The current state of libpsm2 in the devel project build (science:HPC)
|
||||
|
||||

|
||||
|
||||
|
||||
162
libpsm2-disable-AVX.patch
Normal file
162
libpsm2-disable-AVX.patch
Normal file
@@ -0,0 +1,162 @@
|
||||
commit 30500125ff629ee83b856b246912408c33662a4b
|
||||
Author: Nicolas Morey <nmorey@suse.com>
|
||||
Date: Fri Jul 4 08:41:25 2025 +0200
|
||||
|
||||
libpsm2: disable AVX
|
||||
|
||||
PSM2 is built with AVX2 enabled by default and, if manually disabled,
|
||||
falls back to AVX.
|
||||
Disable both for compatibility purposes.
|
||||
|
||||
Signed-off-by: Nicolas Morey <nmorey@suse.com>
|
||||
|
||||
diff --git buildflags.mak buildflags.mak
|
||||
index 206223dbd0a0..24221f1531a8 100644
|
||||
--- buildflags.mak
|
||||
+++ buildflags.mak
|
||||
@@ -98,54 +98,7 @@ INCLUDES += -I${IFS_HFI_HEADER_PATH}
|
||||
|
||||
BASECFLAGS +=-Wall $(WERROR)
|
||||
|
||||
-#
|
||||
-# test if compiler supports 32B(AVX2)/64B(AVX512F) move instruction.
|
||||
-#
|
||||
-ifeq (${CC},icc)
|
||||
- ifeq ($(PSM_DISABLE_AVX2),)
|
||||
- MAVX2=-xATOM_SSE4.2 -DPSM_AVX512
|
||||
- else
|
||||
- MAVX2=-march=core-avx-i
|
||||
- endif
|
||||
-else
|
||||
- ifeq ($(PSM_DISABLE_AVX2),)
|
||||
- MAVX2=-mavx2
|
||||
- else
|
||||
- MAVX2=-mavx
|
||||
- endif
|
||||
-endif
|
||||
-
|
||||
-ifneq (icc,${CC})
|
||||
- ifeq ($(PSM_DISABLE_AVX2),)
|
||||
- RET := $(shell echo "int main() {}" | ${CC} ${MAVX2} -E -dM -xc - 2>&1 | grep -q AVX2 ; echo $$?)
|
||||
- else
|
||||
- RET := $(shell echo "int main() {}" | ${CC} ${MAVX2} -E -dM -xc - 2>&1 | grep -q AVX ; echo $$?)
|
||||
- anerr := $(warning ***NOTE TO USER**** Disabling AVX2 will harm performance)
|
||||
- endif
|
||||
-
|
||||
- ifeq (0,${RET})
|
||||
- BASECFLAGS += ${MAVX2}
|
||||
- else
|
||||
- anerr := $(error Compiler does not support ${MAVX2} )
|
||||
- endif
|
||||
-else
|
||||
- BASECFLAGS += ${MAVX2}
|
||||
-endif
|
||||
-
|
||||
-# This support is dynamic at runtime, so is OK to enable as long as compiler can generate
|
||||
-# the code.
|
||||
-ifneq (,${PSM_AVX512})
|
||||
- ifneq (icc,${CC})
|
||||
- RET := $(shell echo "int main() {}" | ${CC} -mavx512f -E -dM -xc - 2>&1 | grep -q AVX512 ; echo $$?)
|
||||
- ifeq (0,${RET})
|
||||
- BASECFLAGS += -mavx512f
|
||||
- else
|
||||
- anerr := $(error Compiler does not support AVX512 )
|
||||
- endif
|
||||
- BASECFLAGS += -DPSM_AVX512
|
||||
- endif
|
||||
-endif
|
||||
-
|
||||
+BASECFLAGS += -msse4.2
|
||||
#
|
||||
# feature test macros for drand48_r
|
||||
#
|
||||
diff --git opa/opa_dwordcpy-generic.c opa/opa_dwordcpy-generic.c
|
||||
index dfb7755d5fdf..e1313bc4f25c 100644
|
||||
--- opa/opa_dwordcpy-generic.c
|
||||
+++ opa/opa_dwordcpy-generic.c
|
||||
@@ -192,33 +192,6 @@ void hfi_pio_blockcpy_512(volatile uint64_t *dest, const uint64_t *src, uint32_t
|
||||
}
|
||||
#endif
|
||||
|
||||
-void hfi_pio_blockcpy_256(volatile uint64_t *dest, const uint64_t *src, uint32_t nblock)
|
||||
-{
|
||||
- volatile __m256i *dp = (volatile __m256i *) dest;
|
||||
- const __m256i *sp = (const __m256i *) src;
|
||||
-
|
||||
- psmi_assert((dp != NULL) && (sp != NULL));
|
||||
- psmi_assert((((uintptr_t) dp) & 0x3f) == 0x0);
|
||||
-
|
||||
- if ((((uintptr_t) sp) & 0x1f) == 0x0) {
|
||||
- /* source and destination are both 32 byte aligned */
|
||||
- do {
|
||||
- __m256i tmp0 = _mm256_load_si256(sp);
|
||||
- __m256i tmp1 = _mm256_load_si256(sp + 1);
|
||||
- _mm256_store_si256((__m256i *)dp, tmp0);
|
||||
- _mm256_store_si256((__m256i *)(dp + 1), tmp1);
|
||||
- } while ((--nblock) && (dp = dp+2) && (sp = sp+2));
|
||||
- } else {
|
||||
- /* only destination is 32 byte aligned - use unaligned loads */
|
||||
- do {
|
||||
- __m256i tmp0 = _mm256_loadu_si256(sp);
|
||||
- __m256i tmp1 = _mm256_loadu_si256(sp + 1);
|
||||
- _mm256_store_si256((__m256i *)dp, tmp0);
|
||||
- _mm256_store_si256((__m256i *)(dp + 1), tmp1);
|
||||
- } while ((--nblock) && (dp = dp+2) && (sp = sp+2));
|
||||
- }
|
||||
-}
|
||||
-
|
||||
void hfi_pio_blockcpy_128(volatile uint64_t *dest, const uint64_t *src, uint32_t nblock)
|
||||
{
|
||||
volatile __m128i *dp = (volatile __m128i *) dest;
|
||||
diff --git opa/opa_dwordcpy-x86_64.c opa/opa_dwordcpy-x86_64.c
|
||||
index dfb7755d5fdf..e1313bc4f25c 100644
|
||||
--- opa/opa_dwordcpy-x86_64.c
|
||||
+++ opa/opa_dwordcpy-x86_64.c
|
||||
@@ -192,33 +192,6 @@ void hfi_pio_blockcpy_512(volatile uint64_t *dest, const uint64_t *src, uint32_t
|
||||
}
|
||||
#endif
|
||||
|
||||
-void hfi_pio_blockcpy_256(volatile uint64_t *dest, const uint64_t *src, uint32_t nblock)
|
||||
-{
|
||||
- volatile __m256i *dp = (volatile __m256i *) dest;
|
||||
- const __m256i *sp = (const __m256i *) src;
|
||||
-
|
||||
- psmi_assert((dp != NULL) && (sp != NULL));
|
||||
- psmi_assert((((uintptr_t) dp) & 0x3f) == 0x0);
|
||||
-
|
||||
- if ((((uintptr_t) sp) & 0x1f) == 0x0) {
|
||||
- /* source and destination are both 32 byte aligned */
|
||||
- do {
|
||||
- __m256i tmp0 = _mm256_load_si256(sp);
|
||||
- __m256i tmp1 = _mm256_load_si256(sp + 1);
|
||||
- _mm256_store_si256((__m256i *)dp, tmp0);
|
||||
- _mm256_store_si256((__m256i *)(dp + 1), tmp1);
|
||||
- } while ((--nblock) && (dp = dp+2) && (sp = sp+2));
|
||||
- } else {
|
||||
- /* only destination is 32 byte aligned - use unaligned loads */
|
||||
- do {
|
||||
- __m256i tmp0 = _mm256_loadu_si256(sp);
|
||||
- __m256i tmp1 = _mm256_loadu_si256(sp + 1);
|
||||
- _mm256_store_si256((__m256i *)dp, tmp0);
|
||||
- _mm256_store_si256((__m256i *)(dp + 1), tmp1);
|
||||
- } while ((--nblock) && (dp = dp+2) && (sp = sp+2));
|
||||
- }
|
||||
-}
|
||||
-
|
||||
void hfi_pio_blockcpy_128(volatile uint64_t *dest, const uint64_t *src, uint32_t nblock)
|
||||
{
|
||||
volatile __m128i *dp = (volatile __m128i *) dest;
|
||||
diff --git psm_hal_gen1/psm_hal_gen1_spio.c psm_hal_gen1/psm_hal_gen1_spio.c
|
||||
index 5444897a3e44..9b11a4ec133b 100644
|
||||
--- psm_hal_gen1/psm_hal_gen1_spio.c
|
||||
+++ psm_hal_gen1/psm_hal_gen1_spio.c
|
||||
@@ -171,8 +171,7 @@ ips_spio_init(const struct psmi_context *context, struct ptl *ptl,
|
||||
get_cpuid(0x7, 0, &id);
|
||||
|
||||
/* 32B copying supported */
|
||||
- ctrl->spio_blockcpy_large = (id.ebx & (1<<AVX2_BIT)) ?
|
||||
- hfi_pio_blockcpy_256 : ctrl->spio_blockcpy_med;
|
||||
+ ctrl->spio_blockcpy_large = ctrl->spio_blockcpy_med;
|
||||
|
||||
#ifdef PSM_AVX512
|
||||
/* 64B copying supported */
|
||||
31
libpsm2-gcc16-fixes.patch
Normal file
31
libpsm2-gcc16-fixes.patch
Normal file
@@ -0,0 +1,31 @@
|
||||
commit ae8ed797f23b904a80d054ef10e286abe854a3ba
|
||||
Author: Nicolas Morey <nmorey@suse.com>
|
||||
Date: Tue Jan 20 13:50:57 2026 +0100
|
||||
|
||||
libpsm2: gcc16 fixes
|
||||
|
||||
Signed-off-by: Nicolas Morey <nmorey@suse.com>
|
||||
|
||||
diff --git psm_mpool.c psm_mpool.c
|
||||
index e36e91724333..2a42e6e12287 100644
|
||||
--- psm_mpool.c
|
||||
+++ psm_mpool.c
|
||||
@@ -435,8 +435,6 @@ void psmi_mpool_chunk_dealloc(mpool_t mp, int idx)
|
||||
void psmi_mpool_destroy(mpool_t mp)
|
||||
{
|
||||
int i = 0;
|
||||
- size_t nbytes = mp->mp_num_obj * mp->mp_elm_size;
|
||||
-
|
||||
for (i = 0; i < mp->mp_elm_vector_size; i++) {
|
||||
if (mp->mp_elm_vector[i]) {
|
||||
#ifdef PSM_CUDA
|
||||
@@ -447,9 +445,7 @@ void psmi_mpool_destroy(mpool_t mp)
|
||||
}
|
||||
}
|
||||
psmi_free(mp->mp_elm_vector);
|
||||
- nbytes += mp->mp_elm_vector_size * sizeof(struct mpool_element *);
|
||||
psmi_free(mp);
|
||||
- nbytes += sizeof(struct mpool);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -1,3 +1,15 @@
|
||||
-------------------------------------------------------------------
|
||||
Tue Jan 20 12:53:12 UTC 2026 - Nicolas Morey <nicolas.morey@suse.com>
|
||||
|
||||
- Add patch to fix compilation with GCC 16 (bsc#1256971):
|
||||
- libpsm2-gcc16-fixes.patch
|
||||
|
||||
-------------------------------------------------------------------
|
||||
Fri Jul 4 06:43:53 UTC 2025 - Nicolas Morey <nicolas.morey@suse.com>
|
||||
|
||||
- Add libpsm2-disable-AVX.patch to completely disable AVX support
|
||||
and use only up to SSE4.2. (bsc#1245739)
|
||||
|
||||
-------------------------------------------------------------------
|
||||
Fri Mar 1 08:51:40 UTC 2024 - pgajdos@suse.com
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
#
|
||||
# spec file for package libpsm2
|
||||
#
|
||||
# Copyright (c) 2023 SUSE LLC
|
||||
# Copyright (c) 2025 SUSE LLC
|
||||
#
|
||||
# All modifications and additions to the file contributed by third parties
|
||||
# remain the property of their copyright owners, unless otherwise agreed
|
||||
@@ -34,8 +34,11 @@ URL: https://github.com/cornelisnetworks/opa-psm2/
|
||||
Source0: %{name}-%{version}%{git_ver}.tar.bz2
|
||||
Source1: libpsm2.changelog
|
||||
Source2: libpsm2-rpmlintrc
|
||||
Source100: README.md
|
||||
Patch2: libpsm2-use_RPM_OPT_FLAGS.patch
|
||||
Patch3: libpsm2-use-exported-variable-for-version-and-release.patch
|
||||
Patch4: libpsm2-disable-AVX.patch
|
||||
Patch5: libpsm2-gcc16-fixes.patch
|
||||
BuildRequires: libnuma-devel
|
||||
BuildRequires: libuuid-devel
|
||||
BuildRequires: pkgconfig
|
||||
|
||||
Reference in New Issue
Block a user