commit 4e8479a47ead60cb8c19995233d23950151088f8d2f5b9df87c56d62a27b7536 Author: Nicolas Morey Date: Fri Nov 29 15:18:23 2024 +0000 Accepting request 1227162 from home:NMorey:branches:science:HPC - Add psm3-fix-SIGILL-on-system-not-supporting-AVX.patch to fix SIGILL happening during init on older CPU (bsc#1213538, bsc#1233356). - Refresh libfabric-libtool.patch to support patch -p0 OBS-URL: https://build.opensuse.org/request/show/1227162 OBS-URL: https://build.opensuse.org/package/show/science:HPC/libfabric?expand=0&rev=106 diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..9b03811 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,23 @@ +## Default LFS +*.7z filter=lfs diff=lfs merge=lfs -text +*.bsp filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.gem filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.jar filter=lfs diff=lfs merge=lfs -text +*.lz filter=lfs diff=lfs merge=lfs -text +*.lzma filter=lfs diff=lfs merge=lfs -text +*.obscpio filter=lfs diff=lfs merge=lfs -text +*.oxt filter=lfs diff=lfs merge=lfs -text +*.pdf filter=lfs diff=lfs merge=lfs -text +*.png filter=lfs diff=lfs merge=lfs -text +*.rpm filter=lfs diff=lfs merge=lfs -text +*.tbz filter=lfs diff=lfs merge=lfs -text +*.tbz2 filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.ttf filter=lfs diff=lfs merge=lfs -text +*.txz filter=lfs diff=lfs merge=lfs -text +*.whl filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..57affb6 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +.osc diff --git a/_multibuild b/_multibuild new file mode 100644 index 0000000..6c4bdf9 --- /dev/null +++ b/_multibuild @@ -0,0 +1,4 @@ + + fabtests + + diff --git a/_service b/_service new file mode 100644 index 0000000..e5b6124 --- 
/dev/null +++ b/_service @@ -0,0 +1,18 @@ + + + git + https://github.com/ofiwg/libfabric.git + no + .git + libfabric + @PARENT_TAG@.@TAG_OFFSET@.%h + v(.*) + \1 + 159219639b7fd69d140892120121bbb4d694e295 + + + libfabric*.tar + bz2 + + + diff --git a/baselibs.conf b/baselibs.conf new file mode 100644 index 0000000..280c5e6 --- /dev/null +++ b/baselibs.conf @@ -0,0 +1,4 @@ +libfabric1 +libfabric-devel + requires -libibmad- + requires "libfabric1- = " diff --git a/fabtests-rpmlintrc b/fabtests-rpmlintrc new file mode 100644 index 0000000..c9bb498 --- /dev/null +++ b/fabtests-rpmlintrc @@ -0,0 +1,6 @@ +# This line is mandatory to access the configuration functions +from Config import * + +addFilter("no-manual-page-for-binary fi_.*") +addFilter("no-manual-page-for-binary runfabtests.sh") +addFilter("no-manual-page-for-binary rft_yaml_to_junit_xml") diff --git a/fabtests.spec b/fabtests.spec new file mode 100644 index 0000000..c8e03ec --- /dev/null +++ b/fabtests.spec @@ -0,0 +1,64 @@ +# +# spec file for package fabtests +# +# Copyright (c) 2024 SUSE LLC +# +# All modifications and additions to the file contributed by third parties +# remain the property of their copyright owners, unless otherwise agreed +# upon. The license for this file, and modifications and additions to the +# file, is the same license as for the pristine package itself (unless the +# license for the pristine package is not an Open Source License, in which +# case the license is the MIT License). An "Open Source License" is a +# license that conforms to the Open Source Definition (Version 1.9) +# published by the Open Source Initiative. 
+ +# Please submit bugfixes or comments via https://bugs.opensuse.org/ +# + + +%define git_ver .0.159219639b7f + +Name: fabtests +Version: 1.22.0 +Release: 0 +Summary: Test suite for libfabric API +License: BSD-2-Clause OR GPL-2.0-only +Group: Development/Tools/Other +URL: http://www.github.com/ofiwg/libfabric +Source: libfabric-%{version}%{git_ver}.tar.bz2 +Source1: fabtests-rpmlintrc +Patch0: libfabric-libtool.patch +BuildRequires: autoconf +BuildRequires: automake +BuildRequires: libfabric-devel = %{version} +BuildRequires: libtool +BuildRoot: %{_tmppath}/%{name}-%{version}-build + +%description +Fabtests provides a set of examples that uses libfabric, a fabric software library. + +%prep +%autosetup -p0 -n libfabric-%{version}%{git_ver} + +%build +cd fabtests +./autogen.sh +%configure %{?_with_libfabric} +%make_build + +%install +%make_install -C fabtests + +%files +%defattr(-,root,root) +%dir %{_datadir}/fabtests/ + +%{_bindir}/* +%{_mandir}/man1/* +%{_mandir}/man7/* +%{_datadir}/fabtests/* + +%doc AUTHORS README NEWS.md +%license COPYING + +%changelog diff --git a/libfabric-1.22.0.0.159219639b7f.tar.bz2 b/libfabric-1.22.0.0.159219639b7f.tar.bz2 new file mode 100644 index 0000000..1b68ce8 --- /dev/null +++ b/libfabric-1.22.0.0.159219639b7f.tar.bz2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b899ab5837b3125f7cba015d1511adcf0c68ce1b1af4bd48e8aee8a566276bed +size 3492290 diff --git a/libfabric-libtool.patch b/libfabric-libtool.patch new file mode 100644 index 0000000..e28d111 --- /dev/null +++ b/libfabric-libtool.patch @@ -0,0 +1,23 @@ +commit 81de541fdd4abc77167f955f8ddd85f195888e5c +Author: Nicolas Morey-Chaisemartin +Date: Fri Sep 30 13:49:16 2022 +0200 + + libfabric libtool + + Disable static libs + + Signed-off-by: Nicolas Morey-Chaisemartin + +diff --git configure.ac configure.ac +index 7dacf69c9a69..90fcdc610754 100644 +--- configure.ac ++++ configure.ac +@@ -193,7 +193,7 @@ m4_version_prereq([2.70], + dnl Check for compiler 
features + AC_C_TYPEOF + +-LT_INIT ++LT_INIT([disable-static]) + LT_OUTPUT + + dnl dlopen support is optional diff --git a/libfabric.changes b/libfabric.changes new file mode 100644 index 0000000..45ef242 --- /dev/null +++ b/libfabric.changes @@ -0,0 +1,2359 @@ +------------------------------------------------------------------- +Thu Nov 28 15:47:54 UTC 2024 - Nicolas Morey + +- Add psm3-fix-SIGILL-on-system-not-supporting-AVX.patch to fix + SIGILL happening during init on older CPU (bsc#1213538, bsc#1233356). +- Refresh libfabric-libtool.patch to support patch -p0 + +------------------------------------------------------------------- +Mon Aug 5 11:20:55 UTC 2024 - Filip Kastl + +- Add -Wno-incompatible-pointer-types to CFLAGS to enable building + for 32bit with GCC 14. + +------------------------------------------------------------------- +Sun Aug 4 16:17:16 UTC 2024 - Nicolas Morey + +- Update to 1.22.0 + - Coll + - Fix Coverity issues + - Core + - General bug fixes + - hmem: change neuron get_dmabuf_fd error code + - Fix an error in the error handling path of fi_param_define() + - Makefile.am: Add Windows build files to distribution tarball + - hmem: disable ZE IPC + - Add profile variables for connections and memory allocated + - hmem: Fix `cuDeviceCanAccessPeer()` error reporting + - man: Update text for `len` parameter + - Add page size MR attr field + - man: Extend fi_mr_refresh support + - man: Improve FI_MR_ALLOCATED documentation + - man: Support optional MR desc + - man: Improve FI_MR_HMEM documentation + - Added ofi_get_realtime interfaces + - Add endpoint options for max message size and inject size + - Add Windows definition for `EREMOTEIO` + - EFA + - General improvement and bug fixes + - Handle recv cancel for zero copy recv + - Avoid iterating EP list in CQ read + - Add RDMA core errno for remote unknown peer + - Map EFA errnos to Libfabric codes + - Improve the zero-copy receive feature + - Improve the handshake enforcement procedure + - Support 
unsolicited rdma-write recv + - Support FI_MORE for eager send and rdma-write + - Improve the EFA_IO_COMP error code and explanation + - Improve the unit test for LL128 protocol + - Distinguish max RMA size from msg size + - Hooks + - dmabuf: Fix incompatible pointer warning + - OPX + - Add missing file needed for fabric direct build to release package + - Fix performance issue caused by not setting ACK bit in the single + SDMA packet case + - TID cache debug improvements + - Detection of driver lack of support for TID + - Multi-CTS support for TID + - Removal of statement that TID is not supported + - OPX Tracer improvements + - Improvements to OPX shared memory cleanup + - H to H performance improvements for build that supports HMEM + - Bug fix for a threshold check + - Bug fix for FI_SELECTIVE_COMPLETION + - CN5000 fixes + - Parameterization of various thresholds + - Further enhancements to support NVIDIA GPUs, included CUDA-allocated + bounce buffers and in-provider support for GDRCopy + - Enhancements to enable support for CN5000 hardware + - Better checking for TID support + - General TID enhancements + - Pkey error handling + - Send work queue splitting + - Support for OPX tracer for profiling purposes + - Coverity scan fixes + - Fixes and enhancements to logging and debug messages + - Intranode RMA read fixes + - Fix compile issues + - Fix shared memory segment index creation bug + - PSM3 + - Update provider to sync with IEFS 11.7.0.0.110 + - Improved auto-tuning features for PSM3, including dynamic Credit Flows + and detecting the presence of the rv kernel module + - Improved PSM3 intra-node performance for large message sizes + - SHM + - Added support for write() method to submit DSA work + - Touch all buffer pages after DSA page fault + - Add return and more descriptive error message + - Fix coverity about incorrect sign + - Fix memory leaks for srx + - Fix atomic read + - Sockets + - Fix Coverity issues + - USNIC + - Fix a few Coverity issues + - Util + 
- Discard outstanding operations in util_srx_close + - Enable profile on the size of bufpool allocated. + - Add more predefined profile variables. + - Fix issue while displaying addresses with fi_info -a + - fi_pingpong: Fix out of scope memory leak + - Add source address to fi_pingpong + - Verbs + - Flush CQ for SQ on no SQ credit + - Optimize search for device max inline size + - Enable profiling + - Fabtests + - pytest/shm: reduce the msg size in test_unexpected_msg + - Fix synapseai fabtests build + - Add pytests for EFA zero-copy receive + - Add benchmark option for `FI_OPT_MAX_MSG_SIZE` + - benchmarks: Add synapseai support + - Disable fi_rdm_tagged_peek test for ucx and psm3 + - Add manual init sync to fi_rdm_multiclient and fi_rdm + - Refactor ft_sock_sync to take in a socket + - Add fi_rdm_bw test + - Skip rma_pingpong write tests + - Init rx_buf before sending data + - Add rma_pingpong tests to makefile + - pytest: use different message sizes for rma pingpong + - Fix missing fixture memory_type in test_rma_pingpong_range_no_inject + - pytest: account for process startup overhead in client-server tests + - pytest: save client process output to a file + - Support testing inject with cq data + - multinode: update arguments + - multi_ep: Fix memory leak + - rdm_tagged_peek: Align rx's msg_order with tx's + - Add backlog > 0 to listen call + +------------------------------------------------------------------- +Wed Apr 3 11:32:43 UTC 2024 - Nicolas Morey + +- Enable ucx and new efa provider on 64b architectures. +- Use a single changes file for libfabric and fabtests. 
+- Update to 1.21.0 + - Core + - Various updates and fixes in man pages + - Fix xpmem memory corruption + - Extend FI_PROVIDER_PATH to allow setting preferred DL provider + - Add a SECURITY.md file + - Document preferred threading model for scalable endpoints + - Move FI_PRIORITY to internal flag + - Remove FI_PROV_SPECIFIC + - Remove unimplemented or unused features + - Support cntr byte counting + - configure: Do not check for xpmem if disabled + - Add FI_PROGRESS_CONTROL_UNIFIED + - hmem/cuda: Get multiple attributes at once in cuda_is_addr_valid + - configure: Add -pipe by default to CFLAGS + - Selectively generate warnings on failed loading of DL providers + - hmem: introduce ofi_dev_reg_copy_*_iov ops + - Print provider path on fabric creation + - Introduce FI_OPT_SHARED_MEMORY_PERMITTED + - README.md: Add badge for openssf scorecard + - man: Regulate the fi_setopt call sequence. + - man: Clarify the usage of FI_REMOTE_CQ_DATA flag + - man: Add ucx provider to the fi_provider man page + - configure.ac: add extra check for 128 bit atomic support + - include/osd: align atomic complex definitions + - hmem/synapseai: Refine the error handling and warning + - Specify C11 standard for Visual Studio builds + - configure: Do not check for xpmem if disabled + - man page fixes + - EFA + - General improvement and bug fixes + - Propagate errnos from core functions untouched + - Create 1:1 relationship between libfabric CQs and IBV CQs + - Do not progress ep inside transmission call when hitting EAGAIN + - Remove unnecessary check in rdma write. 
+ - Handle rx pkts error without ope + - Add a new rx pkt counter + - Enable runting for neuron with a different runt size + - Distinguish unresponsive receiver errors + - Remove unnecessary handshake in send path + - Don't fail the whole domain init if cudamalloc failed + - Introduce efa specific domain operations + - Implement FI_OPT_SHARED_MEMORY_PERMITTED + - Do not memset rxe to 0 on init + - Reduce # of error cases in happy path + - Add FI_EFA_USE_HUGE_PAGE to efa man page. + - Don't do handshake for local fi_write + - Add pingpong test after exhausting MRs + - Introduce utilities to exhaust MRs on EFA device + - Test EFA with a 1GiB message + - Do not abort on all deprecated env vars + - Onboard fi_mr_dmabuf API in mem reg ops. + - Try registering cuda memory via dmabuf when checking p2p + - Introduce HAVE_EFA_DMABUF_MR macro in configure + - Use long CTS protocol if long read and runting read protocols fail + because of memory registration limits + - Remove unnecessary check in rdma write. + - Enable runting for neuron with a different runt size + - Handle rx pkts error without ope + - Distinguish unresponsive receiver errors + - Add `efa_show_help()` + - Refactor error code definitions + - Remove error message assertions from CQ unit tests + - Refactor `efa_strerror()` + - Doxyfile: Configure tabs to 8 spaces + - Rename Doxyfile + - Hooks + - dmabuf_peer_mem: initialize fd to suppress compiler warning + - NETDIR + - Removed. The functionality is integrated into the verbs provider. 
+ - OPX + - Fix compiler warnings and coverity issues + - General improvement and bug fixes + - Add GPU support to expected TID + - RZV RTS packet exclude empty immediate data + - Add more efficient check for cuda-resident user buffer + - Improve default HFI selection logic in multi rail environments + - Flush dead list opportunistically + - Add RISC-V support + - Make update HDRQ register frequency configurable at build time + - Removed all references to the reliability nack threshold env var + - Added missing tuneables, rearranged to match fi_info -e output + - Use BAR load/store macros + - Check HFI driver version to allow GPU-enabled build/run + - Added kernel and driver version check to allow/disallow expected receive TID + - Fix max SHM connections to allow up to 16 HFIs + - Use FI_HMEM_SYSTEM for Cuda-Managed (Unified) memory + - Handle FI_OPT_CUDA_API_PERMITTED + - Use contiguous send when only one iov present + - Always replay TID packets over SDMA + - Add Virtual Lane and Partition pkey (FI_OPX_SL and FI_OPX_PKEY) + - Forced AV type to be AV Map when requested AV is unsupported + - Reduce size of opx_shm_tx + - Add GPU support for RMA Atomic operations + - Add GPU support for RMA reads and writes + - Add HMEM debug counters + - Print debug counters upon receiving SIGUSR1 + - Fix multi-receive to work with contiguous rzv payload + - Initial support for GPU / FI_HMEM + - Limit multipacket eager implementation to tagged sends + - Read, verify and store some hfi chip attributes + - PSM3 + - Update provider to sync with IEFS 11.6.0.0.231 + - Fix some conditional build errors + - RSTREAM + - Removed. 
+ - RXM + - Add option to auto detect hmem iface of user buffers + - SHM + - Manually align 8 byte fields in memory region + - Close device_fds for connected peers when the EP is closed + - Print shm name and error code when failed to open + - Mark send as completed when a message is discarded + - Don't close dmabuf-fd when a request is done + - Revert the smr_region fields adjustment + - Fix various coverity issues + - Add ep to cq ep list once in cq bind + - Add ofi_buf_alloc error handling + - Revert the smr_region fields adjustment + - Don't close dmabuf-fd when a request is done + - Mark send as completed when a message is discarded + - Print shm name and error code when failed to open + - Close device_fds for connected peers when the EP is closed + - SOCKETS + - fix compiler warnings and coverity issues + - UCX + - Fix incorrect enum value in FI_DBG() and FI_WARN() + - USNIC + - Turn off compiler warnings of possible string truncation + - Util + - Make ep_list_lock noop for FI_PROGRESS_CONTROL_UNIFIED + - Save control progress model to util_domain + - Set import monitor state to idle upon close + - Add name field to memory monitors + - memhooks: Fix a bug when calculating mprotect region + - Modify domain_attr based on FI_AV_AUTH_KEY + - Verbs + - Non-blocking EP creation + - Address cm_id resource leak in rdma_reject path + - Redirected error handle logic for dmabuf failure in verbs + - Added rocr dmabuf support under verbs + - Windows: Check error code from GetPrivateData + - Add missing lock to protect SRX + - Fix compiler warnings about out of boundary access + - Fabtests + - Fix various coverity issues + - General improvement and bug fixes + - Add multi_ep test + - Serialize the run of fi_cq_test + - Utilize `junitparser` module directly + - Add progress models to SHM/EFA fabtests + - Add option to change progress model + - efa/rnr_cq_read_err: poll cq when hitting EAGAIN + - Allow testing multi_ep with shared/non-shared cq and av + - Print warning for 
HMEM iface init failure + - efa: Add small tx_rx size test + - pytest: Make ssh connection error pattern less stringent + - Add new exclude file for io_uring tests + - Add rma_pingpong benchmark + - efa: Make 1G tests run faster + - pytests: add command line argument for dmabuf reg + - Bump Libfabric API version. + - Add option to support dmabuf MR + - Add dmabuf ops for cuda. + - Replace strtok with strtok_r + - Add new exclude file for io_uring tests + +------------------------------------------------------------------- +Mon Mar 25 07:45:03 UTC 2024 - Nicolas Morey + +- Update to 1.20.1 + - Core + - hmem/ze: Change the library name passed to dlopen + - hmem/ze: map device id to physical device + - hmem/ze: skip duplicate initialization + - hmem/ze: dynamically allocate device resources based on number of devices + - hmem/ze: fix hmem_ze_copy_engine variable look up + - hmem/ze: Increase ZE_MAX_DEVICES to 32 + - man: Fix typo in fi_getinfo man page + - Fix compiler warning when compiling with ICX + - man: Fix fi_rxm.7 and fi_collective.3 man pages + - man: Update EFA docs for FI_EFA_INTER_MIN_READ_WRITE_SIZE + - EFA + - efa_rdm_ep_record_tx_op_submitted() rm peer lookup + - Remove peer lookup from efa_rdm_pke_sendv() + - Make handshake response use txe + - test: Only close SHM if SHM peer is Created + - Handshake code allocs txe via efa util + - Initialize txe.rma_iov_count to 0 + - Switch fi_addr to efa_rdm_peer in trigger_handshake + - Downgrade EFA Endpoint Creation WARN to INFO + - Init srx_ctx before use + - Clean up generic_send path + - Pass in efa_rdm_ep to efa_rdm_msg_generic_recv() + - Make recv path slightly more efficient + - re-org rma write to avoid duplicate checks + - Add missing sync_memops call to writedata + - use peer pointer from txe in read, write and send + - Pass in peer pointer to txe + - Get rid of noop instruction from empty #define + - Remove noop memset + - Fix the ibv cq error handling. 
+ - Don't do handshake for local read + - Fix a typo in configure.m4 + - Make runt_size aligned + - OPX + - Initialize cq error data size + - RXM + - Fix data error with FI_OFI_RXM_USE_RNDV_WRITE=1 + - SHM + - Fix coverity issue about resource leak + - Adjust the order of smr_region fields. + - Allocate peer device fds dynamically + - Util + - Fix coverity issue about missing lock + - Implement timeout in util_wait_yield_run() + - Fix bug in util_cq startup error case + - util_mem_hooks: add missing parentheses + - Verbs + - Windows: Resolve regression in user data retrieval + - Fabtests + - efa: Close ibv device after use + - efa: Get device MR limit from ibv_query_device + - efa: Add simple unexpected test to MR exhaustion test + - pytest: add a new ssh connection error pattern + +------------------------------------------------------------------- +Thu Feb 29 16:18:32 UTC 2024 - pgajdos@suse.com + +- Use %autosetup macro. Allows to eliminate the usage of deprecated + %patchN + +------------------------------------------------------------------- +Sun Nov 19 17:27:58 UTC 2023 - Nicolas Morey + +- Update to 1.20.0 (jsc#PED-5777, jsc#PED-5893, jsc#PED-5889) + - Core + - General bug fixes and code clean-up + - configure.ac: add extra check for 128 bit atomic support + - hmem/synapseai: Refine the error handling and warning + - Introduce FI_ENOMR + - hmem/cuda: fix a bug when calculating aligned size. + - Handle dmabuf for ofi_mr_cache* functions. + - Handle dmabuf flag in ofi_mr_attr_update + - Handle dmabuf for mr_map insert. 
+ - man: Fix the description of virtual address when FI_MR_DMABUF is set + - man: Clarify the definition of FI_OPT_MIN_MULTI_RECV + - hmem/cuda: Add dmabuf fd ops functions + - include/ofi_atomic_queue: Properly align atomic values + - Define fi_av_set_user_id + - Support multiple auth keys per EP + - Simplify restricted-dl feature + - hmem: Only initialize synapseai if device exists + - Add "--enable-profile" option + - windows: Updated config.h + - Add environment variable for selective HMEM initialization + - Add restricted dlopen flag to configure options + - hmem: generalize the use of OFI_HMEM_DATA to non-cuda iface + - hmem: fail cuda_dev_register if gdrcopy is not enabled + - Add 1.7 ABI compat + - Define fi_domain_attr::max_ep_auth_key + - hmem: Add new op to hmem_ops for getting dmabuf fd + - hmem/cuda: Update cuda_gdrcopy_dev_register's signature + - mr_cache: Define ofi_mr_info::flags + - Add ABI compat for fi_cq_err_entry::src_addr + - Define fi_cq_err_entry::src_addr + - Add base_addr to fi_mr_dmabuf + - hmem: Set FI_HMEM_HOST_ALLOC for ze addr valid + - hmem: Support dev reg with FI_HMEM_ZE + - tostr: Added fi_tostr() for data type struct fi_cq_err_entry. 
+ - hmem_ze: fix incorrect device id in copy function + - Introduce new profiling interface for low-level statistics + - hmem: Support dev reg with FI_HMEM_CUDA + - hmem: Support dev reg with FI_HMEM_ROCR + - hmem: Support dev reg with FI_HMEM_SYSTEM + - hmem: Define optimized HMEM memcpy APIs + - Implement memhooks atfork child handler + - hmem: Support ofi_hmem_get_base_addr with sys mem + - hmem: Add length field to ofi_hmem_get_base_addr + - mr_cache: Improve cache hit rate + - mr_cache: Purge dead regions in find + - mr_cache: Update find to remove invalid MR entries + - mr_cache: Update find with MM valid check + - Add direct support for dma-buf memory registration + - man/fi_tagged: Remove the peek for data ability + - indexer: Add byte idx abstraction + - Add missing FI_REMOTE_CQ_DATA for fi_inject_writedata + - Add configure flags for more sanitizers + - Fix fi_peer man page inconsistency + - include/fi_peer: Add cq_data to rx_entry, allow peer to modify on unexp + - Add XPMEM support + - EFA + - General bug fix and code clean-up + - Do not abort on all deprecated env vars + - Onboard fi_mr_dmabuf API in mem reg ops. 
+ - Try registering cuda memory via dmabuf when checking p2p + - Introduce HAVE_EFA_DMABUF_MR macro in configure + - Add read nack protocol docs + - Receiver send NACK if runt read fails with ENOMR + - Sender switch to long CTS protocol if runt read fails with ENOMR + - Receiver send NACK if long read fails with ENOMR + - Update efa_rdm_rxe_map_remove to accept msg_id and addr + - Sender switch to long CTS protocol if long read fails with ENOMR + - Introduce new READ_NACK feature + - Use SHM's full inject size + - Add testing for small messages without inject + - Enable inject rdma write + - Use bounce buffer for 0 byte writes + - Onboard ofi_hmem_dev_register API + - Update cuda_gdrcopy_dev_register's signature + - Allocate pke_vec, recv_wr_vec, sge_vec from heap + - Close shm resource when it is disabled in ep + - Disable RUNTING for Neuron + - Move cuda-sync-memops from MR to EP + - Do not insert shm av inside efa progress engine + - Enable shm when FI_HMEM and FI_ATOMIC are requested + - Adjust posted receive size to pkt_size + - Do not create SHM peer when SHM is disabled + - Use correct threading model for shm + - Restrict RDMA read to compatible EFA devices + - Add EFA device version to handshake + - Add missing locks in efa_cntr_wait. + - Add writedata RNR fabtest + - Handle RNRs from RDMA writedata + - Check opt_len in efa_rdm_ep_getopt + - Use correct tx/rx op_flags for shm + - Hooks + - dmabuf: Initialize fd to suppress compiler warning + - trace: Add log on FI_VAR_UNEXP_MSG_CNT when enabled. + - trace: Fixed trace log format on some attributes. 
+ - OPX + - Fix compiler warnings + - PSM3 + - Fix compiler warnings + - Update provider to sync with IEFS 11.5.1.1.1 + - RXM + - Remove unused function + - Use gdrcopy in rma when emulating injection + - Use gdrcopy in eager send/recv + - Add hmem gdrcopy functions + - Remove unused dynamic rbuf support + - SHM + - General bug fixes and cleanup + - Add ofi_buf_alloc error handling + - Only copy header + msg on unexpected path + - Add FI_HMEM atomic support + - Add memory barrier before updating resp for atomic + - Add more error output + - Reduce atomic locking with ofi_mr_map_verify + - Only increment tx cntr when inject rma succeeded. + - Use peer cntr inc ops in smr_progress_cmd + - Allow for inject protocol to buffer more unexpected messages + - Change pending fs to bufpool to allow it to grow + - Add unexpected SAR buffering + - Use generic acronym for shm cap + - Move CMA to use the p2p infrastructure + - Add p2p abstraction + - Load DSA dependency dynamically + - Replace tx_lock with ep_lock + - Calculate comp vars when writing completion + - Move progress_sar above progress_cmd + - Rename SAR status enum to be more clear + - Make SAR protocol handle 0 byte transfer. 
+ - Move selection logic to smr_select_proto() + - Sockets + - Fix compiler warnings + - Fix provider name and api version in returned fi_info struct + - TCP + - Add profiling interface support + - Pass through rdm_ep flags to msg eps + - Derive cq flags from op and msg flags + - Do not progress ep that is disconnected + - Set FI_MULTI_RECV for last completed RX slice + - Return an error if invalid sequence number received + - xnet_progress_rx() must only be called when connected + - Reset ep->rx_avail to 0 after RX queue is flushed + - Disable the EP if an error is detected for zero-copy + - Add debug tracking of transfer entries + - Negotiate support for rendezvous + - Add rendezvous protocol option + - Generalize xnet_send_ack + - Flatten protocol header definitions + - Remove unused dynamic rbuf support + - Define tcp specific protocol ops + - Remove unneeded and incorrect rx_entry init code + - UCX + - Add FI_HMEM support + - Initialize ep_flush to 1 + - Util + - General bug fixes + - memhooks: Fix a bug when calculating mprotect region + - Check the return value of ofi_genlock_init() + - Update checks for FI_AV_AUTH_KEY + - Define domain primary and secondary caps + - Add profiling util functions + - Update util_cq to support err_data + - Update ofi_cq_readerr to use new memcpy + - Update ofi_cq_err_memcpy to handle err_data + - Zero util cancel err entry + - Move FI_REMOTE/LOCAL_COMM to secondary caps + - Alter domain max_ep_auth_key + - Add domain checks for max_ep_auth_key + - Revert util_cntr->ep_list_lock to ofi_mutex + - Add NIC FID functions to ofi.h + - Add EP and domain auth key checking + - Add bounds checks to ibuf get + - Define dlist_first_entry_or_null + - Update util_getinfo to dup auth_key + - Revert util_av, util_cq and util_cntr to mutex + - Add missing calls to (de)initialize monitor's mutexes + - Avoid attempting to cleanup an uninitialized MR cache + - Rename ofi_mr_info fields + - Add rv64g support to memory hooks + - Verbs + - Windows: 
Check error code from GetPrivateData + - Add missing lock to protect SRX + - Add synapseai dmabuf mr support + - Bug fix for matching domain name with device name + - Windows: Fetch rejected connection data + - Add support for DMA-buf memory registration + - Windows: Fix use-after-free in case of failure in fi_listen + - Windows: Map ND request type to ibverbs opcode + - Fix memory leak when creating EQ with unsupported wait object + - Track ep state to prevent duplicate shutdown events + - Fabtests + - Update man page + - pytests/efa: onboard dmabuf argument for test_mr + - pytest: make do_dmabuf_reg_for_hmem an cmdline argument + - Bump Libfabric API version. + - mr_test: Add dmabuf support + - Introduce ft_get_dmabuf_from_iov + - unexpected_msg: Use ft_reg_mr to register memory + - pytest: Allow registering mr with dmabuf + - Add dmabuf support to ft_reg_mr + - Add dmabuf ops for cuda. + - Test max inject size + - Add FI_HMEM support to fi_rdm_rma_event and fi_rdm tests + - memcopy-xe: Fix data verification error for device buffer + - dmabuf-rdma: Increase the number of NICs that can be tested + - dmabuf-rdma: Remove redundant libze_ops definition + - fi-mr-reg-xe: Skip native dmabuf reg test for system memory + - Check if fi_info is returned correctly in case of FI_CONNREQ + - cq_data: relax CQ data validation to cq_data_size + - Add ZE host alloc function + - Use common device host buffer for check_buf + - hmem_ze: allocate one cq and cl on init + - fi-mr-reg-xe: Add testing for dmabuf registration + - scripts: use yaml safe_load + - macos: Fix build error with clang + - multinode: Use FI_DELIVERY_COMPLETE for 'barrier' + - Handle partial read scenario for fi_xe_rdmabw test For cross node tests + - pytest/efa: add cuda memory marker + - pytest/efa: Skip some configuration for unexp msg test on neuron. + - runfabtests.py: ignore error due to no tests are collected. 
+ - pytest/efa: extend unexpected msg test range + - pytest/shm: extend unexpected msg test range + - pytest: Allow running shm fabtests in parallel + - unexpected_msg.c: Allow running the test with FI_DELIVERY_COMPLETE + - runfabtests.sh: run fi_unexpected_msg with data validation + - pytest/shm: Extend test_unexpected_message + - unexpected_msg: Make tx/rx_size large enough + - pytest/shm: Extend shm's rma bw test + - Update shm.exclude + +------------------------------------------------------------------- +Mon Sep 4 07:47:59 UTC 2023 - Nicolas Morey + +- Update to 1.19.0 + - Core + - General code cleanup and restructuring + - Add ofi_hmem_any_ipc_enabled() + - ofi_consume_iov allows 0-byte consume + - ofi_consume_iov consistency + - ofi_indexer: return error code when iterating + - getinfo: Add post filters for domain and fabric names + - Filter loopback device if iface is specified + - bsock: Fix error checking for -EAGAIN + - windows/osd: Remove unneeded check to silence coverity + - windows/osd: Move variable declaration to silence coverity + - Introduce gdrcopy awareness to hmem copy + - mr/cache: Fix fi_mr_info initialization + - hmem_cuda: remove gdrcopy from cuda hmem copy path + - iouring: Fix wrong indent in ofi_sockapi_accept_uring() + - Implement ofi_sockctx_uring_poll_add() + - hmem: introduce gdrcopy from/to cuda iov functions + - hmem: Deprecate `FI_HMEM_CUDA_ENABLE_XFER` + - hmem_cuda: Restrict CUDA IPC based on peer accessibility + - hmem_cuda: Log number of CUDA devices detected + - hmem_cuda: Refactor global variables + - tostr: Remove the extra dir "shared/" from "include/" and "src/" . 
+ - hmem_ze: fix ZE is valid check + - hmem_rocr: fix offset calculation + - hmem_rocr: use ofi spinlock functions + - hmem_rocr: minor fixes + - hmem_neuron: convert warn to info for nrt_get_dmabuf_fd not found + - hmem_neuron: check existence of neuron devices during initialization + - tostr: Moved Windows functions in shared/ofi_str.c to windows/osd.h + - tostr: Add helper functions ofi_tostr_size() and ofi_tostr_count(). + - EFA + - Onboard Peer API, use shm provider as a peer provider + - Uses util SRX framework in shared receive procedures. + - Register shm MR with hmem_data, allow shm to use gdrcopy for cuda data movement + - Finish the refactor for rxr squash. + - Use rdma-core WR API for send requests + - Check optlen in getopt call + - Fix the rdma-read support check in RMA and MSG operations + - Optimize ep lock usage + - Use an internal fi_mr_attr for memory registration + - Hooks + - Init field in mr_attr to silence coverity + - Add profiling hook provider + - Rename cq hooking functions' names + - Added trace for resource creation operations + - OPX + - Initialize ofi_mr_info + - Fix dput credit check + - Only allocate replay buffer if psn is valid + - Support SHM Intra-node communication between single server HFI devices + - Fix incorrect packet size in packet header when sending CTS packet + - Added check to address Coverity scan defect + - Add multi-entry caching to TID rendezvous + - Fall back to default domain name for TID fabric + - Properly handle multiple IOVs in fi_opx_tsendmsg + - Fix OPX Rzv RTS receive operation SHM error (DAOS-related) + - Fix non-tagged sends may incorrectly set FI_TAGGED in send completions + - Add more info to reliability IOV buffer validation check + - Move dput packet build functions to new inline include + - Use fi_mr_attr in fi_opx_mr + - Disable Pre-NAKing by default, throttle until all outstanding replays ACK'd + - Fix reliability bug when NAKing the last PSN + - Update HeaderQ Register more frequently + - No 
rbuf_wrap needed for expected receive (TID) + - Fixes for Coverity scan issues + - Enhanced tag matching + - Tune expected recv for unaligned buffers + - Observability: Add finer logging granularity + - Reduce RTS immediate data and fix packet estimate for odd TID lengths + - Add additional sources for FI_OPX_UUID + - Peer + - Add cq_data to rx_entry, allow peer to modify on unexp + - Introduce peer cntr API + - Add foreach_unspec_addr API + - Add size as an input of the get_tag + - PSM3 + - Sync with IEFS 11.5.0.0.172 + - SHM + - Only poll IPC list when ROCR IPC is enabled + - Allow for SAR and inject protocol to buffer more unexpected messages + - Remove unused sar fields + - Make SAR protocol handle 0 byte transfer + - Load DSA dependency dynamically + - Change recv entry freestack into bufpool + - Remove shm signal + - Use util peer cntr implementation + - Make SHM default to domain level threading level + - Replace internal shared receive implementation with util_srx + - Lock entire progress loop + - Fix ROCR data coherency + - Add FI_LOCAL_COMM to shm attrs + - Handle empty freestack + - Fix bug in configure.m4 in atomics_happy assignment happy + - Add memory barrier before update resp->status for SAR + - Do not use inline/inject for read op + - Allow shm to use gdrcopy + - Refactor protocol selection code + - Init map fi addrs to FI_ADDR_NOTAVAIL + - TCP + - General code cleanups + - Restrict which EPs can be opened per domain + - Increase CM error debug output + - Avoid calling close() on an invalid socket after accept error + - Mark the EP as disconnected before flushing the queues + - Add assertion failures for xnet_{monitor,halt}_sock + - Disable ofi_dynpoll_wait() for non-blocking progress + - Move PEP pollin operations to io_uring + - Move EP poll operations to io_uring + - Early exit if ofi_bsock_flush() has operation in progress + - Implement pollin sockctx in bsock + - Add missing call to xnet_submit_uring() + - Add return error to 
xnet_update_pollflag() + - Remove the cancel sockctx from the EP structure + - Move io_uring cqe from the stack to progress struct + - Reduce stack size for epoll event array + - handle NULL av in xnet_freeall_conns() + - UCX + - Publish FI_LOCAL_COMM and FI_REMOTE_COMM capabilities + - Fix configure error with newer MOFED + - Fix segfault in unsignalled completions + - Util + - Add FI_PEER support to util counter + - Refactor the usage of cntrs + - Change util_ep to be a genlock + - Add util shared receive implementation + - Update log message for invalid AV type message + - Fix fi_mr_info initialization + - Add peer ID to MR cache + - Store hmem_data in ofi_mr_map + - Split the cq progress and reading entries in ofi_cq_readfrom + - Verbs + - Add event lock to EQ to serialize closing ep + - Remove saved_wc_list and use CQ directly + - Consolidate peer_mem and dmabuf support check + - Fix vrb_add_credits signature + - Introduce new progress engine structure + - Simplify (and correct) locking around progress operations + - General code restructuring + - Fabtests + - Fix reading addressing options + - Allow to change only the OOB address + - Allow to use FI_ADDR_STR with -F + - Fix bw buffer utilization + - Separate RX and RMA counters + - Fix tx counter with RMA + - Add FI_CONTEXT mode to rdm_cntr_pingpong + - Add HMEM support to fi_unexpected_msg test + - Fix array OOB during fabtest list parsing + - Enable shm tagged_peek test + - Fix windows build warnings + - Make tx_buf and rx_buf aligned to 64 bytes by default + - Fix windows build warnings for sscanf + - Use dummy ft_pin_core on macOS + - Fix some header includes + - sock_test: Do not use epoll if not available + - recv_cancel: initialize error entry + - Fix wrong size used to allocate tx_msg_buf + - unexpected: change defaults to support tcp + - unexpected: add unknown unexpected peer test + - Enable a list of arbitrary message sizes + - Enabled data validation for rma read & write + - bw_rma operates on 
distinct buffer offsets + - ft_post_rma issues reads from remote's tx_buf + - General code cleanup and restructuring + - rdm_tagged_peek: fix race condition synchronization + - Add FI_LOCAL_COMM/FI_REMOTE_COMM presence check to fi_getinfo_test + - Correct ft_exchange_keys in prefix-mode + - Make rdm_tagged_peek test more general + - Add unit test for fi_setopt + +------------------------------------------------------------------- +Mon Aug 7 16:54:07 UTC 2023 - Nicolas Morey + +- Drop support for obsolete TrueScale (bsc#1212146) + +------------------------------------------------------------------- +Mon Jul 3 16:15:56 UTC 2023 - Nicolas Morey + +- Update to 1.18.1 + - Core + - Fix build warning for ofi_dynpoll_get_fd + - EFA + - Handle 0-byte writes + - Apply byte_in_order_128_byte for all memory type + - Increase default shm_av_size to 256 + - Force handshake before selecting rtm for non-system ifaces. + - Only select readbase_rtm when both sides support rdma-read + - Bugfix for initializing SHM offload + - Correct CPPFLAGS during configure + - Make setopt support sendrecv aligned 128 bytes + - Make data size to be 128 byte multiples for in-order aligned send/recv + - prepare local read pkt entry for in-order aligned send/recv. + - Disable gdrcopy and cudamemcpy for in-order aligned recv. + - Increase the pad size in rxr_pkt_entry + - Make readcopy pkt pool 128 byte aligned + - Introduce alignment to support in order aligned ops + - Fix a bug when calling ibv_query_qp_data_in_order + - RMA operations will ensure FI_ATOMIC cap + - RMA operations will ensure FI_RMA cap + - Unittest atomics without FI_ATOMIC cap. + - Unittest RMA without FI_RMA cap. 
+ - Refactor pkt_entry assignment in poll_ibv loop + - Fixes for RDMA Write and Writedata + - RXM + - Revert rxm util peer CQ support + - Fix credit size parameter for flow ctrl + - SHM + - Fix DSA enable + - Assert read op and inject proto are mutually exclusive + - Fix ROCR data coherency + - Add FI_LOCAL_COMM to shm attrs + - Signal peer when peer is out of resources + - Handle empty freestack + - Fix bug in configure.m4 in atomics_happy assignment happy + - Add memory barrier before update resp->status for SAR + - Fix resource leak reported by coverity + - Switch cmd_ctx pool from freestack to bufpool + - Add iface parameter to smr_select_proto + - TCP + - Fix spinning on fi_trywait() + - Handle truncation of active message + - Handle prefetched data after reporting ETRUNC error + - Progress all ep's on unexp_msg_list when posting recv + - Removed unused saved_msg::ep field to fix assert + - Continue receiving after truncation error + - Create function to allocate internal msg buffer + - Add runtime setting for max saved message size + - Increase default max_saved value + - Dynamically allocate large saved Rx buffers + - Separate the max inject and recv buf size + - Remove 1-line xnet_cq_add_progress function + - Changed default wait object to epoll + - Handle case where epoll isn't natively supported + - Hold domain lock while deregistering memory + - Rename DL package from libnet to libtcp + - UCX + - Align the provider version with the libfabric version + - Verbs + - Delay device initialization to when fi_getinfo is called + - Consolidate peer_mem and dmabuf support check + - verbs_nd: Init len to 0 for WCSGetProviderPath call + - verbs_nd: Verify CQs are valid in rdma_create_qp + - verbs_nd: Initialize ibv_wc fields + - verbs_nd: Release lock in network direct error paths + - Fix vrb_add_credits signature + - Fix credit size parameter for flow ctrl + - Recover RXM connection from verbs QP in error state + - Fabtests + - Add ze-dlopen functions to component 
tests + - Call cudaSetDevice() for selected device + - pytest/efa: Adjust get_efa_devices() + - pytest/common: Support parallel neuron test + - pytest/common: Use different cuda device for parallel cuda set + - efa: Test_flood_peer.py increase timeout + - pytest/efa: Test to flood peer during startup + - fi-rdmabw-xe: Add option to set maximum message size + - fi-rdmabw-xe: Add option to set batch size + +------------------------------------------------------------------- +Thu May 4 13:27:21 UTC 2023 - Frederic Crozat + +- Add _multibuild to define additional spec files as additional + flavors. + Eliminates the need for source package links in OBS. + +------------------------------------------------------------------- +Tue Apr 18 17:25:02 UTC 2023 - Nicolas Morey + +- Update to 1.18.0 + - Core + - rocr: fix offset calculation + - rocr: use ofi spinlock functions + - rocr: minor fixes + - neuron: convert warn to info for nrt_get_dmabuf_fd not found + - neuron: check existence of neuron devices during initialization + - neuron: Add support for neuron dma-buf + - ze: update ZE to support new driver index specification + - List variables read from config file + - Add switch to prefer system-config over environment + - Add basic system-config support for setting library variables + - Move peer provider defines into new header + - rocr: Support asynchronous memory copies + - rocr: Add support for ROCR IPC + - rocr: rename rocr data-structures + - synapseai: return 0 for host_register and host_deregister + - fabric: Improve log level of provider mismatch + - cuda: Allow CUDA IPC when P2P disabled + - ze: add ZE command list pool to reuse command lists + - cuda: implement cuda_get_xfer_setting for non cuda build + - cuda: adjust FI_HMEM_CUDA_ENABLE_XFER behavior + - cuda.c: Add const to param to remove warning + - Add IFF_RUNNING check to indicate iface is up and running + - io_uring support enhancements + - EFA + - Implement CUDA support on instance types that do not 
support GPUDirect RDMA + - Implement fi_write using device's RDMA write capability + - Enrich error messages with debug and connection info + - Implement support for FI_OPT_EFA_USE_DEVICE_RDMA in fi_setopt + - Implement support for FI_OPT_CUDA_API_PERMITTED in fi_setopt + - Add support for neuron dma-buf + - Use gdrcopy to improve the intra-node CUDA communication performance for small messages + - Use shm provider's FI_AV_USER_ID support + - Fix bugs in efa provider’s shm info initialization procedure + - Hooks + - dmabuf_peer_mem: Handle IPC handle caching in L0 + - trace: Add trace log for CM operation APIs + - trace: Change tag in trace log to hex format + - trace: Enhance trace log for data transfer API calls + - trace: Add trace log for API fi_cq_readerr() + - trace: Add trace log for CQ operation APIs + - Add tracing hook provider + - Net + - Net provider optimizations have been integrated into the tcp provider. + - Net provider has been removed as a reported provider. + - OPX + - Fixes for Coverity scan issues + - Enhanced tag matching + - Tune expected recv for unaligned buffers + - Add finer logging granularity + - Reduce RTS immediate data and fix packet estimate for odd TID lengths + - Add additional sources for FI_OPX_UUID + - Exclude opx from build if missing needed defines + - Move some logs to optimized builds + - Fix build warnings for unused return code from posix_memalign + - Add reliability sanity check to detect when send buffer is illegally altered + - SDMA Completion workaround for driver cache invalidation race condition + - Fix replay payload pointer increment + - Handle completion counter across multiple writes in SDMA + - Cleanup pointers after free() + - Modify domain creation to handle soft cache errors + - Two biband performance improvements + - Fixes based on Coverity Scan related to auto progress patch + - Changed poll many argument to rx_caps instead of caps + - Resync with server configured for Multi-Engines (DAOS CART Self Tests) 
+ - Remove import_monitor as ENOSYS case + - Address memory leaks reported on OFIWG issues page + - General code cleanup + - Add replays over SDMA + - Implement basic TID Cache + - Revert work_pending check change + - Fix use_immediate_blocks + - Restore state after replay packet is NULL + - Fix memory leak from early arrival packets + - Fix segfault in SHM operations from uninitialized value in atomic path + - Prevent SDMA work entries from being reused with outstanding replays + - Set runtime as default for OPX_AV + - Fix RTS replay immediate data + - Fix errors caught by the upstream libfabric Coverity Scan + - fi_getInfo - Support multiple HFI devices + - Support OFI_PORT and Contiguous endpoint addresses for CART & Mercury + - Add fi_opx_tid.h to Makefile.include + - Fix progress checks and default domain + - Revert is_intranode simplification. + - Don't inline handle_ud_ping function + - Allow atomic fetch ops to use SDMA for sufficiently large counts + - Cleaned up FI_LOG_LEVEL=warn output + - Cleaned up unused macros for FI_REMOTE_COMM and FI_LOCAL_COMM + - Reset default progress to FI_PROGRESS_MANUAL + - Fixed GCC 10 build error with Auto Progress + - Add support for FI_PROGRESS_AUTO + - Use max allowed packet size in SDMA path when expected TID is off + - Expected receive (TID) rendezvous + - RMA Read/Write operations over SDMA + - Remove origin_rs from cts and dput packet header + - Fix for hang in DAOS CART tests + - Use single IOV for bounce buffer in SDMA requests. 
+ - Check for FI_MULTI_RECV with bitwise OR instead of AND + - Fix for intermittent intra-node deadlock hang (DAOS CART tests) + - Fix to RPC transport error failure (DAOS CART tests) + - Fix for context->buf set to NULL + - Fix bad asserts + - Ensure atomicity of atomic ops + - fi_opx_cq_poll_inline count and head check fix + - Fix intermittent intra-node hang causing RPC timeouts (DAOS CART tests) + - PSM3 + - Update provider to sync with IEFS 11.4.1.1.2 + - Fix warnings from build + - Add oneapi ZE support to OFI configure + - RXD + - Ignore error path in av_close return + - RXM + - Handle NULL av in rxm_freeall_conns() + - Implement the FI_OPT_CUDA_API_PERMITTED option + - Write "len" field for remote write + - Ignore error path domain_close return + - Free coll_pool on ep close + - Update rxm to use util_cq FI_PEER support functions + - Fix incorrect CQ completion field + - Rename srx to msg_srx + - Disable FI_SOURCE if not requested + - Memory leaks removed + - Set offload_coll_mask based on actual configuration + - Report on coll offload capabilities with OFI_OFFLOAD_PROV_ONLY + - Fabric setups collective offload fabric + - Create eq for collective offload provider + - Close collective providers ep when rxm_ep is closed + - Fix incorrect use of OFI_UNUSED() + - Rework collective support to use collective provider(s) + - SHM + - Fix potential deadlock in smr_generic_rma() + - smr_generic_rma() write error completion with positive errno + - Update SHM to use ROCR + - Fix incorrect discard call when cleaning up unexpected queues + - Separate smr_generic_msg into msg and tagged recv + - Fix start_msg call + - Implement the FI_OPT_CUDA_API_PERMITTED option + - Assert not valid atomic op + - Fix a bug in smr_av_insert + - Optimize locking on the SAR path + - Remove unneeded sar_cnt + - Optimize locking + - Enable multiple GPU/interface support + - Remove HMEM specific calls from atomic path + - Use util_cq FI_PEER support + - Import shm as device host memory + - 
Add HMEM flag to smr region + - Fix user_id support + - Write tx err comp to correct cq + - Fix index when setting FI_ADDR_USER_ID + - TCP + - Provider source has been replaced by net provider source + - Removed incorrect reporting of support for FI_ATOMIC + - Do not save unmatched messages until we have the peer's fi_addr + - Use internal flag for FI_CLAIM messages, versus a reserved tag bit + - Fix updating error counter when discarding saved messages + - Allow saved messages to be received after the underlying ep has been closed + - Enhanced debug logging in connection path + - Force CM progress on unconnected ep's when posting data transfers + - Support connect and accept calls with io_uring + - Fix segfault accessing an invalid fi_addr + - Add io_uring support for CM message exchange + - Move CM progress from fabric to EQ to improve multi-threaded performance + - Fix small memory leak destroying an EQ + - Fix race where same rx entry could be freed twice + - Handle NULL av in rdm ep cleanup + - Reduce stack use for epoll event array + - UCX + - New provider targeting Nvidia fabrics that layers over libucp + - Util + - Fix the behavior of cq_read for FI_PEER + - rocr: Fix compilation issue + - cuda: Use correct debug string calls + - Free cq->peer_cq on close + - Remove extra new line from av insert log + - Check for count = 0 in ofi_ip_av_insert + - rocr: Add support for ROCR IPC + - Add FI_PEER support to util_cq + - Disable FI_SOURCE if not requested + - Remove FID events from the EQ when closing endpoint + - Rework collective support to be a peer collective provider(s) + - Allow FI_PEER to pass CQ, EQ and AV attr checking + - Remove annoying WARNING message for FI_AFFINITY + - Add utility collective provider + - Verbs + - Implement the FI_OPT_CUDA_API_PERMITTED option + - Add support for ROCR IPC + - Fabtests + - Add fi_setopt_test unit test + - Update ze device registration calls + - fi-rdmabw-xe: Always use host buffer for synchronization + - Fix bug in 
posting RMA operation + - fi_cq_data: Extend test to fi_writedata + - fi_cq_data: Extend validation of completion data + - Rename fi_msg_inject tests to fi_inject_test to reflect its use + - fi_rdm_stress: Add count option to json key/pair options + - Add and fix OOB option handling in several tests + - fi_eq_test: Fix incorrect return value + - fi_rdm_multi_client: Increase the size of ep name buffer + - Add FI_MR_RAW to default mr_mode + - Support larger control messages needed by newer providers + - fi-rdmabw-xe: Update to work with the ucx provider + - fi_ubertest: Cleanup allocations in failure cases + - Change ft_reg_mr to not assume hmem iface & device + - fi_multinode: Bugfix multinode test for ze + verbs + - fi_multinode: Remove unused validation print + - fi_multinode: Skip tests for unsupported collective operations + - fi_ubertest: Fix data validation with device memory + - fi_peek_tagged: Restructure and expand test + +------------------------------------------------------------------- +Mon Mar 20 09:03:29 UTC 2023 - Nicolas Morey + +- Update to 1.17.1 + - Core + - hmem_cuda Add const to param to remove warning + - Fix typos in fi_ext.h + - ofi_epoll: Remove unused hot_index struct member + - EFA + - Print local/peer addresses for RX write errors + - Unit test to verify no copy with shm for small host message + - Avoid unnecessary copy when sending data from shm + - Compare pci bus id in hints + - Fix double free in rxr endpoint init + - Hooks + - dmabuf_peer_mem: Handle IPC handle caching in L0 + - OPX + - Exclude from build if missing needed defines + - Move some logs to optimized builds + - Fix build warnings for unused return code from posix_memalign + - Add reliability sanity check to detect when send buffer is illegally altered + - SDMA Completion workaround for driver cache invalidation race condition + - Fix replay payload pointer increment + - Handle completion counter across multiple writes in SDMA + - Cleanup pointers after free() + - Modify 
domain creation to handle soft cache errors + - Two biband performance improvements + - Fixes based on Coverity Scan related to auto progress patch + - Changed poll many argument to rx_caps instead of caps + - Resynch with server configured for Multi-Engines (DAOS CART Self Tests) + - Remove import_monitor as ENOSYS case + - Address memory leaks reported on OFIWG issues page + - Remove unused fields + - Fix unwanted print statement case + - Add replays over SDMA + - Implement basic TID Cache + - Revert work_pending check change + - Fix use_immediate_blocks + - Restore state after replay packet is NULL + - Fix memory leak from early arrival packets. + - Fix segfault in SHM operations from uninitialized value in atomic path. + - Prevent SDMA work entries from being reused with outstanding + replays pointing to bounce buf. + - Set runtime as default for OPX_AV + - Fix RTS replay immediate data + - Fix errors caught by the upstream libfabric Coverity Scan + - Support multiple HFI devices + - Support OFI_PORT and Contiguous endpoint addresses + - Update man pages + - Util + - util_cq: Remove annoying WARNING message for FI_AFFINITY + +------------------------------------------------------------------- +Mon Dec 19 08:39:57 UTC 2022 - Nicolas Morey + +- Update to 1.17.0 + - Core + - Add IFF_RUNNING check to indicate iface is up and running + - General code cleanups + - Add abstraction for common io_uring operations + - Support ROCR get_base_addr + - Add a 'flags' parameter to fi_barrier() + - Introduce new calls for opening domain and endpoint with flags + - Add ability to re-sort the fi_info list + - Allowing layering of rxm over net provider + - General cleanup of provider filtering functions + - Add io_uring operations to be used by sockapi + - Modify internal handling of async socket operations + - Sockets operations are moved to a common sockapi abstraction + - Add support for Ze host register/unregister + - Add new offload provider type + - Rename fi_prov_context 
and simplify its use + - Convert interface prefix string checks to exact checks + - EFA + - Code cleanups and various bug fixes + - Improved debug logging and warnings and assertions + - Do not ignore hints->domain_attr->name + - Fix the calculation of REQ header size for a packet entry + - Fix default value for host memory's max_medium_msg_size + - Add tracepoints to send/recv/read ops + - Simplified emulated read protocol + - Set use_device_rdma according to efa device id + - Fix shm initialization path on error + - Fix Implementation of FI_EFA_INTER_MIN_READ_MESSAGE_SIZE + - Do not enable rdma_read if rxr_env.use_device_rdma is false + - Remove de-allocated CUDA memory region during registration + - Fix the error handling path of efa_mr_reg_impl() + - Fix rxr_ep unit tests involving ibv_cq_ex + - Add check of rdma-read capability for synapseai + - Report correct default for runt_size parameter + - Toggle cuda sync memops via environment variable. + - Net + - Continued fork of tcp provider, will eventually merge changes back + - Fix inject support + - Fix memory leak in peek/claim path + - General code cleanups and bug fixes from initial fork + - Allow looking ahead in tcp stream to handle out-of-order messages + - Add message tracing ability + - Fetch correct ep when posting to a loopback connection + - Release lock in case of error in rdm_close + - Fix error path in xnet_enable_rdm + - Add missing progress lock in srx cleanup + - Code restructuring and enhancements with longer term goal of supporting io_uring + - Disable the progress thread in most situations + - Rename DL from libxnet-fi to libnet-fi + - Add missing initialization calls for DL provider + - Add support for FI_PEEK, FI_CLAIM, and FI_DISCARD + - Include source address with CQ entry + - Fix support for FI_MULTI_RECV + - OPX + - Bug fixes and general code cleanup + - Fix progress checks and default domain + - Allow atomic fetch ops to use SDMA for sufficiently large counts + - Cleaned up 
FI_LOG_LEVEL=warn output + - Reset default progress to FI_PROGRESS_MANUAL + - Fixed GCC 10 build error with Auto Progress + - Add support for FI_PROGRESS_AUTO + - Use max allowed packet size in SDMA path when expected TID is turned off + - Expected receive (TID) rendezvous + - RMA Read/Write operations over SDMA + - Remove origin_rs from cts and dput packet header. + - Fix for hang - unable to match inbound packets with receive + context->src_addr (DAOS CART tests) + - Use single IOV for bounce buffer in SDMA requests. + - Check for FI_MULTI_RECV with bitwise OR instead of AND + - Fix for intermittent intra-node deadlock hang (DAOS CART tests) + - Fix to RPC transport error failure (DAOS CART tests) + - Fix for context->buf set to NULL + - Fix bad asserts + - Ensure atomicity of atomic ops + - fi_opx_cq_poll_inline count and head check fix + - Fix intermittent intra-node hang causing RPC timeouts (DAOS CART tests) + - Temporarily reduce SDMA queue ring size for possible driver bug workaround + - Fix alignment issue and asserts + - Enable more parallel SDMA operations + - PSM3 + - Synced to IEFS 11.4.0.0.198 + - Tech Preview Ubuntu 22.04 Support + - Tech Preview Intel DSA Support + - Improved Intel GPU Support + - Various performance improvements + - Various bug fixes + - RxM + - Always use rendezvous protocol for ZE device memory send + - Code cleanup + - Add option to free resources on AV removal + - SHM + - Fix user_id support + - Write tx err comp to correct cq + - Fix index when setting FI_ADDR_USER_ID + - Remove extraneous ofi_cirque_next() call + - Add support for FI_AV_USER_ID + - Fix multi_recv messaging + - General code restructuring for maintainability + - Implement shared completion queues + - Decouple error processing from cq completion path to avoid switch + - Fix incorrect op passed into recv cancel operation + - Enhanced SHM implementation with DSA offload + - Use multiple SAR buffers per copy operation + - Fix ZE IPC race condition on startup + - 
TCP + - Minor updates in preparation for io_uring support (via net provider) + - Util + - Add option to free resources on AV removal + - Add 'flags' parameter to new fi_barrier2() call + - Add debugging in ofi_mr_map_verify + - Rename internal bitmask struct to include ofi prefix + - Verbs + - Add option to disable dmabuf support + - FI_SOCKADDR includes support of FI_SOCKADDR_IB + - Fabtests + - shared: Expand hmem support + - fi_loopback: Add support for tagged messages + - fi_mr_test: add support of hmem + - fi_rdm_atomic: Fix hmem support + - fi_rdm_tagged_peek: Read messages in order, code cleanup and fixes + - fi_multinode: Add performance and runtime control options, cleanups + - benchmarks: Add data verification to some bw tests + - fi_multi_recv: Fix possible crash in cleanup +- Drop prov-net-fix-error-path-in-xnet_enable_rdm.patch which was merged upstream. + +------------------------------------------------------------------- +Tue Nov 8 11:46:56 UTC 2022 - Nicolas Morey-Chaisemartin + +- Add prov-net-fix-error-path-in-xnet_enable_rdm.patch to fix a deadlock + when no network interfaces are available (bsc#1205139) + +------------------------------------------------------------------- +Mon Oct 10 06:47:42 UTC 2022 - Nicolas Morey-Chaisemartin + +- Update to 1.16.1 + - Core + - Fix windows implementation to remove fd from poll set + - PSM3 + - Add missing files to release tarball + - Util + - Handle NULL address insertion to fi_av_insert +- Drop prov-rxm-Disable-128-bit-atomics.patch which was merged upstream + +------------------------------------------------------------------- +Thu Oct 6 16:06:29 UTC 2022 - Nicolas Morey-Chaisemartin + +- Add prov-rxm-Disable-128-bit-atomics.patch to fix a potential + segfault on misaligned buffers. 
+ +------------------------------------------------------------------- +Fri Sep 30 11:44:45 UTC 2022 - Nicolas Morey-Chaisemartin + +- Update to 1.16.0 (jsc#PED-351, jsc#PED-190) + - Core + - Added HMEM IPC cache + - Use exact string comparison checks for network interfaces + - Restructuring of poll/epoll abstraction + - Add ability to disable locks completely in debug builds + - Serialize access to modifying the logging calls + - Minor fixes to fi_tostr text formatting + - Add hmem interface checks to memory registration + - EFA + - Added support of Synapse AI memory. + - Improved error message + - Net + - Temporarily forked, optimized version of tcp provider + - Focused on improved performance and scalability over tcp sockets + - Fork ensures tcp provider stability while net provider is developed + - Shares the tcp provider protocol and base implementation for msg endpoints + - Integrates direct support for rdm endpoints, using a derivative from rxm + - Implements own protocol for rdm endpoints, separate from rxm;tcp + - OPX + - Added initial support for SDMA + - General performance enhancements + - Performance improvements to reliability protocol + - Improved deferred work pending complete + - Added support for OPX_AV=runtime + - Support iov memory registration ops + - Added DAOS RPC support + - Atomic ops enhancements + - Improved documentation + - Debug build enhancements + - Fixed compiler warnings + - Reduced time to compile prov/opx code + - General bug fixes + - Fixed PSN wrapping scaling + - Added intranode fence + - Addressed bugs discovered by coverity scan + - PSM2 + - Fix sending CQ data in some instances of fi_tsendmsg + - PSM3 + - Updated to match Intel Ethernet Fabric Suite (IEFS) 11.3 release + - RxM + - Update to read multiple completions at once from msg provider + - Move RxM AV implementation to util code to share with net provider + - Minor code cleanups + - SHM + - Implement and use ipc_cache + - Add log messages for debugging and error 
tracking + - Fix check for FI_MR_HMEM mr_mode + - Move shm signal handlers initialization to EP + - Added log messages for errors detected + - TCP + - Fix incorrect signaling of the CQ + - Increase max number of poll events to retrieve + - Acquire ep lock prior to flushing socket in shutdown + - Verify ep state prior to progressing socket data + - Read cm error data when receiving connreq response + - Log error on connect failure + - Fix assertion failure in CQ progress function + - Util + - Fix text in log of UFFD ioctl failure + - Introduce cuda ipc monitor + - Fix CQ memory leak handling overflow + - Fix MR mode bit check for ver 1.5 and greater + - Add max_array_size to track/check array overflow + - Always progress transfers when reading from a CQ + - Handle NULL address insertion + - Try IPv4 before IPv6 addresses when starting name server + - Fix IP util av default address length + - Fix util IP getinfo path to read hints->addr_format + - Fix debug print mismatch + - Fix return code when memory allocation fails. 
+ - Fix build sign warning in ofi_bufpool_region_alloc + - Minor code cleanups + - Print warning if an addr is inserted into an AV again + - Verbs + - Fix support of FI_SOCKADDR_IB when requested by the application + - Ensure all posted receives are flushed to the application + - Update ofi_mr_cache_search API for hmem IPC support + - Reduce logging verbosity for "no active ports" + - Fix incorrect length used in memory registration + - Various minor bug fixes for test failures + - Fix a memory leak getting IB address + - Implement verbs provider on Windows over NetworkDirect API + - Set and check address format correctly + - Only close qp if it was initialized + - Portable detection of loopback device + - Fabtests + - multi_ep: Separate EP resources and fix MR registration + - multi_recv: Fix possible crash and check for valid buffer + - unexpected_msg: Fix printf compiler warning + - dgram_pingpong.c: Use out-of-band sync + - multinode: Make multinode tests platform agnostic, fix formatting + - ubertest: Fix string comparison to include length, fix writedata completion check + - av_test: add support for -e + - New tests: + - dmabuf-rdma: Component level test for dma-buf RDMA + - sock_test: Component level performance test of poll, epoll, and select + - rdm_stress: Multi-threaded, multi-process stress test for RDM endpoints + - sighandler_test: Regression test for signal handler restoration +- Drop patches fixed upstream: + - prov-opx-Correctly-disable-OPX-if-unsupported.patch + - disable-flatten-attr.patch + +------------------------------------------------------------------- +Mon Aug 1 20:01:18 UTC 2022 - Martin Liška + +- Add disable-flatten-attr.patch that drops flatten attribute. + Note the flatten attribute results in huge compile time hog + in inliner (same the binary size would be huge). +- Use %make_build and enable LTO (boo#1133235). +- Synchronize used Patches. 
+ +------------------------------------------------------------------- +Thu Jun 23 10:36:09 UTC 2022 - Nicolas Morey-Chaisemartin + +- Update to 1.15.1 + - Core + - Fix fi_info indentation error in fi_tostr + - hmem_ze: Add runtime option to choose specific copy engine + - Cleanup of configure HMEM checks + - Fixed stringop-truncation in ofi_ifaddr_get_speed + - Add utility provider log suffix to make logs easier to read + - Fix truncation of ipv6 addressing + - hmem: add support for AWS Trainium devices + - Fix potential sscanf overflows + - hmem: pass through device and flags when querying memory interface + - Rework locking in several areas to convert spinlocks to mutexes + - Add new locking abstractions to select lock types at runtime + - Add new FI_PROTO_RXM_TCP for optimized rxm over tcp path + - Fix windows implementation to remove fd from poll set + - EFA + - Added windows support through efawin (https://github.com/aws/efawin) + - Added support of AWS neuron. + - Added support of using gdrcopy to copy data from host to device. + - Fixed a bug that caused 0 byte read to fail. + - Fixed a memory corruption issue that can cause a forked process to crash. + - Extended testing coverage through new pytest based testing framework. 
+ - HOOKS + - Add new hooking provider dmabuf_peer_mem + - Enable DL build of hooking providers + - Add HMEM memory registration hook + - OPX + - New provider supporting Cornelis Networks Omni-path hardware + - PSM3 + - Updated psm3 to match IEFS 11.2.0.0 release + - Added support for sockets (TCP/UDP) via a runtime selectable Hardware + Abstraction Layer (HAL) + - Added support for IPv6 addressing in RoCE and sockets + - Added various NIC selection filtering options (wildcarded NIC name, + address format, wildcarded IP subnet, link speed) + - Performance tuning in conjunction with OneAPI and OneCCL + - Improved PSM3_IDENTIFY output + - Rename most internal symbols to psm3_ + - Corrected vulnerabilities found during Coverity scans + - configure options refined and help text improved + - PSM3_MULTI_EP has been deprecated (recommend always enabled, default + is enabled [same default as previous releases]) + - Various bug fixes + - RxM + - Add check that atomic size is valid + - Add support to passthru calls to tcp provider in specific + - TCP + - Add assert to verify RMA source/target msg sizes match + - Wake-up threads blocked on CQ to update their poll events + - Fix use of incorrect events in progress handler + - Fixes for various compile warnings, mostly on Windows + - Add support for FI_RMA_EVENT capability + - Add support for completion counters + - Fix check for CQ data in tagged messages + - Add cancel support to shared rx context + - Add src_addr receive buffer matching + - Add provider control to assign a src_addr with an ep + - Handle trecv with FI_PEEK flag + - Allow binding a CQ with an SRX + - Restructuring of code in source files + - Handle EWOULDBLOCK returned by send call + - Add hot (active) pollfd + - SHM + - Properly chain the original signal handlers + - Avoid uninitialized variable with invalid atomic parameters + - Fix 0 byte SAR read + - Initialize len parameter to accept + - Refactor and simplify protocol code + - Remove broken support for 
128-bit atomics + - Fix FI_INJECT flag support + - Add assert to verify RMA source/target msg sizes match + - Set domain threading to thread safe + - Fix possible use of uninitiated var in av_insert + - Util + - Fix sign warning in ofi_bufpool_region_alloc + - Remove unused variable from ofi_bufpool_destroy + - Fix check for valid datatype in ofi_atomic_valid + - Return with error if util_coll_sched_copy fails + - Fix use of uninitialized variable in ofi_ep_allreduce + - Fix memory access in ip_av_insertsym + - Track ep per collective operation not with multicast + - Restructure collective av set creation/destruction + - Change most locks from spin locks to mutexes + - Allow selection of spinlocks for CQ and domain objects + - Fix AV default addrlen + - Update fi_getinfo checks to include hints->addr_ + - Handle NULL address insertion to fi_av_insert + - Verbs + - Initial changes for compiling on Windows (via NetworkDirect) + - Add a failover path to dma-buf based memory registration + - Replace use of spin locks with mutexes + - Check for valid qp prior to cleanup + - Set and check for address format correct in fi_getinfo + - Fabtests + - hmem_cuda: used device allocated host buff to fill device buf + - Add python scripts to control test execution + - test_configs: include util provider in core config file + - Add option "--pin-core" + - Only call nrt_init once + - Fix a bug in ft_neuron_cleanup + - Correct help for unit test programs + - Remove duplicate help prints from fi_mcast + - configure.ac: fix --enable-debug=no not properly detected + - msg_inject: handle the case ft_tsendmsg return -FI_EAGAIN + - Add AWS Trainium device support + - fi_inj_complete: Add FI_INJECT to fabtests + - inj_complete.c: Make arguments align with the other tests + - dgram_pingpong: handle the error return of fi_recv + - recv_cancel: Remove requirement for unexpected msg handling + - poll: Fix crash if unable to allocate pollset + - ubertest: Add GPU testing and validation support + 
- Add HMEM options parsing support + - Update and re-enable fi_multi_ep test +- Add prov-opx-Correctly-disable-OPX-if-unsupported.patch to disable + OPX compilation on non x86_64 systems + +------------------------------------------------------------------- +Tue Apr 19 07:27:42 UTC 2022 - Nicolas Morey-Chaisemartin + +- Update to 1.14.1 + - Core + - Use non-shared memory allocations to use MADV_DONTFORK safely + - Fix incorrect use of gdr_copy_from_mapping + - Ensure proper timeout time for pollfds to avoid early exit + - EFA + - Handle read completion properly for multi_recv + - Use shm's inject write when possible + - Support 0 byte read + - RxM + - Ensure signaling the CQ fd after writing completion + - Fix inject path for sending tagged messages with cq data + - Negotiate credit based flow control support over CM + - Add PID to CM messages to detect stale vs duplicate connections + - Fix race handling unexpected messages from unknown peers + - Fix possible leak of stack data in cm_accept + - Restrict reported caps based on core provider + - Delay starting listen until endpoint fully initialized + - Verify valid atomic size + - Sockets + - Fix coverity reports on uninitialized data + - Check for NULL pointers passed to memcpy + - Add missing error return code from sock_ep_enable + - TCP + - Fix performance regression resulting from sparse pollfd sets + - Fix assertion failure in CQ progress function + - Do not generate error completions for inject msgs + - Fix use of incorrect event names in progress handler + - Fix check for CQ data in tagged messages + - Make start_op array a static to reduce memory + - Wake-up threads blocked on CQ to update their poll events + - Verbs + - Generate error completions for all failed transmits + - Set all fields in the fi_fabric_attr for FI_CONNREQ events + - Set proper completion flags for all failed transfer + - Ensure that all attributes are provided when opening an endpoint + - Fix error handling in vrb_eq_read + - Fix 
memory leak in error case in vrb_get_sib + - Work-around bug in verbs HW not reporting correct send opcodes + - Only call ibv_reg_dmabuf_mr when kernel support exists + - Add a failover path to dma-buf based memory registration + - Negotiate credit based flow control support over CM + +------------------------------------------------------------------- +Mon Nov 22 07:57:54 UTC 2021 - Nicolas Morey-Chaisemartin + +- Update to 1.14.0 + - Add time stamps to log messages + - Fix gdrcopy calculation of memory region size when aligned + - Allow user to disable use of p2p transfers + - Update fi_tostr print FI_SHARED_CONTEXT text instead of value + - Update fi_tostr to output field names matching header file names + - Fix narrow race condition in ofi_init + - Add new fi_log_sparse API to rate limit repeated log output + - Define memory registration for buffers used for collective operations + - EFA, SHM, TCP, RXM, and verbs fixes + +------------------------------------------------------------------- +Wed Nov 3 07:53:20 UTC 2021 - Nicolas Morey-Chaisemartin + +- Enable PSM3 provider (jsc#SLE-18754) + +------------------------------------------------------------------- +Fri Oct 29 11:13:43 UTC 2021 - Nicolas Morey-Chaisemartin + +- Update to 1.13.2 + - Sort DL providers to ensure consistent load ordering + - Update hooking providers to handle fi_open_ops calls to avoid crashes + - Replace cassert with assert.h to avoid C++ headers in C code + - Enhance serialization for memory monitors to handle external monitors + - EFA, SHM, TCP, RxM and verbs fixes + +------------------------------------------------------------------- +Wed Aug 25 07:41:46 UTC 2021 - Nicolas Morey-Chaisemartin + +- Update to 1.13.1 + - Enable loading ZE library with dlopen() + - Add IPv6 support to fi_pingpong + - EFA, PSM3 and SHM fixes + +------------------------------------------------------------------- +Wed Jul 7 11:13:26 UTC 2021 - Nicolas Morey-Chaisemartin + +- Update to 1.13.0 + - Fix behavior of 
fi_param_get parsing an invalid boolean value + - Add new APIs to open, export, and import specialized fid's + - Define ability to import a monitor into the registration cache + - Add API support for INT128/UINT128 atomics + - Fix incorrect check for provider name in getinfo filtering path + - Allow core providers to return default attributes which are lower than + maximum supported attributes in getinfo call + - Add option to prefer external providers (in order discovered) over internal + providers, regardless of provider version + - Separate Ze (level-0) and DRM dependencies + - Always maintain a list of all discovered providers + - Fix incorrect CUDA warnings + - Fix bug in cuda init/cleanup checking for gdrcopy support + - Shift order providers are called from in fi_getinfo, move psm2 ahead of + psm3 and efa ahead of psmX + - See NEWS.md for changelog + +------------------------------------------------------------------- +Fri Apr 2 07:30:34 UTC 2021 - Nicolas Morey-Chaisemartin + +- Update to 1.12.1 + - Fix initialization checks for CUDA HMEM support + - Fail if a memory monitor is requested but not available + - Adjust priority of psm3 provider to prefer HW specific providers, + such as efa and psm2 + - EFA and PSM3 fixes + - See NEWS.md for changelog + +------------------------------------------------------------------- +Tue Mar 9 08:43:43 UTC 2021 - Nicolas Morey-Chaisemartin + +- Update to 1.12.0 + - See NEWS.md for changelog + +------------------------------------------------------------------- +Wed Dec 16 08:29:07 UTC 2020 - Nicolas Morey-Chaisemartin + +- Update to 1.11.2 (bsc#1181983) + - See NEWS.md for changelog + +------------------------------------------------------------------- +Mon Oct 12 10:40:29 UTC 2020 - Nicolas Morey-Chaisemartin + +- Update to 1.11.1 (jsc#SLE-13312) + - See NEWS.md for changelog +------------------------------------------------------------------- +Tue Aug 18 08:12:27 UTC 2020 - Nicolas Morey-Chaisemartin + +- Update to 1.11.0 
+ - See NEWS.md for changelog + +------------------------------------------------------------------- +Thu May 14 08:59:09 UTC 2020 - Nicolas Morey-Chaisemartin + +- Update to 1.10.1 + - See NEWS.md for changelog + +------------------------------------------------------------------- +Mon Apr 27 13:04:26 UTC 2020 - Nicolas Morey-Chaisemartin + +- Update to 1.10.0 + - See NEWS.md for changelog + +------------------------------------------------------------------- +Thu Mar 19 08:29:38 UTC 2020 - Nicolas Morey-Chaisemartin + +- Update to 1.9.1 (bsc#1160275) + - See NEWS.md for changelog + +------------------------------------------------------------------- +Mon Nov 25 09:39:53 UTC 2019 - Nicolas Morey-Chaisemartin + +- Update to 1.9.0 (jsc#SLE-8257) + - See NEWS.md for changelog + +------------------------------------------------------------------- +Tue Oct 1 05:57:27 UTC 2019 - Nicolas Morey-Chaisemartin + +- Update to 1.8.1 (jsc#SLE-8257) + - See NEWS.md for changelog + +------------------------------------------------------------------- +Fri Sep 6 07:10:57 UTC 2019 - Nicolas Morey-Chaisemartin + +- Update to 1.8.0 + - See NEWS.md for changelog + +------------------------------------------------------------------- +Wed Apr 24 17:13:07 UTC 2019 - Martin Liška + +- Disable LTO (boo#1133235). + +------------------------------------------------------------------- +Tue Apr 9 06:46:41 UTC 2019 - Nicolas Morey-Chaisemartin + +- Update to 1.7.1 + - See NEWS.md for changelog + +------------------------------------------------------------------- +Mon Feb 11 10:34:29 UTC 2019 - Jan Engelhardt + +- Remove silly Prefix: value, we do not support that in SUSE anyway. +- Update summaries, make use of %make_install. 
+ +------------------------------------------------------------------- +Thu Feb 7 07:24:21 UTC 2019 - nmoreychaisemartin@suse.com + +- Update to v1.7.0 + - fabtests and libfabric repos have been merged upstream + - Core + - Add ability to report NIC details with fi_info data + - Improve MR cache notification mechanisms + - Set sockaddr address format correctly + - Avoid possible null dereference in eq_read + - Handle FI_PEEK in CQ/EQ readerr + - Add debug messages to name server + - Feature and performance enhancements added to internal buffer pool + - Add support for huge pages + - Decrease memory use for idle buffer pools + - Refactor utility AV functionality + - Generic counter support enhancements + - Optimize EP and CQ locking based on application threading level + - Enhance common support for EQ error handling + - Add free/alloc memory notification hooks for MR cache support + - Fix memory monitor unsubscribe handling + - Add CQ fd wait support + - Add CQ overflow protection + - Enhance IPv6 addressing support for AVs + - Enhancements to support for AV address lookup + - Fixes for emulated epoll support + - Allow layering of multiple utility providers + - Minor bug fixes and optimization + - Hook + - Improved hooking infrastructure + - Add support for installing multiple hooks + - Support hooks provided by external libraries. 
+ - GNI + - Fix CQ readfrom overwriting src_addr in case of multiple events + - Signal wait set if error entry is added to CQ + - Fix state data issue with SMSG buffers + - Enhance and fix possible misuse of default authorization key + - Add cancel support for SEP + - Rework SEP setup + - Suppress huge page counting for ARM + - Fix incorrect check of FI_SYNC_ERR flag + - PSM2 + - Requires PSM2 library version 10.2.260 or later + - Clean up connection state in fi_av_remove + - Use psm2_info_query to read HFI device info + - Clean up CQ/counter poll list when endpoint is closed + - Support shared address vector + - Optimize CQ event conversion with psm2_mq_ipeek_dequeue_multi + - Lock optimization for FI_THREAD_DOMAIN + - Use new PSM2 fast path isend/irecv functions for large size RMA + - Support building with latest PSM2 source code (version 11.2.68) + - Support fabric direct + - RxD + - Initial release of RxD provider + - Provides reliable datagram semantics over unreliable datagram EPs + - Target is to improve scalability for very large clusters relative to RxM + - RxM + - Decrease memory use needed to maintain large number of connections + - Set correct op_context and flags on CQ error completions + - Fix file descriptor memory leaks + - Introduce new protocol optimized for medium message transfers + - Improve Rx software performance path + - Use shared receive contexts if required by underlying provider + - Handle addresses inserted multiple times into AV (for AV map) + - Performance optimizations for single-thread applications + - Rework deferred transmit processing + - Separate and optimize eager and rendezvous protocol processing. 
+ - Fix passing incorrect addresses for AV insert/remove + - Fix CM address handling + - Fix race condition accessing connection handles + - Simplify small RMA code path + - Increment correct counter when processing FI_READ events + - Dynamically grow the number of connections that can be supported + - Fix padding in wire protocol structures + - Report correct fi_addr when FI_SOURCE is requested + - Fix truncating rendezvous messages + - Fix use after free error in Rx buffer processing + - Add support for manual progress + - Make Tx/Rx queue sizes independent of MSG EP sizes + - Decrease time needed to repost buffers to the MSG EP Rx queue. + - Miscellaneous bug fixes + - Sockets + - Enable MSG EPs when user calls fi_accept + - Fix fabric names to be underlying IP address + - Add connection timeout environment variable. + - Use size of addresses, not structures + - Add debug messages to display selected addresses + - Use loopback address in place of localhost + - Simplify listen paths + - Add support for IPv6 + - Code restructuring + - Avoid unneeded address to string to address translations + - Check length of iovec entries prior to access buffers + - Fix segfault + - Avoid acquiring nested spinlocks resulting in hangs + - Fix use after free error in triggered op handling + - New connection manager for MSG EPs to reduce number of threads + - Avoid retrying recv operations if connection has been broken + - Fixes for Windows socket support + - TCP + - Initial release of optimized socket based tcp provider + - Supports MSG EPs, to be used in conjunction with RxM provider + - Targets eventual replacement of sockets provider + - Verbs + - Remove RDM EP support. Use RxM and RxD for RDM EPs. + - Improve address handling and report in fi_getinfo + - Handle FI_PEER when calling CQ/EQ readerr functions + - Add support for XRC QPs. 
+ - Ignore destination address when allocating a PEP + - Add workaround for i40iw incorrect return values when posting sends + - Fix completion handling for FI_SELECTIVE_COMPLETION EP setting + - Change format of fabric name to use hex instead of decimal values + - Fix handling of err_data with EQ readerr + - Report correct size of max_err_data + - Fast path performance improvements + - Improve progress under high system load + - Optimize completion processing when handling hidden completions + - Optimize RMA and MSG transfers by pre-formatting work requests + - Remove locks based on application threading model + - Add overflow support for CQ error events + - Minor cleanups and bug fixes + +------------------------------------------------------------------- +Thu Oct 25 10:52:50 UTC 2018 - nmoreychaisemartin@suse.com + +- Update to v1.6.2 (fate#325852) + - Core + - Cleanup of debug messages + - Fix compile issues with older compilers + - Check that all debug compiler flags are supported by compiler + - GNI + - Fix problems with Scalable Endpoint creation + - Fix interoperability problem with HPC toolkit + - Improve configuration check for kdreg + - PSM + - Enforce FI_RMA_EVENT checking when updating counters + - Fix race condition in fi_cq_readerr() + - Always try to make progress when fi_cntr_read is called + - PSM2 + - Revert "Avoid long delay in psm2_ep_close" + - Fix memory corruption related to sendv + - Performance tweak for bi-directional send/recv on KNL + - Fix CPU detection + - Enforce FI_RMA_EVENT checking when updating counters + - Remove stale info from address vector when disconnecting + - Fix race condition in fi_cq_readerr() + - Adjust reported context numbers for special cases + - Always try to make progress when fi_cntr_read is called + - Support control functions related to MR mode + - Unblock fi_cntr_wait on errors + - Properly update error counters + - Fix irregular performance drop for aggregated RMA operations + - Reset Tx/Rx context counter 
when fabric is initialized + - Fix incorrect completion event for iov send + - Fix occasional assertion failure in psm2_ep_close + - Avoid long delay in psm2_ep_close + - Fix potential duplication of iov send completion + - Replace some parameter checking with assertions + - Check iov limit in sendmsg + - Avoid adding FI_TRIGGER caps automatically + - Avoid unnecessary calls to psmx2_am_progress() + - RXM + - Fix incorrect increments of error counters for small messages + - Increment write completion counter for small transfers + - Use FI_UNIVERSE_SIZE when defining MSG provider CQ size + - Make TX, RX queue sizes independent of MSG provider + - Make deferred requests opt-in + - Fill missing rxm_conn in rx_buf when shared context is not used + - Fix an issue where MSG endpoint recv queue got empty resulting + in a hang + - Set FI_ORDER_NONE for tx and rx completion ordering + - Serialize access to repost_ready_list + - Reprocess unexpected messages on av update + - Fix a bug in matching directed receives + - Fix desc field when postponing RMA ops + - Fix incorrect reporting of mem_tag format + - Don't include FI_DIRECTED_RECV, FI_SOURCE caps if they're not needed + - Fix matching for RMA I/O vectors + - Fix reading pointer after freeing it. + - Avoid reading invalid AV entry + - Handle deleting the same address multiple times + - Fix crash in fi_av_remove if FI_SOURCE wasn't enabled + - Sockets + - Increase maximum messages size as MPICH bug work-around + - Fix use after free error handling triggered ops. 
+ - Verbs + - Detect string format of wildcard address in node argument + - Don't report unusable fi_info (no source IP address) + - Don't assert when a verbs device exposes unsupported MTU types + - Report correct rma_iov_limit + - Add new variable - FI_VERBS_MR_CACHE_MERGE_REGIONS + - eq->err.err must return a positive error code + +------------------------------------------------------------------- +Thu Mar 15 06:51:08 UTC 2018 - nmoreychaisemartin@suse.com + +- Update to v1.6.0 + - Fixes stack smashing when using the verbs provider (bsc#1089190) + - Core + - Introduces support for performing RMA operations to persistent memory + See FI_RMA_PMEM capability in fi_getinfo.3 + - Define additional errno values + - General code cleanups and restructuring + - Force provider ordering when using dynamically loaded providers + - Add const to fi_getinfo() hints parameter + - Improve use of epoll for better scalability + - Fixes to generic name service + - PSM + - Move environment variable reading out from fi_getinfo() + - Shortcut obviously unsuccessful fi_getinfo() calls + - Remove excessive name server implementation + - Enable ordering of RMA operations + - PSM2 + - Skip inactive units in round-robin context allocation + - Allow contexts be shared by Tx-only and Rx-only endpoints + - Use utility functions to check provider attributes + - Turn on FI_THREAD_SAFE support + - Make address vector operations thread-safe + - Move environment variable reading out from fi_getinfo() + - Reduce noise when optimizing tagged message functions + - Shortcut obviously unsuccessful fi_getinfo() calls + - Improve how Tx/Rx context limits are handled + - Support auto selection from two different tag layout schemes + - Add provider build options to debug output + - Support remote CQ data for tagged messages, add specialization. 
+ - Support opening multiple domains + - Put trigger implementation into a separate file + - Update makefile and configure script + - Replace allocated context with reserved space in psm2_mq_req + - Limit exported symbols for DSO provider + - Reduce HW context usage for certain TX only endpoints + - Remove unnecessary dependencies from the configure script + - Refactor the handling of op context type + - Optimize the conversion between 96-bit and 64-bit tags + - Code refactoring for completion generation + - Remove obsolete feature checking code + - Report correct source address for scalable endpoints + - Allow binding any number of endpoints to a CQ/counter + - Add shared Tx context support + - Add alternative implementation for completion polling + - Change the default value of FI_PSM2_DELAY to 0 + - Add an environment variable for automatic connection cleanup + - Abstract the completion polling mechanism + - Use the new psm2_am_register_handlers_2 function when available + - Allow specialization when FI_COMPLETION op_flag is set. + - Put Tx/Rx context related functions into a separate file + - Enable PSM2 multi-ep feature by default + - Add option to build with PSM2 source included + - Simplify the code for checking endpoint capabilities + - Simplify the handling of self-targeted RMA operations + - Allow all free contexts be used for scalable endpoints + - Enable ordering of RMA operations + - Enable multiple endpoints over PSM2 multi-ep support + - Support multiple Tx/Rx contexts in address vector + - Remove the virtual lane mechanism + - Less code duplication in tagged, add more specialization. 
+ - Allow PSM2 epid be reused within the same session + - Turn on user adjustable inject size for all operations + - Use pre-allocated memory pool for RMA requests + - Add support for lazy connection + - Various bug fixes + - SHM + - Initial release of shared memory provider + - See the fi_shm.7 man page for details on available features and limitations + - Sockets + - Scalability enhancements + - Fix issue associating a connection with an AV entry that could result in + application hangs + - Add support for new persistent memory capabilities + - Fix fi_cq_signal to unblock threads waiting on cq sread calls + - Fix epoll_wait loop handling to avoid out of memory errors + - Add support for TCP keepalives, controllable via environment variables + - Reduce the number of threads allocated for handling connections + - Several code cleanups in response to static code analysis reports + - Fix reporting multiple completion events for the same request in error cases + - usNIC + - Minor adjustments to match new core MR mode bits functionality + - Several code cleanups in response to static code analysis reports + - Verbs + - Code cleanups and simplifications + - General code optimizations to improve performance + - Fix handling of wildcard addresses + - Check for fatal errors during connection establishment + - Support larger inject sizes + - Fix double locking issue + - Add support for memory registration caching (disabled by default) + - Enable setting thread affinity for CM threads + - Fix hangs in MPI closing RDM endpoints + - Add support for different CQ formats + - Fix RMA read operations over iWarp devices + - Optimize CM progress handling + - Several bug fixes + +------------------------------------------------------------------- +Wed Dec 20 08:49:03 UTC 2017 - nmoreychaisemartin@suse.com + +- Update to v1.5.3 + - Core + - Handle malloc failures + - Ensure global lock is initialized on Windows + - Fix spelling and formatting errors in man pages + - PSM + - Fix print 
format mismatches + - Remove 15 second startup delay when no hardware is installed + - Preserve FI_MR_SCALABLE mode bit for backwards compatibility + - PSM2 + - Fix print format mismatches + - Allow all to all communication between scalable endpoints + - Preserve FI_MR_SCALABLE mode bit for backwards compatibility + - Fix reference counting issue with opened domains + - Fix segfault for RMA/atomic operations to local scalable endpoints + - Fix resource counting related issues for Tx/Rx contexts + - Allow completion suppression when fi_context is non-NULL + - Use correct queue for triggered operations with scalable endpoints + - Sockets + - Fix check for invalid connection handle + - Fix crash in fi_av_remove + - Util + - Fix number of bits used for connection index + - Verbs + - Fix incorrect CQ entry data for MSG endpoints + - Properly check for errors from getifaddrs + - Retry getifaddr on failure because of busy netlink sockets + - Ack CM events on error paths +- Remove 0001-prov-psm-Eliminate-psm2-compat-library-delay-with-hf.patch + as it was merged upstream + +------------------------------------------------------------------- +Mon Nov 20 16:27:13 UTC 2017 - nmoreychaisemartin@suse.com + +- Update to v1.5.2 + - Core + - Fix Power PC 32-bit build + - Sockets + - Fix incorrect reporting of counter attributes + - Verbs + - Fix reporting attributes based on device limits + - Fix incorrect CQ size reported for iWarp NICs + - Update man page with known issues for specific NICs + - Fix FI_RX_CQ_DATA mode check + - Disable on-demand paging by default (can cause data corruption) + - Disable loopback (localhost) addressing (causing failures in MPI) + +------------------------------------------------------------------- +Mon Oct 9 23:28:31 UTC 2017 - stefan.bruens@rwth-aachen.de + +- Fix github issue #3393: + Add 0001-prov-psm-Eliminate-psm2-compat-library-delay-with-hf.patch + +------------------------------------------------------------------- +Thu Oct 5 07:10:28 UTC 
2017 - nmoreychaisemartin@suse.com + +- Update to v1.5.1 + - Core + - Fix initialization used by DL providers to avoid crash + - Add checks for null hints and improperly terminated strings + - Check for invalid core names passed to fabric open + - Provide consistent provider ordering when using DL providers + - Fix OFI_LIKELY definitions when GNUC is not present + - GNI + - Add ability to detect local PE rank + - Fix compiler/config problems + - Fix CQ read error corruption + - Remove tests of deprecated interfaces + - PSM + - Fix CQ corruption reporting errors + - Always generate a completion on error + - PSM2 + - Fix CQ corruption reporting errors + - Always generate a completion on error + - Add checks to handle out of memory errors + - Add NULL check for iov in atomic readv/writev calls + - Fix FI_PEEK src address matching + - Fix bug in scalable endpoint address resolution + - Fix segfault bug in RMA completion generation + - Sockets + - Fix missing FI_CLAIM src address data on completion + - Fix CQ corruption reporting errors + - Fix serialization issue wrt out of order CPU writes to Tx ring buffer + - Verbs + - Allow modifying rnr retry timeout to improve performance + - Add checks to handle out of memory errors + - Fix crash using atomic operations for MSG EPs +- Fix dependency to libfabric1 for libfabric-devel in baselibs.conf + +------------------------------------------------------------------- +Tue Sep 5 09:56:19 UTC 2017 - nmoreychaisemartin@suse.com + +- Update _service to allow auto updates from github + +------------------------------------------------------------------- +Thu Aug 10 08:29:42 UTC 2017 - nmoreychaisemartin@suse.com + +- Update to v1.5.0 + * Authorization keys Authorization keys, commonly referred to as job keys, + are used to isolate processes from communicating with other processes + for security purposes. + * Multicast support Datagram endpoints can now support multicast communication. 
+ * (Experimental) socket-like endpoint types New FI_SOCK_STREAM and FI_SOCK_DGRAM + endpoint types are introduced. These endpoint types target support of cloud + and enterprise based middleware and applications. + * Tagged atomic support Atomic operations can now target tagged receive + buffers, in addition to RMA buffers. + * (Experimental) deferred work queues Deferred work queues are enhanced triggered + operations. They target support for collective-based operations. + * New mode bits: FI_RESTRICTED_COMP and FI_NOTIFY_FLAGS_ONLY These mode bits + support optimized completion processing to minimize software overhead. + * Multi-threaded error reporting Reading CQ and EQ errors now allow the application + to provide the error buffer, eliminating the need for the application to + synchronize between multiple threads when handling errors. + * FI_SOURCE_ERR capability This feature allows the provider to validate and + report the source address for any received messages. + * FI_ADDR_STR string based addressing Applications can now request and use + addresses provided using a standardized string format. This makes it easier + to pass full addressing data through a command line, or handle address exchange + through text files. + * Communication scope capabilities: FI_LOCAL_COMM and FI_REMOTE_COMM Used to + indicate if an application requires communication with peers on the same + node and/or remote nodes. + * New memory registration modes The FI_BASIC_MR and FI_SCALABLE_MR memory registration + modes have been replaced by more refined registration mode bits. This allows + applications to make better use of provider hardware capabilities when dealing + with registered memory regions. + * New mode bit: FI_CONTEXT2 Some providers need more than the size provided by the + FI_CONTEXT mode bit setting. To accommodate such providers, an FI_CONTEXT2 mode bit + was added. 
This mode bit doubles the amount of context space that an application + allocates on behalf of the provider. + * PSM provider notes + * Improve the name server functionality and move to the utility code + * Handle updated mr_mode definitions + * Add support of 32 and 64 bit atomic values + * PSM2 provider notes + * Add option to adjust the locking level + * Improve the name server functionality and move to the utility code + * Add support for string address format + * Add an environment variable for message inject size + * Handle FI_DISCARD in tagged receive functions + * Handle updated mr_mode definitions + * Add support for scalable endpoint + * Add support of 32 and 64 bit atomic values + * Add FI_SOURCE_ERR to the supported caps + * Improve the method of checking device existence + * Sockets provider notes + * Updated and enhanced atomic operation support. + * Add support for experimental deferred work queue operations. + * Fixed counter signaling when used with wait sets. + * Improved support on Windows. + * Cleaned up event reporting for destroyed endpoints. + * Fixed several possible crash scenarios. + * Fixed handling socket disconnect events which could hang the provider. + * UDP provider notes + * Add support for multicast data transfers + * Verbs provider notes + * Fix an issue where if the user requests higher values for tx, rx + context sizes than default it wasn't honored. + * Introduce env variables for setting default tx, rx context sizes and iov limits. + * Report correct completion ordering supported by MSG endpoints.
+- Fix rpmbuild warnings +- libfabric-devel requires libfabric1, not libfabric +- Fix baselibs.conf + +------------------------------------------------------------------- +Tue Jul 4 09:21:35 UTC 2017 - nmoreychaisemartin@suse.com + +- Enable build on all archs +- Enable mlx build + +------------------------------------------------------------------- +Fri Jun 30 07:42:15 UTC 2017 - nmoreychaisemartin@suse.com + +- Add x86 build without libpsm2 + +------------------------------------------------------------------- +Tue May 16 06:43:19 UTC 2017 - nmoreychaisemartin@suse.com + +- Update to v1.4.2 (bsc#1036907). + +------------------------------------------------------------------- +Thu May 11 18:14:41 UTC 2017 - nmoreychaisemartin@suse.com + +- Update to v1.4.2-rc1 (bsc#1036907). +- Update notes: + - Fix for OS X clock_gettime() portability issue. + - Updated default counter wait object for improved performance + - Fix multi-threaded RMA progress stalls + - Updated default counter wait object for improved performance + - Fix multi-threaded RMA progress stalls + - Fix error in fi_cq_sreadfrom aborting before timeout expires + - Set atomic iov count correctly inside fi_atomicv + - Fix handling of apps that call fork. Move ibv_fork_init() before + calling any other verbs call. + - Fix crash in fi_write when connection is not yet established and + write data size is below inline threshold. + - Fix issues not handling multiple ipoib interfaces + - Reduce lock contention on buffer pools in send/completion handling + code.
+ + + +------------------------------------------------------------------- +Wed Apr 5 10:19:28 UTC 2017 - josef.moellers@suse.com + +- This version fixes bnc#990184 + (bnc#990184) + +------------------------------------------------------------------- +Thu Mar 23 16:21:53 UTC 2017 - jengelh@inai.de + +- RPM group fix + +------------------------------------------------------------------- +Fri Mar 10 15:58:55 UTC 2017 - josef.moellers@suse.com + +- PSM provider notes + - Defer initialization of the PSM library to allow runtime selection from + different versions of the same provider before fi_getinfo is called. +- PSM2 provider notes + - Defer initialization of the PSM2 library to allow runtime selection from + different versions of the same provider before fi_getinfo is called. + - General bug fixes. +- UDP provider notes + - Fix setting address format in fi_getinfo call. +- usNIC provider notes + - Fixed compilation issues with newer versions of libibverbs. + (fate#321883) +------------------------------------------------------------------- +Mon Jan 16 13:12:14 CET 2017 - ndas@suse.de + +- Updated to version 1.4.0 for general stability(fate#321883) +- Summary of changes as follow: + - Add new options, `-f` and `-d`, to fi_info that can be used to + specify hints about the fabric and domain name. Change port to `-P` + and provider to `-p` to be more in line with fi_pingpong. + + *GNI provider notes + + - General bug fixes, plugged memory leaks, performance improvements, + improved error handling and warning messages, etc. 
+ - Additional API support: + - FI_THREAD_COMPLETION + - FI_RMA_EVENT + - iov length up to 8 for messaging data transfers + + *PSM provider notes + + - General bug fixes + - Use utility provider for EQ, wait object, and poll set + - Allow multi-recv to post buffer larger than message size limit + + *PSM2 provider notes + + - General bug fixes + - Add support for multi-iov RMA read and atomic operations + - Allow multi-recv to post buffer larger than message size limit + + + *Verbs provider notes + + - Add fork support. It is enabled by default and can be turned off by + setting the FI_FORK_UNSAFE variable to "yes". This can improve + performance of memory registrations but also makes fork unsafe. The + following are the limitations of fork support: + - Fabric resources like endpoint, CQ, EQ, etc. should not be used in + the forked process. + - The memory registered using fi_mr_reg has to be page aligned since + ibv_reg_mr marks the entire page that a memory region belongs to + as not to be re-mapped when the process is forked (MADV_DONTFORK). + - Fix a bug where source address info was not being returned in + fi_info when destination node is specified. + +------------------------------------------------------------------- +Fri May 6 12:51:41 CEST 2016 - nads@suse.de + +- Updated to version 1.3.0 for better PSM2 support as suggested by + fate#319253, comment #9. + + [*libfabric-libtool.patch] + + Summary of changes as follow: + *PSM provider notes + - Remove PSM2 related code. + *PSM2 provider notes + - Add support for multi-iov send, tagged send, and RMA write. + - Use utility provider for EQ, wait object, and poll set. + *GNI provider notes + - General bug fixes, plugged memory leaks, etc.
+ - Added support for the following APIs: + - fi_endpoint: fi_getopt, fi_setopt, fi_rx_size_left, fi_tx_size_left, fi_stx_context + - fi_cq: fi_sread, fi_sreadfrom + - fi_msg: FI_MULTI_RECV (flag) + - fi_domain: FI_PROGRESS_AUTO (flag) + - fi_direct: FI_DIRECT + - Added support for FI_EP_DGRAM (datagram endpoint): + - Memory registration improvements: + - Initial support for Cray Cluster Compatibility Mode (CCM) + *MXM provider notes + - Initial release + *Sockets provider notes + - Enable FABRIC_DIRECT + - Enable sockets-provider to run on FreeBSD + - Add support for fi_trywait + - Add support for map_addr in shared-av creation + - Add shared-av support on OSX + - General bug fixes + *UDP provider notes + - Initial release + *usNIC provider notes + - Implement fi_recvv and fi_recvmsg for FI_EP_RDM. [PR #1594] + - Add support for FI_INJECT flag in the FI_EP_RDM implementation of fi_sendv. + [PR #1594] + - Handle FI_PEEK flag in fi_eq_sread. [PR #1758] + - Implement waitsets [PR #1893] + - Implement fi_trywait [PR #1893] + - Fix progress thread deadlock [PR #1893] + - Implement FD based CQ sread [PR #1893] + *Verbs provider notes + - Add support for fi_trywait + - verbs/RDM + - Add support for RMA operations. + - Add support for fi_cq_sread and fi_cq_sreadfrom + - Rework connection management to make it work with fabtests and also allow + connection to self. + - Other bug fixes and performance improvements. + +------------------------------------------------------------------- +Wed Apr 6 16:20:41 CEST 2016 - ndas@suse.de + +- Moved man pages to main package. +- Fixed invalid library group. + +------------------------------------------------------------------- +Wed Apr 6 15:40:25 CEST 2016 - ndas@suse.de + +- Packaging version 1.2.0 for fate#319253 + +------------------------------------------------------------------- +Fri Feb 12 10:18:49 CET 2016 - pth@suse.de + +- Use explicit file list instead of wildcards +- Package fi_info. 
+- Remove libtool.m4 from the package so that autoreconf installs + a current version. + +------------------------------------------------------------------- +Thu Feb 11 10:18:41 CET 2016 - pth@suse.de + +- Initial package, based on the OFED specfile for libfabric +- Add libfabric-libtool.patch to disable static builds by default. diff --git a/libfabric.spec b/libfabric.spec new file mode 100644 index 0000000..bc5a4e8 --- /dev/null +++ b/libfabric.spec @@ -0,0 +1,161 @@ +# +# spec file for package libfabric +# +# Copyright (c) 2024 SUSE LLC +# +# All modifications and additions to the file contributed by third parties +# remain the property of their copyright owners, unless otherwise agreed +# upon. The license for this file, and modifications and additions to the +# file, is the same license as for the pristine package itself (unless the +# license for the pristine package is not an Open Source License, in which +# case the license is the MIT License). An "Open Source License" is a +# license that conforms to the Open Source Definition (Version 1.9) +# published by the Open Source Initiative. 
+ +# Please submit bugfixes or comments via https://bugs.opensuse.org/ +# + + +# +%define git_ver .0.159219639b7f + +%ifarch aarch64 %power64 x86_64 s390x +%if 0%{?suse_version} > 1530 +%define with_ucx 1 +%endif +%define with_efa 1 +%endif + +Name: libfabric +Version: 1.22.0 +Release: 0 +Summary: User-space RDMA Fabric Interfaces +License: BSD-2-Clause OR GPL-2.0-only +Group: Development/Libraries/C and C++ +Source: %{name}-%{version}%{git_ver}.tar.bz2 +Source1: baselibs.conf +Patch0: libfabric-libtool.patch +Patch1: psm3-fix-SIGILL-on-system-not-supporting-AVX.patch +URL: http://www.github.com/ofiwg/libfabric +BuildRequires: autoconf +BuildRequires: automake +BuildRequires: libibverbs-devel +BuildRequires: libnl3-devel +%ifarch x86_64 +BuildRequires: libnuma-devel +BuildRequires: libpsm2-devel +%endif +BuildRequires: fdupes +BuildRequires: librdmacm-devel +BuildRequires: libtool +%if 0%{?with_ucx} +BuildRequires: libucm-devel +BuildRequires: libucp-devel + # 1.10 Needed for UCS_MEMORY_TYPE_UNKNOWN +BuildRequires: libucs-devel >= 1.10 +BuildRequires: libuct-devel +%endif +BuildRequires: pkg-config +%define lib_major 1 + +%description +libfabric provides a user-space API to access high-performance fabric +services, such as RDMA. This package only contains the fi_info binary. + +%package -n libfabric%{lib_major} +Summary: User-space RDMA fabric interfaces +Group: System/Libraries + +%description -n libfabric%{lib_major} +libfabric provides a user-space API to access high-performance fabric +services, such as RDMA. This package contains the runtime library. + +%package devel +Summary: Development files for the libfabric library +Group: Development/Libraries/C and C++ +Requires: libfabric%{lib_major} = %{version} + +%description devel +libfabric provides a user-space API to access high-performance fabric +services, such as RDMA. This package contains the development files. 
+ +%prep +%autosetup -p0 -n %{name}-%{version}%{git_ver} + +%build +export CFLAGS=-Wno-incompatible-pointer-types +rm -f config/libtool.m4 +autoreconf -fi +# defaults: with-dlopen and without-valgrind can be over-rode: +%configure %{?_without_dlopen} %{?_with_valgrind} \ + --enable-sockets --enable-verbs --enable-usnic \ +%if 0%{?with_efa} + --enable-efa \ +%endif +%if 0%{?with_ucx} + --enable-ucx \ +%endif +%ifarch x86_64 + --enable-psm2 \ + --enable-psm3 \ +%endif + --disable-static +%make_build + +%install +%make_install + +# remove unpackaged files from the buildroot +rm -f %{buildroot}%{_libdir}/*.la +%fdupes %{buildroot}/%{_prefix} + +%post -n libfabric%{lib_major} -p /sbin/ldconfig +%postun -n libfabric%{lib_major} -p /sbin/ldconfig + +%files +%defattr(-,root,root,-) +%{_bindir}/* +%{_mandir}/man1/* +%doc NEWS.md +%license COPYING + +%files -n libfabric%{lib_major} +%defattr(-,root,root,-) +%{_libdir}/%{name}.so.%{lib_major}* +%doc AUTHORS README +%license COPYING + +%files devel +%defattr(-,root,root) +%{_libdir}/%{name}.so +%dir %{_includedir}/rdma +%{_includedir}/rdma/fabric.h +%{_includedir}/rdma/fi_atomic.h +%{_includedir}/rdma/fi_cm.h +%{_includedir}/rdma/fi_collective.h +%{_includedir}/rdma/fi_domain.h +%{_includedir}/rdma/fi_endpoint.h +%{_includedir}/rdma/fi_eq.h +%{_includedir}/rdma/fi_errno.h +%{_includedir}/rdma/fi_ext.h +%{_includedir}/rdma/fi_profile.h +%{_includedir}/rdma/fi_rma.h +%{_includedir}/rdma/fi_tagged.h +%{_includedir}/rdma/fi_trigger.h +%dir %{_includedir}/rdma/providers +%{_includedir}/rdma/providers/fi_log.h +%{_includedir}/rdma/providers/fi_peer.h +%{_includedir}/rdma/providers/fi_prov.h +%{_includedir}/rdma/fi_ext_usnic.h +%ifarch x86_64 +%{_includedir}/rdma/fi_ext_psm2.h +%endif +%if 0%{?with_efa} +%{_includedir}/rdma/fi_ext_efa.h +%endif +%{_mandir}/man3/* +%{_mandir}/man7/* + +%{_libdir}/pkgconfig/%{name}.pc + +%changelog diff --git a/pre_checkin.sh b/pre_checkin.sh new file mode 100644 index 0000000..a3f18ee --- /dev/null 
+++ b/pre_checkin.sh @@ -0,0 +1,8 @@ +#!/bin/bash +# This script has to be run prior to a check-in if changes were done +# to spec and/or changes +GIT_VER=$(grep "%define git_ver" libfabric.spec) +VERSION=$(egrep "^Version:" libfabric.spec) +sed -i -e 's/^%define git_ver.*$/'"$GIT_VER/" -e 's/^Version:.*$/'"$VERSION/" fabtests.spec +osc service run format_spec_file + diff --git a/psm3-fix-SIGILL-on-system-not-supporting-AVX.patch b/psm3-fix-SIGILL-on-system-not-supporting-AVX.patch new file mode 100644 index 0000000..6009da6 --- /dev/null +++ b/psm3-fix-SIGILL-on-system-not-supporting-AVX.patch @@ -0,0 +1,60 @@ +commit fd049a0a053502a85b839a715fc6f9fdbfc4439a +Author: Nicolas Morey +Date: Thu Nov 28 16:44:12 2024 +0100 + + psm3: fix SIGILL on system not supporting AVX + + Even though code was added to not use the PSM3 provider on system + without AVX/AVX2 support, logs triggered by the detection code can + be using AVX instructions causing psmx3_getinfo to SIGILL + + Move the detection even earlier in the provider init and use OFI + standard print functions to avoid using any code that might have + been compiled with AVX instructions. + + Fixes: 3ef633408edf ("prov/psm3: update provider to sync with IEFS 11.5.1.0.3") + Signed-off-by: Nicolas Morey + +diff --git prov/psm3/src/psmx3_init.c prov/psm3/src/psmx3_init.c +index 29359d3ea348..cc259a1b6301 100644 +--- prov/psm3/src/psmx3_init.c ++++ prov/psm3/src/psmx3_init.c +@@ -680,18 +680,6 @@ static int psmx3_getinfo(uint32_t api_version, const char *node, + + PSMX3_INFO(&psmx3_prov, FI_LOG_CORE,"\n"); + +- __builtin_cpu_init(); +- if (!__builtin_cpu_supports(PSM3_MARCH)) { +- PSMX3_INFO(&psmx3_prov, FI_LOG_CORE, +- "CPU does not support '%s'.\n", PSM3_MARCH); +- OFI_INFO_STR(&psmx3_prov, +- (__builtin_cpu_supports("avx2") ? "AVX2" : +- (__builtin_cpu_supports("avx") ? "AVX" : +- (__builtin_cpu_supports("sse4.2") ? 
"SSE4.2" : "unknown"))), +- PSM3_MARCH, "CPU Supports", "PSM3 Built With"); +- goto err_out; +- } +- + if (psmx3_init_prov_info(hints, &prov_info)) + goto err_out; + +@@ -895,6 +883,19 @@ struct fi_provider psmx3_prov = { + + PROVIDER_INI + { ++ __builtin_cpu_init(); ++ if (!__builtin_cpu_supports(PSM3_MARCH)) { ++ FI_INFO(&core_prov, FI_LOG_CORE, ++ "PSM3: CPU does not support '%s'.\n", PSM3_MARCH); ++ OFI_INFO_STR(&core_prov, ++ (__builtin_cpu_supports("avx2") ? "AVX2" : ++ (__builtin_cpu_supports("avx") ? "AVX" : ++ (__builtin_cpu_supports("sse4.2") ? "SSE4.2" : "unknown"))), ++ PSM3_MARCH, "CPU Supports", "PSM3 Built With"); ++ return NULL; ++ } ++ ++ + psmx3_prov.version = get_psm3_provider_version(); + + PSMX3_INFO(&psmx3_prov, FI_LOG_CORE, "build options: VERSION=%u.%u=%u.%u.%u.%u, "