Compare commits
9 Commits
| Author | SHA256 | Date | |
|---|---|---|---|
| 4719105ccf | |||
| fab85be73a | |||
| 9d6266b5c8 | |||
| da715dee32 | |||
| 11d2f8ecbe | |||
| 4e8479a47e | |||
| f9b9495259 | |||
| 28daf30db4 | |||
| 08f2f39cab |
12
README.md
12
README.md
@@ -1,12 +0,0 @@
|
||||
|
||||
## Build Results
|
||||
|
||||
Current state of libfabric in openSUSE:Factory is
|
||||
|
||||

|
||||
|
||||
The current state of libfabric in the devel project build (science:HPC)
|
||||
|
||||

|
||||
|
||||
|
||||
3
_service
3
_service
@@ -8,10 +8,11 @@
|
||||
<param name="versionformat">@PARENT_TAG@.@TAG_OFFSET@.%h</param>
|
||||
<param name="versionrewrite-pattern">v(.*)</param>
|
||||
<param name="versionrewrite-replacement">\1</param>
|
||||
<param name="revision">ae02f52ef5c30e3023d399b27818876fe81763ac</param>
|
||||
<param name="revision">2ee68f6051e90a59d7550d94a331fdf5e038db90</param>
|
||||
</service>
|
||||
<service name="recompress" mode="manual">
|
||||
<param name="file">libfabric*.tar</param>
|
||||
<param name="compression">bz2</param>
|
||||
</service>
|
||||
|
||||
</services>
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
#
|
||||
# spec file for package fabtests
|
||||
#
|
||||
# Copyright (c) 2025 SUSE LLC and contributors
|
||||
# Copyright (c) 2024 SUSE LLC
|
||||
#
|
||||
# All modifications and additions to the file contributed by third parties
|
||||
# remain the property of their copyright owners, unless otherwise agreed
|
||||
@@ -16,10 +16,10 @@
|
||||
#
|
||||
|
||||
|
||||
%define git_ver .0.5a13558c2
|
||||
%define git_ver .0.2ee68f6051e9
|
||||
|
||||
Name: fabtests
|
||||
Version: 2.4.0
|
||||
Version: 2.0.0
|
||||
Release: 0
|
||||
Summary: Test suite for libfabric API
|
||||
License: BSD-2-Clause OR GPL-2.0-only
|
||||
@@ -28,6 +28,7 @@ URL: http://www.github.com/ofiwg/libfabric
|
||||
Source: libfabric-%{version}%{git_ver}.tar.bz2
|
||||
Source1: fabtests-rpmlintrc
|
||||
Patch0: libfabric-libtool.patch
|
||||
Patch1: psm3-prevent-code-from-building-using-AVX-AVX2.patch
|
||||
BuildRequires: autoconf
|
||||
BuildRequires: automake
|
||||
BuildRequires: libfabric-devel = %{version}
|
||||
@@ -41,10 +42,6 @@ Fabtests provides a set of examples that uses libfabric, a fabric software libra
|
||||
%autosetup -p0 -n libfabric-%{version}%{git_ver}
|
||||
|
||||
%build
|
||||
%set_build_flags
|
||||
%if 0%{?gcc_version} >= 8
|
||||
export CFLAGS="$CFLAGS --std=gnu17"
|
||||
%endif
|
||||
cd fabtests
|
||||
./autogen.sh
|
||||
%configure %{?_with_libfabric}
|
||||
|
||||
BIN
libfabric-2.0.0.0.2ee68f6051e9.tar.bz2
LFS
Normal file
BIN
libfabric-2.0.0.0.2ee68f6051e9.tar.bz2
LFS
Normal file
Binary file not shown.
Binary file not shown.
@@ -1,723 +1,3 @@
|
||||
-------------------------------------------------------------------
|
||||
Fri Jan 02 13:41:31 UTC 2026 - Nicolas Morey <nmorey@suse.com>
|
||||
|
||||
- Update to v2.4.0
|
||||
- Core
|
||||
- hmem/cuda: Adding more robust libgdrapi libpaths
|
||||
- Update bindings/rust/README.md to reflect the recommended build process.
|
||||
- Update build.rs to support both cargo build & cargo publish work directories.
|
||||
- Update Cargo.toml in preparation for crates.io publishing.
|
||||
- configure: Fix sanitizer detection logic
|
||||
- Introduce a lightweight Rust bindings for Libfabric, using bindgen.
|
||||
- include/ofi_indexer: introduce new ofi_array_at_max function
|
||||
- man/fi_cxi: fixup info for FI_CXI_RDZV_GET_MIN
|
||||
- man/fi_getinfo: Update the capabilities with mode bits requirements
|
||||
- man/fi_cq: Document `FI_GETWAITOBJ` for `fi_control`
|
||||
- man/fi_fabric: Update `fi_tostr()` datatypes
|
||||
- CXI
|
||||
- Bump provider support up to libfabric 2.4
|
||||
- Add domain rx match mode override
|
||||
- Set rendezvous eager size default to 2K
|
||||
- Change cuda dmabuf default to enabled
|
||||
- Do not abort if MR match count does not reconcile
|
||||
- Allow CP for triggered CQ to remap to Best Effort
|
||||
- Fix sl-driver path for testing
|
||||
- Set max domain TX CQs to 14
|
||||
- Use cxil_alloc_trig_cp to distinguish trig and tx cmdqs
|
||||
- Add FI_EBUSY debug messages
|
||||
- Fix validation of service id
|
||||
- Fix criterion test_sw tap files
|
||||
- Cxip_cmdq_cp_modify fix
|
||||
- Fix RNR protocol send byte/error counting
|
||||
- Release TX credit when pending RNR retry
|
||||
- Update rocr test fine grained flags
|
||||
- Fix DEVICE in fi_info_test
|
||||
- Introduce non-debug tracing
|
||||
- Reset timer on rx of ARM packet
|
||||
- Fix performance issue with close_mc()
|
||||
- Increase vni range in auth_key tests
|
||||
- Support auth_key ranges
|
||||
- Fix use of hw_cps and memory leak
|
||||
- EFA
|
||||
- Fix cq data size in efa-rdm pkt post
|
||||
- fix test_efa_rdm_mr_reg_cuda_memory unit test
|
||||
- adjust the memory barrier positions
|
||||
- Optimize RTW packet sending by replacing efa_rdm_ope_post_send
|
||||
- Adjust logging level for txe releases
|
||||
- Add tracepoints for handshake
|
||||
- Add flags to MR logs
|
||||
- Grow efa_tx_pkt_pool and ope_pool during rdm ep creation
|
||||
- Do not use rdma write when unsolicited recv support is inconsistent
|
||||
- Determine whether using device rdma based on p2p
|
||||
- Introduce pke generation counter for protocol path
|
||||
- Enable data path direct for efa-rdm
|
||||
- Update the function signature for efa_data_path_direct_cq_initialize
|
||||
- Move efa_cq_open_ibv_cq to efa_cq.c
|
||||
- Do not track rx pkt pool for non-debug build
|
||||
- Temporarily disable FI_OPT_EFA_SENDRECV_IN_ORDER_ALIGNED_128_BYTES support for efa protocol
|
||||
- do not ignore local read completion
|
||||
- Add missing lttng tps in efa_post_send
|
||||
- Fix the remote cq data flags for zcpy recv
|
||||
- Optimize the WQE post in data path direct
|
||||
- fix typos in error messages
|
||||
- Only show help message for OPE warn logs
|
||||
- configure: replace no-break space with regular space character
|
||||
- Remove unused function declarations
|
||||
- Acquire CQ's `ep_list_lock` during counter progress
|
||||
- Add asserts to detect erroneous CQE dereferences
|
||||
- Ignore rma completion to a removed peer
|
||||
- Remove the incorrect check for device max_msg_size
|
||||
- Fix function signature mismatch
|
||||
- Set FI_RX_CQ_DATA for efa direct with NULL hints
|
||||
- Do not fail fi_getinfo for the wrong fabric
|
||||
- Log warnings only for internal OPE failures or if CQ error entry not written
|
||||
- Add unit tests for LRU AH eviction
|
||||
- Evict AH with no explicit AV entries when AH limit reached
|
||||
- Add locking assertions and update unit tests
|
||||
- Remove efa_conn_release unsafe
|
||||
- Require FI_RX_CQ_DATA on devices without unsolicited write recv
|
||||
- Add LTTng tracepoints for direct data path operations
|
||||
- Don't warn users about non-EFA devices
|
||||
- Support FI_RX_CQ_DATA for efa-direct
|
||||
- Fix deadlocks in AV insert/remove/close and CQ read paths
|
||||
- Don't try to release a lock that is not taken
|
||||
- set RUNPATH if custom rdma-core provided
|
||||
- Remove rx_msg_flags from efa_rdm_msg_recv/efa_rdm_msg_recvv
|
||||
- Update tracepoints in the receive path
|
||||
- Slide recv-win on RTM/RTA error
|
||||
- Insert read and write packets to tx debug list
|
||||
- LNX
|
||||
- remove force setting DEVICE_ONLY flag
|
||||
- set core hints proto to UNSPEC
|
||||
- remove iov count failures
|
||||
- add wait object implementation
|
||||
- OPX
|
||||
- Don't fail configure when OPX unhappy
|
||||
- Add note to FI_OPX_SDMA_MIN_PAYLOAD_BYTES doc
|
||||
- Simplify uapi configuration
|
||||
- Unionize 9B and 16B packet SCB models in endpoint structs.
|
||||
- Support shared contexts in hfisvc bts
|
||||
- Fix replays for multi-packet eager
|
||||
- Don't retry forever in send rendezvous.
|
||||
- Don't ACK packets that were never received
|
||||
- Segfault in opx_hfi_rdma_context_open() on 2nd endpoint opened
|
||||
- Fix seg fault in finalize
|
||||
- Fix SDMA writev error when RDMA core functions are being used.
|
||||
- Add back accidentally removed opx_domain_hfisvc_poll()
|
||||
- Add missing function pointers for HFI service
|
||||
- Check uapi for hfisvc/HFI1 direct verbs
|
||||
- Rename hfisvc to opx-hfisvc
|
||||
- Move submodule to rdma core
|
||||
- Remove stx/srx support in OPX
|
||||
- Register MRs with HFI service
|
||||
- Ensure SDMA packet lengths are 8-byte multiples
|
||||
- Use HFI service by default if enabled in the driver.
|
||||
- fixup goto labels that need statements
|
||||
- Update hfisvc_client to 64-bit atomics
|
||||
- HFISVC: Fix replay payload
|
||||
- Disable HFI Service by default.
|
||||
- Disable use of HFI service when driver does not support it.
|
||||
- Update hfisvc_client to latest patch
|
||||
- Only open IPC cache if HMEM initialized and IPC enabled
|
||||
- Handle extended rx bits in common 9B code
|
||||
- Add IPC to 16B header path
|
||||
- Make sriov-alpha limitations CN5000-only
|
||||
- Remove cmake build for hfisvc_client library
|
||||
- Handle completion errors from HFI service
|
||||
- Fix setting of rc in deferred recv rts
|
||||
- Additional HFI Service support changes
|
||||
- HFI Service initial support
|
||||
- Asynchronous HMEM memcopy for IPC
|
||||
|
||||
-------------------------------------------------------------------
|
||||
Thu Dec 4 14:40:44 UTC 2025 - Guillaume GARDET <guillaume.gardet@opensuse.org>
|
||||
|
||||
- Build with distro flags
|
||||
|
||||
-------------------------------------------------------------------
|
||||
Thu Oct 23 20:01:17 UTC 2025 - Nicolas Morey <nicolas.morey@suse.com>
|
||||
|
||||
- Update to v2.3.1
|
||||
- Core
|
||||
- include/windows: Add definition for realpath()
|
||||
- EFA
|
||||
- Tune recvwindow and peer reorder buffer pool sizes
|
||||
- Allocate peer reorder buffers from a pre-allocated pool
|
||||
- Allocate peer map entry pool during the rdm ep create
|
||||
- Create shm info on demand for efa_rdm_ep_open
|
||||
- Check whether to enable shm inside efa_shm_info_create
|
||||
- Ring doorbell when reaching the max batch wqe cnt
|
||||
- Fix the queued ope progress in ep close
|
||||
- Logging improvements and unit test fix
|
||||
- Grab srx lock when releasing rx entries in AV close
|
||||
- Remove duplicate macro for rdma sge limit
|
||||
- Fix memory leak from efa_srx_unexp_pkt unit test
|
||||
- Fix unit test build error for rdma-core without HAVE_CAPS_UNSOLICITED_WRITE_RECV
|
||||
- Check rdma iov limit in data path direct
|
||||
- Add packet entry flag tracking for double linked list management
|
||||
- Remove the extra dlist remove for peers in ep close
|
||||
- Add traces for completions bypassing util-cq
|
||||
- Update shared domain caps and modes
|
||||
- Move domain check inside the fabric lock
|
||||
- Take domain lock to protect concurrent access to domain fields
|
||||
- Return matching fabric/domain in fi_getinfo
|
||||
- Remove duplicated efa_cq_construct_cq_entry
|
||||
- Fix the memleak for efa_rdm_peer
|
||||
- Introduce error status for ope
|
||||
- Cleanup the queue flags after dlist_remove
|
||||
- Allocate cq err_buf on demand
|
||||
- Bypass util cq in the data path of efa-direct
|
||||
- Fix the unsolicited write recv support in unit-test
|
||||
- Add unit-tests for lock types
|
||||
- Shift the src_addr correctly in sreadfrom
|
||||
- Acquire the same lock for qp lifecycle
|
||||
- Ignore stale cqes from destroyed qps
|
||||
- Use IBV_QUERY_QP_DATA_IN_ORDER_DEVICE_ONLY flag when available
|
||||
- OPX
|
||||
- 0 byte write_data Error
|
||||
- 16B 3-nodes seg fault with openmpi
|
||||
- Atomics and bounce buffer fixes
|
||||
- Only open IPC cache if HMEM initialized and IPC enabled
|
||||
- Fix ROCR IPC build errors
|
||||
- Use getpid() instead of gettid() for POSIX compliancy.
|
||||
- Fix compilation error
|
||||
- RXD
|
||||
- Fix return of EAGAIN when not enough tx entries
|
||||
- UCX
|
||||
- Use noop callback in ucx_inject_write
|
||||
- Add a noop callback function for inject send path
|
||||
- UDP
|
||||
- Cleanup CQ parameters
|
||||
- Fix CQ overflow case
|
||||
- Util
|
||||
- Remove unmatched rx entry from unspec unexpected queue
|
||||
- Add status enum to util rx entry
|
||||
- Extract helper function to lookup existing fabric/domain
|
||||
- Use hints name for fabric/domain lookup
|
||||
- Verbs
|
||||
- Return PCI bus information with fi_getinfo()
|
||||
- Fix compiler warning on the bound of 'strncpy' call
|
||||
- Fabtests
|
||||
- efa: Run FI_MORE test with more iterations
|
||||
- efa: Add implicit AV test
|
||||
- Split OOB address exchange function
|
||||
- efa: Fix the av operation
|
||||
- efa: Improve rnr_read_cq_error test
|
||||
- Drop prov-opx-fix-compilation-error.patch which was merged upstream.
|
||||
|
||||
-------------------------------------------------------------------
|
||||
Mon Sep 8 08:54:07 UTC 2025 - Nicolas Morey <nicolas.morey@suse.com>
|
||||
|
||||
- Update to v2.3.0
|
||||
- Core
|
||||
- configure: Improve the restricted-dl help text
|
||||
- ofi_list: Introduce dlist_entry_in_list
|
||||
- man/fi_peer: Fix `FI_ADDR_NOTAVAIL` typo
|
||||
- common: Make common runtime parameters working for DL providers
|
||||
- configure.ac: Move cuda cppflag set before DMABUF check
|
||||
- Add address format FI_SOCKADDR_IP
|
||||
- include/fi_peer.h: remove fi_peer_rx_entry dlist fields
|
||||
- configure: Fix clang checking
|
||||
- hmem/neuron: Implement put_dmabuf_fd op
|
||||
- man/fi_endpoint: Clarify rx_attr->caps usage
|
||||
- EFA
|
||||
- Decrement rx_pkts_posted before efa_rdm_pke_release_rx
|
||||
- Enable direct data path by default
|
||||
- Bypass rdma-core in blocking cq read path
|
||||
- Add traces for RX/TX completions
|
||||
- Fix the unsolicited write recv check
|
||||
- Refactor efa_base_ep_create_qp
|
||||
- Add generic function to process queued op entries
|
||||
- Deduce queued packet list from op entry
|
||||
- Add generic utility for fetching RDM packet type
|
||||
- Create abstraction for IBV CQ polling sequence
|
||||
- Bypass rdma-core in data path.
|
||||
- Refactor ibv_cq_ex open call
|
||||
- Fix stale links in docs/overview.md
|
||||
- Initialize nevents in efa_domain_cq_open_ext
|
||||
- Fix conflicting types for efa_mock_efa_ibv_cq_wc_read_opcode_return_mock
|
||||
- Remove duplicate mock function declarations
|
||||
- Use efa specific cq trywait
|
||||
- Implement fi_control for efa direct cq
|
||||
- Support blocking cq read in efa-direct
|
||||
- Remove cq status prediction
|
||||
- Fix wait_send procedure
|
||||
- Restrict GDA domain ops to efa-direct
|
||||
- Fix efa device selection in test_rdm_cq_create_error_handling
|
||||
- Fix the hmem flags setting
|
||||
- Fix segfault in LTTNG tracing when peer is NULL
|
||||
- Revert "prov/efa: Only do dmabuf reg when FI_MR_DMABUF is set"
|
||||
- Add implicit AV and logic to move entries to explicit AV
|
||||
- Add get_mr_lkey to GDA ops
|
||||
- Move gda operations to FI_EFA_GDA_OPS
|
||||
- Improve the handshake tx error handling
|
||||
- Avoid flushing cq during ep close for external cq
|
||||
- Replace the address members of peer struct with conn
|
||||
- Add test_av_reinsertion unit test
|
||||
- Skip call to efa_rdm_ep_get_peer with FI_ADDR_NOTAVAIL
|
||||
- Replace pke->addr with pke->peer and remove ope->addr
|
||||
- Rename efa_rdm_msg_alloc_rxe to efa_rdm_msg_alloc_rxe_zcpy
|
||||
- Add efa_ep_addr_print debug function
|
||||
- Refactor reverse AV addition and deletion
|
||||
- Rename efa_conn_rdm_init to efa_conn_rdm_insert_shm_av
|
||||
- Drain cq after qp destroy
|
||||
- Fix the locking procedure in ep close.
|
||||
- Move rxe_map to peer struct
|
||||
- Clean stale error handling
|
||||
- Remove illegal ope state
|
||||
- LNX
|
||||
- Fix missing peer_entry field updates
|
||||
- OPX
|
||||
- Asynchronous HMEM memcopy for IPC
|
||||
- Fix infinite loop/crash in SIGUSR2 handler
|
||||
- CN5000 SR-IOV pbc lid update
|
||||
- CN5000 alpha SR-IOV loopback support
|
||||
- Fixing typo in reliability_service_max_outstanding_bytes parameter.
|
||||
- CYR: Support 256B rcvhdr size
|
||||
- Fix overwrite of entropy field in 16-byte packet headers
|
||||
- Add debug dump of endpoint upon receiving SIGUSR2
|
||||
- GPU/driver configure check only warns
|
||||
- Adding initialization requirements for when opx is built as a dl
|
||||
- Rename intranode as needed for clarity
|
||||
- Use fi_opx_check_info function for library location lookup
|
||||
- Add IPC cache to OPX
|
||||
- Initialize SDMA work entry field
|
||||
- Fix RMA with CQ Data Bug
|
||||
- Change shm tx connection table from array to RBT
|
||||
- Fix FI_REMOTE_CQ_DATA write
|
||||
- Fix bug in setting pbc dlid for 16B sendv_egr
|
||||
- Resolve IPC HMEM D to H Segfault
|
||||
- Parameter mismatch fix for opx_ep_complete_receive_operation()
|
||||
- Link bounce support for context sharing
|
||||
- CN6000 simulator support
|
||||
- PSM3
|
||||
- Support fi_av_insertsvc()
|
||||
- Util
|
||||
- Handle the new address format FI_SOCKADDR_IP
|
||||
- Add example directory with first example
|
||||
- Get srx lock in the caller of util_foreach_unspec
|
||||
- ofi_util.h: Fix bug in fi_cq_readfrom with no FI_SOURCE
|
||||
- mon_sampler: Disable the sampler together with the monitor provider
|
||||
- Verbs
|
||||
- Add peer mem support for Broadcom Devices
|
||||
- Fabtests
|
||||
- Fix `make distcheck` error caused by SUBDIRS
|
||||
- efa: Initialize timespec as 0
|
||||
- ft_finalize: Serialized sync in ft_finalize.
|
||||
- pingpong: Post recv before ep finalize
|
||||
- efa: Add volatile to prevent compiler optimization of CQE flag
|
||||
- efa: fix missing rdma check in test_rma_bw_sread
|
||||
- efa: Add FT_COMP_WAIT_FD tests
|
||||
- efa: Add fi_cq_sread tests
|
||||
- efa: Allow shared AV in multi_ep_mt test
|
||||
- efa: Add pytest marker and fixture to GDA fabtest
|
||||
- Fix type mismatch build warning
|
||||
- efa: Fix EFA device query
|
||||
- efa: Add GPU Direct Async test
|
||||
- multinode: Fix multi_barrier EAGAIN path
|
||||
- efa: Check device number in efa_device_selection
|
||||
- efa: Increase the timeout for test_rma_bw_range
|
||||
- efa: Add multi_ep_mt into pytest suites
|
||||
- efa: Introduce multi_ep_mt test
|
||||
- pytest: Fix the additional environment return
|
||||
- Support customized fi_addr for ft_post_rx_buf
|
||||
- Add New Benchmark rma_tx_completion
|
||||
- Add prov-opx-fix-compilation-error.patch to fix compilation with GCC >= 15
|
||||
|
||||
-------------------------------------------------------------------
|
||||
Wed Jul 23 11:50:10 UTC 2025 - Andreas Schwab <schwab@suse.de>
|
||||
|
||||
- Fix file list on riscv64
|
||||
|
||||
-------------------------------------------------------------------
|
||||
Mon Jun 23 06:17:44 UTC 2025 - Nicolas Morey <nicolas.morey@suse.com>
|
||||
|
||||
- Update to v2.2.0
|
||||
- Core
|
||||
- log: Fix buffer overrun when accessing the 'log_levels' array
|
||||
- man/fi_mr: Clarify fi_close behavior
|
||||
- rdma/fabric.h: Add new FI_RESCAN flag to fi_getinfo()
|
||||
- hmem/cuda: Add fallback for dmabuf flag with CUDA_ERROR_NOT_SUPPORTED
|
||||
- hmem/cuda: Add runtime fallback for unsupported dmabuf flag
|
||||
- hmem/cuda: Add a flag for exporting dmabuf fd on GB200
|
||||
- man: Clarify fi_close behavior on FI_ENDPOINT
|
||||
- av: introduce FI_FIREWALL_ADDR flag for insert operations
|
||||
- common: ofi_ifname_toaddr check ifa->ifa_addr for null
|
||||
- man/fi_mr: Add note that requested_key may be ignored w/o remote access
|
||||
- CXI
|
||||
- Fix alt_read unit test to use rdzv_threshold
|
||||
- Adjust cxi environment variable defaults
|
||||
- Fix regression which could cause deadlock
|
||||
- Support libfabric 2.2 API
|
||||
- Set cq_data in peer unexpected message
|
||||
- Fix locking on the SRX path
|
||||
- Allow for passing opaque 64-bit data in ctrl_msg
|
||||
- Fix cxi driver paths for CI
|
||||
- Fix use of alt_read rget restricted TC type
|
||||
- Fix compile warnings associated with new dlopen of curl/json
|
||||
- Fix curl CXIP_WARN that included extra parameter
|
||||
- Decouple existence CXI_MAP_IOVA_ALLOC for build
|
||||
- New conf opt for binding of json symbols
|
||||
- New conf opt for binding of curl symbols
|
||||
- Pad struct to address hash mismatch bug
|
||||
- Consistency for initialization of cxip_addr structure
|
||||
- Fix uninitialized padding in cxip_addr structure causing hash mismatches
|
||||
- Mem reg test with READ/WRITE access
|
||||
- Support read-only cached MRs
|
||||
- Remove cached mem reg -FI_EAGAIN comment
|
||||
- Add access ctrl bits to internal mem reg
|
||||
- Fix broken tests
|
||||
- Added collective logical operators to cxip_query_collective()
|
||||
- Log rc from cxi_eq_get_event() failure
|
||||
- Add nanosecond timestamp to tracing
|
||||
- Stuff the sending rank into the reduction packet
|
||||
- Increase the reduction engine timeout to the max
|
||||
- Increase RX buffer size for collectives
|
||||
- EFA
|
||||
- Update error message for invalid MRs
|
||||
- Set IBV_ACCESS_LOCAL_WRITE for FI_REMOTE_WRITE
|
||||
- Update packet printing functions
|
||||
- Avoid iteration when cuda_api_permitted is false
|
||||
- Do not add explicit MRs from application to MR cache
|
||||
- Move peer bufpool to endpoint
|
||||
- Move RDMA check functions to efa_rdm_ep.h
|
||||
- Add detailed logging of device enumeration and selection
|
||||
- Extend domain ops to open CQ with external memory
|
||||
- Extend domain ops to allow querying of QP and CQ attributes
|
||||
- Add a domain operation to query address info
|
||||
- Minimize calls to efa_rdm_ep_get_peer in the CQ read path
|
||||
- Remove unused function get_first_nic_name
|
||||
- Post initial rx pkt when qp is enabled.
|
||||
- Update qp table after qp destroy
|
||||
- Track cloned rx pkts
|
||||
- Clear domain level peer lists when closing endpoints
|
||||
- Allow multiple EPs to bind to a single AV
|
||||
- Minimize calls to efa_rdm_ep_get_peer in the RMA path
|
||||
- Minimize efa_rdm_ep_get_peer calls in the atomic path
|
||||
- Add RNR test with fi_tsend
|
||||
- Minimize efa_rdm_ep_get_peer calls in the send path
|
||||
- Remove duplicate filtering on FI_EFA_IFACE
|
||||
- Update man page
|
||||
- Clear cur_device when filtering EFA devices with FI_EFA_IFACE
|
||||
- Move address handle to domain level.
|
||||
- Make 1:1 relationship between efa domain and pd
|
||||
- Delete unused efa_conn_release prototype
|
||||
- Fix unit test build warning
|
||||
- Use Mutex when removing from g_efa_domain_list
|
||||
- Add missing lock in list iteration
|
||||
- Remove ope from ope_longcts_list during release
|
||||
- Use mutex locks for FI_THREAD_COMPLETE
|
||||
- Do not try to initialize non-EFA devices
|
||||
- Only initialize devices that match FI_EFA_IFACE filter
|
||||
- Rename g_efa_device_list and g_efa_device_cnt
|
||||
- Remove efa_device->device_idx
|
||||
- Initialize EFA environment variables before devices
|
||||
- Add lock around g_efa_domain_list
|
||||
- Improve debug logging
|
||||
- Do copy for inline send
|
||||
- Enforce FI_MR_LOCAL in efa-direct
|
||||
- Add lock to ensure efa direct cq poll is thread safe
|
||||
- Require FI_MR_ALLOCATED in domain_attr
|
||||
- Introduce setopt flag for homogeneous peers
|
||||
- Change FI_AV_MAP logs to info
|
||||
- Check if the util_av entry is valid before fetching
|
||||
- HOOK
|
||||
- Add monitor hook provider
|
||||
- LNX
|
||||
- Dump statistics
|
||||
- Refactor LNX to follow libfabric semantics
|
||||
- OPX
|
||||
- Cornelis Networks OPX provider upstream June 5, 2025
|
||||
- Cornelis Networks OPX provider upstream May 9, 2025
|
||||
- Cornelis Networks OPX provider upstream April 19, 2025
|
||||
- Make room for new RPC field in fid_ep
|
||||
- PSM3
|
||||
- Update psm3 to match IEFS 12.1.0.0 release
|
||||
- RXD
|
||||
- Add an environment variable to control FI_RESCAN
|
||||
- RXM
|
||||
- Fix user id case
|
||||
- Fix potential completion mismatch for the rndv_write protocol
|
||||
- Fix potential premature recv completion with rndv_write protocol
|
||||
- Fix rndv_write state machine issue that may cause premature completion
|
||||
- Fix the flow control enabling checking
|
||||
- Add an environment variable to control FI_RESCAN
|
||||
- fix auto progress
|
||||
- Add a new error code for firewall
|
||||
- SHM
|
||||
- Add shm rename and retry
|
||||
- Fix srx entry cleanup
|
||||
- Acquire ep lock when freeing entries
|
||||
- Don't run smr progress if region isn't initialized
|
||||
- Update shm man page to be accurate and fix typos/formatting
|
||||
- TCP
|
||||
- Enable keepalive during CM exchange
|
||||
- Add FI_GET_FD support to tcp provider
|
||||
- Add firewall flags into verify flag set
|
||||
- Fix memory out of bound access in RDM
|
||||
- Add a new error code for firewall
|
||||
- Add firewall support for RDM
|
||||
- Process EPOLLERR properly
|
||||
- Handle the error correctly
|
||||
- UTIL
|
||||
- Add fi_mon_sampler
|
||||
- poll: Add `kqueue` support
|
||||
- Move/Rename fi_thread_level() to header
|
||||
- ofi_util.h: Make util_prov::info non const
|
||||
- Only allocate MR key when needed
|
||||
- bufpool: Add new flag to skip zeroing new memory allocations
|
||||
- USNIC
|
||||
- Fix memory leak in usd_ib_get_devlist()
|
||||
- Verbs
|
||||
- Avoid holding vrb_info_mutex when reloading interfaces
|
||||
- Use a separate lock for provider initialization
|
||||
- Register vrb_info_mutex in util_prov
|
||||
- Code clean up.
|
||||
- Optimize init info.
|
||||
- Enable logging of ibv_async_events
|
||||
- Fix unprotected access to vrb_util_prov.info
|
||||
- Fix memory leak of fi_info structure in vrb_domain()
|
||||
- Fix the flow control enabling checking
|
||||
- Reload the list of interfaces on each call to fi_getinfo()
|
||||
- Use single pointer instead of double pointer
|
||||
- Always protect vrb_util_prov.info accesses with the mutex
|
||||
- Make vrb_get_verbs_info() static
|
||||
- Make a copy of the provider info in the fabric object
|
||||
- Rename vrb_init_mutex to vrb_info_mutex
|
||||
- Clarify little endian limitation for verbs atomics
|
||||
- Fix atomic work request fields
|
||||
- Memory leak for address formats different than FI_ADDR_IB_UD
|
||||
- Fabtests
|
||||
- Document limitations of fi_rma_pingpong
|
||||
- Add additional_env to ClientServerTest
|
||||
- Fix leak of dmabuf fd
|
||||
- Define ft_hmem_put_dmabuf_fd
|
||||
- efa: Make FI_EFA_IFACE tests check the Libfabric build
|
||||
- Extend efa_info_test to accept fabric argument
|
||||
- efa: Check rdma capability for both sides
|
||||
- efa: Add FI_EFA_IFACE tests
|
||||
- Add FI_EFA_IFACE tests
|
||||
- efa: Fix get_efa_device_names
|
||||
- Use yaml safe_load
|
||||
- Add MR mode and descriptor to resource mgmt test
|
||||
- Fix rdm_bw_mt unchecked fi_close return
|
||||
- Support rdm_bw_mt in runfabtests.py
|
||||
- Add rdm_bw_mt test to runfabtests on windows
|
||||
- Add threaded_tests support to runfabtests.sh
|
||||
- Add multi-threaded bandwidth test
|
||||
- Add pthread_barrier support to windows
|
||||
- Add pthread_barrier support to osx
|
||||
- efa: Do not set multinode marker for test_efa_shm_addr
|
||||
|
||||
-------------------------------------------------------------------
|
||||
Thu Jun 12 08:44:23 UTC 2025 - Nicolas Morey <nicolas.morey@suse.com>
|
||||
|
||||
- Update to v2.1.0
|
||||
- Core
|
||||
- man/fi_domain: Define resource mgmt unreachable EP
|
||||
- man/fi_domain: Update connectionless EP disable
|
||||
- hmem: Fix missing rocr dlopen function assignments
|
||||
- Fix data race on log_prefix
|
||||
- hmem: Define ofi_hmem_put_dmabuf_fd and add support for cuda and rocr
|
||||
- Fix a few minor man page issues
|
||||
- CXI
|
||||
- Fix ss_plugin_auth_key_priority test
|
||||
- Bump internal CXI version to support 2.1
|
||||
- Fix possible cq_open segfault
|
||||
- Fix peer CQ support
|
||||
- Added collectives logical operators
|
||||
- Fix bug in constrained LE test cases in test.sh and test_sw.sh
|
||||
- Fix unit test missing pthread initialization
|
||||
- Add FI_WAIT_YIELD EQ support
|
||||
- Make string setup of FI_CXI_CURL_LIB_PATH safe
|
||||
- Add FI_CXI_CURL_LIB_PATH #define from autoconf
|
||||
- Test CUDA with DMA buf FD recycling
|
||||
- Test ROCR with DMA buf FD recycling
|
||||
- Test ROCR with DMA buf offset
|
||||
- Integrate with ofi_hmem_put_dmabuf_fd
|
||||
- Test monitor unsubscribe
|
||||
- Fix fi_cq_strerror
|
||||
- CXI EQs do not support wait objects
|
||||
- Fix CQ wait FD logic
|
||||
- Disable retry logic for experimental collectives
|
||||
- Ignore drop count during init
|
||||
- Remove CXI_MAP_IOVA_ALLOC flag.
|
||||
- Synchronous fi_close on collective multicast
|
||||
- Fix deferred work test
|
||||
- Deprecate FI_CXI_WEAK_FENCE
|
||||
- Update message and target ordering doc
|
||||
- Define FI_CXI_MR_TARGET_ORDERING
|
||||
- Remove FI_CXI_ENABLE_UNRESTRICTED_RO
|
||||
- Set MR relax order on EP order size
|
||||
- Fix RMA/AMO network ordering
|
||||
- Update CXI provider max order size
|
||||
- EFA
|
||||
- Do not allocate rdm_peer struct for efa-direct and dgram paths
|
||||
- Remove efa_av->ep_type in favor of efa_domain->info_type
|
||||
- Replace domain->rdm_ep with domain->info_type
|
||||
- Release matched rxe before destroying the srx rx_pool
|
||||
- Fix the error code from ibv wr API
|
||||
- Fix the clean up issue for efa_util_prov
|
||||
- Fix the cntr interface for efa-direct
|
||||
- Add unit test for efa-direct progress model
|
||||
- Fix the max_msg_size reporting for efa-direct
|
||||
- Clean up rxe map during rxe release
|
||||
- rdm: Do not claim support for FI_PROGRESS_AUTO
|
||||
- Always return efa_prov in EFA_INI
|
||||
- Do not write cq error for ope from internal operations
|
||||
- Remove unused field efa_domain->mr_mode
|
||||
- Do GDRCopy registrations only in the EFA RDM path
|
||||
- Reset g_efa_hmem_info after each test
|
||||
- Fix the unexp_pkt clean up.
|
||||
- Call efa_fork_support_enable_if_requested earlier
|
||||
- Check efa_prov_info_set_fabric_name return code
|
||||
- Clean up efa_prov_info_set_hmem_flags
|
||||
- Bug fix in the RDM path with FI_MSG_PREFIX mode
|
||||
- Rework the efa_cq unit tests
|
||||
- Improve efa_cq's completion report
|
||||
- Unit test additions and fixes for efa-direct
|
||||
- Remove incorrect usage of rdm_info->ep_attr->max_msg_size
|
||||
- Add new efa-direct fi_info objects
|
||||
- Cleanup efa_user_info
|
||||
- Add debug log for efa-direct data transfer
|
||||
- Use cuda_put_dmabuf_fd
|
||||
- Fix leak of dmabuf fd in cuda p2p probe
|
||||
- Implement FI_CONTEXT2 in EFA Direct
|
||||
- Remove x86-64 architecture check for static_assert
|
||||
- Do infinite rnr retry for base ep by default
|
||||
- Extend efa_ep interface
|
||||
- Migrate efa_dgram_ep to efa_ep
|
||||
- Adjust the logging level for unreleased rxe
|
||||
- Regulate the usage of optnames
|
||||
- Move struct efa_ep_addr to efa_base_ep
|
||||
- Remove util_av_fi_addr from efa_conn
|
||||
- Make efa_rdm_cq use efa_cq
|
||||
- Deprecate FI_AV_MAP
|
||||
- Remove inline write logic for rma inject
|
||||
- Add missing mock for wc_is_unsolicited in unit test
|
||||
- Implement the cq progress
|
||||
- Remove err_msg from efa_rdm_ep
|
||||
- Move raw addr functions
|
||||
- Move efa_rdm_cq_wc_is_unsolicited to efa_cq
|
||||
- Correct the error code for IBV_WC_RECV_RDMA_WITH_IMM
|
||||
- Add missing locks in efa_msg and efa_rma
|
||||
- Move fork handler installation to efa_domain_open
|
||||
- Detect unsolicited write recv support status on both sides
|
||||
- Add unit tests for efa_rma
|
||||
- Add tracepoints for efa_msg and efa_rma
|
||||
- Add unit tests for efa_msg
|
||||
- Add tracepoint for poll cq ope
|
||||
- Adjust the error code for flushed receive
|
||||
- LPP
|
||||
- Add check for atomics
|
||||
- OPX
|
||||
- Adding changes to resolve Coverity Scan Defects
|
||||
- Link bounce fixes
|
||||
- SDMA error handling fix
|
||||
- Fix build with GCC 15
|
||||
- Move CUDA sync attribute setting to mr registration
|
||||
- Add HMEM handle for GDRCopy in GET/PUT
|
||||
- Add newline to trace entry
|
||||
- Add debug trace messages to RMA functions
|
||||
- Disable out of order RC if TID is enabled
|
||||
- Unexpected packet processing modifications
|
||||
- Use inlined call to process_header for payloadless RZV_DATA (TID) packets
|
||||
- Run opx-format on upstream opx provider change
|
||||
- Remove reliability handshake
|
||||
- Add PR close event to Cornelis Networks internal workflow triggers
|
||||
- Use cycle timer as long as all set CPUs are same socket
|
||||
- fi_opx_addr changes as pre-context sharing and pre-CYR
|
||||
- Replace intranode hashmap with array
|
||||
- Default RTS/CTS to in-order route control
|
||||
- Write CQ entry for successful data transfer operation by default
|
||||
- Resolve OPX fi_writedata() reliability errors
|
||||
- Remove extraneous warning
|
||||
- Enable TID by default.
|
||||
- Fixed OPX trace points
|
||||
- Set route control based on packet type
|
||||
- Implement FI_MR_VIRT_ADDR in OPX
|
||||
- Use reliability timer for link bounce status check
|
||||
- Link bounce for JKR
|
||||
- Fix debug print array indexing
|
||||
- Resolve new Coverity scan defects
|
||||
- Enhanced simulation and debug support
|
||||
- Add HFI1 Direct Verbs support
|
||||
- Making pkey related failures more obvious
|
||||
- Reformat full OPX provider
|
||||
- Add .clang-format file for OPX provider
|
||||
- Identify and resolve new Coverity scan defects
|
||||
- Changing default pkey to fetch from pkey table index 0
|
||||
- Fix wrong function name for getting hmem iface.
|
||||
- Handle Cuda Managed/Unified memory
|
||||
- Fix OPX hint checking and capability setting
|
||||
- Implement fi_writedata()
|
||||
- Set rate control defaults
|
||||
- Process RZV payload immediately
|
||||
- CN5000/JKR 16B: 3B Lid changes
|
||||
- Set entropy to rx/tx pair
|
||||
- Don't send immediate data in send_rzv when send buffer is not host memory
|
||||
- Use `page_sizes[OFI_PAGE_SIZE]-1` instead of `PAGE_MASK`
|
||||
- PSM3
|
||||
- Update provider to sync with IEFS 12.0.0.0.36
|
||||
- RXM
|
||||
- Fix rxm multi recv getopt segfault
|
||||
- SHM
|
||||
- Free all pending srx entries on ep close
|
||||
- Remove prefix from map inserts
|
||||
- Fix name compare bug
|
||||
- TCP
|
||||
- Only disable ep if the failure can not be retried
|
||||
- Fix data race caused by parallel access to xnet_rdm_fid_ops
|
||||
- Fix FI_MULTI_RECV not set on error
|
||||
- Fix race in writing to xnet_ep_fi_ops
|
||||
- Util
|
||||
- Change util_av lock to genlock
|
||||
- Roundup_power_of_two remove unnecessary decrement
|
||||
- Enhance performance of roundup_power_of_two
|
||||
- Fix FI_MULTI_RECV not set on FI_ECANCELED
|
||||
- Fix flag initialization for generic receive of unexpected entry
|
||||
- Add fabric argument to pingpong test
|
||||
- Statically set uffd callbacks
|
||||
- Fix ROCR and memhooks deadlock
|
||||
- Support mem monitors with per sub ctx
|
||||
- Separate uffd and import mem monitors
|
||||
- pingpong: close mr after ep close
|
||||
- Verbs
|
||||
- Always return vrb_prov in VERBS_INI
|
||||
- Fix data race vrb_open_ep function
|
||||
- Fabtests
|
||||
- efa: Bind eq to ep in remote exits early test
|
||||
- Fix some test function prototypes
|
||||
- efa: Add remote exit early test with post recv
|
||||
- Do not require FI_TAGGED for fi_av_xfer test
|
||||
- efa: print err for recv failure
|
||||
- efa: Add fabtests for efa-direct
|
||||
- Set the min of tx/rx_mr_size
|
||||
- efa: Add remote exit early test
|
||||
- efa: Fix the rnr read cq error test for efa-direct
|
||||
- multi_ep: Support customized transfer size
|
||||
- Re-enable psm3 rdm_tagged_peek
|
||||
- Disable multi_recv
|
||||
- Run efa tests with efa fabric name
|
||||
- Add fabric argument to ClientServerTest
|
||||
- efa: add rdma check for unsolicited write recv
|
||||
- Add support for FI_CONTEXT2
|
||||
- Bugfixes for neuron
|
||||
- Corrected flags argument type in ft_sendmsg/ft_recvmsg functions
|
||||
- pytest/efa: Avoid duplicate completion semantic for RMA test
|
||||
- pytest/efa: merge memory_type and check_rma_bw_memory_type
|
||||
- Drop patches merged upstream:
|
||||
- psm3-prevent-code-from-building-using-AVX-AVX2.patch
|
||||
- libfabric-2.0.0-gcc15-part2.patch
|
||||
- libfabric-2.0.0-gcc15.patch
|
||||
|
||||
-------------------------------------------------------------------
|
||||
Sat May 3 11:52:34 UTC 2025 - Friedrich Haubensak <hsk17@mail.de>
|
||||
|
||||
- Add upstream patches libfabric-2.0.0-gcc15.patch and
|
||||
libfabric-2.0.0-gcc15-part2.patch to fix gcc-15 compile time
|
||||
errors
|
||||
|
||||
-------------------------------------------------------------------
|
||||
Mon Dec 16 08:34:01 UTC 2024 - Nicolas Morey <nicolas.morey@suse.com>
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
#
|
||||
# spec file for package libfabric
|
||||
#
|
||||
# Copyright (c) 2025 SUSE LLC and contributors
|
||||
# Copyright (c) 2024 SUSE LLC
|
||||
#
|
||||
# All modifications and additions to the file contributed by third parties
|
||||
# remain the property of their copyright owners, unless otherwise agreed
|
||||
@@ -17,28 +17,25 @@
|
||||
|
||||
|
||||
#
|
||||
%define git_ver .0.5a13558c2
|
||||
%define git_ver .0.2ee68f6051e9
|
||||
|
||||
%ifarch aarch64 %power64 x86_64 s390x riscv64
|
||||
%if 0%{?suse_version} > 1530
|
||||
%ifarch aarch64 %power64 x86_64 s390x
|
||||
%if 0%{?suse_version} > 1530
|
||||
%define with_ucx 1
|
||||
%endif
|
||||
%endif
|
||||
|
||||
%ifarch x86_64 aarch64
|
||||
%define with_efa 1
|
||||
%endif
|
||||
|
||||
Name: libfabric
|
||||
Version: 2.4.0
|
||||
Version: 2.0.0
|
||||
Release: 0
|
||||
Summary: User-space RDMA Fabric Interfaces
|
||||
License: BSD-2-Clause OR GPL-2.0-only
|
||||
Group: Development/Libraries/C and C++
|
||||
Source: %{name}-%{version}%{git_ver}.tar.bz2
|
||||
Source1: baselibs.conf
|
||||
Source100: README.md
|
||||
Patch0: libfabric-libtool.patch
|
||||
Patch1: psm3-prevent-code-from-building-using-AVX-AVX2.patch
|
||||
URL: http://www.github.com/ofiwg/libfabric
|
||||
BuildRequires: autoconf
|
||||
BuildRequires: automake
|
||||
@@ -86,12 +83,7 @@ services, such as RDMA. This package contains the development files.
|
||||
%autosetup -p0 -n %{name}-%{version}%{git_ver}
|
||||
|
||||
%build
|
||||
%set_build_flags
|
||||
export CFLAGS="$CFLAGS -Wno-incompatible-pointer-types"
|
||||
%if 0%{?gcc_version} >= 8
|
||||
export CFLAGS="$CFLAGS --std=gnu17"
|
||||
%endif
|
||||
|
||||
export CFLAGS=-Wno-incompatible-pointer-types
|
||||
rm -f config/libtool.m4
|
||||
autoreconf -fi
|
||||
# defaults: with-dlopen and without-valgrind can be overridden:
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
# This script has to be run prior to a check-in if changes were done
|
||||
# to spec and/or changes
|
||||
GIT_VER=$(grep "%define git_ver" libfabric.spec)
|
||||
VERSION=$(grep -E "^Version:" libfabric.spec)
|
||||
VERSION=$(egrep "^Version:" libfabric.spec)
|
||||
sed -i -e 's/^%define git_ver.*$/'"$GIT_VER/" -e 's/^Version:.*$/'"$VERSION/" fabtests.spec
|
||||
osc service run format_spec_file
|
||||
|
||||
|
||||
83
psm3-prevent-code-from-building-using-AVX-AVX2.patch
Normal file
83
psm3-prevent-code-from-building-using-AVX-AVX2.patch
Normal file
@@ -0,0 +1,83 @@
|
||||
commit fdb3ddfe4e372022adcc1093f7f28d49021662ce
|
||||
Author: Nicolas Morey <nmorey@suse.com>
|
||||
Date: Mon Dec 2 09:44:02 2024 +0100
|
||||
|
||||
psm3: prevent code from building using AVX/AVX2
|
||||
|
||||
Even with a snippet detecting if the CPU is compatible, PSM3 uses multiple
|
||||
constructors which may trigger a SIGILL when the library gets loaded
|
||||
|
||||
Signed-off-by: Nicolas Morey <nmorey@suse.com>
|
||||
|
||||
diff --git prov/psm3/configure.m4 prov/psm3/configure.m4
|
||||
index 5c8c083f7dc0..88e6871c1a9e 100644
|
||||
--- prov/psm3/configure.m4
|
||||
+++ prov/psm3/configure.m4
|
||||
@@ -125,49 +125,7 @@ AC_DEFUN([FI_PSM3_CONFIGURE],[
|
||||
],[
|
||||
psm3_happy=0
|
||||
AC_MSG_RESULT([no])
|
||||
- AC_MSG_NOTICE([psm3 requires minimum of avx instruction set to build])
|
||||
- ])
|
||||
- CFLAGS=$save_CFLAGS
|
||||
-
|
||||
- AC_MSG_CHECKING([for -mavx support])
|
||||
- save_CFLAGS=$CFLAGS
|
||||
- CFLAGS="$PSM3_STRIP_OPTFLAGS -mavx -O0"
|
||||
- AC_LINK_IFELSE(
|
||||
- [AC_LANG_PROGRAM(
|
||||
- [[#include <immintrin.h>]],
|
||||
- [[unsigned long long _a[4] = {1ULL,2ULL,3ULL,4ULL};
|
||||
- __m256i vA = _mm256_loadu_si256((__m256i *)_a);
|
||||
- __m256i vB;
|
||||
- _mm256_store_si256(&vB, vA);
|
||||
- return 0;]])
|
||||
- ],[
|
||||
- AC_MSG_RESULT([yes])
|
||||
- PSM3_ARCH_CFLAGS="-mavx"
|
||||
- PSM3_MARCH="avx"
|
||||
- ],[
|
||||
- psm3_happy=0
|
||||
- AC_MSG_RESULT([no])
|
||||
- AC_MSG_NOTICE([psm3 requires minimum of avx instruction set to build])
|
||||
- ])
|
||||
- CFLAGS=$save_CFLAGS
|
||||
-
|
||||
- AC_MSG_CHECKING([for -mavx2 support])
|
||||
- save_CFLAGS=$CFLAGS
|
||||
- CFLAGS="$PSM3_STRIP_OPTFLAGS -mavx2 -O0"
|
||||
- AC_LINK_IFELSE(
|
||||
- [AC_LANG_PROGRAM(
|
||||
- [[#include <immintrin.h>]],
|
||||
- [[unsigned long long _a[4] = {1ULL,2ULL,3ULL,4ULL};
|
||||
- __m256i vA = _mm256_loadu_si256((__m256i *)_a);
|
||||
- __m256i vB = _mm256_add_epi64(vA, vA);
|
||||
- (void)vB;
|
||||
- return 0;]])
|
||||
- ],[
|
||||
- AC_MSG_RESULT([yes])
|
||||
- PSM3_ARCH_CFLAGS="-mavx2"
|
||||
- PSM3_MARCH="avx2"
|
||||
- ],[
|
||||
- AC_MSG_RESULT([no])
|
||||
+ AC_MSG_NOTICE([psm3 requires minimum of SSE4.2 instruction set to build])
|
||||
])
|
||||
CFLAGS=$save_CFLAGS
|
||||
|
||||
diff --git prov/psm3/src/psmx3_init.c prov/psm3/src/psmx3_init.c
|
||||
index 29359d3ea348..a02c1ff8698b 100644
|
||||
--- prov/psm3/src/psmx3_init.c
|
||||
+++ prov/psm3/src/psmx3_init.c
|
||||
@@ -685,10 +685,8 @@ static int psmx3_getinfo(uint32_t api_version, const char *node,
|
||||
PSMX3_INFO(&psmx3_prov, FI_LOG_CORE,
|
||||
"CPU does not support '%s'.\n", PSM3_MARCH);
|
||||
OFI_INFO_STR(&psmx3_prov,
|
||||
- (__builtin_cpu_supports("avx2") ? "AVX2" :
|
||||
- (__builtin_cpu_supports("avx") ? "AVX" :
|
||||
- (__builtin_cpu_supports("sse4.2") ? "SSE4.2" : "unknown"))),
|
||||
- PSM3_MARCH, "CPU Supports", "PSM3 Built With");
|
||||
+ __builtin_cpu_supports("sse4.2") ? "SSE4.2" : "unknown",
|
||||
+ PSM3_MARCH, "CPU Supports", "PSM3 Built With");
|
||||
goto err_out;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user