diff --git a/fix_ix86_build.patch b/fix_ix86_build.patch new file mode 100644 index 0000000..d241ba7 --- /dev/null +++ b/fix_ix86_build.patch @@ -0,0 +1,99 @@ +From 843d4f63b1c64905881b4648916a4d027baa1a1c Mon Sep 17 00:00:00 2001 +From: James Bonfield +Date: Wed, 17 Aug 2022 15:27:04 +0100 +Subject: [PATCH] Improvements for intel -m32 builds. + +On this platform _mm256_extract_epi64 isn't defined, but the rest of +AVX2 is. It needs to fail auto-detection. + +Also we get unaligned accesses in the SSE4 code with tbuf due to +differing data alignment caused by 32-bit pointers instead of 64-bit. +This exposes an underlying problem of using aligned SIMD writes on +tbuf without explicitly asking for alignment. (The new code is also +sometimes a little faster.) + +See also samtools/htslib#1500 +--- + configure.ac | 3 ++- + htscodecs/rANS_static32x16pr_avx2.c | 2 +- + htscodecs/rANS_static32x16pr_avx512.c | 2 +- + htscodecs/rANS_static32x16pr_sse4.c | 2 +- + m4/ax_check_compile_flag.m4 | 6 +++++- + 5 files changed, 10 insertions(+), 5 deletions(-) + +diff --git a/configure.ac b/configure.ac +index 4c8ad54..eb2f44e 100644 +--- a/configure.ac ++++ b/configure.ac +@@ -165,7 +165,8 @@ AX_CHECK_COMPILE_FLAG([-mavx2], [ + ]],[[ + __m256i a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8); + __m256i b = _mm256_add_epi32(a, a); +- return *((char *) &b); ++ long long c = _mm256_extract_epi64(b, 0); ++ return (int) c; + ]]) + ]) + AM_CONDITIONAL([RANS_32x16_AVX2],[test "x$MAVX2" != "x"]) +diff --git a/htscodecs/rANS_static32x16pr_avx2.c b/htscodecs/rANS_static32x16pr_avx2.c +index 2dd14f1..c36f23c 100644 +--- a/htscodecs/rANS_static32x16pr_avx2.c ++++ b/htscodecs/rANS_static32x16pr_avx2.c +@@ -1123,7 +1123,7 @@ unsigned char *rans_uncompress_O1_32x16_avx2(unsigned char *in, + union { + unsigned char tbuf[32][32]; + uint64_t tbuf64[32][4]; +- } u; ++ } u __attribute__((aligned(32))); + unsigned int tidx = 0; + + if (0) { +diff --git a/htscodecs/rANS_static32x16pr_avx512.c 
b/htscodecs/rANS_static32x16pr_avx512.c +index 0d1456f..e9cce46 100644 +--- a/htscodecs/rANS_static32x16pr_avx512.c ++++ b/htscodecs/rANS_static32x16pr_avx512.c +@@ -735,7 +735,7 @@ unsigned char *rans_uncompress_O1_32x16_avx512(unsigned char *in, + union { + unsigned char tbuf[32][32]; + uint64_t tbuf64[32][4]; +- } u; ++ } u __attribute__((aligned(32))); + #else + uint32_t tbuf[32][32]; + #endif +diff --git a/htscodecs/rANS_static32x16pr_sse4.c b/htscodecs/rANS_static32x16pr_sse4.c +index fe0345f..88bcbe0 100644 +--- a/htscodecs/rANS_static32x16pr_sse4.c ++++ b/htscodecs/rANS_static32x16pr_sse4.c +@@ -1423,7 +1423,7 @@ unsigned char *rans_uncompress_O1_32x16_sse4(unsigned char *in, + uint16_t *sp = (uint16_t *)ptr; + const uint32_t mask = ((1u << TF_SHIFT_O1_FAST)-1); + __m128i maskv = _mm_set1_epi32(mask); // set mask in all lanes +- uint8_t tbuf[32][32]; ++ uint8_t tbuf[32][32] __attribute__((aligned(32))); + int tidx = 0; + LOAD128(Rv, R); + LOAD128(Lv, l); +diff --git a/m4/ax_check_compile_flag.m4 b/m4/ax_check_compile_flag.m4 +index bd753b3..c2bd2c6 100644 +--- a/m4/ax_check_compile_flag.m4 ++++ b/m4/ax_check_compile_flag.m4 +@@ -36,13 +36,17 @@ + + #serial 6 + ++# LOCAL modification; change AC_COMPILE_IFELSE to AC_LINK_IFELSE so ++# _mm256_extract_epi64 tests on a x86_64 running under -m32 still ++# fails. (Otherwise it compiles, but fails to link.) 
++ AC_DEFUN([AX_CHECK_COMPILE_FLAG], + [AC_PREREQ(2.64)dnl for _AC_LANG_PREFIX and AS_VAR_IF + AS_VAR_PUSHDEF([CACHEVAR],[ax_cv_check_[]_AC_LANG_ABBREV[]flags_$4_$1])dnl + AC_CACHE_CHECK([whether _AC_LANG compiler accepts $1], CACHEVAR, [ + ax_check_save_flags=$[]_AC_LANG_PREFIX[]FLAGS + _AC_LANG_PREFIX[]FLAGS="$[]_AC_LANG_PREFIX[]FLAGS $4 $1" +- AC_COMPILE_IFELSE([m4_default([$5],[AC_LANG_PROGRAM()])], ++ AC_LINK_IFELSE([m4_default([$5],[AC_LANG_PROGRAM()])], + [AS_VAR_SET(CACHEVAR,[yes])], + [AS_VAR_SET(CACHEVAR,[no])]) + _AC_LANG_PREFIX[]FLAGS=$ax_check_save_flags]) diff --git a/htscodecs-1.2.2.tar.gz b/htscodecs-1.2.2.tar.gz deleted file mode 100644 index 7f4eb80..0000000 --- a/htscodecs-1.2.2.tar.gz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d17d7a505da3fc79dcced320c132c5492b5dc78ee70d9ca8218b927070f183ca -size 3044356 diff --git a/htscodecs-1.3.0.tar.gz b/htscodecs-1.3.0.tar.gz new file mode 100644 index 0000000..4ea2597 --- /dev/null +++ b/htscodecs-1.3.0.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:696c0affbfa7e59276e0dfc71dcba2f04b0a6db12998f30f1baf2fc21e9113f4 +size 3368716 diff --git a/htscodecs.changes b/htscodecs.changes index a84a371..bc15b95 100644 --- a/htscodecs.changes +++ b/htscodecs.changes @@ -1,3 +1,26 @@ +------------------------------------------------------------------- +Thu Sep 8 21:52:47 UTC 2022 - Stefan Brüns + +- Update to 1.3.0: + * The primary change in this release is a new SIMD enabled + rANS codec. + There is a 32-way unrolled rANS implementation. This is accessed + using the existing rans 4x16 API with the RANS_ORDER_X32 bit set. + * Improved memory allocation via a new htscodecs_tls_alloc function. + * Some external functions have been renamed, with the old ones + still existing in a deprecated fashion. + * Improved test framework with an "entropy" tool that iterates + over all entropy encoders. + * Reworked fuzzing infrastructure. 
+ * Small speed improvements to various rANS encoders and decoders. + * Substantial memory reduction to the name tokeniser (tok3). + * Fixed undefined behaviour in our use of __builtin_clz(). + * Fixed a few redundant #includes. + * Work around strict aliasing bugs, uncovered with gcc -O2. + * Fixed an issue with encoding data blocks close to 2GB in size. + * Fix encode error with large blocks using RANS_ORDER_STRIPE. +- Add fix_ix86_build.patch + ------------------------------------------------------------------- Wed Apr 20 19:45:48 UTC 2022 - Ferdinand Thiessen diff --git a/htscodecs.spec b/htscodecs.spec index f30b037..0f01923 100644 --- a/htscodecs.spec +++ b/htscodecs.spec @@ -18,7 +18,7 @@ %define sonum 2 Name: htscodecs -Version: 1.2.2 +Version: 1.3.0 Release: 0 Summary: C library for custom compression for CRAM and other formats License: MIT @@ -26,6 +26,9 @@ Group: Productivity/Scientific/Other URL: https://github.com/samtools/htscodecs Source0: https://github.com/samtools/htscodecs/releases/download/v%{version}/htscodecs-%{version}.tar.gz Source100: baselibs.conf +# PATCH-FIX-UPSTREAM +Patch0: https://github.com/samtools/htscodecs/commit/843d4f63b1c64905881b4648916a4d027baa1a1c.patch#/fix_ix86_build.patch +BuildRequires: autoconf BuildRequires: gcc-c++ BuildRequires: pkgconfig BuildRequires: pkgconfig(bzip2) @@ -58,9 +61,11 @@ Requires: lib%{name}%{sonum} = %{version} Header files and libraries of the samtools project for compiling against %{name}. %prep -%setup -q +%autosetup -p1 %build +# Rebuild configure script after Patch0 +autoconf %configure --disable-static %make_build @@ -76,7 +81,7 @@ rm %{buildroot}%{_libdir}/lib%{name}.la %{_libdir}/lib%{name}.so.* %files devel -%doc README.md NEWS +%doc README.md NEWS.md %{_includedir}/htscodecs %{_libdir}/lib%{name}.so