From 292c9efb9edf1e5cd53fdfe9c905c077c6e40e3350673f9de5846bad86bef227 Mon Sep 17 00:00:00 2001 From: Dirk Stoecker Date: Fri, 12 May 2023 09:50:10 +0000 Subject: [PATCH] Accepting request 1084765 from home:amanzini:branches:science - Update to 1.5.0: * Significant speed ups to the fqzcomp codec via code restructuring and use of memory prefetch instructions. Encode is 30-40% faster and decode 5-8% faster. * Remove unused ax_with_libdeflate.m4 file from build system - removed patch fix_ix86_build.patch already merged in upstream - Update to 1.4.0: * This is almost entirely minor bug fixing with a few small updates. * Optimise compression / speed of the name tokeniser * Improvements for Intel -m32 builds, including better AVX2 validation * Detect Neon capability at runtime via operating system APIs. * Update hts_pack to operate in line with CRAMcodecs spec, where the number of symbols > 16. * Fixed too-stringent buffer overflow checking in O1 rans decoder. OBS-URL: https://build.opensuse.org/request/show/1084765 OBS-URL: https://build.opensuse.org/package/show/science/htscodecs?expand=0&rev=5 --- fix_ix86_build.patch | 99 ------------------------------------------ htscodecs-1.3.0.tar.gz | 3 -- htscodecs-1.5.0.tar.gz | 3 ++ htscodecs.changes | 20 +++++++++ htscodecs.spec | 8 +--- 5 files changed, 25 insertions(+), 108 deletions(-) delete mode 100644 fix_ix86_build.patch delete mode 100644 htscodecs-1.3.0.tar.gz create mode 100644 htscodecs-1.5.0.tar.gz diff --git a/fix_ix86_build.patch b/fix_ix86_build.patch deleted file mode 100644 index d241ba7..0000000 --- a/fix_ix86_build.patch +++ /dev/null @@ -1,99 +0,0 @@ -From 843d4f63b1c64905881b4648916a4d027baa1a1c Mon Sep 17 00:00:00 2001 -From: James Bonfield -Date: Wed, 17 Aug 2022 15:27:04 +0100 -Subject: [PATCH] Improvements for intel -m32 builds. - -On this platform _mm256_extract_epi64 isn't defined, but the rest of -AVX2 is. It needs to fail auto-detection. 
- -Also we get unaligned accesses in the SSE4 code with tbuf due to -differing data alignment caused by 32-bit pointers instead of 64-bit. -This exposes an underlying problem of using aligned SIMD writes on -tbuf without explicitly asking for alignment. (The new code is also -sometimes a little faster.) - -See also samtools/htslib#1500 ---- - configure.ac | 3 ++- - htscodecs/rANS_static32x16pr_avx2.c | 2 +- - htscodecs/rANS_static32x16pr_avx512.c | 2 +- - htscodecs/rANS_static32x16pr_sse4.c | 2 +- - m4/ax_check_compile_flag.m4 | 6 +++++- - 5 files changed, 10 insertions(+), 5 deletions(-) - -diff --git a/configure.ac b/configure.ac -index 4c8ad54..eb2f44e 100644 ---- a/configure.ac -+++ b/configure.ac -@@ -165,7 +165,8 @@ AX_CHECK_COMPILE_FLAG([-mavx2], [ - ]],[[ - __m256i a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8); - __m256i b = _mm256_add_epi32(a, a); -- return *((char *) &b); -+ long long c = _mm256_extract_epi64(b, 0); -+ return (int) c; - ]]) - ]) - AM_CONDITIONAL([RANS_32x16_AVX2],[test "x$MAVX2" != "x"]) -diff --git a/htscodecs/rANS_static32x16pr_avx2.c b/htscodecs/rANS_static32x16pr_avx2.c -index 2dd14f1..c36f23c 100644 ---- a/htscodecs/rANS_static32x16pr_avx2.c -+++ b/htscodecs/rANS_static32x16pr_avx2.c -@@ -1123,7 +1123,7 @@ unsigned char *rans_uncompress_O1_32x16_avx2(unsigned char *in, - union { - unsigned char tbuf[32][32]; - uint64_t tbuf64[32][4]; -- } u; -+ } u __attribute__((aligned(32))); - unsigned int tidx = 0; - - if (0) { -diff --git a/htscodecs/rANS_static32x16pr_avx512.c b/htscodecs/rANS_static32x16pr_avx512.c -index 0d1456f..e9cce46 100644 ---- a/htscodecs/rANS_static32x16pr_avx512.c -+++ b/htscodecs/rANS_static32x16pr_avx512.c -@@ -735,7 +735,7 @@ unsigned char *rans_uncompress_O1_32x16_avx512(unsigned char *in, - union { - unsigned char tbuf[32][32]; - uint64_t tbuf64[32][4]; -- } u; -+ } u __attribute__((aligned(32))); - #else - uint32_t tbuf[32][32]; - #endif -diff --git a/htscodecs/rANS_static32x16pr_sse4.c 
b/htscodecs/rANS_static32x16pr_sse4.c -index fe0345f..88bcbe0 100644 ---- a/htscodecs/rANS_static32x16pr_sse4.c -+++ b/htscodecs/rANS_static32x16pr_sse4.c -@@ -1423,7 +1423,7 @@ unsigned char *rans_uncompress_O1_32x16_sse4(unsigned char *in, - uint16_t *sp = (uint16_t *)ptr; - const uint32_t mask = ((1u << TF_SHIFT_O1_FAST)-1); - __m128i maskv = _mm_set1_epi32(mask); // set mask in all lanes -- uint8_t tbuf[32][32]; -+ uint8_t tbuf[32][32] __attribute__((aligned(32))); - int tidx = 0; - LOAD128(Rv, R); - LOAD128(Lv, l); -diff --git a/m4/ax_check_compile_flag.m4 b/m4/ax_check_compile_flag.m4 -index bd753b3..c2bd2c6 100644 ---- a/m4/ax_check_compile_flag.m4 -+++ b/m4/ax_check_compile_flag.m4 -@@ -36,13 +36,17 @@ - - #serial 6 - -+# LOCAL modification; change AC_COMPILE_IFELSE to AC_LINK_IFELSE so -+# _mm256_extract_epi64 tests on a x86_64 running under -m32 still -+# fails. (Otherwise it compiles, but fails to link.) -+ - AC_DEFUN([AX_CHECK_COMPILE_FLAG], - [AC_PREREQ(2.64)dnl for _AC_LANG_PREFIX and AS_VAR_IF - AS_VAR_PUSHDEF([CACHEVAR],[ax_cv_check_[]_AC_LANG_ABBREV[]flags_$4_$1])dnl - AC_CACHE_CHECK([whether _AC_LANG compiler accepts $1], CACHEVAR, [ - ax_check_save_flags=$[]_AC_LANG_PREFIX[]FLAGS - _AC_LANG_PREFIX[]FLAGS="$[]_AC_LANG_PREFIX[]FLAGS $4 $1" -- AC_COMPILE_IFELSE([m4_default([$5],[AC_LANG_PROGRAM()])], -+ AC_LINK_IFELSE([m4_default([$5],[AC_LANG_PROGRAM()])], - [AS_VAR_SET(CACHEVAR,[yes])], - [AS_VAR_SET(CACHEVAR,[no])]) - _AC_LANG_PREFIX[]FLAGS=$ax_check_save_flags]) diff --git a/htscodecs-1.3.0.tar.gz b/htscodecs-1.3.0.tar.gz deleted file mode 100644 index 4ea2597..0000000 --- a/htscodecs-1.3.0.tar.gz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:696c0affbfa7e59276e0dfc71dcba2f04b0a6db12998f30f1baf2fc21e9113f4 -size 3368716 diff --git a/htscodecs-1.5.0.tar.gz b/htscodecs-1.5.0.tar.gz new file mode 100644 index 0000000..96b4060 --- /dev/null +++ b/htscodecs-1.5.0.tar.gz @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:34def7341d01a374583d61c9012a80f6b40af078b11bfcaacba1a2412e34dab3 +size 3498542 diff --git a/htscodecs.changes b/htscodecs.changes index bc15b95..1f5665b 100644 --- a/htscodecs.changes +++ b/htscodecs.changes @@ -1,3 +1,23 @@ +------------------------------------------------------------------- +Thu May 4 14:47:16 UTC 2023 - Andrea Manzini + +- Update to 1.5.0: + * Significant speed ups to the fqzcomp codec via code restructuring + and use of memory prefetch instructions. Encode is 30-40% faster + and decode 5-8% faster. + * Remove unused ax_with_libdeflate.m4 file from build system + +- removed patch fix_ix86_build.patch already merged in upstream + +- Update to 1.4.0: + * This is almost entirely minor bug fixing with a few small updates. + * Optimise compression / speed of the name tokeniser + * Improvements for Intel -m32 builds, including better AVX2 validation + * Detect Neon capability at runtime via operating system APIs. + * Update hts_pack to operate in line with CRAMcodecs spec, where the number of symbols > 16. + * Fixed too-stringent buffer overflow checking in O1 rans decoder. 
+ + ------------------------------------------------------------------- Thu Sep 8 21:52:47 UTC 2022 - Stefan Brüns diff --git a/htscodecs.spec b/htscodecs.spec index 0f01923..d18cb91 100644 --- a/htscodecs.spec +++ b/htscodecs.spec @@ -1,7 +1,7 @@ # # spec file for package htscodecs # -# Copyright (c) 2022 SUSE LLC +# Copyright (c) 2023 SUSE LLC # # All modifications and additions to the file contributed by third parties # remain the property of their copyright owners, unless otherwise agreed @@ -18,7 +18,7 @@ %define sonum 2 Name: htscodecs -Version: 1.3.0 +Version: 1.5.0 Release: 0 Summary: C library for custom compression for CRAM and other formats License: MIT @@ -26,8 +26,6 @@ Group: Productivity/Scientific/Other URL: https://github.com/samtools/htscodecs Source0: https://github.com/samtools/htscodecs/releases/download/v%{version}/htscodecs-%{version}.tar.gz Source100: baselibs.conf -# PATCH-FIX-UPSTREAM -Patch0: https://github.com/samtools/htscodecs/commit/843d4f63b1c64905881b4648916a4d027baa1a1c.patch#/fix_ix86_build.patch BuildRequires: autoconf BuildRequires: gcc-c++ BuildRequires: pkgconfig @@ -64,8 +62,6 @@ Header files and libraries of the samtools project for compiling against %{name} %autosetup -p1 %build -# Rebuild configure script after Patch0 -autoconf %configure --disable-static %make_build