SHA256
1
0
forked from pool/htscodecs

Accepting request 1084765 from home:amanzini:branches:science

- Update to 1.5.0:
  * Significant speed ups to the fqzcomp codec via code restructuring
    and use of memory prefetch instructions. Encode is 30-40% faster
    and decode 5-8% faster.
  * Remove unused ax_with_libdeflate.m4 file from build system
- Remove patch fix_ix86_build.patch (already merged upstream)
- Update to 1.4.0:
  * This is almost entirely minor bug fixing with a few small updates.
  * Optimise compression / speed of the name tokeniser
  * Improvements for Intel -m32 builds, including better AVX2 validation
  * Detect Neon capability at runtime via operating system APIs.
  * Update hts_pack to operate in line with CRAMcodecs spec, where the number of symbols > 16.
  * Fixed too-stringent buffer overflow checking in O1 rans decoder.

OBS-URL: https://build.opensuse.org/request/show/1084765
OBS-URL: https://build.opensuse.org/package/show/science/htscodecs?expand=0&rev=5
This commit is contained in:
Dirk Stoecker 2023-05-12 09:50:10 +00:00 committed by Git OBS Bridge
parent f85a2cd77e
commit edd8dd578a
5 changed files with 25 additions and 108 deletions

View File

@ -1,99 +0,0 @@
From 843d4f63b1c64905881b4648916a4d027baa1a1c Mon Sep 17 00:00:00 2001
From: James Bonfield <jkb@sanger.ac.uk>
Date: Wed, 17 Aug 2022 15:27:04 +0100
Subject: [PATCH] Improvements for intel -m32 builds.
On this platform _mm256_extract_epi64 isn't defined, but the rest of
AVX2 is. It needs to fail auto-detection.
Also we get unaligned accesses in the SSE4 code with tbuf due to
differing data alignment caused by 32-bit pointers instead of 64-bit.
This exposes an underlying problem of using aligned SIMD writes on
tbuf without explicitly asking for alignment. (The new code is also
sometimes a little faster.)
See also samtools/htslib#1500
---
configure.ac | 3 ++-
htscodecs/rANS_static32x16pr_avx2.c | 2 +-
htscodecs/rANS_static32x16pr_avx512.c | 2 +-
htscodecs/rANS_static32x16pr_sse4.c | 2 +-
m4/ax_check_compile_flag.m4 | 6 +++++-
5 files changed, 10 insertions(+), 5 deletions(-)
diff --git a/configure.ac b/configure.ac
index 4c8ad54..eb2f44e 100644
--- a/configure.ac
+++ b/configure.ac
@@ -165,7 +165,8 @@ AX_CHECK_COMPILE_FLAG([-mavx2], [
]],[[
__m256i a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
__m256i b = _mm256_add_epi32(a, a);
- return *((char *) &b);
+ long long c = _mm256_extract_epi64(b, 0);
+ return (int) c;
]])
])
AM_CONDITIONAL([RANS_32x16_AVX2],[test "x$MAVX2" != "x"])
diff --git a/htscodecs/rANS_static32x16pr_avx2.c b/htscodecs/rANS_static32x16pr_avx2.c
index 2dd14f1..c36f23c 100644
--- a/htscodecs/rANS_static32x16pr_avx2.c
+++ b/htscodecs/rANS_static32x16pr_avx2.c
@@ -1123,7 +1123,7 @@ unsigned char *rans_uncompress_O1_32x16_avx2(unsigned char *in,
union {
unsigned char tbuf[32][32];
uint64_t tbuf64[32][4];
- } u;
+ } u __attribute__((aligned(32)));
unsigned int tidx = 0;
if (0) {
diff --git a/htscodecs/rANS_static32x16pr_avx512.c b/htscodecs/rANS_static32x16pr_avx512.c
index 0d1456f..e9cce46 100644
--- a/htscodecs/rANS_static32x16pr_avx512.c
+++ b/htscodecs/rANS_static32x16pr_avx512.c
@@ -735,7 +735,7 @@ unsigned char *rans_uncompress_O1_32x16_avx512(unsigned char *in,
union {
unsigned char tbuf[32][32];
uint64_t tbuf64[32][4];
- } u;
+ } u __attribute__((aligned(32)));
#else
uint32_t tbuf[32][32];
#endif
diff --git a/htscodecs/rANS_static32x16pr_sse4.c b/htscodecs/rANS_static32x16pr_sse4.c
index fe0345f..88bcbe0 100644
--- a/htscodecs/rANS_static32x16pr_sse4.c
+++ b/htscodecs/rANS_static32x16pr_sse4.c
@@ -1423,7 +1423,7 @@ unsigned char *rans_uncompress_O1_32x16_sse4(unsigned char *in,
uint16_t *sp = (uint16_t *)ptr;
const uint32_t mask = ((1u << TF_SHIFT_O1_FAST)-1);
__m128i maskv = _mm_set1_epi32(mask); // set mask in all lanes
- uint8_t tbuf[32][32];
+ uint8_t tbuf[32][32] __attribute__((aligned(32)));
int tidx = 0;
LOAD128(Rv, R);
LOAD128(Lv, l);
diff --git a/m4/ax_check_compile_flag.m4 b/m4/ax_check_compile_flag.m4
index bd753b3..c2bd2c6 100644
--- a/m4/ax_check_compile_flag.m4
+++ b/m4/ax_check_compile_flag.m4
@@ -36,13 +36,17 @@
#serial 6
+# LOCAL modification: change AC_COMPILE_IFELSE to AC_LINK_IFELSE so that the
+# _mm256_extract_epi64 test still fails on an x86_64 host building with -m32.
+# (Otherwise it compiles, but fails to link.)
+
AC_DEFUN([AX_CHECK_COMPILE_FLAG],
[AC_PREREQ(2.64)dnl for _AC_LANG_PREFIX and AS_VAR_IF
AS_VAR_PUSHDEF([CACHEVAR],[ax_cv_check_[]_AC_LANG_ABBREV[]flags_$4_$1])dnl
AC_CACHE_CHECK([whether _AC_LANG compiler accepts $1], CACHEVAR, [
ax_check_save_flags=$[]_AC_LANG_PREFIX[]FLAGS
_AC_LANG_PREFIX[]FLAGS="$[]_AC_LANG_PREFIX[]FLAGS $4 $1"
- AC_COMPILE_IFELSE([m4_default([$5],[AC_LANG_PROGRAM()])],
+ AC_LINK_IFELSE([m4_default([$5],[AC_LANG_PROGRAM()])],
[AS_VAR_SET(CACHEVAR,[yes])],
[AS_VAR_SET(CACHEVAR,[no])])
_AC_LANG_PREFIX[]FLAGS=$ax_check_save_flags])

View File

@ -1,3 +0,0 @@
version https://git-lfs.github.com/spec/v1
oid sha256:696c0affbfa7e59276e0dfc71dcba2f04b0a6db12998f30f1baf2fc21e9113f4
size 3368716

3
htscodecs-1.5.0.tar.gz Normal file
View File

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:34def7341d01a374583d61c9012a80f6b40af078b11bfcaacba1a2412e34dab3
size 3498542

View File

@ -1,3 +1,23 @@
-------------------------------------------------------------------
Thu May 4 14:47:16 UTC 2023 - Andrea Manzini <andrea.manzini@suse.com>
- Update to 1.5.0:
* Significant speed ups to the fqzcomp codec via code restructuring
and use of memory prefetch instructions. Encode is 30-40% faster
and decode 5-8% faster.
* Remove unused ax_with_libdeflate.m4 file from build system
- Remove patch fix_ix86_build.patch (already merged upstream)
- Update to 1.4.0:
* This is almost entirely minor bug fixing with a few small updates.
* Optimise compression / speed of the name tokeniser
* Improvements for Intel -m32 builds, including better AVX2 validation
* Detect Neon capability at runtime via operating system APIs.
* Update hts_pack to operate in line with CRAMcodecs spec, where the number of symbols > 16.
* Fixed too-stringent buffer overflow checking in O1 rans decoder.
-------------------------------------------------------------------
Thu Sep 8 21:52:47 UTC 2022 - Stefan Brüns <stefan.bruens@rwth-aachen.de>

View File

@ -1,7 +1,7 @@
#
# spec file for package htscodecs
#
# Copyright (c) 2022 SUSE LLC
# Copyright (c) 2023 SUSE LLC
#
# All modifications and additions to the file contributed by third parties
# remain the property of their copyright owners, unless otherwise agreed
@ -18,7 +18,7 @@
%define sonum 2
Name: htscodecs
Version: 1.3.0
Version: 1.5.0
Release: 0
Summary: C library for custom compression for CRAM and other formats
License: MIT
@ -26,8 +26,6 @@ Group: Productivity/Scientific/Other
URL: https://github.com/samtools/htscodecs
Source0: https://github.com/samtools/htscodecs/releases/download/v%{version}/htscodecs-%{version}.tar.gz
Source100: baselibs.conf
# PATCH-FIX-UPSTREAM
Patch0: https://github.com/samtools/htscodecs/commit/843d4f63b1c64905881b4648916a4d027baa1a1c.patch#/fix_ix86_build.patch
BuildRequires: autoconf
BuildRequires: gcc-c++
BuildRequires: pkgconfig
@ -64,8 +62,6 @@ Header files and libraries of the samtools project for compiling against %{name}
%autosetup -p1
%build
# Rebuild configure script after Patch0
autoconf
%configure --disable-static
%make_build