SHA256
3
0
forked from pool/zlib

Accepting request 686469 from devel:libraries:c_c++

- Try to safely abort if we get NULL ptr bsc#1110304 bsc#1129576:
  * zlib-power8-fate325307.patch

OBS-URL: https://build.opensuse.org/request/show/686469
OBS-URL: https://build.opensuse.org/package/show/openSUSE:Factory/zlib?expand=0&rev=69
This commit is contained in:
Dominique Leuenberger 2019-03-25 08:47:11 +00:00 committed by Git OBS Bridge
commit bb5c8151d4
2 changed files with 137 additions and 588 deletions

View File

@ -14,12 +14,11 @@ This is the C implementation created by Rogerio Alves <rogealve@br.ibm.com>
create mode 100644 power8-crc/crc32_constants.h
create mode 100644 power8-crc/vec_crc32.c
diff --git a/contrib/power8-crc/clang_workaround.h b/contrib/power8-crc/clang_workaround.h
new file mode 100644
index 00000000..9b26ba59
Index: zlib-1.2.11/contrib/power8-crc/clang_workaround.h
===================================================================
--- /dev/null
+++ b/contrib/power8-crc/clang_workaround.h
@@ -0,0 +1,69 @@
+++ zlib-1.2.11/contrib/power8-crc/clang_workaround.h
@@ -0,0 +1,82 @@
+#ifndef CLANG_WORKAROUNDS_H
+#define CLANG_WORKAROUNDS_H
+
@ -27,10 +26,6 @@ index 00000000..9b26ba59
+ * These stubs fix clang incompatibilities with GCC builtins.
+ */
+
+#if __BYTE_ORDER == __BIG_ENDIAN && defined(__clang__)
+#error These workaround aren't big endian compatible
+#endif
+
+#ifndef __builtin_crypto_vpmsumw
+#define __builtin_crypto_vpmsumw __builtin_crypto_vpmsumb
+#endif
@ -57,7 +52,11 @@ index 00000000..9b26ba59
+__vector unsigned long long __builtin_pack_vector (unsigned long __a,
+ unsigned long __b)
+{
+ #if defined(__BIG_ENDIAN__)
+ __vector unsigned long long __v = {__a, __b};
+ #else
+ __vector unsigned long long __v = {__b, __a};
+ #endif
+ return __v;
+}
+
@ -70,30 +69,42 @@ index 00000000..9b26ba59
+ return __v[__o];
+}
+
+#if defined(__BIG_ENDIAN__)
+#define __builtin_unpack_vector_0(a) __builtin_unpack_vector ((a), 0)
+#define __builtin_unpack_vector_1(a) __builtin_unpack_vector ((a), 1)
+#else
+#define __builtin_unpack_vector_0(a) __builtin_unpack_vector ((a), 1)
+#define __builtin_unpack_vector_1(a) __builtin_unpack_vector ((a), 0)
+#endif
+
+#else
+
+static inline
+unsigned long __builtin_unpack_vector_0 (__vector unsigned long long __v)
+{
+ #if defined(__BIG_ENDIAN__)
+ return vec_xxpermdi(__v, __v, 0x0)[1];
+ #else
+ return vec_xxpermdi(__v, __v, 0x0)[0];
+ #endif
+}
+
+static inline
+unsigned long __builtin_unpack_vector_1 (__vector unsigned long long __v)
+{
+ #if defined(__BIG_ENDIAN__)
+ return vec_xxpermdi(__v, __v, 0x3)[1];
+ #else
+ return vec_xxpermdi(__v, __v, 0x3)[0];
+ #endif
+}
+#endif /* vec_xxpermdi */
+
+#endif
diff --git a/contrib/power8-crc/crc32_constants.h b/contrib/power8-crc/crc32_constants.h
new file mode 100644
index 00000000..58088dcc
Index: zlib-1.2.11/contrib/power8-crc/crc32_constants.h
===================================================================
--- /dev/null
+++ b/contrib/power8-crc/crc32_constants.h
+++ zlib-1.2.11/contrib/power8-crc/crc32_constants.h
@@ -0,0 +1,1206 @@
+/*
+*
@ -1301,12 +1312,11 @@ index 00000000..58088dcc
+#endif /* POWER8_INTRINSICS */
+
+#endif /* __ASSEMBLER__ */
diff --git a/contrib/power8-crc/vec_crc32.c b/contrib/power8-crc/vec_crc32.c
new file mode 100644
index 00000000..864d04d4
Index: zlib-1.2.11/contrib/power8-crc/vec_crc32.c
===================================================================
--- /dev/null
+++ b/contrib/power8-crc/vec_crc32.c
@@ -0,0 +1,672 @@
+++ zlib-1.2.11/contrib/power8-crc/vec_crc32.c
@@ -0,0 +1,679 @@
+/*
+ * Calculate the checksum of data that is 16 byte aligned and a multiple of
+ * 16 bytes.
@ -1349,11 +1359,13 @@ index 00000000..864d04d4
+#include "crc32_constants.h"
+#endif
+
+#include <stdlib.h>
+
+#define VMX_ALIGN 16
+#define VMX_ALIGN_MASK (VMX_ALIGN-1)
+
+#ifdef REFLECT
+static unsigned int crc32_align(unsigned int crc, unsigned char *p,
+static unsigned int crc32_align(unsigned int crc, const unsigned char *p,
+ unsigned long len)
+{
+ while (len--)
@ -1361,7 +1373,7 @@ index 00000000..864d04d4
+ return crc;
+}
+#else
+static unsigned int crc32_align(unsigned int crc, unsigned char *p,
+static unsigned int crc32_align(unsigned int crc, const unsigned char *p,
+ unsigned long len)
+{
+ while (len--)
@ -1371,18 +1383,21 @@ index 00000000..864d04d4
+#endif
+
+static unsigned int __attribute__ ((aligned (32)))
+__crc32_vpmsum(unsigned int crc, void* p, unsigned long len);
+__crc32_vpmsum(unsigned int crc, const void* p, unsigned long len);
+
+#ifndef CRC32_FUNCTION
+#define CRC32_FUNCTION crc32_vpmsum
+#endif
+
+unsigned int CRC32_FUNCTION(unsigned int crc, unsigned char *p,
+unsigned int CRC32_FUNCTION(unsigned int crc, const unsigned char *p,
+ unsigned long len)
+{
+ unsigned int prealign;
+ unsigned int tail;
+
+ /* For zlib API */
+ if (p == NULL) return 0UL;
+
+#ifdef CRC_XOR
+ crc ^= 0xffffffff;
+#endif
@ -1453,7 +1468,7 @@ index 00000000..864d04d4
+#endif
+
+static unsigned int __attribute__ ((aligned (32)))
+__crc32_vpmsum(unsigned int crc, void* p, unsigned long len) {
+__crc32_vpmsum(unsigned int crc, const void* p, unsigned long len) {
+
+ const __vector unsigned long long vzero = {0,0};
+ const __vector unsigned long long vones = {0xffffffffffffffffUL,
@ -1494,8 +1509,8 @@ index 00000000..864d04d4
+ unsigned int result = 0;
+ unsigned int offset; /* Constant table offset. */
+
+ long i; /* Counter. */
+ long chunks;
+ unsigned long i; /* Counter. */
+ unsigned long chunks;
+
+ unsigned long block_size;
+ int next_block = 0;
@ -1567,7 +1582,7 @@ index 00000000..864d04d4
+ /* xor in initial value */
+ vdata0 = vec_xor(vdata0, vcrc);
+
+ p += 128;
+ p = (char *)p + 128;
+
+ do {
+ /* Checksum in blocks of MAX_SIZE. */
@ -1635,14 +1650,14 @@ index 00000000..864d04d4
+ vdata7 = vec_ld(112, (__vector unsigned long long*) p);
+ VEC_PERM(vdata7, vdata7, vdata7, vperm_const);
+
+ p += 128;
+ p = (char *)p + 128;
+
+ /*
+ * main loop. We modulo schedule it such that it takes three
+ * iterations to complete - first iteration load, second
+ * iteration vpmsum, third iteration xor.
+ */
+ for (i = 0; i < chunks-2; i++, p += 128) {
+ for (i = 0; i < chunks-2; i++) {
+ vconst1 = vec_ld(offset, vcrc_const);
+ offset += 16;
+ GROUP_ENDING_NOP;
@ -1703,6 +1718,8 @@ index 00000000..864d04d4
+ long)vdata7, (__vector unsigned long long)vconst1);
+ vdata7 = vec_ld(112, (__vector unsigned long long*) p);
+ VEC_PERM(vdata7, vdata7, vdata7, vperm_const);
+
+ p = (char *)p + 128;
+ }
+
+ /* First cool down*/
@ -1809,7 +1826,7 @@ index 00000000..864d04d4
+ va7 = vec_ld(112, (__vector unsigned long long*) p);
+ VEC_PERM(va7, va7, va7, vperm_const);
+
+ p += 128;
+ p = (char *)p + 128;
+
+ vdata0 = vec_xor(v0, va0);
+ vdata1 = vec_xor(v1, va1);
@ -1979,106 +1996,16 @@ index 00000000..864d04d4
+
+ return result;
+}
From 615d7188509b0f16dae919d7b369f8d01db18be5 Mon Sep 17 00:00:00 2001
From: Daniel Black <daniel@linux.vnet.ibm.com>
Date: Thu, 11 Jan 2018 17:04:38 +1100
Subject: [PATCH 2/8] Port crc32-vpmsum (Power architecture) to zlib types/api
Correct argument types and ensure that a NULL buffer pointer
returns 0UL, consistent with existing crc32 functions.
---
power8-crc/vec_crc32.c | 23 +++++++++++++++--------
1 file changed, 15 insertions(+), 8 deletions(-)
diff --git a/contrib/power8-crc/vec_crc32.c b/contrib/power8-crc/vec_crc32.c
index 864d04d4..aa35ea5a 100644
--- a/contrib/power8-crc/vec_crc32.c
+++ b/contrib/power8-crc/vec_crc32.c
@@ -40,11 +40,13 @@
#include "crc32_constants.h"
#endif
+#include "../zutil.h"
+
#define VMX_ALIGN 16
#define VMX_ALIGN_MASK (VMX_ALIGN-1)
#ifdef REFLECT
-static unsigned int crc32_align(unsigned int crc, unsigned char *p,
+static unsigned long crc32_align(unsigned int crc, const unsigned char *p,
unsigned long len)
{
while (len--)
@@ -52,7 +54,7 @@ static unsigned int crc32_align(unsigned int crc, unsigned char *p,
return crc;
}
#else
-static unsigned int crc32_align(unsigned int crc, unsigned char *p,
+static unsigned long crc32_align(unsigned int crc, const unsigned char *p,
unsigned long len)
{
while (len--)
@@ -61,19 +63,24 @@ static unsigned int crc32_align(unsigned int crc, unsigned char *p,
}
#endif
-static unsigned int __attribute__ ((aligned (32)))
-__crc32_vpmsum(unsigned int crc, void* p, unsigned long len);
+static unsigned long __attribute__ ((aligned (32)))
+__crc32_vpmsum(unsigned int crc, const void* p, unsigned long len);
#ifndef CRC32_FUNCTION
#define CRC32_FUNCTION crc32_vpmsum
#endif
-unsigned int CRC32_FUNCTION(unsigned int crc, unsigned char *p,
- unsigned long len)
+unsigned long ZLIB_INTERNAL CRC32_FUNCTION(
+ unsigned long crc,
+ const unsigned char FAR *p,
+ z_size_t len)
{
unsigned int prealign;
unsigned int tail;
+ /* For zlib API */
+ if (p == NULL) return 0UL;
+
#ifdef CRC_XOR
crc ^= 0xffffffff;
#endif
@@ -143,8 +150,8 @@ static const __vector unsigned long long vperm_const
#define VEC_PERM(vr, va, vb, vc)
#endif
-static unsigned int __attribute__ ((aligned (32)))
-__crc32_vpmsum(unsigned int crc, void* p, unsigned long len) {
+static unsigned long __attribute__ ((aligned (32)))
+__crc32_vpmsum(unsigned int crc, const void* p, unsigned long len) {
const __vector unsigned long long vzero = {0,0};
const __vector unsigned long long vones = {0xffffffffffffffffUL,
From c2697e9b5f8379b434ed79ab9599bab1193f1d46 Mon Sep 17 00:00:00 2001
From: Daniel Black <daniel@linux.vnet.ibm.com>
Date: Wed, 10 Jan 2018 10:55:27 +1100
Subject: [PATCH 3/8] Add makefile rule to build crc32_power8 objects
---
Makefile.in | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/Makefile.in b/Makefile.in
index 5a77949f..5b8563eb 100644
--- a/Makefile.in
+++ b/Makefile.in
Index: zlib-1.2.11/Makefile.in
===================================================================
--- zlib-1.2.11.orig/Makefile.in
+++ zlib-1.2.11/Makefile.in
@@ -162,6 +162,9 @@ adler32.o: $(SRCDIR)adler32.c
crc32.o: $(SRCDIR)crc32.c
$(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)crc32.c
+crc32_power8.o: $(SRCDIR)power8-crc/vec_crc32.c
+ $(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)power8-crc/vec_crc32.c
+crc32_power8.o: $(SRCDIR)contrib/power8-crc/vec_crc32.c
+ $(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)contrib/power8-crc/vec_crc32.c
+
deflate.o: $(SRCDIR)deflate.c
$(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)deflate.c
@ -2087,73 +2014,55 @@ index 5a77949f..5b8563eb 100644
$(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/crc32.o $(SRCDIR)crc32.c
-@mv objs/crc32.o $@
+crc32_power8.lo: $(SRCDIR)power8-crc/vec_crc32.c
+crc32_power8.lo: $(SRCDIR)contrib/power8-crc/vec_crc32.c
+ -@mkdir objs 2>/dev/null || test -d objs
+ $(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/crc32_power8.o $(SRCDIR)power8-crc/vec_crc32.c
+ $(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/crc32_power8.o $(SRCDIR)contrib/power8-crc/vec_crc32.c
+ -@mv objs/crc32_power8.o $@
+
deflate.lo: $(SRCDIR)deflate.c
-@mkdir objs 2>/dev/null || test -d objs
$(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/deflate.o $(SRCDIR)deflate.c
From a2f5adc957354d677fe25a7fc167506c436dd6e6 Mon Sep 17 00:00:00 2001
From: Daniel Black <daniel@linux.vnet.ibm.com>
Date: Wed, 10 Jan 2018 11:01:30 +1100
Subject: [PATCH 4/8] Runtime crc32_z optimized implementation detection
To support runtime optimization of crc32, GNU IFUNC capabilities
allow zlib to return an optimized crc32_z function pointer that is
resolved at runtime to the existing crc32_z name and is able to be
used by existing applications.
There are two code forms in which this can be defined: a native
attribute, and an asm type definition which will work with older
gcc compilers.
crc32_ifunc is added as a function that is called by the glibc
loader, if the IFUNC capability exists, to resolve the crc32_z
function. If the IFUNC capabilities don't exist, it otherwise
returns a function pointer on the first invocation of the crc32_z
function call. For statically compiled code the function pointer
variant of this function is used.
crc32_ifunc provides a point of expansion for returning other
optimized crc32 implementations for other architectures.
DYNAMIC_CRC_TABLE/make_crc_table now occurs in crc32_ifunc,
and only if a crc32 function (crc32_big/crc32_little/
crc32_table_lookup) that uses the generated table is returned.
As a result, with lazy binding (the default for glibc) the
call to make_crc_table occurs on the first use of crc32/crc32_z.
Compile time linker options, environment LD_BIND_NOW, hardened
compilers etc. will cause functions, i.e. the IFUNC, to be resolved
at symbol initialisation, as the program is loaded, rather
than on first use of crc32/crc32_z. The cost of generating this table
will be fairly minor compared to the other non-lazy bindings
occurring.
As crc32_big/crc32_little are optimized functions, these are used
as a fallback to any optimized implementation (provided NO_BYFOUR
isn't defined). These will now be called directly for a crc32/
crc32_z and as such the 'if (buf == Z_NULL) return 0UL;' needed to
be introduced to these functions.
The table lookup implementation of crc32 is moved to
crc32_table_lookup and used as a function of last resort.
---
configure | 52 +++++++++++++++++++++++++++++++++++++++++++++++
crc32.c | 69 ++++++++++++++++++++++++++++++++++++++++++++++++++++-----------
2 files changed, 109 insertions(+), 12 deletions(-)
diff --git a/configure b/configure
index e974d1fd..0b2fe035 100755
--- a/configure
+++ b/configure
@@ -826,6 +826,58 @@ EOF
Index: zlib-1.2.11/configure
===================================================================
--- zlib-1.2.11.orig/configure
+++ zlib-1.2.11/configure
@@ -826,6 +826,91 @@ EOF
fi
fi
+# test to see if Power8+ implementation is compile time possible
+echo >> configure.log
+cat > $test.c <<EOF
+#if _ARCH_PWR8==1
+
+#if __BYTE_ORDER == __BIG_ENDIAN && defined(__clang__)
+#error "Clang vector instructions aren't big endian compatible"
+#endif
+
+#if defined(__BUILTIN_CPU_SUPPORTS__)
+/* good and easy */
+#else
+#include <sys/auxv.h>
+#include <bits/hwcap.h>
+int main()
+{
+ return (getauxval(AT_HWCAP2) & PPC_FEATURE2_ARCH_2_07);
+}
+#endif
+
+#else
+#error No Power 8 or newer architecture, may need -mcpu=power8
+#endif
+EOF
+
+if tryboth $CC -c $CFLAGS $test.c; then
+ OBJC="$OBJC crc32_power8.o"
+ PIC_OBJC="$PIC_OBJC crc32_power8.lo"
+ echo "Checking for runtime cpu detection and Power 8 (or newer) Architecture support... Yes." | tee -a configure.log
+else
+ echo "Checking for runtime cpu detection and Power 8 (or newer) Architecture support... No." | tee -a configure.log
+fi
+
+# test to see if we can use a gnu indirection function to detect and load optimized code at runtime
+echo >> configure.log
+cat > $test.c <<EOF
@ -2209,11 +2118,11 @@ index e974d1fd..0b2fe035 100755
# show the results in the log
echo >> configure.log
echo ALL = $ALL >> configure.log
diff --git a/crc32.c b/crc32.c
index 9580440c..52e855fb 100644
--- a/crc32.c
+++ b/crc32.c
@@ -199,13 +199,47 @@ const z_crc_t FAR * ZEXPORT get_crc_table()
Index: zlib-1.2.11/crc32.c
===================================================================
--- zlib-1.2.11.orig/crc32.c
+++ zlib-1.2.11/crc32.c
@@ -199,13 +199,78 @@ const z_crc_t FAR * ZEXPORT get_crc_tabl
#define DO8 DO1; DO1; DO1; DO1; DO1; DO1; DO1; DO1
/* ========================================================================= */
@ -2237,6 +2146,19 @@ index 9580440c..52e855fb 100644
+ return crc ^ 0xffffffffUL;
+}
+
+/* Small helper function to compare optfun against the reference table lookup
+ * return test_ref_comparision_##optfn in crc32_z_ifunc
+#include <assert.h>
+#define TEST_COMPARE(optfn) \
+ static unsigned long test_ref_comparision_ ## optfn(unsigned long crc, const unsigned char FAR *p, z_size_t len) \
+ { \
+ unsigned long crc_tbl_lookup = crc32_table_lookup(crc, p, len); \
+ unsigned long optcrc = optfn(crc, p, len); \
+ assert( optcrc == crc_tbl_lookup ); \
+ return optcrc; \
+ }
+*/
+
+#ifdef Z_IFUNC_ASM
+unsigned long (*(crc32_z_ifunc(void)))(unsigned long, const unsigned char FAR *, z_size_t)
+ __asm__ ("crc32_z");
@ -2249,6 +2171,15 @@ index 9580440c..52e855fb 100644
+ __attribute__ ((ifunc ("crc32_z_ifunc")));
+#endif
+
+#if _ARCH_PWR8==1
+unsigned long crc32_vpmsum(unsigned long, const unsigned char FAR *, z_size_t);
+/* for testing TEST_COMPARE(crc32_vpmsum) */
+#ifndef __BUILTIN_CPU_SUPPORTS__
+#include <sys/auxv.h>
+#include <bits/hwcap.h>
+#endif
+#endif
+
+/* due to a quirk of gnu_indirect_function - "local" (aka static) is applied to
+ * crc32_z which is not desired. crc32_z_ifunc is implictly "local" */
+#ifndef Z_IFUNC_ASM
@ -2256,13 +2187,22 @@ index 9580440c..52e855fb 100644
+#endif
+unsigned long (*(crc32_z_ifunc(void)))(unsigned long, const unsigned char FAR *, z_size_t)
+{
+#if _ARCH_PWR8==1
+#if defined(__BUILTIN_CPU_SUPPORTS__)
+ if (__builtin_cpu_supports("arch_2_07"))
+ return crc32_vpmsum;
+#else
+ if (getauxval(AT_HWCAP2) & PPC_FEATURE2_ARCH_2_07)
+ return crc32_vpmsum;
+#endif
+#endif /* _ARCH_PWR8 */
+
+/* return a function pointer for optimized arches here */
+
#ifdef DYNAMIC_CRC_TABLE
if (crc_table_empty)
make_crc_table();
@@ -217,22 +251,31 @@ unsigned long ZEXPORT crc32_z(crc, buf, len)
@@ -217,22 +282,31 @@ unsigned long ZEXPORT crc32_z(crc, buf,
endian = 1;
if (*((unsigned char *)(&endian)))
@ -2305,7 +2245,7 @@ index 9580440c..52e855fb 100644
/* ========================================================================= */
unsigned long ZEXPORT crc32(crc, buf, len)
unsigned long crc;
@@ -271,6 +314,7 @@ local unsigned long crc32_little(crc, buf, len)
@@ -271,6 +345,7 @@ local unsigned long crc32_little(crc, bu
register z_crc_t c;
register const z_crc_t FAR *buf4;
@ -2313,7 +2253,7 @@ index 9580440c..52e855fb 100644
c = (z_crc_t)crc;
c = ~c;
while (len && ((ptrdiff_t)buf & 3)) {
@@ -311,6 +355,7 @@ local unsigned long crc32_big(crc, buf, len)
@@ -311,6 +386,7 @@ local unsigned long crc32_big(crc, buf,
register z_crc_t c;
register const z_crc_t FAR *buf4;
@ -2321,400 +2261,3 @@ index 9580440c..52e855fb 100644
c = ZSWAP32((z_crc_t)crc);
c = ~c;
while (len && ((ptrdiff_t)buf & 3)) {
From 49c212a202fd1e12dc42574b228d72085cc4d1f7 Mon Sep 17 00:00:00 2001
From: Daniel Black <daniel@linux.vnet.ibm.com>
Date: Mon, 15 Jan 2018 08:59:43 +1100
Subject: [PATCH 5/8] Linux Power crc32_vpmsum if available
Power Architecture is detected in the configure script and adds
optimized code to PIC_OBJC/OBJC.
Power8 crc32 performance
------------------------
Test - decompressing a jdk binary:
Before (no optimized crc32_vpmsum; disabled in crc32_z_ifunc):
$ time ./minigzip -d -c ../ibm-java-i386-sdk-8.0-5.0.bin.gz > ../ibm-java-i386-sdk-8.0-5.0.bin.restored
real 0m2.972s
user 0m2.292s
sys 0m0.100s
perf report -g --no-children:
52.26% minigzip minigzip [.] crc32_little
18.86% minigzip minigzip [.] inflate_fast
4.87% minigzip [unknown] [k] 0xc000000000063748
4.87% minigzip libc-2.23.so [.] __memcpy_power7
1.56% minigzip minigzip [.] inflate
0.96% minigzip minigzip [.] inflate_table
0.95% minigzip libc-2.23.so [.] _IO_fwrite
0.61% minigzip minigzip [.] inflateCodesUsed
Using crc32_vpmsum:
$ time ./minigzip -d -c ../ibm-java-i386-sdk-8.0-5.0.bin.gz > ../ibm-java-i386-sdk-8.0-5.0.bin.restored
real 0m0.895s
user 0m0.224s
sys 0m0.092s
perf report -g --no-children:
36.49% minigzip minigzip [.] inflate_fast
11.60% minigzip [unknown] [k] 0xc000000000063748
7.93% minigzip libc-2.23.so [.] __memcpy_power7
3.77% minigzip minigzip [.] crc32_vpmsum
3.70% minigzip minigzip [.] inflate_table
2.29% minigzip minigzip [.] inflate
---
configure | 33 +++++++++++++++++++++++++++++++++
crc32.c | 18 ++++++++++++++++++
2 files changed, 51 insertions(+)
diff --git a/configure b/configure
index 0b2fe035..650abe66 100755
--- a/configure
+++ b/configure
@@ -826,6 +826,39 @@ EOF
fi
fi
+# test to see if Power8+ implementation is compile time possible
+echo >> configure.log
+cat > $test.c <<EOF
+#if _ARCH_PWR8==1
+
+#if __BYTE_ORDER == __BIG_ENDIAN && defined(__clang__)
+#error "Clang vector instructions aren't big endian compatible"
+#endif
+
+#if defined(__BUILTIN_CPU_SUPPORTS__)
+/* good and easy */
+#else
+#include <sys/auxv.h>
+#include <bits/hwcap.h>
+int main()
+{
+ return (getauxval(AT_HWCAP2) & PPC_FEATURE2_ARCH_2_07);
+}
+#endif
+
+#else
+#error No Power 8 or newer architecture, may need -mcpu=power8
+#endif
+EOF
+
+if tryboth $CC -c $CFLAGS $test.c; then
+ OBJC="$OBJC crc32_power8.o"
+ PIC_OBJC="$PIC_OBJC crc32_power8.lo"
+ echo "Checking for runtime cpu detection and Power 8 (or newer) Architecture support... Yes." | tee -a configure.log
+else
+ echo "Checking for runtime cpu detection and Power 8 (or newer) Architecture support... No." | tee -a configure.log
+fi
+
# test to see if we can use a gnu indirection function to detect and load optimized code at runtime
echo >> configure.log
cat > $test.c <<EOF
diff --git a/crc32.c b/crc32.c
index 52e855fb..b66aa520 100644
--- a/crc32.c
+++ b/crc32.c
@@ -230,6 +230,15 @@ unsigned long ZEXPORT crc32_z(
__attribute__ ((ifunc ("crc32_z_ifunc")));
#endif
+#if _ARCH_PWR8==1
+unsigned long crc32_vpmsum(unsigned long, const unsigned char FAR *, z_size_t);
+/* for testing TEST_COMPARE(crc32_vpmsum) */
+#ifndef __BUILTIN_CPU_SUPPORTS__
+#include <sys/auxv.h>
+#include <bits/hwcap.h>
+#endif
+#endif
+
/* due to a quirk of gnu_indirect_function - "local" (aka static) is applied to
* crc32_z which is not desired. crc32_z_ifunc is implictly "local" */
#ifndef Z_IFUNC_ASM
@@ -237,6 +246,15 @@ local
#endif
unsigned long (*(crc32_z_ifunc(void)))(unsigned long, const unsigned char FAR *, z_size_t)
{
+#if _ARCH_PWR8==1
+#if defined(__BUILTIN_CPU_SUPPORTS__)
+ if (__builtin_cpu_supports("arch_2_07"))
+ return crc32_vpmsum;
+#else
+ if (getauxval(AT_HWCAP2) & PPC_FEATURE2_ARCH_2_07)
+ return crc32_vpmsum;
+#endif
+#endif /* _ARCH_PWR8 */
/* return a function pointer for optimized arches here */
From 3476aa2e05deb4696c114dd3b0150f90e2a4e340 Mon Sep 17 00:00:00 2001
From: Daniel Black <daniel@linux.vnet.ibm.com>
Date: Fri, 12 Jan 2018 15:22:09 +1100
Subject: [PATCH 6/8] crc32: add test harness for implementers of optimized
crc32_z
---
crc32.c | 13 +++++++++++++
1 file changed, 13 insertions(+)
diff --git a/crc32.c b/crc32.c
index b66aa520..12daa5e6 100644
--- a/crc32.c
+++ b/crc32.c
@@ -218,6 +218,19 @@ unsigned long ZEXPORT crc32_table_lookup(crc, buf, len)
return crc ^ 0xffffffffUL;
}
+/* Small helper function to compare optfun against the reference table lookup
+ * return test_ref_comparision_##optfn in crc32_z_ifunc
+#include <assert.h>
+#define TEST_COMPARE(optfn) \
+ static unsigned long test_ref_comparision_ ## optfn(unsigned long crc, const unsigned char FAR *p, z_size_t len) \
+ { \
+ unsigned long crc_tbl_lookup = crc32_table_lookup(crc, p, len); \
+ unsigned long optcrc = optfn(crc, p, len); \
+ assert( optcrc == crc_tbl_lookup ); \
+ return optcrc; \
+ }
+*/
+
#ifdef Z_IFUNC_ASM
unsigned long (*(crc32_z_ifunc(void)))(unsigned long, const unsigned char FAR *, z_size_t)
__asm__ ("crc32_z");
From 0daabafef27d7c215ed28d89cd366874a58d2573 Mon Sep 17 00:00:00 2001
From: Daniel Black <daniel@linux.vnet.ibm.com>
Date: Tue, 6 Feb 2018 17:37:10 +1100
Subject: [PATCH 7/8] fix: move power8-crc into contrib
---
Makefile.in | 8 ++++----
4 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/Makefile.in b/Makefile.in
index 5b8563eb..c3a43f1b 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -162,8 +162,8 @@ adler32.o: $(SRCDIR)adler32.c
crc32.o: $(SRCDIR)crc32.c
$(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)crc32.c
-crc32_power8.o: $(SRCDIR)power8-crc/vec_crc32.c
- $(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)power8-crc/vec_crc32.c
+crc32_power8.o: $(SRCDIR)contrib/power8-crc/vec_crc32.c
+ $(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)contrib/power8-crc/vec_crc32.c
deflate.o: $(SRCDIR)deflate.c
$(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)deflate.c
@@ -215,9 +215,9 @@ crc32.lo: $(SRCDIR)crc32.c
$(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/crc32.o $(SRCDIR)crc32.c
-@mv objs/crc32.o $@
-crc32_power8.lo: $(SRCDIR)power8-crc/vec_crc32.c
+crc32_power8.lo: $(SRCDIR)contrib/power8-crc/vec_crc32.c
-@mkdir objs 2>/dev/null || test -d objs
- $(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/crc32_power8.o $(SRCDIR)power8-crc/vec_crc32.c
+ $(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/crc32_power8.o $(SRCDIR)contrib/power8-crc/vec_crc32.c
-@mv objs/crc32_power8.o $@
deflate.lo: $(SRCDIR)deflate.c
From 5d3e57b905a586a6fb6f8b847c35ba3b47a20719 Mon Sep 17 00:00:00 2001
From: Daniel Black <daniel@linux.vnet.ibm.com>
Date: Thu, 8 Feb 2018 13:20:00 +1100
Subject: [PATCH 8/8] fix: clang fixes for Big Endian on Power8 crc32
---
contrib/power8-crc/clang_workaround.h | 21 +++++++++++++++++----
contrib/power8-crc/vec_crc32.c | 33 ++++++++++++++-------------------
2 files changed, 31 insertions(+), 23 deletions(-)
diff --git a/contrib/power8-crc/clang_workaround.h b/contrib/power8-crc/clang_workaround.h
index 9b26ba59..b5e7dae0 100644
--- a/contrib/power8-crc/clang_workaround.h
+++ b/contrib/power8-crc/clang_workaround.h
@@ -5,10 +5,6 @@
* These stubs fix clang incompatibilities with GCC builtins.
*/
-#if __BYTE_ORDER == __BIG_ENDIAN && defined(__clang__)
-#error These workaround aren't big endian compatible
-#endif
-
#ifndef __builtin_crypto_vpmsumw
#define __builtin_crypto_vpmsumw __builtin_crypto_vpmsumb
#endif
@@ -35,7 +31,11 @@ static inline
__vector unsigned long long __builtin_pack_vector (unsigned long __a,
unsigned long __b)
{
+ #if defined(__BIG_ENDIAN__)
+ __vector unsigned long long __v = {__a, __b};
+ #else
__vector unsigned long long __v = {__b, __a};
+ #endif
return __v;
}
@@ -48,21 +48,34 @@ unsigned long __builtin_unpack_vector (__vector unsigned long long __v,
return __v[__o];
}
+#if defined(__BIG_ENDIAN__)
+#define __builtin_unpack_vector_0(a) __builtin_unpack_vector ((a), 0)
+#define __builtin_unpack_vector_1(a) __builtin_unpack_vector ((a), 1)
+#else
#define __builtin_unpack_vector_0(a) __builtin_unpack_vector ((a), 1)
#define __builtin_unpack_vector_1(a) __builtin_unpack_vector ((a), 0)
+#endif
#else
static inline
unsigned long __builtin_unpack_vector_0 (__vector unsigned long long __v)
{
+ #if defined(__BIG_ENDIAN__)
+ return vec_xxpermdi(__v, __v, 0x0)[1];
+ #else
return vec_xxpermdi(__v, __v, 0x0)[0];
+ #endif
}
static inline
unsigned long __builtin_unpack_vector_1 (__vector unsigned long long __v)
{
+ #if defined(__BIG_ENDIAN__)
+ return vec_xxpermdi(__v, __v, 0x3)[1];
+ #else
return vec_xxpermdi(__v, __v, 0x3)[0];
+ #endif
}
#endif /* vec_xxpermdi */
diff --git a/contrib/power8-crc/vec_crc32.c b/contrib/power8-crc/vec_crc32.c
index aa35ea5a..bb2204b2 100644
--- a/contrib/power8-crc/vec_crc32.c
+++ b/contrib/power8-crc/vec_crc32.c
@@ -40,13 +40,11 @@
#include "crc32_constants.h"
#endif
-#include "../zutil.h"
-
#define VMX_ALIGN 16
#define VMX_ALIGN_MASK (VMX_ALIGN-1)
#ifdef REFLECT
-static unsigned long crc32_align(unsigned int crc, const unsigned char *p,
+static unsigned int crc32_align(unsigned int crc, const unsigned char *p,
unsigned long len)
{
while (len--)
@@ -54,7 +52,7 @@ static unsigned long crc32_align(unsigned int crc, const unsigned char *p,
return crc;
}
#else
-static unsigned long crc32_align(unsigned int crc, const unsigned char *p,
+static unsigned int crc32_align(unsigned int crc, const unsigned char *p,
unsigned long len)
{
while (len--)
@@ -63,24 +61,19 @@ static unsigned long crc32_align(unsigned int crc, const unsigned char *p,
}
#endif
-static unsigned long __attribute__ ((aligned (32)))
+static unsigned int __attribute__ ((aligned (32)))
__crc32_vpmsum(unsigned int crc, const void* p, unsigned long len);
#ifndef CRC32_FUNCTION
#define CRC32_FUNCTION crc32_vpmsum
#endif
-unsigned long ZLIB_INTERNAL CRC32_FUNCTION(
- unsigned long crc,
- const unsigned char FAR *p,
- z_size_t len)
+unsigned int CRC32_FUNCTION(unsigned int crc, const unsigned char *p,
+ unsigned long len)
{
unsigned int prealign;
unsigned int tail;
- /* For zlib API */
- if (p == NULL) return 0UL;
-
#ifdef CRC_XOR
crc ^= 0xffffffff;
#endif
@@ -150,7 +143,7 @@ static const __vector unsigned long long vperm_const
#define VEC_PERM(vr, va, vb, vc)
#endif
-static unsigned long __attribute__ ((aligned (32)))
+static unsigned int __attribute__ ((aligned (32)))
__crc32_vpmsum(unsigned int crc, const void* p, unsigned long len) {
const __vector unsigned long long vzero = {0,0};
@@ -192,8 +185,8 @@ __crc32_vpmsum(unsigned int crc, const void* p, unsigned long len) {
unsigned int result = 0;
unsigned int offset; /* Constant table offset. */
- long i; /* Counter. */
- long chunks;
+ unsigned long i; /* Counter. */
+ unsigned long chunks;
unsigned long block_size;
int next_block = 0;
@@ -265,7 +258,7 @@ __crc32_vpmsum(unsigned int crc, const void* p, unsigned long len) {
/* xor in initial value */
vdata0 = vec_xor(vdata0, vcrc);
- p += 128;
+ p = (char *)p + 128;
do {
/* Checksum in blocks of MAX_SIZE. */
@@ -333,14 +326,14 @@ __crc32_vpmsum(unsigned int crc, const void* p, unsigned long len) {
vdata7 = vec_ld(112, (__vector unsigned long long*) p);
VEC_PERM(vdata7, vdata7, vdata7, vperm_const);
- p += 128;
+ p = (char *)p + 128;
/*
* main loop. We modulo schedule it such that it takes three
* iterations to complete - first iteration load, second
* iteration vpmsum, third iteration xor.
*/
- for (i = 0; i < chunks-2; i++, p += 128) {
+ for (i = 0; i < chunks-2; i++) {
vconst1 = vec_ld(offset, vcrc_const);
offset += 16;
GROUP_ENDING_NOP;
@@ -401,6 +394,8 @@ __crc32_vpmsum(unsigned int crc, const void* p, unsigned long len) {
long)vdata7, (__vector unsigned long long)vconst1);
vdata7 = vec_ld(112, (__vector unsigned long long*) p);
VEC_PERM(vdata7, vdata7, vdata7, vperm_const);
+
+ p = (char *)p + 128;
}
/* First cool down*/
@@ -507,7 +502,7 @@ __crc32_vpmsum(unsigned int crc, const void* p, unsigned long len) {
va7 = vec_ld(112, (__vector unsigned long long*) p);
VEC_PERM(va7, va7, va7, vperm_const);
- p += 128;
+ p = (char *)p + 128;
vdata0 = vec_xor(v0, va0);
vdata1 = vec_xor(v1, va1);

View File

@ -1,3 +1,9 @@
-------------------------------------------------------------------
Tue Mar 19 14:11:48 UTC 2019 - Tomáš Chvátal <tchvatal@suse.com>
- Try to safely abort if we get NULL ptr bsc#1110304 bsc#1129576:
* zlib-power8-fate325307.patch
-------------------------------------------------------------------
Wed Jun 20 14:51:07 UTC 2018 - tchvatal@suse.com