Accepting request 686469 from devel:libraries:c_c++

- Try to safely abort if we get NULL ptr bsc#1110304 bsc#1129576: * zlib-power8-fate325307.patch OBS-URL: https://build.opensuse.org/request/show/686469 OBS-URL: https://build.opensuse.org/package/show/openSUSE:Factory/zlib?expand=0&rev=69
2019-03-25 08:47:11 +00:00 · 2019-03-25 08:47:11 +00:00 · bb5c8151d4
commit bb5c8151d4
parent 84d27fe56d 67a5099b2d
2 changed files with 137 additions and 588 deletions
--- a/zlib-power8-fate325307.patch
+++ b/zlib-power8-fate325307.patch
@ -14,12 +14,11 @@ This is the C implementation created by Rogerio Alves <rogealve@br.ibm.com>
 create mode 100644 power8-crc/crc32_constants.h
 create mode 100644 power8-crc/vec_crc32.c

-diff --git a/contrib/power8-crc/clang_workaround.h b/contrib/power8-crc/clang_workaround.h
-new file mode 100644
-index 00000000..9b26ba59
+Index: zlib-1.2.11/contrib/power8-crc/clang_workaround.h
+===================================================================
 --- /dev/null
-+++ b/contrib/power8-crc/clang_workaround.h
-@@ -0,0 +1,69 @@
+++ zlib-1.2.11/contrib/power8-crc/clang_workaround.h
+@@ -0,0 +1,82 @@
 +#ifndef CLANG_WORKAROUNDS_H
 +#define CLANG_WORKAROUNDS_H
 +
@ -27,10 +26,6 @@ index 00000000..9b26ba59
 + * These stubs fix clang incompatibilities with GCC builtins.
 + */
 +
-+#if __BYTE_ORDER == __BIG_ENDIAN && defined(__clang__)
-+#error These workaround aren't big endian compatible
-+#endif
-+
 +#ifndef __builtin_crypto_vpmsumw
 +#define __builtin_crypto_vpmsumw __builtin_crypto_vpmsumb
 +#endif
@ -57,7 +52,11 @@ index 00000000..9b26ba59
 +__vector unsigned long long  __builtin_pack_vector (unsigned long __a,
 +						    unsigned long __b)
 +{
+	#if defined(__BIG_ENDIAN__)
+	__vector unsigned long long __v = {__a, __b};
+	#else
 +	__vector unsigned long long __v = {__b, __a};
+	#endif
 +	return __v;
 +}
 +
@ -70,30 +69,42 @@ index 00000000..9b26ba59
 +	return __v[__o];
 +}
 +
+#if defined(__BIG_ENDIAN__)
+#define __builtin_unpack_vector_0(a) __builtin_unpack_vector ((a), 0)
+#define __builtin_unpack_vector_1(a) __builtin_unpack_vector ((a), 1)
+#else
 +#define __builtin_unpack_vector_0(a) __builtin_unpack_vector ((a), 1)
 +#define __builtin_unpack_vector_1(a) __builtin_unpack_vector ((a), 0)
+#endif
 +
 +#else
 +
 +static inline
 +unsigned long __builtin_unpack_vector_0 (__vector unsigned long long __v)
 +{
+	#if defined(__BIG_ENDIAN__)
+	return vec_xxpermdi(__v, __v, 0x0)[1];
+	#else
 +	return vec_xxpermdi(__v, __v, 0x0)[0];
+	#endif
 +}
 +
 +static inline
 +unsigned long __builtin_unpack_vector_1 (__vector unsigned long long __v)
 +{
+	#if defined(__BIG_ENDIAN__)
+	return vec_xxpermdi(__v, __v, 0x3)[1];
+	#else
 +	return vec_xxpermdi(__v, __v, 0x3)[0];
+	#endif
 +}
 +#endif /* vec_xxpermdi */
 +
 +#endif
-diff --git a/contrib/power8-crc/crc32_constants.h b/contrib/power8-crc/crc32_constants.h
-new file mode 100644
-index 00000000..58088dcc
+Index: zlib-1.2.11/contrib/power8-crc/crc32_constants.h
+===================================================================
 --- /dev/null
-+++ b/contrib/power8-crc/crc32_constants.h
+++ zlib-1.2.11/contrib/power8-crc/crc32_constants.h
@@ -0,0 +1,1206 @@
 +/*
 +*
@ -1301,12 +1312,11 @@ index 00000000..58088dcc
 +#endif /* POWER8_INTRINSICS */
 +
 +#endif /* __ASSEMBLER__ */
-diff --git a/contrib/power8-crc/vec_crc32.c b/contrib/power8-crc/vec_crc32.c
-new file mode 100644
-index 00000000..864d04d4
+Index: zlib-1.2.11/contrib/power8-crc/vec_crc32.c
+===================================================================
 --- /dev/null
-+++ b/contrib/power8-crc/vec_crc32.c
-@@ -0,0 +1,672 @@
+++ zlib-1.2.11/contrib/power8-crc/vec_crc32.c
+@@ -0,0 +1,679 @@
 +/*
 + * Calculate the checksum of data that is 16 byte aligned and a multiple of
 + * 16 bytes.
@ -1349,11 +1359,13 @@ index 00000000..864d04d4
 +#include "crc32_constants.h"
 +#endif
 +
+#include <stdlib.h>
+
 +#define VMX_ALIGN	16
 +#define VMX_ALIGN_MASK	(VMX_ALIGN-1)
 +
 +#ifdef REFLECT
-+static unsigned int crc32_align(unsigned int crc, unsigned char *p,
+static unsigned int crc32_align(unsigned int crc, const unsigned char *p,
 +			       unsigned long len)
 +{
 +	while (len--)
@ -1361,7 +1373,7 @@ index 00000000..864d04d4
 +	return crc;
 +}
 +#else
-+static unsigned int crc32_align(unsigned int crc, unsigned char *p,
+static unsigned int crc32_align(unsigned int crc, const unsigned char *p,
 +				unsigned long len)
 +{
 +	while (len--)
@ -1371,18 +1383,21 @@ index 00000000..864d04d4
 +#endif
 +
 +static unsigned int __attribute__ ((aligned (32)))
-+__crc32_vpmsum(unsigned int crc, void* p, unsigned long len);
+__crc32_vpmsum(unsigned int crc, const void* p, unsigned long len);
 +
 +#ifndef CRC32_FUNCTION
 +#define CRC32_FUNCTION  crc32_vpmsum
 +#endif
 +
-+unsigned int CRC32_FUNCTION(unsigned int crc, unsigned char *p,
+unsigned int CRC32_FUNCTION(unsigned int crc, const unsigned char *p,
 +			    unsigned long len)
 +{
 +	unsigned int prealign;
 +	unsigned int tail;
 +
+	/* For zlib API */
+	if (p == NULL) return 0UL;
+
 +#ifdef CRC_XOR
 +	crc ^= 0xffffffff;
 +#endif
@ -1453,7 +1468,7 @@ index 00000000..864d04d4
 +#endif
 +
 +static unsigned int __attribute__ ((aligned (32)))
-+__crc32_vpmsum(unsigned int crc, void* p, unsigned long len) {
+__crc32_vpmsum(unsigned int crc, const void* p, unsigned long len) {
 +
 +	const __vector unsigned long long vzero = {0,0};
 +	const __vector unsigned long long vones = {0xffffffffffffffffUL,
@ -1494,8 +1509,8 @@ index 00000000..864d04d4
 +	unsigned int result = 0;
 +	unsigned int offset; /* Constant table offset. */
 +
-+	long i; /* Counter. */
-+	long chunks;
+	unsigned long i; /* Counter. */
+	unsigned long chunks;
 +
 +	unsigned long block_size;
 +	int next_block = 0;
@ -1567,7 +1582,7 @@ index 00000000..864d04d4
 +		/* xor in initial value */
 +		vdata0 = vec_xor(vdata0, vcrc);
 +
-+		p += 128;
+		p = (char *)p + 128;
 +
 +		do {
 +			/* Checksum in blocks of MAX_SIZE. */
@ -1635,14 +1650,14 @@ index 00000000..864d04d4
 +				vdata7 = vec_ld(112, (__vector unsigned long long*) p);
 +				VEC_PERM(vdata7, vdata7, vdata7, vperm_const);
 +
-+				p += 128;
+				p = (char *)p + 128;
 +
 +				/*
 +				 * main loop. We modulo schedule it such that it takes three
 +				 * iterations to complete - first iteration load, second
 +				 * iteration vpmsum, third iteration xor.
 +				 */
-+				for (i = 0; i < chunks-2; i++, p += 128) {
+				for (i = 0; i < chunks-2; i++) {
 +					vconst1 = vec_ld(offset, vcrc_const);
 +					offset += 16;
 +					GROUP_ENDING_NOP;
@ -1703,6 +1718,8 @@ index 00000000..864d04d4
 +							long)vdata7, (__vector unsigned long long)vconst1);
 +					vdata7 = vec_ld(112, (__vector unsigned long long*) p);
 +					VEC_PERM(vdata7, vdata7, vdata7, vperm_const);
+
+					p = (char *)p + 128;
 +				}
 +
 +				/* First cool down*/
@ -1809,7 +1826,7 @@ index 00000000..864d04d4
 +			va7 = vec_ld(112, (__vector unsigned long long*) p);
 +			VEC_PERM(va7, va7, va7, vperm_const);
 +
-+			p += 128;
+			p = (char *)p + 128;
 +
 +			vdata0 = vec_xor(v0, va0);
 +			vdata1 = vec_xor(v1, va1);
@ -1979,106 +1996,16 @@ index 00000000..864d04d4
 +
 +	return result;
 +}
-
-From 615d7188509b0f16dae919d7b369f8d01db18be5 Mon Sep 17 00:00:00 2001
-From: Daniel Black <daniel@linux.vnet.ibm.com>
-Date: Thu, 11 Jan 2018 17:04:38 +1100
-Subject: [PATCH 2/8] Port crc32-vpmsum (Power architecture) to zlib types/api
-
-Correct argument types and ensure that a buffer pointer
-of 0 returns 0ULL consistent with existing crc32 functions.
---
- power8-crc/vec_crc32.c | 23 +++++++++++++++--------
- 1 file changed, 15 insertions(+), 8 deletions(-)
-
-diff --git a/contrib/power8-crc/vec_crc32.c b/contrib/power8-crc/vec_crc32.c
-index 864d04d4..aa35ea5a 100644
--- a/contrib/power8-crc/vec_crc32.c
-+++ b/contrib/power8-crc/vec_crc32.c
-@@ -40,11 +40,13 @@
- #include "crc32_constants.h"
- #endif
- 
-+#include "../zutil.h"
-+
- #define VMX_ALIGN	16
- #define VMX_ALIGN_MASK	(VMX_ALIGN-1)
- 
- #ifdef REFLECT
-static unsigned int crc32_align(unsigned int crc, unsigned char *p,
-+static unsigned long crc32_align(unsigned int crc, const unsigned char *p,
- 			       unsigned long len)
- {
- 	while (len--)
-@@ -52,7 +54,7 @@ static unsigned int crc32_align(unsigned int crc, unsigned char *p,
- 	return crc;
- }
- #else
-static unsigned int crc32_align(unsigned int crc, unsigned char *p,
-+static unsigned long crc32_align(unsigned int crc, const unsigned char *p,
- 				unsigned long len)
- {
- 	while (len--)
-@@ -61,19 +63,24 @@ static unsigned int crc32_align(unsigned int crc, unsigned char *p,
- }
- #endif
- 
-static unsigned int __attribute__ ((aligned (32)))
-__crc32_vpmsum(unsigned int crc, void* p, unsigned long len);
-+static unsigned long __attribute__ ((aligned (32)))
-+__crc32_vpmsum(unsigned int crc, const void* p, unsigned long len);
- 
- #ifndef CRC32_FUNCTION
- #define CRC32_FUNCTION  crc32_vpmsum
- #endif
- 
-unsigned int CRC32_FUNCTION(unsigned int crc, unsigned char *p,
-			    unsigned long len)
-+unsigned long ZLIB_INTERNAL CRC32_FUNCTION(
-+    unsigned long crc,
-+    const unsigned char FAR *p,
-+    z_size_t len)
- {
- 	unsigned int prealign;
- 	unsigned int tail;
- 
-+	/* For zlib API */
-+	if (p == NULL) return 0UL;
-+
- #ifdef CRC_XOR
- 	crc ^= 0xffffffff;
- #endif
-@@ -143,8 +150,8 @@ static const __vector unsigned long long vperm_const
- #define VEC_PERM(vr, va, vb, vc)
- #endif
- 
-static unsigned int __attribute__ ((aligned (32)))
-__crc32_vpmsum(unsigned int crc, void* p, unsigned long len) {
-+static unsigned long __attribute__ ((aligned (32)))
-+__crc32_vpmsum(unsigned int crc, const void* p, unsigned long len) {
- 
- 	const __vector unsigned long long vzero = {0,0};
- 	const __vector unsigned long long vones = {0xffffffffffffffffUL,
-
-From c2697e9b5f8379b434ed79ab9599bab1193f1d46 Mon Sep 17 00:00:00 2001
-From: Daniel Black <daniel@linux.vnet.ibm.com>
-Date: Wed, 10 Jan 2018 10:55:27 +1100
-Subject: [PATCH 3/8] Add makefile rule to build crc32_power8 objects
-
---
- Makefile.in | 8 ++++++++
- 1 file changed, 8 insertions(+)
-
-diff --git a/Makefile.in b/Makefile.in
-index 5a77949f..5b8563eb 100644
--- a/Makefile.in
-+++ b/Makefile.in
+Index: zlib-1.2.11/Makefile.in
+===================================================================
+--- zlib-1.2.11.orig/Makefile.in
+++ zlib-1.2.11/Makefile.in
@@ -162,6 +162,9 @@ adler32.o: $(SRCDIR)adler32.c
 crc32.o: $(SRCDIR)crc32.c
 	$(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)crc32.c
 
-+crc32_power8.o: $(SRCDIR)power8-crc/vec_crc32.c
-+	$(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)power8-crc/vec_crc32.c
+crc32_power8.o: $(SRCDIR)contrib/power8-crc/vec_crc32.c
+	$(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)contrib/power8-crc/vec_crc32.c
 +
 deflate.o: $(SRCDIR)deflate.c
 	$(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)deflate.c
@ -2087,73 +2014,55 @@ index 5a77949f..5b8563eb 100644
 	$(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/crc32.o $(SRCDIR)crc32.c
 	-@mv objs/crc32.o $@
 
-+crc32_power8.lo: $(SRCDIR)power8-crc/vec_crc32.c
+crc32_power8.lo: $(SRCDIR)contrib/power8-crc/vec_crc32.c
 +	-@mkdir objs 2>/dev/null || test -d objs
-+	$(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/crc32_power8.o $(SRCDIR)power8-crc/vec_crc32.c
+	$(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/crc32_power8.o $(SRCDIR)contrib/power8-crc/vec_crc32.c
 +	-@mv objs/crc32_power8.o $@
 +
 deflate.lo: $(SRCDIR)deflate.c
 	-@mkdir objs 2>/dev/null || test -d objs
 	$(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/deflate.o $(SRCDIR)deflate.c
-
-From a2f5adc957354d677fe25a7fc167506c436dd6e6 Mon Sep 17 00:00:00 2001
-From: Daniel Black <daniel@linux.vnet.ibm.com>
-Date: Wed, 10 Jan 2018 11:01:30 +1100
-Subject: [PATCH 4/8] Runtime crc32_z optimized implementation detection
-
-To support runtime optimization of crc32 GNU IFUNC capabilities
-allows zlib to return an optimized crc32_z function pointer that is
-resolved at runtime to the existing crc32_z name and is able to be
-used by existing applications.
-
-There are two code forms in which this can be defined; a native
-attribute, and; an asm type defination which will work with older
-gcc compilers.
-
-crc32_ifunc is added as a function that is called by the glibc
-loader if the IFUNC capability exists to resolve the crc32_z
-function. If the IFUNC capabilies don't exists it will otherwise
-returns a function pointer on the first instigation of the crc32_z
-function call. For staticly compiled code the function pointer
-variant of this function is used.
-
-crc32_ifunc provides a point of expansion for returning other
-optimized crc32 implementations for other architectures.
-
-DYNAMIC_CRC_TABLE/make_crc_table now occurs to the crc32_ifunc
-and only if an crc32 function (crc32_big/crc32_little/
-crc32_table_lookup) that use the generated table.
-
-As a result lazy binding occurs (the default for glibc) on the
-calling of make_crc_table occurs on the first use of crc32/crc32_z.
-Compile time linker options, environment LD_BIND_NOW, hardened
-compilers etc, will solve functions, i.e. the IFUNC, on symbold
-initialisation to occur as the program is loaded rather
-than on first use of crc32/crc32_z. The generation of this table
-will be farely minor compared to the other non-lazy bindings
-occuring.
-
-As crc32_big/crc32_little are optimized functions these are used
-as a fallback to any optimized implemented (provided NO_BYFOUR
-isn't defined) these will now be called directly for a crc32/
-crc32_z and as such the 'if (buf == Z_NULL) return 0UL;' needed to
-be introduced to these functions.
-
-The table lookup implementation of crc32 is moved to
-crc32_table_lookup and used a function of last resort.
---
- configure | 52 +++++++++++++++++++++++++++++++++++++++++++++++
- crc32.c   | 69 ++++++++++++++++++++++++++++++++++++++++++++++++++++-----------
- 2 files changed, 109 insertions(+), 12 deletions(-)
-
-diff --git a/configure b/configure
-index e974d1fd..0b2fe035 100755
--- a/configure
-+++ b/configure
-@@ -826,6 +826,58 @@ EOF
+Index: zlib-1.2.11/configure
+===================================================================
+--- zlib-1.2.11.orig/configure
+++ zlib-1.2.11/configure
+@@ -826,6 +826,91 @@ EOF
   fi
 fi
 
+# test to see if Power8+ implementation is compile time possible
+echo >> configure.log
+cat > $test.c <<EOF
+#if _ARCH_PWR8==1
+
+#if __BYTE_ORDER == __BIG_ENDIAN && defined(__clang__)
+#error "Clang vector instructions aren't big endian compatible"
+#endif
+
+#if defined(__BUILTIN_CPU_SUPPORTS__)
+/* good and easy */
+#else
+#include <sys/auxv.h>
+#include <bits/hwcap.h>
+int main()
+{
+  return (getauxval(AT_HWCAP2) & PPC_FEATURE2_ARCH_2_07);
+}
+#endif
+
+#else
+#error No Power 8 or newer architecture, may need -mcpu=power8
+#endif
+EOF
+
+if tryboth $CC -c $CFLAGS $test.c; then
+  OBJC="$OBJC crc32_power8.o"
+  PIC_OBJC="$PIC_OBJC crc32_power8.lo"
+  echo "Checking for runtime cpu detection and Power 8 (or newer) Architecture support... Yes." | tee -a configure.log
+else
+  echo "Checking for runtime cpu detection and Power 8 (or newer) Architecture support... No." | tee -a configure.log
+fi
+
 +# test to see if we can use a gnu indirection function to detect and load optimized code at runtime
 +echo >> configure.log
 +cat > $test.c <<EOF
@ -2209,11 +2118,11 @@ index e974d1fd..0b2fe035 100755
 # show the results in the log
 echo >> configure.log
 echo ALL = $ALL >> configure.log
-diff --git a/crc32.c b/crc32.c
-index 9580440c..52e855fb 100644
--- a/crc32.c
-+++ b/crc32.c
-@@ -199,13 +199,47 @@ const z_crc_t FAR * ZEXPORT get_crc_table()
+Index: zlib-1.2.11/crc32.c
+===================================================================
+--- zlib-1.2.11.orig/crc32.c
+++ zlib-1.2.11/crc32.c
+@@ -199,13 +199,78 @@ const z_crc_t FAR * ZEXPORT get_crc_tabl
 #define DO8 DO1; DO1; DO1; DO1; DO1; DO1; DO1; DO1
 
 /* ========================================================================= */
@ -2237,6 +2146,19 @@ index 9580440c..52e855fb 100644
 +    return crc ^ 0xffffffffUL;
 +}
 +
+/* Small helper function to compare optfn against the reference table lookup
+ * return test_ref_comparision_##optfn in crc32_z_ifunc
+#include <assert.h>
+#define TEST_COMPARE(optfn) \
+   static unsigned long test_ref_comparision_ ## optfn(unsigned long crc, const unsigned char FAR *p, z_size_t len) \
+   { \
+     unsigned long crc_tbl_lookup = crc32_table_lookup(crc, p, len); \
+     unsigned long optcrc = optfn(crc, p, len); \
+     assert( optcrc == crc_tbl_lookup ); \
+     return optcrc; \
+   }
+*/
+
 +#ifdef Z_IFUNC_ASM
 +unsigned long (*(crc32_z_ifunc(void)))(unsigned long, const unsigned char FAR *, z_size_t)
 +    __asm__ ("crc32_z");
@ -2249,6 +2171,15 @@ index 9580440c..52e855fb 100644
 +  __attribute__ ((ifunc ("crc32_z_ifunc")));
 +#endif
 +
+#if _ARCH_PWR8==1
+unsigned long crc32_vpmsum(unsigned long, const unsigned char FAR *, z_size_t);
+/* for testing TEST_COMPARE(crc32_vpmsum) */
+#ifndef __BUILTIN_CPU_SUPPORTS__
+#include <sys/auxv.h>
+#include <bits/hwcap.h>
+#endif
+#endif
+
 +/* due to a quirk of gnu_indirect_function - "local" (aka static) is applied to
 + * crc32_z which is not desired. crc32_z_ifunc is implicitly "local" */
 +#ifndef Z_IFUNC_ASM
@ -2256,13 +2187,22 @@ index 9580440c..52e855fb 100644
 +#endif
 +unsigned long (*(crc32_z_ifunc(void)))(unsigned long, const unsigned char FAR *, z_size_t)
 +{
+#if _ARCH_PWR8==1
+#if defined(__BUILTIN_CPU_SUPPORTS__)
+    if (__builtin_cpu_supports("arch_2_07"))
+        return crc32_vpmsum;
+#else
+    if (getauxval(AT_HWCAP2) & PPC_FEATURE2_ARCH_2_07)
+        return crc32_vpmsum;
+#endif
+#endif /* _ARCH_PWR8 */
 +
 +/* return a function pointer for optimized arches here */
 +
 #ifdef DYNAMIC_CRC_TABLE
     if (crc_table_empty)
         make_crc_table();
-@@ -217,22 +251,31 @@ unsigned long ZEXPORT crc32_z(crc, buf, len)
+@@ -217,22 +282,31 @@ unsigned long ZEXPORT crc32_z(crc, buf,
 
         endian = 1;
         if (*((unsigned char *)(&endian)))
@ -2305,7 +2245,7 @@ index 9580440c..52e855fb 100644
 /* ========================================================================= */
 unsigned long ZEXPORT crc32(crc, buf, len)
     unsigned long crc;
-@@ -271,6 +314,7 @@ local unsigned long crc32_little(crc, buf, len)
+@@ -271,6 +345,7 @@ local unsigned long crc32_little(crc, bu
     register z_crc_t c;
     register const z_crc_t FAR *buf4;
 
@ -2313,7 +2253,7 @@ index 9580440c..52e855fb 100644
     c = (z_crc_t)crc;
     c = ~c;
     while (len && ((ptrdiff_t)buf & 3)) {
-@@ -311,6 +355,7 @@ local unsigned long crc32_big(crc, buf, len)
+@@ -311,6 +386,7 @@ local unsigned long crc32_big(crc, buf,
     register z_crc_t c;
     register const z_crc_t FAR *buf4;
 
@ -2321,400 +2261,3 @@ index 9580440c..52e855fb 100644
     c = ZSWAP32((z_crc_t)crc);
     c = ~c;
     while (len && ((ptrdiff_t)buf & 3)) {
-
-From 49c212a202fd1e12dc42574b228d72085cc4d1f7 Mon Sep 17 00:00:00 2001
-From: Daniel Black <daniel@linux.vnet.ibm.com>
-Date: Mon, 15 Jan 2018 08:59:43 +1100
-Subject: [PATCH 5/8] Linux Power crc32_vpmsum if available
-
-Power Architecture is detected in the configure script and adds
-optimized code to PIC_OBJC/OBJC.
-
-Power8 crc32 performance
------------------------
-
-Test - decompressing a jdk binary:
-
-Before (no optimized crc32_vpmsum (disabled in crc32_z_ifunc):
-
-$ time ./minigzip -d -c  ../ibm-java-i386-sdk-8.0-5.0.bin.gz > ../ibm-java-i386-sdk-8.0-5.0.bin.restored
-
-real    0m2.972s
-user    0m2.292s
-sys     0m0.100s
-
-perf report -g --no-children:
-   52.26%  minigzip  minigzip          [.] crc32_little
-   18.86%  minigzip  minigzip          [.] inflate_fast
-    4.87%  minigzip  [unknown]         [k] 0xc000000000063748
-    4.87%  minigzip  libc-2.23.so      [.] __memcpy_power7
-    1.56%  minigzip  minigzip          [.] inflate
-    0.96%  minigzip  minigzip          [.] inflate_table
-    0.95%  minigzip  libc-2.23.so      [.] _IO_fwrite
-    0.61%  minigzip  minigzip          [.] inflateCodesUsed
-
-Using crc32_vpmsum:
-
-$ time ./minigzip -d -c  ../ibm-java-i386-sdk-8.0-5.0.bin.gz > ../ibm-java-i386-sdk-8.0-5.0.bin.restored
-
-real    0m0.895s
-user    0m0.224s
-sys     0m0.092s
-
-perf report -g --no-children:
-   36.49%  minigzip  minigzip          [.] inflate_fast
-   11.60%  minigzip  [unknown]         [k] 0xc000000000063748
-    7.93%  minigzip  libc-2.23.so      [.] __memcpy_power7
-    3.77%  minigzip  minigzip          [.] crc32_vpmsum
-    3.70%  minigzip  minigzip          [.] inflate_table
-    2.29%  minigzip  minigzip          [.] inflate
---
- configure | 33 +++++++++++++++++++++++++++++++++
- crc32.c   | 18 ++++++++++++++++++
- 2 files changed, 51 insertions(+)
-
-diff --git a/configure b/configure
-index 0b2fe035..650abe66 100755
--- a/configure
-+++ b/configure
-@@ -826,6 +826,39 @@ EOF
-   fi
- fi
- 
-+# test to see if Power8+ implementation is compile time possible
-+echo >> configure.log
-+cat > $test.c <<EOF
-+#if _ARCH_PWR8==1
-+
-+#if __BYTE_ORDER == __BIG_ENDIAN && defined(__clang__)
-+#error "Clang vector instructions aren't big endian compatible"
-+#endif
-+
-+#if defined(__BUILTIN_CPU_SUPPORTS__)
-+/* good and easy */
-+#else
-+#include <sys/auxv.h>
-+#include <bits/hwcap.h>
-+int main()
-+{
-+  return (getauxval(AT_HWCAP2) & PPC_FEATURE2_ARCH_2_07);
-+}
-+#endif
-+
-+#else
-+#error No Power 8 or newer architecture, may need -mcpu=power8
-+#endif
-+EOF
-+
-+if tryboth $CC -c $CFLAGS $test.c; then
-+  OBJC="$OBJC crc32_power8.o"
-+  PIC_OBJC="$PIC_OBJC crc32_power8.lo"
-+  echo "Checking for runtime cpu detection and Power 8 (or newer) Architecture support... Yes." | tee -a configure.log
-+else
-+  echo "Checking for runtime cpu detection and Power 8 (or newer) Architecture support... No." | tee -a configure.log
-+fi
-+
- # test to see if we can use a gnu indirection function to detect and load optimized code at runtime
- echo >> configure.log
- cat > $test.c <<EOF
-diff --git a/crc32.c b/crc32.c
-index 52e855fb..b66aa520 100644
--- a/crc32.c
-+++ b/crc32.c
-@@ -230,6 +230,15 @@ unsigned long ZEXPORT crc32_z(
-   __attribute__ ((ifunc ("crc32_z_ifunc")));
- #endif
- 
-+#if _ARCH_PWR8==1
-+unsigned long crc32_vpmsum(unsigned long, const unsigned char FAR *, z_size_t);
-+/* for testing TEST_COMPARE(crc32_vpmsum) */
-+#ifndef __BUILTIN_CPU_SUPPORTS__
-+#include <sys/auxv.h>
-+#include <bits/hwcap.h>
-+#endif
-+#endif
-+
- /* due to a quirk of gnu_indirect_function - "local" (aka static) is applied to
-  * crc32_z which is not desired. crc32_z_ifunc is implictly "local" */
- #ifndef Z_IFUNC_ASM
-@@ -237,6 +246,15 @@ local
- #endif
- unsigned long (*(crc32_z_ifunc(void)))(unsigned long, const unsigned char FAR *, z_size_t)
- {
-+#if _ARCH_PWR8==1
-+#if defined(__BUILTIN_CPU_SUPPORTS__)
-+    if (__builtin_cpu_supports("arch_2_07"))
-+        return crc32_vpmsum;
-+#else
-+    if (getauxval(AT_HWCAP2) & PPC_FEATURE2_ARCH_2_07)
-+        return crc32_vpmsum;
-+#endif
-+#endif /* _ARCH_PWR8 */
- 
- /* return a function pointer for optimized arches here */
- 
-
-From 3476aa2e05deb4696c114dd3b0150f90e2a4e340 Mon Sep 17 00:00:00 2001
-From: Daniel Black <daniel@linux.vnet.ibm.com>
-Date: Fri, 12 Jan 2018 15:22:09 +1100
-Subject: [PATCH 6/8] crc32: add test harness for implementers of optimized
- crc32_z
-
---
- crc32.c | 13 +++++++++++++
- 1 file changed, 13 insertions(+)
-
-diff --git a/crc32.c b/crc32.c
-index b66aa520..12daa5e6 100644
--- a/crc32.c
-+++ b/crc32.c
-@@ -218,6 +218,19 @@ unsigned long ZEXPORT crc32_table_lookup(crc, buf, len)
-     return crc ^ 0xffffffffUL;
- }
- 
-+/* Small helper function to compare optfun against the reference table lookup
-+ * return test_ref_comparision_##optfn in crc32_z_ifunc
-+#include <assert.h>
-+#define TEST_COMPARE(optfn) \
-+   static unsigned long test_ref_comparision_ ## optfn(unsigned long crc, const unsigned char FAR *p, z_size_t len) \
-+   { \
-+     unsigned long crc_tbl_lookup = crc32_table_lookup(crc, p, len); \
-+     unsigned long optcrc = optfn(crc, p, len); \
-+     assert( optcrc == crc_tbl_lookup ); \
-+     return optcrc; \
-+   }
-+*/
-+
- #ifdef Z_IFUNC_ASM
- unsigned long (*(crc32_z_ifunc(void)))(unsigned long, const unsigned char FAR *, z_size_t)
-     __asm__ ("crc32_z");
-
-From 0daabafef27d7c215ed28d89cd366874a58d2573 Mon Sep 17 00:00:00 2001
-From: Daniel Black <daniel@linux.vnet.ibm.com>
-Date: Tue, 6 Feb 2018 17:37:10 +1100
-Subject: [PATCH 7/8] fix: move power8-crc into contrib
-
---
- Makefile.in                                           | 8 ++++----
- 4 files changed, 4 insertions(+), 4 deletions(-)
-
-diff --git a/Makefile.in b/Makefile.in
-index 5b8563eb..c3a43f1b 100644
--- a/Makefile.in
-+++ b/Makefile.in
-@@ -162,8 +162,8 @@ adler32.o: $(SRCDIR)adler32.c
- crc32.o: $(SRCDIR)crc32.c
- 	$(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)crc32.c
- 
-crc32_power8.o: $(SRCDIR)power8-crc/vec_crc32.c
-	$(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)power8-crc/vec_crc32.c
-+crc32_power8.o: $(SRCDIR)contrib/power8-crc/vec_crc32.c
-+	$(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)contrib/power8-crc/vec_crc32.c
- 
- deflate.o: $(SRCDIR)deflate.c
- 	$(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)deflate.c
-@@ -215,9 +215,9 @@ crc32.lo: $(SRCDIR)crc32.c
- 	$(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/crc32.o $(SRCDIR)crc32.c
- 	-@mv objs/crc32.o $@
- 
-crc32_power8.lo: $(SRCDIR)power8-crc/vec_crc32.c
-+crc32_power8.lo: $(SRCDIR)contrib/power8-crc/vec_crc32.c
- 	-@mkdir objs 2>/dev/null || test -d objs
-	$(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/crc32_power8.o $(SRCDIR)power8-crc/vec_crc32.c
-+	$(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/crc32_power8.o $(SRCDIR)contrib/power8-crc/vec_crc32.c
- 	-@mv objs/crc32_power8.o $@
- 
- deflate.lo: $(SRCDIR)deflate.c
-
-From 5d3e57b905a586a6fb6f8b847c35ba3b47a20719 Mon Sep 17 00:00:00 2001
-From: Daniel Black <daniel@linux.vnet.ibm.com>
-Date: Thu, 8 Feb 2018 13:20:00 +1100
-Subject: [PATCH 8/8] fix: clang fixes for Big Endian on Power8 crc32
-
---
- contrib/power8-crc/clang_workaround.h | 21 +++++++++++++++++----
- contrib/power8-crc/vec_crc32.c        | 33 ++++++++++++++-------------------
- 2 files changed, 31 insertions(+), 23 deletions(-)
-
-diff --git a/contrib/power8-crc/clang_workaround.h b/contrib/power8-crc/clang_workaround.h
-index 9b26ba59..b5e7dae0 100644
--- a/contrib/power8-crc/clang_workaround.h
-+++ b/contrib/power8-crc/clang_workaround.h
-@@ -5,10 +5,6 @@
-  * These stubs fix clang incompatibilities with GCC builtins.
-  */
- 
-#if __BYTE_ORDER == __BIG_ENDIAN && defined(__clang__)
-#error These workaround aren't big endian compatible
-#endif
-
- #ifndef __builtin_crypto_vpmsumw
- #define __builtin_crypto_vpmsumw __builtin_crypto_vpmsumb
- #endif
-@@ -35,7 +31,11 @@ static inline
- __vector unsigned long long  __builtin_pack_vector (unsigned long __a,
- 						    unsigned long __b)
- {
-+	#if defined(__BIG_ENDIAN__)
-+	__vector unsigned long long __v = {__a, __b};
-+	#else
- 	__vector unsigned long long __v = {__b, __a};
-+	#endif
- 	return __v;
- }
- 
-@@ -48,21 +48,34 @@ unsigned long __builtin_unpack_vector (__vector unsigned long long __v,
- 	return __v[__o];
- }
- 
-+#if defined(__BIG_ENDIAN__)
-+#define __builtin_unpack_vector_0(a) __builtin_unpack_vector ((a), 0)
-+#define __builtin_unpack_vector_1(a) __builtin_unpack_vector ((a), 1)
-+#else
- #define __builtin_unpack_vector_0(a) __builtin_unpack_vector ((a), 1)
- #define __builtin_unpack_vector_1(a) __builtin_unpack_vector ((a), 0)
-+#endif
- 
- #else
- 
- static inline
- unsigned long __builtin_unpack_vector_0 (__vector unsigned long long __v)
- {
-+	#if defined(__BIG_ENDIAN__)
-+	return vec_xxpermdi(__v, __v, 0x0)[1];
-+	#else
- 	return vec_xxpermdi(__v, __v, 0x0)[0];
-+	#endif
- }
- 
- static inline
- unsigned long __builtin_unpack_vector_1 (__vector unsigned long long __v)
- {
-+	#if defined(__BIG_ENDIAN__)
-+	return vec_xxpermdi(__v, __v, 0x3)[1];
-+	#else
- 	return vec_xxpermdi(__v, __v, 0x3)[0];
-+	#endif
- }
- #endif /* vec_xxpermdi */
- 
-diff --git a/contrib/power8-crc/vec_crc32.c b/contrib/power8-crc/vec_crc32.c
-index aa35ea5a..bb2204b2 100644
--- a/contrib/power8-crc/vec_crc32.c
-+++ b/contrib/power8-crc/vec_crc32.c
-@@ -40,13 +40,11 @@
- #include "crc32_constants.h"
- #endif
- 
-#include "../zutil.h"
-
- #define VMX_ALIGN	16
- #define VMX_ALIGN_MASK	(VMX_ALIGN-1)
- 
- #ifdef REFLECT
-static unsigned long crc32_align(unsigned int crc, const unsigned char *p,
-+static unsigned int crc32_align(unsigned int crc, const unsigned char *p,
- 			       unsigned long len)
- {
- 	while (len--)
-@@ -54,7 +52,7 @@ static unsigned long crc32_align(unsigned int crc, const unsigned char *p,
- 	return crc;
- }
- #else
-static unsigned long crc32_align(unsigned int crc, const unsigned char *p,
-+static unsigned int crc32_align(unsigned int crc, const unsigned char *p,
- 				unsigned long len)
- {
- 	while (len--)
-@@ -63,24 +61,19 @@ static unsigned long crc32_align(unsigned int crc, const unsigned char *p,
- }
- #endif
- 
-static unsigned long __attribute__ ((aligned (32)))
-+static unsigned int __attribute__ ((aligned (32)))
- __crc32_vpmsum(unsigned int crc, const void* p, unsigned long len);
- 
- #ifndef CRC32_FUNCTION
- #define CRC32_FUNCTION  crc32_vpmsum
- #endif
- 
-unsigned long ZLIB_INTERNAL CRC32_FUNCTION(
-    unsigned long crc,
-    const unsigned char FAR *p,
-    z_size_t len)
-+unsigned int CRC32_FUNCTION(unsigned int crc, const unsigned char *p,
-+			    unsigned long len)
- {
- 	unsigned int prealign;
- 	unsigned int tail;
- 
-	/* For zlib API */
-	if (p == NULL) return 0UL;
-
- #ifdef CRC_XOR
- 	crc ^= 0xffffffff;
- #endif
-@@ -150,7 +143,7 @@ static const __vector unsigned long long vperm_const
- #define VEC_PERM(vr, va, vb, vc)
- #endif
- 
-static unsigned long __attribute__ ((aligned (32)))
-+static unsigned int __attribute__ ((aligned (32)))
- __crc32_vpmsum(unsigned int crc, const void* p, unsigned long len) {
- 
- 	const __vector unsigned long long vzero = {0,0};
-@@ -192,8 +185,8 @@ __crc32_vpmsum(unsigned int crc, const void* p, unsigned long len) {
- 	unsigned int result = 0;
- 	unsigned int offset; /* Constant table offset. */
- 
-	long i; /* Counter. */
-	long chunks;
-+	unsigned long i; /* Counter. */
-+	unsigned long chunks;
- 
- 	unsigned long block_size;
- 	int next_block = 0;
-@@ -265,7 +258,7 @@ __crc32_vpmsum(unsigned int crc, const void* p, unsigned long len) {
- 		/* xor in initial value */
- 		vdata0 = vec_xor(vdata0, vcrc);
- 
-		p += 128;
-+		p = (char *)p + 128;
- 
- 		do {
- 			/* Checksum in blocks of MAX_SIZE. */
-@@ -333,14 +326,14 @@ __crc32_vpmsum(unsigned int crc, const void* p, unsigned long len) {
- 				vdata7 = vec_ld(112, (__vector unsigned long long*) p);
- 				VEC_PERM(vdata7, vdata7, vdata7, vperm_const);
- 
-				p += 128;
-+				p = (char *)p + 128;
- 
- 				/*
- 				 * main loop. We modulo schedule it such that it takes three
- 				 * iterations to complete - first iteration load, second
- 				 * iteration vpmsum, third iteration xor.
- 				 */
-				for (i = 0; i < chunks-2; i++, p += 128) {
-+				for (i = 0; i < chunks-2; i++) {
- 					vconst1 = vec_ld(offset, vcrc_const);
- 					offset += 16;
- 					GROUP_ENDING_NOP;
-@@ -401,6 +394,8 @@ __crc32_vpmsum(unsigned int crc, const void* p, unsigned long len) {
- 							long)vdata7, (__vector unsigned long long)vconst1);
- 					vdata7 = vec_ld(112, (__vector unsigned long long*) p);
- 					VEC_PERM(vdata7, vdata7, vdata7, vperm_const);
-+
-+					p = (char *)p + 128;
- 				}
- 
- 				/* First cool down*/
-@@ -507,7 +502,7 @@ __crc32_vpmsum(unsigned int crc, const void* p, unsigned long len) {
- 			va7 = vec_ld(112, (__vector unsigned long long*) p);
- 			VEC_PERM(va7, va7, va7, vperm_const);
- 
-			p += 128;
-+			p = (char *)p + 128;
- 
- 			vdata0 = vec_xor(v0, va0);
- 			vdata1 = vec_xor(v1, va1);
--- a/zlib.changes
+++ b/zlib.changes
@ -1,3 +1,9 @@
+-------------------------------------------------------------------
+Tue Mar 19 14:11:48 UTC 2019 - Tomáš Chvátal <tchvatal@suse.com>
+
+- Try to safely abort if we get NULL ptr bsc#1110304 bsc#1129576:
+  * zlib-power8-fate325307.patch
+
 -------------------------------------------------------------------
 Wed Jun 20 14:51:07 UTC 2018 - tchvatal@suse.com