diff --git a/zlib-power8-fate325307.patch b/zlib-power8-fate325307.patch index 1eeaacc..0584508 100644 --- a/zlib-power8-fate325307.patch +++ b/zlib-power8-fate325307.patch @@ -14,12 +14,11 @@ This is the C implementation created by Rogerio Alves create mode 100644 power8-crc/crc32_constants.h create mode 100644 power8-crc/vec_crc32.c -diff --git a/contrib/power8-crc/clang_workaround.h b/contrib/power8-crc/clang_workaround.h -new file mode 100644 -index 00000000..9b26ba59 +Index: zlib-1.2.11/contrib/power8-crc/clang_workaround.h +=================================================================== --- /dev/null -+++ b/contrib/power8-crc/clang_workaround.h -@@ -0,0 +1,69 @@ ++++ zlib-1.2.11/contrib/power8-crc/clang_workaround.h +@@ -0,0 +1,82 @@ +#ifndef CLANG_WORKAROUNDS_H +#define CLANG_WORKAROUNDS_H + @@ -27,10 +26,6 @@ index 00000000..9b26ba59 + * These stubs fix clang incompatibilities with GCC builtins. + */ + -+#if __BYTE_ORDER == __BIG_ENDIAN && defined(__clang__) -+#error These workaround aren't big endian compatible -+#endif -+ +#ifndef __builtin_crypto_vpmsumw +#define __builtin_crypto_vpmsumw __builtin_crypto_vpmsumb +#endif @@ -57,7 +52,11 @@ index 00000000..9b26ba59 +__vector unsigned long long __builtin_pack_vector (unsigned long __a, + unsigned long __b) +{ ++ #if defined(__BIG_ENDIAN__) ++ __vector unsigned long long __v = {__a, __b}; ++ #else + __vector unsigned long long __v = {__b, __a}; ++ #endif + return __v; +} + @@ -70,30 +69,42 @@ index 00000000..9b26ba59 + return __v[__o]; +} + ++#if defined(__BIG_ENDIAN__) ++#define __builtin_unpack_vector_0(a) __builtin_unpack_vector ((a), 0) ++#define __builtin_unpack_vector_1(a) __builtin_unpack_vector ((a), 1) ++#else +#define __builtin_unpack_vector_0(a) __builtin_unpack_vector ((a), 1) +#define __builtin_unpack_vector_1(a) __builtin_unpack_vector ((a), 0) ++#endif + +#else + +static inline +unsigned long __builtin_unpack_vector_0 (__vector unsigned long long __v) +{ ++ #if defined(__BIG_ENDIAN__) ++ return vec_xxpermdi(__v, __v, 0x0)[1]; ++ #else + return vec_xxpermdi(__v, __v, 0x0)[0]; ++ #endif +} + +static inline +unsigned long __builtin_unpack_vector_1 (__vector unsigned long long __v) +{ ++ #if defined(__BIG_ENDIAN__) ++ return vec_xxpermdi(__v, __v, 0x3)[1]; ++ #else + return vec_xxpermdi(__v, __v, 0x3)[0]; ++ #endif +} +#endif /* vec_xxpermdi */ + +#endif -diff --git a/contrib/power8-crc/crc32_constants.h b/contrib/power8-crc/crc32_constants.h -new file mode 100644 -index 00000000..58088dcc +Index: zlib-1.2.11/contrib/power8-crc/crc32_constants.h +=================================================================== --- /dev/null -+++ b/contrib/power8-crc/crc32_constants.h ++++ zlib-1.2.11/contrib/power8-crc/crc32_constants.h @@ -0,0 +1,1206 @@ +/* +* @@ -1301,12 +1312,11 @@ index 00000000..58088dcc +#endif /* POWER8_INTRINSICS */ + +#endif /* __ASSEMBLER__ */ -diff --git a/contrib/power8-crc/vec_crc32.c b/contrib/power8-crc/vec_crc32.c -new file mode 100644 -index 00000000..864d04d4 +Index: zlib-1.2.11/contrib/power8-crc/vec_crc32.c +=================================================================== --- /dev/null -+++ b/contrib/power8-crc/vec_crc32.c -@@ -0,0 +1,672 @@ ++++ zlib-1.2.11/contrib/power8-crc/vec_crc32.c +@@ -0,0 +1,679 @@ +/* + * Calculate the checksum of data that is 16 byte aligned and a multiple of + * 16 bytes. @@ -1349,11 +1359,13 @@ index 00000000..864d04d4 +#include "crc32_constants.h" +#endif + ++#include ++ +#define VMX_ALIGN 16 +#define VMX_ALIGN_MASK (VMX_ALIGN-1) + +#ifdef REFLECT -+static unsigned int crc32_align(unsigned int crc, unsigned char *p, ++static unsigned int crc32_align(unsigned int crc, const unsigned char *p, + unsigned long len) +{ + while (len--) @@ -1361,7 +1373,7 @@ index 00000000..864d04d4 + return crc; +} +#else -+static unsigned int crc32_align(unsigned int crc, unsigned char *p, ++static unsigned int crc32_align(unsigned int crc, const unsigned char *p, + unsigned long len) +{ + while (len--) @@ -1371,18 +1383,21 @@ index 00000000..864d04d4 +#endif + +static unsigned int __attribute__ ((aligned (32))) -+__crc32_vpmsum(unsigned int crc, void* p, unsigned long len); ++__crc32_vpmsum(unsigned int crc, const void* p, unsigned long len); + +#ifndef CRC32_FUNCTION +#define CRC32_FUNCTION crc32_vpmsum +#endif + -+unsigned int CRC32_FUNCTION(unsigned int crc, unsigned char *p, ++unsigned int CRC32_FUNCTION(unsigned int crc, const unsigned char *p, + unsigned long len) +{ + unsigned int prealign; + unsigned int tail; + ++ /* For zlib API */ ++ if (p == NULL) return 0UL; ++ +#ifdef CRC_XOR + crc ^= 0xffffffff; +#endif @@ -1453,7 +1468,7 @@ index 00000000..864d04d4 +#endif + +static unsigned int __attribute__ ((aligned (32))) -+__crc32_vpmsum(unsigned int crc, void* p, unsigned long len) { ++__crc32_vpmsum(unsigned int crc, const void* p, unsigned long len) { + + const __vector unsigned long long vzero = {0,0}; + const __vector unsigned long long vones = {0xffffffffffffffffUL, @@ -1494,8 +1509,8 @@ index 00000000..864d04d4 + unsigned int result = 0; + unsigned int offset; /* Constant table offset. */ + -+ long i; /* Counter. */ -+ long chunks; ++ unsigned long i; /* Counter. */ ++ unsigned long chunks; + + unsigned long block_size; + int next_block = 0; @@ -1567,7 +1582,7 @@ index 00000000..864d04d4 + /* xor in initial value */ + vdata0 = vec_xor(vdata0, vcrc); + -+ p += 128; ++ p = (char *)p + 128; + + do { + /* Checksum in blocks of MAX_SIZE. */ @@ -1635,14 +1650,14 @@ index 00000000..864d04d4 + vdata7 = vec_ld(112, (__vector unsigned long long*) p); + VEC_PERM(vdata7, vdata7, vdata7, vperm_const); + -+ p += 128; ++ p = (char *)p + 128; + + /* + * main loop. We modulo schedule it such that it takes three + * iterations to complete - first iteration load, second + * iteration vpmsum, third iteration xor. + */ -+ for (i = 0; i < chunks-2; i++, p += 128) { ++ for (i = 0; i < chunks-2; i++) { + vconst1 = vec_ld(offset, vcrc_const); + offset += 16; + GROUP_ENDING_NOP; @@ -1703,6 +1718,8 @@ index 00000000..864d04d4 + long)vdata7, (__vector unsigned long long)vconst1); + vdata7 = vec_ld(112, (__vector unsigned long long*) p); + VEC_PERM(vdata7, vdata7, vdata7, vperm_const); ++ ++ p = (char *)p + 128; + } + + /* First cool down*/ @@ -1809,7 +1826,7 @@ index 00000000..864d04d4 + va7 = vec_ld(112, (__vector unsigned long long*) p); + VEC_PERM(va7, va7, va7, vperm_const); + -+ p += 128; ++ p = (char *)p + 128; + + vdata0 = vec_xor(v0, va0); + vdata1 = vec_xor(v1, va1); @@ -1979,106 +1996,16 @@ index 00000000..864d04d4 + + return result; +} - -From 615d7188509b0f16dae919d7b369f8d01db18be5 Mon Sep 17 00:00:00 2001 -From: Daniel Black -Date: Thu, 11 Jan 2018 17:04:38 +1100 -Subject: [PATCH 2/8] Port crc32-vpmsum (Power architecture) to zlib types/api - -Correct argument types and ensure that a buffer pointer -of 0 returns 0ULL consistent with existing crc32 functions. ---- - power8-crc/vec_crc32.c | 23 +++++++++++++++-------- - 1 file changed, 15 insertions(+), 8 deletions(-) - -diff --git a/contrib/power8-crc/vec_crc32.c b/contrib/power8-crc/vec_crc32.c -index 864d04d4..aa35ea5a 100644 ---- a/contrib/power8-crc/vec_crc32.c -+++ b/contrib/power8-crc/vec_crc32.c -@@ -40,11 +40,13 @@ - #include "crc32_constants.h" - #endif - -+#include "../zutil.h" -+ - #define VMX_ALIGN 16 - #define VMX_ALIGN_MASK (VMX_ALIGN-1) - - #ifdef REFLECT --static unsigned int crc32_align(unsigned int crc, unsigned char *p, -+static unsigned long crc32_align(unsigned int crc, const unsigned char *p, - unsigned long len) - { - while (len--) -@@ -52,7 +54,7 @@ static unsigned int crc32_align(unsigned int crc, unsigned char *p, - return crc; - } - #else --static unsigned int crc32_align(unsigned int crc, unsigned char *p, -+static unsigned long crc32_align(unsigned int crc, const unsigned char *p, - unsigned long len) - { - while (len--) -@@ -61,19 +63,24 @@ static unsigned int crc32_align(unsigned int crc, unsigned char *p, - } - #endif - --static unsigned int __attribute__ ((aligned (32))) --__crc32_vpmsum(unsigned int crc, void* p, unsigned long len); -+static unsigned long __attribute__ ((aligned (32))) -+__crc32_vpmsum(unsigned int crc, const void* p, unsigned long len); - - #ifndef CRC32_FUNCTION - #define CRC32_FUNCTION crc32_vpmsum - #endif - --unsigned int CRC32_FUNCTION(unsigned int crc, unsigned char *p, -- unsigned long len) -+unsigned long ZLIB_INTERNAL CRC32_FUNCTION( -+ unsigned long crc, -+ const unsigned char FAR *p, -+ z_size_t len) - { - unsigned int prealign; - unsigned int tail; - -+ /* For zlib API */ -+ if (p == NULL) return 0UL; -+ - #ifdef CRC_XOR - crc ^= 0xffffffff; - #endif -@@ -143,8 +150,8 @@ static const __vector unsigned long long vperm_const - #define VEC_PERM(vr, va, vb, vc) - #endif - --static unsigned int __attribute__ ((aligned (32))) --__crc32_vpmsum(unsigned int crc, void* p, unsigned long len) { -+static unsigned long __attribute__ ((aligned (32))) -+__crc32_vpmsum(unsigned int crc, const void* p, unsigned long len) { - - const __vector unsigned long long vzero = {0,0}; - const __vector unsigned long long vones = {0xffffffffffffffffUL, - -From c2697e9b5f8379b434ed79ab9599bab1193f1d46 Mon Sep 17 00:00:00 2001 -From: Daniel Black -Date: Wed, 10 Jan 2018 10:55:27 +1100 -Subject: [PATCH 3/8] Add makefile rule to build crc32_power8 objects - ---- - Makefile.in | 8 ++++++++ - 1 file changed, 8 insertions(+) - -diff --git a/Makefile.in b/Makefile.in -index 5a77949f..5b8563eb 100644 ---- a/Makefile.in -+++ b/Makefile.in +Index: zlib-1.2.11/Makefile.in +=================================================================== +--- zlib-1.2.11.orig/Makefile.in ++++ zlib-1.2.11/Makefile.in @@ -162,6 +162,9 @@ adler32.o: $(SRCDIR)adler32.c crc32.o: $(SRCDIR)crc32.c $(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)crc32.c -+crc32_power8.o: $(SRCDIR)power8-crc/vec_crc32.c -+ $(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)power8-crc/vec_crc32.c ++crc32_power8.o: $(SRCDIR)contrib/power8-crc/vec_crc32.c ++ $(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)contrib/power8-crc/vec_crc32.c + deflate.o: $(SRCDIR)deflate.c $(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)deflate.c @@ -2087,73 +2014,55 @@ index 5a77949f..5b8563eb 100644 $(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/crc32.o $(SRCDIR)crc32.c -@mv objs/crc32.o $@ -+crc32_power8.lo: $(SRCDIR)power8-crc/vec_crc32.c ++crc32_power8.lo: $(SRCDIR)contrib/power8-crc/vec_crc32.c + -@mkdir objs 2>/dev/null || test -d objs -+ $(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/crc32_power8.o $(SRCDIR)power8-crc/vec_crc32.c ++ $(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/crc32_power8.o $(SRCDIR)contrib/power8-crc/vec_crc32.c + -@mv objs/crc32_power8.o $@ + deflate.lo: $(SRCDIR)deflate.c -@mkdir objs 2>/dev/null || test -d objs $(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/deflate.o $(SRCDIR)deflate.c - -From a2f5adc957354d677fe25a7fc167506c436dd6e6 Mon Sep 17 00:00:00 2001 -From: Daniel Black -Date: Wed, 10 Jan 2018 11:01:30 +1100 -Subject: [PATCH 4/8] Runtime crc32_z optimized implementation detection - -To support runtime optimization of crc32 GNU IFUNC capabilities -allows zlib to return an optimized crc32_z function pointer that is -resolved at runtime to the existing crc32_z name and is able to be -used by existing applications. - -There are two code forms in which this can be defined; a native -attribute, and; an asm type defination which will work with older -gcc compilers. - -crc32_ifunc is added as a function that is called by the glibc -loader if the IFUNC capability exists to resolve the crc32_z -function. If the IFUNC capabilies don't exists it will otherwise -returns a function pointer on the first instigation of the crc32_z -function call. For staticly compiled code the function pointer -variant of this function is used. - -crc32_ifunc provides a point of expansion for returning other -optimized crc32 implementations for other architectures. - -DYNAMIC_CRC_TABLE/make_crc_table now occurs to the crc32_ifunc -and only if an crc32 function (crc32_big/crc32_little/ -crc32_table_lookup) that use the generated table. - -As a result lazy binding occurs (the default for glibc) on the -calling of make_crc_table occurs on the first use of crc32/crc32_z. -Compile time linker options, environment LD_BIND_NOW, hardened -compilers etc, will solve functions, i.e. the IFUNC, on symbold -initialisation to occur as the program is loaded rather -than on first use of crc32/crc32_z. The generation of this table -will be farely minor compared to the other non-lazy bindings -occuring. - -As crc32_big/crc32_little are optimized functions these are used -as a fallback to any optimized implemented (provided NO_BYFOUR -isn't defined) these will now be called directly for a crc32/ -crc32_z and as such the 'if (buf == Z_NULL) return 0UL;' needed to -be introduced to these functions. - -The table lookup implementation of crc32 is moved to -crc32_table_lookup and used a function of last resort. ---- - configure | 52 +++++++++++++++++++++++++++++++++++++++++++++++ - crc32.c | 69 ++++++++++++++++++++++++++++++++++++++++++++++++++++----------- - 2 files changed, 109 insertions(+), 12 deletions(-) - -diff --git a/configure b/configure -index e974d1fd..0b2fe035 100755 ---- a/configure -+++ b/configure -@@ -826,6 +826,58 @@ EOF +Index: zlib-1.2.11/configure +=================================================================== +--- zlib-1.2.11.orig/configure ++++ zlib-1.2.11/configure +@@ -826,6 +826,91 @@ EOF fi fi ++# test to see if Power8+ implementation is compile time possible ++echo >> configure.log ++cat > $test.c < ++#include ++int main() ++{ ++ return (getauxval(AT_HWCAP2) & PPC_FEATURE2_ARCH_2_07); ++} ++#endif ++ ++#else ++#error No Power 8 or newer architecture, may need -mcpu=power8 ++#endif ++EOF ++ ++if tryboth $CC -c $CFLAGS $test.c; then ++ OBJC="$OBJC crc32_power8.o" ++ PIC_OBJC="$PIC_OBJC crc32_power8.lo" ++ echo "Checking for runtime cpu detection and Power 8 (or newer) Architecture support... Yes." | tee -a configure.log ++else ++ echo "Checking for runtime cpu detection and Power 8 (or newer) Architecture support... No." | tee -a configure.log ++fi ++ +# test to see if we can use a gnu indirection function to detect and load optimized code at runtime +echo >> configure.log +cat > $test.c <> configure.log echo ALL = $ALL >> configure.log -diff --git a/crc32.c b/crc32.c -index 9580440c..52e855fb 100644 ---- a/crc32.c -+++ b/crc32.c -@@ -199,13 +199,47 @@ const z_crc_t FAR * ZEXPORT get_crc_table() +Index: zlib-1.2.11/crc32.c +=================================================================== +--- zlib-1.2.11.orig/crc32.c ++++ zlib-1.2.11/crc32.c +@@ -199,13 +199,78 @@ const z_crc_t FAR * ZEXPORT get_crc_tabl #define DO8 DO1; DO1; DO1; DO1; DO1; DO1; DO1; DO1 /* ========================================================================= */ @@ -2237,6 +2146,19 @@ index 9580440c..52e855fb 100644 + return crc ^ 0xffffffffUL; +} + ++/* Small helper function to compare optfun against the reference table lookup ++ * return test_ref_comparision_##optfn in crc32_z_ifunc ++#include ++#define TEST_COMPARE(optfn) \ ++ static unsigned long test_ref_comparision_ ## optfn(unsigned long crc, const unsigned char FAR *p, z_size_t len) \ ++ { \ ++ unsigned long crc_tbl_lookup = crc32_table_lookup(crc, p, len); \ ++ unsigned long optcrc = optfn(crc, p, len); \ ++ assert( optcrc == crc_tbl_lookup ); \ ++ return optcrc; \ ++ } ++*/ ++ +#ifdef Z_IFUNC_ASM +unsigned long (*(crc32_z_ifunc(void)))(unsigned long, const unsigned char FAR *, z_size_t) + __asm__ ("crc32_z"); @@ -2249,6 +2171,15 @@ index 9580440c..52e855fb 100644 + __attribute__ ((ifunc ("crc32_z_ifunc"))); +#endif + ++#if _ARCH_PWR8==1 ++unsigned long crc32_vpmsum(unsigned long, const unsigned char FAR *, z_size_t); ++/* for testing TEST_COMPARE(crc32_vpmsum) */ ++#ifndef __BUILTIN_CPU_SUPPORTS__ ++#include ++#include ++#endif ++#endif ++ +/* due to a quirk of gnu_indirect_function - "local" (aka static) is applied to + * crc32_z which is not desired. crc32_z_ifunc is implictly "local" */ +#ifndef Z_IFUNC_ASM @@ -2256,13 +2187,22 @@ index 9580440c..52e855fb 100644 +#endif +unsigned long (*(crc32_z_ifunc(void)))(unsigned long, const unsigned char FAR *, z_size_t) +{ ++#if _ARCH_PWR8==1 ++#if defined(__BUILTIN_CPU_SUPPORTS__) ++ if (__builtin_cpu_supports("arch_2_07")) ++ return crc32_vpmsum; ++#else ++ if (getauxval(AT_HWCAP2) & PPC_FEATURE2_ARCH_2_07) ++ return crc32_vpmsum; ++#endif ++#endif /* _ARCH_PWR8 */ + +/* return a function pointer for optimized arches here */ + #ifdef DYNAMIC_CRC_TABLE if (crc_table_empty) make_crc_table(); -@@ -217,22 +251,31 @@ unsigned long ZEXPORT crc32_z(crc, buf, len) +@@ -217,22 +282,31 @@ unsigned long ZEXPORT crc32_z(crc, buf, endian = 1; if (*((unsigned char *)(&endian))) @@ -2305,7 +2245,7 @@ index 9580440c..52e855fb 100644 /* ========================================================================= */ unsigned long ZEXPORT crc32(crc, buf, len) unsigned long crc; -@@ -271,6 +314,7 @@ local unsigned long crc32_little(crc, buf, len) +@@ -271,6 +345,7 @@ local unsigned long crc32_little(crc, bu register z_crc_t c; register const z_crc_t FAR *buf4; @@ -2313,7 +2253,7 @@ index 9580440c..52e855fb 100644 c = (z_crc_t)crc; c = ~c; while (len && ((ptrdiff_t)buf & 3)) { -@@ -311,6 +355,7 @@ local unsigned long crc32_big(crc, buf, len) +@@ -311,6 +386,7 @@ local unsigned long crc32_big(crc, buf, register z_crc_t c; register const z_crc_t FAR *buf4; @@ -2321,400 +2261,3 @@ index 9580440c..52e855fb 100644 c = ZSWAP32((z_crc_t)crc); c = ~c; while (len && ((ptrdiff_t)buf & 3)) { - -From 49c212a202fd1e12dc42574b228d72085cc4d1f7 Mon Sep 17 00:00:00 2001 -From: Daniel Black -Date: Mon, 15 Jan 2018 08:59:43 +1100 -Subject: [PATCH 5/8] Linux Power crc32_vpmsum if available - -Power Architecture is detected in the configure script and adds -optimized code to PIC_OBJC/OBJC. - -Power8 crc32 performance ------------------------- - -Test - decompressing a jdk binary: - -Before (no optimized crc32_vpmsum (disabled in crc32_z_ifunc): - -$ time ./minigzip -d -c ../ibm-java-i386-sdk-8.0-5.0.bin.gz > ../ibm-java-i386-sdk-8.0-5.0.bin.restored - -real 0m2.972s -user 0m2.292s -sys 0m0.100s - -perf report -g --no-children: - 52.26% minigzip minigzip [.] crc32_little - 18.86% minigzip minigzip [.] inflate_fast - 4.87% minigzip [unknown] [k] 0xc000000000063748 - 4.87% minigzip libc-2.23.so [.] __memcpy_power7 - 1.56% minigzip minigzip [.] inflate - 0.96% minigzip minigzip [.] inflate_table - 0.95% minigzip libc-2.23.so [.] _IO_fwrite - 0.61% minigzip minigzip [.] inflateCodesUsed - -Using crc32_vpmsum: - -$ time ./minigzip -d -c ../ibm-java-i386-sdk-8.0-5.0.bin.gz > ../ibm-java-i386-sdk-8.0-5.0.bin.restored - -real 0m0.895s -user 0m0.224s -sys 0m0.092s - -perf report -g --no-children: - 36.49% minigzip minigzip [.] inflate_fast - 11.60% minigzip [unknown] [k] 0xc000000000063748 - 7.93% minigzip libc-2.23.so [.] __memcpy_power7 - 3.77% minigzip minigzip [.] crc32_vpmsum - 3.70% minigzip minigzip [.] inflate_table - 2.29% minigzip minigzip [.] inflate ---- - configure | 33 +++++++++++++++++++++++++++++++++ - crc32.c | 18 ++++++++++++++++++ - 2 files changed, 51 insertions(+) - -diff --git a/configure b/configure -index 0b2fe035..650abe66 100755 ---- a/configure -+++ b/configure -@@ -826,6 +826,39 @@ EOF - fi - fi - -+# test to see if Power8+ implementation is compile time possible -+echo >> configure.log -+cat > $test.c < -+#include -+int main() -+{ -+ return (getauxval(AT_HWCAP2) & PPC_FEATURE2_ARCH_2_07); -+} -+#endif -+ -+#else -+#error No Power 8 or newer architecture, may need -mcpu=power8 -+#endif -+EOF -+ -+if tryboth $CC -c $CFLAGS $test.c; then -+ OBJC="$OBJC crc32_power8.o" -+ PIC_OBJC="$PIC_OBJC crc32_power8.lo" -+ echo "Checking for runtime cpu detection and Power 8 (or newer) Architecture support... Yes." | tee -a configure.log -+else -+ echo "Checking for runtime cpu detection and Power 8 (or newer) Architecture support... No." | tee -a configure.log -+fi -+ - # test to see if we can use a gnu indirection function to detect and load optimized code at runtime - echo >> configure.log - cat > $test.c < -+#include -+#endif -+#endif -+ - /* due to a quirk of gnu_indirect_function - "local" (aka static) is applied to - * crc32_z which is not desired. crc32_z_ifunc is implictly "local" */ - #ifndef Z_IFUNC_ASM -@@ -237,6 +246,15 @@ local - #endif - unsigned long (*(crc32_z_ifunc(void)))(unsigned long, const unsigned char FAR *, z_size_t) - { -+#if _ARCH_PWR8==1 -+#if defined(__BUILTIN_CPU_SUPPORTS__) -+ if (__builtin_cpu_supports("arch_2_07")) -+ return crc32_vpmsum; -+#else -+ if (getauxval(AT_HWCAP2) & PPC_FEATURE2_ARCH_2_07) -+ return crc32_vpmsum; -+#endif -+#endif /* _ARCH_PWR8 */ - - /* return a function pointer for optimized arches here */ - - -From 3476aa2e05deb4696c114dd3b0150f90e2a4e340 Mon Sep 17 00:00:00 2001 -From: Daniel Black -Date: Fri, 12 Jan 2018 15:22:09 +1100 -Subject: [PATCH 6/8] crc32: add test harness for implementers of optimized - crc32_z - ---- - crc32.c | 13 +++++++++++++ - 1 file changed, 13 insertions(+) - -diff --git a/crc32.c b/crc32.c -index b66aa520..12daa5e6 100644 ---- a/crc32.c -+++ b/crc32.c -@@ -218,6 +218,19 @@ unsigned long ZEXPORT crc32_table_lookup(crc, buf, len) - return crc ^ 0xffffffffUL; - } - -+/* Small helper function to compare optfun against the reference table lookup -+ * return test_ref_comparision_##optfn in crc32_z_ifunc -+#include -+#define TEST_COMPARE(optfn) \ -+ static unsigned long test_ref_comparision_ ## optfn(unsigned long crc, const unsigned char FAR *p, z_size_t len) \ -+ { \ -+ unsigned long crc_tbl_lookup = crc32_table_lookup(crc, p, len); \ -+ unsigned long optcrc = optfn(crc, p, len); \ -+ assert( optcrc == crc_tbl_lookup ); \ -+ return optcrc; \ -+ } -+*/ -+ - #ifdef Z_IFUNC_ASM - unsigned long (*(crc32_z_ifunc(void)))(unsigned long, const unsigned char FAR *, z_size_t) - __asm__ ("crc32_z"); - -From 0daabafef27d7c215ed28d89cd366874a58d2573 Mon Sep 17 00:00:00 2001 -From: Daniel Black -Date: Tue, 6 Feb 2018 17:37:10 +1100 -Subject: [PATCH 7/8] fix: move power8-crc into contrib - ---- - Makefile.in | 8 ++++---- - 4 files changed, 4 insertions(+), 4 deletions(-) - -diff --git a/Makefile.in b/Makefile.in -index 5b8563eb..c3a43f1b 100644 ---- a/Makefile.in -+++ b/Makefile.in -@@ -162,8 +162,8 @@ adler32.o: $(SRCDIR)adler32.c - crc32.o: $(SRCDIR)crc32.c - $(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)crc32.c - --crc32_power8.o: $(SRCDIR)power8-crc/vec_crc32.c -- $(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)power8-crc/vec_crc32.c -+crc32_power8.o: $(SRCDIR)contrib/power8-crc/vec_crc32.c -+ $(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)contrib/power8-crc/vec_crc32.c - - deflate.o: $(SRCDIR)deflate.c - $(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)deflate.c -@@ -215,9 +215,9 @@ crc32.lo: $(SRCDIR)crc32.c - $(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/crc32.o $(SRCDIR)crc32.c - -@mv objs/crc32.o $@ - --crc32_power8.lo: $(SRCDIR)power8-crc/vec_crc32.c -+crc32_power8.lo: $(SRCDIR)contrib/power8-crc/vec_crc32.c - -@mkdir objs 2>/dev/null || test -d objs -- $(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/crc32_power8.o $(SRCDIR)power8-crc/vec_crc32.c -+ $(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/crc32_power8.o $(SRCDIR)contrib/power8-crc/vec_crc32.c - -@mv objs/crc32_power8.o $@ - - deflate.lo: $(SRCDIR)deflate.c - -From 5d3e57b905a586a6fb6f8b847c35ba3b47a20719 Mon Sep 17 00:00:00 2001 -From: Daniel Black -Date: Thu, 8 Feb 2018 13:20:00 +1100 -Subject: [PATCH 8/8] fix: clang fixes for Big Endian on Power8 crc32 - ---- - contrib/power8-crc/clang_workaround.h | 21 +++++++++++++++++---- - contrib/power8-crc/vec_crc32.c | 33 ++++++++++++++------------------- - 2 files changed, 31 insertions(+), 23 deletions(-) - -diff --git a/contrib/power8-crc/clang_workaround.h b/contrib/power8-crc/clang_workaround.h -index 9b26ba59..b5e7dae0 100644 ---- a/contrib/power8-crc/clang_workaround.h -+++ b/contrib/power8-crc/clang_workaround.h -@@ -5,10 +5,6 @@ - * These stubs fix clang incompatibilities with GCC builtins. - */ - --#if __BYTE_ORDER == __BIG_ENDIAN && defined(__clang__) --#error These workaround aren't big endian compatible --#endif -- - #ifndef __builtin_crypto_vpmsumw - #define __builtin_crypto_vpmsumw __builtin_crypto_vpmsumb - #endif -@@ -35,7 +31,11 @@ static inline - __vector unsigned long long __builtin_pack_vector (unsigned long __a, - unsigned long __b) - { -+ #if defined(__BIG_ENDIAN__) -+ __vector unsigned long long __v = {__a, __b}; -+ #else - __vector unsigned long long __v = {__b, __a}; -+ #endif - return __v; - } - -@@ -48,21 +48,34 @@ unsigned long __builtin_unpack_vector (__vector unsigned long long __v, - return __v[__o]; - } - -+#if defined(__BIG_ENDIAN__) -+#define __builtin_unpack_vector_0(a) __builtin_unpack_vector ((a), 0) -+#define __builtin_unpack_vector_1(a) __builtin_unpack_vector ((a), 1) -+#else - #define __builtin_unpack_vector_0(a) __builtin_unpack_vector ((a), 1) - #define __builtin_unpack_vector_1(a) __builtin_unpack_vector ((a), 0) -+#endif - - #else - - static inline - unsigned long __builtin_unpack_vector_0 (__vector unsigned long long __v) - { -+ #if defined(__BIG_ENDIAN__) -+ return vec_xxpermdi(__v, __v, 0x0)[1]; -+ #else - return vec_xxpermdi(__v, __v, 0x0)[0]; -+ #endif - } - - static inline - unsigned long __builtin_unpack_vector_1 (__vector unsigned long long __v) - { -+ #if defined(__BIG_ENDIAN__) -+ return vec_xxpermdi(__v, __v, 0x3)[1]; -+ #else - return vec_xxpermdi(__v, __v, 0x3)[0]; -+ #endif - } - #endif /* vec_xxpermdi */ - -diff --git a/contrib/power8-crc/vec_crc32.c b/contrib/power8-crc/vec_crc32.c -index aa35ea5a..bb2204b2 100644 ---- a/contrib/power8-crc/vec_crc32.c -+++ b/contrib/power8-crc/vec_crc32.c -@@ -40,13 +40,11 @@ - #include "crc32_constants.h" - #endif - --#include "../zutil.h" -- - #define VMX_ALIGN 16 - #define VMX_ALIGN_MASK (VMX_ALIGN-1) - - #ifdef REFLECT --static unsigned long crc32_align(unsigned int crc, const unsigned char *p, -+static unsigned int crc32_align(unsigned int crc, const unsigned char *p, - unsigned long len) - { - while (len--) -@@ -54,7 +52,7 @@ static unsigned long crc32_align(unsigned int crc, const unsigned char *p, - return crc; - } - #else --static unsigned long crc32_align(unsigned int crc, const unsigned char *p, -+static unsigned int crc32_align(unsigned int crc, const unsigned char *p, - unsigned long len) - { - while (len--) -@@ -63,24 +61,19 @@ static unsigned long crc32_align(unsigned int crc, const unsigned char *p, - } - #endif - --static unsigned long __attribute__ ((aligned (32))) -+static unsigned int __attribute__ ((aligned (32))) - __crc32_vpmsum(unsigned int crc, const void* p, unsigned long len); - - #ifndef CRC32_FUNCTION - #define CRC32_FUNCTION crc32_vpmsum - #endif - --unsigned long ZLIB_INTERNAL CRC32_FUNCTION( -- unsigned long crc, -- const unsigned char FAR *p, -- z_size_t len) -+unsigned int CRC32_FUNCTION(unsigned int crc, const unsigned char *p, -+ unsigned long len) - { - unsigned int prealign; - unsigned int tail; - -- /* For zlib API */ -- if (p == NULL) return 0UL; -- - #ifdef CRC_XOR - crc ^= 0xffffffff; - #endif -@@ -150,7 +143,7 @@ static const __vector unsigned long long vperm_const - #define VEC_PERM(vr, va, vb, vc) - #endif - --static unsigned long __attribute__ ((aligned (32))) -+static unsigned int __attribute__ ((aligned (32))) - __crc32_vpmsum(unsigned int crc, const void* p, unsigned long len) { - - const __vector unsigned long long vzero = {0,0}; -@@ -192,8 +185,8 @@ __crc32_vpmsum(unsigned int crc, const void* p, unsigned long len) { - unsigned int result = 0; - unsigned int offset; /* Constant table offset. */ - -- long i; /* Counter. */ -- long chunks; -+ unsigned long i; /* Counter. */ -+ unsigned long chunks; - - unsigned long block_size; - int next_block = 0; -@@ -265,7 +258,7 @@ __crc32_vpmsum(unsigned int crc, const void* p, unsigned long len) { - /* xor in initial value */ - vdata0 = vec_xor(vdata0, vcrc); - -- p += 128; -+ p = (char *)p + 128; - - do { - /* Checksum in blocks of MAX_SIZE. */ -@@ -333,14 +326,14 @@ __crc32_vpmsum(unsigned int crc, const void* p, unsigned long len) { - vdata7 = vec_ld(112, (__vector unsigned long long*) p); - VEC_PERM(vdata7, vdata7, vdata7, vperm_const); - -- p += 128; -+ p = (char *)p + 128; - - /* - * main loop. We modulo schedule it such that it takes three - * iterations to complete - first iteration load, second - * iteration vpmsum, third iteration xor. - */ -- for (i = 0; i < chunks-2; i++, p += 128) { -+ for (i = 0; i < chunks-2; i++) { - vconst1 = vec_ld(offset, vcrc_const); - offset += 16; - GROUP_ENDING_NOP; -@@ -401,6 +394,8 @@ __crc32_vpmsum(unsigned int crc, const void* p, unsigned long len) { - long)vdata7, (__vector unsigned long long)vconst1); - vdata7 = vec_ld(112, (__vector unsigned long long*) p); - VEC_PERM(vdata7, vdata7, vdata7, vperm_const); -+ -+ p = (char *)p + 128; - } - - /* First cool down*/ -@@ -507,7 +502,7 @@ __crc32_vpmsum(unsigned int crc, const void* p, unsigned long len) { - va7 = vec_ld(112, (__vector unsigned long long*) p); - VEC_PERM(va7, va7, va7, vperm_const); - -- p += 128; -+ p = (char *)p + 128; - - vdata0 = vec_xor(v0, va0); - vdata1 = vec_xor(v1, va1); diff --git a/zlib.changes b/zlib.changes index c95dce1..312276e 100644 --- a/zlib.changes +++ b/zlib.changes @@ -1,3 +1,9 @@ +------------------------------------------------------------------- +Tue Mar 19 14:11:48 UTC 2019 - Tomáš Chvátal + +- Try to safely abort if we get NULL ptr bsc#1110304 bsc#1129576: + * zlib-power8-fate325307.patch + ------------------------------------------------------------------- Wed Jun 20 14:51:07 UTC 2018 - tchvatal@suse.com