- Try to safely abort if we get NULL ptr bsc#1110304:
* zlib-power8-fate325307.patch OBS-URL: https://build.opensuse.org/package/show/devel:libraries:c_c++/zlib?expand=0&rev=48
This commit is contained in:
parent
9c41df0cbb
commit
3685577878
@ -14,12 +14,11 @@ This is the C implementation created by Rogerio Alves <rogealve@br.ibm.com>
|
|||||||
create mode 100644 power8-crc/crc32_constants.h
|
create mode 100644 power8-crc/crc32_constants.h
|
||||||
create mode 100644 power8-crc/vec_crc32.c
|
create mode 100644 power8-crc/vec_crc32.c
|
||||||
|
|
||||||
diff --git a/contrib/power8-crc/clang_workaround.h b/contrib/power8-crc/clang_workaround.h
|
Index: zlib-1.2.11/contrib/power8-crc/clang_workaround.h
|
||||||
new file mode 100644
|
===================================================================
|
||||||
index 00000000..9b26ba59
|
|
||||||
--- /dev/null
|
--- /dev/null
|
||||||
+++ b/contrib/power8-crc/clang_workaround.h
|
+++ zlib-1.2.11/contrib/power8-crc/clang_workaround.h
|
||||||
@@ -0,0 +1,69 @@
|
@@ -0,0 +1,82 @@
|
||||||
+#ifndef CLANG_WORKAROUNDS_H
|
+#ifndef CLANG_WORKAROUNDS_H
|
||||||
+#define CLANG_WORKAROUNDS_H
|
+#define CLANG_WORKAROUNDS_H
|
||||||
+
|
+
|
||||||
@ -27,10 +26,6 @@ index 00000000..9b26ba59
|
|||||||
+ * These stubs fix clang incompatibilities with GCC builtins.
|
+ * These stubs fix clang incompatibilities with GCC builtins.
|
||||||
+ */
|
+ */
|
||||||
+
|
+
|
||||||
+#if __BYTE_ORDER == __BIG_ENDIAN && defined(__clang__)
|
|
||||||
+#error These workaround aren't big endian compatible
|
|
||||||
+#endif
|
|
||||||
+
|
|
||||||
+#ifndef __builtin_crypto_vpmsumw
|
+#ifndef __builtin_crypto_vpmsumw
|
||||||
+#define __builtin_crypto_vpmsumw __builtin_crypto_vpmsumb
|
+#define __builtin_crypto_vpmsumw __builtin_crypto_vpmsumb
|
||||||
+#endif
|
+#endif
|
||||||
@ -57,7 +52,11 @@ index 00000000..9b26ba59
|
|||||||
+__vector unsigned long long __builtin_pack_vector (unsigned long __a,
|
+__vector unsigned long long __builtin_pack_vector (unsigned long __a,
|
||||||
+ unsigned long __b)
|
+ unsigned long __b)
|
||||||
+{
|
+{
|
||||||
|
+ #if defined(__BIG_ENDIAN__)
|
||||||
|
+ __vector unsigned long long __v = {__a, __b};
|
||||||
|
+ #else
|
||||||
+ __vector unsigned long long __v = {__b, __a};
|
+ __vector unsigned long long __v = {__b, __a};
|
||||||
|
+ #endif
|
||||||
+ return __v;
|
+ return __v;
|
||||||
+}
|
+}
|
||||||
+
|
+
|
||||||
@ -70,30 +69,42 @@ index 00000000..9b26ba59
|
|||||||
+ return __v[__o];
|
+ return __v[__o];
|
||||||
+}
|
+}
|
||||||
+
|
+
|
||||||
|
+#if defined(__BIG_ENDIAN__)
|
||||||
|
+#define __builtin_unpack_vector_0(a) __builtin_unpack_vector ((a), 0)
|
||||||
|
+#define __builtin_unpack_vector_1(a) __builtin_unpack_vector ((a), 1)
|
||||||
|
+#else
|
||||||
+#define __builtin_unpack_vector_0(a) __builtin_unpack_vector ((a), 1)
|
+#define __builtin_unpack_vector_0(a) __builtin_unpack_vector ((a), 1)
|
||||||
+#define __builtin_unpack_vector_1(a) __builtin_unpack_vector ((a), 0)
|
+#define __builtin_unpack_vector_1(a) __builtin_unpack_vector ((a), 0)
|
||||||
|
+#endif
|
||||||
+
|
+
|
||||||
+#else
|
+#else
|
||||||
+
|
+
|
||||||
+static inline
|
+static inline
|
||||||
+unsigned long __builtin_unpack_vector_0 (__vector unsigned long long __v)
|
+unsigned long __builtin_unpack_vector_0 (__vector unsigned long long __v)
|
||||||
+{
|
+{
|
||||||
|
+ #if defined(__BIG_ENDIAN__)
|
||||||
|
+ return vec_xxpermdi(__v, __v, 0x0)[1];
|
||||||
|
+ #else
|
||||||
+ return vec_xxpermdi(__v, __v, 0x0)[0];
|
+ return vec_xxpermdi(__v, __v, 0x0)[0];
|
||||||
|
+ #endif
|
||||||
+}
|
+}
|
||||||
+
|
+
|
||||||
+static inline
|
+static inline
|
||||||
+unsigned long __builtin_unpack_vector_1 (__vector unsigned long long __v)
|
+unsigned long __builtin_unpack_vector_1 (__vector unsigned long long __v)
|
||||||
+{
|
+{
|
||||||
|
+ #if defined(__BIG_ENDIAN__)
|
||||||
|
+ return vec_xxpermdi(__v, __v, 0x3)[1];
|
||||||
|
+ #else
|
||||||
+ return vec_xxpermdi(__v, __v, 0x3)[0];
|
+ return vec_xxpermdi(__v, __v, 0x3)[0];
|
||||||
|
+ #endif
|
||||||
+}
|
+}
|
||||||
+#endif /* vec_xxpermdi */
|
+#endif /* vec_xxpermdi */
|
||||||
+
|
+
|
||||||
+#endif
|
+#endif
|
||||||
diff --git a/contrib/power8-crc/crc32_constants.h b/contrib/power8-crc/crc32_constants.h
|
Index: zlib-1.2.11/contrib/power8-crc/crc32_constants.h
|
||||||
new file mode 100644
|
===================================================================
|
||||||
index 00000000..58088dcc
|
|
||||||
--- /dev/null
|
--- /dev/null
|
||||||
+++ b/contrib/power8-crc/crc32_constants.h
|
+++ zlib-1.2.11/contrib/power8-crc/crc32_constants.h
|
||||||
@@ -0,0 +1,1206 @@
|
@@ -0,0 +1,1206 @@
|
||||||
+/*
|
+/*
|
||||||
+*
|
+*
|
||||||
@ -1301,12 +1312,11 @@ index 00000000..58088dcc
|
|||||||
+#endif /* POWER8_INTRINSICS */
|
+#endif /* POWER8_INTRINSICS */
|
||||||
+
|
+
|
||||||
+#endif /* __ASSEMBLER__ */
|
+#endif /* __ASSEMBLER__ */
|
||||||
diff --git a/contrib/power8-crc/vec_crc32.c b/contrib/power8-crc/vec_crc32.c
|
Index: zlib-1.2.11/contrib/power8-crc/vec_crc32.c
|
||||||
new file mode 100644
|
===================================================================
|
||||||
index 00000000..864d04d4
|
|
||||||
--- /dev/null
|
--- /dev/null
|
||||||
+++ b/contrib/power8-crc/vec_crc32.c
|
+++ zlib-1.2.11/contrib/power8-crc/vec_crc32.c
|
||||||
@@ -0,0 +1,672 @@
|
@@ -0,0 +1,677 @@
|
||||||
+/*
|
+/*
|
||||||
+ * Calculate the checksum of data that is 16 byte aligned and a multiple of
|
+ * Calculate the checksum of data that is 16 byte aligned and a multiple of
|
||||||
+ * 16 bytes.
|
+ * 16 bytes.
|
||||||
@ -1353,7 +1363,7 @@ index 00000000..864d04d4
|
|||||||
+#define VMX_ALIGN_MASK (VMX_ALIGN-1)
|
+#define VMX_ALIGN_MASK (VMX_ALIGN-1)
|
||||||
+
|
+
|
||||||
+#ifdef REFLECT
|
+#ifdef REFLECT
|
||||||
+static unsigned int crc32_align(unsigned int crc, unsigned char *p,
|
+static unsigned int crc32_align(unsigned int crc, const unsigned char *p,
|
||||||
+ unsigned long len)
|
+ unsigned long len)
|
||||||
+{
|
+{
|
||||||
+ while (len--)
|
+ while (len--)
|
||||||
@ -1361,7 +1371,7 @@ index 00000000..864d04d4
|
|||||||
+ return crc;
|
+ return crc;
|
||||||
+}
|
+}
|
||||||
+#else
|
+#else
|
||||||
+static unsigned int crc32_align(unsigned int crc, unsigned char *p,
|
+static unsigned int crc32_align(unsigned int crc, const unsigned char *p,
|
||||||
+ unsigned long len)
|
+ unsigned long len)
|
||||||
+{
|
+{
|
||||||
+ while (len--)
|
+ while (len--)
|
||||||
@ -1371,18 +1381,21 @@ index 00000000..864d04d4
|
|||||||
+#endif
|
+#endif
|
||||||
+
|
+
|
||||||
+static unsigned int __attribute__ ((aligned (32)))
|
+static unsigned int __attribute__ ((aligned (32)))
|
||||||
+__crc32_vpmsum(unsigned int crc, void* p, unsigned long len);
|
+__crc32_vpmsum(unsigned int crc, const void* p, unsigned long len);
|
||||||
+
|
+
|
||||||
+#ifndef CRC32_FUNCTION
|
+#ifndef CRC32_FUNCTION
|
||||||
+#define CRC32_FUNCTION crc32_vpmsum
|
+#define CRC32_FUNCTION crc32_vpmsum
|
||||||
+#endif
|
+#endif
|
||||||
+
|
+
|
||||||
+unsigned int CRC32_FUNCTION(unsigned int crc, unsigned char *p,
|
+unsigned int CRC32_FUNCTION(unsigned int crc, const unsigned char *p,
|
||||||
+ unsigned long len)
|
+ unsigned long len)
|
||||||
+{
|
+{
|
||||||
+ unsigned int prealign;
|
+ unsigned int prealign;
|
||||||
+ unsigned int tail;
|
+ unsigned int tail;
|
||||||
+
|
+
|
||||||
|
+ /* For zlib API */
|
||||||
|
+ if (p == NULL) return 0UL;
|
||||||
|
+
|
||||||
+#ifdef CRC_XOR
|
+#ifdef CRC_XOR
|
||||||
+ crc ^= 0xffffffff;
|
+ crc ^= 0xffffffff;
|
||||||
+#endif
|
+#endif
|
||||||
@ -1453,7 +1466,7 @@ index 00000000..864d04d4
|
|||||||
+#endif
|
+#endif
|
||||||
+
|
+
|
||||||
+static unsigned int __attribute__ ((aligned (32)))
|
+static unsigned int __attribute__ ((aligned (32)))
|
||||||
+__crc32_vpmsum(unsigned int crc, void* p, unsigned long len) {
|
+__crc32_vpmsum(unsigned int crc, const void* p, unsigned long len) {
|
||||||
+
|
+
|
||||||
+ const __vector unsigned long long vzero = {0,0};
|
+ const __vector unsigned long long vzero = {0,0};
|
||||||
+ const __vector unsigned long long vones = {0xffffffffffffffffUL,
|
+ const __vector unsigned long long vones = {0xffffffffffffffffUL,
|
||||||
@ -1494,8 +1507,8 @@ index 00000000..864d04d4
|
|||||||
+ unsigned int result = 0;
|
+ unsigned int result = 0;
|
||||||
+ unsigned int offset; /* Constant table offset. */
|
+ unsigned int offset; /* Constant table offset. */
|
||||||
+
|
+
|
||||||
+ long i; /* Counter. */
|
+ unsigned long i; /* Counter. */
|
||||||
+ long chunks;
|
+ unsigned long chunks;
|
||||||
+
|
+
|
||||||
+ unsigned long block_size;
|
+ unsigned long block_size;
|
||||||
+ int next_block = 0;
|
+ int next_block = 0;
|
||||||
@ -1567,7 +1580,7 @@ index 00000000..864d04d4
|
|||||||
+ /* xor in initial value */
|
+ /* xor in initial value */
|
||||||
+ vdata0 = vec_xor(vdata0, vcrc);
|
+ vdata0 = vec_xor(vdata0, vcrc);
|
||||||
+
|
+
|
||||||
+ p += 128;
|
+ p = (char *)p + 128;
|
||||||
+
|
+
|
||||||
+ do {
|
+ do {
|
||||||
+ /* Checksum in blocks of MAX_SIZE. */
|
+ /* Checksum in blocks of MAX_SIZE. */
|
||||||
@ -1635,14 +1648,14 @@ index 00000000..864d04d4
|
|||||||
+ vdata7 = vec_ld(112, (__vector unsigned long long*) p);
|
+ vdata7 = vec_ld(112, (__vector unsigned long long*) p);
|
||||||
+ VEC_PERM(vdata7, vdata7, vdata7, vperm_const);
|
+ VEC_PERM(vdata7, vdata7, vdata7, vperm_const);
|
||||||
+
|
+
|
||||||
+ p += 128;
|
+ p = (char *)p + 128;
|
||||||
+
|
+
|
||||||
+ /*
|
+ /*
|
||||||
+ * main loop. We modulo schedule it such that it takes three
|
+ * main loop. We modulo schedule it such that it takes three
|
||||||
+ * iterations to complete - first iteration load, second
|
+ * iterations to complete - first iteration load, second
|
||||||
+ * iteration vpmsum, third iteration xor.
|
+ * iteration vpmsum, third iteration xor.
|
||||||
+ */
|
+ */
|
||||||
+ for (i = 0; i < chunks-2; i++, p += 128) {
|
+ for (i = 0; i < chunks-2; i++) {
|
||||||
+ vconst1 = vec_ld(offset, vcrc_const);
|
+ vconst1 = vec_ld(offset, vcrc_const);
|
||||||
+ offset += 16;
|
+ offset += 16;
|
||||||
+ GROUP_ENDING_NOP;
|
+ GROUP_ENDING_NOP;
|
||||||
@ -1703,6 +1716,8 @@ index 00000000..864d04d4
|
|||||||
+ long)vdata7, (__vector unsigned long long)vconst1);
|
+ long)vdata7, (__vector unsigned long long)vconst1);
|
||||||
+ vdata7 = vec_ld(112, (__vector unsigned long long*) p);
|
+ vdata7 = vec_ld(112, (__vector unsigned long long*) p);
|
||||||
+ VEC_PERM(vdata7, vdata7, vdata7, vperm_const);
|
+ VEC_PERM(vdata7, vdata7, vdata7, vperm_const);
|
||||||
|
+
|
||||||
|
+ p = (char *)p + 128;
|
||||||
+ }
|
+ }
|
||||||
+
|
+
|
||||||
+ /* First cool down*/
|
+ /* First cool down*/
|
||||||
@ -1809,7 +1824,7 @@ index 00000000..864d04d4
|
|||||||
+ va7 = vec_ld(112, (__vector unsigned long long*) p);
|
+ va7 = vec_ld(112, (__vector unsigned long long*) p);
|
||||||
+ VEC_PERM(va7, va7, va7, vperm_const);
|
+ VEC_PERM(va7, va7, va7, vperm_const);
|
||||||
+
|
+
|
||||||
+ p += 128;
|
+ p = (char *)p + 128;
|
||||||
+
|
+
|
||||||
+ vdata0 = vec_xor(v0, va0);
|
+ vdata0 = vec_xor(v0, va0);
|
||||||
+ vdata1 = vec_xor(v1, va1);
|
+ vdata1 = vec_xor(v1, va1);
|
||||||
@ -1979,106 +1994,16 @@ index 00000000..864d04d4
|
|||||||
+
|
+
|
||||||
+ return result;
|
+ return result;
|
||||||
+}
|
+}
|
||||||
|
Index: zlib-1.2.11/Makefile.in
|
||||||
From 615d7188509b0f16dae919d7b369f8d01db18be5 Mon Sep 17 00:00:00 2001
|
===================================================================
|
||||||
From: Daniel Black <daniel@linux.vnet.ibm.com>
|
--- zlib-1.2.11.orig/Makefile.in
|
||||||
Date: Thu, 11 Jan 2018 17:04:38 +1100
|
+++ zlib-1.2.11/Makefile.in
|
||||||
Subject: [PATCH 2/8] Port crc32-vpmsum (Power architecture) to zlib types/api
|
|
||||||
|
|
||||||
Correct argument types and ensure that a buffer pointer
|
|
||||||
of 0 returns 0ULL consistent with existing crc32 functions.
|
|
||||||
---
|
|
||||||
power8-crc/vec_crc32.c | 23 +++++++++++++++--------
|
|
||||||
1 file changed, 15 insertions(+), 8 deletions(-)
|
|
||||||
|
|
||||||
diff --git a/contrib/power8-crc/vec_crc32.c b/contrib/power8-crc/vec_crc32.c
|
|
||||||
index 864d04d4..aa35ea5a 100644
|
|
||||||
--- a/contrib/power8-crc/vec_crc32.c
|
|
||||||
+++ b/contrib/power8-crc/vec_crc32.c
|
|
||||||
@@ -40,11 +40,13 @@
|
|
||||||
#include "crc32_constants.h"
|
|
||||||
#endif
|
|
||||||
|
|
||||||
+#include "../zutil.h"
|
|
||||||
+
|
|
||||||
#define VMX_ALIGN 16
|
|
||||||
#define VMX_ALIGN_MASK (VMX_ALIGN-1)
|
|
||||||
|
|
||||||
#ifdef REFLECT
|
|
||||||
-static unsigned int crc32_align(unsigned int crc, unsigned char *p,
|
|
||||||
+static unsigned long crc32_align(unsigned int crc, const unsigned char *p,
|
|
||||||
unsigned long len)
|
|
||||||
{
|
|
||||||
while (len--)
|
|
||||||
@@ -52,7 +54,7 @@ static unsigned int crc32_align(unsigned int crc, unsigned char *p,
|
|
||||||
return crc;
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
-static unsigned int crc32_align(unsigned int crc, unsigned char *p,
|
|
||||||
+static unsigned long crc32_align(unsigned int crc, const unsigned char *p,
|
|
||||||
unsigned long len)
|
|
||||||
{
|
|
||||||
while (len--)
|
|
||||||
@@ -61,19 +63,24 @@ static unsigned int crc32_align(unsigned int crc, unsigned char *p,
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
-static unsigned int __attribute__ ((aligned (32)))
|
|
||||||
-__crc32_vpmsum(unsigned int crc, void* p, unsigned long len);
|
|
||||||
+static unsigned long __attribute__ ((aligned (32)))
|
|
||||||
+__crc32_vpmsum(unsigned int crc, const void* p, unsigned long len);
|
|
||||||
|
|
||||||
#ifndef CRC32_FUNCTION
|
|
||||||
#define CRC32_FUNCTION crc32_vpmsum
|
|
||||||
#endif
|
|
||||||
|
|
||||||
-unsigned int CRC32_FUNCTION(unsigned int crc, unsigned char *p,
|
|
||||||
- unsigned long len)
|
|
||||||
+unsigned long ZLIB_INTERNAL CRC32_FUNCTION(
|
|
||||||
+ unsigned long crc,
|
|
||||||
+ const unsigned char FAR *p,
|
|
||||||
+ z_size_t len)
|
|
||||||
{
|
|
||||||
unsigned int prealign;
|
|
||||||
unsigned int tail;
|
|
||||||
|
|
||||||
+ /* For zlib API */
|
|
||||||
+ if (p == NULL) return 0UL;
|
|
||||||
+
|
|
||||||
#ifdef CRC_XOR
|
|
||||||
crc ^= 0xffffffff;
|
|
||||||
#endif
|
|
||||||
@@ -143,8 +150,8 @@ static const __vector unsigned long long vperm_const
|
|
||||||
#define VEC_PERM(vr, va, vb, vc)
|
|
||||||
#endif
|
|
||||||
|
|
||||||
-static unsigned int __attribute__ ((aligned (32)))
|
|
||||||
-__crc32_vpmsum(unsigned int crc, void* p, unsigned long len) {
|
|
||||||
+static unsigned long __attribute__ ((aligned (32)))
|
|
||||||
+__crc32_vpmsum(unsigned int crc, const void* p, unsigned long len) {
|
|
||||||
|
|
||||||
const __vector unsigned long long vzero = {0,0};
|
|
||||||
const __vector unsigned long long vones = {0xffffffffffffffffUL,
|
|
||||||
|
|
||||||
From c2697e9b5f8379b434ed79ab9599bab1193f1d46 Mon Sep 17 00:00:00 2001
|
|
||||||
From: Daniel Black <daniel@linux.vnet.ibm.com>
|
|
||||||
Date: Wed, 10 Jan 2018 10:55:27 +1100
|
|
||||||
Subject: [PATCH 3/8] Add makefile rule to build crc32_power8 objects
|
|
||||||
|
|
||||||
---
|
|
||||||
Makefile.in | 8 ++++++++
|
|
||||||
1 file changed, 8 insertions(+)
|
|
||||||
|
|
||||||
diff --git a/Makefile.in b/Makefile.in
|
|
||||||
index 5a77949f..5b8563eb 100644
|
|
||||||
--- a/Makefile.in
|
|
||||||
+++ b/Makefile.in
|
|
||||||
@@ -162,6 +162,9 @@ adler32.o: $(SRCDIR)adler32.c
|
@@ -162,6 +162,9 @@ adler32.o: $(SRCDIR)adler32.c
|
||||||
crc32.o: $(SRCDIR)crc32.c
|
crc32.o: $(SRCDIR)crc32.c
|
||||||
$(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)crc32.c
|
$(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)crc32.c
|
||||||
|
|
||||||
+crc32_power8.o: $(SRCDIR)power8-crc/vec_crc32.c
|
+crc32_power8.o: $(SRCDIR)contrib/power8-crc/vec_crc32.c
|
||||||
+ $(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)power8-crc/vec_crc32.c
|
+ $(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)contrib/power8-crc/vec_crc32.c
|
||||||
+
|
+
|
||||||
deflate.o: $(SRCDIR)deflate.c
|
deflate.o: $(SRCDIR)deflate.c
|
||||||
$(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)deflate.c
|
$(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)deflate.c
|
||||||
@ -2087,73 +2012,55 @@ index 5a77949f..5b8563eb 100644
|
|||||||
$(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/crc32.o $(SRCDIR)crc32.c
|
$(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/crc32.o $(SRCDIR)crc32.c
|
||||||
-@mv objs/crc32.o $@
|
-@mv objs/crc32.o $@
|
||||||
|
|
||||||
+crc32_power8.lo: $(SRCDIR)power8-crc/vec_crc32.c
|
+crc32_power8.lo: $(SRCDIR)contrib/power8-crc/vec_crc32.c
|
||||||
+ -@mkdir objs 2>/dev/null || test -d objs
|
+ -@mkdir objs 2>/dev/null || test -d objs
|
||||||
+ $(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/crc32_power8.o $(SRCDIR)power8-crc/vec_crc32.c
|
+ $(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/crc32_power8.o $(SRCDIR)contrib/power8-crc/vec_crc32.c
|
||||||
+ -@mv objs/crc32_power8.o $@
|
+ -@mv objs/crc32_power8.o $@
|
||||||
+
|
+
|
||||||
deflate.lo: $(SRCDIR)deflate.c
|
deflate.lo: $(SRCDIR)deflate.c
|
||||||
-@mkdir objs 2>/dev/null || test -d objs
|
-@mkdir objs 2>/dev/null || test -d objs
|
||||||
$(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/deflate.o $(SRCDIR)deflate.c
|
$(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/deflate.o $(SRCDIR)deflate.c
|
||||||
|
Index: zlib-1.2.11/configure
|
||||||
From a2f5adc957354d677fe25a7fc167506c436dd6e6 Mon Sep 17 00:00:00 2001
|
===================================================================
|
||||||
From: Daniel Black <daniel@linux.vnet.ibm.com>
|
--- zlib-1.2.11.orig/configure
|
||||||
Date: Wed, 10 Jan 2018 11:01:30 +1100
|
+++ zlib-1.2.11/configure
|
||||||
Subject: [PATCH 4/8] Runtime crc32_z optimized implementation detection
|
@@ -826,6 +826,91 @@ EOF
|
||||||
|
|
||||||
To support runtime optimization of crc32 GNU IFUNC capabilities
|
|
||||||
allows zlib to return an optimized crc32_z function pointer that is
|
|
||||||
resolved at runtime to the existing crc32_z name and is able to be
|
|
||||||
used by existing applications.
|
|
||||||
|
|
||||||
There are two code forms in which this can be defined; a native
|
|
||||||
attribute, and an asm type definition which will work with older
|
|
||||||
gcc compilers.
|
|
||||||
|
|
||||||
crc32_ifunc is added as a function that is called by the glibc
|
|
||||||
loader if the IFUNC capability exists to resolve the crc32_z
|
|
||||||
function. If the IFUNC capabilities don't exist it will otherwise
|
|
||||||
return a function pointer on the first invocation of the crc32_z
|
|
||||||
function call. For statically compiled code the function pointer
|
|
||||||
variant of this function is used.
|
|
||||||
|
|
||||||
crc32_ifunc provides a point of expansion for returning other
|
|
||||||
optimized crc32 implementations for other architectures.
|
|
||||||
|
|
||||||
DYNAMIC_CRC_TABLE/make_crc_table now occurs to the crc32_ifunc
|
|
||||||
and only if an crc32 function (crc32_big/crc32_little/
|
|
||||||
crc32_table_lookup) that use the generated table.
|
|
||||||
|
|
||||||
As a result lazy binding occurs (the default for glibc) on the
|
|
||||||
calling of make_crc_table occurs on the first use of crc32/crc32_z.
|
|
||||||
Compile time linker options, environment LD_BIND_NOW, hardened
|
|
||||||
compilers etc, will solve functions, i.e. the IFUNC, on symbol
|
|
||||||
initialisation to occur as the program is loaded rather
|
|
||||||
than on first use of crc32/crc32_z. The generation of this table
|
|
||||||
will be fairly minor compared to the other non-lazy bindings
|
|
||||||
occurring.
|
|
||||||
|
|
||||||
As crc32_big/crc32_little are optimized functions these are used
|
|
||||||
as a fallback to any optimized implementation (provided NO_BYFOUR
|
|
||||||
isn't defined) these will now be called directly for a crc32/
|
|
||||||
crc32_z and as such the 'if (buf == Z_NULL) return 0UL;' needed to
|
|
||||||
be introduced to these functions.
|
|
||||||
|
|
||||||
The table lookup implementation of crc32 is moved to
|
|
||||||
crc32_table_lookup and used as a function of last resort.
|
|
||||||
---
|
|
||||||
configure | 52 +++++++++++++++++++++++++++++++++++++++++++++++
|
|
||||||
crc32.c | 69 ++++++++++++++++++++++++++++++++++++++++++++++++++++-----------
|
|
||||||
2 files changed, 109 insertions(+), 12 deletions(-)
|
|
||||||
|
|
||||||
diff --git a/configure b/configure
|
|
||||||
index e974d1fd..0b2fe035 100755
|
|
||||||
--- a/configure
|
|
||||||
+++ b/configure
|
|
||||||
@@ -826,6 +826,58 @@ EOF
|
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
+# test to see if Power8+ implementation is compile time possible
|
||||||
|
+echo >> configure.log
|
||||||
|
+cat > $test.c <<EOF
|
||||||
|
+#if _ARCH_PWR8==1
|
||||||
|
+
|
||||||
|
+#if __BYTE_ORDER == __BIG_ENDIAN && defined(__clang__)
|
||||||
|
+#error "Clang vector instructions aren't big endian compatible"
|
||||||
|
+#endif
|
||||||
|
+
|
||||||
|
+#if defined(__BUILTIN_CPU_SUPPORTS__)
|
||||||
|
+/* good and easy */
|
||||||
|
+#else
|
||||||
|
+#include <sys/auxv.h>
|
||||||
|
+#include <bits/hwcap.h>
|
||||||
|
+int main()
|
||||||
|
+{
|
||||||
|
+ return (getauxval(AT_HWCAP2) & PPC_FEATURE2_ARCH_2_07);
|
||||||
|
+}
|
||||||
|
+#endif
|
||||||
|
+
|
||||||
|
+#else
|
||||||
|
+#error No Power 8 or newer architecture, may need -mcpu=power8
|
||||||
|
+#endif
|
||||||
|
+EOF
|
||||||
|
+
|
||||||
|
+if tryboth $CC -c $CFLAGS $test.c; then
|
||||||
|
+ OBJC="$OBJC crc32_power8.o"
|
||||||
|
+ PIC_OBJC="$PIC_OBJC crc32_power8.lo"
|
||||||
|
+ echo "Checking for runtime cpu detection and Power 8 (or newer) Architecture support... Yes." | tee -a configure.log
|
||||||
|
+else
|
||||||
|
+ echo "Checking for runtime cpu detection and Power 8 (or newer) Architecture support... No." | tee -a configure.log
|
||||||
|
+fi
|
||||||
|
+
|
||||||
+# test to see if we can use a gnu indirection function to detect and load optimized code at runtime
|
+# test to see if we can use a gnu indirection function to detect and load optimized code at runtime
|
||||||
+echo >> configure.log
|
+echo >> configure.log
|
||||||
+cat > $test.c <<EOF
|
+cat > $test.c <<EOF
|
||||||
@ -2209,11 +2116,11 @@ index e974d1fd..0b2fe035 100755
|
|||||||
# show the results in the log
|
# show the results in the log
|
||||||
echo >> configure.log
|
echo >> configure.log
|
||||||
echo ALL = $ALL >> configure.log
|
echo ALL = $ALL >> configure.log
|
||||||
diff --git a/crc32.c b/crc32.c
|
Index: zlib-1.2.11/crc32.c
|
||||||
index 9580440c..52e855fb 100644
|
===================================================================
|
||||||
--- a/crc32.c
|
--- zlib-1.2.11.orig/crc32.c
|
||||||
+++ b/crc32.c
|
+++ zlib-1.2.11/crc32.c
|
||||||
@@ -199,13 +199,47 @@ const z_crc_t FAR * ZEXPORT get_crc_table()
|
@@ -199,13 +199,78 @@ const z_crc_t FAR * ZEXPORT get_crc_tabl
|
||||||
#define DO8 DO1; DO1; DO1; DO1; DO1; DO1; DO1; DO1
|
#define DO8 DO1; DO1; DO1; DO1; DO1; DO1; DO1; DO1
|
||||||
|
|
||||||
/* ========================================================================= */
|
/* ========================================================================= */
|
||||||
@ -2237,6 +2144,19 @@ index 9580440c..52e855fb 100644
|
|||||||
+ return crc ^ 0xffffffffUL;
|
+ return crc ^ 0xffffffffUL;
|
||||||
+}
|
+}
|
||||||
+
|
+
|
||||||
|
+/* Small helper function to compare optfun against the reference table lookup
|
||||||
|
+ * return test_ref_comparision_##optfn in crc32_z_ifunc
|
||||||
|
+#include <assert.h>
|
||||||
|
+#define TEST_COMPARE(optfn) \
|
||||||
|
+ static unsigned long test_ref_comparision_ ## optfn(unsigned long crc, const unsigned char FAR *p, z_size_t len) \
|
||||||
|
+ { \
|
||||||
|
+ unsigned long crc_tbl_lookup = crc32_table_lookup(crc, p, len); \
|
||||||
|
+ unsigned long optcrc = optfn(crc, p, len); \
|
||||||
|
+ assert( optcrc == crc_tbl_lookup ); \
|
||||||
|
+ return optcrc; \
|
||||||
|
+ }
|
||||||
|
+*/
|
||||||
|
+
|
||||||
+#ifdef Z_IFUNC_ASM
|
+#ifdef Z_IFUNC_ASM
|
||||||
+unsigned long (*(crc32_z_ifunc(void)))(unsigned long, const unsigned char FAR *, z_size_t)
|
+unsigned long (*(crc32_z_ifunc(void)))(unsigned long, const unsigned char FAR *, z_size_t)
|
||||||
+ __asm__ ("crc32_z");
|
+ __asm__ ("crc32_z");
|
||||||
@ -2249,6 +2169,15 @@ index 9580440c..52e855fb 100644
|
|||||||
+ __attribute__ ((ifunc ("crc32_z_ifunc")));
|
+ __attribute__ ((ifunc ("crc32_z_ifunc")));
|
||||||
+#endif
|
+#endif
|
||||||
+
|
+
|
||||||
|
+#if _ARCH_PWR8==1
|
||||||
|
+unsigned long crc32_vpmsum(unsigned long, const unsigned char FAR *, z_size_t);
|
||||||
|
+/* for testing TEST_COMPARE(crc32_vpmsum) */
|
||||||
|
+#ifndef __BUILTIN_CPU_SUPPORTS__
|
||||||
|
+#include <sys/auxv.h>
|
||||||
|
+#include <bits/hwcap.h>
|
||||||
|
+#endif
|
||||||
|
+#endif
|
||||||
|
+
|
||||||
+/* due to a quirk of gnu_indirect_function - "local" (aka static) is applied to
|
+/* due to a quirk of gnu_indirect_function - "local" (aka static) is applied to
|
||||||
+ * crc32_z which is not desired. crc32_z_ifunc is implictly "local" */
|
+ * crc32_z which is not desired. crc32_z_ifunc is implictly "local" */
|
||||||
+#ifndef Z_IFUNC_ASM
|
+#ifndef Z_IFUNC_ASM
|
||||||
@ -2256,13 +2185,22 @@ index 9580440c..52e855fb 100644
|
|||||||
+#endif
|
+#endif
|
||||||
+unsigned long (*(crc32_z_ifunc(void)))(unsigned long, const unsigned char FAR *, z_size_t)
|
+unsigned long (*(crc32_z_ifunc(void)))(unsigned long, const unsigned char FAR *, z_size_t)
|
||||||
+{
|
+{
|
||||||
|
+#if _ARCH_PWR8==1
|
||||||
|
+#if defined(__BUILTIN_CPU_SUPPORTS__)
|
||||||
|
+ if (__builtin_cpu_supports("arch_2_07"))
|
||||||
|
+ return crc32_vpmsum;
|
||||||
|
+#else
|
||||||
|
+ if (getauxval(AT_HWCAP2) & PPC_FEATURE2_ARCH_2_07)
|
||||||
|
+ return crc32_vpmsum;
|
||||||
|
+#endif
|
||||||
|
+#endif /* _ARCH_PWR8 */
|
||||||
+
|
+
|
||||||
+/* return a function pointer for optimized arches here */
|
+/* return a function pointer for optimized arches here */
|
||||||
+
|
+
|
||||||
#ifdef DYNAMIC_CRC_TABLE
|
#ifdef DYNAMIC_CRC_TABLE
|
||||||
if (crc_table_empty)
|
if (crc_table_empty)
|
||||||
make_crc_table();
|
make_crc_table();
|
||||||
@@ -217,22 +251,31 @@ unsigned long ZEXPORT crc32_z(crc, buf, len)
|
@@ -217,22 +282,31 @@ unsigned long ZEXPORT crc32_z(crc, buf,
|
||||||
|
|
||||||
endian = 1;
|
endian = 1;
|
||||||
if (*((unsigned char *)(&endian)))
|
if (*((unsigned char *)(&endian)))
|
||||||
@ -2305,7 +2243,7 @@ index 9580440c..52e855fb 100644
|
|||||||
/* ========================================================================= */
|
/* ========================================================================= */
|
||||||
unsigned long ZEXPORT crc32(crc, buf, len)
|
unsigned long ZEXPORT crc32(crc, buf, len)
|
||||||
unsigned long crc;
|
unsigned long crc;
|
||||||
@@ -271,6 +314,7 @@ local unsigned long crc32_little(crc, buf, len)
|
@@ -271,6 +345,7 @@ local unsigned long crc32_little(crc, bu
|
||||||
register z_crc_t c;
|
register z_crc_t c;
|
||||||
register const z_crc_t FAR *buf4;
|
register const z_crc_t FAR *buf4;
|
||||||
|
|
||||||
@ -2313,7 +2251,7 @@ index 9580440c..52e855fb 100644
|
|||||||
c = (z_crc_t)crc;
|
c = (z_crc_t)crc;
|
||||||
c = ~c;
|
c = ~c;
|
||||||
while (len && ((ptrdiff_t)buf & 3)) {
|
while (len && ((ptrdiff_t)buf & 3)) {
|
||||||
@@ -311,6 +355,7 @@ local unsigned long crc32_big(crc, buf, len)
|
@@ -311,6 +386,7 @@ local unsigned long crc32_big(crc, buf,
|
||||||
register z_crc_t c;
|
register z_crc_t c;
|
||||||
register const z_crc_t FAR *buf4;
|
register const z_crc_t FAR *buf4;
|
||||||
|
|
||||||
@ -2321,400 +2259,3 @@ index 9580440c..52e855fb 100644
|
|||||||
c = ZSWAP32((z_crc_t)crc);
|
c = ZSWAP32((z_crc_t)crc);
|
||||||
c = ~c;
|
c = ~c;
|
||||||
while (len && ((ptrdiff_t)buf & 3)) {
|
while (len && ((ptrdiff_t)buf & 3)) {
|
||||||
|
|
||||||
From 49c212a202fd1e12dc42574b228d72085cc4d1f7 Mon Sep 17 00:00:00 2001
|
|
||||||
From: Daniel Black <daniel@linux.vnet.ibm.com>
|
|
||||||
Date: Mon, 15 Jan 2018 08:59:43 +1100
|
|
||||||
Subject: [PATCH 5/8] Linux Power crc32_vpmsum if available
|
|
||||||
|
|
||||||
Power Architecture is detected in the configure script and adds
|
|
||||||
optimized code to PIC_OBJC/OBJC.
|
|
||||||
|
|
||||||
Power8 crc32 performance
|
|
||||||
------------------------
|
|
||||||
|
|
||||||
Test - decompressing a jdk binary:
|
|
||||||
|
|
||||||
Before (no optimized crc32_vpmsum; disabled in crc32_z_ifunc):
|
|
||||||
|
|
||||||
$ time ./minigzip -d -c ../ibm-java-i386-sdk-8.0-5.0.bin.gz > ../ibm-java-i386-sdk-8.0-5.0.bin.restored
|
|
||||||
|
|
||||||
real 0m2.972s
|
|
||||||
user 0m2.292s
|
|
||||||
sys 0m0.100s
|
|
||||||
|
|
||||||
perf report -g --no-children:
|
|
||||||
52.26% minigzip minigzip [.] crc32_little
|
|
||||||
18.86% minigzip minigzip [.] inflate_fast
|
|
||||||
4.87% minigzip [unknown] [k] 0xc000000000063748
|
|
||||||
4.87% minigzip libc-2.23.so [.] __memcpy_power7
|
|
||||||
1.56% minigzip minigzip [.] inflate
|
|
||||||
0.96% minigzip minigzip [.] inflate_table
|
|
||||||
0.95% minigzip libc-2.23.so [.] _IO_fwrite
|
|
||||||
0.61% minigzip minigzip [.] inflateCodesUsed
|
|
||||||
|
|
||||||
Using crc32_vpmsum:
|
|
||||||
|
|
||||||
$ time ./minigzip -d -c ../ibm-java-i386-sdk-8.0-5.0.bin.gz > ../ibm-java-i386-sdk-8.0-5.0.bin.restored
|
|
||||||
|
|
||||||
real 0m0.895s
|
|
||||||
user 0m0.224s
|
|
||||||
sys 0m0.092s
|
|
||||||
|
|
||||||
perf report -g --no-children:
|
|
||||||
36.49% minigzip minigzip [.] inflate_fast
|
|
||||||
11.60% minigzip [unknown] [k] 0xc000000000063748
|
|
||||||
7.93% minigzip libc-2.23.so [.] __memcpy_power7
|
|
||||||
3.77% minigzip minigzip [.] crc32_vpmsum
|
|
||||||
3.70% minigzip minigzip [.] inflate_table
|
|
||||||
2.29% minigzip minigzip [.] inflate
|
|
||||||
---
|
|
||||||
configure | 33 +++++++++++++++++++++++++++++++++
|
|
||||||
crc32.c | 18 ++++++++++++++++++
|
|
||||||
2 files changed, 51 insertions(+)
|
|
||||||
|
|
||||||
diff --git a/configure b/configure
|
|
||||||
index 0b2fe035..650abe66 100755
|
|
||||||
--- a/configure
|
|
||||||
+++ b/configure
|
|
||||||
@@ -826,6 +826,39 @@ EOF
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
|
|
||||||
+# test to see if Power8+ implementation is compile time possible
|
|
||||||
+echo >> configure.log
|
|
||||||
+cat > $test.c <<EOF
|
|
||||||
+#if _ARCH_PWR8==1
|
|
||||||
+
|
|
||||||
+#if __BYTE_ORDER == __BIG_ENDIAN && defined(__clang__)
|
|
||||||
+#error "Clang vector instructions aren't big endian compatible"
|
|
||||||
+#endif
|
|
||||||
+
|
|
||||||
+#if defined(__BUILTIN_CPU_SUPPORTS__)
|
|
||||||
+/* good and easy */
|
|
||||||
+#else
|
|
||||||
+#include <sys/auxv.h>
|
|
||||||
+#include <bits/hwcap.h>
|
|
||||||
+int main()
|
|
||||||
+{
|
|
||||||
+ return (getauxval(AT_HWCAP2) & PPC_FEATURE2_ARCH_2_07);
|
|
||||||
+}
|
|
||||||
+#endif
|
|
||||||
+
|
|
||||||
+#else
|
|
||||||
+#error No Power 8 or newer architecture, may need -mcpu=power8
|
|
||||||
+#endif
|
|
||||||
+EOF
|
|
||||||
+
|
|
||||||
+if tryboth $CC -c $CFLAGS $test.c; then
|
|
||||||
+ OBJC="$OBJC crc32_power8.o"
|
|
||||||
+ PIC_OBJC="$PIC_OBJC crc32_power8.lo"
|
|
||||||
+ echo "Checking for runtime cpu detection and Power 8 (or newer) Architecture support... Yes." | tee -a configure.log
|
|
||||||
+else
|
|
||||||
+ echo "Checking for runtime cpu detection and Power 8 (or newer) Architecture support... No." | tee -a configure.log
|
|
||||||
+fi
|
|
||||||
+
|
|
||||||
# test to see if we can use a gnu indirection function to detect and load optimized code at runtime
|
|
||||||
echo >> configure.log
|
|
||||||
cat > $test.c <<EOF
|
|
||||||
diff --git a/crc32.c b/crc32.c
|
|
||||||
index 52e855fb..b66aa520 100644
|
|
||||||
--- a/crc32.c
|
|
||||||
+++ b/crc32.c
|
|
||||||
@@ -230,6 +230,15 @@ unsigned long ZEXPORT crc32_z(
|
|
||||||
__attribute__ ((ifunc ("crc32_z_ifunc")));
|
|
||||||
#endif
|
|
||||||
|
|
||||||
+#if _ARCH_PWR8==1
|
|
||||||
+unsigned long crc32_vpmsum(unsigned long, const unsigned char FAR *, z_size_t);
|
|
||||||
+/* for testing TEST_COMPARE(crc32_vpmsum) */
|
|
||||||
+#ifndef __BUILTIN_CPU_SUPPORTS__
|
|
||||||
+#include <sys/auxv.h>
|
|
||||||
+#include <bits/hwcap.h>
|
|
||||||
+#endif
|
|
||||||
+#endif
|
|
||||||
+
|
|
||||||
/* due to a quirk of gnu_indirect_function - "local" (aka static) is applied to
|
|
||||||
* crc32_z which is not desired. crc32_z_ifunc is implictly "local" */
|
|
||||||
#ifndef Z_IFUNC_ASM
|
|
||||||
@@ -237,6 +246,15 @@ local
|
|
||||||
#endif
|
|
||||||
unsigned long (*(crc32_z_ifunc(void)))(unsigned long, const unsigned char FAR *, z_size_t)
|
|
||||||
{
|
|
||||||
+#if _ARCH_PWR8==1
|
|
||||||
+#if defined(__BUILTIN_CPU_SUPPORTS__)
|
|
||||||
+ if (__builtin_cpu_supports("arch_2_07"))
|
|
||||||
+ return crc32_vpmsum;
|
|
||||||
+#else
|
|
||||||
+ if (getauxval(AT_HWCAP2) & PPC_FEATURE2_ARCH_2_07)
|
|
||||||
+ return crc32_vpmsum;
|
|
||||||
+#endif
|
|
||||||
+#endif /* _ARCH_PWR8 */
|
|
||||||
|
|
||||||
/* return a function pointer for optimized arches here */
|
|
||||||
|
|
||||||
|
|
||||||
From 3476aa2e05deb4696c114dd3b0150f90e2a4e340 Mon Sep 17 00:00:00 2001
|
|
||||||
From: Daniel Black <daniel@linux.vnet.ibm.com>
|
|
||||||
Date: Fri, 12 Jan 2018 15:22:09 +1100
|
|
||||||
Subject: [PATCH 6/8] crc32: add test harness for implementers of optimized
|
|
||||||
crc32_z
|
|
||||||
|
|
||||||
---
|
|
||||||
crc32.c | 13 +++++++++++++
|
|
||||||
1 file changed, 13 insertions(+)
|
|
||||||
|
|
||||||
diff --git a/crc32.c b/crc32.c
|
|
||||||
index b66aa520..12daa5e6 100644
|
|
||||||
--- a/crc32.c
|
|
||||||
+++ b/crc32.c
|
|
||||||
@@ -218,6 +218,19 @@ unsigned long ZEXPORT crc32_table_lookup(crc, buf, len)
|
|
||||||
return crc ^ 0xffffffffUL;
|
|
||||||
}
|
|
||||||
|
|
||||||
+/* Small helper function to compare optfn against the reference table lookup.
|
|
||||||
+ * To use it, return test_ref_comparision_##optfn from crc32_z_ifunc.
|
|
||||||
+#include <assert.h>
|
|
||||||
+#define TEST_COMPARE(optfn) \
|
|
||||||
+ static unsigned long test_ref_comparision_ ## optfn(unsigned long crc, const unsigned char FAR *p, z_size_t len) \
|
|
||||||
+ { \
|
|
||||||
+ unsigned long crc_tbl_lookup = crc32_table_lookup(crc, p, len); \
|
|
||||||
+ unsigned long optcrc = optfn(crc, p, len); \
|
|
||||||
+ assert( optcrc == crc_tbl_lookup ); \
|
|
||||||
+ return optcrc; \
|
|
||||||
+ }
|
|
||||||
+*/
|
|
||||||
+
|
|
||||||
#ifdef Z_IFUNC_ASM
|
|
||||||
unsigned long (*(crc32_z_ifunc(void)))(unsigned long, const unsigned char FAR *, z_size_t)
|
|
||||||
__asm__ ("crc32_z");
|
|
||||||
|
|
||||||
From 0daabafef27d7c215ed28d89cd366874a58d2573 Mon Sep 17 00:00:00 2001
|
|
||||||
From: Daniel Black <daniel@linux.vnet.ibm.com>
|
|
||||||
Date: Tue, 6 Feb 2018 17:37:10 +1100
|
|
||||||
Subject: [PATCH 7/8] fix: move power8-crc into contrib
|
|
||||||
|
|
||||||
---
|
|
||||||
Makefile.in | 8 ++++----
|
|
||||||
4 files changed, 4 insertions(+), 4 deletions(-)
|
|
||||||
|
|
||||||
diff --git a/Makefile.in b/Makefile.in
|
|
||||||
index 5b8563eb..c3a43f1b 100644
|
|
||||||
--- a/Makefile.in
|
|
||||||
+++ b/Makefile.in
|
|
||||||
@@ -162,8 +162,8 @@ adler32.o: $(SRCDIR)adler32.c
|
|
||||||
crc32.o: $(SRCDIR)crc32.c
|
|
||||||
$(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)crc32.c
|
|
||||||
|
|
||||||
-crc32_power8.o: $(SRCDIR)power8-crc/vec_crc32.c
|
|
||||||
- $(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)power8-crc/vec_crc32.c
|
|
||||||
+crc32_power8.o: $(SRCDIR)contrib/power8-crc/vec_crc32.c
|
|
||||||
+ $(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)contrib/power8-crc/vec_crc32.c
|
|
||||||
|
|
||||||
deflate.o: $(SRCDIR)deflate.c
|
|
||||||
$(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)deflate.c
|
|
||||||
@@ -215,9 +215,9 @@ crc32.lo: $(SRCDIR)crc32.c
|
|
||||||
$(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/crc32.o $(SRCDIR)crc32.c
|
|
||||||
-@mv objs/crc32.o $@
|
|
||||||
|
|
||||||
-crc32_power8.lo: $(SRCDIR)power8-crc/vec_crc32.c
|
|
||||||
+crc32_power8.lo: $(SRCDIR)contrib/power8-crc/vec_crc32.c
|
|
||||||
-@mkdir objs 2>/dev/null || test -d objs
|
|
||||||
- $(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/crc32_power8.o $(SRCDIR)power8-crc/vec_crc32.c
|
|
||||||
+ $(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/crc32_power8.o $(SRCDIR)contrib/power8-crc/vec_crc32.c
|
|
||||||
-@mv objs/crc32_power8.o $@
|
|
||||||
|
|
||||||
deflate.lo: $(SRCDIR)deflate.c
|
|
||||||
|
|
||||||
From 5d3e57b905a586a6fb6f8b847c35ba3b47a20719 Mon Sep 17 00:00:00 2001
|
|
||||||
From: Daniel Black <daniel@linux.vnet.ibm.com>
|
|
||||||
Date: Thu, 8 Feb 2018 13:20:00 +1100
|
|
||||||
Subject: [PATCH 8/8] fix: clang fixes for Big Endian on Power8 crc32
|
|
||||||
|
|
||||||
---
|
|
||||||
contrib/power8-crc/clang_workaround.h | 21 +++++++++++++++++----
|
|
||||||
contrib/power8-crc/vec_crc32.c | 33 ++++++++++++++-------------------
|
|
||||||
2 files changed, 31 insertions(+), 23 deletions(-)
|
|
||||||
|
|
||||||
diff --git a/contrib/power8-crc/clang_workaround.h b/contrib/power8-crc/clang_workaround.h
|
|
||||||
index 9b26ba59..b5e7dae0 100644
|
|
||||||
--- a/contrib/power8-crc/clang_workaround.h
|
|
||||||
+++ b/contrib/power8-crc/clang_workaround.h
|
|
||||||
@@ -5,10 +5,6 @@
|
|
||||||
* These stubs fix clang incompatibilities with GCC builtins.
|
|
||||||
*/
|
|
||||||
|
|
||||||
-#if __BYTE_ORDER == __BIG_ENDIAN && defined(__clang__)
|
|
||||||
-#error These workaround aren't big endian compatible
|
|
||||||
-#endif
|
|
||||||
-
|
|
||||||
#ifndef __builtin_crypto_vpmsumw
|
|
||||||
#define __builtin_crypto_vpmsumw __builtin_crypto_vpmsumb
|
|
||||||
#endif
|
|
||||||
@@ -35,7 +31,11 @@ static inline
|
|
||||||
__vector unsigned long long __builtin_pack_vector (unsigned long __a,
|
|
||||||
unsigned long __b)
|
|
||||||
{
|
|
||||||
+ #if defined(__BIG_ENDIAN__)
|
|
||||||
+ __vector unsigned long long __v = {__a, __b};
|
|
||||||
+ #else
|
|
||||||
__vector unsigned long long __v = {__b, __a};
|
|
||||||
+ #endif
|
|
||||||
return __v;
|
|
||||||
}
|
|
||||||
|
|
||||||
@@ -48,21 +48,34 @@ unsigned long __builtin_unpack_vector (__vector unsigned long long __v,
|
|
||||||
return __v[__o];
|
|
||||||
}
|
|
||||||
|
|
||||||
+#if defined(__BIG_ENDIAN__)
|
|
||||||
+#define __builtin_unpack_vector_0(a) __builtin_unpack_vector ((a), 0)
|
|
||||||
+#define __builtin_unpack_vector_1(a) __builtin_unpack_vector ((a), 1)
|
|
||||||
+#else
|
|
||||||
#define __builtin_unpack_vector_0(a) __builtin_unpack_vector ((a), 1)
|
|
||||||
#define __builtin_unpack_vector_1(a) __builtin_unpack_vector ((a), 0)
|
|
||||||
+#endif
|
|
||||||
|
|
||||||
#else
|
|
||||||
|
|
||||||
static inline
|
|
||||||
unsigned long __builtin_unpack_vector_0 (__vector unsigned long long __v)
|
|
||||||
{
|
|
||||||
+ #if defined(__BIG_ENDIAN__)
|
|
||||||
+ return vec_xxpermdi(__v, __v, 0x0)[1];
|
|
||||||
+ #else
|
|
||||||
return vec_xxpermdi(__v, __v, 0x0)[0];
|
|
||||||
+ #endif
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline
|
|
||||||
unsigned long __builtin_unpack_vector_1 (__vector unsigned long long __v)
|
|
||||||
{
|
|
||||||
+ #if defined(__BIG_ENDIAN__)
|
|
||||||
+ return vec_xxpermdi(__v, __v, 0x3)[1];
|
|
||||||
+ #else
|
|
||||||
return vec_xxpermdi(__v, __v, 0x3)[0];
|
|
||||||
+ #endif
|
|
||||||
}
|
|
||||||
#endif /* vec_xxpermdi */
|
|
||||||
|
|
||||||
diff --git a/contrib/power8-crc/vec_crc32.c b/contrib/power8-crc/vec_crc32.c
|
|
||||||
index aa35ea5a..bb2204b2 100644
|
|
||||||
--- a/contrib/power8-crc/vec_crc32.c
|
|
||||||
+++ b/contrib/power8-crc/vec_crc32.c
|
|
||||||
@@ -40,13 +40,11 @@
|
|
||||||
#include "crc32_constants.h"
|
|
||||||
#endif
|
|
||||||
|
|
||||||
-#include "../zutil.h"
|
|
||||||
-
|
|
||||||
#define VMX_ALIGN 16
|
|
||||||
#define VMX_ALIGN_MASK (VMX_ALIGN-1)
|
|
||||||
|
|
||||||
#ifdef REFLECT
|
|
||||||
-static unsigned long crc32_align(unsigned int crc, const unsigned char *p,
|
|
||||||
+static unsigned int crc32_align(unsigned int crc, const unsigned char *p,
|
|
||||||
unsigned long len)
|
|
||||||
{
|
|
||||||
while (len--)
|
|
||||||
@@ -54,7 +52,7 @@ static unsigned long crc32_align(unsigned int crc, const unsigned char *p,
|
|
||||||
return crc;
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
-static unsigned long crc32_align(unsigned int crc, const unsigned char *p,
|
|
||||||
+static unsigned int crc32_align(unsigned int crc, const unsigned char *p,
|
|
||||||
unsigned long len)
|
|
||||||
{
|
|
||||||
while (len--)
|
|
||||||
@@ -63,24 +61,19 @@ static unsigned long crc32_align(unsigned int crc, const unsigned char *p,
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
-static unsigned long __attribute__ ((aligned (32)))
|
|
||||||
+static unsigned int __attribute__ ((aligned (32)))
|
|
||||||
__crc32_vpmsum(unsigned int crc, const void* p, unsigned long len);
|
|
||||||
|
|
||||||
#ifndef CRC32_FUNCTION
|
|
||||||
#define CRC32_FUNCTION crc32_vpmsum
|
|
||||||
#endif
|
|
||||||
|
|
||||||
-unsigned long ZLIB_INTERNAL CRC32_FUNCTION(
|
|
||||||
- unsigned long crc,
|
|
||||||
- const unsigned char FAR *p,
|
|
||||||
- z_size_t len)
|
|
||||||
+unsigned int CRC32_FUNCTION(unsigned int crc, const unsigned char *p,
|
|
||||||
+ unsigned long len)
|
|
||||||
{
|
|
||||||
unsigned int prealign;
|
|
||||||
unsigned int tail;
|
|
||||||
|
|
||||||
- /* For zlib API */
|
|
||||||
- if (p == NULL) return 0UL;
|
|
||||||
-
|
|
||||||
#ifdef CRC_XOR
|
|
||||||
crc ^= 0xffffffff;
|
|
||||||
#endif
|
|
||||||
@@ -150,7 +143,7 @@ static const __vector unsigned long long vperm_const
|
|
||||||
#define VEC_PERM(vr, va, vb, vc)
|
|
||||||
#endif
|
|
||||||
|
|
||||||
-static unsigned long __attribute__ ((aligned (32)))
|
|
||||||
+static unsigned int __attribute__ ((aligned (32)))
|
|
||||||
__crc32_vpmsum(unsigned int crc, const void* p, unsigned long len) {
|
|
||||||
|
|
||||||
const __vector unsigned long long vzero = {0,0};
|
|
||||||
@@ -192,8 +185,8 @@ __crc32_vpmsum(unsigned int crc, const void* p, unsigned long len) {
|
|
||||||
unsigned int result = 0;
|
|
||||||
unsigned int offset; /* Constant table offset. */
|
|
||||||
|
|
||||||
- long i; /* Counter. */
|
|
||||||
- long chunks;
|
|
||||||
+ unsigned long i; /* Counter. */
|
|
||||||
+ unsigned long chunks;
|
|
||||||
|
|
||||||
unsigned long block_size;
|
|
||||||
int next_block = 0;
|
|
||||||
@@ -265,7 +258,7 @@ __crc32_vpmsum(unsigned int crc, const void* p, unsigned long len) {
|
|
||||||
/* xor in initial value */
|
|
||||||
vdata0 = vec_xor(vdata0, vcrc);
|
|
||||||
|
|
||||||
- p += 128;
|
|
||||||
+ p = (char *)p + 128;
|
|
||||||
|
|
||||||
do {
|
|
||||||
/* Checksum in blocks of MAX_SIZE. */
|
|
||||||
@@ -333,14 +326,14 @@ __crc32_vpmsum(unsigned int crc, const void* p, unsigned long len) {
|
|
||||||
vdata7 = vec_ld(112, (__vector unsigned long long*) p);
|
|
||||||
VEC_PERM(vdata7, vdata7, vdata7, vperm_const);
|
|
||||||
|
|
||||||
- p += 128;
|
|
||||||
+ p = (char *)p + 128;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* main loop. We modulo schedule it such that it takes three
|
|
||||||
* iterations to complete - first iteration load, second
|
|
||||||
* iteration vpmsum, third iteration xor.
|
|
||||||
*/
|
|
||||||
- for (i = 0; i < chunks-2; i++, p += 128) {
|
|
||||||
+ for (i = 0; i < chunks-2; i++) {
|
|
||||||
vconst1 = vec_ld(offset, vcrc_const);
|
|
||||||
offset += 16;
|
|
||||||
GROUP_ENDING_NOP;
|
|
||||||
@@ -401,6 +394,8 @@ __crc32_vpmsum(unsigned int crc, const void* p, unsigned long len) {
|
|
||||||
long)vdata7, (__vector unsigned long long)vconst1);
|
|
||||||
vdata7 = vec_ld(112, (__vector unsigned long long*) p);
|
|
||||||
VEC_PERM(vdata7, vdata7, vdata7, vperm_const);
|
|
||||||
+
|
|
||||||
+ p = (char *)p + 128;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* First cool down*/
|
|
||||||
@@ -507,7 +502,7 @@ __crc32_vpmsum(unsigned int crc, const void* p, unsigned long len) {
|
|
||||||
va7 = vec_ld(112, (__vector unsigned long long*) p);
|
|
||||||
VEC_PERM(va7, va7, va7, vperm_const);
|
|
||||||
|
|
||||||
- p += 128;
|
|
||||||
+ p = (char *)p + 128;
|
|
||||||
|
|
||||||
vdata0 = vec_xor(v0, va0);
|
|
||||||
vdata1 = vec_xor(v1, va1);
|
|
||||||
|
@ -1,3 +1,9 @@
|
|||||||
|
-------------------------------------------------------------------
|
||||||
|
Tue Mar 19 14:11:48 UTC 2019 - Tomáš Chvátal <tchvatal@suse.com>
|
||||||
|
|
||||||
|
- Try to safely abort if we get NULL ptr bsc#1110304:
|
||||||
|
* zlib-power8-fate325307.patch
|
||||||
|
|
||||||
-------------------------------------------------------------------
|
-------------------------------------------------------------------
|
||||||
Wed Jun 20 14:51:07 UTC 2018 - tchvatal@suse.com
|
Wed Jun 20 14:51:07 UTC 2018 - tchvatal@suse.com
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user