5014 lines
195 KiB
Diff
5014 lines
195 KiB
Diff
From e382a919a5bce2aa0738e85540942d7af7d6c1c0 Mon Sep 17 00:00:00 2001
|
|
From: Manjunath S Matti <mmatti@linux.ibm.com>
|
|
Date: Tue, 15 Nov 2022 02:45:53 -0600
|
|
Subject: [PATCH 1/4] Preparation for Power optimizations
|
|
|
|
Optimized functions for Power will make use of GNU indirect functions,
|
|
an extension to support different implementations of the same function,
|
|
which can be selected during runtime. This will be used to provide
|
|
optimized functions for different processor versions.
|
|
|
|
Since this is a GNU extension, we placed the definition of the Z_IFUNC
|
|
macro under `contrib/gcc`. This can be reused by other archs as well.
|
|
|
|
Author: Matheus Castanho <msc@linux.ibm.com>
|
|
Author: Rogerio Alves <rcardoso@linux.ibm.com>
|
|
Signed-off-by: Manjunath Matti <mmatti@linux.ibm.com>
|
|
---
|
|
CMakeLists.txt | 71 ++++++++++++++++++++++++++++++++++++++++++
|
|
configure | 66 +++++++++++++++++++++++++++++++++++++++
|
|
contrib/README.contrib | 8 +++++
|
|
contrib/gcc/zifunc.h | 60 +++++++++++++++++++++++++++++++++++
|
|
contrib/power/power.h | 4 +++
|
|
5 files changed, 209 insertions(+)
|
|
create mode 100644 contrib/gcc/zifunc.h
|
|
create mode 100644 contrib/power/power.h
|
|
|
|
diff --git a/CMakeLists.txt b/CMakeLists.txt
|
|
index b3a58b30e..dd1752757 100644
|
|
--- a/CMakeLists.txt
|
|
+++ b/CMakeLists.txt
|
|
@@ -5,6 +5,8 @@ project(zlib C)
|
|
|
|
set(VERSION "1.2.13")
|
|
|
|
+option(POWER "Enable building power implementation")
|
|
+
|
|
set(INSTALL_BIN_DIR "${CMAKE_INSTALL_PREFIX}/bin" CACHE PATH "Installation directory for executables")
|
|
set(INSTALL_LIB_DIR "${CMAKE_INSTALL_PREFIX}/lib" CACHE PATH "Installation directory for libraries")
|
|
set(INSTALL_INC_DIR "${CMAKE_INSTALL_PREFIX}/include" CACHE PATH "Installation directory for headers")
|
|
@@ -126,6 +128,75 @@ if(NOT MINGW)
|
|
)
|
|
endif()
|
|
|
|
+if(CMAKE_COMPILER_IS_GNUCC)
|
|
+
|
|
+ # test to see if we can use a GNU indirect function to detect and load optimized code at runtime
|
|
+ CHECK_C_SOURCE_COMPILES("
|
|
+ static int test_ifunc_native(void)
|
|
+ {
|
|
+ return 1;
|
|
+ }
|
|
+ static int (*(check_ifunc_native(void)))(void)
|
|
+ {
|
|
+ return test_ifunc_native;
|
|
+ }
|
|
+ int test_ifunc(void) __attribute__ ((ifunc (\"check_ifunc_native\")));
|
|
+ int main(void)
|
|
+ {
|
|
+ return 0;
|
|
+ }
|
|
+ " HAS_C_ATTR_IFUNC)
|
|
+
|
|
+ if(HAS_C_ATTR_IFUNC)
|
|
+ add_definitions(-DHAVE_IFUNC)
|
|
+ set(ZLIB_PRIVATE_HDRS ${ZLIB_PRIVATE_HDRS} contrib/gcc/zifunc.h)
|
|
+ endif()
|
|
+
|
|
+ if(POWER)
|
|
+ # Test to see if we can use the optimizations for Power
|
|
+ CHECK_C_SOURCE_COMPILES("
|
|
+ #ifndef _ARCH_PPC
|
|
+ #error \"Target is not Power\"
|
|
+ #endif
|
|
+ #ifndef __BUILTIN_CPU_SUPPORTS__
|
|
+ #error \"Target doesn't support __builtin_cpu_supports()\"
|
|
+ #endif
|
|
+ int main() { return 0; }
|
|
+ " HAS_POWER_SUPPORT)
|
|
+
|
|
+ if(HAS_POWER_SUPPORT AND HAS_C_ATTR_IFUNC)
|
|
+ add_definitions(-DZ_POWER_OPT)
|
|
+
|
|
+ set(CMAKE_REQUIRED_FLAGS -mcpu=power8)
|
|
+ CHECK_C_SOURCE_COMPILES("int main(void){return 0;}" POWER8)
|
|
+
|
|
+ if(POWER8)
|
|
+ add_definitions(-DZ_POWER8)
|
|
+ set(ZLIB_POWER8 )
|
|
+
|
|
+ set_source_files_properties(
|
|
+ ${ZLIB_POWER8}
|
|
+ PROPERTIES COMPILE_FLAGS -mcpu=power8)
|
|
+ endif()
|
|
+
|
|
+ set(CMAKE_REQUIRED_FLAGS -mcpu=power9)
|
|
+ CHECK_C_SOURCE_COMPILES("int main(void){return 0;}" POWER9)
|
|
+
|
|
+ if(POWER9)
|
|
+ add_definitions(-DZ_POWER9)
|
|
+ set(ZLIB_POWER9 )
|
|
+
|
|
+ set_source_files_properties(
|
|
+ ${ZLIB_POWER9}
|
|
+ PROPERTIES COMPILE_FLAGS -mcpu=power9)
|
|
+ endif()
|
|
+ unset(CMAKE_REQUIRED_FLAGS) # do not leak -mcpu=power8/power9 into later compile checks
|
|
+ set(ZLIB_PRIVATE_HDRS ${ZLIB_PRIVATE_HDRS} contrib/power/power.h)
|
|
+ set(ZLIB_SRCS ${ZLIB_SRCS} ${ZLIB_POWER8} ${ZLIB_POWER9})
|
|
+ endif()
|
|
+ endif()
|
|
+endif()
|
|
+
|
|
# parse the full version number from zlib.h and include in ZLIB_FULL_VERSION
|
|
file(READ ${CMAKE_CURRENT_SOURCE_DIR}/zlib.h _zlib_h_contents)
|
|
string(REGEX REPLACE ".*#define[ \t]+ZLIB_VERSION[ \t]+\"([-0-9A-Za-z.]+)\".*"
|
|
diff --git a/configure b/configure
|
|
index fa4d5daab..9ee7008c3 100755
|
|
--- a/configure
|
|
+++ b/configure
|
|
@@ -828,6 +828,72 @@ EOF
|
|
fi
|
|
fi
|
|
|
|
+# Test to see if we can use a GNU indirect function (ifunc) to detect and load optimized code at runtime
|
|
+echo >> configure.log
|
|
+cat > $test.c <<EOF
|
|
+static int test_ifunc_native(void)
|
|
+{
|
|
+ return 1;
|
|
+}
|
|
+
|
|
+static int (*(check_ifunc_native(void)))(void)
|
|
+{
|
|
+ return test_ifunc_native;
|
|
+}
|
|
+
|
|
+int test_ifunc(void) __attribute__ ((ifunc ("check_ifunc_native")));
|
|
+EOF
|
|
+
|
|
+if tryboth $CC -c $CFLAGS $test.c; then
|
|
+ SFLAGS="${SFLAGS} -DHAVE_IFUNC"
|
|
+ CFLAGS="${CFLAGS} -DHAVE_IFUNC"
|
|
+ echo "Checking for attribute(ifunc) support... Yes." | tee -a configure.log
|
|
+else
|
|
+ echo "Checking for attribute(ifunc) support... No." | tee -a configure.log
|
|
+fi
|
|
+
|
|
+# Test to see if we can use the optimizations for Power
|
|
+echo >> configure.log
|
|
+cat > $test.c <<EOF
|
|
+#ifndef _ARCH_PPC
|
|
+ #error "Target is not Power"
|
|
+#endif
|
|
+#ifndef HAVE_IFUNC
|
|
+ #error "Target doesn't support ifunc"
|
|
+#endif
|
|
+#ifndef __BUILTIN_CPU_SUPPORTS__
|
|
+ #error "Target doesn't support __builtin_cpu_supports()"
|
|
+#endif
|
|
+EOF
|
|
+
|
|
+if tryboth $CC -c $CFLAGS $test.c; then
|
|
+ echo "int main(void){return 0;}" > $test.c
|
|
+
|
|
+ if tryboth $CC -c $CFLAGS -mcpu=power8 $test.c; then
|
|
+ POWER8="-DZ_POWER8"
|
|
+ PIC_OBJC="${PIC_OBJC}"
|
|
+ OBJC="${OBJC}"
|
|
+ echo "Checking for -mcpu=power8 support... Yes." | tee -a configure.log
|
|
+ else
|
|
+ echo "Checking for -mcpu=power8 support... No." | tee -a configure.log
|
|
+ fi
|
|
+
|
|
+ if tryboth $CC -c $CFLAGS -mcpu=power9 $test.c; then
|
|
+ POWER9="-DZ_POWER9"
|
|
+ PIC_OBJC="${PIC_OBJC}"
|
|
+ OBJC="${OBJC}"
|
|
+ echo "Checking for -mcpu=power9 support... Yes." | tee -a configure.log
|
|
+ else
|
|
+ echo "Checking for -mcpu=power9 support... No." | tee -a configure.log
|
|
+ fi
|
|
+
|
|
+ SFLAGS="${SFLAGS} ${POWER8} ${POWER9} -DZ_POWER_OPT"
|
|
+ CFLAGS="${CFLAGS} ${POWER8} ${POWER9} -DZ_POWER_OPT"
|
|
+ echo "Checking for Power optimizations support... Yes." | tee -a configure.log
|
|
+else
|
|
+ echo "Checking for Power optimizations support... No." | tee -a configure.log
|
|
+fi
|
|
+
|
|
# show the results in the log
|
|
echo >> configure.log
|
|
echo ALL = $ALL >> configure.log
|
|
diff --git a/contrib/README.contrib b/contrib/README.contrib
|
|
index 5e5f95054..c57b52012 100644
|
|
--- a/contrib/README.contrib
|
|
+++ b/contrib/README.contrib
|
|
@@ -11,6 +11,10 @@ ada/ by Dmitriy Anisimkov <anisimkov@yahoo.com>
|
|
blast/ by Mark Adler <madler@alumni.caltech.edu>
|
|
Decompressor for output of PKWare Data Compression Library (DCL)
|
|
|
|
+gcc/ by Matheus Castanho <msc@linux.ibm.com>
|
|
+ and Rogerio Alves <rcardoso@linux.ibm.com>
|
|
+ Optimization helpers using GCC-specific extensions
|
|
+
|
|
delphi/ by Cosmin Truta <cosmint@cs.ubbcluj.ro>
|
|
Support for Delphi and C++ Builder
|
|
|
|
@@ -42,6 +46,10 @@ minizip/ by Gilles Vollant <info@winimage.com>
|
|
pascal/ by Bob Dellaca <bobdl@xtra.co.nz> et al.
|
|
Support for Pascal
|
|
|
|
+power/ by Matheus Castanho <msc@linux.ibm.com>
|
|
+ and Rogerio Alves <rcardoso@linux.ibm.com>
|
|
+ Optimized functions for Power processors
|
|
+
|
|
puff/ by Mark Adler <madler@alumni.caltech.edu>
|
|
Small, low memory usage inflate. Also serves to provide an
|
|
unambiguous description of the deflate format.
|
|
diff --git a/contrib/gcc/zifunc.h b/contrib/gcc/zifunc.h
|
|
new file mode 100644
|
|
index 000000000..daf4fe442
|
|
--- /dev/null
|
|
+++ b/contrib/gcc/zifunc.h
|
|
@@ -0,0 +1,60 @@
|
|
+/* Copyright (C) 2019 Matheus Castanho <msc@linux.ibm.com>, IBM
|
|
+ * 2019 Rogerio Alves <rogerio.alves@ibm.com>, IBM
|
|
+ * For conditions of distribution and use, see copyright notice in zlib.h
|
|
+ */
|
|
+
|
|
+#ifndef Z_IFUNC_H_
|
|
+#define Z_IFUNC_H_
|
|
+
|
|
+/* Helpers for arch optimizations */
|
|
+
|
|
+#define Z_IFUNC(fname) \
|
|
+ typeof(fname) fname __attribute__ ((ifunc (#fname "_resolver"))); \
|
|
+ local typeof(fname) *fname##_resolver(void)
|
|
+/* This is a helper macro to declare a resolver for an indirect function
|
|
+ * (ifunc). Let's say you have function
|
|
+ *
|
|
+ * int foo (int a);
|
|
+ *
|
|
+ * for which you want to provide different implementations, for example:
|
|
+ *
|
|
+ * int foo_clever (int a) {
|
|
+ * ... clever things ...
|
|
+ * }
|
|
+ *
|
|
+ * int foo_smart (int a) {
|
|
+ * ... smart things ...
|
|
+ * }
|
|
+ *
|
|
+ * You will have to declare foo() as an indirect function and also provide a
|
|
+ * resolver for it, to choose between foo_clever() and foo_smart() based on
|
|
+ * some criteria you define (e.g. processor features).
|
|
+ *
|
|
+ * Since most likely foo() has a default implementation somewhere in zlib, you
|
|
+ * may have to rename it so the 'foo' symbol can be used by the ifunc without
|
|
+ * conflicts.
|
|
+ *
|
|
+ * #define foo foo_default
|
|
+ * int foo (int a) {
|
|
+ * ...
|
|
+ * }
|
|
+ * #undef foo
|
|
+ *
|
|
+ * Now you just have to provide a resolver function to choose which function
|
|
+ * should be used (decided at runtime on the first call to foo()):
|
|
+ *
|
|
+ * Z_IFUNC(foo) {
|
|
+ * if (... some condition ...)
|
|
+ * return foo_clever;
|
|
+ *
|
|
+ * if (... other condition ...)
|
|
+ * return foo_smart;
|
|
+ *
|
|
+ * return foo_default;
|
|
+ * }
|
|
+ *
|
|
+ * All calls to foo() throughout the code can remain untouched, all the magic
|
|
+ * will be done by the linker using the resolver function.
|
|
+ */
|
|
+
|
|
+#endif /* Z_IFUNC_H_ */
|
|
diff --git a/contrib/power/power.h b/contrib/power/power.h
|
|
new file mode 100644
|
|
index 000000000..b42c7d6c6
|
|
--- /dev/null
|
|
+++ b/contrib/power/power.h
|
|
@@ -0,0 +1,4 @@
|
|
+/* Copyright (C) 2019 Matheus Castanho <msc@linux.ibm.com>, IBM
|
|
+ * 2019 Rogerio Alves <rogerio.alves@ibm.com>, IBM
|
|
+ * For conditions of distribution and use, see copyright notice in zlib.h
|
|
+ */
|
|
|
|
From b7ee6436703e5e8716f3b82df669422035a84385 Mon Sep 17 00:00:00 2001
|
|
From: Manjunath S Matti <mmatti@linux.ibm.com>
|
|
Date: Wed, 16 Nov 2022 03:53:53 -0600
|
|
Subject: [PATCH 2/4] Add Power8+ optimized crc32
|
|
|
|
This commit adds an optimized version for the crc32 function based
|
|
on crc32-vpmsum from https://github.com/antonblanchard/crc32-vpmsum/
|
|
|
|
This is the C implementation created by Rogerio Alves
|
|
<rogealve@br.ibm.com>
|
|
|
|
It makes use of vector instructions to speed up the CRC32 algorithm.
|
|
|
|
Author: Rogerio Alves <rcardoso@linux.ibm.com>
|
|
Signed-off-by: Manjunath Matti <mmatti@linux.ibm.com>
|
|
---
|
|
.gitignore | 3 +
|
|
CMakeLists.txt | 7 +-
|
|
Makefile.in | 43 +-
|
|
configure | 7 +-
|
|
contrib/README.contrib | 3 +-
|
|
contrib/power/clang_workaround.h | 82 ++
|
|
contrib/power/crc32_constants.h | 1206 ++++++++++++++++++++++++++++++
|
|
contrib/power/crc32_z_power8.c | 679 +++++++++++++++++
|
|
contrib/power/crc32_z_resolver.c | 15 +
|
|
contrib/power/power.h | 4 +
|
|
crc32.c | 12 +
|
|
test/crc32_test.c | 205 +++++
|
|
12 files changed, 2252 insertions(+), 14 deletions(-)
|
|
create mode 100644 contrib/power/clang_workaround.h
|
|
create mode 100644 contrib/power/crc32_constants.h
|
|
create mode 100644 contrib/power/crc32_z_power8.c
|
|
create mode 100644 contrib/power/crc32_z_resolver.c
|
|
create mode 100644 test/crc32_test.c
|
|
|
|
diff --git a/CMakeLists.txt b/CMakeLists.txt
|
|
index dd1752757..1077c83ef 100644
|
|
--- a/CMakeLists.txt
|
|
+++ b/CMakeLists.txt
|
|
@@ -172,7 +172,8 @@ if(CMAKE_COMPILER_IS_GNUCC)
|
|
|
|
if(POWER8)
|
|
add_definitions(-DZ_POWER8)
|
|
- set(ZLIB_POWER8 )
|
|
+ set(ZLIB_POWER8
|
|
+ contrib/power/crc32_z_power8.c)
|
|
|
|
set_source_files_properties(
|
|
${ZLIB_POWER8}
|
|
@@ -269,6 +270,10 @@ add_executable(example test/example.c)
|
|
target_link_libraries(example zlib)
|
|
add_test(example example)
|
|
|
|
+add_executable(crc32_test test/crc32_test.c)
|
|
+target_link_libraries(crc32_test zlib)
|
|
+add_test(crc32_test crc32_test)
|
|
+
|
|
add_executable(minigzip test/minigzip.c)
|
|
target_link_libraries(minigzip zlib)
|
|
|
|
diff --git a/Makefile.in b/Makefile.in
|
|
index 9cdb85259..83d8ca47d 100644
|
|
--- a/Makefile.in
|
|
+++ b/Makefile.in
|
|
@@ -71,11 +71,11 @@ PIC_OBJS = $(PIC_OBJC) $(PIC_OBJA)
|
|
|
|
all: static shared
|
|
|
|
-static: example$(EXE) minigzip$(EXE)
|
|
+static: crc32_test$(EXE) example$(EXE) minigzip$(EXE)
|
|
|
|
-shared: examplesh$(EXE) minigzipsh$(EXE)
|
|
+shared: crc32_testsh$(EXE) examplesh$(EXE) minigzipsh$(EXE)
|
|
|
|
-all64: example64$(EXE) minigzip64$(EXE)
|
|
+all64: crc32_test64$(EXE) example64$(EXE) minigzip64$(EXE)
|
|
|
|
check: test
|
|
|
|
@@ -83,7 +83,7 @@ test: all teststatic testshared
|
|
|
|
teststatic: static
|
|
@TMPST=tmpst_$$; \
|
|
- if echo hello world | ${QEMU_RUN} ./minigzip | ${QEMU_RUN} ./minigzip -d && ${QEMU_RUN} ./example $$TMPST ; then \
|
|
+ if echo hello world | ${QEMU_RUN} ./minigzip | ${QEMU_RUN} ./minigzip -d && ${QEMU_RUN} ./example $$TMPST && ${QEMU_RUN} ./crc32_test; then \
|
|
echo ' *** zlib test OK ***'; \
|
|
else \
|
|
echo ' *** zlib test FAILED ***'; false; \
|
|
@@ -96,7 +96,7 @@ testshared: shared
|
|
DYLD_LIBRARY_PATH=`pwd`:$(DYLD_LIBRARY_PATH) ; export DYLD_LIBRARY_PATH; \
|
|
SHLIB_PATH=`pwd`:$(SHLIB_PATH) ; export SHLIB_PATH; \
|
|
TMPSH=tmpsh_$$; \
|
|
- if echo hello world | ${QEMU_RUN} ./minigzipsh | ${QEMU_RUN} ./minigzipsh -d && ${QEMU_RUN} ./examplesh $$TMPSH; then \
|
|
+ if echo hello world | ${QEMU_RUN} ./minigzipsh | ${QEMU_RUN} ./minigzipsh -d && ${QEMU_RUN} ./examplesh $$TMPSH && ${QEMU_RUN} ./crc32_testsh; then \
|
|
echo ' *** zlib shared test OK ***'; \
|
|
else \
|
|
echo ' *** zlib shared test FAILED ***'; false; \
|
|
@@ -105,7 +105,7 @@ testshared: shared
|
|
|
|
test64: all64
|
|
@TMP64=tmp64_$$; \
|
|
- if echo hello world | ${QEMU_RUN} ./minigzip64 | ${QEMU_RUN} ./minigzip64 -d && ${QEMU_RUN} ./example64 $$TMP64; then \
|
|
+ if echo hello world | ${QEMU_RUN} ./minigzip64 | ${QEMU_RUN} ./minigzip64 -d && ${QEMU_RUN} ./example64 $$TMP64 && ${QEMU_RUN} ./crc32_test64; then \
|
|
echo ' *** zlib 64-bit test OK ***'; \
|
|
else \
|
|
echo ' *** zlib 64-bit test FAILED ***'; false; \
|
|
@@ -139,12 +139,18 @@ match.lo: match.S
|
|
mv _match.o match.lo
|
|
rm -f _match.s
|
|
|
|
+crc32_test.o: $(SRCDIR)test/crc32_test.c $(SRCDIR)zlib.h zconf.h
|
|
+ $(CC) $(CFLAGS) $(ZINCOUT) -c -o $@ $(SRCDIR)test/crc32_test.c
|
|
+
|
|
example.o: $(SRCDIR)test/example.c $(SRCDIR)zlib.h zconf.h
|
|
$(CC) $(CFLAGS) $(ZINCOUT) -c -o $@ $(SRCDIR)test/example.c
|
|
|
|
minigzip.o: $(SRCDIR)test/minigzip.c $(SRCDIR)zlib.h zconf.h
|
|
$(CC) $(CFLAGS) $(ZINCOUT) -c -o $@ $(SRCDIR)test/minigzip.c
|
|
|
|
+crc32_test64.o: $(SRCDIR)test/crc32_test.c $(SRCDIR)zlib.h zconf.h
|
|
+ $(CC) $(CFLAGS) $(ZINCOUT) -D_FILE_OFFSET_BITS=64 -c -o $@ $(SRCDIR)test/crc32_test.c
|
|
+
|
|
example64.o: $(SRCDIR)test/example.c $(SRCDIR)zlib.h zconf.h
|
|
$(CC) $(CFLAGS) $(ZINCOUT) -D_FILE_OFFSET_BITS=64 -c -o $@ $(SRCDIR)test/example.c
|
|
|
|
@@ -158,6 +164,9 @@ adler32.o: $(SRCDIR)adler32.c
|
|
crc32.o: $(SRCDIR)crc32.c
|
|
$(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)crc32.c
|
|
|
|
+crc32_z_power8.o: $(SRCDIR)contrib/power/crc32_z_power8.c
|
|
+ $(CC) $(CFLAGS) -mcpu=power8 $(ZINC) -c -o $@ $(SRCDIR)contrib/power/crc32_z_power8.c
|
|
+
|
|
deflate.o: $(SRCDIR)deflate.c
|
|
$(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)deflate.c
|
|
|
|
@@ -208,6 +217,11 @@ crc32.lo: $(SRCDIR)crc32.c
|
|
$(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/crc32.o $(SRCDIR)crc32.c
|
|
-@mv objs/crc32.o $@
|
|
|
|
+crc32_z_power8.lo: $(SRCDIR)contrib/power/crc32_z_power8.c
|
|
+ -@mkdir objs 2>/dev/null || test -d objs
|
|
+ $(CC) $(SFLAGS) -mcpu=power8 $(ZINC) -DPIC -c -o objs/crc32_z_power8.o $(SRCDIR)contrib/power/crc32_z_power8.c
|
|
+ -@mv objs/crc32_z_power8.o $@
|
|
+
|
|
deflate.lo: $(SRCDIR)deflate.c
|
|
-@mkdir objs 2>/dev/null || test -d objs
|
|
$(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/deflate.o $(SRCDIR)deflate.c
|
|
@@ -281,18 +295,27 @@ placebo $(SHAREDLIBV): $(PIC_OBJS) libz.a
|
|
ln -s $@ $(SHAREDLIBM)
|
|
-@rmdir objs
|
|
|
|
+crc32_test$(EXE): crc32_test.o $(STATICLIB)
|
|
+ $(CC) $(CFLAGS) -o $@ crc32_test.o $(TEST_LDFLAGS)
|
|
+
|
|
example$(EXE): example.o $(STATICLIB)
|
|
$(CC) $(CFLAGS) -o $@ example.o $(TEST_LDFLAGS)
|
|
|
|
minigzip$(EXE): minigzip.o $(STATICLIB)
|
|
$(CC) $(CFLAGS) -o $@ minigzip.o $(TEST_LDFLAGS)
|
|
|
|
+crc32_testsh$(EXE): crc32_test.o $(SHAREDLIBV)
|
|
+ $(CC) $(CFLAGS) -o $@ crc32_test.o $(LDFLAGS) -L. $(SHAREDLIBV)
|
|
+
|
|
examplesh$(EXE): example.o $(SHAREDLIBV)
|
|
$(CC) $(CFLAGS) -o $@ example.o $(LDFLAGS) -L. $(SHAREDLIBV)
|
|
|
|
minigzipsh$(EXE): minigzip.o $(SHAREDLIBV)
|
|
$(CC) $(CFLAGS) -o $@ minigzip.o $(LDFLAGS) -L. $(SHAREDLIBV)
|
|
|
|
+crc32_test64$(EXE): crc32_test64.o $(STATICLIB)
|
|
+ $(CC) $(CFLAGS) -o $@ crc32_test64.o $(TEST_LDFLAGS)
|
|
+
|
|
example64$(EXE): example64.o $(STATICLIB)
|
|
$(CC) $(CFLAGS) -o $@ example64.o $(TEST_LDFLAGS)
|
|
|
|
@@ -362,8 +385,8 @@ zconf: $(SRCDIR)zconf.h.in
|
|
mostlyclean: clean
|
|
clean:
|
|
rm -f *.o *.lo *~ \
|
|
- example$(EXE) minigzip$(EXE) examplesh$(EXE) minigzipsh$(EXE) \
|
|
- example64$(EXE) minigzip64$(EXE) \
|
|
+ crc32_test$(EXE) example$(EXE) minigzip$(EXE) crc32_testsh$(EXE) examplesh$(EXE) minigzipsh$(EXE) \
|
|
+ crc32_test64$(EXE) example64$(EXE) minigzip64$(EXE) \
|
|
infcover \
|
|
libz.* foo.gz so_locations \
|
|
_match.s maketree contrib/infback9/*.o
|
|
@@ -385,7 +408,7 @@ tags:
|
|
|
|
adler32.o zutil.o: $(SRCDIR)zutil.h $(SRCDIR)zlib.h zconf.h
|
|
gzclose.o gzlib.o gzread.o gzwrite.o: $(SRCDIR)zlib.h zconf.h $(SRCDIR)gzguts.h
|
|
-compress.o example.o minigzip.o uncompr.o: $(SRCDIR)zlib.h zconf.h
|
|
+compress.o crc32_test.o example.o minigzip.o uncompr.o: $(SRCDIR)zlib.h zconf.h
|
|
crc32.o: $(SRCDIR)zutil.h $(SRCDIR)zlib.h zconf.h $(SRCDIR)crc32.h
|
|
deflate.o: $(SRCDIR)deflate.h $(SRCDIR)zutil.h $(SRCDIR)zlib.h zconf.h
|
|
infback.o inflate.o: $(SRCDIR)zutil.h $(SRCDIR)zlib.h zconf.h $(SRCDIR)inftrees.h $(SRCDIR)inflate.h $(SRCDIR)inffast.h $(SRCDIR)inffixed.h
|
|
@@ -395,7 +418,7 @@ trees.o: $(SRCDIR)deflate.h $(SRCDIR)zutil.h $(SRCDIR)zlib.h zconf.h $(SRCDIR)tr
|
|
|
|
adler32.lo zutil.lo: $(SRCDIR)zutil.h $(SRCDIR)zlib.h zconf.h
|
|
gzclose.lo gzlib.lo gzread.lo gzwrite.lo: $(SRCDIR)zlib.h zconf.h $(SRCDIR)gzguts.h
|
|
-compress.lo example.lo minigzip.lo uncompr.lo: $(SRCDIR)zlib.h zconf.h
|
|
+compress.lo crc32_test.lo example.lo minigzip.lo uncompr.lo: $(SRCDIR)zlib.h zconf.h
|
|
crc32.lo: $(SRCDIR)zutil.h $(SRCDIR)zlib.h zconf.h $(SRCDIR)crc32.h
|
|
deflate.lo: $(SRCDIR)deflate.h $(SRCDIR)zutil.h $(SRCDIR)zlib.h zconf.h
|
|
infback.lo inflate.lo: $(SRCDIR)zutil.h $(SRCDIR)zlib.h zconf.h $(SRCDIR)inftrees.h $(SRCDIR)inflate.h $(SRCDIR)inffast.h $(SRCDIR)inffixed.h
|
|
diff --git a/configure b/configure
|
|
index 9ee7008c3..45d51e596 100755
|
|
--- a/configure
|
|
+++ b/configure
|
|
@@ -858,6 +858,9 @@ cat > $test.c <<EOF
|
|
#ifndef _ARCH_PPC
|
|
#error "Target is not Power"
|
|
#endif
|
|
+#if !(defined(__PPC64__) || defined(__powerpc64__))
|
|
+ #error "Target is not 64 bits"
|
|
+#endif
|
|
#ifndef HAVE_IFUNC
|
|
#error "Target doesn't support ifunc"
|
|
#endif
|
|
@@ -871,8 +874,8 @@ if tryboth $CC -c $CFLAGS $test.c; then
|
|
|
|
if tryboth $CC -c $CFLAGS -mcpu=power8 $test.c; then
|
|
POWER8="-DZ_POWER8"
|
|
- PIC_OBJC="${PIC_OBJC}"
|
|
- OBJC="${OBJC}"
|
|
+ PIC_OBJC="${PIC_OBJC} crc32_z_power8.lo"
|
|
+ OBJC="${OBJC} crc32_z_power8.o"
|
|
echo "Checking for -mcpu=power8 support... Yes." | tee -a configure.log
|
|
else
|
|
echo "Checking for -mcpu=power8 support... No." | tee -a configure.log
|
|
diff --git a/contrib/README.contrib b/contrib/README.contrib
|
|
index c57b52012..90170df69 100644
|
|
--- a/contrib/README.contrib
|
|
+++ b/contrib/README.contrib
|
|
@@ -46,7 +46,8 @@ minizip/ by Gilles Vollant <info@winimage.com>
|
|
pascal/ by Bob Dellaca <bobdl@xtra.co.nz> et al.
|
|
Support for Pascal
|
|
|
|
-power/ by Matheus Castanho <msc@linux.ibm.com>
|
|
+power/ by Daniel Black <daniel@linux.ibm.com>
|
|
+ Matheus Castanho <msc@linux.ibm.com>
|
|
and Rogerio Alves <rcardoso@linux.ibm.com>
|
|
Optimized functions for Power processors
|
|
|
|
diff --git a/contrib/power/clang_workaround.h b/contrib/power/clang_workaround.h
|
|
new file mode 100644
|
|
index 000000000..b5e7dae01
|
|
--- /dev/null
|
|
+++ b/contrib/power/clang_workaround.h
|
|
@@ -0,0 +1,82 @@
|
|
+#ifndef CLANG_WORKAROUNDS_H
|
|
+#define CLANG_WORKAROUNDS_H
|
|
+
|
|
+/*
|
|
+ * These stubs fix clang incompatibilities with GCC builtins.
|
|
+ */
|
|
+
|
|
+#ifndef __builtin_crypto_vpmsumw
|
|
+#define __builtin_crypto_vpmsumw __builtin_crypto_vpmsumb
|
|
+#endif
|
|
+#ifndef __builtin_crypto_vpmsumd
|
|
+#define __builtin_crypto_vpmsumd __builtin_crypto_vpmsumb
|
|
+#endif
|
|
+
|
|
+static inline
|
|
+__vector unsigned long long __attribute__((overloadable))
|
|
+vec_ld(int __a, const __vector unsigned long long* __b)
|
|
+{
|
|
+ return (__vector unsigned long long)__builtin_altivec_lvx(__a, __b);
|
|
+}
|
|
+
|
|
+/*
|
|
+ * GCC __builtin_pack_vector_int128 returns a vector __int128_t but Clang
|
|
+ * does not recognize this type. On GCC this builtin is translated to an
|
|
+ * xxpermdi instruction that only moves the registers __a and __b, whereas the
|
|
+ * stub below instead generates a load.
|
|
+ *
|
|
+ * Clang has vec_xxpermdi intrinsics. It was implemented in 4.0.0.
|
|
+ */
|
|
+static inline
|
|
+__vector unsigned long long __builtin_pack_vector (unsigned long __a,
|
|
+ unsigned long __b)
|
|
+{
|
|
+ #if defined(__BIG_ENDIAN__)
|
|
+ __vector unsigned long long __v = {__a, __b};
|
|
+ #else
|
|
+ __vector unsigned long long __v = {__b, __a};
|
|
+ #endif
|
|
+ return __v;
|
|
+}
|
|
+
|
|
+#ifndef vec_xxpermdi
|
|
+
|
|
+static inline
|
|
+unsigned long __builtin_unpack_vector (__vector unsigned long long __v,
|
|
+ int __o)
|
|
+{
|
|
+ return __v[__o];
|
|
+}
|
|
+
|
|
+#if defined(__BIG_ENDIAN__)
|
|
+#define __builtin_unpack_vector_0(a) __builtin_unpack_vector ((a), 0)
|
|
+#define __builtin_unpack_vector_1(a) __builtin_unpack_vector ((a), 1)
|
|
+#else
|
|
+#define __builtin_unpack_vector_0(a) __builtin_unpack_vector ((a), 1)
|
|
+#define __builtin_unpack_vector_1(a) __builtin_unpack_vector ((a), 0)
|
|
+#endif
|
|
+
|
|
+#else
|
|
+
|
|
+static inline
|
|
+unsigned long __builtin_unpack_vector_0 (__vector unsigned long long __v)
|
|
+{
|
|
+ #if defined(__BIG_ENDIAN__)
|
|
+ return vec_xxpermdi(__v, __v, 0x0)[1];
|
|
+ #else
|
|
+ return vec_xxpermdi(__v, __v, 0x0)[0];
|
|
+ #endif
|
|
+}
|
|
+
|
|
+static inline
|
|
+unsigned long __builtin_unpack_vector_1 (__vector unsigned long long __v)
|
|
+{
|
|
+ #if defined(__BIG_ENDIAN__)
|
|
+ return vec_xxpermdi(__v, __v, 0x3)[1];
|
|
+ #else
|
|
+ return vec_xxpermdi(__v, __v, 0x3)[0];
|
|
+ #endif
|
|
+}
|
|
+#endif /* vec_xxpermdi */
|
|
+
|
|
+#endif
|
|
diff --git a/contrib/power/crc32_constants.h b/contrib/power/crc32_constants.h
|
|
new file mode 100644
|
|
index 000000000..58088dcc0
|
|
--- /dev/null
|
|
+++ b/contrib/power/crc32_constants.h
|
|
@@ -0,0 +1,1206 @@
|
|
+/*
|
|
+*
|
|
+* THIS FILE IS GENERATED WITH
|
|
+./crc32_constants -c -r -x 0x04C11DB7
|
|
+
|
|
+* This is from https://github.com/antonblanchard/crc32-vpmsum/
|
|
+* DO NOT MODIFY IT MANUALLY!
|
|
+*
|
|
+*/
|
|
+
|
|
+#define CRC 0x4c11db7
|
|
+#define CRC_XOR
|
|
+#define REFLECT
|
|
+#define MAX_SIZE 32768
|
|
+
|
|
+#ifndef __ASSEMBLER__
|
|
+#ifdef CRC_TABLE
|
|
+static const unsigned int crc_table[] = {
|
|
+ 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba,
|
|
+ 0x076dc419, 0x706af48f, 0xe963a535, 0x9e6495a3,
|
|
+ 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988,
|
|
+ 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91,
|
|
+ 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de,
|
|
+ 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7,
|
|
+ 0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec,
|
|
+ 0x14015c4f, 0x63066cd9, 0xfa0f3d63, 0x8d080df5,
|
|
+ 0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172,
|
|
+ 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b,
|
|
+ 0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940,
|
|
+ 0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59,
|
|
+ 0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116,
|
|
+ 0x21b4f4b5, 0x56b3c423, 0xcfba9599, 0xb8bda50f,
|
|
+ 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924,
|
|
+ 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d,
|
|
+ 0x76dc4190, 0x01db7106, 0x98d220bc, 0xefd5102a,
|
|
+ 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433,
|
|
+ 0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818,
|
|
+ 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01,
|
|
+ 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e,
|
|
+ 0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457,
|
|
+ 0x65b0d9c6, 0x12b7e950, 0x8bbeb8ea, 0xfcb9887c,
|
|
+ 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65,
|
|
+ 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2,
|
|
+ 0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb,
|
|
+ 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0,
|
|
+ 0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9,
|
|
+ 0x5005713c, 0x270241aa, 0xbe0b1010, 0xc90c2086,
|
|
+ 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
|
|
+ 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4,
|
|
+ 0x59b33d17, 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad,
|
|
+ 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a,
|
|
+ 0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683,
|
|
+ 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8,
|
|
+ 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1,
|
|
+ 0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe,
|
|
+ 0xf762575d, 0x806567cb, 0x196c3671, 0x6e6b06e7,
|
|
+ 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc,
|
|
+ 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5,
|
|
+ 0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252,
|
|
+ 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b,
|
|
+ 0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60,
|
|
+ 0xdf60efc3, 0xa867df55, 0x316e8eef, 0x4669be79,
|
|
+ 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236,
|
|
+ 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f,
|
|
+ 0xc5ba3bbe, 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04,
|
|
+ 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d,
|
|
+ 0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a,
|
|
+ 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713,
|
|
+ 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38,
|
|
+ 0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21,
|
|
+ 0x86d3d2d4, 0xf1d4e242, 0x68ddb3f8, 0x1fda836e,
|
|
+ 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777,
|
|
+ 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c,
|
|
+ 0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45,
|
|
+ 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2,
|
|
+ 0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db,
|
|
+ 0xaed16a4a, 0xd9d65adc, 0x40df0b66, 0x37d83bf0,
|
|
+ 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
|
|
+ 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6,
|
|
+ 0xbad03605, 0xcdd70693, 0x54de5729, 0x23d967bf,
|
|
+ 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94,
|
|
+ 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d,};
|
|
+
|
|
+#endif /* CRC_TABLE */
|
|
+#ifdef POWER8_INTRINSICS
|
|
+
|
|
+/* Constants */
|
|
+
|
|
+/* Reduce 262144 kbits to 1024 bits */
|
|
+static const __vector unsigned long long vcrc_const[255]
|
|
+ __attribute__((aligned (16))) = {
|
|
+#ifdef __LITTLE_ENDIAN__
|
|
+ /* x^261120 mod p(x)` << 1, x^261184 mod p(x)` << 1 */
|
|
+ { 0x0000000099ea94a8, 0x00000001651797d2 },
|
|
+ /* x^260096 mod p(x)` << 1, x^260160 mod p(x)` << 1 */
|
|
+ { 0x00000000945a8420, 0x0000000021e0d56c },
|
|
+ /* x^259072 mod p(x)` << 1, x^259136 mod p(x)` << 1 */
|
|
+ { 0x0000000030762706, 0x000000000f95ecaa },
|
|
+ /* x^258048 mod p(x)` << 1, x^258112 mod p(x)` << 1 */
|
|
+ { 0x00000001a52fc582, 0x00000001ebd224ac },
|
|
+ /* x^257024 mod p(x)` << 1, x^257088 mod p(x)` << 1 */
|
|
+ { 0x00000001a4a7167a, 0x000000000ccb97ca },
|
|
+ /* x^256000 mod p(x)` << 1, x^256064 mod p(x)` << 1 */
|
|
+ { 0x000000000c18249a, 0x00000001006ec8a8 },
|
|
+ /* x^254976 mod p(x)` << 1, x^255040 mod p(x)` << 1 */
|
|
+ { 0x00000000a924ae7c, 0x000000014f58f196 },
|
|
+ /* x^253952 mod p(x)` << 1, x^254016 mod p(x)` << 1 */
|
|
+ { 0x00000001e12ccc12, 0x00000001a7192ca6 },
|
|
+ /* x^252928 mod p(x)` << 1, x^252992 mod p(x)` << 1 */
|
|
+ { 0x00000000a0b9d4ac, 0x000000019a64bab2 },
|
|
+ /* x^251904 mod p(x)` << 1, x^251968 mod p(x)` << 1 */
|
|
+ { 0x0000000095e8ddfe, 0x0000000014f4ed2e },
|
|
+ /* x^250880 mod p(x)` << 1, x^250944 mod p(x)` << 1 */
|
|
+ { 0x00000000233fddc4, 0x000000011092b6a2 },
|
|
+ /* x^249856 mod p(x)` << 1, x^249920 mod p(x)` << 1 */
|
|
+ { 0x00000001b4529b62, 0x00000000c8a1629c },
|
|
+ /* x^248832 mod p(x)` << 1, x^248896 mod p(x)` << 1 */
|
|
+ { 0x00000001a7fa0e64, 0x000000017bf32e8e },
|
|
+ /* x^247808 mod p(x)` << 1, x^247872 mod p(x)` << 1 */
|
|
+ { 0x00000001b5334592, 0x00000001f8cc6582 },
|
|
+ /* x^246784 mod p(x)` << 1, x^246848 mod p(x)` << 1 */
|
|
+ { 0x000000011f8ee1b4, 0x000000008631ddf0 },
|
|
+ /* x^245760 mod p(x)` << 1, x^245824 mod p(x)` << 1 */
|
|
+ { 0x000000006252e632, 0x000000007e5a76d0 },
|
|
+ /* x^244736 mod p(x)` << 1, x^244800 mod p(x)` << 1 */
|
|
+ { 0x00000000ab973e84, 0x000000002b09b31c },
|
|
+ /* x^243712 mod p(x)` << 1, x^243776 mod p(x)` << 1 */
|
|
+ { 0x000000007734f5ec, 0x00000001b2df1f84 },
|
|
+ /* x^242688 mod p(x)` << 1, x^242752 mod p(x)` << 1 */
|
|
+ { 0x000000007c547798, 0x00000001d6f56afc },
|
|
+ /* x^241664 mod p(x)` << 1, x^241728 mod p(x)` << 1 */
|
|
+ { 0x000000007ec40210, 0x00000001b9b5e70c },
|
|
+ /* x^240640 mod p(x)` << 1, x^240704 mod p(x)` << 1 */
|
|
+ { 0x00000001ab1695a8, 0x0000000034b626d2 },
|
|
+ /* x^239616 mod p(x)` << 1, x^239680 mod p(x)` << 1 */
|
|
+ { 0x0000000090494bba, 0x000000014c53479a },
|
|
+ /* x^238592 mod p(x)` << 1, x^238656 mod p(x)` << 1 */
|
|
+ { 0x00000001123fb816, 0x00000001a6d179a4 },
|
|
+ /* x^237568 mod p(x)` << 1, x^237632 mod p(x)` << 1 */
|
|
+ { 0x00000001e188c74c, 0x000000015abd16b4 },
|
|
+ /* x^236544 mod p(x)` << 1, x^236608 mod p(x)` << 1 */
|
|
+ { 0x00000001c2d3451c, 0x00000000018f9852 },
|
|
+ /* x^235520 mod p(x)` << 1, x^235584 mod p(x)` << 1 */
|
|
+ { 0x00000000f55cf1ca, 0x000000001fb3084a },
|
|
+ /* x^234496 mod p(x)` << 1, x^234560 mod p(x)` << 1 */
|
|
+ { 0x00000001a0531540, 0x00000000c53dfb04 },
|
|
+ /* x^233472 mod p(x)` << 1, x^233536 mod p(x)` << 1 */
|
|
+ { 0x0000000132cd7ebc, 0x00000000e10c9ad6 },
|
|
+ /* x^232448 mod p(x)` << 1, x^232512 mod p(x)` << 1 */
|
|
+ { 0x0000000073ab7f36, 0x0000000025aa994a },
|
|
+ /* x^231424 mod p(x)` << 1, x^231488 mod p(x)` << 1 */
|
|
+ { 0x0000000041aed1c2, 0x00000000fa3a74c4 },
|
|
+ /* x^230400 mod p(x)` << 1, x^230464 mod p(x)` << 1 */
|
|
+ { 0x0000000136c53800, 0x0000000033eb3f40 },
|
|
+ /* x^229376 mod p(x)` << 1, x^229440 mod p(x)` << 1 */
|
|
+ { 0x0000000126835a30, 0x000000017193f296 },
|
|
+ /* x^228352 mod p(x)` << 1, x^228416 mod p(x)` << 1 */
|
|
+ { 0x000000006241b502, 0x0000000043f6c86a },
|
|
+ /* x^227328 mod p(x)` << 1, x^227392 mod p(x)` << 1 */
|
|
+ { 0x00000000d5196ad4, 0x000000016b513ec6 },
|
|
+ /* x^226304 mod p(x)` << 1, x^226368 mod p(x)` << 1 */
|
|
+ { 0x000000009cfa769a, 0x00000000c8f25b4e },
|
|
+ /* x^225280 mod p(x)` << 1, x^225344 mod p(x)` << 1 */
|
|
+ { 0x00000000920e5df4, 0x00000001a45048ec },
|
|
+ /* x^224256 mod p(x)` << 1, x^224320 mod p(x)` << 1 */
|
|
+ { 0x0000000169dc310e, 0x000000000c441004 },
|
|
+ /* x^223232 mod p(x)` << 1, x^223296 mod p(x)` << 1 */
|
|
+ { 0x0000000009fc331c, 0x000000000e17cad6 },
|
|
+ /* x^222208 mod p(x)` << 1, x^222272 mod p(x)` << 1 */
|
|
+ { 0x000000010d94a81e, 0x00000001253ae964 },
|
|
+ /* x^221184 mod p(x)` << 1, x^221248 mod p(x)` << 1 */
|
|
+ { 0x0000000027a20ab2, 0x00000001d7c88ebc },
|
|
+ /* x^220160 mod p(x)` << 1, x^220224 mod p(x)` << 1 */
|
|
+ { 0x0000000114f87504, 0x00000001e7ca913a },
|
|
+ /* x^219136 mod p(x)` << 1, x^219200 mod p(x)` << 1 */
|
|
+ { 0x000000004b076d96, 0x0000000033ed078a },
|
|
+ /* x^218112 mod p(x)` << 1, x^218176 mod p(x)` << 1 */
|
|
+ { 0x00000000da4d1e74, 0x00000000e1839c78 },
|
|
+ /* x^217088 mod p(x)` << 1, x^217152 mod p(x)` << 1 */
|
|
+ { 0x000000001b81f672, 0x00000001322b267e },
|
|
+ /* x^216064 mod p(x)` << 1, x^216128 mod p(x)` << 1 */
|
|
+ { 0x000000009367c988, 0x00000000638231b6 },
|
|
+ /* x^215040 mod p(x)` << 1, x^215104 mod p(x)` << 1 */
|
|
+ { 0x00000001717214ca, 0x00000001ee7f16f4 },
|
|
+ /* x^214016 mod p(x)` << 1, x^214080 mod p(x)` << 1 */
|
|
+ { 0x000000009f47d820, 0x0000000117d9924a },
|
|
+ /* x^212992 mod p(x)` << 1, x^213056 mod p(x)` << 1 */
|
|
+ { 0x000000010d9a47d2, 0x00000000e1a9e0c4 },
|
|
+ /* x^211968 mod p(x)` << 1, x^212032 mod p(x)` << 1 */
|
|
+ { 0x00000000a696c58c, 0x00000001403731dc },
|
|
+ /* x^210944 mod p(x)` << 1, x^211008 mod p(x)` << 1 */
|
|
+ { 0x000000002aa28ec6, 0x00000001a5ea9682 },
|
|
+ /* x^209920 mod p(x)` << 1, x^209984 mod p(x)` << 1 */
|
|
+ { 0x00000001fe18fd9a, 0x0000000101c5c578 },
|
|
+ /* x^208896 mod p(x)` << 1, x^208960 mod p(x)` << 1 */
|
|
+ { 0x000000019d4fc1ae, 0x00000000dddf6494 },
|
|
+ /* x^207872 mod p(x)` << 1, x^207936 mod p(x)` << 1 */
|
|
+ { 0x00000001ba0e3dea, 0x00000000f1c3db28 },
|
|
+ /* x^206848 mod p(x)` << 1, x^206912 mod p(x)` << 1 */
|
|
+ { 0x0000000074b59a5e, 0x000000013112fb9c },
|
|
+ /* x^205824 mod p(x)` << 1, x^205888 mod p(x)` << 1 */
|
|
+ { 0x00000000f2b5ea98, 0x00000000b680b906 },
|
|
+ /* x^204800 mod p(x)` << 1, x^204864 mod p(x)` << 1 */
|
|
+ { 0x0000000187132676, 0x000000001a282932 },
|
|
+ /* x^203776 mod p(x)` << 1, x^203840 mod p(x)` << 1 */
|
|
+ { 0x000000010a8c6ad4, 0x0000000089406e7e },
|
|
+ /* x^202752 mod p(x)` << 1, x^202816 mod p(x)` << 1 */
|
|
+ { 0x00000001e21dfe70, 0x00000001def6be8c },
|
|
+ /* x^201728 mod p(x)` << 1, x^201792 mod p(x)` << 1 */
|
|
+ { 0x00000001da0050e4, 0x0000000075258728 },
|
|
+ /* x^200704 mod p(x)` << 1, x^200768 mod p(x)` << 1 */
|
|
+ { 0x00000000772172ae, 0x000000019536090a },
|
|
+ /* x^199680 mod p(x)` << 1, x^199744 mod p(x)` << 1 */
|
|
+ { 0x00000000e47724aa, 0x00000000f2455bfc },
|
|
+ /* x^198656 mod p(x)` << 1, x^198720 mod p(x)` << 1 */
|
|
+ { 0x000000003cd63ac4, 0x000000018c40baf4 },
|
|
+ /* x^197632 mod p(x)` << 1, x^197696 mod p(x)` << 1 */
|
|
+ { 0x00000001bf47d352, 0x000000004cd390d4 },
|
|
+ /* x^196608 mod p(x)` << 1, x^196672 mod p(x)` << 1 */
|
|
+ { 0x000000018dc1d708, 0x00000001e4ece95a },
|
|
+ /* x^195584 mod p(x)` << 1, x^195648 mod p(x)` << 1 */
|
|
+ { 0x000000002d4620a4, 0x000000001a3ee918 },
|
|
+ /* x^194560 mod p(x)` << 1, x^194624 mod p(x)` << 1 */
|
|
+ { 0x0000000058fd1740, 0x000000007c652fb8 },
|
|
+ /* x^193536 mod p(x)` << 1, x^193600 mod p(x)` << 1 */
|
|
+ { 0x00000000dadd9bfc, 0x000000011c67842c },
|
|
+ /* x^192512 mod p(x)` << 1, x^192576 mod p(x)` << 1 */
|
|
+ { 0x00000001ea2140be, 0x00000000254f759c },
|
|
+ /* x^191488 mod p(x)` << 1, x^191552 mod p(x)` << 1 */
|
|
+ { 0x000000009de128ba, 0x000000007ece94ca },
|
|
+ /* x^190464 mod p(x)` << 1, x^190528 mod p(x)` << 1 */
|
|
+ { 0x000000013ac3aa8e, 0x0000000038f258c2 },
|
|
+ /* x^189440 mod p(x)` << 1, x^189504 mod p(x)` << 1 */
|
|
+ { 0x0000000099980562, 0x00000001cdf17b00 },
|
|
+ /* x^188416 mod p(x)` << 1, x^188480 mod p(x)` << 1 */
|
|
+ { 0x00000001c1579c86, 0x000000011f882c16 },
|
|
+ /* x^187392 mod p(x)` << 1, x^187456 mod p(x)` << 1 */
|
|
+ { 0x0000000068dbbf94, 0x0000000100093fc8 },
|
|
+ /* x^186368 mod p(x)` << 1, x^186432 mod p(x)` << 1 */
|
|
+ { 0x000000004509fb04, 0x00000001cd684f16 },
|
|
+ /* x^185344 mod p(x)` << 1, x^185408 mod p(x)` << 1 */
|
|
+ { 0x00000001202f6398, 0x000000004bc6a70a },
|
|
+ /* x^184320 mod p(x)` << 1, x^184384 mod p(x)` << 1 */
|
|
+ { 0x000000013aea243e, 0x000000004fc7e8e4 },
|
|
+ /* x^183296 mod p(x)` << 1, x^183360 mod p(x)` << 1 */
|
|
+ { 0x00000001b4052ae6, 0x0000000130103f1c },
|
|
+ /* x^182272 mod p(x)` << 1, x^182336 mod p(x)` << 1 */
|
|
+ { 0x00000001cd2a0ae8, 0x0000000111b0024c },
|
|
+ /* x^181248 mod p(x)` << 1, x^181312 mod p(x)` << 1 */
|
|
+ { 0x00000001fe4aa8b4, 0x000000010b3079da },
|
|
+ /* x^180224 mod p(x)` << 1, x^180288 mod p(x)` << 1 */
|
|
+ { 0x00000001d1559a42, 0x000000010192bcc2 },
|
|
+ /* x^179200 mod p(x)` << 1, x^179264 mod p(x)` << 1 */
|
|
+ { 0x00000001f3e05ecc, 0x0000000074838d50 },
|
|
+ /* x^178176 mod p(x)` << 1, x^178240 mod p(x)` << 1 */
|
|
+ { 0x0000000104ddd2cc, 0x000000001b20f520 },
|
|
+ /* x^177152 mod p(x)` << 1, x^177216 mod p(x)` << 1 */
|
|
+ { 0x000000015393153c, 0x0000000050c3590a },
|
|
+ /* x^176128 mod p(x)` << 1, x^176192 mod p(x)` << 1 */
|
|
+ { 0x0000000057e942c6, 0x00000000b41cac8e },
|
|
+ /* x^175104 mod p(x)` << 1, x^175168 mod p(x)` << 1 */
|
|
+ { 0x000000012c633850, 0x000000000c72cc78 },
|
|
+ /* x^174080 mod p(x)` << 1, x^174144 mod p(x)` << 1 */
|
|
+ { 0x00000000ebcaae4c, 0x0000000030cdb032 },
|
|
+ /* x^173056 mod p(x)` << 1, x^173120 mod p(x)` << 1 */
|
|
+ { 0x000000013ee532a6, 0x000000013e09fc32 },
|
|
+ /* x^172032 mod p(x)` << 1, x^172096 mod p(x)` << 1 */
|
|
+ { 0x00000001bf0cbc7e, 0x000000001ed624d2 },
|
|
+ /* x^171008 mod p(x)` << 1, x^171072 mod p(x)` << 1 */
|
|
+ { 0x00000000d50b7a5a, 0x00000000781aee1a },
|
|
+ /* x^169984 mod p(x)` << 1, x^170048 mod p(x)` << 1 */
|
|
+ { 0x0000000002fca6e8, 0x00000001c4d8348c },
|
|
+ /* x^168960 mod p(x)` << 1, x^169024 mod p(x)` << 1 */
|
|
+ { 0x000000007af40044, 0x0000000057a40336 },
|
|
+ /* x^167936 mod p(x)` << 1, x^168000 mod p(x)` << 1 */
|
|
+ { 0x0000000016178744, 0x0000000085544940 },
|
|
+ /* x^166912 mod p(x)` << 1, x^166976 mod p(x)` << 1 */
|
|
+ { 0x000000014c177458, 0x000000019cd21e80 },
|
|
+ /* x^165888 mod p(x)` << 1, x^165952 mod p(x)` << 1 */
|
|
+ { 0x000000011b6ddf04, 0x000000013eb95bc0 },
|
|
+ /* x^164864 mod p(x)` << 1, x^164928 mod p(x)` << 1 */
|
|
+ { 0x00000001f3e29ccc, 0x00000001dfc9fdfc },
|
|
+ /* x^163840 mod p(x)` << 1, x^163904 mod p(x)` << 1 */
|
|
+ { 0x0000000135ae7562, 0x00000000cd028bc2 },
|
|
+ /* x^162816 mod p(x)` << 1, x^162880 mod p(x)` << 1 */
|
|
+ { 0x0000000190ef812c, 0x0000000090db8c44 },
|
|
+ /* x^161792 mod p(x)` << 1, x^161856 mod p(x)` << 1 */
|
|
+ { 0x0000000067a2c786, 0x000000010010a4ce },
|
|
+ /* x^160768 mod p(x)` << 1, x^160832 mod p(x)` << 1 */
|
|
+ { 0x0000000048b9496c, 0x00000001c8f4c72c },
|
|
+ /* x^159744 mod p(x)` << 1, x^159808 mod p(x)` << 1 */
|
|
+ { 0x000000015a422de6, 0x000000001c26170c },
|
|
+ /* x^158720 mod p(x)` << 1, x^158784 mod p(x)` << 1 */
|
|
+ { 0x00000001ef0e3640, 0x00000000e3fccf68 },
|
|
+ /* x^157696 mod p(x)` << 1, x^157760 mod p(x)` << 1 */
|
|
+ { 0x00000001006d2d26, 0x00000000d513ed24 },
|
|
+ /* x^156672 mod p(x)` << 1, x^156736 mod p(x)` << 1 */
|
|
+ { 0x00000001170d56d6, 0x00000000141beada },
|
|
+ /* x^155648 mod p(x)` << 1, x^155712 mod p(x)` << 1 */
|
|
+ { 0x00000000a5fb613c, 0x000000011071aea0 },
|
|
+ /* x^154624 mod p(x)` << 1, x^154688 mod p(x)` << 1 */
|
|
+ { 0x0000000040bbf7fc, 0x000000012e19080a },
|
|
+ /* x^153600 mod p(x)` << 1, x^153664 mod p(x)` << 1 */
|
|
+ { 0x000000016ac3a5b2, 0x0000000100ecf826 },
|
|
+ /* x^152576 mod p(x)` << 1, x^152640 mod p(x)` << 1 */
|
|
+ { 0x00000000abf16230, 0x0000000069b09412 },
|
|
+ /* x^151552 mod p(x)` << 1, x^151616 mod p(x)` << 1 */
|
|
+ { 0x00000001ebe23fac, 0x0000000122297bac },
|
|
+ /* x^150528 mod p(x)` << 1, x^150592 mod p(x)` << 1 */
|
|
+ { 0x000000008b6a0894, 0x00000000e9e4b068 },
|
|
+ /* x^149504 mod p(x)` << 1, x^149568 mod p(x)` << 1 */
|
|
+ { 0x00000001288ea478, 0x000000004b38651a },
|
|
+ /* x^148480 mod p(x)` << 1, x^148544 mod p(x)` << 1 */
|
|
+ { 0x000000016619c442, 0x00000001468360e2 },
|
|
+ /* x^147456 mod p(x)` << 1, x^147520 mod p(x)` << 1 */
|
|
+ { 0x0000000086230038, 0x00000000121c2408 },
|
|
+ /* x^146432 mod p(x)` << 1, x^146496 mod p(x)` << 1 */
|
|
+ { 0x000000017746a756, 0x00000000da7e7d08 },
|
|
+ /* x^145408 mod p(x)` << 1, x^145472 mod p(x)` << 1 */
|
|
+ { 0x0000000191b8f8f8, 0x00000001058d7652 },
|
|
+ /* x^144384 mod p(x)` << 1, x^144448 mod p(x)` << 1 */
|
|
+ { 0x000000008e167708, 0x000000014a098a90 },
|
|
+ /* x^143360 mod p(x)` << 1, x^143424 mod p(x)` << 1 */
|
|
+ { 0x0000000148b22d54, 0x0000000020dbe72e },
|
|
+ /* x^142336 mod p(x)` << 1, x^142400 mod p(x)` << 1 */
|
|
+ { 0x0000000044ba2c3c, 0x000000011e7323e8 },
|
|
+ /* x^141312 mod p(x)` << 1, x^141376 mod p(x)` << 1 */
|
|
+ { 0x00000000b54d2b52, 0x00000000d5d4bf94 },
|
|
+ /* x^140288 mod p(x)` << 1, x^140352 mod p(x)` << 1 */
|
|
+ { 0x0000000005a4fd8a, 0x0000000199d8746c },
|
|
+ /* x^139264 mod p(x)` << 1, x^139328 mod p(x)` << 1 */
|
|
+ { 0x0000000139f9fc46, 0x00000000ce9ca8a0 },
|
|
+ /* x^138240 mod p(x)` << 1, x^138304 mod p(x)` << 1 */
|
|
+ { 0x000000015a1fa824, 0x00000000136edece },
|
|
+ /* x^137216 mod p(x)` << 1, x^137280 mod p(x)` << 1 */
|
|
+ { 0x000000000a61ae4c, 0x000000019b92a068 },
|
|
+ /* x^136192 mod p(x)` << 1, x^136256 mod p(x)` << 1 */
|
|
+ { 0x0000000145e9113e, 0x0000000071d62206 },
|
|
+ /* x^135168 mod p(x)` << 1, x^135232 mod p(x)` << 1 */
|
|
+ { 0x000000006a348448, 0x00000000dfc50158 },
|
|
+ /* x^134144 mod p(x)` << 1, x^134208 mod p(x)` << 1 */
|
|
+ { 0x000000004d80a08c, 0x00000001517626bc },
|
|
+ /* x^133120 mod p(x)` << 1, x^133184 mod p(x)` << 1 */
|
|
+ { 0x000000014b6837a0, 0x0000000148d1e4fa },
|
|
+ /* x^132096 mod p(x)` << 1, x^132160 mod p(x)` << 1 */
|
|
+ { 0x000000016896a7fc, 0x0000000094d8266e },
|
|
+ /* x^131072 mod p(x)` << 1, x^131136 mod p(x)` << 1 */
|
|
+ { 0x000000014f187140, 0x00000000606c5e34 },
|
|
+ /* x^130048 mod p(x)` << 1, x^130112 mod p(x)` << 1 */
|
|
+ { 0x000000019581b9da, 0x000000019766beaa },
|
|
+ /* x^129024 mod p(x)` << 1, x^129088 mod p(x)` << 1 */
|
|
+ { 0x00000001091bc984, 0x00000001d80c506c },
|
|
+ /* x^128000 mod p(x)` << 1, x^128064 mod p(x)` << 1 */
|
|
+ { 0x000000001067223c, 0x000000001e73837c },
|
|
+ /* x^126976 mod p(x)` << 1, x^127040 mod p(x)` << 1 */
|
|
+ { 0x00000001ab16ea02, 0x0000000064d587de },
|
|
+ /* x^125952 mod p(x)` << 1, x^126016 mod p(x)` << 1 */
|
|
+ { 0x000000013c4598a8, 0x00000000f4a507b0 },
|
|
+ /* x^124928 mod p(x)` << 1, x^124992 mod p(x)` << 1 */
|
|
+ { 0x00000000b3735430, 0x0000000040e342fc },
|
|
+ /* x^123904 mod p(x)` << 1, x^123968 mod p(x)` << 1 */
|
|
+ { 0x00000001bb3fc0c0, 0x00000001d5ad9c3a },
|
|
+ /* x^122880 mod p(x)` << 1, x^122944 mod p(x)` << 1 */
|
|
+ { 0x00000001570ae19c, 0x0000000094a691a4 },
|
|
+ /* x^121856 mod p(x)` << 1, x^121920 mod p(x)` << 1 */
|
|
+ { 0x00000001ea910712, 0x00000001271ecdfa },
|
|
+ /* x^120832 mod p(x)` << 1, x^120896 mod p(x)` << 1 */
|
|
+ { 0x0000000167127128, 0x000000009e54475a },
|
|
+ /* x^119808 mod p(x)` << 1, x^119872 mod p(x)` << 1 */
|
|
+ { 0x0000000019e790a2, 0x00000000c9c099ee },
|
|
+ /* x^118784 mod p(x)` << 1, x^118848 mod p(x)` << 1 */
|
|
+ { 0x000000003788f710, 0x000000009a2f736c },
|
|
+ /* x^117760 mod p(x)` << 1, x^117824 mod p(x)` << 1 */
|
|
+ { 0x00000001682a160e, 0x00000000bb9f4996 },
|
|
+ /* x^116736 mod p(x)` << 1, x^116800 mod p(x)` << 1 */
|
|
+ { 0x000000007f0ebd2e, 0x00000001db688050 },
|
|
+ /* x^115712 mod p(x)` << 1, x^115776 mod p(x)` << 1 */
|
|
+ { 0x000000002b032080, 0x00000000e9b10af4 },
|
|
+ /* x^114688 mod p(x)` << 1, x^114752 mod p(x)` << 1 */
|
|
+ { 0x00000000cfd1664a, 0x000000012d4545e4 },
|
|
+ /* x^113664 mod p(x)` << 1, x^113728 mod p(x)` << 1 */
|
|
+ { 0x00000000aa1181c2, 0x000000000361139c },
|
|
+ /* x^112640 mod p(x)` << 1, x^112704 mod p(x)` << 1 */
|
|
+ { 0x00000000ddd08002, 0x00000001a5a1a3a8 },
|
|
+ /* x^111616 mod p(x)` << 1, x^111680 mod p(x)` << 1 */
|
|
+ { 0x00000000e8dd0446, 0x000000006844e0b0 },
|
|
+ /* x^110592 mod p(x)` << 1, x^110656 mod p(x)` << 1 */
|
|
+ { 0x00000001bbd94a00, 0x00000000c3762f28 },
|
|
+ /* x^109568 mod p(x)` << 1, x^109632 mod p(x)` << 1 */
|
|
+ { 0x00000000ab6cd180, 0x00000001d26287a2 },
|
|
+ /* x^108544 mod p(x)` << 1, x^108608 mod p(x)` << 1 */
|
|
+ { 0x0000000031803ce2, 0x00000001f6f0bba8 },
|
|
+ /* x^107520 mod p(x)` << 1, x^107584 mod p(x)` << 1 */
|
|
+ { 0x0000000024f40b0c, 0x000000002ffabd62 },
|
|
+ /* x^106496 mod p(x)` << 1, x^106560 mod p(x)` << 1 */
|
|
+ { 0x00000001ba1d9834, 0x00000000fb4516b8 },
|
|
+ /* x^105472 mod p(x)` << 1, x^105536 mod p(x)` << 1 */
|
|
+ { 0x0000000104de61aa, 0x000000018cfa961c },
|
|
+ /* x^104448 mod p(x)` << 1, x^104512 mod p(x)` << 1 */
|
|
+ { 0x0000000113e40d46, 0x000000019e588d52 },
|
|
+ /* x^103424 mod p(x)` << 1, x^103488 mod p(x)` << 1 */
|
|
+ { 0x00000001415598a0, 0x00000001180f0bbc },
|
|
+ /* x^102400 mod p(x)` << 1, x^102464 mod p(x)` << 1 */
|
|
+ { 0x00000000bf6c8c90, 0x00000000e1d9177a },
|
|
+ /* x^101376 mod p(x)` << 1, x^101440 mod p(x)` << 1 */
|
|
+ { 0x00000001788b0504, 0x0000000105abc27c },
|
|
+ /* x^100352 mod p(x)` << 1, x^100416 mod p(x)` << 1 */
|
|
+ { 0x0000000038385d02, 0x00000000972e4a58 },
|
|
+ /* x^99328 mod p(x)` << 1, x^99392 mod p(x)` << 1 */
|
|
+ { 0x00000001b6c83844, 0x0000000183499a5e },
|
|
+ /* x^98304 mod p(x)` << 1, x^98368 mod p(x)` << 1 */
|
|
+ { 0x0000000051061a8a, 0x00000001c96a8cca },
|
|
+ /* x^97280 mod p(x)` << 1, x^97344 mod p(x)` << 1 */
|
|
+ { 0x000000017351388a, 0x00000001a1a5b60c },
|
|
+ /* x^96256 mod p(x)` << 1, x^96320 mod p(x)` << 1 */
|
|
+ { 0x0000000132928f92, 0x00000000e4b6ac9c },
|
|
+ /* x^95232 mod p(x)` << 1, x^95296 mod p(x)` << 1 */
|
|
+ { 0x00000000e6b4f48a, 0x00000001807e7f5a },
|
|
+ /* x^94208 mod p(x)` << 1, x^94272 mod p(x)` << 1 */
|
|
+ { 0x0000000039d15e90, 0x000000017a7e3bc8 },
|
|
+ /* x^93184 mod p(x)` << 1, x^93248 mod p(x)` << 1 */
|
|
+ { 0x00000000312d6074, 0x00000000d73975da },
|
|
+ /* x^92160 mod p(x)` << 1, x^92224 mod p(x)` << 1 */
|
|
+ { 0x000000017bbb2cc4, 0x000000017375d038 },
|
|
+ /* x^91136 mod p(x)` << 1, x^91200 mod p(x)` << 1 */
|
|
+ { 0x000000016ded3e18, 0x00000000193680bc },
|
|
+ /* x^90112 mod p(x)` << 1, x^90176 mod p(x)` << 1 */
|
|
+ { 0x00000000f1638b16, 0x00000000999b06f6 },
|
|
+ /* x^89088 mod p(x)` << 1, x^89152 mod p(x)` << 1 */
|
|
+ { 0x00000001d38b9ecc, 0x00000001f685d2b8 },
|
|
+ /* x^88064 mod p(x)` << 1, x^88128 mod p(x)` << 1 */
|
|
+ { 0x000000018b8d09dc, 0x00000001f4ecbed2 },
|
|
+ /* x^87040 mod p(x)` << 1, x^87104 mod p(x)` << 1 */
|
|
+ { 0x00000000e7bc27d2, 0x00000000ba16f1a0 },
|
|
+ /* x^86016 mod p(x)` << 1, x^86080 mod p(x)` << 1 */
|
|
+ { 0x00000000275e1e96, 0x0000000115aceac4 },
|
|
+ /* x^84992 mod p(x)` << 1, x^85056 mod p(x)` << 1 */
|
|
+ { 0x00000000e2e3031e, 0x00000001aeff6292 },
|
|
+ /* x^83968 mod p(x)` << 1, x^84032 mod p(x)` << 1 */
|
|
+ { 0x00000001041c84d8, 0x000000009640124c },
|
|
+ /* x^82944 mod p(x)` << 1, x^83008 mod p(x)` << 1 */
|
|
+ { 0x00000000706ce672, 0x0000000114f41f02 },
|
|
+ /* x^81920 mod p(x)` << 1, x^81984 mod p(x)` << 1 */
|
|
+ { 0x000000015d5070da, 0x000000009c5f3586 },
|
|
+ /* x^80896 mod p(x)` << 1, x^80960 mod p(x)` << 1 */
|
|
+ { 0x0000000038f9493a, 0x00000001878275fa },
|
|
+ /* x^79872 mod p(x)` << 1, x^79936 mod p(x)` << 1 */
|
|
+ { 0x00000000a3348a76, 0x00000000ddc42ce8 },
|
|
+ /* x^78848 mod p(x)` << 1, x^78912 mod p(x)` << 1 */
|
|
+ { 0x00000001ad0aab92, 0x0000000181d2c73a },
|
|
+ /* x^77824 mod p(x)` << 1, x^77888 mod p(x)` << 1 */
|
|
+ { 0x000000019e85f712, 0x0000000141c9320a },
|
|
+ /* x^76800 mod p(x)` << 1, x^76864 mod p(x)` << 1 */
|
|
+ { 0x000000005a871e76, 0x000000015235719a },
|
|
+ /* x^75776 mod p(x)` << 1, x^75840 mod p(x)` << 1 */
|
|
+ { 0x000000017249c662, 0x00000000be27d804 },
|
|
+ /* x^74752 mod p(x)` << 1, x^74816 mod p(x)` << 1 */
|
|
+ { 0x000000003a084712, 0x000000006242d45a },
|
|
+ /* x^73728 mod p(x)` << 1, x^73792 mod p(x)` << 1 */
|
|
+ { 0x00000000ed438478, 0x000000009a53638e },
|
|
+ /* x^72704 mod p(x)` << 1, x^72768 mod p(x)` << 1 */
|
|
+ { 0x00000000abac34cc, 0x00000001001ecfb6 },
|
|
+ /* x^71680 mod p(x)` << 1, x^71744 mod p(x)` << 1 */
|
|
+ { 0x000000005f35ef3e, 0x000000016d7c2d64 },
|
|
+ /* x^70656 mod p(x)` << 1, x^70720 mod p(x)` << 1 */
|
|
+ { 0x0000000047d6608c, 0x00000001d0ce46c0 },
|
|
+ /* x^69632 mod p(x)` << 1, x^69696 mod p(x)` << 1 */
|
|
+ { 0x000000002d01470e, 0x0000000124c907b4 },
|
|
+ /* x^68608 mod p(x)` << 1, x^68672 mod p(x)` << 1 */
|
|
+ { 0x0000000158bbc7b0, 0x0000000018a555ca },
|
|
+ /* x^67584 mod p(x)` << 1, x^67648 mod p(x)` << 1 */
|
|
+ { 0x00000000c0a23e8e, 0x000000006b0980bc },
|
|
+ /* x^66560 mod p(x)` << 1, x^66624 mod p(x)` << 1 */
|
|
+ { 0x00000001ebd85c88, 0x000000008bbba964 },
|
|
+ /* x^65536 mod p(x)` << 1, x^65600 mod p(x)` << 1 */
|
|
+ { 0x000000019ee20bb2, 0x00000001070a5a1e },
|
|
+ /* x^64512 mod p(x)` << 1, x^64576 mod p(x)` << 1 */
|
|
+ { 0x00000001acabf2d6, 0x000000002204322a },
|
|
+ /* x^63488 mod p(x)` << 1, x^63552 mod p(x)` << 1 */
|
|
+ { 0x00000001b7963d56, 0x00000000a27524d0 },
|
|
+ /* x^62464 mod p(x)` << 1, x^62528 mod p(x)` << 1 */
|
|
+ { 0x000000017bffa1fe, 0x0000000020b1e4ba },
|
|
+ /* x^61440 mod p(x)` << 1, x^61504 mod p(x)` << 1 */
|
|
+ { 0x000000001f15333e, 0x0000000032cc27fc },
|
|
+ /* x^60416 mod p(x)` << 1, x^60480 mod p(x)` << 1 */
|
|
+ { 0x000000018593129e, 0x0000000044dd22b8 },
|
|
+ /* x^59392 mod p(x)` << 1, x^59456 mod p(x)` << 1 */
|
|
+ { 0x000000019cb32602, 0x00000000dffc9e0a },
|
|
+ /* x^58368 mod p(x)` << 1, x^58432 mod p(x)` << 1 */
|
|
+ { 0x0000000142b05cc8, 0x00000001b7a0ed14 },
|
|
+ /* x^57344 mod p(x)` << 1, x^57408 mod p(x)` << 1 */
|
|
+ { 0x00000001be49e7a4, 0x00000000c7842488 },
|
|
+ /* x^56320 mod p(x)` << 1, x^56384 mod p(x)` << 1 */
|
|
+ { 0x0000000108f69d6c, 0x00000001c02a4fee },
|
|
+ /* x^55296 mod p(x)` << 1, x^55360 mod p(x)` << 1 */
|
|
+ { 0x000000006c0971f0, 0x000000003c273778 },
|
|
+ /* x^54272 mod p(x)` << 1, x^54336 mod p(x)` << 1 */
|
|
+ { 0x000000005b16467a, 0x00000001d63f8894 },
|
|
+ /* x^53248 mod p(x)` << 1, x^53312 mod p(x)` << 1 */
|
|
+ { 0x00000001551a628e, 0x000000006be557d6 },
|
|
+ /* x^52224 mod p(x)` << 1, x^52288 mod p(x)` << 1 */
|
|
+ { 0x000000019e42ea92, 0x000000006a7806ea },
|
|
+ /* x^51200 mod p(x)` << 1, x^51264 mod p(x)` << 1 */
|
|
+ { 0x000000012fa83ff2, 0x000000016155aa0c },
|
|
+ /* x^50176 mod p(x)` << 1, x^50240 mod p(x)` << 1 */
|
|
+ { 0x000000011ca9cde0, 0x00000000908650ac },
|
|
+ /* x^49152 mod p(x)` << 1, x^49216 mod p(x)` << 1 */
|
|
+ { 0x00000000c8e5cd74, 0x00000000aa5a8084 },
|
|
+ /* x^48128 mod p(x)` << 1, x^48192 mod p(x)` << 1 */
|
|
+ { 0x0000000096c27f0c, 0x0000000191bb500a },
|
|
+ /* x^47104 mod p(x)` << 1, x^47168 mod p(x)` << 1 */
|
|
+ { 0x000000002baed926, 0x0000000064e9bed0 },
|
|
+ /* x^46080 mod p(x)` << 1, x^46144 mod p(x)` << 1 */
|
|
+ { 0x000000017c8de8d2, 0x000000009444f302 },
|
|
+ /* x^45056 mod p(x)` << 1, x^45120 mod p(x)` << 1 */
|
|
+ { 0x00000000d43d6068, 0x000000019db07d3c },
|
|
+ /* x^44032 mod p(x)` << 1, x^44096 mod p(x)` << 1 */
|
|
+ { 0x00000000cb2c4b26, 0x00000001359e3e6e },
|
|
+ /* x^43008 mod p(x)` << 1, x^43072 mod p(x)` << 1 */
|
|
+ { 0x0000000145b8da26, 0x00000001e4f10dd2 },
|
|
+ /* x^41984 mod p(x)` << 1, x^42048 mod p(x)` << 1 */
|
|
+ { 0x000000018fff4b08, 0x0000000124f5735e },
|
|
+ /* x^40960 mod p(x)` << 1, x^41024 mod p(x)` << 1 */
|
|
+ { 0x0000000150b58ed0, 0x0000000124760a4c },
|
|
+ /* x^39936 mod p(x)` << 1, x^40000 mod p(x)` << 1 */
|
|
+ { 0x00000001549f39bc, 0x000000000f1fc186 },
|
|
+ /* x^38912 mod p(x)` << 1, x^38976 mod p(x)` << 1 */
|
|
+ { 0x00000000ef4d2f42, 0x00000000150e4cc4 },
|
|
+ /* x^37888 mod p(x)` << 1, x^37952 mod p(x)` << 1 */
|
|
+ { 0x00000001b1468572, 0x000000002a6204e8 },
|
|
+ /* x^36864 mod p(x)` << 1, x^36928 mod p(x)` << 1 */
|
|
+ { 0x000000013d7403b2, 0x00000000beb1d432 },
|
|
+ /* x^35840 mod p(x)` << 1, x^35904 mod p(x)` << 1 */
|
|
+ { 0x00000001a4681842, 0x0000000135f3f1f0 },
|
|
+ /* x^34816 mod p(x)` << 1, x^34880 mod p(x)` << 1 */
|
|
+ { 0x0000000167714492, 0x0000000074fe2232 },
|
|
+ /* x^33792 mod p(x)` << 1, x^33856 mod p(x)` << 1 */
|
|
+ { 0x00000001e599099a, 0x000000001ac6e2ba },
|
|
+ /* x^32768 mod p(x)` << 1, x^32832 mod p(x)` << 1 */
|
|
+ { 0x00000000fe128194, 0x0000000013fca91e },
|
|
+ /* x^31744 mod p(x)` << 1, x^31808 mod p(x)` << 1 */
|
|
+ { 0x0000000077e8b990, 0x0000000183f4931e },
|
|
+ /* x^30720 mod p(x)` << 1, x^30784 mod p(x)` << 1 */
|
|
+ { 0x00000001a267f63a, 0x00000000b6d9b4e4 },
|
|
+ /* x^29696 mod p(x)` << 1, x^29760 mod p(x)` << 1 */
|
|
+ { 0x00000001945c245a, 0x00000000b5188656 },
|
|
+ /* x^28672 mod p(x)` << 1, x^28736 mod p(x)` << 1 */
|
|
+ { 0x0000000149002e76, 0x0000000027a81a84 },
|
|
+ /* x^27648 mod p(x)` << 1, x^27712 mod p(x)` << 1 */
|
|
+ { 0x00000001bb8310a4, 0x0000000125699258 },
|
|
+ /* x^26624 mod p(x)` << 1, x^26688 mod p(x)` << 1 */
|
|
+ { 0x000000019ec60bcc, 0x00000001b23de796 },
|
|
+ /* x^25600 mod p(x)` << 1, x^25664 mod p(x)` << 1 */
|
|
+ { 0x000000012d8590ae, 0x00000000fe4365dc },
|
|
+ /* x^24576 mod p(x)` << 1, x^24640 mod p(x)` << 1 */
|
|
+ { 0x0000000065b00684, 0x00000000c68f497a },
|
|
+ /* x^23552 mod p(x)` << 1, x^23616 mod p(x)` << 1 */
|
|
+ { 0x000000015e5aeadc, 0x00000000fbf521ee },
|
|
+ /* x^22528 mod p(x)` << 1, x^22592 mod p(x)` << 1 */
|
|
+ { 0x00000000b77ff2b0, 0x000000015eac3378 },
|
|
+ /* x^21504 mod p(x)` << 1, x^21568 mod p(x)` << 1 */
|
|
+ { 0x0000000188da2ff6, 0x0000000134914b90 },
|
|
+ /* x^20480 mod p(x)` << 1, x^20544 mod p(x)` << 1 */
|
|
+ { 0x0000000063da929a, 0x0000000016335cfe },
|
|
+ /* x^19456 mod p(x)` << 1, x^19520 mod p(x)` << 1 */
|
|
+ { 0x00000001389caa80, 0x000000010372d10c },
|
|
+ /* x^18432 mod p(x)` << 1, x^18496 mod p(x)` << 1 */
|
|
+ { 0x000000013db599d2, 0x000000015097b908 },
|
|
+ /* x^17408 mod p(x)` << 1, x^17472 mod p(x)` << 1 */
|
|
+ { 0x0000000122505a86, 0x00000001227a7572 },
|
|
+ /* x^16384 mod p(x)` << 1, x^16448 mod p(x)` << 1 */
|
|
+ { 0x000000016bd72746, 0x000000009a8f75c0 },
|
|
+ /* x^15360 mod p(x)` << 1, x^15424 mod p(x)` << 1 */
|
|
+ { 0x00000001c3faf1d4, 0x00000000682c77a2 },
|
|
+ /* x^14336 mod p(x)` << 1, x^14400 mod p(x)` << 1 */
|
|
+ { 0x00000001111c826c, 0x00000000231f091c },
|
|
+ /* x^13312 mod p(x)` << 1, x^13376 mod p(x)` << 1 */
|
|
+ { 0x00000000153e9fb2, 0x000000007d4439f2 },
|
|
+ /* x^12288 mod p(x)` << 1, x^12352 mod p(x)` << 1 */
|
|
+ { 0x000000002b1f7b60, 0x000000017e221efc },
|
|
+ /* x^11264 mod p(x)` << 1, x^11328 mod p(x)` << 1 */
|
|
+ { 0x00000000b1dba570, 0x0000000167457c38 },
|
|
+ /* x^10240 mod p(x)` << 1, x^10304 mod p(x)` << 1 */
|
|
+ { 0x00000001f6397b76, 0x00000000bdf081c4 },
|
|
+ /* x^9216 mod p(x)` << 1, x^9280 mod p(x)` << 1 */
|
|
+ { 0x0000000156335214, 0x000000016286d6b0 },
|
|
+ /* x^8192 mod p(x)` << 1, x^8256 mod p(x)` << 1 */
|
|
+ { 0x00000001d70e3986, 0x00000000c84f001c },
|
|
+ /* x^7168 mod p(x)` << 1, x^7232 mod p(x)` << 1 */
|
|
+ { 0x000000003701a774, 0x0000000064efe7c0 },
|
|
+ /* x^6144 mod p(x)` << 1, x^6208 mod p(x)` << 1 */
|
|
+ { 0x00000000ac81ef72, 0x000000000ac2d904 },
|
|
+ /* x^5120 mod p(x)` << 1, x^5184 mod p(x)` << 1 */
|
|
+ { 0x0000000133212464, 0x00000000fd226d14 },
|
|
+ /* x^4096 mod p(x)` << 1, x^4160 mod p(x)` << 1 */
|
|
+ { 0x00000000e4e45610, 0x000000011cfd42e0 },
|
|
+ /* x^3072 mod p(x)` << 1, x^3136 mod p(x)` << 1 */
|
|
+ { 0x000000000c1bd370, 0x000000016e5a5678 },
|
|
+ /* x^2048 mod p(x)` << 1, x^2112 mod p(x)` << 1 */
|
|
+ { 0x00000001a7b9e7a6, 0x00000001d888fe22 },
|
|
+ /* x^1024 mod p(x)` << 1, x^1088 mod p(x)` << 1 */
|
|
+ { 0x000000007d657a10, 0x00000001af77fcd4 }
|
|
+#else /* __LITTLE_ENDIAN__ */
|
|
+ /* x^261120 mod p(x)` << 1, x^261184 mod p(x)` << 1 */
|
|
+ { 0x00000001651797d2, 0x0000000099ea94a8 },
|
|
+ /* x^260096 mod p(x)` << 1, x^260160 mod p(x)` << 1 */
|
|
+ { 0x0000000021e0d56c, 0x00000000945a8420 },
|
|
+ /* x^259072 mod p(x)` << 1, x^259136 mod p(x)` << 1 */
|
|
+ { 0x000000000f95ecaa, 0x0000000030762706 },
|
|
+ /* x^258048 mod p(x)` << 1, x^258112 mod p(x)` << 1 */
|
|
+ { 0x00000001ebd224ac, 0x00000001a52fc582 },
|
|
+ /* x^257024 mod p(x)` << 1, x^257088 mod p(x)` << 1 */
|
|
+ { 0x000000000ccb97ca, 0x00000001a4a7167a },
|
|
+ /* x^256000 mod p(x)` << 1, x^256064 mod p(x)` << 1 */
|
|
+ { 0x00000001006ec8a8, 0x000000000c18249a },
|
|
+ /* x^254976 mod p(x)` << 1, x^255040 mod p(x)` << 1 */
|
|
+ { 0x000000014f58f196, 0x00000000a924ae7c },
|
|
+ /* x^253952 mod p(x)` << 1, x^254016 mod p(x)` << 1 */
|
|
+ { 0x00000001a7192ca6, 0x00000001e12ccc12 },
|
|
+ /* x^252928 mod p(x)` << 1, x^252992 mod p(x)` << 1 */
|
|
+ { 0x000000019a64bab2, 0x00000000a0b9d4ac },
|
|
+ /* x^251904 mod p(x)` << 1, x^251968 mod p(x)` << 1 */
|
|
+ { 0x0000000014f4ed2e, 0x0000000095e8ddfe },
|
|
+ /* x^250880 mod p(x)` << 1, x^250944 mod p(x)` << 1 */
|
|
+ { 0x000000011092b6a2, 0x00000000233fddc4 },
|
|
+ /* x^249856 mod p(x)` << 1, x^249920 mod p(x)` << 1 */
|
|
+ { 0x00000000c8a1629c, 0x00000001b4529b62 },
|
|
+ /* x^248832 mod p(x)` << 1, x^248896 mod p(x)` << 1 */
|
|
+ { 0x000000017bf32e8e, 0x00000001a7fa0e64 },
|
|
+ /* x^247808 mod p(x)` << 1, x^247872 mod p(x)` << 1 */
|
|
+ { 0x00000001f8cc6582, 0x00000001b5334592 },
|
|
+ /* x^246784 mod p(x)` << 1, x^246848 mod p(x)` << 1 */
|
|
+ { 0x000000008631ddf0, 0x000000011f8ee1b4 },
|
|
+ /* x^245760 mod p(x)` << 1, x^245824 mod p(x)` << 1 */
|
|
+ { 0x000000007e5a76d0, 0x000000006252e632 },
|
|
+ /* x^244736 mod p(x)` << 1, x^244800 mod p(x)` << 1 */
|
|
+ { 0x000000002b09b31c, 0x00000000ab973e84 },
|
|
+ /* x^243712 mod p(x)` << 1, x^243776 mod p(x)` << 1 */
|
|
+ { 0x00000001b2df1f84, 0x000000007734f5ec },
|
|
+ /* x^242688 mod p(x)` << 1, x^242752 mod p(x)` << 1 */
|
|
+ { 0x00000001d6f56afc, 0x000000007c547798 },
|
|
+ /* x^241664 mod p(x)` << 1, x^241728 mod p(x)` << 1 */
|
|
+ { 0x00000001b9b5e70c, 0x000000007ec40210 },
|
|
+ /* x^240640 mod p(x)` << 1, x^240704 mod p(x)` << 1 */
|
|
+ { 0x0000000034b626d2, 0x00000001ab1695a8 },
|
|
+ /* x^239616 mod p(x)` << 1, x^239680 mod p(x)` << 1 */
|
|
+ { 0x000000014c53479a, 0x0000000090494bba },
|
|
+ /* x^238592 mod p(x)` << 1, x^238656 mod p(x)` << 1 */
|
|
+ { 0x00000001a6d179a4, 0x00000001123fb816 },
|
|
+ /* x^237568 mod p(x)` << 1, x^237632 mod p(x)` << 1 */
|
|
+ { 0x000000015abd16b4, 0x00000001e188c74c },
|
|
+ /* x^236544 mod p(x)` << 1, x^236608 mod p(x)` << 1 */
|
|
+ { 0x00000000018f9852, 0x00000001c2d3451c },
|
|
+ /* x^235520 mod p(x)` << 1, x^235584 mod p(x)` << 1 */
|
|
+ { 0x000000001fb3084a, 0x00000000f55cf1ca },
|
|
+ /* x^234496 mod p(x)` << 1, x^234560 mod p(x)` << 1 */
|
|
+ { 0x00000000c53dfb04, 0x00000001a0531540 },
|
|
+ /* x^233472 mod p(x)` << 1, x^233536 mod p(x)` << 1 */
|
|
+ { 0x00000000e10c9ad6, 0x0000000132cd7ebc },
|
|
+ /* x^232448 mod p(x)` << 1, x^232512 mod p(x)` << 1 */
|
|
+ { 0x0000000025aa994a, 0x0000000073ab7f36 },
|
|
+ /* x^231424 mod p(x)` << 1, x^231488 mod p(x)` << 1 */
|
|
+ { 0x00000000fa3a74c4, 0x0000000041aed1c2 },
|
|
+ /* x^230400 mod p(x)` << 1, x^230464 mod p(x)` << 1 */
|
|
+ { 0x0000000033eb3f40, 0x0000000136c53800 },
|
|
+ /* x^229376 mod p(x)` << 1, x^229440 mod p(x)` << 1 */
|
|
+ { 0x000000017193f296, 0x0000000126835a30 },
|
|
+ /* x^228352 mod p(x)` << 1, x^228416 mod p(x)` << 1 */
|
|
+ { 0x0000000043f6c86a, 0x000000006241b502 },
|
|
+ /* x^227328 mod p(x)` << 1, x^227392 mod p(x)` << 1 */
|
|
+ { 0x000000016b513ec6, 0x00000000d5196ad4 },
|
|
+ /* x^226304 mod p(x)` << 1, x^226368 mod p(x)` << 1 */
|
|
+ { 0x00000000c8f25b4e, 0x000000009cfa769a },
|
|
+ /* x^225280 mod p(x)` << 1, x^225344 mod p(x)` << 1 */
|
|
+ { 0x00000001a45048ec, 0x00000000920e5df4 },
|
|
+ /* x^224256 mod p(x)` << 1, x^224320 mod p(x)` << 1 */
|
|
+ { 0x000000000c441004, 0x0000000169dc310e },
|
|
+ /* x^223232 mod p(x)` << 1, x^223296 mod p(x)` << 1 */
|
|
+ { 0x000000000e17cad6, 0x0000000009fc331c },
|
|
+ /* x^222208 mod p(x)` << 1, x^222272 mod p(x)` << 1 */
|
|
+ { 0x00000001253ae964, 0x000000010d94a81e },
|
|
+ /* x^221184 mod p(x)` << 1, x^221248 mod p(x)` << 1 */
|
|
+ { 0x00000001d7c88ebc, 0x0000000027a20ab2 },
|
|
+ /* x^220160 mod p(x)` << 1, x^220224 mod p(x)` << 1 */
|
|
+ { 0x00000001e7ca913a, 0x0000000114f87504 },
|
|
+ /* x^219136 mod p(x)` << 1, x^219200 mod p(x)` << 1 */
|
|
+ { 0x0000000033ed078a, 0x000000004b076d96 },
|
|
+ /* x^218112 mod p(x)` << 1, x^218176 mod p(x)` << 1 */
|
|
+ { 0x00000000e1839c78, 0x00000000da4d1e74 },
|
|
+ /* x^217088 mod p(x)` << 1, x^217152 mod p(x)` << 1 */
|
|
+ { 0x00000001322b267e, 0x000000001b81f672 },
|
|
+ /* x^216064 mod p(x)` << 1, x^216128 mod p(x)` << 1 */
|
|
+ { 0x00000000638231b6, 0x000000009367c988 },
|
|
+ /* x^215040 mod p(x)` << 1, x^215104 mod p(x)` << 1 */
|
|
+ { 0x00000001ee7f16f4, 0x00000001717214ca },
|
|
+ /* x^214016 mod p(x)` << 1, x^214080 mod p(x)` << 1 */
|
|
+ { 0x0000000117d9924a, 0x000000009f47d820 },
|
|
+ /* x^212992 mod p(x)` << 1, x^213056 mod p(x)` << 1 */
|
|
+ { 0x00000000e1a9e0c4, 0x000000010d9a47d2 },
|
|
+ /* x^211968 mod p(x)` << 1, x^212032 mod p(x)` << 1 */
|
|
+ { 0x00000001403731dc, 0x00000000a696c58c },
|
|
+ /* x^210944 mod p(x)` << 1, x^211008 mod p(x)` << 1 */
|
|
+ { 0x00000001a5ea9682, 0x000000002aa28ec6 },
|
|
+ /* x^209920 mod p(x)` << 1, x^209984 mod p(x)` << 1 */
|
|
+ { 0x0000000101c5c578, 0x00000001fe18fd9a },
|
|
+ /* x^208896 mod p(x)` << 1, x^208960 mod p(x)` << 1 */
|
|
+ { 0x00000000dddf6494, 0x000000019d4fc1ae },
|
|
+ /* x^207872 mod p(x)` << 1, x^207936 mod p(x)` << 1 */
|
|
+ { 0x00000000f1c3db28, 0x00000001ba0e3dea },
|
|
+ /* x^206848 mod p(x)` << 1, x^206912 mod p(x)` << 1 */
|
|
+ { 0x000000013112fb9c, 0x0000000074b59a5e },
|
|
+ /* x^205824 mod p(x)` << 1, x^205888 mod p(x)` << 1 */
|
|
+ { 0x00000000b680b906, 0x00000000f2b5ea98 },
|
|
+ /* x^204800 mod p(x)` << 1, x^204864 mod p(x)` << 1 */
|
|
+ { 0x000000001a282932, 0x0000000187132676 },
|
|
+ /* x^203776 mod p(x)` << 1, x^203840 mod p(x)` << 1 */
|
|
+ { 0x0000000089406e7e, 0x000000010a8c6ad4 },
|
|
+ /* x^202752 mod p(x)` << 1, x^202816 mod p(x)` << 1 */
|
|
+ { 0x00000001def6be8c, 0x00000001e21dfe70 },
|
|
+ /* x^201728 mod p(x)` << 1, x^201792 mod p(x)` << 1 */
|
|
+ { 0x0000000075258728, 0x00000001da0050e4 },
|
|
+ /* x^200704 mod p(x)` << 1, x^200768 mod p(x)` << 1 */
|
|
+ { 0x000000019536090a, 0x00000000772172ae },
|
|
+ /* x^199680 mod p(x)` << 1, x^199744 mod p(x)` << 1 */
|
|
+ { 0x00000000f2455bfc, 0x00000000e47724aa },
|
|
+ /* x^198656 mod p(x)` << 1, x^198720 mod p(x)` << 1 */
|
|
+ { 0x000000018c40baf4, 0x000000003cd63ac4 },
|
|
+ /* x^197632 mod p(x)` << 1, x^197696 mod p(x)` << 1 */
|
|
+ { 0x000000004cd390d4, 0x00000001bf47d352 },
|
|
+ /* x^196608 mod p(x)` << 1, x^196672 mod p(x)` << 1 */
|
|
+ { 0x00000001e4ece95a, 0x000000018dc1d708 },
|
|
+ /* x^195584 mod p(x)` << 1, x^195648 mod p(x)` << 1 */
|
|
+ { 0x000000001a3ee918, 0x000000002d4620a4 },
|
|
+ /* x^194560 mod p(x)` << 1, x^194624 mod p(x)` << 1 */
|
|
+ { 0x000000007c652fb8, 0x0000000058fd1740 },
|
|
+ /* x^193536 mod p(x)` << 1, x^193600 mod p(x)` << 1 */
|
|
+ { 0x000000011c67842c, 0x00000000dadd9bfc },
|
|
+ /* x^192512 mod p(x)` << 1, x^192576 mod p(x)` << 1 */
|
|
+ { 0x00000000254f759c, 0x00000001ea2140be },
|
|
+ /* x^191488 mod p(x)` << 1, x^191552 mod p(x)` << 1 */
|
|
+ { 0x000000007ece94ca, 0x000000009de128ba },
|
|
+ /* x^190464 mod p(x)` << 1, x^190528 mod p(x)` << 1 */
|
|
+ { 0x0000000038f258c2, 0x000000013ac3aa8e },
|
|
+ /* x^189440 mod p(x)` << 1, x^189504 mod p(x)` << 1 */
|
|
+ { 0x00000001cdf17b00, 0x0000000099980562 },
|
|
+ /* x^188416 mod p(x)` << 1, x^188480 mod p(x)` << 1 */
|
|
+ { 0x000000011f882c16, 0x00000001c1579c86 },
|
|
+ /* x^187392 mod p(x)` << 1, x^187456 mod p(x)` << 1 */
|
|
+ { 0x0000000100093fc8, 0x0000000068dbbf94 },
|
|
+ /* x^186368 mod p(x)` << 1, x^186432 mod p(x)` << 1 */
|
|
+ { 0x00000001cd684f16, 0x000000004509fb04 },
|
|
+ /* x^185344 mod p(x)` << 1, x^185408 mod p(x)` << 1 */
|
|
+ { 0x000000004bc6a70a, 0x00000001202f6398 },
|
|
+ /* x^184320 mod p(x)` << 1, x^184384 mod p(x)` << 1 */
|
|
+ { 0x000000004fc7e8e4, 0x000000013aea243e },
|
|
+ /* x^183296 mod p(x)` << 1, x^183360 mod p(x)` << 1 */
|
|
+ { 0x0000000130103f1c, 0x00000001b4052ae6 },
|
|
+ /* x^182272 mod p(x)` << 1, x^182336 mod p(x)` << 1 */
|
|
+ { 0x0000000111b0024c, 0x00000001cd2a0ae8 },
|
|
+ /* x^181248 mod p(x)` << 1, x^181312 mod p(x)` << 1 */
|
|
+ { 0x000000010b3079da, 0x00000001fe4aa8b4 },
|
|
+ /* x^180224 mod p(x)` << 1, x^180288 mod p(x)` << 1 */
|
|
+ { 0x000000010192bcc2, 0x00000001d1559a42 },
|
|
+ /* x^179200 mod p(x)` << 1, x^179264 mod p(x)` << 1 */
|
|
+ { 0x0000000074838d50, 0x00000001f3e05ecc },
|
|
+ /* x^178176 mod p(x)` << 1, x^178240 mod p(x)` << 1 */
|
|
+ { 0x000000001b20f520, 0x0000000104ddd2cc },
|
|
+ /* x^177152 mod p(x)` << 1, x^177216 mod p(x)` << 1 */
|
|
+ { 0x0000000050c3590a, 0x000000015393153c },
|
|
+ /* x^176128 mod p(x)` << 1, x^176192 mod p(x)` << 1 */
|
|
+ { 0x00000000b41cac8e, 0x0000000057e942c6 },
|
|
+ /* x^175104 mod p(x)` << 1, x^175168 mod p(x)` << 1 */
|
|
+ { 0x000000000c72cc78, 0x000000012c633850 },
|
|
+ /* x^174080 mod p(x)` << 1, x^174144 mod p(x)` << 1 */
|
|
+ { 0x0000000030cdb032, 0x00000000ebcaae4c },
|
|
+ /* x^173056 mod p(x)` << 1, x^173120 mod p(x)` << 1 */
|
|
+ { 0x000000013e09fc32, 0x000000013ee532a6 },
|
|
+ /* x^172032 mod p(x)` << 1, x^172096 mod p(x)` << 1 */
|
|
+ { 0x000000001ed624d2, 0x00000001bf0cbc7e },
|
|
+ /* x^171008 mod p(x)` << 1, x^171072 mod p(x)` << 1 */
|
|
+ { 0x00000000781aee1a, 0x00000000d50b7a5a },
|
|
+ /* x^169984 mod p(x)` << 1, x^170048 mod p(x)` << 1 */
|
|
+ { 0x00000001c4d8348c, 0x0000000002fca6e8 },
|
|
+ /* x^168960 mod p(x)` << 1, x^169024 mod p(x)` << 1 */
|
|
+ { 0x0000000057a40336, 0x000000007af40044 },
|
|
+ /* x^167936 mod p(x)` << 1, x^168000 mod p(x)` << 1 */
|
|
+ { 0x0000000085544940, 0x0000000016178744 },
|
|
+ /* x^166912 mod p(x)` << 1, x^166976 mod p(x)` << 1 */
|
|
+ { 0x000000019cd21e80, 0x000000014c177458 },
|
|
+ /* x^165888 mod p(x)` << 1, x^165952 mod p(x)` << 1 */
|
|
+ { 0x000000013eb95bc0, 0x000000011b6ddf04 },
|
|
+ /* x^164864 mod p(x)` << 1, x^164928 mod p(x)` << 1 */
|
|
+ { 0x00000001dfc9fdfc, 0x00000001f3e29ccc },
|
|
+ /* x^163840 mod p(x)` << 1, x^163904 mod p(x)` << 1 */
|
|
+ { 0x00000000cd028bc2, 0x0000000135ae7562 },
|
|
+ /* x^162816 mod p(x)` << 1, x^162880 mod p(x)` << 1 */
|
|
+ { 0x0000000090db8c44, 0x0000000190ef812c },
|
|
+ /* x^161792 mod p(x)` << 1, x^161856 mod p(x)` << 1 */
|
|
+ { 0x000000010010a4ce, 0x0000000067a2c786 },
|
|
+ /* x^160768 mod p(x)` << 1, x^160832 mod p(x)` << 1 */
|
|
+ { 0x00000001c8f4c72c, 0x0000000048b9496c },
|
|
+ /* x^159744 mod p(x)` << 1, x^159808 mod p(x)` << 1 */
|
|
+ { 0x000000001c26170c, 0x000000015a422de6 },
|
|
+ /* x^158720 mod p(x)` << 1, x^158784 mod p(x)` << 1 */
|
|
+ { 0x00000000e3fccf68, 0x00000001ef0e3640 },
|
|
+ /* x^157696 mod p(x)` << 1, x^157760 mod p(x)` << 1 */
|
|
+ { 0x00000000d513ed24, 0x00000001006d2d26 },
|
|
+ /* x^156672 mod p(x)` << 1, x^156736 mod p(x)` << 1 */
|
|
+ { 0x00000000141beada, 0x00000001170d56d6 },
|
|
+ /* x^155648 mod p(x)` << 1, x^155712 mod p(x)` << 1 */
|
|
+ { 0x000000011071aea0, 0x00000000a5fb613c },
|
|
+ /* x^154624 mod p(x)` << 1, x^154688 mod p(x)` << 1 */
|
|
+ { 0x000000012e19080a, 0x0000000040bbf7fc },
|
|
+ /* x^153600 mod p(x)` << 1, x^153664 mod p(x)` << 1 */
|
|
+ { 0x0000000100ecf826, 0x000000016ac3a5b2 },
|
|
+ /* x^152576 mod p(x)` << 1, x^152640 mod p(x)` << 1 */
|
|
+ { 0x0000000069b09412, 0x00000000abf16230 },
|
|
+ /* x^151552 mod p(x)` << 1, x^151616 mod p(x)` << 1 */
|
|
+ { 0x0000000122297bac, 0x00000001ebe23fac },
|
|
+ /* x^150528 mod p(x)` << 1, x^150592 mod p(x)` << 1 */
|
|
+ { 0x00000000e9e4b068, 0x000000008b6a0894 },
|
|
+ /* x^149504 mod p(x)` << 1, x^149568 mod p(x)` << 1 */
|
|
+ { 0x000000004b38651a, 0x00000001288ea478 },
|
|
+ /* x^148480 mod p(x)` << 1, x^148544 mod p(x)` << 1 */
|
|
+ { 0x00000001468360e2, 0x000000016619c442 },
|
|
+ /* x^147456 mod p(x)` << 1, x^147520 mod p(x)` << 1 */
|
|
+ { 0x00000000121c2408, 0x0000000086230038 },
|
|
+ /* x^146432 mod p(x)` << 1, x^146496 mod p(x)` << 1 */
|
|
+ { 0x00000000da7e7d08, 0x000000017746a756 },
|
|
+ /* x^145408 mod p(x)` << 1, x^145472 mod p(x)` << 1 */
|
|
+ { 0x00000001058d7652, 0x0000000191b8f8f8 },
|
|
+ /* x^144384 mod p(x)` << 1, x^144448 mod p(x)` << 1 */
|
|
+ { 0x000000014a098a90, 0x000000008e167708 },
|
|
+ /* x^143360 mod p(x)` << 1, x^143424 mod p(x)` << 1 */
|
|
+ { 0x0000000020dbe72e, 0x0000000148b22d54 },
|
|
+ /* x^142336 mod p(x)` << 1, x^142400 mod p(x)` << 1 */
|
|
+ { 0x000000011e7323e8, 0x0000000044ba2c3c },
|
|
+ /* x^141312 mod p(x)` << 1, x^141376 mod p(x)` << 1 */
|
|
+ { 0x00000000d5d4bf94, 0x00000000b54d2b52 },
|
|
+ /* x^140288 mod p(x)` << 1, x^140352 mod p(x)` << 1 */
|
|
+ { 0x0000000199d8746c, 0x0000000005a4fd8a },
|
|
+ /* x^139264 mod p(x)` << 1, x^139328 mod p(x)` << 1 */
|
|
+ { 0x00000000ce9ca8a0, 0x0000000139f9fc46 },
|
|
+ /* x^138240 mod p(x)` << 1, x^138304 mod p(x)` << 1 */
|
|
+ { 0x00000000136edece, 0x000000015a1fa824 },
|
|
+ /* x^137216 mod p(x)` << 1, x^137280 mod p(x)` << 1 */
|
|
+ { 0x000000019b92a068, 0x000000000a61ae4c },
|
|
+ /* x^136192 mod p(x)` << 1, x^136256 mod p(x)` << 1 */
|
|
+ { 0x0000000071d62206, 0x0000000145e9113e },
|
|
+ /* x^135168 mod p(x)` << 1, x^135232 mod p(x)` << 1 */
|
|
+ { 0x00000000dfc50158, 0x000000006a348448 },
|
|
+ /* x^134144 mod p(x)` << 1, x^134208 mod p(x)` << 1 */
|
|
+ { 0x00000001517626bc, 0x000000004d80a08c },
|
|
+ /* x^133120 mod p(x)` << 1, x^133184 mod p(x)` << 1 */
|
|
+ { 0x0000000148d1e4fa, 0x000000014b6837a0 },
|
|
+ /* x^132096 mod p(x)` << 1, x^132160 mod p(x)` << 1 */
|
|
+ { 0x0000000094d8266e, 0x000000016896a7fc },
|
|
+ /* x^131072 mod p(x)` << 1, x^131136 mod p(x)` << 1 */
|
|
+ { 0x00000000606c5e34, 0x000000014f187140 },
|
|
+ /* x^130048 mod p(x)` << 1, x^130112 mod p(x)` << 1 */
|
|
+ { 0x000000019766beaa, 0x000000019581b9da },
|
|
+ /* x^129024 mod p(x)` << 1, x^129088 mod p(x)` << 1 */
|
|
+ { 0x00000001d80c506c, 0x00000001091bc984 },
|
|
+ /* x^128000 mod p(x)` << 1, x^128064 mod p(x)` << 1 */
|
|
+ { 0x000000001e73837c, 0x000000001067223c },
|
|
+ /* x^126976 mod p(x)` << 1, x^127040 mod p(x)` << 1 */
|
|
+ { 0x0000000064d587de, 0x00000001ab16ea02 },
|
|
+ /* x^125952 mod p(x)` << 1, x^126016 mod p(x)` << 1 */
|
|
+ { 0x00000000f4a507b0, 0x000000013c4598a8 },
|
|
+ /* x^124928 mod p(x)` << 1, x^124992 mod p(x)` << 1 */
|
|
+ { 0x0000000040e342fc, 0x00000000b3735430 },
|
|
+ /* x^123904 mod p(x)` << 1, x^123968 mod p(x)` << 1 */
|
|
+ { 0x00000001d5ad9c3a, 0x00000001bb3fc0c0 },
|
|
+ /* x^122880 mod p(x)` << 1, x^122944 mod p(x)` << 1 */
|
|
+ { 0x0000000094a691a4, 0x00000001570ae19c },
|
|
+ /* x^121856 mod p(x)` << 1, x^121920 mod p(x)` << 1 */
|
|
+ { 0x00000001271ecdfa, 0x00000001ea910712 },
|
|
+ /* x^120832 mod p(x)` << 1, x^120896 mod p(x)` << 1 */
|
|
+ { 0x000000009e54475a, 0x0000000167127128 },
|
|
+ /* x^119808 mod p(x)` << 1, x^119872 mod p(x)` << 1 */
|
|
+ { 0x00000000c9c099ee, 0x0000000019e790a2 },
|
|
+ /* x^118784 mod p(x)` << 1, x^118848 mod p(x)` << 1 */
|
|
+ { 0x000000009a2f736c, 0x000000003788f710 },
|
|
+ /* x^117760 mod p(x)` << 1, x^117824 mod p(x)` << 1 */
|
|
+ { 0x00000000bb9f4996, 0x00000001682a160e },
|
|
+ /* x^116736 mod p(x)` << 1, x^116800 mod p(x)` << 1 */
|
|
+ { 0x00000001db688050, 0x000000007f0ebd2e },
|
|
+ /* x^115712 mod p(x)` << 1, x^115776 mod p(x)` << 1 */
|
|
+ { 0x00000000e9b10af4, 0x000000002b032080 },
|
|
+ /* x^114688 mod p(x)` << 1, x^114752 mod p(x)` << 1 */
|
|
+ { 0x000000012d4545e4, 0x00000000cfd1664a },
|
|
+ /* x^113664 mod p(x)` << 1, x^113728 mod p(x)` << 1 */
|
|
+ { 0x000000000361139c, 0x00000000aa1181c2 },
|
|
+ /* x^112640 mod p(x)` << 1, x^112704 mod p(x)` << 1 */
|
|
+ { 0x00000001a5a1a3a8, 0x00000000ddd08002 },
|
|
+ /* x^111616 mod p(x)` << 1, x^111680 mod p(x)` << 1 */
|
|
+ { 0x000000006844e0b0, 0x00000000e8dd0446 },
|
|
+ /* x^110592 mod p(x)` << 1, x^110656 mod p(x)` << 1 */
|
|
+ { 0x00000000c3762f28, 0x00000001bbd94a00 },
|
|
+ /* x^109568 mod p(x)` << 1, x^109632 mod p(x)` << 1 */
|
|
+ { 0x00000001d26287a2, 0x00000000ab6cd180 },
|
|
+ /* x^108544 mod p(x)` << 1, x^108608 mod p(x)` << 1 */
|
|
+ { 0x00000001f6f0bba8, 0x0000000031803ce2 },
|
|
+ /* x^107520 mod p(x)` << 1, x^107584 mod p(x)` << 1 */
|
|
+ { 0x000000002ffabd62, 0x0000000024f40b0c },
|
|
+ /* x^106496 mod p(x)` << 1, x^106560 mod p(x)` << 1 */
|
|
+ { 0x00000000fb4516b8, 0x00000001ba1d9834 },
|
|
+ /* x^105472 mod p(x)` << 1, x^105536 mod p(x)` << 1 */
|
|
+ { 0x000000018cfa961c, 0x0000000104de61aa },
|
|
+ /* x^104448 mod p(x)` << 1, x^104512 mod p(x)` << 1 */
|
|
+ { 0x000000019e588d52, 0x0000000113e40d46 },
|
|
+ /* x^103424 mod p(x)` << 1, x^103488 mod p(x)` << 1 */
|
|
+ { 0x00000001180f0bbc, 0x00000001415598a0 },
|
|
+ /* x^102400 mod p(x)` << 1, x^102464 mod p(x)` << 1 */
|
|
+ { 0x00000000e1d9177a, 0x00000000bf6c8c90 },
|
|
+ /* x^101376 mod p(x)` << 1, x^101440 mod p(x)` << 1 */
|
|
+ { 0x0000000105abc27c, 0x00000001788b0504 },
|
|
+ /* x^100352 mod p(x)` << 1, x^100416 mod p(x)` << 1 */
|
|
+ { 0x00000000972e4a58, 0x0000000038385d02 },
|
|
+ /* x^99328 mod p(x)` << 1, x^99392 mod p(x)` << 1 */
|
|
+ { 0x0000000183499a5e, 0x00000001b6c83844 },
|
|
+ /* x^98304 mod p(x)` << 1, x^98368 mod p(x)` << 1 */
|
|
+ { 0x00000001c96a8cca, 0x0000000051061a8a },
|
|
+ /* x^97280 mod p(x)` << 1, x^97344 mod p(x)` << 1 */
|
|
+ { 0x00000001a1a5b60c, 0x000000017351388a },
|
|
+ /* x^96256 mod p(x)` << 1, x^96320 mod p(x)` << 1 */
|
|
+ { 0x00000000e4b6ac9c, 0x0000000132928f92 },
|
|
+ /* x^95232 mod p(x)` << 1, x^95296 mod p(x)` << 1 */
|
|
+ { 0x00000001807e7f5a, 0x00000000e6b4f48a },
|
|
+ /* x^94208 mod p(x)` << 1, x^94272 mod p(x)` << 1 */
|
|
+ { 0x000000017a7e3bc8, 0x0000000039d15e90 },
|
|
+ /* x^93184 mod p(x)` << 1, x^93248 mod p(x)` << 1 */
|
|
+ { 0x00000000d73975da, 0x00000000312d6074 },
|
|
+ /* x^92160 mod p(x)` << 1, x^92224 mod p(x)` << 1 */
|
|
+ { 0x000000017375d038, 0x000000017bbb2cc4 },
|
|
+ /* x^91136 mod p(x)` << 1, x^91200 mod p(x)` << 1 */
|
|
+ { 0x00000000193680bc, 0x000000016ded3e18 },
|
|
+ /* x^90112 mod p(x)` << 1, x^90176 mod p(x)` << 1 */
|
|
+ { 0x00000000999b06f6, 0x00000000f1638b16 },
|
|
+ /* x^89088 mod p(x)` << 1, x^89152 mod p(x)` << 1 */
|
|
+ { 0x00000001f685d2b8, 0x00000001d38b9ecc },
|
|
+ /* x^88064 mod p(x)` << 1, x^88128 mod p(x)` << 1 */
|
|
+ { 0x00000001f4ecbed2, 0x000000018b8d09dc },
|
|
+ /* x^87040 mod p(x)` << 1, x^87104 mod p(x)` << 1 */
|
|
+ { 0x00000000ba16f1a0, 0x00000000e7bc27d2 },
|
|
+ /* x^86016 mod p(x)` << 1, x^86080 mod p(x)` << 1 */
|
|
+ { 0x0000000115aceac4, 0x00000000275e1e96 },
|
|
+ /* x^84992 mod p(x)` << 1, x^85056 mod p(x)` << 1 */
|
|
+ { 0x00000001aeff6292, 0x00000000e2e3031e },
|
|
+ /* x^83968 mod p(x)` << 1, x^84032 mod p(x)` << 1 */
|
|
+ { 0x000000009640124c, 0x00000001041c84d8 },
|
|
+ /* x^82944 mod p(x)` << 1, x^83008 mod p(x)` << 1 */
|
|
+ { 0x0000000114f41f02, 0x00000000706ce672 },
|
|
+ /* x^81920 mod p(x)` << 1, x^81984 mod p(x)` << 1 */
|
|
+ { 0x000000009c5f3586, 0x000000015d5070da },
|
|
+ /* x^80896 mod p(x)` << 1, x^80960 mod p(x)` << 1 */
|
|
+ { 0x00000001878275fa, 0x0000000038f9493a },
|
|
+ /* x^79872 mod p(x)` << 1, x^79936 mod p(x)` << 1 */
|
|
+ { 0x00000000ddc42ce8, 0x00000000a3348a76 },
|
|
+ /* x^78848 mod p(x)` << 1, x^78912 mod p(x)` << 1 */
|
|
+ { 0x0000000181d2c73a, 0x00000001ad0aab92 },
|
|
+ /* x^77824 mod p(x)` << 1, x^77888 mod p(x)` << 1 */
|
|
+ { 0x0000000141c9320a, 0x000000019e85f712 },
|
|
+ /* x^76800 mod p(x)` << 1, x^76864 mod p(x)` << 1 */
|
|
+ { 0x000000015235719a, 0x000000005a871e76 },
|
|
+ /* x^75776 mod p(x)` << 1, x^75840 mod p(x)` << 1 */
|
|
+ { 0x00000000be27d804, 0x000000017249c662 },
|
|
+ /* x^74752 mod p(x)` << 1, x^74816 mod p(x)` << 1 */
|
|
+ { 0x000000006242d45a, 0x000000003a084712 },
|
|
+ /* x^73728 mod p(x)` << 1, x^73792 mod p(x)` << 1 */
|
|
+ { 0x000000009a53638e, 0x00000000ed438478 },
|
|
+ /* x^72704 mod p(x)` << 1, x^72768 mod p(x)` << 1 */
|
|
+ { 0x00000001001ecfb6, 0x00000000abac34cc },
|
|
+ /* x^71680 mod p(x)` << 1, x^71744 mod p(x)` << 1 */
|
|
+ { 0x000000016d7c2d64, 0x000000005f35ef3e },
|
|
+ /* x^70656 mod p(x)` << 1, x^70720 mod p(x)` << 1 */
|
|
+ { 0x00000001d0ce46c0, 0x0000000047d6608c },
|
|
+ /* x^69632 mod p(x)` << 1, x^69696 mod p(x)` << 1 */
|
|
+ { 0x0000000124c907b4, 0x000000002d01470e },
|
|
+ /* x^68608 mod p(x)` << 1, x^68672 mod p(x)` << 1 */
|
|
+ { 0x0000000018a555ca, 0x0000000158bbc7b0 },
|
|
+ /* x^67584 mod p(x)` << 1, x^67648 mod p(x)` << 1 */
|
|
+ { 0x000000006b0980bc, 0x00000000c0a23e8e },
|
|
+ /* x^66560 mod p(x)` << 1, x^66624 mod p(x)` << 1 */
|
|
+ { 0x000000008bbba964, 0x00000001ebd85c88 },
|
|
+ /* x^65536 mod p(x)` << 1, x^65600 mod p(x)` << 1 */
|
|
+ { 0x00000001070a5a1e, 0x000000019ee20bb2 },
|
|
+ /* x^64512 mod p(x)` << 1, x^64576 mod p(x)` << 1 */
|
|
+ { 0x000000002204322a, 0x00000001acabf2d6 },
|
|
+ /* x^63488 mod p(x)` << 1, x^63552 mod p(x)` << 1 */
|
|
+ { 0x00000000a27524d0, 0x00000001b7963d56 },
|
|
+ /* x^62464 mod p(x)` << 1, x^62528 mod p(x)` << 1 */
|
|
+ { 0x0000000020b1e4ba, 0x000000017bffa1fe },
|
|
+ /* x^61440 mod p(x)` << 1, x^61504 mod p(x)` << 1 */
|
|
+ { 0x0000000032cc27fc, 0x000000001f15333e },
|
|
+ /* x^60416 mod p(x)` << 1, x^60480 mod p(x)` << 1 */
|
|
+ { 0x0000000044dd22b8, 0x000000018593129e },
|
|
+ /* x^59392 mod p(x)` << 1, x^59456 mod p(x)` << 1 */
|
|
+ { 0x00000000dffc9e0a, 0x000000019cb32602 },
|
|
+ /* x^58368 mod p(x)` << 1, x^58432 mod p(x)` << 1 */
|
|
+ { 0x00000001b7a0ed14, 0x0000000142b05cc8 },
|
|
+ /* x^57344 mod p(x)` << 1, x^57408 mod p(x)` << 1 */
|
|
+ { 0x00000000c7842488, 0x00000001be49e7a4 },
|
|
+ /* x^56320 mod p(x)` << 1, x^56384 mod p(x)` << 1 */
|
|
+ { 0x00000001c02a4fee, 0x0000000108f69d6c },
|
|
+ /* x^55296 mod p(x)` << 1, x^55360 mod p(x)` << 1 */
|
|
+ { 0x000000003c273778, 0x000000006c0971f0 },
|
|
+ /* x^54272 mod p(x)` << 1, x^54336 mod p(x)` << 1 */
|
|
+ { 0x00000001d63f8894, 0x000000005b16467a },
|
|
+ /* x^53248 mod p(x)` << 1, x^53312 mod p(x)` << 1 */
|
|
+ { 0x000000006be557d6, 0x00000001551a628e },
|
|
+ /* x^52224 mod p(x)` << 1, x^52288 mod p(x)` << 1 */
|
|
+ { 0x000000006a7806ea, 0x000000019e42ea92 },
|
|
+ /* x^51200 mod p(x)` << 1, x^51264 mod p(x)` << 1 */
|
|
+ { 0x000000016155aa0c, 0x000000012fa83ff2 },
|
|
+ /* x^50176 mod p(x)` << 1, x^50240 mod p(x)` << 1 */
|
|
+ { 0x00000000908650ac, 0x000000011ca9cde0 },
|
|
+ /* x^49152 mod p(x)` << 1, x^49216 mod p(x)` << 1 */
|
|
+ { 0x00000000aa5a8084, 0x00000000c8e5cd74 },
|
|
+ /* x^48128 mod p(x)` << 1, x^48192 mod p(x)` << 1 */
|
|
+ { 0x0000000191bb500a, 0x0000000096c27f0c },
|
|
+ /* x^47104 mod p(x)` << 1, x^47168 mod p(x)` << 1 */
|
|
+ { 0x0000000064e9bed0, 0x000000002baed926 },
|
|
+ /* x^46080 mod p(x)` << 1, x^46144 mod p(x)` << 1 */
|
|
+ { 0x000000009444f302, 0x000000017c8de8d2 },
|
|
+ /* x^45056 mod p(x)` << 1, x^45120 mod p(x)` << 1 */
|
|
+ { 0x000000019db07d3c, 0x00000000d43d6068 },
|
|
+ /* x^44032 mod p(x)` << 1, x^44096 mod p(x)` << 1 */
|
|
+ { 0x00000001359e3e6e, 0x00000000cb2c4b26 },
|
|
+ /* x^43008 mod p(x)` << 1, x^43072 mod p(x)` << 1 */
|
|
+ { 0x00000001e4f10dd2, 0x0000000145b8da26 },
|
|
+ /* x^41984 mod p(x)` << 1, x^42048 mod p(x)` << 1 */
|
|
+ { 0x0000000124f5735e, 0x000000018fff4b08 },
|
|
+ /* x^40960 mod p(x)` << 1, x^41024 mod p(x)` << 1 */
|
|
+ { 0x0000000124760a4c, 0x0000000150b58ed0 },
|
|
+ /* x^39936 mod p(x)` << 1, x^40000 mod p(x)` << 1 */
|
|
+ { 0x000000000f1fc186, 0x00000001549f39bc },
|
|
+ /* x^38912 mod p(x)` << 1, x^38976 mod p(x)` << 1 */
|
|
+ { 0x00000000150e4cc4, 0x00000000ef4d2f42 },
|
|
+ /* x^37888 mod p(x)` << 1, x^37952 mod p(x)` << 1 */
|
|
+ { 0x000000002a6204e8, 0x00000001b1468572 },
|
|
+ /* x^36864 mod p(x)` << 1, x^36928 mod p(x)` << 1 */
|
|
+ { 0x00000000beb1d432, 0x000000013d7403b2 },
|
|
+ /* x^35840 mod p(x)` << 1, x^35904 mod p(x)` << 1 */
|
|
+ { 0x0000000135f3f1f0, 0x00000001a4681842 },
|
|
+ /* x^34816 mod p(x)` << 1, x^34880 mod p(x)` << 1 */
|
|
+ { 0x0000000074fe2232, 0x0000000167714492 },
|
|
+ /* x^33792 mod p(x)` << 1, x^33856 mod p(x)` << 1 */
|
|
+ { 0x000000001ac6e2ba, 0x00000001e599099a },
|
|
+ /* x^32768 mod p(x)` << 1, x^32832 mod p(x)` << 1 */
|
|
+ { 0x0000000013fca91e, 0x00000000fe128194 },
|
|
+ /* x^31744 mod p(x)` << 1, x^31808 mod p(x)` << 1 */
|
|
+ { 0x0000000183f4931e, 0x0000000077e8b990 },
|
|
+ /* x^30720 mod p(x)` << 1, x^30784 mod p(x)` << 1 */
|
|
+ { 0x00000000b6d9b4e4, 0x00000001a267f63a },
|
|
+ /* x^29696 mod p(x)` << 1, x^29760 mod p(x)` << 1 */
|
|
+ { 0x00000000b5188656, 0x00000001945c245a },
|
|
+ /* x^28672 mod p(x)` << 1, x^28736 mod p(x)` << 1 */
|
|
+ { 0x0000000027a81a84, 0x0000000149002e76 },
|
|
+ /* x^27648 mod p(x)` << 1, x^27712 mod p(x)` << 1 */
|
|
+ { 0x0000000125699258, 0x00000001bb8310a4 },
|
|
+ /* x^26624 mod p(x)` << 1, x^26688 mod p(x)` << 1 */
|
|
+ { 0x00000001b23de796, 0x000000019ec60bcc },
|
|
+ /* x^25600 mod p(x)` << 1, x^25664 mod p(x)` << 1 */
|
|
+ { 0x00000000fe4365dc, 0x000000012d8590ae },
|
|
+ /* x^24576 mod p(x)` << 1, x^24640 mod p(x)` << 1 */
|
|
+ { 0x00000000c68f497a, 0x0000000065b00684 },
|
|
+ /* x^23552 mod p(x)` << 1, x^23616 mod p(x)` << 1 */
|
|
+ { 0x00000000fbf521ee, 0x000000015e5aeadc },
|
|
+ /* x^22528 mod p(x)` << 1, x^22592 mod p(x)` << 1 */
|
|
+ { 0x000000015eac3378, 0x00000000b77ff2b0 },
|
|
+ /* x^21504 mod p(x)` << 1, x^21568 mod p(x)` << 1 */
|
|
+ { 0x0000000134914b90, 0x0000000188da2ff6 },
|
|
+ /* x^20480 mod p(x)` << 1, x^20544 mod p(x)` << 1 */
|
|
+ { 0x0000000016335cfe, 0x0000000063da929a },
|
|
+ /* x^19456 mod p(x)` << 1, x^19520 mod p(x)` << 1 */
|
|
+ { 0x000000010372d10c, 0x00000001389caa80 },
|
|
+ /* x^18432 mod p(x)` << 1, x^18496 mod p(x)` << 1 */
|
|
+ { 0x000000015097b908, 0x000000013db599d2 },
|
|
+ /* x^17408 mod p(x)` << 1, x^17472 mod p(x)` << 1 */
|
|
+ { 0x00000001227a7572, 0x0000000122505a86 },
|
|
+ /* x^16384 mod p(x)` << 1, x^16448 mod p(x)` << 1 */
|
|
+ { 0x000000009a8f75c0, 0x000000016bd72746 },
|
|
+ /* x^15360 mod p(x)` << 1, x^15424 mod p(x)` << 1 */
|
|
+ { 0x00000000682c77a2, 0x00000001c3faf1d4 },
|
|
+ /* x^14336 mod p(x)` << 1, x^14400 mod p(x)` << 1 */
|
|
+ { 0x00000000231f091c, 0x00000001111c826c },
|
|
+ /* x^13312 mod p(x)` << 1, x^13376 mod p(x)` << 1 */
|
|
+ { 0x000000007d4439f2, 0x00000000153e9fb2 },
|
|
+ /* x^12288 mod p(x)` << 1, x^12352 mod p(x)` << 1 */
|
|
+ { 0x000000017e221efc, 0x000000002b1f7b60 },
|
|
+ /* x^11264 mod p(x)` << 1, x^11328 mod p(x)` << 1 */
|
|
+ { 0x0000000167457c38, 0x00000000b1dba570 },
|
|
+ /* x^10240 mod p(x)` << 1, x^10304 mod p(x)` << 1 */
|
|
+ { 0x00000000bdf081c4, 0x00000001f6397b76 },
|
|
+ /* x^9216 mod p(x)` << 1, x^9280 mod p(x)` << 1 */
|
|
+ { 0x000000016286d6b0, 0x0000000156335214 },
|
|
+ /* x^8192 mod p(x)` << 1, x^8256 mod p(x)` << 1 */
|
|
+ { 0x00000000c84f001c, 0x00000001d70e3986 },
|
|
+ /* x^7168 mod p(x)` << 1, x^7232 mod p(x)` << 1 */
|
|
+ { 0x0000000064efe7c0, 0x000000003701a774 },
|
|
+ /* x^6144 mod p(x)` << 1, x^6208 mod p(x)` << 1 */
|
|
+ { 0x000000000ac2d904, 0x00000000ac81ef72 },
|
|
+ /* x^5120 mod p(x)` << 1, x^5184 mod p(x)` << 1 */
|
|
+ { 0x00000000fd226d14, 0x0000000133212464 },
|
|
+ /* x^4096 mod p(x)` << 1, x^4160 mod p(x)` << 1 */
|
|
+ { 0x000000011cfd42e0, 0x00000000e4e45610 },
|
|
+ /* x^3072 mod p(x)` << 1, x^3136 mod p(x)` << 1 */
|
|
+ { 0x000000016e5a5678, 0x000000000c1bd370 },
|
|
+ /* x^2048 mod p(x)` << 1, x^2112 mod p(x)` << 1 */
|
|
+ { 0x00000001d888fe22, 0x00000001a7b9e7a6 },
|
|
+ /* x^1024 mod p(x)` << 1, x^1088 mod p(x)` << 1 */
|
|
+ { 0x00000001af77fcd4, 0x000000007d657a10 }
|
|
+#endif /* __LITTLE_ENDIAN__ */
|
|
+ };
|
|
+
|
|
+/* Reduce final 1024-2048 bits to 64 bits, shifting 32 bits to include the trailing 32 bits of zeros */
|
|
+
|
|
+static const __vector unsigned long long vcrc_short_const[16]
|
|
+ __attribute__((aligned (16))) = {
|
|
+#ifdef __LITTLE_ENDIAN__
|
|
+ /* x^1952 mod p(x) , x^1984 mod p(x) , x^2016 mod p(x) , x^2048 mod p(x) */
|
|
+ { 0x99168a18ec447f11, 0xed837b2613e8221e },
|
|
+ /* x^1824 mod p(x) , x^1856 mod p(x) , x^1888 mod p(x) , x^1920 mod p(x) */
|
|
+ { 0xe23e954e8fd2cd3c, 0xc8acdd8147b9ce5a },
|
|
+ /* x^1696 mod p(x) , x^1728 mod p(x) , x^1760 mod p(x) , x^1792 mod p(x) */
|
|
+ { 0x92f8befe6b1d2b53, 0xd9ad6d87d4277e25 },
|
|
+ /* x^1568 mod p(x) , x^1600 mod p(x) , x^1632 mod p(x) , x^1664 mod p(x) */
|
|
+ { 0xf38a3556291ea462, 0xc10ec5e033fbca3b },
|
|
+ /* x^1440 mod p(x) , x^1472 mod p(x) , x^1504 mod p(x) , x^1536 mod p(x) */
|
|
+ { 0x974ac56262b6ca4b, 0xc0b55b0e82e02e2f },
|
|
+ /* x^1312 mod p(x) , x^1344 mod p(x) , x^1376 mod p(x) , x^1408 mod p(x) */
|
|
+ { 0x855712b3784d2a56, 0x71aa1df0e172334d },
|
|
+ /* x^1184 mod p(x) , x^1216 mod p(x) , x^1248 mod p(x) , x^1280 mod p(x) */
|
|
+ { 0xa5abe9f80eaee722, 0xfee3053e3969324d },
|
|
+ /* x^1056 mod p(x) , x^1088 mod p(x) , x^1120 mod p(x) , x^1152 mod p(x) */
|
|
+ { 0x1fa0943ddb54814c, 0xf44779b93eb2bd08 },
|
|
+ /* x^928 mod p(x) , x^960 mod p(x) , x^992 mod p(x) , x^1024 mod p(x) */
|
|
+ { 0xa53ff440d7bbfe6a, 0xf5449b3f00cc3374 },
|
|
+ /* x^800 mod p(x) , x^832 mod p(x) , x^864 mod p(x) , x^896 mod p(x) */
|
|
+ { 0xebe7e3566325605c, 0x6f8346e1d777606e },
|
|
+ /* x^672 mod p(x) , x^704 mod p(x) , x^736 mod p(x) , x^768 mod p(x) */
|
|
+ { 0xc65a272ce5b592b8, 0xe3ab4f2ac0b95347 },
|
|
+ /* x^544 mod p(x) , x^576 mod p(x) , x^608 mod p(x) , x^640 mod p(x) */
|
|
+ { 0x5705a9ca4721589f, 0xaa2215ea329ecc11 },
|
|
+ /* x^416 mod p(x) , x^448 mod p(x) , x^480 mod p(x) , x^512 mod p(x) */
|
|
+ { 0xe3720acb88d14467, 0x1ed8f66ed95efd26 },
|
|
+ /* x^288 mod p(x) , x^320 mod p(x) , x^352 mod p(x) , x^384 mod p(x) */
|
|
+ { 0xba1aca0315141c31, 0x78ed02d5a700e96a },
|
|
+ /* x^160 mod p(x) , x^192 mod p(x) , x^224 mod p(x) , x^256 mod p(x) */
|
|
+ { 0xad2a31b3ed627dae, 0xba8ccbe832b39da3 },
|
|
+ /* x^32 mod p(x) , x^64 mod p(x) , x^96 mod p(x) , x^128 mod p(x) */
|
|
+ { 0x6655004fa06a2517, 0xedb88320b1e6b092 }
|
|
+#else /* __LITTLE_ENDIAN__ */
|
|
+ /* x^1952 mod p(x) , x^1984 mod p(x) , x^2016 mod p(x) , x^2048 mod p(x) */
|
|
+ { 0xed837b2613e8221e, 0x99168a18ec447f11 },
|
|
+ /* x^1824 mod p(x) , x^1856 mod p(x) , x^1888 mod p(x) , x^1920 mod p(x) */
|
|
+ { 0xc8acdd8147b9ce5a, 0xe23e954e8fd2cd3c },
|
|
+ /* x^1696 mod p(x) , x^1728 mod p(x) , x^1760 mod p(x) , x^1792 mod p(x) */
|
|
+ { 0xd9ad6d87d4277e25, 0x92f8befe6b1d2b53 },
|
|
+ /* x^1568 mod p(x) , x^1600 mod p(x) , x^1632 mod p(x) , x^1664 mod p(x) */
|
|
+ { 0xc10ec5e033fbca3b, 0xf38a3556291ea462 },
|
|
+ /* x^1440 mod p(x) , x^1472 mod p(x) , x^1504 mod p(x) , x^1536 mod p(x) */
|
|
+ { 0xc0b55b0e82e02e2f, 0x974ac56262b6ca4b },
|
|
+ /* x^1312 mod p(x) , x^1344 mod p(x) , x^1376 mod p(x) , x^1408 mod p(x) */
|
|
+ { 0x71aa1df0e172334d, 0x855712b3784d2a56 },
|
|
+ /* x^1184 mod p(x) , x^1216 mod p(x) , x^1248 mod p(x) , x^1280 mod p(x) */
|
|
+ { 0xfee3053e3969324d, 0xa5abe9f80eaee722 },
|
|
+ /* x^1056 mod p(x) , x^1088 mod p(x) , x^1120 mod p(x) , x^1152 mod p(x) */
|
|
+ { 0xf44779b93eb2bd08, 0x1fa0943ddb54814c },
|
|
+ /* x^928 mod p(x) , x^960 mod p(x) , x^992 mod p(x) , x^1024 mod p(x) */
|
|
+ { 0xf5449b3f00cc3374, 0xa53ff440d7bbfe6a },
|
|
+ /* x^800 mod p(x) , x^832 mod p(x) , x^864 mod p(x) , x^896 mod p(x) */
|
|
+ { 0x6f8346e1d777606e, 0xebe7e3566325605c },
|
|
+ /* x^672 mod p(x) , x^704 mod p(x) , x^736 mod p(x) , x^768 mod p(x) */
|
|
+ { 0xe3ab4f2ac0b95347, 0xc65a272ce5b592b8 },
|
|
+ /* x^544 mod p(x) , x^576 mod p(x) , x^608 mod p(x) , x^640 mod p(x) */
|
|
+ { 0xaa2215ea329ecc11, 0x5705a9ca4721589f },
|
|
+ /* x^416 mod p(x) , x^448 mod p(x) , x^480 mod p(x) , x^512 mod p(x) */
|
|
+ { 0x1ed8f66ed95efd26, 0xe3720acb88d14467 },
|
|
+ /* x^288 mod p(x) , x^320 mod p(x) , x^352 mod p(x) , x^384 mod p(x) */
|
|
+ { 0x78ed02d5a700e96a, 0xba1aca0315141c31 },
|
|
+ /* x^160 mod p(x) , x^192 mod p(x) , x^224 mod p(x) , x^256 mod p(x) */
|
|
+ { 0xba8ccbe832b39da3, 0xad2a31b3ed627dae },
|
|
+ /* x^32 mod p(x) , x^64 mod p(x) , x^96 mod p(x) , x^128 mod p(x) */
|
|
+ { 0xedb88320b1e6b092, 0x6655004fa06a2517 }
|
|
+#endif /* __LITTLE_ENDIAN__ */
|
|
+ };
|
|
+
|
|
+/* Barrett constants */
|
|
+/* 33 bit reflected Barrett constant m = (4^32)/n */
|
|
+
|
|
+static const __vector unsigned long long v_Barrett_const[2]
|
|
+ __attribute__((aligned (16))) = {
|
|
+ /* x^64 div p(x) */
|
|
+#ifdef __LITTLE_ENDIAN__
|
|
+ { 0x00000001f7011641, 0x0000000000000000 },
|
|
+ { 0x00000001db710641, 0x0000000000000000 }
|
|
+#else /* __LITTLE_ENDIAN__ */
|
|
+ { 0x0000000000000000, 0x00000001f7011641 },
|
|
+ { 0x0000000000000000, 0x00000001db710641 }
|
|
+#endif /* __LITTLE_ENDIAN__ */
|
|
+ };
|
|
+#endif /* POWER8_INTRINSICS */
|
|
+
|
|
+#endif /* __ASSEMBLER__ */
|
|
diff --git a/contrib/power/crc32_z_power8.c b/contrib/power/crc32_z_power8.c
|
|
new file mode 100644
|
|
index 000000000..7858cfe0e
|
|
--- /dev/null
|
|
+++ b/contrib/power/crc32_z_power8.c
|
|
@@ -0,0 +1,679 @@
|
|
+/*
|
|
+ * Calculate the checksum of data that is 16 byte aligned and a multiple of
|
|
+ * 16 bytes.
|
|
+ *
|
|
+ * The first step is to reduce it to 1024 bits. We do this in 8 parallel
|
|
+ * chunks in order to mask the latency of the vpmsum instructions. If we
|
|
+ * have more than 32 kB of data to checksum we repeat this step multiple
|
|
+ * times, passing in the previous 1024 bits.
|
|
+ *
|
|
+ * The next step is to reduce the 1024 bits to 64 bits. This step adds
|
|
+ * 32 bits of 0s to the end - this matches what a CRC does. We just
|
|
+ * calculate constants that land the data in this 32 bits.
|
|
+ *
|
|
+ * We then use fixed point Barrett reduction to compute a mod n over GF(2)
|
|
+ * for n = CRC using POWER8 instructions. We use x = 32.
|
|
+ *
|
|
+ * http://en.wikipedia.org/wiki/Barrett_reduction
|
|
+ *
|
|
+ * This code uses gcc vector builtins instead of using assembly directly.
|
|
+ *
|
|
+ * Copyright (C) 2017 Rogerio Alves <rogealve@br.ibm.com>, IBM
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or
|
|
+ * modify it under the terms of either:
|
|
+ *
|
|
+ * a) the GNU General Public License as published by the Free Software
|
|
+ * Foundation; either version 2 of the License, or (at your option)
|
|
+ * any later version, or
|
|
+ * b) the Apache License, Version 2.0
|
|
+ */
|
|
+
|
|
+#include <altivec.h>
|
|
+#include "../../zutil.h"
|
|
+#include "power.h"
|
|
+
|
|
+#define POWER8_INTRINSICS
|
|
+#define CRC_TABLE
|
|
+
|
|
+#ifdef CRC32_CONSTANTS_HEADER
|
|
+#include CRC32_CONSTANTS_HEADER
|
|
+#else
|
|
+#include "crc32_constants.h"
|
|
+#endif
|
|
+
|
|
+#define VMX_ALIGN 16
|
|
+#define VMX_ALIGN_MASK (VMX_ALIGN-1)
|
|
+
|
|
+#ifdef REFLECT /* reflected variant: table index from the low byte, crc shifted right */
|
|
+static unsigned int crc32_align(unsigned int crc, const unsigned char *p,
|
|
+ unsigned long len)
|
|
+{ /* Table-driven CRC, one byte per iteration, over len bytes at p; returns the updated crc. */
|
|
+ while (len--) /* crc_table is not defined in this file; presumably supplied by the constants header (CRC_TABLE is defined before the include) - confirm */
|
|
+ crc = crc_table[(crc ^ *p++) & 0xff] ^ (crc >> 8);
|
|
+ return crc;
|
|
+}
|
|
+#else /* non-reflected variant: table index from the top byte, crc shifted left */
|
|
+static unsigned int crc32_align(unsigned int crc, const unsigned char *p,
|
|
+ unsigned long len)
|
|
+{ /* Same byte-at-a-time loop as the REFLECT variant, but consuming the high-order byte of crc. */
|
|
+ while (len--)
|
|
+ crc = crc_table[((crc >> 24) ^ *p++) & 0xff] ^ (crc << 8);
|
|
+ return crc;
|
|
+}
|
|
+#endif /* REFLECT */
|
|
+
|
|
+static unsigned int __attribute__ ((aligned (32)))
|
|
+__crc32_vpmsum(unsigned int crc, const void* p, unsigned long len);
|
|
+
|
|
+unsigned long ZLIB_INTERNAL _crc32_z_power8(uLong _crc, const Bytef *_p,
|
|
+ z_size_t _len)
|
|
+{ /* Continue CRC _crc over _len bytes at _p: scalar crc32_align for the unaligned head and tail, __crc32_vpmsum for the 16-byte-aligned middle. */
|
|
+ unsigned int prealign; /* bytes in front of the first 16-byte boundary */
|
|
+ unsigned int tail; /* bytes left after the last whole 16-byte block */
|
|
+
|
|
+ /* Map zlib API to crc32_vpmsum API */
|
|
+ unsigned int crc = (unsigned int) (0xffffffff & _crc); /* keep only the low 32 bits */
|
|
+ const unsigned char *p = _p;
|
|
+ unsigned long len = (unsigned long) _len;
|
|
+
|
|
+ if (p == (const unsigned char *) 0x0) return 0; /* null buffer: return 0 without touching memory */
|
|
+#ifdef CRC_XOR
|
|
+ crc ^= 0xffffffff; /* invert on entry; inverted back at the out: label */
|
|
+#endif
|
|
+
|
|
+ if (len < VMX_ALIGN + VMX_ALIGN_MASK) { /* fewer than 16 + 15 = 31 bytes: scalar path only */
|
|
+ crc = crc32_align(crc, p, len);
|
|
+ goto out;
|
|
+ }
|
|
+
|
|
+ if ((unsigned long)p & VMX_ALIGN_MASK) { /* p is not 16-byte aligned: consume the head byte-wise */
|
|
+ prealign = VMX_ALIGN - ((unsigned long)p & VMX_ALIGN_MASK);
|
|
+ crc = crc32_align(crc, p, prealign);
|
|
+ len -= prealign;
|
|
+ p += prealign;
|
|
+ }
|
|
+
|
|
+ crc = __crc32_vpmsum(crc, p, len & ~VMX_ALIGN_MASK); /* len rounded down to a multiple of 16; at least 16 remain after the checks above */
|
|
+
|
|
+ tail = len & VMX_ALIGN_MASK;
|
|
+ if (tail) {
|
|
+ p += len & ~VMX_ALIGN_MASK; /* skip the bytes handed to __crc32_vpmsum */
|
|
+ crc = crc32_align(crc, p, tail);
|
|
+ }
|
|
+
|
|
+out:
|
|
+#ifdef CRC_XOR
|
|
+ crc ^= 0xffffffff; /* undo the entry inversion */
|
|
+#endif
|
|
+
|
|
+ /* Convert to zlib API */
|
|
+ return (unsigned long) crc;
|
|
+}
|
|
+
|
|
+#if defined (__clang__) /* clang: helpers come from the workaround header (presumably the same three names - confirm) */
|
|
+#include "clang_workaround.h"
|
|
+#else /* other compilers: map the short helper names onto the *_int128 builtins */
|
|
+#define __builtin_pack_vector(a, b) __builtin_pack_vector_int128 ((a), (b))
|
|
+#define __builtin_unpack_vector_0(a) __builtin_unpack_vector_int128 ((vector __int128_t)(a), 0)
|
|
+#define __builtin_unpack_vector_1(a) __builtin_unpack_vector_int128 ((vector __int128_t)(a), 1)
|
|
+#endif /* __clang__ */
|
|
+
|
|
+/* When we have a load-store in a single-dispatch group and address overlap
|
|
+ * such that forwarding is not allowed (load-hit-store) the group must be flushed.
|
|
+ * A group ending NOP prevents the flush.
|
|
+ */
|
|
+#define GROUP_ENDING_NOP asm("ori 2,2,0" ::: "memory")
|
|
+
|
|
+#if defined(__BIG_ENDIAN__) && defined (REFLECT) /* BYTESWAP_DATA is set for big-endian + REFLECT ... */
|
|
+#define BYTESWAP_DATA
|
|
+#elif defined(__LITTLE_ENDIAN__) && !defined(REFLECT) /* ... and for little-endian without REFLECT */
|
|
+#define BYTESWAP_DATA
|
|
+#endif
|
|
+
|
|
+#ifdef BYTESWAP_DATA /* VEC_PERM(vr, va, vb, vc): vr = vec_perm(va, vb, vc) with vc cast to a byte vector */
|
|
+#define VEC_PERM(vr, va, vb, vc) vr = vec_perm(va, vb,\
|
|
+ (__vector unsigned char) vc)
|
|
+#if defined(__LITTLE_ENDIAN__)
|
|
+/* Byte reverse permute constant LE. */
|
|
+static const __vector unsigned long long vperm_const
|
|
+ __attribute__ ((aligned(16))) = { 0x08090A0B0C0D0E0FUL,
|
|
+ 0x0001020304050607UL };
|
|
+#else /* not little-endian: presumably the big-endian byte reverse permute constant - confirm */
|
|
+static const __vector unsigned long long vperm_const
|
|
+ __attribute__ ((aligned(16))) = { 0x0F0E0D0C0B0A0908UL,
|
|
+ 0X0706050403020100UL };
|
|
+#endif /* __LITTLE_ENDIAN__ */
|
|
+#else /* !BYTESWAP_DATA: VEC_PERM expands to nothing, so its arguments are never evaluated */
|
|
+#define VEC_PERM(vr, va, vb, vc)
|
|
+#endif /* BYTESWAP_DATA */
|
|
+
|
|
+static unsigned int __attribute__ ((aligned (32)))
|
|
+__crc32_vpmsum(unsigned int crc, const void* p, unsigned long len) {
|
|
+
|
|
+ const __vector unsigned long long vzero = {0,0};
|
|
+ const __vector unsigned long long vones = {0xffffffffffffffffUL,
|
|
+ 0xffffffffffffffffUL};
|
|
+
|
|
+#ifdef REFLECT
|
|
+ const __vector unsigned long long vmask_32bit =
|
|
+ (__vector unsigned long long)vec_sld((__vector unsigned char)vzero,
|
|
+ (__vector unsigned char)vones, 4);
|
|
+#endif
|
|
+
|
|
+ const __vector unsigned long long vmask_64bit =
|
|
+ (__vector unsigned long long)vec_sld((__vector unsigned char)vzero,
|
|
+ (__vector unsigned char)vones, 8);
|
|
+
|
|
+ __vector unsigned long long vcrc;
|
|
+
|
|
+ __vector unsigned long long vconst1, vconst2;
|
|
+
|
|
+ /* vdata0-vdata7 will contain our data (p). */
|
|
+ __vector unsigned long long vdata0, vdata1, vdata2, vdata3, vdata4,
|
|
+ vdata5, vdata6, vdata7;
|
|
+
|
|
+ /* v0-v7 will contain our checksums */
|
|
+ __vector unsigned long long v0 = {0,0};
|
|
+ __vector unsigned long long v1 = {0,0};
|
|
+ __vector unsigned long long v2 = {0,0};
|
|
+ __vector unsigned long long v3 = {0,0};
|
|
+ __vector unsigned long long v4 = {0,0};
|
|
+ __vector unsigned long long v5 = {0,0};
|
|
+ __vector unsigned long long v6 = {0,0};
|
|
+ __vector unsigned long long v7 = {0,0};
|
|
+
|
|
+
|
|
+ /* Vector auxiliary variables. */
|
|
+ __vector unsigned long long va0, va1, va2, va3, va4, va5, va6, va7;
|
|
+
|
|
+ unsigned int result = 0;
|
|
+ unsigned int offset; /* Constant table offset. */
|
|
+
|
|
+ unsigned long i; /* Counter. */
|
|
+ unsigned long chunks;
|
|
+
|
|
+ unsigned long block_size;
|
|
+ int next_block = 0;
|
|
+
|
|
+ /* Align by 128 bits. The last 128 bit block will be processed at end. */
|
|
+ unsigned long length = len & 0xFFFFFFFFFFFFFF80UL;
|
|
+
|
|
+#ifdef REFLECT
|
|
+ vcrc = (__vector unsigned long long)__builtin_pack_vector(0UL, crc);
|
|
+#else
|
|
+ vcrc = (__vector unsigned long long)__builtin_pack_vector(crc, 0UL);
|
|
+
|
|
+ /* Shift into top 32 bits */
|
|
+ vcrc = (__vector unsigned long long)vec_sld((__vector unsigned char)vcrc,
|
|
+ (__vector unsigned char)vzero, 4);
|
|
+#endif
|
|
+
|
|
+ /* Short version. */
|
|
+ if (len < 256) {
|
|
+ /* Calculate where in the constant table we need to start. */
|
|
+ offset = 256 - len;
|
|
+
|
|
+ vconst1 = vec_ld(offset, vcrc_short_const);
|
|
+ vdata0 = vec_ld(0, (__vector unsigned long long*) p);
|
|
+ VEC_PERM(vdata0, vdata0, vconst1, vperm_const);
|
|
+
|
|
+ /* xor initial value*/
|
|
+ vdata0 = vec_xor(vdata0, vcrc);
|
|
+
|
|
+ vdata0 = (__vector unsigned long long) __builtin_crypto_vpmsumw
|
|
+ ((__vector unsigned int)vdata0, (__vector unsigned int)vconst1);
|
|
+ v0 = vec_xor(v0, vdata0);
|
|
+
|
|
+ for (i = 16; i < len; i += 16) {
|
|
+ vconst1 = vec_ld(offset + i, vcrc_short_const);
|
|
+ vdata0 = vec_ld(i, (__vector unsigned long long*) p);
|
|
+ VEC_PERM(vdata0, vdata0, vconst1, vperm_const);
|
|
+ vdata0 = (__vector unsigned long long) __builtin_crypto_vpmsumw
|
|
+ ((__vector unsigned int)vdata0, (__vector unsigned int)vconst1);
|
|
+ v0 = vec_xor(v0, vdata0);
|
|
+ }
|
|
+ } else {
|
|
+
|
|
+ /* Load initial values. */
|
|
+ vdata0 = vec_ld(0, (__vector unsigned long long*) p);
|
|
+ vdata1 = vec_ld(16, (__vector unsigned long long*) p);
|
|
+
|
|
+ VEC_PERM(vdata0, vdata0, vdata0, vperm_const);
|
|
+ VEC_PERM(vdata1, vdata1, vdata1, vperm_const);
|
|
+
|
|
+ vdata2 = vec_ld(32, (__vector unsigned long long*) p);
|
|
+ vdata3 = vec_ld(48, (__vector unsigned long long*) p);
|
|
+
|
|
+ VEC_PERM(vdata2, vdata2, vdata2, vperm_const);
|
|
+ VEC_PERM(vdata3, vdata3, vdata3, vperm_const);
|
|
+
|
|
+ vdata4 = vec_ld(64, (__vector unsigned long long*) p);
|
|
+ vdata5 = vec_ld(80, (__vector unsigned long long*) p);
|
|
+
|
|
+ VEC_PERM(vdata4, vdata4, vdata4, vperm_const);
|
|
+ VEC_PERM(vdata5, vdata5, vdata5, vperm_const);
|
|
+
|
|
+ vdata6 = vec_ld(96, (__vector unsigned long long*) p);
|
|
+ vdata7 = vec_ld(112, (__vector unsigned long long*) p);
|
|
+
|
|
+ VEC_PERM(vdata6, vdata6, vdata6, vperm_const);
|
|
+ VEC_PERM(vdata7, vdata7, vdata7, vperm_const);
|
|
+
|
|
+ /* xor in initial value */
|
|
+ vdata0 = vec_xor(vdata0, vcrc);
|
|
+
|
|
+ p = (char *)p + 128;
|
|
+
|
|
+ do {
|
|
+ /* Checksum in blocks of MAX_SIZE. */
|
|
+ block_size = length;
|
|
+ if (block_size > MAX_SIZE) {
|
|
+ block_size = MAX_SIZE;
|
|
+ }
|
|
+
|
|
+ length = length - block_size;
|
|
+
|
|
+ /*
|
|
+ * Work out the offset into the constants table to start at. Each
|
|
+ * constant is 16 bytes, and it is used against 128 bytes of input
|
|
+ * data - 128 / 16 = 8
|
|
+ */
|
|
+ offset = (MAX_SIZE/8) - (block_size/8);
|
|
+ /* We reduce our final 128 bytes in a separate step */
|
|
+ chunks = (block_size/128)-1;
|
|
+
|
|
+ vconst1 = vec_ld(offset, vcrc_const);
|
|
+
|
|
+ va0 = __builtin_crypto_vpmsumd ((__vector unsigned long long)vdata0,
|
|
+ (__vector unsigned long long)vconst1);
|
|
+ va1 = __builtin_crypto_vpmsumd ((__vector unsigned long long)vdata1,
|
|
+ (__vector unsigned long long)vconst1);
|
|
+ va2 = __builtin_crypto_vpmsumd ((__vector unsigned long long)vdata2,
|
|
+ (__vector unsigned long long)vconst1);
|
|
+ va3 = __builtin_crypto_vpmsumd ((__vector unsigned long long)vdata3,
|
|
+ (__vector unsigned long long)vconst1);
|
|
+ va4 = __builtin_crypto_vpmsumd ((__vector unsigned long long)vdata4,
|
|
+ (__vector unsigned long long)vconst1);
|
|
+ va5 = __builtin_crypto_vpmsumd ((__vector unsigned long long)vdata5,
|
|
+ (__vector unsigned long long)vconst1);
|
|
+ va6 = __builtin_crypto_vpmsumd ((__vector unsigned long long)vdata6,
|
|
+ (__vector unsigned long long)vconst1);
|
|
+ va7 = __builtin_crypto_vpmsumd ((__vector unsigned long long)vdata7,
|
|
+ (__vector unsigned long long)vconst1);
|
|
+
|
|
+ if (chunks > 1) {
|
|
+ offset += 16;
|
|
+ vconst2 = vec_ld(offset, vcrc_const);
|
|
+ GROUP_ENDING_NOP;
|
|
+
|
|
+ vdata0 = vec_ld(0, (__vector unsigned long long*) p);
|
|
+ VEC_PERM(vdata0, vdata0, vdata0, vperm_const);
|
|
+
|
|
+ vdata1 = vec_ld(16, (__vector unsigned long long*) p);
|
|
+ VEC_PERM(vdata1, vdata1, vdata1, vperm_const);
|
|
+
|
|
+ vdata2 = vec_ld(32, (__vector unsigned long long*) p);
|
|
+ VEC_PERM(vdata2, vdata2, vdata2, vperm_const);
|
|
+
|
|
+ vdata3 = vec_ld(48, (__vector unsigned long long*) p);
|
|
+ VEC_PERM(vdata3, vdata3, vdata3, vperm_const);
|
|
+
|
|
+ vdata4 = vec_ld(64, (__vector unsigned long long*) p);
|
|
+ VEC_PERM(vdata4, vdata4, vdata4, vperm_const);
|
|
+
|
|
+ vdata5 = vec_ld(80, (__vector unsigned long long*) p);
|
|
+ VEC_PERM(vdata5, vdata5, vdata5, vperm_const);
|
|
+
|
|
+ vdata6 = vec_ld(96, (__vector unsigned long long*) p);
|
|
+ VEC_PERM(vdata6, vdata6, vdata6, vperm_const);
|
|
+
|
|
+ vdata7 = vec_ld(112, (__vector unsigned long long*) p);
|
|
+ VEC_PERM(vdata7, vdata7, vdata7, vperm_const);
|
|
+
|
|
+ p = (char *)p + 128;
|
|
+
|
|
+ /*
|
|
+ * main loop. We modulo schedule it such that it takes three
|
|
+ * iterations to complete - first iteration load, second
|
|
+ * iteration vpmsum, third iteration xor.
|
|
+ */
|
|
+ for (i = 0; i < chunks-2; i++) {
|
|
+ vconst1 = vec_ld(offset, vcrc_const);
|
|
+ offset += 16;
|
|
+ GROUP_ENDING_NOP;
|
|
+
|
|
+ v0 = vec_xor(v0, va0);
|
|
+ va0 = __builtin_crypto_vpmsumd ((__vector unsigned long
|
|
+ long)vdata0, (__vector unsigned long long)vconst2);
|
|
+ vdata0 = vec_ld(0, (__vector unsigned long long*) p);
|
|
+ VEC_PERM(vdata0, vdata0, vdata0, vperm_const);
|
|
+ GROUP_ENDING_NOP;
|
|
+
|
|
+ v1 = vec_xor(v1, va1);
|
|
+ va1 = __builtin_crypto_vpmsumd ((__vector unsigned long
|
|
+ long)vdata1, (__vector unsigned long long)vconst2);
|
|
+ vdata1 = vec_ld(16, (__vector unsigned long long*) p);
|
|
+ VEC_PERM(vdata1, vdata1, vdata1, vperm_const);
|
|
+ GROUP_ENDING_NOP;
|
|
+
|
|
+ v2 = vec_xor(v2, va2);
|
|
+ va2 = __builtin_crypto_vpmsumd ((__vector unsigned long
|
|
+ long)vdata2, (__vector unsigned long long)vconst2);
|
|
+ vdata2 = vec_ld(32, (__vector unsigned long long*) p);
|
|
+ VEC_PERM(vdata2, vdata2, vdata2, vperm_const);
|
|
+ GROUP_ENDING_NOP;
|
|
+
|
|
+ v3 = vec_xor(v3, va3);
|
|
+ va3 = __builtin_crypto_vpmsumd ((__vector unsigned long
|
|
+ long)vdata3, (__vector unsigned long long)vconst2);
|
|
+ vdata3 = vec_ld(48, (__vector unsigned long long*) p);
|
|
+ VEC_PERM(vdata3, vdata3, vdata3, vperm_const);
|
|
+
|
|
+ vconst2 = vec_ld(offset, vcrc_const);
|
|
+ GROUP_ENDING_NOP;
|
|
+
|
|
+ v4 = vec_xor(v4, va4);
|
|
+ va4 = __builtin_crypto_vpmsumd ((__vector unsigned long
|
|
+ long)vdata4, (__vector unsigned long long)vconst1);
|
|
+ vdata4 = vec_ld(64, (__vector unsigned long long*) p);
|
|
+ VEC_PERM(vdata4, vdata4, vdata4, vperm_const);
|
|
+ GROUP_ENDING_NOP;
|
|
+
|
|
+ v5 = vec_xor(v5, va5);
|
|
+ va5 = __builtin_crypto_vpmsumd ((__vector unsigned long
|
|
+ long)vdata5, (__vector unsigned long long)vconst1);
|
|
+ vdata5 = vec_ld(80, (__vector unsigned long long*) p);
|
|
+ VEC_PERM(vdata5, vdata5, vdata5, vperm_const);
|
|
+ GROUP_ENDING_NOP;
|
|
+
|
|
+ v6 = vec_xor(v6, va6);
|
|
+ va6 = __builtin_crypto_vpmsumd ((__vector unsigned long
|
|
+ long)vdata6, (__vector unsigned long long)vconst1);
|
|
+ vdata6 = vec_ld(96, (__vector unsigned long long*) p);
|
|
+ VEC_PERM(vdata6, vdata6, vdata6, vperm_const);
|
|
+ GROUP_ENDING_NOP;
|
|
+
|
|
+ v7 = vec_xor(v7, va7);
|
|
+ va7 = __builtin_crypto_vpmsumd ((__vector unsigned long
|
|
+ long)vdata7, (__vector unsigned long long)vconst1);
|
|
+ vdata7 = vec_ld(112, (__vector unsigned long long*) p);
|
|
+ VEC_PERM(vdata7, vdata7, vdata7, vperm_const);
|
|
+
|
|
+ p = (char *)p + 128;
|
|
+ }
|
|
+
|
|
+ /* First cool down*/
|
|
+ vconst1 = vec_ld(offset, vcrc_const);
|
|
+ offset += 16;
|
|
+
|
|
+ v0 = vec_xor(v0, va0);
|
|
+ va0 = __builtin_crypto_vpmsumd ((__vector unsigned long
|
|
+ long)vdata0, (__vector unsigned long long)vconst1);
|
|
+ GROUP_ENDING_NOP;
|
|
+
|
|
+ v1 = vec_xor(v1, va1);
|
|
+ va1 = __builtin_crypto_vpmsumd ((__vector unsigned long
|
|
+ long)vdata1, (__vector unsigned long long)vconst1);
|
|
+ GROUP_ENDING_NOP;
|
|
+
|
|
+ v2 = vec_xor(v2, va2);
|
|
+ va2 = __builtin_crypto_vpmsumd ((__vector unsigned long
|
|
+ long)vdata2, (__vector unsigned long long)vconst1);
|
|
+ GROUP_ENDING_NOP;
|
|
+
|
|
+ v3 = vec_xor(v3, va3);
|
|
+ va3 = __builtin_crypto_vpmsumd ((__vector unsigned long
|
|
+ long)vdata3, (__vector unsigned long long)vconst1);
|
|
+ GROUP_ENDING_NOP;
|
|
+
|
|
+ v4 = vec_xor(v4, va4);
|
|
+ va4 = __builtin_crypto_vpmsumd ((__vector unsigned long
|
|
+ long)vdata4, (__vector unsigned long long)vconst1);
|
|
+ GROUP_ENDING_NOP;
|
|
+
|
|
+ v5 = vec_xor(v5, va5);
|
|
+ va5 = __builtin_crypto_vpmsumd ((__vector unsigned long
|
|
+ long)vdata5, (__vector unsigned long long)vconst1);
|
|
+ GROUP_ENDING_NOP;
|
|
+
|
|
+ v6 = vec_xor(v6, va6);
|
|
+ va6 = __builtin_crypto_vpmsumd ((__vector unsigned long
|
|
+ long)vdata6, (__vector unsigned long long)vconst1);
|
|
+ GROUP_ENDING_NOP;
|
|
+
|
|
+ v7 = vec_xor(v7, va7);
|
|
+ va7 = __builtin_crypto_vpmsumd ((__vector unsigned long
|
|
+ long)vdata7, (__vector unsigned long long)vconst1);
|
|
+    } /* if (chunks > 1) */
|
|
+
|
|
+ /* Second cool down. */
|
|
+ v0 = vec_xor(v0, va0);
|
|
+ v1 = vec_xor(v1, va1);
|
|
+ v2 = vec_xor(v2, va2);
|
|
+ v3 = vec_xor(v3, va3);
|
|
+ v4 = vec_xor(v4, va4);
|
|
+ v5 = vec_xor(v5, va5);
|
|
+ v6 = vec_xor(v6, va6);
|
|
+ v7 = vec_xor(v7, va7);
|
|
+
|
|
+#ifdef REFLECT
|
|
+ /*
|
|
+ * vpmsumd produces a 96 bit result in the least significant bits
|
|
+ * of the register. Since we are bit reflected we have to shift it
|
|
+ * left 32 bits so it occupies the least significant bits in the
|
|
+ * bit reflected domain.
|
|
+ */
|
|
+ v0 = (__vector unsigned long long)vec_sld((__vector unsigned char)v0,
|
|
+ (__vector unsigned char)vzero, 4);
|
|
+ v1 = (__vector unsigned long long)vec_sld((__vector unsigned char)v1,
|
|
+ (__vector unsigned char)vzero, 4);
|
|
+ v2 = (__vector unsigned long long)vec_sld((__vector unsigned char)v2,
|
|
+ (__vector unsigned char)vzero, 4);
|
|
+ v3 = (__vector unsigned long long)vec_sld((__vector unsigned char)v3,
|
|
+ (__vector unsigned char)vzero, 4);
|
|
+ v4 = (__vector unsigned long long)vec_sld((__vector unsigned char)v4,
|
|
+ (__vector unsigned char)vzero, 4);
|
|
+ v5 = (__vector unsigned long long)vec_sld((__vector unsigned char)v5,
|
|
+ (__vector unsigned char)vzero, 4);
|
|
+ v6 = (__vector unsigned long long)vec_sld((__vector unsigned char)v6,
|
|
+ (__vector unsigned char)vzero, 4);
|
|
+ v7 = (__vector unsigned long long)vec_sld((__vector unsigned char)v7,
|
|
+ (__vector unsigned char)vzero, 4);
|
|
+#endif
|
|
+
|
|
+ /* xor with the last 1024 bits. */
|
|
+ va0 = vec_ld(0, (__vector unsigned long long*) p);
|
|
+ VEC_PERM(va0, va0, va0, vperm_const);
|
|
+
|
|
+ va1 = vec_ld(16, (__vector unsigned long long*) p);
|
|
+ VEC_PERM(va1, va1, va1, vperm_const);
|
|
+
|
|
+ va2 = vec_ld(32, (__vector unsigned long long*) p);
|
|
+ VEC_PERM(va2, va2, va2, vperm_const);
|
|
+
|
|
+ va3 = vec_ld(48, (__vector unsigned long long*) p);
|
|
+ VEC_PERM(va3, va3, va3, vperm_const);
|
|
+
|
|
+ va4 = vec_ld(64, (__vector unsigned long long*) p);
|
|
+ VEC_PERM(va4, va4, va4, vperm_const);
|
|
+
|
|
+ va5 = vec_ld(80, (__vector unsigned long long*) p);
|
|
+ VEC_PERM(va5, va5, va5, vperm_const);
|
|
+
|
|
+ va6 = vec_ld(96, (__vector unsigned long long*) p);
|
|
+ VEC_PERM(va6, va6, va6, vperm_const);
|
|
+
|
|
+ va7 = vec_ld(112, (__vector unsigned long long*) p);
|
|
+ VEC_PERM(va7, va7, va7, vperm_const);
|
|
+
|
|
+ p = (char *)p + 128;
|
|
+
|
|
+ vdata0 = vec_xor(v0, va0);
|
|
+ vdata1 = vec_xor(v1, va1);
|
|
+ vdata2 = vec_xor(v2, va2);
|
|
+ vdata3 = vec_xor(v3, va3);
|
|
+ vdata4 = vec_xor(v4, va4);
|
|
+ vdata5 = vec_xor(v5, va5);
|
|
+ vdata6 = vec_xor(v6, va6);
|
|
+ vdata7 = vec_xor(v7, va7);
|
|
+
|
|
+ /* Check if we have more blocks to process */
|
|
+ next_block = 0;
|
|
+ if (length != 0) {
|
|
+ next_block = 1;
|
|
+
|
|
+ /* zero v0-v7 */
|
|
+ v0 = vec_xor(v0, v0);
|
|
+ v1 = vec_xor(v1, v1);
|
|
+ v2 = vec_xor(v2, v2);
|
|
+ v3 = vec_xor(v3, v3);
|
|
+ v4 = vec_xor(v4, v4);
|
|
+ v5 = vec_xor(v5, v5);
|
|
+ v6 = vec_xor(v6, v6);
|
|
+ v7 = vec_xor(v7, v7);
|
|
+ }
|
|
+ length = length + 128;
|
|
+
|
|
+ } while (next_block);
|
|
+
|
|
+ /* Calculate how many bytes we have left. */
|
|
+ length = (len & 127);
|
|
+
|
|
+ /* Calculate where in (short) constant table we need to start. */
|
|
+ offset = 128 - length;
|
|
+
|
|
+ v0 = vec_ld(offset, vcrc_short_const);
|
|
+ v1 = vec_ld(offset + 16, vcrc_short_const);
|
|
+ v2 = vec_ld(offset + 32, vcrc_short_const);
|
|
+ v3 = vec_ld(offset + 48, vcrc_short_const);
|
|
+ v4 = vec_ld(offset + 64, vcrc_short_const);
|
|
+ v5 = vec_ld(offset + 80, vcrc_short_const);
|
|
+ v6 = vec_ld(offset + 96, vcrc_short_const);
|
|
+ v7 = vec_ld(offset + 112, vcrc_short_const);
|
|
+
|
|
+ offset += 128;
|
|
+
|
|
+ v0 = (__vector unsigned long long)__builtin_crypto_vpmsumw (
|
|
+ (__vector unsigned int)vdata0,(__vector unsigned int)v0);
|
|
+ v1 = (__vector unsigned long long)__builtin_crypto_vpmsumw (
|
|
+ (__vector unsigned int)vdata1,(__vector unsigned int)v1);
|
|
+ v2 = (__vector unsigned long long)__builtin_crypto_vpmsumw (
|
|
+ (__vector unsigned int)vdata2,(__vector unsigned int)v2);
|
|
+ v3 = (__vector unsigned long long)__builtin_crypto_vpmsumw (
|
|
+ (__vector unsigned int)vdata3,(__vector unsigned int)v3);
|
|
+ v4 = (__vector unsigned long long)__builtin_crypto_vpmsumw (
|
|
+ (__vector unsigned int)vdata4,(__vector unsigned int)v4);
|
|
+ v5 = (__vector unsigned long long)__builtin_crypto_vpmsumw (
|
|
+ (__vector unsigned int)vdata5,(__vector unsigned int)v5);
|
|
+ v6 = (__vector unsigned long long)__builtin_crypto_vpmsumw (
|
|
+ (__vector unsigned int)vdata6,(__vector unsigned int)v6);
|
|
+ v7 = (__vector unsigned long long)__builtin_crypto_vpmsumw (
|
|
+ (__vector unsigned int)vdata7,(__vector unsigned int)v7);
|
|
+
|
|
+ /* Now reduce the tail (0-112 bytes). */
|
|
+ for (i = 0; i < length; i+=16) {
|
|
+ vdata0 = vec_ld(i,(__vector unsigned long long*)p);
|
|
+ VEC_PERM(vdata0, vdata0, vdata0, vperm_const);
|
|
+ va0 = vec_ld(offset + i,vcrc_short_const);
|
|
+ va0 = (__vector unsigned long long)__builtin_crypto_vpmsumw (
|
|
+ (__vector unsigned int)vdata0,(__vector unsigned int)va0);
|
|
+ v0 = vec_xor(v0, va0);
|
|
+ }
|
|
+
|
|
+ /* xor all parallel chunks together. */
|
|
+ v0 = vec_xor(v0, v1);
|
|
+ v2 = vec_xor(v2, v3);
|
|
+ v4 = vec_xor(v4, v5);
|
|
+ v6 = vec_xor(v6, v7);
|
|
+
|
|
+ v0 = vec_xor(v0, v2);
|
|
+ v4 = vec_xor(v4, v6);
|
|
+
|
|
+ v0 = vec_xor(v0, v4);
|
|
+ }
|
|
+
|
|
+ /* Barrett Reduction */
|
|
+ vconst1 = vec_ld(0, v_Barrett_const);
|
|
+ vconst2 = vec_ld(16, v_Barrett_const);
|
|
+
|
|
+ v1 = (__vector unsigned long long)vec_sld((__vector unsigned char)v0,
|
|
+ (__vector unsigned char)v0, 8);
|
|
+ v0 = vec_xor(v1,v0);
|
|
+
|
|
+#ifdef REFLECT
|
|
+ /* shift left one bit */
|
|
+ __vector unsigned char vsht_splat = vec_splat_u8 (1);
|
|
+ v0 = (__vector unsigned long long)vec_sll ((__vector unsigned char)v0,
|
|
+ vsht_splat);
|
|
+#endif
|
|
+
|
|
+ v0 = vec_and(v0, vmask_64bit);
|
|
+
|
|
+#ifndef REFLECT
|
|
+
|
|
+ /*
|
|
+ * Now for the actual algorithm. The idea is to calculate q,
|
|
+ * the multiple of our polynomial that we need to subtract. By
|
|
+ * doing the computation 2x bits higher (ie 64 bits) and shifting the
|
|
+ * result back down 2x bits, we round down to the nearest multiple.
|
|
+ */
|
|
+
|
|
+ /* ma */
|
|
+ v1 = __builtin_crypto_vpmsumd ((__vector unsigned long long)v0,
|
|
+ (__vector unsigned long long)vconst1);
|
|
+ /* q = floor(ma/(2^64)) */
|
|
+ v1 = (__vector unsigned long long)vec_sld ((__vector unsigned char)vzero,
|
|
+ (__vector unsigned char)v1, 8);
|
|
+ /* qn */
|
|
+ v1 = __builtin_crypto_vpmsumd ((__vector unsigned long long)v1,
|
|
+ (__vector unsigned long long)vconst2);
|
|
+ /* a - qn, subtraction is xor in GF(2) */
|
|
+ v0 = vec_xor (v0, v1);
|
|
+ /*
|
|
+ * Get the result into r3. We need to shift it left 8 bytes:
|
|
+ * V0 [ 0 1 2 X ]
|
|
+ * V0 [ 0 X 2 3 ]
|
|
+ */
|
|
+ result = __builtin_unpack_vector_1 (v0);
|
|
+#else
|
|
+
|
|
+ /*
|
|
+ * The reflected version of Barrett reduction. Instead of bit
|
|
+ * reflecting our data (which is expensive to do), we bit reflect our
|
|
+ * constants and our algorithm, which means the intermediate data in
|
|
+ * our vector registers goes from 0-63 instead of 63-0. We can reflect
|
|
+ * the algorithm because we don't carry in mod 2 arithmetic.
|
|
+ */
|
|
+
|
|
+ /* bottom 32 bits of a */
|
|
+ v1 = vec_and(v0, vmask_32bit);
|
|
+
|
|
+ /* ma */
|
|
+ v1 = __builtin_crypto_vpmsumd ((__vector unsigned long long)v1,
|
|
+ (__vector unsigned long long)vconst1);
|
|
+
|
|
+ /* bottom 32bits of ma */
|
|
+ v1 = vec_and(v1, vmask_32bit);
|
|
+ /* qn */
|
|
+ v1 = __builtin_crypto_vpmsumd ((__vector unsigned long long)v1,
|
|
+ (__vector unsigned long long)vconst2);
|
|
+ /* a - qn, subtraction is xor in GF(2) */
|
|
+ v0 = vec_xor (v0, v1);
|
|
+
|
|
+ /*
|
|
+ * Since we are bit reflected, the result (ie the low 32 bits) is in
|
|
+ * the high 32 bits. We just need to shift it left 4 bytes
|
|
+ * V0 [ 0 1 X 3 ]
|
|
+ * V0 [ 0 X 2 3 ]
|
|
+ */
|
|
+
|
|
+  /* shift result into top 64 bits of v0 */
|
|
+ v0 = (__vector unsigned long long)vec_sld((__vector unsigned char)v0,
|
|
+ (__vector unsigned char)vzero, 4);
|
|
+
|
|
+ result = __builtin_unpack_vector_0 (v0);
|
|
+#endif
|
|
+
|
|
+ return result;
|
|
+}
|
|
diff --git a/contrib/power/crc32_z_resolver.c b/contrib/power/crc32_z_resolver.c
|
|
new file mode 100644
|
|
index 000000000..f4e9aa491
|
|
--- /dev/null
|
|
+++ b/contrib/power/crc32_z_resolver.c
|
|
@@ -0,0 +1,15 @@
|
|
+/* Copyright (C) 2019 Matheus Castanho <msc@linux.ibm.com>, IBM
|
|
+ * For conditions of distribution and use, see copyright notice in zlib.h
|
|
+ */
|
|
+
|
|
+#include "../gcc/zifunc.h"
|
|
+#include "power.h"
|
|
+
|
|
+Z_IFUNC(crc32_z) {
|
|
+#ifdef Z_POWER8
|
|
+ if (__builtin_cpu_supports("arch_2_07"))
|
|
+ return _crc32_z_power8;
|
|
+#endif
|
|
+
|
|
+ return crc32_z_default;
|
|
+}
|
|
diff --git a/contrib/power/power.h b/contrib/power/power.h
|
|
index b42c7d6c6..79123aa90 100644
|
|
--- a/contrib/power/power.h
|
|
+++ b/contrib/power/power.h
|
|
@@ -2,3 +2,7 @@
|
|
* 2019 Rogerio Alves <rogerio.alves@ibm.com>, IBM
|
|
* For conditions of distribution and use, see copyright notice in zlib.h
|
|
*/
|
|
+
|
|
+#include "../../zconf.h"
|
|
+
|
|
+unsigned long _crc32_z_power8(unsigned long, const Bytef *, z_size_t);
|
|
diff --git a/crc32.c b/crc32.c
|
|
index f8357b083..4e5830b86 100644
|
|
--- a/crc32.c
|
|
+++ b/crc32.c
|
|
@@ -745,6 +745,13 @@ local z_word_t crc_word_big(data)
|
|
#endif
|
|
|
|
/* ========================================================================= */
|
|
+#ifdef Z_POWER_OPT
|
|
+/* Rename function so resolver can use its symbol. The default version will be
|
|
+ * returned by the resolver if the host has no support for an optimized version.
|
|
+ */
|
|
+#define crc32_z crc32_z_default
|
|
+#endif /* Z_POWER_OPT */
|
|
+
|
|
unsigned long ZEXPORT crc32_z(crc, buf, len)
|
|
unsigned long crc;
|
|
const unsigned char FAR *buf;
|
|
@@ -1066,6 +1073,11 @@ unsigned long ZEXPORT crc32_z(crc, buf, len)
|
|
return crc ^ 0xffffffff;
|
|
}
|
|
|
|
+#ifdef Z_POWER_OPT
|
|
+#undef crc32_z
|
|
+#include "contrib/power/crc32_z_resolver.c"
|
|
+#endif /* Z_POWER_OPT */
|
|
+
|
|
#endif
|
|
|
|
/* ========================================================================= */
|
|
diff --git a/test/crc32_test.c b/test/crc32_test.c
|
|
new file mode 100644
|
|
index 000000000..3155553e6
|
|
--- /dev/null
|
|
+++ b/test/crc32_test.c
|
|
@@ -0,0 +1,205 @@
|
|
+/* crc32_test.c -- unit test for crc32 in the zlib compression library
|
|
+ * Copyright (C) 1995-2006, 2010, 2011, 2016, 2019 Rogerio Alves
|
|
+ * For conditions of distribution and use, see copyright notice in zlib.h
|
|
+ */
|
|
+
|
|
+#include "zlib.h"
|
|
+#include <stdio.h>
|
|
+
|
|
+#ifdef STDC
|
|
+# include <string.h>
|
|
+# include <stdlib.h>
|
|
+#endif
|
|
+
|
|
+void test_crc32 OF((uLong crc, Byte* buf, z_size_t len, uLong chk, int line));
|
|
+int main OF((void));
|
|
+
|
|
+typedef struct {
|
|
+ int line;
|
|
+ uLong crc;
|
|
+ char* buf;
|
|
+ int len;
|
|
+ uLong expect;
|
|
+} crc32_test;
|
|
+
|
|
+void test_crc32(crc, buf, len, chk, line)
|
|
+ uLong crc;
|
|
+ Byte *buf;
|
|
+ z_size_t len;
|
|
+ uLong chk;
|
|
+ int line;
|
|
+{
|
|
+ uLong res = crc32(crc, buf, len);
|
|
+ if (res != chk) {
|
|
+ fprintf(stderr, "FAIL [%d]: crc32 returned 0x%08X expected 0x%08X\n",
|
|
+ line, (unsigned int)res, (unsigned int)chk);
|
|
+ exit(1);
|
|
+ }
|
|
+}
|
|
+
|
|
+static const crc32_test tests[] = {
|
|
+ {__LINE__, 0x0, 0x0, 0, 0x0},
|
|
+ {__LINE__, 0xffffffff, 0x0, 0, 0x0},
|
|
+ {__LINE__, 0x0, 0x0, 255, 0x0}, /* BZ 174799. */
|
|
+ {__LINE__, 0x0, 0x0, 256, 0x0},
|
|
+ {__LINE__, 0x0, 0x0, 257, 0x0},
|
|
+ {__LINE__, 0x0, 0x0, 32767, 0x0},
|
|
+ {__LINE__, 0x0, 0x0, 32768, 0x0},
|
|
+ {__LINE__, 0x0, 0x0, 32769, 0x0},
|
|
+ {__LINE__, 0x0, "", 0, 0x0},
|
|
+ {__LINE__, 0xffffffff, "", 0, 0xffffffff},
|
|
+ {__LINE__, 0x0, "abacus", 6, 0xc3d7115b},
|
|
+ {__LINE__, 0x0, "backlog", 7, 0x269205},
|
|
+ {__LINE__, 0x0, "campfire", 8, 0x22a515f8},
|
|
+ {__LINE__, 0x0, "delta", 5, 0x9643fed9},
|
|
+ {__LINE__, 0x0, "executable", 10, 0xd68eda01},
|
|
+ {__LINE__, 0x0, "file", 4, 0x8c9f3610},
|
|
+ {__LINE__, 0x0, "greatest", 8, 0xc1abd6cd},
|
|
+ {__LINE__, 0x0, "hello", 5, 0x3610a686},
|
|
+ {__LINE__, 0x0, "inverter", 8, 0xc9e962c9},
|
|
+ {__LINE__, 0x0, "jigsaw", 6, 0xce4e3f69},
|
|
+ {__LINE__, 0x0, "karate", 6, 0x890be0e2},
|
|
+ {__LINE__, 0x0, "landscape", 9, 0xc4e0330b},
|
|
+ {__LINE__, 0x0, "machine", 7, 0x1505df84},
|
|
+ {__LINE__, 0x0, "nanometer", 9, 0xd4e19f39},
|
|
+ {__LINE__, 0x0, "oblivion", 8, 0xdae9de77},
|
|
+ {__LINE__, 0x0, "panama", 6, 0x66b8979c},
|
|
+ {__LINE__, 0x0, "quest", 5, 0x4317f817},
|
|
+ {__LINE__, 0x0, "resource", 8, 0xbc91f416},
|
|
+ {__LINE__, 0x0, "secret", 6, 0x5ca2e8e5},
|
|
+ {__LINE__, 0x0, "test", 4, 0xd87f7e0c},
|
|
+ {__LINE__, 0x0, "ultimate", 8, 0x3fc79b0b},
|
|
+ {__LINE__, 0x0, "vector", 6, 0x1b6e485b},
|
|
+ {__LINE__, 0x0, "walrus", 6, 0xbe769b97},
|
|
+ {__LINE__, 0x0, "xeno", 4, 0xe7a06444},
|
|
+ {__LINE__, 0x0, "yelling", 7, 0xfe3944e5},
|
|
+ {__LINE__, 0x0, "zlib", 4, 0x73887d3a},
|
|
+ {__LINE__, 0x0, "4BJD7PocN1VqX0jXVpWB", 20, 0xd487a5a1},
|
|
+ {__LINE__, 0x0, "F1rPWI7XvDs6nAIRx41l", 20, 0x61a0132e},
|
|
+ {__LINE__, 0x0, "ldhKlsVkPFOveXgkGtC2", 20, 0xdf02f76},
|
|
+ {__LINE__, 0x0, "5KKnGOOrs8BvJ35iKTOS", 20, 0x579b2b0a},
|
|
+ {__LINE__, 0x0, "0l1tw7GOcem06Ddu7yn4", 20, 0xf7d16e2d},
|
|
+ {__LINE__, 0x0, "MCr47CjPIn9R1IvE1Tm5", 20, 0x731788f5},
|
|
+ {__LINE__, 0x0, "UcixbzPKTIv0SvILHVdO", 20, 0x7112bb11},
|
|
+ {__LINE__, 0x0, "dGnAyAhRQDsWw0ESou24", 20, 0xf32a0dac},
|
|
+ {__LINE__, 0x0, "di0nvmY9UYMYDh0r45XT", 20, 0x625437bb},
|
|
+ {__LINE__, 0x0, "2XKDwHfAhFsV0RhbqtvH", 20, 0x896930f9},
|
|
+ {__LINE__, 0x0, "ZhrANFIiIvRnqClIVyeD", 20, 0x8579a37},
|
|
+ {__LINE__, 0x0, "v7Q9ehzioTOVeDIZioT1", 20, 0x632aa8e0},
|
|
+ {__LINE__, 0x0, "Yod5hEeKcYqyhfXbhxj2", 20, 0xc829af29},
|
|
+ {__LINE__, 0x0, "GehSWY2ay4uUKhehXYb0", 20, 0x1b08b7e8},
|
|
+ {__LINE__, 0x0, "kwytJmq6UqpflV8Y8GoE", 20, 0x4e33b192},
|
|
+ {__LINE__, 0x0, "70684206568419061514", 20, 0x59a179f0},
|
|
+ {__LINE__, 0x0, "42015093765128581010", 20, 0xcd1013d7},
|
|
+ {__LINE__, 0x0, "88214814356148806939", 20, 0xab927546},
|
|
+ {__LINE__, 0x0, "43472694284527343838", 20, 0x11f3b20c},
|
|
+ {__LINE__, 0x0, "49769333513942933689", 20, 0xd562d4ca},
|
|
+ {__LINE__, 0x0, "54979784887993251199", 20, 0x233395f7},
|
|
+ {__LINE__, 0x0, "58360544869206793220", 20, 0x2d167fd5},
|
|
+ {__LINE__, 0x0, "27347953487840714234", 20, 0x8b5108ba},
|
|
+ {__LINE__, 0x0, "07650690295365319082", 20, 0xc46b3cd8},
|
|
+ {__LINE__, 0x0, "42655507906821911703", 20, 0xc10b2662},
|
|
+ {__LINE__, 0x0, "29977409200786225655", 20, 0xc9a0f9d2},
|
|
+ {__LINE__, 0x0, "85181542907229116674", 20, 0x9341357b},
|
|
+ {__LINE__, 0x0, "87963594337989416799", 20, 0xf0424937},
|
|
+ {__LINE__, 0x0, "21395988329504168551", 20, 0xd7c4c31f},
|
|
+ {__LINE__, 0x0, "51991013580943379423", 20, 0xf11edcc4},
|
|
+ {__LINE__, 0x0, "*]+@!);({_$;}[_},?{?;(_?,=-][@", 30, 0x40795df4},
|
|
+ {__LINE__, 0x0, "_@:_).&(#.[:[{[:)$++-($_;@[)}+", 30, 0xdd61a631},
|
|
+ {__LINE__, 0x0, "&[!,[$_==}+.]@!;*(+},[;:)$;)-@", 30, 0xca907a99},
|
|
+ {__LINE__, 0x0, "]{.[.+?+[[=;[?}_#&;[=)__$$:+=_", 30, 0xf652deac},
|
|
+ {__LINE__, 0x0, "-%.)=/[@].:.(:,()$;=%@-$?]{%+%", 30, 0xaf39a5a9},
|
|
+ {__LINE__, 0x0, "+]#$(@&.=:,*];/.!]%/{:){:@(;)$", 30, 0x6bebb4cf},
|
|
+ {__LINE__, 0x0, ")-._.:?[&:.=+}(*$/=!.${;(=$@!}", 30, 0x76430bac},
|
|
+ {__LINE__, 0x0, ":(_*&%/[[}+,?#$&*+#[([*-/#;%(]", 30, 0x6c80c388},
|
|
+ {__LINE__, 0x0, "{[#-;:$/{)(+[}#]/{&!%(@)%:@-$:", 30, 0xd54d977d},
|
|
+ {__LINE__, 0x0, "_{$*,}(&,@.)):=!/%(&(,,-?$}}}!", 30, 0xe3966ad5},
|
|
+ {__LINE__, 0x0, "e$98KNzqaV)Y:2X?]77].{gKRD4G5{mHZk,Z)SpU%L3FSgv!Wb8MLAFdi{+fp)c,@8m6v)yXg@]HBDFk?.4&}g5_udE*JHCiH=aL", 100, 0xe7c71db9},
|
|
+ {__LINE__, 0x0, "r*Fd}ef+5RJQ;+W=4jTR9)R*p!B;]Ed7tkrLi;88U7g@3v!5pk2X6D)vt,.@N8c]@yyEcKi[vwUu@.Ppm@C6%Mv*3Nw}Y,58_aH)", 100, 0xeaa52777},
|
|
+ {__LINE__, 0x0, "h{bcmdC+a;t+Cf{6Y_dFq-{X4Yu&7uNfVDh?q&_u.UWJU],-GiH7ADzb7-V.Q%4=+v!$L9W+T=bP]$_:]Vyg}A.ygD.r;h-D]m%&", 100, 0xcd472048},
|
|
+ {__LINE__, 0x7a30360d, "abacus", 6, 0xf8655a84},
|
|
+ {__LINE__, 0x6fd767ee, "backlog", 7, 0x1ed834b1},
|
|
+ {__LINE__, 0xefeb7589, "campfire", 8, 0x686cfca},
|
|
+ {__LINE__, 0x61cf7e6b, "delta", 5, 0x1554e4b1},
|
|
+ {__LINE__, 0xdc712e2, "executable", 10, 0x761b4254},
|
|
+ {__LINE__, 0xad23c7fd, "file", 4, 0x7abdd09b},
|
|
+ {__LINE__, 0x85cb2317, "greatest", 8, 0x4ba91c6b},
|
|
+ {__LINE__, 0x9eed31b0, "inverter", 8, 0xd5e78ba5},
|
|
+ {__LINE__, 0xb94f34ca, "jigsaw", 6, 0x23649109},
|
|
+ {__LINE__, 0xab058a2, "karate", 6, 0xc5591f41},
|
|
+ {__LINE__, 0x5bff2b7a, "landscape", 9, 0xf10eb644},
|
|
+ {__LINE__, 0x605c9a5f, "machine", 7, 0xbaa0a636},
|
|
+ {__LINE__, 0x51bdeea5, "nanometer", 9, 0x6af89afb},
|
|
+ {__LINE__, 0x85c21c79, "oblivion", 8, 0xecae222b},
|
|
+ {__LINE__, 0x97216f56, "panama", 6, 0x47dffac4},
|
|
+ {__LINE__, 0x18444af2, "quest", 5, 0x70c2fe36},
|
|
+ {__LINE__, 0xbe6ce359, "resource", 8, 0x1471d925},
|
|
+ {__LINE__, 0x843071f1, "secret", 6, 0x50c9a0db},
|
|
+ {__LINE__, 0xf2480c60, "ultimate", 8, 0xf973daf8},
|
|
+ {__LINE__, 0x2d2feb3d, "vector", 6, 0x344ac03d},
|
|
+ {__LINE__, 0x7490310a, "walrus", 6, 0x6d1408ef},
|
|
+ {__LINE__, 0x97d247d4, "xeno", 4, 0xe62670b5},
|
|
+ {__LINE__, 0x93cf7599, "yelling", 7, 0x1b36da38},
|
|
+ {__LINE__, 0x73c84278, "zlib", 4, 0x6432d127},
|
|
+ {__LINE__, 0x228a87d1, "4BJD7PocN1VqX0jXVpWB", 20, 0x997107d0},
|
|
+ {__LINE__, 0xa7a048d0, "F1rPWI7XvDs6nAIRx41l", 20, 0xdc567274},
|
|
+ {__LINE__, 0x1f0ded40, "ldhKlsVkPFOveXgkGtC2", 20, 0xdcc63870},
|
|
+ {__LINE__, 0xa804a62f, "5KKnGOOrs8BvJ35iKTOS", 20, 0x6926cffd},
|
|
+ {__LINE__, 0x508fae6a, "0l1tw7GOcem06Ddu7yn4", 20, 0xb52b38bc},
|
|
+ {__LINE__, 0xe5adaf4f, "MCr47CjPIn9R1IvE1Tm5", 20, 0xf83b8178},
|
|
+ {__LINE__, 0x67136a40, "UcixbzPKTIv0SvILHVdO", 20, 0xc5213070},
|
|
+ {__LINE__, 0xb00c4a10, "dGnAyAhRQDsWw0ESou24", 20, 0xbc7648b0},
|
|
+ {__LINE__, 0x2e0c84b5, "di0nvmY9UYMYDh0r45XT", 20, 0xd8123a72},
|
|
+ {__LINE__, 0x81238d44, "2XKDwHfAhFsV0RhbqtvH", 20, 0xd5ac5620},
|
|
+ {__LINE__, 0xf853aa92, "ZhrANFIiIvRnqClIVyeD", 20, 0xceae099d},
|
|
+ {__LINE__, 0x5a692325, "v7Q9ehzioTOVeDIZioT1", 20, 0xb07d2b24},
|
|
+ {__LINE__, 0x3275b9f, "Yod5hEeKcYqyhfXbhxj2", 20, 0x24ce91df},
|
|
+ {__LINE__, 0x38371feb, "GehSWY2ay4uUKhehXYb0", 20, 0x707b3b30},
|
|
+ {__LINE__, 0xafc8bf62, "kwytJmq6UqpflV8Y8GoE", 20, 0x16abc6a9},
|
|
+ {__LINE__, 0x9b07db73, "70684206568419061514", 20, 0xae1fb7b7},
|
|
+ {__LINE__, 0xe75b214, "42015093765128581010", 20, 0xd4eecd2d},
|
|
+ {__LINE__, 0x72d0fe6f, "88214814356148806939", 20, 0x4660ec7},
|
|
+ {__LINE__, 0xf857a4b1, "43472694284527343838", 20, 0xfd8afdf7},
|
|
+ {__LINE__, 0x54b8e14, "49769333513942933689", 20, 0xc6d1b5f2},
|
|
+ {__LINE__, 0xd6aa5616, "54979784887993251199", 20, 0x32476461},
|
|
+ {__LINE__, 0x11e63098, "58360544869206793220", 20, 0xd917cf1a},
|
|
+ {__LINE__, 0xbe92385, "27347953487840714234", 20, 0x4ad14a12},
|
|
+ {__LINE__, 0x49511de0, "07650690295365319082", 20, 0xe37b5c6c},
|
|
+ {__LINE__, 0x3db13bc1, "42655507906821911703", 20, 0x7cc497f1},
|
|
+ {__LINE__, 0xbb899bea, "29977409200786225655", 20, 0x99781bb2},
|
|
+ {__LINE__, 0xf6cd9436, "85181542907229116674", 20, 0x132256a1},
|
|
+ {__LINE__, 0x9109e6c3, "87963594337989416799", 20, 0xbfdb2c83},
|
|
+ {__LINE__, 0x75770fc, "21395988329504168551", 20, 0x8d9d1e81},
|
|
+ {__LINE__, 0x69b1d19b, "51991013580943379423", 20, 0x7b6d4404},
|
|
+ {__LINE__, 0xc6132975, "*]+@!);({_$;}[_},?{?;(_?,=-][@", 30, 0x8619f010},
|
|
+ {__LINE__, 0xd58cb00c, "_@:_).&(#.[:[{[:)$++-($_;@[)}+", 30, 0x15746ac3},
|
|
+ {__LINE__, 0xb63b8caa, "&[!,[$_==}+.]@!;*(+},[;:)$;)-@", 30, 0xaccf812f},
|
|
+ {__LINE__, 0x8a45a2b8, "]{.[.+?+[[=;[?}_#&;[=)__$$:+=_", 30, 0x78af45de},
|
|
+ {__LINE__, 0xcbe95b78, "-%.)=/[@].:.(:,()$;=%@-$?]{%+%", 30, 0x25b06b59},
|
|
+ {__LINE__, 0x4ef8a54b, "+]#$(@&.=:,*];/.!]%/{:){:@(;)$", 30, 0x4ba0d08f},
|
|
+ {__LINE__, 0x76ad267a, ")-._.:?[&:.=+}(*$/=!.${;(=$@!}", 30, 0xe26b6aac},
|
|
+ {__LINE__, 0x569e613c, ":(_*&%/[[}+,?#$&*+#[([*-/#;%(]", 30, 0x7e2b0a66},
|
|
+ {__LINE__, 0x36aa61da, "{[#-;:$/{)(+[}#]/{&!%(@)%:@-$:", 30, 0xb3430dc7},
|
|
+ {__LINE__, 0xf67222df, "_{$*,}(&,@.)):=!/%(&(,,-?$}}}!", 30, 0x626c17a},
|
|
+ {__LINE__, 0x74b34fd3, "e$98KNzqaV)Y:2X?]77].{gKRD4G5{mHZk,Z)SpU%L3FSgv!Wb8MLAFdi{+fp)c,@8m6v)yXg@]HBDFk?.4&}g5_udE*JHCiH=aL", 100, 0xccf98060},
|
|
+ {__LINE__, 0x351fd770, "r*Fd}ef+5RJQ;+W=4jTR9)R*p!B;]Ed7tkrLi;88U7g@3v!5pk2X6D)vt,.@N8c]@yyEcKi[vwUu@.Ppm@C6%Mv*3Nw}Y,58_aH)", 100, 0xd8b95312},
|
|
+ {__LINE__, 0xc45aef77, "h{bcmdC+a;t+Cf{6Y_dFq-{X4Yu&7uNfVDh?q&_u.UWJU],-GiH7ADzb7-V.Q%4=+v!$L9W+T=bP]$_:]Vyg}A.ygD.r;h-D]m%&", 100, 0xbb1c9912},
|
|
+ {__LINE__, 0xc45aef77, "h{bcmdC+a;t+Cf{6Y_dFq-{X4Yu&7uNfVDh?q&_u.UWJU],-GiH7ADzb7-V.Q%4=+v!$L9W+T=bP]$_:]Vyg}A.ygD.r;h-D]m%&"
|
|
+ "h{bcmdC+a;t+Cf{6Y_dFq-{X4Yu&7uNfVDh?q&_u.UWJU],-GiH7ADzb7-V.Q%4=+v!$L9W+T=bP]$_:]Vyg}A.ygD.r;h-D]m%&"
|
|
+ "h{bcmdC+a;t+Cf{6Y_dFq-{X4Yu&7uNfVDh?q&_u.UWJU],-GiH7ADzb7-V.Q%4=+v!$L9W+T=bP]$_:]Vyg}A.ygD.r;h-D]m%&"
|
|
+ "h{bcmdC+a;t+Cf{6Y_dFq-{X4Yu&7uNfVDh?q&_u.UWJU],-GiH7ADzb7-V.Q%4=+v!$L9W+T=bP]$_:]Vyg}A.ygD.r;h-D]m%&"
|
|
+ "h{bcmdC+a;t+Cf{6Y_dFq-{X4Yu&7uNfVDh?q&_u.UWJU],-GiH7ADzb7-V.Q%4=+v!$L9W+T=bP]$_:]Vyg}A.ygD.r;h-D]m%&"
|
|
+ "h{bcmdC+a;t+Cf{6Y_dFq-{X4Yu&7uNfVDh?q&_u.UWJU],-GiH7ADzb7-V.Q%4=+v!$L9W+T=bP]$_:]Vyg}A.ygD.r;h-D]m%&", 600, 0x888AFA5B}
|
|
+};
|
|
+
|
|
+static const int test_size = sizeof(tests) / sizeof(tests[0]);
|
|
+
|
|
+int main(void)
|
|
+{
|
|
+ int i;
|
|
+ for (i = 0; i < test_size; i++) {
|
|
+ test_crc32(tests[i].crc, (Byte*) tests[i].buf, tests[i].len,
|
|
+ tests[i].expect, tests[i].line);
|
|
+ }
|
|
+ return 0;
|
|
+}
|
|
|
|
From 8ef06a3a9f26205a08a12c324665d5620662264f Mon Sep 17 00:00:00 2001
|
|
From: Tulio Magno Quites Machado Filho <tuliom@linux.ibm.com>
|
|
Date: Wed, 25 Mar 2020 12:16:41 -0300
|
|
Subject: [PATCH 3/4] Fix clang's behavior on versions >= 7
|
|
|
|
Clang 7 changed the behavior of vec_xxpermdi in order to match GCC's
|
|
behavior. After this change, code that used to work on Clang 6 stopped
|
|
working on Clang >= 7.
|
|
|
|
Tested on Clang 6, 7, 8 and 9.
|
|
|
|
Reference: https://bugs.llvm.org/show_bug.cgi?id=38192
|
|
|
|
Signed-off-by: Tulio Magno Quites Machado Filho <tuliom@linux.ibm.com>
|
|
---
|
|
contrib/power/clang_workaround.h | 15 ++++++++++-----
|
|
1 file changed, 10 insertions(+), 5 deletions(-)
|
|
|
|
diff --git a/contrib/power/clang_workaround.h b/contrib/power/clang_workaround.h
|
|
index b5e7dae01..915f7e528 100644
|
|
--- a/contrib/power/clang_workaround.h
|
|
+++ b/contrib/power/clang_workaround.h
|
|
@@ -39,7 +39,12 @@ __vector unsigned long long __builtin_pack_vector (unsigned long __a,
|
|
return __v;
|
|
}
|
|
|
|
-#ifndef vec_xxpermdi
|
|
+/*
|
|
+ * Clang 7 changed the behavior of vec_xxpermdi in order to provide the same
|
|
+ * behavior as GCC. That means code adapted to Clang >= 7 does not work on
|
|
+ * Clang <= 6. So, fall back to __builtin_unpack_vector() on Clang <= 6.
|
|
+ */
|
|
+#if !defined vec_xxpermdi || __clang_major__ <= 6
|
|
|
|
static inline
|
|
unsigned long __builtin_unpack_vector (__vector unsigned long long __v,
|
|
@@ -62,9 +67,9 @@ static inline
|
|
unsigned long __builtin_unpack_vector_0 (__vector unsigned long long __v)
|
|
{
|
|
#if defined(__BIG_ENDIAN__)
|
|
- return vec_xxpermdi(__v, __v, 0x0)[1];
|
|
- #else
|
|
return vec_xxpermdi(__v, __v, 0x0)[0];
|
|
+ #else
|
|
+ return vec_xxpermdi(__v, __v, 0x3)[0];
|
|
#endif
|
|
}
|
|
|
|
@@ -72,9 +77,9 @@ static inline
|
|
unsigned long __builtin_unpack_vector_1 (__vector unsigned long long __v)
|
|
{
|
|
#if defined(__BIG_ENDIAN__)
|
|
- return vec_xxpermdi(__v, __v, 0x3)[1];
|
|
- #else
|
|
return vec_xxpermdi(__v, __v, 0x3)[0];
|
|
+ #else
|
|
+ return vec_xxpermdi(__v, __v, 0x0)[0];
|
|
#endif
|
|
}
|
|
#endif /* vec_xxpermdi */
|
|
|
|
From 113203437eda67261848b14b6c80a33ff7e33d34 Mon Sep 17 00:00:00 2001
|
|
From: Ilya Leoshkevich <iii@linux.ibm.com>
|
|
Date: Wed, 18 Jul 2018 13:14:07 +0200
|
|
Subject: [PATCH 4/4] Add support for IBM Z hardware-accelerated deflate
|
|
|
|
IBM Z mainframes starting from version z15 provide DFLTCC instruction,
|
|
which implements deflate algorithm in hardware with estimated
|
|
compression and decompression performance orders of magnitude faster
|
|
than the current zlib and ratio comparable with that of level 1.
|
|
|
|
This patch adds DFLTCC support to zlib. In order to enable it, the
|
|
following build commands should be used:
|
|
|
|
$ ./configure --dfltcc
|
|
$ make
|
|
|
|
When built like this, zlib would compress in hardware on level 1, and in
|
|
software on all other levels. Decompression will always happen in
|
|
hardware. In order to enable DFLTCC compression for levels 1-6 (i.e. to
|
|
make it used by default) one could either configure with
|
|
--dfltcc-level-mask=0x7e or set the environment variable
|
|
DFLTCC_LEVEL_MASK to 0x7e at run time.
|
|
|
|
Two DFLTCC compression calls produce the same results only when they
|
|
both are made on machines of the same generation, and when the
|
|
respective buffers have the same offset relative to the start of the
|
|
page. Therefore care should be taken when using hardware compression
|
|
when reproducible results are desired. One such use case - reproducible
|
|
software builds - is handled explicitly: when SOURCE_DATE_EPOCH
|
|
environment variable is set, the hardware compression is disabled.
|
|
|
|
DFLTCC does not support every single zlib feature, in particular:
|
|
|
|
* inflate(Z_BLOCK) and inflate(Z_TREES)
|
|
* inflateMark()
|
|
* inflatePrime()
|
|
* inflateSyncPoint()
|
|
|
|
When used, these functions will either switch to software, or, in case
|
|
this is not possible, gracefully fail.
|
|
|
|
This patch tries to add DFLTCC support in the least intrusive way.
|
|
All SystemZ-specific code is placed into a separate file, but
|
|
unfortunately there is still a noticeable amount of changes in the
|
|
main zlib code. Below is the summary of these changes.
|
|
|
|
DFLTCC takes as arguments a parameter block, an input buffer, an output
|
|
buffer and a window. Since DFLTCC requires parameter block to be
|
|
doubleword-aligned, and it's reasonable to allocate it alongside
|
|
deflate and inflate states, ZALLOC_STATE, ZFREE_STATE and ZCOPY_STATE
|
|
macros were introduced in order to encapsulate the allocation details.
|
|
The same is true for window, for which ZALLOC_WINDOW and
|
|
TRY_FREE_WINDOW macros were introduced.
|
|
|
|
Software and hardware window formats do not match, therefore,
|
|
deflateSetDictionary(), deflateGetDictionary(), inflateSetDictionary()
|
|
and inflateGetDictionary() need special handling, which is triggered
|
|
using DEFLATE_SET_DICTIONARY_HOOK, DEFLATE_GET_DICTIONARY_HOOK,
|
|
INFLATE_SET_DICTIONARY_HOOK and INFLATE_GET_DICTIONARY_HOOK macros.
|
|
|
|
deflateResetKeep() and inflateResetKeep() now update the DFLTCC
|
|
parameter block, which is allocated alongside zlib state, using
|
|
the new DEFLATE_RESET_KEEP_HOOK and INFLATE_RESET_KEEP_HOOK macros.
|
|
|
|
The new DEFLATE_PARAMS_HOOK switches between hardware and software
|
|
deflate implementations when deflateParams() arguments demand this.
|
|
|
|
The new INFLATE_PRIME_HOOK, INFLATE_MARK_HOOK and
|
|
INFLATE_SYNC_POINT_HOOK macros make the respective unsupported calls
|
|
gracefully fail.
|
|
|
|
The algorithm implemented in hardware has different compression ratio
|
|
than the one implemented in software. In order for deflateBound() to
|
|
return the correct results for the hardware implementation, the new
|
|
DEFLATE_BOUND_ADJUST_COMPLEN and DEFLATE_NEED_CONSERVATIVE_BOUND macros
|
|
were introduced.
|
|
|
|
Actual compression and decompression are handled by the new DEFLATE_HOOK
|
|
and INFLATE_TYPEDO_HOOK macros. Since inflation with DFLTCC manages the
|
|
window on its own, calling updatewindow() is suppressed using the new
|
|
INFLATE_NEED_UPDATEWINDOW() macro.
|
|
|
|
In addition to compression, DFLTCC computes CRC-32 and Adler-32
|
|
checksums, therefore, whenever it's used, software checksumming needs to
|
|
be suppressed using the new DEFLATE_NEED_CHECKSUM and
|
|
INFLATE_NEED_CHECKSUM macros.
|
|
|
|
DFLTCC will refuse to write an End-of-block Symbol if there is no input
|
|
data, thus in some cases it is necessary to do this manually. In order
|
|
to achieve this, send_bits, bi_reverse, bi_windup and flush_pending
|
|
were promoted from local to ZLIB_INTERNAL. Furthermore, since block and
|
|
stream termination must be handled in software as well, block_state enum
|
|
was moved to deflate.h.
|
|
|
|
Since the first call to dfltcc_inflate already needs the window, and it
|
|
might be not allocated yet, inflate_ensure_window was factored out of
|
|
updatewindow and made ZLIB_INTERNAL.
|
|
---
|
|
Makefile.in | 8 +
|
|
compress.c | 14 +-
|
|
configure | 24 +
|
|
contrib/README.contrib | 4 +
|
|
contrib/s390/README.txt | 17 +
|
|
contrib/s390/dfltcc.c | 1089 +++++++++++++++++++++++++++++++++
|
|
contrib/s390/dfltcc.h | 100 +++
|
|
contrib/s390/dfltcc_deflate.h | 55 ++
|
|
deflate.c | 82 ++-
|
|
deflate.h | 12 +
|
|
gzguts.h | 4 +
|
|
inflate.c | 97 ++-
|
|
inflate.h | 2 +
|
|
test/infcover.c | 4 +-
|
|
test/minigzip.c | 4 +
|
|
trees.c | 13 +-
|
|
zutil.h | 2 +
|
|
17 files changed, 1469 insertions(+), 62 deletions(-)
|
|
create mode 100644 contrib/s390/README.txt
|
|
create mode 100644 contrib/s390/dfltcc.c
|
|
create mode 100644 contrib/s390/dfltcc.h
|
|
create mode 100644 contrib/s390/dfltcc_deflate.h
|
|
|
|
diff --git a/Makefile.in b/Makefile.in
|
|
index 83d8ca47d..54c529b3b 100644
|
|
--- a/Makefile.in
|
|
+++ b/Makefile.in
|
|
@@ -139,6 +139,14 @@ match.lo: match.S
|
|
mv _match.o match.lo
|
|
rm -f _match.s
|
|
|
|
+dfltcc.o: $(SRCDIR)contrib/s390/dfltcc.c $(SRCDIR)zlib.h zconf.h
|
|
+ $(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)contrib/s390/dfltcc.c
|
|
+
|
|
+dfltcc.lo: $(SRCDIR)contrib/s390/dfltcc.c $(SRCDIR)zlib.h zconf.h
|
|
+ -@mkdir objs 2>/dev/null || test -d objs
|
|
+ $(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/dfltcc.o $(SRCDIR)contrib/s390/dfltcc.c
|
|
+ -@mv objs/dfltcc.o $@
|
|
+
|
|
crc32_test.o: $(SRCDIR)test/crc32_test.c $(SRCDIR)zlib.h zconf.h
|
|
$(CC) $(CFLAGS) $(ZINCOUT) -c -o $@ $(SRCDIR)test/crc32_test.c
|
|
|
|
diff --git a/compress.c b/compress.c
|
|
index 2ad5326c1..179ee2773 100644
|
|
--- a/compress.c
|
|
+++ b/compress.c
|
|
@@ -5,9 +5,15 @@
|
|
|
|
/* @(#) $Id$ */
|
|
|
|
-#define ZLIB_INTERNAL
|
|
+#include "zutil.h"
|
|
#include "zlib.h"
|
|
|
|
+#ifdef DFLTCC
|
|
+# include "contrib/s390/dfltcc.h"
|
|
+#else
|
|
+#define DEFLATE_BOUND_COMPLEN(source_len) 0
|
|
+#endif
|
|
+
|
|
/* ===========================================================================
|
|
Compresses the source buffer into the destination buffer. The level
|
|
parameter has the same meaning as in deflateInit. sourceLen is the byte
|
|
@@ -81,6 +87,12 @@ int ZEXPORT compress(dest, destLen, source, sourceLen)
|
|
uLong ZEXPORT compressBound(sourceLen)
|
|
uLong sourceLen;
|
|
{
|
|
+ uLong complen = DEFLATE_BOUND_COMPLEN(sourceLen);
|
|
+
|
|
+ if (complen > 0)
|
|
+ /* Architecture-specific code provided an upper bound. */
|
|
+ return complen + ZLIB_WRAPLEN;
|
|
+
|
|
return sourceLen + (sourceLen >> 12) + (sourceLen >> 14) +
|
|
(sourceLen >> 25) + 13;
|
|
}
|
|
diff --git a/configure b/configure
|
|
index 45d51e596..ab3204a6d 100755
|
|
--- a/configure
|
|
+++ b/configure
|
|
@@ -118,6 +118,7 @@ case "$1" in
|
|
echo ' configure [--const] [--zprefix] [--prefix=PREFIX] [--eprefix=EXPREFIX]' | tee -a configure.log
|
|
echo ' [--static] [--64] [--libdir=LIBDIR] [--sharedlibdir=LIBDIR]' | tee -a configure.log
|
|
echo ' [--includedir=INCLUDEDIR] [--archs="-arch i386 -arch x86_64"]' | tee -a configure.log
|
|
+ echo ' [--dfltcc] [--dfltcc-level-mask=MASK]' | tee -a configure.log
|
|
exit 0 ;;
|
|
-p*=* | --prefix=*) prefix=`echo $1 | sed 's/.*=//'`; shift ;;
|
|
-e*=* | --eprefix=*) exec_prefix=`echo $1 | sed 's/.*=//'`; shift ;;
|
|
@@ -142,6 +143,16 @@ case "$1" in
|
|
-w* | --warn) warn=1; shift ;;
|
|
-d* | --debug) debug=1; shift ;;
|
|
--sanitize) sanitize=1; shift ;;
|
|
+ --dfltcc)
|
|
+ CFLAGS="$CFLAGS -DDFLTCC"
|
|
+ OBJC="$OBJC dfltcc.o"
|
|
+ PIC_OBJC="$PIC_OBJC dfltcc.lo"
|
|
+ shift
|
|
+ ;;
|
|
+ --dfltcc-level-mask=*)
|
|
+ CFLAGS="$CFLAGS -DDFLTCC_LEVEL_MASK=`echo $1 | sed 's/.*=//'`"
|
|
+ shift
|
|
+ ;;
|
|
*)
|
|
echo "unknown option: $1" | tee -a configure.log
|
|
echo "$0 --help for help" | tee -a configure.log
|
|
@@ -828,6 +839,19 @@ EOF
|
|
fi
|
|
fi
|
|
|
|
+# Check whether sys/sdt.h is available
|
|
+cat > $test.c << EOF
|
|
+#include <sys/sdt.h>
|
|
+int main() { return 0; }
|
|
+EOF
|
|
+if try $CC -c $CFLAGS $test.c; then
|
|
+ echo "Checking for sys/sdt.h ... Yes." | tee -a configure.log
|
|
+ CFLAGS="$CFLAGS -DHAVE_SYS_SDT_H"
|
|
+ SFLAGS="$SFLAGS -DHAVE_SYS_SDT_H"
|
|
+else
|
|
+ echo "Checking for sys/sdt.h ... No." | tee -a configure.log
|
|
+fi
|
|
+
|
|
# test to see if we can use a gnu indirection function to detect and load optimized code at runtime
|
|
echo >> configure.log
|
|
cat > $test.c <<EOF
|
|
diff --git a/contrib/README.contrib b/contrib/README.contrib
|
|
index 90170df69..a36d4047c 100644
|
|
--- a/contrib/README.contrib
|
|
+++ b/contrib/README.contrib
|
|
@@ -55,6 +55,10 @@ puff/ by Mark Adler <madler@alumni.caltech.edu>
|
|
Small, low memory usage inflate. Also serves to provide an
|
|
unambiguous description of the deflate format.
|
|
|
|
+s390/ by Ilya Leoshkevich <iii@linux.ibm.com>
|
|
+ Hardware-accelerated deflate on IBM Z with DEFLATE CONVERSION CALL
|
|
+ instruction.
|
|
+
|
|
testzlib/ by Gilles Vollant <info@winimage.com>
|
|
Example of the use of zlib
|
|
|
|
diff --git a/contrib/s390/README.txt b/contrib/s390/README.txt
|
|
new file mode 100644
|
|
index 000000000..48be008bd
|
|
--- /dev/null
|
|
+++ b/contrib/s390/README.txt
|
|
@@ -0,0 +1,17 @@
|
|
+IBM Z mainframes starting from version z15 provide DFLTCC instruction,
|
|
+which implements deflate algorithm in hardware with estimated
|
|
+compression and decompression performance orders of magnitude faster
|
|
+than the current zlib and ratio comparable with that of level 1.
|
|
+
|
|
+This directory adds DFLTCC support. In order to enable it, the following
|
|
+build commands should be used:
|
|
+
|
|
+ $ ./configure --dfltcc
|
|
+ $ make
|
|
+
|
|
+When built like this, zlib would compress in hardware on level 1, and in
|
|
+software on all other levels. Decompression will always happen in
|
|
+hardware. In order to enable DFLTCC compression for levels 1-6 (i.e. to
|
|
+make it used by default) one could either configure with
|
|
+--dfltcc-level-mask=0x7e or set the environment variable
|
|
+DFLTCC_LEVEL_MASK to 0x7e at run time.
|
|
diff --git a/contrib/s390/dfltcc.c b/contrib/s390/dfltcc.c
|
|
new file mode 100644
|
|
index 000000000..b8c20bded
|
|
--- /dev/null
|
|
+++ b/contrib/s390/dfltcc.c
|
|
@@ -0,0 +1,1089 @@
|
|
+/* dfltcc.c - SystemZ DEFLATE CONVERSION CALL support. */
|
|
+
|
|
+/*
|
|
+ Use the following commands to build zlib with DFLTCC support:
|
|
+
|
|
+ $ ./configure --dfltcc
|
|
+ $ make
|
|
+*/
|
|
+
|
|
+#define _GNU_SOURCE
|
|
+#include <ctype.h>
|
|
+#include <errno.h>
|
|
+#include <inttypes.h>
|
|
+#include <stddef.h>
|
|
+#include <stdio.h>
|
|
+#include <stdint.h>
|
|
+#include <stdlib.h>
|
|
+#include "../../zutil.h"
|
|
+#include "../../deflate.h"
|
|
+#include "../../inftrees.h"
|
|
+#include "../../inflate.h"
|
|
+#include "dfltcc.h"
|
|
+#include "dfltcc_deflate.h"
|
|
+#ifdef HAVE_SYS_SDT_H
|
|
+#include <sys/sdt.h>
|
|
+#endif
|
|
+
|
|
+/*
|
|
+ C wrapper for the DEFLATE CONVERSION CALL instruction.
|
|
+ */
|
|
+typedef enum {
|
|
+ DFLTCC_CC_OK = 0,
|
|
+ DFLTCC_CC_OP1_TOO_SHORT = 1,
|
|
+ DFLTCC_CC_OP2_TOO_SHORT = 2,
|
|
+ DFLTCC_CC_OP2_CORRUPT = 2,
|
|
+ DFLTCC_CC_AGAIN = 3,
|
|
+} dfltcc_cc;
|
|
+
|
|
+#define DFLTCC_QAF 0
|
|
+#define DFLTCC_GDHT 1
|
|
+#define DFLTCC_CMPR 2
|
|
+#define DFLTCC_XPND 4
|
|
+#define HBT_CIRCULAR (1 << 7)
|
|
+#define HB_BITS 15
|
|
+#define HB_SIZE (1 << HB_BITS)
|
|
+#define DFLTCC_FACILITY 151
|
|
+
|
|
+local inline dfltcc_cc dfltcc OF((int fn, void *param,
|
|
+ Bytef **op1, size_t *len1,
|
|
+ z_const Bytef **op2, size_t *len2,
|
|
+ void *hist));
|
|
+local inline dfltcc_cc dfltcc(fn, param, op1, len1, op2, len2, hist)
|
|
+ int fn;
|
|
+ void *param;
|
|
+ Bytef **op1;
|
|
+ size_t *len1;
|
|
+ z_const Bytef **op2;
|
|
+ size_t *len2;
|
|
+ void *hist;
|
|
+{
|
|
+ Bytef *t2 = op1 ? *op1 : NULL;
|
|
+ size_t t3 = len1 ? *len1 : 0;
|
|
+ z_const Bytef *t4 = op2 ? *op2 : NULL;
|
|
+ size_t t5 = len2 ? *len2 : 0;
|
|
+ register int r0 __asm__("r0") = fn;
|
|
+ register void *r1 __asm__("r1") = param;
|
|
+ register Bytef *r2 __asm__("r2") = t2;
|
|
+ register size_t r3 __asm__("r3") = t3;
|
|
+ register z_const Bytef *r4 __asm__("r4") = t4;
|
|
+ register size_t r5 __asm__("r5") = t5;
|
|
+ int cc;
|
|
+
|
|
+ __asm__ volatile(
|
|
+#ifdef HAVE_SYS_SDT_H
|
|
+ STAP_PROBE_ASM(zlib, dfltcc_entry,
|
|
+ STAP_PROBE_ASM_TEMPLATE(5))
|
|
+#endif
|
|
+ ".insn rrf,0xb9390000,%[r2],%[r4],%[hist],0\n"
|
|
+#ifdef HAVE_SYS_SDT_H
|
|
+ STAP_PROBE_ASM(zlib, dfltcc_exit,
|
|
+ STAP_PROBE_ASM_TEMPLATE(5))
|
|
+#endif
|
|
+ "ipm %[cc]\n"
|
|
+ : [r2] "+r" (r2)
|
|
+ , [r3] "+r" (r3)
|
|
+ , [r4] "+r" (r4)
|
|
+ , [r5] "+r" (r5)
|
|
+ , [cc] "=r" (cc)
|
|
+ : [r0] "r" (r0)
|
|
+ , [r1] "r" (r1)
|
|
+ , [hist] "r" (hist)
|
|
+#ifdef HAVE_SYS_SDT_H
|
|
+ , STAP_PROBE_ASM_OPERANDS(5, r2, r3, r4, r5, hist)
|
|
+#endif
|
|
+ : "cc", "memory");
|
|
+ t2 = r2; t3 = r3; t4 = r4; t5 = r5;
|
|
+
|
|
+ if (op1)
|
|
+ *op1 = t2;
|
|
+ if (len1)
|
|
+ *len1 = t3;
|
|
+ if (op2)
|
|
+ *op2 = t4;
|
|
+ if (len2)
|
|
+ *len2 = t5;
|
|
+ return (cc >> 28) & 3;
|
|
+}
|
|
+
|
|
+/*
|
|
+ Parameter Block for Query Available Functions.
|
|
+ */
|
|
+#define static_assert(c, msg) \
|
|
+ __attribute__((unused)) \
|
|
+ static char static_assert_failed_ ## msg[c ? 1 : -1]
|
|
+
|
|
+struct dfltcc_qaf_param {
|
|
+ char fns[16];
|
|
+ char reserved1[8];
|
|
+ char fmts[2];
|
|
+ char reserved2[6];
|
|
+};
|
|
+
|
|
+static_assert(sizeof(struct dfltcc_qaf_param) == 32,
|
|
+ sizeof_struct_dfltcc_qaf_param_is_32);
|
|
+
|
|
+local inline int is_bit_set OF((const char *bits, int n));
|
|
+local inline int is_bit_set(bits, n)
|
|
+ const char *bits;
|
|
+ int n;
|
|
+{
|
|
+ return bits[n / 8] & (1 << (7 - (n % 8)));
|
|
+}
|
|
+
|
|
+local inline void clear_bit OF((char *bits, int n));
|
|
+local inline void clear_bit(bits, n)
|
|
+ char *bits;
|
|
+ int n;
|
|
+{
|
|
+ bits[n / 8] &= ~(1 << (7 - (n % 8)));
|
|
+}
|
|
+
|
|
+#define DFLTCC_FMT0 0
|
|
+
|
|
+/*
|
|
+ Parameter Block for Generate Dynamic-Huffman Table, Compress and Expand.
|
|
+ */
|
|
+#define CVT_CRC32 0
|
|
+#define CVT_ADLER32 1
|
|
+#define HTT_FIXED 0
|
|
+#define HTT_DYNAMIC 1
|
|
+
|
|
+struct dfltcc_param_v0 {
|
|
+ uint16_t pbvn; /* Parameter-Block-Version Number */
|
|
+ uint8_t mvn; /* Model-Version Number */
|
|
+ uint8_t ribm; /* Reserved for IBM use */
|
|
+ unsigned reserved32 : 31;
|
|
+ unsigned cf : 1; /* Continuation Flag */
|
|
+ uint8_t reserved64[8];
|
|
+ unsigned nt : 1; /* New Task */
|
|
+ unsigned reserved129 : 1;
|
|
+ unsigned cvt : 1; /* Check Value Type */
|
|
+ unsigned reserved131 : 1;
|
|
+ unsigned htt : 1; /* Huffman-Table Type */
|
|
+ unsigned bcf : 1; /* Block-Continuation Flag */
|
|
+ unsigned bcc : 1; /* Block Closing Control */
|
|
+ unsigned bhf : 1; /* Block Header Final */
|
|
+ unsigned reserved136 : 1;
|
|
+ unsigned reserved137 : 1;
|
|
+ unsigned dhtgc : 1; /* DHT Generation Control */
|
|
+ unsigned reserved139 : 5;
|
|
+ unsigned reserved144 : 5;
|
|
+ unsigned sbb : 3; /* Sub-Byte Boundary */
|
|
+ uint8_t oesc; /* Operation-Ending-Supplemental Code */
|
|
+ unsigned reserved160 : 12;
|
|
+ unsigned ifs : 4; /* Incomplete-Function Status */
|
|
+ uint16_t ifl; /* Incomplete-Function Length */
|
|
+ uint8_t reserved192[8];
|
|
+ uint8_t reserved256[8];
|
|
+ uint8_t reserved320[4];
|
|
+ uint16_t hl; /* History Length */
|
|
+ unsigned reserved368 : 1;
|
|
+ uint16_t ho : 15; /* History Offset */
|
|
+ uint32_t cv; /* Check Value */
|
|
+ unsigned eobs : 15; /* End-of-block Symbol */
|
|
+ unsigned reserved431: 1;
|
|
+ uint8_t eobl : 4; /* End-of-block Length */
|
|
+ unsigned reserved436 : 12;
|
|
+ unsigned reserved448 : 4;
|
|
+ uint16_t cdhtl : 12; /* Compressed-Dynamic-Huffman Table
|
|
+ Length */
|
|
+ uint8_t reserved464[6];
|
|
+ uint8_t cdht[288];
|
|
+ uint8_t reserved[32];
|
|
+ uint8_t csb[1152];
|
|
+};
|
|
+
|
|
+static_assert(sizeof(struct dfltcc_param_v0) == 1536,
|
|
+ sizeof_struct_dfltcc_param_v0_is_1536);
|
|
+
|
|
+local z_const char *oesc_msg OF((char *buf, int oesc));
|
|
+local z_const char *oesc_msg(buf, oesc)
|
|
+ char *buf;
|
|
+ int oesc;
|
|
+{
|
|
+ if (oesc == 0x00)
|
|
+ return NULL; /* Successful completion */
|
|
+ else {
|
|
+ sprintf(buf, "Operation-Ending-Supplemental Code is 0x%.2X", oesc);
|
|
+ return buf;
|
|
+ }
|
|
+}
|
|
+
|
|
+/*
|
|
+ Extension of inflate_state and deflate_state. Must be doubleword-aligned.
|
|
+*/
|
|
+struct dfltcc_state {
|
|
+ struct dfltcc_param_v0 param; /* Parameter block. */
|
|
+ struct dfltcc_qaf_param af; /* Available functions. */
|
|
+ uLong level_mask; /* Levels on which to use DFLTCC */
|
|
+ uLong block_size; /* New block each X bytes */
|
|
+ uLong block_threshold; /* New block after total_in > X */
|
|
+ uLong dht_threshold; /* New block only if avail_in >= X */
|
|
+ char msg[64]; /* Buffer for strm->msg */
|
|
+};
|
|
+
|
|
+#define ALIGN_UP(p, size) \
|
|
+ (__typeof__(p))(((uintptr_t)(p) + ((size) - 1)) & ~((size) - 1))
|
|
+
|
|
+#define GET_DFLTCC_STATE(state) ((struct dfltcc_state FAR *)( \
|
|
+ (char FAR *)(state) + ALIGN_UP(sizeof(*state), 8)))
|
|
+
|
|
+/*
|
|
+ Compress.
|
|
+ */
|
|
+local inline int dfltcc_can_deflate_with_params(z_streamp strm,
|
|
+ int level,
|
|
+ uInt window_bits,
|
|
+ int strategy);
|
|
+local inline int dfltcc_can_deflate_with_params(strm,
|
|
+ level,
|
|
+ window_bits,
|
|
+ strategy)
|
|
+ z_streamp strm;
|
|
+ int level;
|
|
+ uInt window_bits;
|
|
+ int strategy;
|
|
+{
|
|
+ deflate_state FAR *state = (deflate_state FAR *)strm->state;
|
|
+ struct dfltcc_state FAR *dfltcc_state = GET_DFLTCC_STATE(state);
|
|
+
|
|
+ /* Unsupported compression settings */
|
|
+ if ((dfltcc_state->level_mask & (1 << level)) == 0)
|
|
+ return 0;
|
|
+ if (window_bits != HB_BITS)
|
|
+ return 0;
|
|
+ if (strategy != Z_FIXED && strategy != Z_DEFAULT_STRATEGY)
|
|
+ return 0;
|
|
+
|
|
+ /* Unsupported hardware */
|
|
+ if (!is_bit_set(dfltcc_state->af.fns, DFLTCC_GDHT) ||
|
|
+ !is_bit_set(dfltcc_state->af.fns, DFLTCC_CMPR) ||
|
|
+ !is_bit_set(dfltcc_state->af.fmts, DFLTCC_FMT0))
|
|
+ return 0;
|
|
+
|
|
+ return 1;
|
|
+}
|
|
+
|
|
+int ZLIB_INTERNAL dfltcc_can_deflate(strm)
|
|
+ z_streamp strm;
|
|
+{
|
|
+ deflate_state FAR *state = (deflate_state FAR *)strm->state;
|
|
+
|
|
+ return dfltcc_can_deflate_with_params(strm,
|
|
+ state->level,
|
|
+ state->w_bits,
|
|
+ state->strategy);
|
|
+}
|
|
+
|
|
+local void dfltcc_gdht OF((z_streamp strm));
|
|
+local void dfltcc_gdht(strm)
|
|
+ z_streamp strm;
|
|
+{
|
|
+ deflate_state FAR *state = (deflate_state FAR *)strm->state;
|
|
+ struct dfltcc_param_v0 FAR *param = &GET_DFLTCC_STATE(state)->param;
|
|
+ size_t avail_in = strm->avail_in; /* size_t copy: dfltcc() takes a size_t * and writes the length back */
|
|
+
|
|
+ dfltcc(DFLTCC_GDHT,
|
|
+ param, NULL, NULL,
|
|
+ &strm->next_in, &avail_in, NULL);
|
|
+}
|
|
+
|
|
+local dfltcc_cc dfltcc_cmpr OF((z_streamp strm));
|
|
+local dfltcc_cc dfltcc_cmpr(strm)
|
|
+ z_streamp strm;
|
|
+{
|
|
+ deflate_state FAR *state = (deflate_state FAR *)strm->state;
|
|
+ struct dfltcc_param_v0 FAR *param = &GET_DFLTCC_STATE(state)->param;
|
|
+ size_t avail_in = strm->avail_in;
|
|
+ size_t avail_out = strm->avail_out;
|
|
+ dfltcc_cc cc;
|
|
+
|
|
+ cc = dfltcc(DFLTCC_CMPR | HBT_CIRCULAR,
|
|
+ param, &strm->next_out, &avail_out,
|
|
+ &strm->next_in, &avail_in, state->window);
|
|
+ strm->total_in += (strm->avail_in - avail_in);
|
|
+ strm->total_out += (strm->avail_out - avail_out);
|
|
+ strm->avail_in = avail_in;
|
|
+ strm->avail_out = avail_out;
|
|
+ return cc;
|
|
+}
|
|
+
|
|
+local void send_eobs OF((z_streamp strm,
|
|
+ z_const struct dfltcc_param_v0 FAR *param));
|
|
+local void send_eobs(strm, param)
|
|
+ z_streamp strm;
|
|
+ z_const struct dfltcc_param_v0 FAR *param;
|
|
+{
|
|
+ deflate_state FAR *state = (deflate_state FAR *)strm->state;
|
|
+
|
|
+ _tr_send_bits(
|
|
+ state,
|
|
+ bi_reverse(param->eobs >> (15 - param->eobl), param->eobl),
|
|
+ param->eobl);
|
|
+ flush_pending(strm);
|
|
+ if (state->pending != 0) {
|
|
+ /* The remaining data is located in pending_out[0:pending]. If someone
|
|
+ * calls put_byte() - this might happen in deflate() - the byte will be
|
|
+ * placed into pending_buf[pending], which is incorrect. Move the
|
|
+ * remaining data to the beginning of pending_buf so that put_byte() is
|
|
+ * usable again.
|
|
+ */
|
|
+ memmove(state->pending_buf, state->pending_out, state->pending);
|
|
+ state->pending_out = state->pending_buf;
|
|
+ }
|
|
+#ifdef ZLIB_DEBUG
|
|
+ state->compressed_len += param->eobl;
|
|
+#endif
|
|
+}
|
|
+
|
|
+int ZLIB_INTERNAL dfltcc_deflate(strm, flush, result)
|
|
+ z_streamp strm;
|
|
+ int flush;
|
|
+ block_state *result;
|
|
+{
|
|
+ deflate_state FAR *state = (deflate_state FAR *)strm->state;
|
|
+ struct dfltcc_state FAR *dfltcc_state = GET_DFLTCC_STATE(state);
|
|
+ struct dfltcc_param_v0 FAR *param = &dfltcc_state->param;
|
|
+ uInt masked_avail_in;
|
|
+ dfltcc_cc cc;
|
|
+ int need_empty_block;
|
|
+ int soft_bcc;
|
|
+ int no_flush;
|
|
+
|
|
+ if (!dfltcc_can_deflate(strm)) {
|
|
+ /* Clear history. */
|
|
+ if (flush == Z_FULL_FLUSH)
|
|
+ param->hl = 0;
|
|
+ return 0;
|
|
+ }
|
|
+
|
|
+again:
|
|
+ masked_avail_in = 0;
|
|
+ soft_bcc = 0;
|
|
+ no_flush = flush == Z_NO_FLUSH;
|
|
+
|
|
+ /* No input data. Return, except when Continuation Flag is set, which means
|
|
+ * that DFLTCC has buffered some output in the parameter block and needs to
|
|
+ * be called again in order to flush it.
|
|
+ */
|
|
+ if (strm->avail_in == 0 && !param->cf) {
|
|
+ /* A block is still open, and the hardware does not support closing
|
|
+ * blocks without adding data. Thus, close it manually.
|
|
+ */
|
|
+ if (!no_flush && param->bcf) {
|
|
+ send_eobs(strm, param);
|
|
+ param->bcf = 0;
|
|
+ }
|
|
+ /* Let one of deflate_* functions write a trailing empty block. */
|
|
+ if (flush == Z_FINISH)
|
|
+ return 0;
|
|
+ /* Clear history. */
|
|
+ if (flush == Z_FULL_FLUSH)
|
|
+ param->hl = 0;
|
|
+ /* Trigger block post-processing if necessary. */
|
|
+ *result = no_flush ? need_more : block_done;
|
|
+ return 1;
|
|
+ }
|
|
+
|
|
+ /* There is an open non-BFINAL block, we are not going to close it just
|
|
+ * yet, we have compressed more than DFLTCC_BLOCK_SIZE bytes and we see
|
|
+ * more than DFLTCC_DHT_MIN_SAMPLE_SIZE bytes. Open a new block with a new
|
|
+ * DHT in order to adapt to a possibly changed input data distribution.
|
|
+ */
|
|
+ if (param->bcf && no_flush &&
|
|
+ strm->total_in > dfltcc_state->block_threshold &&
|
|
+ strm->avail_in >= dfltcc_state->dht_threshold) {
|
|
+ if (param->cf) {
|
|
+ /* We need to flush the DFLTCC buffer before writing the
|
|
+ * End-of-block Symbol. Mask the input data and proceed as usual.
|
|
+ */
|
|
+ masked_avail_in += strm->avail_in;
|
|
+ strm->avail_in = 0;
|
|
+ no_flush = 0;
|
|
+ } else {
|
|
+ /* DFLTCC buffer is empty, so we can manually write the
|
|
+ * End-of-block Symbol right away.
|
|
+ */
|
|
+ send_eobs(strm, param);
|
|
+ param->bcf = 0;
|
|
+ dfltcc_state->block_threshold =
|
|
+ strm->total_in + dfltcc_state->block_size;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ /* No space for compressed data. If we proceed, dfltcc_cmpr() will return
|
|
+ * DFLTCC_CC_OP1_TOO_SHORT without buffering header bits, but we will still
|
|
+ * set BCF=1, which is wrong. Avoid complications and return early.
|
|
+ */
|
|
+ if (strm->avail_out == 0) {
|
|
+ *result = need_more;
|
|
+ return 1;
|
|
+ }
|
|
+
|
|
+ /* The caller gave us too much data. Pass only one block worth of
|
|
+ * uncompressed data to DFLTCC and mask the rest, so that on the next
|
|
+ * iteration we start a new block.
|
|
+ */
|
|
+ if (no_flush && strm->avail_in > dfltcc_state->block_size) {
|
|
+ masked_avail_in += (strm->avail_in - dfltcc_state->block_size);
|
|
+ strm->avail_in = dfltcc_state->block_size;
|
|
+ }
|
|
+
|
|
+ /* When we have an open non-BFINAL deflate block and caller indicates that
|
|
+ * the stream is ending, we need to close an open deflate block and open a
|
|
+ * BFINAL one.
|
|
+ */
|
|
+ need_empty_block = flush == Z_FINISH && param->bcf && !param->bhf;
|
|
+
|
|
+ /* Translate stream to parameter block */
|
|
+ param->cvt = state->wrap == 2 ? CVT_CRC32 : CVT_ADLER32;
|
|
+ if (!no_flush)
|
|
+ /* We need to close a block. Always do this in software - when there is
|
|
+ * no input data, the hardware will not honor BCC. */
|
|
+ soft_bcc = 1;
|
|
+ if (flush == Z_FINISH && !param->bcf)
|
|
+ /* We are about to open a BFINAL block, set Block Header Final bit
|
|
+ * until the stream ends.
|
|
+ */
|
|
+ param->bhf = 1;
|
|
+ /* DFLTCC-CMPR will write to next_out, so make sure that buffers with
|
|
+ * higher precedence are empty.
|
|
+ */
|
|
+ Assert(state->pending == 0, "There must be no pending bytes");
|
|
+ Assert(state->bi_valid < 8, "There must be less than 8 pending bits");
|
|
+ param->sbb = (unsigned int)state->bi_valid;
|
|
+ if (param->sbb > 0)
|
|
+ *strm->next_out = (Bytef)state->bi_buf;
|
|
+ /* Honor history and check value */
|
|
+ param->nt = 0;
|
|
+ if (state->wrap == 1)
|
|
+ param->cv = strm->adler;
|
|
+ else if (state->wrap == 2)
|
|
+ param->cv = ZSWAP32(strm->adler);
|
|
+
|
|
+ /* When opening a block, choose a Huffman-Table Type */
|
|
+ if (!param->bcf) {
|
|
+ if (state->strategy == Z_FIXED ||
|
|
+ (strm->total_in == 0 && dfltcc_state->block_threshold > 0))
|
|
+ param->htt = HTT_FIXED;
|
|
+ else {
|
|
+ param->htt = HTT_DYNAMIC;
|
|
+ dfltcc_gdht(strm);
|
|
+ }
|
|
+ }
|
|
+
|
|
+ /* Deflate */
|
|
+ do {
|
|
+ cc = dfltcc_cmpr(strm);
|
|
+ if (strm->avail_in < 4096 && masked_avail_in > 0)
|
|
+ /* We are about to call DFLTCC with a small input buffer, which is
|
|
+ * inefficient. Since there is masked data, there will be at least
|
|
+ * one more DFLTCC call, so skip the current one and make the next
|
|
+ * one handle more data.
|
|
+ */
|
|
+ break;
|
|
+ } while (cc == DFLTCC_CC_AGAIN);
|
|
+
|
|
+ /* Translate parameter block to stream */
|
|
+ strm->msg = oesc_msg(dfltcc_state->msg, param->oesc);
|
|
+ state->bi_valid = param->sbb;
|
|
+ if (state->bi_valid == 0)
|
|
+ state->bi_buf = 0; /* Avoid accessing next_out */
|
|
+ else
|
|
+ state->bi_buf = *strm->next_out & ((1 << state->bi_valid) - 1);
|
|
+ if (state->wrap == 1)
|
|
+ strm->adler = param->cv;
|
|
+ else if (state->wrap == 2)
|
|
+ strm->adler = ZSWAP32(param->cv);
|
|
+
|
|
+ /* Unmask the input data */
|
|
+ strm->avail_in += masked_avail_in;
|
|
+ masked_avail_in = 0;
|
|
+
|
|
+ /* If we encounter an error, it means there is a bug in DFLTCC call */
|
|
+ Assert(cc != DFLTCC_CC_OP2_CORRUPT || param->oesc == 0, "BUG");
|
|
+
|
|
+ /* Update Block-Continuation Flag. It will be used to check whether to call
|
|
+ * GDHT the next time.
|
|
+ */
|
|
+ if (cc == DFLTCC_CC_OK) {
|
|
+ if (soft_bcc) {
|
|
+ send_eobs(strm, param);
|
|
+ param->bcf = 0;
|
|
+ dfltcc_state->block_threshold =
|
|
+ strm->total_in + dfltcc_state->block_size;
|
|
+ } else
|
|
+ param->bcf = 1;
|
|
+ if (flush == Z_FINISH) {
|
|
+ if (need_empty_block)
|
|
+ /* Make the current deflate() call also close the stream */
|
|
+ return 0;
|
|
+ else {
|
|
+ bi_windup(state);
|
|
+ *result = finish_done;
|
|
+ }
|
|
+ } else {
|
|
+ if (flush == Z_FULL_FLUSH)
|
|
+ param->hl = 0; /* Clear history */
|
|
+ *result = flush == Z_NO_FLUSH ? need_more : block_done;
|
|
+ }
|
|
+ } else {
|
|
+ param->bcf = 1;
|
|
+ *result = need_more;
|
|
+ }
|
|
+ if (strm->avail_in != 0 && strm->avail_out != 0)
|
|
+ goto again; /* deflate() must use all input or all output */
|
|
+ return 1;
|
|
+}
|
|
+
|
|
+/*
|
|
+ Expand.
|
|
+ */
|
|
+int ZLIB_INTERNAL dfltcc_can_inflate(strm)
|
|
+ z_streamp strm;
|
|
+{
|
|
+ struct inflate_state FAR *state = (struct inflate_state FAR *)strm->state;
|
|
+ struct dfltcc_state FAR *dfltcc_state = GET_DFLTCC_STATE(state);
|
|
+
|
|
+ /* Unsupported hardware */
|
|
+ return is_bit_set(dfltcc_state->af.fns, DFLTCC_XPND) &&
|
|
+ is_bit_set(dfltcc_state->af.fmts, DFLTCC_FMT0);
|
|
+}
|
|
+
|
|
+local dfltcc_cc dfltcc_xpnd OF((z_streamp strm));
|
|
+local dfltcc_cc dfltcc_xpnd(strm)
|
|
+ z_streamp strm;
|
|
+{
|
|
+ struct inflate_state FAR *state = (struct inflate_state FAR *)strm->state;
|
|
+ struct dfltcc_param_v0 FAR *param = &GET_DFLTCC_STATE(state)->param;
|
|
+ size_t avail_in = strm->avail_in;
|
|
+ size_t avail_out = strm->avail_out;
|
|
+ dfltcc_cc cc;
|
|
+
|
|
+ cc = dfltcc(DFLTCC_XPND | HBT_CIRCULAR,
|
|
+ param, &strm->next_out, &avail_out,
|
|
+ &strm->next_in, &avail_in, state->window);
|
|
+ strm->avail_in = avail_in;
|
|
+ strm->avail_out = avail_out;
|
|
+ return cc;
|
|
+}
|
|
+
|
|
+/*
+   One hardware inflate step.
+
+   strm  - stream being inflated
+   flush - flush mode requested by the caller
+   ret   - set to Z_STREAM_ERROR when Z_BLOCK/Z_TREES is requested but
+           dfltcc_inflate_disable() reports failure
+
+   Returns:
+     DFLTCC_INFLATE_SOFTWARE - Z_BLOCK/Z_TREES requested and the hardware path
+                               was disabled successfully
+     DFLTCC_INFLATE_BREAK    - error above, no input with param->cf clear, or
+                               DFLTCC reported OP1/OP2 too short
+     DFLTCC_INFLATE_CONTINUE - otherwise; state->mode has been set to CHECK,
+                               MEM, BAD or TYPEDO
+*/
+dfltcc_inflate_action ZLIB_INTERNAL dfltcc_inflate(strm, flush, ret)
+    z_streamp strm;
+    int flush;
+    int *ret;
+{
+    struct inflate_state FAR *state = (struct inflate_state FAR *)strm->state;
+    struct dfltcc_state FAR *hw = GET_DFLTCC_STATE(state);
+    struct dfltcc_param_v0 FAR *param = &hw->param;
+    dfltcc_cc rc;
+
+    if (flush == Z_BLOCK || flush == Z_TREES) {
+        /* DFLTCC does not support stopping on block boundaries */
+        if (!dfltcc_inflate_disable(strm))
+            return DFLTCC_INFLATE_SOFTWARE;
+        *ret = Z_STREAM_ERROR;
+        return DFLTCC_INFLATE_BREAK;
+    }
+
+    if (state->last) {
+        /* Last block already seen: drop pending bits and go verify */
+        if (state->bits != 0) {
+            strm->next_in++;
+            strm->avail_in--;
+            state->bits = 0;
+        }
+        state->mode = CHECK;
+        return DFLTCC_INFLATE_CONTINUE;
+    }
+
+    if (strm->avail_in == 0 && !param->cf)
+        return DFLTCC_INFLATE_BREAK;
+
+    if (inflate_ensure_window(state)) {
+        state->mode = MEM;
+        return DFLTCC_INFLATE_CONTINUE;
+    }
+
+    /* Stream -> parameter block */
+    param->cvt = ((state->wrap & 4) && state->flags) ? CVT_CRC32 : CVT_ADLER32;
+    param->sbb = state->bits;
+    if (param->hl)
+        param->nt = 0; /* Honor history for the first block */
+    if (state->wrap & 4)
+        param->cv = state->flags ? ZSWAP32(state->check) : state->check;
+
+    /* Run the expand operation until it stops asking to be re-issued */
+    for (;;) {
+        rc = dfltcc_xpnd(strm);
+        if (rc != DFLTCC_CC_AGAIN)
+            break;
+    }
+
+    /* Parameter block -> stream */
+    strm->msg = oesc_msg(hw->msg, param->oesc);
+    state->last = (rc == DFLTCC_CC_OK);
+    state->bits = param->sbb;
+    if (state->wrap & 4)
+        strm->adler = state->check = state->flags ?
+            ZSWAP32(param->cv) : param->cv;
+
+    if (rc == DFLTCC_CC_OP2_CORRUPT && param->oesc != 0) {
+        /* Corrupted stream: let the software state machine report it */
+        state->mode = BAD;
+        return DFLTCC_INFLATE_CONTINUE;
+    }
+
+    state->mode = TYPEDO;
+    /* Stop when an operand is exhausted, otherwise keep looping */
+    if (rc == DFLTCC_CC_OP1_TOO_SHORT || rc == DFLTCC_CC_OP2_TOO_SHORT)
+        return DFLTCC_INFLATE_BREAK;
+    return DFLTCC_INFLATE_CONTINUE;
+}
|
|
+
|
|
+/*
+   Returns nonzero when the parameter block's nt flag is clear. dfltcc_reset()
+   sets nt to 1 and dfltcc_inflate() clears it once history is present.
+*/
+int ZLIB_INTERNAL dfltcc_was_inflate_used(strm)
+    z_streamp strm;
+{
+    struct inflate_state FAR *state = (struct inflate_state FAR *)strm->state;
+
+    return GET_DFLTCC_STATE(state)->param.nt == 0;
+}
|
|
+
|
|
+/*
+   In-place left rotation of the byte range [start, end) so that the byte at
+   pivot becomes the first one.
+   The swap loop follows the reference implementation shown at
+   https://cplusplus.com/reference/algorithm/rotate/
+ */
+local void rotate OF((Bytef *start, Bytef *pivot, Bytef *end));
+local void rotate(start, pivot, end)
+    Bytef *start;
+    Bytef *pivot;
+    Bytef *end;
+{
+    Bytef *next;
+    Bytef held;
+
+    for (next = pivot; next != start; ) {
+        /* Exchange *start and *next, then advance both cursors */
+        held = *start;
+        *start = *next;
+        *next = held;
+        start++;
+        next++;
+
+        if (next == end)
+            next = pivot;       /* ran off the end: restart at the pivot */
+        else if (start == pivot)
+            pivot = next;       /* front part done: rotate the remainder */
+    }
+}
|
|
+
|
|
+/*
+   MIN/MAX that evaluate each argument exactly once, built on the GNU C
+   statement-expression and typeof extensions.
+*/
+#define MIN(x, y) ({ \
+    typeof(x) _lhs = (x); \
+    typeof(y) _rhs = (y); \
+    _lhs < _rhs ? _lhs : _rhs; \
+})
+
+#define MAX(x, y) ({ \
+    typeof(x) _lhs = (x); \
+    typeof(y) _rhs = (y); \
+    _lhs > _rhs ? _lhs : _rhs; \
+})
|
|
+
|
|
+/*
+   Switch this stream from hardware to software inflate.
+
+   Returns 1 when DFLTCC has already been used on the stream (the switch is
+   then impossible), 0 otherwise - including when hardware inflate was not
+   usable in the first place.
+*/
+int ZLIB_INTERNAL dfltcc_inflate_disable(strm)
+    z_streamp strm;
+{
+    struct inflate_state FAR *state = (struct inflate_state FAR *)strm->state;
+    struct dfltcc_state FAR *hw = GET_DFLTCC_STATE(state);
+    struct dfltcc_param_v0 *param = &hw->param;
+
+    if (dfltcc_can_inflate(strm)) {
+        if (dfltcc_was_inflate_used(strm))
+            /* DFLTCC has already decompressed some data. There is not
+             * enough information to resume decompression in software, so
+             * the call must fail.
+             */
+            return 1;
+
+        /* Not used yet: clear the available-function bits for this stream */
+        memset(&hw->af, 0, sizeof(hw->af));
+        /* Convert the window from the hardware to the software format */
+        rotate(state->window, state->window + param->ho,
+               state->window + HB_SIZE);
+        state->whave = state->wnext = MIN(param->hl, state->wsize);
+    }
+    return 0;
+}
|
|
+
|
|
+/* Process-wide settings, all filled in once by init_globals(). */
+/* Nonzero when the DFLTCC environment variable is exactly "0". */
+local int env_dfltcc_disabled;
|
|
+/* Nonzero when SOURCE_DATE_EPOCH is present in the environment. */
+local int env_source_date_epoch;
|
|
+/* Tuning values: environment override, else the same-named compile-time macro. */
+local unsigned long env_level_mask;
|
|
+local unsigned long env_block_size;
|
|
+local unsigned long env_block_threshold;
|
|
+local unsigned long env_dht_threshold;
|
|
+local unsigned long env_ribm;
|
|
+/* Facility bits stored by the STFLE instruction. */
+local uint64_t cpu_facilities[(DFLTCC_FACILITY / 64) + 1];
|
|
+/* Result of the DFLTCC_QAF query, or all zero when DFLTCC is not enabled. */
+local struct dfltcc_qaf_param cpu_af __attribute__((aligned(8)));
|
|
+
|
|
+/*
+   Returns 0 when the user disabled DFLTCC through the environment; otherwise
+   returns the DFLTCC_FACILITY bit test on cpu_facilities.
+*/
+local inline int is_dfltcc_enabled OF((void));
+local inline int is_dfltcc_enabled(void)
+{
+    /* An explicit user opt-out wins over the hardware capability */
+    return env_dfltcc_disabled
+        ? 0
+        : is_bit_set((const char *)cpu_facilities, DFLTCC_FACILITY);
+}
|
|
+
|
|
+/*
+   Parse s with strtoul() using base 0 (auto-detected radix).
+
+   Returns _default when s is NULL or empty, when strtoul() sets errno, or
+   when any character is left unparsed; otherwise returns the parsed value.
+*/
+local unsigned long xstrtoul OF((const char *s, unsigned long _default));
+local unsigned long xstrtoul(s, _default)
+    const char *s;
+    unsigned long _default;
+{
+    char *tail;
+    unsigned long parsed;
+
+    if (s == NULL || *s == '\0')
+        return _default;
+
+    errno = 0;
+    parsed = strtoul(s, &tail, 0);
+    if (errno != 0 || *tail != '\0')
+        return _default;
+    return parsed;
+}
|
|
+
|
|
+/*
+   Constructor: captures environment overrides and probes the CPU once.
+
+   Reads DFLTCC, SOURCE_DATE_EPOCH, DFLTCC_LEVEL_MASK, DFLTCC_BLOCK_SIZE,
+   DFLTCC_FIRST_FHT_BLOCK_SIZE, DFLTCC_DHT_MIN_SAMPLE_SIZE and DFLTCC_RIBM
+   through secure_getenv(); the numeric ones fall back to the same-named
+   compile-time macros. It then stores the facility list with STFLE and, when
+   DFLTCC is enabled, fills cpu_af with a DFLTCC_QAF query (zeroes otherwise).
+*/
+__attribute__((constructor)) local void init_globals OF((void));
+__attribute__((constructor)) local void init_globals(void)
+{
+    const char *value;
+    register char r0 __asm__("r0");
+
+    /* Only the exact string "0" disables DFLTCC */
+    value = secure_getenv("DFLTCC");
+    env_dfltcc_disabled = (value != NULL && strcmp(value, "0") == 0);
+
+    /* Mere presence of SOURCE_DATE_EPOCH is what matters */
+    value = secure_getenv("SOURCE_DATE_EPOCH");
+    env_source_date_epoch = (value != NULL);
+
+#ifndef DFLTCC_LEVEL_MASK
+#define DFLTCC_LEVEL_MASK 0x2
+#endif
+    value = secure_getenv("DFLTCC_LEVEL_MASK");
+    env_level_mask = xstrtoul(value, DFLTCC_LEVEL_MASK);
+
+#ifndef DFLTCC_BLOCK_SIZE
+#define DFLTCC_BLOCK_SIZE 1048576
+#endif
+    value = secure_getenv("DFLTCC_BLOCK_SIZE");
+    env_block_size = xstrtoul(value, DFLTCC_BLOCK_SIZE);
+
+#ifndef DFLTCC_FIRST_FHT_BLOCK_SIZE
+#define DFLTCC_FIRST_FHT_BLOCK_SIZE 4096
+#endif
+    value = secure_getenv("DFLTCC_FIRST_FHT_BLOCK_SIZE");
+    env_block_threshold = xstrtoul(value, DFLTCC_FIRST_FHT_BLOCK_SIZE);
+
+#ifndef DFLTCC_DHT_MIN_SAMPLE_SIZE
+#define DFLTCC_DHT_MIN_SAMPLE_SIZE 4096
+#endif
+    value = secure_getenv("DFLTCC_DHT_MIN_SAMPLE_SIZE");
+    env_dht_threshold = xstrtoul(value, DFLTCC_DHT_MIN_SAMPLE_SIZE);
+
+#ifndef DFLTCC_RIBM
+#define DFLTCC_RIBM 0
+#endif
+    value = secure_getenv("DFLTCC_RIBM");
+    env_ribm = xstrtoul(value, DFLTCC_RIBM);
+
+    /* r0 carries the index of the last doubleword of cpu_facilities */
+    memset(cpu_facilities, 0, sizeof(cpu_facilities));
+    r0 = sizeof(cpu_facilities) / sizeof(cpu_facilities[0]) - 1;
+    /* STFLE is supported since z9-109 and only in z/Architecture mode. When
+     * compiling with -m31, gcc defaults to ESA mode, however, since the kernel
+     * is 64-bit, it's always z/Architecture mode at runtime.
+     */
+    __asm__ volatile(
+#ifndef __clang__
+                     ".machinemode push\n"
+                     ".machinemode zarch\n"
+#endif
+                     "stfle %[facilities]\n"
+#ifndef __clang__
+                     ".machinemode pop\n"
+#endif
+                     : [facilities] "=Q" (cpu_facilities)
+                     , [r0] "+r" (r0)
+                     :
+                     : "cc");
+
+    /* Initialize available functions */
+    if (!is_dfltcc_enabled())
+        memset(&cpu_af, 0, sizeof(cpu_af));
+    else
+        dfltcc(DFLTCC_QAF, &cpu_af, NULL, NULL, NULL, NULL, NULL);
+}
|
|
+
|
|
+/*
|
|
+ Memory management.
|
|
+
|
|
+ DFLTCC requires parameter blocks and window to be aligned. zlib allows
|
|
+ users to specify their own allocation functions, so using e.g.
|
|
+ `posix_memalign' is not an option. Thus, we overallocate and take the
|
|
+ aligned portion of the buffer.
|
|
+*/
|
|
+/*
+   (Re)initialize the DFLTCC state that lives ALIGN_UP(size, 8) bytes past the
+   start of strm->state, where size is the size of the generic zlib state.
+*/
+void ZLIB_INTERNAL dfltcc_reset(strm, size)
+    z_streamp strm;
+    uInt size;
+{
+    struct dfltcc_state *hw =
+        (struct dfltcc_state *)((char FAR *)strm->state + ALIGN_UP(size, 8));
+
+    /* Parameter block: all zero except nt = 1 and the configured ribm */
+    memset(&hw->param, 0, sizeof(hw->param));
+    hw->param.nt = 1;
+    hw->param.ribm = env_ribm;
+
+    /* Tuning parameters captured by init_globals() */
+    hw->level_mask = env_level_mask;
+    hw->block_size = env_block_size;
+    hw->block_threshold = env_block_threshold;
+    hw->dht_threshold = env_dht_threshold;
+
+    /* Available functions start out as a copy of the CPU probe result */
+    memcpy(&hw->af, &cpu_af, sizeof(hw->af));
+    if (env_source_date_epoch)
+        /* User needs reproducible results, but the output of DFLTCC_CMPR
+         * depends on buffers' page offsets.
+         */
+        clear_bit(hw->af.fns, DFLTCC_CMPR);
+}
|
|
+
|
|
+/*
+   Allocate the generic zlib state (items * size bytes, rounded up to a
+   multiple of 8) with a struct dfltcc_state appended, using the stream's
+   allocator. Returns whatever ZALLOC returns.
+*/
+voidpf ZLIB_INTERNAL dfltcc_alloc_state(strm, items, size)
+    z_streamp strm;
+    uInt items;
+    uInt size;
+{
+    size_t total = ALIGN_UP(items * size, 8) + sizeof(struct dfltcc_state);
+
+    /* Requested as `total` items of one byte each */
+    return ZALLOC(strm, total, sizeof(unsigned char));
+}
|
|
+
|
|
+/*
+   Copy a state allocated by dfltcc_alloc_state(): the generic part (size
+   bytes, rounded up to a multiple of 8) plus the trailing struct dfltcc_state.
+*/
+void ZLIB_INTERNAL dfltcc_copy_state(dst, src, size)
+    voidpf dst;
+    const voidpf src;
+    uInt size;
+{
+    size_t total = ALIGN_UP(size, 8) + sizeof(struct dfltcc_state);
+
+    zmemcpy(dst, src, total);
+}
|
|
+
|
|
+/* Alignment applied to the window start (0x1000 bytes). */
+static const int PAGE_ALIGN = 0x1000;
+
+/*
+   Allocate a window of at least HB_SIZE bytes whose start is aligned to
+   PAGE_ALIGN. The pointer returned by the allocator is stashed in the
+   sizeof(voidpf) bytes right before the window so that dfltcc_free_window()
+   can recover it. Returns NULL when the allocation fails.
+*/
+voidpf ZLIB_INTERNAL dfltcc_alloc_window(strm, items, size)
+    z_streamp strm;
+    uInt items;
+    uInt size;
+{
+    voidpf raw, window;
+
+    /* Room for the back-pointer, the window itself and alignment slack.
+     * Note that DFLTCC always uses HB_SIZE bytes.
+     */
+    raw = ZALLOC(strm,
+                 sizeof(voidpf) + MAX(items * size, HB_SIZE) + PAGE_ALIGN,
+                 sizeof(unsigned char));
+    if (raw != NULL) {
+        window = ALIGN_UP((char FAR *)raw + sizeof(voidpf), PAGE_ALIGN);
+        *(voidpf *)((char FAR *)window - sizeof(voidpf)) = raw;
+        return window;
+    }
+    return NULL;
+}
|
|
+
|
|
+/*
+   Copy a window from src to dest. At least HB_SIZE bytes are always copied,
+   even when n is smaller.
+*/
+void ZLIB_INTERNAL dfltcc_copy_window(dest, src, n)
+    void *dest;
+    const void *src;
+    size_t n;
+{
+    size_t len = MAX(n, HB_SIZE);
+
+    memcpy(dest, src, len);
+}
|
|
+
|
|
+/*
+   Release a window: frees the pointer stored in the sizeof(voidpf) bytes
+   immediately before w. A null w is ignored.
+*/
+void ZLIB_INTERNAL dfltcc_free_window(strm, w)
+    z_streamp strm;
+    voidpf w;
+{
+    if (!w)
+        return;
+    ZFREE(strm, *(voidpf *)((unsigned char FAR *)w - sizeof(voidpf)));
+}
|
|
+
|
|
+/*
|
|
+ Switching between hardware and software compression.
|
|
+
|
|
+ DFLTCC does not support all zlib settings, e.g. generation of non-compressed
|
|
+ blocks or alternative window sizes. When such settings are applied on the
|
|
+ fly with deflateParams, we need to convert between hardware and software
|
|
+ window formats.
|
|
+*/
|
|
+/*
+   deflateParams() hook: decides whether a flush is needed when the new level
+   and strategy change whether DFLTCC can be used.
+
+   *flush is set to Z_FULL_FLUSH only when that ability changes and DFLTCC has
+   already been used on the stream. Always returns Z_OK.
+*/
+int ZLIB_INTERNAL dfltcc_deflate_params(strm, level, strategy, flush)
+    z_streamp strm;
+    int level;
+    int strategy;
+    int *flush;
+{
+    deflate_state FAR *state = (deflate_state FAR *)strm->state;
+    struct dfltcc_param_v0 FAR *param = &GET_DFLTCC_STATE(state)->param;
+    int hw_before = dfltcc_can_deflate(strm);
+    int hw_after = dfltcc_can_deflate_with_params(strm, level,
+                                                  state->w_bits, strategy);
+
+    /* Nothing to do when the mode stays the same, or when DFLTCC was not
+     * used yet (no input consumed, nt still set, empty history).
+     */
+    if (hw_before == hw_after ||
+        (strm->total_in == 0 && param->nt == 1 && param->hl == 0))
+        return Z_OK;
+
+    /* For now, do not convert between window formats - simply get rid of the
+     * old data instead.
+     */
+    *flush = Z_FULL_FLUSH;
+    return Z_OK;
+}
|
|
+
|
|
+/*
+   Returns 1 when a deflate() call made with `flush` has completely finished,
+   0 when more calls are needed.
+*/
+int ZLIB_INTERNAL dfltcc_deflate_done(strm, flush)
+    z_streamp strm;
+    int flush;
+{
+    deflate_state FAR *state = (deflate_state FAR *)strm->state;
+    struct dfltcc_param_v0 FAR *param = &GET_DFLTCC_STATE(state)->param;
+
+    /* When deflate(Z_FULL_FLUSH) is called with small avail_out, it might
+     * close the block without resetting the compression state. Detect this
+     * situation and return that deflation is not done.
+     */
+    if (flush == Z_FULL_FLUSH && strm->avail_out == 0)
+        return 0;
+
+    /* Software path: nothing is held back by the hardware */
+    if (!dfltcc_can_deflate(strm))
+        return 1;
+
+    /* Hardware path: not done while data is buffered (Continuation Flag) or
+     * EOBS has not been written yet (Block-Continuation Flag).
+     */
+    return !(param->cf || param->bcf);
+}
|
|
+
|
|
+/*
|
|
+ Preloading history.
|
|
+*/
|
|
+/*
+   Append count bytes from buf to the circular history buffer described by
+   param->ho (offset) and param->hl (length).
+
+   Only the last HB_SIZE bytes of buf are used. When the history would exceed
+   HB_SIZE bytes, ho is advanced and hl is capped at HB_SIZE.
+*/
+local void append_history OF((struct dfltcc_param_v0 FAR *param,
+                              Bytef *history,
+                              const Bytef *buf,
+                              uInt count));
+local void append_history(param, history, buf, count)
+    struct dfltcc_param_v0 FAR *param;
+    Bytef *history;
+    const Bytef *buf;
+    uInt count;
+{
+    size_t tail;    /* write position inside the circular buffer */
+    size_t chunk;   /* bytes that fit before the buffer end */
+    size_t total;   /* history length after appending */
+
+    /* Do not use more than 32K */
+    if (count > HB_SIZE) {
+        buf += count - HB_SIZE;
+        count = HB_SIZE;
+    }
+
+    tail = (param->ho + param->hl) % HB_SIZE;
+    if (tail + count > HB_SIZE) {
+        /* Write wraps around the buffer end - copy two chunks */
+        chunk = HB_SIZE - tail;
+        zmemcpy(history + tail, buf, chunk);
+        zmemcpy(history, buf + chunk, count - chunk);
+    } else
+        /* Contiguous write - copy one chunk */
+        zmemcpy(history + tail, buf, count);
+
+    total = param->hl + count;
+    if (total > HB_SIZE) {
+        /* Too much history - move the start forward to drop the excess */
+        param->ho = (param->ho + (total - HB_SIZE)) % HB_SIZE;
+        param->hl = HB_SIZE;
+    } else
+        /* Everything fits - only the length grows */
+        param->hl = total;
+}
|
|
+
|
|
+/*
+   Copy the param->hl bytes of history that start at offset param->ho of the
+   circular buffer `history` into buf, in linear order.
+*/
+local void get_history OF((struct dfltcc_param_v0 FAR *param,
+                           const Bytef *history,
+                           Bytef *buf));
+local void get_history(param, history, buf)
+    struct dfltcc_param_v0 FAR *param;
+    const Bytef *history;
+    Bytef *buf;
+{
+    if (param->ho + param->hl > HB_SIZE) {
+        /* History wraps: the part up to the buffer end comes first, then the
+         * remainder from the start of the buffer.
+         */
+        memcpy(buf, history + param->ho, HB_SIZE - param->ho);
+        memcpy(buf + HB_SIZE - param->ho, history,
+               param->ho + param->hl - HB_SIZE);
+    } else
+        /* History is contiguous - a single copy suffices */
+        memcpy(buf, history + param->ho, param->hl);
+}
|
|
+
|
|
+/*
+   Hardware counterpart of deflateSetDictionary(): appends the dictionary to
+   the DFLTCC history kept in state->window. Always returns Z_OK.
+*/
+int ZLIB_INTERNAL dfltcc_deflate_set_dictionary(strm, dictionary, dict_length)
+    z_streamp strm;
+    const Bytef *dictionary;
+    uInt dict_length;
+{
+    deflate_state FAR *state = (deflate_state FAR *)strm->state;
+
+    append_history(&GET_DFLTCC_STATE(state)->param, state->window,
+                   dictionary, dict_length);
+
+    /* Add FDICT to zlib header */
+    state->strstart = 1;
+    /* Make deflate_stored happy */
+    state->block_start = state->strstart;
+    return Z_OK;
+}
|
|
+
|
|
+/*
+   Hardware counterpart of deflateGetDictionary(): copies the current history
+   into dictionary and stores its length (param->hl) in *dict_length. Either
+   output pointer may be null, in which case it is skipped. Always returns
+   Z_OK.
+*/
+int ZLIB_INTERNAL dfltcc_deflate_get_dictionary(strm, dictionary, dict_length)
+    z_streamp strm;
+    Bytef *dictionary;
+    uInt *dict_length;
+{
+    deflate_state FAR *state = (deflate_state FAR *)strm->state;
+    struct dfltcc_param_v0 FAR *param = &GET_DFLTCC_STATE(state)->param;
+
+    if (dictionary != NULL)
+        get_history(param, state->window, dictionary);
+    if (dict_length != NULL)
+        *dict_length = param->hl;
+    return Z_OK;
+}
|
|
+
|
|
+/*
+   Hardware counterpart of inflateSetDictionary(): makes sure the window
+   exists, appends the dictionary to the DFLTCC history and marks the
+   dictionary as provided.
+
+   Returns Z_MEM_ERROR (with state->mode set to MEM) when
+   inflate_ensure_window() reports failure, Z_OK otherwise.
+*/
+int ZLIB_INTERNAL dfltcc_inflate_set_dictionary(strm, dictionary, dict_length)
+    z_streamp strm;
+    const Bytef *dictionary;
+    uInt dict_length;
+{
+    struct inflate_state *state = (struct inflate_state *)strm->state;
+
+    if (inflate_ensure_window(state)) {
+        state->mode = MEM;
+        return Z_MEM_ERROR;
+    }
+
+    append_history(&GET_DFLTCC_STATE(state)->param, state->window,
+                   dictionary, dict_length);
+    state->havedict = 1;
+    return Z_OK;
+}
|
|
+
|
|
+/*
+   Hardware counterpart of inflateGetDictionary(): copies the current history
+   into dictionary (only when both dictionary and the window are non-null)
+   and stores its length (param->hl) in *dict_length when that pointer is
+   non-null. Always returns Z_OK.
+*/
+int ZLIB_INTERNAL dfltcc_inflate_get_dictionary(strm, dictionary, dict_length)
+    z_streamp strm;
+    Bytef *dictionary;
+    uInt *dict_length;
+{
+    struct inflate_state *state = (struct inflate_state *)strm->state;
+    struct dfltcc_param_v0 *param = &GET_DFLTCC_STATE(state)->param;
+
+    if (dictionary != NULL && state->window != NULL)
+        get_history(param, state->window, dictionary);
+    if (dict_length != NULL)
+        *dict_length = param->hl;
+    return Z_OK;
+}
|
|
diff --git a/contrib/s390/dfltcc.h b/contrib/s390/dfltcc.h
|
|
new file mode 100644
|
|
index 000000000..be28b8ad4
|
|
--- /dev/null
|
|
+++ b/contrib/s390/dfltcc.h
|
|
@@ -0,0 +1,100 @@
|
|
+#ifndef DFLTCC_H
|
|
+#define DFLTCC_H
|
|
+
|
|
+#include "../../zlib.h"
|
|
+#include "../../zutil.h"
|
|
+
|
|
+voidpf ZLIB_INTERNAL dfltcc_alloc_state OF((z_streamp strm, uInt items,
|
|
+ uInt size));
|
|
+void ZLIB_INTERNAL dfltcc_copy_state OF((voidpf dst, const voidpf src,
|
|
+ uInt size));
|
|
+void ZLIB_INTERNAL dfltcc_reset OF((z_streamp strm, uInt size));
|
|
+voidpf ZLIB_INTERNAL dfltcc_alloc_window OF((z_streamp strm, uInt items,
|
|
+ uInt size));
|
|
+void ZLIB_INTERNAL dfltcc_copy_window OF((void *dest, const void *src,
|
|
+ size_t n));
|
|
+void ZLIB_INTERNAL dfltcc_free_window OF((z_streamp strm, voidpf w));
|
|
+#define DFLTCC_BLOCK_HEADER_BITS 3
|
|
+#define DFLTCC_HLITS_COUNT_BITS 5
|
|
+#define DFLTCC_HDISTS_COUNT_BITS 5
|
|
+#define DFLTCC_HCLENS_COUNT_BITS 4
|
|
+#define DFLTCC_MAX_HCLENS 19
|
|
+#define DFLTCC_HCLEN_BITS 3
|
|
+#define DFLTCC_MAX_HLITS 286
|
|
+#define DFLTCC_MAX_HDISTS 30
|
|
+#define DFLTCC_MAX_HLIT_HDIST_BITS 7
|
|
+#define DFLTCC_MAX_SYMBOL_BITS 16
|
|
+#define DFLTCC_MAX_EOBS_BITS 15
|
|
+#define DFLTCC_MAX_PADDING_BITS 7
|
|
+#define DEFLATE_BOUND_COMPLEN(source_len) \
|
|
+ ((DFLTCC_BLOCK_HEADER_BITS + \
|
|
+ DFLTCC_HLITS_COUNT_BITS + \
|
|
+ DFLTCC_HDISTS_COUNT_BITS + \
|
|
+ DFLTCC_HCLENS_COUNT_BITS + \
|
|
+ DFLTCC_MAX_HCLENS * DFLTCC_HCLEN_BITS + \
|
|
+ (DFLTCC_MAX_HLITS + DFLTCC_MAX_HDISTS) * DFLTCC_MAX_HLIT_HDIST_BITS + \
|
|
+ (source_len) * DFLTCC_MAX_SYMBOL_BITS + \
|
|
+ DFLTCC_MAX_EOBS_BITS + \
|
|
+ DFLTCC_MAX_PADDING_BITS) >> 3)
|
|
+int ZLIB_INTERNAL dfltcc_can_inflate OF((z_streamp strm));
|
|
+typedef enum {
|
|
+ DFLTCC_INFLATE_CONTINUE,
|
|
+ DFLTCC_INFLATE_BREAK,
|
|
+ DFLTCC_INFLATE_SOFTWARE,
|
|
+} dfltcc_inflate_action;
|
|
+dfltcc_inflate_action ZLIB_INTERNAL dfltcc_inflate OF((z_streamp strm,
|
|
+ int flush, int *ret));
|
|
+int ZLIB_INTERNAL dfltcc_was_inflate_used OF((z_streamp strm));
|
|
+int ZLIB_INTERNAL dfltcc_inflate_disable OF((z_streamp strm));
|
|
+int ZLIB_INTERNAL dfltcc_inflate_set_dictionary OF((z_streamp strm,
|
|
+ const Bytef *dictionary,
|
|
+ uInt dict_length));
|
|
+int ZLIB_INTERNAL dfltcc_inflate_get_dictionary OF((z_streamp strm,
|
|
+ Bytef *dictionary,
|
|
+ uInt* dict_length));
|
|
+
|
|
+#define ZALLOC_STATE dfltcc_alloc_state
|
|
+#define ZFREE_STATE ZFREE
|
|
+#define ZCOPY_STATE dfltcc_copy_state
|
|
+#define ZALLOC_WINDOW dfltcc_alloc_window
|
|
+#define ZCOPY_WINDOW dfltcc_copy_window
|
|
+#define ZFREE_WINDOW dfltcc_free_window
|
|
+#define TRY_FREE_WINDOW dfltcc_free_window
|
|
+#define INFLATE_RESET_KEEP_HOOK(strm) \
|
|
+ dfltcc_reset((strm), sizeof(struct inflate_state))
|
|
+#define INFLATE_PRIME_HOOK(strm, bits, value) \
|
|
+ do { if (dfltcc_inflate_disable((strm))) return Z_STREAM_ERROR; } while (0)
|
|
+#define INFLATE_TYPEDO_HOOK(strm, flush) \
|
|
+ if (dfltcc_can_inflate((strm))) { \
|
|
+ dfltcc_inflate_action action; \
|
|
+\
|
|
+ RESTORE(); \
|
|
+ action = dfltcc_inflate((strm), (flush), &ret); \
|
|
+ LOAD(); \
|
|
+ if (action == DFLTCC_INFLATE_CONTINUE) \
|
|
+ break; \
|
|
+ else if (action == DFLTCC_INFLATE_BREAK) \
|
|
+ goto inf_leave; \
|
|
+ }
|
|
+#define INFLATE_NEED_CHECKSUM(strm) (!dfltcc_can_inflate((strm)))
|
|
+#define INFLATE_NEED_UPDATEWINDOW(strm) (!dfltcc_can_inflate((strm)))
|
|
+#define INFLATE_MARK_HOOK(strm) \
|
|
+ do { \
|
|
+ if (dfltcc_was_inflate_used((strm))) return -(1L << 16); \
|
|
+ } while (0)
|
|
+#define INFLATE_SYNC_POINT_HOOK(strm) \
|
|
+ do { \
|
|
+ if (dfltcc_was_inflate_used((strm))) return Z_STREAM_ERROR; \
|
|
+ } while (0)
|
|
+/* When hardware inflate is usable, return from the enclosing function with
+ * the result of dfltcc_inflate_set_dictionary(); otherwise fall through. */
+#define INFLATE_SET_DICTIONARY_HOOK(strm, dict, dict_len) \
+    do { \
+        if (dfltcc_can_inflate((strm))) \
+            return dfltcc_inflate_set_dictionary((strm), (dict), (dict_len)); \
+    } while (0)
|
|
+/* When hardware inflate is usable, return from the enclosing function with
+ * the result of dfltcc_inflate_get_dictionary(); otherwise fall through. */
+#define INFLATE_GET_DICTIONARY_HOOK(strm, dict, dict_len) \
+    do { \
+        if (dfltcc_can_inflate((strm))) \
+            return dfltcc_inflate_get_dictionary((strm), (dict), (dict_len)); \
+    } while (0)
|
|
+
|
|
+#endif
|
|
diff --git a/contrib/s390/dfltcc_deflate.h b/contrib/s390/dfltcc_deflate.h
|
|
new file mode 100644
|
|
index 000000000..46acfc550
|
|
--- /dev/null
|
|
+++ b/contrib/s390/dfltcc_deflate.h
|
|
@@ -0,0 +1,55 @@
|
|
+#ifndef DFLTCC_DEFLATE_H
|
|
+#define DFLTCC_DEFLATE_H
|
|
+
|
|
+#include "dfltcc.h"
|
|
+
|
|
+int ZLIB_INTERNAL dfltcc_can_deflate OF((z_streamp strm));
|
|
+int ZLIB_INTERNAL dfltcc_deflate OF((z_streamp strm,
|
|
+ int flush,
|
|
+ block_state *result));
|
|
+int ZLIB_INTERNAL dfltcc_deflate_params OF((z_streamp strm,
|
|
+ int level,
|
|
+ int strategy,
|
|
+ int *flush));
|
|
+int ZLIB_INTERNAL dfltcc_deflate_done OF((z_streamp strm, int flush));
|
|
+int ZLIB_INTERNAL dfltcc_deflate_set_dictionary OF((z_streamp strm,
|
|
+ const Bytef *dictionary,
|
|
+ uInt dict_length));
|
|
+int ZLIB_INTERNAL dfltcc_deflate_get_dictionary OF((z_streamp strm,
|
|
+ Bytef *dictionary,
|
|
+ uInt* dict_length));
|
|
+
|
|
+#define DEFLATE_SET_DICTIONARY_HOOK(strm, dict, dict_len) \
|
|
+ do { \
|
|
+ if (dfltcc_can_deflate((strm))) \
|
|
+ return dfltcc_deflate_set_dictionary((strm), (dict), (dict_len)); \
|
|
+ } while (0)
|
|
+#define DEFLATE_GET_DICTIONARY_HOOK(strm, dict, dict_len) \
|
|
+ do { \
|
|
+ if (dfltcc_can_deflate((strm))) \
|
|
+ return dfltcc_deflate_get_dictionary((strm), (dict), (dict_len)); \
|
|
+ } while (0)
|
|
+#define DEFLATE_RESET_KEEP_HOOK(strm) \
|
|
+ dfltcc_reset((strm), sizeof(deflate_state))
|
|
+#define DEFLATE_PARAMS_HOOK(strm, level, strategy, hook_flush) \
|
|
+ do { \
|
|
+ int err; \
|
|
+\
|
|
+ err = dfltcc_deflate_params((strm), \
|
|
+ (level), \
|
|
+ (strategy), \
|
|
+ (hook_flush)); \
|
|
+ if (err == Z_STREAM_ERROR) \
|
|
+ return err; \
|
|
+ } while (0)
|
|
+#define DEFLATE_DONE dfltcc_deflate_done
|
|
+#define DEFLATE_BOUND_ADJUST_COMPLEN(strm, complen, source_len) \
|
|
+ do { \
|
|
+ if (dfltcc_can_deflate((strm))) \
|
|
+ (complen) = DEFLATE_BOUND_COMPLEN(source_len); \
|
|
+ } while (0)
|
|
+#define DEFLATE_NEED_CONSERVATIVE_BOUND(strm) (dfltcc_can_deflate((strm)))
|
|
+#define DEFLATE_HOOK dfltcc_deflate
|
|
+#define DEFLATE_NEED_CHECKSUM(strm) (!dfltcc_can_deflate((strm)))
|
|
+
|
|
+#endif
|
|
diff --git a/deflate.c b/deflate.c
|
|
index 4a512e1f9..ba446d982 100644
|
|
--- a/deflate.c
|
|
+++ b/deflate.c
|
|
@@ -61,15 +61,30 @@ const char deflate_copyright[] =
|
|
*/
|
|
|
|
/* ===========================================================================
|
|
- * Function prototypes.
|
|
+ * Architecture-specific bits.
|
|
*/
|
|
-typedef enum {
|
|
- need_more, /* block not completed, need more input or more output */
|
|
- block_done, /* block flush performed */
|
|
- finish_started, /* finish started, need only more output at next deflate */
|
|
- finish_done /* finish done, accept no more input or output */
|
|
-} block_state;
|
|
+#ifdef DFLTCC
|
|
+# include "contrib/s390/dfltcc_deflate.h"
|
|
+#else
|
|
+#define ZALLOC_STATE ZALLOC
|
|
+#define ZFREE_STATE ZFREE
|
|
+#define ZCOPY_STATE zmemcpy
|
|
+#define ZALLOC_WINDOW ZALLOC
|
|
+#define TRY_FREE_WINDOW TRY_FREE
|
|
+#define DEFLATE_SET_DICTIONARY_HOOK(strm, dict, dict_len) do {} while (0)
|
|
+#define DEFLATE_GET_DICTIONARY_HOOK(strm, dict, dict_len) do {} while (0)
|
|
+#define DEFLATE_RESET_KEEP_HOOK(strm) do {} while (0)
|
|
+#define DEFLATE_PARAMS_HOOK(strm, level, strategy, hook_flush) do {} while (0)
|
|
+#define DEFLATE_DONE(strm, flush) 1
|
|
+#define DEFLATE_BOUND_ADJUST_COMPLEN(strm, complen, sourceLen) do {} while (0)
|
|
+#define DEFLATE_NEED_CONSERVATIVE_BOUND(strm) 0
|
|
+#define DEFLATE_HOOK(strm, flush, bstate) 0
|
|
+#define DEFLATE_NEED_CHECKSUM(strm) 1
|
|
+#endif
|
|
|
|
+/* ===========================================================================
|
|
+ * Function prototypes.
|
|
+ */
|
|
typedef block_state (*compress_func) OF((deflate_state *s, int flush));
|
|
/* Compression function. Returns the block state after the call. */
|
|
|
|
@@ -85,7 +100,6 @@ local block_state deflate_rle OF((deflate_state *s, int flush));
|
|
local block_state deflate_huff OF((deflate_state *s, int flush));
|
|
local void lm_init OF((deflate_state *s));
|
|
local void putShortMSB OF((deflate_state *s, uInt b));
|
|
-local void flush_pending OF((z_streamp strm));
|
|
local unsigned read_buf OF((z_streamp strm, Bytef *buf, unsigned size));
|
|
local uInt longest_match OF((deflate_state *s, IPos cur_match));
|
|
|
|
@@ -295,7 +309,7 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy,
|
|
return Z_STREAM_ERROR;
|
|
}
|
|
if (windowBits == 8) windowBits = 9; /* until 256-byte window bug fixed */
|
|
- s = (deflate_state *) ZALLOC(strm, 1, sizeof(deflate_state));
|
|
+ s = (deflate_state *) ZALLOC_STATE(strm, 1, sizeof(deflate_state));
|
|
if (s == Z_NULL) return Z_MEM_ERROR;
|
|
strm->state = (struct internal_state FAR *)s;
|
|
s->strm = strm;
|
|
@@ -312,7 +326,7 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy,
|
|
s->hash_mask = s->hash_size - 1;
|
|
s->hash_shift = ((s->hash_bits + MIN_MATCH-1) / MIN_MATCH);
|
|
|
|
- s->window = (Bytef *) ZALLOC(strm, s->w_size, 2*sizeof(Byte));
|
|
+ s->window = (Bytef *) ZALLOC_WINDOW(strm, s->w_size, 2*sizeof(Byte));
|
|
s->prev = (Posf *) ZALLOC(strm, s->w_size, sizeof(Pos));
|
|
s->head = (Posf *) ZALLOC(strm, s->hash_size, sizeof(Pos));
|
|
|
|
@@ -430,6 +444,7 @@ int ZEXPORT deflateSetDictionary(strm, dictionary, dictLength)
|
|
/* when using zlib wrappers, compute Adler-32 for provided dictionary */
|
|
if (wrap == 1)
|
|
strm->adler = adler32(strm->adler, dictionary, dictLength);
|
|
+ DEFLATE_SET_DICTIONARY_HOOK(strm, dictionary, dictLength);
|
|
s->wrap = 0; /* avoid computing Adler-32 in read_buf */
|
|
|
|
/* if dictionary would fill window, just replace the history */
|
|
@@ -488,6 +503,7 @@ int ZEXPORT deflateGetDictionary(strm, dictionary, dictLength)
|
|
|
|
if (deflateStateCheck(strm))
|
|
return Z_STREAM_ERROR;
|
|
+ DEFLATE_GET_DICTIONARY_HOOK(strm, dictionary, dictLength);
|
|
s = strm->state;
|
|
len = s->strstart + s->lookahead;
|
|
if (len > s->w_size)
|
|
@@ -534,6 +550,8 @@ int ZEXPORT deflateResetKeep(strm)
|
|
|
|
_tr_init(s);
|
|
|
|
+ DEFLATE_RESET_KEEP_HOOK(strm);
|
|
+
|
|
return Z_OK;
|
|
}
|
|
|
|
@@ -609,6 +627,7 @@ int ZEXPORT deflateParams(strm, level, strategy)
|
|
{
|
|
deflate_state *s;
|
|
compress_func func;
|
|
+ int hook_flush = Z_NO_FLUSH;
|
|
|
|
if (deflateStateCheck(strm)) return Z_STREAM_ERROR;
|
|
s = strm->state;
|
|
@@ -621,15 +640,18 @@ int ZEXPORT deflateParams(strm, level, strategy)
|
|
if (level < 0 || level > 9 || strategy < 0 || strategy > Z_FIXED) {
|
|
return Z_STREAM_ERROR;
|
|
}
|
|
+ DEFLATE_PARAMS_HOOK(strm, level, strategy, &hook_flush);
|
|
func = configuration_table[s->level].func;
|
|
|
|
- if ((strategy != s->strategy || func != configuration_table[level].func) &&
|
|
- s->last_flush != -2) {
|
|
+ if (((strategy != s->strategy || func != configuration_table[level].func) &&
|
|
+ s->last_flush != -2) || hook_flush != Z_NO_FLUSH) {
|
|
/* Flush the last buffer: */
|
|
- int err = deflate(strm, Z_BLOCK);
|
|
+ int flush = RANK(hook_flush) > RANK(Z_BLOCK) ? hook_flush : Z_BLOCK;
|
|
+ int err = deflate(strm, flush);
|
|
if (err == Z_STREAM_ERROR)
|
|
return err;
|
|
- if (strm->avail_in || (s->strstart - s->block_start) + s->lookahead)
|
|
+ if (strm->avail_in || (s->strstart - s->block_start) + s->lookahead ||
|
|
+ !DEFLATE_DONE(strm, flush))
|
|
return Z_BUF_ERROR;
|
|
}
|
|
if (s->level != level) {
|
|
@@ -705,11 +727,13 @@ uLong ZEXPORT deflateBound(strm, sourceLen)
|
|
~13% overhead plus a small constant */
|
|
fixedlen = sourceLen + (sourceLen >> 3) + (sourceLen >> 8) +
|
|
(sourceLen >> 9) + 4;
|
|
+ DEFLATE_BOUND_ADJUST_COMPLEN(strm, fixedlen, sourceLen);
|
|
|
|
/* upper bound for stored blocks with length 127 (memLevel == 1) --
|
|
~4% overhead plus a small constant */
|
|
storelen = sourceLen + (sourceLen >> 5) + (sourceLen >> 7) +
|
|
(sourceLen >> 11) + 7;
|
|
+ DEFLATE_BOUND_ADJUST_COMPLEN(strm, storelen, sourceLen);
|
|
|
|
/* if can't get parameters, return larger bound plus a zlib wrapper */
|
|
if (deflateStateCheck(strm))
|
|
@@ -751,7 +775,8 @@ uLong ZEXPORT deflateBound(strm, sourceLen)
|
|
}
|
|
|
|
/* if not default parameters, return one of the conservative bounds */
|
|
- if (s->w_bits != 15 || s->hash_bits != 8 + 7)
|
|
+ if (DEFLATE_NEED_CONSERVATIVE_BOUND(strm) ||
|
|
+ s->w_bits != 15 || s->hash_bits != 8 + 7)
|
|
return (s->w_bits <= s->hash_bits && s->level ? fixedlen : storelen) +
|
|
wraplen;
|
|
|
|
@@ -780,7 +805,7 @@ local void putShortMSB(s, b)
|
|
* applications may wish to modify it to avoid allocating a large
|
|
* strm->next_out buffer and copying into it. (See also read_buf()).
|
|
*/
|
|
-local void flush_pending(strm)
|
|
+void ZLIB_INTERNAL flush_pending(strm)
|
|
z_streamp strm;
|
|
{
|
|
unsigned len;
|
|
@@ -1052,7 +1077,8 @@ int ZEXPORT deflate(strm, flush)
|
|
(flush != Z_NO_FLUSH && s->status != FINISH_STATE)) {
|
|
block_state bstate;
|
|
|
|
- bstate = s->level == 0 ? deflate_stored(s, flush) :
|
|
+ bstate = DEFLATE_HOOK(strm, flush, &bstate) ? bstate :
|
|
+ s->level == 0 ? deflate_stored(s, flush) :
|
|
s->strategy == Z_HUFFMAN_ONLY ? deflate_huff(s, flush) :
|
|
s->strategy == Z_RLE ? deflate_rle(s, flush) :
|
|
(*(configuration_table[s->level].func))(s, flush);
|
|
@@ -1099,7 +1125,6 @@ int ZEXPORT deflate(strm, flush)
|
|
}
|
|
|
|
if (flush != Z_FINISH) return Z_OK;
|
|
- if (s->wrap <= 0) return Z_STREAM_END;
|
|
|
|
/* Write the trailer */
|
|
#ifdef GZIP
|
|
@@ -1115,7 +1140,7 @@ int ZEXPORT deflate(strm, flush)
|
|
}
|
|
else
|
|
#endif
|
|
- {
|
|
+ if (s->wrap == 1) {
|
|
putShortMSB(s, (uInt)(strm->adler >> 16));
|
|
putShortMSB(s, (uInt)(strm->adler & 0xffff));
|
|
}
|
|
@@ -1124,7 +1149,11 @@ int ZEXPORT deflate(strm, flush)
|
|
* to flush the rest.
|
|
*/
|
|
if (s->wrap > 0) s->wrap = -s->wrap; /* write the trailer only once! */
|
|
- return s->pending != 0 ? Z_OK : Z_STREAM_END;
|
|
+ if (s->pending == 0) {
|
|
+ Assert(s->bi_valid == 0, "bi_buf not flushed");
|
|
+ return Z_STREAM_END;
|
|
+ }
|
|
+ return Z_OK;
|
|
}
|
|
|
|
/* ========================================================================= */
|
|
@@ -1141,9 +1170,9 @@ int ZEXPORT deflateEnd(strm)
|
|
TRY_FREE(strm, strm->state->pending_buf);
|
|
TRY_FREE(strm, strm->state->head);
|
|
TRY_FREE(strm, strm->state->prev);
|
|
- TRY_FREE(strm, strm->state->window);
|
|
+ TRY_FREE_WINDOW(strm, strm->state->window);
|
|
|
|
- ZFREE(strm, strm->state);
|
|
+ ZFREE_STATE(strm, strm->state);
|
|
strm->state = Z_NULL;
|
|
|
|
return status == BUSY_STATE ? Z_DATA_ERROR : Z_OK;
|
|
@@ -1173,13 +1202,13 @@ int ZEXPORT deflateCopy(dest, source)
|
|
|
|
zmemcpy((voidpf)dest, (voidpf)source, sizeof(z_stream));
|
|
|
|
- ds = (deflate_state *) ZALLOC(dest, 1, sizeof(deflate_state));
|
|
+ ds = (deflate_state *) ZALLOC_STATE(dest, 1, sizeof(deflate_state));
|
|
if (ds == Z_NULL) return Z_MEM_ERROR;
|
|
dest->state = (struct internal_state FAR *) ds;
|
|
- zmemcpy((voidpf)ds, (voidpf)ss, sizeof(deflate_state));
|
|
+ ZCOPY_STATE((voidpf)ds, (voidpf)ss, sizeof(deflate_state));
|
|
ds->strm = dest;
|
|
|
|
- ds->window = (Bytef *) ZALLOC(dest, ds->w_size, 2*sizeof(Byte));
|
|
+ ds->window = (Bytef *) ZALLOC_WINDOW(dest, ds->w_size, 2*sizeof(Byte));
|
|
ds->prev = (Posf *) ZALLOC(dest, ds->w_size, sizeof(Pos));
|
|
ds->head = (Posf *) ZALLOC(dest, ds->hash_size, sizeof(Pos));
|
|
ds->pending_buf = (uchf *) ZALLOC(dest, ds->lit_bufsize, 4);
|
|
@@ -1226,7 +1255,8 @@ local unsigned read_buf(strm, buf, size)
|
|
strm->avail_in -= len;
|
|
|
|
zmemcpy(buf, strm->next_in, len);
|
|
- if (strm->state->wrap == 1) {
|
|
+ if (!DEFLATE_NEED_CHECKSUM(strm)) {}
|
|
+ else if (strm->state->wrap == 1) {
|
|
strm->adler = adler32(strm->adler, buf, len);
|
|
}
|
|
#ifdef GZIP
|
|
diff --git a/deflate.h b/deflate.h
|
|
index 1a06cd5f2..f92750ca6 100644
|
|
--- a/deflate.h
|
|
+++ b/deflate.h
|
|
@@ -299,6 +299,7 @@ void ZLIB_INTERNAL _tr_flush_bits OF((deflate_state *s));
|
|
void ZLIB_INTERNAL _tr_align OF((deflate_state *s));
|
|
void ZLIB_INTERNAL _tr_stored_block OF((deflate_state *s, charf *buf,
|
|
ulg stored_len, int last));
|
|
+void ZLIB_INTERNAL _tr_send_bits OF((deflate_state *s, int value, int length));
|
|
|
|
#define d_code(dist) \
|
|
((dist) < 256 ? _dist_code[dist] : _dist_code[256+((dist)>>7)])
|
|
@@ -343,4 +344,15 @@ void ZLIB_INTERNAL _tr_stored_block OF((deflate_state *s, charf *buf,
|
|
flush = _tr_tally(s, distance, length)
|
|
#endif
|
|
|
|
+typedef enum {
|
|
+ need_more, /* block not completed, need more input or more output */
|
|
+ block_done, /* block flush performed */
|
|
+ finish_started, /* finish started, need only more output at next deflate */
|
|
+ finish_done /* finish done, accept no more input or output */
|
|
+} block_state;
|
|
+
|
|
+unsigned ZLIB_INTERNAL bi_reverse OF((unsigned code, int len)); /* defined in trees.c, no longer file-local */
|
|
+void ZLIB_INTERNAL bi_windup OF((deflate_state *s)); /* defined in trees.c, no longer file-local */
|
|
+void ZLIB_INTERNAL flush_pending OF((z_streamp strm)); /* NOTE(review): presumably defined in deflate.c -- confirm */
|
|
+
|
|
#endif /* DEFLATE_H */
|
|
diff --git a/gzguts.h b/gzguts.h
|
|
index 57faf3716..581f2b631 100644
|
|
--- a/gzguts.h
|
|
+++ b/gzguts.h
|
|
@@ -153,7 +153,11 @@
|
|
|
|
/* default i/o buffer size -- double this for output when reading (this and
|
|
twice this must be able to fit in an unsigned type) */
|
|
+#ifdef DFLTCC
|
|
+#define GZBUFSIZE 131072 /* 128 KiB when built with DFLTCC */
|
|
+#else
|
|
#define GZBUFSIZE 8192
|
|
+#endif /* DFLTCC */
|
|
|
|
/* gzip modes, also provide a little integrity check on the passed structure */
|
|
#define GZ_NONE 0
|
|
diff --git a/inflate.c b/inflate.c
|
|
index 8acbef44e..19dc7242f 100644
|
|
--- a/inflate.c
|
|
+++ b/inflate.c
|
|
@@ -85,6 +85,27 @@
|
|
#include "inflate.h"
|
|
#include "inffast.h"
|
|
|
|
+/* architecture-specific bits: DFLTCC builds include contrib/s390/dfltcc.h, all other builds use the fallbacks below */
|
|
+#ifdef DFLTCC
|
|
+# include "contrib/s390/dfltcc.h"
|
|
+#else /* !DFLTCC: generic allocators and zmemcpy, hooks expand to nothing */
|
|
+#define ZALLOC_STATE ZALLOC
|
|
+#define ZFREE_STATE ZFREE
|
|
+#define ZCOPY_STATE zmemcpy
|
|
+#define ZALLOC_WINDOW ZALLOC
|
|
+#define ZCOPY_WINDOW zmemcpy
|
|
+#define ZFREE_WINDOW ZFREE
|
|
+#define INFLATE_RESET_KEEP_HOOK(strm) do {} while (0)
|
|
+#define INFLATE_PRIME_HOOK(strm, bits, value) do {} while (0)
|
|
+#define INFLATE_TYPEDO_HOOK(strm, flush) do {} while (0)
|
|
+#define INFLATE_NEED_CHECKSUM(strm) 1 /* never skip the UPDATE_CHECK() calls guarded by this */
|
|
+#define INFLATE_NEED_UPDATEWINDOW(strm) 1 /* never skip the updatewindow() call guarded by this */
|
|
+#define INFLATE_MARK_HOOK(strm) do {} while (0)
|
|
+#define INFLATE_SYNC_POINT_HOOK(strm) do {} while (0)
|
|
+#define INFLATE_SET_DICTIONARY_HOOK(strm, dict, dict_len) do {} while (0)
|
|
+#define INFLATE_GET_DICTIONARY_HOOK(strm, dict, dict_len) do {} while (0)
|
|
+#endif /* DFLTCC */
|
|
+
|
|
#ifdef MAKEFIXED
|
|
# ifndef BUILDFIXED
|
|
# define BUILDFIXED
|
|
@@ -138,6 +159,7 @@ z_streamp strm;
|
|
state->lencode = state->distcode = state->next = state->codes;
|
|
state->sane = 1;
|
|
state->back = -1;
|
|
+ INFLATE_RESET_KEEP_HOOK(strm);
|
|
Tracev((stderr, "inflate: reset\n"));
|
|
return Z_OK;
|
|
}
|
|
@@ -185,7 +207,7 @@ int windowBits;
|
|
if (windowBits && (windowBits < 8 || windowBits > 15))
|
|
return Z_STREAM_ERROR;
|
|
if (state->window != Z_NULL && state->wbits != (unsigned)windowBits) {
|
|
- ZFREE(strm, state->window);
|
|
+ ZFREE_WINDOW(strm, state->window);
|
|
state->window = Z_NULL;
|
|
}
|
|
|
|
@@ -224,7 +246,7 @@ int stream_size;
|
|
strm->zfree = zcfree;
|
|
#endif
|
|
state = (struct inflate_state FAR *)
|
|
- ZALLOC(strm, 1, sizeof(struct inflate_state));
|
|
+ ZALLOC_STATE(strm, 1, sizeof(struct inflate_state));
|
|
if (state == Z_NULL) return Z_MEM_ERROR;
|
|
Tracev((stderr, "inflate: allocated\n"));
|
|
strm->state = (struct internal_state FAR *)state;
|
|
@@ -233,7 +255,7 @@ int stream_size;
|
|
state->mode = HEAD; /* to pass state test in inflateReset2() */
|
|
ret = inflateReset2(strm, windowBits);
|
|
if (ret != Z_OK) {
|
|
- ZFREE(strm, state);
|
|
+ ZFREE_STATE(strm, state);
|
|
strm->state = Z_NULL;
|
|
}
|
|
return ret;
|
|
@@ -255,6 +277,7 @@ int value;
|
|
struct inflate_state FAR *state;
|
|
|
|
if (inflateStateCheck(strm)) return Z_STREAM_ERROR;
|
|
+ INFLATE_PRIME_HOOK(strm, bits, value);
|
|
state = (struct inflate_state FAR *)strm->state;
|
|
if (bits < 0) {
|
|
state->hold = 0;
|
|
@@ -382,6 +405,27 @@ void makefixed()
|
|
}
|
|
#endif /* MAKEFIXED */
|
|
|
|
+int ZLIB_INTERNAL inflate_ensure_window(state) /* returns 0 when the window is ready, 1 if allocation failed */
|
|
+ struct inflate_state *state; /* state to prepare; allocation goes through state->strm */
|
|
+{
|
|
+ /* if it hasn't been done already, allocate space for the window (1 << wbits bytes) */
|
|
+ if (state->window == Z_NULL) {
|
|
+ state->window = (unsigned char FAR *)
|
|
+ ZALLOC_WINDOW(state->strm, 1U << state->wbits,
|
|
+ sizeof(unsigned char));
|
|
+ if (state->window == Z_NULL) return 1; /* allocation failed */
|
|
+ }
|
|
+
|
|
+ /* if window not in use yet (wsize == 0), set its size and mark it empty */
|
|
+ if (state->wsize == 0) {
|
|
+ state->wsize = 1U << state->wbits;
|
|
+ state->wnext = 0;
|
|
+ state->whave = 0;
|
|
+ }
|
|
+
|
|
+ return 0; /* window allocated and initialized */
|
|
+}
|
|
+
|
|
/*
|
|
Update the window with the last wsize (normally 32K) bytes written before
|
|
returning. If window does not exist yet, create it. This is only called
|
|
@@ -406,20 +450,7 @@ unsigned copy;
|
|
|
|
state = (struct inflate_state FAR *)strm->state;
|
|
|
|
- /* if it hasn't been done already, allocate space for the window */
|
|
- if (state->window == Z_NULL) {
|
|
- state->window = (unsigned char FAR *)
|
|
- ZALLOC(strm, 1U << state->wbits,
|
|
- sizeof(unsigned char));
|
|
- if (state->window == Z_NULL) return 1;
|
|
- }
|
|
-
|
|
- /* if window not in use yet, initialize */
|
|
- if (state->wsize == 0) {
|
|
- state->wsize = 1U << state->wbits;
|
|
- state->wnext = 0;
|
|
- state->whave = 0;
|
|
- }
|
|
+ if (inflate_ensure_window(state)) return 1;
|
|
|
|
/* copy state->wsize or less output bytes into the circular window */
|
|
if (copy >= state->wsize) {
|
|
@@ -863,6 +894,7 @@ int flush;
|
|
if (flush == Z_BLOCK || flush == Z_TREES) goto inf_leave;
|
|
/* fallthrough */
|
|
case TYPEDO:
|
|
+ INFLATE_TYPEDO_HOOK(strm, flush);
|
|
if (state->last) {
|
|
BYTEBITS();
|
|
state->mode = CHECK;
|
|
@@ -1224,7 +1256,7 @@ int flush;
|
|
out -= left;
|
|
strm->total_out += out;
|
|
state->total += out;
|
|
- if ((state->wrap & 4) && out)
|
|
+ if (INFLATE_NEED_CHECKSUM(strm) && (state->wrap & 4) && out)
|
|
strm->adler = state->check =
|
|
UPDATE_CHECK(state->check, put - out, out);
|
|
out = left;
|
|
@@ -1279,8 +1311,9 @@ int flush;
|
|
*/
|
|
inf_leave:
|
|
RESTORE();
|
|
- if (state->wsize || (out != strm->avail_out && state->mode < BAD &&
|
|
- (state->mode < CHECK || flush != Z_FINISH)))
|
|
+ if (INFLATE_NEED_UPDATEWINDOW(strm) &&
|
|
+ (state->wsize || (out != strm->avail_out && state->mode < BAD &&
|
|
+ (state->mode < CHECK || flush != Z_FINISH))))
|
|
if (updatewindow(strm, strm->next_out, out - strm->avail_out)) {
|
|
state->mode = MEM;
|
|
return Z_MEM_ERROR;
|
|
@@ -1290,7 +1323,7 @@ int flush;
|
|
strm->total_in += in;
|
|
strm->total_out += out;
|
|
state->total += out;
|
|
- if ((state->wrap & 4) && out)
|
|
+ if (INFLATE_NEED_CHECKSUM(strm) && (state->wrap & 4) && out)
|
|
strm->adler = state->check =
|
|
UPDATE_CHECK(state->check, strm->next_out - out, out);
|
|
strm->data_type = (int)state->bits + (state->last ? 64 : 0) +
|
|
@@ -1308,8 +1341,8 @@ z_streamp strm;
|
|
if (inflateStateCheck(strm))
|
|
return Z_STREAM_ERROR;
|
|
state = (struct inflate_state FAR *)strm->state;
|
|
- if (state->window != Z_NULL) ZFREE(strm, state->window);
|
|
- ZFREE(strm, strm->state);
|
|
+ if (state->window != Z_NULL) ZFREE_WINDOW(strm, state->window);
|
|
+ ZFREE_STATE(strm, strm->state);
|
|
strm->state = Z_NULL;
|
|
Tracev((stderr, "inflate: end\n"));
|
|
return Z_OK;
|
|
@@ -1326,6 +1359,8 @@ uInt *dictLength;
|
|
if (inflateStateCheck(strm)) return Z_STREAM_ERROR;
|
|
state = (struct inflate_state FAR *)strm->state;
|
|
|
|
+ INFLATE_GET_DICTIONARY_HOOK(strm, dictionary, dictLength);
|
|
+
|
|
/* copy dictionary */
|
|
if (state->whave && dictionary != Z_NULL) {
|
|
zmemcpy(dictionary, state->window + state->wnext,
|
|
@@ -1361,6 +1396,8 @@ uInt dictLength;
|
|
return Z_DATA_ERROR;
|
|
}
|
|
|
|
+ INFLATE_SET_DICTIONARY_HOOK(strm, dictionary, dictLength);
|
|
+
|
|
/* copy dictionary to window using updatewindow(), which will amend the
|
|
existing dictionary if appropriate */
|
|
ret = updatewindow(strm, dictionary + dictLength, dictLength);
|
|
@@ -1488,6 +1525,7 @@ z_streamp strm;
|
|
struct inflate_state FAR *state;
|
|
|
|
if (inflateStateCheck(strm)) return Z_STREAM_ERROR;
|
|
+ INFLATE_SYNC_POINT_HOOK(strm);
|
|
state = (struct inflate_state FAR *)strm->state;
|
|
return state->mode == STORED && state->bits == 0;
|
|
}
|
|
@@ -1508,21 +1546,22 @@ z_streamp source;
|
|
|
|
/* allocate space */
|
|
copy = (struct inflate_state FAR *)
|
|
- ZALLOC(source, 1, sizeof(struct inflate_state));
|
|
+ ZALLOC_STATE(source, 1, sizeof(struct inflate_state));
|
|
if (copy == Z_NULL) return Z_MEM_ERROR;
|
|
window = Z_NULL;
|
|
if (state->window != Z_NULL) {
|
|
window = (unsigned char FAR *)
|
|
- ZALLOC(source, 1U << state->wbits, sizeof(unsigned char));
|
|
+ ZALLOC_WINDOW(source, 1U << state->wbits,
|
|
+ sizeof(unsigned char));
|
|
if (window == Z_NULL) {
|
|
- ZFREE(source, copy);
|
|
+ ZFREE_STATE(source, copy);
|
|
return Z_MEM_ERROR;
|
|
}
|
|
}
|
|
|
|
/* copy state */
|
|
zmemcpy((voidpf)dest, (voidpf)source, sizeof(z_stream));
|
|
- zmemcpy((voidpf)copy, (voidpf)state, sizeof(struct inflate_state));
|
|
+ ZCOPY_STATE((voidpf)copy, (voidpf)state, sizeof(struct inflate_state));
|
|
copy->strm = dest;
|
|
if (state->lencode >= state->codes &&
|
|
state->lencode <= state->codes + ENOUGH - 1) {
|
|
@@ -1531,8 +1570,7 @@ z_streamp source;
|
|
}
|
|
copy->next = copy->codes + (state->next - state->codes);
|
|
if (window != Z_NULL) {
|
|
- wsize = 1U << state->wbits;
|
|
- zmemcpy(window, state->window, wsize);
|
|
+ ZCOPY_WINDOW(window, state->window, 1U << state->wbits);
|
|
}
|
|
copy->window = window;
|
|
dest->state = (struct internal_state FAR *)copy;
|
|
@@ -1579,6 +1617,7 @@ z_streamp strm;
|
|
|
|
if (inflateStateCheck(strm))
|
|
return -(1L << 16);
|
|
+ INFLATE_MARK_HOOK(strm);
|
|
state = (struct inflate_state FAR *)strm->state;
|
|
return (long)(((unsigned long)((long)state->back)) << 16) +
|
|
(state->mode == COPY ? state->length :
|
|
diff --git a/inflate.h b/inflate.h
|
|
index f127b6b1f..519ed3535 100644
|
|
--- a/inflate.h
|
|
+++ b/inflate.h
|
|
@@ -124,3 +124,5 @@ struct inflate_state {
|
|
int back; /* bits back of last unprocessed length/lit */
|
|
unsigned was; /* initial length of match */
|
|
};
|
|
+
|
|
+int ZLIB_INTERNAL inflate_ensure_window OF((struct inflate_state *state));
|
|
diff --git a/test/infcover.c b/test/infcover.c
|
|
index 2be01646c..a208219dc 100644
|
|
--- a/test/infcover.c
|
|
+++ b/test/infcover.c
|
|
@@ -373,7 +373,7 @@ local void cover_support(void)
|
|
mem_setup(&strm);
|
|
strm.avail_in = 0;
|
|
strm.next_in = Z_NULL;
|
|
- ret = inflateInit_(&strm, ZLIB_VERSION - 1, (int)sizeof(z_stream));
|
|
+ ret = inflateInit_(&strm, &ZLIB_VERSION[1], (int)sizeof(z_stream));
|
|
assert(ret == Z_VERSION_ERROR);
|
|
mem_done(&strm, "wrong version");
|
|
|
|
@@ -444,7 +444,7 @@ local void cover_wrap(void)
|
|
}
|
|
|
|
/* input and output functions for inflateBack() */
|
|
-local unsigned pull(void *desc, unsigned char **buf)
|
|
+local unsigned pull(void *desc, z_const unsigned char **buf)
|
|
{
|
|
static unsigned int next = 0;
|
|
static unsigned char dat[] = {0x63, 0, 2, 0};
|
|
diff --git a/test/minigzip.c b/test/minigzip.c
|
|
index a649d2b3d..964408a40 100644
|
|
--- a/test/minigzip.c
|
|
+++ b/test/minigzip.c
|
|
@@ -132,7 +132,11 @@ static void pwinerror (s)
|
|
#endif
|
|
#define SUFFIX_LEN (sizeof(GZ_SUFFIX)-1)
|
|
|
|
+#ifdef DFLTCC
|
|
+#define BUFLEN 262144 /* 256 KiB when built with DFLTCC */
|
|
+#else
|
|
#define BUFLEN 16384
|
|
+#endif /* DFLTCC */
|
|
#define MAX_NAME_LEN 1024
|
|
|
|
#ifdef MAXSEG_64K
|
|
diff --git a/trees.c b/trees.c
|
|
index 5f305c472..4924bdfc8 100644
|
|
--- a/trees.c
|
|
+++ b/trees.c
|
|
@@ -149,8 +149,6 @@ local void send_all_trees OF((deflate_state *s, int lcodes, int dcodes,
|
|
local void compress_block OF((deflate_state *s, const ct_data *ltree,
|
|
const ct_data *dtree));
|
|
local int detect_data_type OF((deflate_state *s));
|
|
-local unsigned bi_reverse OF((unsigned code, int len));
|
|
-local void bi_windup OF((deflate_state *s));
|
|
local void bi_flush OF((deflate_state *s));
|
|
|
|
#ifdef GEN_TREES_H
|
|
@@ -223,6 +221,13 @@ local void send_bits(s, value, length)
|
|
}
|
|
#endif /* ZLIB_DEBUG */
|
|
|
|
+void ZLIB_INTERNAL _tr_send_bits(s, value, length) /* ZLIB_INTERNAL wrapper exposing this file's local send_bits() */
|
|
+ deflate_state *s; /* deflate state handed to send_bits() */
|
|
+ int value; /* forwarded unchanged to send_bits() */
|
|
+ int length; /* forwarded unchanged to send_bits() */
|
|
+{
|
|
+ send_bits(s, value, length);
|
|
+}
|
|
|
|
/* the arguments must not have side effects */
|
|
|
|
@@ -1133,7 +1138,7 @@ local int detect_data_type(s)
|
|
* method would use a table)
|
|
* IN assertion: 1 <= len <= 15
|
|
*/
|
|
-local unsigned bi_reverse(code, len)
|
|
+unsigned ZLIB_INTERNAL bi_reverse(code, len)
|
|
unsigned code; /* the value to invert */
|
|
int len; /* its bit length */
|
|
{
|
|
@@ -1165,7 +1170,7 @@ local void bi_flush(s)
|
|
/* ===========================================================================
|
|
* Flush the bit buffer and align the output on a byte boundary
|
|
*/
|
|
-local void bi_windup(s)
|
|
+void ZLIB_INTERNAL bi_windup(s)
|
|
deflate_state *s;
|
|
{
|
|
if (s->bi_valid > 8) {
|
|
diff --git a/zutil.h b/zutil.h
|
|
index 0bc7f4ecd..75eb4df47 100644
|
|
--- a/zutil.h
|
|
+++ b/zutil.h
|
|
@@ -87,6 +87,8 @@ extern z_const char * const z_errmsg[10]; /* indexed by 2-zlib_error */
|
|
|
|
#define PRESET_DICT 0x20 /* preset dictionary flag in zlib header */
|
|
|
|
+#define ZLIB_WRAPLEN 6 /* zlib format overhead */
|
|
+
|
|
/* target dependencies */
|
|
|
|
#if defined(MSDOS) || (defined(WINDOWS) && !defined(WIN32))
|