commit e941bdc25dea71431985feb186732a34036900e600f1fa7c7f8af34d2625c480 Author: Adrian Schröter Date: Sat May 4 02:38:40 2024 +0200 Sync from SUSE:SLFO:Main zlib revision 36f39b6d9d335ce320e489faa36e0710 diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..9b03811 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,23 @@ +## Default LFS +*.7z filter=lfs diff=lfs merge=lfs -text +*.bsp filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.gem filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.jar filter=lfs diff=lfs merge=lfs -text +*.lz filter=lfs diff=lfs merge=lfs -text +*.lzma filter=lfs diff=lfs merge=lfs -text +*.obscpio filter=lfs diff=lfs merge=lfs -text +*.oxt filter=lfs diff=lfs merge=lfs -text +*.pdf filter=lfs diff=lfs merge=lfs -text +*.png filter=lfs diff=lfs merge=lfs -text +*.rpm filter=lfs diff=lfs merge=lfs -text +*.tbz filter=lfs diff=lfs merge=lfs -text +*.tbz2 filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.ttf filter=lfs diff=lfs merge=lfs -text +*.txz filter=lfs diff=lfs merge=lfs -text +*.whl filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text diff --git a/0001-Do-not-try-to-store-negative-values-in-unsigned-int.patch b/0001-Do-not-try-to-store-negative-values-in-unsigned-int.patch new file mode 100644 index 0000000..360acf3 --- /dev/null +++ b/0001-Do-not-try-to-store-negative-values-in-unsigned-int.patch @@ -0,0 +1,28 @@ +From bb5eec59e3ec66f6dccb79b1900aa806a1cca12e Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Chv=C3=A1tal?= +Date: Fri, 8 Dec 2017 14:59:08 +0100 +Subject: [PATCH] Do not try to store negative values in unsigned int + +--- + deflate.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/deflate.c b/deflate.c +index 1ec7614..1bad1eb 100644 +--- a/deflate.c ++++ b/deflate.c +@@ -1536,7 +1536,10 @@ local void fill_window(s) + + /* Initialize the hash value now that we have some input: */ + if (s->lookahead + s->insert >= MIN_MATCH) { +- uInt str = s->strstart - s->insert; ++ uInt str = 0; ++ /* storing negative values to uInt is not good idea */ ++ if (s->strstart - s->insert > 0) ++ str = s->strstart - s->insert; + s->ins_h = s->window[str]; + UPDATE_HASH(s, s->ins_h, s->window[str + 1]); + #if MIN_MATCH != 3 +-- +2.15.1 + diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..97d3f67 --- /dev/null +++ b/LICENSE @@ -0,0 +1,354 @@ +The zlib library itself is released under specific zlib license see +zlib.h in tarball + +However files (relative to extracted source tarballs root) + contrib/asm586/match.S + contrib/masm686/match.asm + contrib/masmx86/gvmat32.asm + contrib/ada/zlib.ads + +which are not used to build zlib library, but are shipped in source +of zlib are release under the GNU General Public License, see the +text of the GNU GPL below. + +--------------------------------------------------------------- + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc. + 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Library General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + + Copyright (C) + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) year name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + , 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Library General +Public License instead of this License. diff --git a/baselibs.conf b/baselibs.conf new file mode 100644 index 0000000..70c6c07 --- /dev/null +++ b/baselibs.conf @@ -0,0 +1,14 @@ +libz1 + targettype x86 provides "baselibs-x86:/lib/libz.so.1" + provides "zlib- = " + obsoletes "zlib- <= 1.2.8" +zlib-devel + requires "glibc-devel-" + requires "libz1- = " +zlib-devel-static + requires "zlib-devel- = " + targettype x86-64-v2 -/.* + targettype x86-64-v3 -/.* + targettype x86-64-v4 -/.* +libminizip1 + targettype x86 provides "baselibs-x86:/lib/libminizip.so.1" diff --git a/bsc1210593.patch b/bsc1210593.patch new file mode 100644 index 0000000..c6c5ec3 --- /dev/null +++ b/bsc1210593.patch @@ -0,0 +1,13 @@ +Index: zlib-1.2.11/contrib/s390/dfltcc_deflate.h +=================================================================== +--- zlib-1.2.11.orig/contrib/s390/dfltcc_deflate.h ++++ zlib-1.2.11/contrib/s390/dfltcc_deflate.h +@@ -45,7 +45,7 @@ int ZLIB_INTERNAL dfltcc_deflate_get_dic + #define DEFLATE_DONE dfltcc_deflate_done + #define DEFLATE_BOUND_ADJUST_COMPLEN(strm, complen, source_len) \ + do { \ +- if (dfltcc_can_deflate((strm))) \ ++ if (deflateStateCheck((strm)) || dfltcc_can_deflate((strm))) \ + (complen) = DEFLATE_BOUND_COMPLEN(source_len); \ + } while (0) + #define DEFLATE_NEED_CONSERVATIVE_BOUND(strm) (dfltcc_can_deflate((strm))) diff --git a/minizip-dont-install-crypt-header.patch b/minizip-dont-install-crypt-header.patch new file mode 100644 index 0000000..fee3585 --- /dev/null +++ b/minizip-dont-install-crypt-header.patch @@ -0,0 +1,32 @@ +From e490ddad3091574a0c2e3b5a66a8fee9a7ab212f Mon Sep 17 00:00:00 2001 +From: Pavel Raiskup +Date: Sun, 19 Feb 2017 16:42:12 +0100 +Subject: [PATCH] minizip: don't install crypt.h + +People did mistakenly or unintentionally include crypt.h before, +don't install this internal header from now. +--- + contrib/minizip/Makefile.am | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/contrib/minizip/Makefile.am b/contrib/minizip/Makefile.am +index d343011eb..4f4661023 100644 +--- a/contrib/minizip/Makefile.am ++++ b/contrib/minizip/Makefile.am +@@ -26,13 +26,15 @@ libminizip_la_LDFLAGS = $(AM_LDFLAGS) -version-info 1:0:0 -lz + + minizip_includedir = $(includedir)/minizip + minizip_include_HEADERS = \ +- crypt.h \ + ioapi.h \ + mztools.h \ + unzip.h \ + zip.h \ + ${iowin32_h} + ++noinst_HEADERS = \ ++ crypt.h ++ + pkgconfigdir = $(libdir)/pkgconfig + pkgconfig_DATA = minizip.pc + diff --git a/zlib-1.2.11-covscan-issues-rhel9.patch b/zlib-1.2.11-covscan-issues-rhel9.patch new file mode 100644 index 0000000..76cd370 --- /dev/null +++ b/zlib-1.2.11-covscan-issues-rhel9.patch @@ -0,0 +1,28 @@ +From a7d3c3076dc316f1408f56af86a72a17fcfdf5dd Mon Sep 17 00:00:00 2001 +From: Ilya Leoshkevich +Date: Wed, 27 Apr 2022 14:37:54 +0200 +Subject: [PATCH] zlib-1.2.11-covscan-issues-rhel9.patch + +--- + contrib/minizip/mztools.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/contrib/minizip/mztools.c b/contrib/minizip/mztools.c +index 96891c2e0..1197928a7 100644 +--- a/contrib/minizip/mztools.c ++++ b/contrib/minizip/mztools.c +@@ -286,6 +286,14 @@ uLong* bytesRecovered; + } + } else { + err = Z_STREAM_ERROR; ++ if(fpZip != NULL) ++ fclose(fpZip); ++ ++ if(fpOut != NULL) ++ fclose(fpOut); ++ ++ if(fpOutCD != NULL) ++ fclose(fpOutCD); + } + return err; + } diff --git a/zlib-1.2.11-covscan-issues.patch b/zlib-1.2.11-covscan-issues.patch new file mode 100644 index 0000000..82cf360 --- /dev/null +++ b/zlib-1.2.11-covscan-issues.patch @@ -0,0 +1,22 @@ +From e4c0c07385f80e260f1f1aa2a80c41c62754b9d4 Mon Sep 17 00:00:00 2001 +From: Ilya Leoshkevich +Date: Wed, 27 Apr 2022 14:37:39 +0200 +Subject: [PATCH] zlib-1.2.11-covscan-issues.patch + +--- + deflate.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/deflate.c b/deflate.c +index ac0b865a4..49f056a00 100644 +--- a/deflate.c ++++ b/deflate.c +@@ -1062,7 +1062,7 @@ int ZEXPORT deflate (strm, flush) + */ + if (strm->avail_in != 0 || s->lookahead != 0 || + (flush != Z_NO_FLUSH && s->status != FINISH_STATE)) { +- block_state bstate; ++ block_state bstate = 0; + + bstate = DEFLATE_HOOK(strm, flush, &bstate) ? bstate : + s->level == 0 ? deflate_stored(s, flush) : diff --git a/zlib-1.2.12-add-optimized-slide_hash-for-power.patch b/zlib-1.2.12-add-optimized-slide_hash-for-power.patch new file mode 100644 index 0000000..2b973c7 --- /dev/null +++ b/zlib-1.2.12-add-optimized-slide_hash-for-power.patch @@ -0,0 +1,217 @@ +From 4a8d89ae49aa17d1634a2816c8d159f533a07eae Mon Sep 17 00:00:00 2001 +From: Matheus Castanho +Date: Wed, 27 Nov 2019 10:18:10 -0300 +Subject: [PATCH] Add optimized slide_hash for Power + +Considerable time is spent on deflate.c:slide_hash() during +deflate. This commit introduces a new slide_hash function that +uses VSX vector instructions to slide 8 hash elements at a time, +instead of just one as the standard code does. + +The choice between the optimized and default versions is made only +on the first call to the function, enabling a fallback to standard +behavior if the host processor does not support VSX instructions, +so the same binary can be used for multiple Power processor +versions. + +Author: Matheus Castanho +--- + CMakeLists.txt | 3 +- + Makefile.in | 8 ++++ + configure | 4 +- + contrib/power/power.h | 3 ++ + contrib/power/slide_hash_power8.c | 63 +++++++++++++++++++++++++++++ + contrib/power/slide_hash_resolver.c | 15 +++++++ + deflate.c | 12 ++++++ + 7 files changed, 105 insertions(+), 3 deletions(-) + create mode 100644 contrib/power/slide_hash_power8.c + create mode 100644 contrib/power/slide_hash_resolver.c + +Index: zlib-1.2.13/CMakeLists.txt +=================================================================== +--- zlib-1.2.13.orig/CMakeLists.txt ++++ zlib-1.2.13/CMakeLists.txt +@@ -174,7 +174,8 @@ if(CMAKE_COMPILER_IS_GNUCC) + add_definitions(-DZ_POWER8) + set(ZLIB_POWER8 + contrib/power/adler32_power8.c +- contrib/power/crc32_z_power8.c) ++ contrib/power/crc32_z_power8.c ++ contrib/power/slide_hash_power8.c) + + set_source_files_properties( + ${ZLIB_POWER8} +Index: zlib-1.2.13/Makefile.in +=================================================================== +--- zlib-1.2.13.orig/Makefile.in ++++ zlib-1.2.13/Makefile.in +@@ -185,6 +185,9 @@ crc32-vx.o: $(SRCDIR)contrib/s390/crc32- + deflate.o: $(SRCDIR)deflate.c + $(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)deflate.c + ++slide_hash_power8.o: $(SRCDIR)contrib/power/slide_hash_power8.c ++ $(CC) $(CFLAGS) -mcpu=power8 $(ZINC) -c -o $@ $(SRCDIR)contrib/power/slide_hash_power8.c ++ + infback.o: $(SRCDIR)infback.c + $(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)infback.c + +@@ -252,6 +255,11 @@ deflate.lo: $(SRCDIR)deflate.c + $(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/deflate.o $(SRCDIR)deflate.c + -@mv objs/deflate.o $@ + ++slide_hash_power8.lo: $(SRCDIR)contrib/power/slide_hash_power8.c ++ -@mkdir objs 2>/dev/null || test -d objs ++ $(CC) $(SFLAGS) -mcpu=power8 $(ZINC) -DPIC -c -o objs/slide_hash_power8.o $(SRCDIR)contrib/power/slide_hash_power8.c ++ -@mv objs/slide_hash_power8.o $@ ++ + infback.lo: $(SRCDIR)infback.c + -@mkdir objs 2>/dev/null || test -d objs + $(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/infback.o $(SRCDIR)infback.c +Index: zlib-1.2.13/configure +=================================================================== +--- zlib-1.2.13.orig/configure ++++ zlib-1.2.13/configure +@@ -898,8 +898,8 @@ if tryboth $CC -c $CFLAGS $test.c; then + + if tryboth $CC -c $CFLAGS -mcpu=power8 $test.c; then + POWER8="-DZ_POWER8" +- PIC_OBJC="${PIC_OBJC} adler32_power8.lo crc32_z_power8.lo" +- OBJC="${OBJC} adler32_power8.o crc32_z_power8.o" ++ PIC_OBJC="${PIC_OBJC} adler32_power8.lo crc32_z_power8.lo slide_hash_power8.lo" ++ OBJC="${OBJC} adler32_power8.o crc32_z_power8.o slide_hash_power8.o" + echo "Checking for -mcpu=power8 support... Yes." | tee -a configure.log + else + echo "Checking for -mcpu=power8 support... No." | tee -a configure.log +Index: zlib-1.2.13/contrib/power/power.h +=================================================================== +--- zlib-1.2.13.orig/contrib/power/power.h ++++ zlib-1.2.13/contrib/power/power.h +@@ -4,7 +4,10 @@ + */ + #include "../../zconf.h" + #include "../../zutil.h" ++#include "../../deflate.h" + + uLong _adler32_power8(uLong adler, const Bytef* buf, uInt len); + + unsigned long _crc32_z_power8(unsigned long, const Bytef *, z_size_t); ++ ++void _slide_hash_power8(deflate_state *s); +Index: zlib-1.2.13/contrib/power/slide_hash_power8.c +=================================================================== +--- /dev/null ++++ zlib-1.2.13/contrib/power/slide_hash_power8.c +@@ -0,0 +1,63 @@ ++ /* Copyright (C) 2019 Matheus Castanho , IBM ++ * For conditions of distribution and use, see copyright notice in zlib.h ++ */ ++ ++#include ++#include "../../deflate.h" ++ ++local inline void slide_hash_power8_loop OF((deflate_state *s, ++ unsigned n_elems, Posf *table_end)) __attribute__((always_inline)); ++ ++local void slide_hash_power8_loop( ++ deflate_state *s, ++ unsigned n_elems, ++ Posf *table_end) ++{ ++ vector unsigned short vw, vm, *vp; ++ unsigned chunks; ++ ++ /* Each vector register (chunk) corresponds to 128 bits == 8 Posf, ++ * so instead of processing each of the n_elems in the hash table ++ * individually, we can do it in chunks of 8 with vector instructions. ++ * ++ * This function is only called from slide_hash_power8(), and both calls ++ * pass n_elems as a power of 2 higher than 2^7, as defined by ++ * deflateInit2_(), so n_elems will always be a multiple of 8. */ ++ chunks = n_elems >> 3; ++ Assert(n_elems % 8 == 0, "Weird hash table size!"); ++ ++ /* This type casting is safe since s->w_size is always <= 64KB ++ * as defined by deflateInit2_() and Posf == unsigned short */ ++ vw[0] = (Posf) s->w_size; ++ vw = vec_splat(vw,0); ++ ++ vp = (vector unsigned short *) table_end; ++ ++ do { ++ /* Processing 8 elements at a time */ ++ vp--; ++ vm = *vp; ++ ++ /* This is equivalent to: m >= w_size ? m - w_size : 0 ++ * Since we are using a saturated unsigned subtraction, any ++ * values that are > w_size will be set to 0, while the others ++ * will be subtracted by w_size. */ ++ *vp = vec_subs(vm,vw); ++ } while (--chunks); ++}; ++ ++void ZLIB_INTERNAL _slide_hash_power8(deflate_state *s) ++{ ++ unsigned n; ++ Posf *p; ++ ++ n = s->hash_size; ++ p = &s->head[n]; ++ slide_hash_power8_loop(s,n,p); ++ ++#ifndef FASTEST ++ n = s->w_size; ++ p = &s->prev[n]; ++ slide_hash_power8_loop(s,n,p); ++#endif ++} +Index: zlib-1.2.13/contrib/power/slide_hash_resolver.c +=================================================================== +--- /dev/null ++++ zlib-1.2.13/contrib/power/slide_hash_resolver.c +@@ -0,0 +1,15 @@ ++/* Copyright (C) 2019 Matheus Castanho , IBM ++ * For conditions of distribution and use, see copyright notice in zlib.h ++ */ ++ ++#include "../gcc/zifunc.h" ++#include "power.h" ++ ++Z_IFUNC(slide_hash) { ++#ifdef Z_POWER8 ++ if (__builtin_cpu_supports("arch_2_07")) ++ return _slide_hash_power8; ++#endif ++ ++ return slide_hash_default; ++} +Index: zlib-1.2.13/deflate.c +=================================================================== +--- zlib-1.2.13.orig/deflate.c ++++ zlib-1.2.13/deflate.c +@@ -204,6 +204,13 @@ local const config configuration_table[1 + (unsigned)(s->hash_size - 1)*sizeof(*s->head)); \ + } while (0) + ++#ifdef Z_POWER_OPT ++/* Rename function so resolver can use its symbol. The default version will be ++ * returned by the resolver if the host has no support for an optimized version. ++ */ ++#define slide_hash slide_hash_default ++#endif /* Z_POWER_OPT */ ++ + /* =========================================================================== + * Slide the hash table when sliding the window down (could be avoided with 32 + * bit values at the expense of memory usage). We slide even when level == 0 to +@@ -235,6 +242,11 @@ local void slide_hash(s) + #endif + } + ++#ifdef Z_POWER_OPT ++#undef slide_hash ++#include "contrib/power/slide_hash_resolver.c" ++#endif /* Z_POWER_OPT */ ++ + /* ========================================================================= */ + int ZEXPORT deflateInit_(strm, level, version, stream_size) + z_streamp strm; diff --git a/zlib-1.2.12-add-vectorized-longest_match-for-power.patch b/zlib-1.2.12-add-vectorized-longest_match-for-power.patch new file mode 100644 index 0000000..9bdd24b --- /dev/null +++ b/zlib-1.2.12-add-vectorized-longest_match-for-power.patch @@ -0,0 +1,338 @@ +From aecdff0646c7e188b48f6db285d8d63a74f246c1 Mon Sep 17 00:00:00 2001 +From: Matheus Castanho +Date: Tue, 29 Oct 2019 18:04:11 -0300 +Subject: [PATCH] Add vectorized longest_match for Power + +This commit introduces an optimized version of the longest_match +function for Power processors. It uses VSX instructions to match +16 bytes at a time on each comparison, instead of one by one. + +Author: Matheus Castanho +--- + CMakeLists.txt | 3 +- + Makefile.in | 8 + + configure | 4 +- + contrib/power/longest_match_power9.c | 194 +++++++++++++++++++++++++ + contrib/power/longest_match_resolver.c | 15 ++ + contrib/power/power.h | 2 + + deflate.c | 13 ++ + 7 files changed, 236 insertions(+), 3 deletions(-) + create mode 100644 contrib/power/longest_match_power9.c + create mode 100644 contrib/power/longest_match_resolver.c + +Index: zlib-1.2.13/CMakeLists.txt +=================================================================== +--- zlib-1.2.13.orig/CMakeLists.txt ++++ zlib-1.2.13/CMakeLists.txt +@@ -187,7 +187,8 @@ if(CMAKE_COMPILER_IS_GNUCC) + + if(POWER9) + add_definitions(-DZ_POWER9) +- set(ZLIB_POWER9 ) ++ set(ZLIB_POWER9 ++ contrib/power/longest_match_power9.c) + + set_source_files_properties( + ${ZLIB_POWER9} +Index: zlib-1.2.13/Makefile.in +=================================================================== +--- zlib-1.2.13.orig/Makefile.in ++++ zlib-1.2.13/Makefile.in +@@ -185,6 +185,9 @@ crc32-vx.o: $(SRCDIR)contrib/s390/crc32- + deflate.o: $(SRCDIR)deflate.c + $(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)deflate.c + ++longest_match_power9.o: $(SRCDIR)contrib/power/longest_match_power9.c ++ $(CC) $(CFLAGS) -mcpu=power9 $(ZINC) -c -o $@ $(SRCDIR)contrib/power/longest_match_power9.c ++ + slide_hash_power8.o: $(SRCDIR)contrib/power/slide_hash_power8.c + $(CC) $(CFLAGS) -mcpu=power8 $(ZINC) -c -o $@ $(SRCDIR)contrib/power/slide_hash_power8.c + +@@ -255,6 +258,11 @@ deflate.lo: $(SRCDIR)deflate.c + $(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/deflate.o $(SRCDIR)deflate.c + -@mv objs/deflate.o $@ + ++longest_match_power9.lo: $(SRCDIR)contrib/power/longest_match_power9.c ++ -@mkdir objs 2>/dev/null || test -d objs ++ $(CC) $(SFLAGS) -mcpu=power9 $(ZINC) -DPIC -c -o objs/longest_match_power9.o $(SRCDIR)contrib/power/longest_match_power9.c ++ -@mv objs/longest_match_power9.o $@ ++ + slide_hash_power8.lo: $(SRCDIR)contrib/power/slide_hash_power8.c + -@mkdir objs 2>/dev/null || test -d objs + $(CC) $(SFLAGS) -mcpu=power8 $(ZINC) -DPIC -c -o objs/slide_hash_power8.o $(SRCDIR)contrib/power/slide_hash_power8.c +Index: zlib-1.2.13/configure +=================================================================== +--- zlib-1.2.13.orig/configure ++++ zlib-1.2.13/configure +@@ -907,8 +907,8 @@ if tryboth $CC -c $CFLAGS $test.c; then + + if tryboth $CC -c $CFLAGS -mcpu=power9 $test.c; then + POWER9="-DZ_POWER9" +- PIC_OBJC="${PIC_OBJC}" +- OBJC="${OBJC}" ++ PIC_OBJC="$PIC_OBJC longest_match_power9.lo" ++ OBJC="$OBJC longest_match_power9.o" + echo "Checking for -mcpu=power9 support... Yes." | tee -a configure.log + else + echo "Checking for -mcpu=power9 support... No." | tee -a configure.log +Index: zlib-1.2.13/contrib/power/longest_match_power9.c +=================================================================== +--- /dev/null ++++ zlib-1.2.13/contrib/power/longest_match_power9.c +@@ -0,0 +1,194 @@ ++/* Copyright (C) 2019 Matheus Castanho , IBM ++ * For conditions of distribution and use, see copyright notice in zlib.h ++ */ ++ ++#include ++#include "../../deflate.h" ++ ++local inline int vec_match OF((Bytef* scan, Bytef* match)) ++ __attribute__((always_inline)); ++ ++local inline int vec_match(Bytef* scan, Bytef* match) ++{ ++ vector unsigned char vscan, vmatch, vc; ++ int len; ++ ++ vscan = *((vector unsigned char *) scan); ++ vmatch = *((vector unsigned char *) match); ++ ++ /* Compare 16 bytes at a time. ++ * Each byte of vc will be either all ones or all zeroes, ++ * depending on the result of the comparison ++ */ ++ vc = (vector unsigned char) vec_cmpne(vscan,vmatch); ++ ++ /* Since the index of matching bytes will contain only zeroes ++ * on vc (since we used cmpne), counting the number of consecutive ++ * bytes where LSB == 0 is the same as counting the length of the match. ++ * ++ * There was an issue in the way the vec_cnttz_lsbb builtin was implemented ++ * that got fixed on GCC 12, but now we have to use different builtins ++ * depending on the compiler version. To avoid that, let's use inline asm to ++ * generate the exact instruction we need. ++ */ ++ #ifdef __LITTLE_ENDIAN__ ++ asm volatile("vctzlsbb %0, %1\n\t" : "=r" (len) : "v" (vc)); ++ #else ++ asm volatile("vclzlsbb %0, %1\n\t" : "=r" (len) : "v" (vc)); ++ #endif ++ ++ return len; ++} ++ ++uInt ZLIB_INTERNAL _longest_match_power9(deflate_state *s, IPos cur_match) ++{ ++ unsigned chain_length = s->max_chain_length;/* max hash chain length */ ++ register Bytef *scan = s->window + s->strstart; /* current string */ ++ register Bytef *match; /* matched string */ ++ register int len; /* length of current match */ ++ int best_len = (int)s->prev_length; /* best match length so far */ ++ int nice_match = s->nice_match; /* stop if match long enough */ ++ int mbytes; /* matched bytes inside loop */ ++ IPos limit = s->strstart > (IPos)MAX_DIST(s) ? ++ s->strstart - (IPos)MAX_DIST(s) : 0; ++ /* Stop when cur_match becomes <= limit. To simplify the code, ++ * we prevent matches with the string of window index 0. ++ */ ++ Posf *prev = s->prev; ++ uInt wmask = s->w_mask; ++ ++#if (MAX_MATCH == 258) ++ /* Compare the last two bytes at once. */ ++ register Bytef *strend2 = s->window + s->strstart + MAX_MATCH - 2; ++ register ush scan_end = *(ushf*)(scan+best_len-1); ++#else ++ register Bytef *strend = s->window + s->strstart + MAX_MATCH; ++ register Byte scan_end1 = scan[best_len-1]; ++ register Byte scan_end = scan[best_len]; ++#endif ++ ++ /* The code is optimized for HASH_BITS >= 8 and MAX_MATCH-2 multiple of 16. ++ * It is easy to get rid of this optimization if necessary. ++ */ ++ Assert(s->hash_bits >= 8 && MAX_MATCH == 258, "Code too clever"); ++ ++ /* Do not waste too much time if we already have a good match: */ ++ if (s->prev_length >= s->good_match) { ++ chain_length >>= 2; ++ } ++ /* Do not look for matches beyond the end of the input. This is necessary ++ * to make deflate deterministic. ++ */ ++ if ((uInt)nice_match > s->lookahead) nice_match = (int)s->lookahead; ++ ++ Assert((ulg)s->strstart <= s->window_size-MIN_LOOKAHEAD, "need lookahead"); ++ ++ do { ++ Assert(cur_match < s->strstart, "no future"); ++ match = s->window + cur_match; ++ ++ /* Skip to next match if the match length cannot increase ++ * or if the match length is less than 2. Note that the checks below ++ * for insufficient lookahead only occur occasionally for performance ++ * reasons. Therefore uninitialized memory will be accessed, and ++ * conditional jumps will be made that depend on those values. ++ * However the length of the match is limited to the lookahead, so ++ * the output of deflate is not affected by the uninitialized values. ++ */ ++ ++/* MAX_MATCH - 2 should be a multiple of 16 for this optimization to work. */ ++#if (MAX_MATCH == 258) ++ ++ /* Compare ending (2 bytes) and beginning of potential match. ++ * ++ * On Power processors, loading a 16-byte vector takes only 1 extra ++ * cycle compared to a regular byte load. So instead of comparing the ++ * first two bytes and then the rest later if they match, we can compare ++ * the first 16 at once, and when we have a match longer than 2, we will ++ * already have the result of comparing the first 16 bytes saved in mbytes. ++ */ ++ if (*(ushf*)(match+best_len-1) != scan_end || ++ (mbytes = vec_match(scan,match)) < 3) continue; ++ ++ scan += mbytes; ++ match += mbytes; ++ ++ /* In case when we may have a match longer than 16, we perform further ++ * comparisons in chunks of 16 and keep going while all bytes match. ++ */ ++ while(mbytes == 16) { ++ mbytes = vec_match(scan,match); ++ scan += mbytes; ++ match += mbytes; ++ ++ /* We also have to limit the maximum match based on MAX_MATCH. ++ * Since we are comparing 16 bytes at a time and MAX_MATCH == 258 (to ++ * comply with default implementation), we should stop comparing when ++ * we have matched 256 bytes, which happens when scan == strend2. ++ * In this ("rare") case, we have to check the remaining 2 bytes ++ * individually using common load and compare operations. ++ */ ++ if(scan >= strend2) { ++ if(*scan == *match) { ++ if(*++scan == *++match) ++ scan++; ++ } ++ break; ++ } ++ } ++ ++ Assert(scan <= s->window+(unsigned)(s->window_size-1), "wild scan"); ++ ++ len = (MAX_MATCH - 2) - (int)(strend2 - scan); ++ scan = strend2 - (MAX_MATCH - 2); ++ ++#else /* MAX_MATCH == 258 */ ++ ++ if (match[best_len] != scan_end || ++ match[best_len-1] != scan_end1 || ++ *match != *scan || ++ *++match != scan[1]) continue; ++ ++ /* The check at best_len-1 can be removed because it will be made ++ * again later. (This heuristic is not always a win.) ++ * It is not necessary to compare scan[2] and match[2] since they ++ * are always equal when the other bytes match, given that ++ * the hash keys are equal and that HASH_BITS >= 8. ++ */ ++ scan += 2, match++; ++ Assert(*scan == *match, "match[2]?"); ++ ++ /* We check for insufficient lookahead only every 8th comparison; ++ * the 256th check will be made at strstart+258. ++ */ ++ do { ++ } while (*++scan == *++match && *++scan == *++match && ++ *++scan == *++match && *++scan == *++match && ++ *++scan == *++match && *++scan == *++match && ++ *++scan == *++match && *++scan == *++match && ++ scan < strend); ++ ++ Assert(scan <= s->window+(unsigned)(s->window_size-1), "wild scan"); ++ ++ len = MAX_MATCH - (int)(strend - scan); ++ scan = strend - MAX_MATCH; ++ ++#endif /* MAX_MATCH == 258 */ ++ ++ if (len > best_len) { ++ s->match_start = cur_match; ++ best_len = len; ++ if (len >= nice_match) break; ++#if (MAX_MATCH == 258) ++ scan_end = *(ushf*)(scan+best_len-1); ++#else ++ scan_end1 = scan[best_len-1]; ++ scan_end = scan[best_len]; ++#endif ++ } ++ } while ((cur_match = prev[cur_match & wmask]) > limit ++ && --chain_length != 0); ++ ++ if ((uInt)best_len <= s->lookahead) return (uInt)best_len; ++ return s->lookahead; ++} +Index: zlib-1.2.13/contrib/power/longest_match_resolver.c +=================================================================== +--- /dev/null ++++ zlib-1.2.13/contrib/power/longest_match_resolver.c +@@ -0,0 +1,15 @@ ++/* Copyright (C) 2019 Matheus Castanho , IBM ++ * For conditions of distribution and use, see copyright notice in zlib.h ++ */ ++ ++#include "../gcc/zifunc.h" ++#include "power.h" ++ ++Z_IFUNC(longest_match) { ++#ifdef Z_POWER9 ++ if (__builtin_cpu_supports("arch_3_00")) ++ return _longest_match_power9; ++#endif ++ ++ return longest_match_default; ++} +Index: zlib-1.2.13/contrib/power/power.h +=================================================================== +--- zlib-1.2.13.orig/contrib/power/power.h ++++ zlib-1.2.13/contrib/power/power.h +@@ -10,4 +10,6 @@ uLong _adler32_power8(uLong adler, const + + unsigned long _crc32_z_power8(unsigned long, const Bytef *, z_size_t); + ++uInt _longest_match_power9(deflate_state *s, IPos cur_match); ++ + void _slide_hash_power8(deflate_state *s); +Index: zlib-1.2.13/deflate.c +=================================================================== +--- zlib-1.2.13.orig/deflate.c ++++ zlib-1.2.13/deflate.c +@@ -1313,6 +1313,14 @@ local void lm_init(s) + * string (strstart) and its distance is <= MAX_DIST, and prev_length >= 1 + * OUT assertion: the match length is not greater than s->lookahead. + */ ++ ++#ifdef Z_POWER_OPT ++/* Rename function so resolver can use its symbol. The default version will be ++ * returned by the resolver if the host has no support for an optimized version. ++ */ ++#define longest_match longest_match_default ++#endif /* Z_POWER_OPT */ ++ + local uInt longest_match(s, pcur_match) + deflate_state *s; + IPos pcur_match; /* current match */ +@@ -1460,6 +1468,11 @@ local uInt longest_match(s, pcur_match) + return s->lookahead; + } + ++#ifdef Z_POWER_OPT ++#undef longest_match ++#include "contrib/power/longest_match_resolver.c" ++#endif /* Z_POWER_OPT */ ++ + #else /* FASTEST */ + + /* --------------------------------------------------------------------------- diff --git a/zlib-1.2.12-adler32-vector-optimizations-for-power.patch b/zlib-1.2.12-adler32-vector-optimizations-for-power.patch new file mode 100644 index 0000000..e5dfb38 --- /dev/null +++ b/zlib-1.2.12-adler32-vector-optimizations-for-power.patch @@ -0,0 +1,342 @@ +From 772f4bd0f880c4c193ab7da78728f38821572a02 Mon Sep 17 00:00:00 2001 +From: Rogerio Alves +Date: Mon, 9 Dec 2019 14:40:53 -0300 +Subject: [PATCH] Adler32 vector optimization for Power. + +This commit implements a Power (POWER8+) vector optimization for Adler32 +checksum using VSX (vector) instructions. The VSX adler32 checksum is up +to 10x fast than the adler32 baseline code. + +Author: Rogerio Alves +--- + CMakeLists.txt | 1 + + Makefile.in | 8 ++ + adler32.c | 11 ++ + configure | 4 +- + contrib/power/adler32_power8.c | 196 +++++++++++++++++++++++++++++++ + contrib/power/adler32_resolver.c | 15 +++ + contrib/power/power.h | 4 +- + 7 files changed, 236 insertions(+), 3 deletions(-) + create mode 100644 contrib/power/adler32_power8.c + create mode 100644 contrib/power/adler32_resolver.c + +diff --git a/CMakeLists.txt b/CMakeLists.txt +index 581e1fa6d..c6296ee68 100644 +--- a/CMakeLists.txt ++++ b/CMakeLists.txt +@@ -185,6 +185,7 @@ if(CMAKE_COMPILER_IS_GNUCC) + if(POWER8) + add_definitions(-DZ_POWER8) + set(ZLIB_POWER8 ++ contrib/power/adler32_power8.c + contrib/power/crc32_z_power8.c) + + set_source_files_properties( +diff --git a/Makefile.in b/Makefile.in +index 16943044e..a0ffac860 100644 +--- a/Makefile.in ++++ b/Makefile.in +@@ -165,6 +165,9 @@ minigzip64.o: $(SRCDIR)test/minigzip.c $(SRCDIR)zlib.h zconf.h + adler32.o: $(SRCDIR)adler32.c + $(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)adler32.c + ++adler32_power8.o: $(SRCDIR)contrib/power/adler32_power8.c ++ $(CC) $(CFLAGS) -mcpu=power8 $(ZINC) -c -o $@ $(SRCDIR)contrib/power/adler32_power8.c ++ + crc32.o: $(SRCDIR)crc32.c + $(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)crc32.c + +@@ -216,6 +219,11 @@ adler32.lo: $(SRCDIR)adler32.c + $(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/adler32.o $(SRCDIR)adler32.c + -@mv objs/adler32.o $@ + ++adler32_power8.lo: $(SRCDIR)contrib/power/adler32_power8.c ++ -@mkdir objs 2>/dev/null || test -d objs ++ $(CC) $(SFLAGS) -mcpu=power8 $(ZINC) -DPIC -c -o objs/adler32_power8.o $(SRCDIR)contrib/power/adler32_power8.c ++ -@mv objs/adler32_power8.o $@ ++ + crc32.lo: $(SRCDIR)crc32.c + -@mkdir objs 2>/dev/null || test -d objs + $(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/crc32.o $(SRCDIR)crc32.c +diff --git a/adler32.c b/adler32.c +index d0be4380a..4bde0fa18 100644 +--- a/adler32.c ++++ b/adler32.c +@@ -131,6 +131,12 @@ uLong ZEXPORT adler32_z(adler, buf, len) + } + + /* ========================================================================= */ ++ ++#ifdef Z_POWER_OPT ++/* Rename the default function to avoid naming conflicts */ ++#define adler32 adler32_default ++#endif /* Z_POWER_OPT */ ++ + uLong ZEXPORT adler32(adler, buf, len) + uLong adler; + const Bytef *buf; +@@ -139,6 +145,11 @@ uLong ZEXPORT adler32(adler, buf, len) + return adler32_z(adler, buf, len); + } + ++#ifdef Z_POWER_OPT ++#undef adler32 ++#include "contrib/power/adler32_resolver.c" ++#endif /* Z_POWER_OPT */ ++ + /* ========================================================================= */ + local uLong adler32_combine_(adler1, adler2, len2) + uLong adler1; +diff --git a/configure b/configure +index 914d9f4aa..810a7404d 100755 +--- a/configure ++++ b/configure +@@ -879,8 +879,8 @@ if tryboth $CC -c $CFLAGS $test.c; then + + if tryboth $CC -c $CFLAGS -mcpu=power8 $test.c; then + POWER8="-DZ_POWER8" +- PIC_OBJC="${PIC_OBJC} crc32_z_power8.lo" +- OBJC="${OBJC} crc32_z_power8.o" ++ PIC_OBJC="${PIC_OBJC} adler32_power8.lo crc32_z_power8.lo" ++ OBJC="${OBJC} adler32_power8.o crc32_z_power8.o" + echo "Checking for -mcpu=power8 support... Yes." | tee -a configure.log + else + echo "Checking for -mcpu=power8 support... No." | tee -a configure.log +diff --git a/contrib/power/adler32_power8.c b/contrib/power/adler32_power8.c +new file mode 100644 +index 000000000..473c39457 +--- /dev/null ++++ b/contrib/power/adler32_power8.c +@@ -0,0 +1,196 @@ ++/* ++ * Adler32 for POWER 8+ using VSX instructions. ++ * ++ * Calculate adler32 checksum for 16 bytes at once using POWER8+ VSX (vector) ++ * instructions. ++ * ++ * If adler32 do 1 byte at time on the first iteration s1 is s1_0 (_n means ++ * iteration n) is the initial value of adler - at start _0 is 1 unless ++ * adler initial value is different than 1. So s1_1 = s1_0 + c[0] after ++ * the first calculation. For the iteration s1_2 = s1_1 + c[1] and so on. ++ * Hence, for iteration N, s1_N = s1_(N-1) + c[N] is the value of s1 on ++ * after iteration N. ++ * ++ * Therefore, for s2 and iteration N, s2_N = s2_0 + N*s1_N + N*c[0] + ++ * N-1*c[1] + ... + c[N] ++ * ++ * In a more general way: ++ * ++ * s1_N = s1_0 + sum(i=1 to N)c[i] ++ * s2_N = s2_0 + N*s1 + sum (i=1 to N)(N-i+1)*c[i] ++ * ++ * Where s1_N, s2_N are the values for s1, s2 after N iterations. So if we ++ * can process N-bit at time we can do this at once. ++ * ++ * Since VSX can support 16-bit vector instructions, we can process ++ * 16-bit at time using N = 16 we have: ++ * ++ * s1 = s1_16 = s1_(16-1) + c[16] = s1_0 + sum(i=1 to 16)c[i] ++ * s2 = s2_16 = s2_0 + 16*s1 + sum(i=1 to 16)(16-i+1)*c[i] ++ * ++ * After the first iteration we calculate the adler32 checksum for 16 bytes. ++ * ++ * For more background about adler32 please check the RFC: ++ * https://www.ietf.org/rfc/rfc1950.txt ++ * ++ * Copyright (C) 2019 Rogerio Alves , IBM ++ * For conditions of distribution and use, see copyright notice in zlib.h ++ * ++ */ ++ ++#include "../../zutil.h" ++#include ++ ++/* Largest prime smaller than 65536. */ ++#define BASE 65521U ++#define NMAX 5552 ++/* NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1. */ ++ ++#define DO1(s1,s2,buf,i) {(s1) += buf[(i)]; (s2) += (s1);} ++#define DO2(s1,s2,buf,i) {DO1(s1,s2,buf,i); DO1(s1,s2,buf,i+1);} ++#define DO4(s1,s2,buf,i) {DO2(s1,s2,buf,i); DO2(s1,s2,buf,i+2);} ++#define DO8(s1,s2,buf,i) {DO4(s1,s2,buf,i); DO4(s1,s2,buf,i+4);} ++#define DO16(s1,s2,buf) {DO8(s1,s2,buf,0); DO8(s1,s2,buf,8);} ++ ++/* Vector across sum unsigned int (saturate). */ ++inline vector unsigned int vec_sumsu (vector unsigned int __a, ++ vector unsigned int __b) ++{ ++ __b = vec_sld(__a, __a, 8); ++ __b = vec_add(__b, __a); ++ __a = vec_sld(__b, __b, 4); ++ __a = vec_add(__a, __b); ++ ++ return __a; ++} ++ ++uLong ZLIB_INTERNAL _adler32_power8 (uLong adler, const Bytef* buf, uInt len) ++{ ++ /* If buffer is empty or len=0 we need to return adler initial value. */ ++ if (buf == NULL) ++ return 1; ++ ++ unsigned int s1 = adler & 0xffff; ++ unsigned int s2 = (adler >> 16) & 0xffff; ++ ++ /* in case user likes doing a byte at a time, keep it fast */ ++ if (len == 1) { ++ s1 += buf[0]; ++ if (s1 >= BASE) ++ s1 -= BASE; ++ s2 += s1; ++ if (s2 >= BASE) ++ s2 -= BASE; ++ return (s2 << 16) | s1; ++ } ++ ++ /* Keep it fast for short length buffers. */ ++ if (len < 16) { ++ while (len--) { ++ s1 += *buf++; ++ s2 += s1; ++ } ++ if (s1 >= BASE) ++ s1 -= BASE; ++ s2 %= BASE; ++ return (s2 << 16) | s1; ++ } ++ ++ /* This is faster than VSX code for len < 64. */ ++ if (len < 64) { ++ while (len >= 16) { ++ len -= 16; ++ DO16(s1,s2,buf); ++ buf += 16; ++ } ++ } else { ++ /* Use POWER VSX instructions for len >= 64. */ ++ const vector unsigned int v_zeros = { 0 }; ++ const vector unsigned char v_mul = {16, 15, 14, 13, 12, 11, 10, 9, 8, 7, ++ 6, 5, 4, 3, 2, 1}; ++ const vector unsigned char vsh = vec_splat_u8(4); ++ const vector unsigned int vmask = {0xffffffff, 0x0, 0x0, 0x0}; ++ vector unsigned int vs1 = vec_xl(0, &s1); ++ vector unsigned int vs2 = vec_xl(0, &s2); ++ vector unsigned int vs1_save = { 0 }; ++ vector unsigned int vsum1, vsum2; ++ vector unsigned char vbuf; ++ int n; ++ ++ /* Zeros the undefined values of vectors vs1, vs2. */ ++ vs1 = vec_and(vs1, vmask); ++ vs2 = vec_and(vs2, vmask); ++ ++ /* Do length bigger than NMAX in blocks of NMAX size. */ ++ while (len >= NMAX) { ++ len -= NMAX; ++ n = NMAX / 16; ++ do { ++ vbuf = vec_xl(0, (unsigned char *) buf); ++ vsum1 = vec_sum4s(vbuf, v_zeros); /* sum(i=1 to 16) buf[i]. */ ++ /* sum(i=1 to 16) buf[i]*(16-i+1). */ ++ vsum2 = vec_msum(vbuf, v_mul, v_zeros); ++ /* Save vs1. */ ++ vs1_save = vec_add(vs1_save, vs1); ++ /* Accumulate the sums. */ ++ vs1 = vec_add(vsum1, vs1); ++ vs2 = vec_add(vsum2, vs2); ++ ++ buf += 16; ++ } while (--n); ++ /* Once each block of NMAX size. */ ++ vs1 = vec_sumsu(vs1, vsum1); ++ vs1_save = vec_sll(vs1_save, vsh); /* 16*vs1_save. */ ++ vs2 = vec_add(vs1_save, vs2); ++ vs2 = vec_sumsu(vs2, vsum2); ++ ++ /* vs1[0] = (s1_i + sum(i=1 to 16)buf[i]) mod 65521. */ ++ vs1[0] = vs1[0] % BASE; ++ /* vs2[0] = s2_i + 16*s1_save + ++ sum(i=1 to 16)(16-i+1)*buf[i] mod 65521. */ ++ vs2[0] = vs2[0] % BASE; ++ ++ vs1 = vec_and(vs1, vmask); ++ vs2 = vec_and(vs2, vmask); ++ vs1_save = v_zeros; ++ } ++ ++ /* len is less than NMAX one modulo is needed. */ ++ if (len >= 16) { ++ while (len >= 16) { ++ len -= 16; ++ ++ vbuf = vec_xl(0, (unsigned char *) buf); ++ ++ vsum1 = vec_sum4s(vbuf, v_zeros); /* sum(i=1 to 16) buf[i]. */ ++ /* sum(i=1 to 16) buf[i]*(16-i+1). */ ++ vsum2 = vec_msum(vbuf, v_mul, v_zeros); ++ /* Save vs1. */ ++ vs1_save = vec_add(vs1_save, vs1); ++ /* Accumulate the sums. */ ++ vs1 = vec_add(vsum1, vs1); ++ vs2 = vec_add(vsum2, vs2); ++ ++ buf += 16; ++ } ++ /* Since the size will be always less than NMAX we do this once. */ ++ vs1 = vec_sumsu(vs1, vsum1); ++ vs1_save = vec_sll(vs1_save, vsh); /* 16*vs1_save. */ ++ vs2 = vec_add(vs1_save, vs2); ++ vs2 = vec_sumsu(vs2, vsum2); ++ } ++ /* Copy result back to s1, s2 (mod 65521). */ ++ s1 = vs1[0] % BASE; ++ s2 = vs2[0] % BASE; ++ } ++ ++ /* Process tail (len < 16). */ ++ while (len--) { ++ s1 += *buf++; ++ s2 += s1; ++ } ++ s1 %= BASE; ++ s2 %= BASE; ++ ++ return (s2 << 16) | s1; ++} +diff --git a/contrib/power/adler32_resolver.c b/contrib/power/adler32_resolver.c +new file mode 100644 +index 000000000..07a1a2cb2 +--- /dev/null ++++ b/contrib/power/adler32_resolver.c +@@ -0,0 +1,15 @@ ++/* Copyright (C) 2019 Rogerio Alves , IBM ++ * For conditions of distribution and use, see copyright notice in zlib.h ++ */ ++ ++#include "../gcc/zifunc.h" ++#include "power.h" ++ ++Z_IFUNC(adler32) { ++#ifdef Z_POWER8 ++ if (__builtin_cpu_supports("arch_2_07")) ++ return _adler32_power8; ++#endif ++ ++ return adler32_default; ++} +diff --git a/contrib/power/power.h b/contrib/power/power.h +index 79123aa90..f57c76167 100644 +--- a/contrib/power/power.h ++++ b/contrib/power/power.h +@@ -2,7 +2,9 @@ + * 2019 Rogerio Alves , IBM + * For conditions of distribution and use, see copyright notice in zlib.h + */ +- + #include "../../zconf.h" ++#include "../../zutil.h" ++ ++uLong _adler32_power8(uLong adler, const Bytef* buf, uInt len); + + unsigned long _crc32_z_power8(unsigned long, const Bytef *, z_size_t); diff --git a/zlib-1.2.12-fix-invalid-memory-access-on-ppc-and-ppc64.patch b/zlib-1.2.12-fix-invalid-memory-access-on-ppc-and-ppc64.patch new file mode 100644 index 0000000..d105a20 --- /dev/null +++ b/zlib-1.2.12-fix-invalid-memory-access-on-ppc-and-ppc64.patch @@ -0,0 +1,34 @@ +From 11b722e4ae91b611f605221587ec8e0829c27949 Mon Sep 17 00:00:00 2001 +From: Matheus Castanho +Date: Tue, 23 Jun 2020 10:26:19 -0300 +Subject: [PATCH] Fix invalid memory access on ppc and ppc64 + +--- + contrib/power/adler32_power8.c | 9 ++++----- + 1 file changed, 4 insertions(+), 5 deletions(-) + +diff --git a/contrib/power/adler32_power8.c b/contrib/power/adler32_power8.c +index 473c39457..fdd086453 100644 +--- a/contrib/power/adler32_power8.c ++++ b/contrib/power/adler32_power8.c +@@ -110,16 +110,15 @@ uLong ZLIB_INTERNAL _adler32_power8 (uLong adler, const Bytef* buf, uInt len) + 6, 5, 4, 3, 2, 1}; + const vector unsigned char vsh = vec_splat_u8(4); + const vector unsigned int vmask = {0xffffffff, 0x0, 0x0, 0x0}; +- vector unsigned int vs1 = vec_xl(0, &s1); +- vector unsigned int vs2 = vec_xl(0, &s2); ++ vector unsigned int vs1 = { 0 }; ++ vector unsigned int vs2 = { 0 }; + vector unsigned int vs1_save = { 0 }; + vector unsigned int vsum1, vsum2; + vector unsigned char vbuf; + int n; + +- /* Zeros the undefined values of vectors vs1, vs2. */ +- vs1 = vec_and(vs1, vmask); +- vs2 = vec_and(vs2, vmask); ++ vs1[0] = s1; ++ vs2[0] = s2; + + /* Do length bigger than NMAX in blocks of NMAX size. */ + while (len >= NMAX) { diff --git a/zlib-1.2.12-s390-vectorize-crc32.patch b/zlib-1.2.12-s390-vectorize-crc32.patch new file mode 100644 index 0000000..4477d6e --- /dev/null +++ b/zlib-1.2.12-s390-vectorize-crc32.patch @@ -0,0 +1,423 @@ +From 957bc67cfb4e01403c01fe6243850383183a7c19 Mon Sep 17 00:00:00 2001 +From: Ilya Leoshkevich +Date: Thu, 19 Mar 2020 11:52:03 +0100 +Subject: [PATCH] s390x: vectorize crc32 + +Use vector extensions when compiling for s390x and binutils knows +about them. At runtime, check whether kernel supports vector +extensions (it has to be not just the CPU, but also the kernel) and +choose between the regular and the vectorized implementations. +--- + Makefile.in | 9 ++ + configure | 28 +++++ + contrib/gcc/zifunc.h | 21 +++- + contrib/s390/crc32-vx.c | 195 ++++++++++++++++++++++++++++++++ + contrib/s390/crc32_z_resolver.c | 41 +++++++ + crc32.c | 11 +- + 6 files changed, 301 insertions(+), 4 deletions(-) + create mode 100644 contrib/s390/crc32-vx.c + create mode 100644 contrib/s390/crc32_z_resolver.c + +Index: zlib-1.2.13/Makefile.in +=================================================================== +--- zlib-1.2.13.orig/Makefile.in ++++ zlib-1.2.13/Makefile.in +@@ -25,6 +25,7 @@ LDFLAGS= + TEST_LDFLAGS=$(LDFLAGS) -L. libz.a + LDSHARED=$(CC) + CPP=$(CC) -E ++VGFMAFLAG= + + STATICLIB=libz.a + SHAREDLIB=libz.so +@@ -175,6 +176,9 @@ crc32.o: $(SRCDIR)crc32.c + crc32_z_power8.o: $(SRCDIR)contrib/power/crc32_z_power8.c + $(CC) $(CFLAGS) -mcpu=power8 $(ZINC) -c -o $@ $(SRCDIR)contrib/power/crc32_z_power8.c + ++crc32-vx.o: $(SRCDIR)contrib/s390/crc32-vx.c ++ $(CC) $(CFLAGS) $(VGFMAFLAG) $(ZINC) -c -o $@ $(SRCDIR)contrib/s390/crc32-vx.c ++ + deflate.o: $(SRCDIR)deflate.c + $(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)deflate.c + +@@ -225,6 +229,11 @@ crc32.lo: $(SRCDIR)crc32.c + $(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/crc32.o $(SRCDIR)crc32.c + -@mv objs/crc32.o $@ + ++crc32-vx.lo: $(SRCDIR)contrib/s390/crc32-vx.c ++ -@mkdir objs 2>/dev/null || test -d objs ++ $(CC) $(SFLAGS) $(VGFMAFLAG) $(ZINC) -DPIC -c -o objs/crc32-vx.o $(SRCDIR)contrib/s390/crc32-vx.c ++ -@mv objs/crc32-vx.o $@ ++ + crc32_z_power8.lo: $(SRCDIR)contrib/power/crc32_z_power8.c + -@mkdir objs 2>/dev/null || test -d objs + $(CC) $(SFLAGS) -mcpu=power8 $(ZINC) -DPIC -c -o objs/crc32_z_power8.o $(SRCDIR)contrib/power/crc32_z_power8.c +Index: zlib-1.2.13/configure +=================================================================== +--- zlib-1.2.13.orig/configure ++++ zlib-1.2.13/configure +@@ -921,6 +921,32 @@ else + echo "Checking for Power optimizations support... No." | tee -a configure.log + fi + ++# check if we are compiling for s390 and binutils support vector extensions ++VGFMAFLAG=-march=z13 ++cat > $test.c <> configure.log + echo ALL = $ALL >> configure.log +@@ -952,6 +978,7 @@ echo mandir = $mandir >> configure.log + echo prefix = $prefix >> configure.log + echo sharedlibdir = $sharedlibdir >> configure.log + echo uname = $uname >> configure.log ++echo VGFMAFLAG = $VGFMAFLAG >> configure.log + + # udpate Makefile with the configure results + sed < ${SRCDIR}Makefile.in " +@@ -961,6 +988,7 @@ sed < ${SRCDIR}Makefile.in " + /^LDFLAGS *=/s#=.*#=$LDFLAGS# + /^LDSHARED *=/s#=.*#=$LDSHARED# + /^CPP *=/s#=.*#=$CPP# ++/^VGFMAFLAG *=/s#=.*#=$VGFMAFLAG# + /^STATICLIB *=/s#=.*#=$STATICLIB# + /^SHAREDLIB *=/s#=.*#=$SHAREDLIB# + /^SHAREDLIBV *=/s#=.*#=$SHAREDLIBV# +Index: zlib-1.2.13/contrib/gcc/zifunc.h +=================================================================== +--- zlib-1.2.13.orig/contrib/gcc/zifunc.h ++++ zlib-1.2.13/contrib/gcc/zifunc.h +@@ -8,9 +8,28 @@ + + /* Helpers for arch optimizations */ + ++#if defined(__clang__) ++#if __has_feature(coverage_sanitizer) ++#define Z_IFUNC_NO_SANCOV __attribute__((no_sanitize("coverage"))) ++#else /* __has_feature(coverage_sanitizer) */ ++#define Z_IFUNC_NO_SANCOV ++#endif /* __has_feature(coverage_sanitizer) */ ++#else /* __clang__ */ ++#define Z_IFUNC_NO_SANCOV ++#endif /* __clang__ */ ++ ++#ifdef __s390__ ++#define Z_IFUNC_PARAMS unsigned long hwcap ++#define Z_IFUNC_ATTRS Z_IFUNC_NO_SANCOV ++#else /* __s390__ */ ++#define Z_IFUNC_PARAMS void ++#define Z_IFUNC_ATTRS ++#endif /* __s390__ */ ++ + #define Z_IFUNC(fname) \ + typeof(fname) fname __attribute__ ((ifunc (#fname "_resolver"))); \ +- local typeof(fname) *fname##_resolver(void) ++ Z_IFUNC_ATTRS \ ++ local typeof(fname) *fname##_resolver(Z_IFUNC_PARAMS) + /* This is a helper macro to declare a resolver for an indirect function + * (ifunc). Let's say you have function + * +Index: zlib-1.2.13/contrib/s390/crc32-vx.c +=================================================================== +--- /dev/null ++++ zlib-1.2.13/contrib/s390/crc32-vx.c +@@ -0,0 +1,195 @@ ++/* ++ * Hardware-accelerated CRC-32 variants for Linux on z Systems ++ * ++ * Use the z/Architecture Vector Extension Facility to accelerate the ++ * computing of bitreflected CRC-32 checksums. ++ * ++ * This CRC-32 implementation algorithm is bitreflected and processes ++ * the least-significant bit first (Little-Endian). ++ * ++ * This code was originally written by Hendrik Brueckner ++ * for use in the Linux kernel and has been ++ * relicensed under the zlib license. ++ */ ++ ++#include "../../zutil.h" ++ ++#include ++#include ++ ++typedef unsigned char uv16qi __attribute__((vector_size(16))); ++typedef unsigned int uv4si __attribute__((vector_size(16))); ++typedef unsigned long long uv2di __attribute__((vector_size(16))); ++ ++uint32_t crc32_le_vgfm_16(uint32_t crc, const unsigned char *buf, size_t len) { ++ /* ++ * The CRC-32 constant block contains reduction constants to fold and ++ * process particular chunks of the input data stream in parallel. ++ * ++ * For the CRC-32 variants, the constants are precomputed according to ++ * these definitions: ++ * ++ * R1 = [(x4*128+32 mod P'(x) << 32)]' << 1 ++ * R2 = [(x4*128-32 mod P'(x) << 32)]' << 1 ++ * R3 = [(x128+32 mod P'(x) << 32)]' << 1 ++ * R4 = [(x128-32 mod P'(x) << 32)]' << 1 ++ * R5 = [(x64 mod P'(x) << 32)]' << 1 ++ * R6 = [(x32 mod P'(x) << 32)]' << 1 ++ * ++ * The bitreflected Barret reduction constant, u', is defined as ++ * the bit reversal of floor(x**64 / P(x)). ++ * ++ * where P(x) is the polynomial in the normal domain and the P'(x) is the ++ * polynomial in the reversed (bitreflected) domain. ++ * ++ * CRC-32 (IEEE 802.3 Ethernet, ...) polynomials: ++ * ++ * P(x) = 0x04C11DB7 ++ * P'(x) = 0xEDB88320 ++ */ ++ const uv16qi perm_le2be = {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}; /* BE->LE mask */ ++ const uv2di r2r1 = {0x1C6E41596, 0x154442BD4}; /* R2, R1 */ ++ const uv2di r4r3 = {0x0CCAA009E, 0x1751997D0}; /* R4, R3 */ ++ const uv2di r5 = {0, 0x163CD6124}; /* R5 */ ++ const uv2di ru_poly = {0, 0x1F7011641}; /* u' */ ++ const uv2di crc_poly = {0, 0x1DB710641}; /* P'(x) << 1 */ ++ ++ /* ++ * Load the initial CRC value. ++ * ++ * The CRC value is loaded into the rightmost word of the ++ * vector register and is later XORed with the LSB portion ++ * of the loaded input data. ++ */ ++ uv2di v0 = {0, 0}; ++ v0 = (uv2di)vec_insert(crc, (uv4si)v0, 3); ++ ++ /* Load a 64-byte data chunk and XOR with CRC */ ++ uv2di v1 = vec_perm(((uv2di *)buf)[0], ((uv2di *)buf)[0], perm_le2be); ++ uv2di v2 = vec_perm(((uv2di *)buf)[1], ((uv2di *)buf)[1], perm_le2be); ++ uv2di v3 = vec_perm(((uv2di *)buf)[2], ((uv2di *)buf)[2], perm_le2be); ++ uv2di v4 = vec_perm(((uv2di *)buf)[3], ((uv2di *)buf)[3], perm_le2be); ++ ++ v1 ^= v0; ++ buf += 64; ++ len -= 64; ++ ++ while (len >= 64) { ++ /* Load the next 64-byte data chunk */ ++ uv16qi part1 = vec_perm(((uv16qi *)buf)[0], ((uv16qi *)buf)[0], perm_le2be); ++ uv16qi part2 = vec_perm(((uv16qi *)buf)[1], ((uv16qi *)buf)[1], perm_le2be); ++ uv16qi part3 = vec_perm(((uv16qi *)buf)[2], ((uv16qi *)buf)[2], perm_le2be); ++ uv16qi part4 = vec_perm(((uv16qi *)buf)[3], ((uv16qi *)buf)[3], perm_le2be); ++ ++ /* ++ * Perform a GF(2) multiplication of the doublewords in V1 with ++ * the R1 and R2 reduction constants in V0. The intermediate result ++ * is then folded (accumulated) with the next data chunk in PART1 and ++ * stored in V1. Repeat this step for the register contents ++ * in V2, V3, and V4 respectively. ++ */ ++ v1 = (uv2di)vec_gfmsum_accum_128(r2r1, v1, part1); ++ v2 = (uv2di)vec_gfmsum_accum_128(r2r1, v2, part2); ++ v3 = (uv2di)vec_gfmsum_accum_128(r2r1, v3, part3); ++ v4 = (uv2di)vec_gfmsum_accum_128(r2r1, v4, part4); ++ ++ buf += 64; ++ len -= 64; ++ } ++ ++ /* ++ * Fold V1 to V4 into a single 128-bit value in V1. Multiply V1 with R3 ++ * and R4 and accumulating the next 128-bit chunk until a single 128-bit ++ * value remains. ++ */ ++ v1 = (uv2di)vec_gfmsum_accum_128(r4r3, v1, (uv16qi)v2); ++ v1 = (uv2di)vec_gfmsum_accum_128(r4r3, v1, (uv16qi)v3); ++ v1 = (uv2di)vec_gfmsum_accum_128(r4r3, v1, (uv16qi)v4); ++ ++ while (len >= 16) { ++ /* Load next data chunk */ ++ v2 = vec_perm(*(uv2di *)buf, *(uv2di *)buf, perm_le2be); ++ ++ /* Fold next data chunk */ ++ v1 = (uv2di)vec_gfmsum_accum_128(r4r3, v1, (uv16qi)v2); ++ ++ buf += 16; ++ len -= 16; ++ } ++ ++ /* ++ * Set up a vector register for byte shifts. The shift value must ++ * be loaded in bits 1-4 in byte element 7 of a vector register. ++ * Shift by 8 bytes: 0x40 ++ * Shift by 4 bytes: 0x20 ++ */ ++ uv16qi v9 = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; ++ v9 = vec_insert((unsigned char)0x40, v9, 7); ++ ++ /* ++ * Prepare V0 for the next GF(2) multiplication: shift V0 by 8 bytes ++ * to move R4 into the rightmost doubleword and set the leftmost ++ * doubleword to 0x1. ++ */ ++ v0 = vec_srb(r4r3, (uv2di)v9); ++ v0[0] = 1; ++ ++ /* ++ * Compute GF(2) product of V1 and V0. The rightmost doubleword ++ * of V1 is multiplied with R4. The leftmost doubleword of V1 is ++ * multiplied by 0x1 and is then XORed with rightmost product. ++ * Implicitly, the intermediate leftmost product becomes padded ++ */ ++ v1 = (uv2di)vec_gfmsum_128(v0, v1); ++ ++ /* ++ * Now do the final 32-bit fold by multiplying the rightmost word ++ * in V1 with R5 and XOR the result with the remaining bits in V1. ++ * ++ * To achieve this by a single VGFMAG, right shift V1 by a word ++ * and store the result in V2 which is then accumulated. Use the ++ * vector unpack instruction to load the rightmost half of the ++ * doubleword into the rightmost doubleword element of V1; the other ++ * half is loaded in the leftmost doubleword. ++ * The vector register with CONST_R5 contains the R5 constant in the ++ * rightmost doubleword and the leftmost doubleword is zero to ignore ++ * the leftmost product of V1. ++ */ ++ v9 = vec_insert((unsigned char)0x20, v9, 7); ++ v2 = vec_srb(v1, (uv2di)v9); ++ v1 = vec_unpackl((uv4si)v1); /* Split rightmost doubleword */ ++ v1 = (uv2di)vec_gfmsum_accum_128(r5, v1, (uv16qi)v2); ++ ++ /* ++ * Apply a Barret reduction to compute the final 32-bit CRC value. ++ * ++ * The input values to the Barret reduction are the degree-63 polynomial ++ * in V1 (R(x)), degree-32 generator polynomial, and the reduction ++ * constant u. The Barret reduction result is the CRC value of R(x) mod ++ * P(x). ++ * ++ * The Barret reduction algorithm is defined as: ++ * ++ * 1. T1(x) = floor( R(x) / x^32 ) GF2MUL u ++ * 2. T2(x) = floor( T1(x) / x^32 ) GF2MUL P(x) ++ * 3. C(x) = R(x) XOR T2(x) mod x^32 ++ * ++ * Note: The leftmost doubleword of vector register containing ++ * CONST_RU_POLY is zero and, thus, the intermediate GF(2) product ++ * is zero and does not contribute to the final result. ++ */ ++ ++ /* T1(x) = floor( R(x) / x^32 ) GF2MUL u */ ++ v2 = vec_unpackl((uv4si)v1); ++ v2 = (uv2di)vec_gfmsum_128(ru_poly, v2); ++ ++ /* ++ * Compute the GF(2) product of the CRC polynomial with T1(x) in ++ * V2 and XOR the intermediate result, T2(x), with the value in V1. ++ * The final result is stored in word element 2 of V2. ++ */ ++ v2 = vec_unpackl((uv4si)v2); ++ v2 = (uv2di)vec_gfmsum_accum_128(crc_poly, v2, (uv16qi)v1); ++ ++ return ((uv4si)v2)[2]; ++} +Index: zlib-1.2.13/contrib/s390/crc32_z_resolver.c +=================================================================== +--- /dev/null ++++ zlib-1.2.13/contrib/s390/crc32_z_resolver.c +@@ -0,0 +1,41 @@ ++#include ++#include "../gcc/zifunc.h" ++ ++#define VX_MIN_LEN 64 ++#define VX_ALIGNMENT 16L ++#define VX_ALIGN_MASK (VX_ALIGNMENT - 1) ++ ++unsigned int crc32_le_vgfm_16(unsigned int crc, const unsigned char FAR *buf, z_size_t len); ++ ++local unsigned long s390_crc32_vx(unsigned long crc, const unsigned char FAR *buf, z_size_t len) ++{ ++ uintptr_t prealign, aligned, remaining; ++ ++ if (buf == Z_NULL) return 0UL; ++ ++ if (len < VX_MIN_LEN + VX_ALIGN_MASK) ++ return crc32_z_default(crc, buf, len); ++ ++ if ((uintptr_t)buf & VX_ALIGN_MASK) { ++ prealign = VX_ALIGNMENT - ((uintptr_t)buf & VX_ALIGN_MASK); ++ len -= prealign; ++ crc = crc32_z_default(crc, buf, prealign); ++ buf += prealign; ++ } ++ aligned = len & ~VX_ALIGN_MASK; ++ remaining = len & VX_ALIGN_MASK; ++ ++ crc = crc32_le_vgfm_16(crc ^ 0xffffffff, buf, (size_t)aligned) ^ 0xffffffff; ++ ++ if (remaining) ++ crc = crc32_z_default(crc, buf + aligned, remaining); ++ ++ return crc; ++} ++ ++Z_IFUNC(crc32_z) ++{ ++ if (hwcap & HWCAP_S390_VX) ++ return s390_crc32_vx; ++ return crc32_z_default; ++} +Index: zlib-1.2.13/crc32.c +=================================================================== +--- zlib-1.2.13.orig/crc32.c ++++ zlib-1.2.13/crc32.c +@@ -745,12 +745,12 @@ local z_word_t crc_word_big(data) + #endif + + /* ========================================================================= */ +-#ifdef Z_POWER_OPT ++#if defined(Z_POWER_OPT) || defined(HAVE_S390X_VX) + /* Rename function so resolver can use its symbol. The default version will be + * returned by the resolver if the host has no support for an optimized version. + */ + #define crc32_z crc32_z_default +-#endif /* Z_POWER_OPT */ ++#endif /* defined(Z_POWER_OPT) || defined(HAVE_S390X_VX) */ + + unsigned long ZEXPORT crc32_z(crc, buf, len) + unsigned long crc; +@@ -1073,10 +1073,15 @@ unsigned long ZEXPORT crc32_z(crc, buf, + return crc ^ 0xffffffff; + } + +-#ifdef Z_POWER_OPT ++#if defined(Z_POWER_OPT) || defined(HAVE_S390X_VX) + #undef crc32_z ++#ifdef Z_POWER_OPT + #include "contrib/power/crc32_z_resolver.c" + #endif /* Z_POWER_OPT */ ++#ifdef HAVE_S390X_VX ++#include "contrib/s390/crc32_z_resolver.c" ++#endif /* HAVE_S390X_VX */ ++#endif /* defined(Z_POWER_OPT) || defined(HAVE_S390X_VX) */ + + #endif + diff --git a/zlib-1.2.13-IBM-Z-hw-accelerated-deflate-s390x.patch b/zlib-1.2.13-IBM-Z-hw-accelerated-deflate-s390x.patch new file mode 100644 index 0000000..36d1215 --- /dev/null +++ b/zlib-1.2.13-IBM-Z-hw-accelerated-deflate-s390x.patch @@ -0,0 +1,5013 @@ +From e382a919a5bce2aa0738e85540942d7af7d6c1c0 Mon Sep 17 00:00:00 2001 +From: Manjunath S Matti +Date: Tue, 15 Nov 2022 02:45:53 -0600 +Subject: [PATCH 1/4] Preparation for Power optimizations + +Optimized functions for Power will make use of GNU indirect functions, +an extension to support different implementations of the same function, +which can be selected during runtime. This will be used to provide +optimized functions for different processor versions. + +Since this is a GNU extension, we placed the definition of the Z_IFUNC +macro under `contrib/gcc`. This can be reused by other archs as well. + +Author: Matheus Castanho +Author: Rogerio Alves +Signed-off-by: Manjunath Matti +--- + CMakeLists.txt | 71 ++++++++++++++++++++++++++++++++++++++++++ + configure | 66 +++++++++++++++++++++++++++++++++++++++ + contrib/README.contrib | 8 +++++ + contrib/gcc/zifunc.h | 60 +++++++++++++++++++++++++++++++++++ + contrib/power/power.h | 4 +++ + 5 files changed, 209 insertions(+) + create mode 100644 contrib/gcc/zifunc.h + create mode 100644 contrib/power/power.h + +diff --git a/CMakeLists.txt b/CMakeLists.txt +index b3a58b30e..dd1752757 100644 +--- a/CMakeLists.txt ++++ b/CMakeLists.txt +@@ -5,6 +5,8 @@ project(zlib C) + + set(VERSION "1.2.13") + ++option(POWER "Enable building power implementation") ++ + set(INSTALL_BIN_DIR "${CMAKE_INSTALL_PREFIX}/bin" CACHE PATH "Installation directory for executables") + set(INSTALL_LIB_DIR "${CMAKE_INSTALL_PREFIX}/lib" CACHE PATH "Installation directory for libraries") + set(INSTALL_INC_DIR "${CMAKE_INSTALL_PREFIX}/include" CACHE PATH "Installation directory for headers") +@@ -126,6 +128,75 @@ if(NOT MINGW) + ) + endif() + ++if(CMAKE_COMPILER_IS_GNUCC) ++ ++ # test to see if we can use a GNU indirect function to detect and load optimized code at runtime ++ CHECK_C_SOURCE_COMPILES(" ++ static int test_ifunc_native(void) ++ { ++ return 1; ++ } ++ static int (*(check_ifunc_native(void)))(void) ++ { ++ return test_ifunc_native; ++ } ++ int test_ifunc(void) __attribute__ ((ifunc (\"check_ifunc_native\"))); ++ int main(void) ++ { ++ return 0; ++ } ++ " HAS_C_ATTR_IFUNC) ++ ++ if(HAS_C_ATTR_IFUNC) ++ add_definitions(-DHAVE_IFUNC) ++ set(ZLIB_PRIVATE_HDRS ${ZLIB_PRIVATE_HDRS} contrib/gcc/zifunc.h) ++ endif() ++ ++ if(POWER) ++ # Test to see if we can use the optimizations for Power ++ CHECK_C_SOURCE_COMPILES(" ++ #ifndef _ARCH_PPC ++ #error \"Target is not Power\" ++ #endif ++ #ifndef __BUILTIN_CPU_SUPPORTS__ ++ #error \"Target doesn't support __builtin_cpu_supports()\" ++ #endif ++ int main() { return 0; } ++ " HAS_POWER_SUPPORT) ++ ++ if(HAS_POWER_SUPPORT AND HAS_C_ATTR_IFUNC) ++ add_definitions(-DZ_POWER_OPT) ++ ++ set(CMAKE_REQUIRED_FLAGS -mcpu=power8) ++ CHECK_C_SOURCE_COMPILES("int main(void){return 0;}" POWER8) ++ ++ if(POWER8) ++ add_definitions(-DZ_POWER8) ++ set(ZLIB_POWER8 ) ++ ++ set_source_files_properties( ++ ${ZLIB_POWER8} ++ PROPERTIES COMPILE_FLAGS -mcpu=power8) ++ endif() ++ ++ set(CMAKE_REQUIRED_FLAGS -mcpu=power9) ++ CHECK_C_SOURCE_COMPILES("int main(void){return 0;}" POWER9) ++ ++ if(POWER9) ++ add_definitions(-DZ_POWER9) ++ set(ZLIB_POWER9 ) ++ ++ set_source_files_properties( ++ ${ZLIB_POWER9} ++ PROPERTIES COMPILE_FLAGS -mcpu=power9) ++ endif() ++ ++ set(ZLIB_PRIVATE_HDRS ${ZLIB_PRIVATE_HDRS} contrib/power/power.h) ++ set(ZLIB_SRCS ${ZLIB_SRCS} ${ZLIB_POWER8} ${ZLIB_POWER9}) ++ endif() ++ endif() ++endif() ++ + # parse the full version number from zlib.h and include in ZLIB_FULL_VERSION + file(READ ${CMAKE_CURRENT_SOURCE_DIR}/zlib.h _zlib_h_contents) + string(REGEX REPLACE ".*#define[ \t]+ZLIB_VERSION[ \t]+\"([-0-9A-Za-z.]+)\".*" +diff --git a/configure b/configure +index fa4d5daab..9ee7008c3 100755 +--- a/configure ++++ b/configure +@@ -828,6 +828,72 @@ EOF + fi + fi + ++# test to see if we can use a gnu indirection function to detect and load optimized code at runtime ++echo >> configure.log ++cat > $test.c <> configure.log ++cat > $test.c < $test.c ++ ++ if tryboth $CC -c $CFLAGS -mcpu=power8 $test.c; then ++ POWER8="-DZ_POWER8" ++ PIC_OBJC="${PIC_OBJC}" ++ OBJC="${OBJC}" ++ echo "Checking for -mcpu=power8 support... Yes." | tee -a configure.log ++ else ++ echo "Checking for -mcpu=power8 support... No." | tee -a configure.log ++ fi ++ ++ if tryboth $CC -c $CFLAGS -mcpu=power9 $test.c; then ++ POWER9="-DZ_POWER9" ++ PIC_OBJC="${PIC_OBJC}" ++ OBJC="${OBJC}" ++ echo "Checking for -mcpu=power9 support... Yes." | tee -a configure.log ++ else ++ echo "Checking for -mcpu=power9 support... No." | tee -a configure.log ++ fi ++ ++ SFLAGS="${SFLAGS} ${POWER8} ${POWER9} -DZ_POWER_OPT" ++ CFLAGS="${CFLAGS} ${POWER8} ${POWER9} -DZ_POWER_OPT" ++ echo "Checking for Power optimizations support... Yes." | tee -a configure.log ++else ++ echo "Checking for Power optimizations support... No." | tee -a configure.log ++fi ++ + # show the results in the log + echo >> configure.log + echo ALL = $ALL >> configure.log +diff --git a/contrib/README.contrib b/contrib/README.contrib +index 5e5f95054..c57b52012 100644 +--- a/contrib/README.contrib ++++ b/contrib/README.contrib +@@ -11,6 +11,10 @@ ada/ by Dmitriy Anisimkov + blast/ by Mark Adler + Decompressor for output of PKWare Data Compression Library (DCL) + ++gcc/ by Matheus Castanho ++ and Rogerio Alves ++ Optimization helpers using GCC-specific extensions ++ + delphi/ by Cosmin Truta + Support for Delphi and C++ Builder + +@@ -42,6 +46,10 @@ minizip/ by Gilles Vollant + pascal/ by Bob Dellaca et al. + Support for Pascal + ++power/ by Matheus Castanho ++ and Rogerio Alves ++ Optimized functions for Power processors ++ + puff/ by Mark Adler + Small, low memory usage inflate. Also serves to provide an + unambiguous description of the deflate format. +diff --git a/contrib/gcc/zifunc.h b/contrib/gcc/zifunc.h +new file mode 100644 +index 000000000..daf4fe442 +--- /dev/null ++++ b/contrib/gcc/zifunc.h +@@ -0,0 +1,60 @@ ++/* Copyright (C) 2019 Matheus Castanho , IBM ++ * 2019 Rogerio Alves , IBM ++ * For conditions of distribution and use, see copyright notice in zlib.h ++ */ ++ ++#ifndef Z_IFUNC_H_ ++#define Z_IFUNC_H_ ++ ++/* Helpers for arch optimizations */ ++ ++#define Z_IFUNC(fname) \ ++ typeof(fname) fname __attribute__ ((ifunc (#fname "_resolver"))); \ ++ local typeof(fname) *fname##_resolver(void) ++/* This is a helper macro to declare a resolver for an indirect function ++ * (ifunc). Let's say you have function ++ * ++ * int foo (int a); ++ * ++ * for which you want to provide different implementations, for example: ++ * ++ * int foo_clever (int a) { ++ * ... clever things ... ++ * } ++ * ++ * int foo_smart (int a) { ++ * ... smart things ... ++ * } ++ * ++ * You will have to declare foo() as an indirect function and also provide a ++ * resolver for it, to choose between foo_clever() and foo_smart() based on ++ * some criteria you define (e.g. processor features). ++ * ++ * Since most likely foo() has a default implementation somewhere in zlib, you ++ * may have to rename it so the 'foo' symbol can be used by the ifunc without ++ * conflicts. ++ * ++ * #define foo foo_default ++ * int foo (int a) { ++ * ... ++ * } ++ * #undef foo ++ * ++ * Now you just have to provide a resolver function to choose which function ++ * should be used (decided at runtime on the first call to foo()): ++ * ++ * Z_IFUNC(foo) { ++ * if (... some condition ...) ++ * return foo_clever; ++ * ++ * if (... other condition ...) ++ * return foo_smart; ++ * ++ * return foo_default; ++ * } ++ * ++ * All calls to foo() throughout the code can remain untouched, all the magic ++ * will be done by the linker using the resolver function. ++ */ ++ ++#endif /* Z_IFUNC_H_ */ +diff --git a/contrib/power/power.h b/contrib/power/power.h +new file mode 100644 +index 000000000..b42c7d6c6 +--- /dev/null ++++ b/contrib/power/power.h +@@ -0,0 +1,4 @@ ++/* Copyright (C) 2019 Matheus Castanho , IBM ++ * 2019 Rogerio Alves , IBM ++ * For conditions of distribution and use, see copyright notice in zlib.h ++ */ + +From b7ee6436703e5e8716f3b82df669422035a84385 Mon Sep 17 00:00:00 2001 +From: Manjunath S Matti +Date: Wed, 16 Nov 2022 03:53:53 -0600 +Subject: [PATCH 2/4] Add Power8+ optimized crc32 + +This commit adds an optimized version for the crc32 function based +on crc32-vpmsum from https://github.com/antonblanchard/crc32-vpmsum/ + +This is the C implementation created by Rogerio Alves + + +It makes use of vector instructions to speed up CRC32 algorithm. + +Author: Rogerio Alves +Signed-off-by: Manjunath Matti +--- + .gitignore | 3 + + CMakeLists.txt | 7 +- + Makefile.in | 43 +- + configure | 7 +- + contrib/README.contrib | 3 +- + contrib/power/clang_workaround.h | 82 ++ + contrib/power/crc32_constants.h | 1206 ++++++++++++++++++++++++++++++ + contrib/power/crc32_z_power8.c | 679 +++++++++++++++++ + contrib/power/crc32_z_resolver.c | 15 + + contrib/power/power.h | 4 + + crc32.c | 12 + + test/crc32_test.c | 205 +++++ + 12 files changed, 2252 insertions(+), 14 deletions(-) + create mode 100644 contrib/power/clang_workaround.h + create mode 100644 contrib/power/crc32_constants.h + create mode 100644 contrib/power/crc32_z_power8.c + create mode 100644 contrib/power/crc32_z_resolver.c + create mode 100644 test/crc32_test.c + +diff --git a/CMakeLists.txt b/CMakeLists.txt +index dd1752757..1077c83ef 100644 +--- a/CMakeLists.txt ++++ b/CMakeLists.txt +@@ -172,7 +172,8 @@ if(CMAKE_COMPILER_IS_GNUCC) + + if(POWER8) + add_definitions(-DZ_POWER8) +- set(ZLIB_POWER8 ) ++ set(ZLIB_POWER8 ++ contrib/power/crc32_z_power8.c) + + set_source_files_properties( + ${ZLIB_POWER8} +@@ -269,6 +270,10 @@ add_executable(example test/example.c) + target_link_libraries(example zlib) + add_test(example example) + ++add_executable(crc32_test test/crc32_test.c) ++target_link_libraries(crc32_test zlib) ++add_test(crc32_test crc32_test) ++ + add_executable(minigzip test/minigzip.c) + target_link_libraries(minigzip zlib) + +diff --git a/Makefile.in b/Makefile.in +index 9cdb85259..83d8ca47d 100644 +--- a/Makefile.in ++++ b/Makefile.in +@@ -71,11 +71,11 @@ PIC_OBJS = $(PIC_OBJC) $(PIC_OBJA) + + all: static shared + +-static: example$(EXE) minigzip$(EXE) ++static: crc32_test$(EXE) example$(EXE) minigzip$(EXE) + +-shared: examplesh$(EXE) minigzipsh$(EXE) ++shared: crc32_testsh$(EXE) examplesh$(EXE) minigzipsh$(EXE) + +-all64: example64$(EXE) minigzip64$(EXE) ++all64: crc32_test64$(EXE) example64$(EXE) minigzip64$(EXE) + + check: test + +@@ -83,7 +83,7 @@ test: all teststatic testshared + + teststatic: static + @TMPST=tmpst_$$; \ +- if echo hello world | ${QEMU_RUN} ./minigzip | ${QEMU_RUN} ./minigzip -d && ${QEMU_RUN} ./example $$TMPST ; then \ ++ if echo hello world | ${QEMU_RUN} ./minigzip | ${QEMU_RUN} ./minigzip -d && ${QEMU_RUN} ./example $$TMPST && ${QEMU_RUN} ./crc32_test; then \ + echo ' *** zlib test OK ***'; \ + else \ + echo ' *** zlib test FAILED ***'; false; \ +@@ -96,7 +96,7 @@ testshared: shared + DYLD_LIBRARY_PATH=`pwd`:$(DYLD_LIBRARY_PATH) ; export DYLD_LIBRARY_PATH; \ + SHLIB_PATH=`pwd`:$(SHLIB_PATH) ; export SHLIB_PATH; \ + TMPSH=tmpsh_$$; \ +- if echo hello world | ${QEMU_RUN} ./minigzipsh | ${QEMU_RUN} ./minigzipsh -d && ${QEMU_RUN} ./examplesh $$TMPSH; then \ ++ if echo hello world | ${QEMU_RUN} ./minigzipsh | ${QEMU_RUN} ./minigzipsh -d && ${QEMU_RUN} ./examplesh $$TMPSH && ${QEMU_RUN} ./crc32_testsh; then \ + echo ' *** zlib shared test OK ***'; \ + else \ + echo ' *** zlib shared test FAILED ***'; false; \ +@@ -105,7 +105,7 @@ testshared: shared + + test64: all64 + @TMP64=tmp64_$$; \ +- if echo hello world | ${QEMU_RUN} ./minigzip64 | ${QEMU_RUN} ./minigzip64 -d && ${QEMU_RUN} ./example64 $$TMP64; then \ ++ if echo hello world | ${QEMU_RUN} ./minigzip64 | ${QEMU_RUN} ./minigzip64 -d && ${QEMU_RUN} ./example64 $$TMP64 && ${QEMU_RUN} ./crc32_test64; then \ + echo ' *** zlib 64-bit test OK ***'; \ + else \ + echo ' *** zlib 64-bit test FAILED ***'; false; \ +@@ -139,12 +139,18 @@ match.lo: match.S + mv _match.o match.lo + rm -f _match.s + ++crc32_test.o: $(SRCDIR)test/crc32_test.c $(SRCDIR)zlib.h zconf.h ++ $(CC) $(CFLAGS) $(ZINCOUT) -c -o $@ $(SRCDIR)test/crc32_test.c ++ + example.o: $(SRCDIR)test/example.c $(SRCDIR)zlib.h zconf.h + $(CC) $(CFLAGS) $(ZINCOUT) -c -o $@ $(SRCDIR)test/example.c + + minigzip.o: $(SRCDIR)test/minigzip.c $(SRCDIR)zlib.h zconf.h + $(CC) $(CFLAGS) $(ZINCOUT) -c -o $@ $(SRCDIR)test/minigzip.c + ++crc32_test64.o: $(SRCDIR)test/crc32_test.c $(SRCDIR)zlib.h zconf.h ++ $(CC) $(CFLAGS) $(ZINCOUT) -D_FILE_OFFSET_BITS=64 -c -o $@ $(SRCDIR)test/crc32_test.c ++ + example64.o: $(SRCDIR)test/example.c $(SRCDIR)zlib.h zconf.h + $(CC) $(CFLAGS) $(ZINCOUT) -D_FILE_OFFSET_BITS=64 -c -o $@ $(SRCDIR)test/example.c + +@@ -158,6 +164,9 @@ adler32.o: $(SRCDIR)adler32.c + crc32.o: $(SRCDIR)crc32.c + $(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)crc32.c + ++crc32_z_power8.o: $(SRCDIR)contrib/power/crc32_z_power8.c ++ $(CC) $(CFLAGS) -mcpu=power8 $(ZINC) -c -o $@ $(SRCDIR)contrib/power/crc32_z_power8.c ++ + deflate.o: $(SRCDIR)deflate.c + $(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)deflate.c + +@@ -208,6 +217,11 @@ crc32.lo: $(SRCDIR)crc32.c + $(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/crc32.o $(SRCDIR)crc32.c + -@mv objs/crc32.o $@ + ++crc32_z_power8.lo: $(SRCDIR)contrib/power/crc32_z_power8.c ++ -@mkdir objs 2>/dev/null || test -d objs ++ $(CC) $(SFLAGS) -mcpu=power8 $(ZINC) -DPIC -c -o objs/crc32_z_power8.o $(SRCDIR)contrib/power/crc32_z_power8.c ++ -@mv objs/crc32_z_power8.o $@ ++ + deflate.lo: $(SRCDIR)deflate.c + -@mkdir objs 2>/dev/null || test -d objs + $(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/deflate.o $(SRCDIR)deflate.c +@@ -281,18 +295,27 @@ placebo $(SHAREDLIBV): $(PIC_OBJS) libz.a + ln -s $@ $(SHAREDLIBM) + -@rmdir objs + ++crc32_test$(EXE): crc32_test.o $(STATICLIB) ++ $(CC) $(CFLAGS) -o $@ crc32_test.o $(TEST_LDFLAGS) ++ + example$(EXE): example.o $(STATICLIB) + $(CC) $(CFLAGS) -o $@ example.o $(TEST_LDFLAGS) + + minigzip$(EXE): minigzip.o $(STATICLIB) + $(CC) $(CFLAGS) -o $@ minigzip.o $(TEST_LDFLAGS) + ++crc32_testsh$(EXE): crc32_test.o $(SHAREDLIBV) ++ $(CC) $(CFLAGS) -o $@ crc32_test.o -L. $(SHAREDLIBV) ++ + examplesh$(EXE): example.o $(SHAREDLIBV) + $(CC) $(CFLAGS) -o $@ example.o $(LDFLAGS) -L. $(SHAREDLIBV) + + minigzipsh$(EXE): minigzip.o $(SHAREDLIBV) + $(CC) $(CFLAGS) -o $@ minigzip.o $(LDFLAGS) -L. $(SHAREDLIBV) + ++crc32_test64$(EXE): crc32_test64.o $(STATICLIB) ++ $(CC) $(CFLAGS) -o $@ crc32_test64.o $(TEST_LDFLAGS) ++ + example64$(EXE): example64.o $(STATICLIB) + $(CC) $(CFLAGS) -o $@ example64.o $(TEST_LDFLAGS) + +@@ -362,8 +385,8 @@ zconf: $(SRCDIR)zconf.h.in + mostlyclean: clean + clean: + rm -f *.o *.lo *~ \ +- example$(EXE) minigzip$(EXE) examplesh$(EXE) minigzipsh$(EXE) \ +- example64$(EXE) minigzip64$(EXE) \ ++ crc32_test$(EXE) example$(EXE) minigzip$(EXE) crc32_testsh$(EXE) examplesh$(EXE) minigzipsh$(EXE) \ ++ crc32_test64$(EXE) example64$(EXE) minigzip64$(EXE) \ + infcover \ + libz.* foo.gz so_locations \ + _match.s maketree contrib/infback9/*.o +@@ -385,7 +408,7 @@ tags: + + adler32.o zutil.o: $(SRCDIR)zutil.h $(SRCDIR)zlib.h zconf.h + gzclose.o gzlib.o gzread.o gzwrite.o: $(SRCDIR)zlib.h zconf.h $(SRCDIR)gzguts.h +-compress.o example.o minigzip.o uncompr.o: $(SRCDIR)zlib.h zconf.h ++compress.o crc32_test.o example.o minigzip.o uncompr.o: $(SRCDIR)zlib.h zconf.h + crc32.o: $(SRCDIR)zutil.h $(SRCDIR)zlib.h zconf.h $(SRCDIR)crc32.h + deflate.o: $(SRCDIR)deflate.h $(SRCDIR)zutil.h $(SRCDIR)zlib.h zconf.h + infback.o inflate.o: $(SRCDIR)zutil.h $(SRCDIR)zlib.h zconf.h $(SRCDIR)inftrees.h $(SRCDIR)inflate.h $(SRCDIR)inffast.h $(SRCDIR)inffixed.h +@@ -395,7 +418,7 @@ trees.o: $(SRCDIR)deflate.h $(SRCDIR)zutil.h $(SRCDIR)zlib.h zconf.h $(SRCDIR)tr + + adler32.lo zutil.lo: $(SRCDIR)zutil.h $(SRCDIR)zlib.h zconf.h + gzclose.lo gzlib.lo gzread.lo gzwrite.lo: $(SRCDIR)zlib.h zconf.h $(SRCDIR)gzguts.h +-compress.lo example.lo minigzip.lo uncompr.lo: $(SRCDIR)zlib.h zconf.h ++compress.lo crc32_test.lo example.lo minigzip.lo uncompr.lo: $(SRCDIR)zlib.h zconf.h + crc32.lo: $(SRCDIR)zutil.h $(SRCDIR)zlib.h zconf.h $(SRCDIR)crc32.h + deflate.lo: $(SRCDIR)deflate.h $(SRCDIR)zutil.h $(SRCDIR)zlib.h zconf.h + infback.lo inflate.lo: $(SRCDIR)zutil.h $(SRCDIR)zlib.h zconf.h $(SRCDIR)inftrees.h $(SRCDIR)inflate.h $(SRCDIR)inffast.h $(SRCDIR)inffixed.h +diff --git a/configure b/configure +index 9ee7008c3..45d51e596 100755 +--- a/configure ++++ b/configure +@@ -858,6 +858,9 @@ cat > $test.c < + pascal/ by Bob Dellaca et al. + Support for Pascal + +-power/ by Matheus Castanho ++power/ by Daniel Black ++ Matheus Castanho + and Rogerio Alves + Optimized functions for Power processors + +diff --git a/contrib/power/clang_workaround.h b/contrib/power/clang_workaround.h +new file mode 100644 +index 000000000..b5e7dae01 +--- /dev/null ++++ b/contrib/power/clang_workaround.h +@@ -0,0 +1,82 @@ ++#ifndef CLANG_WORKAROUNDS_H ++#define CLANG_WORKAROUNDS_H ++ ++/* ++ * These stubs fix clang incompatibilities with GCC builtins. ++ */ ++ ++#ifndef __builtin_crypto_vpmsumw ++#define __builtin_crypto_vpmsumw __builtin_crypto_vpmsumb ++#endif ++#ifndef __builtin_crypto_vpmsumd ++#define __builtin_crypto_vpmsumd __builtin_crypto_vpmsumb ++#endif ++ ++static inline ++__vector unsigned long long __attribute__((overloadable)) ++vec_ld(int __a, const __vector unsigned long long* __b) ++{ ++ return (__vector unsigned long long)__builtin_altivec_lvx(__a, __b); ++} ++ ++/* ++ * GCC __builtin_pack_vector_int128 returns a vector __int128_t but Clang ++ * does not recognize this type. On GCC this builtin is translated to a ++ * xxpermdi instruction that only moves the registers __a, __b instead generates ++ * a load. ++ * ++ * Clang has vec_xxpermdi intrinsics. It was implemented in 4.0.0. ++ */ ++static inline ++__vector unsigned long long __builtin_pack_vector (unsigned long __a, ++ unsigned long __b) ++{ ++ #if defined(__BIG_ENDIAN__) ++ __vector unsigned long long __v = {__a, __b}; ++ #else ++ __vector unsigned long long __v = {__b, __a}; ++ #endif ++ return __v; ++} ++ ++#ifndef vec_xxpermdi ++ ++static inline ++unsigned long __builtin_unpack_vector (__vector unsigned long long __v, ++ int __o) ++{ ++ return __v[__o]; ++} ++ ++#if defined(__BIG_ENDIAN__) ++#define __builtin_unpack_vector_0(a) __builtin_unpack_vector ((a), 0) ++#define __builtin_unpack_vector_1(a) __builtin_unpack_vector ((a), 1) ++#else ++#define __builtin_unpack_vector_0(a) __builtin_unpack_vector ((a), 1) ++#define __builtin_unpack_vector_1(a) __builtin_unpack_vector ((a), 0) ++#endif ++ ++#else ++ ++static inline ++unsigned long __builtin_unpack_vector_0 (__vector unsigned long long __v) ++{ ++ #if defined(__BIG_ENDIAN__) ++ return vec_xxpermdi(__v, __v, 0x0)[1]; ++ #else ++ return vec_xxpermdi(__v, __v, 0x0)[0]; ++ #endif ++} ++ ++static inline ++unsigned long __builtin_unpack_vector_1 (__vector unsigned long long __v) ++{ ++ #if defined(__BIG_ENDIAN__) ++ return vec_xxpermdi(__v, __v, 0x3)[1]; ++ #else ++ return vec_xxpermdi(__v, __v, 0x3)[0]; ++ #endif ++} ++#endif /* vec_xxpermdi */ ++ ++#endif +diff --git a/contrib/power/crc32_constants.h b/contrib/power/crc32_constants.h +new file mode 100644 +index 000000000..58088dcc0 +--- /dev/null ++++ b/contrib/power/crc32_constants.h +@@ -0,0 +1,1206 @@ ++/* ++* ++* THIS FILE IS GENERATED WITH ++./crc32_constants -c -r -x 0x04C11DB7 ++ ++* This is from https://github.com/antonblanchard/crc32-vpmsum/ ++* DO NOT MODIFY IT MANUALLY! ++* ++*/ ++ ++#define CRC 0x4c11db7 ++#define CRC_XOR ++#define REFLECT ++#define MAX_SIZE 32768 ++ ++#ifndef __ASSEMBLER__ ++#ifdef CRC_TABLE ++static const unsigned int crc_table[] = { ++ 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, ++ 0x076dc419, 0x706af48f, 0xe963a535, 0x9e6495a3, ++ 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988, ++ 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91, ++ 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de, ++ 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7, ++ 0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, ++ 0x14015c4f, 0x63066cd9, 0xfa0f3d63, 0x8d080df5, ++ 0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172, ++ 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b, ++ 0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940, ++ 0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59, ++ 0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116, ++ 0x21b4f4b5, 0x56b3c423, 0xcfba9599, 0xb8bda50f, ++ 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924, ++ 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, ++ 0x76dc4190, 0x01db7106, 0x98d220bc, 0xefd5102a, ++ 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433, ++ 0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818, ++ 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01, ++ 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, ++ 0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457, ++ 0x65b0d9c6, 0x12b7e950, 0x8bbeb8ea, 0xfcb9887c, ++ 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65, ++ 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, ++ 0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb, ++ 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0, ++ 0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9, ++ 0x5005713c, 0x270241aa, 0xbe0b1010, 0xc90c2086, ++ 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f, ++ 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, ++ 0x59b33d17, 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad, ++ 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a, ++ 0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683, ++ 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8, ++ 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1, ++ 0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe, ++ 0xf762575d, 0x806567cb, 0x196c3671, 0x6e6b06e7, ++ 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc, ++ 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, ++ 0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252, ++ 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b, ++ 0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60, ++ 0xdf60efc3, 0xa867df55, 0x316e8eef, 0x4669be79, ++ 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236, ++ 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, ++ 0xc5ba3bbe, 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04, ++ 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d, ++ 0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a, ++ 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713, ++ 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, ++ 0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, ++ 0x86d3d2d4, 0xf1d4e242, 0x68ddb3f8, 0x1fda836e, ++ 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777, ++ 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, ++ 0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45, ++ 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2, ++ 0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db, ++ 0xaed16a4a, 0xd9d65adc, 0x40df0b66, 0x37d83bf0, ++ 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9, ++ 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, ++ 0xbad03605, 0xcdd70693, 0x54de5729, 0x23d967bf, ++ 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94, ++ 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d,}; ++ ++#endif /* CRC_TABLE */ ++#ifdef POWER8_INTRINSICS ++ ++/* Constants */ ++ ++/* Reduce 262144 kbits to 1024 bits */ ++static const __vector unsigned long long vcrc_const[255] ++ __attribute__((aligned (16))) = { ++#ifdef __LITTLE_ENDIAN__ ++ /* x^261120 mod p(x)` << 1, x^261184 mod p(x)` << 1 */ ++ { 0x0000000099ea94a8, 0x00000001651797d2 }, ++ /* x^260096 mod p(x)` << 1, x^260160 mod p(x)` << 1 */ ++ { 0x00000000945a8420, 0x0000000021e0d56c }, ++ /* x^259072 mod p(x)` << 1, x^259136 mod p(x)` << 1 */ ++ { 0x0000000030762706, 0x000000000f95ecaa }, ++ /* x^258048 mod p(x)` << 1, x^258112 mod p(x)` << 1 */ ++ { 0x00000001a52fc582, 0x00000001ebd224ac }, ++ /* x^257024 mod p(x)` << 1, x^257088 mod p(x)` << 1 */ ++ { 0x00000001a4a7167a, 0x000000000ccb97ca }, ++ /* x^256000 mod p(x)` << 1, x^256064 mod p(x)` << 1 */ ++ { 0x000000000c18249a, 0x00000001006ec8a8 }, ++ /* x^254976 mod p(x)` << 1, x^255040 mod p(x)` << 1 */ ++ { 0x00000000a924ae7c, 0x000000014f58f196 }, ++ /* x^253952 mod p(x)` << 1, x^254016 mod p(x)` << 1 */ ++ { 0x00000001e12ccc12, 0x00000001a7192ca6 }, ++ /* x^252928 mod p(x)` << 1, x^252992 mod p(x)` << 1 */ ++ { 0x00000000a0b9d4ac, 0x000000019a64bab2 }, ++ /* x^251904 mod p(x)` << 1, x^251968 mod p(x)` << 1 */ ++ { 0x0000000095e8ddfe, 0x0000000014f4ed2e }, ++ /* x^250880 mod p(x)` << 1, x^250944 mod p(x)` << 1 */ ++ { 0x00000000233fddc4, 0x000000011092b6a2 }, ++ /* x^249856 mod p(x)` << 1, x^249920 mod p(x)` << 1 */ ++ { 0x00000001b4529b62, 0x00000000c8a1629c }, ++ /* x^248832 mod p(x)` << 1, x^248896 mod p(x)` << 1 */ ++ { 0x00000001a7fa0e64, 0x000000017bf32e8e }, ++ /* x^247808 mod p(x)` << 1, x^247872 mod p(x)` << 1 */ ++ { 0x00000001b5334592, 0x00000001f8cc6582 }, ++ /* x^246784 mod p(x)` << 1, x^246848 mod p(x)` << 1 */ ++ { 0x000000011f8ee1b4, 0x000000008631ddf0 }, ++ /* x^245760 mod p(x)` << 1, x^245824 mod p(x)` << 1 */ ++ { 0x000000006252e632, 0x000000007e5a76d0 }, ++ /* x^244736 mod p(x)` << 1, x^244800 mod p(x)` << 1 */ ++ { 0x00000000ab973e84, 0x000000002b09b31c }, ++ /* x^243712 mod p(x)` << 1, x^243776 mod p(x)` << 1 */ ++ { 0x000000007734f5ec, 0x00000001b2df1f84 }, ++ /* x^242688 mod p(x)` << 1, x^242752 mod p(x)` << 1 */ ++ { 0x000000007c547798, 0x00000001d6f56afc }, ++ /* x^241664 mod p(x)` << 1, x^241728 mod p(x)` << 1 */ ++ { 0x000000007ec40210, 0x00000001b9b5e70c }, ++ /* x^240640 mod p(x)` << 1, x^240704 mod p(x)` << 1 */ ++ { 0x00000001ab1695a8, 0x0000000034b626d2 }, ++ /* x^239616 mod p(x)` << 1, x^239680 mod p(x)` << 1 */ ++ { 0x0000000090494bba, 0x000000014c53479a }, ++ /* x^238592 mod p(x)` << 1, x^238656 mod p(x)` << 1 */ ++ { 0x00000001123fb816, 0x00000001a6d179a4 }, ++ /* x^237568 mod p(x)` << 1, x^237632 mod p(x)` << 1 */ ++ { 0x00000001e188c74c, 0x000000015abd16b4 }, ++ /* x^236544 mod p(x)` << 1, x^236608 mod p(x)` << 1 */ ++ { 0x00000001c2d3451c, 0x00000000018f9852 }, ++ /* x^235520 mod p(x)` << 1, x^235584 mod p(x)` << 1 */ ++ { 0x00000000f55cf1ca, 0x000000001fb3084a }, ++ /* x^234496 mod p(x)` << 1, x^234560 mod p(x)` << 1 */ ++ { 0x00000001a0531540, 0x00000000c53dfb04 }, ++ /* x^233472 mod p(x)` << 1, x^233536 mod p(x)` << 1 */ ++ { 0x0000000132cd7ebc, 0x00000000e10c9ad6 }, ++ /* x^232448 mod p(x)` << 1, x^232512 mod p(x)` << 1 */ ++ { 0x0000000073ab7f36, 0x0000000025aa994a }, ++ /* x^231424 mod p(x)` << 1, x^231488 mod p(x)` << 1 */ ++ { 0x0000000041aed1c2, 0x00000000fa3a74c4 }, ++ /* x^230400 mod p(x)` << 1, x^230464 mod p(x)` << 1 */ ++ { 0x0000000136c53800, 0x0000000033eb3f40 }, ++ /* x^229376 mod p(x)` << 1, x^229440 mod p(x)` << 1 */ ++ { 0x0000000126835a30, 0x000000017193f296 }, ++ /* x^228352 mod p(x)` << 1, x^228416 mod p(x)` << 1 */ ++ { 0x000000006241b502, 0x0000000043f6c86a }, ++ /* x^227328 mod p(x)` << 1, x^227392 mod p(x)` << 1 */ ++ { 0x00000000d5196ad4, 0x000000016b513ec6 }, ++ /* x^226304 mod p(x)` << 1, x^226368 mod p(x)` << 1 */ ++ { 0x000000009cfa769a, 0x00000000c8f25b4e }, ++ /* x^225280 mod p(x)` << 1, x^225344 mod p(x)` << 1 */ ++ { 0x00000000920e5df4, 0x00000001a45048ec }, ++ /* x^224256 mod p(x)` << 1, x^224320 mod p(x)` << 1 */ ++ { 0x0000000169dc310e, 0x000000000c441004 }, ++ /* x^223232 mod p(x)` << 1, x^223296 mod p(x)` << 1 */ ++ { 0x0000000009fc331c, 0x000000000e17cad6 }, ++ /* x^222208 mod p(x)` << 1, x^222272 mod p(x)` << 1 */ ++ { 0x000000010d94a81e, 0x00000001253ae964 }, ++ /* x^221184 mod p(x)` << 1, x^221248 mod p(x)` << 1 */ ++ { 0x0000000027a20ab2, 0x00000001d7c88ebc }, ++ /* x^220160 mod p(x)` << 1, x^220224 mod p(x)` << 1 */ ++ { 0x0000000114f87504, 0x00000001e7ca913a }, ++ /* x^219136 mod p(x)` << 1, x^219200 mod p(x)` << 1 */ ++ { 0x000000004b076d96, 0x0000000033ed078a }, ++ /* x^218112 mod p(x)` << 1, x^218176 mod p(x)` << 1 */ ++ { 0x00000000da4d1e74, 0x00000000e1839c78 }, ++ /* x^217088 mod p(x)` << 1, x^217152 mod p(x)` << 1 */ ++ { 0x000000001b81f672, 0x00000001322b267e }, ++ /* x^216064 mod p(x)` << 1, x^216128 mod p(x)` << 1 */ ++ { 0x000000009367c988, 0x00000000638231b6 }, ++ /* x^215040 mod p(x)` << 1, x^215104 mod p(x)` << 1 */ ++ { 0x00000001717214ca, 0x00000001ee7f16f4 }, ++ /* x^214016 mod p(x)` << 1, x^214080 mod p(x)` << 1 */ ++ { 0x000000009f47d820, 0x0000000117d9924a }, ++ /* x^212992 mod p(x)` << 1, x^213056 mod p(x)` << 1 */ ++ { 0x000000010d9a47d2, 0x00000000e1a9e0c4 }, ++ /* x^211968 mod p(x)` << 1, x^212032 mod p(x)` << 1 */ ++ { 0x00000000a696c58c, 0x00000001403731dc }, ++ /* x^210944 mod p(x)` << 1, x^211008 mod p(x)` << 1 */ ++ { 0x000000002aa28ec6, 0x00000001a5ea9682 }, ++ /* x^209920 mod p(x)` << 1, x^209984 mod p(x)` << 1 */ ++ { 0x00000001fe18fd9a, 0x0000000101c5c578 }, ++ /* x^208896 mod p(x)` << 1, x^208960 mod p(x)` << 1 */ ++ { 0x000000019d4fc1ae, 0x00000000dddf6494 }, ++ /* x^207872 mod p(x)` << 1, x^207936 mod p(x)` << 1 */ ++ { 0x00000001ba0e3dea, 0x00000000f1c3db28 }, ++ /* x^206848 mod p(x)` << 1, x^206912 mod p(x)` << 1 */ ++ { 0x0000000074b59a5e, 0x000000013112fb9c }, ++ /* x^205824 mod p(x)` << 1, x^205888 mod p(x)` << 1 */ ++ { 0x00000000f2b5ea98, 0x00000000b680b906 }, ++ /* x^204800 mod p(x)` << 1, x^204864 mod p(x)` << 1 */ ++ { 0x0000000187132676, 0x000000001a282932 }, ++ /* x^203776 mod p(x)` << 1, x^203840 mod p(x)` << 1 */ ++ { 0x000000010a8c6ad4, 0x0000000089406e7e }, ++ /* x^202752 mod p(x)` << 1, x^202816 mod p(x)` << 1 */ ++ { 0x00000001e21dfe70, 0x00000001def6be8c }, ++ /* x^201728 mod p(x)` << 1, x^201792 mod p(x)` << 1 */ ++ { 0x00000001da0050e4, 0x0000000075258728 }, ++ /* x^200704 mod p(x)` << 1, x^200768 mod p(x)` << 1 */ ++ { 0x00000000772172ae, 0x000000019536090a }, ++ /* x^199680 mod p(x)` << 1, x^199744 mod p(x)` << 1 */ ++ { 0x00000000e47724aa, 0x00000000f2455bfc }, ++ /* x^198656 mod p(x)` << 1, x^198720 mod p(x)` << 1 */ ++ { 0x000000003cd63ac4, 0x000000018c40baf4 }, ++ /* x^197632 mod p(x)` << 1, x^197696 mod p(x)` << 1 */ ++ { 0x00000001bf47d352, 0x000000004cd390d4 }, ++ /* x^196608 mod p(x)` << 1, x^196672 mod p(x)` << 1 */ ++ { 0x000000018dc1d708, 0x00000001e4ece95a }, ++ /* x^195584 mod p(x)` << 1, x^195648 mod p(x)` << 1 */ ++ { 0x000000002d4620a4, 0x000000001a3ee918 }, ++ /* x^194560 mod p(x)` << 1, x^194624 mod p(x)` << 1 */ ++ { 0x0000000058fd1740, 0x000000007c652fb8 }, ++ /* x^193536 mod p(x)` << 1, x^193600 mod p(x)` << 1 */ ++ { 0x00000000dadd9bfc, 0x000000011c67842c }, ++ /* x^192512 mod p(x)` << 1, x^192576 mod p(x)` << 1 */ ++ { 0x00000001ea2140be, 0x00000000254f759c }, ++ /* x^191488 mod p(x)` << 1, x^191552 mod p(x)` << 1 */ ++ { 0x000000009de128ba, 0x000000007ece94ca }, ++ /* x^190464 mod p(x)` << 1, x^190528 mod p(x)` << 1 */ ++ { 0x000000013ac3aa8e, 0x0000000038f258c2 }, ++ /* x^189440 mod p(x)` << 1, x^189504 mod p(x)` << 1 */ ++ { 0x0000000099980562, 0x00000001cdf17b00 }, ++ /* x^188416 mod p(x)` << 1, x^188480 mod p(x)` << 1 */ ++ { 0x00000001c1579c86, 0x000000011f882c16 }, ++ /* x^187392 mod p(x)` << 1, x^187456 mod p(x)` << 1 */ ++ { 0x0000000068dbbf94, 0x0000000100093fc8 }, ++ /* x^186368 mod p(x)` << 1, x^186432 mod p(x)` << 1 */ ++ { 0x000000004509fb04, 0x00000001cd684f16 }, ++ /* x^185344 mod p(x)` << 1, x^185408 mod p(x)` << 1 */ ++ { 0x00000001202f6398, 0x000000004bc6a70a }, ++ /* x^184320 mod p(x)` << 1, x^184384 mod p(x)` << 1 */ ++ { 0x000000013aea243e, 0x000000004fc7e8e4 }, ++ /* x^183296 mod p(x)` << 1, x^183360 mod p(x)` << 1 */ ++ { 0x00000001b4052ae6, 0x0000000130103f1c }, ++ /* x^182272 mod p(x)` << 1, x^182336 mod p(x)` << 1 */ ++ { 0x00000001cd2a0ae8, 0x0000000111b0024c }, ++ /* x^181248 mod p(x)` << 1, x^181312 mod p(x)` << 1 */ ++ { 0x00000001fe4aa8b4, 0x000000010b3079da }, ++ /* x^180224 mod p(x)` << 1, x^180288 mod p(x)` << 1 */ ++ { 0x00000001d1559a42, 0x000000010192bcc2 }, ++ /* x^179200 mod p(x)` << 1, x^179264 mod p(x)` << 1 */ ++ { 0x00000001f3e05ecc, 0x0000000074838d50 }, ++ /* x^178176 mod p(x)` << 1, x^178240 mod p(x)` << 1 */ ++ { 0x0000000104ddd2cc, 0x000000001b20f520 }, ++ /* x^177152 mod p(x)` << 1, x^177216 mod p(x)` << 1 */ ++ { 0x000000015393153c, 0x0000000050c3590a }, ++ /* x^176128 mod p(x)` << 1, x^176192 mod p(x)` << 1 */ ++ { 0x0000000057e942c6, 0x00000000b41cac8e }, ++ /* x^175104 mod p(x)` << 1, x^175168 mod p(x)` << 1 */ ++ { 0x000000012c633850, 0x000000000c72cc78 }, ++ /* x^174080 mod p(x)` << 1, x^174144 mod p(x)` << 1 */ ++ { 0x00000000ebcaae4c, 0x0000000030cdb032 }, ++ /* x^173056 mod p(x)` << 1, x^173120 mod p(x)` << 1 */ ++ { 0x000000013ee532a6, 0x000000013e09fc32 }, ++ /* x^172032 mod p(x)` << 1, x^172096 mod p(x)` << 1 */ ++ { 0x00000001bf0cbc7e, 0x000000001ed624d2 }, ++ /* x^171008 mod p(x)` << 1, x^171072 mod p(x)` << 1 */ ++ { 0x00000000d50b7a5a, 0x00000000781aee1a }, ++ /* x^169984 mod p(x)` << 1, x^170048 mod p(x)` << 1 */ ++ { 0x0000000002fca6e8, 0x00000001c4d8348c }, ++ /* x^168960 mod p(x)` << 1, x^169024 mod p(x)` << 1 */ ++ { 0x000000007af40044, 0x0000000057a40336 }, ++ /* x^167936 mod p(x)` << 1, x^168000 mod p(x)` << 1 */ ++ { 0x0000000016178744, 0x0000000085544940 }, ++ /* x^166912 mod p(x)` << 1, x^166976 mod p(x)` << 1 */ ++ { 0x000000014c177458, 0x000000019cd21e80 }, ++ /* x^165888 mod p(x)` << 1, x^165952 mod p(x)` << 1 */ ++ { 0x000000011b6ddf04, 0x000000013eb95bc0 }, ++ /* x^164864 mod p(x)` << 1, x^164928 mod p(x)` << 1 */ ++ { 0x00000001f3e29ccc, 0x00000001dfc9fdfc }, ++ /* x^163840 mod p(x)` << 1, x^163904 mod p(x)` << 1 */ ++ { 0x0000000135ae7562, 0x00000000cd028bc2 }, ++ /* x^162816 mod p(x)` << 1, x^162880 mod p(x)` << 1 */ ++ { 0x0000000190ef812c, 0x0000000090db8c44 }, ++ /* x^161792 mod p(x)` << 1, x^161856 mod p(x)` << 1 */ ++ { 0x0000000067a2c786, 0x000000010010a4ce }, ++ /* x^160768 mod p(x)` << 1, x^160832 mod p(x)` << 1 */ ++ { 0x0000000048b9496c, 0x00000001c8f4c72c }, ++ /* x^159744 mod p(x)` << 1, x^159808 mod p(x)` << 1 */ ++ { 0x000000015a422de6, 0x000000001c26170c }, ++ /* x^158720 mod p(x)` << 1, x^158784 mod p(x)` << 1 */ ++ { 0x00000001ef0e3640, 0x00000000e3fccf68 }, ++ /* x^157696 mod p(x)` << 1, x^157760 mod p(x)` << 1 */ ++ { 0x00000001006d2d26, 0x00000000d513ed24 }, ++ /* x^156672 mod p(x)` << 1, x^156736 mod p(x)` << 1 */ ++ { 0x00000001170d56d6, 0x00000000141beada }, ++ /* x^155648 mod p(x)` << 1, x^155712 mod p(x)` << 1 */ ++ { 0x00000000a5fb613c, 0x000000011071aea0 }, ++ /* x^154624 mod p(x)` << 1, x^154688 mod p(x)` << 1 */ ++ { 0x0000000040bbf7fc, 0x000000012e19080a }, ++ /* x^153600 mod p(x)` << 1, x^153664 mod p(x)` << 1 */ ++ { 0x000000016ac3a5b2, 0x0000000100ecf826 }, ++ /* x^152576 mod p(x)` << 1, x^152640 mod p(x)` << 1 */ ++ { 0x00000000abf16230, 0x0000000069b09412 }, ++ /* x^151552 mod p(x)` << 1, x^151616 mod p(x)` << 1 */ ++ { 0x00000001ebe23fac, 0x0000000122297bac }, ++ /* x^150528 mod p(x)` << 1, x^150592 mod p(x)` << 1 */ ++ { 0x000000008b6a0894, 0x00000000e9e4b068 }, ++ /* x^149504 mod p(x)` << 1, x^149568 mod p(x)` << 1 */ ++ { 0x00000001288ea478, 0x000000004b38651a }, ++ /* x^148480 mod p(x)` << 1, x^148544 mod p(x)` << 1 */ ++ { 0x000000016619c442, 0x00000001468360e2 }, ++ /* x^147456 mod p(x)` << 1, x^147520 mod p(x)` << 1 */ ++ { 0x0000000086230038, 0x00000000121c2408 }, ++ /* x^146432 mod p(x)` << 1, x^146496 mod p(x)` << 1 */ ++ { 0x000000017746a756, 0x00000000da7e7d08 }, ++ /* x^145408 mod p(x)` << 1, x^145472 mod p(x)` << 1 */ ++ { 0x0000000191b8f8f8, 0x00000001058d7652 }, ++ /* x^144384 mod p(x)` << 1, x^144448 mod p(x)` << 1 */ ++ { 0x000000008e167708, 0x000000014a098a90 }, ++ /* x^143360 mod p(x)` << 1, x^143424 mod p(x)` << 1 */ ++ { 0x0000000148b22d54, 0x0000000020dbe72e }, ++ /* x^142336 mod p(x)` << 1, x^142400 mod p(x)` << 1 */ ++ { 0x0000000044ba2c3c, 0x000000011e7323e8 }, ++ /* x^141312 mod p(x)` << 1, x^141376 mod p(x)` << 1 */ ++ { 0x00000000b54d2b52, 0x00000000d5d4bf94 }, ++ /* x^140288 mod p(x)` << 1, x^140352 mod p(x)` << 1 */ ++ { 0x0000000005a4fd8a, 0x0000000199d8746c }, ++ /* x^139264 mod p(x)` << 1, x^139328 mod p(x)` << 1 */ ++ { 0x0000000139f9fc46, 0x00000000ce9ca8a0 }, ++ /* x^138240 mod p(x)` << 1, x^138304 mod p(x)` << 1 */ ++ { 0x000000015a1fa824, 0x00000000136edece }, ++ /* x^137216 mod p(x)` << 1, x^137280 mod p(x)` << 1 */ ++ { 0x000000000a61ae4c, 0x000000019b92a068 }, ++ /* x^136192 mod p(x)` << 1, x^136256 mod p(x)` << 1 */ ++ { 0x0000000145e9113e, 0x0000000071d62206 }, ++ /* x^135168 mod p(x)` << 1, x^135232 mod p(x)` << 1 */ ++ { 0x000000006a348448, 0x00000000dfc50158 }, ++ /* x^134144 mod p(x)` << 1, x^134208 mod p(x)` << 1 */ ++ { 0x000000004d80a08c, 0x00000001517626bc }, ++ /* x^133120 mod p(x)` << 1, x^133184 mod p(x)` << 1 */ ++ { 0x000000014b6837a0, 0x0000000148d1e4fa }, ++ /* x^132096 mod p(x)` << 1, x^132160 mod p(x)` << 1 */ ++ { 0x000000016896a7fc, 0x0000000094d8266e }, ++ /* x^131072 mod p(x)` << 1, x^131136 mod p(x)` << 1 */ ++ { 0x000000014f187140, 0x00000000606c5e34 }, ++ /* x^130048 mod p(x)` << 1, x^130112 mod p(x)` << 1 */ ++ { 0x000000019581b9da, 0x000000019766beaa }, ++ /* x^129024 mod p(x)` << 1, x^129088 mod p(x)` << 1 */ ++ { 0x00000001091bc984, 0x00000001d80c506c }, ++ /* x^128000 mod p(x)` << 1, x^128064 mod p(x)` << 1 */ ++ { 0x000000001067223c, 0x000000001e73837c }, ++ /* x^126976 mod p(x)` << 1, x^127040 mod p(x)` << 1 */ ++ { 0x00000001ab16ea02, 0x0000000064d587de }, ++ /* x^125952 mod p(x)` << 1, x^126016 mod p(x)` << 1 */ ++ { 0x000000013c4598a8, 0x00000000f4a507b0 }, ++ /* x^124928 mod p(x)` << 1, x^124992 mod p(x)` << 1 */ ++ { 0x00000000b3735430, 0x0000000040e342fc }, ++ /* x^123904 mod p(x)` << 1, x^123968 mod p(x)` << 1 */ ++ { 0x00000001bb3fc0c0, 0x00000001d5ad9c3a }, ++ /* x^122880 mod p(x)` << 1, x^122944 mod p(x)` << 1 */ ++ { 0x00000001570ae19c, 0x0000000094a691a4 }, ++ /* x^121856 mod p(x)` << 1, x^121920 mod p(x)` << 1 */ ++ { 0x00000001ea910712, 0x00000001271ecdfa }, ++ /* x^120832 mod p(x)` << 1, x^120896 mod p(x)` << 1 */ ++ { 0x0000000167127128, 0x000000009e54475a }, ++ /* x^119808 mod p(x)` << 1, x^119872 mod p(x)` << 1 */ ++ { 0x0000000019e790a2, 0x00000000c9c099ee }, ++ /* x^118784 mod p(x)` << 1, x^118848 mod p(x)` << 1 */ ++ { 0x000000003788f710, 0x000000009a2f736c }, ++ /* x^117760 mod p(x)` << 1, x^117824 mod p(x)` << 1 */ ++ { 0x00000001682a160e, 0x00000000bb9f4996 }, ++ /* x^116736 mod p(x)` << 1, x^116800 mod p(x)` << 1 */ ++ { 0x000000007f0ebd2e, 0x00000001db688050 }, ++ /* x^115712 mod p(x)` << 1, x^115776 mod p(x)` << 1 */ ++ { 0x000000002b032080, 0x00000000e9b10af4 }, ++ /* x^114688 mod p(x)` << 1, x^114752 mod p(x)` << 1 */ ++ { 0x00000000cfd1664a, 0x000000012d4545e4 }, ++ /* x^113664 mod p(x)` << 1, x^113728 mod p(x)` << 1 */ ++ { 0x00000000aa1181c2, 0x000000000361139c }, ++ /* x^112640 mod p(x)` << 1, x^112704 mod p(x)` << 1 */ ++ { 0x00000000ddd08002, 0x00000001a5a1a3a8 }, ++ /* x^111616 mod p(x)` << 1, x^111680 mod p(x)` << 1 */ ++ { 0x00000000e8dd0446, 0x000000006844e0b0 }, ++ /* x^110592 mod p(x)` << 1, x^110656 mod p(x)` << 1 */ ++ { 0x00000001bbd94a00, 0x00000000c3762f28 }, ++ /* x^109568 mod p(x)` << 1, x^109632 mod p(x)` << 1 */ ++ { 0x00000000ab6cd180, 0x00000001d26287a2 }, ++ /* x^108544 mod p(x)` << 1, x^108608 mod p(x)` << 1 */ ++ { 0x0000000031803ce2, 0x00000001f6f0bba8 }, ++ /* x^107520 mod p(x)` << 1, x^107584 mod p(x)` << 1 */ ++ { 0x0000000024f40b0c, 0x000000002ffabd62 }, ++ /* x^106496 mod p(x)` << 1, x^106560 mod p(x)` << 1 */ ++ { 0x00000001ba1d9834, 0x00000000fb4516b8 }, ++ /* x^105472 mod p(x)` << 1, x^105536 mod p(x)` << 1 */ ++ { 0x0000000104de61aa, 0x000000018cfa961c }, ++ /* x^104448 mod p(x)` << 1, x^104512 mod p(x)` << 1 */ ++ { 0x0000000113e40d46, 0x000000019e588d52 }, ++ /* x^103424 mod p(x)` << 1, x^103488 mod p(x)` << 1 */ ++ { 0x00000001415598a0, 0x00000001180f0bbc }, ++ /* x^102400 mod p(x)` << 1, x^102464 mod p(x)` << 1 */ ++ { 0x00000000bf6c8c90, 0x00000000e1d9177a }, ++ /* x^101376 mod p(x)` << 1, x^101440 mod p(x)` << 1 */ ++ { 0x00000001788b0504, 0x0000000105abc27c }, ++ /* x^100352 mod p(x)` << 1, x^100416 mod p(x)` << 1 */ ++ { 0x0000000038385d02, 0x00000000972e4a58 }, ++ /* x^99328 mod p(x)` << 1, x^99392 mod p(x)` << 1 */ ++ { 0x00000001b6c83844, 0x0000000183499a5e }, ++ /* x^98304 mod p(x)` << 1, x^98368 mod p(x)` << 1 */ ++ { 0x0000000051061a8a, 0x00000001c96a8cca }, ++ /* x^97280 mod p(x)` << 1, x^97344 mod p(x)` << 1 */ ++ { 0x000000017351388a, 0x00000001a1a5b60c }, ++ /* x^96256 mod p(x)` << 1, x^96320 mod p(x)` << 1 */ ++ { 0x0000000132928f92, 0x00000000e4b6ac9c }, ++ /* x^95232 mod p(x)` << 1, x^95296 mod p(x)` << 1 */ ++ { 0x00000000e6b4f48a, 0x00000001807e7f5a }, ++ /* x^94208 mod p(x)` << 1, x^94272 mod p(x)` << 1 */ ++ { 0x0000000039d15e90, 0x000000017a7e3bc8 }, ++ /* x^93184 mod p(x)` << 1, x^93248 mod p(x)` << 1 */ ++ { 0x00000000312d6074, 0x00000000d73975da }, ++ /* x^92160 mod p(x)` << 1, x^92224 mod p(x)` << 1 */ ++ { 0x000000017bbb2cc4, 0x000000017375d038 }, ++ /* x^91136 mod p(x)` << 1, x^91200 mod p(x)` << 1 */ ++ { 0x000000016ded3e18, 0x00000000193680bc }, ++ /* x^90112 mod p(x)` << 1, x^90176 mod p(x)` << 1 */ ++ { 0x00000000f1638b16, 0x00000000999b06f6 }, ++ /* x^89088 mod p(x)` << 1, x^89152 mod p(x)` << 1 */ ++ { 0x00000001d38b9ecc, 0x00000001f685d2b8 }, ++ /* x^88064 mod p(x)` << 1, x^88128 mod p(x)` << 1 */ ++ { 0x000000018b8d09dc, 0x00000001f4ecbed2 }, ++ /* x^87040 mod p(x)` << 1, x^87104 mod p(x)` << 1 */ ++ { 0x00000000e7bc27d2, 0x00000000ba16f1a0 }, ++ /* x^86016 mod p(x)` << 1, x^86080 mod p(x)` << 1 */ ++ { 0x00000000275e1e96, 0x0000000115aceac4 }, ++ /* x^84992 mod p(x)` << 1, x^85056 mod p(x)` << 1 */ ++ { 0x00000000e2e3031e, 0x00000001aeff6292 }, ++ /* x^83968 mod p(x)` << 1, x^84032 mod p(x)` << 1 */ ++ { 0x00000001041c84d8, 0x000000009640124c }, ++ /* x^82944 mod p(x)` << 1, x^83008 mod p(x)` << 1 */ ++ { 0x00000000706ce672, 0x0000000114f41f02 }, ++ /* x^81920 mod p(x)` << 1, x^81984 mod p(x)` << 1 */ ++ { 0x000000015d5070da, 0x000000009c5f3586 }, ++ /* x^80896 mod p(x)` << 1, x^80960 mod p(x)` << 1 */ ++ { 0x0000000038f9493a, 0x00000001878275fa }, ++ /* x^79872 mod p(x)` << 1, x^79936 mod p(x)` << 1 */ ++ { 0x00000000a3348a76, 0x00000000ddc42ce8 }, ++ /* x^78848 mod p(x)` << 1, x^78912 mod p(x)` << 1 */ ++ { 0x00000001ad0aab92, 0x0000000181d2c73a }, ++ /* x^77824 mod p(x)` << 1, x^77888 mod p(x)` << 1 */ ++ { 0x000000019e85f712, 0x0000000141c9320a }, ++ /* x^76800 mod p(x)` << 1, x^76864 mod p(x)` << 1 */ ++ { 0x000000005a871e76, 0x000000015235719a }, ++ /* x^75776 mod p(x)` << 1, x^75840 mod p(x)` << 1 */ ++ { 0x000000017249c662, 0x00000000be27d804 }, ++ /* x^74752 mod p(x)` << 1, x^74816 mod p(x)` << 1 */ ++ { 0x000000003a084712, 0x000000006242d45a }, ++ /* x^73728 mod p(x)` << 1, x^73792 mod p(x)` << 1 */ ++ { 0x00000000ed438478, 0x000000009a53638e }, ++ /* x^72704 mod p(x)` << 1, x^72768 mod p(x)` << 1 */ ++ { 0x00000000abac34cc, 0x00000001001ecfb6 }, ++ /* x^71680 mod p(x)` << 1, x^71744 mod p(x)` << 1 */ ++ { 0x000000005f35ef3e, 0x000000016d7c2d64 }, ++ /* x^70656 mod p(x)` << 1, x^70720 mod p(x)` << 1 */ ++ { 0x0000000047d6608c, 0x00000001d0ce46c0 }, ++ /* x^69632 mod p(x)` << 1, x^69696 mod p(x)` << 1 */ ++ { 0x000000002d01470e, 0x0000000124c907b4 }, ++ /* x^68608 mod p(x)` << 1, x^68672 mod p(x)` << 1 */ ++ { 0x0000000158bbc7b0, 0x0000000018a555ca }, ++ /* x^67584 mod p(x)` << 1, x^67648 mod p(x)` << 1 */ ++ { 0x00000000c0a23e8e, 0x000000006b0980bc }, ++ /* x^66560 mod p(x)` << 1, x^66624 mod p(x)` << 1 */ ++ { 0x00000001ebd85c88, 0x000000008bbba964 }, ++ /* x^65536 mod p(x)` << 1, x^65600 mod p(x)` << 1 */ ++ { 0x000000019ee20bb2, 0x00000001070a5a1e }, ++ /* x^64512 mod p(x)` << 1, x^64576 mod p(x)` << 1 */ ++ { 0x00000001acabf2d6, 0x000000002204322a }, ++ /* x^63488 mod p(x)` << 1, x^63552 mod p(x)` << 1 */ ++ { 0x00000001b7963d56, 0x00000000a27524d0 }, ++ /* x^62464 mod p(x)` << 1, x^62528 mod p(x)` << 1 */ ++ { 0x000000017bffa1fe, 0x0000000020b1e4ba }, ++ /* x^61440 mod p(x)` << 1, x^61504 mod p(x)` << 1 */ ++ { 0x000000001f15333e, 0x0000000032cc27fc }, ++ /* x^60416 mod p(x)` << 1, x^60480 mod p(x)` << 1 */ ++ { 0x000000018593129e, 0x0000000044dd22b8 }, ++ /* x^59392 mod p(x)` << 1, x^59456 mod p(x)` << 1 */ ++ { 0x000000019cb32602, 0x00000000dffc9e0a }, ++ /* x^58368 mod p(x)` << 1, x^58432 mod p(x)` << 1 */ ++ { 0x0000000142b05cc8, 0x00000001b7a0ed14 }, ++ /* x^57344 mod p(x)` << 1, x^57408 mod p(x)` << 1 */ ++ { 0x00000001be49e7a4, 0x00000000c7842488 }, ++ /* x^56320 mod p(x)` << 1, x^56384 mod p(x)` << 1 */ ++ { 0x0000000108f69d6c, 0x00000001c02a4fee }, ++ /* x^55296 mod p(x)` << 1, x^55360 mod p(x)` << 1 */ ++ { 0x000000006c0971f0, 0x000000003c273778 }, ++ /* x^54272 mod p(x)` << 1, x^54336 mod p(x)` << 1 */ ++ { 0x000000005b16467a, 0x00000001d63f8894 }, ++ /* x^53248 mod p(x)` << 1, x^53312 mod p(x)` << 1 */ ++ { 0x00000001551a628e, 0x000000006be557d6 }, ++ /* x^52224 mod p(x)` << 1, x^52288 mod p(x)` << 1 */ ++ { 0x000000019e42ea92, 0x000000006a7806ea }, ++ /* x^51200 mod p(x)` << 1, x^51264 mod p(x)` << 1 */ ++ { 0x000000012fa83ff2, 0x000000016155aa0c }, ++ /* x^50176 mod p(x)` << 1, x^50240 mod p(x)` << 1 */ ++ { 0x000000011ca9cde0, 0x00000000908650ac }, ++ /* x^49152 mod p(x)` << 1, x^49216 mod p(x)` << 1 */ ++ { 0x00000000c8e5cd74, 0x00000000aa5a8084 }, ++ /* x^48128 mod p(x)` << 1, x^48192 mod p(x)` << 1 */ ++ { 0x0000000096c27f0c, 0x0000000191bb500a }, ++ /* x^47104 mod p(x)` << 1, x^47168 mod p(x)` << 1 */ ++ { 0x000000002baed926, 0x0000000064e9bed0 }, ++ /* x^46080 mod p(x)` << 1, x^46144 mod p(x)` << 1 */ ++ { 0x000000017c8de8d2, 0x000000009444f302 }, ++ /* x^45056 mod p(x)` << 1, x^45120 mod p(x)` << 1 */ ++ { 0x00000000d43d6068, 0x000000019db07d3c }, ++ /* x^44032 mod p(x)` << 1, x^44096 mod p(x)` << 1 */ ++ { 0x00000000cb2c4b26, 0x00000001359e3e6e }, ++ /* x^43008 mod p(x)` << 1, x^43072 mod p(x)` << 1 */ ++ { 0x0000000145b8da26, 0x00000001e4f10dd2 }, ++ /* x^41984 mod p(x)` << 1, x^42048 mod p(x)` << 1 */ ++ { 0x000000018fff4b08, 0x0000000124f5735e }, ++ /* x^40960 mod p(x)` << 1, x^41024 mod p(x)` << 1 */ ++ { 0x0000000150b58ed0, 0x0000000124760a4c }, ++ /* x^39936 mod p(x)` << 1, x^40000 mod p(x)` << 1 */ ++ { 0x00000001549f39bc, 0x000000000f1fc186 }, ++ /* x^38912 mod p(x)` << 1, x^38976 mod p(x)` << 1 */ ++ { 0x00000000ef4d2f42, 0x00000000150e4cc4 }, ++ /* x^37888 mod p(x)` << 1, x^37952 mod p(x)` << 1 */ ++ { 0x00000001b1468572, 0x000000002a6204e8 }, ++ /* x^36864 mod p(x)` << 1, x^36928 mod p(x)` << 1 */ ++ { 0x000000013d7403b2, 0x00000000beb1d432 }, ++ /* x^35840 mod p(x)` << 1, x^35904 mod p(x)` << 1 */ ++ { 0x00000001a4681842, 0x0000000135f3f1f0 }, ++ /* x^34816 mod p(x)` << 1, x^34880 mod p(x)` << 1 */ ++ { 0x0000000167714492, 0x0000000074fe2232 }, ++ /* x^33792 mod p(x)` << 1, x^33856 mod p(x)` << 1 */ ++ { 0x00000001e599099a, 0x000000001ac6e2ba }, ++ /* x^32768 mod p(x)` << 1, x^32832 mod p(x)` << 1 */ ++ { 0x00000000fe128194, 0x0000000013fca91e }, ++ /* x^31744 mod p(x)` << 1, x^31808 mod p(x)` << 1 */ ++ { 0x0000000077e8b990, 0x0000000183f4931e }, ++ /* x^30720 mod p(x)` << 1, x^30784 mod p(x)` << 1 */ ++ { 0x00000001a267f63a, 0x00000000b6d9b4e4 }, ++ /* x^29696 mod p(x)` << 1, x^29760 mod p(x)` << 1 */ ++ { 0x00000001945c245a, 0x00000000b5188656 }, ++ /* x^28672 mod p(x)` << 1, x^28736 mod p(x)` << 1 */ ++ { 0x0000000149002e76, 0x0000000027a81a84 }, ++ /* x^27648 mod p(x)` << 1, x^27712 mod p(x)` << 1 */ ++ { 0x00000001bb8310a4, 0x0000000125699258 }, ++ /* x^26624 mod p(x)` << 1, x^26688 mod p(x)` << 1 */ ++ { 0x000000019ec60bcc, 0x00000001b23de796 }, ++ /* x^25600 mod p(x)` << 1, x^25664 mod p(x)` << 1 */ ++ { 0x000000012d8590ae, 0x00000000fe4365dc }, ++ /* x^24576 mod p(x)` << 1, x^24640 mod p(x)` << 1 */ ++ { 0x0000000065b00684, 0x00000000c68f497a }, ++ /* x^23552 mod p(x)` << 1, x^23616 mod p(x)` << 1 */ ++ { 0x000000015e5aeadc, 0x00000000fbf521ee }, ++ /* x^22528 mod p(x)` << 1, x^22592 mod p(x)` << 1 */ ++ { 0x00000000b77ff2b0, 0x000000015eac3378 }, ++ /* x^21504 mod p(x)` << 1, x^21568 mod p(x)` << 1 */ ++ { 0x0000000188da2ff6, 0x0000000134914b90 }, ++ /* x^20480 mod p(x)` << 1, x^20544 mod p(x)` << 1 */ ++ { 0x0000000063da929a, 0x0000000016335cfe }, ++ /* x^19456 mod p(x)` << 1, x^19520 mod p(x)` << 1 */ ++ { 0x00000001389caa80, 0x000000010372d10c }, ++ /* x^18432 mod p(x)` << 1, x^18496 mod p(x)` << 1 */ ++ { 0x000000013db599d2, 0x000000015097b908 }, ++ /* x^17408 mod p(x)` << 1, x^17472 mod p(x)` << 1 */ ++ { 0x0000000122505a86, 0x00000001227a7572 }, ++ /* x^16384 mod p(x)` << 1, x^16448 mod p(x)` << 1 */ ++ { 0x000000016bd72746, 0x000000009a8f75c0 }, ++ /* x^15360 mod p(x)` << 1, x^15424 mod p(x)` << 1 */ ++ { 0x00000001c3faf1d4, 0x00000000682c77a2 }, ++ /* x^14336 mod p(x)` << 1, x^14400 mod p(x)` << 1 */ ++ { 0x00000001111c826c, 0x00000000231f091c }, ++ /* x^13312 mod p(x)` << 1, x^13376 mod p(x)` << 1 */ ++ { 0x00000000153e9fb2, 0x000000007d4439f2 }, ++ /* x^12288 mod p(x)` << 1, x^12352 mod p(x)` << 1 */ ++ { 0x000000002b1f7b60, 0x000000017e221efc }, ++ /* x^11264 mod p(x)` << 1, x^11328 mod p(x)` << 1 */ ++ { 0x00000000b1dba570, 0x0000000167457c38 }, ++ /* x^10240 mod p(x)` << 1, x^10304 mod p(x)` << 1 */ ++ { 0x00000001f6397b76, 0x00000000bdf081c4 }, ++ /* x^9216 mod p(x)` << 1, x^9280 mod p(x)` << 1 */ ++ { 0x0000000156335214, 0x000000016286d6b0 }, ++ /* x^8192 mod p(x)` << 1, x^8256 mod p(x)` << 1 */ ++ { 0x00000001d70e3986, 0x00000000c84f001c }, ++ /* x^7168 mod p(x)` << 1, x^7232 mod p(x)` << 1 */ ++ { 0x000000003701a774, 0x0000000064efe7c0 }, ++ /* x^6144 mod p(x)` << 1, x^6208 mod p(x)` << 1 */ ++ { 0x00000000ac81ef72, 0x000000000ac2d904 }, ++ /* x^5120 mod p(x)` << 1, x^5184 mod p(x)` << 1 */ ++ { 0x0000000133212464, 0x00000000fd226d14 }, ++ /* x^4096 mod p(x)` << 1, x^4160 mod p(x)` << 1 */ ++ { 0x00000000e4e45610, 0x000000011cfd42e0 }, ++ /* x^3072 mod p(x)` << 1, x^3136 mod p(x)` << 1 */ ++ { 0x000000000c1bd370, 0x000000016e5a5678 }, ++ /* x^2048 mod p(x)` << 1, x^2112 mod p(x)` << 1 */ ++ { 0x00000001a7b9e7a6, 0x00000001d888fe22 }, ++ /* x^1024 mod p(x)` << 1, x^1088 mod p(x)` << 1 */ ++ { 0x000000007d657a10, 0x00000001af77fcd4 } ++#else /* __LITTLE_ENDIAN__ */ ++ /* x^261120 mod p(x)` << 1, x^261184 mod p(x)` << 1 */ ++ { 0x00000001651797d2, 0x0000000099ea94a8 }, ++ /* x^260096 mod p(x)` << 1, x^260160 mod p(x)` << 1 */ ++ { 0x0000000021e0d56c, 0x00000000945a8420 }, ++ /* x^259072 mod p(x)` << 1, x^259136 mod p(x)` << 1 */ ++ { 0x000000000f95ecaa, 0x0000000030762706 }, ++ /* x^258048 mod p(x)` << 1, x^258112 mod p(x)` << 1 */ ++ { 0x00000001ebd224ac, 0x00000001a52fc582 }, ++ /* x^257024 mod p(x)` << 1, x^257088 mod p(x)` << 1 */ ++ { 0x000000000ccb97ca, 0x00000001a4a7167a }, ++ /* x^256000 mod p(x)` << 1, x^256064 mod p(x)` << 1 */ ++ { 0x00000001006ec8a8, 0x000000000c18249a }, ++ /* x^254976 mod p(x)` << 1, x^255040 mod p(x)` << 1 */ ++ { 0x000000014f58f196, 0x00000000a924ae7c }, ++ /* x^253952 mod p(x)` << 1, x^254016 mod p(x)` << 1 */ ++ { 0x00000001a7192ca6, 0x00000001e12ccc12 }, ++ /* x^252928 mod p(x)` << 1, x^252992 mod p(x)` << 1 */ ++ { 0x000000019a64bab2, 0x00000000a0b9d4ac }, ++ /* x^251904 mod p(x)` << 1, x^251968 mod p(x)` << 1 */ ++ { 0x0000000014f4ed2e, 0x0000000095e8ddfe }, ++ /* x^250880 mod p(x)` << 1, x^250944 mod p(x)` << 1 */ ++ { 0x000000011092b6a2, 0x00000000233fddc4 }, ++ /* x^249856 mod p(x)` << 1, x^249920 mod p(x)` << 1 */ ++ { 0x00000000c8a1629c, 0x00000001b4529b62 }, ++ /* x^248832 mod p(x)` << 1, x^248896 mod p(x)` << 1 */ ++ { 0x000000017bf32e8e, 0x00000001a7fa0e64 }, ++ /* x^247808 mod p(x)` << 1, x^247872 mod p(x)` << 1 */ ++ { 0x00000001f8cc6582, 0x00000001b5334592 }, ++ /* x^246784 mod p(x)` << 1, x^246848 mod p(x)` << 1 */ ++ { 0x000000008631ddf0, 0x000000011f8ee1b4 }, ++ /* x^245760 mod p(x)` << 1, x^245824 mod p(x)` << 1 */ ++ { 0x000000007e5a76d0, 0x000000006252e632 }, ++ /* x^244736 mod p(x)` << 1, x^244800 mod p(x)` << 1 */ ++ { 0x000000002b09b31c, 0x00000000ab973e84 }, ++ /* x^243712 mod p(x)` << 1, x^243776 mod p(x)` << 1 */ ++ { 0x00000001b2df1f84, 0x000000007734f5ec }, ++ /* x^242688 mod p(x)` << 1, x^242752 mod p(x)` << 1 */ ++ { 0x00000001d6f56afc, 0x000000007c547798 }, ++ /* x^241664 mod p(x)` << 1, x^241728 mod p(x)` << 1 */ ++ { 0x00000001b9b5e70c, 0x000000007ec40210 }, ++ /* x^240640 mod p(x)` << 1, x^240704 mod p(x)` << 1 */ ++ { 0x0000000034b626d2, 0x00000001ab1695a8 }, ++ /* x^239616 mod p(x)` << 1, x^239680 mod p(x)` << 1 */ ++ { 0x000000014c53479a, 0x0000000090494bba }, ++ /* x^238592 mod p(x)` << 1, x^238656 mod p(x)` << 1 */ ++ { 0x00000001a6d179a4, 0x00000001123fb816 }, ++ /* x^237568 mod p(x)` << 1, x^237632 mod p(x)` << 1 */ ++ { 0x000000015abd16b4, 0x00000001e188c74c }, ++ /* x^236544 mod p(x)` << 1, x^236608 mod p(x)` << 1 */ ++ { 0x00000000018f9852, 0x00000001c2d3451c }, ++ /* x^235520 mod p(x)` << 1, x^235584 mod p(x)` << 1 */ ++ { 0x000000001fb3084a, 0x00000000f55cf1ca }, ++ /* x^234496 mod p(x)` << 1, x^234560 mod p(x)` << 1 */ ++ { 0x00000000c53dfb04, 0x00000001a0531540 }, ++ /* x^233472 mod p(x)` << 1, x^233536 mod p(x)` << 1 */ ++ { 0x00000000e10c9ad6, 0x0000000132cd7ebc }, ++ /* x^232448 mod p(x)` << 1, x^232512 mod p(x)` << 1 */ ++ { 0x0000000025aa994a, 0x0000000073ab7f36 }, ++ /* x^231424 mod p(x)` << 1, x^231488 mod p(x)` << 1 */ ++ { 0x00000000fa3a74c4, 0x0000000041aed1c2 }, ++ /* x^230400 mod p(x)` << 1, x^230464 mod p(x)` << 1 */ ++ { 0x0000000033eb3f40, 0x0000000136c53800 }, ++ /* x^229376 mod p(x)` << 1, x^229440 mod p(x)` << 1 */ ++ { 0x000000017193f296, 0x0000000126835a30 }, ++ /* x^228352 mod p(x)` << 1, x^228416 mod p(x)` << 1 */ ++ { 0x0000000043f6c86a, 0x000000006241b502 }, ++ /* x^227328 mod p(x)` << 1, x^227392 mod p(x)` << 1 */ ++ { 0x000000016b513ec6, 0x00000000d5196ad4 }, ++ /* x^226304 mod p(x)` << 1, x^226368 mod p(x)` << 1 */ ++ { 0x00000000c8f25b4e, 0x000000009cfa769a }, ++ /* x^225280 mod p(x)` << 1, x^225344 mod p(x)` << 1 */ ++ { 0x00000001a45048ec, 0x00000000920e5df4 }, ++ /* x^224256 mod p(x)` << 1, x^224320 mod p(x)` << 1 */ ++ { 0x000000000c441004, 0x0000000169dc310e }, ++ /* x^223232 mod p(x)` << 1, x^223296 mod p(x)` << 1 */ ++ { 0x000000000e17cad6, 0x0000000009fc331c }, ++ /* x^222208 mod p(x)` << 1, x^222272 mod p(x)` << 1 */ ++ { 0x00000001253ae964, 0x000000010d94a81e }, ++ /* x^221184 mod p(x)` << 1, x^221248 mod p(x)` << 1 */ ++ { 0x00000001d7c88ebc, 0x0000000027a20ab2 }, ++ /* x^220160 mod p(x)` << 1, x^220224 mod p(x)` << 1 */ ++ { 0x00000001e7ca913a, 0x0000000114f87504 }, ++ /* x^219136 mod p(x)` << 1, x^219200 mod p(x)` << 1 */ ++ { 0x0000000033ed078a, 0x000000004b076d96 }, ++ /* x^218112 mod p(x)` << 1, x^218176 mod p(x)` << 1 */ ++ { 0x00000000e1839c78, 0x00000000da4d1e74 }, ++ /* x^217088 mod p(x)` << 1, x^217152 mod p(x)` << 1 */ ++ { 0x00000001322b267e, 0x000000001b81f672 }, ++ /* x^216064 mod p(x)` << 1, x^216128 mod p(x)` << 1 */ ++ { 0x00000000638231b6, 0x000000009367c988 }, ++ /* x^215040 mod p(x)` << 1, x^215104 mod p(x)` << 1 */ ++ { 0x00000001ee7f16f4, 0x00000001717214ca }, ++ /* x^214016 mod p(x)` << 1, x^214080 mod p(x)` << 1 */ ++ { 0x0000000117d9924a, 0x000000009f47d820 }, ++ /* x^212992 mod p(x)` << 1, x^213056 mod p(x)` << 1 */ ++ { 0x00000000e1a9e0c4, 0x000000010d9a47d2 }, ++ /* x^211968 mod p(x)` << 1, x^212032 mod p(x)` << 1 */ ++ { 0x00000001403731dc, 0x00000000a696c58c }, ++ /* x^210944 mod p(x)` << 1, x^211008 mod p(x)` << 1 */ ++ { 0x00000001a5ea9682, 0x000000002aa28ec6 }, ++ /* x^209920 mod p(x)` << 1, x^209984 mod p(x)` << 1 */ ++ { 0x0000000101c5c578, 0x00000001fe18fd9a }, ++ /* x^208896 mod p(x)` << 1, x^208960 mod p(x)` << 1 */ ++ { 0x00000000dddf6494, 0x000000019d4fc1ae }, ++ /* x^207872 mod p(x)` << 1, x^207936 mod p(x)` << 1 */ ++ { 0x00000000f1c3db28, 0x00000001ba0e3dea }, ++ /* x^206848 mod p(x)` << 1, x^206912 mod p(x)` << 1 */ ++ { 0x000000013112fb9c, 0x0000000074b59a5e }, ++ /* x^205824 mod p(x)` << 1, x^205888 mod p(x)` << 1 */ ++ { 0x00000000b680b906, 0x00000000f2b5ea98 }, ++ /* x^204800 mod p(x)` << 1, x^204864 mod p(x)` << 1 */ ++ { 0x000000001a282932, 0x0000000187132676 }, ++ /* x^203776 mod p(x)` << 1, x^203840 mod p(x)` << 1 */ ++ { 0x0000000089406e7e, 0x000000010a8c6ad4 }, ++ /* x^202752 mod p(x)` << 1, x^202816 mod p(x)` << 1 */ ++ { 0x00000001def6be8c, 0x00000001e21dfe70 }, ++ /* x^201728 mod p(x)` << 1, x^201792 mod p(x)` << 1 */ ++ { 0x0000000075258728, 0x00000001da0050e4 }, ++ /* x^200704 mod p(x)` << 1, x^200768 mod p(x)` << 1 */ ++ { 0x000000019536090a, 0x00000000772172ae }, ++ /* x^199680 mod p(x)` << 1, x^199744 mod p(x)` << 1 */ ++ { 0x00000000f2455bfc, 0x00000000e47724aa }, ++ /* x^198656 mod p(x)` << 1, x^198720 mod p(x)` << 1 */ ++ { 0x000000018c40baf4, 0x000000003cd63ac4 }, ++ /* x^197632 mod p(x)` << 1, x^197696 mod p(x)` << 1 */ ++ { 0x000000004cd390d4, 0x00000001bf47d352 }, ++ /* x^196608 mod p(x)` << 1, x^196672 mod p(x)` << 1 */ ++ { 0x00000001e4ece95a, 0x000000018dc1d708 }, ++ /* x^195584 mod p(x)` << 1, x^195648 mod p(x)` << 1 */ ++ { 0x000000001a3ee918, 0x000000002d4620a4 }, ++ /* x^194560 mod p(x)` << 1, x^194624 mod p(x)` << 1 */ ++ { 0x000000007c652fb8, 0x0000000058fd1740 }, ++ /* x^193536 mod p(x)` << 1, x^193600 mod p(x)` << 1 */ ++ { 0x000000011c67842c, 0x00000000dadd9bfc }, ++ /* x^192512 mod p(x)` << 1, x^192576 mod p(x)` << 1 */ ++ { 0x00000000254f759c, 0x00000001ea2140be }, ++ /* x^191488 mod p(x)` << 1, x^191552 mod p(x)` << 1 */ ++ { 0x000000007ece94ca, 0x000000009de128ba }, ++ /* x^190464 mod p(x)` << 1, x^190528 mod p(x)` << 1 */ ++ { 0x0000000038f258c2, 0x000000013ac3aa8e }, ++ /* x^189440 mod p(x)` << 1, x^189504 mod p(x)` << 1 */ ++ { 0x00000001cdf17b00, 0x0000000099980562 }, ++ /* x^188416 mod p(x)` << 1, x^188480 mod p(x)` << 1 */ ++ { 0x000000011f882c16, 0x00000001c1579c86 }, ++ /* x^187392 mod p(x)` << 1, x^187456 mod p(x)` << 1 */ ++ { 0x0000000100093fc8, 0x0000000068dbbf94 }, ++ /* x^186368 mod p(x)` << 1, x^186432 mod p(x)` << 1 */ ++ { 0x00000001cd684f16, 0x000000004509fb04 }, ++ /* x^185344 mod p(x)` << 1, x^185408 mod p(x)` << 1 */ ++ { 0x000000004bc6a70a, 0x00000001202f6398 }, ++ /* x^184320 mod p(x)` << 1, x^184384 mod p(x)` << 1 */ ++ { 0x000000004fc7e8e4, 0x000000013aea243e }, ++ /* x^183296 mod p(x)` << 1, x^183360 mod p(x)` << 1 */ ++ { 0x0000000130103f1c, 0x00000001b4052ae6 }, ++ /* x^182272 mod p(x)` << 1, x^182336 mod p(x)` << 1 */ ++ { 0x0000000111b0024c, 0x00000001cd2a0ae8 }, ++ /* x^181248 mod p(x)` << 1, x^181312 mod p(x)` << 1 */ ++ { 0x000000010b3079da, 0x00000001fe4aa8b4 }, ++ /* x^180224 mod p(x)` << 1, x^180288 mod p(x)` << 1 */ ++ { 0x000000010192bcc2, 0x00000001d1559a42 }, ++ /* x^179200 mod p(x)` << 1, x^179264 mod p(x)` << 1 */ ++ { 0x0000000074838d50, 0x00000001f3e05ecc }, ++ /* x^178176 mod p(x)` << 1, x^178240 mod p(x)` << 1 */ ++ { 0x000000001b20f520, 0x0000000104ddd2cc }, ++ /* x^177152 mod p(x)` << 1, x^177216 mod p(x)` << 1 */ ++ { 0x0000000050c3590a, 0x000000015393153c }, ++ /* x^176128 mod p(x)` << 1, x^176192 mod p(x)` << 1 */ ++ { 0x00000000b41cac8e, 0x0000000057e942c6 }, ++ /* x^175104 mod p(x)` << 1, x^175168 mod p(x)` << 1 */ ++ { 0x000000000c72cc78, 0x000000012c633850 }, ++ /* x^174080 mod p(x)` << 1, x^174144 mod p(x)` << 1 */ ++ { 0x0000000030cdb032, 0x00000000ebcaae4c }, ++ /* x^173056 mod p(x)` << 1, x^173120 mod p(x)` << 1 */ ++ { 0x000000013e09fc32, 0x000000013ee532a6 }, ++ /* x^172032 mod p(x)` << 1, x^172096 mod p(x)` << 1 */ ++ { 0x000000001ed624d2, 0x00000001bf0cbc7e }, ++ /* x^171008 mod p(x)` << 1, x^171072 mod p(x)` << 1 */ ++ { 0x00000000781aee1a, 0x00000000d50b7a5a }, ++ /* x^169984 mod p(x)` << 1, x^170048 mod p(x)` << 1 */ ++ { 0x00000001c4d8348c, 0x0000000002fca6e8 }, ++ /* x^168960 mod p(x)` << 1, x^169024 mod p(x)` << 1 */ ++ { 0x0000000057a40336, 0x000000007af40044 }, ++ /* x^167936 mod p(x)` << 1, x^168000 mod p(x)` << 1 */ ++ { 0x0000000085544940, 0x0000000016178744 }, ++ /* x^166912 mod p(x)` << 1, x^166976 mod p(x)` << 1 */ ++ { 0x000000019cd21e80, 0x000000014c177458 }, ++ /* x^165888 mod p(x)` << 1, x^165952 mod p(x)` << 1 */ ++ { 0x000000013eb95bc0, 0x000000011b6ddf04 }, ++ /* x^164864 mod p(x)` << 1, x^164928 mod p(x)` << 1 */ ++ { 0x00000001dfc9fdfc, 0x00000001f3e29ccc }, ++ /* x^163840 mod p(x)` << 1, x^163904 mod p(x)` << 1 */ ++ { 0x00000000cd028bc2, 0x0000000135ae7562 }, ++ /* x^162816 mod p(x)` << 1, x^162880 mod p(x)` << 1 */ ++ { 0x0000000090db8c44, 0x0000000190ef812c }, ++ /* x^161792 mod p(x)` << 1, x^161856 mod p(x)` << 1 */ ++ { 0x000000010010a4ce, 0x0000000067a2c786 }, ++ /* x^160768 mod p(x)` << 1, x^160832 mod p(x)` << 1 */ ++ { 0x00000001c8f4c72c, 0x0000000048b9496c }, ++ /* x^159744 mod p(x)` << 1, x^159808 mod p(x)` << 1 */ ++ { 0x000000001c26170c, 0x000000015a422de6 }, ++ /* x^158720 mod p(x)` << 1, x^158784 mod p(x)` << 1 */ ++ { 0x00000000e3fccf68, 0x00000001ef0e3640 }, ++ /* x^157696 mod p(x)` << 1, x^157760 mod p(x)` << 1 */ ++ { 0x00000000d513ed24, 0x00000001006d2d26 }, ++ /* x^156672 mod p(x)` << 1, x^156736 mod p(x)` << 1 */ ++ { 0x00000000141beada, 0x00000001170d56d6 }, ++ /* x^155648 mod p(x)` << 1, x^155712 mod p(x)` << 1 */ ++ { 0x000000011071aea0, 0x00000000a5fb613c }, ++ /* x^154624 mod p(x)` << 1, x^154688 mod p(x)` << 1 */ ++ { 0x000000012e19080a, 0x0000000040bbf7fc }, ++ /* x^153600 mod p(x)` << 1, x^153664 mod p(x)` << 1 */ ++ { 0x0000000100ecf826, 0x000000016ac3a5b2 }, ++ /* x^152576 mod p(x)` << 1, x^152640 mod p(x)` << 1 */ ++ { 0x0000000069b09412, 0x00000000abf16230 }, ++ /* x^151552 mod p(x)` << 1, x^151616 mod p(x)` << 1 */ ++ { 0x0000000122297bac, 0x00000001ebe23fac }, ++ /* x^150528 mod p(x)` << 1, x^150592 mod p(x)` << 1 */ ++ { 0x00000000e9e4b068, 0x000000008b6a0894 }, ++ /* x^149504 mod p(x)` << 1, x^149568 mod p(x)` << 1 */ ++ { 0x000000004b38651a, 0x00000001288ea478 }, ++ /* x^148480 mod p(x)` << 1, x^148544 mod p(x)` << 1 */ ++ { 0x00000001468360e2, 0x000000016619c442 }, ++ /* x^147456 mod p(x)` << 1, x^147520 mod p(x)` << 1 */ ++ { 0x00000000121c2408, 0x0000000086230038 }, ++ /* x^146432 mod p(x)` << 1, x^146496 mod p(x)` << 1 */ ++ { 0x00000000da7e7d08, 0x000000017746a756 }, ++ /* x^145408 mod p(x)` << 1, x^145472 mod p(x)` << 1 */ ++ { 0x00000001058d7652, 0x0000000191b8f8f8 }, ++ /* x^144384 mod p(x)` << 1, x^144448 mod p(x)` << 1 */ ++ { 0x000000014a098a90, 0x000000008e167708 }, ++ /* x^143360 mod p(x)` << 1, x^143424 mod p(x)` << 1 */ ++ { 0x0000000020dbe72e, 0x0000000148b22d54 }, ++ /* x^142336 mod p(x)` << 1, x^142400 mod p(x)` << 1 */ ++ { 0x000000011e7323e8, 0x0000000044ba2c3c }, ++ /* x^141312 mod p(x)` << 1, x^141376 mod p(x)` << 1 */ ++ { 0x00000000d5d4bf94, 0x00000000b54d2b52 }, ++ /* x^140288 mod p(x)` << 1, x^140352 mod p(x)` << 1 */ ++ { 0x0000000199d8746c, 0x0000000005a4fd8a }, ++ /* x^139264 mod p(x)` << 1, x^139328 mod p(x)` << 1 */ ++ { 0x00000000ce9ca8a0, 0x0000000139f9fc46 }, ++ /* x^138240 mod p(x)` << 1, x^138304 mod p(x)` << 1 */ ++ { 0x00000000136edece, 0x000000015a1fa824 }, ++ /* x^137216 mod p(x)` << 1, x^137280 mod p(x)` << 1 */ ++ { 0x000000019b92a068, 0x000000000a61ae4c }, ++ /* x^136192 mod p(x)` << 1, x^136256 mod p(x)` << 1 */ ++ { 0x0000000071d62206, 0x0000000145e9113e }, ++ /* x^135168 mod p(x)` << 1, x^135232 mod p(x)` << 1 */ ++ { 0x00000000dfc50158, 0x000000006a348448 }, ++ /* x^134144 mod p(x)` << 1, x^134208 mod p(x)` << 1 */ ++ { 0x00000001517626bc, 0x000000004d80a08c }, ++ /* x^133120 mod p(x)` << 1, x^133184 mod p(x)` << 1 */ ++ { 0x0000000148d1e4fa, 0x000000014b6837a0 }, ++ /* x^132096 mod p(x)` << 1, x^132160 mod p(x)` << 1 */ ++ { 0x0000000094d8266e, 0x000000016896a7fc }, ++ /* x^131072 mod p(x)` << 1, x^131136 mod p(x)` << 1 */ ++ { 0x00000000606c5e34, 0x000000014f187140 }, ++ /* x^130048 mod p(x)` << 1, x^130112 mod p(x)` << 1 */ ++ { 0x000000019766beaa, 0x000000019581b9da }, ++ /* x^129024 mod p(x)` << 1, x^129088 mod p(x)` << 1 */ ++ { 0x00000001d80c506c, 0x00000001091bc984 }, ++ /* x^128000 mod p(x)` << 1, x^128064 mod p(x)` << 1 */ ++ { 0x000000001e73837c, 0x000000001067223c }, ++ /* x^126976 mod p(x)` << 1, x^127040 mod p(x)` << 1 */ ++ { 0x0000000064d587de, 0x00000001ab16ea02 }, ++ /* x^125952 mod p(x)` << 1, x^126016 mod p(x)` << 1 */ ++ { 0x00000000f4a507b0, 0x000000013c4598a8 }, ++ /* x^124928 mod p(x)` << 1, x^124992 mod p(x)` << 1 */ ++ { 0x0000000040e342fc, 0x00000000b3735430 }, ++ /* x^123904 mod p(x)` << 1, x^123968 mod p(x)` << 1 */ ++ { 0x00000001d5ad9c3a, 0x00000001bb3fc0c0 }, ++ /* x^122880 mod p(x)` << 1, x^122944 mod p(x)` << 1 */ ++ { 0x0000000094a691a4, 0x00000001570ae19c }, ++ /* x^121856 mod p(x)` << 1, x^121920 mod p(x)` << 1 */ ++ { 0x00000001271ecdfa, 0x00000001ea910712 }, ++ /* x^120832 mod p(x)` << 1, x^120896 mod p(x)` << 1 */ ++ { 0x000000009e54475a, 0x0000000167127128 }, ++ /* x^119808 mod p(x)` << 1, x^119872 mod p(x)` << 1 */ ++ { 0x00000000c9c099ee, 0x0000000019e790a2 }, ++ /* x^118784 mod p(x)` << 1, x^118848 mod p(x)` << 1 */ ++ { 0x000000009a2f736c, 0x000000003788f710 }, ++ /* x^117760 mod p(x)` << 1, x^117824 mod p(x)` << 1 */ ++ { 0x00000000bb9f4996, 0x00000001682a160e }, ++ /* x^116736 mod p(x)` << 1, x^116800 mod p(x)` << 1 */ ++ { 0x00000001db688050, 0x000000007f0ebd2e }, ++ /* x^115712 mod p(x)` << 1, x^115776 mod p(x)` << 1 */ ++ { 0x00000000e9b10af4, 0x000000002b032080 }, ++ /* x^114688 mod p(x)` << 1, x^114752 mod p(x)` << 1 */ ++ { 0x000000012d4545e4, 0x00000000cfd1664a }, ++ /* x^113664 mod p(x)` << 1, x^113728 mod p(x)` << 1 */ ++ { 0x000000000361139c, 0x00000000aa1181c2 }, ++ /* x^112640 mod p(x)` << 1, x^112704 mod p(x)` << 1 */ ++ { 0x00000001a5a1a3a8, 0x00000000ddd08002 }, ++ /* x^111616 mod p(x)` << 1, x^111680 mod p(x)` << 1 */ ++ { 0x000000006844e0b0, 0x00000000e8dd0446 }, ++ /* x^110592 mod p(x)` << 1, x^110656 mod p(x)` << 1 */ ++ { 0x00000000c3762f28, 0x00000001bbd94a00 }, ++ /* x^109568 mod p(x)` << 1, x^109632 mod p(x)` << 1 */ ++ { 0x00000001d26287a2, 0x00000000ab6cd180 }, ++ /* x^108544 mod p(x)` << 1, x^108608 mod p(x)` << 1 */ ++ { 0x00000001f6f0bba8, 0x0000000031803ce2 }, ++ /* x^107520 mod p(x)` << 1, x^107584 mod p(x)` << 1 */ ++ { 0x000000002ffabd62, 0x0000000024f40b0c }, ++ /* x^106496 mod p(x)` << 1, x^106560 mod p(x)` << 1 */ ++ { 0x00000000fb4516b8, 0x00000001ba1d9834 }, ++ /* x^105472 mod p(x)` << 1, x^105536 mod p(x)` << 1 */ ++ { 0x000000018cfa961c, 0x0000000104de61aa }, ++ /* x^104448 mod p(x)` << 1, x^104512 mod p(x)` << 1 */ ++ { 0x000000019e588d52, 0x0000000113e40d46 }, ++ /* x^103424 mod p(x)` << 1, x^103488 mod p(x)` << 1 */ ++ { 0x00000001180f0bbc, 0x00000001415598a0 }, ++ /* x^102400 mod p(x)` << 1, x^102464 mod p(x)` << 1 */ ++ { 0x00000000e1d9177a, 0x00000000bf6c8c90 }, ++ /* x^101376 mod p(x)` << 1, x^101440 mod p(x)` << 1 */ ++ { 0x0000000105abc27c, 0x00000001788b0504 }, ++ /* x^100352 mod p(x)` << 1, x^100416 mod p(x)` << 1 */ ++ { 0x00000000972e4a58, 0x0000000038385d02 }, ++ /* x^99328 mod p(x)` << 1, x^99392 mod p(x)` << 1 */ ++ { 0x0000000183499a5e, 0x00000001b6c83844 }, ++ /* x^98304 mod p(x)` << 1, x^98368 mod p(x)` << 1 */ ++ { 0x00000001c96a8cca, 0x0000000051061a8a }, ++ /* x^97280 mod p(x)` << 1, x^97344 mod p(x)` << 1 */ ++ { 0x00000001a1a5b60c, 0x000000017351388a }, ++ /* x^96256 mod p(x)` << 1, x^96320 mod p(x)` << 1 */ ++ { 0x00000000e4b6ac9c, 0x0000000132928f92 }, ++ /* x^95232 mod p(x)` << 1, x^95296 mod p(x)` << 1 */ ++ { 0x00000001807e7f5a, 0x00000000e6b4f48a }, ++ /* x^94208 mod p(x)` << 1, x^94272 mod p(x)` << 1 */ ++ { 0x000000017a7e3bc8, 0x0000000039d15e90 }, ++ /* x^93184 mod p(x)` << 1, x^93248 mod p(x)` << 1 */ ++ { 0x00000000d73975da, 0x00000000312d6074 }, ++ /* x^92160 mod p(x)` << 1, x^92224 mod p(x)` << 1 */ ++ { 0x000000017375d038, 0x000000017bbb2cc4 }, ++ /* x^91136 mod p(x)` << 1, x^91200 mod p(x)` << 1 */ ++ { 0x00000000193680bc, 0x000000016ded3e18 }, ++ /* x^90112 mod p(x)` << 1, x^90176 mod p(x)` << 1 */ ++ { 0x00000000999b06f6, 0x00000000f1638b16 }, ++ /* x^89088 mod p(x)` << 1, x^89152 mod p(x)` << 1 */ ++ { 0x00000001f685d2b8, 0x00000001d38b9ecc }, ++ /* x^88064 mod p(x)` << 1, x^88128 mod p(x)` << 1 */ ++ { 0x00000001f4ecbed2, 0x000000018b8d09dc }, ++ /* x^87040 mod p(x)` << 1, x^87104 mod p(x)` << 1 */ ++ { 0x00000000ba16f1a0, 0x00000000e7bc27d2 }, ++ /* x^86016 mod p(x)` << 1, x^86080 mod p(x)` << 1 */ ++ { 0x0000000115aceac4, 0x00000000275e1e96 }, ++ /* x^84992 mod p(x)` << 1, x^85056 mod p(x)` << 1 */ ++ { 0x00000001aeff6292, 0x00000000e2e3031e }, ++ /* x^83968 mod p(x)` << 1, x^84032 mod p(x)` << 1 */ ++ { 0x000000009640124c, 0x00000001041c84d8 }, ++ /* x^82944 mod p(x)` << 1, x^83008 mod p(x)` << 1 */ ++ { 0x0000000114f41f02, 0x00000000706ce672 }, ++ /* x^81920 mod p(x)` << 1, x^81984 mod p(x)` << 1 */ ++ { 0x000000009c5f3586, 0x000000015d5070da }, ++ /* x^80896 mod p(x)` << 1, x^80960 mod p(x)` << 1 */ ++ { 0x00000001878275fa, 0x0000000038f9493a }, ++ /* x^79872 mod p(x)` << 1, x^79936 mod p(x)` << 1 */ ++ { 0x00000000ddc42ce8, 0x00000000a3348a76 }, ++ /* x^78848 mod p(x)` << 1, x^78912 mod p(x)` << 1 */ ++ { 0x0000000181d2c73a, 0x00000001ad0aab92 }, ++ /* x^77824 mod p(x)` << 1, x^77888 mod p(x)` << 1 */ ++ { 0x0000000141c9320a, 0x000000019e85f712 }, ++ /* x^76800 mod p(x)` << 1, x^76864 mod p(x)` << 1 */ ++ { 0x000000015235719a, 0x000000005a871e76 }, ++ /* x^75776 mod p(x)` << 1, x^75840 mod p(x)` << 1 */ ++ { 0x00000000be27d804, 0x000000017249c662 }, ++ /* x^74752 mod p(x)` << 1, x^74816 mod p(x)` << 1 */ ++ { 0x000000006242d45a, 0x000000003a084712 }, ++ /* x^73728 mod p(x)` << 1, x^73792 mod p(x)` << 1 */ ++ { 0x000000009a53638e, 0x00000000ed438478 }, ++ /* x^72704 mod p(x)` << 1, x^72768 mod p(x)` << 1 */ ++ { 0x00000001001ecfb6, 0x00000000abac34cc }, ++ /* x^71680 mod p(x)` << 1, x^71744 mod p(x)` << 1 */ ++ { 0x000000016d7c2d64, 0x000000005f35ef3e }, ++ /* x^70656 mod p(x)` << 1, x^70720 mod p(x)` << 1 */ ++ { 0x00000001d0ce46c0, 0x0000000047d6608c }, ++ /* x^69632 mod p(x)` << 1, x^69696 mod p(x)` << 1 */ ++ { 0x0000000124c907b4, 0x000000002d01470e }, ++ /* x^68608 mod p(x)` << 1, x^68672 mod p(x)` << 1 */ ++ { 0x0000000018a555ca, 0x0000000158bbc7b0 }, ++ /* x^67584 mod p(x)` << 1, x^67648 mod p(x)` << 1 */ ++ { 0x000000006b0980bc, 0x00000000c0a23e8e }, ++ /* x^66560 mod p(x)` << 1, x^66624 mod p(x)` << 1 */ ++ { 0x000000008bbba964, 0x00000001ebd85c88 }, ++ /* x^65536 mod p(x)` << 1, x^65600 mod p(x)` << 1 */ ++ { 0x00000001070a5a1e, 0x000000019ee20bb2 }, ++ /* x^64512 mod p(x)` << 1, x^64576 mod p(x)` << 1 */ ++ { 0x000000002204322a, 0x00000001acabf2d6 }, ++ /* x^63488 mod p(x)` << 1, x^63552 mod p(x)` << 1 */ ++ { 0x00000000a27524d0, 0x00000001b7963d56 }, ++ /* x^62464 mod p(x)` << 1, x^62528 mod p(x)` << 1 */ ++ { 0x0000000020b1e4ba, 0x000000017bffa1fe }, ++ /* x^61440 mod p(x)` << 1, x^61504 mod p(x)` << 1 */ ++ { 0x0000000032cc27fc, 0x000000001f15333e }, ++ /* x^60416 mod p(x)` << 1, x^60480 mod p(x)` << 1 */ ++ { 0x0000000044dd22b8, 0x000000018593129e }, ++ /* x^59392 mod p(x)` << 1, x^59456 mod p(x)` << 1 */ ++ { 0x00000000dffc9e0a, 0x000000019cb32602 }, ++ /* x^58368 mod p(x)` << 1, x^58432 mod p(x)` << 1 */ ++ { 0x00000001b7a0ed14, 0x0000000142b05cc8 }, ++ /* x^57344 mod p(x)` << 1, x^57408 mod p(x)` << 1 */ ++ { 0x00000000c7842488, 0x00000001be49e7a4 }, ++ /* x^56320 mod p(x)` << 1, x^56384 mod p(x)` << 1 */ ++ { 0x00000001c02a4fee, 0x0000000108f69d6c }, ++ /* x^55296 mod p(x)` << 1, x^55360 mod p(x)` << 1 */ ++ { 0x000000003c273778, 0x000000006c0971f0 }, ++ /* x^54272 mod p(x)` << 1, x^54336 mod p(x)` << 1 */ ++ { 0x00000001d63f8894, 0x000000005b16467a }, ++ /* x^53248 mod p(x)` << 1, x^53312 mod p(x)` << 1 */ ++ { 0x000000006be557d6, 0x00000001551a628e }, ++ /* x^52224 mod p(x)` << 1, x^52288 mod p(x)` << 1 */ ++ { 0x000000006a7806ea, 0x000000019e42ea92 }, ++ /* x^51200 mod p(x)` << 1, x^51264 mod p(x)` << 1 */ ++ { 0x000000016155aa0c, 0x000000012fa83ff2 }, ++ /* x^50176 mod p(x)` << 1, x^50240 mod p(x)` << 1 */ ++ { 0x00000000908650ac, 0x000000011ca9cde0 }, ++ /* x^49152 mod p(x)` << 1, x^49216 mod p(x)` << 1 */ ++ { 0x00000000aa5a8084, 0x00000000c8e5cd74 }, ++ /* x^48128 mod p(x)` << 1, x^48192 mod p(x)` << 1 */ ++ { 0x0000000191bb500a, 0x0000000096c27f0c }, ++ /* x^47104 mod p(x)` << 1, x^47168 mod p(x)` << 1 */ ++ { 0x0000000064e9bed0, 0x000000002baed926 }, ++ /* x^46080 mod p(x)` << 1, x^46144 mod p(x)` << 1 */ ++ { 0x000000009444f302, 0x000000017c8de8d2 }, ++ /* x^45056 mod p(x)` << 1, x^45120 mod p(x)` << 1 */ ++ { 0x000000019db07d3c, 0x00000000d43d6068 }, ++ /* x^44032 mod p(x)` << 1, x^44096 mod p(x)` << 1 */ ++ { 0x00000001359e3e6e, 0x00000000cb2c4b26 }, ++ /* x^43008 mod p(x)` << 1, x^43072 mod p(x)` << 1 */ ++ { 0x00000001e4f10dd2, 0x0000000145b8da26 }, ++ /* x^41984 mod p(x)` << 1, x^42048 mod p(x)` << 1 */ ++ { 0x0000000124f5735e, 0x000000018fff4b08 }, ++ /* x^40960 mod p(x)` << 1, x^41024 mod p(x)` << 1 */ ++ { 0x0000000124760a4c, 0x0000000150b58ed0 }, ++ /* x^39936 mod p(x)` << 1, x^40000 mod p(x)` << 1 */ ++ { 0x000000000f1fc186, 0x00000001549f39bc }, ++ /* x^38912 mod p(x)` << 1, x^38976 mod p(x)` << 1 */ ++ { 0x00000000150e4cc4, 0x00000000ef4d2f42 }, ++ /* x^37888 mod p(x)` << 1, x^37952 mod p(x)` << 1 */ ++ { 0x000000002a6204e8, 0x00000001b1468572 }, ++ /* x^36864 mod p(x)` << 1, x^36928 mod p(x)` << 1 */ ++ { 0x00000000beb1d432, 0x000000013d7403b2 }, ++ /* x^35840 mod p(x)` << 1, x^35904 mod p(x)` << 1 */ ++ { 0x0000000135f3f1f0, 0x00000001a4681842 }, ++ /* x^34816 mod p(x)` << 1, x^34880 mod p(x)` << 1 */ ++ { 0x0000000074fe2232, 0x0000000167714492 }, ++ /* x^33792 mod p(x)` << 1, x^33856 mod p(x)` << 1 */ ++ { 0x000000001ac6e2ba, 0x00000001e599099a }, ++ /* x^32768 mod p(x)` << 1, x^32832 mod p(x)` << 1 */ ++ { 0x0000000013fca91e, 0x00000000fe128194 }, ++ /* x^31744 mod p(x)` << 1, x^31808 mod p(x)` << 1 */ ++ { 0x0000000183f4931e, 0x0000000077e8b990 }, ++ /* x^30720 mod p(x)` << 1, x^30784 mod p(x)` << 1 */ ++ { 0x00000000b6d9b4e4, 0x00000001a267f63a }, ++ /* x^29696 mod p(x)` << 1, x^29760 mod p(x)` << 1 */ ++ { 0x00000000b5188656, 0x00000001945c245a }, ++ /* x^28672 mod p(x)` << 1, x^28736 mod p(x)` << 1 */ ++ { 0x0000000027a81a84, 0x0000000149002e76 }, ++ /* x^27648 mod p(x)` << 1, x^27712 mod p(x)` << 1 */ ++ { 0x0000000125699258, 0x00000001bb8310a4 }, ++ /* x^26624 mod p(x)` << 1, x^26688 mod p(x)` << 1 */ ++ { 0x00000001b23de796, 0x000000019ec60bcc }, ++ /* x^25600 mod p(x)` << 1, x^25664 mod p(x)` << 1 */ ++ { 0x00000000fe4365dc, 0x000000012d8590ae }, ++ /* x^24576 mod p(x)` << 1, x^24640 mod p(x)` << 1 */ ++ { 0x00000000c68f497a, 0x0000000065b00684 }, ++ /* x^23552 mod p(x)` << 1, x^23616 mod p(x)` << 1 */ ++ { 0x00000000fbf521ee, 0x000000015e5aeadc }, ++ /* x^22528 mod p(x)` << 1, x^22592 mod p(x)` << 1 */ ++ { 0x000000015eac3378, 0x00000000b77ff2b0 }, ++ /* x^21504 mod p(x)` << 1, x^21568 mod p(x)` << 1 */ ++ { 0x0000000134914b90, 0x0000000188da2ff6 }, ++ /* x^20480 mod p(x)` << 1, x^20544 mod p(x)` << 1 */ ++ { 0x0000000016335cfe, 0x0000000063da929a }, ++ /* x^19456 mod p(x)` << 1, x^19520 mod p(x)` << 1 */ ++ { 0x000000010372d10c, 0x00000001389caa80 }, ++ /* x^18432 mod p(x)` << 1, x^18496 mod p(x)` << 1 */ ++ { 0x000000015097b908, 0x000000013db599d2 }, ++ /* x^17408 mod p(x)` << 1, x^17472 mod p(x)` << 1 */ ++ { 0x00000001227a7572, 0x0000000122505a86 }, ++ /* x^16384 mod p(x)` << 1, x^16448 mod p(x)` << 1 */ ++ { 0x000000009a8f75c0, 0x000000016bd72746 }, ++ /* x^15360 mod p(x)` << 1, x^15424 mod p(x)` << 1 */ ++ { 0x00000000682c77a2, 0x00000001c3faf1d4 }, ++ /* x^14336 mod p(x)` << 1, x^14400 mod p(x)` << 1 */ ++ { 0x00000000231f091c, 0x00000001111c826c }, ++ /* x^13312 mod p(x)` << 1, x^13376 mod p(x)` << 1 */ ++ { 0x000000007d4439f2, 0x00000000153e9fb2 }, ++ /* x^12288 mod p(x)` << 1, x^12352 mod p(x)` << 1 */ ++ { 0x000000017e221efc, 0x000000002b1f7b60 }, ++ /* x^11264 mod p(x)` << 1, x^11328 mod p(x)` << 1 */ ++ { 0x0000000167457c38, 0x00000000b1dba570 }, ++ /* x^10240 mod p(x)` << 1, x^10304 mod p(x)` << 1 */ ++ { 0x00000000bdf081c4, 0x00000001f6397b76 }, ++ /* x^9216 mod p(x)` << 1, x^9280 mod p(x)` << 1 */ ++ { 0x000000016286d6b0, 0x0000000156335214 }, ++ /* x^8192 mod p(x)` << 1, x^8256 mod p(x)` << 1 */ ++ { 0x00000000c84f001c, 0x00000001d70e3986 }, ++ /* x^7168 mod p(x)` << 1, x^7232 mod p(x)` << 1 */ ++ { 0x0000000064efe7c0, 0x000000003701a774 }, ++ /* x^6144 mod p(x)` << 1, x^6208 mod p(x)` << 1 */ ++ { 0x000000000ac2d904, 0x00000000ac81ef72 }, ++ /* x^5120 mod p(x)` << 1, x^5184 mod p(x)` << 1 */ ++ { 0x00000000fd226d14, 0x0000000133212464 }, ++ /* x^4096 mod p(x)` << 1, x^4160 mod p(x)` << 1 */ ++ { 0x000000011cfd42e0, 0x00000000e4e45610 }, ++ /* x^3072 mod p(x)` << 1, x^3136 mod p(x)` << 1 */ ++ { 0x000000016e5a5678, 0x000000000c1bd370 }, ++ /* x^2048 mod p(x)` << 1, x^2112 mod p(x)` << 1 */ ++ { 0x00000001d888fe22, 0x00000001a7b9e7a6 }, ++ /* x^1024 mod p(x)` << 1, x^1088 mod p(x)` << 1 */ ++ { 0x00000001af77fcd4, 0x000000007d657a10 } ++#endif /* __LITTLE_ENDIAN__ */ ++ }; ++ ++/* Reduce final 1024-2048 bits to 64 bits, shifting 32 bits to include the trailing 32 bits of zeros */ ++ ++static const __vector unsigned long long vcrc_short_const[16] ++ __attribute__((aligned (16))) = { ++#ifdef __LITTLE_ENDIAN__ ++ /* x^1952 mod p(x) , x^1984 mod p(x) , x^2016 mod p(x) , x^2048 mod p(x) */ ++ { 0x99168a18ec447f11, 0xed837b2613e8221e }, ++ /* x^1824 mod p(x) , x^1856 mod p(x) , x^1888 mod p(x) , x^1920 mod p(x) */ ++ { 0xe23e954e8fd2cd3c, 0xc8acdd8147b9ce5a }, ++ /* x^1696 mod p(x) , x^1728 mod p(x) , x^1760 mod p(x) , x^1792 mod p(x) */ ++ { 0x92f8befe6b1d2b53, 0xd9ad6d87d4277e25 }, ++ /* x^1568 mod p(x) , x^1600 mod p(x) , x^1632 mod p(x) , x^1664 mod p(x) */ ++ { 0xf38a3556291ea462, 0xc10ec5e033fbca3b }, ++ /* x^1440 mod p(x) , x^1472 mod p(x) , x^1504 mod p(x) , x^1536 mod p(x) */ ++ { 0x974ac56262b6ca4b, 0xc0b55b0e82e02e2f }, ++ /* x^1312 mod p(x) , x^1344 mod p(x) , x^1376 mod p(x) , x^1408 mod p(x) */ ++ { 0x855712b3784d2a56, 0x71aa1df0e172334d }, ++ /* x^1184 mod p(x) , x^1216 mod p(x) , x^1248 mod p(x) , x^1280 mod p(x) */ ++ { 0xa5abe9f80eaee722, 0xfee3053e3969324d }, ++ /* x^1056 mod p(x) , x^1088 mod p(x) , x^1120 mod p(x) , x^1152 mod p(x) */ ++ { 0x1fa0943ddb54814c, 0xf44779b93eb2bd08 }, ++ /* x^928 mod p(x) , x^960 mod p(x) , x^992 mod p(x) , x^1024 mod p(x) */ ++ { 0xa53ff440d7bbfe6a, 0xf5449b3f00cc3374 }, ++ /* x^800 mod p(x) , x^832 mod p(x) , x^864 mod p(x) , x^896 mod p(x) */ ++ { 0xebe7e3566325605c, 0x6f8346e1d777606e }, ++ /* x^672 mod p(x) , x^704 mod p(x) , x^736 mod p(x) , x^768 mod p(x) */ ++ { 0xc65a272ce5b592b8, 0xe3ab4f2ac0b95347 }, ++ /* x^544 mod p(x) , x^576 mod p(x) , x^608 mod p(x) , x^640 mod p(x) */ ++ { 0x5705a9ca4721589f, 0xaa2215ea329ecc11 }, ++ /* x^416 mod p(x) , x^448 mod p(x) , x^480 mod p(x) , x^512 mod p(x) */ ++ { 0xe3720acb88d14467, 0x1ed8f66ed95efd26 }, ++ /* x^288 mod p(x) , x^320 mod p(x) , x^352 mod p(x) , x^384 mod p(x) */ ++ { 0xba1aca0315141c31, 0x78ed02d5a700e96a }, ++ /* x^160 mod p(x) , x^192 mod p(x) , x^224 mod p(x) , x^256 mod p(x) */ ++ { 0xad2a31b3ed627dae, 0xba8ccbe832b39da3 }, ++ /* x^32 mod p(x) , x^64 mod p(x) , x^96 mod p(x) , x^128 mod p(x) */ ++ { 0x6655004fa06a2517, 0xedb88320b1e6b092 } ++#else /* __LITTLE_ENDIAN__ */ ++ /* x^1952 mod p(x) , x^1984 mod p(x) , x^2016 mod p(x) , x^2048 mod p(x) */ ++ { 0xed837b2613e8221e, 0x99168a18ec447f11 }, ++ /* x^1824 mod p(x) , x^1856 mod p(x) , x^1888 mod p(x) , x^1920 mod p(x) */ ++ { 0xc8acdd8147b9ce5a, 0xe23e954e8fd2cd3c }, ++ /* x^1696 mod p(x) , x^1728 mod p(x) , x^1760 mod p(x) , x^1792 mod p(x) */ ++ { 0xd9ad6d87d4277e25, 0x92f8befe6b1d2b53 }, ++ /* x^1568 mod p(x) , x^1600 mod p(x) , x^1632 mod p(x) , x^1664 mod p(x) */ ++ { 0xc10ec5e033fbca3b, 0xf38a3556291ea462 }, ++ /* x^1440 mod p(x) , x^1472 mod p(x) , x^1504 mod p(x) , x^1536 mod p(x) */ ++ { 0xc0b55b0e82e02e2f, 0x974ac56262b6ca4b }, ++ /* x^1312 mod p(x) , x^1344 mod p(x) , x^1376 mod p(x) , x^1408 mod p(x) */ ++ { 0x71aa1df0e172334d, 0x855712b3784d2a56 }, ++ /* x^1184 mod p(x) , x^1216 mod p(x) , x^1248 mod p(x) , x^1280 mod p(x) */ ++ { 0xfee3053e3969324d, 0xa5abe9f80eaee722 }, ++ /* x^1056 mod p(x) , x^1088 mod p(x) , x^1120 mod p(x) , x^1152 mod p(x) */ ++ { 0xf44779b93eb2bd08, 0x1fa0943ddb54814c }, ++ /* x^928 mod p(x) , x^960 mod p(x) , x^992 mod p(x) , x^1024 mod p(x) */ ++ { 0xf5449b3f00cc3374, 0xa53ff440d7bbfe6a }, ++ /* x^800 mod p(x) , x^832 mod p(x) , x^864 mod p(x) , x^896 mod p(x) */ ++ { 0x6f8346e1d777606e, 0xebe7e3566325605c }, ++ /* x^672 mod p(x) , x^704 mod p(x) , x^736 mod p(x) , x^768 mod p(x) */ ++ { 0xe3ab4f2ac0b95347, 0xc65a272ce5b592b8 }, ++ /* x^544 mod p(x) , x^576 mod p(x) , x^608 mod p(x) , x^640 mod p(x) */ ++ { 0xaa2215ea329ecc11, 0x5705a9ca4721589f }, ++ /* x^416 mod p(x) , x^448 mod p(x) , x^480 mod p(x) , x^512 mod p(x) */ ++ { 0x1ed8f66ed95efd26, 0xe3720acb88d14467 }, ++ /* x^288 mod p(x) , x^320 mod p(x) , x^352 mod p(x) , x^384 mod p(x) */ ++ { 0x78ed02d5a700e96a, 0xba1aca0315141c31 }, ++ /* x^160 mod p(x) , x^192 mod p(x) , x^224 mod p(x) , x^256 mod p(x) */ ++ { 0xba8ccbe832b39da3, 0xad2a31b3ed627dae }, ++ /* x^32 mod p(x) , x^64 mod p(x) , x^96 mod p(x) , x^128 mod p(x) */ ++ { 0xedb88320b1e6b092, 0x6655004fa06a2517 } ++#endif /* __LITTLE_ENDIAN__ */ ++ }; ++ ++/* Barrett constants */ ++/* 33 bit reflected Barrett constant m - (4^32)/n */ ++ ++static const __vector unsigned long long v_Barrett_const[2] ++ __attribute__((aligned (16))) = { ++ /* x^64 div p(x) */ ++#ifdef __LITTLE_ENDIAN__ ++ { 0x00000001f7011641, 0x0000000000000000 }, ++ { 0x00000001db710641, 0x0000000000000000 } ++#else /* __LITTLE_ENDIAN__ */ ++ { 0x0000000000000000, 0x00000001f7011641 }, ++ { 0x0000000000000000, 0x00000001db710641 } ++#endif /* __LITTLE_ENDIAN__ */ ++ }; ++#endif /* POWER8_INTRINSICS */ ++ ++#endif /* __ASSEMBLER__ */ +diff --git a/contrib/power/crc32_z_power8.c b/contrib/power/crc32_z_power8.c +new file mode 100644 +index 000000000..7858cfe0e +--- /dev/null ++++ b/contrib/power/crc32_z_power8.c +@@ -0,0 +1,679 @@ ++/* ++ * Calculate the checksum of data that is 16 byte aligned and a multiple of ++ * 16 bytes. ++ * ++ * The first step is to reduce it to 1024 bits. We do this in 8 parallel ++ * chunks in order to mask the latency of the vpmsum instructions. If we ++ * have more than 32 kB of data to checksum we repeat this step multiple ++ * times, passing in the previous 1024 bits. ++ * ++ * The next step is to reduce the 1024 bits to 64 bits. This step adds ++ * 32 bits of 0s to the end - this matches what a CRC does. We just ++ * calculate constants that land the data in this 32 bits. ++ * ++ * We then use fixed point Barrett reduction to compute a mod n over GF(2) ++ * for n = CRC using POWER8 instructions. We use x = 32. ++ * ++ * http://en.wikipedia.org/wiki/Barrett_reduction ++ * ++ * This code uses gcc vector builtins instead using assembly directly. ++ * ++ * Copyright (C) 2017 Rogerio Alves , IBM ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of either: ++ * ++ * a) the GNU General Public License as published by the Free Software ++ * Foundation; either version 2 of the License, or (at your option) ++ * any later version, or ++ * b) the Apache License, Version 2.0 ++ */ ++ ++#include ++#include "../../zutil.h" ++#include "power.h" ++ ++#define POWER8_INTRINSICS ++#define CRC_TABLE ++ ++#ifdef CRC32_CONSTANTS_HEADER ++#include CRC32_CONSTANTS_HEADER ++#else ++#include "crc32_constants.h" ++#endif ++ ++#define VMX_ALIGN 16 ++#define VMX_ALIGN_MASK (VMX_ALIGN-1) ++ ++#ifdef REFLECT ++static unsigned int crc32_align(unsigned int crc, const unsigned char *p, ++ unsigned long len) ++{ ++ while (len--) ++ crc = crc_table[(crc ^ *p++) & 0xff] ^ (crc >> 8); ++ return crc; ++} ++#else ++static unsigned int crc32_align(unsigned int crc, const unsigned char *p, ++ unsigned long len) ++{ ++ while (len--) ++ crc = crc_table[((crc >> 24) ^ *p++) & 0xff] ^ (crc << 8); ++ return crc; ++} ++#endif ++ ++static unsigned int __attribute__ ((aligned (32))) ++__crc32_vpmsum(unsigned int crc, const void* p, unsigned long len); ++ ++unsigned long ZLIB_INTERNAL _crc32_z_power8(uLong _crc, const Bytef *_p, ++ z_size_t _len) ++{ ++ unsigned int prealign; ++ unsigned int tail; ++ ++ /* Map zlib API to crc32_vpmsum API */ ++ unsigned int crc = (unsigned int) (0xffffffff & _crc); ++ const unsigned char *p = _p; ++ unsigned long len = (unsigned long) _len; ++ ++ if (p == (const unsigned char *) 0x0) return 0; ++#ifdef CRC_XOR ++ crc ^= 0xffffffff; ++#endif ++ ++ if (len < VMX_ALIGN + VMX_ALIGN_MASK) { ++ crc = crc32_align(crc, p, len); ++ goto out; ++ } ++ ++ if ((unsigned long)p & VMX_ALIGN_MASK) { ++ prealign = VMX_ALIGN - ((unsigned long)p & VMX_ALIGN_MASK); ++ crc = crc32_align(crc, p, prealign); ++ len -= prealign; ++ p += prealign; ++ } ++ ++ crc = __crc32_vpmsum(crc, p, len & ~VMX_ALIGN_MASK); ++ ++ tail = len & VMX_ALIGN_MASK; ++ if (tail) { ++ p += len & ~VMX_ALIGN_MASK; ++ crc = crc32_align(crc, p, tail); ++ } ++ ++out: ++#ifdef CRC_XOR ++ crc ^= 0xffffffff; ++#endif ++ ++ /* Convert to zlib API */ ++ return (unsigned long) crc; ++} ++ ++#if defined (__clang__) ++#include "clang_workaround.h" ++#else ++#define __builtin_pack_vector(a, b) __builtin_pack_vector_int128 ((a), (b)) ++#define __builtin_unpack_vector_0(a) __builtin_unpack_vector_int128 ((vector __int128_t)(a), 0) ++#define __builtin_unpack_vector_1(a) __builtin_unpack_vector_int128 ((vector __int128_t)(a), 1) ++#endif ++ ++/* When we have a load-store in a single-dispatch group and address overlap ++ * such that foward is not allowed (load-hit-store) the group must be flushed. ++ * A group ending NOP prevents the flush. ++ */ ++#define GROUP_ENDING_NOP asm("ori 2,2,0" ::: "memory") ++ ++#if defined(__BIG_ENDIAN__) && defined (REFLECT) ++#define BYTESWAP_DATA ++#elif defined(__LITTLE_ENDIAN__) && !defined(REFLECT) ++#define BYTESWAP_DATA ++#endif ++ ++#ifdef BYTESWAP_DATA ++#define VEC_PERM(vr, va, vb, vc) vr = vec_perm(va, vb,\ ++ (__vector unsigned char) vc) ++#if defined(__LITTLE_ENDIAN__) ++/* Byte reverse permute constant LE. */ ++static const __vector unsigned long long vperm_const ++ __attribute__ ((aligned(16))) = { 0x08090A0B0C0D0E0FUL, ++ 0x0001020304050607UL }; ++#else ++static const __vector unsigned long long vperm_const ++ __attribute__ ((aligned(16))) = { 0x0F0E0D0C0B0A0908UL, ++ 0X0706050403020100UL }; ++#endif ++#else ++#define VEC_PERM(vr, va, vb, vc) ++#endif ++ ++static unsigned int __attribute__ ((aligned (32))) ++__crc32_vpmsum(unsigned int crc, const void* p, unsigned long len) { ++ ++ const __vector unsigned long long vzero = {0,0}; ++ const __vector unsigned long long vones = {0xffffffffffffffffUL, ++ 0xffffffffffffffffUL}; ++ ++#ifdef REFLECT ++ const __vector unsigned long long vmask_32bit = ++ (__vector unsigned long long)vec_sld((__vector unsigned char)vzero, ++ (__vector unsigned char)vones, 4); ++#endif ++ ++ const __vector unsigned long long vmask_64bit = ++ (__vector unsigned long long)vec_sld((__vector unsigned char)vzero, ++ (__vector unsigned char)vones, 8); ++ ++ __vector unsigned long long vcrc; ++ ++ __vector unsigned long long vconst1, vconst2; ++ ++ /* vdata0-vdata7 will contain our data (p). */ ++ __vector unsigned long long vdata0, vdata1, vdata2, vdata3, vdata4, ++ vdata5, vdata6, vdata7; ++ ++ /* v0-v7 will contain our checksums */ ++ __vector unsigned long long v0 = {0,0}; ++ __vector unsigned long long v1 = {0,0}; ++ __vector unsigned long long v2 = {0,0}; ++ __vector unsigned long long v3 = {0,0}; ++ __vector unsigned long long v4 = {0,0}; ++ __vector unsigned long long v5 = {0,0}; ++ __vector unsigned long long v6 = {0,0}; ++ __vector unsigned long long v7 = {0,0}; ++ ++ ++ /* Vector auxiliary variables. */ ++ __vector unsigned long long va0, va1, va2, va3, va4, va5, va6, va7; ++ ++ unsigned int result = 0; ++ unsigned int offset; /* Constant table offset. */ ++ ++ unsigned long i; /* Counter. */ ++ unsigned long chunks; ++ ++ unsigned long block_size; ++ int next_block = 0; ++ ++ /* Align by 128 bits. The last 128 bit block will be processed at end. */ ++ unsigned long length = len & 0xFFFFFFFFFFFFFF80UL; ++ ++#ifdef REFLECT ++ vcrc = (__vector unsigned long long)__builtin_pack_vector(0UL, crc); ++#else ++ vcrc = (__vector unsigned long long)__builtin_pack_vector(crc, 0UL); ++ ++ /* Shift into top 32 bits */ ++ vcrc = (__vector unsigned long long)vec_sld((__vector unsigned char)vcrc, ++ (__vector unsigned char)vzero, 4); ++#endif ++ ++ /* Short version. */ ++ if (len < 256) { ++ /* Calculate where in the constant table we need to start. */ ++ offset = 256 - len; ++ ++ vconst1 = vec_ld(offset, vcrc_short_const); ++ vdata0 = vec_ld(0, (__vector unsigned long long*) p); ++ VEC_PERM(vdata0, vdata0, vconst1, vperm_const); ++ ++ /* xor initial value*/ ++ vdata0 = vec_xor(vdata0, vcrc); ++ ++ vdata0 = (__vector unsigned long long) __builtin_crypto_vpmsumw ++ ((__vector unsigned int)vdata0, (__vector unsigned int)vconst1); ++ v0 = vec_xor(v0, vdata0); ++ ++ for (i = 16; i < len; i += 16) { ++ vconst1 = vec_ld(offset + i, vcrc_short_const); ++ vdata0 = vec_ld(i, (__vector unsigned long long*) p); ++ VEC_PERM(vdata0, vdata0, vconst1, vperm_const); ++ vdata0 = (__vector unsigned long long) __builtin_crypto_vpmsumw ++ ((__vector unsigned int)vdata0, (__vector unsigned int)vconst1); ++ v0 = vec_xor(v0, vdata0); ++ } ++ } else { ++ ++ /* Load initial values. */ ++ vdata0 = vec_ld(0, (__vector unsigned long long*) p); ++ vdata1 = vec_ld(16, (__vector unsigned long long*) p); ++ ++ VEC_PERM(vdata0, vdata0, vdata0, vperm_const); ++ VEC_PERM(vdata1, vdata1, vdata1, vperm_const); ++ ++ vdata2 = vec_ld(32, (__vector unsigned long long*) p); ++ vdata3 = vec_ld(48, (__vector unsigned long long*) p); ++ ++ VEC_PERM(vdata2, vdata2, vdata2, vperm_const); ++ VEC_PERM(vdata3, vdata3, vdata3, vperm_const); ++ ++ vdata4 = vec_ld(64, (__vector unsigned long long*) p); ++ vdata5 = vec_ld(80, (__vector unsigned long long*) p); ++ ++ VEC_PERM(vdata4, vdata4, vdata4, vperm_const); ++ VEC_PERM(vdata5, vdata5, vdata5, vperm_const); ++ ++ vdata6 = vec_ld(96, (__vector unsigned long long*) p); ++ vdata7 = vec_ld(112, (__vector unsigned long long*) p); ++ ++ VEC_PERM(vdata6, vdata6, vdata6, vperm_const); ++ VEC_PERM(vdata7, vdata7, vdata7, vperm_const); ++ ++ /* xor in initial value */ ++ vdata0 = vec_xor(vdata0, vcrc); ++ ++ p = (char *)p + 128; ++ ++ do { ++ /* Checksum in blocks of MAX_SIZE. */ ++ block_size = length; ++ if (block_size > MAX_SIZE) { ++ block_size = MAX_SIZE; ++ } ++ ++ length = length - block_size; ++ ++ /* ++ * Work out the offset into the constants table to start at. Each ++ * constant is 16 bytes, and it is used against 128 bytes of input ++ * data - 128 / 16 = 8 ++ */ ++ offset = (MAX_SIZE/8) - (block_size/8); ++ /* We reduce our final 128 bytes in a separate step */ ++ chunks = (block_size/128)-1; ++ ++ vconst1 = vec_ld(offset, vcrc_const); ++ ++ va0 = __builtin_crypto_vpmsumd ((__vector unsigned long long)vdata0, ++ (__vector unsigned long long)vconst1); ++ va1 = __builtin_crypto_vpmsumd ((__vector unsigned long long)vdata1, ++ (__vector unsigned long long)vconst1); ++ va2 = __builtin_crypto_vpmsumd ((__vector unsigned long long)vdata2, ++ (__vector unsigned long long)vconst1); ++ va3 = __builtin_crypto_vpmsumd ((__vector unsigned long long)vdata3, ++ (__vector unsigned long long)vconst1); ++ va4 = __builtin_crypto_vpmsumd ((__vector unsigned long long)vdata4, ++ (__vector unsigned long long)vconst1); ++ va5 = __builtin_crypto_vpmsumd ((__vector unsigned long long)vdata5, ++ (__vector unsigned long long)vconst1); ++ va6 = __builtin_crypto_vpmsumd ((__vector unsigned long long)vdata6, ++ (__vector unsigned long long)vconst1); ++ va7 = __builtin_crypto_vpmsumd ((__vector unsigned long long)vdata7, ++ (__vector unsigned long long)vconst1); ++ ++ if (chunks > 1) { ++ offset += 16; ++ vconst2 = vec_ld(offset, vcrc_const); ++ GROUP_ENDING_NOP; ++ ++ vdata0 = vec_ld(0, (__vector unsigned long long*) p); ++ VEC_PERM(vdata0, vdata0, vdata0, vperm_const); ++ ++ vdata1 = vec_ld(16, (__vector unsigned long long*) p); ++ VEC_PERM(vdata1, vdata1, vdata1, vperm_const); ++ ++ vdata2 = vec_ld(32, (__vector unsigned long long*) p); ++ VEC_PERM(vdata2, vdata2, vdata2, vperm_const); ++ ++ vdata3 = vec_ld(48, (__vector unsigned long long*) p); ++ VEC_PERM(vdata3, vdata3, vdata3, vperm_const); ++ ++ vdata4 = vec_ld(64, (__vector unsigned long long*) p); ++ VEC_PERM(vdata4, vdata4, vdata4, vperm_const); ++ ++ vdata5 = vec_ld(80, (__vector unsigned long long*) p); ++ VEC_PERM(vdata5, vdata5, vdata5, vperm_const); ++ ++ vdata6 = vec_ld(96, (__vector unsigned long long*) p); ++ VEC_PERM(vdata6, vdata6, vdata6, vperm_const); ++ ++ vdata7 = vec_ld(112, (__vector unsigned long long*) p); ++ VEC_PERM(vdata7, vdata7, vdata7, vperm_const); ++ ++ p = (char *)p + 128; ++ ++ /* ++ * main loop. We modulo schedule it such that it takes three ++ * iterations to complete - first iteration load, second ++ * iteration vpmsum, third iteration xor. ++ */ ++ for (i = 0; i < chunks-2; i++) { ++ vconst1 = vec_ld(offset, vcrc_const); ++ offset += 16; ++ GROUP_ENDING_NOP; ++ ++ v0 = vec_xor(v0, va0); ++ va0 = __builtin_crypto_vpmsumd ((__vector unsigned long ++ long)vdata0, (__vector unsigned long long)vconst2); ++ vdata0 = vec_ld(0, (__vector unsigned long long*) p); ++ VEC_PERM(vdata0, vdata0, vdata0, vperm_const); ++ GROUP_ENDING_NOP; ++ ++ v1 = vec_xor(v1, va1); ++ va1 = __builtin_crypto_vpmsumd ((__vector unsigned long ++ long)vdata1, (__vector unsigned long long)vconst2); ++ vdata1 = vec_ld(16, (__vector unsigned long long*) p); ++ VEC_PERM(vdata1, vdata1, vdata1, vperm_const); ++ GROUP_ENDING_NOP; ++ ++ v2 = vec_xor(v2, va2); ++ va2 = __builtin_crypto_vpmsumd ((__vector unsigned long ++ long)vdata2, (__vector unsigned long long)vconst2); ++ vdata2 = vec_ld(32, (__vector unsigned long long*) p); ++ VEC_PERM(vdata2, vdata2, vdata2, vperm_const); ++ GROUP_ENDING_NOP; ++ ++ v3 = vec_xor(v3, va3); ++ va3 = __builtin_crypto_vpmsumd ((__vector unsigned long ++ long)vdata3, (__vector unsigned long long)vconst2); ++ vdata3 = vec_ld(48, (__vector unsigned long long*) p); ++ VEC_PERM(vdata3, vdata3, vdata3, vperm_const); ++ ++ vconst2 = vec_ld(offset, vcrc_const); ++ GROUP_ENDING_NOP; ++ ++ v4 = vec_xor(v4, va4); ++ va4 = __builtin_crypto_vpmsumd ((__vector unsigned long ++ long)vdata4, (__vector unsigned long long)vconst1); ++ vdata4 = vec_ld(64, (__vector unsigned long long*) p); ++ VEC_PERM(vdata4, vdata4, vdata4, vperm_const); ++ GROUP_ENDING_NOP; ++ ++ v5 = vec_xor(v5, va5); ++ va5 = __builtin_crypto_vpmsumd ((__vector unsigned long ++ long)vdata5, (__vector unsigned long long)vconst1); ++ vdata5 = vec_ld(80, (__vector unsigned long long*) p); ++ VEC_PERM(vdata5, vdata5, vdata5, vperm_const); ++ GROUP_ENDING_NOP; ++ ++ v6 = vec_xor(v6, va6); ++ va6 = __builtin_crypto_vpmsumd ((__vector unsigned long ++ long)vdata6, (__vector unsigned long long)vconst1); ++ vdata6 = vec_ld(96, (__vector unsigned long long*) p); ++ VEC_PERM(vdata6, vdata6, vdata6, vperm_const); ++ GROUP_ENDING_NOP; ++ ++ v7 = vec_xor(v7, va7); ++ va7 = __builtin_crypto_vpmsumd ((__vector unsigned long ++ long)vdata7, (__vector unsigned long long)vconst1); ++ vdata7 = vec_ld(112, (__vector unsigned long long*) p); ++ VEC_PERM(vdata7, vdata7, vdata7, vperm_const); ++ ++ p = (char *)p + 128; ++ } ++ ++ /* First cool down*/ ++ vconst1 = vec_ld(offset, vcrc_const); ++ offset += 16; ++ ++ v0 = vec_xor(v0, va0); ++ va0 = __builtin_crypto_vpmsumd ((__vector unsigned long ++ long)vdata0, (__vector unsigned long long)vconst1); ++ GROUP_ENDING_NOP; ++ ++ v1 = vec_xor(v1, va1); ++ va1 = __builtin_crypto_vpmsumd ((__vector unsigned long ++ long)vdata1, (__vector unsigned long long)vconst1); ++ GROUP_ENDING_NOP; ++ ++ v2 = vec_xor(v2, va2); ++ va2 = __builtin_crypto_vpmsumd ((__vector unsigned long ++ long)vdata2, (__vector unsigned long long)vconst1); ++ GROUP_ENDING_NOP; ++ ++ v3 = vec_xor(v3, va3); ++ va3 = __builtin_crypto_vpmsumd ((__vector unsigned long ++ long)vdata3, (__vector unsigned long long)vconst1); ++ GROUP_ENDING_NOP; ++ ++ v4 = vec_xor(v4, va4); ++ va4 = __builtin_crypto_vpmsumd ((__vector unsigned long ++ long)vdata4, (__vector unsigned long long)vconst1); ++ GROUP_ENDING_NOP; ++ ++ v5 = vec_xor(v5, va5); ++ va5 = __builtin_crypto_vpmsumd ((__vector unsigned long ++ long)vdata5, (__vector unsigned long long)vconst1); ++ GROUP_ENDING_NOP; ++ ++ v6 = vec_xor(v6, va6); ++ va6 = __builtin_crypto_vpmsumd ((__vector unsigned long ++ long)vdata6, (__vector unsigned long long)vconst1); ++ GROUP_ENDING_NOP; ++ ++ v7 = vec_xor(v7, va7); ++ va7 = __builtin_crypto_vpmsumd ((__vector unsigned long ++ long)vdata7, (__vector unsigned long long)vconst1); ++ }/* else */ ++ ++ /* Second cool down. */ ++ v0 = vec_xor(v0, va0); ++ v1 = vec_xor(v1, va1); ++ v2 = vec_xor(v2, va2); ++ v3 = vec_xor(v3, va3); ++ v4 = vec_xor(v4, va4); ++ v5 = vec_xor(v5, va5); ++ v6 = vec_xor(v6, va6); ++ v7 = vec_xor(v7, va7); ++ ++#ifdef REFLECT ++ /* ++ * vpmsumd produces a 96 bit result in the least significant bits ++ * of the register. Since we are bit reflected we have to shift it ++ * left 32 bits so it occupies the least significant bits in the ++ * bit reflected domain. ++ */ ++ v0 = (__vector unsigned long long)vec_sld((__vector unsigned char)v0, ++ (__vector unsigned char)vzero, 4); ++ v1 = (__vector unsigned long long)vec_sld((__vector unsigned char)v1, ++ (__vector unsigned char)vzero, 4); ++ v2 = (__vector unsigned long long)vec_sld((__vector unsigned char)v2, ++ (__vector unsigned char)vzero, 4); ++ v3 = (__vector unsigned long long)vec_sld((__vector unsigned char)v3, ++ (__vector unsigned char)vzero, 4); ++ v4 = (__vector unsigned long long)vec_sld((__vector unsigned char)v4, ++ (__vector unsigned char)vzero, 4); ++ v5 = (__vector unsigned long long)vec_sld((__vector unsigned char)v5, ++ (__vector unsigned char)vzero, 4); ++ v6 = (__vector unsigned long long)vec_sld((__vector unsigned char)v6, ++ (__vector unsigned char)vzero, 4); ++ v7 = (__vector unsigned long long)vec_sld((__vector unsigned char)v7, ++ (__vector unsigned char)vzero, 4); ++#endif ++ ++ /* xor with the last 1024 bits. */ ++ va0 = vec_ld(0, (__vector unsigned long long*) p); ++ VEC_PERM(va0, va0, va0, vperm_const); ++ ++ va1 = vec_ld(16, (__vector unsigned long long*) p); ++ VEC_PERM(va1, va1, va1, vperm_const); ++ ++ va2 = vec_ld(32, (__vector unsigned long long*) p); ++ VEC_PERM(va2, va2, va2, vperm_const); ++ ++ va3 = vec_ld(48, (__vector unsigned long long*) p); ++ VEC_PERM(va3, va3, va3, vperm_const); ++ ++ va4 = vec_ld(64, (__vector unsigned long long*) p); ++ VEC_PERM(va4, va4, va4, vperm_const); ++ ++ va5 = vec_ld(80, (__vector unsigned long long*) p); ++ VEC_PERM(va5, va5, va5, vperm_const); ++ ++ va6 = vec_ld(96, (__vector unsigned long long*) p); ++ VEC_PERM(va6, va6, va6, vperm_const); ++ ++ va7 = vec_ld(112, (__vector unsigned long long*) p); ++ VEC_PERM(va7, va7, va7, vperm_const); ++ ++ p = (char *)p + 128; ++ ++ vdata0 = vec_xor(v0, va0); ++ vdata1 = vec_xor(v1, va1); ++ vdata2 = vec_xor(v2, va2); ++ vdata3 = vec_xor(v3, va3); ++ vdata4 = vec_xor(v4, va4); ++ vdata5 = vec_xor(v5, va5); ++ vdata6 = vec_xor(v6, va6); ++ vdata7 = vec_xor(v7, va7); ++ ++ /* Check if we have more blocks to process */ ++ next_block = 0; ++ if (length != 0) { ++ next_block = 1; ++ ++ /* zero v0-v7 */ ++ v0 = vec_xor(v0, v0); ++ v1 = vec_xor(v1, v1); ++ v2 = vec_xor(v2, v2); ++ v3 = vec_xor(v3, v3); ++ v4 = vec_xor(v4, v4); ++ v5 = vec_xor(v5, v5); ++ v6 = vec_xor(v6, v6); ++ v7 = vec_xor(v7, v7); ++ } ++ length = length + 128; ++ ++ } while (next_block); ++ ++ /* Calculate how many bytes we have left. */ ++ length = (len & 127); ++ ++ /* Calculate where in (short) constant table we need to start. */ ++ offset = 128 - length; ++ ++ v0 = vec_ld(offset, vcrc_short_const); ++ v1 = vec_ld(offset + 16, vcrc_short_const); ++ v2 = vec_ld(offset + 32, vcrc_short_const); ++ v3 = vec_ld(offset + 48, vcrc_short_const); ++ v4 = vec_ld(offset + 64, vcrc_short_const); ++ v5 = vec_ld(offset + 80, vcrc_short_const); ++ v6 = vec_ld(offset + 96, vcrc_short_const); ++ v7 = vec_ld(offset + 112, vcrc_short_const); ++ ++ offset += 128; ++ ++ v0 = (__vector unsigned long long)__builtin_crypto_vpmsumw ( ++ (__vector unsigned int)vdata0,(__vector unsigned int)v0); ++ v1 = (__vector unsigned long long)__builtin_crypto_vpmsumw ( ++ (__vector unsigned int)vdata1,(__vector unsigned int)v1); ++ v2 = (__vector unsigned long long)__builtin_crypto_vpmsumw ( ++ (__vector unsigned int)vdata2,(__vector unsigned int)v2); ++ v3 = (__vector unsigned long long)__builtin_crypto_vpmsumw ( ++ (__vector unsigned int)vdata3,(__vector unsigned int)v3); ++ v4 = (__vector unsigned long long)__builtin_crypto_vpmsumw ( ++ (__vector unsigned int)vdata4,(__vector unsigned int)v4); ++ v5 = (__vector unsigned long long)__builtin_crypto_vpmsumw ( ++ (__vector unsigned int)vdata5,(__vector unsigned int)v5); ++ v6 = (__vector unsigned long long)__builtin_crypto_vpmsumw ( ++ (__vector unsigned int)vdata6,(__vector unsigned int)v6); ++ v7 = (__vector unsigned long long)__builtin_crypto_vpmsumw ( ++ (__vector unsigned int)vdata7,(__vector unsigned int)v7); ++ ++ /* Now reduce the tail (0-112 bytes). */ ++ for (i = 0; i < length; i+=16) { ++ vdata0 = vec_ld(i,(__vector unsigned long long*)p); ++ VEC_PERM(vdata0, vdata0, vdata0, vperm_const); ++ va0 = vec_ld(offset + i,vcrc_short_const); ++ va0 = (__vector unsigned long long)__builtin_crypto_vpmsumw ( ++ (__vector unsigned int)vdata0,(__vector unsigned int)va0); ++ v0 = vec_xor(v0, va0); ++ } ++ ++ /* xor all parallel chunks together. */ ++ v0 = vec_xor(v0, v1); ++ v2 = vec_xor(v2, v3); ++ v4 = vec_xor(v4, v5); ++ v6 = vec_xor(v6, v7); ++ ++ v0 = vec_xor(v0, v2); ++ v4 = vec_xor(v4, v6); ++ ++ v0 = vec_xor(v0, v4); ++ } ++ ++ /* Barrett Reduction */ ++ vconst1 = vec_ld(0, v_Barrett_const); ++ vconst2 = vec_ld(16, v_Barrett_const); ++ ++ v1 = (__vector unsigned long long)vec_sld((__vector unsigned char)v0, ++ (__vector unsigned char)v0, 8); ++ v0 = vec_xor(v1,v0); ++ ++#ifdef REFLECT ++ /* shift left one bit */ ++ __vector unsigned char vsht_splat = vec_splat_u8 (1); ++ v0 = (__vector unsigned long long)vec_sll ((__vector unsigned char)v0, ++ vsht_splat); ++#endif ++ ++ v0 = vec_and(v0, vmask_64bit); ++ ++#ifndef REFLECT ++ ++ /* ++ * Now for the actual algorithm. The idea is to calculate q, ++ * the multiple of our polynomial that we need to subtract. By ++ * doing the computation 2x bits higher (ie 64 bits) and shifting the ++ * result back down 2x bits, we round down to the nearest multiple. ++ */ ++ ++ /* ma */ ++ v1 = __builtin_crypto_vpmsumd ((__vector unsigned long long)v0, ++ (__vector unsigned long long)vconst1); ++ /* q = floor(ma/(2^64)) */ ++ v1 = (__vector unsigned long long)vec_sld ((__vector unsigned char)vzero, ++ (__vector unsigned char)v1, 8); ++ /* qn */ ++ v1 = __builtin_crypto_vpmsumd ((__vector unsigned long long)v1, ++ (__vector unsigned long long)vconst2); ++ /* a - qn, subtraction is xor in GF(2) */ ++ v0 = vec_xor (v0, v1); ++ /* ++ * Get the result into r3. We need to shift it left 8 bytes: ++ * V0 [ 0 1 2 X ] ++ * V0 [ 0 X 2 3 ] ++ */ ++ result = __builtin_unpack_vector_1 (v0); ++#else ++ ++ /* ++ * The reflected version of Barrett reduction. Instead of bit ++ * reflecting our data (which is expensive to do), we bit reflect our ++ * constants and our algorithm, which means the intermediate data in ++ * our vector registers goes from 0-63 instead of 63-0. We can reflect ++ * the algorithm because we don't carry in mod 2 arithmetic. ++ */ ++ ++ /* bottom 32 bits of a */ ++ v1 = vec_and(v0, vmask_32bit); ++ ++ /* ma */ ++ v1 = __builtin_crypto_vpmsumd ((__vector unsigned long long)v1, ++ (__vector unsigned long long)vconst1); ++ ++ /* bottom 32bits of ma */ ++ v1 = vec_and(v1, vmask_32bit); ++ /* qn */ ++ v1 = __builtin_crypto_vpmsumd ((__vector unsigned long long)v1, ++ (__vector unsigned long long)vconst2); ++ /* a - qn, subtraction is xor in GF(2) */ ++ v0 = vec_xor (v0, v1); ++ ++ /* ++ * Since we are bit reflected, the result (ie the low 32 bits) is in ++ * the high 32 bits. We just need to shift it left 4 bytes ++ * V0 [ 0 1 X 3 ] ++ * V0 [ 0 X 2 3 ] ++ */ ++ ++ /* shift result into top 64 bits of */ ++ v0 = (__vector unsigned long long)vec_sld((__vector unsigned char)v0, ++ (__vector unsigned char)vzero, 4); ++ ++ result = __builtin_unpack_vector_0 (v0); ++#endif ++ ++ return result; ++} +diff --git a/contrib/power/crc32_z_resolver.c b/contrib/power/crc32_z_resolver.c +new file mode 100644 +index 000000000..f4e9aa491 +--- /dev/null ++++ b/contrib/power/crc32_z_resolver.c +@@ -0,0 +1,15 @@ ++/* Copyright (C) 2019 Matheus Castanho , IBM ++ * For conditions of distribution and use, see copyright notice in zlib.h ++ */ ++ ++#include "../gcc/zifunc.h" ++#include "power.h" ++ ++Z_IFUNC(crc32_z) { ++#ifdef Z_POWER8 ++ if (__builtin_cpu_supports("arch_2_07")) ++ return _crc32_z_power8; ++#endif ++ ++ return crc32_z_default; ++} +diff --git a/contrib/power/power.h b/contrib/power/power.h +index b42c7d6c6..79123aa90 100644 +--- a/contrib/power/power.h ++++ b/contrib/power/power.h +@@ -2,3 +2,7 @@ + * 2019 Rogerio Alves , IBM + * For conditions of distribution and use, see copyright notice in zlib.h + */ ++ ++#include "../../zconf.h" ++ ++unsigned long _crc32_z_power8(unsigned long, const Bytef *, z_size_t); +diff --git a/crc32.c b/crc32.c +index f8357b083..4e5830b86 100644 +--- a/crc32.c ++++ b/crc32.c +@@ -745,6 +745,13 @@ local z_word_t crc_word_big(data) + #endif + + /* ========================================================================= */ ++#ifdef Z_POWER_OPT ++/* Rename function so resolver can use its symbol. The default version will be ++ * returned by the resolver if the host has no support for an optimized version. ++ */ ++#define crc32_z crc32_z_default ++#endif /* Z_POWER_OPT */ ++ + unsigned long ZEXPORT crc32_z(crc, buf, len) + unsigned long crc; + const unsigned char FAR *buf; +@@ -1066,6 +1073,11 @@ unsigned long ZEXPORT crc32_z(crc, buf, len) + return crc ^ 0xffffffff; + } + ++#ifdef Z_POWER_OPT ++#undef crc32_z ++#include "contrib/power/crc32_z_resolver.c" ++#endif /* Z_POWER_OPT */ ++ + #endif + + /* ========================================================================= */ +diff --git a/test/crc32_test.c b/test/crc32_test.c +new file mode 100644 +index 000000000..3155553e6 +--- /dev/null ++++ b/test/crc32_test.c +@@ -0,0 +1,205 @@ ++/* crc32_tes.c -- unit test for crc32 in the zlib compression library ++ * Copyright (C) 1995-2006, 2010, 2011, 2016, 2019 Rogerio Alves ++ * For conditions of distribution and use, see copyright notice in zlib.h ++ */ ++ ++#include "zlib.h" ++#include ++ ++#ifdef STDC ++# include ++# include ++#endif ++ ++void test_crc32 OF((uLong crc, Byte* buf, z_size_t len, uLong chk, int line)); ++int main OF((void)); ++ ++typedef struct { ++ int line; ++ uLong crc; ++ char* buf; ++ int len; ++ uLong expect; ++} crc32_test; ++ ++void test_crc32(crc, buf, len, chk, line) ++ uLong crc; ++ Byte *buf; ++ z_size_t len; ++ uLong chk; ++ int line; ++{ ++ uLong res = crc32(crc, buf, len); ++ if (res != chk) { ++ fprintf(stderr, "FAIL [%d]: crc32 returned 0x%08X expected 0x%08X\n", ++ line, (unsigned int)res, (unsigned int)chk); ++ exit(1); ++ } ++} ++ ++static const crc32_test tests[] = { ++ {__LINE__, 0x0, 0x0, 0, 0x0}, ++ {__LINE__, 0xffffffff, 0x0, 0, 0x0}, ++ {__LINE__, 0x0, 0x0, 255, 0x0}, /* BZ 174799. */ ++ {__LINE__, 0x0, 0x0, 256, 0x0}, ++ {__LINE__, 0x0, 0x0, 257, 0x0}, ++ {__LINE__, 0x0, 0x0, 32767, 0x0}, ++ {__LINE__, 0x0, 0x0, 32768, 0x0}, ++ {__LINE__, 0x0, 0x0, 32769, 0x0}, ++ {__LINE__, 0x0, "", 0, 0x0}, ++ {__LINE__, 0xffffffff, "", 0, 0xffffffff}, ++ {__LINE__, 0x0, "abacus", 6, 0xc3d7115b}, ++ {__LINE__, 0x0, "backlog", 7, 0x269205}, ++ {__LINE__, 0x0, "campfire", 8, 0x22a515f8}, ++ {__LINE__, 0x0, "delta", 5, 0x9643fed9}, ++ {__LINE__, 0x0, "executable", 10, 0xd68eda01}, ++ {__LINE__, 0x0, "file", 4, 0x8c9f3610}, ++ {__LINE__, 0x0, "greatest", 8, 0xc1abd6cd}, ++ {__LINE__, 0x0, "hello", 5, 0x3610a686}, ++ {__LINE__, 0x0, "inverter", 8, 0xc9e962c9}, ++ {__LINE__, 0x0, "jigsaw", 6, 0xce4e3f69}, ++ {__LINE__, 0x0, "karate", 6, 0x890be0e2}, ++ {__LINE__, 0x0, "landscape", 9, 0xc4e0330b}, ++ {__LINE__, 0x0, "machine", 7, 0x1505df84}, ++ {__LINE__, 0x0, "nanometer", 9, 0xd4e19f39}, ++ {__LINE__, 0x0, "oblivion", 8, 0xdae9de77}, ++ {__LINE__, 0x0, "panama", 6, 0x66b8979c}, ++ {__LINE__, 0x0, "quest", 5, 0x4317f817}, ++ {__LINE__, 0x0, "resource", 8, 0xbc91f416}, ++ {__LINE__, 0x0, "secret", 6, 0x5ca2e8e5}, ++ {__LINE__, 0x0, "test", 4, 0xd87f7e0c}, ++ {__LINE__, 0x0, "ultimate", 8, 0x3fc79b0b}, ++ {__LINE__, 0x0, "vector", 6, 0x1b6e485b}, ++ {__LINE__, 0x0, "walrus", 6, 0xbe769b97}, ++ {__LINE__, 0x0, "xeno", 4, 0xe7a06444}, ++ {__LINE__, 0x0, "yelling", 7, 0xfe3944e5}, ++ {__LINE__, 0x0, "zlib", 4, 0x73887d3a}, ++ {__LINE__, 0x0, "4BJD7PocN1VqX0jXVpWB", 20, 0xd487a5a1}, ++ {__LINE__, 0x0, "F1rPWI7XvDs6nAIRx41l", 20, 0x61a0132e}, ++ {__LINE__, 0x0, "ldhKlsVkPFOveXgkGtC2", 20, 0xdf02f76}, ++ {__LINE__, 0x0, "5KKnGOOrs8BvJ35iKTOS", 20, 0x579b2b0a}, ++ {__LINE__, 0x0, "0l1tw7GOcem06Ddu7yn4", 20, 0xf7d16e2d}, ++ {__LINE__, 0x0, "MCr47CjPIn9R1IvE1Tm5", 20, 0x731788f5}, ++ {__LINE__, 0x0, "UcixbzPKTIv0SvILHVdO", 20, 0x7112bb11}, ++ {__LINE__, 0x0, "dGnAyAhRQDsWw0ESou24", 20, 0xf32a0dac}, ++ {__LINE__, 0x0, "di0nvmY9UYMYDh0r45XT", 20, 0x625437bb}, ++ {__LINE__, 0x0, "2XKDwHfAhFsV0RhbqtvH", 20, 0x896930f9}, ++ {__LINE__, 0x0, "ZhrANFIiIvRnqClIVyeD", 20, 0x8579a37}, ++ {__LINE__, 0x0, "v7Q9ehzioTOVeDIZioT1", 20, 0x632aa8e0}, ++ {__LINE__, 0x0, "Yod5hEeKcYqyhfXbhxj2", 20, 0xc829af29}, ++ {__LINE__, 0x0, "GehSWY2ay4uUKhehXYb0", 20, 0x1b08b7e8}, ++ {__LINE__, 0x0, "kwytJmq6UqpflV8Y8GoE", 20, 0x4e33b192}, ++ {__LINE__, 0x0, "70684206568419061514", 20, 0x59a179f0}, ++ {__LINE__, 0x0, "42015093765128581010", 20, 0xcd1013d7}, ++ {__LINE__, 0x0, "88214814356148806939", 20, 0xab927546}, ++ {__LINE__, 0x0, "43472694284527343838", 20, 0x11f3b20c}, ++ {__LINE__, 0x0, "49769333513942933689", 20, 0xd562d4ca}, ++ {__LINE__, 0x0, "54979784887993251199", 20, 0x233395f7}, ++ {__LINE__, 0x0, "58360544869206793220", 20, 0x2d167fd5}, ++ {__LINE__, 0x0, "27347953487840714234", 20, 0x8b5108ba}, ++ {__LINE__, 0x0, "07650690295365319082", 20, 0xc46b3cd8}, ++ {__LINE__, 0x0, "42655507906821911703", 20, 0xc10b2662}, ++ {__LINE__, 0x0, "29977409200786225655", 20, 0xc9a0f9d2}, ++ {__LINE__, 0x0, "85181542907229116674", 20, 0x9341357b}, ++ {__LINE__, 0x0, "87963594337989416799", 20, 0xf0424937}, ++ {__LINE__, 0x0, "21395988329504168551", 20, 0xd7c4c31f}, ++ {__LINE__, 0x0, "51991013580943379423", 20, 0xf11edcc4}, ++ {__LINE__, 0x0, "*]+@!);({_$;}[_},?{?;(_?,=-][@", 30, 0x40795df4}, ++ {__LINE__, 0x0, "_@:_).&(#.[:[{[:)$++-($_;@[)}+", 30, 0xdd61a631}, ++ {__LINE__, 0x0, "&[!,[$_==}+.]@!;*(+},[;:)$;)-@", 30, 0xca907a99}, ++ {__LINE__, 0x0, "]{.[.+?+[[=;[?}_#&;[=)__$$:+=_", 30, 0xf652deac}, ++ {__LINE__, 0x0, "-%.)=/[@].:.(:,()$;=%@-$?]{%+%", 30, 0xaf39a5a9}, ++ {__LINE__, 0x0, "+]#$(@&.=:,*];/.!]%/{:){:@(;)$", 30, 0x6bebb4cf}, ++ {__LINE__, 0x0, ")-._.:?[&:.=+}(*$/=!.${;(=$@!}", 30, 0x76430bac}, ++ {__LINE__, 0x0, ":(_*&%/[[}+,?#$&*+#[([*-/#;%(]", 30, 0x6c80c388}, ++ {__LINE__, 0x0, "{[#-;:$/{)(+[}#]/{&!%(@)%:@-$:", 30, 0xd54d977d}, ++ {__LINE__, 0x0, "_{$*,}(&,@.)):=!/%(&(,,-?$}}}!", 30, 0xe3966ad5}, ++ {__LINE__, 0x0, "e$98KNzqaV)Y:2X?]77].{gKRD4G5{mHZk,Z)SpU%L3FSgv!Wb8MLAFdi{+fp)c,@8m6v)yXg@]HBDFk?.4&}g5_udE*JHCiH=aL", 100, 0xe7c71db9}, ++ {__LINE__, 0x0, "r*Fd}ef+5RJQ;+W=4jTR9)R*p!B;]Ed7tkrLi;88U7g@3v!5pk2X6D)vt,.@N8c]@yyEcKi[vwUu@.Ppm@C6%Mv*3Nw}Y,58_aH)", 100, 0xeaa52777}, ++ {__LINE__, 0x0, "h{bcmdC+a;t+Cf{6Y_dFq-{X4Yu&7uNfVDh?q&_u.UWJU],-GiH7ADzb7-V.Q%4=+v!$L9W+T=bP]$_:]Vyg}A.ygD.r;h-D]m%&", 100, 0xcd472048}, ++ {__LINE__, 0x7a30360d, "abacus", 6, 0xf8655a84}, ++ {__LINE__, 0x6fd767ee, "backlog", 7, 0x1ed834b1}, ++ {__LINE__, 0xefeb7589, "campfire", 8, 0x686cfca}, ++ {__LINE__, 0x61cf7e6b, "delta", 5, 0x1554e4b1}, ++ {__LINE__, 0xdc712e2, "executable", 10, 0x761b4254}, ++ {__LINE__, 0xad23c7fd, "file", 4, 0x7abdd09b}, ++ {__LINE__, 0x85cb2317, "greatest", 8, 0x4ba91c6b}, ++ {__LINE__, 0x9eed31b0, "inverter", 8, 0xd5e78ba5}, ++ {__LINE__, 0xb94f34ca, "jigsaw", 6, 0x23649109}, ++ {__LINE__, 0xab058a2, "karate", 6, 0xc5591f41}, ++ {__LINE__, 0x5bff2b7a, "landscape", 9, 0xf10eb644}, ++ {__LINE__, 0x605c9a5f, "machine", 7, 0xbaa0a636}, ++ {__LINE__, 0x51bdeea5, "nanometer", 9, 0x6af89afb}, ++ {__LINE__, 0x85c21c79, "oblivion", 8, 0xecae222b}, ++ {__LINE__, 0x97216f56, "panama", 6, 0x47dffac4}, ++ {__LINE__, 0x18444af2, "quest", 5, 0x70c2fe36}, ++ {__LINE__, 0xbe6ce359, "resource", 8, 0x1471d925}, ++ {__LINE__, 0x843071f1, "secret", 6, 0x50c9a0db}, ++ {__LINE__, 0xf2480c60, "ultimate", 8, 0xf973daf8}, ++ {__LINE__, 0x2d2feb3d, "vector", 6, 0x344ac03d}, ++ {__LINE__, 0x7490310a, "walrus", 6, 0x6d1408ef}, ++ {__LINE__, 0x97d247d4, "xeno", 4, 0xe62670b5}, ++ {__LINE__, 0x93cf7599, "yelling", 7, 0x1b36da38}, ++ {__LINE__, 0x73c84278, "zlib", 4, 0x6432d127}, ++ {__LINE__, 0x228a87d1, "4BJD7PocN1VqX0jXVpWB", 20, 0x997107d0}, ++ {__LINE__, 0xa7a048d0, "F1rPWI7XvDs6nAIRx41l", 20, 0xdc567274}, ++ {__LINE__, 0x1f0ded40, "ldhKlsVkPFOveXgkGtC2", 20, 0xdcc63870}, ++ {__LINE__, 0xa804a62f, "5KKnGOOrs8BvJ35iKTOS", 20, 0x6926cffd}, ++ {__LINE__, 0x508fae6a, "0l1tw7GOcem06Ddu7yn4", 20, 0xb52b38bc}, ++ {__LINE__, 0xe5adaf4f, "MCr47CjPIn9R1IvE1Tm5", 20, 0xf83b8178}, ++ {__LINE__, 0x67136a40, "UcixbzPKTIv0SvILHVdO", 20, 0xc5213070}, ++ {__LINE__, 0xb00c4a10, "dGnAyAhRQDsWw0ESou24", 20, 0xbc7648b0}, ++ {__LINE__, 0x2e0c84b5, "di0nvmY9UYMYDh0r45XT", 20, 0xd8123a72}, ++ {__LINE__, 0x81238d44, "2XKDwHfAhFsV0RhbqtvH", 20, 0xd5ac5620}, ++ {__LINE__, 0xf853aa92, "ZhrANFIiIvRnqClIVyeD", 20, 0xceae099d}, ++ {__LINE__, 0x5a692325, "v7Q9ehzioTOVeDIZioT1", 20, 0xb07d2b24}, ++ {__LINE__, 0x3275b9f, "Yod5hEeKcYqyhfXbhxj2", 20, 0x24ce91df}, ++ {__LINE__, 0x38371feb, "GehSWY2ay4uUKhehXYb0", 20, 0x707b3b30}, ++ {__LINE__, 0xafc8bf62, "kwytJmq6UqpflV8Y8GoE", 20, 0x16abc6a9}, ++ {__LINE__, 0x9b07db73, "70684206568419061514", 20, 0xae1fb7b7}, ++ {__LINE__, 0xe75b214, "42015093765128581010", 20, 0xd4eecd2d}, ++ {__LINE__, 0x72d0fe6f, "88214814356148806939", 20, 0x4660ec7}, ++ {__LINE__, 0xf857a4b1, "43472694284527343838", 20, 0xfd8afdf7}, ++ {__LINE__, 0x54b8e14, "49769333513942933689", 20, 0xc6d1b5f2}, ++ {__LINE__, 0xd6aa5616, "54979784887993251199", 20, 0x32476461}, ++ {__LINE__, 0x11e63098, "58360544869206793220", 20, 0xd917cf1a}, ++ {__LINE__, 0xbe92385, "27347953487840714234", 20, 0x4ad14a12}, ++ {__LINE__, 0x49511de0, "07650690295365319082", 20, 0xe37b5c6c}, ++ {__LINE__, 0x3db13bc1, "42655507906821911703", 20, 0x7cc497f1}, ++ {__LINE__, 0xbb899bea, "29977409200786225655", 20, 0x99781bb2}, ++ {__LINE__, 0xf6cd9436, "85181542907229116674", 20, 0x132256a1}, ++ {__LINE__, 0x9109e6c3, "87963594337989416799", 20, 0xbfdb2c83}, ++ {__LINE__, 0x75770fc, "21395988329504168551", 20, 0x8d9d1e81}, ++ {__LINE__, 0x69b1d19b, "51991013580943379423", 20, 0x7b6d4404}, ++ {__LINE__, 0xc6132975, "*]+@!);({_$;}[_},?{?;(_?,=-][@", 30, 0x8619f010}, ++ {__LINE__, 0xd58cb00c, "_@:_).&(#.[:[{[:)$++-($_;@[)}+", 30, 0x15746ac3}, ++ {__LINE__, 0xb63b8caa, "&[!,[$_==}+.]@!;*(+},[;:)$;)-@", 30, 0xaccf812f}, ++ {__LINE__, 0x8a45a2b8, "]{.[.+?+[[=;[?}_#&;[=)__$$:+=_", 30, 0x78af45de}, ++ {__LINE__, 0xcbe95b78, "-%.)=/[@].:.(:,()$;=%@-$?]{%+%", 30, 0x25b06b59}, ++ {__LINE__, 0x4ef8a54b, "+]#$(@&.=:,*];/.!]%/{:){:@(;)$", 30, 0x4ba0d08f}, ++ {__LINE__, 0x76ad267a, ")-._.:?[&:.=+}(*$/=!.${;(=$@!}", 30, 0xe26b6aac}, ++ {__LINE__, 0x569e613c, ":(_*&%/[[}+,?#$&*+#[([*-/#;%(]", 30, 0x7e2b0a66}, ++ {__LINE__, 0x36aa61da, "{[#-;:$/{)(+[}#]/{&!%(@)%:@-$:", 30, 0xb3430dc7}, ++ {__LINE__, 0xf67222df, "_{$*,}(&,@.)):=!/%(&(,,-?$}}}!", 30, 0x626c17a}, ++ {__LINE__, 0x74b34fd3, "e$98KNzqaV)Y:2X?]77].{gKRD4G5{mHZk,Z)SpU%L3FSgv!Wb8MLAFdi{+fp)c,@8m6v)yXg@]HBDFk?.4&}g5_udE*JHCiH=aL", 100, 0xccf98060}, ++ {__LINE__, 0x351fd770, "r*Fd}ef+5RJQ;+W=4jTR9)R*p!B;]Ed7tkrLi;88U7g@3v!5pk2X6D)vt,.@N8c]@yyEcKi[vwUu@.Ppm@C6%Mv*3Nw}Y,58_aH)", 100, 0xd8b95312}, ++ {__LINE__, 0xc45aef77, "h{bcmdC+a;t+Cf{6Y_dFq-{X4Yu&7uNfVDh?q&_u.UWJU],-GiH7ADzb7-V.Q%4=+v!$L9W+T=bP]$_:]Vyg}A.ygD.r;h-D]m%&", 100, 0xbb1c9912}, ++ {__LINE__, 0xc45aef77, "h{bcmdC+a;t+Cf{6Y_dFq-{X4Yu&7uNfVDh?q&_u.UWJU],-GiH7ADzb7-V.Q%4=+v!$L9W+T=bP]$_:]Vyg}A.ygD.r;h-D]m%&" ++ "h{bcmdC+a;t+Cf{6Y_dFq-{X4Yu&7uNfVDh?q&_u.UWJU],-GiH7ADzb7-V.Q%4=+v!$L9W+T=bP]$_:]Vyg}A.ygD.r;h-D]m%&" ++ "h{bcmdC+a;t+Cf{6Y_dFq-{X4Yu&7uNfVDh?q&_u.UWJU],-GiH7ADzb7-V.Q%4=+v!$L9W+T=bP]$_:]Vyg}A.ygD.r;h-D]m%&" ++ "h{bcmdC+a;t+Cf{6Y_dFq-{X4Yu&7uNfVDh?q&_u.UWJU],-GiH7ADzb7-V.Q%4=+v!$L9W+T=bP]$_:]Vyg}A.ygD.r;h-D]m%&" ++ "h{bcmdC+a;t+Cf{6Y_dFq-{X4Yu&7uNfVDh?q&_u.UWJU],-GiH7ADzb7-V.Q%4=+v!$L9W+T=bP]$_:]Vyg}A.ygD.r;h-D]m%&" ++ "h{bcmdC+a;t+Cf{6Y_dFq-{X4Yu&7uNfVDh?q&_u.UWJU],-GiH7ADzb7-V.Q%4=+v!$L9W+T=bP]$_:]Vyg}A.ygD.r;h-D]m%&", 600, 0x888AFA5B} ++}; ++ ++static const int test_size = sizeof(tests) / sizeof(tests[0]); ++ ++int main(void) ++{ ++ int i; ++ for (i = 0; i < test_size; i++) { ++ test_crc32(tests[i].crc, (Byte*) tests[i].buf, tests[i].len, ++ tests[i].expect, tests[i].line); ++ } ++ return 0; ++} + +From 8ef06a3a9f26205a08a12c324665d5620662264f Mon Sep 17 00:00:00 2001 +From: Tulio Magno Quites Machado Filho +Date: Wed, 25 Mar 2020 12:16:41 -0300 +Subject: [PATCH 3/4] Fix clang's behavior on versions >= 7 + +Clang 7 changed the behavior of vec_xxpermdi in order to match GCC's +behavior. After this change, code that used to work on Clang 6 stopped +to work on Clang >= 7. + +Tested on Clang 6, 7, 8 and 9. + +Reference: https://bugs.llvm.org/show_bug.cgi?id=38192 + +Signed-off-by: Tulio Magno Quites Machado Filho +--- + contrib/power/clang_workaround.h | 15 ++++++++++----- + 1 file changed, 10 insertions(+), 5 deletions(-) + +diff --git a/contrib/power/clang_workaround.h b/contrib/power/clang_workaround.h +index b5e7dae01..915f7e528 100644 +--- a/contrib/power/clang_workaround.h ++++ b/contrib/power/clang_workaround.h +@@ -39,7 +39,12 @@ __vector unsigned long long __builtin_pack_vector (unsigned long __a, + return __v; + } + +-#ifndef vec_xxpermdi ++/* ++ * Clang 7 changed the behavior of vec_xxpermdi in order to provide the same ++ * behavior of GCC. That means code adapted to Clang >= 7 does not work on ++ * Clang <= 6. So, fallback to __builtin_unpack_vector() on Clang <= 6. ++ */ ++#if !defined vec_xxpermdi || __clang_major__ <= 6 + + static inline + unsigned long __builtin_unpack_vector (__vector unsigned long long __v, +@@ -62,9 +67,9 @@ static inline + unsigned long __builtin_unpack_vector_0 (__vector unsigned long long __v) + { + #if defined(__BIG_ENDIAN__) +- return vec_xxpermdi(__v, __v, 0x0)[1]; +- #else + return vec_xxpermdi(__v, __v, 0x0)[0]; ++ #else ++ return vec_xxpermdi(__v, __v, 0x3)[0]; + #endif + } + +@@ -72,9 +77,9 @@ static inline + unsigned long __builtin_unpack_vector_1 (__vector unsigned long long __v) + { + #if defined(__BIG_ENDIAN__) +- return vec_xxpermdi(__v, __v, 0x3)[1]; +- #else + return vec_xxpermdi(__v, __v, 0x3)[0]; ++ #else ++ return vec_xxpermdi(__v, __v, 0x0)[0]; + #endif + } + #endif /* vec_xxpermdi */ + +From 113203437eda67261848b14b6c80a33ff7e33d34 Mon Sep 17 00:00:00 2001 +From: Ilya Leoshkevich +Date: Wed, 18 Jul 2018 13:14:07 +0200 +Subject: [PATCH 4/4] Add support for IBM Z hardware-accelerated deflate + +IBM Z mainframes starting from version z15 provide DFLTCC instruction, +which implements deflate algorithm in hardware with estimated +compression and decompression performance orders of magnitude faster +than the current zlib and ratio comparable with that of level 1. + +This patch adds DFLTCC support to zlib. In order to enable it, the +following build commands should be used: + + $ ./configure --dfltcc + $ make + +When built like this, zlib would compress in hardware on level 1, and in +software on all other levels. Decompression will always happen in +hardware. In order to enable DFLTCC compression for levels 1-6 (i.e. to +make it used by default) one could either configure with +--dfltcc-level-mask=0x7e or set the environment variable +DFLTCC_LEVEL_MASK to 0x7e at run time. + +Two DFLTCC compression calls produce the same results only when they +both are made on machines of the same generation, and when the +respective buffers have the same offset relative to the start of the +page. Therefore care should be taken when using hardware compression +when reproducible results are desired. One such use case - reproducible +software builds - is handled explicitly: when SOURCE_DATE_EPOCH +environment variable is set, the hardware compression is disabled. + +DFLTCC does not support every single zlib feature, in particular: + + * inflate(Z_BLOCK) and inflate(Z_TREES) + * inflateMark() + * inflatePrime() + * inflateSyncPoint() + +When used, these functions will either switch to software, or, in case +this is not possible, gracefully fail. + +This patch tries to add DFLTCC support in the least intrusive way. +All SystemZ-specific code is placed into a separate file, but +unfortunately there is still a noticeable amount of changes in the +main zlib code. Below is the summary of these changes. + +DFLTCC takes as arguments a parameter block, an input buffer, an output +buffer and a window. Since DFLTCC requires parameter block to be +doubleword-aligned, and it's reasonable to allocate it alongside +deflate and inflate states, ZALLOC_STATE, ZFREE_STATE and ZCOPY_STATE +macros were introduced in order to encapsulate the allocation details. +The same is true for window, for which ZALLOC_WINDOW and +TRY_FREE_WINDOW macros were introduced. + +Software and hardware window formats do not match, therefore, +deflateSetDictionary(), deflateGetDictionary(), inflateSetDictionary() +and inflateGetDictionary() need special handling, which is triggered +using DEFLATE_SET_DICTIONARY_HOOK, DEFLATE_GET_DICTIONARY_HOOK, +INFLATE_SET_DICTIONARY_HOOK and INFLATE_GET_DICTIONARY_HOOK macros. + +deflateResetKeep() and inflateResetKeep() now update the DFLTCC +parameter block, which is allocated alongside zlib state, using +the new DEFLATE_RESET_KEEP_HOOK and INFLATE_RESET_KEEP_HOOK macros. + +The new DEFLATE_PARAMS_HOOK switches between hardware and software +deflate implementations when deflateParams() arguments demand this. + +The new INFLATE_PRIME_HOOK, INFLATE_MARK_HOOK and +INFLATE_SYNC_POINT_HOOK macros make the respective unsupported calls +gracefully fail. + +The algorithm implemented in hardware has different compression ratio +than the one implemented in software. In order for deflateBound() to +return the correct results for the hardware implementation, the new +DEFLATE_BOUND_ADJUST_COMPLEN and DEFLATE_NEED_CONSERVATIVE_BOUND macros +were introduced. + +Actual compression and decompression are handled by the new DEFLATE_HOOK +and INFLATE_TYPEDO_HOOK macros. Since inflation with DFLTCC manages the +window on its own, calling updatewindow() is suppressed using the new +INFLATE_NEED_UPDATEWINDOW() macro. + +In addition to compression, DFLTCC computes CRC-32 and Adler-32 +checksums, therefore, whenever it's used, software checksumming needs to +be suppressed using the new DEFLATE_NEED_CHECKSUM and +INFLATE_NEED_CHECKSUM macros. + +DFLTCC will refuse to write an End-of-block Symbol if there is no input +data, thus in some cases it is necessary to do this manually. In order +to achieve this, send_bits, bi_reverse, bi_windup and flush_pending +were promoted from local to ZLIB_INTERNAL. Furthermore, since block and +stream termination must be handled in software as well, block_state enum +was moved to deflate.h. + +Since the first call to dfltcc_inflate already needs the window, and it +might be not allocated yet, inflate_ensure_window was factored out of +updatewindow and made ZLIB_INTERNAL. +--- + Makefile.in | 8 + + compress.c | 14 +- + configure | 24 + + contrib/README.contrib | 4 + + contrib/s390/README.txt | 17 + + contrib/s390/dfltcc.c | 1089 +++++++++++++++++++++++++++++++++ + contrib/s390/dfltcc.h | 100 +++ + contrib/s390/dfltcc_deflate.h | 55 ++ + deflate.c | 82 ++- + deflate.h | 12 + + gzguts.h | 4 + + inflate.c | 97 ++- + inflate.h | 2 + + test/infcover.c | 4 +- + test/minigzip.c | 4 + + trees.c | 13 +- + zutil.h | 2 + + 17 files changed, 1469 insertions(+), 62 deletions(-) + create mode 100644 contrib/s390/README.txt + create mode 100644 contrib/s390/dfltcc.c + create mode 100644 contrib/s390/dfltcc.h + create mode 100644 contrib/s390/dfltcc_deflate.h + +diff --git a/Makefile.in b/Makefile.in +index 83d8ca47d..54c529b3b 100644 +--- a/Makefile.in ++++ b/Makefile.in +@@ -139,6 +139,14 @@ match.lo: match.S + mv _match.o match.lo + rm -f _match.s + ++dfltcc.o: $(SRCDIR)contrib/s390/dfltcc.c $(SRCDIR)zlib.h zconf.h ++ $(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)contrib/s390/dfltcc.c ++ ++dfltcc.lo: $(SRCDIR)contrib/s390/dfltcc.c $(SRCDIR)zlib.h zconf.h ++ -@mkdir objs 2>/dev/null || test -d objs ++ $(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/dfltcc.o $(SRCDIR)contrib/s390/dfltcc.c ++ -@mv objs/dfltcc.o $@ ++ + crc32_test.o: $(SRCDIR)test/crc32_test.c $(SRCDIR)zlib.h zconf.h + $(CC) $(CFLAGS) $(ZINCOUT) -c -o $@ $(SRCDIR)test/crc32_test.c + +diff --git a/compress.c b/compress.c +index 2ad5326c1..179ee2773 100644 +--- a/compress.c ++++ b/compress.c +@@ -5,9 +5,15 @@ + + /* @(#) $Id$ */ + +-#define ZLIB_INTERNAL ++#include "zutil.h" + #include "zlib.h" + ++#ifdef DFLTCC ++# include "contrib/s390/dfltcc.h" ++#else ++#define DEFLATE_BOUND_COMPLEN(source_len) 0 ++#endif ++ + /* =========================================================================== + Compresses the source buffer into the destination buffer. The level + parameter has the same meaning as in deflateInit. sourceLen is the byte +@@ -81,6 +87,12 @@ int ZEXPORT compress(dest, destLen, source, sourceLen) + uLong ZEXPORT compressBound(sourceLen) + uLong sourceLen; + { ++ uLong complen = DEFLATE_BOUND_COMPLEN(sourceLen); ++ ++ if (complen > 0) ++ /* Architecture-specific code provided an upper bound. */ ++ return complen + ZLIB_WRAPLEN; ++ + return sourceLen + (sourceLen >> 12) + (sourceLen >> 14) + + (sourceLen >> 25) + 13; + } +diff --git a/configure b/configure +index 45d51e596..ab3204a6d 100755 +--- a/configure ++++ b/configure +@@ -118,6 +118,7 @@ case "$1" in + echo ' configure [--const] [--zprefix] [--prefix=PREFIX] [--eprefix=EXPREFIX]' | tee -a configure.log + echo ' [--static] [--64] [--libdir=LIBDIR] [--sharedlibdir=LIBDIR]' | tee -a configure.log + echo ' [--includedir=INCLUDEDIR] [--archs="-arch i386 -arch x86_64"]' | tee -a configure.log ++ echo ' [--dfltcc] [--dfltcc-level-mask=MASK]' | tee -a configure.log + exit 0 ;; + -p*=* | --prefix=*) prefix=`echo $1 | sed 's/.*=//'`; shift ;; + -e*=* | --eprefix=*) exec_prefix=`echo $1 | sed 's/.*=//'`; shift ;; +@@ -142,6 +143,16 @@ case "$1" in + -w* | --warn) warn=1; shift ;; + -d* | --debug) debug=1; shift ;; + --sanitize) sanitize=1; shift ;; ++ --dfltcc) ++ CFLAGS="$CFLAGS -DDFLTCC" ++ OBJC="$OBJC dfltcc.o" ++ PIC_OBJC="$PIC_OBJC dfltcc.lo" ++ shift ++ ;; ++ --dfltcc-level-mask=*) ++ CFLAGS="$CFLAGS -DDFLTCC_LEVEL_MASK=`echo $1 | sed 's/.*=//'`" ++ shift ++ ;; + *) + echo "unknown option: $1" | tee -a configure.log + echo "$0 --help for help" | tee -a configure.log +@@ -828,6 +839,19 @@ EOF + fi + fi + ++# Check whether sys/sdt.h is available ++cat > $test.c << EOF ++#include ++int main() { return 0; } ++EOF ++if try $CC -c $CFLAGS $test.c; then ++ echo "Checking for sys/sdt.h ... Yes." | tee -a configure.log ++ CFLAGS="$CFLAGS -DHAVE_SYS_SDT_H" ++ SFLAGS="$SFLAGS -DHAVE_SYS_SDT_H" ++else ++ echo "Checking for sys/sdt.h ... No." | tee -a configure.log ++fi ++ + # test to see if we can use a gnu indirection function to detect and load optimized code at runtime + echo >> configure.log + cat > $test.c < + Small, low memory usage inflate. Also serves to provide an + unambiguous description of the deflate format. + ++s390/ by Ilya Leoshkevich ++ Hardware-accelerated deflate on IBM Z with DEFLATE CONVERSION CALL ++ instruction. ++ + testzlib/ by Gilles Vollant + Example of the use of zlib + +diff --git a/contrib/s390/README.txt b/contrib/s390/README.txt +new file mode 100644 +index 000000000..48be008bd +--- /dev/null ++++ b/contrib/s390/README.txt +@@ -0,0 +1,17 @@ ++IBM Z mainframes starting from version z15 provide DFLTCC instruction, ++which implements deflate algorithm in hardware with estimated ++compression and decompression performance orders of magnitude faster ++than the current zlib and ratio comparable with that of level 1. ++ ++This directory adds DFLTCC support. In order to enable it, the following ++build commands should be used: ++ ++ $ ./configure --dfltcc ++ $ make ++ ++When built like this, zlib would compress in hardware on level 1, and in ++software on all other levels. Decompression will always happen in ++hardware. In order to enable DFLTCC compression for levels 1-6 (i.e. to ++make it used by default) one could either configure with ++--dfltcc-level-mask=0x7e or set the environment variable ++DFLTCC_LEVEL_MASK to 0x7e at run time. +diff --git a/contrib/s390/dfltcc.c b/contrib/s390/dfltcc.c +new file mode 100644 +index 000000000..b8c20bded +--- /dev/null ++++ b/contrib/s390/dfltcc.c +@@ -0,0 +1,1089 @@ ++/* dfltcc.c - SystemZ DEFLATE CONVERSION CALL support. */ ++ ++/* ++ Use the following commands to build zlib with DFLTCC support: ++ ++ $ ./configure --dfltcc ++ $ make ++*/ ++ ++#define _GNU_SOURCE ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "../../zutil.h" ++#include "../../deflate.h" ++#include "../../inftrees.h" ++#include "../../inflate.h" ++#include "dfltcc.h" ++#include "dfltcc_deflate.h" ++#ifdef HAVE_SYS_SDT_H ++#include ++#endif ++ ++/* ++ C wrapper for the DEFLATE CONVERSION CALL instruction. ++ */ ++typedef enum { ++ DFLTCC_CC_OK = 0, ++ DFLTCC_CC_OP1_TOO_SHORT = 1, ++ DFLTCC_CC_OP2_TOO_SHORT = 2, ++ DFLTCC_CC_OP2_CORRUPT = 2, ++ DFLTCC_CC_AGAIN = 3, ++} dfltcc_cc; ++ ++#define DFLTCC_QAF 0 ++#define DFLTCC_GDHT 1 ++#define DFLTCC_CMPR 2 ++#define DFLTCC_XPND 4 ++#define HBT_CIRCULAR (1 << 7) ++#define HB_BITS 15 ++#define HB_SIZE (1 << HB_BITS) ++#define DFLTCC_FACILITY 151 ++ ++local inline dfltcc_cc dfltcc OF((int fn, void *param, ++ Bytef **op1, size_t *len1, ++ z_const Bytef **op2, size_t *len2, ++ void *hist)); ++local inline dfltcc_cc dfltcc(fn, param, op1, len1, op2, len2, hist) ++ int fn; ++ void *param; ++ Bytef **op1; ++ size_t *len1; ++ z_const Bytef **op2; ++ size_t *len2; ++ void *hist; ++{ ++ Bytef *t2 = op1 ? *op1 : NULL; ++ size_t t3 = len1 ? *len1 : 0; ++ z_const Bytef *t4 = op2 ? *op2 : NULL; ++ size_t t5 = len2 ? *len2 : 0; ++ register int r0 __asm__("r0") = fn; ++ register void *r1 __asm__("r1") = param; ++ register Bytef *r2 __asm__("r2") = t2; ++ register size_t r3 __asm__("r3") = t3; ++ register z_const Bytef *r4 __asm__("r4") = t4; ++ register size_t r5 __asm__("r5") = t5; ++ int cc; ++ ++ __asm__ volatile( ++#ifdef HAVE_SYS_SDT_H ++ STAP_PROBE_ASM(zlib, dfltcc_entry, ++ STAP_PROBE_ASM_TEMPLATE(5)) ++#endif ++ ".insn rrf,0xb9390000,%[r2],%[r4],%[hist],0\n" ++#ifdef HAVE_SYS_SDT_H ++ STAP_PROBE_ASM(zlib, dfltcc_exit, ++ STAP_PROBE_ASM_TEMPLATE(5)) ++#endif ++ "ipm %[cc]\n" ++ : [r2] "+r" (r2) ++ , [r3] "+r" (r3) ++ , [r4] "+r" (r4) ++ , [r5] "+r" (r5) ++ , [cc] "=r" (cc) ++ : [r0] "r" (r0) ++ , [r1] "r" (r1) ++ , [hist] "r" (hist) ++#ifdef HAVE_SYS_SDT_H ++ , STAP_PROBE_ASM_OPERANDS(5, r2, r3, r4, r5, hist) ++#endif ++ : "cc", "memory"); ++ t2 = r2; t3 = r3; t4 = r4; t5 = r5; ++ ++ if (op1) ++ *op1 = t2; ++ if (len1) ++ *len1 = t3; ++ if (op2) ++ *op2 = t4; ++ if (len2) ++ *len2 = t5; ++ return (cc >> 28) & 3; ++} ++ ++/* ++ Parameter Block for Query Available Functions. ++ */ ++#define static_assert(c, msg) \ ++ __attribute__((unused)) \ ++ static char static_assert_failed_ ## msg[c ? 1 : -1] ++ ++struct dfltcc_qaf_param { ++ char fns[16]; ++ char reserved1[8]; ++ char fmts[2]; ++ char reserved2[6]; ++}; ++ ++static_assert(sizeof(struct dfltcc_qaf_param) == 32, ++ sizeof_struct_dfltcc_qaf_param_is_32); ++ ++local inline int is_bit_set OF((const char *bits, int n)); ++local inline int is_bit_set(bits, n) ++ const char *bits; ++ int n; ++{ ++ return bits[n / 8] & (1 << (7 - (n % 8))); ++} ++ ++local inline void clear_bit OF((char *bits, int n)); ++local inline void clear_bit(bits, n) ++ char *bits; ++ int n; ++{ ++ bits[n / 8] &= ~(1 << (7 - (n % 8))); ++} ++ ++#define DFLTCC_FMT0 0 ++ ++/* ++ Parameter Block for Generate Dynamic-Huffman Table, Compress and Expand. ++ */ ++#define CVT_CRC32 0 ++#define CVT_ADLER32 1 ++#define HTT_FIXED 0 ++#define HTT_DYNAMIC 1 ++ ++struct dfltcc_param_v0 { ++ uint16_t pbvn; /* Parameter-Block-Version Number */ ++ uint8_t mvn; /* Model-Version Number */ ++ uint8_t ribm; /* Reserved for IBM use */ ++ unsigned reserved32 : 31; ++ unsigned cf : 1; /* Continuation Flag */ ++ uint8_t reserved64[8]; ++ unsigned nt : 1; /* New Task */ ++ unsigned reserved129 : 1; ++ unsigned cvt : 1; /* Check Value Type */ ++ unsigned reserved131 : 1; ++ unsigned htt : 1; /* Huffman-Table Type */ ++ unsigned bcf : 1; /* Block-Continuation Flag */ ++ unsigned bcc : 1; /* Block Closing Control */ ++ unsigned bhf : 1; /* Block Header Final */ ++ unsigned reserved136 : 1; ++ unsigned reserved137 : 1; ++ unsigned dhtgc : 1; /* DHT Generation Control */ ++ unsigned reserved139 : 5; ++ unsigned reserved144 : 5; ++ unsigned sbb : 3; /* Sub-Byte Boundary */ ++ uint8_t oesc; /* Operation-Ending-Supplemental Code */ ++ unsigned reserved160 : 12; ++ unsigned ifs : 4; /* Incomplete-Function Status */ ++ uint16_t ifl; /* Incomplete-Function Length */ ++ uint8_t reserved192[8]; ++ uint8_t reserved256[8]; ++ uint8_t reserved320[4]; ++ uint16_t hl; /* History Length */ ++ unsigned reserved368 : 1; ++ uint16_t ho : 15; /* History Offset */ ++ uint32_t cv; /* Check Value */ ++ unsigned eobs : 15; /* End-of-block Symbol */ ++ unsigned reserved431: 1; ++ uint8_t eobl : 4; /* End-of-block Length */ ++ unsigned reserved436 : 12; ++ unsigned reserved448 : 4; ++ uint16_t cdhtl : 12; /* Compressed-Dynamic-Huffman Table ++ Length */ ++ uint8_t reserved464[6]; ++ uint8_t cdht[288]; ++ uint8_t reserved[32]; ++ uint8_t csb[1152]; ++}; ++ ++static_assert(sizeof(struct dfltcc_param_v0) == 1536, ++ sizeof_struct_dfltcc_param_v0_is_1536); ++ ++local z_const char *oesc_msg OF((char *buf, int oesc)); ++local z_const char *oesc_msg(buf, oesc) ++ char *buf; ++ int oesc; ++{ ++ if (oesc == 0x00) ++ return NULL; /* Successful completion */ ++ else { ++ sprintf(buf, "Operation-Ending-Supplemental Code is 0x%.2X", oesc); ++ return buf; ++ } ++} ++ ++/* ++ Extension of inflate_state and deflate_state. Must be doubleword-aligned. ++*/ ++struct dfltcc_state { ++ struct dfltcc_param_v0 param; /* Parameter block. */ ++ struct dfltcc_qaf_param af; /* Available functions. */ ++ uLong level_mask; /* Levels on which to use DFLTCC */ ++ uLong block_size; /* New block each X bytes */ ++ uLong block_threshold; /* New block after total_in > X */ ++ uLong dht_threshold; /* New block only if avail_in >= X */ ++ char msg[64]; /* Buffer for strm->msg */ ++}; ++ ++#define ALIGN_UP(p, size) \ ++ (__typeof__(p))(((uintptr_t)(p) + ((size) - 1)) & ~((size) - 1)) ++ ++#define GET_DFLTCC_STATE(state) ((struct dfltcc_state FAR *)( \ ++ (char FAR *)(state) + ALIGN_UP(sizeof(*state), 8))) ++ ++/* ++ Compress. ++ */ ++local inline int dfltcc_can_deflate_with_params(z_streamp strm, ++ int level, ++ uInt window_bits, ++ int strategy); ++local inline int dfltcc_can_deflate_with_params(strm, ++ level, ++ window_bits, ++ strategy) ++ z_streamp strm; ++ int level; ++ uInt window_bits; ++ int strategy; ++{ ++ deflate_state FAR *state = (deflate_state FAR *)strm->state; ++ struct dfltcc_state FAR *dfltcc_state = GET_DFLTCC_STATE(state); ++ ++ /* Unsupported compression settings */ ++ if ((dfltcc_state->level_mask & (1 << level)) == 0) ++ return 0; ++ if (window_bits != HB_BITS) ++ return 0; ++ if (strategy != Z_FIXED && strategy != Z_DEFAULT_STRATEGY) ++ return 0; ++ ++ /* Unsupported hardware */ ++ if (!is_bit_set(dfltcc_state->af.fns, DFLTCC_GDHT) || ++ !is_bit_set(dfltcc_state->af.fns, DFLTCC_CMPR) || ++ !is_bit_set(dfltcc_state->af.fmts, DFLTCC_FMT0)) ++ return 0; ++ ++ return 1; ++} ++ ++int ZLIB_INTERNAL dfltcc_can_deflate(strm) ++ z_streamp strm; ++{ ++ deflate_state FAR *state = (deflate_state FAR *)strm->state; ++ ++ return dfltcc_can_deflate_with_params(strm, ++ state->level, ++ state->w_bits, ++ state->strategy); ++} ++ ++local void dfltcc_gdht OF((z_streamp strm)); ++local void dfltcc_gdht(strm) ++ z_streamp strm; ++{ ++ deflate_state FAR *state = (deflate_state FAR *)strm->state; ++ struct dfltcc_param_v0 FAR *param = &GET_DFLTCC_STATE(state)->param; ++ size_t avail_in = avail_in = strm->avail_in; ++ ++ dfltcc(DFLTCC_GDHT, ++ param, NULL, NULL, ++ &strm->next_in, &avail_in, NULL); ++} ++ ++local dfltcc_cc dfltcc_cmpr OF((z_streamp strm)); ++local dfltcc_cc dfltcc_cmpr(strm) ++ z_streamp strm; ++{ ++ deflate_state FAR *state = (deflate_state FAR *)strm->state; ++ struct dfltcc_param_v0 FAR *param = &GET_DFLTCC_STATE(state)->param; ++ size_t avail_in = strm->avail_in; ++ size_t avail_out = strm->avail_out; ++ dfltcc_cc cc; ++ ++ cc = dfltcc(DFLTCC_CMPR | HBT_CIRCULAR, ++ param, &strm->next_out, &avail_out, ++ &strm->next_in, &avail_in, state->window); ++ strm->total_in += (strm->avail_in - avail_in); ++ strm->total_out += (strm->avail_out - avail_out); ++ strm->avail_in = avail_in; ++ strm->avail_out = avail_out; ++ return cc; ++} ++ ++local void send_eobs OF((z_streamp strm, ++ z_const struct dfltcc_param_v0 FAR *param)); ++local void send_eobs(strm, param) ++ z_streamp strm; ++ z_const struct dfltcc_param_v0 FAR *param; ++{ ++ deflate_state FAR *state = (deflate_state FAR *)strm->state; ++ ++ _tr_send_bits( ++ state, ++ bi_reverse(param->eobs >> (15 - param->eobl), param->eobl), ++ param->eobl); ++ flush_pending(strm); ++ if (state->pending != 0) { ++ /* The remaining data is located in pending_out[0:pending]. If someone ++ * calls put_byte() - this might happen in deflate() - the byte will be ++ * placed into pending_buf[pending], which is incorrect. Move the ++ * remaining data to the beginning of pending_buf so that put_byte() is ++ * usable again. ++ */ ++ memmove(state->pending_buf, state->pending_out, state->pending); ++ state->pending_out = state->pending_buf; ++ } ++#ifdef ZLIB_DEBUG ++ state->compressed_len += param->eobl; ++#endif ++} ++ ++int ZLIB_INTERNAL dfltcc_deflate(strm, flush, result) ++ z_streamp strm; ++ int flush; ++ block_state *result; ++{ ++ deflate_state FAR *state = (deflate_state FAR *)strm->state; ++ struct dfltcc_state FAR *dfltcc_state = GET_DFLTCC_STATE(state); ++ struct dfltcc_param_v0 FAR *param = &dfltcc_state->param; ++ uInt masked_avail_in; ++ dfltcc_cc cc; ++ int need_empty_block; ++ int soft_bcc; ++ int no_flush; ++ ++ if (!dfltcc_can_deflate(strm)) { ++ /* Clear history. */ ++ if (flush == Z_FULL_FLUSH) ++ param->hl = 0; ++ return 0; ++ } ++ ++again: ++ masked_avail_in = 0; ++ soft_bcc = 0; ++ no_flush = flush == Z_NO_FLUSH; ++ ++ /* No input data. Return, except when Continuation Flag is set, which means ++ * that DFLTCC has buffered some output in the parameter block and needs to ++ * be called again in order to flush it. ++ */ ++ if (strm->avail_in == 0 && !param->cf) { ++ /* A block is still open, and the hardware does not support closing ++ * blocks without adding data. Thus, close it manually. ++ */ ++ if (!no_flush && param->bcf) { ++ send_eobs(strm, param); ++ param->bcf = 0; ++ } ++ /* Let one of deflate_* functions write a trailing empty block. */ ++ if (flush == Z_FINISH) ++ return 0; ++ /* Clear history. */ ++ if (flush == Z_FULL_FLUSH) ++ param->hl = 0; ++ /* Trigger block post-processing if necessary. */ ++ *result = no_flush ? need_more : block_done; ++ return 1; ++ } ++ ++ /* There is an open non-BFINAL block, we are not going to close it just ++ * yet, we have compressed more than DFLTCC_BLOCK_SIZE bytes and we see ++ * more than DFLTCC_DHT_MIN_SAMPLE_SIZE bytes. Open a new block with a new ++ * DHT in order to adapt to a possibly changed input data distribution. ++ */ ++ if (param->bcf && no_flush && ++ strm->total_in > dfltcc_state->block_threshold && ++ strm->avail_in >= dfltcc_state->dht_threshold) { ++ if (param->cf) { ++ /* We need to flush the DFLTCC buffer before writing the ++ * End-of-block Symbol. Mask the input data and proceed as usual. ++ */ ++ masked_avail_in += strm->avail_in; ++ strm->avail_in = 0; ++ no_flush = 0; ++ } else { ++ /* DFLTCC buffer is empty, so we can manually write the ++ * End-of-block Symbol right away. ++ */ ++ send_eobs(strm, param); ++ param->bcf = 0; ++ dfltcc_state->block_threshold = ++ strm->total_in + dfltcc_state->block_size; ++ } ++ } ++ ++ /* No space for compressed data. If we proceed, dfltcc_cmpr() will return ++ * DFLTCC_CC_OP1_TOO_SHORT without buffering header bits, but we will still ++ * set BCF=1, which is wrong. Avoid complications and return early. ++ */ ++ if (strm->avail_out == 0) { ++ *result = need_more; ++ return 1; ++ } ++ ++ /* The caller gave us too much data. Pass only one block worth of ++ * uncompressed data to DFLTCC and mask the rest, so that on the next ++ * iteration we start a new block. ++ */ ++ if (no_flush && strm->avail_in > dfltcc_state->block_size) { ++ masked_avail_in += (strm->avail_in - dfltcc_state->block_size); ++ strm->avail_in = dfltcc_state->block_size; ++ } ++ ++ /* When we have an open non-BFINAL deflate block and caller indicates that ++ * the stream is ending, we need to close an open deflate block and open a ++ * BFINAL one. ++ */ ++ need_empty_block = flush == Z_FINISH && param->bcf && !param->bhf; ++ ++ /* Translate stream to parameter block */ ++ param->cvt = state->wrap == 2 ? CVT_CRC32 : CVT_ADLER32; ++ if (!no_flush) ++ /* We need to close a block. Always do this in software - when there is ++ * no input data, the hardware will not honor BCC. */ ++ soft_bcc = 1; ++ if (flush == Z_FINISH && !param->bcf) ++ /* We are about to open a BFINAL block, set Block Header Final bit ++ * until the stream ends. ++ */ ++ param->bhf = 1; ++ /* DFLTCC-CMPR will write to next_out, so make sure that buffers with ++ * higher precedence are empty. ++ */ ++ Assert(state->pending == 0, "There must be no pending bytes"); ++ Assert(state->bi_valid < 8, "There must be less than 8 pending bits"); ++ param->sbb = (unsigned int)state->bi_valid; ++ if (param->sbb > 0) ++ *strm->next_out = (Bytef)state->bi_buf; ++ /* Honor history and check value */ ++ param->nt = 0; ++ if (state->wrap == 1) ++ param->cv = strm->adler; ++ else if (state->wrap == 2) ++ param->cv = ZSWAP32(strm->adler); ++ ++ /* When opening a block, choose a Huffman-Table Type */ ++ if (!param->bcf) { ++ if (state->strategy == Z_FIXED || ++ (strm->total_in == 0 && dfltcc_state->block_threshold > 0)) ++ param->htt = HTT_FIXED; ++ else { ++ param->htt = HTT_DYNAMIC; ++ dfltcc_gdht(strm); ++ } ++ } ++ ++ /* Deflate */ ++ do { ++ cc = dfltcc_cmpr(strm); ++ if (strm->avail_in < 4096 && masked_avail_in > 0) ++ /* We are about to call DFLTCC with a small input buffer, which is ++ * inefficient. Since there is masked data, there will be at least ++ * one more DFLTCC call, so skip the current one and make the next ++ * one handle more data. ++ */ ++ break; ++ } while (cc == DFLTCC_CC_AGAIN); ++ ++ /* Translate parameter block to stream */ ++ strm->msg = oesc_msg(dfltcc_state->msg, param->oesc); ++ state->bi_valid = param->sbb; ++ if (state->bi_valid == 0) ++ state->bi_buf = 0; /* Avoid accessing next_out */ ++ else ++ state->bi_buf = *strm->next_out & ((1 << state->bi_valid) - 1); ++ if (state->wrap == 1) ++ strm->adler = param->cv; ++ else if (state->wrap == 2) ++ strm->adler = ZSWAP32(param->cv); ++ ++ /* Unmask the input data */ ++ strm->avail_in += masked_avail_in; ++ masked_avail_in = 0; ++ ++ /* If we encounter an error, it means there is a bug in DFLTCC call */ ++ Assert(cc != DFLTCC_CC_OP2_CORRUPT || param->oesc == 0, "BUG"); ++ ++ /* Update Block-Continuation Flag. It will be used to check whether to call ++ * GDHT the next time. ++ */ ++ if (cc == DFLTCC_CC_OK) { ++ if (soft_bcc) { ++ send_eobs(strm, param); ++ param->bcf = 0; ++ dfltcc_state->block_threshold = ++ strm->total_in + dfltcc_state->block_size; ++ } else ++ param->bcf = 1; ++ if (flush == Z_FINISH) { ++ if (need_empty_block) ++ /* Make the current deflate() call also close the stream */ ++ return 0; ++ else { ++ bi_windup(state); ++ *result = finish_done; ++ } ++ } else { ++ if (flush == Z_FULL_FLUSH) ++ param->hl = 0; /* Clear history */ ++ *result = flush == Z_NO_FLUSH ? need_more : block_done; ++ } ++ } else { ++ param->bcf = 1; ++ *result = need_more; ++ } ++ if (strm->avail_in != 0 && strm->avail_out != 0) ++ goto again; /* deflate() must use all input or all output */ ++ return 1; ++} ++ ++/* ++ Expand. ++ */ ++int ZLIB_INTERNAL dfltcc_can_inflate(strm) ++ z_streamp strm; ++{ ++ struct inflate_state FAR *state = (struct inflate_state FAR *)strm->state; ++ struct dfltcc_state FAR *dfltcc_state = GET_DFLTCC_STATE(state); ++ ++ /* Unsupported hardware */ ++ return is_bit_set(dfltcc_state->af.fns, DFLTCC_XPND) && ++ is_bit_set(dfltcc_state->af.fmts, DFLTCC_FMT0); ++} ++ ++local dfltcc_cc dfltcc_xpnd OF((z_streamp strm)); ++local dfltcc_cc dfltcc_xpnd(strm) ++ z_streamp strm; ++{ ++ struct inflate_state FAR *state = (struct inflate_state FAR *)strm->state; ++ struct dfltcc_param_v0 FAR *param = &GET_DFLTCC_STATE(state)->param; ++ size_t avail_in = strm->avail_in; ++ size_t avail_out = strm->avail_out; ++ dfltcc_cc cc; ++ ++ cc = dfltcc(DFLTCC_XPND | HBT_CIRCULAR, ++ param, &strm->next_out, &avail_out, ++ &strm->next_in, &avail_in, state->window); ++ strm->avail_in = avail_in; ++ strm->avail_out = avail_out; ++ return cc; ++} ++ ++dfltcc_inflate_action ZLIB_INTERNAL dfltcc_inflate(strm, flush, ret) ++ z_streamp strm; ++ int flush; ++ int *ret; ++{ ++ struct inflate_state FAR *state = (struct inflate_state FAR *)strm->state; ++ struct dfltcc_state FAR *dfltcc_state = GET_DFLTCC_STATE(state); ++ struct dfltcc_param_v0 FAR *param = &dfltcc_state->param; ++ dfltcc_cc cc; ++ ++ if (flush == Z_BLOCK || flush == Z_TREES) { ++ /* DFLTCC does not support stopping on block boundaries */ ++ if (dfltcc_inflate_disable(strm)) { ++ *ret = Z_STREAM_ERROR; ++ return DFLTCC_INFLATE_BREAK; ++ } else ++ return DFLTCC_INFLATE_SOFTWARE; ++ } ++ ++ if (state->last) { ++ if (state->bits != 0) { ++ strm->next_in++; ++ strm->avail_in--; ++ state->bits = 0; ++ } ++ state->mode = CHECK; ++ return DFLTCC_INFLATE_CONTINUE; ++ } ++ ++ if (strm->avail_in == 0 && !param->cf) ++ return DFLTCC_INFLATE_BREAK; ++ ++ if (inflate_ensure_window(state)) { ++ state->mode = MEM; ++ return DFLTCC_INFLATE_CONTINUE; ++ } ++ ++ /* Translate stream to parameter block */ ++ param->cvt = ((state->wrap & 4) && state->flags) ? CVT_CRC32 : CVT_ADLER32; ++ param->sbb = state->bits; ++ if (param->hl) ++ param->nt = 0; /* Honor history for the first block */ ++ if (state->wrap & 4) ++ param->cv = state->flags ? ZSWAP32(state->check) : state->check; ++ ++ /* Inflate */ ++ do { ++ cc = dfltcc_xpnd(strm); ++ } while (cc == DFLTCC_CC_AGAIN); ++ ++ /* Translate parameter block to stream */ ++ strm->msg = oesc_msg(dfltcc_state->msg, param->oesc); ++ state->last = cc == DFLTCC_CC_OK; ++ state->bits = param->sbb; ++ if (state->wrap & 4) ++ strm->adler = state->check = state->flags ? ++ ZSWAP32(param->cv) : param->cv; ++ if (cc == DFLTCC_CC_OP2_CORRUPT && param->oesc != 0) { ++ /* Report an error if stream is corrupted */ ++ state->mode = BAD; ++ return DFLTCC_INFLATE_CONTINUE; ++ } ++ state->mode = TYPEDO; ++ /* Break if operands are exhausted, otherwise continue looping */ ++ return (cc == DFLTCC_CC_OP1_TOO_SHORT || cc == DFLTCC_CC_OP2_TOO_SHORT) ? ++ DFLTCC_INFLATE_BREAK : DFLTCC_INFLATE_CONTINUE; ++} ++ ++int ZLIB_INTERNAL dfltcc_was_inflate_used(strm) ++ z_streamp strm; ++{ ++ struct inflate_state FAR *state = (struct inflate_state FAR *)strm->state; ++ struct dfltcc_param_v0 FAR *param = &GET_DFLTCC_STATE(state)->param; ++ ++ return !param->nt; ++} ++ ++/* ++ Rotates a circular buffer. ++ The implementation is based on https://cplusplus.com/reference/algorithm/rotate/ ++ */ ++local void rotate OF((Bytef *start, Bytef *pivot, Bytef *end)); ++local void rotate(start, pivot, end) ++ Bytef *start; ++ Bytef *pivot; ++ Bytef *end; ++{ ++ Bytef *p = pivot; ++ Bytef tmp; ++ ++ while (p != start) { ++ tmp = *start; ++ *start = *p; ++ *p = tmp; ++ ++ start++; ++ p++; ++ ++ if (p == end) ++ p = pivot; ++ else if (start == pivot) ++ pivot = p; ++ } ++} ++ ++#define MIN(x, y) ({ \ ++ typeof(x) _x = (x); \ ++ typeof(y) _y = (y); \ ++ _x < _y ? _x : _y; \ ++}) ++ ++#define MAX(x, y) ({ \ ++ typeof(x) _x = (x); \ ++ typeof(y) _y = (y); \ ++ _x > _y ? _x : _y; \ ++}) ++ ++int ZLIB_INTERNAL dfltcc_inflate_disable(strm) ++ z_streamp strm; ++{ ++ struct inflate_state FAR *state = (struct inflate_state FAR *)strm->state; ++ struct dfltcc_state FAR *dfltcc_state = GET_DFLTCC_STATE(state); ++ struct dfltcc_param_v0 *param = &dfltcc_state->param; ++ ++ if (!dfltcc_can_inflate(strm)) ++ return 0; ++ if (dfltcc_was_inflate_used(strm)) ++ /* DFLTCC has already decompressed some data. Since there is not ++ * enough information to resume decompression in software, the call ++ * must fail. ++ */ ++ return 1; ++ /* DFLTCC was not used yet - decompress in software */ ++ memset(&dfltcc_state->af, 0, sizeof(dfltcc_state->af)); ++ /* Convert the window from the hardware to the software format */ ++ rotate(state->window, state->window + param->ho, state->window + HB_SIZE); ++ state->whave = state->wnext = MIN(param->hl, state->wsize); ++ return 0; ++} ++ ++local int env_dfltcc_disabled; ++local int env_source_date_epoch; ++local unsigned long env_level_mask; ++local unsigned long env_block_size; ++local unsigned long env_block_threshold; ++local unsigned long env_dht_threshold; ++local unsigned long env_ribm; ++local uint64_t cpu_facilities[(DFLTCC_FACILITY / 64) + 1]; ++local struct dfltcc_qaf_param cpu_af __attribute__((aligned(8))); ++ ++local inline int is_dfltcc_enabled OF((void)); ++local inline int is_dfltcc_enabled(void) ++{ ++ if (env_dfltcc_disabled) ++ /* User has explicitly disabled DFLTCC. */ ++ return 0; ++ ++ return is_bit_set((const char *)cpu_facilities, DFLTCC_FACILITY); ++} ++ ++local unsigned long xstrtoul OF((const char *s, unsigned long _default)); ++local unsigned long xstrtoul(s, _default) ++ const char *s; ++ unsigned long _default; ++{ ++ char *endptr; ++ unsigned long result; ++ ++ if (!(s && *s)) ++ return _default; ++ errno = 0; ++ result = strtoul(s, &endptr, 0); ++ return (errno || *endptr) ? _default : result; ++} ++ ++__attribute__((constructor)) local void init_globals OF((void)); ++__attribute__((constructor)) local void init_globals(void) ++{ ++ const char *env; ++ register char r0 __asm__("r0"); ++ ++ env = secure_getenv("DFLTCC"); ++ env_dfltcc_disabled = env && !strcmp(env, "0"); ++ ++ env = secure_getenv("SOURCE_DATE_EPOCH"); ++ env_source_date_epoch = !!env; ++ ++#ifndef DFLTCC_LEVEL_MASK ++#define DFLTCC_LEVEL_MASK 0x2 ++#endif ++ env_level_mask = xstrtoul(secure_getenv("DFLTCC_LEVEL_MASK"), ++ DFLTCC_LEVEL_MASK); ++ ++#ifndef DFLTCC_BLOCK_SIZE ++#define DFLTCC_BLOCK_SIZE 1048576 ++#endif ++ env_block_size = xstrtoul(secure_getenv("DFLTCC_BLOCK_SIZE"), ++ DFLTCC_BLOCK_SIZE); ++ ++#ifndef DFLTCC_FIRST_FHT_BLOCK_SIZE ++#define DFLTCC_FIRST_FHT_BLOCK_SIZE 4096 ++#endif ++ env_block_threshold = xstrtoul(secure_getenv("DFLTCC_FIRST_FHT_BLOCK_SIZE"), ++ DFLTCC_FIRST_FHT_BLOCK_SIZE); ++ ++#ifndef DFLTCC_DHT_MIN_SAMPLE_SIZE ++#define DFLTCC_DHT_MIN_SAMPLE_SIZE 4096 ++#endif ++ env_dht_threshold = xstrtoul(secure_getenv("DFLTCC_DHT_MIN_SAMPLE_SIZE"), ++ DFLTCC_DHT_MIN_SAMPLE_SIZE); ++ ++#ifndef DFLTCC_RIBM ++#define DFLTCC_RIBM 0 ++#endif ++ env_ribm = xstrtoul(secure_getenv("DFLTCC_RIBM"), DFLTCC_RIBM); ++ ++ memset(cpu_facilities, 0, sizeof(cpu_facilities)); ++ r0 = sizeof(cpu_facilities) / sizeof(cpu_facilities[0]) - 1; ++ /* STFLE is supported since z9-109 and only in z/Architecture mode. When ++ * compiling with -m31, gcc defaults to ESA mode, however, since the kernel ++ * is 64-bit, it's always z/Architecture mode at runtime. ++ */ ++ __asm__ volatile( ++#ifndef __clang__ ++ ".machinemode push\n" ++ ".machinemode zarch\n" ++#endif ++ "stfle %[facilities]\n" ++#ifndef __clang__ ++ ".machinemode pop\n" ++#endif ++ : [facilities] "=Q" (cpu_facilities) ++ , [r0] "+r" (r0) ++ : ++ : "cc"); ++ ++ /* Initialize available functions */ ++ if (is_dfltcc_enabled()) ++ dfltcc(DFLTCC_QAF, &cpu_af, NULL, NULL, NULL, NULL, NULL); ++ else ++ memset(&cpu_af, 0, sizeof(cpu_af)); ++} ++ ++/* ++ Memory management. ++ ++ DFLTCC requires parameter blocks and window to be aligned. zlib allows ++ users to specify their own allocation functions, so using e.g. ++ `posix_memalign' is not an option. Thus, we overallocate and take the ++ aligned portion of the buffer. ++*/ ++void ZLIB_INTERNAL dfltcc_reset(strm, size) ++ z_streamp strm; ++ uInt size; ++{ ++ struct dfltcc_state *dfltcc_state = ++ (struct dfltcc_state *)((char FAR *)strm->state + ALIGN_UP(size, 8)); ++ ++ memcpy(&dfltcc_state->af, &cpu_af, sizeof(dfltcc_state->af)); ++ ++ if (env_source_date_epoch) ++ /* User needs reproducible results, but the output of DFLTCC_CMPR ++ * depends on buffers' page offsets. ++ */ ++ clear_bit(dfltcc_state->af.fns, DFLTCC_CMPR); ++ ++ /* Initialize parameter block */ ++ memset(&dfltcc_state->param, 0, sizeof(dfltcc_state->param)); ++ dfltcc_state->param.nt = 1; ++ ++ /* Initialize tuning parameters */ ++ dfltcc_state->level_mask = env_level_mask; ++ dfltcc_state->block_size = env_block_size; ++ dfltcc_state->block_threshold = env_block_threshold; ++ dfltcc_state->dht_threshold = env_dht_threshold; ++ dfltcc_state->param.ribm = env_ribm; ++} ++ ++voidpf ZLIB_INTERNAL dfltcc_alloc_state(strm, items, size) ++ z_streamp strm; ++ uInt items; ++ uInt size; ++{ ++ return ZALLOC(strm, ++ ALIGN_UP(items * size, 8) + sizeof(struct dfltcc_state), ++ sizeof(unsigned char)); ++} ++ ++void ZLIB_INTERNAL dfltcc_copy_state(dst, src, size) ++ voidpf dst; ++ const voidpf src; ++ uInt size; ++{ ++ zmemcpy(dst, src, ALIGN_UP(size, 8) + sizeof(struct dfltcc_state)); ++} ++ ++static const int PAGE_ALIGN = 0x1000; ++ ++voidpf ZLIB_INTERNAL dfltcc_alloc_window(strm, items, size) ++ z_streamp strm; ++ uInt items; ++ uInt size; ++{ ++ voidpf p, w; ++ ++ /* To simplify freeing, we store the pointer to the allocated buffer right ++ * before the window. Note that DFLTCC always uses HB_SIZE bytes. ++ */ ++ p = ZALLOC(strm, sizeof(voidpf) + MAX(items * size, HB_SIZE) + PAGE_ALIGN, ++ sizeof(unsigned char)); ++ if (p == NULL) ++ return NULL; ++ w = ALIGN_UP((char FAR *)p + sizeof(voidpf), PAGE_ALIGN); ++ *(voidpf *)((char FAR *)w - sizeof(voidpf)) = p; ++ return w; ++} ++ ++void ZLIB_INTERNAL dfltcc_copy_window(dest, src, n) ++ void *dest; ++ const void *src; ++ size_t n; ++{ ++ memcpy(dest, src, MAX(n, HB_SIZE)); ++} ++ ++void ZLIB_INTERNAL dfltcc_free_window(strm, w) ++ z_streamp strm; ++ voidpf w; ++{ ++ if (w) ++ ZFREE(strm, *(voidpf *)((unsigned char FAR *)w - sizeof(voidpf))); ++} ++ ++/* ++ Switching between hardware and software compression. ++ ++ DFLTCC does not support all zlib settings, e.g. generation of non-compressed ++ blocks or alternative window sizes. When such settings are applied on the ++ fly with deflateParams, we need to convert between hardware and software ++ window formats. ++*/ ++int ZLIB_INTERNAL dfltcc_deflate_params(strm, level, strategy, flush) ++ z_streamp strm; ++ int level; ++ int strategy; ++ int *flush; ++{ ++ deflate_state FAR *state = (deflate_state FAR *)strm->state; ++ struct dfltcc_state FAR *dfltcc_state = GET_DFLTCC_STATE(state); ++ struct dfltcc_param_v0 FAR *param = &dfltcc_state->param; ++ int could_deflate = dfltcc_can_deflate(strm); ++ int can_deflate = dfltcc_can_deflate_with_params(strm, ++ level, ++ state->w_bits, ++ strategy); ++ ++ if (can_deflate == could_deflate) ++ /* We continue to work in the same mode - no changes needed */ ++ return Z_OK; ++ ++ if (strm->total_in == 0 && param->nt == 1 && param->hl == 0) ++ /* DFLTCC was not used yet - no changes needed */ ++ return Z_OK; ++ ++ /* For now, do not convert between window formats - simply get rid of the ++ * old data instead. ++ */ ++ *flush = Z_FULL_FLUSH; ++ return Z_OK; ++} ++ ++int ZLIB_INTERNAL dfltcc_deflate_done(strm, flush) ++ z_streamp strm; ++ int flush; ++{ ++ deflate_state FAR *state = (deflate_state FAR *)strm->state; ++ struct dfltcc_state FAR *dfltcc_state = GET_DFLTCC_STATE(state); ++ struct dfltcc_param_v0 FAR *param = &dfltcc_state->param; ++ ++ /* When deflate(Z_FULL_FLUSH) is called with small avail_out, it might ++ * close the block without resetting the compression state. Detect this ++ * situation and return that deflation is not done. ++ */ ++ if (flush == Z_FULL_FLUSH && strm->avail_out == 0) ++ return 0; ++ ++ /* Return that deflation is not done if DFLTCC is used and either it ++ * buffered some data (Continuation Flag is set), or has not written EOBS ++ * yet (Block-Continuation Flag is set). ++ */ ++ return !dfltcc_can_deflate(strm) || (!param->cf && !param->bcf); ++} ++ ++/* ++ Preloading history. ++*/ ++local void append_history OF((struct dfltcc_param_v0 FAR *param, ++ Bytef *history, ++ const Bytef *buf, ++ uInt count)); ++local void append_history(param, history, buf, count) ++ struct dfltcc_param_v0 FAR *param; ++ Bytef *history; ++ const Bytef *buf; ++ uInt count; ++{ ++ size_t offset; ++ size_t n; ++ ++ /* Do not use more than 32K */ ++ if (count > HB_SIZE) { ++ buf += count - HB_SIZE; ++ count = HB_SIZE; ++ } ++ offset = (param->ho + param->hl) % HB_SIZE; ++ if (offset + count <= HB_SIZE) ++ /* Circular history buffer does not wrap - copy one chunk */ ++ zmemcpy(history + offset, buf, count); ++ else { ++ /* Circular history buffer wraps - copy two chunks */ ++ n = HB_SIZE - offset; ++ zmemcpy(history + offset, buf, n); ++ zmemcpy(history, buf + n, count - n); ++ } ++ n = param->hl + count; ++ if (n <= HB_SIZE) ++ /* All history fits into buffer - no need to discard anything */ ++ param->hl = n; ++ else { ++ /* History does not fit into buffer - discard extra bytes */ ++ param->ho = (param->ho + (n - HB_SIZE)) % HB_SIZE; ++ param->hl = HB_SIZE; ++ } ++} ++ ++local void get_history OF((struct dfltcc_param_v0 FAR *param, ++ const Bytef *history, ++ Bytef *buf)); ++local void get_history(param, history, buf) ++ struct dfltcc_param_v0 FAR *param; ++ const Bytef *history; ++ Bytef *buf; ++{ ++ if (param->ho + param->hl <= HB_SIZE) ++ /* Circular history buffer does not wrap - copy one chunk */ ++ memcpy(buf, history + param->ho, param->hl); ++ else { ++ /* Circular history buffer wraps - copy two chunks */ ++ memcpy(buf, history + param->ho, HB_SIZE - param->ho); ++ memcpy(buf + HB_SIZE - param->ho, history, param->ho + param->hl - HB_SIZE); ++ } ++} ++ ++int ZLIB_INTERNAL dfltcc_deflate_set_dictionary(strm, dictionary, dict_length) ++ z_streamp strm; ++ const Bytef *dictionary; ++ uInt dict_length; ++{ ++ deflate_state FAR *state = (deflate_state FAR *)strm->state; ++ struct dfltcc_state FAR *dfltcc_state = GET_DFLTCC_STATE(state); ++ struct dfltcc_param_v0 FAR *param = &dfltcc_state->param; ++ ++ append_history(param, state->window, dictionary, dict_length); ++ state->strstart = 1; /* Add FDICT to zlib header */ ++ state->block_start = state->strstart; /* Make deflate_stored happy */ ++ return Z_OK; ++} ++ ++int ZLIB_INTERNAL dfltcc_deflate_get_dictionary(strm, dictionary, dict_length) ++ z_streamp strm; ++ Bytef *dictionary; ++ uInt *dict_length; ++{ ++ deflate_state FAR *state = (deflate_state FAR *)strm->state; ++ struct dfltcc_state FAR *dfltcc_state = GET_DFLTCC_STATE(state); ++ struct dfltcc_param_v0 FAR *param = &dfltcc_state->param; ++ ++ if (dictionary) ++ get_history(param, state->window, dictionary); ++ if (dict_length) ++ *dict_length = param->hl; ++ return Z_OK; ++} ++ ++int ZLIB_INTERNAL dfltcc_inflate_set_dictionary(strm, dictionary, dict_length) ++ z_streamp strm; ++ const Bytef *dictionary; ++ uInt dict_length; ++{ ++ struct inflate_state *state = (struct inflate_state *)strm->state; ++ struct dfltcc_state *dfltcc_state = GET_DFLTCC_STATE(state); ++ struct dfltcc_param_v0 *param = &dfltcc_state->param; ++ ++ if (inflate_ensure_window(state)) { ++ state->mode = MEM; ++ return Z_MEM_ERROR; ++ } ++ ++ append_history(param, state->window, dictionary, dict_length); ++ state->havedict = 1; ++ return Z_OK; ++} ++ ++int ZLIB_INTERNAL dfltcc_inflate_get_dictionary(strm, dictionary, dict_length) ++ z_streamp strm; ++ Bytef *dictionary; ++ uInt *dict_length; ++{ ++ struct inflate_state *state = (struct inflate_state *)strm->state; ++ struct dfltcc_state *dfltcc_state = GET_DFLTCC_STATE(state); ++ struct dfltcc_param_v0 *param = &dfltcc_state->param; ++ ++ if (dictionary && state->window) ++ get_history(param, state->window, dictionary); ++ if (dict_length) ++ *dict_length = param->hl; ++ return Z_OK; ++} +diff --git a/contrib/s390/dfltcc.h b/contrib/s390/dfltcc.h +new file mode 100644 +index 000000000..be28b8ad4 +--- /dev/null ++++ b/contrib/s390/dfltcc.h +@@ -0,0 +1,100 @@ ++#ifndef DFLTCC_H ++#define DFLTCC_H ++ ++#include "../../zlib.h" ++#include "../../zutil.h" ++ ++voidpf ZLIB_INTERNAL dfltcc_alloc_state OF((z_streamp strm, uInt items, ++ uInt size)); ++void ZLIB_INTERNAL dfltcc_copy_state OF((voidpf dst, const voidpf src, ++ uInt size)); ++void ZLIB_INTERNAL dfltcc_reset OF((z_streamp strm, uInt size)); ++voidpf ZLIB_INTERNAL dfltcc_alloc_window OF((z_streamp strm, uInt items, ++ uInt size)); ++void ZLIB_INTERNAL dfltcc_copy_window OF((void *dest, const void *src, ++ size_t n)); ++void ZLIB_INTERNAL dfltcc_free_window OF((z_streamp strm, voidpf w)); ++#define DFLTCC_BLOCK_HEADER_BITS 3 ++#define DFLTCC_HLITS_COUNT_BITS 5 ++#define DFLTCC_HDISTS_COUNT_BITS 5 ++#define DFLTCC_HCLENS_COUNT_BITS 4 ++#define DFLTCC_MAX_HCLENS 19 ++#define DFLTCC_HCLEN_BITS 3 ++#define DFLTCC_MAX_HLITS 286 ++#define DFLTCC_MAX_HDISTS 30 ++#define DFLTCC_MAX_HLIT_HDIST_BITS 7 ++#define DFLTCC_MAX_SYMBOL_BITS 16 ++#define DFLTCC_MAX_EOBS_BITS 15 ++#define DFLTCC_MAX_PADDING_BITS 7 ++#define DEFLATE_BOUND_COMPLEN(source_len) \ ++ ((DFLTCC_BLOCK_HEADER_BITS + \ ++ DFLTCC_HLITS_COUNT_BITS + \ ++ DFLTCC_HDISTS_COUNT_BITS + \ ++ DFLTCC_HCLENS_COUNT_BITS + \ ++ DFLTCC_MAX_HCLENS * DFLTCC_HCLEN_BITS + \ ++ (DFLTCC_MAX_HLITS + DFLTCC_MAX_HDISTS) * DFLTCC_MAX_HLIT_HDIST_BITS + \ ++ (source_len) * DFLTCC_MAX_SYMBOL_BITS + \ ++ DFLTCC_MAX_EOBS_BITS + \ ++ DFLTCC_MAX_PADDING_BITS) >> 3) ++int ZLIB_INTERNAL dfltcc_can_inflate OF((z_streamp strm)); ++typedef enum { ++ DFLTCC_INFLATE_CONTINUE, ++ DFLTCC_INFLATE_BREAK, ++ DFLTCC_INFLATE_SOFTWARE, ++} dfltcc_inflate_action; ++dfltcc_inflate_action ZLIB_INTERNAL dfltcc_inflate OF((z_streamp strm, ++ int flush, int *ret)); ++int ZLIB_INTERNAL dfltcc_was_inflate_used OF((z_streamp strm)); ++int ZLIB_INTERNAL dfltcc_inflate_disable OF((z_streamp strm)); ++int ZLIB_INTERNAL dfltcc_inflate_set_dictionary OF((z_streamp strm, ++ const Bytef *dictionary, ++ uInt dict_length)); ++int ZLIB_INTERNAL dfltcc_inflate_get_dictionary OF((z_streamp strm, ++ Bytef *dictionary, ++ uInt* dict_length)); ++ ++#define ZALLOC_STATE dfltcc_alloc_state ++#define ZFREE_STATE ZFREE ++#define ZCOPY_STATE dfltcc_copy_state ++#define ZALLOC_WINDOW dfltcc_alloc_window ++#define ZCOPY_WINDOW dfltcc_copy_window ++#define ZFREE_WINDOW dfltcc_free_window ++#define TRY_FREE_WINDOW dfltcc_free_window ++#define INFLATE_RESET_KEEP_HOOK(strm) \ ++ dfltcc_reset((strm), sizeof(struct inflate_state)) ++#define INFLATE_PRIME_HOOK(strm, bits, value) \ ++ do { if (dfltcc_inflate_disable((strm))) return Z_STREAM_ERROR; } while (0) ++#define INFLATE_TYPEDO_HOOK(strm, flush) \ ++ if (dfltcc_can_inflate((strm))) { \ ++ dfltcc_inflate_action action; \ ++\ ++ RESTORE(); \ ++ action = dfltcc_inflate((strm), (flush), &ret); \ ++ LOAD(); \ ++ if (action == DFLTCC_INFLATE_CONTINUE) \ ++ break; \ ++ else if (action == DFLTCC_INFLATE_BREAK) \ ++ goto inf_leave; \ ++ } ++#define INFLATE_NEED_CHECKSUM(strm) (!dfltcc_can_inflate((strm))) ++#define INFLATE_NEED_UPDATEWINDOW(strm) (!dfltcc_can_inflate((strm))) ++#define INFLATE_MARK_HOOK(strm) \ ++ do { \ ++ if (dfltcc_was_inflate_used((strm))) return -(1L << 16); \ ++ } while (0) ++#define INFLATE_SYNC_POINT_HOOK(strm) \ ++ do { \ ++ if (dfltcc_was_inflate_used((strm))) return Z_STREAM_ERROR; \ ++ } while (0) ++#define INFLATE_SET_DICTIONARY_HOOK(strm, dict, dict_len) \ ++ do { \ ++ if (dfltcc_can_inflate(strm)) \ ++ return dfltcc_inflate_set_dictionary(strm, dict, dict_len); \ ++ } while (0) ++#define INFLATE_GET_DICTIONARY_HOOK(strm, dict, dict_len) \ ++ do { \ ++ if (dfltcc_can_inflate(strm)) \ ++ return dfltcc_inflate_get_dictionary(strm, dict, dict_len); \ ++ } while (0) ++ ++#endif +diff --git a/contrib/s390/dfltcc_deflate.h b/contrib/s390/dfltcc_deflate.h +new file mode 100644 +index 000000000..46acfc550 +--- /dev/null ++++ b/contrib/s390/dfltcc_deflate.h +@@ -0,0 +1,55 @@ ++#ifndef DFLTCC_DEFLATE_H ++#define DFLTCC_DEFLATE_H ++ ++#include "dfltcc.h" ++ ++int ZLIB_INTERNAL dfltcc_can_deflate OF((z_streamp strm)); ++int ZLIB_INTERNAL dfltcc_deflate OF((z_streamp strm, ++ int flush, ++ block_state *result)); ++int ZLIB_INTERNAL dfltcc_deflate_params OF((z_streamp strm, ++ int level, ++ int strategy, ++ int *flush)); ++int ZLIB_INTERNAL dfltcc_deflate_done OF((z_streamp strm, int flush)); ++int ZLIB_INTERNAL dfltcc_deflate_set_dictionary OF((z_streamp strm, ++ const Bytef *dictionary, ++ uInt dict_length)); ++int ZLIB_INTERNAL dfltcc_deflate_get_dictionary OF((z_streamp strm, ++ Bytef *dictionary, ++ uInt* dict_length)); ++ ++#define DEFLATE_SET_DICTIONARY_HOOK(strm, dict, dict_len) \ ++ do { \ ++ if (dfltcc_can_deflate((strm))) \ ++ return dfltcc_deflate_set_dictionary((strm), (dict), (dict_len)); \ ++ } while (0) ++#define DEFLATE_GET_DICTIONARY_HOOK(strm, dict, dict_len) \ ++ do { \ ++ if (dfltcc_can_deflate((strm))) \ ++ return dfltcc_deflate_get_dictionary((strm), (dict), (dict_len)); \ ++ } while (0) ++#define DEFLATE_RESET_KEEP_HOOK(strm) \ ++ dfltcc_reset((strm), sizeof(deflate_state)) ++#define DEFLATE_PARAMS_HOOK(strm, level, strategy, hook_flush) \ ++ do { \ ++ int err; \ ++\ ++ err = dfltcc_deflate_params((strm), \ ++ (level), \ ++ (strategy), \ ++ (hook_flush)); \ ++ if (err == Z_STREAM_ERROR) \ ++ return err; \ ++ } while (0) ++#define DEFLATE_DONE dfltcc_deflate_done ++#define DEFLATE_BOUND_ADJUST_COMPLEN(strm, complen, source_len) \ ++ do { \ ++ if (dfltcc_can_deflate((strm))) \ ++ (complen) = DEFLATE_BOUND_COMPLEN(source_len); \ ++ } while (0) ++#define DEFLATE_NEED_CONSERVATIVE_BOUND(strm) (dfltcc_can_deflate((strm))) ++#define DEFLATE_HOOK dfltcc_deflate ++#define DEFLATE_NEED_CHECKSUM(strm) (!dfltcc_can_deflate((strm))) ++ ++#endif +diff --git a/deflate.c b/deflate.c +index 4a512e1f9..ba446d982 100644 +--- a/deflate.c ++++ b/deflate.c +@@ -61,15 +61,30 @@ const char deflate_copyright[] = + */ + + /* =========================================================================== +- * Function prototypes. ++ * Architecture-specific bits. + */ +-typedef enum { +- need_more, /* block not completed, need more input or more output */ +- block_done, /* block flush performed */ +- finish_started, /* finish started, need only more output at next deflate */ +- finish_done /* finish done, accept no more input or output */ +-} block_state; ++#ifdef DFLTCC ++# include "contrib/s390/dfltcc_deflate.h" ++#else ++#define ZALLOC_STATE ZALLOC ++#define ZFREE_STATE ZFREE ++#define ZCOPY_STATE zmemcpy ++#define ZALLOC_WINDOW ZALLOC ++#define TRY_FREE_WINDOW TRY_FREE ++#define DEFLATE_SET_DICTIONARY_HOOK(strm, dict, dict_len) do {} while (0) ++#define DEFLATE_GET_DICTIONARY_HOOK(strm, dict, dict_len) do {} while (0) ++#define DEFLATE_RESET_KEEP_HOOK(strm) do {} while (0) ++#define DEFLATE_PARAMS_HOOK(strm, level, strategy, hook_flush) do {} while (0) ++#define DEFLATE_DONE(strm, flush) 1 ++#define DEFLATE_BOUND_ADJUST_COMPLEN(strm, complen, sourceLen) do {} while (0) ++#define DEFLATE_NEED_CONSERVATIVE_BOUND(strm) 0 ++#define DEFLATE_HOOK(strm, flush, bstate) 0 ++#define DEFLATE_NEED_CHECKSUM(strm) 1 ++#endif + ++/* =========================================================================== ++ * Function prototypes. ++ */ + typedef block_state (*compress_func) OF((deflate_state *s, int flush)); + /* Compression function. Returns the block state after the call. */ + +@@ -85,7 +100,6 @@ local block_state deflate_rle OF((deflate_state *s, int flush)); + local block_state deflate_huff OF((deflate_state *s, int flush)); + local void lm_init OF((deflate_state *s)); + local void putShortMSB OF((deflate_state *s, uInt b)); +-local void flush_pending OF((z_streamp strm)); + local unsigned read_buf OF((z_streamp strm, Bytef *buf, unsigned size)); + local uInt longest_match OF((deflate_state *s, IPos cur_match)); + +@@ -295,7 +309,7 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy, + return Z_STREAM_ERROR; + } + if (windowBits == 8) windowBits = 9; /* until 256-byte window bug fixed */ +- s = (deflate_state *) ZALLOC(strm, 1, sizeof(deflate_state)); ++ s = (deflate_state *) ZALLOC_STATE(strm, 1, sizeof(deflate_state)); + if (s == Z_NULL) return Z_MEM_ERROR; + strm->state = (struct internal_state FAR *)s; + s->strm = strm; +@@ -312,7 +326,7 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy, + s->hash_mask = s->hash_size - 1; + s->hash_shift = ((s->hash_bits + MIN_MATCH-1) / MIN_MATCH); + +- s->window = (Bytef *) ZALLOC(strm, s->w_size, 2*sizeof(Byte)); ++ s->window = (Bytef *) ZALLOC_WINDOW(strm, s->w_size, 2*sizeof(Byte)); + s->prev = (Posf *) ZALLOC(strm, s->w_size, sizeof(Pos)); + s->head = (Posf *) ZALLOC(strm, s->hash_size, sizeof(Pos)); + +@@ -430,6 +444,7 @@ int ZEXPORT deflateSetDictionary(strm, dictionary, dictLength) + /* when using zlib wrappers, compute Adler-32 for provided dictionary */ + if (wrap == 1) + strm->adler = adler32(strm->adler, dictionary, dictLength); ++ DEFLATE_SET_DICTIONARY_HOOK(strm, dictionary, dictLength); + s->wrap = 0; /* avoid computing Adler-32 in read_buf */ + + /* if dictionary would fill window, just replace the history */ +@@ -488,6 +503,7 @@ int ZEXPORT deflateGetDictionary(strm, dictionary, dictLength) + + if (deflateStateCheck(strm)) + return Z_STREAM_ERROR; ++ DEFLATE_GET_DICTIONARY_HOOK(strm, dictionary, dictLength); + s = strm->state; + len = s->strstart + s->lookahead; + if (len > s->w_size) +@@ -534,6 +550,8 @@ int ZEXPORT deflateResetKeep(strm) + + _tr_init(s); + ++ DEFLATE_RESET_KEEP_HOOK(strm); ++ + return Z_OK; + } + +@@ -609,6 +627,7 @@ int ZEXPORT deflateParams(strm, level, strategy) + { + deflate_state *s; + compress_func func; ++ int hook_flush = Z_NO_FLUSH; + + if (deflateStateCheck(strm)) return Z_STREAM_ERROR; + s = strm->state; +@@ -621,15 +640,18 @@ int ZEXPORT deflateParams(strm, level, strategy) + if (level < 0 || level > 9 || strategy < 0 || strategy > Z_FIXED) { + return Z_STREAM_ERROR; + } ++ DEFLATE_PARAMS_HOOK(strm, level, strategy, &hook_flush); + func = configuration_table[s->level].func; + +- if ((strategy != s->strategy || func != configuration_table[level].func) && +- s->last_flush != -2) { ++ if (((strategy != s->strategy || func != configuration_table[level].func) && ++ s->last_flush != -2) || hook_flush != Z_NO_FLUSH) { + /* Flush the last buffer: */ +- int err = deflate(strm, Z_BLOCK); ++ int flush = RANK(hook_flush) > RANK(Z_BLOCK) ? hook_flush : Z_BLOCK; ++ int err = deflate(strm, flush); + if (err == Z_STREAM_ERROR) + return err; +- if (strm->avail_in || (s->strstart - s->block_start) + s->lookahead) ++ if (strm->avail_in || (s->strstart - s->block_start) + s->lookahead || ++ !DEFLATE_DONE(strm, flush)) + return Z_BUF_ERROR; + } + if (s->level != level) { +@@ -705,11 +727,13 @@ uLong ZEXPORT deflateBound(strm, sourceLen) + ~13% overhead plus a small constant */ + fixedlen = sourceLen + (sourceLen >> 3) + (sourceLen >> 8) + + (sourceLen >> 9) + 4; ++ DEFLATE_BOUND_ADJUST_COMPLEN(strm, fixedlen, sourceLen); + + /* upper bound for stored blocks with length 127 (memLevel == 1) -- + ~4% overhead plus a small constant */ + storelen = sourceLen + (sourceLen >> 5) + (sourceLen >> 7) + + (sourceLen >> 11) + 7; ++ DEFLATE_BOUND_ADJUST_COMPLEN(strm, storelen, sourceLen); + + /* if can't get parameters, return larger bound plus a zlib wrapper */ + if (deflateStateCheck(strm)) +@@ -751,7 +775,8 @@ uLong ZEXPORT deflateBound(strm, sourceLen) + } + + /* if not default parameters, return one of the conservative bounds */ +- if (s->w_bits != 15 || s->hash_bits != 8 + 7) ++ if (DEFLATE_NEED_CONSERVATIVE_BOUND(strm) || ++ s->w_bits != 15 || s->hash_bits != 8 + 7) + return (s->w_bits <= s->hash_bits && s->level ? fixedlen : storelen) + + wraplen; + +@@ -780,7 +805,7 @@ local void putShortMSB(s, b) + * applications may wish to modify it to avoid allocating a large + * strm->next_out buffer and copying into it. (See also read_buf()). + */ +-local void flush_pending(strm) ++void ZLIB_INTERNAL flush_pending(strm) + z_streamp strm; + { + unsigned len; +@@ -1052,7 +1077,8 @@ int ZEXPORT deflate(strm, flush) + (flush != Z_NO_FLUSH && s->status != FINISH_STATE)) { + block_state bstate; + +- bstate = s->level == 0 ? deflate_stored(s, flush) : ++ bstate = DEFLATE_HOOK(strm, flush, &bstate) ? bstate : ++ s->level == 0 ? deflate_stored(s, flush) : + s->strategy == Z_HUFFMAN_ONLY ? deflate_huff(s, flush) : + s->strategy == Z_RLE ? deflate_rle(s, flush) : + (*(configuration_table[s->level].func))(s, flush); +@@ -1099,7 +1125,6 @@ int ZEXPORT deflate(strm, flush) + } + + if (flush != Z_FINISH) return Z_OK; +- if (s->wrap <= 0) return Z_STREAM_END; + + /* Write the trailer */ + #ifdef GZIP +@@ -1115,7 +1140,7 @@ int ZEXPORT deflate(strm, flush) + } + else + #endif +- { ++ if (s->wrap == 1) { + putShortMSB(s, (uInt)(strm->adler >> 16)); + putShortMSB(s, (uInt)(strm->adler & 0xffff)); + } +@@ -1124,7 +1149,11 @@ int ZEXPORT deflate(strm, flush) + * to flush the rest. + */ + if (s->wrap > 0) s->wrap = -s->wrap; /* write the trailer only once! */ +- return s->pending != 0 ? Z_OK : Z_STREAM_END; ++ if (s->pending == 0) { ++ Assert(s->bi_valid == 0, "bi_buf not flushed"); ++ return Z_STREAM_END; ++ } ++ return Z_OK; + } + + /* ========================================================================= */ +@@ -1141,9 +1170,9 @@ int ZEXPORT deflateEnd(strm) + TRY_FREE(strm, strm->state->pending_buf); + TRY_FREE(strm, strm->state->head); + TRY_FREE(strm, strm->state->prev); +- TRY_FREE(strm, strm->state->window); ++ TRY_FREE_WINDOW(strm, strm->state->window); + +- ZFREE(strm, strm->state); ++ ZFREE_STATE(strm, strm->state); + strm->state = Z_NULL; + + return status == BUSY_STATE ? Z_DATA_ERROR : Z_OK; +@@ -1173,13 +1202,13 @@ int ZEXPORT deflateCopy(dest, source) + + zmemcpy((voidpf)dest, (voidpf)source, sizeof(z_stream)); + +- ds = (deflate_state *) ZALLOC(dest, 1, sizeof(deflate_state)); ++ ds = (deflate_state *) ZALLOC_STATE(dest, 1, sizeof(deflate_state)); + if (ds == Z_NULL) return Z_MEM_ERROR; + dest->state = (struct internal_state FAR *) ds; +- zmemcpy((voidpf)ds, (voidpf)ss, sizeof(deflate_state)); ++ ZCOPY_STATE((voidpf)ds, (voidpf)ss, sizeof(deflate_state)); + ds->strm = dest; + +- ds->window = (Bytef *) ZALLOC(dest, ds->w_size, 2*sizeof(Byte)); ++ ds->window = (Bytef *) ZALLOC_WINDOW(dest, ds->w_size, 2*sizeof(Byte)); + ds->prev = (Posf *) ZALLOC(dest, ds->w_size, sizeof(Pos)); + ds->head = (Posf *) ZALLOC(dest, ds->hash_size, sizeof(Pos)); + ds->pending_buf = (uchf *) ZALLOC(dest, ds->lit_bufsize, 4); +@@ -1226,7 +1255,8 @@ local unsigned read_buf(strm, buf, size) + strm->avail_in -= len; + + zmemcpy(buf, strm->next_in, len); +- if (strm->state->wrap == 1) { ++ if (!DEFLATE_NEED_CHECKSUM(strm)) {} ++ else if (strm->state->wrap == 1) { + strm->adler = adler32(strm->adler, buf, len); + } + #ifdef GZIP +diff --git a/deflate.h b/deflate.h +index 1a06cd5f2..f92750ca6 100644 +--- a/deflate.h ++++ b/deflate.h +@@ -299,6 +299,7 @@ void ZLIB_INTERNAL _tr_flush_bits OF((deflate_state *s)); + void ZLIB_INTERNAL _tr_align OF((deflate_state *s)); + void ZLIB_INTERNAL _tr_stored_block OF((deflate_state *s, charf *buf, + ulg stored_len, int last)); ++void ZLIB_INTERNAL _tr_send_bits OF((deflate_state *s, int value, int length)); + + #define d_code(dist) \ + ((dist) < 256 ? _dist_code[dist] : _dist_code[256+((dist)>>7)]) +@@ -343,4 +344,15 @@ void ZLIB_INTERNAL _tr_stored_block OF((deflate_state *s, charf *buf, + flush = _tr_tally(s, distance, length) + #endif + ++typedef enum { ++ need_more, /* block not completed, need more input or more output */ ++ block_done, /* block flush performed */ ++ finish_started, /* finish started, need only more output at next deflate */ ++ finish_done /* finish done, accept no more input or output */ ++} block_state; ++ ++unsigned ZLIB_INTERNAL bi_reverse OF((unsigned code, int len)); ++void ZLIB_INTERNAL bi_windup OF((deflate_state *s)); ++void ZLIB_INTERNAL flush_pending OF((z_streamp strm)); ++ + #endif /* DEFLATE_H */ +diff --git a/gzguts.h b/gzguts.h +index 57faf3716..581f2b631 100644 +--- a/gzguts.h ++++ b/gzguts.h +@@ -153,7 +153,11 @@ + + /* default i/o buffer size -- double this for output when reading (this and + twice this must be able to fit in an unsigned type) */ ++#ifdef DFLTCC ++#define GZBUFSIZE 131072 ++#else + #define GZBUFSIZE 8192 ++#endif + + /* gzip modes, also provide a little integrity check on the passed structure */ + #define GZ_NONE 0 +diff --git a/inflate.c b/inflate.c +index 8acbef44e..19dc7242f 100644 +--- a/inflate.c ++++ b/inflate.c +@@ -85,6 +85,27 @@ + #include "inflate.h" + #include "inffast.h" + ++/* architecture-specific bits */ ++#ifdef DFLTCC ++# include "contrib/s390/dfltcc.h" ++#else ++#define ZALLOC_STATE ZALLOC ++#define ZFREE_STATE ZFREE ++#define ZCOPY_STATE zmemcpy ++#define ZALLOC_WINDOW ZALLOC ++#define ZCOPY_WINDOW zmemcpy ++#define ZFREE_WINDOW ZFREE ++#define INFLATE_RESET_KEEP_HOOK(strm) do {} while (0) ++#define INFLATE_PRIME_HOOK(strm, bits, value) do {} while (0) ++#define INFLATE_TYPEDO_HOOK(strm, flush) do {} while (0) ++#define INFLATE_NEED_CHECKSUM(strm) 1 ++#define INFLATE_NEED_UPDATEWINDOW(strm) 1 ++#define INFLATE_MARK_HOOK(strm) do {} while (0) ++#define INFLATE_SYNC_POINT_HOOK(strm) do {} while (0) ++#define INFLATE_SET_DICTIONARY_HOOK(strm, dict, dict_len) do {} while (0) ++#define INFLATE_GET_DICTIONARY_HOOK(strm, dict, dict_len) do {} while (0) ++#endif ++ + #ifdef MAKEFIXED + # ifndef BUILDFIXED + # define BUILDFIXED +@@ -138,6 +159,7 @@ z_streamp strm; + state->lencode = state->distcode = state->next = state->codes; + state->sane = 1; + state->back = -1; ++ INFLATE_RESET_KEEP_HOOK(strm); + Tracev((stderr, "inflate: reset\n")); + return Z_OK; + } +@@ -185,7 +207,7 @@ int windowBits; + if (windowBits && (windowBits < 8 || windowBits > 15)) + return Z_STREAM_ERROR; + if (state->window != Z_NULL && state->wbits != (unsigned)windowBits) { +- ZFREE(strm, state->window); ++ ZFREE_WINDOW(strm, state->window); + state->window = Z_NULL; + } + +@@ -224,7 +246,7 @@ int stream_size; + strm->zfree = zcfree; + #endif + state = (struct inflate_state FAR *) +- ZALLOC(strm, 1, sizeof(struct inflate_state)); ++ ZALLOC_STATE(strm, 1, sizeof(struct inflate_state)); + if (state == Z_NULL) return Z_MEM_ERROR; + Tracev((stderr, "inflate: allocated\n")); + strm->state = (struct internal_state FAR *)state; +@@ -233,7 +255,7 @@ int stream_size; + state->mode = HEAD; /* to pass state test in inflateReset2() */ + ret = inflateReset2(strm, windowBits); + if (ret != Z_OK) { +- ZFREE(strm, state); ++ ZFREE_STATE(strm, state); + strm->state = Z_NULL; + } + return ret; +@@ -255,6 +277,7 @@ int value; + struct inflate_state FAR *state; + + if (inflateStateCheck(strm)) return Z_STREAM_ERROR; ++ INFLATE_PRIME_HOOK(strm, bits, value); + state = (struct inflate_state FAR *)strm->state; + if (bits < 0) { + state->hold = 0; +@@ -382,6 +405,27 @@ void makefixed() + } + #endif /* MAKEFIXED */ + ++int ZLIB_INTERNAL inflate_ensure_window(state) ++ struct inflate_state *state; ++{ ++ /* if it hasn't been done already, allocate space for the window */ ++ if (state->window == Z_NULL) { ++ state->window = (unsigned char FAR *) ++ ZALLOC_WINDOW(state->strm, 1U << state->wbits, ++ sizeof(unsigned char)); ++ if (state->window == Z_NULL) return 1; ++ } ++ ++ /* if window not in use yet, initialize */ ++ if (state->wsize == 0) { ++ state->wsize = 1U << state->wbits; ++ state->wnext = 0; ++ state->whave = 0; ++ } ++ ++ return 0; ++} ++ + /* + Update the window with the last wsize (normally 32K) bytes written before + returning. If window does not exist yet, create it. This is only called +@@ -406,20 +450,7 @@ unsigned copy; + + state = (struct inflate_state FAR *)strm->state; + +- /* if it hasn't been done already, allocate space for the window */ +- if (state->window == Z_NULL) { +- state->window = (unsigned char FAR *) +- ZALLOC(strm, 1U << state->wbits, +- sizeof(unsigned char)); +- if (state->window == Z_NULL) return 1; +- } +- +- /* if window not in use yet, initialize */ +- if (state->wsize == 0) { +- state->wsize = 1U << state->wbits; +- state->wnext = 0; +- state->whave = 0; +- } ++ if (inflate_ensure_window(state)) return 1; + + /* copy state->wsize or less output bytes into the circular window */ + if (copy >= state->wsize) { +@@ -863,6 +894,7 @@ int flush; + if (flush == Z_BLOCK || flush == Z_TREES) goto inf_leave; + /* fallthrough */ + case TYPEDO: ++ INFLATE_TYPEDO_HOOK(strm, flush); + if (state->last) { + BYTEBITS(); + state->mode = CHECK; +@@ -1224,7 +1256,7 @@ int flush; + out -= left; + strm->total_out += out; + state->total += out; +- if ((state->wrap & 4) && out) ++ if (INFLATE_NEED_CHECKSUM(strm) && (state->wrap & 4) && out) + strm->adler = state->check = + UPDATE_CHECK(state->check, put - out, out); + out = left; +@@ -1279,8 +1311,9 @@ int flush; + */ + inf_leave: + RESTORE(); +- if (state->wsize || (out != strm->avail_out && state->mode < BAD && +- (state->mode < CHECK || flush != Z_FINISH))) ++ if (INFLATE_NEED_UPDATEWINDOW(strm) && ++ (state->wsize || (out != strm->avail_out && state->mode < BAD && ++ (state->mode < CHECK || flush != Z_FINISH)))) + if (updatewindow(strm, strm->next_out, out - strm->avail_out)) { + state->mode = MEM; + return Z_MEM_ERROR; +@@ -1290,7 +1323,7 @@ int flush; + strm->total_in += in; + strm->total_out += out; + state->total += out; +- if ((state->wrap & 4) && out) ++ if (INFLATE_NEED_CHECKSUM(strm) && (state->wrap & 4) && out) + strm->adler = state->check = + UPDATE_CHECK(state->check, strm->next_out - out, out); + strm->data_type = (int)state->bits + (state->last ? 64 : 0) + +@@ -1308,8 +1341,8 @@ z_streamp strm; + if (inflateStateCheck(strm)) + return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; +- if (state->window != Z_NULL) ZFREE(strm, state->window); +- ZFREE(strm, strm->state); ++ if (state->window != Z_NULL) ZFREE_WINDOW(strm, state->window); ++ ZFREE_STATE(strm, strm->state); + strm->state = Z_NULL; + Tracev((stderr, "inflate: end\n")); + return Z_OK; +@@ -1326,6 +1359,8 @@ uInt *dictLength; + if (inflateStateCheck(strm)) return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; + ++ INFLATE_GET_DICTIONARY_HOOK(strm, dictionary, dictLength); ++ + /* copy dictionary */ + if (state->whave && dictionary != Z_NULL) { + zmemcpy(dictionary, state->window + state->wnext, +@@ -1361,6 +1396,8 @@ uInt dictLength; + return Z_DATA_ERROR; + } + ++ INFLATE_SET_DICTIONARY_HOOK(strm, dictionary, dictLength); ++ + /* copy dictionary to window using updatewindow(), which will amend the + existing dictionary if appropriate */ + ret = updatewindow(strm, dictionary + dictLength, dictLength); +@@ -1488,6 +1525,7 @@ z_streamp strm; + struct inflate_state FAR *state; + + if (inflateStateCheck(strm)) return Z_STREAM_ERROR; ++ INFLATE_SYNC_POINT_HOOK(strm); + state = (struct inflate_state FAR *)strm->state; + return state->mode == STORED && state->bits == 0; + } +@@ -1508,21 +1546,22 @@ z_streamp source; + + /* allocate space */ + copy = (struct inflate_state FAR *) +- ZALLOC(source, 1, sizeof(struct inflate_state)); ++ ZALLOC_STATE(source, 1, sizeof(struct inflate_state)); + if (copy == Z_NULL) return Z_MEM_ERROR; + window = Z_NULL; + if (state->window != Z_NULL) { + window = (unsigned char FAR *) +- ZALLOC(source, 1U << state->wbits, sizeof(unsigned char)); ++ ZALLOC_WINDOW(source, 1U << state->wbits, ++ sizeof(unsigned char)); + if (window == Z_NULL) { +- ZFREE(source, copy); ++ ZFREE_STATE(source, copy); + return Z_MEM_ERROR; + } + } + + /* copy state */ + zmemcpy((voidpf)dest, (voidpf)source, sizeof(z_stream)); +- zmemcpy((voidpf)copy, (voidpf)state, sizeof(struct inflate_state)); ++ ZCOPY_STATE((voidpf)copy, (voidpf)state, sizeof(struct inflate_state)); + copy->strm = dest; + if (state->lencode >= state->codes && + state->lencode <= state->codes + ENOUGH - 1) { +@@ -1531,8 +1570,7 @@ z_streamp source; + } + copy->next = copy->codes + (state->next - state->codes); + if (window != Z_NULL) { +- wsize = 1U << state->wbits; +- zmemcpy(window, state->window, wsize); ++ ZCOPY_WINDOW(window, state->window, 1U << state->wbits); + } + copy->window = window; + dest->state = (struct internal_state FAR *)copy; +@@ -1579,6 +1617,7 @@ z_streamp strm; + + if (inflateStateCheck(strm)) + return -(1L << 16); ++ INFLATE_MARK_HOOK(strm); + state = (struct inflate_state FAR *)strm->state; + return (long)(((unsigned long)((long)state->back)) << 16) + + (state->mode == COPY ? state->length : +diff --git a/inflate.h b/inflate.h +index f127b6b1f..519ed3535 100644 +--- a/inflate.h ++++ b/inflate.h +@@ -124,3 +124,5 @@ struct inflate_state { + int back; /* bits back of last unprocessed length/lit */ + unsigned was; /* initial length of match */ + }; ++ ++int ZLIB_INTERNAL inflate_ensure_window OF((struct inflate_state *state)); +diff --git a/test/infcover.c b/test/infcover.c +index 2be01646c..a208219dc 100644 +--- a/test/infcover.c ++++ b/test/infcover.c +@@ -373,7 +373,7 @@ local void cover_support(void) + mem_setup(&strm); + strm.avail_in = 0; + strm.next_in = Z_NULL; +- ret = inflateInit_(&strm, ZLIB_VERSION - 1, (int)sizeof(z_stream)); ++ ret = inflateInit_(&strm, &ZLIB_VERSION[1], (int)sizeof(z_stream)); + assert(ret == Z_VERSION_ERROR); + mem_done(&strm, "wrong version"); + +@@ -444,7 +444,7 @@ local void cover_wrap(void) + } + + /* input and output functions for inflateBack() */ +-local unsigned pull(void *desc, unsigned char **buf) ++local unsigned pull(void *desc, z_const unsigned char **buf) + { + static unsigned int next = 0; + static unsigned char dat[] = {0x63, 0, 2, 0}; +diff --git a/test/minigzip.c b/test/minigzip.c +index a649d2b3d..964408a40 100644 +--- a/test/minigzip.c ++++ b/test/minigzip.c +@@ -132,7 +132,11 @@ static void pwinerror (s) + #endif + #define SUFFIX_LEN (sizeof(GZ_SUFFIX)-1) + ++#ifdef DFLTCC ++#define BUFLEN 262144 ++#else + #define BUFLEN 16384 ++#endif + #define MAX_NAME_LEN 1024 + + #ifdef MAXSEG_64K +diff --git a/trees.c b/trees.c +index 5f305c472..4924bdfc8 100644 +--- a/trees.c ++++ b/trees.c +@@ -149,8 +149,6 @@ local void send_all_trees OF((deflate_state *s, int lcodes, int dcodes, + local void compress_block OF((deflate_state *s, const ct_data *ltree, + const ct_data *dtree)); + local int detect_data_type OF((deflate_state *s)); +-local unsigned bi_reverse OF((unsigned code, int len)); +-local void bi_windup OF((deflate_state *s)); + local void bi_flush OF((deflate_state *s)); + + #ifdef GEN_TREES_H +@@ -223,6 +221,13 @@ local void send_bits(s, value, length) + } + #endif /* ZLIB_DEBUG */ + ++void ZLIB_INTERNAL _tr_send_bits(s, value, length) ++ deflate_state *s; ++ int value; ++ int length; ++{ ++ send_bits(s, value, length); ++} + + /* the arguments must not have side effects */ + +@@ -1133,7 +1138,7 @@ local int detect_data_type(s) + * method would use a table) + * IN assertion: 1 <= len <= 15 + */ +-local unsigned bi_reverse(code, len) ++unsigned ZLIB_INTERNAL bi_reverse(code, len) + unsigned code; /* the value to invert */ + int len; /* its bit length */ + { +@@ -1165,7 +1170,7 @@ local void bi_flush(s) + /* =========================================================================== + * Flush the bit buffer and align the output on a byte boundary + */ +-local void bi_windup(s) ++void ZLIB_INTERNAL bi_windup(s) + deflate_state *s; + { + if (s->bi_valid > 8) { +diff --git a/zutil.h b/zutil.h +index 0bc7f4ecd..75eb4df47 100644 +--- a/zutil.h ++++ b/zutil.h +@@ -87,6 +87,8 @@ extern z_const char * const z_errmsg[10]; /* indexed by 2-zlib_error */ + + #define PRESET_DICT 0x20 /* preset dictionary flag in zlib header */ + ++#define ZLIB_WRAPLEN 6 /* zlib format overhead */ ++ + /* target dependencies */ + + #if defined(MSDOS) || (defined(WINDOWS) && !defined(WIN32)) diff --git a/zlib-1.2.13-fix-bug-deflateBound.patch b/zlib-1.2.13-fix-bug-deflateBound.patch new file mode 100644 index 0000000..c04546f --- /dev/null +++ b/zlib-1.2.13-fix-bug-deflateBound.patch @@ -0,0 +1,27 @@ +From e554695638228b846d49657f31eeff0ca4680e8a Mon Sep 17 00:00:00 2001 +From: Mark Adler +Date: Thu, 15 Dec 2022 09:07:13 -0800 +Subject: [PATCH] Fix bug in deflateBound() for level 0 and memLevel 9. + +memLevel 9 would cause deflateBound() to assume the use of fixed +blocks, even if the compression level was 0, which forces stored +blocks. That could result in a bound less than the size of the +compressed data. Now level 0 always uses the stored blocks bound. +--- + deflate.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/deflate.c b/deflate.c +index cd538b8ac..4a512e1f9 100644 +--- a/deflate.c ++++ b/deflate.c +@@ -752,7 +752,8 @@ uLong ZEXPORT deflateBound(strm, sourceLen) + + /* if not default parameters, return one of the conservative bounds */ + if (s->w_bits != 15 || s->hash_bits != 8 + 7) +- return (s->w_bits <= s->hash_bits ? fixedlen : storelen) + wraplen; ++ return (s->w_bits <= s->hash_bits && s->level ? fixedlen : storelen) + ++ wraplen; + + /* default settings: return tight bound for that case -- ~0.03% overhead + plus a small constant */ diff --git a/zlib-1.2.13-optimized-s390.patch b/zlib-1.2.13-optimized-s390.patch new file mode 100644 index 0000000..9fe256d --- /dev/null +++ b/zlib-1.2.13-optimized-s390.patch @@ -0,0 +1,41 @@ +Index: deflate.c +=================================================================== +--- deflate.c.orig ++++ deflate.c +@@ -1233,15 +1233,16 @@ local void lm_init (s) + * string (strstart) and its distance is <= MAX_DIST, and prev_length >= 1 + * OUT assertion: the match length is not greater than s->lookahead. + */ +-local uInt longest_match(s, cur_match) ++local uInt longest_match(s, pcur_match) + deflate_state *s; +- IPos cur_match; /* current match */ ++ IPos pcur_match; /* current match */ + { ++ ptrdiff_t cur_match = pcur_match; /* extend to pointer width */ + unsigned chain_length = s->max_chain_length;/* max hash chain length */ + register Bytef *scan = s->window + s->strstart; /* current string */ + register Bytef *match; /* matched string */ + register int len; /* length of current match */ +- int best_len = (int)s->prev_length; /* best match length so far */ ++ ptrdiff_t best_len = s->prev_length; /* best match length so far */ + int nice_match = s->nice_match; /* stop if match long enough */ + IPos limit = s->strstart > (IPos)MAX_DIST(s) ? + s->strstart - (IPos)MAX_DIST(s) : NIL; +@@ -1256,12 +1257,12 @@ local uInt longest_match(s, cur_match) + * Try with and without -DUNALIGNED_OK to check. + */ + register Bytef *strend = s->window + s->strstart + MAX_MATCH - 1; +- register ush scan_start = *(ushf*)scan; +- register ush scan_end = *(ushf*)(scan + best_len - 1); ++ register uInt scan_start = *(ushf*)scan; ++ register uInt scan_end = *(ushf*)(scan + best_len - 1); + #else + register Bytef *strend = s->window + s->strstart + MAX_MATCH; +- register Byte scan_end1 = scan[best_len - 1]; +- register Byte scan_end = scan[best_len]; ++ register uInt scan_end1 = scan[best_len - 1]; ++ register uInt scan_end = scan[best_len]; + #endif + + /* The code is optimized for HASH_BITS >= 8 and MAX_MATCH-2 multiple of 16. diff --git a/zlib-1.2.13.tar.gz b/zlib-1.2.13.tar.gz new file mode 100644 index 0000000..e0a654a --- /dev/null +++ b/zlib-1.2.13.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3a24de97a8fdbc835b9833169501030b8977031bcb54b3b3ac13740f846ab30 +size 1497445 diff --git a/zlib-1.2.13.tar.gz.asc b/zlib-1.2.13.tar.gz.asc new file mode 100644 index 0000000..b6fe0df --- /dev/null +++ b/zlib-1.2.13.tar.gz.asc @@ -0,0 +1,7 @@ +-----BEGIN PGP SIGNATURE----- +Comment: GPGTools - http://gpgtools.org + +iF0EABECAB0WIQRe1GpnIdNlWHeR4qp4P82OWLyvugUCY0h42QAKCRB4P82OWLyv +upvZAKCF7EgWGaMEfO78WnkA8hivLlBMlACgyI7Vm2A5BI2jI+h23yqrKjgQC5s= +=umRA +-----END PGP SIGNATURE----- diff --git a/zlib-1.2.5-minizip-fixuncrypt.patch b/zlib-1.2.5-minizip-fixuncrypt.patch new file mode 100644 index 0000000..2c6fa8e --- /dev/null +++ b/zlib-1.2.5-minizip-fixuncrypt.patch @@ -0,0 +1,24 @@ +From 9352e84c149d02ec0df78e19dad55b7f83185622 Mon Sep 17 00:00:00 2001 +From: Ilya Leoshkevich +Date: Wed, 27 Apr 2022 14:36:43 +0200 +Subject: [PATCH] zlib-1.2.5-minizip-fixuncrypt.patch + +--- + contrib/minizip/unzip.c | 4 ---- + 1 file changed, 4 deletions(-) + +diff --git a/contrib/minizip/unzip.c b/contrib/minizip/unzip.c +index 5e12e4747..aa1a92d17 100644 +--- a/contrib/minizip/unzip.c ++++ b/contrib/minizip/unzip.c +@@ -68,10 +68,6 @@ + #include + #include + +-#ifndef NOUNCRYPT +- #define NOUNCRYPT +-#endif +- + #include "zlib.h" + #include "unzip.h" + diff --git a/zlib-format.patch b/zlib-format.patch new file mode 100644 index 0000000..18859fe --- /dev/null +++ b/zlib-format.patch @@ -0,0 +1,20 @@ +From: meissner@suse.de +Subject: supply format arguments to gzprintf(). + +Index: zlib.h +=================================================================== +--- zlib.h.orig ++++ zlib.h +@@ -1465,7 +1465,11 @@ ZEXTERN z_size_t ZEXPORT gzfwrite OF((vo + is returned, and the error state is set to Z_STREAM_ERROR. + */ + +-ZEXTERN int ZEXPORTVA gzprintf Z_ARG((gzFile file, const char *format, ...)); ++ZEXTERN int ZEXPORTVA gzprintf Z_ARG((gzFile file, const char *format, ...)) ++#ifdef __GNUC__ ++ __attribute__((__format__(__printf__,2,3))) ++#endif ++; + /* + Convert, format, compress, and write the arguments (...) to file under + control of the string format, as in fprintf. gzprintf returns the number of diff --git a/zlib-no-version-check.patch b/zlib-no-version-check.patch new file mode 100644 index 0000000..95337d0 --- /dev/null +++ b/zlib-no-version-check.patch @@ -0,0 +1,45 @@ +Index: zlib-1.2.12/infback.c +=================================================================== +--- zlib-1.2.12.orig/infback.c ++++ zlib-1.2.12/infback.c +@@ -34,9 +34,6 @@ int stream_size; + { + struct inflate_state FAR *state; + +- if (version == Z_NULL || version[0] != ZLIB_VERSION[0] || +- stream_size != (int)(sizeof(z_stream))) +- return Z_VERSION_ERROR; + if (strm == Z_NULL || window == Z_NULL || + windowBits < 8 || windowBits > 15) + return Z_STREAM_ERROR; +Index: zlib-1.2.12/inflate.c +=================================================================== +--- zlib-1.2.12.orig/inflate.c ++++ zlib-1.2.12/inflate.c +@@ -202,9 +202,6 @@ int stream_size; + int ret; + struct inflate_state FAR *state; + +- if (version == Z_NULL || version[0] != ZLIB_VERSION[0] || +- stream_size != (int)(sizeof(z_stream))) +- return Z_VERSION_ERROR; + if (strm == Z_NULL) return Z_STREAM_ERROR; + strm->msg = Z_NULL; /* in case we return an error */ + if (strm->zalloc == (alloc_func)0) { +Index: zlib-1.2.12/deflate.c +=================================================================== +--- zlib-1.2.12.orig/deflate.c ++++ zlib-1.2.12/deflate.c +@@ -253,12 +253,7 @@ int ZEXPORT deflateInit2_(strm, level, m + { + deflate_state *s; + int wrap = 1; +- static const char my_version[] = ZLIB_VERSION; + +- if (version == Z_NULL || version[0] != my_version[0] || +- stream_size != sizeof(z_stream)) { +- return Z_VERSION_ERROR; +- } + if (strm == Z_NULL) return Z_STREAM_ERROR; + + strm->msg = Z_NULL; diff --git a/zlib-rpmlintrc b/zlib-rpmlintrc new file mode 100644 index 0000000..0905c70 --- /dev/null +++ b/zlib-rpmlintrc @@ -0,0 +1,2 @@ +# used only if build --with profiling +addFilter("zlib.src.*: W: make-check-outside-check-section time make check") diff --git a/zlib.changes b/zlib.changes new file mode 100644 index 0000000..8307a83 --- /dev/null +++ b/zlib.changes @@ -0,0 +1,835 @@ +------------------------------------------------------------------- +Fri May 5 09:56:31 UTC 2023 - Danilo Spinella + +- Fix deflateBound() before deflateInit(), bsc#1210593, bsc#1211005 + bsc1210593.patch + +------------------------------------------------------------------- +Wed Mar 1 17:50:05 UTC 2023 - Dirk Müller + +- avoid buildcycle with krb5 (in sle15+ - jsc#PED-3641) +- skip hwcaps subpackage building for -static subpackage + +------------------------------------------------------------------- +Wed Feb 1 09:26:04 UTC 2023 - Dirk Müller + +- build with glibc hwcaps optimized libs + +------------------------------------------------------------------- +Thu Jan 19 11:07:25 UTC 2023 - Danilo Spinella + +- Update to 1.2.13: + * Fix configure issue that discarded provided CC definition + * Correct incorrect inputs provided to the CRC functions + * Repair prototypes and exporting of new CRC functions + * Fix inflateBack to detect invalid input with distances too far + * Have infback() deliver all of the available output up to any error + * Fix a bug when getting a gzip header extra field with inflate() + * Fix bug in block type selection when Z_FIXED used + * Tighten deflateBound bounds + * Remove deleted assembler code references + * Various portability and appearance improvements +- Added patches: + * zlib-1.2.13-IBM-Z-hw-accelerated-deflate-s390x.patch + * zlib-1.2.13-fix-bug-deflateBound.patch + * zlib-1.2.13-optimized-s390.patch +- Refreshed patches: + * zlib-1.2.12-add-optimized-slide_hash-for-power.patch + * zlib-1.2.12-add-vectorized-longest_match-for-power.patch + * zlib-1.2.12-s390-vectorize-crc32.patch +- Removed patches: + * zlib-1.2.12-fix-configure.patch + * zlib-1.2.12-IBM-Z-hw-accelerated-deflate-s390x.patch + * zlib-1.2.12-optimized-crc32-power8.patch + * zlib-1.2.12-correct-inputs-provided-to-crc-func.patch + * zlib-1.2.12-fix-CVE-2022-37434.patch + * zlib-1.2.11-optimized-s390.patch + +------------------------------------------------------------------- +Sun Dec 11 16:41:51 UTC 2022 - Dirk Müller + +- build zlib with optflags again + +------------------------------------------------------------------- +Mon Oct 10 10:08:02 UTC 2022 - Danilo Spinella + +- Add Power8 optimizations: + * zlib-1.2.12-add-optimized-slide_hash-for-power.patch + * zlib-1.2.12-add-vectorized-longest_match-for-power.patch + * zlib-1.2.12-adler32-vector-optimizations-for-power.patch + * zlib-1.2.12-fix-invalid-memory-access-on-ppc-and-ppc64.patch +- Update zlib-1.2.12-IBM-Z-hw-accelerated-deflate-s390x.patch + +------------------------------------------------------------------- +Tue Aug 23 16:22:59 UTC 2022 - Danilo Spinella + +- Update to 1.2.12: + * A lot of bug fixes + * Improve speed of crc32 functions + * Use ARM crc32 instructions if the ARM architecture has them + For the complete changes, see ChangeLog +- Fixes CVE-2022-37434, heap-based buffer over-read or buffer overflow in + inflate.c via a large gzip header extra field + (CVE-2022-37434, bsc#1202175) +- Added patches: + * zlib-1.2.11-covscan-issues-rhel9.patch + * zlib-1.2.11-covscan-issues.patch + * zlib-1.2.12-s390-vectorize-crc32.patch + * zlib-1.2.12-optimized-crc32-power8.patch + * zlib-1.2.12-IBM-Z-hw-accelerated-deflate-s390x.patch + * zlib-1.2.12-fix-configure.patch + * zlib-1.2.12-correct-inputs-provided-to-crc-func.patch + * zlib-1.2.12-fix-CVE-2022-37434.patch + * zlib-1.2.5-minizip-fixuncrypt.patch +- Removed patches: + * bsc1197459.patch (upstreamed) + * zlib-power8-fate325307.patch + (replaced by zlib-1.2.12-optimized-crc32-power8.patch) + * bsc1174736-DFLTCC_LEVEL_MASK-set-to-0x1ff.patch + (replaced by zlib-1.2.12-IBM-Z-hw-accelrated-deflate-s390x.patch) + * 410.patch + (replaced by zlib-1.2.12-IBM-Z-hw-accelrated-deflate-s390x.patch) +- Refreshed patches: + * zlib-format.patch + * zlib-no-version-check.patch +- Disable profiling since it breaks tests +- Update zlib-rpmlintrc + +------------------------------------------------------------------- +Sat Jul 2 10:00:46 UTC 2022 - Marcus Meissner + +- switch to https urls + +------------------------------------------------------------------- +Fri Mar 25 14:59:29 UTC 2022 - Danilo Spinella + +- Fix memory corruption on deflate, bsc#1197459 + * bsc1197459.patch - CVE-2018-25032 +- Update 410.patch + * Remove included patches: + bsc1174551-fxi-imcomplete-raw-streams.patch + zlib-compression-switching.patch + zlib-s390x-z15-fix-hw-compression.patch +- Refresh bsc1174736-DFLTCC_LEVEL_MASK-set-to-0x1ff.patch + +------------------------------------------------------------------- +Fri Mar 11 17:38:09 UTC 2022 - Arjen de Korte + +- Don't install (internal) crypt.h header in minizip + * minizip-dont-install-crypt-header.patch + +------------------------------------------------------------------- +Mon Nov 9 08:00:35 UTC 2020 - Ali Abdallah + +- Fix hw compression on z15 bsc#1176201 +- Add zlib-s390x-z15-fix-hw-compression.patch + +------------------------------------------------------------------- +Wed Oct 28 14:38:00 UTC 2020 - Ludwig Nussel + +- install to /usr (boo#1029961) + +------------------------------------------------------------------- +Wed Sep 16 15:05:04 UTC 2020 - Stephan Kulow + +- Provide a testsuite subpackage to run post-build validation + +------------------------------------------------------------------- +Fri Aug 28 07:58:23 UTC 2020 - Tomáš Chvátal + +- Add patch to fix compression level switching + bsc#1175811 bsc#1175830 bsc#1175831 + * zlib-compression-switching.patch + +------------------------------------------------------------------- +Thu Aug 27 06:55:29 UTC 2020 - Tomáš Chvátal + +- Set -DDFLTCC_LEVEL_MASK=0x7e on s390/s390x jsc#13776 + +------------------------------------------------------------------- +Thu Aug 6 08:36:48 UTC 2020 - Lidong Zhong + +- Permit a deflateParams() parameter change as soon as possible(bsc#1174736) + * bsc1174736-DFLTCC_LEVEL_MASK-set-to-0x1ff.patch + Fix DFLTCC not flushing EOBS when creating raw streams(bsc#1174551) + * bsc1174551-fxi-imcomplete-raw-streams.patch + +------------------------------------------------------------------- +Thu Apr 23 08:17:19 UTC 2020 - Tomáš Chvátal + +- Update 410.patch to contain latest fixes from IBM bsc#1166260 + * The build behaviour changed + +------------------------------------------------------------------- +Tue Oct 29 10:47:18 UTC 2019 - Tomáš Chvátal + +- Update the zlib-no-version-check.patch to be even more forgiving + with the versions on the zlib to allow updates without rebuilds + +------------------------------------------------------------------- +Mon Oct 21 07:11:58 UTC 2019 - Tomáš Chvátal + +- Add SUSE specific patch to fix bsc#1138793, we simply don't want + to test if the app was linked with exactly same version of zlib + like the one that is present on the runtime: + * zlib-no-version-check.patch + +------------------------------------------------------------------- +Wed Jul 17 07:26:35 UTC 2019 - Tomáš Chvátal + +- Update the s390 patchset bsc#1137624: + * 410.patch + +------------------------------------------------------------------- +Thu Jul 11 16:09:34 UTC 2019 - Bruce Rogers + +- Tweak zlib-power8-fate325307.patch to have type of crc32_vpmsum + conform to usage + bsc#1141059 + +------------------------------------------------------------------- +Tue Jul 2 07:22:36 UTC 2019 - Martin Liška + +- Use FAT LTO objects in order to provide proper static library. + +------------------------------------------------------------------- +Fri Jun 7 11:24:35 UTC 2019 - Tomáš Chvátal + +- Do not enable the previous patchset on s390 but just s390x + bsc#1137624 + +------------------------------------------------------------------- +Thu Jun 6 09:43:12 UTC 2019 - Tomáš Chvátal + +- Add patchset for s390 improvements jsc#SLE-5807 bsc#1136717: + * 410.patch + +------------------------------------------------------------------- +Tue Mar 19 14:11:48 UTC 2019 - Tomáš Chvátal + +- Try to safely abort if we get NULL ptr bsc#1110304 bsc#1129576: + * zlib-power8-fate325307.patch + +------------------------------------------------------------------- +Wed Jun 20 14:51:07 UTC 2018 - tchvatal@suse.com + +- Add patch for fate#325307 zlib speedup on power8: + * zlib-power8-fate325307.patch + +------------------------------------------------------------------- +Tue May 15 10:56:20 UTC 2018 - tchvatal@suse.com + +- Add patch to safeguard against negative values in uInt bsc#1071321: + * 0001-Do-not-try-to-store-negative-values-in-unsigned-int.patch + +------------------------------------------------------------------- +Mon Jun 12 17:35:18 UTC 2017 - kah0922@gmail.com + +- Added 32bit minizip support + +------------------------------------------------------------------- +Thu Jun 1 13:34:38 UTC 2017 - mpluskal@suse.com + +- Add gpg signature +- Re-enable profiling + +------------------------------------------------------------------- +Wed May 10 13:43:08 UTC 2017 - mpluskal@suse.com + +- Add s390 performance patch (fate#314093): + * zlib-1.2.11-optimized-s390.patch + +------------------------------------------------------------------- +Tue Apr 4 08:31:28 UTC 2017 - schwab@suse.de + +- baselibs.conf: add missing dependencies + +------------------------------------------------------------------- +Mon Jan 16 09:57:59 UTC 2017 - mpluskal@suse.com + +- Update to version 1.2.11: + * Fix deflate stored bug when pulling last block from window + * Permit immediate deflateParams changes before any deflate input + +------------------------------------------------------------------- +Tue Jan 3 07:41:49 UTC 2017 - mpluskal@suse.com + +- Update to version 1.2.10: + * Avoid warnings on snprintf() return value + * Fix bug in deflate_stored() for zero-length input + * Fix bug in gzwrite.c that produced corrupt gzip files + * Remove files to be installed before copying them in Makefile.in + * Add warnings when compiling with assembler code + +------------------------------------------------------------------- +Mon Jan 2 09:08:50 UTC 2017 - mpluskal@suse.com + +- Update to version 1.2.9: + * Improve compress() and uncompress() to support large lengths + * Allow building zlib outside of the source directory + * Fix bug when level 0 used with Z_HUFFMAN or Z_RLE + * Fix bugs in creating a very large gzip header + * Add uncompress2() function, which returns the input size used + * Dramatically speed up deflation for level 0 (storing) + * Add gzfread() and gzfwrite(), duplicating the interfaces of fread() and fwrite() + * Add crc32_z() and adler32_z() functions with size_t lengths + * Many portability improvements +- Drop patches included in upstream: + * zlib-bnc1003577.patch + * zlib-bnc1003579-part2.patch + * zlib-bnc1003579.patch + * zlib-bnc1003580.patch + * zlib-bnc1013882.patch +- Drop zlib-1.2.7-improve-longest_match-performance.patch + * not accepted by upstream for two releases + * rebasing no longer possible + +------------------------------------------------------------------- +Sun Dec 4 12:47:51 UTC 2016 - tchvatal@suse.com + +- Include fixes for bnc#1003580 bnc#1003579 bnc#1003577 bnc#1013882: + * zlib-bnc1003577.patch + * zlib-bnc1003579-part2.patch + * zlib-bnc1003579.patch + * zlib-bnc1003580.patch refreshed + * zlib-bnc1013882.patch CVE-2016-9843 + +------------------------------------------------------------------- +Thu Sep 24 20:21:46 UTC 2015 - jengelh@inai.de + +- Trim descriptions to fit target audience. Update RPM group + classification. + +------------------------------------------------------------------- +Mon Jul 13 11:58:44 UTC 2015 - tchvatal@suse.com + +- Require zlib-devel in zlib-devel-static to fix previous change + +------------------------------------------------------------------- +Tue Jun 30 11:30:37 UTC 2015 - tchvatal@suse.com + +- Bring back zlib-devel-static. Needed by binutils + +------------------------------------------------------------------- +Wed Jun 24 09:01:59 UTC 2015 - tchvatal@suse.com + +- Remove zlib-devel-static, nothing should use libz.a anyway. +- Package minizip library, everything using it should now pull + minizip-devel and unbundle it bnc#935864 + +------------------------------------------------------------------- +Thu Sep 25 08:04:55 UTC 2014 - tchvatal@suse.com + +- Install examples to cover another angle from bnc#890228 +- Cleanup with spec-cleaner + +------------------------------------------------------------------- +Mon Jul 29 12:37:19 UTC 2013 - mvyskocil@suse.com + +- zlib-format.patch, backport missing sle11 feature back to openSUSE + bnc#831880 + +------------------------------------------------------------------- +Sat May 11 11:51:40 UTC 2013 - idonmez@suse.com + +- Update to version 1.2.8 + * Add new inflateGetDictionary() function + * Fix bug where gzopen() immediately followed by gzclose() + would write an empty file instead of an empty gzip stream. + * Fix bug in gzclose() when gzwrite() runs out of memory + +------------------------------------------------------------------- +Tue Mar 5 10:00:58 UTC 2013 - mvyskocil@suse.com + +- libz1-32bit obsoletes zlib-32bit <= 1.2.7 (fixes bnc#806310) + +------------------------------------------------------------------- +Sat Dec 15 18:56:05 UTC 2012 - schwab@linux-m68k.org + +- Add zlib-devel-static to baselibs.conf, needed for binutils testsuite +- Migrate zlib- to libz1- + +------------------------------------------------------------------- +Thu Nov 22 01:17:59 UTC 2012 - jengelh@inai.de + +- Replace %make_install by normal make install; + the former is a constant source of agony in older or non-SUSE. + +------------------------------------------------------------------- +Mon Oct 22 05:10:48 UTC 2012 - coolo@suse.com + +- buildignore checks for now to get bootstrapping working + +------------------------------------------------------------------- +Mon Oct 15 07:39:29 UTC 2012 - mvyskocil@suse.com + +- add longest_match performance patch (fate#314093) + * suggested by IBM, sent upstream +- rename the main library package to libz1 according Shared + Library Policy +- profiling build can be enabled via build --with profiling +- use the human-readable package description from zlib.net +- add rpmlintrc + +------------------------------------------------------------------- +Mon May 7 12:34:45 UTC 2012 - joop.boonen@opensuse.org + +- Update to 1.2.7 + * Fix bug in gzclose_w() when gzwrite() fails to allocate memory + * Add "x" (O_EXCL) and "e" (O_CLOEXEC) modes support to gzopen() + * Add gzopen_w() in Windows for wide character path names + * Fix type mismatch between get_crc_table() and crc_table +- Passed the spec file through spec cleaner + +------------------------------------------------------------------- +Thu Feb 9 16:17:55 UTC 2012 - jengelh@medozas.de + +- Remove redundant tags/sections + +------------------------------------------------------------------- +Thu Dec 22 14:31:42 CET 2011 - meissner@suse.de + +- use configure options for library paths. Also fixes + the pkg-config file to not have /usr/local bnc#738169 + +------------------------------------------------------------------- +Wed Dec 7 15:29:27 UTC 2011 - coolo@suse.com + +- zlib has it's own spdx license, so use it :) + +------------------------------------------------------------------- +Sat Nov 19 20:00:11 UTC 2011 - crrodriguez@opensuse.org + +- Do not include the codename in soversion. + +------------------------------------------------------------------- +Sat Nov 19 18:42:12 UTC 2011 - crrodriguez@opensuse.org + +- This is zlib 1.2.5.2 codename "motley" +- Fix bug and add consts in contrib/puff [Oberhumer] +- Fix static-only-build install in Makefile.in +- Add libz.a dependency to shared in Makefile.in for parallel builds +- Spell out "number" (instead of "nb") in zlib.h for total_in, total_out +- Fix bug in zlib.h for _FILE_OFFSET_BITS set and _LARGEFILE64_SOURCE not +- Add comment in zlib.h that adler32_combine with len2 < 0 makes no sense +- Make NO_DIVIDE option in adler32.c much faster (thanks to John Reiser) +- Fix zlib.h LFS support when Z_PREFIX used +- Avoid deflate sensitivity to volatile input data +- Avoid division in adler32_combine for NO_DIVIDE +- Clarify the use of Z_FINISH with deflateBound() amount of space +- Use u4 type for crc_table to avoid conversion warnings +- Apply casts in zlib.h to avoid conversion warnings +- Add OF to prototypes for adler32_combine_ and crc32_combine_ [Miller] +- Improve inflateSync() documentation to note indeterminancy +- Add deflatePending() function to return the amount of pending output +- Check that pointers fit in ints when gzprint() compiled old style +- Add dummy name before $(SHAREDLIBV) in Makefile [Bar-Lev, Bowler] + +------------------------------------------------------------------- +Sun Nov 13 05:52:57 UTC 2011 - crrodriguez@opensuse.org + +-open file descriptors with O_CLOEXEC but only in the· + gzopen() case, not in gzdopen() as that would change + the calling application' semantics. + It is responsability of the caller to ensure O_CLOEXEC is used + in such scenario. + +------------------------------------------------------------------- +Tue Oct 4 09:10:46 UTC 2011 - uli@suse.com + +- cross-build fix: use %__cc macro (not %configure, this is not + autoconf) + +------------------------------------------------------------------- +Fri Aug 26 18:36:11 UTC 2011 - crrodriguez@opensuse.org + +- Use __attribute__ target in SSE optimized functions + so the compiler defines __MMX__ __SSE__ etc, this probably + only matters in 32 bit. what version to use is still + determined at runtime by cpuid. + +------------------------------------------------------------------- +Mon Jun 27 11:05:47 UTC 2011 - dimstar@opensuse.org + +- bnc#652333: Change LICENSE text to not have version, which + changes too often and invalidates the text everytime. + +------------------------------------------------------------------- +Mon May 30 19:38:43 UTC 2011 - crrodriguez@opensuse.org + +- Fix two bugs that will break ia64 systems only. + +------------------------------------------------------------------- +Fri May 27 09:13:31 UTC 2011 - mvyskocil@suse.cz + +- fix bnc#679345: zlib segfaults when passing NULL to gzopen + * return NULL checks back to gz_open + +------------------------------------------------------------------- +Thu May 12 20:02:26 UTC 2011 - crrodriguez@opensuse.org + +- Update SSE patches, fixes bugs in PPC implementation +- X86 improvements. + +------------------------------------------------------------------- +Sat May 7 18:25:48 UTC 2011 - crrodriguez@opensuse.org + +- Update SSE2/MMX patches to their current version. + per request of the author. + * This are integrated now,including support for a number + of additional archs and fixes ARM patches bugs. + +------------------------------------------------------------------- +Mon Apr 18 18:02:50 UTC 2011 - crrodriguez@opensuse.org + +- Update SSE2/MMX patches tp version 3 + now with comments,performance numbers,and ia64 support + +------------------------------------------------------------------- +Wed Mar 30 19:47:30 UTC 2011 - crrodriguez@opensuse.org + +- Update SSE2/MMX patches to version 2. + +------------------------------------------------------------------- +Tue Mar 15 22:38:32 UTC 2011 - crrodriguez@opensuse.org + +- Add highly experimental patches to use SSE2/SSSE3/MMX in zlib + this makes the library up to 6 times faster. + +------------------------------------------------------------------- +Sun Jan 9 14:33:08 CET 2011 - meissner@suse.de + +- do not use compiler profile information, as the testsuite fails + when using it. + +------------------------------------------------------------------- +Tue Dec 21 16:12:56 CET 2010 - meissner@suse.de + +- Add dependency to make it build in a parallel world. + +------------------------------------------------------------------- +Tue Dec 7 17:18:43 UTC 2010 - cristian.rodriguez@opensuse.org + +- use compiler profile information to make libz slightly faster. + +------------------------------------------------------------------- +Tue Sep 21 21:48:23 UTC 2010 - dimstar@opensuse.org + +- Simplified version of zlib-lfs.patch. Should also resolve issues + arising with various combination of LFS derinfes. + +------------------------------------------------------------------- +Tue Sep 14 20:10:13 UTC 2010 - dimstar@opensuse.org + +- Add zlib-lfs.patch: Fix Large File Support. Patch comes from the + zlib-devel mailinglist: + http://mail.madler.net/pipermail/zlib-devel_madler.net/2010-May/002303.html + +------------------------------------------------------------------- +Tue Aug 10 16:04:54 CEST 2010 - dimstar@opensuse.org + +- Update to version 1.2.5: + + fixes bugs in gzseek() and gzeof() +- Changes from version 1.2.4: + + Fixed bugs in adler32_combine(), compressBound(), and + deflateBound() + + Wholesale replacement of gz* functions with faster versions + + As part of that, added gzbuffer(), gzoffset(), gzclose_r(), and + gzclose_w() functions + + Faster Z_HUFFMAN_ONLY and Z_RLE compression for images and + other specialized compression + + Added flush options Z_BLOCK to deflate() and Z_TREES to + inflate() for finer control + + Added inflateReset2() and inflateMark() functions, the latter + to aid in random access applications + + Added LFS (Large File Summit) support for 64-bit file offsets + and many other portability improvements + + Updated examples in examples/ and updated third-party + contributions in contrib/ +- Drop obsolete patches: + + zlib-1.2.1-make-test.patch + + zlib-1.2.1-vsnprintf.patch + + zlib-1.2.1-CFLAGS.dif + + zlib-1.2.3-686.patch + + zlib-1.2.3-visibility-support.patch +- Rebased zlib-1.2.2-format.patch +- Clean spec file using spec-cleaner. +- BuildRequire to have proper pkgconfig() provides. + +------------------------------------------------------------------- +Mon Jun 28 06:38:35 UTC 2010 - jengelh@medozas.de + +- use %_smp_mflags + +------------------------------------------------------------------- +Sat Dec 12 18:41:52 CET 2009 - jengelh@medozas.de + +- add baselibs.conf as a source + +------------------------------------------------------------------- +Tue Nov 24 16:23:32 UTC 2009 - crrodriguez@opensuse.org + +- refresh patches with fuzz=0 + +------------------------------------------------------------------- +Fri Apr 24 10:33:06 CEST 2009 - mseben@suse.cz + +- added LICENSE file, for GPL licensed contrib files, however + they are not used to build our zlib library (bnc#490107) +- added zlib-1.2.3-686.patch - update license text (bnc#490107) + +------------------------------------------------------------------- +Sat Mar 21 16:23:42 CET 2009 - crrodriguez@suse.de + +- there is one valid use case of static zlib in "qemu" package + split a -devel-static subpackage, please do not BuildRequire + this package unless you are 100% sure you need it, if in doubt + mail either the security team or me. + +------------------------------------------------------------------- +Fri Feb 27 19:58:04 CET 2009 - crrodriguez@suse.de + +- add patch from gentoo that makes zlib to only export + its public API using GCC visibility features, this will + of course break wrong code that uses private symbols +- exclude static zlib, at least temporarily, in order to clearly know + what is using it. + +------------------------------------------------------------------- +Wed Jan 7 12:34:56 CET 2009 - olh@suse.de + +- obsolete old -XXbit packages (bnc#437293) + +------------------------------------------------------------------- +Thu Apr 10 12:54:45 CEST 2008 - ro@suse.de + +- added baselibs.conf file to build xxbit packages + for multilib support + +------------------------------------------------------------------- +Sat Jan 12 03:41:39 CET 2008 - crrodriguez@suse.de + +- fix library-without-ldconfig-* errors +- do not delete buildroot on install section +- run make test in the check section. + +------------------------------------------------------------------- +Thu May 3 11:41:41 CEST 2007 - rguenther@suse.de + +- move documentation files to zlib-devel package + +------------------------------------------------------------------- +Wed Jan 31 13:02:30 CET 2007 - ro@suse.de + +- remove libgz completely (obsolete long ago) + +------------------------------------------------------------------- +Wed Jan 25 21:31:09 CET 2006 - mls@suse.de + +- converted neededforbuild to BuildRequires + +------------------------------------------------------------------- +Sat Dec 17 09:02:34 CET 2005 - kukuk@suse.de + +- Remove unpackaged libz.so symlink + +------------------------------------------------------------------- +Wed Jul 20 10:52:46 CEST 2005 - meissner@suse.de + +- Upgraded to 1.2.3. Security fix is now in mainline. + +------------------------------------------------------------------- +Mon Jul 4 10:03:17 CEST 2005 - meissner@suse.de + +- fixed memory corruption problem #94926, CAN-2005-2096. + +------------------------------------------------------------------- +Fri Apr 1 09:34:23 CEST 2005 - kukuk@suse.de + +- Remove movage of setting CFLAGS + +------------------------------------------------------------------- +Tue Mar 29 10:44:32 CEST 2005 - kukuk@suse.de + +- Update to 1.2.2.2 +- Fix compiling with gcc 3.3 and 4.0 + +------------------------------------------------------------------- +Mon Feb 28 10:03:18 CET 2005 - meissner@suse.de + +- Use __printf__ in format attribute to avoid printf defines. + +------------------------------------------------------------------- +Wed Feb 23 13:11:56 CET 2005 - meissner@suse.de + +- supply format arguments to gzprintf(). + +------------------------------------------------------------------- +Tue Feb 8 10:13:30 CET 2005 - ro@suse.de + +- update to 1.2.2 + +------------------------------------------------------------------- +Wed Aug 25 14:39:49 CEST 2004 - ro@suse.de + +- updated crash patch to version from Dmitry (#44087) + +------------------------------------------------------------------- +Mon Aug 23 15:54:02 CEST 2004 - ro@suse.de + +- added fix for crash on invalid input (#44087) + +------------------------------------------------------------------- +Thu Dec 4 01:59:05 CET 2003 - ro@suse.de + +- update to 1.2.1 + +------------------------------------------------------------------- +Fri Nov 21 15:08:36 CET 2003 - kukuk@suse.de + +- Use RPM_OPT_FLAGS +- Compile with no execstack + +------------------------------------------------------------------- +Mon Oct 20 18:27:53 CEST 2003 - ro@suse.de + +- don't build as root + +------------------------------------------------------------------- +Mon May 12 18:55:18 CEST 2003 - kukuk@suse.de + +- Add defattr + +------------------------------------------------------------------- +Mon Mar 3 16:52:51 CET 2003 - ro@suse.de + +- do use vsnprintf + +------------------------------------------------------------------- +Sun Feb 16 10:37:37 CET 2003 - olh@suse.de + +- no absolute symlinks for libz.so + +------------------------------------------------------------------- +Thu Jul 25 08:40:28 CEST 2002 - kukuk@suse.de + +- Rename to zlib +- Splitt off zlib-devel + +------------------------------------------------------------------- +Fri Jul 5 12:50:05 MEST 2002 - bk@suse.de + +- enable make test + +------------------------------------------------------------------- +Mon May 27 09:13:31 CEST 2002 - ro@suse.de + +- update to 1.1.4 including the previous security patch +- Returned incorrect error (Z_MEM_ERROR) on some invalid data +- Avoid accesses before window for invalid distances with inflate window + less than 32K. +- force windowBits > 8 to avoid a bug in the encoder for a window size + of 256 bytes. (A complete fix will be available in 1.1.5). + +------------------------------------------------------------------- +Mon Feb 25 14:37:54 CET 2002 - ro@suse.de + +- remove executable bits from manpage and include file + +------------------------------------------------------------------- +Thu Feb 7 11:55:43 MET 2002 - draht@suse.de + +- prevent double free() (security problem) with + zlib-1.1.3-zfree.dif + +------------------------------------------------------------------- +Tue Dec 11 15:40:35 CET 2001 - froh@suse.de + +- fixed specfile to use %_libdir for make install for + the sake of lib64 on S/390. also updated old sparc64 %ifarch at + this point. + +------------------------------------------------------------------- +Tue Dec 4 17:54:00 CET 2001 - ro@suse.de + +- added Provides zlib-devel for compatibility + +------------------------------------------------------------------- +Tue May 1 18:12:26 CEST 2001 - kukuk@suse.de + +- Minor spec file fixes for sparc64 + +------------------------------------------------------------------- +Thu Dec 7 13:55:36 CET 2000 - kukuk@suse.de + +- Install only shared library in /%{_lib} + +------------------------------------------------------------------- +Wed Dec 6 16:20:09 CET 2000 - ro@suse.de + +- install main lib in /%{_lib} not /usr/%{_lib} + +------------------------------------------------------------------- +Mon Oct 2 20:25:01 CEST 2000 - kukuk@suse.de + +- Use %{_lib} macro + +------------------------------------------------------------------- +Mon Sep 25 15:27:33 CEST 2000 - ro@suse.de + +- sorted + +------------------------------------------------------------------- +Wed Apr 5 01:01:37 CEST 2000 - bk@suse.de + +- fixed BuildRoot support + +------------------------------------------------------------------- +Mon Apr 3 15:12:01 MEST 2000 - bk@suse.de + +- added buildroot for build with dynamically linked rpm which uses libz(s390) + +------------------------------------------------------------------- +Thu Jan 20 15:38:46 CET 2000 - aj@suse.de + +- /usr/man -> /usr/share/man + +------------------------------------------------------------------- +Mon Sep 13 17:23:57 CEST 1999 - bs@suse.de + +- ran old prepare_spec on spec file to switch to new prepare_spec. + +------------------------------------------------------------------- +Tue Jul 13 13:13:43 MEST 1999 - ro@suse.de + +- added zutil.h + +------------------------------------------------------------------- +Thu Mar 4 10:03:38 MET 1999 - ro@suse.de + +- moved from /usr/X11R6 to usr +- libgz is now only a link to libz +- "libgz.so.1" is provided from specfile + +------------------------------------------------------------------- +Tue Aug 25 17:11:04 MEST 1998 - ro@suse.de + +- update to 1.1.3 + +------------------------------------------------------------------- +Fri Mar 20 13:58:12 MET 1998 - ro@suse.de + +- update to 1.1.2 + +------------------------------------------------------------------- +Mon Mar 2 14:03:08 MET 1998 - ro@suse.de + +- update to 1.1.1 + +------------------------------------------------------------------- +Fri Feb 27 11:17:44 MET 1998 - ro@suse.de + +- update to version 1.0.9 + created own specfile for easier maintenance + included man-pages + diff --git a/zlib.keyring b/zlib.keyring new file mode 100644 index 0000000..df8ac8e --- /dev/null +++ b/zlib.keyring @@ -0,0 +1,38 @@ +-----BEGIN PGP PUBLIC KEY BLOCK----- + +mQGiBDuVqKgRBAD5Mcvdc41W5lpeZvYplEuyEBXwmxnUryE2KaCG1C06sGyqgiec +VPXPbgIPKOUt4veMycVoqU4U4ZNuIeCGPfUvkGKLKvy5lK3iexC1Qvat+9ek2+yX +9zFlTo9QyT4kjn+xaZQYVctL370gUNV4eoiWDdrTjIvBfQCb+bf87eHv0QCg/7xt +wnq3uMpQHX+k2LGD2QDEjUcEALalUPPX99ZDjBN75CFUtbE43a73+jtNOLJFqGo3 +ne/lB8DqVwavrgQQxQqjg2xBVvagNpu2Cpmz3HlWoaqEb5vwxjRjhF5WRE+4s4es +9536lQ6pd5tZK4tHMOjvICkSg2BLUsc8XzBreLv3GEdpHP6EeezgAVQyWMpZkCdn +Xk8FA/9gRmro4+X0KJilw1EShYzudEAi02xQbr9hGiA84pQ4hYkdnLLeRscChwxM +VmoiEuJ51ZzIPlcSifzvlQBHIyYCl0KJeVMECXyjLddWkQM32ZZmQvG02mL2XYmF +/UG+/0vd6b2ISmtns6WrULGPNtagHhul+8j7zUfedsWuqpwbm7QmTWFyayBBZGxl +ciA8bWFkbGVyQGFsdW1uaS5jYWx0ZWNoLmVkdT6IRgQQEQIABgUCPIx/xAAKCRDZ +on0lAZZxp+ETAJ0bn8ntrka3vrFPtI6pRwOlueDEgQCfdFqvNgLv1QTYZJQZ5rUn +oM+F+aGIRgQQEQIABgUCQ5GdzQAKCRAvWOuZeViwlP1AAJ4lI6tis2lruhG8DsQ0 +xtWvb2OCfACfb5B/CYDjmRInrAgbVEla3EiO9sKIWAQQEQIAGAUCO5WoqAgLAwkI +BwIBCgIZAQUbAwAAAAAKCRB4P82OWLyvunKOAJ9kOC1uyoYYiXp2SMdcPMj5J+8J +XQCeKBP9Orx0bXK6luyWnCS5LJhevTyJARwEEAECAAYFAlDH6cIACgkQdxZ3RMno +5CguZAf/dxDbnY+rad6GJ1fYVyB9PfboyXLY/vksmupE9rbYmuLP85Rq1hdN56aZ +Qwjm7EPQi6htFANKOPkjOhutSD4X530Dj6Y7To8t85lW3351OP07EfZGilolIugU +6IMZNaUHVF1T0I68frkNTrmRx0PcOJacWB6fkBdoNtd5NLASgI+cszgLsD6THJZk +58RUDINY6fGBYFZkl2/dBbkLaj3DFr+ed6Oe99d546nfSz+zsm454W2M+Wf/yplK +O8Sd641h1eRGD/vihsOO+4gRgS+tQNzwb+eivON0PMvsGAEPEQ+aPVQ/U/UIQSYA ++cYz2jGSXhVppatEpq5U3aJLbcZKOrkCDQQ7laipEAgA9kJXtwh/CBdyorrWqULz +Bej5UxE5T7bxbrlLOCDaAadWoxTpj0BV89AHxstDqZSt90xkhkn4DIO9ZekX1KHT +UPj1WV/cdlJPPT2N286Z4VeSWc39uK50T8X8dryDxUcwYc58yWb/Ffm7/ZFexwGq +01uejaClcjrUGvC/RgBYK+X0iP1YTknbzSC0neSRBzZrM2w4DUUdD3yIsxx8Wy2O +9vPJI8BD8KVbGI2Ou1WMuF040zT9fBdXQ6MdGGzeMyEstSr/POGxKUAYEY18hKcK +ctaGxAMZyAcpesqVDNmWn6vQClCbAkbTCD1mpF1Bn5x8vYlLIhkmuquiXsNV6TIL +OwACAgf/aMWYoBCocATXsfSUAJb69OPUXWjevZiCf6n+7Id3L5X5um55L5sEBr8+ +8m5SIuHUippgNFJdu2xyulbb1MeegtTttEWymF9sM8cWfeTjXPOd7+ZQumiOXwk/ +g0qqjTrq7EYW5PlMjO2FbH/Ix9SHKVS9a0eGUUl+PBv3fkEZBJ4HhweqcSfLyKU/ +CHysN03Z36gtdu1BJlzHy8BPxWzP4vtPEi57Q1dFDY/+OrdlBnwKTpne6y0rAbi/ +wk6FxDGQ86vdapLI51kTxvkYx8+qZXqE4CG5fWbAFDQVTNZIWJNgYMX7Kgl8Fvw+ +7zCqJsv/KbuonIEb5hNViflVTWlBAIhMBBgRAgAMBQI7laipBRsMAAAAAAoJEHg/ +zY5YvK+6T88An1VSVGbeKbIL+k8HaPUsWB7qs5RhAKDdtkn0xqOr+0pE5eilEc61 +pMCmSQ== +=5shY +-----END PGP PUBLIC KEY BLOCK----- diff --git a/zlib.spec b/zlib.spec new file mode 100644 index 0000000..b245b8c --- /dev/null +++ b/zlib.spec @@ -0,0 +1,256 @@ +# +# spec file for package zlib +# +# Copyright (c) 2023 SUSE LLC +# +# All modifications and additions to the file contributed by third parties +# remain the property of their copyright owners, unless otherwise agreed +# upon. The license for this file, and modifications and additions to the +# file, is the same license as for the pristine package itself (unless the +# license for the pristine package is not an Open Source License, in which +# case the license is the MIT License). An "Open Source License" is a +# license that conforms to the Open Source Definition (Version 1.9) +# published by the Open Source Initiative. + +# Please submit bugfixes or comments via https://bugs.opensuse.org/ +# + + +Name: zlib +Version: 1.2.13 +Release: 0 +Summary: Library implementing the DEFLATE compression algorithm +License: Zlib +URL: https://www.zlib.net/ +Source0: https://zlib.net/zlib-%{version}.tar.gz +Source1: https://zlib.net/zlib-%{version}.tar.gz.asc +Source2: %{name}.keyring +Source4: LICENSE +Source5: baselibs.conf +Source6: zlib-rpmlintrc +#PATCH-FIX-SUSE: compiler check of varguments passed to gzprintf +Patch1: zlib-format.patch +#PATCH-FIX-SUSE do not store negative values in uInt +Patch2: 0001-Do-not-try-to-store-negative-values-in-unsigned-int.patch +#PATCH-FIX-SUSE do not check exact version match as the lib can be updated +# we should simply rely on soname versioning to protect us +Patch3: zlib-no-version-check.patch +#PATCH-FIX-SUSE https://github.com/madler/zlib/pull/229 +Patch4: minizip-dont-install-crypt-header.patch +# PATCH-FIX-UPSTREAM https://github.com/madler/zlib/commit/e554695638228b846d49657f31eeff0ca4680e8a +Patch5: zlib-1.2.13-fix-bug-deflateBound.patch +#PATCH-FIX-SUSE https://github.com/madler/zlib/pull/410 +Patch6: zlib-1.2.13-IBM-Z-hw-accelerated-deflate-s390x.patch +# Patches taken from https://github.com/iii-i/zlib/releases/tag/crc32vx-v3 +Patch7: zlib-1.2.5-minizip-fixuncrypt.patch +Patch8: zlib-1.2.13-optimized-s390.patch +# https://github.com/iii-i/zlib/commit/171d0ff3c9ed40da0ac14085ab16b766b1162069 +Patch10: zlib-1.2.11-covscan-issues.patch +Patch11: zlib-1.2.11-covscan-issues-rhel9.patch +Patch14: zlib-1.2.12-s390-vectorize-crc32.patch +# The following patches are taken from https://github.com/mscastanho/zlib/commits/power-optimizations-1.2.12 +Patch15: zlib-1.2.12-adler32-vector-optimizations-for-power.patch +Patch16: zlib-1.2.12-fix-invalid-memory-access-on-ppc-and-ppc64.patch +Patch17: zlib-1.2.12-add-optimized-slide_hash-for-power.patch +Patch18: zlib-1.2.12-add-vectorized-longest_match-for-power.patch +# PATCH-FIX-UPSTREAM danilo.spinella@suse.com bsc#1210593 bsc#1211005 +# Fix deflateBound() before deflateInit() +Patch19: bsc1210593.patch +BuildRequires: autoconf +BuildRequires: automake +BuildRequires: libtool +BuildRequires: pkgconfig +# SLE15 specific buildcycle: we don't need NIS functionality in PAM for building +#!BuildIgnore: libtirpc3 libtirpc-netconfig +%{?suse_build_hwcaps_libs} + +%description +zlib is a general-purpose lossless data-compression library, +implementing an API for the DEFLATE algorithm, the latter of +which is being used by, for example, gzip and the ZIP archive +format. + +%package -n libz1 +Summary: Library implementing the DEFLATE compression algorithm +Provides: %{name} = %{version}-%{release} +Obsoletes: %{name} < %{version}-%{release} + +%description -n libz1 +zlib is a general-purpose lossless data-compression library, +implementing an API for the DEFLATE algorithm, the latter of +which is being used by, for example, gzip and the ZIP archive +format. + +%package devel +Summary: Development files for zlib, a data compression library +Requires: glibc-devel +Requires: libz1 = %{version} + +%description devel +zlib is a general-purpose lossless data-compression library, +implementing an API for the DEFLATE algorithm, the latter of +which is being used by, for example, gzip and the ZIP archive +format. + +This subpackage holds the development headers for the library. + +The zlib data format is itself portable across platforms. Unlike the +LZW compression method used in unix compress(1) and in the GIF image +format, the compression method currently used in zlib essentially +never expands the data. (LZW can double or triple the file size in +extreme cases.) zlib's memory footprint is also independent of the +input data and can be reduced, if necessary, at some cost in +compression. + +%package devel-static +Summary: Static library for zlib +Requires: %{name}-devel = %{version} +Provides: %{name}-devel:%{_libdir}/libz.a + +%description devel-static +zlib is a general-purpose lossless data-compression library, +implementing an API for the DEFLATE algorithm, the latter of +which is being used by, for example, gzip and the ZIP archive +format. + +This subpackage contains the static version of the library +used for development. + +%package -n libminizip1 +Summary: Library for manipulation with .zip archives + +%description -n libminizip1 +Minizip is a library for manipulation with files from .zip archives. + +%package -n minizip-devel +Summary: Development files for the minizip library +Requires: %{name}-devel = %{version} +Requires: libminizip1 = %{version} +Requires: pkgconfig + +%description -n minizip-devel +This package contains the libraries and header files needed for +developing applications which use minizip. + +%package testsuite +Summary: Provide the test examples to reproduce test suite +Requires: libz1 = %{version} + +%description testsuite +To run the testsuite, execute %{_libexecdir}/%{name}/testsuite + +It should exit 0 + +%prep +%setup -q +%patch1 +%patch2 -p1 +%patch3 -p1 +%patch4 -p1 +%patch5 -p1 +%patch6 -p1 +%patch7 -p1 +%patch8 +%patch10 -p1 +%patch11 -p1 +%patch14 -p1 +%patch15 -p1 +%patch16 -p1 +%patch17 -p1 +%patch18 -p1 +%patch19 -p1 +cp %{SOURCE4} . + +%build +%global _lto_cflags %{_lto_cflags} -ffat-lto-objects +export LDFLAGS="-Wl,-z,relro,-z,now" +# For sure not autotools build +CC="cc" CFLAGS="%{optflags}" ./configure \ + --shared \ + --prefix=%{_prefix} \ + --libdir=%{_libdir} \ +%ifarch s390x s390 + --dfltcc \ + --dfltcc-level-mask=0x7e \ +%endif + %{nil} + +# Profiling flags breaks tests, as of 1.2.12 +# In particular, gzseek does not work as intended +#%if %{do_profiling} +# %make_build CFLAGS="%{optflags} %{cflags_profile_generate}" +# %make_build check +# %make_build clean +# %make_build %{?_smp_mflags} CFLAGS="%{optflags} %{cflags_profile_feedback}" +#%else + %make_build +#%endif + +# And build minizip +cd contrib/minizip +autoreconf -fvi +%configure \ + --disable-static \ + --disable-silent-rules +%make_build + +%check +%make_build check + +%install +mkdir -p %{buildroot}%{_libdir} +%make_install +# manpage +install -m 644 zlib.3 %{buildroot}%{_mandir}/man3 +install -m 644 zutil.h %{buildroot}%{_includedir} +# examples +mkdir -p %{buildroot}%{_docdir}/%{name} +cp -r examples/ %{buildroot}%{_docdir}/%{name}/ + +install -D examplesh %{buildroot}%{_libexecdir}/%{name}/testsuite + +# Install minizip +cd contrib/minizip +%make_install +find %{buildroot} -type f -name "*.la" -delete -print + +%post -n libz1 -p /sbin/ldconfig +%postun -n libz1 -p /sbin/ldconfig +%post -n libminizip1 -p /sbin/ldconfig +%postun -n libminizip1 -p /sbin/ldconfig + +%files -n libz1 +%license LICENSE +%{_libdir}/libz.so.1.2.* +%{_libdir}/libz.so.1 + +%files devel +%doc README ChangeLog +%dir %{_docdir}/%{name}/ +%dir %{_docdir}/%{name}/examples +%{_docdir}/%{name}/examples/* +%{_mandir}/man3/zlib.3%{?ext_man} +%{_includedir}/zlib.h +%{_includedir}/zconf.h +%{_includedir}/zutil.h +%{_libdir}/libz.so +%{_libdir}/pkgconfig/zlib.pc + +%files -n libminizip1 +%doc contrib/minizip/MiniZip64_info.txt contrib/minizip/MiniZip64_Changes.txt +%{_libdir}/libminizip.so.* + +%files -n minizip-devel +%dir %{_includedir}/minizip +%{_includedir}/minizip/*.h +%{_libdir}/libminizip.so +%{_libdir}/pkgconfig/minizip.pc + +%files devel-static +%{_libdir}/libz.a + +%files testsuite +%dir %{_libexecdir}/%{name} +%{_libexecdir}/%{name}/testsuite + +%changelog