From 175cc3ff99549a782d0abd30a1f7e04ae731dd1b6417e5949240d331dbca3af2 Mon Sep 17 00:00:00 2001 From: Kristyna Streitova Date: Wed, 3 Jul 2019 11:42:26 +0000 Subject: [PATCH] Accepting request 709398 from home:kstreitova:branches:Base:System - add gzip-1.10-ibm_dfltcc_support.patch [jsc#SLE-5818] * it adds support for DFLTCC (hardware-accelerated deflation) for s390x arch * enable it via "--enable-dfltcc" option OBS-URL: https://build.opensuse.org/request/show/709398 OBS-URL: https://build.opensuse.org/package/show/Base:System/gzip?expand=0&rev=68 --- gzip-1.10-ibm_dfltcc_support.patch | 980 +++++++++++++++++++++++++++++ gzip.changes | 8 + gzip.spec | 18 +- 3 files changed, 1004 insertions(+), 2 deletions(-) create mode 100644 gzip-1.10-ibm_dfltcc_support.patch diff --git a/gzip-1.10-ibm_dfltcc_support.patch b/gzip-1.10-ibm_dfltcc_support.patch new file mode 100644 index 0000000..1d7db5c --- /dev/null +++ b/gzip-1.10-ibm_dfltcc_support.patch @@ -0,0 +1,980 @@ +----- +DFLTCC support for IBM System Z [jsc#SLE-5818] + +This patch contains the following upstream commits: +http://git.savannah.gnu.org/cgit/gzip.git/commit/?id=7a6f9c9c3267185a299ad178607ac5e3716ab4a5 +http://git.savannah.gnu.org/cgit/gzip.git/commit/?id=be0c5581e38332b2ffa8a4cf92076cfde02872b4 + +----- + +From 7a6f9c9c3267185a299ad178607ac5e3716ab4a5 Mon Sep 17 00:00:00 2001 +From: Ilya Leoshkevich +Date: Tue, 2 Apr 2019 08:01:02 -0700 +Subject: bug#34918: [PATCH] Add support for IBM Z hardware-accelerated deflate + +Future versions of IBM Z mainframes will provide DFLTCC instruction, +which implements deflate algorithm in hardware with estimated +compression and decompression performance orders of magnitude faster +than the current gzip and ratio comparable with that of level 1. + +This patch adds DFLTCC support to gzip. 
In order to enable it, the +following build commands should be used: + + $ ./configure --enable-dfltcc + $ make + +When built like this, gzip would compress in hardware on level 1, and in +software on all other levels. Decompression will always happen in +hardware. In order to enable DFLTCC compression for levels 1-6 (i.e. to +make it used by default) one could either add -DDFLTCC_LEVEL_MASK=0x7e +at compile time, or set the environment variable DFLTCC_LEVEL_MASK to +0x7e at run time. + +Two DFLTCC compression calls produce the same results only when they +both are made on machines of the same generation, and when the +respective buffers have the same offset relative to the start of the +page. Therefore care should be taken when using hardware compression +when reproducible results are desired. One such use case - reproducible +software builds - is handled explicitly: when the SOURCE_DATE_EPOCH +environment variable is set, the hardware compression is disabled. + +This patch tries to add DFLTCC support in the least intrusive way. +All SystemZ-specific code was placed into a separate file, but +unfortunately there is still a noticeable amount of changes in the +main gzip code. Below is the summary of those changes. + +DFLTCC will refuse to write an End-of-block Symbol if there is no input +data, thus in some cases it is necessary to do this manually. In order +to achieve this, bi_buf and bi_valid were promoted to extern variables. + +lm_init() function moves the input buffer into the window, which is not +desirable for DFLTCC. Therefore, its invocation was moved to +software-only deflate(). In addition to initializing the window, this +function also used to convert compression level to flags, which is still +needed for DFLTCC. This responsibility was handed off to zip() function. + +To achieve maximum performance with DFLTCC, inbuf and outbuf must be +256k big and page-aligned. Additionally, for DFLTCC to work at all, the +window must be page-aligned. 
+ +In addition to compression, DFLTCC computes a CRC-32 checksum; therefore, +whenever it's used, software checksumming needs to be suppressed and its +results replaced by those of dfltcc. This is achieved by introducing the +new getcrc() and setcrc() functions. + +Unlike the current software implementation, DFLTCC decompresses data +into the output buffer, and not the window. Therefore, just like +flushing the window, flushing the output buffer must honor the test +flag. + +Finally, znew-k test assumes that "znew -K" would not convert the test +.Z file to .gz, which is not the case with DFLTCC. Since this is not the +main point of the test, this assumption was relaxed. +--- + Makefile.am | 3 + + bits.c | 4 +- + configure.ac | 16 +++ + deflate.c | 21 +-- + dfltcc.c | 429 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + gzip.c | 12 ++ + gzip.h | 26 +++- + tests/znew-k | 3 +- + unzip.c | 4 + + util.c | 30 ++++- + zip.c | 23 +++- + 11 files changed, 536 insertions(+), 35 deletions(-) + create mode 100644 dfltcc.c + +From be0c5581e38332b2ffa8a4cf92076cfde02872b4 Mon Sep 17 00:00:00 2001 +From: Paul Eggert +Date: Tue, 2 Apr 2019 10:26:30 -0700 +Subject: Improve IBM Z patch + +Most of this is minor changes to use GNU style and C99 constructs. +* NEWS: Mention IBM Z. +* bootstrap.conf (gnulib_modules): Add stdalign. +* dfltcc.c: Include stdalign.h, stdbool.h. +(union aligned_dfltcc_qaf_param, union aligned_dfltcc_param_v0): +New types, used for C11-style alignment. All uses changed. +(init_param): +* gzip.c (BUFFER_ALIGNED): New macro. +(inbuf, outbuf, window): Use it, so buffers are aligned everywhere. +* gzip.h (INBUFSIZ, OUTBUFSIZ): Use big buffers everywhere, +unless SMALL_MEM. 
+* zip.c (SLOW, FAST): Now enums since they need not be macros: +--- + NEWS | 4 + + deflate.c | 8 +- + dfltcc.c | 632 +++++++++++++++++++++++++++++---------------------------- + gzip.c | 27 ++- + gzip.h | 16 +- + unzip.c | 2 +- + util.c | 30 ++- + zip.c | 24 +-- + 11 files changed, 376 insertions(+), 370 deletions(-) + + +Index: gzip-1.10/Makefile.am +=================================================================== +--- gzip-1.10.orig/Makefile.am ++++ gzip-1.10/Makefile.am +@@ -55,6 +55,9 @@ gzip_SOURCES = \ + trees.c unlzh.c unlzw.c unpack.c unzip.c util.c zip.c + gzip_LDADD = libver.a lib/libgzip.a + gzip_LDADD += $(LIB_CLOCK_GETTIME) ++if IBM_Z_DFLTCC ++gzip_SOURCES += dfltcc.c ++endif + + BUILT_SOURCES += version.c + version.c: Makefile +Index: gzip-1.10/bits.c +=================================================================== +--- gzip-1.10.orig/bits.c ++++ gzip-1.10/bits.c +@@ -78,7 +78,7 @@ + + local file_t zfile; /* output gzip file */ + +-local unsigned short bi_buf; ++unsigned short bi_buf; + /* Output buffer. bits are inserted starting at the bottom (least significant + * bits). + */ +@@ -88,7 +88,7 @@ local unsigned short bi_buf; + * more than 16 bits on some systems.) + */ + +-local int bi_valid; ++int bi_valid; + /* Number of valid bits in bi_buf. All bits above the last valid bit + * are always zero. 
+ */ +Index: gzip-1.10/configure.ac +=================================================================== +--- gzip-1.10.orig/configure.ac ++++ gzip-1.10/configure.ac +@@ -74,6 +74,16 @@ AC_ARG_ENABLE([gcc-warnings], + fi] + ) + ++AC_ARG_ENABLE([dfltcc], ++ [AS_HELP_STRING([--enable-dfltcc], ++ [use DEFLATE COMPRESSION CALL instruction on IBM Z])], ++ [case $enableval in ++ yes|no) gl_dfltcc=$enableval ;; ++ *) AC_MSG_ERROR([bad value $enableval for dfltcc option]) ;; ++ esac], ++ [gl_dfltcc=no] ++) ++ + # gl_GCC_VERSION_IFELSE([major], [minor], [run-if-found], [run-if-not-found]) + # ------------------------------------------------ + # If $CPP is gcc-MAJOR.MINOR or newer, then run RUN-IF-FOUND. +@@ -185,6 +195,12 @@ if test "$gl_gcc_warnings" = yes; then + AC_SUBST([GNULIB_WARN_CFLAGS]) + fi + ++if test "$gl_dfltcc" = yes; then ++ AC_DEFINE([IBM_Z_DFLTCC], , ++ [Use DEFLATE COMPRESSION CALL instruction on IBM Z machines.]) ++fi ++AM_CONDITIONAL([IBM_Z_DFLTCC], [test "$gl_dfltcc" = yes]) ++ + # cc -E produces incorrect asm files on SVR4, so postprocess it. + ASCPPPOST="sed '/^ *\\#/d; s,//.*,,; s/% /%/g; s/\\. /./g'" + AC_SUBST([ASCPPPOST]) +Index: gzip-1.10/deflate.c +=================================================================== +--- gzip-1.10.orig/deflate.c ++++ gzip-1.10/deflate.c +@@ -123,10 +123,6 @@ + #define NIL 0 + /* Tail of hash chains */ + +-#define FAST 4 +-#define SLOW 2 +-/* speed options for the general purpose bit flag */ +- + #ifndef TOO_FAR + # define TOO_FAR 4096 + #endif +@@ -215,9 +211,6 @@ local unsigned int max_lazy_match; + * max_insert_length is used only for compression levels <= 3. 
+ */ + +-local int compr_level; +-/* compression level (1..9) */ +- + unsigned good_match; + /* Use a faster search when the previous match is longer than this */ + +@@ -308,14 +301,12 @@ local void check_match (IPos start, IPo + /* =========================================================================== + * Initialize the "longest match" routines for a new file + */ +-void lm_init (pack_level, flags) ++void lm_init (pack_level) + int pack_level; /* 0: store, 1: best speed, 9: best compression */ +- ush *flags; /* general purpose bit flag */ + { + register unsigned j; + + if (pack_level < 1 || pack_level > 9) gzip_error ("bad pack level"); +- compr_level = pack_level; + + /* Initialize the hash table. */ + #if defined MAXSEG_64K && HASH_BITS == 15 +@@ -337,11 +328,6 @@ void lm_init (pack_level, flags) + nice_match = configuration_table[pack_level].nice_length; + #endif + max_chain_length = configuration_table[pack_level].max_chain; +- if (pack_level == 1) { +- *flags |= FAST; +- } else if (pack_level == 9) { +- *flags |= SLOW; +- } + /* ??? reduce max_chain_length for binary files */ + + strstart = 0; +@@ -732,7 +718,8 @@ local off_t deflate_fast() + * evaluation for matches: a match is finally adopted only if there is + * no better match at the next window position. + */ +-off_t deflate() ++off_t ++deflate (int pack_level) + { + IPos hash_head; /* head of hash chain */ + IPos prev_match; /* previous match */ +@@ -740,7 +727,9 @@ off_t deflate() + int match_available = 0; /* set if previous match exists */ + register unsigned match_length = MIN_MATCH-1; /* length of best match */ + +- if (compr_level <= 3) return deflate_fast(); /* optimized for speed */ ++ lm_init (pack_level); ++ if (pack_level <= 3) ++ return deflate_fast(); + + /* Process the input block. 
*/ + while (lookahead != 0) { +Index: gzip-1.10/dfltcc.c +=================================================================== +--- /dev/null ++++ gzip-1.10/dfltcc.c +@@ -0,0 +1,441 @@ ++/* dfltcc.c -- compress data using IBM Z DEFLATE COMPRESSION CALL ++ ++ Copyright (C) 2019 Free Software Foundation, Inc. ++ ++ This program is free software; you can redistribute it and/or modify ++ it under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 3, or (at your option) ++ any later version. ++ ++ This program is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ GNU General Public License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with this program; if not, write to the Free Software Foundation, ++ Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ ++ ++#include ++ ++#include ++#include ++#include ++ ++#ifdef DFLTCC_USDT ++# include ++#endif ++ ++#include "tailor.h" ++#include "gzip.h" ++ ++#ifdef DYN_ALLOC ++# error "DYN_ALLOC is not supported by DFLTCC" ++#endif ++ ++/* =========================================================================== ++ * C wrappers for the DEFLATE CONVERSION CALL instruction. 
++ */ ++ ++typedef enum ++{ ++ DFLTCC_CC_OK = 0, ++ DFLTCC_CC_OP1_TOO_SHORT = 1, ++ DFLTCC_CC_OP2_TOO_SHORT = 2, ++ DFLTCC_CC_OP2_CORRUPT = 2, ++ DFLTCC_CC_AGAIN = 3, ++} dfltcc_cc; ++ ++#define DFLTCC_QAF 0 ++#define DFLTCC_GDHT 1 ++#define DFLTCC_CMPR 2 ++#define DFLTCC_XPND 4 ++#define HBT_CIRCULAR (1 << 7) ++/* #define HB_BITS 15 */ ++/* #define HB_SIZE (1 << HB_BITS) */ ++#define DFLTCC_FACILITY 151 ++#define DFLTCC_FMT0 0 ++#define CVT_CRC32 0 ++#define HTT_FIXED 0 ++#define HTT_DYNAMIC 1 ++ ++#ifndef DFLTCC_BLOCK_SIZE ++# define DFLTCC_BLOCK_SIZE 1048576 ++#endif ++#ifndef DFLTCC_FIRST_FHT_BLOCK_SIZE ++# define DFLTCC_FIRST_FHT_BLOCK_SIZE 4096 ++#endif ++#ifndef DFLTCC_LEVEL_MASK ++# define DFLTCC_LEVEL_MASK 0x2 ++#endif ++#ifndef DFLTCC_RIBM ++# define DFLTCC_RIBM 0 ++#endif ++ ++#define MAX(a, b) ((a) > (b) ? (a) : (b)) ++ ++struct dfltcc_qaf_param ++{ ++ char fns[16]; ++ char reserved1[8]; ++ char fmts[2]; ++ char reserved2[6]; ++}; ++ ++union aligned_dfltcc_qaf_param ++{ ++ struct dfltcc_qaf_param af; ++ char alignas (8) aligned; ++}; ++ ++struct dfltcc_param_v0 ++{ ++ unsigned short pbvn; /* Parameter-Block-Version Number */ ++ unsigned char mvn; /* Model-Version Number */ ++ unsigned char ribm; /* Reserved for IBM use */ ++ unsigned reserved32 : 31; ++ unsigned cf : 1; /* Continuation Flag */ ++ unsigned char reserved64[8]; ++ unsigned nt : 1; /* New Task */ ++ unsigned reserved129 : 1; ++ unsigned cvt : 1; /* Check Value Type */ ++ unsigned reserved131 : 1; ++ unsigned htt : 1; /* Huffman-Table Type */ ++ unsigned bcf : 1; /* Block-Continuation Flag */ ++ unsigned bcc : 1; /* Block Closing Control */ ++ unsigned bhf : 1; /* Block Header Final */ ++ unsigned reserved136 : 1; ++ unsigned reserved137 : 1; ++ unsigned dhtgc : 1; /* DHT Generation Control */ ++ unsigned reserved139 : 5; ++ unsigned reserved144 : 5; ++ unsigned sbb : 3; /* Sub-Byte Boundary */ ++ unsigned char oesc; /* Operation-Ending-Supplemental Code */ ++ unsigned reserved160 : 12; ++ 
unsigned ifs : 4; /* Incomplete-Function Status */ ++ unsigned short ifl; /* Incomplete-Function Length */ ++ unsigned char reserved192[8]; ++ unsigned char reserved256[8]; ++ unsigned char reserved320[4]; ++ unsigned short hl; /* History Length */ ++ unsigned reserved368 : 1; ++ unsigned short ho : 15; /* History Offset */ ++ unsigned int cv; /* Check Value */ ++ unsigned eobs : 15; /* End-of-block Symbol */ ++ unsigned reserved431 : 1; ++ unsigned char eobl : 4; /* End-of-block Length */ ++ unsigned reserved436 : 12; ++ unsigned reserved448 : 4; ++ unsigned short cdhtl : 12; /* Compressed-Dynamic-Huffman Table ++ Length */ ++ unsigned char reserved464[6]; ++ unsigned char cdht[288]; ++ unsigned char reserved[32]; ++ unsigned char csb[1152]; ++}; ++ ++union aligned_dfltcc_param_v0 ++{ ++ struct dfltcc_param_v0 param; ++ char alignas (8) aligned; ++}; ++ ++static int ++is_bit_set (const char *bits, int n) ++{ ++ return bits[n / 8] & (1 << (7 - (n % 8))); ++} ++ ++static int ++is_dfltcc_enabled (void) ++{ ++ char facilities[(DFLTCC_FACILITY / 64 + 1) * 8]; ++ ++ char const *env = getenv ("DFLTCC"); ++ if (env && !strcmp (env, "0")) ++ return 0; ++ ++ register int r0 __asm__ ("r0") = sizeof facilities / 8; ++ __asm__ ("stfle %[facilities]\n" ++ : [facilities] "=Q"(facilities) : [r0] "r"(r0) : "cc", "memory"); ++ return is_bit_set (facilities, DFLTCC_FACILITY); ++} ++ ++static dfltcc_cc ++dfltcc (int fn, void *param, ++ uch **op1, size_t *len1, ++ uch const **op2, size_t *len2, ++ void *hist) ++{ ++ uch *t2 = op1 ? *op1 : NULL; ++ size_t t3 = len1 ? *len1 : 0; ++ const uch *t4 = op2 ? *op2 : NULL; ++ size_t t5 = len2 ? 
*len2 : 0; ++ register int r0 __asm__ ("r0") = fn; ++ register void *r1 __asm__ ("r1") = param; ++ register uch *r2 __asm__ ("r2") = t2; ++ register size_t r3 __asm__ ("r3") = t3; ++ register const uch *r4 __asm__ ("r4") = t4; ++ register size_t r5 __asm__ ("r5") = t5; ++ int cc; ++ ++ __asm__ volatile ( ++#ifdef DFLTCC_USDT ++ STAP_PROBE_ASM (zlib, dfltcc_entry, ++ STAP_PROBE_ASM_TEMPLATE (5)) ++#endif ++ ".insn rrf,0xb9390000,%[r2],%[r4],%[hist],0\n" ++#ifdef DFLTCC_USDT ++ STAP_PROBE_ASM (zlib, dfltcc_exit, ++ STAP_PROBE_ASM_TEMPLATE (5)) ++#endif ++ "ipm %[cc]\n" ++ : [r2] "+r" (r2) ++ , [r3] "+r" (r3) ++ , [r4] "+r" (r4) ++ , [r5] "+r" (r5) ++ , [cc] "=r" (cc) ++ : [r0] "r" (r0) ++ , [r1] "r" (r1) ++ , [hist] "r" (hist) ++#ifdef DFLTCC_USDT ++ , STAP_PROBE_ASM_OPERANDS (5, r2, r3, r4, r5, hist) ++#endif ++ : "cc", "memory"); ++ t2 = r2; t3 = r3; t4 = r4; t5 = r5; ++ ++ if (op1) ++ *op1 = t2; ++ if (len1) ++ *len1 = t3; ++ if (op2) ++ *op2 = t4; ++ if (len2) ++ *len2 = t5; ++ return (cc >> 28) & 3; ++} ++ ++static void ++dfltcc_qaf (struct dfltcc_qaf_param *param) ++{ ++ dfltcc (DFLTCC_QAF, param, NULL, NULL, NULL, NULL, NULL); ++} ++ ++static void ++dfltcc_gdht (struct dfltcc_param_v0 *param) ++{ ++ const uch *next_in = inbuf + inptr; ++ size_t avail_in = insize - inptr; ++ ++ dfltcc (DFLTCC_GDHT, param, NULL, NULL, &next_in, &avail_in, NULL); ++} ++ ++static off_t total_in; ++ ++static dfltcc_cc ++dfltcc_cmpr_xpnd (struct dfltcc_param_v0 *param, int fn) ++{ ++ uch *next_out = outbuf + outcnt; ++ size_t avail_out = OUTBUFSIZ - outcnt; ++ const uch *next_in = inbuf + inptr; ++ size_t avail_in = insize - inptr; ++ dfltcc_cc cc = dfltcc (fn | HBT_CIRCULAR, param, ++ &next_out, &avail_out, ++ &next_in, &avail_in, ++ window); ++ off_t consumed_in = next_in - (inbuf + inptr); ++ inptr += consumed_in; ++ total_in += consumed_in; ++ outcnt += ((OUTBUFSIZ - outcnt) - avail_out); ++ return cc; ++} ++ ++static struct dfltcc_param_v0 * ++init_param (union 
aligned_dfltcc_param_v0 *ctx) ++{ ++ char const *s = getenv ("DFLTCC_RIBM"); ++ struct dfltcc_param_v0 *param = &ctx->param; ++ memset (param, 0, sizeof *param); ++ param->ribm = s && *s ? strtoul (s, NULL, 0) : DFLTCC_RIBM; ++ param->nt = 1; ++ param->cvt = CVT_CRC32; ++ param->cv = __builtin_bswap32 (getcrc ()); ++ return param; ++} ++ ++static void ++bi_close_block (struct dfltcc_param_v0 *param) ++{ ++ bi_valid = param->sbb; ++ bi_buf = bi_valid == 0 ? 0 : outbuf[outcnt] & ((1 << bi_valid) - 1); ++ send_bits (bi_reverse (param->eobs >> (15 - param->eobl), param->eobl), ++ param->eobl); ++ param->bcf = 0; ++} ++ ++static void ++close_block (struct dfltcc_param_v0 *param) ++{ ++ bi_close_block (param); ++ bi_windup (); ++ param->sbb = (param->sbb + param->eobl) % 8; ++ if (param->sbb != 0) ++ { ++ Assert (outcnt > 0, "outbuf must have enough space for EOBS"); ++ outcnt--; ++ } ++} ++ ++static void ++close_stream (struct dfltcc_param_v0 *param) ++{ ++ if (param->bcf) ++ bi_close_block (param); ++ send_bits (1, 3); /* BFINAL=1, BTYPE=00 */ ++ bi_windup (); ++ put_short (0x0000); ++ put_short (0xFFFF); ++} ++ ++/* Compress ifd into ofd in hardware or fall back to software. */ ++ ++int ++dfltcc_deflate (int pack_level) ++{ ++ /* Check whether we can use hardware compression. */ ++ if (!is_dfltcc_enabled () || getenv ("SOURCE_DATE_EPOCH")) ++ return deflate (pack_level); ++ char const *s = getenv ("DFLTCC_LEVEL_MASK"); ++ unsigned long level_mask ++ = s && *s ? strtoul (s, NULL, 0) : DFLTCC_LEVEL_MASK; ++ if ((level_mask & (1 << pack_level)) == 0) ++ return deflate (pack_level); ++ union aligned_dfltcc_qaf_param ctx; ++ dfltcc_qaf (&ctx.af); ++ if (!is_bit_set (ctx.af.fns, DFLTCC_CMPR) ++ || !is_bit_set (ctx.af.fns, DFLTCC_GDHT) ++ || !is_bit_set (ctx.af.fmts, DFLTCC_FMT0)) ++ return deflate (pack_level); ++ ++ /* Initialize tuning parameters. */ ++ s = getenv ("DFLTCC_BLOCK_SIZE"); ++ unsigned long block_size ++ = s && *s ? 
strtoul (s, NULL, 0) : DFLTCC_BLOCK_SIZE; ++ ++ s = getenv ("DFLTCC_FIRST_FHT_BLOCK_SIZE"); ++ off_t block_threshold ++ = s && *s ? strtoul (s, NULL, 0) : DFLTCC_FIRST_FHT_BLOCK_SIZE; ++ ++ union aligned_dfltcc_param_v0 ctx_v0; ++ struct dfltcc_param_v0 *param = init_param (&ctx_v0); ++ ++ /* Compress ifd into ofd in a loop. */ ++ while (true) ++ { ++ /* Flush the output data. */ ++ if (outcnt > OUTBUFSIZ - 8) ++ flush_outbuf (); ++ ++ /* Close the block. */ ++ if (param->bcf && total_in == block_threshold && !param->cf) ++ { ++ close_block (param); ++ block_threshold += block_size; ++ } ++ ++ /* Read the input data. */ ++ if (inptr == insize) ++ { ++ if (fill_inbuf (1) == EOF && !param->cf) ++ break; ++ inptr = 0; ++ } ++ ++ /* Temporarily mask some input data. */ ++ int extra = MAX (0, total_in + (insize - inptr) - block_threshold); ++ insize -= extra; ++ ++ /* Start a new block. */ ++ if (!param->bcf) ++ { ++ if (total_in == 0 && block_threshold > 0) ++ param->htt = HTT_FIXED; ++ else { ++ param->htt = HTT_DYNAMIC; ++ dfltcc_gdht (param); ++ } ++ } ++ ++ /* Compress inbuf into outbuf. */ ++ dfltcc_cmpr_xpnd (param, DFLTCC_CMPR); ++ ++ /* Unmask the input data. */ ++ insize += extra; ++ ++ /* Continue the block */ ++ param->bcf = 1; ++ } ++ ++ close_stream (param); ++ setcrc (__builtin_bswap32 (param->cv)); ++ return 0; ++} ++ ++/* Decompress ifd into ofd in hardware or fall back to software. */ ++int ++dfltcc_inflate (void) ++{ ++ /* Check whether we can use hardware decompression. */ ++ if (!is_dfltcc_enabled ()) ++ return inflate (); ++ union aligned_dfltcc_qaf_param ctx; ++ dfltcc_qaf (&ctx.af); ++ if (!is_bit_set (ctx.af.fns, DFLTCC_XPND)) ++ return inflate (); ++ ++ union aligned_dfltcc_param_v0 ctx_v0; ++ struct dfltcc_param_v0 *param = init_param (&ctx_v0); ++ ++ /* Decompress ifd into ofd in a loop. */ ++ while (true) ++ { ++ /* Perform I/O. 
*/ ++ if (outcnt == OUTBUFSIZ) ++ flush_outbuf (); ++ if (inptr == insize) ++ { ++ if (fill_inbuf (1) == EOF) ++ { ++ /* Premature EOF. */ ++ return 2; ++ } ++ inptr = 0; ++ } ++ ++ /* Decompress inbuf into outbuf. */ ++ dfltcc_cc cc = dfltcc_cmpr_xpnd (param, DFLTCC_XPND); ++ if (cc == DFLTCC_CC_OK) ++ { ++ /* The entire deflate stream has been successfully decompressed. */ ++ break; ++ } ++ if (cc == DFLTCC_CC_OP2_CORRUPT && param->oesc != 0) ++ { ++ /* The deflate stream is corrupted. */ ++ return 2; ++ } ++ /* There must be more data to decompress. */ ++ } ++ ++ if (param->sbb != 0) ++ { ++ /* The deflate stream has ended in the middle of a byte. Go to ++ the next byte boundary, so that unzip can read CRC and length. */ ++ inptr++; ++ } ++ ++ /* Set CRC value and update bytes_out for unzip. */ ++ setcrc (__builtin_bswap32 (param->cv)); ++ flush_outbuf (); ++ return 0; ++} +Index: gzip-1.10/gzip.c +=================================================================== +--- gzip-1.10.orig/gzip.c ++++ gzip-1.10/gzip.c +@@ -58,6 +58,7 @@ static char const *const license_msg[] = + #include + #include + #include ++#include + #include + #include + #include +@@ -128,10 +129,20 @@ static char const *const license_msg[] = + + /* global buffers */ + +-DECLARE(uch, inbuf, INBUFSIZ +INBUF_EXTRA); +-DECLARE(uch, outbuf, OUTBUFSIZ+OUTBUF_EXTRA); ++/* With IBM_Z_DFLTCC, DEFLATE COMPRESSION works faster with ++ page-aligned input and output buffers, and requires page-aligned ++ windows; the alignment requirement is 4096. On other platforms ++ alignment doesn't hurt, and alignment up to 8192 is portable so ++ let's do that. 
*/ ++#ifdef __alignas_is_defined ++# define BUFFER_ALIGNED alignas (8192) ++#else ++# define BUFFER_ALIGNED /**/ ++#endif ++DECLARE(uch BUFFER_ALIGNED, inbuf, INBUFSIZ +INBUF_EXTRA); ++DECLARE(uch BUFFER_ALIGNED, outbuf, OUTBUFSIZ+OUTBUF_EXTRA); + DECLARE(ush, d_buf, DIST_BUFSIZE); +-DECLARE(uch, window, 2L*WSIZE); ++DECLARE(uch BUFFER_ALIGNED, window, 2L*WSIZE); + #ifndef MAXSEG_64K + DECLARE(ush, tab_prefix, 1L< $name || framework_failure_ + + fail=0 + + znew -K $name || fail=1 +-test -f $name || fail=1 ++test -f $name || test -f $gzname || fail=1 + + Exit $fail +Index: gzip-1.10/unzip.c +=================================================================== +--- gzip-1.10.orig/unzip.c ++++ gzip-1.10/unzip.c +@@ -129,7 +129,11 @@ int unzip(in, out) + /* Decompress */ + if (method == DEFLATED) { + ++#ifdef IBM_Z_DFLTCC ++ int res = dfltcc_inflate (); ++#else + int res = inflate(); ++#endif + + if (res == 3) { + xalloc_die (); +Index: gzip-1.10/util.c +=================================================================== +--- gzip-1.10.orig/util.c ++++ gzip-1.10/util.c +@@ -96,6 +96,9 @@ static const ulg crc_32_tab[] = { + 0x2d02ef8dL + }; + ++/* Shift register contents. */ ++static ulg crc = 0xffffffffL; ++ + /* =========================================================================== + * Copy input to output unchanged: zcat == cat with --force. + * IN assertion: insize bytes have already been read in inbuf and inptr bytes +@@ -126,13 +129,11 @@ int copy(in, out) + * Return the current crc in either case. 
+ */ + ulg updcrc(s, n) +- uch *s; /* pointer to bytes to pump through */ ++ const uch *s; /* pointer to bytes to pump through */ + unsigned n; /* number of bytes in s[] */ + { + register ulg c; /* temporary variable */ + +- static ulg crc = (ulg)0xffffffffL; /* shift register contents */ +- + if (s == NULL) { + c = 0xffffffffL; + } else { +@@ -145,6 +146,20 @@ ulg updcrc(s, n) + return c ^ 0xffffffffL; /* (instead of ~c for 64-bit machines) */ + } + ++/* Return a current CRC value. */ ++ulg ++getcrc (void) ++{ ++ return crc ^ 0xffffffffL; ++} ++ ++/* Set a new CRC value. */ ++void ++setcrc (ulg c) ++{ ++ crc = c ^ 0xffffffffL; ++} ++ + /* =========================================================================== + * Clear input and output buffers + */ +@@ -238,7 +253,8 @@ void flush_outbuf() + { + if (outcnt == 0) return; + +- write_buf(ofd, (char *)outbuf, outcnt); ++ if (!test) ++ write_buf (ofd, outbuf, outcnt); + bytes_out += (off_t)outcnt; + outcnt = 0; + } +Index: gzip-1.10/zip.c +=================================================================== +--- gzip-1.10.orig/zip.c ++++ gzip-1.10/zip.c +@@ -23,9 +23,11 @@ + #include "tailor.h" + #include "gzip.h" + +-local ulg crc; /* crc on uncompressed file data */ + off_t header_bytes; /* number of bytes in gzip header */ + ++/* Speed options for the general purpose bit flag. */ ++enum { SLOW = 2, FAST = 4 }; ++ + /* =========================================================================== + * Deflate in to out. + * IN assertions: the input and output buffers are cleared. 
+@@ -68,11 +70,14 @@ int zip(in, out) + put_long (stamp); + + /* Write deflated file to zip file */ +- crc = updcrc(0, 0); ++ updcrc (NULL, 0); + + bi_init(out); + ct_init(&attr, &method); +- lm_init(level, &deflate_flags); ++ if (level == 1) ++ deflate_flags |= FAST; ++ else if (level == 9) ++ deflate_flags |= SLOW; + + put_byte((uch)deflate_flags); /* extra flags */ + put_byte(OS_CODE); /* OS identifier */ +@@ -85,7 +90,11 @@ int zip(in, out) + } + header_bytes = (off_t)outcnt; + +- (void)deflate(); ++#ifdef IBM_Z_DFLTCC ++ dfltcc_deflate (level); ++#else ++ deflate (level); ++#endif + + #ifndef NO_SIZE_CHECK + /* Check input size +@@ -98,7 +107,7 @@ int zip(in, out) + #endif + + /* Write the crc and uncompressed size */ +- put_long(crc); ++ put_long (getcrc ()); + put_long((ulg)bytes_in); + header_bytes += 2*4; + +@@ -126,7 +135,7 @@ int file_read(buf, size) + read_error(); + } + +- crc = updcrc((uch*)buf, len); ++ updcrc ((uch *) buf, len); + bytes_in += (off_t)len; + return (int)len; + } +Index: gzip-1.10/NEWS +=================================================================== +--- gzip-1.10.orig/NEWS ++++ gzip-1.10/NEWS +@@ -20,6 +20,10 @@ GNU gzip NEWS + These bugs most likely do not happen on practical platforms. + [bugs present since the beginning] + ++** Performance improvements ++ ++ IBM Z platforms now support hardware-accelerated deflation. 
++ + + * Noteworthy changes in release 1.9 (2018-01-07) [stable] + diff --git a/gzip.changes b/gzip.changes index 8a7e49c..92fbd34 100644 --- a/gzip.changes +++ b/gzip.changes @@ -1,3 +1,11 @@ +------------------------------------------------------------------- +Thu Jun 6 15:16:47 UTC 2019 - Kristýna Streitová + +- add gzip-1.10-ibm_dfltcc_support.patch [jsc#SLE-5818] + * it adds support for DFLTCC (hardware-accelerated deflation) + for s390x arch + * enable it via "--enable-dfltcc" option + ------------------------------------------------------------------- Sun Dec 30 11:34:12 UTC 2018 - astieger@suse.com diff --git a/gzip.spec b/gzip.spec index 930691d..ad34f70 100644 --- a/gzip.spec +++ b/gzip.spec @@ -1,7 +1,7 @@ # # spec file for package gzip # -# Copyright (c) 2018 SUSE LINUX GmbH, Nuernberg, Germany. +# Copyright (c) 2019 SUSE LINUX GmbH, Nuernberg, Germany. # # All modifications and additions to the file contributed by third parties # remain the property of their copyright owners, unless otherwise agreed @@ -34,6 +34,9 @@ Patch6: zdiff.diff # PATCH FIX OPENSUSE BNC#799561 - zgrep silently fails on LZMA compressed files Patch7: xz_lzma.patch Patch8: manpage-no-date.patch +Patch9: gzip-1.10-ibm_dfltcc_support.patch +BuildRequires: autoconf +BuildRequires: automake BuildRequires: makeinfo BuildRequires: xz Requires(post): %{install_info_prereq} @@ -53,14 +56,25 @@ times. %patch6 %patch7 -p1 %patch8 -p1 +%ifarch s390x +%patch9 -p1 +%endif %build export CFLAGS="%{optflags} -fomit-frame-pointer \ -W -Wall -Wno-unused-parameter -Wstrict-prototypes -Wpointer-arith -fPIE" export LDFLAGS="-pie" +# added because of gzip-1.10-ibm_dfltcc_support.patch [jsc#SLE-5818] +%ifarch s390x +autoreconf -f -i +%endif %configure --disable-silent-rules \ gl_cv_func_printf_directive_n=yes \ - gl_cv_func_printf_infinite_long_double=yes + gl_cv_func_printf_infinite_long_double=yes \ +%ifarch s390x + --enable-dfltcc \ +%endif + profile_gzip() { tmpfile=$(mktemp)