pigz/pigz-dictzip.patch

239 lines
9.2 KiB
Diff

diff -u -r pigz-2.1.6.orig/pigz.c pigz-2.1.6/pigz.c
--- pigz-2.1.6.orig/pigz.c 2010-01-17 21:09:37.000000000 +0100
+++ pigz-2.1.6/pigz.c 2011-06-02 12:29:12.245689087 +0200
@@ -304,6 +304,8 @@
} \
} while (0)
+#define DZIP_HEADER_LEN 24
+
/* globals (modified by main thread only when it's the only thread) */
local int ind; /* input file descriptor */
local int outd; /* output file descriptor */
@@ -322,11 +324,12 @@
local int list; /* true to list files instead of compress */
local int first = 1; /* true if we need to print listing header */
local int decode; /* 0 to compress, 1 to decompress, 2 to test */
+local int dictzip; /* true to write random access dictionary */
local int level; /* compression level */
local int rsync; /* true for rsync blocking */
local int procs; /* maximum number of compression threads (>= 1) */
local int dict; /* true to initialize dictionary in each thread */
-local size_t size; /* uncompressed input size per thread (>= 32K) */
+size_t size; /* uncompressed input size per thread (>= 32K) */
/* saved gzip/zip header data for decompression, testing, and listing */
local time_t stamp; /* time stamp from gzip header */
@@ -560,6 +563,35 @@
return dos;
}
+/* Return the size in bytes of the file open on fd, or -1 if it cannot be
+   determined (for example when fd is a pipe).
+
+   fstat() is tried first because it does not touch the file offset.  The
+   callers in this file pass the input descriptor, and repositioning it with
+   lseek() while another thread may be reading from it could make that read
+   start at the wrong place.  Seeking is kept only as a fallback for files
+   that are not regular (st_size is then not meaningful), and in that case the
+   original offset is restored before returning. */
+long long get_file_size(int fd)
+{
+    struct stat st;
+    off_t old_pos;
+    off_t end;
+
+    if (fstat(fd, &st) == 0 && S_ISREG(st.st_mode))
+        return (long long)st.st_size;
+
+    old_pos = lseek(fd, 0, SEEK_CUR);
+    if (old_pos == (off_t)-1)
+        return -1;              /* not seekable */
+
+    end = lseek(fd, 0, SEEK_END);
+    if (lseek(fd, old_pos, SEEK_SET) == (off_t)-1)
+        return -1;              /* could not put the offset back */
+
+    return (long long)end;
+}
+
/* put a 4-byte integer into a byte array in LSB order or MSB order */
#define PUT2L(a,b) (*(a)=(b)&0xff,(a)[1]=(b)>>8)
#define PUT4L(a,b) (PUT2L(a,(b)&0xffff),PUT2L((a)+2,(b)>>16))
@@ -613,12 +627,27 @@
head[0] = 31;
head[1] = 139;
head[2] = 8; /* deflate */
- head[3] = name != NULL ? 8 : 0;
+ head[3] = 0;
+ if (dictzip)
+ head[3] |= 4;
+ if (name != NULL)
+ head[3] |= 8;
PUT4L(head + 4, mtime);
head[8] = level == 9 ? 2 : (level == 1 ? 4 : 0);
head[9] = 3; /* unix */
writen(outd, head, 10);
len = 10;
+    if (dictzip) {
+        long long file_len = get_file_size(ind);
+        int extra_len = DZIP_HEADER_LEN + ((file_len / size) + 2) * sizeof(int);
+        /* zeroed placeholder for the extra field, rewritten at offset 10 later */
+        char *extra = calloc(1, extra_len);
+        if (extra == NULL)
+            bail("not enough memory", "");
+        writen(outd, extra, extra_len);
+        free(extra);
+        len += extra_len;
+    }
if (name != NULL)
writen(outd, (unsigned char *)name, strlen(name) + 1);
if (name != NULL)
@@ -1165,6 +1194,10 @@
unsigned long ulen; /* total uncompressed size (overflow ok) */
unsigned long clen; /* total compressed size (overflow ok) */
unsigned long check; /* check value of uncompressed data */
+ char *extra = NULL; /* dictzip extra header field */
+ int dz_cur; /* dictzip current pointer to header */
+ long long file_len = get_file_size(ind);
+ int extra_len = DZIP_HEADER_LEN + ((file_len / size) + 2) * sizeof(int);
(void)dummy;
@@ -1172,6 +1205,25 @@
Trace(("-- write thread running"));
head = put_header();
+    if (dictzip) {
+        /* the extra length is stored in 16 bits: reject an oversized (or
+           negative) length before it is used to allocate or fill anything */
+        if (extra_len >= ((1ULL << 16) - 1))
+            bail("file too long for dictzip\n", "");
+        extra = calloc(1, extra_len + 32);
+        if (extra == NULL)
+            bail("not enough memory", "");
+
+        /* extra length */
+        PUT2L(extra, extra_len - 2);
+        /* Random Access ID */
+        extra[2] = 'R';
+        extra[3] = 'A';
+        /* RA version 99 -- XXX need to standardize! */
+        PUT2L(extra + 6, 99);
+        dz_cur = 0;
+    }
+
/* process output of compress threads until end of input */
ulen = clen = 0;
check = CHECK(0L, Z_NULL, 0);
@@ -1191,6 +1243,15 @@
ulen += (unsigned long)len;
clen += (unsigned long)(job->out->len);
+        if (dictzip) {  /* record this chunk's compressed size in the index */
+            if (((dz_cur * 4) + DZIP_HEADER_LEN + sizeof(int)) > extra_len) {
+                fprintf(stderr, " %d > %lld\n", dz_cur, (long long)(file_len / size));
+                bail ("input file too large\n", "");
+            }
+            PUT4L(extra + (dz_cur * 4) + DZIP_HEADER_LEN, job->out->len);
+            dz_cur++;
+        }
+
/* write the compressed data and drop the output buffer */
Trace(("-- writing #%ld", seq));
writen(outd, job->out->buf, job->out->len);
@@ -1215,6 +1276,22 @@
/* write trailer */
put_trailer(ulen, clen, check, head);
+ if (dictzip) {
+ /* bytes 8..11: chunk length, i.e. the uncompressed input size per chunk */
+ PUT4L(extra + 8, size);
+ /* bytes 12..15: chunk count, the number of sizes stored during output */
+ PUT4L(extra + 12, dz_cur);
+ /* bytes 16..23: size of the input file as a 64-bit value, written as */
+ /* two 32-bit halves in LSB order, low half first */
+ PUT4L(extra + 16, file_len);
+ PUT4L(extra + 20, file_len >> 32);
+ /* seek back to offset 10, just past the fixed gzip header, and overwrite the zeroed placeholder with the finished field */
+ if (lseek(outd, 10, SEEK_SET) != 10)
+ bail("couldn't seek in output file\n", "");
+ writen(outd, extra, extra_len);
+ free(extra);
+ }
+
/* verify no more jobs, prepare for next use */
possess(compress_have);
assert(compress_head == NULL && peek_lock(compress_have) == 0);
@@ -2625,6 +2702,18 @@
/* prepare gzip header information for compression */
name = headis & 1 ? justname(in) : NULL;
mtime = headis & 2 ? st.st_mtime : 0;
+
+ /* Find a sane chunk size: grow size in 1K steps until extra_len is below 65535 */
+ if (dictzip) {
+ long long file_len = get_file_size(ind);
+ int extra_len;
+
+ extra_len = DZIP_HEADER_LEN + ((file_len / size) + 2) * sizeof(int);
+ while (extra_len >= ((1 << 16) - 1)) {
+ size += (1 << 10ULL); /* larger chunks mean fewer index entries */
+ extra_len = DZIP_HEADER_LEN + ((file_len / size) + 2) * sizeof(int);
+ }
+ }
}
SET_BINARY_MODE(ind);
@@ -2678,6 +2767,8 @@
out = malloc(strlen("<stdout>") + 1);
if (out == NULL)
bail("not enough memory", "");
+ if (dictzip)
+ bail("need to have a seekable output for dictzip\n", "");
strcpy(out, "<stdout>");
outd = 1;
if (!decode && !force && isatty(outd))
@@ -2796,6 +2887,7 @@
" -i, --independent Compress blocks independently for damage recovery",
" -R, --rsyncable Input-determined block locations for rsync",
" -d, --decompress Decompress the compressed input",
+" -e, --dictzip Write dictzip random seek information in gzip header",
" -t, --test Test the integrity of the compressed input",
" -l, --list List the contents of the compressed input",
" -f, --force Force overwrite, compress .gz, links, and to terminal",
@@ -2868,17 +2960,18 @@
force = 0; /* don't overwrite, don't compress links */
recurse = 0; /* don't go into directories */
form = 0; /* use gzip format */
+ dictzip = 0; /* don't write dictzip information */
}
/* long options conversion to short options */
local char *longopts[][2] = {
{"LZW", "Z"}, {"ascii", "a"}, {"best", "9"}, {"bits", "Z"},
- {"blocksize", "b"}, {"decompress", "d"}, {"fast", "1"}, {"force", "f"},
- {"help", "h"}, {"independent", "i"}, {"keep", "k"}, {"license", "L"},
- {"list", "l"}, {"name", "N"}, {"no-name", "n"}, {"no-time", "T"},
- {"processes", "p"}, {"quiet", "q"}, {"recursive", "r"}, {"rsyncable", "R"},
- {"silent", "q"}, {"stdout", "c"}, {"suffix", "S"}, {"test", "t"},
- {"to-stdout", "c"}, {"uncompress", "d"}, {"verbose", "v"},
+ {"blocksize", "b"}, {"decompress", "d"}, {"dictzip", "e"}, {"fast", "1"},
+ {"force", "f"}, {"help", "h"}, {"independent", "i"}, {"keep", "k"},
+ {"license", "L"}, {"list", "l"}, {"name", "N"}, {"no-name", "n"},
+ {"no-time", "T"}, {"processes", "p"}, {"quiet", "q"}, {"recursive", "r"},
+ {"rsyncable", "R"}, {"silent", "q"}, {"stdout", "c"}, {"suffix", "S"},
+ {"test", "t"}, {"to-stdout", "c"}, {"uncompress", "d"}, {"verbose", "v"},
{"version", "V"}, {"zip", "K"}, {"zlib", "z"}};
#define NLOPTS (sizeof(longopts) / (sizeof(char *) << 1))
@@ -2984,6 +3077,7 @@
case 'b': get = 1; break;
case 'c': pipeout = 1; break;
case 'd': decode = 1; headis = 0; break;
+ case 'e': dictzip = 1; dict = 0; break;
case 'f': force = 1; break;
case 'h': help(); break;
case 'i': dict = 0; break;
@@ -3112,6 +3206,19 @@
fprintf(stderr, "warning: output is concatenated zip files ");
fprintf(stderr, "-- pigz will not be able to extract\n");
}
+
+    /* dictzip sanity checks: form 0 is gzip, any other value is another format */
+    if (dictzip && form != 0) {
+        fprintf(stderr, "warning: dictzip only works on gzip files\n");
+    }
+
+#if 0
+ if (dictzip && (size >= (64 << 10))) {
+ fprintf(stderr, "warning: dictzip needs chunks < 64k.\n");
+ size = (63 << 10);
+ }
+#endif
+
process(strcmp(argv[n], "-") ? argv[n] : NULL);
done++;
}