bzip2/bzip2-faster.patch

254 lines
8.2 KiB
Diff
Raw Normal View History

--- bzlib_private.h
+++ bzlib_private.h
@@ -340,6 +340,7 @@
#define MTFA_SIZE 4096
#define MTFL_SIZE 16
+#define HUFCODE_SIZE 10
/*-- Structure holding all the decompression-side stuff. --*/
@@ -407,6 +408,7 @@
Int32 base [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
Int32 perm [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
Int32 minLens[BZ_N_GROUPS];
+ Int16 hufcode[BZ_N_GROUPS][1 << HUFCODE_SIZE];
/* save area for scalars in the main decompress code */
Int32 save_i;
@@ -433,6 +435,7 @@
Int32* save_gLimit;
Int32* save_gBase;
Int32* save_gPerm;
+ Int16* save_gHufCode;
}
DState;
@@ -488,8 +491,8 @@
BZ2_decompress ( DState* );
extern void
-BZ2_hbCreateDecodeTables ( Int32*, Int32*, Int32*, UChar*,
- Int32, Int32, Int32 );
+BZ2_hbCreateDecodeTables ( Int32*, Int32*, Int32*, Int16 *,
+ UChar*, Int32, Int32, Int32 );
#endif
--- decompress.c
+++ decompress.c
@@ -64,6 +64,9 @@
s->strm->total_in_hi32++; \
}
+#define UNGET_BITS(nnn) \
+ s->bsLive += nnn
+
#define GET_UCHAR(lll,uuu) \
GET_BITS(lll,uuu,8)
@@ -83,23 +86,29 @@
gLimit = &(s->limit[gSel][0]); \
gPerm = &(s->perm[gSel][0]); \
gBase = &(s->base[gSel][0]); \
+ gHufCode = &(s->hufcode[gSel][0]); \
} \
groupPos--; \
- zn = gMinlen; \
+ zn = HUFCODE_SIZE; \
GET_BITS(label1, zvec, zn); \
- while (1) { \
- if (zn > 20 /* the longest code */) \
- RETURN(BZ_DATA_ERROR); \
- if (zvec <= gLimit[zn]) break; \
- zn++; \
- GET_BIT(label2, zj); \
- zvec = (zvec << 1) | zj; \
- }; \
- if (zvec - gBase[zn] < 0 \
+ if (gHufCode[zvec]) { \
+ UNGET_BITS(gHufCode[zvec] >> 10); \
+ lval = gHufCode[zvec] & 511; \
+ } else { \
+ while (1) { \
+ if (zn > 20 /* the longest code */) \
+ RETURN(BZ_DATA_ERROR); \
+ if (zvec <= gLimit[zn]) break; \
+ zn++; \
+ GET_BIT(label2, zj); \
+ zvec = (zvec << 1) | zj; \
+ }; \
+ if (zvec - gBase[zn] < 0 \
|| zvec - gBase[zn] >= BZ_MAX_ALPHA_SIZE) \
- RETURN(BZ_DATA_ERROR); \
- lval = gPerm[zvec - gBase[zn]]; \
-}
+ RETURN(BZ_DATA_ERROR); \
+ lval = gPerm[zvec - gBase[zn]]; \
+ } \
+} \
/*---------------------------------------------------*/
@@ -135,6 +144,7 @@
Int32* gLimit;
Int32* gBase;
Int32* gPerm;
+ Int16* gHufCode;
if (s->state == BZ_X_MAGIC_1) {
/*initialise the save area*/
@@ -162,6 +172,7 @@
s->save_gLimit = NULL;
s->save_gBase = NULL;
s->save_gPerm = NULL;
+ s->save_gHufCode = NULL;
}
/*restore from the save area*/
@@ -189,6 +200,7 @@
gLimit = s->save_gLimit;
gBase = s->save_gBase;
gPerm = s->save_gPerm;
+ gHufCode = s->save_gHufCode;
retVal = BZ_OK;
@@ -340,6 +352,7 @@
&(s->limit[t][0]),
&(s->base[t][0]),
&(s->perm[t][0]),
+ &(s->hufcode[t][0]),
&(s->len[t][0]),
minLen, maxLen, alphaSize
);
@@ -414,6 +427,62 @@
if (nblock >= nblockMAX) RETURN(BZ_DATA_ERROR);
/*-- uc = MTF ( nextSym-1 ) --*/
+#if MTFL_SIZE == 16
+ {
+ unsigned char *ppx = s->mtfa + s->mtfbase[0];
+ int lno = 0;
+
+ nextSym--;
+ if (nextSym >= MTFL_SIZE) {
+ lno = nextSym / MTFL_SIZE;
+ nextSym %= MTFL_SIZE;
+ ppx = s->mtfa + s->mtfbase[lno];
+ }
+ uc = ppx[nextSym];
+ switch(nextSym) {
+ case 9: ppx[9] = ppx[10];
+ case 10: ppx[10] = ppx[11];
+ case 11: ppx[11] = ppx[12];
+ case 12: ppx[12] = ppx[13];
+ case 13: ppx[13] = ppx[14];
+ case 14: ppx[14] = ppx[15];
+ case 15: goto copy;
+
+ case 8: ppx[8] = ppx[7];
+ case 7: ppx[7] = ppx[6];
+ case 6: ppx[6] = ppx[5];
+ case 5: ppx[5] = ppx[4];
+ case 4: ppx[4] = ppx[3];
+ case 3: ppx[3] = ppx[2];
+ case 2: ppx[2] = ppx[1];
+ case 1: ppx[1] = ppx[0];
+ default: break;
+ }
+ if (lno) {
+ s->mtfbase[lno]++;
+ copy:
+ while (lno > 0) {
+ s->mtfbase[lno]--;
+ s->mtfa[s->mtfbase[lno]] = s->mtfa[s->mtfbase[lno-1] + MTFL_SIZE - 1];
+ lno--;
+ }
+ s->mtfbase[0]--;
+ if (s->mtfbase[0] == 0) {
+ int ii, jj, kk;
+ s->mtfa[0] = uc;
+ kk = MTFA_SIZE-1;
+ for (ii = 256 / MTFL_SIZE-1; ii >= 0; ii--) {
+ for (jj = MTFL_SIZE-1; jj >= 0; jj--) {
+ s->mtfa[kk] = s->mtfa[s->mtfbase[ii] + jj];
+ kk--;
+ }
+ s->mtfbase[ii] = kk + 1;
+ }
+ }
+ }
+ s->mtfa[s->mtfbase[0]] = uc;
+ }
+#else
{
Int32 ii, jj, kk, pp, lno, off;
UInt32 nn;
@@ -465,6 +534,7 @@
}
}
}
+#endif
/*-- end uc = MTF ( nextSym-1 ) --*/
s->unzftab[s->seqToUnseq[uc]]++;
@@ -616,6 +686,7 @@
s->save_gLimit = gLimit;
s->save_gBase = gBase;
s->save_gPerm = gPerm;
+ s->save_gHufCode = gHufCode;
return retVal;
}
--- huffman.c
+++ huffman.c
@@ -173,13 +173,16 @@
void BZ2_hbCreateDecodeTables ( Int32 *limit,
Int32 *base,
Int32 *perm,
+ Int16 *hufcode,
UChar *length,
Int32 minLen,
Int32 maxLen,
Int32 alphaSize )
{
- Int32 pp, i, j, vec;
+ Int32 pp, i, j, vec, k, vec2;
+ for (i = 0; i < (1 << HUFCODE_SIZE); i++)
+ hufcode[i] = 0;
pp = 0;
for (i = minLen; i <= maxLen; i++)
for (j = 0; j < alphaSize; j++)
@@ -190,16 +193,28 @@
for (i = 1; i < BZ_MAX_CODE_LEN; i++) base[i] += base[i-1];
- for (i = 0; i < BZ_MAX_CODE_LEN; i++) limit[i] = 0;
+ for (i = 0; i < BZ_MAX_CODE_LEN; i++) limit[i] = -1;
vec = 0;
for (i = minLen; i <= maxLen; i++) {
+ if (i <= HUFCODE_SIZE) {
+ for (j = base[i]; j < base[i + 1]; j++) {
+ vec2 = (vec + j - base[i]) << (HUFCODE_SIZE - i);
+ k = (1 << (HUFCODE_SIZE - i));
+ if (vec2 + k > (1 << HUFCODE_SIZE))
+ k = (1 << HUFCODE_SIZE) - vec2;
+ for (; --k >= 0; vec2++)
+ hufcode[vec2] = perm[j] | 512 | (HUFCODE_SIZE - i) << 10;
+ }
+ }
vec += (base[i+1] - base[i]);
limit[i] = vec-1;
vec <<= 1;
}
for (i = minLen + 1; i <= maxLen; i++)
base[i] = ((limit[i-1] + 1) << 1) - base[i];
+ limit[maxLen + 1] = 0x7fffffff; /* make it terminate */
+ base[maxLen + 1] = 0;
}