diff --git a/big-file-performance.patch b/big-file-performance.patch deleted file mode 100644 index 9f3b8ed..0000000 --- a/big-file-performance.patch +++ /dev/null @@ -1,50 +0,0 @@ -From 68b82f6f8419a815cfcf962b3061352d414dc606 Mon Sep 17 00:00:00 2001 -From: Paul Eggert -Date: Tue, 25 Oct 2016 21:57:56 -0700 -Subject: [PATCH] diff: fix big performance degradation in 3.4 - -* NEWS, doc/diffutils.texi (Overview): Document this. -* src/analyze.c (diff_2_files): Restore too_expensive heuristic, -but this time with a floor that is 16 times the old floor. This -should fix Bug#16848, by generating good-quality output for its -test case, while not introducing Bug#24715, by running nearly as -fast as diff-3.3 for that test case. ---- - NEWS | 5 +++++ - doc/diffutils.texi | 5 ++++- - src/analyze.c | 11 ++++++++++- - 3 files changed, 19 insertions(+), 2 deletions(-) - -Index: diffutils-3.5/src/analyze.c -=================================================================== ---- diffutils-3.5.orig/src/analyze.c -+++ diffutils-3.5/src/analyze.c -@@ -534,6 +534,7 @@ diff_2_files (struct comparison *cmp) - { - struct context ctxt; - lin diags; -+ lin too_expensive; - - /* Allocate vectors for the results of comparison: - a flag for each line of each file, saying whether that line -@@ -565,11 +566,19 @@ diff_2_files (struct comparison *cmp) - - ctxt.heuristic = speed_large_files; - -+ /* Set TOO_EXPENSIVE to be the approximate square root of the -+ input size, bounded below by 4096. 4096 seems to be good for -+ circa-2016 CPUs; see Bug#16848 and Bug#24715. */ -+ too_expensive = 1; -+ for (; diags != 0; diags >>= 2) -+ too_expensive <<= 1; -+ ctxt.too_expensive = MAX (4096, too_expensive); -+ - files[0] = cmp->file[0]; - files[1] = cmp->file[1]; - - compareseq (0, cmp->file[0].nondiscarded_lines, -- 0, cmp->file[1].nondiscarded_lines, &ctxt); -+ 0, cmp->file[1].nondiscarded_lines, minimal, &ctxt); - - free (ctxt.fdiag - (cmp->file[1].nondiscarded_lines + 1)); - diff --git a/diffutils-3.5.tar.xz b/diffutils-3.5.tar.xz deleted file mode 100644 index 84acae2..0000000 --- a/diffutils-3.5.tar.xz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:dad398ccd5b9faca6b0ab219a036453f62a602a56203ac659b43e889bec35533 -size 1360996 diff --git a/diffutils-3.5.tar.xz.sig b/diffutils-3.5.tar.xz.sig deleted file mode 100644 index 19ff48d..0000000 --- a/diffutils-3.5.tar.xz.sig +++ /dev/null @@ -1,16 +0,0 @@ ------BEGIN PGP SIGNATURE----- - -iQIcBAABCgAGBQJXuT2mAAoJEH/Z/MsAC+7uECsP/0wVZOh74XKI0Y4XPxqhgKou -L/uFMgqpq2P9Uwr8jnJCr3xsKWCHYSYXOjmzbQ21wkqDWynT+NbLRcH4HLvO6vU3 -EWtoBor7UG0weTDanNfRBFjVLWsHsDWJj7VGMja9OAkXjpqo0f+iYHHIbJ+oKlIY -gzNqUdjGg8RpsvNapz4XuTsoUNDrTtVOy/k9xHUZCw/h1cZBVpaAU8MEE3MReab6 -pOn660BlVqT50vMd09FKRuTLktJ2LBFZ6x+xdPBJm5LFdUFqClbiNaNv+idhlvfB -GC8qjBr4WhuCtGpJKLFADTOZ8UOxcmx2sNz0ypiQrLT1UkTUtY3B0ADnzWuMEcwx -eaPNzdJhPExY64i7MA2vc2MxlRb7omj8kI+n0rBpiFKLMI3x3ZWf6Papg/acYbJg -0NHQkqdc82gH3vsp5DX/wNn+3TNwks9ziVt7Jervk7uQqWaDLrah3waBN3q5UWsk -HLAlkhb94Ahi+cNMk1oiNKqT+en3AhZ/7O6imKzTM8bTk27Ek7q3ThLfBeUKcp2O -j1aaPbaDGEL9pZZDCeuZCSdRZDGMY+spGNYRC4pmYCL9C2LsH5jtX5ob9gQsPHt3 -XFsi5l83i5N3amwzb2OdSTEwM0xgPX8TmcN435409gvz/VVmeSmx5jhHb9m8SLSk -SrDXEuf5yKX8J8HXVMgt -=e0eV ------END PGP SIGNATURE----- diff --git a/gnulib-diffseq.patch b/gnulib-diffseq.patch deleted file mode 100644 index 0361966..0000000 --- a/gnulib-diffseq.patch +++ /dev/null @@ -1,244 +0,0 @@ -From d55ed0636eefeb43ad1aa8123416c33839652646 Mon Sep 17 00:00:00 2001 -From: Paul Eggert -Date: Tue, 25 Oct 2016 14:59:29 -0700 -Subject: [PATCH] diffseq: restore TOO_EXPENSIVE heuristic - -* lib/diffseq.h: Problem with diffutils reported by Andreas Schwab -(Bug#24715). The simplest solution is to restore the -TOO_EXPENSIVE heuristic that I added to GNU diff in 1993, while -using a higher threshold to avoid Bug#16848 on smaller files. -* lib/diffseq.h (struct context): Restore member too_expensive. -(struct partition): Restore members lo_minimal, hi_minimal. -(diag, compareseq): Restore arg find_minimal. All uses changed. -(diag): Restore the TOO_EXPENSIVE heuristic that I added back in -1993 to make 'diff' run faster (but not as well) on large inputs, -but use a threshold of 4096 instead of the old 256. -* lib/fstrcmp.c (strcmp_bounded): -* lib/git-merge-changelog.c (compute_differences): -Adjust to diffseq.h changes. ---- - ChangeLog | 17 +++++++ - lib/diffseq.h | 118 ++++++++++++++++++++++++++++++++++++++++++---- - lib/fstrcmp.c | 10 +++- - lib/git-merge-changelog.c | 3 +- - 4 files changed, 137 insertions(+), 11 deletions(-) - -Index: diffutils-3.5/lib/diffseq.h -=================================================================== ---- diffutils-3.5.orig/lib/diffseq.h -+++ diffutils-3.5/lib/diffseq.h -@@ -34,7 +34,12 @@ - The basic algorithm was independently discovered as described in: - "Algorithms for Approximate String Matching", Esko Ukkonen, - Information and Control Vol. 64, 1985, pp. 100-118, -- . */ -+ . -+ -+ Unless the 'find_minimal' flag is set, this code uses the TOO_EXPENSIVE -+ heuristic, by Paul Eggert, to limit the cost to O(N**1.5 log N) -+ at the price of producing suboptimal output for large inputs with -+ many differences. */ - - /* Before including this file, you need to define: - ELEMENT The element type of the vectors being compared. -@@ -123,6 +128,9 @@ struct context - bool heuristic; - #endif - -+ /* Edit scripts longer than this are too expensive to compute. */ -+ OFFSET too_expensive; -+ - /* Snakes bigger than this are considered "big". */ - #define SNAKE_LIMIT 20 - }; -@@ -132,6 +140,12 @@ struct partition - /* Midpoints of this partition. */ - OFFSET xmid; - OFFSET ymid; -+ -+ /* True if low half will be analyzed minimally. */ -+ bool lo_minimal; -+ -+ /* Likewise for high half. */ -+ bool hi_minimal; - }; - - -@@ -143,10 +157,17 @@ struct partition - When the two searches meet, we have found the midpoint of the shortest - edit sequence. - -- Set *PART to the midpoint (XMID,YMID). The diagonal number -+ If FIND_MINIMAL is true, find the minimal edit script regardless of -+ expense. Otherwise, if the search is too expensive, use heuristics to -+ stop the search and report a suboptimal answer. -+ -+ Set PART->(xmid,ymid) to the midpoint (XMID,YMID). The diagonal number - XMID - YMID equals the number of inserted elements minus the number - of deleted elements (counting only elements before the midpoint). - -+ Set PART->lo_minimal to true iff the minimal edit script for the -+ left half of the partition is known; similarly for PART->hi_minimal. -+ - This function assumes that the first elements of the specified portions - of the two vectors do not match, and likewise that the last elements do not - match. The caller must trim matching elements from the beginning and end -@@ -156,7 +177,7 @@ struct partition - suboptimal diff output. It cannot cause incorrect diff output. */ - - static void --diag (OFFSET xoff, OFFSET xlim, OFFSET yoff, OFFSET ylim, -+diag (OFFSET xoff, OFFSET xlim, OFFSET yoff, OFFSET ylim, bool find_minimal, - struct partition *part, struct context *ctxt) - { - OFFSET *const fd = ctxt->fdiag; /* Give the compiler a chance. */ -@@ -216,6 +237,7 @@ diag (OFFSET xoff, OFFSET xlim, OFFSET y - { - part->xmid = x; - part->ymid = y; -+ part->lo_minimal = part->hi_minimal = true; - return; - } - } -@@ -248,10 +270,14 @@ diag (OFFSET xoff, OFFSET xlim, OFFSET y - { - part->xmid = x; - part->ymid = y; -+ part->lo_minimal = part->hi_minimal = true; - return; - } - } - -+ if (find_minimal) -+ continue; -+ - #ifdef USE_HEURISTIC - /* Heuristic: check occasionally for a diagonal that has made lots - of progress compared with the edit distance. If we have any -@@ -295,7 +321,11 @@ diag (OFFSET xoff, OFFSET xlim, OFFSET y - } - } - if (best > 0) -- return; -+ { -+ part->lo_minimal = true; -+ part->hi_minimal = false; -+ return; -+ } - } - - { -@@ -330,10 +360,77 @@ diag (OFFSET xoff, OFFSET xlim, OFFSET y - } - } - if (best > 0) -- return; -+ { -+ part->lo_minimal = false; -+ part->hi_minimal = true; -+ return; -+ } - } - } - #endif /* USE_HEURISTIC */ -+ -+ /* Heuristic: if we've gone well beyond the call of duty, give up -+ and report halfway between our best results so far. */ -+ if (c >= ctxt->too_expensive) -+ { -+ OFFSET fxybest; -+ OFFSET fxbest IF_LINT (= 0); -+ OFFSET bxybest; -+ OFFSET bxbest IF_LINT (= 0); -+ -+ /* Find forward diagonal that maximizes X + Y. */ -+ fxybest = -1; -+ for (d = fmax; d >= fmin; d -= 2) -+ { -+ OFFSET x = MIN (fd[d], xlim); -+ OFFSET y = x - d; -+ if (ylim < y) -+ { -+ x = ylim + d; -+ y = ylim; -+ } -+ if (fxybest < x + y) -+ { -+ fxybest = x + y; -+ fxbest = x; -+ } -+ } -+ -+ /* Find backward diagonal that minimizes X + Y. */ -+ bxybest = OFFSET_MAX; -+ for (d = bmax; d >= bmin; d -= 2) -+ { -+ OFFSET x = MAX (xoff, bd[d]); -+ OFFSET y = x - d; -+ if (y < yoff) -+ { -+ x = yoff + d; -+ y = yoff; -+ } -+ if (x + y < bxybest) -+ { -+ bxybest = x + y; -+ bxbest = x; -+ } -+ } -+ -+ /* Use the better of the two diagonals. */ -+ if ((xlim + ylim) - bxybest < fxybest - (xoff + yoff)) -+ { -+ part->xmid = fxbest; -+ part->ymid = fxybest - fxbest; -+ part->lo_minimal = true; -+ part->hi_minimal = false; -+ } -+ else -+ { -+ part->xmid = bxbest; -+ part->ymid = bxybest - bxbest; -+ part->lo_minimal = false; -+ part->hi_minimal = true; -+ } -+ return; -+ } - } - #undef XREF_YREF_EQUAL - } -@@ -347,6 +444,9 @@ diag (OFFSET xoff, OFFSET xlim, OFFSET y - Note that XLIM, YLIM are exclusive bounds. All indices into the vectors - are origin-0. - -+ If FIND_MINIMAL, find a minimal difference no matter how -+ expensive it is. -+ - The results are recorded by invoking NOTE_DELETE and NOTE_INSERT. - - Return false if terminated normally, or true if terminated through early -@@ -354,7 +454,7 @@ diag (OFFSET xoff, OFFSET xlim, OFFSET y - - static bool - compareseq (OFFSET xoff, OFFSET xlim, OFFSET yoff, OFFSET ylim, -- struct context *ctxt) -+ bool find_minimal, struct context *ctxt) - { - #ifdef ELEMENT - ELEMENT const *xv = ctxt->xvec; /* Help the compiler. */ -@@ -400,12 +500,12 @@ compareseq (OFFSET xoff, OFFSET xlim, OF - struct partition part IF_LINT2 (= { .xmid = 0, .ymid = 0 }); - - /* Find a point of correspondence in the middle of the vectors. */ -- diag (xoff, xlim, yoff, ylim, &part, ctxt); -+ diag (xoff, xlim, yoff, ylim, find_minimal, &part, ctxt); - - /* Use the partitions to split this problem into subproblems. */ -- if (compareseq (xoff, part.xmid, yoff, part.ymid, ctxt)) -+ if (compareseq (xoff, part.xmid, yoff, part.ymid, part.lo_minimal, ctxt)) - return true; -- if (compareseq (part.xmid, xlim, part.ymid, ylim, ctxt)) -+ if (compareseq (part.xmid, xlim, part.ymid, ylim, part.hi_minimal, ctxt)) - return true; - } -