diff --git a/big-file-performance.patch b/big-file-performance.patch new file mode 100644 index 0000000..9f3b8ed --- /dev/null +++ b/big-file-performance.patch @@ -0,0 +1,50 @@ +From 68b82f6f8419a815cfcf962b3061352d414dc606 Mon Sep 17 00:00:00 2001 +From: Paul Eggert +Date: Tue, 25 Oct 2016 21:57:56 -0700 +Subject: [PATCH] diff: fix big performance degradation in 3.4 + +* NEWS, doc/diffutils.texi (Overview): Document this. +* src/analyze.c (diff_2_files): Restore too_expensive heuristic, +but this time with a floor that is 16 times the old floor. This +should fix Bug#16848, by generating good-quality output for its +test case, while not introducing Bug#24715, by running nearly as +fast as diff-3.3 for that test case. +--- + NEWS | 5 +++++ + doc/diffutils.texi | 5 ++++- + src/analyze.c | 11 ++++++++++- + 3 files changed, 19 insertions(+), 2 deletions(-) + +Index: diffutils-3.5/src/analyze.c +=================================================================== +--- diffutils-3.5.orig/src/analyze.c ++++ diffutils-3.5/src/analyze.c +@@ -534,6 +534,7 @@ diff_2_files (struct comparison *cmp) + { + struct context ctxt; + lin diags; ++ lin too_expensive; + + /* Allocate vectors for the results of comparison: + a flag for each line of each file, saying whether that line +@@ -565,11 +566,19 @@ diff_2_files (struct comparison *cmp) + + ctxt.heuristic = speed_large_files; + ++ /* Set TOO_EXPENSIVE to be the approximate square root of the ++ input size, bounded below by 4096. 4096 seems to be good for ++ circa-2016 CPUs; see Bug#16848 and Bug#24715. 
*/ ++ too_expensive = 1; ++ for (; diags != 0; diags >>= 2) ++ too_expensive <<= 1; ++ ctxt.too_expensive = MAX (4096, too_expensive); ++ + files[0] = cmp->file[0]; + files[1] = cmp->file[1]; + + compareseq (0, cmp->file[0].nondiscarded_lines, +- 0, cmp->file[1].nondiscarded_lines, &ctxt); ++ 0, cmp->file[1].nondiscarded_lines, minimal, &ctxt); + + free (ctxt.fdiag - (cmp->file[1].nondiscarded_lines + 1)); + diff --git a/diffutils-3.3.tar.xz b/diffutils-3.3.tar.xz deleted file mode 100644 index a0e54a4..0000000 --- a/diffutils-3.3.tar.xz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a25e89a8ab65fded1731e4186be1bb25cda967834b6df973599cdcd5abdfc19c -size 1197832 diff --git a/diffutils-3.3.tar.xz.sig b/diffutils-3.3.tar.xz.sig deleted file mode 100644 index 7d990ff..0000000 --- a/diffutils-3.3.tar.xz.sig +++ /dev/null @@ -1,19 +0,0 @@ ------BEGIN PGP SIGNATURE----- -Version: GnuPG v1.4.13 (GNU/Linux) - -iQJ8BAABCgBmBQJRT0IHXxSAAAAAAC4AKGlzc3Vlci1mcHJAbm90YXRpb25zLm9w -ZW5wZ3AuZmlmdGhob3JzZW1hbi5uZXQxNTVEM0ZDNTAwQzgzNDQ4NkQxRUVBNjc3 -RkQ5RkNDQjAwMEJFRUVFAAoJEH/Z/MsAC+7uEJ0QAKLo6rArnNmg5XL/MTRQyzq4 -4FZgJiPsFnsB0Erd7WFlcLUho+w4R9IjgLdp8QoDwKvc8bnK19e4iJtdtgBEtwPj -IfjTslTTHA6OiUZvGOCh+5OFudMx00QUi0MRHE/zxbZzVui9TRGJzevdLT0xJnIF -vPwiz/cafUojIvWtABpsp+7hH8jadIkQYOBGdzG50fDl6ZudvUoNmnnWZ1+hqi3e -Q+Qu6xawe6LBvyR6ZeTdHVOJ+6pcDM4up65XZXJXQCB5Z7OBlcRB1mVHyHlh9HsE -Ycp2sukZkqrsB+746J9RQqWcMRSrper1rJv+oPsXqHW+rkWOKe+jI0TMo+sPhRmM -47YynCrBLsDgfovWwGHTJ5zds7mZdtqPhoE6Xgj1f7cel6n7kSsgIpXiSIiK/+yI -aBg6CMEl+VqHD8IQCZXkz/wVvzMyJQJF0hIWp/W7ynbYccdsnb/0gCkr13fgR50/ -iogjp9h07dMpt556ByfE2OVQcocjpnT+Nqws+n6RassWiD1+UNiF/6ND97D5m0pn -G9t/qLaxo+hkPM3OcoZ/Y8gP0O95gNZMHWWShDDX21nKx4Lo3ymOXY40n18T4WK2 -0eercgJNCepSAFwuenQn3NODQ/em2bNr54JscGeyn1h6sEqlNNrXz3ZgRfV7O9Y5 -9z0vaNoBjswNhlbaI/OF -=oSVM ------END PGP SIGNATURE----- diff --git a/diffutils-3.5.tar.xz b/diffutils-3.5.tar.xz new file mode 100644 index 0000000..84acae2 --- /dev/null +++ 
b/diffutils-3.5.tar.xz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dad398ccd5b9faca6b0ab219a036453f62a602a56203ac659b43e889bec35533 +size 1360996 diff --git a/diffutils-3.5.tar.xz.sig b/diffutils-3.5.tar.xz.sig new file mode 100644 index 0000000..19ff48d --- /dev/null +++ b/diffutils-3.5.tar.xz.sig @@ -0,0 +1,16 @@ +-----BEGIN PGP SIGNATURE----- + +iQIcBAABCgAGBQJXuT2mAAoJEH/Z/MsAC+7uECsP/0wVZOh74XKI0Y4XPxqhgKou +L/uFMgqpq2P9Uwr8jnJCr3xsKWCHYSYXOjmzbQ21wkqDWynT+NbLRcH4HLvO6vU3 +EWtoBor7UG0weTDanNfRBFjVLWsHsDWJj7VGMja9OAkXjpqo0f+iYHHIbJ+oKlIY +gzNqUdjGg8RpsvNapz4XuTsoUNDrTtVOy/k9xHUZCw/h1cZBVpaAU8MEE3MReab6 +pOn660BlVqT50vMd09FKRuTLktJ2LBFZ6x+xdPBJm5LFdUFqClbiNaNv+idhlvfB +GC8qjBr4WhuCtGpJKLFADTOZ8UOxcmx2sNz0ypiQrLT1UkTUtY3B0ADnzWuMEcwx +eaPNzdJhPExY64i7MA2vc2MxlRb7omj8kI+n0rBpiFKLMI3x3ZWf6Papg/acYbJg +0NHQkqdc82gH3vsp5DX/wNn+3TNwks9ziVt7Jervk7uQqWaDLrah3waBN3q5UWsk +HLAlkhb94Ahi+cNMk1oiNKqT+en3AhZ/7O6imKzTM8bTk27Ek7q3ThLfBeUKcp2O +j1aaPbaDGEL9pZZDCeuZCSdRZDGMY+spGNYRC4pmYCL9C2LsH5jtX5ob9gQsPHt3 +XFsi5l83i5N3amwzb2OdSTEwM0xgPX8TmcN435409gvz/VVmeSmx5jhHb9m8SLSk +SrDXEuf5yKX8J8HXVMgt +=e0eV +-----END PGP SIGNATURE----- diff --git a/diffutils.changes b/diffutils.changes index 679dac6..80472e8 100644 --- a/diffutils.changes +++ b/diffutils.changes @@ -1,3 +1,30 @@ +------------------------------------------------------------------- +Wed Oct 26 07:33:19 UTC 2016 - schwab@suse.de + +- gnulib-diffseq.patch, big-file-performance.patch: Avoid performance + regression on big files (bsc#1004991) + +------------------------------------------------------------------- +Mon Aug 22 20:34:19 UTC 2016 - astieger@suse.com + +- Diffutils 3.5: + * diff3 no longer malfunctions due to use-after-free + [bug introduced in 3.4] + * diff --color no longer colorizes when TERM=dumb + +------------------------------------------------------------------- +Tue Aug 9 09:03:53 UTC 2016 - mpluskal@suse.com + +- Update to version 3.4 + * diff accepts two new options --color 
and --palette to generate + and configure colored output. --color takes an optional + argument specifying when to colorize a line: --color=always, + --color=auto, --color=never. --palette is used to configure + which colors are used. + * many bugfixes +- New -lang subpackage +- Drop no longer needed gnulib-perl522.patch + ------------------------------------------------------------------- Thu Feb 4 09:29:27 UTC 2016 - mpluskal@suse.com diff --git a/diffutils.spec b/diffutils.spec index d64bc6e..9284520 100644 --- a/diffutils.spec +++ b/diffutils.spec @@ -17,7 +17,7 @@ Name: diffutils -Version: 3.3 +Version: 3.5 Release: 0 Summary: GNU diff Utilities License: GFDL-1.2 and GPL-3.0+ @@ -27,10 +27,12 @@ Source0: https://ftp.gnu.org/gnu/%{name}/%{name}-%{version}.tar.xz Source1: https://ftp.gnu.org/gnu/%{name}/%{name}-%{version}.tar.xz.sig # http://savannah.gnu.org/project/memberlist-gpgkeys.php?group=diffutils Source2: %{name}.keyring -Patch0: gnulib-perl522.patch +Patch1: gnulib-diffseq.patch +Patch2: big-file-performance.patch BuildRequires: xz Requires(pre): %{install_info_prereq} Requires(preun): %{install_info_prereq} +Recommends: %{name}-lang = %{version} Provides: diff = %{version} Obsoletes: diff < %{version} BuildRoot: %{_tmppath}/%{name}-%{version}-build @@ -39,9 +41,12 @@ BuildRoot: %{_tmppath}/%{name}-%{version}-build The GNU diff utilities find differences between files. diff is used to make source code patches, for instance. 
+%lang_package + %prep %setup -q -%patch0 -p1 +%patch1 -p1 +%patch2 -p1 %build export CFLAGS="%{optflags} -fPIE" @@ -53,14 +58,14 @@ make %{?_smp_mflags} V=1 make %{?_smp_mflags} check %install -make DESTDIR=%{buildroot} install %{?_smp_mflags} -gzip -9 %{buildroot}%{_infodir}/%{name}.info +%make_install +%find_lang %{name} %post -%install_info --info-dir=%{_infodir} %{_infodir}/%{name}.info.gz +%install_info --info-dir=%{_infodir} %{_infodir}/%{name}.info%{ext_info} %preun -%install_info_delete --info-dir=%{_infodir} %{_infodir}/%{name}.info.gz +%install_info_delete --info-dir=%{_infodir} %{_infodir}/%{name}.info%{ext_info} %files %defattr(-,root,root) @@ -69,10 +74,13 @@ gzip -9 %{buildroot}%{_infodir}/%{name}.info %{_bindir}/diff %{_bindir}/diff3 %{_bindir}/sdiff -%{_infodir}/diffutils.info.* +%{_infodir}/diffutils.info%{ext_info} %{_mandir}/man1/cmp.1%{ext_man} %{_mandir}/man1/diff.1%{ext_man} %{_mandir}/man1/diff3.1%{ext_man} %{_mandir}/man1/sdiff.1%{ext_man} +%files lang -f %{name}.lang +%defattr(-,root,root,-) + %changelog diff --git a/gnulib-diffseq.patch b/gnulib-diffseq.patch new file mode 100644 index 0000000..0361966 --- /dev/null +++ b/gnulib-diffseq.patch @@ -0,0 +1,244 @@ +From d55ed0636eefeb43ad1aa8123416c33839652646 Mon Sep 17 00:00:00 2001 +From: Paul Eggert +Date: Tue, 25 Oct 2016 14:59:29 -0700 +Subject: [PATCH] diffseq: restore TOO_EXPENSIVE heuristic + +* lib/diffseq.h: Problem with diffutils reported by Andreas Schwab +(Bug#24715). The simplest solution is to restore the +TOO_EXPENSIVE heuristic that I added to GNU diff in 1993, while +using a higher threshold to avoid Bug#16848 on smaller files. +* lib/diffseq.h (struct context): Restore member too_expensive. +(struct partition): Restore members lo_minimal, hi_minimal. +(diag, compareseq): Restore arg find_minimal. All uses changed. 
+(diag): Restore the TOO_EXPENSIVE heuristic that I added back in +1993 to make 'diff' run faster (but not as well) on large inputs, +but use a threshold of 4096 instead of the old 256. +* lib/fstrcmp.c (strcmp_bounded): +* lib/git-merge-changelog.c (compute_differences): +Adjust to diffseq.h changes. +--- + ChangeLog | 17 +++++++ + lib/diffseq.h | 118 ++++++++++++++++++++++++++++++++++++++++++---- + lib/fstrcmp.c | 10 +++- + lib/git-merge-changelog.c | 3 +- + 4 files changed, 137 insertions(+), 11 deletions(-) + +Index: diffutils-3.5/lib/diffseq.h +=================================================================== +--- diffutils-3.5.orig/lib/diffseq.h ++++ diffutils-3.5/lib/diffseq.h +@@ -34,7 +34,12 @@ + The basic algorithm was independently discovered as described in: + "Algorithms for Approximate String Matching", Esko Ukkonen, + Information and Control Vol. 64, 1985, pp. 100-118, +- <http://dx.doi.org/10.1016/S0019-9958(85)80046-2>. */ ++ <http://dx.doi.org/10.1016/S0019-9958(85)80046-2>. ++ ++ Unless the 'find_minimal' flag is set, this code uses the TOO_EXPENSIVE ++ heuristic, by Paul Eggert, to limit the cost to O(N**1.5 log N) ++ at the price of producing suboptimal output for large inputs with ++ many differences. */ + + /* Before including this file, you need to define: + ELEMENT The element type of the vectors being compared. +@@ -123,6 +128,9 @@ struct context + bool heuristic; + #endif + ++ /* Edit scripts longer than this are too expensive to compute. */ ++ OFFSET too_expensive; ++ + /* Snakes bigger than this are considered "big". */ + #define SNAKE_LIMIT 20 + }; +@@ -132,6 +140,12 @@ struct partition + /* Midpoints of this partition. */ + OFFSET xmid; + OFFSET ymid; ++ ++ /* True if low half will be analyzed minimally. */ ++ bool lo_minimal; ++ ++ /* Likewise for high half. */ ++ bool hi_minimal; + }; + + +@@ -143,10 +157,17 @@ struct partition + When the two searches meet, we have found the midpoint of the shortest + edit sequence. + +- Set *PART to the midpoint (XMID,YMID). 
The diagonal number ++ If FIND_MINIMAL is true, find the minimal edit script regardless of ++ expense. Otherwise, if the search is too expensive, use heuristics to ++ stop the search and report a suboptimal answer. ++ ++ Set PART->(xmid,ymid) to the midpoint (XMID,YMID). The diagonal number + XMID - YMID equals the number of inserted elements minus the number + of deleted elements (counting only elements before the midpoint). + ++ Set PART->lo_minimal to true iff the minimal edit script for the ++ left half of the partition is known; similarly for PART->hi_minimal. ++ + This function assumes that the first elements of the specified portions + of the two vectors do not match, and likewise that the last elements do not + match. The caller must trim matching elements from the beginning and end +@@ -156,7 +177,7 @@ struct partition + suboptimal diff output. It cannot cause incorrect diff output. */ + + static void +-diag (OFFSET xoff, OFFSET xlim, OFFSET yoff, OFFSET ylim, ++diag (OFFSET xoff, OFFSET xlim, OFFSET yoff, OFFSET ylim, bool find_minimal, + struct partition *part, struct context *ctxt) + { + OFFSET *const fd = ctxt->fdiag; /* Give the compiler a chance. */ +@@ -216,6 +237,7 @@ diag (OFFSET xoff, OFFSET xlim, OFFSET y + { + part->xmid = x; + part->ymid = y; ++ part->lo_minimal = part->hi_minimal = true; + return; + } + } +@@ -248,10 +270,14 @@ diag (OFFSET xoff, OFFSET xlim, OFFSET y + { + part->xmid = x; + part->ymid = y; ++ part->lo_minimal = part->hi_minimal = true; + return; + } + } + ++ if (find_minimal) ++ continue; ++ + #ifdef USE_HEURISTIC + /* Heuristic: check occasionally for a diagonal that has made lots + of progress compared with the edit distance. 
If we have any +@@ -295,7 +321,11 @@ diag (OFFSET xoff, OFFSET xlim, OFFSET y + } + } + if (best > 0) +- return; ++ { ++ part->lo_minimal = true; ++ part->hi_minimal = false; ++ return; ++ } + } + + { +@@ -330,10 +360,77 @@ diag (OFFSET xoff, OFFSET xlim, OFFSET y + } + } + if (best > 0) +- return; ++ { ++ part->lo_minimal = false; ++ part->hi_minimal = true; ++ return; ++ } + } + } + #endif /* USE_HEURISTIC */ ++ ++ /* Heuristic: if we've gone well beyond the call of duty, give up ++ and report halfway between our best results so far. */ ++ if (c >= ctxt->too_expensive) ++ { ++ OFFSET fxybest; ++ OFFSET fxbest IF_LINT (= 0); ++ OFFSET bxybest; ++ OFFSET bxbest IF_LINT (= 0); ++ ++ /* Find forward diagonal that maximizes X + Y. */ ++ fxybest = -1; ++ for (d = fmax; d >= fmin; d -= 2) ++ { ++ OFFSET x = MIN (fd[d], xlim); ++ OFFSET y = x - d; ++ if (ylim < y) ++ { ++ x = ylim + d; ++ y = ylim; ++ } ++ if (fxybest < x + y) ++ { ++ fxybest = x + y; ++ fxbest = x; ++ } ++ } ++ ++ /* Find backward diagonal that minimizes X + Y. */ ++ bxybest = OFFSET_MAX; ++ for (d = bmax; d >= bmin; d -= 2) ++ { ++ OFFSET x = MAX (xoff, bd[d]); ++ OFFSET y = x - d; ++ if (y < yoff) ++ { ++ x = yoff + d; ++ y = yoff; ++ } ++ if (x + y < bxybest) ++ { ++ bxybest = x + y; ++ bxbest = x; ++ } ++ } ++ ++ /* Use the better of the two diagonals. */ ++ if ((xlim + ylim) - bxybest < fxybest - (xoff + yoff)) ++ { ++ part->xmid = fxbest; ++ part->ymid = fxybest - fxbest; ++ part->lo_minimal = true; ++ part->hi_minimal = false; ++ } ++ else ++ { ++ part->xmid = bxbest; ++ part->ymid = bxybest - bxbest; ++ part->lo_minimal = false; ++ part->hi_minimal = true; ++ } ++ return; ++ } + } + #undef XREF_YREF_EQUAL + } +@@ -347,6 +444,9 @@ diag (OFFSET xoff, OFFSET xlim, OFFSET y + Note that XLIM, YLIM are exclusive bounds. All indices into the vectors + are origin-0. + ++ If FIND_MINIMAL, find a minimal difference no matter how ++ expensive it is. 
++ + The results are recorded by invoking NOTE_DELETE and NOTE_INSERT. + + Return false if terminated normally, or true if terminated through early +@@ -354,7 +454,7 @@ diag (OFFSET xoff, OFFSET xlim, OFFSET y + + static bool + compareseq (OFFSET xoff, OFFSET xlim, OFFSET yoff, OFFSET ylim, +- struct context *ctxt) ++ bool find_minimal, struct context *ctxt) + { + #ifdef ELEMENT + ELEMENT const *xv = ctxt->xvec; /* Help the compiler. */ +@@ -400,12 +500,12 @@ compareseq (OFFSET xoff, OFFSET xlim, OF + struct partition part IF_LINT2 (= { .xmid = 0, .ymid = 0 }); + + /* Find a point of correspondence in the middle of the vectors. */ +- diag (xoff, xlim, yoff, ylim, &part, ctxt); ++ diag (xoff, xlim, yoff, ylim, find_minimal, &part, ctxt); + + /* Use the partitions to split this problem into subproblems. */ +- if (compareseq (xoff, part.xmid, yoff, part.ymid, ctxt)) ++ if (compareseq (xoff, part.xmid, yoff, part.ymid, part.lo_minimal, ctxt)) + return true; +- if (compareseq (part.xmid, xlim, part.ymid, ylim, ctxt)) ++ if (compareseq (part.xmid, xlim, part.ymid, ylim, part.hi_minimal, ctxt)) + return true; + } + diff --git a/gnulib-perl522.patch b/gnulib-perl522.patch deleted file mode 100644 index e15505b..0000000 --- a/gnulib-perl522.patch +++ /dev/null @@ -1,15 +0,0 @@ -Subject: http://comments.gmane.org/gmane.comp.lib.gnulib.bugs/35272 - -Index: grep-2.21/build-aux/update-copyright -=================================================================== ---- grep-2.21.orig/build-aux/update-copyright -+++ grep-2.21/build-aux/update-copyright -@@ -124,7 +124,7 @@ use strict; - use warnings; - - my $copyright_re = 'Copyright'; --my $circle_c_re = '(?:\([cC]\)|@copyright{}|©)'; -+my $circle_c_re = '(?:\([cC]\)|@copyright\{}|©)'; - my $holder = $ENV{UPDATE_COPYRIGHT_HOLDER}; - $holder ||= 'Free Software Foundation, Inc.'; - my $prefix_max = 5;