From 68b82f6f8419a815cfcf962b3061352d414dc606 Mon Sep 17 00:00:00 2001
From: Paul Eggert <eggert@cs.ucla.edu>
Date: Tue, 25 Oct 2016 21:57:56 -0700
Subject: [PATCH] diff: fix big performance degradation in 3.4

* NEWS, doc/diffutils.texi (Overview): Document this.
* src/analyze.c (diff_2_files): Restore too_expensive heuristic,
but this time with a floor that is 16 times the old floor.
This should fix Bug#16848, by generating good-quality output
for its test case, while not introducing Bug#24715, by running
nearly as fast as diff-3.3 for that test case.
---
 NEWS               |  5 +++++
 doc/diffutils.texi |  5 ++++-
 src/analyze.c      | 11 ++++++++++-
 3 files changed, 19 insertions(+), 2 deletions(-)

Index: diffutils-3.5/src/analyze.c
===================================================================
--- diffutils-3.5.orig/src/analyze.c
+++ diffutils-3.5/src/analyze.c
@@ -534,6 +534,7 @@ diff_2_files (struct comparison *cmp)
     {
       struct context ctxt;
       lin diags;
+      lin too_expensive;
 
       /* Allocate vectors for the results of comparison:
          a flag for each line of each file, saying whether that line
@@ -565,11 +566,19 @@ diff_2_files (struct comparison *cmp)
 
       ctxt.heuristic = speed_large_files;
 
+      /* Set TOO_EXPENSIVE to be the approximate square root of the
+         input size, bounded below by 4096.  4096 seems to be good for
+         circa-2016 CPUs; see Bug#16848 and Bug#24715.  */
+      too_expensive = 1;
+      for (; diags != 0; diags >>= 2)
+        too_expensive <<= 1;
+      ctxt.too_expensive = MAX (4096, too_expensive);
+
       files[0] = cmp->file[0];
       files[1] = cmp->file[1];
 
       compareseq (0, cmp->file[0].nondiscarded_lines,
-                  0, cmp->file[1].nondiscarded_lines, &ctxt);
+                  0, cmp->file[1].nondiscarded_lines, minimal, &ctxt);
 
       free (ctxt.fdiag - (cmp->file[1].nondiscarded_lines + 1));
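
Note on the restored heuristic (not part of the patch): the shift loop halves the bit
length of DIAGS two bits at a time while doubling TOO_EXPENSIVE, which yields a
power of two on the order of the square root of the input size; the result is then
clamped to a floor of 4096, which is 16 times the pre-3.4 floor of 256. The
standalone C sketch below illustrates that calculation only; the MAX macro, the
approx_too_expensive helper name, the long type standing in for lin, and the sample
inputs are illustrative assumptions, not diffutils code.

/* Illustrative sketch of how ctxt.too_expensive is derived.  */
#include <stdio.h>

#define MAX(a, b) ((a) > (b) ? (a) : (b))

static long
approx_too_expensive (long diags)
{
  /* Each iteration drops two bits from DIAGS and adds one bit to the
     result, so the result is roughly sqrt (diags) rounded up to a
     power of two.  */
  long too_expensive = 1;
  for (; diags != 0; diags >>= 2)
    too_expensive <<= 1;

  /* Never let the cost bound fall below 4096.  */
  return MAX (4096, too_expensive);
}

int
main (void)
{
  /* About 100 million diagonals: prints 16384, on the order of
     sqrt (1e8) = 10000.  */
  printf ("%ld\n", approx_too_expensive (100 * 1000 * 1000L));

  /* Small input: the square-root estimate (32) is below the floor,
     so this prints 4096.  */
  printf ("%ld\n", approx_too_expensive (1000));
  return 0;
}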