SHA256
1
0
forked from pool/patch
patch/diff3-style-merges-locate-merge.diff

338 lines
9.1 KiB
Diff

---
Makefile.in | 3 -
bestmatch.h | 138 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
common.h | 11 ++++
merge.c | 95 ++++++++++++++++++++++++++++++++++++++++-
patch.c | 19 ++++++--
5 files changed, 256 insertions(+), 10 deletions(-)
Index: b/Makefile.in
===================================================================
--- a/Makefile.in
+++ b/Makefile.in
@@ -85,7 +85,8 @@ HDRS = argmatch.h backupfile.h common.h
error.h getopt.h gettext.h \
inp.h maketime.h partime.h pch.h \
quote.h quotearg.h quotesys.h \
- unlocked-io.h util.h version.h xalloc.h hash.h
+ unlocked-io.h util.h version.h xalloc.h hash.h \
+ bestmatch.h
MISC = AUTHORS COPYING ChangeLog INSTALL Makefile.in NEWS README \
aclocal.m4 \
config.hin configure configure.ac \
Index: b/common.h
===================================================================
--- a/common.h
+++ b/common.h
@@ -296,6 +296,14 @@ void *realloc ();
#define TTY_DEVICE "/dev/tty"
#endif
+#ifndef MIN /* keep any MIN that is already defined at this point */
+# define MIN(a, b) ((a) <= (b) ? (a) : (b)) /* smaller of a and b; one argument is expanded twice, so pass side-effect-free expressions */
+#endif
+
+#ifndef MAX /* keep any MAX that is already defined at this point */
+# define MAX(a, b) ((a) >= (b) ? (a) : (b)) /* larger of a and b; one argument is expanded twice, so pass side-effect-free expressions */
+#endif
+
/* Output stream state. */
struct outstate
{
@@ -316,4 +324,5 @@ bool similar (char const *, size_t, char
bool copy_till (struct outstate *, LINENUM);
/* Defined in merge.c */
-bool merge_hunk (struct outstate *);
+LINENUM locate_merge (LINENUM, LINENUM *); /* (fuzz, matched): returns the input line chosen for the merge, or 0 if none was found; *matched is written only on success */
+bool merge_hunk (struct outstate *, LINENUM, LINENUM); /* (outstate, where, matched): merges the hunk at the location supplied by the caller */
Index: b/patch.c
===================================================================
--- a/patch.c
+++ b/patch.c
@@ -286,13 +286,22 @@ main (int argc, char **argv)
goto skip_hunk;
} else if (!where) {
if (merge) {
- if (merge_hunk(&outstate)) {
+ LINENUM matched;
+
+ where = locate_merge (maxfuzz, &matched);
+ if (! where)
+ {
+ where = pch_first () + last_offset;
+ matched = 0;
+ }
+
+ if (merge_hunk (&outstate, where, matched))
+ {
merged++;
mismatch = 1;
- } else {
- /* FIXME: try harder! */
- goto skip_hunk;
- }
+ }
+ else
+ goto skip_hunk;
} else
goto skip_hunk;
} else {
Index: b/merge.c
===================================================================
--- a/merge.c
+++ b/merge.c
@@ -7,8 +7,97 @@
static bool context_matches_file (LINENUM, LINENUM);
static bool common_context (LINENUM, LINENUM, LINENUM);
+#define OFFSET LINENUM /* index type that bestmatch.h requires its includer to define */
+#define EQUAL(x, y) (context_matches_file (x, y)) /* element test required by bestmatch.h; locate_merge passes pattern indices as x and input-file line numbers as y */
+
+#include "bestmatch.h" /* defines bestmatch() in terms of OFFSET and EQUAL, then #undefs both */
+
+LINENUM /* Returns the input line at which the current hunk should be merged, or 0 if no candidate was accepted. */
+locate_merge (LINENUM fuzz, LINENUM *matched) /* *matched receives the matched length, and only when the result is nonzero */
+{
+ LINENUM first_guess = pch_first () + last_offset; /* the search is centered on this line */
+ LINENUM pat_lines = pch_ptrn_lines();
+ LINENUM suffix_context = pch_suffix_context ();
+ LINENUM max_where = input_lines - (pat_lines - suffix_context) + 1; /* upper end of the range of start lines */
+ LINENUM min_where = last_frozen_line + 1; /* lower end of the range of start lines */
+ LINENUM max_pos_offset = max_where - first_guess; /* may be negative, in which case no forward candidate is tried */
+ LINENUM max_neg_offset = first_guess - min_where;
+ LINENUM max_offset = (max_pos_offset < max_neg_offset
+ ? max_neg_offset : max_pos_offset); /* the larger of the two distances bounds the loop */
+ LINENUM prefix_fuzz = MIN (fuzz, pch_prefix_context());
+ LINENUM suffix_fuzz = MIN (fuzz, pch_suffix_context());
+ LINENUM where = 0, max_matched = 0; /* best start line so far and the length matched there */
+ LINENUM min, max; /* limits handed to bestmatch(); tightened after every accepted candidate */
+ LINENUM offset;
+
+ /* The minimum number of matched lines and maximum number of changes
+ are mostly guesses; both are derived from the clamped fuzz values. */
+ min = pat_lines - (prefix_fuzz + suffix_fuzz);
+ max = 2 * (prefix_fuzz + suffix_fuzz); /* presumably 2x because replacing a line counts as a deletion plus an insertion -- confirm */
+
+ /* Do not try lines <= 0. */
+ if (first_guess <= max_neg_offset)
+ max_neg_offset = first_guess - 1;
+
+ for (offset = 0; offset <= max_offset; offset++) /* visits first_guess, then +1, -1, +2, -2, ... */
+ {
+ if (offset <= max_pos_offset)
+ {
+ LINENUM guess = first_guess + offset;
+ LINENUM last; /* input index stored by bestmatch() through its last argument */
+ LINENUM changes;
+
+ changes = bestmatch(1, pat_lines + 1, guess, input_lines + 1,
+ min, max, &last);
+ if (changes <= max && max_matched < last - guess) /* within the change limit and longer than the best so far */
+ {
+ max_matched = last - guess;
+ where = guess;
+ if (changes == 0)
+ break; /* exact match: leaves the offset loop */
+ min = last - guess; /* later candidates are searched with this as the new minimum */
+ max = changes - 1; /* and must need strictly fewer changes */
+ }
+ }
+ if (0 < offset && offset <= max_neg_offset) /* 0 < offset: first_guess itself was already tried above */
+ {
+ LINENUM guess = first_guess - offset;
+ LINENUM last; /* input index stored by bestmatch() through its last argument */
+ LINENUM changes;
+
+ changes = bestmatch(1, pat_lines + 1, guess, input_lines + 1,
+ min, max, &last);
+ if (changes <= max && max_matched < last - guess) /* same acceptance rule as the forward candidate */
+ {
+ max_matched = last - guess;
+ where = guess;
+ if (changes == 0)
+ break; /* exact match: leaves the offset loop */
+ min = last - guess;
+ max = changes - 1;
+ }
+ }
+ }
+ if (debug & 1) /* report the final limits and the chosen position */
+ {
+ char numbuf0[LINENUM_LENGTH_BOUND + 1];
+ char numbuf1[LINENUM_LENGTH_BOUND + 1];
+ char numbuf2[LINENUM_LENGTH_BOUND + 1];
+ char numbuf3[LINENUM_LENGTH_BOUND + 1];
+ say ("locating merge: min=%s max=%s where=%s matched=%s\n",
+ format_linenum (numbuf0, min),
+ format_linenum (numbuf1, max),
+ format_linenum (numbuf2, where),
+ format_linenum (numbuf3, max_matched));
+ }
+
+ if (where)
+ *matched = max_matched; /* left untouched when nothing was found */
+ return where;
+}
+
bool
-merge_hunk (struct outstate *outstate)
+merge_hunk (struct outstate *outstate, LINENUM where, LINENUM matched)
{
LINENUM old = 1;
LINENUM lastold = pch_ptrn_lines ();
@@ -22,8 +111,8 @@ merge_hunk (struct outstate *outstate)
while (pch_char(new) == '=' || pch_char(new) == '\n')
new++;
- merge = pch_first () + last_offset;
- lastmerge = merge + lastold - 1;
+ merge = where;
+ lastmerge = where + matched - 1;
if (! common_context(lastmerge, lastold, lastnew))
lastmerge = merge - 1;
Index: b/bestmatch.h
===================================================================
--- /dev/null
+++ b/bestmatch.h
@@ -0,0 +1,138 @@
+/* Before including this file, you need to define:
+ EQUAL(x, y) A two-argument macro that tests elements
+ at index x and y for equality.
+ OFFSET A signed integer type sufficient to hold the
+ difference between two indices. Usually
+ something like ssize_t. */
+
+/*
+ * Shortest Edit Sequence
+ *
+ * Based on the Greedy LCS/SES Algorithm (Figure 2) in:
+ *
+ * Eugene W. Myers, "An O(ND) Difference Algorithm and Its Variations",
+ * Algorithmica, Vol. 1, No. 1, pp. 251-266, March 1986.
+ * Available: http://dx.doi.org/10.1007/BF01840446
+ * http://xmailserver.org/diff2.pdf
+ *
+ * Returns the number of changes (insertions and deletions) required to get
+ * from a[] to b[]. Returns MAX + 1 if a[] cannot be turned into b[] with
+ * MAX or fewer changes.
+ *
+ * MIN specifies the minimum number of elements in which a[] and b[] must
+ * match. This prevents trivial matches in which a sequence is completely
+ * discarded, or completely made up.
+ *
+ * If PY is not NULL, matches a[] against a prefix of b[], and stores in *PY
+ * the index just past the matched prefix of b[] (an index, not a count).
+ * Otherwise, matches all elements of b[].
+ *
+ * Note that the divide-and-conquer strategy discussed in section 4b of the
+ * paper is more efficient, but does not allow an open-ended prefix string
+ * search.
+ */
+
+OFFSET /* Number of changes needed, or MAX + 1 if no match within MAX changes exists (contract in the comment above). */
+bestmatch(OFFSET xoff, OFFSET xlim, OFFSET yoff, OFFSET ylim,
+ OFFSET min, OFFSET max, OFFSET *py) /* a[] is [xoff, xlim), b[] is [yoff, ylim); MAX must be >= 0 because it sizes V below */
+{
+ const OFFSET dmin = xoff - ylim; /* Minimum valid diagonal. */
+ const OFFSET dmax = xlim - yoff; /* Maximum valid diagonal. */
+ const OFFSET fmid = xoff - yoff; /* Center diagonal. */
+ OFFSET fmin = fmid; /* Lowest diagonal currently explored. */
+ OFFSET fmax = fmid; /* Highest diagonal currently explored. */
+ OFFSET V[2 * max + 3], *fd = V + max + 1 - fmid; /* fd[fmid - max - 1 .. fmid + max + 1] maps exactly onto V[0 .. 2 * max + 2]; a base of max + 2 would put the top sentinel one past the end of V */
+ OFFSET fmid_plus_2_min, ymax = -1; /* ymax == -1 means "no acceptable end point found yet" */
+ OFFSET c;
+
+ /*
+ The number of elements that were matched in x and in y can be
+ computed as either (x - x_skipped) or (y - y_skipped), with:
+
+ delta = (x - xoff) - (y - yoff)
+ x_skipped = (c + delta) / 2
+ y_skipped = (c - delta) / 2
+
+ For searching for a minimum number of matching elements, we end up
+ with this check:
+
+ (x - x_skipped) >= min
+ ...
+ x + y - c >= (xoff - yoff) + 2 * min
+ x + y - c >= fmid + 2 * min
+ */
+
+ if (min)
+ {
+ fmid_plus_2_min = fmid + 2 * min; /* NOTE(review): expanding the definitions above with x taken relative to xoff gives xoff + yoff + 2 * min, not fmid + 2 * min -- confirm which is intended */
+ min += yoff; /* from here on MIN is an absolute index into b[] */
+ if (min > ylim)
+ return max + 1; /* b[] is shorter than MIN elements; *PY is not written on this path */
+ }
+ else
+ fmid_plus_2_min = 0; /* disable this check */
+ if (!py)
+ min = ylim; /* no prefix search: only end points that consume all of b[] qualify */
+
+ /* Handle the exact-match case. */
+ while (xoff < xlim && yoff < ylim && EQUAL (xoff, yoff))
+ {
+ xoff++;
+ yoff++;
+ }
+ if (xoff == xlim && yoff >= min
+ && xoff + yoff >= fmid_plus_2_min)
+ {
+ ymax = yoff;
+ c = 0;
+ }
+ else
+ {
+ fd[fmid] = xoff; /* furthest x reached on the center diagonal with zero changes */
+ for (c = 1; c <= max; c++) /* c is the number of changes spent so far */
+ {
+ OFFSET d;
+
+ if (fmin > dmin)
+ fd[--fmin - 1] = -1; /* widen the band downwards and plant a sentinel just below it */
+ else
+ ++fmin; /* already at the lowest valid diagonal */
+ if (fmax < dmax)
+ fd[++fmax + 1] = -1; /* widen the band upwards and plant a sentinel just above it */
+ else
+ --fmax; /* already at the highest valid diagonal */
+ for (d = fmax; d >= fmin; d -= 2)
+ {
+ OFFSET x, y;
+
+ if (fd[d - 1] < fd[d + 1])
+ x = fd[d + 1]; /* arrive from diagonal d + 1: x is unchanged */
+ else
+ x = fd[d - 1] + 1; /* arrive from diagonal d - 1: x advances by one */
+ for (y = x - d;
+ x < xlim && y < ylim && EQUAL (x, y);
+ x++, y++)
+ continue; /* follow the run of equal elements along diagonal d */
+ fd[d] = x;
+ if (x == xlim && y >= min
+ && x + y - c >= fmid_plus_2_min)
+ {
+ if (ymax < y)
+ ymax = y; /* keep the end point that reaches furthest into b[] */
+ if (y == ylim)
+ goto done; /* all of b[] consumed: cannot be improved */
+ }
+ }
+ if (ymax != -1)
+ goto done; /* a solution with c changes exists; more changes are never tried */
+ }
+ }
+
+ done:
+ if (py)
+ *py = ymax; /* -1 when the loop ran out without finding an end point */
+ return c; /* equals max + 1 when the loop ran out */
+}
+
+#undef OFFSET /* drop the includer-supplied parameter macros described at the top of this file */
+#undef EQUAL /* presumably so the header can be included again with other definitions -- confirm */