forked from pool/patch
338 lines
9.1 KiB
Diff
338 lines
9.1 KiB
Diff
---
|
|
Makefile.in | 3 -
|
|
bestmatch.h | 138 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
|
common.h | 11 ++++
|
|
merge.c | 95 ++++++++++++++++++++++++++++++++++++++++-
|
|
patch.c | 19 ++++++--
|
|
5 files changed, 256 insertions(+), 10 deletions(-)
|
|
|
|
Index: b/Makefile.in
|
|
===================================================================
|
|
--- a/Makefile.in
|
|
+++ b/Makefile.in
|
|
@@ -85,7 +85,8 @@ HDRS = argmatch.h backupfile.h common.h
|
|
error.h getopt.h gettext.h \
|
|
inp.h maketime.h partime.h pch.h \
|
|
quote.h quotearg.h quotesys.h \
|
|
- unlocked-io.h util.h version.h xalloc.h hash.h
|
|
+ unlocked-io.h util.h version.h xalloc.h hash.h \
|
|
+ bestmatch.h
|
|
MISC = AUTHORS COPYING ChangeLog INSTALL Makefile.in NEWS README \
|
|
aclocal.m4 \
|
|
config.hin configure configure.ac \
|
|
Index: b/common.h
|
|
===================================================================
|
|
--- a/common.h
|
|
+++ b/common.h
|
|
@@ -296,6 +296,14 @@ void *realloc ();
|
|
#define TTY_DEVICE "/dev/tty"
|
|
#endif
|
|
|
|
+#ifndef MIN
|
|
+# define MIN(a, b) ((a) <= (b) ? (a) : (b))
|
|
+#endif
|
|
+
|
|
+#ifndef MAX
|
|
+# define MAX(a, b) ((a) >= (b) ? (a) : (b))
|
|
+#endif
|
|
+
|
|
/* Output stream state. */
|
|
struct outstate
|
|
{
|
|
@@ -316,4 +324,5 @@ bool similar (char const *, size_t, char
|
|
bool copy_till (struct outstate *, LINENUM);
|
|
|
|
/* Defined in merge.c */
|
|
-bool merge_hunk (struct outstate *);
|
|
+LINENUM locate_merge (LINENUM, LINENUM *);
|
|
+bool merge_hunk (struct outstate *, LINENUM, LINENUM);
|
|
Index: b/patch.c
|
|
===================================================================
|
|
--- a/patch.c
|
|
+++ b/patch.c
|
|
@@ -286,13 +286,22 @@ main (int argc, char **argv)
|
|
goto skip_hunk;
|
|
} else if (!where) {
|
|
if (merge) {
|
|
- if (merge_hunk(&outstate)) {
|
|
+ LINENUM matched;
|
|
+
|
|
+ where = locate_merge (maxfuzz, &matched);
|
|
+ if (! where)
|
|
+ {
|
|
+ where = pch_first () + last_offset;
|
|
+ matched = 0;
|
|
+ }
|
|
+
|
|
+ if (merge_hunk (&outstate, where, matched))
|
|
+ {
|
|
merged++;
|
|
mismatch = 1;
|
|
- } else {
|
|
- /* FIXME: try harder! */
|
|
- goto skip_hunk;
|
|
- }
|
|
+ }
|
|
+ else
|
|
+ goto skip_hunk;
|
|
} else
|
|
goto skip_hunk;
|
|
} else {
|
|
Index: b/merge.c
|
|
===================================================================
|
|
--- a/merge.c
|
|
+++ b/merge.c
|
|
@@ -7,8 +7,97 @@
|
|
static bool context_matches_file (LINENUM, LINENUM);
|
|
static bool common_context (LINENUM, LINENUM, LINENUM);
|
|
|
|
+#define OFFSET LINENUM
|
|
+#define EQUAL(x, y) (context_matches_file (x, y))
|
|
+
|
|
+#include "bestmatch.h"
|
|
+
|
|
+LINENUM
|
|
+locate_merge (LINENUM fuzz, LINENUM *matched)
|
|
+{
|
|
+ LINENUM first_guess = pch_first () + last_offset;
|
|
+ LINENUM pat_lines = pch_ptrn_lines();
|
|
+ LINENUM suffix_context = pch_suffix_context ();
|
|
+ LINENUM max_where = input_lines - (pat_lines - suffix_context) + 1;
|
|
+ LINENUM min_where = last_frozen_line + 1;
|
|
+ LINENUM max_pos_offset = max_where - first_guess;
|
|
+ LINENUM max_neg_offset = first_guess - min_where;
|
|
+ LINENUM max_offset = (max_pos_offset < max_neg_offset
|
|
+ ? max_neg_offset : max_pos_offset);
|
|
+ LINENUM prefix_fuzz = MIN (fuzz, pch_prefix_context());
|
|
+ LINENUM suffix_fuzz = MIN (fuzz, pch_suffix_context());
|
|
+ LINENUM where = 0, max_matched = 0;
|
|
+ LINENUM min, max;
|
|
+ LINENUM offset;
|
|
+
|
|
+ /* The minimum number of matched lines and maximum number of changes
|
|
+ are mostly guesses. */
|
|
+ min = pat_lines - (prefix_fuzz + suffix_fuzz);
|
|
+ max = 2 * (prefix_fuzz + suffix_fuzz);
|
|
+
|
|
+ /* Do not try lines <= 0. */
|
|
+ if (first_guess <= max_neg_offset)
|
|
+ max_neg_offset = first_guess - 1;
|
|
+
|
|
+ for (offset = 0; offset <= max_offset; offset++)
|
|
+ {
|
|
+ if (offset <= max_pos_offset)
|
|
+ {
|
|
+ LINENUM guess = first_guess + offset;
|
|
+ LINENUM last;
|
|
+ LINENUM changes;
|
|
+
|
|
+ changes = bestmatch(1, pat_lines + 1, guess, input_lines + 1,
|
|
+ min, max, &last);
|
|
+ if (changes <= max && max_matched < last - guess)
|
|
+ {
|
|
+ max_matched = last - guess;
|
|
+ where = guess;
|
|
+ if (changes == 0)
|
|
+ break;
|
|
+ min = last - guess;
|
|
+ max = changes - 1;
|
|
+ }
|
|
+ }
|
|
+ if (0 < offset && offset <= max_neg_offset)
|
|
+ {
|
|
+ LINENUM guess = first_guess - offset;
|
|
+ LINENUM last;
|
|
+ LINENUM changes;
|
|
+
|
|
+ changes = bestmatch(1, pat_lines + 1, guess, input_lines + 1,
|
|
+ min, max, &last);
|
|
+ if (changes <= max && max_matched < last - guess)
|
|
+ {
|
|
+ max_matched = last - guess;
|
|
+ where = guess;
|
|
+ if (changes == 0)
|
|
+ break;
|
|
+ min = last - guess;
|
|
+ max = changes - 1;
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ if (debug & 1)
|
|
+ {
|
|
+ char numbuf0[LINENUM_LENGTH_BOUND + 1];
|
|
+ char numbuf1[LINENUM_LENGTH_BOUND + 1];
|
|
+ char numbuf2[LINENUM_LENGTH_BOUND + 1];
|
|
+ char numbuf3[LINENUM_LENGTH_BOUND + 1];
|
|
+ say ("locating merge: min=%s max=%s where=%s matched=%s\n",
|
|
+ format_linenum (numbuf0, min),
|
|
+ format_linenum (numbuf1, max),
|
|
+ format_linenum (numbuf2, where),
|
|
+ format_linenum (numbuf3, max_matched));
|
|
+ }
|
|
+
|
|
+ if (where)
|
|
+ *matched = max_matched;
|
|
+ return where;
|
|
+}
|
|
+
|
|
bool
|
|
-merge_hunk (struct outstate *outstate)
|
|
+merge_hunk (struct outstate *outstate, LINENUM where, LINENUM matched)
|
|
{
|
|
LINENUM old = 1;
|
|
LINENUM lastold = pch_ptrn_lines ();
|
|
@@ -22,8 +111,8 @@ merge_hunk (struct outstate *outstate)
|
|
while (pch_char(new) == '=' || pch_char(new) == '\n')
|
|
new++;
|
|
|
|
- merge = pch_first () + last_offset;
|
|
- lastmerge = merge + lastold - 1;
|
|
+ merge = where;
|
|
+ lastmerge = where + matched - 1;
|
|
if (! common_context(lastmerge, lastold, lastnew))
|
|
lastmerge = merge - 1;
|
|
|
|
Index: b/bestmatch.h
|
|
===================================================================
|
|
--- /dev/null
|
|
+++ b/bestmatch.h
|
|
@@ -0,0 +1,138 @@
|
|
+/* Before including this file, you need to define:
|
|
+ EQUAL(x, y) A two-argument macro that tests elements
|
|
+ at index x and y for equality.
|
|
+ OFFSET A signed integer type sufficient to hold the
|
|
+ difference between two indices. Usually
|
|
+ something like ssize_t. */
|
|
+
|
|
+/*
|
|
+ * Shortest Edit Sequence
|
|
+ *
|
|
+ * Based on the Greedy LCS/SES Algorithm (Figure 2) in:
|
|
+ *
|
|
+ * Eugene W. Myers, "An O(ND) Difference Algorithm and Its Variations",
|
|
+ * Algorithmica, Vol. 1, No. 1, pp. 251-266, March 1986.
|
|
+ * Available: http://dx.doi.org/10.1007/BF01840446
|
|
+ * http://xmailserver.org/diff2.pdf
|
|
+ *
|
|
+ * Returns the number of changes (insertions and deletions) required to get
|
|
+ * from a[] to b[]. Returns MAX + 1 if a[] cannot be turned into b[] with
|
|
+ * MAX or fewer changes.
|
|
+ *
|
|
+ * MIN specifies the minimum number of elements in which a[] and b[] must
|
|
+ * match. This prevents trivial matches in which a sequence is
|
|
+ * completely discarded, or completely made up.
|
|
+ *
|
|
+ * If PY is not NULL, matches a[] against a prefix of b[], and stores in
|
|
+ * *PY the index in b[] just past the matched prefix. Otherwise, matches
|
|
+ * all elements of b[].
|
|
+ *
|
|
+ * Note that the divide-and-conquer strategy discussed in section 4b of the
|
|
+ * paper is more efficient, but does not allow an open-ended prefix string
|
|
+ * search.
|
|
+ */
|
|
+
|
|
+OFFSET
|
|
+bestmatch(OFFSET xoff, OFFSET xlim, OFFSET yoff, OFFSET ylim,
|
|
+ OFFSET min, OFFSET max, OFFSET *py)
|
|
+{
|
|
+ const OFFSET dmin = xoff - ylim; /* Minimum valid diagonal. */
|
|
+ const OFFSET dmax = xlim - yoff; /* Maximum valid diagonal. */
|
|
+ const OFFSET fmid = xoff - yoff; /* Center diagonal. */
|
|
+ OFFSET fmin = fmid;
|
|
+ OFFSET fmax = fmid;
|
|
+ OFFSET V[2 * max + 3], *fd = V + max + 2 - fmid;
|
|
+ OFFSET fmid_plus_2_min, ymax = -1;
|
|
+ OFFSET c;
|
|
+
|
|
+ /*
|
|
+ The number of elements that were matched in x and in y can be
|
|
+ computed as either (x - x_skipped) or (y - y_skipped), with:
|
|
+
|
|
+ delta = (x - xoff) - (y - yoff)
|
|
+ x_skipped = (c + delta) / 2
|
|
+ y_skipped = (c - delta) / 2
|
|
+
|
|
+ For searching for a minimum number of matching elements, we end up
|
|
+ with this check:
|
|
+
|
|
+ (x - x_skipped) >= min
|
|
+ ...
|
|
+ x + y - c >= (xoff - yoff) + 2 * min
|
|
+ x + y - c >= fmid + 2 * min
|
|
+ */
|
|
+
|
|
+ if (min)
|
|
+ {
|
|
+ fmid_plus_2_min = fmid + 2 * min;
|
|
+ min += yoff;
|
|
+ if (min > ylim)
|
|
+ return max + 1;
|
|
+ }
|
|
+ else
|
|
+ fmid_plus_2_min = 0; /* disable this check */
|
|
+ if (!py)
|
|
+ min = ylim;
|
|
+
|
|
+ /* Handle the exact-match case. */
|
|
+ while (xoff < xlim && yoff < ylim && EQUAL (xoff, yoff))
|
|
+ {
|
|
+ xoff++;
|
|
+ yoff++;
|
|
+ }
|
|
+ if (xoff == xlim && yoff >= min
|
|
+ && xoff + yoff >= fmid_plus_2_min)
|
|
+ {
|
|
+ ymax = yoff;
|
|
+ c = 0;
|
|
+ }
|
|
+ else
|
|
+ {
|
|
+ fd[fmid] = xoff;
|
|
+ for (c = 1; c <= max; c++)
|
|
+ {
|
|
+ OFFSET d;
|
|
+
|
|
+ if (fmin > dmin)
|
|
+ fd[--fmin - 1] = -1;
|
|
+ else
|
|
+ ++fmin;
|
|
+ if (fmax < dmax)
|
|
+ fd[++fmax + 1] = -1;
|
|
+ else
|
|
+ --fmax;
|
|
+ for (d = fmax; d >= fmin; d -= 2)
|
|
+ {
|
|
+ OFFSET x, y;
|
|
+
|
|
+ if (fd[d - 1] < fd[d + 1])
|
|
+ x = fd[d + 1];
|
|
+ else
|
|
+ x = fd[d - 1] + 1;
|
|
+ for (y = x - d;
|
|
+ x < xlim && y < ylim && EQUAL (x, y);
|
|
+ x++, y++)
|
|
+ continue;
|
|
+ fd[d] = x;
|
|
+ if (x == xlim && y >= min
|
|
+ && x + y - c >= fmid_plus_2_min)
|
|
+ {
|
|
+ if (ymax < y)
|
|
+ ymax = y;
|
|
+ if (y == ylim)
|
|
+ goto done;
|
|
+ }
|
|
+ }
|
|
+ if (ymax != -1)
|
|
+ goto done;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ done:
|
|
+ if (py)
|
|
+ *py = ymax;
|
|
+ return c;
|
|
+}
|
|
+
|
|
+#undef OFFSET
|
|
+#undef EQUAL
|