---
 Makefile.in |    3 -
 bestmatch.h |  146 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 common.h    |   11 ++++
 merge.c     |   95 ++++++++++++++++++++++++++++++++++++++++-
 patch.c     |   19 ++++++--
 5 files changed, 264 insertions(+), 10 deletions(-)

Index: b/Makefile.in
===================================================================
--- a/Makefile.in
+++ b/Makefile.in
@@ -85,7 +85,8 @@ HDRS = argmatch.h backupfile.h common.h
 	error.h getopt.h gettext.h \
 	inp.h maketime.h partime.h pch.h \
 	quote.h quotearg.h quotesys.h \
-	unlocked-io.h util.h version.h xalloc.h hash.h
+	unlocked-io.h util.h version.h xalloc.h hash.h \
+	bestmatch.h
 MISC = AUTHORS COPYING ChangeLog INSTALL Makefile.in NEWS README \
 	aclocal.m4 \
 	config.hin configure configure.ac \
Index: b/common.h
===================================================================
--- a/common.h
+++ b/common.h
@@ -296,6 +296,14 @@ void *realloc ();
 #define TTY_DEVICE "/dev/tty"
 #endif
 
+#ifndef MIN
+# define MIN(a, b) ((a) <= (b) ? (a) : (b))
+#endif
+
+#ifndef MAX
+# define MAX(a, b) ((a) >= (b) ? (a) : (b))
+#endif
+
 /* Output stream state. */
 struct outstate
 {
@@ -316,4 +324,5 @@ bool similar (char const *, size_t, char
 bool copy_till (struct outstate *, LINENUM);
 
 /* Defined in merge.c */
-bool merge_hunk (struct outstate *);
+LINENUM locate_merge (LINENUM, LINENUM *);
+bool merge_hunk (struct outstate *, LINENUM, LINENUM);
Index: b/patch.c
===================================================================
--- a/patch.c
+++ b/patch.c
@@ -286,13 +286,22 @@ main (int argc, char **argv)
 		goto skip_hunk;
 	    } else if (!where) {
 		if (merge) {
-		    if (merge_hunk(&outstate)) {
+		    LINENUM matched;
+
+		    where = locate_merge (maxfuzz, &matched);
+		    if (! where)
+		      {
+			where = pch_first () + last_offset;
+			matched = 0;
+		      }
+
+		    if (merge_hunk (&outstate, where, matched))
+		      {
 			merged++;
 			mismatch = 1;
-		    } else {
-			/* FIXME: try harder! */
-			goto skip_hunk;
-		    }
+		      }
+		    else
+		      goto skip_hunk;
 		} else
 		    goto skip_hunk;
 	    } else {
Index: b/merge.c
===================================================================
--- a/merge.c
+++ b/merge.c
@@ -7,8 +7,97 @@
 static bool context_matches_file (LINENUM, LINENUM);
 static bool common_context (LINENUM, LINENUM, LINENUM);
 
+#define OFFSET LINENUM
+#define EQUAL(x, y) (context_matches_file (x, y))
+
+#include "bestmatch.h"
+
+LINENUM
+locate_merge (LINENUM fuzz, LINENUM *matched)
+{
+  LINENUM first_guess = pch_first () + last_offset;
+  LINENUM pat_lines = pch_ptrn_lines();
+  LINENUM suffix_context = pch_suffix_context ();
+  LINENUM max_where = input_lines - (pat_lines - suffix_context) + 1;
+  LINENUM min_where = last_frozen_line + 1;
+  LINENUM max_pos_offset = max_where - first_guess;
+  LINENUM max_neg_offset = first_guess - min_where;
+  LINENUM max_offset = (max_pos_offset < max_neg_offset
+			? max_neg_offset : max_pos_offset);
+  LINENUM prefix_fuzz = MIN (fuzz, pch_prefix_context());
+  LINENUM suffix_fuzz = MIN (fuzz, pch_suffix_context());
+  LINENUM where = 0, max_matched = 0;
+  LINENUM min, max;
+  LINENUM offset;
+
+  /* The minimum number of matched lines and maximum number of changes
+     are mostly guesses. */
+  min = pat_lines - (prefix_fuzz + suffix_fuzz);
+  max = 2 * (prefix_fuzz + suffix_fuzz);
+
+  /* Do not try lines <= 0. */
+  if (first_guess <= max_neg_offset)
+    max_neg_offset = first_guess - 1;
+
+  for (offset = 0; offset <= max_offset; offset++)
+    {
+      if (offset <= max_pos_offset)
+	{
+	  LINENUM guess = first_guess + offset;
+	  LINENUM last;
+	  LINENUM changes;
+
+	  changes = bestmatch(1, pat_lines + 1, guess, input_lines + 1,
+			      min, max, &last);
+	  if (changes <= max && max_matched < last - guess)
+	    {
+	      max_matched = last - guess;
+	      where = guess;
+	      if (changes == 0)
+		break;
+	      min = last - guess;
+	      max = changes - 1;
+	    }
+	}
+      if (0 < offset && offset <= max_neg_offset)
+	{
+	  LINENUM guess = first_guess - offset;
+	  LINENUM last;
+	  LINENUM changes;
+
+	  changes = bestmatch(1, pat_lines + 1, guess, input_lines + 1,
+			      min, max, &last);
+	  if (changes <= max && max_matched < last - guess)
+	    {
+	      max_matched = last - guess;
+	      where = guess;
+	      if (changes == 0)
+		break;
+	      min = last - guess;
+	      max = changes - 1;
+	    }
+	}
+    }
+  if (debug & 1)
+    {
+      char numbuf0[LINENUM_LENGTH_BOUND + 1];
+      char numbuf1[LINENUM_LENGTH_BOUND + 1];
+      char numbuf2[LINENUM_LENGTH_BOUND + 1];
+      char numbuf3[LINENUM_LENGTH_BOUND + 1];
+      say ("locating merge: min=%s max=%s where=%s matched=%s\n",
+	   format_linenum (numbuf0, min),
+	   format_linenum (numbuf1, max),
+	   format_linenum (numbuf2, where),
+	   format_linenum (numbuf3, max_matched));
+    }
+
+  if (where)
+    *matched = max_matched;
+  return where;
+}
+
 bool
-merge_hunk (struct outstate *outstate)
+merge_hunk (struct outstate *outstate, LINENUM where, LINENUM matched)
 {
   LINENUM old = 1;
   LINENUM lastold = pch_ptrn_lines ();
@@ -22,8 +111,8 @@ merge_hunk (struct outstate *outstate)
   while (pch_char(new) == '=' || pch_char(new) == '\n')
     new++;
 
-  merge = pch_first () + last_offset;
-  lastmerge = merge + lastold - 1;
+  merge = where;
+  lastmerge = where + matched - 1;
 
   if (! common_context(lastmerge, lastold, lastnew))
     lastmerge = merge - 1;
Index: b/bestmatch.h
===================================================================
--- /dev/null
+++ b/bestmatch.h
@@ -0,0 +1,146 @@
+/* Before including this file, you need to define:
+     EQUAL(x, y)  A two-argument macro that tests elements
+		  at index x and y for equality.
+     OFFSET	  A signed integer type sufficient to hold the
+		  difference between two indices. Usually
+		  something like ssize_t. */
+
+/*
+ * Shortest Edit Sequence
+ *
+ * Based on the Greedy LCS/SES Algorithm (Figure 2) in:
+ *
+ *   Eugene W. Myers, "An O(ND) Difference Algorithm and Its Variations",
+ *   Algorithmica, Vol. 1, No. 1, pp. 251-266, March 1986.
+ *   Available: http://dx.doi.org/10.1007/BF01840446
+ *   http://xmailserver.org/diff2.pdf
+ *
+ * a[] is the index range [XOFF, XLIM) and b[] is the index range
+ * [YOFF, YLIM); elements are compared with EQUAL (x, y).
+ *
+ * Returns the number of changes (insertions and deletions) required to get
+ * from a[] to b[]. Returns MAX + 1 if a[] cannot be turned into b[] with
+ * MAX or fewer changes.
+ *
+ * MIN specifies the minimum number of elements in which a[] and b[] must
+ * match. This prevents trivial matches in which a sequence is completely
+ * discarded, or completely made up.
+ *
+ * If PY is not NULL, matches a[] against a prefix of b[], and stores in
+ * *PY the index just past the last element of b[] that was matched (-1
+ * if no match was found within MAX changes). Otherwise, matches all
+ * elements of b[]. *PY is left untouched when the function returns early
+ * because fewer than MIN elements remain in b[].
+ *
+ * Note that the divide-and-conquer strategy discussed in section 4b of the
+ * paper is more efficient, but does not allow an open-ended prefix string
+ * search.
+ */
+
+OFFSET
+bestmatch(OFFSET xoff, OFFSET xlim, OFFSET yoff, OFFSET ylim,
+	  OFFSET min, OFFSET max, OFFSET *py)
+{
+  const OFFSET dmin = xoff - ylim;	/* Minimum valid diagonal. */
+  const OFFSET dmax = xlim - yoff;	/* Maximum valid diagonal. */
+  const OFFSET fmid = xoff - yoff;	/* Center diagonal. */
+  OFFSET fmin = fmid;
+  OFFSET fmax = fmid;
+  /* fd[] is V[] indexed by diagonal. Round c touches diagonals
+     fmid - c - 1 through fmid + c + 1 and c goes up to max, so the
+     2 * max + 3 slots must start at diagonal fmid - max - 1. */
+  OFFSET V[2 * max + 3], *fd = V + max + 1 - fmid;
+  OFFSET fmid_plus_2_min, ymax = -1;
+  OFFSET c;
+
+  /*
+     The number of elements that were matched in x and in y can be
+     computed as either (x - x_skipped) or (y - y_skipped), with:
+
+       delta = (x - xoff) - (y - yoff)
+       x_skipped = (c + delta) / 2
+       y_skipped = (c - delta) / 2
+
+     For searching for a minimum number of matching elements, we end up
+     with this check:
+
+       (x - x_skipped) >= min
+        ...
+       x + y - c >= (xoff - yoff) + 2 * min
+       x + y - c >= fmid + 2 * min
+  */
+
+  if (min)
+    {
+      fmid_plus_2_min = fmid + 2 * min;
+      min += yoff;
+      if (min > ylim)
+	return max + 1;
+    }
+  else
+    fmid_plus_2_min = 0;  /* disable this check */
+  if (!py)
+    min = ylim;
+
+  /* Handle the exact-match case. */
+  while (xoff < xlim && yoff < ylim && EQUAL (xoff, yoff))
+    {
+      xoff++;
+      yoff++;
+    }
+  if (xoff == xlim && yoff >= min
+      && xoff + yoff >= fmid_plus_2_min)
+    {
+      ymax = yoff;
+      c = 0;
+    }
+  else
+    {
+      fd[fmid] = xoff;
+      for (c = 1; c <= max; c++)
+	{
+	  OFFSET d;
+
+	  if (fmin > dmin)
+	    fd[--fmin - 1] = -1;
+	  else
+	    ++fmin;
+	  if (fmax < dmax)
+	    fd[++fmax + 1] = -1;
+	  else
+	    --fmax;
+	  for (d = fmax; d >= fmin; d -= 2)
+	    {
+	      OFFSET x, y;
+
+	      if (fd[d - 1] < fd[d + 1])
+		x = fd[d + 1];
+	      else
+		x = fd[d - 1] + 1;
+	      for (y = x - d;
+		   x < xlim && y < ylim && EQUAL (x, y);
+		   x++, y++)
+		continue;
+	      fd[d] = x;
+	      if (x == xlim && y >= min
+		  && x + y - c >= fmid_plus_2_min)
+		{
+		  if (ymax < y)
+		    ymax = y;
+		  if (y == ylim)
+		    goto done;
+		}
+	    }
+	  if (ymax != -1)
+	    goto done;
+	}
+    }
+
+ done:
+  if (py)
+    *py = ymax;
+  return c;
+}
+
+#undef OFFSET
+#undef EQUAL